From 587297e3ac3ea53d20a1b6556ff2e20f82a3e82d Mon Sep 17 00:00:00 2001 From: Sushant Gokhale Date: Mon, 22 Jun 2026 17:44:34 +0530 Subject: [PATCH 001/511] [SLP][Revec] Fix SLP crash when trying to fold trailing scalars into the reduced value (#203477) Cost modelling change introduced by commit SHA f15666d is enabling revec of test shown in the above issue. This is introducing a crash as the reduced value(a scalar) is being added to a vector tail value. Patch tries to address this. Fixes #203195 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 25 ++++---- .../X86/revec-ordered-reductions.ll | 60 +++++++++++++++++++ llvm/test/Transforms/SLPVectorizer/revec.ll | 39 +++++++++++- 3 files changed, 113 insertions(+), 11 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/revec-ordered-reductions.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 52566c094f6a6..4dcf3243a20ad 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -30440,14 +30440,16 @@ class HorizontalReduction { "op.rdx", ReductionOps); } - // Emit ordered reduction for the vectorized window. + // Emit ordered reduction for the vectorized window. The reduction only + // applies to floating point types. + assert(DestTy->isFPOrFPVectorTy() && + SuccessRoot->getType()->isFPOrFPVectorTy() && + "Expected floating point types for ordered reduction"); Builder.SetCurrentDebugLocation( cast(ReductionRoot)->getDebugLoc()); - if (VectorizedTree) - VectorizedTree = - emitReduction(SuccessRoot, Builder, TTI, DestTy, VectorizedTree); - else - VectorizedTree = emitReduction(SuccessRoot, Builder, TTI, DestTy); + VectorizedTree = createSingleOp(Builder, *TTI, SuccessRoot, /*Scale=*/1, + /*IsSigned=*/false, DestTy, + /*ReducedInTree=*/false, VectorizedTree); // Fold trailing scalars [SuccessStart+SuccessWidth, N). 
for (Value *RdxVal : @@ -30490,13 +30492,16 @@ class HorizontalReduction { /// scale \p Scale and signedness \p IsSigned. Value *createSingleOp(IRBuilderBase &Builder, const TargetTransformInfo &TTI, Value *Vec, unsigned Scale, bool IsSigned, Type *DestTy, - bool ReducedInTree) { + bool ReducedInTree, Value *Start = nullptr) { Value *Rdx; if (ReducedInTree) { Rdx = Vec; - } else if (auto *VecTy = dyn_cast(DestTy)) { + } else if (auto *VecTy = dyn_cast(DestTy); + VecTy && SLPReVec) { unsigned DestTyNumElements = getNumElements(VecTy); unsigned VF = getNumElements(Vec->getType()) / DestTyNumElements; + assert(getNumElements(Vec->getType()) % DestTyNumElements == 0 && + "Vec element count must be a multiple of DestTy element count"); // e.g. Consider vector reduce add. // Initial reduction is // clang-format off @@ -30515,7 +30520,7 @@ class HorizontalReduction { // %add0 = add <4 x i32> zeroinitializer, %A // %add1 = add <4 x i32> %add0, %B // clang-format on - Rdx = nullptr; + Rdx = Start; for (auto I : seq(VF)) { auto Position = I * DestTyNumElements; Value *SubVec = @@ -30526,7 +30531,7 @@ class HorizontalReduction { Rdx = createOp(Builder, RdxKind, Rdx, SubVec, "rdx.op", ReductionOps); } } else { - Rdx = emitReduction(Vec, Builder, &TTI, DestTy); + Rdx = emitReduction(Vec, Builder, &TTI, DestTy, Start); } if (Rdx->getType() != DestTy) Rdx = Builder.CreateIntCast(Rdx, DestTy, IsSigned); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/revec-ordered-reductions.ll b/llvm/test/Transforms/SLPVectorizer/X86/revec-ordered-reductions.ll new file mode 100644 index 0000000000000..662c5c5fab267 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/revec-ordered-reductions.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -mtriple=x86_64-unknown-linux-gnu -p slp-vectorizer -slp-revec -S < %s | FileCheck %s --check-prefixes=CHECK,POW2 +; RUN: opt -mtriple=x86_64-unknown-linux-gnu -p 
slp-vectorizer -slp-revec -slp-vectorize-non-power-of-2 -S < %s | FileCheck %s --check-prefixes=CHECK,NON-POW2 + +define <16 x float> @ordered_fadd_chain(ptr %p0, ptr %p1) { +; CHECK-LABEL: define <16 x float> @ordered_fadd_chain( +; CHECK-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) { +; CHECK-NEXT: [[V0:%.*]] = load <16 x float>, ptr [[P0]], align 64 +; CHECK-NEXT: [[V1:%.*]] = load <16 x float>, ptr [[P1]], align 64 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V0]], <16 x float> poison, <64 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <64 x float> , <64 x float> [[TMP1]], <64 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x float> [[V1]], <16 x float> poison, <64 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <64 x float> [[TMP2]], <64 x float> [[TMP3]], <64 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <64 x float> [[TMP4]], <64 x float> poison, <64 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <64 x float> [[TMP5]], <64 x float> poison, <16 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <64 x float> [[TMP5]], <64 x float> poison, <16 x i32> +; CHECK-NEXT: [[OP_RDX:%.*]] = fadd <16 x float> [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <64 x float> [[TMP5]], <64 x float> poison, <16 x i32> +; CHECK-NEXT: [[OP_RDX1:%.*]] = fadd <16 x float> [[OP_RDX]], [[TMP8]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <64 x float> [[TMP5]], <64 x float> poison, <16 x i32> +; CHECK-NEXT: [[OP_RDX2:%.*]] = fadd <16 x float> [[OP_RDX1]], [[TMP9]] +; CHECK-NEXT: ret <16 x float> [[OP_RDX2]] +; + %v0 = load <16 x float>, ptr %p0 + %v1 = load <16 x float>, ptr %p1 + %add0 = fadd <16 x float> %v0, %v0 + %add1 = fadd <16 x float> %add0, %v1 + %add2 = fadd <16 x float> %add1, splat (float 1.000000e+00) + ret <16 x float> %add2 +} + +define <16 x float> @ordered_fadd_chain_non_power_2(ptr %p0, ptr %p1) { +; POW2-LABEL: define <16 x float> @ordered_fadd_chain_non_power_2( +; POW2-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) { +; 
POW2-NEXT: [[V0:%.*]] = load <16 x float>, ptr [[P0]], align 64 +; POW2-NEXT: [[V1:%.*]] = load <16 x float>, ptr [[P1]], align 64 +; POW2-NEXT: [[ADD0:%.*]] = fadd <16 x float> [[V0]], [[V0]] +; POW2-NEXT: [[ADD1:%.*]] = fadd <16 x float> [[ADD0]], splat (float 1.000000e+00) +; POW2-NEXT: ret <16 x float> [[ADD1]] +; +; NON-POW2-LABEL: define <16 x float> @ordered_fadd_chain_non_power_2( +; NON-POW2-SAME: ptr [[P0:%.*]], ptr [[P1:%.*]]) { +; NON-POW2-NEXT: [[V0:%.*]] = load <16 x float>, ptr [[P0]], align 64 +; NON-POW2-NEXT: [[V1:%.*]] = load <16 x float>, ptr [[P1]], align 64 +; NON-POW2-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V0]], <16 x float> poison, <48 x i32> +; NON-POW2-NEXT: [[TMP2:%.*]] = shufflevector <48 x float> , <48 x float> [[TMP1]], <48 x i32> +; NON-POW2-NEXT: [[TMP3:%.*]] = shufflevector <48 x float> [[TMP2]], <48 x float> poison, <48 x i32> +; NON-POW2-NEXT: [[TMP4:%.*]] = shufflevector <48 x float> [[TMP3]], <48 x float> poison, <16 x i32> +; NON-POW2-NEXT: [[TMP5:%.*]] = shufflevector <48 x float> [[TMP3]], <48 x float> poison, <16 x i32> +; NON-POW2-NEXT: [[OP_RDX:%.*]] = fadd <16 x float> [[TMP4]], [[TMP5]] +; NON-POW2-NEXT: [[TMP6:%.*]] = shufflevector <48 x float> [[TMP3]], <48 x float> poison, <16 x i32> +; NON-POW2-NEXT: [[OP_RDX1:%.*]] = fadd <16 x float> [[OP_RDX]], [[TMP6]] +; NON-POW2-NEXT: ret <16 x float> [[OP_RDX1]] +; + %v0 = load <16 x float>, ptr %p0 + %v1 = load <16 x float>, ptr %p1 + %add0 = fadd <16 x float> %v0, %v0 + %add1 = fadd <16 x float> %add0, splat (float 1.000000e+00) + ret <16 x float> %add1 +} diff --git a/llvm/test/Transforms/SLPVectorizer/revec.ll b/llvm/test/Transforms/SLPVectorizer/revec.ll index 346669c53824f..1eefceb249717 100644 --- a/llvm/test/Transforms/SLPVectorizer/revec.ll +++ b/llvm/test/Transforms/SLPVectorizer/revec.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 
-slp-threshold=-100 %s | FileCheck %s +; RUN: opt -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 %s | FileCheck %s --check-prefixes=CHECK,POW2 +; RUN: opt -passes=slp-vectorizer -S -slp-revec -slp-max-reg-size=1024 -slp-threshold=-100 -slp-vectorize-non-power-of-2 %s | FileCheck %s --check-prefixes=CHECK,NON-POW2 define void @test1(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: @test1( @@ -498,3 +499,39 @@ entry: %add3 = add <4 x i32> %add2, %a3 ret <4 x i32> %add3 } + +define <4 x i32> @hor_reduction_four_points_non_pow2(ptr %a) { +; POW2-LABEL: @hor_reduction_four_points_non_pow2( +; POW2-NEXT: entry: +; POW2-NEXT: [[GEP1:%.*]] = getelementptr <4 x i32>, ptr [[A:%.*]], i64 1 +; POW2-NEXT: [[GEP2:%.*]] = getelementptr <4 x i32>, ptr [[A]], i64 2 +; POW2-NEXT: [[A0:%.*]] = load <4 x i32>, ptr [[A]], align 16 +; POW2-NEXT: [[A1:%.*]] = load <4 x i32>, ptr [[GEP1]], align 16 +; POW2-NEXT: [[A2:%.*]] = load <4 x i32>, ptr [[GEP2]], align 16 +; POW2-NEXT: [[ADD0:%.*]] = add <4 x i32> , [[A0]] +; POW2-NEXT: [[ADD1:%.*]] = add <4 x i32> [[ADD0]], [[A1]] +; POW2-NEXT: [[ADD2:%.*]] = add <4 x i32> [[ADD1]], [[A2]] +; POW2-NEXT: ret <4 x i32> [[ADD2]] +; +; NON-POW2-LABEL: @hor_reduction_four_points_non_pow2( +; NON-POW2-NEXT: entry: +; NON-POW2-NEXT: [[TMP0:%.*]] = load <12 x i32>, ptr [[A:%.*]], align 16 +; NON-POW2-NEXT: [[TMP1:%.*]] = shufflevector <12 x i32> [[TMP0]], <12 x i32> poison, <4 x i32> +; NON-POW2-NEXT: [[TMP2:%.*]] = shufflevector <12 x i32> [[TMP0]], <12 x i32> poison, <4 x i32> +; NON-POW2-NEXT: [[RDX_OP:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]] +; NON-POW2-NEXT: [[TMP3:%.*]] = shufflevector <12 x i32> [[TMP0]], <12 x i32> poison, <4 x i32> +; NON-POW2-NEXT: [[RDX_OP1:%.*]] = add <4 x i32> [[RDX_OP]], [[TMP3]] +; NON-POW2-NEXT: [[OP_RDX:%.*]] = add <4 x i32> [[RDX_OP1]], +; NON-POW2-NEXT: ret <4 x i32> [[OP_RDX]] +; +entry: + %gep1 = getelementptr <4 x i32>, ptr %a, i64 1 + %gep2 = getelementptr <4 x i32>, ptr %a, i64 2 + %a0 = 
load <4 x i32>, ptr %a + %a1 = load <4 x i32>, ptr %gep1 + %a2 = load <4 x i32>, ptr %gep2 + %add0 = add <4 x i32> , %a0 + %add1 = add <4 x i32> %add0, %a1 + %add2 = add <4 x i32> %add1, %a2 + ret <4 x i32> %add2 +} From dae689c140170dc735d6e483a920b05f8e4aeca3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 22 Jun 2026 14:15:42 +0200 Subject: [PATCH 002/511] clang: Use the effective triple string for offload jobs (#205065) Track the future effective triple for the job, rather than the toolchain's default triple. In the future this will change the result when amdgpu starts adjusting the triples to contain subarches. --- clang/lib/Driver/ToolChains/Clang.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 906abd1474b6c..ca924ccf9152d 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -9395,9 +9395,11 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, } Triples += Action::GetOffloadKindName(CurKind); Triples += '-'; - Triples += - CurTC->getTriple().normalize(llvm::Triple::CanonicalForm::FOUR_IDENT); - if (CurKind != Action::OFK_Host && + Triples += llvm::Triple(CurTC->ComputeEffectiveClangTriple( + TCArgs, CurDep->getOffloadingArch())) + .normalize(llvm::Triple::CanonicalForm::FOUR_IDENT); + + if ((CurKind != Action::OFK_Host) && !StringRef(CurDep->getOffloadingArch()).empty()) { Triples += '-'; Triples += CurDep->getOffloadingArch(); @@ -9473,9 +9475,12 @@ void OffloadBundler::ConstructJobMultipleOutputs( auto &Dep = DepInfo[I]; Triples += Action::GetOffloadKindName(Dep.DependentOffloadKind); Triples += '-'; - Triples += Dep.DependentToolChain->getTriple().normalize( - llvm::Triple::CanonicalForm::FOUR_IDENT); - if (Dep.DependentOffloadKind != Action::OFK_Host && + Triples += llvm::Triple(Dep.DependentToolChain->ComputeEffectiveClangTriple( + TCArgs, 
Dep.DependentBoundArch)) + .normalize(llvm::Triple::CanonicalForm::FOUR_IDENT); + + if ((Dep.DependentOffloadKind == Action::OFK_HIP || + Dep.DependentOffloadKind == Action::OFK_Cuda) && !Dep.DependentBoundArch.empty()) { Triples += '-'; Triples += Dep.DependentBoundArch; @@ -9544,7 +9549,7 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA, // linker wrapper. SmallVector Parts{ "file=" + File.str(), - "triple=" + TC->getTripleString().str(), + "triple=" + TC->ComputeEffectiveClangTriple(TCArgs, Arch), "arch=" + (Arch.empty() ? "generic" : Arch.str()), "kind=" + Kind.str(), }; From 8ad730d142574d8bb8d124ab78ab2c8c8fae04f3 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 22 Jun 2026 07:21:45 -0500 Subject: [PATCH 003/511] Reapply "[AMDGPU] Add compiler-rt checks for the GPU runtime" (#204898) The original issue should've been solved by https://github.com/llvm/llvm-project/pull/204694 Reverts llvm/llvm-project#204370 --- offload/ci/openmp-offload-amdgpu-libc-runtime.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/offload/ci/openmp-offload-amdgpu-libc-runtime.py b/offload/ci/openmp-offload-amdgpu-libc-runtime.py index f482a5bfc2cf7..e1c4571544ed1 100644 --- a/offload/ci/openmp-offload-amdgpu-libc-runtime.py +++ b/offload/ci/openmp-offload-amdgpu-libc-runtime.py @@ -62,5 +62,12 @@ builddir=llvmbuilddir, ) + with w.step("run check-compiler-rt-amdgcn-amd-amdhsa"): + w.run_ninja( + ["check-compiler-rt-amdgcn-amd-amdhsa"], + add_env={"HSA_ENABLE_SDMA": "0"}, + builddir=llvmbuilddir, + ) + with w.step("LLVM: Install", halt_on_fail=True): w.run_ninja(["install"], builddir=llvmbuilddir) From 4012329a445b83c8ea3b85c86df93ef926a2dda9 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 22 Jun 2026 22:22:52 +1000 Subject: [PATCH 004/511] [orc-rt] Default QueueingRunner to a synchronized queue. 
(#205088) Adds orc_rt::detail::SynchronizedDeque, a mutex-protected deque whose pop_front / pop_back return std::optional (std::nullopt indicates the queue is empty), and makes it QueueingRunner's default WorkQueue type. Using a synchronized queue type allows QueueingRunner to be used in multi-threaded contexts. Updates SessionTest and InProcessControllerAccessTest to use the new default, and extends QueueingRunnerTest to cover the new contract. --- orc-rt/include/orc-rt/QueueingRunner.h | 73 ++++++++++++++----- .../InProcessControllerAccessTest.cpp | 6 +- orc-rt/unittests/QueueingRunnerTest.cpp | 67 +++++++++++++---- orc-rt/unittests/SessionTest.cpp | 31 ++++---- 4 files changed, 124 insertions(+), 53 deletions(-) diff --git a/orc-rt/include/orc-rt/QueueingRunner.h b/orc-rt/include/orc-rt/QueueingRunner.h index b3225f149ba53..86798a92eaf60 100644 --- a/orc-rt/include/orc-rt/QueueingRunner.h +++ b/orc-rt/include/orc-rt/QueueingRunner.h @@ -19,9 +19,45 @@ #include "orc-rt-c/WrapperFunction.h" #include +#include +#include +#include #include namespace orc_rt { +namespace detail { + +template class SynchronizedDeque { +public: + void push_back(T V) { + std::scoped_lock Lock(M); + Q.push_back(std::move(V)); + } + + std::optional pop_back() { + std::scoped_lock Lock(M); + if (Q.empty()) + return std::nullopt; + auto V = std::move(Q.back()); + Q.pop_back(); + return V; + } + + std::optional pop_front() { + std::scoped_lock Lock(M); + if (Q.empty()) + return std::nullopt; + auto V = std::move(Q.front()); + Q.pop_front(); + return V; + } + +private: + std::mutex M; + std::deque Q; +}; + +} // namespace detail /// A wrapper-call runner that pushes incoming calls onto a caller-owned work /// queue, leaving the caller free to drain the queue however and whenever @@ -32,14 +68,19 @@ namespace orc_rt { /// alternatives like ThreadPoolRunner are preferred. 
/// /// WorkQueue may be any container that stores `void()`-callable values and -/// supports `push_back`, `pop_back`, `pop_front`, `back()`, `front()`, and -/// `empty()` (e.g. `std::deque>`). In +/// supports `push_back(T)`, `std::optional pop_back()`, and +/// `std::optional pop_front()`, where the pop operations return +/// `std::nullopt` on an empty queue (e.g. detail::SynchronizedDeque). In /// multi-threaded setups the WorkQueue type itself is responsible for /// providing whatever synchronization is needed for concurrent push and /// drain operations. -template class QueueingRunner { +template >> +class QueueingRunner { public: - QueueingRunner(WorkQueue &Pending) : Pending(Pending) {} + using WorkQueue = WorkQueueT; + + QueueingRunner(WorkQueueT &Pending) : Pending(Pending) {} /// Enqueue a wrapper-function call to be run later. void operator()(orc_rt_SessionRef S, uint64_t CallId, @@ -52,30 +93,24 @@ template class QueueingRunner { /// Run all currently-queued calls in last-in-first-out order, returning when /// the queue is empty. Calls enqueued during draining are run too. - static void runLIFOUntilEmpty(WorkQueue &Q) { - while (!Q.empty()) { - auto Call = std::move(Q.back()); - Q.pop_back(); - Call(); - } + static void runLIFOUntilEmpty(WorkQueueT &Q) { + while (auto Call = Q.pop_back()) + (*Call)(); } /// Run all currently-queued calls in first-in-first-out order, returning /// when the queue is empty. Calls enqueued during draining are run too. 
- static void runFIFOUntilEmpty(WorkQueue &Q) { - while (!Q.empty()) { - auto Call = std::move(Q.front()); - Q.pop_front(); - Call(); - } + static void runFIFOUntilEmpty(WorkQueueT &Q) { + while (auto Call = Q.pop_front()) + (*Call)(); } private: - WorkQueue &Pending; + WorkQueueT &Pending; }; -template -QueueingRunner(WorkQueue &) -> QueueingRunner; +template +QueueingRunner(WorkQueueT &) -> QueueingRunner; } // namespace orc_rt diff --git a/orc-rt/unittests/InProcessControllerAccessTest.cpp b/orc-rt/unittests/InProcessControllerAccessTest.cpp index d4f2ddfea6d4f..88c7ce0e28b72 100644 --- a/orc-rt/unittests/InProcessControllerAccessTest.cpp +++ b/orc-rt/unittests/InProcessControllerAccessTest.cpp @@ -23,8 +23,6 @@ using namespace orc_rt; -using TaskQueue = std::deque>; - namespace { // A minimal stand-in for llvm::orc::InProcessEPC. Registers itself on the @@ -293,7 +291,7 @@ TEST(InProcessControllerAccessTest, CallFromControllerSuccess) { // invocation; draining the queue runs the wrapper, which echoes its // arguments back. Verify the mock receives the echoed bytes via // ReturnWrapperResult. - TaskQueue Tasks; + QueueingRunner<>::WorkQueue Tasks; Session S(mockExecutorProcessInfo(), QueueingRunner(Tasks), noErrors); std::unique_ptr Mock; @@ -313,7 +311,7 @@ TEST(InProcessControllerAccessTest, CallFromControllerSuccess) { // dispatched. ASSERT_FALSE(Result); - QueueingRunner::runFIFOUntilEmpty(Tasks); + QueueingRunner<>::runFIFOUntilEmpty(Tasks); ASSERT_TRUE(Result); EXPECT_EQ(*Result, "world"); diff --git a/orc-rt/unittests/QueueingRunnerTest.cpp b/orc-rt/unittests/QueueingRunnerTest.cpp index 06e32676f5959..24d7e61ac215c 100644 --- a/orc-rt/unittests/QueueingRunnerTest.cpp +++ b/orc-rt/unittests/QueueingRunnerTest.cpp @@ -12,14 +12,13 @@ #include #include +#include #include using namespace orc_rt; namespace { -using TaskQueue = std::deque>; - // A dummy SessionRef value used purely to thread an opaque pointer through // the runner's enqueue path. 
inline orc_rt_SessionRef dummySession() noexcept { @@ -52,15 +51,19 @@ class QueueingRunnerTest : public ::testing::Test { void TearDown() override { RecordingLog = nullptr; } std::vector Log; - TaskQueue Q; + QueueingRunner<>::WorkQueue Q; }; TEST_F(QueueingRunnerTest, EnqueueDoesNotRunImmediately) { - QueueingRunner R(Q); + QueueingRunner<> R(Q); R(dummySession(), /*CallId=*/0, dummyReturn(), recordingFn, WrapperFunctionBuffer()); EXPECT_EQ(Log.size(), 0u) << "Enqueue should not run the call"; - EXPECT_EQ(Q.size(), 1u) << "Call should be sitting in the queue"; + // Pop initial call. + EXPECT_TRUE(Q.pop_back()) + << "At least one call should be sitting in the queue"; + EXPECT_FALSE(Q.pop_back()) + << "Exactly one call should have been sitting in the queue"; } TEST_F(QueueingRunnerTest, RunFIFOUntilEmpty) { @@ -68,13 +71,13 @@ TEST_F(QueueingRunnerTest, RunFIFOUntilEmpty) { for (uint64_t I = 0; I < 3; ++I) R(dummySession(), I, dummyReturn(), recordingFn, WrapperFunctionBuffer()); - QueueingRunner::runFIFOUntilEmpty(Q); + QueueingRunner<>::runFIFOUntilEmpty(Q); ASSERT_EQ(Log.size(), 3u); EXPECT_EQ(Log[0].CallId, 0u); EXPECT_EQ(Log[1].CallId, 1u); EXPECT_EQ(Log[2].CallId, 2u); - EXPECT_TRUE(Q.empty()); + EXPECT_FALSE(Q.pop_back()); // Expect queue to be empty. } TEST_F(QueueingRunnerTest, RunLIFOUntilEmpty) { @@ -82,20 +85,20 @@ TEST_F(QueueingRunnerTest, RunLIFOUntilEmpty) { for (uint64_t I = 0; I < 3; ++I) R(dummySession(), I, dummyReturn(), recordingFn, WrapperFunctionBuffer()); - QueueingRunner::runLIFOUntilEmpty(Q); + QueueingRunner<>::runLIFOUntilEmpty(Q); ASSERT_EQ(Log.size(), 3u); EXPECT_EQ(Log[0].CallId, 2u); EXPECT_EQ(Log[1].CallId, 1u); EXPECT_EQ(Log[2].CallId, 0u); - EXPECT_TRUE(Q.empty()); + EXPECT_FALSE(Q.pop_back()); // Expect queue to be empty. } TEST_F(QueueingRunnerTest, DrainOnEmptyQueueIsNoOp) { // Both drain helpers should return immediately on an empty queue rather // than blocking. 
- QueueingRunner::runFIFOUntilEmpty(Q); - QueueingRunner::runLIFOUntilEmpty(Q); + QueueingRunner<>::runFIFOUntilEmpty(Q); + QueueingRunner<>::runLIFOUntilEmpty(Q); EXPECT_EQ(Log.size(), 0u); } @@ -107,7 +110,7 @@ TEST_F(QueueingRunnerTest, DrainPicksUpCallsEnqueuedDuringDrain) { // First call enqueues a second call from inside its body. We use a custom // wrapper-function (not recordingFn) to do that, since recordingFn doesn't // know about the queue. - static QueueingRunner *PendingR = nullptr; + static QueueingRunner<> *PendingR = nullptr; PendingR = &R; static auto reentrantFn = [](orc_rt_SessionRef S, uint64_t CallId, orc_rt_WrapperFunctionReturn, @@ -122,7 +125,7 @@ TEST_F(QueueingRunnerTest, DrainPicksUpCallsEnqueuedDuringDrain) { R(dummySession(), /*CallId=*/0, dummyReturn(), reentrantFn, WrapperFunctionBuffer()); - QueueingRunner::runFIFOUntilEmpty(Q); + QueueingRunner<>::runFIFOUntilEmpty(Q); ASSERT_EQ(Log.size(), 2u); EXPECT_EQ(Log[0].CallId, 0u); @@ -130,4 +133,42 @@ TEST_F(QueueingRunnerTest, DrainPicksUpCallsEnqueuedDuringDrain) { PendingR = nullptr; } +TEST_F(QueueingRunnerTest, ConcurrentProducerAndDrainer) { + // Verify that QueueingRunner's default WorkQueue (SynchronizedDeque) + // tolerates concurrent push from one thread and drain from another. + // + // A producer thread enqueues NumCalls wrapper-function invocations while + // the main thread spins draining the queue. Once the producer has finished + // enqueueing, the main thread joins it and then performs a final drain to + // pick up any tail of calls enqueued after its last loop iteration. + constexpr uint64_t NumCalls = 1024; + + QueueingRunner<> R(Q); + + std::thread Producer([&]() { + for (uint64_t I = 0; I < NumCalls; ++I) + R(dummySession(), I, dummyReturn(), recordingFn, WrapperFunctionBuffer()); + }); + + // Drain concurrently with the producer. 
The drainer doesn't know when the + // producer is done, so we just spin until the producer thread has joined + // (after which a final drain will be definitive). + while (Log.size() < NumCalls) { + QueueingRunner<>::runFIFOUntilEmpty(Q); + std::this_thread::yield(); + } + + Producer.join(); + QueueingRunner<>::runFIFOUntilEmpty(Q); // pick up any tail. + + ASSERT_EQ(Log.size(), NumCalls); + // Producer enqueues in order 0..NumCalls; FIFO drain must observe the same + // order. (Concurrent draining doesn't reorder per-producer enqueues for a + // single producer.) + for (uint64_t I = 0; I < NumCalls; ++I) + EXPECT_EQ(Log[I].CallId, I); + + EXPECT_FALSE(Q.pop_back()) << "Queue should be empty after final drain"; +} + } // end anonymous namespace diff --git a/orc-rt/unittests/SessionTest.cpp b/orc-rt/unittests/SessionTest.cpp index ff23967756df5..7455fc4c182ef 100644 --- a/orc-rt/unittests/SessionTest.cpp +++ b/orc-rt/unittests/SessionTest.cpp @@ -28,8 +28,6 @@ using namespace orc_rt; using ::testing::Eq; using ::testing::Optional; -using TaskQueue = std::deque>; - class MockService : public Service { public: enum class Op { Detach, Shutdown }; @@ -269,10 +267,9 @@ class CallViaMockControllerAccess { }; /// Build a PostFn for MockControllerAccess that pushes its work onto the -/// supplied queue. With this, a single -/// QueueingRunner::runFIFOUntilEmpty(Q) call advances both -/// Session-side and controller-side work. -inline MockControllerAccess::PostFn postOnto(TaskQueue &Q) { +/// supplied queue. With this, a single QueueingRunner::runFIFOUntilEmpty(Q) +/// call advances both Session-side and controller-side work. +inline MockControllerAccess::PostFn postOnto(QueueingRunner<>::WorkQueue &Q) { return [&Q](move_only_function Work) { Q.push_back(std::move(Work)); }; } @@ -363,7 +360,7 @@ TEST(SessionTest, ScheduleShutdownFromOnDetachHandler) { TEST(SessionTest, RedundantAsyncShutdown) { // Check that redundant calls to shutdown have their callbacks run. 
- TaskQueue Tasks; + QueueingRunner<>::WorkQueue Tasks; Session S(mockExecutorProcessInfo(), QueueingRunner(Tasks), noErrors); // Initiate shutdown here, and wait for the on-shutdown callbacks to start @@ -387,7 +384,7 @@ TEST(SessionTest, ExpectedShutdownSequenceWithNoActiveManagedCodeCalls) { bool SessionShutdownComplete = false; { - TaskQueue Tasks; + QueueingRunner<>::WorkQueue Tasks; Session S(mockExecutorProcessInfo(), QueueingRunner(Tasks), noErrors); S.addService( std::make_unique(DetachOpIdx, ShutdownOpIdx, OpIdx)); @@ -403,7 +400,7 @@ TEST(SessionTest, ExpectedShutdownSequenceWithNoActiveManagedCodeCalls) { } TEST(SessionTest, ActiveManagedCallsDelayShutdown) { - TaskQueue Tasks; + QueueingRunner<>::WorkQueue Tasks; Session S(mockExecutorProcessInfo(), QueueingRunner(Tasks), noErrors); size_t OpIdx = 0; @@ -646,12 +643,12 @@ TEST(SessionTest, TryCreateServiceFailure) { TEST(ControllerAccessTest, Basics) { // Test that we can set the ControllerAccess implementation and still shut // down as expected. - TaskQueue Tasks; + QueueingRunner<>::WorkQueue Tasks; Session S(mockExecutorProcessInfo(), QueueingRunner(Tasks), noErrors); S.attach(std::make_shared(S, postOnto(Tasks)), BootstrapInfo(S)); - QueueingRunner::runFIFOUntilEmpty(Tasks); + QueueingRunner<>::runFIFOUntilEmpty(Tasks); } static void add_sps_wrapper(orc_rt_SessionRef S, uint64_t CallId, @@ -666,7 +663,7 @@ static void add_sps_wrapper(orc_rt_SessionRef S, uint64_t CallId, TEST(ControllerAccessTest, ValidCallToController) { // Simulate a call to a controller handler. 
- TaskQueue Tasks; + QueueingRunner<>::WorkQueue Tasks; Session S(mockExecutorProcessInfo(), QueueingRunner(Tasks), noErrors); S.attach(std::make_shared(S, postOnto(Tasks)), BootstrapInfo(S)); @@ -676,14 +673,14 @@ TEST(ControllerAccessTest, ValidCallToController) { S.callViaSession(reinterpret_cast(add_sps_wrapper)), [&](Expected R) { Result = cantFail(std::move(R)); }, 41, 1); - QueueingRunner::runFIFOUntilEmpty(Tasks); + QueueingRunner<>::runFIFOUntilEmpty(Tasks); EXPECT_EQ(Result, 42); } TEST(ControllerAccessTest, CallToControllerBeforeAttach) { // Expect calls to the controller prior to attaching to fail. - TaskQueue Tasks; + QueueingRunner<>::WorkQueue Tasks; Session S(mockExecutorProcessInfo(), QueueingRunner(Tasks), noErrors); Error Err = Error::success(); @@ -700,7 +697,7 @@ TEST(ControllerAccessTest, CallToControllerBeforeAttach) { TEST(ControllerAccessTest, CallToControllerAfterDetach) { // Expect calls to the controller prior to attaching to fail. - TaskQueue Tasks; + QueueingRunner<>::WorkQueue Tasks; Session S(mockExecutorProcessInfo(), QueueingRunner(Tasks), noErrors); S.attach(std::make_shared(S, postOnto(Tasks)), BootstrapInfo(S)); @@ -721,7 +718,7 @@ TEST(ControllerAccessTest, CallToControllerAfterDetach) { TEST(ControllerAccessTest, CallFromController) { // Simulate a call from the controller. 
- TaskQueue Tasks; + QueueingRunner<>::WorkQueue Tasks; Session S(mockExecutorProcessInfo(), QueueingRunner(Tasks), noErrors); auto CA = std::make_shared(S, postOnto(Tasks)); S.attach(CA, BootstrapInfo(S)); @@ -731,7 +728,7 @@ TEST(ControllerAccessTest, CallFromController) { CallViaMockControllerAccess(*CA, add_sps_wrapper), [&](Expected R) { Result = cantFail(std::move(R)); }, 41, 1); - QueueingRunner::runFIFOUntilEmpty(Tasks); + QueueingRunner<>::runFIFOUntilEmpty(Tasks); EXPECT_EQ(Result, 42); } From aa4cac6af5ca53aff574acf7c9e2870ff3563b0d Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Mon, 22 Jun 2026 15:27:23 +0300 Subject: [PATCH 005/511] [lldb] Avoid tautological copying of a newly created object (NFC) (#204998) --- lldb/source/Target/Memory.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lldb/source/Target/Memory.cpp b/lldb/source/Target/Memory.cpp index 3684d9410b64a..f70da27ec058b 100644 --- a/lldb/source/Target/Memory.cpp +++ b/lldb/source/Target/Memory.cpp @@ -42,8 +42,7 @@ void MemoryCache::Clear(bool clear_invalid_ranges) { void MemoryCache::AddL1CacheData(lldb::addr_t addr, const void *src, size_t src_len) { - AddL1CacheData( - addr, DataBufferSP(new DataBufferHeap(DataBufferHeap(src, src_len)))); + AddL1CacheData(addr, std::make_shared(src, src_len)); } void MemoryCache::AddL1CacheData(lldb::addr_t addr, From df5ff0b5de2b1df093455941eb6e7183bc36fb7f Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Mon, 22 Jun 2026 13:32:23 +0100 Subject: [PATCH 006/511] [AMDGPU][NFC] Templatise and roundtrip gfx12_asm_vop3_dpp16.s (#203953) This is effectively the changes between the non-template versions of gfx11/12_asm_vop3_dpp16.s applied on top of the templatised gfx11_asm_vop3_dpp16.s. 
--- llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s | 16377 ++++++++++++---- .../AMDGPU/gfx12_dasm_vop3_dpp16-fake.txt | 5672 ++++++ .../AMDGPU/gfx12_dasm_vop3_dpp16.txt | 8306 -------- 3 files changed, 17929 insertions(+), 12426 deletions(-) create mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16-fake.txt delete mode 100644 llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index df427b39402af..6c606ec1f35d3 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -1,77 +1,711 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,GFX1200,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,GFX1200,W64 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W32-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefixes=GFX12-ERR,W64-ERR --implicit-check-not=error: %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s 
| FileCheck --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding | FileCheck --strict-whitespace --check-prefixes=GFX12,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding | FileCheck --strict-whitespace --check-prefixes=GFX12,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefixes=W32-ERR --implicit-check-not=error: %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefixes=W64-ERR --implicit-check-not=error: %s + +// INSTS= +// v_add3_u32_e64_dpp +// v_add_co_u32_e64_dpp +// v_add_lshl_u32_e64_dpp +// v_add_nc_i16_e64_dpp +// v_add_nc_i32_e64_dpp +// v_add_nc_u16_e64_dpp +// v_alignbit_b32_e64_dpp +// v_alignbyte_b32_e64_dpp +// v_and_b16_e64_dpp +// v_and_or_b32_e64_dpp +// v_ashrrev_i16_e64_dpp +// v_bcnt_u32_b32_e64_dpp +// v_bfe_i32_e64_dpp +// v_bfe_u32_e64_dpp +// v_bfi_b32_e64_dpp +// v_bfm_b32_e64_dpp +// v_cndmask_b16_e64_dpp +// v_cubeid_f32_e64_dpp +// v_cubema_f32_e64_dpp +// v_cubesc_f32_e64_dpp +// v_cubetc_f32_e64_dpp +// v_cvt_pk_bf8_f32_e64_dpp +// v_cvt_pk_fp8_f32_e64_dpp +// v_cvt_pk_i16_f32_e64_dpp +// v_cvt_pk_i16_i32_e64_dpp +// v_cvt_pk_norm_i16_f16_e64_dpp +// v_cvt_pk_norm_i16_f32_e64_dpp +// v_cvt_pk_norm_u16_f16_e64_dpp +// v_cvt_pk_norm_u16_f32_e64_dpp +// v_cvt_pk_u16_f32_e64_dpp +// v_cvt_pk_u16_u32_e64_dpp +// v_cvt_pk_u8_f32_e64_dpp +// v_cvt_sr_bf8_f32_e64_dpp +// v_cvt_sr_fp8_f32_e64_dpp +// v_div_fixup_f16_e64_dpp +// v_dot2_bf16_bf16_e64_dpp +// v_dot2_f16_f16_e64_dpp +// 
v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_mask:0xf bank_mask:0xf +// v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] +// v_fma_f16_e64_dpp +// v_fma_f32_e64_dpp +// v_ldexp_f32_e64_dpp +// v_lerp_u8_e64_dpp +// v_lshl_add_u32_e64_dpp +// v_lshl_or_b32_e64_dpp +// v_lshlrev_b16_e64_dpp +// v_lshrrev_b16_e64_dpp +// v_mad_i16_e64_dpp +// v_mad_i32_i16_e64_dpp +// v_mad_i32_i24_e64_dpp +// v_mad_u16_e64_dpp +// v_mad_u32_u16_e64_dpp +// v_mad_u32_u24_e64_dpp +// v_max3_num_f16_e64_dpp +// v_max3_num_f32_e64_dpp +// v_max3_i16_e64_dpp +// v_max3_i32_e64_dpp +// v_max3_u16_e64_dpp +// v_max3_u32_e64_dpp +// v_max_i16_e64_dpp +// v_max_u16_e64_dpp +// v_maxmin_num_f16_e64_dpp +// v_maxmin_num_f32_e64_dpp +// v_maxmin_i32_e64_dpp +// v_maxmin_u32_e64_dpp +// v_maximum_f16 +// v_maximum_f32 +// v_maximum3_f16 +// v_maximum3_f32 +// v_maximumminimum_f16 +// v_maximumminimum_f32 +// v_mbcnt_hi_u32_b32_e64_dpp +// v_mbcnt_lo_u32_b32_e64_dpp +// v_med3_num_f16_e64_dpp +// v_med3_num_f32_e64_dpp +// v_med3_i16_e64_dpp +// v_med3_i32_e64_dpp +// v_med3_u16_e64_dpp +// v_med3_u32_e64_dpp +// v_min3_num_f16_e64_dpp +// v_min3_num_f32_e64_dpp +// v_min3_i16_e64_dpp +// v_min3_i32_e64_dpp +// v_min3_u16_e64_dpp +// v_min3_u32_e64_dpp +// v_min_i16_e64_dpp +// v_min_u16_e64_dpp +// v_minmax_num_f16_e64_dpp +// v_minmax_num_f32_e64_dpp +// v_minmax_i32_e64_dpp +// v_minmax_u32_e64_dpp +// v_minimum_f16 +// v_minimum_f32 +// v_minimum3_f16 +// v_minimum3_f32 +// v_minimummaximum_f16 +// v_minimummaximum_f32 +// v_msad_u8_e64_dpp +// v_mul_lo_u16_e64_dpp +// v_mullit_f32_e64_dpp +// v_or3_b32_e64_dpp +// v_or_b16_e64_dpp +// v_pack_b32_f16_e64_dpp +// v_perm_b32_e64_dpp +// v_sad_hi_u8_e64_dpp +// v_sad_u16_e64_dpp +// v_sad_u32_e64_dpp +// v_sad_u8_e64_dpp +// v_sub_co_u32_e64_dpp +// v_sub_nc_i16_e64_dpp +// v_sub_nc_i32_e64_dpp +// v_sub_nc_u16_e64_dpp +// v_subrev_co_u32_e64_dpp +// v_xad_u32_e64_dpp +// v_xor3_b32_e64_dpp +// v_xor_b16_e64_dpp +// +// = +// s6 +// s105 
+// s[12:13] +// s[104:105] +// vcc_lo +// vcc_hi +// vcc +// ttmp15 +// ttmp[14:15] +// null +// +// = +// -1 +// 0.5 +// exec_hi +// exec_lo +// m0 +// null +// s105 +// s3 +// src_scc +// ttmp15 +// v255.h +// v255.l +// v3.h +// v3.l +// vcc_hi +// vcc_lo +// +// = +// -v7.l +// |v7.l| +// -|v7.l| +// +// = +// +// -|0.5| +// +// = +// -1 +// 0.5 +// exec_hi +// exec_lo +// m0 +// null +// s105 +// s3 +// src_scc +// ttmp15 +// v255 +// v3 +// vcc_hi +// vcc_lo +// +// = +// -v7 +// |v7| +// -|v7| +// +// = +// +// -|0.5| +// +// = +// bound_ctrl:0 +// bound_ctrl:1 +// +// = +// quad_perm:[0,1,2,3] +// quad_perm:[3,2,1,0] +// row_half_mirror +// row_mirror +// row_ror:1 +// row_ror:15 +// row_share:0 +// row_share:15 +// row_shl:1 +// row_shl:15 +// row_shr:1 +// row_shr:15 +// row_xmask:0 +// row_xmask:15 +// +// = +// fi:0 +// fi:1 +// +// = +// div:2 +// mul:2 +// mul:4 +// +// = +// row_mask:0x0 bank_mask:0x1 +// row_mask:0x3 bank_mask:0x0 +// row_mask:0xf bank_mask:0xf +// +// = +// byte_sel:0 +// byte_sel:1 +// byte_sel:2 +// byte_sel:3 +// +// = +// v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l +// v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// +// = +// +// v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// +// = +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// v5.l, , v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, , v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, quad_perm:[3,2,1,0] +// v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] 
quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// +// = +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, quad_perm:[3,2,1,0] +// v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// +// = +// +// v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// +// = +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// v5.l, , v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, , v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, quad_perm:[3,2,1,0] +// v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// +// = +// v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// v255.l, v255.l, v255.l, null quad_perm:[3,2,1,0] +// v5.l, , v2.l, s3 quad_perm:[3,2,1,0] +// v5.l, v1.l, , s3 quad_perm:[3,2,1,0] +// v5.l, v1.l, 10, s3 quad_perm:[3,2,1,0] +// v5.l, v1.l, 10, s[6:7] quad_perm:[3,2,1,0] +// v5.l, v1.l, 
s2, s3 quad_perm:[3,2,1,0] +// v5.l, v1.l, s2, s[6:7] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, s3 +// v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// +// = +// v0.l, v1, v2 quad_perm:[0,1,2,3] +// v255.l, v255, v255 quad_perm:[0,1,2,3] +// v0.l, -v1, |v2| quad_perm:[0,1,2,3] +// v0.l, v1, v2 quad_perm:[0,1,2,3] +// v0.l, v1, v2 quad_perm:[0,1,2,3] +// +// = +// v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] +// v255.l, v255, v255, v255.l quad_perm:[0,1,2,3] +// v0.l, , v2, v3.l quad_perm:[0,1,2,3] +// v0.l, v1, , v3.l quad_perm:[0,1,2,3] +// v0.l, v1, -|s3|, v3.l quad_perm:[0,1,2,3] +// v0.l, v1, v2, quad_perm:[0,1,2,3] +// v0.l, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] +// v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] +// v0.l, v1, v2, v3.l +// v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] +// v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] +// +// = +// v5, v1.l, v2.l quad_perm:[3,2,1,0] +// v255, v255.l, v255.l quad_perm:[3,2,1,0] +// v5, , v2.l quad_perm:[3,2,1,0] +// v5, v1.l, quad_perm:[3,2,1,0] +// v5, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// v5, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// v5, v1.l, v2.l quad_perm:[3,2,1,0] +// v5, v1.l, v2.l +// v5, v1.l, v2.l quad_perm:[3,2,1,0] +// v5, v1.l, v2.l quad_perm:[3,2,1,0] +// v5, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// +// = +// v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] +// v255, v255.l, v255.l, v255 quad_perm:[3,2,1,0] +// v5, v1.l, 10, v3 quad_perm:[3,2,1,0] +// v5, v1.l, s2, v3 quad_perm:[3,2,1,0] +// v5, v1.l, v2.l, quad_perm:[3,2,1,0] +// v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// v5, v1.l, v2.h, v3 op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] +// v5, v1.l, v2.l, v3 +// v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] +// v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] +// v5, v1.l, v2.l, v3 clamp quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2 
quad_perm:[3,2,1,0] +// v255, v255, v255 quad_perm:[3,2,1,0] +// v5, , v2 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 clamp quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2, v3.l quad_perm:[3,2,1,0] +// v255, v255, v255, v255.l quad_perm:[3,2,1,0] +// v5, v1, v2, quad_perm:[3,2,1,0] +// v5, v1, 10, v3.l quad_perm:[3,2,1,0] +// v5, v1, s2, v3.l quad_perm:[3,2,1,0] +// v5, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// v5, v1, v2, v3.l quad_perm:[3,2,1,0] +// v5, v1, v2, v3.l +// v5, v1, v2, v3.l quad_perm:[3,2,1,0] +// v5, v1, v2, v3.l quad_perm:[3,2,1,0] +// v5, v1, v2, v3.l clamp quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v255, v255, v255, v255 quad_perm:[3,2,1,0] +// v5, , v2, v3 quad_perm:[3,2,1,0] +// v5, v1, , v3 quad_perm:[3,2,1,0] +// v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// v5, v1, s2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v255, v255, v255, v255 quad_perm:[3,2,1,0] +// v5, v1, , v3 quad_perm:[3,2,1,0] +// v5, v1, 10, v3 quad_perm:[3,2,1,0] +// v5, v1, s2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// +// = +// +// v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v255, v255, v255, v255 quad_perm:[3,2,1,0] +// v5, , v2, v3 quad_perm:[3,2,1,0] +// v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// v5, v1, s2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, 
v1, v2, v3 +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2 quad_perm:[3,2,1,0] +// v255, v255, v255 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// +// = +// +// v5, v1, v2 clamp quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2 quad_perm:[3,2,1,0] +// v255, v255, v255 quad_perm:[3,2,1,0] +// v5, , v2 quad_perm:[3,2,1,0] +// v5, v1, quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2 quad_perm:[3,2,1,0] +// v255, v255, v255 quad_perm:[3,2,1,0] +// v5, , v2 quad_perm:[3,2,1,0] +// v5, v1, quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// +// = +// v5, s6, v1, v2 quad_perm:[3,2,1,0] +// v255, null, v255, v255 quad_perm:[3,2,1,0] +// v5, , v1, v2 quad_perm:[3,2,1,0] +// v5, s6, v1, s2 quad_perm:[3,2,1,0] +// v5, s[12:13], v1, s2 quad_perm:[3,2,1,0] +// v5, s6, v1, v2 quad_perm:[3,2,1,0] +// v5, s6, v1, v2 +// v5, s6, v1, v2 quad_perm:[3,2,1,0] +// v5, s6, v1, v2 quad_perm:[3,2,1,0] +// v5, s6, v1, v2 clamp quad_perm:[3,2,1,0] v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_add3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0xff,0x00,0x55,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, src_scc, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] v_add3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] // GFX12: v_add3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: 
v_add3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + 
+v_add3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + v_add3_u32_e64_dpp v5, v1, v2, v3 row_mirror // GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_add3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_add3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_add3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_add3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_add3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_add3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_add3_u32_e64_dpp v5, v1, v2, 
vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_add3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_add3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_add3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_add3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_add3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_add3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_add3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_add3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: 
[0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_add3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_add3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_add3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_add3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_add3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_add3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] // W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_add_co_u32_e64_dpp v255, null, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_add_co_u32_e64_dpp v255, null, v255, v255 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x7c,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_add_co_u32_e64_dpp v5, s105, v1, v2 quad_perm:[3,2,1,0] +// W32: v_add_co_u32_e64_dpp v5, s105, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x69,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 quad_perm:[3,2,1,0] +// W32: v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6b,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc, v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_add_co_u32_e64_dpp v5, vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 quad_perm:[3,2,1,0] 
+// W32: v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7b,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_co_u32_e64_dpp v5, null, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_add_co_u32_e64_dpp v5, null, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_co_u32_e64_dpp v5, s6, v1, s2 quad_perm:[3,2,1,0] +// W32: v_add_co_u32_e64_dpp v5, s6, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, s2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for 
instruction + v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] // W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + v_add_co_u32_e64_dpp v5, s6, v1, v2 row_mirror // W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, s6, v1, s2 row_mirror -// W32: v_add_co_u32_e64_dpp v5, s6, v1, s2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror -// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:15 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_share:0 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_share:15 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 @@ -90,154 +724,212 @@ v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 // W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 -// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:0 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 -// W32: v_add_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x69,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:15 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// W32: v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 
quad_perm:[3,2,1,0] fi:0 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6b,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] fi:1 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x7b,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] +// W64-ERR: :[[@LINE-2]]:26: error: invalid 
operand for instruction -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 clamp quad_perm:[3,2,1,0] +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x86,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0c,0x00,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff] +v_add_lshl_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x47,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: 
v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp 
v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_lshl_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x7a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_add_lshl_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_add_lshl_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_add_lshl_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_add_lshl_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] v_add_lshl_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] // GFX12: v_add_lshl_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_add_lshl_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, 
exec_hi quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_mirror // GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, 
v2, v3 row_ror:1 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_add_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_add_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_nc_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_add_nc_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 // GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror // GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] @@ -250,11 +942,11 @@ v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 // GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 // GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 
bank_mask:0x1 -// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 // GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] @@ -268,32 +960,62 @@ v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 // GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// 
GFX12: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_add_nc_i32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_add_nc_i32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] // GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_add_nc_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + v_add_nc_i32_e64_dpp v5, v1, v2 row_mirror // GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_add_nc_i32_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: 
v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_share:0 +// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_share:15 +// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:1 // GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] @@ -307,45 +1029,51 @@ v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:1 v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:15 // GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_add_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_add_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] 
+v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] +// GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +v_add_nc_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_add_nc_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_add_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_add_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] // GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror // GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] @@ -358,11 +1086,11 @@ v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 // GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 // GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 // GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] @@ -376,3967 +1104,11194 @@ v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 // GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: 
v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_alignbit_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_alignbit_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_alignbit_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX12: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_alignbit_b32_e64_dpp v255, v255, v255, v255.l quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v255, v255, v255, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: 
v_alignbit_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_alignbit_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_alignbit_b32_e64_dpp v5, v1, v2, v255.h quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, v255.h row_mirror -// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v255.l quad_perm:[3,2,1,0] +// 
GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.h quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_alignbit_b32_e64_dpp v5, v1, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x16,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX12: 
v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_mirror +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_ror:1 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l 
row_ror:15 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_share:0 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_share:15 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shl:1 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h row_mirror -// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shl:15 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shr:1 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shr:15 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_xmask:0 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_xmask:15 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: 
v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_alignbyte_b32_e64_dpp v255, v255, v255, v255.l quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_alignbyte_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_and_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_and_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_alignbyte_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_and_b16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_and_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_and_b16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_and_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_alignbyte_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_and_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_and_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_alignbyte_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_and_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_and_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp 
v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.h quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x57,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, 
v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_and_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_and_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x57,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror +// GFX12: 
v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_ashrrev_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_ashrrev_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_ror:1 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_ror:15 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_share:0 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] 
-v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_share:15 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shl:1 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shl:15 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shr:1 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 
+v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shr:15 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_xmask:0 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_xmask:15 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] 
-v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_ashrrev_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_ashrrev_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_ashrrev_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l clamp 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_ashrrev_i16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_ashrrev_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_and_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_and_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_and_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_and_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x10,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_bcnt_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_bcnt_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_bfe_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_bfe_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_and_b16_e64_dpp v5.l, 
v1.l, v2.l row_xmask:0 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: 
v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_and_or_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x57,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, -1, v3 
quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_and_or_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_and_or_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_bfe_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x11,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_and_or_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, m0, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_bfe_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_bfe_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_bfe_u32_e64_dpp v5, v1, 
v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, vcc_lo, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_bfe_u32_e64_dpp v5, v1, 
v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_and_or_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_and_or_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_bfe_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x10,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_and_or_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_bfi_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x12,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_bfi_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_bfi_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_bfi_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x12,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_bfm_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_bfm_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_ashrrev_i16_e64_dpp v5.l, 
v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_ashrrev_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_ashrrev_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3] -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s3 row_mirror -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0c,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s3 row_mirror -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x0d,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 
row_shl:1 -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15 -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1 -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15 -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 
row_ror:1 -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 row_ror:15 -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0] -// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3] -// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror -// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] 
+v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror -// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s[6:7] row_half_mirror -// W32-ERR: :[[@LINE-1]]:39: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x18,0x00,0x01,0x41,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s[6:7] row_half_mirror -// W32-ERR: :[[@LINE-1]]:39: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x19,0x00,0x01,0x41,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1 -// W32-ERR: :[[@LINE-1]]:41: error: 
invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15 -// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1 -// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_bcnt_u32_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x1e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15 -// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1 -// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15 -// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 -// W32-ERR: :[[@LINE-1]]:44: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] 
+v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_mirror +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32-ERR: :[[@LINE-1]]:44: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_hi row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xae,0x41,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:0 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp15 row_xmask:0 
row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:15 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 -// W32-ERR: :[[@LINE-1]]:44: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32-ERR: :[[@LINE-1]]:44: error: invalid operand for instruction -// W64: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] 
-v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_cubeid_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_cubeid_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_cubeid_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_cubeid_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] fi:1 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_cubeid_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_cubeid_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_bfe_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x11,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: 
v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x0c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_bfe_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x0c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_bfe_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x0c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_bfe_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_cubema_f32_e64_dpp v5, v1, s2, 
v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_cubema_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v3, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x0f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_bfe_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x06,0x0f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_bfe_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x0f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_bfe_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_cubesc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_cubesc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_cubesc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_cubesc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, src_scc 
quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_cubesc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_cubesc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi 
row_shl:15 -// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 
row_mirror +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x0d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x0d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x0d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 
row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_cubetc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_cubetc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_cubetc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_cubetc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_cubetc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_cubetc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_bfe_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x10,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 
bank_mask:0x1 -// GFX12: v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x0e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_bfe_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x0e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_bfe_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_bfe_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, 
-v2, |v3| quad_perm:[0,1,2,3] -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] +v_bfe_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd -// GFX12: 
v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +v_bfe_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +v_bfe_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 -// GFX12: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +v_bfe_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| 
quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v255.h, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v255.h, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0xff,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,1,2,3] -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] +v_bfe_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: 
[0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +v_bfe_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: 
[0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +v_bfe_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v255.h, -v2, |v3| quad_perm:[0,1,2,3] -// GFX12: v_cvt_pk_fp8_f32_e64_dpp v255.h, -v2, |v3| op_sel:[0,0,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] +v_bfe_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf -// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] +v_bfe_u32_e64_dpp 
v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_cvt_sr_bf8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_sr_bf8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_bfe_u32_e64_dpp v5, 
v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd -// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 -// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 -// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:0 quad_perm:[3,2,1,0] -// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] -// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] -// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_cvt_sr_bf8_f32 v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] -// GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf -// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] 
row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_cvt_sr_fp8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_sr_fp8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd -// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_bfe_u32_e64_dpp 
v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd -// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 -// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 -// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:0 quad_perm:[3,2,1,0] -// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] 
-v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] -// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] -// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_cvt_sr_fp8_f32 v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] -// GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v255, 
v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x12,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] 
+// GFX12: v_bfi_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, 
0.5 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x12,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_bfi_b32_e64_dpp 
v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_bfm_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_bfm_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: 
[0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_mirror +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_share:0 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_share:15 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null quad_perm:[3,2,1,0] +// GFX12: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, -v7.l, v2.l, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, -v7.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x20,0x07,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:42: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, |v7.l|, v2.l, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, |v7.l|, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x07,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:43: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, -|v7.l|, v2.l, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, -|v7.l|, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0x0e,0x20,0x07,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, -v7.l, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, -v7.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x0e,0x0e,0x40,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:42: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, |v7.l|, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, |v7.l|, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x0e,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:43: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, -|v7.l|, s3 
quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, -|v7.l|, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x0e,0x0e,0x40,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x0d,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s[6:7] quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:39: error: invalid operand for instruction +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x19,0x00,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0c,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s[6:7] quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:39: error: invalid operand for instruction +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x18,0x00,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[12:13] quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[12:13] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x32,0x00,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: 
error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp[14:15] quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp[14:15] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] bound_ctrl:0 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] bound_ctrl:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x08,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_share:0 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_share:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x5f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1 +// W32: v_cndmask_b16_e64_dpp 
v5.l, v1.l, v2.l, s3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_xmask:0 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x60,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_xmask:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x6f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] fi:0 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] fi:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x04,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0x01] +// W64-ERR: 
:[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0x30] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0c,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0c,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, -v7, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0c,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0c,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, 
v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0c,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: 
[0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0f,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0f,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0f,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0f,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: 
v_cubema_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0f,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, 
v3 quad_perm:[0,1,2,3] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// 
GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0d,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0d,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0d,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: 
v_cubesc_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0d,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_ror:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x80,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0e,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0e,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0e,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: 
v_cubetc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0e,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
bound_ctrl:1 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 
row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, 
v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_bf8_f32_e64_dpp v255.l, v255, v255 quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v255.l, v255, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0xe4,0x00,0xff] + +v_cvt_pk_bf8_f32_e64_dpp v0.l, -v1, |v2| quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v0.l, -v1, |v2| quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x02,0x6a,0xd7,0xfa,0x04,0x02,0x20,0x01,0xe4,0x00,0xff] + +v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] fi:0 +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x00,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] fi:1 +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x00,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x04,0xff] + +v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0x00,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0x01] + +v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0x00,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0x30] + +v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_fp8_f32_e64_dpp v255.l, v255, v255 quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v255.l, v255, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x69,0xd7,0xfa,0xfe,0x03,0x00,0xff,0xe4,0x00,0xff] + +v_cvt_pk_fp8_f32_e64_dpp v0.l, -v1, |v2| quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v0.l, -v1, |v2| quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x02,0x69,0xd7,0xfa,0x04,0x02,0x20,0x01,0xe4,0x00,0xff] + +v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] fi:0 +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x00,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] fi:1 +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x00,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x04,0xff] + +v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0x00,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0x01] + +v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0x00,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0x30] + +v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_i16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x06,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x06,0xd7,0xfa,0x0e,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x06,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX12: 
v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp 
v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 
row_shl:1 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] 
row_mask:0x3 bank_mask:0x0 +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x12,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v7.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v7.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, |v7.l|, v2.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v7.l|, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -|v7.l|, v2.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -|v7.l|, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, -v7.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, -v7.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, |v7.l| 
quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, |v7.l| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x12,0xd7,0xfa,0x0e,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, -|v7.l| quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, -|v7.l| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x12,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:15 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_xmask:0 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_xmask:15 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX12: 
v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x21,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x21,0xd7,0xfa,0x0e,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, -|v7| 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x21,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:0 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + 
+v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: 
[0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x13,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v7.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v7.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, |v7.l|, v2.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v7.l|, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -|v7.l|, v2.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -|v7.l|, v2.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, -v7.l quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, -v7.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, |v7.l| quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, |v7.l| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd7,0xfa,0x0e,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, -|v7.l| quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, -|v7.l| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l 
quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:15 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_xmask:0 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_xmask:15 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: 
[0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x22,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x22,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x22,0xd7,0xfa,0x0e,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x22,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, 
v1, v2 row_ror:15 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:0 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x07,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// 
GFX12: v_cvt_pk_u16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x07,0xd7,0xfa,0x0e,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x07,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_xmask:0 +// 
GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x26,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x26,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + 
+v_cvt_pk_u8_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, 
v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + 
+v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6c,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, -1 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x82,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0xe0,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0xfe,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, exec_lo quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0xfc,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, m0 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0xfa,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, null quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0xf8,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, s105 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0xd2,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, src_scc quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0xfa,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0xf6,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, vcc_hi quad_perm:[3,2,1,0] 
+// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0xd6,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0xd4,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_mirror +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp 
v5, v1, v2 row_share:0 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 byte_sel:0 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 byte_sel:1 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 byte_sel:2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x40,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 byte_sel:3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x60,0x6c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6b,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, -1 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x82,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0xe0,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0xfe,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0xfc,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, 
m0 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0xfa,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, null quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0xf8,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, s105 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0xd2,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, src_scc quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0xfa,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0xf6,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6b,0xd7,0xfa,0xd6,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0xd4,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_mirror +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_share:0 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: 
[0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 byte_sel:0 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 byte_sel:1 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 byte_sel:2 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x40,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 byte_sel:3 quad_perm:[3,2,1,0] +// GFX12: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x60,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x54,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: 
v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: 
v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v255.l, v255, v255, v255.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v255.l, v255, v255, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x67,0xd6,0xfa,0xfe,0xff,0x07,0xff,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, -v7, v2, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, -v7, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x24,0x07,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, |v7|, v2, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, |v7|, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x01,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x07,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, -|v7|, v2, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, -|v7|, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x01,0x67,0xd6,0xfa,0x04,0x0e,0x24,0x07,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, -v7, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, -v7, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, |v7|, 
v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, |v7|, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x02,0x67,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, -|v7|, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, -|v7|, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x02,0x67,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, -|s3|, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, -|s3|, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x02,0x67,0xd6,0xfa,0x06,0x0c,0x44,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, -1 quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, -1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x06,0x03,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, 0.5 quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, 0.5 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0xc2,0x03,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, exec_hi quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, exec_hi quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0xfe,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, exec_lo quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, exec_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0xfa,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, m0 quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, m0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0xf6,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, null quad_perm:[0,1,2,3] +// GFX12: 
v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, null quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0xf2,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, s105 quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, s105 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0xa6,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, s3 quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, src_scc quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, src_scc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0xf6,0x03,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, ttmp15 quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, ttmp15 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0xee,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v255.h quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v255.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.h quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x20,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, vcc_hi quad_perm:[0,1,2,3] +// GFX12: 
v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, vcc_hi quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0xae,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0xaa,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, -|0.5| quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, -|0.5| quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x04,0x67,0xd6,0xfa,0x04,0xc2,0x83,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x20,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] bound_ctrl:0 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] bound_ctrl:1 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x08,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_half_mirror +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_mirror 
+// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_ror:1 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_ror:15 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_share:0 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_share:15 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_shl:1 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_shl:15 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_shr:1 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_shr:15 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp 
v0.l, v1, v2, v3.l row_xmask:0 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_xmask:15 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] fi:0 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] fi:1 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0xff] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0x01] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0x30] + +v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + 
+v_dot2_f16_f16_e64_dpp v255.l, v255, v255, v255.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v255.l, v255, v255, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x66,0xd6,0xfa,0xfe,0xff,0x07,0xff,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, -v7, v2, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, -v7, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x24,0x07,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, |v7|, v2, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, |v7|, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x01,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x07,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, -|v7|, v2, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, -|v7|, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x01,0x66,0xd6,0xfa,0x04,0x0e,0x24,0x07,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, -v7, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, -v7, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, |v7|, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, |v7|, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x02,0x66,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, -|v7|, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, -|v7|, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x02,0x66,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, -|s3|, v3.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, -|s3|, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x02,0x66,0xd6,0xfa,0x06,0x0c,0x44,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, -1 
quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, -1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x06,0x03,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, 0.5 quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, 0.5 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0xc2,0x03,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, exec_hi quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, exec_hi quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0xfe,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, exec_lo quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, exec_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0xfa,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, m0 quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, m0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0xf6,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, null quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, null quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0xf2,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, s105 quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, s105 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0xa6,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, s3 quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, src_scc quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, src_scc 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0xf6,0x03,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, ttmp15 quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, ttmp15 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0xee,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v255.h quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v255.l quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.h quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x20,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, vcc_hi quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, vcc_hi quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0xae,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0xaa,0x01,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, -|0.5| quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, -|0.5| quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x04,0x66,0xd6,0xfa,0x04,0xc2,0x83,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.h 
op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x20,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] bound_ctrl:0 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] bound_ctrl:1 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x08,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_half_mirror +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_mirror +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_ror:1 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_ror:15 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_share:0 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_share:15 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_shl:1 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_shl:15 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_shr:1 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_shr:15 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_xmask:0 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_xmask:15 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] fi:0 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] fi:1 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0xff] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0x01] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0x30] + +v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0xe4,0x00,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x41,0x01,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x40,0x01,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_ror:1 
row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x21,0x01,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x2f,0x01,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x50,0x01,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x5f,0x01,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x01,0x01,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x0f,0x01,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x11,0x01,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x1f,0x01,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp 
v0, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x60,0x01,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x6f,0x01,0xff] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0x0 bank_mask:0x1 +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0x0 bank_mask:0x1 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x00,0x01] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0x3 bank_mask:0x0 +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0x3 bank_mask:0x0 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x00,0x30] + +v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf +// GFX12: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x48,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, |v7.l|, v2.l, 
v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x48,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, 
v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x48,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + 
+v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, 
v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: 
v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x13,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, |v7|, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x13,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: 
v_fma_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, 
vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x13,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_fma_f32_e64_dpp v5, 
v1, v2, v3 row_ror:15 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, 
v2 quad_perm:[3,2,1,0] +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_ldexp_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x1c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX12: v_ldexp_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_ldexp_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_ldexp_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x1c,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_ldexp_f32_e64_dpp 
v5, v1, v2 row_mirror +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, 
v2 row_xmask:15 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x18,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] +// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x15,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, 
v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: 
v_lerp_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 
+ +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: 
v_lshl_add_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x46,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x46,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, -1 
quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + 
+v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: 
v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x56,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, 
src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 
quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, 
v3 row_xmask:15 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: 
v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX12: 
v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] 
row_mask:0x0 bank_mask:0x1 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x53,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp 
v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_mad_i16_e64_dpp 
v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: 
[0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v255, v255.l, v255.l, v255 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v255, v255.l, v255.l, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x5a,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + 
+v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.h, v3 op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.h, v3 op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_half_mirror +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_mirror +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_ror:1 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_ror:15 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_share:0 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_share:15 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shl:1 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 
row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shl:15 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shr:1 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shr:15 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:0 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:15 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_mad_i32_i16_e64_dpp v5, v1.l, 
v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0a,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: 
v_mad_i32_i24_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0a,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// 
GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; 
encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x41,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, 
v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + 
+v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, 
v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: 
v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v255, v255.l, v255.l, v255 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v255, v255.l, v255.l, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x59,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, s2, v3 quad_perm:[3,2,1,0] +// 
GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.h, v3 op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.h, v3 op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_half_mirror +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_mirror +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_ror:1 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_ror:15 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_share:0 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_share:15 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 
row_shl:1 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shl:15 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shr:1 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shr:15 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:0 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:15 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0b,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0b,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// 
GFX12: v_mad_u32_u24_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// 
GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 
row_share:15 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 
bank_mask:0x1 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x2c,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l 
quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2c,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2c,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2c,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi 
quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h 
quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2c,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x10,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: 
v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x2a,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2a,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2a,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp 
v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2a,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2a,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + 
+v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0xff,0x00,0x4d,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, 
v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo 
quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, s3, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] 
+// GFX12: v_max3_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + 
+v_max3_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_max3_i32_e64_dpp v5, v1, 
v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x4e,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, 
v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.h, v2.l, v3.l 
op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + 
+v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + 
+v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp 
v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1e,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// 
GFX12: v_max3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 
row_ror:15 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_max_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x10,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_max_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: 
[0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l 
row_shr:1 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6b,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6b,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6b,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6b,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, 
null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi 
quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6b,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// 
GFX12: v_maxmin_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x69,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x69,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x69,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x69,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + 
+v_maxmin_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x69,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, 
v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 
+// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x64,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x64,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: 
v_maxmin_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: 
v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 
row_share:0 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x62,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x62,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: 
v_maxmin_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: 
v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: 
v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_maxmin_u32_e64_dpp v5, 
v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_maximum_f16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x68,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_maximum_f16 v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_mirror +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_share:0 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_share:15 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + 
+v_maximum_f16 v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_maximum_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x66,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_maximum_f32 v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX12: v_maximum_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_maximum_f32 v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_maximum_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x66,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_maximum_f32 v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_maximum_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x66,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_maximum_f32 v5, v1, v2 row_half_mirror +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_mirror +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_ror:1 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_ror:15 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_share:0 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_share:15 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shl:1 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shl:15 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shr:1 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shr:15 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_xmask:0 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_xmask:15 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_maximum_f32 v5, v1, v2 div:2 quad_perm:[3,2,1,0] +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x18,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x08,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x10,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: 
[0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 clamp quad_perm:[3,2,1,0] +// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x30,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x30,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + 
+v_maximum3_f16 v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x30,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x30,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: 
v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.h 
op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x30,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] 
bound_ctrl:1 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX12: 
v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x2e,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maximum3_f32 v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: 
v_maximum3_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximum3_f32 v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_maximum3_f32 v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2e,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2e,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2e,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, 
0.5 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2e,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_half_mirror +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_mirror +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_ror:1 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_ror:15 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_share:0 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_share:15 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_shl:1 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_shl:15 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_shr:1 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_shr:15 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_xmask:0 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 
row_xmask:15 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_maximum3_f32 v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6f,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, 
|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6f,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6f,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: 
v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, 
v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6f,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + 
+v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6d,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6d,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6d,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6d,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, 
v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6d,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: 
v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_half_mirror +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_mirror +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_ror:1 +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_ror:15 +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_share:0 +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_share:15 +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_shl:1 +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_shl:15 +// 
GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_shr:1 +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_shr:15 +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_xmask:0 +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_xmask:15 +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x20,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, 
v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + 
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x32,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + 
+v_med3_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + 
+v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp 
v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x04,0x32,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: 
v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + 
+v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 -// 
GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_med3_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x31,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_med3_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_med3_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_med3_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x31,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: 
v_med3_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x31,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_med3_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x31,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_med3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_med3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, 
s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: 
v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, 
v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_med3_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_med3_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x31,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_med3_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x31,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 
-v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// 
GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 
row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 -// 
GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_med3_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_med3_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x50,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_med3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// 
GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, 
src_scc quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_med3_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_med3_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x10,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] 
+v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] 
-v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: 
v_cvt_pk_u8_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_med3_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x20,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_med3_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_med3_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_med3_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, exec_hi, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_med3_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_med3_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_med3_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_med3_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x20,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x05,0x30] +v_med3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_med3_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_med3_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_med3_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 
row_half_mirror -// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_med3_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_med3_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_med3_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_med3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_med3_i32_e64_dpp v5, v1, s2, v3 
quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_med3_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_med3_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp 
v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; 
encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_shr:1 +// 
GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + 
+v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x51,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: 
v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l 
quad_perm:[0,1,2,3] +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_med3_u16_e64_dpp 
v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: 
v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x21,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + 
+v_med3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_med3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: 
v_cvt_pk_norm_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_med3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_med3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, 
|v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_med3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_med3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_med3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, s2, v3.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] 
+v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX12: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, 
exec_hi row_ror:1 -// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX12: v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 ; encoding: [0x05,0x06,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] -// GFX12: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] -// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 
-v_fma_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x2b,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2b,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX12: 
v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2b,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2b,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX12: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x01,0x48,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX12: v_fma_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x05,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_fma_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] -// GFX12: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] -// GFX12: v_fma_f16_e64_dpp 
v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x20,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_fma_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_fma_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v5, 
v1, v2, v3 row_mirror -// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2b,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_fma_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_fma_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_fma_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_fma_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_ldexp_f32_e64_dpp v255, -|v255|, 
v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x1c,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_lerp_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x29,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x29,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x29,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x29,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x29,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, -1 row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_min3_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_lerp_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_min3_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_lshl_add_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x46,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_lshl_add_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, src_scc 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: 
v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x29,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x46,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_lshl_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_lshl_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: 
v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; 
encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x56,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min3_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x4a,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: 
v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null 
quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] 
+v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h 
op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l 
row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l row_share:15 
row_mask:0x0 bank_mask:0x1 -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, 
s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l 
quad_perm:[3,2,1,0] fi:1 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_min3_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min3_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_min3_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mad_i16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] -// GFX12: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 
bank_mask:0x1 -// GFX12: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_min3_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, s2, v3 
quad_perm:[3,2,1,0] -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_mirror -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_min3_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_min3_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v255, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mad_i32_i16_e64_dpp v255, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_min3_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v255, v255.l, v255.h, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mad_i32_i16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 
+// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_min3_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x4b,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 
row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x00,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mad_u16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] -// GFX12: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; 
encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, s2, v3 quad_perm:[3,2,1,0] -// GFX12: 
v_mad_u32_u16_e64_dpp v5, v1.l, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_mirror -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, 
v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_mad_u32_u16_e64_dpp v255, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mad_u32_u16_e64_dpp v255, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_mad_u32_u16_e64_dpp v5, v1.h, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_mad_u32_u16_e64_dpp v255, v255.l, v255.h, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mad_u32_u16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_u16_e64_dpp 
v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_mad_u32_u24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_mad_u32_u24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_mad_u32_u24_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, 
vcc_lo row_shr:1 -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_min3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_min3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_min3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] -// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: 
v_min3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] 
-v_max3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX12: v_max3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX12: v_max3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_num_f16_e64_dpp v5.l, 
-|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_min3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_min3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_min3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_max3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_num_f32_e64_dpp v5, v1, 
s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_max3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_max3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: 
v_min3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_max3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_max3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_max3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_max3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_max3_num_f32_e64_dpp v5, 
v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_max3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_max3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_max3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_max3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_max3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_max3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 ; encoding: [0x05,0x06,0x2a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x2a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: 
v_min3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf +// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_min_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min_i16_e64_dpp v5.l, v1.l, v2.h 
op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] -// GFX12: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_min_i16_e64_dpp 
v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_max3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_max3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 
-v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_max3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_max3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_max3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_max3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_max3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_max3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_max3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_max3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_max3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_max3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_max3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_max3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_max3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_max3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_max3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_max3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_min_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l 
quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo 
row_shr:15 -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 
+// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_max3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] -// GFX12: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l 
quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_max3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_max3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6a,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_max3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_max3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6a,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_max3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_max3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6a,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_max3_u32_e64_dpp v5, 
v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6a,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_max3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_max3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, 
v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp 
v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6a,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_max_i16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] 
+v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_max_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, 
v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max_u16_e64_dpp 
v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, 
v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_max_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_max_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minmax_num_f16_e64_dpp v5.l, v1.l, 
v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_max_u16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x68,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x68,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6b,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x68,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, 
-|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x68,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6b,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_minmax_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x06,0x6b,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_minmax_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_maxmin_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_minmax_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6b,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x6b,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x6b,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] 
+v_minmax_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6b,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x6b,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x03,0x6b,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x09,0x13] +v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] -// GFX12: v_maxmin_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x78,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x68,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6b,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x6b,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x09,0x13] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_maxmin_num_f16_e64_dpp v255.h, 
-|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_maxmin_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_maxmin_num_f32_e64_dpp v5, 
v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_maxmin_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_maxmin_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x69,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x69,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_maxmin_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_maxmin_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x69,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: 
v_maxmin_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x69,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_maxmin_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_maxmin_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x69,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x69,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_maxmin_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_maxmin_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x69,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_maxmin_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_maxmin_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_maxmin_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_maxmin_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 
-v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] 
+v_minmax_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x65,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minmax_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minmax_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_maxmin_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x64,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minmax_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] 
-v_maxmin_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_maxmin_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_maxmin_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minmax_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: 
v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minmax_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_maxmin_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x62,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x20,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minmax_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: 
v_minmax_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minmax_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minmax_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minmax_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, exec_lo, 
v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 
-v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX12: v_med3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX12: v_med3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x05,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_minmax_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_minmax_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_med3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_minmax_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] -// GFX12: v_med3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] -// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h 
op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x32,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +v_minmax_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x32,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13] +v_minmax_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_minmax_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] 
-v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_med3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_med3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_med3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_med3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_med3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x31,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_med3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_med3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x31,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_med3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_med3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x31,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_med3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x31,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_med3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_num_f32_e64_dpp v5, -|v1|, 
v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x31,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_med3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_med3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x31,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x31,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x50,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; 
encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// 
GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_minimum_f16 v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_minimum_f16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x67,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_minimum_f16 v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_med3_i16_e64_dpp 
v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_med3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_med3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] -// GFX12: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_mirror +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minimum_f16 v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minimum_f16 v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimum_f16 v5.l, v1.l, v2.l row_share:0 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_share:15 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_med3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minimum_f16 v5.l, v1.l, v2.l 
row_shl:1 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_med3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_med3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_med3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_med3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_med3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_med3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_med3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_med3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_med3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_med3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_med3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_med3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 
; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_med3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_med3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_med3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minimum_f32 v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_minimum_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x65,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimum_f32 v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX12: v_minimum_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum_f32 v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_minimum_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x65,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minimum_f32 v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX12: v_minimum_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x65,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_half_mirror +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_minimum_f32 v5, v1, v2 row_mirror +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_ror:1 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] 
+v_minimum_f32 v5, v1, v2 row_ror:15 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_share:0 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_share:15 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_shl:1 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_shl:15 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo 
row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_shr:1 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minimum_f32 v5, v1, v2 row_shr:15 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minimum_f32 v5, v1, v2 row_xmask:0 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_med3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_med3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimum_f32 v5, v1, v2 row_xmask:15 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] -// GFX12: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum_f32 v5, v1, v2 
quad_perm:[3,2,1,0] fi:0 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minimum_f32 v5, v1, v2 div:2 quad_perm:[3,2,1,0] +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x18,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minimum_f32 v5, v1, v2 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x08,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimum_f32 v5, v1, v2 mul:4 
quad_perm:[3,2,1,0] +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x10,0x01,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_med3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_med3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_med3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum_f32 v5, v1, v2 clamp quad_perm:[3,2,1,0] +// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x80,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_med3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimum3_f16 v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x2f,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_med3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimum3_f16 v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_med3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimum3_f16 v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_med3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimum3_f16 v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_med3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_med3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2f,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_med3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2f,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX12: 
v_minimum3_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minimum3_f16 v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_med3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minimum3_f16 v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimum3_f16 v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, 
v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] -// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, 
|v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX12: v_min3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2b,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX12: v_min3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 
row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2b,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_minimum3_f16 v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2f,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2b,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_minimum3_f16 v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_minimum3_f16 v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] 
-v_min3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_min3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_min3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_min3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_min3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_min3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x29,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minimum3_f16 v5.l, 
v1.l, v2.l, v3.l row_share:15 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_min3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_min3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x29,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_min3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_min3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x29,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_min3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x29,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_min3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x29,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_min3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x29,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x29,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 
-// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_minimum3_f32 v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x2d,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_minimum3_f32 v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_minimum3_f32 v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minimum3_f32 v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2d,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minimum3_f32 v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimum3_f32 v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2d,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] -// GFX12: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum3_f32 v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2d,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +v_minimum3_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minimum3_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minimum3_f32 v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimum3_f32 v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f32 v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] 
+v_minimum3_f32 v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f32 v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum3_f32 v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum3_f32 v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_min3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimum3_f32 v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] 
-v_min3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_min3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimum3_f32 v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_min3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_min3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_min3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_min3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minimum3_f32 v5, v1, v2, vcc_lo 
quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_min3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2d,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_min3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimum3_f32 v5, v1, v2, v3 row_half_mirror +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_mirror +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_ror:1 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_ror:15 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_share:0 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_share:15 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_shl:1 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_shl:15 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_shr:1 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_shr:15 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_xmask:0 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_xmask:15 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minimum3_f32 v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minimum3_f32 v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimum3_f32 v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[0,1,2,3] -// GFX12: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h row_mirror -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minimum3_f32 v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min3_u16_e64_dpp v255.h, v255.l, 
v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f16 v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6e,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f16 v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_min3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f16 v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimummaximum_f16 v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6e,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_min3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6e,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_min3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6e,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_min3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX12: 
v_minimummaximum_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_min3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_min3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_min3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_min3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minimummaximum_f16 v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minimummaximum_f16 v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimummaximum_f16 v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minimummaximum_f16 v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 
bank_mask:0x1 -// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minimummaximum_f16 v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6e,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_min_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: 
v_min_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_mirror +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_min_i16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_min_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_min_u16_e64_dpp v5.l, 
v1.l, v2.l row_shr:1 -// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_min_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_min_u16_e64_dpp v5.l, v1.h, v2.l 
row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_min_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min_u16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f32 v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6c,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f32 v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimummaximum_f32 v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimummaximum_f32 v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x01,0x6c,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimummaximum_f32 v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimummaximum_f32 v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6c,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimummaximum_f32 v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6c,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimummaximum_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, |v1.l|, v2.l, 
-ttmp15 row_shr:15 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minimummaximum_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// 
GFX12: v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_minimummaximum_f32 v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_minimummaximum_f32 v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_minmax_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_minimummaximum_f32 v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +v_minimummaximum_f32 v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, -|m0| row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6a,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, 
-|exec_hi| row_ror:1 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|exec_hi| row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x05,0x6a,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x6a,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_minmax_num_f16_e64_dpp v5.l, |v1.l|, -v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6a,0xd6,0xfa,0x04,0xf2,0x41,0x01,0x50,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6c,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, -v1.l, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x02,0x6a,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, 
-|v1.l|, -|v2.l|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x03,0x6a,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x09,0x13] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_minmax_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] -// GFX12: v_minmax_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] -// GFX12: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_half_mirror +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_minmax_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6a,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +v_minimummaximum_f32 v5, v1, v2, v3 row_mirror +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x6a,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x09,0x13] +v_minimummaximum_f32 v5, v1, v2, v3 row_ror:1 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_minmax_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_minmax_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_minimummaximum_f32 v5, v1, v2, v3 row_ror:15 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_share:0 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_minmax_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_minmax_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_share:15 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_minmax_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_minmax_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_shl:1 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_shl:15 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_shr:1 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_minmax_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_shr:15 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_minmax_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_xmask:0 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_xmask:15 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minmax_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_minmax_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x68,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_minmax_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_minmax_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x68,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, 
v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_minmax_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_minmax_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x68,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_minmax_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_minmax_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x68,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_minmax_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_minmax_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x68,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_minmax_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_minmax_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x68,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_minimummaximum_f32 v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_minmax_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_minmax_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x68,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_minmax_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_minmax_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_msad_u8_e64_dpp v255, v255, v255, v255 
quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x39,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi 
row_shl:15 -// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: 
v_msad_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_msad_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_msad_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_msad_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_minmax_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_minmax_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_msad_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_msad_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_msad_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 // GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_msad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_msad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
bound_ctrl:1 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + v_msad_u8_e64_dpp v5, v1, v2, v3 row_mirror // GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_msad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_msad_u8_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_msad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_msad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_msad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_msad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_share:15 +// 
GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_msad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_msad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_msad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_msad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_msad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_msad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_msad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_msad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_msad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_msad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_msad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_msad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_msad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_msad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_mul_lo_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mul_lo_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] 
+v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_msad_u8_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] // GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_mul_lo_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_mul_lo_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_mul_lo_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] 
quad_perm:[3,2,1,0] +// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror // GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] @@ -4349,11 +12304,11 @@ v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 // GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 // GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_mul_lo_u16_e64_dpp v5.l, 
v1.l, v2.l row_share:15 +// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 // GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] @@ -4367,1945 +12322,2127 @@ v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 // GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_mul_lo_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mul_lo_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_mul_lo_u16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mul_lo_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, v1, s2, v3 
quad_perm:[3,2,1,0] -// GFX12: v_mullit_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_mullit_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x18,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x18,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x18,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x18,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] v_mullit_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] // GFX12: v_mullit_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x18,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_mullit_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp 
v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x18,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: 
[0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + v_mullit_f32_e64_dpp v5, v1, v2, v3 row_mirror // GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_mullit_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_mullit_f32_e64_dpp 
v5, v1, v2, s105 row_shl:1 -// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x18,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x18,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_or3_b32_e64_dpp v5, v1, s2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_or3_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x58,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: 
v_or3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] v_or3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] // GFX12: v_or3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_or3_b32_e64_dpp v5, v1, s2, v3 
quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_or3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_or3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_or3_b32_e64_dpp v5, v1, v2, 
vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_or3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_or3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_or3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_or3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_or3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_or3_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_or3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_or3_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_or3_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_or3_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] 
-v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_mirror +// 
GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 
+v_or3_b32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_or3_b32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_or3_b32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_or3_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 
+// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_or_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_or_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_or_b16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_or_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_or_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_or_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_or_b16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_or_b16_e64_dpp v5.l, v1.h, 
v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_or_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_or_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_or_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_or_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_or_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: 
v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror -// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror -// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 -// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15 -// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1 -// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 -// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 -// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 -// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l 
row_shr:1 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_perm_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_perm_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_perm_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_perm_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_pack_b32_f16_e64_dpp v255, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_pack_b32_f16_e64_dpp v255, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x11,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, -v7.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_pack_b32_f16_e64_dpp v5, -v7.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_perm_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, |v7.l|, v2.l quad_perm:[3,2,1,0] +// GFX12: v_pack_b32_f16_e64_dpp v5, |v7.l|, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_perm_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, -|v7.l|, v2.l quad_perm:[3,2,1,0] +// GFX12: v_pack_b32_f16_e64_dpp v5, -|v7.l|, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_perm_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, -v7.l quad_perm:[3,2,1,0] +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, -v7.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_perm_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, |v7.l| quad_perm:[3,2,1,0] +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, |v7.l| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x11,0xd7,0xfa,0x0e,0x02,0x00,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_perm_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, -|v7.l| quad_perm:[3,2,1,0] +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, -|v7.l| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x11,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_perm_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.h, v2.l op_sel:[1,0,0] 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_perm_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_perm_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_perm_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_sad_hi_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_sad_hi_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:15 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_sad_hi_u8_e64_dpp v5, 
v1, v2, exec_hi row_ror:1 -// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_xmask:0 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_xmask:15 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x00,0x23,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x23,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_sad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_sad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_sad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x24,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX12: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_perm_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x44,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_sad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x44,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_sad_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_sad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_sad_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_sad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_sad_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_sad_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_sad_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_perm_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_sad_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_perm_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x24,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] 
+v_perm_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_sad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_sad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x44,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_sad_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_sad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_sad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] 
+v_perm_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_sad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_sad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_sad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_sad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] 
-v_sad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_sad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_perm_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_sad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_perm_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x25,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_perm_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_sad_u8_e64_dpp v5, 
v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_sad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_sad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 
row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_sad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_sad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_sad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_sad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_sad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_sad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_sad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_sad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_sad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_sad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 
row_share:15 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_sad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_sad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_sad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_sad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] 
-v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_perm_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_perm_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_mirror -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, s2 row_mirror -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, s2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: 
[0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, 
v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x23,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 -// W32: v_sub_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x69,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// W32: v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, exec_hi, v3 
quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6b,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x7b,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sad_hi_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, 
s[12:13], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sad_hi_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 -// W32-ERR: 
:[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] 
-v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x7a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +v_sad_hi_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_sub_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_hi_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_hi_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +v_sad_hi_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 
quad_perm:[0,1,2,3] -// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_mirror -// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_half_mirror -// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:1 -// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:15 -// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// 
GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:1 -// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:15 -// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:1 -// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:15 -// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sad_u16_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x24,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_sub_nc_i32_e64_dpp 
v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_u16_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_u16_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_u16_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +v_sad_u16_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_sub_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_u16_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h 
op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +v_sad_u16_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sad_u16_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sad_u16_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: 
v_sad_u16_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_u16_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// 
GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_u16_e64_dpp v5, v1, v2, null 
quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_u16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_u16_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_u16_e64_dpp v5, v1, v2, ttmp15 
quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u16_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u16_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_mirror -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u16_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, s2 row_mirror -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, s2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid 
operand for instruction +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -// 
W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u16_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u16_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_subrev_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 -// W32: v_subrev_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x69,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u16_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// W32: v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: 
invalid operand for instruction +v_sad_u16_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6b,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u16_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x7b,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u16_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, 
v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: 
v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 
-v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x25,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, -1, 
v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x7a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_xad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, m0, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_xad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_xad_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_xad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_xad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_xad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_xad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x25,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_xad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_xad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_xad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_sad_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_xad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_sad_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: 
v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_sad_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_xor3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] -// GFX12: v_xor3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, v2, s105 
quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_xor3_b32_e64_dpp 
v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: 
v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_xor3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_xor3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, 
v2.l quad_perm:[0,1,2,3] -// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
fi:1 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_xor_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_xor_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_sad_u32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.h, v1.h, v2.h row_share:0 row_mask:0xf bank_mask:0xf -// 
GFX12: v_xor_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.h, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_xor_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x22,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.h row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x10,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v255.h, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_xor_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x40,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: 
v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x22,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// 
GFX12: v_sad_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf 
bank_mask:0xf -// GFX12: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] 
-v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_mad_i32_i16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_mad_u32_u16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, vcc_lo 
quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_mad_u32_u16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_max3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x2c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_max3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x2c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, 
-|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x2c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x2c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_max3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_max3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf 
bank_mask:0xf -// GFX12: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf 
bank_mask:0xf -// GFX12: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: 
v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_med3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: 
[0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sub_co_u32_e64_dpp v255, null, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_sub_co_u32_e64_dpp v255, null, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x7c,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sub_co_u32_e64_dpp v5, s105, v1, v2 quad_perm:[3,2,1,0] +// W32: v_sub_co_u32_e64_dpp v5, s105, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x69,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_sub_co_u32_e64_dpp v5, 
s[104:105], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 quad_perm:[3,2,1,0] +// W32: v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6b,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sub_co_u32_e64_dpp v5, vcc, v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_sub_co_u32_e64_dpp v5, vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: 
v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 quad_perm:[3,2,1,0] +// W32: v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7b,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x2b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_sub_co_u32_e64_dpp v5, null, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_sub_co_u32_e64_dpp v5, null, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x2b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, s2 
quad_perm:[3,2,1,0] +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_min3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x2b,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_sub_co_u32_e64_dpp v5, s[12:13], v1, s2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x2b,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_min3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_min3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_mirror +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: 
v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:15 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_share:0 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_share:15 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 
row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX12: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 
-// GFX12: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX12: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:0 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX1200: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:15 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12-ERR: :[[@LINE-1]]:39: error: invalid op_sel operand +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] fi:0 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12-ERR: :[[@LINE-1]]:28: error: invalid operand for instruction +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] fi:1 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX1200: v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0c,0x04,0x01,0xe4,0x04,0x00] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX1200: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX1200: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] 
quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// GFX1200: v_dot2_f16_f16_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 clamp quad_perm:[3,2,1,0] +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x86,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX1200: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12-ERR: :[[@LINE-1]]:41: error: invalid op_sel operand +v_sub_nc_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_sub_nc_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 
bank_mask:0x0 -// GFX12-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction +v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_dot2_f16_f16_e64_dpp v0.l, s1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12-ERR: :[[@LINE-1]]:30: error: invalid operand for instruction +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_dot2_bf16_bf16_e64_dpp v0.l, v1, s2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX1200: v_dot2_bf16_bf16_e64_dpp v0.l, v1, s2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0c,0x04,0x01,0xe4,0x00,0x00] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12-ERR: :[[@LINE-1]]:45: error: op_sel operand conflicts with 16-bit operand suffix +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX1200: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: 
[0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX1200: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// GFX1200: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x02,0x02,0x01,0x1b,0x00,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX1200: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_dot2_f16_f16_e64_dpp v5.l, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// GFX1200: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h quad_perm:[0,1,2,3] -// GFX1200: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX1200: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX1200: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_dot2_bf16_bf16_e64_dpp v0.l, s1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX12-ERR: :[[@LINE-1]]:32: error: invalid operand for instruction +v_sub_nc_i16_e64_dpp v5.l, 
v1.l, v2.l row_shl:15 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l op_sel:[1,1,0,0] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12-ERR: :[[@LINE-1]]:45: error: invalid op_sel operand +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX1200: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX1200: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// GFX1200: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, 0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd6,0xfa,0x04,0x02,0x02,0x01,0x1b,0x00,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l 
row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h row_mirror -// GFX1200: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX1200: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x04,0xff] -// GFX1200: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[2,2,3,1] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x7a,0x04,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x00,0x45,0x1a,0xcc,0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xfe] -// GFX1200: v_dot2_f32_bf16_e64_dpp v0, 
v1, v2, v3 neg_lo:[1,1,0] neg_hi:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xe ; encoding: [0x00,0x45,0x1a,0xcc,0xfa,0x04,0x0e,0x7c,0x01,0x1b,0x00,0xfe] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x39,0x00,0xff] -// GFX1200: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x39,0x00,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x39,0x00,0x00] -// GFX1200: v_dot2_f32_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[1,2,3,0] row_mask:0x0 bank_mask:0x0 ; encoding: [0x00,0x40,0x1a,0xcc,0xfa,0x04,0x0e,0x1c,0x01,0x39,0x00,0x00] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX12: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minimum_f32 v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sub_nc_i32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_sub_nc_i32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_minimum_f32 v5, v1, v2 row_mirror -// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minimum_f32 v5, v1, v2 row_half_mirror -// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_minimum_f32 v5, v1, v2 row_shl:1 -// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_minimum_f32 v5, v1, v2 row_shl:15 -// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, 
v1, v2 row_half_mirror +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_minimum_f32 v5, v1, v2 row_shr:1 -// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_mirror +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_minimum_f32 v5, v1, v2 row_shr:15 -// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_minimum_f32 v5, v1, v2 row_ror:1 -// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_minimum_f32 v5, v1, v2 row_ror:15 -// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:0 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_minimum_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, 
v2 row_share:15 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_minimum_f32 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_minimum_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x65,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:1 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_minimum_f32 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_minimum_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x65,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_minimum_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_minimum_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x65,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] -// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_maximum_f32 v5, v1, v2 quad_perm:[0,1,2,3] -// GFX12: 
v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_maximum_f32 v5, v1, v2 row_mirror -// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_maximum_f32 v5, v1, v2 row_half_mirror -// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f32 v5, v1, v2 row_shl:1 -// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_maximum_f32 v5, v1, v2 row_shl:15 -// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_maximum_f32 v5, v1, v2 row_shr:1 -// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_maximum_f32 v5, v1, v2 row_shr:15 -// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f32 v5, v1, v2 row_ror:1 -// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] +// GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f32 v5, v1, v2 row_ror:15 -// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 
+v_sub_nc_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_sub_nc_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_maximum_f32 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_maximum_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x66,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f32 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_maximum_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x66,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_maximum_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x66,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_mirror -// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_half_mirror -// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_shl:1 -// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_minimum_f16 v5.l, v1.l, 
v2.l row_shl:15 -// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_shr:1 -// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_shr:15 -// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_ror:1 -// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_ror:15 -// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_minimum_f16 v5.l, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_minimum_f16_e64_dpp v5.l, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x67,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_minimum_f16 v5.l, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_minimum_f16_e64_dpp v5.l, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x67,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_minimum_f16 v255.l, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_minimum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_minimum_f16 v5.h, v1.h, v2.l quad_perm:[3,2,1,0] -// 
GFX12: v_minimum_f16_e64_dpp v5.h, v1.h, v2.l op_sel:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minimum_f16 v5.h, v1.h, v2.l row_ror:15 -// GFX12: v_minimum_f16_e64_dpp v5.h, v1.h, v2.l op_sel:[1,0,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_minimum_f16 v255.h, -|v255.h|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_minimum_f16_e64_dpp v255.h, -|v255.h|, -|v255.h| op_sel:[1,1,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x5b,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX12: v_maximum_f16_e64_dpp v5.l, 
v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f16 v5.l, v1.l, v2.l row_mirror -// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX12: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f16 v5.l, v1.l, v2.l row_half_mirror -// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum_f16 v5.l, v1.l, v2.l row_shl:1 -// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_subrev_co_u32_e64_dpp v255, null, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_subrev_co_u32_e64_dpp v255, null, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x7c,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_maximum_f16 v5.l, v1.l, v2.l row_shl:15 -// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s105, v1, v2 quad_perm:[3,2,1,0] +// W32: v_subrev_co_u32_e64_dpp v5, s105, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x69,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum_f16 v5.l, v1.l, v2.l row_shr:1 -// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction +// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f16 v5.l, v1.l, v2.l row_shr:15 -// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction +// W64: v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f16 v5.l, v1.l, v2.l row_ror:1 -// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum_f16 v5.l, v1.l, v2.l row_ror:15 -// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l 
row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 quad_perm:[3,2,1,0] +// W32: v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6b,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum_f16 v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction +// W64: v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f16 v5.l, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_maximum_f16_e64_dpp v5.l, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 quad_perm:[3,2,1,0] +// W32: v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7b,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum_f16 v5.l, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_maximum_f16_e64_dpp v5.l, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x68,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction +// W64: v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x7a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f16 v255.l, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_maximum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_subrev_co_u32_e64_dpp v5, null, v1, v2 quad_perm:[3,2,1,0] +// GFX12: v_subrev_co_u32_e64_dpp v5, null, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f16 v5.h, v1.h, v2.l quad_perm:[3,2,1,0] -// GFX12: v_maximum_f16_e64_dpp v5.h, v1.h, v2.l op_sel:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, s2 quad_perm:[3,2,1,0] +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum_f16 v5.h, v1.h, v2.l row_ror:15 -// GFX12: v_maximum_f16_e64_dpp v5.h, v1.h, v2.l op_sel:[1,0,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s[12:13], v1, s2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction +// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f16 v255.h, -|v255.h|, -|v255.h| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_maximum_f16_e64_dpp v255.h, -|v255.h|, -|v255.h| op_sel:[1,1,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x5b,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_subrev_co_u32_e64_dpp v5, 
s6, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_minimum3_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_minimum3_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_mirror +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, v2, v3 row_mirror -// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, v2, v255 row_half_mirror -// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:15 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction + +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_share:0 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, v2, s105 row_shl:1 -// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_share:15 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_minimum3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_minimum3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_minimum3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:0 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_minimum3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:15 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_minimum3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] fi:0 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_minimum3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x06,0x2d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] fi:1 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_minimum3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x2d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum3_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_maximum3_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum3_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_maximum3_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 clamp quad_perm:[3,2,1,0] +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x86,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f32 v5, v1, v2, v3 row_mirror -// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xad_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x45,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_maximum3_f32 v5, v1, v2, v255 row_half_mirror -// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f32 v5, 
v1, v2, s105 row_shl:1 -// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f32 v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f32 v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f32 v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_maximum3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f32 v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_maximum3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// 
GFX12: v_xad_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f32 v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_maximum3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_maximum3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_maximum3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_xad_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_maximum3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_xad_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_maximum3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x2e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_xad_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] -// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x45,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX12: v_minimum3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX12: v_minimum3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, s105 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_minimum3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_minimum3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_xad_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_xad_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_minimum3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_xad_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: 
v_xad_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] -// GFX12: v_minimum3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] -// GFX12: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_minimum3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_minimum3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x2f,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// 
GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_minimum3_f16 v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_minimum3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_maximum3_f16 v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_maximum3_f16 v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] -// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] 
+v_xad_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_maximum3_f16 v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX12: v_maximum3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x30,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_maximum3_f16 v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x30,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_maximum3_f16 v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX12: v_maximum3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x30,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_maximum3_f16 v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf 
bank_mask:0xf -// GFX12: v_maximum3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x30,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_maximum3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x30,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_maximum3_f16 v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x30,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_maximum3_f16 v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_maximum3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: 
[0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_maximum3_f16 v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] -// GFX12: v_maximum3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] -// GFX12: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, -v1.h, |v2.l|, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_maximum3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x30,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] +v_xor3_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x40,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, -|v1.l|, -|v2.h|, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_maximum3_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x30,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x09,0x13] +v_xor3_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_maximum3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_xor3_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f32 v5, v1, v2, v3 row_mirror -// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f32 v5, v1, v2, v255 row_half_mirror -// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] 
-v_maximumminimum_f32 v5, v1, v2, s105 row_shl:1 -// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f32 v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f32 v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f32 v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_maximumminimum_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f32 v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x02,0x6d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f32 v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_maximumminimum_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_maximumminimum_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_maximumminimum_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_xor3_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_maximumminimum_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x06,0x6d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_xor3_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_maximumminimum_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x6d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_xor3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] -// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] -// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] 
+v_xor3_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, v2, v3 row_mirror -// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, v2, v255 row_half_mirror -// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, v2, s105 row_shl:1 -// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x40,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, v2, vcc_hi row_shl:15 -// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, v2, vcc_lo row_shr:1 -// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX12: v_minimummaximum_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, -|v2|, exec_hi row_ror:1 -// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, -v1, v2, |exec_lo| row_ror:15 -// GFX12: v_minimummaximum_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_minimummaximum_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_minimummaximum_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_minimummaximum_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: 
v_minimummaximum_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x6c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_maximumminimum_f16 v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_maximumminimum_f16 v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_maximumminimum_f16 v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_maximumminimum_f16 v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_maximumminimum_f16 v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_maximumminimum_f16 v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_maximumminimum_f16 v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_maximumminimum_f16 v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_maximumminimum_f16 v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_maximumminimum_f16 v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null 
row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_maximumminimum_f16 v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_maximumminimum_f16 v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_maximumminimum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x6f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_maximumminimum_f16 v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] -// 
GFX12: v_maximumminimum_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_maximumminimum_f16 v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, -v1.h, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6f,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, -|v1.l|, -|v2.h|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_maximumminimum_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x13,0x6f,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x09,0x13] +v_xor_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX12: v_xor_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_maximumminimum_f16 v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_maximumminimum_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x6f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_xor_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX12: v_xor_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minimummaximum_f16 v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minimummaximum_f16 v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX12: 
v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_mirror -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_minimummaximum_f16 v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_minimummaximum_f16 v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_minimummaximum_f16 v5.l, 
v1.l, v2.l, vcc_hi row_shl:15 -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_minimummaximum_f16 v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_minimummaximum_f16 v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_minimummaximum_f16 v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_minimummaximum_f16 v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] 
+v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_minimummaximum_f16 v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_minimummaximum_f16 v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6e,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_minimummaximum_f16 v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6e,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_minimummaximum_f16 v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_minimummaximum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x6e,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] 
+v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_minimummaximum_f16 v5.h, v1.h, v2.h, v3.h quad_perm:[3,2,1,0] -// GFX12: v_minimummaximum_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minimummaximum_f16 v5.l, v1.l, v2.l, v255.h quad_perm:[0,1,2,3] -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_minimummaximum_f16 v5.l, -v1.h, |v2.l|, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6e,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_minimummaximum_f16 v5.l, -|v1.l|, -|v2.h|, 0.5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX12: v_minimummaximum_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 ; encoding: [0x05,0x13,0x6e,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x09,0x13] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_minimummaximum_f16 v255.h, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX12: v_minimummaximum_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x6e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX12: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16-fake.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16-fake.txt new file mode 100644 index 0000000000000..8a47f7b674252 --- /dev/null +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16-fake.txt @@ -0,0 +1,5672 @@ +# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,GFX1200,GFX1200-W32 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,GFX1200,GFX1200-W64 %s +# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX1250,W32,GFX1250-W32 %s + +0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x55,0xd6,0xfa,0x1e,0x0d,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, 15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x1e,0x0d,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x55,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x55,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x55,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x55,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x55,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x55,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x55,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x55,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_add3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x55,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_add3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_add3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[0,1,2,3] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x68,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# W32: v_add_co_u32_e64_dpp v5, s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64: v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# W32: v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64: 
v_add_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x7a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# W32: v_add_co_u32_e64_dpp v5, ttmp14, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64: v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x47,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + 
+0x05,0x00,0x47,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x47,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x47,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x47,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x47,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x47,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x47,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x47,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x47,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x16,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_alignbit_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff +# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff +# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +0x05,0x00,0x16,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x16,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff +# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x16,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x16,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff +# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x16,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# 
GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x16,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x16,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x20,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] + +0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x17,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x17,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x17,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, null 
row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x17,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# 
GFX12: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_and_b16_e64_dpp v5, v1, v2 
row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x40,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x57,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_and_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x57,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: 
v_and_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x57,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x57,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x57,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x57,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x57,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x57,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x57,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + 
+0xff,0x00,0x57,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_and_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x57,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x40,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x40,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 
row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 
row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x1e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_bcnt_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x11,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_bfe_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x11,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x11,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x11,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: 
v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x11,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x11,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x11,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x11,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x11,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x11,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_bfe_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x11,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + 
+0x05,0x00,0x10,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_bfe_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x10,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x10,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x10,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x10,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x10,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x10,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x10,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x10,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x10,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_bfe_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x10,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x12,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_bfi_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x12,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x12,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x12,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x12,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x12,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x12,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x12,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, -1 row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x12,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x12,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_bfi_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x12,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff +# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff +# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] +# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff +# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] +# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff +# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] +# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff +# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] +# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff +# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] +# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] + 
+0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff +# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] +# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff +# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] +# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff +# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] +# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff +# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] +# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff +# W32: v_cndmask_b16_e64_dpp v5, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] +# W64: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 +# W32: v_cndmask_b16_e64_dpp 
v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] +# W64: v_cndmask_b16_e64_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 +# W32: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] +# W64: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] + +0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 +# GFX12: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] + +0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01 +# W32: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] +# W64: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] + +0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13 +# W32: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13] +# W64: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13] + +0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30 +# GFX12: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] + 
+0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubeid_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x0c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x0c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x0c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x0c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x01,0x0c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x0c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x04,0x0c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x0c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x0c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x0c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x0c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 +# GFX12: v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x0c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] + +0xff,0x87,0x0c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x0c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubema_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + 
+0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x0f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_cubema_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x0f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x0f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x0f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x0f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x04,0x0f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x04,0x0f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x0f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x0f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x0f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x0f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 +# GFX12: v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x0f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] + +0xff,0x87,0x0f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x0f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubesc_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + 
+0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x0d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x0d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x04,0x0d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x0d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x0d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x05,0x0d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x0d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 +# GFX12: v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x0d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] + +0xff,0x87,0x0d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x0d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubetc_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# 
GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x0e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x0e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x04,0x0e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x0e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x0e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x0e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x0e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 +# GFX12: v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x0e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] + +0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed +# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20 +# GFX12: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x22] + +0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed +# GFX12: v_cvt_pk_bf8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed +# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] + +0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed +# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] + +0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed +# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] + +0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d +# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] + +0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5 +# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] + +0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed +# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; 
encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +0x01,0x42,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed +# GFX12: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed +# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20 +# GFX12: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x22] + +0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed +# GFX12: v_cvt_pk_fp8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed +# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] + +0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed +# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] + +0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed +# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] + +0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d +# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] + +0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5 +# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] 
row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] + +0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed +# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +0x01,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed +# GFX12: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] + +0x06,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] + +0x01,0x00,0x6c,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] + +0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] + 
+0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] + +0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5 +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] + +0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +0x01,0x20,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +0x01,0x40,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +0x01,0x60,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff +# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] + +0x06,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v6, -v2, v3 
quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] + +0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] + +0x01,0x00,0x6b,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] + +0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] + +0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] + +0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5 +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] + +0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] + +0x01,0x20,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +0x01,0x40,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + 
+0x01,0x60,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff +# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] + +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 +# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12: v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 
row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# 
GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 +# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12: v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + 
+0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + 
+0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x26,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 
0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x0d,0x30 +# GFX12: v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + 
+0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 +# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: 
v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 +# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x13,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_fma_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_fma_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + 
+0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_fma_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_fma_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_fma_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 +# GFX12: v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] + +0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01 +# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13 +# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13] + +0xff,0x81,0x1c,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30 +# GFX12: v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x1c,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x15,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_lerp_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, 
null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x46,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + 
+0x05,0x00,0x46,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x46,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x46,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x46,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x46,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x46,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x46,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x46,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x46,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x46,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x46,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x56,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x56,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x56,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x56,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: 
v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x56,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x56,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x56,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x56,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x56,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x56,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x56,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + 
+0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + 
+0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_lshlrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x40,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_lshrrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x40,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: 
v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, null row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x2a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x2a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + 
+0x05,0x00,0x2a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x2a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x2a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x2a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_max3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x2a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_max3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x04,0x2a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_max3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x2a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_max3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x2a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_max3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x2a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 +# GFX12: v_max3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; 
encoding: [0x05,0x06,0x2a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] + +0xff,0x87,0x2a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x2a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x1d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: 
v_max3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x1d,0xd6,0xfa,0x1e,0xa9,0x01,0x01,0x11,0x01,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, 15, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x1e,0xa9,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_max3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_max3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + 
+0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x1e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_max3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_max3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_max3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_max3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_max3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_max3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_max3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_max3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_max3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_max3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_max3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_max_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_max_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_max_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_max_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_max_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_max_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_max_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_max_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_max_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_max_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_max_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_max_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_max_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_max_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_max_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_max_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x40,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_max_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_max_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_max_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_max_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_max_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_max_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_max_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_max_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_max_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_max_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_max_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_max_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_max_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_max_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_max_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_max_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x40,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6b,0xd6,0xfa,0xea,0x0d,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, -2.0, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0xea,0x0d,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x6b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x6b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x6b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x6b,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6b,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x6b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + 
+0x05,0x04,0x6b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x6b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x6b,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_maxmin_num_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6b,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x6b,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x6b,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] + +0xff,0x87,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_maxmin_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + + +0x05,0x20,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x6b,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01 +# GFX12: v_maxmin_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6b,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] + 
+0x05,0x13,0x6b,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13 +# GFX12: v_maxmin_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x6b,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_maxmin_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x69,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x69,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x69,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x69,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# 
GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x69,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x69,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_maxmin_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x69,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x69,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x69,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x04,0x69,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_maxmin_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x69,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x69,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_maxmin_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x69,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x69,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_maxmin_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x69,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x69,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 +# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x69,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] + +0xff,0x87,0x69,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_maxmin_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 
fi:1 ; encoding: [0xff,0x87,0x69,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x64,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, 
ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x64,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_maxmin_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x62,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + 
+0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x62,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_maxmin_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_mbcnt_hi_u32_b32_e64_dpp 
v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x20,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: 
v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x20,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + 
+0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x31,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x31,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x31,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x31,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x31,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x31,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_med3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x31,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x31,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_med3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x31,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x04,0x31,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_med3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x31,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x31,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_med3_num_f32_e64_dpp v5, -|v1|, -|v2|, 
null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x31,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x31,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_med3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x31,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x31,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 +# GFX12: v_med3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x31,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] + +0xff,0x87,0x31,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x31,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x20,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_med3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + 
+0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_med3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_med3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_med3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_med3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_med3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_med3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_med3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_med3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_med3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + 
+0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x21,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_med3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_med3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_med3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_med3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_med3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_med3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_med3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_med3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_med3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_med3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x29,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_num_f32_e64_dpp v5, v1, s3, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x29,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x29,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x29,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x29,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x29,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_min3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x29,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x29,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_min3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x29,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + 
+0x05,0x04,0x29,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_min3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x29,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x29,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_min3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x29,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x29,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_min3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x29,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x29,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 +# GFX12: v_min3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x29,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] + +0xff,0x87,0x29,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x29,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x1a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_min3_i32_e64_dpp v5, v1, v2, 
v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_min3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_min3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_min3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_min3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_min3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_min3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_min3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_min3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_min3_i32_e64_dpp v5, v1, 
v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_min3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x1b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_min3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_min3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + 
+0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_min3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_min3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_min3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_min3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_min3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_min3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_min3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_min3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_min_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_min_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_min_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_min_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_min_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_min_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_min_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_min_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_min_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_min_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_min_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_min_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_min_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_min_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_min_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_min_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x40,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_min_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_min_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_min_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_min_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_min_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_min_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_min_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_min_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_min_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_min_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_min_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_min_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_min_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_min_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_min_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_min_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x40,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x6a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x6a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x6a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x6a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: 
v_minmax_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x6a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x04,0x6a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x6a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x6a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_minmax_num_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x6a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x6a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] + +0xff,0x87,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_minmax_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + + +0x05,0x20,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x6a,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01 +# GFX12: v_minmax_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6a,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x6a,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13 +# GFX12: v_minmax_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x6a,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_minmax_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x68,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x68,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x68,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x68,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x68,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x68,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_minmax_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x68,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x68,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_minmax_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x68,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x04,0x68,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_minmax_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x68,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x68,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_minmax_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x68,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x68,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_minmax_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x68,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + 
+0x05,0x06,0x68,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 +# GFX12: v_minmax_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x68,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] + +0xff,0x87,0x68,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_minmax_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x68,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x65,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: 
v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x63,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + 
+0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x39,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_msad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_msad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_msad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_msad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_msad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_msad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_msad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_msad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_msad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_msad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_msad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_mul_lo_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x40,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x18,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mullit_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_mullit_f32_e64_dpp v5, v1, v2, 
s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x18,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x18,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x18,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x18,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 +# GFX12: v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] + 
+0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x58,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_or3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_or3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x58,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_or3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x58,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_or3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x58,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_or3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x58,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x58,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_or3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_or3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x58,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_or3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x58,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_or3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x58,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_or3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x58,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_or3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x40,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x44,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_perm_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x44,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_perm_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x44,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_perm_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x44,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_perm_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x44,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_perm_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x44,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_perm_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_perm_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x44,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_perm_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x44,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_perm_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x44,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_perm_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x44,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_perm_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x23,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: 
v_sad_hi_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x23,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x23,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x23,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x23,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x23,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x23,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x23,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + 
+0x05,0x00,0x23,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x80,0x23,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x23,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x24,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_sad_u16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x24,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_sad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_sad_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_sad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_sad_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_sad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_sad_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_sad_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_sad_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_sad_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x80,0x24,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x24,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x25,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_sad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_sad_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_sad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_sad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_sad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_sad_u32_e64_dpp v5, v1, v2, exec_hi 
row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_sad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_sad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_sad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_sad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x80,0x25,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x25,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x22,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# 
GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_sad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_sad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_sad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_sad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_sad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_sad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_sad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_sad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: 
v_sad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64: 
v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x68,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# W32: v_sub_co_u32_e64_dpp v5, s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64: v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# W32: v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64: v_sub_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x7a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# W32: v_sub_co_u32_e64_dpp v5, ttmp14, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64: v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + 
+0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + 
+0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# 
W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x68,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# W32: v_subrev_co_u32_e64_dpp v5, s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +# W64: v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# W32: v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +# W64: v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x7a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# W32: v_subrev_co_u32_e64_dpp v5, ttmp14, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x7a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +# W64: v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x45,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_xad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_xad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x45,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_xad_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x45,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_xad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x45,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x45,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_xad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x45,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_xad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_xad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x45,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_xad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x45,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_xad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x45,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_xad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x45,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_xad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x40,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_xor3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x40,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x40,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x40,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x40,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x40,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x40,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x40,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x40,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_xor3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_xor_b16_e64_dpp v5, v1, 
v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0x40,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 
bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + 
+0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# 
GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + 
+0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + 
+0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] + +0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 +# 
GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX12: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# GFX12: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# GFX12: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX12: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_fma_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX12: v_fma_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff +# GFX12: v_fma_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff +# GFX12: v_fma_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] + +0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff +# GFX12: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 +# GFX12: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 +# GFX12: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX12: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX12: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# GFX12: v_fma_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# GFX12: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX12: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] + 
+0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x5a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] + 
+0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff +# GFX12: v_mad_u16_e64_dpp v5, 
v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x59,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] + +0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# 
GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x59,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] + +0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] + +0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] + +0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] + +0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x2c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x2c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] + +0x05,0x00,0x2c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x2c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x2c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x2c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x2c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, v1, -|v2|, 
exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x7c,0x2c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x2c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x0b,0x2c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x2c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x15,0x2c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 +# GFX12: v_max3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x2c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +0x05,0x26,0x2c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 +# GFX12: v_max3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x2c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX12: v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x2c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# GFX12: v_max3_num_f16_e64_dpp v5, 
-v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x2c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# GFX12: v_max3_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x2c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] + +0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + 
+0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 
op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 
bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff +# GFX12: 
v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] + +0x05,0x00,0x32,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# 
GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x32,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 +# GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 +# GFX12: v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX12: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, v1, 
v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x32,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# GFX12: v_med3_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x32,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x32,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# GFX12: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x32,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX12: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_med3_i16_e64_dpp v5, 
v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, null 
op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x2b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x2b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] + +0x05,0x00,0x2b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x2b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x2b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x2b,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2b,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x02,0x2b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x7c,0x2b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x2b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x0b,0x2b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x2b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x15,0x2b,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 +# GFX12: 
v_min3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x2b,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +0x05,0x26,0x2b,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 +# GFX12: v_min3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x2b,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] + +0xff,0xc7,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX12: v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x20,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0x05,0x0a,0x2b,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# GFX12: v_min3_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2b,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0x05,0x13,0x2b,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 +# GFX12: v_min3_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x2b,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] + +0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# 
GFX12: v_min3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + 
+0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + 
+0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + 
+0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] + +0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] + +0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] + +0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] + +0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 +# GFX12: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] + +0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 +# GFX12: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 +# GFX12: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# GFX12: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 +# GFX12: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] + +0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + 
+0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + 
+0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: 
v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + 
+0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] + +0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] + +0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] + +0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 +# GFX1200: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] + 
+# op_sel[1:0] are ignored +0x00,0x78,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 +# GFX1200: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] + +0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 +# GFX1200: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] + +0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00 +# GFX1200: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] + +0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX1200: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# GFX1250: v_add_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + +0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX1200: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# GFX1250: v_lshlrev_b32_e32 v6, v255, v183 ; encoding: [0xff,0x6f,0x0d,0x30] + +0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff +# GFX1200: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] +# GFX1250: v_add_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] + 
+0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX1200: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# GFX1250: v_lshlrev_b32_e32 v6, v255, v183 ; encoding: [0xff,0x6f,0x0d,0x30] + +0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 +# GFX1200: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] + +# op_sel[1:0] are ignored +0x00,0x78,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 +# GFX1200: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] + +0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 +# GFX1200: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] + +0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00 +# GFX1200: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] + +0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX1200: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# GFX1250: v_add_f32_dpp v255, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX1200: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; 
encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# GFX1250: v_lshlrev_b32_e32 v6, v255, v183 ; encoding: [0xff,0x6f,0x0d,0x30] + +0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff +# GFX1200: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] +# GFX1250: v_add_f32_dpp v255, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] + +0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX1200: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +# GFX1250: v_lshlrev_b32_e32 v6, v255, v183 ; encoding: [0xff,0x6f,0x0d,0x30] + +0xff,0x03,0x65,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12: v_minimum_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x65,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x02,0x65,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13 +# GFX12: v_minimum_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x65,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x01,0x65,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 +# GFX12: v_minimum_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x01,0x65,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0xff,0x03,0x66,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12: v_maximum_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x66,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x02,0x66,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13 +# GFX12: v_maximum_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x66,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x01,0x66,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 +# GFX12: v_maximum_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x66,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0xff,0x03,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12: v_minimum_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x02,0x67,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13 +# GFX12: v_minimum_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x67,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: 
v_minimum_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_minimum_f16_e64_dpp v5, 
v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x01,0x67,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 +# GFX12: v_minimum_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x67,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 op_sel:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_minimum_f16_e64_dpp v5, v1, v2 op_sel:[1,0,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0xff,0x5b,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30 +# GFX12: v_minimum_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[1,1,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x5b,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +0xff,0x03,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 +# GFX12: v_maximum_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] + +0x05,0x02,0x68,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13 +# GFX12: v_maximum_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x68,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] + +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 +# GFX12: v_maximum_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] + +0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 op_sel:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_maximum_f16_e64_dpp v5, v1, v2 op_sel:[1,0,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +0xff,0x5b,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30 +# GFX12: v_maximum_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[1,1,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x5b,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] + +0xff,0x87,0x2e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_maximum3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x2e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x04,0x2e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_maximum3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x2e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_maximum3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x2e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_maximum3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + 
+0x05,0x06,0x2e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13 +# GFX12: v_maximum3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +0x05,0x02,0x2e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_maximum3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x00,0x2e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x2e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximum3_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x2e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x2e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x2e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_maximum3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0xff,0x87,0x2d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_minimum3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x2d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x04,0x2d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_minimum3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x2d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_minimum3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x2d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_minimum3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x2d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13 +# GFX12: v_minimum3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +0x05,0x02,0x2d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_minimum3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] 
+ +0x05,0x00,0x2d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x2d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimum3_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x2d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x2d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x2d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_minimum3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x01,0x2d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0xff,0x87,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX12: v_maximum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x04,0x30,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x30,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x30,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x30,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x30,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 +# GFX12: v_maximum3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x30,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x30,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13 +# GFX12: v_maximum3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x30,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +0x05,0x02,0x30,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x30,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + 
+0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x30,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x30,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x30,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x78,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x0a,0x30,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# GFX12: v_maximum3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x30,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0xff,0xc7,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30 +# GFX12: v_maximum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +0xff,0x87,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX12: v_minimum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x04,0x2f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x2f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x2f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 +# GFX12: v_minimum3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x2f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13 +# GFX12: v_minimum3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +0x05,0x02,0x2f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x2f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x2f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x78,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, v1, 
v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x0a,0x2f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 +# GFX12: v_minimum3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] + +0xff,0xc7,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30 +# GFX12: v_minimum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] + +0xff,0x87,0x6d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_maximumminimum_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x04,0x6d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_maximumminimum_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x6d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_maximumminimum_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x6d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_maximumminimum_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x6d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13 +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +0x05,0x02,0x6d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: 
v_maximumminimum_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x00,0x6d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x6d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x6d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x6d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_maximumminimum_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0xff,0x87,0x6c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 +# GFX12: v_minimummaximum_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] + +0x05,0x04,0x6c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_minimummaximum_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x6c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_minimummaximum_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x6c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 +# GFX12: v_minimummaximum_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x6c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13 +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] + +0x05,0x02,0x6c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x00,0x6c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x6c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x6c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x6c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_minimummaximum_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + 
+0xff,0x87,0x6f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX12: v_maximumminimum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x04,0x6f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_maximumminimum_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x6f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_maximumminimum_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x6f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 +# GFX12: v_maximumminimum_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x6f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13 +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +0x05,0x02,0x6f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff 
+# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x6f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_maximumminimum_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + +0x05,0x78,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x0a,0x6f,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01 +# GFX12: v_maximumminimum_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6f,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] + +0xff,0xc7,0x6f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30 +# GFX12: v_maximumminimum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x6f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] + +0xff,0x87,0x6e,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 +# GFX12: v_minimummaximum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6e,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] + +0x05,0x04,0x6e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff +# GFX12: v_minimummaximum_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] + +0x05,0x03,0x6e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff +# GFX12: v_minimummaximum_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] + +0x05,0x05,0x6e,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 +# GFX12: v_minimummaximum_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6e,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] + +0x05,0x06,0x6e,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13 +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6e,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] + +0x05,0x02,0x6e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] + +0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, s105 row_shl:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] + +0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] + +0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] + +0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] + +0x05,0x01,0x6e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff +# GFX12: v_minimummaximum_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] + 
+0x05,0x78,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +0x05,0x0a,0x6e,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01 +# GFX12: v_minimummaximum_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6e,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] + +0xff,0xc7,0x6e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30 +# GFX12: v_minimummaximum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x6e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +# GFX1200-W32: {{.*}} +# GFX1200-W64: {{.*}} +# GFX1250-W32: {{.*}} diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt deleted file mode 100644 index 9d0137558c3d4..0000000000000 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ /dev/null @@ -1,8306 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 5 -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,W32-REAL16,GFX1200,GFX1200-W32,GFX1200-W32-REAL16 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W32,W32-FAKE16,GFX1200,GFX1200-W32,GFX1200-W32-FAKE16 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-REAL16,GFX1200,GFX1200-W64,GFX1200-W64-REAL16 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 
-mattr=+wavefrontsize64,-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,W64,W64-FAKE16,GFX1200,GFX1200-W64,GFX1200-W64-FAKE16 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX1250,W32,GFX1250-W32,W32-REAL16,GFX1250-W32-REAL16 %s -# RUN: llvm-mc -triple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 -disassemble -show-encoding < %s | FileCheck -check-prefixes=GFX12,GFX1250,W32,GFX1250-W32,W32-FAKE16,GFX1250-W32-FAKE16 %s - -0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x55,0xd6,0xfa,0x1e,0x0d,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_add3_u32_e64_dpp v5, v1, 15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x1e,0x0d,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x55,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_add3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_add3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x55,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_add3_u32_e64_dpp v5, v1, v2, 
s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x55,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_add3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x55,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_add3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x55,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_add3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_add3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x55,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_add3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x55,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_add3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x55,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_add3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x55,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_add3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_add3_u32_e64_dpp v255, v255, 
v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x55,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32: v_add_co_u32_e64_dpp v5, s12, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x68,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32: v_add_co_u32_e64_dpp v5, s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64: v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32: v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64: v_add_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x7a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32: v_add_co_u32_e64_dpp v5, ttmp14, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64: v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x00,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x47,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_add_lshl_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - 
-0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x47,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x47,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x47,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x47,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x47,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x47,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x47,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x47,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x47,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x47,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x26,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 
bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x26,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x16,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_alignbit_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_alignbit_b32_e64_dpp v5, v1, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_alignbit_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_alignbit_b32_e64_dpp v5, v1, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# W32-FAKE16: v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W32-REAL16: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-REAL16: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff -# W32-FAKE16: v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -# W32-REAL16: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_alignbit_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -# W64-REAL16: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] - -0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff -# W32-FAKE16: v_alignbit_b32_e64_dpp v5, v1, v2, 
v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -# W32-REAL16: v_alignbit_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_alignbit_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -# W64-REAL16: v_alignbit_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] - -0x05,0x00,0x16,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x16,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x16,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x16,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff -# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x16,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - 
-0x05,0x00,0x16,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x16,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x20,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_alignbit_b32_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_alignbit_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_alignbit_b32_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_alignbit_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff -# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] - -0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff -# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] -# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] - -0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x17,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x17,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x17,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x17,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_alignbyte_b32_e64_dpp v255, v255, v255, 
src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# 
W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_and_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_and_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_and_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: 
v_and_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_and_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_and_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x40,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_and_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_and_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; 
encoding: [0xff,0x40,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x57,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_and_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x57,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x57,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x57,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x57,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x57,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x57,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x57,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x57,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_and_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x57,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_and_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x57,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l 
row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_ashrrev_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_ashrrev_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# 
W32-REAL16: v_ashrrev_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x10,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x40,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_ashrrev_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_ashrrev_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; 
encoding: [0x05,0x08,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x40,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_ashrrev_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_ashrrev_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_ashrrev_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x40,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x1e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_bcnt_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x11,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_bfe_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x11,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x11,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x11,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x11,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x11,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x11,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x11,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x11,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_bfe_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x11,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_bfe_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x11,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x10,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_bfe_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x10,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x10,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_bfe_u32_e64_dpp v5, v1, 
v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x10,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x10,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x10,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x10,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x10,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x10,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_bfe_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x10,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_bfe_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x10,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: 
v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x12,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_bfi_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x12,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x12,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x12,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x12,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff 
-# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x12,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x12,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x12,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x12,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_bfi_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x12,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_bfi_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x12,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - 
-0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# 
GFX12: v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] - 
-0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff -# 
W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s104 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, v1, v2, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x60,0x01,0x13] - -0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_cndmask_b16_e64_dpp v255, v255, v255, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x6f,0x0d,0x30] - -0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, |v1.h|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x09,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] - -0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp14 row_xmask:0 row_mask:0x1 
bank_mask:0x3 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13] -# W32-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp14 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13] -# W64-REAL16: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.h|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x12,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x01,0x13] - -0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_cndmask_b16_e64_dpp v255.h, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x43,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_cubeid_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - 
-0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x0c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x0c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x0c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x0c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x0c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x04,0x0c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x04,0x0c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x0c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x0c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x0c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x0c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX12: v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x0c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] - -0xff,0x87,0x0c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x0c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_cubema_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - 
-0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x0f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_cubema_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x0f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x0f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x0f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x0f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x04,0x0f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x0f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x0f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x05,0x0f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x0f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX12: v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x0f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] - -0xff,0x87,0x0f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x0f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_cubesc_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# 
GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x0d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x0d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x04,0x0d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x0d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x0d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x0d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x0d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX12: v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x0d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] - -0xff,0x87,0x0d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x87,0x0d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_cubetc_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x0e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x0e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x04,0x0e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x0e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x0e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x0e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x0e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX12: v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x0e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] - -0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| 
quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] - -0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x20 -# W32-FAKE16: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x22] -# W32-REAL16: v_cvt_pk_bf8_f32 v1.l, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x22] -# W64-FAKE16: v_cvt_pk_bf8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x22] -# W64-REAL16: v_cvt_pk_bf8_f32 v1.l, -v2, |v3| ; encoding: [0x01,0x02,0x6a,0xd7,0x02,0x07,0x02,0x22] - -0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] - -0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed -# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] -# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] -# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v6, |v3| 
quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] -# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] - -0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed -# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] -# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] -# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] -# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] - -0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed -# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] -# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] -# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] -# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] - -0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d -# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 
bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] -# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] -# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] -# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] - -0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5 -# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] -# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] -# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] -# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] - -0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed -# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] -# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] -# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] -# 
W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] - -0x01,0x42,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed -# W32-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] -# W32-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.h, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] -# W64-FAKE16: v_cvt_pk_bf8_f32_e64_dpp v1, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] -# W64-REAL16: v_cvt_pk_bf8_f32_e64_dpp v1.h, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x6a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] - -0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] - -0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x20 -# W32-FAKE16: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x22] -# W32-REAL16: v_cvt_pk_fp8_f32 v1.l, -v2, |v3| ; encoding: 
[0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x22] -# W64-FAKE16: v_cvt_pk_fp8_f32 v1, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x22] -# W64-REAL16: v_cvt_pk_fp8_f32 v1.l, -v2, |v3| ; encoding: [0x01,0x02,0x69,0xd7,0x02,0x07,0x02,0x22] - -0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v6, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] -# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] - -0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed -# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] -# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] -# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] -# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] - -0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed -# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: 
[0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] -# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] -# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] -# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] - -0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed -# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] -# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] -# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] -# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] - -0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d -# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] -# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] -# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] -# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, 
-v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] - -0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5 -# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] -# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] -# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] -# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] - -0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed -# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] -# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] -# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] -# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] - -0x01,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed -# W32-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] -# W32-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.h, -v2, |v3| 
op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] -# W64-FAKE16: v_cvt_pk_fp8_f32_e64_dpp v1, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] -# W64-REAL16: v_cvt_pk_fp8_f32_e64_dpp v1.h, -v2, |v3| op_sel:[0,0,1] quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x42,0x69,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] - -0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] - -0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff -# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] - -0x06,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_sr_bf8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] - -0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed -# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] - -0x01,0x00,0x6c,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] - -0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed -# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] - -0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d -# GFX12: 
v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] - -0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5 -# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] - -0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed -# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x00,0x6c,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] - -0x01,0x20,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff -# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] - -0x01,0x40,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff -# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] - -0x01,0x60,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff -# GFX12: v_cvt_sr_bf8_f32_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x6c,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] - -0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] - -0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff -# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] - -0x06,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_sr_fp8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: 
[0x06,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] - -0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed -# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] - -0x01,0x00,0x6b,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed -# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] - -0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed -# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] - -0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d -# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] - -0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5 -# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] - -0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed -# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x00,0x6b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] - -0x01,0x20,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff -# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] - -0x01,0x40,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff -# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] - -0x01,0x60,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff 
-# GFX12: v_cvt_sr_fp8_f32_e64_dpp v1, v2, v3 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x6b,0xd7,0xfa,0x06,0x02,0x00,0x02,0x1b,0x00,0xff] - -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - 
-0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x06,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x06,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX12: v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x06,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] - -0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX12: v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x06,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x24,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x24,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x07,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX12: v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x07,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] - -0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX12: v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x07,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: 
v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: 
v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x23,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x23,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x26,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - 
-0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x0d,0x30 -# GFX12: v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: 
v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] - -0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX12: v_cvt_pk_norm_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] - -0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX12: v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x13,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_fma_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_fma_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - 
-0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_fma_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_fma_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_fma_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX12: v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] - -0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01 -# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13 -# GFX12: v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x01,0x13] - -0xff,0x81,0x1c,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30 -# GFX12: v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x81,0x1c,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x15,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_lerp_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, 
null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_lerp_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x46,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_lshl_add_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - 
-0x05,0x00,0x46,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x46,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x46,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x46,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x46,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x46,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x46,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x46,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x46,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x46,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x46,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x56,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_lshl_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x56,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x56,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x56,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: 
v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x56,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x56,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x56,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x56,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x56,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x56,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x56,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# 
W32-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - 
-0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# 
W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: 
v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: 
v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_lshlrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: 
v_lshlrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x40,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_lshlrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_lshlrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# 
W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: 
v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: 
v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_lshrrev_b16_e64_dpp 
v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_lshrrev_b16_e64_dpp 
v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x40,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# 
W32-REAL16: v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_lshrrev_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_mad_i32_i24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - 
-0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_mad_u32_u24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x2a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_max3_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - 
-0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x2a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x2a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x2a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x2a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x2a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_max3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x2a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_max3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x04,0x2a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_max3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x04,0x2a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x2a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_max3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x2a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_max3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x2a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX12: v_max3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x2a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] - -0xff,0x87,0x2a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x2a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x1d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_max3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - 
-0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_max3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_max3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_max3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_max3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x1d,0xd6,0xfa,0x1e,0xa9,0x01,0x01,0x11,0x01,0xff -# GFX12: v_max3_i32_e64_dpp v5, v1, 15, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x1e,0xa9,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_max3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_max3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_max3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_max3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_max3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_max3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x1e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_max3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_max3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_max3_u32_e64_dpp v5, v1, v2, s105 
row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_max3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_max3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_max3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_max3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_max3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_max3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_max3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_max3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_max3_u32_e64_dpp v255, v255, v255, 
src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# 
W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l 
row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - 
-0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_max_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_max_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_max_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_max_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.h 
op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_max_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_max_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x40,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_max_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_max_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_max_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# 
W32-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 
row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_max_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_max_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_max_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_max_u16_e64_dpp v5.h, v1.h, 
v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_max_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x40,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x40,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_max_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x6b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x6b,0xd6,0xfa,0xea,0x0d,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, -2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0xea,0x0d,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0xea,0x0d,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, -2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0xea,0x0d,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0xea,0x0d,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x6b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x6b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# 
W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x6b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x6b,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6b,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6b,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x01,0x6b,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6b,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x6b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x04,0x6b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x6b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x03,0x6b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x6b,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6b,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6b,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6b,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6b,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x6b,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x6b,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x6b,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x06,0x6b,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x6b,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] - -0xff,0x87,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - - -0x05,0x20,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0x05,0x0a,0x6b,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6b,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6b,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6b,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6b,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] - -0x05,0x13,0x6b,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x13,0x6b,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x6b,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x6b,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x6b,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] - -0xff,0xc7,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_maxmin_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_maxmin_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_maxmin_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_maxmin_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x69,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: 
v_maxmin_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x69,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x69,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x69,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x69,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x69,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_maxmin_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x69,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x69,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x69,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] 
- -0x05,0x04,0x69,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_maxmin_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x69,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x69,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_maxmin_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x69,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x69,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_maxmin_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x69,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x69,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x69,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] - -0xff,0x87,0x69,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_maxmin_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x69,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x64,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_maxmin_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: 
v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - 
-0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x64,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_maxmin_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x62,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_maxmin_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x62,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_maxmin_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x62,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: 
v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x20,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x20,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x20,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - 
-0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x31,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_med3_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x31,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x31,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x31,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: 
v_med3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x31,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x31,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_med3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x31,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x31,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_med3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x31,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x04,0x31,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_med3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x31,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x31,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_med3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x31,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x31,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_med3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x31,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x31,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX12: v_med3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x31,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] - -0xff,0x87,0x31,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x87,0x31,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x20,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_med3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_med3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_med3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_med3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_med3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_med3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_med3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_med3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_med3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_med3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_med3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x21,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_med3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_med3_u32_e64_dpp 
v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_med3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_med3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_med3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_med3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_med3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_med3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_med3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: 
v_med3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_med3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_med3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x29,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_min3_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x29,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - 
-0x05,0x00,0x29,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x29,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x29,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x29,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_min3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x29,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x29,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_min3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x29,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x04,0x29,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_min3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x29,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x29,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_min3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x29,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x29,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_min3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x29,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x29,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX12: v_min3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; 
encoding: [0x05,0x06,0x29,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] - -0xff,0x87,0x29,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x29,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x1a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_min3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_min3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_min3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_min3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: 
v_min3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_min3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_min3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_min3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_min3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_min3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_min3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - 
-0x05,0x00,0x1b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_min3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_min3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_min3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_min3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_min3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_min3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_min3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_min3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_min3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_min3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_min3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff 
-# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 
-# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l 
row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_min_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_min_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_min_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_min_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - 
-0x05,0x08,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_min_i16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_min_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x40,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_min_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_min_i16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_min_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 
-# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, 
v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - 
-0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_min_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_min_u16_e64_dpp v255, v255, v255 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_min_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_min_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_min_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_min_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x40,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_min_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x6a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x6a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x6a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x6a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x6a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x01,0x6a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x6a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x04,0x6a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - 
-0x05,0x03,0x6a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x6a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x6a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x6a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x06,0x6a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x6a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x6a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] - -0xff,0x87,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_minmax_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_minmax_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_minmax_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - - -0x05,0x20,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0x05,0x0a,0x6a,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6a,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6a,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6a,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6a,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] - 
-0x05,0x13,0x6a,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_minmax_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x6a,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] -# W32-REAL16: v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x6a,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x6a,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] -# W64-REAL16: v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x6a,0xd6,0xfa,0x04,0xc2,0x73,0x01,0x60,0x01,0x13] - -0xff,0xc7,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_minmax_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_minmax_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_minmax_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_minmax_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x68,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_minmax_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x68,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x68,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x68,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x68,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x68,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_minmax_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x68,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - 
-0x05,0x02,0x68,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_minmax_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x68,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x04,0x68,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_minmax_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x68,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x68,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_minmax_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x68,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x68,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_minmax_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x68,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x68,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX12: v_minmax_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x68,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] - -0xff,0x87,0x68,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_minmax_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x68,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x65,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_minmax_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: 
v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - 
-0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x63,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_minmax_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 
bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x39,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_msad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_msad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_msad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_msad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_msad_u8_e64_dpp v5, 
v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_msad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_msad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_msad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_msad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_msad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_msad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: 
v_mul_lo_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_mul_lo_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_mul_lo_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: 
v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: 
v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x40,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_mul_lo_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_mul_lo_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x18,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_mullit_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: 
v_mullit_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_mullit_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x18,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x18,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x18,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x18,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - 
-0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13 -# GFX12: v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x01,0x13] - -0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x58,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_or3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_or3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x58,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_or3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x58,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_or3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x58,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_or3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x58,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_or3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_or3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x58,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_or3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x58,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_or3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x58,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_or3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x58,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_or3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x58,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# 
W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_or_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: 
v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_or_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_or_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_or_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_or_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] 
row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x63,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x40,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_or_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_or_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_or_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x63,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x44,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_perm_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_perm_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x44,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_perm_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x44,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_perm_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x44,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_perm_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x44,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_perm_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_perm_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x44,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_perm_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x44,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_perm_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x44,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_perm_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x44,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_perm_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; 
encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x44,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x23,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_sad_hi_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x23,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x23,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x23,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_sad_hi_u8_e64_dpp v5, 
v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x23,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x23,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x23,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x23,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x23,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x80,0x23,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x23,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - 
-0x05,0x00,0x24,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_sad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_sad_u16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x24,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_sad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_sad_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_sad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_sad_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_sad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_sad_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_sad_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_sad_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_sad_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x80,0x24,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x24,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x25,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_sad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_sad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_sad_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_sad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_sad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_sad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_sad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_sad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_sad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_sad_u32_e64_dpp v5, v1, v2, -1 row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_sad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x80,0x25,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x25,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x22,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_sad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_sad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_sad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: 
v_sad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_sad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_sad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_sad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_sad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_sad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_sad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - 
-0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - 
-0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32: v_sub_co_u32_e64_dpp v5, s12, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x68,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32: v_sub_co_u32_e64_dpp v5, 
s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64: v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32: v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64: v_sub_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x7a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32: v_sub_co_u32_e64_dpp v5, ttmp14, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64: v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x01,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - 
-0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - 
-0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# GFX12: v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x80,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32: v_subrev_co_u32_e64_dpp v5, s12, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x68,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32: v_subrev_co_u32_e64_dpp v5, s104, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64: v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32: v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64: v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x7a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32: v_subrev_co_u32_e64_dpp v5, ttmp14, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64: v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x7a,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# GFX12: v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xfc,0x02,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x45,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_xad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_xad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x45,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_xad_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x45,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_xad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x45,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_xad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x45,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_xad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_xad_u32_e64_dpp v5, 
v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x45,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_xad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x45,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_xad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x45,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_xad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x45,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_xad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x45,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x40,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_xor3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - 
-0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x40,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x40,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x40,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x40,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff -# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x40,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x40,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] 
- -0x05,0x00,0x40,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x40,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# GFX12: v_xor3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# GFX12: v_xor3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# 
W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_xor_b16_e64_dpp 
v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# 
W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_xor_b16_e64_dpp v255, v255, 
v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_xor_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_xor_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_xor_b16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# 
W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_xor_b16_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_xor_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0x40,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_xor_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_xor_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_xor_b16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: 
v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - 
-0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: 
v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 
row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] 
-# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; 
encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] 
row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_add_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_add_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - 
-0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: 
v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_add_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_add_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, 
v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, 
v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l 
row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] - -0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W32-REAL16: 
v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] - -0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_cvt_pk_norm_i16_f16_e64_dpp 
v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - 
-0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: 
v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - 
-0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, 
-v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] - -0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 
bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] - -0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, 
v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] - -0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# W32-FAKE16: 
v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-FAKE16: 
v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] - -0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x26,0x54,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] - -0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# W32-FAKE16: 
v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x54,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] - -0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W32-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W64-REAL16: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x54,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] - -0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: 
v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] - -0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] - 
-0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_fma_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x48,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x48,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_fma_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x48,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x48,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x48,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] - -0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; 
encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x48,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] - -0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_fma_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_fma_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_fma_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_fma_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x48,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] - -0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W32-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, 
-|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_fma_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W64-REAL16: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x48,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] - -0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x48,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: 
v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - 
-0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] - -0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# 
W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, 
v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, 
v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 
row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x53,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 
bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_mad_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x53,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_mad_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x53,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: 
v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x5a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s3 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# W32-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W32-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W64-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] - -0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: 
v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_mad_i32_i16_e64_dpp 
v5, v1.l, v2.l, m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# W32-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -# W32-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -# W64-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# W32-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -# W32-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -# W64-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - -0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x5a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - 
-0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_mad_i32_i16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_mad_i32_i16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x5a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, s3, v3.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - 
-0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] - -0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x08,0x41,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc 
op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x41,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x41,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_mad_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x10,0x41,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_mad_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x41,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x59,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# 
W32-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# W32-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W32-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W64-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] - -0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v5, 
v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x59,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff -# W32-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -# 
W32-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] -# W64-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] - -0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff -# W32-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -# W32-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] -# W64-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] - 
-0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] - -0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_mad_u32_u16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v5, v1, v2, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_mad_u32_u16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x59,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] - -0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_mad_u32_u16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_mad_u32_u16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x59,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x2c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x2c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# 
W64-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] - -0x05,0x00,0x2c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x2c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x2c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x2c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2c,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x2c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, 
exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x7c,0x2c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x2c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x2c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x2c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x2c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x0b,0x2c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x2c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x2c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x2c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x2c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x15,0x2c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x15,0x2c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x2c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x2c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x2c,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] - -0x05,0x26,0x2c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x2c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x2c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x2c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x2c,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] - -0xff,0xc7,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_max3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0xc7,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_max3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2c,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x20,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0x05,0x0a,0x2c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2c,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] - -0x05,0x13,0x2c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_max3_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x2c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W32-REAL16: v_max3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x2c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_max3_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x2c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W64-REAL16: v_max3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x2c,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] - -0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: 
v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 
row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# W32-FAKE16: 
v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] - -0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# 
W64-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: 
v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - 
-0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, 
v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, 
null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_max3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_max3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: 
v_max3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] - -0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, 
v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 
row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x08,0x4e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_max3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_max3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: 
v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] - -0x05,0x00,0x32,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x32,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff -# W32-FAKE16: 
v_med3_num_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x32,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x32,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x32,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x32,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x32,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] - 
-0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x32,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] - -0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x78,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0x05,0x0a,0x32,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x32,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x32,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, 
-v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x32,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x32,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] - -0x05,0x13,0x32,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x32,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W32-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x32,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_med3_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x32,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W64-REAL16: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x32,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] - -0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, 
-|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x32,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: 
v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi 
row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] - -0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# 
W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - 
-0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, 
v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, 
null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x50,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x50,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_med3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x50,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_med3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x50,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - 
-0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: 
v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] - 
-0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] - -0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 
op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x51,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_med3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x51,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_med3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x51,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - 
-0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x2b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: 
v_min3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x2b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x41,0x01,0xff] - -0x05,0x00,0x2b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x2b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x2b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x2b,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2b,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2b,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, |v1|, v2, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2b,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -m0 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2b,0xd6,0xfa,0x04,0xf6,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x02,0x2b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x7c,0x2b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x2b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x7c,0x2b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, -v1, v2, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x2b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x2b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x0b,0x2b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x2b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x2b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, -|v1|, -|v2|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x2b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x2b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x15,0x2b,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x2b,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x2b,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, -|v1|, v2, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x2b,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| 
op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x2b,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] - -0x05,0x26,0x2b,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x2b,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x2b,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, v1, -|v2|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x2b,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x2b,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] - -0xff,0xc7,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_min3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_min3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x2b,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - 
-0x05,0x78,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x20,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0x05,0x0a,0x2b,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2b,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, -v1.h, 
|v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2b,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2b,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2b,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] - -0x05,0x13,0x2b,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_min3_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x2b,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W32-REAL16: v_min3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x2b,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_min3_num_f16_e64_dpp v5, -|v1|, -|v2|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x2b,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] -# W64-REAL16: v_min3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.h|, 0.5 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x13,0x2b,0xd6,0xfa,0x04,0xc2,0x63,0x01,0x60,0x01,0x13] - -0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] - -0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x20,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - 
-0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_min3_i16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_min3_i16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] - -0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x21,0x01,0xff] - -0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] - -0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] - -0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x20,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - 
-0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, null op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x4b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W32-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: 
[0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_min3_u16_e64_dpp v5, v1, v2, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] -# W64-REAL16: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, -1 op_sel:[0,1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x4b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] - -0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_min3_u16_e64_dpp v255, v255, v255, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x4b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, 
v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l 
row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W32-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W64-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] - -0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] 
-# W32-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v5, -v1, |v2| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] -# W64-REAL16: v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] - -0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - 
-0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - 
-0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: 
v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_sub_nc_i16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: 
v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 
bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 
bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] - -0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 -# W32-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 
row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W32-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v5, v1, v2 op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] -# W64-REAL16: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] - -0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_sub_nc_u16_e64_dpp v255, v255, v255 op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] - -0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX1200-W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# GFX1200-W32-REAL16: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# W64-FAKE16: 
v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] - -# op_sel[1:0] are ignored -0x00,0x78,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX1200-W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# GFX1200-W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] - -0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX1200-W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# GFX1200-W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] 
quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] - -0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00 -# GFX1200-W32-FAKE16: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] -# GFX1200-W32-REAL16: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] -# W64-FAKE16: v_dot2_f16_f16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] -# W64-REAL16: v_dot2_f16_f16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x66,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] - -0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# GFX1200-W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# GFX1200-W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# GFX1250: v_add_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# GFX1200-W32-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# GFX1200-W32-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# GFX1250: v_lshlrev_b32_e32 v6, v255, v183 ; encoding: [0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff -# GFX1200-W32-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# GFX1200-W32-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# GFX1250: v_add_f32_dpp v255, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_dot2_f16_f16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_dot2_f16_f16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x66,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] - -0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# GFX1200-W32-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# GFX1200-W32-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# GFX1250: v_lshlrev_b32_e32 v6, v255, v183 ; encoding: [0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_dot2_f16_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_dot2_f16_f16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x66,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX1200-W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# GFX1200-W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.l, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] - -# op_sel[1:0] 
are ignored -0x00,0x78,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX1200-W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# GFX1200-W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] - -0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00 -# GFX1200-W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# GFX1200-W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] -# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, v1, v2, v3.h op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] - -0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00 -# GFX1200-W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: 
[0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] -# GFX1200-W32-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] -# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v0, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] -# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v0.h, |v1|, -v2, -|s3| op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 ; encoding: [0x00,0x65,0x67,0xd6,0xfa,0x04,0x0e,0xc0,0x01,0xe4,0x04,0x00] - -0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX1200-W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# GFX1200-W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# GFX1250: v_add_f32_dpp v255, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# GFX1200-W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# GFX1200-W32-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, 
-|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# GFX1250: v_lshlrev_b32_e32 v6, v255, v183 ; encoding: [0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff -# GFX1200-W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# GFX1200-W32-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# GFX1250: v_add_f32_dpp v255, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v5, v1, v2, v255 op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] -# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v5.l, v1, v2, v255.h op_sel:[0,0,1,0] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x67,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x40,0x01,0xff] - -0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# GFX1200-W32-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# GFX1200-W32-REAL16: 
v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# GFX1250: v_lshlrev_b32_e32 v6, v255, v183 ; encoding: [0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_dot2_bf16_bf16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_dot2_bf16_bf16_e64_dpp v255.h, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x47,0x67,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0xff,0x03,0x65,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX12: v_minimum_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x65,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -0x05,0x02,0x65,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13 -# GFX12: v_minimum_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x65,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] - -0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_minimum_f32_e64_dpp 
v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_minimum_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x01,0x65,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX12: v_minimum_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x65,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -0xff,0x03,0x66,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# GFX12: v_maximum_f32_e64_dpp v255, -|v255|, -|v255| 
row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x66,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -0x05,0x02,0x66,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13 -# GFX12: v_maximum_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x66,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] - -0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# GFX12: 
v_maximum_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# GFX12: v_maximum_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x01,0x66,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# GFX12: v_maximum_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x66,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -0xff,0x03,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_minimum_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_minimum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_minimum_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_minimum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -0x05,0x02,0x67,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13 -# W32-FAKE16: v_minimum_f16_e64_dpp v5, -v1, |v2| row_xmask:0 
row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x67,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] -# W32-REAL16: v_minimum_f16_e64_dpp v5.l, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x67,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] -# W64-FAKE16: v_minimum_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x67,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] -# W64-REAL16: v_minimum_f16_e64_dpp v5.l, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x67,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] - -0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: 
v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# 
W64-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: 
v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_minimum_f16_e64_dpp 
v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x01,0x67,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_minimum_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x67,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_minimum_f16_e64_dpp v5.l, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x67,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_minimum_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x67,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_minimum_f16_e64_dpp v5.l, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x67,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 op_sel:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_minimum_f16_e64_dpp v5.h, v1.h, v2.l op_sel:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 op_sel:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: 
v_minimum_f16_e64_dpp v5.h, v1.h, v2.l op_sel:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 op_sel:[1,0,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_minimum_f16_e64_dpp v5.h, v1.h, v2.l op_sel:[1,0,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_minimum_f16_e64_dpp v5, v1, v2 op_sel:[1,0,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_minimum_f16_e64_dpp v5.h, v1.h, v2.l op_sel:[1,0,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0xff,0x5b,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30 -# W32-FAKE16: v_minimum_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[1,1,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x5b,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -# W32-REAL16: v_minimum_f16_e64_dpp v255.h, -|v255.h|, -|v255.h| op_sel:[1,1,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x5b,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -# W64-FAKE16: v_minimum_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[1,1,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x5b,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -# W64-REAL16: v_minimum_f16_e64_dpp v255.h, -|v255.h|, -|v255.h| op_sel:[1,1,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x5b,0x67,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] - -0xff,0x03,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_maximum_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x03,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_maximum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_maximum_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_maximum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x03,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] - -0x05,0x02,0x68,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13 -# W32-FAKE16: v_maximum_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x68,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] -# W32-REAL16: v_maximum_f16_e64_dpp v5.l, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x68,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x68,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] -# W64-REAL16: v_maximum_f16_e64_dpp v5.l, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x68,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] - -0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff -# W32-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W32-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -# W64-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] - -0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff -# W32-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W32-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -# W64-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] - -0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff -# W32-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W32-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -# W64-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] - -0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff -# W32-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W32-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -# W64-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] - -0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff -# W32-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W32-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -# W64-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] - -0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff -# W32-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W32-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -# W64-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] - -0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] - -0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_maximum_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_maximum_f16_e64_dpp v5.l, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_maximum_f16_e64_dpp v5.l, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] - -0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 op_sel:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_maximum_f16_e64_dpp v5.h, v1.h, v2.l op_sel:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 op_sel:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_maximum_f16_e64_dpp v5.h, v1.h, v2.l op_sel:[1,0,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] - -0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 op_sel:[1,0,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_maximum_f16_e64_dpp v5.h, v1.h, v2.l op_sel:[1,0,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_maximum_f16_e64_dpp v5, v1, v2 op_sel:[1,0,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_maximum_f16_e64_dpp v5.h, v1.h, v2.l op_sel:[1,0,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x48,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] - -0xff,0x5b,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30 -# W32-FAKE16: v_maximum_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[1,1,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 
fi:1 ; encoding: [0xff,0x5b,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -# W32-REAL16: v_maximum_f16_e64_dpp v255.h, -|v255.h|, -|v255.h| op_sel:[1,1,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x5b,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -# W64-FAKE16: v_maximum_f16_e64_dpp v255, -|v255|, -|v255| op_sel:[1,1,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x5b,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] -# W64-REAL16: v_maximum_f16_e64_dpp v255.h, -|v255.h|, -|v255.h| op_sel:[1,1,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x5b,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] - -0xff,0x87,0x2e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_maximum3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x2e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x04,0x2e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_maximum3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x2e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_maximum3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x2e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_maximum3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x2e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13 -# GFX12: v_maximum3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] - -0x05,0x02,0x2e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_maximum3_f32_e64_dpp v5, 
v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x00,0x2e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x2e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_maximum3_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x2e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x2e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - 
-0x05,0x01,0x2e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_maximum3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0xff,0x87,0x2d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_minimum3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x2d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x04,0x2d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_minimum3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x2d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_minimum3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x2d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_minimum3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x2d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13 -# GFX12: v_minimum3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] - -0x05,0x02,0x2d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_minimum3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x00,0x2d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: 
v_minimum3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x2d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_minimum3_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x2d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x2d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x2d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_minimum3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0xff,0x87,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_maximum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x87,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_maximum3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_maximum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_maximum3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0x05,0x04,0x30,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x30,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x30,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x30,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x30,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x30,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x30,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x30,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x03,0x30,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x30,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x30,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x30,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x30,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x30,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x30,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x30,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13 -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x30,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x30,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x30,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x30,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] - -0x05,0x02,0x30,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x02,0x30,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x30,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x30,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x30,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l 
row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x30,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# 
W32-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x30,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x30,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x30,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x30,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x30,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x78,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x78,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x0a,0x30,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_maximum3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x30,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_maximum3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x30,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_maximum3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x30,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_maximum3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x30,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] - -0xff,0xc7,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30 -# W32-FAKE16: v_maximum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] -# W32-REAL16: v_maximum3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] -# W64-FAKE16: v_maximum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] -# W64-REAL16: v_maximum3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0xc7,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] - -0xff,0x87,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_minimum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_minimum3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_minimum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_minimum3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0x05,0x04,0x2f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x2f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W32-REAL16: 
v_minimum3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x2f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x2f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13 -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 
row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] - -0x05,0x02,0x2f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - 
-0x05,0x00,0x2f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x2f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x78,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x0a,0x2f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_minimum3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_minimum3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_minimum3_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_minimum3_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x2f,0xd6,0xfa,0x04,0x06,0x23,0x01,0x5f,0x01,0x01] - -0xff,0xc7,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30 -# W32-FAKE16: v_minimum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] -# W32-REAL16: v_minimum3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] -# W64-FAKE16: 
v_minimum3_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] -# W64-REAL16: v_minimum3_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] - -0xff,0x87,0x6d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_maximumminimum_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x04,0x6d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_maximumminimum_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x6d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_maximumminimum_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x6d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_maximumminimum_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x6d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13 -# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] - -0x05,0x02,0x6d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x00,0x6d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: 
v_maximumminimum_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x6d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x6d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x6d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x6d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_maximumminimum_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x01,0x6d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0xff,0x87,0x6c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30 -# GFX12: v_minimummaximum_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x0d,0x30] - -0x05,0x04,0x6c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# GFX12: v_minimummaximum_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x6c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# GFX12: v_minimummaximum_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x6c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01 -# GFX12: v_minimummaximum_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x6c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13 -# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] - -0x05,0x02,0x6c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x00,0x6c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x6c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x6c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x6c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x6c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# GFX12: v_minimummaximum_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0xff,0x87,0x6f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# 
W32-REAL16: v_maximumminimum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0x05,0x04,0x6f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x6f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x03,0x6f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x6f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x6f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13 -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] - -0x05,0x02,0x6f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, 
-|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x6f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x6f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x78,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x78,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x0a,0x6f,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6f,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6f,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6f,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6f,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] - -0xff,0xc7,0x6f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30 -# W32-FAKE16: v_maximumminimum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x6f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -# W32-REAL16: v_maximumminimum_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; 
encoding: [0xff,0xc7,0x6f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -# W64-FAKE16: v_maximumminimum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x6f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -# W64-REAL16: v_maximumminimum_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x6f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] - -0xff,0x87,0x6e,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30 -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6e,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6e,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6e,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x87,0x6e,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] - -0x05,0x04,0x6e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x04,0x6e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] - -0x05,0x03,0x6e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] - -0x05,0x05,0x6e,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6e,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6e,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, -|v1|, v2, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6e,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6e,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] - -0x05,0x06,0x6e,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13 -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 
row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6e,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6e,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, -|v2|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6e,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6e,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] - -0x05,0x02,0x6e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] - -0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] - -0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] - -0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] - -0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x6e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, s3, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] - -0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] - -0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] - -0x05,0x01,0x6e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x01,0x6e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] - -0x05,0x78,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.h, v1.h, v2.h, v3.h op_sel:[1,1,1,1] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] - -0x05,0x0a,0x6e,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01 -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v5, -v1, |v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6e,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6e,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v5, -v1, 
|v2|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6e,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v5.l, -v1.h, |v2.l|, -1 op_sel:[1,0,0,0] mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x0a,0x6e,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] - -0xff,0xc7,0x6e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30 -# W32-FAKE16: v_minimummaximum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x6e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -# W32-REAL16: v_minimummaximum_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x6e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -# W64-FAKE16: v_minimummaximum_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x6e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] -# W64-REAL16: v_minimummaximum_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xc7,0x6e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] - -## NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -# GFX1200: {{.*}} -# GFX1200-W32: {{.*}} -# GFX1200-W64: {{.*}} -# GFX1200-W64-FAKE16: {{.*}} -# GFX1200-W64-REAL16: {{.*}} -# GFX1250-W32: {{.*}} -# GFX1250-W32-FAKE16: {{.*}} -# GFX1250-W32-REAL16: {{.*}} From 2f790b8231dfe5c18a635f48a07f86f6c8738285 Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Mon, 22 Jun 2026 13:32:34 +0100 Subject: [PATCH 007/511] [AMDGPU][NFC] Templatise and roundtrip gfx13_asm_vop3_dpp16.s (#204849) Again, this is based on the templatised version of gfx12_asm_vop3_dpp16.s with the GFX13-specific changes re-applied on top of it. gfx13_dasm_vop3_dpp16.txt was never upstreamed, so no changes for the disassembler side. --- llvm/test/MC/AMDGPU/gfx13_asm_vop3_dpp16.s | 17013 +++++++++++++++---- 1 file changed, 13347 insertions(+), 3666 deletions(-) diff --git a/llvm/test/MC/AMDGPU/gfx13_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx13_asm_vop3_dpp16.s index ae3200fb6e400..a4de0afa70f74 100644 --- a/llvm/test/MC/AMDGPU/gfx13_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx13_asm_vop3_dpp16.s @@ -1,67 +1,753 @@ -// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --version 6 +// NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1310 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX13,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1310 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1310 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding | FileCheck --check-prefixes=GFX13,W32 %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1310 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX13,W64,W64-ASM %s -// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1310 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc 
-triple=amdgcn -mcpu=gfx1310 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding | FileCheck --check-prefixes=GFX13,W64,W64-DIS %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1310 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | FileCheck --check-prefixes=GFX13,W64 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1310 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1310 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding | FileCheck --strict-whitespace --check-prefixes=GFX13,W32 %s +// RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1310 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1310 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding | FileCheck --strict-whitespace --check-prefixes=GFX13,W64 %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1310 -mattr=+wavefrontsize32,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefixes=W32-ERR --implicit-check-not=error: %s // RUN: not llvm-mc -triple=amdgcn -mcpu=gfx1310 -mattr=+wavefrontsize64,+real-true16 -filetype=null %s 2>&1 | FileCheck --check-prefixes=W64-ERR --implicit-check-not=error: %s +// INSTS= +// v_add3_u32_e64_dpp +// v_add_co_u32_e64_dpp +// v_add_lshl_u32_e64_dpp +// v_add_nc_i16_e64_dpp +// v_add_nc_i32_e64_dpp +// v_add_nc_u16_e64_dpp +// v_alignbit_b32_e64_dpp +// v_alignbyte_b32_e64_dpp +// v_and_b16_e64_dpp +// v_and_or_b32_e64_dpp +// v_ashr_pk_i8_i32 +// v_ashrrev_i16_e64_dpp +// v_bcnt_u32_b32_e64_dpp +// v_bfe_i32_e64_dpp +// v_bfe_u32_e64_dpp +// v_bfi_b32_e64_dpp +// v_bfm_b32_e64_dpp +// v_bitop3_b16_e64_dpp +// v_bitop3_b32_e64_dpp +// v_cndmask_b16_e64_dpp +// v_cubeid_f32_e64_dpp +// v_cubema_f32_e64_dpp +// v_cubesc_f32_e64_dpp +// v_cubetc_f32_e64_dpp +// v_cvt_pk_bf16_f32_e64_dpp +// v_cvt_pk_bf8_f32_e64_dpp +// v_cvt_pk_f16_f32_e64_dpp +// v_cvt_pk_fp8_f32_e64_dpp +// v_cvt_pk_i16_f32_e64_dpp +// 
v_cvt_pk_i16_i32_e64_dpp +// v_cvt_pk_norm_i16_f16_e64_dpp +// v_cvt_pk_norm_i16_f32_e64_dpp +// v_cvt_pk_norm_u16_f16_e64_dpp +// v_cvt_pk_norm_u16_f32_e64_dpp +// v_cvt_pk_u16_f32_e64_dpp +// v_cvt_pk_u16_u32_e64_dpp +// v_cvt_pk_u8_f32_e64_dpp +// v_cvt_sr_bf8_f16_e64_dpp +// v_cvt_sr_fp8_f16_e64_dpp +// v_cvt_sr_bf8_f32_e64_dpp +// v_cvt_sr_fp8_f32_e64_dpp +// v_cvt_sr_pk_bf16_f32_e64_dpp +// v_cvt_sr_pk_f16_f32_e64_dpp +// v_div_fixup_f16_e64_dpp +// v_fma_f16_e64_dpp +// v_fma_f32_e64_dpp +// v_ldexp_f32_e64_dpp +// v_lerp_u8_e64_dpp +// v_lshl_add_u32_e64_dpp +// v_lshl_or_b32_e64_dpp +// v_lshlrev_b16_e64_dpp +// v_lshrrev_b16_e64_dpp +// v_mad_i16_e64_dpp +// v_mad_i32_i16_e64_dpp +// v_mad_i32_i24_e64_dpp +// v_mad_u16_e64_dpp +// v_mad_u32_u16_e64_dpp +// v_mad_u32_u24_e64_dpp +// v_max3_num_f16_e64_dpp +// v_max3_num_f32_e64_dpp +// v_max3_i16_e64_dpp +// v_max3_i32_e64_dpp +// v_max3_u16_e64_dpp +// v_max3_u32_e64_dpp +// v_max_i16_e64_dpp +// v_max_u16_e64_dpp +// v_maxmin_num_f16_e64_dpp +// v_maxmin_num_f32_e64_dpp +// v_maxmin_i32_e64_dpp +// v_maxmin_u32_e64_dpp +// v_maximum_f16 +// v_maximum_f32 +// v_maximum3_f16 +// v_maximum3_f32 +// v_maximumminimum_f16 +// v_maximumminimum_f32 +// v_mbcnt_hi_u32_b32_e64_dpp +// v_mbcnt_lo_u32_b32_e64_dpp +// v_med3_num_f16_e64_dpp +// v_med3_num_f32_e64_dpp +// v_med3_i16_e64_dpp +// v_med3_i32_e64_dpp +// v_med3_u16_e64_dpp +// v_med3_u32_e64_dpp +// v_min3_num_f16_e64_dpp +// v_min3_num_f32_e64_dpp +// v_min3_i16_e64_dpp +// v_min3_i32_e64_dpp +// v_min3_u16_e64_dpp +// v_min3_u32_e64_dpp +// v_min_i16_e64_dpp +// v_min_u16_e64_dpp +// v_minmax_num_f16_e64_dpp +// v_minmax_num_f32_e64_dpp +// v_minmax_i32_e64_dpp +// v_minmax_u32_e64_dpp +// v_minimum_f16 +// v_minimum_f32 +// v_minimum3_f16 +// v_minimum3_f32 +// v_minimummaximum_f16 +// v_minimummaximum_f32 +// v_msad_u8_e64_dpp +// v_mul_lo_u16_e64_dpp +// v_mullit_f32_e64_dpp +// v_or3_b32_e64_dpp +// v_or_b16_e64_dpp +// v_pack_b32_f16_e64_dpp +// 
v_perm_b32_e64_dpp +// v_sad_hi_u8_e64_dpp +// v_sad_u16_e64_dpp +// v_sad_u32_e64_dpp +// v_sad_u8_e64_dpp +// v_sub_co_u32_e64_dpp +// v_sub_nc_i16_e64_dpp +// v_sub_nc_i32_e64_dpp +// v_sub_nc_u16_e64_dpp +// v_subrev_co_u32_e64_dpp +// v_xad_u32_e64_dpp +// v_xor3_b32_e64_dpp +// v_xor_b16_e64_dpp +// +// = +// s6 +// s105 +// s[12:13] +// s[104:105] +// vcc_lo +// vcc_hi +// vcc +// ttmp15 +// ttmp[14:15] +// null +// +// = +// -1 +// 0.5 +// exec_hi +// exec_lo +// m0 +// null +// s105 +// s3 +// src_scc +// ttmp15 +// v255.h +// v255.l +// v3.h +// v3.l +// vcc_hi +// vcc_lo +// +// = +// -v7.l +// |v7.l| +// -|v7.l| +// +// = +// +// -|0.5| +// +// = +// -1 +// 0.5 +// exec_hi +// exec_lo +// m0 +// null +// s105 +// s3 +// src_scc +// ttmp15 +// v255 +// v3 +// vcc_hi +// vcc_lo +// +// = +// -v7 +// |v7| +// -|v7| +// +// = +// +// -|0.5| +// +// = +// bound_ctrl:0 +// bound_ctrl:1 +// +// = +// quad_perm:[0,1,2,3] +// quad_perm:[3,2,1,0] +// row_half_mirror +// row_mirror +// row_ror:1 +// row_ror:15 +// row_share:0 +// row_share:15 +// row_shl:1 +// row_shl:15 +// row_shr:1 +// row_shr:15 +// row_xmask:0 +// row_xmask:15 +// +// = +// fi:0 +// fi:1 +// +// = +// div:2 +// mul:2 +// mul:4 +// +// = +// row_mask:0x0 bank_mask:0x1 +// row_mask:0x3 bank_mask:0x0 +// row_mask:0xf bank_mask:0xf +// +// = +// byte_sel:0 +// byte_sel:1 +// byte_sel:2 +// byte_sel:3 +// +// = +// bitop3:0 +// bitop3:0xa1 +// +// = +// v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l +// v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// +// = +// +// v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// +// = +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// v5.l, , v2.l, v3.l 
quad_perm:[3,2,1,0] +// v5.l, v1.l, , v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, quad_perm:[3,2,1,0] +// v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// +// = +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, quad_perm:[3,2,1,0] +// v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// +// = +// +// v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// +// = +// +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// +// = +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// v5.l, , v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, , v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, quad_perm:[3,2,1,0] +// v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l +// v5.l, 
v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// +// = +// v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// v255.l, v255.l, v255.l, null quad_perm:[3,2,1,0] +// v5.l, , v2.l, s3 quad_perm:[3,2,1,0] +// v5.l, v1.l, , s3 quad_perm:[3,2,1,0] +// v5.l, v1.l, 10, s3 quad_perm:[3,2,1,0] +// v5.l, v1.l, 10, s[6:7] quad_perm:[3,2,1,0] +// v5.l, v1.l, s2, s3 quad_perm:[3,2,1,0] +// v5.l, v1.l, s2, s[6:7] quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, s3 +// v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// +// = +// v0.l, v1, v2 quad_perm:[0,1,2,3] +// v255.l, v255, v255 quad_perm:[0,1,2,3] +// v0.l, -v1, |v2| quad_perm:[0,1,2,3] +// v0.l, v1, v2 quad_perm:[0,1,2,3] +// v0.l, v1, v2 quad_perm:[0,1,2,3] +// +// = +// v0.l, v1, v2, v3 quad_perm:[0,1,2,3] +// v255.l, v255, v255, v255 quad_perm:[0,1,2,3] +// v0.l, v1, v2, v3 quad_perm:[0,1,2,3] +// v0.l, v1, v2, v3 quad_perm:[0,1,2,3] +// v0.l, v1, v2, quad_perm:[0,1,2,3] +// v0.h, v1, v2, v3 op_sel:[0,0,0,1] quad_perm:[0,1,2,3] +// v0.l, v1, v2, v3 quad_perm:[0,1,2,3] +// v0.l, v1, v2, v3 +// v0.l, v1, v2, v3 quad_perm:[0,1,2,3] +// v0.l, v1, v2, v3 quad_perm:[0,1,2,3] +// +// = +// v5, v1.l, v2.l quad_perm:[3,2,1,0] +// v255, v255.l, v255.l quad_perm:[3,2,1,0] +// v5, , v2.l quad_perm:[3,2,1,0] +// v5, v1.l, quad_perm:[3,2,1,0] +// v5, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// v5, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// v5, v1.l, v2.l quad_perm:[3,2,1,0] +// v5, v1.l, v2.l +// v5, v1.l, v2.l quad_perm:[3,2,1,0] +// v5, v1.l, v2.l quad_perm:[3,2,1,0] +// v5, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// +// = +// v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] +// v255, v255.l, v255.l, v255 quad_perm:[3,2,1,0] +// v5, v1.l, 10, v3 quad_perm:[3,2,1,0] +// v5, v1.l, s2, v3 quad_perm:[3,2,1,0] +// v5, v1.l, v2.l, 
quad_perm:[3,2,1,0] +// v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// v5, v1.l, v2.h, v3 op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] +// v5, v1.l, v2.l, v3 +// v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] +// v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] +// v5, v1.l, v2.l, v3 clamp quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2 quad_perm:[3,2,1,0] +// v255, v255, v255 quad_perm:[3,2,1,0] +// v5, , v2 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 clamp quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2, v3.l quad_perm:[3,2,1,0] +// v255, v255, v255, v255.l quad_perm:[3,2,1,0] +// v5, v1, v2, quad_perm:[3,2,1,0] +// v5, v1, 10, v3.l quad_perm:[3,2,1,0] +// v5, v1, s2, v3.l quad_perm:[3,2,1,0] +// v5, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// v5, v1, v2, v3.l quad_perm:[3,2,1,0] +// v5, v1, v2, v3.l +// v5, v1, v2, v3.l quad_perm:[3,2,1,0] +// v5, v1, v2, v3.l quad_perm:[3,2,1,0] +// v5, v1, v2, v3.l clamp quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v255, v255, v255, v255 quad_perm:[3,2,1,0] +// v5, , v2, v3 quad_perm:[3,2,1,0] +// v5, v1, , v3 quad_perm:[3,2,1,0] +// v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// v5, v1, s2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v255, v255, v255, v255 quad_perm:[3,2,1,0] +// v5, , v2, v3 quad_perm:[3,2,1,0] +// v5, v1, , v3 quad_perm:[3,2,1,0] +// v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// v5, v1, s2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 +// v5, v1, v2, v3 
quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v255, v255, v255, v255 quad_perm:[3,2,1,0] +// v5, v1, , v3 quad_perm:[3,2,1,0] +// v5, v1, 10, v3 quad_perm:[3,2,1,0] +// v5, v1, s2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// +// = +// +// v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// +// = +// +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v255, v255, v255, v255 quad_perm:[3,2,1,0] +// v5, , v2, v3 quad_perm:[3,2,1,0] +// v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// v5, v1, s2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// v5, v1, v2, v3 quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2 quad_perm:[3,2,1,0] +// v255, v255, v255 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// +// = +// +// v5, v1, v2 clamp quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2 quad_perm:[3,2,1,0] +// v255, v255, v255 quad_perm:[3,2,1,0] +// v5, , v2 quad_perm:[3,2,1,0] +// v5, v1, quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// +// = +// v5, v1.l, v2 quad_perm:[3,2,1,0] +// v255, v255.l, v255 quad_perm:[3,2,1,0] +// v5, , v2 quad_perm:[3,2,1,0] +// v5, v1.l, quad_perm:[3,2,1,0] +// v5, v1.l, v2 quad_perm:[3,2,1,0] +// v5, v1.l, v2 +// v5, v1.l, v2 quad_perm:[3,2,1,0] +// v5, v1.l, v2 quad_perm:[3,2,1,0] +// v5, v1.l, v2 quad_perm:[3,2,1,0] +// +// = +// v5, v1, v2 quad_perm:[3,2,1,0] +// v255, v255, v255 quad_perm:[3,2,1,0] +// v5, , v2 quad_perm:[3,2,1,0] +// 
v5, v1, quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// v5, v1, v2 quad_perm:[3,2,1,0] +// +// = +// v5, s6, v1, v2 quad_perm:[3,2,1,0] +// v255, null, v255, v255 quad_perm:[3,2,1,0] +// v5, , v1, v2 quad_perm:[3,2,1,0] +// v5, s6, v1, s2 quad_perm:[3,2,1,0] +// v5, s[12:13], v1, s2 quad_perm:[3,2,1,0] +// v5, s6, v1, v2 quad_perm:[3,2,1,0] +// v5, s6, v1, v2 +// v5, s6, v1, v2 quad_perm:[3,2,1,0] +// v5, s6, v1, v2 quad_perm:[3,2,1,0] +// v5, s6, v1, v2 clamp quad_perm:[3,2,1,0] + v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_add3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6d,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, m0, v3 
quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6d,0xd7,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, null 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + v_add3_u32_e64_dpp v5, v1, v2, v3 row_mirror // GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_add3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_add3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_add3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_add3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_add3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_add3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_add3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_add3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_add3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_add3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_add3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_add3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_add3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 
row_xmask:0 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_add3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_add3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_add3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_add3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_add3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_add3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; 
encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_add3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_add3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x6d,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] // W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_add_co_u32_e64_dpp v255, null, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_add_co_u32_e64_dpp v255, null, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x7c,0x0f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_add_co_u32_e64_dpp v5, s105, v1, v2 quad_perm:[3,2,1,0] +// W32: v_add_co_u32_e64_dpp v5, s105, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x69,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x68,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 quad_perm:[3,2,1,0] +// W32: v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6b,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, vcc, v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_add_co_u32_e64_dpp v5, vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 quad_perm:[3,2,1,0] +// W32: v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7b,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7a,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_co_u32_e64_dpp v5, null, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_add_co_u32_e64_dpp v5, null, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_co_u32_e64_dpp v5, s6, v1, s2 quad_perm:[3,2,1,0] +// W32: v_add_co_u32_e64_dpp v5, s6, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x06,0x0f,0xd7,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s[12:13], v1, s2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x0f,0xd7,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] // W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + v_add_co_u32_e64_dpp v5, s6, v1, v2 row_mirror // W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror -// W32: v_add_co_u32_e64_dpp v5, 
s6, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:15 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_share:0 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction + +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_share:15 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 @@ -80,134 +766,229 @@ v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 // W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 -// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:0 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] // W64-ERR: 
:[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 -// W32: v_add_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x69,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:15 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// W32: v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] fi:0 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: v_add_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6b,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] fi:1 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: v_add_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x7b,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// W32: 
v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] // W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_add_co_u32_e64_dpp v5, s6, v1, v2 clamp quad_perm:[3,2,1,0] +// W32: v_add_co_u32_e64_dpp v5, s6, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x86,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// 
W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_add_lshl_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x47,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: 
v_add_lshl_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x68,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_lshl_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_add_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x7a,0x0f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_add_lshl_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xfc,0x0f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_add_lshl_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] +v_add_lshl_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, 
v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x47,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 // GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_mirror // GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// 
GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_add_lshl_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_add_lshl_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x47,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] // GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, 
v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_add_nc_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_add_nc_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] // GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror // GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 // GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] @@ -221,35 +1002,62 @@ v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 // GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_add_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] // GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_add_nc_i32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_add_nc_i32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x7f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] // 
GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_add_nc_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + v_add_nc_i32_e64_dpp v5, v1, v2 row_mirror // GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_add_nc_i32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:1 // GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] @@ -263,35 +1071,68 @@ v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:1 v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:15 // GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:1 -// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_add_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_add_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x7f,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_i32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] +// GFX13: v_add_nc_i32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x7f,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] // GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_add_nc_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_add_nc_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] // GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror // GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_add_nc_u16_e64_dpp 
v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 // GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] @@ -305,3658 +1146,12366 @@ v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 // GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: 
v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_add_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_add_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] // GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] -// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_alignbit_b32_e64_dpp v255, v255, v255, v255.l quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v255, v255, v255, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_mirror -// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror -// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x16,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 -// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x00,0x16,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_alignbit_b32_e64_dpp v5, v1, v2, v255.h quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x16,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_alignbit_b32_e64_dpp v5, v1, v2, v255.l quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] -// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.h quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] -// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror -// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror -// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, 10, v3.l quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 -// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 -// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 -// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: 
v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 -// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 -// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 -// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_mirror +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_ror:1 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_ror:15 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_alignbyte_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_share:0 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_share:15 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l 
quad_perm:[0,1,2,3] -// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shl:1 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shl:15 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shr:1 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shr:15 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_xmask:0 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_xmask:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_xmask:15 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_alignbit_b32_e64_dpp v5, v1, v2, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_alignbit_b32_e64_dpp v5, v1, v2, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_and_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_and_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x67,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_alignbyte_b32_e64_dpp v255, v255, v255, v255.l quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v255, v255, v255, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x17,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: 
v_alignbyte_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, exec_lo 
row_ror:15 -// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_and_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 
fi:1 -// GFX13: v_and_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x71,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.h quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_ashr_pk_i8_i32 v2.h, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 -// GFX13: v_ashr_pk_i8_i32_e64_dpp v2.h, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53] -// ERR: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 +v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_ashr_pk_i8_i32 v2.l, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_ashr_pk_i8_i32_e64_dpp v2.l, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff] -// ERR: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf +v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_ashr_pk_i8_i32 v2.l, v4, v7, v8 quad_perm:[1,2,3,1] -// GFX13: v_ashr_pk_i8_i32_e64_dpp v2.l, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff] -// ERR: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf +v_alignbyte_b32_e64_dpp v5, v1, 10, v3.l quad_perm:[3,2,1,0] 
+// GFX13: v_alignbyte_b32_e64_dpp v5, v1, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_ashr_pk_i8_i32 v2.l, v4, v7, v8 row_share:3 fi:1 -// GFX13: v_ashr_pk_i8_i32_e64_dpp v2.l, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x90,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] -// ERR: v_ashr_pk_i8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 +v_alignbyte_b32_e64_dpp v5, v1, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_ashr_pk_u8_i32 v2.h, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 -// GFX13: v_ashr_pk_u8_i32_e64_dpp v2.h, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 ; encoding: [0x02,0x40,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0x53] -// ERR: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 op_sel:[0,0,0,1] row_share:0 row_mask:0x5 bank_mask:0x3 +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_ashr_pk_u8_i32 v2.l, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_ashr_pk_u8_i32_e64_dpp v2.l, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x06,0x02,0x04,0x50,0x01,0xff] -// ERR: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, 1 row_share:0 row_mask:0xf bank_mask:0xf +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_ashr_pk_u8_i32 v2.l, v4, v7, v8 quad_perm:[1,2,3,1] 
-// GFX13: v_ashr_pk_u8_i32_e64_dpp v2.l, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x79,0x00,0xff] -// ERR: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 quad_perm:[1,2,3,1] row_mask:0xf bank_mask:0xf +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_ashr_pk_u8_i32 v2.l, v4, v7, v8 row_share:3 fi:1 -// GFX13: v_ashr_pk_u8_i32_e64_dpp v2.l, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x02,0x00,0x91,0xd6,0xfa,0x0e,0x22,0x04,0x04,0x53,0x05,0xff] -// ERR: v_ashr_pk_u8_i32_e64_dpp v2, v4, v7, v8 row_share:3 row_mask:0xf bank_mask:0xf fi:1 +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 
-v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_ror:1 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_ror:15 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_share:0 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_share:15 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shl:1 +// 
GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shl:15 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shr:1 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shr:15 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_xmask:0 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: 
v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_xmask:15 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_ashrrev_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_ashrrev_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x08,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_mirror -// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_alignbyte_b32_e64_dpp v5, v1, v2, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_and_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] 
+// GFX13: v_and_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x67,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_and_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_bcnt_u32_b32_e64_dpp v5, 
v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_bcnt_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_bcnt_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: 
v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_bfe_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_bfe_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_bfe_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_bfe_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x11,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_and_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x67,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_and_or_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x71,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x10,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x71,0xd7,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_and_or_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_and_or_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_bfe_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_bfe_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x10,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_and_or_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_and_or_b32_e64_dpp 
v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, exec_hi 
row_ror:1 -// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_and_or_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x00,0x12,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_and_or_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_bfi_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_bfi_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x12,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_mirror -// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, vcc_lo 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 fi:0 -// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_bfm_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_bitop3_b16_e64_dpp v255.l, v255.l, v255.l, src_scc bitop3:101 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_bitop3_b16_e64_dpp v255.l, v255.l, v255.l, src_scc bitop3:0x65 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x04,0x33,0xd6,0xfa,0xfe,0xf7,0xab,0xff,0x6f,0x05,0x30] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_bitop3_b16_e64_dpp v255.h, v255.l, v255.l, src_scc bitop3:104 op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_bitop3_b16_e64_dpp v255.h, v255.l, v255.l, src_scc bitop3:0x68 op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x45,0x33,0xd6,0xfa,0xfe,0xf7,0x0b,0xff,0x6f,0x0d,0x30] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, -1 
bitop3:104 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, -1 bitop3:0x68 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x25,0x33,0xd6,0xfa,0x04,0x06,0x0b,0x01,0x60,0x01,0x13] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, -1 bitop3:99 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, -1 bitop3:0x63 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x06,0x6b,0x01,0x60,0x09,0x13] +v_and_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi bitop3:102 op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi bitop3:0x66 op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x33,0xd6,0xfa,0x04,0xfe,0xc9,0x01,0x2f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_hi bitop3:6 row_ror:15 -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_hi bitop3:6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfe,0xc1,0x01,0x2f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: 
[0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_bitop3_b16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x33,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_bitop3_b16_e64_dpp v5.l, v1.h, v2.l, exec_lo bitop3:103 op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.h, v2.l, exec_lo bitop3:0x67 op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x33,0xd6,0xfa,0x04,0xfa,0xe9,0x01,0x50,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_lo bitop3:77 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_lo bitop3:0x4d row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x33,0xd6,0xfa,0x04,0xfa,0xa9,0x01,0x50,0x01,0xff] +v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x71,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 
quad_perm:[0,1,2,3] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.h, null bitop3:104 op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.h, null bitop3:0x68 op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x33,0xd6,0xfa,0x04,0xf2,0x09,0x01,0x5f,0x01,0x01] +v_ashr_pk_i8_i32 v255.l, v255, v255, v255 quad_perm:[0,1,2,3] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v255.l, v255, v255, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x90,0xd6,0xfa,0xfe,0xff,0x07,0xff,0xe4,0x00,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, null bitop3:88 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, null bitop3:0x58 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x03,0x33,0xd6,0xfa,0x04,0xf2,0x09,0x01,0x5f,0x01,0x01] +v_ashr_pk_i8_i32 v0.l, v1, v2, -1 quad_perm:[0,1,2,3] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, -1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x06,0x03,0x01,0xe4,0x00,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, s105 bitop3:0x16 row_shl:15 -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, s105 bitop3:0x16 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x33,0xd6,0xfa,0x04,0xa6,0xc1,0x01,0x0f,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, 0.5 quad_perm:[0,1,2,3] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, 0.5 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0xc2,0x03,0x01,0xe4,0x00,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, ttmp15 bitop3:5 row_ror:1 -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, ttmp15 bitop3:5 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xee,0xa1,0x01,0x21,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, 
v1, v2, exec_hi quad_perm:[0,1,2,3] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, exec_hi quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0xfe,0x01,0x01,0xe4,0x00,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v255.l bitop3:0 row_shl:1 -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, exec_lo quad_perm:[0,1,2,3] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, exec_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0xfa,0x01,0x01,0xe4,0x00,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l bitop3:0x27 row_mirror -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l bitop3:0x27 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0xe4,0x01,0x40,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, m0 quad_perm:[0,1,2,3] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, m0 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0xf6,0x01,0x01,0xe4,0x00,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l bitop3:100 row_half_mirror -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l bitop3:0x64 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0x8c,0x01,0x41,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, null quad_perm:[0,1,2,3] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, null quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0xf2,0x01,0x01,0xe4,0x00,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l bitop3:161 quad_perm:[0,1,2,3] -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l bitop3:0xa1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0x34,0x01,0xe4,0x00,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, s105 quad_perm:[0,1,2,3] +// GFX13: 
v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, s105 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0xa6,0x01,0x01,0xe4,0x00,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, s3 quad_perm:[0,1,2,3] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi bitop3:63 row_shr:1 -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi bitop3:0x3f row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x07,0x33,0xd6,0xfa,0x04,0xae,0xe1,0x01,0x11,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, src_scc quad_perm:[0,1,2,3] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, src_scc quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0xf6,0x03,0x01,0xe4,0x00,0xff] -v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo bitop3:0x24 row_shr:15 -// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo bitop3:0x24 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0xaa,0x81,0x01,0x1f,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, ttmp15 quad_perm:[0,1,2,3] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, ttmp15 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0xee,0x01,0x01,0xe4,0x00,0xff] -v_bitop3_b32_e64_dpp v255, v255, v255, src_scc bitop3:101 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_bitop3_b32_e64_dpp v255, v255, v255, src_scc bitop3:0x65 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x04,0x34,0xd6,0xfa,0xfe,0xf7,0xab,0xff,0x6f,0x05,0x30] +v_ashr_pk_i8_i32 v0.l, v1, v2, v255 quad_perm:[0,1,2,3] +// GFX13: 
v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] -v_bitop3_b32_e64_dpp v5, v1, v2, -1 bitop3:88 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, -1 bitop3:0x58 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x03,0x34,0xd6,0xfa,0x04,0x06,0x0b,0x01,0x5f,0x01,0x01] +v_ashr_pk_i8_i32 v0.l, v1, v2, vcc_hi quad_perm:[0,1,2,3] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, vcc_hi quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0xae,0x01,0x01,0xe4,0x00,0xff] -v_bitop3_b32_e64_dpp v5, v1, v2, 0.5 bitop3:99 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, 0.5 bitop3:0x63 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0xc2,0x6b,0x01,0x60,0x09,0x13] +v_ashr_pk_i8_i32 v0.l, v1, v2, vcc_lo quad_perm:[0,1,2,3] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, vcc_lo quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0xaa,0x01,0x01,0xe4,0x00,0xff] -v_bitop3_b32_e64_dpp v5, v1, v2, exec_hi bitop3:5 row_ror:1 -// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, exec_hi bitop3:5 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xfe,0xa1,0x01,0x21,0x01,0xff] +v_ashr_pk_i8_i32 v0.h, v1, v2, v3 op_sel:[0,0,0,1] quad_perm:[0,1,2,3] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.h, v1, v2, v3 op_sel:[0,0,0,1] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x40,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_bitop3_b32_e64_dpp v5, v1, v2, exec_lo bitop3:6 row_ror:15 -// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, exec_lo bitop3:6 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xfa,0xc1,0x01,0x2f,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 quad_perm:[0,1,2,3] bound_ctrl:0 +// GFX13: 
v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_bitop3_b32_e64_dpp v5, v1, v2, null bitop3:77 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, null bitop3:0x4d row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x34,0xd6,0xfa,0x04,0xf2,0xa9,0x01,0x50,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 quad_perm:[0,1,2,3] bound_ctrl:1 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x08,0xff] -v_bitop3_b32_e64_dpp v5, v1, v2, s105 bitop3:0 row_shl:1 -// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_bitop3_b32_e64_dpp v5, v1, v2, ttmp15 bitop3:0x24 row_shr:15 -// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, ttmp15 bitop3:0x24 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 row_half_mirror +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_bitop3_b32_e64_dpp v5, v1, v2, v255 bitop3:100 row_half_mirror -// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v255 bitop3:0x64 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0xfe,0x8f,0x01,0x41,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 row_mirror +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:0x27 row_mirror -// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:0x27 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0x0e,0xe4,0x01,0x40,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 row_ror:1 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:161 quad_perm:[0,1,2,3] -// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:0xa1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0x0e,0x34,0x01,0xe4,0x00,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 row_ror:15 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 row_share:0 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_bitop3_b32_e64_dpp v5, v1, v2, vcc_hi bitop3:0x15 row_shl:15 -// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_hi bitop3:0x15 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x34,0xd6,0xfa,0x04,0xae,0xa1,0x01,0x0f,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 row_share:15 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_bitop3_b32_e64_dpp v5, v1, v2, vcc_lo bitop3:63 row_shr:1 -// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_lo bitop3:0x3f row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x07,0x34,0xd6,0xfa,0x04,0xaa,0xe1,0x01,0x11,0x01,0xff] +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 row_shl:1 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 row_shl:15 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3] -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 row_shr:1 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 row_shr:15 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 row_xmask:0 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1 -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 row_xmask:15 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15 -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 quad_perm:[0,1,2,3] fi:0 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1 -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 quad_perm:[0,1,2,3] fi:1 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15 -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0x01] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1 -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0x30] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 row_ror:15 -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashr_pk_i8_i32 v0.l, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX13: v_ashr_pk_i8_i32_e64_dpp v0.l, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x90,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf -// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc_lo row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_ashrrev_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x08,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp15 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xee,0x21,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction +v_ashrrev_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] -// W32-ERR: :[[@LINE-1]]:35: error: invalid operand for instruction -// W64-ASM: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] -// W64-DIS: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] - -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] -// W32-ERR: :[[@LINE-1]]:35: error: invalid operand for instruction -// W64-ASM: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] -// W64-DIS: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] - -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror -// W32-ERR: :[[@LINE-1]]:35: error: invalid operand for instruction -// W64-ASM: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] -// W64-DIS: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x40,0x01,0xff] - -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror -// W32-ERR: :[[@LINE-1]]:35: error: invalid operand for instruction -// W64-ASM: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] -// W64-DIS: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] - -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 -// W32-ERR: :[[@LINE-1]]:35: error: invalid operand for instruction -// W64-ASM: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] -// W64-DIS: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] - -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 -// W32-ERR: :[[@LINE-1]]:35: error: invalid operand for instruction -// W64-ASM: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] -// W64-DIS: v_cndmask_b16_e64_dpp v5.l, 
v1.l, v2.l, s[6:7] row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x0f,0x01,0xff] - -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 -// W32-ERR: :[[@LINE-1]]:35: error: invalid operand for instruction -// W64-ASM: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] -// W64-DIS: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x11,0x01,0xff] - -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 -// W32-ERR: :[[@LINE-1]]:35: error: invalid operand for instruction -// W64-ASM: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] -// W64-DIS: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1f,0x01,0xff] - -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 -// W32-ERR: :[[@LINE-1]]:35: error: invalid operand for instruction -// W64-ASM: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] -// W64-DIS: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x21,0x01,0xff] - -v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 -// W32-ERR: :[[@LINE-1]]:35: error: invalid operand for instruction -// W64-ASM: v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] -// W64-DIS: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[6:7] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x2f,0x01,0xff] - -v_cndmask_b16_e64_dpp 
v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-1]]:35: error: invalid operand for instruction -// W64-ASM: v_cndmask_b16_e64_dpp v5, v1, v2, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] -// W64-DIS: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x50,0x01,0xff] - -v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 -// W32-ERR: :[[@LINE-1]]:38: error: invalid operand for instruction -// W64-ASM: v_cndmask_b16_e64_dpp v5, |v1|, -v2, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] -// W64-DIS: v_cndmask_b16_e64_dpp v5.l, |v1.l|, -v2.l, vcc row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0xaa,0x41,0x01,0x5f,0x01,0x01] - -v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32-ERR: :[[@LINE-1]]:38: error: invalid operand for instruction -// W64-ASM: v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] -// W64-DIS: v_cndmask_b16_e64_dpp v5.l, -v1.l, |v2.l|, ttmp[14:15] row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x04,0xea,0x21,0x01,0x60,0x09,0x13] - -v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cndmask_b16_e64_dpp v255.l, -|v255.l|, -|v255.l|, null row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x5d,0xd6,0xfa,0xfe,0xf3,0x61,0xff,0x6f,0x05,0x30] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_cubeid_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_cubeid_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: 
v_cubeid_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_cubeid_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_cubeid_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX13: v_cubeid_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cubeid_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cubeid_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x0c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cubeid_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x0c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_cubeid_f32_e64_dpp v255, 
-|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x0c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_cubema_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_cubema_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_ashrrev_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_cubema_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v255, v255, v255 
quad_perm:[3,2,1,0] +// GFX13: v_bcnt_u32_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x64,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_cubema_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX13: v_cubema_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cubema_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cubema_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x0f,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cubema_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x0f,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x0f,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror 
-// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_cubesc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_cubesc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_cubesc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_cubesc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX13: v_cubesc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cubesc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cubesc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 
row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x0d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cubesc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x0d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x0d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_bcnt_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: 
v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_bfe_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x11,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: 
v_bfe_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_cubetc_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_cubetc_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX13: v_cubetc_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf 
bank_mask:0xf -// GFX13: v_cubetc_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x0e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cubetc_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x0e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_bfe_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cubetc_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x0e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_bfe_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cubetc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x0e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_bfe_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf16_f32_e64_dpp v255, 
-|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_pk_bf16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x6e,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30] +v_bfe_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] +v_bfe_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] +v_bfe_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: 
v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_mirror -// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_bfe_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:1 +v_bfe_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp 
v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: 
[0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_bfe_i32_e64_dpp v5, 
v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// 
GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x10,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, 
v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x10,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: 
v_bfe_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_bfe_u32_e64_dpp v5, 
v1, v2, v3 row_xmask:15 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x12,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x12,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, 
exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] 
+// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_bfi_b32_e64_dpp 
v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: 
[0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_bfm_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_bfm_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x1d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_bfm_b32_e64_dpp 
v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_bfm_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x33,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, 10, v3.l 
quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, 
v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x33,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, 
v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_bitop3_b16_e64_dpp 
v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l bitop3:0 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l bitop3:0xa1 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b16_e64_dpp v5.l, v1.l, v2.l, v3.l bitop3:0xa1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x33,0xd6,0xfa,0x04,0x0e,0x34,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x34,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + 
+v_bitop3_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// 
GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: 
[0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_bitop3_b32_e64_dpp 
v5, v1, v2, v3 row_shr:1 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:0 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:0xa1 quad_perm:[3,2,1,0] +// GFX13: v_bitop3_b32_e64_dpp v5, v1, v2, v3 bitop3:0xa1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x34,0xd6,0xfa,0x04,0x0e,0x34,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null quad_perm:[3,2,1,0] +// GFX13: v_cndmask_b16_e64_dpp v255.l, v255.l, v255.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x5d,0xd6,0xfa,0xfe,0xf3,0x01,0xff,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, -v7.l, v2.l, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, -v7.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x20,0x07,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:42: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, |v7.l|, v2.l, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, |v7.l|, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x07,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:43: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, -|v7.l|, v2.l, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, -|v7.l|, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x5d,0xd6,0xfa,0x04,0x0e,0x20,0x07,0x1b,0x00,0xff] +// W64-ERR: 
:[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, -v7.l, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, -v7.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x0e,0x0e,0x40,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:42: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, |v7.l|, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, |v7.l|, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x0e,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:43: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, -|v7.l|, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, -|v7.l|, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x5d,0xd6,0xfa,0x0e,0x0e,0x40,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:44: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x0d,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s[6:7] quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:39: error: invalid operand for instruction +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, 10, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x19,0x00,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s3 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0c,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:39: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s[6:7] quad_perm:[3,2,1,0] +// W32-ERR: 
:[[@LINE-1]]:39: error: invalid operand for instruction +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, s2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x18,0x00,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[12:13] quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[12:13] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x32,0x00,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s[104:105] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xa2,0x01,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x5d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, vcc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp[14:15] quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:41: error: invalid operand for instruction +// W64: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, ttmp[14:15] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xea,0x01,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] bound_ctrl:0 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] bound_ctrl:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x08,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + 
+v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3] +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_share:0 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_share:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x5f,0x01,0xff] +// 
W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_xmask:0 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x60,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_xmask:15 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x6f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] fi:0 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] fi:1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x04,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0x01] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0x30] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// W32: v_cndmask_b16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:41: error: invalid operand for instruction + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0c,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: 
v_cubeid_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0c,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0c,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0c,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + 
+v_cubeid_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0c,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_cubeid_f32_e64_dpp 
v5, v1, v2, v3 row_mirror +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + 
+v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_cubeid_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0f,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0f,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, |v7|, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0f,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0f,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, 
v2, null quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x04,0x0f,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_cubema_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0xff,0x00,0x0d,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0d,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0d,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0d,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, 
v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + 
+v_cubesc_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0d,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 
row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + 
+v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_cubesc_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0e,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x0e,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0e,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0e,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x0e,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: 
v_cubetc_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x0e,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: 
[0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_cubetc_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6e,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_mirror 
+// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:1 // GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x18,0x01,0x1b,0x00,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x08,0x01,0x1b,0x00,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x10,0x01,0x1b,0x00,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x7b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_bf8_f32_e64_dpp v255.l, v255, v255 quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_bf8_f32_e64_dpp v255.l, v255, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x7b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0xe4,0x00,0xff] + +v_cvt_pk_bf8_f32_e64_dpp v0.l, -v1, |v2| quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_bf8_f32_e64_dpp v0.l, -v1, |v2| quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x02,0x7b,0xd7,0xfa,0x04,0x02,0x20,0x01,0xe4,0x00,0xff] + +v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] fi:0 +// GFX13: v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x7b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] fi:1 +// GFX13: v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x00,0x00,0x7b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x04,0xff] + +v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 +// GFX13: 
v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0x00,0x00,0x7b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0x01] + +v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0x00,0x00,0x7b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0x30] + +v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_pk_bf8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x7b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_f16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_f16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x74,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x74,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x74,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x18,0x01,0x1b,0x00,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x08,0x01,0x1b,0x00,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x10,0x01,0x1b,0x00,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x7a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_fp8_f32_e64_dpp v255.l, v255, v255 quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_fp8_f32_e64_dpp v255.l, v255, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x7a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0xe4,0x00,0xff] + +v_cvt_pk_fp8_f32_e64_dpp v0.l, -v1, |v2| quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_fp8_f32_e64_dpp v0.l, -v1, |v2| quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x00,0x02,0x7a,0xd7,0xfa,0x04,0x02,0x20,0x01,0xe4,0x00,0xff] + +v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] fi:0 +// GFX13: v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x7a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] fi:1 +// GFX13: v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x00,0x00,0x7a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x04,0xff] + +v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x1 ; encoding: [0x00,0x00,0x7a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0x01] + +v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0x3 bank_mask:0x0 ; encoding: [0x00,0x00,0x7a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0x30] + +v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_pk_fp8_f32_e64_dpp v0.l, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x00,0x00,0x7a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_i16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x21,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x21,0xd7,0xfa,0x0e,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x21,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror +// 
GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 
quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp 
v5, v1, v2 row_share:15 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// 
GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v255, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v255, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x12,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -v7.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v7.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, |v7.l|, v2.l quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v7.l|, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, -|v7.l|, v2.l quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, -|v7.l|, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, -v7.l quad_perm:[3,2,1,0] +// 
GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, -v7.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, |v7.l| quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, |v7.l| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x12,0xd7,0xfa,0x0e,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, -|v7.l| quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, -|v7.l| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x12,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + 
+v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:15 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_xmask:0 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_xmask:15 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x68,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x02,0x68,0xd7,0xfa,0x0e,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x68,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:0 +// 
GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v255, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v255, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x13,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, -v7.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v7.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, |v7.l|, v2.l quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v7.l|, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + 
+v_cvt_pk_norm_u16_f16_e64_dpp v5, -|v7.l|, v2.l quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, -|v7.l|, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, -v7.l quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, -v7.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, |v7.l| quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, |v7.l| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd7,0xfa,0x0e,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, -|v7.l| quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, -|v7.l| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
bound_ctrl:1 ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:15 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 +// 
GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_xmask:0 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_xmask:15 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, 
v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x69,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x69,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x69,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, -v7 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x69,0xd7,0xfa,0x0e,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x69,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u16_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x22,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, -v7 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, |v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x22,0xd7,0xfa,0x0e,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, -|v7| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x22,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror +// 
GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 
quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp 
v5, v1, v2 row_share:15 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// 
GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x26,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x26,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + 
+v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 
quad_perm:[0,1,2,3] +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: 
v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v255, v255.l, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v255, v255.l, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x36,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, -v7.l, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, -v7.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, |v7.l|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, |v7.l|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x36,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, -|v7.l|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, -|v7.l|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x36,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x82,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0xe0,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0xfe,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, exec_lo 
quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0xfc,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0xfa,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, null quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0xf8,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0xd2,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0xfa,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0xf6,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x06,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0xd6,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0xd4,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[0,1,2,3] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_half_mirror +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_mirror +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_ror:1 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_ror:15 +// GFX13: 
v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_share:0 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_share:15 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_shl:1 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_shl:15 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_shr:1 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_shr:15 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_xmask:0 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_xmask:15 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 byte_sel:0 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 byte_sel:1 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 byte_sel:2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x40,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 byte_sel:3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f16_e64_dpp v5, v1.l, v2 byte_sel:3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x60,0x36,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v255, v255.l, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v255, v255.l, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x35,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, -v7.l, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, -v7.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, |v7.l|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, |v7.l|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x35,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, -|v7.l|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, -|v7.l|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x35,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x82,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0xe0,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x35,0xd7,0xfa,0xfe,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0xfc,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0xfa,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, null quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0xf8,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0xd2,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0xfa,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0xf6,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v3 
quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x06,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0xd6,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0xd4,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[0,1,2,3] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_half_mirror +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_mirror +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_ror:1 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_ror:15 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_share:0 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_share:15 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_shl:1 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_shl:15 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_shr:1 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_shr:15 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_xmask:0 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_xmask:15 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + 
+v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 byte_sel:0 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 byte_sel:1 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 byte_sel:2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x40,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + 
+v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 byte_sel:3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f16_e64_dpp v5, v1.l, v2 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x60,0x35,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x38,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x38,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, -1 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x82,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0xe0,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, exec_hi 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0xfe,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0xfc,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, m0 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0xfa,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, null quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0xf8,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, s105 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0xd2,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, src_scc quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0xfa,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0xf6,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + 
+v_cvt_sr_bf8_f32_e64_dpp v5, v1, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x06,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0xd6,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0xd4,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 byte_sel:0 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 byte_sel:1 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 byte_sel:2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x40,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 byte_sel:3 quad_perm:[3,2,1,0] 
+// GFX13: v_cvt_sr_bf8_f32_e64_dpp v5, v1, v2 byte_sel:3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x60,0x38,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x37,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x37,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x37,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, -1 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x82,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0xe0,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x37,0xd7,0xfa,0xfe,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0xfc,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, m0 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0xfa,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, null quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0xf8,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, s105 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0xd2,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, src_scc quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0xfa,0x01,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0xf6,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0xfe,0x03,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v3 quad_perm:[3,2,1,0] +// GFX13: 
v_cvt_sr_fp8_f32_e64_dpp v5, v1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x06,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0xd6,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0xd4,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + 
+v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 byte_sel:0 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 byte_sel:1 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 byte_sel:1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 byte_sel:2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 byte_sel:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x40,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 byte_sel:3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_fp8_f32_e64_dpp v5, v1, v2 byte_sel:3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x60,0x37,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0xc2,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc2,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0xc2,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v7|, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0xc2,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + 
+v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 div:2 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v255, v255, v255, v255 
quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0xc3,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0xc3,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0xc3,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0xc3,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: 
v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, 
s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, 
v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: 
v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x5f,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: 
v_div_fixup_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x5f,0xd7,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x5f,0xd7,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x5f,0xd7,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// 
GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] 
+// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x5f,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x5f,0xd7,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x08,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, 
v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_div_fixup_f16_e64_dpp 
v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x4b,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4b,0xd7,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x4b,0xd7,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x4b,0xd7,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + 
+v_fma_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, 
v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4b,0xd7,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x13,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x13,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, 
v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x13,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_fma_f32_e64_dpp 
v5, v1, v2, v3 row_half_mirror +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_fma_f32_e64_dpp v5, v1, 
v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_ldexp_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x62,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX13: v_ldexp_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_ldexp_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x62,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_ldexp_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x62,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x18,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x08,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x10,0x01,0x1b,0x00,0xff] + 
+v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_ldexp_f32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] +// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x15,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, 
v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// 
GFX13: v_lerp_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, 
v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 
row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 
bank_mask:0x0 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x46,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + 
+v_lshl_add_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, vcc_lo, 
v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + 
+v_lshl_add_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: 
v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, 
v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_lshl_add_u32_e64_dpp v5, 
v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6f,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, null, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + 
+v_lshl_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: 
[0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + 
+v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x14,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + 
+v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// 
GFX13: v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x5e,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x5e,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 
quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x5e,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// 
GFX13: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: 
v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_mad_i16_e64_dpp v5.l, 
v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v255, v255.l, v255.l, v255 quad_perm:[3,2,1,0] +// GFX13: 
v_mad_i32_i16_e64_dpp v255, v255.l, v255.l, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x75,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x08,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.h, v3 op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.h, v3 op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_half_mirror +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_mirror +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_ror:1 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_ror:15 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, 
v1.l, v2.l, v3 row_share:0 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_share:15 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shl:1 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shl:15 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shr:1 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shr:15 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:0 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:15 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp 
v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0a,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, 
0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi 
quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_mad_i32_i24_e64_dpp 
v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x40,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x40,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 
quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x40,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// 
GFX13: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: 
v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_mad_u16_e64_dpp v5.l, 
v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v255, v255.l, v255.l, v255 quad_perm:[3,2,1,0] +// GFX13: 
v_mad_u32_u16_e64_dpp v255, v255.l, v255.l, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x73,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.h, v2.l, v3 op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x08,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.h, v3 op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.h, v3 op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_half_mirror +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_mirror +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_ror:1 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_ror:15 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, 
v1.l, v2.l, v3 row_share:0 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_share:15 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shl:1 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shl:15 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shr:1 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shr:15 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:0 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:15 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp 
v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0b,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, 
0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi 
quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_mad_u32_u24_e64_dpp 
v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x54,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd7,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd7,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd7,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x54,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x54,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd7,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l 
op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l 
row_ror:1 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, 
v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x2a,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2a,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2a,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2a,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, 
v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2a,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: 
v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x55,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x55,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x55,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, 
v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_max3_i16_e64_dpp v5.l, 
v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: 
v_max3_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + 
+v_max3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: 
v_max3_i32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + 
+v_max3_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v255.l, v255.l, v255.l, v255.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x56,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x56,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x56,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x56,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l 
row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, 
s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, -1 
quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 
row_xmask:15 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_max_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_max_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, 
v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: 
[0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_max_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_max_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 
+ +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: 
[0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6b,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6b,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6b,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6b,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, 
v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// 
GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6b,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + 
+v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x69,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x69,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x69,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x69,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x69,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp 
v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x69,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: 
v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// 
GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x64,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x64,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: 
v_maxmin_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] 
+ +v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: 
v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x62,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + 
+v_maxmin_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo 
quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: 
[0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_maximum_f16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x3b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_maximum_f16 v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_mirror +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_share:0 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_share:15 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_shl:1 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 
+ +v_maximum_f16 v5.l, v1.l, v2.l row_shl:15 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_shr:1 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_shr:15 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: 
[0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f16 v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_maximum_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x3d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_maximum_f32 v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX13: v_maximum_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_maximum_f32 v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_maximum_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_maximum_f32 v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_maximum_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x3d,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_maximum_f32 v5, v1, v2 row_half_mirror +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_mirror +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_ror:1 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_ror:15 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_share:0 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_share:15 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shl:1 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shl:15 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shr:1 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_shr:1 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_shr:15 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_xmask:0 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_maximum_f32 v5, v1, v2 row_xmask:15 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_maximum_f32 v5, v1, v2 div:2 quad_perm:[3,2,1,0] +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x18,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x08,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x10,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum_f32 v5, v1, v2 clamp quad_perm:[3,2,1,0] +// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x30,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x30,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + 
+v_maximum3_f16 v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x30,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x30,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: 
v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x30,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 
+// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, 
v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: 
v_maximum3_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x2e,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maximum3_f32 v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximum3_f32 v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_maximum3_f32 v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2e,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2e,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2e,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, 
v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2e,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2e,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_half_mirror +// GFX13: 
v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_mirror +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_ror:1 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_ror:15 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_share:0 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_share:15 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_shl:1 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_shl:15 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_shr:1 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_shr:15 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] 
+ +v_maximum3_f32 v5, v1, v2, v3 row_xmask:0 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 row_xmask:15 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_maximum3_f32 v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 
bank_mask:0x0 ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6f,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6f,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6f,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, 
v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// 
GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6f,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x20,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, 
v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6d,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6d,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6d,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x02,0x6d,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, 
v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6d,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: 
v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_half_mirror +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_mirror +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_ror:1 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_ror:15 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_share:0 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_share:15 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + 
+v_maximumminimum_f32 v5, v1, v2, v3 row_shl:1 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_shl:15 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_shr:1 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_shr:15 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_xmask:0 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 row_xmask:15 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f32 v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0xff,0x00,0x66,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x65,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] + 
+v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l 
quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x57,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x57,0xd7,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x57,0xd7,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x57,0xd7,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, s2, 
v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] 
+// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x57,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, 
v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x57,0xd7,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: 
v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 
bank_mask:0x1 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x31,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, -|v7|, v2, v3 
quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x31,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x31,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x31,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, 
vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x31,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + 
+v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + 
+v_med3_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x58,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// 
GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x58,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] 
bound_ctrl:1 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + 
+v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, 
v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x20,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + 
+v_med3_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x20,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 
quad_perm:[0,1,2,3] +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, 
v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x59,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x59,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x59,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, 
v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_med3_u16_e64_dpp v5.l, 
v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x21,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: 
v_med3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, 
vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX13: v_cvt_pk_bf16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_med3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x7b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp 
v1.l, -v2, |v3| quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x02,0x7b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_pk_bf8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x7b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x7b,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x7b,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd -// 
GFX13: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x7b,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd -// GFX13: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x7b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +v_med3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 -// GFX13: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x7b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +v_med3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 -// GFX13: v_cvt_pk_bf8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x02,0x7b,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +v_med3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_pk_fp8_f32_e64_dpp 
v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x7a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x02,0x7a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_pk_fp8_f32_e64_dpp v6.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x02,0x7a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v6, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x7a,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v255| quad_perm:[3,2,1,0] row_mask:0xe 
bank_mask:0xd ; encoding: [0x01,0x02,0x7a,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x02,0x7a,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd -// GFX13: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x02,0x7a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 -// GFX13: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x02,0x7a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 -// GFX13: v_cvt_pk_fp8_f32_e64_dpp v1.l, -v2, |v3| quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: 
[0x01,0x02,0x7a,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:1 quad_perm:[0,1,2,3] -// GFX13: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x36,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:2 quad_perm:[0,1,2,3] -// GFX13: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x36,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_cvt_sr_bf8_f16 v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] -// GFX13: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x36,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_cvt_sr_bf8_f16 v1, v2.l, v3 quad_perm:[0,1,2,3] fi:1 -// GFX13: v_cvt_sr_bf8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x01,0x00,0x36,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x38,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x38,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_cvt_sr_bf8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_sr_bf8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x00,0x38,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_sr_bf8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x38,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: 
v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x38,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x38,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd -// GFX13: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x00,0x38,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 -// GFX13: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x00,0x38,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 -// GFX13: v_cvt_sr_bf8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] 
row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x00,0x38,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:1 quad_perm:[0,1,2,3] -// GFX13: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x20,0x35,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:2 quad_perm:[0,1,2,3] -// GFX13: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x40,0x35,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_sr_fp8_f16 v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] -// GFX13: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 byte_sel:3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x60,0x35,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x00,0xff] +v_min3_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x51,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_cvt_sr_fp8_f16 v1, v2.l, v3 quad_perm:[0,1,2,3] fi:1 -// GFX13: v_cvt_sr_fp8_f16_e64_dpp v1, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf fi:1 ; 
encoding: [0x01,0x00,0x35,0xd7,0xfa,0x06,0x02,0x00,0x02,0xe4,0x04,0xff] +v_min3_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x37,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_min3_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x01,0x00,0x37,0xd7,0xfa,0x06,0x02,0x20,0x02,0xe4,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x51,0xd7,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_cvt_sr_fp8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_sr_fp8_f32_e64_dpp v6, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x06,0x00,0x37,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xed] +v_min3_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_sr_fp8_f32_e64_dpp v1, -v6, v3 quad_perm:[3,2,1,0] row_mask:0xe 
bank_mask:0xd ; encoding: [0x01,0x00,0x37,0xd7,0xfa,0x06,0x02,0x20,0x06,0x1b,0x00,0xed] +v_min3_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x51,0xd7,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v255 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x37,0xd7,0xfa,0xfe,0x03,0x20,0x02,0x1b,0x00,0xed] +v_min3_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x51,0xd7,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd -// GFX13: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[0,2,1,3] row_mask:0xe bank_mask:0xd ; encoding: [0x01,0x00,0x37,0xd7,0xfa,0x06,0x02,0x20,0x02,0xd8,0x00,0xed] +v_min3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd -// GFX13: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0x2 bank_mask:0xd ; encoding: [0x01,0x00,0x37,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0x2d] +v_min3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0x5 -// GFX13: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] 
row_mask:0xe bank_mask:0x5 ; encoding: [0x01,0x00,0x37,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x00,0xe5] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 -// GFX13: v_cvt_sr_fp8_f32_e64_dpp v1, -v2, v3 quad_perm:[3,2,1,0] row_mask:0xe bank_mask:0xd fi:1 ; encoding: [0x01,0x00,0x37,0xd7,0xfa,0x06,0x02,0x20,0x02,0x1b,0x04,0xed] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror -// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x21,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_pk_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x21,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_pk_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x21,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x51,0xd7,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror -// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 -// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: 
[0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 -// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 -// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 -// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 -// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_pk_i16_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x6b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_pk_i16_i32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x6b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] 
+v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min3_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x29,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min3_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: 
v_min3_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min3_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min3_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x29,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x29,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] 
-v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x29,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_min3_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_min3_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_min3_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror -// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, 
ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x29,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 
row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x22,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_pk_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x22,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_pk_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 
row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x22,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror -// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 -// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 -// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 -// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 -// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 -// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: 
v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x52,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x52,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + 
+v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x52,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// 
GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] + 
+v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] + +v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, exec_lo, v3 
quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1a,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, m0 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_pk_u16_u32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x6a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x6a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi 
row_ror:1 -// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 
bound_ctrl:1 ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_pk_u8_f32_e64_dpp v255, -|v255|, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x01,0x26,0xd6,0xfa,0xfe,0xf7,0x23,0xff,0x6f,0x05,0x30] +v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min3_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x53,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, 10, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// 
GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x53,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x12,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.h 
op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_mirror -// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x53,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 
-v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x68,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x68,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_cvt_pk_norm_i16_f32_e64_dpp v255, -|v255|, 
-|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_pk_norm_i16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x68,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x13,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, 
v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_mirror -// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX13: 
v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, s3, v3 
quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x69,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_min3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x69,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_min3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x69,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_min3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, v3, v3 
quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5f,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX13: v_div_fixup_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x5f,0xd7,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x5f,0xd7,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX13: 
v_div_fixup_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x5f,0xd7,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x5f,0xd7,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x5f,0xd7,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_min3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x5f,0xd7,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_min3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_div_fixup_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x5f,0xd7,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_min3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX13: v_fma_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x4b,0xd7,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 row_half_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x4b,0xd7,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_fma_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX13: v_fma_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x4b,0xd7,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_fma_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x4b,0xd7,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x4b,0xd7,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: 
v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x4b,0xd7,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_fma_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_fma_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x4b,0xd7,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: 
v_min3_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_fma_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_fma_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_fma_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_fma_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_fma_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_fma_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_fma_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_fma_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x01,0x13,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_fma_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x13,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX13: v_fma_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x13,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_fma_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x13,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_fma_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x13,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_min_i16_e64_dpp v255.l, 
v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_min_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_fma_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x13,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_min_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_fma_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x13,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_min_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 
+v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror -// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 
bank_mask:0x1 -// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_ldexp_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x62,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x62,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 
+v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_lerp_u8_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_min_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_min_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_min_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, null row_share:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_lerp_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_lerp_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x15,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_lshl_add_u32_e64_dpp v5, 
v1, v2, vcc_hi row_shl:15 -// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_lshl_add_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x46,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x46,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; 
encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6a,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v5, 
v1, v2, s105 row_shl:1 -// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6a,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6f,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6a,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6a,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6f,0xd7,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minmax_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_lshl_or_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x00,0x6f,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_lshl_or_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x6f,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: 
v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_lshlrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x14,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_lshlrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x14,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6a,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: 
v_minmax_num_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: 
v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] 
-v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_lshrrev_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x39,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 
fi:1 -// GFX13: v_lshrrev_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x39,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x5e,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l 
mul:2 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minmax_num_f16_e64_dpp v5.l, 
v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x5e,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_mad_i16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x5e,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] -// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x68,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] -// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_mirror -// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror -// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x68,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 -// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 -// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp 
v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x68,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 -// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x68,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 -// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 -// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 -// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] 
-v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minmax_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_mad_i32_i16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x75,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minmax_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v255, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_mad_i32_i16_e64_dpp v255, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x75,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minmax_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x68,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, s105 
row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, -|0.5| 
quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x68,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_mad_i32_i24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_minmax_num_f32_e64_dpp 
v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x0a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_mad_u16_e64_dpp 
v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// 
GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: 
[0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x40,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_mad_u16_e64_dpp v255.l, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x40,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] -// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] -// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_mirror -// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror -// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 -// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 -// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 
-v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 -// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 -// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 -// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 -// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, null row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minmax_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_mad_u32_u16_e64_dpp v5, v1.l, v2.l, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x73,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minmax_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v255, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_mad_u32_u16_e64_dpp v255, v255.l, v255.l, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x73,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minmax_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, exec_lo 
row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minmax_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_mad_u32_u24_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minmax_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 
bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x0b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minmax_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, 
src_scc quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX13: v_max3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x54,0xd7,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x54,0xd7,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX13: v_max3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x54,0xd7,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_max3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_max3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x54,0xd7,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_max3_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x54,0xd7,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x54,0xd7,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_max3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_max3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x54,0xd7,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_max3_num_f32_e64_dpp 
v5, v1, v2, v3 row_mirror -// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_max3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_max3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_max3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_max3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_max3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_shr:15 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_max3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_max3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_max3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_max3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_max3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX13: v_max3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_max3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_max3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 
bank_mask:0x1 -// GFX13: v_max3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_max3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_max3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x2a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, 
s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: 
v_minmax_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minmax_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x55,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minmax_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_max3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x55,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minmax_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: 
v_minmax_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_max3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: 
v_max3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_max3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_max3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_max3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_max3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp 
v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_max3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_max3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_max3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minmax_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_max3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minmax_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1d,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, 
v3.l row_half_mirror -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x56,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x56,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_max3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_max3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x56,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v3 row_mirror 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_max3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_max3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_max3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_max3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_max3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_max3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_max3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 
bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_max3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_max3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_max3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_max3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_max3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_max3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minimum_f16 v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_minimum_f16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x3a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// 
GFX13: v_max3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minimum_f16 v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_max3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minimum_f16 v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_max3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1e,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_max_i16_e64_dpp 
v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_mirror +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_share:0 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_share:15 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_shl:1 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_shl:15 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l row_shr:1 +// GFX13: 
v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minimum_f16 v5.l, v1.l, v2.l row_shr:15 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_max_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x0a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minimum_f16 v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_max_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_max_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x0a,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minimum_f16 v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 
-v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_minimum_f16 v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_minimum_f32 v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_minimum_f32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x3c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_minimum_f32 v5, -v7, v2 quad_perm:[3,2,1,0] +// GFX13: v_minimum_f32_e64_dpp v5, -v7, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_minimum_f32 v5, |v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_minimum_f32_e64_dpp v5, |v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] -v_max_u16_e64_dpp v5.l, 
v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minimum_f32 v5, -|v7|, v2 quad_perm:[3,2,1,0] +// GFX13: v_minimum_f32_e64_dpp v5, -|v7|, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x3c,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_max_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x09,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_max_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_max_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x09,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minimum_f32 v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum_f32 v5, v1, v2 row_half_mirror +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum_f32 v5, v1, v2 row_mirror +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_ror:1 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_ror:15 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_share:0 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_share:15 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_shl:1 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_maxmin_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX13: v_maxmin_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6b,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_shl:15 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6b,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_shr:1 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_maxmin_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX13: v_maxmin_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x04,0x6b,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_shr:15 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6b,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minimum_f32 v5, v1, v2 row_xmask:0 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_maxmin_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6b,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_minimum_f32 v5, v1, v2 row_xmask:15 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_maxmin_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6b,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maxmin_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_maxmin_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 
fi:1 ; encoding: [0xff,0x87,0x6b,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum_f32 v5, v1, v2 div:2 quad_perm:[3,2,1,0] +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x18,0x01,0x1b,0x00,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum_f32 v5, v1, v2 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x08,0x01,0x1b,0x00,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum_f32 v5, v1, v2 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x10,0x01,0x1b,0x00,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 
quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_maxmin_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimum_f32 v5, v1, v2 clamp quad_perm:[3,2,1,0] +// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maxmin_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_maxmin_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x69,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, exec_hi 
row_ror:1 -// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x69,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minimum3_f16 v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x2f,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_maxmin_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX13: v_maxmin_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x69,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minimum3_f16 v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_maxmin_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_maxmin_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x69,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minimum3_f16 v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_maxmin_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_maxmin_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x69,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_minimum3_f16 v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 
bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_maxmin_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x69,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_minimum3_f16 v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_maxmin_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_maxmin_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x69,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_minimum3_f16 v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2f,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2f,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, v3 
row_mirror -// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, 
v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minimum3_f16 v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_maxmin_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minimum3_f16 v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_maxmin_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x64,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimum3_f16 v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2f,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, 
vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimum3_f16 v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x62,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_maxmin_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_maxmin_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_maxmin_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0x00,0x62,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror -// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 -// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 
+v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x66,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// 
GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_mbcnt_hi_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x66,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror -// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: 
v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_minimum3_f16 v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 -// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 -// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_minimum3_f32 v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x2d,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1 -// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_minimum3_f32 v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 
-// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_minimum3_f32 v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 -// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_minimum3_f32 v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2d,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_minimum3_f32 v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_minimum3_f32 v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2d,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_minimum3_f32 v5, v1, -|v7|, v3 
quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2d,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x65,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_minimum3_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x65,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_minimum3_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f32 v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum3_f32 v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, 0.5 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum3_f32 v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimum3_f32 v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimum3_f32 v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, 
vcc_lo row_shr:1 -// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimum3_f32 v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX13: v_med3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x57,0xd7,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x57,0xd7,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minimum3_f32 v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX13: v_med3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x57,0xd7,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x03,0x57,0xd7,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x57,0xd7,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_minimum3_f32 v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x57,0xd7,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_minimum3_f32 v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_med3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_med3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x57,0xd7,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_minimum3_f32 v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2d,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_med3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_half_mirror +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_med3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_mirror +// GFX13: v_minimum3_f32_e64_dpp v5, v1, 
v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_med3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_ror:1 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_med3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_med3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_ror:15 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_med3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_med3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x31,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_share:0 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_med3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_med3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x31,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_share:15 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_med3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX13: v_med3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x04,0x31,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_shl:1 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_med3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_med3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x31,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_shl:15 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_med3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_med3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x31,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_minimum3_f32 v5, v1, v2, v3 row_shr:1 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_med3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_med3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x31,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_minimum3_f32 v5, v1, v2, v3 row_shr:15 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0x87,0x31,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_minimum3_f32 v5, v1, v2, v3 row_xmask:0 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f32 v5, v1, v2, v3 row_xmask:15 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// GFX13: 
v_med3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_minimum3_f32 v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minimummaximum_f16 v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v255.l, v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6e,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: 
v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x58,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minimummaximum_f16 v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, -v7.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_med3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_med3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x58,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimummaximum_f16 v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, |v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f16 v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, -|v7.l|, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6e,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimummaximum_f16 v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, -v7.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v3 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, |v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6e,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_med3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, -|v7.l|, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6e,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_med3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, 2.0, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_med3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, s2, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_med3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, -1 
quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_med3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_med3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_med3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_med3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, m0 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_med3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minimummaximum_f16 v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_med3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minimummaximum_f16 v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x20,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimummaximum_f16 v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, src_scc quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v255.h quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v255.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.h quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x20,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6e,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.h, v2.l, v3.l op_sel:[1,0,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 
-v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.h, v3.l op_sel:[0,1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.h op_sel:[0,0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x20,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x59,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_med3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_med3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x59,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_half_mirror +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_mirror +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_ror:1 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] 
-v_med3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_med3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_ror:15 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_med3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_med3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_share:0 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_med3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_med3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_share:15 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_med3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_med3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_shl:1 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_med3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_med3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] 
+v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_shl:15 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_med3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_med3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_shr:1 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_med3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_med3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_shr:15 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_med3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_med3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_xmask:0 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_med3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_med3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_xmask:15 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_med3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_med3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x21,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l mul:2 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, 
v3.l mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: 
v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX13: v_min3_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x51,0xd7,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x51,0xd7,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minimummaximum_f32 v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x6c,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX13: v_min3_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x51,0xd7,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minimummaximum_f32 v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_min3_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x51,0xd7,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minimummaximum_f32 v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_min3_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x51,0xd7,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_minimummaximum_f32 v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6c,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x51,0xd7,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_minimummaximum_f32 v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_min3_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x51,0xd7,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_minimummaximum_f32 v5, 
v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6c,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f32 v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6c,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimummaximum_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimummaximum_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6c,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_min3_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_min3_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x29,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_min3_num_f32_e64_dpp v5, v1, -|v2|, 
exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x29,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX13: v_min3_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x29,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_min3_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x29,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_min3_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x29,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_minimummaximum_f32 v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_min3_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x29,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_minimummaximum_f32 v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x29,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_minimummaximum_f32 v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f32 v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimummaximum_f32 v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l 
row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6c,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] 
+v_minimummaximum_f32 v5, v1, v2, v3 row_half_mirror +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_mirror +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_ror:1 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_ror:15 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_share:0 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_minimummaximum_f32 v5, v1, v2, v3 row_share:15 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x52,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_minimummaximum_f32 v5, v1, v2, v3 row_shl:1 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_min3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_min3_i16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x52,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_minimummaximum_f32 v5, v1, v2, v3 row_shl:15 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_shr:1 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_min3_i32_e64_dpp v5, v1, v2, v3 
quad_perm:[0,1,2,3] -// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_shr:15 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_xmask:0 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_min3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_min3_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 row_xmask:15 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_min3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_min3_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_min3_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: 
v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_min3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_min3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_min3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_min3_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_min3_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_min3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_min3_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_min3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_min3_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_min3_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_minimummaximum_f32 v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1a,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_msad_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x39,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v3.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l row_shl:1 -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, v255.l 
row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x01,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x0f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xae,0x01,0x01,0x11,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, ttmp15 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xee,0x01,0x01,0x21,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, s3, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_hi row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, exec_lo row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, null row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_msad_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x53,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x09,0x13] +v_msad_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x39,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_min3_u16_e64_dpp v255.l, v255.l, v255.l, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x53,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_msad_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_min3_u32_e64_dpp v5, v1, v2, v255 
row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_min3_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_min3_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_min3_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_min3_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_min3_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_min3_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_min3_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_min3_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_msad_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_min3_u32_e64_dpp v5, v1, v2, 0.5 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_msad_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_min3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x1b,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_msad_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l 
row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] 
-v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_min_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x0c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_min_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_min_i16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x0c,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] 
+v_msad_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_min_u16_e64_dpp 
v5.l, v1.l, v2.l row_shr:15 -// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_msad_u8_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_mul_lo_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_mul_lo_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_mul_lo_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_min_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x0b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_min_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_min_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x0b,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: 
v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_minmax_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX13: v_minmax_num_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6a,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6a,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_minmax_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX13: v_minmax_num_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6a,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, 
-|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6a,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_minmax_num_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6a,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_minmax_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6a,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_minmax_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_minmax_num_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x6a,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_minmax_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_minmax_num_f32_e64_dpp v5, v1, v2, s105 
row_shl:1 -// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_minmax_num_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_mullit_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x18,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_minmax_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_minmax_num_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x68,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_mullit_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, -v7, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_minmax_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_minmax_num_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x02,0x68,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_mullit_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, |v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x07,0x1b,0x00,0xff] -v_minmax_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX13: v_minmax_num_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x68,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_mullit_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, -|v7|, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x18,0xd6,0xfa,0x04,0x0e,0x24,0x07,0x1b,0x00,0xff] -v_minmax_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_minmax_num_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x68,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, -v7, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_minmax_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_minmax_num_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x68,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_mullit_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, |v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x18,0xd6,0xfa,0x0e,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minmax_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_minmax_num_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x06,0x68,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_mullit_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, -|v7|, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x18,0xd6,0xfa,0x0e,0x0e,0x44,0x01,0x1b,0x00,0xff] -v_minmax_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_minmax_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x68,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_mullit_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mullit_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, 0.5 
quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, 
v1, v2, ttmp15 row_shr:15 -// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x00,0x65,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_mullit_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_minmax_i32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x65,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, -|0.5| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x18,0xd6,0xfa,0x04,0xc2,0x83,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 
+v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 
-v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: 
v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_minmax_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x63,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] 
-v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_msad_u8_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_msad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_msad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_msad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_msad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 div:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x1c,0x01,0x1b,0x00,0xff] -v_msad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_msad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 
mul:2 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 mul:2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x0c,0x01,0x1b,0x00,0xff] -v_msad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_msad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 mul:4 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x14,0x01,0x1b,0x00,0xff] -v_msad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_msad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_msad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_msad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_msad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_msad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_msad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_msad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_mullit_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_msad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_msad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_msad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_msad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_or3_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x72,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_msad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x39,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_or3_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] 
-v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_or3_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_or3_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 
+v_or3_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l 
row_ror:15 -// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_or3_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_mul_lo_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x05,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_or3_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mul_lo_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_mul_lo_u16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 
row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x05,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_or3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_or3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_or3_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_mullit_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_mullit_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x18,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_mullit_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x02,0x18,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 -// GFX13: v_mullit_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x18,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_mullit_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x18,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_mullit_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x18,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_or3_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_mullit_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: 
[0x05,0x06,0x18,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_or3_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x18,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_or3_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 // GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + v_or3_b32_e64_dpp v5, v1, v2, v3 row_mirror // GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_or3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_or3_b32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] + +v_or3_b32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] + +v_or3_b32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_or3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_or3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_or3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_or3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_or3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_or3_b32_e64_dpp v5, v1, v2, vcc_lo 
row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_or3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_or3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_or3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_or3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_or3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_or3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_or3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_or3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_or3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_or3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_or3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_or3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_or3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x72,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x72,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] // GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_or_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_or_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_or_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_or_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_or_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] // GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror // GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: 
v_or_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 // GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] @@ -3970,35 +13519,83 @@ v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 // GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_or_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_or_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x25,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_or_b16_e64_dpp v5.l, 
v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_or_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] // GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_pack_b32_f16_e64_dpp v255, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_pack_b32_f16_e64_dpp v255, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x11,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] + +v_pack_b32_f16_e64_dpp v5, -v7.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_pack_b32_f16_e64_dpp v5, -v7.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_pack_b32_f16_e64_dpp v5, |v7.l|, v2.l quad_perm:[3,2,1,0] +// GFX13: v_pack_b32_f16_e64_dpp v5, |v7.l|, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x00,0x07,0x1b,0x00,0xff] + +v_pack_b32_f16_e64_dpp v5, -|v7.l|, v2.l quad_perm:[3,2,1,0] +// GFX13: v_pack_b32_f16_e64_dpp v5, -|v7.l|, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x20,0x07,0x1b,0x00,0xff] + +v_pack_b32_f16_e64_dpp v5, v1.l, -v7.l quad_perm:[3,2,1,0] +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, -v7.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_pack_b32_f16_e64_dpp v5, v1.l, |v7.l| quad_perm:[3,2,1,0] +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, |v7.l| 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x11,0xd7,0xfa,0x0e,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_pack_b32_f16_e64_dpp v5, v1.l, -|v7.l| quad_perm:[3,2,1,0] +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, -|v7.l| quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x11,0xd7,0xfa,0x0e,0x02,0x40,0x01,0x1b,0x00,0xff] + +v_pack_b32_f16_e64_dpp v5, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_pack_b32_f16_e64_dpp v5, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] + v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] // GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] + v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror // GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror -// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] + +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] + +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] + +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:15 +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 // GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] @@ -4012,1584 +13609,1668 @@ v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:1 v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 // GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 -// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_xmask:0 +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 -// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_xmask:15 +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] + +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_pack_b32_f16_e64_dpp v5, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x11,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_pack_b32_f16_e64_dpp v5, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] 
+v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] + +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_pack_b32_f16_e64_dpp v5, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX13: v_pack_b32_f16_e64_dpp v5, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x11,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_perm_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x44,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: 
v_perm_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_perm_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_perm_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_perm_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_perm_b32_e64_dpp v5, v1, v2, 
ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_perm_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_perm_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_perm_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_perm_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_perm_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: 
v_perm_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_perm_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_perm_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x44,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_perm_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x44,0xd7,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x23,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_perm_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_perm_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x23,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_perm_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] 
-v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_sad_u16_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_sad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_sad_u16_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 
-v_sad_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_sad_u16_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_sad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_sad_u16_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_sad_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_sad_u16_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_sad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_sad_u16_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_sad_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_sad_u16_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_sad_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_sad_u16_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_sad_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_sad_u16_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_sad_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_sad_u16_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x24,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_sad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_sad_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: 
[0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_sad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_sad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_sad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_sad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_sad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_sad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_sad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_sad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_sad_hi_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x23,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_sad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_sad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_sad_hi_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_sad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_sad_hi_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: 
[0xff,0x80,0x25,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_sad_hi_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_sad_hi_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_sad_hi_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x23,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_sad_u8_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_sad_u8_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_sad_u8_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_sad_u8_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_sad_u8_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_sad_u8_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_sad_u8_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_sad_u8_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_sad_hi_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_sad_u8_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_sad_hi_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, 
v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_sad_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x22,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_mirror -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, v2, m0 
quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, v2, 
src_scc quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 -// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 -// W32: v_sub_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x69,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// W32: v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:26: error: invalid 
operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6b,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x7b,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] -// W32-ERR: 
:[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 -// W32-ERR: 
:[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 -// W32-ERR: :[[@LINE-1]]:26: error: invalid 
operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_sub_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction -// W64: v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x7a,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] 
-v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_sub_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xfc,0x10,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: 
v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sad_hi_u8_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sad_u16_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x24,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: 
v_sad_u16_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_u16_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_u16_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_sub_nc_i16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_u16_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sad_u16_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_mirror -// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:1 -// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:15 -// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:1 -// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] 
-v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:15 -// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] + 
+v_sad_u16_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; 
encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] + +v_sad_u16_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:1 -// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: 
v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_u16_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_u16_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_sub_nc_i32_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x76,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_u16_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: 
v_sad_u16_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX13: 
v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sad_u16_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_sad_u16_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sad_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// 
GFX13: v_sad_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x25,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_sub_nc_u16_e64_dpp v255.l, v255.l, v255.l clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x80,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, exec_lo, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_mirror -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u32_e64_dpp v5, v1, s3, v3 
quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u32_e64_dpp 
v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 -// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 -// W32: v_subrev_co_u32_e64_dpp v5, s105, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x69,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// W32: v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// W32: v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: 
[0x05,0x6b,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32: v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x7b,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] -// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction +v_sad_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] 
-v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, 
v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x6a,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction -// W64: v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x7a,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp 
row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0xfc,0x19,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_xad_u32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_xad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_xad_u32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_xad_u32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_xad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_xad_u32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_xad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_xad_u32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_xad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_xad_u32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_xad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: v_xad_u32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 
row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_xad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_xad_u32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_xad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_xad_u32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_xad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_xad_u32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_xad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_xad_u32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] +v_sad_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_xad_u32_e64_dpp v255, 
v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x45,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_xor3_b32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_xor3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_sad_u32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_sad_u32_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_sad_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x22,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 -// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 -// GFX13: 
v_xor3_b32_e64_dpp v5, v1, v2, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x21,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 -// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, exec_lo row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x2f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, -1 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x06,0x03,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, 0.5 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x60,0x09,0x13] 
+v_sad_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xor3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_xor3_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x40,0xd6,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x05,0x30] +v_sad_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sad_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror -// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x22,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 -// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 -// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 -// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 -// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sad_u8_e64_dpp 
v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v5.l, v1.l, v2.l 
row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x09,0x13] +v_sad_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_xor_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_xor_b16_e64_dpp v255.l, v255.l, v255.l row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x00,0x70,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x05,0x30] +v_sad_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_add_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 
row_mask:0x1 bank_mask:0x3 -// GFX13: v_add_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_add_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0d,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_add_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_add_nc_u16_e64_dpp v5.l, 
v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_add_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x03,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_add_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x03,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x12,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_cvt_pk_norm_i16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x12,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x13,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_cvt_pk_norm_u16_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x13,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_div_fixup_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x5f,0xd7,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_div_fixup_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x0b,0x5f,0xd7,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_div_fixup_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x5f,0xd7,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_div_fixup_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x5f,0xd7,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_div_fixup_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x5f,0xd7,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_fma_f16_e64_dpp v5.h, -v1.h, v2.h, 
|exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x4b,0xd7,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_fma_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x4b,0xd7,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_fma_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x4b,0xd7,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_fma_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x4b,0xd7,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_fma_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_fma_f16_e64_dpp v255.h, -|v255.l|, 
-|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x4b,0xd7,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_mad_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x5e,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_mad_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x5e,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_mad_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x5e,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: 
v_mad_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x5e,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_mad_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x5e,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_mad_i32_i16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x75,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_i32_i16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_mad_i32_i16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x75,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sad_u8_e64_dpp v5, v1, v2, v3 clamp quad_perm:[3,2,1,0] +// GFX13: v_sad_u8_e64_dpp v5, v1, v2, v3 clamp 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_mad_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x40,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_mad_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x40,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sub_co_u32_e64_dpp v255, null, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_sub_co_u32_e64_dpp v255, null, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x7c,0x10,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x40,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sub_co_u32_e64_dpp v5, s105, v1, v2 quad_perm:[3,2,1,0] +// W32: v_sub_co_u32_e64_dpp v5, s105, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x69,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_mad_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 
bank_mask:0x3 ; encoding: [0x05,0x20,0x40,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_mad_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x40,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_mad_u32_u16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_mad_u32_u16_e64_dpp v5, v1.h, v2.l, 0.5 op_sel:[1,0,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x08,0x73,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x60,0x01,0x13] +v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: v_sub_co_u32_e64_dpp v5, vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_mad_u32_u16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_mad_u32_u16_e64_dpp v255, v255.l, v255.h, src_scc op_sel:[0,1,0,0] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x90,0x73,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] 
+v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 quad_perm:[3,2,1,0] +// W32: v_sub_co_u32_e64_dpp v5, vcc_hi, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6b,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_max3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_max3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x54,0xd7,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_sub_co_u32_e64_dpp v5, vcc, v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_sub_co_u32_e64_dpp v5, vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_max3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x54,0xd7,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 quad_perm:[3,2,1,0] +// W32: v_sub_co_u32_e64_dpp v5, ttmp15, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7b,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_max3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_max3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x54,0xd7,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_sub_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7a,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_max3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x54,0xd7,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_sub_co_u32_e64_dpp v5, null, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_sub_co_u32_e64_dpp v5, null, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_max3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_max3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x54,0xd7,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_sub_co_u32_e64_dpp v5, s6, v1, s2 quad_perm:[3,2,1,0] +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_max3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x55,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s[12:13], v1, s2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:26: error: invalid operand for instruction +// W64: v_sub_co_u32_e64_dpp v5, s[12:13], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x10,0xd7,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] -v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf 
bank_mask:0xf -// GFX13: v_max3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x55,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x55,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_max3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x55,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_max3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: 
[0xff,0x40,0x55,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_max3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x56,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_mirror +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_max3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x56,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x56,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:15 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid 
operand for instruction -v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_max3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x56,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_share:0 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_max3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x56,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_share:15 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_med3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x57,0xd7,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_med3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; 
encoding: [0x05,0x0b,0x57,0xd7,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_med3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x57,0xd7,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_med3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x57,0xd7,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_med3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x57,0xd7,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:0 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_med3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x58,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:15 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_med3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x58,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] fi:0 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_med3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x58,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] fi:1 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: 
v_med3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x58,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_med3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x58,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_med3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x59,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_med3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x08,0x59,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sub_co_u32_e64_dpp v5, s6, v1, v2 clamp quad_perm:[3,2,1,0] +// W32: v_sub_co_u32_e64_dpp v5, s6, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x86,0x10,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:26: error: invalid operand for instruction -v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x59,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_med3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x59,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sub_nc_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_sub_nc_i16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_med3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x59,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 
-v_min3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_min3_num_f16_e64_dpp v5.h, -v1.h, v2.h, |exec_lo| op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x51,0xd7,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_min3_num_f16_e64_dpp v5.l, -|v1.h|, -|v2.l|, null op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0b,0x51,0xd7,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_min3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_min3_num_f16_e64_dpp v5.l, -|v1.l|, v2.h, -|-1| op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x15,0x51,0xd7,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_min3_num_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x26,0x51,0xd7,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x01,0x13] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l 
quad_perm:[0,1,2,3] +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_min3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_min3_num_f16_e64_dpp v255.h, -|v255.l|, -|v255.l|, -|src_scc| op_sel:[0,0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc7,0x51,0xd7,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x0d,0x30] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_min3_i16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x52,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_min3_i16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x52,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; 
encoding: [0x05,0x10,0x52,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_min3_i16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x52,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_min3_i16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x52,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf -// GFX13: v_min3_u16_e64_dpp v5.h, v1.h, v2.h, exec_hi op_sel:[1,1,1,1] row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x78,0x53,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x2f,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_min3_u16_e64_dpp v5.l, v1.h, v2.l, exec_lo op_sel:[1,0,0,0] 
row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x53,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x50,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.h, null op_sel:[0,1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x10,0x53,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x5f,0x01,0x01] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_min3_u16_e64_dpp v5.l, v1.l, v2.l, -1 op_sel:[0,0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x20,0x53,0xd7,0xfa,0x04,0x06,0x03,0x01,0x60,0x01,0x13] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_min3_u16_e64_dpp v255.h, v255.l, v255.l, src_scc op_sel:[0,0,0,1] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x40,0x53,0xd7,0xfa,0xfe,0xf7,0x03,0xff,0x6f,0x0d,0x30] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_pack_b32_f16_e64_dpp v5, -v1.h, |v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_pack_b32_f16_e64_dpp v5, -v1.h, 
|v2.l| op_sel:[1,0,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x0a,0x11,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x01,0x13] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_pack_b32_f16_e64_dpp v255, -|v255.l|, -|v255.h| op_sel:[0,1,0] row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0x13,0x11,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x0d,0x30] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_sub_nc_i16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h 
op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_sub_nc_i16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x0e,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_sub_nc_u16_e64_dpp v5.h, v1.h, v2.h op_sel:[1,1,1] row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX13: v_sub_nc_i16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x0e,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0x01] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 -// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] +v_sub_nc_i32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_sub_nc_i32_e64_dpp v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x76,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 -// GFX13: v_sub_nc_u16_e64_dpp v255.h, v255.l, v255.l op_sel:[0,0,1] clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0xff,0xc0,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minimum_f32 v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_minimum_f32 v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_minimum_f32 v5, v1, v2 row_mirror -// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_half_mirror +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_minimum_f32 v5, v1, v2 row_half_mirror -// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_mirror +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_minimum_f32 v5, v1, v2 row_shl:1 -// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:1 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_minimum_f32 v5, v1, v2 row_shl:15 -// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:15 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_minimum_f32 v5, v1, v2 row_shr:1 -// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:0 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_minimum_f32 v5, v1, v2 row_shr:15 -// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:15 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_minimum_f32 v5, v1, v2 row_ror:1 -// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:1 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_minimum_f32 v5, v1, v2 row_ror:15 -// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:15 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_minimum_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_minimum_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3c,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:1 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_minimum_f32 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_minimum_f32_e64_dpp v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x3c,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:15 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_minimum_f32 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_minimum_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x3c,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_minimum_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_minimum_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x3c,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:15 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_maximum_f32 v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f32 v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_maximum_f32 v5, v1, v2 row_mirror -// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_maximum_f32 v5, v1, v2 row_half_mirror -// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_maximum_f32 v5, v1, v2 row_shl:1 -// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f32 v5, v1, v2 row_shl:15 -// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sub_nc_i32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] +// GFX13: v_sub_nc_i32_e64_dpp v5, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x76,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f32 v5, v1, v2 row_shr:1 -// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f32 v5, v1, v2 row_shr:15 -// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sub_nc_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_sub_nc_u16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x04,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_maximum_f32 v5, v1, v2 row_ror:1 -// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f32 v5, v1, v2 row_ror:15 -// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f32 v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_maximum_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3d,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f32 v5, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_maximum_f32_e64_dpp v5, |v1|, -v2 row_share:15 
row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x3d,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_maximum_f32 v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_maximum_f32_e64_dpp v5, -v1, |v2| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x3d,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_maximum_f32 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_maximum_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x3d,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 
+v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_mirror -// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_shl:1 -// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_shl:15 -// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_shr:1 -// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_shr:15 -// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_minimum_f16 v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_minimum_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3a,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] 
-v_minimum_f16 v5.l, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_minimum_f16_e64_dpp v5.l, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x3a,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minimum_f16 v5.l, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_minimum_f16_e64_dpp v5.l, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x3a,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_minimum_f16 v255.l, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_minimum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x3a,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[3,2,1,0] -// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: 
[0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_maximum_f16 v5.l, v1.l, v2.l quad_perm:[0,1,2,3] -// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f16 v5.l, v1.l, v2.l row_mirror -// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] +// GFX13: v_sub_nc_u16_e64_dpp v5.l, v1.l, v2.l clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x80,0x04,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f16 v5.l, v1.l, v2.l row_half_mirror -// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum_f16 v5.l, v1.l, v2.l row_shl:1 -// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_subrev_co_u32_e64_dpp v255, null, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_subrev_co_u32_e64_dpp v255, null, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x7c,0x19,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_maximum_f16 v5.l, v1.l, v2.l row_shl:15 -// 
GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s105, v1, v2 quad_perm:[3,2,1,0] +// W32: v_subrev_co_u32_e64_dpp v5, s105, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x69,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum_f16 v5.l, v1.l, v2.l row_shr:1 -// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction +// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f16 v5.l, v1.l, v2.l row_shr:15 -// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction +// W64: v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x68,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f16 v5.l, v1.l, v2.l row_ror:1 -// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 quad_perm:[3,2,1,0] +// W32: v_subrev_co_u32_e64_dpp v5, vcc_lo, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for 
instruction -v_maximum_f16 v5.l, v1.l, v2.l row_ror:15 -// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 quad_perm:[3,2,1,0] +// W32: v_subrev_co_u32_e64_dpp v5, vcc_hi, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6b,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum_f16 v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_maximum_f16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction +// W64: v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x6a,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f16 v5.l, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_maximum_f16_e64_dpp v5.l, |v1.l|, -v2.l row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0x3b,0xd7,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] +v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 quad_perm:[3,2,1,0] +// W32: v_subrev_co_u32_e64_dpp v5, ttmp15, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7b,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum_f16 v5.l, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_maximum_f16_e64_dpp v5.l, -v1.l, |v2.l| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0x3b,0xd7,0xfa,0x04,0x02,0x20,0x01,0x60,0x09,0x13] +v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:29: error: invalid operand 
for instruction +// W64: v_subrev_co_u32_e64_dpp v5, ttmp[14:15], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7a,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_maximum_f16 v255.l, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_maximum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l| row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x03,0x3b,0xd7,0xfa,0xfe,0x03,0x60,0xff,0x6f,0x05,0x30] +v_subrev_co_u32_e64_dpp v5, null, v1, v2 quad_perm:[3,2,1,0] +// GFX13: v_subrev_co_u32_e64_dpp v5, null, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x7c,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, s2 quad_perm:[3,2,1,0] +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_subrev_co_u32_e64_dpp v5, s[12:13], v1, s2 quad_perm:[3,2,1,0] +// W32-ERR: :[[@LINE-1]]:29: error: invalid operand for instruction +// W64: v_subrev_co_u32_e64_dpp v5, s[12:13], v1, s2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x0c,0x19,0xd7,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] -v_minimum3_f32 v5, v1, v2, v3 row_mirror -// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, 
v1, v2 quad_perm:[3,2,1,0] bound_ctrl:0 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, v2, v255 row_half_mirror -// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] bound_ctrl:1 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, v2, s105 row_shl:1 -// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x2d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_mirror +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_minimum3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_minimum3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:15 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, -v1, v2, |exec_lo| row_ror:15 -// GFX13: v_minimum3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_share:0 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_minimum3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x03,0x2d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_share:15 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_minimum3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_minimum3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_minimum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_minimum3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x2d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:0 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum3_f32 v5, v1, v2, v3 row_mirror -// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:15 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum3_f32 v5, v1, v2, v255 row_half_mirror -// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] fi:0 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf 
; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum3_f32 v5, v1, v2, s105 row_shl:1 -// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] fi:1 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum3_f32 v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum3_f32 v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum3_f32 v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_maximum3_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 
quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x06,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum3_f32 v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_maximum3_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_subrev_co_u32_e64_dpp v5, s6, v1, v2 clamp quad_perm:[3,2,1,0] +// W32: v_subrev_co_u32_e64_dpp v5, s6, v1, v2 clamp quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x86,0x19,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:29: error: invalid operand for instruction -v_maximum3_f32 v5, -v1, v2, |exec_lo| row_ror:15 -// GFX13: v_maximum3_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_maximum3_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xad_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x45,0xd7,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_maximum3_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_maximum3_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2e,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] 
+v_xad_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_maximum3_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2e,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_xad_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_maximum3_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x2e,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_xad_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 
+v_xad_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x45,0xd7,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX13: v_minimum3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x2f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x2f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX13: v_minimum3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x2f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_minimum3_f16_e64_dpp v5.l, 
-|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x2f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_minimum3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x2f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_xad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_minimum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x2f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_xad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimum3_f16 v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_minimum3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x2f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_xad_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, 
v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: 
v_xad_u32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX13: v_maximum3_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x30,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x30,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, -v1.l, v2.l, 
|exec_lo| row_ror:15 -// GFX13: v_maximum3_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x30,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_maximum3_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x30,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_maximum3_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x30,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_xad_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_maximum3_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x30,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maximum3_f16 v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 
row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_maximum3_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x30,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_maximumminimum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_maximumminimum_f32 v5, v1, v2, v3 row_mirror -// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_maximumminimum_f32 v5, v1, v2, v255 row_half_mirror -// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_maximumminimum_f32 v5, v1, v2, s105 row_shl:1 -// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6d,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_maximumminimum_f32 v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_maximumminimum_f32 v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_maximumminimum_f32 v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_maximumminimum_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6d,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] -v_maximumminimum_f32 v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6d,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_maximumminimum_f32 v5, 
-v1, v2, |exec_lo| row_ror:15 -// GFX13: v_maximumminimum_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6d,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_maximumminimum_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_maximumminimum_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6d,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_maximumminimum_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_maximumminimum_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6d,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_maximumminimum_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_maximumminimum_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6d,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_maximumminimum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: 
v_maximumminimum_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x6d,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_xad_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_minimummaximum_f32 v5, v1, v2, v3 row_mirror -// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_minimummaximum_f32 v5, v1, v2, v255 row_half_mirror -// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_minimummaximum_f32 v5, v1, v2, s105 row_shl:1 -// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd7,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xor3_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v255, v255, v255, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0xff,0x00,0x40,0xd6,0xfa,0xfe,0xff,0x07,0xff,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, |v1|, v2, -ttmp15 row_shr:15 -// GFX13: v_minimummaximum_f32_e64_dpp v5, |v1|, v2, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6c,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, -1, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, -1, 
v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x82,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6c,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, 0.5, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xe0,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, -v1, v2, |exec_lo| row_ror:15 -// GFX13: v_minimummaximum_f32_e64_dpp v5, -v1, v2, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6c,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, exec_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xfe,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_minimummaximum_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6c,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, exec_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xfc,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_minimummaximum_f32_e64_dpp v5, -|v1|, v2, -|-1| mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6c,0xd6,0xfa,0x04,0x06,0xab,0x01,0x5f,0x01,0x01] +v_xor3_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, m0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x40,0xd6,0xfa,0xfa,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_minimummaximum_f32_e64_dpp v5, v1, -|v2|, -|0.5| mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6c,0xd6,0xfa,0x04,0xc2,0xd3,0x01,0x60,0x09,0x13] +v_xor3_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, null, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xf8,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_minimummaximum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_minimummaximum_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x6c,0xd6,0xfa,0xfe,0xf7,0xfb,0xff,0x6f,0x05,0x30] +v_xor3_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, s105, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xd2,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, src_scc, v3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xfa,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, ttmp15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xf6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v255, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xfe,0x0f,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x06,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, vcc_hi, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xd6,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, v1.l, v2.l, vcc_lo 
row_shr:1 -// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, vcc_lo, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0xd4,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX13: v_maximumminimum_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6f,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6f,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX13: v_maximumminimum_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6f,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, -1 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x06,0x03,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_maximumminimum_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x03,0x6f,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, 0.5 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xc2,0x03,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_maximumminimum_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6f,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_xor3_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, exec_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x01,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_maximumminimum_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6f,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_xor3_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, exec_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfa,0x01,0x01,0x1b,0x00,0xff] -v_maximumminimum_f16 v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_maximumminimum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x6f,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_xor3_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, m0 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xf6,0x01,0x01,0x1b,0x00,0xff] -v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] -// GFX13: 
v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, null quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xf2,0x01,0x01,0x1b,0x00,0xff] -v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] -// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, s105 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x1b,0x00,0xff] -v_minimummaximum_f16 v5.l, v1.l, v2.l, v3.l row_mirror -// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v3.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x1b,0x00,0xff] -v_minimummaximum_f16 v5.l, v1.l, v2.l, v255.l row_half_mirror -// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, v255.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, src_scc quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xf6,0x03,0x01,0x1b,0x00,0xff] -v_minimummaximum_f16 v5.l, v1.l, v2.l, s105 row_shl:1 -// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6e,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, ttmp15 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1b,0x00,0xff] -v_minimummaximum_f16 v5.l, v1.l, v2.l, vcc_hi row_shl:15 -// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v255 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x1b,0x00,0xff] -v_minimummaximum_f16 v5.l, v1.l, v2.l, vcc_lo row_shr:1 -// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, v2.l, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, vcc_hi quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xae,0x01,0x01,0x1b,0x00,0xff] -v_minimummaximum_f16 v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 -// GFX13: v_minimummaximum_f16_e64_dpp v5.l, |v1.l|, v2.l, -ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x01,0x6e,0xd6,0xfa,0x04,0xee,0x81,0x01,0x1f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, vcc_lo quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x1b,0x00,0xff] -v_minimummaximum_f16 v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 -// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, -|v2.l|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0x6e,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:0 +// 
GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_minimummaximum_f16 v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 -// GFX13: v_minimummaximum_f16_e64_dpp v5.l, -v1.l, v2.l, |exec_lo| row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x04,0x6e,0xd6,0xfa,0x04,0xfa,0x21,0x01,0x2f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x08,0xff] -v_minimummaximum_f16 v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_minimummaximum_f16_e64_dpp v5.l, -|v1.l|, -|v2.l|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0x6e,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] -v_minimummaximum_f16 v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_minimummaximum_f16_e64_dpp v5.l, -|v1.l|, v2.l, -|-1| row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x05,0x6e,0xd6,0xfa,0x04,0x06,0xa3,0x01,0x5f,0x01,0x01] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_half_mirror +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x41,0x01,0xff] -v_minimummaximum_f16 v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_minimummaximum_f16_e64_dpp v5.l, v1.l, -|v2.l|, -|0.5| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x06,0x6e,0xd6,0xfa,0x04,0xc2,0xc3,0x01,0x60,0x09,0x13] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_mirror +// GFX13: 
v_xor3_b32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] -v_minimummaximum_f16 v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_minimummaximum_f16_e64_dpp v255.l, -|v255.l|, -|v255.l|, -|src_scc| clamp row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x87,0x6e,0xd6,0xfa,0xfe,0xf7,0xe3,0xff,0x6f,0x05,0x30] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_ror:1 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x21,0x01,0xff] -v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_ror:15 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x2f,0x01,0xff] -v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_share:0 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x50,0x01,0xff] -v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_share:15 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x5f,0x01,0xff] 
-v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v255 row_half_mirror -// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shl:1 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x01,0x01,0xff] -v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shl:15 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x0f,0x01,0xff] -v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shr:1 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x11,0x01,0xff] -v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc3,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shr:15 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1f,0x01,0xff] -v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0xc3,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 
row_xmask:0 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x60,0x01,0xff] -v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 -// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0xc3,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x6f,0x01,0xff] -v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, v2, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, -|v1|, v2, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0xc3,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:0 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0xc3,0xd6,0xfa,0x04,0x16,0x52,0x01,0x60,0x09,0x13] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] fi:1 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x04,0xff] -v_cvt_sr_pk_f16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_sr_pk_f16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x83,0xc3,0xd6,0xfa,0xfe,0xf7,0x7b,0xff,0x6f,0x05,0x30] +v_xor3_b32_e64_dpp v5, 
v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x01] -v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] -// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0x30] -v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] -// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] -v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_mirror -// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_half_mirror -// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] +v_xor_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] +// GFX13: v_xor_b16_e64_dpp v255.l, v255.l, v255.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0xff,0x00,0x70,0xd7,0xfa,0xfe,0x03,0x00,0xff,0x1b,0x00,0xff] -v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:1 -// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] +// GFX13: v_xor_b16_e64_dpp v5.l, v1.h, v2.l op_sel:[1,0,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x08,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:15 -// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.h op_sel:[0,1,0] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x10,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:1 -// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:0 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:15 -// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] bound_ctrl:1 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf bound_ctrl:1 ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x08,0xff] -v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:1 -// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] -v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:15 -// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] -v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] -v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x08,0x01,0x5f,0x01,0x01] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] -v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_pk_f16_f32_e64_dpp v5, v1, v2 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x00,0x74,0xd7,0xfa,0x04,0x02,0x10,0x01,0x60,0x09,0x13] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_ror:15 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l 
row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] -v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_pk_f16_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 ; encoding: [0xff,0x81,0x74,0xd7,0xfa,0xfe,0x03,0x38,0xff,0x6f,0x05,0x30] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] -v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] -// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_share:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x5f,0x01,0xff] -v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] -// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] -v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_mirror -// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] -v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v255 
row_half_mirror -// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, v255 row_half_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xfe,0x07,0x01,0x41,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] -v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, s105 row_shl:1 -// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, s105 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xa6,0x01,0x01,0x01,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1f,0x01,0xff] -v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 -// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, vcc_hi row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xae,0x01,0x01,0x0f,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0xff] -v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 -// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0xc2,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l row_xmask:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x6f,0x01,0xff] -v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 -// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, exec_hi row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x02,0xc2,0xd6,0xfa,0x04,0xfe,0x41,0x01,0x21,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:0 
+// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] -v_cvt_sr_pk_bf16_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf -// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, -|v1|, -|v2|, null row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x03,0xc2,0xd6,0xfa,0x04,0xf2,0x61,0x01,0x50,0x01,0xff] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] fi:1 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf fi:1 ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x04,0xff] -v_cvt_sr_pk_bf16_f32_e64_dpp v5, -|v1|, v2, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 -// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, -|v1|, v2, -1 mul:2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x01,0xc2,0xd6,0xfa,0x04,0x06,0x2b,0x01,0x5f,0x01,0x01] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x0 bank_mask:0x1 ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x01] -v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 -// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v5, v1, -|v2|, 5 mul:4 row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 ; encoding: [0x05,0x02,0xc2,0xd6,0xfa,0x04,0x16,0x52,0x01,0x60,0x09,0x13] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0x3 bank_mask:0x0 ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0x30] -v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:0 fi:1 -// GFX13: v_cvt_sr_pk_bf16_f32_e64_dpp v255, -|v255|, -|v255|, src_scc clamp div:2 row_xmask:15 row_mask:0x3 bank_mask:0x0 fi:1 
; encoding: [0xff,0x83,0xc2,0xd6,0xfa,0xfe,0xf7,0x7b,0xff,0x6f,0x05,0x30] +v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf +// GFX13: v_xor_b16_e64_dpp v5.l, v1.l, v2.l quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x70,0xd7,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] From 2ab1f87518fa90f7373c83e631f98d77fd81ccdd Mon Sep 17 00:00:00 2001 From: Tim Date: Mon, 22 Jun 2026 07:33:41 -0500 Subject: [PATCH 008/511] [flang][debug] Add fake use ops for dynamic array dimension variables (#200061) In cases where the upper or lower bounds of a dynamic array are not explicitly referenced in code, flang can optimize away the internal variables that represent these values. This causes missing values to appear in the debugger when examining the dynamic array's type. Adding an llvm.fake.use op for each bound preserves it for use by a debugger, similar to the fix for #185432. Resolves #119474 --- .../flang/Optimizer/Transforms/Passes.td | 4 +- flang/lib/Optimizer/Passes/Pipelines.cpp | 2 +- .../lib/Optimizer/Transforms/AddDebugInfo.cpp | 63 ++++++-- .../debug-fake-use-multiple-dimensions.fir | 135 ++++++++++++++++++ .../debug-fake-use-multiple-returns.fir | 116 +++++++++++++++ flang/test/Transforms/debug-fake-use.fir | 45 +++++- 6 files changed, 344 insertions(+), 21 deletions(-) create mode 100644 flang/test/Transforms/debug-fake-use-multiple-dimensions.fir create mode 100644 flang/test/Transforms/debug-fake-use-multiple-returns.fir diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td index d9072e7aab4f7..8c082fb073451 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -267,9 +267,9 @@ def AddDebugInfo : Pass<"add-debug-info", "mlir::ModuleOp"> { Option<"dwarfDebugFlags", "dwarf-debug-flags", "std::string", /*default=*/"std::string{}", "Command-line flags to append to DWARF producer">, - 
Option<"emitFakeUseForArguments", "emit-fake-use-for-arguments", + Option<"emitFakeUseForDebugVars", "emit-fake-use-for-debug-vars", "bool", /*default=*/"false", - "Emit fake use for function arguments to extend their lifetime"> + "Emit fake use for debug variables to extend their lifetime"> ]; } diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp index c6d531ce50762..c677962f30199 100644 --- a/flang/lib/Optimizer/Passes/Pipelines.cpp +++ b/flang/lib/Optimizer/Passes/Pipelines.cpp @@ -113,7 +113,7 @@ void addDebugInfoPass(mlir::PassManager &pm, options.dwarfVersion = config.DwarfVersion; options.splitDwarfFile = config.SplitDwarfFile; options.dwarfDebugFlags = config.DwarfDebugFlags; - options.emitFakeUseForArguments = + options.emitFakeUseForDebugVars = (config.OptLevel == llvm::OptimizationLevel::O0) && !disableArgumentFakeUse; addPassConditionally(pm, disableDebugInfo, diff --git a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp index e0b570a908521..e5c1fc8279630 100644 --- a/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp +++ b/flang/lib/Optimizer/Transforms/AddDebugInfo.cpp @@ -291,6 +291,27 @@ bool AddDebugInfoPass::createCommonBlockGlobal( return true; } +// Create fake uses for compiler-generated internal variables that represent +// values needed by a debugger. This prevents values from being optimized out +// such as the count and lower bound of dynamic arrays. 
+template +static void InsertFakeUseForDebugVar(mlir::OpBuilder &builder, Op declOp, + mlir::Value var) { + if (auto funcOp = declOp->template getParentOfType()) { + if (declOp->getBlock() == &funcOp.getBody().front()) { + for (mlir::Block &block : funcOp.getBody()) { + if (auto returnOp = + mlir::dyn_cast(block.getTerminator())) { + mlir::OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPoint(returnOp); + if (!fir::getIntIfConstant(var)) + fir::FakeUseOp::create(builder, declOp.getLoc(), var); + } + } + } + } +} + template void AddDebugInfoPass::handleLocalVariable(Op declOp, llvm::StringRef name, mlir::LLVM::DIFileAttr fileAttr, @@ -307,22 +328,9 @@ void AddDebugInfoPass::handleLocalVariable(Op declOp, llvm::StringRef name, if (dummyScope && declOp.getDummyScope() == dummyScope) { if (auto argNoOpt = declOp.getDummyArgNo()) { argNo = *argNoOpt; - if (emitFakeUseForArguments) { + if (emitFakeUseForDebugVars) { if constexpr (std::is_same_v) { - if (auto funcOp = - declOp->template getParentOfType()) { - if (declOp->getBlock() == &funcOp.getBody().front()) { - for (mlir::Block &block : funcOp.getBody()) { - if (auto returnOp = mlir::dyn_cast( - block.getTerminator())) { - mlir::OpBuilder::InsertionGuard guard(builder); - builder.setInsertionPoint(returnOp); - fir::FakeUseOp::create(builder, declOp.getLoc(), - declOp.getMemref()); - } - } - } - } + InsertFakeUseForDebugVar(builder, declOp, declOp.getMemref()); } } } @@ -331,6 +339,31 @@ void AddDebugInfoPass::handleLocalVariable(Op declOp, llvm::StringRef name, auto tyAttr = typeGen.convertType(typeToConvert, fileAttr, scopeAttr, typeGenDeclOp); + if (emitFakeUseForDebugVars) { + // Create fake uses for internal variables that represent count and lower + // bound of dynamic arrays to ensure they are not optimized out. 
+ if (auto arrayTy = + mlir::dyn_cast(tyAttr)) { + if (arrayTy.getTag() == llvm::dwarf::DW_TAG_array_type) { + if constexpr (std::is_same_v) { + // Count is represented as a value in the shape attribute + for (auto val : declOp.getShape()) + InsertFakeUseForDebugVar(builder, declOp, val); + // Lower bound is represented as a value in the shift attribute + for (auto val : declOp.getShift()) + InsertFakeUseForDebugVar(builder, declOp, val); + } + } + } + + // Create fake uses for the length of character arrays to ensure they + // are not optimized out. + if constexpr (std::is_same_v) { + for (auto val : declOp.getTypeparams()) + InsertFakeUseForDebugVar(builder, declOp, val); + } + } + auto localVarAttr = mlir::LLVM::DILocalVariableAttr::get( context, scopeAttr, mlir::StringAttr::get(context, name), fileAttr, getLineFromLoc(declOp.getLoc()), argNo, /* alignInBits*/ 0, tyAttr, diff --git a/flang/test/Transforms/debug-fake-use-multiple-dimensions.fir b/flang/test/Transforms/debug-fake-use-multiple-dimensions.fir new file mode 100644 index 0000000000000..a1479aa14cb00 --- /dev/null +++ b/flang/test/Transforms/debug-fake-use-multiple-dimensions.fir @@ -0,0 +1,135 @@ +// RUN: fir-opt --add-debug-info="emit-fake-use-for-debug-vars=true" %s | FileCheck %s --check-prefix=FAKE-USE +// RUN: fir-opt --add-debug-info="emit-fake-use-for-debug-vars=false" %s | FileCheck %s --check-prefix=NO-FAKE-USE + +// Test that fir.fake_use ops are inserted for count and lower bound variables +// in each dimension of a multi-dimensional dynamic array. 
+ +// FAKE-USE-LABEL: func.func @test_2d_dynamic_array +// FAKE-USE: fircg.ext_declare %arg0(%[[COUNT1:.*]], %[[COUNT2:.*]]) origin %[[LB1:.*]], %[[LB2:.*]] dummy_scope +// FAKE-USE: fir.call @foo() : () -> () +// FAKE-USE: fir.fake_use %arg0 +// FAKE-USE: fir.fake_use %[[COUNT1]] +// FAKE-USE: fir.fake_use %[[COUNT2]] +// FAKE-USE: fir.fake_use %[[LB1]] +// FAKE-USE: fir.fake_use %[[LB2]] +// FAKE-USE: return + +// NO-FAKE-USE-LABEL: func.func @test_2d_dynamic_array +// NO-FAKE-USE: fircg.ext_declare %arg0(%{{.*}}, %{{.*}}) origin %{{.*}}, %{{.*}} dummy_scope +// NO-FAKE-USE: fir.call @foo() : () -> () +// NO-FAKE-USE-NOT: fir.fake_use +// NO-FAKE-USE: return + +// FAKE-USE-LABEL: func.func @test_3d_dynamic_array +// FAKE-USE: fircg.ext_declare %arg0(%[[C1:.*]], %[[C2:.*]], %[[C3:.*]]) origin %[[L1:.*]], %[[L2:.*]], %[[L3:.*]] dummy_scope +// FAKE-USE: fir.call @foo() : () -> () +// FAKE-USE: fir.fake_use %arg0 +// FAKE-USE: fir.fake_use %[[C1]] +// FAKE-USE: fir.fake_use %[[C2]] +// FAKE-USE: fir.fake_use %[[C3]] +// FAKE-USE: fir.fake_use %[[L1]] +// FAKE-USE: fir.fake_use %[[L2]] +// FAKE-USE: fir.fake_use %[[L3]] +// FAKE-USE: return + +// NO-FAKE-USE-LABEL: func.func @test_3d_dynamic_array +// NO-FAKE-USE: fircg.ext_declare %arg0(%{{.*}}, %{{.*}}, %{{.*}}) origin %{{.*}}, %{{.*}}, %{{.*}} dummy_scope +// NO-FAKE-USE: fir.call @foo() : () -> () +// NO-FAKE-USE-NOT: fir.fake_use +// NO-FAKE-USE: return + +// FAKE-USE-LABEL: func.func @test_2d_dynamic_array_multi_ret +// FAKE-USE: fircg.ext_declare %arg0(%[[MR_COUNT1:.*]], %[[MR_COUNT2:.*]]) origin %[[MR_LB1:.*]], %[[MR_LB2:.*]] dummy_scope +// FAKE-USE: cf.cond_br %{{.*}}, ^bb1, ^bb2 +// FAKE-USE: ^bb1: +// FAKE-USE: fir.fake_use %arg0 +// FAKE-USE: fir.fake_use %[[MR_COUNT1]] +// FAKE-USE: fir.fake_use %[[MR_COUNT2]] +// FAKE-USE: fir.fake_use %[[MR_LB1]] +// FAKE-USE: fir.fake_use %[[MR_LB2]] +// FAKE-USE: return +// FAKE-USE: ^bb2: +// FAKE-USE: fir.fake_use %arg0 +// FAKE-USE: fir.fake_use %[[MR_COUNT1]] 
+// FAKE-USE: fir.fake_use %[[MR_COUNT2]] +// FAKE-USE: fir.fake_use %[[MR_LB1]] +// FAKE-USE: fir.fake_use %[[MR_LB2]] +// FAKE-USE: return + +// NO-FAKE-USE-LABEL: func.func @test_2d_dynamic_array_multi_ret +// NO-FAKE-USE: fircg.ext_declare %arg0(%{{.*}}, %{{.*}}) origin %{{.*}}, %{{.*}} dummy_scope +// NO-FAKE-USE: cf.cond_br %{{.*}}, ^bb1, ^bb2 +// NO-FAKE-USE: ^bb1: +// NO-FAKE-USE-NOT: fir.fake_use +// NO-FAKE-USE: return +// NO-FAKE-USE: ^bb2: +// NO-FAKE-USE-NOT: fir.fake_use +// NO-FAKE-USE: return + +// FAKE-USE-LABEL: func.func @test_3d_mixed_array +// FAKE-USE: %[[CST_COUNT:.*]] = arith.constant 10 : index +// FAKE-USE: %[[CST_LB:.*]] = arith.constant 1 : index +// FAKE-USE: fircg.ext_declare %arg0(%[[CST_COUNT]], %[[DYN_COUNT1:.*]], %[[DYN_COUNT2:.*]]) origin %[[CST_LB]], %[[DYN_LB1:.*]], %[[DYN_LB2:.*]] dummy_scope +// FAKE-USE: fir.call @foo() : () -> () +// FAKE-USE: fir.fake_use %arg0 +// FAKE-USE: fir.fake_use %[[DYN_COUNT1]] +// FAKE-USE: fir.fake_use %[[DYN_COUNT2]] +// FAKE-USE: fir.fake_use %[[DYN_LB1]] +// FAKE-USE: fir.fake_use %[[DYN_LB2]] +// FAKE-USE-NOT: fir.fake_use +// FAKE-USE: return + +// NO-FAKE-USE-LABEL: func.func @test_3d_mixed_array +// NO-FAKE-USE: fircg.ext_declare %arg0(%{{.*}}, %{{.*}}, %{{.*}}) origin %{{.*}}, %{{.*}}, %{{.*}} dummy_scope +// NO-FAKE-USE: fir.call @foo() : () -> () +// NO-FAKE-USE-NOT: fir.fake_use +// NO-FAKE-USE: return + +#loc1 = loc("debug-fake-use-multiple-dimensions.f90":1:1) +#loc3 = loc("debug-fake-use-multiple-dimensions.f90":3:14) +#loc4 = loc("debug-fake-use-multiple-dimensions.f90":4:14) +#loc5 = loc("debug-fake-use-multiple-dimensions.f90":5:1) +#loc6 = loc("debug-fake-use-multiple-dimensions.f90":6:1) +#loc = loc("debug-fake-use-multiple-dimensions.f90":0:0) + +module { + func.func private @foo() + + // 2D dynamically-sized array with a single return. 
+ func.func @test_2d_dynamic_array(%arg0: !fir.ref> {fir.bindc_name = "arr"} loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg1: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg2: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg3: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg4: index loc("debug-fake-use-multiple-dimensions.f90":1:1)) attributes {fir.internal_name = "_QPtest_2d_dynamic_array"} { + %0 = fir.undefined !fir.dscope loc(#loc1) + %1 = fircg.ext_declare %arg0(%arg1, %arg3) origin %arg2, %arg4 dummy_scope %0 arg 1 {uniq_name = "_QFtest_2d_dynamic_arrayEarr"} : (!fir.ref>, index, index, index, index, !fir.dscope) -> !fir.ref> loc(#loc3) + fir.call @foo() : () -> () + return loc(#loc5) + } loc(#loc1) + + // 3D dynamically-sized array with a single return. + func.func @test_3d_dynamic_array(%arg0: !fir.ref> {fir.bindc_name = "arr"} loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg1: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg2: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg3: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg4: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg5: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg6: index loc("debug-fake-use-multiple-dimensions.f90":1:1)) attributes {fir.internal_name = "_QPtest_3d_dynamic_array"} { + %0 = fir.undefined !fir.dscope loc(#loc1) + %1 = fircg.ext_declare %arg0(%arg1, %arg3, %arg5) origin %arg2, %arg4, %arg6 dummy_scope %0 arg 1 {uniq_name = "_QFtest_3d_dynamic_arrayEarr"} : (!fir.ref>, index, index, index, index, index, index, !fir.dscope) -> !fir.ref> loc(#loc3) + fir.call @foo() : () -> () + return loc(#loc5) + } loc(#loc1) + + // 2D dynamically-sized array with multiple returns. 
+ func.func @test_2d_dynamic_array_multi_ret(%arg0: !fir.ref> {fir.bindc_name = "arr"} loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg1: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg2: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg3: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg4: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg5: i1 loc("debug-fake-use-multiple-dimensions.f90":1:1)) attributes {fir.internal_name = "_QPtest_2d_dynamic_array_multi_ret"} { + %0 = fir.undefined !fir.dscope loc(#loc1) + %1 = fircg.ext_declare %arg0(%arg1, %arg3) origin %arg2, %arg4 dummy_scope %0 arg 1 {uniq_name = "_QFtest_2d_dynamic_array_multi_retEarr"} : (!fir.ref>, index, index, index, index, !fir.dscope) -> !fir.ref> loc(#loc3) + fir.call @foo() : () -> () + cf.cond_br %arg5, ^bb1, ^bb2 + ^bb1: + return loc(#loc5) + ^bb2: + return loc(#loc6) + } loc(#loc1) + + // 3D array with mixed constant and dynamic dimensions. + func.func @test_3d_mixed_array(%arg0: !fir.ref> {fir.bindc_name = "arr"} loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg1: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg2: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg3: index loc("debug-fake-use-multiple-dimensions.f90":1:1), %arg4: index loc("debug-fake-use-multiple-dimensions.f90":1:1)) attributes {fir.internal_name = "_QPtest_3d_mixed_array"} { + %c10 = arith.constant 10 : index + %c1 = arith.constant 1 : index + %0 = fir.undefined !fir.dscope loc(#loc1) + %1 = fircg.ext_declare %arg0(%c10, %arg1, %arg3) origin %c1, %arg2, %arg4 dummy_scope %0 arg 1 {uniq_name = "_QFtest_3d_mixed_arrayEarr"} : (!fir.ref>, index, index, index, index, index, index, !fir.dscope) -> !fir.ref> loc(#loc3) + fir.call @foo() : () -> () + return loc(#loc5) + } loc(#loc1) +} loc(#loc) diff --git a/flang/test/Transforms/debug-fake-use-multiple-returns.fir b/flang/test/Transforms/debug-fake-use-multiple-returns.fir new file mode 100644 
index 0000000000000..ac379d0f587f7 --- /dev/null +++ b/flang/test/Transforms/debug-fake-use-multiple-returns.fir @@ -0,0 +1,116 @@ +// RUN: fir-opt --add-debug-info="emit-fake-use-for-debug-vars=true" %s | FileCheck %s --check-prefix=FAKE-USE +// RUN: fir-opt --add-debug-info="emit-fake-use-for-debug-vars=false" %s | FileCheck %s --check-prefix=NO-FAKE-USE + +// Test that fir.fake_use ops are inserted before every return in functions with +// multiple return locations. + +// FAKE-USE-LABEL: func.func @test_dummy_arg_multi_ret +// FAKE-USE: %[[UNDEF:.*]] = fir.undefined !fir.dscope +// FAKE-USE: %[[DECL:.*]] = fircg.ext_declare %arg0 dummy_scope %[[UNDEF]] arg 1 +// FAKE-USE: cf.cond_br %{{.*}}, ^bb1, ^bb2 +// FAKE-USE: ^bb1: +// FAKE-USE: fir.fake_use %arg0 +// FAKE-USE: return +// FAKE-USE: ^bb2: +// FAKE-USE: fir.fake_use %arg0 +// FAKE-USE: return + +// NO-FAKE-USE-LABEL: func.func @test_dummy_arg_multi_ret +// NO-FAKE-USE: %[[UNDEF:.*]] = fir.undefined !fir.dscope +// NO-FAKE-USE: %[[DECL:.*]] = fircg.ext_declare %arg0 dummy_scope %[[UNDEF]] arg 1 +// NO-FAKE-USE: cf.cond_br %{{.*}}, ^bb1, ^bb2 +// NO-FAKE-USE: ^bb1: +// NO-FAKE-USE-NOT: fir.fake_use +// NO-FAKE-USE: return +// NO-FAKE-USE: ^bb2: +// NO-FAKE-USE-NOT: fir.fake_use +// NO-FAKE-USE: return + +// FAKE-USE-LABEL: func.func @test_dynamic_array_multi_ret +// FAKE-USE: fircg.ext_declare %arg0(%[[COUNT:.*]]) origin %[[LB:.*]] dummy_scope +// FAKE-USE: cf.cond_br %{{.*}}, ^bb1, ^bb2 +// FAKE-USE: ^bb1: +// FAKE-USE: fir.fake_use %[[COUNT]] +// FAKE-USE: fir.fake_use %[[LB]] +// FAKE-USE: return +// FAKE-USE: ^bb2: +// FAKE-USE: fir.fake_use %[[COUNT]] +// FAKE-USE: fir.fake_use %[[LB]] +// FAKE-USE: return + +// NO-FAKE-USE-LABEL: func.func @test_dynamic_array_multi_ret +// NO-FAKE-USE: fircg.ext_declare %arg0(%{{.*}}) origin %{{.*}} dummy_scope +// NO-FAKE-USE: cf.cond_br %{{.*}}, ^bb1, ^bb2 +// NO-FAKE-USE: ^bb1: +// NO-FAKE-USE-NOT: fir.fake_use +// NO-FAKE-USE: return +// NO-FAKE-USE: ^bb2: +// 
NO-FAKE-USE-NOT: fir.fake_use +// NO-FAKE-USE: return + +// FAKE-USE-LABEL: func.func @test_char_array_multi_ret +// FAKE-USE: fircg.ext_declare %arg0 typeparams %[[LEN:.*]] dummy_scope +// FAKE-USE: cf.cond_br %{{.*}}, ^bb1, ^bb2 +// FAKE-USE: ^bb1: +// FAKE-USE: fir.fake_use %[[LEN]] +// FAKE-USE: return +// FAKE-USE: ^bb2: +// FAKE-USE: fir.fake_use %[[LEN]] +// FAKE-USE: return + +// NO-FAKE-USE-LABEL: func.func @test_char_array_multi_ret +// NO-FAKE-USE: fircg.ext_declare %arg0 typeparams %{{.*}} dummy_scope +// NO-FAKE-USE: cf.cond_br %{{.*}}, ^bb1, ^bb2 +// NO-FAKE-USE: ^bb1: +// NO-FAKE-USE-NOT: fir.fake_use +// NO-FAKE-USE: return +// NO-FAKE-USE: ^bb2: +// NO-FAKE-USE-NOT: fir.fake_use +// NO-FAKE-USE: return + +#loc1 = loc("debug-fake-use-multiple-returns.f90":1:1) +#loc3 = loc("debug-fake-use-multiple-returns.f90":3:14) +#loc4 = loc("debug-fake-use-multiple-returns.f90":4:14) +#loc5 = loc("debug-fake-use-multiple-returns.f90":5:1) +#loc6 = loc("debug-fake-use-multiple-returns.f90":6:1) +#loc = loc("debug-fake-use-multiple-returns.f90":0:0) + +module { + func.func private @foo() + + // Function with a dummy argument and multiple returns. + func.func @test_dummy_arg_multi_ret(%arg0: !fir.ref {fir.bindc_name = "expected"} loc("debug-fake-use-multiple-returns.f90":1:1), %arg1: i1 loc("debug-fake-use-multiple-returns.f90":1:1)) attributes {fir.internal_name = "_QPtest_dummy_arg_multi_ret"} { + %0 = fir.undefined !fir.dscope loc(#loc1) + %1 = fircg.ext_declare %arg0 dummy_scope %0 arg 1 {uniq_name = "_QFtest_dummy_arg_multi_retEexpected"} : (!fir.ref, !fir.dscope) -> !fir.ref loc(#loc3) + fir.call @foo() : () -> () + cf.cond_br %arg1, ^bb1, ^bb2 + ^bb1: + return loc(#loc5) + ^bb2: + return loc(#loc6) + } loc(#loc1) + + // Function with a dynamically-sized array and multiple returns. 
+ func.func @test_dynamic_array_multi_ret(%arg0: !fir.ref> {fir.bindc_name = "arr"} loc("debug-fake-use-multiple-returns.f90":1:1), %arg1: index loc("debug-fake-use-multiple-returns.f90":1:1), %arg2: index loc("debug-fake-use-multiple-returns.f90":1:1), %arg3: i1 loc("debug-fake-use-multiple-returns.f90":1:1)) attributes {fir.internal_name = "_QPtest_dynamic_array_multi_ret"} { + %0 = fir.undefined !fir.dscope loc(#loc1) + %1 = fircg.ext_declare %arg0(%arg1) origin %arg2 dummy_scope %0 arg 1 {uniq_name = "_QFtest_dynamic_array_multi_retEarr"} : (!fir.ref>, index, index, !fir.dscope) -> !fir.ref> loc(#loc3) + fir.call @foo() : () -> () + cf.cond_br %arg3, ^bb1, ^bb2 + ^bb1: + return loc(#loc5) + ^bb2: + return loc(#loc6) + } loc(#loc1) + + // Function with a dynamically-sized character array and multiple returns. + func.func @test_char_array_multi_ret(%arg0: !fir.ref> {fir.bindc_name = "str"} loc("debug-fake-use-multiple-returns.f90":1:1), %arg1: index {fir.bindc_name = "n"} loc("debug-fake-use-multiple-returns.f90":1:1), %arg2: i1 loc("debug-fake-use-multiple-returns.f90":1:1)) attributes {fir.internal_name = "_QPtest_char_array_multi_ret"} { + %0 = fir.undefined !fir.dscope loc(#loc1) + %1 = fircg.ext_declare %arg0 typeparams %arg1 dummy_scope %0 arg 1 {uniq_name = "_QFtest_char_array_multi_retEstr"} : (!fir.ref>, index, !fir.dscope) -> !fir.ref> loc(#loc3) + fir.call @foo() : () -> () + cf.cond_br %arg2, ^bb1, ^bb2 + ^bb1: + return loc(#loc5) + ^bb2: + return loc(#loc6) + } loc(#loc1) +} loc(#loc) diff --git a/flang/test/Transforms/debug-fake-use.fir b/flang/test/Transforms/debug-fake-use.fir index d86cc9b3afee4..d16fa2f6ff028 100644 --- a/flang/test/Transforms/debug-fake-use.fir +++ b/flang/test/Transforms/debug-fake-use.fir @@ -1,5 +1,5 @@ -// RUN: fir-opt --add-debug-info="emit-fake-use-for-arguments=true" %s | FileCheck %s --check-prefix=FAKE-USE -// RUN: fir-opt --add-debug-info="emit-fake-use-for-arguments=false" %s | FileCheck %s --check-prefix=NO-FAKE-USE 
+// RUN: fir-opt --add-debug-info="emit-fake-use-for-debug-vars=true" %s | FileCheck %s --check-prefix=FAKE-USE +// RUN: fir-opt --add-debug-info="emit-fake-use-for-debug-vars=false" %s | FileCheck %s --check-prefix=NO-FAKE-USE // FAKE-USE-LABEL: func.func @test_ // FAKE-USE: %[[UNDEF:.*]] = fir.undefined !fir.dscope @@ -31,13 +31,38 @@ // NO-FAKE-USE-NOT: fir.fake_use // NO-FAKE-USE: return +// FAKE-USE-LABEL: func.func @test_dynamic_array +// FAKE-USE: fircg.ext_declare %arg0(%[[COUNT:.*]]) origin %[[LB:.*]] dummy_scope +// FAKE-USE: fir.call @foo() : () -> () +// FAKE-USE: fir.fake_use %[[COUNT]] +// FAKE-USE: fir.fake_use %[[LB]] +// FAKE-USE: return + +// NO-FAKE-USE-LABEL: func.func @test_dynamic_array +// NO-FAKE-USE: fircg.ext_declare %arg0(%{{.*}}) origin %{{.*}} dummy_scope +// NO-FAKE-USE: fir.call @foo() : () -> () +// NO-FAKE-USE-NOT: fir.fake_use +// NO-FAKE-USE: return + +// FAKE-USE-LABEL: func.func @test_char_array +// FAKE-USE: fircg.ext_declare %arg0 typeparams %[[LEN:.*]] dummy_scope +// FAKE-USE: fir.call @foo() : () -> () +// FAKE-USE: fir.fake_use %[[LEN]] +// FAKE-USE: return + +// NO-FAKE-USE-LABEL: func.func @test_char_array +// NO-FAKE-USE: fircg.ext_declare %arg0 typeparams %{{.*}} dummy_scope +// NO-FAKE-USE: fir.call @foo() : () -> () +// NO-FAKE-USE-NOT: fir.fake_use +// NO-FAKE-USE: return + #loc1 = loc("debug-fake-use.f90":1:1) #loc3 = loc("debug-fake-use.f90":3:14) #loc4 = loc("debug-fake-use.f90":4:14) #loc5 = loc("debug-fake-use.f90":5:1) #loc = loc("debug-fake-use.f90":0:0) -module attributes {dlti.dl_spec = #dlti.dl_spec : vector<2xi64>, f80 = dense<128> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i32 = dense<32> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, !llvm.ptr<270> = dense<32> : vector<4xi64>, i8 = dense<8> : vector<2xi64>, f128 = dense<128> : 
vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, "dlti.stack_alignment" = 128 : i64, "dlti.mangling_mode" = "e", "dlti.endianness" = "little">, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", fir.target_cpu = "x86-64", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang", llvm.target_triple = "x86_64-unknown-linux-gnu"} { +module { func.func private @foo() func.func @test_(%arg0: !fir.ref {fir.bindc_name = "expected"} loc("debug-fake-use.f90":1:1)) attributes {fir.internal_name = "_QPtest"} { %0 = fir.undefined !fir.dscope loc(#loc1) @@ -53,4 +78,18 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<2xi64 fir.call @foo() : () -> () return loc(#loc5) } loc(#loc1) + + func.func @test_dynamic_array(%arg0: !fir.ref> {fir.bindc_name = "arr"} loc("debug-fake-use.f90":1:1), %arg1: index loc("debug-fake-use.f90":1:1), %arg2: index loc("debug-fake-use.f90":1:1)) attributes {fir.internal_name = "_QPtest_dynamic_array"} { + %0 = fir.undefined !fir.dscope loc(#loc1) + %1 = fircg.ext_declare %arg0(%arg1) origin %arg2 dummy_scope %0 arg 1 {uniq_name = "_QFtest_dynamic_arrayEarr"} : (!fir.ref>, index, index, !fir.dscope) -> !fir.ref> loc(#loc3) + fir.call @foo() : () -> () + return loc(#loc5) + } loc(#loc1) + + func.func @test_char_array(%arg0: !fir.ref> {fir.bindc_name = "str"} loc("debug-fake-use.f90":1:1), %arg1: index {fir.bindc_name = "n"} loc("debug-fake-use.f90":1:1)) attributes {fir.internal_name = "_QPtest_char_array"} { + %0 = fir.undefined !fir.dscope loc(#loc1) + %1 = fircg.ext_declare %arg0 typeparams %arg1 dummy_scope %0 arg 1 {uniq_name = "_QFtest_char_arrayEstr"} : (!fir.ref>, index, !fir.dscope) -> !fir.ref> loc(#loc3) + fir.call @foo() : () -> () + return loc(#loc5) + } loc(#loc1) } loc(#loc) From 8024bc3c90607a0c25a4e20bd89b609f8a240b76 Mon Sep 17 00:00:00 2001 From: Adam Siemieniuk Date: Mon, 22 Jun 2026 14:37:20 +0200 Subject: [PATCH 009/511] 
[mlir][x86] Fail on missing read source operation (#205077) Adds an extra check to AMX lowering to fail gracefully when a source operation for contraction input data is not found. --- .../VectorContractToAMXDotProduct.cpp | 4 ++ .../X86/AMX/vector-contract-to-tiled-dp.mlir | 69 +++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/mlir/lib/Dialect/X86/Transforms/VectorContractToAMXDotProduct.cpp b/mlir/lib/Dialect/X86/Transforms/VectorContractToAMXDotProduct.cpp index bfdeb51b65ef1..86291ae03f29e 100644 --- a/mlir/lib/Dialect/X86/Transforms/VectorContractToAMXDotProduct.cpp +++ b/mlir/lib/Dialect/X86/Transforms/VectorContractToAMXDotProduct.cpp @@ -1064,6 +1064,10 @@ struct VectorContractToAMXDotProduct vectorOpRhs = readOp.getBase().getDefiningOp(); }); + if (!vectorOpLhs || !vectorOpRhs) + return rewriter.notifyMatchFailure( + contractOp, "Failed to find LHS or RHS read source operation"); + // Retrive all the contaction operation within the loop. SmallVector ops; for (mlir::Operation &op : loopLists[0].getBody()->getOperations()) { diff --git a/mlir/test/Dialect/X86/AMX/vector-contract-to-tiled-dp.mlir b/mlir/test/Dialect/X86/AMX/vector-contract-to-tiled-dp.mlir index fbbd20202620b..71bf62c56a6e5 100644 --- a/mlir/test/Dialect/X86/AMX/vector-contract-to-tiled-dp.mlir +++ b/mlir/test/Dialect/X86/AMX/vector-contract-to-tiled-dp.mlir @@ -2009,3 +2009,72 @@ module attributes {transform.with_named_sequence} { transform.yield } } + +// ----- + +!vecA = vector<1x16x32xbf16> +!vecB = vector<1x32x16xbf16> +!vecC = vector<16x16xf32> + +#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)> +#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)> +#map2 = affine_map<(d0, d1, d2, d3) -> (d1, d2)> + +func.func @negative_no_read_op_source(%arg0: memref<16x64x32xbf16>, %arg1: memref<16x32x128xbf16>, %arg2: memref<64x32xf32>) { + %0 = ub.poison : f32 + %1 = ub.poison : bf16 + %c0 = arith.constant 0 : index + %c64 = arith.constant 64 : index + %c16 = arith.constant 16 : 
index + %c32 = arith.constant 32 : index + %c1 = arith.constant 1 : index + scf.for %arg3 = %c0 to %c64 step %c32 { + %2 = vector.transfer_read %arg2[%arg3, %c0], %0 {in_bounds = [true, true]} : memref<64x32xf32>, !vecC + %3 = vector.transfer_read %arg2[%arg3, %c16], %0 {in_bounds = [true, true]} : memref<64x32xf32>, !vecC + %4 = arith.addi %arg3, %c16 : index + %5 = vector.transfer_read %arg2[%4, %c0], %0 {in_bounds = [true, true]} : memref<64x32xf32>, !vecC + %6 = vector.transfer_read %arg2[%4, %c16], %0 {in_bounds = [true, true]} : memref<64x32xf32>, !vecC + %7:4 = scf.for %arg4 = %c0 to %c16 step %c1 iter_args(%arg5 = %2, %arg6 = %3, %arg7 = %5, %arg8 = %6) -> (!vecC, !vecC, !vecC, !vecC) { + %8 = vector.transfer_read %arg0[%arg4, %arg3, %c0], %1 {in_bounds = [true, true, true]} + : memref<16x64x32xbf16>, !vecA + %9 = vector.transfer_read %arg0[%arg4, %4, %c0], %1 {in_bounds = [true, true, true]} + : memref<16x64x32xbf16>, !vecA + %10 = vector.transfer_read %arg1[%arg4, %c0, %c0], %1 {in_bounds = [true, true, true]} + : memref<16x32x128xbf16>, !vecB + %11 = vector.transfer_read %arg1[%arg4, %c0, %c16], %1 {in_bounds = [true, true, true]} + : memref<16x32x128xbf16>, !vecB + %12 = vector.contract {indexing_maps = [#map, #map1, #map2], iterator_types = + ["reduction", "parallel", "parallel", "reduction"], kind = #vector.kind} + %8, %10, %arg5 : !vecA, !vecB into !vecC + %13 = vector.contract {indexing_maps = [#map, #map1, #map2], iterator_types = + ["reduction", "parallel", "parallel", "reduction"], kind = #vector.kind} + %8, %11, %arg6 : !vecA, !vecB into !vecC + %14 = vector.contract {indexing_maps = [#map, #map1, #map2], iterator_types = + ["reduction", "parallel", "parallel", "reduction"], kind = #vector.kind} + %9, %10, %arg7 : !vecA, !vecB into !vecC + %15 = vector.contract {indexing_maps = [#map, #map1, #map2], iterator_types = + ["reduction", "parallel", "parallel", "reduction"], kind = #vector.kind} + %9, %11, %arg8 : !vecA, !vecB into !vecC + scf.yield 
%12, %13, %14, %15 : !vecC, !vecC, !vecC, !vecC + } + vector.transfer_write %7#3, %arg2[%4, %c16] {in_bounds = [true, true]} : !vecC, memref<64x32xf32> + vector.transfer_write %7#2, %arg2[%4, %c0] {in_bounds = [true, true]} : !vecC, memref<64x32xf32> + vector.transfer_write %7#1, %arg2[%arg3, %c16] {in_bounds = [true, true]} : !vecC, memref<64x32xf32> + vector.transfer_write %7#0, %arg2[%arg3, %c0] {in_bounds = [true, true]} : !vecC, memref<64x32xf32> + } + return +} + +// CHECK-LABEL: @negative_no_read_op_source +// CHECK-NOT: x86.amx +// CHECK: vector.contract + +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg0: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["func.func"]} in %arg0 : (!transform.any_op) -> !transform.any_op + transform.apply_patterns to %0 { + transform.apply_patterns.x86.vector_contract_to_amx_dot_product + } : !transform.any_op + transform.yield + } +} From c9daf042b70a9e629838b63dc5ab47353dd7382e Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Mon, 22 Jun 2026 14:40:51 +0200 Subject: [PATCH 010/511] Revert "[Allocator] Keep bump pointer at a minimum alignment" (#205091) Arithmetic on nullptr is UB and gets flagged by UBSan. Reverts llvm/llvm-project#203718 --- llvm/include/llvm/Support/Allocator.h | 78 ++++++++---------------- llvm/unittests/Support/AllocatorTest.cpp | 19 ------ 2 files changed, 27 insertions(+), 70 deletions(-) diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h index e0185bb3a88e6..fffcbd9f3c1d8 100644 --- a/llvm/include/llvm/Support/Allocator.h +++ b/llvm/include/llvm/Support/Allocator.h @@ -59,16 +59,11 @@ LLVM_ABI void printBumpPtrAllocatorStats(unsigned NumSlabs, /// /// The GrowthDelay specifies after how many allocated slabs the allocator /// increases the size of the slabs. 
-/// -/// MinAlign keeps the bump pointer aligned between allocations: each size is -/// rounded up to a multiple of MinAlign so the fast path can skip realigning -/// CurPtr when the requested alignment is no greater than MinAlign. template + size_t SizeThreshold = SlabSize, size_t GrowthDelay = 128> class BumpPtrAllocatorImpl - : public AllocatorBase>, + : public AllocatorBase>, private detail::AllocatorHolder { using AllocTy = detail::AllocatorHolder; @@ -80,10 +75,6 @@ class BumpPtrAllocatorImpl static_assert(GrowthDelay > 0, "GrowthDelay must be at least 1 which already increases the" "slab size after each allocated slab."); - static_assert(MinAlign > 0 && (MinAlign & (MinAlign - 1)) == 0, - "MinAlign must be a power of two"); - static_assert(MinAlign <= alignof(std::max_align_t), - "MinAlign must not exceed the alignment of fresh slabs"); BumpPtrAllocatorImpl() = default; @@ -159,33 +150,30 @@ class BumpPtrAllocatorImpl // Keep track of how many bytes we've allocated. BytesAllocated += Size; + uintptr_t AlignedPtr = alignAddr(CurPtr, Alignment); + size_t SizeToAllocate = Size; #if LLVM_ADDRESS_SANITIZER_BUILD // Add trailing bytes as a "red zone" under ASan. SizeToAllocate += RedZoneSize; #endif - SizeToAllocate = alignToPowerOf2(SizeToAllocate, MinAlign); - // CurPtr is already MinAlign-aligned, so only a stricter request realigns. - char *Ptr = CurPtr; - if (Alignment.value() > MinAlign) - Ptr = reinterpret_cast(alignAddr(Ptr, Alignment)); - char *NewCurPtr = Ptr + SizeToAllocate; - assert(NewCurPtr >= Ptr && "Alignment + Size must not overflow"); + uintptr_t AllocEndPtr = AlignedPtr + SizeToAllocate; + assert(AllocEndPtr >= uintptr_t(CurPtr) && + "Alignment + Size must not overflow"); // Check if we have enough space. - if (LLVM_LIKELY( - uintptr_t(NewCurPtr) <= uintptr_t(End) && - // We can't return nullptr even for a zero-sized allocation! 
- CurPtr != nullptr)) { - CurPtr = NewCurPtr; + if (LLVM_LIKELY(AllocEndPtr <= uintptr_t(End) + // We can't return nullptr even for a zero-sized allocation! + && CurPtr != nullptr)) { + CurPtr = reinterpret_cast(AllocEndPtr); // Update the allocation point of this memory block in MemorySanitizer. // Without this, MemorySanitizer messages for values originated from here // will point to the allocation of the entire slab. - __msan_allocated_memory(Ptr, Size); + __msan_allocated_memory(reinterpret_cast(AlignedPtr), Size); // Similarly, tell ASan about this space. - __asan_unpoison_memory_region(Ptr, Size); - return Ptr; + __asan_unpoison_memory_region(reinterpret_cast(AlignedPtr), Size); + return reinterpret_cast(AlignedPtr); } return AllocateSlow(Size, SizeToAllocate, Alignment); @@ -400,13 +388,7 @@ using BumpPtrAllocator = BumpPtrAllocatorImpl<>; /// This allows calling the destructor in DestroyAll() and when the allocator is /// destroyed. template class SpecificBumpPtrAllocator { - // DestroyAll() walks objects at a fixed sizeof(T) stride, so it needs tight - // packing: MinAlign=1 disables the size rounding. (alignof(T) would pack just - // as tightly and reuse the default instantiation, but T may be incomplete - // here, e.g. SpecificBumpPtrAllocator.) - using BumpPtrAllocatorTy = - BumpPtrAllocatorImpl; - BumpPtrAllocatorTy Allocator; + BumpPtrAllocator Allocator; public: SpecificBumpPtrAllocator() { @@ -435,7 +417,7 @@ template class SpecificBumpPtrAllocator { for (auto I = Allocator.Slabs.begin(), E = Allocator.Slabs.end(); I != E; ++I) { - size_t AllocatedSlabSize = BumpPtrAllocatorTy::computeSlabSize( + size_t AllocatedSlabSize = BumpPtrAllocator::computeSlabSize( std::distance(Allocator.Slabs.begin(), I)); char *Begin = (char *)alignAddr(*I, Align::Of()); char *End = *I == Allocator.Slabs.back() ? Allocator.CurPtr @@ -455,14 +437,7 @@ template class SpecificBumpPtrAllocator { } /// Allocate space for an array of objects without constructing them. 
- T *Allocate(size_t num = 1) { - // Slabs are max_align_t-aligned and every size is a multiple of alignof(T), - // so the bump pointer is already alignof(T)-aligned. Request alignment 1 so - // the fast path skips realigning CurPtr; over-aligned T still needs it. - if constexpr (alignof(T) <= alignof(std::max_align_t)) - return static_cast(Allocator.Allocate(num * sizeof(T), Align())); - return Allocator.Allocate(num); - } + T *Allocate(size_t num = 1) { return Allocator.Allocate(num); } /// \return An index uniquely and reproducibly identifying /// an input pointer \p Ptr in the given allocator. @@ -475,19 +450,20 @@ template class SpecificBumpPtrAllocator { } // end namespace llvm template + size_t GrowthDelay> void * operator new(size_t Size, llvm::BumpPtrAllocatorImpl &Allocator) { - return Allocator.Allocate( - Size, std::min(llvm::bit_ceil(Size), alignof(std::max_align_t))); + GrowthDelay> &Allocator) { + return Allocator.Allocate(Size, std::min((size_t)llvm::NextPowerOf2(Size), + alignof(std::max_align_t))); } template -void operator delete( - void *, llvm::BumpPtrAllocatorImpl &) {} + size_t GrowthDelay> +void operator delete(void *, + llvm::BumpPtrAllocatorImpl &) { +} #endif // LLVM_SUPPORT_ALLOCATOR_H diff --git a/llvm/unittests/Support/AllocatorTest.cpp b/llvm/unittests/Support/AllocatorTest.cpp index d6f80e0948dc4..2337f34143bad 100644 --- a/llvm/unittests/Support/AllocatorTest.cpp +++ b/llvm/unittests/Support/AllocatorTest.cpp @@ -279,23 +279,4 @@ TEST(AllocatorTest, TestBigAlignment) { EXPECT_GT(MockSlabAllocator::GetLastSlabSize(), 4096u); } -// Over-aligned element type: Allocate() honors alignof(T) > MinAlign and -// DestroyAll() runs every destructor. 
-TEST(AllocatorTest, TestOverAlignedSpecific) { - unsigned NumDtorCalls = 0; - struct alignas(32) S { - unsigned *Calls; - ~S() { ++*Calls; } - }; - { - SpecificBumpPtrAllocator Alloc; - for (int I = 0; I != 4; ++I) { - S *P = Alloc.Allocate(); - EXPECT_EQ(0u, reinterpret_cast(P) & 31u); - P->Calls = &NumDtorCalls; - } - } - EXPECT_EQ(4u, NumDtorCalls); -} - } // anonymous namespace From 9ebf01e8aca41d7d4150d93795a96b1185b6d5af Mon Sep 17 00:00:00 2001 From: Sean Perry Date: Mon, 22 Jun 2026 09:18:40 -0400 Subject: [PATCH 011/511] Don't test JIT's on z/OS (#196580) The JIT's aren't supported on z/OS so no need to run the unit tests on them. --- llvm/unittests/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/unittests/CMakeLists.txt b/llvm/unittests/CMakeLists.txt index d2932520bbb80..5d507d577bb94 100644 --- a/llvm/unittests/CMakeLists.txt +++ b/llvm/unittests/CMakeLists.txt @@ -41,7 +41,9 @@ add_subdirectory(DebugInfo) add_subdirectory(Debuginfod) add_subdirectory(Demangle) add_subdirectory(DWARFLinkerParallel) +if (NOT ZOS) add_subdirectory(ExecutionEngine) +endif() add_subdirectory(FileCheck) add_subdirectory(Frontend) add_subdirectory(FuzzMutate) From 81ab2db77562bcef9b93525dff4bf5849c8e55b9 Mon Sep 17 00:00:00 2001 From: Akshay Kumar Dubey <123586645+akshaydubey05@users.noreply.github.com> Date: Mon, 22 Jun 2026 19:11:22 +0530 Subject: [PATCH 012/511] [clangd] Log environment variables that influence compilation at startup (#204990) When users face missing system include issues (especially on Windows), it's difficult to diagnose whether the problem is caused by missing environment variables like `INCLUDE`, `CPATH`, etc. This patch logs the values of environment variables that influence how the compiler finds headers and libraries, at startup alongside the existing version/PID/argv logs. Only variables that are actually set are printed. 
Variables logged: - MSVC (set by vcvarsall.bat): `INCLUDE`, `LIB`, `LIBPATH`, `CL`, `_CL_` - GCC/Clang: `CPATH`, `C_INCLUDE_PATH`, `CPLUS_INCLUDE_PATH`, `OBJC_INCLUDE_PATH`, `LIBRARY_PATH`, `GCC_EXEC_PREFIX` Fixes https://github.com/clangd/clangd/issues/2657 --- clang-tools-extra/clangd/tool/ClangdMain.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index 549b1ae8d7980..13fe4d3911731 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -912,6 +912,26 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var log("argv[{0}]: {1}", I, argv[I]); if (auto EnvFlags = llvm::sys::Process::GetEnv(FlagsEnvVar)) log("{0}: {1}", FlagsEnvVar, *EnvFlags); + // Log environment variables that influence how clangd finds system headers. + // This helps diagnose missing-include issues, especially on Windows. + for (const char *EnvVar : { + // MSVC environment variables (set by vcvarsall.bat) + "INCLUDE", + "LIB", + "LIBPATH", + "CL", + "_CL_", + // GCC/Clang environment variables + "CPATH", + "C_INCLUDE_PATH", + "CPLUS_INCLUDE_PATH", + "OBJC_INCLUDE_PATH", + "LIBRARY_PATH", + "GCC_EXEC_PREFIX", + }) { + if (auto Val = llvm::sys::Process::GetEnv(EnvVar)) + log("Env {0}: {1}", EnvVar, *Val); + } ClangdLSPServer::Options Opts; Opts.UseDirBasedCDB = (CompileArgsFrom == FilesystemCompileArgs); From 54e5b7e64581e298a554f0faec19030e535ab216 Mon Sep 17 00:00:00 2001 From: Reinhard Stahn Date: Mon, 22 Jun 2026 15:50:48 +0200 Subject: [PATCH 013/511] [mlir][scf] Tighten description of scf.for (#203713) The previous description was vague in the case that the one-past-the-last value for the induction variable is not representable in the used integer type. Current passes implicitly exploit this by implementing non-equivalent semantics (e.g. terminating vs non-terminating loops). 
We tighten the specification of `scf.for` by first stating the desired ideal semantics, but deeming overflow undefined behavior. This fixes all inconsistencies I am aware of. Documentation only; no behavioral change. --- mlir/include/mlir/Dialect/SCF/IR/SCFOps.td | 34 +++++++++++++--------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td index 0b33ecb48b7f2..c0d1ac501cc77 100644 --- a/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td +++ b/mlir/include/mlir/Dialect/SCF/IR/SCFOps.td @@ -168,19 +168,27 @@ def ForOp : SCF_Op<"for", RecursiveMemoryEffects]> { let summary = "for operation"; let description = [{ - The `scf.for` operation represents a loop taking 3 SSA value as operands - that represent the lower bound, upper bound and step respectively. The - operation defines an SSA value for its induction variable. It has one - region capturing the loop body. The induction variable is represented as an - argument of this region. This SSA value is a signless integer or index. - The step is a value of same type but required to be positive, the lower and - upper bounds can be also negative or zero. The lower and upper bounds - specify a half-open range: the iteration is executed iff the comparison of - induction variable value is less than the upper bound and bigger or equal - to the lower bound. - - By default, the integer comparison is signed. If the `unsignedCmp` unit - attribute is specified, the integer comparison is unsigned. + The `scf.for` operation represents a loop whose first three operands are the + lower bound, upper bound and step respectively. The operation has one region + capturing the loop body. The induction variable is represented as an + argument of this region. + + Lower bound, upper bound, and step are interpreted as signed integers by + default, or as unsigned integers if the `unsignedCmp` unit attribute is + present. 
The step is required to be strictly positive. + + The lower and upper bounds specify a half-open range, including the lower + bound but excluding the upper bound. More precisely, the semantics is + governed by the following two rules, where arithmetic is performed with + arbitrary precision: + + 1. The trip count `n` is `max(0, ceil((UB - LB) / Step))` and the induction + variable takes the values `LB + j*Step` for `j = 0, ..., n - 1` in that + order. + 2. No-overflow condition: `LB + n*Step` must be representable in the type of + the induction variable. Otherwise the behavior is undefined. Leaving this + case undefined lets the loop be lowered to a plain increment-and-compare + on a single induction register without having to account for wraparound. The body region must contain exactly one block that terminates with `scf.yield`. Calling ForOp::build will create such a region and insert From 980010051721e0f35d79f33b3034064beda27aee Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 22 Jun 2026 14:58:10 +0100 Subject: [PATCH 014/511] [AMDGPU] Remove support for ADDC/ADDE/SUBC/SUBE DAG nodes (#205082) Since #204694 we should be using UADDO/USUBO/UADDO_CARRY/USUBO_CARRY instead. 
--- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 85 -- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 4 +- llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 5 - llvm/lib/Target/AMDGPU/SOPInstructions.td | 13 +- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 3 - .../AMDGPU/amdgpu-codegenprepare-idiv.ll | 812 ++++++------ llvm/test/CodeGen/AMDGPU/bypass-div.ll | 516 ++++---- .../test/CodeGen/AMDGPU/carryout-selection.ll | 406 +++--- llvm/test/CodeGen/AMDGPU/llvm.mulo.ll | 52 +- llvm/test/CodeGen/AMDGPU/sdiv64.ll | 391 +++--- llvm/test/CodeGen/AMDGPU/srem.ll | 1102 ++++++++--------- llvm/test/CodeGen/AMDGPU/srem64.ll | 485 ++++---- llvm/test/CodeGen/AMDGPU/udiv64.ll | 205 +-- llvm/test/CodeGen/AMDGPU/urem64.ll | 261 ++-- llvm/test/CodeGen/AMDGPU/wave32.ll | 56 +- 15 files changed, 2148 insertions(+), 2248 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index ddd8b9e96d2d6..81053507b1ef5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -691,19 +691,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { switch (Opc) { default: break; - // We are selecting i64 ADD here instead of custom lower it during - // DAG legalization, so we can fold some i64 ADDs used for address - // calculation into the LOAD and STORE instructions. - case ISD::ADDC: - case ISD::ADDE: - case ISD::SUBC: - case ISD::SUBE: { - if (N->getValueType(0) != MVT::i64) - break; - - SelectADD_SUB_I64(N); - return; - } case ISD::UADDO_CARRY: case ISD::USUBO_CARRY: if (N->getValueType(0) == MVT::i64) { @@ -1057,78 +1044,6 @@ SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val, return SDValue(Mov, 0); } -// Keep this as a fallback for i64 ADDC/ADDE/SUBC/SUBE glue nodes. Wide integer -// add/sub should normally expand through the explicit carry nodes handled in -// SelectAddcSubbI64. 
-void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { - SDLoc DL(N); - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - - unsigned Opcode = N->getOpcode(); - bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE); - bool ProduceCarry = - ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC; - bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE; - - SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32); - SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32); - - SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - DL, MVT::i32, LHS, Sub0); - SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - DL, MVT::i32, LHS, Sub1); - - SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - DL, MVT::i32, RHS, Sub0); - SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - DL, MVT::i32, RHS, Sub1); - - SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); - - static const unsigned OpcMap[2][2][2] = { - {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32}, - {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}}, - {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32}, - {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}}; - - unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd]; - unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd]; - - SDNode *AddLo; - if (!ConsumeCarry) { - SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) }; - AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args); - } else { - SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) }; - AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args); - } - SDValue AddHiArgs[] = { - SDValue(Hi0, 0), - SDValue(Hi1, 0), - SDValue(AddLo, 1) - }; - SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs); - - SDValue RegSequenceArgs[] = { - CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), - SDValue(AddLo,0), - Sub0, - SDValue(AddHi,0), - 
Sub1, - }; - SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL, - MVT::i64, RegSequenceArgs); - - if (ProduceCarry) { - // Replace the carry-use - ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1)); - } - - // Replace the remaining uses. - ReplaceNode(N, RegSequence); -} - void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 7c614670c5dfa..be6fd4d243252 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -492,8 +492,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, setOperationAction({ISD::BSWAP, ISD::CTTZ, ISD::CTLZ}, VT, Expand); - // AMDGPU uses ADDC/SUBC/ADDE/SUBE - setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT, Legal); + setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT, + Expand); } // The hardware supports 32-bit FSHR, but not FSHL. diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index ae8e73eb5f329..a3825ac909881 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -190,11 +190,6 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM, setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); - const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; - for (MVT VT : ScalarIntVTs) - setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT, - Expand); - // LLVM will expand these to atomic_cmp_swap(0) // and atomic_swap, respectively. 
setOperationAction({ISD::ATOMIC_LOAD, ISD::ATOMIC_STORE}, MVT::i32, Expand); diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index 01dd444c477e2..b28195be51733 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -700,12 +700,10 @@ def S_SUB_I32 : SOP2_32 <"s_sub_i32", let Uses = [SCC] in { // Carry in comes from SCC let isCommutable = 1 in { -def S_ADDC_U32 : SOP2_32 <"s_addc_u32", - [(set i32:$sdst, (UniformBinFrag (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>; +def S_ADDC_U32 : SOP2_32 <"s_addc_u32">; } // End isCommutable = 1 -def S_SUBB_U32 : SOP2_32 <"s_subb_u32", - [(set i32:$sdst, (UniformBinFrag (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>; +def S_SUBB_U32 : SOP2_32 <"s_subb_u32">; } // End Uses = [SCC] let isCommutable = 1 in { @@ -2072,13 +2070,6 @@ let AddedComplexity = 20 in { >; } -// V_ADD_I32_e32/S_ADD_U32 produces carry in VCC/SCC. For the vector -// case, the sgpr-copies pass will fix this to use the vector version. -def : GCNPat < - (i32 (addc i32:$src0, i32:$src1)), - (S_ADD_U32 $src0, $src1) ->; - // FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that // REG_SEQUENCE patterns don't support instructions with multiple // outputs. 
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index 74864e10bf4a5..bcf2ca6e8729e 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -1040,9 +1040,6 @@ def : DivergentClampingBinOp; def : DivergentClampingBinOp; } -def : DivergentBinOp; -def : DivergentBinOp; - class divergent_i64_BinOp : GCNPat< (DivergentBinFrag vt:$src0, vt:$src1), diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll index d83bb728e47f9..4a33008661b8e 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -7681,6 +7681,7 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd ; GFX6-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 ; GFX6-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) ; GFX6-NEXT: s_lshl_b64 s[0:1], 0x1000, s0 ; GFX6-NEXT: s_ashr_i32 s8, s1, 31 @@ -7702,60 +7703,60 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: v_mul_hi_u32 v2, s0, v0 ; GFX6-NEXT: v_readfirstlane_b32 s12, v1 -; GFX6-NEXT: v_readfirstlane_b32 s2, v0 -; GFX6-NEXT: s_mul_i32 s13, s0, s12 -; GFX6-NEXT: v_readfirstlane_b32 s16, v2 -; GFX6-NEXT: s_mul_i32 s14, s1, s2 -; GFX6-NEXT: s_mul_i32 s15, s0, s2 -; GFX6-NEXT: s_add_i32 s13, s16, s13 -; GFX6-NEXT: v_mul_hi_u32 v3, v0, s15 -; GFX6-NEXT: s_add_i32 s13, s13, s14 -; GFX6-NEXT: v_mul_hi_u32 v0, v0, s13 -; GFX6-NEXT: v_mul_hi_u32 v4, v1, s15 -; GFX6-NEXT: v_readfirstlane_b32 s14, v3 -; GFX6-NEXT: s_mul_i32 s16, s2, s13 -; GFX6-NEXT: v_mul_hi_u32 v1, v1, s13 -; GFX6-NEXT: s_add_u32 s14, s14, s16 +; GFX6-NEXT: v_readfirstlane_b32 s13, v0 +; GFX6-NEXT: s_mul_i32 s14, s0, s12 +; GFX6-NEXT: v_readfirstlane_b32 s17, v2 +; 
GFX6-NEXT: s_mul_i32 s15, s1, s13 +; GFX6-NEXT: s_mul_i32 s16, s0, s13 +; GFX6-NEXT: s_add_i32 s14, s17, s14 +; GFX6-NEXT: v_mul_hi_u32 v3, v0, s16 +; GFX6-NEXT: s_add_i32 s14, s14, s15 +; GFX6-NEXT: v_mul_hi_u32 v0, v0, s14 +; GFX6-NEXT: v_mul_hi_u32 v4, v1, s16 +; GFX6-NEXT: v_readfirstlane_b32 s15, v3 +; GFX6-NEXT: s_mul_i32 s17, s13, s14 +; GFX6-NEXT: s_add_u32 s15, s15, s17 +; GFX6-NEXT: v_readfirstlane_b32 s17, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, v1, s14 +; GFX6-NEXT: s_mul_i32 s16, s12, s16 +; GFX6-NEXT: s_addc_u32 s17, 0, s17 +; GFX6-NEXT: v_readfirstlane_b32 s18, v4 +; GFX6-NEXT: s_add_u32 s15, s15, s16 +; GFX6-NEXT: s_addc_u32 s15, s17, s18 ; GFX6-NEXT: v_readfirstlane_b32 s16, v0 -; GFX6-NEXT: s_mul_i32 s15, s12, s15 -; GFX6-NEXT: s_addc_u32 s16, 0, s16 -; GFX6-NEXT: v_readfirstlane_b32 s17, v4 -; GFX6-NEXT: s_add_u32 s14, s14, s15 -; GFX6-NEXT: s_addc_u32 s14, s16, s17 +; GFX6-NEXT: s_addc_u32 s16, s16, 0 +; GFX6-NEXT: s_mul_i32 s14, s12, s14 +; GFX6-NEXT: s_add_u32 s14, s15, s14 +; GFX6-NEXT: s_addc_u32 s15, 0, s16 +; GFX6-NEXT: s_add_u32 s13, s13, s14 +; GFX6-NEXT: v_mov_b32_e32 v1, s13 +; GFX6-NEXT: v_mul_hi_u32 v1, s0, v1 +; GFX6-NEXT: s_addc_u32 s12, s12, s15 +; GFX6-NEXT: s_mul_i32 s14, s0, s13 +; GFX6-NEXT: s_mul_i32 s0, s0, s12 ; GFX6-NEXT: v_readfirstlane_b32 s15, v1 -; GFX6-NEXT: s_addc_u32 s15, s15, 0 -; GFX6-NEXT: s_mul_i32 s13, s12, s13 -; GFX6-NEXT: s_add_u32 s13, s14, s13 -; GFX6-NEXT: s_addc_u32 s14, 0, s15 -; GFX6-NEXT: s_add_u32 s13, s2, s13 -; GFX6-NEXT: v_mov_b32_e32 v0, s13 -; GFX6-NEXT: v_mul_hi_u32 v0, s0, v0 -; GFX6-NEXT: s_addc_u32 s12, s12, s14 -; GFX6-NEXT: s_mul_i32 s14, s0, s12 +; GFX6-NEXT: s_add_i32 s0, s15, s0 ; GFX6-NEXT: s_mul_i32 s1, s1, s13 -; GFX6-NEXT: v_readfirstlane_b32 s15, v0 -; GFX6-NEXT: s_add_i32 s14, s15, s14 -; GFX6-NEXT: s_mul_i32 s0, s0, s13 -; GFX6-NEXT: s_add_i32 s1, s14, s1 -; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: v_mov_b32_e32 v0, s1 -; GFX6-NEXT: v_mul_hi_u32 v3, s12, v2 -; GFX6-NEXT: 
v_mul_hi_u32 v2, s13, v2 -; GFX6-NEXT: v_mul_hi_u32 v1, s12, v0 +; GFX6-NEXT: v_mov_b32_e32 v0, s14 +; GFX6-NEXT: s_add_i32 s0, s0, s1 +; GFX6-NEXT: v_mul_hi_u32 v2, s12, v0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 ; GFX6-NEXT: v_mul_hi_u32 v0, s13, v0 -; GFX6-NEXT: s_mul_i32 s15, s13, s1 -; GFX6-NEXT: v_readfirstlane_b32 s17, v2 +; GFX6-NEXT: v_mul_hi_u32 v3, s13, v1 +; GFX6-NEXT: s_mul_i32 s15, s13, s0 +; GFX6-NEXT: s_mul_i32 s14, s12, s14 +; GFX6-NEXT: v_readfirstlane_b32 s17, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, s12, v1 +; GFX6-NEXT: v_readfirstlane_b32 s16, v3 ; GFX6-NEXT: s_add_u32 s15, s17, s15 -; GFX6-NEXT: v_readfirstlane_b32 s16, v0 -; GFX6-NEXT: s_mul_i32 s0, s12, s0 ; GFX6-NEXT: s_addc_u32 s16, 0, s16 -; GFX6-NEXT: v_readfirstlane_b32 s14, v3 -; GFX6-NEXT: s_add_u32 s0, s15, s0 -; GFX6-NEXT: s_addc_u32 s0, s16, s14 -; GFX6-NEXT: v_readfirstlane_b32 s14, v1 +; GFX6-NEXT: v_readfirstlane_b32 s1, v2 +; GFX6-NEXT: s_add_u32 s14, s15, s14 +; GFX6-NEXT: s_addc_u32 s1, s16, s1 +; GFX6-NEXT: v_readfirstlane_b32 s14, v0 ; GFX6-NEXT: s_addc_u32 s14, s14, 0 -; GFX6-NEXT: s_mul_i32 s1, s12, s1 -; GFX6-NEXT: s_add_u32 s0, s0, s1 +; GFX6-NEXT: s_mul_i32 s0, s12, s0 +; GFX6-NEXT: s_add_u32 s0, s1, s0 ; GFX6-NEXT: s_addc_u32 s1, 0, s14 ; GFX6-NEXT: s_add_u32 s14, s13, s0 ; GFX6-NEXT: s_addc_u32 s15, s12, s1 @@ -7829,7 +7830,6 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: s_sub_u32 s4, s4, s6 ; GFX6-NEXT: s_subb_u32 s5, s5, s7 ; GFX6-NEXT: v_mov_b32_e32 v0, s4 -; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm @@ -7870,37 +7870,37 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX9-NEXT: s_mul_hi_u32 s13, s11, s12 ; GFX9-NEXT: s_add_u32 s14, s14, s16 ; GFX9-NEXT: s_addc_u32 s13, 0, s13 -; GFX9-NEXT: s_mul_hi_u32 s17, s10, s15 +; GFX9-NEXT: s_mul_hi_u32 s16, s10, s15 ; GFX9-NEXT: s_mul_i32 s15, s10, 
s15 ; GFX9-NEXT: s_add_u32 s14, s14, s15 -; GFX9-NEXT: s_mul_hi_u32 s16, s10, s12 -; GFX9-NEXT: s_addc_u32 s13, s13, s17 -; GFX9-NEXT: s_addc_u32 s14, s16, 0 +; GFX9-NEXT: s_addc_u32 s13, s13, s16 +; GFX9-NEXT: s_mul_hi_u32 s14, s10, s12 +; GFX9-NEXT: s_addc_u32 s14, s14, 0 ; GFX9-NEXT: s_mul_i32 s12, s10, s12 ; GFX9-NEXT: s_add_u32 s12, s13, s12 ; GFX9-NEXT: s_addc_u32 s13, 0, s14 ; GFX9-NEXT: s_add_u32 s11, s11, s12 ; GFX9-NEXT: s_addc_u32 s10, s10, s13 -; GFX9-NEXT: s_mul_i32 s12, s4, s10 -; GFX9-NEXT: s_mul_hi_u32 s13, s4, s11 -; GFX9-NEXT: s_add_i32 s12, s13, s12 +; GFX9-NEXT: s_mul_i32 s12, s4, s11 +; GFX9-NEXT: s_mul_i32 s15, s4, s10 +; GFX9-NEXT: s_mul_hi_u32 s4, s4, s11 +; GFX9-NEXT: s_add_i32 s4, s4, s15 ; GFX9-NEXT: s_mul_i32 s5, s5, s11 -; GFX9-NEXT: s_add_i32 s12, s12, s5 -; GFX9-NEXT: s_mul_i32 s4, s4, s11 -; GFX9-NEXT: s_mul_hi_u32 s13, s10, s4 -; GFX9-NEXT: s_mul_i32 s14, s10, s4 -; GFX9-NEXT: s_mul_i32 s16, s11, s12 -; GFX9-NEXT: s_mul_hi_u32 s4, s11, s4 -; GFX9-NEXT: s_mul_hi_u32 s15, s11, s12 -; GFX9-NEXT: s_add_u32 s4, s4, s16 -; GFX9-NEXT: s_addc_u32 s15, 0, s15 -; GFX9-NEXT: s_add_u32 s4, s4, s14 -; GFX9-NEXT: s_mul_hi_u32 s5, s10, s12 -; GFX9-NEXT: s_addc_u32 s4, s15, s13 -; GFX9-NEXT: s_addc_u32 s5, s5, 0 -; GFX9-NEXT: s_mul_i32 s12, s10, s12 -; GFX9-NEXT: s_add_u32 s4, s4, s12 +; GFX9-NEXT: s_add_i32 s4, s4, s5 +; GFX9-NEXT: s_mul_hi_u32 s13, s10, s12 +; GFX9-NEXT: s_mul_i32 s14, s10, s12 +; GFX9-NEXT: s_mul_i32 s15, s11, s4 +; GFX9-NEXT: s_mul_hi_u32 s12, s11, s12 +; GFX9-NEXT: s_mul_hi_u32 s5, s11, s4 +; GFX9-NEXT: s_add_u32 s12, s12, s15 ; GFX9-NEXT: s_addc_u32 s5, 0, s5 +; GFX9-NEXT: s_add_u32 s12, s12, s14 +; GFX9-NEXT: s_addc_u32 s5, s5, s13 +; GFX9-NEXT: s_mul_hi_u32 s12, s10, s4 +; GFX9-NEXT: s_addc_u32 s12, s12, 0 +; GFX9-NEXT: s_mul_i32 s4, s10, s4 +; GFX9-NEXT: s_add_u32 s4, s5, s4 +; GFX9-NEXT: s_addc_u32 s5, 0, s12 ; GFX9-NEXT: s_add_u32 s11, s11, s4 ; GFX9-NEXT: s_addc_u32 s10, s10, s5 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ 
-7914,12 +7914,12 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX9-NEXT: s_mul_hi_u32 s12, s2, s10 ; GFX9-NEXT: s_add_u32 s13, s14, s13 ; GFX9-NEXT: s_addc_u32 s12, 0, s12 -; GFX9-NEXT: s_mul_hi_u32 s15, s3, s11 +; GFX9-NEXT: s_mul_hi_u32 s14, s3, s11 ; GFX9-NEXT: s_mul_i32 s11, s3, s11 ; GFX9-NEXT: s_add_u32 s11, s13, s11 -; GFX9-NEXT: s_mul_hi_u32 s14, s3, s10 -; GFX9-NEXT: s_addc_u32 s11, s12, s15 -; GFX9-NEXT: s_addc_u32 s12, s14, 0 +; GFX9-NEXT: s_addc_u32 s11, s12, s14 +; GFX9-NEXT: s_mul_hi_u32 s12, s3, s10 +; GFX9-NEXT: s_addc_u32 s12, s12, 0 ; GFX9-NEXT: s_mul_i32 s10, s3, s10 ; GFX9-NEXT: s_add_u32 s13, s11, s10 ; GFX9-NEXT: s_addc_u32 s12, 0, s12 @@ -8218,34 +8218,34 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX6-NEXT: s_add_u32 s16, s17, s16 ; GFX6-NEXT: s_addc_u32 s17, 0, s18 ; GFX6-NEXT: s_add_u32 s15, s15, s16 -; GFX6-NEXT: v_mov_b32_e32 v0, s15 -; GFX6-NEXT: v_mul_hi_u32 v0, s12, v0 +; GFX6-NEXT: v_mov_b32_e32 v1, s15 +; GFX6-NEXT: v_mul_hi_u32 v1, s12, v1 ; GFX6-NEXT: s_addc_u32 s14, s14, s17 -; GFX6-NEXT: s_mul_i32 s16, s12, s14 +; GFX6-NEXT: s_mul_i32 s16, s12, s15 +; GFX6-NEXT: s_mul_i32 s12, s12, s14 +; GFX6-NEXT: v_readfirstlane_b32 s17, v1 +; GFX6-NEXT: s_add_i32 s12, s17, s12 ; GFX6-NEXT: s_mul_i32 s13, s13, s15 -; GFX6-NEXT: v_readfirstlane_b32 s17, v0 -; GFX6-NEXT: s_add_i32 s16, s17, s16 -; GFX6-NEXT: s_mul_i32 s12, s12, s15 -; GFX6-NEXT: s_add_i32 s13, s16, s13 -; GFX6-NEXT: v_mov_b32_e32 v2, s12 -; GFX6-NEXT: v_mov_b32_e32 v0, s13 -; GFX6-NEXT: v_mul_hi_u32 v3, s14, v2 -; GFX6-NEXT: v_mul_hi_u32 v2, s15, v2 -; GFX6-NEXT: v_mul_hi_u32 v1, s14, v0 +; GFX6-NEXT: v_mov_b32_e32 v0, s16 +; GFX6-NEXT: s_add_i32 s12, s12, s13 +; GFX6-NEXT: v_mul_hi_u32 v2, s14, v0 +; GFX6-NEXT: v_mov_b32_e32 v1, s12 ; GFX6-NEXT: v_mul_hi_u32 v0, s15, v0 -; GFX6-NEXT: s_mul_i32 s17, s15, s13 -; GFX6-NEXT: v_readfirstlane_b32 s19, v2 +; GFX6-NEXT: v_mul_hi_u32 v3, s15, v1 +; 
GFX6-NEXT: s_mul_i32 s17, s15, s12 +; GFX6-NEXT: s_mul_i32 s16, s14, s16 +; GFX6-NEXT: v_readfirstlane_b32 s19, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, s14, v1 +; GFX6-NEXT: v_readfirstlane_b32 s18, v3 ; GFX6-NEXT: s_add_u32 s17, s19, s17 -; GFX6-NEXT: v_readfirstlane_b32 s18, v0 -; GFX6-NEXT: s_mul_i32 s12, s14, s12 ; GFX6-NEXT: s_addc_u32 s18, 0, s18 -; GFX6-NEXT: v_readfirstlane_b32 s16, v3 -; GFX6-NEXT: s_add_u32 s12, s17, s12 -; GFX6-NEXT: s_addc_u32 s12, s18, s16 -; GFX6-NEXT: v_readfirstlane_b32 s16, v1 +; GFX6-NEXT: v_readfirstlane_b32 s13, v2 +; GFX6-NEXT: s_add_u32 s16, s17, s16 +; GFX6-NEXT: s_addc_u32 s13, s18, s13 +; GFX6-NEXT: v_readfirstlane_b32 s16, v0 ; GFX6-NEXT: s_addc_u32 s16, s16, 0 -; GFX6-NEXT: s_mul_i32 s13, s14, s13 -; GFX6-NEXT: s_add_u32 s12, s12, s13 +; GFX6-NEXT: s_mul_i32 s12, s14, s12 +; GFX6-NEXT: s_add_u32 s12, s13, s12 ; GFX6-NEXT: s_addc_u32 s13, 0, s16 ; GFX6-NEXT: s_add_u32 s15, s15, s12 ; GFX6-NEXT: s_addc_u32 s14, s14, s13 @@ -8334,65 +8334,65 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: v_mul_hi_u32 v2, s2, v0 -; GFX6-NEXT: v_readfirstlane_b32 s12, v1 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: s_mul_i32 s13, s2, s12 +; GFX6-NEXT: v_readfirstlane_b32 s13, v1 +; GFX6-NEXT: v_readfirstlane_b32 s12, v0 +; GFX6-NEXT: s_mul_i32 s1, s2, s13 ; GFX6-NEXT: v_readfirstlane_b32 s16, v2 -; GFX6-NEXT: s_mul_i32 s1, s3, s0 -; GFX6-NEXT: s_add_i32 s13, s16, s13 -; GFX6-NEXT: s_add_i32 s13, s13, s1 -; GFX6-NEXT: s_mul_i32 s1, s2, s0 -; GFX6-NEXT: v_mul_hi_u32 v2, v0, s13 -; GFX6-NEXT: v_mul_hi_u32 v0, v0, s1 -; GFX6-NEXT: s_mul_i32 s16, s0, s13 -; GFX6-NEXT: v_readfirstlane_b32 s17, v2 +; GFX6-NEXT: s_mul_i32 s0, s3, s12 +; GFX6-NEXT: s_add_i32 s1, s16, s1 +; GFX6-NEXT: s_add_i32 s16, s1, s0 +; GFX6-NEXT: s_mul_i32 s17, s2, s12 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, s16 +; GFX6-NEXT: v_mul_hi_u32 v0, v0, s17 +; 
GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GFX6-NEXT: s_mul_i32 s4, s12, s16 +; GFX6-NEXT: v_readfirstlane_b32 s5, v2 ; GFX6-NEXT: v_readfirstlane_b32 s18, v0 -; GFX6-NEXT: v_mul_hi_u32 v0, v1, s1 -; GFX6-NEXT: v_mul_hi_u32 v1, v1, s13 -; GFX6-NEXT: s_add_u32 s16, s18, s16 -; GFX6-NEXT: s_addc_u32 s17, 0, s17 -; GFX6-NEXT: s_mul_i32 s1, s12, s1 +; GFX6-NEXT: v_mul_hi_u32 v0, v1, s17 +; GFX6-NEXT: v_mul_hi_u32 v1, v1, s16 +; GFX6-NEXT: s_add_u32 s4, s18, s4 +; GFX6-NEXT: s_addc_u32 s5, 0, s5 +; GFX6-NEXT: s_mul_i32 s17, s13, s17 ; GFX6-NEXT: v_readfirstlane_b32 s18, v0 -; GFX6-NEXT: s_add_u32 s1, s16, s1 -; GFX6-NEXT: s_addc_u32 s1, s17, s18 -; GFX6-NEXT: v_readfirstlane_b32 s16, v1 -; GFX6-NEXT: s_addc_u32 s16, s16, 0 -; GFX6-NEXT: s_mul_i32 s13, s12, s13 -; GFX6-NEXT: s_add_u32 s1, s1, s13 -; GFX6-NEXT: s_addc_u32 s13, 0, s16 -; GFX6-NEXT: s_add_u32 s16, s0, s1 -; GFX6-NEXT: v_mov_b32_e32 v0, s16 -; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 -; GFX6-NEXT: s_addc_u32 s4, s12, s13 -; GFX6-NEXT: s_mul_i32 s5, s2, s4 -; GFX6-NEXT: v_readfirstlane_b32 s12, v0 -; GFX6-NEXT: s_add_i32 s5, s12, s5 -; GFX6-NEXT: s_mul_i32 s3, s3, s16 -; GFX6-NEXT: s_mul_i32 s2, s2, s16 -; GFX6-NEXT: s_add_i32 s3, s5, s3 -; GFX6-NEXT: v_mov_b32_e32 v2, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s3 -; GFX6-NEXT: v_mul_hi_u32 v3, s4, v2 -; GFX6-NEXT: v_mul_hi_u32 v2, s16, v2 -; GFX6-NEXT: v_mul_hi_u32 v1, s4, v0 -; GFX6-NEXT: v_mul_hi_u32 v0, s16, v0 -; GFX6-NEXT: s_mul_i32 s12, s16, s3 -; GFX6-NEXT: v_readfirstlane_b32 s17, v2 -; GFX6-NEXT: s_add_u32 s12, s17, s12 -; GFX6-NEXT: v_readfirstlane_b32 s13, v0 -; GFX6-NEXT: s_mul_i32 s2, s4, s2 -; GFX6-NEXT: s_addc_u32 s13, 0, s13 -; GFX6-NEXT: v_readfirstlane_b32 s5, v3 -; GFX6-NEXT: s_add_u32 s2, s12, s2 -; GFX6-NEXT: s_addc_u32 s2, s13, s5 +; GFX6-NEXT: s_add_u32 s4, s4, s17 +; GFX6-NEXT: s_addc_u32 s4, s5, s18 ; GFX6-NEXT: v_readfirstlane_b32 s5, v1 ; GFX6-NEXT: s_addc_u32 s5, s5, 0 -; GFX6-NEXT: 
s_mul_i32 s3, s4, s3 -; GFX6-NEXT: s_add_u32 s2, s2, s3 -; GFX6-NEXT: s_addc_u32 s3, 0, s5 -; GFX6-NEXT: s_add_u32 s12, s16, s2 -; GFX6-NEXT: s_addc_u32 s13, s4, s3 +; GFX6-NEXT: s_mul_i32 s16, s13, s16 +; GFX6-NEXT: s_add_u32 s4, s4, s16 +; GFX6-NEXT: s_addc_u32 s5, 0, s5 +; GFX6-NEXT: s_add_u32 s4, s12, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s4 +; GFX6-NEXT: v_mul_hi_u32 v1, s2, v1 +; GFX6-NEXT: s_addc_u32 s5, s13, s5 +; GFX6-NEXT: s_mul_i32 s12, s2, s4 +; GFX6-NEXT: s_mul_i32 s2, s2, s5 +; GFX6-NEXT: v_readfirstlane_b32 s13, v1 +; GFX6-NEXT: s_add_i32 s2, s13, s2 +; GFX6-NEXT: s_mul_i32 s3, s3, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s12 +; GFX6-NEXT: s_add_i32 s2, s2, s3 +; GFX6-NEXT: v_mul_hi_u32 v2, s5, v0 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 +; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX6-NEXT: v_mul_hi_u32 v3, s4, v1 +; GFX6-NEXT: s_mul_i32 s13, s4, s2 +; GFX6-NEXT: s_mul_i32 s12, s5, s12 +; GFX6-NEXT: v_readfirstlane_b32 s17, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, s5, v1 +; GFX6-NEXT: v_readfirstlane_b32 s16, v3 +; GFX6-NEXT: s_add_u32 s13, s17, s13 +; GFX6-NEXT: s_addc_u32 s16, 0, s16 +; GFX6-NEXT: v_readfirstlane_b32 s3, v2 +; GFX6-NEXT: s_add_u32 s12, s13, s12 +; GFX6-NEXT: s_addc_u32 s3, s16, s3 +; GFX6-NEXT: v_readfirstlane_b32 s12, v0 +; GFX6-NEXT: s_addc_u32 s12, s12, 0 +; GFX6-NEXT: s_mul_i32 s2, s5, s2 +; GFX6-NEXT: s_add_u32 s2, s3, s2 +; GFX6-NEXT: s_addc_u32 s3, 0, s12 +; GFX6-NEXT: s_add_u32 s12, s4, s2 +; GFX6-NEXT: s_addc_u32 s13, s5, s3 ; GFX6-NEXT: s_ashr_i32 s4, s11, 31 ; GFX6-NEXT: s_add_u32 s2, s10, s4 ; GFX6-NEXT: s_mov_b32 s5, s4 @@ -8501,41 +8501,41 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX9-NEXT: s_mul_i32 s19, s12, s15 ; GFX9-NEXT: s_add_i32 s16, s16, s17 ; GFX9-NEXT: s_mul_hi_u32 s18, s15, s19 -; GFX9-NEXT: s_mul_i32 s20, s15, s16 +; GFX9-NEXT: s_mul_i32 s21, s15, s16 ; GFX9-NEXT: s_mul_hi_u32 s17, s15, s16 -; GFX9-NEXT: s_add_u32 s18, s18, s20 -; GFX9-NEXT: s_addc_u32 s17, 0, s17 +; GFX9-NEXT: 
s_add_u32 s18, s18, s21 ; GFX9-NEXT: s_mul_hi_u32 s20, s14, s19 +; GFX9-NEXT: s_addc_u32 s17, 0, s17 ; GFX9-NEXT: s_mul_i32 s19, s14, s19 ; GFX9-NEXT: s_add_u32 s18, s18, s19 -; GFX9-NEXT: s_mul_hi_u32 s21, s14, s16 ; GFX9-NEXT: s_addc_u32 s17, s17, s20 -; GFX9-NEXT: s_addc_u32 s18, s21, 0 +; GFX9-NEXT: s_mul_hi_u32 s18, s14, s16 +; GFX9-NEXT: s_addc_u32 s18, s18, 0 ; GFX9-NEXT: s_mul_i32 s16, s14, s16 ; GFX9-NEXT: s_add_u32 s16, s17, s16 ; GFX9-NEXT: s_addc_u32 s17, 0, s18 ; GFX9-NEXT: s_add_u32 s15, s15, s16 ; GFX9-NEXT: s_addc_u32 s14, s14, s17 -; GFX9-NEXT: s_mul_i32 s16, s12, s14 -; GFX9-NEXT: s_mul_hi_u32 s17, s12, s15 -; GFX9-NEXT: s_add_i32 s16, s17, s16 +; GFX9-NEXT: s_mul_i32 s16, s12, s15 +; GFX9-NEXT: s_mul_i32 s19, s12, s14 +; GFX9-NEXT: s_mul_hi_u32 s12, s12, s15 +; GFX9-NEXT: s_add_i32 s12, s12, s19 ; GFX9-NEXT: s_mul_i32 s13, s13, s15 -; GFX9-NEXT: s_add_i32 s16, s16, s13 -; GFX9-NEXT: s_mul_i32 s12, s12, s15 -; GFX9-NEXT: s_mul_hi_u32 s17, s14, s12 -; GFX9-NEXT: s_mul_i32 s18, s14, s12 -; GFX9-NEXT: s_mul_i32 s20, s15, s16 -; GFX9-NEXT: s_mul_hi_u32 s12, s15, s12 -; GFX9-NEXT: s_mul_hi_u32 s19, s15, s16 -; GFX9-NEXT: s_add_u32 s12, s12, s20 -; GFX9-NEXT: s_addc_u32 s19, 0, s19 -; GFX9-NEXT: s_add_u32 s12, s12, s18 -; GFX9-NEXT: s_mul_hi_u32 s13, s14, s16 -; GFX9-NEXT: s_addc_u32 s12, s19, s17 -; GFX9-NEXT: s_addc_u32 s13, s13, 0 -; GFX9-NEXT: s_mul_i32 s16, s14, s16 -; GFX9-NEXT: s_add_u32 s12, s12, s16 +; GFX9-NEXT: s_add_i32 s12, s12, s13 +; GFX9-NEXT: s_mul_hi_u32 s17, s14, s16 +; GFX9-NEXT: s_mul_i32 s18, s14, s16 +; GFX9-NEXT: s_mul_i32 s19, s15, s12 +; GFX9-NEXT: s_mul_hi_u32 s16, s15, s16 +; GFX9-NEXT: s_mul_hi_u32 s13, s15, s12 +; GFX9-NEXT: s_add_u32 s16, s16, s19 ; GFX9-NEXT: s_addc_u32 s13, 0, s13 +; GFX9-NEXT: s_add_u32 s16, s16, s18 +; GFX9-NEXT: s_addc_u32 s13, s13, s17 +; GFX9-NEXT: s_mul_hi_u32 s16, s14, s12 +; GFX9-NEXT: s_addc_u32 s16, s16, 0 +; GFX9-NEXT: s_mul_i32 s12, s14, s12 +; GFX9-NEXT: s_add_u32 s12, s13, s12 +; GFX9-NEXT: 
s_addc_u32 s13, 0, s16 ; GFX9-NEXT: s_add_u32 s15, s15, s12 ; GFX9-NEXT: s_addc_u32 s14, s14, s13 ; GFX9-NEXT: s_ashr_i32 s12, s9, 31 @@ -8548,12 +8548,12 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX9-NEXT: s_mul_hi_u32 s16, s8, s14 ; GFX9-NEXT: s_add_u32 s17, s18, s17 ; GFX9-NEXT: s_addc_u32 s16, 0, s16 -; GFX9-NEXT: s_mul_hi_u32 s19, s9, s15 +; GFX9-NEXT: s_mul_hi_u32 s18, s9, s15 ; GFX9-NEXT: s_mul_i32 s15, s9, s15 ; GFX9-NEXT: s_add_u32 s15, s17, s15 -; GFX9-NEXT: s_mul_hi_u32 s18, s9, s14 -; GFX9-NEXT: s_addc_u32 s15, s16, s19 -; GFX9-NEXT: s_addc_u32 s16, s18, 0 +; GFX9-NEXT: s_addc_u32 s15, s16, s18 +; GFX9-NEXT: s_mul_hi_u32 s16, s9, s14 +; GFX9-NEXT: s_addc_u32 s16, s16, 0 ; GFX9-NEXT: s_mul_i32 s14, s9, s14 ; GFX9-NEXT: s_add_u32 s17, s15, s14 ; GFX9-NEXT: s_addc_u32 s16, 0, s16 @@ -8629,37 +8629,37 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX9-NEXT: s_mul_hi_u32 s9, s8, s14 ; GFX9-NEXT: s_add_u32 s16, s18, s16 ; GFX9-NEXT: s_addc_u32 s9, 0, s9 -; GFX9-NEXT: s_mul_hi_u32 s19, s15, s17 +; GFX9-NEXT: s_mul_hi_u32 s18, s15, s17 ; GFX9-NEXT: s_mul_i32 s17, s15, s17 ; GFX9-NEXT: s_add_u32 s16, s16, s17 -; GFX9-NEXT: s_mul_hi_u32 s18, s15, s14 -; GFX9-NEXT: s_addc_u32 s9, s9, s19 -; GFX9-NEXT: s_addc_u32 s16, s18, 0 +; GFX9-NEXT: s_addc_u32 s9, s9, s18 +; GFX9-NEXT: s_mul_hi_u32 s16, s15, s14 +; GFX9-NEXT: s_addc_u32 s16, s16, 0 ; GFX9-NEXT: s_mul_i32 s14, s15, s14 ; GFX9-NEXT: s_add_u32 s9, s9, s14 ; GFX9-NEXT: s_addc_u32 s14, 0, s16 ; GFX9-NEXT: s_add_u32 s8, s8, s9 ; GFX9-NEXT: s_addc_u32 s9, s15, s14 -; GFX9-NEXT: s_mul_i32 s14, s4, s9 -; GFX9-NEXT: s_mul_hi_u32 s15, s4, s8 -; GFX9-NEXT: s_add_i32 s14, s15, s14 +; GFX9-NEXT: s_mul_i32 s14, s4, s8 +; GFX9-NEXT: s_mul_i32 s17, s4, s9 +; GFX9-NEXT: s_mul_hi_u32 s4, s4, s8 +; GFX9-NEXT: s_add_i32 s4, s4, s17 ; GFX9-NEXT: s_mul_i32 s5, s5, s8 -; GFX9-NEXT: s_add_i32 s14, s14, s5 -; GFX9-NEXT: s_mul_i32 s4, s4, s8 -; 
GFX9-NEXT: s_mul_hi_u32 s15, s9, s4 -; GFX9-NEXT: s_mul_i32 s16, s9, s4 -; GFX9-NEXT: s_mul_i32 s18, s8, s14 -; GFX9-NEXT: s_mul_hi_u32 s4, s8, s4 -; GFX9-NEXT: s_mul_hi_u32 s17, s8, s14 -; GFX9-NEXT: s_add_u32 s4, s4, s18 -; GFX9-NEXT: s_addc_u32 s17, 0, s17 -; GFX9-NEXT: s_add_u32 s4, s4, s16 -; GFX9-NEXT: s_mul_hi_u32 s5, s9, s14 -; GFX9-NEXT: s_addc_u32 s4, s17, s15 -; GFX9-NEXT: s_addc_u32 s5, s5, 0 -; GFX9-NEXT: s_mul_i32 s14, s9, s14 -; GFX9-NEXT: s_add_u32 s4, s4, s14 +; GFX9-NEXT: s_add_i32 s4, s4, s5 +; GFX9-NEXT: s_mul_hi_u32 s15, s9, s14 +; GFX9-NEXT: s_mul_i32 s16, s9, s14 +; GFX9-NEXT: s_mul_i32 s17, s8, s4 +; GFX9-NEXT: s_mul_hi_u32 s14, s8, s14 +; GFX9-NEXT: s_mul_hi_u32 s5, s8, s4 +; GFX9-NEXT: s_add_u32 s14, s14, s17 ; GFX9-NEXT: s_addc_u32 s5, 0, s5 +; GFX9-NEXT: s_add_u32 s14, s14, s16 +; GFX9-NEXT: s_addc_u32 s5, s5, s15 +; GFX9-NEXT: s_mul_hi_u32 s14, s9, s4 +; GFX9-NEXT: s_addc_u32 s14, s14, 0 +; GFX9-NEXT: s_mul_i32 s4, s9, s4 +; GFX9-NEXT: s_add_u32 s4, s5, s4 +; GFX9-NEXT: s_addc_u32 s5, 0, s14 ; GFX9-NEXT: s_add_u32 s14, s8, s4 ; GFX9-NEXT: s_addc_u32 s15, s9, s5 ; GFX9-NEXT: s_ashr_i32 s4, s11, 31 @@ -8672,12 +8672,12 @@ define amdgpu_kernel void @sdiv_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX9-NEXT: s_mul_hi_u32 s10, s8, s15 ; GFX9-NEXT: s_add_u32 s11, s16, s11 ; GFX9-NEXT: s_addc_u32 s10, 0, s10 -; GFX9-NEXT: s_mul_hi_u32 s17, s9, s14 +; GFX9-NEXT: s_mul_hi_u32 s16, s9, s14 ; GFX9-NEXT: s_mul_i32 s14, s9, s14 ; GFX9-NEXT: s_add_u32 s11, s11, s14 -; GFX9-NEXT: s_mul_hi_u32 s16, s9, s15 -; GFX9-NEXT: s_addc_u32 s10, s10, s17 -; GFX9-NEXT: s_addc_u32 s11, s16, 0 +; GFX9-NEXT: s_addc_u32 s10, s10, s16 +; GFX9-NEXT: s_mul_hi_u32 s11, s9, s15 +; GFX9-NEXT: s_addc_u32 s11, s11, 0 ; GFX9-NEXT: s_mul_i32 s14, s9, s15 ; GFX9-NEXT: s_add_u32 s14, s10, s14 ; GFX9-NEXT: s_addc_u32 s15, 0, s11 @@ -8908,6 +8908,7 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: s_mov_b32 s3, 0xf000 ; 
GFX6-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GFX6-NEXT: v_rcp_f32_e32 v0, v0 +; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GFX6-NEXT: v_trunc_f32_e32 v1, v1 @@ -8916,60 +8917,60 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: v_mul_hi_u32 v2, s0, v0 ; GFX6-NEXT: v_readfirstlane_b32 s10, v1 -; GFX6-NEXT: v_readfirstlane_b32 s2, v0 -; GFX6-NEXT: s_mul_i32 s11, s0, s10 -; GFX6-NEXT: v_readfirstlane_b32 s14, v2 -; GFX6-NEXT: s_mul_i32 s12, s1, s2 -; GFX6-NEXT: s_mul_i32 s13, s0, s2 -; GFX6-NEXT: s_add_i32 s11, s14, s11 -; GFX6-NEXT: v_mul_hi_u32 v3, v0, s13 -; GFX6-NEXT: s_add_i32 s11, s11, s12 -; GFX6-NEXT: v_mul_hi_u32 v0, v0, s11 -; GFX6-NEXT: v_mul_hi_u32 v4, v1, s13 -; GFX6-NEXT: v_readfirstlane_b32 s12, v3 -; GFX6-NEXT: s_mul_i32 s14, s2, s11 -; GFX6-NEXT: v_mul_hi_u32 v1, v1, s11 -; GFX6-NEXT: s_add_u32 s12, s12, s14 +; GFX6-NEXT: v_readfirstlane_b32 s11, v0 +; GFX6-NEXT: s_mul_i32 s12, s0, s10 +; GFX6-NEXT: v_readfirstlane_b32 s15, v2 +; GFX6-NEXT: s_mul_i32 s13, s1, s11 +; GFX6-NEXT: s_mul_i32 s14, s0, s11 +; GFX6-NEXT: s_add_i32 s12, s15, s12 +; GFX6-NEXT: v_mul_hi_u32 v3, v0, s14 +; GFX6-NEXT: s_add_i32 s12, s12, s13 +; GFX6-NEXT: v_mul_hi_u32 v0, v0, s12 +; GFX6-NEXT: v_mul_hi_u32 v4, v1, s14 +; GFX6-NEXT: v_readfirstlane_b32 s13, v3 +; GFX6-NEXT: s_mul_i32 s15, s11, s12 +; GFX6-NEXT: s_add_u32 s13, s13, s15 +; GFX6-NEXT: v_readfirstlane_b32 s15, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, v1, s12 +; GFX6-NEXT: s_mul_i32 s14, s10, s14 +; GFX6-NEXT: s_addc_u32 s15, 0, s15 +; GFX6-NEXT: v_readfirstlane_b32 s16, v4 +; GFX6-NEXT: s_add_u32 s13, s13, s14 +; GFX6-NEXT: s_addc_u32 s13, s15, s16 ; GFX6-NEXT: v_readfirstlane_b32 s14, v0 -; GFX6-NEXT: s_mul_i32 s13, s10, s13 -; GFX6-NEXT: s_addc_u32 s14, 0, s14 -; GFX6-NEXT: v_readfirstlane_b32 s15, v4 -; GFX6-NEXT: s_add_u32 s12, s12, s13 -; GFX6-NEXT: 
s_addc_u32 s12, s14, s15 +; GFX6-NEXT: s_addc_u32 s14, s14, 0 +; GFX6-NEXT: s_mul_i32 s12, s10, s12 +; GFX6-NEXT: s_add_u32 s12, s13, s12 +; GFX6-NEXT: s_addc_u32 s13, 0, s14 +; GFX6-NEXT: s_add_u32 s11, s11, s12 +; GFX6-NEXT: v_mov_b32_e32 v1, s11 +; GFX6-NEXT: v_mul_hi_u32 v1, s0, v1 +; GFX6-NEXT: s_addc_u32 s10, s10, s13 +; GFX6-NEXT: s_mul_i32 s12, s0, s11 +; GFX6-NEXT: s_mul_i32 s0, s0, s10 ; GFX6-NEXT: v_readfirstlane_b32 s13, v1 -; GFX6-NEXT: s_addc_u32 s13, s13, 0 -; GFX6-NEXT: s_mul_i32 s11, s10, s11 -; GFX6-NEXT: s_add_u32 s11, s12, s11 -; GFX6-NEXT: s_addc_u32 s12, 0, s13 -; GFX6-NEXT: s_add_u32 s11, s2, s11 -; GFX6-NEXT: v_mov_b32_e32 v0, s11 -; GFX6-NEXT: v_mul_hi_u32 v0, s0, v0 -; GFX6-NEXT: s_addc_u32 s10, s10, s12 -; GFX6-NEXT: s_mul_i32 s12, s0, s10 +; GFX6-NEXT: s_add_i32 s0, s13, s0 ; GFX6-NEXT: s_mul_i32 s1, s1, s11 -; GFX6-NEXT: v_readfirstlane_b32 s13, v0 -; GFX6-NEXT: s_add_i32 s12, s13, s12 -; GFX6-NEXT: s_mul_i32 s0, s0, s11 -; GFX6-NEXT: s_add_i32 s1, s12, s1 -; GFX6-NEXT: v_mov_b32_e32 v2, s0 -; GFX6-NEXT: v_mov_b32_e32 v0, s1 -; GFX6-NEXT: v_mul_hi_u32 v3, s10, v2 -; GFX6-NEXT: v_mul_hi_u32 v2, s11, v2 -; GFX6-NEXT: v_mul_hi_u32 v1, s10, v0 +; GFX6-NEXT: v_mov_b32_e32 v0, s12 +; GFX6-NEXT: s_add_i32 s0, s0, s1 +; GFX6-NEXT: v_mul_hi_u32 v2, s10, v0 +; GFX6-NEXT: v_mov_b32_e32 v1, s0 ; GFX6-NEXT: v_mul_hi_u32 v0, s11, v0 -; GFX6-NEXT: s_mul_i32 s13, s11, s1 -; GFX6-NEXT: v_readfirstlane_b32 s15, v2 +; GFX6-NEXT: v_mul_hi_u32 v3, s11, v1 +; GFX6-NEXT: s_mul_i32 s13, s11, s0 +; GFX6-NEXT: s_mul_i32 s12, s10, s12 +; GFX6-NEXT: v_readfirstlane_b32 s15, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, s10, v1 +; GFX6-NEXT: v_readfirstlane_b32 s14, v3 ; GFX6-NEXT: s_add_u32 s13, s15, s13 -; GFX6-NEXT: v_readfirstlane_b32 s14, v0 -; GFX6-NEXT: s_mul_i32 s0, s10, s0 ; GFX6-NEXT: s_addc_u32 s14, 0, s14 -; GFX6-NEXT: v_readfirstlane_b32 s12, v3 -; GFX6-NEXT: s_add_u32 s0, s13, s0 -; GFX6-NEXT: s_addc_u32 s0, s14, s12 -; GFX6-NEXT: v_readfirstlane_b32 s12, v1 +; 
GFX6-NEXT: v_readfirstlane_b32 s1, v2 +; GFX6-NEXT: s_add_u32 s12, s13, s12 +; GFX6-NEXT: s_addc_u32 s1, s14, s1 +; GFX6-NEXT: v_readfirstlane_b32 s12, v0 ; GFX6-NEXT: s_addc_u32 s12, s12, 0 -; GFX6-NEXT: s_mul_i32 s1, s10, s1 -; GFX6-NEXT: s_add_u32 s0, s0, s1 +; GFX6-NEXT: s_mul_i32 s0, s10, s0 +; GFX6-NEXT: s_add_u32 s0, s1, s0 ; GFX6-NEXT: s_addc_u32 s1, 0, s12 ; GFX6-NEXT: s_add_u32 s12, s11, s0 ; GFX6-NEXT: s_addc_u32 s13, s10, s1 @@ -9043,7 +9044,6 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX6-NEXT: s_sub_u32 s4, s4, s10 ; GFX6-NEXT: s_subb_u32 s5, s5, s10 ; GFX6-NEXT: v_mov_b32_e32 v0, s4 -; GFX6-NEXT: s_mov_b32 s2, -1 ; GFX6-NEXT: v_mov_b32_e32 v1, s5 ; GFX6-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX6-NEXT: s_endpgm @@ -9084,37 +9084,37 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX9-NEXT: s_mul_hi_u32 s11, s9, s10 ; GFX9-NEXT: s_add_u32 s12, s12, s14 ; GFX9-NEXT: s_addc_u32 s11, 0, s11 -; GFX9-NEXT: s_mul_hi_u32 s15, s8, s13 +; GFX9-NEXT: s_mul_hi_u32 s14, s8, s13 ; GFX9-NEXT: s_mul_i32 s13, s8, s13 ; GFX9-NEXT: s_add_u32 s12, s12, s13 -; GFX9-NEXT: s_mul_hi_u32 s14, s8, s10 -; GFX9-NEXT: s_addc_u32 s11, s11, s15 -; GFX9-NEXT: s_addc_u32 s12, s14, 0 +; GFX9-NEXT: s_addc_u32 s11, s11, s14 +; GFX9-NEXT: s_mul_hi_u32 s12, s8, s10 +; GFX9-NEXT: s_addc_u32 s12, s12, 0 ; GFX9-NEXT: s_mul_i32 s10, s8, s10 ; GFX9-NEXT: s_add_u32 s10, s11, s10 ; GFX9-NEXT: s_addc_u32 s11, 0, s12 ; GFX9-NEXT: s_add_u32 s9, s9, s10 ; GFX9-NEXT: s_addc_u32 s8, s8, s11 -; GFX9-NEXT: s_mul_i32 s10, s4, s8 -; GFX9-NEXT: s_mul_hi_u32 s11, s4, s9 -; GFX9-NEXT: s_add_i32 s10, s11, s10 +; GFX9-NEXT: s_mul_i32 s10, s4, s9 +; GFX9-NEXT: s_mul_i32 s13, s4, s8 +; GFX9-NEXT: s_mul_hi_u32 s4, s4, s9 +; GFX9-NEXT: s_add_i32 s4, s4, s13 ; GFX9-NEXT: s_mul_i32 s5, s5, s9 -; GFX9-NEXT: s_add_i32 s10, s10, s5 -; GFX9-NEXT: s_mul_i32 s4, s4, s9 -; GFX9-NEXT: s_mul_hi_u32 s11, s8, s4 -; GFX9-NEXT: 
s_mul_i32 s12, s8, s4 -; GFX9-NEXT: s_mul_i32 s14, s9, s10 -; GFX9-NEXT: s_mul_hi_u32 s4, s9, s4 -; GFX9-NEXT: s_mul_hi_u32 s13, s9, s10 -; GFX9-NEXT: s_add_u32 s4, s4, s14 -; GFX9-NEXT: s_addc_u32 s13, 0, s13 -; GFX9-NEXT: s_add_u32 s4, s4, s12 -; GFX9-NEXT: s_mul_hi_u32 s5, s8, s10 -; GFX9-NEXT: s_addc_u32 s4, s13, s11 -; GFX9-NEXT: s_addc_u32 s5, s5, 0 -; GFX9-NEXT: s_mul_i32 s10, s8, s10 -; GFX9-NEXT: s_add_u32 s4, s4, s10 +; GFX9-NEXT: s_add_i32 s4, s4, s5 +; GFX9-NEXT: s_mul_hi_u32 s11, s8, s10 +; GFX9-NEXT: s_mul_i32 s12, s8, s10 +; GFX9-NEXT: s_mul_i32 s13, s9, s4 +; GFX9-NEXT: s_mul_hi_u32 s10, s9, s10 +; GFX9-NEXT: s_mul_hi_u32 s5, s9, s4 +; GFX9-NEXT: s_add_u32 s10, s10, s13 ; GFX9-NEXT: s_addc_u32 s5, 0, s5 +; GFX9-NEXT: s_add_u32 s10, s10, s12 +; GFX9-NEXT: s_addc_u32 s5, s5, s11 +; GFX9-NEXT: s_mul_hi_u32 s10, s8, s4 +; GFX9-NEXT: s_addc_u32 s10, s10, 0 +; GFX9-NEXT: s_mul_i32 s4, s8, s4 +; GFX9-NEXT: s_add_u32 s4, s5, s4 +; GFX9-NEXT: s_addc_u32 s5, 0, s10 ; GFX9-NEXT: s_add_u32 s9, s9, s4 ; GFX9-NEXT: s_addc_u32 s8, s8, s5 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) @@ -9128,12 +9128,12 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x ; GFX9-NEXT: s_mul_hi_u32 s10, s2, s8 ; GFX9-NEXT: s_add_u32 s11, s12, s11 ; GFX9-NEXT: s_addc_u32 s10, 0, s10 -; GFX9-NEXT: s_mul_hi_u32 s13, s3, s9 +; GFX9-NEXT: s_mul_hi_u32 s12, s3, s9 ; GFX9-NEXT: s_mul_i32 s9, s3, s9 ; GFX9-NEXT: s_add_u32 s9, s11, s9 -; GFX9-NEXT: s_mul_hi_u32 s12, s3, s8 -; GFX9-NEXT: s_addc_u32 s9, s10, s13 -; GFX9-NEXT: s_addc_u32 s10, s12, 0 +; GFX9-NEXT: s_addc_u32 s9, s10, s12 +; GFX9-NEXT: s_mul_hi_u32 s10, s3, s8 +; GFX9-NEXT: s_addc_u32 s10, s10, 0 ; GFX9-NEXT: s_mul_i32 s8, s3, s8 ; GFX9-NEXT: s_add_u32 s8, s9, s8 ; GFX9-NEXT: s_addc_u32 s9, 0, s10 @@ -9323,34 +9323,34 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX6-NEXT: s_add_u32 s14, s15, s14 ; GFX6-NEXT: s_addc_u32 s15, 0, s16 ; GFX6-NEXT: s_add_u32 s13, s13, s14 
-; GFX6-NEXT: v_mov_b32_e32 v0, s13 -; GFX6-NEXT: v_mul_hi_u32 v0, s6, v0 +; GFX6-NEXT: v_mov_b32_e32 v1, s13 +; GFX6-NEXT: v_mul_hi_u32 v1, s6, v1 ; GFX6-NEXT: s_addc_u32 s12, s12, s15 -; GFX6-NEXT: s_mul_i32 s14, s6, s12 +; GFX6-NEXT: s_mul_i32 s14, s6, s13 +; GFX6-NEXT: s_mul_i32 s6, s6, s12 +; GFX6-NEXT: v_readfirstlane_b32 s15, v1 +; GFX6-NEXT: s_add_i32 s6, s15, s6 ; GFX6-NEXT: s_mul_i32 s7, s7, s13 -; GFX6-NEXT: v_readfirstlane_b32 s15, v0 -; GFX6-NEXT: s_add_i32 s14, s15, s14 -; GFX6-NEXT: s_mul_i32 s6, s6, s13 -; GFX6-NEXT: s_add_i32 s7, s14, s7 -; GFX6-NEXT: v_mov_b32_e32 v2, s6 -; GFX6-NEXT: v_mov_b32_e32 v0, s7 -; GFX6-NEXT: v_mul_hi_u32 v3, s12, v2 -; GFX6-NEXT: v_mul_hi_u32 v2, s13, v2 -; GFX6-NEXT: v_mul_hi_u32 v1, s12, v0 +; GFX6-NEXT: v_mov_b32_e32 v0, s14 +; GFX6-NEXT: s_add_i32 s6, s6, s7 +; GFX6-NEXT: v_mul_hi_u32 v2, s12, v0 +; GFX6-NEXT: v_mov_b32_e32 v1, s6 ; GFX6-NEXT: v_mul_hi_u32 v0, s13, v0 -; GFX6-NEXT: s_mul_i32 s15, s13, s7 -; GFX6-NEXT: v_readfirstlane_b32 s17, v2 +; GFX6-NEXT: v_mul_hi_u32 v3, s13, v1 +; GFX6-NEXT: s_mul_i32 s15, s13, s6 +; GFX6-NEXT: s_mul_i32 s14, s12, s14 +; GFX6-NEXT: v_readfirstlane_b32 s17, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, s12, v1 +; GFX6-NEXT: v_readfirstlane_b32 s16, v3 ; GFX6-NEXT: s_add_u32 s15, s17, s15 -; GFX6-NEXT: v_readfirstlane_b32 s16, v0 -; GFX6-NEXT: s_mul_i32 s6, s12, s6 ; GFX6-NEXT: s_addc_u32 s16, 0, s16 -; GFX6-NEXT: v_readfirstlane_b32 s14, v3 -; GFX6-NEXT: s_add_u32 s6, s15, s6 -; GFX6-NEXT: s_addc_u32 s6, s16, s14 -; GFX6-NEXT: v_readfirstlane_b32 s14, v1 +; GFX6-NEXT: v_readfirstlane_b32 s7, v2 +; GFX6-NEXT: s_add_u32 s14, s15, s14 +; GFX6-NEXT: s_addc_u32 s7, s16, s7 +; GFX6-NEXT: v_readfirstlane_b32 s14, v0 ; GFX6-NEXT: s_addc_u32 s14, s14, 0 -; GFX6-NEXT: s_mul_i32 s7, s12, s7 -; GFX6-NEXT: s_add_u32 s6, s6, s7 +; GFX6-NEXT: s_mul_i32 s6, s12, s6 +; GFX6-NEXT: s_add_u32 s6, s7, s6 ; GFX6-NEXT: s_addc_u32 s7, 0, s14 ; GFX6-NEXT: s_add_u32 s13, s13, s6 ; GFX6-NEXT: s_addc_u32 s12, s12, s7 
@@ -9439,65 +9439,65 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 ; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GFX6-NEXT: v_mul_hi_u32 v2, s2, v0 -; GFX6-NEXT: v_readfirstlane_b32 s8, v1 -; GFX6-NEXT: v_readfirstlane_b32 s0, v0 -; GFX6-NEXT: s_mul_i32 s9, s2, s8 +; GFX6-NEXT: v_readfirstlane_b32 s9, v1 +; GFX6-NEXT: v_readfirstlane_b32 s8, v0 +; GFX6-NEXT: s_mul_i32 s1, s2, s9 ; GFX6-NEXT: v_readfirstlane_b32 s12, v2 -; GFX6-NEXT: s_mul_i32 s1, s3, s0 -; GFX6-NEXT: s_add_i32 s9, s12, s9 -; GFX6-NEXT: s_add_i32 s9, s9, s1 -; GFX6-NEXT: s_mul_i32 s1, s2, s0 -; GFX6-NEXT: v_mul_hi_u32 v2, v0, s9 -; GFX6-NEXT: v_mul_hi_u32 v0, v0, s1 -; GFX6-NEXT: s_mul_i32 s12, s0, s9 -; GFX6-NEXT: v_readfirstlane_b32 s13, v2 +; GFX6-NEXT: s_mul_i32 s0, s3, s8 +; GFX6-NEXT: s_add_i32 s1, s12, s1 +; GFX6-NEXT: s_add_i32 s12, s1, s0 +; GFX6-NEXT: s_mul_i32 s13, s2, s8 +; GFX6-NEXT: v_mul_hi_u32 v2, v0, s12 +; GFX6-NEXT: v_mul_hi_u32 v0, v0, s13 +; GFX6-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; GFX6-NEXT: s_mul_i32 s4, s8, s12 +; GFX6-NEXT: v_readfirstlane_b32 s5, v2 ; GFX6-NEXT: v_readfirstlane_b32 s16, v0 -; GFX6-NEXT: v_mul_hi_u32 v0, v1, s1 -; GFX6-NEXT: v_mul_hi_u32 v1, v1, s9 -; GFX6-NEXT: s_add_u32 s12, s16, s12 -; GFX6-NEXT: s_addc_u32 s13, 0, s13 -; GFX6-NEXT: s_mul_i32 s1, s8, s1 +; GFX6-NEXT: v_mul_hi_u32 v0, v1, s13 +; GFX6-NEXT: v_mul_hi_u32 v1, v1, s12 +; GFX6-NEXT: s_add_u32 s4, s16, s4 +; GFX6-NEXT: s_addc_u32 s5, 0, s5 +; GFX6-NEXT: s_mul_i32 s13, s9, s13 ; GFX6-NEXT: v_readfirstlane_b32 s16, v0 -; GFX6-NEXT: s_add_u32 s1, s12, s1 -; GFX6-NEXT: s_addc_u32 s1, s13, s16 -; GFX6-NEXT: v_readfirstlane_b32 s12, v1 -; GFX6-NEXT: s_addc_u32 s12, s12, 0 -; GFX6-NEXT: s_mul_i32 s9, s8, s9 -; GFX6-NEXT: s_add_u32 s1, s1, s9 -; GFX6-NEXT: s_addc_u32 s9, 0, s12 -; GFX6-NEXT: s_add_u32 s12, s0, s1 -; GFX6-NEXT: v_mov_b32_e32 v0, s12 -; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 -; GFX6-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x9 -; GFX6-NEXT: s_addc_u32 s4, s8, s9 -; GFX6-NEXT: s_mul_i32 s5, s2, s4 -; GFX6-NEXT: v_readfirstlane_b32 s8, v0 -; GFX6-NEXT: s_add_i32 s5, s8, s5 -; GFX6-NEXT: s_mul_i32 s3, s3, s12 -; GFX6-NEXT: s_mul_i32 s2, s2, s12 -; GFX6-NEXT: s_add_i32 s3, s5, s3 -; GFX6-NEXT: v_mov_b32_e32 v2, s2 -; GFX6-NEXT: v_mov_b32_e32 v0, s3 -; GFX6-NEXT: v_mul_hi_u32 v3, s4, v2 -; GFX6-NEXT: v_mul_hi_u32 v2, s12, v2 -; GFX6-NEXT: v_mul_hi_u32 v1, s4, v0 -; GFX6-NEXT: v_mul_hi_u32 v0, s12, v0 -; GFX6-NEXT: s_mul_i32 s8, s12, s3 -; GFX6-NEXT: v_readfirstlane_b32 s13, v2 -; GFX6-NEXT: s_add_u32 s8, s13, s8 -; GFX6-NEXT: v_readfirstlane_b32 s9, v0 -; GFX6-NEXT: s_mul_i32 s2, s4, s2 -; GFX6-NEXT: s_addc_u32 s9, 0, s9 -; GFX6-NEXT: v_readfirstlane_b32 s5, v3 -; GFX6-NEXT: s_add_u32 s2, s8, s2 -; GFX6-NEXT: s_addc_u32 s2, s9, s5 +; GFX6-NEXT: s_add_u32 s4, s4, s13 +; GFX6-NEXT: s_addc_u32 s4, s5, s16 ; GFX6-NEXT: v_readfirstlane_b32 s5, v1 ; GFX6-NEXT: s_addc_u32 s5, s5, 0 -; GFX6-NEXT: s_mul_i32 s3, s4, s3 -; GFX6-NEXT: s_add_u32 s2, s2, s3 -; GFX6-NEXT: s_addc_u32 s3, 0, s5 -; GFX6-NEXT: s_add_u32 s12, s12, s2 -; GFX6-NEXT: s_addc_u32 s13, s4, s3 +; GFX6-NEXT: s_mul_i32 s12, s9, s12 +; GFX6-NEXT: s_add_u32 s4, s4, s12 +; GFX6-NEXT: s_addc_u32 s5, 0, s5 +; GFX6-NEXT: s_add_u32 s4, s8, s4 +; GFX6-NEXT: v_mov_b32_e32 v1, s4 +; GFX6-NEXT: v_mul_hi_u32 v1, s2, v1 +; GFX6-NEXT: s_addc_u32 s5, s9, s5 +; GFX6-NEXT: s_mul_i32 s8, s2, s4 +; GFX6-NEXT: s_mul_i32 s2, s2, s5 +; GFX6-NEXT: v_readfirstlane_b32 s9, v1 +; GFX6-NEXT: s_add_i32 s2, s9, s2 +; GFX6-NEXT: s_mul_i32 s3, s3, s4 +; GFX6-NEXT: v_mov_b32_e32 v0, s8 +; GFX6-NEXT: s_add_i32 s2, s2, s3 +; GFX6-NEXT: v_mul_hi_u32 v2, s5, v0 +; GFX6-NEXT: v_mov_b32_e32 v1, s2 +; GFX6-NEXT: v_mul_hi_u32 v0, s4, v0 +; GFX6-NEXT: v_mul_hi_u32 v3, s4, v1 +; GFX6-NEXT: s_mul_i32 s9, s4, s2 +; GFX6-NEXT: s_mul_i32 s8, s5, s8 +; GFX6-NEXT: v_readfirstlane_b32 s13, v0 +; GFX6-NEXT: v_mul_hi_u32 v0, s5, v1 +; GFX6-NEXT: v_readfirstlane_b32 s12, v3 +; 
GFX6-NEXT: s_add_u32 s9, s13, s9 +; GFX6-NEXT: s_addc_u32 s12, 0, s12 +; GFX6-NEXT: v_readfirstlane_b32 s3, v2 +; GFX6-NEXT: s_add_u32 s8, s9, s8 +; GFX6-NEXT: s_addc_u32 s3, s12, s3 +; GFX6-NEXT: v_readfirstlane_b32 s8, v0 +; GFX6-NEXT: s_addc_u32 s8, s8, 0 +; GFX6-NEXT: s_mul_i32 s2, s5, s2 +; GFX6-NEXT: s_add_u32 s2, s3, s2 +; GFX6-NEXT: s_addc_u32 s3, 0, s8 +; GFX6-NEXT: s_add_u32 s12, s4, s2 +; GFX6-NEXT: s_addc_u32 s13, s5, s3 ; GFX6-NEXT: s_ashr_i32 s4, s11, 31 ; GFX6-NEXT: s_add_u32 s2, s10, s4 ; GFX6-NEXT: s_mov_b32 s5, s4 @@ -9606,41 +9606,41 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX9-NEXT: s_mul_i32 s17, s6, s13 ; GFX9-NEXT: s_add_i32 s14, s14, s15 ; GFX9-NEXT: s_mul_hi_u32 s16, s13, s17 -; GFX9-NEXT: s_mul_i32 s18, s13, s14 +; GFX9-NEXT: s_mul_i32 s19, s13, s14 ; GFX9-NEXT: s_mul_hi_u32 s15, s13, s14 -; GFX9-NEXT: s_add_u32 s16, s16, s18 -; GFX9-NEXT: s_addc_u32 s15, 0, s15 +; GFX9-NEXT: s_add_u32 s16, s16, s19 ; GFX9-NEXT: s_mul_hi_u32 s18, s12, s17 +; GFX9-NEXT: s_addc_u32 s15, 0, s15 ; GFX9-NEXT: s_mul_i32 s17, s12, s17 ; GFX9-NEXT: s_add_u32 s16, s16, s17 -; GFX9-NEXT: s_mul_hi_u32 s19, s12, s14 ; GFX9-NEXT: s_addc_u32 s15, s15, s18 -; GFX9-NEXT: s_addc_u32 s16, s19, 0 +; GFX9-NEXT: s_mul_hi_u32 s16, s12, s14 +; GFX9-NEXT: s_addc_u32 s16, s16, 0 ; GFX9-NEXT: s_mul_i32 s14, s12, s14 ; GFX9-NEXT: s_add_u32 s14, s15, s14 ; GFX9-NEXT: s_addc_u32 s15, 0, s16 ; GFX9-NEXT: s_add_u32 s13, s13, s14 ; GFX9-NEXT: s_addc_u32 s12, s12, s15 -; GFX9-NEXT: s_mul_i32 s14, s6, s12 -; GFX9-NEXT: s_mul_hi_u32 s15, s6, s13 -; GFX9-NEXT: s_add_i32 s14, s15, s14 +; GFX9-NEXT: s_mul_i32 s14, s6, s13 +; GFX9-NEXT: s_mul_i32 s17, s6, s12 +; GFX9-NEXT: s_mul_hi_u32 s6, s6, s13 +; GFX9-NEXT: s_add_i32 s6, s6, s17 ; GFX9-NEXT: s_mul_i32 s7, s7, s13 -; GFX9-NEXT: s_add_i32 s14, s14, s7 -; GFX9-NEXT: s_mul_i32 s6, s6, s13 -; GFX9-NEXT: s_mul_hi_u32 s15, s12, s6 -; GFX9-NEXT: s_mul_i32 s16, s12, s6 -; GFX9-NEXT: s_mul_i32 s18, s13, 
s14 -; GFX9-NEXT: s_mul_hi_u32 s6, s13, s6 -; GFX9-NEXT: s_mul_hi_u32 s17, s13, s14 -; GFX9-NEXT: s_add_u32 s6, s6, s18 -; GFX9-NEXT: s_addc_u32 s17, 0, s17 -; GFX9-NEXT: s_add_u32 s6, s6, s16 -; GFX9-NEXT: s_mul_hi_u32 s7, s12, s14 -; GFX9-NEXT: s_addc_u32 s6, s17, s15 -; GFX9-NEXT: s_addc_u32 s7, s7, 0 -; GFX9-NEXT: s_mul_i32 s14, s12, s14 -; GFX9-NEXT: s_add_u32 s6, s6, s14 +; GFX9-NEXT: s_add_i32 s6, s6, s7 +; GFX9-NEXT: s_mul_hi_u32 s15, s12, s14 +; GFX9-NEXT: s_mul_i32 s16, s12, s14 +; GFX9-NEXT: s_mul_i32 s17, s13, s6 +; GFX9-NEXT: s_mul_hi_u32 s14, s13, s14 +; GFX9-NEXT: s_mul_hi_u32 s7, s13, s6 +; GFX9-NEXT: s_add_u32 s14, s14, s17 ; GFX9-NEXT: s_addc_u32 s7, 0, s7 +; GFX9-NEXT: s_add_u32 s14, s14, s16 +; GFX9-NEXT: s_addc_u32 s7, s7, s15 +; GFX9-NEXT: s_mul_hi_u32 s14, s12, s6 +; GFX9-NEXT: s_addc_u32 s14, s14, 0 +; GFX9-NEXT: s_mul_i32 s6, s12, s6 +; GFX9-NEXT: s_add_u32 s6, s7, s6 +; GFX9-NEXT: s_addc_u32 s7, 0, s14 ; GFX9-NEXT: s_add_u32 s13, s13, s6 ; GFX9-NEXT: s_addc_u32 s12, s12, s7 ; GFX9-NEXT: s_ashr_i32 s6, s9, 31 @@ -9653,12 +9653,12 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX9-NEXT: s_mul_hi_u32 s14, s8, s12 ; GFX9-NEXT: s_add_u32 s15, s16, s15 ; GFX9-NEXT: s_addc_u32 s14, 0, s14 -; GFX9-NEXT: s_mul_hi_u32 s17, s9, s13 +; GFX9-NEXT: s_mul_hi_u32 s16, s9, s13 ; GFX9-NEXT: s_mul_i32 s13, s9, s13 ; GFX9-NEXT: s_add_u32 s13, s15, s13 -; GFX9-NEXT: s_mul_hi_u32 s16, s9, s12 -; GFX9-NEXT: s_addc_u32 s13, s14, s17 -; GFX9-NEXT: s_addc_u32 s14, s16, 0 +; GFX9-NEXT: s_addc_u32 s13, s14, s16 +; GFX9-NEXT: s_mul_hi_u32 s14, s9, s12 +; GFX9-NEXT: s_addc_u32 s14, s14, 0 ; GFX9-NEXT: s_mul_i32 s12, s9, s12 ; GFX9-NEXT: s_add_u32 s12, s13, s12 ; GFX9-NEXT: s_addc_u32 s13, 0, s14 @@ -9734,37 +9734,37 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX9-NEXT: s_mul_hi_u32 s7, s6, s8 ; GFX9-NEXT: s_add_u32 s14, s16, s14 ; GFX9-NEXT: s_addc_u32 s7, 0, s7 -; GFX9-NEXT: 
s_mul_hi_u32 s17, s9, s15 +; GFX9-NEXT: s_mul_hi_u32 s16, s9, s15 ; GFX9-NEXT: s_mul_i32 s15, s9, s15 ; GFX9-NEXT: s_add_u32 s14, s14, s15 -; GFX9-NEXT: s_mul_hi_u32 s16, s9, s8 -; GFX9-NEXT: s_addc_u32 s7, s7, s17 -; GFX9-NEXT: s_addc_u32 s14, s16, 0 +; GFX9-NEXT: s_addc_u32 s7, s7, s16 +; GFX9-NEXT: s_mul_hi_u32 s14, s9, s8 +; GFX9-NEXT: s_addc_u32 s14, s14, 0 ; GFX9-NEXT: s_mul_i32 s8, s9, s8 ; GFX9-NEXT: s_add_u32 s7, s7, s8 ; GFX9-NEXT: s_addc_u32 s8, 0, s14 ; GFX9-NEXT: s_add_u32 s6, s6, s7 ; GFX9-NEXT: s_addc_u32 s7, s9, s8 -; GFX9-NEXT: s_mul_i32 s8, s4, s7 -; GFX9-NEXT: s_mul_hi_u32 s9, s4, s6 -; GFX9-NEXT: s_add_i32 s8, s9, s8 +; GFX9-NEXT: s_mul_i32 s8, s4, s6 +; GFX9-NEXT: s_mul_i32 s15, s4, s7 +; GFX9-NEXT: s_mul_hi_u32 s4, s4, s6 +; GFX9-NEXT: s_add_i32 s4, s4, s15 ; GFX9-NEXT: s_mul_i32 s5, s5, s6 -; GFX9-NEXT: s_add_i32 s8, s8, s5 -; GFX9-NEXT: s_mul_i32 s4, s4, s6 -; GFX9-NEXT: s_mul_hi_u32 s9, s7, s4 -; GFX9-NEXT: s_mul_i32 s14, s7, s4 -; GFX9-NEXT: s_mul_i32 s16, s6, s8 -; GFX9-NEXT: s_mul_hi_u32 s4, s6, s4 -; GFX9-NEXT: s_mul_hi_u32 s15, s6, s8 -; GFX9-NEXT: s_add_u32 s4, s4, s16 -; GFX9-NEXT: s_addc_u32 s15, 0, s15 -; GFX9-NEXT: s_add_u32 s4, s4, s14 -; GFX9-NEXT: s_mul_hi_u32 s5, s7, s8 -; GFX9-NEXT: s_addc_u32 s4, s15, s9 -; GFX9-NEXT: s_addc_u32 s5, s5, 0 -; GFX9-NEXT: s_mul_i32 s8, s7, s8 -; GFX9-NEXT: s_add_u32 s4, s4, s8 +; GFX9-NEXT: s_add_i32 s4, s4, s5 +; GFX9-NEXT: s_mul_hi_u32 s9, s7, s8 +; GFX9-NEXT: s_mul_i32 s14, s7, s8 +; GFX9-NEXT: s_mul_i32 s15, s6, s4 +; GFX9-NEXT: s_mul_hi_u32 s8, s6, s8 +; GFX9-NEXT: s_mul_hi_u32 s5, s6, s4 +; GFX9-NEXT: s_add_u32 s8, s8, s15 ; GFX9-NEXT: s_addc_u32 s5, 0, s5 +; GFX9-NEXT: s_add_u32 s8, s8, s14 +; GFX9-NEXT: s_addc_u32 s5, s5, s9 +; GFX9-NEXT: s_mul_hi_u32 s8, s7, s4 +; GFX9-NEXT: s_addc_u32 s8, s8, 0 +; GFX9-NEXT: s_mul_i32 s4, s7, s4 +; GFX9-NEXT: s_add_u32 s4, s5, s4 +; GFX9-NEXT: s_addc_u32 s5, 0, s8 ; GFX9-NEXT: s_add_u32 s8, s6, s4 ; GFX9-NEXT: s_addc_u32 s9, s7, s5 ; GFX9-NEXT: 
s_ashr_i32 s4, s11, 31 @@ -9777,12 +9777,12 @@ define amdgpu_kernel void @srem_v2i64_pow2_shl_denom(ptr addrspace(1) %out, <2 x ; GFX9-NEXT: s_mul_hi_u32 s10, s6, s9 ; GFX9-NEXT: s_add_u32 s11, s14, s11 ; GFX9-NEXT: s_addc_u32 s10, 0, s10 -; GFX9-NEXT: s_mul_hi_u32 s15, s7, s8 +; GFX9-NEXT: s_mul_hi_u32 s14, s7, s8 ; GFX9-NEXT: s_mul_i32 s8, s7, s8 ; GFX9-NEXT: s_add_u32 s8, s11, s8 -; GFX9-NEXT: s_mul_hi_u32 s14, s7, s9 -; GFX9-NEXT: s_addc_u32 s8, s10, s15 -; GFX9-NEXT: s_addc_u32 s10, s14, 0 +; GFX9-NEXT: s_addc_u32 s8, s10, s14 +; GFX9-NEXT: s_mul_hi_u32 s10, s7, s9 +; GFX9-NEXT: s_addc_u32 s10, s10, 0 ; GFX9-NEXT: s_mul_i32 s9, s7, s9 ; GFX9-NEXT: s_add_u32 s8, s8, s9 ; GFX9-NEXT: s_addc_u32 s9, 0, s10 diff --git a/llvm/test/CodeGen/AMDGPU/bypass-div.ll b/llvm/test/CodeGen/AMDGPU/bypass-div.ll index 150d8cfe22cfd..d3cd1dad991f2 100644 --- a/llvm/test/CodeGen/AMDGPU/bypass-div.ll +++ b/llvm/test/CodeGen/AMDGPU/bypass-div.ll @@ -15,105 +15,105 @@ define i64 @sdiv64(i64 %a, i64 %b) { ; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[4:5] ; GFX9-NEXT: s_cbranch_execz .LBB0_2 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: v_ashrrev_i32_e32 v9, 31, v3 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v9 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v9, vcc -; GFX9-NEXT: v_xor_b32_e32 v10, v3, v9 -; GFX9-NEXT: v_xor_b32_e32 v11, v2, v9 -; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v11 -; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v10 -; GFX9-NEXT: v_sub_co_u32_e32 v12, vcc, 0, v11 -; GFX9-NEXT: v_subb_co_u32_e32 v13, vcc, 0, v10, vcc +; GFX9-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v7, vcc +; GFX9-NEXT: v_xor_b32_e32 v8, v3, v7 +; GFX9-NEXT: v_xor_b32_e32 v9, v2, v7 +; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v9 +; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v8 +; GFX9-NEXT: v_sub_co_u32_e32 v12, vcc, 0, v9 +; GFX9-NEXT: v_subb_co_u32_e32 v13, vcc, 0, v8, vcc ; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 ; GFX9-NEXT: v_rcp_f32_e32 v2, v2 ; 
GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GFX9-NEXT: v_trunc_f32_e32 v3, v3 ; GFX9-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 -; GFX9-NEXT: v_cvt_u32_f32_e32 v7, v3 -; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v2 -; GFX9-NEXT: v_mul_lo_u32 v5, v12, v7 -; GFX9-NEXT: v_mul_lo_u32 v4, v13, v8 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v8, 0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v10, v3 +; GFX9-NEXT: v_cvt_u32_f32_e32 v11, v2 +; GFX9-NEXT: v_mul_lo_u32 v5, v12, v10 +; GFX9-NEXT: v_mul_lo_u32 v4, v13, v11 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v11, 0 ; GFX9-NEXT: v_add3_u32 v14, v3, v5, v4 -; GFX9-NEXT: v_mul_hi_u32 v15, v8, v2 -; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v14, 0 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v7, v2, 0 +; GFX9-NEXT: v_mul_hi_u32 v15, v11, v2 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v14, 0 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v2, 0 ; GFX9-NEXT: v_add_co_u32_e32 v15, vcc, v15, v3 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v14, 0 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v14, 0 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v15, v5 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v6, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc -; GFX9-NEXT: v_add_co_u32_e32 v14, vcc, v8, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v15, vcc, v7, v3, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, v12, v15 -; GFX9-NEXT: v_mul_lo_u32 v5, v13, v14 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v14, 0 -; GFX9-NEXT: v_add3_u32 v5, v3, v4, v5 -; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v14, v5, 0 -; GFX9-NEXT: v_mul_hi_u32 v12, v14, v2 -; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v15, v2, 0 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v15, v5, 0 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v12, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc -; GFX9-NEXT: 
v_add_co_u32_e32 v2, vcc, v2, v7 -; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v3, v8, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v6, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v11, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v10, vcc, v10, v3, vcc +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v11, 0 +; GFX9-NEXT: v_mul_lo_u32 v4, v12, v10 +; GFX9-NEXT: v_mul_lo_u32 v5, v13, v11 +; GFX9-NEXT: v_mul_hi_u32 v13, v11, v2 +; GFX9-NEXT: v_add3_u32 v12, v3, v4, v5 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v12, 0 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v2, 0 +; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v13, v3 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v12, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v13, v5 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v6, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v14, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v15, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v11, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v10, v3, vcc ; GFX9-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 ; GFX9-NEXT: v_xor_b32_e32 v6, v0, v5 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v5, vcc ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v4, 0 -; GFX9-NEXT: v_mul_hi_u32 v7, v6, v2 -; GFX9-NEXT: v_xor_b32_e32 v8, v3, v5 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v2, 0 -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v0 +; GFX9-NEXT: v_mul_hi_u32 v10, v6, v2 +; GFX9-NEXT: v_xor_b32_e32 v11, v3, v5 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v2, 0 +; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v10, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, 0, v1, vcc -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v4, 0 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v7, v2 +; GFX9-NEXT: 
v_mad_u64_u32 v[0:1], s[4:5], v11, v4, 0 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v12, v3, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, v10, v2 -; GFX9-NEXT: v_mul_lo_u32 v7, v11, v3 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v11, v2, 0 -; GFX9-NEXT: v_add3_u32 v1, v1, v7, v4 -; GFX9-NEXT: v_sub_u32_e32 v4, v8, v1 +; GFX9-NEXT: v_mul_lo_u32 v4, v8, v2 +; GFX9-NEXT: v_mul_lo_u32 v10, v9, v3 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v2, 0 +; GFX9-NEXT: v_add3_u32 v1, v1, v10, v4 +; GFX9-NEXT: v_sub_u32_e32 v4, v11, v1 ; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v6, v0 -; GFX9-NEXT: v_subb_co_u32_e64 v4, s[4:5], v4, v10, vcc -; GFX9-NEXT: v_sub_co_u32_e64 v6, s[4:5], v0, v11 +; GFX9-NEXT: v_subb_co_u32_e64 v4, s[4:5], v4, v8, vcc +; GFX9-NEXT: v_sub_co_u32_e64 v6, s[4:5], v0, v9 ; GFX9-NEXT: v_subbrev_co_u32_e64 v4, s[4:5], 0, v4, s[4:5] -; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v10 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] -; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v11 +; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v8 +; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[4:5] +; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v9 ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[4:5] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v10 -; GFX9-NEXT: v_cndmask_b32_e64 v4, v7, v6, s[4:5] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v8 +; GFX9-NEXT: v_cndmask_b32_e64 v4, v10, v6, s[4:5] ; GFX9-NEXT: v_add_co_u32_e64 v6, s[4:5], 2, v2 -; GFX9-NEXT: v_addc_co_u32_e64 v7, s[4:5], 0, v3, s[4:5] +; GFX9-NEXT: v_addc_co_u32_e64 v10, s[4:5], 0, v3, s[4:5] ; GFX9-NEXT: v_add_co_u32_e64 v12, s[4:5], 1, v2 ; GFX9-NEXT: v_addc_co_u32_e64 v13, s[4:5], 0, v3, s[4:5] -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v8, v1, vcc +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v11, v1, vcc ; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4 -; 
GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 -; GFX9-NEXT: v_cndmask_b32_e64 v4, v13, v7, s[4:5] -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc -; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v11 +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v8 +; GFX9-NEXT: v_cndmask_b32_e64 v4, v13, v10, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, -1, vcc +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v9 ; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v10 -; GFX9-NEXT: v_cndmask_b32_e32 v0, v7, v0, vcc +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v8 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v10, v0, vcc ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 ; GFX9-NEXT: v_cndmask_b32_e64 v1, v12, v6, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc -; GFX9-NEXT: v_xor_b32_e32 v2, v5, v9 +; GFX9-NEXT: v_xor_b32_e32 v2, v5, v7 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc ; GFX9-NEXT: v_xor_b32_e32 v1, v1, v2 ; GFX9-NEXT: v_xor_b32_e32 v0, v0, v2 @@ -192,25 +192,25 @@ define i64 @udiv64(i64 %a, i64 %b) { ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v10, vcc ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v11, v5, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v13, v11 -; GFX9-NEXT: v_mul_lo_u32 v7, v14, v12 -; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v12, 0 -; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v7, 0 -; GFX9-NEXT: v_mul_hi_u32 v13, v12, v4 -; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v11, v4, 0 -; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v7, 0 -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v13, v5 -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v5, v10, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v8, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v7 +; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, v12, v4 +; GFX9-NEXT: 
v_addc_co_u32_e32 v10, vcc, v11, v5, vcc +; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v9, 0 +; GFX9-NEXT: v_mul_lo_u32 v6, v13, v10 +; GFX9-NEXT: v_mul_lo_u32 v7, v14, v9 +; GFX9-NEXT: v_mul_hi_u32 v12, v9, v4 +; GFX9-NEXT: v_add3_u32 v11, v5, v6, v7 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v11, 0 +; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v4, 0 +; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v5 +; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v11, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v12, v7 +; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v6, v8, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v12, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v11, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v6, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v9, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v10, v5, vcc ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v8, 0 ; GFX9-NEXT: v_mul_hi_u32 v9, v0, v6 ; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v6, 0 @@ -303,100 +303,100 @@ define i64 @srem64(i64 %a, i64 %b) { ; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v3 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v4, vcc -; GFX9-NEXT: v_xor_b32_e32 v9, v3, v4 -; GFX9-NEXT: v_xor_b32_e32 v10, v2, v4 -; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v10 -; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v9 -; GFX9-NEXT: v_sub_co_u32_e32 v11, vcc, 0, v10 -; GFX9-NEXT: v_subb_co_u32_e32 v12, vcc, 0, v9, vcc +; GFX9-NEXT: v_xor_b32_e32 v7, v3, v4 +; GFX9-NEXT: v_xor_b32_e32 v8, v2, v4 +; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v8 +; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v7 +; GFX9-NEXT: v_sub_co_u32_e32 v11, vcc, 0, v8 +; GFX9-NEXT: v_subb_co_u32_e32 v12, vcc, 0, v7, vcc ; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 ; GFX9-NEXT: v_rcp_f32_e32 v2, v2 ; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GFX9-NEXT: v_mul_f32_e32 v3, 
0x2f800000, v2 ; GFX9-NEXT: v_trunc_f32_e32 v3, v3 ; GFX9-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2 -; GFX9-NEXT: v_cvt_u32_f32_e32 v7, v3 -; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v2 -; GFX9-NEXT: v_mul_lo_u32 v5, v11, v7 -; GFX9-NEXT: v_mul_lo_u32 v4, v12, v8 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v8, 0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v3 +; GFX9-NEXT: v_cvt_u32_f32_e32 v10, v2 +; GFX9-NEXT: v_mul_lo_u32 v5, v11, v9 +; GFX9-NEXT: v_mul_lo_u32 v4, v12, v10 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v10, 0 ; GFX9-NEXT: v_add3_u32 v13, v3, v5, v4 -; GFX9-NEXT: v_mul_hi_u32 v14, v8, v2 -; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v13, 0 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v7, v2, 0 +; GFX9-NEXT: v_mul_hi_u32 v14, v10, v2 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v13, 0 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v2, 0 ; GFX9-NEXT: v_add_co_u32_e32 v14, vcc, v14, v3 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v13, 0 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v13, 0 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v14, v5 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v6, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc -; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v8, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v14, vcc, v7, v3, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, v11, v14 -; GFX9-NEXT: v_mul_lo_u32 v5, v12, v13 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v13, 0 -; GFX9-NEXT: v_add3_u32 v5, v3, v4, v5 -; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v13, v5, 0 -; GFX9-NEXT: v_mul_hi_u32 v11, v13, v2 -; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v14, v2, 0 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v14, v5, 0 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v11, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 -; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v3, v8, 
vcc -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v6, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_add_co_u32_e32 v10, vcc, v10, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, v9, v3, vcc +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v10, 0 +; GFX9-NEXT: v_mul_lo_u32 v4, v11, v9 +; GFX9-NEXT: v_mul_lo_u32 v5, v12, v10 +; GFX9-NEXT: v_mul_hi_u32 v12, v10, v2 +; GFX9-NEXT: v_add3_u32 v11, v3, v4, v5 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v10, v11, 0 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v2, 0 +; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v3 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v9, v11, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v12, v5 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v6, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v14, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v10, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v9, v3, vcc ; GFX9-NEXT: v_ashrrev_i32_e32 v5, 31, v1 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 ; GFX9-NEXT: v_xor_b32_e32 v6, v0, v5 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v5, vcc ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v4, 0 -; GFX9-NEXT: v_mul_hi_u32 v7, v6, v2 -; GFX9-NEXT: v_xor_b32_e32 v8, v3, v5 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v2, 0 -; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v0 +; GFX9-NEXT: v_mul_hi_u32 v9, v6, v2 +; GFX9-NEXT: v_xor_b32_e32 v10, v3, v5 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v2, 0 +; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, v9, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, 0, v1, vcc -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v4, 0 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v7, v2 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v4, 0 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v9, v2 ; GFX9-NEXT: 
v_addc_co_u32_e32 v2, vcc, v11, v3, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v2, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc -; GFX9-NEXT: v_mul_lo_u32 v2, v9, v0 -; GFX9-NEXT: v_mul_lo_u32 v3, v10, v1 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v0, 0 +; GFX9-NEXT: v_mul_lo_u32 v2, v7, v0 +; GFX9-NEXT: v_mul_lo_u32 v3, v8, v1 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v0, 0 ; GFX9-NEXT: v_add3_u32 v1, v1, v3, v2 -; GFX9-NEXT: v_sub_u32_e32 v2, v8, v1 +; GFX9-NEXT: v_sub_u32_e32 v2, v10, v1 ; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v6, v0 -; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v2, v9, vcc -; GFX9-NEXT: v_sub_co_u32_e64 v3, s[4:5], v0, v10 +; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v2, v7, vcc +; GFX9-NEXT: v_sub_co_u32_e64 v3, s[4:5], v0, v8 ; GFX9-NEXT: v_subbrev_co_u32_e64 v4, s[6:7], 0, v2, s[4:5] -; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v4, v9 +; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v4, v7 ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, s[6:7] -; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v3, v10 -; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[6:7] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, v9 -; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v2, v9, s[4:5] -; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v7, s[6:7] -; GFX9-NEXT: v_sub_co_u32_e64 v7, s[4:5], v3, v10 +; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v3, v8 +; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7] +; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v4, v7 +; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v2, v7, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v9, s[6:7] +; GFX9-NEXT: v_sub_co_u32_e64 v9, s[4:5], v3, v8 ; GFX9-NEXT: v_subbrev_co_u32_e64 v2, s[4:5], 0, v2, s[4:5] -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v8, v1, vcc +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v10, v1, vcc ; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6 -; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v9 +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v7 ; GFX9-NEXT: v_cndmask_b32_e64 v2, v4, v2, s[4:5] ; GFX9-NEXT: 
v_cndmask_b32_e64 v4, 0, -1, vcc -; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10 +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v8 ; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v9 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v7 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v7, s[4:5] +; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v9, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX9-NEXT: v_xor_b32_e32 v0, v0, v5 ; GFX9-NEXT: v_xor_b32_e32 v1, v1, v5 @@ -473,25 +473,25 @@ define i64 @urem64(i64 %a, i64 %b) { ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v10, vcc ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v11, v5, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v13, v11 -; GFX9-NEXT: v_mul_lo_u32 v7, v14, v12 -; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v12, 0 -; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v7, 0 -; GFX9-NEXT: v_mul_hi_u32 v13, v12, v4 -; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v11, v4, 0 -; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v7, 0 -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v13, v5 -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v5, v10, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v8, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v7 +; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, v12, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v10, vcc, v11, v5, vcc +; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v9, 0 +; GFX9-NEXT: v_mul_lo_u32 v6, v13, v10 +; GFX9-NEXT: v_mul_lo_u32 v7, v14, v9 +; GFX9-NEXT: v_mul_hi_u32 v12, v9, v4 +; GFX9-NEXT: v_add3_u32 v11, v5, v6, v7 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v11, 0 +; GFX9-NEXT: 
v_mad_u64_u32 v[7:8], s[4:5], v10, v4, 0 +; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v5 +; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v11, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v12, v7 +; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v6, v8, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v12, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v11, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v6, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v9, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v10, v5, vcc ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v8, 0 ; GFX9-NEXT: v_mul_hi_u32 v9, v0, v6 ; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v6, 0 @@ -702,121 +702,121 @@ define <2 x i64> @sdivrem64(i64 %a, i64 %b) { ; GFX9-NEXT: s_xor_b64 s[10:11], exec, s[4:5] ; GFX9-NEXT: s_cbranch_execz .LBB8_2 ; GFX9-NEXT: ; %bb.1: -; GFX9-NEXT: v_ashrrev_i32_e32 v9, 31, v3 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v9 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v9, vcc -; GFX9-NEXT: v_xor_b32_e32 v10, v3, v9 -; GFX9-NEXT: v_xor_b32_e32 v11, v2, v9 -; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v11 -; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v10 -; GFX9-NEXT: v_sub_co_u32_e32 v12, vcc, 0, v11 -; GFX9-NEXT: v_subb_co_u32_e32 v13, vcc, 0, v10, vcc +; GFX9-NEXT: v_ashrrev_i32_e32 v7, 31, v3 +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v7, vcc +; GFX9-NEXT: v_xor_b32_e32 v8, v3, v7 +; GFX9-NEXT: v_xor_b32_e32 v9, v2, v7 +; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v9 +; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v8 +; GFX9-NEXT: v_sub_co_u32_e32 v12, vcc, 0, v9 +; GFX9-NEXT: v_subb_co_u32_e32 v13, vcc, 0, v8, vcc ; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 ; GFX9-NEXT: v_rcp_f32_e32 v2, v2 ; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GFX9-NEXT: v_trunc_f32_e32 v3, v3 ; GFX9-NEXT: 
v_madmk_f32 v2, v3, 0xcf800000, v2 -; GFX9-NEXT: v_cvt_u32_f32_e32 v7, v3 -; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v2 -; GFX9-NEXT: v_mul_lo_u32 v5, v12, v7 -; GFX9-NEXT: v_mul_lo_u32 v4, v13, v8 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v8, 0 +; GFX9-NEXT: v_cvt_u32_f32_e32 v10, v3 +; GFX9-NEXT: v_cvt_u32_f32_e32 v11, v2 +; GFX9-NEXT: v_mul_lo_u32 v5, v12, v10 +; GFX9-NEXT: v_mul_lo_u32 v4, v13, v11 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v11, 0 ; GFX9-NEXT: v_add3_u32 v14, v3, v5, v4 -; GFX9-NEXT: v_mul_hi_u32 v15, v8, v2 -; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v8, v14, 0 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v7, v2, 0 +; GFX9-NEXT: v_mul_hi_u32 v15, v11, v2 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v14, 0 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v2, 0 ; GFX9-NEXT: v_add_co_u32_e32 v15, vcc, v15, v3 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v14, 0 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v14, 0 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc ; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v15, v5 ; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v6, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc -; GFX9-NEXT: v_add_co_u32_e32 v14, vcc, v8, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v15, vcc, v7, v3, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, v12, v15 -; GFX9-NEXT: v_mul_lo_u32 v5, v13, v14 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v14, 0 -; GFX9-NEXT: v_add3_u32 v5, v3, v4, v5 -; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v14, v5, 0 -; GFX9-NEXT: v_mul_hi_u32 v12, v14, v2 -; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v15, v2, 0 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v15, v5, 0 -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v12, v3 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 -; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v3, v8, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v6, vcc -; 
GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v5 +; GFX9-NEXT: v_add_co_u32_e32 v11, vcc, v11, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v10, vcc, v10, v3, vcc +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v11, 0 +; GFX9-NEXT: v_mul_lo_u32 v4, v12, v10 +; GFX9-NEXT: v_mul_lo_u32 v5, v13, v11 +; GFX9-NEXT: v_mul_hi_u32 v13, v11, v2 +; GFX9-NEXT: v_add3_u32 v12, v3, v4, v5 +; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v12, 0 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v10, v2, 0 +; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v13, v3 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v10, v12, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc +; GFX9-NEXT: v_add_co_u32_e32 v5, vcc, v13, v5 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v4, v6, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc -; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v14, v2 -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v15, v3, vcc -; GFX9-NEXT: v_ashrrev_i32_e32 v7, 31, v1 -; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v7 -; GFX9-NEXT: v_xor_b32_e32 v5, v0, v7 -; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v7, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v4, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc +; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v11, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v10, v3, vcc +; GFX9-NEXT: v_ashrrev_i32_e32 v10, 31, v1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v10 +; GFX9-NEXT: v_xor_b32_e32 v5, v0, v10 +; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v1, v10, vcc ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v4, 0 ; GFX9-NEXT: v_mul_hi_u32 v6, v5, v2 -; GFX9-NEXT: v_xor_b32_e32 v8, v3, v7 -; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v8, v2, 0 +; GFX9-NEXT: v_xor_b32_e32 v11, v3, v10 +; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v2, 0 ; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v6, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, 0, v1, vcc -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v4, 0 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v11, v4, 0 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v6, v2 ; 
GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v12, v3, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v0 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc -; GFX9-NEXT: v_mul_lo_u32 v4, v10, v2 -; GFX9-NEXT: v_mul_lo_u32 v6, v11, v3 -; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v11, v2, 0 +; GFX9-NEXT: v_mul_lo_u32 v4, v8, v2 +; GFX9-NEXT: v_mul_lo_u32 v6, v9, v3 +; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v9, v2, 0 ; GFX9-NEXT: v_add3_u32 v1, v1, v6, v4 -; GFX9-NEXT: v_sub_u32_e32 v4, v8, v1 +; GFX9-NEXT: v_sub_u32_e32 v4, v11, v1 ; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v5, v0 -; GFX9-NEXT: v_subb_co_u32_e64 v6, s[4:5], v4, v10, vcc -; GFX9-NEXT: v_sub_co_u32_e64 v12, s[4:5], v0, v11 +; GFX9-NEXT: v_subb_co_u32_e64 v6, s[4:5], v4, v8, vcc +; GFX9-NEXT: v_sub_co_u32_e64 v12, s[4:5], v0, v9 ; GFX9-NEXT: v_subbrev_co_u32_e64 v13, s[6:7], 0, v6, s[4:5] -; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v13, v10 +; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v13, v8 ; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, s[6:7] -; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v11 +; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v9 ; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7] -; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v13, v10 +; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v13, v8 ; GFX9-NEXT: v_cndmask_b32_e64 v4, v4, v5, s[6:7] ; GFX9-NEXT: v_add_co_u32_e64 v5, s[6:7], 2, v2 ; GFX9-NEXT: v_addc_co_u32_e64 v14, s[6:7], 0, v3, s[6:7] ; GFX9-NEXT: v_add_co_u32_e64 v15, s[6:7], 1, v2 -; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v8, v1, vcc +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v11, v1, vcc ; GFX9-NEXT: v_addc_co_u32_e64 v16, s[6:7], 0, v3, s[6:7] -; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v8 ; GFX9-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v4 -; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, vcc -; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v11 +; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, -1, vcc +; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v9 ; GFX9-NEXT: v_cndmask_b32_e64 
v4, v16, v14, s[6:7] ; GFX9-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v10 -; GFX9-NEXT: v_cndmask_b32_e32 v8, v8, v14, vcc -; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v8 +; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v8 +; GFX9-NEXT: v_cndmask_b32_e32 v11, v11, v14, vcc +; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v11 ; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v4, v15, v5, s[6:7] ; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc -; GFX9-NEXT: v_xor_b32_e32 v5, v7, v9 +; GFX9-NEXT: v_xor_b32_e32 v5, v10, v7 ; GFX9-NEXT: v_xor_b32_e32 v2, v2, v5 ; GFX9-NEXT: v_xor_b32_e32 v3, v3, v5 ; GFX9-NEXT: v_sub_co_u32_e64 v4, s[8:9], v2, v5 -; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v6, v10, s[4:5] +; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v6, v8, s[4:5] ; GFX9-NEXT: v_subb_co_u32_e64 v5, s[8:9], v3, v5, s[8:9] -; GFX9-NEXT: v_sub_co_u32_e64 v3, s[4:5], v12, v11 +; GFX9-NEXT: v_sub_co_u32_e64 v3, s[4:5], v12, v9 ; GFX9-NEXT: v_subbrev_co_u32_e64 v2, s[4:5], 0, v2, s[4:5] ; GFX9-NEXT: v_cndmask_b32_e64 v2, v13, v2, s[6:7] ; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX9-NEXT: v_cndmask_b32_e64 v2, v12, v3, s[6:7] ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc -; GFX9-NEXT: v_xor_b32_e32 v0, v0, v7 -; GFX9-NEXT: v_xor_b32_e32 v1, v1, v7 -; GFX9-NEXT: v_sub_co_u32_e32 v6, vcc, v0, v7 -; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v1, v7, vcc +; GFX9-NEXT: v_xor_b32_e32 v0, v0, v10 +; GFX9-NEXT: v_xor_b32_e32 v1, v1, v10 +; GFX9-NEXT: v_sub_co_u32_e32 v6, vcc, v0, v10 +; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v1, v10, vcc ; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3 ; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1 ; GFX9-NEXT: .LBB8_2: ; %Flow @@ -899,25 +899,25 @@ define <2 x i64> @udivrem64(i64 %a, i64 %b) { ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v10, vcc ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v4 -; GFX9-NEXT: v_addc_co_u32_e32 
v11, vcc, v11, v5, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v13, v11 -; GFX9-NEXT: v_mul_lo_u32 v7, v14, v12 -; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v12, 0 -; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v7, 0 -; GFX9-NEXT: v_mul_hi_u32 v13, v12, v4 -; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v11, v4, 0 -; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v7, 0 -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v13, v5 -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v5, v10, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v8, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v7 +; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, v12, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v10, vcc, v11, v5, vcc +; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v9, 0 +; GFX9-NEXT: v_mul_lo_u32 v6, v13, v10 +; GFX9-NEXT: v_mul_lo_u32 v7, v14, v9 +; GFX9-NEXT: v_mul_hi_u32 v12, v9, v4 +; GFX9-NEXT: v_add3_u32 v11, v5, v6, v7 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v11, 0 +; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v4, 0 +; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v5 +; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v11, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v12, v7 +; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v6, v8, vcc ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v12, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v11, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v6, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v9, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v10, v5, vcc ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v8, 0 ; GFX9-NEXT: v_mul_hi_u32 v9, v0, v6 ; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v6, 0 @@ -1051,25 +1051,25 @@ define i64 @sdiv64_known32(i64 %a, i64 %b) { ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, 
v10, vcc ; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v11, v5, vcc -; GFX9-NEXT: v_mul_lo_u32 v6, v13, v11 -; GFX9-NEXT: v_mul_lo_u32 v7, v14, v12 -; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v12, 0 -; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 -; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v7, 0 -; GFX9-NEXT: v_mul_hi_u32 v13, v12, v4 -; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v11, v4, 0 -; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v7, 0 -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v13, v5 -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 -; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v5, v10, vcc -; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v8, vcc -; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v7 +; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, v12, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v10, vcc, v11, v5, vcc +; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v13, v9, 0 +; GFX9-NEXT: v_mul_lo_u32 v6, v13, v10 +; GFX9-NEXT: v_mul_lo_u32 v7, v14, v9 +; GFX9-NEXT: v_mul_hi_u32 v12, v9, v4 +; GFX9-NEXT: v_add3_u32 v11, v5, v6, v7 +; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v11, 0 +; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v10, v4, 0 +; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v5 +; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v11, 0 +; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc +; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v12, v7 +; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v6, v8, vcc +; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v6, v4 ; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc -; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v12, v4 -; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v11, v5, vcc +; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v9, v4 +; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, v10, v5, vcc ; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v8, 0 ; GFX9-NEXT: 
v_mul_hi_u32 v9, v1, v6 ; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v2, v6, 0 diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll index 6e948fe4d6fa2..76d4c5a45c26a 100644 --- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll +++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll @@ -2576,25 +2576,25 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; CISI-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; CISI-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; CISI-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; CISI-NEXT: v_mul_lo_u32 v2, s0, v1 -; CISI-NEXT: v_mul_hi_u32 v3, s0, v0 -; CISI-NEXT: v_mul_lo_u32 v4, s1, v0 -; CISI-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; CISI-NEXT: v_mul_lo_u32 v3, s0, v0 -; CISI-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; CISI-NEXT: v_mul_lo_u32 v6, v0, v2 +; CISI-NEXT: v_mul_lo_u32 v3, s0, v1 +; CISI-NEXT: v_mul_hi_u32 v4, s0, v0 +; CISI-NEXT: v_mul_lo_u32 v6, s1, v0 +; CISI-NEXT: v_mul_lo_u32 v2, s0, v0 +; CISI-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; CISI-NEXT: v_add_i32_e32 v3, vcc, v6, v3 +; CISI-NEXT: v_mul_lo_u32 v4, v0, v3 +; CISI-NEXT: v_mul_hi_u32 v6, v0, v2 ; CISI-NEXT: v_mul_hi_u32 v7, v0, v3 -; CISI-NEXT: v_mul_hi_u32 v8, v0, v2 -; CISI-NEXT: v_mul_hi_u32 v5, v1, v3 -; CISI-NEXT: v_mul_lo_u32 v3, v1, v3 -; CISI-NEXT: v_mul_hi_u32 v4, v1, v2 -; CISI-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; CISI-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc +; CISI-NEXT: v_mul_hi_u32 v5, v1, v2 ; CISI-NEXT: v_mul_lo_u32 v2, v1, v2 -; CISI-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; CISI-NEXT: v_addc_u32_e32 v3, vcc, v7, v5, vcc -; CISI-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc -; CISI-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; CISI-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; CISI-NEXT: v_addc_u32_e32 v6, vcc, 0, v7, vcc +; CISI-NEXT: v_mul_hi_u32 v7, v1, v3 +; CISI-NEXT: v_mul_lo_u32 v3, v1, v3 +; CISI-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; CISI-NEXT: v_addc_u32_e32 v2, vcc, v6, v5, vcc +; CISI-NEXT: 
v_addc_u32_e32 v4, vcc, 0, v7, vcc +; CISI-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; CISI-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; CISI-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; CISI-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc @@ -2719,42 +2719,42 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v6, vcc ; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v5 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_add_u32_e32 v8, vcc, v8, v0 -; VI-NEXT: v_addc_u32_e32 v7, vcc, v7, v1, vcc -; VI-NEXT: v_mad_u64_u32 v[0:1], s[6:7], s8, v8, 0 -; VI-NEXT: v_mul_lo_u32 v2, s8, v7 -; VI-NEXT: v_mul_lo_u32 v3, s9, v8 -; VI-NEXT: v_mul_hi_u32 v9, v8, v0 +; VI-NEXT: v_add_u32_e32 v5, vcc, v8, v0 +; VI-NEXT: v_addc_u32_e32 v6, vcc, v7, v1, vcc +; VI-NEXT: v_mad_u64_u32 v[0:1], s[6:7], s8, v5, 0 +; VI-NEXT: v_mul_lo_u32 v2, s8, v6 +; VI-NEXT: v_mul_lo_u32 v3, s9, v5 +; VI-NEXT: v_mul_hi_u32 v8, v5, v0 ; VI-NEXT: v_add_u32_e32 v1, vcc, v2, v1 -; VI-NEXT: v_add_u32_e32 v5, vcc, v3, v1 -; VI-NEXT: v_mad_u64_u32 v[1:2], s[6:7], v8, v5, 0 -; VI-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v7, v0, 0 -; VI-NEXT: v_mad_u64_u32 v[5:6], s[6:7], v7, v5, 0 -; VI-NEXT: v_add_u32_e32 v0, vcc, v9, v1 -; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc -; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; VI-NEXT: v_addc_u32_e32 v0, vcc, v1, v4, vcc -; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v6, vcc -; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v5 +; VI-NEXT: v_add_u32_e32 v7, vcc, v3, v1 +; VI-NEXT: v_mad_u64_u32 v[1:2], s[6:7], v5, v7, 0 +; VI-NEXT: v_mad_u64_u32 v[3:4], s[6:7], v6, v0, 0 +; VI-NEXT: v_add_u32_e32 v8, vcc, v8, v1 +; VI-NEXT: v_mad_u64_u32 v[0:1], s[6:7], v6, v7, 0 +; VI-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; VI-NEXT: v_add_u32_e32 v3, vcc, v8, v3 +; VI-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; VI-NEXT: v_add_u32_e32 v0, vcc, v2, v0 ; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; VI-NEXT: v_add_u32_e32 
v2, vcc, v8, v0 -; VI-NEXT: v_addc_u32_e32 v3, vcc, v7, v1, vcc +; VI-NEXT: v_add_u32_e32 v2, vcc, v5, v0 +; VI-NEXT: v_addc_u32_e32 v3, vcc, v6, v1, vcc ; VI-NEXT: v_mad_u64_u32 v[0:1], s[6:7], s2, v3, 0 ; VI-NEXT: v_mul_hi_u32 v4, s2, v2 ; VI-NEXT: v_readfirstlane_b32 s8, v1 ; VI-NEXT: v_readfirstlane_b32 s9, v0 -; VI-NEXT: v_mad_u64_u32 v[0:1], s[6:7], s3, v3, 0 -; VI-NEXT: v_mad_u64_u32 v[2:3], s[6:7], s3, v2, 0 -; VI-NEXT: v_readfirstlane_b32 s10, v4 -; VI-NEXT: s_add_u32 s6, s10, s9 -; VI-NEXT: s_addc_u32 s7, 0, s8 -; VI-NEXT: v_readfirstlane_b32 s10, v2 -; VI-NEXT: v_readfirstlane_b32 s9, v3 -; VI-NEXT: s_add_u32 s6, s6, s10 -; VI-NEXT: v_readfirstlane_b32 s8, v1 -; VI-NEXT: s_addc_u32 s6, s7, s9 -; VI-NEXT: s_addc_u32 s8, s8, 0 -; VI-NEXT: v_readfirstlane_b32 s7, v0 +; VI-NEXT: v_mad_u64_u32 v[0:1], s[6:7], s3, v2, 0 +; VI-NEXT: v_readfirstlane_b32 s6, v4 +; VI-NEXT: s_add_u32 s9, s6, s9 +; VI-NEXT: s_addc_u32 s8, 0, s8 +; VI-NEXT: v_readfirstlane_b32 s10, v1 +; VI-NEXT: v_mad_u64_u32 v[1:2], s[6:7], s3, v3, 0 +; VI-NEXT: v_readfirstlane_b32 s6, v0 +; VI-NEXT: s_add_u32 s6, s9, s6 +; VI-NEXT: s_addc_u32 s6, s8, s10 +; VI-NEXT: v_readfirstlane_b32 s7, v2 +; VI-NEXT: s_addc_u32 s8, s7, 0 +; VI-NEXT: v_readfirstlane_b32 s7, v1 ; VI-NEXT: s_add_u32 s10, s6, s7 ; VI-NEXT: v_mov_b32_e32 v0, s10 ; VI-NEXT: v_mad_u64_u32 v[0:1], s[6:7], s4, v0, 0 @@ -2867,37 +2867,37 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX9-NEXT: s_mul_hi_u32 s13, s11, s12 ; GFX9-NEXT: s_add_u32 s14, s16, s14 ; GFX9-NEXT: s_addc_u32 s13, 0, s13 -; GFX9-NEXT: s_mul_hi_u32 s17, s10, s15 +; GFX9-NEXT: s_mul_hi_u32 s16, s10, s15 ; GFX9-NEXT: s_mul_i32 s15, s10, s15 ; GFX9-NEXT: s_add_u32 s14, s14, s15 -; GFX9-NEXT: s_mul_hi_u32 s16, s10, s12 -; GFX9-NEXT: s_addc_u32 s13, s13, s17 -; GFX9-NEXT: s_addc_u32 s14, s16, 0 +; GFX9-NEXT: s_addc_u32 s13, s13, s16 +; GFX9-NEXT: s_mul_hi_u32 s14, s10, s12 +; GFX9-NEXT: s_addc_u32 s14, s14, 0 ; GFX9-NEXT: s_mul_i32 s12, 
s10, s12 ; GFX9-NEXT: s_add_u32 s12, s13, s12 ; GFX9-NEXT: s_addc_u32 s13, 0, s14 ; GFX9-NEXT: s_add_u32 s11, s11, s12 ; GFX9-NEXT: s_addc_u32 s10, s10, s13 -; GFX9-NEXT: s_mul_i32 s12, s8, s10 -; GFX9-NEXT: s_mul_hi_u32 s13, s8, s11 -; GFX9-NEXT: s_add_i32 s12, s13, s12 +; GFX9-NEXT: s_mul_i32 s12, s8, s11 +; GFX9-NEXT: s_mul_i32 s15, s8, s10 +; GFX9-NEXT: s_mul_hi_u32 s8, s8, s11 +; GFX9-NEXT: s_add_i32 s8, s8, s15 ; GFX9-NEXT: s_mul_i32 s9, s9, s11 -; GFX9-NEXT: s_add_i32 s12, s12, s9 -; GFX9-NEXT: s_mul_i32 s8, s8, s11 -; GFX9-NEXT: s_mul_hi_u32 s13, s10, s8 -; GFX9-NEXT: s_mul_i32 s14, s10, s8 -; GFX9-NEXT: s_mul_i32 s16, s11, s12 -; GFX9-NEXT: s_mul_hi_u32 s8, s11, s8 -; GFX9-NEXT: s_mul_hi_u32 s15, s11, s12 -; GFX9-NEXT: s_add_u32 s8, s8, s16 -; GFX9-NEXT: s_addc_u32 s15, 0, s15 -; GFX9-NEXT: s_add_u32 s8, s8, s14 -; GFX9-NEXT: s_mul_hi_u32 s9, s10, s12 -; GFX9-NEXT: s_addc_u32 s8, s15, s13 -; GFX9-NEXT: s_addc_u32 s9, s9, 0 -; GFX9-NEXT: s_mul_i32 s12, s10, s12 -; GFX9-NEXT: s_add_u32 s8, s8, s12 +; GFX9-NEXT: s_add_i32 s8, s8, s9 +; GFX9-NEXT: s_mul_hi_u32 s13, s10, s12 +; GFX9-NEXT: s_mul_i32 s14, s10, s12 +; GFX9-NEXT: s_mul_i32 s15, s11, s8 +; GFX9-NEXT: s_mul_hi_u32 s12, s11, s12 +; GFX9-NEXT: s_mul_hi_u32 s9, s11, s8 +; GFX9-NEXT: s_add_u32 s12, s12, s15 ; GFX9-NEXT: s_addc_u32 s9, 0, s9 +; GFX9-NEXT: s_add_u32 s12, s12, s14 +; GFX9-NEXT: s_addc_u32 s9, s9, s13 +; GFX9-NEXT: s_mul_hi_u32 s12, s10, s8 +; GFX9-NEXT: s_addc_u32 s12, s12, 0 +; GFX9-NEXT: s_mul_i32 s8, s10, s8 +; GFX9-NEXT: s_add_u32 s8, s9, s8 +; GFX9-NEXT: s_addc_u32 s9, 0, s12 ; GFX9-NEXT: s_add_u32 s8, s11, s8 ; GFX9-NEXT: s_addc_u32 s9, s10, s9 ; GFX9-NEXT: s_mul_i32 s11, s2, s9 @@ -2905,12 +2905,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX9-NEXT: s_mul_hi_u32 s10, s2, s9 ; GFX9-NEXT: s_add_u32 s11, s12, s11 ; GFX9-NEXT: s_addc_u32 s10, 0, s10 -; GFX9-NEXT: s_mul_hi_u32 s13, s3, s8 +; GFX9-NEXT: s_mul_hi_u32 s12, s3, s8 ; GFX9-NEXT: s_mul_i32 
s8, s3, s8 ; GFX9-NEXT: s_add_u32 s8, s11, s8 -; GFX9-NEXT: s_mul_hi_u32 s12, s3, s9 -; GFX9-NEXT: s_addc_u32 s8, s10, s13 -; GFX9-NEXT: s_addc_u32 s10, s12, 0 +; GFX9-NEXT: s_addc_u32 s8, s10, s12 +; GFX9-NEXT: s_mul_hi_u32 s10, s3, s9 +; GFX9-NEXT: s_addc_u32 s10, s10, 0 ; GFX9-NEXT: s_mul_i32 s9, s3, s9 ; GFX9-NEXT: s_add_u32 s11, s8, s9 ; GFX9-NEXT: s_addc_u32 s10, 0, s10 @@ -3030,24 +3030,24 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX1010-NEXT: s_addc_u32 s12, 0, s13 ; GFX1010-NEXT: s_add_u32 s5, s5, s11 ; GFX1010-NEXT: s_addc_u32 s4, s4, s12 -; GFX1010-NEXT: s_mul_hi_u32 s11, s9, s5 -; GFX1010-NEXT: s_mul_i32 s12, s9, s5 +; GFX1010-NEXT: s_mul_i32 s11, s9, s5 +; GFX1010-NEXT: s_mul_hi_u32 s12, s9, s5 ; GFX1010-NEXT: s_mul_i32 s9, s9, s4 ; GFX1010-NEXT: s_mul_i32 s10, s10, s5 -; GFX1010-NEXT: s_add_i32 s9, s11, s9 -; GFX1010-NEXT: s_mul_i32 s11, s4, s12 +; GFX1010-NEXT: s_add_i32 s9, s12, s9 +; GFX1010-NEXT: s_mul_hi_u32 s13, s4, s11 ; GFX1010-NEXT: s_add_i32 s9, s9, s10 -; GFX1010-NEXT: s_mul_hi_u32 s10, s5, s12 -; GFX1010-NEXT: s_mul_i32 s15, s5, s9 -; GFX1010-NEXT: s_mul_hi_u32 s14, s5, s9 -; GFX1010-NEXT: s_add_u32 s10, s10, s15 -; GFX1010-NEXT: s_mul_hi_u32 s13, s4, s12 -; GFX1010-NEXT: s_addc_u32 s14, 0, s14 -; GFX1010-NEXT: s_mul_hi_u32 s12, s4, s9 -; GFX1010-NEXT: s_add_u32 s10, s10, s11 +; GFX1010-NEXT: s_mul_hi_u32 s10, s5, s11 +; GFX1010-NEXT: s_mul_i32 s14, s5, s9 +; GFX1010-NEXT: s_mul_i32 s12, s4, s11 +; GFX1010-NEXT: s_mul_hi_u32 s11, s5, s9 +; GFX1010-NEXT: s_add_u32 s10, s10, s14 +; GFX1010-NEXT: s_addc_u32 s11, 0, s11 +; GFX1010-NEXT: s_mul_hi_u32 s15, s4, s9 +; GFX1010-NEXT: s_add_u32 s10, s10, s12 ; GFX1010-NEXT: s_mul_i32 s9, s4, s9 -; GFX1010-NEXT: s_addc_u32 s10, s14, s13 -; GFX1010-NEXT: s_addc_u32 s11, s12, 0 +; GFX1010-NEXT: s_addc_u32 s10, s11, s13 +; GFX1010-NEXT: s_addc_u32 s11, s15, 0 ; GFX1010-NEXT: s_add_u32 s9, s10, s9 ; GFX1010-NEXT: s_addc_u32 s10, 0, s11 ; GFX1010-NEXT: s_add_u32 s5, 
s5, s9 @@ -3183,24 +3183,24 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX1030W32-NEXT: s_addc_u32 s12, 0, s13 ; GFX1030W32-NEXT: s_add_u32 s7, s7, s11 ; GFX1030W32-NEXT: s_addc_u32 s6, s6, s12 -; GFX1030W32-NEXT: s_mul_hi_u32 s11, s9, s7 -; GFX1030W32-NEXT: s_mul_i32 s12, s9, s7 +; GFX1030W32-NEXT: s_mul_i32 s11, s9, s7 +; GFX1030W32-NEXT: s_mul_hi_u32 s12, s9, s7 ; GFX1030W32-NEXT: s_mul_i32 s9, s9, s6 ; GFX1030W32-NEXT: s_mul_i32 s10, s10, s7 -; GFX1030W32-NEXT: s_add_i32 s9, s11, s9 -; GFX1030W32-NEXT: s_mul_i32 s11, s6, s12 +; GFX1030W32-NEXT: s_add_i32 s9, s12, s9 +; GFX1030W32-NEXT: s_mul_hi_u32 s13, s6, s11 ; GFX1030W32-NEXT: s_add_i32 s9, s9, s10 -; GFX1030W32-NEXT: s_mul_hi_u32 s10, s7, s12 -; GFX1030W32-NEXT: s_mul_i32 s15, s7, s9 -; GFX1030W32-NEXT: s_mul_hi_u32 s14, s7, s9 -; GFX1030W32-NEXT: s_add_u32 s10, s10, s15 -; GFX1030W32-NEXT: s_mul_hi_u32 s13, s6, s12 -; GFX1030W32-NEXT: s_addc_u32 s14, 0, s14 -; GFX1030W32-NEXT: s_mul_hi_u32 s12, s6, s9 -; GFX1030W32-NEXT: s_add_u32 s10, s10, s11 +; GFX1030W32-NEXT: s_mul_hi_u32 s10, s7, s11 +; GFX1030W32-NEXT: s_mul_i32 s14, s7, s9 +; GFX1030W32-NEXT: s_mul_i32 s12, s6, s11 +; GFX1030W32-NEXT: s_mul_hi_u32 s11, s7, s9 +; GFX1030W32-NEXT: s_add_u32 s10, s10, s14 +; GFX1030W32-NEXT: s_addc_u32 s11, 0, s11 +; GFX1030W32-NEXT: s_mul_hi_u32 s15, s6, s9 +; GFX1030W32-NEXT: s_add_u32 s10, s10, s12 ; GFX1030W32-NEXT: s_mul_i32 s9, s6, s9 -; GFX1030W32-NEXT: s_addc_u32 s10, s14, s13 -; GFX1030W32-NEXT: s_addc_u32 s11, s12, 0 +; GFX1030W32-NEXT: s_addc_u32 s10, s11, s13 +; GFX1030W32-NEXT: s_addc_u32 s11, s15, 0 ; GFX1030W32-NEXT: s_add_u32 s9, s10, s9 ; GFX1030W32-NEXT: s_addc_u32 s10, 0, s11 ; GFX1030W32-NEXT: s_add_u32 s7, s7, s9 @@ -3335,24 +3335,24 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX1030W64-NEXT: s_addc_u32 s11, 0, s12 ; GFX1030W64-NEXT: s_add_u32 s7, s7, s10 ; GFX1030W64-NEXT: s_addc_u32 s6, s6, s11 -; GFX1030W64-NEXT: 
s_mul_hi_u32 s10, s8, s7 -; GFX1030W64-NEXT: s_mul_i32 s11, s8, s7 +; GFX1030W64-NEXT: s_mul_i32 s10, s8, s7 +; GFX1030W64-NEXT: s_mul_hi_u32 s11, s8, s7 ; GFX1030W64-NEXT: s_mul_i32 s8, s8, s6 ; GFX1030W64-NEXT: s_mul_i32 s9, s9, s7 -; GFX1030W64-NEXT: s_add_i32 s8, s10, s8 -; GFX1030W64-NEXT: s_mul_i32 s10, s6, s11 +; GFX1030W64-NEXT: s_add_i32 s8, s11, s8 +; GFX1030W64-NEXT: s_mul_hi_u32 s12, s6, s10 ; GFX1030W64-NEXT: s_add_i32 s8, s8, s9 -; GFX1030W64-NEXT: s_mul_hi_u32 s9, s7, s11 -; GFX1030W64-NEXT: s_mul_i32 s14, s7, s8 -; GFX1030W64-NEXT: s_mul_hi_u32 s13, s7, s8 -; GFX1030W64-NEXT: s_add_u32 s9, s9, s14 -; GFX1030W64-NEXT: s_mul_hi_u32 s12, s6, s11 -; GFX1030W64-NEXT: s_addc_u32 s13, 0, s13 -; GFX1030W64-NEXT: s_mul_hi_u32 s11, s6, s8 -; GFX1030W64-NEXT: s_add_u32 s9, s9, s10 +; GFX1030W64-NEXT: s_mul_hi_u32 s9, s7, s10 +; GFX1030W64-NEXT: s_mul_i32 s13, s7, s8 +; GFX1030W64-NEXT: s_mul_i32 s11, s6, s10 +; GFX1030W64-NEXT: s_mul_hi_u32 s10, s7, s8 +; GFX1030W64-NEXT: s_add_u32 s9, s9, s13 +; GFX1030W64-NEXT: s_addc_u32 s10, 0, s10 +; GFX1030W64-NEXT: s_mul_hi_u32 s14, s6, s8 +; GFX1030W64-NEXT: s_add_u32 s9, s9, s11 ; GFX1030W64-NEXT: s_mul_i32 s8, s6, s8 -; GFX1030W64-NEXT: s_addc_u32 s9, s13, s12 -; GFX1030W64-NEXT: s_addc_u32 s10, s11, 0 +; GFX1030W64-NEXT: s_addc_u32 s9, s10, s12 +; GFX1030W64-NEXT: s_addc_u32 s10, s14, 0 ; GFX1030W64-NEXT: s_add_u32 s8, s9, s8 ; GFX1030W64-NEXT: s_addc_u32 s9, 0, s10 ; GFX1030W64-NEXT: s_add_u32 s7, s7, s8 @@ -3493,24 +3493,24 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GFX11-NEXT: s_addc_u32 s12, 0, s13 ; GFX11-NEXT: s_add_u32 s7, s7, s11 ; GFX11-NEXT: s_addc_u32 s6, s6, s12 -; GFX11-NEXT: s_mul_hi_u32 s11, s9, s7 -; GFX11-NEXT: s_mul_i32 s12, s9, s7 +; GFX11-NEXT: s_mul_i32 s11, s9, s7 +; GFX11-NEXT: s_mul_hi_u32 s12, s9, s7 ; GFX11-NEXT: s_mul_i32 s9, s9, s6 ; GFX11-NEXT: s_mul_i32 s10, s10, s7 -; GFX11-NEXT: s_add_i32 s9, s11, s9 -; GFX11-NEXT: s_mul_i32 s11, s6, s12 +; 
GFX11-NEXT: s_add_i32 s9, s12, s9 +; GFX11-NEXT: s_mul_hi_u32 s13, s6, s11 ; GFX11-NEXT: s_add_i32 s9, s9, s10 -; GFX11-NEXT: s_mul_hi_u32 s10, s7, s12 -; GFX11-NEXT: s_mul_i32 s15, s7, s9 -; GFX11-NEXT: s_mul_hi_u32 s14, s7, s9 -; GFX11-NEXT: s_add_u32 s10, s10, s15 -; GFX11-NEXT: s_mul_hi_u32 s13, s6, s12 -; GFX11-NEXT: s_addc_u32 s14, 0, s14 -; GFX11-NEXT: s_mul_hi_u32 s12, s6, s9 -; GFX11-NEXT: s_add_u32 s10, s10, s11 +; GFX11-NEXT: s_mul_hi_u32 s10, s7, s11 +; GFX11-NEXT: s_mul_i32 s14, s7, s9 +; GFX11-NEXT: s_mul_i32 s12, s6, s11 +; GFX11-NEXT: s_mul_hi_u32 s11, s7, s9 +; GFX11-NEXT: s_add_u32 s10, s10, s14 +; GFX11-NEXT: s_addc_u32 s11, 0, s11 +; GFX11-NEXT: s_mul_hi_u32 s15, s6, s9 +; GFX11-NEXT: s_add_u32 s10, s10, s12 ; GFX11-NEXT: s_mul_i32 s9, s6, s9 -; GFX11-NEXT: s_addc_u32 s10, s14, s13 -; GFX11-NEXT: s_addc_u32 s11, s12, 0 +; GFX11-NEXT: s_addc_u32 s10, s11, s13 +; GFX11-NEXT: s_addc_u32 s11, s15, 0 ; GFX11-NEXT: s_add_u32 s9, s10, s9 ; GFX11-NEXT: s_addc_u32 s10, 0, s11 ; GFX11-NEXT: s_add_u32 s7, s7, s9 @@ -4030,99 +4030,99 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-ISEL-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[V_MUL_HI_U32_e64_4]], %subreg.sub0, [[S_MOV_B32_9]], %subreg.sub1 ; GCN-ISEL-NEXT: [[S_ADD_U:%[0-9]+]]:sreg_64 = S_ADD_U64_PSEUDO killed [[REG_SEQUENCE5]], killed [[REG_SEQUENCE4]], implicit-def dead $scc ; GCN-ISEL-NEXT: [[COPY31:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U]].sub0 -; GCN-ISEL-NEXT: [[COPY32:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U]].sub1 -; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_5:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[V_CVT_U32_F32_e64_1]], [[S_ADD_I32_4]], implicit $exec -; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_6:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[V_CVT_U32_F32_e64_1]], [[S_MUL_I32_5]], implicit $exec -; GCN-ISEL-NEXT: [[COPY33:%[0-9]+]]:sreg_32 = COPY [[V_CVT_U32_F32_e64_1]] -; GCN-ISEL-NEXT: [[S_MUL_I32_6:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY33]], [[S_MUL_I32_5]] -; 
GCN-ISEL-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_6]], %subreg.sub0, killed [[V_MUL_HI_U32_e64_6]], %subreg.sub1 -; GCN-ISEL-NEXT: [[COPY34:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE6]].sub0 +; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_5:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[V_CVT_U32_F32_e64_1]], [[S_MUL_I32_5]], implicit $exec +; GCN-ISEL-NEXT: [[COPY32:%[0-9]+]]:sreg_32 = COPY [[V_CVT_U32_F32_e64_1]] +; GCN-ISEL-NEXT: [[S_MUL_I32_6:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY32]], [[S_MUL_I32_5]] +; GCN-ISEL-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_6]], %subreg.sub0, killed [[V_MUL_HI_U32_e64_5]], %subreg.sub1 +; GCN-ISEL-NEXT: [[COPY33:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE6]].sub0 +; GCN-ISEL-NEXT: [[S_UADDO:%[0-9]+]]:sreg_32, [[S_UADDO1:%[0-9]+]]:sreg_64_xexec = S_UADDO_PSEUDO killed [[COPY31]], killed [[COPY33]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[COPY34:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U]].sub1 ; GCN-ISEL-NEXT: [[COPY35:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE6]].sub1 +; GCN-ISEL-NEXT: [[S_ADD_C:%[0-9]+]]:sreg_32, [[S_ADD_C1:%[0-9]+]]:sreg_64_xexec = S_ADD_CO_PSEUDO killed [[COPY34]], killed [[COPY35]], killed [[S_UADDO1]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_6:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[V_CVT_U32_F32_e64_1]], [[S_ADD_I32_4]], implicit $exec ; GCN-ISEL-NEXT: [[S_MOV_B32_10:%[0-9]+]]:sreg_32 = S_MOV_B32 0 -; GCN-ISEL-NEXT: [[S_ADD_U32_:%[0-9]+]]:sreg_32 = S_ADD_U32 killed [[COPY31]], killed [[COPY34]], implicit-def $scc -; GCN-ISEL-NEXT: [[S_ADDC_U32_:%[0-9]+]]:sreg_32 = S_ADDC_U32 killed [[COPY32]], killed [[COPY35]], implicit-def $scc, implicit $scc -; GCN-ISEL-NEXT: [[COPY36:%[0-9]+]]:sreg_32 = COPY [[V_MUL_HI_U32_e64_5]] -; GCN-ISEL-NEXT: [[S_ADDC_U32_1:%[0-9]+]]:sreg_32 = S_ADDC_U32 killed [[COPY36]], [[S_MOV_B32_10]], implicit-def dead $scc, implicit $scc +; GCN-ISEL-NEXT: [[COPY36:%[0-9]+]]:sreg_32 = COPY [[V_MUL_HI_U32_e64_6]] +; GCN-ISEL-NEXT: 
[[S_ADD_C2:%[0-9]+]]:sreg_32, [[S_ADD_C3:%[0-9]+]]:sreg_64_xexec = S_ADD_CO_PSEUDO killed [[COPY36]], [[S_MOV_B32_10]], killed [[S_ADD_C1]], implicit-def dead $scc ; GCN-ISEL-NEXT: [[COPY37:%[0-9]+]]:sreg_32 = COPY [[V_CVT_U32_F32_e64_1]] ; GCN-ISEL-NEXT: [[S_MUL_I32_7:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY37]], [[S_ADD_I32_4]] -; GCN-ISEL-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_7]], %subreg.sub0, killed [[S_ADDC_U32_1]], %subreg.sub1 -; GCN-ISEL-NEXT: [[REG_SEQUENCE8:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_ADD_U32_]], %subreg.sub0, killed [[S_ADDC_U32_]], %subreg.sub1 +; GCN-ISEL-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_7]], %subreg.sub0, killed [[S_ADD_C2]], %subreg.sub1 +; GCN-ISEL-NEXT: [[REG_SEQUENCE8:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_UADDO]], %subreg.sub0, killed [[S_ADD_C]], %subreg.sub1 ; GCN-ISEL-NEXT: [[COPY38:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE8]].sub1 ; GCN-ISEL-NEXT: [[REG_SEQUENCE9:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY38]], %subreg.sub0, [[S_MOV_B32_10]], %subreg.sub1 ; GCN-ISEL-NEXT: [[S_ADD_U1:%[0-9]+]]:sreg_64 = S_ADD_U64_PSEUDO killed [[REG_SEQUENCE9]], killed [[REG_SEQUENCE7]], implicit-def dead $scc ; GCN-ISEL-NEXT: [[COPY39:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U1]].sub0 ; GCN-ISEL-NEXT: [[COPY40:%[0-9]+]]:sreg_32 = COPY [[V_CVT_U32_F32_e64_]] -; GCN-ISEL-NEXT: [[S_UADDO:%[0-9]+]]:sreg_32, [[S_UADDO1:%[0-9]+]]:sreg_64_xexec = S_UADDO_PSEUDO [[COPY40]], killed [[COPY39]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[S_UADDO2:%[0-9]+]]:sreg_32, [[S_UADDO3:%[0-9]+]]:sreg_64_xexec = S_UADDO_PSEUDO [[COPY40]], killed [[COPY39]], implicit-def dead $scc ; GCN-ISEL-NEXT: [[COPY41:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U1]].sub1 ; GCN-ISEL-NEXT: [[COPY42:%[0-9]+]]:sreg_32 = COPY [[V_CVT_U32_F32_e64_1]] -; GCN-ISEL-NEXT: [[S_ADD_C:%[0-9]+]]:sreg_32, [[S_ADD_C1:%[0-9]+]]:sreg_64_xexec = S_ADD_CO_PSEUDO [[COPY42]], killed [[COPY41]], killed [[S_UADDO1]], implicit-def dead $scc 
-; GCN-ISEL-NEXT: [[S_MUL_I32_8:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY26]], [[S_ADD_C]] -; GCN-ISEL-NEXT: [[COPY43:%[0-9]+]]:vgpr_32 = COPY [[S_UADDO]] -; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_7:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY26]], [[COPY43]], implicit $exec -; GCN-ISEL-NEXT: [[COPY44:%[0-9]+]]:sreg_32 = COPY [[V_MUL_HI_U32_e64_7]] -; GCN-ISEL-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[COPY44]], killed [[S_MUL_I32_8]], implicit-def dead $scc -; GCN-ISEL-NEXT: [[S_MUL_I32_9:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY24]], [[S_UADDO]] -; GCN-ISEL-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[S_ADD_I32_5]], killed [[S_MUL_I32_9]], implicit-def dead $scc -; GCN-ISEL-NEXT: [[COPY45:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_6]] -; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_8:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[S_ADD_C]], [[COPY45]], implicit $exec -; GCN-ISEL-NEXT: [[S_MUL_I32_10:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY26]], [[S_UADDO]] -; GCN-ISEL-NEXT: [[COPY46:%[0-9]+]]:vgpr_32 = COPY [[S_MUL_I32_10]] -; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_9:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[S_ADD_C]], [[COPY46]], implicit $exec -; GCN-ISEL-NEXT: [[S_MUL_I32_11:%[0-9]+]]:sreg_32 = S_MUL_I32 [[S_ADD_C]], [[S_MUL_I32_10]] -; GCN-ISEL-NEXT: [[REG_SEQUENCE10:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_11]], %subreg.sub0, killed [[V_MUL_HI_U32_e64_9]], %subreg.sub1 -; GCN-ISEL-NEXT: [[COPY47:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE10]].sub0 -; GCN-ISEL-NEXT: [[COPY48:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE10]].sub1 -; GCN-ISEL-NEXT: [[COPY49:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_6]] -; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_10:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[S_UADDO]], [[COPY49]], implicit $exec -; GCN-ISEL-NEXT: [[S_MUL_I32_12:%[0-9]+]]:sreg_32 = S_MUL_I32 [[S_UADDO]], [[S_ADD_I32_6]] -; GCN-ISEL-NEXT: [[REG_SEQUENCE11:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_12]], %subreg.sub0, killed [[V_MUL_HI_U32_e64_10]], %subreg.sub1 -; GCN-ISEL-NEXT: 
[[COPY50:%[0-9]+]]:vgpr_32 = COPY [[S_MUL_I32_10]] -; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_11:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[S_UADDO]], [[COPY50]], implicit $exec -; GCN-ISEL-NEXT: [[REG_SEQUENCE12:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[V_MUL_HI_U32_e64_11]], %subreg.sub0, [[S_MOV_B32_9]], %subreg.sub1 +; GCN-ISEL-NEXT: [[S_ADD_C4:%[0-9]+]]:sreg_32, [[S_ADD_C5:%[0-9]+]]:sreg_64_xexec = S_ADD_CO_PSEUDO [[COPY42]], killed [[COPY41]], killed [[S_UADDO3]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[S_MUL_I32_8:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY26]], [[S_UADDO2]] +; GCN-ISEL-NEXT: [[COPY43:%[0-9]+]]:vgpr_32 = COPY [[S_MUL_I32_8]] +; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_7:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[S_ADD_C4]], [[COPY43]], implicit $exec +; GCN-ISEL-NEXT: [[S_MUL_I32_9:%[0-9]+]]:sreg_32 = S_MUL_I32 [[S_ADD_C4]], [[S_MUL_I32_8]] +; GCN-ISEL-NEXT: [[REG_SEQUENCE10:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_9]], %subreg.sub0, killed [[V_MUL_HI_U32_e64_7]], %subreg.sub1 +; GCN-ISEL-NEXT: [[COPY44:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE10]].sub0 +; GCN-ISEL-NEXT: [[S_MUL_I32_10:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY26]], [[S_ADD_C4]] +; GCN-ISEL-NEXT: [[COPY45:%[0-9]+]]:vgpr_32 = COPY [[S_UADDO2]] +; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_8:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY26]], [[COPY45]], implicit $exec +; GCN-ISEL-NEXT: [[COPY46:%[0-9]+]]:sreg_32 = COPY [[V_MUL_HI_U32_e64_8]] +; GCN-ISEL-NEXT: [[S_ADD_I32_5:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[COPY46]], killed [[S_MUL_I32_10]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[S_MUL_I32_11:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY24]], [[S_UADDO2]] +; GCN-ISEL-NEXT: [[S_ADD_I32_6:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[S_ADD_I32_5]], killed [[S_MUL_I32_11]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[COPY47:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_6]] +; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_9:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[S_UADDO2]], [[COPY47]], implicit $exec +; GCN-ISEL-NEXT: 
[[S_MUL_I32_12:%[0-9]+]]:sreg_32 = S_MUL_I32 [[S_UADDO2]], [[S_ADD_I32_6]] +; GCN-ISEL-NEXT: [[REG_SEQUENCE11:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_12]], %subreg.sub0, killed [[V_MUL_HI_U32_e64_9]], %subreg.sub1 +; GCN-ISEL-NEXT: [[COPY48:%[0-9]+]]:vgpr_32 = COPY [[S_MUL_I32_8]] +; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_10:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[S_UADDO2]], [[COPY48]], implicit $exec +; GCN-ISEL-NEXT: [[REG_SEQUENCE12:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[V_MUL_HI_U32_e64_10]], %subreg.sub0, [[S_MOV_B32_9]], %subreg.sub1 ; GCN-ISEL-NEXT: [[S_ADD_U2:%[0-9]+]]:sreg_64 = S_ADD_U64_PSEUDO killed [[REG_SEQUENCE12]], killed [[REG_SEQUENCE11]], implicit-def dead $scc -; GCN-ISEL-NEXT: [[COPY51:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U2]].sub0 -; GCN-ISEL-NEXT: [[COPY52:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U2]].sub1 -; GCN-ISEL-NEXT: [[S_ADD_U32_1:%[0-9]+]]:sreg_32 = S_ADD_U32 killed [[COPY51]], killed [[COPY47]], implicit-def $scc -; GCN-ISEL-NEXT: [[S_ADDC_U32_2:%[0-9]+]]:sreg_32 = S_ADDC_U32 killed [[COPY52]], killed [[COPY48]], implicit-def $scc, implicit $scc -; GCN-ISEL-NEXT: [[COPY53:%[0-9]+]]:sreg_32 = COPY [[V_MUL_HI_U32_e64_8]] -; GCN-ISEL-NEXT: [[S_ADDC_U32_3:%[0-9]+]]:sreg_32 = S_ADDC_U32 killed [[COPY53]], [[S_MOV_B32_10]], implicit-def dead $scc, implicit $scc -; GCN-ISEL-NEXT: [[S_MUL_I32_13:%[0-9]+]]:sreg_32 = S_MUL_I32 [[S_ADD_C]], [[S_ADD_I32_6]] -; GCN-ISEL-NEXT: [[REG_SEQUENCE13:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_13]], %subreg.sub0, killed [[S_ADDC_U32_3]], %subreg.sub1 -; GCN-ISEL-NEXT: [[REG_SEQUENCE14:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_ADD_U32_1]], %subreg.sub0, killed [[S_ADDC_U32_2]], %subreg.sub1 -; GCN-ISEL-NEXT: [[COPY54:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE14]].sub1 -; GCN-ISEL-NEXT: [[REG_SEQUENCE15:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY54]], %subreg.sub0, [[S_MOV_B32_10]], %subreg.sub1 -; GCN-ISEL-NEXT: [[S_ADD_U3:%[0-9]+]]:sreg_64 = S_ADD_U64_PSEUDO killed [[REG_SEQUENCE15]], killed 
[[REG_SEQUENCE13]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[COPY49:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U2]].sub0 +; GCN-ISEL-NEXT: [[S_UADDO4:%[0-9]+]]:sreg_32, [[S_UADDO5:%[0-9]+]]:sreg_64_xexec = S_UADDO_PSEUDO killed [[COPY49]], killed [[COPY44]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[COPY50:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE10]].sub1 +; GCN-ISEL-NEXT: [[COPY51:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U2]].sub1 +; GCN-ISEL-NEXT: [[S_ADD_C6:%[0-9]+]]:sreg_32, [[S_ADD_C7:%[0-9]+]]:sreg_64_xexec = S_ADD_CO_PSEUDO killed [[COPY51]], killed [[COPY50]], killed [[S_UADDO5]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[REG_SEQUENCE13:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_UADDO4]], %subreg.sub0, killed [[S_ADD_C6]], %subreg.sub1 +; GCN-ISEL-NEXT: [[COPY52:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE13]].sub1 +; GCN-ISEL-NEXT: [[REG_SEQUENCE14:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY52]], %subreg.sub0, [[S_MOV_B32_10]], %subreg.sub1 +; GCN-ISEL-NEXT: [[COPY53:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_I32_6]] +; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_11:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[S_ADD_C4]], [[COPY53]], implicit $exec +; GCN-ISEL-NEXT: [[COPY54:%[0-9]+]]:sreg_32 = COPY [[V_MUL_HI_U32_e64_11]] +; GCN-ISEL-NEXT: [[S_ADD_C8:%[0-9]+]]:sreg_32, [[S_ADD_C9:%[0-9]+]]:sreg_64_xexec = S_ADD_CO_PSEUDO killed [[COPY54]], [[S_MOV_B32_10]], killed [[S_ADD_C7]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[S_MUL_I32_13:%[0-9]+]]:sreg_32 = S_MUL_I32 [[S_ADD_C4]], [[S_ADD_I32_6]] +; GCN-ISEL-NEXT: [[REG_SEQUENCE15:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_13]], %subreg.sub0, killed [[S_ADD_C8]], %subreg.sub1 +; GCN-ISEL-NEXT: [[S_ADD_U3:%[0-9]+]]:sreg_64 = S_ADD_U64_PSEUDO killed [[REG_SEQUENCE14]], killed [[REG_SEQUENCE15]], implicit-def dead $scc ; GCN-ISEL-NEXT: [[COPY55:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U3]].sub0 -; GCN-ISEL-NEXT: [[S_UADDO2:%[0-9]+]]:sreg_32, [[S_UADDO3:%[0-9]+]]:sreg_64_xexec = S_UADDO_PSEUDO [[S_UADDO]], killed [[COPY55]], implicit-def dead $scc +; 
GCN-ISEL-NEXT: [[S_UADDO6:%[0-9]+]]:sreg_32, [[S_UADDO7:%[0-9]+]]:sreg_64_xexec = S_UADDO_PSEUDO [[S_UADDO2]], killed [[COPY55]], implicit-def dead $scc ; GCN-ISEL-NEXT: [[COPY56:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U3]].sub1 -; GCN-ISEL-NEXT: [[S_ADD_C2:%[0-9]+]]:sreg_32, [[S_ADD_C3:%[0-9]+]]:sreg_64_xexec = S_ADD_CO_PSEUDO [[S_ADD_C]], killed [[COPY56]], killed [[S_UADDO3]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[S_ADD_C10:%[0-9]+]]:sreg_32, [[S_ADD_C11:%[0-9]+]]:sreg_64_xexec = S_ADD_CO_PSEUDO [[S_ADD_C4]], killed [[COPY56]], killed [[S_UADDO7]], implicit-def dead $scc ; GCN-ISEL-NEXT: [[COPY57:%[0-9]+]]:sreg_32 = COPY [[COPY8]].sub0 -; GCN-ISEL-NEXT: [[COPY58:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_C2]] +; GCN-ISEL-NEXT: [[COPY58:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_C10]] ; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_12:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY57]], [[COPY58]], implicit $exec -; GCN-ISEL-NEXT: [[S_MUL_I32_14:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY57]], [[S_ADD_C2]] +; GCN-ISEL-NEXT: [[S_MUL_I32_14:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY57]], [[S_ADD_C10]] ; GCN-ISEL-NEXT: [[REG_SEQUENCE16:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_14]], %subreg.sub0, killed [[V_MUL_HI_U32_e64_12]], %subreg.sub1 -; GCN-ISEL-NEXT: [[COPY59:%[0-9]+]]:vgpr_32 = COPY [[S_UADDO2]] +; GCN-ISEL-NEXT: [[COPY59:%[0-9]+]]:vgpr_32 = COPY [[S_UADDO6]] ; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_13:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY57]], [[COPY59]], implicit $exec ; GCN-ISEL-NEXT: [[REG_SEQUENCE17:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[V_MUL_HI_U32_e64_13]], %subreg.sub0, [[S_MOV_B32_9]], %subreg.sub1 ; GCN-ISEL-NEXT: [[S_ADD_U4:%[0-9]+]]:sreg_64 = S_ADD_U64_PSEUDO killed [[REG_SEQUENCE17]], killed [[REG_SEQUENCE16]], implicit-def dead $scc ; GCN-ISEL-NEXT: [[COPY60:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U4]].sub0 -; GCN-ISEL-NEXT: [[COPY61:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U4]].sub1 -; GCN-ISEL-NEXT: [[COPY62:%[0-9]+]]:sreg_32 = COPY [[COPY8]].sub1 -; GCN-ISEL-NEXT: [[COPY63:%[0-9]+]]:vgpr_32 
= COPY [[S_ADD_C2]] -; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_14:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY62]], [[COPY63]], implicit $exec -; GCN-ISEL-NEXT: [[COPY64:%[0-9]+]]:vgpr_32 = COPY [[S_UADDO2]] -; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_15:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY62]], [[COPY64]], implicit $exec -; GCN-ISEL-NEXT: [[S_MUL_I32_15:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY62]], [[S_UADDO2]] -; GCN-ISEL-NEXT: [[REG_SEQUENCE18:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_15]], %subreg.sub0, killed [[V_MUL_HI_U32_e64_15]], %subreg.sub1 -; GCN-ISEL-NEXT: [[COPY65:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE18]].sub0 -; GCN-ISEL-NEXT: [[COPY66:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE18]].sub1 -; GCN-ISEL-NEXT: [[S_ADD_U32_2:%[0-9]+]]:sreg_32 = S_ADD_U32 killed [[COPY60]], killed [[COPY65]], implicit-def $scc -; GCN-ISEL-NEXT: [[S_ADDC_U32_4:%[0-9]+]]:sreg_32 = S_ADDC_U32 killed [[COPY61]], killed [[COPY66]], implicit-def $scc, implicit $scc -; GCN-ISEL-NEXT: [[COPY67:%[0-9]+]]:sreg_32 = COPY [[V_MUL_HI_U32_e64_14]] -; GCN-ISEL-NEXT: [[S_ADDC_U32_5:%[0-9]+]]:sreg_32 = S_ADDC_U32 killed [[COPY67]], [[S_MOV_B32_10]], implicit-def dead $scc, implicit $scc -; GCN-ISEL-NEXT: [[S_MUL_I32_16:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY62]], [[S_ADD_C2]] -; GCN-ISEL-NEXT: [[REG_SEQUENCE19:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_16]], %subreg.sub0, killed [[S_ADDC_U32_5]], %subreg.sub1 -; GCN-ISEL-NEXT: [[REG_SEQUENCE20:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_ADD_U32_2]], %subreg.sub0, killed [[S_ADDC_U32_4]], %subreg.sub1 +; GCN-ISEL-NEXT: [[COPY61:%[0-9]+]]:sreg_32 = COPY [[COPY8]].sub1 +; GCN-ISEL-NEXT: [[COPY62:%[0-9]+]]:vgpr_32 = COPY [[S_UADDO6]] +; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_14:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY61]], [[COPY62]], implicit $exec +; GCN-ISEL-NEXT: [[S_MUL_I32_15:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY61]], [[S_UADDO6]] +; GCN-ISEL-NEXT: [[REG_SEQUENCE18:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_15]], %subreg.sub0, 
killed [[V_MUL_HI_U32_e64_14]], %subreg.sub1 +; GCN-ISEL-NEXT: [[COPY63:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE18]].sub0 +; GCN-ISEL-NEXT: [[S_UADDO8:%[0-9]+]]:sreg_32, [[S_UADDO9:%[0-9]+]]:sreg_64_xexec = S_UADDO_PSEUDO killed [[COPY60]], killed [[COPY63]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[COPY64:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U4]].sub1 +; GCN-ISEL-NEXT: [[COPY65:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE18]].sub1 +; GCN-ISEL-NEXT: [[S_ADD_C12:%[0-9]+]]:sreg_32, [[S_ADD_C13:%[0-9]+]]:sreg_64_xexec = S_ADD_CO_PSEUDO killed [[COPY64]], killed [[COPY65]], killed [[S_UADDO9]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[COPY66:%[0-9]+]]:vgpr_32 = COPY [[S_ADD_C10]] +; GCN-ISEL-NEXT: [[V_MUL_HI_U32_e64_15:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[COPY61]], [[COPY66]], implicit $exec +; GCN-ISEL-NEXT: [[COPY67:%[0-9]+]]:sreg_32 = COPY [[V_MUL_HI_U32_e64_15]] +; GCN-ISEL-NEXT: [[S_ADD_C14:%[0-9]+]]:sreg_32, [[S_ADD_C15:%[0-9]+]]:sreg_64_xexec = S_ADD_CO_PSEUDO killed [[COPY67]], [[S_MOV_B32_10]], killed [[S_ADD_C13]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[S_MUL_I32_16:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY61]], [[S_ADD_C10]] +; GCN-ISEL-NEXT: [[REG_SEQUENCE19:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MUL_I32_16]], %subreg.sub0, killed [[S_ADD_C14]], %subreg.sub1 +; GCN-ISEL-NEXT: [[REG_SEQUENCE20:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_UADDO8]], %subreg.sub0, killed [[S_ADD_C12]], %subreg.sub1 ; GCN-ISEL-NEXT: [[COPY68:%[0-9]+]]:sreg_32 = COPY [[REG_SEQUENCE20]].sub1 ; GCN-ISEL-NEXT: [[REG_SEQUENCE21:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[COPY68]], %subreg.sub0, [[S_MOV_B32_10]], %subreg.sub1 ; GCN-ISEL-NEXT: [[S_ADD_U5:%[0-9]+]]:sreg_64 = S_ADD_U64_PSEUDO killed [[REG_SEQUENCE21]], killed [[REG_SEQUENCE19]], implicit-def dead $scc @@ -4135,7 +4135,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-ISEL-NEXT: [[S_ADD_I32_7:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[COPY72]], killed [[S_MUL_I32_17]], implicit-def dead $scc 
; GCN-ISEL-NEXT: [[S_MUL_I32_18:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY23]], [[COPY70]] ; GCN-ISEL-NEXT: [[S_ADD_I32_8:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[S_ADD_I32_7]], killed [[S_MUL_I32_18]], implicit-def dead $scc -; GCN-ISEL-NEXT: [[S_SUB_I32_3:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY62]], [[S_ADD_I32_8]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[S_SUB_I32_3:%[0-9]+]]:sreg_32 = S_SUB_I32 [[COPY61]], [[S_ADD_I32_8]], implicit-def dead $scc ; GCN-ISEL-NEXT: [[S_MUL_I32_19:%[0-9]+]]:sreg_32 = S_MUL_I32 [[COPY22]], [[COPY70]] ; GCN-ISEL-NEXT: [[S_USUBO:%[0-9]+]]:sreg_32, [[S_USUBO1:%[0-9]+]]:sreg_64_xexec = S_USUBO_PSEUDO [[COPY57]], killed [[S_MUL_I32_19]], implicit-def dead $scc ; GCN-ISEL-NEXT: [[S_SUB_C:%[0-9]+]]:sreg_32, [[S_SUB_C1:%[0-9]+]]:sreg_64_xexec = S_SUB_CO_PSEUDO killed [[S_SUB_I32_3]], [[COPY23]], [[S_USUBO1]], implicit-def dead $scc @@ -4161,7 +4161,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-ISEL-NEXT: [[COPY76:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U6]].sub1 ; GCN-ISEL-NEXT: [[COPY77:%[0-9]+]]:sreg_32 = COPY [[S_ADD_U7]].sub1 ; GCN-ISEL-NEXT: [[S_CSELECT_B32_7:%[0-9]+]]:sreg_32 = S_CSELECT_B32 killed [[COPY77]], killed [[COPY76]], implicit $scc -; GCN-ISEL-NEXT: [[S_SUB_C4:%[0-9]+]]:sreg_32, [[S_SUB_C5:%[0-9]+]]:sreg_64_xexec = S_SUB_CO_PSEUDO [[COPY62]], [[S_ADD_I32_8]], [[S_USUBO1]], implicit-def dead $scc +; GCN-ISEL-NEXT: [[S_SUB_C4:%[0-9]+]]:sreg_32, [[S_SUB_C5:%[0-9]+]]:sreg_64_xexec = S_SUB_CO_PSEUDO [[COPY61]], [[S_ADD_I32_8]], [[S_USUBO1]], implicit-def dead $scc ; GCN-ISEL-NEXT: S_CMP_GE_U32 [[S_SUB_C4]], [[COPY23]], implicit-def $scc ; GCN-ISEL-NEXT: [[S_CSELECT_B32_8:%[0-9]+]]:sreg_32 = S_CSELECT_B32 [[S_MOV_B32_11]], [[S_MOV_B32_10]], implicit $scc ; GCN-ISEL-NEXT: S_CMP_GE_U32 [[S_USUBO]], [[COPY22]], implicit-def $scc diff --git a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll index 5af56a28cd2f1..4b08dcbf94458 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll +++ 
b/llvm/test/CodeGen/AMDGPU/llvm.mulo.ll @@ -19,8 +19,7 @@ define { i64, i1 } @umulo_i64_v_v(i64 %x, i64 %y) { ; SI-NEXT: v_mul_lo_u32 v0, v0, v2 ; SI-NEXT: v_add_i32_e32 v1, vcc, v8, v7 ; SI-NEXT: v_addc_u32_e32 v2, vcc, 0, v6, vcc -; SI-NEXT: v_add_i32_e32 v6, vcc, v1, v5 -; SI-NEXT: v_add_i32_e64 v1, s[4:5], v1, v5 +; SI-NEXT: v_add_i32_e32 v1, vcc, v1, v5 ; SI-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc ; SI-NEXT: v_addc_u32_e32 v4, vcc, 0, v9, vcc ; SI-NEXT: v_add_i32_e32 v2, vcc, v2, v3 @@ -86,7 +85,7 @@ define { i64, i1 } @umulo_i64_v_v(i64 %x, i64 %y) { ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, v8 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_ci_u32_e32 v2, vcc_lo, v3, v9, vcc_lo -; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v11, vcc_lo +; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v11, vcc_lo ; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v2, v10 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, vcc_lo @@ -117,7 +116,7 @@ define { i64, i1 } @umulo_i64_v_v(i64 %x, i64 %y) { ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) ; GFX12-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, v6, v8, vcc_lo ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) -; GFX12-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo +; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, 0, v3, vcc_lo ; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, v4, v2 ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) @@ -145,19 +144,18 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) { ; SI-NEXT: v_mul_lo_u32 v4, v0, v2 ; SI-NEXT: v_add_i32_e32 v8, vcc, v9, v8 ; SI-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc -; SI-NEXT: v_add_i32_e32 v9, vcc, v8, v5 -; SI-NEXT: v_add_i32_e64 v5, s[4:5], v8, v5 -; SI-NEXT: v_addc_u32_e32 v8, vcc, v7, v6, vcc +; SI-NEXT: v_add_i32_e32 v5, vcc, v8, v5 
+; SI-NEXT: v_addc_u32_e32 v7, vcc, v7, v6, vcc +; SI-NEXT: v_addc_u32_e32 v8, vcc, 0, v10, vcc ; SI-NEXT: v_ashrrev_i32_e32 v6, 31, v5 -; SI-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc +; SI-NEXT: v_add_i32_e32 v9, vcc, v7, v11 +; SI-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc ; SI-NEXT: v_mov_b32_e32 v7, v6 -; SI-NEXT: v_add_i32_e32 v8, vcc, v8, v11 -; SI-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc -; SI-NEXT: v_sub_i32_e32 v2, vcc, v8, v2 -; SI-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v9, vcc +; SI-NEXT: v_sub_i32_e32 v2, vcc, v9, v2 +; SI-NEXT: v_subbrev_u32_e32 v10, vcc, 0, v8, vcc ; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v1 -; SI-NEXT: v_cndmask_b32_e32 v1, v9, v10, vcc -; SI-NEXT: v_cndmask_b32_e32 v2, v8, v2, vcc +; SI-NEXT: v_cndmask_b32_e32 v1, v8, v10, vcc +; SI-NEXT: v_cndmask_b32_e32 v2, v9, v2, vcc ; SI-NEXT: v_sub_i32_e32 v0, vcc, v2, v0 ; SI-NEXT: v_subbrev_u32_e32 v8, vcc, 0, v1, vcc ; SI-NEXT: v_cmp_gt_i32_e32 vcc, 0, v3 @@ -252,7 +250,7 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) { ; GFX11-NEXT: v_add_co_u32 v12, vcc_lo, v12, v8 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v7, v9, vcc_lo -; GFX11-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v11, vcc_lo +; GFX11-NEXT: v_add_co_ci_u32_e64 v9, null, 0, v11, vcc_lo ; GFX11-NEXT: v_add_co_u32 v7, vcc_lo, v7, v10 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX11-NEXT: v_add_co_ci_u32_e64 v9, null, 0, v9, vcc_lo @@ -298,7 +296,7 @@ define { i64, i1 } @smulo_i64_v_v(i64 %x, i64 %y) { ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) ; GFX12-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, v7, v9, vcc_lo ; GFX12-NEXT: s_wait_alu depctr_va_vcc(0) -; GFX12-NEXT: v_add_co_ci_u32_e32 v9, vcc_lo, 0, v11, vcc_lo +; GFX12-NEXT: v_add_co_ci_u32_e64 v9, null, 0, v11, vcc_lo ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-NEXT: v_add_co_u32 v7, vcc_lo, v7, v10 ; 
GFX12-NEXT: s_wait_alu depctr_va_vcc(0) @@ -345,19 +343,19 @@ define amdgpu_kernel void @umulo_i64_s(i64 %x, i64 %y) { ; SI-NEXT: s_mul_i32 s5, s0, s3 ; SI-NEXT: v_mul_hi_u32 v0, s0, v0 ; SI-NEXT: v_mul_hi_u32 v2, s1, v2 -; SI-NEXT: s_mul_i32 s1, s1, s3 +; SI-NEXT: s_mul_i32 s3, s1, s3 ; SI-NEXT: s_mul_i32 s2, s0, s2 ; SI-NEXT: v_add_i32_e32 v4, vcc, s5, v0 ; SI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc +; SI-NEXT: v_add_i32_e32 v0, vcc, s5, v0 ; SI-NEXT: v_add_i32_e32 v4, vcc, s4, v4 -; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v1, vcc -; SI-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc -; SI-NEXT: v_add_i32_e32 v3, vcc, s5, v0 -; SI-NEXT: v_add_i32_e32 v0, vcc, s1, v1 +; SI-NEXT: v_addc_u32_e64 v1, s[0:1], v3, v1, vcc +; SI-NEXT: v_add_i32_e32 v3, vcc, s4, v0 +; SI-NEXT: v_addc_u32_e64 v2, vcc, 0, v2, s[0:1] +; SI-NEXT: v_add_i32_e32 v0, vcc, s3, v1 ; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc -; SI-NEXT: v_add_i32_e32 v2, vcc, s4, v3 ; SI-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1] -; SI-NEXT: v_cndmask_b32_e64 v1, v2, 0, vcc +; SI-NEXT: v_cndmask_b32_e64 v1, v3, 0, vcc ; SI-NEXT: s_and_b64 s[0:1], vcc, exec ; SI-NEXT: s_cselect_b32 s0, 0, s2 ; SI-NEXT: s_mov_b32 s6, -1 @@ -377,9 +375,9 @@ define amdgpu_kernel void @umulo_i64_s(i64 %x, i64 %y) { ; GFX9-NEXT: s_addc_u32 s5, 0, s5 ; GFX9-NEXT: s_mul_hi_u32 s4, s1, s2 ; GFX9-NEXT: s_add_u32 s9, s9, s6 -; GFX9-NEXT: s_mul_hi_u32 s10, s1, s3 ; GFX9-NEXT: s_addc_u32 s4, s5, s4 -; GFX9-NEXT: s_addc_u32 s5, s10, 0 +; GFX9-NEXT: s_mul_hi_u32 s5, s1, s3 +; GFX9-NEXT: s_addc_u32 s5, s5, 0 ; GFX9-NEXT: s_mul_i32 s1, s1, s3 ; GFX9-NEXT: s_add_u32 s4, s4, s1 ; GFX9-NEXT: s_addc_u32 s5, 0, s5 @@ -548,9 +546,9 @@ define amdgpu_kernel void @smulo_i64_s(i64 %x, i64 %y) { ; GFX9-NEXT: s_addc_u32 s5, 0, s5 ; GFX9-NEXT: s_mul_hi_u32 s4, s1, s2 ; GFX9-NEXT: s_add_u32 s9, s9, s6 -; GFX9-NEXT: s_mul_hi_i32 s10, s1, s3 ; GFX9-NEXT: s_addc_u32 s4, s5, s4 -; GFX9-NEXT: s_addc_u32 s5, s10, 0 +; GFX9-NEXT: s_mul_hi_i32 s5, s1, s3 +; GFX9-NEXT: s_addc_u32 
s5, s5, 0 ; GFX9-NEXT: s_mul_i32 s9, s1, s3 ; GFX9-NEXT: s_add_u32 s4, s4, s9 ; GFX9-NEXT: s_addc_u32 s5, 0, s5 diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll index c447d6aa270d4..a29c21e476363 100644 --- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll @@ -8,6 +8,7 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd ; GCN-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 ; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_ashr_i32 s8, s1, 31 ; GCN-NEXT: s_add_u32 s0, s0, s8 @@ -28,60 +29,60 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_mul_hi_u32 v2, s0, v0 ; GCN-NEXT: v_readfirstlane_b32 s12, v1 -; GCN-NEXT: v_readfirstlane_b32 s2, v0 -; GCN-NEXT: s_mul_i32 s13, s0, s12 -; GCN-NEXT: v_readfirstlane_b32 s16, v2 -; GCN-NEXT: s_mul_i32 s14, s1, s2 -; GCN-NEXT: s_mul_i32 s15, s0, s2 -; GCN-NEXT: s_add_i32 s13, s16, s13 -; GCN-NEXT: v_mul_hi_u32 v3, v0, s15 -; GCN-NEXT: s_add_i32 s13, s13, s14 -; GCN-NEXT: v_mul_hi_u32 v0, v0, s13 -; GCN-NEXT: v_mul_hi_u32 v4, v1, s15 -; GCN-NEXT: v_readfirstlane_b32 s14, v3 -; GCN-NEXT: s_mul_i32 s16, s2, s13 -; GCN-NEXT: v_mul_hi_u32 v1, v1, s13 -; GCN-NEXT: s_add_u32 s14, s14, s16 +; GCN-NEXT: v_readfirstlane_b32 s13, v0 +; GCN-NEXT: s_mul_i32 s14, s0, s12 +; GCN-NEXT: v_readfirstlane_b32 s17, v2 +; GCN-NEXT: s_mul_i32 s15, s1, s13 +; GCN-NEXT: s_mul_i32 s16, s0, s13 +; GCN-NEXT: s_add_i32 s14, s17, s14 +; GCN-NEXT: v_mul_hi_u32 v3, v0, s16 +; GCN-NEXT: s_add_i32 s14, s14, s15 +; GCN-NEXT: v_mul_hi_u32 v0, v0, s14 +; GCN-NEXT: v_mul_hi_u32 v4, v1, s16 +; GCN-NEXT: v_readfirstlane_b32 s15, v3 +; GCN-NEXT: s_mul_i32 s17, s13, s14 +; GCN-NEXT: s_add_u32 s15, s15, s17 +; GCN-NEXT: v_readfirstlane_b32 s17, v0 +; GCN-NEXT: v_mul_hi_u32 v0, v1, s14 +; GCN-NEXT: 
s_mul_i32 s16, s12, s16 +; GCN-NEXT: s_addc_u32 s17, 0, s17 +; GCN-NEXT: v_readfirstlane_b32 s18, v4 +; GCN-NEXT: s_add_u32 s15, s15, s16 +; GCN-NEXT: s_addc_u32 s15, s17, s18 ; GCN-NEXT: v_readfirstlane_b32 s16, v0 -; GCN-NEXT: s_mul_i32 s15, s12, s15 -; GCN-NEXT: s_addc_u32 s16, 0, s16 -; GCN-NEXT: v_readfirstlane_b32 s17, v4 -; GCN-NEXT: s_add_u32 s14, s14, s15 -; GCN-NEXT: s_addc_u32 s14, s16, s17 +; GCN-NEXT: s_addc_u32 s16, s16, 0 +; GCN-NEXT: s_mul_i32 s14, s12, s14 +; GCN-NEXT: s_add_u32 s14, s15, s14 +; GCN-NEXT: s_addc_u32 s15, 0, s16 +; GCN-NEXT: s_add_u32 s13, s13, s14 +; GCN-NEXT: v_mov_b32_e32 v1, s13 +; GCN-NEXT: v_mul_hi_u32 v1, s0, v1 +; GCN-NEXT: s_addc_u32 s12, s12, s15 +; GCN-NEXT: s_mul_i32 s14, s0, s13 +; GCN-NEXT: s_mul_i32 s0, s0, s12 ; GCN-NEXT: v_readfirstlane_b32 s15, v1 -; GCN-NEXT: s_addc_u32 s15, s15, 0 -; GCN-NEXT: s_mul_i32 s13, s12, s13 -; GCN-NEXT: s_add_u32 s13, s14, s13 -; GCN-NEXT: s_addc_u32 s14, 0, s15 -; GCN-NEXT: s_add_u32 s13, s2, s13 -; GCN-NEXT: v_mov_b32_e32 v0, s13 -; GCN-NEXT: v_mul_hi_u32 v0, s0, v0 -; GCN-NEXT: s_addc_u32 s12, s12, s14 -; GCN-NEXT: s_mul_i32 s14, s0, s12 +; GCN-NEXT: s_add_i32 s0, s15, s0 ; GCN-NEXT: s_mul_i32 s1, s1, s13 -; GCN-NEXT: v_readfirstlane_b32 s15, v0 -; GCN-NEXT: s_add_i32 s14, s15, s14 -; GCN-NEXT: s_mul_i32 s0, s0, s13 -; GCN-NEXT: s_add_i32 s1, s14, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s0 -; GCN-NEXT: v_mov_b32_e32 v0, s1 -; GCN-NEXT: v_mul_hi_u32 v3, s12, v2 -; GCN-NEXT: v_mul_hi_u32 v2, s13, v2 -; GCN-NEXT: v_mul_hi_u32 v1, s12, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s14 +; GCN-NEXT: s_add_i32 s0, s0, s1 +; GCN-NEXT: v_mul_hi_u32 v2, s12, v0 +; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_mul_hi_u32 v0, s13, v0 -; GCN-NEXT: s_mul_i32 s15, s13, s1 -; GCN-NEXT: v_readfirstlane_b32 s17, v2 +; GCN-NEXT: v_mul_hi_u32 v3, s13, v1 +; GCN-NEXT: s_mul_i32 s15, s13, s0 +; GCN-NEXT: s_mul_i32 s14, s12, s14 +; GCN-NEXT: v_readfirstlane_b32 s17, v0 +; GCN-NEXT: v_mul_hi_u32 v0, s12, v1 +; GCN-NEXT: 
v_readfirstlane_b32 s16, v3 ; GCN-NEXT: s_add_u32 s15, s17, s15 -; GCN-NEXT: v_readfirstlane_b32 s16, v0 -; GCN-NEXT: s_mul_i32 s0, s12, s0 ; GCN-NEXT: s_addc_u32 s16, 0, s16 -; GCN-NEXT: v_readfirstlane_b32 s14, v3 -; GCN-NEXT: s_add_u32 s0, s15, s0 -; GCN-NEXT: s_addc_u32 s0, s16, s14 -; GCN-NEXT: v_readfirstlane_b32 s14, v1 +; GCN-NEXT: v_readfirstlane_b32 s1, v2 +; GCN-NEXT: s_add_u32 s14, s15, s14 +; GCN-NEXT: s_addc_u32 s1, s16, s1 +; GCN-NEXT: v_readfirstlane_b32 s14, v0 ; GCN-NEXT: s_addc_u32 s14, s14, 0 -; GCN-NEXT: s_mul_i32 s1, s12, s1 -; GCN-NEXT: s_add_u32 s0, s0, s1 +; GCN-NEXT: s_mul_i32 s0, s12, s0 +; GCN-NEXT: s_add_u32 s0, s1, s0 ; GCN-NEXT: s_addc_u32 s1, 0, s14 ; GCN-NEXT: s_add_u32 s14, s13, s0 ; GCN-NEXT: s_addc_u32 s15, s12, s1 @@ -155,7 +156,6 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-NEXT: s_sub_u32 s4, s4, s6 ; GCN-NEXT: s_subb_u32 s5, s5, s7 ; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: v_mov_b32_e32 v1, s5 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GCN-NEXT: s_endpgm @@ -293,25 +293,25 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) { ; GCN-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v9 ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc -; GCN-NEXT: v_mul_lo_u32 v9, v7, v6 -; GCN-NEXT: v_mul_hi_u32 v10, v7, v5 +; GCN-NEXT: v_mul_lo_u32 v9, v7, v5 +; GCN-NEXT: v_mul_lo_u32 v10, v7, v6 +; GCN-NEXT: v_mul_hi_u32 v7, v7, v5 ; GCN-NEXT: v_mul_lo_u32 v8, v8, v5 -; GCN-NEXT: v_mul_lo_u32 v7, v7, v5 -; GCN-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GCN-NEXT: v_mul_lo_u32 v11, v5, v8 +; GCN-NEXT: v_mul_hi_u32 v11, v6, v9 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v10 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; GCN-NEXT: v_mul_lo_u32 v8, v5, v7 +; GCN-NEXT: v_mul_hi_u32 v10, v5, v9 ; GCN-NEXT: v_mul_hi_u32 v12, v5, v7 -; GCN-NEXT: v_mul_hi_u32 v13, v5, v8 -; GCN-NEXT: v_mul_hi_u32 v10, v6, 
v7 +; GCN-NEXT: v_mul_lo_u32 v9, v6, v9 +; GCN-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; GCN-NEXT: v_addc_u32_e32 v10, vcc, 0, v12, vcc +; GCN-NEXT: v_mul_hi_u32 v12, v6, v7 ; GCN-NEXT: v_mul_lo_u32 v7, v6, v7 -; GCN-NEXT: v_mul_hi_u32 v9, v6, v8 -; GCN-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; GCN-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc -; GCN-NEXT: v_mul_lo_u32 v8, v6, v8 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; GCN-NEXT: v_addc_u32_e32 v7, vcc, v12, v10, vcc -; GCN-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc -; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; GCN-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; GCN-NEXT: v_addc_u32_e32 v8, vcc, v10, v11, vcc +; GCN-NEXT: v_addc_u32_e32 v9, vcc, 0, v12, vcc +; GCN-NEXT: v_add_i32_e32 v7, vcc, v8, v7 ; GCN-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; GCN-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc @@ -705,6 +705,7 @@ define amdgpu_kernel void @s_test_sdiv32_64(ptr addrspace(1) %out, i64 %x, i64 % ; GCN-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x9 ; GCN-NEXT: s_load_dword s0, s[4:5], 0xe ; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_ashr_i32 s4, s11, 31 ; GCN-NEXT: s_ashr_i32 s6, s0, 31 @@ -726,60 +727,60 @@ define amdgpu_kernel void @s_test_sdiv32_64(ptr addrspace(1) %out, i64 %x, i64 % ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_mul_hi_u32 v2, s0, v0 ; GCN-NEXT: v_readfirstlane_b32 s5, v1 -; GCN-NEXT: v_readfirstlane_b32 s2, v0 -; GCN-NEXT: s_mul_i32 s10, s0, s5 -; GCN-NEXT: v_readfirstlane_b32 s16, v2 -; GCN-NEXT: s_mul_i32 s14, s1, s2 -; GCN-NEXT: s_mul_i32 s15, s0, s2 -; GCN-NEXT: s_add_i32 s10, s16, s10 -; GCN-NEXT: v_mul_hi_u32 v3, v0, s15 -; GCN-NEXT: s_add_i32 s10, s10, s14 -; GCN-NEXT: v_mul_hi_u32 v0, v0, s10 -; GCN-NEXT: v_mul_hi_u32 v4, v1, s15 -; GCN-NEXT: v_readfirstlane_b32 s14, v3 -; GCN-NEXT: s_mul_i32 s16, s2, s10 -; GCN-NEXT: v_mul_hi_u32 v1, v1, s10 -; GCN-NEXT: s_add_u32 s14, s14, s16 +; GCN-NEXT: 
v_readfirstlane_b32 s10, v0 +; GCN-NEXT: s_mul_i32 s14, s0, s5 +; GCN-NEXT: v_readfirstlane_b32 s17, v2 +; GCN-NEXT: s_mul_i32 s15, s1, s10 +; GCN-NEXT: s_mul_i32 s16, s0, s10 +; GCN-NEXT: s_add_i32 s14, s17, s14 +; GCN-NEXT: v_mul_hi_u32 v3, v0, s16 +; GCN-NEXT: s_add_i32 s14, s14, s15 +; GCN-NEXT: v_mul_hi_u32 v0, v0, s14 +; GCN-NEXT: v_mul_hi_u32 v4, v1, s16 +; GCN-NEXT: v_readfirstlane_b32 s15, v3 +; GCN-NEXT: s_mul_i32 s17, s10, s14 +; GCN-NEXT: s_add_u32 s15, s15, s17 +; GCN-NEXT: v_readfirstlane_b32 s17, v0 +; GCN-NEXT: v_mul_hi_u32 v0, v1, s14 +; GCN-NEXT: s_addc_u32 s17, 0, s17 +; GCN-NEXT: s_mul_i32 s16, s5, s16 +; GCN-NEXT: v_readfirstlane_b32 s18, v4 +; GCN-NEXT: s_add_u32 s15, s15, s16 +; GCN-NEXT: s_addc_u32 s15, s17, s18 ; GCN-NEXT: v_readfirstlane_b32 s16, v0 -; GCN-NEXT: s_addc_u32 s16, 0, s16 -; GCN-NEXT: s_mul_i32 s15, s5, s15 -; GCN-NEXT: v_readfirstlane_b32 s17, v4 -; GCN-NEXT: s_add_u32 s14, s14, s15 -; GCN-NEXT: s_addc_u32 s14, s16, s17 +; GCN-NEXT: s_addc_u32 s16, s16, 0 +; GCN-NEXT: s_mul_i32 s14, s5, s14 +; GCN-NEXT: s_add_u32 s14, s15, s14 +; GCN-NEXT: s_addc_u32 s15, 0, s16 +; GCN-NEXT: s_add_u32 s10, s10, s14 +; GCN-NEXT: v_mov_b32_e32 v1, s10 +; GCN-NEXT: v_mul_hi_u32 v1, s0, v1 +; GCN-NEXT: s_addc_u32 s5, s5, s15 +; GCN-NEXT: s_mul_i32 s14, s0, s10 +; GCN-NEXT: s_mul_i32 s0, s0, s5 ; GCN-NEXT: v_readfirstlane_b32 s15, v1 -; GCN-NEXT: s_addc_u32 s15, s15, 0 -; GCN-NEXT: s_mul_i32 s10, s5, s10 -; GCN-NEXT: s_add_u32 s10, s14, s10 -; GCN-NEXT: s_addc_u32 s14, 0, s15 -; GCN-NEXT: s_add_u32 s10, s2, s10 -; GCN-NEXT: v_mov_b32_e32 v0, s10 -; GCN-NEXT: v_mul_hi_u32 v0, s0, v0 -; GCN-NEXT: s_addc_u32 s5, s5, s14 -; GCN-NEXT: s_mul_i32 s14, s0, s5 +; GCN-NEXT: s_add_i32 s0, s15, s0 ; GCN-NEXT: s_mul_i32 s1, s1, s10 -; GCN-NEXT: v_readfirstlane_b32 s15, v0 -; GCN-NEXT: s_add_i32 s14, s15, s14 -; GCN-NEXT: s_mul_i32 s0, s0, s10 -; GCN-NEXT: s_add_i32 s1, s14, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s0 -; GCN-NEXT: v_mov_b32_e32 v0, s1 -; GCN-NEXT: 
v_mul_hi_u32 v3, s5, v2 -; GCN-NEXT: v_mul_hi_u32 v2, s10, v2 -; GCN-NEXT: v_mul_hi_u32 v1, s5, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s14 +; GCN-NEXT: s_add_i32 s0, s0, s1 +; GCN-NEXT: v_mul_hi_u32 v2, s5, v0 +; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_mul_hi_u32 v0, s10, v0 -; GCN-NEXT: s_mul_i32 s15, s10, s1 -; GCN-NEXT: v_readfirstlane_b32 s17, v2 +; GCN-NEXT: v_mul_hi_u32 v3, s10, v1 +; GCN-NEXT: s_mul_i32 s15, s10, s0 +; GCN-NEXT: s_mul_i32 s14, s5, s14 +; GCN-NEXT: v_readfirstlane_b32 s17, v0 +; GCN-NEXT: v_mul_hi_u32 v0, s5, v1 +; GCN-NEXT: v_readfirstlane_b32 s16, v3 ; GCN-NEXT: s_add_u32 s15, s17, s15 -; GCN-NEXT: v_readfirstlane_b32 s16, v0 -; GCN-NEXT: s_mul_i32 s0, s5, s0 ; GCN-NEXT: s_addc_u32 s16, 0, s16 -; GCN-NEXT: v_readfirstlane_b32 s14, v3 -; GCN-NEXT: s_add_u32 s0, s15, s0 -; GCN-NEXT: s_addc_u32 s0, s16, s14 -; GCN-NEXT: v_readfirstlane_b32 s14, v1 +; GCN-NEXT: v_readfirstlane_b32 s1, v2 +; GCN-NEXT: s_add_u32 s14, s15, s14 +; GCN-NEXT: s_addc_u32 s1, s16, s1 +; GCN-NEXT: v_readfirstlane_b32 s14, v0 ; GCN-NEXT: s_addc_u32 s14, s14, 0 -; GCN-NEXT: s_mul_i32 s1, s5, s1 -; GCN-NEXT: s_add_u32 s0, s0, s1 +; GCN-NEXT: s_mul_i32 s0, s5, s0 +; GCN-NEXT: s_add_u32 s0, s1, s0 ; GCN-NEXT: s_addc_u32 s1, 0, s14 ; GCN-NEXT: s_add_u32 s14, s10, s0 ; GCN-NEXT: s_addc_u32 s15, s5, s1 @@ -852,7 +853,6 @@ define amdgpu_kernel void @s_test_sdiv32_64(ptr addrspace(1) %out, i64 %x, i64 % ; GCN-NEXT: s_sub_u32 s4, s6, s4 ; GCN-NEXT: s_subb_u32 s5, s7, s5 ; GCN-NEXT: v_mov_b32_e32 v0, s4 -; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: v_mov_b32_e32 v1, s5 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GCN-NEXT: s_endpgm @@ -1478,6 +1478,7 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: v_cvt_f32_u32_e32 v1, s7 ; GCN-NEXT: s_sub_u32 s2, 0, s6 ; GCN-NEXT: s_subb_u32 s8, 0, s7 +; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 ; GCN-NEXT: v_mul_f32_e32 v0, 
0x5f7ffffc, v0 @@ -1488,67 +1489,67 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_mul_hi_u32 v2, s2, v0 ; GCN-NEXT: v_readfirstlane_b32 s9, v1 -; GCN-NEXT: v_readfirstlane_b32 s3, v0 -; GCN-NEXT: s_mul_i32 s10, s2, s9 -; GCN-NEXT: v_readfirstlane_b32 s13, v2 -; GCN-NEXT: s_mul_i32 s11, s8, s3 -; GCN-NEXT: s_mul_i32 s12, s2, s3 -; GCN-NEXT: s_add_i32 s10, s13, s10 -; GCN-NEXT: v_mul_hi_u32 v3, v0, s12 -; GCN-NEXT: s_add_i32 s10, s10, s11 -; GCN-NEXT: v_mul_hi_u32 v0, v0, s10 -; GCN-NEXT: v_mul_hi_u32 v4, v1, s12 -; GCN-NEXT: v_readfirstlane_b32 s11, v3 -; GCN-NEXT: v_mul_hi_u32 v1, v1, s10 -; GCN-NEXT: s_mul_i32 s14, s3, s10 -; GCN-NEXT: s_add_u32 s11, s11, s14 -; GCN-NEXT: v_readfirstlane_b32 s14, v0 -; GCN-NEXT: s_mul_i32 s12, s9, s12 -; GCN-NEXT: s_addc_u32 s14, 0, s14 -; GCN-NEXT: v_readfirstlane_b32 s13, v4 -; GCN-NEXT: s_add_u32 s11, s11, s12 -; GCN-NEXT: v_readfirstlane_b32 s15, v1 -; GCN-NEXT: s_addc_u32 s11, s14, s13 -; GCN-NEXT: s_addc_u32 s12, s15, 0 -; GCN-NEXT: s_mul_i32 s10, s9, s10 -; GCN-NEXT: s_add_u32 s10, s11, s10 -; GCN-NEXT: s_addc_u32 s11, 0, s12 -; GCN-NEXT: s_add_u32 s10, s3, s10 -; GCN-NEXT: v_mov_b32_e32 v0, s10 -; GCN-NEXT: v_mul_hi_u32 v0, s2, v0 -; GCN-NEXT: s_addc_u32 s9, s9, s11 +; GCN-NEXT: v_readfirstlane_b32 s10, v0 ; GCN-NEXT: s_mul_i32 s11, s2, s9 +; GCN-NEXT: v_readfirstlane_b32 s14, v2 +; GCN-NEXT: s_mul_i32 s12, s8, s10 +; GCN-NEXT: s_mul_i32 s13, s2, s10 +; GCN-NEXT: s_add_i32 s11, s14, s11 +; GCN-NEXT: v_mul_hi_u32 v3, v0, s13 +; GCN-NEXT: s_add_i32 s11, s11, s12 +; GCN-NEXT: v_mul_hi_u32 v0, v0, s11 +; GCN-NEXT: v_mul_hi_u32 v4, v1, s13 +; GCN-NEXT: v_readfirstlane_b32 s12, v3 +; GCN-NEXT: s_mul_i32 s15, s10, s11 +; GCN-NEXT: v_mul_hi_u32 v1, v1, s11 +; GCN-NEXT: s_add_u32 s12, s12, s15 +; GCN-NEXT: v_readfirstlane_b32 s15, v0 +; GCN-NEXT: s_mul_i32 s13, s9, s13 +; GCN-NEXT: s_addc_u32 s15, 0, s15 +; GCN-NEXT: v_readfirstlane_b32 s14, v4 
+; GCN-NEXT: s_add_u32 s12, s12, s13 +; GCN-NEXT: s_addc_u32 s12, s15, s14 +; GCN-NEXT: v_readfirstlane_b32 s13, v1 +; GCN-NEXT: s_addc_u32 s13, s13, 0 +; GCN-NEXT: s_mul_i32 s11, s9, s11 +; GCN-NEXT: s_add_u32 s11, s12, s11 +; GCN-NEXT: s_addc_u32 s12, 0, s13 +; GCN-NEXT: s_add_u32 s10, s10, s11 +; GCN-NEXT: v_mov_b32_e32 v1, s10 +; GCN-NEXT: v_mul_hi_u32 v1, s2, v1 +; GCN-NEXT: s_addc_u32 s9, s9, s12 +; GCN-NEXT: s_mul_i32 s11, s2, s10 +; GCN-NEXT: s_mul_i32 s2, s2, s9 +; GCN-NEXT: v_readfirstlane_b32 s12, v1 +; GCN-NEXT: s_add_i32 s2, s12, s2 ; GCN-NEXT: s_mul_i32 s8, s8, s10 -; GCN-NEXT: v_readfirstlane_b32 s12, v0 -; GCN-NEXT: s_add_i32 s11, s12, s11 -; GCN-NEXT: s_mul_i32 s2, s2, s10 -; GCN-NEXT: s_add_i32 s8, s11, s8 -; GCN-NEXT: v_mov_b32_e32 v2, s2 -; GCN-NEXT: v_mov_b32_e32 v0, s8 -; GCN-NEXT: v_mul_hi_u32 v3, s9, v2 -; GCN-NEXT: v_mul_hi_u32 v2, s10, v2 -; GCN-NEXT: v_mul_hi_u32 v1, s9, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s11 +; GCN-NEXT: s_add_i32 s2, s2, s8 +; GCN-NEXT: v_mul_hi_u32 v2, s9, v0 +; GCN-NEXT: v_mov_b32_e32 v1, s2 ; GCN-NEXT: v_mul_hi_u32 v0, s10, v0 -; GCN-NEXT: s_mul_i32 s12, s10, s8 -; GCN-NEXT: v_readfirstlane_b32 s14, v2 +; GCN-NEXT: v_mul_hi_u32 v3, s10, v1 +; GCN-NEXT: s_mul_i32 s12, s10, s2 +; GCN-NEXT: s_mul_i32 s11, s9, s11 +; GCN-NEXT: v_readfirstlane_b32 s14, v0 +; GCN-NEXT: v_mul_hi_u32 v0, s9, v1 +; GCN-NEXT: v_readfirstlane_b32 s13, v3 ; GCN-NEXT: s_add_u32 s12, s14, s12 -; GCN-NEXT: v_readfirstlane_b32 s13, v0 -; GCN-NEXT: s_mul_i32 s2, s9, s2 ; GCN-NEXT: s_addc_u32 s13, 0, s13 -; GCN-NEXT: v_readfirstlane_b32 s11, v3 -; GCN-NEXT: s_add_u32 s2, s12, s2 -; GCN-NEXT: s_addc_u32 s2, s13, s11 -; GCN-NEXT: v_readfirstlane_b32 s11, v1 +; GCN-NEXT: v_readfirstlane_b32 s8, v2 +; GCN-NEXT: s_add_u32 s11, s12, s11 +; GCN-NEXT: s_addc_u32 s8, s13, s8 +; GCN-NEXT: v_readfirstlane_b32 s11, v0 ; GCN-NEXT: s_addc_u32 s11, s11, 0 -; GCN-NEXT: s_mul_i32 s8, s9, s8 -; GCN-NEXT: s_add_u32 s2, s2, s8 +; GCN-NEXT: s_mul_i32 s2, s9, s2 +; 
GCN-NEXT: s_add_u32 s2, s8, s2 ; GCN-NEXT: s_addc_u32 s8, 0, s11 ; GCN-NEXT: s_add_u32 s2, s10, s2 ; GCN-NEXT: s_addc_u32 s8, s9, s8 ; GCN-NEXT: v_mul_hi_u32 v1, s2, 24 ; GCN-NEXT: v_mul_hi_u32 v0, s8, 24 ; GCN-NEXT: s_mul_i32 s8, s8, 24 -; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: v_readfirstlane_b32 s10, v1 ; GCN-NEXT: v_readfirstlane_b32 s9, v0 ; GCN-NEXT: s_add_u32 s8, s10, s8 @@ -1556,7 +1557,6 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: v_mov_b32_e32 v0, s10 ; GCN-NEXT: v_mul_hi_u32 v0, s6, v0 ; GCN-NEXT: s_mul_i32 s8, s7, s10 -; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: v_readfirstlane_b32 s9, v0 ; GCN-NEXT: s_add_i32 s11, s9, s8 ; GCN-NEXT: s_sub_i32 s12, 0, s11 @@ -1719,25 +1719,25 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) { ; GCN-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v7 ; GCN-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc -; GCN-NEXT: v_mul_lo_u32 v7, v5, v4 -; GCN-NEXT: v_mul_hi_u32 v8, v5, v3 +; GCN-NEXT: v_mul_lo_u32 v7, v5, v3 +; GCN-NEXT: v_mul_lo_u32 v8, v5, v4 +; GCN-NEXT: v_mul_hi_u32 v5, v5, v3 ; GCN-NEXT: v_mul_lo_u32 v6, v6, v3 -; GCN-NEXT: v_mul_lo_u32 v5, v5, v3 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GCN-NEXT: v_mul_lo_u32 v9, v3, v6 +; GCN-NEXT: v_mul_hi_u32 v9, v4, v7 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GCN-NEXT: v_mul_lo_u32 v6, v3, v5 +; GCN-NEXT: v_mul_hi_u32 v8, v3, v7 ; GCN-NEXT: v_mul_hi_u32 v10, v3, v5 -; GCN-NEXT: v_mul_hi_u32 v11, v3, v6 -; GCN-NEXT: v_mul_hi_u32 v8, v4, v5 +; GCN-NEXT: v_mul_lo_u32 v7, v4, v7 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; GCN-NEXT: v_addc_u32_e32 v8, vcc, 0, v10, vcc +; GCN-NEXT: v_mul_hi_u32 v10, v4, v5 ; GCN-NEXT: v_mul_lo_u32 v5, v4, v5 -; GCN-NEXT: v_mul_hi_u32 v7, v4, v6 -; GCN-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GCN-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc -; GCN-NEXT: 
v_mul_lo_u32 v6, v4, v6 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v9, v5 -; GCN-NEXT: v_addc_u32_e32 v5, vcc, v10, v8, vcc -; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc -; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GCN-NEXT: v_addc_u32_e32 v6, vcc, v8, v9, vcc +; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v10, vcc +; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; GCN-NEXT: v_addc_u32_e32 v6, vcc, 0, v7, vcc ; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; GCN-NEXT: v_addc_u32_e32 v4, vcc, v4, v6, vcc @@ -1886,6 +1886,7 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_subb_u32_e32 v6, vcc, 0, v1, vcc ; GCN-NEXT: v_madmk_f32 v3, v4, 0x4f800000, v3 ; GCN-NEXT: v_rcp_f32_e32 v3, v3 +; GCN-NEXT: s_mov_b32 s4, 0x8000 ; GCN-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3 ; GCN-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3 ; GCN-NEXT: v_trunc_f32_e32 v4, v4 @@ -1914,25 +1915,25 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v7 ; GCN-NEXT: v_addc_u32_e32 v4, vcc, v4, v8, vcc -; GCN-NEXT: v_mul_lo_u32 v7, v5, v4 -; GCN-NEXT: v_mul_hi_u32 v8, v5, v3 +; GCN-NEXT: v_mul_lo_u32 v7, v5, v3 +; GCN-NEXT: v_mul_lo_u32 v8, v5, v4 +; GCN-NEXT: v_mul_hi_u32 v5, v5, v3 ; GCN-NEXT: v_mul_lo_u32 v6, v6, v3 -; GCN-NEXT: v_mul_lo_u32 v5, v5, v3 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GCN-NEXT: v_mul_lo_u32 v9, v3, v6 +; GCN-NEXT: v_mul_hi_u32 v9, v4, v7 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v8 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GCN-NEXT: v_mul_lo_u32 v6, v3, v5 +; GCN-NEXT: v_mul_hi_u32 v8, v3, v7 ; GCN-NEXT: v_mul_hi_u32 v10, v3, v5 -; GCN-NEXT: v_mul_hi_u32 v11, v3, v6 -; GCN-NEXT: v_mul_hi_u32 v8, v4, v5 +; GCN-NEXT: v_mul_lo_u32 v7, v4, v7 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v8, v6 +; GCN-NEXT: v_addc_u32_e32 v8, vcc, 0, v10, vcc +; GCN-NEXT: v_mul_hi_u32 v10, v4, v5 ; GCN-NEXT: v_mul_lo_u32 v5, v4, v5 -; 
GCN-NEXT: v_mul_hi_u32 v7, v4, v6 -; GCN-NEXT: v_add_i32_e32 v9, vcc, v10, v9 -; GCN-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc -; GCN-NEXT: v_mul_lo_u32 v6, v4, v6 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v9, v5 -; GCN-NEXT: v_addc_u32_e32 v5, vcc, v10, v8, vcc -; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v7, vcc -; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GCN-NEXT: v_addc_u32_e32 v6, vcc, v8, v9, vcc +; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v10, vcc +; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v5 ; GCN-NEXT: v_addc_u32_e32 v6, vcc, 0, v7, vcc ; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v4, v6, vcc @@ -1942,7 +1943,7 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; GCN-NEXT: v_mul_lo_u32 v5, v0, v3 ; GCN-NEXT: v_sub_i32_e32 v6, vcc, 0, v4 -; GCN-NEXT: v_sub_i32_e32 v5, vcc, 0x8000, v5 +; GCN-NEXT: v_sub_i32_e32 v5, vcc, s4, v5 ; GCN-NEXT: v_subb_u32_e64 v6, s[4:5], v6, v1, vcc ; GCN-NEXT: v_sub_i32_e64 v7, s[4:5], v5, v0 ; GCN-NEXT: v_subbrev_u32_e64 v6, s[4:5], 0, v6, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/srem.ll b/llvm/test/CodeGen/AMDGPU/srem.ll index c870d651eb1aa..966042692c194 100644 --- a/llvm/test/CodeGen/AMDGPU/srem.ll +++ b/llvm/test/CodeGen/AMDGPU/srem.ll @@ -1534,37 +1534,37 @@ define amdgpu_kernel void @srem_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) ; GCN-NEXT: s_mul_hi_u32 s14, s12, s13 ; GCN-NEXT: s_add_u32 s15, s17, s15 ; GCN-NEXT: s_addc_u32 s14, 0, s14 -; GCN-NEXT: s_mul_hi_u32 s18, s11, s16 +; GCN-NEXT: s_mul_hi_u32 s17, s11, s16 ; GCN-NEXT: s_mul_i32 s16, s11, s16 ; GCN-NEXT: s_add_u32 s15, s15, s16 -; GCN-NEXT: s_mul_hi_u32 s17, s11, s13 -; GCN-NEXT: s_addc_u32 s14, s14, s18 -; GCN-NEXT: s_addc_u32 s15, s17, 0 +; GCN-NEXT: s_addc_u32 s14, s14, s17 +; GCN-NEXT: s_mul_hi_u32 s15, s11, s13 +; GCN-NEXT: s_addc_u32 s15, s15, 0 ; GCN-NEXT: s_mul_i32 s13, s11, s13 ; GCN-NEXT: s_add_u32 s13, s14, s13 ; GCN-NEXT: s_addc_u32 s14, 
0, s15 ; GCN-NEXT: s_add_u32 s12, s12, s13 ; GCN-NEXT: s_addc_u32 s11, s11, s14 -; GCN-NEXT: s_mul_i32 s13, s3, s11 -; GCN-NEXT: s_mul_hi_u32 s14, s3, s12 -; GCN-NEXT: s_add_i32 s13, s14, s13 +; GCN-NEXT: s_mul_i32 s13, s3, s12 +; GCN-NEXT: s_mul_i32 s16, s3, s11 +; GCN-NEXT: s_mul_hi_u32 s3, s3, s12 +; GCN-NEXT: s_add_i32 s3, s3, s16 ; GCN-NEXT: s_mul_i32 s10, s10, s12 -; GCN-NEXT: s_add_i32 s13, s13, s10 -; GCN-NEXT: s_mul_i32 s3, s3, s12 -; GCN-NEXT: s_mul_hi_u32 s14, s11, s3 -; GCN-NEXT: s_mul_i32 s15, s11, s3 -; GCN-NEXT: s_mul_i32 s17, s12, s13 -; GCN-NEXT: s_mul_hi_u32 s3, s12, s3 -; GCN-NEXT: s_mul_hi_u32 s16, s12, s13 -; GCN-NEXT: s_add_u32 s3, s3, s17 -; GCN-NEXT: s_addc_u32 s16, 0, s16 -; GCN-NEXT: s_add_u32 s3, s3, s15 -; GCN-NEXT: s_mul_hi_u32 s10, s11, s13 -; GCN-NEXT: s_addc_u32 s3, s16, s14 -; GCN-NEXT: s_addc_u32 s10, s10, 0 -; GCN-NEXT: s_mul_i32 s13, s11, s13 -; GCN-NEXT: s_add_u32 s3, s3, s13 +; GCN-NEXT: s_add_i32 s3, s3, s10 +; GCN-NEXT: s_mul_hi_u32 s14, s11, s13 +; GCN-NEXT: s_mul_i32 s15, s11, s13 +; GCN-NEXT: s_mul_i32 s16, s12, s3 +; GCN-NEXT: s_mul_hi_u32 s13, s12, s13 +; GCN-NEXT: s_mul_hi_u32 s10, s12, s3 +; GCN-NEXT: s_add_u32 s13, s13, s16 ; GCN-NEXT: s_addc_u32 s10, 0, s10 +; GCN-NEXT: s_add_u32 s13, s13, s15 +; GCN-NEXT: s_addc_u32 s10, s10, s14 +; GCN-NEXT: s_mul_hi_u32 s13, s11, s3 +; GCN-NEXT: s_addc_u32 s13, s13, 0 +; GCN-NEXT: s_mul_i32 s3, s11, s3 +; GCN-NEXT: s_add_u32 s3, s10, s3 +; GCN-NEXT: s_addc_u32 s10, 0, s13 ; GCN-NEXT: s_add_u32 s3, s12, s3 ; GCN-NEXT: s_addc_u32 s14, s11, s10 ; GCN-NEXT: s_ashr_i32 s10, s5, 31 @@ -1577,12 +1577,12 @@ define amdgpu_kernel void @srem_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) ; GCN-NEXT: s_mul_hi_u32 s5, s12, s14 ; GCN-NEXT: s_add_u32 s15, s16, s15 ; GCN-NEXT: s_addc_u32 s5, 0, s5 -; GCN-NEXT: s_mul_hi_u32 s17, s13, s3 +; GCN-NEXT: s_mul_hi_u32 s16, s13, s3 ; GCN-NEXT: s_mul_i32 s3, s13, s3 ; GCN-NEXT: s_add_u32 s3, s15, s3 -; GCN-NEXT: s_mul_hi_u32 s16, s13, s14 -; GCN-NEXT: 
s_addc_u32 s3, s5, s17 -; GCN-NEXT: s_addc_u32 s5, s16, 0 +; GCN-NEXT: s_addc_u32 s3, s5, s16 +; GCN-NEXT: s_mul_hi_u32 s5, s13, s14 +; GCN-NEXT: s_addc_u32 s5, s5, 0 ; GCN-NEXT: s_mul_i32 s14, s13, s14 ; GCN-NEXT: s_add_u32 s3, s3, s14 ; GCN-NEXT: s_addc_u32 s5, 0, s5 @@ -1710,25 +1710,25 @@ define amdgpu_kernel void @srem_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) ; TAHITI-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; TAHITI-NEXT: v_add_i32_e32 v5, vcc, v5, v9 ; TAHITI-NEXT: v_addc_u32_e32 v6, vcc, v6, v10, vcc -; TAHITI-NEXT: v_mul_lo_u32 v9, v7, v6 -; TAHITI-NEXT: v_mul_hi_u32 v10, v7, v5 +; TAHITI-NEXT: v_mul_lo_u32 v9, v7, v5 +; TAHITI-NEXT: v_mul_lo_u32 v10, v7, v6 +; TAHITI-NEXT: v_mul_hi_u32 v7, v7, v5 ; TAHITI-NEXT: v_mul_lo_u32 v8, v8, v5 -; TAHITI-NEXT: v_mul_lo_u32 v7, v7, v5 -; TAHITI-NEXT: v_add_i32_e32 v9, vcc, v9, v10 -; TAHITI-NEXT: v_add_i32_e32 v8, vcc, v8, v9 -; TAHITI-NEXT: v_mul_lo_u32 v11, v5, v8 +; TAHITI-NEXT: v_mul_hi_u32 v11, v6, v9 +; TAHITI-NEXT: v_add_i32_e32 v7, vcc, v10, v7 +; TAHITI-NEXT: v_add_i32_e32 v7, vcc, v8, v7 +; TAHITI-NEXT: v_mul_lo_u32 v8, v5, v7 +; TAHITI-NEXT: v_mul_hi_u32 v10, v5, v9 ; TAHITI-NEXT: v_mul_hi_u32 v12, v5, v7 -; TAHITI-NEXT: v_mul_hi_u32 v13, v5, v8 -; TAHITI-NEXT: v_mul_hi_u32 v10, v6, v7 +; TAHITI-NEXT: v_mul_lo_u32 v9, v6, v9 +; TAHITI-NEXT: v_add_i32_e32 v8, vcc, v10, v8 +; TAHITI-NEXT: v_addc_u32_e32 v10, vcc, 0, v12, vcc +; TAHITI-NEXT: v_mul_hi_u32 v12, v6, v7 ; TAHITI-NEXT: v_mul_lo_u32 v7, v6, v7 -; TAHITI-NEXT: v_mul_hi_u32 v9, v6, v8 -; TAHITI-NEXT: v_add_i32_e32 v11, vcc, v12, v11 -; TAHITI-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc -; TAHITI-NEXT: v_mul_lo_u32 v8, v6, v8 -; TAHITI-NEXT: v_add_i32_e32 v7, vcc, v11, v7 -; TAHITI-NEXT: v_addc_u32_e32 v7, vcc, v12, v10, vcc -; TAHITI-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc -; TAHITI-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; TAHITI-NEXT: v_add_i32_e32 v8, vcc, v8, v9 +; TAHITI-NEXT: v_addc_u32_e32 v8, vcc, v10, v11, vcc +; TAHITI-NEXT: 
v_addc_u32_e32 v9, vcc, 0, v12, vcc +; TAHITI-NEXT: v_add_i32_e32 v7, vcc, v8, v7 ; TAHITI-NEXT: v_addc_u32_e32 v8, vcc, 0, v9, vcc ; TAHITI-NEXT: v_add_i32_e32 v5, vcc, v5, v7 ; TAHITI-NEXT: v_addc_u32_e32 v6, vcc, v6, v8, vcc @@ -1866,47 +1866,47 @@ define amdgpu_kernel void @srem_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) ; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v6, vcc ; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v5 ; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; TONGA-NEXT: v_add_u32_e32 v8, vcc, v8, v0 -; TONGA-NEXT: v_addc_u32_e32 v7, vcc, v7, v1, vcc -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], s3, v8, 0 -; TONGA-NEXT: v_mul_lo_u32 v2, s3, v7 -; TONGA-NEXT: v_mul_lo_u32 v3, s10, v8 +; TONGA-NEXT: v_add_u32_e32 v5, vcc, v8, v0 +; TONGA-NEXT: v_addc_u32_e32 v6, vcc, v7, v1, vcc +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], s3, v5, 0 +; TONGA-NEXT: v_mul_lo_u32 v2, s3, v6 +; TONGA-NEXT: v_mul_lo_u32 v3, s10, v5 ; TONGA-NEXT: s_ashr_i32 s10, s5, 31 -; TONGA-NEXT: v_mul_hi_u32 v9, v8, v0 +; TONGA-NEXT: v_mul_hi_u32 v8, v5, v0 ; TONGA-NEXT: v_add_u32_e32 v1, vcc, v2, v1 -; TONGA-NEXT: v_add_u32_e32 v5, vcc, v3, v1 -; TONGA-NEXT: v_mad_u64_u32 v[1:2], s[8:9], v8, v5, 0 -; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[8:9], v7, v0, 0 -; TONGA-NEXT: v_mad_u64_u32 v[5:6], s[8:9], v7, v5, 0 -; TONGA-NEXT: s_add_u32 s8, s4, s10 -; TONGA-NEXT: v_add_u32_e32 v0, vcc, v9, v1 -; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v2, vcc -; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v3 -; TONGA-NEXT: v_addc_u32_e32 v0, vcc, v1, v4, vcc -; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v6, vcc -; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v5 -; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; TONGA-NEXT: v_add_u32_e32 v2, vcc, v8, v0 +; TONGA-NEXT: v_add_u32_e32 v7, vcc, v3, v1 +; TONGA-NEXT: v_mad_u64_u32 v[1:2], s[8:9], v5, v7, 0 +; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[8:9], v6, v0, 0 ; TONGA-NEXT: s_mov_b32 s11, s10 +; TONGA-NEXT: v_add_u32_e32 v8, vcc, v8, v1 +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], 
v6, v7, 0 +; TONGA-NEXT: v_addc_u32_e32 v2, vcc, 0, v2, vcc +; TONGA-NEXT: v_add_u32_e32 v3, vcc, v8, v3 +; TONGA-NEXT: v_addc_u32_e32 v2, vcc, v2, v4, vcc +; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; TONGA-NEXT: v_add_u32_e32 v0, vcc, v2, v0 +; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; TONGA-NEXT: s_add_u32 s8, s4, s10 +; TONGA-NEXT: v_add_u32_e32 v2, vcc, v5, v0 ; TONGA-NEXT: s_addc_u32 s9, s5, s10 -; TONGA-NEXT: v_addc_u32_e32 v3, vcc, v7, v1, vcc +; TONGA-NEXT: v_addc_u32_e32 v3, vcc, v6, v1, vcc ; TONGA-NEXT: s_xor_b64 s[12:13], s[8:9], s[10:11] ; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], s12, v3, 0 ; TONGA-NEXT: v_mul_hi_u32 v4, s12, v2 ; TONGA-NEXT: v_readfirstlane_b32 s3, v1 ; TONGA-NEXT: v_readfirstlane_b32 s5, v0 -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], s13, v3, 0 -; TONGA-NEXT: v_mad_u64_u32 v[2:3], s[8:9], s13, v2, 0 -; TONGA-NEXT: v_readfirstlane_b32 s14, v4 -; TONGA-NEXT: s_add_u32 s5, s14, s5 +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], s13, v2, 0 +; TONGA-NEXT: v_readfirstlane_b32 s8, v4 +; TONGA-NEXT: s_add_u32 s5, s8, s5 ; TONGA-NEXT: s_addc_u32 s3, 0, s3 -; TONGA-NEXT: v_readfirstlane_b32 s14, v2 -; TONGA-NEXT: v_readfirstlane_b32 s9, v3 -; TONGA-NEXT: s_add_u32 s5, s5, s14 -; TONGA-NEXT: v_readfirstlane_b32 s8, v1 -; TONGA-NEXT: s_addc_u32 s3, s3, s9 -; TONGA-NEXT: s_addc_u32 s5, s8, 0 +; TONGA-NEXT: v_readfirstlane_b32 s14, v1 +; TONGA-NEXT: v_mad_u64_u32 v[1:2], s[8:9], s13, v3, 0 ; TONGA-NEXT: v_readfirstlane_b32 s8, v0 +; TONGA-NEXT: s_add_u32 s5, s5, s8 +; TONGA-NEXT: s_addc_u32 s3, s3, s14 +; TONGA-NEXT: v_readfirstlane_b32 s5, v2 +; TONGA-NEXT: s_addc_u32 s5, s5, 0 +; TONGA-NEXT: v_readfirstlane_b32 s8, v1 ; TONGA-NEXT: s_add_u32 s3, s3, s8 ; TONGA-NEXT: v_mov_b32_e32 v0, s3 ; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], s6, v0, 0 @@ -2736,37 +2736,37 @@ define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GCN-NEXT: s_mul_hi_u32 s18, s16, s17 ; GCN-NEXT: s_add_u32 s19, s21, s19 ; GCN-NEXT: 
s_addc_u32 s18, 0, s18 -; GCN-NEXT: s_mul_hi_u32 s22, s15, s20 +; GCN-NEXT: s_mul_hi_u32 s21, s15, s20 ; GCN-NEXT: s_mul_i32 s20, s15, s20 ; GCN-NEXT: s_add_u32 s19, s19, s20 -; GCN-NEXT: s_mul_hi_u32 s21, s15, s17 -; GCN-NEXT: s_addc_u32 s18, s18, s22 -; GCN-NEXT: s_addc_u32 s19, s21, 0 +; GCN-NEXT: s_addc_u32 s18, s18, s21 +; GCN-NEXT: s_mul_hi_u32 s19, s15, s17 +; GCN-NEXT: s_addc_u32 s19, s19, 0 ; GCN-NEXT: s_mul_i32 s17, s15, s17 ; GCN-NEXT: s_add_u32 s17, s18, s17 ; GCN-NEXT: s_addc_u32 s18, 0, s19 ; GCN-NEXT: s_add_u32 s16, s16, s17 ; GCN-NEXT: s_addc_u32 s15, s15, s18 -; GCN-NEXT: s_mul_i32 s17, s9, s15 -; GCN-NEXT: s_mul_hi_u32 s18, s9, s16 -; GCN-NEXT: s_add_i32 s17, s18, s17 +; GCN-NEXT: s_mul_i32 s17, s9, s16 +; GCN-NEXT: s_mul_i32 s20, s9, s15 +; GCN-NEXT: s_mul_hi_u32 s9, s9, s16 +; GCN-NEXT: s_add_i32 s9, s9, s20 ; GCN-NEXT: s_mul_i32 s14, s14, s16 -; GCN-NEXT: s_add_i32 s17, s17, s14 -; GCN-NEXT: s_mul_i32 s9, s9, s16 -; GCN-NEXT: s_mul_hi_u32 s18, s15, s9 -; GCN-NEXT: s_mul_i32 s19, s15, s9 -; GCN-NEXT: s_mul_i32 s21, s16, s17 -; GCN-NEXT: s_mul_hi_u32 s9, s16, s9 -; GCN-NEXT: s_mul_hi_u32 s20, s16, s17 -; GCN-NEXT: s_add_u32 s9, s9, s21 -; GCN-NEXT: s_addc_u32 s20, 0, s20 -; GCN-NEXT: s_add_u32 s9, s9, s19 -; GCN-NEXT: s_mul_hi_u32 s14, s15, s17 -; GCN-NEXT: s_addc_u32 s9, s20, s18 -; GCN-NEXT: s_addc_u32 s14, s14, 0 -; GCN-NEXT: s_mul_i32 s17, s15, s17 -; GCN-NEXT: s_add_u32 s9, s9, s17 +; GCN-NEXT: s_add_i32 s9, s9, s14 +; GCN-NEXT: s_mul_hi_u32 s18, s15, s17 +; GCN-NEXT: s_mul_i32 s19, s15, s17 +; GCN-NEXT: s_mul_i32 s20, s16, s9 +; GCN-NEXT: s_mul_hi_u32 s17, s16, s17 +; GCN-NEXT: s_mul_hi_u32 s14, s16, s9 +; GCN-NEXT: s_add_u32 s17, s17, s20 ; GCN-NEXT: s_addc_u32 s14, 0, s14 +; GCN-NEXT: s_add_u32 s17, s17, s19 +; GCN-NEXT: s_addc_u32 s14, s14, s18 +; GCN-NEXT: s_mul_hi_u32 s17, s15, s9 +; GCN-NEXT: s_addc_u32 s17, s17, 0 +; GCN-NEXT: s_mul_i32 s9, s15, s9 +; GCN-NEXT: s_add_u32 s9, s14, s9 +; GCN-NEXT: s_addc_u32 s14, 0, s17 ; GCN-NEXT: 
s_add_u32 s9, s16, s9 ; GCN-NEXT: s_addc_u32 s18, s15, s14 ; GCN-NEXT: s_ashr_i32 s14, s11, 31 @@ -2779,12 +2779,12 @@ define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GCN-NEXT: s_mul_hi_u32 s11, s16, s18 ; GCN-NEXT: s_add_u32 s19, s20, s19 ; GCN-NEXT: s_addc_u32 s11, 0, s11 -; GCN-NEXT: s_mul_hi_u32 s21, s17, s9 +; GCN-NEXT: s_mul_hi_u32 s20, s17, s9 ; GCN-NEXT: s_mul_i32 s9, s17, s9 ; GCN-NEXT: s_add_u32 s9, s19, s9 -; GCN-NEXT: s_mul_hi_u32 s20, s17, s18 -; GCN-NEXT: s_addc_u32 s9, s11, s21 -; GCN-NEXT: s_addc_u32 s11, s20, 0 +; GCN-NEXT: s_addc_u32 s9, s11, s20 +; GCN-NEXT: s_mul_hi_u32 s11, s17, s18 +; GCN-NEXT: s_addc_u32 s11, s11, 0 ; GCN-NEXT: s_mul_i32 s18, s17, s18 ; GCN-NEXT: s_add_u32 s9, s9, s18 ; GCN-NEXT: s_addc_u32 s11, 0, s11 @@ -2884,37 +2884,37 @@ define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GCN-NEXT: s_mul_hi_u32 s16, s14, s15 ; GCN-NEXT: s_add_u32 s17, s19, s17 ; GCN-NEXT: s_addc_u32 s16, 0, s16 -; GCN-NEXT: s_mul_hi_u32 s20, s13, s18 +; GCN-NEXT: s_mul_hi_u32 s19, s13, s18 ; GCN-NEXT: s_mul_i32 s18, s13, s18 ; GCN-NEXT: s_add_u32 s17, s17, s18 -; GCN-NEXT: s_mul_hi_u32 s19, s13, s15 -; GCN-NEXT: s_addc_u32 s16, s16, s20 -; GCN-NEXT: s_addc_u32 s17, s19, 0 +; GCN-NEXT: s_addc_u32 s16, s16, s19 +; GCN-NEXT: s_mul_hi_u32 s17, s13, s15 +; GCN-NEXT: s_addc_u32 s17, s17, 0 ; GCN-NEXT: s_mul_i32 s15, s13, s15 ; GCN-NEXT: s_add_u32 s15, s16, s15 ; GCN-NEXT: s_addc_u32 s16, 0, s17 ; GCN-NEXT: s_add_u32 s14, s14, s15 ; GCN-NEXT: s_addc_u32 s13, s13, s16 -; GCN-NEXT: s_mul_i32 s15, s3, s13 -; GCN-NEXT: s_mul_hi_u32 s16, s3, s14 -; GCN-NEXT: s_add_i32 s15, s16, s15 +; GCN-NEXT: s_mul_i32 s15, s3, s14 +; GCN-NEXT: s_mul_i32 s18, s3, s13 +; GCN-NEXT: s_mul_hi_u32 s3, s3, s14 +; GCN-NEXT: s_add_i32 s3, s3, s18 ; GCN-NEXT: s_mul_i32 s12, s12, s14 -; GCN-NEXT: s_add_i32 s15, s15, s12 -; GCN-NEXT: s_mul_i32 s3, s3, s14 -; GCN-NEXT: s_mul_hi_u32 s16, s13, s3 -; GCN-NEXT: s_mul_i32 s17, s13, 
s3 -; GCN-NEXT: s_mul_i32 s19, s14, s15 -; GCN-NEXT: s_mul_hi_u32 s3, s14, s3 -; GCN-NEXT: s_mul_hi_u32 s18, s14, s15 -; GCN-NEXT: s_add_u32 s3, s3, s19 -; GCN-NEXT: s_addc_u32 s18, 0, s18 -; GCN-NEXT: s_add_u32 s3, s3, s17 -; GCN-NEXT: s_mul_hi_u32 s12, s13, s15 -; GCN-NEXT: s_addc_u32 s3, s18, s16 -; GCN-NEXT: s_addc_u32 s12, s12, 0 -; GCN-NEXT: s_mul_i32 s15, s13, s15 -; GCN-NEXT: s_add_u32 s3, s3, s15 +; GCN-NEXT: s_add_i32 s3, s3, s12 +; GCN-NEXT: s_mul_hi_u32 s16, s13, s15 +; GCN-NEXT: s_mul_i32 s17, s13, s15 +; GCN-NEXT: s_mul_i32 s18, s14, s3 +; GCN-NEXT: s_mul_hi_u32 s15, s14, s15 +; GCN-NEXT: s_mul_hi_u32 s12, s14, s3 +; GCN-NEXT: s_add_u32 s15, s15, s18 ; GCN-NEXT: s_addc_u32 s12, 0, s12 +; GCN-NEXT: s_add_u32 s15, s15, s17 +; GCN-NEXT: s_addc_u32 s12, s12, s16 +; GCN-NEXT: s_mul_hi_u32 s15, s13, s3 +; GCN-NEXT: s_addc_u32 s15, s15, 0 +; GCN-NEXT: s_mul_i32 s3, s13, s3 +; GCN-NEXT: s_add_u32 s3, s12, s3 +; GCN-NEXT: s_addc_u32 s12, 0, s15 ; GCN-NEXT: s_add_u32 s3, s14, s3 ; GCN-NEXT: s_addc_u32 s16, s13, s12 ; GCN-NEXT: s_ashr_i32 s12, s5, 31 @@ -2927,12 +2927,12 @@ define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GCN-NEXT: s_mul_hi_u32 s5, s14, s16 ; GCN-NEXT: s_add_u32 s17, s18, s17 ; GCN-NEXT: s_addc_u32 s5, 0, s5 -; GCN-NEXT: s_mul_hi_u32 s19, s15, s3 +; GCN-NEXT: s_mul_hi_u32 s18, s15, s3 ; GCN-NEXT: s_mul_i32 s3, s15, s3 ; GCN-NEXT: s_add_u32 s3, s17, s3 -; GCN-NEXT: s_mul_hi_u32 s18, s15, s16 -; GCN-NEXT: s_addc_u32 s3, s5, s19 -; GCN-NEXT: s_addc_u32 s5, s18, 0 +; GCN-NEXT: s_addc_u32 s3, s5, s18 +; GCN-NEXT: s_mul_hi_u32 s5, s15, s16 +; GCN-NEXT: s_addc_u32 s5, s5, 0 ; GCN-NEXT: s_mul_i32 s16, s15, s16 ; GCN-NEXT: s_add_u32 s3, s3, s16 ; GCN-NEXT: s_addc_u32 s5, 0, s5 @@ -3067,25 +3067,25 @@ define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TAHITI-NEXT: v_addc_u32_e32 v14, vcc, 0, v15, vcc ; TAHITI-NEXT: v_add_i32_e32 v9, vcc, v9, v13 ; TAHITI-NEXT: v_addc_u32_e32 v10, vcc, v10, 
v14, vcc -; TAHITI-NEXT: v_mul_lo_u32 v13, v11, v10 -; TAHITI-NEXT: v_mul_hi_u32 v14, v11, v9 +; TAHITI-NEXT: v_mul_lo_u32 v13, v11, v9 +; TAHITI-NEXT: v_mul_lo_u32 v14, v11, v10 +; TAHITI-NEXT: v_mul_hi_u32 v11, v11, v9 ; TAHITI-NEXT: v_mul_lo_u32 v12, v12, v9 -; TAHITI-NEXT: v_mul_lo_u32 v11, v11, v9 -; TAHITI-NEXT: v_add_i32_e32 v13, vcc, v13, v14 -; TAHITI-NEXT: v_add_i32_e32 v12, vcc, v12, v13 -; TAHITI-NEXT: v_mul_lo_u32 v15, v9, v12 +; TAHITI-NEXT: v_mul_hi_u32 v15, v10, v13 +; TAHITI-NEXT: v_add_i32_e32 v11, vcc, v14, v11 +; TAHITI-NEXT: v_add_i32_e32 v11, vcc, v12, v11 +; TAHITI-NEXT: v_mul_lo_u32 v12, v9, v11 +; TAHITI-NEXT: v_mul_hi_u32 v14, v9, v13 ; TAHITI-NEXT: v_mul_hi_u32 v16, v9, v11 -; TAHITI-NEXT: v_mul_hi_u32 v17, v9, v12 -; TAHITI-NEXT: v_mul_hi_u32 v14, v10, v11 +; TAHITI-NEXT: v_mul_lo_u32 v13, v10, v13 +; TAHITI-NEXT: v_add_i32_e32 v12, vcc, v14, v12 +; TAHITI-NEXT: v_addc_u32_e32 v14, vcc, 0, v16, vcc +; TAHITI-NEXT: v_mul_hi_u32 v16, v10, v11 ; TAHITI-NEXT: v_mul_lo_u32 v11, v10, v11 -; TAHITI-NEXT: v_mul_hi_u32 v13, v10, v12 -; TAHITI-NEXT: v_add_i32_e32 v15, vcc, v16, v15 -; TAHITI-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc -; TAHITI-NEXT: v_mul_lo_u32 v12, v10, v12 -; TAHITI-NEXT: v_add_i32_e32 v11, vcc, v15, v11 -; TAHITI-NEXT: v_addc_u32_e32 v11, vcc, v16, v14, vcc -; TAHITI-NEXT: v_addc_u32_e32 v13, vcc, 0, v13, vcc -; TAHITI-NEXT: v_add_i32_e32 v11, vcc, v11, v12 +; TAHITI-NEXT: v_add_i32_e32 v12, vcc, v12, v13 +; TAHITI-NEXT: v_addc_u32_e32 v12, vcc, v14, v15, vcc +; TAHITI-NEXT: v_addc_u32_e32 v13, vcc, 0, v16, vcc +; TAHITI-NEXT: v_add_i32_e32 v11, vcc, v12, v11 ; TAHITI-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc ; TAHITI-NEXT: v_add_i32_e32 v9, vcc, v9, v11 ; TAHITI-NEXT: v_addc_u32_e32 v10, vcc, v10, v12, vcc @@ -3209,25 +3209,25 @@ define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TAHITI-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc ; TAHITI-NEXT: v_add_i32_e32 v3, vcc, v3, v11 ; TAHITI-NEXT: 
v_addc_u32_e32 v4, vcc, v4, v12, vcc -; TAHITI-NEXT: v_mul_lo_u32 v11, v5, v4 -; TAHITI-NEXT: v_mul_hi_u32 v12, v5, v3 +; TAHITI-NEXT: v_mul_lo_u32 v11, v5, v3 +; TAHITI-NEXT: v_mul_lo_u32 v12, v5, v4 +; TAHITI-NEXT: v_mul_hi_u32 v5, v5, v3 ; TAHITI-NEXT: v_mul_lo_u32 v10, v10, v3 -; TAHITI-NEXT: v_mul_lo_u32 v5, v5, v3 -; TAHITI-NEXT: v_add_i32_e32 v11, vcc, v11, v12 -; TAHITI-NEXT: v_add_i32_e32 v10, vcc, v10, v11 -; TAHITI-NEXT: v_mul_lo_u32 v13, v3, v10 +; TAHITI-NEXT: v_mul_hi_u32 v13, v4, v11 +; TAHITI-NEXT: v_add_i32_e32 v5, vcc, v12, v5 +; TAHITI-NEXT: v_add_i32_e32 v5, vcc, v10, v5 +; TAHITI-NEXT: v_mul_lo_u32 v10, v3, v5 +; TAHITI-NEXT: v_mul_hi_u32 v12, v3, v11 ; TAHITI-NEXT: v_mul_hi_u32 v14, v3, v5 -; TAHITI-NEXT: v_mul_hi_u32 v15, v3, v10 -; TAHITI-NEXT: v_mul_hi_u32 v12, v4, v5 +; TAHITI-NEXT: v_mul_lo_u32 v11, v4, v11 +; TAHITI-NEXT: v_add_i32_e32 v10, vcc, v12, v10 +; TAHITI-NEXT: v_addc_u32_e32 v12, vcc, 0, v14, vcc +; TAHITI-NEXT: v_mul_hi_u32 v14, v4, v5 ; TAHITI-NEXT: v_mul_lo_u32 v5, v4, v5 -; TAHITI-NEXT: v_mul_hi_u32 v11, v4, v10 -; TAHITI-NEXT: v_add_i32_e32 v13, vcc, v14, v13 -; TAHITI-NEXT: v_addc_u32_e32 v14, vcc, 0, v15, vcc -; TAHITI-NEXT: v_mul_lo_u32 v10, v4, v10 -; TAHITI-NEXT: v_add_i32_e32 v5, vcc, v13, v5 -; TAHITI-NEXT: v_addc_u32_e32 v5, vcc, v14, v12, vcc -; TAHITI-NEXT: v_addc_u32_e32 v11, vcc, 0, v11, vcc -; TAHITI-NEXT: v_add_i32_e32 v5, vcc, v5, v10 +; TAHITI-NEXT: v_add_i32_e32 v10, vcc, v10, v11 +; TAHITI-NEXT: v_addc_u32_e32 v10, vcc, v12, v13, vcc +; TAHITI-NEXT: v_addc_u32_e32 v11, vcc, 0, v14, vcc +; TAHITI-NEXT: v_add_i32_e32 v5, vcc, v10, v5 ; TAHITI-NEXT: v_addc_u32_e32 v10, vcc, 0, v11, vcc ; TAHITI-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; TAHITI-NEXT: v_addc_u32_e32 v4, vcc, v4, v10, vcc @@ -3373,47 +3373,47 @@ define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v9, vcc ; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v8 ; TONGA-NEXT: v_addc_u32_e32 v1, 
vcc, 0, v1, vcc -; TONGA-NEXT: v_add_u32_e32 v11, vcc, v11, v0 -; TONGA-NEXT: v_addc_u32_e32 v10, vcc, v10, v1, vcc -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], s1, v11, 0 -; TONGA-NEXT: v_mul_lo_u32 v4, s1, v10 -; TONGA-NEXT: v_mul_lo_u32 v5, s10, v11 +; TONGA-NEXT: v_add_u32_e32 v8, vcc, v11, v0 +; TONGA-NEXT: v_addc_u32_e32 v9, vcc, v10, v1, vcc +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], s1, v8, 0 +; TONGA-NEXT: v_mul_lo_u32 v4, s1, v9 +; TONGA-NEXT: v_mul_lo_u32 v5, s10, v8 ; TONGA-NEXT: s_ashr_i32 s10, s3, 31 -; TONGA-NEXT: v_mul_hi_u32 v12, v11, v0 +; TONGA-NEXT: v_mul_hi_u32 v11, v8, v0 ; TONGA-NEXT: v_add_u32_e32 v1, vcc, v4, v1 -; TONGA-NEXT: v_add_u32_e32 v8, vcc, v5, v1 -; TONGA-NEXT: v_mad_u64_u32 v[4:5], s[8:9], v11, v8, 0 -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v10, v0, 0 -; TONGA-NEXT: v_mad_u64_u32 v[8:9], s[8:9], v10, v8, 0 -; TONGA-NEXT: s_add_u32 s8, s2, s10 -; TONGA-NEXT: v_add_u32_e32 v4, vcc, v12, v4 -; TONGA-NEXT: v_addc_u32_e32 v5, vcc, 0, v5, vcc -; TONGA-NEXT: v_add_u32_e32 v0, vcc, v4, v0 -; TONGA-NEXT: v_addc_u32_e32 v0, vcc, v5, v1, vcc -; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v9, vcc -; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v8 -; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; TONGA-NEXT: v_add_u32_e32 v4, vcc, v11, v0 +; TONGA-NEXT: v_add_u32_e32 v10, vcc, v5, v1 +; TONGA-NEXT: v_mad_u64_u32 v[4:5], s[8:9], v8, v10, 0 +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], v9, v0, 0 ; TONGA-NEXT: s_mov_b32 s11, s10 +; TONGA-NEXT: v_add_u32_e32 v11, vcc, v11, v4 +; TONGA-NEXT: v_addc_u32_e32 v12, vcc, 0, v5, vcc +; TONGA-NEXT: v_mad_u64_u32 v[4:5], s[8:9], v9, v10, 0 +; TONGA-NEXT: v_add_u32_e32 v0, vcc, v11, v0 +; TONGA-NEXT: v_addc_u32_e32 v0, vcc, v12, v1, vcc +; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v5, vcc +; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v4 +; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc +; TONGA-NEXT: s_add_u32 s8, s2, s10 +; TONGA-NEXT: v_add_u32_e32 v4, vcc, v8, v0 ; TONGA-NEXT: s_addc_u32 s9, s3, s10 -; 
TONGA-NEXT: v_addc_u32_e32 v5, vcc, v10, v1, vcc +; TONGA-NEXT: v_addc_u32_e32 v5, vcc, v9, v1, vcc ; TONGA-NEXT: s_xor_b64 s[12:13], s[8:9], s[10:11] ; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], s12, v5, 0 ; TONGA-NEXT: v_mul_hi_u32 v8, s12, v4 ; TONGA-NEXT: v_readfirstlane_b32 s1, v1 ; TONGA-NEXT: v_readfirstlane_b32 s3, v0 -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], s13, v5, 0 -; TONGA-NEXT: v_mad_u64_u32 v[4:5], s[8:9], s13, v4, 0 -; TONGA-NEXT: v_readfirstlane_b32 s14, v8 -; TONGA-NEXT: s_add_u32 s3, s14, s3 +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], s13, v4, 0 +; TONGA-NEXT: v_readfirstlane_b32 s8, v8 +; TONGA-NEXT: s_add_u32 s3, s8, s3 +; TONGA-NEXT: v_mad_u64_u32 v[4:5], s[8:9], s13, v5, 0 ; TONGA-NEXT: s_addc_u32 s1, 0, s1 -; TONGA-NEXT: v_readfirstlane_b32 s14, v4 -; TONGA-NEXT: v_readfirstlane_b32 s9, v5 -; TONGA-NEXT: s_add_u32 s3, s3, s14 -; TONGA-NEXT: v_readfirstlane_b32 s8, v1 -; TONGA-NEXT: s_addc_u32 s1, s1, s9 -; TONGA-NEXT: s_addc_u32 s3, s8, 0 ; TONGA-NEXT: v_readfirstlane_b32 s8, v0 +; TONGA-NEXT: v_readfirstlane_b32 s14, v1 +; TONGA-NEXT: s_add_u32 s3, s3, s8 +; TONGA-NEXT: s_addc_u32 s1, s1, s14 +; TONGA-NEXT: v_readfirstlane_b32 s3, v5 +; TONGA-NEXT: s_addc_u32 s3, s3, 0 +; TONGA-NEXT: v_readfirstlane_b32 s8, v4 ; TONGA-NEXT: s_add_u32 s1, s1, s8 ; TONGA-NEXT: v_mov_b32_e32 v0, s1 ; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[8:9], s6, v0, 0 @@ -3494,66 +3494,66 @@ define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TONGA-NEXT: v_add_u32_e32 v1, vcc, v2, v0 ; TONGA-NEXT: v_addc_u32_e32 v3, vcc, v3, v0, vcc ; TONGA-NEXT: v_xor_b32_e32 v5, v1, v0 -; TONGA-NEXT: v_xor_b32_e32 v12, v3, v0 +; TONGA-NEXT: v_xor_b32_e32 v10, v3, v0 ; TONGA-NEXT: v_cvt_f32_u32_e32 v0, v5 -; TONGA-NEXT: v_cvt_f32_u32_e32 v1, v12 +; TONGA-NEXT: v_cvt_f32_u32_e32 v1, v10 ; TONGA-NEXT: v_sub_u32_e32 v13, vcc, 0, v5 -; TONGA-NEXT: v_subb_u32_e32 v14, vcc, 0, v12, vcc +; TONGA-NEXT: v_subb_u32_e32 v14, vcc, 0, v10, vcc ; TONGA-NEXT: v_madmk_f32 
v0, v1, 0x4f800000, v0 ; TONGA-NEXT: v_rcp_f32_e32 v0, v0 ; TONGA-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; TONGA-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; TONGA-NEXT: v_trunc_f32_e32 v1, v1 ; TONGA-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 -; TONGA-NEXT: v_cvt_u32_f32_e32 v10, v1 -; TONGA-NEXT: v_cvt_u32_f32_e32 v11, v0 -; TONGA-NEXT: v_mul_lo_u32 v3, v13, v10 -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v13, v11, 0 -; TONGA-NEXT: v_mul_lo_u32 v4, v14, v11 +; TONGA-NEXT: v_cvt_u32_f32_e32 v11, v1 +; TONGA-NEXT: v_cvt_u32_f32_e32 v12, v0 +; TONGA-NEXT: v_mul_lo_u32 v3, v13, v11 +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v13, v12, 0 +; TONGA-NEXT: v_mul_lo_u32 v4, v14, v12 ; TONGA-NEXT: v_add_u32_e32 v1, vcc, v1, v3 ; TONGA-NEXT: v_add_u32_e32 v15, vcc, v1, v4 -; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v11, v15, 0 -; TONGA-NEXT: v_mul_hi_u32 v16, v11, v0 -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v10, v0, 0 +; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v12, v15, 0 +; TONGA-NEXT: v_mul_hi_u32 v16, v12, v0 +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v11, v0, 0 ; TONGA-NEXT: v_add_u32_e32 v16, vcc, v16, v3 ; TONGA-NEXT: v_addc_u32_e32 v17, vcc, 0, v4, vcc -; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v10, v15, 0 +; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v11, v15, 0 ; TONGA-NEXT: v_add_u32_e32 v0, vcc, v16, v0 ; TONGA-NEXT: v_addc_u32_e32 v0, vcc, v17, v1, vcc ; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v4, vcc ; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v3 ; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; TONGA-NEXT: v_add_u32_e32 v15, vcc, v11, v0 -; TONGA-NEXT: v_addc_u32_e32 v16, vcc, v10, v1, vcc -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v13, v15, 0 -; TONGA-NEXT: v_mul_lo_u32 v3, v13, v16 -; TONGA-NEXT: v_mul_lo_u32 v4, v14, v15 -; TONGA-NEXT: v_mul_hi_u32 v13, v15, v0 +; TONGA-NEXT: v_add_u32_e32 v12, vcc, v12, v0 +; TONGA-NEXT: v_addc_u32_e32 v11, vcc, v11, v1, vcc +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v13, v12, 0 +; TONGA-NEXT: v_mul_lo_u32 v3, v13, 
v11 +; TONGA-NEXT: v_mul_lo_u32 v4, v14, v12 +; TONGA-NEXT: v_mul_hi_u32 v14, v12, v0 ; TONGA-NEXT: v_add_u32_e32 v1, vcc, v3, v1 -; TONGA-NEXT: v_add_u32_e32 v10, vcc, v4, v1 -; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v15, v10, 0 -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v16, v0, 0 -; TONGA-NEXT: v_mad_u64_u32 v[10:11], s[0:1], v16, v10, 0 -; TONGA-NEXT: v_add_u32_e32 v3, vcc, v13, v3 -; TONGA-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc -; TONGA-NEXT: v_add_u32_e32 v0, vcc, v3, v0 -; TONGA-NEXT: v_addc_u32_e32 v0, vcc, v4, v1, vcc -; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v11, vcc -; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v10 +; TONGA-NEXT: v_add_u32_e32 v13, vcc, v4, v1 +; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v12, v13, 0 +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v11, v0, 0 +; TONGA-NEXT: v_add_u32_e32 v14, vcc, v14, v3 +; TONGA-NEXT: v_addc_u32_e32 v15, vcc, 0, v4, vcc +; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v11, v13, 0 +; TONGA-NEXT: v_add_u32_e32 v0, vcc, v14, v0 +; TONGA-NEXT: v_addc_u32_e32 v0, vcc, v15, v1, vcc +; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v4, vcc +; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v3 ; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; TONGA-NEXT: v_add_u32_e32 v3, vcc, v15, v0 -; TONGA-NEXT: v_addc_u32_e32 v10, vcc, v16, v1, vcc -; TONGA-NEXT: v_ashrrev_i32_e32 v11, 31, v7 -; TONGA-NEXT: v_add_u32_e32 v0, vcc, v6, v11 -; TONGA-NEXT: v_addc_u32_e32 v4, vcc, v7, v11, vcc -; TONGA-NEXT: v_xor_b32_e32 v7, v0, v11 -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v7, v10, 0 +; TONGA-NEXT: v_add_u32_e32 v3, vcc, v12, v0 +; TONGA-NEXT: v_addc_u32_e32 v11, vcc, v11, v1, vcc +; TONGA-NEXT: v_ashrrev_i32_e32 v12, 31, v7 +; TONGA-NEXT: v_add_u32_e32 v0, vcc, v6, v12 +; TONGA-NEXT: v_addc_u32_e32 v4, vcc, v7, v12, vcc +; TONGA-NEXT: v_xor_b32_e32 v7, v0, v12 +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v7, v11, 0 ; TONGA-NEXT: v_mul_hi_u32 v13, v7, v3 -; TONGA-NEXT: v_xor_b32_e32 v14, v4, v11 +; TONGA-NEXT: v_xor_b32_e32 v14, v4, v12 ; 
TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v14, v3, 0 ; TONGA-NEXT: v_add_u32_e32 v13, vcc, v13, v0 ; TONGA-NEXT: v_addc_u32_e32 v15, vcc, 0, v1, vcc -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v14, v10, 0 +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v14, v11, 0 ; TONGA-NEXT: v_add_u32_e32 v3, vcc, v13, v3 ; TONGA-NEXT: v_addc_u32_e32 v3, vcc, v15, v4, vcc ; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc @@ -3561,40 +3561,40 @@ define amdgpu_kernel void @srem_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TONGA-NEXT: v_addc_u32_e32 v0, vcc, 0, v1, vcc ; TONGA-NEXT: v_mul_lo_u32 v4, v5, v0 ; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v5, v3, 0 -; TONGA-NEXT: v_mul_lo_u32 v3, v12, v3 +; TONGA-NEXT: v_mul_lo_u32 v3, v10, v3 ; TONGA-NEXT: v_add_u32_e32 v1, vcc, v4, v1 ; TONGA-NEXT: v_add_u32_e32 v1, vcc, v3, v1 ; TONGA-NEXT: v_sub_u32_e32 v3, vcc, v14, v1 ; TONGA-NEXT: v_sub_u32_e32 v0, vcc, v7, v0 -; TONGA-NEXT: v_subb_u32_e64 v3, s[0:1], v3, v12, vcc +; TONGA-NEXT: v_subb_u32_e64 v3, s[0:1], v3, v10, vcc ; TONGA-NEXT: v_sub_u32_e64 v4, s[0:1], v0, v5 ; TONGA-NEXT: v_subbrev_u32_e64 v7, s[2:3], 0, v3, s[0:1] -; TONGA-NEXT: v_cmp_ge_u32_e64 s[2:3], v7, v12 -; TONGA-NEXT: v_cndmask_b32_e64 v10, 0, -1, s[2:3] +; TONGA-NEXT: v_cmp_ge_u32_e64 s[2:3], v7, v10 +; TONGA-NEXT: v_cndmask_b32_e64 v11, 0, -1, s[2:3] ; TONGA-NEXT: v_cmp_ge_u32_e64 s[2:3], v4, v5 ; TONGA-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[2:3] -; TONGA-NEXT: v_cmp_eq_u32_e64 s[2:3], v7, v12 -; TONGA-NEXT: v_subb_u32_e64 v3, s[0:1], v3, v12, s[0:1] -; TONGA-NEXT: v_cndmask_b32_e64 v10, v10, v13, s[2:3] +; TONGA-NEXT: v_cmp_eq_u32_e64 s[2:3], v7, v10 +; TONGA-NEXT: v_subb_u32_e64 v3, s[0:1], v3, v10, s[0:1] +; TONGA-NEXT: v_cndmask_b32_e64 v11, v11, v13, s[2:3] ; TONGA-NEXT: v_sub_u32_e64 v13, s[0:1], v4, v5 ; TONGA-NEXT: v_subbrev_u32_e64 v3, s[0:1], 0, v3, s[0:1] ; TONGA-NEXT: v_subb_u32_e32 v1, vcc, v14, v1, vcc -; TONGA-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v10 -; TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v1, v12 +; 
TONGA-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v11 +; TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 ; TONGA-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[0:1] ; TONGA-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc ; TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 ; TONGA-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; TONGA-NEXT: v_cmp_eq_u32_e32 vcc, v1, v12 +; TONGA-NEXT: v_cmp_eq_u32_e32 vcc, v1, v10 ; TONGA-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc ; TONGA-NEXT: v_cndmask_b32_e64 v4, v4, v13, s[0:1] ; TONGA-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; TONGA-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; TONGA-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; TONGA-NEXT: v_xor_b32_e32 v0, v0, v11 -; TONGA-NEXT: v_xor_b32_e32 v1, v1, v11 -; TONGA-NEXT: v_sub_u32_e32 v10, vcc, v0, v11 -; TONGA-NEXT: v_subb_u32_e32 v11, vcc, v1, v11, vcc +; TONGA-NEXT: v_xor_b32_e32 v0, v0, v12 +; TONGA-NEXT: v_xor_b32_e32 v1, v1, v12 +; TONGA-NEXT: v_sub_u32_e32 v10, vcc, v0, v12 +; TONGA-NEXT: v_subb_u32_e32 v11, vcc, v1, v12, vcc ; TONGA-NEXT: s_cbranch_execnz .LBB10_8 ; TONGA-NEXT: .LBB10_7: ; TONGA-NEXT: v_cvt_f32_u32_e32 v0, v2 @@ -4914,37 +4914,37 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GCN-NEXT: s_mul_hi_u32 s26, s24, s25 ; GCN-NEXT: s_add_u32 s27, s29, s27 ; GCN-NEXT: s_addc_u32 s26, 0, s26 -; GCN-NEXT: s_mul_hi_u32 s30, s23, s28 +; GCN-NEXT: s_mul_hi_u32 s29, s23, s28 ; GCN-NEXT: s_mul_i32 s28, s23, s28 ; GCN-NEXT: s_add_u32 s27, s27, s28 -; GCN-NEXT: s_mul_hi_u32 s29, s23, s25 -; GCN-NEXT: s_addc_u32 s26, s26, s30 -; GCN-NEXT: s_addc_u32 s27, s29, 0 +; GCN-NEXT: s_addc_u32 s26, s26, s29 +; GCN-NEXT: s_mul_hi_u32 s27, s23, s25 +; GCN-NEXT: s_addc_u32 s27, s27, 0 ; GCN-NEXT: s_mul_i32 s25, s23, s25 ; GCN-NEXT: s_add_u32 s25, s26, s25 ; GCN-NEXT: s_addc_u32 s26, 0, s27 ; GCN-NEXT: s_add_u32 s24, s24, s25 ; GCN-NEXT: s_addc_u32 s23, s23, s26 -; GCN-NEXT: s_mul_i32 s25, s17, s23 -; GCN-NEXT: s_mul_hi_u32 s26, s17, s24 -; GCN-NEXT: s_add_i32 s25, s26, s25 +; GCN-NEXT: s_mul_i32 s25, s17, s24 
+; GCN-NEXT: s_mul_i32 s28, s17, s23 +; GCN-NEXT: s_mul_hi_u32 s17, s17, s24 +; GCN-NEXT: s_add_i32 s17, s17, s28 ; GCN-NEXT: s_mul_i32 s22, s22, s24 -; GCN-NEXT: s_add_i32 s25, s25, s22 -; GCN-NEXT: s_mul_i32 s17, s17, s24 -; GCN-NEXT: s_mul_hi_u32 s26, s23, s17 -; GCN-NEXT: s_mul_i32 s27, s23, s17 -; GCN-NEXT: s_mul_i32 s29, s24, s25 -; GCN-NEXT: s_mul_hi_u32 s17, s24, s17 -; GCN-NEXT: s_mul_hi_u32 s28, s24, s25 -; GCN-NEXT: s_add_u32 s17, s17, s29 -; GCN-NEXT: s_addc_u32 s28, 0, s28 -; GCN-NEXT: s_add_u32 s17, s17, s27 -; GCN-NEXT: s_mul_hi_u32 s22, s23, s25 -; GCN-NEXT: s_addc_u32 s17, s28, s26 -; GCN-NEXT: s_addc_u32 s22, s22, 0 -; GCN-NEXT: s_mul_i32 s25, s23, s25 -; GCN-NEXT: s_add_u32 s17, s17, s25 +; GCN-NEXT: s_add_i32 s17, s17, s22 +; GCN-NEXT: s_mul_hi_u32 s26, s23, s25 +; GCN-NEXT: s_mul_i32 s27, s23, s25 +; GCN-NEXT: s_mul_i32 s28, s24, s17 +; GCN-NEXT: s_mul_hi_u32 s25, s24, s25 +; GCN-NEXT: s_mul_hi_u32 s22, s24, s17 +; GCN-NEXT: s_add_u32 s25, s25, s28 ; GCN-NEXT: s_addc_u32 s22, 0, s22 +; GCN-NEXT: s_add_u32 s25, s25, s27 +; GCN-NEXT: s_addc_u32 s22, s22, s26 +; GCN-NEXT: s_mul_hi_u32 s25, s23, s17 +; GCN-NEXT: s_addc_u32 s25, s25, 0 +; GCN-NEXT: s_mul_i32 s17, s23, s17 +; GCN-NEXT: s_add_u32 s17, s22, s17 +; GCN-NEXT: s_addc_u32 s22, 0, s25 ; GCN-NEXT: s_add_u32 s17, s24, s17 ; GCN-NEXT: s_addc_u32 s26, s23, s22 ; GCN-NEXT: s_ashr_i32 s22, s19, 31 @@ -4957,12 +4957,12 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GCN-NEXT: s_mul_hi_u32 s19, s24, s26 ; GCN-NEXT: s_add_u32 s27, s28, s27 ; GCN-NEXT: s_addc_u32 s19, 0, s19 -; GCN-NEXT: s_mul_hi_u32 s29, s25, s17 +; GCN-NEXT: s_mul_hi_u32 s28, s25, s17 ; GCN-NEXT: s_mul_i32 s17, s25, s17 ; GCN-NEXT: s_add_u32 s17, s27, s17 -; GCN-NEXT: s_mul_hi_u32 s28, s25, s26 -; GCN-NEXT: s_addc_u32 s17, s19, s29 -; GCN-NEXT: s_addc_u32 s19, s28, 0 +; GCN-NEXT: s_addc_u32 s17, s19, s28 +; GCN-NEXT: s_mul_hi_u32 s19, s25, s26 +; GCN-NEXT: s_addc_u32 s19, s19, 0 ; GCN-NEXT: 
s_mul_i32 s26, s25, s26 ; GCN-NEXT: s_add_u32 s17, s17, s26 ; GCN-NEXT: s_addc_u32 s19, 0, s19 @@ -5062,37 +5062,37 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GCN-NEXT: s_mul_hi_u32 s24, s22, s23 ; GCN-NEXT: s_add_u32 s25, s27, s25 ; GCN-NEXT: s_addc_u32 s24, 0, s24 -; GCN-NEXT: s_mul_hi_u32 s28, s21, s26 +; GCN-NEXT: s_mul_hi_u32 s27, s21, s26 ; GCN-NEXT: s_mul_i32 s26, s21, s26 ; GCN-NEXT: s_add_u32 s25, s25, s26 -; GCN-NEXT: s_mul_hi_u32 s27, s21, s23 -; GCN-NEXT: s_addc_u32 s24, s24, s28 -; GCN-NEXT: s_addc_u32 s25, s27, 0 +; GCN-NEXT: s_addc_u32 s24, s24, s27 +; GCN-NEXT: s_mul_hi_u32 s25, s21, s23 +; GCN-NEXT: s_addc_u32 s25, s25, 0 ; GCN-NEXT: s_mul_i32 s23, s21, s23 ; GCN-NEXT: s_add_u32 s23, s24, s23 ; GCN-NEXT: s_addc_u32 s24, 0, s25 ; GCN-NEXT: s_add_u32 s22, s22, s23 ; GCN-NEXT: s_addc_u32 s21, s21, s24 -; GCN-NEXT: s_mul_i32 s23, s13, s21 -; GCN-NEXT: s_mul_hi_u32 s24, s13, s22 -; GCN-NEXT: s_add_i32 s23, s24, s23 +; GCN-NEXT: s_mul_i32 s23, s13, s22 +; GCN-NEXT: s_mul_i32 s26, s13, s21 +; GCN-NEXT: s_mul_hi_u32 s13, s13, s22 +; GCN-NEXT: s_add_i32 s13, s13, s26 ; GCN-NEXT: s_mul_i32 s20, s20, s22 -; GCN-NEXT: s_add_i32 s23, s23, s20 -; GCN-NEXT: s_mul_i32 s13, s13, s22 -; GCN-NEXT: s_mul_hi_u32 s24, s21, s13 -; GCN-NEXT: s_mul_i32 s25, s21, s13 -; GCN-NEXT: s_mul_i32 s27, s22, s23 -; GCN-NEXT: s_mul_hi_u32 s13, s22, s13 -; GCN-NEXT: s_mul_hi_u32 s26, s22, s23 -; GCN-NEXT: s_add_u32 s13, s13, s27 -; GCN-NEXT: s_addc_u32 s26, 0, s26 -; GCN-NEXT: s_add_u32 s13, s13, s25 -; GCN-NEXT: s_mul_hi_u32 s20, s21, s23 -; GCN-NEXT: s_addc_u32 s13, s26, s24 -; GCN-NEXT: s_addc_u32 s20, s20, 0 -; GCN-NEXT: s_mul_i32 s23, s21, s23 -; GCN-NEXT: s_add_u32 s13, s13, s23 +; GCN-NEXT: s_add_i32 s13, s13, s20 +; GCN-NEXT: s_mul_hi_u32 s24, s21, s23 +; GCN-NEXT: s_mul_i32 s25, s21, s23 +; GCN-NEXT: s_mul_i32 s26, s22, s13 +; GCN-NEXT: s_mul_hi_u32 s23, s22, s23 +; GCN-NEXT: s_mul_hi_u32 s20, s22, s13 +; GCN-NEXT: s_add_u32 s23, 
s23, s26 ; GCN-NEXT: s_addc_u32 s20, 0, s20 +; GCN-NEXT: s_add_u32 s23, s23, s25 +; GCN-NEXT: s_addc_u32 s20, s20, s24 +; GCN-NEXT: s_mul_hi_u32 s23, s21, s13 +; GCN-NEXT: s_addc_u32 s23, s23, 0 +; GCN-NEXT: s_mul_i32 s13, s21, s13 +; GCN-NEXT: s_add_u32 s13, s20, s13 +; GCN-NEXT: s_addc_u32 s20, 0, s23 ; GCN-NEXT: s_add_u32 s13, s22, s13 ; GCN-NEXT: s_addc_u32 s24, s21, s20 ; GCN-NEXT: s_ashr_i32 s20, s15, 31 @@ -5105,12 +5105,12 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GCN-NEXT: s_mul_hi_u32 s15, s22, s24 ; GCN-NEXT: s_add_u32 s25, s26, s25 ; GCN-NEXT: s_addc_u32 s15, 0, s15 -; GCN-NEXT: s_mul_hi_u32 s27, s23, s13 +; GCN-NEXT: s_mul_hi_u32 s26, s23, s13 ; GCN-NEXT: s_mul_i32 s13, s23, s13 ; GCN-NEXT: s_add_u32 s13, s25, s13 -; GCN-NEXT: s_mul_hi_u32 s26, s23, s24 -; GCN-NEXT: s_addc_u32 s13, s15, s27 -; GCN-NEXT: s_addc_u32 s15, s26, 0 +; GCN-NEXT: s_addc_u32 s13, s15, s26 +; GCN-NEXT: s_mul_hi_u32 s15, s23, s24 +; GCN-NEXT: s_addc_u32 s15, s15, 0 ; GCN-NEXT: s_mul_i32 s24, s23, s24 ; GCN-NEXT: s_add_u32 s13, s13, s24 ; GCN-NEXT: s_addc_u32 s15, 0, s15 @@ -5219,37 +5219,37 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GCN-NEXT: s_mul_hi_u32 s20, s18, s19 ; GCN-NEXT: s_add_u32 s21, s23, s21 ; GCN-NEXT: s_addc_u32 s20, 0, s20 -; GCN-NEXT: s_mul_hi_u32 s24, s17, s22 +; GCN-NEXT: s_mul_hi_u32 s23, s17, s22 ; GCN-NEXT: s_mul_i32 s22, s17, s22 ; GCN-NEXT: s_add_u32 s21, s21, s22 -; GCN-NEXT: s_mul_hi_u32 s23, s17, s19 -; GCN-NEXT: s_addc_u32 s20, s20, s24 -; GCN-NEXT: s_addc_u32 s21, s23, 0 +; GCN-NEXT: s_addc_u32 s20, s20, s23 +; GCN-NEXT: s_mul_hi_u32 s21, s17, s19 +; GCN-NEXT: s_addc_u32 s21, s21, 0 ; GCN-NEXT: s_mul_i32 s19, s17, s19 ; GCN-NEXT: s_add_u32 s19, s20, s19 ; GCN-NEXT: s_addc_u32 s20, 0, s21 ; GCN-NEXT: s_add_u32 s18, s18, s19 ; GCN-NEXT: s_addc_u32 s17, s17, s20 -; GCN-NEXT: s_mul_i32 s19, s9, s17 -; GCN-NEXT: s_mul_hi_u32 s20, s9, s18 -; GCN-NEXT: s_add_i32 s19, s20, 
s19 +; GCN-NEXT: s_mul_i32 s19, s9, s18 +; GCN-NEXT: s_mul_i32 s22, s9, s17 +; GCN-NEXT: s_mul_hi_u32 s9, s9, s18 +; GCN-NEXT: s_add_i32 s9, s9, s22 ; GCN-NEXT: s_mul_i32 s16, s16, s18 -; GCN-NEXT: s_add_i32 s19, s19, s16 -; GCN-NEXT: s_mul_i32 s9, s9, s18 -; GCN-NEXT: s_mul_hi_u32 s20, s17, s9 -; GCN-NEXT: s_mul_i32 s21, s17, s9 -; GCN-NEXT: s_mul_i32 s23, s18, s19 -; GCN-NEXT: s_mul_hi_u32 s9, s18, s9 -; GCN-NEXT: s_mul_hi_u32 s22, s18, s19 -; GCN-NEXT: s_add_u32 s9, s9, s23 -; GCN-NEXT: s_addc_u32 s22, 0, s22 -; GCN-NEXT: s_add_u32 s9, s9, s21 -; GCN-NEXT: s_mul_hi_u32 s16, s17, s19 -; GCN-NEXT: s_addc_u32 s9, s22, s20 -; GCN-NEXT: s_addc_u32 s16, s16, 0 -; GCN-NEXT: s_mul_i32 s19, s17, s19 -; GCN-NEXT: s_add_u32 s9, s9, s19 +; GCN-NEXT: s_add_i32 s9, s9, s16 +; GCN-NEXT: s_mul_hi_u32 s20, s17, s19 +; GCN-NEXT: s_mul_i32 s21, s17, s19 +; GCN-NEXT: s_mul_i32 s22, s18, s9 +; GCN-NEXT: s_mul_hi_u32 s19, s18, s19 +; GCN-NEXT: s_mul_hi_u32 s16, s18, s9 +; GCN-NEXT: s_add_u32 s19, s19, s22 ; GCN-NEXT: s_addc_u32 s16, 0, s16 +; GCN-NEXT: s_add_u32 s19, s19, s21 +; GCN-NEXT: s_addc_u32 s16, s16, s20 +; GCN-NEXT: s_mul_hi_u32 s19, s17, s9 +; GCN-NEXT: s_addc_u32 s19, s19, 0 +; GCN-NEXT: s_mul_i32 s9, s17, s9 +; GCN-NEXT: s_add_u32 s9, s16, s9 +; GCN-NEXT: s_addc_u32 s16, 0, s19 ; GCN-NEXT: s_add_u32 s9, s18, s9 ; GCN-NEXT: s_addc_u32 s20, s17, s16 ; GCN-NEXT: s_ashr_i32 s16, s11, 31 @@ -5262,12 +5262,12 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GCN-NEXT: s_mul_hi_u32 s11, s18, s20 ; GCN-NEXT: s_add_u32 s21, s22, s21 ; GCN-NEXT: s_addc_u32 s11, 0, s11 -; GCN-NEXT: s_mul_hi_u32 s23, s19, s9 +; GCN-NEXT: s_mul_hi_u32 s22, s19, s9 ; GCN-NEXT: s_mul_i32 s9, s19, s9 ; GCN-NEXT: s_add_u32 s9, s21, s9 -; GCN-NEXT: s_mul_hi_u32 s22, s19, s20 -; GCN-NEXT: s_addc_u32 s9, s11, s23 -; GCN-NEXT: s_addc_u32 s11, s22, 0 +; GCN-NEXT: s_addc_u32 s9, s11, s22 +; GCN-NEXT: s_mul_hi_u32 s11, s19, s20 +; GCN-NEXT: s_addc_u32 s11, s11, 0 ; GCN-NEXT: 
s_mul_i32 s20, s19, s20 ; GCN-NEXT: s_add_u32 s9, s9, s20 ; GCN-NEXT: s_addc_u32 s11, 0, s11 @@ -5373,37 +5373,37 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GCN-NEXT: s_mul_hi_u32 s16, s14, s15 ; GCN-NEXT: s_add_u32 s17, s19, s17 ; GCN-NEXT: s_addc_u32 s16, 0, s16 -; GCN-NEXT: s_mul_hi_u32 s20, s13, s18 +; GCN-NEXT: s_mul_hi_u32 s19, s13, s18 ; GCN-NEXT: s_mul_i32 s18, s13, s18 ; GCN-NEXT: s_add_u32 s17, s17, s18 -; GCN-NEXT: s_mul_hi_u32 s19, s13, s15 -; GCN-NEXT: s_addc_u32 s16, s16, s20 -; GCN-NEXT: s_addc_u32 s17, s19, 0 +; GCN-NEXT: s_addc_u32 s16, s16, s19 +; GCN-NEXT: s_mul_hi_u32 s17, s13, s15 +; GCN-NEXT: s_addc_u32 s17, s17, 0 ; GCN-NEXT: s_mul_i32 s15, s13, s15 ; GCN-NEXT: s_add_u32 s15, s16, s15 ; GCN-NEXT: s_addc_u32 s16, 0, s17 ; GCN-NEXT: s_add_u32 s14, s14, s15 ; GCN-NEXT: s_addc_u32 s13, s13, s16 -; GCN-NEXT: s_mul_i32 s15, s3, s13 -; GCN-NEXT: s_mul_hi_u32 s16, s3, s14 -; GCN-NEXT: s_add_i32 s15, s16, s15 +; GCN-NEXT: s_mul_i32 s15, s3, s14 +; GCN-NEXT: s_mul_i32 s18, s3, s13 +; GCN-NEXT: s_mul_hi_u32 s3, s3, s14 +; GCN-NEXT: s_add_i32 s3, s3, s18 ; GCN-NEXT: s_mul_i32 s12, s12, s14 -; GCN-NEXT: s_add_i32 s15, s15, s12 -; GCN-NEXT: s_mul_i32 s3, s3, s14 -; GCN-NEXT: s_mul_hi_u32 s16, s13, s3 -; GCN-NEXT: s_mul_i32 s17, s13, s3 -; GCN-NEXT: s_mul_i32 s19, s14, s15 -; GCN-NEXT: s_mul_hi_u32 s3, s14, s3 -; GCN-NEXT: s_mul_hi_u32 s18, s14, s15 -; GCN-NEXT: s_add_u32 s3, s3, s19 -; GCN-NEXT: s_addc_u32 s18, 0, s18 -; GCN-NEXT: s_add_u32 s3, s3, s17 -; GCN-NEXT: s_mul_hi_u32 s12, s13, s15 -; GCN-NEXT: s_addc_u32 s3, s18, s16 -; GCN-NEXT: s_addc_u32 s12, s12, 0 -; GCN-NEXT: s_mul_i32 s15, s13, s15 -; GCN-NEXT: s_add_u32 s3, s3, s15 +; GCN-NEXT: s_add_i32 s3, s3, s12 +; GCN-NEXT: s_mul_hi_u32 s16, s13, s15 +; GCN-NEXT: s_mul_i32 s17, s13, s15 +; GCN-NEXT: s_mul_i32 s18, s14, s3 +; GCN-NEXT: s_mul_hi_u32 s15, s14, s15 +; GCN-NEXT: s_mul_hi_u32 s12, s14, s3 +; GCN-NEXT: s_add_u32 s15, s15, s18 ; GCN-NEXT: 
s_addc_u32 s12, 0, s12 +; GCN-NEXT: s_add_u32 s15, s15, s17 +; GCN-NEXT: s_addc_u32 s12, s12, s16 +; GCN-NEXT: s_mul_hi_u32 s15, s13, s3 +; GCN-NEXT: s_addc_u32 s15, s15, 0 +; GCN-NEXT: s_mul_i32 s3, s13, s3 +; GCN-NEXT: s_add_u32 s3, s12, s3 +; GCN-NEXT: s_addc_u32 s12, 0, s15 ; GCN-NEXT: s_add_u32 s3, s14, s3 ; GCN-NEXT: s_addc_u32 s16, s13, s12 ; GCN-NEXT: s_ashr_i32 s12, s5, 31 @@ -5416,12 +5416,12 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; GCN-NEXT: s_mul_hi_u32 s5, s14, s16 ; GCN-NEXT: s_add_u32 s17, s18, s17 ; GCN-NEXT: s_addc_u32 s5, 0, s5 -; GCN-NEXT: s_mul_hi_u32 s19, s15, s3 +; GCN-NEXT: s_mul_hi_u32 s18, s15, s3 ; GCN-NEXT: s_mul_i32 s3, s15, s3 ; GCN-NEXT: s_add_u32 s3, s17, s3 -; GCN-NEXT: s_mul_hi_u32 s18, s15, s16 -; GCN-NEXT: s_addc_u32 s3, s5, s19 -; GCN-NEXT: s_addc_u32 s5, s18, 0 +; GCN-NEXT: s_addc_u32 s3, s5, s18 +; GCN-NEXT: s_mul_hi_u32 s5, s15, s16 +; GCN-NEXT: s_addc_u32 s5, s5, 0 ; GCN-NEXT: s_mul_i32 s16, s15, s16 ; GCN-NEXT: s_add_u32 s3, s3, s16 ; GCN-NEXT: s_addc_u32 s5, 0, s5 @@ -5556,25 +5556,25 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TAHITI-NEXT: v_addc_u32_e32 v22, vcc, 0, v23, vcc ; TAHITI-NEXT: v_add_i32_e32 v11, vcc, v11, v21 ; TAHITI-NEXT: v_addc_u32_e32 v18, vcc, v18, v22, vcc -; TAHITI-NEXT: v_mul_lo_u32 v21, v19, v18 -; TAHITI-NEXT: v_mul_hi_u32 v22, v19, v11 +; TAHITI-NEXT: v_mul_lo_u32 v21, v19, v11 +; TAHITI-NEXT: v_mul_lo_u32 v22, v19, v18 +; TAHITI-NEXT: v_mul_hi_u32 v19, v19, v11 ; TAHITI-NEXT: v_mul_lo_u32 v20, v20, v11 -; TAHITI-NEXT: v_mul_lo_u32 v19, v19, v11 -; TAHITI-NEXT: v_add_i32_e32 v21, vcc, v21, v22 -; TAHITI-NEXT: v_add_i32_e32 v20, vcc, v20, v21 -; TAHITI-NEXT: v_mul_lo_u32 v23, v11, v20 +; TAHITI-NEXT: v_mul_hi_u32 v23, v18, v21 +; TAHITI-NEXT: v_add_i32_e32 v19, vcc, v22, v19 +; TAHITI-NEXT: v_add_i32_e32 v19, vcc, v20, v19 +; TAHITI-NEXT: v_mul_lo_u32 v20, v11, v19 +; TAHITI-NEXT: v_mul_hi_u32 v22, v11, v21 ; 
TAHITI-NEXT: v_mul_hi_u32 v24, v11, v19 -; TAHITI-NEXT: v_mul_hi_u32 v25, v11, v20 -; TAHITI-NEXT: v_mul_hi_u32 v22, v18, v19 +; TAHITI-NEXT: v_mul_lo_u32 v21, v18, v21 +; TAHITI-NEXT: v_add_i32_e32 v20, vcc, v22, v20 +; TAHITI-NEXT: v_addc_u32_e32 v22, vcc, 0, v24, vcc +; TAHITI-NEXT: v_mul_hi_u32 v24, v18, v19 ; TAHITI-NEXT: v_mul_lo_u32 v19, v18, v19 -; TAHITI-NEXT: v_mul_hi_u32 v21, v18, v20 -; TAHITI-NEXT: v_add_i32_e32 v23, vcc, v24, v23 -; TAHITI-NEXT: v_addc_u32_e32 v24, vcc, 0, v25, vcc -; TAHITI-NEXT: v_mul_lo_u32 v20, v18, v20 -; TAHITI-NEXT: v_add_i32_e32 v19, vcc, v23, v19 -; TAHITI-NEXT: v_addc_u32_e32 v19, vcc, v24, v22, vcc -; TAHITI-NEXT: v_addc_u32_e32 v21, vcc, 0, v21, vcc -; TAHITI-NEXT: v_add_i32_e32 v19, vcc, v19, v20 +; TAHITI-NEXT: v_add_i32_e32 v20, vcc, v20, v21 +; TAHITI-NEXT: v_addc_u32_e32 v20, vcc, v22, v23, vcc +; TAHITI-NEXT: v_addc_u32_e32 v21, vcc, 0, v24, vcc +; TAHITI-NEXT: v_add_i32_e32 v19, vcc, v20, v19 ; TAHITI-NEXT: v_addc_u32_e32 v20, vcc, 0, v21, vcc ; TAHITI-NEXT: v_add_i32_e32 v11, vcc, v11, v19 ; TAHITI-NEXT: v_addc_u32_e32 v18, vcc, v18, v20, vcc @@ -5698,25 +5698,25 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TAHITI-NEXT: v_addc_u32_e32 v20, vcc, 0, v21, vcc ; TAHITI-NEXT: v_add_i32_e32 v13, vcc, v13, v19 ; TAHITI-NEXT: v_addc_u32_e32 v14, vcc, v14, v20, vcc -; TAHITI-NEXT: v_mul_lo_u32 v19, v15, v14 -; TAHITI-NEXT: v_mul_hi_u32 v20, v15, v13 +; TAHITI-NEXT: v_mul_lo_u32 v19, v15, v13 +; TAHITI-NEXT: v_mul_lo_u32 v20, v15, v14 +; TAHITI-NEXT: v_mul_hi_u32 v15, v15, v13 ; TAHITI-NEXT: v_mul_lo_u32 v18, v18, v13 -; TAHITI-NEXT: v_mul_lo_u32 v15, v15, v13 -; TAHITI-NEXT: v_add_i32_e32 v19, vcc, v19, v20 -; TAHITI-NEXT: v_add_i32_e32 v18, vcc, v18, v19 -; TAHITI-NEXT: v_mul_lo_u32 v21, v13, v18 +; TAHITI-NEXT: v_mul_hi_u32 v21, v14, v19 +; TAHITI-NEXT: v_add_i32_e32 v15, vcc, v20, v15 +; TAHITI-NEXT: v_add_i32_e32 v15, vcc, v18, v15 +; TAHITI-NEXT: v_mul_lo_u32 v18, v13, v15 +; 
TAHITI-NEXT: v_mul_hi_u32 v20, v13, v19 ; TAHITI-NEXT: v_mul_hi_u32 v22, v13, v15 -; TAHITI-NEXT: v_mul_hi_u32 v23, v13, v18 -; TAHITI-NEXT: v_mul_hi_u32 v20, v14, v15 +; TAHITI-NEXT: v_mul_lo_u32 v19, v14, v19 +; TAHITI-NEXT: v_add_i32_e32 v18, vcc, v20, v18 +; TAHITI-NEXT: v_addc_u32_e32 v20, vcc, 0, v22, vcc +; TAHITI-NEXT: v_mul_hi_u32 v22, v14, v15 ; TAHITI-NEXT: v_mul_lo_u32 v15, v14, v15 -; TAHITI-NEXT: v_mul_hi_u32 v19, v14, v18 -; TAHITI-NEXT: v_add_i32_e32 v21, vcc, v22, v21 -; TAHITI-NEXT: v_addc_u32_e32 v22, vcc, 0, v23, vcc -; TAHITI-NEXT: v_mul_lo_u32 v18, v14, v18 -; TAHITI-NEXT: v_add_i32_e32 v15, vcc, v21, v15 -; TAHITI-NEXT: v_addc_u32_e32 v15, vcc, v22, v20, vcc -; TAHITI-NEXT: v_addc_u32_e32 v19, vcc, 0, v19, vcc -; TAHITI-NEXT: v_add_i32_e32 v15, vcc, v15, v18 +; TAHITI-NEXT: v_add_i32_e32 v18, vcc, v18, v19 +; TAHITI-NEXT: v_addc_u32_e32 v18, vcc, v20, v21, vcc +; TAHITI-NEXT: v_addc_u32_e32 v19, vcc, 0, v22, vcc +; TAHITI-NEXT: v_add_i32_e32 v15, vcc, v18, v15 ; TAHITI-NEXT: v_addc_u32_e32 v18, vcc, 0, v19, vcc ; TAHITI-NEXT: v_add_i32_e32 v13, vcc, v13, v15 ; TAHITI-NEXT: v_addc_u32_e32 v14, vcc, v14, v18, vcc @@ -5841,25 +5841,25 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TAHITI-NEXT: v_addc_u32_e32 v18, vcc, 0, v19, vcc ; TAHITI-NEXT: v_add_i32_e32 v13, vcc, v13, v17 ; TAHITI-NEXT: v_addc_u32_e32 v14, vcc, v14, v18, vcc -; TAHITI-NEXT: v_mul_lo_u32 v17, v15, v14 -; TAHITI-NEXT: v_mul_hi_u32 v18, v15, v13 +; TAHITI-NEXT: v_mul_lo_u32 v17, v15, v13 +; TAHITI-NEXT: v_mul_lo_u32 v18, v15, v14 +; TAHITI-NEXT: v_mul_hi_u32 v15, v15, v13 ; TAHITI-NEXT: v_mul_lo_u32 v16, v16, v13 -; TAHITI-NEXT: v_mul_lo_u32 v15, v15, v13 -; TAHITI-NEXT: v_add_i32_e32 v17, vcc, v17, v18 -; TAHITI-NEXT: v_add_i32_e32 v16, vcc, v16, v17 -; TAHITI-NEXT: v_mul_lo_u32 v19, v13, v16 +; TAHITI-NEXT: v_mul_hi_u32 v19, v14, v17 +; TAHITI-NEXT: v_add_i32_e32 v15, vcc, v18, v15 +; TAHITI-NEXT: v_add_i32_e32 v15, vcc, v16, v15 +; 
TAHITI-NEXT: v_mul_lo_u32 v16, v13, v15 +; TAHITI-NEXT: v_mul_hi_u32 v18, v13, v17 ; TAHITI-NEXT: v_mul_hi_u32 v20, v13, v15 -; TAHITI-NEXT: v_mul_hi_u32 v21, v13, v16 -; TAHITI-NEXT: v_mul_hi_u32 v18, v14, v15 +; TAHITI-NEXT: v_mul_lo_u32 v17, v14, v17 +; TAHITI-NEXT: v_add_i32_e32 v16, vcc, v18, v16 +; TAHITI-NEXT: v_addc_u32_e32 v18, vcc, 0, v20, vcc +; TAHITI-NEXT: v_mul_hi_u32 v20, v14, v15 ; TAHITI-NEXT: v_mul_lo_u32 v15, v14, v15 -; TAHITI-NEXT: v_mul_hi_u32 v17, v14, v16 -; TAHITI-NEXT: v_add_i32_e32 v19, vcc, v20, v19 -; TAHITI-NEXT: v_addc_u32_e32 v20, vcc, 0, v21, vcc -; TAHITI-NEXT: v_mul_lo_u32 v16, v14, v16 -; TAHITI-NEXT: v_add_i32_e32 v15, vcc, v19, v15 -; TAHITI-NEXT: v_addc_u32_e32 v15, vcc, v20, v18, vcc -; TAHITI-NEXT: v_addc_u32_e32 v17, vcc, 0, v17, vcc -; TAHITI-NEXT: v_add_i32_e32 v15, vcc, v15, v16 +; TAHITI-NEXT: v_add_i32_e32 v16, vcc, v16, v17 +; TAHITI-NEXT: v_addc_u32_e32 v16, vcc, v18, v19, vcc +; TAHITI-NEXT: v_addc_u32_e32 v17, vcc, 0, v20, vcc +; TAHITI-NEXT: v_add_i32_e32 v15, vcc, v16, v15 ; TAHITI-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc ; TAHITI-NEXT: v_add_i32_e32 v13, vcc, v13, v15 ; TAHITI-NEXT: v_addc_u32_e32 v14, vcc, v14, v16, vcc @@ -5983,25 +5983,25 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TAHITI-NEXT: v_addc_u32_e32 v16, vcc, 0, v17, vcc ; TAHITI-NEXT: v_add_i32_e32 v3, vcc, v3, v15 ; TAHITI-NEXT: v_addc_u32_e32 v4, vcc, v4, v16, vcc -; TAHITI-NEXT: v_mul_lo_u32 v15, v5, v4 -; TAHITI-NEXT: v_mul_hi_u32 v16, v5, v3 +; TAHITI-NEXT: v_mul_lo_u32 v15, v5, v3 +; TAHITI-NEXT: v_mul_lo_u32 v16, v5, v4 +; TAHITI-NEXT: v_mul_hi_u32 v5, v5, v3 ; TAHITI-NEXT: v_mul_lo_u32 v14, v14, v3 -; TAHITI-NEXT: v_mul_lo_u32 v5, v5, v3 -; TAHITI-NEXT: v_add_i32_e32 v15, vcc, v15, v16 -; TAHITI-NEXT: v_add_i32_e32 v14, vcc, v14, v15 -; TAHITI-NEXT: v_mul_lo_u32 v17, v3, v14 +; TAHITI-NEXT: v_mul_hi_u32 v17, v4, v15 +; TAHITI-NEXT: v_add_i32_e32 v5, vcc, v16, v5 +; TAHITI-NEXT: v_add_i32_e32 v5, 
vcc, v14, v5 +; TAHITI-NEXT: v_mul_lo_u32 v14, v3, v5 +; TAHITI-NEXT: v_mul_hi_u32 v16, v3, v15 ; TAHITI-NEXT: v_mul_hi_u32 v18, v3, v5 -; TAHITI-NEXT: v_mul_hi_u32 v19, v3, v14 -; TAHITI-NEXT: v_mul_hi_u32 v16, v4, v5 +; TAHITI-NEXT: v_mul_lo_u32 v15, v4, v15 +; TAHITI-NEXT: v_add_i32_e32 v14, vcc, v16, v14 +; TAHITI-NEXT: v_addc_u32_e32 v16, vcc, 0, v18, vcc +; TAHITI-NEXT: v_mul_hi_u32 v18, v4, v5 ; TAHITI-NEXT: v_mul_lo_u32 v5, v4, v5 -; TAHITI-NEXT: v_mul_hi_u32 v15, v4, v14 -; TAHITI-NEXT: v_add_i32_e32 v17, vcc, v18, v17 -; TAHITI-NEXT: v_addc_u32_e32 v18, vcc, 0, v19, vcc -; TAHITI-NEXT: v_mul_lo_u32 v14, v4, v14 -; TAHITI-NEXT: v_add_i32_e32 v5, vcc, v17, v5 -; TAHITI-NEXT: v_addc_u32_e32 v5, vcc, v18, v16, vcc -; TAHITI-NEXT: v_addc_u32_e32 v15, vcc, 0, v15, vcc -; TAHITI-NEXT: v_add_i32_e32 v5, vcc, v5, v14 +; TAHITI-NEXT: v_add_i32_e32 v14, vcc, v14, v15 +; TAHITI-NEXT: v_addc_u32_e32 v14, vcc, v16, v17, vcc +; TAHITI-NEXT: v_addc_u32_e32 v15, vcc, 0, v18, vcc +; TAHITI-NEXT: v_add_i32_e32 v5, vcc, v14, v5 ; TAHITI-NEXT: v_addc_u32_e32 v14, vcc, 0, v15, vcc ; TAHITI-NEXT: v_add_i32_e32 v3, vcc, v3, v5 ; TAHITI-NEXT: v_addc_u32_e32 v4, vcc, v4, v14, vcc @@ -6163,28 +6163,28 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TONGA-NEXT: v_addc_u32_e32 v9, vcc, 0, v19, vcc ; TONGA-NEXT: v_add_u32_e32 v8, vcc, v8, v18 ; TONGA-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc -; TONGA-NEXT: v_add_u32_e32 v20, vcc, v20, v8 +; TONGA-NEXT: v_add_u32_e32 v18, vcc, v20, v8 ; TONGA-NEXT: v_addc_u32_e32 v11, vcc, v11, v9, vcc -; TONGA-NEXT: v_mad_u64_u32 v[8:9], s[8:9], s1, v20, 0 +; TONGA-NEXT: v_mad_u64_u32 v[8:9], s[8:9], s1, v18, 0 ; TONGA-NEXT: v_mul_lo_u32 v10, s1, v11 -; TONGA-NEXT: v_mul_lo_u32 v14, s10, v20 +; TONGA-NEXT: v_mul_lo_u32 v14, s10, v18 ; TONGA-NEXT: s_ashr_i32 s10, s3, 31 -; TONGA-NEXT: v_mul_hi_u32 v21, v20, v8 +; TONGA-NEXT: v_mul_hi_u32 v20, v18, v8 ; TONGA-NEXT: v_add_u32_e32 v9, vcc, v10, v9 -; TONGA-NEXT: 
v_add_u32_e32 v18, vcc, v14, v9 -; TONGA-NEXT: v_mad_u64_u32 v[9:10], s[8:9], v20, v18, 0 +; TONGA-NEXT: v_add_u32_e32 v19, vcc, v14, v9 +; TONGA-NEXT: v_mad_u64_u32 v[9:10], s[8:9], v18, v19, 0 ; TONGA-NEXT: v_mad_u64_u32 v[14:15], s[8:9], v11, v8, 0 -; TONGA-NEXT: v_mad_u64_u32 v[18:19], s[8:9], v11, v18, 0 -; TONGA-NEXT: s_add_u32 s8, s2, s10 -; TONGA-NEXT: v_add_u32_e32 v8, vcc, v21, v9 -; TONGA-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc -; TONGA-NEXT: v_add_u32_e32 v8, vcc, v8, v14 -; TONGA-NEXT: v_addc_u32_e32 v8, vcc, v9, v15, vcc -; TONGA-NEXT: v_addc_u32_e32 v9, vcc, 0, v19, vcc -; TONGA-NEXT: v_add_u32_e32 v8, vcc, v8, v18 -; TONGA-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc -; TONGA-NEXT: v_add_u32_e32 v10, vcc, v20, v8 ; TONGA-NEXT: s_mov_b32 s11, s10 +; TONGA-NEXT: v_add_u32_e32 v20, vcc, v20, v9 +; TONGA-NEXT: v_mad_u64_u32 v[8:9], s[8:9], v11, v19, 0 +; TONGA-NEXT: v_addc_u32_e32 v10, vcc, 0, v10, vcc +; TONGA-NEXT: v_add_u32_e32 v14, vcc, v20, v14 +; TONGA-NEXT: v_addc_u32_e32 v10, vcc, v10, v15, vcc +; TONGA-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; TONGA-NEXT: v_add_u32_e32 v8, vcc, v10, v8 +; TONGA-NEXT: v_addc_u32_e32 v9, vcc, 0, v9, vcc +; TONGA-NEXT: s_add_u32 s8, s2, s10 +; TONGA-NEXT: v_add_u32_e32 v10, vcc, v18, v8 ; TONGA-NEXT: s_addc_u32 s9, s3, s10 ; TONGA-NEXT: v_addc_u32_e32 v11, vcc, v11, v9, vcc ; TONGA-NEXT: s_xor_b64 s[12:13], s[8:9], s[10:11] @@ -6192,18 +6192,18 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TONGA-NEXT: v_mul_hi_u32 v14, s12, v10 ; TONGA-NEXT: v_readfirstlane_b32 s1, v9 ; TONGA-NEXT: v_readfirstlane_b32 s3, v8 -; TONGA-NEXT: v_mad_u64_u32 v[8:9], s[8:9], s13, v11, 0 -; TONGA-NEXT: v_mad_u64_u32 v[10:11], s[8:9], s13, v10, 0 -; TONGA-NEXT: v_readfirstlane_b32 s14, v14 -; TONGA-NEXT: s_add_u32 s3, s14, s3 +; TONGA-NEXT: v_mad_u64_u32 v[8:9], s[8:9], s13, v10, 0 +; TONGA-NEXT: v_readfirstlane_b32 s8, v14 +; TONGA-NEXT: s_add_u32 s3, s8, s3 ; TONGA-NEXT: s_addc_u32 s1, 0, s1 -; 
TONGA-NEXT: v_readfirstlane_b32 s14, v10 -; TONGA-NEXT: v_readfirstlane_b32 s9, v11 -; TONGA-NEXT: s_add_u32 s3, s3, s14 -; TONGA-NEXT: v_readfirstlane_b32 s8, v9 -; TONGA-NEXT: s_addc_u32 s1, s1, s9 -; TONGA-NEXT: s_addc_u32 s3, s8, 0 +; TONGA-NEXT: v_readfirstlane_b32 s14, v9 +; TONGA-NEXT: v_mad_u64_u32 v[9:10], s[8:9], s13, v11, 0 ; TONGA-NEXT: v_readfirstlane_b32 s8, v8 +; TONGA-NEXT: s_add_u32 s3, s3, s8 +; TONGA-NEXT: s_addc_u32 s1, s1, s14 +; TONGA-NEXT: v_readfirstlane_b32 s3, v10 +; TONGA-NEXT: s_addc_u32 s3, s3, 0 +; TONGA-NEXT: v_readfirstlane_b32 s8, v9 ; TONGA-NEXT: s_add_u32 s1, s1, s8 ; TONGA-NEXT: v_mov_b32_e32 v8, s1 ; TONGA-NEXT: v_mad_u64_u32 v[8:9], s[8:9], s6, v8, 0 @@ -6313,26 +6313,26 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TONGA-NEXT: v_addc_u32_e32 v14, vcc, 0, v14, vcc ; TONGA-NEXT: v_add_u32_e32 v13, vcc, v15, v13 ; TONGA-NEXT: v_addc_u32_e32 v14, vcc, 0, v14, vcc -; TONGA-NEXT: v_add_u32_e32 v24, vcc, v21, v13 -; TONGA-NEXT: v_addc_u32_e32 v25, vcc, v20, v14, vcc -; TONGA-NEXT: v_mad_u64_u32 v[13:14], s[0:1], v22, v24, 0 -; TONGA-NEXT: v_mul_lo_u32 v15, v22, v25 -; TONGA-NEXT: v_mul_lo_u32 v18, v23, v24 -; TONGA-NEXT: v_mul_hi_u32 v22, v24, v13 +; TONGA-NEXT: v_add_u32_e32 v21, vcc, v21, v13 +; TONGA-NEXT: v_addc_u32_e32 v20, vcc, v20, v14, vcc +; TONGA-NEXT: v_mad_u64_u32 v[13:14], s[0:1], v22, v21, 0 +; TONGA-NEXT: v_mul_lo_u32 v15, v22, v20 +; TONGA-NEXT: v_mul_lo_u32 v18, v23, v21 +; TONGA-NEXT: v_mul_hi_u32 v23, v21, v13 ; TONGA-NEXT: v_add_u32_e32 v14, vcc, v15, v14 -; TONGA-NEXT: v_add_u32_e32 v20, vcc, v18, v14 -; TONGA-NEXT: v_mad_u64_u32 v[14:15], s[0:1], v24, v20, 0 -; TONGA-NEXT: v_mad_u64_u32 v[18:19], s[0:1], v25, v13, 0 -; TONGA-NEXT: v_mad_u64_u32 v[20:21], s[0:1], v25, v20, 0 -; TONGA-NEXT: v_add_u32_e32 v13, vcc, v22, v14 -; TONGA-NEXT: v_addc_u32_e32 v14, vcc, 0, v15, vcc -; TONGA-NEXT: v_add_u32_e32 v13, vcc, v13, v18 -; TONGA-NEXT: v_addc_u32_e32 v13, vcc, v14, v19, vcc 
-; TONGA-NEXT: v_addc_u32_e32 v14, vcc, 0, v21, vcc -; TONGA-NEXT: v_add_u32_e32 v13, vcc, v13, v20 +; TONGA-NEXT: v_add_u32_e32 v22, vcc, v18, v14 +; TONGA-NEXT: v_mad_u64_u32 v[14:15], s[0:1], v21, v22, 0 +; TONGA-NEXT: v_mad_u64_u32 v[18:19], s[0:1], v20, v13, 0 +; TONGA-NEXT: v_add_u32_e32 v23, vcc, v23, v14 +; TONGA-NEXT: v_mad_u64_u32 v[13:14], s[0:1], v20, v22, 0 +; TONGA-NEXT: v_addc_u32_e32 v15, vcc, 0, v15, vcc +; TONGA-NEXT: v_add_u32_e32 v18, vcc, v23, v18 +; TONGA-NEXT: v_addc_u32_e32 v15, vcc, v15, v19, vcc ; TONGA-NEXT: v_addc_u32_e32 v14, vcc, 0, v14, vcc -; TONGA-NEXT: v_add_u32_e32 v15, vcc, v24, v13 -; TONGA-NEXT: v_addc_u32_e32 v19, vcc, v25, v14, vcc +; TONGA-NEXT: v_add_u32_e32 v13, vcc, v15, v13 +; TONGA-NEXT: v_addc_u32_e32 v14, vcc, 0, v14, vcc +; TONGA-NEXT: v_add_u32_e32 v15, vcc, v21, v13 +; TONGA-NEXT: v_addc_u32_e32 v19, vcc, v20, v14, vcc ; TONGA-NEXT: v_ashrrev_i32_e32 v20, 31, v17 ; TONGA-NEXT: v_add_u32_e32 v13, vcc, v16, v20 ; TONGA-NEXT: v_xor_b32_e32 v21, v13, v20 @@ -6414,11 +6414,11 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TONGA-NEXT: v_ashrrev_i32_e32 v12, 31, v1 ; TONGA-NEXT: v_add_u32_e32 v13, vcc, v0, v12 ; TONGA-NEXT: v_addc_u32_e32 v1, vcc, v1, v12, vcc -; TONGA-NEXT: v_xor_b32_e32 v19, v13, v12 +; TONGA-NEXT: v_xor_b32_e32 v17, v13, v12 ; TONGA-NEXT: v_xor_b32_e32 v1, v1, v12 -; TONGA-NEXT: v_cvt_f32_u32_e32 v12, v19 +; TONGA-NEXT: v_cvt_f32_u32_e32 v12, v17 ; TONGA-NEXT: v_cvt_f32_u32_e32 v13, v1 -; TONGA-NEXT: v_sub_u32_e32 v20, vcc, 0, v19 +; TONGA-NEXT: v_sub_u32_e32 v20, vcc, 0, v17 ; TONGA-NEXT: v_subb_u32_e32 v21, vcc, 0, v1, vcc ; TONGA-NEXT: v_madmk_f32 v12, v13, 0x4f800000, v12 ; TONGA-NEXT: v_rcp_f32_e32 v12, v12 @@ -6426,51 +6426,51 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TONGA-NEXT: v_mul_f32_e32 v13, 0x2f800000, v12 ; TONGA-NEXT: v_trunc_f32_e32 v13, v13 ; TONGA-NEXT: v_madmk_f32 v12, v13, 0xcf800000, v12 -; 
TONGA-NEXT: v_cvt_u32_f32_e32 v17, v13 -; TONGA-NEXT: v_cvt_u32_f32_e32 v18, v12 -; TONGA-NEXT: v_mul_lo_u32 v14, v20, v17 -; TONGA-NEXT: v_mad_u64_u32 v[12:13], s[0:1], v20, v18, 0 -; TONGA-NEXT: v_mul_lo_u32 v15, v21, v18 +; TONGA-NEXT: v_cvt_u32_f32_e32 v18, v13 +; TONGA-NEXT: v_cvt_u32_f32_e32 v19, v12 +; TONGA-NEXT: v_mul_lo_u32 v14, v20, v18 +; TONGA-NEXT: v_mad_u64_u32 v[12:13], s[0:1], v20, v19, 0 +; TONGA-NEXT: v_mul_lo_u32 v15, v21, v19 ; TONGA-NEXT: v_add_u32_e32 v13, vcc, v13, v14 ; TONGA-NEXT: v_add_u32_e32 v22, vcc, v13, v15 -; TONGA-NEXT: v_mad_u64_u32 v[13:14], s[0:1], v18, v22, 0 -; TONGA-NEXT: v_mul_hi_u32 v23, v18, v12 -; TONGA-NEXT: v_mad_u64_u32 v[15:16], s[0:1], v17, v12, 0 +; TONGA-NEXT: v_mad_u64_u32 v[13:14], s[0:1], v19, v22, 0 +; TONGA-NEXT: v_mul_hi_u32 v23, v19, v12 +; TONGA-NEXT: v_mad_u64_u32 v[15:16], s[0:1], v18, v12, 0 ; TONGA-NEXT: v_add_u32_e32 v23, vcc, v23, v13 -; TONGA-NEXT: v_mad_u64_u32 v[12:13], s[0:1], v17, v22, 0 +; TONGA-NEXT: v_mad_u64_u32 v[12:13], s[0:1], v18, v22, 0 ; TONGA-NEXT: v_addc_u32_e32 v14, vcc, 0, v14, vcc ; TONGA-NEXT: v_add_u32_e32 v15, vcc, v23, v15 ; TONGA-NEXT: v_addc_u32_e32 v14, vcc, v14, v16, vcc ; TONGA-NEXT: v_addc_u32_e32 v13, vcc, 0, v13, vcc ; TONGA-NEXT: v_add_u32_e32 v12, vcc, v14, v12 ; TONGA-NEXT: v_addc_u32_e32 v13, vcc, 0, v13, vcc -; TONGA-NEXT: v_add_u32_e32 v22, vcc, v18, v12 -; TONGA-NEXT: v_addc_u32_e32 v23, vcc, v17, v13, vcc -; TONGA-NEXT: v_mad_u64_u32 v[12:13], s[0:1], v20, v22, 0 -; TONGA-NEXT: v_mul_lo_u32 v14, v20, v23 -; TONGA-NEXT: v_mul_lo_u32 v15, v21, v22 -; TONGA-NEXT: v_mul_hi_u32 v20, v22, v12 +; TONGA-NEXT: v_add_u32_e32 v19, vcc, v19, v12 +; TONGA-NEXT: v_addc_u32_e32 v18, vcc, v18, v13, vcc +; TONGA-NEXT: v_mad_u64_u32 v[12:13], s[0:1], v20, v19, 0 +; TONGA-NEXT: v_mul_lo_u32 v14, v20, v18 +; TONGA-NEXT: v_mul_lo_u32 v15, v21, v19 +; TONGA-NEXT: v_mul_hi_u32 v21, v19, v12 ; TONGA-NEXT: v_add_u32_e32 v13, vcc, v14, v13 -; TONGA-NEXT: v_add_u32_e32 v17, vcc, v15, v13 
-; TONGA-NEXT: v_mad_u64_u32 v[13:14], s[0:1], v22, v17, 0 -; TONGA-NEXT: v_mad_u64_u32 v[15:16], s[0:1], v23, v12, 0 -; TONGA-NEXT: v_mad_u64_u32 v[17:18], s[0:1], v23, v17, 0 -; TONGA-NEXT: v_add_u32_e32 v12, vcc, v20, v13 -; TONGA-NEXT: v_addc_u32_e32 v13, vcc, 0, v14, vcc -; TONGA-NEXT: v_add_u32_e32 v12, vcc, v12, v15 -; TONGA-NEXT: v_addc_u32_e32 v12, vcc, v13, v16, vcc -; TONGA-NEXT: v_addc_u32_e32 v13, vcc, 0, v18, vcc -; TONGA-NEXT: v_add_u32_e32 v12, vcc, v12, v17 +; TONGA-NEXT: v_add_u32_e32 v20, vcc, v15, v13 +; TONGA-NEXT: v_mad_u64_u32 v[13:14], s[0:1], v19, v20, 0 +; TONGA-NEXT: v_mad_u64_u32 v[15:16], s[0:1], v18, v12, 0 +; TONGA-NEXT: v_add_u32_e32 v21, vcc, v21, v13 +; TONGA-NEXT: v_mad_u64_u32 v[12:13], s[0:1], v18, v20, 0 +; TONGA-NEXT: v_addc_u32_e32 v14, vcc, 0, v14, vcc +; TONGA-NEXT: v_add_u32_e32 v15, vcc, v21, v15 +; TONGA-NEXT: v_addc_u32_e32 v14, vcc, v14, v16, vcc +; TONGA-NEXT: v_addc_u32_e32 v13, vcc, 0, v13, vcc +; TONGA-NEXT: v_add_u32_e32 v12, vcc, v14, v12 ; TONGA-NEXT: v_addc_u32_e32 v13, vcc, 0, v13, vcc -; TONGA-NEXT: v_add_u32_e32 v14, vcc, v22, v12 -; TONGA-NEXT: v_addc_u32_e32 v16, vcc, v23, v13, vcc -; TONGA-NEXT: v_ashrrev_i32_e32 v17, 31, v5 -; TONGA-NEXT: v_add_u32_e32 v12, vcc, v4, v17 -; TONGA-NEXT: v_xor_b32_e32 v18, v12, v17 -; TONGA-NEXT: v_mad_u64_u32 v[12:13], s[0:1], v18, v16, 0 -; TONGA-NEXT: v_mul_hi_u32 v20, v18, v14 -; TONGA-NEXT: v_addc_u32_e32 v5, vcc, v5, v17, vcc -; TONGA-NEXT: v_xor_b32_e32 v5, v5, v17 +; TONGA-NEXT: v_add_u32_e32 v14, vcc, v19, v12 +; TONGA-NEXT: v_addc_u32_e32 v16, vcc, v18, v13, vcc +; TONGA-NEXT: v_ashrrev_i32_e32 v18, 31, v5 +; TONGA-NEXT: v_add_u32_e32 v12, vcc, v4, v18 +; TONGA-NEXT: v_xor_b32_e32 v19, v12, v18 +; TONGA-NEXT: v_mad_u64_u32 v[12:13], s[0:1], v19, v16, 0 +; TONGA-NEXT: v_mul_hi_u32 v20, v19, v14 +; TONGA-NEXT: v_addc_u32_e32 v5, vcc, v5, v18, vcc +; TONGA-NEXT: v_xor_b32_e32 v5, v5, v18 ; TONGA-NEXT: v_mad_u64_u32 v[14:15], s[0:1], v5, v14, 0 ; TONGA-NEXT: 
v_add_u32_e32 v20, vcc, v20, v12 ; TONGA-NEXT: v_addc_u32_e32 v21, vcc, 0, v13, vcc @@ -6480,30 +6480,30 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TONGA-NEXT: v_addc_u32_e32 v13, vcc, 0, v13, vcc ; TONGA-NEXT: v_add_u32_e32 v14, vcc, v14, v12 ; TONGA-NEXT: v_addc_u32_e32 v12, vcc, 0, v13, vcc -; TONGA-NEXT: v_mul_lo_u32 v15, v19, v12 -; TONGA-NEXT: v_mad_u64_u32 v[12:13], s[0:1], v19, v14, 0 +; TONGA-NEXT: v_mul_lo_u32 v15, v17, v12 +; TONGA-NEXT: v_mad_u64_u32 v[12:13], s[0:1], v17, v14, 0 ; TONGA-NEXT: v_mul_lo_u32 v14, v1, v14 ; TONGA-NEXT: v_add_u32_e32 v13, vcc, v15, v13 ; TONGA-NEXT: v_add_u32_e32 v13, vcc, v14, v13 ; TONGA-NEXT: v_sub_u32_e32 v14, vcc, v5, v13 -; TONGA-NEXT: v_sub_u32_e32 v12, vcc, v18, v12 +; TONGA-NEXT: v_sub_u32_e32 v12, vcc, v19, v12 ; TONGA-NEXT: v_subb_u32_e64 v14, s[0:1], v14, v1, vcc -; TONGA-NEXT: v_sub_u32_e64 v15, s[0:1], v12, v19 +; TONGA-NEXT: v_sub_u32_e64 v15, s[0:1], v12, v17 ; TONGA-NEXT: v_subbrev_u32_e64 v16, s[2:3], 0, v14, s[0:1] ; TONGA-NEXT: v_cmp_ge_u32_e64 s[2:3], v16, v1 -; TONGA-NEXT: v_cndmask_b32_e64 v18, 0, -1, s[2:3] -; TONGA-NEXT: v_cmp_ge_u32_e64 s[2:3], v15, v19 +; TONGA-NEXT: v_cndmask_b32_e64 v19, 0, -1, s[2:3] +; TONGA-NEXT: v_cmp_ge_u32_e64 s[2:3], v15, v17 ; TONGA-NEXT: v_cndmask_b32_e64 v20, 0, -1, s[2:3] ; TONGA-NEXT: v_cmp_eq_u32_e64 s[2:3], v16, v1 ; TONGA-NEXT: v_subb_u32_e64 v14, s[0:1], v14, v1, s[0:1] -; TONGA-NEXT: v_cndmask_b32_e64 v18, v18, v20, s[2:3] -; TONGA-NEXT: v_sub_u32_e64 v20, s[0:1], v15, v19 +; TONGA-NEXT: v_cndmask_b32_e64 v19, v19, v20, s[2:3] +; TONGA-NEXT: v_sub_u32_e64 v20, s[0:1], v15, v17 ; TONGA-NEXT: v_subb_u32_e32 v5, vcc, v5, v13, vcc ; TONGA-NEXT: v_subbrev_u32_e64 v14, s[0:1], 0, v14, s[0:1] ; TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v5, v1 -; TONGA-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v18 +; TONGA-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v19 ; TONGA-NEXT: v_cndmask_b32_e64 v13, 0, -1, vcc -; TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v12, v19 +; 
TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v12, v17 ; TONGA-NEXT: v_cndmask_b32_e64 v14, v16, v14, s[0:1] ; TONGA-NEXT: v_cndmask_b32_e64 v16, 0, -1, vcc ; TONGA-NEXT: v_cmp_eq_u32_e32 vcc, v5, v1 @@ -6512,10 +6512,10 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TONGA-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 ; TONGA-NEXT: v_cndmask_b32_e32 v1, v5, v14, vcc ; TONGA-NEXT: v_cndmask_b32_e32 v5, v12, v15, vcc -; TONGA-NEXT: v_xor_b32_e32 v5, v5, v17 -; TONGA-NEXT: v_xor_b32_e32 v1, v1, v17 -; TONGA-NEXT: v_sub_u32_e32 v12, vcc, v5, v17 -; TONGA-NEXT: v_subb_u32_e32 v13, vcc, v1, v17, vcc +; TONGA-NEXT: v_xor_b32_e32 v5, v5, v18 +; TONGA-NEXT: v_xor_b32_e32 v1, v1, v18 +; TONGA-NEXT: v_sub_u32_e32 v12, vcc, v5, v18 +; TONGA-NEXT: v_subb_u32_e32 v13, vcc, v1, v18, vcc ; TONGA-NEXT: s_cbranch_execnz .LBB12_11 ; TONGA-NEXT: .LBB12_10: ; TONGA-NEXT: v_cvt_f32_u32_e32 v1, v0 @@ -6545,66 +6545,66 @@ define amdgpu_kernel void @srem_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %i ; TONGA-NEXT: v_add_u32_e32 v1, vcc, v2, v0 ; TONGA-NEXT: v_addc_u32_e32 v3, vcc, v3, v0, vcc ; TONGA-NEXT: v_xor_b32_e32 v5, v1, v0 -; TONGA-NEXT: v_xor_b32_e32 v16, v3, v0 +; TONGA-NEXT: v_xor_b32_e32 v14, v3, v0 ; TONGA-NEXT: v_cvt_f32_u32_e32 v0, v5 -; TONGA-NEXT: v_cvt_f32_u32_e32 v1, v16 +; TONGA-NEXT: v_cvt_f32_u32_e32 v1, v14 ; TONGA-NEXT: v_sub_u32_e32 v17, vcc, 0, v5 -; TONGA-NEXT: v_subb_u32_e32 v18, vcc, 0, v16, vcc +; TONGA-NEXT: v_subb_u32_e32 v18, vcc, 0, v14, vcc ; TONGA-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; TONGA-NEXT: v_rcp_f32_e32 v0, v0 ; TONGA-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; TONGA-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; TONGA-NEXT: v_trunc_f32_e32 v1, v1 ; TONGA-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0 -; TONGA-NEXT: v_cvt_u32_f32_e32 v14, v1 -; TONGA-NEXT: v_cvt_u32_f32_e32 v15, v0 -; TONGA-NEXT: v_mul_lo_u32 v3, v17, v14 -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v17, v15, 0 -; TONGA-NEXT: v_mul_lo_u32 v4, v18, v15 +; TONGA-NEXT: 
v_cvt_u32_f32_e32 v15, v1 +; TONGA-NEXT: v_cvt_u32_f32_e32 v16, v0 +; TONGA-NEXT: v_mul_lo_u32 v3, v17, v15 +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v17, v16, 0 +; TONGA-NEXT: v_mul_lo_u32 v4, v18, v16 ; TONGA-NEXT: v_add_u32_e32 v1, vcc, v1, v3 ; TONGA-NEXT: v_add_u32_e32 v19, vcc, v1, v4 -; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v15, v19, 0 -; TONGA-NEXT: v_mul_hi_u32 v20, v15, v0 -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v14, v0, 0 +; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v16, v19, 0 +; TONGA-NEXT: v_mul_hi_u32 v20, v16, v0 +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v15, v0, 0 ; TONGA-NEXT: v_add_u32_e32 v20, vcc, v20, v3 ; TONGA-NEXT: v_addc_u32_e32 v21, vcc, 0, v4, vcc -; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v14, v19, 0 +; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v15, v19, 0 ; TONGA-NEXT: v_add_u32_e32 v0, vcc, v20, v0 ; TONGA-NEXT: v_addc_u32_e32 v0, vcc, v21, v1, vcc ; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v4, vcc ; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v3 ; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; TONGA-NEXT: v_add_u32_e32 v19, vcc, v15, v0 -; TONGA-NEXT: v_addc_u32_e32 v20, vcc, v14, v1, vcc -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v17, v19, 0 -; TONGA-NEXT: v_mul_lo_u32 v3, v17, v20 -; TONGA-NEXT: v_mul_lo_u32 v4, v18, v19 -; TONGA-NEXT: v_mul_hi_u32 v17, v19, v0 +; TONGA-NEXT: v_add_u32_e32 v16, vcc, v16, v0 +; TONGA-NEXT: v_addc_u32_e32 v15, vcc, v15, v1, vcc +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v17, v16, 0 +; TONGA-NEXT: v_mul_lo_u32 v3, v17, v15 +; TONGA-NEXT: v_mul_lo_u32 v4, v18, v16 +; TONGA-NEXT: v_mul_hi_u32 v18, v16, v0 ; TONGA-NEXT: v_add_u32_e32 v1, vcc, v3, v1 -; TONGA-NEXT: v_add_u32_e32 v14, vcc, v4, v1 -; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v19, v14, 0 -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v20, v0, 0 -; TONGA-NEXT: v_mad_u64_u32 v[14:15], s[0:1], v20, v14, 0 -; TONGA-NEXT: v_add_u32_e32 v3, vcc, v17, v3 -; TONGA-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc -; TONGA-NEXT: v_add_u32_e32 
v0, vcc, v3, v0 -; TONGA-NEXT: v_addc_u32_e32 v0, vcc, v4, v1, vcc -; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v15, vcc -; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v14 +; TONGA-NEXT: v_add_u32_e32 v17, vcc, v4, v1 +; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v16, v17, 0 +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v15, v0, 0 +; TONGA-NEXT: v_add_u32_e32 v18, vcc, v18, v3 +; TONGA-NEXT: v_addc_u32_e32 v19, vcc, 0, v4, vcc +; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v15, v17, 0 +; TONGA-NEXT: v_add_u32_e32 v0, vcc, v18, v0 +; TONGA-NEXT: v_addc_u32_e32 v0, vcc, v19, v1, vcc +; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v4, vcc +; TONGA-NEXT: v_add_u32_e32 v0, vcc, v0, v3 ; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc -; TONGA-NEXT: v_add_u32_e32 v3, vcc, v19, v0 -; TONGA-NEXT: v_addc_u32_e32 v14, vcc, v20, v1, vcc -; TONGA-NEXT: v_ashrrev_i32_e32 v15, 31, v7 -; TONGA-NEXT: v_add_u32_e32 v0, vcc, v6, v15 -; TONGA-NEXT: v_addc_u32_e32 v4, vcc, v7, v15, vcc -; TONGA-NEXT: v_xor_b32_e32 v7, v0, v15 -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v7, v14, 0 +; TONGA-NEXT: v_add_u32_e32 v3, vcc, v16, v0 +; TONGA-NEXT: v_addc_u32_e32 v15, vcc, v15, v1, vcc +; TONGA-NEXT: v_ashrrev_i32_e32 v16, 31, v7 +; TONGA-NEXT: v_add_u32_e32 v0, vcc, v6, v16 +; TONGA-NEXT: v_addc_u32_e32 v4, vcc, v7, v16, vcc +; TONGA-NEXT: v_xor_b32_e32 v7, v0, v16 +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v7, v15, 0 ; TONGA-NEXT: v_mul_hi_u32 v17, v7, v3 -; TONGA-NEXT: v_xor_b32_e32 v18, v4, v15 +; TONGA-NEXT: v_xor_b32_e32 v18, v4, v16 ; TONGA-NEXT: v_mad_u64_u32 v[3:4], s[0:1], v18, v3, 0 ; TONGA-NEXT: v_add_u32_e32 v17, vcc, v17, v0 ; TONGA-NEXT: v_addc_u32_e32 v19, vcc, 0, v1, vcc -; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v18, v14, 0 +; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v18, v15, 0 ; TONGA-NEXT: v_add_u32_e32 v3, vcc, v17, v3 ; TONGA-NEXT: v_addc_u32_e32 v3, vcc, v19, v4, vcc ; TONGA-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc @@ -6612,40 +6612,40 @@ define amdgpu_kernel void @srem_v4i64(ptr 
addrspace(1) %out, ptr addrspace(1) %i ; TONGA-NEXT: v_addc_u32_e32 v0, vcc, 0, v1, vcc ; TONGA-NEXT: v_mul_lo_u32 v4, v5, v0 ; TONGA-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v5, v3, 0 -; TONGA-NEXT: v_mul_lo_u32 v3, v16, v3 +; TONGA-NEXT: v_mul_lo_u32 v3, v14, v3 ; TONGA-NEXT: v_add_u32_e32 v1, vcc, v4, v1 ; TONGA-NEXT: v_add_u32_e32 v1, vcc, v3, v1 ; TONGA-NEXT: v_sub_u32_e32 v3, vcc, v18, v1 ; TONGA-NEXT: v_sub_u32_e32 v0, vcc, v7, v0 -; TONGA-NEXT: v_subb_u32_e64 v3, s[0:1], v3, v16, vcc +; TONGA-NEXT: v_subb_u32_e64 v3, s[0:1], v3, v14, vcc ; TONGA-NEXT: v_sub_u32_e64 v4, s[0:1], v0, v5 ; TONGA-NEXT: v_subbrev_u32_e64 v7, s[2:3], 0, v3, s[0:1] -; TONGA-NEXT: v_cmp_ge_u32_e64 s[2:3], v7, v16 -; TONGA-NEXT: v_cndmask_b32_e64 v14, 0, -1, s[2:3] +; TONGA-NEXT: v_cmp_ge_u32_e64 s[2:3], v7, v14 +; TONGA-NEXT: v_cndmask_b32_e64 v15, 0, -1, s[2:3] ; TONGA-NEXT: v_cmp_ge_u32_e64 s[2:3], v4, v5 ; TONGA-NEXT: v_cndmask_b32_e64 v17, 0, -1, s[2:3] -; TONGA-NEXT: v_cmp_eq_u32_e64 s[2:3], v7, v16 -; TONGA-NEXT: v_subb_u32_e64 v3, s[0:1], v3, v16, s[0:1] -; TONGA-NEXT: v_cndmask_b32_e64 v14, v14, v17, s[2:3] +; TONGA-NEXT: v_cmp_eq_u32_e64 s[2:3], v7, v14 +; TONGA-NEXT: v_subb_u32_e64 v3, s[0:1], v3, v14, s[0:1] +; TONGA-NEXT: v_cndmask_b32_e64 v15, v15, v17, s[2:3] ; TONGA-NEXT: v_sub_u32_e64 v17, s[0:1], v4, v5 ; TONGA-NEXT: v_subbrev_u32_e64 v3, s[0:1], 0, v3, s[0:1] ; TONGA-NEXT: v_subb_u32_e32 v1, vcc, v18, v1, vcc -; TONGA-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v14 -; TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v1, v16 +; TONGA-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, v15 +; TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v1, v14 ; TONGA-NEXT: v_cndmask_b32_e64 v3, v7, v3, s[0:1] ; TONGA-NEXT: v_cndmask_b32_e64 v7, 0, -1, vcc ; TONGA-NEXT: v_cmp_ge_u32_e32 vcc, v0, v5 ; TONGA-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc -; TONGA-NEXT: v_cmp_eq_u32_e32 vcc, v1, v16 +; TONGA-NEXT: v_cmp_eq_u32_e32 vcc, v1, v14 ; TONGA-NEXT: v_cndmask_b32_e32 v5, v7, v5, vcc ; TONGA-NEXT: v_cndmask_b32_e64 v4, v4, v17, s[0:1] ; 
TONGA-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5 ; TONGA-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc ; TONGA-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc -; TONGA-NEXT: v_xor_b32_e32 v0, v0, v15 -; TONGA-NEXT: v_xor_b32_e32 v1, v1, v15 -; TONGA-NEXT: v_sub_u32_e32 v14, vcc, v0, v15 -; TONGA-NEXT: v_subb_u32_e32 v15, vcc, v1, v15, vcc +; TONGA-NEXT: v_xor_b32_e32 v0, v0, v16 +; TONGA-NEXT: v_xor_b32_e32 v1, v1, v16 +; TONGA-NEXT: v_sub_u32_e32 v14, vcc, v0, v16 +; TONGA-NEXT: v_subb_u32_e32 v15, vcc, v1, v16, vcc ; TONGA-NEXT: s_cbranch_execnz .LBB12_14 ; TONGA-NEXT: .LBB12_13: ; TONGA-NEXT: v_cvt_f32_u32_e32 v0, v2 diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll index 661e2b391af3c..c7b2f0d31e0d1 100644 --- a/llvm/test/CodeGen/AMDGPU/srem64.ll +++ b/llvm/test/CodeGen/AMDGPU/srem64.ll @@ -8,6 +8,7 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd ; GCN-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 ; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_cvt_f32_u32_e32 v0, s8 ; GCN-NEXT: v_cvt_f32_u32_e32 v1, s9 @@ -23,60 +24,60 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_mul_hi_u32 v2, s0, v0 ; GCN-NEXT: v_readfirstlane_b32 s10, v1 -; GCN-NEXT: v_readfirstlane_b32 s2, v0 -; GCN-NEXT: s_mul_i32 s11, s0, s10 -; GCN-NEXT: v_readfirstlane_b32 s14, v2 -; GCN-NEXT: s_mul_i32 s12, s1, s2 -; GCN-NEXT: s_mul_i32 s13, s0, s2 -; GCN-NEXT: s_add_i32 s11, s14, s11 -; GCN-NEXT: v_mul_hi_u32 v3, v0, s13 -; GCN-NEXT: s_add_i32 s11, s11, s12 -; GCN-NEXT: v_mul_hi_u32 v0, v0, s11 -; GCN-NEXT: v_mul_hi_u32 v4, v1, s13 -; GCN-NEXT: v_readfirstlane_b32 s12, v3 -; GCN-NEXT: s_mul_i32 s15, s2, s11 -; GCN-NEXT: v_mul_hi_u32 v1, v1, s11 -; GCN-NEXT: s_add_u32 s12, s12, s15 +; GCN-NEXT: v_readfirstlane_b32 s11, v0 +; GCN-NEXT: s_mul_i32 s12, s0, s10 +; 
GCN-NEXT: v_readfirstlane_b32 s15, v2 +; GCN-NEXT: s_mul_i32 s13, s1, s11 +; GCN-NEXT: s_mul_i32 s14, s0, s11 +; GCN-NEXT: s_add_i32 s12, s15, s12 +; GCN-NEXT: v_mul_hi_u32 v3, v0, s14 +; GCN-NEXT: s_add_i32 s12, s12, s13 +; GCN-NEXT: v_mul_hi_u32 v0, v0, s12 +; GCN-NEXT: v_mul_hi_u32 v4, v1, s14 +; GCN-NEXT: v_readfirstlane_b32 s13, v3 +; GCN-NEXT: s_mul_i32 s15, s11, s12 +; GCN-NEXT: v_mul_hi_u32 v1, v1, s12 +; GCN-NEXT: s_add_u32 s13, s13, s15 ; GCN-NEXT: v_readfirstlane_b32 s15, v0 -; GCN-NEXT: s_mul_i32 s13, s10, s13 +; GCN-NEXT: s_mul_i32 s14, s10, s14 ; GCN-NEXT: s_addc_u32 s15, 0, s15 -; GCN-NEXT: v_readfirstlane_b32 s14, v4 -; GCN-NEXT: s_add_u32 s12, s12, s13 -; GCN-NEXT: s_addc_u32 s12, s15, s14 +; GCN-NEXT: v_readfirstlane_b32 s16, v4 +; GCN-NEXT: s_add_u32 s13, s13, s14 +; GCN-NEXT: s_addc_u32 s13, s15, s16 +; GCN-NEXT: v_readfirstlane_b32 s14, v1 +; GCN-NEXT: s_addc_u32 s14, s14, 0 +; GCN-NEXT: s_mul_i32 s12, s10, s12 +; GCN-NEXT: s_add_u32 s12, s13, s12 +; GCN-NEXT: s_addc_u32 s13, 0, s14 +; GCN-NEXT: s_add_u32 s11, s11, s12 +; GCN-NEXT: v_mov_b32_e32 v1, s11 +; GCN-NEXT: v_mul_hi_u32 v1, s0, v1 +; GCN-NEXT: s_addc_u32 s10, s10, s13 +; GCN-NEXT: s_mul_i32 s12, s0, s11 +; GCN-NEXT: s_mul_i32 s0, s0, s10 ; GCN-NEXT: v_readfirstlane_b32 s13, v1 -; GCN-NEXT: s_addc_u32 s13, s13, 0 -; GCN-NEXT: s_mul_i32 s11, s10, s11 -; GCN-NEXT: s_add_u32 s11, s12, s11 -; GCN-NEXT: s_addc_u32 s12, 0, s13 -; GCN-NEXT: s_add_u32 s11, s2, s11 -; GCN-NEXT: v_mov_b32_e32 v0, s11 -; GCN-NEXT: v_mul_hi_u32 v0, s0, v0 -; GCN-NEXT: s_addc_u32 s10, s10, s12 -; GCN-NEXT: s_mul_i32 s12, s0, s10 +; GCN-NEXT: s_add_i32 s0, s13, s0 ; GCN-NEXT: s_mul_i32 s1, s1, s11 -; GCN-NEXT: v_readfirstlane_b32 s13, v0 -; GCN-NEXT: s_add_i32 s12, s13, s12 -; GCN-NEXT: s_mul_i32 s0, s0, s11 -; GCN-NEXT: s_add_i32 s1, s12, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s0 -; GCN-NEXT: v_mov_b32_e32 v0, s1 -; GCN-NEXT: v_mul_hi_u32 v3, s10, v2 -; GCN-NEXT: v_mul_hi_u32 v2, s11, v2 -; GCN-NEXT: v_mul_hi_u32 v1, 
s10, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s12 +; GCN-NEXT: s_add_i32 s0, s0, s1 +; GCN-NEXT: v_mul_hi_u32 v2, s10, v0 +; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_mul_hi_u32 v0, s11, v0 -; GCN-NEXT: s_mul_i32 s13, s11, s1 -; GCN-NEXT: v_readfirstlane_b32 s15, v2 +; GCN-NEXT: v_mul_hi_u32 v3, s11, v1 +; GCN-NEXT: s_mul_i32 s13, s11, s0 +; GCN-NEXT: s_mul_i32 s12, s10, s12 +; GCN-NEXT: v_readfirstlane_b32 s15, v0 +; GCN-NEXT: v_mul_hi_u32 v0, s10, v1 +; GCN-NEXT: v_readfirstlane_b32 s14, v3 ; GCN-NEXT: s_add_u32 s13, s15, s13 -; GCN-NEXT: v_readfirstlane_b32 s14, v0 -; GCN-NEXT: s_mul_i32 s0, s10, s0 ; GCN-NEXT: s_addc_u32 s14, 0, s14 -; GCN-NEXT: v_readfirstlane_b32 s12, v3 -; GCN-NEXT: s_add_u32 s0, s13, s0 -; GCN-NEXT: s_addc_u32 s0, s14, s12 -; GCN-NEXT: v_readfirstlane_b32 s12, v1 +; GCN-NEXT: v_readfirstlane_b32 s1, v2 +; GCN-NEXT: s_add_u32 s12, s13, s12 +; GCN-NEXT: s_addc_u32 s1, s14, s1 +; GCN-NEXT: v_readfirstlane_b32 s12, v0 ; GCN-NEXT: s_addc_u32 s12, s12, 0 -; GCN-NEXT: s_mul_i32 s1, s10, s1 -; GCN-NEXT: s_add_u32 s0, s0, s1 +; GCN-NEXT: s_mul_i32 s0, s10, s0 +; GCN-NEXT: s_add_u32 s0, s1, s0 ; GCN-NEXT: s_addc_u32 s1, 0, s12 ; GCN-NEXT: s_add_u32 s11, s11, s0 ; GCN-NEXT: s_addc_u32 s1, s10, s1 @@ -141,7 +142,6 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) { ; GCN-NEXT: s_cmp_lg_u32 s5, 0 ; GCN-NEXT: s_cselect_b32 s4, s10, s4 ; GCN-NEXT: s_cselect_b32 s5, s11, s6 -; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: v_mov_b32_e32 v0, s5 ; GCN-NEXT: v_mov_b32_e32 v1, s4 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -277,25 +277,25 @@ define i64 @v_test_srem(i64 %x, i64 %y) { ; GCN-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v8 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc -; GCN-NEXT: v_mul_lo_u32 v8, v6, v5 -; GCN-NEXT: v_mul_hi_u32 v9, v6, v4 +; GCN-NEXT: v_mul_lo_u32 v8, v6, v4 +; GCN-NEXT: v_mul_lo_u32 v9, v6, v5 +; GCN-NEXT: v_mul_hi_u32 v6, v6, v4 ; GCN-NEXT: v_mul_lo_u32 
v7, v7, v4 -; GCN-NEXT: v_mul_lo_u32 v6, v6, v4 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GCN-NEXT: v_mul_lo_u32 v10, v4, v7 +; GCN-NEXT: v_mul_hi_u32 v10, v5, v8 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GCN-NEXT: v_mul_lo_u32 v7, v4, v6 +; GCN-NEXT: v_mul_hi_u32 v9, v4, v8 ; GCN-NEXT: v_mul_hi_u32 v11, v4, v6 -; GCN-NEXT: v_mul_hi_u32 v12, v4, v7 -; GCN-NEXT: v_mul_hi_u32 v9, v5, v6 +; GCN-NEXT: v_mul_lo_u32 v8, v5, v8 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; GCN-NEXT: v_addc_u32_e32 v9, vcc, 0, v11, vcc +; GCN-NEXT: v_mul_hi_u32 v11, v5, v6 ; GCN-NEXT: v_mul_lo_u32 v6, v5, v6 -; GCN-NEXT: v_mul_hi_u32 v8, v5, v7 -; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GCN-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc -; GCN-NEXT: v_mul_lo_u32 v7, v5, v7 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v10, v6 -; GCN-NEXT: v_addc_u32_e32 v6, vcc, v11, v9, vcc -; GCN-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc -; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; GCN-NEXT: v_addc_u32_e32 v7, vcc, v9, v10, vcc +; GCN-NEXT: v_addc_u32_e32 v8, vcc, 0, v11, vcc +; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v6 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc @@ -890,6 +890,7 @@ define amdgpu_kernel void @s_test_srem32_64(ptr addrspace(1) %out, i64 %x, i64 % ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 +; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 @@ -898,60 +899,60 @@ define amdgpu_kernel void @s_test_srem32_64(ptr addrspace(1) %out, i64 %x, i64 % ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_mul_hi_u32 v2, s0, v0 ; GCN-NEXT: v_readfirstlane_b32 s5, v1 -; GCN-NEXT: v_readfirstlane_b32 s2, v0 -; GCN-NEXT: 
s_mul_i32 s10, s0, s5 -; GCN-NEXT: v_readfirstlane_b32 s14, v2 -; GCN-NEXT: s_mul_i32 s12, s1, s2 -; GCN-NEXT: s_mul_i32 s13, s0, s2 -; GCN-NEXT: s_add_i32 s10, s14, s10 -; GCN-NEXT: v_mul_hi_u32 v3, v0, s13 -; GCN-NEXT: s_add_i32 s10, s10, s12 -; GCN-NEXT: v_mul_hi_u32 v0, v0, s10 -; GCN-NEXT: v_mul_hi_u32 v4, v1, s13 -; GCN-NEXT: v_readfirstlane_b32 s12, v3 -; GCN-NEXT: s_mul_i32 s14, s2, s10 -; GCN-NEXT: v_mul_hi_u32 v1, v1, s10 -; GCN-NEXT: s_add_u32 s12, s12, s14 +; GCN-NEXT: v_readfirstlane_b32 s10, v0 +; GCN-NEXT: s_mul_i32 s12, s0, s5 +; GCN-NEXT: v_readfirstlane_b32 s15, v2 +; GCN-NEXT: s_mul_i32 s13, s1, s10 +; GCN-NEXT: s_mul_i32 s14, s0, s10 +; GCN-NEXT: s_add_i32 s12, s15, s12 +; GCN-NEXT: v_mul_hi_u32 v3, v0, s14 +; GCN-NEXT: s_add_i32 s12, s12, s13 +; GCN-NEXT: v_mul_hi_u32 v0, v0, s12 +; GCN-NEXT: v_mul_hi_u32 v4, v1, s14 +; GCN-NEXT: v_readfirstlane_b32 s13, v3 +; GCN-NEXT: s_mul_i32 s15, s10, s12 +; GCN-NEXT: s_add_u32 s13, s13, s15 +; GCN-NEXT: v_readfirstlane_b32 s15, v0 +; GCN-NEXT: v_mul_hi_u32 v0, v1, s12 +; GCN-NEXT: s_addc_u32 s15, 0, s15 +; GCN-NEXT: s_mul_i32 s14, s5, s14 +; GCN-NEXT: v_readfirstlane_b32 s16, v4 +; GCN-NEXT: s_add_u32 s13, s13, s14 +; GCN-NEXT: s_addc_u32 s13, s15, s16 ; GCN-NEXT: v_readfirstlane_b32 s14, v0 -; GCN-NEXT: s_addc_u32 s14, 0, s14 -; GCN-NEXT: s_mul_i32 s13, s5, s13 -; GCN-NEXT: v_readfirstlane_b32 s15, v4 -; GCN-NEXT: s_add_u32 s12, s12, s13 -; GCN-NEXT: s_addc_u32 s12, s14, s15 +; GCN-NEXT: s_addc_u32 s14, s14, 0 +; GCN-NEXT: s_mul_i32 s12, s5, s12 +; GCN-NEXT: s_add_u32 s12, s13, s12 +; GCN-NEXT: s_addc_u32 s13, 0, s14 +; GCN-NEXT: s_add_u32 s10, s10, s12 +; GCN-NEXT: v_mov_b32_e32 v1, s10 +; GCN-NEXT: v_mul_hi_u32 v1, s0, v1 +; GCN-NEXT: s_addc_u32 s5, s5, s13 +; GCN-NEXT: s_mul_i32 s12, s0, s10 +; GCN-NEXT: s_mul_i32 s0, s0, s5 ; GCN-NEXT: v_readfirstlane_b32 s13, v1 -; GCN-NEXT: s_addc_u32 s13, s13, 0 -; GCN-NEXT: s_mul_i32 s10, s5, s10 -; GCN-NEXT: s_add_u32 s10, s12, s10 -; GCN-NEXT: s_addc_u32 s12, 
0, s13 -; GCN-NEXT: s_add_u32 s10, s2, s10 -; GCN-NEXT: v_mov_b32_e32 v0, s10 -; GCN-NEXT: v_mul_hi_u32 v0, s0, v0 -; GCN-NEXT: s_addc_u32 s5, s5, s12 -; GCN-NEXT: s_mul_i32 s12, s0, s5 +; GCN-NEXT: s_add_i32 s0, s13, s0 ; GCN-NEXT: s_mul_i32 s1, s1, s10 -; GCN-NEXT: v_readfirstlane_b32 s13, v0 -; GCN-NEXT: s_add_i32 s12, s13, s12 -; GCN-NEXT: s_mul_i32 s0, s0, s10 -; GCN-NEXT: s_add_i32 s1, s12, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s0 -; GCN-NEXT: v_mov_b32_e32 v0, s1 -; GCN-NEXT: v_mul_hi_u32 v3, s5, v2 -; GCN-NEXT: v_mul_hi_u32 v2, s10, v2 -; GCN-NEXT: v_mul_hi_u32 v1, s5, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s12 +; GCN-NEXT: s_add_i32 s0, s0, s1 +; GCN-NEXT: v_mul_hi_u32 v2, s5, v0 +; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_mul_hi_u32 v0, s10, v0 -; GCN-NEXT: s_mul_i32 s13, s10, s1 -; GCN-NEXT: v_readfirstlane_b32 s15, v2 +; GCN-NEXT: v_mul_hi_u32 v3, s10, v1 +; GCN-NEXT: s_mul_i32 s13, s10, s0 +; GCN-NEXT: s_mul_i32 s12, s5, s12 +; GCN-NEXT: v_readfirstlane_b32 s15, v0 +; GCN-NEXT: v_mul_hi_u32 v0, s5, v1 +; GCN-NEXT: v_readfirstlane_b32 s14, v3 ; GCN-NEXT: s_add_u32 s13, s15, s13 -; GCN-NEXT: v_readfirstlane_b32 s14, v0 -; GCN-NEXT: s_mul_i32 s0, s5, s0 ; GCN-NEXT: s_addc_u32 s14, 0, s14 -; GCN-NEXT: v_readfirstlane_b32 s12, v3 -; GCN-NEXT: s_add_u32 s0, s13, s0 -; GCN-NEXT: s_addc_u32 s0, s14, s12 -; GCN-NEXT: v_readfirstlane_b32 s12, v1 +; GCN-NEXT: v_readfirstlane_b32 s1, v2 +; GCN-NEXT: s_add_u32 s12, s13, s12 +; GCN-NEXT: s_addc_u32 s1, s14, s1 +; GCN-NEXT: v_readfirstlane_b32 s12, v0 ; GCN-NEXT: s_addc_u32 s12, s12, 0 -; GCN-NEXT: s_mul_i32 s1, s5, s1 -; GCN-NEXT: s_add_u32 s0, s0, s1 +; GCN-NEXT: s_mul_i32 s0, s5, s0 +; GCN-NEXT: s_add_u32 s0, s1, s0 ; GCN-NEXT: s_addc_u32 s1, 0, s12 ; GCN-NEXT: s_add_u32 s12, s10, s0 ; GCN-NEXT: s_addc_u32 s13, s5, s1 @@ -1024,7 +1025,6 @@ define amdgpu_kernel void @s_test_srem32_64(ptr addrspace(1) %out, i64 %x, i64 % ; GCN-NEXT: s_sub_u32 s6, s6, s4 ; GCN-NEXT: s_subb_u32 s7, s7, s4 ; GCN-NEXT: v_mov_b32_e32 v0, s6 
-; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: v_mov_b32_e32 v1, s7 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GCN-NEXT: s_endpgm @@ -1155,6 +1155,7 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 % ; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 +; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 ; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0 ; GCN-NEXT: v_trunc_f32_e32 v1, v1 @@ -1163,60 +1164,60 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 % ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_mul_hi_u32 v2, s8, v0 ; GCN-NEXT: v_readfirstlane_b32 s10, v1 -; GCN-NEXT: v_readfirstlane_b32 s2, v0 -; GCN-NEXT: s_mul_i32 s11, s8, s10 -; GCN-NEXT: v_readfirstlane_b32 s14, v2 -; GCN-NEXT: s_mul_i32 s12, s9, s2 -; GCN-NEXT: s_mul_i32 s13, s8, s2 -; GCN-NEXT: s_add_i32 s11, s14, s11 -; GCN-NEXT: v_mul_hi_u32 v3, v0, s13 -; GCN-NEXT: s_add_i32 s11, s11, s12 -; GCN-NEXT: v_mul_hi_u32 v0, v0, s11 -; GCN-NEXT: v_mul_hi_u32 v4, v1, s13 -; GCN-NEXT: v_readfirstlane_b32 s12, v3 -; GCN-NEXT: s_mul_i32 s14, s2, s11 -; GCN-NEXT: v_mul_hi_u32 v1, v1, s11 -; GCN-NEXT: s_add_u32 s12, s12, s14 +; GCN-NEXT: v_readfirstlane_b32 s11, v0 +; GCN-NEXT: s_mul_i32 s12, s8, s10 +; GCN-NEXT: v_readfirstlane_b32 s15, v2 +; GCN-NEXT: s_mul_i32 s13, s9, s11 +; GCN-NEXT: s_mul_i32 s14, s8, s11 +; GCN-NEXT: s_add_i32 s12, s15, s12 +; GCN-NEXT: v_mul_hi_u32 v3, v0, s14 +; GCN-NEXT: s_add_i32 s12, s12, s13 +; GCN-NEXT: v_mul_hi_u32 v0, v0, s12 +; GCN-NEXT: v_mul_hi_u32 v4, v1, s14 +; GCN-NEXT: v_readfirstlane_b32 s13, v3 +; GCN-NEXT: s_mul_i32 s15, s11, s12 +; GCN-NEXT: s_add_u32 s13, s13, s15 +; GCN-NEXT: v_readfirstlane_b32 s15, v0 +; GCN-NEXT: v_mul_hi_u32 v0, v1, s12 +; GCN-NEXT: s_addc_u32 s15, 0, s15 +; GCN-NEXT: s_mul_i32 s14, s10, s14 +; GCN-NEXT: v_readfirstlane_b32 s16, v4 +; GCN-NEXT: s_add_u32 s13, s13, s14 +; GCN-NEXT: 
s_addc_u32 s13, s15, s16 ; GCN-NEXT: v_readfirstlane_b32 s14, v0 -; GCN-NEXT: s_addc_u32 s14, 0, s14 -; GCN-NEXT: s_mul_i32 s13, s10, s13 -; GCN-NEXT: v_readfirstlane_b32 s15, v4 -; GCN-NEXT: s_add_u32 s12, s12, s13 -; GCN-NEXT: s_addc_u32 s12, s14, s15 +; GCN-NEXT: s_addc_u32 s14, s14, 0 +; GCN-NEXT: s_mul_i32 s12, s10, s12 +; GCN-NEXT: s_add_u32 s12, s13, s12 +; GCN-NEXT: s_addc_u32 s13, 0, s14 +; GCN-NEXT: s_add_u32 s11, s11, s12 +; GCN-NEXT: v_mov_b32_e32 v1, s11 +; GCN-NEXT: v_mul_hi_u32 v1, s8, v1 +; GCN-NEXT: s_addc_u32 s10, s10, s13 +; GCN-NEXT: s_mul_i32 s12, s8, s11 +; GCN-NEXT: s_mul_i32 s8, s8, s10 ; GCN-NEXT: v_readfirstlane_b32 s13, v1 -; GCN-NEXT: s_addc_u32 s13, s13, 0 -; GCN-NEXT: s_mul_i32 s11, s10, s11 -; GCN-NEXT: s_add_u32 s11, s12, s11 -; GCN-NEXT: s_addc_u32 s12, 0, s13 -; GCN-NEXT: s_add_u32 s11, s2, s11 -; GCN-NEXT: v_mov_b32_e32 v0, s11 -; GCN-NEXT: v_mul_hi_u32 v0, s8, v0 -; GCN-NEXT: s_addc_u32 s10, s10, s12 -; GCN-NEXT: s_mul_i32 s12, s8, s10 +; GCN-NEXT: s_add_i32 s8, s13, s8 ; GCN-NEXT: s_mul_i32 s9, s9, s11 -; GCN-NEXT: v_readfirstlane_b32 s13, v0 -; GCN-NEXT: s_add_i32 s12, s13, s12 -; GCN-NEXT: s_mul_i32 s8, s8, s11 -; GCN-NEXT: s_add_i32 s9, s12, s9 -; GCN-NEXT: v_mov_b32_e32 v2, s8 -; GCN-NEXT: v_mov_b32_e32 v0, s9 -; GCN-NEXT: v_mul_hi_u32 v3, s10, v2 -; GCN-NEXT: v_mul_hi_u32 v2, s11, v2 -; GCN-NEXT: v_mul_hi_u32 v1, s10, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s12 +; GCN-NEXT: s_add_i32 s8, s8, s9 +; GCN-NEXT: v_mul_hi_u32 v2, s10, v0 +; GCN-NEXT: v_mov_b32_e32 v1, s8 ; GCN-NEXT: v_mul_hi_u32 v0, s11, v0 -; GCN-NEXT: s_mul_i32 s13, s11, s9 -; GCN-NEXT: v_readfirstlane_b32 s15, v2 +; GCN-NEXT: v_mul_hi_u32 v3, s11, v1 +; GCN-NEXT: s_mul_i32 s13, s11, s8 +; GCN-NEXT: s_mul_i32 s12, s10, s12 +; GCN-NEXT: v_readfirstlane_b32 s15, v0 +; GCN-NEXT: v_mul_hi_u32 v0, s10, v1 +; GCN-NEXT: v_readfirstlane_b32 s14, v3 ; GCN-NEXT: s_add_u32 s13, s15, s13 -; GCN-NEXT: v_readfirstlane_b32 s14, v0 -; GCN-NEXT: s_mul_i32 s8, s10, s8 ; GCN-NEXT: 
s_addc_u32 s14, 0, s14 -; GCN-NEXT: v_readfirstlane_b32 s12, v3 -; GCN-NEXT: s_add_u32 s8, s13, s8 -; GCN-NEXT: s_addc_u32 s8, s14, s12 -; GCN-NEXT: v_readfirstlane_b32 s12, v1 +; GCN-NEXT: v_readfirstlane_b32 s9, v2 +; GCN-NEXT: s_add_u32 s12, s13, s12 +; GCN-NEXT: s_addc_u32 s9, s14, s9 +; GCN-NEXT: v_readfirstlane_b32 s12, v0 ; GCN-NEXT: s_addc_u32 s12, s12, 0 -; GCN-NEXT: s_mul_i32 s9, s10, s9 -; GCN-NEXT: s_add_u32 s8, s8, s9 +; GCN-NEXT: s_mul_i32 s8, s10, s8 +; GCN-NEXT: s_add_u32 s8, s9, s8 ; GCN-NEXT: s_addc_u32 s9, 0, s12 ; GCN-NEXT: s_add_u32 s11, s11, s8 ; GCN-NEXT: s_addc_u32 s10, s10, s9 @@ -1248,7 +1249,6 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 % ; GCN-NEXT: v_mul_hi_u32 v0, s4, v0 ; GCN-NEXT: s_addc_u32 s11, 0, s12 ; GCN-NEXT: s_mul_i32 s11, s4, s11 -; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: v_readfirstlane_b32 s12, v0 ; GCN-NEXT: s_add_i32 s11, s12, s11 ; GCN-NEXT: s_mul_i32 s12, s5, s10 @@ -1507,6 +1507,7 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: v_cvt_f32_u32_e32 v1, s5 ; GCN-NEXT: s_sub_u32 s2, 0, s4 ; GCN-NEXT: s_subb_u32 s6, 0, s5 +; GCN-NEXT: s_mov_b32 s3, 0xf000 ; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0 ; GCN-NEXT: v_rcp_f32_e32 v0, v0 ; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0 @@ -1517,67 +1518,67 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_mul_hi_u32 v2, s2, v0 ; GCN-NEXT: v_readfirstlane_b32 s7, v1 -; GCN-NEXT: v_readfirstlane_b32 s3, v0 -; GCN-NEXT: s_mul_i32 s8, s2, s7 -; GCN-NEXT: v_readfirstlane_b32 s11, v2 -; GCN-NEXT: s_mul_i32 s9, s6, s3 -; GCN-NEXT: s_mul_i32 s10, s2, s3 -; GCN-NEXT: s_add_i32 s8, s11, s8 -; GCN-NEXT: v_mul_hi_u32 v3, v0, s10 -; GCN-NEXT: s_add_i32 s8, s8, s9 -; GCN-NEXT: v_mul_hi_u32 v0, v0, s8 -; GCN-NEXT: v_mul_hi_u32 v4, v1, s10 -; GCN-NEXT: v_readfirstlane_b32 s9, v3 -; GCN-NEXT: v_mul_hi_u32 v1, v1, s8 -; GCN-NEXT: 
s_mul_i32 s12, s3, s8 -; GCN-NEXT: s_add_u32 s9, s9, s12 -; GCN-NEXT: v_readfirstlane_b32 s12, v0 -; GCN-NEXT: s_mul_i32 s10, s7, s10 -; GCN-NEXT: s_addc_u32 s12, 0, s12 -; GCN-NEXT: v_readfirstlane_b32 s11, v4 -; GCN-NEXT: s_add_u32 s9, s9, s10 -; GCN-NEXT: v_readfirstlane_b32 s13, v1 -; GCN-NEXT: s_addc_u32 s9, s12, s11 -; GCN-NEXT: s_addc_u32 s10, s13, 0 -; GCN-NEXT: s_mul_i32 s8, s7, s8 -; GCN-NEXT: s_add_u32 s8, s9, s8 -; GCN-NEXT: s_addc_u32 s9, 0, s10 -; GCN-NEXT: s_add_u32 s8, s3, s8 -; GCN-NEXT: v_mov_b32_e32 v0, s8 -; GCN-NEXT: v_mul_hi_u32 v0, s2, v0 -; GCN-NEXT: s_addc_u32 s7, s7, s9 +; GCN-NEXT: v_readfirstlane_b32 s8, v0 ; GCN-NEXT: s_mul_i32 s9, s2, s7 +; GCN-NEXT: v_readfirstlane_b32 s12, v2 +; GCN-NEXT: s_mul_i32 s10, s6, s8 +; GCN-NEXT: s_mul_i32 s11, s2, s8 +; GCN-NEXT: s_add_i32 s9, s12, s9 +; GCN-NEXT: v_mul_hi_u32 v3, v0, s11 +; GCN-NEXT: s_add_i32 s9, s9, s10 +; GCN-NEXT: v_mul_hi_u32 v0, v0, s9 +; GCN-NEXT: v_mul_hi_u32 v4, v1, s11 +; GCN-NEXT: v_readfirstlane_b32 s10, v3 +; GCN-NEXT: s_mul_i32 s13, s8, s9 +; GCN-NEXT: v_mul_hi_u32 v1, v1, s9 +; GCN-NEXT: s_add_u32 s10, s10, s13 +; GCN-NEXT: v_readfirstlane_b32 s13, v0 +; GCN-NEXT: s_mul_i32 s11, s7, s11 +; GCN-NEXT: s_addc_u32 s13, 0, s13 +; GCN-NEXT: v_readfirstlane_b32 s12, v4 +; GCN-NEXT: s_add_u32 s10, s10, s11 +; GCN-NEXT: s_addc_u32 s10, s13, s12 +; GCN-NEXT: v_readfirstlane_b32 s11, v1 +; GCN-NEXT: s_addc_u32 s11, s11, 0 +; GCN-NEXT: s_mul_i32 s9, s7, s9 +; GCN-NEXT: s_add_u32 s9, s10, s9 +; GCN-NEXT: s_addc_u32 s10, 0, s11 +; GCN-NEXT: s_add_u32 s8, s8, s9 +; GCN-NEXT: v_mov_b32_e32 v1, s8 +; GCN-NEXT: v_mul_hi_u32 v1, s2, v1 +; GCN-NEXT: s_addc_u32 s7, s7, s10 +; GCN-NEXT: s_mul_i32 s9, s2, s8 +; GCN-NEXT: s_mul_i32 s2, s2, s7 +; GCN-NEXT: v_readfirstlane_b32 s10, v1 +; GCN-NEXT: s_add_i32 s2, s10, s2 ; GCN-NEXT: s_mul_i32 s6, s6, s8 -; GCN-NEXT: v_readfirstlane_b32 s10, v0 -; GCN-NEXT: s_add_i32 s9, s10, s9 -; GCN-NEXT: s_mul_i32 s2, s2, s8 -; GCN-NEXT: s_add_i32 s6, s9, s6 -; 
GCN-NEXT: v_mov_b32_e32 v2, s2 -; GCN-NEXT: v_mov_b32_e32 v0, s6 -; GCN-NEXT: v_mul_hi_u32 v3, s7, v2 -; GCN-NEXT: v_mul_hi_u32 v2, s8, v2 -; GCN-NEXT: v_mul_hi_u32 v1, s7, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s9 +; GCN-NEXT: s_add_i32 s2, s2, s6 +; GCN-NEXT: v_mul_hi_u32 v2, s7, v0 +; GCN-NEXT: v_mov_b32_e32 v1, s2 ; GCN-NEXT: v_mul_hi_u32 v0, s8, v0 -; GCN-NEXT: s_mul_i32 s10, s8, s6 -; GCN-NEXT: v_readfirstlane_b32 s12, v2 +; GCN-NEXT: v_mul_hi_u32 v3, s8, v1 +; GCN-NEXT: s_mul_i32 s10, s8, s2 +; GCN-NEXT: s_mul_i32 s9, s7, s9 +; GCN-NEXT: v_readfirstlane_b32 s12, v0 +; GCN-NEXT: v_mul_hi_u32 v0, s7, v1 +; GCN-NEXT: v_readfirstlane_b32 s11, v3 ; GCN-NEXT: s_add_u32 s10, s12, s10 -; GCN-NEXT: v_readfirstlane_b32 s11, v0 -; GCN-NEXT: s_mul_i32 s2, s7, s2 ; GCN-NEXT: s_addc_u32 s11, 0, s11 -; GCN-NEXT: v_readfirstlane_b32 s9, v3 -; GCN-NEXT: s_add_u32 s2, s10, s2 -; GCN-NEXT: s_addc_u32 s2, s11, s9 -; GCN-NEXT: v_readfirstlane_b32 s9, v1 +; GCN-NEXT: v_readfirstlane_b32 s6, v2 +; GCN-NEXT: s_add_u32 s9, s10, s9 +; GCN-NEXT: s_addc_u32 s6, s11, s6 +; GCN-NEXT: v_readfirstlane_b32 s9, v0 ; GCN-NEXT: s_addc_u32 s9, s9, 0 -; GCN-NEXT: s_mul_i32 s6, s7, s6 -; GCN-NEXT: s_add_u32 s2, s2, s6 +; GCN-NEXT: s_mul_i32 s2, s7, s2 +; GCN-NEXT: s_add_u32 s2, s6, s2 ; GCN-NEXT: s_addc_u32 s6, 0, s9 ; GCN-NEXT: s_add_u32 s2, s8, s2 ; GCN-NEXT: s_addc_u32 s6, s7, s6 ; GCN-NEXT: v_mul_hi_u32 v1, s2, 24 ; GCN-NEXT: v_mul_hi_u32 v0, s6, 24 ; GCN-NEXT: s_mul_i32 s6, s6, 24 -; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: v_readfirstlane_b32 s8, v1 ; GCN-NEXT: v_readfirstlane_b32 s7, v0 ; GCN-NEXT: s_add_u32 s6, s8, s6 @@ -1586,7 +1587,6 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: v_mul_hi_u32 v0, s4, v0 ; GCN-NEXT: s_mul_i32 s7, s5, s6 ; GCN-NEXT: s_mul_i32 s6, s4, s6 -; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: v_readfirstlane_b32 s8, v0 ; GCN-NEXT: s_add_i32 s10, s8, s7 ; GCN-NEXT: s_sub_i32 s8, 0, s10 @@ 
-1750,25 +1750,25 @@ define i64 @v_test_srem_k_num_i64(i64 %x) { ; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc -; GCN-NEXT: v_mul_lo_u32 v6, v4, v3 -; GCN-NEXT: v_mul_hi_u32 v7, v4, v2 +; GCN-NEXT: v_mul_lo_u32 v6, v4, v2 +; GCN-NEXT: v_mul_lo_u32 v7, v4, v3 +; GCN-NEXT: v_mul_hi_u32 v4, v4, v2 ; GCN-NEXT: v_mul_lo_u32 v5, v5, v2 -; GCN-NEXT: v_mul_lo_u32 v4, v4, v2 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GCN-NEXT: v_mul_lo_u32 v8, v2, v5 +; GCN-NEXT: v_mul_hi_u32 v8, v3, v6 +; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GCN-NEXT: v_mul_lo_u32 v5, v2, v4 +; GCN-NEXT: v_mul_hi_u32 v7, v2, v6 ; GCN-NEXT: v_mul_hi_u32 v9, v2, v4 -; GCN-NEXT: v_mul_hi_u32 v10, v2, v5 -; GCN-NEXT: v_mul_hi_u32 v7, v3, v4 +; GCN-NEXT: v_mul_lo_u32 v6, v3, v6 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc +; GCN-NEXT: v_mul_hi_u32 v9, v3, v4 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v4 -; GCN-NEXT: v_mul_hi_u32 v6, v3, v5 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GCN-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc -; GCN-NEXT: v_mul_lo_u32 v5, v3, v5 -; GCN-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; GCN-NEXT: v_addc_u32_e32 v4, vcc, v9, v7, vcc -; GCN-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v8, vcc +; GCN-NEXT: v_addc_u32_e32 v6, vcc, 0, v9, vcc +; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc @@ -1915,6 +1915,7 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc ; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 ; GCN-NEXT: v_rcp_f32_e32 v2, v2 +; GCN-NEXT: s_mov_b32 s4, 0x8000 ; 
GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GCN-NEXT: v_trunc_f32_e32 v3, v3 @@ -1943,25 +1944,25 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc -; GCN-NEXT: v_mul_lo_u32 v6, v4, v3 -; GCN-NEXT: v_mul_hi_u32 v7, v4, v2 +; GCN-NEXT: v_mul_lo_u32 v6, v4, v2 +; GCN-NEXT: v_mul_lo_u32 v7, v4, v3 +; GCN-NEXT: v_mul_hi_u32 v4, v4, v2 ; GCN-NEXT: v_mul_lo_u32 v5, v5, v2 -; GCN-NEXT: v_mul_lo_u32 v4, v4, v2 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GCN-NEXT: v_mul_lo_u32 v8, v2, v5 +; GCN-NEXT: v_mul_hi_u32 v8, v3, v6 +; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GCN-NEXT: v_mul_lo_u32 v5, v2, v4 +; GCN-NEXT: v_mul_hi_u32 v7, v2, v6 ; GCN-NEXT: v_mul_hi_u32 v9, v2, v4 -; GCN-NEXT: v_mul_hi_u32 v10, v2, v5 -; GCN-NEXT: v_mul_hi_u32 v7, v3, v4 +; GCN-NEXT: v_mul_lo_u32 v6, v3, v6 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc +; GCN-NEXT: v_mul_hi_u32 v9, v3, v4 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v4 -; GCN-NEXT: v_mul_hi_u32 v6, v3, v5 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GCN-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc -; GCN-NEXT: v_mul_lo_u32 v5, v3, v5 -; GCN-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; GCN-NEXT: v_addc_u32_e32 v4, vcc, v9, v7, vcc -; GCN-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v8, vcc +; GCN-NEXT: v_addc_u32_e32 v6, vcc, 0, v9, vcc +; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GCN-NEXT: v_addc_u32_e32 v2, vcc, v3, v5, vcc @@ -1971,7 +1972,7 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_mul_lo_u32 v2, v0, v2 
; GCN-NEXT: v_add_i32_e32 v3, vcc, v4, v3 ; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v3 -; GCN-NEXT: v_sub_i32_e32 v2, vcc, 0x8000, v2 +; GCN-NEXT: v_sub_i32_e32 v2, vcc, s4, v2 ; GCN-NEXT: v_subb_u32_e64 v4, s[4:5], v4, v1, vcc ; GCN-NEXT: v_sub_i32_e64 v5, s[4:5], v2, v0 ; GCN-NEXT: v_subbrev_u32_e64 v6, s[6:7], 0, v4, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll index 97e0937760f92..c4f4962ea7bce 100644 --- a/llvm/test/CodeGen/AMDGPU/udiv64.ll +++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll @@ -44,26 +44,26 @@ define amdgpu_kernel void @s_test_udiv_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc -; GCN-NEXT: v_mul_lo_u32 v2, s4, v1 -; GCN-NEXT: v_mul_hi_u32 v3, s4, v0 -; GCN-NEXT: v_mul_lo_u32 v4, s5, v0 -; GCN-NEXT: s_mov_b32 s5, s1 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3 -; GCN-NEXT: v_mul_lo_u32 v3, s4, v0 -; GCN-NEXT: v_add_i32_e32 v2, vcc, v4, v2 -; GCN-NEXT: v_mul_lo_u32 v6, v0, v2 +; GCN-NEXT: v_mul_lo_u32 v3, s4, v1 +; GCN-NEXT: v_mul_hi_u32 v4, s4, v0 +; GCN-NEXT: v_mul_lo_u32 v6, s5, v0 +; GCN-NEXT: v_mul_lo_u32 v2, s4, v0 +; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: v_add_i32_e32 v3, vcc, v3, v4 +; GCN-NEXT: v_add_i32_e32 v3, vcc, v6, v3 +; GCN-NEXT: v_mul_lo_u32 v4, v0, v3 +; GCN-NEXT: v_mul_hi_u32 v6, v0, v2 ; GCN-NEXT: v_mul_hi_u32 v7, v0, v3 -; GCN-NEXT: v_mul_hi_u32 v8, v0, v2 -; GCN-NEXT: v_mul_hi_u32 v5, v1, v3 -; GCN-NEXT: v_mul_lo_u32 v3, v1, v3 -; GCN-NEXT: v_mul_hi_u32 v4, v1, v2 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc +; GCN-NEXT: v_mul_hi_u32 v5, v1, v2 ; GCN-NEXT: v_mul_lo_u32 v2, v1, v2 -; GCN-NEXT: v_add_i32_e32 v3, vcc, v6, v3 -; GCN-NEXT: v_addc_u32_e32 v3, vcc, v7, v5, vcc -; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v4, vcc -; GCN-NEXT: v_add_i32_e32 v2, vcc, v3, v2 +; GCN-NEXT: v_add_i32_e32 v4, vcc, v6, v4 +; GCN-NEXT: 
v_addc_u32_e32 v6, vcc, 0, v7, vcc +; GCN-NEXT: v_mul_hi_u32 v7, v1, v3 +; GCN-NEXT: v_mul_lo_u32 v3, v1, v3 +; GCN-NEXT: v_add_i32_e32 v2, vcc, v4, v2 +; GCN-NEXT: v_addc_u32_e32 v2, vcc, v6, v5, vcc +; GCN-NEXT: v_addc_u32_e32 v4, vcc, 0, v7, vcc +; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v3 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GCN-NEXT: v_add_i32_e32 v0, vcc, v0, v2 ; GCN-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc @@ -76,7 +76,7 @@ define amdgpu_kernel void @s_test_udiv_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc ; GCN-NEXT: v_mul_lo_u32 v4, s3, v0 ; GCN-NEXT: v_mul_hi_u32 v0, s3, v0 -; GCN-NEXT: s_mov_b32 s4, s0 +; GCN-NEXT: s_mov_b32 s5, s1 ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GCN-NEXT: v_addc_u32_e32 v0, vcc, v3, v0, vcc ; GCN-NEXT: v_addc_u32_e32 v2, vcc, 0, v5, vcc @@ -235,25 +235,25 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) { ; GCN-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v8 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc -; GCN-NEXT: v_mul_lo_u32 v8, v6, v5 -; GCN-NEXT: v_mul_hi_u32 v9, v6, v4 +; GCN-NEXT: v_mul_lo_u32 v8, v6, v4 +; GCN-NEXT: v_mul_lo_u32 v9, v6, v5 +; GCN-NEXT: v_mul_hi_u32 v6, v6, v4 ; GCN-NEXT: v_mul_lo_u32 v7, v7, v4 -; GCN-NEXT: v_mul_lo_u32 v6, v6, v4 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GCN-NEXT: v_mul_lo_u32 v10, v4, v7 +; GCN-NEXT: v_mul_hi_u32 v10, v5, v8 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GCN-NEXT: v_mul_lo_u32 v7, v4, v6 +; GCN-NEXT: v_mul_hi_u32 v9, v4, v8 ; GCN-NEXT: v_mul_hi_u32 v11, v4, v6 -; GCN-NEXT: v_mul_hi_u32 v12, v4, v7 -; GCN-NEXT: v_mul_hi_u32 v9, v5, v6 +; GCN-NEXT: v_mul_lo_u32 v8, v5, v8 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; GCN-NEXT: v_addc_u32_e32 v9, vcc, 0, v11, vcc +; GCN-NEXT: v_mul_hi_u32 v11, v5, v6 ; GCN-NEXT: v_mul_lo_u32 v6, v5, v6 -; GCN-NEXT: v_mul_hi_u32 v8, v5, v7 -; GCN-NEXT: 
v_add_i32_e32 v10, vcc, v11, v10 -; GCN-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc -; GCN-NEXT: v_mul_lo_u32 v7, v5, v7 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v10, v6 -; GCN-NEXT: v_addc_u32_e32 v6, vcc, v11, v9, vcc -; GCN-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc -; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; GCN-NEXT: v_addc_u32_e32 v7, vcc, v9, v10, vcc +; GCN-NEXT: v_addc_u32_e32 v8, vcc, 0, v11, vcc +; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v6 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc @@ -874,6 +874,7 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-LABEL: s_test_udiv_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_cvt_f32_u32_e32 v0, s2 ; GCN-NEXT: v_cvt_f32_u32_e32 v1, s3 @@ -889,67 +890,67 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_mul_hi_u32 v2, s4, v0 ; GCN-NEXT: v_readfirstlane_b32 s6, v1 -; GCN-NEXT: v_readfirstlane_b32 s7, v0 -; GCN-NEXT: s_mul_i32 s8, s4, s6 -; GCN-NEXT: v_readfirstlane_b32 s11, v2 -; GCN-NEXT: s_mul_i32 s9, s5, s7 -; GCN-NEXT: s_mul_i32 s10, s4, s7 -; GCN-NEXT: s_add_i32 s8, s11, s8 -; GCN-NEXT: v_mul_hi_u32 v3, v0, s10 -; GCN-NEXT: s_add_i32 s8, s8, s9 -; GCN-NEXT: v_mul_hi_u32 v0, v0, s8 -; GCN-NEXT: v_mul_hi_u32 v4, v1, s10 -; GCN-NEXT: v_readfirstlane_b32 s9, v3 -; GCN-NEXT: s_mul_i32 s12, s7, s8 -; GCN-NEXT: v_mul_hi_u32 v1, v1, s8 -; GCN-NEXT: s_add_u32 s9, s9, s12 -; GCN-NEXT: v_readfirstlane_b32 s12, v0 -; GCN-NEXT: s_mul_i32 s10, s6, s10 -; GCN-NEXT: s_addc_u32 s12, 0, s12 -; GCN-NEXT: v_readfirstlane_b32 s11, v4 -; GCN-NEXT: s_add_u32 s9, s9, s10 -; GCN-NEXT: v_readfirstlane_b32 s13, v1 -; GCN-NEXT: s_addc_u32 s9, s12, s11 -; GCN-NEXT: s_mul_i32 s8, s6, s8 -; 
GCN-NEXT: s_addc_u32 s10, s13, 0 -; GCN-NEXT: s_add_u32 s8, s9, s8 -; GCN-NEXT: s_addc_u32 s9, 0, s10 -; GCN-NEXT: s_add_u32 s8, s7, s8 -; GCN-NEXT: v_mov_b32_e32 v0, s8 -; GCN-NEXT: v_mul_hi_u32 v0, s4, v0 -; GCN-NEXT: s_addc_u32 s6, s6, s9 +; GCN-NEXT: v_readfirstlane_b32 s8, v0 ; GCN-NEXT: s_mul_i32 s9, s4, s6 +; GCN-NEXT: v_readfirstlane_b32 s12, v2 +; GCN-NEXT: s_mul_i32 s10, s5, s8 +; GCN-NEXT: s_mul_i32 s11, s4, s8 +; GCN-NEXT: s_add_i32 s9, s12, s9 +; GCN-NEXT: v_mul_hi_u32 v3, v0, s11 +; GCN-NEXT: s_add_i32 s9, s9, s10 +; GCN-NEXT: v_mul_hi_u32 v0, v0, s9 +; GCN-NEXT: v_mul_hi_u32 v4, v1, s11 +; GCN-NEXT: v_readfirstlane_b32 s10, v3 +; GCN-NEXT: v_mul_hi_u32 v1, v1, s9 +; GCN-NEXT: s_mul_i32 s13, s8, s9 +; GCN-NEXT: s_add_u32 s10, s10, s13 +; GCN-NEXT: v_readfirstlane_b32 s13, v0 +; GCN-NEXT: s_mul_i32 s11, s6, s11 +; GCN-NEXT: s_addc_u32 s13, 0, s13 +; GCN-NEXT: v_readfirstlane_b32 s12, v4 +; GCN-NEXT: s_add_u32 s10, s10, s11 +; GCN-NEXT: v_readfirstlane_b32 s14, v1 +; GCN-NEXT: s_addc_u32 s10, s13, s12 +; GCN-NEXT: s_addc_u32 s11, s14, 0 +; GCN-NEXT: s_mul_i32 s9, s6, s9 +; GCN-NEXT: s_add_u32 s9, s10, s9 +; GCN-NEXT: s_addc_u32 s10, 0, s11 +; GCN-NEXT: s_add_u32 s8, s8, s9 +; GCN-NEXT: v_mov_b32_e32 v1, s8 +; GCN-NEXT: v_mul_hi_u32 v1, s4, v1 +; GCN-NEXT: s_addc_u32 s6, s6, s10 +; GCN-NEXT: s_mul_i32 s9, s4, s8 +; GCN-NEXT: s_mul_i32 s4, s4, s6 +; GCN-NEXT: v_readfirstlane_b32 s10, v1 +; GCN-NEXT: s_add_i32 s4, s10, s4 ; GCN-NEXT: s_mul_i32 s5, s5, s8 -; GCN-NEXT: v_readfirstlane_b32 s10, v0 -; GCN-NEXT: s_add_i32 s9, s10, s9 -; GCN-NEXT: s_mul_i32 s4, s4, s8 -; GCN-NEXT: s_add_i32 s5, s9, s5 -; GCN-NEXT: v_mov_b32_e32 v2, s4 -; GCN-NEXT: v_mov_b32_e32 v0, s5 -; GCN-NEXT: v_mul_hi_u32 v3, s6, v2 -; GCN-NEXT: v_mul_hi_u32 v2, s8, v2 -; GCN-NEXT: v_mul_hi_u32 v1, s6, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s9 +; GCN-NEXT: s_add_i32 s4, s4, s5 +; GCN-NEXT: v_mul_hi_u32 v2, s6, v0 +; GCN-NEXT: v_mov_b32_e32 v1, s4 ; GCN-NEXT: v_mul_hi_u32 v0, s8, v0 -; GCN-NEXT: 
s_mul_i32 s10, s8, s5 -; GCN-NEXT: v_readfirstlane_b32 s12, v2 +; GCN-NEXT: v_mul_hi_u32 v3, s8, v1 +; GCN-NEXT: s_mul_i32 s10, s8, s4 +; GCN-NEXT: s_mul_i32 s9, s6, s9 +; GCN-NEXT: v_readfirstlane_b32 s12, v0 +; GCN-NEXT: v_mul_hi_u32 v0, s6, v1 +; GCN-NEXT: v_readfirstlane_b32 s11, v3 ; GCN-NEXT: s_add_u32 s10, s12, s10 -; GCN-NEXT: v_readfirstlane_b32 s11, v0 -; GCN-NEXT: s_mul_i32 s4, s6, s4 ; GCN-NEXT: s_addc_u32 s11, 0, s11 -; GCN-NEXT: v_readfirstlane_b32 s9, v3 -; GCN-NEXT: s_add_u32 s4, s10, s4 -; GCN-NEXT: s_addc_u32 s4, s11, s9 -; GCN-NEXT: v_readfirstlane_b32 s9, v1 +; GCN-NEXT: v_readfirstlane_b32 s5, v2 +; GCN-NEXT: s_add_u32 s9, s10, s9 +; GCN-NEXT: s_addc_u32 s5, s11, s5 +; GCN-NEXT: v_readfirstlane_b32 s9, v0 ; GCN-NEXT: s_addc_u32 s9, s9, 0 -; GCN-NEXT: s_mul_i32 s5, s6, s5 -; GCN-NEXT: s_add_u32 s4, s4, s5 +; GCN-NEXT: s_mul_i32 s4, s6, s4 +; GCN-NEXT: s_add_u32 s4, s5, s4 ; GCN-NEXT: s_addc_u32 s5, 0, s9 ; GCN-NEXT: s_add_u32 s4, s8, s4 ; GCN-NEXT: s_addc_u32 s5, s6, s5 ; GCN-NEXT: v_mul_hi_u32 v1, s4, 24 ; GCN-NEXT: v_mul_hi_u32 v0, s5, 24 ; GCN-NEXT: s_mul_i32 s5, s5, 24 -; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: v_readfirstlane_b32 s8, v1 ; GCN-NEXT: v_readfirstlane_b32 s4, v0 ; GCN-NEXT: s_add_u32 s5, s8, s5 @@ -992,7 +993,6 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: s_cmp_lg_u32 s0, 0 ; GCN-NEXT: s_cselect_b32 s0, s12, 0 ; GCN-NEXT: s_cselect_b32 s1, s10, s8 -; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: v_mov_b32_e32 v0, s1 ; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 @@ -1083,6 +1083,7 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc ; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 ; GCN-NEXT: v_rcp_f32_e32 v2, v2 +; GCN-NEXT: s_mov_b32 s4, 0x8000 ; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GCN-NEXT: v_trunc_f32_e32 v3, v3 
@@ -1111,25 +1112,25 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc -; GCN-NEXT: v_mul_lo_u32 v6, v4, v3 -; GCN-NEXT: v_mul_hi_u32 v7, v4, v2 +; GCN-NEXT: v_mul_lo_u32 v6, v4, v2 +; GCN-NEXT: v_mul_lo_u32 v7, v4, v3 +; GCN-NEXT: v_mul_hi_u32 v4, v4, v2 ; GCN-NEXT: v_mul_lo_u32 v5, v5, v2 -; GCN-NEXT: v_mul_lo_u32 v4, v4, v2 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GCN-NEXT: v_mul_lo_u32 v8, v2, v5 +; GCN-NEXT: v_mul_hi_u32 v8, v3, v6 +; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GCN-NEXT: v_mul_lo_u32 v5, v2, v4 +; GCN-NEXT: v_mul_hi_u32 v7, v2, v6 ; GCN-NEXT: v_mul_hi_u32 v9, v2, v4 -; GCN-NEXT: v_mul_hi_u32 v10, v2, v5 -; GCN-NEXT: v_mul_hi_u32 v7, v3, v4 +; GCN-NEXT: v_mul_lo_u32 v6, v3, v6 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc +; GCN-NEXT: v_mul_hi_u32 v9, v3, v4 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v4 -; GCN-NEXT: v_mul_hi_u32 v6, v3, v5 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GCN-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc -; GCN-NEXT: v_mul_lo_u32 v5, v3, v5 -; GCN-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; GCN-NEXT: v_addc_u32_e32 v4, vcc, v9, v7, vcc -; GCN-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v8, vcc +; GCN-NEXT: v_addc_u32_e32 v6, vcc, 0, v9, vcc +; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GCN-NEXT: v_addc_u32_e32 v2, vcc, v3, v5, vcc @@ -1139,7 +1140,7 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_add_i32_e32 v3, vcc, v4, v3 ; GCN-NEXT: v_mul_lo_u32 v4, v0, v2 ; GCN-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 -; GCN-NEXT: v_sub_i32_e32 v4, vcc, 
0x8000, v4 +; GCN-NEXT: v_sub_i32_e32 v4, vcc, s4, v4 ; GCN-NEXT: v_subb_u32_e64 v5, s[4:5], v5, v1, vcc ; GCN-NEXT: v_sub_i32_e64 v6, s[4:5], v4, v0 ; GCN-NEXT: v_subbrev_u32_e64 v5, s[4:5], 0, v5, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll index 0952013401892..d87b021d1b9d6 100644 --- a/llvm/test/CodeGen/AMDGPU/urem64.ll +++ b/llvm/test/CodeGen/AMDGPU/urem64.ll @@ -8,6 +8,7 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd ; GCN-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 ; GCN-NEXT: s_mov_b32 s3, 0xf000 +; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_cvt_f32_u32_e32 v0, s8 ; GCN-NEXT: v_cvt_f32_u32_e32 v1, s9 @@ -23,60 +24,60 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_mul_hi_u32 v2, s0, v0 ; GCN-NEXT: v_readfirstlane_b32 s10, v1 -; GCN-NEXT: v_readfirstlane_b32 s2, v0 -; GCN-NEXT: s_mul_i32 s11, s0, s10 -; GCN-NEXT: v_readfirstlane_b32 s14, v2 -; GCN-NEXT: s_mul_i32 s12, s1, s2 -; GCN-NEXT: s_mul_i32 s13, s0, s2 -; GCN-NEXT: s_add_i32 s11, s14, s11 -; GCN-NEXT: v_mul_hi_u32 v3, v0, s13 -; GCN-NEXT: s_add_i32 s11, s11, s12 -; GCN-NEXT: v_mul_hi_u32 v0, v0, s11 -; GCN-NEXT: v_mul_hi_u32 v4, v1, s13 -; GCN-NEXT: v_readfirstlane_b32 s12, v3 -; GCN-NEXT: s_mul_i32 s15, s2, s11 -; GCN-NEXT: v_mul_hi_u32 v1, v1, s11 -; GCN-NEXT: s_add_u32 s12, s12, s15 +; GCN-NEXT: v_readfirstlane_b32 s11, v0 +; GCN-NEXT: s_mul_i32 s12, s0, s10 +; GCN-NEXT: v_readfirstlane_b32 s15, v2 +; GCN-NEXT: s_mul_i32 s13, s1, s11 +; GCN-NEXT: s_mul_i32 s14, s0, s11 +; GCN-NEXT: s_add_i32 s12, s15, s12 +; GCN-NEXT: v_mul_hi_u32 v3, v0, s14 +; GCN-NEXT: s_add_i32 s12, s12, s13 +; GCN-NEXT: v_mul_hi_u32 v0, v0, s12 +; GCN-NEXT: v_mul_hi_u32 v4, v1, s14 +; GCN-NEXT: v_readfirstlane_b32 s13, v3 +; GCN-NEXT: s_mul_i32 s15, s11, s12 +; GCN-NEXT: v_mul_hi_u32 
v1, v1, s12 +; GCN-NEXT: s_add_u32 s13, s13, s15 ; GCN-NEXT: v_readfirstlane_b32 s15, v0 -; GCN-NEXT: s_mul_i32 s13, s10, s13 +; GCN-NEXT: s_mul_i32 s14, s10, s14 ; GCN-NEXT: s_addc_u32 s15, 0, s15 -; GCN-NEXT: v_readfirstlane_b32 s14, v4 -; GCN-NEXT: s_add_u32 s12, s12, s13 -; GCN-NEXT: s_addc_u32 s12, s15, s14 +; GCN-NEXT: v_readfirstlane_b32 s16, v4 +; GCN-NEXT: s_add_u32 s13, s13, s14 +; GCN-NEXT: s_addc_u32 s13, s15, s16 +; GCN-NEXT: v_readfirstlane_b32 s14, v1 +; GCN-NEXT: s_addc_u32 s14, s14, 0 +; GCN-NEXT: s_mul_i32 s12, s10, s12 +; GCN-NEXT: s_add_u32 s12, s13, s12 +; GCN-NEXT: s_addc_u32 s13, 0, s14 +; GCN-NEXT: s_add_u32 s11, s11, s12 +; GCN-NEXT: v_mov_b32_e32 v1, s11 +; GCN-NEXT: v_mul_hi_u32 v1, s0, v1 +; GCN-NEXT: s_addc_u32 s10, s10, s13 +; GCN-NEXT: s_mul_i32 s12, s0, s11 +; GCN-NEXT: s_mul_i32 s0, s0, s10 ; GCN-NEXT: v_readfirstlane_b32 s13, v1 -; GCN-NEXT: s_addc_u32 s13, s13, 0 -; GCN-NEXT: s_mul_i32 s11, s10, s11 -; GCN-NEXT: s_add_u32 s11, s12, s11 -; GCN-NEXT: s_addc_u32 s12, 0, s13 -; GCN-NEXT: s_add_u32 s11, s2, s11 -; GCN-NEXT: v_mov_b32_e32 v0, s11 -; GCN-NEXT: v_mul_hi_u32 v0, s0, v0 -; GCN-NEXT: s_addc_u32 s10, s10, s12 -; GCN-NEXT: s_mul_i32 s12, s0, s10 +; GCN-NEXT: s_add_i32 s0, s13, s0 ; GCN-NEXT: s_mul_i32 s1, s1, s11 -; GCN-NEXT: v_readfirstlane_b32 s13, v0 -; GCN-NEXT: s_add_i32 s12, s13, s12 -; GCN-NEXT: s_mul_i32 s0, s0, s11 -; GCN-NEXT: s_add_i32 s1, s12, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s0 -; GCN-NEXT: v_mov_b32_e32 v0, s1 -; GCN-NEXT: v_mul_hi_u32 v3, s10, v2 -; GCN-NEXT: v_mul_hi_u32 v2, s11, v2 -; GCN-NEXT: v_mul_hi_u32 v1, s10, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s12 +; GCN-NEXT: s_add_i32 s0, s0, s1 +; GCN-NEXT: v_mul_hi_u32 v2, s10, v0 +; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: v_mul_hi_u32 v0, s11, v0 -; GCN-NEXT: s_mul_i32 s13, s11, s1 -; GCN-NEXT: v_readfirstlane_b32 s15, v2 +; GCN-NEXT: v_mul_hi_u32 v3, s11, v1 +; GCN-NEXT: s_mul_i32 s13, s11, s0 +; GCN-NEXT: s_mul_i32 s12, s10, s12 +; GCN-NEXT: 
v_readfirstlane_b32 s15, v0 +; GCN-NEXT: v_mul_hi_u32 v0, s10, v1 +; GCN-NEXT: v_readfirstlane_b32 s14, v3 ; GCN-NEXT: s_add_u32 s13, s15, s13 -; GCN-NEXT: v_readfirstlane_b32 s14, v0 -; GCN-NEXT: s_mul_i32 s0, s10, s0 ; GCN-NEXT: s_addc_u32 s14, 0, s14 -; GCN-NEXT: v_readfirstlane_b32 s12, v3 -; GCN-NEXT: s_add_u32 s0, s13, s0 -; GCN-NEXT: s_addc_u32 s0, s14, s12 -; GCN-NEXT: v_readfirstlane_b32 s12, v1 +; GCN-NEXT: v_readfirstlane_b32 s1, v2 +; GCN-NEXT: s_add_u32 s12, s13, s12 +; GCN-NEXT: s_addc_u32 s1, s14, s1 +; GCN-NEXT: v_readfirstlane_b32 s12, v0 ; GCN-NEXT: s_addc_u32 s12, s12, 0 -; GCN-NEXT: s_mul_i32 s1, s10, s1 -; GCN-NEXT: s_add_u32 s0, s0, s1 +; GCN-NEXT: s_mul_i32 s0, s10, s0 +; GCN-NEXT: s_add_u32 s0, s1, s0 ; GCN-NEXT: s_addc_u32 s1, 0, s12 ; GCN-NEXT: s_add_u32 s11, s11, s0 ; GCN-NEXT: s_addc_u32 s1, s10, s1 @@ -141,7 +142,6 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y ; GCN-NEXT: s_cmp_lg_u32 s5, 0 ; GCN-NEXT: s_cselect_b32 s4, s10, s4 ; GCN-NEXT: s_cselect_b32 s5, s11, s6 -; GCN-NEXT: s_mov_b32 s2, -1 ; GCN-NEXT: v_mov_b32_e32 v0, s5 ; GCN-NEXT: v_mov_b32_e32 v1, s4 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 @@ -272,25 +272,25 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) { ; GCN-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v8 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v5, v9, vcc -; GCN-NEXT: v_mul_lo_u32 v8, v6, v5 -; GCN-NEXT: v_mul_hi_u32 v9, v6, v4 +; GCN-NEXT: v_mul_lo_u32 v8, v6, v4 +; GCN-NEXT: v_mul_lo_u32 v9, v6, v5 +; GCN-NEXT: v_mul_hi_u32 v6, v6, v4 ; GCN-NEXT: v_mul_lo_u32 v7, v7, v4 -; GCN-NEXT: v_mul_lo_u32 v6, v6, v4 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GCN-NEXT: v_add_i32_e32 v7, vcc, v8, v7 -; GCN-NEXT: v_mul_lo_u32 v10, v4, v7 +; GCN-NEXT: v_mul_hi_u32 v10, v5, v8 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v9 +; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GCN-NEXT: v_mul_lo_u32 v7, v4, v6 +; GCN-NEXT: v_mul_hi_u32 v9, v4, v8 ; GCN-NEXT: 
v_mul_hi_u32 v11, v4, v6 -; GCN-NEXT: v_mul_hi_u32 v12, v4, v7 -; GCN-NEXT: v_mul_hi_u32 v9, v5, v6 +; GCN-NEXT: v_mul_lo_u32 v8, v5, v8 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v9, v7 +; GCN-NEXT: v_addc_u32_e32 v9, vcc, 0, v11, vcc +; GCN-NEXT: v_mul_hi_u32 v11, v5, v6 ; GCN-NEXT: v_mul_lo_u32 v6, v5, v6 -; GCN-NEXT: v_mul_hi_u32 v8, v5, v7 -; GCN-NEXT: v_add_i32_e32 v10, vcc, v11, v10 -; GCN-NEXT: v_addc_u32_e32 v11, vcc, 0, v12, vcc -; GCN-NEXT: v_mul_lo_u32 v7, v5, v7 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v10, v6 -; GCN-NEXT: v_addc_u32_e32 v6, vcc, v11, v9, vcc -; GCN-NEXT: v_addc_u32_e32 v8, vcc, 0, v8, vcc -; GCN-NEXT: v_add_i32_e32 v6, vcc, v6, v7 +; GCN-NEXT: v_add_i32_e32 v7, vcc, v7, v8 +; GCN-NEXT: v_addc_u32_e32 v7, vcc, v9, v10, vcc +; GCN-NEXT: v_addc_u32_e32 v8, vcc, 0, v11, vcc +; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6 ; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v6 ; GCN-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc @@ -827,6 +827,7 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-LABEL: s_test_urem_k_num_i64: ; GCN: ; %bb.0: ; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; GCN-NEXT: s_mov_b32 s7, 0xf000 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: v_cvt_f32_u32_e32 v0, s2 ; GCN-NEXT: v_cvt_f32_u32_e32 v1, s3 @@ -842,67 +843,67 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1 ; GCN-NEXT: v_mul_hi_u32 v2, s4, v0 ; GCN-NEXT: v_readfirstlane_b32 s6, v1 -; GCN-NEXT: v_readfirstlane_b32 s7, v0 -; GCN-NEXT: s_mul_i32 s8, s4, s6 -; GCN-NEXT: v_readfirstlane_b32 s11, v2 -; GCN-NEXT: s_mul_i32 s9, s5, s7 -; GCN-NEXT: s_mul_i32 s10, s4, s7 -; GCN-NEXT: s_add_i32 s8, s11, s8 -; GCN-NEXT: v_mul_hi_u32 v3, v0, s10 -; GCN-NEXT: s_add_i32 s8, s8, s9 -; GCN-NEXT: v_mul_hi_u32 v0, v0, s8 -; GCN-NEXT: v_mul_hi_u32 v4, v1, s10 -; GCN-NEXT: v_readfirstlane_b32 s9, v3 -; GCN-NEXT: s_mul_i32 s12, s7, s8 -; GCN-NEXT: 
v_mul_hi_u32 v1, v1, s8 -; GCN-NEXT: s_add_u32 s9, s9, s12 -; GCN-NEXT: v_readfirstlane_b32 s12, v0 -; GCN-NEXT: s_mul_i32 s10, s6, s10 -; GCN-NEXT: s_addc_u32 s12, 0, s12 -; GCN-NEXT: v_readfirstlane_b32 s11, v4 -; GCN-NEXT: s_add_u32 s9, s9, s10 -; GCN-NEXT: v_readfirstlane_b32 s13, v1 -; GCN-NEXT: s_addc_u32 s9, s12, s11 -; GCN-NEXT: s_mul_i32 s8, s6, s8 -; GCN-NEXT: s_addc_u32 s10, s13, 0 -; GCN-NEXT: s_add_u32 s8, s9, s8 -; GCN-NEXT: s_addc_u32 s9, 0, s10 -; GCN-NEXT: s_add_u32 s8, s7, s8 -; GCN-NEXT: v_mov_b32_e32 v0, s8 -; GCN-NEXT: v_mul_hi_u32 v0, s4, v0 -; GCN-NEXT: s_addc_u32 s6, s6, s9 +; GCN-NEXT: v_readfirstlane_b32 s8, v0 ; GCN-NEXT: s_mul_i32 s9, s4, s6 +; GCN-NEXT: v_readfirstlane_b32 s12, v2 +; GCN-NEXT: s_mul_i32 s10, s5, s8 +; GCN-NEXT: s_mul_i32 s11, s4, s8 +; GCN-NEXT: s_add_i32 s9, s12, s9 +; GCN-NEXT: v_mul_hi_u32 v3, v0, s11 +; GCN-NEXT: s_add_i32 s9, s9, s10 +; GCN-NEXT: v_mul_hi_u32 v0, v0, s9 +; GCN-NEXT: v_mul_hi_u32 v4, v1, s11 +; GCN-NEXT: v_readfirstlane_b32 s10, v3 +; GCN-NEXT: v_mul_hi_u32 v1, v1, s9 +; GCN-NEXT: s_mul_i32 s13, s8, s9 +; GCN-NEXT: s_add_u32 s10, s10, s13 +; GCN-NEXT: v_readfirstlane_b32 s13, v0 +; GCN-NEXT: s_mul_i32 s11, s6, s11 +; GCN-NEXT: s_addc_u32 s13, 0, s13 +; GCN-NEXT: v_readfirstlane_b32 s12, v4 +; GCN-NEXT: s_add_u32 s10, s10, s11 +; GCN-NEXT: v_readfirstlane_b32 s14, v1 +; GCN-NEXT: s_addc_u32 s10, s13, s12 +; GCN-NEXT: s_addc_u32 s11, s14, 0 +; GCN-NEXT: s_mul_i32 s9, s6, s9 +; GCN-NEXT: s_add_u32 s9, s10, s9 +; GCN-NEXT: s_addc_u32 s10, 0, s11 +; GCN-NEXT: s_add_u32 s8, s8, s9 +; GCN-NEXT: v_mov_b32_e32 v1, s8 +; GCN-NEXT: v_mul_hi_u32 v1, s4, v1 +; GCN-NEXT: s_addc_u32 s6, s6, s10 +; GCN-NEXT: s_mul_i32 s9, s4, s8 +; GCN-NEXT: s_mul_i32 s4, s4, s6 +; GCN-NEXT: v_readfirstlane_b32 s10, v1 +; GCN-NEXT: s_add_i32 s4, s10, s4 ; GCN-NEXT: s_mul_i32 s5, s5, s8 -; GCN-NEXT: v_readfirstlane_b32 s10, v0 -; GCN-NEXT: s_add_i32 s9, s10, s9 -; GCN-NEXT: s_mul_i32 s4, s4, s8 -; GCN-NEXT: s_add_i32 s5, s9, s5 -; 
GCN-NEXT: v_mov_b32_e32 v2, s4 -; GCN-NEXT: v_mov_b32_e32 v0, s5 -; GCN-NEXT: v_mul_hi_u32 v3, s6, v2 -; GCN-NEXT: v_mul_hi_u32 v2, s8, v2 -; GCN-NEXT: v_mul_hi_u32 v1, s6, v0 +; GCN-NEXT: v_mov_b32_e32 v0, s9 +; GCN-NEXT: s_add_i32 s4, s4, s5 +; GCN-NEXT: v_mul_hi_u32 v2, s6, v0 +; GCN-NEXT: v_mov_b32_e32 v1, s4 ; GCN-NEXT: v_mul_hi_u32 v0, s8, v0 -; GCN-NEXT: s_mul_i32 s10, s8, s5 -; GCN-NEXT: v_readfirstlane_b32 s12, v2 +; GCN-NEXT: v_mul_hi_u32 v3, s8, v1 +; GCN-NEXT: s_mul_i32 s10, s8, s4 +; GCN-NEXT: s_mul_i32 s9, s6, s9 +; GCN-NEXT: v_readfirstlane_b32 s12, v0 +; GCN-NEXT: v_mul_hi_u32 v0, s6, v1 +; GCN-NEXT: v_readfirstlane_b32 s11, v3 ; GCN-NEXT: s_add_u32 s10, s12, s10 -; GCN-NEXT: v_readfirstlane_b32 s11, v0 -; GCN-NEXT: s_mul_i32 s4, s6, s4 ; GCN-NEXT: s_addc_u32 s11, 0, s11 -; GCN-NEXT: v_readfirstlane_b32 s9, v3 -; GCN-NEXT: s_add_u32 s4, s10, s4 -; GCN-NEXT: s_addc_u32 s4, s11, s9 -; GCN-NEXT: v_readfirstlane_b32 s9, v1 +; GCN-NEXT: v_readfirstlane_b32 s5, v2 +; GCN-NEXT: s_add_u32 s9, s10, s9 +; GCN-NEXT: s_addc_u32 s5, s11, s5 +; GCN-NEXT: v_readfirstlane_b32 s9, v0 ; GCN-NEXT: s_addc_u32 s9, s9, 0 -; GCN-NEXT: s_mul_i32 s5, s6, s5 -; GCN-NEXT: s_add_u32 s4, s4, s5 +; GCN-NEXT: s_mul_i32 s4, s6, s4 +; GCN-NEXT: s_add_u32 s4, s5, s4 ; GCN-NEXT: s_addc_u32 s5, 0, s9 ; GCN-NEXT: s_add_u32 s4, s8, s4 ; GCN-NEXT: s_addc_u32 s5, s6, s5 ; GCN-NEXT: v_mul_hi_u32 v1, s4, 24 ; GCN-NEXT: v_mul_hi_u32 v0, s5, 24 ; GCN-NEXT: s_mul_i32 s5, s5, 24 -; GCN-NEXT: s_mov_b32 s7, 0xf000 +; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: v_readfirstlane_b32 s8, v1 ; GCN-NEXT: v_readfirstlane_b32 s4, v0 ; GCN-NEXT: s_add_u32 s5, s8, s5 @@ -946,7 +947,6 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x) ; GCN-NEXT: s_cmp_lg_u32 s1, 0 ; GCN-NEXT: s_cselect_b32 s0, s8, s0 ; GCN-NEXT: s_cselect_b32 s1, s9, s11 -; GCN-NEXT: s_mov_b32 s6, -1 ; GCN-NEXT: v_mov_b32_e32 v0, s1 ; GCN-NEXT: v_mov_b32_e32 v1, s0 ; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, 
s[4:7], 0 @@ -1160,6 +1160,7 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc ; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2 ; GCN-NEXT: v_rcp_f32_e32 v2, v2 +; GCN-NEXT: s_mov_b32 s4, 0x8000 ; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 ; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 ; GCN-NEXT: v_trunc_f32_e32 v3, v3 @@ -1188,25 +1189,25 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v8, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v6 ; GCN-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc -; GCN-NEXT: v_mul_lo_u32 v6, v4, v3 -; GCN-NEXT: v_mul_hi_u32 v7, v4, v2 +; GCN-NEXT: v_mul_lo_u32 v6, v4, v2 +; GCN-NEXT: v_mul_lo_u32 v7, v4, v3 +; GCN-NEXT: v_mul_hi_u32 v4, v4, v2 ; GCN-NEXT: v_mul_lo_u32 v5, v5, v2 -; GCN-NEXT: v_mul_lo_u32 v4, v4, v2 -; GCN-NEXT: v_add_i32_e32 v6, vcc, v7, v6 -; GCN-NEXT: v_add_i32_e32 v5, vcc, v6, v5 -; GCN-NEXT: v_mul_lo_u32 v8, v2, v5 +; GCN-NEXT: v_mul_hi_u32 v8, v3, v6 +; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v7 +; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GCN-NEXT: v_mul_lo_u32 v5, v2, v4 +; GCN-NEXT: v_mul_hi_u32 v7, v2, v6 ; GCN-NEXT: v_mul_hi_u32 v9, v2, v4 -; GCN-NEXT: v_mul_hi_u32 v10, v2, v5 -; GCN-NEXT: v_mul_hi_u32 v7, v3, v4 +; GCN-NEXT: v_mul_lo_u32 v6, v3, v6 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v7, v5 +; GCN-NEXT: v_addc_u32_e32 v7, vcc, 0, v9, vcc +; GCN-NEXT: v_mul_hi_u32 v9, v3, v4 ; GCN-NEXT: v_mul_lo_u32 v4, v3, v4 -; GCN-NEXT: v_mul_hi_u32 v6, v3, v5 -; GCN-NEXT: v_add_i32_e32 v8, vcc, v9, v8 -; GCN-NEXT: v_addc_u32_e32 v9, vcc, 0, v10, vcc -; GCN-NEXT: v_mul_lo_u32 v5, v3, v5 -; GCN-NEXT: v_add_i32_e32 v4, vcc, v8, v4 -; GCN-NEXT: v_addc_u32_e32 v4, vcc, v9, v7, vcc -; GCN-NEXT: v_addc_u32_e32 v6, vcc, 0, v6, vcc -; GCN-NEXT: v_add_i32_e32 v4, vcc, v4, v5 +; GCN-NEXT: v_add_i32_e32 v5, vcc, v5, v6 +; GCN-NEXT: v_addc_u32_e32 v5, vcc, v7, v8, vcc +; GCN-NEXT: v_addc_u32_e32 v6, vcc, 0, v9, vcc +; GCN-NEXT: v_add_i32_e32 v4, vcc, v5, v4 
; GCN-NEXT: v_addc_u32_e32 v5, vcc, 0, v6, vcc ; GCN-NEXT: v_add_i32_e32 v2, vcc, v2, v4 ; GCN-NEXT: v_addc_u32_e32 v2, vcc, v3, v5, vcc @@ -1216,7 +1217,7 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) { ; GCN-NEXT: v_mul_lo_u32 v2, v0, v2 ; GCN-NEXT: v_add_i32_e32 v3, vcc, v4, v3 ; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v3 -; GCN-NEXT: v_sub_i32_e32 v2, vcc, 0x8000, v2 +; GCN-NEXT: v_sub_i32_e32 v2, vcc, s4, v2 ; GCN-NEXT: v_subb_u32_e64 v4, s[4:5], v4, v1, vcc ; GCN-NEXT: v_sub_i32_e64 v5, s[4:5], v2, v0 ; GCN-NEXT: v_subbrev_u32_e64 v6, s[6:7], 0, v4, s[4:5] diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll index 013b402a82488..c165cc7d547ca 100644 --- a/llvm/test/CodeGen/AMDGPU/wave32.ll +++ b/llvm/test/CodeGen/AMDGPU/wave32.ll @@ -775,24 +775,24 @@ define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 { ; GFX1032-NEXT: s_addc_u32 s12, 0, s13 ; GFX1032-NEXT: s_add_u32 s5, s5, s11 ; GFX1032-NEXT: s_addc_u32 s4, s4, s12 -; GFX1032-NEXT: s_mul_hi_u32 s11, s9, s5 -; GFX1032-NEXT: s_mul_i32 s12, s9, s5 +; GFX1032-NEXT: s_mul_i32 s11, s9, s5 +; GFX1032-NEXT: s_mul_hi_u32 s12, s9, s5 ; GFX1032-NEXT: s_mul_i32 s9, s9, s4 ; GFX1032-NEXT: s_mul_i32 s10, s10, s5 -; GFX1032-NEXT: s_add_i32 s9, s11, s9 -; GFX1032-NEXT: s_mul_i32 s11, s4, s12 +; GFX1032-NEXT: s_add_i32 s9, s12, s9 +; GFX1032-NEXT: s_mul_hi_u32 s13, s4, s11 ; GFX1032-NEXT: s_add_i32 s9, s9, s10 -; GFX1032-NEXT: s_mul_hi_u32 s10, s5, s12 -; GFX1032-NEXT: s_mul_i32 s15, s5, s9 -; GFX1032-NEXT: s_mul_hi_u32 s14, s5, s9 -; GFX1032-NEXT: s_add_u32 s10, s10, s15 -; GFX1032-NEXT: s_mul_hi_u32 s13, s4, s12 -; GFX1032-NEXT: s_addc_u32 s14, 0, s14 -; GFX1032-NEXT: s_mul_hi_u32 s12, s4, s9 -; GFX1032-NEXT: s_add_u32 s10, s10, s11 +; GFX1032-NEXT: s_mul_hi_u32 s10, s5, s11 +; GFX1032-NEXT: s_mul_i32 s14, s5, s9 +; GFX1032-NEXT: s_mul_i32 s12, s4, s11 +; GFX1032-NEXT: s_mul_hi_u32 s11, s5, s9 +; GFX1032-NEXT: s_add_u32 s10, s10, s14 +; GFX1032-NEXT: s_addc_u32 s11, 0, s11 +; 
GFX1032-NEXT: s_mul_hi_u32 s15, s4, s9 +; GFX1032-NEXT: s_add_u32 s10, s10, s12 ; GFX1032-NEXT: s_mul_i32 s9, s4, s9 -; GFX1032-NEXT: s_addc_u32 s10, s14, s13 -; GFX1032-NEXT: s_addc_u32 s11, s12, 0 +; GFX1032-NEXT: s_addc_u32 s10, s11, s13 +; GFX1032-NEXT: s_addc_u32 s11, s15, 0 ; GFX1032-NEXT: s_add_u32 s9, s10, s9 ; GFX1032-NEXT: s_addc_u32 s10, 0, s11 ; GFX1032-NEXT: s_add_u32 s5, s5, s9 @@ -927,24 +927,24 @@ define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 { ; GFX1064-NEXT: s_addc_u32 s11, 0, s12 ; GFX1064-NEXT: s_add_u32 s5, s5, s10 ; GFX1064-NEXT: s_addc_u32 s4, s4, s11 -; GFX1064-NEXT: s_mul_hi_u32 s10, s8, s5 -; GFX1064-NEXT: s_mul_i32 s11, s8, s5 +; GFX1064-NEXT: s_mul_i32 s10, s8, s5 +; GFX1064-NEXT: s_mul_hi_u32 s11, s8, s5 ; GFX1064-NEXT: s_mul_i32 s8, s8, s4 ; GFX1064-NEXT: s_mul_i32 s9, s9, s5 -; GFX1064-NEXT: s_add_i32 s8, s10, s8 -; GFX1064-NEXT: s_mul_i32 s10, s4, s11 +; GFX1064-NEXT: s_add_i32 s8, s11, s8 +; GFX1064-NEXT: s_mul_hi_u32 s12, s4, s10 ; GFX1064-NEXT: s_add_i32 s8, s8, s9 -; GFX1064-NEXT: s_mul_hi_u32 s9, s5, s11 -; GFX1064-NEXT: s_mul_i32 s14, s5, s8 -; GFX1064-NEXT: s_mul_hi_u32 s13, s5, s8 -; GFX1064-NEXT: s_add_u32 s9, s9, s14 -; GFX1064-NEXT: s_mul_hi_u32 s12, s4, s11 -; GFX1064-NEXT: s_addc_u32 s13, 0, s13 -; GFX1064-NEXT: s_mul_hi_u32 s11, s4, s8 -; GFX1064-NEXT: s_add_u32 s9, s9, s10 +; GFX1064-NEXT: s_mul_hi_u32 s9, s5, s10 +; GFX1064-NEXT: s_mul_i32 s13, s5, s8 +; GFX1064-NEXT: s_mul_i32 s11, s4, s10 +; GFX1064-NEXT: s_mul_hi_u32 s10, s5, s8 +; GFX1064-NEXT: s_add_u32 s9, s9, s13 +; GFX1064-NEXT: s_addc_u32 s10, 0, s10 +; GFX1064-NEXT: s_mul_hi_u32 s14, s4, s8 +; GFX1064-NEXT: s_add_u32 s9, s9, s11 ; GFX1064-NEXT: s_mul_i32 s8, s4, s8 -; GFX1064-NEXT: s_addc_u32 s9, s13, s12 -; GFX1064-NEXT: s_addc_u32 s10, s11, 0 +; GFX1064-NEXT: s_addc_u32 s9, s10, s12 +; GFX1064-NEXT: s_addc_u32 s10, s14, 0 ; GFX1064-NEXT: s_add_u32 s8, s9, s8 ; GFX1064-NEXT: s_addc_u32 s9, 0, s10 ; GFX1064-NEXT: s_add_u32 s5, s5, s8 From 
6c908edc774b3df48a42f422cddbdca286202adf Mon Sep 17 00:00:00 2001 From: gretay-amd Date: Mon, 22 Jun 2026 15:00:01 +0100 Subject: [PATCH 015/511] [AMDGPU] Waterfall loop codegen improvement in SIInstrInfo (#192415) When generating waterfall loops, use the instructions `v_cmpx_eq_*` and `s_andn2_wrexec_*` as recommended for recent architectures, instead of `v_cmp_eq_*` and `s_and saveexec`. This PR only updates waterfall loop code generation in `SIInstrInfo.cpp`. Other places that generated waterfall loops can be handled separately. - Add new lane mask constant for `s_andn2_wrexec` - Set `isTerminator` for `v_cmpx_eq_{u32,u64}_e32` - Fix test `mubuf-legalize-operands.mir` to track liveness needed for verifying phi nodes - Update .ll and .mir tests to accept the new instruction sequences Assisted-by: Claude --- llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h | 3 + llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp | 6 + llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 170 ++- llvm/lib/Target/AMDGPU/SIInstructions.td | 18 + .../Target/AMDGPU/SIOptimizeExecMasking.cpp | 6 + .../buffer-fat-pointer-atomicrmw-fadd.ll | 1239 ++++++++-------- .../buffer-fat-pointer-atomicrmw-fmax.ll | 1304 ++++++++--------- .../buffer-fat-pointer-atomicrmw-fmin.ll | 1304 ++++++++--------- ...e92561-restore-undef-scc-verifier-error.ll | 68 +- .../AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll | 25 +- .../AMDGPU/llvm.amdgcn.dual_intersect_ray.ll | 25 +- ...mdgcn.raw.ptr.buffer.atomic.fadd.v2bf16.ll | 69 +- ....amdgcn.struct.buffer.load.format.v3f16.ll | 126 +- ...cn.struct.ptr.buffer.atomic.fadd.v2bf16.ll | 64 +- ...gcn.struct.ptr.buffer.atomic.fadd_nortn.ll | 132 +- ...mdgcn.struct.ptr.buffer.atomic.fadd_rtn.ll | 132 +- ...mdgcn.struct.ptr.buffer.atomic.fmax.f32.ll | 210 ++- ...mdgcn.struct.ptr.buffer.atomic.fmin.f32.ll | 210 ++- ...gcn.struct.ptr.buffer.load.format.v3f16.ll | 76 +- .../AMDGPU/move-to-valu-vimage-vsample.ll | 84 +- ...uf-legalize-operands-non-ptr-intrinsics.ll | 536 ++++--- 
.../CodeGen/AMDGPU/mubuf-legalize-operands.ll | 536 ++++--- .../AMDGPU/mubuf-legalize-operands.mir | 441 ++++-- ...r-descriptor-waterfall-loop-idom-update.ll | 51 +- llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll | 98 +- llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll | 28 +- 26 files changed, 3513 insertions(+), 3448 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h b/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h index 3fdd962dcc30c..af14066c1bd46 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h +++ b/llvm/lib/Target/AMDGPU/AMDGPULaneMaskUtils.h @@ -27,6 +27,7 @@ class LaneMaskConstants { const unsigned AndN2Opc; const unsigned AndN2SaveExecOpc; const unsigned AndN2TermOpc; + const unsigned AndN2WrExecOpc; // GFX10+ (HasNoSdstCMPX) only const unsigned AndSaveExecOpc; const unsigned AndSaveExecTermOpc; const unsigned BfmOpc; @@ -52,6 +53,8 @@ class LaneMaskConstants { : AMDGPU::S_ANDN2_SAVEEXEC_B64), AndN2TermOpc(IsWave32 ? AMDGPU::S_ANDN2_B32_term : AMDGPU::S_ANDN2_B64_term), + AndN2WrExecOpc(IsWave32 ? AMDGPU::S_ANDN2_WREXEC_B32 + : AMDGPU::S_ANDN2_WREXEC_B64), AndSaveExecOpc(IsWave32 ? AMDGPU::S_AND_SAVEEXEC_B32 : AMDGPU::S_AND_SAVEEXEC_B64), AndSaveExecTermOpc(IsWave32 ? AMDGPU::S_AND_SAVEEXEC_B32_term diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp index 9a3bc28134824..35ee341ca78c1 100644 --- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp +++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp @@ -182,6 +182,12 @@ class SIFixSGPRCopiesLegacy : public MachineFunctionPass { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } + + // Waterfall expansion may introduce Phi nodes and -verify-machineinstrs will + // fail. 
+ MachineFunctionProperties getClearedProperties() const override { + return MachineFunctionProperties().setNoPHIs(); + } }; } // end anonymous namespace diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 0fcd7da6d5ef5..43cdaa34cf3e3 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -2002,6 +2002,13 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const { MI.setDesc(get(AMDGPU::S_AND_SAVEEXEC_B32)); break; + case AMDGPU::V_CMPX_EQ_U32_nosdst_e32_term: + MI.setDesc(get(AMDGPU::V_CMPX_EQ_U32_nosdst_e32)); + break; + case AMDGPU::V_CMPX_EQ_U64_nosdst_e32_term: + MI.setDesc(get(AMDGPU::V_CMPX_EQ_U64_nosdst_e32)); + break; + case AMDGPU::SI_SPILL_S32_TO_VGPR: MI.setDesc(get(AMDGPU::V_WRITELANE_B32)); break; @@ -3117,6 +3124,8 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, case AMDGPU::S_ANDN2_B32_term: case AMDGPU::S_AND_B32_term: case AMDGPU::S_AND_SAVEEXEC_B32_term: + case AMDGPU::V_CMPX_EQ_U32_nosdst_e32_term: + case AMDGPU::V_CMPX_EQ_U64_nosdst_e32_term: break; case AMDGPU::SI_IF: case AMDGPU::SI_ELSE: @@ -7013,8 +7022,8 @@ void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB, // unique value of \p ScalarOps across all lanes. In the best case we execute 1 // iteration, in the worst case we execute 64 (once per lane). 
static void emitLoadScalarOpsFromVGPRLoop( - const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &LoopBB, - MachineBasicBlock &BodyBB, const DebugLoc &DL, + const SIInstrInfo &TII, MachineRegisterInfo &MRI, MachineBasicBlock &PredBB, + MachineBasicBlock &LoopBB, MachineBasicBlock &BodyBB, const DebugLoc &DL, ArrayRef ScalarOps, ArrayRef PhySGPRs = {}) { MachineFunction &MF = *LoopBB.getParent(); const GCNSubtarget &ST = MF.getSubtarget(); @@ -7022,8 +7031,53 @@ static void emitLoadScalarOpsFromVGPRLoop( const AMDGPU::LaneMaskConstants &LMC = AMDGPU::LaneMaskConstants::get(ST); const auto *BoolXExecRC = TRI->getWaveMaskRegClass(); + // Emit v_cmpx_eq and s_andn2_wrexec when both instructions are + // available. Otherwise, use the previous pattern of v_cmp_eq, + // s_and_saveexec, and s_xor. + bool UseNewExecInstructions = + ST.hasNoSdstCMPX() && TII.pseudoToMCOpcode(LMC.AndN2WrExecOpc) != -1; + MachineBasicBlock::iterator I = LoopBB.begin(); Register CondReg; + + Register PhiExec; + Register NewExec; + + if (UseNewExecInstructions) { + PhiExec = MRI.createVirtualRegister(BoolXExecRC); + NewExec = MRI.createVirtualRegister(BoolXExecRC); + Register InitExec = MRI.createVirtualRegister(BoolXExecRC); + BuildMI(PredBB, PredBB.end(), DL, TII.get(LMC.MovOpc), InitExec) + .addReg(LMC.ExecReg); + + BuildMI(LoopBB, I, DL, TII.get(TargetOpcode::PHI), PhiExec) + .addReg(InitExec) + .addMBB(&PredBB) + .addReg(NewExec) + .addMBB(&BodyBB); + } + + // Placement of v_cmpx instructions (when index is longer than 64 bit) + // involves a trade-off between register pressure and latency: + // (a) Deferring all v_cmpx after all v_readfirstlane may increase + // register pressure because arguments and results of all + // v_readfirstlane instructions must stay live until deferred v_cmpx use them. 
+ // (b) Interleaving v_cmpx with v_readfirstlanes may reduce live ranges and + // increase latency by placing v_readfirstlane instructions + // immediately before the v_cmpx instructions that directly depend on them. + // + // Emitting interleaved v_cmpx and v_readfirstlane requires + // block splitting because v_cmpx changes EXEC mask and therefore for safety + // v_cmpx needs to be treated as terminator until after register allocation + // (spill placement) and instruction reordering. + // + // Current implementation defers v_cmpx and leaves other instruction + // scheduling decisions to later passes, where register pressure is known or + // easier to approximate. + // Non-terminators (V_READFIRSTLANE and REG_SEQUENCE) are inserted before I; + // v_cmpx instructions are inserted at the end of LoopBB. + // After the first v_cmpx is emitted, I is updated to point to it + // so subsequent non-terminators are inserted before all v_cmpx instructions. for (auto [Idx, ScalarOp] : enumerate(ScalarOps)) { unsigned RegSize = TRI->getRegSizeInBits(ScalarOp->getReg(), MRI); unsigned NumSubRegs = RegSize / 32; @@ -7046,21 +7100,30 @@ static void emitLoadScalarOpsFromVGPRLoop( BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_READFIRSTLANE_B32), CurReg) .addReg(VScalarOp); - Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC); + if (UseNewExecInstructions) { + auto CmpxMI = BuildMI(LoopBB, LoopBB.end(), DL, + TII.get(AMDGPU::V_CMPX_EQ_U32_nosdst_e32_term)) + .addReg(CurReg) + .addReg(VScalarOp); + if (I == LoopBB.end()) + I = CmpxMI.getInstr()->getIterator(); + } else { + Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC); - BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg) - .addReg(CurReg) - .addReg(VScalarOp); + BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U32_e64), NewCondReg) + .addReg(CurReg) + .addReg(VScalarOp); - // Combine the comparison results with AND. - if (!CondReg) // First. 
- CondReg = NewCondReg; - else { // If not the first, we create an AND. - Register AndReg = MRI.createVirtualRegister(BoolXExecRC); - BuildMI(LoopBB, I, DL, TII.get(LMC.AndOpc), AndReg) - .addReg(CondReg) - .addReg(NewCondReg); - CondReg = AndReg; + // Combine the comparison results with AND. + if (!CondReg) { // First. + CondReg = NewCondReg; + } else { // If not the first, we create an AND. + Register AndReg = MRI.createVirtualRegister(BoolXExecRC); + BuildMI(LoopBB, I, DL, TII.get(LMC.AndOpc), AndReg) + .addReg(CondReg) + .addReg(NewCondReg); + CondReg = AndReg; + } } // Update ScalarOp operand to use the SGPR ScalarOp. @@ -7106,25 +7169,32 @@ static void emitLoadScalarOpsFromVGPRLoop( .addReg(CurRegHi) .addImm(AMDGPU::sub1); - Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC); - auto Cmp = BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), - NewCondReg) - .addReg(CurReg); - if (NumSubRegs <= 2) - Cmp.addReg(VScalarOp); - else - Cmp.addReg(VScalarOp, VScalarOpUndef, - TRI->getSubRegFromChannel(Idx, 2)); - - // Combine the comparison results with AND. - if (!CondReg) // First. - CondReg = NewCondReg; - else { // If not the first, we create an AND. - Register AndReg = MRI.createVirtualRegister(BoolXExecRC); - BuildMI(LoopBB, I, DL, TII.get(LMC.AndOpc), AndReg) - .addReg(CondReg) - .addReg(NewCondReg); - CondReg = AndReg; + unsigned SubReg = + NumSubRegs <= 2 ? 0 : TRI->getSubRegFromChannel(Idx, 2); + + if (UseNewExecInstructions) { + auto CmpxMI = BuildMI(LoopBB, LoopBB.end(), DL, + TII.get(AMDGPU::V_CMPX_EQ_U64_nosdst_e32_term)) + .addReg(CurReg) + .addReg(VScalarOp, VScalarOpUndef, SubReg); + if (I == LoopBB.end()) + I = CmpxMI.getInstr()->getIterator(); + } else { + Register NewCondReg = MRI.createVirtualRegister(BoolXExecRC); + BuildMI(LoopBB, I, DL, TII.get(AMDGPU::V_CMP_EQ_U64_e64), NewCondReg) + .addReg(CurReg) + .addReg(VScalarOp, VScalarOpUndef, SubReg); + + // Combine the comparison results with AND. + if (!CondReg) { // First. 
+ CondReg = NewCondReg; + } else { // If not the first, we create an AND. + Register AndReg = MRI.createVirtualRegister(BoolXExecRC); + BuildMI(LoopBB, I, DL, TII.get(LMC.AndOpc), AndReg) + .addReg(CondReg) + .addReg(NewCondReg); + CondReg = AndReg; + } } } // End for loop. @@ -7153,20 +7223,32 @@ static void emitLoadScalarOpsFromVGPRLoop( } } - Register SaveExec = MRI.createVirtualRegister(BoolXExecRC); - MRI.setSimpleHint(SaveExec, CondReg); + // Instructions AndSaveExecOpc and AndN2WrExecOpc that modify EXEC mask + // should have isTerminator=1 but terminators that define + // virtual registers are not supported. + Register SaveExec; + if (!UseNewExecInstructions) { + SaveExec = MRI.createVirtualRegister(BoolXExecRC); + MRI.setSimpleHint(SaveExec, CondReg); - // Update EXEC to matching lanes, saving original to SaveExec. - BuildMI(LoopBB, I, DL, TII.get(LMC.AndSaveExecOpc), SaveExec) - .addReg(CondReg, RegState::Kill); + // Update EXEC to matching lanes, saving original to SaveExec. + BuildMI(LoopBB, I, DL, TII.get(LMC.AndSaveExecOpc), SaveExec) + .addReg(CondReg, RegState::Kill); + } // The original instruction is here; we insert the terminators after it. I = BodyBB.end(); - // Update EXEC, switch all done bits to 0 and all todo bits to 1. - BuildMI(BodyBB, I, DL, TII.get(LMC.XorTermOpc), LMC.ExecReg) - .addReg(LMC.ExecReg) - .addReg(SaveExec); + if (UseNewExecInstructions) { + MRI.setSimpleHint(NewExec, PhiExec); + BuildMI(BodyBB, I, DL, TII.get(LMC.AndN2WrExecOpc), NewExec) + .addReg(PhiExec); + } else { + // Update EXEC, switch all done bits to 0 and all todo bits to 1. 
+ BuildMI(BodyBB, I, DL, TII.get(LMC.XorTermOpc), LMC.ExecReg) + .addReg(LMC.ExecReg) + .addReg(SaveExec); + } BuildMI(BodyBB, I, DL, TII.get(AMDGPU::SI_WATERFALL_LOOP)).addMBB(&LoopBB); } @@ -7267,7 +7349,7 @@ generateWaterFallLoop(const SIInstrInfo &TII, MachineInstr &MI, } } - emitLoadScalarOpsFromVGPRLoop(TII, MRI, *LoopBB, *BodyBB, DL, ScalarOps, + emitLoadScalarOpsFromVGPRLoop(TII, MRI, MBB, *LoopBB, *BodyBB, DL, ScalarOps, PhySGPRs); MachineBasicBlock::iterator First = RemainderBB->begin(); diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td index eff8dc2768b59..750cb1973e21f 100644 --- a/llvm/lib/Target/AMDGPU/SIInstructions.td +++ b/llvm/lib/Target/AMDGPU/SIInstructions.td @@ -508,6 +508,24 @@ def S_AND_B32_term : WrapTerminatorInst; def S_AND_SAVEEXEC_B32_term : WrapTerminatorInst; } +class WrapTerminatorVOPC : VPseudoInstSI< + base_inst.OutOperandList, + base_inst.InOperandList> { + let Uses = base_inst.Uses; + let Defs = base_inst.Defs; + let SchedRW = base_inst.SchedRW; + let isTerminator = 1; + let isConvergent = 1; + let isCompare = 1; + let VOPC = 1; +} + +let SubtargetPredicate = HasNoSdstCMPX in { +def V_CMPX_EQ_U32_nosdst_e32_term + : WrapTerminatorVOPC; +def V_CMPX_EQ_U64_nosdst_e32_term + : WrapTerminatorVOPC; +} def WAVE_BARRIER : SPseudoInstSI<(outs), (ins), [(int_amdgcn_wave_barrier)]> { diff --git a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp index 53bc68036b204..a9bd72e634414 100644 --- a/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp +++ b/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp @@ -293,6 +293,12 @@ bool SIOptimizeExecMasking::removeTerminatorBit(MachineInstr &MI) const { MI.setDesc(TII->get(AMDGPU::S_AND_B32)); return true; } + case AMDGPU::V_CMPX_EQ_U32_nosdst_e32_term: + MI.setDesc(TII->get(AMDGPU::V_CMPX_EQ_U32_nosdst_e32)); + return true; + case AMDGPU::V_CMPX_EQ_U64_nosdst_e32_term: + 
MI.setDesc(TII->get(AMDGPU::V_CMPX_EQ_U64_nosdst_e32)); + return true; default: return false; } diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll index b5844049fd287..7df50850f6357 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll @@ -372,31 +372,31 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgp ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_mov_b32 s1, exec_lo +; GFX12-NEXT: s_mov_b32 s4, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 s5, s4 ; GFX12-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_atomic_add_f32 v5, v4, s[4:7], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-NEXT: buffer_atomic_add_f32 v5, v4, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX12-NEXT: ; 
implicit-def: $vgpr4 -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB2_1 ; GFX12-NEXT: ; %bb.2: -; GFX12-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_mov_b32_e32 v0, v5 ; GFX12-NEXT: global_inv scope:SCOPE_DEV +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: @@ -431,27 +431,25 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgp ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s1, exec_lo +; GFX11-NEXT: s_mov_b32 s4, exec_lo ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s5, s4 ; GFX11-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_add_f32 v5, v4, s[4:7], 0 offen offset:1024 glc +; GFX11-NEXT: buffer_atomic_add_f32 v5, v4, s[0:3], 0 offen offset:1024 glc +; 
GFX11-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX11-NEXT: ; implicit-def: $vgpr4 -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB2_1 ; GFX11-NEXT: ; %bb.2: -; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v5 ; GFX11-NEXT: buffer_gl1_inv @@ -461,60 +459,60 @@ define float @buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgp ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory__amdgpu_ignore_denormal_mode: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v8, v4, s[8:11], 0 offen offset:1024 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX10-NEXT: buffer_load_dword v8, v4, s[4:7], 0 offen offset:1024 +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB2_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, 
s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: .LBB2_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB2_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_add_f32_e32 v7, v8, v5 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_mov_b32_e32 v6, v7 ; GFX10-NEXT: v_mov_b32_e32 v7, v8 ; GFX10-NEXT: .LBB2_4: ; Parent Loop BB2_3 Depth=1 ; GFX10-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[8:11], 0 offen offset:1024 glc -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[4:7], 0 offen offset:1024 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB2_4 ; GFX10-NEXT: ; %bb.5: ; in Loop: Header=BB2_3 Depth=1 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v6, v8 ; GFX10-NEXT: v_mov_b32_e32 v8, v6 ; GFX10-NEXT: buffer_gl1_inv ; GFX10-NEXT: 
buffer_gl0_inv -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execnz .LBB2_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: v_mov_b32_e32 v0, v6 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -2310,66 +2308,64 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdg ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v7, v2 ; GFX12-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0 -; GFX12-NEXT: s_mov_b32 s1, exec_lo +; GFX12-NEXT: s_mov_b32 s4, exec_lo +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 s5, s4 ; GFX12-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_readfirstlane_b32 s4, v9 -; GFX12-NEXT: v_readfirstlane_b32 s5, v10 -; GFX12-NEXT: v_readfirstlane_b32 s6, v7 -; GFX12-NEXT: v_readfirstlane_b32 s7, v8 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_readfirstlane_b32 s0, v9 +; GFX12-NEXT: v_readfirstlane_b32 s1, v10 +; GFX12-NEXT: v_readfirstlane_b32 s2, v7 +; GFX12-NEXT: v_readfirstlane_b32 s3, v8 ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[7:8] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[9:10] +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[7:8] ; GFX12-NEXT: s_wait_loadcnt 0x0 
-; GFX12-NEXT: buffer_load_b64 v[13:14], v4, s[4:7], null offen offset:2048 -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-NEXT: buffer_load_b64 v[13:14], v4, s[0:3], null offen offset:2048 +; GFX12-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB10_1 ; GFX12-NEXT: ; %bb.2: -; GFX12-NEXT: s_mov_b32 exec_lo, s1 -; GFX12-NEXT: s_mov_b32 s1, 0 +; GFX12-NEXT: s_mov_b32 exec_lo, s4 +; GFX12-NEXT: s_mov_b32 s4, 0 ; GFX12-NEXT: .LBB10_3: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-NEXT: ; Child Loop BB10_4 Depth 2 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_add_f64_e32 v[11:12], v[13:14], v[5:6] -; GFX12-NEXT: s_mov_b32 s2, exec_lo +; GFX12-NEXT: s_mov_b32 s5, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 s6, s5 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12 ; GFX12-NEXT: v_dual_mov_b32 v2, v13 :: v_dual_mov_b32 v3, v14 ; GFX12-NEXT: .LBB10_4: ; Parent Loop BB10_3 Depth=1 ; GFX12-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-NEXT: v_readfirstlane_b32 s4, v9 -; GFX12-NEXT: v_readfirstlane_b32 s5, v10 -; GFX12-NEXT: v_readfirstlane_b32 s6, v7 -; GFX12-NEXT: v_readfirstlane_b32 s7, v8 +; GFX12-NEXT: v_readfirstlane_b32 s0, v9 +; GFX12-NEXT: v_readfirstlane_b32 s1, v10 +; GFX12-NEXT: v_readfirstlane_b32 s2, v7 +; GFX12-NEXT: v_readfirstlane_b32 s3, v8 ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[7:8] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[9:10] +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[7:8] ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v4, 
s[4:7], null offen offset:2048 th:TH_ATOMIC_RETURN -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v4, s[0:3], null offen offset:2048 th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-NEXT: s_cbranch_execnz .LBB10_4 ; GFX12-NEXT: ; %bb.5: ; in Loop: Header=BB10_3 Depth=1 -; GFX12-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[13:14] ; GFX12-NEXT: v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v13, v0 ; GFX12-NEXT: global_inv scope:SCOPE_DEV -; GFX12-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_cbranch_execnz .LBB10_3 ; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: @@ -2409,65 +2405,61 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdg ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v7, v2 ; GFX11-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0 -; GFX11-NEXT: s_mov_b32 s1, 0 -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s5, exec_lo +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b32 s6, s5 ; GFX11-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_readfirstlane_b32 s4, v9 -; GFX11-NEXT: v_readfirstlane_b32 s5, v10 -; GFX11-NEXT: v_readfirstlane_b32 s6, v7 -; GFX11-NEXT: v_readfirstlane_b32 s7, v8 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; 
GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[7:8] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: buffer_load_b64 v[13:14], v4, s[4:7], 0 offen offset:2048 -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_readfirstlane_b32 s0, v9 +; GFX11-NEXT: v_readfirstlane_b32 s1, v10 +; GFX11-NEXT: v_readfirstlane_b32 s2, v7 +; GFX11-NEXT: v_readfirstlane_b32 s3, v8 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[9:10] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[7:8] +; GFX11-NEXT: buffer_load_b64 v[13:14], v4, s[0:3], 0 offen offset:2048 +; GFX11-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-NEXT: s_cbranch_execnz .LBB10_1 ; GFX11-NEXT: ; %bb.2: -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB10_3: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Loop Header: Depth=1 ; GFX11-NEXT: ; Child Loop BB10_4 Depth 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_add_f64 v[11:12], v[13:14], v[5:6] -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s5, exec_lo ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s6, s5 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12 ; GFX11-NEXT: v_dual_mov_b32 v2, v13 :: v_dual_mov_b32 v3, v14 ; GFX11-NEXT: .LBB10_4: ; Parent Loop BB10_3 Depth=1 ; GFX11-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-NEXT: v_readfirstlane_b32 s4, v9 -; GFX11-NEXT: v_readfirstlane_b32 s5, v10 -; GFX11-NEXT: v_readfirstlane_b32 s6, v7 -; GFX11-NEXT: v_readfirstlane_b32 s7, v8 +; GFX11-NEXT: v_readfirstlane_b32 s0, v9 +; GFX11-NEXT: v_readfirstlane_b32 s1, v10 +; GFX11-NEXT: v_readfirstlane_b32 s2, v7 +; GFX11-NEXT: v_readfirstlane_b32 
s3, v8 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[7:8] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[9:10] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[7:8] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v4, s[4:7], 0 offen offset:2048 glc -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v4, s[0:3], 0 offen offset:2048 glc +; GFX11-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-NEXT: s_cbranch_execnz .LBB10_4 ; GFX11-NEXT: ; %bb.5: ; in Loop: Header=BB10_3 Depth=1 -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[13:14] ; GFX11-NEXT: v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v13, v0 ; GFX11-NEXT: buffer_gl1_inv ; GFX11-NEXT: buffer_gl0_inv -; GFX11-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-NEXT: s_cbranch_execnz .LBB10_3 ; GFX11-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: @@ -2477,63 +2469,63 @@ define double @buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdg ; GFX10-NEXT: v_mov_b32_e32 v7, v2 ; GFX10-NEXT: v_mov_b32_e32 v10, v1 ; GFX10-NEXT: v_mov_b32_e32 v9, v0 -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; 
GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: .LBB10_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v9 -; GFX10-NEXT: v_readfirstlane_b32 s9, v10 -; GFX10-NEXT: v_readfirstlane_b32 s10, v7 -; GFX10-NEXT: v_readfirstlane_b32 s11, v8 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[9:10] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[7:8] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dwordx2 v[13:14], v4, s[8:11], 0 offen offset:2048 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v9 +; GFX10-NEXT: v_readfirstlane_b32 s5, v10 +; GFX10-NEXT: v_readfirstlane_b32 s6, v7 +; GFX10-NEXT: v_readfirstlane_b32 s7, v8 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[9:10] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[7:8] +; GFX10-NEXT: buffer_load_dwordx2 v[13:14], v4, s[4:7], 0 offen offset:2048 +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB10_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: .LBB10_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB10_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_add_f64 v[11:12], v[13:14], v[5:6] -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_mov_b32_e32 v0, v11 ; GFX10-NEXT: v_mov_b32_e32 v1, v12 ; GFX10-NEXT: v_mov_b32_e32 v2, v13 ; GFX10-NEXT: v_mov_b32_e32 v3, v14 ; GFX10-NEXT: .LBB10_4: ; Parent Loop BB10_3 Depth=1 ; GFX10-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX10-NEXT: v_readfirstlane_b32 s8, v9 -; GFX10-NEXT: v_readfirstlane_b32 s9, v10 -; GFX10-NEXT: v_readfirstlane_b32 s10, v7 -; GFX10-NEXT: 
v_readfirstlane_b32 s11, v8 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[9:10] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[7:8] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v9 +; GFX10-NEXT: v_readfirstlane_b32 s5, v10 +; GFX10-NEXT: v_readfirstlane_b32 s6, v7 +; GFX10-NEXT: v_readfirstlane_b32 s7, v8 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[9:10] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[7:8] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v4, s[8:11], 0 offen offset:2048 glc -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: buffer_atomic_cmpswap_x2 v[0:3], v4, s[4:7], 0 offen offset:2048 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB10_4 ; GFX10-NEXT: ; %bb.5: ; in Loop: Header=BB10_3 Depth=1 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[13:14] ; GFX10-NEXT: v_mov_b32_e32 v14, v1 ; GFX10-NEXT: v_mov_b32_e32 v13, v0 ; GFX10-NEXT: buffer_gl1_inv ; GFX10-NEXT: buffer_gl0_inv -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execnz .LBB10_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX90A-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: @@ -4193,7 +4185,9 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; 
GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x200, v4 -; GFX12-TRUE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s5, s4 ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-TRUE16-NEXT: v_and_b32_e32 v4, 3, v6 ; GFX12-TRUE16-NEXT: v_and_b32_e32 v10, -4, v6 @@ -4202,67 +4196,63 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e64 v7, v4, 0xffff ; GFX12-TRUE16-NEXT: v_not_b32_e32 v11, v7 ; GFX12-TRUE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_load_b32 v7, v10, s[4:7], null offen -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_load_b32 v7, v10, s[0:3], null offen +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_1 ; GFX12-TRUE16-NEXT: ; %bb.2: -; 
GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX12-TRUE16-NEXT: s_mov_b32 s1, 0 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s4 +; GFX12-TRUE16-NEXT: s_mov_b32 s4, 0 ; GFX12-TRUE16-NEXT: .LBB15_3: ; %atomicrmw.start ; GFX12-TRUE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-TRUE16-NEXT: ; Child Loop BB15_4 Depth 2 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v6, v4, v7 ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v6.h, 0 -; GFX12-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX12-TRUE16-NEXT: v_add_f16_e32 v6.l, v6.l, v5.l -; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v6, v4, v6 ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v6, v4, v6 ; GFX12-TRUE16-NEXT: v_and_or_b32 v6, v7, v11, v6 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_dual_mov_b32 v9, v7 :: v_dual_mov_b32 v8, v6 ; GFX12-TRUE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX12-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; 
GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[8:9], v10, s[4:7], null offen th:TH_ATOMIC_RETURN -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[8:9], v10, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_4 ; GFX12-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v8, v7 ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v7, v8 ; GFX12-TRUE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_3 ; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v4, v8 ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -4274,7 +4264,9 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x200, v4 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-FAKE16-NEXT: s_mov_b32 s5, s4 ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; 
GFX12-FAKE16-NEXT: v_and_b32_e32 v4, 3, v6 ; GFX12-FAKE16-NEXT: v_and_b32_e32 v10, -4, v6 @@ -4283,68 +4275,63 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e64 v7, v4, 0xffff ; GFX12-FAKE16-NEXT: v_not_b32_e32 v11, v7 ; GFX12-FAKE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_load_b32 v7, v10, s[4:7], null offen -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_load_b32 v7, v10, s[0:3], null offen +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB15_1 ; GFX12-FAKE16-NEXT: ; %bb.2: -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s1 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, 0 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s4 +; GFX12-FAKE16-NEXT: s_mov_b32 s4, 0 ; GFX12-FAKE16-NEXT: .LBB15_3: ; %atomicrmw.start ; GFX12-FAKE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-FAKE16-NEXT: ; Child Loop BB15_4 Depth 2 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: 
v_lshrrev_b32_e32 v6, v4, v7 -; GFX12-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-FAKE16-NEXT: s_wait_storecnt 0x0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX12-FAKE16-NEXT: v_add_f16_e32 v6, v6, v5 -; GFX12-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff, v6 ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v6, v4, v6 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_and_or_b32 v6, v7, v11, v6 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_dual_mov_b32 v9, v7 :: v_dual_mov_b32 v8, v6 ; GFX12-FAKE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX12-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: 
buffer_atomic_cmpswap_b32 v[8:9], v10, s[4:7], null offen th:TH_ATOMIC_RETURN -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[8:9], v10, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB15_4 ; GFX12-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v8, v7 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v7, v8 ; GFX12-FAKE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB15_3 ; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v4, v8 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -4420,8 +4407,9 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v6, 0x200, v4 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v4, 3, v6 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, -4, v6 @@ -4430,21 +4418,18 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e64 v7, v4, 
0xffff ; GFX11-TRUE16-NEXT: v_not_b32_e32 v11, v7 ; GFX11-TRUE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: buffer_load_b32 v7, v10, s[4:7], 0 offen -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-TRUE16-NEXT: buffer_load_b32 v7, v10, s[0:3], 0 offen +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_1 ; GFX11-TRUE16-NEXT: ; %bb.2: -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: .p2align 6 ; GFX11-TRUE16-NEXT: .LBB15_3: ; %atomicrmw.start ; GFX11-TRUE16-NEXT: ; =>This Loop Header: Depth=1 @@ -4452,43 +4437,41 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v6, v4, v7 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v6.h, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: 
s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: v_add_f16_e32 v6.l, v6.l, v5.l -; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, v4, v6 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v6, v4, v6 ; GFX11-TRUE16-NEXT: v_and_or_b32 v6, v7, v11, v6 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v9, v7 :: v_dual_mov_b32 v8, v6 ; GFX11-TRUE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX11-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[8:9], v10, s[4:7], 0 offen glc -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[8:9], v10, s[0:3], 0 offen glc +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_4 ; GFX11-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 
vcc_lo, v8, v7 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v7, v8 ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv -; GFX11-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_3 ; GFX11-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v4, v8 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -4496,8 +4479,9 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v6, 0x200, v4 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v4, 3, v6 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v10, -4, v6 @@ -4506,29 +4490,27 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e64 v7, v4, 0xffff ; GFX11-FAKE16-NEXT: v_not_b32_e32 v11, v7 ; GFX11-FAKE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: 
s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-FAKE16-NEXT: buffer_load_b32 v7, v10, s[4:7], 0 offen -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-FAKE16-NEXT: buffer_load_b32 v7, v10, s[0:3], 0 offen +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB15_1 ; GFX11-FAKE16-NEXT: ; %bb.2: -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: .p2align 6 ; GFX11-FAKE16-NEXT: .LBB15_3: ; %atomicrmw.start ; GFX11-FAKE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX11-FAKE16-NEXT: ; Child Loop BB15_4 Depth 2 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, v4, v7 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_add_f16_e32 v6, v6, v5 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff, v6 @@ -4539,33 +4521,30 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v9, v7 :: v_dual_mov_b32 v8, v6 ; GFX11-FAKE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX11-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: 
v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[8:9], v10, s[4:7], 0 offen glc -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[8:9], v10, s[0:3], 0 offen glc +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB15_4 ; GFX11-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v8, v7 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v7, v8 ; GFX11-FAKE16-NEXT: buffer_gl1_inv ; GFX11-FAKE16-NEXT: buffer_gl0_inv -; GFX11-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB15_3 ; GFX11-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v4, v8 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -4573,35 +4552,36 @@ define half 
@buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_add_nc_u32_e32 v6, 0x200, v4 -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_and_b32_e32 v4, 3, v6 ; GFX10-NEXT: v_and_b32_e32 v10, -4, v6 ; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v4 ; GFX10-NEXT: v_lshlrev_b32_e64 v7, v4, 0xffff ; GFX10-NEXT: v_not_b32_e32 v11, v7 ; GFX10-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v7, v10, s[8:11], 0 offen -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX10-NEXT: buffer_load_dword v7, v10, s[4:7], 0 offen +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB15_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: .LBB15_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB15_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v6, v4, v7 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: 
s_mov_b32 s9, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_add_f16_e32 v6, v6, v5 ; GFX10-NEXT: v_lshlrev_b32_sdwa v6, v4, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX10-NEXT: v_and_or_b32 v6, v7, v11, v6 @@ -4609,32 +4589,31 @@ define half @buffer_fat_ptr_agent_atomic_fadd_ret_f16__offset__waterfall__amdgpu ; GFX10-NEXT: v_mov_b32_e32 v8, v6 ; GFX10-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX10-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[8:9], v10, s[8:11], 0 offen glc -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: buffer_atomic_cmpswap v[8:9], v10, s[4:7], 0 offen glc +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB15_4 ; GFX10-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v8, v7 ; GFX10-NEXT: v_mov_b32_e32 v7, v8 ; GFX10-NEXT: buffer_gl1_inv ; GFX10-NEXT: buffer_gl0_inv -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 ; 
GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execnz .LBB15_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v4, v8 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -6015,7 +5994,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX12-TRUE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s5, s4 ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-TRUE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX12-TRUE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -6024,80 +6005,75 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX12-TRUE16-NEXT: v_not_b32_e32 v9, v6 ; GFX12-TRUE16-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_wait_alu 
depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_load_b32 v6, v8, s[4:7], null offen -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_load_b32 v6, v8, s[0:3], null offen +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB18_1 ; GFX12-TRUE16-NEXT: ; %bb.2: -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 16, v5 -; GFX12-TRUE16-NEXT: s_mov_b32 s1, 0 +; GFX12-TRUE16-NEXT: s_mov_b32 s4, 0 ; GFX12-TRUE16-NEXT: .LBB18_3: ; %atomicrmw.start ; GFX12-TRUE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-TRUE16-NEXT: ; Child Loop BB18_4 Depth 2 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX12-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_add_f32_e32 v4, v4, v10 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX12-TRUE16-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX12-TRUE16-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_add3_u32 v5, v5, v4, 0x7fff ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0) -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | 
instid1(VALU_DEP_2) ; GFX12-TRUE16-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.h, 0 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.h -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, v7, v5 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_and_or_b32 v5, v6, v9, v4 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v4, v5 ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX12-TRUE16-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 ; GFX12-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], null offen th:TH_ATOMIC_RETURN -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], null offen 
th:TH_ATOMIC_RETURN +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB18_4 ; GFX12-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX12-TRUE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB18_3 ; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6109,7 +6085,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-FAKE16-NEXT: s_mov_b32 s5, s4 ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-FAKE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX12-FAKE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -6118,79 +6096,74 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX12-FAKE16-NEXT: v_not_b32_e32 v9, v6 ; GFX12-FAKE16-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: 
v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_load_b32 v6, v8, s[4:7], null offen -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_load_b32 v6, v8, s[0:3], null offen +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB18_1 ; GFX12-FAKE16-NEXT: ; %bb.2: -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v5 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, 0 +; GFX12-FAKE16-NEXT: s_mov_b32 s4, 0 ; GFX12-FAKE16-NEXT: .LBB18_3: ; %atomicrmw.start ; GFX12-FAKE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-FAKE16-NEXT: ; Child Loop BB18_4 Depth 2 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX12-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-FAKE16-NEXT: s_wait_storecnt 0x0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX12-FAKE16-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_add_f32_e32 v4, v4, v10 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX12-FAKE16-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX12-FAKE16-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_add3_u32 v5, v5, v4, 0x7fff ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0) -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo -; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v4 ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v4 ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_and_or_b32 v5, v6, v9, v4 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v4, v5 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX12-FAKE16-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 ; GFX12-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; 
GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], null offen th:TH_ATOMIC_RETURN -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB18_4 ; GFX12-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX12-FAKE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB18_3 ; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6273,8 +6246,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: 
s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -6283,21 +6257,18 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX11-TRUE16-NEXT: v_not_b32_e32 v9, v6 ; GFX11-TRUE16-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: buffer_load_b32 v6, v8, s[4:7], 0 offen -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-TRUE16-NEXT: buffer_load_b32 v6, v8, s[0:3], 0 offen +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB18_1 ; GFX11-TRUE16-NEXT: ; %bb.2: -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX11-TRUE16-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX11-TRUE16-NEXT: .p2align 6 @@ -6306,8 +6277,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX11-TRUE16-NEXT: ; Child Loop BB18_4 Depth 2 ; GFX11-TRUE16-NEXT: 
s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX11-TRUE16-NEXT: v_add_f32_e32 v4, v4, v10 @@ -6328,34 +6300,31 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX11-TRUE16-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 ; GFX11-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], 0 offen glc -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], 0 offen glc +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB18_4 ; GFX11-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 -; GFX11-TRUE16-NEXT: 
s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv -; GFX11-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB18_3 ; GFX11-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-TRUE16-NEXT: s_set_inst_prefetch_distance 0x2 -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6363,8 +6332,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -6373,21 +6343,18 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX11-FAKE16-NEXT: v_not_b32_e32 v9, v6 ; GFX11-FAKE16-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: 
v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-FAKE16-NEXT: buffer_load_b32 v6, v8, s[4:7], 0 offen -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-FAKE16-NEXT: buffer_load_b32 v6, v8, s[0:3], 0 offen +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB18_1 ; GFX11-FAKE16-NEXT: ; %bb.2: -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX11-FAKE16-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX11-FAKE16-NEXT: .p2align 6 @@ -6396,8 +6363,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX11-FAKE16-NEXT: ; Child Loop BB18_4 Depth 2 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX11-FAKE16-NEXT: v_add_f32_e32 v4, v4, v10 @@ -6417,34 +6385,31 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX11-FAKE16-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 ; 
GFX11-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], 0 offen glc -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], 0 offen glc +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB18_4 ; GFX11-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-FAKE16-NEXT: buffer_gl1_inv ; GFX11-FAKE16-NEXT: buffer_gl0_inv -; GFX11-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB18_3 ; GFX11-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end ; 
GFX11-FAKE16-NEXT: s_set_inst_prefetch_distance 0x2 -; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -6452,36 +6417,37 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX10-NEXT: v_and_b32_e32 v8, -4, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, 3, v6 ; GFX10-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX10-NEXT: v_not_b32_e32 v9, v6 ; GFX10-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX10-NEXT: buffer_load_dword v6, v8, s[4:7], 0 offen +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB18_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: 
s_mov_b32 exec_lo, s9 ; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX10-NEXT: .LBB18_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB18_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_sdwa v4, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_add_f32_e32 v4, v4, v10 ; GFX10-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX10-NEXT: v_or_b32_e32 v11, 0x400000, v4 @@ -6494,32 +6460,31 @@ define bfloat @buffer_fat_ptr_agent_atomic_fadd_ret_bf16__offset__waterfall__amd ; GFX10-NEXT: v_mov_b32_e32 v5, v6 ; GFX10-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 ; GFX10-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen glc +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB18_4 ; GFX10-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: 
s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX10-NEXT: v_mov_b32_e32 v6, v4 ; GFX10-NEXT: buffer_gl1_inv ; GFX10-NEXT: buffer_gl0_inv -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execnz .LBB18_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -7346,31 +7311,31 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_mov_b32 s1, exec_lo +; GFX12-NEXT: s_mov_b32 s4, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 s5, s4 ; GFX12-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-NEXT: s_wait_loadcnt 0x0 -; 
GFX12-NEXT: buffer_atomic_pk_add_f16 v5, v4, s[4:7], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-NEXT: buffer_atomic_pk_add_f16 v5, v4, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX12-NEXT: ; implicit-def: $vgpr4 -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB21_1 ; GFX12-NEXT: ; %bb.2: -; GFX12-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_mov_b32_e32 v0, v5 ; GFX12-NEXT: global_inv scope:SCOPE_DEV +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: @@ -7405,124 +7370,120 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s1, 0 -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s5, exec_lo +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b32 s6, s5 ; GFX11-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; 
GFX11-NEXT: buffer_load_b32 v8, v4, s[4:7], 0 offen offset:1024 -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-NEXT: buffer_load_b32 v8, v4, s[0:3], 0 offen offset:1024 +; GFX11-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-NEXT: s_cbranch_execnz .LBB21_1 ; GFX11-NEXT: ; %bb.2: -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB21_3: ; %atomicrmw.start ; GFX11-NEXT: ; =>This Loop Header: Depth=1 ; GFX11-NEXT: ; Child Loop BB21_4 Depth 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_pk_add_f16 v7, v8, v5 -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s5, exec_lo ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s6, s5 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-NEXT: v_mov_b32_e32 v6, v7 ; GFX11-NEXT: v_mov_b32_e32 v7, v8 ; GFX11-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 ; GFX11-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[6:7], v4, s[4:7], 0 offen offset:1024 glc -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: 
buffer_atomic_cmpswap_b32 v[6:7], v4, s[0:3], 0 offen offset:1024 glc +; GFX11-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-NEXT: s_cbranch_execnz .LBB21_4 ; GFX11-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v6, v8 ; GFX11-NEXT: v_mov_b32_e32 v8, v6 ; GFX11-NEXT: buffer_gl1_inv ; GFX11-NEXT: buffer_gl0_inv -; GFX11-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-NEXT: s_cbranch_execnz .LBB21_3 ; GFX11-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-NEXT: v_mov_b32_e32 v0, v6 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v8, v4, s[8:11], 0 offen offset:1024 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; 
GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX10-NEXT: buffer_load_dword v8, v4, s[4:7], 0 offen offset:1024 +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB21_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: .LBB21_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB21_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_pk_add_f16 v7, v8, v5 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_mov_b32_e32 v6, v7 ; GFX10-NEXT: v_mov_b32_e32 v7, v8 ; GFX10-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 ; GFX10-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[8:11], 0 offen offset:1024 glc -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: buffer_atomic_cmpswap v[6:7], v4, s[4:7], 0 offen offset:1024 glc +; GFX10-NEXT: 
s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB21_4 ; GFX10-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v6, v8 ; GFX10-NEXT: v_mov_b32_e32 v8, v6 ; GFX10-NEXT: buffer_gl1_inv ; GFX10-NEXT: buffer_gl0_inv -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execnz .LBB21_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: v_mov_b32_e32 v0, v6 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -9669,31 +9630,31 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterf ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_mov_b32 s1, exec_lo +; GFX12-NEXT: s_mov_b32 s4, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 s5, s4 ; GFX12-NEXT: .LBB28_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; 
GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_atomic_pk_add_bf16 v5, v4, s[4:7], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-NEXT: buffer_atomic_pk_add_bf16 v5, v4, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX12-NEXT: ; implicit-def: $vgpr4 -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB28_1 ; GFX12-NEXT: ; %bb.2: -; GFX12-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_mov_b32_e32 v0, v5 ; GFX12-NEXT: global_inv scope:SCOPE_DEV +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: @@ -9776,24 +9737,22 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterf ; GFX11-TRUE16-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: .LBB28_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu 
instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: buffer_load_b32 v7, v4, s[4:7], 0 offen offset:1024 -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-TRUE16-NEXT: buffer_load_b32 v7, v4, s[0:3], 0 offen offset:1024 +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB28_1 ; GFX11-TRUE16-NEXT: ; %bb.2: -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v5 ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 16, v5 ; GFX11-TRUE16-NEXT: s_set_inst_prefetch_distance 0x1 @@ -9804,8 +9763,9 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterf ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v7 ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v7 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_dual_add_f32 v6, v6, v8 :: v_dual_add_f32 v5, v5, v9 ; GFX11-TRUE16-NEXT: v_bfe_u32 v11, v6, 16, 1 @@ -9826,58 +9786,53 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterf ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v7 ; GFX11-TRUE16-NEXT: .LBB28_4: ; Parent Loop BB28_3 Depth=1 ; GFX11-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: 
v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[4:7], 0 offen offset:1024 glc -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], 0 offen offset:1024 glc +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB28_4 ; GFX11-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB28_3 Depth=1 -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v7, v5 ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv -; GFX11-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB28_3 ; GFX11-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-TRUE16-NEXT: s_set_inst_prefetch_distance 0x2 -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; 
GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v5 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: .LBB28_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-FAKE16-NEXT: buffer_load_b32 v7, v4, s[4:7], 0 offen offset:1024 -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-FAKE16-NEXT: buffer_load_b32 v7, v4, s[0:3], 0 offen offset:1024 +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB28_1 ; GFX11-FAKE16-NEXT: ; %bb.2: -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v5 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v5 ; 
GFX11-FAKE16-NEXT: s_set_inst_prefetch_distance 0x1 @@ -9888,8 +9843,9 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterf ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v7 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v7 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_dual_add_f32 v6, v6, v9 :: v_dual_add_f32 v5, v5, v8 ; GFX11-FAKE16-NEXT: v_bfe_u32 v11, v6, 16, 1 @@ -9910,57 +9866,54 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterf ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v6, v7 ; GFX11-FAKE16-NEXT: .LBB28_4: ; Parent Loop BB28_3 Depth=1 ; GFX11-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[4:7], 0 offen offset:1024 glc -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, 
s0 +; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], 0 offen offset:1024 glc +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB28_4 ; GFX11-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB28_3 Depth=1 -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v7, v5 ; GFX11-FAKE16-NEXT: buffer_gl1_inv ; GFX11-FAKE16-NEXT: buffer_gl0_inv -; GFX11-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB28_3 ; GFX11-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-FAKE16-NEXT: s_set_inst_prefetch_distance 0x2 -; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, v5 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: .LBB28_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v7, v4, s[8:11], 0 offen offset:1024 -; 
GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX10-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB28_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v5 ; GFX10-NEXT: v_and_b32_e32 v9, 0xffff0000, v5 ; GFX10-NEXT: .LBB28_3: ; %atomicrmw.start @@ -9969,8 +9922,9 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterf ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v7 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff0000, v7 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_add_f32_e32 v5, v5, v8 ; GFX10-NEXT: v_add_f32_e32 v6, v6, v9 ; GFX10-NEXT: v_bfe_u32 v10, v5, 16, 1 @@ -9988,32 +9942,31 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterf ; GFX10-NEXT: v_mov_b32_e32 v6, v7 ; GFX10-NEXT: .LBB28_4: ; Parent Loop BB28_3 Depth=1 ; GFX10-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: 
v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[5:6], v4, s[8:11], 0 offen offset:1024 glc -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: buffer_atomic_cmpswap v[5:6], v4, s[4:7], 0 offen offset:1024 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB28_4 ; GFX10-NEXT: ; %bb.5: ; in Loop: Header=BB28_3 Depth=1 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX10-NEXT: v_mov_b32_e32 v7, v5 ; GFX10-NEXT: buffer_gl1_inv ; GFX10-NEXT: buffer_gl0_inv -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execnz .LBB28_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: v_mov_b32_e32 v0, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll index 3d85bf7019426..08e1a59245fd6 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmax.ll @@ -363,31 +363,31 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgp ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_mov_b32 s1, 
exec_lo +; GFX12-NEXT: s_mov_b32 s4, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 s5, s4 ; GFX12-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_atomic_max_num_f32 v5, v4, s[4:7], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-NEXT: buffer_atomic_max_num_f32 v5, v4, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX12-NEXT: ; implicit-def: $vgpr4 -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB2_1 ; GFX12-NEXT: ; %bb.2: -; GFX12-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_mov_b32_e32 v0, v5 ; GFX12-NEXT: global_inv scope:SCOPE_DEV +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory: @@ -453,27 +453,25 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgp ; GFX11-LABEL: 
buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s1, exec_lo +; GFX11-NEXT: s_mov_b32 s4, exec_lo ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s5, s4 ; GFX11-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_max_f32 v5, v4, s[4:7], 0 offen offset:1024 glc +; GFX11-NEXT: buffer_atomic_max_f32 v5, v4, s[0:3], 0 offen offset:1024 glc +; GFX11-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX11-NEXT: ; implicit-def: $vgpr4 -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB2_1 ; GFX11-NEXT: ; %bb.2: -; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v5 ; GFX11-NEXT: buffer_gl1_inv @@ -483,26 +481,26 @@ define float @buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgp ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, exec_lo +; GFX10-NEXT: s_mov_b32 s8, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s9, s8 ; GFX10-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_fmax v5, v4, s[8:11], 0 offen offset:1024 glc +; GFX10-NEXT: buffer_atomic_fmax v5, v4, s[4:7], 0 offen offset:1024 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s9, s9 ; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: ; implicit-def: $vgpr4 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB2_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s5 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s8 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v5 ; GFX10-NEXT: buffer_gl1_inv @@ -1540,68 +1538,66 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdg ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v7, v2 ; GFX12-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0 -; GFX12-NEXT: s_mov_b32 s1, exec_lo +; GFX12-NEXT: s_mov_b32 s4, exec_lo +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 s5, s4 ; 
GFX12-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_readfirstlane_b32 s4, v9 -; GFX12-NEXT: v_readfirstlane_b32 s5, v10 -; GFX12-NEXT: v_readfirstlane_b32 s6, v7 -; GFX12-NEXT: v_readfirstlane_b32 s7, v8 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_readfirstlane_b32 s0, v9 +; GFX12-NEXT: v_readfirstlane_b32 s1, v10 +; GFX12-NEXT: v_readfirstlane_b32 s2, v7 +; GFX12-NEXT: v_readfirstlane_b32 s3, v8 ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[7:8] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[9:10] +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[7:8] ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_load_b64 v[13:14], v4, s[4:7], null offen offset:2048 -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-NEXT: buffer_load_b64 v[13:14], v4, s[0:3], null offen offset:2048 +; GFX12-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB7_1 ; GFX12-NEXT: ; %bb.2: -; GFX12-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-NEXT: v_max_num_f64_e32 v[5:6], v[5:6], v[5:6] -; GFX12-NEXT: s_mov_b32 s1, 0 +; GFX12-NEXT: s_mov_b32 s4, 0 ; GFX12-NEXT: .LBB7_3: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-NEXT: ; Child Loop BB7_4 Depth 2 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[13:14], v[13:14] -; GFX12-NEXT: s_mov_b32 s2, exec_lo +; GFX12-NEXT: s_mov_b32 s5, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 
s6, s5 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_max_num_f64_e32 v[11:12], v[0:1], v[5:6] ; GFX12-NEXT: v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12 ; GFX12-NEXT: v_dual_mov_b32 v2, v13 :: v_dual_mov_b32 v3, v14 ; GFX12-NEXT: .LBB7_4: ; Parent Loop BB7_3 Depth=1 ; GFX12-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-NEXT: v_readfirstlane_b32 s4, v9 -; GFX12-NEXT: v_readfirstlane_b32 s5, v10 -; GFX12-NEXT: v_readfirstlane_b32 s6, v7 -; GFX12-NEXT: v_readfirstlane_b32 s7, v8 +; GFX12-NEXT: v_readfirstlane_b32 s0, v9 +; GFX12-NEXT: v_readfirstlane_b32 s1, v10 +; GFX12-NEXT: v_readfirstlane_b32 s2, v7 +; GFX12-NEXT: v_readfirstlane_b32 s3, v8 ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[7:8] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[9:10] +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[7:8] ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v4, s[4:7], null offen offset:2048 th:TH_ATOMIC_RETURN -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v4, s[0:3], null offen offset:2048 th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-NEXT: s_cbranch_execnz .LBB7_4 ; GFX12-NEXT: ; %bb.5: ; in Loop: Header=BB7_3 Depth=1 -; GFX12-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[13:14] ; GFX12-NEXT: v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v13, v0 ; GFX12-NEXT: global_inv scope:SCOPE_DEV -; GFX12-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: 
s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_cbranch_execnz .LBB7_3 ; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: @@ -1641,25 +1637,23 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdg ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v7, v2 ; GFX11-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0 -; GFX11-NEXT: s_mov_b32 s1, 0 -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s5, exec_lo +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b32 s6, s5 ; GFX11-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_readfirstlane_b32 s4, v9 -; GFX11-NEXT: v_readfirstlane_b32 s5, v10 -; GFX11-NEXT: v_readfirstlane_b32 s6, v7 -; GFX11-NEXT: v_readfirstlane_b32 s7, v8 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[7:8] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: buffer_load_b64 v[13:14], v4, s[4:7], 0 offen offset:2048 -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_readfirstlane_b32 s0, v9 +; GFX11-NEXT: v_readfirstlane_b32 s1, v10 +; GFX11-NEXT: v_readfirstlane_b32 s2, v7 +; GFX11-NEXT: v_readfirstlane_b32 s3, v8 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[9:10] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_cmpx_eq_u64_e32 
s[2:3], v[7:8] +; GFX11-NEXT: buffer_load_b64 v[13:14], v4, s[0:3], 0 offen offset:2048 +; GFX11-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-NEXT: s_cbranch_execnz .LBB7_1 ; GFX11-NEXT: ; %bb.2: -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB7_3: ; %atomicrmw.start @@ -1667,66 +1661,64 @@ define double @buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdg ; GFX11-NEXT: ; Child Loop BB7_4 Depth 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_max_f64 v[0:1], v[13:14], v[13:14] -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s5, exec_lo ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s6, s5 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_max_f64 v[11:12], v[0:1], v[5:6] ; GFX11-NEXT: v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12 ; GFX11-NEXT: v_dual_mov_b32 v2, v13 :: v_dual_mov_b32 v3, v14 ; GFX11-NEXT: .LBB7_4: ; Parent Loop BB7_3 Depth=1 ; GFX11-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-NEXT: v_readfirstlane_b32 s4, v9 -; GFX11-NEXT: v_readfirstlane_b32 s5, v10 -; GFX11-NEXT: v_readfirstlane_b32 s6, v7 -; GFX11-NEXT: v_readfirstlane_b32 s7, v8 +; GFX11-NEXT: v_readfirstlane_b32 s0, v9 +; GFX11-NEXT: v_readfirstlane_b32 s1, v10 +; GFX11-NEXT: v_readfirstlane_b32 s2, v7 +; GFX11-NEXT: v_readfirstlane_b32 s3, v8 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[7:8] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[9:10] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[7:8] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v4, s[4:7], 0 offen offset:2048 glc -; 
GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v4, s[0:3], 0 offen offset:2048 glc +; GFX11-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-NEXT: s_cbranch_execnz .LBB7_4 ; GFX11-NEXT: ; %bb.5: ; in Loop: Header=BB7_3 Depth=1 -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[13:14] ; GFX11-NEXT: v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v13, v0 ; GFX11-NEXT: buffer_gl1_inv ; GFX11-NEXT: buffer_gl0_inv -; GFX11-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-NEXT: s_cbranch_execnz .LBB7_3 ; GFX11-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, exec_lo +; GFX10-NEXT: s_mov_b32 s8, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s9, s8 ; GFX10-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; 
GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_fmax_x2 v[5:6], v4, s[8:11], 0 offen offset:2048 glc +; GFX10-NEXT: buffer_atomic_fmax_x2 v[5:6], v4, s[4:7], 0 offen offset:2048 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s9, s9 ; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: ; implicit-def: $vgpr4 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB7_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s5 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s8 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v5 ; GFX10-NEXT: v_mov_b32_e32 v1, v6 @@ -3312,7 +3304,9 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX12-TRUE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s5, s4 ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-TRUE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX12-TRUE16-NEXT: v_and_b32_e32 v10, -4, v4 @@ -3321,70 +3315,65 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e64 v6, v9, 0xffff ; GFX12-TRUE16-NEXT: v_not_b32_e32 v11, v6 ; GFX12-TRUE16-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: 
v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_load_b32 v6, v10, s[4:7], null offen -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_load_b32 v6, v10, s[0:3], null offen +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB12_1 ; GFX12-TRUE16-NEXT: ; %bb.2: -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v4.l, v5.l, v5.l -; GFX12-TRUE16-NEXT: s_mov_b32 s1, 0 +; GFX12-TRUE16-NEXT: s_mov_b32 s4, 0 ; GFX12-TRUE16-NEXT: .LBB12_3: ; %atomicrmw.start ; GFX12-TRUE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-TRUE16-NEXT: ; Child Loop BB12_4 Depth 2 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v5, v9, v6 ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.h, 0 -; GFX12-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v4.h, v5.l, v5.l -; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v5.l, v4.h, v4.l ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v5.l, v4.h, v4.l ; GFX12-TRUE16-NEXT: 
v_lshlrev_b32_e32 v5, v9, v5 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_and_or_b32 v5, v6, v11, v5 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_dual_mov_b32 v8, v6 :: v_dual_mov_b32 v7, v5 ; GFX12-TRUE16-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1 ; GFX12-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[7:8], v10, s[4:7], null offen th:TH_ATOMIC_RETURN -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[7:8], v10, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB12_4 ; GFX12-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB12_3 Depth=1 -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v7, v6 ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v6, v7 
; GFX12-TRUE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB12_3 ; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v9, v7 ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -3396,7 +3385,9 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-FAKE16-NEXT: s_mov_b32 s5, s4 ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-FAKE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX12-FAKE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -3405,71 +3396,67 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX12-FAKE16-NEXT: v_not_b32_e32 v9, v6 ; GFX12-FAKE16-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; 
GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_load_b32 v6, v8, s[4:7], null offen -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_load_b32 v6, v8, s[0:3], null offen +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB12_1 ; GFX12-FAKE16-NEXT: ; %bb.2: -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v10, v5, v5 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, 0 +; GFX12-FAKE16-NEXT: s_mov_b32 s4, 0 ; GFX12-FAKE16-NEXT: .LBB12_3: ; %atomicrmw.start ; GFX12-FAKE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-FAKE16-NEXT: ; Child Loop BB12_4 Depth 2 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX12-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-FAKE16-NEXT: s_wait_storecnt 0x0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v4 -; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v10 ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v10 ; GFX12-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, v7, v4 ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, v7, v4 ; 
GFX12-FAKE16-NEXT: v_and_or_b32 v5, v6, v9, v4 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v4, v5 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX12-FAKE16-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1 ; GFX12-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], null offen th:TH_ATOMIC_RETURN -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB12_4 ; GFX12-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB12_3 Depth=1 -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX12-FAKE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; 
GFX12-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB12_3 ; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -3547,8 +3534,9 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, -4, v4 @@ -3557,21 +3545,18 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e64 v6, v9, 0xffff ; GFX11-TRUE16-NEXT: v_not_b32_e32 v11, v6 ; GFX11-TRUE16-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], 
v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: buffer_load_b32 v6, v10, s[4:7], 0 offen -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-TRUE16-NEXT: buffer_load_b32 v6, v10, s[0:3], 0 offen +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB12_1 ; GFX11-TRUE16-NEXT: ; %bb.2: -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: v_max_f16_e32 v4.l, v5.l, v5.l ; GFX11-TRUE16-NEXT: .p2align 6 ; GFX11-TRUE16-NEXT: .LBB12_3: ; %atomicrmw.start @@ -3580,45 +3565,42 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, v9, v6 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: v_max_f16_e32 v4.h, v5.l, v5.l -; GFX11-TRUE16-NEXT: v_max_f16_e32 v5.l, v4.h, v4.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_max_f16_e32 v5.l, v4.h, v4.l ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, v9, v5 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_and_or_b32 v5, v6, v11, v5 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, v6 :: v_dual_mov_b32 v7, v5 ; GFX11-TRUE16-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1 ; GFX11-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; 
GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[7:8], v10, s[4:7], 0 offen glc -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[7:8], v10, s[0:3], 0 offen glc +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB12_4 ; GFX11-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB12_3 Depth=1 -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v7, v6 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v7 ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv -; GFX11-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB12_3 ; GFX11-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; 
GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v9, v7 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -3626,8 +3608,9 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -3636,21 +3619,18 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX11-FAKE16-NEXT: v_not_b32_e32 v9, v6 ; GFX11-FAKE16-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-FAKE16-NEXT: buffer_load_b32 v6, v8, s[4:7], 0 offen -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: 
v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-FAKE16-NEXT: buffer_load_b32 v6, v8, s[0:3], 0 offen +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB12_1 ; GFX11-FAKE16-NEXT: ; %bb.2: -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: v_max_f16_e32 v10, v5, v5 ; GFX11-FAKE16-NEXT: .p2align 6 ; GFX11-FAKE16-NEXT: .LBB12_3: ; %atomicrmw.start @@ -3658,8 +3638,9 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu ; GFX11-FAKE16-NEXT: ; Child Loop BB12_4 Depth 2 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_max_f16_e32 v4, v4, v4 ; GFX11-FAKE16-NEXT: v_max_f16_e32 v4, v4, v10 @@ -3672,33 +3653,30 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX11-FAKE16-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1 ; GFX11-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], 0 offen glc -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], 0 offen glc +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB12_4 ; GFX11-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB12_3 Depth=1 -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-FAKE16-NEXT: buffer_gl1_inv ; GFX11-FAKE16-NEXT: buffer_gl0_inv -; GFX11-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB12_3 ; GFX11-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -3706,36 +3684,37 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX10-NEXT: v_and_b32_e32 v8, -4, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, 3, v6 ; GFX10-NEXT: v_lshlrev_b32_e64 v6, v7, 
0xffff ; GFX10-NEXT: v_not_b32_e32 v9, v6 ; GFX10-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX10-NEXT: buffer_load_dword v6, v8, s[4:7], 0 offen +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB12_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: v_max_f16_e32 v10, v5, v5 ; GFX10-NEXT: .LBB12_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB12_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_max_f16_e32 v4, v4, v4 ; GFX10-NEXT: v_max_f16_e32 v4, v4, v10 ; GFX10-NEXT: v_lshlrev_b32_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 @@ -3744,32 +3723,31 @@ define half @buffer_fat_ptr_agent_atomic_fmax_ret_f16__offset__waterfall__amdgpu ; GFX10-NEXT: v_mov_b32_e32 v5, v6 ; GFX10-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1 ; 
GFX10-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen glc +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB12_4 ; GFX10-NEXT: ; %bb.5: ; in Loop: Header=BB12_3 Depth=1 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX10-NEXT: v_mov_b32_e32 v6, v4 ; GFX10-NEXT: buffer_gl1_inv ; GFX10-NEXT: buffer_gl0_inv -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execnz .LBB12_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -5160,7 +5138,9 @@ define bfloat 
@buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX12-TRUE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s5, s4 ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-TRUE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX12-TRUE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -5169,80 +5149,75 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX12-TRUE16-NEXT: v_not_b32_e32 v9, v6 ; GFX12-TRUE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_load_b32 v6, v8, s[4:7], null offen -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_load_b32 v6, v8, s[0:3], null offen +; GFX12-TRUE16-NEXT: 
s_and_not1_wrexec_b32 s5, s5 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_1 ; GFX12-TRUE16-NEXT: ; %bb.2: -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 16, v5 -; GFX12-TRUE16-NEXT: s_mov_b32 s1, 0 +; GFX12-TRUE16-NEXT: s_mov_b32 s4, 0 ; GFX12-TRUE16-NEXT: .LBB15_3: ; %atomicrmw.start ; GFX12-TRUE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-TRUE16-NEXT: ; Child Loop BB15_4 Depth 2 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX12-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_max_num_f32_e32 v4, v4, v10 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX12-TRUE16-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX12-TRUE16-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_add3_u32 v5, v5, v4, 0x7fff ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0) -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-TRUE16-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.h, 0 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.h -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, v7, v5 +; 
GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_and_or_b32 v5, v6, v9, v4 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v4, v5 ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX12-TRUE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX12-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], null offen th:TH_ATOMIC_RETURN -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_4 ; GFX12-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v6, v4 ; 
GFX12-TRUE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_3 ; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -5254,7 +5229,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-FAKE16-NEXT: s_mov_b32 s5, s4 ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-FAKE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX12-FAKE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -5263,79 +5240,74 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX12-FAKE16-NEXT: v_not_b32_e32 v9, v6 ; GFX12-FAKE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; 
GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_load_b32 v6, v8, s[4:7], null offen -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_load_b32 v6, v8, s[0:3], null offen +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB15_1 ; GFX12-FAKE16-NEXT: ; %bb.2: -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v5 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, 0 +; GFX12-FAKE16-NEXT: s_mov_b32 s4, 0 ; GFX12-FAKE16-NEXT: .LBB15_3: ; %atomicrmw.start ; GFX12-FAKE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-FAKE16-NEXT: ; Child Loop BB15_4 Depth 2 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX12-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-FAKE16-NEXT: s_wait_storecnt 0x0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_max_num_f32_e32 v4, v4, v10 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX12-FAKE16-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX12-FAKE16-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | 
instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_add3_u32 v5, v5, v4, 0x7fff ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0) -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo -; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v4 ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v4 ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_and_or_b32 v5, v6, v9, v4 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v4, v5 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX12-FAKE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX12-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], null offen th:TH_ATOMIC_RETURN -; 
GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB15_4 ; GFX12-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX12-FAKE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB15_3 ; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -5418,8 +5390,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -5428,21 +5401,18 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX11-TRUE16-NEXT: v_not_b32_e32 v9, v6 ; GFX11-TRUE16-NEXT: .LBB15_1: ; 
=>This Inner Loop Header: Depth=1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: buffer_load_b32 v6, v8, s[4:7], 0 offen -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-TRUE16-NEXT: buffer_load_b32 v6, v8, s[0:3], 0 offen +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_1 ; GFX11-TRUE16-NEXT: ; %bb.2: -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX11-TRUE16-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX11-TRUE16-NEXT: .p2align 6 @@ -5451,8 +5421,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX11-TRUE16-NEXT: ; Child Loop BB15_4 Depth 2 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; 
GFX11-TRUE16-NEXT: v_max_f32_e32 v4, v4, v10 @@ -5473,34 +5444,31 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX11-TRUE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX11-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], 0 offen glc -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], 0 offen glc +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_4 ; GFX11-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv -; GFX11-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-TRUE16-NEXT: 
s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_3 ; GFX11-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-TRUE16-NEXT: s_set_inst_prefetch_distance 0x2 -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -5508,8 +5476,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -5518,21 +5487,18 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX11-FAKE16-NEXT: v_not_b32_e32 v9, v6 ; GFX11-FAKE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; 
GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-FAKE16-NEXT: buffer_load_b32 v6, v8, s[4:7], 0 offen -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-FAKE16-NEXT: buffer_load_b32 v6, v8, s[0:3], 0 offen +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB15_1 ; GFX11-FAKE16-NEXT: ; %bb.2: -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX11-FAKE16-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX11-FAKE16-NEXT: .p2align 6 @@ -5541,8 +5507,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX11-FAKE16-NEXT: ; Child Loop BB15_4 Depth 2 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX11-FAKE16-NEXT: v_max_f32_e32 v4, v4, v10 @@ -5562,34 +5529,31 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX11-FAKE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX11-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, 
v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], 0 offen glc -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], 0 offen glc +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB15_4 ; GFX11-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-FAKE16-NEXT: buffer_gl1_inv ; GFX11-FAKE16-NEXT: buffer_gl0_inv -; GFX11-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB15_3 ; GFX11-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-FAKE16-NEXT: s_set_inst_prefetch_distance 0x2 -; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -5597,36 +5561,37 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX10-NEXT: v_and_b32_e32 v8, -4, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, 3, v6 ; GFX10-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX10-NEXT: v_not_b32_e32 v9, v6 ; GFX10-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX10-NEXT: buffer_load_dword v6, v8, s[4:7], 0 offen +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB15_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX10-NEXT: .LBB15_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB15_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_sdwa v4, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, 
exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_max_f32_e32 v4, v4, v10 ; GFX10-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX10-NEXT: v_or_b32_e32 v11, 0x400000, v4 @@ -5639,32 +5604,31 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmax_ret_bf16__offset__waterfall__amd ; GFX10-NEXT: v_mov_b32_e32 v5, v6 ; GFX10-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX10-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen glc +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB15_4 ; GFX10-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX10-NEXT: v_mov_b32_e32 v6, v4 ; GFX10-NEXT: buffer_gl1_inv ; GFX10-NEXT: buffer_gl0_inv -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execnz .LBB15_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -6642,67 +6606,65 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_mov_b32 s1, exec_lo +; GFX12-NEXT: s_mov_b32 s4, exec_lo +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 s5, s4 ; GFX12-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_load_b32 v7, v4, s[4:7], null offen offset:1024 -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-NEXT: buffer_load_b32 v7, v4, s[0:3], null offen offset:1024 +; GFX12-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB18_1 ; GFX12-NEXT: ; %bb.2: -; GFX12-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-NEXT: v_pk_max_num_f16 v8, v5, v5 -; GFX12-NEXT: s_mov_b32 s1, 0 +; GFX12-NEXT: s_mov_b32 
s4, 0 ; GFX12-NEXT: .LBB18_3: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-NEXT: ; Child Loop BB18_4 Depth 2 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_pk_max_num_f16 v5, v7, v7 -; GFX12-NEXT: s_mov_b32 s2, exec_lo +; GFX12-NEXT: s_mov_b32 s5, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 s6, s5 ; GFX12-NEXT: v_pk_max_num_f16 v6, v5, v8 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mov_b32_e32 v5, v6 ; GFX12-NEXT: v_mov_b32_e32 v6, v7 ; GFX12-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 ; GFX12-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[4:7], null offen offset:1024 th:TH_ATOMIC_RETURN -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-NEXT: s_cbranch_execnz .LBB18_4 ; GFX12-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 -; GFX12-NEXT: s_mov_b32 exec_lo, s2 
+; GFX12-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX12-NEXT: v_mov_b32_e32 v7, v5 ; GFX12-NEXT: global_inv scope:SCOPE_DEV -; GFX12-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_cbranch_execnz .LBB18_3 ; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: v_mov_b32_e32 v0, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; @@ -6769,24 +6731,22 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s1, 0 -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s5, exec_lo +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b32 s6, s5 ; GFX11-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: buffer_load_b32 v7, v4, s[4:7], 0 offen offset:1024 -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; 
GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-NEXT: buffer_load_b32 v7, v4, s[0:3], 0 offen offset:1024 +; GFX11-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-NEXT: s_cbranch_execnz .LBB18_1 ; GFX11-NEXT: ; %bb.2: -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-NEXT: v_pk_max_f16 v8, v5, v5 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB18_3: ; %atomicrmw.start @@ -6794,103 +6754,101 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall ; GFX11-NEXT: ; Child Loop BB18_4 Depth 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_pk_max_f16 v5, v7, v7 -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s5, exec_lo ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s6, s5 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_pk_max_f16 v6, v5, v8 ; GFX11-NEXT: v_mov_b32_e32 v5, v6 ; GFX11-NEXT: v_mov_b32_e32 v6, v7 ; GFX11-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 ; GFX11-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[4:7], 0 offen offset:1024 glc -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: 
buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], 0 offen offset:1024 glc +; GFX11-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-NEXT: s_cbranch_execnz .LBB18_4 ; GFX11-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX11-NEXT: v_mov_b32_e32 v7, v5 ; GFX11-NEXT: buffer_gl1_inv ; GFX11-NEXT: buffer_gl0_inv -; GFX11-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-NEXT: s_cbranch_execnz .LBB18_3 ; GFX11-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-NEXT: v_mov_b32_e32 v0, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v7, v4, s[8:11], 0 offen offset:1024 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; 
GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX10-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB18_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: v_pk_max_f16 v8, v5, v5 ; GFX10-NEXT: .LBB18_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB18_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_pk_max_f16 v5, v7, v7 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_pk_max_f16 v6, v5, v8 ; GFX10-NEXT: v_mov_b32_e32 v5, v6 ; GFX10-NEXT: v_mov_b32_e32 v6, v7 ; GFX10-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 ; GFX10-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[5:6], v4, s[8:11], 0 offen offset:1024 glc -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: 
buffer_atomic_cmpswap v[5:6], v4, s[4:7], 0 offen offset:1024 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB18_4 ; GFX10-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX10-NEXT: v_mov_b32_e32 v7, v5 ; GFX10-NEXT: buffer_gl1_inv ; GFX10-NEXT: buffer_gl0_inv -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execnz .LBB18_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: v_mov_b32_e32 v0, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -8266,40 +8224,40 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf ; GFX12-TRUE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s5, s4 ; GFX12-TRUE16-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: 
v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_load_b32 v7, v4, s[4:7], null offen offset:1024 -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_load_b32 v7, v4, s[0:3], null offen offset:1024 +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB21_1 ; GFX12-TRUE16-NEXT: ; %bb.2: -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v5 ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 16, v5 -; GFX12-TRUE16-NEXT: s_mov_b32 s1, 0 +; GFX12-TRUE16-NEXT: s_mov_b32 s4, 0 ; GFX12-TRUE16-NEXT: .LBB21_3: ; %atomicrmw.start ; GFX12-TRUE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-TRUE16-NEXT: ; Child Loop BB21_4 Depth 2 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v7 ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v7 -; GFX12-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX12-TRUE16-NEXT: v_dual_max_num_f32 v6, v6, v8 :: v_dual_max_num_f32 v5, v5, v9 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-TRUE16-NEXT: v_bfe_u32 v11, v6, 16, 1 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-TRUE16-NEXT: v_bfe_u32 v10, v5, 16, 1 ; GFX12-TRUE16-NEXT: v_or_b32_e32 v12, 0x400000, v5 ; GFX12-TRUE16-NEXT: 
v_cmp_u_f32_e32 vcc_lo, v5, v5 @@ -8318,33 +8276,30 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v6, v7 ; GFX12-TRUE16-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 ; GFX12-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[4:7], null offen offset:1024 th:TH_ATOMIC_RETURN -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB21_4 ; GFX12-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v7, v5 ; GFX12-TRUE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 
+; GFX12-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB21_3 ; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v5 ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -8355,40 +8310,40 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-FAKE16-NEXT: s_mov_b32 s5, s4 ; GFX12-FAKE16-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_load_b32 v7, v4, s[4:7], null offen offset:1024 -; 
GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_load_b32 v7, v4, s[0:3], null offen offset:1024 +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB21_1 ; GFX12-FAKE16-NEXT: ; %bb.2: -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v5 ; GFX12-FAKE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v5 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, 0 +; GFX12-FAKE16-NEXT: s_mov_b32 s4, 0 ; GFX12-FAKE16-NEXT: .LBB21_3: ; %atomicrmw.start ; GFX12-FAKE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-FAKE16-NEXT: ; Child Loop BB21_4 Depth 2 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v7 ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v7 -; GFX12-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-FAKE16-NEXT: s_wait_storecnt 0x0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX12-FAKE16-NEXT: v_dual_max_num_f32 v6, v6, v9 :: v_dual_max_num_f32 v5, v5, v8 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-FAKE16-NEXT: v_bfe_u32 v11, v6, 16, 1 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-FAKE16-NEXT: v_bfe_u32 v10, v5, 16, 1 ; GFX12-FAKE16-NEXT: v_or_b32_e32 v12, 0x400000, v5 ; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 @@ -8407,33 +8362,30 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v6, v7 ; GFX12-FAKE16-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 ; GFX12-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: 
v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[4:7], null offen offset:1024 th:TH_ATOMIC_RETURN -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB21_4 ; GFX12-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v7, v5 ; GFX12-FAKE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB21_3 ; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v5 ; 
GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -8517,24 +8469,22 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf ; GFX11-TRUE16-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: buffer_load_b32 v7, v4, s[4:7], 0 offen offset:1024 -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-TRUE16-NEXT: buffer_load_b32 v7, v4, s[0:3], 0 offen offset:1024 +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB21_1 ; GFX11-TRUE16-NEXT: ; %bb.2: -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v5 ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 
16, v5 ; GFX11-TRUE16-NEXT: s_set_inst_prefetch_distance 0x1 @@ -8545,8 +8495,9 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v7 ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v7 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_dual_max_f32 v6, v6, v8 :: v_dual_max_f32 v5, v5, v9 ; GFX11-TRUE16-NEXT: v_bfe_u32 v11, v6, 16, 1 @@ -8567,58 +8518,53 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v7 ; GFX11-TRUE16-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 ; GFX11-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[4:7], 0 offen offset:1024 glc -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, 
exec_lo, s0 +; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], 0 offen offset:1024 glc +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB21_4 ; GFX11-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v7, v5 ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv -; GFX11-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB21_3 ; GFX11-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-TRUE16-NEXT: s_set_inst_prefetch_distance 0x2 -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v5 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: 
v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-FAKE16-NEXT: buffer_load_b32 v7, v4, s[4:7], 0 offen offset:1024 -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-FAKE16-NEXT: buffer_load_b32 v7, v4, s[0:3], 0 offen offset:1024 +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB21_1 ; GFX11-FAKE16-NEXT: ; %bb.2: -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v5 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v5 ; GFX11-FAKE16-NEXT: s_set_inst_prefetch_distance 0x1 @@ -8629,8 +8575,9 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v7 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v7 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_dual_max_f32 v6, v6, v9 :: v_dual_max_f32 v5, v5, v8 ; GFX11-FAKE16-NEXT: v_bfe_u32 v11, v6, 16, 1 @@ -8651,57 +8598,54 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v6, v7 ; GFX11-FAKE16-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 ; GFX11-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-FAKE16-NEXT: 
v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[4:7], 0 offen offset:1024 glc -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], 0 offen offset:1024 glc +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB21_4 ; GFX11-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v7, v5 ; GFX11-FAKE16-NEXT: buffer_gl1_inv ; GFX11-FAKE16-NEXT: buffer_gl0_inv -; GFX11-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB21_3 ; GFX11-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-FAKE16-NEXT: s_set_inst_prefetch_distance 0x2 -; 
GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, v5 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v7, v4, s[8:11], 0 offen offset:1024 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX10-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB21_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v5 ; GFX10-NEXT: v_and_b32_e32 v9, 0xffff0000, v5 ; GFX10-NEXT: .LBB21_3: ; %atomicrmw.start @@ -8710,8 +8654,9 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf ; GFX10-NEXT: s_waitcnt 
vmcnt(0) ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v7 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff0000, v7 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_max_f32_e32 v5, v5, v8 ; GFX10-NEXT: v_max_f32_e32 v6, v6, v9 ; GFX10-NEXT: v_bfe_u32 v10, v5, 16, 1 @@ -8729,32 +8674,31 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmax_ret_v2bf16__offset__waterf ; GFX10-NEXT: v_mov_b32_e32 v6, v7 ; GFX10-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 ; GFX10-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[5:6], v4, s[8:11], 0 offen offset:1024 glc -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: buffer_atomic_cmpswap v[5:6], v4, s[4:7], 0 offen offset:1024 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB21_4 ; GFX10-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX10-NEXT: v_mov_b32_e32 v7, v5 ; GFX10-NEXT: buffer_gl1_inv ; GFX10-NEXT: 
buffer_gl0_inv -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execnz .LBB21_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: v_mov_b32_e32 v0, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll index 3f15d35320573..be032802e75d1 100644 --- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll +++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fmin.ll @@ -363,31 +363,31 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgp ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_mov_b32 s1, exec_lo +; GFX12-NEXT: s_mov_b32 s4, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 s5, s4 ; GFX12-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-NEXT: 
v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_atomic_min_num_f32 v5, v4, s[4:7], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-NEXT: buffer_atomic_min_num_f32 v5, v4, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX12-NEXT: ; implicit-def: $vgpr4 -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-NEXT: s_cbranch_execnz .LBB2_1 ; GFX12-NEXT: ; %bb.2: -; GFX12-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_mov_b32_e32 v0, v5 ; GFX12-NEXT: global_inv scope:SCOPE_DEV +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory: @@ -453,27 +453,25 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgp ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s1, exec_lo +; GFX11-NEXT: s_mov_b32 s4, exec_lo ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s5, s4 ; GFX11-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: 
s_and_saveexec_b32 s0, s0 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_min_f32 v5, v4, s[4:7], 0 offen offset:1024 glc +; GFX11-NEXT: buffer_atomic_min_f32 v5, v4, s[0:3], 0 offen offset:1024 glc +; GFX11-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX11-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX11-NEXT: ; implicit-def: $vgpr4 -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB2_1 ; GFX11-NEXT: ; %bb.2: -; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v0, v5 ; GFX11-NEXT: buffer_gl1_inv @@ -483,26 +481,26 @@ define float @buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgp ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f32__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, exec_lo +; GFX10-NEXT: s_mov_b32 s8, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s9, s8 ; GFX10-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_fmin v5, v4, s[8:11], 0 offen offset:1024 glc +; 
GFX10-NEXT: buffer_atomic_fmin v5, v4, s[4:7], 0 offen offset:1024 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s9, s9 ; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: ; implicit-def: $vgpr4 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB2_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s5 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s8 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v5 ; GFX10-NEXT: buffer_gl1_inv @@ -1540,68 +1538,66 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdg ; GFX12-NEXT: s_wait_kmcnt 0x0 ; GFX12-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v7, v2 ; GFX12-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0 -; GFX12-NEXT: s_mov_b32 s1, exec_lo +; GFX12-NEXT: s_mov_b32 s4, exec_lo +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 s5, s4 ; GFX12-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_readfirstlane_b32 s4, v9 -; GFX12-NEXT: v_readfirstlane_b32 s5, v10 -; GFX12-NEXT: v_readfirstlane_b32 s6, v7 -; GFX12-NEXT: v_readfirstlane_b32 s7, v8 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_3) +; GFX12-NEXT: v_readfirstlane_b32 s0, v9 +; GFX12-NEXT: v_readfirstlane_b32 s1, v10 +; GFX12-NEXT: v_readfirstlane_b32 s2, v7 +; GFX12-NEXT: v_readfirstlane_b32 s3, v8 ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[7:8] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[9:10] +; GFX12-NEXT: s_delay_alu 
instid0(VALU_DEP_2) +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[7:8] ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_load_b64 v[13:14], v4, s[4:7], null offen offset:2048 -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-NEXT: buffer_load_b64 v[13:14], v4, s[0:3], null offen offset:2048 +; GFX12-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB7_1 ; GFX12-NEXT: ; %bb.2: -; GFX12-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-NEXT: v_max_num_f64_e32 v[5:6], v[5:6], v[5:6] -; GFX12-NEXT: s_mov_b32 s1, 0 +; GFX12-NEXT: s_mov_b32 s4, 0 ; GFX12-NEXT: .LBB7_3: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-NEXT: ; Child Loop BB7_4 Depth 2 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_max_num_f64_e32 v[0:1], v[13:14], v[13:14] -; GFX12-NEXT: s_mov_b32 s2, exec_lo +; GFX12-NEXT: s_mov_b32 s5, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 s6, s5 ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-NEXT: v_min_num_f64_e32 v[11:12], v[0:1], v[5:6] ; GFX12-NEXT: v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12 ; GFX12-NEXT: v_dual_mov_b32 v2, v13 :: v_dual_mov_b32 v3, v14 ; GFX12-NEXT: .LBB7_4: ; Parent Loop BB7_3 Depth=1 ; GFX12-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-NEXT: v_readfirstlane_b32 s4, v9 -; GFX12-NEXT: v_readfirstlane_b32 s5, v10 -; GFX12-NEXT: v_readfirstlane_b32 s6, v7 -; GFX12-NEXT: v_readfirstlane_b32 s7, v8 +; GFX12-NEXT: v_readfirstlane_b32 s0, v9 +; GFX12-NEXT: v_readfirstlane_b32 s1, v10 +; GFX12-NEXT: v_readfirstlane_b32 s2, v7 +; GFX12-NEXT: v_readfirstlane_b32 s3, v8 ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[7:8] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: 
s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[9:10] +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[7:8] ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v4, s[4:7], null offen offset:2048 th:TH_ATOMIC_RETURN -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v4, s[0:3], null offen offset:2048 th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-NEXT: s_cbranch_execnz .LBB7_4 ; GFX12-NEXT: ; %bb.5: ; in Loop: Header=BB7_3 Depth=1 -; GFX12-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[13:14] ; GFX12-NEXT: v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v13, v0 ; GFX12-NEXT: global_inv scope:SCOPE_DEV -; GFX12-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_cbranch_execnz .LBB7_3 ; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; ; GFX942-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: @@ -1641,25 +1637,23 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdg ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v7, v2 ; GFX11-NEXT: v_dual_mov_b32 v10, v1 :: v_dual_mov_b32 v9, v0 -; GFX11-NEXT: s_mov_b32 s1, 0 -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s5, exec_lo +; GFX11-NEXT: s_mov_b32 s4, 0 +; GFX11-NEXT: s_mov_b32 s6, s5 ; GFX11-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; 
GFX11-NEXT: v_readfirstlane_b32 s4, v9 -; GFX11-NEXT: v_readfirstlane_b32 s5, v10 -; GFX11-NEXT: v_readfirstlane_b32 s6, v7 -; GFX11-NEXT: v_readfirstlane_b32 s7, v8 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[7:8] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: buffer_load_b64 v[13:14], v4, s[4:7], 0 offen offset:2048 -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_readfirstlane_b32 s0, v9 +; GFX11-NEXT: v_readfirstlane_b32 s1, v10 +; GFX11-NEXT: v_readfirstlane_b32 s2, v7 +; GFX11-NEXT: v_readfirstlane_b32 s3, v8 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[9:10] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[7:8] +; GFX11-NEXT: buffer_load_b64 v[13:14], v4, s[0:3], 0 offen offset:2048 +; GFX11-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-NEXT: s_cbranch_execnz .LBB7_1 ; GFX11-NEXT: ; %bb.2: -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-NEXT: v_max_f64 v[5:6], v[5:6], v[5:6] ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB7_3: ; %atomicrmw.start @@ -1667,66 +1661,64 @@ define double @buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdg ; GFX11-NEXT: ; Child Loop BB7_4 Depth 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_max_f64 v[0:1], v[13:14], v[13:14] -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s5, exec_lo ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s6, s5 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_min_f64 v[11:12], v[0:1], v[5:6] ; GFX11-NEXT: v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12 ; GFX11-NEXT: v_dual_mov_b32 v2, v13 :: 
v_dual_mov_b32 v3, v14 ; GFX11-NEXT: .LBB7_4: ; Parent Loop BB7_3 Depth=1 ; GFX11-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-NEXT: v_readfirstlane_b32 s4, v9 -; GFX11-NEXT: v_readfirstlane_b32 s5, v10 -; GFX11-NEXT: v_readfirstlane_b32 s6, v7 -; GFX11-NEXT: v_readfirstlane_b32 s7, v8 +; GFX11-NEXT: v_readfirstlane_b32 s0, v9 +; GFX11-NEXT: v_readfirstlane_b32 s1, v10 +; GFX11-NEXT: v_readfirstlane_b32 s2, v7 +; GFX11-NEXT: v_readfirstlane_b32 s3, v8 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[9:10] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[7:8] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[9:10] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[7:8] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v4, s[4:7], 0 offen offset:2048 glc -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: buffer_atomic_cmpswap_b64 v[0:3], v4, s[0:3], 0 offen offset:2048 glc +; GFX11-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-NEXT: s_cbranch_execnz .LBB7_4 ; GFX11-NEXT: ; %bb.5: ; in Loop: Header=BB7_3 Depth=1 -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, v[0:1], v[13:14] ; GFX11-NEXT: v_dual_mov_b32 v14, v1 :: v_dual_mov_b32 v13, v0 ; GFX11-NEXT: buffer_gl1_inv ; GFX11-NEXT: buffer_gl0_inv -; GFX11-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-NEXT: s_cbranch_execnz .LBB7_3 ; GFX11-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; 
GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_f64__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, exec_lo +; GFX10-NEXT: s_mov_b32 s8, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s9, s8 ; GFX10-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_fmin_x2 v[5:6], v4, s[8:11], 0 offen offset:2048 glc +; GFX10-NEXT: buffer_atomic_fmin_x2 v[5:6], v4, s[4:7], 0 offen offset:2048 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s9, s9 ; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: ; implicit-def: $vgpr4 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB7_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s5 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s8 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_mov_b32_e32 v0, v5 ; GFX10-NEXT: v_mov_b32_e32 v1, v6 @@ -3312,7 +3304,9 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x200, 
v4 -; GFX12-TRUE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s5, s4 ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-TRUE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX12-TRUE16-NEXT: v_and_b32_e32 v10, -4, v4 @@ -3321,70 +3315,65 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e64 v6, v9, 0xffff ; GFX12-TRUE16-NEXT: v_not_b32_e32 v11, v6 ; GFX12-TRUE16-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_load_b32 v6, v10, s[4:7], null offen -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_load_b32 v6, v10, s[0:3], null offen +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB12_1 ; GFX12-TRUE16-NEXT: ; %bb.2: -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s4 ; 
GFX12-TRUE16-NEXT: v_max_num_f16_e32 v4.l, v5.l, v5.l -; GFX12-TRUE16-NEXT: s_mov_b32 s1, 0 +; GFX12-TRUE16-NEXT: s_mov_b32 s4, 0 ; GFX12-TRUE16-NEXT: .LBB12_3: ; %atomicrmw.start ; GFX12-TRUE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-TRUE16-NEXT: ; Child Loop BB12_4 Depth 2 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v5, v9, v6 ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.h, 0 -; GFX12-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX12-TRUE16-NEXT: v_max_num_f16_e32 v4.h, v5.l, v5.l -; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v5.l, v4.h, v4.l ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-TRUE16-NEXT: v_min_num_f16_e32 v5.l, v4.h, v4.l ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, v9, v5 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_and_or_b32 v5, v6, v11, v5 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_dual_mov_b32 v8, v6 :: v_dual_mov_b32 v7, v5 ; GFX12-TRUE16-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1 ; GFX12-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 
vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[7:8], v10, s[4:7], null offen th:TH_ATOMIC_RETURN -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[7:8], v10, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB12_4 ; GFX12-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB12_3 Depth=1 -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v7, v6 ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v6, v7 ; GFX12-TRUE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB12_3 ; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v9, v7 ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -3396,7 +3385,9 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; 
GFX12-FAKE16-NEXT: s_mov_b32 s5, s4 ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-FAKE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX12-FAKE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -3405,71 +3396,67 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX12-FAKE16-NEXT: v_not_b32_e32 v9, v6 ; GFX12-FAKE16-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_load_b32 v6, v8, s[4:7], null offen -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_load_b32 v6, v8, s[0:3], null offen +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB12_1 ; GFX12-FAKE16-NEXT: ; %bb.2: -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v10, v5, v5 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, 0 +; GFX12-FAKE16-NEXT: s_mov_b32 s4, 0 ; GFX12-FAKE16-NEXT: .LBB12_3: ; 
%atomicrmw.start ; GFX12-FAKE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-FAKE16-NEXT: ; Child Loop BB12_4 Depth 2 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX12-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-FAKE16-NEXT: s_wait_storecnt 0x0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX12-FAKE16-NEXT: v_max_num_f16_e32 v4, v4, v4 -; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v4, v4, v10 ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: v_min_num_f16_e32 v4, v4, v10 ; GFX12-FAKE16-NEXT: v_and_b32_e32 v4, 0xffff, v4 -; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, v7, v4 ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, v7, v4 ; GFX12-FAKE16-NEXT: v_and_or_b32 v5, v6, v9, v4 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v4, v5 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX12-FAKE16-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1 ; GFX12-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 
s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], null offen th:TH_ATOMIC_RETURN -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB12_4 ; GFX12-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB12_3 Depth=1 -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX12-FAKE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB12_3 ; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -3547,8 +3534,9 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | 
instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v10, -4, v4 @@ -3557,21 +3545,18 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e64 v6, v9, 0xffff ; GFX11-TRUE16-NEXT: v_not_b32_e32 v11, v6 ; GFX11-TRUE16-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: buffer_load_b32 v6, v10, s[4:7], 0 offen -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-TRUE16-NEXT: buffer_load_b32 v6, v10, s[0:3], 0 offen +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB12_1 ; GFX11-TRUE16-NEXT: ; %bb.2: -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: v_max_f16_e32 v4.l, v5.l, v5.l ; GFX11-TRUE16-NEXT: .p2align 6 ; GFX11-TRUE16-NEXT: .LBB12_3: ; %atomicrmw.start @@ -3580,45 +3565,42 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v5, v9, v6 ; 
GFX11-TRUE16-NEXT: v_mov_b16_e32 v5.h, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: v_max_f16_e32 v4.h, v5.l, v5.l -; GFX11-TRUE16-NEXT: v_min_f16_e32 v5.l, v4.h, v4.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-TRUE16-NEXT: v_min_f16_e32 v5.l, v4.h, v4.l ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, v9, v5 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_and_or_b32 v5, v6, v11, v5 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_dual_mov_b32 v8, v6 :: v_dual_mov_b32 v7, v5 ; GFX11-TRUE16-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1 ; GFX11-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[7:8], v10, s[4:7], 0 offen glc -; GFX11-TRUE16-NEXT: 
s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[7:8], v10, s[0:3], 0 offen glc +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB12_4 ; GFX11-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB12_3 Depth=1 -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v7, v6 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v7 ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv -; GFX11-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB12_3 ; GFX11-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v9, v7 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -3626,8 +3608,9 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -3636,21 +3619,18 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX11-FAKE16-NEXT: v_not_b32_e32 v9, v6 ; GFX11-FAKE16-NEXT: .LBB12_1: ; =>This Inner 
Loop Header: Depth=1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-FAKE16-NEXT: buffer_load_b32 v6, v8, s[4:7], 0 offen -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-FAKE16-NEXT: buffer_load_b32 v6, v8, s[0:3], 0 offen +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB12_1 ; GFX11-FAKE16-NEXT: ; %bb.2: -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: v_max_f16_e32 v10, v5, v5 ; GFX11-FAKE16-NEXT: .p2align 6 ; GFX11-FAKE16-NEXT: .LBB12_3: ; %atomicrmw.start @@ -3658,8 +3638,9 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu ; GFX11-FAKE16-NEXT: ; Child Loop BB12_4 Depth 2 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_max_f16_e32 v4, v4, v4 ; GFX11-FAKE16-NEXT: v_min_f16_e32 
v4, v4, v10 @@ -3672,33 +3653,30 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX11-FAKE16-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1 ; GFX11-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], 0 offen glc -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], 0 offen glc +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB12_4 ; GFX11-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB12_3 Depth=1 -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-FAKE16-NEXT: buffer_gl1_inv ; GFX11-FAKE16-NEXT: buffer_gl0_inv -; GFX11-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 
-; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB12_3 ; GFX11-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -3706,36 +3684,37 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX10-NEXT: v_and_b32_e32 v8, -4, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, 3, v6 ; GFX10-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX10-NEXT: v_not_b32_e32 v9, v6 ; GFX10-NEXT: .LBB12_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX10-NEXT: buffer_load_dword v6, v8, s[4:7], 0 offen +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; 
GFX10-NEXT: s_cbranch_execnz .LBB12_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: v_max_f16_e32 v10, v5, v5 ; GFX10-NEXT: .LBB12_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB12_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_max_f16_e32 v4, v4, v4 ; GFX10-NEXT: v_min_f16_e32 v4, v4, v10 ; GFX10-NEXT: v_lshlrev_b32_sdwa v4, v7, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 @@ -3744,32 +3723,31 @@ define half @buffer_fat_ptr_agent_atomic_fmin_ret_f16__offset__waterfall__amdgpu ; GFX10-NEXT: v_mov_b32_e32 v5, v6 ; GFX10-NEXT: .LBB12_4: ; Parent Loop BB12_3 Depth=1 ; GFX10-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen glc +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; 
GFX10-NEXT: s_cbranch_execnz .LBB12_4 ; GFX10-NEXT: ; %bb.5: ; in Loop: Header=BB12_3 Depth=1 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX10-NEXT: v_mov_b32_e32 v6, v4 ; GFX10-NEXT: buffer_gl1_inv ; GFX10-NEXT: buffer_gl0_inv -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execnz .LBB12_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -5160,7 +5138,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX12-TRUE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s5, s4 ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-TRUE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX12-TRUE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -5169,80 +5149,75 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX12-TRUE16-NEXT: v_not_b32_e32 v9, v6 ; GFX12-TRUE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: 
v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_load_b32 v6, v8, s[4:7], null offen -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_load_b32 v6, v8, s[0:3], null offen +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_1 ; GFX12-TRUE16-NEXT: ; %bb.2: -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 16, v5 -; GFX12-TRUE16-NEXT: s_mov_b32 s1, 0 +; GFX12-TRUE16-NEXT: s_mov_b32 s4, 0 ; GFX12-TRUE16-NEXT: .LBB15_3: ; %atomicrmw.start ; GFX12-TRUE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-TRUE16-NEXT: ; Child Loop BB15_4 Depth 2 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX12-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_min_num_f32_e32 v4, v4, v10 -; GFX12-TRUE16-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX12-TRUE16-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX12-TRUE16-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_add3_u32 v5, v5, v4, 0x7fff ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_vcc(0) -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX12-TRUE16-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.h, 0 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v5.l, v4.h -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v4, v7, v5 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_and_or_b32 v5, v6, v9, v4 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v4, v5 ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX12-TRUE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX12-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; 
GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], null offen th:TH_ATOMIC_RETURN -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_4 ; GFX12-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX12-TRUE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB15_3 ; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -5254,7 +5229,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 ; GFX12-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-FAKE16-NEXT: s_mov_b32 s5, s4 ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; 
GFX12-FAKE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX12-FAKE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -5263,79 +5240,74 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX12-FAKE16-NEXT: v_not_b32_e32 v9, v6 ; GFX12-FAKE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_load_b32 v6, v8, s[4:7], null offen -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_load_b32 v6, v8, s[0:3], null offen +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB15_1 ; GFX12-FAKE16-NEXT: ; %bb.2: -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v5 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, 0 +; GFX12-FAKE16-NEXT: s_mov_b32 s4, 0 ; GFX12-FAKE16-NEXT: .LBB15_3: ; %atomicrmw.start ; GFX12-FAKE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-FAKE16-NEXT: ; Child Loop BB15_4 Depth 2 ; 
GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX12-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-FAKE16-NEXT: s_wait_storecnt 0x0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v4 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_min_num_f32_e32 v4, v4, v10 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3) ; GFX12-FAKE16-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX12-FAKE16-NEXT: v_or_b32_e32 v11, 0x400000, v4 ; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v4, v4 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_add3_u32 v5, v5, v4, 0x7fff ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_vcc(0) -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_cndmask_b32_e32 v4, v5, v11, vcc_lo -; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v4 ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v4 ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v4, v7, v4 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_and_or_b32 v5, v6, v9, v4 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v4, v5 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX12-FAKE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX12-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: 
v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], null offen th:TH_ATOMIC_RETURN -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], null offen th:TH_ATOMIC_RETURN +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB15_4 ; GFX12-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX12-FAKE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB15_3 ; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX12-FAKE16-NEXT: 
s_setpc_b64 s[30:31] ; @@ -5418,8 +5390,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-TRUE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, -4, v4 @@ -5428,21 +5401,18 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX11-TRUE16-NEXT: v_not_b32_e32 v9, v6 ; GFX11-TRUE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: buffer_load_b32 v6, v8, s[4:7], 0 offen -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-TRUE16-NEXT: buffer_load_b32 v6, v8, s[0:3], 0 offen +; GFX11-TRUE16-NEXT: 
s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_1 ; GFX11-TRUE16-NEXT: ; %bb.2: -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX11-TRUE16-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX11-TRUE16-NEXT: .p2align 6 @@ -5451,8 +5421,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX11-TRUE16-NEXT: ; Child Loop BB15_4 Depth 2 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX11-TRUE16-NEXT: v_min_f32_e32 v4, v4, v10 @@ -5473,34 +5444,31 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX11-TRUE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX11-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 
s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], 0 offen glc -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], 0 offen glc +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_4 ; GFX11-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv -; GFX11-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB15_3 ; GFX11-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-TRUE16-NEXT: s_set_inst_prefetch_distance 0x2 -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -5508,8 +5476,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX11-FAKE16-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX11-FAKE16-NEXT: 
v_and_b32_e32 v8, -4, v4 @@ -5518,21 +5487,18 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX11-FAKE16-NEXT: v_not_b32_e32 v9, v6 ; GFX11-FAKE16-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-FAKE16-NEXT: buffer_load_b32 v6, v8, s[4:7], 0 offen -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-FAKE16-NEXT: buffer_load_b32 v6, v8, s[0:3], 0 offen +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB15_1 ; GFX11-FAKE16-NEXT: ; %bb.2: -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX11-FAKE16-NEXT: s_set_inst_prefetch_distance 0x1 ; GFX11-FAKE16-NEXT: .p2align 6 @@ -5541,8 +5507,9 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX11-FAKE16-NEXT: ; Child Loop BB15_4 Depth 2 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, v7, v6 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: 
s_mov_b32 s5, exec_lo ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v4, 16, v4 ; GFX11-FAKE16-NEXT: v_min_f32_e32 v4, v4, v10 @@ -5562,34 +5529,31 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v5, v6 ; GFX11-FAKE16-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX11-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[4:7], 0 offen glc -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[4:5], v8, s[0:3], 0 offen glc +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB15_4 ; GFX11-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: 
v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v6, v4 ; GFX11-FAKE16-NEXT: buffer_gl1_inv ; GFX11-FAKE16-NEXT: buffer_gl0_inv -; GFX11-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB15_3 ; GFX11-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-FAKE16-NEXT: s_set_inst_prefetch_distance 0x2 -; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -5597,36 +5561,37 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_add_nc_u32_e32 v4, 0x200, v4 -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_and_b32_e32 v6, 3, v4 ; GFX10-NEXT: v_and_b32_e32 v8, -4, v4 ; GFX10-NEXT: v_lshlrev_b32_e32 v7, 3, v6 ; GFX10-NEXT: v_lshlrev_b32_e64 v6, v7, 0xffff ; GFX10-NEXT: v_not_b32_e32 v9, v6 ; GFX10-NEXT: .LBB15_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v6, v8, s[8:11], 0 offen -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 
s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX10-NEXT: buffer_load_dword v6, v8, s[4:7], 0 offen +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB15_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: v_lshlrev_b32_e32 v10, 16, v5 ; GFX10-NEXT: .LBB15_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB15_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_sdwa v4, v7, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_min_f32_e32 v4, v4, v10 ; GFX10-NEXT: v_bfe_u32 v5, v4, 16, 1 ; GFX10-NEXT: v_or_b32_e32 v11, 0x400000, v4 @@ -5639,32 +5604,31 @@ define bfloat @buffer_fat_ptr_agent_atomic_fmin_ret_bf16__offset__waterfall__amd ; GFX10-NEXT: v_mov_b32_e32 v5, v6 ; GFX10-NEXT: .LBB15_4: ; Parent Loop BB15_3 Depth=1 ; GFX10-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: 
v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[8:11], 0 offen glc -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: buffer_atomic_cmpswap v[4:5], v8, s[4:7], 0 offen glc +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB15_4 ; GFX10-NEXT: ; %bb.5: ; in Loop: Header=BB15_3 Depth=1 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v4, v6 ; GFX10-NEXT: v_mov_b32_e32 v6, v4 ; GFX10-NEXT: buffer_gl1_inv ; GFX10-NEXT: buffer_gl0_inv -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execnz .LBB15_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: v_lshrrev_b32_e32 v0, v7, v4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -6642,67 +6606,65 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall ; GFX12-NEXT: s_wait_samplecnt 0x0 ; GFX12-NEXT: s_wait_bvhcnt 0x0 ; GFX12-NEXT: s_wait_kmcnt 0x0 -; GFX12-NEXT: s_mov_b32 s1, exec_lo +; GFX12-NEXT: s_mov_b32 s4, exec_lo +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 s5, s4 ; GFX12-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-NEXT: s_wait_alu 
depctr_va_sdst(0) ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_load_b32 v7, v4, s[4:7], null offen offset:1024 -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-NEXT: buffer_load_b32 v7, v4, s[0:3], null offen offset:1024 +; GFX12-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-NEXT: s_cbranch_execnz .LBB18_1 ; GFX12-NEXT: ; %bb.2: -; GFX12-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-NEXT: v_pk_max_num_f16 v8, v5, v5 -; GFX12-NEXT: s_mov_b32 s1, 0 +; GFX12-NEXT: s_mov_b32 s4, 0 ; GFX12-NEXT: .LBB18_3: ; %atomicrmw.start ; GFX12-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-NEXT: ; Child Loop BB18_4 Depth 2 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_pk_max_num_f16 v5, v7, v7 -; GFX12-NEXT: s_mov_b32 s2, exec_lo +; GFX12-NEXT: s_mov_b32 s5, exec_lo ; GFX12-NEXT: s_wait_storecnt 0x0 -; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-NEXT: s_mov_b32 s6, s5 ; GFX12-NEXT: v_pk_min_num_f16 v6, v5, v8 +; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX12-NEXT: v_mov_b32_e32 v5, v6 ; GFX12-NEXT: v_mov_b32_e32 v6, v7 ; GFX12-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 ; GFX12-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-NEXT: v_readfirstlane_b32 
s3, v3 ; GFX12-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-NEXT: s_wait_loadcnt 0x0 -; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[4:7], null offen offset:1024 th:TH_ATOMIC_RETURN -; GFX12-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-NEXT: s_cbranch_execnz .LBB18_4 ; GFX12-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 -; GFX12-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-NEXT: s_wait_loadcnt 0x0 ; GFX12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX12-NEXT: v_mov_b32_e32 v7, v5 ; GFX12-NEXT: global_inv scope:SCOPE_DEV -; GFX12-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: s_cbranch_execnz .LBB18_3 ; GFX12-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-NEXT: v_mov_b32_e32 v0, v5 ; GFX12-NEXT: s_setpc_b64 s[30:31] ; @@ -6769,24 +6731,22 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall ; GFX11-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s1, 0 -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s5, exec_lo +; GFX11-NEXT: s_mov_b32 
s4, 0 +; GFX11-NEXT: s_mov_b32 s6, s5 ; GFX11-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: buffer_load_b32 v7, v4, s[4:7], 0 offen offset:1024 -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-NEXT: buffer_load_b32 v7, v4, s[0:3], 0 offen offset:1024 +; GFX11-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-NEXT: s_cbranch_execnz .LBB18_1 ; GFX11-NEXT: ; %bb.2: -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-NEXT: v_pk_max_f16 v8, v5, v5 ; GFX11-NEXT: .p2align 6 ; GFX11-NEXT: .LBB18_3: ; %atomicrmw.start @@ -6794,103 +6754,101 @@ define <2 x half> @buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall ; GFX11-NEXT: ; Child Loop BB18_4 Depth 2 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_pk_max_f16 v5, v7, v7 -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s5, exec_lo ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: s_mov_b32 s6, s5 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-NEXT: v_pk_min_f16 v6, v5, v8 ; GFX11-NEXT: v_mov_b32_e32 v5, v6 ; GFX11-NEXT: v_mov_b32_e32 v6, v7 ; GFX11-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 ; GFX11-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-NEXT: 
v_readfirstlane_b32 s4, v0 -; GFX11-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[4:7], 0 offen offset:1024 glc -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], 0 offen offset:1024 glc +; GFX11-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-NEXT: s_cbranch_execnz .LBB18_4 ; GFX11-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX11-NEXT: v_mov_b32_e32 v7, v5 ; GFX11-NEXT: buffer_gl1_inv ; GFX11-NEXT: buffer_gl0_inv -; GFX11-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-NEXT: s_cbranch_execnz .LBB18_3 ; GFX11-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-NEXT: v_mov_b32_e32 v0, v5 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2f16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: .LBB18_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v7, v4, s[8:11], 0 offen offset:1024 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX10-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB18_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: v_pk_max_f16 v8, v5, v5 ; GFX10-NEXT: .LBB18_3: ; %atomicrmw.start ; GFX10-NEXT: ; =>This Loop Header: Depth=1 ; GFX10-NEXT: ; Child Loop BB18_4 Depth 2 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_pk_max_f16 v5, v7, v7 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_pk_min_f16 v6, v5, v8 ; GFX10-NEXT: v_mov_b32_e32 v5, v6 ; GFX10-NEXT: v_mov_b32_e32 v6, v7 ; GFX10-NEXT: .LBB18_4: ; Parent Loop BB18_3 Depth=1 ; GFX10-NEXT: ; => This Inner Loop Header: Depth=2 -; 
GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[5:6], v4, s[8:11], 0 offen offset:1024 glc -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: buffer_atomic_cmpswap v[5:6], v4, s[4:7], 0 offen offset:1024 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB18_4 ; GFX10-NEXT: ; %bb.5: ; in Loop: Header=BB18_3 Depth=1 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX10-NEXT: v_mov_b32_e32 v7, v5 ; GFX10-NEXT: buffer_gl1_inv ; GFX10-NEXT: buffer_gl0_inv -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execnz .LBB18_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: v_mov_b32_e32 v0, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; @@ -8266,40 +8224,40 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf ; GFX12-TRUE16-NEXT: 
s_wait_samplecnt 0x0 ; GFX12-TRUE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-TRUE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-TRUE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s5, s4 ; GFX12-TRUE16-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_load_b32 v7, v4, s[4:7], null offen offset:1024 -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_load_b32 v7, v4, s[0:3], null offen offset:1024 +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB21_1 ; GFX12-TRUE16-NEXT: ; %bb.2: -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v5 ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 16, v5 -; GFX12-TRUE16-NEXT: s_mov_b32 s1, 0 +; GFX12-TRUE16-NEXT: s_mov_b32 s4, 0 ; GFX12-TRUE16-NEXT: .LBB21_3: ; %atomicrmw.start ; GFX12-TRUE16-NEXT: ; =>This Loop Header: Depth=1 ; 
GFX12-TRUE16-NEXT: ; Child Loop BB21_4 Depth 2 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v7 ; GFX12-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v7 -; GFX12-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-TRUE16-NEXT: s_wait_storecnt 0x0 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX12-TRUE16-NEXT: v_dual_min_num_f32 v6, v6, v8 :: v_dual_min_num_f32 v5, v5, v9 +; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-TRUE16-NEXT: v_bfe_u32 v11, v6, 16, 1 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-TRUE16-NEXT: v_bfe_u32 v10, v5, 16, 1 ; GFX12-TRUE16-NEXT: v_or_b32_e32 v12, 0x400000, v5 ; GFX12-TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 @@ -8318,33 +8276,30 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v6, v7 ; GFX12-TRUE16-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 ; GFX12-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; 
GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[4:7], null offen offset:1024 th:TH_ATOMIC_RETURN -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB21_4 ; GFX12-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v7, v5 ; GFX12-TRUE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB21_3 ; GFX12-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-TRUE16-NEXT: v_mov_b32_e32 v0, v5 ; GFX12-TRUE16-NEXT: s_setpc_b64 s[30:31] ; @@ -8355,40 +8310,40 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf ; GFX12-FAKE16-NEXT: s_wait_samplecnt 0x0 ; GFX12-FAKE16-NEXT: s_wait_bvhcnt 0x0 ; GFX12-FAKE16-NEXT: s_wait_kmcnt 0x0 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX12-FAKE16-NEXT: s_mov_b32 s5, s4 ; GFX12-FAKE16-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: 
v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_load_b32 v7, v4, s[4:7], null offen offset:1024 -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_load_b32 v7, v4, s[0:3], null offen offset:1024 +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB21_1 ; GFX12-FAKE16-NEXT: ; %bb.2: -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v5 ; GFX12-FAKE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v5 -; GFX12-FAKE16-NEXT: s_mov_b32 s1, 0 +; GFX12-FAKE16-NEXT: s_mov_b32 s4, 0 ; GFX12-FAKE16-NEXT: .LBB21_3: ; %atomicrmw.start ; GFX12-FAKE16-NEXT: ; =>This Loop Header: Depth=1 ; GFX12-FAKE16-NEXT: ; Child Loop BB21_4 Depth 2 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v7 ; GFX12-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v7 -; GFX12-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX12-FAKE16-NEXT: s_wait_storecnt 0x0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) 
+; GFX12-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX12-FAKE16-NEXT: v_dual_min_num_f32 v6, v6, v9 :: v_dual_min_num_f32 v5, v5, v8 +; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) ; GFX12-FAKE16-NEXT: v_bfe_u32 v11, v6, 16, 1 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) ; GFX12-FAKE16-NEXT: v_bfe_u32 v10, v5, 16, 1 ; GFX12-FAKE16-NEXT: v_or_b32_e32 v12, 0x400000, v5 ; GFX12-FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v5, v5 @@ -8407,33 +8362,30 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v6, v7 ; GFX12-FAKE16-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 ; GFX12-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[4:7], null offen offset:1024 th:TH_ATOMIC_RETURN -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX12-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 
s6, s6 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB21_4 ; GFX12-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 -; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v7, v5 ; GFX12-FAKE16-NEXT: global_inv scope:SCOPE_DEV -; GFX12-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB21_3 ; GFX12-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX12-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX12-FAKE16-NEXT: v_mov_b32_e32 v0, v5 ; GFX12-FAKE16-NEXT: s_setpc_b64 s[30:31] ; @@ -8517,24 +8469,22 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf ; GFX11-TRUE16-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_mov_b32 s1, 0 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s4, 0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; 
GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: buffer_load_b32 v7, v4, s[4:7], 0 offen offset:1024 -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-TRUE16-NEXT: buffer_load_b32 v7, v4, s[0:3], 0 offen offset:1024 +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB21_1 ; GFX11-TRUE16-NEXT: ; %bb.2: -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: v_and_b32_e32 v8, 0xffff0000, v5 ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v9, 16, v5 ; GFX11-TRUE16-NEXT: s_set_inst_prefetch_distance 0x1 @@ -8545,8 +8495,9 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v7 ; GFX11-TRUE16-NEXT: v_lshlrev_b32_e32 v5, 16, v7 -; GFX11-TRUE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-TRUE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-TRUE16-NEXT: s_mov_b32 s6, s5 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_dual_min_f32 v6, v6, v8 :: v_dual_min_f32 v5, v5, v9 ; GFX11-TRUE16-NEXT: v_bfe_u32 v11, v6, 16, 1 @@ -8567,58 +8518,53 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v6, v7 ; GFX11-TRUE16-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 ; GFX11-TRUE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 
-; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[4:7], 0 offen offset:1024 glc -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-TRUE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], 0 offen offset:1024 glc +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB21_4 ; GFX11-TRUE16-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX11-TRUE16-NEXT: v_mov_b32_e32 v7, v5 ; GFX11-TRUE16-NEXT: buffer_gl1_inv ; GFX11-TRUE16-NEXT: buffer_gl0_inv -; GFX11-TRUE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB21_3 ; GFX11-TRUE16-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-TRUE16-NEXT: s_set_inst_prefetch_distance 0x2 -; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-TRUE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-TRUE16-NEXT: 
v_mov_b32_e32 v0, v5 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX11-FAKE16: ; %bb.0: ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_mov_b32 s1, 0 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s4, 0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-FAKE16-NEXT: buffer_load_b32 v7, v4, s[4:7], 0 offen offset:1024 -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-FAKE16-NEXT: buffer_load_b32 v7, v4, s[0:3], 0 offen offset:1024 +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB21_1 ; GFX11-FAKE16-NEXT: ; %bb.2: -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v8, 16, v5 ; GFX11-FAKE16-NEXT: v_and_b32_e32 v9, 0xffff0000, v5 ; GFX11-FAKE16-NEXT: s_set_inst_prefetch_distance 0x1 @@ -8629,8 +8575,9 @@ 
define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_and_b32_e32 v6, 0xffff0000, v7 ; GFX11-FAKE16-NEXT: v_lshlrev_b32_e32 v5, 16, v7 -; GFX11-FAKE16-NEXT: s_mov_b32 s2, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s5, exec_lo ; GFX11-FAKE16-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-FAKE16-NEXT: s_mov_b32 s6, s5 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-FAKE16-NEXT: v_dual_min_f32 v6, v6, v9 :: v_dual_min_f32 v5, v5, v8 ; GFX11-FAKE16-NEXT: v_bfe_u32 v11, v6, 16, 1 @@ -8651,57 +8598,54 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v6, v7 ; GFX11-FAKE16-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 ; GFX11-FAKE16-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[4:7], 0 offen offset:1024 glc -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX11-FAKE16-NEXT: buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], 0 
offen offset:1024 glc +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB21_4 ; GFX11-FAKE16-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s5 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v7, v5 ; GFX11-FAKE16-NEXT: buffer_gl1_inv ; GFX11-FAKE16-NEXT: buffer_gl0_inv -; GFX11-FAKE16-NEXT: s_or_b32 s1, vcc_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 s4, vcc_lo, s4 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB21_3 ; GFX11-FAKE16-NEXT: ; %bb.6: ; %atomicrmw.end ; GFX11-FAKE16-NEXT: s_set_inst_prefetch_distance 0x2 -; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX11-FAKE16-NEXT: s_or_b32 exec_lo, exec_lo, s4 ; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, v5 ; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterfall__amdgpu_no_fine_grained_memory: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, 0 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo +; GFX10-NEXT: s_mov_b32 s8, 0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: .LBB21_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 -; GFX10-NEXT: buffer_load_dword v7, v4, s[8:11], 0 offen offset:1024 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 
exec_lo, exec_lo, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX10-NEXT: buffer_load_dword v7, v4, s[4:7], 0 offen offset:1024 +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB21_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: v_lshlrev_b32_e32 v8, 16, v5 ; GFX10-NEXT: v_and_b32_e32 v9, 0xffff0000, v5 ; GFX10-NEXT: .LBB21_3: ; %atomicrmw.start @@ -8710,8 +8654,9 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshlrev_b32_e32 v5, 16, v7 ; GFX10-NEXT: v_and_b32_e32 v6, 0xffff0000, v7 -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s9, exec_lo ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-NEXT: s_mov_b32 s10, s9 ; GFX10-NEXT: v_min_f32_e32 v5, v5, v8 ; GFX10-NEXT: v_min_f32_e32 v6, v6, v9 ; GFX10-NEXT: v_bfe_u32 v10, v5, 16, 1 @@ -8729,32 +8674,31 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fmin_ret_v2bf16__offset__waterf ; GFX10-NEXT: v_mov_b32_e32 v6, v7 ; GFX10-NEXT: .LBB21_4: ; Parent Loop BB21_3 Depth=1 ; GFX10-NEXT: ; => This Inner Loop Header: Depth=2 -; GFX10-NEXT: v_readfirstlane_b32 s8, v0 -; GFX10-NEXT: v_readfirstlane_b32 s9, v1 -; GFX10-NEXT: v_readfirstlane_b32 s10, v2 -; GFX10-NEXT: v_readfirstlane_b32 s11, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-NEXT: v_readfirstlane_b32 s5, v1 +; GFX10-NEXT: v_readfirstlane_b32 s6, v2 +; 
GFX10-NEXT: v_readfirstlane_b32 s7, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_cmpswap v[5:6], v4, s[8:11], 0 offen offset:1024 glc -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 +; GFX10-NEXT: buffer_atomic_cmpswap v[5:6], v4, s[4:7], 0 offen offset:1024 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX10-NEXT: s_cbranch_execnz .LBB21_4 ; GFX10-NEXT: ; %bb.5: ; in Loop: Header=BB21_3 Depth=1 -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s9 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v5, v7 ; GFX10-NEXT: v_mov_b32_e32 v7, v5 ; GFX10-NEXT: buffer_gl1_inv ; GFX10-NEXT: buffer_gl0_inv -; GFX10-NEXT: s_or_b32 s5, vcc_lo, s5 +; GFX10-NEXT: s_or_b32 s8, vcc_lo, s8 ; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: s_cbranch_execnz .LBB21_3 ; GFX10-NEXT: ; %bb.6: ; %atomicrmw.end -; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX10-NEXT: v_mov_b32_e32 v0, v5 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/issue92561-restore-undef-scc-verifier-error.ll b/llvm/test/CodeGen/AMDGPU/issue92561-restore-undef-scc-verifier-error.ll index 1caa1442fd2fd..4408eb79ae671 100644 --- a/llvm/test/CodeGen/AMDGPU/issue92561-restore-undef-scc-verifier-error.ll +++ b/llvm/test/CodeGen/AMDGPU/issue92561-restore-undef-scc-verifier-error.ll @@ -15,50 +15,46 @@ define void @issue92561(ptr addrspace(1) %arg) { ; SDAG-NEXT: global_load_b128 v[4:7], v[0:1], off offset:16 ; SDAG-NEXT: global_load_b128 v[0:3], v[0:1], off ; SDAG-NEXT: v_mov_b32_e32 v8, 0 -; SDAG-NEXT: s_mov_b32 s12, 0 -; SDAG-NEXT: 
s_mov_b32 s3, exec_lo +; SDAG-NEXT: s_mov_b32 s8, 0 +; SDAG-NEXT: s_mov_b32 s12, exec_lo +; SDAG-NEXT: s_mov_b32 s9, s8 +; SDAG-NEXT: s_mov_b32 s10, s8 +; SDAG-NEXT: s_mov_b32 s11, s8 ; SDAG-NEXT: s_mov_b32 s13, s12 -; SDAG-NEXT: s_mov_b32 s14, s12 -; SDAG-NEXT: s_mov_b32 s15, s12 ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; SDAG-NEXT: v_readfirstlane_b32 s4, v0 -; SDAG-NEXT: v_readfirstlane_b32 s5, v1 -; SDAG-NEXT: v_readfirstlane_b32 s6, v2 -; SDAG-NEXT: v_readfirstlane_b32 s7, v3 -; SDAG-NEXT: v_readfirstlane_b32 s8, v4 -; SDAG-NEXT: v_readfirstlane_b32 s9, v5 -; SDAG-NEXT: v_readfirstlane_b32 s10, v6 -; SDAG-NEXT: v_readfirstlane_b32 s11, v7 -; SDAG-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; SDAG-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; SDAG-NEXT: v_cmp_eq_u64_e64 s1, s[8:9], v[4:5] -; SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; SDAG-NEXT: v_cmp_eq_u64_e64 s2, s[10:11], v[6:7] -; SDAG-NEXT: s_and_b32 s0, vcc_lo, s0 -; SDAG-NEXT: s_and_b32 s0, s0, s1 -; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; SDAG-NEXT: s_and_b32 s0, s0, s2 -; SDAG-NEXT: s_and_saveexec_b32 s0, s0 -; SDAG-NEXT: image_sample_c_lz v9, [v8, v8, v8, v8], s[4:11], s[12:15] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY +; SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; SDAG-NEXT: v_readfirstlane_b32 s1, v1 +; SDAG-NEXT: v_readfirstlane_b32 s2, v2 +; SDAG-NEXT: v_readfirstlane_b32 s3, v3 +; SDAG-NEXT: v_readfirstlane_b32 s4, v4 +; SDAG-NEXT: v_readfirstlane_b32 s5, v5 +; SDAG-NEXT: v_readfirstlane_b32 s6, v6 +; SDAG-NEXT: v_readfirstlane_b32 s7, v7 +; SDAG-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; SDAG-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; SDAG-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[4:5] +; SDAG-NEXT: s_delay_alu instid0(VALU_DEP_4) +; SDAG-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[6:7] +; SDAG-NEXT: image_sample_c_lz v9, [v8, v8, v8, v8], s[0:7], s[8:11] dmask:0x1 
dim:SQ_RSRC_IMG_2D_ARRAY +; SDAG-NEXT: s_and_not1_wrexec_b32 s13, s13 ; SDAG-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 -; SDAG-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; SDAG-NEXT: s_cbranch_execnz .LBB0_1 ; SDAG-NEXT: ; %bb.2: -; SDAG-NEXT: s_mov_b32 exec_lo, s3 +; SDAG-NEXT: s_mov_b32 exec_lo, s12 ; SDAG-NEXT: v_dual_mov_b32 v0, 0x7fc00000 :: v_dual_mov_b32 v1, 1.0 -; SDAG-NEXT: s_mov_b32 s0, s12 -; SDAG-NEXT: s_mov_b32 s1, s12 -; SDAG-NEXT: s_mov_b32 s2, s12 -; SDAG-NEXT: s_mov_b32 s3, s12 -; SDAG-NEXT: s_mov_b32 s4, s12 -; SDAG-NEXT: s_mov_b32 s5, s12 -; SDAG-NEXT: s_mov_b32 s6, s12 -; SDAG-NEXT: s_mov_b32 s7, s12 +; SDAG-NEXT: s_mov_b32 s0, s8 +; SDAG-NEXT: s_mov_b32 s1, s8 +; SDAG-NEXT: s_mov_b32 s2, s8 +; SDAG-NEXT: s_mov_b32 s3, s8 +; SDAG-NEXT: s_mov_b32 s4, s8 +; SDAG-NEXT: s_mov_b32 s5, s8 +; SDAG-NEXT: s_mov_b32 s6, s8 +; SDAG-NEXT: s_mov_b32 s7, s8 ; SDAG-NEXT: s_clause 0x2 -; SDAG-NEXT: image_sample_c_lz v0, [v8, v8, v0, v8], s[0:7], s[12:15] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY -; SDAG-NEXT: image_sample_c_lz v2, [v8, v8, v8, v8], s[0:7], s[12:15] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY -; SDAG-NEXT: image_sample_c_lz v1, [v8, v1, v8, v8], s[0:7], s[12:15] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY +; SDAG-NEXT: image_sample_c_lz v0, [v8, v8, v0, v8], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY +; SDAG-NEXT: image_sample_c_lz v2, [v8, v8, v8, v8], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY +; SDAG-NEXT: image_sample_c_lz v1, [v8, v1, v8, v8], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY ; SDAG-NEXT: s_waitcnt vmcnt(2) ; SDAG-NEXT: v_dual_add_f32 v0, v9, v0 :: v_dual_mov_b32 v9, v8 ; SDAG-NEXT: s_waitcnt vmcnt(0) diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll index cc77019631759..373648ace8621 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.bvh8_intersect_ray.ll @@ 
-152,29 +152,28 @@ define amdgpu_ps <10 x float> @image_bvh8_intersect_ray_vvvvvv(i64 %node_ptr, fl ; GFX12-SDAG-NEXT: v_dual_mov_b32 v22, v4 :: v_dual_mov_b32 v25, v1 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v26, v2 :: v_dual_mov_b32 v27, 0 ; GFX12-SDAG-NEXT: v_mov_b32_e32 v24, v0 -; GFX12-SDAG-NEXT: s_mov_b32 s1, exec_lo +; GFX12-SDAG-NEXT: s_mov_b32 s4, exec_lo +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_mov_b32 s5, s4 ; GFX12-SDAG-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-SDAG-NEXT: v_readfirstlane_b32 s4, v10 -; GFX12-SDAG-NEXT: v_readfirstlane_b32 s5, v11 -; GFX12-SDAG-NEXT: v_readfirstlane_b32 s6, v12 -; GFX12-SDAG-NEXT: v_readfirstlane_b32 s7, v13 +; GFX12-SDAG-NEXT: v_readfirstlane_b32 s0, v10 +; GFX12-SDAG-NEXT: v_readfirstlane_b32 s1, v11 +; GFX12-SDAG-NEXT: v_readfirstlane_b32 s2, v12 +; GFX12-SDAG-NEXT: v_readfirstlane_b32 s3, v13 ; GFX12-SDAG-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[10:11] -; GFX12-SDAG-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[12:13] -; GFX12-SDAG-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-SDAG-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-SDAG-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[10:11] +; GFX12-SDAG-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[12:13] ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 -; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[24:25], v[26:27], v[21:23], v[18:20], v28], s[4:7] +; GFX12-SDAG-NEXT: image_bvh8_intersect_ray v[0:9], [v[24:25], v[26:27], v[21:23], v[18:20], v28], s[0:3] +; GFX12-SDAG-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-SDAG-NEXT: ; implicit-def: $vgpr10_vgpr11_vgpr12_vgpr13 ; GFX12-SDAG-NEXT: ; implicit-def: $vgpr24_vgpr25 ; GFX12-SDAG-NEXT: ; implicit-def: $vgpr26_vgpr27 ; GFX12-SDAG-NEXT: ; implicit-def: $vgpr28 -; GFX12-SDAG-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-SDAG-NEXT: 
s_cbranch_execnz .LBB3_1 ; GFX12-SDAG-NEXT: ; %bb.2: -; GFX12-SDAG-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-SDAG-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-NEXT: global_store_b96 v[14:15], v[21:23], off ; GFX12-SDAG-NEXT: global_store_b96 v[16:17], v[18:20], off diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dual_intersect_ray.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dual_intersect_ray.ll index dec4f9bcabfbd..195e72ecaf336 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dual_intersect_ray.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dual_intersect_ray.ll @@ -157,29 +157,28 @@ define amdgpu_ps <10 x float> @image_bvh_dual_intersect_ray_vvvvvv(i64 %node_ptr ; GFX12-SDAG-NEXT: v_dual_mov_b32 v23, v4 :: v_dual_mov_b32 v22, v3 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v29, v2 :: v_dual_mov_b32 v28, v1 ; GFX12-SDAG-NEXT: v_dual_mov_b32 v27, v0 :: v_dual_mov_b32 v30, 0 -; GFX12-SDAG-NEXT: s_mov_b32 s1, exec_lo +; GFX12-SDAG-NEXT: s_mov_b32 s4, exec_lo +; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-SDAG-NEXT: s_mov_b32 s5, s4 ; GFX12-SDAG-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-SDAG-NEXT: v_readfirstlane_b32 s4, v11 -; GFX12-SDAG-NEXT: v_readfirstlane_b32 s5, v12 -; GFX12-SDAG-NEXT: v_readfirstlane_b32 s6, v13 -; GFX12-SDAG-NEXT: v_readfirstlane_b32 s7, v14 +; GFX12-SDAG-NEXT: v_readfirstlane_b32 s0, v11 +; GFX12-SDAG-NEXT: v_readfirstlane_b32 s1, v12 +; GFX12-SDAG-NEXT: v_readfirstlane_b32 s2, v13 +; GFX12-SDAG-NEXT: v_readfirstlane_b32 s3, v14 ; GFX12-SDAG-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-SDAG-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[11:12] -; GFX12-SDAG-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[13:14] -; GFX12-SDAG-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-SDAG-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-SDAG-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[11:12] +; 
GFX12-SDAG-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[13:14] ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 -; GFX12-SDAG-NEXT: image_bvh_dual_intersect_ray v[0:9], [v[27:28], v[29:30], v[22:24], v[19:21], v[25:26]], s[4:7] +; GFX12-SDAG-NEXT: image_bvh_dual_intersect_ray v[0:9], [v[27:28], v[29:30], v[22:24], v[19:21], v[25:26]], s[0:3] +; GFX12-SDAG-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-SDAG-NEXT: ; implicit-def: $vgpr11_vgpr12_vgpr13_vgpr14 ; GFX12-SDAG-NEXT: ; implicit-def: $vgpr27_vgpr28 ; GFX12-SDAG-NEXT: ; implicit-def: $vgpr29_vgpr30 ; GFX12-SDAG-NEXT: ; implicit-def: $vgpr25_vgpr26 -; GFX12-SDAG-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-SDAG-NEXT: s_cbranch_execnz .LBB3_1 ; GFX12-SDAG-NEXT: ; %bb.2: -; GFX12-SDAG-NEXT: s_mov_b32 exec_lo, s1 +; GFX12-SDAG-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0 ; GFX12-SDAG-NEXT: global_store_b96 v[15:16], v[22:24], off ; GFX12-SDAG-NEXT: global_store_b96 v[17:18], v[19:21], off diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2bf16.ll index 874b336a929a5..cc3bd9706887e 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.atomic.fadd.v2bf16.ll @@ -104,33 +104,31 @@ define <2 x bfloat> @raw_ptr_buffer_atomic_add_v2bf16_rtn__vgpr_val__vgpr_rsrc__ ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; GFX1200-NEXT: s_wait_kmcnt 0x0 -; GFX1200-NEXT: s_mov_b32 s2, exec_lo +; GFX1200-NEXT: s_mov_b32 s4, exec_lo +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX1200-NEXT: s_mov_b32 s5, s4 ; GFX1200-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 -; GFX1200-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1200-NEXT: v_readfirstlane_b32 s5, v2 -; GFX1200-NEXT: v_readfirstlane_b32 s6, v3 -; GFX1200-NEXT: v_readfirstlane_b32 s7, v4 -; GFX1200-NEXT: v_readfirstlane_b32 s3, v6 +; GFX1200-NEXT: v_readfirstlane_b32 s0, 
v1 +; GFX1200-NEXT: v_readfirstlane_b32 s1, v2 +; GFX1200-NEXT: v_readfirstlane_b32 s2, v3 +; GFX1200-NEXT: v_readfirstlane_b32 s3, v4 +; GFX1200-NEXT: v_readfirstlane_b32 s6, v6 ; GFX1200-NEXT: s_wait_alu depctr_va_sdst(0) -; GFX1200-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[1:2] ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1200-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4] -; GFX1200-NEXT: v_cmp_eq_u32_e64 s1, s3, v6 -; GFX1200-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_b32 s0, s0, s1 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_saveexec_b32 s0, s0 +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[3:4] +; GFX1200-NEXT: v_cmpx_eq_u32_e32 s6, v6 ; GFX1200-NEXT: s_wait_loadcnt 0x0 -; GFX1200-NEXT: buffer_atomic_pk_add_bf16 v0, v5, s[4:7], s3 offen offset:128 th:TH_ATOMIC_RETURN +; GFX1200-NEXT: buffer_atomic_pk_add_bf16 v0, v5, s[0:3], s6 offen offset:128 th:TH_ATOMIC_RETURN +; GFX1200-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1200-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX1200-NEXT: ; implicit-def: $vgpr6 ; GFX1200-NEXT: ; implicit-def: $vgpr5 -; GFX1200-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1200-NEXT: s_cbranch_execnz .LBB4_1 ; GFX1200-NEXT: ; %bb.2: -; GFX1200-NEXT: s_mov_b32 exec_lo, s2 +; GFX1200-NEXT: s_mov_b32 exec_lo, s4 ; GFX1200-NEXT: s_wait_loadcnt 0x0 +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: raw_ptr_buffer_atomic_add_v2bf16_rtn__vgpr_val__vgpr_rsrc__vgpr_voffset_add__vgpr_soffset: @@ -140,33 +138,30 @@ define <2 x bfloat> @raw_ptr_buffer_atomic_add_v2bf16_rtn__vgpr_val__vgpr_rsrc__ ; GFX1250-NEXT: v_dual_mov_b32 v11, v4 :: v_dual_mov_b32 v10, v3 ; GFX1250-NEXT: v_dual_mov_b32 v9, v2 :: v_dual_mov_b32 v8, v1 ; GFX1250-NEXT: v_add_nc_u32_e32 v1, 0x80, v5 -; GFX1250-NEXT: s_mov_b32 s2, exec_lo -; GFX1250-NEXT: .LBB4_1: 
; =>This Inner Loop Header: Depth=1 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_readfirstlane_b32 s4, v8 -; GFX1250-NEXT: v_readfirstlane_b32 s5, v9 -; GFX1250-NEXT: v_readfirstlane_b32 s6, v10 -; GFX1250-NEXT: v_readfirstlane_b32 s7, v11 -; GFX1250-NEXT: v_readfirstlane_b32 s3, v6 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[8:9] -; GFX1250-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[10:11] -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX1250-NEXT: v_cmp_eq_u32_e64 s1, s3, v6 -; GFX1250-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1250-NEXT: s_and_b32 s0, s0, s1 +; GFX1250-NEXT: s_mov_b32 s4, exec_lo ; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1250-NEXT: s_and_saveexec_b32 s0, s0 +; GFX1250-NEXT: s_mov_b32 s5, s4 +; GFX1250-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_4) +; GFX1250-NEXT: v_readfirstlane_b32 s0, v8 +; GFX1250-NEXT: v_readfirstlane_b32 s1, v9 +; GFX1250-NEXT: v_readfirstlane_b32 s2, v10 +; GFX1250-NEXT: v_readfirstlane_b32 s3, v11 +; GFX1250-NEXT: v_readfirstlane_b32 s6, v6 +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[8:9] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[10:11] +; GFX1250-NEXT: v_cmpx_eq_u32_e32 s6, v6 ; GFX1250-NEXT: s_wait_loadcnt 0x0 -; GFX1250-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[4:7], s3 offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: buffer_atomic_pk_add_bf16 v0, v1, s[0:3], s6 offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1250-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11 ; GFX1250-NEXT: ; implicit-def: $vgpr6 ; GFX1250-NEXT: ; implicit-def: $vgpr1 -; GFX1250-NEXT: s_wait_xcnt 0x0 -; 
GFX1250-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1250-NEXT: s_cbranch_execnz .LBB4_1 ; GFX1250-NEXT: ; %bb.2: -; GFX1250-NEXT: s_mov_b32 exec_lo, s2 +; GFX1250-NEXT: s_mov_b32 exec_lo, s4 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %voffset.add = add i32 %voffset, 128 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll index 24fcb933bf801..e035bfbf18fb5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.buffer.load.format.v3f16.ll @@ -12,24 +12,24 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) { ; GFX10-LABEL: main: ; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_mov_b32 s1, exec_lo +; GFX10-NEXT: s_mov_b32 s4, exec_lo +; GFX10-NEXT: s_mov_b32 s5, s4 ; GFX10-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s4, v0 -; GFX10-NEXT: v_readfirstlane_b32 s5, v1 -; GFX10-NEXT: v_readfirstlane_b32 s6, v2 -; GFX10-NEXT: v_readfirstlane_b32 s7, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX10-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX10-NEXT: s_and_saveexec_b32 s0, s0 -; GFX10-NEXT: buffer_load_format_d16_xyz v[5:6], v4, s[4:7], 0 idxen +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GFX10-NEXT: v_readfirstlane_b32 s3, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX10-NEXT: buffer_load_format_d16_xyz v[5:6], v4, s[0:3], 0 idxen +; GFX10-NEXT: s_andn2_wrexec_b32 s5, s5 ; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: ; implicit-def: $vgpr4 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX10-NEXT: 
s_cbranch_execnz .LBB0_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v5 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v6 @@ -92,25 +92,24 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) { ; ; GFX11-TRUE16-LABEL: main: ; GFX11-TRUE16: ; %bb.0: ; %bb -; GFX11-TRUE16-NEXT: s_mov_b32 s1, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s4, exec_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-TRUE16-NEXT: s_mov_b32 s5, s4 ; GFX11-TRUE16-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: buffer_load_d16_format_xyz v[5:6], v4, s[4:7], 0 idxen +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-TRUE16-NEXT: buffer_load_d16_format_xyz v[5:6], v4, s[0:3], 0 idxen +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB0_1 ; GFX11-TRUE16-NEXT: ; %bb.2: -; 
GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v5.h @@ -119,25 +118,24 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) { ; ; GFX11-FAKE16-LABEL: main: ; GFX11-FAKE16: ; %bb.0: ; %bb -; GFX11-FAKE16-NEXT: s_mov_b32 s1, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s4, exec_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-FAKE16-NEXT: s_mov_b32 s5, s4 ; GFX11-FAKE16-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-FAKE16-NEXT: buffer_load_d16_format_xyz v[5:6], v4, s[4:7], 0 idxen +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-FAKE16-NEXT: buffer_load_d16_format_xyz v[5:6], v4, s[0:3], 0 idxen +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr4 -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB0_1 ; GFX11-FAKE16-NEXT: ; %bb.2: -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s4 ; 
GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v5 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_and_b32 v1, 0xffff, v6 @@ -145,27 +143,26 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) { ; ; GFX12-TRUE16-LABEL: main: ; GFX12-TRUE16: ; %bb.0: ; %bb -; GFX12-TRUE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-TRUE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-TRUE16-NEXT: s_mov_b32 s5, s4 ; GFX12-TRUE16-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-TRUE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-TRUE16-NEXT: buffer_load_d16_format_xyz v[5:6], v4, s[4:7], null idxen +; GFX12-TRUE16-NEXT: buffer_load_d16_format_xyz v[5:6], v4, s[0:3], null idxen +; GFX12-TRUE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX12-TRUE16-NEXT: ; implicit-def: $vgpr4 -; GFX12-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-TRUE16-NEXT: s_cbranch_execnz .LBB0_1 ; GFX12-TRUE16-NEXT: ; %bb.2: -; GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; 
GFX12-TRUE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0 ; GFX12-TRUE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-TRUE16-NEXT: v_mov_b16_e32 v0.l, v5.h @@ -174,27 +171,26 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) { ; ; GFX12-FAKE16-LABEL: main: ; GFX12-FAKE16: ; %bb.0: ; %bb -; GFX12-FAKE16-NEXT: s_mov_b32 s1, exec_lo +; GFX12-FAKE16-NEXT: s_mov_b32 s4, exec_lo +; GFX12-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX12-FAKE16-NEXT: s_mov_b32 s5, s4 ; GFX12-FAKE16-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX12-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX12-FAKE16-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX12-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX12-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX12-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX12-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX12-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX12-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 -; GFX12-FAKE16-NEXT: buffer_load_d16_format_xyz v[5:6], v4, s[4:7], null idxen +; GFX12-FAKE16-NEXT: buffer_load_d16_format_xyz v[5:6], v4, s[0:3], null idxen +; GFX12-FAKE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX12-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX12-FAKE16-NEXT: ; implicit-def: $vgpr4 -; GFX12-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX12-FAKE16-NEXT: s_cbranch_execnz .LBB0_1 ; GFX12-FAKE16-NEXT: ; %bb.2: -; GFX12-FAKE16-NEXT: s_mov_b32 
exec_lo, s1 +; GFX12-FAKE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX12-FAKE16-NEXT: s_wait_loadcnt 0x0 ; GFX12-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v5 ; GFX12-FAKE16-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_and_b32 v1, 0xffff, v6 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2bf16.ll index 6921cca5a2394..7599cde4eb406 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2bf16.ll @@ -40,33 +40,31 @@ define <2 x bfloat> @struct_ptr_buffer_atomic_add_v2bf16_rtn__vgpr_val__vgpr_rsr ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; GFX1200-NEXT: s_wait_kmcnt 0x0 -; GFX1200-NEXT: s_mov_b32 s2, exec_lo +; GFX1200-NEXT: s_mov_b32 s4, exec_lo +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX1200-NEXT: s_mov_b32 s5, s4 ; GFX1200-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX1200-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1200-NEXT: v_readfirstlane_b32 s5, v2 -; GFX1200-NEXT: v_readfirstlane_b32 s6, v3 -; GFX1200-NEXT: v_readfirstlane_b32 s7, v4 -; GFX1200-NEXT: v_readfirstlane_b32 s3, v7 +; GFX1200-NEXT: v_readfirstlane_b32 s0, v1 +; GFX1200-NEXT: v_readfirstlane_b32 s1, v2 +; GFX1200-NEXT: v_readfirstlane_b32 s2, v3 +; GFX1200-NEXT: v_readfirstlane_b32 s3, v4 +; GFX1200-NEXT: v_readfirstlane_b32 s6, v7 ; GFX1200-NEXT: s_wait_alu depctr_va_sdst(0) -; GFX1200-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[1:2] ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1200-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4] -; GFX1200-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX1200-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_b32 s0, s0, s1 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: 
s_and_saveexec_b32 s0, s0 +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[3:4] +; GFX1200-NEXT: v_cmpx_eq_u32_e32 s6, v7 ; GFX1200-NEXT: s_wait_loadcnt 0x0 -; GFX1200-NEXT: buffer_atomic_pk_add_bf16 v0, v[5:6], s[4:7], s3 idxen offen th:TH_ATOMIC_RETURN +; GFX1200-NEXT: buffer_atomic_pk_add_bf16 v0, v[5:6], s[0:3], s6 idxen offen th:TH_ATOMIC_RETURN +; GFX1200-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1200-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX1200-NEXT: ; implicit-def: $vgpr7 ; GFX1200-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX1200-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1200-NEXT: s_cbranch_execnz .LBB2_1 ; GFX1200-NEXT: ; %bb.2: -; GFX1200-NEXT: s_mov_b32 exec_lo, s2 +; GFX1200-NEXT: s_mov_b32 exec_lo, s4 ; GFX1200-NEXT: s_wait_loadcnt 0x0 +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX1200-NEXT: s_setpc_b64 s[30:31] %ret = call <2 x bfloat> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret <2 x bfloat> %ret @@ -80,32 +78,30 @@ define void @struct_ptr_buffer_atomic_add_v2bf16_noret__vgpr_val__vgpr_rsrc__vgp ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; GFX1200-NEXT: s_wait_kmcnt 0x0 -; GFX1200-NEXT: s_mov_b32 s2, exec_lo +; GFX1200-NEXT: s_mov_b32 s4, exec_lo +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX1200-NEXT: s_mov_b32 s5, s4 ; GFX1200-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 -; GFX1200-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1200-NEXT: v_readfirstlane_b32 s5, v2 -; GFX1200-NEXT: v_readfirstlane_b32 s6, v3 -; GFX1200-NEXT: v_readfirstlane_b32 s7, v4 -; GFX1200-NEXT: v_readfirstlane_b32 s3, v7 +; GFX1200-NEXT: v_readfirstlane_b32 s0, v1 +; GFX1200-NEXT: v_readfirstlane_b32 s1, v2 +; GFX1200-NEXT: v_readfirstlane_b32 s2, v3 +; GFX1200-NEXT: v_readfirstlane_b32 s3, v4 +; GFX1200-NEXT: v_readfirstlane_b32 s6, v7 ; GFX1200-NEXT: s_wait_alu depctr_va_sdst(0) -; GFX1200-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] 
+; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[1:2] ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1200-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4] -; GFX1200-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX1200-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_b32 s0, s0, s1 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1200-NEXT: buffer_atomic_pk_add_bf16 v0, v[5:6], s[4:7], s3 idxen offen +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[3:4] +; GFX1200-NEXT: v_cmpx_eq_u32_e32 s6, v7 +; GFX1200-NEXT: buffer_atomic_pk_add_bf16 v0, v[5:6], s[0:3], s6 idxen offen +; GFX1200-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1200-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX1200-NEXT: ; implicit-def: $vgpr7 ; GFX1200-NEXT: ; implicit-def: $vgpr0 ; GFX1200-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX1200-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1200-NEXT: s_cbranch_execnz .LBB3_1 ; GFX1200-NEXT: ; %bb.2: -; GFX1200-NEXT: s_mov_b32 exec_lo, s2 +; GFX1200-NEXT: s_mov_b32 exec_lo, s4 +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX1200-NEXT: s_setpc_b64 s[30:31] %ret = call <2 x bfloat> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2bf16(<2 x bfloat> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_nortn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_nortn.ll index d6e140c14d4cc..4723cabc315e5 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_nortn.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_nortn.ll @@ -296,32 +296,30 @@ define void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__vgpr_rsrc__vgpr_v ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; GFX1200-NEXT: s_wait_kmcnt 0x0 -; GFX1200-NEXT: s_mov_b32 s2, exec_lo +; 
GFX1200-NEXT: s_mov_b32 s4, exec_lo +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX1200-NEXT: s_mov_b32 s5, s4 ; GFX1200-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 -; GFX1200-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1200-NEXT: v_readfirstlane_b32 s5, v2 -; GFX1200-NEXT: v_readfirstlane_b32 s6, v3 -; GFX1200-NEXT: v_readfirstlane_b32 s7, v4 -; GFX1200-NEXT: v_readfirstlane_b32 s3, v7 +; GFX1200-NEXT: v_readfirstlane_b32 s0, v1 +; GFX1200-NEXT: v_readfirstlane_b32 s1, v2 +; GFX1200-NEXT: v_readfirstlane_b32 s2, v3 +; GFX1200-NEXT: v_readfirstlane_b32 s3, v4 +; GFX1200-NEXT: v_readfirstlane_b32 s6, v7 ; GFX1200-NEXT: s_wait_alu depctr_va_sdst(0) -; GFX1200-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[1:2] ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1200-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4] -; GFX1200-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX1200-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_b32 s0, s0, s1 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1200-NEXT: buffer_atomic_add_f32 v0, v[5:6], s[4:7], s3 idxen offen +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[3:4] +; GFX1200-NEXT: v_cmpx_eq_u32_e32 s6, v7 +; GFX1200-NEXT: buffer_atomic_add_f32 v0, v[5:6], s[0:3], s6 idxen offen +; GFX1200-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1200-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX1200-NEXT: ; implicit-def: $vgpr7 ; GFX1200-NEXT: ; implicit-def: $vgpr0 ; GFX1200-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX1200-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1200-NEXT: s_cbranch_execnz .LBB4_1 ; GFX1200-NEXT: ; %bb.2: -; GFX1200-NEXT: s_mov_b32 exec_lo, s2 +; GFX1200-NEXT: s_mov_b32 exec_lo, s4 +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: 
struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: @@ -331,33 +329,31 @@ define void @struct_ptr_buffer_atomic_add_f32_noret__vgpr_val__vgpr_rsrc__vgpr_v ; GFX1250-NEXT: v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v8, v5 ; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 ; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1 -; GFX1250-NEXT: s_mov_b32 s2, exec_lo +; GFX1250-NEXT: s_mov_b32 s4, exec_lo +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_mov_b32 s5, s4 ; GFX1250-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_readfirstlane_b32 s4, v2 -; GFX1250-NEXT: v_readfirstlane_b32 s5, v3 -; GFX1250-NEXT: v_readfirstlane_b32 s6, v4 -; GFX1250-NEXT: v_readfirstlane_b32 s7, v5 -; GFX1250-NEXT: v_readfirstlane_b32 s3, v7 +; GFX1250-NEXT: v_readfirstlane_b32 s0, v2 +; GFX1250-NEXT: v_readfirstlane_b32 s1, v3 +; GFX1250-NEXT: v_readfirstlane_b32 s2, v4 +; GFX1250-NEXT: v_readfirstlane_b32 s3, v5 +; GFX1250-NEXT: v_readfirstlane_b32 s6, v7 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] -; GFX1250-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[4:5] -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX1250-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX1250-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1250-NEXT: s_and_b32 s0, s0, s1 -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1250-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1250-NEXT: buffer_atomic_add_f32 v0, v[8:9], s[4:7], s3 idxen offen +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[2:3] +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[4:5] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX1250-NEXT: v_cmpx_eq_u32_e32 s6, v7 +; GFX1250-NEXT: buffer_atomic_add_f32 v0, v[8:9], s[0:3], s6 idxen offen +; GFX1250-NEXT: 
s_wait_xcnt 0x0 +; GFX1250-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1250-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX1250-NEXT: ; implicit-def: $vgpr7 ; GFX1250-NEXT: ; implicit-def: $vgpr0 ; GFX1250-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1250-NEXT: s_cbranch_execnz .LBB4_1 ; GFX1250-NEXT: ; %bb.2: -; GFX1250-NEXT: s_mov_b32 exec_lo, s2 +; GFX1250-NEXT: s_mov_b32 exec_lo, s4 ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void @@ -470,32 +466,30 @@ define void @struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__vgpr_rsrc__vgpr ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; GFX1200-NEXT: s_wait_kmcnt 0x0 -; GFX1200-NEXT: s_mov_b32 s2, exec_lo +; GFX1200-NEXT: s_mov_b32 s4, exec_lo +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX1200-NEXT: s_mov_b32 s5, s4 ; GFX1200-NEXT: .LBB5_1: ; =>This Inner Loop Header: Depth=1 -; GFX1200-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1200-NEXT: v_readfirstlane_b32 s5, v2 -; GFX1200-NEXT: v_readfirstlane_b32 s6, v3 -; GFX1200-NEXT: v_readfirstlane_b32 s7, v4 -; GFX1200-NEXT: v_readfirstlane_b32 s3, v7 +; GFX1200-NEXT: v_readfirstlane_b32 s0, v1 +; GFX1200-NEXT: v_readfirstlane_b32 s1, v2 +; GFX1200-NEXT: v_readfirstlane_b32 s2, v3 +; GFX1200-NEXT: v_readfirstlane_b32 s3, v4 +; GFX1200-NEXT: v_readfirstlane_b32 s6, v7 ; GFX1200-NEXT: s_wait_alu depctr_va_sdst(0) -; GFX1200-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[1:2] ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1200-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4] -; GFX1200-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX1200-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_b32 s0, s0, s1 -; 
GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1200-NEXT: buffer_atomic_pk_add_f16 v0, v[5:6], s[4:7], s3 idxen offen +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[3:4] +; GFX1200-NEXT: v_cmpx_eq_u32_e32 s6, v7 +; GFX1200-NEXT: buffer_atomic_pk_add_f16 v0, v[5:6], s[0:3], s6 idxen offen +; GFX1200-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1200-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX1200-NEXT: ; implicit-def: $vgpr7 ; GFX1200-NEXT: ; implicit-def: $vgpr0 ; GFX1200-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX1200-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1200-NEXT: s_cbranch_execnz .LBB5_1 ; GFX1200-NEXT: ; %bb.2: -; GFX1200-NEXT: s_mov_b32 exec_lo, s2 +; GFX1200-NEXT: s_mov_b32 exec_lo, s4 +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: @@ -505,33 +499,31 @@ define void @struct_ptr_buffer_atomic_add_v2f16_noret__vgpr_val__vgpr_rsrc__vgpr ; GFX1250-NEXT: v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v8, v5 ; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 ; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1 -; GFX1250-NEXT: s_mov_b32 s2, exec_lo +; GFX1250-NEXT: s_mov_b32 s4, exec_lo +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_mov_b32 s5, s4 ; GFX1250-NEXT: .LBB5_1: ; =>This Inner Loop Header: Depth=1 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_readfirstlane_b32 s4, v2 -; GFX1250-NEXT: v_readfirstlane_b32 s5, v3 -; GFX1250-NEXT: v_readfirstlane_b32 s6, v4 -; GFX1250-NEXT: v_readfirstlane_b32 s7, v5 -; GFX1250-NEXT: v_readfirstlane_b32 s3, v7 +; GFX1250-NEXT: v_readfirstlane_b32 s0, v2 +; GFX1250-NEXT: v_readfirstlane_b32 s1, v3 +; GFX1250-NEXT: v_readfirstlane_b32 s2, v4 +; GFX1250-NEXT: v_readfirstlane_b32 s3, v5 +; GFX1250-NEXT: v_readfirstlane_b32 s6, v7 ; 
GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] -; GFX1250-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[4:5] -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX1250-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX1250-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1250-NEXT: s_and_b32 s0, s0, s1 -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1250-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1250-NEXT: buffer_atomic_pk_add_f16 v0, v[8:9], s[4:7], s3 idxen offen +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[2:3] +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[4:5] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX1250-NEXT: v_cmpx_eq_u32_e32 s6, v7 +; GFX1250-NEXT: buffer_atomic_pk_add_f16 v0, v[8:9], s[0:3], s6 idxen offen +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1250-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX1250-NEXT: ; implicit-def: $vgpr7 ; GFX1250-NEXT: ; implicit-def: $vgpr0 ; GFX1250-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1250-NEXT: s_cbranch_execnz .LBB5_1 ; GFX1250-NEXT: ; %bb.2: -; GFX1250-NEXT: s_mov_b32 exec_lo, s2 +; GFX1250-NEXT: s_mov_b32 exec_lo, s4 ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) ret void diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_rtn.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_rtn.ll index d99f1a4a5b996..ec811326b4cb7 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_rtn.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fadd_rtn.ll @@ -245,33 +245,31 @@ define float 
@struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__vgpr_rsrc__vgpr_vo ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; GFX1200-NEXT: s_wait_kmcnt 0x0 -; GFX1200-NEXT: s_mov_b32 s2, exec_lo +; GFX1200-NEXT: s_mov_b32 s4, exec_lo +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX1200-NEXT: s_mov_b32 s5, s4 ; GFX1200-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 -; GFX1200-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1200-NEXT: v_readfirstlane_b32 s5, v2 -; GFX1200-NEXT: v_readfirstlane_b32 s6, v3 -; GFX1200-NEXT: v_readfirstlane_b32 s7, v4 -; GFX1200-NEXT: v_readfirstlane_b32 s3, v7 +; GFX1200-NEXT: v_readfirstlane_b32 s0, v1 +; GFX1200-NEXT: v_readfirstlane_b32 s1, v2 +; GFX1200-NEXT: v_readfirstlane_b32 s2, v3 +; GFX1200-NEXT: v_readfirstlane_b32 s3, v4 +; GFX1200-NEXT: v_readfirstlane_b32 s6, v7 ; GFX1200-NEXT: s_wait_alu depctr_va_sdst(0) -; GFX1200-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[1:2] ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1200-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4] -; GFX1200-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX1200-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_b32 s0, s0, s1 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_saveexec_b32 s0, s0 +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[3:4] +; GFX1200-NEXT: v_cmpx_eq_u32_e32 s6, v7 ; GFX1200-NEXT: s_wait_loadcnt 0x0 -; GFX1200-NEXT: buffer_atomic_add_f32 v0, v[5:6], s[4:7], s3 idxen offen th:TH_ATOMIC_RETURN +; GFX1200-NEXT: buffer_atomic_add_f32 v0, v[5:6], s[0:3], s6 idxen offen th:TH_ATOMIC_RETURN +; GFX1200-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1200-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX1200-NEXT: ; implicit-def: $vgpr7 ; GFX1200-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX1200-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1200-NEXT: s_cbranch_execnz .LBB4_1 ; GFX1200-NEXT: ; %bb.2: 
-; GFX1200-NEXT: s_mov_b32 exec_lo, s2 +; GFX1200-NEXT: s_mov_b32 exec_lo, s4 ; GFX1200-NEXT: s_wait_loadcnt 0x0 +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: @@ -281,33 +279,31 @@ define float @struct_ptr_buffer_atomic_add_f32_rtn__vgpr_val__vgpr_rsrc__vgpr_vo ; GFX1250-NEXT: v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v8, v5 ; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 ; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1 -; GFX1250-NEXT: s_mov_b32 s2, exec_lo +; GFX1250-NEXT: s_mov_b32 s4, exec_lo +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_mov_b32 s5, s4 ; GFX1250-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_readfirstlane_b32 s4, v2 -; GFX1250-NEXT: v_readfirstlane_b32 s5, v3 -; GFX1250-NEXT: v_readfirstlane_b32 s6, v4 -; GFX1250-NEXT: v_readfirstlane_b32 s7, v5 -; GFX1250-NEXT: v_readfirstlane_b32 s3, v7 +; GFX1250-NEXT: v_readfirstlane_b32 s0, v2 +; GFX1250-NEXT: v_readfirstlane_b32 s1, v3 +; GFX1250-NEXT: v_readfirstlane_b32 s2, v4 +; GFX1250-NEXT: v_readfirstlane_b32 s3, v5 +; GFX1250-NEXT: v_readfirstlane_b32 s6, v7 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] -; GFX1250-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[4:5] -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX1250-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX1250-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1250-NEXT: s_and_b32 s0, s0, s1 -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1250-NEXT: s_and_saveexec_b32 s0, s0 +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[2:3] +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[4:5] +; GFX1250-NEXT: s_delay_alu 
instid0(VALU_DEP_3) +; GFX1250-NEXT: v_cmpx_eq_u32_e32 s6, v7 ; GFX1250-NEXT: s_wait_loadcnt 0x0 -; GFX1250-NEXT: buffer_atomic_add_f32 v0, v[8:9], s[4:7], s3 idxen offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: buffer_atomic_add_f32 v0, v[8:9], s[0:3], s6 idxen offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1250-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX1250-NEXT: ; implicit-def: $vgpr7 ; GFX1250-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1250-NEXT: s_cbranch_execnz .LBB4_1 ; GFX1250-NEXT: ; %bb.2: -; GFX1250-NEXT: s_mov_b32 exec_lo, s2 +; GFX1250-NEXT: s_mov_b32 exec_lo, s4 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %ret = call float @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.f32(float %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) @@ -391,33 +387,31 @@ define <2 x half> @struct_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__vgpr_rsrc__ ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; GFX1200-NEXT: s_wait_kmcnt 0x0 -; GFX1200-NEXT: s_mov_b32 s2, exec_lo +; GFX1200-NEXT: s_mov_b32 s4, exec_lo +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX1200-NEXT: s_mov_b32 s5, s4 ; GFX1200-NEXT: .LBB5_1: ; =>This Inner Loop Header: Depth=1 -; GFX1200-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1200-NEXT: v_readfirstlane_b32 s5, v2 -; GFX1200-NEXT: v_readfirstlane_b32 s6, v3 -; GFX1200-NEXT: v_readfirstlane_b32 s7, v4 -; GFX1200-NEXT: v_readfirstlane_b32 s3, v7 +; GFX1200-NEXT: v_readfirstlane_b32 s0, v1 +; GFX1200-NEXT: v_readfirstlane_b32 s1, v2 +; GFX1200-NEXT: v_readfirstlane_b32 s2, v3 +; GFX1200-NEXT: v_readfirstlane_b32 s3, v4 +; GFX1200-NEXT: v_readfirstlane_b32 s6, v7 ; GFX1200-NEXT: s_wait_alu depctr_va_sdst(0) -; GFX1200-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[1:2] ; GFX1200-NEXT: s_delay_alu 
instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1200-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4] -; GFX1200-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX1200-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_b32 s0, s0, s1 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_saveexec_b32 s0, s0 +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[3:4] +; GFX1200-NEXT: v_cmpx_eq_u32_e32 s6, v7 ; GFX1200-NEXT: s_wait_loadcnt 0x0 -; GFX1200-NEXT: buffer_atomic_pk_add_f16 v0, v[5:6], s[4:7], s3 idxen offen th:TH_ATOMIC_RETURN +; GFX1200-NEXT: buffer_atomic_pk_add_f16 v0, v[5:6], s[0:3], s6 idxen offen th:TH_ATOMIC_RETURN +; GFX1200-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1200-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX1200-NEXT: ; implicit-def: $vgpr7 ; GFX1200-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX1200-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1200-NEXT: s_cbranch_execnz .LBB5_1 ; GFX1200-NEXT: ; %bb.2: -; GFX1200-NEXT: s_mov_b32 exec_lo, s2 +; GFX1200-NEXT: s_mov_b32 exec_lo, s4 ; GFX1200-NEXT: s_wait_loadcnt 0x0 +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: struct_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__vgpr_rsrc__vgpr_voffset__vgpr_soffset: @@ -427,33 +421,31 @@ define <2 x half> @struct_ptr_buffer_atomic_add_v2f16_rtn__vgpr_val__vgpr_rsrc__ ; GFX1250-NEXT: v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v8, v5 ; GFX1250-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3 ; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1 -; GFX1250-NEXT: s_mov_b32 s2, exec_lo +; GFX1250-NEXT: s_mov_b32 s4, exec_lo +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_mov_b32 s5, s4 ; GFX1250-NEXT: .LBB5_1: ; =>This Inner Loop Header: Depth=1 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_readfirstlane_b32 s4, v2 -; GFX1250-NEXT: v_readfirstlane_b32 s5, v3 -; 
GFX1250-NEXT: v_readfirstlane_b32 s6, v4 -; GFX1250-NEXT: v_readfirstlane_b32 s7, v5 -; GFX1250-NEXT: v_readfirstlane_b32 s3, v7 +; GFX1250-NEXT: v_readfirstlane_b32 s0, v2 +; GFX1250-NEXT: v_readfirstlane_b32 s1, v3 +; GFX1250-NEXT: v_readfirstlane_b32 s2, v4 +; GFX1250-NEXT: v_readfirstlane_b32 s3, v5 +; GFX1250-NEXT: v_readfirstlane_b32 s6, v7 ; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] -; GFX1250-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[4:5] -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX1250-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX1250-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1250-NEXT: s_and_b32 s0, s0, s1 -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1250-NEXT: s_and_saveexec_b32 s0, s0 +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[2:3] +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[4:5] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX1250-NEXT: v_cmpx_eq_u32_e32 s6, v7 ; GFX1250-NEXT: s_wait_loadcnt 0x0 -; GFX1250-NEXT: buffer_atomic_pk_add_f16 v0, v[8:9], s[4:7], s3 idxen offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: buffer_atomic_pk_add_f16 v0, v[8:9], s[0:3], s6 idxen offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1250-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX1250-NEXT: ; implicit-def: $vgpr7 ; GFX1250-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1250-NEXT: s_cbranch_execnz .LBB5_1 ; GFX1250-NEXT: ; %bb.2: -; GFX1250-NEXT: s_mov_b32 exec_lo, s2 +; GFX1250-NEXT: s_mov_b32 exec_lo, s4 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %ret = call <2 x half> @llvm.amdgcn.struct.ptr.buffer.atomic.fadd.v2f16(<2 x half> %val, ptr addrspace(8) %rsrc, i32 %vindex, i32 %voffset, i32 %soffset, i32 0) diff --git 
a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f32.ll index 3a6cea74de9e9..b5b3790c90f69 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmax.f32.ll @@ -461,51 +461,50 @@ define float @struct_ptr_buffer_atomic_add_f32_ret__vgpr_val__vgpr_rsrc__vgpr_vo ; GFX10-LABEL: struct_ptr_buffer_atomic_add_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_add__sgpr_soffset: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, exec_lo +; GFX10-NEXT: s_mov_b32 s8, exec_lo +; GFX10-NEXT: s_mov_b32 s9, s8 ; GFX10-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v1 -; GFX10-NEXT: v_readfirstlane_b32 s9, v2 -; GFX10-NEXT: v_readfirstlane_b32 s10, v3 -; GFX10-NEXT: v_readfirstlane_b32 s11, v4 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[1:2] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[3:4] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v1 +; GFX10-NEXT: v_readfirstlane_b32 s5, v2 +; GFX10-NEXT: v_readfirstlane_b32 s6, v3 +; GFX10-NEXT: v_readfirstlane_b32 s7, v4 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[1:2] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[3:4] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_fmax v0, v[5:6], s[8:11], s16 idxen offen offset:256 glc +; GFX10-NEXT: buffer_atomic_fmax v0, v[5:6], s[4:7], s16 idxen offen offset:256 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s9, s9 ; GFX10-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB8_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 
exec_lo, s5 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s8 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: struct_ptr_buffer_atomic_add_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_add__sgpr_soffset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s1, exec_lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s2, s1 ; GFX11-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: v_readfirstlane_b32 s4, v1 ; GFX11-NEXT: v_readfirstlane_b32 s5, v2 ; GFX11-NEXT: v_readfirstlane_b32 s6, v3 ; GFX11-NEXT: v_readfirstlane_b32 s7, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] -; GFX11-NEXT: v_cmp_eq_u64_e64 s1, s[6:7], v[3:4] -; GFX11-NEXT: s_and_b32 s1, vcc_lo, s1 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s1, s1 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[1:2] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[3:4] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: buffer_atomic_max_f32 v0, v[5:6], s[4:7], s0 idxen offen offset:256 glc +; GFX11-NEXT: s_and_not1_wrexec_b32 s2, s2 ; GFX11-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX11-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s1 ; GFX11-NEXT: s_cbranch_execnz .LBB8_1 ; GFX11-NEXT: ; %bb.2: -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -516,7 +515,9 @@ define float @struct_ptr_buffer_atomic_add_f32_ret__vgpr_val__vgpr_rsrc__vgpr_vo ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; GFX1200-NEXT: s_wait_kmcnt 0x0 -; GFX1200-NEXT: s_mov_b32 s2, exec_lo +; GFX1200-NEXT: s_mov_b32 s1, exec_lo +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) +; 
GFX1200-NEXT: s_mov_b32 s2, s1 ; GFX1200-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: v_readfirstlane_b32 s4, v1 ; GFX1200-NEXT: v_readfirstlane_b32 s5, v2 @@ -524,20 +525,18 @@ define float @struct_ptr_buffer_atomic_add_f32_ret__vgpr_val__vgpr_rsrc__vgpr_vo ; GFX1200-NEXT: v_readfirstlane_b32 s7, v4 ; GFX1200-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1200-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] -; GFX1200-NEXT: v_cmp_eq_u64_e64 s1, s[6:7], v[3:4] -; GFX1200-NEXT: s_and_b32 s1, vcc_lo, s1 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_saveexec_b32 s1, s1 +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[1:2] +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[3:4] ; GFX1200-NEXT: s_wait_loadcnt 0x0 ; GFX1200-NEXT: buffer_atomic_max_num_f32 v0, v[5:6], s[4:7], s0 idxen offen offset:256 th:TH_ATOMIC_RETURN +; GFX1200-NEXT: s_and_not1_wrexec_b32 s2, s2 ; GFX1200-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX1200-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX1200-NEXT: s_xor_b32 exec_lo, exec_lo, s1 ; GFX1200-NEXT: s_cbranch_execnz .LBB8_1 ; GFX1200-NEXT: ; %bb.2: -; GFX1200-NEXT: s_mov_b32 exec_lo, s2 +; GFX1200-NEXT: s_mov_b32 exec_lo, s1 ; GFX1200-NEXT: s_wait_loadcnt 0x0 +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: struct_ptr_buffer_atomic_add_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_add__sgpr_soffset: @@ -547,28 +546,27 @@ define float @struct_ptr_buffer_atomic_add_f32_ret__vgpr_val__vgpr_rsrc__vgpr_vo ; GFX1250-NEXT: v_dual_mov_b32 v8, v5 :: v_dual_mov_b32 v5, v4 ; GFX1250-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v3, v2 ; GFX1250-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_add_nc_u32 v9, 0x100, v6 -; GFX1250-NEXT: s_mov_b32 s2, exec_lo +; GFX1250-NEXT: s_mov_b32 s1, exec_lo +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_mov_b32 s2, s1 ; GFX1250-NEXT: .LBB8_1: ; 
=>This Inner Loop Header: Depth=1 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3) ; GFX1250-NEXT: v_readfirstlane_b32 s4, v2 ; GFX1250-NEXT: v_readfirstlane_b32 s5, v3 ; GFX1250-NEXT: v_readfirstlane_b32 s6, v4 ; GFX1250-NEXT: v_readfirstlane_b32 s7, v5 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] -; GFX1250-NEXT: v_cmp_eq_u64_e64 s1, s[6:7], v[4:5] -; GFX1250-NEXT: s_and_b32 s1, vcc_lo, s1 -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1250-NEXT: s_and_saveexec_b32 s1, s1 +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[2:3] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[4:5] ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: buffer_atomic_max_num_f32 v0, v[8:9], s[4:7], s0 idxen offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_and_not1_wrexec_b32 s2, s2 ; GFX1250-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX1250-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: s_xor_b32 exec_lo, exec_lo, s1 ; GFX1250-NEXT: s_cbranch_execnz .LBB8_1 ; GFX1250-NEXT: ; %bb.2: -; GFX1250-NEXT: s_mov_b32 exec_lo, s2 +; GFX1250-NEXT: s_mov_b32 exec_lo, s1 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %voffset.add = add i32 %voffset, 256 @@ -637,60 +635,57 @@ define float @struct_ptr_buffer_atomic_add_f32_ret__vgpr_val__sgpr_rsrc__vgpr_vo ; GFX10-LABEL: struct_ptr_buffer_atomic_add_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_add__vgpr_soffset: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s8, exec_lo +; GFX10-NEXT: s_mov_b32 s9, s8 ; GFX10-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, 
v1 -; GFX10-NEXT: v_readfirstlane_b32 s9, v2 -; GFX10-NEXT: v_readfirstlane_b32 s10, v3 -; GFX10-NEXT: v_readfirstlane_b32 s11, v4 -; GFX10-NEXT: v_readfirstlane_b32 s7, v7 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[1:2] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[3:4] -; GFX10-NEXT: v_cmp_eq_u32_e64 s5, s7, v7 -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_b32 s4, s4, s5 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v1 +; GFX10-NEXT: v_readfirstlane_b32 s5, v2 +; GFX10-NEXT: v_readfirstlane_b32 s6, v3 +; GFX10-NEXT: v_readfirstlane_b32 s7, v4 +; GFX10-NEXT: v_readfirstlane_b32 s10, v7 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[1:2] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[3:4] +; GFX10-NEXT: v_cmpx_eq_u32_e32 s10, v7 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_fmax v0, v[5:6], s[8:11], s7 idxen offen offset:256 glc +; GFX10-NEXT: buffer_atomic_fmax v0, v[5:6], s[4:7], s10 idxen offen offset:256 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s9, s9 ; GFX10-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: ; implicit-def: $vgpr7 ; GFX10-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB9_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s8 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: struct_ptr_buffer_atomic_add_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_add__vgpr_soffset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s4, exec_lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s5, s4 ; GFX11-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_readfirstlane_b32 
s4, v1 -; GFX11-NEXT: v_readfirstlane_b32 s5, v2 -; GFX11-NEXT: v_readfirstlane_b32 s6, v3 -; GFX11-NEXT: v_readfirstlane_b32 s7, v4 -; GFX11-NEXT: v_readfirstlane_b32 s3, v7 +; GFX11-NEXT: v_readfirstlane_b32 s0, v1 +; GFX11-NEXT: v_readfirstlane_b32 s1, v2 +; GFX11-NEXT: v_readfirstlane_b32 s2, v3 +; GFX11-NEXT: v_readfirstlane_b32 s3, v4 +; GFX11-NEXT: v_readfirstlane_b32 s6, v7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_and_b32 s0, s0, s1 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[1:2] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[3:4] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-NEXT: v_cmpx_eq_u32_e32 s6, v7 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_max_f32 v0, v[5:6], s[4:7], s3 idxen offen offset:256 glc +; GFX11-NEXT: buffer_atomic_max_f32 v0, v[5:6], s[0:3], s6 idxen offen offset:256 glc +; GFX11-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX11-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX11-NEXT: ; implicit-def: $vgpr7 ; GFX11-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB9_1 ; GFX11-NEXT: ; %bb.2: -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -701,33 +696,31 @@ define float @struct_ptr_buffer_atomic_add_f32_ret__vgpr_val__sgpr_rsrc__vgpr_vo ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; GFX1200-NEXT: s_wait_kmcnt 0x0 -; GFX1200-NEXT: s_mov_b32 s2, exec_lo +; GFX1200-NEXT: s_mov_b32 s4, exec_lo +; GFX1200-NEXT: 
s_wait_alu depctr_sa_sdst(0) +; GFX1200-NEXT: s_mov_b32 s5, s4 ; GFX1200-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX1200-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1200-NEXT: v_readfirstlane_b32 s5, v2 -; GFX1200-NEXT: v_readfirstlane_b32 s6, v3 -; GFX1200-NEXT: v_readfirstlane_b32 s7, v4 -; GFX1200-NEXT: v_readfirstlane_b32 s3, v7 +; GFX1200-NEXT: v_readfirstlane_b32 s0, v1 +; GFX1200-NEXT: v_readfirstlane_b32 s1, v2 +; GFX1200-NEXT: v_readfirstlane_b32 s2, v3 +; GFX1200-NEXT: v_readfirstlane_b32 s3, v4 +; GFX1200-NEXT: v_readfirstlane_b32 s6, v7 ; GFX1200-NEXT: s_wait_alu depctr_va_sdst(0) -; GFX1200-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[1:2] ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1200-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4] -; GFX1200-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX1200-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_b32 s0, s0, s1 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_saveexec_b32 s0, s0 +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[3:4] +; GFX1200-NEXT: v_cmpx_eq_u32_e32 s6, v7 ; GFX1200-NEXT: s_wait_loadcnt 0x0 -; GFX1200-NEXT: buffer_atomic_max_num_f32 v0, v[5:6], s[4:7], s3 idxen offen offset:256 th:TH_ATOMIC_RETURN +; GFX1200-NEXT: buffer_atomic_max_num_f32 v0, v[5:6], s[0:3], s6 idxen offen offset:256 th:TH_ATOMIC_RETURN +; GFX1200-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1200-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX1200-NEXT: ; implicit-def: $vgpr7 ; GFX1200-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX1200-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1200-NEXT: s_cbranch_execnz .LBB9_1 ; GFX1200-NEXT: ; %bb.2: -; GFX1200-NEXT: s_mov_b32 exec_lo, s2 +; GFX1200-NEXT: s_mov_b32 exec_lo, s4 ; GFX1200-NEXT: s_wait_loadcnt 0x0 +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: 
struct_ptr_buffer_atomic_add_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_add__vgpr_soffset: @@ -737,33 +730,30 @@ define float @struct_ptr_buffer_atomic_add_f32_ret__vgpr_val__sgpr_rsrc__vgpr_vo ; GFX1250-NEXT: v_dual_mov_b32 v8, v5 :: v_dual_mov_b32 v5, v4 ; GFX1250-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v3, v2 ; GFX1250-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_add_nc_u32 v9, 0x100, v6 -; GFX1250-NEXT: s_mov_b32 s2, exec_lo -; GFX1250-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_readfirstlane_b32 s4, v2 -; GFX1250-NEXT: v_readfirstlane_b32 s5, v3 -; GFX1250-NEXT: v_readfirstlane_b32 s6, v4 -; GFX1250-NEXT: v_readfirstlane_b32 s7, v5 -; GFX1250-NEXT: v_readfirstlane_b32 s3, v7 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] -; GFX1250-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[4:5] -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX1250-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX1250-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1250-NEXT: s_and_b32 s0, s0, s1 +; GFX1250-NEXT: s_mov_b32 s4, exec_lo ; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1250-NEXT: s_and_saveexec_b32 s0, s0 +; GFX1250-NEXT: s_mov_b32 s5, s4 +; GFX1250-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4) +; GFX1250-NEXT: v_readfirstlane_b32 s0, v2 +; GFX1250-NEXT: v_readfirstlane_b32 s1, v3 +; GFX1250-NEXT: v_readfirstlane_b32 s2, v4 +; GFX1250-NEXT: v_readfirstlane_b32 s3, v5 +; GFX1250-NEXT: v_readfirstlane_b32 s6, v7 +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[2:3] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[4:5] +; GFX1250-NEXT: v_cmpx_eq_u32_e32 s6, v7 ; 
GFX1250-NEXT: s_wait_loadcnt 0x0 -; GFX1250-NEXT: buffer_atomic_max_num_f32 v0, v[8:9], s[4:7], s3 idxen offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: buffer_atomic_max_num_f32 v0, v[8:9], s[0:3], s6 idxen offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1250-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX1250-NEXT: ; implicit-def: $vgpr7 ; GFX1250-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1250-NEXT: s_cbranch_execnz .LBB9_1 ; GFX1250-NEXT: ; %bb.2: -; GFX1250-NEXT: s_mov_b32 exec_lo, s2 +; GFX1250-NEXT: s_mov_b32 exec_lo, s4 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %voffset.add = add i32 %voffset, 256 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32.ll index a9f843f4a184a..e19c57c3bd0db 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.atomic.fmin.f32.ll @@ -461,51 +461,50 @@ define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_v ; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s5, exec_lo +; GFX10-NEXT: s_mov_b32 s8, exec_lo +; GFX10-NEXT: s_mov_b32 s9, s8 ; GFX10-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v1 -; GFX10-NEXT: v_readfirstlane_b32 s9, v2 -; GFX10-NEXT: v_readfirstlane_b32 s10, v3 -; GFX10-NEXT: v_readfirstlane_b32 s11, v4 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[1:2] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[3:4] -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v1 
+; GFX10-NEXT: v_readfirstlane_b32 s5, v2 +; GFX10-NEXT: v_readfirstlane_b32 s6, v3 +; GFX10-NEXT: v_readfirstlane_b32 s7, v4 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[1:2] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[3:4] ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_fmin v0, v[5:6], s[8:11], s16 idxen offen offset:256 glc +; GFX10-NEXT: buffer_atomic_fmin v0, v[5:6], s[4:7], s16 idxen offen offset:256 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s9, s9 ; GFX10-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB8_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s5 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s8 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s1, exec_lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s2, s1 ; GFX11-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX11-NEXT: v_readfirstlane_b32 s4, v1 ; GFX11-NEXT: v_readfirstlane_b32 s5, v2 ; GFX11-NEXT: v_readfirstlane_b32 s6, v3 ; GFX11-NEXT: v_readfirstlane_b32 s7, v4 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] -; GFX11-NEXT: v_cmp_eq_u64_e64 s1, s[6:7], v[3:4] -; GFX11-NEXT: s_and_b32 s1, vcc_lo, s1 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s1, s1 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[1:2] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[3:4] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: 
buffer_atomic_min_f32 v0, v[5:6], s[4:7], s0 idxen offen offset:256 glc +; GFX11-NEXT: s_and_not1_wrexec_b32 s2, s2 ; GFX11-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX11-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s1 ; GFX11-NEXT: s_cbranch_execnz .LBB8_1 ; GFX11-NEXT: ; %bb.2: -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -516,7 +515,9 @@ define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_v ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; GFX1200-NEXT: s_wait_kmcnt 0x0 -; GFX1200-NEXT: s_mov_b32 s2, exec_lo +; GFX1200-NEXT: s_mov_b32 s1, exec_lo +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX1200-NEXT: s_mov_b32 s2, s1 ; GFX1200-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 ; GFX1200-NEXT: v_readfirstlane_b32 s4, v1 ; GFX1200-NEXT: v_readfirstlane_b32 s5, v2 @@ -524,20 +525,18 @@ define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_v ; GFX1200-NEXT: v_readfirstlane_b32 s7, v4 ; GFX1200-NEXT: s_wait_alu depctr_va_sdst(0) ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1200-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] -; GFX1200-NEXT: v_cmp_eq_u64_e64 s1, s[6:7], v[3:4] -; GFX1200-NEXT: s_and_b32 s1, vcc_lo, s1 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_saveexec_b32 s1, s1 +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[1:2] +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[3:4] ; GFX1200-NEXT: s_wait_loadcnt 0x0 ; GFX1200-NEXT: buffer_atomic_min_num_f32 v0, v[5:6], s[4:7], s0 idxen offen offset:256 th:TH_ATOMIC_RETURN +; GFX1200-NEXT: s_and_not1_wrexec_b32 s2, s2 ; GFX1200-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX1200-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX1200-NEXT: s_xor_b32 exec_lo, exec_lo, s1 ; GFX1200-NEXT: s_cbranch_execnz .LBB8_1 ; GFX1200-NEXT: ; %bb.2: 
-; GFX1200-NEXT: s_mov_b32 exec_lo, s2 +; GFX1200-NEXT: s_mov_b32 exec_lo, s1 ; GFX1200-NEXT: s_wait_loadcnt 0x0 +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_voffset_fmin__sgpr_soffset: @@ -547,28 +546,27 @@ define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__vgpr_rsrc__vgpr_v ; GFX1250-NEXT: v_dual_mov_b32 v8, v5 :: v_dual_mov_b32 v5, v4 ; GFX1250-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v3, v2 ; GFX1250-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_add_nc_u32 v9, 0x100, v6 -; GFX1250-NEXT: s_mov_b32 s2, exec_lo +; GFX1250-NEXT: s_mov_b32 s1, exec_lo +; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1250-NEXT: s_mov_b32 s2, s1 ; GFX1250-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3) ; GFX1250-NEXT: v_readfirstlane_b32 s4, v2 ; GFX1250-NEXT: v_readfirstlane_b32 s5, v3 ; GFX1250-NEXT: v_readfirstlane_b32 s6, v4 ; GFX1250-NEXT: v_readfirstlane_b32 s7, v5 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] -; GFX1250-NEXT: v_cmp_eq_u64_e64 s1, s[6:7], v[4:5] -; GFX1250-NEXT: s_and_b32 s1, vcc_lo, s1 -; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1250-NEXT: s_and_saveexec_b32 s1, s1 +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[2:3] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[4:5] ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: buffer_atomic_min_num_f32 v0, v[8:9], s[4:7], s0 idxen offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_and_not1_wrexec_b32 s2, s2 ; GFX1250-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX1250-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX1250-NEXT: 
s_wait_xcnt 0x0 -; GFX1250-NEXT: s_xor_b32 exec_lo, exec_lo, s1 ; GFX1250-NEXT: s_cbranch_execnz .LBB8_1 ; GFX1250-NEXT: ; %bb.2: -; GFX1250-NEXT: s_mov_b32 exec_lo, s2 +; GFX1250-NEXT: s_mov_b32 exec_lo, s1 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %voffset.add = add i32 %voffset, 256 @@ -637,60 +635,57 @@ define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_v ; GFX10-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_mov_b32 s6, exec_lo +; GFX10-NEXT: s_mov_b32 s8, exec_lo +; GFX10-NEXT: s_mov_b32 s9, s8 ; GFX10-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s8, v1 -; GFX10-NEXT: v_readfirstlane_b32 s9, v2 -; GFX10-NEXT: v_readfirstlane_b32 s10, v3 -; GFX10-NEXT: v_readfirstlane_b32 s11, v4 -; GFX10-NEXT: v_readfirstlane_b32 s7, v7 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[1:2] -; GFX10-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[3:4] -; GFX10-NEXT: v_cmp_eq_u32_e64 s5, s7, v7 -; GFX10-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX10-NEXT: s_and_b32 s4, s4, s5 -; GFX10-NEXT: s_and_saveexec_b32 s4, s4 +; GFX10-NEXT: v_readfirstlane_b32 s4, v1 +; GFX10-NEXT: v_readfirstlane_b32 s5, v2 +; GFX10-NEXT: v_readfirstlane_b32 s6, v3 +; GFX10-NEXT: v_readfirstlane_b32 s7, v4 +; GFX10-NEXT: v_readfirstlane_b32 s10, v7 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[1:2] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[3:4] +; GFX10-NEXT: v_cmpx_eq_u32_e32 s10, v7 ; GFX10-NEXT: s_waitcnt vmcnt(0) -; GFX10-NEXT: buffer_atomic_fmin v0, v[5:6], s[8:11], s7 idxen offen offset:256 glc +; GFX10-NEXT: buffer_atomic_fmin v0, v[5:6], s[4:7], s10 idxen offen offset:256 glc +; GFX10-NEXT: s_andn2_wrexec_b32 s9, s9 ; GFX10-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX10-NEXT: ; implicit-def: $vgpr7 ; GFX10-NEXT: ; 
implicit-def: $vgpr5_vgpr6 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX10-NEXT: s_cbranch_execnz .LBB9_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s6 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s8 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s2, exec_lo +; GFX11-NEXT: s_mov_b32 s4, exec_lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s5, s4 ; GFX11-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-NEXT: v_readfirstlane_b32 s4, v1 -; GFX11-NEXT: v_readfirstlane_b32 s5, v2 -; GFX11-NEXT: v_readfirstlane_b32 s6, v3 -; GFX11-NEXT: v_readfirstlane_b32 s7, v4 -; GFX11-NEXT: v_readfirstlane_b32 s3, v7 +; GFX11-NEXT: v_readfirstlane_b32 s0, v1 +; GFX11-NEXT: v_readfirstlane_b32 s1, v2 +; GFX11-NEXT: v_readfirstlane_b32 s2, v3 +; GFX11-NEXT: v_readfirstlane_b32 s3, v4 +; GFX11-NEXT: v_readfirstlane_b32 s6, v7 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4] -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX11-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_and_b32 s0, s0, s1 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[1:2] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[3:4] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-NEXT: v_cmpx_eq_u32_e32 s6, v7 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: buffer_atomic_min_f32 v0, v[5:6], s[4:7], s3 idxen offen offset:256 glc +; 
GFX11-NEXT: buffer_atomic_min_f32 v0, v[5:6], s[0:3], s6 idxen offen offset:256 glc +; GFX11-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX11-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX11-NEXT: ; implicit-def: $vgpr7 ; GFX11-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB9_1 ; GFX11-NEXT: ; %bb.2: -; GFX11-NEXT: s_mov_b32 exec_lo, s2 +; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] ; @@ -701,33 +696,31 @@ define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_v ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; GFX1200-NEXT: s_wait_kmcnt 0x0 -; GFX1200-NEXT: s_mov_b32 s2, exec_lo +; GFX1200-NEXT: s_mov_b32 s4, exec_lo +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) +; GFX1200-NEXT: s_mov_b32 s5, s4 ; GFX1200-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX1200-NEXT: v_readfirstlane_b32 s4, v1 -; GFX1200-NEXT: v_readfirstlane_b32 s5, v2 -; GFX1200-NEXT: v_readfirstlane_b32 s6, v3 -; GFX1200-NEXT: v_readfirstlane_b32 s7, v4 -; GFX1200-NEXT: v_readfirstlane_b32 s3, v7 +; GFX1200-NEXT: v_readfirstlane_b32 s0, v1 +; GFX1200-NEXT: v_readfirstlane_b32 s1, v2 +; GFX1200-NEXT: v_readfirstlane_b32 s2, v3 +; GFX1200-NEXT: v_readfirstlane_b32 s3, v4 +; GFX1200-NEXT: v_readfirstlane_b32 s6, v7 ; GFX1200-NEXT: s_wait_alu depctr_va_sdst(0) -; GFX1200-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[1:2] +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[1:2] ; GFX1200-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1200-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[3:4] -; GFX1200-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX1200-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_b32 s0, s0, s1 -; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) -; GFX1200-NEXT: s_and_saveexec_b32 s0, s0 +; GFX1200-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[3:4] +; GFX1200-NEXT: 
v_cmpx_eq_u32_e32 s6, v7 ; GFX1200-NEXT: s_wait_loadcnt 0x0 -; GFX1200-NEXT: buffer_atomic_min_num_f32 v0, v[5:6], s[4:7], s3 idxen offen offset:256 th:TH_ATOMIC_RETURN +; GFX1200-NEXT: buffer_atomic_min_num_f32 v0, v[5:6], s[0:3], s6 idxen offen offset:256 th:TH_ATOMIC_RETURN +; GFX1200-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1200-NEXT: ; implicit-def: $vgpr1_vgpr2_vgpr3_vgpr4 ; GFX1200-NEXT: ; implicit-def: $vgpr7 ; GFX1200-NEXT: ; implicit-def: $vgpr5_vgpr6 -; GFX1200-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1200-NEXT: s_cbranch_execnz .LBB9_1 ; GFX1200-NEXT: ; %bb.2: -; GFX1200-NEXT: s_mov_b32 exec_lo, s2 +; GFX1200-NEXT: s_mov_b32 exec_lo, s4 ; GFX1200-NEXT: s_wait_loadcnt 0x0 +; GFX1200-NEXT: s_wait_alu depctr_sa_sdst(0) ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; ; GFX1250-LABEL: struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_voffset_fmin__vgpr_soffset: @@ -737,33 +730,30 @@ define float @struct_ptr_buffer_atomic_fmin_f32_ret__vgpr_val__sgpr_rsrc__vgpr_v ; GFX1250-NEXT: v_dual_mov_b32 v8, v5 :: v_dual_mov_b32 v5, v4 ; GFX1250-NEXT: v_dual_mov_b32 v4, v3 :: v_dual_mov_b32 v3, v2 ; GFX1250-NEXT: v_dual_mov_b32 v2, v1 :: v_dual_add_nc_u32 v9, 0x100, v6 -; GFX1250-NEXT: s_mov_b32 s2, exec_lo -; GFX1250-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_readfirstlane_b32 s4, v2 -; GFX1250-NEXT: v_readfirstlane_b32 s5, v3 -; GFX1250-NEXT: v_readfirstlane_b32 s6, v4 -; GFX1250-NEXT: v_readfirstlane_b32 s7, v5 -; GFX1250-NEXT: v_readfirstlane_b32 s3, v7 -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX1250-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] -; GFX1250-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[4:5] -; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) -; GFX1250-NEXT: v_cmp_eq_u32_e64 s1, s3, v7 -; GFX1250-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1250-NEXT: 
s_and_b32 s0, s0, s1 +; GFX1250-NEXT: s_mov_b32 s4, exec_lo ; GFX1250-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1250-NEXT: s_and_saveexec_b32 s0, s0 +; GFX1250-NEXT: s_mov_b32 s5, s4 +; GFX1250-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1 +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_4) | instid1(VALU_DEP_4) +; GFX1250-NEXT: v_readfirstlane_b32 s0, v2 +; GFX1250-NEXT: v_readfirstlane_b32 s1, v3 +; GFX1250-NEXT: v_readfirstlane_b32 s2, v4 +; GFX1250-NEXT: v_readfirstlane_b32 s3, v5 +; GFX1250-NEXT: v_readfirstlane_b32 s6, v7 +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[2:3] +; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX1250-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[4:5] +; GFX1250-NEXT: v_cmpx_eq_u32_e32 s6, v7 ; GFX1250-NEXT: s_wait_loadcnt 0x0 -; GFX1250-NEXT: buffer_atomic_min_num_f32 v0, v[8:9], s[4:7], s3 idxen offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: buffer_atomic_min_num_f32 v0, v[8:9], s[0:3], s6 idxen offen th:TH_ATOMIC_RETURN +; GFX1250-NEXT: s_wait_xcnt 0x0 +; GFX1250-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1250-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 ; GFX1250-NEXT: ; implicit-def: $vgpr7 ; GFX1250-NEXT: ; implicit-def: $vgpr8_vgpr9 -; GFX1250-NEXT: s_wait_xcnt 0x0 -; GFX1250-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1250-NEXT: s_cbranch_execnz .LBB9_1 ; GFX1250-NEXT: ; %bb.2: -; GFX1250-NEXT: s_mov_b32 exec_lo, s2 +; GFX1250-NEXT: s_mov_b32 exec_lo, s4 ; GFX1250-NEXT: s_wait_loadcnt 0x0 ; GFX1250-NEXT: s_set_pc_i64 s[30:31] %voffset.add = add i32 %voffset, 256 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll index f64f8a81a256f..dc4797ce8bc45 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.buffer.load.format.v3f16.ll @@ -9,24 +9,24 @@ define amdgpu_gs void @main(ptr 
addrspace(8) %arg, i32 %arg1) { ; GFX10-LABEL: main: ; GFX10: ; %bb.0: ; %bb -; GFX10-NEXT: s_mov_b32 s1, exec_lo +; GFX10-NEXT: s_mov_b32 s4, exec_lo +; GFX10-NEXT: s_mov_b32 s5, s4 ; GFX10-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; GFX10-NEXT: v_readfirstlane_b32 s4, v0 -; GFX10-NEXT: v_readfirstlane_b32 s5, v1 -; GFX10-NEXT: v_readfirstlane_b32 s6, v2 -; GFX10-NEXT: v_readfirstlane_b32 s7, v3 -; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX10-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX10-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX10-NEXT: s_and_saveexec_b32 s0, s0 -; GFX10-NEXT: buffer_load_format_d16_xyz v[5:6], v4, s[4:7], 0 idxen +; GFX10-NEXT: v_readfirstlane_b32 s0, v0 +; GFX10-NEXT: v_readfirstlane_b32 s1, v1 +; GFX10-NEXT: v_readfirstlane_b32 s2, v2 +; GFX10-NEXT: v_readfirstlane_b32 s3, v3 +; GFX10-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX10-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX10-NEXT: buffer_load_format_d16_xyz v[5:6], v4, s[0:3], 0 idxen +; GFX10-NEXT: s_andn2_wrexec_b32 s5, s5 ; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX10-NEXT: ; implicit-def: $vgpr4 -; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX10-NEXT: s_cbranch_execnz .LBB0_1 ; GFX10-NEXT: ; %bb.2: -; GFX10-NEXT: s_mov_b32 exec_lo, s1 +; GFX10-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX10-NEXT: s_mov_b32 exec_lo, s4 ; GFX10-NEXT: s_waitcnt vmcnt(0) ; GFX10-NEXT: v_lshrrev_b32_e32 v0, 16, v5 ; GFX10-NEXT: v_and_b32_e32 v1, 0xffff, v6 @@ -89,25 +89,24 @@ define amdgpu_gs void @main(ptr addrspace(8) %arg, i32 %arg1) { ; ; GFX11-TRUE16-LABEL: main: ; GFX11-TRUE16: ; %bb.0: ; %bb -; GFX11-TRUE16-NEXT: s_mov_b32 s1, exec_lo +; GFX11-TRUE16-NEXT: s_mov_b32 s4, exec_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-TRUE16-NEXT: s_mov_b32 s5, s4 ; GFX11-TRUE16-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; 
GFX11-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s5, v1 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-TRUE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-TRUE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-TRUE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-TRUE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-TRUE16-NEXT: buffer_load_d16_format_xyz v[5:6], v4, s[4:7], 0 idxen +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-TRUE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-TRUE16-NEXT: buffer_load_d16_format_xyz v[5:6], v4, s[0:3], 0 idxen +; GFX11-TRUE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX11-TRUE16-NEXT: ; implicit-def: $vgpr4 -; GFX11-TRUE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-TRUE16-NEXT: s_cbranch_execnz .LBB0_1 ; GFX11-TRUE16-NEXT: ; %bb.2: -; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-TRUE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.h, 0 ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-TRUE16-NEXT: v_mov_b16_e32 v0.l, v5.h @@ -116,25 +115,24 @@ define amdgpu_gs void @main(ptr addrspace(8) %arg, i32 %arg1) { ; ; GFX11-FAKE16-LABEL: main: ; GFX11-FAKE16: ; %bb.0: ; %bb -; GFX11-FAKE16-NEXT: s_mov_b32 s1, exec_lo +; GFX11-FAKE16-NEXT: s_mov_b32 s4, exec_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-FAKE16-NEXT: s_mov_b32 s5, s4 ; GFX11-FAKE16-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 -; GFX11-FAKE16-NEXT: 
v_readfirstlane_b32 s5, v1 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s6, v2 -; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s7, v3 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s1, v1 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s2, v2 +; GFX11-FAKE16-NEXT: v_readfirstlane_b32 s3, v3 ; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX11-FAKE16-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX11-FAKE16-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-FAKE16-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-FAKE16-NEXT: buffer_load_d16_format_xyz v[5:6], v4, s[4:7], 0 idxen +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX11-FAKE16-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX11-FAKE16-NEXT: buffer_load_d16_format_xyz v[5:6], v4, s[0:3], 0 idxen +; GFX11-FAKE16-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX11-FAKE16-NEXT: ; implicit-def: $vgpr4 -; GFX11-FAKE16-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-FAKE16-NEXT: s_cbranch_execnz .LBB0_1 ; GFX11-FAKE16-NEXT: ; %bb.2: -; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-FAKE16-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) ; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v5 ; GFX11-FAKE16-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_and_b32 v1, 0xffff, v6 diff --git a/llvm/test/CodeGen/AMDGPU/move-to-valu-vimage-vsample.ll b/llvm/test/CodeGen/AMDGPU/move-to-valu-vimage-vsample.ll index 10d363648e3ae..e29dab04ed0b1 100644 --- a/llvm/test/CodeGen/AMDGPU/move-to-valu-vimage-vsample.ll +++ b/llvm/test/CodeGen/AMDGPU/move-to-valu-vimage-vsample.ll @@ -19,37 +19,35 @@ define amdgpu_ps float @vimage_move_to_valu(<8 x i32> %rsrc) { ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1, [[COPY5]], 
%subreg.sub2, [[COPY4]], %subreg.sub3, [[COPY3]], %subreg.sub4, [[COPY2]], %subreg.sub5, [[COPY1]], %subreg.sub6, [[COPY]], %subreg.sub7 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo + ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: bb.1: ; GFX11-NEXT: successors: %bb.2(0x80000000) ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.0, %14, %bb.2 ; GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec ; GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX11-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec ; GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec ; GFX11-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX11-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec - ; GFX11-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc ; GFX11-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub4, implicit $exec ; GFX11-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub5, implicit 
$exec ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_4]], %subreg.sub0, [[V_READFIRSTLANE_B32_5]], %subreg.sub1 - ; GFX11-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub4_sub5, implicit $exec - ; GFX11-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U64_e64_2]], implicit-def $scc ; GFX11-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub6, implicit $exec ; GFX11-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub7, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_6]], %subreg.sub0, [[V_READFIRSTLANE_B32_7]], %subreg.sub1 - ; GFX11-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE]].sub6_sub7, implicit $exec - ; GFX11-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_1]], [[V_CMP_EQ_U64_e64_3]], implicit-def $scc ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3, [[V_READFIRSTLANE_B32_4]], %subreg.sub4, [[V_READFIRSTLANE_B32_5]], %subreg.sub5, [[V_READFIRSTLANE_B32_6]], %subreg.sub6, [[V_READFIRSTLANE_B32_7]], %subreg.sub7 - ; GFX11-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX11-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit-def $exec, implicit $exec + ; GFX11-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit-def $exec, implicit $exec + ; GFX11-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub4_sub5, implicit-def 
$exec, implicit $exec + ; GFX11-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE4]], [[REG_SEQUENCE]].sub6_sub7, implicit-def $exec, implicit $exec ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: bb.2: ; GFX11-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[IMAGE_LOAD_V1_V2_nsa_gfx11_:%[0-9]+]]:vgpr_32 = IMAGE_LOAD_V1_V2_nsa_gfx11 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], killed [[REG_SEQUENCE5]], 1, 1, -1, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) - ; GFX11-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc + ; GFX11-NEXT: [[S_ANDN2_WREXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 [[PHI]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX11-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: bb.3: @@ -73,37 +71,35 @@ define amdgpu_ps float @vimage_move_to_valu(<8 x i32> %rsrc) { ; GFX12-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_256 = REG_SEQUENCE [[COPY7]], %subreg.sub0, [[COPY6]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[COPY4]], %subreg.sub3, [[COPY3]], %subreg.sub4, [[COPY2]], %subreg.sub5, [[COPY1]], %subreg.sub6, [[COPY]], %subreg.sub7 ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo + ; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.1: ; GFX12-NEXT: successors: %bb.2(0x80000000) ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.0, %14, %bb.2 ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec - ; GFX12-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc ; GFX12-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub4, implicit $exec ; GFX12-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub5, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_4]], %subreg.sub0, [[V_READFIRSTLANE_B32_5]], %subreg.sub1 - ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub4_sub5, implicit $exec - ; GFX12-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U64_e64_2]], implicit-def $scc ; GFX12-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub6, implicit $exec ; GFX12-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub7, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_6]], %subreg.sub0, [[V_READFIRSTLANE_B32_7]], %subreg.sub1 - ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], 
[[REG_SEQUENCE]].sub6_sub7, implicit $exec - ; GFX12-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_1]], [[V_CMP_EQ_U64_e64_3]], implicit-def $scc ; GFX12-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3, [[V_READFIRSTLANE_B32_4]], %subreg.sub4, [[V_READFIRSTLANE_B32_5]], %subreg.sub5, [[V_READFIRSTLANE_B32_6]], %subreg.sub6, [[V_READFIRSTLANE_B32_7]], %subreg.sub7 - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX12-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit-def $exec, implicit $exec + ; GFX12-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit-def $exec, implicit $exec + ; GFX12-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub4_sub5, implicit-def $exec, implicit $exec + ; GFX12-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE4]], [[REG_SEQUENCE]].sub6_sub7, implicit-def $exec, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[IMAGE_LOAD_V1_V2_gfx12_:%[0-9]+]]:vgpr_32 = IMAGE_LOAD_V1_V2_gfx12 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_]], killed [[REG_SEQUENCE5]], 1, 1, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) - ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc + ; GFX12-NEXT: [[S_ANDN2_WREXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 [[PHI]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: @@ -137,37 +133,35 @@ define amdgpu_ps float 
@vsample_move_to_valu_rsrc(<8 x i32> %rsrc, <4 x i32> inr ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo + ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: bb.1: ; GFX11-NEXT: successors: %bb.2(0x80000000) ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.0, %19, %bb.2 ; GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec ; GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX11-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec ; GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec ; GFX11-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX11-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec - ; GFX11-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc ; GFX11-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub4, implicit $exec ; GFX11-NEXT: 
[[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub5, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_4]], %subreg.sub0, [[V_READFIRSTLANE_B32_5]], %subreg.sub1 - ; GFX11-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE]].sub4_sub5, implicit $exec - ; GFX11-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U64_e64_2]], implicit-def $scc ; GFX11-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub6, implicit $exec ; GFX11-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub7, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_6]], %subreg.sub0, [[V_READFIRSTLANE_B32_7]], %subreg.sub1 - ; GFX11-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE5]], [[REG_SEQUENCE]].sub6_sub7, implicit $exec - ; GFX11-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_1]], [[V_CMP_EQ_U64_e64_3]], implicit-def $scc ; GFX11-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3, [[V_READFIRSTLANE_B32_4]], %subreg.sub4, [[V_READFIRSTLANE_B32_5]], %subreg.sub5, [[V_READFIRSTLANE_B32_6]], %subreg.sub6, [[V_READFIRSTLANE_B32_7]], %subreg.sub7 - ; GFX11-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX11-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub0_sub1, implicit-def $exec, implicit $exec + ; GFX11-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub2_sub3, implicit-def $exec, implicit $exec + ; 
GFX11-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE4]], [[REG_SEQUENCE]].sub4_sub5, implicit-def $exec, implicit $exec + ; GFX11-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE5]], [[REG_SEQUENCE]].sub6_sub7, implicit-def $exec, implicit $exec ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: bb.2: ; GFX11-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[IMAGE_SAMPLE_V1_V1_gfx11_:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_V1_V1_gfx11 [[V_MOV_B32_e32_]], killed [[REG_SEQUENCE6]], [[REG_SEQUENCE1]], 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) - ; GFX11-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc + ; GFX11-NEXT: [[S_ANDN2_WREXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 [[PHI]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX11-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: bb.3: @@ -196,37 +190,35 @@ define amdgpu_ps float @vsample_move_to_valu_rsrc(<8 x i32> %rsrc, <4 x i32> inr ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3 ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo + ; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.1: ; GFX12-NEXT: successors: %bb.2(0x80000000) ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.0, %19, %bb.2 ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, 
[[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec - ; GFX12-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc ; GFX12-NEXT: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub4, implicit $exec ; GFX12-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub5, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_4]], %subreg.sub0, [[V_READFIRSTLANE_B32_5]], %subreg.sub1 - ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE4]], [[REG_SEQUENCE]].sub4_sub5, implicit $exec - ; GFX12-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_]], [[V_CMP_EQ_U64_e64_2]], implicit-def $scc ; GFX12-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub6, implicit $exec ; GFX12-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub7, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_6]], %subreg.sub0, [[V_READFIRSTLANE_B32_7]], %subreg.sub1 - ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE5]], 
[[REG_SEQUENCE]].sub6_sub7, implicit $exec - ; GFX12-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[S_AND_B32_1]], [[V_CMP_EQ_U64_e64_3]], implicit-def $scc ; GFX12-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_256 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3, [[V_READFIRSTLANE_B32_4]], %subreg.sub4, [[V_READFIRSTLANE_B32_5]], %subreg.sub5, [[V_READFIRSTLANE_B32_6]], %subreg.sub6, [[V_READFIRSTLANE_B32_7]], %subreg.sub7 - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_2]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX12-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub0_sub1, implicit-def $exec, implicit $exec + ; GFX12-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub2_sub3, implicit-def $exec, implicit $exec + ; GFX12-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE4]], [[REG_SEQUENCE]].sub4_sub5, implicit-def $exec, implicit $exec + ; GFX12-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE5]], [[REG_SEQUENCE]].sub6_sub7, implicit-def $exec, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[IMAGE_SAMPLE_V1_V1_gfx12_:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_V1_V1_gfx12 [[V_MOV_B32_e32_]], killed [[REG_SEQUENCE6]], [[REG_SEQUENCE1]], 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) - ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc + ; GFX12-NEXT: [[S_ANDN2_WREXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 [[PHI]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: @@ -260,27 +252,27 @@ define amdgpu_ps float 
@vsample_move_to_valu_samp(<8 x i32> inreg %rsrc, <4 x i3 ; GFX11-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3 ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX11-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo + ; GFX11-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: bb.1: ; GFX11-NEXT: successors: %bb.2(0x80000000) ; GFX11-NEXT: {{ $}} + ; GFX11-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.0, %19, %bb.2 ; GFX11-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE1]].sub0, implicit $exec ; GFX11-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE1]].sub1, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX11-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE1]].sub0_sub1, implicit $exec ; GFX11-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE1]].sub2, implicit $exec ; GFX11-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE1]].sub3, implicit $exec ; GFX11-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX11-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]].sub2_sub3, implicit $exec - ; GFX11-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc ; GFX11-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, 
[[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX11-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX11-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE2]], [[REG_SEQUENCE1]].sub0_sub1, implicit-def $exec, implicit $exec + ; GFX11-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE3]], [[REG_SEQUENCE1]].sub2_sub3, implicit-def $exec, implicit $exec ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: bb.2: ; GFX11-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: [[IMAGE_SAMPLE_V1_V1_gfx11_:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_V1_V1_gfx11 [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], killed [[REG_SEQUENCE4]], 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) - ; GFX11-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc + ; GFX11-NEXT: [[S_ANDN2_WREXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 [[PHI]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX11-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; GFX11-NEXT: {{ $}} ; GFX11-NEXT: bb.3: @@ -309,27 +301,27 @@ define amdgpu_ps float @vsample_move_to_valu_samp(<8 x i32> inreg %rsrc, <4 x i3 ; GFX12-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3 ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; GFX12-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo + ; GFX12-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.1: ; GFX12-NEXT: successors: %bb.2(0x80000000) ; GFX12-NEXT: {{ $}} + ; GFX12-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.0, %19, %bb.2 ; GFX12-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 
[[REG_SEQUENCE1]].sub0, implicit $exec ; GFX12-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE1]].sub1, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE1]].sub0_sub1, implicit $exec ; GFX12-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE1]].sub2, implicit $exec ; GFX12-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE1]].sub3, implicit $exec ; GFX12-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; GFX12-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[REG_SEQUENCE1]].sub2_sub3, implicit $exec - ; GFX12-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc ; GFX12-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; GFX12-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX12-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE2]], [[REG_SEQUENCE1]].sub0_sub1, implicit-def $exec, implicit $exec + ; GFX12-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE3]], [[REG_SEQUENCE1]].sub2_sub3, implicit-def $exec, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.2: ; GFX12-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: [[IMAGE_SAMPLE_V1_V1_gfx12_:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_V1_V1_gfx12 [[V_MOV_B32_e32_]], 
[[REG_SEQUENCE]], killed [[REG_SEQUENCE4]], 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) - ; GFX12-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc + ; GFX12-NEXT: [[S_ANDN2_WREXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 [[PHI]], implicit-def $exec, implicit-def $scc, implicit $exec ; GFX12-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; GFX12-NEXT: {{ $}} ; GFX12-NEXT: bb.3: diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll index c01ab74c5a909..04d763f0ea6ea 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands-non-ptr-intrinsics.ll @@ -38,24 +38,24 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; GFX1010_W32-LABEL: mubuf_vgpr: ; GFX1010_W32: ; %bb.0: ; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s8, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s9, s8 ; GFX1010_W32-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v0 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s9, v1 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s10, v2 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s11, v3 -; GFX1010_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX1010_W32-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX1010_W32-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX1010_W32-NEXT: s_and_saveexec_b32 s4, s4 -; GFX1010_W32-NEXT: buffer_load_format_x v5, v4, s[8:11], 0 idxen +; GFX1010_W32-NEXT: v_readfirstlane_b32 s4, v0 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s5, v1 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s6, v2 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX1010_W32-NEXT: 
v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX1010_W32-NEXT: buffer_load_format_x v5, v4, s[4:7], 0 idxen +; GFX1010_W32-NEXT: s_andn2_wrexec_b32 s9, s9 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr4 -; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX1010_W32-NEXT: s_cbranch_execnz .LBB0_1 ; GFX1010_W32-NEXT: ; %bb.2: -; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s5 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s8 ; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) ; GFX1010_W32-NEXT: v_mov_b32_e32 v0, v5 ; GFX1010_W32-NEXT: s_setpc_b64 s[30:31] @@ -63,24 +63,24 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; GFX1010_W64-LABEL: mubuf_vgpr: ; GFX1010_W64: ; %bb.0: ; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec +; GFX1010_W64-NEXT: s_mov_b64 s[8:9], exec +; GFX1010_W64-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX1010_W64-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v0 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s9, v1 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s10, v2 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s11, v3 -; GFX1010_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GFX1010_W64-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] -; GFX1010_W64-NEXT: s_and_b64 s[4:5], vcc, s[4:5] -; GFX1010_W64-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX1010_W64-NEXT: buffer_load_format_x v5, v4, s[8:11], 0 idxen +; GFX1010_W64-NEXT: v_readfirstlane_b32 s4, v0 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s5, v1 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s6, v2 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX1010_W64-NEXT: buffer_load_format_x v5, v4, s[4:7], 0 idxen +; 
GFX1010_W64-NEXT: s_andn2_wrexec_b64 s[10:11], s[10:11] ; GFX1010_W64-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX1010_W64-NEXT: ; implicit-def: $vgpr4 -; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W64-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX1010_W64-NEXT: s_cbranch_execnz .LBB0_1 ; GFX1010_W64-NEXT: ; %bb.2: -; GFX1010_W64-NEXT: s_mov_b64 exec, s[6:7] +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W64-NEXT: s_mov_b64 exec, s[8:9] ; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) ; GFX1010_W64-NEXT: v_mov_b32_e32 v0, v5 ; GFX1010_W64-NEXT: s_setpc_b64 s[30:31] @@ -88,25 +88,24 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; GFX1100_W32-LABEL: mubuf_vgpr: ; GFX1100_W32: ; %bb.0: ; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo +; GFX1100_W32-NEXT: s_mov_b32 s4, exec_lo +; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W32-NEXT: s_mov_b32 s5, s4 ; GFX1100_W32-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v0 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s5, v1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s6, v2 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1100_W32-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX1100_W32-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX1100_W32-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W32-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1100_W32-NEXT: buffer_load_format_x v5, v4, s[4:7], 0 idxen +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX1100_W32-NEXT: buffer_load_format_x v5, 
v4, s[0:3], 0 idxen +; GFX1100_W32-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr4 -; GFX1100_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1100_W32-NEXT: s_cbranch_execnz .LBB0_1 ; GFX1100_W32-NEXT: ; %bb.2: -; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s1 +; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s4 ; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) ; GFX1100_W32-NEXT: v_mov_b32_e32 v0, v5 ; GFX1100_W32-NEXT: s_setpc_b64 s[30:31] @@ -114,25 +113,24 @@ define float @mubuf_vgpr(<4 x i32> %i, i32 %c) #0 { ; GFX1100_W64-LABEL: mubuf_vgpr: ; GFX1100_W64: ; %bb.0: ; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec +; GFX1100_W64-NEXT: s_mov_b64 s[4:5], exec +; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W64-NEXT: s_mov_b64 s[6:7], s[4:5] ; GFX1100_W64-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v0 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s5, v1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s6, v2 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1100_W64-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX1100_W64-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX1100_W64-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W64-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX1100_W64-NEXT: buffer_load_format_x v5, v4, s[4:7], 0 idxen +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX1100_W64-NEXT: buffer_load_format_x v5, v4, s[0:3], 0 idxen +; GFX1100_W64-NEXT: s_and_not1_wrexec_b64 s[6:7], 
s[6:7] ; GFX1100_W64-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX1100_W64-NEXT: ; implicit-def: $vgpr4 -; GFX1100_W64-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX1100_W64-NEXT: s_cbranch_execnz .LBB0_1 ; GFX1100_W64-NEXT: ; %bb.2: -; GFX1100_W64-NEXT: s_mov_b64 exec, s[2:3] +; GFX1100_W64-NEXT: s_mov_b64 exec, s[4:5] ; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) ; GFX1100_W64-NEXT: v_mov_b32_e32 v0, v5 ; GFX1100_W64-NEXT: s_setpc_b64 s[30:31] @@ -293,41 +291,41 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt ; GFX1010_W32-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1010_W32: ; %bb.0: ; %entry ; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s8, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s9, s8 ; GFX1010_W32-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v0 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s9, v1 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s10, v2 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s11, v3 -; GFX1010_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX1010_W32-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX1010_W32-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX1010_W32-NEXT: s_and_saveexec_b32 s4, s4 -; GFX1010_W32-NEXT: buffer_load_format_x v13, v8, s[8:11], 0 idxen +; GFX1010_W32-NEXT: v_readfirstlane_b32 s4, v0 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s5, v1 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s6, v2 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX1010_W32-NEXT: buffer_load_format_x v13, v8, s[4:7], 0 idxen +; GFX1010_W32-NEXT: s_andn2_wrexec_b32 s9, s9 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; 
GFX1010_W32-NEXT: s_cbranch_execnz .LBB1_1 ; GFX1010_W32-NEXT: ; %bb.2: -; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s5 -; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s8 +; GFX1010_W32-NEXT: s_mov_b32 s8, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s9, s8 ; GFX1010_W32-NEXT: .LBB1_3: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v4 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s9, v5 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s10, v6 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s11, v7 -; GFX1010_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[4:5] -; GFX1010_W32-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[6:7] -; GFX1010_W32-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX1010_W32-NEXT: s_and_saveexec_b32 s4, s4 -; GFX1010_W32-NEXT: buffer_load_format_x v0, v8, s[8:11], 0 idxen +; GFX1010_W32-NEXT: v_readfirstlane_b32 s4, v4 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s5, v5 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s6, v6 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[4:5] +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[6:7] +; GFX1010_W32-NEXT: buffer_load_format_x v0, v8, s[4:7], 0 idxen +; GFX1010_W32-NEXT: s_andn2_wrexec_b32 s9, s9 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr8 -; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX1010_W32-NEXT: s_cbranch_execnz .LBB1_3 ; GFX1010_W32-NEXT: ; %bb.4: -; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s5 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s8 ; GFX1010_W32-NEXT: s_waitcnt vmcnt(1) ; GFX1010_W32-NEXT: global_store_dword v[9:10], v13, off ; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0 @@ -339,41 +337,41 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x 
i32> %j, i32 %c, pt ; GFX1010_W64-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1010_W64: ; %bb.0: ; %entry ; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec +; GFX1010_W64-NEXT: s_mov_b64 s[8:9], exec +; GFX1010_W64-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX1010_W64-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v0 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s9, v1 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s10, v2 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s11, v3 -; GFX1010_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GFX1010_W64-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] -; GFX1010_W64-NEXT: s_and_b64 s[4:5], vcc, s[4:5] -; GFX1010_W64-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX1010_W64-NEXT: buffer_load_format_x v13, v8, s[8:11], 0 idxen +; GFX1010_W64-NEXT: v_readfirstlane_b32 s4, v0 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s5, v1 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s6, v2 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX1010_W64-NEXT: buffer_load_format_x v13, v8, s[4:7], 0 idxen +; GFX1010_W64-NEXT: s_andn2_wrexec_b64 s[10:11], s[10:11] ; GFX1010_W64-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W64-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX1010_W64-NEXT: s_cbranch_execnz .LBB1_1 ; GFX1010_W64-NEXT: ; %bb.2: -; GFX1010_W64-NEXT: s_mov_b64 exec, s[6:7] -; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W64-NEXT: s_mov_b64 exec, s[8:9] +; GFX1010_W64-NEXT: s_mov_b64 s[8:9], exec +; GFX1010_W64-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX1010_W64-NEXT: .LBB1_3: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v4 -; GFX1010_W64-NEXT: 
v_readfirstlane_b32 s9, v5 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s10, v6 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s11, v7 -; GFX1010_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[4:5] -; GFX1010_W64-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[6:7] -; GFX1010_W64-NEXT: s_and_b64 s[4:5], vcc, s[4:5] -; GFX1010_W64-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX1010_W64-NEXT: buffer_load_format_x v0, v8, s[8:11], 0 idxen +; GFX1010_W64-NEXT: v_readfirstlane_b32 s4, v4 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s5, v5 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s6, v6 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[4:5] +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[6:7] +; GFX1010_W64-NEXT: buffer_load_format_x v0, v8, s[4:7], 0 idxen +; GFX1010_W64-NEXT: s_andn2_wrexec_b64 s[10:11], s[10:11] ; GFX1010_W64-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1010_W64-NEXT: ; implicit-def: $vgpr8 -; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W64-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX1010_W64-NEXT: s_cbranch_execnz .LBB1_3 ; GFX1010_W64-NEXT: ; %bb.4: -; GFX1010_W64-NEXT: s_mov_b64 exec, s[6:7] +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W64-NEXT: s_mov_b64 exec, s[8:9] ; GFX1010_W64-NEXT: s_waitcnt vmcnt(1) ; GFX1010_W64-NEXT: global_store_dword v[9:10], v13, off ; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0 @@ -385,44 +383,41 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt ; GFX1100_W32-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1100_W32: ; %bb.0: ; %entry ; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo +; GFX1100_W32-NEXT: s_mov_b32 s4, exec_lo +; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W32-NEXT: s_mov_b32 s5, s4 ; GFX1100_W32-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W32-NEXT: 
v_readfirstlane_b32 s4, v0 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s5, v1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s6, v2 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1100_W32-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX1100_W32-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX1100_W32-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W32-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1100_W32-NEXT: buffer_load_format_x v13, v8, s[4:7], 0 idxen +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX1100_W32-NEXT: buffer_load_format_x v13, v8, s[0:3], 0 idxen +; GFX1100_W32-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1100_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1100_W32-NEXT: s_cbranch_execnz .LBB1_1 ; GFX1100_W32-NEXT: ; %bb.2: -; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s1 -; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo +; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s4 +; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1100_W32-NEXT: s_mov_b32 s4, exec_lo +; GFX1100_W32-NEXT: s_mov_b32 s5, s4 ; GFX1100_W32-NEXT: .LBB1_3: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v4 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s5, v5 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s6, v6 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s0, v4 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s1, v5 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s2, v6 +; GFX1100_W32-NEXT: 
v_readfirstlane_b32 s3, v7 ; GFX1100_W32-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5] -; GFX1100_W32-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[6:7] -; GFX1100_W32-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W32-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1100_W32-NEXT: buffer_load_format_x v0, v8, s[4:7], 0 idxen +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[4:5] +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[6:7] +; GFX1100_W32-NEXT: buffer_load_format_x v0, v8, s[0:3], 0 idxen +; GFX1100_W32-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr8 -; GFX1100_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1100_W32-NEXT: s_cbranch_execnz .LBB1_3 ; GFX1100_W32-NEXT: ; %bb.4: -; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s1 +; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s4 ; GFX1100_W32-NEXT: s_waitcnt vmcnt(1) ; GFX1100_W32-NEXT: global_store_b32 v[9:10], v13, off dlc ; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0 @@ -434,44 +429,41 @@ define void @mubuf_vgpr_adjacent_in_block(<4 x i32> %i, <4 x i32> %j, i32 %c, pt ; GFX1100_W64-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1100_W64: ; %bb.0: ; %entry ; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec +; GFX1100_W64-NEXT: s_mov_b64 s[4:5], exec +; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W64-NEXT: s_mov_b64 s[6:7], s[4:5] ; GFX1100_W64-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v0 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s5, v1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s6, v2 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1100_W64-NEXT: 
v_readfirstlane_b32 s3, v3 ; GFX1100_W64-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX1100_W64-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX1100_W64-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W64-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX1100_W64-NEXT: buffer_load_format_x v13, v8, s[4:7], 0 idxen +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX1100_W64-NEXT: buffer_load_format_x v13, v8, s[0:3], 0 idxen +; GFX1100_W64-NEXT: s_and_not1_wrexec_b64 s[6:7], s[6:7] ; GFX1100_W64-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1100_W64-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX1100_W64-NEXT: s_cbranch_execnz .LBB1_1 ; GFX1100_W64-NEXT: ; %bb.2: -; GFX1100_W64-NEXT: s_mov_b64 exec, s[2:3] -; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec +; GFX1100_W64-NEXT: s_mov_b64 exec, s[4:5] +; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1100_W64-NEXT: s_mov_b64 s[4:5], exec +; GFX1100_W64-NEXT: s_mov_b64 s[6:7], s[4:5] ; GFX1100_W64-NEXT: .LBB1_3: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v4 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s5, v5 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s6, v6 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s0, v4 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s1, v5 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s2, v6 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s3, v7 ; GFX1100_W64-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[4:5] -; GFX1100_W64-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[6:7] -; GFX1100_W64-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX1100_W64-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) -; GFX1100_W64-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX1100_W64-NEXT: buffer_load_format_x v0, v8, s[4:7], 0 idxen +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[4:5] +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[6:7] +; GFX1100_W64-NEXT: buffer_load_format_x v0, v8, s[0:3], 0 idxen +; GFX1100_W64-NEXT: s_and_not1_wrexec_b64 s[6:7], s[6:7] ; GFX1100_W64-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1100_W64-NEXT: ; implicit-def: $vgpr8 -; GFX1100_W64-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX1100_W64-NEXT: s_cbranch_execnz .LBB1_3 ; GFX1100_W64-NEXT: ; %bb.4: -; GFX1100_W64-NEXT: s_mov_b64 exec, s[2:3] +; GFX1100_W64-NEXT: s_mov_b64 exec, s[4:5] ; GFX1100_W64-NEXT: s_waitcnt vmcnt(1) ; GFX1100_W64-NEXT: global_store_b32 v[9:10], v13, off dlc ; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0 @@ -767,53 +759,53 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; GFX1010_W32-NEXT: ;;#ASMSTART ; GFX1010_W32-NEXT: s_mov_b32 s4, 17 ; GFX1010_W32-NEXT: ;;#ASMEND -; GFX1010_W32-NEXT: v_mov_b32_e32 v8, s4 -; GFX1010_W32-NEXT: s_mov_b32 s6, exec_lo +; GFX1010_W32-NEXT: v_mov_b32_e32 v9, s4 +; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s6, s5 ; GFX1010_W32-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 ; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v0 ; GFX1010_W32-NEXT: v_readfirstlane_b32 s9, v1 ; GFX1010_W32-NEXT: v_readfirstlane_b32 s10, v2 ; GFX1010_W32-NEXT: v_readfirstlane_b32 s11, v3 -; GFX1010_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX1010_W32-NEXT: v_cmp_eq_u64_e64 s5, s[10:11], v[2:3] -; GFX1010_W32-NEXT: s_and_b32 s5, vcc_lo, s5 -; GFX1010_W32-NEXT: s_and_saveexec_b32 s5, s5 -; GFX1010_W32-NEXT: buffer_load_format_x v9, v8, s[8:11], 0 idxen +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[8:9], v[0:1] +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[10:11], v[2:3] +; GFX1010_W32-NEXT: buffer_load_format_x v8, v9, 
s[8:11], 0 idxen +; GFX1010_W32-NEXT: s_andn2_wrexec_b32 s6, s6 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1010_W32-NEXT: ; implicit-def: $vgpr8 -; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s5 +; GFX1010_W32-NEXT: ; implicit-def: $vgpr9 ; GFX1010_W32-NEXT: s_cbranch_execnz .LBB2_1 ; GFX1010_W32-NEXT: ; %bb.2: -; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s6 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s5 ; GFX1010_W32-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX1010_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX1010_W32-NEXT: s_and_saveexec_b32 s5, vcc_lo +; GFX1010_W32-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1010_W32-NEXT: s_cbranch_execz .LBB2_6 ; GFX1010_W32-NEXT: ; %bb.3: ; %bb1 ; GFX1010_W32-NEXT: v_mov_b32_e32 v0, s4 -; GFX1010_W32-NEXT: s_mov_b32 s6, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s9, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s10, s9 ; GFX1010_W32-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v4 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s9, v5 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s10, v6 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s11, v7 -; GFX1010_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[4:5] -; GFX1010_W32-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[6:7] -; GFX1010_W32-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX1010_W32-NEXT: s_and_saveexec_b32 s4, s4 -; GFX1010_W32-NEXT: buffer_load_format_x v9, v0, s[8:11], 0 idxen +; GFX1010_W32-NEXT: v_readfirstlane_b32 s4, v4 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s5, v5 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s6, v6 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[4:5] +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[6:7] +; GFX1010_W32-NEXT: buffer_load_format_x v8, v0, s[4:7], 0 idxen +; GFX1010_W32-NEXT: s_andn2_wrexec_b32 s10, s10 ; 
GFX1010_W32-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr0 -; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX1010_W32-NEXT: s_cbranch_execnz .LBB2_4 ; GFX1010_W32-NEXT: ; %bb.5: -; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s6 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s9 ; GFX1010_W32-NEXT: .LBB2_6: ; %bb2 -; GFX1010_W32-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX1010_W32-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) -; GFX1010_W32-NEXT: global_store_dword v[11:12], v9, off +; GFX1010_W32-NEXT: global_store_dword v[11:12], v8, off ; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W32-NEXT: s_setpc_b64 s[30:31] ; @@ -823,53 +815,53 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; GFX1010_W64-NEXT: ;;#ASMSTART ; GFX1010_W64-NEXT: s_mov_b32 s4, 17 ; GFX1010_W64-NEXT: ;;#ASMEND -; GFX1010_W64-NEXT: v_mov_b32_e32 v8, s4 -; GFX1010_W64-NEXT: s_mov_b64 s[12:13], exec +; GFX1010_W64-NEXT: v_mov_b32_e32 v9, s4 +; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec +; GFX1010_W64-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX1010_W64-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 ; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v0 ; GFX1010_W64-NEXT: v_readfirstlane_b32 s9, v1 ; GFX1010_W64-NEXT: v_readfirstlane_b32 s10, v2 ; GFX1010_W64-NEXT: v_readfirstlane_b32 s11, v3 -; GFX1010_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GFX1010_W64-NEXT: v_cmp_eq_u64_e64 s[6:7], s[10:11], v[2:3] -; GFX1010_W64-NEXT: s_and_b64 s[6:7], vcc, s[6:7] -; GFX1010_W64-NEXT: s_and_saveexec_b64 s[6:7], s[6:7] -; GFX1010_W64-NEXT: buffer_load_format_x v9, v8, s[8:11], 0 idxen +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[8:9], v[0:1] +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[10:11], v[2:3] +; GFX1010_W64-NEXT: buffer_load_format_x v8, v9, 
s[8:11], 0 idxen +; GFX1010_W64-NEXT: s_andn2_wrexec_b64 s[12:13], s[12:13] ; GFX1010_W64-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1010_W64-NEXT: ; implicit-def: $vgpr8 -; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W64-NEXT: s_xor_b64 exec, exec, s[6:7] +; GFX1010_W64-NEXT: ; implicit-def: $vgpr9 ; GFX1010_W64-NEXT: s_cbranch_execnz .LBB2_1 ; GFX1010_W64-NEXT: ; %bb.2: -; GFX1010_W64-NEXT: s_mov_b64 exec, s[12:13] +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W64-NEXT: s_mov_b64 exec, s[6:7] ; GFX1010_W64-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX1010_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1010_W64-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX1010_W64-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1010_W64-NEXT: s_cbranch_execz .LBB2_6 ; GFX1010_W64-NEXT: ; %bb.3: ; %bb1 ; GFX1010_W64-NEXT: v_mov_b32_e32 v0, s4 -; GFX1010_W64-NEXT: s_mov_b64 s[12:13], exec +; GFX1010_W64-NEXT: s_mov_b64 s[10:11], exec +; GFX1010_W64-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX1010_W64-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v4 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s9, v5 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s10, v6 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s11, v7 -; GFX1010_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[4:5] -; GFX1010_W64-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[6:7] -; GFX1010_W64-NEXT: s_and_b64 s[4:5], vcc, s[4:5] -; GFX1010_W64-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX1010_W64-NEXT: buffer_load_format_x v9, v0, s[8:11], 0 idxen +; GFX1010_W64-NEXT: v_readfirstlane_b32 s4, v4 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s5, v5 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s6, v6 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[4:5] +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[6:7] +; GFX1010_W64-NEXT: buffer_load_format_x v8, v0, s[4:7], 0 idxen +; 
GFX1010_W64-NEXT: s_andn2_wrexec_b64 s[12:13], s[12:13] ; GFX1010_W64-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1010_W64-NEXT: ; implicit-def: $vgpr0 -; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W64-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX1010_W64-NEXT: s_cbranch_execnz .LBB2_4 ; GFX1010_W64-NEXT: ; %bb.5: -; GFX1010_W64-NEXT: s_mov_b64 exec, s[12:13] +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W64-NEXT: s_mov_b64 exec, s[10:11] ; GFX1010_W64-NEXT: .LBB2_6: ; %bb2 -; GFX1010_W64-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX1010_W64-NEXT: s_or_b64 exec, exec, s[8:9] ; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) -; GFX1010_W64-NEXT: global_store_dword v[11:12], v9, off +; GFX1010_W64-NEXT: global_store_dword v[11:12], v8, off ; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W64-NEXT: s_setpc_b64 s[30:31] ; @@ -879,57 +871,55 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; GFX1100_W32-NEXT: ;;#ASMSTART ; GFX1100_W32-NEXT: s_mov_b32 s4, 17 ; GFX1100_W32-NEXT: ;;#ASMEND -; GFX1100_W32-NEXT: v_mov_b32_e32 v8, s4 -; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo +; GFX1100_W32-NEXT: v_mov_b32_e32 v9, s4 +; GFX1100_W32-NEXT: s_mov_b32 s5, exec_lo +; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W32-NEXT: s_mov_b32 s6, s5 ; GFX1100_W32-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s8, v0 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s9, v1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s10, v2 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s11, v3 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1100_W32-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX1100_W32-NEXT: v_cmp_eq_u64_e64 s0, s[10:11], v[2:3] -; 
GFX1100_W32-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W32-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1100_W32-NEXT: buffer_load_format_x v9, v8, s[8:11], 0 idxen +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX1100_W32-NEXT: buffer_load_format_x v8, v9, s[0:3], 0 idxen +; GFX1100_W32-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1100_W32-NEXT: ; implicit-def: $vgpr8 -; GFX1100_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX1100_W32-NEXT: ; implicit-def: $vgpr9 ; GFX1100_W32-NEXT: s_cbranch_execnz .LBB2_1 ; GFX1100_W32-NEXT: ; %bb.2: -; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s1 +; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s5 ; GFX1100_W32-NEXT: v_and_b32_e32 v0, 0x3ff, v31 -; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo +; GFX1100_W32-NEXT: s_mov_b32 s5, exec_lo ; GFX1100_W32-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100_W32-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1100_W32-NEXT: s_cbranch_execz .LBB2_6 ; GFX1100_W32-NEXT: ; %bb.3: ; %bb1 ; GFX1100_W32-NEXT: v_mov_b32_e32 v0, s4 -; GFX1100_W32-NEXT: s_mov_b32 s2, exec_lo +; GFX1100_W32-NEXT: s_mov_b32 s4, exec_lo +; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W32-NEXT: s_mov_b32 s6, s4 ; GFX1100_W32-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v4 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s5, v5 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s6, v6 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s0, v4 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s1, v5 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s2, v6 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s3, v7 ; GFX1100_W32-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5] -; GFX1100_W32-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[6:7] -; 
GFX1100_W32-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W32-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1100_W32-NEXT: buffer_load_format_x v9, v0, s[4:7], 0 idxen +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[4:5] +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[6:7] +; GFX1100_W32-NEXT: buffer_load_format_x v8, v0, s[0:3], 0 idxen +; GFX1100_W32-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr0 -; GFX1100_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1100_W32-NEXT: s_cbranch_execnz .LBB2_4 ; GFX1100_W32-NEXT: ; %bb.5: -; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s2 +; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s4 ; GFX1100_W32-NEXT: .LBB2_6: ; %bb2 ; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W32-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; GFX1100_W32-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) -; GFX1100_W32-NEXT: global_store_b32 v[11:12], v9, off dlc +; GFX1100_W32-NEXT: global_store_b32 v[11:12], v8, off dlc ; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W32-NEXT: s_setpc_b64 s[30:31] ; @@ -939,57 +929,55 @@ define void @mubuf_vgpr_outside_entry(<4 x i32> %i, <4 x i32> %j, i32 %c, ptr ad ; GFX1100_W64-NEXT: ;;#ASMSTART ; GFX1100_W64-NEXT: s_mov_b32 s4, 17 ; GFX1100_W64-NEXT: ;;#ASMEND -; GFX1100_W64-NEXT: v_mov_b32_e32 v8, s4 -; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec +; GFX1100_W64-NEXT: v_mov_b32_e32 v9, s4 +; GFX1100_W64-NEXT: s_mov_b64 s[6:7], exec +; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W64-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX1100_W64-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s8, v0 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s9, v1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s10, v2 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s11, v3 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1100_W64-NEXT: 
v_readfirstlane_b32 s1, v1 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1100_W64-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GFX1100_W64-NEXT: v_cmp_eq_u64_e64 s[0:1], s[10:11], v[2:3] -; GFX1100_W64-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W64-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX1100_W64-NEXT: buffer_load_format_x v9, v8, s[8:11], 0 idxen +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX1100_W64-NEXT: buffer_load_format_x v8, v9, s[0:3], 0 idxen +; GFX1100_W64-NEXT: s_and_not1_wrexec_b64 s[8:9], s[8:9] ; GFX1100_W64-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1100_W64-NEXT: ; implicit-def: $vgpr8 -; GFX1100_W64-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX1100_W64-NEXT: ; implicit-def: $vgpr9 ; GFX1100_W64-NEXT: s_cbranch_execnz .LBB2_1 ; GFX1100_W64-NEXT: ; %bb.2: -; GFX1100_W64-NEXT: s_mov_b64 exec, s[2:3] +; GFX1100_W64-NEXT: s_mov_b64 exec, s[6:7] ; GFX1100_W64-NEXT: v_and_b32_e32 v0, 0x3ff, v31 -; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec +; GFX1100_W64-NEXT: s_mov_b64 s[6:7], exec ; GFX1100_W64-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100_W64-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1100_W64-NEXT: s_cbranch_execz .LBB2_6 ; GFX1100_W64-NEXT: ; %bb.3: ; %bb1 ; GFX1100_W64-NEXT: v_mov_b32_e32 v0, s4 -; GFX1100_W64-NEXT: s_mov_b64 s[8:9], exec +; GFX1100_W64-NEXT: s_mov_b64 s[4:5], exec +; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W64-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX1100_W64-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v4 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s5, v5 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s6, v6 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1100_W64-NEXT: v_readfirstlane_b32 
s0, v4 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s1, v5 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s2, v6 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s3, v7 ; GFX1100_W64-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[4:5] -; GFX1100_W64-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[6:7] -; GFX1100_W64-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W64-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX1100_W64-NEXT: buffer_load_format_x v9, v0, s[4:7], 0 idxen +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[4:5] +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[6:7] +; GFX1100_W64-NEXT: buffer_load_format_x v8, v0, s[0:3], 0 idxen +; GFX1100_W64-NEXT: s_and_not1_wrexec_b64 s[8:9], s[8:9] ; GFX1100_W64-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1100_W64-NEXT: ; implicit-def: $vgpr0 -; GFX1100_W64-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX1100_W64-NEXT: s_cbranch_execnz .LBB2_4 ; GFX1100_W64-NEXT: ; %bb.5: -; GFX1100_W64-NEXT: s_mov_b64 exec, s[8:9] +; GFX1100_W64-NEXT: s_mov_b64 exec, s[4:5] ; GFX1100_W64-NEXT: .LBB2_6: ; %bb2 ; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W64-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX1100_W64-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) -; GFX1100_W64-NEXT: global_store_b32 v[11:12], v9, off dlc +; GFX1100_W64-NEXT: global_store_b32 v[11:12], v8, off dlc ; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W64-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll index b7947de7d5836..7a60a79f90a90 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -37,24 +37,24 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 { ; GFX1010_W32-LABEL: mubuf_vgpr: ; GFX1010_W32: ; %bb.0: ; 
GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s8, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s9, s8 ; GFX1010_W32-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v0 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s9, v1 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s10, v2 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s11, v3 -; GFX1010_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX1010_W32-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX1010_W32-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX1010_W32-NEXT: s_and_saveexec_b32 s4, s4 -; GFX1010_W32-NEXT: buffer_load_format_x v5, v4, s[8:11], 0 idxen +; GFX1010_W32-NEXT: v_readfirstlane_b32 s4, v0 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s5, v1 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s6, v2 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX1010_W32-NEXT: buffer_load_format_x v5, v4, s[4:7], 0 idxen +; GFX1010_W32-NEXT: s_andn2_wrexec_b32 s9, s9 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr4 -; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX1010_W32-NEXT: s_cbranch_execnz .LBB0_1 ; GFX1010_W32-NEXT: ; %bb.2: -; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s5 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s8 ; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) ; GFX1010_W32-NEXT: v_mov_b32_e32 v0, v5 ; GFX1010_W32-NEXT: s_setpc_b64 s[30:31] @@ -62,24 +62,24 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 { ; GFX1010_W64-LABEL: mubuf_vgpr: ; GFX1010_W64: ; %bb.0: ; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec +; GFX1010_W64-NEXT: 
s_mov_b64 s[8:9], exec +; GFX1010_W64-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX1010_W64-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v0 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s9, v1 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s10, v2 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s11, v3 -; GFX1010_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GFX1010_W64-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] -; GFX1010_W64-NEXT: s_and_b64 s[4:5], vcc, s[4:5] -; GFX1010_W64-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX1010_W64-NEXT: buffer_load_format_x v5, v4, s[8:11], 0 idxen +; GFX1010_W64-NEXT: v_readfirstlane_b32 s4, v0 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s5, v1 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s6, v2 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX1010_W64-NEXT: buffer_load_format_x v5, v4, s[4:7], 0 idxen +; GFX1010_W64-NEXT: s_andn2_wrexec_b64 s[10:11], s[10:11] ; GFX1010_W64-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX1010_W64-NEXT: ; implicit-def: $vgpr4 -; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W64-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX1010_W64-NEXT: s_cbranch_execnz .LBB0_1 ; GFX1010_W64-NEXT: ; %bb.2: -; GFX1010_W64-NEXT: s_mov_b64 exec, s[6:7] +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W64-NEXT: s_mov_b64 exec, s[8:9] ; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) ; GFX1010_W64-NEXT: v_mov_b32_e32 v0, v5 ; GFX1010_W64-NEXT: s_setpc_b64 s[30:31] @@ -87,25 +87,24 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 { ; GFX1100_W32-LABEL: mubuf_vgpr: ; GFX1100_W32: ; %bb.0: ; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo +; GFX1100_W32-NEXT: s_mov_b32 s4, exec_lo +; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; 
GFX1100_W32-NEXT: s_mov_b32 s5, s4 ; GFX1100_W32-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v0 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s5, v1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s6, v2 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1100_W32-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX1100_W32-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX1100_W32-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W32-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1100_W32-NEXT: buffer_load_format_x v5, v4, s[4:7], 0 idxen +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX1100_W32-NEXT: buffer_load_format_x v5, v4, s[0:3], 0 idxen +; GFX1100_W32-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr4 -; GFX1100_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1100_W32-NEXT: s_cbranch_execnz .LBB0_1 ; GFX1100_W32-NEXT: ; %bb.2: -; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s1 +; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s4 ; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) ; GFX1100_W32-NEXT: v_mov_b32_e32 v0, v5 ; GFX1100_W32-NEXT: s_setpc_b64 s[30:31] @@ -113,25 +112,24 @@ define float @mubuf_vgpr(ptr addrspace(8) %i, i32 %c) #0 { ; GFX1100_W64-LABEL: mubuf_vgpr: ; GFX1100_W64: ; %bb.0: ; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec +; GFX1100_W64-NEXT: s_mov_b64 s[4:5], exec +; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W64-NEXT: s_mov_b64 s[6:7], s[4:5] ; GFX1100_W64-NEXT: .LBB0_1: ; =>This Inner 
Loop Header: Depth=1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v0 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s5, v1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s6, v2 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1100_W64-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX1100_W64-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX1100_W64-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W64-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX1100_W64-NEXT: buffer_load_format_x v5, v4, s[4:7], 0 idxen +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX1100_W64-NEXT: buffer_load_format_x v5, v4, s[0:3], 0 idxen +; GFX1100_W64-NEXT: s_and_not1_wrexec_b64 s[6:7], s[6:7] ; GFX1100_W64-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX1100_W64-NEXT: ; implicit-def: $vgpr4 -; GFX1100_W64-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX1100_W64-NEXT: s_cbranch_execnz .LBB0_1 ; GFX1100_W64-NEXT: ; %bb.2: -; GFX1100_W64-NEXT: s_mov_b64 exec, s[2:3] +; GFX1100_W64-NEXT: s_mov_b64 exec, s[4:5] ; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) ; GFX1100_W64-NEXT: v_mov_b32_e32 v0, v5 ; GFX1100_W64-NEXT: s_setpc_b64 s[30:31] @@ -299,41 +297,41 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; GFX1010_W32-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1010_W32: ; %bb.0: ; %entry ; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s8, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s9, s8 ; GFX1010_W32-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v0 -; 
GFX1010_W32-NEXT: v_readfirstlane_b32 s9, v1 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s10, v2 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s11, v3 -; GFX1010_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX1010_W32-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[2:3] -; GFX1010_W32-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX1010_W32-NEXT: s_and_saveexec_b32 s4, s4 -; GFX1010_W32-NEXT: buffer_load_format_x v13, v8, s[8:11], 0 idxen +; GFX1010_W32-NEXT: v_readfirstlane_b32 s4, v0 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s5, v1 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s6, v2 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX1010_W32-NEXT: buffer_load_format_x v13, v8, s[4:7], 0 idxen +; GFX1010_W32-NEXT: s_andn2_wrexec_b32 s9, s9 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX1010_W32-NEXT: s_cbranch_execnz .LBB1_1 ; GFX1010_W32-NEXT: ; %bb.2: -; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s5 -; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s8 +; GFX1010_W32-NEXT: s_mov_b32 s8, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s9, s8 ; GFX1010_W32-NEXT: .LBB1_3: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v4 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s9, v5 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s10, v6 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s11, v7 -; GFX1010_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[4:5] -; GFX1010_W32-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[6:7] -; GFX1010_W32-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX1010_W32-NEXT: s_and_saveexec_b32 s4, s4 -; GFX1010_W32-NEXT: buffer_load_format_x v0, v8, s[8:11], 0 idxen +; GFX1010_W32-NEXT: v_readfirstlane_b32 s4, v4 +; 
GFX1010_W32-NEXT: v_readfirstlane_b32 s5, v5 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s6, v6 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[4:5] +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[6:7] +; GFX1010_W32-NEXT: buffer_load_format_x v0, v8, s[4:7], 0 idxen +; GFX1010_W32-NEXT: s_andn2_wrexec_b32 s9, s9 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr8 -; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX1010_W32-NEXT: s_cbranch_execnz .LBB1_3 ; GFX1010_W32-NEXT: ; %bb.4: -; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s5 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s8 ; GFX1010_W32-NEXT: s_waitcnt vmcnt(1) ; GFX1010_W32-NEXT: global_store_dword v[9:10], v13, off ; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0 @@ -345,41 +343,41 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; GFX1010_W64-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1010_W64: ; %bb.0: ; %entry ; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec +; GFX1010_W64-NEXT: s_mov_b64 s[8:9], exec +; GFX1010_W64-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX1010_W64-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v0 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s9, v1 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s10, v2 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s11, v3 -; GFX1010_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GFX1010_W64-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[2:3] -; GFX1010_W64-NEXT: s_and_b64 s[4:5], vcc, s[4:5] -; GFX1010_W64-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX1010_W64-NEXT: buffer_load_format_x v13, v8, s[8:11], 0 idxen +; GFX1010_W64-NEXT: v_readfirstlane_b32 s4, v0 +; GFX1010_W64-NEXT: 
v_readfirstlane_b32 s5, v1 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s6, v2 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[0:1] +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[2:3] +; GFX1010_W64-NEXT: buffer_load_format_x v13, v8, s[4:7], 0 idxen +; GFX1010_W64-NEXT: s_andn2_wrexec_b64 s[10:11], s[10:11] ; GFX1010_W64-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W64-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX1010_W64-NEXT: s_cbranch_execnz .LBB1_1 ; GFX1010_W64-NEXT: ; %bb.2: -; GFX1010_W64-NEXT: s_mov_b64 exec, s[6:7] -; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W64-NEXT: s_mov_b64 exec, s[8:9] +; GFX1010_W64-NEXT: s_mov_b64 s[8:9], exec +; GFX1010_W64-NEXT: s_mov_b64 s[10:11], s[8:9] ; GFX1010_W64-NEXT: .LBB1_3: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v4 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s9, v5 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s10, v6 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s11, v7 -; GFX1010_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[4:5] -; GFX1010_W64-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[6:7] -; GFX1010_W64-NEXT: s_and_b64 s[4:5], vcc, s[4:5] -; GFX1010_W64-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX1010_W64-NEXT: buffer_load_format_x v0, v8, s[8:11], 0 idxen +; GFX1010_W64-NEXT: v_readfirstlane_b32 s4, v4 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s5, v5 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s6, v6 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[4:5] +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[6:7] +; GFX1010_W64-NEXT: buffer_load_format_x v0, v8, s[4:7], 0 idxen +; GFX1010_W64-NEXT: s_andn2_wrexec_b64 s[10:11], s[10:11] ; GFX1010_W64-NEXT: ; 
implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1010_W64-NEXT: ; implicit-def: $vgpr8 -; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W64-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX1010_W64-NEXT: s_cbranch_execnz .LBB1_3 ; GFX1010_W64-NEXT: ; %bb.4: -; GFX1010_W64-NEXT: s_mov_b64 exec, s[6:7] +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W64-NEXT: s_mov_b64 exec, s[8:9] ; GFX1010_W64-NEXT: s_waitcnt vmcnt(1) ; GFX1010_W64-NEXT: global_store_dword v[9:10], v13, off ; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0 @@ -391,44 +389,41 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; GFX1100_W32-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1100_W32: ; %bb.0: ; %entry ; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo +; GFX1100_W32-NEXT: s_mov_b32 s4, exec_lo +; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W32-NEXT: s_mov_b32 s5, s4 ; GFX1100_W32-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v0 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s5, v1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s6, v2 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1100_W32-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1] -; GFX1100_W32-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[2:3] -; GFX1100_W32-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W32-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1100_W32-NEXT: buffer_load_format_x v13, v8, s[4:7], 0 idxen +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX1100_W32-NEXT: buffer_load_format_x 
v13, v8, s[0:3], 0 idxen +; GFX1100_W32-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1100_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1100_W32-NEXT: s_cbranch_execnz .LBB1_1 ; GFX1100_W32-NEXT: ; %bb.2: -; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s1 -; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo +; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s4 +; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1100_W32-NEXT: s_mov_b32 s4, exec_lo +; GFX1100_W32-NEXT: s_mov_b32 s5, s4 ; GFX1100_W32-NEXT: .LBB1_3: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v4 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s5, v5 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s6, v6 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s0, v4 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s1, v5 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s2, v6 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s3, v7 ; GFX1100_W32-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5] -; GFX1100_W32-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[6:7] -; GFX1100_W32-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W32-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1100_W32-NEXT: buffer_load_format_x v0, v8, s[4:7], 0 idxen +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[4:5] +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[6:7] +; GFX1100_W32-NEXT: buffer_load_format_x v0, v8, s[0:3], 0 idxen +; GFX1100_W32-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr8 -; GFX1100_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1100_W32-NEXT: s_cbranch_execnz .LBB1_3 ; GFX1100_W32-NEXT: ; %bb.4: -; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s1 +; 
GFX1100_W32-NEXT: s_mov_b32 exec_lo, s4 ; GFX1100_W32-NEXT: s_waitcnt vmcnt(1) ; GFX1100_W32-NEXT: global_store_b32 v[9:10], v13, off dlc ; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0 @@ -440,44 +435,41 @@ define void @mubuf_vgpr_adjacent_in_block(ptr addrspace(8) %i, ptr addrspace(8) ; GFX1100_W64-LABEL: mubuf_vgpr_adjacent_in_block: ; GFX1100_W64: ; %bb.0: ; %entry ; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec +; GFX1100_W64-NEXT: s_mov_b64 s[4:5], exec +; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W64-NEXT: s_mov_b64 s[6:7], s[4:5] ; GFX1100_W64-NEXT: .LBB1_1: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v0 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s5, v1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s6, v2 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s7, v3 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1100_W64-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[0:1] -; GFX1100_W64-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[2:3] -; GFX1100_W64-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W64-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX1100_W64-NEXT: buffer_load_format_x v13, v8, s[4:7], 0 idxen +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX1100_W64-NEXT: buffer_load_format_x v13, v8, s[0:3], 0 idxen +; GFX1100_W64-NEXT: s_and_not1_wrexec_b64 s[6:7], s[6:7] ; GFX1100_W64-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1100_W64-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX1100_W64-NEXT: s_cbranch_execnz .LBB1_1 ; GFX1100_W64-NEXT: ; %bb.2: -; GFX1100_W64-NEXT: s_mov_b64 exec, s[2:3] -; GFX1100_W64-NEXT: 
s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec +; GFX1100_W64-NEXT: s_mov_b64 exec, s[4:5] +; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX1100_W64-NEXT: s_mov_b64 s[4:5], exec +; GFX1100_W64-NEXT: s_mov_b64 s[6:7], s[4:5] ; GFX1100_W64-NEXT: .LBB1_3: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v4 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s5, v5 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s6, v6 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s0, v4 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s1, v5 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s2, v6 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s3, v7 ; GFX1100_W64-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[4:5] -; GFX1100_W64-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[6:7] -; GFX1100_W64-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W64-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX1100_W64-NEXT: buffer_load_format_x v0, v8, s[4:7], 0 idxen +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[4:5] +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[6:7] +; GFX1100_W64-NEXT: buffer_load_format_x v0, v8, s[0:3], 0 idxen +; GFX1100_W64-NEXT: s_and_not1_wrexec_b64 s[6:7], s[6:7] ; GFX1100_W64-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1100_W64-NEXT: ; implicit-def: $vgpr8 -; GFX1100_W64-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX1100_W64-NEXT: s_cbranch_execnz .LBB1_3 ; GFX1100_W64-NEXT: ; %bb.4: -; GFX1100_W64-NEXT: s_mov_b64 exec, s[2:3] +; GFX1100_W64-NEXT: s_mov_b64 exec, s[4:5] ; GFX1100_W64-NEXT: s_waitcnt vmcnt(1) ; GFX1100_W64-NEXT: global_store_b32 v[9:10], v13, off dlc ; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0 @@ -787,53 +779,53 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; 
GFX1010_W32-NEXT: ;;#ASMSTART ; GFX1010_W32-NEXT: s_mov_b32 s4, 17 ; GFX1010_W32-NEXT: ;;#ASMEND -; GFX1010_W32-NEXT: v_mov_b32_e32 v8, s4 -; GFX1010_W32-NEXT: s_mov_b32 s6, exec_lo +; GFX1010_W32-NEXT: v_mov_b32_e32 v9, s4 +; GFX1010_W32-NEXT: s_mov_b32 s5, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s6, s5 ; GFX1010_W32-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 ; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v0 ; GFX1010_W32-NEXT: v_readfirstlane_b32 s9, v1 ; GFX1010_W32-NEXT: v_readfirstlane_b32 s10, v2 ; GFX1010_W32-NEXT: v_readfirstlane_b32 s11, v3 -; GFX1010_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX1010_W32-NEXT: v_cmp_eq_u64_e64 s5, s[10:11], v[2:3] -; GFX1010_W32-NEXT: s_and_b32 s5, vcc_lo, s5 -; GFX1010_W32-NEXT: s_and_saveexec_b32 s5, s5 -; GFX1010_W32-NEXT: buffer_load_format_x v9, v8, s[8:11], 0 idxen +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[8:9], v[0:1] +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[10:11], v[2:3] +; GFX1010_W32-NEXT: buffer_load_format_x v8, v9, s[8:11], 0 idxen +; GFX1010_W32-NEXT: s_andn2_wrexec_b32 s6, s6 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1010_W32-NEXT: ; implicit-def: $vgpr8 -; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s5 +; GFX1010_W32-NEXT: ; implicit-def: $vgpr9 ; GFX1010_W32-NEXT: s_cbranch_execnz .LBB2_1 ; GFX1010_W32-NEXT: ; %bb.2: -; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s6 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s5 ; GFX1010_W32-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX1010_W32-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX1010_W32-NEXT: s_and_saveexec_b32 s5, vcc_lo +; GFX1010_W32-NEXT: s_and_saveexec_b32 s8, vcc_lo ; GFX1010_W32-NEXT: s_cbranch_execz .LBB2_6 ; GFX1010_W32-NEXT: ; %bb.3: ; %bb1 ; GFX1010_W32-NEXT: v_mov_b32_e32 v0, s4 -; GFX1010_W32-NEXT: s_mov_b32 s6, exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s9, 
exec_lo +; GFX1010_W32-NEXT: s_mov_b32 s10, s9 ; GFX1010_W32-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s8, v4 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s9, v5 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s10, v6 -; GFX1010_W32-NEXT: v_readfirstlane_b32 s11, v7 -; GFX1010_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[4:5] -; GFX1010_W32-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[6:7] -; GFX1010_W32-NEXT: s_and_b32 s4, vcc_lo, s4 -; GFX1010_W32-NEXT: s_and_saveexec_b32 s4, s4 -; GFX1010_W32-NEXT: buffer_load_format_x v9, v0, s[8:11], 0 idxen +; GFX1010_W32-NEXT: v_readfirstlane_b32 s4, v4 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s5, v5 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s6, v6 +; GFX1010_W32-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[4:5] +; GFX1010_W32-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[6:7] +; GFX1010_W32-NEXT: buffer_load_format_x v8, v0, s[4:7], 0 idxen +; GFX1010_W32-NEXT: s_andn2_wrexec_b32 s10, s10 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1010_W32-NEXT: ; implicit-def: $vgpr0 -; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GFX1010_W32-NEXT: s_cbranch_execnz .LBB2_4 ; GFX1010_W32-NEXT: ; %bb.5: -; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s6 +; GFX1010_W32-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W32-NEXT: s_mov_b32 exec_lo, s9 ; GFX1010_W32-NEXT: .LBB2_6: ; %bb2 -; GFX1010_W32-NEXT: s_or_b32 exec_lo, exec_lo, s5 +; GFX1010_W32-NEXT: s_or_b32 exec_lo, exec_lo, s8 ; GFX1010_W32-NEXT: s_waitcnt vmcnt(0) -; GFX1010_W32-NEXT: global_store_dword v[11:12], v9, off +; GFX1010_W32-NEXT: global_store_dword v[11:12], v8, off ; GFX1010_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W32-NEXT: s_setpc_b64 s[30:31] ; @@ -843,53 +835,53 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; GFX1010_W64-NEXT: 
;;#ASMSTART ; GFX1010_W64-NEXT: s_mov_b32 s4, 17 ; GFX1010_W64-NEXT: ;;#ASMEND -; GFX1010_W64-NEXT: v_mov_b32_e32 v8, s4 -; GFX1010_W64-NEXT: s_mov_b64 s[12:13], exec +; GFX1010_W64-NEXT: v_mov_b32_e32 v9, s4 +; GFX1010_W64-NEXT: s_mov_b64 s[6:7], exec +; GFX1010_W64-NEXT: s_mov_b64 s[12:13], s[6:7] ; GFX1010_W64-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 ; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v0 ; GFX1010_W64-NEXT: v_readfirstlane_b32 s9, v1 ; GFX1010_W64-NEXT: v_readfirstlane_b32 s10, v2 ; GFX1010_W64-NEXT: v_readfirstlane_b32 s11, v3 -; GFX1010_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GFX1010_W64-NEXT: v_cmp_eq_u64_e64 s[6:7], s[10:11], v[2:3] -; GFX1010_W64-NEXT: s_and_b64 s[6:7], vcc, s[6:7] -; GFX1010_W64-NEXT: s_and_saveexec_b64 s[6:7], s[6:7] -; GFX1010_W64-NEXT: buffer_load_format_x v9, v8, s[8:11], 0 idxen +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[8:9], v[0:1] +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[10:11], v[2:3] +; GFX1010_W64-NEXT: buffer_load_format_x v8, v9, s[8:11], 0 idxen +; GFX1010_W64-NEXT: s_andn2_wrexec_b64 s[12:13], s[12:13] ; GFX1010_W64-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1010_W64-NEXT: ; implicit-def: $vgpr8 -; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W64-NEXT: s_xor_b64 exec, exec, s[6:7] +; GFX1010_W64-NEXT: ; implicit-def: $vgpr9 ; GFX1010_W64-NEXT: s_cbranch_execnz .LBB2_1 ; GFX1010_W64-NEXT: ; %bb.2: -; GFX1010_W64-NEXT: s_mov_b64 exec, s[12:13] +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W64-NEXT: s_mov_b64 exec, s[6:7] ; GFX1010_W64-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX1010_W64-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX1010_W64-NEXT: s_and_saveexec_b64 s[6:7], vcc +; GFX1010_W64-NEXT: s_and_saveexec_b64 s[8:9], vcc ; GFX1010_W64-NEXT: s_cbranch_execz .LBB2_6 ; GFX1010_W64-NEXT: ; %bb.3: ; %bb1 ; GFX1010_W64-NEXT: v_mov_b32_e32 v0, s4 -; GFX1010_W64-NEXT: s_mov_b64 s[12:13], exec +; 
GFX1010_W64-NEXT: s_mov_b64 s[10:11], exec +; GFX1010_W64-NEXT: s_mov_b64 s[12:13], s[10:11] ; GFX1010_W64-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s8, v4 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s9, v5 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s10, v6 -; GFX1010_W64-NEXT: v_readfirstlane_b32 s11, v7 -; GFX1010_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[4:5] -; GFX1010_W64-NEXT: v_cmp_eq_u64_e64 s[4:5], s[10:11], v[6:7] -; GFX1010_W64-NEXT: s_and_b64 s[4:5], vcc, s[4:5] -; GFX1010_W64-NEXT: s_and_saveexec_b64 s[4:5], s[4:5] -; GFX1010_W64-NEXT: buffer_load_format_x v9, v0, s[8:11], 0 idxen +; GFX1010_W64-NEXT: v_readfirstlane_b32 s4, v4 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s5, v5 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s6, v6 +; GFX1010_W64-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[4:5] +; GFX1010_W64-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[6:7] +; GFX1010_W64-NEXT: buffer_load_format_x v8, v0, s[4:7], 0 idxen +; GFX1010_W64-NEXT: s_andn2_wrexec_b64 s[12:13], s[12:13] ; GFX1010_W64-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1010_W64-NEXT: ; implicit-def: $vgpr0 -; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GFX1010_W64-NEXT: s_xor_b64 exec, exec, s[4:5] ; GFX1010_W64-NEXT: s_cbranch_execnz .LBB2_4 ; GFX1010_W64-NEXT: ; %bb.5: -; GFX1010_W64-NEXT: s_mov_b64 exec, s[12:13] +; GFX1010_W64-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GFX1010_W64-NEXT: s_mov_b64 exec, s[10:11] ; GFX1010_W64-NEXT: .LBB2_6: ; %bb2 -; GFX1010_W64-NEXT: s_or_b64 exec, exec, s[6:7] +; GFX1010_W64-NEXT: s_or_b64 exec, exec, s[8:9] ; GFX1010_W64-NEXT: s_waitcnt vmcnt(0) -; GFX1010_W64-NEXT: global_store_dword v[11:12], v9, off +; GFX1010_W64-NEXT: global_store_dword v[11:12], v8, off ; GFX1010_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1010_W64-NEXT: s_setpc_b64 s[30:31] ; @@ -899,57 +891,55 @@ define void 
@mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; GFX1100_W32-NEXT: ;;#ASMSTART ; GFX1100_W32-NEXT: s_mov_b32 s4, 17 ; GFX1100_W32-NEXT: ;;#ASMEND -; GFX1100_W32-NEXT: v_mov_b32_e32 v8, s4 -; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo +; GFX1100_W32-NEXT: v_mov_b32_e32 v9, s4 +; GFX1100_W32-NEXT: s_mov_b32 s5, exec_lo +; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W32-NEXT: s_mov_b32 s6, s5 ; GFX1100_W32-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s8, v0 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s9, v1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s10, v2 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s11, v3 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1100_W32-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[0:1] -; GFX1100_W32-NEXT: v_cmp_eq_u64_e64 s0, s[10:11], v[2:3] -; GFX1100_W32-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W32-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1100_W32-NEXT: buffer_load_format_x v9, v8, s[8:11], 0 idxen +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX1100_W32-NEXT: buffer_load_format_x v8, v9, s[0:3], 0 idxen +; GFX1100_W32-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1100_W32-NEXT: ; implicit-def: $vgpr8 -; GFX1100_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s0 +; GFX1100_W32-NEXT: ; implicit-def: $vgpr9 ; GFX1100_W32-NEXT: s_cbranch_execnz .LBB2_1 ; GFX1100_W32-NEXT: ; %bb.2: -; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s1 +; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s5 ; GFX1100_W32-NEXT: v_and_b32_e32 v0, 0x3ff, v31 -; GFX1100_W32-NEXT: s_mov_b32 s1, exec_lo +; 
GFX1100_W32-NEXT: s_mov_b32 s5, exec_lo ; GFX1100_W32-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100_W32-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1100_W32-NEXT: s_cbranch_execz .LBB2_6 ; GFX1100_W32-NEXT: ; %bb.3: ; %bb1 ; GFX1100_W32-NEXT: v_mov_b32_e32 v0, s4 -; GFX1100_W32-NEXT: s_mov_b32 s2, exec_lo +; GFX1100_W32-NEXT: s_mov_b32 s4, exec_lo +; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W32-NEXT: s_mov_b32 s6, s4 ; GFX1100_W32-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s4, v4 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s5, v5 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s6, v6 -; GFX1100_W32-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s0, v4 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s1, v5 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s2, v6 +; GFX1100_W32-NEXT: v_readfirstlane_b32 s3, v7 ; GFX1100_W32-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W32-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5] -; GFX1100_W32-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[6:7] -; GFX1100_W32-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W32-NEXT: s_and_saveexec_b32 s0, s0 -; GFX1100_W32-NEXT: buffer_load_format_x v9, v0, s[4:7], 0 idxen +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[4:5] +; GFX1100_W32-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[6:7] +; GFX1100_W32-NEXT: buffer_load_format_x v8, v0, s[0:3], 0 idxen +; GFX1100_W32-NEXT: s_and_not1_wrexec_b32 s6, s6 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1100_W32-NEXT: ; implicit-def: $vgpr0 -; GFX1100_W32-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX1100_W32-NEXT: s_cbranch_execnz .LBB2_4 ; GFX1100_W32-NEXT: ; %bb.5: -; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s2 +; GFX1100_W32-NEXT: s_mov_b32 exec_lo, s4 ; GFX1100_W32-NEXT: .LBB2_6: ; %bb2 ; GFX1100_W32-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W32-NEXT: s_or_b32 exec_lo, exec_lo, s1 +; 
GFX1100_W32-NEXT: s_or_b32 exec_lo, exec_lo, s5 ; GFX1100_W32-NEXT: s_waitcnt vmcnt(0) -; GFX1100_W32-NEXT: global_store_b32 v[11:12], v9, off dlc +; GFX1100_W32-NEXT: global_store_b32 v[11:12], v8, off dlc ; GFX1100_W32-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W32-NEXT: s_setpc_b64 s[30:31] ; @@ -959,57 +949,55 @@ define void @mubuf_vgpr_outside_entry(ptr addrspace(8) %i, ptr addrspace(8) %j, ; GFX1100_W64-NEXT: ;;#ASMSTART ; GFX1100_W64-NEXT: s_mov_b32 s4, 17 ; GFX1100_W64-NEXT: ;;#ASMEND -; GFX1100_W64-NEXT: v_mov_b32_e32 v8, s4 -; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec +; GFX1100_W64-NEXT: v_mov_b32_e32 v9, s4 +; GFX1100_W64-NEXT: s_mov_b64 s[6:7], exec +; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W64-NEXT: s_mov_b64 s[8:9], s[6:7] ; GFX1100_W64-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s8, v0 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s9, v1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s10, v2 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s11, v3 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s3, v3 ; GFX1100_W64-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1] -; GFX1100_W64-NEXT: v_cmp_eq_u64_e64 s[0:1], s[10:11], v[2:3] -; GFX1100_W64-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W64-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX1100_W64-NEXT: buffer_load_format_x v9, v8, s[8:11], 0 idxen +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[0:1] +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[2:3] +; GFX1100_W64-NEXT: buffer_load_format_x v8, v9, s[0:3], 0 idxen +; GFX1100_W64-NEXT: s_and_not1_wrexec_b64 s[8:9], s[8:9] ; GFX1100_W64-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3 -; GFX1100_W64-NEXT: ; implicit-def: $vgpr8 
-; GFX1100_W64-NEXT: s_xor_b64 exec, exec, s[0:1] +; GFX1100_W64-NEXT: ; implicit-def: $vgpr9 ; GFX1100_W64-NEXT: s_cbranch_execnz .LBB2_1 ; GFX1100_W64-NEXT: ; %bb.2: -; GFX1100_W64-NEXT: s_mov_b64 exec, s[2:3] +; GFX1100_W64-NEXT: s_mov_b64 exec, s[6:7] ; GFX1100_W64-NEXT: v_and_b32_e32 v0, 0x3ff, v31 -; GFX1100_W64-NEXT: s_mov_b64 s[2:3], exec +; GFX1100_W64-NEXT: s_mov_b64 s[6:7], exec ; GFX1100_W64-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1100_W64-NEXT: v_cmpx_eq_u32_e32 0, v0 ; GFX1100_W64-NEXT: s_cbranch_execz .LBB2_6 ; GFX1100_W64-NEXT: ; %bb.3: ; %bb1 ; GFX1100_W64-NEXT: v_mov_b32_e32 v0, s4 -; GFX1100_W64-NEXT: s_mov_b64 s[8:9], exec +; GFX1100_W64-NEXT: s_mov_b64 s[4:5], exec +; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX1100_W64-NEXT: s_mov_b64 s[8:9], s[4:5] ; GFX1100_W64-NEXT: .LBB2_4: ; =>This Inner Loop Header: Depth=1 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s4, v4 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s5, v5 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s6, v6 -; GFX1100_W64-NEXT: v_readfirstlane_b32 s7, v7 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s0, v4 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s1, v5 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s2, v6 +; GFX1100_W64-NEXT: v_readfirstlane_b32 s3, v7 ; GFX1100_W64-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1100_W64-NEXT: v_cmp_eq_u64_e32 vcc, s[4:5], v[4:5] -; GFX1100_W64-NEXT: v_cmp_eq_u64_e64 s[0:1], s[6:7], v[6:7] -; GFX1100_W64-NEXT: s_and_b64 s[0:1], vcc, s[0:1] -; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W64-NEXT: s_and_saveexec_b64 s[0:1], s[0:1] -; GFX1100_W64-NEXT: buffer_load_format_x v9, v0, s[4:7], 0 idxen +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[4:5] +; GFX1100_W64-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[6:7] +; GFX1100_W64-NEXT: buffer_load_format_x v8, v0, s[0:3], 0 idxen +; GFX1100_W64-NEXT: s_and_not1_wrexec_b64 s[8:9], s[8:9] ; GFX1100_W64-NEXT: ; implicit-def: $vgpr4_vgpr5_vgpr6_vgpr7 ; GFX1100_W64-NEXT: ; 
implicit-def: $vgpr0 -; GFX1100_W64-NEXT: s_xor_b64 exec, exec, s[0:1] ; GFX1100_W64-NEXT: s_cbranch_execnz .LBB2_4 ; GFX1100_W64-NEXT: ; %bb.5: -; GFX1100_W64-NEXT: s_mov_b64 exec, s[8:9] +; GFX1100_W64-NEXT: s_mov_b64 exec, s[4:5] ; GFX1100_W64-NEXT: .LBB2_6: ; %bb2 ; GFX1100_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1100_W64-NEXT: s_or_b64 exec, exec, s[2:3] +; GFX1100_W64-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX1100_W64-NEXT: s_waitcnt vmcnt(0) -; GFX1100_W64-NEXT: global_store_b32 v[11:12], v9, off dlc +; GFX1100_W64-NEXT: global_store_b32 v[11:12], v8, off dlc ; GFX1100_W64-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX1100_W64-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir index 6ef1574c148b5..e1b31c4ec1a70 100644 --- a/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir +++ b/llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.mir @@ -1,9 +1,9 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64,ADDR64 -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64,W64-NO-ADDR64 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64,W64-NO-ADDR64 +# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64-PRE-GFX10,W64-PRE-GFX10-ADDR64 +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64-PRE-GFX10,W64-PRE-GFX10-NO-ADDR64 +# RUN: llc 
-mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64 # RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W32 -# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64,W64-NO-ADDR64 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W64 # RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize32 -verify-machineinstrs -verify-machine-dom-info --run-pass=si-fix-sgpr-copies -o - %s | FileCheck %s --check-prefixes=W32 # Test that we correctly legalize VGPR Rsrc operands in MUBUF instructions. 
@@ -16,6 +16,7 @@ # TODO: S_XOR_B32_term should be `implicit-def $scc` --- name: idxen +tracksRegLiveness: true liveins: - { reg: '$vgpr0', virtual-reg: '%0' } - { reg: '$vgpr1', virtual-reg: '%1' } @@ -26,6 +27,48 @@ liveins: body: | bb.0: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 + + ; W64-PRE-GFX10-LABEL: name: idxen + ; W64-PRE-GFX10: successors: %bb.1(0x80000000) + ; W64-PRE-GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; W64-PRE-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; W64-PRE-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; W64-PRE-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; W64-PRE-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; W64-PRE-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W64-PRE-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; W64-PRE-GFX10-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: .1: + ; W64-PRE-GFX10-NEXT: successors: %bb.2(0x80000000) + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec + ; W64-PRE-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec + ; W64-PRE-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; W64-PRE-GFX10-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec + ; W64-PRE-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec + ; 
W64-PRE-GFX10-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec + ; W64-PRE-GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; W64-PRE-GFX10-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec + ; W64-PRE-GFX10-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc + ; W64-PRE-GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; W64-PRE-GFX10-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: .2: + ; W64-PRE-GFX10-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec + ; W64-PRE-GFX10-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; W64-PRE-GFX10-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: .3: + ; W64-PRE-GFX10-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] + ; W64-PRE-GFX10-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] + ; W64-PRE-GFX10-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_IDXEN]] + ; W64-PRE-GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; W64-LABEL: name: idxen ; W64: successors: %bb.1(0x80000000) ; W64-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 @@ -38,27 +81,27 @@ body: | ; W64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; 
W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 ; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; W64-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .1: ; W64-NEXT: successors: %bb.2(0x80000000) ; W64-NEXT: {{ $}} + ; W64-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_1]], %bb.0, %15, %bb.2 ; W64-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec ; W64-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec ; W64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; W64-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec ; W64-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec ; W64-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec ; W64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; W64-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec - ; W64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc ; W64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; W64-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def 
$scc, implicit $exec + ; W64-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit-def $exec, implicit $exec + ; W64-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit-def $exec, implicit $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .2: ; W64-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; W64-NEXT: {{ $}} ; W64-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec - ; W64-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; W64-NEXT: [[S_ANDN2_WREXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_ANDN2_WREXEC_B64 [[PHI]], implicit-def $exec, implicit-def $scc, implicit $exec ; W64-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .3: @@ -79,27 +122,27 @@ body: | ; W32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo + ; W32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; W32-NEXT: {{ $}} ; W32-NEXT: .1: ; W32-NEXT: successors: %bb.2(0x80000000) ; W32-NEXT: {{ $}} + ; W32-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.0, %15, %bb.2 ; W32-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec ; W32-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec ; W32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; W32-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec ; W32-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec ; W32-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec ; W32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; W32-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec - ; W32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc ; W32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; W32-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; W32-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit-def $exec, implicit $exec + ; W32-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit-def $exec, implicit $exec ; W32-NEXT: {{ $}} ; W32-NEXT: .2: ; W32-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; W32-NEXT: {{ $}} ; W32-NEXT: [[BUFFER_LOAD_FORMAT_X_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec - ; W32-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc + ; W32-NEXT: [[S_ANDN2_WREXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 [[PHI]], implicit-def $exec, implicit-def $scc, implicit $exec ; W32-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; W32-NEXT: {{ $}} ; W32-NEXT: .3: @@ -123,6 +166,7 @@ body: | --- name: offen +tracksRegLiveness: true liveins: - { reg: '$vgpr0', virtual-reg: '%0' } - { 
reg: '$vgpr1', virtual-reg: '%1' } @@ -133,6 +177,48 @@ liveins: body: | bb.0: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 + + ; W64-PRE-GFX10-LABEL: name: offen + ; W64-PRE-GFX10: successors: %bb.1(0x80000000) + ; W64-PRE-GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; W64-PRE-GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4 + ; W64-PRE-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; W64-PRE-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; W64-PRE-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; W64-PRE-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W64-PRE-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; W64-PRE-GFX10-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: .1: + ; W64-PRE-GFX10-NEXT: successors: %bb.2(0x80000000) + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec + ; W64-PRE-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec + ; W64-PRE-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; W64-PRE-GFX10-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec + ; W64-PRE-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec + ; W64-PRE-GFX10-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec + ; W64-PRE-GFX10-NEXT: 
[[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; W64-PRE-GFX10-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec + ; W64-PRE-GFX10-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc + ; W64-PRE-GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; W64-PRE-GFX10-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: .2: + ; W64-PRE-GFX10-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec + ; W64-PRE-GFX10-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; W64-PRE-GFX10-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: .3: + ; W64-PRE-GFX10-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] + ; W64-PRE-GFX10-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] + ; W64-PRE-GFX10-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFEN]] + ; W64-PRE-GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; W64-LABEL: name: offen ; W64: successors: %bb.1(0x80000000) ; W64-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 @@ -145,27 +231,27 @@ body: | ; W64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], 
%subreg.sub3 ; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; W64-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .1: ; W64-NEXT: successors: %bb.2(0x80000000) ; W64-NEXT: {{ $}} + ; W64-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_1]], %bb.0, %15, %bb.2 ; W64-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec ; W64-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec ; W64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; W64-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec ; W64-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec ; W64-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec ; W64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; W64-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec - ; W64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc ; W64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; W64-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; W64-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit-def $exec, implicit $exec 
+ ; W64-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit-def $exec, implicit $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .2: ; W64-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; W64-NEXT: {{ $}} ; W64-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec - ; W64-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; W64-NEXT: [[S_ANDN2_WREXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_ANDN2_WREXEC_B64 [[PHI]], implicit-def $exec, implicit-def $scc, implicit $exec ; W64-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .3: @@ -186,27 +272,27 @@ body: | ; W32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo + ; W32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; W32-NEXT: {{ $}} ; W32-NEXT: .1: ; W32-NEXT: successors: %bb.2(0x80000000) ; W32-NEXT: {{ $}} + ; W32-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.0, %15, %bb.2 ; W32-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec ; W32-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec ; W32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; W32-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec ; W32-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec ; W32-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = 
V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec ; W32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; W32-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec - ; W32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc ; W32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; W32-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; W32-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit-def $exec, implicit $exec + ; W32-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit-def $exec, implicit $exec ; W32-NEXT: {{ $}} ; W32-NEXT: .2: ; W32-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; W32-NEXT: {{ $}} ; W32-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec - ; W32-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc + ; W32-NEXT: [[S_ANDN2_WREXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 [[PHI]], implicit-def $exec, implicit-def $scc, implicit $exec ; W32-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; W32-NEXT: {{ $}} ; W32-NEXT: .3: @@ -230,6 +316,7 @@ body: | --- name: bothen +tracksRegLiveness: true liveins: - { reg: '$vgpr0', virtual-reg: '%0' } - { reg: '$vgpr1', virtual-reg: '%1' } @@ -240,6 +327,49 @@ liveins: body: | bb.0: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 + + + ; 
W64-PRE-GFX10-LABEL: name: bothen + ; W64-PRE-GFX10: successors: %bb.1(0x80000000) + ; W64-PRE-GFX10-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; W64-PRE-GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 + ; W64-PRE-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; W64-PRE-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; W64-PRE-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; W64-PRE-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W64-PRE-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; W64-PRE-GFX10-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: .1: + ; W64-PRE-GFX10-NEXT: successors: %bb.2(0x80000000) + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec + ; W64-PRE-GFX10-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec + ; W64-PRE-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; W64-PRE-GFX10-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec + ; W64-PRE-GFX10-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec + ; W64-PRE-GFX10-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec + ; W64-PRE-GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; W64-PRE-GFX10-NEXT: 
[[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec + ; W64-PRE-GFX10-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc + ; W64-PRE-GFX10-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; W64-PRE-GFX10-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: .2: + ; W64-PRE-GFX10-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec + ; W64-PRE-GFX10-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; W64-PRE-GFX10-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: .3: + ; W64-PRE-GFX10-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] + ; W64-PRE-GFX10-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] + ; W64-PRE-GFX10-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_BOTHEN]] + ; W64-PRE-GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; W64-LABEL: name: bothen ; W64: successors: %bb.1(0x80000000) ; W64-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 @@ -252,27 +382,27 @@ body: | ; W64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 ; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; W64-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec ; 
W64-NEXT: {{ $}} ; W64-NEXT: .1: ; W64-NEXT: successors: %bb.2(0x80000000) ; W64-NEXT: {{ $}} + ; W64-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_1]], %bb.0, %15, %bb.2 ; W64-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec ; W64-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec ; W64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; W64-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec ; W64-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec ; W64-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec ; W64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; W64-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec - ; W64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc ; W64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; W64-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; W64-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit-def $exec, implicit $exec + ; W64-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit-def $exec, implicit $exec ; W64-NEXT: {{ $}} ; 
W64-NEXT: .2: ; W64-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; W64-NEXT: {{ $}} ; W64-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec - ; W64-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; W64-NEXT: [[S_ANDN2_WREXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_ANDN2_WREXEC_B64 [[PHI]], implicit-def $exec, implicit-def $scc, implicit $exec ; W64-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; W64-NEXT: {{ $}} ; W64-NEXT: .3: @@ -293,27 +423,27 @@ body: | ; W32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo + ; W32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; W32-NEXT: {{ $}} ; W32-NEXT: .1: ; W32-NEXT: successors: %bb.2(0x80000000) ; W32-NEXT: {{ $}} + ; W32-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.0, %15, %bb.2 ; W32-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec ; W32-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec ; W32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; W32-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec ; W32-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec ; W32-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec ; W32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], 
%subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; W32-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec - ; W32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc ; W32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; W32-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; W32-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit-def $exec, implicit $exec + ; W32-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit-def $exec, implicit $exec ; W32-NEXT: {{ $}} ; W32-NEXT: .2: ; W32-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; W32-NEXT: {{ $}} ; W32-NEXT: [[BUFFER_LOAD_FORMAT_X_BOTHEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN [[COPY1]], killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec - ; W32-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc + ; W32-NEXT: [[S_ANDN2_WREXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 [[PHI]], implicit-def $exec, implicit-def $scc, implicit $exec ; W32-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; W32-NEXT: {{ $}} ; W32-NEXT: .3: @@ -337,6 +467,7 @@ body: | --- name: addr64 +tracksRegLiveness: true liveins: - { reg: '$vgpr0', virtual-reg: '%0' } - { reg: '$vgpr1', virtual-reg: '%1' } @@ -347,28 +478,52 @@ liveins: body: | bb.0: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 - ; ADDR64-LABEL: name: addr64 - ; ADDR64: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 - ; ADDR64-NEXT: {{ $}} - ; ADDR64-NEXT: 
[[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; ADDR64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; ADDR64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; ADDR64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; ADDR64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; ADDR64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; ADDR64-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; ADDR64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 - ; ADDR64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 61440 - ; ADDR64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3 - ; ADDR64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]].sub0, [[COPY1]].sub0, 0, implicit $exec - ; ADDR64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY6]].sub1, [[COPY1]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec - ; ADDR64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 - ; ADDR64-NEXT: [[BUFFER_LOAD_FORMAT_X_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[REG_SEQUENCE2]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec - ; ADDR64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] - ; ADDR64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_ADDR64_]] - ; ADDR64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + + ; W64-PRE-GFX10-LABEL: name: addr64 + ; W64-PRE-GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 + ; W64-PRE-GFX10-NEXT: {{ $}} + ; W64-PRE-GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY 
$sgpr30_sgpr31 + ; W64-PRE-GFX10-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 + ; W64-PRE-GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; W64-PRE-GFX10-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; W64-PRE-GFX10-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; W64-PRE-GFX10-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W64-PRE-GFX10-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; W64-PRE-GFX10-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; W64-PRE-GFX10-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; W64-PRE-GFX10-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; W64-PRE-GFX10-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 61440 + ; W64-PRE-GFX10-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3 + ; W64-PRE-GFX10-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]].sub0, [[COPY1]].sub0, 0, implicit $exec + ; W64-PRE-GFX10-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY6]].sub1, [[COPY1]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; W64-PRE-GFX10-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; W64-PRE-GFX10-NEXT: [[BUFFER_LOAD_FORMAT_X_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[REG_SEQUENCE2]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec + ; W64-PRE-GFX10-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] + ; W64-PRE-GFX10-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_ADDR64_]] + ; W64-PRE-GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; W64-LABEL: name: addr64 + ; W64: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 + ; 
W64-NEXT: {{ $}} + ; W64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; W64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 + ; W64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; W64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; W64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; W64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; W64-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; W64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; W64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 822173696 + ; W64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3 + ; W64-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADD_CO_U32_e64 [[COPY6]].sub0, [[COPY1]].sub0, 0, implicit $exec + ; W64-NEXT: [[V_ADDC_U32_e64_:%[0-9]+]]:vgpr_32, dead [[V_ADDC_U32_e64_1:%[0-9]+]]:sreg_64_xexec = V_ADDC_U32_e64 [[COPY6]].sub1, [[COPY1]].sub1, killed [[V_ADD_CO_U32_e64_1]], 0, implicit $exec + ; W64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_ADD_CO_U32_e64_]], %subreg.sub0, [[V_ADDC_U32_e64_]], %subreg.sub1 + ; W64-NEXT: [[BUFFER_LOAD_FORMAT_X_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[REG_SEQUENCE2]], killed [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec + ; W64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] + ; W64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_ADDR64_]] + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; W32-LABEL: name: addr64 ; W32: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 @@ -409,6 +564,7 @@ body: | --- name: offset +tracksRegLiveness: true liveins: - { reg: '$vgpr0', virtual-reg: '%0' } - { reg: 
'$vgpr1', virtual-reg: '%1' } @@ -419,67 +575,110 @@ liveins: body: | bb.0: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 - ; ADDR64-LABEL: name: offset - ; ADDR64: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 - ; ADDR64-NEXT: {{ $}} - ; ADDR64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; ADDR64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; ADDR64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; ADDR64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; ADDR64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; ADDR64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; ADDR64-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 - ; ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 - ; ADDR64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 - ; ADDR64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 61440 - ; ADDR64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3 - ; ADDR64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]].sub0, %subreg.sub0, [[COPY6]].sub1, %subreg.sub1 - ; ADDR64-NEXT: [[BUFFER_LOAD_FORMAT_X_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec - ; ADDR64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] - ; ADDR64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_ADDR64_]] - ; ADDR64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + + + ; W64-PRE-GFX10-ADDR64-LABEL: name: offset + ; W64-PRE-GFX10-ADDR64: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 + ; W64-PRE-GFX10-ADDR64-NEXT: {{ $}} + ; W64-PRE-GFX10-ADDR64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; W64-PRE-GFX10-ADDR64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 
= COPY $vgpr4_vgpr5 + ; W64-PRE-GFX10-ADDR64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; W64-PRE-GFX10-ADDR64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; W64-PRE-GFX10-ADDR64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; W64-PRE-GFX10-ADDR64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W64-PRE-GFX10-ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; W64-PRE-GFX10-ADDR64-NEXT: [[COPY6:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]].sub0_sub1 + ; W64-PRE-GFX10-ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64 = S_MOV_B64 0 + ; W64-PRE-GFX10-ADDR64-NEXT: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 0 + ; W64-PRE-GFX10-ADDR64-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 61440 + ; W64-PRE-GFX10-ADDR64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[S_MOV_B64_]], %subreg.sub0_sub1, [[S_MOV_B32_]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3 + ; W64-PRE-GFX10-ADDR64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY6]].sub0, %subreg.sub0, [[COPY6]].sub1, %subreg.sub1 + ; W64-PRE-GFX10-ADDR64-NEXT: [[BUFFER_LOAD_FORMAT_X_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[REG_SEQUENCE2]], [[REG_SEQUENCE1]], 0, 0, 0, 0, implicit $exec + ; W64-PRE-GFX10-ADDR64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] + ; W64-PRE-GFX10-ADDR64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_ADDR64_]] + ; W64-PRE-GFX10-ADDR64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; W64-NO-ADDR64-LABEL: name: offset - ; W64-NO-ADDR64: successors: %bb.1(0x80000000) - ; W64-NO-ADDR64-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 - ; W64-NO-ADDR64-NEXT: {{ $}} - ; W64-NO-ADDR64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 - ; W64-NO-ADDR64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 - ; W64-NO-ADDR64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 - ; W64-NO-ADDR64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; 
W64-NO-ADDR64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 - ; W64-NO-ADDR64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 - ; W64-NO-ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 - ; W64-NO-ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec - ; W64-NO-ADDR64-NEXT: {{ $}} - ; W64-NO-ADDR64-NEXT: .1: - ; W64-NO-ADDR64-NEXT: successors: %bb.2(0x80000000) - ; W64-NO-ADDR64-NEXT: {{ $}} - ; W64-NO-ADDR64-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec - ; W64-NO-ADDR64-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec - ; W64-NO-ADDR64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; W64-NO-ADDR64-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec - ; W64-NO-ADDR64-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec - ; W64-NO-ADDR64-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec - ; W64-NO-ADDR64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; W64-NO-ADDR64-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec - ; W64-NO-ADDR64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc - ; W64-NO-ADDR64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, 
[[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; W64-NO-ADDR64-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec - ; W64-NO-ADDR64-NEXT: {{ $}} - ; W64-NO-ADDR64-NEXT: .2: - ; W64-NO-ADDR64-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) - ; W64-NO-ADDR64-NEXT: {{ $}} - ; W64-NO-ADDR64-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec - ; W64-NO-ADDR64-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc - ; W64-NO-ADDR64-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec - ; W64-NO-ADDR64-NEXT: {{ $}} - ; W64-NO-ADDR64-NEXT: .3: - ; W64-NO-ADDR64-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] - ; W64-NO-ADDR64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] - ; W64-NO-ADDR64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFSET]] - ; W64-NO-ADDR64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; W64-PRE-GFX10-NO-ADDR64-LABEL: name: offset + ; W64-PRE-GFX10-NO-ADDR64: successors: %bb.1(0x80000000) + ; W64-PRE-GFX10-NO-ADDR64-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 + ; W64-PRE-GFX10-NO-ADDR64-NEXT: {{ $}} + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; 
W64-PRE-GFX10-NO-ADDR64-NEXT: {{ $}} + ; W64-PRE-GFX10-NO-ADDR64-NEXT: .1: + ; W64-PRE-GFX10-NO-ADDR64-NEXT: successors: %bb.2(0x80000000) + ; W64-PRE-GFX10-NO-ADDR64-NEXT: {{ $}} + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec + ; W64-PRE-GFX10-NO-ADDR64-NEXT: {{ $}} + 
; W64-PRE-GFX10-NO-ADDR64-NEXT: .2: + ; W64-PRE-GFX10-NO-ADDR64-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; W64-PRE-GFX10-NO-ADDR64-NEXT: {{ $}} + ; W64-PRE-GFX10-NO-ADDR64-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec + ; W64-PRE-GFX10-NO-ADDR64-NEXT: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc + ; W64-PRE-GFX10-NO-ADDR64-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec + ; W64-PRE-GFX10-NO-ADDR64-NEXT: {{ $}} + ; W64-PRE-GFX10-NO-ADDR64-NEXT: .3: + ; W64-PRE-GFX10-NO-ADDR64-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] + ; W64-PRE-GFX10-NO-ADDR64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] + ; W64-PRE-GFX10-NO-ADDR64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFSET]] + ; W64-PRE-GFX10-NO-ADDR64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; W64-LABEL: name: offset + ; W64: successors: %bb.1(0x80000000) + ; W64-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $sgpr30_sgpr31 + ; W64-NEXT: {{ $}} + ; W64-NEXT: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr30_sgpr31 + ; W64-NEXT: [[COPY1:%[0-9]+]]:vreg_64 = COPY $vgpr4_vgpr5 + ; W64-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3 + ; W64-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; W64-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; W64-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; W64-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 + ; W64-NEXT: [[S_MOV_B64_:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; W64-NEXT: [[S_MOV_B64_1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64 $exec + ; W64-NEXT: {{ $}} + ; W64-NEXT: .1: + ; W64-NEXT: successors: %bb.2(0x80000000) + ; W64-NEXT: {{ $}} + ; W64-NEXT: [[PHI:%[0-9]+]]:sreg_64_xexec = PHI [[S_MOV_B64_1]], %bb.0, %15, %bb.2 + ; W64-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec + 
; W64-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec + ; W64-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 + ; W64-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec + ; W64-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec + ; W64-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 + ; W64-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 + ; W64-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit-def $exec, implicit $exec + ; W64-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit-def $exec, implicit $exec + ; W64-NEXT: {{ $}} + ; W64-NEXT: .2: + ; W64-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; W64-NEXT: {{ $}} + ; W64-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec + ; W64-NEXT: [[S_ANDN2_WREXEC_B64_:%[0-9]+]]:sreg_64_xexec = S_ANDN2_WREXEC_B64 [[PHI]], implicit-def $exec, implicit-def $scc, implicit $exec + ; W64-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec + ; W64-NEXT: {{ $}} + ; W64-NEXT: .3: + ; W64-NEXT: $exec = S_MOV_B64 [[S_MOV_B64_]] + ; W64-NEXT: $sgpr30_sgpr31 = COPY [[COPY]] + ; W64-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_FORMAT_X_OFFSET]] + ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; ; W32-LABEL: name: offset ; W32: successors: %bb.1(0x80000000) @@ -493,27 +692,27 @@ body: | ; W32-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0 ; 
W32-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1, [[COPY3]], %subreg.sub2, [[COPY2]], %subreg.sub3 ; W32-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo + ; W32-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_MOV_B32 $exec_lo ; W32-NEXT: {{ $}} ; W32-NEXT: .1: ; W32-NEXT: successors: %bb.2(0x80000000) ; W32-NEXT: {{ $}} + ; W32-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.0, %15, %bb.2 ; W32-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec ; W32-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec ; W32-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; W32-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec ; W32-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec ; W32-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec ; W32-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; W32-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec - ; W32-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[V_CMP_EQ_U64_e64_]], [[V_CMP_EQ_U64_e64_1]], implicit-def $scc ; W32-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3 - ; W32-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_]], 
implicit-def $exec, implicit-def $scc, implicit $exec + ; W32-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit-def $exec, implicit $exec + ; W32-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit-def $exec, implicit $exec ; W32-NEXT: {{ $}} ; W32-NEXT: .2: ; W32-NEXT: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; W32-NEXT: {{ $}} ; W32-NEXT: [[BUFFER_LOAD_FORMAT_X_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[REG_SEQUENCE3]], 0, 0, 0, 0, implicit $exec - ; W32-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, [[S_AND_SAVEEXEC_B32_]], implicit-def $scc + ; W32-NEXT: [[S_ANDN2_WREXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 [[PHI]], implicit-def $exec, implicit-def $scc, implicit $exec ; W32-NEXT: SI_WATERFALL_LOOP %bb.1, implicit $exec ; W32-NEXT: {{ $}} ; W32-NEXT: .3: diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll index 950a1252a3e06..fa2ea184f2f83 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-descriptor-waterfall-loop-idom-update.ll @@ -11,28 +11,28 @@ define void @vgpr_descriptor_waterfall_loop_idom_update(ptr %arg) #0 { ; GCN-NEXT: ; Child Loop BB0_2 Depth 2 ; GCN-NEXT: v_add_co_u32 v6, vcc_lo, v0, 8 ; GCN-NEXT: v_add_co_ci_u32_e32 v7, vcc_lo, 0, v1, vcc_lo -; GCN-NEXT: s_mov_b32 s5, exec_lo +; GCN-NEXT: s_mov_b32 s8, exec_lo ; GCN-NEXT: s_clause 0x1 ; GCN-NEXT: flat_load_dwordx2 v[4:5], v[6:7] ; GCN-NEXT: flat_load_dwordx2 v[2:3], v[0:1] +; GCN-NEXT: s_mov_b32 s9, s8 ; GCN-NEXT: .LBB0_2: ; Parent Loop BB0_1 Depth=1 ; GCN-NEXT: ; => This Inner Loop Header: Depth=2 ; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GCN-NEXT: v_readfirstlane_b32 s8, v2 -; GCN-NEXT: v_readfirstlane_b32 s9, v3 -; GCN-NEXT: v_readfirstlane_b32 s10, v4 -; GCN-NEXT: v_readfirstlane_b32 s11, v5 -; 
GCN-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[8:9], v[2:3] -; GCN-NEXT: v_cmp_eq_u64_e64 s4, s[10:11], v[4:5] -; GCN-NEXT: s_and_b32 s4, vcc_lo, s4 -; GCN-NEXT: s_and_saveexec_b32 s4, s4 -; GCN-NEXT: buffer_store_dword v0, v0, s[8:11], 0 offen +; GCN-NEXT: v_readfirstlane_b32 s4, v2 +; GCN-NEXT: v_readfirstlane_b32 s5, v3 +; GCN-NEXT: v_readfirstlane_b32 s6, v4 +; GCN-NEXT: v_readfirstlane_b32 s7, v5 +; GCN-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; GCN-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[2:3] +; GCN-NEXT: v_cmpx_eq_u64_e32 s[6:7], v[4:5] +; GCN-NEXT: buffer_store_dword v0, v0, s[4:7], 0 offen +; GCN-NEXT: s_andn2_wrexec_b32 s9, s9 ; GCN-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 -; GCN-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) -; GCN-NEXT: s_xor_b32 exec_lo, exec_lo, s4 ; GCN-NEXT: s_cbranch_execnz .LBB0_2 ; GCN-NEXT: ; %bb.3: ; in Loop: Header=BB0_1 Depth=1 -; GCN-NEXT: s_mov_b32 exec_lo, s5 +; GCN-NEXT: s_waitcnt_depctr depctr_vm_vsrc(0) +; GCN-NEXT: s_mov_b32 exec_lo, s8 ; GCN-NEXT: s_mov_b32 vcc_lo, exec_lo ; GCN-NEXT: s_cbranch_vccnz .LBB0_1 ; GCN-NEXT: ; %bb.4: ; %DummyReturnBlock @@ -46,26 +46,25 @@ define void @vgpr_descriptor_waterfall_loop_idom_update(ptr %arg) #0 { ; GFX11-NEXT: ; =>This Loop Header: Depth=1 ; GFX11-NEXT: ; Child Loop BB0_2 Depth 2 ; GFX11-NEXT: flat_load_b128 v[2:5], v[0:1] -; GFX11-NEXT: s_mov_b32 s1, exec_lo +; GFX11-NEXT: s_mov_b32 s4, exec_lo +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: s_mov_b32 s5, s4 ; GFX11-NEXT: .LBB0_2: ; Parent Loop BB0_1 Depth=1 ; GFX11-NEXT: ; => This Inner Loop Header: Depth=2 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_readfirstlane_b32 s4, v2 -; GFX11-NEXT: v_readfirstlane_b32 s5, v3 -; GFX11-NEXT: v_readfirstlane_b32 s6, v4 -; GFX11-NEXT: v_readfirstlane_b32 s7, v5 +; GFX11-NEXT: v_readfirstlane_b32 s0, v2 +; GFX11-NEXT: v_readfirstlane_b32 s1, v3 +; GFX11-NEXT: v_readfirstlane_b32 s2, v4 +; GFX11-NEXT: v_readfirstlane_b32 s3, v5 ; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] -; GFX11-NEXT: v_cmp_eq_u64_e64 s0, s[6:7], v[4:5] -; GFX11-NEXT: s_and_b32 s0, vcc_lo, s0 -; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: s_and_saveexec_b32 s0, s0 -; GFX11-NEXT: buffer_store_b32 v0, v0, s[4:7], 0 offen +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[0:1], v[2:3] +; GFX11-NEXT: v_cmpx_eq_u64_e32 s[2:3], v[4:5] +; GFX11-NEXT: buffer_store_b32 v0, v0, s[0:3], 0 offen +; GFX11-NEXT: s_and_not1_wrexec_b32 s5, s5 ; GFX11-NEXT: ; implicit-def: $vgpr2_vgpr3_vgpr4_vgpr5 -; GFX11-NEXT: s_xor_b32 exec_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_execnz .LBB0_2 ; GFX11-NEXT: ; %bb.3: ; in Loop: Header=BB0_1 Depth=1 -; GFX11-NEXT: s_mov_b32 exec_lo, s1 +; GFX11-NEXT: s_mov_b32 exec_lo, s4 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_mov_b32 vcc_lo, exec_lo ; GFX11-NEXT: s_cbranch_vccnz .LBB0_1 diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll index 3bc67562012e5..8352e3948611b 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange-ir.ll @@ -233,9 +233,9 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun ; SI-NEXT: successors: %bb.2(0x40000000), %bb.10(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %16:vgpr_32, %bb.0, %4, %bb.9 - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY4]], %bb.0, undef %48:vgpr_32, %bb.9 - ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %50:vgpr_32, %bb.9 - ; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %52:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY4]], %bb.0, undef %50:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %52:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI3:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %54:vgpr_32, %bb.9 ; SI-NEXT: 
[[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} @@ -249,24 +249,24 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun ; SI-NEXT: bb.3: ; SI-NEXT: successors: %bb.4(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI4:%[0-9]+]]:vreg_64 = PHI undef %54:vreg_64, %bb.4, [[REG_SEQUENCE]], %bb.2 - ; SI-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI undef %56:vgpr_32, %bb.4, [[PHI1]], %bb.2 - ; SI-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI4]].sub0, implicit $exec - ; SI-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI4]].sub1, implicit $exec + ; SI-NEXT: [[PHI4:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_]], %bb.2, %41, %bb.4 + ; SI-NEXT: [[PHI5:%[0-9]+]]:vreg_64 = PHI undef %56:vreg_64, %bb.4, [[REG_SEQUENCE]], %bb.2 + ; SI-NEXT: [[PHI6:%[0-9]+]]:vgpr_32 = PHI undef %58:vgpr_32, %bb.4, [[PHI1]], %bb.2 + ; SI-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI5]].sub0, implicit $exec + ; SI-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI5]].sub1, implicit $exec ; SI-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; SI-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], killed [[PHI4]], implicit $exec - ; SI-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U64_e64_]], implicit-def $exec, implicit-def dead $scc, implicit $exec + ; SI-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE1]], killed [[PHI5]], implicit-def $exec, implicit $exec ; SI-NEXT: {{ $}} ; SI-NEXT: bb.4: ; SI-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def 
$sgpr32, implicit $sgpr32 ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY6]] - ; SI-NEXT: $vgpr0 = COPY killed [[PHI5]] + ; SI-NEXT: $vgpr0 = COPY killed [[PHI6]] ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; SI-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_]], implicit-def dead $scc + ; SI-NEXT: [[S_ANDN2_WREXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 killed [[PHI4]], implicit-def $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: SI_WATERFALL_LOOP %bb.3, implicit $exec ; SI-NEXT: {{ $}} ; SI-NEXT: bb.5: @@ -286,24 +286,24 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun ; SI-NEXT: bb.7: ; SI-NEXT: successors: %bb.8(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI6:%[0-9]+]]:vreg_64 = PHI undef %58:vreg_64, %bb.8, [[REG_SEQUENCE2]], %bb.6 - ; SI-NEXT: [[PHI7:%[0-9]+]]:vgpr_32 = PHI undef %60:vgpr_32, %bb.8, [[COPY4]], %bb.6 - ; SI-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI6]].sub0, implicit $exec - ; SI-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI6]].sub1, implicit $exec + ; SI-NEXT: [[PHI7:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.6, %33, %bb.8 + ; SI-NEXT: [[PHI8:%[0-9]+]]:vreg_64 = PHI undef %60:vreg_64, %bb.8, [[REG_SEQUENCE2]], %bb.6 + ; SI-NEXT: [[PHI9:%[0-9]+]]:vgpr_32 = PHI undef %62:vgpr_32, %bb.8, [[COPY4]], %bb.6 + ; SI-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI8]].sub0, implicit $exec + ; SI-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI8]].sub1, implicit $exec ; SI-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed 
[[V_READFIRSTLANE_B32_2]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; SI-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], killed [[PHI6]], implicit $exec - ; SI-NEXT: [[S_AND_SAVEEXEC_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U64_e64_1]], implicit-def $exec, implicit-def dead $scc, implicit $exec + ; SI-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE3]], killed [[PHI8]], implicit-def $exec, implicit $exec ; SI-NEXT: {{ $}} ; SI-NEXT: bb.8: ; SI-NEXT: successors: %bb.7(0x40000000), %bb.9(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; SI-NEXT: $sgpr0_sgpr1_sgpr2_sgpr3 = COPY [[COPY9]] - ; SI-NEXT: $vgpr0 = COPY killed [[PHI7]] + ; SI-NEXT: $vgpr0 = COPY killed [[PHI9]] ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; SI-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_1]], implicit-def dead $scc + ; SI-NEXT: [[S_ANDN2_WREXEC_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 killed [[PHI7]], implicit-def $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: SI_WATERFALL_LOOP %bb.7, implicit $exec ; SI-NEXT: {{ $}} ; SI-NEXT: bb.9: @@ -314,9 +314,9 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun ; SI-NEXT: S_BRANCH %bb.1 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.10.end: - ; SI-NEXT: [[PHI8:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[COPY8]], %bb.5 + ; SI-NEXT: [[PHI10:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[COPY8]], %bb.5 ; SI-NEXT: SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; SI-NEXT: $vgpr0 = COPY killed 
[[PHI8]] + ; SI-NEXT: $vgpr0 = COPY killed [[PHI10]] ; SI-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 main_body: %cc = icmp sgt i32 %z, 5 @@ -356,8 +356,8 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: successors: %bb.2(0x40000000), %bb.10(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[PHI:%[0-9]+]]:vgpr_32 = PHI undef %16:vgpr_32, %bb.0, %4, %bb.9 - ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %49:vgpr_32, %bb.9 - ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %51:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI1:%[0-9]+]]:vgpr_32 = PHI [[COPY3]], %bb.0, undef %51:vgpr_32, %bb.9 + ; SI-NEXT: [[PHI2:%[0-9]+]]:vgpr_32 = PHI [[COPY2]], %bb.0, undef %53:vgpr_32, %bb.9 ; SI-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32 = SI_ELSE killed [[SI_IF]], %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: S_BRANCH %bb.2 ; SI-NEXT: {{ $}} @@ -371,12 +371,12 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: bb.3: ; SI-NEXT: successors: %bb.4(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI3:%[0-9]+]]:vreg_64 = PHI undef %53:vreg_64, %bb.4, [[REG_SEQUENCE]], %bb.2 - ; SI-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI3]].sub0, implicit $exec - ; SI-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI3]].sub1, implicit $exec + ; SI-NEXT: [[PHI3:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_]], %bb.2, %42, %bb.4 + ; SI-NEXT: [[PHI4:%[0-9]+]]:vreg_64 = PHI undef %55:vreg_64, %bb.4, [[REG_SEQUENCE]], %bb.2 + ; SI-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI4]].sub0, implicit $exec + ; SI-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI4]].sub1, implicit $exec ; SI-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; SI-NEXT: 
[[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE1]], killed [[PHI3]], implicit $exec - ; SI-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U64_e64_]], implicit-def $exec, implicit-def dead $scc, implicit $exec + ; SI-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE1]], killed [[PHI4]], implicit-def $exec, implicit $exec ; SI-NEXT: {{ $}} ; SI-NEXT: bb.4: ; SI-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) @@ -387,7 +387,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE1]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; SI-NEXT: [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_]], implicit-def dead $scc + ; SI-NEXT: [[S_ANDN2_WREXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 killed [[PHI3]], implicit-def $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: SI_WATERFALL_LOOP %bb.3, implicit $exec ; SI-NEXT: {{ $}} ; SI-NEXT: bb.5: @@ -407,12 +407,12 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: bb.7: ; SI-NEXT: successors: %bb.8(0x80000000) ; SI-NEXT: {{ $}} - ; SI-NEXT: [[PHI4:%[0-9]+]]:vreg_64 = PHI undef %55:vreg_64, %bb.8, [[REG_SEQUENCE2]], %bb.6 - ; SI-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI4]].sub0, implicit $exec - ; SI-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI4]].sub1, implicit $exec + ; SI-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.6, %34, %bb.8 + ; SI-NEXT: [[PHI6:%[0-9]+]]:vreg_64 = PHI undef %57:vreg_64, %bb.8, [[REG_SEQUENCE2]], %bb.6 + ; SI-NEXT: 
[[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI6]].sub0, implicit $exec + ; SI-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[PHI6]].sub1, implicit $exec ; SI-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_2]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; SI-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], killed [[PHI4]], implicit $exec - ; SI-NEXT: [[S_AND_SAVEEXEC_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[V_CMP_EQ_U64_e64_1]], implicit-def $exec, implicit-def dead $scc, implicit $exec + ; SI-NEXT: V_CMPX_EQ_U64_nosdst_e32_term [[REG_SEQUENCE3]], killed [[PHI6]], implicit-def $exec, implicit $exec ; SI-NEXT: {{ $}} ; SI-NEXT: bb.8: ; SI-NEXT: successors: %bb.7(0x40000000), %bb.9(0x40000000) @@ -423,7 +423,7 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed [[REG_SEQUENCE3]], 0, csr_amdgpu_si_gfx, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3, implicit killed $vgpr0, implicit-def $vgpr0 ; SI-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; SI-NEXT: [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0 - ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_1]], implicit-def dead $scc + ; SI-NEXT: [[S_ANDN2_WREXEC_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 killed [[PHI5]], implicit-def $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: SI_WATERFALL_LOOP %bb.7, implicit $exec ; SI-NEXT: {{ $}} ; SI-NEXT: bb.9: @@ -434,9 +434,9 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: S_BRANCH %bb.1 ; SI-NEXT: {{ $}} ; SI-NEXT: bb.10.end: - ; SI-NEXT: [[PHI5:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[COPY8]], %bb.5 + ; SI-NEXT: [[PHI7:%[0-9]+]]:vgpr_32 = PHI [[PHI]], %bb.1, [[COPY8]], %bb.5 ; SI-NEXT: 
SI_END_CF killed [[SI_ELSE]], implicit-def dead $exec, implicit-def dead $scc, implicit $exec - ; SI-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[PHI5]], 0, killed [[COPY4]], 0, 0, implicit $mode, implicit $exec + ; SI-NEXT: [[V_ADD_F32_e64_:%[0-9]+]]:vgpr_32 = nofpexcept V_ADD_F32_e64 0, killed [[PHI7]], 0, killed [[COPY4]], 0, 0, implicit $mode, implicit $exec ; SI-NEXT: $vgpr0 = COPY killed [[V_ADD_F32_e64_]] ; SI-NEXT: SI_RETURN_TO_EPILOG killed $vgpr0 main_body: @@ -582,27 +582,24 @@ define protected amdgpu_kernel void @nested_waterfalls(ptr addrspace(1) %tex.coe ; SI-NEXT: bb.2: ; SI-NEXT: successors: %bb.3(0x80000000) ; SI-NEXT: {{ $}} + ; SI-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_]], %bb.1, %38, %bb.6 ; SI-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub0, implicit $exec ; SI-NEXT: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub1, implicit $exec ; SI-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1 - ; SI-NEXT: [[V_CMP_EQ_U64_e64_:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 killed [[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit $exec ; SI-NEXT: [[V_READFIRSTLANE_B32_2:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub2, implicit $exec ; SI-NEXT: [[V_READFIRSTLANE_B32_3:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub3, implicit $exec ; SI-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_2]], %subreg.sub0, [[V_READFIRSTLANE_B32_3]], %subreg.sub1 - ; SI-NEXT: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 killed [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit $exec - ; SI-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[V_CMP_EQ_U64_e64_]], killed [[V_CMP_EQ_U64_e64_1]], implicit-def dead $scc ; SI-NEXT: 
[[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub4, implicit $exec ; SI-NEXT: [[V_READFIRSTLANE_B32_5:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub5, implicit $exec ; SI-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_4]], %subreg.sub0, [[V_READFIRSTLANE_B32_5]], %subreg.sub1 - ; SI-NEXT: [[V_CMP_EQ_U64_e64_2:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 killed [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub4_sub5, implicit $exec - ; SI-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[S_AND_B32_]], killed [[V_CMP_EQ_U64_e64_2]], implicit-def dead $scc ; SI-NEXT: [[V_READFIRSTLANE_B32_6:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub6, implicit $exec ; SI-NEXT: [[V_READFIRSTLANE_B32_7:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[REG_SEQUENCE]].sub7, implicit $exec ; SI-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_6]], %subreg.sub0, [[V_READFIRSTLANE_B32_7]], %subreg.sub1 - ; SI-NEXT: [[V_CMP_EQ_U64_e64_3:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]].sub6_sub7, implicit $exec - ; SI-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 killed [[S_AND_B32_1]], killed [[V_CMP_EQ_U64_e64_3]], implicit-def dead $scc ; SI-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:sgpr_256 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_1]], %subreg.sub1, killed [[V_READFIRSTLANE_B32_2]], %subreg.sub2, killed [[V_READFIRSTLANE_B32_3]], %subreg.sub3, killed [[V_READFIRSTLANE_B32_4]], %subreg.sub4, killed [[V_READFIRSTLANE_B32_5]], %subreg.sub5, killed [[V_READFIRSTLANE_B32_6]], %subreg.sub6, killed [[V_READFIRSTLANE_B32_7]], %subreg.sub7 - ; SI-NEXT: [[S_AND_SAVEEXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_2]], implicit-def $exec, implicit-def dead $scc, implicit $exec + ; SI-NEXT: V_CMPX_EQ_U64_nosdst_e32_term killed 
[[REG_SEQUENCE1]], [[REG_SEQUENCE]].sub0_sub1, implicit-def $exec, implicit $exec + ; SI-NEXT: V_CMPX_EQ_U64_nosdst_e32_term killed [[REG_SEQUENCE2]], [[REG_SEQUENCE]].sub2_sub3, implicit-def $exec, implicit $exec + ; SI-NEXT: V_CMPX_EQ_U64_nosdst_e32_term killed [[REG_SEQUENCE3]], [[REG_SEQUENCE]].sub4_sub5, implicit-def $exec, implicit $exec + ; SI-NEXT: V_CMPX_EQ_U64_nosdst_e32_term killed [[REG_SEQUENCE4]], [[REG_SEQUENCE]].sub6_sub7, implicit-def $exec, implicit $exec ; SI-NEXT: {{ $}} ; SI-NEXT: bb.3: ; SI-NEXT: successors: %bb.4(0x80000000) @@ -612,30 +609,29 @@ define protected amdgpu_kernel void @nested_waterfalls(ptr addrspace(1) %tex.coe ; SI-NEXT: bb.4: ; SI-NEXT: successors: %bb.5(0x80000000) ; SI-NEXT: {{ $}} + ; SI-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_MOV_B32_1]], %bb.3, %55, %bb.5 ; SI-NEXT: [[V_READFIRSTLANE_B32_8:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[GLOBAL_LOAD_DWORDX4_2]].sub0, implicit $exec ; SI-NEXT: [[V_READFIRSTLANE_B32_9:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[GLOBAL_LOAD_DWORDX4_2]].sub1, implicit $exec ; SI-NEXT: [[REG_SEQUENCE6:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_8]], %subreg.sub0, [[V_READFIRSTLANE_B32_9]], %subreg.sub1 - ; SI-NEXT: [[V_CMP_EQ_U64_e64_4:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 killed [[REG_SEQUENCE6]], [[GLOBAL_LOAD_DWORDX4_2]].sub0_sub1, implicit $exec ; SI-NEXT: [[V_READFIRSTLANE_B32_10:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[GLOBAL_LOAD_DWORDX4_2]].sub2, implicit $exec ; SI-NEXT: [[V_READFIRSTLANE_B32_11:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[GLOBAL_LOAD_DWORDX4_2]].sub3, implicit $exec ; SI-NEXT: [[REG_SEQUENCE7:%[0-9]+]]:sgpr_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_10]], %subreg.sub0, [[V_READFIRSTLANE_B32_11]], %subreg.sub1 - ; SI-NEXT: [[V_CMP_EQ_U64_e64_5:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 killed [[REG_SEQUENCE7]], [[GLOBAL_LOAD_DWORDX4_2]].sub2_sub3, implicit $exec - ; SI-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 
killed [[V_CMP_EQ_U64_e64_4]], killed [[V_CMP_EQ_U64_e64_5]], implicit-def dead $scc ; SI-NEXT: [[REG_SEQUENCE8:%[0-9]+]]:sgpr_128 = REG_SEQUENCE killed [[V_READFIRSTLANE_B32_8]], %subreg.sub0, killed [[V_READFIRSTLANE_B32_9]], %subreg.sub1, killed [[V_READFIRSTLANE_B32_10]], %subreg.sub2, killed [[V_READFIRSTLANE_B32_11]], %subreg.sub3 - ; SI-NEXT: [[S_AND_SAVEEXEC_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[S_AND_B32_3]], implicit-def $exec, implicit-def dead $scc, implicit $exec + ; SI-NEXT: V_CMPX_EQ_U64_nosdst_e32_term killed [[REG_SEQUENCE6]], [[GLOBAL_LOAD_DWORDX4_2]].sub0_sub1, implicit-def $exec, implicit $exec + ; SI-NEXT: V_CMPX_EQ_U64_nosdst_e32_term killed [[REG_SEQUENCE7]], [[GLOBAL_LOAD_DWORDX4_2]].sub2_sub3, implicit-def $exec, implicit $exec ; SI-NEXT: {{ $}} ; SI-NEXT: bb.5: ; SI-NEXT: successors: %bb.4(0x40000000), %bb.6(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: [[IMAGE_SAMPLE_V1_V2_nsa_gfx10_:%[0-9]+]]:vgpr_32 = IMAGE_SAMPLE_V1_V2_nsa_gfx10 undef %29:vgpr_32, undef %31:vgpr_32, [[REG_SEQUENCE5]], killed [[REG_SEQUENCE8]], 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s32), addrspace 8) - ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_1]], implicit-def dead $scc + ; SI-NEXT: [[S_ANDN2_WREXEC_B32_:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 killed [[PHI1]], implicit-def $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: SI_WATERFALL_LOOP %bb.4, implicit $exec ; SI-NEXT: {{ $}} ; SI-NEXT: bb.6: ; SI-NEXT: successors: %bb.2(0x40000000), %bb.7(0x40000000) ; SI-NEXT: {{ $}} ; SI-NEXT: $exec_lo = S_MOV_B32 killed [[S_MOV_B32_1]] - ; SI-NEXT: $exec_lo = S_XOR_B32_term $exec_lo, killed [[S_AND_SAVEEXEC_B32_]], implicit-def dead $scc + ; SI-NEXT: [[S_ANDN2_WREXEC_B32_1:%[0-9]+]]:sreg_32_xm0_xexec = S_ANDN2_WREXEC_B32 killed [[PHI]], implicit-def $exec, implicit-def dead $scc, implicit $exec ; SI-NEXT: SI_WATERFALL_LOOP %bb.2, implicit $exec ; SI-NEXT: {{ $}} ; SI-NEXT: bb.7: 
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll index b46f5f5640b66..9e0db1dfa26c5 100644 --- a/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll +++ b/llvm/test/CodeGen/AMDGPU/vgpr-liverange.ll @@ -177,18 +177,19 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun ; SI-NEXT: s_cbranch_execz .LBB3_4 ; SI-NEXT: ; %bb.1: ; %else ; SI-NEXT: s_mov_b32 s7, exec_lo +; SI-NEXT: s_mov_b32 s8, s7 ; SI-NEXT: .LBB3_2: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_readfirstlane_b32 s4, v4 ; SI-NEXT: v_readfirstlane_b32 s5, v5 -; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5] -; SI-NEXT: s_and_saveexec_b32 s8, vcc_lo +; SI-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; SI-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[4:5] ; SI-NEXT: s_mov_b64 s[0:1], s[12:13] ; SI-NEXT: s_mov_b64 s[2:3], s[14:15] ; SI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SI-NEXT: v_mov_b32_e32 v1, v0 +; SI-NEXT: s_andn2_wrexec_b32 s8, s8 ; SI-NEXT: ; implicit-def: $vgpr4_vgpr5 ; SI-NEXT: ; implicit-def: $vgpr0 -; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s8 ; SI-NEXT: s_cbranch_execnz .LBB3_2 ; SI-NEXT: ; %bb.3: ; SI-NEXT: s_mov_b32 exec_lo, s7 @@ -199,18 +200,19 @@ define amdgpu_ps float @loop(i32 %z, float %v, i32 inreg %bound, ptr %extern_fun ; SI-NEXT: s_cbranch_execz .LBB3_8 ; SI-NEXT: ; %bb.5: ; %if ; SI-NEXT: s_mov_b32 s7, exec_lo +; SI-NEXT: s_mov_b32 s8, s7 ; SI-NEXT: .LBB3_6: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_readfirstlane_b32 s4, v2 ; SI-NEXT: v_readfirstlane_b32 s5, v3 -; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] -; SI-NEXT: s_and_saveexec_b32 s8, vcc_lo +; SI-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; SI-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[2:3] ; SI-NEXT: s_mov_b64 s[0:1], s[12:13] ; SI-NEXT: s_mov_b64 s[2:3], s[14:15] ; SI-NEXT: s_swappc_b64 s[30:31], s[4:5] ; SI-NEXT: v_mov_b32_e32 v1, v0 +; SI-NEXT: s_andn2_wrexec_b32 s8, s8 ; SI-NEXT: ; implicit-def: $vgpr2_vgpr3 ; SI-NEXT: ; implicit-def: $vgpr0 -; SI-NEXT: 
s_xor_b32 exec_lo, exec_lo, s8 ; SI-NEXT: s_cbranch_execnz .LBB3_6 ; SI-NEXT: ; %bb.7: ; SI-NEXT: s_mov_b32 exec_lo, s7 @@ -254,17 +256,18 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: s_cbranch_execz .LBB4_4 ; SI-NEXT: ; %bb.1: ; %else ; SI-NEXT: s_mov_b32 s7, exec_lo +; SI-NEXT: s_mov_b32 s8, s7 ; SI-NEXT: .LBB4_2: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_readfirstlane_b32 s4, v4 ; SI-NEXT: v_readfirstlane_b32 s5, v5 -; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[4:5] -; SI-NEXT: s_and_saveexec_b32 s8, vcc_lo +; SI-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; SI-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[4:5] ; SI-NEXT: v_mov_b32_e32 v0, v40 ; SI-NEXT: s_mov_b64 s[0:1], s[12:13] ; SI-NEXT: s_mov_b64 s[2:3], s[14:15] ; SI-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SI-NEXT: s_andn2_wrexec_b32 s8, s8 ; SI-NEXT: ; implicit-def: $vgpr4_vgpr5 -; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s8 ; SI-NEXT: s_cbranch_execnz .LBB4_2 ; SI-NEXT: ; %bb.3: ; SI-NEXT: s_mov_b32 exec_lo, s7 @@ -274,17 +277,18 @@ define amdgpu_ps float @loop_with_use(i32 %z, float %v, i32 inreg %bound, ptr %e ; SI-NEXT: s_cbranch_execz .LBB4_8 ; SI-NEXT: ; %bb.5: ; %if ; SI-NEXT: s_mov_b32 s7, exec_lo +; SI-NEXT: s_mov_b32 s8, s7 ; SI-NEXT: .LBB4_6: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: v_readfirstlane_b32 s4, v2 ; SI-NEXT: v_readfirstlane_b32 s5, v3 -; SI-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[2:3] -; SI-NEXT: s_and_saveexec_b32 s8, vcc_lo +; SI-NEXT: s_waitcnt_depctr depctr_sa_sdst(0) +; SI-NEXT: v_cmpx_eq_u64_e32 s[4:5], v[2:3] ; SI-NEXT: v_mov_b32_e32 v0, v40 ; SI-NEXT: s_mov_b64 s[0:1], s[12:13] ; SI-NEXT: s_mov_b64 s[2:3], s[14:15] ; SI-NEXT: s_swappc_b64 s[30:31], s[4:5] +; SI-NEXT: s_andn2_wrexec_b32 s8, s8 ; SI-NEXT: ; implicit-def: $vgpr2_vgpr3 -; SI-NEXT: s_xor_b32 exec_lo, exec_lo, s8 ; SI-NEXT: s_cbranch_execnz .LBB4_6 ; SI-NEXT: ; %bb.7: ; SI-NEXT: s_mov_b32 exec_lo, s7 From b3de3887e859bda2c261bd619fc7d1f2d71ee211 Mon Sep 17 00:00:00 
2001 From: Yingwei Zheng Date: Mon, 22 Jun 2026 22:08:52 +0800 Subject: [PATCH 016/511] [llubi] Reset retval when return type is void (#205107) In `returnFromCallee`, the return value is moved out from `CurrentFrame->RetVal`. So `visitReturnInst` is always responsible for setting a valid value. Closes https://github.com/llvm/llvm-project/issues/204992 --- .../tools/llubi/reset_return_value_slot.ll | 30 +++++++++++++++++++ llvm/tools/llubi/lib/Interpreter.cpp | 2 ++ 2 files changed, 32 insertions(+) create mode 100644 llvm/test/tools/llubi/reset_return_value_slot.ll diff --git a/llvm/test/tools/llubi/reset_return_value_slot.ll b/llvm/test/tools/llubi/reset_return_value_slot.ll new file mode 100644 index 0000000000000..72cab180e4556 --- /dev/null +++ b/llvm/test/tools/llubi/reset_return_value_slot.ll @@ -0,0 +1,30 @@ +; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6 +; RUN: llubi --verbose < %s 2>&1 | FileCheck %s + +define void @main() { +entry: + %res = call i16 @func1() + call void @func2() + ret void +} + +define i16 @func1() { +entry: + ret i16 0 +} + +define void @func2() { +entry: + ret void +} +; CHECK: Entering function: main +; CHECK-NEXT: Entering function: func1 +; CHECK-NEXT: ret i16 0 +; CHECK-NEXT: Exiting function: func1 +; CHECK-NEXT: %res = call i16 @func1() => i16 0 +; CHECK-NEXT: Entering function: func2 +; CHECK-NEXT: ret void +; CHECK-NEXT: Exiting function: func2 +; CHECK-NEXT: call void @func2() +; CHECK-NEXT: ret void +; CHECK-NEXT: Exiting function: main diff --git a/llvm/tools/llubi/lib/Interpreter.cpp b/llvm/tools/llubi/lib/Interpreter.cpp index 588f1069c2a80..f833b660f4e3a 100644 --- a/llvm/tools/llubi/lib/Interpreter.cpp +++ b/llvm/tools/llubi/lib/Interpreter.cpp @@ -710,6 +710,8 @@ class InstExecutor : public InstVisitor, void visitReturnInst(ReturnInst &RI) { if (auto *RV = RI.getReturnValue()) CurrentFrame->RetVal = getValue(RV); + else + CurrentFrame->RetVal = AnyValue(); 
CurrentFrame->State = FrameState::Exit; if (!Handler.onInstructionExecuted(RI, None)) setFailed(); From d9bba98c9503dba364de915a3e6dce34eee6e7d4 Mon Sep 17 00:00:00 2001 From: lijinpei-amd Date: Mon, 22 Jun 2026 22:12:55 +0800 Subject: [PATCH 017/511] [Attributor] Do not delete side-effect-free terminator (#205052) Fixes: https://github.com/llvm/llvm-project/issues/192012 --- .../Transforms/IPO/AttributorAttributes.cpp | 2 +- llvm/test/Transforms/Attributor/callbr.ll | 23 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/Attributor/callbr.ll diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index f67b87e6a8ad0..4d59f7dd2c3ee 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -4291,7 +4291,7 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl { A.deleteAfterManifest(*FI); return ChangeStatus::CHANGED; } - if (isAssumedSideEffectFree(A, I) && !isa(I)) { + if (isAssumedSideEffectFree(A, I) && !I->isTerminator()) { A.deleteAfterManifest(*I); return ChangeStatus::CHANGED; } diff --git a/llvm/test/Transforms/Attributor/callbr.ll b/llvm/test/Transforms/Attributor/callbr.ll new file mode 100644 index 0000000000000..1a7aff43c25e7 --- /dev/null +++ b/llvm/test/Transforms/Attributor/callbr.ll @@ -0,0 +1,23 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes=attributor -S < %s | FileCheck %s + +; Regression test for #192012: a side-effect-free callbr is still a terminator +; and must not be queued for generic deletion after manifestation. 
+define i32 @callbr_is_not_deleted_as_dead_callbase() { +; CHECK-LABEL: define noundef i32 @callbr_is_not_deleted_as_dead_callbase( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: callbr void asm "", "!i"() #[[ATTR1:[0-9]+]] +; CHECK-NEXT: to label %[[COMMON_RET:.*]] [label %[[COMMON_RET]]] +; CHECK: [[COMMON_RET]]: +; CHECK-NEXT: ret i32 0 +; +entry: + callbr void asm "", "!i"() #0 + to label %common.ret [label %common.ret] + +common.ret: + ret i32 0 +} + +attributes #0 = { nounwind memory(none) } From 13712816e34b34dac2f41e24b9519aef3ddb53d9 Mon Sep 17 00:00:00 2001 From: Walter Lee <49250218+googlewalt@users.noreply.github.com> Date: Mon, 22 Jun 2026 10:16:04 -0400 Subject: [PATCH 018/511] Fix test on read-only file systems (#205108) Fixes 25e4057d49055a645dc6a51ae1f40ac647aaed5b. Use the -fsyntax-only flag instead of -c. This performs the necessary parsing and diagnostics verification (the actual intent of this test) without attempting to emit an object file. --- clang/test/AST/ByteCode/command-line-options.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/test/AST/ByteCode/command-line-options.cpp b/clang/test/AST/ByteCode/command-line-options.cpp index e85b66f08d020..b64fc565e042a 100644 --- a/clang/test/AST/ByteCode/command-line-options.cpp +++ b/clang/test/AST/ByteCode/command-line-options.cpp @@ -6,11 +6,11 @@ /// All this should be true if the driver is used or -cc1. 
-// RUN: %clang -c -fexperimental-new-constant-interpreter %s -Xclang -verify=bc -// RUN: %clang -cc1 -fexperimental-new-constant-interpreter %s -verify=bc +// RUN: %clang -fsyntax-only -fexperimental-new-constant-interpreter %s -Xclang -verify=bc +// RUN: %clang -cc1 -fexperimental-new-constant-interpreter %s -verify=bc -// RUN: %clang -c -fno-experimental-new-constant-interpreter %s -Xclang -verify=nobc -// RUN: %clang -cc1 -fno-experimental-new-constant-interpreter %s -verify=nobc +// RUN: %clang -fsyntax-only -fno-experimental-new-constant-interpreter %s -Xclang -verify=nobc +// RUN: %clang -cc1 -fno-experimental-new-constant-interpreter %s -verify=nobc /// Note that we're not testing the behavior without those command line options since that From d56bd5a938b153efc07af5696aaeb04e86f024a0 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 22 Jun 2026 09:18:50 -0500 Subject: [PATCH 019/511] [HIP] Fix `--no-offload-new-driver` behavior after #201457 (#205094) Summary: https://github.com/llvm/llvm-project/pull/201457 changed the default for all targets. Even though the old offload driver is getting removed soon, we shouldn't break it for the LLVM23 release. This simply reverts to the original behavior: the old driver builds its jobs manually, so we can just turn off this one specific case unless the user forced it. --- clang/lib/Driver/Driver.cpp | 9 +-- clang/test/Driver/hip-binding.hip | 8 +-- clang/test/Driver/hip-device-compile.hip | 2 +- clang/test/Driver/hip-phases.hip | 78 +++++++++++++--------- clang/test/Driver/hip-toolchain-no-rdc.hip | 7 +- 5 files changed, 59 insertions(+), 45 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 7bf02223bc4ce..e67886abc35b6 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3822,8 +3822,11 @@ class OffloadingActionBuilder final { // object containing ISA. 
Then we use a special "link" action to create // a fat binary containing all the code objects for different GPU's. // The fat binary is then an input to the host action. + bool ExplicitOffloadLTO = Args.hasArg(options::OPT_foffload_lto, + options::OPT_foffload_lto_EQ); for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { - if (ToolChains[I]->isUsingLTO(Args, AssociatedOffloadKind)) { + if (ExplicitOffloadLTO && + ToolChains[I]->isUsingLTO(Args, AssociatedOffloadKind)) { // When LTO is enabled, skip the backend and assemble phases and // use lld to link the bitcode. ActionList AL; @@ -3849,11 +3852,9 @@ class OffloadingActionBuilder final { BackendAction = C.MakeAction(CudaDeviceActions[I], Output); } else { - auto DevLTO = - ToolChains[I]->getLTOMode(Args, AssociatedOffloadKind); BackendAction = C.getDriver().ConstructPhaseAction( C, Args, phases::Backend, CudaDeviceActions[I], - AssociatedOffloadKind, DevLTO); + AssociatedOffloadKind, LTOK_None); } auto AssembleAction = C.getDriver().ConstructPhaseAction( C, Args, phases::Assemble, BackendAction, diff --git a/clang/test/Driver/hip-binding.hip b/clang/test/Driver/hip-binding.hip index 752e7b96117fe..b0839021aa1d9 100644 --- a/clang/test/Driver/hip-binding.hip +++ b/clang/test/Driver/hip-binding.hip @@ -4,10 +4,10 @@ // RUN: %clang -ccc-print-bindings --target=x86_64-linux-gnu --offload-new-driver \ // RUN: --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 %s \ // RUN: --no-offload-new-driver -c 2>&1 | FileCheck -check-prefix=NRDCS %s -// NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[BC1:.*bc]]" -// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[BC1]]"], output: "[[IMG1:.*]]" -// NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[BC2:.*bc]]" -// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[BC2]]"], output: "[[IMG2:.*]]" +// NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], 
output: "[[OBJ1:.*o]]" +// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ1]]"], output: "[[IMG1:.*]]" +// NRDCS: # "amdgcn-amd-amdhsa" - "clang", inputs: ["[[IN:.*hip-binding.hip]]"], output: "[[OBJ2:.*o]]" +// NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[OBJ2]]"], output: "[[IMG2:.*]]" // NRDCS: # "amdgcn-amd-amdhsa" - "AMDGCN::Linker", inputs: ["[[IMG1]]", "[[IMG2]]"], output: "[[FATBIN:.*]]" // NRDCS: # "x86_64-unknown-linux-gnu" - "clang", inputs: ["[[IN]]", "[[FATBIN]]"], output: "{{.*}}" diff --git a/clang/test/Driver/hip-device-compile.hip b/clang/test/Driver/hip-device-compile.hip index bf9bf933cf8c6..efc9345f63d28 100644 --- a/clang/test/Driver/hip-device-compile.hip +++ b/clang/test/Driver/hip-device-compile.hip @@ -122,7 +122,7 @@ // RUN: %S/Inputs/hip_multiple_inputs/a.cu --no-gpu-bundle-output \ // RUN: 2>&1 | FileCheck -check-prefixes=OBJ,OBJ-UBUN %s -// OBJ: {{"*.clang.*"}} {{.*}} "-emit-llvm-bc" +// OBJ: {{"*.clang.*"}} {{.*}} "-emit-obj" // OBJ-NOT: {{"*.llvm-link"}} // OBJ-NOT: {{".*opt"}} // OBJ-NOT: {{".*llc"}} diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip index fac9e34cef535..4554519960133 100644 --- a/clang/test/Driver/hip-phases.hip +++ b/clang/test/Driver/hip-phases.hip @@ -32,9 +32,11 @@ // BIN-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH:gfx803]]) // BIN-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // BIN-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH]]) -// OLDN-DAG: [[P6:[0-9]+]]: linker, {[[P5]]}, image, (device-[[T]], [[ARCH]]) -// OLDN-DAG: [[P7:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P6]]}, image -// OLDN-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, hip-fatbin, (device-[[T]]) +// OLDN-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH]]) +// OLDN-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH]]) +// OLDN-DAG: 
[[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH]]) +// OLDN-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P8]]}, image +// OLDN-DAG: [[P10:[0-9]+]]: linker, {[[P9]]}, hip-fatbin, (device-[[T]]) // OLDR-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, lto-bc, (device-[[T]], [[ARCH]]) // OLDR-DAG: [[P7:[0-9]+]]: linker, {[[P6]]}, image, (device-[[T]], [[ARCH]]) // OLDR-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P7]]}, image @@ -46,14 +48,14 @@ // NEWN-DAG: [[P9:[0-9]+]]: clang-linker-wrapper, {[[P8]]}, hip-fatbin, (device-[[T]]) // NEWLTO-DAG: [[P9:[0-9]+]]: clang-linker-wrapper, {[[P8]]}, hip-fatbin, (device-[[T]]) -// OLDN-DAG: [[P9:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P8]]}, ir +// OLDN-DAG: [[P11:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P10]]}, ir // NEWN-DAG: [[P10:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P9]]}, ir // NEWLTO-DAG: [[P10:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P9]]}, ir // NEWR-DAG: [[P9:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (x86_64-unknown-linux-gnu)" {[[P8]]}, ir // OLDR-DAG: [[P14:[0-9]+]]: linker, {[[P13]], [[P10]]}, image, (host-[[T]]) -// OLDN-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, assembler, (host-[[T]]) -// OLDN-DAG: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (host-[[T]]) -// OLDN-DAG: [[P12:[0-9]+]]: linker, {[[P11]]}, image, (host-[[T]]) +// OLDN-DAG: [[P12:[0-9]+]]: backend, {[[P11]]}, assembler, (host-[[T]]) +// OLDN-DAG: [[P13:[0-9]+]]: assembler, {[[P12]]}, object, (host-[[T]]) +// OLDN-DAG: [[P14:[0-9]+]]: linker, {[[P13]]}, image, (host-[[T]]) // NEWN-DAG: [[P11:[0-9]+]]: backend, {[[P10]]}, assembler, (host-[[T]]) // NEWN-DAG: [[P12:[0-9]+]]: assembler, 
{[[P11]]}, object, (host-[[T]]) // NEWN-DAG: [[P13:[0-9]+]]: clang-linker-wrapper, {[[P12]]}, image, (host-[[T]]) @@ -99,19 +101,23 @@ // NRD2-DAG: [[P3:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH1:gfx803]]) // NRD2-DAG: [[P4:[0-9]+]]: preprocessor, {[[P3]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH1]]) // NRD2-DAG: [[P5:[0-9]+]]: compiler, {[[P4]]}, ir, (device-[[T]], [[ARCH1]]) -// NRD2-DAG: [[P6:[0-9]+]]: linker, {[[P5]]}, image, (device-[[T]], [[ARCH1]]) -// NRD2-DAG: [[P7:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P6]]}, image - -// NRD2-DAG: [[P8:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) -// NRD2-DAG: [[P9:[0-9]+]]: preprocessor, {[[P8]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) -// NRD2-DAG: [[P10:[0-9]+]]: compiler, {[[P9]]}, ir, (device-[[T]], [[ARCH2]]) -// NRD2-DAG: [[P11:[0-9]+]]: linker, {[[P10]]}, image, (device-[[T]], [[ARCH2]]) -// NRD2-DAG: [[P12:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P11]]}, image -// NRD2-DAG: [[P13:[0-9]+]]: linker, {[[P7]], [[P12]]}, hip-fatbin, (device-[[T]]) -// NRD2-DAG: [[P14:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P13]]}, ir -// NRD2-DAG: [[P15:[0-9]+]]: backend, {[[P14]]}, assembler, (host-[[T]]) -// NRD2-DAG: [[P16:[0-9]+]]: assembler, {[[P15]]}, object, (host-[[T]]) -// NCL2-DAG: [[P17:[0-9]+]]: linker, {[[P16]]}, image, (host-[[T]]) +// NRD2-DAG: [[P6:[0-9]+]]: backend, {[[P5]]}, assembler, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P7:[0-9]+]]: assembler, {[[P6]]}, object, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH1]]) +// NRD2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH1]])" {[[P8]]}, image + +// NRD2-DAG: [[P10:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) +// NRD2-DAG: [[P11:[0-9]+]]: preprocessor, {[[P10]]}, 
[[T]]-cpp-output, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P12:[0-9]+]]: compiler, {[[P11]]}, ir, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P13:[0-9]+]]: backend, {[[P12]]}, assembler, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P14:[0-9]+]]: assembler, {[[P13]]}, object, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P15:[0-9]+]]: linker, {[[P14]]}, image, (device-[[T]], [[ARCH2]]) +// NRD2-DAG: [[P16:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P15]]}, image +// NRD2-DAG: [[P17:[0-9]+]]: linker, {[[P9]], [[P16]]}, hip-fatbin, (device-[[T]]) +// NRD2-DAG: [[P18:[0-9]+]]: offload, "host-[[T]] (x86_64-unknown-linux-gnu)" {[[P2]]}, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P17]]}, ir +// NRD2-DAG: [[P19:[0-9]+]]: backend, {[[P18]]}, assembler, (host-[[T]]) +// NRD2-DAG: [[P20:[0-9]+]]: assembler, {[[P19]]}, object, (host-[[T]]) +// NCL2-DAG: [[P21:[0-9]+]]: linker, {[[P20]]}, image, (host-[[T]]) // // Test two gpu architectures with complete compilation with -fgpu-rdc. @@ -247,10 +253,12 @@ // DBIN-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) // DBIN-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DBIN-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) -// DBIN-DAG: [[P3:[0-9]+]]: linker, {[[P2]]}, image, (device-[[T]], [[ARCH]]) -// DBIN-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, image -// DBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, hip-fatbin, (device-hip, ) -// DBIN-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P5]]}, hip-fatbin +// DBIN-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) +// DBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]]) +// DBIN-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image +// DBIN-DAG: 
[[P7:[0-9]+]]: linker, {[[P6]]}, hip-fatbin, (device-hip, ) +// DBIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P7]]}, hip-fatbin // DBIN-NOT: host // @@ -338,15 +346,19 @@ // DBIN2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) // DBIN2-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // DBIN2-DAG: [[P2:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH]]) -// DBIN2-DAG: [[P3:[0-9]+]]: linker, {[[P2]]}, image, (device-[[T]], [[ARCH]]) -// DBIN2-DAG: [[P4:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P3]]}, image -// DBIN2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) -// DBIN2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) -// DBIN2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) -// DBIN2-DAG: [[P8:[0-9]+]]: linker, {[[P7]]}, image, (device-[[T]], [[ARCH2]]) -// DBIN2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, image -// DBIN2-DAG: [[P10:[0-9]+]]: linker, {[[P4]], [[P9]]}, hip-fatbin, (device-hip, ) -// DBIN2-DAG: [[P11:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P10]]}, hip-fatbin +// DBIN2-DAG: [[P3:[0-9]+]]: backend, {[[P2]]}, assembler, (device-[[T]], [[ARCH]]) +// DBIN2-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, (device-[[T]], [[ARCH]]) +// DBIN2-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]]) +// DBIN2-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image +// DBIN2-DAG: [[P7:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) +// DBIN2-DAG: [[P8:[0-9]+]]: preprocessor, {[[P7]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P9:[0-9]+]]: compiler, {[[P8]]}, ir, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P10:[0-9]+]]: backend, {[[P9]]}, assembler, (device-[[T]], 
[[ARCH2]]) +// DBIN2-DAG: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P12:[0-9]+]]: linker, {[[P11]]}, image, (device-[[T]], [[ARCH2]]) +// DBIN2-DAG: [[P13:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P12]]}, image +// DBIN2-DAG: [[P14:[0-9]+]]: linker, {[[P6]], [[P13]]}, hip-fatbin, (device-hip, ) +// DBIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P14]]}, hip-fatbin // DBIN2-NOT: host // diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip index 18fcf42f5be6c..a97a25a7b05a8 100644 --- a/clang/test/Driver/hip-toolchain-no-rdc.hip +++ b/clang/test/Driver/hip-toolchain-no-rdc.hip @@ -67,7 +67,8 @@ // CHECK: [[CLANG:".*clang.*"]] "-cc1" "-triple" "amdgcn-amd-amdhsa" // CHECK-SAME: "-aux-triple" "x86_64-unknown-linux-gnu" -// CHECK-SAME: "-emit-llvm-bc" +// OLD-SAME: "-emit-obj" +// NEW-SAME: "-emit-llvm-bc" // CHECK-SAME: {{.*}} "-main-file-name" "a.cu" // CHECK-SAME: "-fcuda-is-device" "-fno-threadsafe-statics" "-mllvm" "-amdgpu-internalize-symbols" // CHECK-SAME: "-fvisibility=hidden" @@ -233,8 +234,8 @@ // AMDGCNSPIRV: "-cc1" "-triple" "spirv64-amd-amdhsa" {{.*}}"-emit-llvm-bc" {{.*}}"-flto=full"{{.*}} "-fembed-bitcode=marker" "-disable-llvm-passes" {{.*}} "-o" "[[AMDGCNSPV_BC:.*bc]]" // AMDGCNSPIRV: {{".*llvm-link.*"}} "-o" "[[AMDGCNSPV_TMP:.*bc]]" "[[AMDGCNSPV_BC]]" // AMDGCNSPIRV: {{".*llvm-spirv.*"}} "--spirv-max-version=1.6" "--spirv-ext=+all" {{.*}} "[[AMDGCNSPV_TMP]]" {{.*}}"-o" "[[AMDGCNSPV_CO:.*out]]" -// AMDGCNSPIRV: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}}"-emit-llvm-bc" {{.*}}"-flto=full"{{.*}} "-target-cpu" "gfx900"{{.*}} "-o" "[[GFX900_BC:.*bc]]" -// AMDGCNSPIRV: {{".*lld.*"}} {{.*}}"-plugin-opt=mcpu=gfx900" {{.*}} "-o" "[[GFX900_CO:.*out]]" {{.*}}"[[GFX900_BC]]" +// AMDGCNSPIRV: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}}"-emit-obj" {{.*}} "-target-cpu" "gfx900"{{.*}} "-o" "[[GFX900_OBJ:.*o]]" +// 
AMDGCNSPIRV: {{".*lld.*"}} {{.*}}"-plugin-opt=mcpu=gfx900" {{.*}} "-o" "[[GFX900_CO:.*out]]" {{.*}}"[[GFX900_OBJ]]" // AMDGCNSPIRV: {{".*clang-offload-bundler.*"}} "-type=o" // AMDGCNSPIRV-SAME: "-targets={{.*}}hip-spirv64-amd-amdhsa--amdgcnspirv,hip-amdgcn-amd-amdhsa--gfx900" // AMDGCNSPIRV-SAME: "-input=[[AMDGCNSPV_CO]]" "-input=[[GFX900_CO]]" From 020a4492368ca83f806804408976014ccdeb441a Mon Sep 17 00:00:00 2001 From: Walter Lee <49250218+googlewalt@users.noreply.github.com> Date: Mon, 22 Jun 2026 10:19:17 -0400 Subject: [PATCH 020/511] Be more permissive on spaces in command line argument parsing (#205111) Fixes c888371ff0a3e10f8472676dc992f4347fca58d9. This change properly accommodates both presence and absence of extra trailing arguments like -resource-dir. --- .../clangd/test/did-change-configuration-params.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/test/did-change-configuration-params.test b/clang-tools-extra/clangd/test/did-change-configuration-params.test index 08c7b4bcb57ad..f922dc5508f6f 100644 --- a/clang-tools-extra/clangd/test/did-change-configuration-params.test +++ b/clang-tools-extra/clangd/test/did-change-configuration-params.test @@ -48,7 +48,7 @@ # # ERR: ASTWorker building file {{.*}}foo.c version 0 with command # ERR: [{{.*}}clangd-test2] -# ERR: clang -c -Wall -Werror {{.*}} -- {{.*}}foo.c +# ERR: clang -c -Wall -Werror{{.*}} -- {{.*}}foo.c --- {"jsonrpc":"2.0","id":5,"method":"shutdown"} --- From 458ad1e4cf49922c3f4cf50daf5e88de9314218c Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 22 Jun 2026 09:25:25 -0500 Subject: [PATCH 021/511] [flang][OpenMP] Centralize pushing/popping directive context (#204924) Put calls to PushContextAndClauseSets to the Enter function for OpenMPConstruct and OpenMPDeclarativeConstruct, and popping the context to the corresponding Leave functions. This moves most of the context handling to the top-level AST entries. 
This will allow more centralized verification of common clause properties in the future. --- flang/lib/Semantics/check-omp-atomic.cpp | 6 - flang/lib/Semantics/check-omp-loop.cpp | 2 - flang/lib/Semantics/check-omp-structure.cpp | 170 +++----------------- flang/lib/Semantics/check-omp-structure.h | 27 ---- flang/lib/Semantics/check-omp-variant.cpp | 19 --- 5 files changed, 21 insertions(+), 203 deletions(-) diff --git a/flang/lib/Semantics/check-omp-atomic.cpp b/flang/lib/Semantics/check-omp-atomic.cpp index a61a868c9baa4..ec307be469982 100644 --- a/flang/lib/Semantics/check-omp-atomic.cpp +++ b/flang/lib/Semantics/check-omp-atomic.cpp @@ -1641,8 +1641,6 @@ void OmpStructureChecker::Enter(const parser::OpenMPAtomicConstruct &x) { }}; const parser::OmpDirectiveSpecification &dirSpec{x.BeginDir()}; - auto &dir{std::get(dirSpec.t)}; - PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_atomic); llvm::omp::Clause kind{x.GetKind()}; checkExclusive(atomic, "atomic", dirSpec.Clauses()); @@ -1665,10 +1663,6 @@ void OmpStructureChecker::Enter(const parser::OpenMPAtomicConstruct &x) { } } -void OmpStructureChecker::Leave(const parser::OpenMPAtomicConstruct &) { - dirContext_.pop_back(); -} - // Rewrite min/max: // Min and max intrinsics in Fortran take an arbitrary number of arguments // (two or more). The first two are mandatory, the rest is optional. 
That diff --git a/flang/lib/Semantics/check-omp-loop.cpp b/flang/lib/Semantics/check-omp-loop.cpp index 689e3a0da89ca..3e1f2e6cdf5d0 100644 --- a/flang/lib/Semantics/check-omp-loop.cpp +++ b/flang/lib/Semantics/check-omp-loop.cpp @@ -381,7 +381,6 @@ void OmpStructureChecker::CheckNestedConstruct( void OmpStructureChecker::Enter(const parser::OpenMPLoopConstruct &x) { const parser::OmpDirectiveName &beginName{x.BeginDir().DirName()}; - PushContextAndClauseSets(beginName.source, beginName.v); // Check matching, end directive is optional if (auto &endSpec{x.EndDir()}) { @@ -688,7 +687,6 @@ void OmpStructureChecker::Leave(const parser::OpenMPLoopConstruct &x) { if (llvm::omp::allSimdSet.test(beginSpec.DirName().v)) { ExitDirectiveNest(SIMDNest); } - dirContext_.pop_back(); } void OmpStructureChecker::Enter(const parser::OmpClause::Depth &x) { diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index e2220156d13cd..022c774cd2adc 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1041,22 +1041,17 @@ void OmpStructureChecker::Enter(const parser::OpenMPConstruct &x) { return CheckDirectiveSpelling(source, id); }); parser::Walk(x, visitor); - if (GetOmpDirectiveName(x).v != llvm::omp::Directive::OMPD_section) { - dirStack_.push_back(&GetOmpDirectiveSpecification(x)); - } - - CheckDirectiveDeprecation(x); - if (GetOmpDirectiveName(x).v != llvm::omp::Directive::OMPD_section) { - dirStack_.push_back(&GetOmpDirectiveSpecification(x)); + parser::OmpDirectiveName dirName{GetOmpDirectiveName(x)}; + if (dirName.v == llvm::omp::Directive::OMPD_section) { + return; } + PushContextAndClauseSets(dirName.source, dirName.v); + dirStack_.push_back(&GetOmpDirectiveSpecification(x)); + CheckDirectiveDeprecation(x); - // Simd Construct with Ordered Construct Nesting check - // We cannot use CurrentDirectiveIsNested() here because - // PushContextAndClauseSets() has not been called 
yet, it is - // called individually for each construct. Therefore a - // dirContext_ size `1` means the current construct is nested - if (dirContext_.size() >= 1) { + // Simd Construct with Ordered Construct Nesting check. + if (CurrentDirectiveIsNested()) { if (GetDirectiveNest(SIMDNest) > 0) { CheckSIMDNest(x); } @@ -1067,8 +1062,11 @@ void OmpStructureChecker::Enter(const parser::OpenMPConstruct &x) { } void OmpStructureChecker::Leave(const parser::OpenMPConstruct &x) { - if (GetOmpDirectiveName(x).v != llvm::omp::Directive::OMPD_section) { + parser::OmpDirectiveName dirName{GetOmpDirectiveName(x)}; + if (dirName.v != llvm::omp::Directive::OMPD_section) { dirStack_.pop_back(); + assert(dirName.v == GetContext().directive && "Context mismatch"); + dirContext_.pop_back(); } constructStack_.pop_back(); } @@ -1080,6 +1078,8 @@ void OmpStructureChecker::Enter(const parser::OpenMPDeclarativeConstruct &x) { }); parser::Walk(x, visitor); + parser::OmpDirectiveName dirName{GetOmpDirectiveName(x)}; + PushContextAndClauseSets(dirName.source, dirName.v); dirStack_.push_back(&GetOmpDirectiveSpecification(x)); EnterDirectiveNest(DeclarativeNest); } @@ -1087,6 +1087,9 @@ void OmpStructureChecker::Enter(const parser::OpenMPDeclarativeConstruct &x) { void OmpStructureChecker::Leave(const parser::OpenMPDeclarativeConstruct &x) { ExitDirectiveNest(DeclarativeNest); dirStack_.pop_back(); + [[maybe_unused]] parser::OmpDirectiveName dirName{GetOmpDirectiveName(x)}; + assert(dirName.v == GetContext().directive && "Context mismatch"); + dirContext_.pop_back(); } void OmpStructureChecker::AddEndDirectiveClauses( @@ -1215,8 +1218,6 @@ void OmpStructureChecker::Enter(const parser::OmpBlockConstruct &x) { const parser::Block &block{std::get(x.t)}; unsigned version{context_.langOptions().OpenMPVersion}; - PushContextAndClauseSets(beginSpec.DirName().source, beginSpec.DirId()); - // Missing mandatory end block: this is checked in semantics because that // makes it easier to control the error 
messages. // The end block is mandatory when the construct is not applied to a strictly @@ -1400,22 +1401,6 @@ void OmpStructureChecker::CheckMasterNesting( } } -void OmpStructureChecker::Enter(const parser::OmpAssumeDirective &x) { - PushContextAndClauseSets(x.source, llvm::omp::Directive::OMPD_assume); -} - -void OmpStructureChecker::Leave(const parser::OmpAssumeDirective &) { - dirContext_.pop_back(); -} - -void OmpStructureChecker::Enter(const parser::OmpAssumesDirective &x) { - PushContextAndClauseSets(x.source, llvm::omp::Directive::OMPD_assumes); -} - -void OmpStructureChecker::Leave(const parser::OmpAssumesDirective &) { - dirContext_.pop_back(); -} - void OmpStructureChecker::Leave(const parser::OmpBlockConstruct &x) { if (GetContext().directive == llvm::omp::Directive::OMPD_taskgraph) { CheckTaskgraph(x); @@ -1426,7 +1411,6 @@ void OmpStructureChecker::Leave(const parser::OmpBlockConstruct &x) { if (llvm::omp::allTargetSet.test(GetContext().directive)) { ExitDirectiveNest(TargetNest); } - dirContext_.pop_back(); } void OmpStructureChecker::ChecksOnOrderedAsBlock() { @@ -1526,7 +1510,6 @@ void OmpStructureChecker::Enter(const parser::OpenMPSectionsConstruct &x) { const parser::OmpDirectiveSpecification &beginSpec{x.BeginDir()}; const parser::OmpDirectiveName &beginName{beginSpec.DirName()}; const auto &endSpec{x.EndDir()}; - PushContextAndClauseSets(beginName.source, beginName.v); if (!endSpec) { context_.Say( @@ -1549,10 +1532,6 @@ void OmpStructureChecker::Enter(const parser::OpenMPSectionsConstruct &x) { llvm::omp::nestedWorkshareErrSet); } -void OmpStructureChecker::Leave(const parser::OpenMPSectionsConstruct &) { - dirContext_.pop_back(); -} - void OmpStructureChecker::Enter(const parser::OmpEndSectionsDirective &x) { const parser::OmpDirectiveName &dirName{x.DirName()}; ResetPartialContext(dirName.source); @@ -1689,9 +1668,6 @@ void OmpStructureChecker::CheckThreadprivateOrDeclareTargetVar( } void OmpStructureChecker::Enter(const 
parser::OmpGroupprivateDirective &x) { - PushContextAndClauseSets( - x.v.DirName().source, llvm::omp::Directive::OMPD_groupprivate); - for (const parser::OmpArgument &arg : x.v.Arguments().v) { auto *locator{std::get_if(&arg.u)}; const Symbol *sym{GetArgumentSymbol(arg, /*ultimate=*/true)}; @@ -1740,15 +1716,6 @@ void OmpStructureChecker::Enter(const parser::OmpGroupprivateDirective &x) { } } -void OmpStructureChecker::Leave(const parser::OmpGroupprivateDirective &x) { - dirContext_.pop_back(); -} - -void OmpStructureChecker::Enter(const parser::OmpThreadprivateDirective &x) { - const parser::OmpDirectiveName &dirName{x.v.DirName()}; - PushContextAndClauseSets(dirName.source, dirName.v); -} - void OmpStructureChecker::Leave(const parser::OmpThreadprivateDirective &x) { const parser::OmpDirectiveSpecification &dirSpec{x.v}; for (const parser::OmpArgument &arg : x.v.Arguments().v) { @@ -1759,12 +1726,10 @@ void OmpStructureChecker::Leave(const parser::OmpThreadprivateDirective &x) { CheckThreadprivateOrDeclareTargetVar(*object); } } - dirContext_.pop_back(); } void OmpStructureChecker::Enter(const parser::OmpDeclareSimdDirective &x) { const parser::OmpDirectiveName &dirName{x.v.DirName()}; - PushContextAndClauseSets(dirName.source, dirName.v); const Scope &containingScope = context_.FindScope(dirName.source); const Scope &progUnitScope = GetProgramUnitContaining(containingScope); @@ -1836,10 +1801,6 @@ void OmpStructureChecker::Enter(const parser::OmpDeclareSimdDirective &x) { } } -void OmpStructureChecker::Leave(const parser::OmpDeclareSimdDirective &) { - dirContext_.pop_back(); -} - void OmpStructureChecker::CheckInitOnDepobj( const parser::OpenMPDepobjConstruct &depobj, const parser::OmpClause &initClause) { @@ -1887,7 +1848,6 @@ void OmpStructureChecker::CheckInitOnDepobj( void OmpStructureChecker::Enter(const parser::OpenMPDepobjConstruct &x) { const auto &dirName{std::get(x.v.t)}; - PushContextAndClauseSets(dirName.source, llvm::omp::Directive::OMPD_depobj); 
unsigned version{context_.langOptions().OpenMPVersion}; const parser::OmpArgumentList &arguments{x.v.Arguments()}; @@ -1942,13 +1902,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPDepobjConstruct &x) { } } -void OmpStructureChecker::Leave(const parser::OpenMPDepobjConstruct &x) { - dirContext_.pop_back(); -} - void OmpStructureChecker::Enter(const parser::OmpRequiresDirective &x) { - const auto &dirName{x.v.DirName()}; - PushContextAndClauseSets(dirName.source, dirName.v); unsigned version{context_.langOptions().OpenMPVersion}; for (const parser::OmpClause &clause : x.v.Clauses().v) { @@ -1989,10 +1943,6 @@ void OmpStructureChecker::Enter(const parser::OmpRequiresDirective &x) { } } -void OmpStructureChecker::Leave(const parser::OmpRequiresDirective &) { - dirContext_.pop_back(); -} - static std::pair getAllocateStmtAndSource(const parser::ExecutionPartConstruct *epc) { if (SourcedActionStmt as{GetActionStmt(epc)}) { @@ -2182,7 +2132,6 @@ void OmpStructureChecker::CheckExecutableAllocateDirective( void OmpStructureChecker::Enter(const parser::OmpAllocateDirective &x) { const parser::OmpDirectiveSpecification &beginSpec{x.BeginDir()}; const parser::OmpDirectiveName &dirName{beginSpec.DirName()}; - PushContextAndClauseSets(dirName.source, dirName.v); ++allocateDirectiveLevel_; bool isExecutable{partStack_.back() == PartKind::ExecutionPart}; @@ -2234,7 +2183,6 @@ void OmpStructureChecker::Leave(const parser::OmpAllocateDirective &x) { } --allocateDirectiveLevel_; - dirContext_.pop_back(); } void OmpStructureChecker::Enter(const parser::OmpClause::Allocator &x) { @@ -2302,9 +2250,6 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Allocate &x) { } void OmpStructureChecker::Enter(const parser::OmpDeclareMapperDirective &x) { - const parser::OmpDirectiveName &dirName{x.v.DirName()}; - PushContextAndClauseSets(dirName.source, dirName.v); - const parser::OmpArgumentList &args{x.v.Arguments()}; if (args.v.size() != 1) { context_.Say(args.source, @@ 
-2324,14 +2269,7 @@ void OmpStructureChecker::Enter(const parser::OmpDeclareMapperDirective &x) { } } -void OmpStructureChecker::Leave(const parser::OmpDeclareMapperDirective &) { - dirContext_.pop_back(); -} - void OmpStructureChecker::Enter(const parser::OmpDeclareReductionDirective &x) { - const parser::OmpDirectiveName &dirName{x.v.DirName()}; - PushContextAndClauseSets(dirName.source, dirName.v); - const parser::OmpArgumentList &args{x.v.Arguments()}; if (args.v.size() != 1) { context_.Say(args.source, @@ -2346,13 +2284,8 @@ void OmpStructureChecker::Enter(const parser::OmpDeclareReductionDirective &x) { } } -void OmpStructureChecker::Leave(const parser::OmpDeclareReductionDirective &) { - dirContext_.pop_back(); -} - void OmpStructureChecker::Enter(const parser::OmpDeclareTargetDirective &x) { const parser::OmpDirectiveName &dirName{x.v.DirName()}; - PushContext(dirName.source, dirName.v); // Check if arguments are extended-list-items. for (const parser::OmpArgument &arg : x.v.Arguments().v) { @@ -2491,29 +2424,10 @@ void OmpStructureChecker::Leave(const parser::OmpDeclareTargetDirective &x) { deviceConstructFound_ = true; } } - - dirContext_.pop_back(); -} - -void OmpStructureChecker::Enter(const parser::OmpErrorDirective &x) { - const parser::OmpDirectiveName &dirName{x.v.DirName()}; - PushContextAndClauseSets(dirName.source, dirName.v); -} - -void OmpStructureChecker::Enter(const parser::OmpNothingDirective &x) { - const parser::OmpDirectiveName &dirName{x.v.DirName()}; - PushContextAndClauseSets(dirName.source, dirName.v); -} - -void OmpStructureChecker::Leave(const parser::OmpNothingDirective &x) { - dirContext_.pop_back(); } void OmpStructureChecker::Enter(const parser::OpenMPDispatchConstruct &x) { - const parser::OmpDirectiveSpecification &dirSpec{x.BeginDir()}; const auto &block{std::get(x.t)}; - PushContextAndClauseSets( - dirSpec.DirName().source, llvm::omp::Directive::OMPD_dispatch); if (block.empty()) { context_.Say(x.source, @@ -2538,14 +2452,6 
@@ void OmpStructureChecker::Enter(const parser::OpenMPDispatchConstruct &x) { } } -void OmpStructureChecker::Leave(const parser::OpenMPDispatchConstruct &x) { - dirContext_.pop_back(); -} - -void OmpStructureChecker::Leave(const parser::OmpErrorDirective &x) { - dirContext_.pop_back(); -} - void OmpStructureChecker::Enter(const parser::OmpClause::At &x) { CheckAllowedClause(llvm::omp::Clause::OMPC_at); if (GetDirectiveNest(DeclarativeNest) > 0) { @@ -2559,8 +2465,6 @@ void OmpStructureChecker::Enter(const parser::OmpClause::At &x) { void OmpStructureChecker::Enter(const parser::OpenMPAllocatorsConstruct &x) { const parser::OmpDirectiveSpecification &beginSpec{x.BeginDir()}; const parser::OmpDirectiveName &dirName{beginSpec.DirName()}; - PushContextAndClauseSets( - dirName.source, llvm::omp::Directive::OMPD_allocators); for (const auto &clause : beginSpec.Clauses().v) { auto *alloc{std::get_if(&clause.u)}; @@ -2637,10 +2541,6 @@ void OmpStructureChecker::Enter(const parser::OpenMPAllocatorsConstruct &x) { } } -void OmpStructureChecker::Leave(const parser::OpenMPAllocatorsConstruct &x) { - dirContext_.pop_back(); -} - void OmpStructureChecker::CheckScan( const parser::OpenMPSimpleStandaloneConstruct &x) { if (x.v.Clauses().v.size() != 1) { @@ -3031,7 +2931,6 @@ void OmpStructureChecker::CheckDependenceType( void OmpStructureChecker::Enter( const parser::OpenMPSimpleStandaloneConstruct &x) { const auto &dir{std::get(x.v.t)}; - PushContextAndClauseSets(dir.source, dir.v); switch (dir.v) { case llvm::omp::Directive::OMPD_barrier: CheckBarrierNesting(x); @@ -3057,12 +2956,6 @@ void OmpStructureChecker::Leave( default: break; } - dirContext_.pop_back(); -} - -void OmpStructureChecker::Enter(const parser::OpenMPFlushConstruct &x) { - const auto &dirName{std::get(x.v.t)}; - PushContextAndClauseSets(dirName.source, llvm::omp::Directive::OMPD_flush); } void OmpStructureChecker::Leave(const parser::OpenMPFlushConstruct &x) { @@ -3098,14 +2991,11 @@ void 
OmpStructureChecker::Leave(const parser::OpenMPFlushConstruct &x) { "The syntax \"FLUSH clause (object, ...)\" has been deprecated, use \"FLUSH(object, ...) clause\" instead"_warn_en_US); } } - - dirContext_.pop_back(); } void OmpStructureChecker::Enter(const parser::OpenMPCancelConstruct &x) { auto &dirName{std::get(x.v.t)}; auto &maybeClauses{std::get>(x.v.t)}; - PushContextAndClauseSets(dirName.source, llvm::omp::Directive::OMPD_cancel); if (auto maybeConstruct{GetCancelType( llvm::omp::Directive::OMPD_cancel, x.source, maybeClauses)}) { @@ -3144,14 +3034,9 @@ void OmpStructureChecker::Enter(const parser::OpenMPCancelConstruct &x) { } } -void OmpStructureChecker::Leave(const parser::OpenMPCancelConstruct &) { - dirContext_.pop_back(); -} - void OmpStructureChecker::Enter(const parser::OpenMPCriticalConstruct &x) { const parser::OmpBeginDirective &beginSpec{x.BeginDir()}; const std::optional &endSpec{x.EndDir()}; - PushContextAndClauseSets(beginSpec.DirName().source, beginSpec.DirId()); const auto &block{std::get(x.t)}; CheckNoBranching( @@ -3233,10 +3118,6 @@ void OmpStructureChecker::Enter(const parser::OpenMPCriticalConstruct &x) { } } -void OmpStructureChecker::Leave(const parser::OpenMPCriticalConstruct &) { - dirContext_.pop_back(); -} - void OmpStructureChecker::Enter( const parser::OmpClause::CancellationConstructType &x) { llvm::omp::Directive dir{GetContext().directive}; @@ -3271,8 +3152,6 @@ void OmpStructureChecker::Enter( const parser::OpenMPCancellationPointConstruct &x) { auto &dirName{std::get(x.v.t)}; auto &maybeClauses{std::get>(x.v.t)}; - PushContextAndClauseSets( - dirName.source, llvm::omp::Directive::OMPD_cancellation_point); if (auto maybeConstruct{ GetCancelType(llvm::omp::Directive::OMPD_cancellation_point, x.source, @@ -3281,11 +3160,6 @@ void OmpStructureChecker::Enter( } } -void OmpStructureChecker::Leave( - const parser::OpenMPCancellationPointConstruct &) { - dirContext_.pop_back(); -} - std::optional 
OmpStructureChecker::GetCancelType( llvm::omp::Directive cancelDir, const parser::CharBlock &cancelSource, const std::optional &maybeClauses) { @@ -5794,9 +5668,7 @@ void OmpStructureChecker::Enter(const parser::OmpClause::ThreadLimit &x) { void OmpStructureChecker::Enter(const parser::OpenMPInteropConstruct &x) { bool isDependClauseOccurred{false}; int targetCount{0}, targetSyncCount{0}; - const auto &dir{std::get(x.v.t)}; std::set objectSymbolList; - PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_interop); const auto &clauseList{std::get>(x.v.t)}; for (const auto &clause : clauseList->v) { common::visit( @@ -5884,10 +5756,6 @@ void OmpStructureChecker::Enter(const parser::OpenMPInteropConstruct &x) { } } -void OmpStructureChecker::Leave(const parser::OpenMPInteropConstruct &) { - dirContext_.pop_back(); -} - void OmpStructureChecker::CheckAllowedRequiresClause(llvm::omp::Clause clause) { CheckAllowedClause(clause); unsigned version{context_.langOptions().OpenMPVersion}; @@ -5909,6 +5777,8 @@ void OmpStructureChecker::Enter(const parser::OpenMPMisplacedEndDirective &x) { } void OmpStructureChecker::Leave(const parser::OpenMPMisplacedEndDirective &x) { + assert(GetContext().directive == llvm::omp::Directive::OMPD_unknown && + "Context mismatch"); dirContext_.pop_back(); } @@ -5918,6 +5788,8 @@ void OmpStructureChecker::Enter(const parser::OpenMPInvalidDirective &x) { } void OmpStructureChecker::Leave(const parser::OpenMPInvalidDirective &x) { + assert(GetContext().directive == llvm::omp::Directive::OMPD_unknown && + "Context mismatch"); dirContext_.pop_back(); } diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 9fca5ff0f5fca..1154fce5e00a7 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -96,12 +96,7 @@ class OmpStructureChecker : public OmpStructureCheckerBase { void Enter(const parser::OpenMPLoopConstruct &); void Leave(const 
parser::OpenMPLoopConstruct &); - void Enter(const parser::OmpAssumeDirective &); - void Leave(const parser::OmpAssumeDirective &); - void Enter(const parser::OmpAssumesDirective &); - void Leave(const parser::OmpAssumesDirective &); void Enter(const parser::OpenMPInteropConstruct &); - void Leave(const parser::OpenMPInteropConstruct &); void Enter(const parser::OmpBlockConstruct &); void Leave(const parser::OmpBlockConstruct &); void Enter(const parser::OmpBeginDirective &); @@ -110,51 +105,31 @@ class OmpStructureChecker : public OmpStructureCheckerBase { void Leave(const parser::OmpEndDirective &); void Enter(const parser::OpenMPSectionsConstruct &); - void Leave(const parser::OpenMPSectionsConstruct &); void Enter(const parser::OmpEndSectionsDirective &); void Leave(const parser::OmpEndSectionsDirective &); void Enter(const parser::OmpDeclareVariantDirective &); - void Leave(const parser::OmpDeclareVariantDirective &); void Enter(const parser::OmpDeclareSimdDirective &); - void Leave(const parser::OmpDeclareSimdDirective &); void Enter(const parser::OmpAllocateDirective &); void Leave(const parser::OmpAllocateDirective &); void Enter(const parser::OmpDeclareMapperDirective &); - void Leave(const parser::OmpDeclareMapperDirective &); void Enter(const parser::OmpDeclareReductionDirective &); - void Leave(const parser::OmpDeclareReductionDirective &); void Enter(const parser::OmpDeclareTargetDirective &); void Leave(const parser::OmpDeclareTargetDirective &); void Enter(const parser::OpenMPDepobjConstruct &); - void Leave(const parser::OpenMPDepobjConstruct &); void Enter(const parser::OpenMPDispatchConstruct &); - void Leave(const parser::OpenMPDispatchConstruct &); - void Enter(const parser::OmpErrorDirective &); - void Leave(const parser::OmpErrorDirective &); - void Enter(const parser::OmpNothingDirective &); - void Leave(const parser::OmpNothingDirective &); void Enter(const parser::OpenMPAllocatorsConstruct &); - void Leave(const 
parser::OpenMPAllocatorsConstruct &); void Enter(const parser::OmpRequiresDirective &); - void Leave(const parser::OmpRequiresDirective &); void Enter(const parser::OmpGroupprivateDirective &); - void Leave(const parser::OmpGroupprivateDirective &); - void Enter(const parser::OmpThreadprivateDirective &); void Leave(const parser::OmpThreadprivateDirective &); void Enter(const parser::OpenMPSimpleStandaloneConstruct &); void Leave(const parser::OpenMPSimpleStandaloneConstruct &); - void Enter(const parser::OpenMPFlushConstruct &); void Leave(const parser::OpenMPFlushConstruct &); void Enter(const parser::OpenMPCancelConstruct &); - void Leave(const parser::OpenMPCancelConstruct &); void Enter(const parser::OpenMPCancellationPointConstruct &); - void Leave(const parser::OpenMPCancellationPointConstruct &); void Enter(const parser::OpenMPCriticalConstruct &); - void Leave(const parser::OpenMPCriticalConstruct &); void Enter(const parser::OpenMPAtomicConstruct &); - void Leave(const parser::OpenMPAtomicConstruct &); void Leave(const parser::OmpClauseList &); void Enter(const parser::OmpClause &); @@ -167,8 +142,6 @@ class OmpStructureChecker : public OmpStructureCheckerBase { void Enter(const parser::OmpMetadirectiveDirective &); void Leave(const parser::OmpMetadirectiveDirective &); - void Enter(const parser::OmpDelimitedMetadirectiveDirective &); - void Leave(const parser::OmpDelimitedMetadirectiveDirective &); void Enter(const parser::OmpContextSelector &); void Leave(const parser::OmpContextSelector &); diff --git a/flang/lib/Semantics/check-omp-variant.cpp b/flang/lib/Semantics/check-omp-variant.cpp index 8ab160496b64a..c681cd601b856 100644 --- a/flang/lib/Semantics/check-omp-variant.cpp +++ b/flang/lib/Semantics/check-omp-variant.cpp @@ -585,23 +585,10 @@ void OmpStructureChecker::Leave(const parser::OmpDirectiveSpecification &x) { void OmpStructureChecker::Enter(const parser::OmpMetadirectiveDirective &x) { EnterDirectiveNest(MetadirectiveNest); - 
PushContextAndClauseSets( - x.v.source, llvm::omp::Directive::OMPD_metadirective); } void OmpStructureChecker::Leave(const parser::OmpMetadirectiveDirective &) { ExitDirectiveNest(MetadirectiveNest); - dirContext_.pop_back(); -} - -void OmpStructureChecker::Enter( - const parser::OmpDelimitedMetadirectiveDirective &x) { - PushContextAndClauseSets(x.source, llvm::omp::Directive::OMPD_metadirective); -} - -void OmpStructureChecker::Leave( - const parser::OmpDelimitedMetadirectiveDirective &) { - dirContext_.pop_back(); } static const parser::traits::OmpContextSelectorSpecification * @@ -742,13 +729,7 @@ void OmpStructureChecker::CheckOmpDeclareVariantDirective( } void OmpStructureChecker::Enter(const parser::OmpDeclareVariantDirective &x) { - const parser::OmpDirectiveName &dirName{x.v.DirName()}; - PushContextAndClauseSets(dirName.source, dirName.v); CheckOmpDeclareVariantDirective(x); } -void OmpStructureChecker::Leave(const parser::OmpDeclareVariantDirective &) { - dirContext_.pop_back(); -} - } // namespace Fortran::semantics From dea1417335e3da45d1544a2409ec4432042689b3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 22 Jun 2026 16:32:46 +0200 Subject: [PATCH 022/511] runtimes: Pass CMAKE_SYSTEM_NAME based on target triple (#203504) Compute the cmake system name from the target triple, rather than passing through the host's. This is primarily to stop forwarding OSX specific cmake variables. This fixes build failures when trying to build gpu libc on mac hosts. Previously it would fail on several issues, starting with an unused argument -mmacos-version-min error, followed by other errors caused by passing -isysroot. Secondarily, restrict the cmake imported targets when cross compiling. Without this, the amdgpu build prints many cmake warnings about the target not supporting shared libraries. Claude did most of the actual work, though it required quite a few rounds of prodding to get it into the right place. 
In particular it took care of handling all of the cmake platform recognized names from the triple. Co-authored-by: Claude Opus 4.6 --- clang/cmake/modules/ClangConfig.cmake.in | 5 +- cmake/Modules/GetTripleCMakeSystemName.cmake | 89 +++++++++++++++++++ cmake/Modules/NormalizeTriple.cmake | 36 ++++++++ llvm/cmake/modules/LLVMConfig.cmake.in | 69 +++++++------- .../modules/LLVMExternalProjectUtils.cmake | 64 +++++++++---- llvm/runtimes/CMakeLists.txt | 4 - runtimes/CMakeLists.txt | 18 +--- 7 files changed, 212 insertions(+), 73 deletions(-) create mode 100644 cmake/Modules/GetTripleCMakeSystemName.cmake create mode 100644 cmake/Modules/NormalizeTriple.cmake diff --git a/clang/cmake/modules/ClangConfig.cmake.in b/clang/cmake/modules/ClangConfig.cmake.in index 68f723d050117..e199c7e17b6b7 100644 --- a/clang/cmake/modules/ClangConfig.cmake.in +++ b/clang/cmake/modules/ClangConfig.cmake.in @@ -13,7 +13,10 @@ set(CLANG_LINK_CLANG_DYLIB "@CLANG_LINK_CLANG_DYLIB@") set(CLANG_DEFAULT_LINKER "@CLANG_DEFAULT_LINKER@") # Provide all our library targets to users. -@CLANG_CONFIG_INCLUDE_EXPORTS@ +# Skip when cross-compiling, as host library targets are not usable. +if(NOT CMAKE_CROSSCOMPILING) + @CLANG_CONFIG_INCLUDE_EXPORTS@ +endif() # By creating clang-tablegen-targets here, subprojects that depend on Clang's # tablegen-generated headers can always depend on this target whether building diff --git a/cmake/Modules/GetTripleCMakeSystemName.cmake b/cmake/Modules/GetTripleCMakeSystemName.cmake new file mode 100644 index 0000000000000..6cd8d3c59324e --- /dev/null +++ b/cmake/Modules/GetTripleCMakeSystemName.cmake @@ -0,0 +1,89 @@ +#===--------------------------------------------------------------------===// +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for details. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===--------------------------------------------------------------------===// + +# Extract the OS component from a target triple and map it to the +# corresponding CMake system name. +# +# Usage: +# get_triple_cmake_system_name(<triple> <out_var>) +# +# Parses the triple (arch-vendor-os[-env]) and sets <out_var> to the +# CMake-style system name (e.g. "Darwin", "Linux", "Windows"). +# Unrecognized OS values are mapped to "Generic". This expects a +# normalized triple. + +function(get_triple_cmake_system_name triple out_var) + string(REPLACE "-" ";" _components "${triple}") + list(LENGTH _components _len) + if(_len LESS 3) + set(${out_var} "${CMAKE_HOST_SYSTEM_NAME}" PARENT_SCOPE) + return() + endif() + + list(GET _components 1 _vendor) + list(GET _components 2 _os) + set(_env "") + if(_len GREATER_EQUAL 4) + list(GET _components 3 _env) + endif() + + # Check the special environment components first, since it can + # override the usual OS mapping. + if("${_env}" MATCHES "^android") + set(${out_var} "Android" PARENT_SCOPE) + elseif("${_env}" MATCHES "^cygnus") + set(${out_var} "CYGWIN" PARENT_SCOPE) + elseif("${_os}" MATCHES "^darwin|^macos") + set(${out_var} "Darwin" PARENT_SCOPE) + elseif("${_os}" MATCHES "^ios") + set(${out_var} "iOS" PARENT_SCOPE) + elseif("${_os}" MATCHES "^tvos") + set(${out_var} "tvOS" PARENT_SCOPE) + elseif("${_os}" MATCHES "^watchos") + set(${out_var} "watchOS" PARENT_SCOPE) + elseif("${_os}" MATCHES "^xros|^visionos") + set(${out_var} "visionOS" PARENT_SCOPE) + elseif("${_vendor}" STREQUAL "apple") + # Catch-all for other Apple triples (e.g. driverkit, bridgeos).
+ set(${out_var} "Darwin" PARENT_SCOPE) + elseif("${_os}" MATCHES "^linux") + set(${out_var} "Linux" PARENT_SCOPE) + elseif("${_os}" MATCHES "^windows") + set(${out_var} "Windows" PARENT_SCOPE) + elseif("${_os}" MATCHES "^freebsd|^kfreebsd") + set(${out_var} "FreeBSD" PARENT_SCOPE) + elseif("${_os}" MATCHES "^netbsd") + set(${out_var} "NetBSD" PARENT_SCOPE) + elseif("${_os}" MATCHES "^openbsd") + set(${out_var} "OpenBSD" PARENT_SCOPE) + elseif("${_os}" MATCHES "^dragonfly") + set(${out_var} "DragonFly" PARENT_SCOPE) + elseif("${_os}" MATCHES "^solaris") + set(${out_var} "SunOS" PARENT_SCOPE) + elseif("${_os}" MATCHES "^aix") + set(${out_var} "AIX" PARENT_SCOPE) + elseif("${_os}" MATCHES "^fuchsia") + set(${out_var} "Fuchsia" PARENT_SCOPE) + elseif("${_os}" MATCHES "^haiku") + set(${out_var} "Haiku" PARENT_SCOPE) + elseif("${_os}" MATCHES "^emscripten") + set(${out_var} "Emscripten" PARENT_SCOPE) + elseif("${_os}" MATCHES "^wasi") + set(${out_var} "WASI" PARENT_SCOPE) + elseif("${_os}" MATCHES "^rtems") + set(${out_var} "RTEMS" PARENT_SCOPE) + elseif("${_os}" MATCHES "^zos") + set(${out_var} "OS390" PARENT_SCOPE) + elseif("${_os}" MATCHES "^hurd") + set(${out_var} "GNU" PARENT_SCOPE) + elseif("${_os}" MATCHES "^serenity") + set(${out_var} "SerenityOS" PARENT_SCOPE) + else() + set(${out_var} "Generic" PARENT_SCOPE) + endif() +endfunction() diff --git a/cmake/Modules/NormalizeTriple.cmake b/cmake/Modules/NormalizeTriple.cmake new file mode 100644 index 0000000000000..08f09a22bdbb0 --- /dev/null +++ b/cmake/Modules/NormalizeTriple.cmake @@ -0,0 +1,36 @@ +#===--------------------------------------------------------------------===// +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for details. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +#===--------------------------------------------------------------------===// + +# Normalize a target triple using clang's -print-target-triple. 
+# +# Usage: +# normalize_triple(<compiler> <triple> <out_var>) +# +# Runs <compiler> --target=<triple> -print-target-triple to produce a +# canonical triple. If the compiler invocation fails (e.g. the compiler +# is not clang), <triple> is returned unchanged. + +function(normalize_triple compiler triple out_var) + set(_prefix "") + if(CMAKE_C_COMPILER_FRONTEND_VARIANT MATCHES "MSVC") + set(_prefix "/clang:") + endif() + execute_process( + COMMAND "${compiler}" "${_prefix}--target=${triple}" "${_prefix}-print-target-triple" + RESULT_VARIABLE _result + OUTPUT_VARIABLE _output + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET) + if(_result EQUAL 0 AND _output) + set(${out_var} "${_output}" PARENT_SCOPE) + else() + # TODO(#97876): Report an error. + message(WARNING "Failed to execute `${compiler} ${_prefix}--target=${triple} ${_prefix}-print-target-triple` to normalize target triple.") + set(${out_var} "${triple}" PARENT_SCOPE) + endif() +endfunction() diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in index 300c25e7c6101..6ef0cef7d0296 100644 --- a/llvm/cmake/modules/LLVMConfig.cmake.in +++ b/llvm/cmake/modules/LLVMConfig.cmake.in @@ -56,52 +56,47 @@ set(LLVM_ENABLE_ASSERTIONS @LLVM_ENABLE_ASSERTIONS@) set(LLVM_ENABLE_EH @LLVM_ENABLE_EH@) set(LLVM_ENABLE_FFI @LLVM_ENABLE_FFI@) -if(LLVM_ENABLE_FFI) - find_package(FFI) -endif() - set(LLVM_ENABLE_RTTI @LLVM_ENABLE_RTTI@) - -set(LLVM_ENABLE_LIBEDIT @HAVE_LIBEDIT@) -if(LLVM_ENABLE_LIBEDIT) - find_package(LibEdit) -endif() - set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@) - set(LLVM_ENABLE_UNWIND_TABLES @LLVM_ENABLE_UNWIND_TABLES@) - set(LLVM_ENABLE_ZLIB @LLVM_ENABLE_ZLIB@) -if(LLVM_ENABLE_ZLIB) - set(ZLIB_ROOT @ZLIB_ROOT@) - find_package(ZLIB) -endif() - set(LLVM_ENABLE_ZSTD @LLVM_ENABLE_ZSTD@) -if(LLVM_ENABLE_ZSTD) - find_package(zstd) -endif() - set(LLVM_ENABLE_LIBXML2 @LLVM_ENABLE_LIBXML2@) -if(LLVM_ENABLE_LIBXML2) - find_package(LibXml2) -endif() - set(LLVM_ENABLE_CURL @LLVM_ENABLE_CURL@) -if(LLVM_ENABLE_CURL) - find_package(CURL)
-endif() - set(LLVM_ENABLE_HTTPLIB @LLVM_ENABLE_HTTPLIB@) -if(LLVM_ENABLE_HTTPLIB) - find_package(httplib) -endif() - set(LLVM_WITH_Z3 @LLVM_WITH_Z3@) - set(LLVM_ENABLE_DIA_SDK @LLVM_ENABLE_DIA_SDK@) -if(LLVM_ENABLE_DIA_SDK) - find_package(DIASDK) +set(LLVM_ENABLE_LIBEDIT @HAVE_LIBEDIT@) + +# These are host libraries that LLVM was built with. Only find them when the +# consumer can actually use them (i.e. not when cross-compiling for an +# incompatible target). +if(NOT CMAKE_CROSSCOMPILING) + if(LLVM_ENABLE_FFI) + find_package(FFI) + endif() + if(LLVM_ENABLE_LIBEDIT) + find_package(LibEdit) + endif() + if(LLVM_ENABLE_ZLIB) + set(ZLIB_ROOT @ZLIB_ROOT@) + find_package(ZLIB) + endif() + if(LLVM_ENABLE_ZSTD) + find_package(zstd) + endif() + if(LLVM_ENABLE_LIBXML2) + find_package(LibXml2) + endif() + if(LLVM_ENABLE_CURL) + find_package(CURL) + endif() + if(LLVM_ENABLE_HTTPLIB) + find_package(httplib) + endif() + if(LLVM_ENABLE_DIA_SDK) + find_package(DIASDK) + endif() endif() set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@) @@ -152,7 +147,7 @@ set(LLVM_ENABLE_SHARED_LIBS @BUILD_SHARED_LIBS@) set(LLVM_DEFAULT_EXTERNAL_LIT "@LLVM_CONFIG_DEFAULT_EXTERNAL_LIT@") set(LLVM_LIT_ARGS "@LLVM_LIT_ARGS@") -if(NOT TARGET LLVMSupport) +if(NOT TARGET LLVMSupport AND NOT CMAKE_CROSSCOMPILING) @LLVM_CONFIG_INCLUDE_EXPORTS@ @llvm_config_include_buildtree_only_exports@ endif() diff --git a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake index ee270d70a778d..9567792e664e4 100644 --- a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake +++ b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake @@ -84,12 +84,6 @@ function(llvm_ExternalProject_Add name source_dir) endif() endforeach() - # If CMAKE_SYSTEM_NAME is not set explicitly in the arguments passed to us, - # reflect CMake's own default. 
- if (NOT _cmake_system_name) - set(_cmake_system_name "${CMAKE_HOST_SYSTEM_NAME}") - endif() - if(NOT ARG_TARGET_TRIPLE) set(target_triple ${LLVM_DEFAULT_TARGET_TRIPLE}) else() @@ -98,6 +92,36 @@ function(llvm_ExternalProject_Add name source_dir) is_msvc_triple(is_msvc_target "${target_triple}") + if(ARG_USE_TOOLCHAIN AND NOT CMAKE_CROSSCOMPILING) + set(_cmake_c_compiler "${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX}") + set(_cmake_cxx_compiler "${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++${CMAKE_EXECUTABLE_SUFFIX}") + set(_cmake_asm_compiler "${_cmake_c_compiler}") + if(is_msvc_target) + set(_cmake_c_compiler "${LLVM_RUNTIME_OUTPUT_INTDIR}/clang-cl${CMAKE_EXECUTABLE_SUFFIX}") + set(_cmake_cxx_compiler "${_cmake_c_compiler}") + set(_cmake_asm_compiler "${_cmake_c_compiler}") + endif() + else() + set(_cmake_c_compiler "${CMAKE_C_COMPILER}") + set(_cmake_cxx_compiler "${CMAKE_CXX_COMPILER}") + set(_cmake_asm_compiler "${CMAKE_C_COMPILER}") + endif() + + # If CMAKE_SYSTEM_NAME is not set explicitly in the arguments passed to us, + # derive it from the target triple if available, otherwise reflect CMake's + # own default. This ensures that cross-compilation targets get the correct + # platform files (e.g. AMDGPU targets on a Darwin host won't get macOS flags). 
+ if (NOT _cmake_system_name) + if(ARG_TARGET_TRIPLE) + include(NormalizeTriple) + normalize_triple("${_cmake_c_compiler}" "${ARG_TARGET_TRIPLE}" _normalized_triple) + include(GetTripleCMakeSystemName) + get_triple_cmake_system_name("${_normalized_triple}" _cmake_system_name) + else() + set(_cmake_system_name "${CMAKE_HOST_SYSTEM_NAME}") + endif() + endif() + if(NOT ARG_TOOLCHAIN_TOOLS) set(ARG_TOOLCHAIN_TOOLS clang) if (ARG_ENABLE_FORTRAN) @@ -231,15 +255,9 @@ function(llvm_ExternalProject_Add name source_dir) if(ARG_USE_TOOLCHAIN AND NOT CMAKE_CROSSCOMPILING) if(CLANG_IN_TOOLCHAIN) - if(is_msvc_target) - set(compiler_args -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang-cl${CMAKE_EXECUTABLE_SUFFIX} - -DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang-cl${CMAKE_EXECUTABLE_SUFFIX} - -DCMAKE_ASM_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang-cl${CMAKE_EXECUTABLE_SUFFIX}) - else() - set(compiler_args -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX} - -DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++${CMAKE_EXECUTABLE_SUFFIX} - -DCMAKE_ASM_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX}) - endif() + set(compiler_args -DCMAKE_C_COMPILER=${_cmake_c_compiler} + -DCMAKE_CXX_COMPILER=${_cmake_cxx_compiler} + -DCMAKE_ASM_COMPILER=${_cmake_asm_compiler}) endif() if(FLANG_IN_TOOLCHAIN) list(APPEND compiler_args -DCMAKE_Fortran_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/flang${CMAKE_EXECUTABLE_SUFFIX}) @@ -379,6 +397,22 @@ function(llvm_ExternalProject_Add name source_dir) list(APPEND compiler_args -DCMAKE_CXX_COMPILER_TARGET=${ARG_TARGET_TRIPLE}) list(APPEND compiler_args -DCMAKE_Fortran_COMPILER_TARGET=${ARG_TARGET_TRIPLE}) list(APPEND compiler_args -DCMAKE_ASM_COMPILER_TARGET=${ARG_TARGET_TRIPLE}) + + # Pass CMAKE_SYSTEM_NAME derived from the target triple so the sub-build + # loads the correct platform files instead of the host's. 
+ if(NOT "${_cmake_system_name}" STREQUAL "${CMAKE_HOST_SYSTEM_NAME}") + list(APPEND compiler_args -DCMAKE_SYSTEM_NAME=${_cmake_system_name}) + endif() + + # Forward Darwin-specific variables only when targeting Darwin. + if("${_cmake_system_name}" STREQUAL "Darwin") + if(CMAKE_OSX_SYSROOT) + list(APPEND compiler_args -DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT}) + endif() + if(CMAKE_OSX_DEPLOYMENT_TARGET) + list(APPEND compiler_args -DCMAKE_OSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET}) + endif() + endif() endif() if(CMAKE_VERBOSE_MAKEFILE) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 6d81b26d2d416..af50413c9b0b7 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -4,10 +4,6 @@ # the two files. set(COMMON_CMAKE_ARGS "-DHAVE_LLVM_LIT=ON;-DCLANG_RESOURCE_DIR=${CLANG_RESOURCE_DIR}") -if(APPLE AND CMAKE_OSX_SYSROOT AND (LLVM_TARGET_TRIPLE STREQUAL LLVM_HOST_TRIPLE)) - # Only propagate the host sysroot for native runtimes builds. 
- list(APPEND RUNTIMES_CMAKE_ARGS "-DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT}") -endif() foreach(proj ${LLVM_ENABLE_RUNTIMES}) string(TOUPPER "${proj}" canon_name) STRING(REGEX REPLACE "-" "_" canon_name ${canon_name}) diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 0a84ef3957f76..36ebe594edc0d 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -228,22 +228,8 @@ message(STATUS "LLVM default target triple: ${LLVM_DEFAULT_TARGET_TRIPLE}") set(LLVM_TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}") if(CMAKE_C_COMPILER_ID MATCHES "Clang") - set(option_prefix "") - if (CMAKE_C_COMPILER_FRONTEND_VARIANT MATCHES "MSVC") - set(option_prefix "/clang:") - endif() - set(print_target_triple ${CMAKE_C_COMPILER} ${option_prefix}--target=${LLVM_DEFAULT_TARGET_TRIPLE} ${option_prefix}-print-target-triple) - execute_process(COMMAND ${print_target_triple} - RESULT_VARIABLE result - OUTPUT_VARIABLE output - OUTPUT_STRIP_TRAILING_WHITESPACE) - if(result EQUAL 0) - set(LLVM_DEFAULT_TARGET_TRIPLE ${output}) - else() - string(REPLACE ";" " " print_target_triple "${print_target_triple}") - # TODO(#97876): Report an error. - message(WARNING "Failed to execute `${print_target_triple}` to normalize target triple.") - endif() + include(NormalizeTriple) + normalize_triple("${CMAKE_C_COMPILER}" "${LLVM_DEFAULT_TARGET_TRIPLE}" LLVM_DEFAULT_TARGET_TRIPLE) endif() # Determine output and install paths based on LLVM_TARGET_TRIPLE From 51e26f68b12962b1585e56845ae0e937e16d4a8a Mon Sep 17 00:00:00 2001 From: Koakuma Date: Mon, 22 Jun 2026 21:42:17 +0700 Subject: [PATCH 023/511] [SPARC] Use hardware byteswapper when we have V9 (#191720) On V9 processors we have endianness-adjusted memory operations, that can be used to implement BSWAPs. Use those instructions whenever possible to reduce code size. 
--- llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp | 16 + llvm/lib/Target/Sparc/SparcISelLowering.cpp | 126 +++++- llvm/lib/Target/Sparc/SparcISelLowering.h | 4 + llvm/lib/Target/Sparc/SparcInstr64Bit.td | 12 + llvm/lib/Target/Sparc/SparcInstrInfo.td | 46 +- llvm/test/CodeGen/SPARC/bswap.ll | 474 +++++--------------- 6 files changed, 308 insertions(+), 370 deletions(-) diff --git a/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp index 55bf40e185ff4..8ed9c5222a3a1 100644 --- a/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -49,6 +49,7 @@ class SparcDAGToDAGISel : public SelectionDAGISel { // Complex Pattern Selectors. bool SelectADDRrr(SDValue N, SDValue &R1, SDValue &R2); bool SelectADDRri(SDValue N, SDValue &Base, SDValue &Offset); + bool SelectForceADDRrr(SDValue N, SDValue &Base, SDValue &Disp); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. @@ -152,6 +153,21 @@ bool SparcDAGToDAGISel::SelectADDRrr(SDValue Addr, SDValue &R1, SDValue &R2) { return true; } +bool SparcDAGToDAGISel::SelectForceADDRrr(SDValue Addr, SDValue &Base, + SDValue &Disp) { + // If it's already in R+R form then hand it over to regular ADDRrr handling. + if (Addr.getNumOperands() == 2 && + !isa(Addr.getOperand(0).getNode()) && + !isa(Addr.getOperand(1).getNode())) + return SelectADDRrr(Addr, Base, Disp); + + // Otherwise we'll use the full address in base and set the offset part to + // zero. + Base = Addr; + Disp = + CurDAG->getRegister(SP::G0, TLI->getPointerTy(CurDAG->getDataLayout())); + return true; +} // Re-assemble i64 arguments split up in SelectionDAGBuilder's // visitInlineAsm / GetRegistersForValue functions. 
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp index 4badb2f17bd98..bd1ca913a258e 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp +++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp @@ -1808,7 +1808,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, setOperationAction(ISD::CTPOP, MVT::i64, Subtarget->usePopc() ? Legal : Expand); - setOperationAction(ISD::BSWAP, MVT::i64, Expand); + setOperationAction(ISD::BSWAP, MVT::i64, Custom); setOperationAction(ISD::ROTL , MVT::i64, Expand); setOperationAction(ISD::ROTR , MVT::i64, Expand); setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom); @@ -1872,7 +1872,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, Subtarget->isUA2007() ? Legal : Expand); setOperationAction(ISD::ROTL , MVT::i32, Expand); setOperationAction(ISD::ROTR , MVT::i32, Expand); - setOperationAction(ISD::BSWAP, MVT::i32, Expand); + setOperationAction(ISD::BSWAP, MVT::i32, Subtarget->isV9() ? Custom : Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); @@ -1985,6 +1985,9 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM, if (!Subtarget->is64Bit()) setTargetDAGCombine(ISD::BITCAST); + if (Subtarget->isV9()) + setTargetDAGCombine({ISD::BSWAP, ISD::STORE}); + if (Subtarget->hasLeonCycleCounter()) setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); @@ -3002,6 +3005,39 @@ static SDValue LowerF128Load(SDValue Op, SelectionDAG &DAG) return DAG.getMergeValues(Ops, dl); } +SDValue SparcTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const { + // We don't have an in-register bswap, so expand bswap(x) into + // load(store-swapped(x)). 
The reason the swap is done during the store is + // that on some implementations (mainly older ones) ASI-tagged memory + // operations are not pipelined, and generally stores finish faster than + // loads. + + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo &MFI = MF.getFrameInfo(); + MVT PtrVT = getPointerTy(DAG.getDataLayout()); + SDValue Chain = DAG.getEntryNode(); + bool IsLittleEndian = DAG.getDataLayout().isLittleEndian(); + SDLoc DL(Op); + + SDValue BSwapOp = Op.getOperand(0); + EVT VT = BSwapOp.getValueType(); + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + Align Al = DAG.getDataLayout().getPrefTypeAlign(Ty); + + // Create a stack object to serve as temporary storage. + int TmpFI = MFI.CreateStackObject(VT.getStoreSize(), Al, false); + SDValue TmpPtr = DAG.getFrameIndex(TmpFI, PtrVT); + + // Store-swap the value, then load it back. + SDValue Ops[] = {Chain, BSwapOp, TmpPtr, DAG.getValueType(VT)}; + SDValue ST = DAG.getMemIntrinsicNode( + IsLittleEndian ? SPISD::STORE_BIG : SPISD::STORE_LITTLE, DL, + DAG.getVTList(MVT::Other), Ops, VT, + MachinePointerInfo::getFixedStack(MF, TmpFI)); + return DAG.getLoad(VT, DL, ST, TmpPtr, + MachinePointerInfo::getFixedStack(MF, TmpFI)); +} + static SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) { LoadSDNode *LdNode = cast(Op.getNode()); @@ -3174,6 +3210,9 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::STACKADDRESS: return LowerSTACKADDRESS(Op, DAG, *Subtarget); + case ISD::BSWAP: + return LowerBSWAP(Op, DAG); + case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::FADD: @@ -3219,6 +3258,85 @@ SDValue SparcTargetLowering::PerformBITCASTCombine(SDNode *N, return SDValue(); } +SDValue SparcTargetLowering::PerformBSWAPCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SDLoc DL(N); + SelectionDAG &DAG = DCI.DAG; + SDValue Op = N->getOperand(0); + EVT VT = N->getValueType(0); + bool IsLittleEndian = DAG.getDataLayout().isLittleEndian(); + 
+ // Turn BSWAP (LOAD) -> ld*a #ASI_P(_L) on V9. + if (Subtarget->isV9() && ISD::isNormalLoad(Op.getNode()) && + Op.getNode()->hasOneUse() && + (VT == MVT::i16 || VT == MVT::i32 || + (Subtarget->is64Bit() && VT == MVT::i64))) { + SDValue Load = Op; + auto *LD = cast(Load); + + // Create the byte-swapping load. + SDValue Ops[] = {LD->getChain(), LD->getBasePtr(), DAG.getValueType(VT)}; + + SDValue BSLoad = DAG.getMemIntrinsicNode( + IsLittleEndian ? SPISD::LOAD_BIG : SPISD::LOAD_LITTLE, DL, + DAG.getVTList(VT == MVT::i64 ? MVT::i64 : MVT::i32, MVT::Other), Ops, + LD->getMemoryVT(), LD->getMemOperand()); + + // If this is an i16 load, insert the truncate. + SDValue ResVal = BSLoad; + if (VT == MVT::i16) + ResVal = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, BSLoad); + + return DCI.CombineTo(N, ResVal); + } + + return SDValue(); +} + +SDValue SparcTargetLowering::PerformSTORECombine(SDNode *N, + DAGCombinerInfo &DCI) const { + SDLoc DL(N); + SelectionDAG &DAG = DCI.DAG; + SDValue Op = N->getOperand(1); + EVT VT = Op.getValueType(); + unsigned Opcode = Op.getOpcode(); + bool IsLittleEndian = DAG.getDataLayout().isLittleEndian(); + + // Turn STORE (BSWAP) -> st*a #ASI_P(_L) on V9. + if (Subtarget->isV9() && Opcode == ISD::BSWAP && Op.getNode()->hasOneUse() && + (VT == MVT::i16 || VT == MVT::i32 || + (Subtarget->is64Bit() && VT == MVT::i64))) { + + // st*a can only handle simple types and it makes no sense to store less + // than two bytes in byte-reversed order. + EVT MemVT = cast(N)->getMemoryVT(); + if (MemVT.getSizeInBits() < 16) + return SDValue(); + + SDValue BSwapOp = Op.getOperand(0); + // Do an any-extend to 32-bits if this is a half-word input. + if (BSwapOp.getValueType() == MVT::i16) + BSwapOp = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, BSwapOp); + + // If the type of BSWAP operand is wider than stored memory width + // it needs to be shifted to the right side before st*a. 
+ if (VT.bitsGT(MemVT)) { + unsigned Shift = VT.getSizeInBits() - MemVT.getSizeInBits(); + BSwapOp = DAG.getNode(ISD::SRL, DL, VT, BSwapOp, + DAG.getShiftAmountConstant(Shift, VT, DL)); + } + + SDValue Ops[] = {N->getOperand(0), BSwapOp, N->getOperand(2), + DAG.getValueType(MemVT)}; + return DAG.getMemIntrinsicNode( + IsLittleEndian ? SPISD::STORE_BIG : SPISD::STORE_LITTLE, DL, + DAG.getVTList(MVT::Other), Ops, cast(N)->getMemoryVT(), + cast(N)->getMemOperand()); + } + + return SDValue(); +} + SDValue SparcTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { @@ -3226,6 +3344,10 @@ SDValue SparcTargetLowering::PerformDAGCombine(SDNode *N, break; case ISD::BITCAST: return PerformBITCASTCombine(N, DCI); + case ISD::BSWAP: + return PerformBSWAPCombine(N, DCI); + case ISD::STORE: + return PerformSTORECombine(N, DCI); } return SDValue(); } diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.h b/llvm/lib/Target/Sparc/SparcISelLowering.h index f97b0ab9edd93..5098480e192a6 100644 --- a/llvm/lib/Target/Sparc/SparcISelLowering.h +++ b/llvm/lib/Target/Sparc/SparcISelLowering.h @@ -143,9 +143,13 @@ namespace llvm { SDValue LowerF128Compare(SDValue LHS, SDValue RHS, unsigned &SPCC, const SDLoc &DL, SelectionDAG &DAG) const; + SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue PerformBITCASTCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue PerformBSWAPCombine(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue PerformSTORECombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue bitcastConstantFPToInt(ConstantFPSDNode *C, const SDLoc &DL, SelectionDAG &DAG) const; diff --git a/llvm/lib/Target/Sparc/SparcInstr64Bit.td b/llvm/lib/Target/Sparc/SparcInstr64Bit.td index d6d081d51e4bc..5680dcd81ce86 100644 --- a/llvm/lib/Target/Sparc/SparcInstr64Bit.td +++ b/llvm/lib/Target/Sparc/SparcInstr64Bit.td @@ -500,3 +500,15 @@ def : Pat<(add iPTR:$r, (SPlo 
tconstpool:$in)), (ADDri $r, tconstpool:$in)>; def : Pat<(add iPTR:$r, (SPlo tblockaddress:$in)), (ADDri $r, tblockaddress:$in)>; } + +// Endian adjusted memory operations. +let Predicates = [HasV9, Is64Bit] in { + // LDSWA is only usable on 64-bit environment. + def : Pat<(sext (i32 (SPloadbig ForceADDRrr:$src, i32))), (LDSWArr ForceADDRrr:$src, 0x80)>; + def : Pat<(sext (i32 (SPloadlittle ForceADDRrr:$src, i32))), (LDSWArr ForceADDRrr:$src, 0x88)>; + + def : Pat<(SPloadbig ForceADDRrr:$src, i64), (LDXArr ForceADDRrr:$src, 0x80)>; + def : Pat<(SPloadlittle ForceADDRrr:$src, i64), (LDXArr ForceADDRrr:$src, 0x88)>; + def : Pat<(SPstorebig i64:$val, ForceADDRrr:$dst, i64), (STXArr ForceADDRrr:$dst, $val, 0x80)>; + def : Pat<(SPstorelittle i64:$val, ForceADDRrr:$dst, i64), (STXArr ForceADDRrr:$dst, $val, 0x88)>; +} diff --git a/llvm/lib/Target/Sparc/SparcInstrInfo.td b/llvm/lib/Target/Sparc/SparcInstrInfo.td index 8717b9347210c..4686cd9f278ce 100644 --- a/llvm/lib/Target/Sparc/SparcInstrInfo.td +++ b/llvm/lib/Target/Sparc/SparcInstrInfo.td @@ -158,6 +158,7 @@ def SETHIimm_not : PatLeaf<(i32 imm), [{ // Addressing modes. def ADDRrr : ComplexPattern; def ADDRri : ComplexPattern; +def ForceADDRrr : ComplexPattern; // Constrained operands for the shift operations. 
class ShiftAmtImmAsmOperand : AsmOperandClass { @@ -336,6 +337,11 @@ SDTypeProfile<1, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; def SDTSPloadgdop : SDTypeProfile<1, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; +def SDTSPloadbig : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; +def SDTSPloadlittle : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; +def SDTSPstorebig : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; +def SDTSPstorelittle : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>; + def SPcmpicc : SDNode<"SPISD::CMPICC", SDTSPcmpicc, [SDNPOutGlue]>; def SPcmpfcc : SDNode<"SPISD::CMPFCC", SDTSPcmpfcc, [SDNPOutGlue]>; def SPcmpfccv9 : SDNode<"SPISD::CMPFCC_V9", SDTSPcmpfcc, [SDNPOutGlue]>; @@ -359,6 +365,24 @@ def SPselectxcc : SDNode<"SPISD::SELECT_XCC", SDTSPselectcc, [SDNPInGlue]>; def SPselectfcc : SDNode<"SPISD::SELECT_FCC", SDTSPselectcc, [SDNPInGlue]>; def SPselectreg : SDNode<"SPISD::SELECT_REG", SDTSPselectreg>; +// GPRC, CHAIN = LOAD_BIG/LITTLE CHAIN, Ptr, Type +// These are endianness-adjusted load instructions. They load the low "Type" +// bits of the memory pointed by Ptr, possibly byte swapping it as necessary +// to account for its in-memory endianness. Type can be i16, i32, or i64. +def SPloadbig : SDNode<"SPISD::LOAD_BIG", SDTSPloadbig, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; +def SPloadlittle : SDNode<"SPISD::LOAD_LITTLE", SDTSPloadlittle, + [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; + +// CHAIN = STORE_BIG/LITTLE CHAIN, GPRC, Ptr, Type +// These are endianness-adjusted store instructions. They store the low "Type" +// bits of the GPR input out through Ptr, possibly byte swapping it as necessary +// to adjust its in-memory endianness. Type can be i16, i32, or i64.
+def SPstorebig : SDNode<"SPISD::STORE_BIG", SDTSPstorebig, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def SPstorelittle : SDNode<"SPISD::STORE_LITTLE", SDTSPstorelittle, + [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + // These are target-independent nodes, but have target-specific formats. def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32>, SDTCisVT<1, i32> ]>; @@ -546,8 +570,6 @@ multiclass Store Op3Val, SDPatternOperator OpNode, itin>; } -// TODO: Instructions of the StoreASI class are currently asm only; hooking up -// CodeGen's address spaces to use these is a future task. multiclass StoreASI Op3Val, RegisterClass RC, InstrItinClass itin = IIC_st> { def rr : F3_1_asi<3, Op3Val, (outs), (ins (MEMrr $rs1, $rs2):$addr, RC:$rd, ASITag:$asi), @@ -2050,6 +2072,26 @@ def : Pat<(build_vector (i32 IntRegs:$a1), (i32 IntRegs:$a2)), (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (i32 IntRegs:$a1), sub_even), (i32 IntRegs:$a2), sub_odd)>; +// Endian adjusted memory operations. +let Predicates = [HasV9] in { + def : Pat<(i32 (SPloadbig ForceADDRrr:$src, i16)), (LDUHArr ForceADDRrr:$src, 0x80)>; + def : Pat<(i32 (SPloadbig ForceADDRrr:$src, i32)), (LDArr ForceADDRrr:$src, 0x80)>; + def : Pat<(i32 (SPloadlittle ForceADDRrr:$src, i16)), (LDUHArr ForceADDRrr:$src, 0x88)>; + def : Pat<(i32 (SPloadlittle ForceADDRrr:$src, i32)), (LDArr ForceADDRrr:$src, 0x88)>; + + def : Pat<(zanyext (i32 (SPloadbig ForceADDRrr:$src, i16))), (LDUHArr ForceADDRrr:$src, 0x80)>; + def : Pat<(zanyext (i32 (SPloadbig ForceADDRrr:$src, i32))), (LDArr ForceADDRrr:$src, 0x80)>; + def : Pat<(zanyext (i32 (SPloadlittle ForceADDRrr:$src, i16))), (LDUHArr ForceADDRrr:$src, 0x88)>; + def : Pat<(zanyext (i32 (SPloadlittle ForceADDRrr:$src, i32))), (LDArr ForceADDRrr:$src, 0x88)>; + + def : Pat<(sext (i32 (SPloadbig ForceADDRrr:$src, i16))), (LDSHArr ForceADDRrr:$src, 0x80)>; + def : Pat<(sext (i32 (SPloadlittle ForceADDRrr:$src, i16))), (LDSHArr ForceADDRrr:$src, 0x88)>; + + def : Pat<(SPstorebig 
i32:$val, ForceADDRrr:$dst, i16), (STHArr ForceADDRrr:$dst, $val, 0x80)>; + def : Pat<(SPstorebig i32:$val, ForceADDRrr:$dst, i32), (STArr ForceADDRrr:$dst, $val, 0x80)>; + def : Pat<(SPstorelittle i32:$val, ForceADDRrr:$dst, i16), (STHArr ForceADDRrr:$dst, $val, 0x88)>; + def : Pat<(SPstorelittle i32:$val, ForceADDRrr:$dst, i32), (STArr ForceADDRrr:$dst, $val, 0x88)>; +} include "SparcInstr64Bit.td" include "SparcInstrVIS.td" diff --git a/llvm/test/CodeGen/SPARC/bswap.ll b/llvm/test/CodeGen/SPARC/bswap.ll index dd389f7902a72..b182862561752 100644 --- a/llvm/test/CodeGen/SPARC/bswap.ll +++ b/llvm/test/CodeGen/SPARC/bswap.ll @@ -3,6 +3,9 @@ ; RUN: llc < %s -mtriple=sparcel -mcpu=v9 | FileCheck %s --check-prefix=SPARCEL ; RUN: llc < %s -mtriple=sparc64 -mcpu=v9 | FileCheck %s --check-prefix=SPARC64 +;; On V9 processors we can use endian-adjusted memory accesses to implement +;; byte swapping for more compact code. + declare i16 @llvm.bswap.i16(i16) declare i32 @llvm.bswap.i32(i32) declare i64 @llvm.bswap.i64(i64) @@ -10,33 +13,31 @@ declare i64 @llvm.bswap.i64(i64) define i16 @u16_bswap(i16 %0) #0 { ; SPARC32-LABEL: u16_bswap: ; SPARC32: ! %bb.0: -; SPARC32-NEXT: sethi 63, %o1 -; SPARC32-NEXT: or %o1, 768, %o1 -; SPARC32-NEXT: and %o0, %o1, %o1 -; SPARC32-NEXT: srl %o1, 8, %o1 -; SPARC32-NEXT: sll %o0, 8, %o0 +; SPARC32-NEXT: add %sp, -96, %sp +; SPARC32-NEXT: add %sp, 92, %o1 +; SPARC32-NEXT: sta %o0, [%o1] #ASI_P_L +; SPARC32-NEXT: lduh [%sp+92], %o0 ; SPARC32-NEXT: retl -; SPARC32-NEXT: or %o0, %o1, %o0 +; SPARC32-NEXT: add %sp, 96, %sp ; ; SPARCEL-LABEL: u16_bswap: ; SPARCEL: !
%bb.0: -; SPARCEL-NEXT: sethi 63, %o1 -; SPARCEL-NEXT: or %o1, 768, %o1 -; SPARCEL-NEXT: and %o0, %o1, %o1 -; SPARCEL-NEXT: srl %o1, 8, %o1 -; SPARCEL-NEXT: sll %o0, 8, %o0 +; SPARCEL-NEXT: add %sp, -96, %sp +; SPARCEL-NEXT: add %sp, 92, %o1 +; SPARCEL-NEXT: sta %o0, [%o1] #ASI_P +; SPARCEL-NEXT: or %o1, 2, %o0 +; SPARCEL-NEXT: lduh [%o0], %o0 ; SPARCEL-NEXT: retl -; SPARCEL-NEXT: or %o0, %o1, %o0 +; SPARCEL-NEXT: add %sp, 96, %sp ; ; SPARC64-LABEL: u16_bswap: ; SPARC64: ! %bb.0: -; SPARC64-NEXT: sethi 63, %o1 -; SPARC64-NEXT: or %o1, 768, %o1 -; SPARC64-NEXT: and %o0, %o1, %o1 -; SPARC64-NEXT: srl %o1, 8, %o1 -; SPARC64-NEXT: sll %o0, 8, %o0 +; SPARC64-NEXT: add %sp, -144, %sp +; SPARC64-NEXT: add %sp, 2187, %o1 +; SPARC64-NEXT: sta %o0, [%o1] #ASI_P_L +; SPARC64-NEXT: lduh [%sp+2187], %o0 ; SPARC64-NEXT: retl -; SPARC64-NEXT: or %o0, %o1, %o0 +; SPARC64-NEXT: add %sp, 144, %sp %2 = tail call i16 @llvm.bswap.i16(i16 %0) ret i16 %2 } @@ -44,48 +45,30 @@ define i16 @u16_bswap(i16 %0) #0 { define i32 @u32_bswap(i32 %0) #0 { ; SPARC32-LABEL: u32_bswap: ; SPARC32: ! %bb.0: -; SPARC32-NEXT: srl %o0, 8, %o1 -; SPARC32-NEXT: sethi 63, %o2 -; SPARC32-NEXT: or %o2, 768, %o2 -; SPARC32-NEXT: and %o1, %o2, %o1 -; SPARC32-NEXT: srl %o0, 24, %o3 -; SPARC32-NEXT: or %o1, %o3, %o1 -; SPARC32-NEXT: and %o0, %o2, %o2 -; SPARC32-NEXT: sll %o2, 8, %o2 -; SPARC32-NEXT: sll %o0, 24, %o0 -; SPARC32-NEXT: or %o0, %o2, %o0 +; SPARC32-NEXT: add %sp, -96, %sp +; SPARC32-NEXT: add %sp, 92, %o1 +; SPARC32-NEXT: sta %o0, [%o1] #ASI_P_L +; SPARC32-NEXT: ld [%sp+92], %o0 ; SPARC32-NEXT: retl -; SPARC32-NEXT: or %o0, %o1, %o0 +; SPARC32-NEXT: add %sp, 96, %sp ; ; SPARCEL-LABEL: u32_bswap: ; SPARCEL: ! 
%bb.0: -; SPARCEL-NEXT: srl %o0, 8, %o1 -; SPARCEL-NEXT: sethi 63, %o2 -; SPARCEL-NEXT: or %o2, 768, %o2 -; SPARCEL-NEXT: and %o1, %o2, %o1 -; SPARCEL-NEXT: srl %o0, 24, %o3 -; SPARCEL-NEXT: or %o1, %o3, %o1 -; SPARCEL-NEXT: and %o0, %o2, %o2 -; SPARCEL-NEXT: sll %o2, 8, %o2 -; SPARCEL-NEXT: sll %o0, 24, %o0 -; SPARCEL-NEXT: or %o0, %o2, %o0 +; SPARCEL-NEXT: add %sp, -96, %sp +; SPARCEL-NEXT: add %sp, 92, %o1 +; SPARCEL-NEXT: sta %o0, [%o1] #ASI_P +; SPARCEL-NEXT: ld [%sp+92], %o0 ; SPARCEL-NEXT: retl -; SPARCEL-NEXT: or %o0, %o1, %o0 +; SPARCEL-NEXT: add %sp, 96, %sp ; ; SPARC64-LABEL: u32_bswap: ; SPARC64: ! %bb.0: -; SPARC64-NEXT: srl %o0, 8, %o1 -; SPARC64-NEXT: sethi 63, %o2 -; SPARC64-NEXT: or %o2, 768, %o2 -; SPARC64-NEXT: and %o1, %o2, %o1 -; SPARC64-NEXT: srl %o0, 24, %o3 -; SPARC64-NEXT: or %o1, %o3, %o1 -; SPARC64-NEXT: and %o0, %o2, %o2 -; SPARC64-NEXT: sll %o2, 8, %o2 -; SPARC64-NEXT: sll %o0, 24, %o0 -; SPARC64-NEXT: or %o0, %o2, %o0 +; SPARC64-NEXT: add %sp, -144, %sp +; SPARC64-NEXT: add %sp, 2187, %o1 +; SPARC64-NEXT: sta %o0, [%o1] #ASI_P_L +; SPARC64-NEXT: ld [%sp+2187], %o0 ; SPARC64-NEXT: retl -; SPARC64-NEXT: or %o0, %o1, %o0 +; SPARC64-NEXT: add %sp, 144, %sp %2 = tail call i32 @llvm.bswap.i32(i32 %0) ret i32 %2 } @@ -93,83 +76,36 @@ define i32 @u32_bswap(i32 %0) #0 { define i64 @u64_bswap(i64 %0) #0 { ; SPARC32-LABEL: u64_bswap: ; SPARC32: ! 
%bb.0: -; SPARC32-NEXT: srl %o1, 8, %o2 -; SPARC32-NEXT: sethi 63, %o3 -; SPARC32-NEXT: or %o3, 768, %o3 -; SPARC32-NEXT: and %o2, %o3, %o2 -; SPARC32-NEXT: srl %o1, 24, %o4 -; SPARC32-NEXT: or %o2, %o4, %o2 -; SPARC32-NEXT: and %o1, %o3, %o4 -; SPARC32-NEXT: sll %o4, 8, %o4 -; SPARC32-NEXT: sll %o1, 24, %o1 -; SPARC32-NEXT: or %o1, %o4, %o1 -; SPARC32-NEXT: or %o1, %o2, %o2 -; SPARC32-NEXT: srl %o0, 8, %o1 -; SPARC32-NEXT: and %o1, %o3, %o1 -; SPARC32-NEXT: srl %o0, 24, %o4 -; SPARC32-NEXT: or %o1, %o4, %o1 -; SPARC32-NEXT: and %o0, %o3, %o3 -; SPARC32-NEXT: sll %o3, 8, %o3 -; SPARC32-NEXT: sll %o0, 24, %o0 -; SPARC32-NEXT: or %o0, %o3, %o0 -; SPARC32-NEXT: or %o0, %o1, %o1 +; SPARC32-NEXT: add %sp, -104, %sp +; SPARC32-NEXT: add %sp, 100, %o2 +; SPARC32-NEXT: sta %o1, [%o2] #ASI_P_L +; SPARC32-NEXT: add %sp, 96, %o1 +; SPARC32-NEXT: sta %o0, [%o1] #ASI_P_L +; SPARC32-NEXT: ld [%sp+100], %o0 +; SPARC32-NEXT: ld [%sp+96], %o1 ; SPARC32-NEXT: retl -; SPARC32-NEXT: mov %o2, %o0 +; SPARC32-NEXT: add %sp, 104, %sp ; ; SPARCEL-LABEL: u64_bswap: ; SPARCEL: ! 
%bb.0: -; SPARCEL-NEXT: srl %o1, 8, %o2 -; SPARCEL-NEXT: sethi 63, %o3 -; SPARCEL-NEXT: or %o3, 768, %o3 -; SPARCEL-NEXT: and %o2, %o3, %o2 -; SPARCEL-NEXT: srl %o1, 24, %o4 -; SPARCEL-NEXT: or %o2, %o4, %o2 -; SPARCEL-NEXT: and %o1, %o3, %o4 -; SPARCEL-NEXT: sll %o4, 8, %o4 -; SPARCEL-NEXT: sll %o1, 24, %o1 -; SPARCEL-NEXT: or %o1, %o4, %o1 -; SPARCEL-NEXT: or %o1, %o2, %o2 -; SPARCEL-NEXT: srl %o0, 8, %o1 -; SPARCEL-NEXT: and %o1, %o3, %o1 -; SPARCEL-NEXT: srl %o0, 24, %o4 -; SPARCEL-NEXT: or %o1, %o4, %o1 -; SPARCEL-NEXT: and %o0, %o3, %o3 -; SPARCEL-NEXT: sll %o3, 8, %o3 -; SPARCEL-NEXT: sll %o0, 24, %o0 -; SPARCEL-NEXT: or %o0, %o3, %o0 -; SPARCEL-NEXT: or %o0, %o1, %o1 +; SPARCEL-NEXT: add %sp, -104, %sp +; SPARCEL-NEXT: add %sp, 100, %o2 +; SPARCEL-NEXT: sta %o1, [%o2] #ASI_P +; SPARCEL-NEXT: add %sp, 96, %o1 +; SPARCEL-NEXT: sta %o0, [%o1] #ASI_P +; SPARCEL-NEXT: ld [%sp+100], %o0 +; SPARCEL-NEXT: ld [%sp+96], %o1 ; SPARCEL-NEXT: retl -; SPARCEL-NEXT: mov %o2, %o0 +; SPARCEL-NEXT: add %sp, 104, %sp ; ; SPARC64-LABEL: u64_bswap: -; SPARC64: .register %g2, #scratch -; SPARC64-NEXT: ! %bb.0: -; SPARC64-NEXT: srlx %o0, 24, %o1 -; SPARC64-NEXT: sethi 16320, %o2 -; SPARC64-NEXT: and %o1, %o2, %o1 -; SPARC64-NEXT: srlx %o0, 8, %o3 -; SPARC64-NEXT: sethi 4177920, %o4 -; SPARC64-NEXT: and %o3, %o4, %o3 -; SPARC64-NEXT: or %o3, %o1, %o1 -; SPARC64-NEXT: srlx %o0, 40, %o3 -; SPARC64-NEXT: sethi 63, %o5 -; SPARC64-NEXT: or %o5, 768, %o5 -; SPARC64-NEXT: and %o3, %o5, %o3 -; SPARC64-NEXT: srlx %o0, 56, %g2 -; SPARC64-NEXT: or %o3, %g2, %o3 -; SPARC64-NEXT: or %o1, %o3, %o1 -; SPARC64-NEXT: and %o0, %o4, %o3 -; SPARC64-NEXT: sllx %o3, 8, %o3 -; SPARC64-NEXT: and %o0, %o2, %o2 -; SPARC64-NEXT: sllx %o2, 24, %o2 -; SPARC64-NEXT: or %o2, %o3, %o2 -; SPARC64-NEXT: and %o0, %o5, %o3 -; SPARC64-NEXT: sllx %o3, 40, %o3 -; SPARC64-NEXT: sllx %o0, 56, %o0 -; SPARC64-NEXT: or %o0, %o3, %o0 -; SPARC64-NEXT: or %o0, %o2, %o0 +; SPARC64: ! 
%bb.0: +; SPARC64-NEXT: add %sp, -144, %sp +; SPARC64-NEXT: add %sp, 2183, %o1 +; SPARC64-NEXT: stxa %o0, [%o1] #ASI_P_L +; SPARC64-NEXT: ldx [%sp+2183], %o0 ; SPARC64-NEXT: retl -; SPARC64-NEXT: or %o0, %o1, %o0 +; SPARC64-NEXT: add %sp, 144, %sp %2 = tail call i64 @llvm.bswap.i64(i64 %0) ret i64 %2 } @@ -177,27 +113,18 @@ define i64 @u64_bswap(i64 %0) #0 { define i16 @u16_bswapload(ptr %0) #0 { ; SPARC32-LABEL: u16_bswapload: ; SPARC32: ! %bb.0: -; SPARC32-NEXT: lduh [%o0], %o0 -; SPARC32-NEXT: srl %o0, 8, %o1 -; SPARC32-NEXT: sll %o0, 8, %o0 ; SPARC32-NEXT: retl -; SPARC32-NEXT: or %o0, %o1, %o0 +; SPARC32-NEXT: lduha [%o0] #ASI_P_L, %o0 ; ; SPARCEL-LABEL: u16_bswapload: ; SPARCEL: ! %bb.0: -; SPARCEL-NEXT: lduh [%o0], %o0 -; SPARCEL-NEXT: srl %o0, 8, %o1 -; SPARCEL-NEXT: sll %o0, 8, %o0 ; SPARCEL-NEXT: retl -; SPARCEL-NEXT: or %o0, %o1, %o0 +; SPARCEL-NEXT: lduha [%o0] #ASI_P, %o0 ; ; SPARC64-LABEL: u16_bswapload: ; SPARC64: ! %bb.0: -; SPARC64-NEXT: lduh [%o0], %o0 -; SPARC64-NEXT: srl %o0, 8, %o1 -; SPARC64-NEXT: sll %o0, 8, %o0 ; SPARC64-NEXT: retl -; SPARC64-NEXT: or %o0, %o1, %o0 +; SPARC64-NEXT: lduha [%o0] #ASI_P_L, %o0 %2 = load i16, ptr %0, align 2 %3 = tail call i16 @llvm.bswap.i16(i16 %2) ret i16 %3 @@ -206,51 +133,18 @@ define i16 @u16_bswapload(ptr %0) #0 { define i32 @u32_bswapload(ptr %0) #0 { ; SPARC32-LABEL: u32_bswapload: ; SPARC32: ! %bb.0: -; SPARC32-NEXT: ld [%o0], %o0 -; SPARC32-NEXT: srl %o0, 8, %o1 -; SPARC32-NEXT: sethi 63, %o2 -; SPARC32-NEXT: or %o2, 768, %o2 -; SPARC32-NEXT: and %o1, %o2, %o1 -; SPARC32-NEXT: srl %o0, 24, %o3 -; SPARC32-NEXT: or %o1, %o3, %o1 -; SPARC32-NEXT: and %o0, %o2, %o2 -; SPARC32-NEXT: sll %o2, 8, %o2 -; SPARC32-NEXT: sll %o0, 24, %o0 -; SPARC32-NEXT: or %o0, %o2, %o0 ; SPARC32-NEXT: retl -; SPARC32-NEXT: or %o0, %o1, %o0 +; SPARC32-NEXT: lda [%o0] #ASI_P_L, %o0 ; ; SPARCEL-LABEL: u32_bswapload: ; SPARCEL: ! 
%bb.0: -; SPARCEL-NEXT: ld [%o0], %o0 -; SPARCEL-NEXT: srl %o0, 8, %o1 -; SPARCEL-NEXT: sethi 63, %o2 -; SPARCEL-NEXT: or %o2, 768, %o2 -; SPARCEL-NEXT: and %o1, %o2, %o1 -; SPARCEL-NEXT: srl %o0, 24, %o3 -; SPARCEL-NEXT: or %o1, %o3, %o1 -; SPARCEL-NEXT: and %o0, %o2, %o2 -; SPARCEL-NEXT: sll %o2, 8, %o2 -; SPARCEL-NEXT: sll %o0, 24, %o0 -; SPARCEL-NEXT: or %o0, %o2, %o0 ; SPARCEL-NEXT: retl -; SPARCEL-NEXT: or %o0, %o1, %o0 +; SPARCEL-NEXT: lda [%o0] #ASI_P, %o0 ; ; SPARC64-LABEL: u32_bswapload: ; SPARC64: ! %bb.0: -; SPARC64-NEXT: ld [%o0], %o0 -; SPARC64-NEXT: srl %o0, 8, %o1 -; SPARC64-NEXT: sethi 63, %o2 -; SPARC64-NEXT: or %o2, 768, %o2 -; SPARC64-NEXT: and %o1, %o2, %o1 -; SPARC64-NEXT: srl %o0, 24, %o3 -; SPARC64-NEXT: or %o1, %o3, %o1 -; SPARC64-NEXT: and %o0, %o2, %o2 -; SPARC64-NEXT: sll %o2, 8, %o2 -; SPARC64-NEXT: sll %o0, 24, %o0 -; SPARC64-NEXT: or %o0, %o2, %o0 ; SPARC64-NEXT: retl -; SPARC64-NEXT: or %o0, %o1, %o0 +; SPARC64-NEXT: lda [%o0] #ASI_P_L, %o0 %2 = load i32, ptr %0, align 4 %3 = tail call i32 @llvm.bswap.i32(i32 %2) ret i32 %3 @@ -259,84 +153,34 @@ define i32 @u32_bswapload(ptr %0) #0 { define i64 @u64_bswapload(ptr %0) #0 { ; SPARC32-LABEL: u64_bswapload: ; SPARC32: ! 
%bb.0: -; SPARC32-NEXT: ldd [%o0], %o2 -; SPARC32-NEXT: srl %o3, 8, %o0 -; SPARC32-NEXT: sethi 63, %o1 -; SPARC32-NEXT: or %o1, 768, %o1 -; SPARC32-NEXT: and %o0, %o1, %o0 -; SPARC32-NEXT: srl %o3, 24, %o4 -; SPARC32-NEXT: or %o0, %o4, %o0 -; SPARC32-NEXT: and %o3, %o1, %o4 -; SPARC32-NEXT: sll %o4, 8, %o4 -; SPARC32-NEXT: sll %o3, 24, %o5 -; SPARC32-NEXT: or %o5, %o4, %o4 -; SPARC32-NEXT: or %o4, %o0, %o0 -; SPARC32-NEXT: srl %o2, 8, %o4 -; SPARC32-NEXT: and %o4, %o1, %o4 -; SPARC32-NEXT: srl %o2, 24, %o5 -; SPARC32-NEXT: or %o4, %o5, %o4 -; SPARC32-NEXT: and %o2, %o1, %o1 -; SPARC32-NEXT: sll %o1, 8, %o1 -; SPARC32-NEXT: sll %o2, 24, %o2 -; SPARC32-NEXT: or %o2, %o1, %o1 +; SPARC32-NEXT: add %sp, -104, %sp +; SPARC32-NEXT: ldd [%o0], %o0 +; SPARC32-NEXT: add %sp, 96, %o2 +; SPARC32-NEXT: sta %o1, [%o2] #ASI_P_L +; SPARC32-NEXT: add %sp, 100, %o2 +; SPARC32-NEXT: sta %o0, [%o2] #ASI_P_L +; SPARC32-NEXT: ld [%sp+96], %o0 +; SPARC32-NEXT: ld [%sp+100], %o1 ; SPARC32-NEXT: retl -; SPARC32-NEXT: or %o1, %o4, %o1 +; SPARC32-NEXT: add %sp, 104, %sp ; ; SPARCEL-LABEL: u64_bswapload: ; SPARCEL: ! 
%bb.0: -; SPARCEL-NEXT: ldd [%o0], %o2 -; SPARCEL-NEXT: srl %o3, 8, %o0 -; SPARCEL-NEXT: sethi 63, %o1 -; SPARCEL-NEXT: or %o1, 768, %o1 -; SPARCEL-NEXT: and %o0, %o1, %o0 -; SPARCEL-NEXT: srl %o3, 24, %o4 -; SPARCEL-NEXT: or %o0, %o4, %o0 -; SPARCEL-NEXT: and %o3, %o1, %o4 -; SPARCEL-NEXT: sll %o4, 8, %o4 -; SPARCEL-NEXT: sll %o3, 24, %o5 -; SPARCEL-NEXT: or %o5, %o4, %o4 -; SPARCEL-NEXT: or %o4, %o0, %o0 -; SPARCEL-NEXT: srl %o2, 8, %o4 -; SPARCEL-NEXT: and %o4, %o1, %o4 -; SPARCEL-NEXT: srl %o2, 24, %o5 -; SPARCEL-NEXT: or %o4, %o5, %o4 -; SPARCEL-NEXT: and %o2, %o1, %o1 -; SPARCEL-NEXT: sll %o1, 8, %o1 -; SPARCEL-NEXT: sll %o2, 24, %o2 -; SPARCEL-NEXT: or %o2, %o1, %o1 +; SPARCEL-NEXT: add %sp, -104, %sp +; SPARCEL-NEXT: ldd [%o0], %o0 +; SPARCEL-NEXT: add %sp, 96, %o2 +; SPARCEL-NEXT: sta %o1, [%o2] #ASI_P +; SPARCEL-NEXT: add %sp, 100, %o2 +; SPARCEL-NEXT: sta %o0, [%o2] #ASI_P +; SPARCEL-NEXT: ld [%sp+96], %o0 +; SPARCEL-NEXT: ld [%sp+100], %o1 ; SPARCEL-NEXT: retl -; SPARCEL-NEXT: or %o1, %o4, %o1 +; SPARCEL-NEXT: add %sp, 104, %sp ; ; SPARC64-LABEL: u64_bswapload: -; SPARC64: .register %g2, #scratch -; SPARC64-NEXT: ! 
%bb.0: -; SPARC64-NEXT: ldx [%o0], %o0 -; SPARC64-NEXT: srlx %o0, 24, %o1 -; SPARC64-NEXT: sethi 16320, %o2 -; SPARC64-NEXT: and %o1, %o2, %o1 -; SPARC64-NEXT: srlx %o0, 8, %o3 -; SPARC64-NEXT: sethi 4177920, %o4 -; SPARC64-NEXT: and %o3, %o4, %o3 -; SPARC64-NEXT: or %o3, %o1, %o1 -; SPARC64-NEXT: srlx %o0, 40, %o3 -; SPARC64-NEXT: sethi 63, %o5 -; SPARC64-NEXT: or %o5, 768, %o5 -; SPARC64-NEXT: and %o3, %o5, %o3 -; SPARC64-NEXT: srlx %o0, 56, %g2 -; SPARC64-NEXT: or %o3, %g2, %o3 -; SPARC64-NEXT: or %o1, %o3, %o1 -; SPARC64-NEXT: and %o0, %o4, %o3 -; SPARC64-NEXT: sllx %o3, 8, %o3 -; SPARC64-NEXT: and %o0, %o2, %o2 -; SPARC64-NEXT: sllx %o2, 24, %o2 -; SPARC64-NEXT: or %o2, %o3, %o2 -; SPARC64-NEXT: and %o0, %o5, %o3 -; SPARC64-NEXT: sllx %o3, 40, %o3 -; SPARC64-NEXT: sllx %o0, 56, %o0 -; SPARC64-NEXT: or %o0, %o3, %o0 -; SPARC64-NEXT: or %o0, %o2, %o0 +; SPARC64: ! %bb.0: ; SPARC64-NEXT: retl -; SPARC64-NEXT: or %o0, %o1, %o0 +; SPARC64-NEXT: ldxa [%o0] #ASI_P_L, %o0 %2 = load i64, ptr %0, align 8 %3 = tail call i64 @llvm.bswap.i64(i64 %2) ret i64 %3 @@ -345,36 +189,18 @@ define i64 @u64_bswapload(ptr %0) #0 { define void @u16_bswapstore(ptr %0, i16 %1) #0 { ; SPARC32-LABEL: u16_bswapstore: ; SPARC32: ! %bb.0: -; SPARC32-NEXT: sethi 63, %o2 -; SPARC32-NEXT: or %o2, 768, %o2 -; SPARC32-NEXT: and %o1, %o2, %o2 -; SPARC32-NEXT: srl %o2, 8, %o2 -; SPARC32-NEXT: sll %o1, 8, %o1 -; SPARC32-NEXT: or %o1, %o2, %o1 ; SPARC32-NEXT: retl -; SPARC32-NEXT: sth %o1, [%o0] +; SPARC32-NEXT: stha %o1, [%o0] #ASI_P_L ; ; SPARCEL-LABEL: u16_bswapstore: ; SPARCEL: ! %bb.0: -; SPARCEL-NEXT: sethi 63, %o2 -; SPARCEL-NEXT: or %o2, 768, %o2 -; SPARCEL-NEXT: and %o1, %o2, %o2 -; SPARCEL-NEXT: srl %o2, 8, %o2 -; SPARCEL-NEXT: sll %o1, 8, %o1 -; SPARCEL-NEXT: or %o1, %o2, %o1 ; SPARCEL-NEXT: retl -; SPARCEL-NEXT: sth %o1, [%o0] +; SPARCEL-NEXT: stha %o1, [%o0] #ASI_P ; ; SPARC64-LABEL: u16_bswapstore: ; SPARC64: ! 
%bb.0: -; SPARC64-NEXT: sethi 63, %o2 -; SPARC64-NEXT: or %o2, 768, %o2 -; SPARC64-NEXT: and %o1, %o2, %o2 -; SPARC64-NEXT: srl %o2, 8, %o2 -; SPARC64-NEXT: sll %o1, 8, %o1 -; SPARC64-NEXT: or %o1, %o2, %o1 ; SPARC64-NEXT: retl -; SPARC64-NEXT: sth %o1, [%o0] +; SPARC64-NEXT: stha %o1, [%o0] #ASI_P_L %3 = tail call i16 @llvm.bswap.i16(i16 %1) store i16 %3, ptr %0, align 2 ret void @@ -383,51 +209,18 @@ define void @u16_bswapstore(ptr %0, i16 %1) #0 { define void @u32_bswapstore(ptr %0, i32 %1) #0 { ; SPARC32-LABEL: u32_bswapstore: ; SPARC32: ! %bb.0: -; SPARC32-NEXT: srl %o1, 8, %o2 -; SPARC32-NEXT: sethi 63, %o3 -; SPARC32-NEXT: or %o3, 768, %o3 -; SPARC32-NEXT: and %o2, %o3, %o2 -; SPARC32-NEXT: srl %o1, 24, %o4 -; SPARC32-NEXT: or %o2, %o4, %o2 -; SPARC32-NEXT: and %o1, %o3, %o3 -; SPARC32-NEXT: sll %o3, 8, %o3 -; SPARC32-NEXT: sll %o1, 24, %o1 -; SPARC32-NEXT: or %o1, %o3, %o1 -; SPARC32-NEXT: or %o1, %o2, %o1 ; SPARC32-NEXT: retl -; SPARC32-NEXT: st %o1, [%o0] +; SPARC32-NEXT: sta %o1, [%o0] #ASI_P_L ; ; SPARCEL-LABEL: u32_bswapstore: ; SPARCEL: ! %bb.0: -; SPARCEL-NEXT: srl %o1, 8, %o2 -; SPARCEL-NEXT: sethi 63, %o3 -; SPARCEL-NEXT: or %o3, 768, %o3 -; SPARCEL-NEXT: and %o2, %o3, %o2 -; SPARCEL-NEXT: srl %o1, 24, %o4 -; SPARCEL-NEXT: or %o2, %o4, %o2 -; SPARCEL-NEXT: and %o1, %o3, %o3 -; SPARCEL-NEXT: sll %o3, 8, %o3 -; SPARCEL-NEXT: sll %o1, 24, %o1 -; SPARCEL-NEXT: or %o1, %o3, %o1 -; SPARCEL-NEXT: or %o1, %o2, %o1 ; SPARCEL-NEXT: retl -; SPARCEL-NEXT: st %o1, [%o0] +; SPARCEL-NEXT: sta %o1, [%o0] #ASI_P ; ; SPARC64-LABEL: u32_bswapstore: ; SPARC64: ! 
%bb.0: -; SPARC64-NEXT: srl %o1, 8, %o2 -; SPARC64-NEXT: sethi 63, %o3 -; SPARC64-NEXT: or %o3, 768, %o3 -; SPARC64-NEXT: and %o2, %o3, %o2 -; SPARC64-NEXT: srl %o1, 24, %o4 -; SPARC64-NEXT: or %o2, %o4, %o2 -; SPARC64-NEXT: and %o1, %o3, %o3 -; SPARC64-NEXT: sll %o3, 8, %o3 -; SPARC64-NEXT: sll %o1, 24, %o1 -; SPARC64-NEXT: or %o1, %o3, %o1 -; SPARC64-NEXT: or %o1, %o2, %o1 ; SPARC64-NEXT: retl -; SPARC64-NEXT: st %o1, [%o0] +; SPARC64-NEXT: sta %o1, [%o0] #ASI_P_L %3 = tail call i32 @llvm.bswap.i32(i32 %1) store i32 %3, ptr %0, align 4 ret void @@ -436,85 +229,34 @@ define void @u32_bswapstore(ptr %0, i32 %1) #0 { define void @u64_bswapstore(ptr %0, i64 %1) #0 { ; SPARC32-LABEL: u64_bswapstore: ; SPARC32: ! %bb.0: -; SPARC32-NEXT: srl %o1, 8, %o3 -; SPARC32-NEXT: sethi 63, %o4 -; SPARC32-NEXT: or %o4, 768, %o4 -; SPARC32-NEXT: and %o3, %o4, %o3 -; SPARC32-NEXT: srl %o1, 24, %o5 -; SPARC32-NEXT: or %o3, %o5, %o3 -; SPARC32-NEXT: and %o1, %o4, %o5 -; SPARC32-NEXT: sll %o5, 8, %o5 -; SPARC32-NEXT: sll %o1, 24, %o1 -; SPARC32-NEXT: or %o1, %o5, %o1 -; SPARC32-NEXT: or %o1, %o3, %g3 -; SPARC32-NEXT: srl %o2, 8, %o1 -; SPARC32-NEXT: and %o1, %o4, %o1 -; SPARC32-NEXT: srl %o2, 24, %o3 -; SPARC32-NEXT: or %o1, %o3, %o1 -; SPARC32-NEXT: and %o2, %o4, %o3 -; SPARC32-NEXT: sll %o3, 8, %o3 -; SPARC32-NEXT: sll %o2, 24, %o2 -; SPARC32-NEXT: or %o2, %o3, %o2 -; SPARC32-NEXT: or %o2, %o1, %g2 +; SPARC32-NEXT: add %sp, -104, %sp +; SPARC32-NEXT: add %sp, 96, %o3 +; SPARC32-NEXT: sta %o1, [%o3] #ASI_P_L +; SPARC32-NEXT: add %sp, 100, %o1 +; SPARC32-NEXT: sta %o2, [%o1] #ASI_P_L +; SPARC32-NEXT: ld [%sp+96], %o3 +; SPARC32-NEXT: ld [%sp+100], %o2 +; SPARC32-NEXT: std %o2, [%o0] ; SPARC32-NEXT: retl -; SPARC32-NEXT: std %g2, [%o0] +; SPARC32-NEXT: add %sp, 104, %sp ; ; SPARCEL-LABEL: u64_bswapstore: ; SPARCEL: ! 
%bb.0: -; SPARCEL-NEXT: srl %o1, 8, %o3 -; SPARCEL-NEXT: sethi 63, %o4 -; SPARCEL-NEXT: or %o4, 768, %o4 -; SPARCEL-NEXT: and %o3, %o4, %o3 -; SPARCEL-NEXT: srl %o1, 24, %o5 -; SPARCEL-NEXT: or %o3, %o5, %o3 -; SPARCEL-NEXT: and %o1, %o4, %o5 -; SPARCEL-NEXT: sll %o5, 8, %o5 -; SPARCEL-NEXT: sll %o1, 24, %o1 -; SPARCEL-NEXT: or %o1, %o5, %o1 -; SPARCEL-NEXT: or %o1, %o3, %g3 -; SPARCEL-NEXT: srl %o2, 8, %o1 -; SPARCEL-NEXT: and %o1, %o4, %o1 -; SPARCEL-NEXT: srl %o2, 24, %o3 -; SPARCEL-NEXT: or %o1, %o3, %o1 -; SPARCEL-NEXT: and %o2, %o4, %o3 -; SPARCEL-NEXT: sll %o3, 8, %o3 -; SPARCEL-NEXT: sll %o2, 24, %o2 -; SPARCEL-NEXT: or %o2, %o3, %o2 -; SPARCEL-NEXT: or %o2, %o1, %g2 +; SPARCEL-NEXT: add %sp, -104, %sp +; SPARCEL-NEXT: add %sp, 96, %o3 +; SPARCEL-NEXT: sta %o1, [%o3] #ASI_P +; SPARCEL-NEXT: add %sp, 100, %o1 +; SPARCEL-NEXT: sta %o2, [%o1] #ASI_P +; SPARCEL-NEXT: ld [%sp+96], %o3 +; SPARCEL-NEXT: ld [%sp+100], %o2 +; SPARCEL-NEXT: std %o2, [%o0] ; SPARCEL-NEXT: retl -; SPARCEL-NEXT: std %g2, [%o0] +; SPARCEL-NEXT: add %sp, 104, %sp ; ; SPARC64-LABEL: u64_bswapstore: -; SPARC64: .register %g2, #scratch -; SPARC64-NEXT: .register %g3, #scratch -; SPARC64-NEXT: ! 
%bb.0: -; SPARC64-NEXT: srlx %o1, 24, %o2 -; SPARC64-NEXT: sethi 16320, %o3 -; SPARC64-NEXT: and %o2, %o3, %o2 -; SPARC64-NEXT: srlx %o1, 8, %o4 -; SPARC64-NEXT: sethi 4177920, %o5 -; SPARC64-NEXT: and %o4, %o5, %o4 -; SPARC64-NEXT: or %o4, %o2, %o2 -; SPARC64-NEXT: srlx %o1, 40, %o4 -; SPARC64-NEXT: sethi 63, %g2 -; SPARC64-NEXT: or %g2, 768, %g2 -; SPARC64-NEXT: and %o4, %g2, %o4 -; SPARC64-NEXT: srlx %o1, 56, %g3 -; SPARC64-NEXT: or %o4, %g3, %o4 -; SPARC64-NEXT: or %o2, %o4, %o2 -; SPARC64-NEXT: and %o1, %o5, %o4 -; SPARC64-NEXT: sllx %o4, 8, %o4 -; SPARC64-NEXT: and %o1, %o3, %o3 -; SPARC64-NEXT: sllx %o3, 24, %o3 -; SPARC64-NEXT: or %o3, %o4, %o3 -; SPARC64-NEXT: and %o1, %g2, %o4 -; SPARC64-NEXT: sllx %o4, 40, %o4 -; SPARC64-NEXT: sllx %o1, 56, %o1 -; SPARC64-NEXT: or %o1, %o4, %o1 -; SPARC64-NEXT: or %o1, %o3, %o1 -; SPARC64-NEXT: or %o1, %o2, %o1 +; SPARC64: ! %bb.0: ; SPARC64-NEXT: retl -; SPARC64-NEXT: stx %o1, [%o0] +; SPARC64-NEXT: stxa %o1, [%o0] #ASI_P_L %3 = tail call i64 @llvm.bswap.i64(i64 %1) store i64 %3, ptr %0, align 8 ret void From f91af40368bb1a8009c5f010bbf35dcc042ac75a Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 22 Jun 2026 09:43:55 -0500 Subject: [PATCH 024/511] [flang][OpenMP] Move unique clauses to allowedOnceClauses in OMP.td (#204995) Many unique clauses were listed in "allowedClauses", which turned off the single-occurrence check in flang. Move these clauses to the right category to enable this check. One exception to this is the IF clause: the IF clause is unique for all non-compound directives, but is repeatable on compound ones with the restriction that at most one IF clause can apply to any of the constituents. This restriction is currently not enforced correctly in flang, and so the IF clause was left unchanged. Although this change is applied to a file shared between flang and clang, clang does not use these categories for its checks, and hence is not affected by this patch. 
--- flang/lib/Semantics/check-omp-structure.cpp | 3 - llvm/include/llvm/Frontend/OpenMP/OMP.td | 566 +++++++++++--------- 2 files changed, 309 insertions(+), 260 deletions(-) diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 022c774cd2adc..b61662995ad57 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -2380,9 +2380,6 @@ void OmpStructureChecker::Leave(const parser::OmpDeclareTargetDirective &x) { context_.Warn(common::UsageWarning::OpenMPUsage, toClause->source, "The usage of TO clause on DECLARE TARGET directive has been deprecated. Use ENTER clause instead."_warn_en_US); } - if (indirectClause) { - CheckAllowedClause(llvm::omp::Clause::OMPC_indirect); - } } bool toClauseFound{false}; diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index e622d856f7cf9..65118c96bc2e8 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -681,9 +681,9 @@ def OMP_Assume : Directive<[Spelling<"assume">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, ]; } def OMP_Atomic : Directive<[Spelling<"atomic">]> { @@ -730,11 +730,13 @@ def OMP_EndAssumes : Directive<[Spelling<"end assumes">]> { def OMP_BeginDeclareTarget : Directive<[Spelling<"begin declare target", 1, 52>, Spelling<"begin declare_target", 60>]> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + ]; let association = AS_Delimited; let category = CA_Declarative; let languages = [L_C]; @@ -827,13 +829,13 @@ def OMP_DeclareTarget : Directive<[Spelling<"declare target", 1, 52>, Spelling<"declare_target", 60>]> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, ]; 
let allowedOnceClauses = [ VersionedClause, + VersionedClause, ]; let association = AS_None; let category = CA_Declarative; @@ -844,8 +846,8 @@ def OMP_DeclareVariant : Directive<[Spelling<"declare variant", 1, 52>, VersionedClause, ]; let allowedOnceClauses = [ - VersionedClause, VersionedClause, + VersionedClause, ]; let association = AS_Declaration; let category = CA_Declarative; @@ -859,6 +861,8 @@ def OMP_Depobj : Directive<[Spelling<"depobj">]> { VersionedClause, VersionedClause, VersionedClause, + ]; + let allowedOnceClauses = [ VersionedClause, ]; let association = AS_None; @@ -867,9 +871,11 @@ def OMP_Depobj : Directive<[Spelling<"depobj">]> { def OMP_dispatch : Directive<[Spelling<"dispatch">]> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -922,7 +928,7 @@ def OMP_EndDo : Directive<[Spelling<"end do">]> { let languages = OMP_Do.languages; } def OMP_Error : Directive<[Spelling<"error">]> { - let allowedClauses = [ + let allowedOnceClauses = [ VersionedClause, VersionedClause, VersionedClause, @@ -956,15 +962,17 @@ def OMP_Flush : Directive<[Spelling<"flush">]> { def OMP_For : Directive<[Spelling<"for">]> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, ]; let association = AS_LoopNest; @@ -998,11 +1006,13 @@ def OMP_interop : Directive<[Spelling<"interop">]> { let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + ]; let association = AS_None; let category = CA_Executable; } @@ -1036,8 +1046,8 @@ def 
OMP_Metadirective : Directive<[Spelling<"metadirective">]> { VersionedClause, ]; let allowedOnceClauses = [ - VersionedClause, VersionedClause, + VersionedClause, ]; let association = AS_None; let category = CA_Meta; @@ -1069,24 +1079,26 @@ def OMP_Parallel : Directive<[Spelling<"parallel">]> { VersionedClause, VersionedClause, ]; - let allowedOnceClauses = [VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, ]; let association = AS_Block; let category = CA_Executable; } def OMP_Requires : Directive<[Spelling<"requires">]> { let allowedOnceClauses = [ - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, ]; let association = AS_None; let category = CA_Informational; @@ -1096,7 +1108,7 @@ def OMP_Reverse : Directive<[Spelling<"reverse">]> { let category = CA_Executable; } def OMP_Scan : Directive<[Spelling<"scan">]> { - let allowedClauses = [ + let allowedOnceClauses = [ VersionedClause, VersionedClause, ]; @@ -1134,10 +1146,12 @@ def OMP_Sections : Directive<[Spelling<"sections">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, ]; + let allowedOnceClauses = [ + VersionedClause, + ]; let association = AS_Block; let category = CA_Executable; } @@ -1208,9 +1222,9 @@ def OMP_Target : Directive<[Spelling<"target">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, ]; let allowedOnceClauses = [ + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1226,9 +1240,9 @@ def OMP_Target : Directive<[Spelling<"target">]> { def OMP_TargetData : Directive<[Spelling<"target data", 1, 52>, Spelling<"target_data", 60>]> 
{ let allowedOnceClauses = [ + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, ]; let requiredClauses = [ @@ -1296,20 +1310,20 @@ def OMP_Task : Directive<[Spelling<"task">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, ]; let association = AS_Block; let category = CA_Executable; @@ -1338,22 +1352,22 @@ def OMP_TaskLoop : Directive<[Spelling<"taskloop">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, ]; let allowedExclusiveClauses = [ VersionedClause, @@ -1365,9 +1379,9 @@ def OMP_TaskLoop : Directive<[Spelling<"taskloop">]> { def OMP_TaskWait : Directive<[Spelling<"taskwait">]> { let allowedClauses = [ VersionedClause, - VersionedClause, ]; let allowedOnceClauses = [ + VersionedClause, VersionedClause, ]; let association = AS_None; @@ -1479,7 +1493,6 @@ def OMP_DistributeParallelDo : Directive<[Spelling<"distribute parallel do">]> { let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1489,6 +1502,7 @@ def OMP_DistributeParallelDo : Directive<[Spelling<"distribute parallel do">]> { ]; let allowedOnceClauses = [ VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1507,25 +1521,27 @@ def OMP_DistributeParallelDoSimd let allowedClauses = [ VersionedClause, 
VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, ]; let leafConstructs = [OMP_Distribute, OMP_Parallel, OMP_Do, OMP_Simd]; @@ -1536,23 +1552,25 @@ def OMP_DistributeParallelFor : Directive<[Spelling<"distribute parallel for">]> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, ]; let leafConstructs = [OMP_Distribute, OMP_Parallel, OMP_For]; let category = CA_Executable; @@ -1563,26 +1581,28 @@ def OMP_DistributeParallelForSimd let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, 
]; let leafConstructs = [OMP_Distribute, OMP_Parallel, OMP_For, OMP_Simd]; @@ -1594,7 +1614,6 @@ def OMP_DistributeSimd : Directive<[Spelling<"distribute simd">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1604,6 +1623,7 @@ def OMP_DistributeSimd : Directive<[Spelling<"distribute simd">]> { ]; let allowedOnceClauses = [ VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1654,17 +1674,19 @@ def OMP_ForSimd : Directive<[Spelling<"for simd">]> { let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1692,13 +1714,13 @@ def OMP_target_loop : Directive<[Spelling<"target loop">]> { let allowedOnceClauses = [ VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, ]; let leafConstructs = [OMP_Target, OMP_loop]; let category = CA_Executable; @@ -1706,22 +1728,24 @@ def OMP_target_loop : Directive<[Spelling<"target loop">]> { def OMP_MaskedTaskloop : Directive<[Spelling<"masked taskloop">]> { let allowedClauses = [ VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - 
VersionedClause, - VersionedClause, VersionedClause, ]; let leafConstructs = [OMP_masked, OMP_TaskLoop]; @@ -1731,26 +1755,28 @@ def OMP_MaskedTaskloopSimd : Directive<[Spelling<"masked taskloop simd">]> { let allowedClauses = [ VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, ]; @@ -1760,21 +1786,23 @@ def OMP_MaskedTaskloopSimd : Directive<[Spelling<"masked taskloop simd">]> { def OMP_MasterTaskloop : Directive<[Spelling<"master taskloop">]> { let allowedClauses = [ VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, ]; let leafConstructs = [OMP_Master, OMP_TaskLoop]; @@ -1784,25 +1812,27 @@ def OMP_MasterTaskloopSimd : Directive<[Spelling<"master taskloop simd">]> { let allowedClauses = [ VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - 
VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, ]; @@ -1813,7 +1843,6 @@ def OMP_ParallelDo : Directive<[Spelling<"parallel do">]> { let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1823,6 +1852,7 @@ def OMP_ParallelDo : Directive<[Spelling<"parallel do">]> { ]; let allowedOnceClauses = [ VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1841,7 +1871,6 @@ def OMP_ParallelDoSimd : Directive<[Spelling<"parallel do simd">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1853,6 +1882,7 @@ def OMP_ParallelDoSimd : Directive<[Spelling<"parallel do simd">]> { ]; let allowedOnceClauses = [ VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1860,8 +1890,8 @@ def OMP_ParallelDoSimd : Directive<[Spelling<"parallel do simd">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, ]; let leafConstructs = [OMP_Parallel, OMP_Do, OMP_Simd]; let category = CA_Executable; @@ -1870,24 +1900,26 @@ def OMP_ParallelDoSimd : Directive<[Spelling<"parallel do simd">]> { def OMP_ParallelFor : Directive<[Spelling<"parallel for">]> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, 
VersionedClause, VersionedClause, - VersionedClause, ]; let leafConstructs = [OMP_Parallel, OMP_For]; let category = CA_Executable; @@ -1897,26 +1929,28 @@ def OMP_ParallelForSimd : Directive<[Spelling<"parallel for simd">]> { let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, ]; let leafConstructs = [OMP_Parallel, OMP_For, OMP_Simd]; @@ -1952,47 +1986,51 @@ def OMP_ParallelMasked : Directive<[Spelling<"parallel masked">]> { let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, ]; - let leafConstructs = [OMP_Parallel, OMP_masked]; - let category = CA_Executable; -} + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let leafConstructs = [OMP_Parallel, OMP_masked]; + let category = CA_Executable; +} def OMP_ParallelMaskedTaskloop : Directive<[Spelling<"parallel masked taskloop">]> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, 
VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, ]; let leafConstructs = [OMP_Parallel, OMP_masked, OMP_TaskLoop]; @@ -2003,31 +2041,33 @@ def OMP_ParallelMaskedTaskloopSimd let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, ]; @@ -2038,17 +2078,19 @@ def OMP_ParallelMaster : Directive<[Spelling<"parallel master">]> { let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, ]; let leafConstructs = [OMP_Parallel, OMP_Master]; let category = CA_Executable; @@ -2057,26 +2099,28 @@ def OMP_ParallelMasterTaskloop : Directive<[Spelling<"parallel master taskloop">]> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let 
allowedOnceClauses = [ + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, ]; let leafConstructs = [OMP_Parallel, OMP_Master, OMP_TaskLoop]; @@ -2087,30 +2131,32 @@ def OMP_ParallelMasterTaskloopSimd let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, ]; @@ -2121,19 +2167,19 @@ def OMP_ParallelSections : Directive<[Spelling<"parallel sections">]> { let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, ]; let allowedOnceClauses = [ + VersionedClause, VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, ]; let leafConstructs = [OMP_Parallel, OMP_Sections]; @@ -2143,13 +2189,13 @@ def OMP_ParallelWorkshare : Directive<[Spelling<"parallel workshare">]> { let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, ]; let allowedOnceClauses = [ + 
VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2163,7 +2209,6 @@ def OMP_ParallelWorkshare : Directive<[Spelling<"parallel workshare">]> { def OMP_TargetParallel : Directive<[Spelling<"target parallel">]> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2171,7 +2216,6 @@ def OMP_TargetParallel : Directive<[Spelling<"target parallel">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2179,9 +2223,11 @@ def OMP_TargetParallel : Directive<[Spelling<"target parallel">]> { VersionedClause, ]; let allowedOnceClauses = [ + VersionedClause, VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2193,8 +2239,6 @@ def OMP_TargetParallel : Directive<[Spelling<"target parallel">]> { } def OMP_TargetParallelDo : Directive<[Spelling<"target parallel do">]> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2210,7 +2254,9 @@ def OMP_TargetParallelDo : Directive<[Spelling<"target parallel do">]> { VersionedClause, ]; let allowedOnceClauses = [ + VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2231,11 +2277,7 @@ def OMP_TargetParallelDoSimd let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2244,21 +2286,27 @@ def OMP_TargetParallelDoSimd VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, 
VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, ]; let leafConstructs = [OMP_Target, OMP_Parallel, OMP_Do, OMP_Simd]; let category = CA_Executable; @@ -2267,11 +2315,7 @@ def OMP_TargetParallelDoSimd def OMP_TargetParallelFor : Directive<[Spelling<"target parallel for">]> { let allowedClauses = [ VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2280,22 +2324,26 @@ def OMP_TargetParallelFor : Directive<[Spelling<"target parallel for">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, ]; let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, ]; let leafConstructs = [OMP_Target, OMP_Parallel, OMP_For]; @@ -2307,11 +2355,7 @@ def OMP_TargetParallelForSimd let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2320,25 +2364,29 @@ def OMP_TargetParallelForSimd VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, 
- VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - ]; - let allowedOnceClauses = [ - VersionedClause, VersionedClause, ]; let leafConstructs = [OMP_Target, OMP_Parallel, OMP_For, OMP_Simd]; @@ -2349,7 +2397,6 @@ def OMP_target_parallel_loop : Directive<[Spelling<"target parallel loop">]> { let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2368,6 +2415,7 @@ def OMP_target_parallel_loop : Directive<[Spelling<"target parallel loop">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2394,7 +2442,6 @@ def OMP_TargetSimd : Directive<[Spelling<"target simd">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2406,6 +2453,7 @@ def OMP_TargetSimd : Directive<[Spelling<"target simd">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2441,8 +2489,8 @@ def OMP_TargetTeams : Directive<[Spelling<"target teams">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, VersionedClause, ]; let leafConstructs = [OMP_Target, OMP_Teams]; @@ -2567,12 +2615,7 @@ def OMP_TargetTeamsDistributeParallelFor : Directive<[Spelling<"target teams distribute parallel for">]> { let allowedClauses = [ VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2580,23 +2623,28 @@ def OMP_TargetTeamsDistributeParallelFor VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + 
VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - ]; - let allowedOnceClauses = [ - VersionedClause, ]; let leafConstructs = [OMP_Target, OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_For]; @@ -2608,12 +2656,7 @@ def OMP_TargetTeamsDistributeParallelForSimd let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2622,26 +2665,31 @@ def OMP_TargetTeamsDistributeParallelForSimd VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - ]; - let allowedOnceClauses = [ - VersionedClause, ]; let leafConstructs = [OMP_Target, OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_For, OMP_Simd]; @@ -2668,10 +2716,10 @@ def OMP_TargetTeamsDistributeSimd VersionedClause, VersionedClause, VersionedClause, - VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2707,8 +2755,8 @@ def OMP_TargetTeamsWorkdistribute : 
Directive<[Spelling<"target teams workdistri VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, VersionedClause, ]; let leafConstructs = [OMP_Target, OMP_Teams, OMP_Workdistribute]; @@ -2718,9 +2766,7 @@ def OMP_TargetTeamsWorkdistribute : Directive<[Spelling<"target teams workdistri def OMP_target_teams_loop : Directive<[Spelling<"target teams loop">]> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2738,6 +2784,8 @@ def OMP_target_teams_loop : Directive<[Spelling<"target teams loop">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2751,27 +2799,27 @@ def OMP_TaskLoopSimd : Directive<[Spelling<"taskloop simd">]> { let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, + VersionedClause, ]; let allowedExclusiveClauses = [ VersionedClause, @@ -2783,22 +2831,22 @@ def OMP_TaskLoopSimd : Directive<[Spelling<"taskloop simd">]> { def OMP_TeamsDistribute : Directive<[Spelling<"teams distribute">]> { let allowedClauses = [ VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, ]; let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, 
]; let leafConstructs = [OMP_Teams, OMP_Distribute]; let category = CA_Executable; @@ -2873,25 +2921,27 @@ def OMP_TeamsDistributeParallelFor : Directive<[Spelling<"teams distribute parallel for">]> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, ]; let leafConstructs = [OMP_Teams, OMP_Distribute, OMP_Parallel, OMP_For]; @@ -2903,27 +2953,29 @@ def OMP_TeamsDistributeParallelForSimd let allowedClauses = [ VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, ]; From 724d62edc655232369c10e5f7c21e65709aee43e Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Mon, 22 Jun 2026 10:52:01 -0400 Subject: [PATCH 025/511] [CodeGen] Join live range segments after dead def move (#204648) Moving a dead def upward can retag a following live-range segment to the same value as the previous segment. That leaves adjacent same-value segments, which live range verification rejects. 
Add a shared LiveRange helper for merging adjacent same-value segments. Use it in the existing value-number merge code and after retagging later segments for a moved dead def. Add an AMDGPU scheduler regression test. --- llvm/include/llvm/CodeGen/LiveInterval.h | 7 + llvm/lib/CodeGen/LiveInterval.cpp | 52 +- llvm/lib/CodeGen/LiveIntervals.cpp | 12 +- .../sched-handleMoveUp-dead-def-join.mir | 5126 +++++++++++++++++ 4 files changed, 5168 insertions(+), 29 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-dead-def-join.mir diff --git a/llvm/include/llvm/CodeGen/LiveInterval.h b/llvm/include/llvm/CodeGen/LiveInterval.h index 114978996ffcf..34f318a9d2715 100644 --- a/llvm/include/llvm/CodeGen/LiveInterval.h +++ b/llvm/include/llvm/CodeGen/LiveInterval.h @@ -490,6 +490,13 @@ namespace llvm { /// may have grown since it was inserted). LLVM_ABI iterator addSegment(Segment S); + /// Merge the segment pointed to by @p I with its immediate neighbors when + /// they use the same value number and touch it. @p I must be a valid + /// iterator into this live range. Returns an iterator to the merged + /// segment, which may be @p I or the previous segment if @p I was merged + /// into it. + LLVM_ABI iterator mergeAdjacentSegments(iterator I); + /// Attempt to extend a value defined after @p StartIdx to include @p Use. /// Both @p StartIdx and @p Use should be in the same basic block. 
In case /// of subranges, an extension could be prevented by an explicit "undef" diff --git a/llvm/lib/CodeGen/LiveInterval.cpp b/llvm/lib/CodeGen/LiveInterval.cpp index 299db85233c2d..ed048c411316e 100644 --- a/llvm/lib/CodeGen/LiveInterval.cpp +++ b/llvm/lib/CodeGen/LiveInterval.cpp @@ -540,6 +540,28 @@ LiveRange::iterator LiveRange::addSegment(Segment S) { return CalcLiveRangeUtilVector(this).addSegment(S); } +LiveRange::iterator LiveRange::mergeAdjacentSegments(iterator I) { + assert(segmentSet == nullptr && "Cannot merge with active segment set"); + assert(I != end() && "Cannot merge end iterator"); + + if (I != begin()) { + iterator Prev = std::prev(I); + if (Prev->valno == I->valno && Prev->end == I->start) { + Prev->end = I->end; + segments.erase(I); + I = Prev; + } + } + + iterator Next = std::next(I); + if (Next != end() && I->valno == Next->valno && I->end == Next->start) { + I->end = Next->end; + segments.erase(Next); + } + + return I; +} + void LiveRange::append(const Segment S) { // Check that the segment belongs to the back of the list. assert(segments.empty() || segments.back().end <= S.start); @@ -754,34 +776,10 @@ VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { iterator S = I++; if (S->valno != V1) continue; // Not a V1 Segment. - // Okay, we found a V1 live range. If it had a previous, touching, V2 live - // range, extend it. - if (S != begin()) { - iterator Prev = S-1; - if (Prev->valno == V2 && Prev->end == S->start) { - Prev->end = S->end; - - // Erase this live-range. - segments.erase(S); - I = Prev+1; - S = Prev; - } - } - - // Okay, now we have a V1 or V2 live range that is maximally merged forward. - // Ensure that it is a V2 live-range. + // After changing this segment to V2, it may touch an adjacent V2 segment. + // Merge with either neighbor before continuing. S->valno = V2; - - // If we can merge it into later V2 segments, do so now. 
We ignore any - // following V1 segments, as they will be merged in subsequent iterations - // of the loop. - if (I != end()) { - if (I->start == S->end && I->valno == V2) { - S->end = I->end; - segments.erase(I); - I = S+1; - } - } + I = std::next(mergeAdjacentSegments(S)); } // Now that V1 is dead, remove it. diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp index 6ed379418abef..3da53566ae0c2 100644 --- a/llvm/lib/CodeGen/LiveIntervals.cpp +++ b/llvm/lib/CodeGen/LiveIntervals.cpp @@ -1461,9 +1461,17 @@ class LiveIntervals::HMEditor { *(NewIdxOut + 1) = LiveRange::Segment( NewIdxDef.getRegSlot(), (NewIdxOut + 1)->end, OldIdxVNI); OldIdxVNI->def = NewIdxDef; - // Modify subsequent segments to be defined by the moved def OldIdxVNI. - for (auto *Idx = NewIdxOut + 2; Idx <= OldIdxOut; ++Idx) + // Retag the segments that were shifted down from [NewIdxOut + 2, + // OldIdxOut]. Retagging can make a segment touch another segment with + // the same value number, so merge as we go. Stop at the original end + // slot instead of using a segment count because merging may erase + // segments. + const SlotIndex RetagEnd = OldIdxOut->end; + for (LiveRange::iterator Idx = NewIdxOut + 2; + Idx != LR.end() && Idx->start < RetagEnd;) { Idx->valno = OldIdxVNI; + Idx = std::next(LR.mergeAdjacentSegments(Idx)); + } // Aggressively remove all dead flags from the former dead definition. // Kill/dead flags shouldn't be used while live intervals exist; they // will be reinserted by VirtRegRewriter. 
diff --git a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-dead-def-join.mir b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-dead-def-join.mir new file mode 100644 index 0000000000000..cb2a93202390d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-dead-def-join.mir @@ -0,0 +1,5126 @@ +# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx950 -verify-misched -run-pass=machine-scheduler -misched-only-block=2 -filetype=null %s +# REQUIRES: asserts + +# This used to assert while moving a dead subregister def upward. The moved +# def retagged a following live-range segment to the same value number as the +# previous segment, but did not join the adjacent segments. This is a reduced +# MIR test, but it still needs the surrounding scheduler context to reproduce +# the problematic move. + +--- | + target triple = "amdgcn-unknown-amdhsa" + define amdgpu_kernel void @rock_attention(ptr addrspace(1) inreg %0, ptr addrspace(1) inreg %1, ptr addrspace(1) inreg %2, ptr addrspace(1) inreg %3, ptr addrspace(1) inreg %4, ptr addrspace(1) inreg %5) #0 { + ret void + } + attributes #0 = { nounwind "amdgpu-cluster-dims"="1,1,1" "amdgpu-flat-work-group-size"="1,64" "uniform-work-group-size" } +... 
+--- +name: rock_attention +alignment: 4 +tracksRegLiveness: true +noPhis: true +isSSA: false +noVRegs: false +hasFakeUses: false +registers: + - { id: 2, class: sreg_64 } + - { id: 3, class: sreg_64 } + - { id: 4, class: sreg_64 } +liveins: + - { reg: '$vgpr0', virtual-reg: '%0' } + - { reg: '$sgpr4_sgpr5', virtual-reg: '%1' } + - { reg: '$sgpr8_sgpr9', virtual-reg: '%2' } + - { reg: '$sgpr10_sgpr11', virtual-reg: '%3' } + - { reg: '$sgpr12_sgpr13', virtual-reg: '%4' } + - { reg: '$sgpr14_sgpr15', virtual-reg: '%5' } + - { reg: '$sgpr16', virtual-reg: '%6' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + explicitKernArgSize: 48 + maxKernArgAlign: 8 + ldsSize: 51552 + isEntryFunction: true + numWaveDispatchSGPRs: 16 + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + dispatchPtr: { reg: '$sgpr0_sgpr1' } + queuePtr: { reg: '$sgpr2_sgpr3' } + kernargSegmentPtr: { reg: '$sgpr4_sgpr5' } + dispatchID: { reg: '$sgpr6_sgpr7' } + firstKernArgPreloadReg: { reg: '$sgpr8' } + workGroupIDX: { reg: '$sgpr16' } + workGroupIDY: { reg: '$sgpr17' } + workGroupIDZ: { reg: '$sgpr18' } + workItemIDX: { reg: '$vgpr0', mask: 1023 } + workItemIDY: { reg: '$vgpr0', mask: 1047552 } + workItemIDZ: { reg: '$vgpr0', mask: 1072693248 } + occupancy: 1 + sgprForEXECCopy: '$sgpr100_sgpr101' + numKernargPreloadSGPRs: 8 +body: | + bb.0: + liveins: $vgpr0, $sgpr4_sgpr5, $sgpr8_sgpr9, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr14_sgpr15, $sgpr16 + + %6:sgpr_32 = COPY $sgpr16 + %5:sreg_64_xexec_xnull = COPY $sgpr14_sgpr15 + undef %7.sub0_sub1:sgpr_128 = COPY $sgpr12_sgpr13 + undef %8.sub0_sub1:sgpr_128 = COPY $sgpr10_sgpr11 + undef %9.sub0_sub1:sgpr_128 = COPY $sgpr8_sgpr9 + %1:sgpr_64(p4) = COPY $sgpr4_sgpr5 + %0:vgpr_32(s32) = COPY $vgpr0 + %10:sgpr_128 = S_LOAD_DWORDX4_IMM %1(p4), 32, 0 + %11:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %12:sreg_32 = S_MOV_B32 45408 + %13:sreg_32 = S_MOV_B32 32768 + %14:sreg_32 = S_MOV_B32 47456 + %15:sreg_32 = S_MOV_B32 36992 + %16:sreg_32 = S_LSHR_B32 %6, 2, 
implicit-def dead $scc + %17:sreg_32 = S_MOV_B32 1 + %18:sreg_32 = S_LSHL_B32 %6, 1, implicit-def dead $scc + %19:sreg_32 = S_AND_B32 %18, 6, implicit-def dead $scc + %20:sreg_32 = nuw nsw S_ADD_I32 %19, %16, implicit-def dead $scc + S_CMP_GT_I32 %6, 7, implicit-def $scc + %21:sreg_32 = S_CSELECT_B32 %6, %20, implicit killed $scc + %22:vgpr_32 = V_AND_B32_e32 1023, %0(s32), implicit $exec + %23:vgpr_32 = V_AND_B32_e32 15, %0(s32), implicit $exec + %24:vgpr_32 = V_AND_B32_e32 32, %0(s32), implicit $exec + %25:vgpr_32 = V_AND_B32_e32 1, %0(s32), implicit $exec + %26:vgpr_32 = V_AND_B32_e32 48, %0(s32), implicit $exec + %27:sreg_64_xexec = V_CMP_GT_U32_e64 7, %22, implicit $exec + %28:sreg_32 = S_MUL_I32 %21, 112 + %29:vgpr_32 = nuw nsw V_LSHLREV_B32_e32 4, %22, implicit $exec + %30:vgpr_32 = disjoint V_OR_B32_e32 1024, %29, implicit $exec + %31:vgpr_32 = V_ADD_U32_e32 %28, %29, implicit $exec + %9.sub1:sgpr_128 = S_AND_B32 %9.sub1, 65535, implicit-def dead $scc + %9.sub3:sgpr_128 = S_MOV_B32 159744 + %9.sub2:sgpr_128 = S_MOV_B32 2147483646 + %32:vgpr_32 = V_MOV_B32_e32 -2147483648, implicit $exec + %33:vgpr_32 = V_CNDMASK_B32_e64 0, %32, 0, %31, %27, implicit $exec + %34:sreg_32 = S_MOV_B32 0 + %35:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %33, %9, 0, 0, 0, 0, implicit $exec + %36:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN %32, %9, 0, 0, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %29, %35, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %29, %36, 1024, 0, implicit $exec + S_WAITCNT 49279 + WAVE_BARRIER + %37:vgpr_32 = nuw nsw V_LSHLREV_B32_e32 1, %24, implicit $exec + %38:vreg_128_align2 = DS_READ_B128_gfx9 %37, 0, 0, implicit $exec + %39:vreg_128_align2 = DS_READ_B128_gfx9 %37, 16, 0, implicit $exec + %40:vreg_128_align2 = DS_READ_B128_gfx9 %37, 32, 0, implicit $exec + %41:vreg_128_align2 = DS_READ_B128_gfx9 %37, 48, 0, implicit $exec + %42:vreg_128_align2 = DS_READ_B128_gfx9 %37, 128, 0, implicit $exec + %43:vreg_128_align2 = DS_READ_B128_gfx9 %37, 144, 0, implicit $exec + 
%44:vreg_128_align2 = DS_READ_B128_gfx9 %37, 160, 0, implicit $exec + %45:vreg_128_align2 = DS_READ_B128_gfx9 %37, 176, 0, implicit $exec + %46:vreg_128_align2 = DS_READ_B128_gfx9 %37, 256, 0, implicit $exec + %47:vreg_128_align2 = DS_READ_B128_gfx9 %37, 272, 0, implicit $exec + %48:vreg_128_align2 = DS_READ_B128_gfx9 %37, 288, 0, implicit $exec + %49:vreg_128_align2 = DS_READ_B128_gfx9 %37, 304, 0, implicit $exec + %50:vreg_128_align2 = DS_READ_B128_gfx9 %37, 384, 0, implicit $exec + %51:vreg_128_align2 = DS_READ_B128_gfx9 %37, 400, 0, implicit $exec + %52:vreg_128_align2 = DS_READ_B128_gfx9 %37, 416, 0, implicit $exec + %53:vreg_128_align2 = DS_READ_B128_gfx9 %37, 432, 0, implicit $exec + %54:vreg_128_align2 = DS_READ_B128_gfx9 %37, 512, 0, implicit $exec + %55:vreg_128_align2 = DS_READ_B128_gfx9 %37, 528, 0, implicit $exec + %56:vreg_128_align2 = DS_READ_B128_gfx9 %37, 544, 0, implicit $exec + %57:vreg_128_align2 = DS_READ_B128_gfx9 %37, 560, 0, implicit $exec + %58:vreg_128_align2 = DS_READ_B128_gfx9 %37, 640, 0, implicit $exec + %59:vreg_128_align2 = DS_READ_B128_gfx9 %37, 656, 0, implicit $exec + %60:vreg_128_align2 = DS_READ_B128_gfx9 %37, 672, 0, implicit $exec + %61:vreg_128_align2 = DS_READ_B128_gfx9 %37, 688, 0, implicit $exec + %62:vreg_128_align2 = DS_READ_B128_gfx9 %37, 768, 0, implicit $exec + %63:vreg_128_align2 = DS_READ_B128_gfx9 %37, 784, 0, implicit $exec + %64:vreg_128_align2 = DS_READ_B128_gfx9 %37, 800, 0, implicit $exec + %65:vreg_128_align2 = DS_READ_B128_gfx9 %37, 816, 0, implicit $exec + %66:vreg_128_align2 = DS_READ_B128_gfx9 %37, 896, 0, implicit $exec + %67:vreg_128_align2 = DS_READ_B128_gfx9 %37, 912, 0, implicit $exec + %68:vreg_128_align2 = DS_READ_B128_gfx9 %37, 928, 0, implicit $exec + %69:vreg_128_align2 = DS_READ_B128_gfx9 %37, 944, 0, implicit $exec + %70:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1024, 0, implicit $exec + %71:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1040, 0, implicit $exec + %72:vreg_128_align2 = 
DS_READ_B128_gfx9 %37, 1056, 0, implicit $exec + %73:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1072, 0, implicit $exec + %74:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1152, 0, implicit $exec + %75:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1168, 0, implicit $exec + %76:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1184, 0, implicit $exec + %77:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1200, 0, implicit $exec + %78:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1280, 0, implicit $exec + %79:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1296, 0, implicit $exec + %80:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1312, 0, implicit $exec + %81:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1328, 0, implicit $exec + %82:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1408, 0, implicit $exec + %83:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1424, 0, implicit $exec + %84:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1440, 0, implicit $exec + %85:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1456, 0, implicit $exec + %86:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1536, 0, implicit $exec + %87:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1552, 0, implicit $exec + %88:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1568, 0, implicit $exec + %89:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1584, 0, implicit $exec + %90:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1664, 0, implicit $exec + %91:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1680, 0, implicit $exec + %92:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1696, 0, implicit $exec + %93:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1712, 0, implicit $exec + %94:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1792, 0, implicit $exec + %95:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1808, 0, implicit $exec + %96:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1824, 0, implicit $exec + %97:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1840, 0, implicit $exec + %98:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1920, 0, implicit $exec + %99:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1936, 0, implicit $exec + %100:vreg_128_align2 = 
DS_READ_B128_gfx9 %37, 1952, 0, implicit $exec + %101:vreg_128_align2 = DS_READ_B128_gfx9 %37, 1968, 0, implicit $exec + %102:vgpr_32 = GLOBAL_LOAD_UBYTE_SADDR %5, %11, 0, 0, implicit $exec + %103:sreg_32_xm0 = V_READFIRSTLANE_B32 %102, implicit $exec + %104:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR %10.sub0_sub1, %11, 0, 0, implicit $exec + %105:sreg_32 = S_LSHL_B32 %21, 16, implicit-def dead $scc + %106:vgpr_32 = disjoint V_OR_B32_e32 %105, %29, implicit $exec + %107:vgpr_32 = disjoint V_OR_B32_e32 %105, %30, implicit $exec + %8.sub1:sgpr_128 = S_AND_B32 %8.sub1, 65535, implicit-def dead $scc + %8.sub2:sgpr_128 = COPY %9.sub2 + %8.sub3:sgpr_128 = COPY %9.sub3 + $m0 = S_MOV_B32 45408 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %106, %8, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + $m0 = S_MOV_B32 46432 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %107, %8, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + %108:vgpr_32 = nuw nsw V_LSHLREV_B32_e32 3, %22, implicit $exec + %109:vgpr_32 = disjoint V_OR_B32_e32 512, %108, implicit $exec + %110:vgpr_32 = disjoint V_OR_B32_e32 1024, %108, implicit $exec + %111:vgpr_32 = V_OR_B32_e32 1536, %108, implicit $exec + %112:vgpr_32 = disjoint V_OR_B32_e32 %105, %108, implicit $exec + %113:vgpr_32 = disjoint V_OR_B32_e32 %105, %109, implicit $exec + %114:vgpr_32 = disjoint V_OR_B32_e32 %105, %110, implicit $exec + %115:vgpr_32 = disjoint V_OR_B32_e32 %105, %111, implicit $exec + %7.sub1:sgpr_128 = S_AND_B32 %7.sub1, 65535, implicit-def dead $scc + %7.sub2:sgpr_128 = COPY %9.sub2 + %7.sub3:sgpr_128 = COPY %9.sub3 + %116:vgpr_32 = V_LSHLREV_B32_e32 1, %112, implicit $exec + $m0 = S_MOV_B32 32768 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %116, %7, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + %117:vgpr_32 = V_LSHLREV_B32_e32 1, %113, implicit $exec + $m0 = S_MOV_B32 33824 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %117, %7, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + %118:vgpr_32 = V_LSHLREV_B32_e32 1, %114, implicit $exec + $m0 = S_MOV_B32 34880 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %118, 
%7, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + %119:vgpr_32 = V_LSHLREV_B32_e32 1, %115, implicit $exec + $m0 = S_MOV_B32 35936 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %119, %7, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + %120:sreg_32 = disjoint S_OR_B32 %105, 2048, implicit-def dead $scc + %121:vgpr_32 = disjoint V_OR_B32_e32 %120, %29, implicit $exec + %122:vgpr_32 = disjoint V_OR_B32_e32 %120, %30, implicit $exec + S_WAITCNT 49279 + WAVE_BARRIER + $m0 = S_MOV_B32 47456 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %121, %8, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + $m0 = S_MOV_B32 48480 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %122, %8, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + %123:vgpr_32 = disjoint V_OR_B32_e32 %120, %108, implicit $exec + %124:vgpr_32 = disjoint V_OR_B32_e32 %120, %109, implicit $exec + %125:vgpr_32 = disjoint V_OR_B32_e32 %120, %110, implicit $exec + %126:vgpr_32 = V_OR_B32_e32 %120, %111, implicit $exec + %127:vgpr_32 = V_LSHLREV_B32_e32 1, %123, implicit $exec + $m0 = S_MOV_B32 36992 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %127, %7, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + %128:vgpr_32 = V_LSHLREV_B32_e32 1, %124, implicit $exec + $m0 = S_MOV_B32 38048 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %128, %7, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + %129:vgpr_32 = V_LSHLREV_B32_e32 1, %125, implicit $exec + $m0 = S_MOV_B32 39104 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %129, %7, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + %130:vgpr_32 = V_LSHLREV_B32_e32 1, %126, implicit $exec + $m0 = S_MOV_B32 40160 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %130, %7, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + S_WAITCNT 49279 + WAVE_BARRIER + %131:sreg_32 = disjoint S_OR_B32 %105, 4096, implicit-def dead $scc + %132:vgpr_32 = nuw nsw V_LSHLREV_B32_e32 6, %22, implicit $exec + %133:vgpr_32 = V_AND_B32_e32 1984, %132, implicit $exec + %134:vgpr_32 = V_AND_B32_e32 304, %29, implicit $exec + %135:vgpr_32 = V_LSHL_OR_B32_e64 %22, 8, %108, implicit $exec + %136:vgpr_32 = V_AND_B32_e32 64, %108, implicit 
$exec + %137:vgpr_32 = nuw nsw V_LSHLREV_B32_e32 2, %24, implicit $exec + %138:sreg_32 = S_MOV_B32 1088 + %139:vgpr_32 = disjoint V_AND_OR_B32_e64 %135, %138, %137, implicit $exec + %140:vgpr_32 = disjoint V_OR_B32_e32 %139, %134, implicit $exec + %141:sreg_32 = S_MOV_B32 576 + %142:vgpr_32 = V_BITOP3_B32_e64 %139, %141, %134, 54, implicit $exec + %143:vgpr_32 = nuw nsw V_LSHLREV_B32_e32 2, %22, implicit $exec + %144:vgpr_32 = V_AND_B32_e32 48, %143, implicit $exec + %145:vgpr_32 = V_BFE_I32_e64 %0(s32), 1, 1, implicit $exec + %146:vgpr_32 = V_AND_B32_e32 576, %145, implicit $exec + %147:vgpr_32 = V_AND_B32_e32 1024, %132, implicit $exec + %148:vgpr_32 = V_BITOP3_B32_e64 %146, %37, %144, 54, implicit $exec + %149:vgpr_32 = disjoint V_LSHL_OR_B32_e64 %25, 11, %147, implicit $exec + %150:vgpr_32 = nuw nsw V_LSHLREV_B32_e32 4, %23, implicit $exec + %151:vgpr_32 = V_LSHLREV_B32_e32 1, %22, implicit $exec + %152:vgpr_32 = V_XOR_B32_e32 264, %151, implicit $exec + %153:vgpr_32 = V_XOR_B32_e32 528, %151, implicit $exec + %154:vgpr_32 = V_XOR_B32_e32 792, %151, implicit $exec + %155:vgpr_32 = V_XOR_B32_e32 1056, %151, implicit $exec + %156:vgpr_32 = V_XOR_B32_e32 1320, %151, implicit $exec + %157:vgpr_32 = V_XOR_B32_e32 1584, %151, implicit $exec + %158:vgpr_32 = V_XOR_B32_e32 1848, %151, implicit $exec + %159:vgpr_32 = V_XOR_B32_e32 2112, %151, implicit $exec + %160:vgpr_32 = V_XOR_B32_e32 2376, %151, implicit $exec + %161:vgpr_32 = V_XOR_B32_e32 2640, %151, implicit $exec + %162:vgpr_32 = V_XOR_B32_e32 2904, %151, implicit $exec + %163:vgpr_32 = V_XOR_B32_e32 3168, %151, implicit $exec + %164:vgpr_32 = V_XOR_B32_e32 3432, %151, implicit $exec + %165:vgpr_32 = V_XOR_B32_e32 3696, %151, implicit $exec + %166:vgpr_32 = V_XOR_B32_e32 3960, %151, implicit $exec + %167:vgpr_32 = exact V_LSHRREV_B32_e32 1, %26, implicit $exec + %168:vgpr_32 = V_MUL_U32_U24_e32 264, %23, implicit $exec + %169:vgpr_32 = V_XOR_B32_e32 %168, %167, implicit $exec + %170:vgpr_32 = V_XOR_B32_e32 32, 
%169, implicit $exec + %171:vgpr_32 = V_XOR_B32_e32 64, %169, implicit $exec + %172:vgpr_32 = V_XOR_B32_e32 96, %169, implicit $exec + undef %173.sub0:vreg_64_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %174.sub0:vreg_64_align2 = V_MOV_B32_e32 -8388608, implicit $exec + undef %175.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %175.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %175.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %175.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + undef %176.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %176.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %176.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %176.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + undef %177.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %177.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %177.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %177.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + undef %178.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %178.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %178.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %178.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + undef %179.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %179.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %179.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %179.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 
0, implicit $exec, implicit $exec + undef %180.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %180.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %180.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %180.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + undef %181.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %181.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %181.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %181.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + undef %182.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %182.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %182.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %182.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec, implicit $exec + %8.sub2:sgpr_128 = COPY %9.sub2 + %8.sub3:sgpr_128 = COPY %9.sub3 + %183:vgpr_32 = V_ADD_U32_e32 %149, %148, implicit $exec + undef %184.sub0:sgpr_64 = S_MOV_B32 1069066811 + %7.sub2:sgpr_128 = COPY %9.sub2 + %7.sub3:sgpr_128 = COPY %9.sub3 + %185:vgpr_32 = COPY %104 + %186:sreg_32 = COPY %103 + %174.sub1:vreg_64_align2 = COPY %174.sub0 + undef %187.sub0:vreg_64_align2 = COPY %174.sub0 + %187.sub1:vreg_64_align2 = COPY %174.sub0 + undef %188.sub0:vreg_64_align2 = COPY %174.sub0 + %188.sub1:vreg_64_align2 = COPY %174.sub0 + undef %189.sub0:vreg_64_align2 = COPY %174.sub0 + %189.sub1:vreg_64_align2 = COPY %174.sub0 + undef %190.sub0:vreg_64_align2 = COPY %174.sub0 + %190.sub1:vreg_64_align2 = COPY %174.sub0 + undef %191.sub0:vreg_64_align2 = COPY %174.sub0 + %191.sub1:vreg_64_align2 = COPY %174.sub0 + undef %192.sub0:vreg_64_align2 = COPY %174.sub0 + %192.sub1:vreg_64_align2 = COPY %174.sub0 + undef 
%193.sub0:vreg_64_align2 = COPY %174.sub0 + %193.sub1:vreg_64_align2 = COPY %174.sub0 + undef %194.sub0:vreg_64_align2 = COPY %174.sub0 + %194.sub1:vreg_64_align2 = COPY %174.sub0 + undef %195.sub0:vreg_64_align2 = COPY %174.sub0 + %195.sub1:vreg_64_align2 = COPY %174.sub0 + undef %196.sub0:vreg_64_align2 = COPY %174.sub0 + %196.sub1:vreg_64_align2 = COPY %174.sub0 + undef %197.sub0:vreg_64_align2 = COPY %174.sub0 + %197.sub1:vreg_64_align2 = COPY %174.sub0 + undef %198.sub0:vreg_64_align2 = COPY %174.sub0 + %198.sub1:vreg_64_align2 = COPY %174.sub0 + undef %199.sub0:vreg_64_align2 = COPY %174.sub0 + %199.sub1:vreg_64_align2 = COPY %174.sub0 + undef %200.sub0:vreg_64_align2 = COPY %174.sub0 + %200.sub1:vreg_64_align2 = COPY %174.sub0 + undef %201.sub0:vreg_64_align2 = COPY %174.sub0 + %201.sub1:vreg_64_align2 = COPY %174.sub0 + undef %202.sub0:vreg_64_align2 = COPY %174.sub0 + %202.sub1:vreg_64_align2 = COPY %174.sub0 + undef %203.sub0:vreg_64_align2 = COPY %174.sub0 + %203.sub1:vreg_64_align2 = COPY %174.sub0 + undef %204.sub0:vreg_64_align2 = COPY %174.sub0 + %204.sub1:vreg_64_align2 = COPY %174.sub0 + undef %205.sub0:vreg_64_align2 = COPY %174.sub0 + %205.sub1:vreg_64_align2 = COPY %174.sub0 + undef %206.sub0:vreg_64_align2 = COPY %174.sub0 + %206.sub1:vreg_64_align2 = COPY %174.sub0 + undef %207.sub0:vreg_64_align2 = COPY %174.sub0 + %207.sub1:vreg_64_align2 = COPY %174.sub0 + undef %208.sub0:vreg_64_align2 = COPY %174.sub0 + %208.sub1:vreg_64_align2 = COPY %174.sub0 + undef %209.sub0:vreg_64_align2 = COPY %174.sub0 + %209.sub1:vreg_64_align2 = COPY %174.sub0 + undef %210.sub0:vreg_64_align2 = COPY %174.sub0 + %210.sub1:vreg_64_align2 = COPY %174.sub0 + undef %211.sub0:vreg_64_align2 = COPY %174.sub0 + %211.sub1:vreg_64_align2 = COPY %174.sub0 + undef %212.sub0:vreg_64_align2 = COPY %174.sub0 + %212.sub1:vreg_64_align2 = COPY %174.sub0 + undef %213.sub0:vreg_64_align2 = COPY %174.sub0 + %213.sub1:vreg_64_align2 = COPY %174.sub0 + undef %214.sub0:vreg_64_align2 
= COPY %174.sub0 + %214.sub1:vreg_64_align2 = COPY %174.sub0 + undef %215.sub0:vreg_64_align2 = COPY %174.sub0 + %215.sub1:vreg_64_align2 = COPY %174.sub0 + undef %216.sub0:vreg_64_align2 = COPY %174.sub0 + %216.sub1:vreg_64_align2 = COPY %174.sub0 + undef %217.sub0:vreg_64_align2 = COPY %174.sub0 + %217.sub1:vreg_64_align2 = COPY %174.sub0 + undef %218.sub0:vreg_64_align2 = COPY %174.sub0 + %218.sub1:vreg_64_align2 = COPY %174.sub0 + undef %219.sub0:vreg_64_align2 = COPY %174.sub0 + %219.sub1:vreg_64_align2 = COPY %174.sub0 + undef %220.sub0:vreg_64_align2 = COPY %174.sub0 + %220.sub1:vreg_64_align2 = COPY %174.sub0 + undef %221.sub0:vreg_64_align2 = COPY %174.sub0 + %221.sub1:vreg_64_align2 = COPY %174.sub0 + undef %222.sub0:vreg_64_align2 = COPY %174.sub0 + %222.sub1:vreg_64_align2 = COPY %174.sub0 + undef %223.sub0:vreg_64_align2 = COPY %174.sub0 + %223.sub1:vreg_64_align2 = COPY %174.sub0 + undef %224.sub0:vreg_64_align2 = COPY %174.sub0 + %224.sub1:vreg_64_align2 = COPY %174.sub0 + undef %225.sub0:vreg_64_align2 = COPY %174.sub0 + %225.sub1:vreg_64_align2 = COPY %174.sub0 + undef %226.sub0:vreg_64_align2 = COPY %174.sub0 + %226.sub1:vreg_64_align2 = COPY %174.sub0 + undef %227.sub0:vreg_64_align2 = COPY %174.sub0 + %227.sub1:vreg_64_align2 = COPY %174.sub0 + undef %228.sub0:vreg_64_align2 = COPY %174.sub0 + %228.sub1:vreg_64_align2 = COPY %174.sub0 + undef %229.sub0:vreg_64_align2 = COPY %174.sub0 + %229.sub1:vreg_64_align2 = COPY %174.sub0 + undef %230.sub0:vreg_64_align2 = COPY %174.sub0 + %230.sub1:vreg_64_align2 = COPY %174.sub0 + undef %231.sub0:vreg_64_align2 = COPY %174.sub0 + %231.sub1:vreg_64_align2 = COPY %174.sub0 + undef %232.sub0:vreg_64_align2 = COPY %174.sub0 + %232.sub1:vreg_64_align2 = COPY %174.sub0 + undef %233.sub0:vreg_64_align2 = COPY %174.sub0 + %233.sub1:vreg_64_align2 = COPY %174.sub0 + undef %234.sub0:vreg_64_align2 = COPY %174.sub0 + %234.sub1:vreg_64_align2 = COPY %174.sub0 + undef %235.sub0:vreg_64_align2 = COPY %174.sub0 + 
%235.sub1:vreg_64_align2 = COPY %174.sub0 + undef %236.sub0:vreg_64_align2 = COPY %174.sub0 + %236.sub1:vreg_64_align2 = COPY %174.sub0 + undef %237.sub0:vreg_64_align2 = COPY %174.sub0 + %237.sub1:vreg_64_align2 = COPY %174.sub0 + undef %238.sub0:vreg_64_align2 = COPY %174.sub0 + %238.sub1:vreg_64_align2 = COPY %174.sub0 + undef %239.sub0:vreg_64_align2 = COPY %174.sub0 + %239.sub1:vreg_64_align2 = COPY %174.sub0 + undef %240.sub0:vreg_64_align2 = COPY %174.sub0 + %240.sub1:vreg_64_align2 = COPY %174.sub0 + undef %241.sub0:vreg_64_align2 = COPY %174.sub0 + %241.sub1:vreg_64_align2 = COPY %174.sub0 + undef %242.sub0:vreg_64_align2 = COPY %174.sub0 + %242.sub1:vreg_64_align2 = COPY %174.sub0 + undef %243.sub0:vreg_64_align2 = COPY %174.sub0 + %243.sub1:vreg_64_align2 = COPY %174.sub0 + undef %244.sub0:vreg_64_align2 = COPY %174.sub0 + %244.sub1:vreg_64_align2 = COPY %174.sub0 + undef %245.sub0:vreg_64_align2 = COPY %174.sub0 + %245.sub1:vreg_64_align2 = COPY %174.sub0 + undef %246.sub0:vreg_64_align2 = COPY %174.sub0 + %246.sub1:vreg_64_align2 = COPY %174.sub0 + undef %247.sub0:vreg_64_align2 = COPY %174.sub0 + undef %248.sub0:vreg_64_align2 = COPY %174.sub0 + undef %249.sub0:vreg_64_align2 = COPY %174.sub0 + undef %250.sub0:vreg_64_align2 = COPY %174.sub0 + undef %251.sub0:vreg_64_align2 = COPY %174.sub0 + undef %252.sub0:vreg_64_align2 = COPY %174.sub0 + %173.sub1:vreg_64_align2 = COPY %173.sub0 + undef %253.sub0:vreg_64_align2 = COPY %173.sub0 + %253.sub1:vreg_64_align2 = COPY %173.sub0 + undef %254.sub0:vreg_64_align2 = COPY %173.sub0 + %254.sub1:vreg_64_align2 = COPY %173.sub0 + undef %255.sub0:vreg_64_align2 = COPY %173.sub0 + %255.sub1:vreg_64_align2 = COPY %173.sub0 + undef %256.sub0:vreg_64_align2 = COPY %173.sub0 + %256.sub1:vreg_64_align2 = COPY %173.sub0 + undef %257.sub0:vreg_64_align2 = COPY %173.sub0 + %257.sub1:vreg_64_align2 = COPY %173.sub0 + undef %258.sub0:vreg_64_align2 = COPY %173.sub0 + %258.sub1:vreg_64_align2 = COPY %173.sub0 + undef 
%259.sub0:vreg_64_align2 = COPY %173.sub0 + %259.sub1:vreg_64_align2 = COPY %173.sub0 + undef %260.sub0:vreg_64_align2 = COPY %173.sub0 + %260.sub1:vreg_64_align2 = COPY %173.sub0 + undef %261.sub0:vreg_64_align2 = COPY %173.sub0 + %261.sub1:vreg_64_align2 = COPY %173.sub0 + undef %262.sub0:vreg_64_align2 = COPY %173.sub0 + %262.sub1:vreg_64_align2 = COPY %173.sub0 + undef %263.sub0:vreg_64_align2 = COPY %173.sub0 + %263.sub1:vreg_64_align2 = COPY %173.sub0 + undef %264.sub0:vreg_64_align2 = COPY %173.sub0 + %264.sub1:vreg_64_align2 = COPY %173.sub0 + undef %265.sub0:vreg_64_align2 = COPY %173.sub0 + %265.sub1:vreg_64_align2 = COPY %173.sub0 + undef %266.sub0:vreg_64_align2 = COPY %173.sub0 + %266.sub1:vreg_64_align2 = COPY %173.sub0 + undef %267.sub0:vreg_64_align2 = COPY %173.sub0 + %267.sub1:vreg_64_align2 = COPY %173.sub0 + undef %268.sub0:vreg_64_align2 = COPY %173.sub0 + %268.sub1:vreg_64_align2 = COPY %173.sub0 + undef %269.sub0:vreg_64_align2 = COPY %173.sub0 + %269.sub1:vreg_64_align2 = COPY %173.sub0 + undef %270.sub0:vreg_64_align2 = COPY %173.sub0 + %270.sub1:vreg_64_align2 = COPY %173.sub0 + undef %271.sub0:vreg_64_align2 = COPY %173.sub0 + %271.sub1:vreg_64_align2 = COPY %173.sub0 + undef %272.sub0:vreg_64_align2 = COPY %173.sub0 + %272.sub1:vreg_64_align2 = COPY %173.sub0 + undef %273.sub0:vreg_64_align2 = COPY %173.sub0 + %273.sub1:vreg_64_align2 = COPY %173.sub0 + undef %274.sub0:vreg_64_align2 = COPY %173.sub0 + %274.sub1:vreg_64_align2 = COPY %173.sub0 + undef %275.sub0:vreg_64_align2 = COPY %173.sub0 + %275.sub1:vreg_64_align2 = COPY %173.sub0 + undef %276.sub0:vreg_64_align2 = COPY %173.sub0 + %276.sub1:vreg_64_align2 = COPY %173.sub0 + undef %277.sub0:vreg_64_align2 = COPY %173.sub0 + %277.sub1:vreg_64_align2 = COPY %173.sub0 + undef %278.sub0:vreg_64_align2 = COPY %173.sub0 + %278.sub1:vreg_64_align2 = COPY %173.sub0 + undef %279.sub0:vreg_64_align2 = COPY %173.sub0 + %279.sub1:vreg_64_align2 = COPY %173.sub0 + undef %280.sub0:vreg_64_align2 
= COPY %173.sub0 + %280.sub1:vreg_64_align2 = COPY %173.sub0 + undef %281.sub0:vreg_64_align2 = COPY %173.sub0 + %281.sub1:vreg_64_align2 = COPY %173.sub0 + undef %282.sub0:vreg_64_align2 = COPY %173.sub0 + %282.sub1:vreg_64_align2 = COPY %173.sub0 + undef %283.sub0:vreg_64_align2 = COPY %173.sub0 + %283.sub1:vreg_64_align2 = COPY %173.sub0 + undef %284.sub0:vreg_64_align2 = COPY %173.sub0 + %284.sub1:vreg_64_align2 = COPY %173.sub0 + undef %285.sub0:vreg_64_align2 = COPY %173.sub0 + %285.sub1:vreg_64_align2 = COPY %173.sub0 + undef %286.sub0:vreg_64_align2 = COPY %173.sub0 + %286.sub1:vreg_64_align2 = COPY %173.sub0 + undef %287.sub0:vreg_64_align2 = COPY %173.sub0 + %287.sub1:vreg_64_align2 = COPY %173.sub0 + undef %288.sub0:vreg_64_align2 = COPY %173.sub0 + %288.sub1:vreg_64_align2 = COPY %173.sub0 + undef %289.sub0:vreg_64_align2 = COPY %173.sub0 + %289.sub1:vreg_64_align2 = COPY %173.sub0 + undef %290.sub0:vreg_64_align2 = COPY %173.sub0 + %290.sub1:vreg_64_align2 = COPY %173.sub0 + undef %291.sub0:vreg_64_align2 = COPY %173.sub0 + %291.sub1:vreg_64_align2 = COPY %173.sub0 + undef %292.sub0:vreg_64_align2 = COPY %173.sub0 + %292.sub1:vreg_64_align2 = COPY %173.sub0 + undef %293.sub0:vreg_64_align2 = COPY %173.sub0 + %293.sub1:vreg_64_align2 = COPY %173.sub0 + undef %294.sub0:vreg_64_align2 = COPY %173.sub0 + %294.sub1:vreg_64_align2 = COPY %173.sub0 + undef %295.sub0:vreg_64_align2 = COPY %173.sub0 + %295.sub1:vreg_64_align2 = COPY %173.sub0 + undef %296.sub0:vreg_64_align2 = COPY %173.sub0 + %296.sub1:vreg_64_align2 = COPY %173.sub0 + undef %297.sub0:vreg_64_align2 = COPY %173.sub0 + %297.sub1:vreg_64_align2 = COPY %173.sub0 + undef %298.sub0:vreg_64_align2 = COPY %173.sub0 + %298.sub1:vreg_64_align2 = COPY %173.sub0 + undef %299.sub0:vreg_64_align2 = COPY %173.sub0 + %299.sub1:vreg_64_align2 = COPY %173.sub0 + undef %300.sub0:vreg_64_align2 = COPY %173.sub0 + %300.sub1:vreg_64_align2 = COPY %173.sub0 + undef %301.sub0:vreg_64_align2 = COPY %173.sub0 + 
%301.sub1:vreg_64_align2 = COPY %173.sub0 + undef %302.sub0:vreg_64_align2 = COPY %173.sub0 + %302.sub1:vreg_64_align2 = COPY %173.sub0 + undef %303.sub0:vreg_64_align2 = COPY %173.sub0 + %303.sub1:vreg_64_align2 = COPY %173.sub0 + undef %304.sub0:vreg_64_align2 = COPY %173.sub0 + %304.sub1:vreg_64_align2 = COPY %173.sub0 + undef %305.sub0:vreg_64_align2 = COPY %173.sub0 + %305.sub1:vreg_64_align2 = COPY %173.sub0 + undef %306.sub0:vreg_64_align2 = COPY %173.sub0 + %306.sub1:vreg_64_align2 = COPY %173.sub0 + undef %307.sub0:vreg_64_align2 = COPY %173.sub0 + %307.sub1:vreg_64_align2 = COPY %173.sub0 + undef %308.sub0:vreg_64_align2 = COPY %173.sub0 + %308.sub1:vreg_64_align2 = COPY %173.sub0 + undef %309.sub0:vreg_64_align2 = COPY %173.sub0 + %309.sub1:vreg_64_align2 = COPY %173.sub0 + undef %310.sub0:vreg_64_align2 = COPY %173.sub0 + %310.sub1:vreg_64_align2 = COPY %173.sub0 + undef %311.sub0:vreg_64_align2 = COPY %173.sub0 + %311.sub1:vreg_64_align2 = COPY %173.sub0 + undef %312.sub0:vreg_64_align2 = COPY %173.sub0 + %312.sub1:vreg_64_align2 = COPY %173.sub0 + undef %313.sub0:vreg_64_align2 = COPY %173.sub0 + %313.sub1:vreg_64_align2 = COPY %173.sub0 + undef %314.sub0:vreg_64_align2 = COPY %173.sub0 + %314.sub1:vreg_64_align2 = COPY %173.sub0 + undef %315.sub0:vreg_64_align2 = COPY %173.sub0 + %315.sub1:vreg_64_align2 = COPY %173.sub0 + + bb.1: + successors: %bb.2(0x04000000), %bb.1(0x7c000000) + + %316:sreg_32 = COPY %12 + %12:sreg_32 = COPY %14 + %317:sreg_32 = COPY %186 + %186:sreg_32 = COPY %103 + %318:vgpr_32 = COPY %185 + %185:vgpr_32 = COPY %104 + %319:sreg_32 = COPY %13 + %13:sreg_32 = COPY %15 + undef %320.sub0:vreg_64_align2 = COPY %251.sub0 + %320.sub1:vreg_64_align2 = COPY %252.sub0 + undef %321.sub0:vreg_64_align2 = COPY %249.sub0 + %321.sub1:vreg_64_align2 = COPY %250.sub0 + undef %322.sub0:vreg_64_align2 = COPY %247.sub0 + %322.sub1:vreg_64_align2 = COPY %248.sub0 + undef %323.sub0:vreg_64_align2 = COPY %246.sub0 + %323.sub1:vreg_64_align2 = COPY 
%246.sub1 + undef %324.sub0:vreg_64_align2 = COPY %245.sub0 + %324.sub1:vreg_64_align2 = COPY %245.sub1 + undef %325.sub0:vreg_64_align2 = COPY %244.sub0 + %325.sub1:vreg_64_align2 = COPY %244.sub1 + undef %326.sub0:vreg_64_align2 = COPY %243.sub0 + %326.sub1:vreg_64_align2 = COPY %243.sub1 + undef %327.sub0:vreg_64_align2 = COPY %242.sub0 + %327.sub1:vreg_64_align2 = COPY %242.sub1 + undef %328.sub0:vreg_64_align2 = COPY %241.sub0 + %328.sub1:vreg_64_align2 = COPY %241.sub1 + undef %329.sub0:vreg_64_align2 = COPY %240.sub0 + %329.sub1:vreg_64_align2 = COPY %240.sub1 + undef %330.sub0:vreg_64_align2 = COPY %239.sub0 + %330.sub1:vreg_64_align2 = COPY %239.sub1 + undef %331.sub0:vreg_64_align2 = COPY %238.sub0 + %331.sub1:vreg_64_align2 = COPY %238.sub1 + undef %332.sub0:vreg_64_align2 = COPY %237.sub0 + %332.sub1:vreg_64_align2 = COPY %237.sub1 + undef %333.sub0:vreg_64_align2 = COPY %236.sub0 + %333.sub1:vreg_64_align2 = COPY %236.sub1 + undef %334.sub0:vreg_64_align2 = COPY %235.sub0 + %334.sub1:vreg_64_align2 = COPY %235.sub1 + undef %335.sub0:vreg_64_align2 = COPY %234.sub0 + %335.sub1:vreg_64_align2 = COPY %234.sub1 + undef %336.sub0:vreg_64_align2 = COPY %233.sub0 + %336.sub1:vreg_64_align2 = COPY %233.sub1 + undef %337.sub0:vreg_64_align2 = COPY %232.sub0 + %337.sub1:vreg_64_align2 = COPY %232.sub1 + undef %338.sub0:vreg_64_align2 = COPY %231.sub0 + %338.sub1:vreg_64_align2 = COPY %231.sub1 + undef %339.sub0:vreg_64_align2 = COPY %230.sub0 + %339.sub1:vreg_64_align2 = COPY %230.sub1 + undef %340.sub0:vreg_64_align2 = COPY %229.sub0 + %340.sub1:vreg_64_align2 = COPY %229.sub1 + undef %341.sub0:vreg_64_align2 = COPY %228.sub0 + %341.sub1:vreg_64_align2 = COPY %228.sub1 + undef %342.sub0:vreg_64_align2 = COPY %227.sub0 + %342.sub1:vreg_64_align2 = COPY %227.sub1 + undef %343.sub0:vreg_64_align2 = COPY %226.sub0 + %343.sub1:vreg_64_align2 = COPY %226.sub1 + undef %344.sub0:vreg_64_align2 = COPY %225.sub0 + %344.sub1:vreg_64_align2 = COPY %225.sub1 + undef 
%345.sub0:vreg_64_align2 = COPY %224.sub0 + %345.sub1:vreg_64_align2 = COPY %224.sub1 + undef %346.sub0:vreg_64_align2 = COPY %223.sub0 + %346.sub1:vreg_64_align2 = COPY %223.sub1 + undef %347.sub0:vreg_64_align2 = COPY %222.sub0 + %347.sub1:vreg_64_align2 = COPY %222.sub1 + undef %348.sub0:vreg_64_align2 = COPY %221.sub0 + %348.sub1:vreg_64_align2 = COPY %221.sub1 + undef %349.sub0:vreg_64_align2 = COPY %220.sub0 + %349.sub1:vreg_64_align2 = COPY %220.sub1 + undef %350.sub0:vreg_64_align2 = COPY %219.sub0 + %350.sub1:vreg_64_align2 = COPY %219.sub1 + undef %351.sub0:vreg_64_align2 = COPY %218.sub0 + %351.sub1:vreg_64_align2 = COPY %218.sub1 + undef %352.sub0:vreg_64_align2 = COPY %217.sub0 + %352.sub1:vreg_64_align2 = COPY %217.sub1 + undef %353.sub0:vreg_64_align2 = COPY %216.sub0 + %353.sub1:vreg_64_align2 = COPY %216.sub1 + undef %354.sub0:vreg_64_align2 = COPY %215.sub0 + %354.sub1:vreg_64_align2 = COPY %215.sub1 + undef %355.sub0:vreg_64_align2 = COPY %214.sub0 + %355.sub1:vreg_64_align2 = COPY %214.sub1 + undef %356.sub0:vreg_64_align2 = COPY %213.sub0 + %356.sub1:vreg_64_align2 = COPY %213.sub1 + undef %357.sub0:vreg_64_align2 = COPY %212.sub0 + %357.sub1:vreg_64_align2 = COPY %212.sub1 + undef %358.sub0:vreg_64_align2 = COPY %211.sub0 + %358.sub1:vreg_64_align2 = COPY %211.sub1 + undef %359.sub0:vreg_64_align2 = COPY %210.sub0 + %359.sub1:vreg_64_align2 = COPY %210.sub1 + undef %360.sub0:vreg_64_align2 = COPY %209.sub0 + %360.sub1:vreg_64_align2 = COPY %209.sub1 + undef %361.sub0:vreg_64_align2 = COPY %208.sub0 + %361.sub1:vreg_64_align2 = COPY %208.sub1 + undef %362.sub0:vreg_64_align2 = COPY %207.sub0 + %362.sub1:vreg_64_align2 = COPY %207.sub1 + undef %363.sub0:vreg_64_align2 = COPY %206.sub0 + %363.sub1:vreg_64_align2 = COPY %206.sub1 + undef %364.sub0:vreg_64_align2 = COPY %205.sub0 + %364.sub1:vreg_64_align2 = COPY %205.sub1 + undef %365.sub0:vreg_64_align2 = COPY %204.sub0 + %365.sub1:vreg_64_align2 = COPY %204.sub1 + undef %366.sub0:vreg_64_align2 
= COPY %203.sub0 + %366.sub1:vreg_64_align2 = COPY %203.sub1 + undef %367.sub0:vreg_64_align2 = COPY %202.sub0 + %367.sub1:vreg_64_align2 = COPY %202.sub1 + undef %368.sub0:vreg_64_align2 = COPY %201.sub0 + %368.sub1:vreg_64_align2 = COPY %201.sub1 + undef %369.sub0:vreg_64_align2 = COPY %200.sub0 + %369.sub1:vreg_64_align2 = COPY %200.sub1 + undef %370.sub0:vreg_64_align2 = COPY %199.sub0 + %370.sub1:vreg_64_align2 = COPY %199.sub1 + undef %371.sub0:vreg_64_align2 = COPY %198.sub0 + %371.sub1:vreg_64_align2 = COPY %198.sub1 + undef %372.sub0:vreg_64_align2 = COPY %197.sub0 + %372.sub1:vreg_64_align2 = COPY %197.sub1 + undef %373.sub0:vreg_64_align2 = COPY %196.sub0 + %373.sub1:vreg_64_align2 = COPY %196.sub1 + undef %374.sub0:vreg_64_align2 = COPY %195.sub0 + %374.sub1:vreg_64_align2 = COPY %195.sub1 + undef %375.sub0:vreg_64_align2 = COPY %194.sub0 + %375.sub1:vreg_64_align2 = COPY %194.sub1 + undef %376.sub0:vreg_64_align2 = COPY %193.sub0 + %376.sub1:vreg_64_align2 = COPY %193.sub1 + undef %377.sub0:vreg_64_align2 = COPY %192.sub0 + %377.sub1:vreg_64_align2 = COPY %192.sub1 + undef %378.sub0:vreg_64_align2 = COPY %191.sub0 + %378.sub1:vreg_64_align2 = COPY %191.sub1 + undef %379.sub0:vreg_64_align2 = COPY %190.sub0 + %379.sub1:vreg_64_align2 = COPY %190.sub1 + undef %380.sub0:vreg_64_align2 = COPY %189.sub0 + %380.sub1:vreg_64_align2 = COPY %189.sub1 + undef %381.sub0:vreg_64_align2 = COPY %188.sub0 + %381.sub1:vreg_64_align2 = COPY %188.sub1 + undef %382.sub0:vreg_64_align2 = COPY %187.sub0 + %382.sub1:vreg_64_align2 = COPY %187.sub1 + undef %383.sub0:vreg_64_align2 = COPY %174.sub0 + %383.sub1:vreg_64_align2 = COPY %174.sub1 + %384:sreg_32 = S_ADD_I32 %17, 1, implicit-def dead $scc + S_CMP_LT_I32 %384, 3, implicit-def $scc + %17:sreg_32 = S_CSELECT_B32 %384, 0, implicit killed $scc + %385:sreg_32 = nuw nsw S_LSHL_B32 %34, 11, implicit-def dead $scc + %386:sreg_32 = nuw nsw S_ADD_I32 %131, %385, implicit-def dead $scc + %387:vgpr_32 = disjoint V_OR_B32_e32 
%386, %29, implicit $exec + %388:vgpr_32 = disjoint V_OR_B32_e32 %386, %30, implicit $exec + %389:sreg_32 = S_LSHL_B32 %17, 11, implicit-def dead $scc + %14:sreg_32 = S_ADD_I32 %389, 45408, implicit-def dead $scc + $m0 = COPY %14 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %387, %8, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + $m0 = S_ADD_I32 %389, 46432, implicit-def dead $scc + BUFFER_LOAD_DWORDX4_LDS_OFFEN %388, %8, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + %390:vgpr_32 = nuw V_ADD_U32_e32 %316, %133, implicit $exec + %391:vreg_128_align2 = DS_READ_B128_gfx9 %390, 0, 0, implicit $exec + %392:vreg_128_align2 = DS_READ_B128_gfx9 %390, 16, 0, implicit $exec + %393:vreg_128_align2 = DS_READ_B128_gfx9 %390, 32, 0, implicit $exec + %394:vreg_128_align2 = DS_READ_B128_gfx9 %390, 48, 0, implicit $exec + undef %395.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %395.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub0, %391.sub0, %395.sub0, implicit $exec + undef %395.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub1, %391.sub1, %395.sub0, implicit $exec + undef %395.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub2, %391.sub2, %395.sub0, implicit $exec + undef %395.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub3, %391.sub3, %395.sub0, implicit $exec + undef %396.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %396.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub0, %392.sub0, %396.sub0, implicit $exec + undef %396.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub1, %392.sub1, %396.sub0, implicit $exec + undef %396.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub2, %392.sub2, %396.sub0, implicit $exec + undef %396.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub3, %392.sub3, %396.sub0, implicit $exec + undef %397.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %397.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub0, %393.sub0, %397.sub0, implicit $exec + undef %397.sub0:vreg_128_align2 = 
V_DOT4C_I32_I8_e32 %38.sub1, %393.sub1, %397.sub0, implicit $exec + undef %397.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub2, %393.sub2, %397.sub0, implicit $exec + undef %397.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub3, %393.sub3, %397.sub0, implicit $exec + undef %398.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %398.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub0, %394.sub0, %398.sub0, implicit $exec + undef %398.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub1, %394.sub1, %398.sub0, implicit $exec + undef %398.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub2, %394.sub2, %398.sub0, implicit $exec + undef %398.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %38.sub3, %394.sub3, %398.sub0, implicit $exec + %395.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %395.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub0, %391.sub0, %395.sub1, implicit $exec + %395.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub1, %391.sub1, %395.sub1, implicit $exec + %395.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub2, %391.sub2, %395.sub1, implicit $exec + %395.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub3, %391.sub3, %395.sub1, implicit $exec + %396.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %396.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub0, %392.sub0, %396.sub1, implicit $exec + %396.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub1, %392.sub1, %396.sub1, implicit $exec + %396.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub2, %392.sub2, %396.sub1, implicit $exec + %396.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub3, %392.sub3, %396.sub1, implicit $exec + %397.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %397.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub0, %393.sub0, %397.sub1, implicit $exec + %397.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub1, %393.sub1, %397.sub1, implicit $exec + %397.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub2, %393.sub2, 
%397.sub1, implicit $exec + %397.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub3, %393.sub3, %397.sub1, implicit $exec + %398.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %398.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub0, %394.sub0, %398.sub1, implicit $exec + %398.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub1, %394.sub1, %398.sub1, implicit $exec + %398.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub2, %394.sub2, %398.sub1, implicit $exec + %398.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %39.sub3, %394.sub3, %398.sub1, implicit $exec + %395.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %395.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub0, %391.sub0, %395.sub2, implicit $exec + %395.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub1, %391.sub1, %395.sub2, implicit $exec + %395.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub2, %391.sub2, %395.sub2, implicit $exec + %395.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub3, %391.sub3, %395.sub2, implicit $exec + %396.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %396.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub0, %392.sub0, %396.sub2, implicit $exec + %396.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub1, %392.sub1, %396.sub2, implicit $exec + %396.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub2, %392.sub2, %396.sub2, implicit $exec + %396.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub3, %392.sub3, %396.sub2, implicit $exec + %397.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %397.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub0, %393.sub0, %397.sub2, implicit $exec + %397.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub1, %393.sub1, %397.sub2, implicit $exec + %397.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub2, %393.sub2, %397.sub2, implicit $exec + %397.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub3, %393.sub3, %397.sub2, implicit $exec + %398.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, 
implicit $exec + %398.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub0, %394.sub0, %398.sub2, implicit $exec + %398.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub1, %394.sub1, %398.sub2, implicit $exec + %398.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub2, %394.sub2, %398.sub2, implicit $exec + %398.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %40.sub3, %394.sub3, %398.sub2, implicit $exec + %395.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %395.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub0, %391.sub0, %395.sub3, implicit $exec + %395.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub1, %391.sub1, %395.sub3, implicit $exec + %395.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub2, %391.sub2, %395.sub3, implicit $exec + %395.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub3, %391.sub3, %395.sub3, implicit $exec + %396.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %396.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub0, %392.sub0, %396.sub3, implicit $exec + %396.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub1, %392.sub1, %396.sub3, implicit $exec + %396.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub2, %392.sub2, %396.sub3, implicit $exec + %396.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub3, %392.sub3, %396.sub3, implicit $exec + %397.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %397.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub0, %393.sub0, %397.sub3, implicit $exec + %397.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub1, %393.sub1, %397.sub3, implicit $exec + %397.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub2, %393.sub2, %397.sub3, implicit $exec + %397.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub3, %393.sub3, %397.sub3, implicit $exec + %398.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %398.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub0, %394.sub0, %398.sub3, implicit $exec + %398.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub1, %394.sub1, 
%398.sub3, implicit $exec + %398.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub2, %394.sub2, %398.sub3, implicit $exec + %398.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %41.sub3, %394.sub3, %398.sub3, implicit $exec + undef %399.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %399.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub0, %391.sub0, %399.sub0, implicit $exec + undef %399.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub1, %391.sub1, %399.sub0, implicit $exec + undef %399.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub2, %391.sub2, %399.sub0, implicit $exec + undef %399.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub3, %391.sub3, %399.sub0, implicit $exec + undef %400.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %400.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub0, %392.sub0, %400.sub0, implicit $exec + undef %400.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub1, %392.sub1, %400.sub0, implicit $exec + undef %400.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub2, %392.sub2, %400.sub0, implicit $exec + undef %400.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub3, %392.sub3, %400.sub0, implicit $exec + undef %401.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %401.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub0, %393.sub0, %401.sub0, implicit $exec + undef %401.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub1, %393.sub1, %401.sub0, implicit $exec + undef %401.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub2, %393.sub2, %401.sub0, implicit $exec + undef %401.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub3, %393.sub3, %401.sub0, implicit $exec + undef %402.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %402.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub0, %394.sub0, %402.sub0, implicit $exec + undef %402.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub1, %394.sub1, %402.sub0, implicit $exec + undef %402.sub0:vreg_128_align2 = 
V_DOT4C_I32_I8_e32 %42.sub2, %394.sub2, %402.sub0, implicit $exec + undef %402.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %42.sub3, %394.sub3, %402.sub0, implicit $exec + %399.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %399.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %43.sub0, %391.sub0, %399.sub1, implicit $exec + %399.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %43.sub1, %391.sub1, %399.sub1, implicit $exec + %399.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %399.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub0, %391.sub0, %399.sub3, implicit $exec + %399.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub1, %391.sub1, %399.sub3, implicit $exec + %399.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub2, %391.sub2, %399.sub3, implicit $exec + %399.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub3, %391.sub3, %399.sub3, implicit $exec + %400.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %400.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub0, %392.sub0, %400.sub3, implicit $exec + %400.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub1, %392.sub1, %400.sub3, implicit $exec + %400.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub2, %392.sub2, %400.sub3, implicit $exec + %400.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub3, %392.sub3, %400.sub3, implicit $exec + %401.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %401.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub0, %393.sub0, %401.sub3, implicit $exec + %401.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub1, %393.sub1, %401.sub3, implicit $exec + %401.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub2, %393.sub2, %401.sub3, implicit $exec + %401.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub3, %393.sub3, %401.sub3, implicit $exec + %402.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %402.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub0, %394.sub0, %402.sub3, implicit $exec + %402.sub3:vreg_128_align2 = 
V_DOT4C_I32_I8_e32 %45.sub1, %394.sub1, %402.sub3, implicit $exec + %402.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub2, %394.sub2, %402.sub3, implicit $exec + %402.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %45.sub3, %394.sub3, %402.sub3, implicit $exec + undef %403.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %403.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub0, %391.sub0, %403.sub0, implicit $exec + undef %403.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub1, %391.sub1, %403.sub0, implicit $exec + undef %403.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub2, %391.sub2, %403.sub0, implicit $exec + undef %403.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub3, %391.sub3, %403.sub0, implicit $exec + undef %404.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %404.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub0, %392.sub0, %404.sub0, implicit $exec + undef %404.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub1, %392.sub1, %404.sub0, implicit $exec + undef %404.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub2, %392.sub2, %404.sub0, implicit $exec + undef %404.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub3, %392.sub3, %404.sub0, implicit $exec + undef %405.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %405.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub0, %393.sub0, %405.sub0, implicit $exec + undef %405.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub1, %393.sub1, %405.sub0, implicit $exec + undef %405.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub2, %393.sub2, %405.sub0, implicit $exec + undef %405.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub3, %393.sub3, %405.sub0, implicit $exec + undef %406.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %406.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub0, %394.sub0, %406.sub0, implicit $exec + undef %406.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub1, %394.sub1, %406.sub0, implicit $exec + undef 
%406.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub2, %394.sub2, %406.sub0, implicit $exec + undef %406.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %46.sub3, %394.sub3, %406.sub0, implicit $exec + %403.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %403.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub0, %391.sub0, %403.sub1, implicit $exec + %403.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub1, %391.sub1, %403.sub1, implicit $exec + %403.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub2, %391.sub2, %403.sub1, implicit $exec + %403.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub3, %391.sub3, %403.sub1, implicit $exec + %404.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %404.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub0, %392.sub0, %404.sub1, implicit $exec + %404.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub1, %392.sub1, %404.sub1, implicit $exec + %404.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub2, %392.sub2, %404.sub1, implicit $exec + %404.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub3, %392.sub3, %404.sub1, implicit $exec + %405.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %405.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub0, %393.sub0, %405.sub1, implicit $exec + %405.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub1, %393.sub1, %405.sub1, implicit $exec + %405.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub2, %393.sub2, %405.sub1, implicit $exec + %405.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub3, %393.sub3, %405.sub1, implicit $exec + %406.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %406.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub0, %394.sub0, %406.sub1, implicit $exec + %406.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub1, %394.sub1, %406.sub1, implicit $exec + %406.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub2, %394.sub2, %406.sub1, implicit $exec + %406.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %47.sub3, %394.sub3, %406.sub1, 
implicit $exec + %403.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %403.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub0, %391.sub0, %403.sub2, implicit $exec + %403.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub1, %391.sub1, %403.sub2, implicit $exec + %403.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub2, %391.sub2, %403.sub2, implicit $exec + %403.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub3, %391.sub3, %403.sub2, implicit $exec + %404.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %404.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub0, %392.sub0, %404.sub2, implicit $exec + %404.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub1, %392.sub1, %404.sub2, implicit $exec + %404.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub2, %392.sub2, %404.sub2, implicit $exec + %404.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub3, %392.sub3, %404.sub2, implicit $exec + %405.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %405.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub0, %393.sub0, %405.sub2, implicit $exec + %405.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub1, %393.sub1, %405.sub2, implicit $exec + %405.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub2, %393.sub2, %405.sub2, implicit $exec + %405.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub3, %393.sub3, %405.sub2, implicit $exec + %406.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %406.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub0, %394.sub0, %406.sub2, implicit $exec + %406.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub1, %394.sub1, %406.sub2, implicit $exec + %406.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub2, %394.sub2, %406.sub2, implicit $exec + %406.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %48.sub3, %394.sub3, %406.sub2, implicit $exec + %403.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %403.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub0, %391.sub0, %403.sub3, implicit $exec + 
%403.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub1, %391.sub1, %403.sub3, implicit $exec + %403.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub2, %391.sub2, %403.sub3, implicit $exec + %403.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub3, %391.sub3, %403.sub3, implicit $exec + %404.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %404.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub0, %392.sub0, %404.sub3, implicit $exec + %404.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub1, %392.sub1, %404.sub3, implicit $exec + %404.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub2, %392.sub2, %404.sub3, implicit $exec + %404.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub3, %392.sub3, %404.sub3, implicit $exec + %405.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %405.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub0, %393.sub0, %405.sub3, implicit $exec + %405.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub1, %393.sub1, %405.sub3, implicit $exec + %405.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub2, %393.sub2, %405.sub3, implicit $exec + %405.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub3, %393.sub3, %405.sub3, implicit $exec + %406.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %406.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub0, %394.sub0, %406.sub3, implicit $exec + %406.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub1, %394.sub1, %406.sub3, implicit $exec + %406.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub2, %394.sub2, %406.sub3, implicit $exec + %406.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %49.sub3, %394.sub3, %406.sub3, implicit $exec + undef %407.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %407.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub0, %391.sub0, %407.sub0, implicit $exec + undef %407.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub1, %391.sub1, %407.sub0, implicit $exec + undef %407.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub2, 
%391.sub2, %407.sub0, implicit $exec + undef %407.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub3, %391.sub3, %407.sub0, implicit $exec + undef %408.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %408.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub0, %392.sub0, %408.sub0, implicit $exec + undef %408.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub1, %392.sub1, %408.sub0, implicit $exec + undef %408.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub2, %392.sub2, %408.sub0, implicit $exec + undef %408.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub3, %392.sub3, %408.sub0, implicit $exec + undef %409.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %409.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub0, %393.sub0, %409.sub0, implicit $exec + undef %409.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub1, %393.sub1, %409.sub0, implicit $exec + undef %409.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub2, %393.sub2, %409.sub0, implicit $exec + undef %409.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub3, %393.sub3, %409.sub0, implicit $exec + undef %410.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %410.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub0, %394.sub0, %410.sub0, implicit $exec + undef %410.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub1, %394.sub1, %410.sub0, implicit $exec + undef %410.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub2, %394.sub2, %410.sub0, implicit $exec + undef %410.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %50.sub3, %394.sub3, %410.sub0, implicit $exec + %407.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %407.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub0, %391.sub0, %407.sub1, implicit $exec + %407.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub1, %391.sub1, %407.sub1, implicit $exec + %407.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub2, %391.sub2, %407.sub1, implicit $exec + %407.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 
%51.sub3, %391.sub3, %407.sub1, implicit $exec + %408.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %408.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub0, %392.sub0, %408.sub1, implicit $exec + %408.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub1, %392.sub1, %408.sub1, implicit $exec + %408.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub2, %392.sub2, %408.sub1, implicit $exec + %408.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub3, %392.sub3, %408.sub1, implicit $exec + %409.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %409.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub0, %393.sub0, %409.sub1, implicit $exec + %409.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub1, %393.sub1, %409.sub1, implicit $exec + %409.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub2, %393.sub2, %409.sub1, implicit $exec + %409.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub3, %393.sub3, %409.sub1, implicit $exec + %410.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %410.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub0, %394.sub0, %410.sub1, implicit $exec + %410.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub1, %394.sub1, %410.sub1, implicit $exec + %410.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub2, %394.sub2, %410.sub1, implicit $exec + %410.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %51.sub3, %394.sub3, %410.sub1, implicit $exec + %407.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %407.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub0, %391.sub0, %407.sub2, implicit $exec + %407.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub1, %391.sub1, %407.sub2, implicit $exec + %407.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub2, %391.sub2, %407.sub2, implicit $exec + %407.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub3, %391.sub3, %407.sub2, implicit $exec + %408.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %408.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub0, 
%392.sub0, %408.sub2, implicit $exec + %408.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub1, %392.sub1, %408.sub2, implicit $exec + %408.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub2, %392.sub2, %408.sub2, implicit $exec + %408.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub3, %392.sub3, %408.sub2, implicit $exec + %409.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %409.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub0, %393.sub0, %409.sub2, implicit $exec + %409.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub1, %393.sub1, %409.sub2, implicit $exec + %409.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub2, %393.sub2, %409.sub2, implicit $exec + %409.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub3, %393.sub3, %409.sub2, implicit $exec + %410.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %410.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub0, %394.sub0, %410.sub2, implicit $exec + %410.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub1, %394.sub1, %410.sub2, implicit $exec + %410.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub2, %394.sub2, %410.sub2, implicit $exec + %410.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %52.sub3, %394.sub3, %410.sub2, implicit $exec + %407.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %407.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub0, %391.sub0, %407.sub3, implicit $exec + %407.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub1, %391.sub1, %407.sub3, implicit $exec + %407.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub2, %391.sub2, %407.sub3, implicit $exec + %407.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub3, %391.sub3, %407.sub3, implicit $exec + %408.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %408.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub0, %392.sub0, %408.sub3, implicit $exec + %408.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub1, %392.sub1, %408.sub3, implicit $exec + %408.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 
%53.sub2, %392.sub2, %408.sub3, implicit $exec + %408.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub3, %392.sub3, %408.sub3, implicit $exec + %409.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %409.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub0, %393.sub0, %409.sub3, implicit $exec + %409.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub1, %393.sub1, %409.sub3, implicit $exec + %409.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub2, %393.sub2, %409.sub3, implicit $exec + %409.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub3, %393.sub3, %409.sub3, implicit $exec + %410.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %410.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub0, %394.sub0, %410.sub3, implicit $exec + %410.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub1, %394.sub1, %410.sub3, implicit $exec + %410.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub2, %394.sub2, %410.sub3, implicit $exec + %410.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %53.sub3, %394.sub3, %410.sub3, implicit $exec + undef %411.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %411.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub0, %391.sub0, %411.sub0, implicit $exec + undef %411.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub1, %391.sub1, %411.sub0, implicit $exec + undef %411.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub2, %391.sub2, %411.sub0, implicit $exec + undef %411.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub3, %391.sub3, %411.sub0, implicit $exec + undef %412.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %412.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub0, %392.sub0, %412.sub0, implicit $exec + undef %412.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub1, %392.sub1, %412.sub0, implicit $exec + undef %412.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub2, %392.sub2, %412.sub0, implicit $exec + undef %412.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub3, %392.sub3, 
%412.sub0, implicit $exec + undef %413.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %413.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub0, %393.sub0, %413.sub0, implicit $exec + undef %413.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub1, %393.sub1, %413.sub0, implicit $exec + undef %413.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub2, %393.sub2, %413.sub0, implicit $exec + undef %413.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub3, %393.sub3, %413.sub0, implicit $exec + undef %414.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %414.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub0, %394.sub0, %414.sub0, implicit $exec + undef %414.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub1, %394.sub1, %414.sub0, implicit $exec + undef %414.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub2, %394.sub2, %414.sub0, implicit $exec + undef %414.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %54.sub3, %394.sub3, %414.sub0, implicit $exec + %411.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %411.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub0, %391.sub0, %411.sub1, implicit $exec + %411.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub1, %391.sub1, %411.sub1, implicit $exec + %411.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub2, %391.sub2, %411.sub1, implicit $exec + %411.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub3, %391.sub3, %411.sub1, implicit $exec + %412.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %412.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub0, %392.sub0, %412.sub1, implicit $exec + %412.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub1, %392.sub1, %412.sub1, implicit $exec + %412.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub2, %392.sub2, %412.sub1, implicit $exec + %412.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub3, %392.sub3, %412.sub1, implicit $exec + %413.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %413.sub1:vreg_128_align2 
= V_DOT4C_I32_I8_e32 %55.sub0, %393.sub0, %413.sub1, implicit $exec + %413.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub1, %393.sub1, %413.sub1, implicit $exec + %413.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub2, %393.sub2, %413.sub1, implicit $exec + %413.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub3, %393.sub3, %413.sub1, implicit $exec + %414.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %414.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub0, %394.sub0, %414.sub1, implicit $exec + %414.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub1, %394.sub1, %414.sub1, implicit $exec + %414.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub2, %394.sub2, %414.sub1, implicit $exec + %414.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %55.sub3, %394.sub3, %414.sub1, implicit $exec + %411.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %411.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub0, %391.sub0, %411.sub2, implicit $exec + %411.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub1, %391.sub1, %411.sub2, implicit $exec + %411.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub2, %391.sub2, %411.sub2, implicit $exec + %411.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub3, %391.sub3, %411.sub2, implicit $exec + %412.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %412.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub0, %392.sub0, %412.sub2, implicit $exec + %412.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub1, %392.sub1, %412.sub2, implicit $exec + %412.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub2, %392.sub2, %412.sub2, implicit $exec + %412.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub3, %392.sub3, %412.sub2, implicit $exec + %413.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %413.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub0, %393.sub0, %413.sub2, implicit $exec + %413.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub1, %393.sub1, %413.sub2, implicit $exec + 
%413.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub2, %393.sub2, %413.sub2, implicit $exec + %413.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub3, %393.sub3, %413.sub2, implicit $exec + %414.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %414.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub0, %394.sub0, %414.sub2, implicit $exec + %414.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub1, %394.sub1, %414.sub2, implicit $exec + %414.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub2, %394.sub2, %414.sub2, implicit $exec + %414.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %56.sub3, %394.sub3, %414.sub2, implicit $exec + %411.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %411.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub0, %391.sub0, %411.sub3, implicit $exec + %411.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub1, %391.sub1, %411.sub3, implicit $exec + %411.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub2, %391.sub2, %411.sub3, implicit $exec + %411.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub3, %391.sub3, %411.sub3, implicit $exec + %412.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %412.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub0, %392.sub0, %412.sub3, implicit $exec + %412.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub1, %392.sub1, %412.sub3, implicit $exec + %412.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub2, %392.sub2, %412.sub3, implicit $exec + %412.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub3, %392.sub3, %412.sub3, implicit $exec + %413.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %413.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub0, %393.sub0, %413.sub3, implicit $exec + %413.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub1, %393.sub1, %413.sub3, implicit $exec + %413.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub2, %393.sub2, %413.sub3, implicit $exec + %413.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub3, %393.sub3, %413.sub3, implicit 
$exec + %414.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %414.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub0, %394.sub0, %414.sub3, implicit $exec + %414.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub1, %394.sub1, %414.sub3, implicit $exec + %414.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub2, %394.sub2, %414.sub3, implicit $exec + %414.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %57.sub3, %394.sub3, %414.sub3, implicit $exec + undef %415.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %415.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub0, %391.sub0, %415.sub0, implicit $exec + undef %415.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub1, %391.sub1, %415.sub0, implicit $exec + undef %415.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub2, %391.sub2, %415.sub0, implicit $exec + undef %415.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub3, %391.sub3, %415.sub0, implicit $exec + undef %416.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %416.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub0, %392.sub0, %416.sub0, implicit $exec + undef %416.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub1, %392.sub1, %416.sub0, implicit $exec + undef %416.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub2, %392.sub2, %416.sub0, implicit $exec + undef %416.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub3, %392.sub3, %416.sub0, implicit $exec + undef %417.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %417.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub0, %393.sub0, %417.sub0, implicit $exec + undef %417.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub1, %393.sub1, %417.sub0, implicit $exec + undef %417.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub2, %393.sub2, %417.sub0, implicit $exec + undef %417.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub3, %393.sub3, %417.sub0, implicit $exec + undef %418.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef 
%418.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub0, %394.sub0, %418.sub0, implicit $exec + undef %418.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub1, %394.sub1, %418.sub0, implicit $exec + undef %418.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub2, %394.sub2, %418.sub0, implicit $exec + undef %418.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %58.sub3, %394.sub3, %418.sub0, implicit $exec + %415.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %415.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub0, %391.sub0, %415.sub1, implicit $exec + %415.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub1, %391.sub1, %415.sub1, implicit $exec + %415.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub2, %391.sub2, %415.sub1, implicit $exec + %415.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub3, %391.sub3, %415.sub1, implicit $exec + %416.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %416.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub0, %392.sub0, %416.sub1, implicit $exec + %416.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub1, %392.sub1, %416.sub1, implicit $exec + %416.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub2, %392.sub2, %416.sub1, implicit $exec + %416.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub3, %392.sub3, %416.sub1, implicit $exec + %417.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %417.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub0, %393.sub0, %417.sub1, implicit $exec + %417.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub1, %393.sub1, %417.sub1, implicit $exec + %417.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub2, %393.sub2, %417.sub1, implicit $exec + %417.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub3, %393.sub3, %417.sub1, implicit $exec + %418.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %418.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub0, %394.sub0, %418.sub1, implicit $exec + %418.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub1, %394.sub1, 
%418.sub1, implicit $exec + %418.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub2, %394.sub2, %418.sub1, implicit $exec + %418.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %59.sub3, %394.sub3, %418.sub1, implicit $exec + %415.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %415.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub0, %391.sub0, %415.sub2, implicit $exec + %415.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub1, %391.sub1, %415.sub2, implicit $exec + %415.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub2, %391.sub2, %415.sub2, implicit $exec + %415.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub3, %391.sub3, %415.sub2, implicit $exec + %416.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %416.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub0, %392.sub0, %416.sub2, implicit $exec + %416.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub1, %392.sub1, %416.sub2, implicit $exec + %416.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub2, %392.sub2, %416.sub2, implicit $exec + %416.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub3, %392.sub3, %416.sub2, implicit $exec + %417.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %417.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub0, %393.sub0, %417.sub2, implicit $exec + %417.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub1, %393.sub1, %417.sub2, implicit $exec + %417.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub2, %393.sub2, %417.sub2, implicit $exec + %417.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub3, %393.sub3, %417.sub2, implicit $exec + %418.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %418.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub0, %394.sub0, %418.sub2, implicit $exec + %418.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub1, %394.sub1, %418.sub2, implicit $exec + %418.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub2, %394.sub2, %418.sub2, implicit $exec + %418.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %60.sub3, 
%394.sub3, %418.sub2, implicit $exec + %415.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %415.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub0, %391.sub0, %415.sub3, implicit $exec + %415.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub1, %391.sub1, %415.sub3, implicit $exec + %415.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub2, %391.sub2, %415.sub3, implicit $exec + %415.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub3, %391.sub3, %415.sub3, implicit $exec + %416.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %416.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub0, %392.sub0, %416.sub3, implicit $exec + %416.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub1, %392.sub1, %416.sub3, implicit $exec + %416.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub2, %392.sub2, %416.sub3, implicit $exec + %416.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub3, %392.sub3, %416.sub3, implicit $exec + %417.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %417.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub0, %393.sub0, %417.sub3, implicit $exec + %417.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub1, %393.sub1, %417.sub3, implicit $exec + %417.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub2, %393.sub2, %417.sub3, implicit $exec + %417.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub3, %393.sub3, %417.sub3, implicit $exec + %418.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %418.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub0, %394.sub0, %418.sub3, implicit $exec + %418.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub1, %394.sub1, %418.sub3, implicit $exec + %418.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub2, %394.sub2, %418.sub3, implicit $exec + %418.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %61.sub3, %394.sub3, %418.sub3, implicit $exec + undef %419.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %419.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub0, 
%391.sub0, %419.sub0, implicit $exec + undef %419.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub1, %391.sub1, %419.sub0, implicit $exec + undef %419.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub2, %391.sub2, %419.sub0, implicit $exec + undef %419.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub3, %391.sub3, %419.sub0, implicit $exec + undef %420.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %420.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub0, %392.sub0, %420.sub0, implicit $exec + undef %420.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub1, %392.sub1, %420.sub0, implicit $exec + undef %420.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub2, %392.sub2, %420.sub0, implicit $exec + undef %420.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub3, %392.sub3, %420.sub0, implicit $exec + undef %421.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %421.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub0, %393.sub0, %421.sub0, implicit $exec + undef %421.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub1, %393.sub1, %421.sub0, implicit $exec + undef %421.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub2, %393.sub2, %421.sub0, implicit $exec + undef %421.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub3, %393.sub3, %421.sub0, implicit $exec + undef %422.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %422.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub0, %394.sub0, %422.sub0, implicit $exec + undef %422.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub1, %394.sub1, %422.sub0, implicit $exec + undef %422.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub2, %394.sub2, %422.sub0, implicit $exec + undef %422.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %62.sub3, %394.sub3, %422.sub0, implicit $exec + %419.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %419.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %63.sub0, %391.sub0, %419.sub1, implicit $exec + %419.sub1:vreg_128_align2 = 
V_DOT4C_I32_I8_e32 %63.sub1, %391.sub1, %419.sub1, implicit $exec + %419.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %63.sub2, %391.sub2, %419.sub1, implicit $exec + %419.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %63.sub3, %391.sub3, %419.sub1, implicit $exec + %420.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %420.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %63.sub0, %392.sub0, %420.sub1, implicit $exec + %420.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %63.sub1, %392.sub1, %420.sub1, implicit $exec + %420.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %63.sub2, %392.sub2, %420.sub1, implicit $exec + %420.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %63.sub3, %392.sub3, %420.sub1, implicit $exec + undef %423.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %423.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub0, %391.sub0, %423.sub0, implicit $exec + undef %423.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub1, %391.sub1, %423.sub0, implicit $exec + undef %423.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub2, %391.sub2, %423.sub0, implicit $exec + undef %423.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub3, %391.sub3, %423.sub0, implicit $exec + undef %424.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %424.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub0, %392.sub0, %424.sub0, implicit $exec + undef %424.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub1, %392.sub1, %424.sub0, implicit $exec + undef %424.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub2, %392.sub2, %424.sub0, implicit $exec + undef %424.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub3, %392.sub3, %424.sub0, implicit $exec + undef %425.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %425.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub0, %393.sub0, %425.sub0, implicit $exec + undef %425.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub1, %393.sub1, %425.sub0, implicit $exec + undef %425.sub0:vreg_128_align2 = 
V_DOT4C_I32_I8_e32 %82.sub2, %393.sub2, %425.sub0, implicit $exec + undef %425.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub3, %393.sub3, %425.sub0, implicit $exec + undef %426.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %426.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub0, %394.sub0, %426.sub0, implicit $exec + undef %426.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub1, %394.sub1, %426.sub0, implicit $exec + undef %426.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub2, %394.sub2, %426.sub0, implicit $exec + undef %426.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %82.sub3, %394.sub3, %426.sub0, implicit $exec + %423.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %423.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub0, %391.sub0, %423.sub1, implicit $exec + %423.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub1, %391.sub1, %423.sub1, implicit $exec + %423.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub2, %391.sub2, %423.sub1, implicit $exec + %423.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub3, %391.sub3, %423.sub1, implicit $exec + %424.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %424.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub0, %392.sub0, %424.sub1, implicit $exec + %424.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub1, %392.sub1, %424.sub1, implicit $exec + %424.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub2, %392.sub2, %424.sub1, implicit $exec + %424.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub3, %392.sub3, %424.sub1, implicit $exec + %425.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %425.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub0, %393.sub0, %425.sub1, implicit $exec + %425.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub1, %393.sub1, %425.sub1, implicit $exec + %425.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub2, %393.sub2, %425.sub1, implicit $exec + %425.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub3, %393.sub3, %425.sub1, 
implicit $exec + %426.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %426.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub0, %394.sub0, %426.sub1, implicit $exec + %426.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub1, %394.sub1, %426.sub1, implicit $exec + %426.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub2, %394.sub2, %426.sub1, implicit $exec + %426.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %83.sub3, %394.sub3, %426.sub1, implicit $exec + %423.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %423.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub0, %391.sub0, %423.sub2, implicit $exec + %423.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub1, %391.sub1, %423.sub2, implicit $exec + %423.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub2, %391.sub2, %423.sub2, implicit $exec + %423.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub3, %391.sub3, %423.sub2, implicit $exec + %424.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %424.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub0, %392.sub0, %424.sub2, implicit $exec + %424.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub1, %392.sub1, %424.sub2, implicit $exec + %424.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub2, %392.sub2, %424.sub2, implicit $exec + %424.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub3, %392.sub3, %424.sub2, implicit $exec + %425.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %425.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub0, %393.sub0, %425.sub2, implicit $exec + %425.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub1, %393.sub1, %425.sub2, implicit $exec + %425.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub2, %393.sub2, %425.sub2, implicit $exec + %425.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub3, %393.sub3, %425.sub2, implicit $exec + %426.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %426.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub0, %394.sub0, %426.sub2, implicit $exec + 
%426.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub1, %394.sub1, %426.sub2, implicit $exec + %426.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub2, %394.sub2, %426.sub2, implicit $exec + %426.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %84.sub3, %394.sub3, %426.sub2, implicit $exec + %423.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %423.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub0, %391.sub0, %423.sub3, implicit $exec + %423.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub1, %391.sub1, %423.sub3, implicit $exec + %423.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub2, %391.sub2, %423.sub3, implicit $exec + %423.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub3, %391.sub3, %423.sub3, implicit $exec + %424.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %424.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub0, %392.sub0, %424.sub3, implicit $exec + %424.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub1, %392.sub1, %424.sub3, implicit $exec + %424.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub2, %392.sub2, %424.sub3, implicit $exec + %424.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub3, %392.sub3, %424.sub3, implicit $exec + %425.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %425.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub0, %393.sub0, %425.sub3, implicit $exec + %425.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub1, %393.sub1, %425.sub3, implicit $exec + %425.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub2, %393.sub2, %425.sub3, implicit $exec + %425.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub3, %393.sub3, %425.sub3, implicit $exec + %426.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %426.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub0, %394.sub0, %426.sub3, implicit $exec + %426.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub1, %394.sub1, %426.sub3, implicit $exec + %426.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub2, %394.sub2, %426.sub3, implicit 
$exec + %426.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %85.sub3, %394.sub3, %426.sub3, implicit $exec + undef %427.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %427.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub0, %391.sub0, %427.sub0, implicit $exec + undef %427.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub1, %391.sub1, %427.sub0, implicit $exec + undef %427.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub2, %391.sub2, %427.sub0, implicit $exec + undef %427.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub3, %391.sub3, %427.sub0, implicit $exec + undef %428.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %428.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub0, %392.sub0, %428.sub0, implicit $exec + undef %428.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub1, %392.sub1, %428.sub0, implicit $exec + undef %428.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub2, %392.sub2, %428.sub0, implicit $exec + undef %428.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub3, %392.sub3, %428.sub0, implicit $exec + undef %429.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %429.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub0, %393.sub0, %429.sub0, implicit $exec + undef %429.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub1, %393.sub1, %429.sub0, implicit $exec + undef %429.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub2, %393.sub2, %429.sub0, implicit $exec + undef %429.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub3, %393.sub3, %429.sub0, implicit $exec + undef %430.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %430.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub0, %394.sub0, %430.sub0, implicit $exec + undef %430.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub1, %394.sub1, %430.sub0, implicit $exec + undef %430.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %86.sub2, %394.sub2, %430.sub0, implicit $exec + undef %430.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 
%86.sub3, %394.sub3, %430.sub0, implicit $exec + %427.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %427.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub0, %391.sub0, %427.sub1, implicit $exec + %427.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub1, %391.sub1, %427.sub1, implicit $exec + %427.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub2, %391.sub2, %427.sub1, implicit $exec + %427.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub3, %391.sub3, %427.sub1, implicit $exec + %428.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %428.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub0, %392.sub0, %428.sub1, implicit $exec + %428.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub1, %392.sub1, %428.sub1, implicit $exec + %428.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub2, %392.sub2, %428.sub1, implicit $exec + %428.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub3, %392.sub3, %428.sub1, implicit $exec + %429.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %429.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub0, %393.sub0, %429.sub1, implicit $exec + %429.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub1, %393.sub1, %429.sub1, implicit $exec + %429.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub2, %393.sub2, %429.sub1, implicit $exec + %429.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub3, %393.sub3, %429.sub1, implicit $exec + %430.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %430.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub0, %394.sub0, %430.sub1, implicit $exec + %430.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub1, %394.sub1, %430.sub1, implicit $exec + %430.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub2, %394.sub2, %430.sub1, implicit $exec + %430.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %87.sub3, %394.sub3, %430.sub1, implicit $exec + %427.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %427.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub0, 
%391.sub0, %427.sub2, implicit $exec + %427.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub1, %391.sub1, %427.sub2, implicit $exec + %427.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub2, %391.sub2, %427.sub2, implicit $exec + %427.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub3, %391.sub3, %427.sub2, implicit $exec + %428.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %428.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub0, %392.sub0, %428.sub2, implicit $exec + %428.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub1, %392.sub1, %428.sub2, implicit $exec + %428.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub2, %392.sub2, %428.sub2, implicit $exec + %428.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub3, %392.sub3, %428.sub2, implicit $exec + %429.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %429.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub0, %393.sub0, %429.sub2, implicit $exec + %429.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub1, %393.sub1, %429.sub2, implicit $exec + %429.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub2, %393.sub2, %429.sub2, implicit $exec + %429.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub3, %393.sub3, %429.sub2, implicit $exec + %430.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %430.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub0, %394.sub0, %430.sub2, implicit $exec + %430.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub1, %394.sub1, %430.sub2, implicit $exec + %430.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub2, %394.sub2, %430.sub2, implicit $exec + %430.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %88.sub3, %394.sub3, %430.sub2, implicit $exec + %427.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %427.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub0, %391.sub0, %427.sub3, implicit $exec + %427.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub1, %391.sub1, %427.sub3, implicit $exec + %427.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 
%89.sub2, %391.sub2, %427.sub3, implicit $exec + %427.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub3, %391.sub3, %427.sub3, implicit $exec + %428.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %428.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub0, %392.sub0, %428.sub3, implicit $exec + %428.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub1, %392.sub1, %428.sub3, implicit $exec + %428.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub2, %392.sub2, %428.sub3, implicit $exec + %428.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub3, %392.sub3, %428.sub3, implicit $exec + %429.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %429.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub0, %393.sub0, %429.sub3, implicit $exec + %429.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub1, %393.sub1, %429.sub3, implicit $exec + %429.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub2, %393.sub2, %429.sub3, implicit $exec + %429.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub3, %393.sub3, %429.sub3, implicit $exec + %430.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %430.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub0, %394.sub0, %430.sub3, implicit $exec + %430.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub1, %394.sub1, %430.sub3, implicit $exec + %430.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub2, %394.sub2, %430.sub3, implicit $exec + %430.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %89.sub3, %394.sub3, %430.sub3, implicit $exec + undef %431.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %431.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub0, %391.sub0, %431.sub0, implicit $exec + undef %431.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub1, %391.sub1, %431.sub0, implicit $exec + undef %431.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub2, %391.sub2, %431.sub0, implicit $exec + undef %431.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub3, %391.sub3, %431.sub0, implicit $exec + undef 
%432.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %432.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub0, %392.sub0, %432.sub0, implicit $exec + undef %432.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub1, %392.sub1, %432.sub0, implicit $exec + undef %432.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub2, %392.sub2, %432.sub0, implicit $exec + undef %432.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub3, %392.sub3, %432.sub0, implicit $exec + undef %433.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %433.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub0, %393.sub0, %433.sub0, implicit $exec + undef %433.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub1, %393.sub1, %433.sub0, implicit $exec + undef %433.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub2, %393.sub2, %433.sub0, implicit $exec + undef %433.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub3, %393.sub3, %433.sub0, implicit $exec + undef %434.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %434.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub0, %394.sub0, %434.sub0, implicit $exec + undef %434.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub1, %394.sub1, %434.sub0, implicit $exec + undef %434.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub2, %394.sub2, %434.sub0, implicit $exec + undef %434.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %90.sub3, %394.sub3, %434.sub0, implicit $exec + %431.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %431.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub0, %391.sub0, %431.sub1, implicit $exec + %431.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub1, %391.sub1, %431.sub1, implicit $exec + %431.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub2, %391.sub2, %431.sub1, implicit $exec + %431.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub3, %391.sub3, %431.sub1, implicit $exec + %432.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %432.sub1:vreg_128_align2 = 
V_DOT4C_I32_I8_e32 %91.sub0, %392.sub0, %432.sub1, implicit $exec + %432.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub1, %392.sub1, %432.sub1, implicit $exec + %432.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub2, %392.sub2, %432.sub1, implicit $exec + %432.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub3, %392.sub3, %432.sub1, implicit $exec + %433.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %433.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub0, %393.sub0, %433.sub1, implicit $exec + %433.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub1, %393.sub1, %433.sub1, implicit $exec + %433.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub2, %393.sub2, %433.sub1, implicit $exec + %433.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub3, %393.sub3, %433.sub1, implicit $exec + %434.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %434.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub0, %394.sub0, %434.sub1, implicit $exec + %434.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub1, %394.sub1, %434.sub1, implicit $exec + %434.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub2, %394.sub2, %434.sub1, implicit $exec + %434.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %91.sub3, %394.sub3, %434.sub1, implicit $exec + %431.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %431.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub0, %391.sub0, %431.sub2, implicit $exec + %431.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub1, %391.sub1, %431.sub2, implicit $exec + %431.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub2, %391.sub2, %431.sub2, implicit $exec + %431.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub3, %391.sub3, %431.sub2, implicit $exec + %432.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %432.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub0, %392.sub0, %432.sub2, implicit $exec + %432.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub1, %392.sub1, %432.sub2, implicit $exec + 
%432.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub2, %392.sub2, %432.sub2, implicit $exec + %432.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub3, %392.sub3, %432.sub2, implicit $exec + %433.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %433.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub0, %393.sub0, %433.sub2, implicit $exec + %433.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub1, %393.sub1, %433.sub2, implicit $exec + %433.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub2, %393.sub2, %433.sub2, implicit $exec + %433.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub3, %393.sub3, %433.sub2, implicit $exec + %434.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %434.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub0, %394.sub0, %434.sub2, implicit $exec + %434.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub1, %394.sub1, %434.sub2, implicit $exec + %434.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub2, %394.sub2, %434.sub2, implicit $exec + %434.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %92.sub3, %394.sub3, %434.sub2, implicit $exec + %431.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %431.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub0, %391.sub0, %431.sub3, implicit $exec + %431.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub1, %391.sub1, %431.sub3, implicit $exec + %431.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub2, %391.sub2, %431.sub3, implicit $exec + %431.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub3, %391.sub3, %431.sub3, implicit $exec + %432.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %432.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub0, %392.sub0, %432.sub3, implicit $exec + %432.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub1, %392.sub1, %432.sub3, implicit $exec + %432.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub2, %392.sub2, %432.sub3, implicit $exec + %432.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub3, %392.sub3, %432.sub3, implicit 
$exec + %433.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %433.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub0, %393.sub0, %433.sub3, implicit $exec + %433.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub1, %393.sub1, %433.sub3, implicit $exec + %433.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub2, %393.sub2, %433.sub3, implicit $exec + %433.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub3, %393.sub3, %433.sub3, implicit $exec + %434.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %434.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub0, %394.sub0, %434.sub3, implicit $exec + %434.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub1, %394.sub1, %434.sub3, implicit $exec + %434.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub2, %394.sub2, %434.sub3, implicit $exec + %434.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %93.sub3, %394.sub3, %434.sub3, implicit $exec + undef %435.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %435.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub0, %391.sub0, %435.sub0, implicit $exec + undef %435.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub1, %391.sub1, %435.sub0, implicit $exec + undef %435.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub2, %391.sub2, %435.sub0, implicit $exec + undef %435.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub3, %391.sub3, %435.sub0, implicit $exec + undef %436.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %436.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub0, %392.sub0, %436.sub0, implicit $exec + undef %436.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub1, %392.sub1, %436.sub0, implicit $exec + undef %436.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub2, %392.sub2, %436.sub0, implicit $exec + undef %436.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub3, %392.sub3, %436.sub0, implicit $exec + undef %437.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %437.sub0:vreg_128_align2 = 
V_DOT4C_I32_I8_e32 %94.sub0, %393.sub0, %437.sub0, implicit $exec + undef %437.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub1, %393.sub1, %437.sub0, implicit $exec + undef %437.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub2, %393.sub2, %437.sub0, implicit $exec + undef %437.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub3, %393.sub3, %437.sub0, implicit $exec + undef %438.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %438.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub0, %394.sub0, %438.sub0, implicit $exec + undef %438.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub1, %394.sub1, %438.sub0, implicit $exec + undef %438.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub2, %394.sub2, %438.sub0, implicit $exec + undef %438.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %94.sub3, %394.sub3, %438.sub0, implicit $exec + %435.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %435.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub0, %391.sub0, %435.sub1, implicit $exec + %435.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub1, %391.sub1, %435.sub1, implicit $exec + %435.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub2, %391.sub2, %435.sub1, implicit $exec + %435.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub3, %391.sub3, %435.sub1, implicit $exec + %436.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %436.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub0, %392.sub0, %436.sub1, implicit $exec + %436.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub1, %392.sub1, %436.sub1, implicit $exec + %436.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub2, %392.sub2, %436.sub1, implicit $exec + %436.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub3, %392.sub3, %436.sub1, implicit $exec + %437.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %437.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub0, %393.sub0, %437.sub1, implicit $exec + %437.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub1, %393.sub1, 
%437.sub1, implicit $exec + %437.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub2, %393.sub2, %437.sub1, implicit $exec + %437.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub3, %393.sub3, %437.sub1, implicit $exec + %438.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %438.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub0, %394.sub0, %438.sub1, implicit $exec + %438.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub1, %394.sub1, %438.sub1, implicit $exec + %438.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub2, %394.sub2, %438.sub1, implicit $exec + %438.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %95.sub3, %394.sub3, %438.sub1, implicit $exec + %435.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %435.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub0, %391.sub0, %435.sub2, implicit $exec + %435.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub1, %391.sub1, %435.sub2, implicit $exec + %435.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub2, %391.sub2, %435.sub2, implicit $exec + %435.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub3, %391.sub3, %435.sub2, implicit $exec + %436.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %436.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub0, %392.sub0, %436.sub2, implicit $exec + %436.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub1, %392.sub1, %436.sub2, implicit $exec + %436.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub2, %392.sub2, %436.sub2, implicit $exec + %436.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub3, %392.sub3, %436.sub2, implicit $exec + %437.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %437.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub0, %393.sub0, %437.sub2, implicit $exec + %437.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub1, %393.sub1, %437.sub2, implicit $exec + %437.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub2, %393.sub2, %437.sub2, implicit $exec + %437.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub3, 
%393.sub3, %437.sub2, implicit $exec + %438.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %438.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub0, %394.sub0, %438.sub2, implicit $exec + %438.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub1, %394.sub1, %438.sub2, implicit $exec + %438.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub2, %394.sub2, %438.sub2, implicit $exec + %438.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %96.sub3, %394.sub3, %438.sub2, implicit $exec + %435.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %435.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub0, %391.sub0, %435.sub3, implicit $exec + %435.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub1, %391.sub1, %435.sub3, implicit $exec + %435.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub2, %391.sub2, %435.sub3, implicit $exec + %435.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub3, %391.sub3, %435.sub3, implicit $exec + %436.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %436.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub0, %392.sub0, %436.sub3, implicit $exec + %436.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub1, %392.sub1, %436.sub3, implicit $exec + %436.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub2, %392.sub2, %436.sub3, implicit $exec + %436.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub3, %392.sub3, %436.sub3, implicit $exec + %437.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %437.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub0, %393.sub0, %437.sub3, implicit $exec + %437.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub1, %393.sub1, %437.sub3, implicit $exec + %437.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub2, %393.sub2, %437.sub3, implicit $exec + %437.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub3, %393.sub3, %437.sub3, implicit $exec + %438.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %438.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub0, %394.sub0, 
%438.sub3, implicit $exec + %438.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub1, %394.sub1, %438.sub3, implicit $exec + %438.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub2, %394.sub2, %438.sub3, implicit $exec + %438.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %97.sub3, %394.sub3, %438.sub3, implicit $exec + undef %439.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %439.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub0, %391.sub0, %439.sub0, implicit $exec + undef %439.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub1, %391.sub1, %439.sub0, implicit $exec + undef %439.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub2, %391.sub2, %439.sub0, implicit $exec + undef %439.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub3, %391.sub3, %439.sub0, implicit $exec + undef %440.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %440.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub0, %392.sub0, %440.sub0, implicit $exec + undef %440.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub1, %392.sub1, %440.sub0, implicit $exec + undef %440.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub2, %392.sub2, %440.sub0, implicit $exec + undef %440.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub3, %392.sub3, %440.sub0, implicit $exec + undef %441.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %441.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub0, %393.sub0, %441.sub0, implicit $exec + undef %441.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub1, %393.sub1, %441.sub0, implicit $exec + undef %441.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub2, %393.sub2, %441.sub0, implicit $exec + undef %441.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub3, %393.sub3, %441.sub0, implicit $exec + undef %442.sub0:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + undef %442.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub0, %394.sub0, %442.sub0, implicit $exec + undef %442.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 
%98.sub1, %394.sub1, %442.sub0, implicit $exec + undef %442.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub2, %394.sub2, %442.sub0, implicit $exec + undef %442.sub0:vreg_128_align2 = V_DOT4C_I32_I8_e32 %98.sub3, %394.sub3, %442.sub0, implicit $exec + %439.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %439.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub0, %391.sub0, %439.sub1, implicit $exec + %439.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub1, %391.sub1, %439.sub1, implicit $exec + %439.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub2, %391.sub2, %439.sub1, implicit $exec + %439.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub3, %391.sub3, %439.sub1, implicit $exec + %440.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %440.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub0, %392.sub0, %440.sub1, implicit $exec + %440.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub1, %392.sub1, %440.sub1, implicit $exec + %440.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub2, %392.sub2, %440.sub1, implicit $exec + %440.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub3, %392.sub3, %440.sub1, implicit $exec + %441.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %441.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub0, %393.sub0, %441.sub1, implicit $exec + %441.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub1, %393.sub1, %441.sub1, implicit $exec + %441.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub2, %393.sub2, %441.sub1, implicit $exec + %441.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub3, %393.sub3, %441.sub1, implicit $exec + %442.sub1:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %442.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub0, %394.sub0, %442.sub1, implicit $exec + %442.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub1, %394.sub1, %442.sub1, implicit $exec + %442.sub1:vreg_128_align2 = V_DOT4C_I32_I8_e32 %99.sub2, %394.sub2, %442.sub1, implicit $exec + %442.sub1:vreg_128_align2 
= V_DOT4C_I32_I8_e32 %99.sub3, %394.sub3, %442.sub1, implicit $exec + %439.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %439.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub0, %391.sub0, %439.sub2, implicit $exec + %439.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub1, %391.sub1, %439.sub2, implicit $exec + %439.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub2, %391.sub2, %439.sub2, implicit $exec + %439.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub3, %391.sub3, %439.sub2, implicit $exec + %440.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %440.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub0, %392.sub0, %440.sub2, implicit $exec + %440.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub1, %392.sub1, %440.sub2, implicit $exec + %440.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub2, %392.sub2, %440.sub2, implicit $exec + %440.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub3, %392.sub3, %440.sub2, implicit $exec + %441.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %441.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub0, %393.sub0, %441.sub2, implicit $exec + %441.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub1, %393.sub1, %441.sub2, implicit $exec + %441.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub2, %393.sub2, %441.sub2, implicit $exec + %441.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub3, %393.sub3, %441.sub2, implicit $exec + %442.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %442.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub0, %394.sub0, %442.sub2, implicit $exec + %442.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub1, %394.sub1, %442.sub2, implicit $exec + %442.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub2, %394.sub2, %442.sub2, implicit $exec + %442.sub2:vreg_128_align2 = V_DOT4C_I32_I8_e32 %100.sub3, %394.sub3, %442.sub2, implicit $exec + %439.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %439.sub3:vreg_128_align2 = 
V_DOT4C_I32_I8_e32 %101.sub0, %391.sub0, %439.sub3, implicit $exec + %439.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub1, %391.sub1, %439.sub3, implicit $exec + %439.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub2, %391.sub2, %439.sub3, implicit $exec + %439.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub3, %391.sub3, %439.sub3, implicit $exec + %440.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %440.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub0, %392.sub0, %440.sub3, implicit $exec + %440.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub1, %392.sub1, %440.sub3, implicit $exec + %440.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub2, %392.sub2, %440.sub3, implicit $exec + %440.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub3, %392.sub3, %440.sub3, implicit $exec + %441.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %441.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub0, %393.sub0, %441.sub3, implicit $exec + %441.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub1, %393.sub1, %441.sub3, implicit $exec + %441.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub2, %393.sub2, %441.sub3, implicit $exec + %441.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub3, %393.sub3, %441.sub3, implicit $exec + %442.sub3:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %442.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub0, %394.sub0, %442.sub3, implicit $exec + %442.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub1, %394.sub1, %442.sub3, implicit $exec + %442.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub2, %394.sub2, %442.sub3, implicit $exec + %442.sub3:vreg_128_align2 = V_DOT4C_I32_I8_e32 %101.sub3, %394.sub3, %442.sub3, implicit $exec + DS_WRITE_B128_gfx9 %140, %395, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %140, %396, 2048, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %397, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %398, 2048, 0, implicit $exec + WAVE_BARRIER + %443:vreg_128_align2 = 
DS_READ_B128_gfx9 %183, 0, 0, implicit $exec + %444:vreg_128_align2 = DS_READ_B128_gfx9 %183, 256, 0, implicit $exec + %445:vreg_128_align2 = DS_READ_B128_gfx9 %183, 128, 0, implicit $exec + %446:vreg_128_align2 = DS_READ_B128_gfx9 %183, 384, 0, implicit $exec + WAVE_BARRIER + DS_WRITE_B128_gfx9 %140, %399, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %140, %400, 2048, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %401, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %402, 2048, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %140, %403, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %140, %404, 2048, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %405, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %406, 2048, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %140, %407, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %140, %408, 2048, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %409, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %410, 2048, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %140, %411, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %140, %412, 2048, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %413, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %414, 2048, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %140, %415, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %140, %416, 2048, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %417, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %418, 2048, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %140, %419, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %140, %420, 2048, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %421, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %422, 2048, 0, implicit $exec + WAVE_BARRIER + DS_WRITE_B128_gfx9 %140, %431, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %140, %432, 2048, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %433, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %434, 2048, 0, implicit $exec + 
WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %140, %435, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %140, %436, 2048, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %437, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %438, 2048, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %140, %439, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %140, %440, 2048, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %441, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %142, %442, 2048, 0, implicit $exec + WAVE_BARRIER + %447:vgpr_32 = GLOBAL_LOAD_UBYTE_SADDR %5, %11, 0, 0, implicit $exec + %103:sreg_32_xm0 = V_READFIRSTLANE_B32 %447, implicit $exec + %104:vgpr_32 = GLOBAL_LOAD_USHORT_SADDR %10.sub0_sub1, %11, 0, 0, implicit $exec + %448:sreg_32 = S_SEXT_I32_I8 %317 + %449:vgpr_32 = V_MOV_B32_dpp undef %449, %11, 280, 15, 15, -1, implicit $exec + %450:vgpr_32 = V_MAXIMUM3_F32_e64 0, %449, 0, 0, 0, 0, 0, 0, implicit $exec + %451:vgpr_32 = V_MOV_B32_dpp undef %451, %450, 276, 15, 15, -1, implicit $exec + %452:vgpr_32 = V_MAXIMUM3_F32_e64 0, %450, 0, %451, 0, %451, 0, 0, implicit $exec + %453:vgpr_32 = V_MOV_B32_dpp undef %453, %452, 274, 15, 15, -1, implicit $exec + %454:vgpr_32 = V_MAXIMUM3_F32_e64 0, %452, 0, %453, 0, %453, 0, 0, implicit $exec + %455:vgpr_32 = V_MOV_B32_dpp undef %455, %454, 273, 15, 15, -1, implicit $exec + %456:vgpr_32 = V_MAXIMUM3_F32_e64 0, %454, 0, %455, 0, %455, 0, 0, implicit $exec + %457:vgpr_32 = COPY %456 + %457:vgpr_32 = V_MOV_B32_dpp %457, %457, 322, 10, 15, -1, implicit $exec + %458:vgpr_32 = V_MAXIMUM3_F32_e64 0, %456, 0, %457, 0, %457, 0, 0, implicit $exec + %459:vgpr_32 = V_MOV_B32_dpp undef %459, %458, 323, 15, 15, -1, implicit $exec + %460:vgpr_32 = V_MAXIMUM3_F32_e64 0, %458, 0, %459, 0, %459, 0, 0, implicit $exec + %461:sgpr_32 = V_READLANE_B32 %460, 63 + %462:vgpr_32 = V_SUBREV_U32_e32 %448, %444.sub0, implicit $exec + %463:vgpr_32 = V_SUBREV_U32_e32 %448, %443.sub0, implicit $exec + %464:vgpr_32 = V_CVT_F32_I32_e32 %462, implicit 
$mode, implicit $exec + %465:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %464, implicit $mode, implicit $exec + %466:vgpr_32 = V_CVT_F32_I32_e32 %463, implicit $mode, implicit $exec + %467:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %466, implicit $mode, implicit $exec + %468:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, %467, 0, %465, 0, 0, implicit $mode, implicit $exec + %469:vgpr_32 = nofpexcept V_PK_MUL_F16 0, %318, 8, %468, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %470.sub0:vreg_64_align2 = nofpexcept V_CVT_F32_F16_e32 %469, implicit $mode, implicit $exec + %471:vgpr_32 = V_LSHRREV_B32_e32 16, %469, implicit $exec + %470.sub1:vreg_64_align2 = nofpexcept V_CVT_F32_F16_e32 %471, implicit $mode, implicit $exec + %472:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %470, 0, %184, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %473:vgpr_32 = V_MAXIMUM3_F32_e64 0, %472.sub0, 0, %472.sub1, 0, %472.sub1, 0, 0, implicit $exec + %474:vgpr_32 = V_MOV_B32_dpp undef %474, %473, 280, 15, 15, -1, implicit $exec + %475:vgpr_32 = V_MAXIMUM3_F32_e64 0, %473, 0, %474, 0, %474, 0, 0, implicit $exec + %476:vgpr_32 = V_MOV_B32_dpp undef %476, %475, 276, 15, 15, -1, implicit $exec + %477:vgpr_32 = V_MAXIMUM3_F32_e64 0, %475, 0, %476, 0, %476, 0, 0, implicit $exec + %478:vgpr_32 = V_MOV_B32_dpp undef %478, %477, 274, 15, 15, -1, implicit $exec + %479:vgpr_32 = V_MAXIMUM3_F32_e64 0, %477, 0, %478, 0, %478, 0, 0, implicit $exec + %480:vgpr_32 = V_MOV_B32_dpp undef %480, %479, 273, 15, 15, -1, implicit $exec + %481:vgpr_32 = V_MAXIMUM3_F32_e64 0, %479, 0, %480, 0, %480, 0, 0, implicit $exec + %482:vgpr_32 = COPY %481 + %482:vgpr_32 = V_MOV_B32_dpp %482, %482, 322, 10, 15, -1, implicit $exec + %483:vgpr_32 = V_MAXIMUM3_F32_e64 0, %481, 0, %482, 0, %482, 0, 0, implicit $exec + %484:vgpr_32 = V_MOV_B32_dpp undef %484, %483, 323, 15, 15, -1, implicit $exec + %485:vgpr_32 = V_MAXIMUM3_F32_e64 0, %483, 0, %484, 0, %484, 0, 0, implicit $exec + %486:sgpr_32 = V_READLANE_B32 %485, 63 + 
%487:vgpr_32 = V_SUBREV_U32_e32 %448, %444.sub1, implicit $exec + %488:vgpr_32 = V_SUBREV_U32_e32 %448, %443.sub1, implicit $exec + %489:vgpr_32 = V_CVT_F32_I32_e32 %487, implicit $mode, implicit $exec + %490:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %489, implicit $mode, implicit $exec + %491:vgpr_32 = V_CVT_F32_I32_e32 %488, implicit $mode, implicit $exec + %492:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %491, implicit $mode, implicit $exec + %493:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, %492, 0, %490, 0, 0, implicit $mode, implicit $exec + %494:vgpr_32 = nofpexcept V_PK_MUL_F16 0, %318, 8, %493, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %495.sub0:vreg_64_align2 = nofpexcept V_CVT_F32_F16_e32 %494, implicit $mode, implicit $exec + %496:vgpr_32 = V_LSHRREV_B32_e32 16, %494, implicit $exec + %495.sub1:vreg_64_align2 = nofpexcept V_CVT_F32_F16_e32 %496, implicit $mode, implicit $exec + %497:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %495, 0, %184, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %498:vgpr_32 = V_MAXIMUM3_F32_e64 0, %497.sub0, 0, %497.sub1, 0, %497.sub1, 0, 0, implicit $exec + %499:vgpr_32 = V_MOV_B32_dpp undef %499, %498, 280, 15, 15, -1, implicit $exec + %500:vgpr_32 = V_MAXIMUM3_F32_e64 0, %498, 0, %499, 0, %499, 0, 0, implicit $exec + %501:vgpr_32 = V_MOV_B32_dpp undef %501, %500, 276, 15, 15, -1, implicit $exec + %502:vgpr_32 = V_MAXIMUM3_F32_e64 0, %500, 0, %501, 0, %501, 0, 0, implicit $exec + %503:vgpr_32 = V_MOV_B32_dpp undef %503, %502, 274, 15, 15, -1, implicit $exec + %504:vgpr_32 = V_MAXIMUM3_F32_e64 0, %502, 0, %503, 0, %503, 0, 0, implicit $exec + %505:vgpr_32 = V_MOV_B32_dpp undef %505, %504, 273, 15, 15, -1, implicit $exec + %506:vgpr_32 = V_MAXIMUM3_F32_e64 0, %504, 0, %505, 0, %505, 0, 0, implicit $exec + %507:vgpr_32 = COPY %506 + %507:vgpr_32 = V_MOV_B32_dpp %507, %507, 322, 10, 15, -1, implicit $exec + %508:vgpr_32 = V_MAXIMUM3_F32_e64 0, %506, 0, %507, 0, %507, 0, 0, implicit $exec + %509:vgpr_32 = V_MOV_B32_dpp undef 
%509, %508, 323, 15, 15, -1, implicit $exec + %510:vgpr_32 = V_MAXIMUM3_F32_e64 0, %508, 0, %509, 0, %509, 0, 0, implicit $exec + %511:sgpr_32 = V_READLANE_B32 %510, 63 + %512:vgpr_32 = V_SUBREV_U32_e32 %448, %444.sub2, implicit $exec + %513:vgpr_32 = V_SUBREV_U32_e32 %448, %443.sub2, implicit $exec + %514:vgpr_32 = V_CVT_F32_I32_e32 %512, implicit $mode, implicit $exec + %515:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %514, implicit $mode, implicit $exec + %516:vgpr_32 = V_CVT_F32_I32_e32 %513, implicit $mode, implicit $exec + %517:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %516, implicit $mode, implicit $exec + %518:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, %517, 0, %515, 0, 0, implicit $mode, implicit $exec + %519:vgpr_32 = nofpexcept V_PK_MUL_F16 0, %318, 8, %518, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %520.sub0:vreg_64_align2 = nofpexcept V_CVT_F32_F16_e32 %519, implicit $mode, implicit $exec + %521:vgpr_32 = V_LSHRREV_B32_e32 16, %519, implicit $exec + %520.sub1:vreg_64_align2 = nofpexcept V_CVT_F32_F16_e32 %521, implicit $mode, implicit $exec + %522:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %520, 0, %184, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %523:vgpr_32 = V_MAXIMUM3_F32_e64 0, %522.sub0, 0, %522.sub1, 0, %522.sub1, 0, 0, implicit $exec + %524:vgpr_32 = V_MOV_B32_dpp undef %524, %523, 280, 15, 15, -1, implicit $exec + %525:vgpr_32 = V_MAXIMUM3_F32_e64 0, %523, 0, %524, 0, %524, 0, 0, implicit $exec + %526:vgpr_32 = V_MOV_B32_dpp undef %526, %525, 276, 15, 15, -1, implicit $exec + %527:vgpr_32 = V_MAXIMUM3_F32_e64 0, %525, 0, %526, 0, %526, 0, 0, implicit $exec + %528:vgpr_32 = V_MOV_B32_dpp undef %528, %527, 274, 15, 15, -1, implicit $exec + %529:vgpr_32 = V_MAXIMUM3_F32_e64 0, %527, 0, %528, 0, %528, 0, 0, implicit $exec + %530:vgpr_32 = V_MOV_B32_dpp undef %530, %529, 273, 15, 15, -1, implicit $exec + %531:vgpr_32 = V_MAXIMUM3_F32_e64 0, %529, 0, %530, 0, %530, 0, 0, implicit $exec + %532:vgpr_32 = COPY %531 + %532:vgpr_32 = 
V_MOV_B32_dpp %532, %532, 322, 10, 15, -1, implicit $exec + %533:vgpr_32 = V_MAXIMUM3_F32_e64 0, %531, 0, %532, 0, %532, 0, 0, implicit $exec + %534:vgpr_32 = V_MOV_B32_dpp undef %534, %533, 323, 15, 15, -1, implicit $exec + %535:vgpr_32 = V_MAXIMUM3_F32_e64 0, %533, 0, %534, 0, %534, 0, 0, implicit $exec + %536:sgpr_32 = V_READLANE_B32 %535, 63 + %537:vgpr_32 = V_SUBREV_U32_e32 %448, %444.sub3, implicit $exec + %538:vgpr_32 = V_SUBREV_U32_e32 %448, %443.sub3, implicit $exec + %539:vgpr_32 = V_CVT_F32_I32_e32 %537, implicit $mode, implicit $exec + %540:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %539, implicit $mode, implicit $exec + %541:vgpr_32 = V_CVT_F32_I32_e32 %538, implicit $mode, implicit $exec + %542:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %541, implicit $mode, implicit $exec + %543:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, %542, 0, %540, 0, 0, implicit $mode, implicit $exec + %544:vgpr_32 = nofpexcept V_PK_MUL_F16 0, %318, 8, %543, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %545.sub0:vreg_64_align2 = nofpexcept V_CVT_F32_F16_e32 %544, implicit $mode, implicit $exec + %546:vgpr_32 = V_LSHRREV_B32_e32 16, %544, implicit $exec + %545.sub1:vreg_64_align2 = nofpexcept V_CVT_F32_F16_e32 %546, implicit $mode, implicit $exec + %547:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %545, 0, %184, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %548:vgpr_32 = V_MAXIMUM3_F32_e64 0, %547.sub0, 0, %547.sub1, 0, %547.sub1, 0, 0, implicit $exec + %549:vgpr_32 = V_MOV_B32_dpp undef %549, %548, 280, 15, 15, -1, implicit $exec + %550:vgpr_32 = V_MAXIMUM3_F32_e64 0, %548, 0, %549, 0, %549, 0, 0, implicit $exec + %551:vgpr_32 = V_MOV_B32_dpp undef %551, %550, 276, 15, 15, -1, implicit $exec + %552:vgpr_32 = V_MAXIMUM3_F32_e64 0, %550, 0, %551, 0, %551, 0, 0, implicit $exec + %553:vgpr_32 = V_MOV_B32_dpp undef %553, %552, 274, 15, 15, -1, implicit $exec + %554:vgpr_32 = V_MAXIMUM3_F32_e64 0, %552, 0, %553, 0, %553, 0, 0, implicit $exec + %555:vgpr_32 = V_MOV_B32_dpp undef 
%555, %554, 273, 15, 15, -1, implicit $exec + %556:vgpr_32 = V_MAXIMUM3_F32_e64 0, %554, 0, %555, 0, %555, 0, 0, implicit $exec + %557:vgpr_32 = COPY %556 + %557:vgpr_32 = V_MOV_B32_dpp %557, %557, 322, 10, 15, -1, implicit $exec + %558:vgpr_32 = V_MAXIMUM3_F32_e64 0, %556, 0, %557, 0, %557, 0, 0, implicit $exec + %559:vgpr_32 = V_MOV_B32_dpp undef %559, %558, 323, 15, 15, -1, implicit $exec + %560:vgpr_32 = V_MAXIMUM3_F32_e64 0, %558, 0, %559, 0, %559, 0, 0, implicit $exec + %561:sgpr_32 = V_READLANE_B32 %560, 63 + %562:vgpr_32 = V_SUBREV_U32_e32 %448, %446.sub0, implicit $exec + %563:vgpr_32 = V_SUBREV_U32_e32 %448, %445.sub0, implicit $exec + %564:vgpr_32 = V_CVT_F32_I32_e32 %562, implicit $mode, implicit $exec + %565:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %564, implicit $mode, implicit $exec + %566:vgpr_32 = V_CVT_F32_I32_e32 %563, implicit $mode, implicit $exec + %567:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %566, implicit $mode, implicit $exec + %568:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, %567, 0, %565, 0, 0, implicit $mode, implicit $exec + %569:vgpr_32 = nofpexcept V_PK_MUL_F16 0, %318, 8, %568, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %570.sub0:vreg_64_align2 = nofpexcept V_CVT_F32_F16_e32 %569, implicit $mode, implicit $exec + %571:vgpr_32 = V_LSHRREV_B32_e32 16, %569, implicit $exec + %570.sub1:vreg_64_align2 = nofpexcept V_CVT_F32_F16_e32 %571, implicit $mode, implicit $exec + %572:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %570, 0, %184, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %573:vgpr_32 = V_MAXIMUM3_F32_e64 0, %572.sub0, 0, %572.sub1, 0, %572.sub1, 0, 0, implicit $exec + %574:vgpr_32 = V_MOV_B32_dpp undef %574, %573, 280, 15, 15, -1, implicit $exec + %575:vgpr_32 = V_MAXIMUM3_F32_e64 0, %573, 0, %574, 0, %574, 0, 0, implicit $exec + %576:vgpr_32 = V_MOV_B32_dpp undef %576, %575, 276, 15, 15, -1, implicit $exec + %577:vgpr_32 = V_MAXIMUM3_F32_e64 0, %575, 0, %576, 0, %576, 0, 0, implicit $exec + %578:vgpr_32 = V_MOV_B32_dpp 
undef %578, %577, 274, 15, 15, -1, implicit $exec + %579:vgpr_32 = V_MAXIMUM3_F32_e64 0, %577, 0, %578, 0, %578, 0, 0, implicit $exec + %580:vgpr_32 = V_MOV_B32_dpp undef %580, %579, 273, 15, 15, -1, implicit $exec + %581:vgpr_32 = V_MAXIMUM3_F32_e64 0, %579, 0, %580, 0, %580, 0, 0, implicit $exec + %582:vgpr_32 = COPY %581 + %582:vgpr_32 = V_MOV_B32_dpp %582, %582, 322, 10, 15, -1, implicit $exec + %583:vgpr_32 = V_MAXIMUM3_F32_e64 0, %581, 0, %582, 0, %582, 0, 0, implicit $exec + %584:vgpr_32 = V_MOV_B32_dpp undef %584, %583, 323, 15, 15, -1, implicit $exec + %585:vgpr_32 = V_MAXIMUM3_F32_e64 0, %583, 0, %584, 0, %584, 0, 0, implicit $exec + %586:sgpr_32 = V_READLANE_B32 %585, 63 + %587:vgpr_32 = V_SUBREV_U32_e32 %448, %446.sub1, implicit $exec + %588:vgpr_32 = V_SUBREV_U32_e32 %448, %445.sub1, implicit $exec + %589:vgpr_32 = V_CVT_F32_I32_e32 %587, implicit $mode, implicit $exec + %590:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %589, implicit $mode, implicit $exec + %591:vgpr_32 = V_CVT_F32_I32_e32 %588, implicit $mode, implicit $exec + %592:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %591, implicit $mode, implicit $exec + %593:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, %592, 0, %590, 0, 0, implicit $mode, implicit $exec + %594:vgpr_32 = nofpexcept V_PK_MUL_F16 0, %318, 8, %593, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %595.sub0:vreg_64_align2 = nofpexcept V_CVT_F32_F16_e32 %594, implicit $mode, implicit $exec + %596:vgpr_32 = V_LSHRREV_B32_e32 16, %594, implicit $exec + %595.sub1:vreg_64_align2 = nofpexcept V_CVT_F32_F16_e32 %596, implicit $mode, implicit $exec + %597:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %595, 0, %184, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %598:vgpr_32 = V_MAXIMUM3_F32_e64 0, %597.sub0, 0, %597.sub1, 0, %597.sub1, 0, 0, implicit $exec + %599:vgpr_32 = V_MOV_B32_dpp undef %599, %598, 280, 15, 15, -1, implicit $exec + %600:vgpr_32 = V_MAXIMUM3_F32_e64 0, %598, 0, %599, 0, %599, 0, 0, implicit $exec + %601:vgpr_32 = 
V_MOV_B32_dpp undef %601, %600, 276, 15, 15, -1, implicit $exec + %602:vgpr_32 = V_MAXIMUM3_F32_e64 0, %600, 0, %601, 0, %601, 0, 0, implicit $exec + %603:vgpr_32 = V_MOV_B32_dpp undef %603, %602, 274, 15, 15, -1, implicit $exec + %604:vgpr_32 = V_MAXIMUM3_F32_e64 0, %602, 0, %603, 0, %603, 0, 0, implicit $exec + %605:vgpr_32 = V_MOV_B32_dpp undef %605, %604, 273, 15, 15, -1, implicit $exec + %606:vgpr_32 = V_MAXIMUM3_F32_e64 0, %604, 0, %605, 0, %605, 0, 0, implicit $exec + %607:vgpr_32 = COPY %606 + %607:vgpr_32 = V_MOV_B32_dpp %607, %607, 322, 10, 15, -1, implicit $exec + %608:vgpr_32 = V_MAXIMUM3_F32_e64 0, %606, 0, %607, 0, %607, 0, 0, implicit $exec + %609:vgpr_32 = V_MOV_B32_dpp undef %609, %608, 323, 15, 15, -1, implicit $exec + %610:vgpr_32 = V_MAXIMUM3_F32_e64 0, %608, 0, %609, 0, %609, 0, 0, implicit $exec + %611:sgpr_32 = V_READLANE_B32 %610, 63 + %612:vgpr_32 = V_SUBREV_U32_e32 %448, %446.sub2, implicit $exec + %613:vgpr_32 = V_SUBREV_U32_e32 %448, %445.sub2, implicit $exec + %614:vgpr_32 = V_CVT_F32_I32_e32 %612, implicit $mode, implicit $exec + %615:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %614, implicit $mode, implicit $exec + %616:vgpr_32 = V_CVT_F32_I32_e32 %613, implicit $mode, implicit $exec + %617:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %616, implicit $mode, implicit $exec + %618:vgpr_32 = nofpexcept V_PACK_B32_F16_e64 0, %617, 0, %615, 0, 0, implicit $mode, implicit $exec + %619:vgpr_32 = nofpexcept V_PK_MUL_F16 0, %318, 8, %618, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %620.sub0:vreg_64_align2 = nofpexcept V_CVT_F32_F16_e32 %619, implicit $mode, implicit $exec + %621:vgpr_32 = V_LSHRREV_B32_e32 16, %619, implicit $exec + %620.sub1:vreg_64_align2 = nofpexcept V_CVT_F32_F16_e32 %621, implicit $mode, implicit $exec + %622:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %620, 0, %184, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %623:vgpr_32 = V_MAXIMUM3_F32_e64 0, %622.sub0, 0, %622.sub1, 0, %622.sub1, 0, 0, implicit $exec + 
%624:vgpr_32 = V_MOV_B32_dpp undef %624, %623, 280, 15, 15, -1, implicit $exec + %625:vgpr_32 = V_MAXIMUM3_F32_e64 0, %623, 0, %624, 0, %624, 0, 0, implicit $exec + %626:vgpr_32 = V_MOV_B32_dpp undef %626, %625, 276, 15, 15, -1, implicit $exec + %627:vgpr_32 = V_MAXIMUM3_F32_e64 0, %625, 0, %626, 0, %626, 0, 0, implicit $exec + %628:vgpr_32 = V_MOV_B32_dpp undef %628, %627, 274, 15, 15, -1, implicit $exec + %629:vgpr_32 = V_MAXIMUM3_F32_e64 0, %627, 0, %628, 0, %628, 0, 0, implicit $exec + %630:vgpr_32 = V_MOV_B32_dpp undef %630, %629, 273, 15, 15, -1, implicit $exec + %631:vgpr_32 = V_MAXIMUM3_F32_e64 0, %629, 0, %630, 0, %630, 0, 0, implicit $exec + %632:vgpr_32 = COPY %631 + %632:vgpr_32 = V_MOV_B32_dpp %632, %632, 322, 10, 15, -1, implicit $exec + %633:vgpr_32 = V_MAXIMUM3_F32_e64 0, %631, 0, %632, 0, %632, 0, 0, implicit $exec + %634:vgpr_32 = V_MOV_B32_dpp undef %634, %633, 323, 15, 15, -1, implicit $exec + %635:vgpr_32 = V_MAXIMUM3_F32_e64 0, %633, 0, %634, 0, %634, 0, 0, implicit $exec + %636:sgpr_32 = V_READLANE_B32 %635, 63 + %637:vgpr_32 = nofpexcept V_ADD_F32_e32 0, %449, implicit $mode, implicit $exec + %638:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %638, 0, %637, 0, %637, 276, 15, 15, 1, implicit $mode, implicit $exec + %639:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %639, 0, %638, 0, %638, 274, 15, 15, 1, implicit $mode, implicit $exec + %640:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %640, 0, %639, 0, %639, 273, 15, 15, 1, implicit $mode, implicit $exec + %641:vgpr_32 = COPY %640 + %641:vgpr_32 = V_MOV_B32_dpp %641, %641, 322, 10, 15, -1, implicit $exec + %642:vgpr_32 = nofpexcept V_ADD_F32_e32 %640, %641, implicit $mode, implicit $exec + %643:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %643, 0, %642, 0, %642, 323, 15, 15, 1, implicit $mode, implicit $exec + undef %644.sub1:sreg_64 = V_READLANE_B32 %643, 63 + %645:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %511, 0, %252.sub0, 0, implicit $mode, implicit $exec + %646:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, 
%486, 0, %251.sub0, 0, implicit $mode, implicit $exec + %647:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %511, 0, %511, 0, implicit $mode, implicit $exec + %648:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %486, 0, %486, 0, implicit $mode, implicit $exec + %649:sreg_64_xexec = S_OR_B64 %647, %645, implicit-def dead $scc + %650:sreg_64_xexec = S_OR_B64 %648, %646, implicit-def dead $scc + %651:vgpr_32 = COPY %511 + undef %252.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %252.sub0, 0, %651, %649, implicit $exec + %652:vgpr_32 = COPY %486 + undef %251.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %251.sub0, 0, %652, %650, implicit $exec + %653:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %472, 3, %251, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %654:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %497, 3, %252, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %655:vgpr_32 = nofpexcept V_EXP_F32_e32 %653.sub0, implicit $mode, implicit $exec + %656:vgpr_32 = nofpexcept V_EXP_F32_e32 %653.sub1, implicit $mode, implicit $exec + undef %657.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %654.sub0, implicit $mode, implicit $exec + undef %658.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %654.sub1, implicit $mode, implicit $exec + %659:vgpr_32 = nofpexcept V_ADD_F32_e32 %655, %656, implicit $mode, implicit $exec + %660:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %660, 0, %659, 0, %659, 280, 15, 15, 1, implicit $mode, implicit $exec + %661:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %661, 0, %660, 0, %660, 276, 15, 15, 1, implicit $mode, implicit $exec + %662:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %662, 0, %661, 0, %661, 274, 15, 15, 1, implicit $mode, implicit $exec + %657.sub0:vreg_64_align2 = nofpexcept V_ADD_F32_dpp undef %657.sub0, 0, %662, 0, %662, 273, 15, 15, 1, implicit $mode, implicit $exec + %658.sub0:vreg_64_align2 = COPY %657.sub0 + %658.sub0:vreg_64_align2 = V_MOV_B32_dpp %658.sub0, %658.sub0, 322, 10, 15, -1, implicit $exec + %663:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %657, 
8, %658, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %664.sub0:vreg_64_align2 = V_MOV_B32_dpp undef %664.sub0, %663.sub0, 323, 15, 15, -1, implicit $exec + %664.sub1:vreg_64_align2 = V_MOV_B32_dpp undef %664.sub1, %663.sub1, 280, 15, 15, -1, implicit $exec + %665:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %663, 8, %664, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %666.sub0:sreg_64 = V_READLANE_B32 %665.sub0, 63 + %667:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %667, 0, %665.sub1, 0, %665.sub1, 276, 15, 15, 1, implicit $mode, implicit $exec + %668:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %668, 0, %667, 0, %667, 274, 15, 15, 1, implicit $mode, implicit $exec + undef %669.sub0:vreg_64_align2 = nofpexcept V_ADD_F32_dpp undef %669.sub0, 0, %668, 0, %668, 273, 15, 15, 1, implicit $mode, implicit $exec + undef %670.sub0:vreg_64_align2 = COPY %669.sub0 + undef %670.sub0:vreg_64_align2 = V_MOV_B32_dpp %670.sub0, %670.sub0, 322, 10, 15, -1, implicit $exec + %251.sub1:vreg_64_align2 = COPY %252.sub0 + %671:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %320, 11, %251, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %672:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %561, 0, %250.sub0, 0, implicit $mode, implicit $exec + %673:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %536, 0, %249.sub0, 0, implicit $mode, implicit $exec + %674:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %561, 0, %561, 0, implicit $mode, implicit $exec + %675:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %536, 0, %536, 0, implicit $mode, implicit $exec + %676:sreg_64_xexec = S_OR_B64 %674, %672, implicit-def dead $scc + %677:sreg_64_xexec = S_OR_B64 %675, %673, implicit-def dead $scc + %678:vgpr_32 = COPY %561 + undef %250.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %250.sub0, 0, %678, %676, implicit $exec + %679:vgpr_32 = COPY %536 + undef %249.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %249.sub0, 0, %679, %677, implicit $exec + %680:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %522, 3, %249, 0, 0, 0, 0, 0, 
implicit $mode, implicit $exec + %681:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %547, 3, %250, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %669.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %680.sub0, implicit $mode, implicit $exec + %670.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %680.sub1, implicit $mode, implicit $exec + undef %682.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %681.sub0, implicit $mode, implicit $exec + undef %683.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %681.sub1, implicit $mode, implicit $exec + %684:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %669, 8, %670, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %685.sub0:vreg_64_align2 = V_MOV_B32_dpp undef %685.sub0, %684.sub0, 323, 15, 15, -1, implicit $exec + %685.sub1:vreg_64_align2 = V_MOV_B32_dpp undef %685.sub1, %684.sub1, 280, 15, 15, -1, implicit $exec + %686:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %684, 8, %685, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %666.sub1:sreg_64 = V_READLANE_B32 %686.sub0, 63 + %687:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %687, 0, %686.sub1, 0, %686.sub1, 276, 15, 15, 1, implicit $mode, implicit $exec + %688:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %688, 0, %687, 0, %687, 274, 15, 15, 1, implicit $mode, implicit $exec + %682.sub0:vreg_64_align2 = nofpexcept V_ADD_F32_dpp undef %682.sub0, 0, %688, 0, %688, 273, 15, 15, 1, implicit $mode, implicit $exec + %683.sub0:vreg_64_align2 = COPY %682.sub0 + %683.sub0:vreg_64_align2 = V_MOV_B32_dpp %683.sub0, %683.sub0, 322, 10, 15, -1, implicit $exec + %689:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %682, 8, %683, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %690.sub0:vreg_64_align2 = V_MOV_B32_dpp undef %690.sub0, %689.sub0, 323, 15, 15, -1, implicit $exec + %690.sub1:vreg_64_align2 = V_MOV_B32_dpp undef %690.sub1, %689.sub1, 280, 15, 15, -1, implicit $exec + %691:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %689, 8, %690, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef 
%692.sub0:sreg_64 = V_READLANE_B32 %691.sub0, 63 + %693:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %693, 0, %691.sub1, 0, %691.sub1, 276, 15, 15, 1, implicit $mode, implicit $exec + %694:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %694, 0, %693, 0, %693, 274, 15, 15, 1, implicit $mode, implicit $exec + undef %695.sub0:vreg_64_align2 = nofpexcept V_ADD_F32_dpp undef %695.sub0, 0, %694, 0, %694, 273, 15, 15, 1, implicit $mode, implicit $exec + undef %696.sub0:vreg_64_align2 = COPY %695.sub0 + undef %696.sub0:vreg_64_align2 = V_MOV_B32_dpp %696.sub0, %696.sub0, 322, 10, 15, -1, implicit $exec + %249.sub1:vreg_64_align2 = COPY %250.sub0 + %697:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %321, 11, %249, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %698:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %611, 0, %248.sub0, 0, implicit $mode, implicit $exec + %699:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %586, 0, %247.sub0, 0, implicit $mode, implicit $exec + %700:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %611, 0, %611, 0, implicit $mode, implicit $exec + %701:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %586, 0, %586, 0, implicit $mode, implicit $exec + %702:sreg_64_xexec = S_OR_B64 %700, %698, implicit-def dead $scc + %703:sreg_64_xexec = S_OR_B64 %701, %699, implicit-def dead $scc + %704:vgpr_32 = COPY %611 + undef %248.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %248.sub0, 0, %704, %702, implicit $exec + %705:vgpr_32 = COPY %586 + undef %247.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %247.sub0, 0, %705, %703, implicit $exec + %706:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %572, 3, %247, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %707:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %597, 3, %248, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %695.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %706.sub0, implicit $mode, implicit $exec + %696.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %706.sub1, implicit $mode, implicit $exec + undef %708.sub1:vreg_64_align2 = nofpexcept 
V_EXP_F32_e32 %707.sub0, implicit $mode, implicit $exec + undef %709.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %707.sub1, implicit $mode, implicit $exec + %710:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %695, 8, %696, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %711.sub0:vreg_64_align2 = V_MOV_B32_dpp undef %711.sub0, %710.sub0, 323, 15, 15, -1, implicit $exec + %711.sub1:vreg_64_align2 = V_MOV_B32_dpp undef %711.sub1, %710.sub1, 280, 15, 15, -1, implicit $exec + %712:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %710, 8, %711, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %692.sub1:sreg_64 = V_READLANE_B32 %712.sub0, 63 + %713:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %713, 0, %712.sub1, 0, %712.sub1, 276, 15, 15, 1, implicit $mode, implicit $exec + %714:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %714, 0, %713, 0, %713, 274, 15, 15, 1, implicit $mode, implicit $exec + %708.sub0:vreg_64_align2 = nofpexcept V_ADD_F32_dpp undef %708.sub0, 0, %714, 0, %714, 273, 15, 15, 1, implicit $mode, implicit $exec + %709.sub0:vreg_64_align2 = COPY %708.sub0 + %709.sub0:vreg_64_align2 = V_MOV_B32_dpp %709.sub0, %709.sub0, 322, 10, 15, -1, implicit $exec + %715:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %708, 8, %709, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %716.sub0:vreg_64_align2 = V_MOV_B32_dpp undef %716.sub0, %715.sub0, 323, 15, 15, -1, implicit $exec + %716.sub1:vreg_64_align2 = V_MOV_B32_dpp undef %716.sub1, %715.sub1, 280, 15, 15, -1, implicit $exec + %717:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %715, 8, %716, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %718.sub0:sreg_64 = V_READLANE_B32 %717.sub0, 63 + %719:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %719, 0, %717.sub1, 0, %717.sub1, 276, 15, 15, 1, implicit $mode, implicit $exec + %720:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %720, 0, %719, 0, %719, 274, 15, 15, 1, implicit $mode, implicit $exec + undef %721.sub0:vreg_64_align2 = nofpexcept V_ADD_F32_dpp undef %721.sub0, 0, %720, 0, 
%720, 273, 15, 15, 1, implicit $mode, implicit $exec + undef %722.sub0:vreg_64_align2 = COPY %721.sub0 + undef %722.sub0:vreg_64_align2 = V_MOV_B32_dpp %722.sub0, %722.sub0, 322, 10, 15, -1, implicit $exec + %247.sub1:vreg_64_align2 = COPY %248.sub0 + %723:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %322, 11, %247, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %724:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %461, 0, %461, 0, implicit $mode, implicit $exec + %725:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %636, 0, %636, 0, implicit $mode, implicit $exec + %726:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %246.sub1, 0, implicit $mode, implicit $exec + %727:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %636, 0, %246.sub0, 0, implicit $mode, implicit $exec + %728:sreg_64_xexec = S_OR_B64 %724, %726, implicit-def dead $scc + %729:sreg_64_xexec = S_OR_B64 %725, %727, implicit-def dead $scc + %730:vgpr_32 = COPY %461 + %246.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %246.sub1, 0, %730, %728, implicit $exec + %731:vgpr_32 = COPY %636 + %246.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %246.sub0, 0, %731, %729, implicit $exec + %732:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %622, 3, %246, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %721.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %732.sub0, implicit $mode, implicit $exec + %722.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %732.sub1, implicit $mode, implicit $exec + %733:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %721, 8, %722, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %734.sub0:vreg_64_align2 = V_MOV_B32_dpp undef %734.sub0, %733.sub0, 323, 15, 15, -1, implicit $exec + %734.sub1:vreg_64_align2 = V_MOV_B32_dpp undef %734.sub1, %733.sub1, 280, 15, 15, -1, implicit $exec + %735:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %733, 8, %734, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %718.sub1:sreg_64 = V_READLANE_B32 %735.sub0, 63 + %736:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %736, 0, %735.sub1, 0, 
%735.sub1, 276, 15, 15, 1, implicit $mode, implicit $exec + %737:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %737, 0, %736, 0, %736, 274, 15, 15, 1, implicit $mode, implicit $exec + %738:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %738, 0, %737, 0, %737, 273, 15, 15, 1, implicit $mode, implicit $exec + %739:vgpr_32 = COPY %738 + %739:vgpr_32 = V_MOV_B32_dpp %739, %739, 322, 10, 15, -1, implicit $exec + %740:vgpr_32 = nofpexcept V_ADD_F32_e32 %738, %739, implicit $mode, implicit $exec + %741:vgpr_32 = nofpexcept V_ADD_F32_dpp undef %741, 0, %740, 0, %740, 323, 15, 15, 1, implicit $mode, implicit $exec + %644.sub0:sreg_64 = V_READLANE_B32 %741, 63 + %742:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %323, 11, %246, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %743:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %245.sub1, 0, implicit $mode, implicit $exec + %744:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %245.sub0, 0, implicit $mode, implicit $exec + %745:sreg_64_xexec = S_OR_B64 %724, %743, implicit-def dead $scc + %746:sreg_64_xexec = S_OR_B64 %724, %744, implicit-def dead $scc + %245.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %245.sub1, 0, %730, %745, implicit $exec + %245.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %245.sub0, 0, %730, %746, implicit $exec + %747:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %324, 11, %245, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %748:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %244.sub1, 0, implicit $mode, implicit $exec + %749:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %244.sub0, 0, implicit $mode, implicit $exec + %750:sreg_64_xexec = S_OR_B64 %724, %748, implicit-def dead $scc + %751:sreg_64_xexec = S_OR_B64 %724, %749, implicit-def dead $scc + %244.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %244.sub1, 0, %730, %750, implicit $exec + %244.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %244.sub0, 0, %730, %751, implicit $exec + %752:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %325, 11, %244, 0, 0, 0, 0, 0, 
implicit $mode, implicit $exec + %753:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %243.sub1, 0, implicit $mode, implicit $exec + %754:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %243.sub0, 0, implicit $mode, implicit $exec + %755:sreg_64_xexec = S_OR_B64 %724, %753, implicit-def dead $scc + %756:sreg_64_xexec = S_OR_B64 %724, %754, implicit-def dead $scc + %243.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %243.sub1, 0, %730, %755, implicit $exec + %243.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %243.sub0, 0, %730, %756, implicit $exec + %757:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %326, 11, %243, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %758:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %242.sub1, 0, implicit $mode, implicit $exec + %759:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %242.sub0, 0, implicit $mode, implicit $exec + %760:sreg_64_xexec = S_OR_B64 %724, %758, implicit-def dead $scc + %761:sreg_64_xexec = S_OR_B64 %724, %759, implicit-def dead $scc + %242.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %242.sub1, 0, %730, %760, implicit $exec + %242.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %242.sub0, 0, %730, %761, implicit $exec + %762:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %327, 11, %242, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %763:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %241.sub1, 0, implicit $mode, implicit $exec + %764:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %241.sub0, 0, implicit $mode, implicit $exec + %765:sreg_64_xexec = S_OR_B64 %724, %763, implicit-def dead $scc + %766:sreg_64_xexec = S_OR_B64 %724, %764, implicit-def dead $scc + %241.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %241.sub1, 0, %730, %765, implicit $exec + %241.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %241.sub0, 0, %730, %766, implicit $exec + %767:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %328, 11, %241, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %768:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %240.sub1, 
0, implicit $mode, implicit $exec + %769:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %240.sub0, 0, implicit $mode, implicit $exec + %770:sreg_64_xexec = S_OR_B64 %724, %768, implicit-def dead $scc + %771:sreg_64_xexec = S_OR_B64 %724, %769, implicit-def dead $scc + %240.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %240.sub1, 0, %730, %770, implicit $exec + %240.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %240.sub0, 0, %730, %771, implicit $exec + %772:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %329, 11, %240, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %773:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %239.sub1, 0, implicit $mode, implicit $exec + %774:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %239.sub0, 0, implicit $mode, implicit $exec + %775:sreg_64_xexec = S_OR_B64 %724, %773, implicit-def dead $scc + %776:sreg_64_xexec = S_OR_B64 %724, %774, implicit-def dead $scc + %239.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %239.sub1, 0, %730, %775, implicit $exec + %239.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %239.sub0, 0, %730, %776, implicit $exec + %777:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %330, 11, %239, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %778:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %238.sub1, 0, implicit $mode, implicit $exec + %779:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %238.sub0, 0, implicit $mode, implicit $exec + %780:sreg_64_xexec = S_OR_B64 %724, %778, implicit-def dead $scc + %781:sreg_64_xexec = S_OR_B64 %724, %779, implicit-def dead $scc + %238.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %238.sub1, 0, %730, %780, implicit $exec + %238.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %238.sub0, 0, %730, %781, implicit $exec + %782:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %331, 11, %238, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %783:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %237.sub1, 0, implicit $mode, implicit $exec + %784:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, 
%237.sub0, 0, implicit $mode, implicit $exec + %785:sreg_64_xexec = S_OR_B64 %724, %783, implicit-def dead $scc + %786:sreg_64_xexec = S_OR_B64 %724, %784, implicit-def dead $scc + %237.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %237.sub1, 0, %730, %785, implicit $exec + %237.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %237.sub0, 0, %730, %786, implicit $exec + %787:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %332, 11, %237, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %788:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %236.sub1, 0, implicit $mode, implicit $exec + %789:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %236.sub0, 0, implicit $mode, implicit $exec + %790:sreg_64_xexec = S_OR_B64 %724, %788, implicit-def dead $scc + %791:sreg_64_xexec = S_OR_B64 %724, %789, implicit-def dead $scc + %236.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %236.sub1, 0, %730, %790, implicit $exec + %236.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %236.sub0, 0, %730, %791, implicit $exec + %792:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %333, 11, %236, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %793:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %235.sub1, 0, implicit $mode, implicit $exec + %794:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %235.sub0, 0, implicit $mode, implicit $exec + %795:sreg_64_xexec = S_OR_B64 %724, %793, implicit-def dead $scc + %796:sreg_64_xexec = S_OR_B64 %724, %794, implicit-def dead $scc + %235.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %235.sub1, 0, %730, %795, implicit $exec + %235.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %235.sub0, 0, %730, %796, implicit $exec + %797:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %334, 11, %235, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %798:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %234.sub1, 0, implicit $mode, implicit $exec + %799:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %234.sub0, 0, implicit $mode, implicit $exec + %800:sreg_64_xexec = S_OR_B64 %724, %798, 
implicit-def dead $scc + %801:sreg_64_xexec = S_OR_B64 %724, %799, implicit-def dead $scc + %234.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %234.sub1, 0, %730, %800, implicit $exec + %234.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %234.sub0, 0, %730, %801, implicit $exec + %802:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %335, 11, %234, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %803:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %233.sub1, 0, implicit $mode, implicit $exec + %804:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %233.sub0, 0, implicit $mode, implicit $exec + %805:sreg_64_xexec = S_OR_B64 %724, %803, implicit-def dead $scc + %806:sreg_64_xexec = S_OR_B64 %724, %804, implicit-def dead $scc + %233.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %233.sub1, 0, %730, %805, implicit $exec + %233.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %233.sub0, 0, %730, %806, implicit $exec + %807:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %336, 11, %233, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %808:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %232.sub1, 0, implicit $mode, implicit $exec + %809:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %232.sub0, 0, implicit $mode, implicit $exec + %810:sreg_64_xexec = S_OR_B64 %724, %808, implicit-def dead $scc + %811:sreg_64_xexec = S_OR_B64 %724, %809, implicit-def dead $scc + %232.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %232.sub1, 0, %730, %810, implicit $exec + %232.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %232.sub0, 0, %730, %811, implicit $exec + %812:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %337, 11, %232, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %813:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %231.sub1, 0, implicit $mode, implicit $exec + %814:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %231.sub0, 0, implicit $mode, implicit $exec + %815:sreg_64_xexec = S_OR_B64 %724, %813, implicit-def dead $scc + %816:sreg_64_xexec = S_OR_B64 %724, %814, implicit-def dead $scc + 
%231.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %231.sub1, 0, %730, %815, implicit $exec + %231.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %231.sub0, 0, %730, %816, implicit $exec + %817:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %338, 11, %231, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %818:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %230.sub1, 0, implicit $mode, implicit $exec + %819:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %230.sub0, 0, implicit $mode, implicit $exec + %820:sreg_64_xexec = S_OR_B64 %724, %818, implicit-def dead $scc + %821:sreg_64_xexec = S_OR_B64 %724, %819, implicit-def dead $scc + %230.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %230.sub1, 0, %730, %820, implicit $exec + %230.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %230.sub0, 0, %730, %821, implicit $exec + %822:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %339, 11, %230, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %823:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %229.sub1, 0, implicit $mode, implicit $exec + %824:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %229.sub0, 0, implicit $mode, implicit $exec + %825:sreg_64_xexec = S_OR_B64 %724, %823, implicit-def dead $scc + %826:sreg_64_xexec = S_OR_B64 %724, %824, implicit-def dead $scc + %229.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %229.sub1, 0, %730, %825, implicit $exec + %229.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %229.sub0, 0, %730, %826, implicit $exec + %827:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %340, 11, %229, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %828:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %228.sub1, 0, implicit $mode, implicit $exec + %829:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %228.sub0, 0, implicit $mode, implicit $exec + %830:sreg_64_xexec = S_OR_B64 %724, %828, implicit-def dead $scc + %831:sreg_64_xexec = S_OR_B64 %724, %829, implicit-def dead $scc + %228.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %228.sub1, 0, %730, %830, implicit $exec + 
%228.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %228.sub0, 0, %730, %831, implicit $exec + %832:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %341, 11, %228, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %833:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %227.sub1, 0, implicit $mode, implicit $exec + %834:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %227.sub0, 0, implicit $mode, implicit $exec + %835:sreg_64_xexec = S_OR_B64 %724, %833, implicit-def dead $scc + %836:sreg_64_xexec = S_OR_B64 %724, %834, implicit-def dead $scc + %227.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %227.sub1, 0, %730, %835, implicit $exec + %227.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %227.sub0, 0, %730, %836, implicit $exec + %837:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %342, 11, %227, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %838:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %226.sub1, 0, implicit $mode, implicit $exec + %839:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %226.sub0, 0, implicit $mode, implicit $exec + %840:sreg_64_xexec = S_OR_B64 %724, %838, implicit-def dead $scc + %841:sreg_64_xexec = S_OR_B64 %724, %839, implicit-def dead $scc + %226.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %226.sub1, 0, %730, %840, implicit $exec + %226.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %226.sub0, 0, %730, %841, implicit $exec + %842:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %343, 11, %226, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %843:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %225.sub1, 0, implicit $mode, implicit $exec + %844:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %225.sub0, 0, implicit $mode, implicit $exec + %845:sreg_64_xexec = S_OR_B64 %724, %843, implicit-def dead $scc + %846:sreg_64_xexec = S_OR_B64 %724, %844, implicit-def dead $scc + %225.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %225.sub1, 0, %730, %845, implicit $exec + %225.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %225.sub0, 0, %730, %846, implicit $exec + 
%847:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %344, 11, %225, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %848:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %224.sub1, 0, implicit $mode, implicit $exec + %849:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %224.sub0, 0, implicit $mode, implicit $exec + %850:sreg_64_xexec = S_OR_B64 %724, %848, implicit-def dead $scc + %851:sreg_64_xexec = S_OR_B64 %724, %849, implicit-def dead $scc + %224.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %224.sub1, 0, %730, %850, implicit $exec + %224.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %224.sub0, 0, %730, %851, implicit $exec + %852:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %345, 11, %224, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %853:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %223.sub1, 0, implicit $mode, implicit $exec + %854:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %223.sub0, 0, implicit $mode, implicit $exec + %855:sreg_64_xexec = S_OR_B64 %724, %853, implicit-def dead $scc + %856:sreg_64_xexec = S_OR_B64 %724, %854, implicit-def dead $scc + %223.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %223.sub1, 0, %730, %855, implicit $exec + %223.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %223.sub0, 0, %730, %856, implicit $exec + %857:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %346, 11, %223, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %858:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %222.sub1, 0, implicit $mode, implicit $exec + %859:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %222.sub0, 0, implicit $mode, implicit $exec + %860:sreg_64_xexec = S_OR_B64 %724, %858, implicit-def dead $scc + %861:sreg_64_xexec = S_OR_B64 %724, %859, implicit-def dead $scc + %222.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %222.sub1, 0, %730, %860, implicit $exec + %222.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %222.sub0, 0, %730, %861, implicit $exec + %862:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %347, 11, %222, 0, 0, 0, 0, 0, implicit $mode, 
implicit $exec + %863:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %221.sub1, 0, implicit $mode, implicit $exec + %864:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %221.sub0, 0, implicit $mode, implicit $exec + %865:sreg_64_xexec = S_OR_B64 %724, %863, implicit-def dead $scc + %866:sreg_64_xexec = S_OR_B64 %724, %864, implicit-def dead $scc + %221.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %221.sub1, 0, %730, %865, implicit $exec + %221.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %221.sub0, 0, %730, %866, implicit $exec + %867:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %348, 11, %221, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %868:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %220.sub1, 0, implicit $mode, implicit $exec + %869:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %220.sub0, 0, implicit $mode, implicit $exec + %870:sreg_64_xexec = S_OR_B64 %724, %868, implicit-def dead $scc + %871:sreg_64_xexec = S_OR_B64 %724, %869, implicit-def dead $scc + %220.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %220.sub1, 0, %730, %870, implicit $exec + %220.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %220.sub0, 0, %730, %871, implicit $exec + %872:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %349, 11, %220, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %873:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %219.sub1, 0, implicit $mode, implicit $exec + %874:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %219.sub0, 0, implicit $mode, implicit $exec + %875:sreg_64_xexec = S_OR_B64 %724, %873, implicit-def dead $scc + %876:sreg_64_xexec = S_OR_B64 %724, %874, implicit-def dead $scc + %219.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %219.sub1, 0, %730, %875, implicit $exec + %219.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %219.sub0, 0, %730, %876, implicit $exec + %877:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %350, 11, %219, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %878:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %218.sub1, 0, implicit 
$mode, implicit $exec + %879:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %218.sub0, 0, implicit $mode, implicit $exec + %880:sreg_64_xexec = S_OR_B64 %724, %878, implicit-def dead $scc + %881:sreg_64_xexec = S_OR_B64 %724, %879, implicit-def dead $scc + %218.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %218.sub1, 0, %730, %880, implicit $exec + %218.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %218.sub0, 0, %730, %881, implicit $exec + %882:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %351, 11, %218, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %883:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %217.sub1, 0, implicit $mode, implicit $exec + %884:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %217.sub0, 0, implicit $mode, implicit $exec + %885:sreg_64_xexec = S_OR_B64 %724, %883, implicit-def dead $scc + %886:sreg_64_xexec = S_OR_B64 %724, %884, implicit-def dead $scc + %217.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %217.sub1, 0, %730, %885, implicit $exec + %217.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %217.sub0, 0, %730, %886, implicit $exec + %887:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %352, 11, %217, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %888:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %216.sub1, 0, implicit $mode, implicit $exec + %889:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %216.sub0, 0, implicit $mode, implicit $exec + %890:sreg_64_xexec = S_OR_B64 %724, %888, implicit-def dead $scc + %891:sreg_64_xexec = S_OR_B64 %724, %889, implicit-def dead $scc + %216.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %216.sub1, 0, %730, %890, implicit $exec + %216.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %216.sub0, 0, %730, %891, implicit $exec + %892:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %353, 11, %216, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %893:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %215.sub1, 0, implicit $mode, implicit $exec + %894:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %215.sub0, 0, 
implicit $mode, implicit $exec + %895:sreg_64_xexec = S_OR_B64 %724, %893, implicit-def dead $scc + %896:sreg_64_xexec = S_OR_B64 %724, %894, implicit-def dead $scc + %215.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %215.sub1, 0, %730, %895, implicit $exec + %215.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %215.sub0, 0, %730, %896, implicit $exec + %897:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %354, 11, %215, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %898:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %214.sub1, 0, implicit $mode, implicit $exec + %899:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %214.sub0, 0, implicit $mode, implicit $exec + %900:sreg_64_xexec = S_OR_B64 %724, %898, implicit-def dead $scc + %901:sreg_64_xexec = S_OR_B64 %724, %899, implicit-def dead $scc + %214.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %214.sub1, 0, %730, %900, implicit $exec + %214.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %214.sub0, 0, %730, %901, implicit $exec + %902:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %355, 11, %214, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %903:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %213.sub1, 0, implicit $mode, implicit $exec + %904:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %213.sub0, 0, implicit $mode, implicit $exec + %905:sreg_64_xexec = S_OR_B64 %724, %903, implicit-def dead $scc + %906:sreg_64_xexec = S_OR_B64 %724, %904, implicit-def dead $scc + %213.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %213.sub1, 0, %730, %905, implicit $exec + %213.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %213.sub0, 0, %730, %906, implicit $exec + %907:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %356, 11, %213, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %908:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %212.sub1, 0, implicit $mode, implicit $exec + %909:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %212.sub0, 0, implicit $mode, implicit $exec + %910:sreg_64_xexec = S_OR_B64 %724, %908, implicit-def dead $scc + 
%911:sreg_64_xexec = S_OR_B64 %724, %909, implicit-def dead $scc + %212.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %212.sub1, 0, %730, %910, implicit $exec + %212.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %212.sub0, 0, %730, %911, implicit $exec + %912:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %357, 11, %212, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %913:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %211.sub1, 0, implicit $mode, implicit $exec + %914:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %211.sub0, 0, implicit $mode, implicit $exec + %915:sreg_64_xexec = S_OR_B64 %724, %913, implicit-def dead $scc + %916:sreg_64_xexec = S_OR_B64 %724, %914, implicit-def dead $scc + %211.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %211.sub1, 0, %730, %915, implicit $exec + %211.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %211.sub0, 0, %730, %916, implicit $exec + %917:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %358, 11, %211, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %918:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %210.sub1, 0, implicit $mode, implicit $exec + %919:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %210.sub0, 0, implicit $mode, implicit $exec + %920:sreg_64_xexec = S_OR_B64 %724, %918, implicit-def dead $scc + %921:sreg_64_xexec = S_OR_B64 %724, %919, implicit-def dead $scc + %210.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %210.sub1, 0, %730, %920, implicit $exec + %210.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %210.sub0, 0, %730, %921, implicit $exec + %922:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %359, 11, %210, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %923:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %209.sub1, 0, implicit $mode, implicit $exec + %924:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %209.sub0, 0, implicit $mode, implicit $exec + %925:sreg_64_xexec = S_OR_B64 %724, %923, implicit-def dead $scc + %926:sreg_64_xexec = S_OR_B64 %724, %924, implicit-def dead $scc + %209.sub1:vreg_64_align2 = 
V_CNDMASK_B32_e64 0, %209.sub1, 0, %730, %925, implicit $exec + %209.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %209.sub0, 0, %730, %926, implicit $exec + %927:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %360, 11, %209, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %928:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %208.sub1, 0, implicit $mode, implicit $exec + %929:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %208.sub0, 0, implicit $mode, implicit $exec + %930:sreg_64_xexec = S_OR_B64 %724, %928, implicit-def dead $scc + %931:sreg_64_xexec = S_OR_B64 %724, %929, implicit-def dead $scc + %208.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %208.sub1, 0, %730, %930, implicit $exec + %208.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %208.sub0, 0, %730, %931, implicit $exec + %932:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %361, 11, %208, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %933:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %207.sub1, 0, implicit $mode, implicit $exec + %934:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %207.sub0, 0, implicit $mode, implicit $exec + %935:sreg_64_xexec = S_OR_B64 %724, %933, implicit-def dead $scc + %936:sreg_64_xexec = S_OR_B64 %724, %934, implicit-def dead $scc + %207.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %207.sub1, 0, %730, %935, implicit $exec + %207.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %207.sub0, 0, %730, %936, implicit $exec + %937:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %362, 11, %207, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %938:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %206.sub1, 0, implicit $mode, implicit $exec + %939:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %206.sub0, 0, implicit $mode, implicit $exec + %940:sreg_64_xexec = S_OR_B64 %724, %938, implicit-def dead $scc + %941:sreg_64_xexec = S_OR_B64 %724, %939, implicit-def dead $scc + %206.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %206.sub1, 0, %730, %940, implicit $exec + %206.sub0:vreg_64_align2 = 
V_CNDMASK_B32_e64 0, %206.sub0, 0, %730, %941, implicit $exec + %942:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %363, 11, %206, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %943:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %205.sub1, 0, implicit $mode, implicit $exec + %944:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %205.sub0, 0, implicit $mode, implicit $exec + %945:sreg_64_xexec = S_OR_B64 %724, %943, implicit-def dead $scc + %946:sreg_64_xexec = S_OR_B64 %724, %944, implicit-def dead $scc + %205.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %205.sub1, 0, %730, %945, implicit $exec + %205.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %205.sub0, 0, %730, %946, implicit $exec + %947:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %364, 11, %205, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %948:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %204.sub1, 0, implicit $mode, implicit $exec + %949:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %204.sub0, 0, implicit $mode, implicit $exec + %950:sreg_64_xexec = S_OR_B64 %724, %948, implicit-def dead $scc + %951:sreg_64_xexec = S_OR_B64 %724, %949, implicit-def dead $scc + %204.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %204.sub1, 0, %730, %950, implicit $exec + %204.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %204.sub0, 0, %730, %951, implicit $exec + %952:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %365, 11, %204, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %953:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %203.sub1, 0, implicit $mode, implicit $exec + %954:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %203.sub0, 0, implicit $mode, implicit $exec + %955:sreg_64_xexec = S_OR_B64 %724, %953, implicit-def dead $scc + %956:sreg_64_xexec = S_OR_B64 %724, %954, implicit-def dead $scc + %203.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %203.sub1, 0, %730, %955, implicit $exec + %203.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %203.sub0, 0, %730, %956, implicit $exec + %957:vreg_64_align2 = nofpexcept 
V_PK_ADD_F32 8, %366, 11, %203, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %958:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %202.sub1, 0, implicit $mode, implicit $exec + %959:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %202.sub0, 0, implicit $mode, implicit $exec + %960:sreg_64_xexec = S_OR_B64 %724, %958, implicit-def dead $scc + %961:sreg_64_xexec = S_OR_B64 %724, %959, implicit-def dead $scc + %202.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %202.sub1, 0, %730, %960, implicit $exec + %202.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %202.sub0, 0, %730, %961, implicit $exec + %962:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %367, 11, %202, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %963:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %201.sub1, 0, implicit $mode, implicit $exec + %964:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %201.sub0, 0, implicit $mode, implicit $exec + %965:sreg_64_xexec = S_OR_B64 %724, %963, implicit-def dead $scc + %966:sreg_64_xexec = S_OR_B64 %724, %964, implicit-def dead $scc + %201.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %201.sub1, 0, %730, %965, implicit $exec + %201.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %201.sub0, 0, %730, %966, implicit $exec + %967:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %368, 11, %201, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %968:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %200.sub1, 0, implicit $mode, implicit $exec + %969:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %200.sub0, 0, implicit $mode, implicit $exec + %970:sreg_64_xexec = S_OR_B64 %724, %968, implicit-def dead $scc + %971:sreg_64_xexec = S_OR_B64 %724, %969, implicit-def dead $scc + %200.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %200.sub1, 0, %730, %970, implicit $exec + %200.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %200.sub0, 0, %730, %971, implicit $exec + %972:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %369, 11, %200, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %973:sreg_64 = 
nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %199.sub1, 0, implicit $mode, implicit $exec + %974:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %199.sub0, 0, implicit $mode, implicit $exec + %975:sreg_64_xexec = S_OR_B64 %724, %973, implicit-def dead $scc + %976:sreg_64_xexec = S_OR_B64 %724, %974, implicit-def dead $scc + %199.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %199.sub1, 0, %730, %975, implicit $exec + %199.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %199.sub0, 0, %730, %976, implicit $exec + %977:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %370, 11, %199, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %978:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %198.sub1, 0, implicit $mode, implicit $exec + %979:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %198.sub0, 0, implicit $mode, implicit $exec + %980:sreg_64_xexec = S_OR_B64 %724, %978, implicit-def dead $scc + %981:sreg_64_xexec = S_OR_B64 %724, %979, implicit-def dead $scc + %198.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %198.sub1, 0, %730, %980, implicit $exec + %198.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %198.sub0, 0, %730, %981, implicit $exec + %982:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %371, 11, %198, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %983:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %197.sub1, 0, implicit $mode, implicit $exec + %984:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %197.sub0, 0, implicit $mode, implicit $exec + %985:sreg_64_xexec = S_OR_B64 %724, %983, implicit-def dead $scc + %986:sreg_64_xexec = S_OR_B64 %724, %984, implicit-def dead $scc + %197.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %197.sub1, 0, %730, %985, implicit $exec + %197.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %197.sub0, 0, %730, %986, implicit $exec + %987:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %372, 11, %197, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %988:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %196.sub1, 0, implicit $mode, implicit $exec + %989:sreg_64 
= nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %196.sub0, 0, implicit $mode, implicit $exec + %990:sreg_64_xexec = S_OR_B64 %724, %988, implicit-def dead $scc + %991:sreg_64_xexec = S_OR_B64 %724, %989, implicit-def dead $scc + %196.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %196.sub1, 0, %730, %990, implicit $exec + %196.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %196.sub0, 0, %730, %991, implicit $exec + %992:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %373, 11, %196, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %993:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %195.sub1, 0, implicit $mode, implicit $exec + %994:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %195.sub0, 0, implicit $mode, implicit $exec + %995:sreg_64_xexec = S_OR_B64 %724, %993, implicit-def dead $scc + %996:sreg_64_xexec = S_OR_B64 %724, %994, implicit-def dead $scc + %195.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %195.sub1, 0, %730, %995, implicit $exec + %195.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %195.sub0, 0, %730, %996, implicit $exec + %997:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %374, 11, %195, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %998:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %194.sub1, 0, implicit $mode, implicit $exec + %999:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %194.sub0, 0, implicit $mode, implicit $exec + %1000:sreg_64_xexec = S_OR_B64 %724, %998, implicit-def dead $scc + %1001:sreg_64_xexec = S_OR_B64 %724, %999, implicit-def dead $scc + %194.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %194.sub1, 0, %730, %1000, implicit $exec + %194.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %194.sub0, 0, %730, %1001, implicit $exec + %1002:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %375, 11, %194, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1003:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %193.sub1, 0, implicit $mode, implicit $exec + %1004:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %193.sub0, 0, implicit $mode, implicit $exec + 
%1005:sreg_64_xexec = S_OR_B64 %724, %1003, implicit-def dead $scc + %1006:sreg_64_xexec = S_OR_B64 %724, %1004, implicit-def dead $scc + %193.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %193.sub1, 0, %730, %1005, implicit $exec + %193.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %193.sub0, 0, %730, %1006, implicit $exec + %1007:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %376, 11, %193, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1008:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %192.sub1, 0, implicit $mode, implicit $exec + %1009:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %192.sub0, 0, implicit $mode, implicit $exec + %1010:sreg_64_xexec = S_OR_B64 %724, %1008, implicit-def dead $scc + %1011:sreg_64_xexec = S_OR_B64 %724, %1009, implicit-def dead $scc + %192.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %192.sub1, 0, %730, %1010, implicit $exec + %192.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %192.sub0, 0, %730, %1011, implicit $exec + %1012:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %377, 11, %192, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1013:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %191.sub1, 0, implicit $mode, implicit $exec + %1014:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %191.sub0, 0, implicit $mode, implicit $exec + %1015:sreg_64_xexec = S_OR_B64 %724, %1013, implicit-def dead $scc + %1016:sreg_64_xexec = S_OR_B64 %724, %1014, implicit-def dead $scc + %191.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %191.sub1, 0, %730, %1015, implicit $exec + %191.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %191.sub0, 0, %730, %1016, implicit $exec + %1017:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %378, 11, %191, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1018:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %190.sub1, 0, implicit $mode, implicit $exec + %1019:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %190.sub0, 0, implicit $mode, implicit $exec + %1020:sreg_64_xexec = S_OR_B64 %724, %1018, implicit-def dead $scc + 
%1021:sreg_64_xexec = S_OR_B64 %724, %1019, implicit-def dead $scc + %190.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %190.sub1, 0, %730, %1020, implicit $exec + %190.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %190.sub0, 0, %730, %1021, implicit $exec + %1022:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %379, 11, %190, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1023:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %189.sub1, 0, implicit $mode, implicit $exec + %1024:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %189.sub0, 0, implicit $mode, implicit $exec + %1025:sreg_64_xexec = S_OR_B64 %724, %1023, implicit-def dead $scc + %1026:sreg_64_xexec = S_OR_B64 %724, %1024, implicit-def dead $scc + %189.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %189.sub1, 0, %730, %1025, implicit $exec + %189.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %189.sub0, 0, %730, %1026, implicit $exec + %1027:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %380, 11, %189, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1028:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %188.sub1, 0, implicit $mode, implicit $exec + %1029:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %188.sub0, 0, implicit $mode, implicit $exec + %1030:sreg_64_xexec = S_OR_B64 %724, %1028, implicit-def dead $scc + %1031:sreg_64_xexec = S_OR_B64 %724, %1029, implicit-def dead $scc + %188.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %188.sub1, 0, %730, %1030, implicit $exec + %188.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %188.sub0, 0, %730, %1031, implicit $exec + %1032:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %381, 11, %188, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1033:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %187.sub1, 0, implicit $mode, implicit $exec + %1034:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %187.sub0, 0, implicit $mode, implicit $exec + %1035:sreg_64_xexec = S_OR_B64 %724, %1033, implicit-def dead $scc + %1036:sreg_64_xexec = S_OR_B64 %724, %1034, implicit-def dead $scc + 
%187.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %187.sub1, 0, %730, %1035, implicit $exec + %187.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %187.sub0, 0, %730, %1036, implicit $exec + %1037:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %382, 11, %187, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1038:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %174.sub1, 0, implicit $mode, implicit $exec + %1039:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %461, 0, %174.sub0, 0, implicit $mode, implicit $exec + %1040:sreg_64_xexec = S_OR_B64 %724, %1038, implicit-def dead $scc + %1041:sreg_64_xexec = S_OR_B64 %724, %1039, implicit-def dead $scc + %174.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %174.sub1, 0, %730, %1040, implicit $exec + %174.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %174.sub0, 0, %730, %1041, implicit $exec + %1042:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %383, 11, %174, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %1043.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %671.sub0, implicit $mode, implicit $exec + %1043.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %671.sub1, implicit $mode, implicit $exec + undef %1044.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %697.sub0, implicit $mode, implicit $exec + %1044.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %697.sub1, implicit $mode, implicit $exec + undef %1045.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %723.sub0, implicit $mode, implicit $exec + %1045.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %723.sub1, implicit $mode, implicit $exec + undef %1046.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %742.sub0, implicit $mode, implicit $exec + %1046.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %742.sub1, implicit $mode, implicit $exec + undef %1047.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %747.sub0, implicit $mode, implicit $exec + %1047.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %747.sub1, implicit $mode, implicit $exec + undef %1048.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 
%752.sub0, implicit $mode, implicit $exec + %1048.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %752.sub1, implicit $mode, implicit $exec + undef %1049.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %757.sub0, implicit $mode, implicit $exec + %1049.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %757.sub1, implicit $mode, implicit $exec + undef %1050.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %762.sub0, implicit $mode, implicit $exec + %1050.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %762.sub1, implicit $mode, implicit $exec + undef %1051.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %767.sub0, implicit $mode, implicit $exec + %1051.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %767.sub1, implicit $mode, implicit $exec + undef %1052.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %772.sub0, implicit $mode, implicit $exec + %1052.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %772.sub1, implicit $mode, implicit $exec + undef %1053.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %777.sub0, implicit $mode, implicit $exec + %1053.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %777.sub1, implicit $mode, implicit $exec + undef %1054.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %782.sub0, implicit $mode, implicit $exec + %1054.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %782.sub1, implicit $mode, implicit $exec + undef %1055.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %787.sub0, implicit $mode, implicit $exec + %1055.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %787.sub1, implicit $mode, implicit $exec + undef %1056.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %792.sub0, implicit $mode, implicit $exec + %1056.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %792.sub1, implicit $mode, implicit $exec + undef %1057.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %797.sub0, implicit $mode, implicit $exec + %1057.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %797.sub1, implicit $mode, implicit $exec + undef %1058.sub0:vreg_128_align2 = 
nofpexcept V_EXP_F32_e32 %802.sub0, implicit $mode, implicit $exec + %1058.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %802.sub1, implicit $mode, implicit $exec + undef %1059.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %807.sub0, implicit $mode, implicit $exec + %1059.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %807.sub1, implicit $mode, implicit $exec + undef %1060.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %812.sub0, implicit $mode, implicit $exec + %1060.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %812.sub1, implicit $mode, implicit $exec + undef %1061.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %817.sub0, implicit $mode, implicit $exec + %1061.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %817.sub1, implicit $mode, implicit $exec + undef %1062.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %822.sub0, implicit $mode, implicit $exec + %1062.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %822.sub1, implicit $mode, implicit $exec + undef %1063.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %827.sub0, implicit $mode, implicit $exec + %1063.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %827.sub1, implicit $mode, implicit $exec + undef %1064.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %832.sub0, implicit $mode, implicit $exec + %1064.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %832.sub1, implicit $mode, implicit $exec + undef %1065.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %837.sub0, implicit $mode, implicit $exec + %1065.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %837.sub1, implicit $mode, implicit $exec + undef %1066.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %842.sub0, implicit $mode, implicit $exec + %1066.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %842.sub1, implicit $mode, implicit $exec + %1051.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %847.sub0, implicit $mode, implicit $exec + %1051.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %847.sub1, implicit $mode, implicit $exec + %1052.sub2:vreg_128_align2 = 
nofpexcept V_EXP_F32_e32 %852.sub0, implicit $mode, implicit $exec + %1052.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %852.sub1, implicit $mode, implicit $exec + %1053.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %857.sub0, implicit $mode, implicit $exec + %1053.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %857.sub1, implicit $mode, implicit $exec + %1054.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %862.sub0, implicit $mode, implicit $exec + %1054.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %862.sub1, implicit $mode, implicit $exec + %1055.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %867.sub0, implicit $mode, implicit $exec + %1055.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %867.sub1, implicit $mode, implicit $exec + %1056.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %872.sub0, implicit $mode, implicit $exec + %1056.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %872.sub1, implicit $mode, implicit $exec + %1057.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %877.sub0, implicit $mode, implicit $exec + %1057.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %877.sub1, implicit $mode, implicit $exec + %1058.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %882.sub0, implicit $mode, implicit $exec + %1058.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %882.sub1, implicit $mode, implicit $exec + undef %1067.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %887.sub0, implicit $mode, implicit $exec + %1067.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %887.sub1, implicit $mode, implicit $exec + undef %1068.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %892.sub0, implicit $mode, implicit $exec + %1068.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %892.sub1, implicit $mode, implicit $exec + undef %1069.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %897.sub0, implicit $mode, implicit $exec + %1069.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %897.sub1, implicit $mode, implicit $exec + undef %1070.sub0:vreg_64_align2 = nofpexcept 
V_EXP_F32_e32 %902.sub0, implicit $mode, implicit $exec + %1070.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %902.sub1, implicit $mode, implicit $exec + undef %1071.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %907.sub0, implicit $mode, implicit $exec + %1071.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %907.sub1, implicit $mode, implicit $exec + undef %1072.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %912.sub0, implicit $mode, implicit $exec + %1072.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %912.sub1, implicit $mode, implicit $exec + undef %1073.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %917.sub0, implicit $mode, implicit $exec + %1073.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %917.sub1, implicit $mode, implicit $exec + undef %1074.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %922.sub0, implicit $mode, implicit $exec + %1074.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %922.sub1, implicit $mode, implicit $exec + undef %1075.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %927.sub0, implicit $mode, implicit $exec + %1075.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %927.sub1, implicit $mode, implicit $exec + undef %1076.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %932.sub0, implicit $mode, implicit $exec + %1076.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %932.sub1, implicit $mode, implicit $exec + undef %1077.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %937.sub0, implicit $mode, implicit $exec + %1077.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %937.sub1, implicit $mode, implicit $exec + undef %1078.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %942.sub0, implicit $mode, implicit $exec + %1078.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %942.sub1, implicit $mode, implicit $exec + undef %1079.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %947.sub0, implicit $mode, implicit $exec + %1079.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %947.sub1, implicit $mode, implicit $exec + undef %1080.sub0:vreg_128_align2 = 
nofpexcept V_EXP_F32_e32 %952.sub0, implicit $mode, implicit $exec + %1080.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %952.sub1, implicit $mode, implicit $exec + undef %1081.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %957.sub0, implicit $mode, implicit $exec + %1081.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %957.sub1, implicit $mode, implicit $exec + undef %1082.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %962.sub0, implicit $mode, implicit $exec + %1082.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %962.sub1, implicit $mode, implicit $exec + undef %1083.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %967.sub0, implicit $mode, implicit $exec + %1083.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %967.sub1, implicit $mode, implicit $exec + undef %1084.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %972.sub0, implicit $mode, implicit $exec + %1084.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %972.sub1, implicit $mode, implicit $exec + undef %1085.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %977.sub0, implicit $mode, implicit $exec + %1085.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %977.sub1, implicit $mode, implicit $exec + undef %1086.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %982.sub0, implicit $mode, implicit $exec + %1086.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %982.sub1, implicit $mode, implicit $exec + undef %1087.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %987.sub0, implicit $mode, implicit $exec + %1087.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %987.sub1, implicit $mode, implicit $exec + undef %1088.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %992.sub0, implicit $mode, implicit $exec + %1088.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %992.sub1, implicit $mode, implicit $exec + undef %1089.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %997.sub0, implicit $mode, implicit $exec + %1089.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %997.sub1, implicit $mode, implicit $exec + undef 
%1090.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %1002.sub0, implicit $mode, implicit $exec + %1090.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %1002.sub1, implicit $mode, implicit $exec + %1075.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1007.sub0, implicit $mode, implicit $exec + %1075.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1007.sub1, implicit $mode, implicit $exec + %1076.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1012.sub0, implicit $mode, implicit $exec + %1076.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1012.sub1, implicit $mode, implicit $exec + %1077.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1017.sub0, implicit $mode, implicit $exec + %1077.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1017.sub1, implicit $mode, implicit $exec + %1078.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1022.sub0, implicit $mode, implicit $exec + %1078.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1022.sub1, implicit $mode, implicit $exec + %1079.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1027.sub0, implicit $mode, implicit $exec + %1079.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1027.sub1, implicit $mode, implicit $exec + %1080.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1032.sub0, implicit $mode, implicit $exec + %1080.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1032.sub1, implicit $mode, implicit $exec + %1081.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1037.sub0, implicit $mode, implicit $exec + %1081.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1037.sub1, implicit $mode, implicit $exec + %1082.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1042.sub0, implicit $mode, implicit $exec + %1082.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1042.sub1, implicit $mode, implicit $exec + %1091:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %315, 8, %1043, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1092:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %314, 8, %1044, 0, 0, 0, 0, 0, implicit $mode, implicit 
$exec + %1093:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %283, 8, %1045, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1094:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %282, 8, %1046, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1095:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %313, 8, %1047, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1096:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %312, 8, %1048, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1097:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %281, 8, %1049, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1098:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %280, 8, %1050, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1099:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %311, 8, %1051.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1100:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %310, 8, %1052.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1101:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %279, 8, %1053.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1102:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %278, 8, %1054.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1103:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %309, 8, %1055.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1104:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %308, 8, %1056.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1105:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %277, 8, %1057.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1106:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %276, 8, %1058.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1107:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %307, 8, %1059, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1108:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %306, 8, %1060, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1109:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %275, 8, %1061, 0, 0, 0, 0, 0, 
implicit $mode, implicit $exec + %1110:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %274, 8, %1062, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1111:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %305, 8, %1063, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1112:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %304, 8, %1064, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1113:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %273, 8, %1065, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1114:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %272, 8, %1066, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1115:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %303, 8, %1051.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1116:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %302, 8, %1052.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1117:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %271, 8, %1053.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1118:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %270, 8, %1054.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1119:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %301, 8, %1055.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1120:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %300, 8, %1056.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1121:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %269, 8, %1057.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1122:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %268, 8, %1058.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1123:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %299, 8, %1067, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1124:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %298, 8, %1068, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1125:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %267, 8, %1069, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1126:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %266, 8, 
%1070, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1127:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %297, 8, %1071, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1128:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %296, 8, %1072, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1129:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %265, 8, %1073, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1130:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %264, 8, %1074, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1131:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %295, 8, %1075.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1132:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %294, 8, %1076.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1133:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %263, 8, %1077.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1134:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %262, 8, %1078.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1135:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %293, 8, %1079.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1136:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %292, 8, %1080.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1137:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %261, 8, %1081.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1138:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %260, 8, %1082.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1139:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %291, 8, %1083, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1140:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %290, 8, %1084, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1141:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %259, 8, %1085, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1142:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %258, 8, %1086, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1143:vreg_64_align2 = nofpexcept 
V_PK_MUL_F32 8, %289, 8, %1087, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1144:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %288, 8, %1088, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1145:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %257, 8, %1089, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1146:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %256, 8, %1090, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1147:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %287, 8, %1075.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1148:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %286, 8, %1076.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1149:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %255, 8, %1077.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1150:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %254, 8, %1078.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1151:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %285, 8, %1079.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1152:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %284, 8, %1080.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1153:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %253, 8, %1081.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1154:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %173, 8, %1082.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %315:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1091, 8, %666, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %314:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1092, 8, %692, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %283:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1093, 8, %718, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %282:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1094, 8, %644, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %1155.sub0:sgpr_64 = COPY %644.sub1 + %313:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1095, 0, %1155, 0, 0, 0, 0, 0, implicit 
$mode, implicit $exec + %312:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1096, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %281:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1097, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %280:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1098, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %311:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1099, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %310:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1100, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %279:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1101, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %278:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1102, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %309:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1103, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %308:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1104, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %258:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1142, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %289:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1143, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %288:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1144, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %257:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1145, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %256:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1146, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %287:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1147, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %286:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1148, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %255:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1149, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %254:vreg_64_align2 = nofpexcept 
V_PK_ADD_F32 8, %1150, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %285:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1151, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %284:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1152, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %253:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1153, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %173:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1154, 0, %1155, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1156:vgpr_32 = disjoint V_OR_B32_e32 %386, %108, implicit $exec + %1157:vgpr_32 = disjoint V_OR_B32_e32 %386, %109, implicit $exec + %1158:vgpr_32 = disjoint V_OR_B32_e32 %386, %110, implicit $exec + %1159:vgpr_32 = disjoint V_OR_B32_e32 %386, %111, implicit $exec + %1160:sreg_32 = S_LSHL_B32 %17, 12, implicit-def dead $scc + %1161:sreg_32 = exact S_LSHR_B32 %389, 4, implicit-def dead $scc + %1162:sreg_32 = S_ADD_I32 %1160, %1161, implicit-def dead $scc + %15:sreg_32 = S_ADD_I32 %1162, 32768, implicit-def dead $scc + %1163:vgpr_32 = V_LSHLREV_B32_e32 1, %1156, implicit $exec + $m0 = COPY %15 + BUFFER_LOAD_DWORDX4_LDS_OFFEN %1163, %7, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + $m0 = S_ADD_I32 %1162, 33824, implicit-def dead $scc + %1164:vgpr_32 = V_LSHLREV_B32_e32 1, %1157, implicit $exec + BUFFER_LOAD_DWORDX4_LDS_OFFEN %1164, %7, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + $m0 = S_ADD_I32 %1162, 34880, implicit-def dead $scc + %1165:vgpr_32 = V_LSHLREV_B32_e32 1, %1158, implicit $exec + BUFFER_LOAD_DWORDX4_LDS_OFFEN %1165, %7, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + $m0 = S_ADD_I32 %1162, 35936, implicit-def dead $scc + %1166:vgpr_32 = V_LSHLREV_B32_e32 1, %1159, implicit $exec + BUFFER_LOAD_DWORDX4_LDS_OFFEN %1166, %7, 0, 0, 0, 0, 1, implicit $exec, implicit $m0 + %1167:vgpr_32 = nuw V_ADD_U32_e32 %319, %108, implicit $exec + undef %1168.sub0_sub1:av_128_align2 = DS_READ_B64_TR_B16 %1167, 0, 0, implicit $exec + 
%1168.sub2_sub3:av_128_align2 = DS_READ_B64_TR_B16 %1167, 512, 0, implicit $exec + undef %1169.sub0_sub1:av_128_align2 = DS_READ_B64_TR_B16 %1167, 1056, 0, implicit $exec + %1169.sub2_sub3:av_128_align2 = DS_READ_B64_TR_B16 %1167, 1568, 0, implicit $exec + undef %1170.sub0_sub1:av_128_align2 = DS_READ_B64_TR_B16 %1167, 2112, 0, implicit $exec + %1170.sub2_sub3:av_128_align2 = DS_READ_B64_TR_B16 %1167, 2624, 0, implicit $exec + undef %1171.sub0_sub1:av_128_align2 = DS_READ_B64_TR_B16 %1167, 3168, 0, implicit $exec + %1171.sub2_sub3:av_128_align2 = DS_READ_B64_TR_B16 %1167, 3680, 0, implicit $exec + S_WAITCNT 49279 + WAVE_BARRIER + undef %1172.sub0:vreg_128_align2 = COPY %1043.sub0 + %1172.sub1:vreg_128_align2 = COPY %1051.sub0 + %1172.sub2:vreg_128_align2 = COPY %1059.sub0 + %1172.sub3:vreg_128_align2 = COPY %1051.sub2 + DS_WRITE_B128_gfx9 %11, %1172, 0, 0, implicit $exec + %1051.sub0:vreg_128_align2 = COPY %1043.sub1 + %1051.sub2:vreg_128_align2 = COPY %1059.sub1 + DS_WRITE_B128_gfx9 %11, %1051, 16, 0, implicit $exec + undef %1173.sub0:vreg_128_align2 = COPY %1044.sub0 + %1173.sub1:vreg_128_align2 = COPY %1052.sub0 + %1173.sub2:vreg_128_align2 = COPY %1060.sub0 + %1173.sub3:vreg_128_align2 = COPY %1052.sub2 + DS_WRITE_B128_gfx9 %11, %1173, 32, 0, implicit $exec + %1052.sub0:vreg_128_align2 = COPY %1044.sub1 + %1052.sub2:vreg_128_align2 = COPY %1060.sub1 + DS_WRITE_B128_gfx9 %11, %1052, 48, 0, implicit $exec + undef %1174.sub0:vreg_128_align2 = COPY %1045.sub0 + %1174.sub1:vreg_128_align2 = COPY %1053.sub0 + %1174.sub2:vreg_128_align2 = COPY %1061.sub0 + %1174.sub3:vreg_128_align2 = COPY %1053.sub2 + DS_WRITE_B128_gfx9 %11, %1174, 64, 0, implicit $exec + %1053.sub0:vreg_128_align2 = COPY %1045.sub1 + %1053.sub2:vreg_128_align2 = COPY %1061.sub1 + DS_WRITE_B128_gfx9 %11, %1053, 80, 0, implicit $exec + undef %1175.sub0:vreg_128_align2 = COPY %1046.sub0 + %1175.sub1:vreg_128_align2 = COPY %1054.sub0 + %1175.sub2:vreg_128_align2 = COPY %1062.sub0 + 
%1175.sub3:vreg_128_align2 = COPY %1054.sub2 + DS_WRITE_B128_gfx9 %11, %1175, 96, 0, implicit $exec + %1054.sub0:vreg_128_align2 = COPY %1046.sub1 + %1054.sub2:vreg_128_align2 = COPY %1062.sub1 + DS_WRITE_B128_gfx9 %11, %1054, 112, 0, implicit $exec + undef %1176.sub0:vreg_128_align2 = COPY %1047.sub0 + %1176.sub1:vreg_128_align2 = COPY %1055.sub0 + %1176.sub2:vreg_128_align2 = COPY %1063.sub0 + %1176.sub3:vreg_128_align2 = COPY %1055.sub2 + DS_WRITE_B128_gfx9 %11, %1176, 128, 0, implicit $exec + %1055.sub0:vreg_128_align2 = COPY %1047.sub1 + %1055.sub2:vreg_128_align2 = COPY %1063.sub1 + DS_WRITE_B128_gfx9 %11, %1055, 144, 0, implicit $exec + undef %1177.sub0:vreg_128_align2 = COPY %1048.sub0 + %1177.sub1:vreg_128_align2 = COPY %1056.sub0 + %1177.sub2:vreg_128_align2 = COPY %1064.sub0 + %1177.sub3:vreg_128_align2 = COPY %1056.sub2 + DS_WRITE_B128_gfx9 %11, %1177, 160, 0, implicit $exec + %1056.sub0:vreg_128_align2 = COPY %1048.sub1 + %1056.sub2:vreg_128_align2 = COPY %1064.sub1 + DS_WRITE_B128_gfx9 %11, %1056, 176, 0, implicit $exec + undef %1178.sub0:vreg_128_align2 = COPY %1049.sub0 + %1178.sub1:vreg_128_align2 = COPY %1057.sub0 + %1178.sub2:vreg_128_align2 = COPY %1065.sub0 + %1178.sub3:vreg_128_align2 = COPY %1057.sub2 + DS_WRITE_B128_gfx9 %11, %1178, 192, 0, implicit $exec + %1057.sub0:vreg_128_align2 = COPY %1049.sub1 + %1057.sub2:vreg_128_align2 = COPY %1065.sub1 + DS_WRITE_B128_gfx9 %11, %1057, 208, 0, implicit $exec + undef %1179.sub0:vreg_128_align2 = COPY %1050.sub0 + %1179.sub1:vreg_128_align2 = COPY %1058.sub0 + %1179.sub2:vreg_128_align2 = COPY %1066.sub0 + %1179.sub3:vreg_128_align2 = COPY %1058.sub2 + DS_WRITE_B128_gfx9 %11, %1179, 224, 0, implicit $exec + %1058.sub0:vreg_128_align2 = COPY %1050.sub1 + %1058.sub2:vreg_128_align2 = COPY %1066.sub1 + DS_WRITE_B128_gfx9 %11, %1058, 240, 0, implicit $exec + WAVE_BARRIER + %1180:vreg_128_align2 = DS_READ_B128_gfx9 %150, 0, 0, implicit $exec + WAVE_BARRIER + %1079.sub0:vreg_128_align2 = COPY %1071.sub1 + 
%1079.sub2:vreg_128_align2 = COPY %1087.sub1 + DS_WRITE_B128_gfx9 %11, %1079, 144, 0, implicit $exec + undef %1181.sub0:vreg_128_align2 = COPY %1072.sub0 + %1181.sub1:vreg_128_align2 = COPY %1080.sub0 + %1181.sub2:vreg_128_align2 = COPY %1088.sub0 + %1181.sub3:vreg_128_align2 = COPY %1080.sub2 + DS_WRITE_B128_gfx9 %11, %1181, 160, 0, implicit $exec + %1080.sub0:vreg_128_align2 = COPY %1072.sub1 + %1080.sub2:vreg_128_align2 = COPY %1088.sub1 + DS_WRITE_B128_gfx9 %11, %1080, 176, 0, implicit $exec + undef %1182.sub0:vreg_128_align2 = COPY %1073.sub0 + %1182.sub1:vreg_128_align2 = COPY %1081.sub0 + %1182.sub2:vreg_128_align2 = COPY %1089.sub0 + %1182.sub3:vreg_128_align2 = COPY %1081.sub2 + DS_WRITE_B128_gfx9 %11, %1182, 192, 0, implicit $exec + %1081.sub0:vreg_128_align2 = COPY %1073.sub1 + %1081.sub2:vreg_128_align2 = COPY %1089.sub1 + DS_WRITE_B128_gfx9 %11, %1081, 208, 0, implicit $exec + undef %1183.sub0:vreg_128_align2 = COPY %1074.sub0 + %1183.sub1:vreg_128_align2 = COPY %1082.sub0 + %1183.sub2:vreg_128_align2 = COPY %1090.sub0 + %1183.sub3:vreg_128_align2 = COPY %1082.sub2 + DS_WRITE_B128_gfx9 %11, %1183, 224, 0, implicit $exec + %1082.sub0:vreg_128_align2 = COPY %1074.sub1 + %1082.sub2:vreg_128_align2 = COPY %1090.sub1 + DS_WRITE_B128_gfx9 %11, %1082, 240, 0, implicit $exec + WAVE_BARRIER + %1184:vreg_128_align2 = DS_READ_B128_gfx9 %150, 0, 0, implicit $exec + S_WAITCNT 49279 + WAVE_BARRIER + %1185:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %655, implicit $mode, implicit $exec + DS_WRITE_B16_gfx9 %151, %1185, 0, 0, implicit $exec + %1186:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %656, implicit $mode, implicit $exec + DS_WRITE_B16_gfx9 %151, %1186, 128, 0, implicit $exec + DS_WRITE_B16_gfx9 %151, %11, 4096, 0, implicit $exec + DS_WRITE_B16_gfx9 %151, %11, 4224, 0, implicit $exec + DS_WRITE_B16_gfx9 %166, %11, 20608, 0, implicit $exec + DS_WRITE_B16_gfx9 %166, %11, 24576, 0, implicit $exec + DS_WRITE_B16_gfx9 %166, %11, 24704, 0, implicit $exec + DS_WRITE_B16_gfx9 
%166, %11, 28672, 0, implicit $exec + DS_WRITE_B16_gfx9 %166, %11, 28800, 0, implicit $exec + S_WAITCNT 49279 + WAVE_BARRIER + undef %1187.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %169, 0, 16, 0, implicit $exec + %1188:vgpr_32 = V_ADD_U32_e32 4096, %169, implicit $exec + undef %1189.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1188, 0, 16, 0, implicit $exec + %1190:vgpr_32 = V_ADD_U32_e32 8192, %169, implicit $exec + undef %1191.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1190, 0, 16, 0, implicit $exec + %1192:vgpr_32 = V_ADD_U32_e32 12288, %169, implicit $exec + undef %1193.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1192, 0, 16, 0, implicit $exec + %1194:vgpr_32 = V_ADD_U32_e32 16384, %169, implicit $exec + undef %1195.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1194, 0, 16, 0, implicit $exec + %1196:vgpr_32 = V_ADD_U32_e32 20480, %169, implicit $exec + undef %1197.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1196, 0, 16, 0, implicit $exec + %1198:vgpr_32 = V_ADD_U32_e32 24576, %169, implicit $exec + undef %1199.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1198, 0, 16, 0, implicit $exec + %1200:vgpr_32 = V_ADD_U32_e32 28672, %169, implicit $exec + undef %1201.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1200, 0, 16, 0, implicit $exec + undef %1202.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %170, 0, 16, 0, implicit $exec + %1203:vgpr_32 = V_ADD_U32_e32 4096, %170, implicit $exec + undef %1204.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1203, 0, 16, 0, implicit $exec + %1205:vgpr_32 = V_ADD_U32_e32 8192, %170, implicit $exec + undef %1206.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1205, 0, 16, 0, implicit $exec + %1207:vgpr_32 = V_ADD_U32_e32 12288, %170, implicit $exec + undef %1208.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1207, 0, 16, 0, implicit $exec + %1209:vgpr_32 = V_ADD_U32_e32 16384, %170, implicit $exec + undef 
%1210.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1209, 0, 16, 0, implicit $exec + %1211:vgpr_32 = V_ADD_U32_e32 20480, %170, implicit $exec + undef %1212.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1211, 0, 16, 0, implicit $exec + %1213:vgpr_32 = V_ADD_U32_e32 24576, %170, implicit $exec + undef %1214.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1213, 0, 16, 0, implicit $exec + %1215:vgpr_32 = V_ADD_U32_e32 28672, %170, implicit $exec + undef %1216.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1215, 0, 16, 0, implicit $exec + undef %1217.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %171, 0, 16, 0, implicit $exec + %1218:vgpr_32 = V_ADD_U32_e32 4096, %171, implicit $exec + undef %1219.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1218, 0, 16, 0, implicit $exec + %1220:vgpr_32 = V_ADD_U32_e32 8192, %171, implicit $exec + undef %1221.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1220, 0, 16, 0, implicit $exec + %1222:vgpr_32 = V_ADD_U32_e32 12288, %171, implicit $exec + undef %1223.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1222, 0, 16, 0, implicit $exec + %1224:vgpr_32 = V_ADD_U32_e32 16384, %171, implicit $exec + undef %1225.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1224, 0, 16, 0, implicit $exec + %1226:vgpr_32 = V_ADD_U32_e32 20480, %171, implicit $exec + undef %1227.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1226, 0, 16, 0, implicit $exec + %1228:vgpr_32 = V_ADD_U32_e32 24576, %171, implicit $exec + undef %1229.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1228, 0, 16, 0, implicit $exec + %1230:vgpr_32 = V_ADD_U32_e32 28672, %171, implicit $exec + undef %1231.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1230, 0, 16, 0, implicit $exec + undef %1232.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %172, 0, 16, 0, implicit $exec + %1233:vgpr_32 = V_ADD_U32_e32 4096, %172, implicit $exec + undef %1234.sub2_sub3_sub4_sub5:av_192_align2 = 
DS_READ2_B64_gfx9 %1233, 0, 16, 0, implicit $exec + %1235:vgpr_32 = V_ADD_U32_e32 8192, %172, implicit $exec + undef %1236.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1235, 0, 16, 0, implicit $exec + %1237:vgpr_32 = V_ADD_U32_e32 12288, %172, implicit $exec + undef %1238.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1237, 0, 16, 0, implicit $exec + %1239:vgpr_32 = V_ADD_U32_e32 16384, %172, implicit $exec + undef %1240.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1239, 0, 16, 0, implicit $exec + %1241:vgpr_32 = V_ADD_U32_e32 20480, %172, implicit $exec + undef %1242.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1241, 0, 16, 0, implicit $exec + %1243:vgpr_32 = V_ADD_U32_e32 24576, %172, implicit $exec + undef %1244.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1243, 0, 16, 0, implicit $exec + %1245:vgpr_32 = V_ADD_U32_e32 28672, %172, implicit $exec + undef %1246.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1245, 0, 16, 0, implicit $exec + %1202.sub0:av_192_align2 = COPY %1187.sub0 + %1202.sub1:av_192_align2 = COPY %1187.sub1 + %1232.sub0:av_192_align2 = COPY %1217.sub0 + %1232.sub1:av_192_align2 = COPY %1217.sub1 + %1187.sub4:av_192_align2 = COPY %1202.sub4 + %1187.sub5:av_192_align2 = COPY %1202.sub5 + %1217.sub4:av_192_align2 = COPY %1232.sub4 + %1197.sub4:av_192_align2 = COPY %1212.sub4 + %1197.sub5:av_192_align2 = COPY %1212.sub5 + %1227.sub4:av_192_align2 = COPY %1242.sub4 + %1227.sub5:av_192_align2 = COPY %1242.sub5 + %1214.sub0:av_192_align2 = COPY %1199.sub0 + %1214.sub1:av_192_align2 = COPY %1199.sub1 + %1244.sub0:av_192_align2 = COPY %1229.sub0 + %1244.sub1:av_192_align2 = COPY %1229.sub1 + %1199.sub4:av_192_align2 = COPY %1214.sub4 + %1199.sub5:av_192_align2 = COPY %1214.sub5 + %1229.sub4:av_192_align2 = COPY %1244.sub4 + %1229.sub5:av_192_align2 = COPY %1244.sub5 + %1216.sub0:av_192_align2 = COPY %1201.sub0 + %1216.sub1:av_192_align2 = COPY %1201.sub1 + %1246.sub0:av_192_align2 = COPY %1231.sub0 
+ %1246.sub1:av_192_align2 = COPY %1231.sub1 + %1201.sub4:av_192_align2 = COPY %1216.sub4 + %1201.sub5:av_192_align2 = COPY %1216.sub5 + %1231.sub4:av_192_align2 = COPY %1246.sub4 + %1231.sub5:av_192_align2 = COPY %1246.sub5 + undef %1247.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %175.sub2_sub3, 0, %1180.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1247.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %175.sub0_sub1, 0, %1180.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1248:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1168, %1202.sub0_sub1_sub2_sub3, %1247, 0, 0, 0, implicit $mode, implicit $exec + %1249:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1169, %1232.sub0_sub1_sub2_sub3, %1248, 0, 0, 0, implicit $mode, implicit $exec + %1250:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1170, %1187.sub2_sub3_sub4_sub5, %1249, 0, 0, 0, implicit $mode, implicit $exec + %175:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1171, %1217.sub2_sub3_sub4_sub5, %1250, 0, 0, 0, implicit $mode, implicit $exec + undef %1251.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %176.sub2_sub3, 12, %1180.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1251.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %176.sub0_sub1, 12, %1180.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1252:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1168, %1204.sub0_sub1_sub2_sub3, %1251, 0, 0, 0, implicit $mode, implicit $exec + %1253:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1169, %1234.sub0_sub1_sub2_sub3, %1252, 0, 0, 0, implicit $mode, implicit $exec + %1254:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1170, %1189.sub2_sub3_sub4_sub5, %1253, 0, 0, 0, implicit $mode, implicit $exec + %176:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1171, %1219.sub2_sub3_sub4_sub5, %1254, 0, 0, 0, implicit $mode, implicit $exec + undef %1255.sub2_sub3:vreg_128_align2 = 
nofpexcept V_PK_MUL_F32 8, %177.sub2_sub3, 0, %1180.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1255.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %177.sub0_sub1, 0, %1180.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1256:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1168, %1206.sub0_sub1_sub2_sub3, %1255, 0, 0, 0, implicit $mode, implicit $exec + %1257:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1169, %1236.sub0_sub1_sub2_sub3, %1256, 0, 0, 0, implicit $mode, implicit $exec + %1258:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1170, %1191.sub2_sub3_sub4_sub5, %1257, 0, 0, 0, implicit $mode, implicit $exec + %177:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1171, %1221.sub2_sub3_sub4_sub5, %1258, 0, 0, 0, implicit $mode, implicit $exec + undef %1259.sub0:vreg_64_align2 = COPY %1180.sub3 + undef %1260.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %178.sub2_sub3, 0, %1259, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1260.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %178.sub0_sub1, 0, %1259, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1261:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1168, %1208.sub0_sub1_sub2_sub3, %1260, 0, 0, 0, implicit $mode, implicit $exec + %1262:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1169, %1238.sub0_sub1_sub2_sub3, %1261, 0, 0, 0, implicit $mode, implicit $exec + %1263:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1170, %1193.sub2_sub3_sub4_sub5, %1262, 0, 0, 0, implicit $mode, implicit $exec + %178:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1171, %1223.sub2_sub3_sub4_sub5, %1263, 0, 0, 0, implicit $mode, implicit $exec + undef %1264.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %179.sub2_sub3, 0, %1184.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1264.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %179.sub0_sub1, 0, %1184.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, 
implicit $exec + %1265:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1168, %1210.sub0_sub1_sub2_sub3, %1264, 0, 0, 0, implicit $mode, implicit $exec + %1266:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1169, %1240.sub0_sub1_sub2_sub3, %1265, 0, 0, 0, implicit $mode, implicit $exec + %1267:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1170, %1195.sub2_sub3_sub4_sub5, %1266, 0, 0, 0, implicit $mode, implicit $exec + %179:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1171, %1225.sub2_sub3_sub4_sub5, %1267, 0, 0, 0, implicit $mode, implicit $exec + undef %1268.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %180.sub2_sub3, 12, %1184.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1268.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %180.sub0_sub1, 12, %1184.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1269:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1168, %1212.sub0_sub1_sub2_sub3, %1268, 0, 0, 0, implicit $mode, implicit $exec + %1270:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1169, %1242.sub0_sub1_sub2_sub3, %1269, 0, 0, 0, implicit $mode, implicit $exec + %1271:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1170, %1197.sub2_sub3_sub4_sub5, %1270, 0, 0, 0, implicit $mode, implicit $exec + %180:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1171, %1227.sub2_sub3_sub4_sub5, %1271, 0, 0, 0, implicit $mode, implicit $exec + undef %1272.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %181.sub2_sub3, 0, %1184.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1272.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %181.sub0_sub1, 0, %1184.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1273:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1168, %1214.sub0_sub1_sub2_sub3, %1272, 0, 0, 0, implicit $mode, implicit $exec + %1274:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1169, %1244.sub0_sub1_sub2_sub3, %1273, 0, 0, 0, 
implicit $mode, implicit $exec + %1275:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1170, %1199.sub2_sub3_sub4_sub5, %1274, 0, 0, 0, implicit $mode, implicit $exec + %181:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1171, %1229.sub2_sub3_sub4_sub5, %1275, 0, 0, 0, implicit $mode, implicit $exec + undef %1276.sub0:vreg_64_align2 = COPY %1184.sub3 + undef %1277.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %182.sub2_sub3, 0, %1276, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1277.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %182.sub0_sub1, 0, %1276, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1278:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1168, %1216.sub0_sub1_sub2_sub3, %1277, 0, 0, 0, implicit $mode, implicit $exec + %1279:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1169, %1246.sub0_sub1_sub2_sub3, %1278, 0, 0, 0, implicit $mode, implicit $exec + %1280:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1170, %1201.sub2_sub3_sub4_sub5, %1279, 0, 0, 0, implicit $mode, implicit $exec + %182:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1171, %1231.sub2_sub3_sub4_sub5, %1280, 0, 0, 0, implicit $mode, implicit $exec + S_WAITCNT 49279 + WAVE_BARRIER + %34:sreg_32 = nuw nsw S_ADD_I32 %34, 1, implicit-def dead $scc + S_CMP_LG_U32 %34, 30, implicit-def $scc + S_CBRANCH_SCC1 %bb.1, implicit killed $scc + S_BRANCH %bb.2 + + bb.2: + %1281:sreg_64_xexec = V_CMP_GT_U32_e64 7, %23, implicit $exec + undef %1282.sub0:sgpr_128 = S_MOV_B32 0 + %1283:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %24, implicit $exec + %1284:vgpr_32 = exact V_LSHRREV_B32_e32 2, %26, implicit $exec + %1285:vgpr_32 = nuw nsw V_LSHLREV_B32_e32 10, %25, implicit $exec + %1286:vgpr_32 = nuw nsw V_LSHLREV_B32_e32 7, %22, implicit $exec + %1287:vgpr_32 = V_AND_B32_e32 2048, %1286, implicit $exec + %1288:vgpr_32 = disjoint V_OR3_B32_e64 %1285, %1287, %148, implicit $exec + %1289:vgpr_32 = V_AND_B32_e32 65535, %104, implicit $exec + undef 
%1290.sub0:vreg_128_align2 = V_LSHL_OR_B32_e64 %104, 16, %1289, implicit $exec + %1290.sub2:vreg_128_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1290.sub1:vreg_128_align2 = COPY %1290.sub0 + %1290.sub3:vreg_128_align2 = COPY %1290.sub2 + DS_WRITE_B128_gfx9 %1288, %1290, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1290, 256, 0, implicit $exec + %1290.sub1:vreg_128_align2 = V_AND_B32_e32 65535, %104, implicit $exec + %1290.sub3:vreg_128_align2 = COPY %1290.sub2 + DS_WRITE_B128_gfx9 %1288, %1290, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1290, 384, 0, implicit $exec + WAVE_BARRIER + %1291:vgpr_32 = nuw nsw V_LSHLREV_B32_e32 9, %22, implicit $exec + %1292:vgpr_32 = V_AND_B32_e32 2048, %1291, implicit $exec + %1293:vgpr_32 = disjoint V_OR3_B32_e64 %1292, %136, %137, implicit $exec + %1294:vgpr_32 = disjoint V_OR_B32_e32 %1293, %134, implicit $exec + %1295:vreg_128_align2 = DS_READ_B128_gfx9 %1294, 0, 0, implicit $exec + %1296:vgpr_32 = V_LSHRREV_B32_e32 16, %1295.sub0, implicit $exec + %1297:vgpr_32 = V_LSHRREV_B32_e32 16, %1295.sub1, implicit $exec + %1298:vreg_128_align2 = DS_READ_B128_gfx9 %1294, 1024, 0, implicit $exec + %1299:vgpr_32 = V_LSHRREV_B32_e32 16, %1298.sub0, implicit $exec + %1300:vgpr_32 = V_LSHRREV_B32_e32 16, %1298.sub1, implicit $exec + %1301:sreg_32 = S_MOV_B32 576 + %1302:vgpr_32 = V_BITOP3_B32_e64 %1293, %1301, %134, 54, implicit $exec + %1303:vreg_128_align2 = DS_READ_B128_gfx9 %1302, 0, 0, implicit $exec + %1304:vgpr_32 = V_LSHRREV_B32_e32 16, %1303.sub0, implicit $exec + %1305:vgpr_32 = V_LSHRREV_B32_e32 16, %1303.sub1, implicit $exec + %1306:vreg_128_align2 = DS_READ_B128_gfx9 %1302, 1024, 0, implicit $exec + %1307:vgpr_32 = V_LSHRREV_B32_e32 16, %1306.sub0, implicit $exec + %1308:vgpr_32 = V_LSHRREV_B32_e32 16, %1306.sub1, implicit $exec + WAVE_BARRIER + %1282.sub1:sgpr_128 = COPY %1282.sub0 + %1282.sub2:sgpr_128 = COPY %1282.sub0 + %1282.sub3:sgpr_128 = COPY %1282.sub0 + %1309:av_128_align2 = COPY %1282 + 
DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + %304.sub1:vreg_64_align2, dead %272.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %304.sub1, %272.sub1, implicit $exec + %303.sub0:vreg_64_align2, %271.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %303.sub0, %271.sub0, implicit $exec + %303.sub1:vreg_64_align2, dead %271.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %303.sub1, %271.sub1, implicit $exec + %302.sub0:vreg_64_align2, %270.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %302.sub0, %270.sub0, implicit $exec + %302.sub1:vreg_64_align2, dead %270.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %302.sub1, %270.sub1, implicit $exec + %301.sub0:vreg_64_align2, %269.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %301.sub0, %269.sub0, implicit $exec + %301.sub1:vreg_64_align2, dead %269.sub1:vreg_64_align2 = 
V_PERMLANE32_SWAP_B32_e32 %301.sub1, %269.sub1, implicit $exec + %300.sub0:vreg_64_align2, %268.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %300.sub0, %268.sub0, implicit $exec + %300.sub1:vreg_64_align2, dead %268.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %300.sub1, %268.sub1, implicit $exec + %299.sub0:vreg_64_align2, %267.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %299.sub0, %267.sub0, implicit $exec + %299.sub1:vreg_64_align2, dead %267.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %299.sub1, %267.sub1, implicit $exec + %298.sub0:vreg_64_align2, %266.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %298.sub0, %266.sub0, implicit $exec + %298.sub1:vreg_64_align2, dead %266.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %298.sub1, %266.sub1, implicit $exec + %297.sub0:vreg_64_align2, %265.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %297.sub0, %265.sub0, implicit $exec + %297.sub1:vreg_64_align2, dead %265.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %297.sub1, %265.sub1, implicit $exec + %296.sub0:vreg_64_align2, %264.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %296.sub0, %264.sub0, implicit $exec + %296.sub1:vreg_64_align2, dead %264.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %296.sub1, %264.sub1, implicit $exec + %295.sub0:vreg_64_align2, %263.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %295.sub0, %263.sub0, implicit $exec + %295.sub1:vreg_64_align2, dead %263.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %295.sub1, %263.sub1, implicit $exec + %294.sub0:vreg_64_align2, %262.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %294.sub0, %262.sub0, implicit $exec + %294.sub1:vreg_64_align2, dead %262.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %294.sub1, %262.sub1, implicit $exec + %293.sub0:vreg_64_align2, %261.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %293.sub0, %261.sub0, implicit $exec + %293.sub1:vreg_64_align2, dead %261.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %293.sub1, %261.sub1, implicit $exec + 
%292.sub0:vreg_64_align2, %260.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %292.sub0, %260.sub0, implicit $exec + %292.sub1:vreg_64_align2, dead %260.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %292.sub1, %260.sub1, implicit $exec + %291.sub0:vreg_64_align2, %259.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %291.sub0, %259.sub0, implicit $exec + %291.sub1:vreg_64_align2, dead %259.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %291.sub1, %259.sub1, implicit $exec + %290.sub0:vreg_64_align2, %258.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %290.sub0, %258.sub0, implicit $exec + %290.sub1:vreg_64_align2, dead %258.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %290.sub1, %258.sub1, implicit $exec + %1310:vgpr_32 = COPY %289.sub0 + %1310:vgpr_32, %257.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %1310, %257.sub0, implicit $exec + %289.sub1:vreg_64_align2, dead %257.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %289.sub1, %257.sub1, implicit $exec + %288.sub0:vreg_64_align2, %256.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %288.sub0, %256.sub0, implicit $exec + %288.sub1:vreg_64_align2, dead %256.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %288.sub1, %256.sub1, implicit $exec + %287.sub0:vreg_64_align2, %255.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %287.sub0, %255.sub0, implicit $exec + %287.sub1:vreg_64_align2, dead %255.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %287.sub1, %255.sub1, implicit $exec + %286.sub0:vreg_64_align2, %254.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %286.sub0, %254.sub0, implicit $exec + %286.sub1:vreg_64_align2, dead %254.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %286.sub1, %254.sub1, implicit $exec + %285.sub0:vreg_64_align2, %253.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %285.sub0, %253.sub0, implicit $exec + %285.sub1:vreg_64_align2, dead %253.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %285.sub1, %253.sub1, implicit $exec + %284.sub0:vreg_64_align2, %173.sub0:vreg_64_align2 = 
V_PERMLANE32_SWAP_B32_e32 %284.sub0, %173.sub0, implicit $exec + %284.sub1:vreg_64_align2, dead %173.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %284.sub1, %173.sub1, implicit $exec + undef %1311.sub0:vreg_64_align2 = COPY %310.sub0 + %1311.sub1:vreg_64_align2 = COPY %306.sub0 + %306.sub0:vreg_64_align2 = COPY %310.sub1 + undef %1312.sub0:vreg_64_align2 = COPY %309.sub0 + %1312.sub1:vreg_64_align2 = COPY %305.sub0 + %305.sub0:vreg_64_align2 = COPY %309.sub1 + %289.sub0:vreg_64_align2 = COPY %293.sub1 + undef %1313.sub0:vreg_64_align2 = COPY %292.sub0 + %1313.sub1:vreg_64_align2 = COPY %288.sub0 + %288.sub0:vreg_64_align2 = COPY %292.sub1 + %251.sub0:vreg_64_align2, dead %247.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %251.sub0, %247.sub0, implicit $exec + %252.sub0:vreg_64_align2, dead %248.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %252.sub0, %248.sub0, implicit $exec + %249.sub0:vreg_64_align2, %246.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %249.sub0, %246.sub0, implicit $exec + %250.sub0:vreg_64_align2, dead %246.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %250.sub0, %246.sub1, implicit $exec + %245.sub0:vreg_64_align2, %243.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %245.sub0, %243.sub0, implicit $exec + %245.sub1:vreg_64_align2, dead %243.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %245.sub1, %243.sub1, implicit $exec + %244.sub0:vreg_64_align2, %242.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %244.sub0, %242.sub0, implicit $exec + %244.sub1:vreg_64_align2, dead %242.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %244.sub1, %242.sub1, implicit $exec + %241.sub0:vreg_64_align2, %239.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %241.sub0, %239.sub0, implicit $exec + %241.sub1:vreg_64_align2, dead %239.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %241.sub1, %239.sub1, implicit $exec + %240.sub0:vreg_64_align2, %238.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %240.sub0, %238.sub0, implicit $exec + 
%240.sub1:vreg_64_align2, dead %238.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %240.sub1, %238.sub1, implicit $exec + %237.sub0:vreg_64_align2, %235.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %237.sub0, %235.sub0, implicit $exec + %237.sub1:vreg_64_align2, dead %235.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %237.sub1, %235.sub1, implicit $exec + %236.sub0:vreg_64_align2, %234.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %236.sub0, %234.sub0, implicit $exec + %236.sub1:vreg_64_align2, dead %234.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %236.sub1, %234.sub1, implicit $exec + %233.sub0:vreg_64_align2, %231.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %233.sub0, %231.sub0, implicit $exec + %233.sub1:vreg_64_align2, dead %231.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %233.sub1, %231.sub1, implicit $exec + %232.sub0:vreg_64_align2, %230.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %232.sub0, %230.sub0, implicit $exec + %232.sub1:vreg_64_align2, dead %230.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %232.sub1, %230.sub1, implicit $exec + %229.sub0:vreg_64_align2, %227.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %229.sub0, %227.sub0, implicit $exec + %229.sub1:vreg_64_align2, dead %227.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %229.sub1, %227.sub1, implicit $exec + %192.sub1:vreg_64_align2, dead %190.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %192.sub1, %190.sub1, implicit $exec + %189.sub0:vreg_64_align2, %187.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %189.sub0, %187.sub0, implicit $exec + %189.sub1:vreg_64_align2, dead %187.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %189.sub1, %187.sub1, implicit $exec + %188.sub0:vreg_64_align2, %174.sub0:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %188.sub0, %174.sub0, implicit $exec + %188.sub1:vreg_64_align2, dead %174.sub1:vreg_64_align2 = V_PERMLANE32_SWAP_B32_e32 %188.sub1, %174.sub1, implicit $exec + S_WAITCNT 49279 + WAVE_BARRIER + %1314:vgpr_32 = V_AND_B32_e32 
65535, %185, implicit $exec + %1290.sub0:vreg_128_align2 = V_LSHL_OR_B32_e64 %185, 16, %1314, implicit $exec + %1290.sub1:vreg_128_align2 = COPY %1290.sub0 + %1290.sub3:vreg_128_align2 = COPY %1290.sub2 + DS_WRITE_B128_gfx9 %1288, %1290, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1290, 256, 0, implicit $exec + %1290.sub1:vreg_128_align2 = V_AND_B32_e32 65535, %185, implicit $exec + %1290.sub3:vreg_128_align2 = COPY %1290.sub2 + DS_WRITE_B128_gfx9 %1288, %1290, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1290, 384, 0, implicit $exec + WAVE_BARRIER + %1315:vreg_128_align2 = DS_READ_B128_gfx9 %1294, 0, 0, implicit $exec + %1316:vgpr_32 = V_LSHRREV_B32_e32 16, %1315.sub0, implicit $exec + %1317:vgpr_32 = V_LSHRREV_B32_e32 16, %1315.sub1, implicit $exec + %1318:vreg_128_align2 = DS_READ_B128_gfx9 %1294, 1024, 0, implicit $exec + %1319:vgpr_32 = V_LSHRREV_B32_e32 16, %1318.sub0, implicit $exec + %1320:vgpr_32 = V_LSHRREV_B32_e32 16, %1318.sub1, implicit $exec + %1321:vreg_128_align2 = DS_READ_B128_gfx9 %1302, 0, 0, implicit $exec + %1322:vgpr_32 = V_LSHRREV_B32_e32 16, %1321.sub0, implicit $exec + %1323:vgpr_32 = V_LSHRREV_B32_e32 16, %1321.sub1, implicit $exec + %1324:vreg_128_align2 = DS_READ_B128_gfx9 %1302, 1024, 0, implicit $exec + %1325:vgpr_32 = V_LSHRREV_B32_e32 16, %1324.sub0, implicit $exec + %1326:vgpr_32 = V_LSHRREV_B32_e32 16, %1324.sub1, implicit $exec + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + 
DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + %1327:vgpr_32 = V_AND_B32_e32 31, %22, implicit $exec + %1328:vgpr_32 = nuw nsw V_LSHLREV_B32_e32 6, %1327, implicit $exec + %1329:vgpr_32 = nuw V_ADD_U32_e32 %12, %1328, implicit $exec + %1330:vreg_128_align2 = DS_READ_B128_gfx9 %1329, 0, 0, implicit $exec + %1331:vgpr_32 = disjoint V_OR_B32_e32 16, %1328, implicit $exec + %1332:vgpr_32 = nuw V_ADD_U32_e32 %12, %1331, implicit $exec + %1333:vreg_128_align2 = DS_READ_B128_gfx9 %1332, 0, 0, implicit $exec + %1334:vgpr_32 = disjoint V_OR_B32_e32 32, %1328, implicit $exec + %1335:vgpr_32 = nuw V_ADD_U32_e32 %12, %1334, implicit $exec + %1336:vreg_128_align2 = DS_READ_B128_gfx9 %1335, 0, 0, implicit $exec + %1337:vgpr_32 = disjoint V_OR_B32_e32 48, %1328, implicit $exec + %1338:vgpr_32 = nuw V_ADD_U32_e32 %12, 
%1337, implicit $exec + %1339:vreg_128_align2 = DS_READ_B128_gfx9 %1338, 0, 0, implicit $exec + %1340:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1340:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub0, %1330.sub0, %1340, implicit $exec + %1340:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub1, %1330.sub1, %1340, implicit $exec + %1340:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub2, %1330.sub2, %1340, implicit $exec + %1340:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub3, %1330.sub3, %1340, implicit $exec + %1341:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1341:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub0, %1333.sub0, %1341, implicit $exec + %1341:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub1, %1333.sub1, %1341, implicit $exec + %1341:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub2, %1333.sub2, %1341, implicit $exec + %1341:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub3, %1333.sub3, %1341, implicit $exec + %1342:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1342:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub0, %1336.sub0, %1342, implicit $exec + %1342:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub1, %1336.sub1, %1342, implicit $exec + %1342:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub2, %1336.sub2, %1342, implicit $exec + %1342:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub3, %1336.sub3, %1342, implicit $exec + %1343:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1343:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub0, %1339.sub0, %1343, implicit $exec + %1343:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub1, %1339.sub1, %1343, implicit $exec + %1343:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub2, %1339.sub2, %1343, implicit $exec + %1343:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub3, %1339.sub3, %1343, implicit $exec + %1344:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1344:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub0, %1330.sub0, %1344, implicit $exec + %1344:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub1, %1330.sub1, %1344, implicit $exec + %1344:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub2, %1330.sub2, %1344, implicit $exec + %1344:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub3, %1330.sub3, %1344, implicit 
$exec + %1345:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1345:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub0, %1333.sub0, %1345, implicit $exec + %1345:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub1, %1333.sub1, %1345, implicit $exec + %1345:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub2, %1333.sub2, %1345, implicit $exec + %1345:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub3, %1333.sub3, %1345, implicit $exec + %1346:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1346:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub0, %1336.sub0, %1346, implicit $exec + %1346:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub1, %1336.sub1, %1346, implicit $exec + %1346:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub2, %1336.sub2, %1346, implicit $exec + %1346:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub3, %1336.sub3, %1346, implicit $exec + %1347:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1347:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub0, %1339.sub0, %1347, implicit $exec + %1347:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub1, %1339.sub1, %1347, implicit $exec + %1347:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub2, %1339.sub2, %1347, implicit $exec + %1347:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub3, %1339.sub3, %1347, implicit $exec + %1348:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1348:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub0, %1330.sub0, %1348, implicit $exec + %1348:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub1, %1330.sub1, %1348, implicit $exec + %1348:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub2, %1330.sub2, %1348, implicit $exec + %1348:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub3, %1330.sub3, %1348, implicit $exec + %1349:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1349:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub0, %1333.sub0, %1349, implicit $exec + %1349:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub1, %1333.sub1, %1349, implicit $exec + %1349:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub2, %1333.sub2, %1349, implicit $exec + %1349:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub3, %1333.sub3, %1349, implicit $exec + %1350:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1350:vgpr_32 = 
V_DOT4C_I32_I8_e32 %40.sub0, %1336.sub0, %1350, implicit $exec + %1350:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub1, %1336.sub1, %1350, implicit $exec + %1350:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub2, %1336.sub2, %1350, implicit $exec + %1350:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub3, %1336.sub3, %1350, implicit $exec + %1351:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1351:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub0, %1339.sub0, %1351, implicit $exec + %1351:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub1, %1339.sub1, %1351, implicit $exec + %1351:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub2, %1339.sub2, %1351, implicit $exec + %1351:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub3, %1339.sub3, %1351, implicit $exec + %1352:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1352:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub0, %1330.sub0, %1352, implicit $exec + %1352:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub1, %1330.sub1, %1352, implicit $exec + %1352:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub2, %1330.sub2, %1352, implicit $exec + %1352:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub3, %1330.sub3, %1352, implicit $exec + %1353:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1353:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub0, %1333.sub0, %1353, implicit $exec + %1353:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub1, %1333.sub1, %1353, implicit $exec + %1353:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub2, %1333.sub2, %1353, implicit $exec + %1353:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub3, %1333.sub3, %1353, implicit $exec + %1354:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1354:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub0, %1336.sub0, %1354, implicit $exec + %1354:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub1, %1336.sub1, %1354, implicit $exec + %1354:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub2, %1336.sub2, %1354, implicit $exec + %1354:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub3, %1336.sub3, %1354, implicit $exec + %1355:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1355:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub0, %1339.sub0, %1355, implicit $exec + %1355:vgpr_32 = 
V_DOT4C_I32_I8_e32 %41.sub1, %1339.sub1, %1355, implicit $exec + %1355:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub2, %1339.sub2, %1355, implicit $exec + %1355:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub3, %1339.sub3, %1355, implicit $exec + S_WAITCNT 49279 + WAVE_BARRIER + %1356:sreg_32 = S_LSHL_B32 %186, 8, implicit-def dead $scc + %1357:sreg_32 = S_AND_B32 %186, 255, implicit-def dead $scc + %1358:sreg_32 = S_OR_B32 %1357, %1356, implicit-def dead $scc + %1359:sreg_32 = S_LSHL_B32 %1358, 16, implicit-def dead $scc + %1360:sreg_32 = S_AND_B32 %1358, 65535, implicit-def dead $scc + undef %1361.sub0:sgpr_128 = S_OR_B32 %1360, %1359, implicit-def dead $scc + %1361.sub1:sgpr_128 = COPY %1282.sub0 + %1361.sub2:sgpr_128 = COPY %1282.sub0 + %1361.sub3:sgpr_128 = COPY %1282.sub0 + %1362:av_128_align2 = COPY %1361 + DS_WRITE_B128_gfx9 %1288, %1362, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1362, 256, 0, implicit $exec + %1363:sreg_32 = S_LSHL_B32 %1357, 16, implicit-def dead $scc + undef %1364.sub0:sgpr_128 = S_OR_B32 %1360, %1363, implicit-def dead $scc + %1364.sub1:sgpr_128 = COPY %1282.sub0 + %1364.sub2:sgpr_128 = COPY %1282.sub0 + %1364.sub3:sgpr_128 = COPY %1282.sub0 + %1365:av_128_align2 = COPY %1364 + DS_WRITE_B128_gfx9 %1288, %1365, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1365, 384, 0, implicit $exec + WAVE_BARRIER + %1366:vgpr_32 = DS_READ_I8_gfx9 %1294, 0, 0, implicit $exec + %1367:vgpr_32 = DS_READ_I8_gfx9 %1294, 1, 0, implicit $exec + %1368:vgpr_32 = DS_READ_I8_gfx9 %1294, 2, 0, implicit $exec + %1369:vgpr_32 = DS_READ_I8_gfx9 %1294, 3, 0, implicit $exec + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + 
DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + %1370:vgpr_32 = V_SUB_U32_e32 %1340, %1366, implicit $exec + %1371:vgpr_32 = V_SUB_U32_e32 %1341, %1366, implicit $exec + %1372:vgpr_32 = V_SUB_U32_e32 %1342, %1366, implicit $exec + %1373:vgpr_32 = V_SUB_U32_e32 %1343, %1366, implicit $exec + %1374:vgpr_32 = V_SUB_U32_e32 %1344, %1367, implicit $exec + %1375:vgpr_32 = V_SUB_U32_e32 %1345, %1367, implicit $exec + %1376:vgpr_32 = V_SUB_U32_e32 %1346, %1367, implicit $exec + %1377:vgpr_32 = V_SUB_U32_e32 %1347, %1367, implicit $exec + %1378:vgpr_32 = V_SUB_U32_e32 %1348, %1368, implicit $exec + %1379:vgpr_32 = V_SUB_U32_e32 %1349, %1368, implicit $exec + %1380:vgpr_32 = V_SUB_U32_e32 %1350, %1368, implicit $exec + %1381:vgpr_32 = V_SUB_U32_e32 %1351, %1368, implicit $exec + %1382:vgpr_32 = V_SUB_U32_e32 %1352, %1369, implicit $exec + %1383:vgpr_32 = V_SUB_U32_e32 %1353, %1369, implicit $exec + %1384:vgpr_32 = V_SUB_U32_e32 %1354, %1369, implicit $exec + %1385:vgpr_32 = V_SUB_U32_e32 %1355, %1369, implicit $exec + %1386:vgpr_32 = V_CVT_F32_I32_e32 %1370, implicit $mode, implicit $exec + %1387:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1386, implicit $mode, implicit $exec + %1388:vgpr_32 = V_CVT_F32_I32_e32 %1371, implicit $mode, implicit $exec + %1389:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1388, implicit $mode, implicit $exec + %1390:vgpr_32 = V_CVT_F32_I32_e32 %1372, implicit $mode, implicit $exec + %1391:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1390, implicit $mode, implicit $exec + %1392:vgpr_32 = V_CVT_F32_I32_e32 %1373, implicit $mode, implicit $exec + %1393:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1392, implicit $mode, implicit 
$exec + %1394:vgpr_32 = V_CVT_F32_I32_e32 %1374, implicit $mode, implicit $exec + %1395:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1394, implicit $mode, implicit $exec + %1396:vgpr_32 = V_CVT_F32_I32_e32 %1375, implicit $mode, implicit $exec + %1397:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1396, implicit $mode, implicit $exec + %1398:vgpr_32 = V_CVT_F32_I32_e32 %1376, implicit $mode, implicit $exec + %1399:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1398, implicit $mode, implicit $exec + %1400:vgpr_32 = V_CVT_F32_I32_e32 %1377, implicit $mode, implicit $exec + %1401:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1400, implicit $mode, implicit $exec + %1402:vgpr_32 = V_CVT_F32_I32_e32 %1378, implicit $mode, implicit $exec + %1403:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1402, implicit $mode, implicit $exec + %1404:vgpr_32 = V_CVT_F32_I32_e32 %1379, implicit $mode, implicit $exec + %1405:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1404, implicit $mode, implicit $exec + %1406:vgpr_32 = V_CVT_F32_I32_e32 %1380, implicit $mode, implicit $exec + %1407:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1406, implicit $mode, implicit $exec + %1408:vgpr_32 = V_CVT_F32_I32_e32 %1381, implicit $mode, implicit $exec + %1409:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1408, implicit $mode, implicit $exec + %1410:vgpr_32 = V_CVT_F32_I32_e32 %1382, implicit $mode, implicit $exec + %1411:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1410, implicit $mode, implicit $exec + %1412:vgpr_32 = V_CVT_F32_I32_e32 %1383, implicit $mode, implicit $exec + %1413:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1412, implicit $mode, implicit $exec + %1414:vgpr_32 = V_CVT_F32_I32_e32 %1384, implicit $mode, implicit $exec + %1415:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1414, implicit $mode, implicit $exec + %1416:vgpr_32 = V_CVT_F32_I32_e32 %1385, implicit $mode, implicit $exec + %1417:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %1416, implicit $mode, implicit $exec + %1418:vgpr_32 = nofpexcept V_MUL_F16_e32 %1315.sub0, %1387, implicit $mode, 
implicit $exec + %1419:vgpr_32 = nofpexcept V_MUL_F16_e32 %1318.sub0, %1389, implicit $mode, implicit $exec + %1420:vgpr_32 = nofpexcept V_MUL_F16_e32 %1321.sub0, %1391, implicit $mode, implicit $exec + %1421:vgpr_32 = nofpexcept V_MUL_F16_e32 %1324.sub0, %1393, implicit $mode, implicit $exec + %1422:vgpr_32 = nofpexcept V_MUL_F16_e32 %1316, %1395, implicit $mode, implicit $exec + %1423:vgpr_32 = nofpexcept V_MUL_F16_e32 %1319, %1397, implicit $mode, implicit $exec + %1424:vgpr_32 = nofpexcept V_MUL_F16_e32 %1322, %1399, implicit $mode, implicit $exec + %1425:vgpr_32 = nofpexcept V_MUL_F16_e32 %1325, %1401, implicit $mode, implicit $exec + %1426:vgpr_32 = nofpexcept V_MUL_F16_e32 %1315.sub1, %1403, implicit $mode, implicit $exec + %1427:vgpr_32 = nofpexcept V_MUL_F16_e32 %1318.sub1, %1405, implicit $mode, implicit $exec + %1428:vgpr_32 = nofpexcept V_MUL_F16_e32 %1321.sub1, %1407, implicit $mode, implicit $exec + %1429:vgpr_32 = nofpexcept V_MUL_F16_e32 %1324.sub1, %1409, implicit $mode, implicit $exec + %1430:vgpr_32 = nofpexcept V_MUL_F16_e32 %1317, %1411, implicit $mode, implicit $exec + %1431:vgpr_32 = nofpexcept V_MUL_F16_e32 %1320, %1413, implicit $mode, implicit $exec + %1432:vgpr_32 = nofpexcept V_MUL_F16_e32 %1323, %1415, implicit $mode, implicit $exec + %1433:vgpr_32 = nofpexcept V_MUL_F16_e32 %1326, %1417, implicit $mode, implicit $exec + %1434:sgpr_32 = S_MOV_B32 1069066811 + %1435:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %1418, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1436:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %1419, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1437:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %1420, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1438:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %1421, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1439:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %1422, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1440:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, 
%1423, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1441:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %1424, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1442:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %1425, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1443:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %1426, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1444:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %1427, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1445:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %1428, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %1446.sub0:vreg_64_align2 = nofpexcept V_FMA_MIX_F32 8, %1429, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1447:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %1430, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1448:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %1431, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1449:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %1432, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1450:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %1433, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %1451:vgpr_32 = V_MOV_B32_e32 -8388608, implicit $exec + %1446.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %1451, 0, %1447, %1283, implicit $exec + %1452:vgpr_32 = V_CNDMASK_B32_e64 0, %1451, 0, %1448, %1283, implicit $exec + %1453:vgpr_32 = V_CNDMASK_B32_e64 0, %1451, 0, %1449, %1283, implicit $exec + %1454:vgpr_32 = V_CNDMASK_B32_e64 0, %1451, 0, %1450, %1283, implicit $exec + %1455:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %1447, %1283, implicit $exec + %1456:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %1448, %1283, implicit $exec + %1457:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %1449, %1283, implicit $exec + %1458:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %1450, %1283, implicit $exec + %1459:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1435, 0, %1436, 0, %1436, 0, 0, implicit $exec + %1460:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1459, 0, %1437, 0, 
%1438, 0, 0, implicit $exec + %1461:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1439, 0, %1440, 0, %1440, 0, 0, implicit $exec + %1462:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1461, 0, %1441, 0, %1442, 0, 0, implicit $exec + %1463:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1443, 0, %1444, 0, %1444, 0, 0, implicit $exec + %1464:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1463, 0, %1445, 0, %1446.sub0, 0, 0, implicit $exec + %1465:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1455, 0, %1456, 0, %1456, 0, 0, implicit $exec + %1466:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1465, 0, %1457, 0, %1458, 0, 0, implicit $exec + %1467:vgpr_32 = nuw nsw V_LSHLREV_B32_e32 2, %22, implicit $exec + %1468:vgpr_32 = V_XOR_B32_e32 64, %1467, implicit $exec + %1469:vgpr_32 = DS_BPERMUTE_B32 %1468, %1460, 0, implicit $exec + %1470:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1460, 0, %1469, 0, %1469, 0, 0, implicit $exec + %1471:vgpr_32 = COPY %1470 + %1471:vgpr_32 = V_MOV_B32_dpp %1471, %1471, 296, 15, 15, 0, implicit $exec + %1472:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1470, 0, %1471, 0, %1471, 0, 0, implicit $exec + %1473:vgpr_32 = COPY %1472 + %1473:vgpr_32 = V_MOV_B32_dpp %1473, %1473, 321, 15, 15, 0, implicit $exec + %1473:vgpr_32 = V_MOV_B32_dpp %1473, %1473, 27, 15, 15, 0, implicit $exec + %1474:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1472, 0, %1473, 0, %1473, 0, 0, implicit $exec + %1475:vgpr_32 = COPY %1474 + %1475:vgpr_32 = V_MOV_B32_dpp %1475, %1475, 78, 15, 15, 0, implicit $exec + %1476:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1474, 0, %1475, 0, %1475, 0, 0, implicit $exec + %1477:vgpr_32 = COPY %1476 + %1477:vgpr_32 = V_MOV_B32_dpp %1477, %1477, 177, 15, 15, 0, implicit $exec + %1478:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1476, 0, %1477, 0, %1477, 0, 0, implicit $exec + %1479:vgpr_32 = DS_BPERMUTE_B32 %1468, %1462, 0, implicit $exec + %1480:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1462, 0, %1479, 0, %1479, 0, 0, implicit $exec + %1481:vgpr_32 = COPY %1480 + %1481:vgpr_32 = V_MOV_B32_dpp %1481, %1481, 296, 15, 15, 0, implicit $exec + %1482:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1480, 
0, %1481, 0, %1481, 0, 0, implicit $exec + %1483:vgpr_32 = COPY %1482 + %1483:vgpr_32 = V_MOV_B32_dpp %1483, %1483, 321, 15, 15, 0, implicit $exec + %1483:vgpr_32 = V_MOV_B32_dpp %1483, %1483, 27, 15, 15, 0, implicit $exec + %1484:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1482, 0, %1483, 0, %1483, 0, 0, implicit $exec + %1485:vgpr_32 = COPY %1484 + %1485:vgpr_32 = V_MOV_B32_dpp %1485, %1485, 78, 15, 15, 0, implicit $exec + %1486:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1484, 0, %1485, 0, %1485, 0, 0, implicit $exec + %1487:vgpr_32 = COPY %1486 + %1487:vgpr_32 = V_MOV_B32_dpp %1487, %1487, 177, 15, 15, 0, implicit $exec + %1488:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1486, 0, %1487, 0, %1487, 0, 0, implicit $exec + %1489:vgpr_32 = DS_BPERMUTE_B32 %1468, %1464, 0, implicit $exec + %1490:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1464, 0, %1489, 0, %1489, 0, 0, implicit $exec + %1491:vgpr_32 = COPY %1490 + %1491:vgpr_32 = V_MOV_B32_dpp %1491, %1491, 296, 15, 15, 0, implicit $exec + %1492:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1490, 0, %1491, 0, %1491, 0, 0, implicit $exec + %1493:vgpr_32 = COPY %1492 + %1493:vgpr_32 = V_MOV_B32_dpp %1493, %1493, 321, 15, 15, 0, implicit $exec + %1493:vgpr_32 = V_MOV_B32_dpp %1493, %1493, 27, 15, 15, 0, implicit $exec + %1494:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1492, 0, %1493, 0, %1493, 0, 0, implicit $exec + %1495:vgpr_32 = COPY %1494 + %1495:vgpr_32 = V_MOV_B32_dpp %1495, %1495, 78, 15, 15, 0, implicit $exec + %1496:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1494, 0, %1495, 0, %1495, 0, 0, implicit $exec + %1497:vgpr_32 = COPY %1496 + %1497:vgpr_32 = V_MOV_B32_dpp %1497, %1497, 177, 15, 15, 0, implicit $exec + %1498:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1496, 0, %1497, 0, %1497, 0, 0, implicit $exec + %1499:vgpr_32 = DS_BPERMUTE_B32 %1468, %1466, 0, implicit $exec + %1500:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1466, 0, %1499, 0, %1499, 0, 0, implicit $exec + %1501:vgpr_32 = COPY %1500 + %1501:vgpr_32 = V_MOV_B32_dpp %1501, %1501, 296, 15, 15, 0, implicit $exec + %1502:vgpr_32 = V_MAXIMUM3_F32_e64 
0, %1500, 0, %1501, 0, %1501, 0, 0, implicit $exec + %1503:vgpr_32 = COPY %1502 + %1503:vgpr_32 = V_MOV_B32_dpp %1503, %1503, 321, 15, 15, 0, implicit $exec + %1503:vgpr_32 = V_MOV_B32_dpp %1503, %1503, 27, 15, 15, 0, implicit $exec + %1504:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1502, 0, %1503, 0, %1503, 0, 0, implicit $exec + %1505:vgpr_32 = COPY %1504 + %1505:vgpr_32 = V_MOV_B32_dpp %1505, %1505, 78, 15, 15, 0, implicit $exec + %1506:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1504, 0, %1505, 0, %1505, 0, 0, implicit $exec + %1507:vgpr_32 = COPY %1506 + %1507:vgpr_32 = V_MOV_B32_dpp %1507, %1507, 177, 15, 15, 0, implicit $exec + %1508:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1506, 0, %1507, 0, %1507, 0, 0, implicit $exec + undef %1509.sub1:vreg_64_align2 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1510:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1509.sub1:vreg_64_align2 = V_MOV_B32_dpp %1509.sub1, %1510, 296, 15, 15, 0, implicit $exec + %1511:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1478, 0, %251.sub0, 0, implicit $mode, implicit $exec + %1512:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1488, 0, %252.sub0, 0, implicit $mode, implicit $exec + %1513:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1498, 0, %249.sub0, 0, implicit $mode, implicit $exec + %1514:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1508, 0, %250.sub0, 0, implicit $mode, implicit $exec + %1515:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %1478, 0, %1478, 0, implicit $mode, implicit $exec + %1516:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %1488, 0, %1488, 0, implicit $mode, implicit $exec + %1517:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %1498, 0, %1498, 0, implicit $mode, implicit $exec + %1518:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %1508, 0, %1508, 0, implicit $mode, implicit $exec + %1519:sreg_64_xexec = S_OR_B64 %1515, %1511, implicit-def dead $scc + %1520:vgpr_32 = V_CNDMASK_B32_e64 0, %251.sub0, 0, %1478, %1519, implicit $exec + %1521:sreg_64_xexec = S_OR_B64 %1516, %1512, implicit-def dead $scc + %1522:vgpr_32 = 
V_CNDMASK_B32_e64 0, %252.sub0, 0, %1488, %1521, implicit $exec + %1523:vgpr_32 = nofpexcept V_SUB_F32_e32 %1435, %1520, implicit $mode, implicit $exec + %1524:vgpr_32 = nofpexcept V_SUB_F32_e32 %1436, %1520, implicit $mode, implicit $exec + %1525:vgpr_32 = nofpexcept V_SUB_F32_e32 %1437, %1520, implicit $mode, implicit $exec + %1526:vgpr_32 = nofpexcept V_SUB_F32_e32 %1438, %1520, implicit $mode, implicit $exec + %1527:vgpr_32 = nofpexcept V_SUB_F32_e32 %1439, %1522, implicit $mode, implicit $exec + %1528:vgpr_32 = nofpexcept V_SUB_F32_e32 %1440, %1522, implicit $mode, implicit $exec + %1529:vgpr_32 = nofpexcept V_SUB_F32_e32 %1441, %1522, implicit $mode, implicit $exec + %1530:vgpr_32 = nofpexcept V_SUB_F32_e32 %1442, %1522, implicit $mode, implicit $exec + %1531:sreg_64_xexec = S_OR_B64 %1518, %1514, implicit-def dead $scc + %1532:sreg_64_xexec = S_OR_B64 %1517, %1513, implicit-def dead $scc + undef %1533.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, %250.sub0, 0, %1508, %1531, implicit $exec + %1533.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, %249.sub0, 0, %1498, %1532, implicit $exec + %1534:vgpr_32 = nofpexcept V_SUB_F32_e32 %1443, %1533.sub0, implicit $mode, implicit $exec + %1535:vgpr_32 = nofpexcept V_SUB_F32_e32 %1444, %1533.sub0, implicit $mode, implicit $exec + %1536:vgpr_32 = nofpexcept V_SUB_F32_e32 %1445, %1533.sub0, implicit $mode, implicit $exec + %1537:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1446, 11, %1533, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1538:vgpr_32 = nofpexcept V_SUB_F32_e32 %1452, %1533.sub1, implicit $mode, implicit $exec + %1539:vgpr_32 = nofpexcept V_SUB_F32_e32 %1453, %1533.sub1, implicit $mode, implicit $exec + %1540:vgpr_32 = nofpexcept V_SUB_F32_e32 %1454, %1533.sub1, implicit $mode, implicit $exec + undef %1541.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %1523, implicit $mode, implicit $exec + %1541.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %1524, implicit $mode, implicit $exec + undef 
%1542.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %1525, implicit $mode, implicit $exec + %1542.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %1526, implicit $mode, implicit $exec + undef %1543.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %1527, implicit $mode, implicit $exec + %1543.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %1528, implicit $mode, implicit $exec + undef %1544.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %1529, implicit $mode, implicit $exec + %1544.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %1530, implicit $mode, implicit $exec + undef %1545.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %1534, implicit $mode, implicit $exec + %1545.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %1535, implicit $mode, implicit $exec + undef %1546.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %1536, implicit $mode, implicit $exec + %1546.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %1537.sub0, implicit $mode, implicit $exec + %1547:vgpr_32 = nofpexcept V_EXP_F32_e32 %1537.sub1, implicit $mode, implicit $exec + %1548:vgpr_32 = nofpexcept V_EXP_F32_e32 %1538, implicit $mode, implicit $exec + %1549:vgpr_32 = nofpexcept V_EXP_F32_e32 %1539, implicit $mode, implicit $exec + %1550:vgpr_32 = nofpexcept V_EXP_F32_e32 %1540, implicit $mode, implicit $exec + undef %1551.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, 0, 0, %1547, %1283, implicit $exec + %1551.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, 0, 0, %1548, %1283, implicit $exec + undef %1552.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, 0, 0, %1549, %1283, implicit $exec + %1552.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, 0, 0, %1550, %1283, implicit $exec + %1553:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1541, 8, %1542, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1554:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1543, 8, %1544, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1555:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1545, 8, %1546, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + 
%1556:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1551, 8, %1552, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1557:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1553, 4, %1553, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1558:vgpr_32 = nofpexcept V_ADD_F32_e32 %1555.sub0, %1555.sub1, implicit $mode, implicit $exec + %1559:vgpr_32 = nofpexcept V_ADD_F32_e32 %1556.sub0, %1556.sub1, implicit $mode, implicit $exec + undef %1560.sub0:vreg_64_align2 = DS_BPERMUTE_B32 %1468, %1557.sub0, 0, implicit $exec + %1557.sub1:vreg_64_align2 = COPY %1554.sub0 + %1560.sub1:vreg_64_align2 = COPY %1554.sub1 + %1561:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1557, 8, %1560, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1509.sub0:vreg_64_align2 = COPY %1561.sub0 + %1509.sub0:vreg_64_align2 = V_MOV_B32_dpp %1509.sub0, %1509.sub0, 296, 15, 15, 0, implicit $exec + %1562:vgpr_32 = DS_BPERMUTE_B32 %1468, %1561.sub1, 0, implicit $exec + %1563:vgpr_32 = nofpexcept V_ADD_F32_e32 %1561.sub1, %1562, implicit $mode, implicit $exec + %1564:vgpr_32 = COPY %1563 + %1564:vgpr_32 = V_MOV_B32_dpp %1564, %1564, 296, 15, 15, 0, implicit $exec + %1565:vgpr_32 = nofpexcept V_ADD_F32_e32 %1563, %1564, implicit $mode, implicit $exec + %1566:vgpr_32 = COPY %1565 + %1566:vgpr_32 = V_MOV_B32_dpp %1566, %1566, 321, 15, 15, 0, implicit $exec + %1566:vgpr_32 = V_MOV_B32_dpp %1566, %1566, 27, 15, 15, 0, implicit $exec + %1567:vgpr_32 = nofpexcept V_ADD_F32_e32 %1565, %1566, implicit $mode, implicit $exec + %1568:vgpr_32 = COPY %1567 + %1568:vgpr_32 = V_MOV_B32_dpp %1568, %1568, 78, 15, 15, 0, implicit $exec + %1569:vgpr_32 = nofpexcept V_ADD_F32_e32 %1567, %1568, implicit $mode, implicit $exec + %1570:vgpr_32 = COPY %1569 + %1570:vgpr_32 = V_MOV_B32_dpp %1570, %1570, 177, 15, 15, 0, implicit $exec + undef %1571.sub0:vreg_64_align2 = nofpexcept V_ADD_F32_e32 %1569, %1570, implicit $mode, implicit $exec + %1572:vgpr_32 = DS_BPERMUTE_B32 %1468, %1558, 0, implicit $exec + %1573:vgpr_32 = nofpexcept 
V_ADD_F32_e32 %1558, %1572, implicit $mode, implicit $exec + %1574:vgpr_32 = COPY %1573 + %1574:vgpr_32 = V_MOV_B32_dpp %1574, %1574, 296, 15, 15, 0, implicit $exec + %1575:vgpr_32 = nofpexcept V_ADD_F32_e32 %1573, %1574, implicit $mode, implicit $exec + %1576:vgpr_32 = COPY %1575 + %1576:vgpr_32 = V_MOV_B32_dpp %1576, %1576, 321, 15, 15, 0, implicit $exec + %1576:vgpr_32 = V_MOV_B32_dpp %1576, %1576, 27, 15, 15, 0, implicit $exec + %1577:vgpr_32 = nofpexcept V_ADD_F32_e32 %1575, %1576, implicit $mode, implicit $exec + %1578:vgpr_32 = COPY %1577 + %1578:vgpr_32 = V_MOV_B32_dpp %1578, %1578, 78, 15, 15, 0, implicit $exec + %1579:vgpr_32 = nofpexcept V_ADD_F32_e32 %1577, %1578, implicit $mode, implicit $exec + %1580:vgpr_32 = COPY %1579 + %1580:vgpr_32 = V_MOV_B32_dpp %1580, %1580, 177, 15, 15, 0, implicit $exec + %1581:vgpr_32 = nofpexcept V_ADD_F32_e32 %1579, %1580, implicit $mode, implicit $exec + %1582:vgpr_32 = DS_BPERMUTE_B32 %1468, %1559, 0, implicit $exec + %1583:vgpr_32 = nofpexcept V_ADD_F32_e32 %1559, %1582, implicit $mode, implicit $exec + %1584:vgpr_32 = COPY %1583 + %1584:vgpr_32 = V_MOV_B32_dpp %1584, %1584, 296, 15, 15, 0, implicit $exec + %1585:vgpr_32 = nofpexcept V_ADD_F32_e32 %1583, %1584, implicit $mode, implicit $exec + %1586:vgpr_32 = COPY %1585 + %1586:vgpr_32 = V_MOV_B32_dpp %1586, %1586, 321, 15, 15, 0, implicit $exec + %1586:vgpr_32 = V_MOV_B32_dpp %1586, %1586, 27, 15, 15, 0, implicit $exec + %1587:vgpr_32 = nofpexcept V_ADD_F32_e32 %1585, %1586, implicit $mode, implicit $exec + %1588:vgpr_32 = COPY %1587 + %1588:vgpr_32 = V_MOV_B32_dpp %1588, %1588, 78, 15, 15, 0, implicit $exec + %1589:vgpr_32 = nofpexcept V_ADD_F32_e32 %1587, %1588, implicit $mode, implicit $exec + %1590:vgpr_32 = COPY %1589 + %1590:vgpr_32 = V_MOV_B32_dpp %1590, %1590, 177, 15, 15, 0, implicit $exec + %1591:vgpr_32 = nofpexcept V_ADD_F32_e32 %1589, %1590, implicit $mode, implicit $exec + %1592:vgpr_32 = nofpexcept V_SUB_F32_e32 %251.sub0, %1520, implicit $mode, 
implicit $exec + %1593:vgpr_32 = nofpexcept V_SUB_F32_e32 %252.sub0, %1522, implicit $mode, implicit $exec + %1594:vgpr_32 = nofpexcept V_SUB_F32_e32 %249.sub0, %1533.sub0, implicit $mode, implicit $exec + %1595:vgpr_32 = nofpexcept V_SUB_F32_e32 %250.sub0, %1533.sub1, implicit $mode, implicit $exec + undef %1596.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1592, implicit $mode, implicit $exec + undef %1597.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1593, implicit $mode, implicit $exec + undef %1598.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1594, implicit $mode, implicit $exec + undef %1599.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1595, implicit $mode, implicit $exec + %1600:vgpr_32 = nofpexcept V_MUL_F32_e32 %1598.sub0, %314.sub0, implicit $mode, implicit $exec + %1601:vgpr_32 = nofpexcept V_MUL_F32_e32 %1599.sub0, %314.sub1, implicit $mode, implicit $exec + %1602:vgpr_32 = nofpexcept V_ADD_F32_e32 %1600, %1581, implicit $mode, implicit $exec + %1603:vgpr_32 = nofpexcept V_ADD_F32_e32 %1601, %1591, implicit $mode, implicit $exec + undef %1604.sub0:vreg_64_align2 = V_CVT_PK_F16_F32_e64 0, %1541.sub0, 0, %1541.sub1, 0, 0, implicit $exec + %1604.sub1:vreg_64_align2 = V_CVT_PK_F16_F32_e64 0, %1542.sub0, 0, %1542.sub1, 0, 0, implicit $exec + undef %1605.sub0:vreg_64_align2 = V_CVT_PK_F16_F32_e64 0, %1543.sub0, 0, %1543.sub1, 0, 0, implicit $exec + %1605.sub1:vreg_64_align2 = V_CVT_PK_F16_F32_e64 0, %1544.sub0, 0, %1544.sub1, 0, 0, implicit $exec + undef %1606.sub0:vreg_64_align2 = V_CVT_PK_F16_F32_e64 0, %1545.sub0, 0, %1545.sub1, 0, 0, implicit $exec + %1606.sub1:vreg_64_align2 = V_CVT_PK_F16_F32_e64 0, %1546.sub0, 0, %1546.sub1, 0, 0, implicit $exec + %1607:vgpr_32 = V_CVT_PK_F16_F32_e64 0, %1547, 0, %1548, 0, 0, implicit $exec + %1608:vgpr_32 = V_LSHRREV_B32_e32 16, %1607, implicit $exec + %1609:vgpr_32 = V_CVT_PK_F16_F32_e64 0, %1549, 0, %1550, 0, 0, implicit $exec + %1610:vgpr_32 = V_LSHRREV_B32_e32 16, %1609, implicit $exec + 
%1611:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %1607, %1283, implicit $exec + %1612:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %1608, %1283, implicit $exec + %1613:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %1609, %1283, implicit $exec + %1614:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %1610, %1283, implicit $exec + %1615:vgpr_32 = nuw V_ADD_U32_e32 %13, %108, implicit $exec + undef %1616.sub0_sub1:av_128_align2 = DS_READ_B64_TR_B16 %1615, 0, 0, implicit $exec + %1617:vgpr_32 = disjoint V_OR_B32_e32 512, %108, implicit $exec + %1618:vgpr_32 = nuw V_ADD_U32_e32 %13, %1617, implicit $exec + %1616.sub2_sub3:av_128_align2 = DS_READ_B64_TR_B16 %1618, 0, 0, implicit $exec + %1619:vgpr_32 = nuw nsw V_ADD_U32_e32 1056, %108, implicit $exec + %1620:vgpr_32 = nuw V_ADD_U32_e32 %13, %1619, implicit $exec + undef %1621.sub0_sub1:av_128_align2 = DS_READ_B64_TR_B16 %1620, 0, 0, implicit $exec + %1622:vgpr_32 = nuw nsw V_ADD_U32_e32 1568, %108, implicit $exec + %1623:vgpr_32 = nuw V_ADD_U32_e32 %13, %1622, implicit $exec + %1621.sub2_sub3:av_128_align2 = DS_READ_B64_TR_B16 %1623, 0, 0, implicit $exec + %1624:vgpr_32 = nuw nsw V_ADD_U32_e32 2112, %108, implicit $exec + %1625:vgpr_32 = nuw V_ADD_U32_e32 %13, %1624, implicit $exec + undef %1626.sub0_sub1:av_128_align2 = DS_READ_B64_TR_B16 %1625, 0, 0, implicit $exec + %1627:vgpr_32 = nuw nsw V_ADD_U32_e32 2624, %108, implicit $exec + %1628:vgpr_32 = nuw V_ADD_U32_e32 %13, %1627, implicit $exec + %1626.sub2_sub3:av_128_align2 = DS_READ_B64_TR_B16 %1628, 0, 0, implicit $exec + %1629:vgpr_32 = nuw nsw V_ADD_U32_e32 3168, %108, implicit $exec + %1630:vgpr_32 = nuw V_ADD_U32_e32 %13, %1629, implicit $exec + undef %1631.sub0_sub1:av_128_align2 = DS_READ_B64_TR_B16 %1630, 0, 0, implicit $exec + %1632:vgpr_32 = nuw nsw V_ADD_U32_e32 3680, %108, implicit $exec + %1633:vgpr_32 = nuw V_ADD_U32_e32 %13, %1632, implicit $exec + %1631.sub2_sub3:av_128_align2 = DS_READ_B64_TR_B16 %1633, 0, 0, implicit $exec + S_WAITCNT 49279 + WAVE_BARRIER + %1634:vgpr_32 = nuw nsw 
V_LSHLREV_B32_e32 3, %1327, implicit $exec + %1635:vgpr_32 = V_MOV_B32_e32 1056, implicit $exec + %1636:vgpr_32 = V_CNDMASK_B32_e64 0, %1635, 0, 0, %1283, implicit $exec + %1637:vgpr_32 = V_XOR_B32_e32 %1636, %1634, implicit $exec + %1638:vgpr_32 = V_XOR_B32_e32 264, %1637, implicit $exec + %1639:vgpr_32 = V_XOR_B32_e32 528, %1637, implicit $exec + %1640:vgpr_32 = V_XOR_B32_e32 792, %1637, implicit $exec + %1641:sreg_32 = S_MOV_B32 84148480 + undef %1642.sub1:vreg_64_align2 = V_PERM_B32_e64 %1614, %1613, %1641, implicit $exec + %1642.sub0:vreg_64_align2 = V_PERM_B32_e64 %1612, %1611, %1641, implicit $exec + %1643:vgpr_32 = V_XOR_B32_e32 2112, %1637, implicit $exec + %1644:vgpr_32 = V_XOR_B32_e32 2376, %1637, implicit $exec + %1645:vgpr_32 = V_XOR_B32_e32 2640, %1637, implicit $exec + %1646:vgpr_32 = V_XOR_B32_e32 2904, %1637, implicit $exec + %1647:vgpr_32 = nuw V_ADD_U32_e32 %14, %1328, implicit $exec + %1648:vgpr_32 = nuw V_ADD_U32_e32 %14, %1331, implicit $exec + %1649:vgpr_32 = nuw V_ADD_U32_e32 %14, %1334, implicit $exec + %1650:vgpr_32 = nuw V_ADD_U32_e32 %14, %1337, implicit $exec + %1651:sreg_32 = S_LSHL_B32 %103, 8, implicit-def dead $scc + %1652:sreg_32 = S_AND_B32 %103, 255, implicit-def dead $scc + %1653:sreg_32 = S_OR_B32 %1652, %1651, implicit-def dead $scc + %1654:sreg_32 = S_LSHL_B32 %1653, 16, implicit-def dead $scc + %1655:sreg_32 = S_AND_B32 %1653, 65535, implicit-def dead $scc + undef %1656.sub0:sgpr_128 = S_OR_B32 %1655, %1654, implicit-def dead $scc + %1657:sreg_32 = S_LSHL_B32 %1652, 16, implicit-def dead $scc + undef %1658.sub0:sgpr_128 = S_OR_B32 %1655, %1657, implicit-def dead $scc + undef %1659.sub0:vreg_64_align2 = COPY %315.sub0 + %1659.sub1:vreg_64_align2 = COPY %311.sub0 + %1660:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1509.sub1, 0, 0, 0, 0, 0, 0, implicit $exec + %1661:vgpr_32 = COPY %1660 + %1661:vgpr_32 = V_MOV_B32_dpp %1661, %1661, 321, 15, 15, 0, implicit $exec + %1661:vgpr_32 = V_MOV_B32_dpp %1661, %1661, 27, 15, 15, 0, implicit $exec + 
%1662:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1660, 0, %1661, 0, %1661, 0, 0, implicit $exec + %1663:vgpr_32 = COPY %1662 + %1663:vgpr_32 = V_MOV_B32_dpp %1663, %1663, 78, 15, 15, 0, implicit $exec + %1664:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1662, 0, %1663, 0, %1663, 0, 0, implicit $exec + %1665:vgpr_32 = COPY %1664 + %1665:vgpr_32 = V_MOV_B32_dpp %1665, %1665, 177, 15, 15, 0, implicit $exec + %1666:vgpr_32 = V_MAXIMUM3_F32_e64 0, %1664, 0, %1665, 0, %1665, 0, 0, implicit $exec + %1667:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %245.sub0, 0, implicit $mode, implicit $exec + %1668:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %245.sub1, 0, implicit $mode, implicit $exec + %1669:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %244.sub0, 0, implicit $mode, implicit $exec + %1670:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %244.sub1, 0, implicit $mode, implicit $exec + %1671:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %241.sub0, 0, implicit $mode, implicit $exec + %1672:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %241.sub1, 0, implicit $mode, implicit $exec + %1673:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %240.sub0, 0, implicit $mode, implicit $exec + %1674:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %240.sub1, 0, implicit $mode, implicit $exec + %1675:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %237.sub0, 0, implicit $mode, implicit $exec + %1676:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %237.sub1, 0, implicit $mode, implicit $exec + %1677:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %236.sub0, 0, implicit $mode, implicit $exec + %1678:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %236.sub1, 0, implicit $mode, implicit $exec + %1679:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %233.sub0, 0, implicit $mode, implicit $exec + %1680:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %233.sub1, 0, implicit $mode, implicit $exec + %1681:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %232.sub0, 
0, implicit $mode, implicit $exec + %1682:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %232.sub1, 0, implicit $mode, implicit $exec + %1683:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %229.sub0, 0, implicit $mode, implicit $exec + %1684:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %229.sub1, 0, implicit $mode, implicit $exec + %1685:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %228.sub0, 0, implicit $mode, implicit $exec + %1686:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %228.sub1, 0, implicit $mode, implicit $exec + %1687:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %225.sub0, 0, implicit $mode, implicit $exec + %1688:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %225.sub1, 0, implicit $mode, implicit $exec + %1689:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %224.sub0, 0, implicit $mode, implicit $exec + %1690:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %224.sub1, 0, implicit $mode, implicit $exec + %1691:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %221.sub0, 0, implicit $mode, implicit $exec + %1692:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %221.sub1, 0, implicit $mode, implicit $exec + %1693:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %220.sub0, 0, implicit $mode, implicit $exec + %1694:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %220.sub1, 0, implicit $mode, implicit $exec + %1695:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %217.sub0, 0, implicit $mode, implicit $exec + %1696:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %217.sub1, 0, implicit $mode, implicit $exec + %1697:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %216.sub0, 0, implicit $mode, implicit $exec + %1698:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %216.sub1, 0, implicit $mode, implicit $exec + %1699:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %213.sub0, 0, implicit $mode, implicit $exec + %1700:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %213.sub1, 0, implicit $mode, 
implicit $exec + %1701:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %212.sub0, 0, implicit $mode, implicit $exec + %1702:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %212.sub1, 0, implicit $mode, implicit $exec + %1703:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %209.sub0, 0, implicit $mode, implicit $exec + %1704:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %209.sub1, 0, implicit $mode, implicit $exec + %1705:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %208.sub0, 0, implicit $mode, implicit $exec + %1706:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %208.sub1, 0, implicit $mode, implicit $exec + %1707:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %205.sub0, 0, implicit $mode, implicit $exec + %1708:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %205.sub1, 0, implicit $mode, implicit $exec + %1709:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %204.sub0, 0, implicit $mode, implicit $exec + %1710:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %204.sub1, 0, implicit $mode, implicit $exec + %1711:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %201.sub0, 0, implicit $mode, implicit $exec + %1712:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %201.sub1, 0, implicit $mode, implicit $exec + %1713:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %200.sub0, 0, implicit $mode, implicit $exec + %1714:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %200.sub1, 0, implicit $mode, implicit $exec + %1715:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %197.sub0, 0, implicit $mode, implicit $exec + %1716:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %197.sub1, 0, implicit $mode, implicit $exec + %1717:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %196.sub0, 0, implicit $mode, implicit $exec + %1718:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %196.sub1, 0, implicit $mode, implicit $exec + %1719:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %193.sub0, 0, implicit $mode, implicit $exec + 
%1720:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %193.sub1, 0, implicit $mode, implicit $exec + %1721:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %192.sub0, 0, implicit $mode, implicit $exec + %1722:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %192.sub1, 0, implicit $mode, implicit $exec + %1723:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %189.sub0, 0, implicit $mode, implicit $exec + %1724:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %189.sub1, 0, implicit $mode, implicit $exec + %1725:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %188.sub0, 0, implicit $mode, implicit $exec + %1726:sreg_64 = nofpexcept V_CMP_GE_F32_e64 0, %1666, 0, %188.sub1, 0, implicit $mode, implicit $exec + %1727:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %1666, 0, %1666, 0, implicit $mode, implicit $exec + %1728:sreg_64_xexec = S_OR_B64 %1727, %1667, implicit-def dead $scc + %1729:vgpr_32 = V_CNDMASK_B32_e64 0, %245.sub0, 0, %1666, %1728, implicit $exec + %1730:sreg_64_xexec = S_OR_B64 %1727, %1668, implicit-def dead $scc + %1731:vgpr_32 = V_CNDMASK_B32_e64 0, %245.sub1, 0, %1666, %1730, implicit $exec + %1732:sreg_64_xexec = S_OR_B64 %1727, %1669, implicit-def dead $scc + %1733:vgpr_32 = V_CNDMASK_B32_e64 0, %244.sub0, 0, %1666, %1732, implicit $exec + %1734:sreg_64_xexec = S_OR_B64 %1727, %1670, implicit-def dead $scc + %1735:vgpr_32 = V_CNDMASK_B32_e64 0, %244.sub1, 0, %1666, %1734, implicit $exec + %1736:sreg_64_xexec = S_OR_B64 %1727, %1671, implicit-def dead $scc + %1737:vgpr_32 = V_CNDMASK_B32_e64 0, %241.sub0, 0, %1666, %1736, implicit $exec + %1738:sreg_64_xexec = S_OR_B64 %1727, %1672, implicit-def dead $scc + %1739:vgpr_32 = V_CNDMASK_B32_e64 0, %241.sub1, 0, %1666, %1738, implicit $exec + %1740:sreg_64_xexec = S_OR_B64 %1727, %1673, implicit-def dead $scc + %1741:vgpr_32 = V_CNDMASK_B32_e64 0, %240.sub0, 0, %1666, %1740, implicit $exec + %1742:sreg_64_xexec = S_OR_B64 %1727, %1674, implicit-def dead $scc + %1743:vgpr_32 = V_CNDMASK_B32_e64 0, 
%240.sub1, 0, %1666, %1742, implicit $exec + %1744:sreg_64_xexec = S_OR_B64 %1727, %1675, implicit-def dead $scc + %1745:vgpr_32 = V_CNDMASK_B32_e64 0, %237.sub0, 0, %1666, %1744, implicit $exec + %1746:sreg_64_xexec = S_OR_B64 %1727, %1676, implicit-def dead $scc + %1747:vgpr_32 = V_CNDMASK_B32_e64 0, %237.sub1, 0, %1666, %1746, implicit $exec + %1748:sreg_64_xexec = S_OR_B64 %1727, %1677, implicit-def dead $scc + %1749:vgpr_32 = V_CNDMASK_B32_e64 0, %236.sub0, 0, %1666, %1748, implicit $exec + %1750:sreg_64_xexec = S_OR_B64 %1727, %1678, implicit-def dead $scc + %1751:vgpr_32 = V_CNDMASK_B32_e64 0, %236.sub1, 0, %1666, %1750, implicit $exec + %1752:sreg_64_xexec = S_OR_B64 %1727, %1679, implicit-def dead $scc + %1753:vgpr_32 = V_CNDMASK_B32_e64 0, %233.sub0, 0, %1666, %1752, implicit $exec + %1754:sreg_64_xexec = S_OR_B64 %1727, %1680, implicit-def dead $scc + %1755:vgpr_32 = V_CNDMASK_B32_e64 0, %233.sub1, 0, %1666, %1754, implicit $exec + %1756:sreg_64_xexec = S_OR_B64 %1727, %1681, implicit-def dead $scc + %1757:vgpr_32 = V_CNDMASK_B32_e64 0, %232.sub0, 0, %1666, %1756, implicit $exec + %1758:sreg_64_xexec = S_OR_B64 %1727, %1682, implicit-def dead $scc + %1759:vgpr_32 = V_CNDMASK_B32_e64 0, %232.sub1, 0, %1666, %1758, implicit $exec + %1760:sreg_64_xexec = S_OR_B64 %1727, %1683, implicit-def dead $scc + %1761:vgpr_32 = V_CNDMASK_B32_e64 0, %229.sub0, 0, %1666, %1760, implicit $exec + %1762:sreg_64_xexec = S_OR_B64 %1727, %1684, implicit-def dead $scc + %1763:vgpr_32 = V_CNDMASK_B32_e64 0, %229.sub1, 0, %1666, %1762, implicit $exec + %1764:sreg_64_xexec = S_OR_B64 %1727, %1685, implicit-def dead $scc + %1765:vgpr_32 = V_CNDMASK_B32_e64 0, %228.sub0, 0, %1666, %1764, implicit $exec + %1766:sreg_64_xexec = S_OR_B64 %1727, %1686, implicit-def dead $scc + %1767:vgpr_32 = V_CNDMASK_B32_e64 0, %228.sub1, 0, %1666, %1766, implicit $exec + %1768:sreg_64_xexec = S_OR_B64 %1727, %1687, implicit-def dead $scc + %1769:vgpr_32 = V_CNDMASK_B32_e64 0, %225.sub0, 0, %1666, 
%1768, implicit $exec + %1770:sreg_64_xexec = S_OR_B64 %1727, %1688, implicit-def dead $scc + %1771:vgpr_32 = V_CNDMASK_B32_e64 0, %225.sub1, 0, %1666, %1770, implicit $exec + %1772:sreg_64_xexec = S_OR_B64 %1727, %1689, implicit-def dead $scc + %1773:vgpr_32 = V_CNDMASK_B32_e64 0, %224.sub0, 0, %1666, %1772, implicit $exec + %1774:sreg_64_xexec = S_OR_B64 %1727, %1690, implicit-def dead $scc + %1775:vgpr_32 = V_CNDMASK_B32_e64 0, %224.sub1, 0, %1666, %1774, implicit $exec + %1776:sreg_64_xexec = S_OR_B64 %1727, %1691, implicit-def dead $scc + %1777:vgpr_32 = V_CNDMASK_B32_e64 0, %221.sub0, 0, %1666, %1776, implicit $exec + %1778:sreg_64_xexec = S_OR_B64 %1727, %1692, implicit-def dead $scc + %1779:vgpr_32 = V_CNDMASK_B32_e64 0, %221.sub1, 0, %1666, %1778, implicit $exec + %1780:sreg_64_xexec = S_OR_B64 %1727, %1693, implicit-def dead $scc + %1781:vgpr_32 = V_CNDMASK_B32_e64 0, %220.sub0, 0, %1666, %1780, implicit $exec + %1782:sreg_64_xexec = S_OR_B64 %1727, %1694, implicit-def dead $scc + %1783:vgpr_32 = V_CNDMASK_B32_e64 0, %220.sub1, 0, %1666, %1782, implicit $exec + %1784:sreg_64_xexec = S_OR_B64 %1727, %1695, implicit-def dead $scc + %1785:vgpr_32 = V_CNDMASK_B32_e64 0, %217.sub0, 0, %1666, %1784, implicit $exec + %1786:sreg_64_xexec = S_OR_B64 %1727, %1696, implicit-def dead $scc + %1787:vgpr_32 = V_CNDMASK_B32_e64 0, %217.sub1, 0, %1666, %1786, implicit $exec + %1788:sreg_64_xexec = S_OR_B64 %1727, %1697, implicit-def dead $scc + %1789:vgpr_32 = V_CNDMASK_B32_e64 0, %216.sub0, 0, %1666, %1788, implicit $exec + %1790:sreg_64_xexec = S_OR_B64 %1727, %1698, implicit-def dead $scc + %1791:vgpr_32 = V_CNDMASK_B32_e64 0, %216.sub1, 0, %1666, %1790, implicit $exec + %1792:sreg_64_xexec = S_OR_B64 %1727, %1699, implicit-def dead $scc + %1793:vgpr_32 = V_CNDMASK_B32_e64 0, %213.sub0, 0, %1666, %1792, implicit $exec + %1794:sreg_64_xexec = S_OR_B64 %1727, %1700, implicit-def dead $scc + %1795:vgpr_32 = V_CNDMASK_B32_e64 0, %213.sub1, 0, %1666, %1794, implicit $exec + 
%1796:sreg_64_xexec = S_OR_B64 %1727, %1701, implicit-def dead $scc + %1797:vgpr_32 = V_CNDMASK_B32_e64 0, %212.sub0, 0, %1666, %1796, implicit $exec + %1798:sreg_64_xexec = S_OR_B64 %1727, %1702, implicit-def dead $scc + %1799:vgpr_32 = V_CNDMASK_B32_e64 0, %212.sub1, 0, %1666, %1798, implicit $exec + %1800:sreg_64_xexec = S_OR_B64 %1727, %1703, implicit-def dead $scc + %1801:vgpr_32 = V_CNDMASK_B32_e64 0, %209.sub0, 0, %1666, %1800, implicit $exec + %1802:sreg_64_xexec = S_OR_B64 %1727, %1704, implicit-def dead $scc + %1803:vgpr_32 = V_CNDMASK_B32_e64 0, %209.sub1, 0, %1666, %1802, implicit $exec + %1804:sreg_64_xexec = S_OR_B64 %1727, %1705, implicit-def dead $scc + %1805:vgpr_32 = V_CNDMASK_B32_e64 0, %208.sub0, 0, %1666, %1804, implicit $exec + %1806:sreg_64_xexec = S_OR_B64 %1727, %1706, implicit-def dead $scc + %1807:vgpr_32 = V_CNDMASK_B32_e64 0, %208.sub1, 0, %1666, %1806, implicit $exec + %1808:sreg_64_xexec = S_OR_B64 %1727, %1707, implicit-def dead $scc + %1809:vgpr_32 = V_CNDMASK_B32_e64 0, %205.sub0, 0, %1666, %1808, implicit $exec + %1810:sreg_64_xexec = S_OR_B64 %1727, %1708, implicit-def dead $scc + %1811:vgpr_32 = V_CNDMASK_B32_e64 0, %205.sub1, 0, %1666, %1810, implicit $exec + %1812:sreg_64_xexec = S_OR_B64 %1727, %1709, implicit-def dead $scc + %1813:vgpr_32 = V_CNDMASK_B32_e64 0, %204.sub0, 0, %1666, %1812, implicit $exec + %1814:sreg_64_xexec = S_OR_B64 %1727, %1710, implicit-def dead $scc + %1815:vgpr_32 = V_CNDMASK_B32_e64 0, %204.sub1, 0, %1666, %1814, implicit $exec + %1816:sreg_64_xexec = S_OR_B64 %1727, %1711, implicit-def dead $scc + %1817:vgpr_32 = V_CNDMASK_B32_e64 0, %201.sub0, 0, %1666, %1816, implicit $exec + %1818:sreg_64_xexec = S_OR_B64 %1727, %1712, implicit-def dead $scc + %1819:vgpr_32 = V_CNDMASK_B32_e64 0, %201.sub1, 0, %1666, %1818, implicit $exec + %1820:sreg_64_xexec = S_OR_B64 %1727, %1713, implicit-def dead $scc + %1821:vgpr_32 = V_CNDMASK_B32_e64 0, %200.sub0, 0, %1666, %1820, implicit $exec + %1822:sreg_64_xexec = 
S_OR_B64 %1727, %1714, implicit-def dead $scc + %1823:vgpr_32 = V_CNDMASK_B32_e64 0, %200.sub1, 0, %1666, %1822, implicit $exec + %1824:sreg_64_xexec = S_OR_B64 %1727, %1715, implicit-def dead $scc + %1825:vgpr_32 = V_CNDMASK_B32_e64 0, %197.sub0, 0, %1666, %1824, implicit $exec + %1826:sreg_64_xexec = S_OR_B64 %1727, %1716, implicit-def dead $scc + %1827:vgpr_32 = V_CNDMASK_B32_e64 0, %197.sub1, 0, %1666, %1826, implicit $exec + %1828:sreg_64_xexec = S_OR_B64 %1727, %1717, implicit-def dead $scc + %1829:vgpr_32 = V_CNDMASK_B32_e64 0, %196.sub0, 0, %1666, %1828, implicit $exec + %1830:sreg_64_xexec = S_OR_B64 %1727, %1718, implicit-def dead $scc + %1831:vgpr_32 = V_CNDMASK_B32_e64 0, %196.sub1, 0, %1666, %1830, implicit $exec + %1832:sreg_64_xexec = S_OR_B64 %1727, %1719, implicit-def dead $scc + %1833:vgpr_32 = V_CNDMASK_B32_e64 0, %193.sub0, 0, %1666, %1832, implicit $exec + %1834:sreg_64_xexec = S_OR_B64 %1727, %1720, implicit-def dead $scc + %1835:vgpr_32 = V_CNDMASK_B32_e64 0, %193.sub1, 0, %1666, %1834, implicit $exec + %1836:sreg_64_xexec = S_OR_B64 %1727, %1721, implicit-def dead $scc + %1837:vgpr_32 = V_CNDMASK_B32_e64 0, %192.sub0, 0, %1666, %1836, implicit $exec + %1838:sreg_64_xexec = S_OR_B64 %1727, %1722, implicit-def dead $scc + %1839:vgpr_32 = V_CNDMASK_B32_e64 0, %192.sub1, 0, %1666, %1838, implicit $exec + %1840:sreg_64_xexec = S_OR_B64 %1727, %1723, implicit-def dead $scc + %1841:vgpr_32 = V_CNDMASK_B32_e64 0, %189.sub0, 0, %1666, %1840, implicit $exec + %1842:sreg_64_xexec = S_OR_B64 %1727, %1724, implicit-def dead $scc + %1843:vgpr_32 = V_CNDMASK_B32_e64 0, %189.sub1, 0, %1666, %1842, implicit $exec + %1844:sreg_64_xexec = S_OR_B64 %1727, %1725, implicit-def dead $scc + %1845:vgpr_32 = V_CNDMASK_B32_e64 0, %188.sub0, 0, %1666, %1844, implicit $exec + %1846:sreg_64_xexec = S_OR_B64 %1727, %1726, implicit-def dead $scc + %1847:vgpr_32 = V_CNDMASK_B32_e64 0, %188.sub1, 0, %1666, %1846, implicit $exec + %1561.sub1:vreg_64_align2 = COPY %1290.sub2 + 
%1848:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1561, 8, %1509, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %1849.sub0:vreg_64_align2 = COPY %1848.sub0 + undef %1849.sub0:vreg_64_align2 = V_MOV_B32_dpp %1849.sub0, %1849.sub0, 321, 15, 15, 0, implicit $exec + undef %1849.sub0:vreg_64_align2 = V_MOV_B32_dpp %1849.sub0, %1849.sub0, 27, 15, 15, 0, implicit $exec + %1850:vgpr_32 = nofpexcept V_SUB_F32_e32 %245.sub0, %1729, implicit $mode, implicit $exec + %1851:vgpr_32 = nofpexcept V_SUB_F32_e32 %245.sub1, %1731, implicit $mode, implicit $exec + %1852:vgpr_32 = nofpexcept V_SUB_F32_e32 %244.sub0, %1733, implicit $mode, implicit $exec + %1853:vgpr_32 = nofpexcept V_SUB_F32_e32 %244.sub1, %1735, implicit $mode, implicit $exec + %1854:vgpr_32 = nofpexcept V_SUB_F32_e32 %241.sub0, %1737, implicit $mode, implicit $exec + %1855:vgpr_32 = nofpexcept V_SUB_F32_e32 %241.sub1, %1739, implicit $mode, implicit $exec + %1856:vgpr_32 = nofpexcept V_SUB_F32_e32 %240.sub0, %1741, implicit $mode, implicit $exec + %1857:vgpr_32 = nofpexcept V_SUB_F32_e32 %240.sub1, %1743, implicit $mode, implicit $exec + %1858:vgpr_32 = nofpexcept V_SUB_F32_e32 %237.sub0, %1745, implicit $mode, implicit $exec + %1859:vgpr_32 = nofpexcept V_SUB_F32_e32 %237.sub1, %1747, implicit $mode, implicit $exec + %1860:vgpr_32 = nofpexcept V_SUB_F32_e32 %236.sub0, %1749, implicit $mode, implicit $exec + %1861:vgpr_32 = nofpexcept V_SUB_F32_e32 %236.sub1, %1751, implicit $mode, implicit $exec + %1862:vgpr_32 = nofpexcept V_SUB_F32_e32 %233.sub0, %1753, implicit $mode, implicit $exec + %1863:vgpr_32 = nofpexcept V_SUB_F32_e32 %233.sub1, %1755, implicit $mode, implicit $exec + %1864:vgpr_32 = nofpexcept V_SUB_F32_e32 %232.sub0, %1757, implicit $mode, implicit $exec + %1865:vgpr_32 = nofpexcept V_SUB_F32_e32 %232.sub1, %1759, implicit $mode, implicit $exec + %1866:vgpr_32 = nofpexcept V_SUB_F32_e32 %229.sub0, %1761, implicit $mode, implicit $exec + %1867:vgpr_32 = nofpexcept V_SUB_F32_e32 %229.sub1, %1763, 
implicit $mode, implicit $exec + %1868:vgpr_32 = nofpexcept V_SUB_F32_e32 %228.sub0, %1765, implicit $mode, implicit $exec + %1869:vgpr_32 = nofpexcept V_SUB_F32_e32 %228.sub1, %1767, implicit $mode, implicit $exec + %1870:vgpr_32 = nofpexcept V_SUB_F32_e32 %225.sub0, %1769, implicit $mode, implicit $exec + %1871:vgpr_32 = nofpexcept V_SUB_F32_e32 %225.sub1, %1771, implicit $mode, implicit $exec + %1872:vgpr_32 = nofpexcept V_SUB_F32_e32 %224.sub0, %1773, implicit $mode, implicit $exec + %1873:vgpr_32 = nofpexcept V_SUB_F32_e32 %224.sub1, %1775, implicit $mode, implicit $exec + %1874:vgpr_32 = nofpexcept V_SUB_F32_e32 %221.sub0, %1777, implicit $mode, implicit $exec + %1875:vgpr_32 = nofpexcept V_SUB_F32_e32 %221.sub1, %1779, implicit $mode, implicit $exec + %1876:vgpr_32 = nofpexcept V_SUB_F32_e32 %220.sub0, %1781, implicit $mode, implicit $exec + %1877:vgpr_32 = nofpexcept V_SUB_F32_e32 %220.sub1, %1783, implicit $mode, implicit $exec + %1878:vgpr_32 = nofpexcept V_SUB_F32_e32 %217.sub0, %1785, implicit $mode, implicit $exec + %1879:vgpr_32 = nofpexcept V_SUB_F32_e32 %217.sub1, %1787, implicit $mode, implicit $exec + %1880:vgpr_32 = nofpexcept V_SUB_F32_e32 %216.sub0, %1789, implicit $mode, implicit $exec + %1881:vgpr_32 = nofpexcept V_SUB_F32_e32 %216.sub1, %1791, implicit $mode, implicit $exec + %1882:vgpr_32 = nofpexcept V_SUB_F32_e32 %213.sub0, %1793, implicit $mode, implicit $exec + %1883:vgpr_32 = nofpexcept V_SUB_F32_e32 %213.sub1, %1795, implicit $mode, implicit $exec + %1884:vgpr_32 = nofpexcept V_SUB_F32_e32 %212.sub0, %1797, implicit $mode, implicit $exec + %1885:vgpr_32 = nofpexcept V_SUB_F32_e32 %212.sub1, %1799, implicit $mode, implicit $exec + %1886:vgpr_32 = nofpexcept V_SUB_F32_e32 %209.sub0, %1801, implicit $mode, implicit $exec + %1887:vgpr_32 = nofpexcept V_SUB_F32_e32 %209.sub1, %1803, implicit $mode, implicit $exec + %1888:vgpr_32 = nofpexcept V_SUB_F32_e32 %208.sub0, %1805, implicit $mode, implicit $exec + %1889:vgpr_32 = nofpexcept 
V_SUB_F32_e32 %208.sub1, %1807, implicit $mode, implicit $exec + %1890:vgpr_32 = nofpexcept V_SUB_F32_e32 %205.sub0, %1809, implicit $mode, implicit $exec + %1891:vgpr_32 = nofpexcept V_SUB_F32_e32 %205.sub1, %1811, implicit $mode, implicit $exec + %1892:vgpr_32 = nofpexcept V_SUB_F32_e32 %204.sub0, %1813, implicit $mode, implicit $exec + %1893:vgpr_32 = nofpexcept V_SUB_F32_e32 %204.sub1, %1815, implicit $mode, implicit $exec + %1894:vgpr_32 = nofpexcept V_SUB_F32_e32 %201.sub0, %1817, implicit $mode, implicit $exec + %1895:vgpr_32 = nofpexcept V_SUB_F32_e32 %201.sub1, %1819, implicit $mode, implicit $exec + %1896:vgpr_32 = nofpexcept V_SUB_F32_e32 %200.sub0, %1821, implicit $mode, implicit $exec + %1897:vgpr_32 = nofpexcept V_SUB_F32_e32 %200.sub1, %1823, implicit $mode, implicit $exec + %1898:vgpr_32 = nofpexcept V_SUB_F32_e32 %197.sub0, %1825, implicit $mode, implicit $exec + %1899:vgpr_32 = nofpexcept V_SUB_F32_e32 %197.sub1, %1827, implicit $mode, implicit $exec + %1900:vgpr_32 = nofpexcept V_SUB_F32_e32 %196.sub0, %1829, implicit $mode, implicit $exec + %1901:vgpr_32 = nofpexcept V_SUB_F32_e32 %196.sub1, %1831, implicit $mode, implicit $exec + %1902:vgpr_32 = nofpexcept V_SUB_F32_e32 %193.sub0, %1833, implicit $mode, implicit $exec + %1903:vgpr_32 = nofpexcept V_SUB_F32_e32 %193.sub1, %1835, implicit $mode, implicit $exec + %1904:vgpr_32 = nofpexcept V_SUB_F32_e32 %192.sub0, %1837, implicit $mode, implicit $exec + %1905:vgpr_32 = nofpexcept V_SUB_F32_e32 %192.sub1, %1839, implicit $mode, implicit $exec + %1906:vgpr_32 = nofpexcept V_SUB_F32_e32 %189.sub0, %1841, implicit $mode, implicit $exec + %1907:vgpr_32 = nofpexcept V_SUB_F32_e32 %189.sub1, %1843, implicit $mode, implicit $exec + %1908:vgpr_32 = nofpexcept V_SUB_F32_e32 %188.sub0, %1845, implicit $mode, implicit $exec + %1909:vgpr_32 = nofpexcept V_SUB_F32_e32 %188.sub1, %1847, implicit $mode, implicit $exec + undef %1910.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1850, implicit $mode, implicit 
$exec + undef %1911.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1851, implicit $mode, implicit $exec + undef %1912.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1852, implicit $mode, implicit $exec + undef %1913.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1853, implicit $mode, implicit $exec + %1596.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1854, implicit $mode, implicit $exec + %1597.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1855, implicit $mode, implicit $exec + %1598.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1856, implicit $mode, implicit $exec + %1599.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1857, implicit $mode, implicit $exec + %1910.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1858, implicit $mode, implicit $exec + %1911.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1859, implicit $mode, implicit $exec + %1912.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1860, implicit $mode, implicit $exec + %1913.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1861, implicit $mode, implicit $exec + %1596.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1862, implicit $mode, implicit $exec + %1597.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1863, implicit $mode, implicit $exec + %1598.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1864, implicit $mode, implicit $exec + %1599.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1865, implicit $mode, implicit $exec + %1910.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1866, implicit $mode, implicit $exec + %1911.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1867, implicit $mode, implicit $exec + %1912.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1868, implicit $mode, implicit $exec + %1913.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1869, implicit $mode, implicit $exec + %1596.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1870, implicit $mode, implicit $exec + %1597.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1871, implicit $mode, implicit $exec + 
%1598.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1872, implicit $mode, implicit $exec + %1599.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1873, implicit $mode, implicit $exec + %1910.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1874, implicit $mode, implicit $exec + %1911.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1875, implicit $mode, implicit $exec + %1912.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1876, implicit $mode, implicit $exec + %1913.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1877, implicit $mode, implicit $exec + undef %1914.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1878, implicit $mode, implicit $exec + undef %1915.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1879, implicit $mode, implicit $exec + undef %1916.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1880, implicit $mode, implicit $exec + undef %1917.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1881, implicit $mode, implicit $exec + undef %1918.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1882, implicit $mode, implicit $exec + undef %1919.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1883, implicit $mode, implicit $exec + undef %1920.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1884, implicit $mode, implicit $exec + undef %1921.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1885, implicit $mode, implicit $exec + %1914.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1886, implicit $mode, implicit $exec + %1915.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1887, implicit $mode, implicit $exec + %1916.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1888, implicit $mode, implicit $exec + %1917.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1889, implicit $mode, implicit $exec + %1918.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1890, implicit $mode, implicit $exec + %1919.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1891, implicit $mode, implicit $exec + %1920.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1892, implicit 
$mode, implicit $exec + %1921.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1893, implicit $mode, implicit $exec + %1914.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1894, implicit $mode, implicit $exec + %1915.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1895, implicit $mode, implicit $exec + %1916.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1896, implicit $mode, implicit $exec + %1917.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1897, implicit $mode, implicit $exec + %1918.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1898, implicit $mode, implicit $exec + %1919.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1899, implicit $mode, implicit $exec + %1920.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1900, implicit $mode, implicit $exec + %1921.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1901, implicit $mode, implicit $exec + %1914.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1902, implicit $mode, implicit $exec + %1915.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1903, implicit $mode, implicit $exec + %1916.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1904, implicit $mode, implicit $exec + %1917.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1905, implicit $mode, implicit $exec + %1918.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1906, implicit $mode, implicit $exec + %1919.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1907, implicit $mode, implicit $exec + %1920.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1908, implicit $mode, implicit $exec + %1921.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %1909, implicit $mode, implicit $exec + DS_WRITE_B128_gfx9 %37, %1596, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %37, %1597, 16, 0, implicit $exec + DS_WRITE_B128_gfx9 %37, %1598, 32, 0, implicit $exec + DS_WRITE_B128_gfx9 %37, %1599, 48, 0, implicit $exec + DS_WRITE_B128_gfx9 %37, %1910, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %37, %1911, 144, 0, implicit $exec + DS_WRITE_B128_gfx9 %37, %1912, 160, 0, 
implicit $exec + DS_WRITE_B128_gfx9 %37, %1913, 176, 0, implicit $exec + WAVE_BARRIER + %1922:vreg_128_align2 = DS_READ_B128_gfx9 %150, 0, 0, implicit $exec + WAVE_BARRIER + DS_WRITE_B128_gfx9 %37, %1914, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %37, %1915, 16, 0, implicit $exec + DS_WRITE_B128_gfx9 %37, %1916, 32, 0, implicit $exec + DS_WRITE_B128_gfx9 %37, %1917, 48, 0, implicit $exec + DS_WRITE_B128_gfx9 %37, %1918, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %37, %1919, 144, 0, implicit $exec + DS_WRITE_B128_gfx9 %37, %1920, 160, 0, implicit $exec + DS_WRITE_B128_gfx9 %37, %1921, 176, 0, implicit $exec + WAVE_BARRIER + %1923:vreg_128_align2 = DS_READ_B128_gfx9 %150, 0, 0, implicit $exec + S_WAITCNT 49279 + WAVE_BARRIER + %1282.sub1:sgpr_128 = COPY %1282.sub0 + %1924:vreg_64_align2 = COPY %1282.sub0_sub1 + DS_WRITE2ST64_B64_gfx9 %1637, %1604, %1924, 0, 8, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1637, %1924, %1924, 16, 24, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1637, %1924, %1924, 32, 40, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1637, %1924, %1924, 48, 56, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1638, %1605, %1924, 0, 8, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1638, %1924, %1924, 16, 24, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1638, %1924, %1924, 32, 40, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1638, %1924, %1924, 48, 56, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1639, %1606, %1924, 0, 8, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1639, %1924, %1924, 16, 24, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1639, %1924, %1924, 32, 40, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1639, %1924, %1924, 48, 56, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1640, %1642, %1924, 0, 8, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1640, %1924, %1924, 16, 24, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1640, %1924, %1924, 32, 40, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1640, %1924, %1924, 48, 56, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1643, 
%1924, %1924, 0, 8, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1643, %1924, %1924, 16, 24, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1643, %1924, %1924, 32, 40, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1643, %1924, %1924, 48, 56, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1644, %1924, %1924, 0, 8, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1644, %1924, %1924, 16, 24, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1644, %1924, %1924, 32, 40, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1644, %1924, %1924, 48, 56, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1645, %1924, %1924, 0, 8, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1645, %1924, %1924, 16, 24, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1645, %1924, %1924, 32, 40, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1645, %1924, %1924, 48, 56, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1646, %1924, %1924, 0, 8, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1646, %1924, %1924, 16, 24, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1646, %1924, %1924, 32, 40, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1646, %1924, %1924, 48, 56, 0, implicit $exec + S_WAITCNT 49279 + WAVE_BARRIER + undef %1925.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %169, 0, 16, 0, implicit $exec + %1926:vgpr_32 = V_ADD_U32_e32 4096, %169, implicit $exec + undef %1927.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1926, 0, 16, 0, implicit $exec + %1928:vgpr_32 = V_ADD_U32_e32 8192, %169, implicit $exec + undef %1929.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1928, 0, 16, 0, implicit $exec + %1930:vgpr_32 = V_ADD_U32_e32 12288, %169, implicit $exec + undef %1931.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1930, 0, 16, 0, implicit $exec + %1932:vgpr_32 = V_ADD_U32_e32 16384, %169, implicit $exec + undef %1933.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1932, 0, 16, 0, implicit $exec + %1934:vgpr_32 = V_ADD_U32_e32 20480, %169, implicit $exec + undef %1935.sub0_sub1_sub2_sub3:av_192_align2 = 
DS_READ2_B64_gfx9 %1934, 0, 16, 0, implicit $exec + %1936:vgpr_32 = V_ADD_U32_e32 24576, %169, implicit $exec + undef %1937.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1936, 0, 16, 0, implicit $exec + %1938:vgpr_32 = V_ADD_U32_e32 28672, %169, implicit $exec + undef %1939.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1938, 0, 16, 0, implicit $exec + undef %1940.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %170, 0, 16, 0, implicit $exec + %1941:vgpr_32 = V_ADD_U32_e32 4096, %170, implicit $exec + undef %1942.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1941, 0, 16, 0, implicit $exec + %1943:vgpr_32 = V_ADD_U32_e32 8192, %170, implicit $exec + undef %1944.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1943, 0, 16, 0, implicit $exec + %1945:vgpr_32 = V_ADD_U32_e32 12288, %170, implicit $exec + undef %1946.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1945, 0, 16, 0, implicit $exec + %1947:vgpr_32 = V_ADD_U32_e32 16384, %170, implicit $exec + undef %1948.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1947, 0, 16, 0, implicit $exec + %1949:vgpr_32 = V_ADD_U32_e32 20480, %170, implicit $exec + undef %1950.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1949, 0, 16, 0, implicit $exec + %1951:vgpr_32 = V_ADD_U32_e32 24576, %170, implicit $exec + undef %1952.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1951, 0, 16, 0, implicit $exec + %1953:vgpr_32 = V_ADD_U32_e32 28672, %170, implicit $exec + undef %1954.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1953, 0, 16, 0, implicit $exec + undef %1955.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %171, 0, 16, 0, implicit $exec + %1956:vgpr_32 = V_ADD_U32_e32 4096, %171, implicit $exec + undef %1957.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1956, 0, 16, 0, implicit $exec + %1958:vgpr_32 = V_ADD_U32_e32 8192, %171, implicit $exec + undef %1959.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1958, 0, 16, 0, implicit 
$exec + %1960:vgpr_32 = V_ADD_U32_e32 12288, %171, implicit $exec + undef %1961.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1960, 0, 16, 0, implicit $exec + %1962:vgpr_32 = V_ADD_U32_e32 16384, %171, implicit $exec + undef %1963.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1962, 0, 16, 0, implicit $exec + %1964:vgpr_32 = V_ADD_U32_e32 20480, %171, implicit $exec + undef %1965.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1964, 0, 16, 0, implicit $exec + %1966:vgpr_32 = V_ADD_U32_e32 24576, %171, implicit $exec + undef %1967.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1966, 0, 16, 0, implicit $exec + %1968:vgpr_32 = V_ADD_U32_e32 28672, %171, implicit $exec + undef %1969.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %1968, 0, 16, 0, implicit $exec + undef %1970.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %172, 0, 16, 0, implicit $exec + %1971:vgpr_32 = V_ADD_U32_e32 4096, %172, implicit $exec + undef %1972.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1971, 0, 16, 0, implicit $exec + %1973:vgpr_32 = V_ADD_U32_e32 8192, %172, implicit $exec + undef %1974.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1973, 0, 16, 0, implicit $exec + %1975:vgpr_32 = V_ADD_U32_e32 12288, %172, implicit $exec + undef %1976.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1975, 0, 16, 0, implicit $exec + %1977:vgpr_32 = V_ADD_U32_e32 16384, %172, implicit $exec + undef %1978.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1977, 0, 16, 0, implicit $exec + %1979:vgpr_32 = V_ADD_U32_e32 20480, %172, implicit $exec + undef %1980.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1979, 0, 16, 0, implicit $exec + %1981:vgpr_32 = V_ADD_U32_e32 24576, %172, implicit $exec + undef %1982.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1981, 0, 16, 0, implicit $exec + %1983:vgpr_32 = V_ADD_U32_e32 28672, %172, implicit $exec + undef %1984.sub2_sub3_sub4_sub5:av_192_align2 = DS_READ2_B64_gfx9 %1983, 
0, 16, 0, implicit $exec + S_WAITCNT 49279 + WAVE_BARRIER + %1985:vreg_128_align2 = DS_READ_B128_gfx9 %1647, 0, 0, implicit $exec + %1986:vreg_128_align2 = DS_READ_B128_gfx9 %1648, 0, 0, implicit $exec + %1987:vreg_128_align2 = DS_READ_B128_gfx9 %1649, 0, 0, implicit $exec + %1988:vreg_128_align2 = DS_READ_B128_gfx9 %1650, 0, 0, implicit $exec + %1989:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1989:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub0, %1985.sub0, %1989, implicit $exec + %1989:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub1, %1985.sub1, %1989, implicit $exec + %1989:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub2, %1985.sub2, %1989, implicit $exec + %1989:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub3, %1985.sub3, %1989, implicit $exec + %1990:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1990:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub0, %1986.sub0, %1990, implicit $exec + %1990:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub1, %1986.sub1, %1990, implicit $exec + %1990:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub2, %1986.sub2, %1990, implicit $exec + %1990:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub3, %1986.sub3, %1990, implicit $exec + %1991:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1991:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub0, %1987.sub0, %1991, implicit $exec + %1991:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub1, %1987.sub1, %1991, implicit $exec + %1991:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub2, %1987.sub2, %1991, implicit $exec + %1991:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub3, %1987.sub3, %1991, implicit $exec + %1992:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %1992:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub0, %1988.sub0, %1992, implicit $exec + %1992:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub1, %1988.sub1, %1992, implicit $exec + %1992:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub2, %1988.sub2, %1992, implicit $exec + %1992:vgpr_32 = V_DOT4C_I32_I8_e32 %38.sub3, %1988.sub3, %1992, implicit $exec + %1656.sub1:sgpr_128 = COPY %1282.sub0 + %1656.sub2:sgpr_128 = COPY %1282.sub0 + %1656.sub3:sgpr_128 = COPY %1282.sub0 + 
%1993:av_128_align2 = COPY %1656 + DS_WRITE_B128_gfx9 %1288, %1993, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1993, 256, 0, implicit $exec + %1658.sub1:sgpr_128 = COPY %1282.sub0 + %1658.sub2:sgpr_128 = COPY %1282.sub0 + %1658.sub3:sgpr_128 = COPY %1282.sub0 + %1994:av_128_align2 = COPY %1658 + DS_WRITE_B128_gfx9 %1288, %1994, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1994, 384, 0, implicit $exec + WAVE_BARRIER + %1995:vgpr_32 = DS_READ_I8_gfx9 %1294, 0, 0, implicit $exec + %1996:vgpr_32 = DS_READ_I8_gfx9 %1294, 1, 0, implicit $exec + %1997:vgpr_32 = DS_READ_I8_gfx9 %1294, 2, 0, implicit $exec + %1998:vgpr_32 = DS_READ_I8_gfx9 %1294, 3, 0, implicit $exec + %1999:vgpr_32 = V_SUB_U32_e32 %1989, %1995, implicit $exec + %2000:vgpr_32 = V_SUB_U32_e32 %1990, %1995, implicit $exec + %2001:vgpr_32 = V_SUB_U32_e32 %1991, %1995, implicit $exec + %2002:vgpr_32 = V_SUB_U32_e32 %1992, %1995, implicit $exec + %2003:vgpr_32 = V_CVT_F32_I32_e32 %1999, implicit $mode, implicit $exec + %2004:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2003, implicit $mode, implicit $exec + %2005:vgpr_32 = V_CVT_F32_I32_e32 %2000, implicit $mode, implicit $exec + %2006:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2005, implicit $mode, implicit $exec + %2007:vgpr_32 = V_CVT_F32_I32_e32 %2001, implicit $mode, implicit $exec + %2008:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2007, implicit $mode, implicit $exec + %2009:vgpr_32 = V_CVT_F32_I32_e32 %2002, implicit $mode, implicit $exec + %2010:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2009, implicit $mode, implicit $exec + %2011:vgpr_32 = nofpexcept V_MUL_F16_e32 %1295.sub0, %2004, implicit $mode, implicit $exec + %2012:vgpr_32 = nofpexcept V_MUL_F16_e32 %1298.sub0, %2006, implicit $mode, implicit $exec + %2013:vgpr_32 = nofpexcept V_MUL_F16_e32 %1303.sub0, %2008, implicit $mode, implicit $exec + %2014:vgpr_32 = nofpexcept V_MUL_F16_e32 %1306.sub0, %2010, implicit $mode, implicit $exec + %2015:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2011, 0, %1434, 1, 
0, 0, 0, 0, implicit $mode, implicit $exec + %2016:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2012, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2017:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2013, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2018:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2014, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2019:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2015, 0, %2016, 0, %2016, 0, 0, implicit $exec + %2020:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2019, 0, %2017, 0, %2018, 0, 0, implicit $exec + %2021:vgpr_32 = DS_BPERMUTE_B32 %1468, %2020, 0, implicit $exec + %2022:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2020, 0, %2021, 0, %2021, 0, 0, implicit $exec + %2023:vgpr_32 = COPY %2022 + %2023:vgpr_32 = V_MOV_B32_dpp %2023, %2023, 296, 15, 15, 0, implicit $exec + %2024:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2022, 0, %2023, 0, %2023, 0, 0, implicit $exec + %2025:vgpr_32 = COPY %2024 + %2025:vgpr_32 = V_MOV_B32_dpp %2025, %2025, 321, 15, 15, 0, implicit $exec + %2025:vgpr_32 = V_MOV_B32_dpp %2025, %2025, 27, 15, 15, 0, implicit $exec + %2026:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2024, 0, %2025, 0, %2025, 0, 0, implicit $exec + %2027:vgpr_32 = COPY %2026 + %2027:vgpr_32 = V_MOV_B32_dpp %2027, %2027, 78, 15, 15, 0, implicit $exec + %2028:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2026, 0, %2027, 0, %2027, 0, 0, implicit $exec + %2029:vgpr_32 = COPY %2028 + %2029:vgpr_32 = V_MOV_B32_dpp %2029, %2029, 177, 15, 15, 0, implicit $exec + %2030:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2028, 0, %2029, 0, %2029, 0, 0, implicit $exec + %2031:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1520, 0, %2030, 0, implicit $mode, implicit $exec + %2032:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %2030, 0, %2030, 0, implicit $mode, implicit $exec + %2033:sreg_64_xexec = S_OR_B64 %2032, %2031, implicit-def dead $scc + %2034:vgpr_32 = V_CNDMASK_B32_e64 0, %1520, 0, %2030, %2033, implicit $exec + %2035:vgpr_32 = nofpexcept V_SUB_F32_e32 %2015, %2034, implicit $mode, implicit $exec + 
%2036:vgpr_32 = nofpexcept V_SUB_F32_e32 %2016, %2034, implicit $mode, implicit $exec + %2037:vgpr_32 = nofpexcept V_SUB_F32_e32 %2017, %2034, implicit $mode, implicit $exec + %2038:vgpr_32 = nofpexcept V_SUB_F32_e32 %2018, %2034, implicit $mode, implicit $exec + undef %2039.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2035, implicit $mode, implicit $exec + %2039.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2036, implicit $mode, implicit $exec + undef %2040.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2037, implicit $mode, implicit $exec + %2040.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2038, implicit $mode, implicit $exec + %2041:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2039, 8, %2040, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2042:vgpr_32 = nofpexcept V_ADD_F32_e32 %2041.sub0, %2041.sub1, implicit $mode, implicit $exec + %2043:vgpr_32 = DS_BPERMUTE_B32 %1468, %2042, 0, implicit $exec + undef %2044.sub0:vreg_64_align2 = nofpexcept V_ADD_F32_e32 %2042, %2043, implicit $mode, implicit $exec + %2044.sub1:vreg_64_align2 = COPY %1282.sub0 + %1509.sub0:vreg_64_align2 = COPY %2044.sub0 + %1509.sub0:vreg_64_align2 = V_MOV_B32_dpp %1509.sub0, %1509.sub0, 296, 15, 15, 0, implicit $exec + %2045:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2044, 8, %1509, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1849.sub1:vreg_64_align2 = COPY %1848.sub1 + %1849.sub1:vreg_64_align2 = V_MOV_B32_dpp %1849.sub1, %1849.sub1, 321, 15, 15, 0, implicit $exec + %1849.sub1:vreg_64_align2 = V_MOV_B32_dpp %1849.sub1, %1849.sub1, 27, 15, 15, 0, implicit $exec + %2046:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %1848, 8, %1849, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1849.sub0:vreg_64_align2 = COPY %2045.sub0 + %1849.sub0:vreg_64_align2 = V_MOV_B32_dpp %1849.sub0, %1849.sub0, 321, 15, 15, 0, implicit $exec + %1849.sub0:vreg_64_align2 = V_MOV_B32_dpp %1849.sub0, %1849.sub0, 27, 15, 15, 0, implicit $exec + undef %2047.sub0:vreg_64_align2 = COPY %2046.sub0 + 
%2047.sub0:vreg_64_align2 = V_MOV_B32_dpp %2047.sub0, %2047.sub0, 78, 15, 15, 0, implicit $exec + %2048:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2045, 8, %1849, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2049:vreg_64_align2 = V_PK_MOV_B32 12, %2046, 8, %2048, 0, 0, 0, 0, 0, implicit $exec + %2049.sub0:vreg_64_align2 = V_MOV_B32_dpp %2049.sub0, %2049.sub0, 78, 15, 15, 0, implicit $exec + %2047.sub1:vreg_64_align2 = COPY %2049.sub0 + %2049.sub1:vreg_64_align2 = V_MOV_B32_dpp %2049.sub1, %2049.sub1, 78, 15, 15, 0, implicit $exec + undef %2050.sub0:vreg_64_align2 = COPY %2049.sub1 + %2050.sub1:vreg_64_align2 = COPY %2049.sub0 + %2051:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2046, 8, %2047, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2052:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2048, 8, %2050, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %2053.sub0:vreg_64_align2 = COPY %2051.sub0 + undef %2053.sub0:vreg_64_align2 = V_MOV_B32_dpp %2053.sub0, %2053.sub0, 177, 15, 15, 0, implicit $exec + %2053.sub1:vreg_64_align2 = COPY %2051.sub1 + %2053.sub1:vreg_64_align2 = V_MOV_B32_dpp %2053.sub1, %2053.sub1, 177, 15, 15, 0, implicit $exec + undef %2054.sub0:vreg_64_align2 = COPY %2052.sub0 + %2054.sub0:vreg_64_align2 = V_MOV_B32_dpp %2054.sub0, %2054.sub0, 177, 15, 15, 0, implicit $exec + %2054.sub1:vreg_64_align2 = COPY %2053.sub1 + %2055:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2051, 8, %2053, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2056:vgpr_32 = nofpexcept V_MUL_F32_e32 %1910.sub0, %313.sub0, implicit $mode, implicit $exec + %2057:vgpr_32 = nofpexcept V_MUL_F32_e32 %1911.sub0, %313.sub1, implicit $mode, implicit $exec + %2058:vgpr_32 = nofpexcept V_MUL_F32_e32 %1912.sub0, %312.sub0, implicit $mode, implicit $exec + %2059:vgpr_32 = nofpexcept V_MUL_F32_e32 %1913.sub0, %312.sub1, implicit $mode, implicit $exec + %2060:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %1596.sub0_sub1, 8, %1659, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + 
%2061:vgpr_32 = nofpexcept V_MUL_F32_e32 %1912.sub1, %308.sub0, implicit $mode, implicit $exec + %2062:vgpr_32 = nofpexcept V_MUL_F32_e32 %1913.sub1, %308.sub1, implicit $mode, implicit $exec + undef %2063.sub0:vreg_64_align2 = COPY %1598.sub1 + %2063.sub1:vreg_64_align2 = COPY %1598.sub2 + %2064:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2063, 8, %1311, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %2065.sub0:vreg_64_align2 = COPY %1599.sub1 + %2065.sub1:vreg_64_align2 = COPY %1599.sub2 + %2066:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2065, 8, %306, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %2067.sub0:vreg_64_align2 = COPY %1910.sub1 + %2067.sub1:vreg_64_align2 = COPY %1910.sub2 + %2068:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2067, 8, %1312, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %2069.sub0:vreg_64_align2 = COPY %1911.sub1 + %2069.sub1:vreg_64_align2 = COPY %1911.sub2 + %2070:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2069, 8, %305, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2071:vgpr_32 = nofpexcept V_MUL_F32_e32 %1912.sub2, %304.sub0, implicit $mode, implicit $exec + %2072:vgpr_32 = nofpexcept V_MUL_F32_e32 %1913.sub2, %304.sub1, implicit $mode, implicit $exec + %2073:vgpr_32 = nofpexcept V_MUL_F32_e32 %1598.sub3, %302.sub0, implicit $mode, implicit $exec + %2074:vgpr_32 = nofpexcept V_MUL_F32_e32 %1599.sub3, %302.sub1, implicit $mode, implicit $exec + %2075:vgpr_32 = nofpexcept V_MUL_F32_e32 %1910.sub3, %301.sub0, implicit $mode, implicit $exec + %2076:vgpr_32 = nofpexcept V_MUL_F32_e32 %1911.sub3, %301.sub1, implicit $mode, implicit $exec + %2077:vgpr_32 = nofpexcept V_MUL_F32_e32 %1912.sub3, %300.sub0, implicit $mode, implicit $exec + %2078:vgpr_32 = nofpexcept V_MUL_F32_e32 %1913.sub3, %300.sub1, implicit $mode, implicit $exec + %2079:vgpr_32 = nofpexcept V_MUL_F32_e32 %1918.sub0, %297.sub0, implicit $mode, implicit $exec + %2080:vgpr_32 = nofpexcept V_MUL_F32_e32 %1919.sub0, %297.sub1, implicit $mode, 
implicit $exec + %2081:vgpr_32 = nofpexcept V_MUL_F32_e32 %1920.sub0, %296.sub0, implicit $mode, implicit $exec + %2082:vgpr_32 = nofpexcept V_MUL_F32_e32 %1921.sub0, %296.sub1, implicit $mode, implicit $exec + %2083:vgpr_32 = nofpexcept V_MUL_F32_e32 %1918.sub1, %293.sub0, implicit $mode, implicit $exec + %2084:vgpr_32 = nofpexcept V_MUL_F32_e32 %1918.sub2, %1310, implicit $mode, implicit $exec + undef %2085.sub0:vreg_64_align2 = COPY %1919.sub1 + %2085.sub1:vreg_64_align2 = COPY %1919.sub2 + %2086:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2085, 8, %289, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %2087.sub0:vreg_64_align2 = COPY %1920.sub1 + %2087.sub1:vreg_64_align2 = COPY %1920.sub2 + %2088:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2087, 8, %1313, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %2089.sub0:vreg_64_align2 = COPY %1921.sub1 + %2089.sub1:vreg_64_align2 = COPY %1921.sub2 + %2090:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2089, 8, %288, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2091:vgpr_32 = nofpexcept V_MUL_F32_e32 %1918.sub3, %285.sub0, implicit $mode, implicit $exec + %2092:vgpr_32 = nofpexcept V_MUL_F32_e32 %1919.sub3, %285.sub1, implicit $mode, implicit $exec + %2093:vgpr_32 = nofpexcept V_MUL_F32_e32 %1920.sub3, %284.sub0, implicit $mode, implicit $exec + %2094:vgpr_32 = nofpexcept V_MUL_F32_e32 %1921.sub3, %284.sub1, implicit $mode, implicit $exec + %2095:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2052, 8, %2054, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2096:vgpr_32 = nofpexcept V_ADD_F32_e32 %2056, %2055.sub1, implicit $mode, implicit $exec + %2097:vgpr_32 = nofpexcept V_ADD_F32_e32 %2057, %2055.sub1, implicit $mode, implicit $exec + %2098:vgpr_32 = nofpexcept V_ADD_F32_e32 %2058, %2055.sub1, implicit $mode, implicit $exec + %2099:vgpr_32 = nofpexcept V_ADD_F32_e32 %2059, %2055.sub1, implicit $mode, implicit $exec + %2100:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2060, 8, %2055, 0, 0, 0, 0, 0, 
implicit $mode, implicit $exec + %2101:vgpr_32 = nofpexcept V_ADD_F32_e32 %2061, %2055.sub1, implicit $mode, implicit $exec + %2102:vgpr_32 = nofpexcept V_ADD_F32_e32 %2062, %2055.sub1, implicit $mode, implicit $exec + %2103:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2064, 12, %2055, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2104:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2066, 12, %2055, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2105:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2068, 12, %2055, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2106:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2070, 12, %2055, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2107:vgpr_32 = nofpexcept V_ADD_F32_e32 %2071, %2055.sub1, implicit $mode, implicit $exec + %2108:vgpr_32 = nofpexcept V_ADD_F32_e32 %2072, %2055.sub1, implicit $mode, implicit $exec + %2109:vgpr_32 = nofpexcept V_ADD_F32_e32 %2073, %2055.sub1, implicit $mode, implicit $exec + %2110:vgpr_32 = nofpexcept V_ADD_F32_e32 %2074, %2055.sub1, implicit $mode, implicit $exec + %2111:vgpr_32 = nofpexcept V_ADD_F32_e32 %2075, %2055.sub1, implicit $mode, implicit $exec + %2112:vgpr_32 = nofpexcept V_ADD_F32_e32 %2076, %2055.sub1, implicit $mode, implicit $exec + %2113:vgpr_32 = nofpexcept V_ADD_F32_e32 %2077, %2055.sub1, implicit $mode, implicit $exec + %2114:vgpr_32 = nofpexcept V_ADD_F32_e32 %2078, %2055.sub1, implicit $mode, implicit $exec + %2115:vgpr_32 = nofpexcept V_ADD_F32_e32 %2079, %2055.sub1, implicit $mode, implicit $exec + %2116:vgpr_32 = nofpexcept V_ADD_F32_e32 %2080, %2055.sub1, implicit $mode, implicit $exec + %2117:vgpr_32 = nofpexcept V_ADD_F32_e32 %2081, %2055.sub1, implicit $mode, implicit $exec + %2118:vgpr_32 = nofpexcept V_ADD_F32_e32 %2082, %2055.sub1, implicit $mode, implicit $exec + %2119:vgpr_32 = nofpexcept V_ADD_F32_e32 %2083, %2055.sub1, implicit $mode, implicit $exec + %2120:vgpr_32 = nofpexcept V_ADD_F32_e32 %2084, %2055.sub1, implicit $mode, implicit $exec + 
%2121:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2086, 12, %2055, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2122:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2088, 12, %2055, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2123:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2090, 12, %2055, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2124:vgpr_32 = nofpexcept V_ADD_F32_e32 %2091, %2055.sub1, implicit $mode, implicit $exec + %2125:vgpr_32 = nofpexcept V_ADD_F32_e32 %2092, %2055.sub1, implicit $mode, implicit $exec + %2126:vgpr_32 = nofpexcept V_ADD_F32_e32 %2093, %2055.sub1, implicit $mode, implicit $exec + %2127:vgpr_32 = nofpexcept V_ADD_F32_e32 %2094, %2055.sub1, implicit $mode, implicit $exec + %1940.sub0:av_192_align2 = COPY %1925.sub0 + %1940.sub1:av_192_align2 = COPY %1925.sub1 + %1970.sub0:av_192_align2 = COPY %1955.sub0 + %1970.sub1:av_192_align2 = COPY %1955.sub1 + %1925.sub4:av_192_align2 = COPY %1940.sub4 + %1925.sub5:av_192_align2 = COPY %1940.sub5 + %1955.sub4:av_192_align2 = COPY %1970.sub4 + %1955.sub5:av_192_align2 = COPY %1970.sub5 + %1942.sub0:av_192_align2 = COPY %1927.sub0 + %1942.sub1:av_192_align2 = COPY %1927.sub1 + %1972.sub0:av_192_align2 = COPY %1957.sub0 + %1972.sub1:av_192_align2 = COPY %1957.sub1 + %1927.sub4:av_192_align2 = COPY %1942.sub4 + %1927.sub5:av_192_align2 = COPY %1942.sub5 + %1957.sub4:av_192_align2 = COPY %1972.sub4 + %1957.sub5:av_192_align2 = COPY %1972.sub5 + %1944.sub0:av_192_align2 = COPY %1929.sub0 + %1944.sub1:av_192_align2 = COPY %1929.sub1 + %1974.sub0:av_192_align2 = COPY %1959.sub0 + %1954.sub0:av_192_align2 = COPY %1939.sub0 + %1954.sub1:av_192_align2 = COPY %1939.sub1 + %1984.sub0:av_192_align2 = COPY %1969.sub0 + %1984.sub1:av_192_align2 = COPY %1969.sub1 + %1939.sub4:av_192_align2 = COPY %1954.sub4 + %1939.sub5:av_192_align2 = COPY %1954.sub5 + %1969.sub4:av_192_align2 = COPY %1984.sub4 + %1969.sub5:av_192_align2 = COPY %1984.sub5 + undef %2128.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 
8, %175.sub2_sub3, 0, %1922.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2128.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %175.sub0_sub1, 0, %1922.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2129:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1616, %1940.sub0_sub1_sub2_sub3, %2128, 0, 0, 0, implicit $mode, implicit $exec + %2130:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1621, %1970.sub0_sub1_sub2_sub3, %2129, 0, 0, 0, implicit $mode, implicit $exec + %2131:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1626, %1925.sub2_sub3_sub4_sub5, %2130, 0, 0, 0, implicit $mode, implicit $exec + %2132:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1631, %1955.sub2_sub3_sub4_sub5, %2131, 0, 0, 0, implicit $mode, implicit $exec + undef %2133.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %176.sub2_sub3, 12, %1922.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2133.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %176.sub0_sub1, 12, %1922.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2134:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1616, %1942.sub0_sub1_sub2_sub3, %2133, 0, 0, 0, implicit $mode, implicit $exec + %2135:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1621, %1972.sub0_sub1_sub2_sub3, %2134, 0, 0, 0, implicit $mode, implicit $exec + %2136:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1626, %1927.sub2_sub3_sub4_sub5, %2135, 0, 0, 0, implicit $mode, implicit $exec + %2137:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1631, %1957.sub2_sub3_sub4_sub5, %2136, 0, 0, 0, implicit $mode, implicit $exec + undef %2138.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %177.sub2_sub3, 0, %1922.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2138.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %177.sub0_sub1, 0, %1922.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2139:vreg_128_align2 = 
V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1616, %1944.sub0_sub1_sub2_sub3, %2138, 0, 0, 0, implicit $mode, implicit $exec + %2140:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1621, %1974.sub0_sub1_sub2_sub3, %2139, 0, 0, 0, implicit $mode, implicit $exec + %2141:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1626, %1929.sub2_sub3_sub4_sub5, %2140, 0, 0, 0, implicit $mode, implicit $exec + %2142:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1631, %1959.sub2_sub3_sub4_sub5, %2141, 0, 0, 0, implicit $mode, implicit $exec + undef %2143.sub0:vreg_64_align2 = COPY %1922.sub3 + undef %2144.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %178.sub2_sub3, 0, %2143, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2144.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %178.sub0_sub1, 0, %2143, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2145:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1616, %1946.sub0_sub1_sub2_sub3, %2144, 0, 0, 0, implicit $mode, implicit $exec + %2146:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1621, %1976.sub0_sub1_sub2_sub3, %2145, 0, 0, 0, implicit $mode, implicit $exec + %2147:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1626, %1931.sub2_sub3_sub4_sub5, %2146, 0, 0, 0, implicit $mode, implicit $exec + %2148:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1631, %1961.sub2_sub3_sub4_sub5, %2147, 0, 0, 0, implicit $mode, implicit $exec + undef %2149.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %179.sub2_sub3, 0, %1923.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2149.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %179.sub0_sub1, 0, %1923.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2150:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1616, %1948.sub0_sub1_sub2_sub3, %2149, 0, 0, 0, implicit $mode, implicit $exec + %2151:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1621, %1978.sub0_sub1_sub2_sub3, %2150, 0, 0, 0, implicit 
$mode, implicit $exec + %2152:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1626, %1933.sub2_sub3_sub4_sub5, %2151, 0, 0, 0, implicit $mode, implicit $exec + %2153:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1631, %1963.sub2_sub3_sub4_sub5, %2152, 0, 0, 0, implicit $mode, implicit $exec + undef %2154.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %180.sub2_sub3, 12, %1923.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2154.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %180.sub0_sub1, 12, %1923.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2155:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1616, %1950.sub0_sub1_sub2_sub3, %2154, 0, 0, 0, implicit $mode, implicit $exec + %2156:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1621, %1980.sub0_sub1_sub2_sub3, %2155, 0, 0, 0, implicit $mode, implicit $exec + %2157:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1626, %1935.sub2_sub3_sub4_sub5, %2156, 0, 0, 0, implicit $mode, implicit $exec + %2158:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1631, %1965.sub2_sub3_sub4_sub5, %2157, 0, 0, 0, implicit $mode, implicit $exec + undef %2159.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %181.sub2_sub3, 0, %1923.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2159.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %181.sub0_sub1, 0, %1923.sub2_sub3, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2160:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1616, %1952.sub0_sub1_sub2_sub3, %2159, 0, 0, 0, implicit $mode, implicit $exec + %2161:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1621, %1982.sub0_sub1_sub2_sub3, %2160, 0, 0, 0, implicit $mode, implicit $exec + %2162:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1626, %1937.sub2_sub3_sub4_sub5, %2161, 0, 0, 0, implicit $mode, implicit $exec + %2163:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1631, %1967.sub2_sub3_sub4_sub5, %2162, 0, 
0, 0, implicit $mode, implicit $exec + undef %2164.sub0:vreg_64_align2 = COPY %1923.sub3 + undef %2165.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %182.sub2_sub3, 0, %2164, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2165.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_MUL_F32 8, %182.sub0_sub1, 0, %2164, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2166:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1616, %1954.sub0_sub1_sub2_sub3, %2165, 0, 0, 0, implicit $mode, implicit $exec + %2167:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1621, %1984.sub0_sub1_sub2_sub3, %2166, 0, 0, 0, implicit $mode, implicit $exec + %2168:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1626, %1939.sub2_sub3_sub4_sub5, %2167, 0, 0, 0, implicit $mode, implicit $exec + %2169:vreg_128_align2 = V_MFMA_F32_16X16X32_F16_vgprcd_e64 %1631, %1969.sub2_sub3_sub4_sub5, %2168, 0, 0, 0, implicit $mode, implicit $exec + %2170:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %2170:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub0, %1985.sub0, %2170, implicit $exec + %2170:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub1, %1985.sub1, %2170, implicit $exec + %2170:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub2, %1985.sub2, %2170, implicit $exec + %2170:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub3, %1985.sub3, %2170, implicit $exec + %2171:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %2171:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub0, %1986.sub0, %2171, implicit $exec + %2171:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub1, %1986.sub1, %2171, implicit $exec + %2171:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub2, %1986.sub2, %2171, implicit $exec + %2171:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub3, %1986.sub3, %2171, implicit $exec + %2172:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %2172:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub0, %1987.sub0, %2172, implicit $exec + %2172:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub1, %1987.sub1, %2172, implicit $exec + %2172:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub2, %1987.sub2, %2172, implicit $exec + 
%2172:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub3, %1987.sub3, %2172, implicit $exec + %2173:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %2173:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub0, %1988.sub0, %2173, implicit $exec + %2173:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub1, %1988.sub1, %2173, implicit $exec + %2173:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub2, %1988.sub2, %2173, implicit $exec + %2173:vgpr_32 = V_DOT4C_I32_I8_e32 %39.sub3, %1988.sub3, %2173, implicit $exec + %2174:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %2174:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub0, %1985.sub0, %2174, implicit $exec + %2174:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub1, %1985.sub1, %2174, implicit $exec + %2174:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub2, %1985.sub2, %2174, implicit $exec + %2174:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub3, %1985.sub3, %2174, implicit $exec + %2175:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %2175:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub0, %1986.sub0, %2175, implicit $exec + %2175:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub1, %1986.sub1, %2175, implicit $exec + %2175:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub2, %1986.sub2, %2175, implicit $exec + %2175:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub3, %1986.sub3, %2175, implicit $exec + %2176:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %2176:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub0, %1987.sub0, %2176, implicit $exec + %2176:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub1, %1987.sub1, %2176, implicit $exec + %2176:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub2, %1987.sub2, %2176, implicit $exec + %2176:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub3, %1987.sub3, %2176, implicit $exec + %2177:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %2177:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub0, %1988.sub0, %2177, implicit $exec + %2177:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub1, %1988.sub1, %2177, implicit $exec + %2177:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub2, %1988.sub2, %2177, implicit $exec + %2177:vgpr_32 = V_DOT4C_I32_I8_e32 %40.sub3, %1988.sub3, %2177, implicit $exec + 
%2178:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %2178:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub0, %1985.sub0, %2178, implicit $exec + %2178:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub1, %1985.sub1, %2178, implicit $exec + %2178:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub2, %1985.sub2, %2178, implicit $exec + %2178:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub3, %1985.sub3, %2178, implicit $exec + %2179:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %2179:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub0, %1986.sub0, %2179, implicit $exec + %2179:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub1, %1986.sub1, %2179, implicit $exec + %2179:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub2, %1986.sub2, %2179, implicit $exec + %2179:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub3, %1986.sub3, %2179, implicit $exec + %2180:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %2180:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub0, %1987.sub0, %2180, implicit $exec + %2180:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub1, %1987.sub1, %2180, implicit $exec + %2180:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub2, %1987.sub2, %2180, implicit $exec + %2180:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub3, %1987.sub3, %2180, implicit $exec + %2181:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec + %2181:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub0, %1988.sub0, %2181, implicit $exec + %2181:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub1, %1988.sub1, %2181, implicit $exec + %2181:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub2, %1988.sub2, %2181, implicit $exec + %2181:vgpr_32 = V_DOT4C_I32_I8_e32 %41.sub3, %1988.sub3, %2181, implicit $exec + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 
%1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + WAVE_BARRIER + DS_WRITE_B128_gfx9 %1288, %1309, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 256, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 128, 0, implicit $exec + DS_WRITE_B128_gfx9 %1288, %1309, 384, 0, implicit $exec + WAVE_BARRIER + %2182:vgpr_32 = V_SUB_U32_e32 %2170, %1996, implicit $exec + %2183:vgpr_32 = V_SUB_U32_e32 %2171, %1996, implicit $exec + %2184:vgpr_32 = V_SUB_U32_e32 %2172, %1996, implicit $exec + %2185:vgpr_32 = V_SUB_U32_e32 %2173, %1996, implicit $exec + %2186:vgpr_32 = V_SUB_U32_e32 %2174, %1997, implicit $exec + %2187:vgpr_32 = V_SUB_U32_e32 %2175, %1997, implicit $exec + %2188:vgpr_32 = V_SUB_U32_e32 %2176, %1997, implicit $exec + %2189:vgpr_32 = V_SUB_U32_e32 %2177, %1997, implicit $exec + %2190:vgpr_32 = V_SUB_U32_e32 %2178, %1998, implicit $exec + %2191:vgpr_32 = V_SUB_U32_e32 %2179, %1998, implicit $exec + %2192:vgpr_32 = V_SUB_U32_e32 %2180, %1998, implicit $exec + %2193:vgpr_32 = V_SUB_U32_e32 %2181, %1998, implicit $exec + %2194:vgpr_32 = V_CVT_F32_I32_e32 %2182, implicit $mode, implicit $exec + %2195:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2194, implicit $mode, implicit $exec + %2196:vgpr_32 = V_CVT_F32_I32_e32 %2183, implicit $mode, implicit $exec + %2197:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2196, implicit $mode, implicit $exec + %2198:vgpr_32 = V_CVT_F32_I32_e32 %2184, implicit $mode, implicit $exec + %2199:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2198, implicit $mode, implicit $exec + %2200:vgpr_32 = V_CVT_F32_I32_e32 %2185, implicit $mode, implicit $exec + %2201:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2200, implicit $mode, implicit $exec + %2202:vgpr_32 = V_CVT_F32_I32_e32 %2186, implicit $mode, implicit $exec + %2203:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2202, implicit $mode, implicit $exec + %2204:vgpr_32 = V_CVT_F32_I32_e32 %2187, implicit $mode, implicit $exec + %2205:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2204, implicit $mode, implicit $exec + 
%2206:vgpr_32 = V_CVT_F32_I32_e32 %2188, implicit $mode, implicit $exec + %2207:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2206, implicit $mode, implicit $exec + %2208:vgpr_32 = V_CVT_F32_I32_e32 %2189, implicit $mode, implicit $exec + %2209:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2208, implicit $mode, implicit $exec + %2210:vgpr_32 = V_CVT_F32_I32_e32 %2190, implicit $mode, implicit $exec + %2211:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2210, implicit $mode, implicit $exec + %2212:vgpr_32 = V_CVT_F32_I32_e32 %2191, implicit $mode, implicit $exec + %2213:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2212, implicit $mode, implicit $exec + %2214:vgpr_32 = V_CVT_F32_I32_e32 %2192, implicit $mode, implicit $exec + %2215:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2214, implicit $mode, implicit $exec + %2216:vgpr_32 = V_CVT_F32_I32_e32 %2193, implicit $mode, implicit $exec + %2217:vgpr_32 = nofpexcept V_CVT_F16_F32_e32 %2216, implicit $mode, implicit $exec + %2218:vgpr_32 = nofpexcept V_MUL_F16_e32 %1296, %2195, implicit $mode, implicit $exec + %2219:vgpr_32 = nofpexcept V_MUL_F16_e32 %1299, %2197, implicit $mode, implicit $exec + %2220:vgpr_32 = nofpexcept V_MUL_F16_e32 %1304, %2199, implicit $mode, implicit $exec + %2221:vgpr_32 = nofpexcept V_MUL_F16_e32 %1307, %2201, implicit $mode, implicit $exec + %2222:vgpr_32 = nofpexcept V_MUL_F16_e32 %1295.sub1, %2203, implicit $mode, implicit $exec + %2223:vgpr_32 = nofpexcept V_MUL_F16_e32 %1298.sub1, %2205, implicit $mode, implicit $exec + %2224:vgpr_32 = nofpexcept V_MUL_F16_e32 %1303.sub1, %2207, implicit $mode, implicit $exec + %2225:vgpr_32 = nofpexcept V_MUL_F16_e32 %1306.sub1, %2209, implicit $mode, implicit $exec + %2226:vgpr_32 = nofpexcept V_MUL_F16_e32 %1297, %2211, implicit $mode, implicit $exec + %2227:vgpr_32 = nofpexcept V_MUL_F16_e32 %1300, %2213, implicit $mode, implicit $exec + %2228:vgpr_32 = nofpexcept V_MUL_F16_e32 %1305, %2215, implicit $mode, implicit $exec + %2229:vgpr_32 = nofpexcept V_MUL_F16_e32 %1308, %2217, 
implicit $mode, implicit $exec + %2230:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2218, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2231:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2219, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2232:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2220, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2233:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2221, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2234:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2222, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2235:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2223, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2236:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2224, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2237:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2225, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2238:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2226, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2239:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2227, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2240:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2228, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2241:vgpr_32 = nofpexcept V_FMA_MIX_F32 8, %2229, 0, %1434, 1, 0, 0, 0, 0, implicit $mode, implicit $exec + %2242:vgpr_32 = V_CNDMASK_B32_e64 0, %1451, 0, %2238, %1283, implicit $exec + %2243:vgpr_32 = V_CNDMASK_B32_e64 0, %1451, 0, %2239, %1283, implicit $exec + %2244:vgpr_32 = V_CNDMASK_B32_e64 0, %1451, 0, %2240, %1283, implicit $exec + %2245:vgpr_32 = V_CNDMASK_B32_e64 0, %1451, 0, %2241, %1283, implicit $exec + %2246:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %2238, %1283, implicit $exec + %2247:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %2239, %1283, implicit $exec + %2248:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %2240, %1283, implicit $exec + %2249:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %2241, %1283, implicit $exec + %2250:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2230, 0, 
%2231, 0, %2231, 0, 0, implicit $exec + %2251:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2250, 0, %2232, 0, %2233, 0, 0, implicit $exec + %2252:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2234, 0, %2235, 0, %2235, 0, 0, implicit $exec + %2253:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2252, 0, %2236, 0, %2237, 0, 0, implicit $exec + %2254:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2246, 0, %2247, 0, %2247, 0, 0, implicit $exec + %2255:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2254, 0, %2248, 0, %2249, 0, 0, implicit $exec + %2256:vgpr_32 = DS_BPERMUTE_B32 %1468, %2251, 0, implicit $exec + %2257:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2251, 0, %2256, 0, %2256, 0, 0, implicit $exec + %2258:vgpr_32 = COPY %2257 + %2258:vgpr_32 = V_MOV_B32_dpp %2258, %2258, 296, 15, 15, 0, implicit $exec + %2259:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2257, 0, %2258, 0, %2258, 0, 0, implicit $exec + %2260:vgpr_32 = COPY %2259 + %2260:vgpr_32 = V_MOV_B32_dpp %2260, %2260, 321, 15, 15, 0, implicit $exec + %2260:vgpr_32 = V_MOV_B32_dpp %2260, %2260, 27, 15, 15, 0, implicit $exec + %2261:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2259, 0, %2260, 0, %2260, 0, 0, implicit $exec + %2262:vgpr_32 = COPY %2261 + %2262:vgpr_32 = V_MOV_B32_dpp %2262, %2262, 78, 15, 15, 0, implicit $exec + %2263:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2261, 0, %2262, 0, %2262, 0, 0, implicit $exec + %2264:vgpr_32 = COPY %2263 + %2264:vgpr_32 = V_MOV_B32_dpp %2264, %2264, 177, 15, 15, 0, implicit $exec + %2265:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2263, 0, %2264, 0, %2264, 0, 0, implicit $exec + %2266:vgpr_32 = DS_BPERMUTE_B32 %1468, %2253, 0, implicit $exec + %2267:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2253, 0, %2266, 0, %2266, 0, 0, implicit $exec + %2268:vgpr_32 = COPY %2267 + %2268:vgpr_32 = V_MOV_B32_dpp %2268, %2268, 296, 15, 15, 0, implicit $exec + %2269:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2267, 0, %2268, 0, %2268, 0, 0, implicit $exec + %2270:vgpr_32 = COPY %2269 + %2270:vgpr_32 = V_MOV_B32_dpp %2270, %2270, 321, 15, 15, 0, implicit $exec + %2270:vgpr_32 = V_MOV_B32_dpp %2270, %2270, 27, 15, 15, 0, 
implicit $exec + %2271:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2269, 0, %2270, 0, %2270, 0, 0, implicit $exec + %2272:vgpr_32 = COPY %2271 + %2272:vgpr_32 = V_MOV_B32_dpp %2272, %2272, 78, 15, 15, 0, implicit $exec + %2273:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2271, 0, %2272, 0, %2272, 0, 0, implicit $exec + %2274:vgpr_32 = COPY %2273 + %2274:vgpr_32 = V_MOV_B32_dpp %2274, %2274, 177, 15, 15, 0, implicit $exec + %2275:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2273, 0, %2274, 0, %2274, 0, 0, implicit $exec + %2276:vgpr_32 = DS_BPERMUTE_B32 %1468, %2255, 0, implicit $exec + %2277:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2255, 0, %2276, 0, %2276, 0, 0, implicit $exec + %2278:vgpr_32 = COPY %2277 + %2278:vgpr_32 = V_MOV_B32_dpp %2278, %2278, 296, 15, 15, 0, implicit $exec + %2279:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2277, 0, %2278, 0, %2278, 0, 0, implicit $exec + %2280:vgpr_32 = COPY %2279 + %2280:vgpr_32 = V_MOV_B32_dpp %2280, %2280, 321, 15, 15, 0, implicit $exec + %2280:vgpr_32 = V_MOV_B32_dpp %2280, %2280, 27, 15, 15, 0, implicit $exec + %2281:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2279, 0, %2280, 0, %2280, 0, 0, implicit $exec + %2282:vgpr_32 = COPY %2281 + %2282:vgpr_32 = V_MOV_B32_dpp %2282, %2282, 78, 15, 15, 0, implicit $exec + %2283:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2281, 0, %2282, 0, %2282, 0, 0, implicit $exec + %2284:vgpr_32 = COPY %2283 + %2284:vgpr_32 = V_MOV_B32_dpp %2284, %2284, 177, 15, 15, 0, implicit $exec + %2285:vgpr_32 = V_MAXIMUM3_F32_e64 0, %2283, 0, %2284, 0, %2284, 0, 0, implicit $exec + %2286:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1522, 0, %2265, 0, implicit $mode, implicit $exec + %2287:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1533.sub0, 0, %2275, 0, implicit $mode, implicit $exec + %2288:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1533.sub1, 0, %2285, 0, implicit $mode, implicit $exec + %2289:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1729, 0, %1666, 0, implicit $mode, implicit $exec + %2290:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1731, 0, %1666, 0, implicit $mode, implicit 
$exec + %2291:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1733, 0, %1666, 0, implicit $mode, implicit $exec + %2292:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1735, 0, %1666, 0, implicit $mode, implicit $exec + %2293:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1737, 0, %1666, 0, implicit $mode, implicit $exec + %2294:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1739, 0, %1666, 0, implicit $mode, implicit $exec + %2295:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1741, 0, %1666, 0, implicit $mode, implicit $exec + %2296:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1743, 0, %1666, 0, implicit $mode, implicit $exec + %2297:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1745, 0, %1666, 0, implicit $mode, implicit $exec + %2298:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1747, 0, %1666, 0, implicit $mode, implicit $exec + %2299:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1749, 0, %1666, 0, implicit $mode, implicit $exec + %2300:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1751, 0, %1666, 0, implicit $mode, implicit $exec + %2301:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1753, 0, %1666, 0, implicit $mode, implicit $exec + %2302:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1755, 0, %1666, 0, implicit $mode, implicit $exec + %2303:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1757, 0, %1666, 0, implicit $mode, implicit $exec + %2304:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1759, 0, %1666, 0, implicit $mode, implicit $exec + %2305:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1761, 0, %1666, 0, implicit $mode, implicit $exec + %2306:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1763, 0, %1666, 0, implicit $mode, implicit $exec + %2307:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1765, 0, %1666, 0, implicit $mode, implicit $exec + %2308:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1767, 0, %1666, 0, implicit $mode, implicit $exec + %2309:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1769, 0, %1666, 0, implicit $mode, implicit $exec + %2310:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1771, 0, %1666, 0, implicit $mode, implicit 
$exec + %2311:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1773, 0, %1666, 0, implicit $mode, implicit $exec + %2312:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1775, 0, %1666, 0, implicit $mode, implicit $exec + %2313:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1777, 0, %1666, 0, implicit $mode, implicit $exec + %2314:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1779, 0, %1666, 0, implicit $mode, implicit $exec + %2315:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1781, 0, %1666, 0, implicit $mode, implicit $exec + %2316:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1783, 0, %1666, 0, implicit $mode, implicit $exec + %2317:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1785, 0, %1666, 0, implicit $mode, implicit $exec + %2318:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1787, 0, %1666, 0, implicit $mode, implicit $exec + %2319:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1789, 0, %1666, 0, implicit $mode, implicit $exec + %2320:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1791, 0, %1666, 0, implicit $mode, implicit $exec + %2321:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1793, 0, %1666, 0, implicit $mode, implicit $exec + %2322:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1795, 0, %1666, 0, implicit $mode, implicit $exec + %2323:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1797, 0, %1666, 0, implicit $mode, implicit $exec + %2324:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1799, 0, %1666, 0, implicit $mode, implicit $exec + %2325:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1801, 0, %1666, 0, implicit $mode, implicit $exec + %2326:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1803, 0, %1666, 0, implicit $mode, implicit $exec + %2327:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1805, 0, %1666, 0, implicit $mode, implicit $exec + %2328:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1807, 0, %1666, 0, implicit $mode, implicit $exec + %2329:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1809, 0, %1666, 0, implicit $mode, implicit $exec + %2330:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1811, 0, %1666, 0, implicit $mode, implicit 
$exec + %2331:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1813, 0, %1666, 0, implicit $mode, implicit $exec + %2332:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1815, 0, %1666, 0, implicit $mode, implicit $exec + %2333:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1817, 0, %1666, 0, implicit $mode, implicit $exec + %2334:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1819, 0, %1666, 0, implicit $mode, implicit $exec + %2335:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1821, 0, %1666, 0, implicit $mode, implicit $exec + %2336:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1823, 0, %1666, 0, implicit $mode, implicit $exec + %2337:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1825, 0, %1666, 0, implicit $mode, implicit $exec + %2338:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1827, 0, %1666, 0, implicit $mode, implicit $exec + %2339:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1829, 0, %1666, 0, implicit $mode, implicit $exec + %2340:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1831, 0, %1666, 0, implicit $mode, implicit $exec + %2341:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1833, 0, %1666, 0, implicit $mode, implicit $exec + %2342:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1835, 0, %1666, 0, implicit $mode, implicit $exec + %2343:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1837, 0, %1666, 0, implicit $mode, implicit $exec + %2344:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1839, 0, %1666, 0, implicit $mode, implicit $exec + %2345:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1841, 0, %1666, 0, implicit $mode, implicit $exec + %2346:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1843, 0, %1666, 0, implicit $mode, implicit $exec + %2347:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1845, 0, %1666, 0, implicit $mode, implicit $exec + %2348:sreg_64 = nofpexcept V_CMP_LE_F32_e64 0, %1847, 0, %1666, 0, implicit $mode, implicit $exec + %2349:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %2265, 0, %2265, 0, implicit $mode, implicit $exec + %2350:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %2275, 0, %2275, 0, implicit $mode, implicit 
$exec + %2351:sreg_64 = nofpexcept V_CMP_U_F32_e64 0, %2285, 0, %2285, 0, implicit $mode, implicit $exec + %2352:sreg_64_xexec = S_OR_B64 %2349, %2286, implicit-def dead $scc + %2353:vgpr_32 = V_CNDMASK_B32_e64 0, %1522, 0, %2265, %2352, implicit $exec + %2354:sreg_64_xexec = S_OR_B64 %2350, %2287, implicit-def dead $scc + %2355:vgpr_32 = V_CNDMASK_B32_e64 0, %1533.sub0, 0, %2275, %2354, implicit $exec + %2356:sreg_64_xexec = S_OR_B64 %2351, %2288, implicit-def dead $scc + %2357:vgpr_32 = V_CNDMASK_B32_e64 0, %1533.sub1, 0, %2285, %2356, implicit $exec + %2358:sreg_64_xexec = S_OR_B64 %1727, %2289, implicit-def dead $scc + %2359:vgpr_32 = V_CNDMASK_B32_e64 0, %1729, 0, %1666, %2358, implicit $exec + %2360:sreg_64_xexec = S_OR_B64 %1727, %2290, implicit-def dead $scc + %2361:vgpr_32 = V_CNDMASK_B32_e64 0, %1731, 0, %1666, %2360, implicit $exec + %2362:sreg_64_xexec = S_OR_B64 %1727, %2291, implicit-def dead $scc + %2363:vgpr_32 = V_CNDMASK_B32_e64 0, %1733, 0, %1666, %2362, implicit $exec + %2364:sreg_64_xexec = S_OR_B64 %1727, %2292, implicit-def dead $scc + %2365:vgpr_32 = V_CNDMASK_B32_e64 0, %1735, 0, %1666, %2364, implicit $exec + %2366:sreg_64_xexec = S_OR_B64 %1727, %2293, implicit-def dead $scc + %2367:vgpr_32 = V_CNDMASK_B32_e64 0, %1737, 0, %1666, %2366, implicit $exec + %2368:sreg_64_xexec = S_OR_B64 %1727, %2294, implicit-def dead $scc + %2369:vgpr_32 = V_CNDMASK_B32_e64 0, %1739, 0, %1666, %2368, implicit $exec + %2370:sreg_64_xexec = S_OR_B64 %1727, %2295, implicit-def dead $scc + %2371:vgpr_32 = V_CNDMASK_B32_e64 0, %1741, 0, %1666, %2370, implicit $exec + %2372:sreg_64_xexec = S_OR_B64 %1727, %2296, implicit-def dead $scc + %2373:vgpr_32 = V_CNDMASK_B32_e64 0, %1743, 0, %1666, %2372, implicit $exec + %2374:sreg_64_xexec = S_OR_B64 %1727, %2297, implicit-def dead $scc + %2375:vgpr_32 = V_CNDMASK_B32_e64 0, %1745, 0, %1666, %2374, implicit $exec + %2376:sreg_64_xexec = S_OR_B64 %1727, %2298, implicit-def dead $scc + %2377:vgpr_32 = V_CNDMASK_B32_e64 0, 
%1747, 0, %1666, %2376, implicit $exec + %2378:sreg_64_xexec = S_OR_B64 %1727, %2299, implicit-def dead $scc + %2379:vgpr_32 = V_CNDMASK_B32_e64 0, %1749, 0, %1666, %2378, implicit $exec + %2380:sreg_64_xexec = S_OR_B64 %1727, %2300, implicit-def dead $scc + %2381:vgpr_32 = V_CNDMASK_B32_e64 0, %1751, 0, %1666, %2380, implicit $exec + %2382:sreg_64_xexec = S_OR_B64 %1727, %2301, implicit-def dead $scc + %2383:vgpr_32 = V_CNDMASK_B32_e64 0, %1753, 0, %1666, %2382, implicit $exec + %2384:sreg_64_xexec = S_OR_B64 %1727, %2302, implicit-def dead $scc + %2385:vgpr_32 = V_CNDMASK_B32_e64 0, %1755, 0, %1666, %2384, implicit $exec + %2386:sreg_64_xexec = S_OR_B64 %1727, %2303, implicit-def dead $scc + %2387:vgpr_32 = V_CNDMASK_B32_e64 0, %1757, 0, %1666, %2386, implicit $exec + %2388:sreg_64_xexec = S_OR_B64 %1727, %2304, implicit-def dead $scc + %2389:vgpr_32 = V_CNDMASK_B32_e64 0, %1759, 0, %1666, %2388, implicit $exec + %2390:sreg_64_xexec = S_OR_B64 %1727, %2305, implicit-def dead $scc + %2391:vgpr_32 = V_CNDMASK_B32_e64 0, %1761, 0, %1666, %2390, implicit $exec + %2392:sreg_64_xexec = S_OR_B64 %1727, %2306, implicit-def dead $scc + %2393:vgpr_32 = V_CNDMASK_B32_e64 0, %1763, 0, %1666, %2392, implicit $exec + %2394:sreg_64_xexec = S_OR_B64 %1727, %2307, implicit-def dead $scc + %2395:vgpr_32 = V_CNDMASK_B32_e64 0, %1765, 0, %1666, %2394, implicit $exec + %2396:sreg_64_xexec = S_OR_B64 %1727, %2308, implicit-def dead $scc + %2397:vgpr_32 = V_CNDMASK_B32_e64 0, %1767, 0, %1666, %2396, implicit $exec + %2398:sreg_64_xexec = S_OR_B64 %1727, %2309, implicit-def dead $scc + %2399:vgpr_32 = V_CNDMASK_B32_e64 0, %1769, 0, %1666, %2398, implicit $exec + %2400:sreg_64_xexec = S_OR_B64 %1727, %2310, implicit-def dead $scc + %2401:vgpr_32 = V_CNDMASK_B32_e64 0, %1771, 0, %1666, %2400, implicit $exec + %2402:sreg_64_xexec = S_OR_B64 %1727, %2311, implicit-def dead $scc + %2403:vgpr_32 = V_CNDMASK_B32_e64 0, %1773, 0, %1666, %2402, implicit $exec + %2404:sreg_64_xexec = S_OR_B64 
%1727, %2312, implicit-def dead $scc + %2405:vgpr_32 = V_CNDMASK_B32_e64 0, %1775, 0, %1666, %2404, implicit $exec + %2406:sreg_64_xexec = S_OR_B64 %1727, %2313, implicit-def dead $scc + %2407:vgpr_32 = V_CNDMASK_B32_e64 0, %1777, 0, %1666, %2406, implicit $exec + %2408:sreg_64_xexec = S_OR_B64 %1727, %2314, implicit-def dead $scc + %2409:vgpr_32 = V_CNDMASK_B32_e64 0, %1779, 0, %1666, %2408, implicit $exec + %2410:sreg_64_xexec = S_OR_B64 %1727, %2315, implicit-def dead $scc + %2411:vgpr_32 = V_CNDMASK_B32_e64 0, %1781, 0, %1666, %2410, implicit $exec + %2412:sreg_64_xexec = S_OR_B64 %1727, %2316, implicit-def dead $scc + %2413:vgpr_32 = V_CNDMASK_B32_e64 0, %1783, 0, %1666, %2412, implicit $exec + %2414:sreg_64_xexec = S_OR_B64 %1727, %2317, implicit-def dead $scc + %2415:vgpr_32 = V_CNDMASK_B32_e64 0, %1785, 0, %1666, %2414, implicit $exec + %2416:sreg_64_xexec = S_OR_B64 %1727, %2318, implicit-def dead $scc + %2417:vgpr_32 = V_CNDMASK_B32_e64 0, %1787, 0, %1666, %2416, implicit $exec + %2418:sreg_64_xexec = S_OR_B64 %1727, %2319, implicit-def dead $scc + %2419:vgpr_32 = V_CNDMASK_B32_e64 0, %1789, 0, %1666, %2418, implicit $exec + %2420:sreg_64_xexec = S_OR_B64 %1727, %2320, implicit-def dead $scc + %2421:vgpr_32 = V_CNDMASK_B32_e64 0, %1791, 0, %1666, %2420, implicit $exec + %2422:sreg_64_xexec = S_OR_B64 %1727, %2321, implicit-def dead $scc + %2423:vgpr_32 = V_CNDMASK_B32_e64 0, %1793, 0, %1666, %2422, implicit $exec + %2424:sreg_64_xexec = S_OR_B64 %1727, %2322, implicit-def dead $scc + %2425:vgpr_32 = V_CNDMASK_B32_e64 0, %1795, 0, %1666, %2424, implicit $exec + %2426:sreg_64_xexec = S_OR_B64 %1727, %2323, implicit-def dead $scc + %2427:vgpr_32 = V_CNDMASK_B32_e64 0, %1797, 0, %1666, %2426, implicit $exec + %2428:sreg_64_xexec = S_OR_B64 %1727, %2324, implicit-def dead $scc + %2429:vgpr_32 = V_CNDMASK_B32_e64 0, %1799, 0, %1666, %2428, implicit $exec + %2430:sreg_64_xexec = S_OR_B64 %1727, %2325, implicit-def dead $scc + %2431:vgpr_32 = V_CNDMASK_B32_e64 0, 
%1801, 0, %1666, %2430, implicit $exec + %2432:sreg_64_xexec = S_OR_B64 %1727, %2326, implicit-def dead $scc + %2433:vgpr_32 = V_CNDMASK_B32_e64 0, %1803, 0, %1666, %2432, implicit $exec + %2434:sreg_64_xexec = S_OR_B64 %1727, %2327, implicit-def dead $scc + %2435:vgpr_32 = V_CNDMASK_B32_e64 0, %1805, 0, %1666, %2434, implicit $exec + %2436:sreg_64_xexec = S_OR_B64 %1727, %2328, implicit-def dead $scc + %2437:vgpr_32 = V_CNDMASK_B32_e64 0, %1807, 0, %1666, %2436, implicit $exec + %2438:sreg_64_xexec = S_OR_B64 %1727, %2329, implicit-def dead $scc + %2439:vgpr_32 = V_CNDMASK_B32_e64 0, %1809, 0, %1666, %2438, implicit $exec + %2440:sreg_64_xexec = S_OR_B64 %1727, %2330, implicit-def dead $scc + %2441:vgpr_32 = V_CNDMASK_B32_e64 0, %1811, 0, %1666, %2440, implicit $exec + %2442:sreg_64_xexec = S_OR_B64 %1727, %2331, implicit-def dead $scc + %2443:vgpr_32 = V_CNDMASK_B32_e64 0, %1813, 0, %1666, %2442, implicit $exec + %2444:sreg_64_xexec = S_OR_B64 %1727, %2332, implicit-def dead $scc + %2445:vgpr_32 = V_CNDMASK_B32_e64 0, %1815, 0, %1666, %2444, implicit $exec + %2446:sreg_64_xexec = S_OR_B64 %1727, %2333, implicit-def dead $scc + %2447:vgpr_32 = V_CNDMASK_B32_e64 0, %1817, 0, %1666, %2446, implicit $exec + %2448:sreg_64_xexec = S_OR_B64 %1727, %2334, implicit-def dead $scc + %2449:vgpr_32 = V_CNDMASK_B32_e64 0, %1819, 0, %1666, %2448, implicit $exec + %2450:sreg_64_xexec = S_OR_B64 %1727, %2335, implicit-def dead $scc + %2451:vgpr_32 = V_CNDMASK_B32_e64 0, %1821, 0, %1666, %2450, implicit $exec + %2452:sreg_64_xexec = S_OR_B64 %1727, %2336, implicit-def dead $scc + %2453:vgpr_32 = V_CNDMASK_B32_e64 0, %1823, 0, %1666, %2452, implicit $exec + %2454:sreg_64_xexec = S_OR_B64 %1727, %2337, implicit-def dead $scc + %2455:vgpr_32 = V_CNDMASK_B32_e64 0, %1825, 0, %1666, %2454, implicit $exec + %2456:sreg_64_xexec = S_OR_B64 %1727, %2338, implicit-def dead $scc + %2457:vgpr_32 = V_CNDMASK_B32_e64 0, %1827, 0, %1666, %2456, implicit $exec + %2458:sreg_64_xexec = S_OR_B64 
%1727, %2339, implicit-def dead $scc + %2459:vgpr_32 = V_CNDMASK_B32_e64 0, %1829, 0, %1666, %2458, implicit $exec + %2460:sreg_64_xexec = S_OR_B64 %1727, %2340, implicit-def dead $scc + %2461:vgpr_32 = V_CNDMASK_B32_e64 0, %1831, 0, %1666, %2460, implicit $exec + %2462:sreg_64_xexec = S_OR_B64 %1727, %2341, implicit-def dead $scc + %2463:vgpr_32 = V_CNDMASK_B32_e64 0, %1833, 0, %1666, %2462, implicit $exec + %2464:sreg_64_xexec = S_OR_B64 %1727, %2342, implicit-def dead $scc + %2465:vgpr_32 = V_CNDMASK_B32_e64 0, %1835, 0, %1666, %2464, implicit $exec + %2466:sreg_64_xexec = S_OR_B64 %1727, %2343, implicit-def dead $scc + %2467:vgpr_32 = V_CNDMASK_B32_e64 0, %1837, 0, %1666, %2466, implicit $exec + %2468:sreg_64_xexec = S_OR_B64 %1727, %2344, implicit-def dead $scc + %2469:vgpr_32 = V_CNDMASK_B32_e64 0, %1839, 0, %1666, %2468, implicit $exec + %2470:sreg_64_xexec = S_OR_B64 %1727, %2345, implicit-def dead $scc + %2471:vgpr_32 = V_CNDMASK_B32_e64 0, %1841, 0, %1666, %2470, implicit $exec + %2472:sreg_64_xexec = S_OR_B64 %1727, %2346, implicit-def dead $scc + %2473:vgpr_32 = V_CNDMASK_B32_e64 0, %1843, 0, %1666, %2472, implicit $exec + %2474:sreg_64_xexec = S_OR_B64 %1727, %2347, implicit-def dead $scc + %2475:vgpr_32 = V_CNDMASK_B32_e64 0, %1845, 0, %1666, %2474, implicit $exec + %2476:sreg_64_xexec = S_OR_B64 %1727, %2348, implicit-def dead $scc + %2477:vgpr_32 = V_CNDMASK_B32_e64 0, %1847, 0, %1666, %2476, implicit $exec + %2478:vgpr_32 = nofpexcept V_SUB_F32_e32 %2230, %2353, implicit $mode, implicit $exec + %2479:vgpr_32 = nofpexcept V_SUB_F32_e32 %2231, %2353, implicit $mode, implicit $exec + %2480:vgpr_32 = nofpexcept V_SUB_F32_e32 %2232, %2353, implicit $mode, implicit $exec + %2481:vgpr_32 = nofpexcept V_SUB_F32_e32 %2233, %2353, implicit $mode, implicit $exec + %2482:vgpr_32 = nofpexcept V_SUB_F32_e32 %2234, %2355, implicit $mode, implicit $exec + %2483:vgpr_32 = nofpexcept V_SUB_F32_e32 %2235, %2355, implicit $mode, implicit $exec + %2484:vgpr_32 = 
nofpexcept V_SUB_F32_e32 %2236, %2355, implicit $mode, implicit $exec + %2485:vgpr_32 = nofpexcept V_SUB_F32_e32 %2237, %2355, implicit $mode, implicit $exec + %2486:vgpr_32 = nofpexcept V_SUB_F32_e32 %2242, %2357, implicit $mode, implicit $exec + %2487:vgpr_32 = nofpexcept V_SUB_F32_e32 %2243, %2357, implicit $mode, implicit $exec + %2488:vgpr_32 = nofpexcept V_SUB_F32_e32 %2244, %2357, implicit $mode, implicit $exec + %2489:vgpr_32 = nofpexcept V_SUB_F32_e32 %2245, %2357, implicit $mode, implicit $exec + undef %2490.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2478, implicit $mode, implicit $exec + %2490.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2479, implicit $mode, implicit $exec + undef %2491.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2480, implicit $mode, implicit $exec + %2491.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2481, implicit $mode, implicit $exec + undef %2492.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2482, implicit $mode, implicit $exec + %2492.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2483, implicit $mode, implicit $exec + undef %2493.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2484, implicit $mode, implicit $exec + %2493.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2485, implicit $mode, implicit $exec + %2494:vgpr_32 = nofpexcept V_EXP_F32_e32 %2486, implicit $mode, implicit $exec + %2495:vgpr_32 = nofpexcept V_EXP_F32_e32 %2487, implicit $mode, implicit $exec + %2496:vgpr_32 = nofpexcept V_EXP_F32_e32 %2488, implicit $mode, implicit $exec + %2497:vgpr_32 = nofpexcept V_EXP_F32_e32 %2489, implicit $mode, implicit $exec + undef %2498.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, 0, 0, %2494, %1283, implicit $exec + %2498.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, 0, 0, %2495, %1283, implicit $exec + undef %2499.sub0:vreg_64_align2 = V_CNDMASK_B32_e64 0, 0, 0, %2496, %1283, implicit $exec + %2499.sub1:vreg_64_align2 = V_CNDMASK_B32_e64 0, 0, 0, %2497, %1283, implicit $exec + %2500:vreg_64_align2 = nofpexcept 
V_PK_ADD_F32 8, %2490, 8, %2491, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2501:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2492, 8, %2493, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2502:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2498, 8, %2499, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2503:vgpr_32 = nofpexcept V_ADD_F32_e32 %2500.sub0, %2500.sub1, implicit $mode, implicit $exec + %2504:vgpr_32 = nofpexcept V_ADD_F32_e32 %2501.sub0, %2501.sub1, implicit $mode, implicit $exec + %2505:vgpr_32 = nofpexcept V_ADD_F32_e32 %2502.sub0, %2502.sub1, implicit $mode, implicit $exec + %2506:vgpr_32 = DS_BPERMUTE_B32 %1468, %2503, 0, implicit $exec + %2507:vgpr_32 = nofpexcept V_ADD_F32_e32 %2503, %2506, implicit $mode, implicit $exec + %2508:vgpr_32 = COPY %2507 + %2508:vgpr_32 = V_MOV_B32_dpp %2508, %2508, 296, 15, 15, 0, implicit $exec + %2509:vgpr_32 = nofpexcept V_ADD_F32_e32 %2507, %2508, implicit $mode, implicit $exec + %2510:vgpr_32 = COPY %2509 + %2510:vgpr_32 = V_MOV_B32_dpp %2510, %2510, 321, 15, 15, 0, implicit $exec + %2510:vgpr_32 = V_MOV_B32_dpp %2510, %2510, 27, 15, 15, 0, implicit $exec + %2511:vgpr_32 = nofpexcept V_ADD_F32_e32 %2509, %2510, implicit $mode, implicit $exec + %2512:vgpr_32 = COPY %2511 + %2512:vgpr_32 = V_MOV_B32_dpp %2512, %2512, 78, 15, 15, 0, implicit $exec + %2513:vgpr_32 = nofpexcept V_ADD_F32_e32 %2511, %2512, implicit $mode, implicit $exec + %2514:vgpr_32 = COPY %2513 + %2514:vgpr_32 = V_MOV_B32_dpp %2514, %2514, 177, 15, 15, 0, implicit $exec + undef %2515.sub0:vreg_64_align2 = nofpexcept V_ADD_F32_e32 %2513, %2514, implicit $mode, implicit $exec + %2516:vgpr_32 = DS_BPERMUTE_B32 %1468, %2504, 0, implicit $exec + %2517:vgpr_32 = nofpexcept V_ADD_F32_e32 %2504, %2516, implicit $mode, implicit $exec + %2518:vgpr_32 = COPY %2517 + %2518:vgpr_32 = V_MOV_B32_dpp %2518, %2518, 296, 15, 15, 0, implicit $exec + %2519:vgpr_32 = nofpexcept V_ADD_F32_e32 %2517, %2518, implicit $mode, implicit $exec + %2520:vgpr_32 = COPY 
%2519 + %2520:vgpr_32 = V_MOV_B32_dpp %2520, %2520, 321, 15, 15, 0, implicit $exec + %2520:vgpr_32 = V_MOV_B32_dpp %2520, %2520, 27, 15, 15, 0, implicit $exec + %2521:vgpr_32 = nofpexcept V_ADD_F32_e32 %2519, %2520, implicit $mode, implicit $exec + %2522:vgpr_32 = COPY %2521 + %2522:vgpr_32 = V_MOV_B32_dpp %2522, %2522, 78, 15, 15, 0, implicit $exec + %2523:vgpr_32 = nofpexcept V_ADD_F32_e32 %2521, %2522, implicit $mode, implicit $exec + %2524:vgpr_32 = COPY %2523 + %2524:vgpr_32 = V_MOV_B32_dpp %2524, %2524, 177, 15, 15, 0, implicit $exec + %2525:vgpr_32 = nofpexcept V_ADD_F32_e32 %2523, %2524, implicit $mode, implicit $exec + %2526:vgpr_32 = DS_BPERMUTE_B32 %1468, %2505, 0, implicit $exec + %2527:vgpr_32 = nofpexcept V_ADD_F32_e32 %2505, %2526, implicit $mode, implicit $exec + %2528:vgpr_32 = COPY %2527 + %2528:vgpr_32 = V_MOV_B32_dpp %2528, %2528, 296, 15, 15, 0, implicit $exec + %2529:vgpr_32 = nofpexcept V_ADD_F32_e32 %2527, %2528, implicit $mode, implicit $exec + %2530:vgpr_32 = COPY %2529 + %2530:vgpr_32 = V_MOV_B32_dpp %2530, %2530, 321, 15, 15, 0, implicit $exec + %2530:vgpr_32 = V_MOV_B32_dpp %2530, %2530, 27, 15, 15, 0, implicit $exec + %2531:vgpr_32 = nofpexcept V_ADD_F32_e32 %2529, %2530, implicit $mode, implicit $exec + %2532:vgpr_32 = COPY %2531 + %2532:vgpr_32 = V_MOV_B32_dpp %2532, %2532, 78, 15, 15, 0, implicit $exec + %2533:vgpr_32 = nofpexcept V_ADD_F32_e32 %2531, %2532, implicit $mode, implicit $exec + %2534:vgpr_32 = COPY %2533 + %2534:vgpr_32 = V_MOV_B32_dpp %2534, %2534, 177, 15, 15, 0, implicit $exec + %2535:vgpr_32 = nofpexcept V_ADD_F32_e32 %2533, %2534, implicit $mode, implicit $exec + %2536:vgpr_32 = nofpexcept V_SUB_F32_e32 %1520, %2034, implicit $mode, implicit $exec + %2537:vgpr_32 = nofpexcept V_SUB_F32_e32 %1522, %2353, implicit $mode, implicit $exec + %2538:vgpr_32 = nofpexcept V_SUB_F32_e32 %1533.sub0, %2355, implicit $mode, implicit $exec + %2539:vgpr_32 = nofpexcept V_SUB_F32_e32 %1533.sub1, %2357, implicit $mode, implicit 
$exec + %2540:vgpr_32 = nofpexcept V_SUB_F32_e32 %1729, %2359, implicit $mode, implicit $exec + %2541:vgpr_32 = nofpexcept V_SUB_F32_e32 %1731, %2361, implicit $mode, implicit $exec + %2542:vgpr_32 = nofpexcept V_SUB_F32_e32 %1733, %2363, implicit $mode, implicit $exec + %2543:vgpr_32 = nofpexcept V_SUB_F32_e32 %1735, %2365, implicit $mode, implicit $exec + %2544:vgpr_32 = nofpexcept V_SUB_F32_e32 %1737, %2367, implicit $mode, implicit $exec + %2545:vgpr_32 = nofpexcept V_SUB_F32_e32 %1739, %2369, implicit $mode, implicit $exec + %2546:vgpr_32 = nofpexcept V_SUB_F32_e32 %1741, %2371, implicit $mode, implicit $exec + %2547:vgpr_32 = nofpexcept V_SUB_F32_e32 %1743, %2373, implicit $mode, implicit $exec + %2548:vgpr_32 = nofpexcept V_SUB_F32_e32 %1745, %2375, implicit $mode, implicit $exec + %2549:vgpr_32 = nofpexcept V_SUB_F32_e32 %1747, %2377, implicit $mode, implicit $exec + %2550:vgpr_32 = nofpexcept V_SUB_F32_e32 %1749, %2379, implicit $mode, implicit $exec + %2551:vgpr_32 = nofpexcept V_SUB_F32_e32 %1751, %2381, implicit $mode, implicit $exec + %2552:vgpr_32 = nofpexcept V_SUB_F32_e32 %1753, %2383, implicit $mode, implicit $exec + %2553:vgpr_32 = nofpexcept V_SUB_F32_e32 %1755, %2385, implicit $mode, implicit $exec + %2554:vgpr_32 = nofpexcept V_SUB_F32_e32 %1757, %2387, implicit $mode, implicit $exec + %2555:vgpr_32 = nofpexcept V_SUB_F32_e32 %1759, %2389, implicit $mode, implicit $exec + %2556:vgpr_32 = nofpexcept V_SUB_F32_e32 %1761, %2391, implicit $mode, implicit $exec + %2557:vgpr_32 = nofpexcept V_SUB_F32_e32 %1763, %2393, implicit $mode, implicit $exec + %2558:vgpr_32 = nofpexcept V_SUB_F32_e32 %1765, %2395, implicit $mode, implicit $exec + %2559:vgpr_32 = nofpexcept V_SUB_F32_e32 %1767, %2397, implicit $mode, implicit $exec + %2560:vgpr_32 = nofpexcept V_SUB_F32_e32 %1769, %2399, implicit $mode, implicit $exec + %2561:vgpr_32 = nofpexcept V_SUB_F32_e32 %1771, %2401, implicit $mode, implicit $exec + %2562:vgpr_32 = nofpexcept V_SUB_F32_e32 %1773, %2403, 
implicit $mode, implicit $exec + %2563:vgpr_32 = nofpexcept V_SUB_F32_e32 %1775, %2405, implicit $mode, implicit $exec + %2564:vgpr_32 = nofpexcept V_SUB_F32_e32 %1777, %2407, implicit $mode, implicit $exec + %2565:vgpr_32 = nofpexcept V_SUB_F32_e32 %1779, %2409, implicit $mode, implicit $exec + %2566:vgpr_32 = nofpexcept V_SUB_F32_e32 %1781, %2411, implicit $mode, implicit $exec + %2567:vgpr_32 = nofpexcept V_SUB_F32_e32 %1783, %2413, implicit $mode, implicit $exec + %2568:vgpr_32 = nofpexcept V_SUB_F32_e32 %1785, %2415, implicit $mode, implicit $exec + %2569:vgpr_32 = nofpexcept V_SUB_F32_e32 %1787, %2417, implicit $mode, implicit $exec + %2570:vgpr_32 = nofpexcept V_SUB_F32_e32 %1789, %2419, implicit $mode, implicit $exec + %2571:vgpr_32 = nofpexcept V_SUB_F32_e32 %1791, %2421, implicit $mode, implicit $exec + %2572:vgpr_32 = nofpexcept V_SUB_F32_e32 %1793, %2423, implicit $mode, implicit $exec + %2573:vgpr_32 = nofpexcept V_SUB_F32_e32 %1795, %2425, implicit $mode, implicit $exec + %2574:vgpr_32 = nofpexcept V_SUB_F32_e32 %1797, %2427, implicit $mode, implicit $exec + %2575:vgpr_32 = nofpexcept V_SUB_F32_e32 %1799, %2429, implicit $mode, implicit $exec + %2576:vgpr_32 = nofpexcept V_SUB_F32_e32 %1801, %2431, implicit $mode, implicit $exec + %2577:vgpr_32 = nofpexcept V_SUB_F32_e32 %1803, %2433, implicit $mode, implicit $exec + %2578:vgpr_32 = nofpexcept V_SUB_F32_e32 %1805, %2435, implicit $mode, implicit $exec + %2579:vgpr_32 = nofpexcept V_SUB_F32_e32 %1807, %2437, implicit $mode, implicit $exec + %2580:vgpr_32 = nofpexcept V_SUB_F32_e32 %1809, %2439, implicit $mode, implicit $exec + %2581:vgpr_32 = nofpexcept V_SUB_F32_e32 %1811, %2441, implicit $mode, implicit $exec + %2582:vgpr_32 = nofpexcept V_SUB_F32_e32 %1813, %2443, implicit $mode, implicit $exec + %2583:vgpr_32 = nofpexcept V_SUB_F32_e32 %1815, %2445, implicit $mode, implicit $exec + %2584:vgpr_32 = nofpexcept V_SUB_F32_e32 %1817, %2447, implicit $mode, implicit $exec + %2585:vgpr_32 = nofpexcept 
V_SUB_F32_e32 %1819, %2449, implicit $mode, implicit $exec + %2586:vgpr_32 = nofpexcept V_SUB_F32_e32 %1821, %2451, implicit $mode, implicit $exec + %2587:vgpr_32 = nofpexcept V_SUB_F32_e32 %1823, %2453, implicit $mode, implicit $exec + %2588:vgpr_32 = nofpexcept V_SUB_F32_e32 %1825, %2455, implicit $mode, implicit $exec + %2589:vgpr_32 = nofpexcept V_SUB_F32_e32 %1827, %2457, implicit $mode, implicit $exec + %2590:vgpr_32 = nofpexcept V_SUB_F32_e32 %1829, %2459, implicit $mode, implicit $exec + %2591:vgpr_32 = nofpexcept V_SUB_F32_e32 %1831, %2461, implicit $mode, implicit $exec + %2592:vgpr_32 = nofpexcept V_SUB_F32_e32 %1833, %2463, implicit $mode, implicit $exec + %2593:vgpr_32 = nofpexcept V_SUB_F32_e32 %1835, %2465, implicit $mode, implicit $exec + %2594:vgpr_32 = nofpexcept V_SUB_F32_e32 %1837, %2467, implicit $mode, implicit $exec + %2595:vgpr_32 = nofpexcept V_SUB_F32_e32 %1839, %2469, implicit $mode, implicit $exec + %2596:vgpr_32 = nofpexcept V_SUB_F32_e32 %1841, %2471, implicit $mode, implicit $exec + %2597:vgpr_32 = nofpexcept V_SUB_F32_e32 %1843, %2473, implicit $mode, implicit $exec + %2598:vgpr_32 = nofpexcept V_SUB_F32_e32 %1845, %2475, implicit $mode, implicit $exec + %2599:vgpr_32 = nofpexcept V_SUB_F32_e32 %1847, %2477, implicit $mode, implicit $exec + undef %2600.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2536, implicit $mode, implicit $exec + undef %2601.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2537, implicit $mode, implicit $exec + undef %2602.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2538, implicit $mode, implicit $exec + undef %2603.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2539, implicit $mode, implicit $exec + undef %2604.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2540, implicit $mode, implicit $exec + undef %2605.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2541, implicit $mode, implicit $exec + undef %2606.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2542, implicit $mode, implicit $exec + undef 
%2607.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2543, implicit $mode, implicit $exec + %2600.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2544, implicit $mode, implicit $exec + %2601.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2545, implicit $mode, implicit $exec + undef %2608.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2546, implicit $mode, implicit $exec + undef %2609.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2547, implicit $mode, implicit $exec + undef %2610.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2548, implicit $mode, implicit $exec + undef %2611.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2549, implicit $mode, implicit $exec + %2606.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2550, implicit $mode, implicit $exec + %2607.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2551, implicit $mode, implicit $exec + %2600.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2552, implicit $mode, implicit $exec + %2601.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2553, implicit $mode, implicit $exec + %2608.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2554, implicit $mode, implicit $exec + %2609.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2555, implicit $mode, implicit $exec + %2610.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2556, implicit $mode, implicit $exec + %2611.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2557, implicit $mode, implicit $exec + %2606.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2558, implicit $mode, implicit $exec + %2607.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2559, implicit $mode, implicit $exec + %2600.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2560, implicit $mode, implicit $exec + %2601.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2561, implicit $mode, implicit $exec + %2602.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2562, implicit $mode, implicit $exec + %2603.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2563, implicit $mode, implicit $exec + 
%2604.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2564, implicit $mode, implicit $exec + %2605.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2565, implicit $mode, implicit $exec + %2606.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2566, implicit $mode, implicit $exec + %2607.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2567, implicit $mode, implicit $exec + undef %2612.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2568, implicit $mode, implicit $exec + undef %2613.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2569, implicit $mode, implicit $exec + undef %2614.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2570, implicit $mode, implicit $exec + undef %2615.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2571, implicit $mode, implicit $exec + undef %2616.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2572, implicit $mode, implicit $exec + undef %2617.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2573, implicit $mode, implicit $exec + undef %2618.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2574, implicit $mode, implicit $exec + undef %2619.sub0:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2575, implicit $mode, implicit $exec + %2612.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2576, implicit $mode, implicit $exec + %2613.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2577, implicit $mode, implicit $exec + %2614.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2578, implicit $mode, implicit $exec + %2615.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2579, implicit $mode, implicit $exec + %2616.sub1:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2580, implicit $mode, implicit $exec + undef %2620.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2581, implicit $mode, implicit $exec + undef %2621.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2582, implicit $mode, implicit $exec + undef %2622.sub0:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2583, implicit $mode, implicit $exec + %2612.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 
%2584, implicit $mode, implicit $exec + %2613.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2585, implicit $mode, implicit $exec + %2614.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2586, implicit $mode, implicit $exec + %2615.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2587, implicit $mode, implicit $exec + %2616.sub2:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2588, implicit $mode, implicit $exec + %2620.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2589, implicit $mode, implicit $exec + %2621.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2590, implicit $mode, implicit $exec + %2622.sub1:vreg_64_align2 = nofpexcept V_EXP_F32_e32 %2591, implicit $mode, implicit $exec + %2612.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2592, implicit $mode, implicit $exec + %2613.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2593, implicit $mode, implicit $exec + %2614.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2594, implicit $mode, implicit $exec + %2615.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2595, implicit $mode, implicit $exec + %2616.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2596, implicit $mode, implicit $exec + %2617.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2597, implicit $mode, implicit $exec + %2618.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2598, implicit $mode, implicit $exec + %2619.sub3:vreg_128_align2 = nofpexcept V_EXP_F32_e32 %2599, implicit $mode, implicit $exec + %2623:vgpr_32 = nofpexcept V_MUL_F32_e32 %1602, %2602.sub0, implicit $mode, implicit $exec + %2624:vgpr_32 = nofpexcept V_MUL_F32_e32 %1603, %2603.sub0, implicit $mode, implicit $exec + %2625:vgpr_32 = nofpexcept V_MUL_F32_e32 %2604.sub0, %2096, implicit $mode, implicit $exec + %2626:vgpr_32 = nofpexcept V_MUL_F32_e32 %2605.sub0, %2097, implicit $mode, implicit $exec + %2627:vgpr_32 = nofpexcept V_MUL_F32_e32 %2606.sub0, %2098, implicit $mode, implicit $exec + %2628:vgpr_32 = nofpexcept V_MUL_F32_e32 %2607.sub0, %2099, implicit $mode, implicit 
$exec + %2629:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2100, 8, %2600.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + undef %2630.sub0:vreg_64_align2 = nofpexcept V_MUL_F32_e32 %2606.sub1, %2101, implicit $mode, implicit $exec + %2631:vgpr_32 = nofpexcept V_MUL_F32_e32 %2607.sub1, %2102, implicit $mode, implicit $exec + %2632:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2608, 8, %2103, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2633:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2609, 8, %2104, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2634:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2610, 8, %2105, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2635:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2611, 8, %2106, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2630.sub1:vreg_64_align2 = nofpexcept V_MUL_F32_e32 %2606.sub2, %2107, implicit $mode, implicit $exec + %2636:vgpr_32 = nofpexcept V_MUL_F32_e32 %2607.sub2, %2108, implicit $mode, implicit $exec + %2637:vgpr_32 = nofpexcept V_MUL_F32_e32 %2602.sub3, %2109, implicit $mode, implicit $exec + %2638:vgpr_32 = nofpexcept V_MUL_F32_e32 %2603.sub3, %2110, implicit $mode, implicit $exec + %2639:vgpr_32 = nofpexcept V_MUL_F32_e32 %2604.sub3, %2111, implicit $mode, implicit $exec + %2640:vgpr_32 = nofpexcept V_MUL_F32_e32 %2605.sub3, %2112, implicit $mode, implicit $exec + %2641:vgpr_32 = nofpexcept V_MUL_F32_e32 %2606.sub3, %2113, implicit $mode, implicit $exec + %2642:vgpr_32 = nofpexcept V_MUL_F32_e32 %2607.sub3, %2114, implicit $mode, implicit $exec + undef %2643.sub0:vreg_64_align2 = nofpexcept V_MUL_F32_e32 %2616.sub0, %2115, implicit $mode, implicit $exec + %2644:vgpr_32 = nofpexcept V_MUL_F32_e32 %2617.sub0, %2116, implicit $mode, implicit $exec + %2645:vgpr_32 = nofpexcept V_MUL_F32_e32 %2618.sub0, %2117, implicit $mode, implicit $exec + %2646:vgpr_32 = nofpexcept V_MUL_F32_e32 %2619.sub0, %2118, implicit $mode, implicit $exec + %2643.sub1:vreg_64_align2 = nofpexcept V_MUL_F32_e32 
%2616.sub1, %2119, implicit $mode, implicit $exec + undef %2647.sub0:vreg_64_align2 = nofpexcept V_MUL_F32_e32 %2616.sub2, %2120, implicit $mode, implicit $exec + %2648:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2620, 8, %2121, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2649:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2621, 8, %2122, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2650:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2622, 8, %2123, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2647.sub1:vreg_64_align2 = nofpexcept V_MUL_F32_e32 %2616.sub3, %2124, implicit $mode, implicit $exec + %2651:vgpr_32 = nofpexcept V_MUL_F32_e32 %2617.sub3, %2125, implicit $mode, implicit $exec + %2652:vgpr_32 = nofpexcept V_MUL_F32_e32 %2618.sub3, %2126, implicit $mode, implicit $exec + %2653:vgpr_32 = nofpexcept V_MUL_F32_e32 %2619.sub3, %2127, implicit $mode, implicit $exec + undef %2654.sub0:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2623, %2525, implicit $mode, implicit $exec + undef %2655.sub0:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2624, %2535, implicit $mode, implicit $exec + undef %2656.sub0:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2625, implicit $mode, implicit $exec + undef %2657.sub0:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2626, implicit $mode, implicit $exec + undef %2658.sub0:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2627, implicit $mode, implicit $exec + undef %2659.sub0:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2628, implicit $mode, implicit $exec + undef %2660.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_ADD_F32 8, %2629, 8, %2095, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %311.sub0:vreg_64_align2 = COPY %315.sub1 + %2661:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %1597.sub0_sub1, 8, %311, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %1571.sub1:vreg_64_align2 = COPY %2055.sub1 + %2662:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2661, 8, %1571, 0, 0, 0, 0, 0, implicit $mode, 
implicit $exec + %2663:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2662, 8, %2601.sub0_sub1, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2515.sub1:vreg_64_align2 = COPY %2055.sub1 + undef %2664.sub0_sub1:vreg_128_align2 = nofpexcept V_PK_ADD_F32 8, %2515, 8, %2663, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2659.sub1:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2631, implicit $mode, implicit $exec + %2665:vreg_64_align2 = nofpexcept V_PK_ADD_F32 12, %2055, 8, %2632, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2666:vreg_64_align2 = nofpexcept V_PK_ADD_F32 12, %2055, 8, %2633, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2667:vreg_64_align2 = nofpexcept V_PK_ADD_F32 12, %2055, 8, %2634, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2668:vreg_64_align2 = nofpexcept V_PK_ADD_F32 12, %2055, 8, %2635, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2669:vreg_64_align2 = nofpexcept V_PK_ADD_F32 12, %2055, 8, %2630, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2659.sub2:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2636, implicit $mode, implicit $exec + undef %2670.sub0:vreg_64_align2 = COPY %307.sub0 + %2670.sub1:vreg_64_align2 = COPY %303.sub0 + %2671:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %1596.sub2_sub3, 8, %2670, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2672:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2671, 12, %2055, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2673:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2600.sub2_sub3, 8, %2672, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2660.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_ADD_F32 12, %2055, 8, %2673, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %303.sub0:vreg_64_align2 = COPY %307.sub1 + %2674:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %1597.sub2_sub3, 8, %303, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2675:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %2674, 12, %2055, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + 
%2676:vreg_64_align2 = nofpexcept V_PK_MUL_F32 8, %2601.sub2_sub3, 8, %2675, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2664.sub2_sub3:vreg_128_align2 = nofpexcept V_PK_ADD_F32 12, %2055, 8, %2676, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2654.sub3:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2637, implicit $mode, implicit $exec + %2655.sub3:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2638, implicit $mode, implicit $exec + %2656.sub3:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2639, implicit $mode, implicit $exec + %2657.sub3:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2640, implicit $mode, implicit $exec + %2658.sub3:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2641, implicit $mode, implicit $exec + %2659.sub3:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2642, implicit $mode, implicit $exec + undef %2677.sub0:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2644, implicit $mode, implicit $exec + undef %2678.sub0:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2645, implicit $mode, implicit $exec + undef %2679.sub0:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2646, implicit $mode, implicit $exec + %2680:vreg_64_align2 = nofpexcept V_PK_ADD_F32 12, %2055, 8, %2648, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2681:vreg_64_align2 = nofpexcept V_PK_ADD_F32 12, %2055, 8, %2649, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2682:vreg_64_align2 = nofpexcept V_PK_ADD_F32 12, %2055, 8, %2650, 0, 0, 0, 0, 0, implicit $mode, implicit $exec + %2677.sub3:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2651, implicit $mode, implicit $exec + %2678.sub3:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2652, implicit $mode, implicit $exec + %2679.sub3:vreg_128_align2 = nofpexcept V_ADD_F32_e32 %2055.sub1, %2653, implicit $mode, implicit $exec + undef %2683.sub0:vreg_64_align2 = V_CVT_PK_F16_F32_e64 0, %2039.sub0, 0, %2039.sub1, 0, 0, implicit $exec + 
%2683.sub1:vreg_64_align2 = V_CVT_PK_F16_F32_e64 0, %2040.sub0, 0, %2040.sub1, 0, 0, implicit $exec + undef %2684.sub0:vreg_64_align2 = V_CVT_PK_F16_F32_e64 0, %2490.sub0, 0, %2490.sub1, 0, 0, implicit $exec + %2684.sub1:vreg_64_align2 = V_CVT_PK_F16_F32_e64 0, %2491.sub0, 0, %2491.sub1, 0, 0, implicit $exec + undef %2685.sub0:vreg_64_align2 = V_CVT_PK_F16_F32_e64 0, %2492.sub0, 0, %2492.sub1, 0, 0, implicit $exec + %2685.sub1:vreg_64_align2 = V_CVT_PK_F16_F32_e64 0, %2493.sub0, 0, %2493.sub1, 0, 0, implicit $exec + %2686:vgpr_32 = V_CVT_PK_F16_F32_e64 0, %2494, 0, %2495, 0, 0, implicit $exec + %2687:vgpr_32 = V_LSHRREV_B32_e32 16, %2686, implicit $exec + %2688:vgpr_32 = V_CVT_PK_F16_F32_e64 0, %2496, 0, %2497, 0, 0, implicit $exec + %2689:vgpr_32 = V_LSHRREV_B32_e32 16, %2688, implicit $exec + %2690:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %2686, %1283, implicit $exec + %2691:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %2687, %1283, implicit $exec + %2692:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %2688, %1283, implicit $exec + %2693:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, %2689, %1283, implicit $exec + %2694:vgpr_32 = nuw V_ADD_U32_e32 %15, %108, implicit $exec + undef %2695.sub0_sub1:av_128_align2 = DS_READ_B64_TR_B16 %2694, 0, 0, implicit $exec + %2696:vgpr_32 = nuw V_ADD_U32_e32 %15, %1617, implicit $exec + %2695.sub2_sub3:av_128_align2 = DS_READ_B64_TR_B16 %2696, 0, 0, implicit $exec + %2697:vgpr_32 = nuw V_ADD_U32_e32 %15, %1619, implicit $exec + undef %2698.sub0_sub1:av_128_align2 = DS_READ_B64_TR_B16 %2697, 0, 0, implicit $exec + %2699:vgpr_32 = nuw V_ADD_U32_e32 %15, %1622, implicit $exec + %2698.sub2_sub3:av_128_align2 = DS_READ_B64_TR_B16 %2699, 0, 0, implicit $exec + %2700:vgpr_32 = nuw V_ADD_U32_e32 %15, %1624, implicit $exec + undef %2701.sub0_sub1:av_128_align2 = DS_READ_B64_TR_B16 %2700, 0, 0, implicit $exec + %2702:vgpr_32 = nuw V_ADD_U32_e32 %15, %1627, implicit $exec + %2701.sub2_sub3:av_128_align2 = DS_READ_B64_TR_B16 %2702, 0, 0, implicit $exec + %2703:vgpr_32 = 
nuw V_ADD_U32_e32 %15, %1629, implicit $exec + undef %2704.sub0_sub1:av_128_align2 = DS_READ_B64_TR_B16 %2703, 0, 0, implicit $exec + %2705:vgpr_32 = nuw V_ADD_U32_e32 %15, %1632, implicit $exec + %2704.sub2_sub3:av_128_align2 = DS_READ_B64_TR_B16 %2705, 0, 0, implicit $exec + S_WAITCNT 49279 + WAVE_BARRIER + DS_WRITE_B128_gfx9 %37, %2600, 0, 0, implicit $exec + DS_WRITE_B128_gfx9 %37, %2601, 16, 0, implicit $exec + %2602.sub1:vreg_128_align2 = COPY %2608.sub0 + %2602.sub2:vreg_128_align2 = COPY %2608.sub1 + DS_WRITE_B128_gfx9 %37, %2602, 32, 0, implicit $exec + %2603.sub1:vreg_128_align2 = COPY %2609.sub0 + %2603.sub2:vreg_128_align2 = COPY %2609.sub1 + DS_WRITE_B128_gfx9 %37, %2603, 48, 0, implicit $exec + %2604.sub1:vreg_128_align2 = COPY %2610.sub0 + %2604.sub2:vreg_128_align2 = COPY %2610.sub1 + DS_WRITE2ST64_B64_gfx9 %1639, %1924, %1924, 32, 40, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1639, %1924, %1924, 48, 56, 0, implicit $exec + undef %2706.sub1:vreg_64_align2 = V_PERM_B32_e64 %2693, %2692, %1641, implicit $exec + %2706.sub0:vreg_64_align2 = V_PERM_B32_e64 %2691, %2690, %1641, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1640, %2706, %1924, 0, 8, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1640, %1924, %1924, 16, 24, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1640, %1924, %1924, 32, 40, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1640, %1924, %1924, 48, 56, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1643, %1924, %1924, 0, 8, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1643, %1924, %1924, 16, 24, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1643, %1924, %1924, 32, 40, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1643, %1924, %1924, 48, 56, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1644, %1924, %1924, 0, 8, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1644, %1924, %1924, 16, 24, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1644, %1924, %1924, 32, 40, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1644, %1924, %1924, 48, 56, 0, implicit $exec + 
DS_WRITE2ST64_B64_gfx9 %1645, %1924, %1924, 0, 8, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1645, %1924, %1924, 16, 24, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1645, %1924, %1924, 32, 40, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1645, %1924, %1924, 48, 56, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1646, %1924, %1924, 0, 8, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1646, %1924, %1924, 16, 24, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1646, %1924, %1924, 32, 40, 0, implicit $exec + DS_WRITE2ST64_B64_gfx9 %1646, %1924, %1924, 48, 56, 0, implicit $exec + S_WAITCNT 49279 + WAVE_BARRIER + undef %2707.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %169, 0, 16, 0, implicit $exec + %2708:vgpr_32 = V_ADD_U32_e32 4096, %169, implicit $exec + undef %2709.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %2708, 0, 16, 0, implicit $exec + %2710:vgpr_32 = V_ADD_U32_e32 8192, %169, implicit $exec + undef %2711.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %2710, 0, 16, 0, implicit $exec + %2712:vgpr_32 = V_ADD_U32_e32 12288, %169, implicit $exec + undef %2713.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %2712, 0, 16, 0, implicit $exec + %2714:vgpr_32 = V_ADD_U32_e32 16384, %169, implicit $exec + undef %2715.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %2714, 0, 16, 0, implicit $exec + %2716:vgpr_32 = V_ADD_U32_e32 20480, %169, implicit $exec + undef %2717.sub0_sub1_sub2_sub3:av_192_align2 = DS_READ2_B64_gfx9 %2716, 0, 16, 0, implicit $exec +... From d66cd50b3c2b8290fb8cb418996e7cd8885e5dc5 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 22 Jun 2026 17:19:18 +0200 Subject: [PATCH 026/511] clang/Driver: Use struct type for BoundArch instead of StringRef (#204748) Change BoundArch arguments in the clang driver from StringRef (or sometimes const char*) to a dedicated struct type that contains both the architecture string and a parsed OffloadArch enum field. In the future it may be useful to contain other feature bits here. 
Co-Authored-By: Claude Opus 4.6 --- clang/include/clang/Basic/OffloadArch.h | 32 ++- clang/include/clang/Driver/Action.h | 46 +-- clang/include/clang/Driver/Compilation.h | 21 +- clang/include/clang/Driver/Driver.h | 21 +- clang/include/clang/Driver/Job.h | 7 +- clang/include/clang/Driver/SanitizerArgs.h | 3 +- clang/include/clang/Driver/ToolChain.h | 27 +- clang/lib/Driver/Action.cpp | 26 +- clang/lib/Driver/Compilation.cpp | 14 +- clang/lib/Driver/Driver.cpp | 264 +++++++++--------- clang/lib/Driver/SanitizerArgs.cpp | 15 +- clang/lib/Driver/ToolChain.cpp | 27 +- clang/lib/Driver/ToolChains/AIX.cpp | 2 +- clang/lib/Driver/ToolChains/AIX.h | 3 +- clang/lib/Driver/ToolChains/AMDGPU.cpp | 45 +-- clang/lib/Driver/ToolChains/AMDGPU.h | 14 +- clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp | 13 +- clang/lib/Driver/ToolChains/AMDGPUOpenMP.h | 5 +- clang/lib/Driver/ToolChains/AVR.cpp | 2 +- clang/lib/Driver/ToolChains/AVR.h | 3 +- clang/lib/Driver/ToolChains/BareMetal.cpp | 8 +- clang/lib/Driver/ToolChains/BareMetal.h | 5 +- clang/lib/Driver/ToolChains/CSKYToolChain.cpp | 2 +- clang/lib/Driver/ToolChains/CSKYToolChain.h | 3 +- clang/lib/Driver/ToolChains/Clang.cpp | 39 ++- clang/lib/Driver/ToolChains/CommonArgs.cpp | 2 +- clang/lib/Driver/ToolChains/CrossWindows.cpp | 5 +- clang/lib/Driver/ToolChains/CrossWindows.h | 2 +- clang/lib/Driver/ToolChains/Cuda.cpp | 47 ++-- clang/lib/Driver/ToolChains/Cuda.h | 12 +- clang/lib/Driver/ToolChains/Darwin.cpp | 42 ++- clang/lib/Driver/ToolChains/Darwin.h | 17 +- clang/lib/Driver/ToolChains/Flang.cpp | 12 +- clang/lib/Driver/ToolChains/Flang.h | 9 +- clang/lib/Driver/ToolChains/FreeBSD.cpp | 5 +- clang/lib/Driver/ToolChains/FreeBSD.h | 2 +- clang/lib/Driver/ToolChains/Fuchsia.cpp | 11 +- clang/lib/Driver/ToolChains/Fuchsia.h | 7 +- clang/lib/Driver/ToolChains/Gnu.cpp | 10 +- clang/lib/Driver/ToolChains/Gnu.h | 5 +- clang/lib/Driver/ToolChains/HIPAMD.cpp | 17 +- clang/lib/Driver/ToolChains/HIPAMD.h | 7 +- 
clang/lib/Driver/ToolChains/HIPSPV.cpp | 15 +- clang/lib/Driver/ToolChains/HIPSPV.h | 7 +- clang/lib/Driver/ToolChains/HIPUtility.cpp | 10 +- clang/lib/Driver/ToolChains/HLSL.cpp | 2 +- clang/lib/Driver/ToolChains/HLSL.h | 2 +- clang/lib/Driver/ToolChains/Haiku.cpp | 5 +- clang/lib/Driver/ToolChains/Haiku.h | 2 +- clang/lib/Driver/ToolChains/Hexagon.cpp | 2 +- clang/lib/Driver/ToolChains/Hexagon.h | 3 +- clang/lib/Driver/ToolChains/Linux.cpp | 13 +- clang/lib/Driver/ToolChains/Linux.h | 7 +- clang/lib/Driver/ToolChains/MSP430.cpp | 2 +- clang/lib/Driver/ToolChains/MSP430.h | 3 +- clang/lib/Driver/ToolChains/MSVC.cpp | 17 +- clang/lib/Driver/ToolChains/MSVC.h | 9 +- clang/lib/Driver/ToolChains/Managarm.cpp | 5 +- clang/lib/Driver/ToolChains/Managarm.h | 2 +- clang/lib/Driver/ToolChains/MinGW.cpp | 7 +- clang/lib/Driver/ToolChains/MinGW.h | 5 +- clang/lib/Driver/ToolChains/NetBSD.cpp | 7 +- clang/lib/Driver/ToolChains/NetBSD.h | 5 +- clang/lib/Driver/ToolChains/OHOS.cpp | 5 +- clang/lib/Driver/ToolChains/OHOS.h | 2 +- clang/lib/Driver/ToolChains/OpenBSD.cpp | 5 +- clang/lib/Driver/ToolChains/OpenBSD.h | 2 +- clang/lib/Driver/ToolChains/PS4CPU.cpp | 12 +- clang/lib/Driver/ToolChains/PS4CPU.h | 7 +- clang/lib/Driver/ToolChains/SPIRVOpenMP.cpp | 2 +- clang/lib/Driver/ToolChains/SPIRVOpenMP.h | 3 +- clang/lib/Driver/ToolChains/SYCL.cpp | 14 +- clang/lib/Driver/ToolChains/SYCL.h | 5 +- clang/lib/Driver/ToolChains/Serenity.cpp | 4 +- clang/lib/Driver/ToolChains/Serenity.h | 2 +- clang/lib/Driver/ToolChains/Solaris.cpp | 5 +- clang/lib/Driver/ToolChains/Solaris.h | 2 +- clang/lib/Driver/ToolChains/VEToolchain.cpp | 3 +- clang/lib/Driver/ToolChains/VEToolchain.h | 3 +- clang/lib/Driver/ToolChains/WebAssembly.cpp | 8 +- clang/lib/Driver/ToolChains/WebAssembly.h | 5 +- clang/lib/Driver/ToolChains/XCore.cpp | 3 +- clang/lib/Driver/ToolChains/XCore.h | 3 +- clang/lib/Driver/ToolChains/ZOS.cpp | 2 +- clang/lib/Driver/ToolChains/ZOS.h | 3 +- 
clang/test/Driver/hip-link-bundle-archive.hip | 12 +- clang/test/Driver/hip-phases.hip | 24 +- clang/test/Driver/hip-target-id.hip | 2 +- clang/test/Driver/hip-toolchain-no-rdc.hip | 8 +- clang/unittests/Driver/DXCModeTest.cpp | 11 +- 90 files changed, 550 insertions(+), 596 deletions(-) diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h index 74501a419e362..dc7840cd78a18 100644 --- a/clang/include/clang/Basic/OffloadArch.h +++ b/clang/include/clang/Basic/OffloadArch.h @@ -9,8 +9,10 @@ #ifndef LLVM_CLANG_BASIC_OFFLOADARCH_H #define LLVM_CLANG_BASIC_OFFLOADARCH_H +#include "llvm/ADT/StringRef.h" +#include <tuple> + namespace llvm { -class StringRef; class Triple; } // namespace llvm @@ -163,6 +165,34 @@ OffloadArch StringToOffloadArch(llvm::StringRef S); llvm::Triple OffloadArchToTriple(const llvm::Triple &DefaultToolchainTriple, OffloadArch ID); +/// Represents a bound architecture for offload / multiple architecture +/// compilation. +struct BoundArch { + llvm::StringRef ArchName; + + /// The parsed offload architecture enum. + /// Will be OffloadArch::Unknown if ArchName is not recognized. + OffloadArch Arch = OffloadArch::Unused; + + BoundArch() = default; + explicit BoundArch(llvm::StringRef Name) + : ArchName(Name), + Arch(Name.empty() ? 
OffloadArch::Unknown : StringToOffloadArch(Name)) {} + + BoundArch(llvm::StringRef Name, OffloadArch A) : ArchName(Name), Arch(A) {} + + bool empty() const { return ArchName.empty(); } + explicit operator bool() const { return Arch != OffloadArch::Unused; } + + bool operator==(const BoundArch &Other) const { + return Arch == Other.Arch && ArchName == Other.ArchName; + } + + bool operator<(const BoundArch &Other) const { + return std::tie(Arch, ArchName) < std::tie(Other.Arch, Other.ArchName); + } +}; + } // namespace clang #endif // LLVM_CLANG_BASIC_OFFLOADARCH_H diff --git a/clang/include/clang/Driver/Action.h b/clang/include/clang/Driver/Action.h index 67937b00f6bcf..bbd6f03dd30da 100644 --- a/clang/include/clang/Driver/Action.h +++ b/clang/include/clang/Driver/Action.h @@ -10,6 +10,7 @@ #define LLVM_CLANG_DRIVER_ACTION_H #include "clang/Basic/LLVM.h" +#include "clang/Basic/OffloadArch.h" #include "clang/Driver/Types.h" #include "clang/Driver/Util.h" #include "llvm/ADT/ArrayRef.h" @@ -131,7 +132,7 @@ class Action { OffloadKind OffloadingDeviceKind = OFK_None; /// The Offloading architecture associated with this action. - const char *OffloadingArch = nullptr; + BoundArch OffloadingArch; /// The Offloading toolchain associated with this device action. const ToolChain *OffloadingToolChain = nullptr; @@ -192,14 +193,14 @@ class Action { /// Set the device offload info of this action and propagate it to its /// dependences. - void propagateDeviceOffloadInfo(OffloadKind OKind, const char *OArch, + void propagateDeviceOffloadInfo(OffloadKind OKind, BoundArch OArch, const ToolChain *OToolChain); /// Append the host offload info of this action and propagate it to its /// dependences. 
- void propagateHostOffloadInfo(unsigned OKinds, const char *OArch); + void propagateHostOffloadInfo(unsigned OKinds, BoundArch OArch); - void setHostOffloadInfo(unsigned OKinds, const char *OArch) { + void setHostOffloadInfo(unsigned OKinds, BoundArch OArch) { ActiveOffloadKindMask |= OKinds; OffloadingArch = OArch; } @@ -213,7 +214,7 @@ class Action { } OffloadKind getOffloadingDeviceKind() const { return OffloadingDeviceKind; } - const char *getOffloadingArch() const { return OffloadingArch; } + BoundArch getOffloadingArch() const { return OffloadingArch; } const ToolChain *getOffloadingToolChain() const { return OffloadingToolChain; } @@ -253,14 +254,14 @@ class InputAction : public Action { class BindArchAction : public Action { virtual void anchor(); - /// The architecture to bind, or 0 if the default architecture + /// The architecture to bind, or empty if the default architecture /// should be bound. - StringRef ArchName; + BoundArch ArchName; public: - BindArchAction(Action *Input, StringRef ArchName); + BindArchAction(Action *Input, BoundArch ArchName); - StringRef getArchName() const { return ArchName; } + BoundArch getArch() const { return ArchName; } static bool classof(const Action *A) { return A->getKind() == BindArchClass; @@ -279,7 +280,7 @@ class OffloadAction final : public Action { class DeviceDependences final { public: using ToolChainList = SmallVector; - using BoundArchList = SmallVector; + using BoundArchList = SmallVector; using OffloadKindList = SmallVector; private: @@ -303,12 +304,11 @@ class OffloadAction final : public Action { public: /// Add an action along with the associated toolchain, bound arch, and /// offload kind. - void add(Action &A, const ToolChain &TC, const char *BoundArch, - OffloadKind OKind); + void add(Action &A, const ToolChain &TC, BoundArch BA, OffloadKind OKind); /// Add an action along with the associated toolchain, bound arch, and /// offload kinds. 
- void add(Action &A, const ToolChain &TC, const char *BoundArch, + void add(Action &A, const ToolChain &TC, BoundArch BA, unsigned OffloadKindMask); /// Get each of the individual arrays. @@ -330,29 +330,29 @@ class OffloadAction final : public Action { const ToolChain &HostToolChain; /// The architectures that should be used with this action. - const char *HostBoundArch = nullptr; + BoundArch HostBoundArch; /// The offload kind of each dependence. unsigned HostOffloadKinds = 0u; public: - HostDependence(Action &A, const ToolChain &TC, const char *BoundArch, + HostDependence(Action &A, const ToolChain &TC, BoundArch BA, const unsigned OffloadKinds) - : HostAction(A), HostToolChain(TC), HostBoundArch(BoundArch), + : HostAction(A), HostToolChain(TC), HostBoundArch(BA), HostOffloadKinds(OffloadKinds) {} /// Constructor version that obtains the offload kinds from the device /// dependencies. - HostDependence(Action &A, const ToolChain &TC, const char *BoundArch, + HostDependence(Action &A, const ToolChain &TC, BoundArch BoundArch, const DeviceDependences &DDeps); Action *getAction() const { return &HostAction; } const ToolChain *getToolChain() const { return &HostToolChain; } - const char *getBoundArch() const { return HostBoundArch; } + BoundArch getBoundArch() const { return HostBoundArch; } unsigned getOffloadKinds() const { return HostOffloadKinds; } }; using OffloadActionWorkTy = - llvm::function_ref; + llvm::function_ref; private: /// The host offloading toolchain that should be used with the action. @@ -598,13 +598,13 @@ class OffloadUnbundlingJobAction final : public JobAction { const ToolChain *DependentToolChain = nullptr; /// The bound architecture of the dependent action. - StringRef DependentBoundArch; + BoundArch DependentBoundArch; /// The offload kind of the dependent action. 
const OffloadKind DependentOffloadKind = OFK_None; DependentActionInfo(const ToolChain *DependentToolChain, - StringRef DependentBoundArch, + BoundArch DependentBoundArch, const OffloadKind DependentOffloadKind) : DependentToolChain(DependentToolChain), DependentBoundArch(DependentBoundArch), @@ -621,9 +621,9 @@ class OffloadUnbundlingJobAction final : public JobAction { OffloadUnbundlingJobAction(Action *Input); /// Register information about a dependent action. - void registerDependentActionInfo(const ToolChain *TC, StringRef BoundArch, + void registerDependentActionInfo(const ToolChain *TC, BoundArch BA, OffloadKind Kind) { - DependentActionInfoArray.push_back({TC, BoundArch, Kind}); + DependentActionInfoArray.push_back({TC, BA, Kind}); } /// Return the information about all depending actions. diff --git a/clang/include/clang/Driver/Compilation.h b/clang/include/clang/Driver/Compilation.h index 4ad2dc34a1f85..825806b6cfe33 100644 --- a/clang/include/clang/Driver/Compilation.h +++ b/clang/include/clang/Driver/Compilation.h @@ -10,6 +10,7 @@ #define LLVM_CLANG_DRIVER_COMPILATION_H #include "clang/Basic/LLVM.h" +#include "clang/Basic/OffloadArch.h" #include "clang/Driver/Action.h" #include "clang/Driver/Job.h" #include "clang/Driver/Util.h" @@ -82,16 +83,16 @@ class Compilation { /// architecture, and device offload kind. 
struct TCArgsKey final { const ToolChain *TC = nullptr; - StringRef BoundArch; + BoundArch BoundArchitecture; Action::OffloadKind DeviceOffloadKind = Action::OFK_None; - TCArgsKey(const ToolChain *TC, StringRef BoundArch, + TCArgsKey(const ToolChain *TC, BoundArch BA, Action::OffloadKind DeviceOffloadKind) - : TC(TC), BoundArch(BoundArch), DeviceOffloadKind(DeviceOffloadKind) {} + : TC(TC), BoundArchitecture(BA), DeviceOffloadKind(DeviceOffloadKind) {} bool operator<(const TCArgsKey &K) const { - return std::tie(TC, BoundArch, DeviceOffloadKind) < - std::tie(K.TC, K.BoundArch, K.DeviceOffloadKind); + return std::tie(TC, BoundArchitecture, DeviceOffloadKind) < + std::tie(K.TC, K.BoundArchitecture, K.DeviceOffloadKind); } }; std::map TCArgs; @@ -128,7 +129,7 @@ class Compilation { /// The bound architecture currently being built, if any. Set around /// ConstructJob calls so addCommand can stamp it onto each new Command. - StringRef CurrentBoundArch; + BoundArch CurrentBoundArch; public: Compilation(const Driver &D, const ToolChain &DefaultToolChain, @@ -220,8 +221,8 @@ class Compilation { Jobs.addJob(std::move(Cmd)); } - StringRef getCurrentBoundArch() const { return CurrentBoundArch; } - void setCurrentBoundArch(StringRef Arch) { CurrentBoundArch = Arch; } + BoundArch getCurrentBoundArch() const { return CurrentBoundArch; } + void setCurrentBoundArch(BoundArch BA) { CurrentBoundArch = BA; } llvm::opt::ArgStringList &getTempFiles() { return TempFiles; } const llvm::opt::ArgStringList &getTempFiles() const { return TempFiles; } @@ -248,11 +249,11 @@ class Compilation { /// If a device offloading kind is specified, a translation specific for that /// kind is performed, if any. /// - /// \param BoundArch - The bound architecture name, or 0. + /// \param BA - The bound architecture. /// \param DeviceOffloadKind - The offload device kind that should be used in /// the translation, if any. 
const llvm::opt::DerivedArgList & - getArgsForToolChain(const ToolChain *TC, StringRef BoundArch, + getArgsForToolChain(const ToolChain *TC, BoundArch BA, Action::OffloadKind DeviceOffloadKind); /// addTempFile - Add a file to remove on exit, and returns its diff --git a/clang/include/clang/Driver/Driver.h b/clang/include/clang/Driver/Driver.h index 19a371163f050..eece9ac5293f0 100644 --- a/clang/include/clang/Driver/Driver.h +++ b/clang/include/clang/Driver/Driver.h @@ -12,6 +12,7 @@ #include "clang/Basic/Diagnostic.h" #include "clang/Basic/HeaderInclude.h" #include "clang/Basic/LLVM.h" +#include "clang/Basic/OffloadArch.h" #include "clang/Driver/Action.h" #include "clang/Driver/DriverDiagnostic.h" #include "clang/Driver/InputInfo.h" @@ -523,7 +524,7 @@ class Driver { /// Returns the set of bound architectures active for this offload kind. /// If there are no bound architctures we return a set containing only the /// empty string. - llvm::SmallVector + llvm::SmallVector getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Action::OffloadKind Kind, const ToolChain &TC) const; @@ -658,7 +659,7 @@ class Driver { /// return an InputInfo for the result of running \p A. Will only construct /// jobs for a given (Action, ToolChain, BoundArch, DeviceKind) tuple once. InputInfoList BuildJobsForAction( - Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, + Compilation &C, const Action *A, const ToolChain *TC, BoundArch BA, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, std::map, InputInfoList> &CachedResults, @@ -668,17 +669,17 @@ class Driver { const char *getDefaultImageName() const; /// Creates a temp file. - /// 1. If \p MultipleArch is false or \p BoundArch is empty, the temp file is + /// 1. If \p MultipleArchs is false or \p BoundArchStr is empty, the file is /// in the temporary directory with name $Prefix-%%%%%%.$Suffix. - /// 2. If \p MultipleArch is true and \p BoundArch is not empty, + /// 2. 
If \p MultipleArchs is true and \p BoundArchStr is not empty, /// 2a. If \p NeedUniqueDirectory is false, the temp file is in the - /// temporary directory with name $Prefix-$BoundArch-%%%%%.$Suffix. + /// temporary directory with name $Prefix-$BoundArchStr-%%%%%.$Suffix. /// 2b. If \p NeedUniqueDirectory is true, the temp file is in a unique /// subdiretory with random name under the temporary directory, and - /// the temp file itself has name $Prefix-$BoundArch.$Suffix. + /// the temp file itself has name $Prefix-$BoundArchStr.$Suffix. const char *CreateTempFile(Compilation &C, StringRef Prefix, StringRef Suffix, bool MultipleArchs = false, - StringRef BoundArch = {}, + StringRef BoundArchStr = {}, bool NeedUniqueDirectory = false) const; /// GetNamedOutputPath - Return the name to use for the output of @@ -689,12 +690,12 @@ class Driver { /// \param JA - The action of interest. /// \param BaseInput - The original input file that this action was /// triggered by. - /// \param BoundArch - The bound architecture. + /// \param BA - The bound architecture. /// \param AtTopLevel - Whether this is a "top-level" action. /// \param MultipleArchs - Whether multiple -arch options were supplied. /// \param NormalizedTriple - The normalized triple of the relevant target. const char *GetNamedOutputPath(Compilation &C, const JobAction &JA, - const char *BaseInput, StringRef BoundArch, + const char *BaseInput, BoundArch BA, bool AtTopLevel, bool MultipleArchs, StringRef NormalizedTriple) const; @@ -785,7 +786,7 @@ class Driver { /// jobs specifically for the given action, but will use the cache when /// building jobs for the Action's inputs. 
InputInfoList BuildJobsForActionNoCache( - Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, + Compilation &C, const Action *A, const ToolChain *TC, BoundArch BA, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, std::map, InputInfoList> &CachedResults, diff --git a/clang/include/clang/Driver/Job.h b/clang/include/clang/Driver/Job.h index 116254f79ae6f..56a147e717237 100644 --- a/clang/include/clang/Driver/Job.h +++ b/clang/include/clang/Driver/Job.h @@ -10,6 +10,7 @@ #define LLVM_CLANG_DRIVER_JOB_H #include "clang/Basic/LLVM.h" +#include "clang/Basic/OffloadArch.h" #include "clang/Driver/InputInfo.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" @@ -152,7 +153,7 @@ class Command { /// The bound architecture for this command (e.g. "arm64", "x86_64"). /// Non-empty only for Darwin multi-arch builds. - std::string BoundArch; + std::string BoundArchStr; /// When a response file is needed, we try to put most arguments in an /// exclusive file, while others remains as regular command line arguments. @@ -195,8 +196,8 @@ class Command { const Tool &getCreator() const { return Creator; } /// Return the bound architecture for this command, if any. - StringRef getBoundArch() const { return BoundArch; } - void setBoundArch(StringRef Arch) { BoundArch = std::string(Arch); } + BoundArch getBoundArch() const { return BoundArch(BoundArchStr); } + void setBoundArch(BoundArch BA) { BoundArchStr = BA.ArchName.str(); } /// Returns the kind of response file supported by the current invocation. 
const ResponseFileSupport &getResponseFileSupport() { diff --git a/clang/include/clang/Driver/SanitizerArgs.h b/clang/include/clang/Driver/SanitizerArgs.h index d4ee17802fd8e..6a01b3e36d44c 100644 --- a/clang/include/clang/Driver/SanitizerArgs.h +++ b/clang/include/clang/Driver/SanitizerArgs.h @@ -8,6 +8,7 @@ #ifndef LLVM_CLANG_DRIVER_SANITIZERARGS_H #define LLVM_CLANG_DRIVER_SANITIZERARGS_H +#include "clang/Basic/OffloadArch.h" #include "clang/Basic/Sanitizers.h" #include "clang/Driver/Action.h" #include "clang/Driver/Types.h" @@ -87,7 +88,7 @@ class SanitizerArgs { /// Parses the sanitizer arguments from an argument list. SanitizerArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, bool DiagnoseErrors = true, bool DiagnoseBoundArchErrors = true, - StringRef BoundArch = "", + BoundArch BA = {}, Action::OffloadKind DeviceOffloadKind = Action::OFK_None); bool needsSharedRt() const { return SharedRuntime; } diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h index 8953c299268df..863ba1084cb1a 100644 --- a/clang/include/clang/Driver/ToolChain.h +++ b/clang/include/clang/Driver/ToolChain.h @@ -11,6 +11,7 @@ #include "clang/Basic/LLVM.h" #include "clang/Basic/LangOptions.h" +#include "clang/Basic/OffloadArch.h" #include "clang/Basic/Sanitizers.h" #include "clang/Driver/Action.h" #include "clang/Driver/Multilib.h" @@ -347,7 +348,7 @@ class ToolChain { Multilib::flags_list getMultilibFlags(const llvm::opt::ArgList &) const; SanitizerArgs getSanitizerArgs( - const llvm::opt::ArgList &JobArgs, StringRef BoundArch = "", + const llvm::opt::ArgList &JobArgs, BoundArch BA = {}, Action::OffloadKind DeviceOffloadKind = Action::OFK_None) const; /// Returns the feature requirement for a sanitizer on a specific arch for @@ -355,7 +356,7 @@ class ToolChain { /// the sanitizer is generally supported but requires a specific feature for /// the given BoundArch, or an empty StringRef otherwise. 
virtual StringRef getSanitizerRequirement(SanitizerMask Kinds, - StringRef BoundArch) const { + BoundArch BA) const { return {}; } @@ -393,11 +394,11 @@ class ToolChain { /// specific translations are needed. If \p DeviceOffloadKind is specified /// the translation specific for that offload kind is performed. /// - /// \param BoundArch - The bound architecture name, or 0. + /// \param BA - The bound architecture. /// \param DeviceOffloadKind - The device offload kind used for the /// translation. virtual llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + TranslateArgs(const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { return nullptr; } @@ -421,7 +422,7 @@ class ToolChain { /// a null pointer, otherwise return a DerivedArgList containing the /// translated arguments. virtual llvm::opt::DerivedArgList * - TranslateXarchArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + TranslateXarchArgs(const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind, SmallVectorImpl *AllocatedArgs) const; @@ -715,7 +716,7 @@ class ToolChain { /// ComputeLLVMTriple - Return the LLVM target triple to use, after taking /// command line arguments into account. virtual std::string - ComputeLLVMTriple(const llvm::opt::ArgList &Args, StringRef BoundArch = {}, + ComputeLLVMTriple(const llvm::opt::ArgList &Args, BoundArch BA = {}, types::ID InputType = types::TY_INVALID) const; /// ComputeEffectiveClangTriple - Return the Clang triple to use for this @@ -724,8 +725,7 @@ class ToolChain { /// sets the deployment target) determines the version in the triple passed to /// Clang. 
virtual std::string - ComputeEffectiveClangTriple(const llvm::opt::ArgList &Args, - StringRef BoundArch = {}, + ComputeEffectiveClangTriple(const llvm::opt::ArgList &Args, BoundArch BA = {}, types::ID InputType = types::TY_INVALID) const; /// getDefaultObjCRuntime - Return the default Objective-C runtime @@ -754,9 +754,10 @@ class ToolChain { llvm::opt::ArgStringList &CC1Args) const; /// Add options that need to be passed to cc1 for this target. - virtual void addClangTargetOptions( - const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const; + virtual void + addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, + Action::OffloadKind DeviceOffloadKind) const; /// Add options that need to be passed to cc1as for this target. virtual void @@ -868,7 +869,7 @@ class ToolChain { /// Get paths for device libraries. virtual llvm::SmallVector - getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch, + getDeviceLibs(const llvm::opt::ArgList &Args, BoundArch BA, const Action::OffloadKind DeviceOffloadingKind) const; /// Add the system specific libraries for the active offload kinds. @@ -878,7 +879,7 @@ class ToolChain { /// Return sanitizers which are available in this toolchain. virtual SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const; /// Return sanitizers which are enabled by default. 
diff --git a/clang/lib/Driver/Action.cpp b/clang/lib/Driver/Action.cpp index 72a42a6f957ee..c0b84ffcc95be 100644 --- a/clang/lib/Driver/Action.cpp +++ b/clang/lib/Driver/Action.cpp @@ -59,7 +59,7 @@ const char *Action::getClassName(ActionClass AC) { llvm_unreachable("invalid class"); } -void Action::propagateDeviceOffloadInfo(OffloadKind OKind, const char *OArch, +void Action::propagateDeviceOffloadInfo(OffloadKind OKind, BoundArch OArch, const ToolChain *OToolChain) { // Offload action set its own kinds on their dependences. if (Kind == OffloadClass) @@ -79,7 +79,7 @@ void Action::propagateDeviceOffloadInfo(OffloadKind OKind, const char *OArch, A->propagateDeviceOffloadInfo(OffloadingDeviceKind, OArch, OToolChain); } -void Action::propagateHostOffloadInfo(unsigned OKinds, const char *OArch) { +void Action::propagateHostOffloadInfo(unsigned OKinds, BoundArch OArch) { // Offload action set its own kinds on their dependences. if (Kind == OffloadClass) return; @@ -188,7 +188,7 @@ InputAction::InputAction(const Arg &_Input, types::ID _Type, StringRef _Id) void BindArchAction::anchor() {} -BindArchAction::BindArchAction(Action *Input, StringRef ArchName) +BindArchAction::BindArchAction(Action *Input, BoundArch ArchName) : Action(BindArchClass, Input), ArchName(ArchName) {} void OffloadAction::anchor() {} @@ -198,7 +198,7 @@ OffloadAction::OffloadAction(const HostDependence &HDep) OffloadingArch = HDep.getBoundArch(); ActiveOffloadKindMask = HDep.getOffloadKinds(); HDep.getAction()->propagateHostOffloadInfo(HDep.getOffloadKinds(), - HDep.getBoundArch()); + OffloadingArch); } OffloadAction::OffloadAction(const DeviceDependences &DDeps, types::ID Ty) @@ -226,10 +226,9 @@ OffloadAction::OffloadAction(const HostDependence &HDep, : Action(OffloadClass, HDep.getAction()), HostTC(HDep.getToolChain()), DevToolChains(DDeps.getToolChains()) { // We use the kinds of the host dependence for this action. 
- OffloadingArch = HDep.getBoundArch(); + BoundArch BA = HDep.getBoundArch(); ActiveOffloadKindMask = HDep.getOffloadKinds(); - HDep.getAction()->propagateHostOffloadInfo(HDep.getOffloadKinds(), - HDep.getBoundArch()); + HDep.getAction()->propagateHostOffloadInfo(HDep.getOffloadKinds(), BA); // Add device inputs and propagate info to the device actions. Do work only if // we have dependencies. @@ -314,20 +313,19 @@ OffloadAction::getSingleDeviceDependence(bool DoNotConsiderHostActions) const { } void OffloadAction::DeviceDependences::add(Action &A, const ToolChain &TC, - const char *BoundArch, - OffloadKind OKind) { + BoundArch BA, OffloadKind OKind) { DeviceActions.push_back(&A); DeviceToolChains.push_back(&TC); - DeviceBoundArchs.push_back(BoundArch); + DeviceBoundArchs.push_back(BA); DeviceOffloadKinds.push_back(OKind); } void OffloadAction::DeviceDependences::add(Action &A, const ToolChain &TC, - const char *BoundArch, + BoundArch BA, unsigned OffloadKindMask) { DeviceActions.push_back(&A); DeviceToolChains.push_back(&TC); - DeviceBoundArchs.push_back(BoundArch); + DeviceBoundArchs.push_back(BA); // Add each active offloading kind from a mask. 
for (OffloadKind OKind : {OFK_OpenMP, OFK_Cuda, OFK_HIP, OFK_SYCL}) @@ -336,9 +334,9 @@ void OffloadAction::DeviceDependences::add(Action &A, const ToolChain &TC, } OffloadAction::HostDependence::HostDependence(Action &A, const ToolChain &TC, - const char *BoundArch, + BoundArch BA, const DeviceDependences &DDeps) - : HostAction(A), HostToolChain(TC), HostBoundArch(BoundArch) { + : HostAction(A), HostToolChain(TC), HostBoundArch(BA) { for (auto K : DDeps.getOffloadKinds()) HostOffloadKinds |= K; } diff --git a/clang/lib/Driver/Compilation.cpp b/clang/lib/Driver/Compilation.cpp index f8ca2a3d09407..377ac7e2ad43e 100644 --- a/clang/lib/Driver/Compilation.cpp +++ b/clang/lib/Driver/Compilation.cpp @@ -55,12 +55,12 @@ Compilation::~Compilation() { } const DerivedArgList & -Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch, +Compilation::getArgsForToolChain(const ToolChain *TC, BoundArch BA, Action::OffloadKind DeviceOffloadKind) { if (!TC) TC = &DefaultToolChain; - DerivedArgList *&Entry = TCArgs[{TC, BoundArch, DeviceOffloadKind}]; + DerivedArgList *&Entry = TCArgs[{TC, BA, DeviceOffloadKind}]; if (!Entry) { SmallVector AllocatedArgs; DerivedArgList *OpenMPArgs = nullptr; @@ -74,10 +74,10 @@ Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch, DerivedArgList *NewDAL = nullptr; if (!OpenMPArgs) { - NewDAL = TC->TranslateXarchArgs(*TranslatedArgs, BoundArch, - DeviceOffloadKind, &AllocatedArgs); + NewDAL = TC->TranslateXarchArgs(*TranslatedArgs, BA, DeviceOffloadKind, + &AllocatedArgs); } else { - NewDAL = TC->TranslateXarchArgs(*OpenMPArgs, BoundArch, DeviceOffloadKind, + NewDAL = TC->TranslateXarchArgs(*OpenMPArgs, BA, DeviceOffloadKind, &AllocatedArgs); if (!NewDAL) NewDAL = OpenMPArgs; @@ -86,11 +86,11 @@ Compilation::getArgsForToolChain(const ToolChain *TC, StringRef BoundArch, } if (!NewDAL) { - Entry = TC->TranslateArgs(*TranslatedArgs, BoundArch, DeviceOffloadKind); + Entry = TC->TranslateArgs(*TranslatedArgs, BA, 
DeviceOffloadKind); if (!Entry) Entry = TranslatedArgs; } else { - Entry = TC->TranslateArgs(*NewDAL, BoundArch, DeviceOffloadKind); + Entry = TC->TranslateArgs(*NewDAL, BA, DeviceOffloadKind); if (!Entry) Entry = NewDAL; else diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index e67886abc35b6..ea06235de5c1f 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -2101,7 +2101,7 @@ void Driver::generateCompilationDiagnostics( } if (ArchNames.size() > 1) { // Build a reproducer only for the bound arch that crashed. - StringRef FailingArch = Cmd.getBoundArch(); + StringRef FailingArch = Cmd.getBoundArch().ArchName; if (FailingArch.empty()) { Diag(clang::diag::note_drv_command_failed_diag_msg) << "Error generating preprocessed source(s) - cannot generate " @@ -2732,7 +2732,8 @@ bool Driver::HandleImmediateArgs(Compilation &C) { // FIXME: Remove when darwin's toolchain is initialized during construction. // FIXME: For some more esoteric targets the default toolchain is not the // correct one. - C.getArgsForToolChain(&TC, Triple.getArchName(), Action::OFK_Host); + C.getArgsForToolChain(&TC, BoundArch(Triple.getArchName()), + Action::OFK_Host); RegisterEffectiveTriple TripleRAII(TC, Triple); switch (RLT) { case ToolChain::RLT_CompilerRT: @@ -2823,31 +2824,30 @@ static unsigned PrintActions1(const Compilation &C, Action *A, if (InputAction *IA = dyn_cast(A)) { os << "\"" << IA->getInputArg().getValue() << "\""; } else if (BindArchAction *BIA = dyn_cast(A)) { - os << '"' << BIA->getArchName() << '"' << ", {" + os << '"' << BIA->getArch().ArchName << '"' << ", {" << PrintActions1(C, *BIA->input_begin(), Ids, SibIndent, SibKind) << "}"; } else if (OffloadAction *OA = dyn_cast(A)) { bool IsFirst = true; - OA->doOnEachDependence( - [&](Action *A, const ToolChain *TC, const char *BoundArch) { - assert(TC && "Unknown host toolchain"); - // E.g. 
for two CUDA device dependences whose bound arch is sm_20 and - // sm_35 this will generate: - // "cuda-device" (nvptx64-nvidia-cuda:sm_20) {#ID}, "cuda-device" - // (nvptx64-nvidia-cuda:sm_35) {#ID} - if (!IsFirst) - os << ", "; - os << '"'; - os << A->getOffloadingKindPrefix(); - os << " ("; - os << TC->getTripleString(); - if (BoundArch) - os << ":" << BoundArch; - os << ")"; - os << '"'; - os << " {" << PrintActions1(C, A, Ids, SibIndent, SibKind) << "}"; - IsFirst = false; - SibKind = OtherSibAction; - }); + OA->doOnEachDependence([&](Action *A, const ToolChain *TC, BoundArch BA) { + assert(TC && "Unknown host toolchain"); + // E.g. for two CUDA device dependences whose bound arch is sm_20 and + // sm_35 this will generate: + // "cuda-device" (nvptx64-nvidia-cuda:sm_20) {#ID}, "cuda-device" + // (nvptx64-nvidia-cuda:sm_35) {#ID} + if (!IsFirst) + os << ", "; + os << '"'; + os << A->getOffloadingKindPrefix(); + os << " ("; + os << TC->getTripleString(); + if (!BA.empty()) + os << ":" << BA.ArchName; + os << ")"; + os << '"'; + os << " {" << PrintActions1(C, A, Ids, SibIndent, SibKind) << "}"; + IsFirst = false; + SibKind = OtherSibAction; + }); } else { const ActionList *AL = &A->getInputs(); @@ -2871,8 +2871,8 @@ static unsigned PrintActions1(const Compilation &C, Action *A, auto S = A->getOffloadingKindPrefix(); if (!S.empty()) { offload_os << ", (" << S; - if (A->getOffloadingArch()) - offload_os << ", " << A->getOffloadingArch(); + if (!A->getOffloadingArch().empty()) + offload_os << ", " << A->getOffloadingArch().ArchName; offload_os << ")"; } } @@ -2956,7 +2956,7 @@ void Driver::BuildUniversalActions(Compilation &C, const ToolChain &TC, ActionList Inputs; for (unsigned i = 0, e = Archs.size(); i != e; ++i) - Inputs.push_back(C.MakeAction(Act, Archs[i])); + Inputs.push_back(C.MakeAction(Act, BoundArch(Archs[i]))); // Lipo if necessary, we do it this way because we need to set the arch flag // so that -Xarch_ gets overwritten. 
@@ -3415,20 +3415,8 @@ class OffloadingActionBuilder final { bool EmitLLVM = false; bool EmitAsm = false; - /// ID to identify each device compilation. For CUDA it is simply the - /// GPU arch string. For HIP it is either the GPU arch string or GPU - /// arch string plus feature strings delimited by a plus sign, e.g. - /// gfx906+xnack. - struct TargetID { - /// Target ID string which is persistent throughout the compilation. - const char *ID; - TargetID(OffloadArch Arch) { ID = OffloadArchToString(Arch); } - TargetID(const char *ID) : ID(ID) {} - operator const char *() { return ID; } - operator StringRef() { return StringRef(ID); } - }; /// List of GPU architectures to use in this compilation. - SmallVector GpuArchList; + SmallVector GpuArchList; /// The CUDA actions for the current input. ActionList CudaDeviceActions; @@ -3536,16 +3524,15 @@ class OffloadingActionBuilder final { void appendTopLevelActions(ActionList &AL) override { // Utility to append actions to the top level list. - auto AddTopLevel = [&](Action *A, TargetID TargetID, - const ToolChain *TC) { + auto AddTopLevel = [&](Action *A, BoundArch BA, const ToolChain *TC) { OffloadAction::DeviceDependences Dep; - Dep.add(*A, *TC, TargetID, AssociatedOffloadKind); + Dep.add(*A, *TC, BA, AssociatedOffloadKind); AL.push_back(C.MakeAction(Dep, A->getType())); }; // If we have a fat binary, add it to the list. 
if (CudaFatBinary) { - AddTopLevel(CudaFatBinary, OffloadArch::Unused, FatBinaryToolChain); + AddTopLevel(CudaFatBinary, {}, FatBinaryToolChain); CudaDeviceActions.clear(); CudaFatBinary = nullptr; return; @@ -3595,7 +3582,7 @@ class OffloadingActionBuilder final { return true; } - std::set> GpuArchs; + std::set> GpuArchs; for (Action::OffloadKind Kind : {Action::OFK_Cuda, Action::OFK_HIP}) { for (auto &I : llvm::make_range(C.getOffloadToolChains(Kind))) { for (auto Arch : @@ -3605,7 +3592,7 @@ class OffloadingActionBuilder final { } for (auto [Arch, TC] : GpuArchs) { - GpuArchList.push_back(Arch.data()); + GpuArchList.push_back(Arch); ToolChains.push_back(TC); } @@ -3706,7 +3693,7 @@ class OffloadingActionBuilder final { C.MakeAction(DeviceActions, types::TY_CUDA_FATBIN); if (!CompileDeviceOnly) { - DA.add(*CudaFatBinary, *FatBinaryToolChain, /*BoundArch=*/nullptr, + DA.add(*CudaFatBinary, *FatBinaryToolChain, /*BA=*/{}, Action::OFK_Cuda); // Clear the fat binary, it is already a dependence to an host // action. @@ -3842,7 +3829,7 @@ class OffloadingActionBuilder final { Action *BackendAction = nullptr; if (ToolChains[I]->getTriple().isSPIRV() || (ToolChains[I]->getTriple().isAMDGCN() && - GpuArchList[I] == StringRef("amdgcnspirv"))) { + GpuArchList[I].ArchName == StringRef("amdgcnspirv"))) { // Emit LLVM bitcode for SPIR-V targets. SPIR-V device tool chain // (HIPSPVToolChain or HIPAMDToolChain) runs post-link LLVM IR // passes. @@ -3885,7 +3872,7 @@ class OffloadingActionBuilder final { types::TY_HIP_FATBIN); if (!CompileDeviceOnly) { - DA.add(*CudaFatBinary, *FatBinaryToolChain, /*BoundArch=*/nullptr, + DA.add(*CudaFatBinary, *FatBinaryToolChain, /*BA=*/{}, AssociatedOffloadKind); // Clear the fat binary, it is already a dependence to an host // action. @@ -3989,7 +3976,7 @@ class OffloadingActionBuilder final { auto *TopDeviceLinkAction = C.MakeAction( Actions, CompileDeviceOnly ? 
types::TY_HIP_FATBIN : types::TY_Object); - DDeps.add(*TopDeviceLinkAction, *FatBinaryToolChain, nullptr, + DDeps.add(*TopDeviceLinkAction, *FatBinaryToolChain, /*BA=*/{}, AssociatedOffloadKind); // Offload the host object to the host linker. AL.push_back( @@ -4127,7 +4114,7 @@ class OffloadingActionBuilder final { // for that. OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), - /*BoundArch=*/nullptr, DDeps); + /*BA=*/{}, DDeps); return C.MakeAction(HDep, DDeps); } @@ -4155,7 +4142,7 @@ class OffloadingActionBuilder final { C.MakeAction(HostAction); UnbundlingHostAction->registerDependentActionInfo( C.getSingleOffloadToolChain(), - /*BoundArch=*/StringRef(), Action::OFK_Host); + /*BA=*/{}, Action::OFK_Host); HostAction = UnbundlingHostAction; recordHostAction(HostAction, InputArg); } @@ -4227,7 +4214,7 @@ class OffloadingActionBuilder final { // associated with the current input. if (HostAction) HostAction->propagateHostOffloadInfo(InputArgToOffloadKindMap[InputArg], - /*BoundArch=*/nullptr); + /*BA=*/{}); return false; } @@ -4256,7 +4243,7 @@ class OffloadingActionBuilder final { // needs to set its offloading kind directly. if (HA) HA->propagateHostOffloadInfo(SB->getAssociatedOffloadKind(), - /*BoundArch=*/nullptr); + /*BA=*/{}); } return HA; } @@ -4287,7 +4274,7 @@ class OffloadingActionBuilder final { // is a link action it is assumed to depend on all actions generated so // far. HostAction->setHostOffloadInfo(ActiveOffloadKinds, - /*BoundArch=*/nullptr); + /*BA=*/{}); // Propagate active offloading kinds for each input to the link action. // Each input may have different active offloading kind. 
for (auto *A : HostAction->inputs()) { @@ -4297,7 +4284,7 @@ class OffloadingActionBuilder final { auto OFKLoc = InputArgToOffloadKindMap.find(ArgLoc->second); if (OFKLoc == InputArgToOffloadKindMap.end()) continue; - A->propagateHostOffloadInfo(OFKLoc->second, /*BoundArch=*/nullptr); + A->propagateHostOffloadInfo(OFKLoc->second, /*BA=*/{}); } return HostAction; } @@ -4307,7 +4294,7 @@ class OffloadingActionBuilder final { // to do that explicitly here. OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), - /*BoundArch*/ nullptr, ActiveOffloadKinds); + /*BA=*/{}, ActiveOffloadKinds); return C.MakeAction(HDep, DDeps); } }; @@ -4664,7 +4651,7 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, OffloadBuilder->appendTopLevelActions(Actions, Current, InputArg); else if (Current) Current->propagateHostOffloadInfo(C.getActiveOffloadKinds(), - /*BoundArch=*/nullptr); + /*BA=*/{}); } // Add a link action if necessary. @@ -4688,7 +4675,7 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, Args.hasArg(options::OPT_offload_link)) { LA = C.MakeAction(LinkerInputs, types::TY_Image); LA->propagateHostOffloadInfo(C.getActiveOffloadKinds(), - /*BoundArch=*/nullptr); + /*BA=*/{}); } else { // If we are linking but were passed -emit-llvm, we will be calling // llvm-link, so set the output type accordingly. This is only allowed in @@ -4899,7 +4886,7 @@ getConflictOffloadArchCombination(const llvm::DenseSet &Archs, return getConflictTargetIDCombination(ArchSet); } -llvm::SmallVector +llvm::SmallVector Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Action::OffloadKind Kind, const ToolChain &TC) const { // --offload and --offload-arch options are mutually exclusive. 
@@ -4914,7 +4901,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, } llvm::DenseSet Archs; - for (auto *Arg : C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind)) { + for (auto *Arg : C.getArgsForToolChain(&TC, /*BA=*/{}, Kind)) { // Add or remove the seen architectures in order of appearance. If an // invalid architecture is given we simply exit. if (Arg->getOption().matches(options::OPT_offload_arch_EQ)) { @@ -4934,7 +4921,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, if (!CanonicalStr.empty()) Archs.insert(CanonicalStr); else - return llvm::SmallVector(); + return {}; } } else { StringRef CanonicalStr = @@ -4942,7 +4929,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, if (!CanonicalStr.empty()) Archs.insert(CanonicalStr); else - return llvm::SmallVector(); + return {}; } } } else if (Arg->getOption().matches(options::OPT_no_offload_arch_EQ)) { @@ -4977,7 +4964,7 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, Archs.insert(StringRef()); } else if (Kind == Action::OFK_OpenMP) { // Accept legacy `-march` device arguments for OpenMP. - if (auto *Arg = C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind) + if (auto *Arg = C.getArgsForToolChain(&TC, /*BA=*/{}, Kind) .getLastArg(options::OPT_march_EQ)) { Archs.insert(Arg->getValue()); } else { @@ -5000,7 +4987,13 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, SmallVector Sorted(Archs.begin(), Archs.end()); llvm::sort(Sorted); - return Sorted; + + // Convert to BoundArch, parsing each architecture string once + SmallVector Result; + Result.reserve(Sorted.size()); + for (StringRef Arch : Sorted) + Result.push_back(BoundArch(Arch)); + return Result; } Action * @@ -5064,9 +5057,9 @@ Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, continue; // Get the product of all bound architectures and toolchains. 
- SmallVector> TCAndArchs; + SmallVector> TCAndArchs; for (const ToolChain *TC : ToolChains) { - for (StringRef Arch : getOffloadArchs(C, C.getArgs(), Kind, *TC)) { + for (BoundArch Arch : getOffloadArchs(C, C.getArgs(), Kind, *TC)) { TCAndArchs.push_back(std::make_pair(TC, Arch)); DeviceActions.push_back( C.MakeAction(*InputArg, InputType, CUID)); @@ -5104,7 +5097,7 @@ Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, continue; // Propagate the ToolChain so we can use it in ConstructPhaseAction. - A->propagateDeviceOffloadInfo(Kind, TCAndArch->second.data(), + A->propagateDeviceOffloadInfo(Kind, TCAndArch->second, TCAndArch->first); A = ConstructPhaseAction(C, Args, Phase, A, Kind, TCAndArch->first->getLTOMode(Args, Kind)); @@ -5118,9 +5111,9 @@ Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, HostAction->setCannotBeCollapsedWithNextDependentAction(); OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), - TCAndArch->second.data(), Kind); + TCAndArch->second, Kind); OffloadAction::DeviceDependences DDep; - DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); + DDep.add(*A, *TCAndArch->first, TCAndArch->second, Kind); A = C.MakeAction(HDep, DDep); } @@ -5149,16 +5142,16 @@ Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, auto *TCAndArch = TCAndArchs.begin(); for (Action *A : DeviceActions) { - DDeps.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); + DDeps.add(*A, *TCAndArch->first, TCAndArch->second, Kind); OffloadAction::DeviceDependences DDep; - DDep.add(*A, *TCAndArch->first, TCAndArch->second.data(), Kind); + DDep.add(*A, *TCAndArch->first, TCAndArch->second, Kind); // Compiling CUDA in non-RDC mode uses the PTX output if available. 
for (Action *Input : A->getInputs()) if (Kind == Action::OFK_Cuda && A->getType() == types::TY_Object && !Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, false)) - DDep.add(*Input, *TCAndArch->first, TCAndArch->second.data(), Kind); + DDep.add(*Input, *TCAndArch->first, TCAndArch->second, Kind); OffloadActions.push_back(C.MakeAction(DDep, A->getType())); ++TCAndArch; @@ -5190,15 +5183,15 @@ Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, Action *FatbinAction = C.MakeAction(OffloadActions, types::TY_CUDA_FATBIN); DDep.add(*FatbinAction, *C.getSingleOffloadToolChain(), - nullptr, Action::OFK_Cuda); + /*BA=*/{}, Action::OFK_Cuda); } else if (HIPNoRDC && offloadDeviceOnly()) { // If we are in device-only non-RDC-mode we just emit the final HIP // fatbinary for each translation unit, linking each input individually. Action *FatbinAction = C.MakeAction(OffloadActions, types::TY_HIP_FATBIN); DDep.add(*FatbinAction, - *C.getOffloadToolChains().first->second, nullptr, - Action::OFK_HIP); + *C.getOffloadToolChains().first->second, + /*BA=*/{}, Action::OFK_HIP); } else if (HIPNoRDC) { // Host + device assembly: defer to clang-offload-bundler (see // BuildActions). @@ -5221,14 +5214,14 @@ Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, C.MakeAction(AL, types::TY_HIP_FATBIN); DDep.add(*PackagerAction, *C.getOffloadToolChains().first->second, - /*BoundArch=*/nullptr, Action::OFK_HIP); + /*BA=*/{}, Action::OFK_HIP); } else { // Package all the offloading actions into a single output that can be // embedded in the host and linked. Action *PackagerAction = C.MakeAction(OffloadActions, types::TY_Image); DDep.add(*PackagerAction, *C.getSingleOffloadToolChain(), - nullptr, C.getActiveOffloadKinds()); + /*BA=*/{}, C.getActiveOffloadKinds()); } // HIP wants '--offload-device-only' to create a fatbinary by default. 
@@ -5242,7 +5235,7 @@ Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, }) && isa(HostAction); OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), - /*BoundArch=*/nullptr, SingleDeviceOutput ? DDep : DDeps); + /*BA=*/{}, SingleDeviceOutput ? DDep : DDeps); return C.MakeAction(HDep, SingleDeviceOutput ? DDep : DDeps); } @@ -5493,7 +5486,7 @@ void Driver::BuildJobs(Compilation &C) const { } BuildJobsForAction(C, A, &C.getDefaultToolChain(), - /*BoundArch*/ StringRef(), + /*BA=*/{}, /*AtTopLevel*/ true, /*MultipleArchs*/ ArchNames.size() > 1, /*LinkingOutput*/ LinkingOutput, CachedResults, @@ -5942,13 +5935,12 @@ class ToolSelector final { /// armv7 and armv7s both map to the same triple -- so we need both in our map. /// Also, we need to add the offloading device kind, as the same tool chain can /// be used for host and device for some programming models, e.g. OpenMP. -static std::string GetTriplePlusArchString(const ToolChain *TC, - StringRef BoundArch, +static std::string GetTriplePlusArchString(const ToolChain *TC, BoundArch BA, Action::OffloadKind OffloadKind) { - std::string TriplePlusArch = TC->getTriple().str(); - if (!BoundArch.empty()) { + std::string TriplePlusArch = TC->getTriple().normalize(); + if (!BA.empty()) { TriplePlusArch += "-"; - TriplePlusArch += BoundArch; + TriplePlusArch += BA.ArchName; } TriplePlusArch += "-"; TriplePlusArch += Action::GetOffloadKindName(OffloadKind); @@ -5956,20 +5948,20 @@ static std::string GetTriplePlusArchString(const ToolChain *TC, } InputInfoList Driver::BuildJobsForAction( - Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, + Compilation &C, const Action *A, const ToolChain *TC, BoundArch BA, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, std::map, InputInfoList> &CachedResults, Action::OffloadKind TargetDeviceOffloadKind) const { std::pair ActionTC = { - A, GetTriplePlusArchString(TC, BoundArch, 
TargetDeviceOffloadKind)}; + A, GetTriplePlusArchString(TC, BA, TargetDeviceOffloadKind)}; auto CachedResult = CachedResults.find(ActionTC); if (CachedResult != CachedResults.end()) { return CachedResult->second; } InputInfoList Result = BuildJobsForActionNoCache( - C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput, - CachedResults, TargetDeviceOffloadKind); + C, A, TC, BA, AtTopLevel, MultipleArchs, LinkingOutput, CachedResults, + TargetDeviceOffloadKind); CachedResults[ActionTC] = Result; return Result; } @@ -5988,9 +5980,10 @@ static void handleTimeTrace(Compilation &C, const ArgList &Args, OffloadingPrefix = Action::GetOffloadingFileNamePrefix( JA->getOffloadingDeviceKind(), TC ? TC->getEffectiveTriple().str() : "", /*CreatePrefixForHost=*/false); - if (const char *Arch = JA->getOffloadingArch()) { + BoundArch Arch = JA->getOffloadingArch(); + if (!Arch.empty()) { OffloadingPrefix += "-"; - OffloadingPrefix += Arch; + OffloadingPrefix += Arch.ArchName; } } else if (JA->getOffloadingHostActiveKinds() != Action::OFK_None && C.getDriver().isSaveTempsEnabled()) { @@ -6038,7 +6031,7 @@ static void handleTimeTrace(Compilation &C, const ArgList &Args, } InputInfoList Driver::BuildJobsForActionNoCache( - Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, + Compilation &C, const Action *A, const ToolChain *TC, BoundArch BA, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, std::map, InputInfoList> &CachedResults, @@ -6047,8 +6040,8 @@ InputInfoList Driver::BuildJobsForActionNoCache( // Track the bound arch for commands constructed in this scope so // generateCompilationDiagnostics can identify the crashing arch. 
- StringRef SavedBoundArch = C.getCurrentBoundArch(); - C.setCurrentBoundArch(BoundArch); + BoundArch SavedBoundArch = C.getCurrentBoundArch(); + C.setCurrentBoundArch(BA); auto RestoreBoundArch = llvm::scope_exit([&] { C.setCurrentBoundArch(SavedBoundArch); }); @@ -6059,7 +6052,7 @@ InputInfoList Driver::BuildJobsForActionNoCache( // computed. Get the default arguments for OFK_None to ensure that // initialization is performed before processing the offload action. // FIXME: Remove when darwin's toolchain is initialized during construction. - C.getArgsForToolChain(TC, BoundArch, Action::OFK_Host); + C.getArgsForToolChain(TC, BA, Action::OFK_Host); // The offload action is expected to be used in four different situations. // @@ -6089,9 +6082,9 @@ InputInfoList Driver::BuildJobsForActionNoCache( if (OA->hasSingleDeviceDependence() || !OA->hasHostDependence()) { InputInfoList DevA; OA->doOnEachDeviceDependence([&](Action *DepA, const ToolChain *DepTC, - const char *DepBoundArch) { + BoundArch DepBoundArch) { DevA.append(BuildJobsForAction(C, DepA, DepTC, DepBoundArch, AtTopLevel, - /*MultipleArchs*/ !!DepBoundArch, + /*MultipleArchs=*/!DepBoundArch.empty(), LinkingOutput, CachedResults, DepA->getOffloadingDeviceKind())); }); @@ -6104,11 +6097,11 @@ InputInfoList Driver::BuildJobsForActionNoCache( // dependence. The dependences can't therefore be a top-level action. 
OA->doOnEachDependence( /*IsHostDependence=*/BuildingForOffloadDevice, - [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { + [&](Action *DepA, const ToolChain *DepTC, BoundArch DepBoundArch) { OffloadDependencesInputInfo.append(BuildJobsForAction( C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false, - /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults, - DepA->getOffloadingDeviceKind())); + /*MultipleArchs=*/!DepBoundArch.empty(), LinkingOutput, + CachedResults, DepA->getOffloadingDeviceKind())); }); A = BuildingForOffloadDevice @@ -6119,7 +6112,7 @@ InputInfoList Driver::BuildJobsForActionNoCache( // toolchain, return the cached input if so. std::pair ActionTC = { OA->getHostDependence(), - GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; + GetTriplePlusArchString(TC, BA, TargetDeviceOffloadKind)}; auto It = CachedResults.find(ActionTC); if (It != CachedResults.end()) { InputInfoList Inputs = It->second; @@ -6142,12 +6135,12 @@ InputInfoList Driver::BuildJobsForActionNoCache( if (const BindArchAction *BAA = dyn_cast(A)) { const ToolChain *TC; - StringRef ArchName = BAA->getArchName(); + BoundArch ArchName = BAA->getArch(); if (!ArchName.empty()) TC = &getToolChain(C.getArgs(), - computeTargetTriple(*this, TargetTriple, - C.getArgs(), ArchName)); + computeTargetTriple(*this, TargetTriple, C.getArgs(), + ArchName.ArchName)); else TC = &C.getDefaultToolChain(); @@ -6174,11 +6167,11 @@ InputInfoList Driver::BuildJobsForActionNoCache( for (const auto *OA : CollapsedOffloadActions) cast(OA)->doOnEachDependence( /*IsHostDependence=*/BuildingForOffloadDevice, - [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { + [&](Action *DepA, const ToolChain *DepTC, BoundArch DepBoundArch) { OffloadDependencesInputInfo.append(BuildJobsForAction( - C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false, - /*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults, - DepA->getOffloadingDeviceKind())); + C, DepA, 
DepTC, DepBoundArch, /*AtTopLevel=*/false, + /*MultipleArchs=*/!DepBoundArch.empty(), LinkingOutput, + CachedResults, DepA->getOffloadingDeviceKind())); }); // Only use pipes when there is exactly one input. @@ -6190,7 +6183,7 @@ InputInfoList Driver::BuildJobsForActionNoCache( bool SubJobAtTopLevel = AtTopLevel && (isa(A) || isa(A)); InputInfos.append(BuildJobsForAction( - C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput, + C, Input, TC, BA, SubJobAtTopLevel, MultipleArchs, LinkingOutput, CachedResults, A->getOffloadingDeviceKind())); } @@ -6217,14 +6210,14 @@ InputInfoList Driver::BuildJobsForActionNoCache( llvm::Triple EffectiveTriple; const ToolChain &ToolTC = T->getToolChain(); const ArgList &Args = - C.getArgsForToolChain(TC, BoundArch, A->getOffloadingDeviceKind()); + C.getArgsForToolChain(TC, BA, A->getOffloadingDeviceKind()); if (InputInfos.size() != 1) { EffectiveTriple = - llvm::Triple(ToolTC.ComputeEffectiveClangTriple(Args, BoundArch)); + llvm::Triple(ToolTC.ComputeEffectiveClangTriple(Args, BA)); } else { // Pass along the input type if it can be unambiguously determined. - EffectiveTriple = llvm::Triple(ToolTC.ComputeEffectiveClangTriple( - Args, BoundArch, InputInfos[0].getType())); + EffectiveTriple = llvm::Triple( + ToolTC.ComputeEffectiveClangTriple(Args, BA, InputInfos[0].getType())); } RegisterEffectiveTriple TripleRAII(ToolTC, EffectiveTriple); @@ -6259,14 +6252,14 @@ InputInfoList Driver::BuildJobsForActionNoCache( // Get the unique string identifier for this dependence and cache the // result. 
- StringRef Arch; + BoundArch Arch; if (TargetDeviceOffloadKind == Action::OFK_HIP) { if (UI.DependentOffloadKind == Action::OFK_Host) - Arch = StringRef(); + Arch = BoundArch(); else - Arch = UI.DependentBoundArch; + Arch = BoundArch(UI.DependentBoundArch); } else - Arch = BoundArch; + Arch = BA; CachedResults[{A, GetTriplePlusArchString(UI.DependentToolChain, Arch, UI.DependentOffloadKind)}] = { @@ -6276,7 +6269,7 @@ InputInfoList Driver::BuildJobsForActionNoCache( // Now that we have all the results generated, select the one that should be // returned for the current depending action. std::pair ActionTC = { - A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; + A, GetTriplePlusArchString(TC, BA, TargetDeviceOffloadKind)}; assert(CachedResults.find(ActionTC) != CachedResults.end() && "Result does not exist??"); Result = CachedResults[ActionTC].front(); @@ -6290,9 +6283,9 @@ InputInfoList Driver::BuildJobsForActionNoCache( /*CreatePrefixForHost=*/isa(A) || !(A->getOffloadingHostActiveKinds() == Action::OFK_None || AtTopLevel)); - Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch, - AtTopLevel, MultipleArchs, - OffloadingPrefix), + Result = InputInfo(A, + GetNamedOutputPath(C, *JA, BaseInput, BA, AtTopLevel, + MultipleArchs, OffloadingPrefix), BaseInput); if (T->canEmitIR()) handleTimeTrace(C, Args, JA, BaseInput, Result); @@ -6378,7 +6371,7 @@ static bool HasPreprocessOutput(const Action &JA) { const char *Driver::CreateTempFile(Compilation &C, StringRef Prefix, StringRef Suffix, bool MultipleArchs, - StringRef BoundArch, + StringRef BoundArchStr, bool NeedUniqueDirectory) const { SmallString<128> TmpName; Arg *A = C.getArgs().getLastArg(options::OPT_fcrash_diagnostics_dir); @@ -6398,14 +6391,14 @@ const char *Driver::CreateTempFile(Compilation &C, StringRef Prefix, return ""; } } else { - if (MultipleArchs && !BoundArch.empty()) { + if (MultipleArchs && !BoundArchStr.empty()) { if (NeedUniqueDirectory) { TmpName = 
GetTemporaryDirectory(Prefix); - llvm::sys::path::append(TmpName, - Twine(Prefix) + "-" + BoundArch + "." + Suffix); + llvm::sys::path::append(TmpName, Twine(Prefix) + "-" + BoundArchStr + + "." + Suffix); } else { - TmpName = - GetTemporaryPath((Twine(Prefix) + "-" + BoundArch).str(), Suffix); + TmpName = GetTemporaryPath((Twine(Prefix) + "-" + BoundArchStr).str(), + Suffix); } } else { @@ -6439,11 +6432,10 @@ static const char *GetModuleOutputPath(Compilation &C, const JobAction &JA, } const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, - const char *BaseInput, - StringRef OrigBoundArch, bool AtTopLevel, - bool MultipleArchs, + const char *BaseInput, BoundArch BA, + bool AtTopLevel, bool MultipleArchs, StringRef OffloadingPrefix) const { - std::string BoundArch = sanitizeTargetIDInFileName(OrigBoundArch); + std::string BoundArchStr = sanitizeTargetIDInFileName(BA.ArchName); llvm::PrettyStackTraceString CrashInfo("Computing output path"); @@ -6458,7 +6450,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, (JA.getOffloadingDeviceKind() == Action::OFK_None || JA.getOffloadingDeviceKind() == Action::OFK_Host) && Triple.isOSDarwin(); - return CreateTempFile(C, Prefix, Suffix, MultipleArchs, BoundArch, + return CreateTempFile(C, Prefix, Suffix, MultipleArchs, BoundArchStr, NeedUniqueDirectory); }; @@ -6633,9 +6625,9 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, llvm::sys::path::replace_extension(Output, ""); } Output += OffloadingPrefix; - if (MultipleArchs && !BoundArch.empty()) { + if (MultipleArchs && !BoundArchStr.empty()) { Output += "-"; - Output.append(BoundArch); + Output.append(BoundArchStr); } if (UseOutExtension) Output += ".out"; @@ -6661,9 +6653,9 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, End = BaseName.rfind('.'); SmallString<128> Suffixed(BaseName.substr(0, End)); Suffixed += OffloadingPrefix; - if (MultipleArchs && 
!BoundArch.empty()) { + if (MultipleArchs && !BoundArchStr.empty()) { Suffixed += "-"; - Suffixed.append(BoundArch); + Suffixed.append(BoundArchStr); } // When using both -save-temps and -emit-llvm, use a ".tmp.bc" suffix for // the unoptimized bitcode so that it does not get overwritten by the ".bc" diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 74ebd0bf375d3..bf77e748ebd0c 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -399,7 +399,7 @@ bool SanitizerArgs::needsLTO() const { SanitizerArgs::SanitizerArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, bool DiagnoseErrors, bool DiagnoseBoundArchErrors, - StringRef BoundArch, + BoundArch BA, Action::OffloadKind DeviceOffloadKind) { SanitizerMask AllRemove; // During the loop below, the accumulated set of // sanitizers disabled by the current sanitizer @@ -416,14 +416,13 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, SanitizerMask Kinds; // Figure out the base toolchain's sanitizer support so we can diagnose the - // diff for a specific BoundArch. + // diff for a specific BA. const SanitizerMask ToolChainSupported = - setGroupBits(TC.getSupportedSanitizers("", DeviceOffloadKind)); + setGroupBits(TC.getSupportedSanitizers({}, DeviceOffloadKind)); const SanitizerMask BoundArchSupported = - BoundArch.empty() ? ToolChainSupported - : setGroupBits(TC.getSupportedSanitizers( - BoundArch, DeviceOffloadKind)); + BA ? 
setGroupBits(TC.getSupportedSanitizers(BA, DeviceOffloadKind)) + : ToolChainSupported; CfiCrossDso = Args.hasFlag(options::OPT_fsanitize_cfi_cross_dso, options::OPT_fno_sanitize_cfi_cross_dso, false); @@ -567,7 +566,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, // Check if the toolchain provides a feature requirement hint for // any of the unsupported sanitizers StringRef Requirement = - TC.getSanitizerRequirement(ArchSpecificUnsupported, BoundArch); + TC.getSanitizerRequirement(ArchSpecificUnsupported, BA); if (!Requirement.empty()) { // Emit diagnostic with feature requirement // @@ -579,7 +578,7 @@ SanitizerArgs::SanitizerArgs(const ToolChain &TC, err_drv_unsupported_option_for_offload_arch_req_feature : diag:: warn_drv_unsupported_option_for_offload_arch_req_feature) - << Arg->getAsString(Args) << BoundArch << Requirement; + << Arg->getAsString(Args) << BA.ArchName << Requirement; } else { // Fall back to generic diagnostic if no requirement was provided SanitizerSet UnsupportedSet; diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 8db726cedfe90..7d93e7f65daf5 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -517,15 +517,14 @@ ToolChain::getMultilibFlags(const llvm::opt::ArgList &Args) const { } SanitizerArgs -ToolChain::getSanitizerArgs(const llvm::opt::ArgList &JobArgs, - StringRef BoundArch, +ToolChain::getSanitizerArgs(const llvm::opt::ArgList &JobArgs, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { // When -fno-gpu-sanitize is specified for GPU targets, don't emit // diagnostics about unsupported sanitizers for specific GPU arches, // since sanitizers are disabled for the GPU anyway. 
bool DiagnoseBoundArchErrors = - BoundArchSanitizerArgsChecked.insert(BoundArch).second; - if (!BoundArch.empty() && getTriple().isGPU() && + BoundArchSanitizerArgsChecked.insert(BA.ArchName).second; + if (BA && getTriple().isGPU() && !JobArgs.hasFlag(options::OPT_fgpu_sanitize, options::OPT_fno_gpu_sanitize, true)) { DiagnoseBoundArchErrors = false; @@ -533,7 +532,7 @@ ToolChain::getSanitizerArgs(const llvm::opt::ArgList &JobArgs, SanitizerArgs SanArgs(*this, JobArgs, /*DiagnoseErrors=*/!SanitizerArgsChecked, - DiagnoseBoundArchErrors, BoundArch, DeviceOffloadKind); + DiagnoseBoundArchErrors, BA, DeviceOffloadKind); SanitizerArgsChecked = true; return SanArgs; @@ -1431,8 +1430,7 @@ bool ToolChain::isThreadModelSupported(const StringRef Model) const { return false; } -std::string ToolChain::ComputeLLVMTriple(const ArgList &Args, - StringRef BoundArch, +std::string ToolChain::ComputeLLVMTriple(const ArgList &Args, BoundArch BA, types::ID InputType) const { switch (getTriple().getArch()) { default: @@ -1486,9 +1484,9 @@ std::string ToolChain::ComputeLLVMTriple(const ArgList &Args, } std::string ToolChain::ComputeEffectiveClangTriple(const ArgList &Args, - StringRef BoundArch, + BoundArch BA, types::ID InputType) const { - return ComputeLLVMTriple(Args, BoundArch, InputType); + return ComputeLLVMTriple(Args, BA, InputType); } std::string ToolChain::computeSysRoot() const { @@ -1501,7 +1499,7 @@ void ToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, } void ToolChain::addClangTargetOptions( - const ArgList &DriverArgs, ArgStringList &CC1Args, StringRef BoundArch, + const ArgList &DriverArgs, ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const {} void ToolChain::addClangCC1ASTargetOptions(const ArgList &Args, @@ -1840,7 +1838,7 @@ ToolChain::getSystemGPUArchs(const llvm::opt::ArgList &Args) const { } SanitizerMask -ToolChain::getSupportedSanitizers(StringRef BoundArch, +ToolChain::getSupportedSanitizers(BoundArch BA, 
Action::OffloadKind DeviceOffloadKind) const { // Return sanitizers which don't require runtime support and are not // platform dependent. @@ -1881,7 +1879,7 @@ void ToolChain::addSYCLIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const {} llvm::SmallVector -ToolChain::getDeviceLibs(const ArgList &DriverArgs, StringRef BoundArch, +ToolChain::getDeviceLibs(const ArgList &DriverArgs, BoundArch BA, const Action::OffloadKind DeviceOffloadingKind) const { return {}; } @@ -2101,7 +2099,7 @@ static bool isXArchCompatibleTripleArch(const llvm::Triple &TT, } llvm::opt::DerivedArgList *ToolChain::TranslateXarchArgs( - const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind OFK, SmallVectorImpl *AllocatedArgs) const { DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); @@ -2119,8 +2117,7 @@ llvm::opt::DerivedArgList *ToolChain::TranslateXarchArgs( Skip = IsDevice; } else if (A->getOption().matches(options::OPT_Xarch__)) { StringRef Val = A->getValue(); - NeedTrans = Val == getArchName() || - (!BoundArch.empty() && Val == BoundArch) || + NeedTrans = Val == getArchName() || (BA && Val == BA.ArchName) || isXArchCompatibleTripleArch(Triple, Val); Skip = !NeedTrans; } diff --git a/clang/lib/Driver/ToolChains/AIX.cpp b/clang/lib/Driver/ToolChains/AIX.cpp index a6a890db305b9..586bc08067eff 100644 --- a/clang/lib/Driver/ToolChains/AIX.cpp +++ b/clang/lib/Driver/ToolChains/AIX.cpp @@ -556,7 +556,7 @@ static void addTocDataOptions(const llvm::opt::ArgList &Args, void AIX::addClangTargetOptions( const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CC1Args, - StringRef BoundArch, Action::OffloadKind DeviceOffloadingKind) const { + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const { Args.AddLastArg(CC1Args, options::OPT_mignore_xcoff_visibility); Args.AddLastArg(CC1Args, options::OPT_mdefault_visibility_export_mapping_EQ); Args.addOptInFlag(CC1Args, 
options::OPT_mxcoff_roptr, options::OPT_mno_xcoff_roptr); diff --git a/clang/lib/Driver/ToolChains/AIX.h b/clang/lib/Driver/ToolChains/AIX.h index afe0556d68ed4..6ae726ef23e7e 100644 --- a/clang/lib/Driver/ToolChains/AIX.h +++ b/clang/lib/Driver/ToolChains/AIX.h @@ -85,8 +85,7 @@ class LLVM_LIBRARY_VISIBILITY AIX : public ToolChain { void addClangTargetOptions( const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, - Action::OffloadKind DeviceOffloadingKind) const override; + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const override; void addProfileRTLibs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const override; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index dfafe48c1900f..325dba03c4136 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -690,10 +690,9 @@ Tool *AMDGPUToolChain::buildLinker() const { } DerivedArgList * -AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, +AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { - DerivedArgList *DAL = - Generic_ELF::TranslateArgs(Args, BoundArch, DeviceOffloadKind); + DerivedArgList *DAL = Generic_ELF::TranslateArgs(Args, BA, DeviceOffloadKind); if (!DAL) { DAL = new DerivedArgList(Args.getBaseArgs()); for (Arg *A : Args) @@ -730,9 +729,10 @@ AMDGPUToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, } } - if (!BoundArch.empty()) { + if (!BA.empty()) { DAL->eraseArg(options::OPT_mcpu_EQ); - DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), BoundArch); + DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_mcpu_EQ), + BA.ArchName); } AMDGPUToolChain::ParsedTargetIDType PTID = checkTargetID(*DAL); @@ -798,7 +798,9 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType( if (JA.getOffloadingDeviceKind() 
== Action::OFK_HIP || JA.getOffloadingDeviceKind() == Action::OFK_Cuda) { - auto Arch = getProcessorFromTargetID(getTriple(), JA.getOffloadingArch()); + BoundArch BA = JA.getOffloadingArch(); + // FIXME: Missing conversion from OffloadArch to GPUKind + auto Arch = getProcessorFromTargetID(getTriple(), BA.ArchName); auto Kind = llvm::AMDGPU::parseArchAMDGCN(Arch); if (FPType && FPType == &llvm::APFloat::IEEEsingle() && DriverArgs.hasFlag(options::OPT_fgpu_flush_denormals_to_zero, @@ -842,10 +844,10 @@ ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple, } DerivedArgList * -ROCMToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, +ROCMToolChain::TranslateArgs(const DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { DerivedArgList *DAL = - AMDGPUToolChain::TranslateArgs(Args, BoundArch, DeviceOffloadKind); + AMDGPUToolChain::TranslateArgs(Args, BA, DeviceOffloadKind); // Filter out sanitizer coverage options that are not supported for AMDGPU. for (Arg *A : Args) { @@ -866,7 +868,7 @@ ROCMToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, void AMDGPUToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadingKind) const { + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const { // Default to "hidden" visibility, as object level linking will not be // supported for the foreseeable future. // TODO: remove the SPIR-V bypass once it can encode (hidden) visibility. 
@@ -980,8 +982,8 @@ AMDGPUToolChain::getSystemGPUArchs(const ArgList &Args) const { void ROCMToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadingKind) const { - AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args, BoundArch, + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const { + AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args, BA, DeviceOffloadingKind); // For the OpenCL case where there is no offload target, accept -nostdlib to @@ -1008,9 +1010,8 @@ void ROCMToolChain::addClangTargetOptions( // Get the device name and canonicalize it. For offload compilation, // BoundArch contains the full target ID. For non-offload (OpenCL), // fall back to -mcpu. - StringRef TargetID = BoundArch.empty() - ? DriverArgs.getLastArgValue(options::OPT_mcpu_EQ) - : BoundArch; + StringRef TargetID = + BA ? BA.ArchName : DriverArgs.getLastArgValue(options::OPT_mcpu_EQ); StringRef GpuArch = getProcessorFromTargetID(getTriple(), TargetID); StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(GpuArch); @@ -1027,7 +1028,7 @@ void ROCMToolChain::addClangTargetOptions( // Add the generic set of libraries. 
BCLibs.append(RocmInstallation->getCommonBitcodeLibs( DriverArgs, LibDeviceFile, GpuArch, DeviceOffloadingKind, - getSanitizerArgs(DriverArgs, TargetID, DeviceOffloadingKind) + getSanitizerArgs(DriverArgs, BoundArch{TargetID}, DeviceOffloadingKind) .needsAsanRt())); for (auto [BCFile, Internalize] : BCLibs) { @@ -1118,7 +1119,7 @@ ROCMToolChain::getCommonDeviceLibNames( return RocmInstallation->getCommonBitcodeLibs( DriverArgs, LibDeviceFile, GPUArch, DeviceOffloadingKind, - getSanitizerArgs(DriverArgs, TargetID, DeviceOffloadingKind) + getSanitizerArgs(DriverArgs, BoundArch(TargetID), DeviceOffloadingKind) .needsAsanRt()); } @@ -1146,23 +1147,23 @@ static bool isXnackAvailable(const llvm::Triple &TT, llvm::StringRef TargetID) { } SanitizerMask AMDGPUToolChain::getSupportedSanitizers( - StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const { + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { SanitizerMask SupportedMask = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); // Address sanitizer is potentially supported, but depends on the exact target // arch xnack support. 
- if (BoundArch.empty() || isXnackAvailable(getTriple(), BoundArch)) + if (!BA || isXnackAvailable(getTriple(), BA.ArchName)) SupportedMask |= SanitizerKind::Address; return SupportedMask; } StringRef AMDGPUToolChain::getSanitizerRequirement(SanitizerMask Kinds, - StringRef BoundArch) const { + BoundArch BA) const { // Address sanitizer requires xnack+ feature - if ((Kinds & SanitizerKind::Address) && !BoundArch.empty() && - !isXnackAvailable(getTriple(), BoundArch)) { + if ((Kinds & SanitizerKind::Address) && BA && + !isXnackAvailable(getTriple(), BA.ArchName)) { return "xnack+"; } return ""; diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index 94f2fbae25388..4aa9402458b38 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -73,13 +73,12 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF { bool SupportsProfiling() const override { return false; } llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + TranslateArgs(const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, @@ -102,7 +101,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF { const char *getDefaultLinker() const override { return "ld.lld"; } StringRef getSanitizerRequirement(SanitizerMask Kinds, - StringRef BoundArch) const override; + BoundArch BA) const override; /// Uses amdgpu-arch tool to get arch of the system GPU. Will return error /// if unable to find one. 
@@ -135,7 +134,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF { void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; }; @@ -145,13 +144,12 @@ class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain { const llvm::opt::ArgList &Args); llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + TranslateArgs(const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; // Returns a list of device library names shared by different languages diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp index 9a90e491bb27d..c9e724bced9f0 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp @@ -32,7 +32,7 @@ AMDGPUOpenMPToolChain::AMDGPUOpenMPToolChain(const Driver &D, void AMDGPUOpenMPToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadingKind) const { + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const { assert(DeviceOffloadingKind == Action::OFK_OpenMP && "Only OpenMP offloading kinds are supported."); @@ -40,8 +40,7 @@ void AMDGPUOpenMPToolChain::addClangTargetOptions( true)) return; - for (auto BCFile : - getDeviceLibs(DriverArgs, BoundArch, DeviceOffloadingKind)) { + for (auto BCFile : getDeviceLibs(DriverArgs, BA, DeviceOffloadingKind)) { CC1Args.push_back(BCFile.ShouldInternalize ? 
"-mlink-builtin-bitcode" : "-mlink-bitcode-file"); CC1Args.push_back(DriverArgs.MakeArgString(BCFile.Path)); @@ -80,15 +79,15 @@ AMDGPUOpenMPToolChain::computeMSVCVersion(const Driver *D, llvm::SmallVector AMDGPUOpenMPToolChain::getDeviceLibs( - const llvm::opt::ArgList &Args, llvm::StringRef BoundArch, + const llvm::opt::ArgList &Args, BoundArch BA, const Action::OffloadKind DeviceOffloadingKind) const { if (!Args.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, true)) return {}; - StringRef GpuArch = getProcessorFromTargetID(getTriple(), BoundArch); + StringRef GpuArch = getProcessorFromTargetID(getTriple(), BA.ArchName); SmallVector BCLibs; - for (auto BCLib : - getCommonDeviceLibNames(Args, BoundArch, GpuArch, DeviceOffloadingKind)) + for (auto BCLib : getCommonDeviceLibNames(Args, BA.ArchName, GpuArch, + DeviceOffloadingKind)) BCLibs.emplace_back(BCLib); return BCLibs; diff --git a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h index bc8b195383c6f..2d5b9e751895f 100644 --- a/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h +++ b/clang/lib/Driver/ToolChains/AMDGPUOpenMP.h @@ -35,8 +35,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override; void AddClangCXXStdlibIncludeArgs( @@ -53,7 +52,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUOpenMPToolChain final const llvm::opt::ArgList &Args) const override; llvm::SmallVector - getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch, + getDeviceLibs(const llvm::opt::ArgList &Args, BoundArch BA, const Action::OffloadKind DeviceOffloadKind) const override; /// OpenMP uses LTO by default to link device bitcode. 
diff --git a/clang/lib/Driver/ToolChains/AVR.cpp b/clang/lib/Driver/ToolChains/AVR.cpp index c2b5288627b3a..99f75f67be3f5 100644 --- a/clang/lib/Driver/ToolChains/AVR.cpp +++ b/clang/lib/Driver/ToolChains/AVR.cpp @@ -466,7 +466,7 @@ void AVRToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, void AVRToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const { + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { // Reject C/C++ compilation for avr1 devices since they have no SRAM. const Driver &D = getDriver(); std::string CPU = getCPUName(D, DriverArgs, getTriple()); diff --git a/clang/lib/Driver/ToolChains/AVR.h b/clang/lib/Driver/ToolChains/AVR.h index e232fb3c0281c..44c1fb21a5d9c 100644 --- a/clang/lib/Driver/ToolChains/AVR.h +++ b/clang/lib/Driver/ToolChains/AVR.h @@ -28,8 +28,7 @@ class LLVM_LIBRARY_VISIBILITY AVRToolChain : public Generic_ELF { void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; std::optional findAVRLibcInstallation() const; diff --git a/clang/lib/Driver/ToolChains/BareMetal.cpp b/clang/lib/Driver/ToolChains/BareMetal.cpp index 0a8e5d7f5b74a..ba454acbf755c 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.cpp +++ b/clang/lib/Driver/ToolChains/BareMetal.cpp @@ -351,8 +351,7 @@ void BareMetal::AddClangSystemIncludeArgs(const ArgList &DriverArgs, } void BareMetal::addClangTargetOptions(const ArgList &DriverArgs, - ArgStringList &CC1Args, - StringRef BoundArch, + ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind) const { CC1Args.push_back("-nostdsysteminc"); } @@ -650,14 +649,13 @@ void baremetal::Linker::ConstructJob(Compilation &C, const JobAction &JA, // code, ignoring all runtime library support issues on the 
assumption that // baremetal targets typically implement their own runtime support. SanitizerMask -BareMetal::getSupportedSanitizers(StringRef BoundArch, +BareMetal::getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64; const bool IsAArch64 = getTriple().getArch() == llvm::Triple::aarch64 || getTriple().getArch() == llvm::Triple::aarch64_be; const bool IsRISCV64 = getTriple().isRISCV64(); - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); Res |= SanitizerKind::Address; Res |= SanitizerKind::KernelAddress; Res |= SanitizerKind::PointerCompare; diff --git a/clang/lib/Driver/ToolChains/BareMetal.h b/clang/lib/Driver/ToolChains/BareMetal.h index 14e9c6e990cae..5e6fb3cca5fa4 100644 --- a/clang/lib/Driver/ToolChains/BareMetal.h +++ b/clang/lib/Driver/ToolChains/BareMetal.h @@ -67,8 +67,7 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF { llvm::opt::ArgStringList &CC1Args) const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void AddClangCXXStdlibIncludeArgs( const llvm::opt::ArgList &DriverArgs, @@ -79,7 +78,7 @@ class LLVM_LIBRARY_VISIBILITY BareMetal : public Generic_ELF { std::string computeSysRoot() const override; std::string getCompilerRTPath() const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; private: diff --git a/clang/lib/Driver/ToolChains/CSKYToolChain.cpp b/clang/lib/Driver/ToolChains/CSKYToolChain.cpp index 670abb4e75082..bee7b9d728158 100644 --- a/clang/lib/Driver/ToolChains/CSKYToolChain.cpp +++ 
b/clang/lib/Driver/ToolChains/CSKYToolChain.cpp @@ -75,7 +75,7 @@ CSKYToolChain::GetUnwindLibType(const llvm::opt::ArgList &Args) const { void CSKYToolChain::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + BoundArch BA, Action::OffloadKind) const { CC1Args.push_back("-nostdsysteminc"); } diff --git a/clang/lib/Driver/ToolChains/CSKYToolChain.h b/clang/lib/Driver/ToolChains/CSKYToolChain.h index 82f5acb42e98f..3eb01d2d764e3 100644 --- a/clang/lib/Driver/ToolChains/CSKYToolChain.h +++ b/clang/lib/Driver/ToolChains/CSKYToolChain.h @@ -22,8 +22,7 @@ class LLVM_LIBRARY_VISIBILITY CSKYToolChain : public Generic_ELF { const llvm::opt::ArgList &Args); void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind) const override; RuntimeLibType GetDefaultRuntimeLibType() const override; UnwindLibType GetUnwindLibType(const llvm::opt::ArgList &Args) const override; diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index ca924ccf9152d..a93dd2969504c 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -1088,11 +1088,10 @@ void Clang::AddPreprocessingOptions(Compilation &C, const JobAction &JA, std::set ArchIDs; for (auto &I : llvm::make_range(C.getOffloadToolChains(Action::OFK_Cuda))) { const ToolChain *TC = I.second; - for (StringRef Arch : + for (BoundArch Arch : D.getOffloadArchs(C, C.getArgs(), Action::OFK_Cuda, *TC)) { - OffloadArch OA = StringToOffloadArch(Arch); - if (IsNVIDIAOffloadArch(OA)) - ArchIDs.insert(CudaArchToID(OA)); + if (IsNVIDIAOffloadArch(Arch.Arch)) + ArchIDs.insert(CudaArchToID(Arch.Arch)); } } @@ -1314,7 +1313,7 @@ static void renderRemarksOptions(const ArgList &Args, ArgStringList &CmdArgs, F += 
Action::GetOffloadingFileNamePrefix(JA.getOffloadingDeviceKind(), Triple.str()); F += "-"; - F += JA.getOffloadingArch(); + F += JA.getOffloadingArch().ArchName; } } @@ -6194,9 +6193,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.AddLastArg(CmdArgs, options::OPT_fno_knr_functions); - const char *OffloadArch = JA.getOffloadingArch(); - auto SanitizeArgs = TC.getSanitizerArgs(Args, OffloadArch ? OffloadArch : "", - JA.getOffloadingDeviceKind()); + BoundArch OffloadArch = JA.getOffloadingArch(); + auto SanitizeArgs = + TC.getSanitizerArgs(Args, OffloadArch, JA.getOffloadingDeviceKind()); Args.AddLastArg(CmdArgs, options::OPT_fallow_runtime_check_skip_hot_cutoff_EQ); @@ -6235,7 +6234,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // `--gpu-use-aux-triple-only` is specified. if (AuxTriple && !Args.getLastArg(options::OPT_gpu_use_aux_triple_only)) { const ArgList &HostArgs = - C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_None); + C.getArgsForToolChain(nullptr, BoundArch(), Action::OFK_None); std::string HostCPU = getCPUName(D, HostArgs, *AuxTriple, /*FromAs*/ false); if (!HostCPU.empty()) { CmdArgs.push_back("-aux-target-cpu"); @@ -9387,7 +9386,7 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, if (const auto *OA = dyn_cast(CurDep)) { CurTC = nullptr; - OA->doOnEachDependence([&](Action *A, const ToolChain *TC, const char *) { + OA->doOnEachDependence([&](Action *A, const ToolChain *TC, BoundArch BA) { assert(CurTC == nullptr && "Expected one dependence!"); CurKind = A->getOffloadingDeviceKind(); CurTC = TC; @@ -9399,10 +9398,9 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, TCArgs, CurDep->getOffloadingArch())) .normalize(llvm::Triple::CanonicalForm::FOUR_IDENT); - if ((CurKind != Action::OFK_Host) && - !StringRef(CurDep->getOffloadingArch()).empty()) { + if ((CurKind != Action::OFK_Host) && !CurDep->getOffloadingArch().empty()) { Triples += '-'; - Triples += 
CurDep->getOffloadingArch(); + Triples += CurDep->getOffloadingArch().ArchName; } } CmdArgs.push_back(TCArgs.MakeArgString(Triples)); @@ -9420,7 +9418,7 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, const ToolChain *CurTC = &getToolChain(); if (const auto *OA = dyn_cast(JA.getInputs()[I])) { CurTC = nullptr; - OA->doOnEachDependence([&](Action *, const ToolChain *TC, const char *) { + OA->doOnEachDependence([&](Action *, const ToolChain *TC, BoundArch) { assert(CurTC == nullptr && "Expected one dependence!"); CurTC = TC; }); @@ -9483,7 +9481,7 @@ void OffloadBundler::ConstructJobMultipleOutputs( Dep.DependentOffloadKind == Action::OFK_Cuda) && !Dep.DependentBoundArch.empty()) { Triples += '-'; - Triples += Dep.DependentBoundArch; + Triples += Dep.DependentBoundArch.ArchName; } } @@ -9532,9 +9530,10 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA, C.getArgsForToolChain(TC, OffloadAction->getOffloadingArch(), OffloadAction->getOffloadingDeviceKind()); StringRef File = C.getArgs().MakeArgString(TC->getInputFilename(Input)); - StringRef Arch = OffloadAction->getOffloadingArch() - ? OffloadAction->getOffloadingArch() - : TCArgs.getLastArgValue(options::OPT_march_EQ); + BoundArch Arch = OffloadAction->getOffloadingArch(); + if (Arch.empty()) + Arch = BoundArch(TCArgs.getLastArgValue(options::OPT_march_EQ)); + StringRef Kind = Action::GetOffloadKindName(OffloadAction->getOffloadingDeviceKind()); @@ -9550,7 +9549,7 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA, SmallVector Parts{ "file=" + File.str(), "triple=" + TC->ComputeEffectiveClangTriple(TCArgs, Arch), - "arch=" + (Arch.empty() ? "generic" : Arch.str()), + "arch=" + (Arch.empty() ? 
"generic" : Arch.ArchName.str()), "kind=" + Kind.str(), }; @@ -9704,7 +9703,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, ArgStringList CompilerArgs; ArgStringList LinkerArgs; const DerivedArgList &ToolChainArgs = - C.getArgsForToolChain(TC, /*BoundArch=*/"", Kind); + C.getArgsForToolChain(TC, /*BA=*/{}, Kind); for (Arg *A : ToolChainArgs) { if (A->getOption().matches(OPT_Zlinker_input)) LinkerArgs.emplace_back(A->getValue()); diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp index 07ad7d80b39c1..ba4341ed41f1a 100644 --- a/clang/lib/Driver/ToolChains/CommonArgs.cpp +++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp @@ -1894,7 +1894,7 @@ const char *tools::SplitDebugName(const JobAction &JA, const ArgList &Args, const InputInfo &Output) { auto AddPostfix = [JA](auto &F) { if (JA.getOffloadingDeviceKind() == Action::OFK_HIP) - F += (Twine("_") + JA.getOffloadingArch()).str(); + F += (Twine("_") + JA.getOffloadingArch().ArchName).str(); F += ".dwo"; }; if (Arg *A = Args.getLastArg(options::OPT_gsplit_dwarf_EQ)) diff --git a/clang/lib/Driver/ToolChains/CrossWindows.cpp b/clang/lib/Driver/ToolChains/CrossWindows.cpp index 5bd1cd46cc080..180e91e857c0f 100644 --- a/clang/lib/Driver/ToolChains/CrossWindows.cpp +++ b/clang/lib/Driver/ToolChains/CrossWindows.cpp @@ -283,9 +283,8 @@ AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args, } clang::SanitizerMask CrossWindowsToolChain::getSupportedSanitizers( - StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const { - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; diff --git a/clang/lib/Driver/ToolChains/CrossWindows.h 
b/clang/lib/Driver/ToolChains/CrossWindows.h index d6263823759c7..729d9846934e5 100644 --- a/clang/lib/Driver/ToolChains/CrossWindows.h +++ b/clang/lib/Driver/ToolChains/CrossWindows.h @@ -75,7 +75,7 @@ class LLVM_LIBRARY_VISIBILITY CrossWindowsToolChain : public Generic_GCC { llvm::opt::ArgStringList &CmdArgs) const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; protected: diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index b490ee5615d0c..9590ff976275c 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -397,16 +397,16 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, static_cast(getToolChain()); assert(TC.getTriple().isNVPTX() && "Wrong platform"); - StringRef GPUArchName; + BoundArch GPUArch; // If this is a CUDA action we need to extract the device architecture // from the Job's associated architecture, otherwise use the -march=arch // option. This option may come from -Xopenmp-target flag or the default // value. if (JA.isDeviceOffloading(Action::OFK_Cuda)) { - GPUArchName = JA.getOffloadingArch(); + GPUArch = JA.getOffloadingArch(); } else { - GPUArchName = Args.getLastArgValue(options::OPT_march_EQ); - if (GPUArchName.empty()) { + GPUArch = BoundArch(Args.getLastArgValue(options::OPT_march_EQ)); + if (GPUArch.empty()) { C.getDriver().Diag(diag::err_drv_offload_missing_gpu_arch) << getToolChain().getArchName() << getShortName(); return; @@ -414,13 +414,12 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, } // Obtain architecture from the action. - OffloadArch gpu_arch = StringToOffloadArch(GPUArchName); - assert(gpu_arch != OffloadArch::Unknown && + assert(GPUArch.Arch != OffloadArch::Unknown && "Device action expected to have an architecture."); // Check that our installation's ptxas supports gpu_arch. 
if (!Args.hasArg(options::OPT_no_cuda_version_check)) { - TC.CudaInstallation.CheckCudaVersionSupportsArch(gpu_arch); + TC.CudaInstallation.CheckCudaVersionSupportsArch(GPUArch.Arch); } ArgStringList CmdArgs; @@ -470,7 +469,7 @@ void NVPTX::Assembler::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-v"); CmdArgs.push_back("--gpu-name"); - CmdArgs.push_back(Args.MakeArgString(OffloadArchToString(gpu_arch))); + CmdArgs.push_back(Args.MakeArgString(GPUArch.ArchName)); CmdArgs.push_back("--output-file"); std::string OutputFileName = TC.getInputFilename(Output); @@ -556,15 +555,16 @@ void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA, auto *A = II.getAction(); assert(A->getInputs().size() == 1 && "Device offload action is expected to have a single input"); - StringRef GpuArch = A->getOffloadingArch(); + BoundArch GpuArch = A->getOffloadingArch(); assert(!GpuArch.empty() && "Device action expected to have associated a GPU architecture!"); - if (II.getType() == types::TY_PP_Asm && !shouldIncludePTX(Args, GpuArch)) + if (II.getType() == types::TY_PP_Asm && + !shouldIncludePTX(Args, GpuArch.ArchName)) continue; StringRef Kind = (II.getType() == types::TY_PP_Asm) ? 
"ptx" : "elf"; CmdArgs.push_back(Args.MakeArgString( - "--image3=kind=" + Kind + ",sm=" + GpuArch.drop_front(3) + + "--image3=kind=" + Kind + ",sm=" + GpuArch.ArchName.drop_front(3) + ",file=" + getToolChain().getInputFilename(II))); } @@ -758,9 +758,9 @@ NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple, llvm::opt::DerivedArgList * NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, - StringRef BoundArch, + BoundArch BA, Action::OffloadKind OffloadKind) const { - DerivedArgList *DAL = ToolChain::TranslateArgs(Args, BoundArch, OffloadKind); + DerivedArgList *DAL = ToolChain::TranslateArgs(Args, BA, OffloadKind); if (!DAL) DAL = new DerivedArgList(Args.getBaseArgs()); @@ -796,8 +796,7 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, void NVPTXToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadingKind) const { -} + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const {} void NVPTXToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, ArgStringList &CC1Args) const { @@ -884,9 +883,8 @@ CudaToolChain::CudaToolChain(const Driver &D, const llvm::Triple &Triple, void CudaToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadingKind) const { - HostTC.addClangTargetOptions(DriverArgs, CC1Args, BoundArch, - DeviceOffloadingKind); + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const { + HostTC.addClangTargetOptions(DriverArgs, CC1Args, BA, DeviceOffloadingKind); StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); assert((DeviceOffloadingKind == Action::OFK_OpenMP || @@ -987,10 +985,9 @@ std::string CudaToolChain::getInputFilename(const InputInfo &Input) const { llvm::opt::DerivedArgList * CudaToolChain::TranslateArgs(const 
llvm::opt::DerivedArgList &Args, - StringRef BoundArch, + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { - DerivedArgList *DAL = - HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); + DerivedArgList *DAL = HostTC.TranslateArgs(Args, BA, DeviceOffloadKind); if (!DAL) DAL = new DerivedArgList(Args.getBaseArgs()); @@ -1003,10 +1000,10 @@ CudaToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, } } - if (!BoundArch.empty()) { + if (BA) { DAL->eraseArg(options::OPT_march_EQ); DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), - BoundArch); + BA.ArchName); } return DAL; } @@ -1059,7 +1056,7 @@ void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args, } SanitizerMask CudaToolChain::getSupportedSanitizers( - StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const { + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { // The CudaToolChain only supports sanitizers in the sense that it allows // sanitizer arguments on the command line if they are supported by the host // toolchain. The CudaToolChain will actually ignore any command line @@ -1071,7 +1068,7 @@ SanitizerMask CudaToolChain::getSupportedSanitizers( // tolerate flags meant only for the host toolchain. // FIXME: Be accurate and use DeviceOffloadKind. 
- return HostTC.getSupportedSanitizers(BoundArch, DeviceOffloadKind); + return HostTC.getSupportedSanitizers(BA, DeviceOffloadKind); } VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D, diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h index 390ebad9442bc..da527b559dac7 100644 --- a/clang/lib/Driver/ToolChains/Cuda.h +++ b/clang/lib/Driver/ToolChains/Cuda.h @@ -85,13 +85,12 @@ class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain { const llvm::opt::ArgList &Args); llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + TranslateArgs(const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, @@ -145,12 +144,11 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain { std::string getInputFilename(const InputInfo &Input) const override; llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + TranslateArgs(const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; llvm::DenormalMode getDefaultDenormalModeForType( @@ -172,7 +170,7 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain { llvm::opt::ArgStringList &CC1Args) const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind 
DeviceOffloadKind) const override; VersionTuple diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 59744d1cb3e8c..2d307f5ee6366 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1254,9 +1254,9 @@ void Darwin::VerifyTripleForSDK(const llvm::opt::ArgList &Args, } std::string Darwin::ComputeEffectiveClangTriple(const ArgList &Args, - llvm::StringRef BoundArch, + BoundArch BA, types::ID InputType) const { - llvm::Triple Triple(ComputeLLVMTriple(Args, BoundArch, InputType)); + llvm::Triple Triple(ComputeLLVMTriple(Args, BA, InputType)); // If the target isn't initialized (e.g., an unknown Darwin platform, return // the default triple). Note: we intentionally do NOT call @@ -1349,10 +1349,9 @@ void DarwinClang::addClangWarningOptions(ArgStringList &CC1Args) const { void DarwinClang::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const { + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { - Darwin::addClangTargetOptions(DriverArgs, CC1Args, BoundArch, - DeviceOffloadKind); + Darwin::addClangTargetOptions(DriverArgs, CC1Args, BA, DeviceOffloadKind); } /// Take a path that speculatively points into Xcode and return the @@ -3161,8 +3160,7 @@ void DarwinClang::AddCCKextLibArgs(const ArgList &Args, CmdArgs.push_back(Args.MakeArgString(P)); } -DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args, - StringRef BoundArch, +DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args, BoundArch BA, Action::OffloadKind) const { DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); const OptTable &Opts = getDriver().getOpts(); @@ -3231,8 +3229,8 @@ DerivedArgList *MachO::TranslateArgs(const DerivedArgList &Args, // Add the arch options based on the particular spelling of -arch, to match // how the driver works. 
- if (!BoundArch.empty()) { - StringRef Name = BoundArch; + if (BA) { + StringRef Name = BA.ArchName; const Option MCpu = Opts.getOption(options::OPT_mcpu_EQ); const Option MArch = Opts.getOption(options::OPT_march_EQ); @@ -3440,11 +3438,10 @@ bool Darwin::isSizedDeallocationUnavailable() const { void MachO::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { - ToolChain::addClangTargetOptions(DriverArgs, CC1Args, BoundArch, - DeviceOffloadKind); + ToolChain::addClangTargetOptions(DriverArgs, CC1Args, BA, DeviceOffloadKind); // On arm64e, we enable all the features required for the Darwin userspace // ABI @@ -3491,10 +3488,9 @@ void MachO::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, void Darwin::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const { + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { - MachO::addClangTargetOptions(DriverArgs, CC1Args, BoundArch, - DeviceOffloadKind); + MachO::addClangTargetOptions(DriverArgs, CC1Args, BA, DeviceOffloadKind); // When compiling device code (e.g. SPIR-V for HIP), skip host-specific // flags like -faligned-alloc-unavailable and -fno-sized-deallocation @@ -3660,14 +3656,13 @@ void Darwin::addClangCC1ASTargetOptions( } DerivedArgList * -Darwin::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, +Darwin::TranslateArgs(const DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { // First get the generic Apple args, before moving onto Darwin-specific ones. - DerivedArgList *DAL = - MachO::TranslateArgs(Args, BoundArch, DeviceOffloadKind); + DerivedArgList *DAL = MachO::TranslateArgs(Args, BA, DeviceOffloadKind); // If no architecture is bound, none of the translations here are relevant. 
- if (BoundArch.empty()) + if (!BA) return DAL; // Add an explicit version min argument for the deployment target. We do this @@ -3696,12 +3691,12 @@ Darwin::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, } } - auto Arch = tools::darwin::getArchTypeForMachOArchName(BoundArch); + auto Arch = tools::darwin::getArchTypeForMachOArchName(BA.ArchName); if ((Arch == llvm::Triple::arm || Arch == llvm::Triple::thumb)) { if (Args.hasFlag(options::OPT_fomit_frame_pointer, options::OPT_fno_omit_frame_pointer, false)) getDriver().Diag(clang::diag::warn_drv_unsupported_opt_for_target) - << "-fomit-frame-pointer" << BoundArch; + << "-fomit-frame-pointer" << BA.ArchName; } return DAL; @@ -4052,12 +4047,11 @@ void Darwin::CheckObjCARC() const { } SanitizerMask -Darwin::getSupportedSanitizers(StringRef BoundArch, +Darwin::getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64; const bool IsAArch64 = getTriple().getArch() == llvm::Triple::aarch64; - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; diff --git a/clang/lib/Driver/ToolChains/Darwin.h b/clang/lib/Driver/ToolChains/Darwin.h index a737d2ac799c4..7b5f1980a2dc8 100644 --- a/clang/lib/Driver/ToolChains/Darwin.h +++ b/clang/lib/Driver/ToolChains/Darwin.h @@ -147,8 +147,7 @@ class LLVM_LIBRARY_VISIBILITY MachO : public ToolChain { void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; private: @@ -251,7 +250,7 @@ class LLVM_LIBRARY_VISIBILITY MachO : public ToolChain { bool HasNativeLLVMSupport() const override; 
llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + TranslateArgs(const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; bool IsBlocksDefault() const override { @@ -404,7 +403,7 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public AppleMachO { ~Darwin() override; std::string ComputeEffectiveClangTriple(const llvm::opt::ArgList &Args, - llvm::StringRef BoundArch, + BoundArch BA, types::ID InputType) const override; /// @name Darwin Specific Toolchain Implementation @@ -596,8 +595,7 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public AppleMachO { void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void addClangCC1ASTargetOptions( @@ -619,7 +617,7 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public AppleMachO { bool isCrossCompiling() const override { return false; } llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + TranslateArgs(const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; CXXStdlibType GetDefaultCXXStdlibType() const override; @@ -656,7 +654,7 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public AppleMachO { bool SupportsEmbeddedBitcode() const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; }; @@ -686,8 +684,7 @@ class LLVM_LIBRARY_VISIBILITY DarwinClang : public Darwin { void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void AddLinkARCArgs(const llvm::opt::ArgList &Args, diff 
--git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 5f536b5d96f90..1d74a34583311 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -528,7 +528,7 @@ static void processVSRuntimeLibrary(const ToolChain &TC, const ArgList &Args, } void Flang::AddAMDGPUTargetArgs(const ArgList &Args, ArgStringList &CmdArgs, - StringRef BoundArch, + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { if (Arg *A = Args.getLastArg(options::OPT_mcode_object_version_EQ)) { StringRef Val = A->getValue(); @@ -539,11 +539,11 @@ void Flang::AddAMDGPUTargetArgs(const ArgList &Args, ArgStringList &CmdArgs, } const ToolChain &TC = getToolChain(); - TC.addClangTargetOptions(Args, CmdArgs, BoundArch, DeviceOffloadKind); + TC.addClangTargetOptions(Args, CmdArgs, BA, DeviceOffloadKind); } void Flang::AddNVPTXTargetArgs(const ArgList &Args, ArgStringList &CmdArgs, - StringRef BoundArch, + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { // we cannot use addClangTargetOptions, as it appends unsupported args for // flang: -fcuda-is-device, -fno-threadsafe-statics, @@ -580,7 +580,7 @@ void Flang::AddNVPTXTargetArgs(const ArgList &Args, ArgStringList &CmdArgs, } void Flang::addTargetOptions(const ArgList &Args, ArgStringList &CmdArgs, - StringRef BoundArch, + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { const ToolChain &TC = getToolChain(); const llvm::Triple &Triple = TC.getEffectiveTriple(); @@ -607,11 +607,11 @@ void Flang::addTargetOptions(const ArgList &Args, ArgStringList &CmdArgs, case llvm::Triple::r600: case llvm::Triple::amdgcn: getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false); - AddAMDGPUTargetArgs(Args, CmdArgs, BoundArch, DeviceOffloadKind); + AddAMDGPUTargetArgs(Args, CmdArgs, BA, DeviceOffloadKind); break; case llvm::Triple::nvptx: case llvm::Triple::nvptx64: - AddNVPTXTargetArgs(Args, CmdArgs, BoundArch, DeviceOffloadKind); + AddNVPTXTargetArgs(Args, 
CmdArgs, BA, DeviceOffloadKind); break; case llvm::Triple::riscv64: getTargetFeatures(D, Triple, Args, CmdArgs, /*ForAs*/ false); diff --git a/clang/lib/Driver/ToolChains/Flang.h b/clang/lib/Driver/ToolChains/Flang.h index f08baa0fd5c12..a887301c703cd 100644 --- a/clang/lib/Driver/ToolChains/Flang.h +++ b/clang/lib/Driver/ToolChains/Flang.h @@ -64,8 +64,7 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool { /// \param [in] BoundArch The bound architecture for offload compilation /// \param [in] DeviceOffloadKind The offload kind void addTargetOptions(const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CmdArgs, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const; /// Add specific options for AArch64 target. @@ -82,13 +81,11 @@ class LLVM_LIBRARY_VISIBILITY Flang : public Tool { /// \param [in] BoundArch The bound architecture for offload compilation /// \param [in] DeviceOffloadKind The offload kind void AddAMDGPUTargetArgs(const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CmdArgs, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const; void AddNVPTXTargetArgs(const llvm::opt::ArgList &Args, - llvm::opt::ArgStringList &CmdArgs, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CmdArgs, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const; /// Add specific options for LoongArch64 target. 
diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp index 1218868ac3bda..c45ae14f4e643 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp @@ -477,14 +477,13 @@ bool FreeBSD::isPIEDefault(const llvm::opt::ArgList &Args) const { } SanitizerMask -FreeBSD::getSupportedSanitizers(StringRef BoundArch, +FreeBSD::getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { const bool IsAArch64 = getTriple().getArch() == llvm::Triple::aarch64; const bool IsX86 = getTriple().getArch() == llvm::Triple::x86; const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64; const bool IsMIPS64 = getTriple().isMIPS64(); - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; diff --git a/clang/lib/Driver/ToolChains/FreeBSD.h b/clang/lib/Driver/ToolChains/FreeBSD.h index 79a7dbaafe39e..55400e17b5ea4 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.h +++ b/clang/lib/Driver/ToolChains/FreeBSD.h @@ -87,7 +87,7 @@ class LLVM_LIBRARY_VISIBILITY FreeBSD : public Generic_ELF { getDefaultUnwindTableLevel(const llvm::opt::ArgList &Args) const override; bool isPIEDefault(const llvm::opt::ArgList &Args) const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; unsigned GetDefaultDwarfVersion() const override { return 4; } // Until dtrace (via CTF) and LLDB can deal with distributed debug info, diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index d63d0dbd96416..abde9fa10482d 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -330,9 +330,9 @@ 
Fuchsia::Fuchsia(const Driver &D, const llvm::Triple &Triple, } std::string Fuchsia::ComputeEffectiveClangTriple(const ArgList &Args, - llvm::StringRef BoundArch, + BoundArch BA, types::ID InputType) const { - llvm::Triple Triple(ComputeLLVMTriple(Args, BoundArch, InputType)); + llvm::Triple Triple(ComputeLLVMTriple(Args, BA, InputType)); return Triple.str(); } @@ -366,7 +366,7 @@ ToolChain::CXXStdlibType Fuchsia::GetCXXStdlibType(const ArgList &Args) const { } void Fuchsia::addClangTargetOptions(const ArgList &DriverArgs, - ArgStringList &CC1Args, StringRef BoundArch, + ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind) const { if (!DriverArgs.hasFlag(options::OPT_fuse_init_array, options::OPT_fno_use_init_array, true)) @@ -473,10 +473,9 @@ void Fuchsia::AddCXXStdlibLibArgs(const ArgList &Args, } SanitizerMask -Fuchsia::getSupportedSanitizers(StringRef BoundArch, +Fuchsia::getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); Res |= SanitizerKind::Address; Res |= SanitizerKind::HWAddress; Res |= SanitizerKind::PointerCompare; diff --git a/clang/lib/Driver/ToolChains/Fuchsia.h b/clang/lib/Driver/ToolChains/Fuchsia.h index daeb936b394f4..85d3b5594b672 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.h +++ b/clang/lib/Driver/ToolChains/Fuchsia.h @@ -81,11 +81,11 @@ class LLVM_LIBRARY_VISIBILITY Fuchsia : public ToolChain { } std::string ComputeEffectiveClangTriple(const llvm::opt::ArgList &Args, - llvm::StringRef BoundArch, + BoundArch BA, types::ID InputType) const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; SanitizerMask getDefaultSanitizers() const override; @@ -100,8 +100,7 @@ class LLVM_LIBRARY_VISIBILITY Fuchsia : public ToolChain { void 
addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 0bb51de630c99..b58c10145e7f3 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -3454,8 +3454,7 @@ Generic_GCC::addLibStdCxxIncludePaths(const llvm::opt::ArgList &DriverArgs, } llvm::opt::DerivedArgList * -Generic_GCC::TranslateArgs(const llvm::opt::DerivedArgList &Args, - StringRef BoundArch, +Generic_GCC::TranslateArgs(const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { if (DeviceOffloadKind == Action::OFK_None || DeviceOffloadKind == Action::OFK_Host) @@ -3483,13 +3482,13 @@ Generic_GCC::TranslateArgs(const llvm::opt::DerivedArgList &Args, } // Add the bound architecture to the arguments list if present. - if (!BoundArch.empty()) { + if (!BA.empty()) { options::ID Opt = getTriple().isARM() || getTriple().isPPC() || getTriple().isAArch64() || getTriple().isAMDGPU() ? 
options::OPT_mcpu_EQ : options::OPT_march_EQ; DAL->eraseArg(Opt); - DAL->AddJoinedArg(nullptr, Opts.getOption(Opt), BoundArch); + DAL->AddJoinedArg(nullptr, Opts.getOption(Opt), BA.ArchName); } return DAL; @@ -3498,8 +3497,7 @@ Generic_GCC::TranslateArgs(const llvm::opt::DerivedArgList &Args, void Generic_ELF::anchor() {} void Generic_ELF::addClangTargetOptions(const ArgList &DriverArgs, - ArgStringList &CC1Args, - StringRef BoundArch, + ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind) const { if (!DriverArgs.hasFlag(options::OPT_fuse_init_array, options::OPT_fno_use_init_array, true)) diff --git a/clang/lib/Driver/ToolChains/Gnu.h b/clang/lib/Driver/ToolChains/Gnu.h index a4c4de65cd44f..d27ce3076a28f 100644 --- a/clang/lib/Driver/ToolChains/Gnu.h +++ b/clang/lib/Driver/ToolChains/Gnu.h @@ -373,7 +373,7 @@ class LLVM_LIBRARY_VISIBILITY Generic_GCC : public ToolChain { bool isPICDefaultForced() const override; bool IsIntegratedAssemblerDefault() const override; llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + TranslateArgs(const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; protected: @@ -442,8 +442,7 @@ class LLVM_LIBRARY_VISIBILITY Generic_ELF : public Generic_GCC { void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; virtual std::string getDynamicLinker(const llvm::opt::ArgList &Args) const { diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp b/clang/lib/Driver/ToolChains/HIPAMD.cpp index 185d733216538..139a7a6e90703 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.cpp +++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp @@ -234,7 +234,7 @@ HIPAMDToolChain::HIPAMDToolChain(const Driver &D, const llvm::Triple &Triple, void HIPAMDToolChain::addClangTargetOptions( const 
llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadingKind) const { + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const { assert(DeviceOffloadingKind == Action::OFK_HIP && "Only HIP offloading kinds are supported for GPUs."); @@ -279,8 +279,7 @@ void HIPAMDToolChain::addClangTargetOptions( return; // No DeviceLibs for SPIR-V. } - for (auto BCFile : - getDeviceLibs(DriverArgs, BoundArch, DeviceOffloadingKind)) { + for (auto BCFile : getDeviceLibs(DriverArgs, BA, DeviceOffloadingKind)) { CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" : "-mlink-bitcode-file"); CC1Args.push_back(DriverArgs.MakeArgStringRef(BCFile.Path)); @@ -289,10 +288,10 @@ void HIPAMDToolChain::addClangTargetOptions( llvm::opt::DerivedArgList * HIPAMDToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, - StringRef BoundArch, + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { llvm::opt::DerivedArgList *DAL = - ROCMToolChain::TranslateArgs(Args, BoundArch, DeviceOffloadKind); + ROCMToolChain::TranslateArgs(Args, BA, DeviceOffloadKind); return DAL; } @@ -335,9 +334,9 @@ VersionTuple HIPAMDToolChain::computeMSVCVersion(const Driver *D, llvm::SmallVector HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs, - llvm::StringRef BoundArch, + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const { - assert(!BoundArch.empty() && "Must have an explicit GPU arch."); + assert(BA && "Must have an explicit GPU arch."); llvm::SmallVector BCLibs; const llvm::Triple &TT = getEffectiveTriple(); @@ -347,7 +346,7 @@ HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs, TT.getEnvironment() == llvm::Triple::LLVM) return {}; - StringRef GpuArch = getProcessorFromTargetID(getTriple(), BoundArch); + StringRef GpuArch = getProcessorFromTargetID(getTriple(), BA.ArchName); if (GpuArch == "amdgcnspirv") return {}; @@ -385,7 +384,7 @@ 
HIPAMDToolChain::getDeviceLibs(const llvm::opt::ArgList &DriverArgs, } // Add common device libraries like ocml etc. - for (auto N : getCommonDeviceLibNames(DriverArgs, BoundArch, GpuArch, + for (auto N : getCommonDeviceLibNames(DriverArgs, BA.ArchName, GpuArch, DeviceOffloadingKind)) BCLibs.emplace_back(N); diff --git a/clang/lib/Driver/ToolChains/HIPAMD.h b/clang/lib/Driver/ToolChains/HIPAMD.h index 00643b1c2b256..a156f9144f7a8 100644 --- a/clang/lib/Driver/ToolChains/HIPAMD.h +++ b/clang/lib/Driver/ToolChains/HIPAMD.h @@ -62,13 +62,12 @@ class LLVM_LIBRARY_VISIBILITY HIPAMDToolChain final : public ROCMToolChain { } llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + TranslateArgs(const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override; void @@ -82,7 +81,7 @@ class LLVM_LIBRARY_VISIBILITY HIPAMDToolChain final : public ROCMToolChain { void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; llvm::SmallVector - getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch, + getDeviceLibs(const llvm::opt::ArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; VersionTuple diff --git a/clang/lib/Driver/ToolChains/HIPSPV.cpp b/clang/lib/Driver/ToolChains/HIPSPV.cpp index 0d93e55137889..d6900c767d1f7 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.cpp +++ b/clang/lib/Driver/ToolChains/HIPSPV.cpp @@ -152,7 +152,7 @@ HIPSPVToolChain::HIPSPVToolChain(const Driver &D, const llvm::Triple &Triple, void HIPSPVToolChain::addClangTargetOptions( const llvm::opt::ArgList 
&DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadingKind) const { + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const { if (!HostTC) { assert(DeviceOffloadingKind == Action::OFK_None && @@ -160,8 +160,7 @@ void HIPSPVToolChain::addClangTargetOptions( return; } - HostTC->addClangTargetOptions(DriverArgs, CC1Args, BoundArch, - DeviceOffloadingKind); + HostTC->addClangTargetOptions(DriverArgs, CC1Args, BA, DeviceOffloadingKind); assert(DeviceOffloadingKind == Action::OFK_HIP && "Only HIP offloading kinds are supported for GPUs."); @@ -181,7 +180,7 @@ void HIPSPVToolChain::addClangTargetOptions( {"-fvisibility=hidden", "-fapply-global-visibility-to-externs"}); for (const BitCodeLibraryInfo &BCFile : - getDeviceLibs(DriverArgs, BoundArch, DeviceOffloadingKind)) + getDeviceLibs(DriverArgs, BA, DeviceOffloadingKind)) CC1Args.append( {"-mlink-builtin-bitcode", DriverArgs.MakeArgString(BCFile.Path)}); } @@ -243,7 +242,7 @@ void HIPSPVToolChain::AddHIPIncludeArgs(const ArgList &DriverArgs, llvm::SmallVector HIPSPVToolChain::getDeviceLibs( - const llvm::opt::ArgList &DriverArgs, llvm::StringRef BoundArch, + const llvm::opt::ArgList &DriverArgs, BoundArch BA, const Action::OffloadKind DeviceOffloadingKind) const { llvm::SmallVector BCLibs; if (!DriverArgs.hasFlag(options::OPT_offloadlib, options::OPT_no_offloadlib, @@ -307,7 +306,7 @@ HIPSPVToolChain::getDeviceLibs( } SanitizerMask HIPSPVToolChain::getSupportedSanitizers( - StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const { + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { // The HIPSPVToolChain only supports sanitizers in the sense that it allows // sanitizer arguments on the command line if they are supported by the host // toolchain. The HIPSPVToolChain will actually ignore any command line @@ -320,8 +319,8 @@ SanitizerMask HIPSPVToolChain::getSupportedSanitizers( // FIXME: Be accurate and use DeviceOffloadKind. 
if (HostTC) - return HostTC->getSupportedSanitizers(BoundArch, DeviceOffloadKind); - return ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + return HostTC->getSupportedSanitizers(BA, DeviceOffloadKind); + return ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); } VersionTuple HIPSPVToolChain::computeMSVCVersion(const Driver *D, diff --git a/clang/lib/Driver/ToolChains/HIPSPV.h b/clang/lib/Driver/ToolChains/HIPSPV.h index 8e2fd91a4b7ac..337c9c9993876 100644 --- a/clang/lib/Driver/ToolChains/HIPSPV.h +++ b/clang/lib/Driver/ToolChains/HIPSPV.h @@ -58,8 +58,7 @@ class LLVM_LIBRARY_VISIBILITY HIPSPVToolChain final : public ToolChain { void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override; CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override; @@ -74,11 +73,11 @@ class LLVM_LIBRARY_VISIBILITY HIPSPVToolChain final : public ToolChain { void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; llvm::SmallVector - getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch, + getDeviceLibs(const llvm::opt::ArgList &Args, BoundArch BA, const Action::OffloadKind DeviceOffloadKind) const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; VersionTuple diff --git a/clang/lib/Driver/ToolChains/HIPUtility.cpp b/clang/lib/Driver/ToolChains/HIPUtility.cpp index e174132cf0f3f..001f9f3400a06 100644 --- a/clang/lib/Driver/ToolChains/HIPUtility.cpp +++ b/clang/lib/Driver/ToolChains/HIPUtility.cpp @@ -308,15 +308,15 @@ void HIP::constructHIPFatbinCommand(Compilation &C, const JobAction &JA, const auto *A = 
II.getAction(); const llvm::Triple &InputTriple = A->getOffloadingToolChain()->getTriple(); - auto ArchStr = llvm::StringRef(A->getOffloadingArch()); + BoundArch BA = A->getOffloadingArch(); BundlerTargetArg += ',' + OffloadKind + '-'; - if (ArchStr == "amdgcnspirv") + if (BA.ArchName == "amdgcnspirv") BundlerTargetArg += normalizeForBundler(llvm::Triple("spirv64-amd-amdhsa"), true); else - BundlerTargetArg += normalizeForBundler(InputTriple, !ArchStr.empty()); - if (!ArchStr.empty()) - BundlerTargetArg += '-' + ArchStr.str(); + BundlerTargetArg += normalizeForBundler(InputTriple, !BA.empty()); + if (BA) + BundlerTargetArg += '-' + BA.ArchName.str(); } BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg)); diff --git a/clang/lib/Driver/ToolChains/HLSL.cpp b/clang/lib/Driver/ToolChains/HLSL.cpp index 834b8acc78734..558408cc969f0 100644 --- a/clang/lib/Driver/ToolChains/HLSL.cpp +++ b/clang/lib/Driver/ToolChains/HLSL.cpp @@ -431,7 +431,7 @@ clang::driver::toolchains::HLSLToolChain::parseTargetProfile( } DerivedArgList * -HLSLToolChain::TranslateArgs(const DerivedArgList &Args, StringRef BoundArch, +HLSLToolChain::TranslateArgs(const DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); diff --git a/clang/lib/Driver/ToolChains/HLSL.h b/clang/lib/Driver/ToolChains/HLSL.h index 0806c252c1073..83a94d45ad35a 100644 --- a/clang/lib/Driver/ToolChains/HLSL.h +++ b/clang/lib/Driver/ToolChains/HLSL.h @@ -76,7 +76,7 @@ class LLVM_LIBRARY_VISIBILITY HLSLToolChain : public ToolChain { bool isPICDefaultForced() const override { return false; } llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + TranslateArgs(const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; static std::optional parseTargetProfile(StringRef TargetProfile); diff --git a/clang/lib/Driver/ToolChains/Haiku.cpp 
b/clang/lib/Driver/ToolChains/Haiku.cpp index 083769f2c7954..a155f22b75a79 100644 --- a/clang/lib/Driver/ToolChains/Haiku.cpp +++ b/clang/lib/Driver/ToolChains/Haiku.cpp @@ -279,10 +279,9 @@ Tool *Haiku::buildLinker() const { return new tools::haiku::Linker(*this); } bool Haiku::HasNativeLLVMSupport() const { return true; } SanitizerMask -Haiku::getSupportedSanitizers(StringRef BoundArch, +Haiku::getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); Res |= SanitizerKind::Address; diff --git a/clang/lib/Driver/ToolChains/Haiku.h b/clang/lib/Driver/ToolChains/Haiku.h index 03dbbb8a50e07..bb68bc515c9b0 100644 --- a/clang/lib/Driver/ToolChains/Haiku.h +++ b/clang/lib/Driver/ToolChains/Haiku.h @@ -62,7 +62,7 @@ class LLVM_LIBRARY_VISIBILITY Haiku : public Generic_ELF { } SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; unsigned GetDefaultDwarfVersion() const override { return 4; } diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index ab207317667c6..b671db98a7798 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -780,7 +780,7 @@ unsigned HexagonToolChain::getOptimizationLevel( void HexagonToolChain::addClangTargetOptions(const ArgList &DriverArgs, ArgStringList &CC1Args, - StringRef BoundArch, + BoundArch BA, Action::OffloadKind) const { bool UseInitArrayDefault = getTriple().isMusl(); diff --git a/clang/lib/Driver/ToolChains/Hexagon.h b/clang/lib/Driver/ToolChains/Hexagon.h index 9c4a92ba79e67..1f848a8de061d 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.h +++ b/clang/lib/Driver/ToolChains/Hexagon.h @@ -77,8 +77,7 @@ class LLVM_LIBRARY_VISIBILITY HexagonToolChain : public 
Linux { void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index 512788d235fec..f3d356305245e 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -484,10 +484,10 @@ static void setPAuthABIInTriple(const Driver &D, const ArgList &Args, } std::string Linux::ComputeEffectiveClangTriple(const llvm::opt::ArgList &Args, - llvm::StringRef BoundArch, + BoundArch BA, types::ID InputType) const { std::string TripleString = - Generic_ELF::ComputeEffectiveClangTriple(Args, BoundArch, InputType); + Generic_ELF::ComputeEffectiveClangTriple(Args, BA, InputType); if (getTriple().isAArch64()) { llvm::Triple Triple(TripleString); setPAuthABIInTriple(getDriver(), Args, Triple); @@ -552,12 +552,12 @@ static void handlePAuthABI(const Driver &D, const ArgList &DriverArgs, void Linux::addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - StringRef BoundArch, + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { llvm::Triple Triple(ComputeEffectiveClangTriple(DriverArgs)); if (Triple.isAArch64() && Triple.getEnvironment() == llvm::Triple::PAuthTest) handlePAuthABI(getDriver(), DriverArgs, CC1Args); - Generic_ELF::addClangTargetOptions(DriverArgs, CC1Args, BoundArch, + Generic_ELF::addClangTargetOptions(DriverArgs, CC1Args, BA, DeviceOffloadKind); } @@ -958,7 +958,7 @@ bool Linux::IsMathErrnoDefault() const { } SanitizerMask -Linux::getSupportedSanitizers(StringRef BoundArch, +Linux::getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { const bool IsX86 = getTriple().getArch() == llvm::Triple::x86; const bool IsX86_64 = 
getTriple().getArch() == llvm::Triple::x86_64; @@ -977,8 +977,7 @@ Linux::getSupportedSanitizers(StringRef BoundArch, const bool IsSystemZ = getTriple().getArch() == llvm::Triple::systemz; const bool IsHexagon = getTriple().getArch() == llvm::Triple::hexagon; const bool IsAndroid = getTriple().isAndroid(); - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; diff --git a/clang/lib/Driver/ToolChains/Linux.h b/clang/lib/Driver/ToolChains/Linux.h index 63f77b4b4d87c..6c81bbc71f7c2 100644 --- a/clang/lib/Driver/ToolChains/Linux.h +++ b/clang/lib/Driver/ToolChains/Linux.h @@ -52,18 +52,17 @@ class LLVM_LIBRARY_VISIBILITY Linux : public Generic_ELF { bool isPIEDefault(const llvm::opt::ArgList &Args) const override; bool IsMathErrnoDefault() const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void addProfileRTLibs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const override; std::string ComputeEffectiveClangTriple( - const llvm::opt::ArgList &Args, llvm::StringRef BoundArch = {}, + const llvm::opt::ArgList &Args, BoundArch BA = {}, types::ID InputType = types::TY_INVALID) const override; std::string computeSysRoot() const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; std::string getDynamicLinker(const llvm::opt::ArgList &Args) const override; diff --git a/clang/lib/Driver/ToolChains/MSP430.cpp b/clang/lib/Driver/ToolChains/MSP430.cpp index 5152512cf28da..d543637c311d2 100644 --- a/clang/lib/Driver/ToolChains/MSP430.cpp 
+++ b/clang/lib/Driver/ToolChains/MSP430.cpp @@ -157,7 +157,7 @@ void MSP430ToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, void MSP430ToolChain::addClangTargetOptions(const ArgList &DriverArgs, ArgStringList &CC1Args, - StringRef BoundArch, + BoundArch BA, Action::OffloadKind) const { CC1Args.push_back("-nostdsysteminc"); diff --git a/clang/lib/Driver/ToolChains/MSP430.h b/clang/lib/Driver/ToolChains/MSP430.h index 41935e1b5173a..865bcfb7cd3da 100644 --- a/clang/lib/Driver/ToolChains/MSP430.h +++ b/clang/lib/Driver/ToolChains/MSP430.h @@ -33,8 +33,7 @@ class LLVM_LIBRARY_VISIBILITY MSP430ToolChain : public Generic_ELF { AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind) const override; bool isPICDefault() const override { return false; } diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index 0796bdff96d46..eb81f1b4e142c 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -915,8 +915,9 @@ VersionTuple MSVCToolChain::computeMSVCVersion(const Driver *D, return MSVT; } -std::string MSVCToolChain::ComputeEffectiveClangTriple( - const ArgList &Args, llvm::StringRef BoundArch, types::ID InputType) const { +std::string +MSVCToolChain::ComputeEffectiveClangTriple(const ArgList &Args, BoundArch BA, + types::ID InputType) const { // The MSVC version doesn't care about the architecture, even though it // may look at the triple internally. VersionTuple MSVT = computeMSVCVersion(/*D=*/nullptr, Args); @@ -926,7 +927,7 @@ std::string MSVCToolChain::ComputeEffectiveClangTriple( // For the rest of the triple, however, a computed architecture name may // be needed. 
llvm::Triple Triple( - ToolChain::ComputeEffectiveClangTriple(Args, BoundArch, InputType)); + ToolChain::ComputeEffectiveClangTriple(Args, BA, InputType)); if (Triple.getEnvironment() == llvm::Triple::MSVC) { StringRef ObjFmt = Triple.getEnvironmentName().split('-').second; if (ObjFmt.empty()) @@ -939,9 +940,8 @@ std::string MSVCToolChain::ComputeEffectiveClangTriple( } SanitizerMask MSVCToolChain::getSupportedSanitizers( - StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const { - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; @@ -1078,8 +1078,7 @@ static void TranslatePermissiveMinus(Arg *A, llvm::opt::DerivedArgList &DAL, llvm::opt::DerivedArgList * MSVCToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, - StringRef BoundArch, - Action::OffloadKind OFK) const { + BoundArch BA, Action::OffloadKind OFK) const { DerivedArgList *DAL = new DerivedArgList(Args.getBaseArgs()); const OptTable &Opts = getDriver().getOpts(); @@ -1134,7 +1133,7 @@ MSVCToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, } void MSVCToolChain::addClangTargetOptions( - const ArgList &DriverArgs, ArgStringList &CC1Args, StringRef BoundArch, + const ArgList &DriverArgs, ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { // MSVC STL kindly allows removing all usages of typeid by defining // _HAS_STATIC_RTTI to 0. 
Do so, when compiling with -fno-rtti diff --git a/clang/lib/Driver/ToolChains/MSVC.h b/clang/lib/Driver/ToolChains/MSVC.h index 9f09ff3bbb48d..0ec73fb59b921 100644 --- a/clang/lib/Driver/ToolChains/MSVC.h +++ b/clang/lib/Driver/ToolChains/MSVC.h @@ -49,7 +49,7 @@ class LLVM_LIBRARY_VISIBILITY MSVCToolChain : public ToolChain { const llvm::opt::ArgList &Args); llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + TranslateArgs(const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; UnwindTableLevel @@ -114,10 +114,10 @@ class LLVM_LIBRARY_VISIBILITY MSVCToolChain : public ToolChain { const llvm::opt::ArgList &Args) const override; std::string ComputeEffectiveClangTriple(const llvm::opt::ArgList &Args, - llvm::StringRef BoundArch, + BoundArch BA, types::ID InputType) const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void printVerboseInfo(raw_ostream &OS) const override; @@ -126,8 +126,7 @@ class LLVM_LIBRARY_VISIBILITY MSVCToolChain : public ToolChain { void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; protected: diff --git a/clang/lib/Driver/ToolChains/Managarm.cpp b/clang/lib/Driver/ToolChains/Managarm.cpp index 2779136fe9ee3..4566e7258e929 100644 --- a/clang/lib/Driver/ToolChains/Managarm.cpp +++ b/clang/lib/Driver/ToolChains/Managarm.cpp @@ -198,11 +198,10 @@ void Managarm::addLibStdCxxIncludePaths( } SanitizerMask -Managarm::getSupportedSanitizers(StringRef BoundArch, +Managarm::getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64; - SanitizerMask Res = - 
ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; Res |= SanitizerKind::KernelAddress; diff --git a/clang/lib/Driver/ToolChains/Managarm.h b/clang/lib/Driver/ToolChains/Managarm.h index 52d0e296cddd3..eef026cc191aa 100644 --- a/clang/lib/Driver/ToolChains/Managarm.h +++ b/clang/lib/Driver/ToolChains/Managarm.h @@ -41,7 +41,7 @@ class LLVM_LIBRARY_VISIBILITY Managarm : public Generic_ELF { } SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; std::string computeSysRoot() const override; diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index 11dca2fa4231d..4237a6906be3b 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -627,9 +627,8 @@ toolchains::MinGW::GetExceptionModel(const ArgList &Args) const { } SanitizerMask toolchains::MinGW::getSupportedSanitizers( - StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const { - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; @@ -746,7 +745,7 @@ void toolchains::MinGW::AddClangSystemIncludeArgs(const ArgList &DriverArgs, void toolchains::MinGW::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const { + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { if (Arg *A = DriverArgs.getLastArg(options::OPT_mguard_EQ)) { StringRef GuardArgs = A->getValue(); if 
(GuardArgs == "none") { diff --git a/clang/lib/Driver/ToolChains/MinGW.h b/clang/lib/Driver/ToolChains/MinGW.h index eaec447a16ac6..36ab7b6c6e295 100644 --- a/clang/lib/Driver/ToolChains/MinGW.h +++ b/clang/lib/Driver/ToolChains/MinGW.h @@ -74,7 +74,7 @@ class LLVM_LIBRARY_VISIBILITY MinGW : public ToolChain { bool isPICDefaultForced() const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; llvm::ExceptionHandling GetExceptionModel( @@ -85,8 +85,7 @@ class LLVM_LIBRARY_VISIBILITY MinGW : public ToolChain { llvm::opt::ArgStringList &CC1Args) const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void AddClangCXXStdlibIncludeArgs( const llvm::opt::ArgList &DriverArgs, diff --git a/clang/lib/Driver/ToolChains/NetBSD.cpp b/clang/lib/Driver/ToolChains/NetBSD.cpp index ade8a07cb0b1a..31a5723c17c2f 100644 --- a/clang/lib/Driver/ToolChains/NetBSD.cpp +++ b/clang/lib/Driver/ToolChains/NetBSD.cpp @@ -532,12 +532,11 @@ llvm::ExceptionHandling NetBSD::GetExceptionModel(const ArgList &Args) const { } SanitizerMask -NetBSD::getSupportedSanitizers(StringRef BoundArch, +NetBSD::getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { const bool IsX86 = getTriple().getArch() == llvm::Triple::x86; const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64; - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); if (IsX86 || IsX86_64) { Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; @@ -562,7 +561,7 @@ NetBSD::getSupportedSanitizers(StringRef BoundArch, } void NetBSD::addClangTargetOptions(const ArgList 
&DriverArgs, - ArgStringList &CC1Args, StringRef BoundArch, + ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind) const { const SanitizerArgs &SanArgs = getSanitizerArgs(DriverArgs); if (SanArgs.hasAnySanitizer()) diff --git a/clang/lib/Driver/ToolChains/NetBSD.h b/clang/lib/Driver/ToolChains/NetBSD.h index d6072ab37dc11..c6a40ff34036c 100644 --- a/clang/lib/Driver/ToolChains/NetBSD.h +++ b/clang/lib/Driver/ToolChains/NetBSD.h @@ -77,13 +77,12 @@ class LLVM_LIBRARY_VISIBILITY NetBSD : public Generic_ELF { const llvm::opt::ArgList &Args) const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; protected: diff --git a/clang/lib/Driver/ToolChains/OHOS.cpp b/clang/lib/Driver/ToolChains/OHOS.cpp index 8ea8980d3746f..dda00c9250a5f 100644 --- a/clang/lib/Driver/ToolChains/OHOS.cpp +++ b/clang/lib/Driver/ToolChains/OHOS.cpp @@ -380,10 +380,9 @@ void OHOS::addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const { } SanitizerMask -OHOS::getSupportedSanitizers(StringRef BoundArch, +OHOS::getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; diff --git a/clang/lib/Driver/ToolChains/OHOS.h b/clang/lib/Driver/ToolChains/OHOS.h index 493ec156dbd26..bc4bd96e31ed5 100644 --- a/clang/lib/Driver/ToolChains/OHOS.h +++ b/clang/lib/Driver/ToolChains/OHOS.h @@ -80,7 +80,7 @@ class LLVM_LIBRARY_VISIBILITY OHOS : public Generic_ELF { 
StringRef SysRoot) const override; void addExtraOpts(llvm::opt::ArgStringList &CmdArgs) const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void addProfileRTLibs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const override; diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index a8ac97ac5f1b9..14680dc4b0e5b 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -287,12 +287,11 @@ void openbsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, } SanitizerMask -OpenBSD::getSupportedSanitizers(StringRef BoundArch, +OpenBSD::getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { const bool IsX86 = getTriple().getArch() == llvm::Triple::x86; const bool IsX86_64 = getTriple().getArch() == llvm::Triple::x86_64; - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); if (IsX86 || IsX86_64) { Res |= SanitizerKind::Vptr; Res |= SanitizerKind::Fuzzer; diff --git a/clang/lib/Driver/ToolChains/OpenBSD.h b/clang/lib/Driver/ToolChains/OpenBSD.h index 3008932b7d757..a92c4d321bac6 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.h +++ b/clang/lib/Driver/ToolChains/OpenBSD.h @@ -98,7 +98,7 @@ class LLVM_LIBRARY_VISIBILITY OpenBSD : public Generic_ELF { unsigned GetDefaultDwarfVersion() const override { return 2; } SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; protected: diff --git a/clang/lib/Driver/ToolChains/PS4CPU.cpp b/clang/lib/Driver/ToolChains/PS4CPU.cpp index f9f66d53a0e81..8c8d0bf033585 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.cpp +++ b/clang/lib/Driver/ToolChains/PS4CPU.cpp @@ -565,9 
+565,8 @@ Tool *toolchains::PS5CPU::buildLinker() const { } SanitizerMask toolchains::PS4PS5Base::getSupportedSanitizers( - StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const { - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); Res |= SanitizerKind::Address; Res |= SanitizerKind::PointerCompare; Res |= SanitizerKind::PointerSubtract; @@ -576,15 +575,14 @@ SanitizerMask toolchains::PS4PS5Base::getSupportedSanitizers( } SanitizerMask toolchains::PS5CPU::getSupportedSanitizers( - StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const { - SanitizerMask Res = - PS4PS5Base::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { + SanitizerMask Res = PS4PS5Base::getSupportedSanitizers(BA, DeviceOffloadKind); Res |= SanitizerKind::Thread; return Res; } void toolchains::PS4PS5Base::addClangTargetOptions( - const ArgList &DriverArgs, ArgStringList &CC1Args, StringRef BoundArch, + const ArgList &DriverArgs, ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const { // PS4/PS5 do not use init arrays. 
if (DriverArgs.hasArg(options::OPT_fuse_init_array)) { diff --git a/clang/lib/Driver/ToolChains/PS4CPU.h b/clang/lib/Driver/ToolChains/PS4CPU.h index 002e4d313be8b..32d6a9d48e939 100644 --- a/clang/lib/Driver/ToolChains/PS4CPU.h +++ b/clang/lib/Driver/ToolChains/PS4CPU.h @@ -107,13 +107,12 @@ class LLVM_LIBRARY_VISIBILITY PS4PS5Base : public Generic_ELF { } SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, - Action::OffloadKind DeviceOffloadingKind) const override; + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const override; void addClangWarningOptions(llvm::opt::ArgStringList &CC1Args) const override; @@ -178,7 +177,7 @@ class LLVM_LIBRARY_VISIBILITY PS5CPU : public PS4PS5Base { unsigned GetDefaultDwarfVersion() const override { return 5; } SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; const char *getLinkerBaseName() const override { return "lld"; } diff --git a/clang/lib/Driver/ToolChains/SPIRVOpenMP.cpp b/clang/lib/Driver/ToolChains/SPIRVOpenMP.cpp index 161f8939929aa..23a5789cb3d2d 100644 --- a/clang/lib/Driver/ToolChains/SPIRVOpenMP.cpp +++ b/clang/lib/Driver/ToolChains/SPIRVOpenMP.cpp @@ -22,7 +22,7 @@ SPIRVOpenMPToolChain::SPIRVOpenMPToolChain(const Driver &D, void SPIRVOpenMPToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadingKind) const { + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const { if (DeviceOffloadingKind != Action::OFK_OpenMP) return; diff --git a/clang/lib/Driver/ToolChains/SPIRVOpenMP.h b/clang/lib/Driver/ToolChains/SPIRVOpenMP.h index 75f9f5626eea4..5177c6b662564 
100644 --- a/clang/lib/Driver/ToolChains/SPIRVOpenMP.h +++ b/clang/lib/Driver/ToolChains/SPIRVOpenMP.h @@ -21,8 +21,7 @@ class LLVM_LIBRARY_VISIBILITY SPIRVOpenMPToolChain : public SPIRVToolChain { void addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, - Action::OffloadKind DeviceOffloadingKind) const override; + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const override; const ToolChain &HostTC; }; diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 7ee7eaf58a740..d41b5c06da53f 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -128,17 +128,15 @@ SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple, void SYCLToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, Action::OffloadKind DeviceOffloadingKind) const { - HostTC.addClangTargetOptions(DriverArgs, CC1Args, BoundArch, - DeviceOffloadingKind); + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const { + HostTC.addClangTargetOptions(DriverArgs, CC1Args, BA, DeviceOffloadingKind); } llvm::opt::DerivedArgList * SYCLToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, - StringRef BoundArch, + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { - DerivedArgList *DAL = - HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind); + DerivedArgList *DAL = HostTC.TranslateArgs(Args, BA, DeviceOffloadKind); bool IsNewDAL = false; if (!DAL) { @@ -174,10 +172,10 @@ SYCLToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args, } const OptTable &Opts = getDriver().getOpts(); - if (!BoundArch.empty()) { + if (BA) { DAL->eraseArg(options::OPT_march_EQ); DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), - BoundArch); + BA.ArchName); } return DAL; } diff --git a/clang/lib/Driver/ToolChains/SYCL.h 
b/clang/lib/Driver/ToolChains/SYCL.h index ae512723c0cea..d404ce2f93923 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ b/clang/lib/Driver/ToolChains/SYCL.h @@ -27,12 +27,11 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { } llvm::opt::DerivedArgList * - TranslateArgs(const llvm::opt::DerivedArgList &Args, StringRef BoundArch, + TranslateArgs(const llvm::opt::DerivedArgList &Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; bool useIntegratedAs() const override { return true; } diff --git a/clang/lib/Driver/ToolChains/Serenity.cpp b/clang/lib/Driver/ToolChains/Serenity.cpp index 60a966fac39e9..d43dffec8d31f 100644 --- a/clang/lib/Driver/ToolChains/Serenity.cpp +++ b/clang/lib/Driver/ToolChains/Serenity.cpp @@ -167,9 +167,9 @@ void tools::serenity::Linker::ConstructJob(Compilation &C, const JobAction &JA, } SanitizerMask -Serenity::getSupportedSanitizers(StringRef BoundArch, +Serenity::getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { - return ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind) | + return ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind) | SanitizerKind::KernelAddress; } diff --git a/clang/lib/Driver/ToolChains/Serenity.h b/clang/lib/Driver/ToolChains/Serenity.h index 3ecd6c6b4f4a5..2cc25c0f9430d 100644 --- a/clang/lib/Driver/ToolChains/Serenity.h +++ b/clang/lib/Driver/ToolChains/Serenity.h @@ -65,7 +65,7 @@ class LLVM_LIBRARY_VISIBILITY Serenity final : public Generic_ELF { bool isPIEDefault(const llvm::opt::ArgList &) const override { return true; } SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; bool 
IsMathErrnoDefault() const override { return false; } diff --git a/clang/lib/Driver/ToolChains/Solaris.cpp b/clang/lib/Driver/ToolChains/Solaris.cpp index 9af721ecd053e..e5f1cbb033c3b 100644 --- a/clang/lib/Driver/ToolChains/Solaris.cpp +++ b/clang/lib/Driver/ToolChains/Solaris.cpp @@ -329,12 +329,11 @@ Solaris::Solaris(const Driver &D, const llvm::Triple &Triple, } SanitizerMask -Solaris::getSupportedSanitizers(StringRef BoundArch, +Solaris::getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { const bool IsSparc = getTriple().getArch() == llvm::Triple::sparc; const bool IsX86 = getTriple().getArch() == llvm::Triple::x86; - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); // FIXME: Omit SparcV9 and X86_64 until 64-bit support is figured out. if (IsSparc || IsX86) { Res |= SanitizerKind::Address; diff --git a/clang/lib/Driver/ToolChains/Solaris.h b/clang/lib/Driver/ToolChains/Solaris.h index c31463ee8e13a..54bdd2e88b049 100644 --- a/clang/lib/Driver/ToolChains/Solaris.h +++ b/clang/lib/Driver/ToolChains/Solaris.h @@ -65,7 +65,7 @@ class LLVM_LIBRARY_VISIBILITY Solaris : public Generic_ELF { llvm::opt::ArgStringList &CC1Args) const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; const char *getDefaultLinker() const override; diff --git a/clang/lib/Driver/ToolChains/VEToolchain.cpp b/clang/lib/Driver/ToolChains/VEToolchain.cpp index 6093d378f0ec2..b676acf3728f6 100644 --- a/clang/lib/Driver/ToolChains/VEToolchain.cpp +++ b/clang/lib/Driver/ToolChains/VEToolchain.cpp @@ -106,8 +106,7 @@ void VEToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, } void VEToolChain::addClangTargetOptions(const ArgList &DriverArgs, - ArgStringList &CC1Args, - StringRef BoundArch, + ArgStringList &CC1Args, BoundArch 
BA, Action::OffloadKind) const { CC1Args.push_back("-nostdsysteminc"); bool UseInitArrayDefault = true; diff --git a/clang/lib/Driver/ToolChains/VEToolchain.h b/clang/lib/Driver/ToolChains/VEToolchain.h index 2a49f8ed37f01..71cf071ebecec 100644 --- a/clang/lib/Driver/ToolChains/VEToolchain.h +++ b/clang/lib/Driver/ToolChains/VEToolchain.h @@ -36,8 +36,7 @@ class LLVM_LIBRARY_VISIBILITY VEToolChain : public Linux { llvm::opt::ArgStringList &CC1Args) const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void AddClangCXXStdlibIncludeArgs( const llvm::opt::ArgList &DriverArgs, diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index 5bc43b37d06bf..8ca09b11836e5 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -325,8 +325,7 @@ bool WebAssembly::SupportsProfiling() const { return false; } bool WebAssembly::HasNativeLLVMSupport() const { return true; } void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs, - ArgStringList &CC1Args, - StringRef BoundArch, + ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind) const { if (!DriverArgs.hasFlag(options::OPT_fuse_init_array, options::OPT_fno_use_init_array, true)) @@ -572,9 +571,8 @@ void WebAssembly::AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args, } SanitizerMask WebAssembly::getSupportedSanitizers( - StringRef BoundArch, Action::OffloadKind DeviceOffloadKind) const { - SanitizerMask Res = - ToolChain::getSupportedSanitizers(BoundArch, DeviceOffloadKind); + BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { + SanitizerMask Res = ToolChain::getSupportedSanitizers(BA, DeviceOffloadKind); if (getTriple().isOSEmscripten()) { Res |= SanitizerKind::Vptr | SanitizerKind::Leak; } diff --git 
a/clang/lib/Driver/ToolChains/WebAssembly.h b/clang/lib/Driver/ToolChains/WebAssembly.h index 756e2a03ff915..f91987ba3883a 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.h +++ b/clang/lib/Driver/ToolChains/WebAssembly.h @@ -53,8 +53,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssembly final : public ToolChain { unsigned GetDefaultDwarfVersion() const override { return 4; } void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; RuntimeLibType GetDefaultRuntimeLibType() const override; CXXStdlibType GetCXXStdlibType(const llvm::opt::ArgList &Args) const override; @@ -67,7 +66,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssembly final : public ToolChain { void AddCXXStdlibLibArgs(const llvm::opt::ArgList &Args, llvm::opt::ArgStringList &CmdArgs) const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; const char *getDefaultLinker() const override; diff --git a/clang/lib/Driver/ToolChains/XCore.cpp b/clang/lib/Driver/ToolChains/XCore.cpp index 37452eff55baa..a381047e5baf1 100644 --- a/clang/lib/Driver/ToolChains/XCore.cpp +++ b/clang/lib/Driver/ToolChains/XCore.cpp @@ -126,8 +126,7 @@ void XCoreToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, } void XCoreToolChain::addClangTargetOptions(const ArgList &DriverArgs, - ArgStringList &CC1Args, - StringRef BoundArch, + ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind) const { CC1Args.push_back("-nostdsysteminc"); // Set `-fno-use-cxa-atexit` to default. 
diff --git a/clang/lib/Driver/ToolChains/XCore.h b/clang/lib/Driver/ToolChains/XCore.h index 1f3272530ddad..cc7638d6566d4 100644 --- a/clang/lib/Driver/ToolChains/XCore.h +++ b/clang/lib/Driver/ToolChains/XCore.h @@ -68,8 +68,7 @@ class LLVM_LIBRARY_VISIBILITY XCoreToolChain : public ToolChain { llvm::opt::ArgStringList &CC1Args) const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, - llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, + llvm::opt::ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; void AddClangCXXStdlibIncludeArgs( const llvm::opt::ArgList &DriverArgs, diff --git a/clang/lib/Driver/ToolChains/ZOS.cpp b/clang/lib/Driver/ToolChains/ZOS.cpp index fc7b7a6ed186c..c9e61cf5f43f0 100644 --- a/clang/lib/Driver/ToolChains/ZOS.cpp +++ b/clang/lib/Driver/ToolChains/ZOS.cpp @@ -28,7 +28,7 @@ ZOS::ZOS(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) ZOS::~ZOS() {} void ZOS::addClangTargetOptions(const ArgList &DriverArgs, - ArgStringList &CC1Args, StringRef BoundArch, + ArgStringList &CC1Args, BoundArch BA, Action::OffloadKind DeviceOffloadKind) const { // Pass "-faligned-alloc-unavailable" only when the user hasn't manually // enabled or disabled aligned allocations. 
diff --git a/clang/lib/Driver/ToolChains/ZOS.h b/clang/lib/Driver/ToolChains/ZOS.h index 5d0b2fef79ec8..c406d20e33f25 100644 --- a/clang/lib/Driver/ToolChains/ZOS.h +++ b/clang/lib/Driver/ToolChains/ZOS.h @@ -82,8 +82,7 @@ class LLVM_LIBRARY_VISIBILITY ZOS : public ToolChain { void addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, - llvm::StringRef BoundArch, - Action::OffloadKind DeviceOffloadingKind) const override; + BoundArch BA, Action::OffloadKind DeviceOffloadingKind) const override; const char *getDefaultLinker() const override { return "/bin/ld"; } diff --git a/clang/test/Driver/hip-link-bundle-archive.hip b/clang/test/Driver/hip-link-bundle-archive.hip index 6606e19790a52..f1902957fbc13 100644 --- a/clang/test/Driver/hip-link-bundle-archive.hip +++ b/clang/test/Driver/hip-link-bundle-archive.hip @@ -68,19 +68,19 @@ // RUN: -nogpuinc -nogpulib %s -fgpu-rdc %t/hipBundled2.lib \ // RUN: 2>&1 | FileCheck -check-prefix=MSVC %s -// GNU1: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}[[LIB:libhipBundled\.a]]" "-targets=hip-amdgcn-amd-amdhsa-unknown-gfx1030" "-output=[[A1030:.*\.a]]" "-allow-missing-bundles" -// GNU2: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}[[LIB:libhipBundled\.a\.5\.2]]" "-targets=hip-amdgcn-amd-amdhsa-unknown-gfx1030" "-output=[[A1030:.*\.a]]" "-allow-missing-bundles" -// GNU: "{{.*}}lld{{.*}}" {{.*}}"-plugin-opt=mcpu=gfx1030" {{.*}} "[[A1030]]" -// GNU: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}[[LIB]]" "-targets=hip-amdgcn-amd-amdhsa-unknown-gfx906" "-output=[[A906:.*\.a]]" "-allow-missing-bundles" +// GNU1: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}[[LIB:libhipBundled\.a]]" "-targets=hip-amdgcn-amd-amdhsa-unknown-gfx906" "-output=[[A906:.*\.a]]" "-allow-missing-bundles" +// GNU2: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}[[LIB:libhipBundled\.a\.5\.2]]" 
"-targets=hip-amdgcn-amd-amdhsa-unknown-gfx906" "-output=[[A906:.*\.a]]" "-allow-missing-bundles" // GNU: "{{.*}}lld{{.*}}" {{.*}}"-plugin-opt=mcpu=gfx906" {{.*}} "[[A906]]" +// GNU: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}[[LIB]]" "-targets=hip-amdgcn-amd-amdhsa-unknown-gfx1030" "-output=[[A1030:.*\.a]]" "-allow-missing-bundles" +// GNU: "{{.*}}lld{{.*}}" {{.*}}"-plugin-opt=mcpu=gfx1030" {{.*}} "[[A1030]]" // GNU-L: "{{.*}}ld{{.*}}" {{.*}}"-o" "a.out" {{.*}}"-lhipBundled" // GNU-LA: "{{.*}}ld{{.*}}" {{.*}}"-o" "a.out" {{.*}}"-l:libhipBundled.a" // GNU-A: "{{.*}}ld{{.*}}" {{.*}}"-o" "a.out" "{{.*}}[[LIB]]" // NONARCHIVE-NOT: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*libNonArchive\.a}}" // NONE-NOT: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*NoneExist\.a}}" -// MSVC: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}hipBundled2.lib" "-targets=hip-amdgcn-amd-amdhsa-unknown-gfx1030" "-output=[[A1030:.*\.a]]" "-allow-missing-bundles" -// MSVC: "{{.*}}lld{{.*}}" {{.*}}"-plugin-opt=mcpu=gfx1030" {{.*}} "[[A1030]]" // MSVC: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}hipBundled2.lib" "-targets=hip-amdgcn-amd-amdhsa-unknown-gfx906" "-output=[[A906:.*\.a]]" "-allow-missing-bundles" // MSVC: "{{.*}}lld{{.*}}" {{.*}}"-plugin-opt=mcpu=gfx906" {{.*}} "[[A906]]" +// MSVC: "{{.*}}clang-offload-bundler" "-unbundle" "-type=a" "-input={{.*}}hipBundled2.lib" "-targets=hip-amdgcn-amd-amdhsa-unknown-gfx1030" "-output=[[A1030:.*\.a]]" "-allow-missing-bundles" +// MSVC: "{{.*}}lld{{.*}}" {{.*}}"-plugin-opt=mcpu=gfx1030" {{.*}} "[[A1030]]" // MSVC: "{{.*}}link{{.*}}" {{.*}}"-out:a.exe" {{.*}}hipBundled2.lib" diff --git a/clang/test/Driver/hip-phases.hip b/clang/test/Driver/hip-phases.hip index 4554519960133..c63f1259c4414 100644 --- a/clang/test/Driver/hip-phases.hip +++ b/clang/test/Driver/hip-phases.hip @@ -257,8 +257,8 @@ // DBIN-DAG: [[P4:[0-9]+]]: assembler, {[[P3]]}, object, 
(device-[[T]], [[ARCH]]) // DBIN-DAG: [[P5:[0-9]+]]: linker, {[[P4]]}, image, (device-[[T]], [[ARCH]]) // DBIN-DAG: [[P6:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P5]]}, image -// DBIN-DAG: [[P7:[0-9]+]]: linker, {[[P6]]}, hip-fatbin, (device-hip, ) -// DBIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P7]]}, hip-fatbin +// DBIN-DAG: [[P7:[0-9]+]]: linker, {[[P6]]}, hip-fatbin, (device-hip) +// DBIN-DAG: [[P8:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P7]]}, hip-fatbin // DBIN-NOT: host // @@ -357,8 +357,8 @@ // DBIN2-DAG: [[P11:[0-9]+]]: assembler, {[[P10]]}, object, (device-[[T]], [[ARCH2]]) // DBIN2-DAG: [[P12:[0-9]+]]: linker, {[[P11]]}, image, (device-[[T]], [[ARCH2]]) // DBIN2-DAG: [[P13:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P12]]}, image -// DBIN2-DAG: [[P14:[0-9]+]]: linker, {[[P6]], [[P13]]}, hip-fatbin, (device-hip, ) -// DBIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:)" {[[P14]]}, hip-fatbin +// DBIN2-DAG: [[P14:[0-9]+]]: linker, {[[P6]], [[P13]]}, hip-fatbin, (device-hip) +// DBIN2-DAG: [[P15:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa)" {[[P14]]}, hip-fatbin // DBIN2-NOT: host // @@ -391,8 +391,8 @@ // DASM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) // DASM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, assembler, (device-[[T]], [[ARCH2]]) // DASM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, assembler -// DASM2-BUNDLE: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, assembler, (device-hip, ) -// DASM2-NOBUNDLE-NOT: clang-offload-bundler, {[[P4]], [[P9]]}, assembler, (device-hip, ) +// DASM2-BUNDLE: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, assembler, (device-hip) +// DASM2-NOBUNDLE-NOT: clang-offload-bundler, {[[P4]], [[P9]]}, assembler, (device-hip) // DASM2-NOT: host // @@ -518,8 +518,8 @@ // PPE-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", 
[[T:hip]], (device-[[T]], [[ARCH:gfx803]]) // PPE-DAG: [[P1:[0-9]+]]: preprocessor, {[[P0]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH]]) // PPE-DAG: [[P2:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH]])" {[[P1]]}, [[T]]-cpp-output -// PPEB-DAG: [[P3:[0-9]+]]: clang-offload-bundler, {[[P2]]}, [[T]]-cpp-output, (device-hip, ) -// PPEN-NOT: clang-offload-bundler, {{.*}}, [[T]]-cpp-output, (device-hip, ) +// PPEB-DAG: [[P3:[0-9]+]]: clang-offload-bundler, {[[P2]]}, [[T]]-cpp-output, (device-hip) +// PPEN-NOT: clang-offload-bundler, {{.*}}, [[T]]-cpp-output, (device-hip) // PPE-NOT: host // PPE2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) @@ -528,8 +528,8 @@ // PPE2-DAG: [[P5:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T]], (device-[[T]], [[ARCH2:gfx900]]) // PPE2-DAG: [[P6:[0-9]+]]: preprocessor, {[[P5]]}, [[T]]-cpp-output, (device-[[T]], [[ARCH2]]) // PPE2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P6]]}, [[T]]-cpp-output -// PPE2B-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P2]], [[P9]]}, [[T]]-cpp-output, (device-hip, ) -// PPE2N-NOT: clang-offload-bundler, {{.*}}, [[T]]-cpp-output, (device-hip, ) +// PPE2B-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P2]], [[P9]]}, [[T]]-cpp-output, (device-hip) +// PPE2N-NOT: clang-offload-bundler, {{.*}}, [[T]]-cpp-output, (device-hip) // PPE2-NOT: host // LLVM-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]], (device-[[T]], [[ARCH:gfx803]]) @@ -549,7 +549,7 @@ // LLVM2-DAG: [[P7:[0-9]+]]: compiler, {[[P6]]}, ir, (device-[[T]], [[ARCH2]]) // LLVM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, ir, (device-[[T]], [[ARCH2]]) // LLVM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, ir -// LLVM2-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, ir, (device-hip, ) +// LLVM2-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, ir, (device-hip) // LLVM2-NOT: host // 
PPELLVM2-DAG: [[P0:[0-9]+]]: input, "{{.*}}hip-phases.hip", [[T:hip]]-cpp-output @@ -560,7 +560,7 @@ // PPELLVM2-DAG: [[P7:[0-9]+]]: compiler, {[[P1]]}, ir, (device-[[T]], [[ARCH2:gfx900]]) // PPELLVM2-DAG: [[P8:[0-9]+]]: backend, {[[P7]]}, ir, (device-[[T]], [[ARCH2]]) // PPELLVM2-DAG: [[P9:[0-9]+]]: offload, "device-[[T]] (amdgcn-amd-amdhsa:[[ARCH2]])" {[[P8]]}, ir -// PPELLVM2-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, ir, (device-hip, ) +// PPELLVM2-DAG: [[P10:[0-9]+]]: clang-offload-bundler, {[[P4]], [[P9]]}, ir, (device-hip) // PPELLVM2-NOT: host // Test mixed HIP and C++ compilation. HIP program should have HIP offload kind. diff --git a/clang/test/Driver/hip-target-id.hip b/clang/test/Driver/hip-target-id.hip index 1a1363d577d27..b7889b1634404 100644 --- a/clang/test/Driver/hip-target-id.hip +++ b/clang/test/Driver/hip-target-id.hip @@ -65,4 +65,4 @@ // RUN: --offload-arch=gfx906 \ // RUN: --no-offload-new-driver --rocm-path=%S/Inputs/rocm \ // RUN: %s 2>&1 | FileCheck -check-prefix=MULTI %s -// MULTI: "-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx900:xnack+,hipv4-amdgcn-amd-amdhsa--gfx900:xnack-,hipv4-amdgcn-amd-amdhsa--gfx906,hipv4-amdgcn-amd-amdhsa--gfx908:sramecc+,hipv4-amdgcn-amd-amdhsa--gfx908:sramecc-" +// MULTI: "-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx900:xnack+,hipv4-amdgcn-amd-amdhsa--gfx900:xnack-,hipv4-amdgcn-amd-amdhsa--gfx908:sramecc+,hipv4-amdgcn-amd-amdhsa--gfx908:sramecc-,hipv4-amdgcn-amd-amdhsa--gfx906" diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip b/clang/test/Driver/hip-toolchain-no-rdc.hip index a97a25a7b05a8..6f148ce01ff30 100644 --- a/clang/test/Driver/hip-toolchain-no-rdc.hip +++ b/clang/test/Driver/hip-toolchain-no-rdc.hip @@ -231,14 +231,14 @@ // Check mixed AMDGCNSPIRV and concrete GPU arch. 
// +// AMDGCNSPIRV: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}}"-emit-obj" {{.*}} "-target-cpu" "gfx900"{{.*}} "-o" "[[GFX900_OBJ:.*o]]" +// AMDGCNSPIRV: {{".*lld.*"}} {{.*}}"-plugin-opt=mcpu=gfx900" {{.*}} "-o" "[[GFX900_CO:.*out]]" {{.*}}"[[GFX900_OBJ]]" // AMDGCNSPIRV: "-cc1" "-triple" "spirv64-amd-amdhsa" {{.*}}"-emit-llvm-bc" {{.*}}"-flto=full"{{.*}} "-fembed-bitcode=marker" "-disable-llvm-passes" {{.*}} "-o" "[[AMDGCNSPV_BC:.*bc]]" // AMDGCNSPIRV: {{".*llvm-link.*"}} "-o" "[[AMDGCNSPV_TMP:.*bc]]" "[[AMDGCNSPV_BC]]" // AMDGCNSPIRV: {{".*llvm-spirv.*"}} "--spirv-max-version=1.6" "--spirv-ext=+all" {{.*}} "[[AMDGCNSPV_TMP]]" {{.*}}"-o" "[[AMDGCNSPV_CO:.*out]]" -// AMDGCNSPIRV: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}}"-emit-obj" {{.*}} "-target-cpu" "gfx900"{{.*}} "-o" "[[GFX900_OBJ:.*o]]" -// AMDGCNSPIRV: {{".*lld.*"}} {{.*}}"-plugin-opt=mcpu=gfx900" {{.*}} "-o" "[[GFX900_CO:.*out]]" {{.*}}"[[GFX900_OBJ]]" // AMDGCNSPIRV: {{".*clang-offload-bundler.*"}} "-type=o" -// AMDGCNSPIRV-SAME: "-targets={{.*}}hip-spirv64-amd-amdhsa--amdgcnspirv,hip-amdgcn-amd-amdhsa--gfx900" -// AMDGCNSPIRV-SAME: "-input=[[AMDGCNSPV_CO]]" "-input=[[GFX900_CO]]" +// AMDGCNSPIRV-SAME: "-targets={{.*}}hipv4-amdgcn-amd-amdhsa--gfx900,hipv4-spirv64-amd-amdhsa--amdgcnspirv" +// AMDGCNSPIRV-SAME: "-input=[[GFX900_CO]]" "-input=[[AMDGCNSPV_CO]]" // AMDGCNSPIRV-NEW: "-cc1" "-triple" "spirv64-amd-amdhsa" {{.*}}"-emit-llvm-bc" {{.*}} "-o" "[[AMDGCNSPV_BC:[^"]*]]" // Check verbose printing with the new driver. 
diff --git a/clang/unittests/Driver/DXCModeTest.cpp b/clang/unittests/Driver/DXCModeTest.cpp index 130da620b40b5..300edf6ff7145 100644 --- a/clang/unittests/Driver/DXCModeTest.cpp +++ b/clang/unittests/Driver/DXCModeTest.cpp @@ -13,6 +13,7 @@ #include "clang/Basic/DiagnosticIDs.h" #include "clang/Basic/DiagnosticOptions.h" #include "clang/Basic/LLVM.h" +#include "clang/Basic/OffloadArch.h" #include "clang/Basic/TargetOptions.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/CreateInvocationFromArgs.h" @@ -129,7 +130,7 @@ TEST(DxcModeTest, ValidatorVersionValidation) { DAL->append(A); std::unique_ptr TranslatedArgs{ - TC.TranslateArgs(*DAL, "0", Action::OffloadKind::OFK_None)}; + TC.TranslateArgs(*DAL, BoundArch("0"), Action::OffloadKind::OFK_None)}; EXPECT_NE(TranslatedArgs, nullptr); if (TranslatedArgs) { auto *A = @@ -150,7 +151,7 @@ TEST(DxcModeTest, ValidatorVersionValidation) { DAL->append(A); TranslatedArgs.reset( - TC.TranslateArgs(*DAL, "0", Action::OffloadKind::OFK_None)); + TC.TranslateArgs(*DAL, BoundArch("0"), Action::OffloadKind::OFK_None)); EXPECT_EQ(Diags.getNumErrors(), 1u); EXPECT_STREQ( DiagConsumer->Errors.back().c_str(), @@ -166,7 +167,7 @@ TEST(DxcModeTest, ValidatorVersionValidation) { DAL->append(A); TranslatedArgs.reset( - TC.TranslateArgs(*DAL, "0", Action::OffloadKind::OFK_None)); + TC.TranslateArgs(*DAL, BoundArch("0"), Action::OffloadKind::OFK_None)); EXPECT_EQ(Diags.getNumErrors(), 2u); EXPECT_STREQ(DiagConsumer->Errors.back().c_str(), "invalid validator version : 1; format of validator version is " @@ -181,7 +182,7 @@ TEST(DxcModeTest, ValidatorVersionValidation) { DAL->append(A); TranslatedArgs.reset( - TC.TranslateArgs(*DAL, "0", Action::OffloadKind::OFK_None)); + TC.TranslateArgs(*DAL, BoundArch("0"), Action::OffloadKind::OFK_None)); EXPECT_EQ(Diags.getNumErrors(), 3u); EXPECT_STREQ( DiagConsumer->Errors.back().c_str(), @@ -197,7 +198,7 @@ TEST(DxcModeTest, ValidatorVersionValidation) { DAL->append(A); 
TranslatedArgs.reset( - TC.TranslateArgs(*DAL, "0", Action::OffloadKind::OFK_None)); + TC.TranslateArgs(*DAL, BoundArch("0"), Action::OffloadKind::OFK_None)); EXPECT_EQ(Diags.getNumErrors(), 4u); EXPECT_STREQ( DiagConsumer->Errors.back().c_str(), From ed84dca890f658ce9c5bbedb2f8a9ce498ec5d9a Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Mon, 22 Jun 2026 17:33:21 +0200 Subject: [PATCH 027/511] [InstCombine] Try folding align assumes with unknown offset (#204962) There are a few folds which don't depend on the offset of the alignment, but are nevertheless guarded on whether the offset is known. Run these folds unconditionally instead. --- llvm/include/llvm/IR/BundleAttributes.h | 1 + llvm/lib/Analysis/ValueTracking.cpp | 2 +- llvm/lib/IR/BundleAttributes.cpp | 6 +++-- .../InstCombine/InstCombineCalls.cpp | 9 ++++--- llvm/test/Transforms/InstCombine/assume.ll | 26 +++++++++++++++++++ 5 files changed, 38 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/IR/BundleAttributes.h b/llvm/include/llvm/IR/BundleAttributes.h index a456be4aa7a9a..dd43d4ed276ea 100644 --- a/llvm/include/llvm/IR/BundleAttributes.h +++ b/llvm/include/llvm/IR/BundleAttributes.h @@ -30,6 +30,7 @@ inline BundleAttr getBundleAttrFromOBU(OperandBundleUse OBU) { struct AssumeAlignInfo { const Use &Ptr; const Use &Alignment; + const Use *Offset; std::optional AlignmentVal; std::optional OffsetVal; }; diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 9334551005224..70345319ed57d 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1075,7 +1075,7 @@ void llvm::computeKnownBitsFromContext(const Value *V, KnownBits &Known, if (Elem.Index != AssumptionCache::ExprResultIdx) { if (auto OBU = I->getOperandBundleAt(Elem.Index); getBundleAttrFromOBU(OBU) == BundleAttr::Align) { - auto [Ptr, _, Alignment, Offset] = getAssumeAlignInfo(OBU); + auto [Ptr, _, _2, Alignment, Offset] = getAssumeAlignInfo(OBU); if (Ptr == 
V && Alignment && Offset && isPowerOf2_64(*Alignment) && isValidAssumeForContext(I, Q)) { Known.Zero |= (*Alignment - 1) & ~*Offset; diff --git a/llvm/lib/IR/BundleAttributes.cpp b/llvm/lib/IR/BundleAttributes.cpp index 2beb57c6dd710..4a6ed18714c2c 100644 --- a/llvm/lib/IR/BundleAttributes.cpp +++ b/llvm/lib/IR/BundleAttributes.cpp @@ -39,10 +39,12 @@ BundleAttr llvm::getBundleAttrFromID(uint32_t ID) { AssumeAlignInfo llvm::getAssumeAlignInfo(OperandBundleUse OBU) { assert(OBU.getTagID() == LLVMContext::OB_Align && OBU.Inputs.size() >= 2 && OBU.Inputs.size() <= 3); - AssumeAlignInfo Ret{OBU.Inputs[0], OBU.Inputs[1], std::nullopt, std::nullopt}; + AssumeAlignInfo Ret{OBU.Inputs[0], OBU.Inputs[1], nullptr, std::nullopt, + std::nullopt}; if (auto *Align = dyn_cast(OBU.Inputs[1])) Ret.AlignmentVal = Align->getZExtValue(); if (OBU.Inputs.size() == 3) { + Ret.Offset = &OBU.Inputs[2]; if (auto *Offset = dyn_cast(OBU.Inputs[2])) Ret.OffsetVal = Offset->getZExtValue(); } else { @@ -83,7 +85,7 @@ bool llvm::assumeBundleImpliesNonNull(const Value *Val, const Function *Context, OperandBundleUse OBU) { switch (getBundleAttrFromOBU(OBU)) { case BundleAttr::Align: { - auto [Ptr, _, Alignment, Offset] = getAssumeAlignInfo(OBU); + auto [Ptr, _, _2, Alignment, Offset] = getAssumeAlignInfo(OBU); return Ptr == Val && Alignment && Offset && isPowerOf2_64(*Alignment) && *Offset % *Alignment != 0; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 6992602f54fe9..ce9e4b836a56e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3701,9 +3701,9 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { llvm_unreachable("Unexpected Attribute"); case BundleAttr::Align: { // Try to remove redundant alignment assumptions. 
- auto [Ptr, _, Alignment, Offset] = getAssumeAlignInfo(OBU); + auto [Ptr, _, OffsetPtr, Alignment, Offset] = getAssumeAlignInfo(OBU); - if (!Alignment || !Offset) + if (!Alignment) break; // Remove align 1 and non-power-of-two bundles; they don't add any @@ -3716,10 +3716,13 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { GEP->getMaxPreservedAlignment(getDataLayout()) >= *Alignment) { Builder.CreateAlignmentAssumption( getDataLayout(), GEP->getPointerOperand(), *Alignment, - *Offset == 0 ? nullptr : Builder.getInt64(*Offset)); + OffsetPtr ? const_cast(OffsetPtr->get()) : nullptr); return RemoveBundle(); } + if (!Offset) + break; + Value *BasePtr; const APInt *PtrOffset; if (match(Ptr.get(), m_PtrAdd(m_Value(BasePtr), m_APInt(PtrOffset)))) { diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index 2d4d51b8bbe4b..69220811ac206 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -199,6 +199,16 @@ define void @align_on_gep_keeping_alignment(ptr %ptr, i64 %offset) { ret void } +define void @align_on_gep_keeping_alignment_variable_offset(ptr %ptr, i64 %offset, i64 %offset2) { +; CHECK-LABEL: @align_on_gep_keeping_alignment_variable_offset( +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 8, i64 [[OFFSET2:%.*]]) ] +; CHECK-NEXT: ret void +; + %ptr2 = getelementptr [8 x i8], ptr %ptr, i64 %offset + call void @llvm.assume(i1 true) [ "align"(ptr %ptr2, i64 8, i64 %offset2) ] + ret void +} + define void @align_on_gep_not_keeping_alignment(ptr %ptr, i64 %offset) { ; CHECK-LABEL: @align_on_gep_not_keeping_alignment( ; CHECK-NEXT: [[PTR2:%.*]] = getelementptr [4 x i8], ptr [[PTR:%.*]], i64 [[OFFSET:%.*]] @@ -242,6 +252,22 @@ define void @non_power_of_two_align(ptr %ptr) { ret void } +define void @non_power_of_two_align_variable_offset(ptr %ptr, i64 %i) { +; CHECK-LABEL: @non_power_of_two_align_variable_offset( +; CHECK-NEXT: ret 
void +; + call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 3, i64 %i) ] + ret void +} + +define void @align_1_variable_offset(ptr %ptr, i64 %i) { +; CHECK-LABEL: @align_1_variable_offset( +; CHECK-NEXT: ret void +; + call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 1, i64 %i) ] + ret void +} + ; Same check as in @foo1, but make sure it works if the assume is first too. define i32 @foo2(ptr %a) #0 { From 7b41b5c5e3cd3939e7932fa14c585eeb7680ce62 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 22 Jun 2026 10:38:22 -0500 Subject: [PATCH 028/511] [flang][OpenMP] Refactor semantic check of SINGLE construct (#204339) Extract it into a separate function and simplify the code. Avoid making the distinction between a clause appearing on the "begin" and the "end" directives for the purposes of emitting diagnostic messages. One change in behavior is that using the same list item multiple times in COPYPRIVATE clause(s) is an error regardless of the placement of the clauses. Previously in some cases it was treated as a warning. Part of the motivation is the goal of eliminating explicit definitions of end-directives for directives that are not delimited, e.g. "end single", but not "end declare_variant". 
--- flang/lib/Semantics/check-omp-structure.cpp | 130 ++++++++++-------- flang/lib/Semantics/check-omp-structure.h | 1 + .../Semantics/OpenMP/clause-validity01.f90 | 3 +- flang/test/Semantics/OpenMP/single03.f90 | 13 +- flang/test/Semantics/OpenMP/single04.f90 | 33 ++--- 5 files changed, 93 insertions(+), 87 deletions(-) diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index b61662995ad57..e643ceb25ce34 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -1283,63 +1283,7 @@ void OmpStructureChecker::Enter(const parser::OmpBlockConstruct &x) { } if (GetContext().directive == llvm::omp::Directive::OMPD_single) { - std::set singleCopyprivateSyms; - std::set endSingleCopyprivateSyms; - bool foundNowait{false}; - parser::CharBlock NowaitSource; - - auto catchCopyPrivateNowaitClauses = [&](const auto &dirSpec, bool isEnd) { - for (auto &clause : dirSpec.Clauses().v) { - if (clause.Id() == llvm::omp::Clause::OMPC_copyprivate) { - for (const auto &ompObject : GetOmpObjectList(clause)->v) { - const auto *name{parser::Unwrap(ompObject)}; - if (Symbol * symbol{name->symbol}) { - if (singleCopyprivateSyms.count(symbol)) { - if (isEnd) { - context_.Warn(common::UsageWarning::OpenMPUsage, name->source, - "The COPYPRIVATE clause with '%s' is already used on the SINGLE directive"_warn_en_US, - name->ToString()); - } else { - context_.Say(name->source, - "'%s' appears in more than one COPYPRIVATE clause on the SINGLE directive"_err_en_US, - name->ToString()); - } - } else if (endSingleCopyprivateSyms.count(symbol)) { - context_.Say(name->source, - "'%s' appears in more than one COPYPRIVATE clause on the END SINGLE directive"_err_en_US, - name->ToString()); - } else { - if (isEnd) { - endSingleCopyprivateSyms.insert(symbol); - } else { - singleCopyprivateSyms.insert(symbol); - } - } - } - } - } else if (clause.Id() == llvm::omp::Clause::OMPC_nowait) { - if (foundNowait) { 
- context_.Say(clause.source, - "At most one NOWAIT clause can appear on the SINGLE directive"_err_en_US); - } else { - foundNowait = !isEnd; - } - if (!NowaitSource.ToString().size()) { - NowaitSource = clause.source; - } - } - } - }; - catchCopyPrivateNowaitClauses(beginSpec, false); - if (endSpec) { - catchCopyPrivateNowaitClauses(*endSpec, true); - } - unsigned version{context_.langOptions().OpenMPVersion}; - if (version <= 52 && NowaitSource.ToString().size() && - (singleCopyprivateSyms.size() || endSingleCopyprivateSyms.size())) { - context_.Say(NowaitSource, - "NOWAIT clause must not be used with COPYPRIVATE clause on the SINGLE directive"_err_en_US); - } + CheckSingleConstruct(x); } switch (beginSpec.DirId()) { @@ -1387,6 +1331,78 @@ void OmpStructureChecker::Enter(const parser::OmpBlockConstruct &x) { } } +void OmpStructureChecker::CheckSingleConstruct( + const parser::OmpBlockConstruct &x) { + const parser::OmpDirectiveSpecification &beginSpec{x.BeginDir()}; + unsigned version{context_.langOptions().OpenMPVersion}; + SymbolSourceMap copyPrivateSyms; + parser::CharBlock nowaitSource1, nowaitSource2; + + auto catchCopyPrivateNowaitClauses = + [&](const parser::OmpDirectiveSpecification &spec, + parser::CharBlock &nowaitSource) { + for (auto &clause : spec.Clauses().v) { + llvm::omp::Clause clauseId{clause.Id()}; + if (clauseId == llvm::omp::Clause::OMPC_copyprivate) { + GetSymbolsInObjectList(*GetOmpObjectList(clause), copyPrivateSyms); + } else if (clauseId == llvm::omp::Clause::OMPC_nowait) { + if (nowaitSource.empty()) { + nowaitSource = clause.source; + } + } + } + }; + + catchCopyPrivateNowaitClauses(beginSpec, nowaitSource1); + if (auto &endSpec{x.EndDir()}) { + catchCopyPrivateNowaitClauses(*endSpec, nowaitSource2); + } + + std::string nowaitName{// + GetUpperName(llvm::omp::Clause::OMPC_nowait, version)}; + std::string copyPrivateName{ + GetUpperName(llvm::omp::Clause::OMPC_copyprivate, version)}; + std::string singleName{ + 
GetUpperName(llvm::omp::Directive::OMPD_single, version)}; + + std::pair last{nullptr, {}}; + bool reported{false}; + + for (auto [symbol, source] : copyPrivateSyms) { + if (symbol == last.first) { + if (!reported) { + context_ + .Say(source, "'%s' appears more than once in a %s clause"_err_en_US, + symbol->name().ToString(), copyPrivateName) + .Attach(last.second, "Previous occurrence of '%s'"_en_US, + symbol->name().ToString()); + reported = true; + } + } else { + reported = false; + } + last = std::make_pair(symbol, source); + } + + if (!nowaitSource1.empty() && !nowaitSource2.empty()) { + context_ + .Say(nowaitSource2, + // Match the message text with the one emitted by "CheckAllowed". + "At most one %s clause can appear on the %s directive"_err_en_US, + nowaitName, singleName) + .Attach(nowaitSource1, "Previous occurrence of %s"_en_US, nowaitName); + } + + if (version <= 52 && !copyPrivateSyms.empty() && + (!nowaitSource1.empty() || !nowaitSource2.empty())) { + parser::CharBlock source{ + !nowaitSource1.empty() ? 
nowaitSource1 : nowaitSource2}; + context_.Say(source, + "%s clause must not be used with %s clause on %s directive"_err_en_US, + nowaitName, copyPrivateName, singleName); + } +} + void OmpStructureChecker::CheckMasterNesting( const parser::OmpBlockConstruct &x) { // A MASTER region may not be `closely nested` inside a worksharing, loop, diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 1154fce5e00a7..1283feb32ef5f 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -345,6 +345,7 @@ class OmpStructureChecker : public OmpStructureCheckerBase { const parser::OmpReductionIdentifier &ident); void CheckReductionModifier(const parser::OmpReductionModifier &); void CheckLastprivateModifier(const parser::OmpLastprivateModifier &); + void CheckSingleConstruct(const parser::OmpBlockConstruct &x); void CheckMasterNesting(const parser::OmpBlockConstruct &x); void ChecksOnOrderedAsBlock(); void CheckBarrierNesting(const parser::OpenMPSimpleStandaloneConstruct &x); diff --git a/flang/test/Semantics/OpenMP/clause-validity01.f90 b/flang/test/Semantics/OpenMP/clause-validity01.f90 index bd8b9f343de77..ec89afd53ab0d 100644 --- a/flang/test/Semantics/OpenMP/clause-validity01.f90 +++ b/flang/test/Semantics/OpenMP/clause-validity01.f90 @@ -330,12 +330,11 @@ !$omp parallel b = 1 !ERROR: LASTPRIVATE clause is not allowed on the SINGLE directive - !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on the SINGLE directive + !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on SINGLE directive !$omp single private(a) lastprivate(c) nowait a = 3.14 !ERROR: COPYPRIVATE variable 'a' may not appear on a PRIVATE or FIRSTPRIVATE clause on a SINGLE construct !ERROR: At most one NOWAIT clause can appear on the SINGLE directive - !ERROR: At most one NOWAIT clause can appear on the SINGLE directive !ERROR: At most one NOWAIT clause can appear on the END SINGLE 
directive !$omp end single copyprivate(a) nowait nowait c = 2 diff --git a/flang/test/Semantics/OpenMP/single03.f90 b/flang/test/Semantics/OpenMP/single03.f90 index e64155c845c86..64fedc414829b 100644 --- a/flang/test/Semantics/OpenMP/single03.f90 +++ b/flang/test/Semantics/OpenMP/single03.f90 @@ -1,17 +1,12 @@ ! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -fopenmp-version=52 ! -! OpenMP Version 5.2 -! -! 2.10.2 single Construct -! Copyprivate and Nowait clauses are allowed in both clause and end clause - subroutine omp_single integer, save :: i integer :: j i = 10; j = 11 !ERROR: COPYPRIVATE variable 'i' is not PRIVATE or THREADPRIVATE in outer context - !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on the SINGLE directive + !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on SINGLE directive !$omp single copyprivate(i) nowait print *, "omp single", i !$omp end single @@ -23,7 +18,7 @@ subroutine omp_single !$omp end parallel !$omp parallel - !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on the SINGLE directive + !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on SINGLE directive !$omp single nowait print *, "omp single", i !ERROR: COPYPRIVATE variable 'i' is not PRIVATE or THREADPRIVATE in outer context @@ -32,14 +27,14 @@ subroutine omp_single !ERROR: COPYPRIVATE variable 'i' is not PRIVATE or THREADPRIVATE in outer context !$omp single copyprivate(i) print *, "omp single", i - !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on the SINGLE directive + !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on SINGLE directive !$omp end single nowait !ERROR: COPYPRIVATE variable 'j' may not appear on a PRIVATE or FIRSTPRIVATE clause on a SINGLE construct !$omp single private(j) copyprivate(j) print *, "omp single", j !ERROR: COPYPRIVATE variable 'j' may not appear on a PRIVATE or FIRSTPRIVATE clause on a SINGLE construct - !WARNING: The COPYPRIVATE clause with 'j' is 
already used on the SINGLE directive [-Wopenmp-usage] + !ERROR: 'j' appears more than once in a COPYPRIVATE clause !$omp end single copyprivate(j) !$omp single nowait diff --git a/flang/test/Semantics/OpenMP/single04.f90 b/flang/test/Semantics/OpenMP/single04.f90 index 7daa74ab62218..58d6e6f0582cb 100644 --- a/flang/test/Semantics/OpenMP/single04.f90 +++ b/flang/test/Semantics/OpenMP/single04.f90 @@ -1,10 +1,5 @@ ! RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -fopenmp-version=52 ! -! OpenMP Version 5.2 -! -! 2.10.2 single Construct -! Valid and invalid testcases for copyprivate and nowait clause on the single directive - program single ! Valid testcases !$omp single @@ -26,17 +21,17 @@ program single ! Invalid testcases !$omp single print *, x - !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on the SINGLE directive + !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on SINGLE directive !$omp end single copyprivate(x) nowait - !ERROR: 'x' appears in more than one COPYPRIVATE clause on the SINGLE directive + !ERROR: 'x' appears more than once in a COPYPRIVATE clause !$omp single copyprivate(x) copyprivate(x) print *, x !$omp end single !$omp single print *, x - !ERROR: 'x' appears in more than one COPYPRIVATE clause on the END SINGLE directive + !ERROR: 'x' appears more than once in a COPYPRIVATE clause !$omp end single copyprivate(x) copyprivate(x) !ERROR: At most one NOWAIT clause can appear on the SINGLE directive @@ -49,33 +44,33 @@ program single !ERROR: At most one NOWAIT clause can appear on the END SINGLE directive !$omp end single nowait nowait - !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on the SINGLE directive + !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on SINGLE directive !$omp single copyprivate(x) nowait print *, x - !WARNING: The COPYPRIVATE clause with 'x' is already used on the SINGLE directive [-Wopenmp-usage] + !ERROR: 'x' appears more than once in a COPYPRIVATE 
clause !ERROR: At most one NOWAIT clause can appear on the SINGLE directive !$omp end single copyprivate(x) nowait !$omp single copyprivate(x) print *, x - !WARNING: The COPYPRIVATE clause with 'x' is already used on the SINGLE directive [-Wopenmp-usage] - !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on the SINGLE directive + !ERROR: 'x' appears more than once in a COPYPRIVATE clause + !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on SINGLE directive !$omp end single copyprivate(x) nowait - !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on the SINGLE directive + !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on SINGLE directive !$omp single copyprivate(x, y) nowait print *, x - !WARNING: The COPYPRIVATE clause with 'x' is already used on the SINGLE directive [-Wopenmp-usage] - !ERROR: 'z' appears in more than one COPYPRIVATE clause on the END SINGLE directive + !ERROR: 'x' appears more than once in a COPYPRIVATE clause + !ERROR: 'z' appears more than once in a COPYPRIVATE clause !ERROR: At most one NOWAIT clause can appear on the SINGLE directive !$omp end single copyprivate(x, z) copyprivate(z) nowait - !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on the SINGLE directive + !ERROR: NOWAIT clause must not be used with COPYPRIVATE clause on SINGLE directive !$omp single copyprivate(x) nowait copyprivate(y) copyprivate(z) print *, x - !WARNING: The COPYPRIVATE clause with 'x' is already used on the SINGLE directive [-Wopenmp-usage] - !WARNING: The COPYPRIVATE clause with 'y' is already used on the SINGLE directive [-Wopenmp-usage] - !WARNING: The COPYPRIVATE clause with 'z' is already used on the SINGLE directive [-Wopenmp-usage] + !ERROR: 'x' appears more than once in a COPYPRIVATE clause + !ERROR: 'y' appears more than once in a COPYPRIVATE clause + !ERROR: 'z' appears more than once in a COPYPRIVATE clause !ERROR: At most one NOWAIT clause can appear on the SINGLE directive !$omp 
end single copyprivate(x, y, z) nowait end program From 0c98a421e247b4d76c11f0d4ca4d549ac663abff Mon Sep 17 00:00:00 2001 From: earnol Date: Mon, 22 Jun 2026 11:39:23 -0400 Subject: [PATCH 029/511] [clang-tidy] Fix bugprone-misplaced-widening-cast false positive on bitfield assignments (#197554) When CheckImplicitCasts=true, the checker used the declared type of a bitfield instead of the actual bitfield width to determine if widening occurs. This caused false positives when assigning to a bitfield whose declared type is wider than the source, but whose bitfield width actually matches the source type. This behavior is fixed. This PR fixes https://github.com/llvm/llvm-project/issues/197261 issue. Co-authored-by: Vladislav Aranov --- .../bugprone/MisplacedWideningCastCheck.cpp | 26 +- clang-tools-extra/docs/ReleaseNotes.rst | 6 + .../misplaced-widening-cast-bitfield.cpp | 289 ++++++++++++++++++ 3 files changed, 318 insertions(+), 3 deletions(-) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/bugprone/misplaced-widening-cast-bitfield.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/MisplacedWideningCastCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/MisplacedWideningCastCheck.cpp index f040235322a4f..61bbbf8af2abd 100644 --- a/clang-tools-extra/clang-tidy/bugprone/MisplacedWideningCastCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/MisplacedWideningCastCheck.cpp @@ -42,7 +42,14 @@ void MisplacedWideningCastCheck::registerMatchers(MatchFinder *Finder) { Finder->addMatcher(varDecl(hasInitializer(Cast)), this); Finder->addMatcher(returnStmt(hasReturnValue(Cast)), this); Finder->addMatcher(callExpr(hasAnyArgument(Cast)), this); - Finder->addMatcher(binaryOperator(hasOperatorName("="), hasRHS(Cast)), this); + // When assigning to a bit field, bind the FieldDecl so check() can use the + // actual bit width instead of the declared type width. 
+ Finder->addMatcher( + binaryOperator(hasOperatorName("="), + hasLHS(expr(optionally(memberExpr(hasDeclaration( + fieldDecl(isBitField()).bind("BitField")))))), + hasRHS(Cast)), + this); Finder->addMatcher( binaryOperator(isComparisonOperator(), hasEitherOperand(Cast)), this); } @@ -195,14 +202,27 @@ void MisplacedWideningCastCheck::check(const MatchFinder::MatchResult &Result) { const QualType CastType = Cast->getType(); const QualType CalcType = Calc->getType(); + // If assigning to a bit field, use the bit field width as the effective + // target width. The declared type may be wider than the actual bit field + // storage. + unsigned TargetWidth = Context.getIntWidth(CastType); + bool IsBitfieldAssign = false; + if (const auto *FD = Result.Nodes.getNodeAs("BitField")) { + TargetWidth = FD->getBitWidthValue(); + IsBitfieldAssign = true; + } + // Explicit truncation using cast. - if (Context.getIntWidth(CastType) < Context.getIntWidth(CalcType)) + if (TargetWidth < Context.getIntWidth(CalcType)) return; // If CalcType and CastType have same size then there is no real danger, but // there can be a portability problem. - if (Context.getIntWidth(CastType) == Context.getIntWidth(CalcType)) { + if (TargetWidth == Context.getIntWidth(CalcType)) { + // Bit field width is fixed across platforms — no portability concern. + if (IsBitfieldAssign) + return; const auto *CastBuiltinType = dyn_cast(CastType->getUnqualifiedDesugaredType()); const auto *CalcBuiltinType = diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index d167f5e90f88a..bbd4f8f5b3c58 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -407,6 +407,12 @@ Changes in existing checks ` check by printing the macro definition in the warning message if the macro is defined on command line. 
+- Improved :doc:`bugprone-misplaced-widening-cast + ` check by fixing a false + positive on bit field assignments when the `CheckImplicitCasts` option is + enabled. The check now uses the actual bit field width instead of the + declared type to determine if widening occurs. + - Improved :doc:`bugprone-move-forwarding-reference ` check by fixing some false positives in the context of moved lambda captures. diff --git a/clang-tools-extra/test/clang-tidy/checkers/bugprone/misplaced-widening-cast-bitfield.cpp b/clang-tools-extra/test/clang-tidy/checkers/bugprone/misplaced-widening-cast-bitfield.cpp new file mode 100644 index 0000000000000..c6b8cc9f8a8dd --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/bugprone/misplaced-widening-cast-bitfield.cpp @@ -0,0 +1,289 @@ +// RUN: %check_clang_tidy %s bugprone-misplaced-widening-cast %t -- \ +// RUN: -config="{CheckOptions: {bugprone-misplaced-widening-cast.CheckImplicitCasts: true}}" \ +// RUN: -- -target x86_64-unknown-unknown +// RUN: %check_clang_tidy %s bugprone-misplaced-widening-cast %t -- \ +// RUN: -config="{CheckOptions: {bugprone-misplaced-widening-cast.CheckImplicitCasts: true}}" \ +// RUN: -- -target i386-unknown-unknown + +// Tests rely on specific type sizes: +// unsigned int = 32, unsigned short = 16, unsigned char = 8, +// unsigned long = 64, unsigned long long = 64 bits. + +struct BitfieldHeader { + unsigned long long field32 : 32; + unsigned long field16 : 16; + unsigned int field8 : 8; + unsigned long long field40 : 40; + unsigned long long field24 : 24; + long long sfield32 : 32; + long sfield16 : 16; +}; + +// --- Implicit casts: no widening cases --- + +// 32-bit bit field from unsigned int (32-bit) — no widening. 
+void bitfield32_shift(unsigned int size) { + struct BitfieldHeader h = {}; + h.field32 = size << 1U; +} + +void bitfield32_multiply(unsigned int size) { + struct BitfieldHeader h = {}; + h.field32 = size * 2U; +} + +void bitfield32_add(unsigned int size) { + struct BitfieldHeader h = {}; + h.field32 = size + 1U; +} + +void bitfield32_subtract(unsigned int size) { + struct BitfieldHeader h = {}; + h.field32 = size - 1U; +} + +void bitfield32_not(unsigned int size) { + struct BitfieldHeader h = {}; + h.field32 = ~size; +} + +// 16-bit bit field from unsigned short (16-bit) — no widening. +// Note: integer promotion makes CalcType 'int' (32-bit), but bit field is 16-bit, +// so this is truncation, not widening. No warning expected. +void bitfield16_shift(unsigned short size) { + struct BitfieldHeader h = {}; + h.field16 = size << 1; +} + +void bitfield16_multiply(unsigned short size) { + struct BitfieldHeader h = {}; + h.field16 = size * 2; +} + +void bitfield16_add(unsigned short size) { + struct BitfieldHeader h = {}; + h.field16 = size + 1; +} + +void bitfield16_subtract(unsigned short size) { + struct BitfieldHeader h = {}; + h.field16 = size - 1; +} + +void bitfield16_not(unsigned short size) { + struct BitfieldHeader h = {}; + h.field16 = ~size; +} + +// 8-bit bit field from unsigned char (8-bit) — no widening. +// Same: integer promotion makes CalcType 'int' (32-bit), bit field is 8-bit = truncation. 
+void bitfield8_shift(unsigned char size) { + struct BitfieldHeader h = {}; + h.field8 = size << 1; +} + +void bitfield8_multiply(unsigned char size) { + struct BitfieldHeader h = {}; + h.field8 = size * 2; +} + +void bitfield8_add(unsigned char size) { + struct BitfieldHeader h = {}; + h.field8 = size + 1; +} + +void bitfield8_subtract(unsigned char size) { + struct BitfieldHeader h = {}; + h.field8 = size - 1; +} + +void bitfield8_not(unsigned char size) { + struct BitfieldHeader h = {}; + h.field8 = ~size; +} + +// --- Implicit casts: widening cases (should warn) --- + +// 40-bit bit field from unsigned int (32-bit) — widening DOES occur. Should warn. +void bitfield40_shift(unsigned int size) { + struct BitfieldHeader h = {}; + h.field40 = size << 1U; + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: either cast from 'unsigned int' to 'unsigned long long' +} + +void bitfield40_multiply(unsigned int size) { + struct BitfieldHeader h = {}; + h.field40 = size * 2U; + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: either cast from 'unsigned int' to 'unsigned long long' +} + +void bitfield40_add(unsigned int size) { + struct BitfieldHeader h = {}; + h.field40 = size + 1U; + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: either cast from 'unsigned int' to 'unsigned long long' +} + +void bitfield40_subtract(unsigned int size) { + struct BitfieldHeader h = {}; + h.field40 = size - 1U; + // FIXME: checker doesn't detect potential widening for subtraction. + // E.g. if size==0, result is 0xFFFFFFFF (32-bit), but in 40-bit space + // it should be 0xFFFFFFFFFF. Limitation of getMaxCalculationWidth. 
+} + +void bitfield40_not(unsigned int size) { + struct BitfieldHeader h = {}; + h.field40 = ~size; + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: either cast from 'unsigned int' to 'unsigned long long' +} + +// --- Implicit casts: truncation cases (no warning) --- + +// 24-bit bit field from unsigned short (16-bit) — after integer promotion, +// CalcType is 'int' (32-bit) which is wider than the 24-bit bit field. +// This is truncation, not widening. No warning expected. +void bitfield24_shift(unsigned short size) { + struct BitfieldHeader h = {}; + h.field24 = size << 1; +} + +void bitfield24_multiply(unsigned short size) { + struct BitfieldHeader h = {}; + h.field24 = size * 2; +} + +void bitfield24_add(unsigned short size) { + struct BitfieldHeader h = {}; + h.field24 = size + 1; +} + +void bitfield24_subtract(unsigned short size) { + struct BitfieldHeader h = {}; + h.field24 = size - 1; +} + +void bitfield24_not(unsigned short size) { + struct BitfieldHeader h = {}; + h.field24 = ~size; +} + +// --- Explicit casts with bit fields --- + +// Source (unsigned short, 16-bit) == bit field width (16-bit). No warnings. +void explicit_cast_same_to_declared(unsigned short size) { + struct BitfieldHeader h = {}; + h.field16 = (unsigned long)(size << 1); +} + +void explicit_cast_same_to_bitfield(unsigned short size) { + struct BitfieldHeader h = {}; + h.field16 = (unsigned short)(size << 1); +} + +void explicit_cast_same_to_narrower(unsigned short size) { + struct BitfieldHeader h = {}; + h.field16 = (unsigned char)(size << 1); +} + +// Source (unsigned int, 32-bit) > bit field width (16-bit). Truncation, no warnings. 
+void explicit_cast_wider_to_declared(unsigned int size) { + struct BitfieldHeader h = {}; + h.field16 = (unsigned long)(size << 1U); +} + +void explicit_cast_wider_to_bitfield(unsigned int size) { + struct BitfieldHeader h = {}; + h.field16 = (unsigned short)(size << 1U); +} + +void explicit_cast_wider_to_narrower(unsigned int size) { + struct BitfieldHeader h = {}; + h.field16 = (unsigned char)(size << 1U); +} + +// Source (unsigned int, 32-bit) < bit field width (40-bit). Widening — should warn. +void explicit_cast_widen_shift(unsigned int size) { + struct BitfieldHeader h = {}; + h.field40 = (unsigned long long)(size << 1U); + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: either cast from 'unsigned int' to 'unsigned long long' +} + +void explicit_cast_widen_multiply(unsigned int size) { + struct BitfieldHeader h = {}; + h.field40 = (unsigned long long)(size * 2U); + // CHECK-MESSAGES: :[[@LINE-1]]:15: warning: either cast from 'unsigned int' to 'unsigned long long' +} + +// --- Cross-assignment cases --- + +// Bit field assigned to a normal (non-bit field) variable. +// h.field8 has declared type 'unsigned int' (32-bit), so h.field8 << 1 is 'unsigned int'. +// Assigning to 'long' (64-bit) is widening. +void bitfield_to_normal_widen(struct BitfieldHeader h) { + long l; + l = h.field8 << 1; + // CHECK-MESSAGES: :[[@LINE-1]]:7: warning: either cast from 'int' to 'long' +} + +void bitfield_to_normal_no_warn(struct BitfieldHeader h) { + unsigned int i; + i = h.field8 << 1; +} + +// Bit fields of different sizes assigned to each other. 
+void bitfield_small_to_large(struct BitfieldHeader h) { + struct BitfieldHeader h2 = {}; + h2.field40 = h.field8 << 1; + // CHECK-MESSAGES: :[[@LINE-1]]:16: warning: either cast from 'int' to 'unsigned long long' +} + +void bitfield_same_size(struct BitfieldHeader h) { + struct BitfieldHeader h2 = {}; + h2.field32 = h.field32 << 1; +} + +void bitfield_large_to_small(struct BitfieldHeader h) { + struct BitfieldHeader h2 = {}; + h2.field8 = h.field32 << 1; +} + +// --- Signed bit field tests --- + +// int is 32 bits on x86, sfield32 is 32-bit signed — no widening. +void sbitfield32_shift(int size) { + struct BitfieldHeader h = {}; + h.sfield32 = size << 1; +} + +void sbitfield32_multiply(int size) { + struct BitfieldHeader h = {}; + h.sfield32 = size * 2; +} + +void sbitfield32_add(int size) { + struct BitfieldHeader h = {}; + h.sfield32 = size + 1; +} + +// short promotes to int (32-bit), sfield16 is 16-bit — truncation, no warning. +void sbitfield16_shift(short size) { + struct BitfieldHeader h = {}; + h.sfield16 = size << 1; +} + +void sbitfield16_multiply(short size) { + struct BitfieldHeader h = {}; + h.sfield16 = size * 2; +} + +void sbitfield16_add(short size) { + struct BitfieldHeader h = {}; + h.sfield16 = size + 1; +} + +// FIXME: Subtraction with short: short promotes to int (32-bit), assigning to long long +// (64-bit) is widening. Checker doesn't warn for '-' (limitation of getMaxCalculationWidth). 
+void subtract_short_widen(short size) { + long long l; + l = size - 1; +} From 210f118b19f43396cec55b70d4df98d475e5d6e6 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Mon, 22 Jun 2026 17:40:45 +0200 Subject: [PATCH 030/511] Reapply "[OpenMP][offload] Cross-team reductions with variable number of teams" (#204914) (#205071) Together with the fix for https://github.com/llvm/llvm-project/pull/195102#issuecomment-4756584289 --- clang/include/clang/Basic/LangOptions.def | 1 - clang/include/clang/Options/Options.td | 6 +- clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 12 +- clang/lib/Driver/ToolChains/Clang.cpp | 13 +- clang/lib/Frontend/CompilerInvocation.cpp | 7 - clang/test/Driver/openmp-offload-gpu.c | 4 +- clang/test/OpenMP/bug60602.cpp | 4 +- .../OpenMP/declare_target_local_codegen.cpp | 6 +- clang/test/OpenMP/distribute_codegen.cpp | 20 +- .../distribute_firstprivate_codegen.cpp | 8 +- .../OpenMP/distribute_lastprivate_codegen.cpp | 8 +- .../distribute_parallel_for_codegen.cpp | 56 +- ...bute_parallel_for_firstprivate_codegen.cpp | 8 +- .../distribute_parallel_for_if_codegen.cpp | 16 +- ...ibute_parallel_for_lastprivate_codegen.cpp | 8 +- ...ibute_parallel_for_num_threads_codegen.cpp | 48 +- ...istribute_parallel_for_private_codegen.cpp | 8 +- ...tribute_parallel_for_proc_bind_codegen.cpp | 6 +- .../distribute_parallel_for_simd_codegen.cpp | 56 +- ...parallel_for_simd_firstprivate_codegen.cpp | 8 +- ...istribute_parallel_for_simd_if_codegen.cpp | 64 +- ..._parallel_for_simd_lastprivate_codegen.cpp | 8 +- ..._parallel_for_simd_num_threads_codegen.cpp | 48 +- ...bute_parallel_for_simd_private_codegen.cpp | 8 +- ...te_parallel_for_simd_proc_bind_codegen.cpp | 6 +- .../OpenMP/distribute_private_codegen.cpp | 12 +- clang/test/OpenMP/distribute_simd_codegen.cpp | 40 +- .../distribute_simd_firstprivate_codegen.cpp | 8 +- .../distribute_simd_lastprivate_codegen.cpp | 8 +- .../distribute_simd_private_codegen.cpp | 12 +- .../distribute_simd_reduction_codegen.cpp | 8 +- 
clang/test/OpenMP/map_struct_ordering.cpp | 2 +- clang/test/OpenMP/nvptx_lambda_capturing.cpp | 10 +- clang/test/OpenMP/reduction_implicit_map.cpp | 12 +- .../spirv_target_teams_reduction_addrspace.c | 2 +- .../OpenMP/target_codegen_global_capture.cpp | 12 +- clang/test/OpenMP/target_default_codegen.cpp | 32 +- .../OpenMP/target_defaultmap_codegen_03.cpp | 16 +- .../target_dyn_groupprivate_codegen.cpp | 24 +- .../OpenMP/target_firstprivate_codegen.cpp | 48 +- .../OpenMP/target_has_device_addr_codegen.cpp | 30 +- .../target_has_device_addr_codegen_01.cpp | 4 +- .../OpenMP/target_is_device_ptr_codegen.cpp | 88 +- ..._of_structs_with_nested_mapper_codegen.cpp | 2 +- ...et_map_array_section_no_length_codegen.cpp | 8 +- ..._of_structs_with_nested_mapper_codegen.cpp | 2 +- clang/test/OpenMP/target_map_codegen_03.cpp | 4 +- clang/test/OpenMP/target_map_codegen_hold.cpp | 24 +- .../OpenMP/target_map_deref_array_codegen.cpp | 6 +- .../OpenMP/target_map_member_expr_codegen.cpp | 6 +- .../target_offload_mandatory_codegen.cpp | 6 +- .../target_ompx_dyn_cgroup_mem_codegen.cpp | 24 +- clang/test/OpenMP/target_parallel_codegen.cpp | 28 +- .../OpenMP/target_parallel_for_codegen.cpp | 56 +- .../target_parallel_for_simd_codegen.cpp | 56 +- ...target_parallel_generic_loop_codegen-1.cpp | 24 +- ...target_parallel_generic_loop_codegen-2.cpp | 4 +- ...l_generic_loop_uses_allocators_codegen.cpp | 2 +- .../OpenMP/target_parallel_if_codegen.cpp | 24 +- .../target_parallel_num_threads_codegen.cpp | 24 +- ...et_parallel_num_threads_strict_codegen.cpp | 16 +- .../OpenMP/target_task_affinity_codegen.cpp | 4 +- clang/test/OpenMP/target_teams_codegen.cpp | 52 +- .../target_teams_distribute_codegen.cpp | 28 +- ...rget_teams_distribute_collapse_codegen.cpp | 12 +- ...teams_distribute_dist_schedule_codegen.cpp | 36 +- ..._teams_distribute_firstprivate_codegen.cpp | 8 +- ...t_teams_distribute_lastprivate_codegen.cpp | 8 +- ..._teams_distribute_parallel_for_codegen.cpp | 8 +- 
...stribute_parallel_for_collapse_codegen.cpp | 12 +- ...ute_parallel_for_dist_schedule_codegen.cpp | 36 +- ...bute_parallel_for_firstprivate_codegen.cpp | 8 +- ...ams_distribute_parallel_for_if_codegen.cpp | 12 +- ...ibute_parallel_for_lastprivate_codegen.cpp | 8 +- ..._distribute_parallel_for_order_codegen.cpp | 2 +- ...istribute_parallel_for_private_codegen.cpp | 8 +- ...tribute_parallel_for_proc_bind_codegen.cpp | 6 +- ...tribute_parallel_for_reduction_codegen.cpp | 8 +- ...stribute_parallel_for_schedule_codegen.cpp | 120 +- ...s_distribute_parallel_for_simd_codegen.cpp | 12 +- ...ute_parallel_for_simd_collapse_codegen.cpp | 12 +- ...arallel_for_simd_dist_schedule_codegen.cpp | 36 +- ...parallel_for_simd_firstprivate_codegen.cpp | 8 +- ...istribute_parallel_for_simd_if_codegen.cpp | 48 +- ..._parallel_for_simd_lastprivate_codegen.cpp | 8 +- ...bute_parallel_for_simd_private_codegen.cpp | 8 +- ...te_parallel_for_simd_proc_bind_codegen.cpp | 6 +- ...te_parallel_for_simd_reduction_codegen.cpp | 8 +- ...ute_parallel_for_simd_schedule_codegen.cpp | 120 +- ...arget_teams_distribute_private_codegen.cpp | 8 +- ...get_teams_distribute_reduction_codegen.cpp | 80 +- .../target_teams_distribute_simd_codegen.cpp | 56 +- ...teams_distribute_simd_collapse_codegen.cpp | 12 +- ..._distribute_simd_dist_schedule_codegen.cpp | 36 +- ...s_distribute_simd_firstprivate_codegen.cpp | 8 +- ...ms_distribute_simd_lastprivate_codegen.cpp | 8 +- ..._teams_distribute_simd_private_codegen.cpp | 8 +- ...eams_distribute_simd_reduction_codegen.cpp | 8 +- .../target_teams_generic_loop_codegen-1.cpp | 8 +- .../target_teams_generic_loop_codegen.cpp | 28 +- ...et_teams_generic_loop_collapse_codegen.cpp | 12 +- .../target_teams_generic_loop_if_codegen.cpp | 10 +- ...arget_teams_generic_loop_order_codegen.cpp | 2 +- ...get_teams_generic_loop_private_codegen.cpp | 8 +- ...t_teams_generic_loop_reduction_codegen.cpp | 8 +- ...s_generic_loop_uses_allocators_codegen.cpp | 2 +- 
.../test/OpenMP/target_teams_map_codegen.cpp | 36 +- .../OpenMP/target_teams_num_teams_codegen.cpp | 24 +- ...cpp => target_teams_reduction_codegen.cpp} | 1489 +---------------- .../target_teams_thread_limit_codegen.cpp | 24 +- clang/test/OpenMP/teams_codegen.cpp | 40 +- .../test/OpenMP/teams_distribute_codegen.cpp | 24 +- .../teams_distribute_collapse_codegen.cpp | 12 +- ...teams_distribute_dist_schedule_codegen.cpp | 36 +- .../teams_distribute_firstprivate_codegen.cpp | 8 +- .../teams_distribute_lastprivate_codegen.cpp | 8 +- .../teams_distribute_parallel_for_codegen.cpp | 24 +- ...stribute_parallel_for_collapse_codegen.cpp | 12 +- ...distribute_parallel_for_copyin_codegen.cpp | 8 +- ...ute_parallel_for_dist_schedule_codegen.cpp | 36 +- ...bute_parallel_for_firstprivate_codegen.cpp | 8 +- ...ams_distribute_parallel_for_if_codegen.cpp | 16 +- ...ibute_parallel_for_lastprivate_codegen.cpp | 8 +- ...ibute_parallel_for_num_threads_codegen.cpp | 24 +- ...istribute_parallel_for_private_codegen.cpp | 8 +- ...tribute_parallel_for_proc_bind_codegen.cpp | 6 +- ...tribute_parallel_for_reduction_codegen.cpp | 8 +- ...stribute_parallel_for_schedule_codegen.cpp | 120 +- ...s_distribute_parallel_for_simd_codegen.cpp | 24 +- ...ute_parallel_for_simd_collapse_codegen.cpp | 12 +- ...arallel_for_simd_dist_schedule_codegen.cpp | 36 +- ...parallel_for_simd_firstprivate_codegen.cpp | 8 +- ...istribute_parallel_for_simd_if_codegen.cpp | 64 +- ..._parallel_for_simd_lastprivate_codegen.cpp | 8 +- ..._parallel_for_simd_num_threads_codegen.cpp | 24 +- ...bute_parallel_for_simd_private_codegen.cpp | 8 +- ...te_parallel_for_simd_proc_bind_codegen.cpp | 6 +- ...te_parallel_for_simd_reduction_codegen.cpp | 8 +- ...ute_parallel_for_simd_schedule_codegen.cpp | 120 +- .../teams_distribute_private_codegen.cpp | 8 +- .../teams_distribute_reduction_codegen.cpp | 8 +- .../OpenMP/teams_distribute_simd_codegen.cpp | 36 +- ...teams_distribute_simd_collapse_codegen.cpp | 12 +- 
..._distribute_simd_dist_schedule_codegen.cpp | 36 +- ...s_distribute_simd_firstprivate_codegen.cpp | 8 +- ...ms_distribute_simd_lastprivate_codegen.cpp | 8 +- .../teams_distribute_simd_private_codegen.cpp | 8 +- ...eams_distribute_simd_reduction_codegen.cpp | 8 +- .../OpenMP/teams_firstprivate_codegen.cpp | 24 +- .../OpenMP/teams_generic_loop_codegen-1.cpp | 24 +- .../teams_generic_loop_collapse_codegen.cpp | 12 +- .../teams_generic_loop_private_codegen.cpp | 8 +- .../teams_generic_loop_reduction_codegen.cpp | 8 +- clang/test/OpenMP/teams_private_codegen.cpp | 20 +- .../llvm/Frontend/OpenMP/OMPConstants.h | 2 +- .../llvm/Frontend/OpenMP/OMPIRBuilder.h | 42 +- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 14 +- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 227 ++- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 2 +- llvm/test/Transforms/OpenMP/add_attributes.ll | 8 +- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 4 - .../allocatable_gpu_reduction_teams.mlir | 16 +- .../LLVMIR/omptarget-multi-reduction.mlir | 5 +- .../LLVMIR/omptarget-region-device-llvm.mlir | 2 +- ...distribute-reduction-array-descriptor.mlir | 25 +- .../omptarget-teams-distribute-reduction.mlir | 3 +- .../LLVMIR/omptarget-teams-reduction.mlir | 3 +- .../LLVMIR/openmp-target-launch-device.mlir | 4 +- offload/include/Shared/Environment.h | 4 +- .../common/include/PluginInterface.h | 12 +- .../common/src/PluginInterface.cpp | 26 +- openmp/device/include/Interface.h | 13 +- openmp/device/src/Reduction.cpp | 322 ++-- 173 files changed, 2032 insertions(+), 3292 deletions(-) rename clang/test/OpenMP/{nvptx_teams_reduction_codegen.cpp => target_teams_reduction_codegen.cpp} (60%) diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 319fd18cddb36..d68784b7efbcd 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -231,7 +231,6 @@ LANGOPT(OpenMPCUDAMode , 1, 0, NotCompatible, "Generate code for OpenMP pragm 
LANGOPT(OpenMPIRBuilder , 1, 0, NotCompatible, "Use the experimental OpenMP-IR-Builder codegen path.") LANGOPT(OpenMPCUDANumSMs , 32, 0, NotCompatible, "Number of SMs for CUDA devices.") LANGOPT(OpenMPCUDABlocksPerSM , 32, 0, NotCompatible, "Number of blocks per SM for CUDA devices.") -LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, NotCompatible, "Number of the reduction records in the intermediate reduction buffer used for the teams reductions.") LANGOPT(OpenMPTargetDebug , 32, 0, NotCompatible, "Enable debugging in the OpenMP offloading device RTL") LANGOPT(OpenMPOptimisticCollapse , 1, 0, NotCompatible, "Use at most 32 bits to represent the collapsed loop nest counter.") LANGOPT(OpenMPThreadSubscription , 1, 0, NotCompatible, "Assume work-shared loops do not have more iterations than participating threads.") diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index e4a9d95ece0ab..377897a15f746 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -4120,7 +4120,11 @@ def fopenmp_cuda_number_of_sm_EQ : Joined<["-"], "fopenmp-cuda-number-of-sm=">, def fopenmp_cuda_blocks_per_sm_EQ : Joined<["-"], "fopenmp-cuda-blocks-per-sm=">, Group, Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams-reduction-recs-num=">, Group, - Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>; + Flags<[NoArgumentUnused, HelpHidden]>, Visibility<[ClangOption, CC1Option]>, + HelpText<"Deprecated and ignored. 
The teams reduction buffer is sized " + "automatically at kernel launch to match the actual number of " + "teams; this flag is accepted for backwards compatibility only " + "and emits a deprecation warning when used.">; //===----------------------------------------------------------------------===// // Shared cc1 + fc1 OpenMP Target Options diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index cb0e7297f1a89..19e8c73884dfc 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -788,8 +788,7 @@ void CGOpenMPRuntimeGPU::emitKernelDeinit(CodeGenFunction &CGF, ? 0 : DL.getTypeAllocSize(LLVMReductionsBufferTy).getFixedValue(); CGBuilderTy &Bld = CGF.Builder; - OMPBuilder.createTargetDeinit(Bld, ReductionDataSize, - C.getLangOpts().OpenMPCUDAReductionBufNum); + OMPBuilder.createTargetDeinit(Bld, ReductionDataSize); TeamsReductions.clear(); } @@ -1698,8 +1697,6 @@ void CGOpenMPRuntimeGPU::emitReduction( bool ParallelReduction = isOpenMPParallelDirective(Options.ReductionKind); bool TeamsReduction = isOpenMPTeamsDirective(Options.ReductionKind); - ASTContext &C = CGM.getContext(); - if (Options.SimpleReduction) { assert(!TeamsReduction && !ParallelReduction && "Invalid reduction selection in emitReduction."); @@ -1790,12 +1787,13 @@ void CGOpenMPRuntimeGPU::emitReduction( Idx++; } + bool IsSPMD = getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD; llvm::OpenMPIRBuilder::InsertPointTy AfterIP = cantFail(OMPBuilder.createReductionsGPU( OmpLoc, AllocaIP, CodeGenIP, ReductionInfos, /*IsByRef=*/{}, false, - TeamsReduction, llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang, - CGF.getTarget().getGridValue(), - C.getLangOpts().OpenMPCUDAReductionBufNum, RTLoc)); + TeamsReduction, IsSPMD, + llvm::OpenMPIRBuilder::ReductionGenCBKind::Clang, + CGF.getTarget().getGridValue(), RTLoc)); CGF.Builder.restoreIP(AfterIP); } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp 
b/clang/lib/Driver/ToolChains/Clang.cpp index a93dd2969504c..418d540895681 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6951,8 +6951,17 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fno-openmp-extensions"); Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_number_of_sm_EQ); Args.AddAllArgs(CmdArgs, options::OPT_fopenmp_cuda_blocks_per_sm_EQ); - Args.AddAllArgs(CmdArgs, - options::OPT_fopenmp_cuda_teams_reduction_recs_num_EQ); + // '-fopenmp-cuda-teams-reduction-recs-num=' is deprecated and has no + // effect: the teams reduction buffer is sized at kernel launch by the + // offload plugin to match the actual number of teams. Honoring a + // smaller user-supplied value would silently truncate the buffer for + // larger launches. + if (Arg *A = Args.getLastArg( + options::OPT_fopenmp_cuda_teams_reduction_recs_num_EQ)) + D.Diag(diag::warn_drv_deprecated_custom) + << A->getAsString(Args) + << "the value is ignored; the teams reduction buffer is sized " + "automatically at kernel launch"; if (Args.hasFlag(options::OPT_fopenmp_optimistic_collapse, options::OPT_fno_openmp_optimistic_collapse, /*Default=*/false)) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index d2847739e3143..ca2d02c7dbd97 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3910,10 +3910,6 @@ void CompilerInvocationBase::GenerateLangArgs(const LangOptions &Opts, GenerateArg(Consumer, OPT_fopenmp_cuda_blocks_per_sm_EQ, Twine(Opts.OpenMPCUDABlocksPerSM)); - if (Opts.OpenMPCUDAReductionBufNum != 1024) - GenerateArg(Consumer, OPT_fopenmp_cuda_teams_reduction_recs_num_EQ, - Twine(Opts.OpenMPCUDAReductionBufNum)); - if (!Opts.OMPTargetTriples.empty()) { std::string Targets; llvm::raw_string_ostream OS(Targets); @@ -4359,9 +4355,6 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, 
Opts.OpenMPCUDABlocksPerSM = getLastArgIntValue(Args, options::OPT_fopenmp_cuda_blocks_per_sm_EQ, Opts.OpenMPCUDABlocksPerSM, Diags); - Opts.OpenMPCUDAReductionBufNum = getLastArgIntValue( - Args, options::OPT_fopenmp_cuda_teams_reduction_recs_num_EQ, - Opts.OpenMPCUDAReductionBufNum, Diags); } // Set the value of the debugging flag used in the new offloading device RTL. diff --git a/clang/test/Driver/openmp-offload-gpu.c b/clang/test/Driver/openmp-offload-gpu.c index b5644e6ae445f..4ad0aede8a50e 100644 --- a/clang/test/Driver/openmp-offload-gpu.c +++ b/clang/test/Driver/openmp-offload-gpu.c @@ -193,8 +193,8 @@ // RUN: %clang -### -nogpulib -nogpuinc -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 %s -fopenmp-cuda-teams-reduction-recs-num=2048 2>&1 \ // RUN: | FileCheck -check-prefix=CUDA_RED_RECS %s -// CUDA_RED_RECS: "-cc1"{{.*}}"-triple" "nvptx64-nvidia-cuda" -// CUDA_RED_RECS-SAME: "-fopenmp-cuda-teams-reduction-recs-num=2048" +// CUDA_RED_RECS: warning: argument '-fopenmp-cuda-teams-reduction-recs-num=2048' is deprecated, the value is ignored; the teams reduction buffer is sized automatically at kernel launch +// CUDA_RED_RECS-NOT: "-fopenmp-cuda-teams-reduction-recs-num=2048" // RUN: %clang -### -fopenmp=libomp -fopenmp-targets=nvptx64-nvidia-cuda --cuda-path=%S/Inputs/CUDA_102/usr/local/cuda \ // RUN: --offload-arch=sm_52 --libomptarget-nvptx-bc-path=%S/Inputs/libomptarget/libomptarget-nvptx-test.bc %s 2>&1 \ diff --git a/clang/test/OpenMP/bug60602.cpp b/clang/test/OpenMP/bug60602.cpp index e9174d7be3a12..8235a5a7d83d1 100644 --- a/clang/test/OpenMP/bug60602.cpp +++ b/clang/test/OpenMP/bug60602.cpp @@ -119,7 +119,7 @@ int kernel_within_loop(int *a, int *b, int N, int num_iters) { // CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK-NEXT: [[TMP37:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP37]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP37]], align 4 // CHECK-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 6, ptr [[TMP38]], align 4 // CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -223,7 +223,7 @@ int kernel_within_loop(int *a, int *b, int N, int num_iters) { // CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP89]], 1 // CHECK-NEXT: [[TMP90:%.*]] = zext i32 [[ADD]] to i64 // CHECK-NEXT: [[TMP91:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP91]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP91]], align 4 // CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK-NEXT: store i32 6, ptr [[TMP92]], align 4 // CHECK-NEXT: [[TMP93:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 diff --git a/clang/test/OpenMP/declare_target_local_codegen.cpp b/clang/test/OpenMP/declare_target_local_codegen.cpp index b82e8b3bba9ff..fe3a46aed5916 100644 --- a/clang/test/OpenMP/declare_target_local_codegen.cpp +++ b/clang/test/OpenMP/declare_target_local_codegen.cpp @@ -130,7 +130,7 @@ int use_new_local_vars() { // HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // HOST-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // HOST-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// HOST-NEXT: store i32 4, ptr [[TMP8]], align 4 +// HOST-NEXT: store i32 5, ptr [[TMP8]], align 4 // HOST-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // HOST-NEXT: store i32 2, ptr [[TMP9]], align 4 // HOST-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -217,7 +217,7 @@ int use_new_local_vars() { // HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // HOST-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // HOST-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// HOST-NEXT: store i32 4, ptr [[TMP8]], align 4 +// HOST-NEXT: store i32 5, ptr [[TMP8]], align 4 // HOST-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // HOST-NEXT: store i32 2, ptr [[TMP9]], align 4 // HOST-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -291,7 +291,7 @@ int use_new_local_vars() { // HOST-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // HOST-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // HOST-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// HOST-NEXT: store i32 4, ptr [[TMP8]], align 4 +// HOST-NEXT: store i32 5, ptr [[TMP8]], align 4 // HOST-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // HOST-NEXT: store i32 2, ptr [[TMP9]], align 4 // HOST-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp index 62b7ad8b979a2..afd18e91911dd 100644 --- 
a/clang/test/OpenMP/distribute_codegen.cpp +++ b/clang/test/OpenMP/distribute_codegen.cpp @@ -169,7 +169,7 @@ int fint(void) { return ftemplate(); } // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -368,7 +368,7 @@ int fint(void) { return ftemplate(); } // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -567,7 +567,7 @@ int fint(void) { return ftemplate(); } // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // 
CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -774,7 +774,7 @@ int fint(void) { return ftemplate(); } // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK1-NEXT: [[TMP13:%.*]] = zext i32 [[ADD4]] to i64 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP14]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP14]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP15]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -949,7 +949,7 @@ int fint(void) { return ftemplate(); } // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1130,7 +1130,7 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1325,7 +1325,7 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1520,7 +1520,7 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: 
[[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1723,7 +1723,7 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 // CHECK3-NEXT: [[TMP13:%.*]] = zext i32 [[ADD4]] to i64 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP14]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP14]], align 4 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP15]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1898,7 +1898,7 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp index 019961381c0fc..d95623a597cbc 100644 --- a/clang/test/OpenMP/distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_firstprivate_codegen.cpp @@ -551,7 +551,7 @@ int main() { // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -854,7 +854,7 @@ int main() { // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1230,7 +1230,7 @@ int main() { // CHECK11-NEXT: [[TMP26:%.*]] = 
getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1531,7 +1531,7 @@ int main() { // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp index 59696384191b1..79dc4a4617803 100644 --- a/clang/test/OpenMP/distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_lastprivate_codegen.cpp @@ -536,7 +536,7 @@ int main() { // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr 
inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -857,7 +857,7 @@ int main() { // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1252,7 +1252,7 @@ int main() { // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 
1 // CHECK11-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1571,7 +1571,7 @@ int main() { // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp index 50b60fc756a5a..9083dafdafc5e 100644 --- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp @@ -4418,7 +4418,7 @@ int main() { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4499,7 +4499,7 @@ int main() { // 
CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP65]], 1 // CHECK9-NEXT: [[TMP66:%.*]] = zext i32 [[ADD13]] to i64 // CHECK9-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP67]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP67]], align 4 // CHECK9-NEXT: [[TMP68:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP68]], align 4 // CHECK9-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -4589,7 +4589,7 @@ int main() { // CHECK9-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP111]], 1 // CHECK9-NEXT: [[TMP112:%.*]] = zext i32 [[ADD27]] to i64 // CHECK9-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP113]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP113]], align 4 // CHECK9-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP114]], align 4 // CHECK9-NEXT: [[TMP115:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -4670,7 +4670,7 @@ int main() { // CHECK9-NEXT: [[ADD41:%.*]] = add nsw i32 [[TMP152]], 1 // CHECK9-NEXT: [[TMP153:%.*]] = zext i32 [[ADD41]] to i64 // CHECK9-NEXT: [[TMP154:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP154]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP154]], align 4 // CHECK9-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP155]], align 4 // CHECK9-NEXT: [[TMP156:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 2 @@ -4760,7 +4760,7 @@ int main() { // CHECK9-NEXT: [[ADD56:%.*]] = add nsw i32 [[TMP198]], 1 // CHECK9-NEXT: [[TMP199:%.*]] = zext i32 [[ADD56]] to i64 // CHECK9-NEXT: [[TMP200:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP200]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP200]], align 4 // CHECK9-NEXT: [[TMP201:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP201]], align 4 // CHECK9-NEXT: [[TMP202:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 2 @@ -4841,7 +4841,7 @@ int main() { // CHECK9-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP239]], 1 // CHECK9-NEXT: [[TMP240:%.*]] = zext i32 [[ADD70]] to i64 // CHECK9-NEXT: [[TMP241:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP241]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP241]], align 4 // CHECK9-NEXT: [[TMP242:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP242]], align 4 // CHECK9-NEXT: [[TMP243:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 2 @@ -4931,7 +4931,7 @@ int main() { // CHECK9-NEXT: [[ADD85:%.*]] = add nsw i32 [[TMP285]], 1 // CHECK9-NEXT: [[TMP286:%.*]] = zext i32 [[ADD85]] to i64 // CHECK9-NEXT: [[TMP287:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP287]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP287]], align 4 // CHECK9-NEXT: [[TMP288:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 
0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP288]], align 4 // CHECK9-NEXT: [[TMP289:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 2 @@ -6779,7 +6779,7 @@ int main() { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6860,7 +6860,7 @@ int main() { // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP65]], 1 // CHECK9-NEXT: [[TMP66:%.*]] = zext i32 [[ADD13]] to i64 // CHECK9-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP67]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP67]], align 4 // CHECK9-NEXT: [[TMP68:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP68]], align 4 // CHECK9-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -6950,7 +6950,7 @@ int main() { // CHECK9-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP111]], 1 // CHECK9-NEXT: [[TMP112:%.*]] = zext i32 [[ADD27]] to i64 // CHECK9-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP113]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP113]], align 4 // CHECK9-NEXT: 
[[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP114]], align 4 // CHECK9-NEXT: [[TMP115:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -7031,7 +7031,7 @@ int main() { // CHECK9-NEXT: [[ADD41:%.*]] = add nsw i32 [[TMP152]], 1 // CHECK9-NEXT: [[TMP153:%.*]] = zext i32 [[ADD41]] to i64 // CHECK9-NEXT: [[TMP154:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP154]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP154]], align 4 // CHECK9-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP155]], align 4 // CHECK9-NEXT: [[TMP156:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 2 @@ -7121,7 +7121,7 @@ int main() { // CHECK9-NEXT: [[ADD56:%.*]] = add nsw i32 [[TMP198]], 1 // CHECK9-NEXT: [[TMP199:%.*]] = zext i32 [[ADD56]] to i64 // CHECK9-NEXT: [[TMP200:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP200]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP200]], align 4 // CHECK9-NEXT: [[TMP201:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP201]], align 4 // CHECK9-NEXT: [[TMP202:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 2 @@ -7202,7 +7202,7 @@ int main() { // CHECK9-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP239]], 1 // CHECK9-NEXT: [[TMP240:%.*]] = zext i32 [[ADD70]] to i64 // CHECK9-NEXT: [[TMP241:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0 -// 
CHECK9-NEXT: store i32 4, ptr [[TMP241]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP241]], align 4 // CHECK9-NEXT: [[TMP242:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP242]], align 4 // CHECK9-NEXT: [[TMP243:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 2 @@ -7292,7 +7292,7 @@ int main() { // CHECK9-NEXT: [[ADD85:%.*]] = add nsw i32 [[TMP285]], 1 // CHECK9-NEXT: [[TMP286:%.*]] = zext i32 [[ADD85]] to i64 // CHECK9-NEXT: [[TMP287:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP287]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP287]], align 4 // CHECK9-NEXT: [[TMP288:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP288]], align 4 // CHECK9-NEXT: [[TMP289:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 2 @@ -9156,7 +9156,7 @@ int main() { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -9237,7 +9237,7 @@ int main() { // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP65]], 1 // CHECK11-NEXT: [[TMP66:%.*]] = zext i32 [[ADD13]] to i64 // CHECK11-NEXT: 
[[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP67]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP67]], align 4 // CHECK11-NEXT: [[TMP68:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP68]], align 4 // CHECK11-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -9327,7 +9327,7 @@ int main() { // CHECK11-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP111]], 1 // CHECK11-NEXT: [[TMP112:%.*]] = zext i32 [[ADD27]] to i64 // CHECK11-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP113]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP113]], align 4 // CHECK11-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP114]], align 4 // CHECK11-NEXT: [[TMP115:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -9408,7 +9408,7 @@ int main() { // CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 [[TMP152]], 1 // CHECK11-NEXT: [[TMP153:%.*]] = zext i32 [[ADD41]] to i64 // CHECK11-NEXT: [[TMP154:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP154]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP154]], align 4 // CHECK11-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP155]], align 4 // CHECK11-NEXT: [[TMP156:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 2 @@ -9498,7 +9498,7 @@ int main() { 
// CHECK11-NEXT: [[ADD56:%.*]] = add nsw i32 [[TMP198]], 1 // CHECK11-NEXT: [[TMP199:%.*]] = zext i32 [[ADD56]] to i64 // CHECK11-NEXT: [[TMP200:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP200]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP200]], align 4 // CHECK11-NEXT: [[TMP201:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP201]], align 4 // CHECK11-NEXT: [[TMP202:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 2 @@ -9579,7 +9579,7 @@ int main() { // CHECK11-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP239]], 1 // CHECK11-NEXT: [[TMP240:%.*]] = zext i32 [[ADD70]] to i64 // CHECK11-NEXT: [[TMP241:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP241]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP241]], align 4 // CHECK11-NEXT: [[TMP242:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP242]], align 4 // CHECK11-NEXT: [[TMP243:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 2 @@ -9669,7 +9669,7 @@ int main() { // CHECK11-NEXT: [[ADD85:%.*]] = add nsw i32 [[TMP285]], 1 // CHECK11-NEXT: [[TMP286:%.*]] = zext i32 [[ADD85]] to i64 // CHECK11-NEXT: [[TMP287:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP287]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP287]], align 4 // CHECK11-NEXT: [[TMP288:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP288]], align 4 // CHECK11-NEXT: 
[[TMP289:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 2 @@ -11466,7 +11466,7 @@ int main() { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -11547,7 +11547,7 @@ int main() { // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP65]], 1 // CHECK11-NEXT: [[TMP66:%.*]] = zext i32 [[ADD13]] to i64 // CHECK11-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP67]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP67]], align 4 // CHECK11-NEXT: [[TMP68:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP68]], align 4 // CHECK11-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -11637,7 +11637,7 @@ int main() { // CHECK11-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP111]], 1 // CHECK11-NEXT: [[TMP112:%.*]] = zext i32 [[ADD27]] to i64 // CHECK11-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP113]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP113]], align 4 // CHECK11-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP114]], align 4 // CHECK11-NEXT: [[TMP115:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -11718,7 +11718,7 @@ int main() { // CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 [[TMP152]], 1 // CHECK11-NEXT: [[TMP153:%.*]] = zext i32 [[ADD41]] to i64 // CHECK11-NEXT: [[TMP154:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP154]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP154]], align 4 // CHECK11-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP155]], align 4 // CHECK11-NEXT: [[TMP156:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 2 @@ -11808,7 +11808,7 @@ int main() { // CHECK11-NEXT: [[ADD56:%.*]] = add nsw i32 [[TMP198]], 1 // CHECK11-NEXT: [[TMP199:%.*]] = zext i32 [[ADD56]] to i64 // CHECK11-NEXT: [[TMP200:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP200]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP200]], align 4 // CHECK11-NEXT: [[TMP201:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP201]], align 4 // CHECK11-NEXT: [[TMP202:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 2 @@ -11889,7 +11889,7 @@ int main() { // CHECK11-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP239]], 1 // CHECK11-NEXT: [[TMP240:%.*]] = zext i32 [[ADD70]] to i64 // CHECK11-NEXT: [[TMP241:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0 -// CHECK11-NEXT: 
store i32 4, ptr [[TMP241]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP241]], align 4 // CHECK11-NEXT: [[TMP242:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP242]], align 4 // CHECK11-NEXT: [[TMP243:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 2 @@ -11979,7 +11979,7 @@ int main() { // CHECK11-NEXT: [[ADD85:%.*]] = add nsw i32 [[TMP285]], 1 // CHECK11-NEXT: [[TMP286:%.*]] = zext i32 [[ADD85]] to i64 // CHECK11-NEXT: [[TMP287:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP287]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP287]], align 4 // CHECK11-NEXT: [[TMP288:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP288]], align 4 // CHECK11-NEXT: [[TMP289:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp index 53a02dbae1122..52143e931a5e3 100644 --- a/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_firstprivate_codegen.cpp @@ -834,7 +834,7 @@ int main() { // CHECK8-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK8-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK8-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK8-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK8-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK8-NEXT: [[TMP29:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK8-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK8-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1267,7 +1267,7 @@ int main() { // CHECK8-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK8-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK8-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK8-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK8-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK8-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK8-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK8-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1767,7 +1767,7 @@ int main() { // CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK10-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK10-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK10-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK10-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2194,7 +2194,7 @@ int main() { // CHECK10-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK10-NEXT: 
[[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK10-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK10-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK10-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK10-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp index 70dfdce5bca53..9e11909e25cb9 100644 --- a/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_if_codegen.cpp @@ -142,7 +142,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -183,7 +183,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -532,7 +532,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -573,7 +573,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -623,7 +623,7 @@ int main() { // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP50]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP50]], align 4 // CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP51]], align 4 // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -1134,7 +1134,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1175,7 +1175,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -1225,7 +1225,7 @@ int main() { // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP50]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP50]], align 4 // CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP51]], align 4 // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp index ff6ca0518b944..0ec3fb8f40225 100644 --- a/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_lastprivate_codegen.cpp @@ -810,7 +810,7 @@ int main() { // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// 
CHECK9-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1279,7 +1279,7 @@ int main() { // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1816,7 +1816,7 @@ int main() { // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ 
-2279,7 +2279,7 @@ int main() { // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp index 642cd65280689..a07ce8902ed52 100644 --- a/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_num_threads_codegen.cpp @@ -2523,7 +2523,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2582,7 +2582,7 @@ int main() { // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds 
[2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP33]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP34]], align 4 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2996,7 +2996,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3037,7 +3037,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] 
= getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -3094,7 +3094,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3135,7 +3135,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -3848,7 +3848,7 @@ int main() { // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // 
CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3907,7 +3907,7 @@ int main() { // CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP33]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK5-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP34]], align 4 // CHECK5-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -4321,7 +4321,7 @@ int main() { // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4362,7 +4362,7 @@ int main() { // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -4419,7 +4419,7 @@ int main() { // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4460,7 +4460,7 @@ int main() { // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], 
ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -5173,7 +5173,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5232,7 +5232,7 @@ int main() { // CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP33]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store 
i32 2, ptr [[TMP34]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -5646,7 +5646,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5687,7 +5687,7 @@ int main() { // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -5744,7 +5744,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: 
[[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5785,7 +5785,7 @@ int main() { // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -6498,7 +6498,7 @@ int main() { // CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK13-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK13-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6557,7 +6557,7 @@ int main() { // CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP33]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK13-NEXT: store i32 2, ptr [[TMP34]], align 4 // CHECK13-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -6971,7 +6971,7 @@ int main() { // CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK13-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7012,7 +7012,7 @@ int main() { // CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK13-NEXT: [[TMP25:%.*]] = getelementptr 
inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK13-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -7069,7 +7069,7 @@ int main() { // CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK13-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7110,7 +7110,7 @@ int main() { // CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK13-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK13-NEXT: [[TMP27:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp index 3be500569d431..a28bcf088fd71 100644 --- a/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_private_codegen.cpp @@ -535,7 +535,7 @@ int main() { // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -868,7 +868,7 @@ int main() { // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1270,7 +1270,7 @@ int main() { // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 
0, i32 0 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1597,7 +1597,7 @@ int main() { // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp index e1cdb581fd58a..d403a56e57a9e 100644 --- a/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_proc_bind_codegen.cpp @@ -77,7 +77,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -118,7 +118,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -451,7 +451,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp index f5505ef0df251..1e6bdd1bff81b 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp @@ -4808,7 +4808,7 @@ int main() { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4889,7 +4889,7 @@ int main() { // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP65]], 1 // CHECK9-NEXT: [[TMP66:%.*]] = zext i32 [[ADD13]] to i64 // CHECK9-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP67]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP67]], align 4 // CHECK9-NEXT: [[TMP68:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP68]], align 4 // CHECK9-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -4979,7 +4979,7 @@ int main() { // CHECK9-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP111]], 1 // CHECK9-NEXT: [[TMP112:%.*]] = zext i32 [[ADD27]] to i64 // CHECK9-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP113]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP113]], align 4 // CHECK9-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP114]], align 4 // CHECK9-NEXT: [[TMP115:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -5060,7 +5060,7 @@ int main() { // CHECK9-NEXT: [[ADD41:%.*]] = add nsw i32 [[TMP152]], 1 // CHECK9-NEXT: [[TMP153:%.*]] = zext i32 [[ADD41]] to i64 // CHECK9-NEXT: [[TMP154:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP154]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP154]], align 4 // CHECK9-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP155]], align 4 // CHECK9-NEXT: [[TMP156:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 2 @@ -5150,7 +5150,7 @@ int main() { // CHECK9-NEXT: [[ADD56:%.*]] = add nsw i32 [[TMP198]], 1 // CHECK9-NEXT: [[TMP199:%.*]] = zext i32 [[ADD56]] to i64 // CHECK9-NEXT: [[TMP200:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP200]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP200]], align 4 // CHECK9-NEXT: [[TMP201:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP201]], align 4 // CHECK9-NEXT: [[TMP202:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 2 @@ -5231,7 +5231,7 @@ int main() { // CHECK9-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP239]], 1 
// CHECK9-NEXT: [[TMP240:%.*]] = zext i32 [[ADD70]] to i64 // CHECK9-NEXT: [[TMP241:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP241]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP241]], align 4 // CHECK9-NEXT: [[TMP242:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP242]], align 4 // CHECK9-NEXT: [[TMP243:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 2 @@ -5321,7 +5321,7 @@ int main() { // CHECK9-NEXT: [[ADD85:%.*]] = add nsw i32 [[TMP285]], 1 // CHECK9-NEXT: [[TMP286:%.*]] = zext i32 [[ADD85]] to i64 // CHECK9-NEXT: [[TMP287:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP287]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP287]], align 4 // CHECK9-NEXT: [[TMP288:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP288]], align 4 // CHECK9-NEXT: [[TMP289:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 2 @@ -7337,7 +7337,7 @@ int main() { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 
0, i32 2 @@ -7418,7 +7418,7 @@ int main() { // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP65]], 1 // CHECK9-NEXT: [[TMP66:%.*]] = zext i32 [[ADD13]] to i64 // CHECK9-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP67]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP67]], align 4 // CHECK9-NEXT: [[TMP68:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP68]], align 4 // CHECK9-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -7508,7 +7508,7 @@ int main() { // CHECK9-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP111]], 1 // CHECK9-NEXT: [[TMP112:%.*]] = zext i32 [[ADD27]] to i64 // CHECK9-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP113]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP113]], align 4 // CHECK9-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP114]], align 4 // CHECK9-NEXT: [[TMP115:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -7589,7 +7589,7 @@ int main() { // CHECK9-NEXT: [[ADD41:%.*]] = add nsw i32 [[TMP152]], 1 // CHECK9-NEXT: [[TMP153:%.*]] = zext i32 [[ADD41]] to i64 // CHECK9-NEXT: [[TMP154:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP154]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP154]], align 4 // CHECK9-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP155]], align 4 // 
CHECK9-NEXT: [[TMP156:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 2 @@ -7679,7 +7679,7 @@ int main() { // CHECK9-NEXT: [[ADD56:%.*]] = add nsw i32 [[TMP198]], 1 // CHECK9-NEXT: [[TMP199:%.*]] = zext i32 [[ADD56]] to i64 // CHECK9-NEXT: [[TMP200:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP200]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP200]], align 4 // CHECK9-NEXT: [[TMP201:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP201]], align 4 // CHECK9-NEXT: [[TMP202:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 2 @@ -7760,7 +7760,7 @@ int main() { // CHECK9-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP239]], 1 // CHECK9-NEXT: [[TMP240:%.*]] = zext i32 [[ADD70]] to i64 // CHECK9-NEXT: [[TMP241:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP241]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP241]], align 4 // CHECK9-NEXT: [[TMP242:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP242]], align 4 // CHECK9-NEXT: [[TMP243:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 2 @@ -7850,7 +7850,7 @@ int main() { // CHECK9-NEXT: [[ADD85:%.*]] = add nsw i32 [[TMP285]], 1 // CHECK9-NEXT: [[TMP286:%.*]] = zext i32 [[ADD85]] to i64 // CHECK9-NEXT: [[TMP287:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP287]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP287]], align 4 // CHECK9-NEXT: [[TMP288:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP288]], align 4 // CHECK9-NEXT: [[TMP289:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 2 @@ -9867,7 +9867,7 @@ int main() { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -9948,7 +9948,7 @@ int main() { // CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP65]], 1 // CHECK11-NEXT: [[TMP66:%.*]] = zext i32 [[ADD13]] to i64 // CHECK11-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP67]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP67]], align 4 // CHECK11-NEXT: [[TMP68:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP68]], align 4 // CHECK11-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -10038,7 +10038,7 @@ int main() { // CHECK11-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP111]], 1 // CHECK11-NEXT: [[TMP112:%.*]] = zext i32 [[ADD27]] to i64 // CHECK11-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP113]], align 4 
+// CHECK11-NEXT: store i32 5, ptr [[TMP113]], align 4 // CHECK11-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP114]], align 4 // CHECK11-NEXT: [[TMP115:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -10119,7 +10119,7 @@ int main() { // CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 [[TMP152]], 1 // CHECK11-NEXT: [[TMP153:%.*]] = zext i32 [[ADD41]] to i64 // CHECK11-NEXT: [[TMP154:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP154]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP154]], align 4 // CHECK11-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP155]], align 4 // CHECK11-NEXT: [[TMP156:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 2 @@ -10209,7 +10209,7 @@ int main() { // CHECK11-NEXT: [[ADD56:%.*]] = add nsw i32 [[TMP198]], 1 // CHECK11-NEXT: [[TMP199:%.*]] = zext i32 [[ADD56]] to i64 // CHECK11-NEXT: [[TMP200:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP200]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP200]], align 4 // CHECK11-NEXT: [[TMP201:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP201]], align 4 // CHECK11-NEXT: [[TMP202:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 2 @@ -10290,7 +10290,7 @@ int main() { // CHECK11-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP239]], 1 // CHECK11-NEXT: [[TMP240:%.*]] = zext i32 [[ADD70]] to i64 // CHECK11-NEXT: [[TMP241:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP241]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP241]], align 4 // CHECK11-NEXT: [[TMP242:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP242]], align 4 // CHECK11-NEXT: [[TMP243:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 2 @@ -10380,7 +10380,7 @@ int main() { // CHECK11-NEXT: [[ADD85:%.*]] = add nsw i32 [[TMP285]], 1 // CHECK11-NEXT: [[TMP286:%.*]] = zext i32 [[ADD85]] to i64 // CHECK11-NEXT: [[TMP287:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP287]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP287]], align 4 // CHECK11-NEXT: [[TMP288:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP288]], align 4 // CHECK11-NEXT: [[TMP289:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 2 @@ -12345,7 +12345,7 @@ int main() { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -12426,7 +12426,7 @@ int main() { // CHECK11-NEXT: 
[[ADD13:%.*]] = add nsw i32 [[TMP65]], 1 // CHECK11-NEXT: [[TMP66:%.*]] = zext i32 [[ADD13]] to i64 // CHECK11-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP67]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP67]], align 4 // CHECK11-NEXT: [[TMP68:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP68]], align 4 // CHECK11-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -12516,7 +12516,7 @@ int main() { // CHECK11-NEXT: [[ADD27:%.*]] = add nsw i32 [[TMP111]], 1 // CHECK11-NEXT: [[TMP112:%.*]] = zext i32 [[ADD27]] to i64 // CHECK11-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP113]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP113]], align 4 // CHECK11-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP114]], align 4 // CHECK11-NEXT: [[TMP115:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -12597,7 +12597,7 @@ int main() { // CHECK11-NEXT: [[ADD41:%.*]] = add nsw i32 [[TMP152]], 1 // CHECK11-NEXT: [[TMP153:%.*]] = zext i32 [[ADD41]] to i64 // CHECK11-NEXT: [[TMP154:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP154]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP154]], align 4 // CHECK11-NEXT: [[TMP155:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP155]], align 4 // CHECK11-NEXT: [[TMP156:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS42]], i32 0, i32 2 @@ -12687,7 +12687,7 @@ int main() { // CHECK11-NEXT: [[ADD56:%.*]] = add nsw i32 [[TMP198]], 1 // CHECK11-NEXT: [[TMP199:%.*]] = zext i32 [[ADD56]] to i64 // CHECK11-NEXT: [[TMP200:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP200]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP200]], align 4 // CHECK11-NEXT: [[TMP201:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP201]], align 4 // CHECK11-NEXT: [[TMP202:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS57]], i32 0, i32 2 @@ -12768,7 +12768,7 @@ int main() { // CHECK11-NEXT: [[ADD70:%.*]] = add nsw i32 [[TMP239]], 1 // CHECK11-NEXT: [[TMP240:%.*]] = zext i32 [[ADD70]] to i64 // CHECK11-NEXT: [[TMP241:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP241]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP241]], align 4 // CHECK11-NEXT: [[TMP242:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP242]], align 4 // CHECK11-NEXT: [[TMP243:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS71]], i32 0, i32 2 @@ -12858,7 +12858,7 @@ int main() { // CHECK11-NEXT: [[ADD85:%.*]] = add nsw i32 [[TMP285]], 1 // CHECK11-NEXT: [[TMP286:%.*]] = zext i32 [[ADD85]] to i64 // CHECK11-NEXT: [[TMP287:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP287]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP287]], align 4 // CHECK11-NEXT: [[TMP288:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP288]], align 4 // CHECK11-NEXT: [[TMP289:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS86]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp index cd206eaaca868..c52f9277802f2 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -897,7 +897,7 @@ int main() { // CHECK8-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK8-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK8-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK8-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK8-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK8-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK8-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK8-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1344,7 +1344,7 @@ int main() { // CHECK8-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK8-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK8-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK8-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK8-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK8-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], 
i32 0, i32 1 // CHECK8-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK8-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1858,7 +1858,7 @@ int main() { // CHECK10-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK10-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK10-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK10-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK10-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK10-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK10-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK10-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2299,7 +2299,7 @@ int main() { // CHECK10-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK10-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK10-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK10-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK10-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK10-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK10-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK10-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp index ef19a2830d385..4057f8156826d 100644 --- 
a/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_if_codegen.cpp @@ -139,7 +139,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -180,7 +180,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -557,7 +557,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -598,7 +598,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -648,7 +648,7 @@ int main() { // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP50]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP50]], align 4 // CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP51]], align 4 // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -1201,7 +1201,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1242,7 +1242,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -1292,7 +1292,7 @@ int main() { // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP50]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP50]], align 4 // CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP51]], align 4 // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -1835,7 +1835,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1876,7 +1876,7 @@ int main() { // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS5]], i32 0, i32 2 @@ -2253,7 +2253,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2294,7 +2294,7 @@ int main() { // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2344,7 +2344,7 @@ int main() { // CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// 
CHECK3-NEXT: store i32 4, ptr [[TMP50]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP50]], align 4 // CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP51]], align 4 // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -3132,7 +3132,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3173,7 +3173,7 @@ int main() { // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -3223,7 
+3223,7 @@ int main() { // CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP50]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP50]], align 4 // CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP51]], align 4 // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -4342,7 +4342,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4383,7 +4383,7 @@ int main() { // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP25]], align 4 
+// CHECK9-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -4760,7 +4760,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4801,7 +4801,7 @@ int main() { // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -4851,7 +4851,7 @@ int main() { // CHECK9-NEXT: 
[[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP50]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP50]], align 4 // CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP51]], align 4 // CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -5404,7 +5404,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5445,7 +5445,7 @@ int main() { // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK9-NEXT: store i32 5, ptr 
[[TMP25]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -5495,7 +5495,7 @@ int main() { // CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP50]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP50]], align 4 // CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP51]], align 4 // CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -6038,7 +6038,7 @@ int main() { // CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6079,7 +6079,7 @@ int main() { // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr 
inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -6456,7 +6456,7 @@ int main() { // CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6497,7 +6497,7 @@ int main() { // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP25]], align 4 
// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -6547,7 +6547,7 @@ int main() { // CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP50]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP50]], align 4 // CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP51]], align 4 // CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -7335,7 +7335,7 @@ int main() { // CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7376,7 +7376,7 @@ int main() { // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds 
[1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -7426,7 +7426,7 @@ int main() { // CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP50]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP50]], align 4 // CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP51]], align 4 // CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp index 6edb0db3047f0..7815c94319dd5 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -877,7 +877,7 @@ int main() { // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], 
i32 0, i32 0 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1360,7 +1360,7 @@ int main() { // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1911,7 +1911,7 @@ int main() { // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2388,7 +2388,7 @@ int main() { // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp index e20b95db8e9a4..b502884ae51f8 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_num_threads_codegen.cpp @@ -127,7 +127,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 
1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -186,7 +186,7 @@ int main() { // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP33]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP34]], align 4 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -628,7 +628,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -669,7 +669,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 
// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -726,7 +726,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -767,7 +767,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // 
CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -1875,7 +1875,7 @@ int main() { // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1934,7 +1934,7 @@ int main() { // CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP33]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK5-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP34]], align 4 // CHECK5-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2376,7 +2376,7 @@ int main() { // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2417,7 +2417,7 @@ int main() { // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2474,7 +2474,7 @@ int main() { // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2515,7 +2515,7 @@ int main() { // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -3284,7 +3284,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3343,7 +3343,7 @@ int main() { // CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP33]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP34]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -3785,7 +3785,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3826,7 +3826,7 @@ int main() { // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS5]], i32 0, i32 2 @@ -3883,7 +3883,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3924,7 +3924,7 @@ int main() { // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -5032,7 +5032,7 @@ int main() { // CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK13-NEXT: 
store i32 4, ptr [[TMP5]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK13-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5091,7 +5091,7 @@ int main() { // CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP33]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK13-NEXT: store i32 2, ptr [[TMP34]], align 4 // CHECK13-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -5533,7 +5533,7 @@ int main() { // CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK13-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5574,7 
+5574,7 @@ int main() { // CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK13-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -5631,7 +5631,7 @@ int main() { // CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK13-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5672,7 +5672,7 @@ int main() { // CHECK13-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK13-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr 
[[TMP25]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK13-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp index e832ded4715ef..1495f09967ece 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_private_codegen.cpp @@ -589,7 +589,7 @@ int main() { // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -936,7 +936,7 @@ int main() { // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 
1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1352,7 +1352,7 @@ int main() { // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1693,7 +1693,7 @@ int main() { // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp index bb77a4f417b2d..6641ca6ce1139 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp +++ 
b/clang/test/OpenMP/distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -77,7 +77,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -118,7 +118,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -479,7 +479,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], 
i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_private_codegen.cpp b/clang/test/OpenMP/distribute_private_codegen.cpp index efd893c3370e7..9c52d2cdc1200 100644 --- a/clang/test/OpenMP/distribute_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_private_codegen.cpp @@ -368,7 +368,7 @@ int main() { // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -409,7 +409,7 @@ int main() { // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS6]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -707,7 +707,7 @@ int main() { // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1011,7 +1011,7 @@ int main() { // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1052,7 +1052,7 @@ int main() { // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -1348,7 +1348,7 @@ int main() { // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_simd_codegen.cpp b/clang/test/OpenMP/distribute_simd_codegen.cpp index f2aacde1648e9..329978c9a01e6 100644 --- a/clang/test/OpenMP/distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_codegen.cpp @@ -197,7 +197,7 @@ int fint(void) { return ftemplate(); } // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // 
CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -405,7 +405,7 @@ int fint(void) { return ftemplate(); } // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -611,7 +611,7 @@ int fint(void) { return ftemplate(); } // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -836,7 +836,7 @@ int fint(void) { return ftemplate(); } // 
CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[ADD4]] to i64 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP20]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1034,7 +1034,7 @@ int fint(void) { return ftemplate(); } // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1222,7 +1222,7 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: 
[[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1426,7 +1426,7 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1628,7 +1628,7 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1849,7 +1849,7 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: 
[[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[TMP18:%.*]] = zext i32 [[ADD4]] to i64 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP20]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2047,7 +2047,7 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2235,7 +2235,7 @@ int fint(void) { return ftemplate(); } // CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK5-NEXT: [[TMP22:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2443,7 +2443,7 @@ int fint(void) { return ftemplate(); } // CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2649,7 +2649,7 @@ int fint(void) { return ftemplate(); } // CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2874,7 +2874,7 @@ int fint(void) { return ftemplate(); } // CHECK5-NEXT: [[ADD4:%.*]] = 
add nsw i32 [[TMP17]], 1 // CHECK5-NEXT: [[TMP18:%.*]] = zext i32 [[ADD4]] to i64 // CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 3, ptr [[TMP20]], align 4 // CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3103,7 +3103,7 @@ int fint(void) { return ftemplate(); } // CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3291,7 +3291,7 @@ int fint(void) { return ftemplate(); } // CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3495,7 +3495,7 @@ int fint(void) { return ftemplate(); } // CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3697,7 +3697,7 @@ int fint(void) { return ftemplate(); } // CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3918,7 +3918,7 @@ int fint(void) { return ftemplate(); } // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP17]], 1 
// CHECK7-NEXT: [[TMP18:%.*]] = zext i32 [[ADD4]] to i64 // CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 3, ptr [[TMP20]], align 4 // CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4147,7 +4147,7 @@ int fint(void) { return ftemplate(); } // CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK7-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK7-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp index bf3b464232aca..14389db5a78c3 100644 --- a/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_firstprivate_codegen.cpp @@ -604,7 +604,7 @@ int main() { // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP28:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -914,7 +914,7 @@ int main() { // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1297,7 +1297,7 @@ int main() { // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK11-NEXT: [[TMP30:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1605,7 +1605,7 @@ int main() { // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp index 550ab778d981c..ff07d46051db1 100644 --- a/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_lastprivate_codegen.cpp @@ -591,7 +591,7 @@ int main() { // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -919,7 +919,7 
@@ int main() { // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1321,7 +1321,7 @@ int main() { // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1647,7 +1647,7 @@ int main() { // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP23]], align 4 
+// CHECK11-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_simd_private_codegen.cpp b/clang/test/OpenMP/distribute_simd_private_codegen.cpp index 846a2d56d026b..6c8178c336677 100644 --- a/clang/test/OpenMP/distribute_simd_private_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_private_codegen.cpp @@ -410,7 +410,7 @@ int main() { // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -460,7 +460,7 @@ int main() { // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP30]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP30]], align 4 // CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // 
CHECK9-NEXT: store i32 2, ptr [[TMP31]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -776,7 +776,7 @@ int main() { // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1088,7 +1088,7 @@ int main() { // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1138,7 +1138,7 @@ int main() { // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP30]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP30]], align 4 // CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP31]], align 4 // CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -1452,7 +1452,7 @@ int main() { // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp index fdcce9ad0edfa..eb138336415bf 100644 --- a/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_reduction_codegen.cpp @@ -111,7 +111,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -269,7 +269,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -424,7 +424,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -582,7 +582,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // 
CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/map_struct_ordering.cpp b/clang/test/OpenMP/map_struct_ordering.cpp index cbfa59a7cb051..5a847afd72724 100644 --- a/clang/test/OpenMP/map_struct_ordering.cpp +++ b/clang/test/OpenMP/map_struct_ordering.cpp @@ -107,7 +107,7 @@ int map_struct() { // CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS4]], i32 0, i32 0 // CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS5]], i32 0, i32 0 // CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP22]], align 4 // CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/nvptx_lambda_capturing.cpp b/clang/test/OpenMP/nvptx_lambda_capturing.cpp index 27baf95a13e81..40f8a30759287 100644 --- a/clang/test/OpenMP/nvptx_lambda_capturing.cpp +++ b/clang/test/OpenMP/nvptx_lambda_capturing.cpp @@ -197,7 +197,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP67:%.*]] 
= getelementptr inbounds [12 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds [12 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP69]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP69]], align 4 // CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 12, ptr [[TMP70]], align 4 // CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -329,7 +329,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP142:%.*]] = getelementptr inbounds [12 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 // CHECK1-NEXT: [[TMP143:%.*]] = getelementptr inbounds [12 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK1-NEXT: [[TMP144:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP144]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP144]], align 4 // CHECK1-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CHECK1-NEXT: store i32 12, ptr [[TMP145]], align 4 // CHECK1-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 @@ -563,7 +563,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 
4 +// CHECK1-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -627,7 +627,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP52]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP52]], align 4 // CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP53]], align 4 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -782,7 +782,7 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP15]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP15]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP16]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git 
a/clang/test/OpenMP/reduction_implicit_map.cpp b/clang/test/OpenMP/reduction_implicit_map.cpp index 22995e757c59a..153b4fa9e5441 100644 --- a/clang/test/OpenMP/reduction_implicit_map.cpp +++ b/clang/test/OpenMP/reduction_implicit_map.cpp @@ -340,7 +340,7 @@ int main() // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -387,7 +387,7 @@ int main() // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -940,7 +940,7 @@ int main() // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP35]], 1 // CHECK2-NEXT: [[TMP36:%.*]] = zext i32 [[ADD]] to i64 // CHECK2-NEXT: [[TMP37:%.*]] = getelementptr 
inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP37]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP37]], align 4 // CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK2-NEXT: store i32 6, ptr [[TMP38]], align 4 // CHECK2-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1039,7 +1039,7 @@ int main() // CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP87]], 1 // CHECK2-NEXT: [[TMP88:%.*]] = zext i32 [[ADD17]] to i64 // CHECK2-NEXT: [[TMP89:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP89]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP89]], align 4 // CHECK2-NEXT: [[TMP90:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1 // CHECK2-NEXT: store i32 6, ptr [[TMP90]], align 4 // CHECK2-NEXT: [[TMP91:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2 @@ -1096,7 +1096,7 @@ int main() // CHECK2-NEXT: [[TMP115:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0 // CHECK2-NEXT: [[TMP116:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0 // CHECK2-NEXT: [[TMP117:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP117]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP117]], align 4 // CHECK2-NEXT: [[TMP118:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1 // CHECK2-NEXT: store i32 3, ptr [[TMP118]], align 4 // CHECK2-NEXT: [[TMP119:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 2 @@ 
-1153,7 +1153,7 @@ int main() // CHECK2-NEXT: [[TMP143:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS31]], i32 0, i32 0 // CHECK2-NEXT: [[TMP144:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS32]], i32 0, i32 0 // CHECK2-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP145]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP145]], align 4 // CHECK2-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 1 // CHECK2-NEXT: store i32 3, ptr [[TMP146]], align 4 // CHECK2-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS34]], i32 0, i32 2 diff --git a/clang/test/OpenMP/spirv_target_teams_reduction_addrspace.c b/clang/test/OpenMP/spirv_target_teams_reduction_addrspace.c index 7217ef9400a6b..eb4fb88eef885 100644 --- a/clang/test/OpenMP/spirv_target_teams_reduction_addrspace.c +++ b/clang/test/OpenMP/spirv_target_teams_reduction_addrspace.c @@ -13,7 +13,7 @@ // CHECK: call spir_func align 8 addrspace(9) ptr addrspace(4) @__kmpc_alloc_shared(i64 4) // Verify the reduction runtime function is called. -// CHECK: call spir_func addrspace(9) i32 @__kmpc_nvptx_teams_reduce_nowait_v2( +// CHECK: call spir_func addrspace(9) i32 @__kmpc_gpu_xteam_reduce_nowait( // Verify __kmpc_free_shared is called. 
// CHECK: call spir_func addrspace(9) void @__kmpc_free_shared(ptr addrspace(4) diff --git a/clang/test/OpenMP/target_codegen_global_capture.cpp b/clang/test/OpenMP/target_codegen_global_capture.cpp index 2b6ba17ad673f..bc278f0d51964 100644 --- a/clang/test/OpenMP/target_codegen_global_capture.cpp +++ b/clang/test/OpenMP/target_codegen_global_capture.cpp @@ -294,7 +294,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP53]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP53]], align 4 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 10, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -629,7 +629,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP57]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP57]], align 4 // CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 10, ptr [[TMP58]], align 4 // CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, 
i32 2 @@ -960,7 +960,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP57]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP57]], align 4 // CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 10, ptr [[TMP58]], align 4 // CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1214,7 +1214,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP47]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP47]], align 4 // CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 10, ptr [[TMP48]], align 4 // CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1549,7 +1549,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP51]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP51]], align 4 // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 10, ptr [[TMP52]], align 4 // CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1880,7 +1880,7 @@ int tbar2(short a, short b, short c, short d){ // CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP51]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP51]], align 4 // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 10, ptr [[TMP52]], align 4 // CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_default_codegen.cpp b/clang/test/OpenMP/target_default_codegen.cpp index 63c33d0e56d88..a41ee46067187 100644 --- a/clang/test/OpenMP/target_default_codegen.cpp +++ b/clang/test/OpenMP/target_default_codegen.cpp @@ -146,7 +146,7 @@ void foo8() { // CK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-64-NEXT: store i32 4, ptr [[TMP11]], align 4 +// CK-64-NEXT: 
store i32 5, ptr [[TMP11]], align 4 // CK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-64-NEXT: store i32 2, ptr [[TMP12]], align 4 // CK-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -218,7 +218,7 @@ void foo8() { // CK-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-64-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CK-64-NEXT: store i32 5, ptr [[TMP8]], align 4 // CK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-64-NEXT: store i32 2, ptr [[TMP9]], align 4 // CK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -293,7 +293,7 @@ void foo8() { // CK-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-64-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CK-64-NEXT: store i32 5, ptr [[TMP9]], align 4 // CK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-64-NEXT: store i32 2, ptr [[TMP10]], align 4 // CK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -371,7 +371,7 @@ void foo8() { // CK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-64-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CK-64-NEXT: store i32 5, ptr [[TMP10]], align 4 // CK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-64-NEXT: store i32 2, ptr [[TMP11]], align 4 // CK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -466,7 +466,7 @@ void foo8() { // CK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-64-NEXT: store i32 4, ptr [[TMP18]], align 4 +// CK-64-NEXT: store i32 5, ptr [[TMP18]], align 4 // CK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-64-NEXT: store i32 4, ptr [[TMP19]], align 4 // CK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -578,7 +578,7 @@ void foo8() { // CK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-64-NEXT: store i32 4, ptr [[TMP18]], align 4 +// CK-64-NEXT: store i32 5, ptr [[TMP18]], align 4 // CK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-64-NEXT: store i32 4, ptr [[TMP19]], align 4 // CK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -687,7 +687,7 @@ void foo8() { // CK-64-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-64-NEXT: store i32 4, ptr [[TMP18]], align 4 +// CK-64-NEXT: store i32 5, ptr [[TMP18]], align 4 // CK-64-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-64-NEXT: store i32 4, ptr [[TMP19]], align 4 // CK-64-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -780,7 +780,7 @@ void foo8() { // CK-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-64-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CK-64-NEXT: store i32 5, ptr [[TMP10]], align 4 // CK-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-64-NEXT: store i32 2, ptr [[TMP11]], align 4 // CK-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -999,7 +999,7 @@ void foo8() { // CK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds 
[2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-32-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CK-32-NEXT: store i32 5, ptr [[TMP9]], align 4 // CK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-32-NEXT: store i32 2, ptr [[TMP10]], align 4 // CK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1072,7 +1072,7 @@ void foo8() { // CK-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-32-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CK-32-NEXT: store i32 5, ptr [[TMP8]], align 4 // CK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-32-NEXT: store i32 2, ptr [[TMP9]], align 4 // CK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1147,7 +1147,7 @@ void foo8() { // CK-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-32-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CK-32-NEXT: store i32 5, ptr [[TMP9]], align 4 // CK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-32-NEXT: store i32 2, ptr [[TMP10]], 
align 4 // CK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1225,7 +1225,7 @@ void foo8() { // CK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-32-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CK-32-NEXT: store i32 5, ptr [[TMP10]], align 4 // CK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-32-NEXT: store i32 2, ptr [[TMP11]], align 4 // CK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1316,7 +1316,7 @@ void foo8() { // CK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-32-NEXT: store i32 4, ptr [[TMP16]], align 4 +// CK-32-NEXT: store i32 5, ptr [[TMP16]], align 4 // CK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-32-NEXT: store i32 4, ptr [[TMP17]], align 4 // CK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1425,7 +1425,7 @@ void foo8() { // CK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-32-NEXT: store i32 4, ptr [[TMP16]], align 4 +// CK-32-NEXT: store i32 5, ptr [[TMP16]], align 4 // CK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-32-NEXT: store i32 4, ptr [[TMP17]], align 4 // CK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1534,7 +1534,7 @@ void foo8() { // CK-32-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-32-NEXT: store i32 4, ptr [[TMP16]], align 4 +// CK-32-NEXT: store i32 5, ptr [[TMP16]], align 4 // CK-32-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-32-NEXT: store i32 4, ptr [[TMP17]], align 4 // CK-32-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1631,7 +1631,7 @@ void foo8() { // CK-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK-32-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CK-32-NEXT: store i32 5, ptr [[TMP10]], align 4 // CK-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK-32-NEXT: store i32 2, ptr [[TMP11]], align 4 // CK-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_defaultmap_codegen_03.cpp b/clang/test/OpenMP/target_defaultmap_codegen_03.cpp index c165eac641cea..8e82c39356402 100644 --- a/clang/test/OpenMP/target_defaultmap_codegen_03.cpp +++ b/clang/test/OpenMP/target_defaultmap_codegen_03.cpp @@ -140,7 +140,7 @@ void foo4(){ // CK1-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK1-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK1-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK1-64-NEXT: store i32 4, ptr [[TMP11]], align 4 +// CK1-64-NEXT: store i32 5, ptr [[TMP11]], align 4 // CK1-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK1-64-NEXT: store i32 2, ptr [[TMP12]], align 4 // CK1-64-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -217,7 +217,7 @@ void foo4(){ // CK1-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK1-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK1-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK1-32-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CK1-32-NEXT: store i32 5, ptr [[TMP9]], align 4 // CK1-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK1-32-NEXT: store i32 2, ptr [[TMP10]], align 4 // CK1-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -322,7 +322,7 @@ void foo4(){ // CK2-64-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 
x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK2-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK2-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK2-64-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CK2-64-NEXT: store i32 5, ptr [[TMP8]], align 4 // CK2-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK2-64-NEXT: store i32 2, ptr [[TMP9]], align 4 // CK2-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -396,7 +396,7 @@ void foo4(){ // CK2-32-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK2-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK2-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK2-32-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CK2-32-NEXT: store i32 5, ptr [[TMP8]], align 4 // CK2-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK2-32-NEXT: store i32 2, ptr [[TMP9]], align 4 // CK2-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -495,7 +495,7 @@ void foo4(){ // CK3-64-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK3-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK3-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK3-64-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CK3-64-NEXT: store i32 5, ptr [[TMP9]], align 4 // CK3-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK3-64-NEXT: store i32 2, ptr [[TMP10]], align 4 // CK3-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -570,7 +570,7 @@ void foo4(){ // CK3-32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK3-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK3-32-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK3-32-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CK3-32-NEXT: store i32 5, ptr [[TMP9]], align 4 // CK3-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK3-32-NEXT: store i32 2, ptr [[TMP10]], align 4 // CK3-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -674,7 +674,7 @@ void foo4(){ // CK4-64-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK4-64-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK4-64-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK4-64-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CK4-64-NEXT: store i32 5, ptr [[TMP10]], align 4 // CK4-64-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK4-64-NEXT: store i32 2, ptr [[TMP11]], align 4 // CK4-64-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -749,7 +749,7 @@ void foo4(){ // CK4-32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK4-32-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK4-32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK4-32-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CK4-32-NEXT: store i32 5, ptr [[TMP10]], align 4 // CK4-32-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK4-32-NEXT: store i32 2, ptr [[TMP11]], align 4 // CK4-32-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_dyn_groupprivate_codegen.cpp b/clang/test/OpenMP/target_dyn_groupprivate_codegen.cpp index 8b0c44969effa..353686c9d9953 100644 --- a/clang/test/OpenMP/target_dyn_groupprivate_codegen.cpp +++ b/clang/test/OpenMP/target_dyn_groupprivate_codegen.cpp @@ -229,7 +229,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -277,7 +277,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP45]], align 4 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -393,7 +393,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP23]], 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP30]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP30]], align 4 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -495,7 +495,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], 
i32 0, i32 2 @@ -569,7 +569,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP42:%.*]] = sext i16 [[TMP41]] to i32 // CHECK1-NEXT: [[TMP43:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP42]], 0 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP45]], align 4 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -974,7 +974,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META32]] +// CHECK1-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META32]] // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP16]], align 4, !noalias [[META32]] // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -1164,7 +1164,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 
// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1212,7 +1212,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP45]], align 4 // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -1328,7 +1328,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: [[TMP29:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP23]], 0 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP30]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP30]], align 4 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1430,7 +1430,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // 
CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1504,7 +1504,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP42:%.*]] = sext i16 [[TMP41]] to i32 // CHECK3-NEXT: [[TMP43:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP42]], 0 // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP45]], align 4 // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -1905,7 +1905,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META33]] +// CHECK3-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, 
ptr [[TMP16]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_firstprivate_codegen.cpp b/clang/test/OpenMP/target_firstprivate_codegen.cpp index 7bb657f20d919..57c35327b234d 100644 --- a/clang/test/OpenMP/target_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_firstprivate_codegen.cpp @@ -6165,7 +6165,7 @@ int bar(int n, double *ptr) { // CHECK0-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK0-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK0-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK0-NEXT: store i32 4, ptr [[TMP27]], align 4 +// CHECK0-NEXT: store i32 5, ptr [[TMP27]], align 4 // CHECK0-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK0-NEXT: store i32 4, ptr [[TMP28]], align 4 // CHECK0-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6272,7 +6272,7 @@ int bar(int n, double *ptr) { // CHECK0-NEXT: [[TMP80:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK0-NEXT: [[TMP81:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK0-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK0-NEXT: store i32 4, ptr [[TMP82]], align 4 +// CHECK0-NEXT: store i32 5, ptr [[TMP82]], align 4 // CHECK0-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK0-NEXT: store i32 10, ptr [[TMP83]], align 4 // CHECK0-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -6326,7 +6326,7 @@ int bar(int n, double *ptr) { // CHECK0-NEXT: [[TMP107:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK0-NEXT: [[TMP108:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK0-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0 -// CHECK0-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CHECK0-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK0-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1 // CHECK0-NEXT: store i32 3, ptr [[TMP110]], align 4 // CHECK0-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2 @@ -6594,7 +6594,7 @@ int bar(int n, double *ptr) { // CHECK0-NEXT: [[TMP29:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK0-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK0-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK0-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK0-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK0-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK0-NEXT: store i32 6, ptr [[TMP32]], align 4 // CHECK0-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6687,7 +6687,7 @@ int bar(int n, double *ptr) { // CHECK0-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK0-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK0-NEXT: [[TMP18:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK0-NEXT: store i32 4, ptr [[TMP18]], align 4 +// CHECK0-NEXT: store i32 5, ptr [[TMP18]], align 4 // CHECK0-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK0-NEXT: store i32 4, ptr [[TMP19]], align 4 // CHECK0-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6760,7 +6760,7 @@ int bar(int n, double *ptr) { // CHECK0-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK0-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK0-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK0-NEXT: store i32 4, ptr [[TMP13]], align 4 +// CHECK0-NEXT: store i32 5, ptr [[TMP13]], align 4 // CHECK0-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK0-NEXT: store i32 3, ptr [[TMP14]], align 4 // CHECK0-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6984,7 +6984,7 @@ int bar(int n, double *ptr) { // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP27]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP27]], align 4 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP28]], align 4 // 
CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7091,7 +7091,7 @@ int bar(int n, double *ptr) { // CHECK1-NEXT: [[TMP80:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP81:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP82]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP82]], align 4 // CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 10, ptr [[TMP83]], align 4 // CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -7145,7 +7145,7 @@ int bar(int n, double *ptr) { // CHECK1-NEXT: [[TMP107:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK1-NEXT: [[TMP108:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK1-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK1-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP110]], align 4 // CHECK1-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2 @@ -7413,7 +7413,7 @@ int bar(int n, double *ptr) { // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x i64], ptr 
[[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7506,7 +7506,7 @@ int bar(int n, double *ptr) { // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP18]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP18]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7579,7 +7579,7 @@ int bar(int n, double *ptr) { // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP13]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP13]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 
0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP14]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7801,7 +7801,7 @@ int bar(int n, double *ptr) { // CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK2-NEXT: store i32 4, ptr [[TMP26]], align 4 // CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7910,7 +7910,7 @@ int bar(int n, double *ptr) { // CHECK2-NEXT: [[TMP80:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP81:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK2-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP82]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP82]], align 4 // CHECK2-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK2-NEXT: store i32 10, ptr [[TMP83]], align 4 // CHECK2-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -7964,7 +7964,7 @@ int bar(int n, double *ptr) { // CHECK2-NEXT: [[TMP107:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK2-NEXT: [[TMP108:%.*]] = 
getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK2-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK2-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1 // CHECK2-NEXT: store i32 3, ptr [[TMP110]], align 4 // CHECK2-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2 @@ -8232,7 +8232,7 @@ int bar(int n, double *ptr) { // CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK2-NEXT: store i32 6, ptr [[TMP32]], align 4 // CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -8325,7 +8325,7 @@ int bar(int n, double *ptr) { // CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP18]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP18]], align 4 // CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK2-NEXT: store i32 4, ptr [[TMP19]], align 4 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -8398,7 +8398,7 @@ int bar(int n, double *ptr) { // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP13]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP13]], align 4 // CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK2-NEXT: store i32 3, ptr [[TMP14]], align 4 // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -8620,7 +8620,7 @@ int bar(int n, double *ptr) { // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP26]], align 4 // CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -8729,7 +8729,7 @@ int bar(int n, double *ptr) { // CHECK3-NEXT: [[TMP80:%.*]] = getelementptr inbounds [10 x ptr], ptr 
[[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP81:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK3-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP82]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP82]], align 4 // CHECK3-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK3-NEXT: store i32 10, ptr [[TMP83]], align 4 // CHECK3-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -8783,7 +8783,7 @@ int bar(int n, double *ptr) { // CHECK3-NEXT: [[TMP107:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK3-NEXT: [[TMP108:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK3-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK3-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP110]], align 4 // CHECK3-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2 @@ -9051,7 +9051,7 @@ int bar(int n, double *ptr) { // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP31]], 
align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -9144,7 +9144,7 @@ int bar(int n, double *ptr) { // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP18]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP18]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP19]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -9217,7 +9217,7 @@ int bar(int n, double *ptr) { // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP13]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP14]], align 4 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_has_device_addr_codegen.cpp 
b/clang/test/OpenMP/target_has_device_addr_codegen.cpp index b4d77240f245d..062fbac69c99e 100644 --- a/clang/test/OpenMP/target_has_device_addr_codegen.cpp +++ b/clang/test/OpenMP/target_has_device_addr_codegen.cpp @@ -334,7 +334,7 @@ void use_template() { // CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP9]], align 4 // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP10]], align 4 // CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -384,7 +384,7 @@ void use_template() { // CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP34]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP34]], align 4 // CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP35]], align 4 // CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -431,7 +431,7 @@ void use_template() { // CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 // CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x 
ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP57]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP57]], align 4 // CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP58]], align 4 // CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 @@ -481,7 +481,7 @@ void use_template() { // CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS14]], i32 0, i32 0 // CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS15]], i32 0, i32 0 // CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP82]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP82]], align 4 // CHECK-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP83]], align 4 // CHECK-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS17]], i32 0, i32 2 @@ -528,7 +528,7 @@ void use_template() { // CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 // CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 // CHECK-NEXT: [[TMP105:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP105]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP105]], align 4 // CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 
0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP106]], align 4 // CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -575,7 +575,7 @@ void use_template() { // CHECK-NEXT: [[TMP126:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 // CHECK-NEXT: [[TMP127:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 // CHECK-NEXT: [[TMP128:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP128]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP128]], align 4 // CHECK-NEXT: [[TMP129:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP129]], align 4 // CHECK-NEXT: [[TMP130:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 2 @@ -759,7 +759,7 @@ void use_template() { // CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP9]], align 4 // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP10]], align 4 // CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -809,7 +809,7 @@ void use_template() { // CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP34]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP34]], align 4 // CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP35]], align 4 // CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -856,7 +856,7 @@ void use_template() { // CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 // CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP57]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP57]], align 4 // CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP58]], align 4 // CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 @@ -903,7 +903,7 @@ void use_template() { // CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 // CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 // CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP80]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP80]], align 4 // CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 // 
CHECK-NEXT: store i32 2, ptr [[TMP81]], align 4 // CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 @@ -989,7 +989,7 @@ void use_template() { // CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP9]], align 4 // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP10]], align 4 // CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1039,7 +1039,7 @@ void use_template() { // CHECK-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP34]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP34]], align 4 // CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP35]], align 4 // CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -1086,7 +1086,7 @@ void use_template() { // CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 // CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS8]], 
i32 0, i32 0 // CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP57]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP57]], align 4 // CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP58]], align 4 // CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 @@ -1133,7 +1133,7 @@ void use_template() { // CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 // CHECK-NEXT: [[TMP79:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 // CHECK-NEXT: [[TMP80:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP80]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP80]], align 4 // CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP81]], align 4 // CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 @@ -1356,7 +1356,7 @@ void use_template() { // CHECK-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK-NEXT: [[TMP22:%.*]] = sext i32 [[TMP21]] to i64 // CHECK-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 4, ptr [[TMP24]], align 4 // CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_has_device_addr_codegen_01.cpp b/clang/test/OpenMP/target_has_device_addr_codegen_01.cpp index 0a7d3b0872743..f2ef6b3d5f609 100644 --- a/clang/test/OpenMP/target_has_device_addr_codegen_01.cpp +++ b/clang/test/OpenMP/target_has_device_addr_codegen_01.cpp @@ -108,7 +108,7 @@ int main() { // CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP29]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP29]], align 4 // CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 7, ptr [[TMP30]], align 4 // CHECK-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -222,7 +222,7 @@ int main() { // CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP27]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP27]], align 4 // CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 6, ptr [[TMP28]], align 4 // CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_is_device_ptr_codegen.cpp 
b/clang/test/OpenMP/target_is_device_ptr_codegen.cpp index 13373b167e74a..d77119c87a381 100644 --- a/clang/test/OpenMP/target_is_device_ptr_codegen.cpp +++ b/clang/test/OpenMP/target_is_device_ptr_codegen.cpp @@ -1833,7 +1833,7 @@ void bar() { // CK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK10-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK10-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK10-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CK10-NEXT: store i32 5, ptr [[TMP9]], align 4 // CK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK10-NEXT: store i32 2, ptr [[TMP10]], align 4 // CK10-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1881,7 +1881,7 @@ void bar() { // CK10-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CK10-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CK10-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CK10-NEXT: store i32 4, ptr [[TMP33]], align 4 +// CK10-NEXT: store i32 5, ptr [[TMP33]], align 4 // CK10-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CK10-NEXT: store i32 2, ptr [[TMP34]], align 4 // CK10-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -1929,7 +1929,7 @@ void bar() { // CK10-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 // CK10-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 
// CK10-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 -// CK10-NEXT: store i32 4, ptr [[TMP57]], align 4 +// CK10-NEXT: store i32 5, ptr [[TMP57]], align 4 // CK10-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CK10-NEXT: store i32 2, ptr [[TMP58]], align 4 // CK10-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 @@ -1980,7 +1980,7 @@ void bar() { // CK10-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 // CK10-NEXT: [[TMP82:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 // CK10-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 -// CK10-NEXT: store i32 4, ptr [[TMP83]], align 4 +// CK10-NEXT: store i32 5, ptr [[TMP83]], align 4 // CK10-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 // CK10-NEXT: store i32 2, ptr [[TMP84]], align 4 // CK10-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 @@ -2031,7 +2031,7 @@ void bar() { // CK10-NEXT: [[TMP107:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 // CK10-NEXT: [[TMP108:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 // CK10-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CK10-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CK10-NEXT: store i32 5, ptr [[TMP109]], align 4 // CK10-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CK10-NEXT: store i32 2, ptr [[TMP110]], align 4 // CK10-NEXT: 
[[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -2082,7 +2082,7 @@ void bar() { // CK10-NEXT: [[TMP133:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CK10-NEXT: [[TMP134:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CK10-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CK10-NEXT: store i32 4, ptr [[TMP135]], align 4 +// CK10-NEXT: store i32 5, ptr [[TMP135]], align 4 // CK10-NEXT: [[TMP136:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CK10-NEXT: store i32 2, ptr [[TMP136]], align 4 // CK10-NEXT: [[TMP137:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -2143,7 +2143,7 @@ void bar() { // CK10-NEXT: [[TMP165:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 // CK10-NEXT: [[TMP166:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 // CK10-NEXT: [[TMP167:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 -// CK10-NEXT: store i32 4, ptr [[TMP167]], align 4 +// CK10-NEXT: store i32 5, ptr [[TMP167]], align 4 // CK10-NEXT: [[TMP168:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 // CK10-NEXT: store i32 3, ptr [[TMP168]], align 4 // CK10-NEXT: [[TMP169:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 @@ -2360,7 +2360,7 @@ void bar() { // CK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK11-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CK11-NEXT: store i32 5, ptr [[TMP9]], align 4 // CK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK11-NEXT: store i32 2, ptr [[TMP10]], align 4 // CK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2408,7 +2408,7 @@ void bar() { // CK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CK11-NEXT: store i32 4, ptr [[TMP33]], align 4 +// CK11-NEXT: store i32 5, ptr [[TMP33]], align 4 // CK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CK11-NEXT: store i32 2, ptr [[TMP34]], align 4 // CK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -2456,7 +2456,7 @@ void bar() { // CK11-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 // CK11-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CK11-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 -// CK11-NEXT: store i32 4, ptr [[TMP57]], align 4 +// CK11-NEXT: store i32 5, ptr [[TMP57]], align 4 // CK11-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CK11-NEXT: store i32 2, ptr [[TMP58]], align 4 // CK11-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS10]], i32 0, i32 2 @@ -2507,7 +2507,7 @@ void bar() { // CK11-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 // CK11-NEXT: [[TMP82:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 // CK11-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 -// CK11-NEXT: store i32 4, ptr [[TMP83]], align 4 +// CK11-NEXT: store i32 5, ptr [[TMP83]], align 4 // CK11-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 // CK11-NEXT: store i32 2, ptr [[TMP84]], align 4 // CK11-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 @@ -2558,7 +2558,7 @@ void bar() { // CK11-NEXT: [[TMP107:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 // CK11-NEXT: [[TMP108:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 // CK11-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CK11-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CK11-NEXT: store i32 5, ptr [[TMP109]], align 4 // CK11-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CK11-NEXT: store i32 2, ptr [[TMP110]], align 4 // CK11-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -2609,7 +2609,7 @@ void bar() { // CK11-NEXT: [[TMP133:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CK11-NEXT: [[TMP134:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CK11-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CK11-NEXT: 
store i32 4, ptr [[TMP135]], align 4 +// CK11-NEXT: store i32 5, ptr [[TMP135]], align 4 // CK11-NEXT: [[TMP136:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CK11-NEXT: store i32 2, ptr [[TMP136]], align 4 // CK11-NEXT: [[TMP137:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -2670,7 +2670,7 @@ void bar() { // CK11-NEXT: [[TMP165:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 // CK11-NEXT: [[TMP166:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 // CK11-NEXT: [[TMP167:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 -// CK11-NEXT: store i32 4, ptr [[TMP167]], align 4 +// CK11-NEXT: store i32 5, ptr [[TMP167]], align 4 // CK11-NEXT: [[TMP168:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 // CK11-NEXT: store i32 3, ptr [[TMP168]], align 4 // CK11-NEXT: [[TMP169:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 @@ -2887,7 +2887,7 @@ void bar() { // CK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK12-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK12-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CK12-NEXT: store i32 5, ptr [[TMP9]], align 4 // CK12-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK12-NEXT: store i32 2, ptr [[TMP10]], align 4 // CK12-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2935,7 +2935,7 @@ void bar() { // 
CK12-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CK12-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CK12-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CK12-NEXT: store i32 4, ptr [[TMP33]], align 4 +// CK12-NEXT: store i32 5, ptr [[TMP33]], align 4 // CK12-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CK12-NEXT: store i32 2, ptr [[TMP34]], align 4 // CK12-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -2983,7 +2983,7 @@ void bar() { // CK12-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 // CK12-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CK12-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 -// CK12-NEXT: store i32 4, ptr [[TMP57]], align 4 +// CK12-NEXT: store i32 5, ptr [[TMP57]], align 4 // CK12-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CK12-NEXT: store i32 2, ptr [[TMP58]], align 4 // CK12-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 @@ -3034,7 +3034,7 @@ void bar() { // CK12-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 // CK12-NEXT: [[TMP82:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 // CK12-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 -// CK12-NEXT: store i32 4, ptr [[TMP83]], align 4 +// CK12-NEXT: store i32 5, ptr [[TMP83]], align 4 
// CK12-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 // CK12-NEXT: store i32 2, ptr [[TMP84]], align 4 // CK12-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 @@ -3085,7 +3085,7 @@ void bar() { // CK12-NEXT: [[TMP107:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 // CK12-NEXT: [[TMP108:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 // CK12-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CK12-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CK12-NEXT: store i32 5, ptr [[TMP109]], align 4 // CK12-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CK12-NEXT: store i32 2, ptr [[TMP110]], align 4 // CK12-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -3136,7 +3136,7 @@ void bar() { // CK12-NEXT: [[TMP133:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CK12-NEXT: [[TMP134:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CK12-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CK12-NEXT: store i32 4, ptr [[TMP135]], align 4 +// CK12-NEXT: store i32 5, ptr [[TMP135]], align 4 // CK12-NEXT: [[TMP136:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CK12-NEXT: store i32 2, ptr [[TMP136]], align 4 // CK12-NEXT: [[TMP137:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -3197,7 +3197,7 @@ void bar() { // CK12-NEXT: [[TMP165:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 // CK12-NEXT: [[TMP166:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 // CK12-NEXT: [[TMP167:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 -// CK12-NEXT: store i32 4, ptr [[TMP167]], align 4 +// CK12-NEXT: store i32 5, ptr [[TMP167]], align 4 // CK12-NEXT: [[TMP168:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 1 // CK12-NEXT: store i32 3, ptr [[TMP168]], align 4 // CK12-NEXT: [[TMP169:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 @@ -3414,7 +3414,7 @@ void bar() { // CK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK13-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CK13-NEXT: store i32 5, ptr [[TMP9]], align 4 // CK13-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK13-NEXT: store i32 2, ptr [[TMP10]], align 4 // CK13-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3462,7 +3462,7 @@ void bar() { // CK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CK13-NEXT: store i32 4, ptr [[TMP33]], align 4 +// CK13-NEXT: store i32 5, ptr [[TMP33]], align 4 // CK13-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CK13-NEXT: store i32 2, ptr [[TMP34]], align 4 // CK13-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -3510,7 +3510,7 @@ void bar() { // CK13-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 // CK13-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CK13-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 -// CK13-NEXT: store i32 4, ptr [[TMP57]], align 4 +// CK13-NEXT: store i32 5, ptr [[TMP57]], align 4 // CK13-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CK13-NEXT: store i32 2, ptr [[TMP58]], align 4 // CK13-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 @@ -3561,7 +3561,7 @@ void bar() { // CK13-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS13]], i32 0, i32 0 // CK13-NEXT: [[TMP82:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS14]], i32 0, i32 0 // CK13-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 -// CK13-NEXT: store i32 4, ptr [[TMP83]], align 4 +// CK13-NEXT: store i32 5, ptr [[TMP83]], align 4 // CK13-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 // CK13-NEXT: store i32 2, ptr [[TMP84]], align 4 // CK13-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 @@ -3612,7 +3612,7 @@ void bar() { // CK13-NEXT: [[TMP107:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS20]], i32 0, i32 0 // CK13-NEXT: [[TMP108:%.*]] = getelementptr 
inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 // CK13-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CK13-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CK13-NEXT: store i32 5, ptr [[TMP109]], align 4 // CK13-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CK13-NEXT: store i32 2, ptr [[TMP110]], align 4 // CK13-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -3663,7 +3663,7 @@ void bar() { // CK13-NEXT: [[TMP133:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CK13-NEXT: [[TMP134:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CK13-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CK13-NEXT: store i32 4, ptr [[TMP135]], align 4 +// CK13-NEXT: store i32 5, ptr [[TMP135]], align 4 // CK13-NEXT: [[TMP136:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CK13-NEXT: store i32 2, ptr [[TMP136]], align 4 // CK13-NEXT: [[TMP137:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -3724,7 +3724,7 @@ void bar() { // CK13-NEXT: [[TMP165:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS35]], i32 0, i32 0 // CK13-NEXT: [[TMP166:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS36]], i32 0, i32 0 // CK13-NEXT: [[TMP167:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 0 -// CK13-NEXT: store i32 4, ptr [[TMP167]], align 4 +// CK13-NEXT: store i32 5, ptr [[TMP167]], align 4 // CK13-NEXT: [[TMP168:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, 
i32 1 // CK13-NEXT: store i32 3, ptr [[TMP168]], align 4 // CK13-NEXT: [[TMP169:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS38]], i32 0, i32 2 @@ -4233,7 +4233,7 @@ void bar() { // CK20-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK20-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK20-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK20-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CK20-NEXT: store i32 5, ptr [[TMP8]], align 4 // CK20-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK20-NEXT: store i32 2, ptr [[TMP9]], align 4 // CK20-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4296,7 +4296,7 @@ void bar() { // CK20-NEXT: [[TMP39:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CK20-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CK20-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CK20-NEXT: store i32 4, ptr [[TMP41]], align 4 +// CK20-NEXT: store i32 5, ptr [[TMP41]], align 4 // CK20-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CK20-NEXT: store i32 3, ptr [[TMP42]], align 4 // CK20-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -4366,7 +4366,7 @@ void bar() { // CK20-NEXT: [[TMP75:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 // CK20-NEXT: [[TMP76:%.*]] = getelementptr inbounds [4 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 // CK20-NEXT: [[TMP77:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CK20-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CK20-NEXT: store i32 5, ptr [[TMP77]], align 4 // CK20-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CK20-NEXT: store i32 4, ptr [[TMP78]], align 4 // CK20-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -4535,7 +4535,7 @@ void bar() { // CK21-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK21-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CK21-NEXT: store i32 5, ptr [[TMP8]], align 4 // CK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK21-NEXT: store i32 2, ptr [[TMP9]], align 4 // CK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4598,7 +4598,7 @@ void bar() { // CK21-NEXT: [[TMP39:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CK21-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CK21-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CK21-NEXT: store i32 4, ptr [[TMP41]], align 4 +// CK21-NEXT: store i32 5, ptr [[TMP41]], align 4 // CK21-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CK21-NEXT: store i32 3, ptr [[TMP42]], align 4 // CK21-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -4668,7 +4668,7 @@ void bar() { // CK21-NEXT: [[TMP75:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 // CK21-NEXT: [[TMP76:%.*]] = getelementptr inbounds [4 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 // CK21-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CK21-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CK21-NEXT: store i32 5, ptr [[TMP77]], align 4 // CK21-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CK21-NEXT: store i32 4, ptr [[TMP78]], align 4 // CK21-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -4837,7 +4837,7 @@ void bar() { // CK22-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK22-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK22-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK22-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CK22-NEXT: store i32 5, ptr [[TMP8]], align 4 // CK22-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK22-NEXT: store i32 2, ptr [[TMP9]], align 4 // CK22-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4901,7 +4901,7 @@ void bar() { // CK22-NEXT: [[TMP40:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CK22-NEXT: [[TMP41:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CK22-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CK22-NEXT: 
store i32 4, ptr [[TMP42]], align 4 +// CK22-NEXT: store i32 5, ptr [[TMP42]], align 4 // CK22-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CK22-NEXT: store i32 3, ptr [[TMP43]], align 4 // CK22-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -4972,7 +4972,7 @@ void bar() { // CK22-NEXT: [[TMP77:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 // CK22-NEXT: [[TMP78:%.*]] = getelementptr inbounds [4 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 // CK22-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CK22-NEXT: store i32 4, ptr [[TMP79]], align 4 +// CK22-NEXT: store i32 5, ptr [[TMP79]], align 4 // CK22-NEXT: [[TMP80:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CK22-NEXT: store i32 4, ptr [[TMP80]], align 4 // CK22-NEXT: [[TMP81:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -5141,7 +5141,7 @@ void bar() { // CK23-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK23-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CK23-NEXT: store i32 5, ptr [[TMP8]], align 4 // CK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK23-NEXT: store i32 2, ptr [[TMP9]], align 4 // CK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5205,7 +5205,7 @@ void bar() { // CK23-NEXT: [[TMP40:%.*]] 
= getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CK23-NEXT: [[TMP41:%.*]] = getelementptr inbounds [3 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CK23-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CK23-NEXT: store i32 4, ptr [[TMP42]], align 4 +// CK23-NEXT: store i32 5, ptr [[TMP42]], align 4 // CK23-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CK23-NEXT: store i32 3, ptr [[TMP43]], align 4 // CK23-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -5276,7 +5276,7 @@ void bar() { // CK23-NEXT: [[TMP77:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 // CK23-NEXT: [[TMP78:%.*]] = getelementptr inbounds [4 x i64], ptr [[DOTOFFLOAD_SIZES13]], i32 0, i32 0 // CK23-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CK23-NEXT: store i32 4, ptr [[TMP79]], align 4 +// CK23-NEXT: store i32 5, ptr [[TMP79]], align 4 // CK23-NEXT: [[TMP80:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CK23-NEXT: store i32 4, ptr [[TMP80]], align 4 // CK23-NEXT: [[TMP81:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -5698,7 +5698,7 @@ void bar() { // CK30-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK30-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK30-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK30-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CK30-NEXT: store i32 5, ptr [[TMP9]], align 4 // CK30-NEXT: [[TMP10:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK30-NEXT: store i32 2, ptr [[TMP10]], align 4 // CK30-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5769,7 +5769,7 @@ void bar() { // CK31-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK31-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK31-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK31-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CK31-NEXT: store i32 5, ptr [[TMP9]], align 4 // CK31-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK31-NEXT: store i32 2, ptr [[TMP10]], align 4 // CK31-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5840,7 +5840,7 @@ void bar() { // CK32-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK32-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK32-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK32-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CK32-NEXT: store i32 5, ptr [[TMP9]], align 4 // CK32-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK32-NEXT: store i32 2, ptr [[TMP10]], align 4 // CK32-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5911,7 +5911,7 @@ void bar() { // CK33-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CK33-NEXT: [[TMP8:%.*]] = getelementptr inbounds 
[2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CK33-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CK33-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CK33-NEXT: store i32 5, ptr [[TMP9]], align 4 // CK33-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CK33-NEXT: store i32 2, ptr [[TMP10]], align 4 // CK33-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_map_array_of_structs_with_nested_mapper_codegen.cpp b/clang/test/OpenMP/target_map_array_of_structs_with_nested_mapper_codegen.cpp index 1b9fbb9868916..c9bc3a54c38f2 100644 --- a/clang/test/OpenMP/target_map_array_of_structs_with_nested_mapper_codegen.cpp +++ b/clang/test/OpenMP/target_map_array_of_structs_with_nested_mapper_codegen.cpp @@ -64,7 +64,7 @@ void foo() { // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_map_array_section_no_length_codegen.cpp b/clang/test/OpenMP/target_map_array_section_no_length_codegen.cpp index 51999244fd3a3..81a4e6503d74d 100644 --- a/clang/test/OpenMP/target_map_array_section_no_length_codegen.cpp +++ 
b/clang/test/OpenMP/target_map_array_section_no_length_codegen.cpp @@ -108,7 +108,7 @@ void array_section_no_length_map_clause(float *d, int index) { // CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP14]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP14]], align 4 // CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 3, ptr [[TMP15]], align 4 // CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -165,7 +165,7 @@ void array_section_no_length_map_clause(float *d, int index) { // CHECK-NEXT: [[TMP41:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP43]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP43]], align 4 // CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK-NEXT: store i32 3, ptr [[TMP44]], align 4 // CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -244,7 +244,7 @@ void array_section_no_length_map_clause(float *d, int index) { // CHECK-NEXT: [[TMP83:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK-NEXT: [[TMP84:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP85]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP85]], align 4 // CHECK-NEXT: [[TMP86:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK-NEXT: store i32 5, ptr [[TMP86]], align 4 // CHECK-NEXT: [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -336,7 +336,7 @@ void array_section_no_length_map_clause(float *d, int index) { // CHECK-NEXT: [[TMP134:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 0 // CHECK-NEXT: [[TMP135:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK-NEXT: [[TMP136:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP136]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP136]], align 4 // CHECK-NEXT: [[TMP137:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 1 // CHECK-NEXT: store i32 5, ptr [[TMP137]], align 4 // CHECK-NEXT: [[TMP138:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS25]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_map_array_section_of_structs_with_nested_mapper_codegen.cpp b/clang/test/OpenMP/target_map_array_section_of_structs_with_nested_mapper_codegen.cpp index daee73d80480e..f530c92c8f1fe 100644 --- a/clang/test/OpenMP/target_map_array_section_of_structs_with_nested_mapper_codegen.cpp +++ b/clang/test/OpenMP/target_map_array_section_of_structs_with_nested_mapper_codegen.cpp @@ -61,7 +61,7 @@ void foo() { // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_map_codegen_03.cpp b/clang/test/OpenMP/target_map_codegen_03.cpp index f62032f971a61..2a506ee627ab8 100644 --- a/clang/test/OpenMP/target_map_codegen_03.cpp +++ b/clang/test/OpenMP/target_map_codegen_03.cpp @@ -102,7 +102,7 @@ void implicit_maps_nested_integer (int a){ // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP11]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP12]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -209,7 +209,7 @@ void implicit_maps_nested_integer (int a){ // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP12]], align 4 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_map_codegen_hold.cpp b/clang/test/OpenMP/target_map_codegen_hold.cpp index a46c5fa9a2b5f..fbc9c1818b7d8 100644 --- a/clang/test/OpenMP/target_map_codegen_hold.cpp +++ b/clang/test/OpenMP/target_map_codegen_hold.cpp @@ -249,7 +249,7 @@ void ST::test_present_members() { // CHECK-USE-PPC64LE-NEXT: [[TMP36:%.*]] = getelementptr inbounds [8 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-USE-PPC64LE-NEXT: [[TMP37:%.*]] = getelementptr inbounds [8 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK-USE-PPC64LE-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-USE-PPC64LE-NEXT: store i32 4, ptr [[TMP38]], align 4 +// CHECK-USE-PPC64LE-NEXT: store i32 5, ptr [[TMP38]], align 4 // CHECK-USE-PPC64LE-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-USE-PPC64LE-NEXT: store i32 8, ptr [[TMP39]], align 4 // CHECK-USE-PPC64LE-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -296,7 +296,7 @@ void ST::test_present_members() { // CHECK-USE-PPC64LE-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK-USE-PPC64LE-NEXT: [[TMP60:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK-USE-PPC64LE-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK-USE-PPC64LE-NEXT: store i32 4, ptr [[TMP61]], align 4 +// CHECK-USE-PPC64LE-NEXT: store i32 5, ptr [[TMP61]], align 4 // CHECK-USE-PPC64LE-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK-USE-PPC64LE-NEXT: store i32 2, ptr [[TMP62]], align 4 // CHECK-USE-PPC64LE-NEXT: [[TMP63:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -429,7 +429,7 @@ void ST::test_present_members() { // CHECK-USE-PPC64LE-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-USE-PPC64LE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK-USE-PPC64LE-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-USE-PPC64LE-NEXT: store i32 4, ptr [[TMP20]], align 4 +// CHECK-USE-PPC64LE-NEXT: store i32 5, ptr [[TMP20]], align 4 // CHECK-USE-PPC64LE-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-USE-PPC64LE-NEXT: store i32 4, ptr [[TMP21]], align 4 // CHECK-USE-PPC64LE-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -573,7 +573,7 @@ void ST::test_present_members() { // CHECK-USE-I386-NEXT: [[TMP38:%.*]] = getelementptr inbounds [8 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-USE-I386-NEXT: [[TMP39:%.*]] = getelementptr inbounds [8 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK-USE-I386-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-USE-I386-NEXT: store i32 4, ptr [[TMP40]], align 4 +// CHECK-USE-I386-NEXT: store i32 5, ptr [[TMP40]], align 4 // CHECK-USE-I386-NEXT: 
[[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-USE-I386-NEXT: store i32 8, ptr [[TMP41]], align 4 // CHECK-USE-I386-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -620,7 +620,7 @@ void ST::test_present_members() { // CHECK-USE-I386-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK-USE-I386-NEXT: [[TMP62:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK-USE-I386-NEXT: [[TMP63:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK-USE-I386-NEXT: store i32 4, ptr [[TMP63]], align 4 +// CHECK-USE-I386-NEXT: store i32 5, ptr [[TMP63]], align 4 // CHECK-USE-I386-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK-USE-I386-NEXT: store i32 2, ptr [[TMP64]], align 4 // CHECK-USE-I386-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -754,7 +754,7 @@ void ST::test_present_members() { // CHECK-USE-I386-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-USE-I386-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK-USE-I386-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-USE-I386-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK-USE-I386-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK-USE-I386-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-USE-I386-NEXT: store i32 4, ptr [[TMP22]], align 4 // CHECK-USE-I386-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -896,7 +896,7 @@ void ST::test_present_members() { // CHECK-NOUSE-PPC64LE-NEXT: [[TMP36:%.*]] = getelementptr inbounds [8 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NOUSE-PPC64LE-NEXT: [[TMP37:%.*]] = getelementptr inbounds [8 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK-NOUSE-PPC64LE-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NOUSE-PPC64LE-NEXT: store i32 4, ptr [[TMP38]], align 4 +// CHECK-NOUSE-PPC64LE-NEXT: store i32 5, ptr [[TMP38]], align 4 // CHECK-NOUSE-PPC64LE-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NOUSE-PPC64LE-NEXT: store i32 8, ptr [[TMP39]], align 4 // CHECK-NOUSE-PPC64LE-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -943,7 +943,7 @@ void ST::test_present_members() { // CHECK-NOUSE-PPC64LE-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK-NOUSE-PPC64LE-NEXT: [[TMP60:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK-NOUSE-PPC64LE-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK-NOUSE-PPC64LE-NEXT: store i32 4, ptr [[TMP61]], align 4 +// CHECK-NOUSE-PPC64LE-NEXT: store i32 5, ptr [[TMP61]], align 4 // CHECK-NOUSE-PPC64LE-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK-NOUSE-PPC64LE-NEXT: store i32 2, ptr [[TMP62]], align 4 // CHECK-NOUSE-PPC64LE-NEXT: [[TMP63:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -1042,7 +1042,7 @@ void ST::test_present_members() { // CHECK-NOUSE-PPC64LE-NEXT: [[TMP18:%.*]] = 
getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NOUSE-PPC64LE-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK-NOUSE-PPC64LE-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NOUSE-PPC64LE-NEXT: store i32 4, ptr [[TMP20]], align 4 +// CHECK-NOUSE-PPC64LE-NEXT: store i32 5, ptr [[TMP20]], align 4 // CHECK-NOUSE-PPC64LE-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NOUSE-PPC64LE-NEXT: store i32 4, ptr [[TMP21]], align 4 // CHECK-NOUSE-PPC64LE-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1175,7 +1175,7 @@ void ST::test_present_members() { // CHECK-NOUSE-I386-NEXT: [[TMP38:%.*]] = getelementptr inbounds [8 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NOUSE-I386-NEXT: [[TMP39:%.*]] = getelementptr inbounds [8 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK-NOUSE-I386-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NOUSE-I386-NEXT: store i32 4, ptr [[TMP40]], align 4 +// CHECK-NOUSE-I386-NEXT: store i32 5, ptr [[TMP40]], align 4 // CHECK-NOUSE-I386-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NOUSE-I386-NEXT: store i32 8, ptr [[TMP41]], align 4 // CHECK-NOUSE-I386-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1222,7 +1222,7 @@ void ST::test_present_members() { // CHECK-NOUSE-I386-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK-NOUSE-I386-NEXT: [[TMP62:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // 
CHECK-NOUSE-I386-NEXT: [[TMP63:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK-NOUSE-I386-NEXT: store i32 4, ptr [[TMP63]], align 4 +// CHECK-NOUSE-I386-NEXT: store i32 5, ptr [[TMP63]], align 4 // CHECK-NOUSE-I386-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK-NOUSE-I386-NEXT: store i32 2, ptr [[TMP64]], align 4 // CHECK-NOUSE-I386-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -1322,7 +1322,7 @@ void ST::test_present_members() { // CHECK-NOUSE-I386-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NOUSE-I386-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK-NOUSE-I386-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NOUSE-I386-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK-NOUSE-I386-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK-NOUSE-I386-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NOUSE-I386-NEXT: store i32 4, ptr [[TMP22]], align 4 // CHECK-NOUSE-I386-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_map_deref_array_codegen.cpp b/clang/test/OpenMP/target_map_deref_array_codegen.cpp index d0288aa8aa9d2..e109aa4914266 100644 --- a/clang/test/OpenMP/target_map_deref_array_codegen.cpp +++ b/clang/test/OpenMP/target_map_deref_array_codegen.cpp @@ -123,7 +123,7 @@ void foo(int **t1d) // CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -188,7 +188,7 @@ void foo(int **t1d) // CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP61]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP61]], align 4 // CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK-NEXT: store i32 4, ptr [[TMP62]], align 4 // CHECK-NEXT: [[TMP63:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -282,7 +282,7 @@ void foo(int **t1d) // CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS12]], i32 0, i32 0 // CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS13]], i32 0, i32 0 // CHECK-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK-NEXT: 
store i32 6, ptr [[TMP110]], align 4 // CHECK-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_map_member_expr_codegen.cpp b/clang/test/OpenMP/target_map_member_expr_codegen.cpp index f2d07ca474138..14b7dce325e79 100644 --- a/clang/test/OpenMP/target_map_member_expr_codegen.cpp +++ b/clang/test/OpenMP/target_map_member_expr_codegen.cpp @@ -168,7 +168,7 @@ void foo() { // CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP17]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP17]], align 4 // CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 5, ptr [[TMP18]], align 4 // CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -333,7 +333,7 @@ void foo() { // CHECK-NEXT: [[TMP55:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS9]], i32 0, i32 0 // CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS10]], i32 0, i32 0 // CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP57]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP57]], align 4 // CHECK-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 3, ptr [[TMP58]], align 4 // CHECK-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 
0, i32 2 @@ -426,7 +426,7 @@ void foo() { // CHECK-NEXT: [[TMP105:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds [5 x i64], ptr [[DOTOFFLOAD_SIZES22]], i32 0, i32 0 // CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP107]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP107]], align 4 // CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_offload_mandatory_codegen.cpp b/clang/test/OpenMP/target_offload_mandatory_codegen.cpp index 7ce4997f8eadb..458a5c4256be6 100644 --- a/clang/test/OpenMP/target_offload_mandatory_codegen.cpp +++ b/clang/test/OpenMP/target_offload_mandatory_codegen.cpp @@ -44,7 +44,7 @@ void host_dev(int device) { // MANDATORY-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // MANDATORY-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // MANDATORY-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// MANDATORY-NEXT: store i32 4, ptr [[TMP5]], align 4 +// MANDATORY-NEXT: store i32 5, ptr [[TMP5]], align 4 // MANDATORY-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // MANDATORY-NEXT: store i32 1, ptr [[TMP6]], align 4 // MANDATORY-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -101,7 +101,7 @@ void host_dev(int device) { // MANDATORY-NEXT: [[TMP4:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // MANDATORY-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // MANDATORY-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// MANDATORY-NEXT: store i32 4, ptr [[TMP6]], align 4 +// MANDATORY-NEXT: store i32 5, ptr [[TMP6]], align 4 // MANDATORY-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // MANDATORY-NEXT: store i32 1, ptr [[TMP7]], align 4 // MANDATORY-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -162,7 +162,7 @@ void host_dev(int device) { // MANDATORY-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // MANDATORY-NEXT: [[TMP7:%.*]] = sext i32 [[TMP6]] to i64 // MANDATORY-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// MANDATORY-NEXT: store i32 4, ptr [[TMP8]], align 4 +// MANDATORY-NEXT: store i32 5, ptr [[TMP8]], align 4 // MANDATORY-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // MANDATORY-NEXT: store i32 1, ptr [[TMP9]], align 4 // MANDATORY-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp b/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp index 93eb89f0c330c..af8c1e223705e 100644 --- a/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp +++ b/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp @@ -262,7 +262,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // 
CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -310,7 +310,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP45]], align 4 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -426,7 +426,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP23]], 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP30]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP30]], align 4 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr 
inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -528,7 +528,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -602,7 +602,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP42:%.*]] = sext i16 [[TMP41]] to i32 // CHECK1-NEXT: [[TMP43:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP42]], 0 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP45]], align 4 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -1007,7 +1007,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK1-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META32]] +// 
CHECK1-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META32]] // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP16]], align 4, !noalias [[META32]] // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -1197,7 +1197,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1245,7 +1245,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP45]], align 4 // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -1361,7 +1361,7 @@ int 
bar(int n){ // CHECK3-NEXT: [[TMP28:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: [[TMP29:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP23]], 0 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP30]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP30]], align 4 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1463,7 +1463,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1537,7 +1537,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP42:%.*]] = sext i16 [[TMP41]] to i32 // CHECK3-NEXT: [[TMP43:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP42]], 0 // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP45]], align 4 // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -1938,7 +1938,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META33]] +// CHECK3-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP16]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp index b81cc894f4f5e..266da5f073912 100644 --- a/clang/test/OpenMP/target_parallel_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_codegen.cpp @@ -380,7 +380,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK1-NEXT: 
[[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -443,7 +443,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 // CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP62]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP62]], align 4 // CHECK1-NEXT: [[TMP63:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP63]], align 4 // CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 @@ -559,7 +559,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP116:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS17]], i32 0, i32 0 // CHECK1-NEXT: [[TMP117:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK1-NEXT: [[TMP118:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP118]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP118]], align 4 // CHECK1-NEXT: [[TMP119:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK1-NEXT: store i32 10, ptr [[TMP119]], align 4 // CHECK1-NEXT: [[TMP120:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -686,7 +686,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP14:%.*]] = load 
ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META27]] -// CHECK1-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META27]] +// CHECK1-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META27]] // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP15]], align 4, !noalias [[META27]] // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -1058,7 +1058,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP33]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1172,7 +1172,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr 
[[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1266,7 +1266,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1578,7 +1578,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP29]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP29]], align 4 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP30]], align 4 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1641,7 +1641,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS7]], i32 0, i32 0 // CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS8]], i32 0, i32 0 // 
CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP60]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP60]], align 4 // CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK3-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS10]], i32 0, i32 2 @@ -1759,7 +1759,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP116:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS17]], i32 0, i32 0 // CHECK3-NEXT: [[TMP117:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK3-NEXT: [[TMP118:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP118]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP118]], align 4 // CHECK3-NEXT: [[TMP119:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK3-NEXT: store i32 10, ptr [[TMP119]], align 4 // CHECK3-NEXT: [[TMP120:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -1886,7 +1886,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META28]] -// CHECK3-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META28]] +// CHECK3-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 
// CHECK3-NEXT: store i32 1, ptr [[TMP15]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -2258,7 +2258,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP33]], align 4 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2372,7 +2372,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2466,7 +2466,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp index da8251b7b8046..d585eb9cbd36e 100644 --- a/clang/test/OpenMP/target_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp @@ -377,7 +377,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP12]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP12]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP13]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -510,7 +510,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK1-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS11]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP84]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP84]], align 4 // CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP85]], align 4 // CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2 @@ -637,7 +637,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP144:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS18]], i32 0, i32 0 // CHECK1-NEXT: [[TMP145:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK1-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP146]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP146]], align 4 // CHECK1-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 1 // CHECK1-NEXT: store i32 11, ptr [[TMP147]], align 4 // CHECK1-NEXT: [[TMP148:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 2 @@ -1083,7 +1083,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias [[META30]] // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 -// CHECK1-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META30]] +// CHECK1-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META30]] // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP18]], align 4, !noalias [[META30]] // CHECK1-NEXT: 
[[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -1521,7 +1521,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP33]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1635,7 +1635,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1729,7 +1729,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2107,7 +2107,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2237,7 +2237,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK3-NEXT: [[TMP79:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK3-NEXT: [[TMP80:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP80]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP80]], align 4 // CHECK3-NEXT: [[TMP81:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP81]], align 4 // CHECK3-NEXT: [[TMP82:%.*]] = getelementptr 
inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2 @@ -2366,7 +2366,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP142:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS18]], i32 0, i32 0 // CHECK3-NEXT: [[TMP143:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK3-NEXT: [[TMP144:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP144]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP144]], align 4 // CHECK3-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 1 // CHECK3-NEXT: store i32 11, ptr [[TMP145]], align 4 // CHECK3-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 2 @@ -2810,7 +2810,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 -// CHECK3-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META33]] +// CHECK3-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP18]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -3248,7 +3248,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // 
CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP33]], align 4 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3362,7 +3362,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3456,7 +3456,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP20]], align 4 // 
CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5489,7 +5489,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP12]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP12]], align 4 // CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 1, ptr [[TMP13]], align 4 // CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5622,7 +5622,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP82:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK17-NEXT: [[TMP83:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK17-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP84]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP84]], align 4 // CHECK17-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1 // CHECK17-NEXT: store i32 3, ptr [[TMP85]], align 4 // CHECK17-NEXT: [[TMP86:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2 @@ -5749,7 +5749,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP144:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS18]], i32 0, i32 0 // CHECK17-NEXT: [[TMP145:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // 
CHECK17-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP146]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP146]], align 4 // CHECK17-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 1 // CHECK17-NEXT: store i32 11, ptr [[TMP147]], align 4 // CHECK17-NEXT: [[TMP148:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 2 @@ -6195,7 +6195,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias [[META30]] // CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 -// CHECK17-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META30]] +// CHECK17-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META30]] // CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP18]], align 4, !noalias [[META30]] // CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -6633,7 +6633,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK17-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 6, ptr [[TMP33]], align 4 // CHECK17-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6747,7 +6747,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK17-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6841,7 +6841,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7219,7 +7219,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 1, ptr [[TMP11]], align 4 // CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7349,7 +7349,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP78:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK19-NEXT: [[TMP79:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK19-NEXT: [[TMP80:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP80]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP80]], align 4 // CHECK19-NEXT: [[TMP81:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1 // CHECK19-NEXT: store i32 3, ptr [[TMP81]], align 4 // CHECK19-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2 @@ -7478,7 +7478,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP142:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS18]], i32 0, i32 0 // CHECK19-NEXT: [[TMP143:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK19-NEXT: [[TMP144:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP144]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP144]], align 4 // CHECK19-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 1 // CHECK19-NEXT: store i32 11, ptr [[TMP145]], align 4 // CHECK19-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 2 @@ -7922,7 +7922,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias [[META33]] // CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON:%.*]], ptr [[TMP9]], i32 0, i32 1 // CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 -// CHECK19-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META33]] +// CHECK19-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META33]] // CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP18]], align 4, !noalias [[META33]] // CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -8360,7 +8360,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK19-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 6, ptr [[TMP33]], align 4 // CHECK19-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -8474,7 +8474,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP22:%.*]] = getelementptr 
inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK19-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -8568,7 +8568,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK19-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK19-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp index fdd18d9ed7a8c..5efed69be52be 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -419,7 +419,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP42:%.*]] = getelementptr 
inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP43]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP43]], align 4 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP44]], align 4 // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -482,7 +482,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK1-NEXT: [[TMP74:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP74]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP74]], align 4 // CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP75]], align 4 // CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2 @@ -609,7 +609,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP134:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS18]], i32 0, i32 0 // CHECK1-NEXT: [[TMP135:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP136]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP136]], align 4 // CHECK1-NEXT: [[TMP137:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS20]], i32 0, i32 1 // CHECK1-NEXT: store i32 11, ptr [[TMP137]], align 4 // CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 2 @@ -793,7 +793,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META31]] // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META31]] // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META31]] -// CHECK1-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META31]] +// CHECK1-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META31]] // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP15]], align 4, !noalias [[META31]] // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -1490,7 +1490,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP33]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1604,7 +1604,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 
// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1698,7 +1698,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2142,7 +2142,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP40]], align 4 // CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2205,7 +2205,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK3-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP70]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP70]], align 4 // CHECK3-NEXT: [[TMP71:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP71]], align 4 // CHECK3-NEXT: [[TMP72:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2 @@ -2334,7 +2334,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP132:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS18]], i32 0, i32 0 // CHECK3-NEXT: [[TMP133:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK3-NEXT: [[TMP134:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP134]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP134]], align 4 // CHECK3-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 1 // CHECK3-NEXT: store i32 11, ptr [[TMP135]], align 4 // CHECK3-NEXT: [[TMP136:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 2 @@ -2518,7 +2518,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META32]] 
// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META32]] // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META32]] -// CHECK3-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META32]] +// CHECK3-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META32]] // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP15]], align 4, !noalias [[META32]] // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -3213,7 +3213,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP33]], align 4 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3327,7 +3327,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: 
[[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3421,7 +3421,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3871,7 +3871,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP41:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK5-NEXT: [[TMP42:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK5-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP43]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP43]], align 4 // CHECK5-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 4, ptr [[TMP44]], align 4 // CHECK5-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3934,7 +3934,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP72:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK5-NEXT: [[TMP73:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK5-NEXT: [[TMP74:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP74]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP74]], align 4 // CHECK5-NEXT: [[TMP75:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1 // CHECK5-NEXT: store i32 3, ptr [[TMP75]], align 4 // CHECK5-NEXT: [[TMP76:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2 @@ -4061,7 +4061,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP134:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS18]], i32 0, i32 0 // CHECK5-NEXT: [[TMP135:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK5-NEXT: [[TMP136:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP136]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP136]], align 4 // CHECK5-NEXT: [[TMP137:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 1 // CHECK5-NEXT: store i32 11, ptr [[TMP137]], align 4 // CHECK5-NEXT: [[TMP138:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 2 @@ -4245,7 +4245,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META31]] // CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META31]] // CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META31]] -// CHECK5-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META31]] +// CHECK5-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, 
!noalias [[META31]] // CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP15]], align 4, !noalias [[META31]] // CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -4963,7 +4963,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP39:%.*]] = select i1 [[LOADEDV4]], i32 0, i32 1 // CHECK5-NEXT: [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0 // CHECK5-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP41]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP41]], align 4 // CHECK5-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 7, ptr [[TMP42]], align 4 // CHECK5-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5077,7 +5077,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5171,7 +5171,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5701,7 +5701,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP37:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK7-NEXT: [[TMP38:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK7-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK7-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 4, ptr [[TMP40]], align 4 // CHECK7-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5764,7 +5764,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP68:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK7-NEXT: [[TMP69:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK7-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP70]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP70]], align 4 // CHECK7-NEXT: [[TMP71:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 1 // CHECK7-NEXT: store i32 3, ptr [[TMP71]], align 4 // CHECK7-NEXT: [[TMP72:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS11]], i32 0, i32 2 @@ -5893,7 +5893,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP132:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS18]], i32 0, i32 0 // CHECK7-NEXT: [[TMP133:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK7-NEXT: [[TMP134:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP134]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP134]], align 4 // CHECK7-NEXT: [[TMP135:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 1 // CHECK7-NEXT: store i32 11, ptr [[TMP135]], align 4 // CHECK7-NEXT: [[TMP136:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 2 @@ -6077,7 +6077,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META32]] // CHECK7-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META32]] // CHECK7-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META32]] -// CHECK7-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META32]] +// CHECK7-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META32]] // CHECK7-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK7-NEXT: store i32 1, ptr [[TMP15]], align 4, !noalias [[META32]] // CHECK7-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -6793,7 +6793,7 @@ int bar(int n){ // CHECK7-NEXT: 
[[TMP39:%.*]] = select i1 [[LOADEDV4]], i32 0, i32 1 // CHECK7-NEXT: [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0 // CHECK7-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP41]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP41]], align 4 // CHECK7-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 7, ptr [[TMP42]], align 4 // CHECK7-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6907,7 +6907,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7001,7 +7001,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK7-NEXT: [[TMP20:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_parallel_generic_loop_codegen-1.cpp b/clang/test/OpenMP/target_parallel_generic_loop_codegen-1.cpp index 22391a9978eae..3f7fa4621411b 100644 --- a/clang/test/OpenMP/target_parallel_generic_loop_codegen-1.cpp +++ b/clang/test/OpenMP/target_parallel_generic_loop_codegen-1.cpp @@ -4217,7 +4217,7 @@ int bar(int a){ // OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// OMP-DEfAULT-NEXT: store i32 4, ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: store i32 5, ptr [[TMP12]], align 4 // OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP13]], align 4 // OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4434,7 +4434,7 @@ int bar(int a){ // OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// OMP-DEfAULT-NEXT: store i32 4, ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: store i32 5, ptr [[TMP12]], align 4 // OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP13]], align 4 // OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4613,7 +4613,7 @@ int bar(int a){ // OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// OMP-DEfAULT-NEXT: store i32 4, ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: store i32 5, ptr [[TMP12]], align 4 // OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP13]], align 4 // OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4764,7 +4764,7 @@ int bar(int a){ // OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// OMP-DEfAULT-NEXT: store i32 4, ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: store i32 5, ptr [[TMP12]], align 4 // OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP13]], align 4 // OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4943,7 +4943,7 @@ int bar(int a){ // OMP-DEfAULT-NEXT: [[TMP10:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// OMP-DEfAULT-NEXT: store i32 4, ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: store i32 5, ptr [[TMP12]], align 4 // OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP13]], align 4 // OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5094,7 +5094,7 @@ int bar(int a){ // OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// OMP-DEfAULT-NEXT: store i32 4, ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: store i32 5, ptr [[TMP12]], align 4 // OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP13]], align 4 // OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5273,7 +5273,7 @@ int bar(int a){ // OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// 
OMP-DEfAULT-NEXT: store i32 4, ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: store i32 5, ptr [[TMP12]], align 4 // OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP13]], align 4 // OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5424,7 +5424,7 @@ int bar(int a){ // OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// OMP-DEfAULT-NEXT: store i32 4, ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: store i32 5, ptr [[TMP12]], align 4 // OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP13]], align 4 // OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5582,7 +5582,7 @@ int bar(int a){ // OMP-DEfAULT-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// OMP-DEfAULT-NEXT: store i32 4, ptr [[TMP11]], align 4 +// OMP-DEfAULT-NEXT: store i32 5, ptr [[TMP11]], align 4 // OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP12]], align 4 // OMP-DEfAULT-NEXT: [[TMP13:%.*]] 
= getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5673,7 +5673,7 @@ int bar(int a){ // OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// OMP-DEfAULT-NEXT: store i32 4, ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: store i32 5, ptr [[TMP12]], align 4 // OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP13]], align 4 // OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5803,7 +5803,7 @@ int bar(int a){ // OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// OMP-DEfAULT-NEXT: store i32 4, ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: store i32 5, ptr [[TMP12]], align 4 // OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP13]], align 4 // OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5874,7 +5874,7 @@ int bar(int a){ // OMP-DEfAULT-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 // OMP-DEfAULT-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// OMP-DEfAULT-NEXT: store i32 4, ptr [[TMP12]], align 4 +// OMP-DEfAULT-NEXT: store i32 5, ptr [[TMP12]], align 4 // OMP-DEfAULT-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // OMP-DEfAULT-NEXT: store i32 2, ptr [[TMP13]], align 4 // OMP-DEfAULT-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp b/clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp index c8402211b7f8b..30d5ae3089a6b 100644 --- a/clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp +++ b/clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp @@ -94,7 +94,7 @@ int nested(int a){ // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -322,7 +322,7 @@ int nested(int a){ // CHECK-X86-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-X86-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-X86-NEXT: [[TMP10:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-X86-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK-X86-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK-X86-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-X86-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK-X86-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_parallel_generic_loop_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_parallel_generic_loop_uses_allocators_codegen.cpp index 5c3419369e8f3..279cf1a40d4ac 100644 --- a/clang/test/OpenMP/target_parallel_generic_loop_uses_allocators_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_generic_loop_uses_allocators_codegen.cpp @@ -96,7 +96,7 @@ void foo() { // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_parallel_if_codegen.cpp b/clang/test/OpenMP/target_parallel_if_codegen.cpp index 6e124f13cc7bc..0917f4289d5c3 100644 --- a/clang/test/OpenMP/target_parallel_if_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_if_codegen.cpp @@ -276,7 +276,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP20:%.*]] = 
select i1 [[LOADEDV3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP20]], 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -347,7 +347,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP53:%.*]] = select i1 [[LOADEDV15]], i32 0, i32 1 // CHECK1-NEXT: [[TMP54:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP53]], 0 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP55]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP56]], align 4 // CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 2 @@ -437,7 +437,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP13:%.*]] = select i1 [[LOADEDV3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP13]], 0 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP15]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP15]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr 
[[TMP16]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -488,7 +488,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS7]], i32 0, i32 0 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP36]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP36]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP37]], align 4 // CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 2 @@ -567,7 +567,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -627,7 +627,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK1-NEXT: 
[[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP40]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP40]], align 4 // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP41]], align 4 // CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -991,7 +991,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP20:%.*]] = select i1 [[LOADEDV3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP20]], 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1062,7 +1062,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP53:%.*]] = select i1 [[LOADEDV15]], i32 0, i32 1 // CHECK3-NEXT: [[TMP54:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP53]], 0 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP55]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP56]], align 4 // CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS16]], i32 
0, i32 2 @@ -1152,7 +1152,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP13:%.*]] = select i1 [[LOADEDV3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP14:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP13]], 0 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP15]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP15]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP16]], align 4 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1203,7 +1203,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS6]], i32 0, i32 0 // CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS7]], i32 0, i32 0 // CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP36]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP36]], align 4 // CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP37]], align 4 // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS9]], i32 0, i32 2 @@ -1282,7 +1282,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 
5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1342,7 +1342,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP40]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP40]], align 4 // CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP41]], align 4 // CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp index d6e084c3b78aa..2878e37e6eec3 100644 --- a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp @@ -277,7 +277,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP20]], 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // 
CHECK1-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -325,7 +325,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP45]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP45]], align 4 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP46]], align 4 // CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -402,7 +402,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP11]], 0 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP13]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP13]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP14]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -457,7 +457,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0 // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP41]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP41]], align 4 // CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP42]], align 4 // CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -523,7 +523,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -597,7 +597,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP42:%.*]] = zext i16 [[TMP41]] to i32 // CHECK1-NEXT: [[TMP43:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP42]], 0 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP45]], align 4 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ 
-908,7 +908,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP20]], 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -956,7 +956,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP45]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP45]], align 4 // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP46]], align 4 // CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -1033,7 +1033,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP11]], 0 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP13]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] 
= getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP14]], align 4 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1088,7 +1088,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0 // CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP41]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP41]], align 4 // CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP42]], align 4 // CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -1154,7 +1154,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1228,7 +1228,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP42:%.*]] = zext i16 [[TMP41]] to i32 // CHECK3-NEXT: [[TMP43:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP42]], 0 // 
CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP45]], align 4 // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp index 29cc5184d8372..b5b44fd50a7dd 100644 --- a/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp @@ -153,7 +153,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP15]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP15]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP16]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -217,7 +217,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP47]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP47]], align 4 // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP48]], align 4 // CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -346,7 +346,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP17:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP16]], 0 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP18]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP18]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP19]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -401,7 +401,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK1-NEXT: [[TMP45:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP44]], 0 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP46]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP46]], align 4 // CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP47]], align 4 // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -643,7 +643,7 @@ int 
bar(int n){ // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP15]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP15]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP16]], align 4 // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -707,7 +707,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP47]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP47]], align 4 // CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP48]], align 4 // CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -836,7 +836,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP17:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP16]], 0 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP18]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP18]], align 4 // 
CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP19]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -891,7 +891,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP44:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_2]], align 4 // CHECK3-NEXT: [[TMP45:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP44]], 0 // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP46]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP46]], align 4 // CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP47]], align 4 // CHECK3-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_task_affinity_codegen.cpp b/clang/test/OpenMP/target_task_affinity_codegen.cpp index 8361f11394aea..23e2962583250 100644 --- a/clang/test/OpenMP/target_task_affinity_codegen.cpp +++ b/clang/test/OpenMP/target_task_affinity_codegen.cpp @@ -163,7 +163,7 @@ int main() { // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP36]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP36]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP37]], align 4 // 
CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -406,7 +406,7 @@ int main() { // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 // CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 // CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP36]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP36]], align 4 // CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP37]], align 4 // CHECK3-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index eb59d83d71391..441e3634fe989 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -489,7 +489,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 // CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP58]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP58]], align 4 // CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP59]], align 4 // CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -552,7 +552,7 @@ int bar(int n){ // CHECK1-NEXT: 
[[TMP87:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 // CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS13]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP89]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP89]], align 4 // CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS13]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP90]], align 4 // CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS13]], i32 0, i32 2 @@ -616,7 +616,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP117:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0 // CHECK1-NEXT: [[TMP118:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS19]], i32 0, i32 0 // CHECK1-NEXT: [[TMP119:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS21]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP119]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP119]], align 4 // CHECK1-NEXT: [[TMP120:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS21]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP120]], align 4 // CHECK1-NEXT: [[TMP121:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS21]], i32 0, i32 2 @@ -675,7 +675,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP147:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 // CHECK1-NEXT: [[TMP148:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 // CHECK1-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP149]], align 4 +// 
CHECK1-NEXT: store i32 5, ptr [[TMP149]], align 4 // CHECK1-NEXT: [[TMP150:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP150]], align 4 // CHECK1-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 2 @@ -734,7 +734,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP177:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0 // CHECK1-NEXT: [[TMP178:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS35]], i32 0, i32 0 // CHECK1-NEXT: [[TMP179:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP179]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP179]], align 4 // CHECK1-NEXT: [[TMP180:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP180]], align 4 // CHECK1-NEXT: [[TMP181:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 2 @@ -845,7 +845,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP233:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 0 // CHECK1-NEXT: [[TMP234:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK1-NEXT: [[TMP235:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP235]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP235]], align 4 // CHECK1-NEXT: [[TMP236:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1 // CHECK1-NEXT: store i32 10, ptr [[TMP236]], align 4 // CHECK1-NEXT: [[TMP237:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 2 @@ -901,7 +901,7 @@ 
int bar(int n){ // CHECK1-NEXT: [[TMP258:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS51]], i32 0, i32 0 // CHECK1-NEXT: [[TMP259:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS52]], i32 0, i32 0 // CHECK1-NEXT: [[TMP260:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP260]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP260]], align 4 // CHECK1-NEXT: [[TMP261:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP261]], align 4 // CHECK1-NEXT: [[TMP262:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 2 @@ -951,7 +951,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP283:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS58]], i32 0, i32 0 // CHECK1-NEXT: [[TMP284:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS59]], i32 0, i32 0 // CHECK1-NEXT: [[TMP285:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP285]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP285]], align 4 // CHECK1-NEXT: [[TMP286:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP286]], align 4 // CHECK1-NEXT: [[TMP287:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 @@ -1105,7 +1105,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK1-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 -// CHECK1-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META39]] +// CHECK1-NEXT: store i32 5, ptr 
[[KERNEL_ARGS_I]], align 4, !noalias [[META39]] // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP22]], align 4, !noalias [[META39]] // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -1613,7 +1613,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1786,7 +1786,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP33]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1900,7 +1900,7 @@ int bar(int n){ 
// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1994,7 +1994,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2386,7 +2386,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP56]], align 4 +// CHECK3-NEXT: 
store i32 5, ptr [[TMP56]], align 4 // CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP57]], align 4 // CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2449,7 +2449,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 // CHECK3-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 // CHECK3-NEXT: [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS13]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP87]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP87]], align 4 // CHECK3-NEXT: [[TMP88:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS13]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP88]], align 4 // CHECK3-NEXT: [[TMP89:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS13]], i32 0, i32 2 @@ -2513,7 +2513,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP115:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS18]], i32 0, i32 0 // CHECK3-NEXT: [[TMP116:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS19]], i32 0, i32 0 // CHECK3-NEXT: [[TMP117:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS21]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP117]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP117]], align 4 // CHECK3-NEXT: [[TMP118:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS21]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP118]], align 4 // CHECK3-NEXT: [[TMP119:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS21]], i32 0, i32 2 @@ -2572,7 +2572,7 @@ int bar(int n){ // 
CHECK3-NEXT: [[TMP145:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS26]], i32 0, i32 0 // CHECK3-NEXT: [[TMP146:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS27]], i32 0, i32 0 // CHECK3-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP147]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP147]], align 4 // CHECK3-NEXT: [[TMP148:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP148]], align 4 // CHECK3-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS29]], i32 0, i32 2 @@ -2631,7 +2631,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP175:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS34]], i32 0, i32 0 // CHECK3-NEXT: [[TMP176:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS35]], i32 0, i32 0 // CHECK3-NEXT: [[TMP177:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP177]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP177]], align 4 // CHECK3-NEXT: [[TMP178:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP178]], align 4 // CHECK3-NEXT: [[TMP179:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS37]], i32 0, i32 2 @@ -2744,7 +2744,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP233:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 0 // CHECK3-NEXT: [[TMP234:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK3-NEXT: [[TMP235:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr 
[[TMP235]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP235]], align 4 // CHECK3-NEXT: [[TMP236:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1 // CHECK3-NEXT: store i32 10, ptr [[TMP236]], align 4 // CHECK3-NEXT: [[TMP237:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 2 @@ -2800,7 +2800,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP258:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS51]], i32 0, i32 0 // CHECK3-NEXT: [[TMP259:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS52]], i32 0, i32 0 // CHECK3-NEXT: [[TMP260:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP260]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP260]], align 4 // CHECK3-NEXT: [[TMP261:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP261]], align 4 // CHECK3-NEXT: [[TMP262:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 2 @@ -2850,7 +2850,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP283:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS58]], i32 0, i32 0 // CHECK3-NEXT: [[TMP284:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS59]], i32 0, i32 0 // CHECK3-NEXT: [[TMP285:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP285]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP285]], align 4 // CHECK3-NEXT: [[TMP286:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP286]], align 4 // CHECK3-NEXT: [[TMP287:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], 
i32 0, i32 2 @@ -3004,7 +3004,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK3-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 -// CHECK3-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META40]] +// CHECK3-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META40]] // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP22]], align 4, !noalias [[META40]] // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -3511,7 +3511,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3684,7 +3684,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP32]], 
align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP33]], align 4 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3798,7 +3798,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3892,7 +3892,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git 
a/clang/test/OpenMP/target_teams_distribute_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_codegen.cpp index 3bcb621cd798e..41ce9b1ff4c35 100644 --- a/clang/test/OpenMP/target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen.cpp @@ -439,7 +439,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 // CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP58]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP58]], align 4 // CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP59]], align 4 // CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -502,7 +502,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 // CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP89]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP89]], align 4 // CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP90]], align 4 // CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -629,7 +629,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP149:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS23]], 
i32 0, i32 0 // CHECK1-NEXT: [[TMP150:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK1-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP151]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP151]], align 4 // CHECK1-NEXT: [[TMP152:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1 // CHECK1-NEXT: store i32 11, ptr [[TMP152]], align 4 // CHECK1-NEXT: [[TMP153:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 2 @@ -838,7 +838,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK1-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 -// CHECK1-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META27]] +// CHECK1-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META27]] // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP22]], align 4, !noalias [[META27]] // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -1437,7 +1437,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP33]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1580,7 +1580,7 @@ int bar(int n){ // CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP33]], 1 // CHECK1-NEXT: [[TMP34:%.*]] = zext i32 [[ADD5]] to i64 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP35]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP36]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1675,7 +1675,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2233,7 +2233,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 // CHECK3-NEXT: 
[[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP56]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP56]], align 4 // CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP57]], align 4 // CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2296,7 +2296,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 // CHECK3-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 // CHECK3-NEXT: [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP87]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP87]], align 4 // CHECK3-NEXT: [[TMP88:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP88]], align 4 // CHECK3-NEXT: [[TMP89:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -2425,7 +2425,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP149:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 0 // CHECK3-NEXT: [[TMP150:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK3-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP151]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP151]], align 4 // CHECK3-NEXT: [[TMP152:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1 // CHECK3-NEXT: store i32 11, ptr [[TMP152]], 
align 4 // CHECK3-NEXT: [[TMP153:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 2 @@ -2634,7 +2634,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[TMP17]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK3-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 -// CHECK3-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META28]] +// CHECK3-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP22]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -3233,7 +3233,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP33]], align 4 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3376,7 +3376,7 @@ int bar(int n){ // CHECK3-NEXT: [[ADD5:%.*]] = add i32 [[TMP33]], 1 // CHECK3-NEXT: [[TMP34:%.*]] = zext i32 [[ADD5]] to i64 // CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: 
store i32 4, ptr [[TMP35]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP36]], align 4 // CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3471,7 +3471,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp index 579f61a5f6b16..243f5440f9e4f 100644 --- a/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_collapse_codegen.cpp @@ -130,7 +130,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], 
align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -299,7 +299,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -536,7 +536,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], 1 // CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP40]], align 4 // CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -780,7 +780,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 
0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1013,7 +1013,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP37]], 1 // CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP38]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP38]], align 4 // CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP39]], align 4 // CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1255,7 +1255,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp index 016e85d442776..974403e8c4a9c 100644 --- a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp @@ -167,7 +167,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -215,7 +215,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // 
CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -263,7 +263,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -603,7 +603,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -651,7 +651,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP30:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -699,7 +699,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1084,7 +1084,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr 
[[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1161,7 +1161,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK9-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK9-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK9-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK9-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1249,7 +1249,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP106]], 1 // CHECK9-NEXT: [[TMP107:%.*]] = zext i32 [[ADD30]] to i64 // CHECK9-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP108]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK9-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK9-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -1697,7 +1697,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// 
CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1744,7 +1744,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -1791,7 +1791,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK9-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -2173,7 +2173,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2251,7 +2251,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK11-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK11-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK11-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK11-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -2340,7 +2340,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP108]], 1 // CHECK11-NEXT: [[TMP109:%.*]] = zext i32 [[ADD30]] to i64 // CHECK11-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK11-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP111]], align 4 // CHECK11-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -2785,7 +2785,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2832,7 +2832,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2879,7 +2879,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP52:%.*]] = getelementptr 
inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK11-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK11-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp index b23dee677e662..fd2f0af36decc 100644 --- a/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_firstprivate_codegen.cpp @@ -309,7 +309,7 @@ int main() { // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -597,7 +597,7 @@ int main() { // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // 
CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1126,7 +1126,7 @@ int main() { // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1412,7 +1412,7 @@ int main() { // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp index 1749f706c272c..b867cc46957a9 100644 --- a/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_lastprivate_codegen.cpp @@ -531,7 +531,7 @@ int main() { // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -853,7 +853,7 @@ int main() { // CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store 
i32 5, ptr [[TMP22]], align 4 // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1246,7 +1246,7 @@ int main() { // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1566,7 +1566,7 @@ int main() { // CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp index b4911fb9aa59f..6f239e97533fb 100644 --- 
a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp @@ -663,7 +663,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP32:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP26]], 0 // CHECK2-NEXT: [[TMP33:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP27]], 0 // CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP34]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP34]], align 4 // CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK2-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -736,7 +736,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP68]], 1 // CHECK2-NEXT: [[TMP69:%.*]] = zext i32 [[ADD17]] to i64 // CHECK2-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP70]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP70]], align 4 // CHECK2-NEXT: [[TMP71:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1 // CHECK2-NEXT: store i32 4, ptr [[TMP71]], align 4 // CHECK2-NEXT: [[TMP72:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2 @@ -1321,7 +1321,7 @@ int target_teams_fun(int *g){ // CHECK4-NEXT: [[TMP32:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP26]], 0 // CHECK4-NEXT: [[TMP33:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP27]], 0 // CHECK4-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK4-NEXT: 
store i32 4, ptr [[TMP34]], align 4 +// CHECK4-NEXT: store i32 5, ptr [[TMP34]], align 4 // CHECK4-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK4-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK4-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1394,7 +1394,7 @@ int target_teams_fun(int *g){ // CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP68]], 1 // CHECK4-NEXT: [[TMP69:%.*]] = zext i32 [[ADD17]] to i64 // CHECK4-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0 -// CHECK4-NEXT: store i32 4, ptr [[TMP70]], align 4 +// CHECK4-NEXT: store i32 5, ptr [[TMP70]], align 4 // CHECK4-NEXT: [[TMP71:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1 // CHECK4-NEXT: store i32 4, ptr [[TMP71]], align 4 // CHECK4-NEXT: [[TMP72:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp index 9219d6a08d37c..9b1ad8e520ef6 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_collapse_codegen.cpp @@ -135,7 +135,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -379,7 +379,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -687,7 +687,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], 1 // CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP40]], align 4 // CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1057,7 +1057,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // 
CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1365,7 +1365,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP37]], 1 // CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP38]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP38]], align 4 // CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP39]], align 4 // CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1737,7 +1737,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 
0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp index 37c8f13b47ee5..b4a1fe7092ad3 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -179,7 +179,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -227,7 +227,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -275,7 +275,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -837,7 +837,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -885,7 +885,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // 
CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -933,7 +933,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1530,7 +1530,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // 
CHECK9-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1607,7 +1607,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK9-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK9-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK9-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK9-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1695,7 +1695,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP106]], 1 // CHECK9-NEXT: [[TMP107:%.*]] = zext i32 [[ADD30]] to i64 // CHECK9-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP108]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK9-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK9-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -2472,7 +2472,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2519,7 +2519,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2577,7 +2577,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK9-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK9-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP60]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP60]], align 4 // CHECK9-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK9-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK9-NEXT: [[TMP62:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -3198,7 +3198,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3276,7 +3276,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK11-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK11-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK11-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK11-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -3365,7 +3365,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP108]], 1 // CHECK11-NEXT: [[TMP109:%.*]] = zext i32 [[ADD30]] to i64 // CHECK11-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK11-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP111]], align 4 // CHECK11-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -4127,7 +4127,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4174,7 +4174,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -4232,7 +4232,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP58:%.*]] = getelementptr 
inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK11-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK11-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP60]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP60]], align 4 // CHECK11-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK11-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK11-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp index 6a7d146356a78..29941c9458feb 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -370,7 +370,7 @@ int main() { // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -788,7 +788,7 @@ int main() { // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], 
ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1447,7 +1447,7 @@ int main() { // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1859,7 +1859,7 @@ int main() { // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp index 59157fa822c39..696f7a3efb34a 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_if_codegen.cpp @@ -127,7 +127,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -168,7 +168,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -513,7 +513,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -578,7 +578,7 @@ int main() { // CHECK1-NEXT: [[TMP33:%.*]] = select i1 [[LOADEDV6]], i32 0, i32 1 // CHECK1-NEXT: [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP33]], 0 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP35]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP36]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 2 @@ -1095,7 +1095,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] 
= getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1156,7 +1156,7 @@ int main() { // CHECK1-NEXT: [[TMP32:%.*]] = select i1 [[LOADEDV5]], i32 0, i32 1 // CHECK1-NEXT: [[TMP33:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP32]], 0 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP34]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP34]], align 4 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP35]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp index 0fd9b03accb3b..1e96565e926b5 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_lastprivate_codegen.cpp @@ -792,7 +792,7 @@ int main() { // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 
0 -// CHECK5-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 6, ptr [[TMP27]], align 4 // CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1268,7 +1268,7 @@ int main() { // CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1806,7 +1806,7 @@ int main() { // CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 6, ptr [[TMP27]], align 4 // CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ 
-2276,7 +2276,7 @@ int main() { // CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp index b1d9ff0dbf7df..a6eac01c8fad3 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp @@ -38,7 +38,7 @@ void gtid_test() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git 
a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp index 1c095825d995d..1a3a09b0deac0 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp @@ -312,7 +312,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -594,7 +594,7 @@ int main() { // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1070,7 +1070,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 
0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1346,7 +1346,7 @@ int main() { // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp index 32cb612c23307..225a0640ff08f 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_proc_bind_codegen.cpp @@ -72,7 +72,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // 
CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -113,7 +113,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -446,7 +446,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp index c87a7523d9a4e..5c93a9f4ef702 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_codegen.cpp @@ -115,7 +115,7 @@ int main() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -414,7 +414,7 @@ int main() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -710,7 +710,7 @@ int main() { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1005,7 +1005,7 @@ int main() { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp index 523b5f23550df..dc167f4d31acc 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp +++ 
b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp @@ -249,7 +249,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -297,7 +297,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -345,7 +345,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP54:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -393,7 +393,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -441,7 +441,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK1-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK1-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK1-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK1-NEXT: store 
i32 2, ptr [[TMP101]], align 4 // CHECK1-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -1318,7 +1318,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1366,7 +1366,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -1414,7 +1414,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK3-NEXT: [[TMP53:%.*]] = getelementptr 
inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1462,7 +1462,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK3-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK3-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK3-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK3-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -1510,7 +1510,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK3-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK3-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK3-NEXT: [[TMP101:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK3-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -2360,7 +2360,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2408,7 +2408,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -2456,7 +2456,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x 
ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK5-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK5-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK5-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK5-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -2504,7 +2504,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK5-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK5-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK5-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK5-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -2552,7 +2552,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK5-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK5-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK5-NEXT: 
store i32 5, ptr [[TMP100]], align 4 // CHECK5-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK5-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -3429,7 +3429,7 @@ int main (int argc, char **argv) { // CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3477,7 +3477,7 @@ int main (int argc, char **argv) { // CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK7-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK7-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK7-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK7-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -3525,7 +3525,7 @@ int 
main (int argc, char **argv) { // CHECK7-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK7-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK7-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK7-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK7-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -3573,7 +3573,7 @@ int main (int argc, char **argv) { // CHECK7-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK7-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK7-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK7-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK7-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -3621,7 +3621,7 @@ int main (int argc, char **argv) { // CHECK7-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK7-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK7-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK7-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK7-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -4531,7 +4531,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK13-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4608,7 +4608,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK13-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK13-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK13-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK13-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK13-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -4696,7 +4696,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD30:%.*]] = add nsw 
i32 [[TMP106]], 1 // CHECK13-NEXT: [[TMP107:%.*]] = zext i32 [[ADD30]] to i64 // CHECK13-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP108]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK13-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK13-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK13-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -4773,7 +4773,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD45:%.*]] = add nsw i32 [[TMP144]], 1 // CHECK13-NEXT: [[TMP145:%.*]] = zext i32 [[ADD45]] to i64 // CHECK13-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP146]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP146]], align 4 // CHECK13-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1 // CHECK13-NEXT: store i32 4, ptr [[TMP147]], align 4 // CHECK13-NEXT: [[TMP148:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 2 @@ -4861,7 +4861,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD62:%.*]] = add nsw i32 [[TMP188]], 1 // CHECK13-NEXT: [[TMP189:%.*]] = zext i32 [[ADD62]] to i64 // CHECK13-NEXT: [[TMP190:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP190]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP190]], align 4 // CHECK13-NEXT: [[TMP191:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1 // CHECK13-NEXT: store i32 5, ptr [[TMP191]], align 4 // CHECK13-NEXT: 
[[TMP192:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 2 @@ -6101,7 +6101,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK13-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6148,7 +6148,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK13-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -6206,7 +6206,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK13-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK13-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP60]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP60]], align 4 // CHECK13-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK13-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK13-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -6253,7 +6253,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK13-NEXT: [[TMP82:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK13-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP83]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP83]], align 4 // CHECK13-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK13-NEXT: store i32 2, ptr [[TMP84]], align 4 // CHECK13-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -6311,7 +6311,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP110:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0 // CHECK13-NEXT: [[TMP111:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0 // CHECK13-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP112]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP112]], align 4 // CHECK13-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK13-NEXT: store i32 3, ptr [[TMP113]], align 4 // CHECK13-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -7270,7 +7270,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK15-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK15-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7348,7 +7348,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK15-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK15-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK15-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK15-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK15-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -7437,7 +7437,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP108]], 1 // CHECK15-NEXT: [[TMP109:%.*]] = zext i32 [[ADD30]] to i64 // CHECK15-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], 
i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK15-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK15-NEXT: store i32 5, ptr [[TMP111]], align 4 // CHECK15-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -7515,7 +7515,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[ADD45:%.*]] = add nsw i32 [[TMP147]], 1 // CHECK15-NEXT: [[TMP148:%.*]] = zext i32 [[ADD45]] to i64 // CHECK15-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP149]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP149]], align 4 // CHECK15-NEXT: [[TMP150:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1 // CHECK15-NEXT: store i32 4, ptr [[TMP150]], align 4 // CHECK15-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 2 @@ -7604,7 +7604,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[ADD62:%.*]] = add nsw i32 [[TMP192]], 1 // CHECK15-NEXT: [[TMP193:%.*]] = zext i32 [[ADD62]] to i64 // CHECK15-NEXT: [[TMP194:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP194]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP194]], align 4 // CHECK15-NEXT: [[TMP195:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1 // CHECK15-NEXT: store i32 5, ptr [[TMP195]], align 4 // CHECK15-NEXT: [[TMP196:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 2 @@ -8819,7 +8819,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP6:%.*]] 
= getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK15-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK15-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -8866,7 +8866,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK15-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK15-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -8924,7 +8924,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK15-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK15-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP60]], 
align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP60]], align 4 // CHECK15-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK15-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK15-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -8971,7 +8971,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK15-NEXT: [[TMP82:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK15-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP83]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP83]], align 4 // CHECK15-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK15-NEXT: store i32 2, ptr [[TMP84]], align 4 // CHECK15-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -9029,7 +9029,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP110:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0 // CHECK15-NEXT: [[TMP111:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0 // CHECK15-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP112]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP112]], align 4 // CHECK15-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK15-NEXT: store i32 3, ptr [[TMP113]], align 4 // CHECK15-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -9961,7 +9961,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK17-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -10038,7 +10038,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK17-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK17-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK17-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK17-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -10126,7 +10126,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP106]], 1 // CHECK17-NEXT: [[TMP107:%.*]] = zext i32 [[ADD30]] to i64 // CHECK17-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP108]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK17-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK17-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK17-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -10203,7 +10203,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD45:%.*]] = add nsw i32 [[TMP144]], 1 // CHECK17-NEXT: [[TMP145:%.*]] = zext i32 [[ADD45]] to i64 // CHECK17-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP146]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP146]], align 4 // CHECK17-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP147]], align 4 // CHECK17-NEXT: [[TMP148:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 2 @@ -10291,7 +10291,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD62:%.*]] = add nsw i32 [[TMP188]], 1 // CHECK17-NEXT: [[TMP189:%.*]] = zext i32 [[ADD62]] to i64 // CHECK17-NEXT: [[TMP190:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP190]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP190]], align 4 // CHECK17-NEXT: [[TMP191:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1 // CHECK17-NEXT: store i32 5, ptr [[TMP191]], align 4 // CHECK17-NEXT: [[TMP192:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 2 @@ -11531,7 +11531,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, 
i32 0 // CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -11578,7 +11578,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK17-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -11636,7 +11636,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK17-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK17-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP60]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP60]], align 4 // CHECK17-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS12]], i32 0, i32 1 // CHECK17-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK17-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -11683,7 +11683,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK17-NEXT: [[TMP82:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK17-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP83]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP83]], align 4 // CHECK17-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP84]], align 4 // CHECK17-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -11741,7 +11741,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP110:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0 // CHECK17-NEXT: [[TMP111:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0 // CHECK17-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP112]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP112]], align 4 // CHECK17-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK17-NEXT: store i32 3, ptr [[TMP113]], align 4 // CHECK17-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -12700,7 +12700,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 
1 // CHECK19-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -12778,7 +12778,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK19-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK19-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK19-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK19-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -12867,7 +12867,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP108]], 1 // CHECK19-NEXT: [[TMP109:%.*]] = zext i32 [[ADD30]] to i64 // CHECK19-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK19-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK19-NEXT: store i32 5, ptr [[TMP111]], align 4 // CHECK19-NEXT: [[TMP112:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -12945,7 +12945,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD45:%.*]] = add nsw i32 [[TMP147]], 1 // CHECK19-NEXT: [[TMP148:%.*]] = zext i32 [[ADD45]] to i64 // CHECK19-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP149]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP149]], align 4 // CHECK19-NEXT: [[TMP150:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP150]], align 4 // CHECK19-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 2 @@ -13034,7 +13034,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD62:%.*]] = add nsw i32 [[TMP192]], 1 // CHECK19-NEXT: [[TMP193:%.*]] = zext i32 [[ADD62]] to i64 // CHECK19-NEXT: [[TMP194:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP194]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP194]], align 4 // CHECK19-NEXT: [[TMP195:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1 // CHECK19-NEXT: store i32 5, ptr [[TMP195]], align 4 // CHECK19-NEXT: [[TMP196:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 2 @@ -14249,7 +14249,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP8]], 
align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -14296,7 +14296,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK19-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -14354,7 +14354,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK19-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK19-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP60]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP60]], align 4 // CHECK19-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK19-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK19-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS12]], i32 0, i32 2 @@ -14401,7 +14401,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK19-NEXT: [[TMP82:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK19-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP83]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP83]], align 4 // CHECK19-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP84]], align 4 // CHECK19-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -14459,7 +14459,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP110:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0 // CHECK19-NEXT: [[TMP111:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0 // CHECK19-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP112]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP112]], align 4 // CHECK19-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK19-NEXT: store i32 3, ptr [[TMP113]], align 4 // CHECK19-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp index 3d3974681c40f..24218a15b23a2 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp +++ 
b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp @@ -187,7 +187,7 @@ void test_target_teams_atomic() { // CHECK1-NEXT: [[TMP37:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP31]], 0 // CHECK1-NEXT: [[TMP38:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP32]], 0 // CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP40]], align 4 // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -260,7 +260,7 @@ void test_target_teams_atomic() { // CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP73]], 1 // CHECK1-NEXT: [[TMP74:%.*]] = zext i32 [[ADD17]] to i64 // CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP75]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP75]], align 4 // CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP76]], align 4 // CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2 @@ -843,7 +843,7 @@ void test_target_teams_atomic() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], 
align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1146,7 +1146,7 @@ void test_target_teams_atomic() { // CHECK3-NEXT: [[TMP37:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP31]], 0 // CHECK3-NEXT: [[TMP38:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP32]], 0 // CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP40]], align 4 // CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1219,7 +1219,7 @@ void test_target_teams_atomic() { // CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP73]], 1 // CHECK3-NEXT: [[TMP74:%.*]] = zext i32 [[ADD17]] to i64 // CHECK3-NEXT: [[TMP75:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP75]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP75]], align 4 // CHECK3-NEXT: [[TMP76:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP76]], align 4 // CHECK3-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2 @@ -1792,7 +1792,7 @@ void test_target_teams_atomic() { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp index 5034f8b2ef388..9f34f9d737b97 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_collapse_codegen.cpp @@ -135,7 +135,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -395,7 +395,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -859,7 +859,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], 1 // CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP40]], align 4 // CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1265,7 +1265,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1589,7 +1589,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP37]], 1 // CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP38]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP38]], align 4 // CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP39]], align 4 // CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1997,7 +1997,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp index ea323ecb19e67..c60100cc2072e 100644 --- 
a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp @@ -179,7 +179,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -227,7 +227,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -275,7 +275,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP53:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -879,7 +879,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -927,7 +927,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -975,7 +975,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1861,7 +1861,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1938,7 +1938,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK9-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK9-NEXT: [[TMP64:%.*]] = getelementptr 
inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK9-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK9-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -2026,7 +2026,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP106]], 1 // CHECK9-NEXT: [[TMP107:%.*]] = zext i32 [[ADD30]] to i64 // CHECK9-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP108]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK9-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK9-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -2875,7 +2875,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2922,7 +2922,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2980,7 +2980,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK9-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK9-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP60]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP60]], align 4 // CHECK9-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK9-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK9-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -3643,7 +3643,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, 
ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3721,7 +3721,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK11-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK11-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK11-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK11-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -3810,7 +3810,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP108]], 1 // CHECK11-NEXT: [[TMP109:%.*]] = zext i32 [[ADD30]] to i64 // CHECK11-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK11-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP111]], align 4 // CHECK11-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -4644,7 +4644,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4691,7 +4691,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -4749,7 +4749,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK11-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK11-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP60]], align 4 +// CHECK11-NEXT: store i32 5, 
ptr [[TMP60]], align 4 // CHECK11-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK11-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK11-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp index a57d13a7b2966..4bd2f911aa476 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -368,7 +368,7 @@ int main() { // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -800,7 +800,7 @@ int main() { // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr 
[[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1473,7 +1473,7 @@ int main() { // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1899,7 +1899,7 @@ int main() { // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp 
b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp index f3c56508afe18..176b4c95d2593 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp @@ -139,7 +139,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -180,7 +180,7 @@ int main() { // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP30]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP30]], align 4 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -568,7 +568,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 
// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -633,7 +633,7 @@ int main() { // CHECK1-NEXT: [[TMP33:%.*]] = select i1 [[LOADEDV6]], i32 0, i32 1 // CHECK1-NEXT: [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP33]], 0 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP35]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP36]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 2 @@ -1192,7 +1192,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store 
i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1253,7 +1253,7 @@ int main() { // CHECK1-NEXT: [[TMP32:%.*]] = select i1 [[LOADEDV5]], i32 0, i32 1 // CHECK1-NEXT: [[TMP33:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP32]], 0 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP34]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP34]], align 4 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP35]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -1811,7 +1811,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1852,7 +1852,7 @@ int main() { // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP30]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP30]], align 4 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP31]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2240,7 +2240,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2305,7 +2305,7 @@ int main() { // CHECK3-NEXT: [[TMP33:%.*]] = select i1 [[LOADEDV6]], i32 0, i32 1 // CHECK3-NEXT: [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP33]], 0 // CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP35]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP36]], align 4 // CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, 
i32 2 @@ -3094,7 +3094,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3155,7 +3155,7 @@ int main() { // CHECK3-NEXT: [[TMP32:%.*]] = select i1 [[LOADEDV5]], i32 0, i32 1 // CHECK3-NEXT: [[TMP33:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP32]], 0 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP34]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP34]], align 4 // CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP35]], align 4 // CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -4306,7 +4306,7 @@ int main() { // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP10]], 
align 4 // CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4347,7 +4347,7 @@ int main() { // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP30]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP30]], align 4 // CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP31]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -4735,7 +4735,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4800,7 +4800,7 @@ int main() { // CHECK9-NEXT: [[TMP33:%.*]] = select i1 [[LOADEDV6]], i32 0, i32 1 // 
CHECK9-NEXT: [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP33]], 0 // CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP35]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP36]], align 4 // CHECK9-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 2 @@ -5359,7 +5359,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5420,7 +5420,7 @@ int main() { // CHECK9-NEXT: [[TMP32:%.*]] = select i1 [[LOADEDV5]], i32 0, i32 1 // CHECK9-NEXT: [[TMP33:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP32]], 0 // CHECK9-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP34]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP34]], align 4 // CHECK9-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr 
[[TMP35]], align 4 // CHECK9-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -5978,7 +5978,7 @@ int main() { // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6019,7 +6019,7 @@ int main() { // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP30]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP30]], align 4 // CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP31]], align 4 // CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -6407,7 +6407,7 @@ int main() { // CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // 
CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6472,7 +6472,7 @@ int main() { // CHECK11-NEXT: [[TMP33:%.*]] = select i1 [[LOADEDV6]], i32 0, i32 1 // CHECK11-NEXT: [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP33]], 0 // CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP35]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP36]], align 4 // CHECK11-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 2 @@ -7261,7 +7261,7 @@ int main() { // CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7322,7 +7322,7 @@ int main() { // CHECK11-NEXT: [[TMP32:%.*]] = select i1 [[LOADEDV5]], i32 0, i32 1 // CHECK11-NEXT: [[TMP33:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP32]], 0 // CHECK11-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP34]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP34]], align 4 // CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP35]], align 4 // CHECK11-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp index b17d2b86c7350..d6ac98498de25 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -820,7 +820,7 @@ int main() { // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 6, ptr [[TMP27]], align 4 // CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], 
i32 0, i32 2 @@ -1310,7 +1310,7 @@ int main() { // CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1862,7 +1862,7 @@ int main() { // CHECK7-NEXT: [[TMP24:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP25:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK7-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 6, ptr [[TMP27]], align 4 // CHECK7-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2346,7 +2346,7 @@ int main() { // CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr 
[[TMP21]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK7-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp index 9e6fee05efb36..b975f393e3666 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp @@ -312,7 +312,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -608,7 +608,7 @@ int main() { // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] 
= getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1098,7 +1098,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1388,7 +1388,7 @@ int main() { // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp index 
86873edae4ebb..0c326ec8eab24 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -72,7 +72,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -113,7 +113,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -474,7 +474,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp index f4d3a3da9140c..a7f5d24a52586 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_reduction_codegen.cpp @@ -115,7 +115,7 @@ int main() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -428,7 +428,7 @@ int main() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // 
CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -738,7 +738,7 @@ int main() { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1047,7 +1047,7 @@ int main() { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp index 10421dfd4aba7..2b6783b5be9b3 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -249,7 +249,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -297,7 +297,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK1-NEXT: 
[[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -345,7 +345,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -393,7 +393,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -441,7 +441,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK1-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK1-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK1-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK1-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -1388,7 +1388,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1436,7 +1436,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS7]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -1484,7 +1484,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1532,7 +1532,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK3-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK3-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK3-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK3-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -1580,7 +1580,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK3-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK3-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK3-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK3-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -2500,7 +2500,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2548,7 +2548,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK5-NEXT: store i32 5, ptr 
[[TMP31]], align 4 // CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -2596,7 +2596,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK5-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK5-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK5-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK5-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -2644,7 +2644,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK5-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK5-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK5-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK5-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -2692,7 +2692,7 @@ int 
main (int argc, char **argv) { // CHECK5-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK5-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK5-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK5-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK5-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -3639,7 +3639,7 @@ int main (int argc, char **argv) { // CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3687,7 +3687,7 @@ int main (int argc, char **argv) { // CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK7-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK7-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, 
i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK7-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK7-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -3735,7 +3735,7 @@ int main (int argc, char **argv) { // CHECK7-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK7-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK7-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK7-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK7-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -3783,7 +3783,7 @@ int main (int argc, char **argv) { // CHECK7-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK7-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK7-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK7-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK7-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -3831,7 +3831,7 @@ int main (int argc, char **argv) { // CHECK7-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK7-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK7-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK7-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK7-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -5196,7 +5196,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK13-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5273,7 +5273,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK13-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK13-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK13-NEXT: store 
i32 5, ptr [[TMP64]], align 4 // CHECK13-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK13-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK13-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -5361,7 +5361,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP106]], 1 // CHECK13-NEXT: [[TMP107:%.*]] = zext i32 [[ADD30]] to i64 // CHECK13-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP108]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK13-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK13-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK13-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -5438,7 +5438,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD45:%.*]] = add nsw i32 [[TMP144]], 1 // CHECK13-NEXT: [[TMP145:%.*]] = zext i32 [[ADD45]] to i64 // CHECK13-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP146]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP146]], align 4 // CHECK13-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1 // CHECK13-NEXT: store i32 4, ptr [[TMP147]], align 4 // CHECK13-NEXT: [[TMP148:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 2 @@ -5526,7 +5526,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD62:%.*]] = add nsw i32 [[TMP188]], 1 // CHECK13-NEXT: [[TMP189:%.*]] = zext i32 [[ADD62]] to i64 // 
CHECK13-NEXT: [[TMP190:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP190]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP190]], align 4 // CHECK13-NEXT: [[TMP191:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1 // CHECK13-NEXT: store i32 5, ptr [[TMP191]], align 4 // CHECK13-NEXT: [[TMP192:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 2 @@ -6886,7 +6886,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK13-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6933,7 +6933,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], 
i32 0, i32 1 // CHECK13-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -6991,7 +6991,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK13-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK13-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP60]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP60]], align 4 // CHECK13-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK13-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK13-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -7038,7 +7038,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK13-NEXT: [[TMP82:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK13-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP83]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP83]], align 4 // CHECK13-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK13-NEXT: store i32 2, ptr [[TMP84]], align 4 // CHECK13-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -7096,7 +7096,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP110:%.*]] = getelementptr inbounds [3 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0 // CHECK13-NEXT: [[TMP111:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0 // CHECK13-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP112]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP112]], align 4 // CHECK13-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK13-NEXT: store i32 3, ptr [[TMP113]], align 4 // CHECK13-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -8125,7 +8125,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK15-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK15-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -8203,7 +8203,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK15-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK15-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK15-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK15-NEXT: store i32 4, 
ptr [[TMP66]], align 4 // CHECK15-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -8292,7 +8292,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP108]], 1 // CHECK15-NEXT: [[TMP109:%.*]] = zext i32 [[ADD30]] to i64 // CHECK15-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK15-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK15-NEXT: store i32 5, ptr [[TMP111]], align 4 // CHECK15-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -8370,7 +8370,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[ADD45:%.*]] = add nsw i32 [[TMP147]], 1 // CHECK15-NEXT: [[TMP148:%.*]] = zext i32 [[ADD45]] to i64 // CHECK15-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP149]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP149]], align 4 // CHECK15-NEXT: [[TMP150:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1 // CHECK15-NEXT: store i32 4, ptr [[TMP150]], align 4 // CHECK15-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 2 @@ -8459,7 +8459,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[ADD62:%.*]] = add nsw i32 [[TMP192]], 1 // CHECK15-NEXT: [[TMP193:%.*]] = zext i32 [[ADD62]] to i64 // CHECK15-NEXT: [[TMP194:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP194]], align 4 +// 
CHECK15-NEXT: store i32 5, ptr [[TMP194]], align 4 // CHECK15-NEXT: [[TMP195:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1 // CHECK15-NEXT: store i32 5, ptr [[TMP195]], align 4 // CHECK15-NEXT: [[TMP196:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 2 @@ -9794,7 +9794,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK15-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK15-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -9841,7 +9841,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK15-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK15-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, 
i32 2 @@ -9899,7 +9899,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK15-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK15-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP60]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP60]], align 4 // CHECK15-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK15-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK15-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -9946,7 +9946,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK15-NEXT: [[TMP82:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK15-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP83]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP83]], align 4 // CHECK15-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK15-NEXT: store i32 2, ptr [[TMP84]], align 4 // CHECK15-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -10004,7 +10004,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP110:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0 // CHECK15-NEXT: [[TMP111:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0 // CHECK15-NEXT: [[TMP112:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP112]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP112]], align 4 // CHECK15-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK15-NEXT: store i32 3, ptr [[TMP113]], align 4 // CHECK15-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -11006,7 +11006,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK17-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -11083,7 +11083,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK17-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK17-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK17-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK17-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -11171,7 +11171,7 @@ int main (int argc, 
char **argv) { // CHECK17-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP106]], 1 // CHECK17-NEXT: [[TMP107:%.*]] = zext i32 [[ADD30]] to i64 // CHECK17-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP108]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK17-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK17-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK17-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -11248,7 +11248,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD45:%.*]] = add nsw i32 [[TMP144]], 1 // CHECK17-NEXT: [[TMP145:%.*]] = zext i32 [[ADD45]] to i64 // CHECK17-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP146]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP146]], align 4 // CHECK17-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP147]], align 4 // CHECK17-NEXT: [[TMP148:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 2 @@ -11336,7 +11336,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD62:%.*]] = add nsw i32 [[TMP188]], 1 // CHECK17-NEXT: [[TMP189:%.*]] = zext i32 [[ADD62]] to i64 // CHECK17-NEXT: [[TMP190:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP190]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP190]], align 4 // CHECK17-NEXT: [[TMP191:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1 // 
CHECK17-NEXT: store i32 5, ptr [[TMP191]], align 4 // CHECK17-NEXT: [[TMP192:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 2 @@ -12696,7 +12696,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -12743,7 +12743,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK17-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -12801,7 +12801,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // 
CHECK17-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK17-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP60]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP60]], align 4 // CHECK17-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK17-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK17-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -12848,7 +12848,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK17-NEXT: [[TMP82:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK17-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP83]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP83]], align 4 // CHECK17-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP84]], align 4 // CHECK17-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -12906,7 +12906,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP110:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0 // CHECK17-NEXT: [[TMP111:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0 // CHECK17-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP112]], align 4 +// CHECK17-NEXT: store i32 5, ptr 
[[TMP112]], align 4 // CHECK17-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK17-NEXT: store i32 3, ptr [[TMP113]], align 4 // CHECK17-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -13935,7 +13935,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK19-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -14013,7 +14013,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK19-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK19-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK19-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK19-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -14102,7 +14102,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP108]], 1 // CHECK19-NEXT: [[TMP109:%.*]] = zext i32 [[ADD30]] to i64 // CHECK19-NEXT: [[TMP110:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK19-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK19-NEXT: store i32 5, ptr [[TMP111]], align 4 // CHECK19-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -14180,7 +14180,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD45:%.*]] = add nsw i32 [[TMP147]], 1 // CHECK19-NEXT: [[TMP148:%.*]] = zext i32 [[ADD45]] to i64 // CHECK19-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP149]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP149]], align 4 // CHECK19-NEXT: [[TMP150:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP150]], align 4 // CHECK19-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS46]], i32 0, i32 2 @@ -14269,7 +14269,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD62:%.*]] = add nsw i32 [[TMP192]], 1 // CHECK19-NEXT: [[TMP193:%.*]] = zext i32 [[ADD62]] to i64 // CHECK19-NEXT: [[TMP194:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP194]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP194]], align 4 // CHECK19-NEXT: [[TMP195:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 1 // CHECK19-NEXT: store i32 5, ptr [[TMP195]], align 4 // CHECK19-NEXT: [[TMP196:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS63]], i32 0, i32 
2 @@ -15604,7 +15604,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -15651,7 +15651,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK19-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -15709,7 +15709,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK19-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK19-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP60]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP60]], align 4 // CHECK19-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK19-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK19-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -15756,7 +15756,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK19-NEXT: [[TMP82:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK19-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP83]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP83]], align 4 // CHECK19-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP84]], align 4 // CHECK19-NEXT: [[TMP85:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -15814,7 +15814,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP110:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS24]], i32 0, i32 0 // CHECK19-NEXT: [[TMP111:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS25]], i32 0, i32 0 // CHECK19-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP112]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP112]], align 4 // CHECK19-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK19-NEXT: 
store i32 3, ptr [[TMP113]], align 4 // CHECK19-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp index f0782fbee3776..78d3ba4f2e1ec 100644 --- a/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_private_codegen.cpp @@ -251,7 +251,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -432,7 +432,7 @@ int main() { // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ 
-804,7 +804,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -983,7 +983,7 @@ int main() { // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp index 3131c5aa405f7..7067a0f219e8f 100644 --- a/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_reduction_codegen.cpp @@ -341,7 +341,7 @@ int main() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -388,7 +388,7 @@ int main() { // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -436,7 +436,7 @@ int main() { // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS12]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -484,7 +484,7 @@ int main() { // CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -532,7 +532,7 @@ int main() { // CHECK1-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 0 // CHECK1-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK1-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK1-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 2 @@ -580,7 +580,7 @@ int main() { // CHECK1-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 // CHECK1-NEXT: [[TMP122:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 // CHECK1-NEXT: [[TMP123:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP123]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP123]], align 4 // CHECK1-NEXT: [[TMP124:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP124]], align 4 // CHECK1-NEXT: [[TMP125:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 2 @@ -627,7 +627,7 @@ int main() { // CHECK1-NEXT: [[TMP144:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0 // CHECK1-NEXT: [[TMP145:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS37]], i32 0, i32 0 // CHECK1-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP146]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP146]], align 4 // CHECK1-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP147]], align 4 // CHECK1-NEXT: [[TMP148:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 2 @@ -674,7 +674,7 @@ int main() { // CHECK1-NEXT: [[TMP167:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 0 // CHECK1-NEXT: [[TMP168:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 0 // CHECK1-NEXT: [[TMP169:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP169]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP169]], align 4 // CHECK1-NEXT: [[TMP170:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP170]], align 4 // CHECK1-NEXT: [[TMP171:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 2 @@ -722,7 +722,7 @@ int main() { // CHECK1-NEXT: [[TMP190:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0 // CHECK1-NEXT: [[TMP191:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS51]], i32 0, i32 0 // CHECK1-NEXT: [[TMP192:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP192]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP192]], align 4 // CHECK1-NEXT: [[TMP193:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP193]], align 4 // CHECK1-NEXT: [[TMP194:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 2 @@ -770,7 +770,7 @@ int main() { // CHECK1-NEXT: [[TMP213:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0 // CHECK1-NEXT: [[TMP214:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS58]], i32 0, i32 0 // CHECK1-NEXT: [[TMP215:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP215]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP215]], align 4 // CHECK1-NEXT: [[TMP216:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP216]], align 4 // CHECK1-NEXT: [[TMP217:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 @@ -2265,7 +2265,7 @@ int main() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 
0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2318,7 +2318,7 @@ int main() { // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP34]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP34]], align 4 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP35]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2372,7 +2372,7 @@ int main() { // CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP60]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP60]], align 4 // CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK1-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -2426,7 +2426,7 @@ int main() { // CHECK1-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK1-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK1-NEXT: [[TMP86:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP86]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP86]], align 4 // CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP87]], align 4 // CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -2480,7 +2480,7 @@ int main() { // CHECK1-NEXT: [[TMP110:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP111:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 0 // CHECK1-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP112]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP112]], align 4 // CHECK1-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP113]], align 4 // CHECK1-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 2 @@ -2534,7 +2534,7 @@ int main() { // CHECK1-NEXT: [[TMP136:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, 
i32 0 // CHECK1-NEXT: [[TMP137:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 // CHECK1-NEXT: [[TMP138:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP138]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP138]], align 4 // CHECK1-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP139]], align 4 // CHECK1-NEXT: [[TMP140:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 2 @@ -2587,7 +2587,7 @@ int main() { // CHECK1-NEXT: [[TMP162:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0 // CHECK1-NEXT: [[TMP163:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS37]], i32 0, i32 0 // CHECK1-NEXT: [[TMP164:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP164]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP164]], align 4 // CHECK1-NEXT: [[TMP165:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP165]], align 4 // CHECK1-NEXT: [[TMP166:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 2 @@ -2640,7 +2640,7 @@ int main() { // CHECK1-NEXT: [[TMP188:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 0 // CHECK1-NEXT: [[TMP189:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 0 // CHECK1-NEXT: [[TMP190:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP190]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP190]], align 4 // CHECK1-NEXT: [[TMP191:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP191]], align 4 // CHECK1-NEXT: [[TMP192:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 2 @@ -2694,7 +2694,7 @@ int main() { // CHECK1-NEXT: [[TMP214:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0 // CHECK1-NEXT: [[TMP215:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS51]], i32 0, i32 0 // CHECK1-NEXT: [[TMP216:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP216]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP216]], align 4 // CHECK1-NEXT: [[TMP217:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP217]], align 4 // CHECK1-NEXT: [[TMP218:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 2 @@ -2748,7 +2748,7 @@ int main() { // CHECK1-NEXT: [[TMP240:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0 // CHECK1-NEXT: [[TMP241:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS58]], i32 0, i32 0 // CHECK1-NEXT: [[TMP242:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP242]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP242]], align 4 // CHECK1-NEXT: [[TMP243:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP243]], align 4 // CHECK1-NEXT: [[TMP244:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 @@ -4329,7 +4329,7 @@ int main() { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4376,7 +4376,7 @@ int main() { // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -4424,7 +4424,7 @@ int main() { // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK3-NEXT: [[TMP55:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -4472,7 +4472,7 @@ int main() { // CHECK3-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK3-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK3-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK3-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK3-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -4520,7 +4520,7 @@ int main() { // CHECK3-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0 // CHECK3-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 0 // CHECK3-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK3-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK3-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 2 @@ -4568,7 +4568,7 @@ int main() { // CHECK3-NEXT: [[TMP121:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 // CHECK3-NEXT: [[TMP122:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 // CHECK3-NEXT: [[TMP123:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP123]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP123]], align 4 // CHECK3-NEXT: [[TMP124:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP124]], align 4 // CHECK3-NEXT: [[TMP125:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 2 @@ -4615,7 +4615,7 @@ int main() { // CHECK3-NEXT: [[TMP144:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0 // CHECK3-NEXT: [[TMP145:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS37]], i32 0, i32 0 // CHECK3-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP146]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP146]], align 4 // CHECK3-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP147]], align 4 // CHECK3-NEXT: [[TMP148:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 2 @@ -4662,7 +4662,7 @@ int main() { // CHECK3-NEXT: [[TMP167:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 0 // CHECK3-NEXT: [[TMP168:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 0 // CHECK3-NEXT: [[TMP169:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP169]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP169]], align 4 
// CHECK3-NEXT: [[TMP170:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP170]], align 4 // CHECK3-NEXT: [[TMP171:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 2 @@ -4710,7 +4710,7 @@ int main() { // CHECK3-NEXT: [[TMP190:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0 // CHECK3-NEXT: [[TMP191:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS51]], i32 0, i32 0 // CHECK3-NEXT: [[TMP192:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP192]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP192]], align 4 // CHECK3-NEXT: [[TMP193:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP193]], align 4 // CHECK3-NEXT: [[TMP194:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 2 @@ -4758,7 +4758,7 @@ int main() { // CHECK3-NEXT: [[TMP213:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0 // CHECK3-NEXT: [[TMP214:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS58]], i32 0, i32 0 // CHECK3-NEXT: [[TMP215:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP215]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP215]], align 4 // CHECK3-NEXT: [[TMP216:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP216]], align 4 // CHECK3-NEXT: [[TMP217:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 @@ -6253,7 +6253,7 @@ int main() { // CHECK3-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6306,7 +6306,7 @@ int main() { // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP34]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP34]], align 4 // CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP35]], align 4 // CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -6360,7 +6360,7 @@ int main() { // CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK3-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP60]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP60]], align 4 // 
CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP61]], align 4 // CHECK3-NEXT: [[TMP62:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -6414,7 +6414,7 @@ int main() { // CHECK3-NEXT: [[TMP84:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK3-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK3-NEXT: [[TMP86:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP86]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP86]], align 4 // CHECK3-NEXT: [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP87]], align 4 // CHECK3-NEXT: [[TMP88:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -6468,7 +6468,7 @@ int main() { // CHECK3-NEXT: [[TMP110:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS22]], i32 0, i32 0 // CHECK3-NEXT: [[TMP111:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS23]], i32 0, i32 0 // CHECK3-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP112]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP112]], align 4 // CHECK3-NEXT: [[TMP113:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP113]], align 4 // CHECK3-NEXT: [[TMP114:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS26]], i32 0, i32 2 @@ -6522,7 +6522,7 @@ int main() { // CHECK3-NEXT: [[TMP136:%.*]] = getelementptr 
inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS29]], i32 0, i32 0 // CHECK3-NEXT: [[TMP137:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS30]], i32 0, i32 0 // CHECK3-NEXT: [[TMP138:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP138]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP138]], align 4 // CHECK3-NEXT: [[TMP139:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP139]], align 4 // CHECK3-NEXT: [[TMP140:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS33]], i32 0, i32 2 @@ -6575,7 +6575,7 @@ int main() { // CHECK3-NEXT: [[TMP162:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS36]], i32 0, i32 0 // CHECK3-NEXT: [[TMP163:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS37]], i32 0, i32 0 // CHECK3-NEXT: [[TMP164:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP164]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP164]], align 4 // CHECK3-NEXT: [[TMP165:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP165]], align 4 // CHECK3-NEXT: [[TMP166:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS40]], i32 0, i32 2 @@ -6628,7 +6628,7 @@ int main() { // CHECK3-NEXT: [[TMP188:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS43]], i32 0, i32 0 // CHECK3-NEXT: [[TMP189:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS44]], i32 0, i32 0 // CHECK3-NEXT: [[TMP190:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP190]], align 4 +// CHECK3-NEXT: store i32 5, 
ptr [[TMP190]], align 4 // CHECK3-NEXT: [[TMP191:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP191]], align 4 // CHECK3-NEXT: [[TMP192:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS47]], i32 0, i32 2 @@ -6682,7 +6682,7 @@ int main() { // CHECK3-NEXT: [[TMP214:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS50]], i32 0, i32 0 // CHECK3-NEXT: [[TMP215:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS51]], i32 0, i32 0 // CHECK3-NEXT: [[TMP216:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP216]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP216]], align 4 // CHECK3-NEXT: [[TMP217:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP217]], align 4 // CHECK3-NEXT: [[TMP218:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS54]], i32 0, i32 2 @@ -6736,7 +6736,7 @@ int main() { // CHECK3-NEXT: [[TMP240:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS57]], i32 0, i32 0 // CHECK3-NEXT: [[TMP241:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS58]], i32 0, i32 0 // CHECK3-NEXT: [[TMP242:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP242]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP242]], align 4 // CHECK3-NEXT: [[TMP243:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP243]], align 4 // CHECK3-NEXT: [[TMP244:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 diff --git 
a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp index 89df7bef76a96..038abb4fe564e 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp @@ -436,7 +436,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 // CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 // CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP58]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP58]], align 4 // CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP59]], align 4 // CHECK1-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -499,7 +499,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP87:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 // CHECK1-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP89:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP89]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP89]], align 4 // CHECK1-NEXT: [[TMP90:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP90]], align 4 // CHECK1-NEXT: [[TMP91:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -615,7 +615,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP143:%.*]] = getelementptr inbounds [10 x ptr], ptr 
[[DOTOFFLOAD_PTRS21]], i32 0, i32 0 // CHECK1-NEXT: [[TMP144:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK1-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP145]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP145]], align 4 // CHECK1-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 1 // CHECK1-NEXT: store i32 10, ptr [[TMP146]], align 4 // CHECK1-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 2 @@ -829,7 +829,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK1-NEXT: [[TMP19:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 -// CHECK1-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META32]] +// CHECK1-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META32]] // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP20]], align 4, !noalias [[META32]] // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -1432,7 +1432,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP33]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1575,7 +1575,7 @@ int bar(int n){ // CHECK1-NEXT: [[ADD5:%.*]] = add i32 [[TMP33]], 1 // CHECK1-NEXT: [[TMP34:%.*]] = zext i32 [[ADD5]] to i64 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP35]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP36]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1670,7 +1670,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2256,7 +2256,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 
0, i32 0 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP56]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP56]], align 4 // CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP57]], align 4 // CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2319,7 +2319,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 // CHECK3-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 // CHECK3-NEXT: [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP87]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP87]], align 4 // CHECK3-NEXT: [[TMP88:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP88]], align 4 // CHECK3-NEXT: [[TMP89:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -2437,7 +2437,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP143:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 // CHECK3-NEXT: [[TMP144:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK3-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP145]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP145]], align 4 // CHECK3-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 1 // CHECK3-NEXT: store 
i32 10, ptr [[TMP146]], align 4 // CHECK3-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 2 @@ -2651,7 +2651,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 -// CHECK3-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META33]] +// CHECK3-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP20]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -3254,7 +3254,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [6 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP33]], align 4 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3397,7 +3397,7 @@ int bar(int n){ // CHECK3-NEXT: [[ADD5:%.*]] = add i32 [[TMP33]], 1 // CHECK3-NEXT: [[TMP34:%.*]] = zext i32 [[ADD5]] to i64 // CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP35]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP36]], align 4 // CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3492,7 +3492,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4080,7 +4080,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 // CHECK5-NEXT: [[TMP57:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 // CHECK5-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP58]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP58]], align 4 // CHECK5-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP59]], align 4 // CHECK5-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 2 @@ -4143,7 +4143,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP87:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 // CHECK5-NEXT: [[TMP88:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 // CHECK5-NEXT: [[TMP89:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP89]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP89]], align 4 // CHECK5-NEXT: [[TMP90:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK5-NEXT: store i32 3, ptr [[TMP90]], align 4 // CHECK5-NEXT: [[TMP91:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -4259,7 +4259,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP143:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 // CHECK5-NEXT: [[TMP144:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK5-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP145]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP145]], align 4 // CHECK5-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 1 // CHECK5-NEXT: store i32 10, ptr [[TMP146]], align 4 // CHECK5-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 2 @@ -4473,7 +4473,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK5-NEXT: [[TMP19:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 -// CHECK5-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, 
!noalias [[META32]] +// CHECK5-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META32]] // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK5-NEXT: store i32 4, ptr [[TMP20]], align 4, !noalias [[META32]] // CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -5093,7 +5093,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP36:%.*]] = getelementptr inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP37:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK5-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP38]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP38]], align 4 // CHECK5-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 7, ptr [[TMP39]], align 4 // CHECK5-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5236,7 +5236,7 @@ int bar(int n){ // CHECK5-NEXT: [[ADD5:%.*]] = add i32 [[TMP33]], 1 // CHECK5-NEXT: [[TMP34:%.*]] = zext i32 [[ADD5]] to i64 // CHECK5-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP35]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK5-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 6, ptr [[TMP36]], align 4 // CHECK5-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5331,7 +5331,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP17:%.*]] = getelementptr 
inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5969,7 +5969,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS5]], i32 0, i32 0 // CHECK7-NEXT: [[TMP55:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS6]], i32 0, i32 0 // CHECK7-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP56]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP56]], align 4 // CHECK7-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP57]], align 4 // CHECK7-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6032,7 +6032,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP85:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS10]], i32 0, i32 0 // CHECK7-NEXT: [[TMP86:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS11]], i32 0, i32 0 // CHECK7-NEXT: [[TMP87:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP87]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP87]], align 4 // 
CHECK7-NEXT: [[TMP88:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 1 // CHECK7-NEXT: store i32 3, ptr [[TMP88]], align 4 // CHECK7-NEXT: [[TMP89:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS14]], i32 0, i32 2 @@ -6150,7 +6150,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP143:%.*]] = getelementptr inbounds [10 x ptr], ptr [[DOTOFFLOAD_PTRS21]], i32 0, i32 0 // CHECK7-NEXT: [[TMP144:%.*]] = getelementptr inbounds [10 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK7-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP145]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP145]], align 4 // CHECK7-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 1 // CHECK7-NEXT: store i32 10, ptr [[TMP146]], align 4 // CHECK7-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS24]], i32 0, i32 2 @@ -6364,7 +6364,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[TMP9]], i32 0, i32 2 // CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 // CHECK7-NEXT: [[TMP19:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 -// CHECK7-NEXT: store i32 4, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META33]] +// CHECK7-NEXT: store i32 5, ptr [[KERNEL_ARGS_I]], align 4, !noalias [[META33]] // CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1 // CHECK7-NEXT: store i32 4, ptr [[TMP20]], align 4, !noalias [[META33]] // CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2 @@ -6984,7 +6984,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP36:%.*]] = getelementptr 
inbounds [7 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP37:%.*]] = getelementptr inbounds [7 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK7-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP38]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP38]], align 4 // CHECK7-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 7, ptr [[TMP39]], align 4 // CHECK7-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7127,7 +7127,7 @@ int bar(int n){ // CHECK7-NEXT: [[ADD5:%.*]] = add i32 [[TMP33]], 1 // CHECK7-NEXT: [[TMP34:%.*]] = zext i32 [[ADD5]] to i64 // CHECK7-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP35]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK7-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 6, ptr [[TMP36]], align 4 // CHECK7-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7222,7 +7222,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, 
i32 1 // CHECK7-NEXT: store i32 4, ptr [[TMP20]], align 4 // CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp index a6fe016dd81d1..eeccfc8f54b7d 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_collapse_codegen.cpp @@ -130,7 +130,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -307,7 +307,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], 
align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -692,7 +692,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], 1 // CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP40]], align 4 // CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -954,7 +954,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1195,7 +1195,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP37]], 1 // CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// 
CHECK11-NEXT: store i32 4, ptr [[TMP38]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP38]], align 4 // CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP39]], align 4 // CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1455,7 +1455,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp index 05f1b4332999a..2397792c2d990 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp @@ -167,7 +167,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: 
store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -215,7 +215,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -263,7 +263,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS15]], i32 0, i32 2 @@ -624,7 +624,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -672,7 +672,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -720,7 +720,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1373,7 +1373,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1450,7 +1450,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK9-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK9-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK9-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK9-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1538,7 +1538,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: 
[[ADD30:%.*]] = add nsw i32 [[TMP106]], 1 // CHECK9-NEXT: [[TMP107:%.*]] = zext i32 [[ADD30]] to i64 // CHECK9-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP108]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK9-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK9-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -2022,7 +2022,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2069,7 +2069,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2116,7 +2116,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK9-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -2519,7 +2519,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2597,7 +2597,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK11-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // 
CHECK11-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK11-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK11-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -2686,7 +2686,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP108]], 1 // CHECK11-NEXT: [[TMP109:%.*]] = zext i32 [[ADD30]] to i64 // CHECK11-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK11-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP111]], align 4 // CHECK11-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -3167,7 +3167,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3214,7 +3214,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -3261,7 +3261,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK11-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK11-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp index b2b8e9d13fdea..890d65d36e20a 100644 --- 
a/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp @@ -309,7 +309,7 @@ int main() { // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -604,7 +604,7 @@ int main() { // CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1140,7 +1140,7 @@ int main() { // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: 
[[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1433,7 +1433,7 @@ int main() { // CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp index eaebf274913ba..5318e59ecc51d 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_lastprivate_codegen.cpp @@ -581,7 +581,7 @@ int main() { // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -910,7 +910,7 @@ int main() { // CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1310,7 +1310,7 @@ int main() { // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1637,7 +1637,7 @@ int main() { // CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp index d73523685213c..4e50412785cae 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp @@ -251,7 +251,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -439,7 +439,7 
@@ int main() { // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -818,7 +818,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1004,7 +1004,7 @@ int main() { // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp index d76a3b0b068f4..2e59184cea3bb 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp @@ -103,7 +103,7 @@ int main() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -295,7 +295,7 @@ int main() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 
2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -484,7 +484,7 @@ int main() { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -676,7 +676,7 @@ int main() { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp index 7f320d98e1cfb..f897879abd4d7 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp +++ 
b/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp @@ -645,7 +645,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[TMP30:%.*]] = zext i32 [[ADD]] to i64 // CHECK2-NEXT: [[TMP31:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP26]], 0 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK2-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -718,7 +718,7 @@ int target_teams_fun(int *g){ // CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP66]], 1 // CHECK2-NEXT: [[TMP67:%.*]] = zext i32 [[ADD17]] to i64 // CHECK2-NEXT: [[TMP68:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP68]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP68]], align 4 // CHECK2-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1 // CHECK2-NEXT: store i32 4, ptr [[TMP69]], align 4 // CHECK2-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2 @@ -1286,7 +1286,7 @@ int target_teams_fun(int *g){ // CHECK4-NEXT: [[TMP30:%.*]] = zext i32 [[ADD]] to i64 // CHECK4-NEXT: [[TMP31:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP26]], 0 // CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK4-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK4-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK4-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK4-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1359,7 +1359,7 @@ int target_teams_fun(int *g){ // CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP66]], 1 // CHECK4-NEXT: [[TMP67:%.*]] = zext i32 [[ADD17]] to i64 // CHECK4-NEXT: [[TMP68:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 0 -// CHECK4-NEXT: store i32 4, ptr [[TMP68]], align 4 +// CHECK4-NEXT: store i32 5, ptr [[TMP68]], align 4 // CHECK4-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 1 // CHECK4-NEXT: store i32 4, ptr [[TMP69]], align 4 // CHECK4-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS18]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp index 0cc8e46518c9a..aacf464f056dc 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_codegen.cpp @@ -156,8 +156,7 @@ int foo() { // IR-GPU: .omp.lastprivate.done: // IR-GPU-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 // IR-GPU-NEXT: store ptr [[SUM1_ASCAST]], ptr [[TMP20]], align 8 -// IR-GPU-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer() -// IR-GPU-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 400, ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func.1, ptr @_omp_reduction_inter_warp_copy_func.2, ptr @_omp_reduction_list_to_global_copy_func, 
ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) +// IR-GPU-NEXT: [[TMP21:%.*]] = call i32 @__kmpc_gpu_xteam_reduce_nowait(ptr addrspacecast (ptr addrspace(1) @[[GLOB1]] to ptr), ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr @_omp_reduction_shuffle_and_reduce_func.1, ptr @_omp_reduction_inter_warp_copy_func.2, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) // IR-GPU-NEXT: [[TMP22:%.*]] = icmp eq i32 [[TMP21]], 1 // IR-GPU-NEXT: br i1 [[TMP22]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // IR-GPU: .omp.reduction.then: @@ -647,31 +646,6 @@ int foo() { // IR-GPU-NEXT: ret void // // -// IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func -// IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// IR-GPU-NEXT: entry: -// IR-GPU-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8, addrspace(5) -// IR-GPU-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4, addrspace(5) -// IR-GPU-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8, addrspace(5) -// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8, addrspace(5) -// IR-GPU-NEXT: [[DOTADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR]] to ptr -// IR-GPU-NEXT: [[DOTADDR1_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR1]] to ptr -// IR-GPU-NEXT: [[DOTADDR2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTADDR2]] to ptr -// IR-GPU-NEXT: [[DOTOMP_REDUCTION_RED_LIST_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_REDUCTION_RED_LIST]] to ptr -// IR-GPU-NEXT: store ptr [[TMP0]], ptr [[DOTADDR_ASCAST]], align 8 -// IR-GPU-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1_ASCAST]], align 4 -// IR-GPU-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2_ASCAST]], align 8 -// IR-GPU-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR_ASCAST]], 
align 8 -// IR-GPU-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1_ASCAST]], align 4 -// IR-GPU-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], i64 0, i64 0 -// IR-GPU-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// IR-GPU-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 -// IR-GPU-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8 -// IR-GPU-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2_ASCAST]], align 8 -// IR-GPU-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3foov_l22_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST_ASCAST]], ptr [[TMP8]]) #[[ATTR2]] -// IR-GPU-NEXT: ret void -// -// // IR-GPU-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func // IR-GPU-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { // IR-GPU-NEXT: entry: diff --git a/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp index dc0443816f9ad..b102300f4ddd0 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_collapse_codegen.cpp @@ -135,7 +135,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], 
align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -379,7 +379,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -687,7 +687,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], 1 // CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP40]], align 4 // CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1057,7 +1057,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1365,7 +1365,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP37]], 1 // CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP38]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP38]], align 4 // CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP39]], align 4 // CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1737,7 +1737,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp index 3e287b92edd64..28a65f12279fe 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_if_codegen.cpp @@ -122,7 +122,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -393,7 +393,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -458,7 +458,7 @@ int main() { // CHECK1-NEXT: [[TMP33:%.*]] = select i1 [[LOADEDV6]], i32 0, i32 1 // 
CHECK1-NEXT: [[TMP34:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP33]], 0 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP35]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP36]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 2 @@ -746,7 +746,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -807,7 +807,7 @@ int main() { // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP33]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_generic_loop_order_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_order_codegen.cpp index 67a9a73200976..2b429e82c9455 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_order_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_order_codegen.cpp @@ -38,7 +38,7 @@ void gtid_test() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp index c45d57894d886..efc5fc6680d99 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp @@ -312,7 +312,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 
0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -493,7 +493,7 @@ int main() { // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -865,7 +865,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1044,7 +1044,7 @@ int main() { // CHECK3-NEXT: [[TMP4:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_generic_loop_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_reduction_codegen.cpp index c87a7523d9a4e..5c93a9f4ef702 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_reduction_codegen.cpp @@ -115,7 +115,7 @@ int main() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -414,7 +414,7 @@ int main() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -710,7 +710,7 @@ int main() { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1005,7 +1005,7 @@ int main() { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], 
align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp index 25e0274e08a51..b78e3823ccf6a 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_uses_allocators_codegen.cpp @@ -297,7 +297,7 @@ void foo() { // CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_map_codegen.cpp b/clang/test/OpenMP/target_teams_map_codegen.cpp index 75a417c407bc8..f08e75cf6f9d7 100644 --- a/clang/test/OpenMP/target_teams_map_codegen.cpp +++ b/clang/test/OpenMP/target_teams_map_codegen.cpp @@ -106,7 +106,7 @@ void mapInt128() { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP11]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP11]], 
align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP12]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -192,7 +192,7 @@ void mapInt128() { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP11]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP12]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -294,7 +294,7 @@ void mapInt128() { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP11]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP12]], align 4 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -446,7 +446,7 @@ void mapInt128() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x 
ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -532,7 +532,7 @@ void mapInt128() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -618,7 +618,7 @@ void mapInt128() { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -722,7 +722,7 @@ void mapInt128() { // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP14]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP14]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP15]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -781,7 +781,7 @@ void mapInt128() { // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP43]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP43]], align 4 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP44]], align 4 // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -1101,7 +1101,7 @@ void mapInt128() { // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 
0 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP14]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP14]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP15]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1160,7 +1160,7 @@ void mapInt128() { // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP43]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP43]], align 4 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP44]], align 4 // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -1413,7 +1413,7 @@ void mapInt128() { // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP12]], align 4 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1499,7 +1499,7 @@ void mapInt128() { // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP12]], align 4 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1601,7 +1601,7 @@ void mapInt128() { // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP11]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP12]], align 4 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1753,7 +1753,7 @@ void mapInt128() { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // 
CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1839,7 +1839,7 @@ void mapInt128() { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1925,7 +1925,7 @@ void mapInt128() { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2029,7 +2029,7 @@ void mapInt128() { // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP14]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP14]], align 4 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP15]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2088,7 +2088,7 @@ void mapInt128() { // CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP43]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP43]], align 4 // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP44]], align 4 // CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 diff --git a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp index d57a33f63a299..24b4027f41e95 100644 --- 
a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp @@ -262,7 +262,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP20]], 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -310,7 +310,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP45]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP45]], align 4 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP46]], align 4 // CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -387,7 +387,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP11]], 0 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: 
store i32 4, ptr [[TMP13]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP13]], align 4 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP14]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -442,7 +442,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK1-NEXT: [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0 // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP41]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP41]], align 4 // CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP42]], align 4 // CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -508,7 +508,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -582,7 +582,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP42:%.*]] = sext 
i16 [[TMP41]] to i32 // CHECK1-NEXT: [[TMP43:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP42]], 0 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP45]], align 4 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -893,7 +893,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP20]], 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -941,7 +941,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP45]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP45]], align 4 // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], 
i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP46]], align 4 // CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -1018,7 +1018,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP11]], 0 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP13]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP13]], align 4 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP14]], align 4 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1073,7 +1073,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP39:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 // CHECK3-NEXT: [[TMP40:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP39]], 0 // CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP41]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP41]], align 4 // CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP42]], align 4 // CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -1139,7 +1139,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1213,7 +1213,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP42:%.*]] = sext i16 [[TMP41]] to i32 // CHECK3-NEXT: [[TMP43:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP42]], 0 // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP45]], align 4 // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/target_teams_reduction_codegen.cpp similarity index 60% rename from clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp rename to clang/test/OpenMP/target_teams_reduction_codegen.cpp index e48357aa864d7..31c51d953acaa 100644 --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/target_teams_reduction_codegen.cpp @@ -4,7 +4,7 @@ // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda 
-emit-llvm-bc %s -o %t-x86-host.bc // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2 -// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -fopenmp-cuda-teams-reduction-recs-num=2048 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK3 +// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -fopenmp-cuda-teams-reduction-recs-num=2048 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -84,6 +84,8 @@ int bar(int n){ // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_SCRATCH:%.*]] = alloca [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_PER_THREAD_RED_LIST:%.*]] = alloca [1 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 8 @@ -95,14 +97,18 @@ int bar(int n){ // CHECK1-NEXT: store double [[ADD]], ptr [[E1]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: store ptr [[E1]], ptr [[TMP2]], align 8 -// CHECK1-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer() -// CHECK1-NEXT: [[TMP3:%.*]] = call i32 
@__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) -// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 1 -// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[DOTOMP_REDUCTION_SCRATCH]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_PER_THREAD_RED_LIST]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 8 +// CHECK1-NEXT: call void @_omp_reduction_list_to_global_copy_func(ptr [[DOTOMP_REDUCTION_SCRATCH]], i32 0, ptr [[DOTOMP_REDUCTION_RED_LIST]]) +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_gpu_xteam_reduce_nowait(ptr @[[GLOB1]], ptr [[DOTOMP_REDUCTION_PER_THREAD_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) +// CHECK1-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 1 +// CHECK1-NEXT: br i1 [[TMP6]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load double, ptr [[E1]], align 8 -// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP5]], [[TMP6]] +// CHECK1-NEXT: call void @_omp_reduction_global_to_list_copy_func(ptr [[DOTOMP_REDUCTION_SCRATCH]], i32 0, ptr [[DOTOMP_REDUCTION_RED_LIST]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load double, ptr 
[[TMP0]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[E1]], align 8 +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP7]], [[TMP8]] // CHECK1-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: @@ -256,27 +262,6 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func -// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP8]]) #[[ATTR4]] -// CHECK1-NEXT: ret void -// -// // CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func // CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef 
[[TMP2:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: @@ -360,6 +345,8 @@ int bar(int n){ // CHECK1-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_SCRATCH:%.*]] = alloca [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_PER_THREAD_RED_LIST:%.*]] = alloca [2 x ptr], align 8 // CHECK1-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8 // CHECK1-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 8 @@ -382,21 +369,28 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[C1]], ptr [[TMP4]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 // CHECK1-NEXT: store ptr [[D2]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer() -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2, ptr @_omp_reduction_list_to_global_copy_func3, ptr @_omp_reduction_list_to_global_reduce_func4, ptr @_omp_reduction_global_to_list_copy_func5, ptr @_omp_reduction_global_to_list_reduce_func6) -// CHECK1-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 -// CHECK1-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[DOTOMP_REDUCTION_SCRATCH]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_PER_THREAD_RED_LIST]], i32 0, i32 0 +// CHECK1-NEXT: store ptr [[TMP6]], ptr 
[[TMP7]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[DOTOMP_REDUCTION_SCRATCH]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_PER_THREAD_RED_LIST]], i32 0, i32 1 +// CHECK1-NEXT: store ptr [[TMP8]], ptr [[TMP9]], align 8 +// CHECK1-NEXT: call void @_omp_reduction_list_to_global_copy_func3(ptr [[DOTOMP_REDUCTION_SCRATCH]], i32 0, ptr [[DOTOMP_REDUCTION_RED_LIST]]) +// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_gpu_xteam_reduce_nowait(ptr @[[GLOB1]], ptr [[DOTOMP_REDUCTION_PER_THREAD_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2, ptr @_omp_reduction_list_to_global_copy_func3, ptr @_omp_reduction_global_to_list_copy_func4, ptr @_omp_reduction_global_to_list_reduce_func5) +// CHECK1-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 +// CHECK1-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP9]] to i32 +// CHECK1-NEXT: call void @_omp_reduction_global_to_list_copy_func4(ptr [[DOTOMP_REDUCTION_SCRATCH]], i32 0, ptr [[DOTOMP_REDUCTION_RED_LIST]]) +// CHECK1-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP0]], align 1 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP12]] to i32 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, ptr [[C1]], align 1 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP13]] to i32 // CHECK1-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] // CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 // CHECK1-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 -// CHECK1-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load float, ptr [[D2]], align 4 -// CHECK1-NEXT: 
[[MUL8:%.*]] = fmul float [[TMP10]], [[TMP11]] +// CHECK1-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load float, ptr [[D2]], align 4 +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP14]], [[TMP15]] // CHECK1-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: @@ -593,32 +587,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func4 -// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP6]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP9]], i32 0, i32 1 
-// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP11]]) #[[ATTR4]] -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func5 +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func4 // CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 @@ -645,7 +614,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func6 +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func5 // CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 @@ -725,8 +694,7 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[A1]], ptr [[TMP6]], align 8 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 // CHECK1-NEXT: store ptr [[B2]], ptr [[TMP7]], align 8 -// CHECK1-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer() -// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func9, ptr @_omp_reduction_inter_warp_copy_func10, ptr @_omp_reduction_list_to_global_copy_func11, ptr @_omp_reduction_list_to_global_reduce_func12, ptr @_omp_reduction_global_to_list_copy_func13, ptr @_omp_reduction_global_to_list_reduce_func14) +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 
@__kmpc_gpu_xteam_reduce_nowait(ptr @[[GLOB1]], ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func8, ptr @_omp_reduction_inter_warp_copy_func9, ptr @_omp_reduction_list_to_global_copy_func10, ptr @_omp_reduction_global_to_list_copy_func11, ptr @_omp_reduction_global_to_list_reduce_func12) // CHECK1-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 // CHECK1-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: @@ -793,7 +761,7 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[A1]], ptr [[TMP5]], align 8 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 // CHECK1-NEXT: store ptr [[B2]], ptr [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func7, ptr @_omp_reduction_inter_warp_copy_func8) +// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func6, ptr @_omp_reduction_inter_warp_copy_func7) // CHECK1-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 // CHECK1-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: @@ -821,7 +789,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7 +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func6 // CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 @@ -908,7 +876,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define 
{{[^@]+}}@_omp_reduction_inter_warp_copy_func8 +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func7 // CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 @@ -982,7 +950,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func9 +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func8 // CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 @@ -1069,7 +1037,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func10 +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func9 // CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 @@ -1143,7 +1111,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func11 +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func10 // CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 @@ -1170,32 +1138,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func12 -// CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 8 -// CHECK1-NEXT: 
[[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 8 -// CHECK1-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP6]], i32 0, i32 0 -// CHECK1-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP9]], i32 0, i32 1 -// CHECK1-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP11]]) #[[ATTR4]] -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func13 +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func11 // CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 @@ -1222,7 +1165,7 @@ int bar(int n){ // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func14 +// 
CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func12 // CHECK1-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 8 @@ -1281,6 +1224,8 @@ int bar(int n){ // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_SCRATCH:%.*]] = alloca [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], align 8 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_PER_THREAD_RED_LIST:%.*]] = alloca [1 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 @@ -1292,14 +1237,18 @@ int bar(int n){ // CHECK2-NEXT: store double [[ADD]], ptr [[E1]], align 8 // CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 // CHECK2-NEXT: store ptr [[E1]], ptr [[TMP2]], align 4 -// CHECK2-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer() -// CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) -// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 1 -// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[DOTOMP_REDUCTION_SCRATCH]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_PER_THREAD_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP3]], ptr [[TMP4]], align 4 +// CHECK2-NEXT: call void @_omp_reduction_list_to_global_copy_func(ptr [[DOTOMP_REDUCTION_SCRATCH]], i32 0, ptr [[DOTOMP_REDUCTION_RED_LIST]]) +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_gpu_xteam_reduce_nowait(ptr @[[GLOB1]], ptr [[DOTOMP_REDUCTION_PER_THREAD_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) +// CHECK2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[TMP5]], 1 +// CHECK2-NEXT: br i1 [[TMP6]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load double, ptr [[E1]], align 8 -// CHECK2-NEXT: [[ADD2:%.*]] = fadd double [[TMP5]], [[TMP6]] +// CHECK2-NEXT: call void @_omp_reduction_global_to_list_copy_func(ptr [[DOTOMP_REDUCTION_SCRATCH]], i32 0, ptr [[DOTOMP_REDUCTION_RED_LIST]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load double, ptr [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load double, ptr [[E1]], align 8 +// CHECK2-NEXT: [[ADD2:%.*]] = fadd double [[TMP7]], [[TMP8]] // CHECK2-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: @@ -1453,27 +1402,6 @@ int bar(int n){ // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func -// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: 
[[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP8]]) #[[ATTR4]] -// CHECK2-NEXT: ret void -// -// // CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func // CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { // CHECK2-NEXT: entry: @@ -1557,6 +1485,8 @@ int bar(int n){ // CHECK2-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_SCRATCH:%.*]] = alloca [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], align 8 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_PER_THREAD_RED_LIST:%.*]] = alloca [2 x ptr], align 4 // CHECK2-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK2-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // 
CHECK2-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 @@ -1579,21 +1509,28 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[C1]], ptr [[TMP4]], align 4 // CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 // CHECK2-NEXT: store ptr [[D2]], ptr [[TMP5]], align 4 -// CHECK2-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer() -// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2, ptr @_omp_reduction_list_to_global_copy_func3, ptr @_omp_reduction_list_to_global_reduce_func4, ptr @_omp_reduction_global_to_list_copy_func5, ptr @_omp_reduction_global_to_list_reduce_func6) -// CHECK2-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 -// CHECK2-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[DOTOMP_REDUCTION_SCRATCH]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_PER_THREAD_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store ptr [[TMP6]], ptr [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[DOTOMP_REDUCTION_SCRATCH]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_PER_THREAD_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: store ptr [[TMP8]], ptr [[TMP9]], align 4 +// CHECK2-NEXT: call void @_omp_reduction_list_to_global_copy_func3(ptr [[DOTOMP_REDUCTION_SCRATCH]], i32 0, ptr [[DOTOMP_REDUCTION_RED_LIST]]) +// CHECK2-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_gpu_xteam_reduce_nowait(ptr @[[GLOB1]], ptr [[DOTOMP_REDUCTION_PER_THREAD_RED_LIST]], ptr 
@_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2, ptr @_omp_reduction_list_to_global_copy_func3, ptr @_omp_reduction_global_to_list_copy_func4, ptr @_omp_reduction_global_to_list_reduce_func5) +// CHECK2-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 +// CHECK2-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK2-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK2-NEXT: [[TMP9:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP9]] to i32 +// CHECK2-NEXT: call void @_omp_reduction_global_to_list_copy_func4(ptr [[DOTOMP_REDUCTION_SCRATCH]], i32 0, ptr [[DOTOMP_REDUCTION_RED_LIST]]) +// CHECK2-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP0]], align 1 +// CHECK2-NEXT: [[CONV4:%.*]] = sext i8 [[TMP12]] to i32 +// CHECK2-NEXT: [[TMP13:%.*]] = load i8, ptr [[C1]], align 1 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP13]] to i32 // CHECK2-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] // CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 // CHECK2-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 -// CHECK2-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load float, ptr [[D2]], align 4 -// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP10]], [[TMP11]] +// CHECK2-NEXT: [[TMP14:%.*]] = load float, ptr [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load float, ptr [[D2]], align 4 +// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP14]], [[TMP15]] // CHECK2-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: @@ -1790,32 +1727,7 @@ int bar(int n){ // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func4 -// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef 
[[TMP2:%.*]]) #[[ATTR3]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP9]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP11]]) #[[ATTR4]] -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func5 +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func4 // CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) 
#[[ATTR3]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 @@ -1842,7 +1754,7 @@ int bar(int n){ // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func6 +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func5 // CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 @@ -1922,8 +1834,7 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[A1]], ptr [[TMP6]], align 4 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 // CHECK2-NEXT: store ptr [[B2]], ptr [[TMP7]], align 4 -// CHECK2-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer() -// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 1024, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func9, ptr @_omp_reduction_inter_warp_copy_func10, ptr @_omp_reduction_list_to_global_copy_func11, ptr @_omp_reduction_list_to_global_reduce_func12, ptr @_omp_reduction_global_to_list_copy_func13, ptr @_omp_reduction_global_to_list_reduce_func14) +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_gpu_xteam_reduce_nowait(ptr @[[GLOB1]], ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func8, ptr @_omp_reduction_inter_warp_copy_func9, ptr @_omp_reduction_list_to_global_copy_func10, ptr @_omp_reduction_global_to_list_copy_func11, ptr @_omp_reduction_global_to_list_reduce_func12) // CHECK2-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 // CHECK2-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: @@ -1990,7 +1901,7 @@ int bar(int n){ // CHECK2-NEXT: store ptr [[A1]], ptr [[TMP5]], align 4 // 
CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 // CHECK2-NEXT: store ptr [[B2]], ptr [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func7, ptr @_omp_reduction_inter_warp_copy_func8) +// CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func6, ptr @_omp_reduction_inter_warp_copy_func7) // CHECK2-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 // CHECK2-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: @@ -2018,7 +1929,7 @@ int bar(int n){ // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7 +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func6 // CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 @@ -2105,7 +2016,7 @@ int bar(int n){ // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func8 +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func7 // CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 @@ -2179,7 +2090,7 @@ int bar(int n){ // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func9 +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func8 // CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext 
[[TMP3:%.*]]) #[[ATTR3]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 @@ -2266,7 +2177,7 @@ int bar(int n){ // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func10 +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func9 // CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 @@ -2340,7 +2251,7 @@ int bar(int n){ // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func11 +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func10 // CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 @@ -2367,32 +2278,7 @@ int bar(int n){ // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func12 -// CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 -// CHECK2-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK2-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP9]], i32 0, i32 1 -// CHECK2-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP11]]) #[[ATTR4]] -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func13 +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func11 // CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 @@ -2419,7 +2305,7 @@ int bar(int n){ // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func14 +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func12 // CHECK2-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 @@ -2443,1200 +2329,3 @@ int bar(int n){ // CHECK2-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP11]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] // CHECK2-NEXT: ret void // -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20 -// 
CHECK3-SAME: (ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[E1:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4, !nonnull [[META6:![0-9]+]], !align [[META7:![0-9]+]] -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_kernel_environment, ptr [[DYN_PTR]]) -// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1 -// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP3]], ptr [[E1]], align 8 -// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[E1]]) #[[ATTR4:[0-9]+]] -// CHECK3-NEXT: call void @__kmpc_target_deinit() -// CHECK3-NEXT: ret void -// CHECK3: worker.exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 8 dereferenceable(8) [[E:%.*]]) 
#[[ATTR1:[0-9]+]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[E]], ptr [[E_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[E_ADDR]], align 4, !nonnull [[META6]], !align [[META7]] -// CHECK3-NEXT: [[E1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 8) -// CHECK3-NEXT: store double 0.000000e+00, ptr [[E1]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load double, ptr [[E1]], align 8 -// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 -// CHECK3-NEXT: store double [[ADD]], ptr [[E1]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[E1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer() -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 2048, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func, ptr @_omp_reduction_inter_warp_copy_func, ptr @_omp_reduction_list_to_global_copy_func, ptr @_omp_reduction_list_to_global_reduce_func, ptr @_omp_reduction_global_to_list_copy_func, ptr @_omp_reduction_global_to_list_reduce_func) -// CHECK3-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 1 -// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP5:%.*]] = load double, ptr [[TMP0]], align 8 -// CHECK3-NEXT: 
[[TMP6:%.*]] = load double, ptr [[E1]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP5]], [[TMP6]] -// CHECK3-NEXT: store double [[ADD2]], ptr [[TMP0]], align 8 -// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK3: .omp.reduction.done: -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[E1]], i32 8) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3:[0-9]+]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP9]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 
@__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 -// CHECK3-NEXT: [[TMP15:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) -// CHECK3-NEXT: store i64 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i64, ptr [[TMP9]], i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i64, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = icmp eq i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP19:%.*]] = icmp eq i16 [[TMP7]], 1 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] -// CHECK3-NEXT: [[TMP21:%.*]] = and i1 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: [[TMP22:%.*]] = icmp eq i16 [[TMP7]], 2 -// CHECK3-NEXT: [[TMP23:%.*]] = and i16 [[TMP5]], 1 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP23]], 0 -// CHECK3-NEXT: [[TMP25:%.*]] = and i1 [[TMP22]], [[TMP24]] -// CHECK3-NEXT: [[TMP26:%.*]] = icmp sgt i16 [[TMP6]], 0 -// CHECK3-NEXT: [[TMP27:%.*]] = and i1 [[TMP25]], [[TMP26]] -// CHECK3-NEXT: [[TMP28:%.*]] = or i1 [[TMP18]], [[TMP21]] -// CHECK3-NEXT: [[TMP29:%.*]] = or i1 [[TMP28]], [[TMP27]] -// CHECK3-NEXT: br i1 [[TMP29]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 1 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] -// CHECK3-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] -// CHECK3-NEXT: br i1 [[TMP32]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK3: then4: -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [1 x ptr], ptr 
[[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP36:%.*]] = load ptr, ptr [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load double, ptr [[TMP34]], align 8 -// CHECK3-NEXT: store double [[TMP37]], ptr [[TMP36]], align 8 -// CHECK3-NEXT: br label [[IFCONT6:%.*]] -// CHECK3: else5: -// CHECK3-NEXT: br label [[IFCONT6]] -// CHECK3: ifcont6: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31 -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 0, ptr [[DOTCNT_ADDR]], align 4 -// CHECK3-NEXT: br label [[PRECOND:%.*]] -// CHECK3: precond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCNT_ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 2 -// CHECK3-NEXT: br i1 [[TMP7]], label [[BODY:%.*]], label [[EXIT:%.*]] -// CHECK3: body: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2:[0-9]+]], i32 
[[OMP_GLOBAL_THREAD_NUM]]) -// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP9]], i32 [[TMP6]] -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP12]], ptr addrspace(3) [[TMP11]], align 4 -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP13]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]] -// CHECK3: then3: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP16]], i32 [[TMP6]] -// CHECK3-NEXT: [[TMP18:%.*]] = load volatile i32, ptr addrspace(3) [[TMP14]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], ptr [[TMP17]], align 4 -// CHECK3-NEXT: br label [[IFCONT5:%.*]] -// CHECK3: else4: -// CHECK3-NEXT: br label [[IFCONT5]] -// CHECK3: ifcont5: -// CHECK3-NEXT: [[TMP19:%.*]] = 
add nsw i32 [[TMP6]], 1 -// CHECK3-NEXT: store i32 [[TMP19]], ptr [[DOTCNT_ADDR]], align 4 -// CHECK3-NEXT: br label [[PRECOND]] -// CHECK3: exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP7]], align 8 -// CHECK3-NEXT: store double [[TMP10]], ptr [[TMP9]], align 8 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: store ptr [[TMP0]], 
ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP8]]) #[[ATTR4]] -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP8]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP9]], align 8 -// CHECK3-NEXT: store double [[TMP10]], ptr [[TMP7]], align 8 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP8]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26 -// CHECK3-SAME: (i32 noundef [[C:%.*]], i32 noundef [[D:%.*]], ptr 
noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_kernel_environment, ptr [[DYN_PTR]]) -// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP1:%.*]] = load i8, ptr [[C_ADDR]], align 1 -// CHECK3-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) -// CHECK3-NEXT: store i8 [[TMP1]], ptr [[C1]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load float, ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK3-NEXT: store float [[TMP2]], ptr [[D2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP3]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[C1]], ptr [[D2]]) #[[ATTR4]] -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) -// CHECK3-NEXT: call void @__kmpc_target_deinit() -// CHECK3-NEXT: ret void -// CHECK3: worker.exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: 
define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 1 dereferenceable(1) [[C:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[C]], ptr [[C_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META6]] -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META6]], !align [[META8:![0-9]+]] -// CHECK3-NEXT: [[C1:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 1) -// CHECK3-NEXT: [[D2:%.*]] = call align 8 ptr @__kmpc_alloc_shared(i32 4) -// CHECK3-NEXT: store i8 0, ptr [[C1]], align 1 -// CHECK3-NEXT: store float 1.000000e+00, ptr [[D2]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 -// CHECK3-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK3-NEXT: store i8 [[CONV3]], ptr [[C1]], align 1 -// CHECK3-NEXT: [[TMP3:%.*]] = load float, ptr [[D2]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 -// CHECK3-NEXT: store float [[MUL]], ptr [[D2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 
-// CHECK3-NEXT: store ptr [[C1]], ptr [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[D2]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer() -// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 2048, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func1, ptr @_omp_reduction_inter_warp_copy_func2, ptr @_omp_reduction_list_to_global_copy_func3, ptr @_omp_reduction_list_to_global_reduce_func4, ptr @_omp_reduction_global_to_list_copy_func5, ptr @_omp_reduction_global_to_list_reduce_func6) -// CHECK3-NEXT: [[TMP7:%.*]] = icmp eq i32 [[TMP6]], 1 -// CHECK3-NEXT: br i1 [[TMP7]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP0]], align 1 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP8]] to i32 -// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[C1]], align 1 -// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP9]] to i32 -// CHECK3-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK3-NEXT: store i8 [[CONV7]], ptr [[TMP0]], align 1 -// CHECK3-NEXT: [[TMP10:%.*]] = load float, ptr [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load float, ptr [[D2]], align 4 -// CHECK3-NEXT: [[MUL8:%.*]] = fmul float [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store float [[MUL8]], ptr [[TMP1]], align 4 -// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK3: .omp.reduction.done: -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[D2]], i32 4) -// CHECK3-NEXT: call void @__kmpc_free_shared(ptr [[C1]], i32 1) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define 
{{[^@]+}}@_omp_reduction_shuffle_and_reduce_func1 -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP9]], align 1 -// CHECK3-NEXT: [[TMP13:%.*]] = sext i8 [[TMP12]] to i32 -// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP15:%.*]] = trunc i32 [[TMP14]] to i16 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP13]], i16 [[TMP6]], i16 [[TMP15]]) -// CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 -// CHECK3-NEXT: 
store i8 [[TMP17]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[TMP9]], i32 1 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr float, ptr [[TMP21]], i32 1 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP26:%.*]] = trunc i32 [[TMP25]] to i16 -// CHECK3-NEXT: [[TMP27:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP24]], i16 [[TMP6]], i16 [[TMP26]]) -// CHECK3-NEXT: store i32 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i32, ptr [[TMP21]], i32 1 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 -// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] -// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] -// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 -// CHECK3-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 -// CHECK3-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] -// CHECK3-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 -// CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK3-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] -// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] 
-// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 -// CHECK3-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] -// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] -// CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] -// CHECK3: then5: -// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 -// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 -// CHECK3-NEXT: [[TMP49:%.*]] = load i8, ptr [[TMP46]], align 1 -// CHECK3-NEXT: store i8 [[TMP49]], ptr [[TMP48]], align 1 -// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 -// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP54:%.*]] = load float, ptr [[TMP51]], align 4 -// CHECK3-NEXT: store float [[TMP54]], ptr [[TMP53]], align 4 -// CHECK3-NEXT: br label [[IFCONT7:%.*]] -// CHECK3: else6: -// CHECK3-NEXT: br label [[IFCONT7]] -// CHECK3: ifcont7: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func2 -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: 
[[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31 -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP7]], align 1 -// CHECK3-NEXT: store volatile i8 [[TMP9]], ptr addrspace(3) [[TMP8]], align 1 -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], 
label [[ELSE4:%.*]] -// CHECK3: then3: -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]] -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load volatile i8, ptr addrspace(3) [[TMP11]], align 1 -// CHECK3-NEXT: store i8 [[TMP14]], ptr [[TMP13]], align 1 -// CHECK3-NEXT: br label [[IFCONT5:%.*]] -// CHECK3: else4: -// CHECK3-NEXT: br label [[IFCONT5]] -// CHECK3: ifcont5: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) -// CHECK3-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK3-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK3: then8: -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP16]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP18]], ptr addrspace(3) [[TMP17]], align 4 -// CHECK3-NEXT: br label [[IFCONT10:%.*]] -// CHECK3: else9: -// CHECK3-NEXT: br label [[IFCONT10]] -// CHECK3: ifcont10: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label 
[[ELSE14:%.*]] -// CHECK3: then13: -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]] -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load volatile i32, ptr addrspace(3) [[TMP20]], align 4 -// CHECK3-NEXT: store i32 [[TMP23]], ptr [[TMP22]], align 4 -// CHECK3-NEXT: br label [[IFCONT15:%.*]] -// CHECK3: else14: -// CHECK3-NEXT: br label [[IFCONT15]] -// CHECK3: ifcont15: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func3 -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP8]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP7]], align 1 -// CHECK3-NEXT: store i8 [[TMP10]], ptr [[TMP9]], align 1 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds 
[2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP13]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP12]], align 4 -// CHECK3-NEXT: store float [[TMP15]], ptr [[TMP14]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func4 -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP9]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP11]]) #[[ATTR4]] -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func5 -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP8]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 1 -// CHECK3-NEXT: store i8 [[TMP10]], ptr [[TMP7]], align 1 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP4]], i32 [[TMP5]] -// 
CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP13]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load float, ptr [[TMP14]], align 4 -// CHECK3-NEXT: store float [[TMP15]], ptr [[TMP12]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func6 -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], ptr [[TMP9]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void 
@"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP11]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33 -// CHECK3-SAME: (i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], ptr noalias noundef [[DYN_PTR:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_target_init(ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_kernel_environment, ptr [[DYN_PTR]]) -// CHECK3-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP0]], -1 -// CHECK3-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]] -// CHECK3: user_code.entry: -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: store i32 0, ptr [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTZERO_ADDR]], ptr [[A_ADDR]], ptr [[B_ADDR]]) #[[ATTR4]] -// CHECK3-NEXT: call void @__kmpc_target_deinit() -// CHECK3-NEXT: ret void -// CHECK3: worker.exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined -// CHECK3-SAME: (ptr noalias noundef 
[[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META6]], !align [[META8]] -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META6]], !align [[META9:![0-9]+]] -// CHECK3-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK3-NEXT: store i16 -32768, ptr [[B2]], align 2 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x ptr], ptr [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK3-NEXT: call void @__kmpc_parallel_60(ptr @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined, ptr null, ptr 
[[CAPTURED_VARS_ADDRS]], i32 2, i32 0) -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: %"_openmp_teams_reductions_buffer_$_$ptr" = call ptr @__kmpc_reduction_get_fixed_buffer() -// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr @[[GLOB1]], ptr %"_openmp_teams_reductions_buffer_$_$ptr", i32 2048, i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func9, ptr @_omp_reduction_inter_warp_copy_func10, ptr @_omp_reduction_list_to_global_copy_func11, ptr @_omp_reduction_list_to_global_reduce_func12, ptr @_omp_reduction_global_to_list_copy_func13, ptr @_omp_reduction_global_to_list_reduce_func14) -// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 -// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[OR]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP12]] to i32 -// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP13]] to i32 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr 
[[B2]], align 2 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i16 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i16 [[COND]], ptr [[TMP1]], align 2 -// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK3: .omp.reduction.done: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined -// CHECK3-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]], ptr noundef nonnull align 4 dereferenceable(4) [[A:%.*]], ptr noundef nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store ptr [[A]], ptr [[A_ADDR]], align 4 -// CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[A_ADDR]], align 4, !nonnull [[META6]], !align [[META8]] -// CHECK3-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META6]], !align [[META9]] -// CHECK3-NEXT: store i32 0, ptr [[A1]], align 4 -// CHECK3-NEXT: store i16 -32768, ptr [[B2]], align 2 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK3-NEXT: store i32 [[OR]], ptr [[A1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: 
[[CONV:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[B2]], align 2 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[A1]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[B2]], ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr @[[GLOB1]], i64 8, ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr @_omp_reduction_shuffle_and_reduce_func7, ptr @_omp_reduction_inter_warp_copy_func8) -// CHECK3-NEXT: [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 1 -// CHECK3-NEXT: br i1 [[TMP8]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[A1]], align 4 -// CHECK3-NEXT: [[OR5:%.*]] = or i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: store i32 [[OR5]], ptr [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP11]] to i32 -// CHECK3-NEXT: [[TMP12:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: [[CONV7:%.*]] = sext i16 [[TMP12]] to i32 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK3-NEXT: br 
i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK3: cond.true9: -// CHECK3-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP1]], align 2 -// CHECK3-NEXT: br label [[COND_END11:%.*]] -// CHECK3: cond.false10: -// CHECK3-NEXT: [[TMP14:%.*]] = load i16, ptr [[B2]], align 2 -// CHECK3-NEXT: br label [[COND_END11]] -// CHECK3: cond.end11: -// CHECK3-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP13]], [[COND_TRUE9]] ], [ [[TMP14]], [[COND_FALSE10]] ] -// CHECK3-NEXT: store i16 [[COND12]], ptr [[TMP1]], align 2 -// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK3: .omp.reduction.done: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7 -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] 
= load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 -// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], align 2 -// CHECK3-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32 -// CHECK3-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 -// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]]) -// CHECK3-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 -// CHECK3-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq 
i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 -// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] -// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] -// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 -// CHECK3-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 -// CHECK3-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] -// CHECK3-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 -// CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK3-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] -// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] -// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 -// CHECK3-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] -// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] -// CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] -// CHECK3: then5: -// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 -// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 -// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4 -// CHECK3-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 -// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: 
[[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 -// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2 -// CHECK3-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2 -// CHECK3-NEXT: br label [[IFCONT7:%.*]] -// CHECK3: else6: -// CHECK3-NEXT: br label [[IFCONT7]] -// CHECK3: ifcont7: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func8 -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31 -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5 -// CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 
0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP9]], ptr addrspace(3) [[TMP8]], align 4 -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]] -// CHECK3: then3: -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]] -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load volatile i32, ptr addrspace(3) [[TMP11]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: br label [[IFCONT5:%.*]] -// CHECK3: else4: -// CHECK3-NEXT: br label [[IFCONT5]] -// CHECK3: ifcont5: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) -// CHECK3-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK3-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK3: then8: -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 
[[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP16]], align 2 -// CHECK3-NEXT: store volatile i16 [[TMP18]], ptr addrspace(3) [[TMP17]], align 2 -// CHECK3-NEXT: br label [[IFCONT10:%.*]] -// CHECK3: else9: -// CHECK3-NEXT: br label [[IFCONT10]] -// CHECK3: ifcont10: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]] -// CHECK3: then13: -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]] -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load volatile i16, ptr addrspace(3) [[TMP20]], align 2 -// CHECK3-NEXT: store i16 [[TMP23]], ptr [[TMP22]], align 2 -// CHECK3-NEXT: br label [[IFCONT15:%.*]] -// CHECK3: else14: -// CHECK3-NEXT: br label [[IFCONT15]] -// CHECK3: ifcont15: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func9 -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i16 noundef signext [[TMP1:%.*]], i16 noundef signext [[TMP2:%.*]], i16 noundef signext [[TMP3:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, 
align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i16 [[TMP1]], ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, ptr [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, ptr [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, ptr [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP12]], i16 [[TMP6]], i16 [[TMP14]]) -// CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[TMP9]], i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT]], ptr [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP19:%.*]] = load ptr, ptr [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 -// CHECK3-NEXT: [[TMP22:%.*]] = load i16, ptr [[TMP19]], 
align 2 -// CHECK3-NEXT: [[TMP23:%.*]] = sext i16 [[TMP22]] to i32 -// CHECK3-NEXT: [[TMP24:%.*]] = call i32 @__kmpc_get_warp_size() -// CHECK3-NEXT: [[TMP25:%.*]] = trunc i32 [[TMP24]] to i16 -// CHECK3-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP23]], i16 [[TMP6]], i16 [[TMP25]]) -// CHECK3-NEXT: [[TMP27:%.*]] = trunc i32 [[TMP26]] to i16 -// CHECK3-NEXT: store i16 [[TMP27]], ptr [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr i16, ptr [[TMP19]], i32 1 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr i16, ptr [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK3-NEXT: store ptr [[DOTOMP_REDUCTION_ELEMENT4]], ptr [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = icmp eq i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP7]], 1 -// CHECK3-NEXT: [[TMP32:%.*]] = icmp ult i16 [[TMP5]], [[TMP6]] -// CHECK3-NEXT: [[TMP33:%.*]] = and i1 [[TMP31]], [[TMP32]] -// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP7]], 2 -// CHECK3-NEXT: [[TMP35:%.*]] = and i16 [[TMP5]], 1 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP35]], 0 -// CHECK3-NEXT: [[TMP37:%.*]] = and i1 [[TMP34]], [[TMP36]] -// CHECK3-NEXT: [[TMP38:%.*]] = icmp sgt i16 [[TMP6]], 0 -// CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK3-NEXT: [[TMP40:%.*]] = or i1 [[TMP30]], [[TMP33]] -// CHECK3-NEXT: [[TMP41:%.*]] = or i1 [[TMP40]], [[TMP39]] -// CHECK3-NEXT: br i1 [[TMP41]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP4]], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]]) #[[ATTR4]] -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: [[TMP42:%.*]] = icmp eq i16 [[TMP7]], 1 -// CHECK3-NEXT: [[TMP43:%.*]] = icmp uge i16 [[TMP5]], [[TMP6]] -// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] -// 
CHECK3-NEXT: br i1 [[TMP44]], label [[THEN5:%.*]], label [[ELSE6:%.*]] -// CHECK3: then5: -// CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP46:%.*]] = load ptr, ptr [[TMP45]], align 4 -// CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP48:%.*]] = load ptr, ptr [[TMP47]], align 4 -// CHECK3-NEXT: [[TMP49:%.*]] = load i32, ptr [[TMP46]], align 4 -// CHECK3-NEXT: store i32 [[TMP49]], ptr [[TMP48]], align 4 -// CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP51:%.*]] = load ptr, ptr [[TMP50]], align 4 -// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP53:%.*]] = load ptr, ptr [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP54:%.*]] = load i16, ptr [[TMP51]], align 2 -// CHECK3-NEXT: store i16 [[TMP54]], ptr [[TMP53]], align 2 -// CHECK3-NEXT: br label [[IFCONT7:%.*]] -// CHECK3: else6: -// CHECK3-NEXT: br label [[IFCONT7]] -// CHECK3: ifcont7: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func10 -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[TMP3]], 31 -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_get_hardware_thread_id_in_block() -// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[TMP4]], 5 -// 
CHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM]]) -// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP9]], ptr addrspace(3) [[TMP8]], align 4 -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[TMP2]], [[TMP10]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN3:%.*]], label [[ELSE4:%.*]] -// CHECK3: then3: -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]] -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load volatile i32, ptr addrspace(3) [[TMP11]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], ptr [[TMP13]], align 4 -// CHECK3-NEXT: br label [[IFCONT5:%.*]] -// CHECK3: else4: -// CHECK3-NEXT: br label 
[[IFCONT5]] -// CHECK3: ifcont5: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]]) -// CHECK3-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK3-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK3: then8: -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i16, ptr [[TMP16]], align 2 -// CHECK3-NEXT: store volatile i16 [[TMP18]], ptr addrspace(3) [[TMP17]], align 2 -// CHECK3-NEXT: br label [[IFCONT10:%.*]] -// CHECK3: else9: -// CHECK3-NEXT: br label [[IFCONT10]] -// CHECK3: ifcont10: -// CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM11:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) -// CHECK3-NEXT: call void @__kmpc_barrier(ptr @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM11]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD12:%.*]] = icmp ult i32 [[TMP2]], [[TMP19]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD12]], label [[THEN13:%.*]], label [[ELSE14:%.*]] -// CHECK3: then13: -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [32 x i32], ptr addrspace(3) @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[TMP2]] -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load volatile i16, ptr addrspace(3) [[TMP20]], align 2 -// CHECK3-NEXT: store i16 [[TMP23]], ptr [[TMP22]], align 2 -// CHECK3-NEXT: br label [[IFCONT15:%.*]] -// CHECK3: else14: -// CHECK3-NEXT: br label 
[[IFCONT15]] -// CHECK3: ifcont15: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func11 -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP8]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP13]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP12]], align 2 -// CHECK3-NEXT: store i16 [[TMP15]], ptr [[TMP14]], align 2 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func12 
-// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP9]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[DOTOMP_REDUCTION_RED_LIST]], ptr [[TMP11]]) #[[ATTR4]] -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func13 -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) 
#[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP8]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK3-NEXT: store i32 [[TMP10]], ptr [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x ptr], ptr [[TMP3]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP4]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP13]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = load i16, ptr [[TMP14]], align 2 -// CHECK3-NEXT: store i16 [[TMP15]], ptr [[TMP12]], align 2 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func14 -// CHECK3-SAME: (ptr noundef [[TMP0:%.*]], i32 noundef [[TMP1:%.*]], ptr noundef [[TMP2:%.*]]) #[[ATTR3]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 
-// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca ptr, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x ptr], align 4 -// CHECK3-NEXT: store ptr [[TMP0]], ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: store ptr [[TMP2]], ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: store ptr [[TMP7]], ptr [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP3]], i32 [[TMP4]] -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], ptr [[TMP9]], i32 0, i32 1 -// CHECK3-NEXT: store ptr [[TMP10]], ptr [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33_omp_outlined_omp$reduction$reduction_func"(ptr [[TMP11]], ptr [[DOTOMP_REDUCTION_RED_LIST]]) #[[ATTR4]] -// CHECK3-NEXT: ret void -// diff --git a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp index 9bb603402def9..5c97b05c60be4 100644 --- a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp +++ b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp @@ -262,7 +262,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: 
[[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP20]], 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -310,7 +310,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP45]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP45]], align 4 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP46]], align 4 // CHECK1-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -403,7 +403,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP19:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0 // CHECK1-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: 
store i32 3, ptr [[TMP22]], align 4 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -458,7 +458,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 // CHECK1-NEXT: [[TMP48:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP47]], 0 // CHECK1-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP49]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP49]], align 4 // CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP50]], align 4 // CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 2 @@ -524,7 +524,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -598,7 +598,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP42:%.*]] = sext i16 [[TMP41]] to i32 // CHECK1-NEXT: [[TMP43:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP42]], 0 // CHECK1-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, 
i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK1-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP45]], align 4 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 @@ -912,7 +912,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP20]], 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -960,7 +960,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP45]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP45]], align 4 // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP46]], align 4 // CHECK3-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS6]], i32 0, i32 2 @@ -1053,7 +1053,7 @@ int bar(int n){ // 
CHECK3-NEXT: [[TMP19:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP17]], 0 // CHECK3-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP22]], align 4 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1108,7 +1108,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP47:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_3]], align 4 // CHECK3-NEXT: [[TMP48:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP47]], 0 // CHECK3-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP49]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP49]], align 4 // CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP50]], align 4 // CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 2 @@ -1174,7 +1174,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1248,7 +1248,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP42:%.*]] = sext i16 [[TMP41]] to i32 // CHECK3-NEXT: [[TMP43:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP42]], 0 // CHECK3-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK3-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP45]], align 4 // CHECK3-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS4]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_codegen.cpp b/clang/test/OpenMP/teams_codegen.cpp index d69ce6e9a2ea8..3d3f2754276d5 100644 --- a/clang/test/OpenMP/teams_codegen.cpp +++ b/clang/test/OpenMP/teams_codegen.cpp @@ -367,7 +367,7 @@ void foo() { // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -417,7 +417,7 @@ void 
foo() { // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP35]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP36]], align 4 // CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -478,7 +478,7 @@ void foo() { // CHECK1-NEXT: [[TMP65:%.*]] = load i32, ptr [[LA]], align 4 // CHECK1-NEXT: [[TMP66:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP65]], 0 // CHECK1-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP67]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP67]], align 4 // CHECK1-NEXT: [[TMP68:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP68]], align 4 // CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -539,7 +539,7 @@ void foo() { // CHECK1-NEXT: [[TMP97:%.*]] = load i32, ptr [[LA]], align 4 // CHECK1-NEXT: [[TMP98:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP97]], 0 // CHECK1-NEXT: [[TMP99:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP99]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP99]], align 4 // CHECK1-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP100]], align 4 // CHECK1-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 2 @@ -635,7 +635,7 @@ void foo() { // CHECK1-NEXT: [[TMP149:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD]], 0 // CHECK1-NEXT: [[TMP150:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP148]], 0 // CHECK1-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP151]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP151]], align 4 // CHECK1-NEXT: [[TMP152:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP152]], align 4 // CHECK1-NEXT: [[TMP153:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -706,7 +706,7 @@ void foo() { // CHECK1-NEXT: [[TMP186:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD36]], 0 // CHECK1-NEXT: [[TMP187:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD38]], 0 // CHECK1-NEXT: [[TMP188:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS39]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP188]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP188]], align 4 // CHECK1-NEXT: [[TMP189:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS39]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP189]], align 4 // CHECK1-NEXT: [[TMP190:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS39]], i32 0, i32 2 @@ -1017,7 +1017,7 @@ void foo() { // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1067,7 +1067,7 @@ void foo() { // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP35]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP35]], align 4 // CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP36]], align 4 // CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -1128,7 +1128,7 @@ void foo() { // CHECK3-NEXT: [[TMP65:%.*]] = load i32, ptr [[LA]], align 4 // CHECK3-NEXT: [[TMP66:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP65]], 0 // CHECK3-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP67]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP67]], align 4 // CHECK3-NEXT: [[TMP68:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP68]], align 4 // CHECK3-NEXT: 
[[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -1189,7 +1189,7 @@ void foo() { // CHECK3-NEXT: [[TMP97:%.*]] = load i32, ptr [[LA]], align 4 // CHECK3-NEXT: [[TMP98:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP97]], 0 // CHECK3-NEXT: [[TMP99:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP99]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP99]], align 4 // CHECK3-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP100]], align 4 // CHECK3-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS20]], i32 0, i32 2 @@ -1282,7 +1282,7 @@ void foo() { // CHECK3-NEXT: [[TMP147:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD]], 0 // CHECK3-NEXT: [[TMP148:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP146]], 0 // CHECK3-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP149]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP149]], align 4 // CHECK3-NEXT: [[TMP150:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP150]], align 4 // CHECK3-NEXT: [[TMP151:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS28]], i32 0, i32 2 @@ -1353,7 +1353,7 @@ void foo() { // CHECK3-NEXT: [[TMP184:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD36]], 0 // CHECK3-NEXT: [[TMP185:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD38]], 0 // CHECK3-NEXT: [[TMP186:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS39]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr 
[[TMP186]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP186]], align 4 // CHECK3-NEXT: [[TMP187:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS39]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP187]], align 4 // CHECK3-NEXT: [[TMP188:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS39]], i32 0, i32 2 @@ -1654,7 +1654,7 @@ void foo() { // CHECK9-NEXT: [[TMP19:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP16]], 0 // CHECK9-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP22]], align 4 // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1724,7 +1724,7 @@ void foo() { // CHECK9-NEXT: [[TMP56:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP53]], 0 // CHECK9-NEXT: [[TMP57:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP55]], 0 // CHECK9-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP58]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP58]], align 4 // CHECK9-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP59]], align 4 // CHECK9-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -1897,7 +1897,7 @@ void foo() { // CHECK11-NEXT: [[TMP19:%.*]] = insertvalue [3 x i32] 
zeroinitializer, i32 [[TMP16]], 0 // CHECK11-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP22]], align 4 // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1967,7 +1967,7 @@ void foo() { // CHECK11-NEXT: [[TMP56:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP53]], 0 // CHECK11-NEXT: [[TMP57:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP55]], 0 // CHECK11-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP58]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP58]], align 4 // CHECK11-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP59]], align 4 // CHECK11-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -2140,7 +2140,7 @@ void foo() { // CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[A2]], align 4 // CHECK17-NEXT: [[TMP14:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP13]], 0 // CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP15]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP15]], align 4 // CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // 
CHECK17-NEXT: store i32 3, ptr [[TMP16]], align 4 // CHECK17-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2202,7 +2202,7 @@ void foo() { // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 123 // CHECK17-NEXT: [[TMP44:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD]], 0 // CHECK17-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP45]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP45]], align 4 // CHECK17-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1 // CHECK17-NEXT: store i32 3, ptr [[TMP46]], align 4 // CHECK17-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 2 @@ -2362,7 +2362,7 @@ void foo() { // CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[A2]], align 4 // CHECK19-NEXT: [[TMP14:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP13]], 0 // CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP15]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP15]], align 4 // CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 3, ptr [[TMP16]], align 4 // CHECK19-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2424,7 +2424,7 @@ void foo() { // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 123 // CHECK19-NEXT: [[TMP44:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[ADD]], 0 // CHECK19-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP45]], 
align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP45]], align 4 // CHECK19-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 1 // CHECK19-NEXT: store i32 3, ptr [[TMP46]], align 4 // CHECK19-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS8]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_codegen.cpp b/clang/test/OpenMP/teams_distribute_codegen.cpp index 76a716deac638..09eb2eda2f4c5 100644 --- a/clang/test/OpenMP/teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_codegen.cpp @@ -260,7 +260,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP30:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0 // CHECK1-NEXT: [[TMP31:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -326,7 +326,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK1-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP65]], align 4 // 
CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -667,7 +667,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP30:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0 // CHECK3-NEXT: [[TMP31:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -733,7 +733,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK3-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP65]], align 4 // CHECK3-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1054,7 +1054,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, 
ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1272,7 +1272,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1457,7 +1457,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1610,7 +1610,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1800,7 +1800,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK25-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK25-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK25-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK25-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK25-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK25-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK25-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2007,7 +2007,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK25-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 // CHECK25-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK25-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK25-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK25-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, 
i32 1 // CHECK25-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK25-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2202,7 +2202,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK27-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK27-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK27-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK27-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK27-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK27-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK27-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2408,7 +2408,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK27-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 // CHECK27-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK27-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK27-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK27-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK27-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK27-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp index d3a87c8fb420d..b0b2051fd3e8c 100644 --- a/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_collapse_codegen.cpp @@ -133,7 +133,7 
@@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -302,7 +302,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -539,7 +539,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], 1 // CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP39]], 
align 4 // CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP40]], align 4 // CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -777,7 +777,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1010,7 +1010,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP37]], 1 // CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP38]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP38]], align 4 // CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP39]], align 4 // CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1246,7 +1246,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp index 5413175cf3ea6..863b3f8885e87 100644 --- a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp @@ -176,7 +176,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -224,7 +224,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -272,7 +272,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -612,7 +612,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -660,7 +660,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -708,7 +708,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1091,7 +1091,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // 
CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1168,7 +1168,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK9-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK9-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK9-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK9-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1245,7 +1245,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1 // CHECK9-NEXT: [[TMP101:%.*]] = zext i32 [[ADD29]] to i64 // CHECK9-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP102]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP102]], align 4 // CHECK9-NEXT: [[TMP103:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP103]], align 4 // CHECK9-NEXT: [[TMP104:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -1685,7 +1685,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1732,7 +1732,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -1779,7 +1779,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK9-NEXT: [[TMP54:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK9-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -2159,7 +2159,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2237,7 +2237,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK11-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK11-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK11-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK11-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -2315,7 +2315,7 @@ int main (int 
argc, char **argv) { // CHECK11-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP102]], 1 // CHECK11-NEXT: [[TMP103:%.*]] = zext i32 [[ADD29]] to i64 // CHECK11-NEXT: [[TMP104:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP104]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP104]], align 4 // CHECK11-NEXT: [[TMP105:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP105]], align 4 // CHECK11-NEXT: [[TMP106:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -2752,7 +2752,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2799,7 +2799,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK11-NEXT: store i32 5, 
ptr [[TMP31]], align 4 // CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2846,7 +2846,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK11-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK11-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp index 57cfe006caf23..b304d1900f464 100644 --- a/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_firstprivate_codegen.cpp @@ -312,7 +312,7 @@ int main() { // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr 
inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -602,7 +602,7 @@ int main() { // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1131,7 +1131,7 @@ int main() { // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1419,7 +1419,7 @@ int main() { // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: 
[[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp index 85108f1d8f842..01820d6cd5ee3 100644 --- a/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_lastprivate_codegen.cpp @@ -527,7 +527,7 @@ int main() { // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -845,7 +845,7 @@ int main() { // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP23:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1237,7 +1237,7 @@ int main() { // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1553,7 +1553,7 @@ int main() { // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK11-NEXT: 
[[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp index 3dac2eb57cea7..d8cb02262bc9a 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_codegen.cpp @@ -260,7 +260,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP30:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0 // CHECK1-NEXT: [[TMP31:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -326,7 +326,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK1-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP65]], align 4 // CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -878,7 +878,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP30:%.*]] = 
insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0 // CHECK3-NEXT: [[TMP31:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -944,7 +944,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK3-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP65]], align 4 // CHECK3-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1468,7 +1468,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP27]], align 4 // 
CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1787,7 +1787,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2069,7 +2069,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2295,7 +2295,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2554,7 +2554,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK25-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK25-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK25-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK25-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK25-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK25-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK25-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2862,7 +2862,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK25-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 // CHECK25-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK25-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK25-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK25-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK25-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK25-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3130,7 
+3130,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK27-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK27-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK27-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK27-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK27-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK27-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK27-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3433,7 +3433,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK27-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 // CHECK27-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK27-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK27-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK27-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK27-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK27-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp index 0c97fbc153741..dd70046cfc58c 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_collapse_codegen.cpp @@ -138,7 +138,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // 
CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -382,7 +382,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -690,7 +690,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], 1 // CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // 
CHECK9-NEXT: store i32 6, ptr [[TMP40]], align 4 // CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1048,7 +1048,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1356,7 +1356,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP37]], 1 // CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP38]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP38]], align 4 // CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP39]], align 4 // CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1716,7 +1716,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // 
CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp index f71435e84dd19..85404b0915f15 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_copyin_codegen.cpp @@ -129,7 +129,7 @@ int main() { // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP14]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP14]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP15]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -361,7 +361,7 @@ int main() { // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP14]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP14]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP15]], align 4 // CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -600,7 +600,7 @@ int main() { // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP14]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP14]], align 4 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP15]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -827,7 +827,7 @@ int main() { // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP14]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP14]], align 4 // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP15]], align 4 // CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp index def5a31d1b8e8..2ff4da62109ab 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_dist_schedule_codegen.cpp @@ -188,7 +188,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -236,7 +236,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -284,7 +284,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -846,7 +846,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -894,7 +894,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -942,7 +942,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1538,7 +1538,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1615,7 +1615,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK9-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK9-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK9-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK9-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1701,7 +1701,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP105]], 1 // CHECK9-NEXT: [[TMP106:%.*]] = zext i32 [[ADD29]] to i64 // CHECK9-NEXT: [[TMP107:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP107]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP107]], align 4 // CHECK9-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK9-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -2462,7 +2462,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr 
[[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2509,7 +2509,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2565,7 +2565,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK9-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK9-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP59]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP59]], align 4 // CHECK9-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK9-NEXT: store i32 3, ptr [[TMP60]], align 4 // CHECK9-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -3188,7 +3188,7 @@ int main (int 
argc, char **argv) { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3266,7 +3266,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK11-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK11-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK11-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK11-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -3353,7 +3353,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP107]], 1 // CHECK11-NEXT: [[TMP108:%.*]] = zext i32 [[ADD29]] to i64 // CHECK11-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK11-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr 
[[TMP110]], align 4 // CHECK11-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -4099,7 +4099,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4146,7 +4146,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -4202,7 +4202,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK11-NEXT: [[TMP58:%.*]] = 
getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK11-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP59]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP59]], align 4 // CHECK11-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK11-NEXT: store i32 3, ptr [[TMP60]], align 4 // CHECK11-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp index a946f2d57f016..f4442d34e8006 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -346,7 +346,7 @@ int main() { // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -766,7 +766,7 @@ int main() { // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1425,7 +1425,7 @@ int main() { // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1839,7 +1839,7 @@ int main() { // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 
5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp index 0a03f6fe1922e..0a63f0521cc73 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_if_codegen.cpp @@ -135,7 +135,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -176,7 +176,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -526,7 +526,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -567,7 +567,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -625,7 +625,7 @@ int main() { // CHECK1-NEXT: [[TMP52:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1 // CHECK1-NEXT: [[TMP53:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP52]], 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr 
[[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -1147,7 +1147,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1188,7 +1188,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -1246,7 +1246,7 @@ int main() { // 
CHECK1-NEXT: [[TMP52:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1 // CHECK1-NEXT: [[TMP53:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP52]], 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp index b5e0a7695db9c..0e736dd750345 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_lastprivate_codegen.cpp @@ -775,7 +775,7 @@ int main() { // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1241,7 +1241,7 @@ int main() { // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // 
CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1775,7 +1775,7 @@ int main() { // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2235,7 +2235,7 @@ int main() { // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp index e33529d8f86a8..c23e5c57413ab 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_num_threads_codegen.cpp @@ -108,7 +108,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -172,7 +172,7 @@ int main() { // CHECK1-NEXT: [[TMP35:%.*]] = zext i8 [[TMP34]] to i32 // CHECK1-NEXT: [[TMP36:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP35]], 0 // CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP37]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP37]], align 4 // CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP38]], align 4 // CHECK1-NEXT: 
[[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -591,7 +591,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -632,7 +632,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -691,7 +691,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -741,7 +741,7 @@ int main() { // CHECK1-NEXT: [[TMP26:%.*]] = zext i8 [[TMP25]] to i32 // CHECK1-NEXT: [[TMP27:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP26]], 0 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP29]], align 4 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -1468,7 +1468,7 @@ int main() { // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1532,7 
+1532,7 @@ int main() { // CHECK5-NEXT: [[TMP35:%.*]] = zext i8 [[TMP34]] to i32 // CHECK5-NEXT: [[TMP36:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP35]], 0 // CHECK5-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP37]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP37]], align 4 // CHECK5-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP38]], align 4 // CHECK5-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -1951,7 +1951,7 @@ int main() { // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1992,7 +1992,7 @@ int main() { // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK5-NEXT: 
[[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2051,7 +2051,7 @@ int main() { // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2101,7 +2101,7 @@ int main() { // CHECK5-NEXT: [[TMP26:%.*]] = zext i8 [[TMP25]] to i32 // CHECK5-NEXT: [[TMP27:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP26]], 0 // CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP29]], align 4 // CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp index 91b92dc44ca6c..70d402d8b2bd2 100644 --- 
a/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_private_codegen.cpp @@ -274,7 +274,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -556,7 +556,7 @@ int main() { // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1032,7 +1032,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1308,7 +1308,7 @@ int main() { // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp index c3a12a08a5d98..189d35498cf4f 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_proc_bind_codegen.cpp @@ -74,7 +74,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // 
CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -115,7 +115,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -448,7 +448,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp 
b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp index c66b83b783122..628e171b8ca9e 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_codegen.cpp @@ -122,7 +122,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -424,7 +424,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -723,7 +723,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1021,7 +1021,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp index 83f572f28ec29..198632d0bbe6d 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp @@ -263,7 +263,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -311,7 +311,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -359,7 +359,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -407,7 +407,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -455,7 +455,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK1-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK1-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK1-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK1-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -1332,7 +1332,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1380,7 +1380,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -1428,7 +1428,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP54]], 
align 4 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1476,7 +1476,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK3-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK3-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK3-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK3-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -1524,7 +1524,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK3-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK3-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK3-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK3-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -2374,7 +2374,7 @@ int main 
(int argc, char **argv) { // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2422,7 +2422,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -2470,7 +2470,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK5-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK5-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// 
CHECK5-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK5-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK5-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -2518,7 +2518,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK5-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK5-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK5-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK5-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -2566,7 +2566,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK5-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK5-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK5-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK5-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -3443,7 +3443,7 @@ int main (int argc, char **argv) { // CHECK7-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK7-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3491,7 +3491,7 @@ int main (int argc, char **argv) { // CHECK7-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK7-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK7-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK7-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK7-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -3539,7 +3539,7 @@ int main (int argc, char **argv) { // CHECK7-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK7-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK7-NEXT: [[TMP54:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK7-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK7-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -3587,7 +3587,7 @@ int main (int argc, char **argv) { // CHECK7-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK7-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK7-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK7-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK7-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK7-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -3635,7 +3635,7 @@ int main (int argc, char **argv) { // CHECK7-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK7-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK7-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK7-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK7-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK7-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK7-NEXT: 
store i32 2, ptr [[TMP101]], align 4 // CHECK7-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -4543,7 +4543,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK13-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4620,7 +4620,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK13-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK13-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK13-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK13-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK13-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -4706,7 +4706,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP105]], 1 // CHECK13-NEXT: [[TMP106:%.*]] = zext i32 [[ADD29]] to i64 // CHECK13-NEXT: [[TMP107:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP107]], align 4 +// CHECK13-NEXT: 
store i32 5, ptr [[TMP107]], align 4 // CHECK13-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK13-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK13-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -4783,7 +4783,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD44:%.*]] = add nsw i32 [[TMP143]], 1 // CHECK13-NEXT: [[TMP144:%.*]] = zext i32 [[ADD44]] to i64 // CHECK13-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP145]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP145]], align 4 // CHECK13-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1 // CHECK13-NEXT: store i32 4, ptr [[TMP146]], align 4 // CHECK13-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 2 @@ -4869,7 +4869,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD60:%.*]] = add nsw i32 [[TMP186]], 1 // CHECK13-NEXT: [[TMP187:%.*]] = zext i32 [[ADD60]] to i64 // CHECK13-NEXT: [[TMP188:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP188]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP188]], align 4 // CHECK13-NEXT: [[TMP189:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 // CHECK13-NEXT: store i32 5, ptr [[TMP189]], align 4 // CHECK13-NEXT: [[TMP190:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 @@ -6083,7 +6083,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // 
CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK13-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6130,7 +6130,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK13-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -6186,7 +6186,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK13-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK13-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP59]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP59]], align 4 // CHECK13-NEXT: 
[[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK13-NEXT: store i32 3, ptr [[TMP60]], align 4 // CHECK13-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -6233,7 +6233,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP80:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK13-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK13-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP82]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP82]], align 4 // CHECK13-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK13-NEXT: store i32 2, ptr [[TMP83]], align 4 // CHECK13-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -6289,7 +6289,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP108:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0 // CHECK13-NEXT: [[TMP109:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0 // CHECK13-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK13-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1 // CHECK13-NEXT: store i32 3, ptr [[TMP111]], align 4 // CHECK13-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 2 @@ -7252,7 +7252,7 @@ int main (int 
argc, char **argv) { // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK15-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK15-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7330,7 +7330,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK15-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK15-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK15-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK15-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK15-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -7417,7 +7417,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP107]], 1 // CHECK15-NEXT: [[TMP108:%.*]] = zext i32 [[ADD29]] to i64 // CHECK15-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK15-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK15-NEXT: store i32 5, ptr 
[[TMP110]], align 4 // CHECK15-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -7495,7 +7495,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[ADD44:%.*]] = add nsw i32 [[TMP146]], 1 // CHECK15-NEXT: [[TMP147:%.*]] = zext i32 [[ADD44]] to i64 // CHECK15-NEXT: [[TMP148:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP148]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP148]], align 4 // CHECK15-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1 // CHECK15-NEXT: store i32 4, ptr [[TMP149]], align 4 // CHECK15-NEXT: [[TMP150:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 2 @@ -7582,7 +7582,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[ADD60:%.*]] = add nsw i32 [[TMP190]], 1 // CHECK15-NEXT: [[TMP191:%.*]] = zext i32 [[ADD60]] to i64 // CHECK15-NEXT: [[TMP192:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP192]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP192]], align 4 // CHECK15-NEXT: [[TMP193:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 // CHECK15-NEXT: store i32 5, ptr [[TMP193]], align 4 // CHECK15-NEXT: [[TMP194:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 @@ -8771,7 +8771,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK15-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK15-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK15-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -8818,7 +8818,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK15-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK15-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK15-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK15-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -8874,7 +8874,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK15-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK15-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP59]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP59]], align 4 // CHECK15-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK15-NEXT: store i32 3, ptr [[TMP60]], align 4 // CHECK15-NEXT: [[TMP61:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -8921,7 +8921,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP80:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK15-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK15-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP82]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP82]], align 4 // CHECK15-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK15-NEXT: store i32 2, ptr [[TMP83]], align 4 // CHECK15-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -8977,7 +8977,7 @@ int main (int argc, char **argv) { // CHECK15-NEXT: [[TMP108:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0 // CHECK15-NEXT: [[TMP109:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0 // CHECK15-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0 -// CHECK15-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK15-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK15-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1 // CHECK15-NEXT: store i32 3, ptr [[TMP111]], align 4 // CHECK15-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 2 @@ -9913,7 +9913,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK17-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -9990,7 +9990,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK17-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK17-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK17-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK17-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -10076,7 +10076,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP105]], 1 // CHECK17-NEXT: [[TMP106:%.*]] = zext i32 [[ADD29]] to i64 // CHECK17-NEXT: [[TMP107:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP107]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP107]], align 4 // CHECK17-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK17-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK17-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -10153,7 +10153,7 @@ int main (int argc, 
char **argv) { // CHECK17-NEXT: [[ADD44:%.*]] = add nsw i32 [[TMP143]], 1 // CHECK17-NEXT: [[TMP144:%.*]] = zext i32 [[ADD44]] to i64 // CHECK17-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP145]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP145]], align 4 // CHECK17-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP146]], align 4 // CHECK17-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 2 @@ -10239,7 +10239,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD60:%.*]] = add nsw i32 [[TMP186]], 1 // CHECK17-NEXT: [[TMP187:%.*]] = zext i32 [[ADD60]] to i64 // CHECK17-NEXT: [[TMP188:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP188]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP188]], align 4 // CHECK17-NEXT: [[TMP189:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 // CHECK17-NEXT: store i32 5, ptr [[TMP189]], align 4 // CHECK17-NEXT: [[TMP190:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 @@ -11453,7 +11453,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -11500,7 +11500,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK17-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -11556,7 +11556,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK17-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK17-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP59]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP59]], align 4 // CHECK17-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK17-NEXT: store i32 3, ptr [[TMP60]], align 4 // CHECK17-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -11603,7 +11603,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP80:%.*]] = 
getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK17-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK17-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP82]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP82]], align 4 // CHECK17-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP83]], align 4 // CHECK17-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -11659,7 +11659,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP108:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0 // CHECK17-NEXT: [[TMP109:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0 // CHECK17-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK17-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1 // CHECK17-NEXT: store i32 3, ptr [[TMP111]], align 4 // CHECK17-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 2 @@ -12622,7 +12622,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK19-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP26]], align 4 // 
CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -12700,7 +12700,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK19-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK19-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK19-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK19-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -12787,7 +12787,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP107]], 1 // CHECK19-NEXT: [[TMP108:%.*]] = zext i32 [[ADD29]] to i64 // CHECK19-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK19-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK19-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK19-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -12865,7 +12865,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD44:%.*]] = add nsw i32 [[TMP146]], 1 // CHECK19-NEXT: [[TMP147:%.*]] = zext i32 [[ADD44]] to i64 // CHECK19-NEXT: [[TMP148:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP148]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP148]], align 4 // CHECK19-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP149]], align 4 // CHECK19-NEXT: [[TMP150:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 2 @@ -12952,7 +12952,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD60:%.*]] = add nsw i32 [[TMP190]], 1 // CHECK19-NEXT: [[TMP191:%.*]] = zext i32 [[ADD60]] to i64 // CHECK19-NEXT: [[TMP192:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP192]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP192]], align 4 // CHECK19-NEXT: [[TMP193:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 // CHECK19-NEXT: store i32 5, ptr [[TMP193]], align 4 // CHECK19-NEXT: [[TMP194:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 @@ -14141,7 +14141,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK19-NEXT: [[TMP10:%.*]] = getelementptr 
inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -14188,7 +14188,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK19-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -14244,7 +14244,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK19-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK19-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP59]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP59]], align 4 // CHECK19-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK19-NEXT: store i32 3, ptr [[TMP60]], align 4 // CHECK19-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -14291,7 +14291,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP80:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK19-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK19-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP82]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP82]], align 4 // CHECK19-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP83]], align 4 // CHECK19-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -14347,7 +14347,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP108:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0 // CHECK19-NEXT: [[TMP109:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0 // CHECK19-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK19-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1 // CHECK19-NEXT: store i32 3, ptr [[TMP111]], align 4 // CHECK19-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp index ec85efaa80cf6..6006e4bc525c6 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_codegen.cpp @@ -268,7 +268,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP30:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0 // CHECK1-NEXT: [[TMP31:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0 // 
CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -343,7 +343,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP67]], 1 // CHECK1-NEXT: [[TMP68:%.*]] = zext i32 [[ADD14]] to i64 // CHECK1-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP69]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP69]], align 4 // CHECK1-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 4, ptr [[TMP70]], align 4 // CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -955,7 +955,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP30:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0 // CHECK3-NEXT: [[TMP31:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 2 @@ -1030,7 +1030,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP67]], 1 // CHECK3-NEXT: [[TMP68:%.*]] = zext i32 [[ADD14]] to i64 // CHECK3-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP69]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP69]], align 4 // CHECK3-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 4, ptr [[TMP70]], align 4 // CHECK3-NEXT: [[TMP71:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1891,7 +1891,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK9-NEXT: [[TMP30:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2268,7 +2268,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK11-NEXT: [[TMP30:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // 
CHECK11-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2772,7 +2772,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP13]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP13]], align 4 // CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 3, ptr [[TMP14]], align 4 // CHECK17-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3048,7 +3048,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP13]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP13]], align 4 // CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 3, ptr [[TMP14]], align 4 // CHECK19-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3482,7 +3482,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK25-NEXT: [[TMP30:%.*]] = zext i32 [[ADD]] to i64 // 
CHECK25-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK25-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK25-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK25-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK25-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK25-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3837,7 +3837,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK25-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 // CHECK25-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK25-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK25-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK25-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK25-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK25-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4130,7 +4130,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK27-NEXT: [[TMP30:%.*]] = zext i32 [[ADD]] to i64 // CHECK27-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK27-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK27-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK27-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK27-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK27-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4480,7 +4480,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK27-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 // CHECK27-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK27-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK27-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK27-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK27-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK27-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp index ffcb5f3ee3274..365f11cfac5c6 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_collapse_codegen.cpp @@ -143,7 +143,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 2 @@ -403,7 +403,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -867,7 +867,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], 1 // CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP40]], align 4 // CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1261,7 +1261,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], 
align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1585,7 +1585,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP37]], 1 // CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP38]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP38]], align 4 // CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP39]], align 4 // CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1981,7 +1981,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git 
a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp index b8655f75805a4..5072e10b2b63e 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_dist_schedule_codegen.cpp @@ -191,7 +191,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -239,7 +239,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -287,7 
+287,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -891,7 +891,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -939,7 +939,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 
0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -987,7 +987,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1872,7 +1872,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1949,7 +1949,7 @@ int main (int argc, 
char **argv) { // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK9-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK9-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK9-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK9-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -2035,7 +2035,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP105]], 1 // CHECK9-NEXT: [[TMP106:%.*]] = zext i32 [[ADD29]] to i64 // CHECK9-NEXT: [[TMP107:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP107]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP107]], align 4 // CHECK9-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK9-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -2868,7 +2868,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2915,7 +2915,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2971,7 +2971,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK9-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK9-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP59]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP59]], align 4 // CHECK9-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK9-NEXT: store i32 3, ptr [[TMP60]], align 4 // CHECK9-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -3636,7 +3636,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext 
i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3714,7 +3714,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK11-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK11-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK11-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK11-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -3801,7 +3801,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP107]], 1 // CHECK11-NEXT: [[TMP108:%.*]] = zext i32 [[ADD29]] to i64 // CHECK11-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK11-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK11-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS30]], i32 0, i32 2 @@ -4619,7 +4619,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4666,7 +4666,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -4722,7 +4722,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK11-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK11-NEXT: [[TMP59:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP59]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP59]], align 4 // CHECK11-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK11-NEXT: store i32 3, ptr [[TMP60]], align 4 // CHECK11-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp index 9e9b36f7a6507..13ada7cfe3991 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -349,7 +349,7 @@ int main() { // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -783,7 +783,7 @@ int main() { // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1456,7 +1456,7 @@ int main() { // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1884,7 +1884,7 @@ int main() { // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp index 61eb5d9c5772b..dd6ef2802b61a 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_if_codegen.cpp @@ -132,7 +132,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -173,7 +173,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -551,7 +551,7 
@@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -592,7 +592,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -650,7 +650,7 @@ int main() { // CHECK1-NEXT: [[TMP52:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1 // CHECK1-NEXT: [[TMP53:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP52]], 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: 
[[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -1214,7 +1214,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1255,7 +1255,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -1313,7 +1313,7 @@ int main() { // CHECK1-NEXT: [[TMP52:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1 // CHECK1-NEXT: 
[[TMP53:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP52]], 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -1866,7 +1866,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1907,7 +1907,7 @@ int main() { // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS5]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2285,7 +2285,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2326,7 +2326,7 @@ int main() { // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2384,7 +2384,7 @@ int main() { // CHECK3-NEXT: [[TMP52:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1 // CHECK3-NEXT: [[TMP53:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP52]], 0 // CHECK3-NEXT: 
[[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -3178,7 +3178,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3219,7 +3219,7 @@ int main() { // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK3-NEXT: 
[[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -3277,7 +3277,7 @@ int main() { // CHECK3-NEXT: [[TMP52:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1 // CHECK3-NEXT: [[TMP53:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP52]], 0 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -4421,7 +4421,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4462,7 +4462,7 @@ int main() { // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], 
i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -4840,7 +4840,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4881,7 +4881,7 @@ int main() { // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, 
i32 2 @@ -4939,7 +4939,7 @@ int main() { // CHECK9-NEXT: [[TMP52:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1 // CHECK9-NEXT: [[TMP53:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP52]], 0 // CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK9-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -5503,7 +5503,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5544,7 +5544,7 @@ int main() { // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK9-NEXT: store i32 5, ptr 
[[TMP25]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -5602,7 +5602,7 @@ int main() { // CHECK9-NEXT: [[TMP52:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1 // CHECK9-NEXT: [[TMP53:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP52]], 0 // CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK9-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -6155,7 +6155,7 @@ int main() { // CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6196,7 +6196,7 @@ int main() { // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 
0 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -6574,7 +6574,7 @@ int main() { // CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6615,7 +6615,7 @@ int main() { // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -6673,7 +6673,7 @@ int main() { // CHECK11-NEXT: [[TMP52:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1 // CHECK11-NEXT: [[TMP53:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP52]], 0 // CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK11-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK11-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -7467,7 +7467,7 @@ int main() { // CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -7508,7 +7508,7 @@ int main() { // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x 
ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -7566,7 +7566,7 @@ int main() { // CHECK11-NEXT: [[TMP52:%.*]] = select i1 [[LOADEDV]], i32 0, i32 1 // CHECK11-NEXT: [[TMP53:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP52]], 0 // CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK11-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK11-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp index 1145ca41c9e33..f7a0f6d70928b 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_lastprivate_codegen.cpp @@ -848,7 +848,7 @@ int main() { // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP28:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1328,7 +1328,7 @@ int main() { // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1876,7 +1876,7 @@ int main() { // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK11-NEXT: [[TMP30:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2350,7 +2350,7 @@ int main() { // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp index 549ee9c9a052d..e0ded2894f6d4 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_num_threads_codegen.cpp @@ -110,7 +110,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -174,7 +174,7 @@ int main() { // CHECK1-NEXT: [[TMP35:%.*]] = zext i8 [[TMP34]] to i32 // CHECK1-NEXT: [[TMP36:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP35]], 0 // CHECK1-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP37]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP37]], align 4 // CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP38]], align 4 // CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -621,7 +621,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -662,7 +662,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 
+// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -721,7 +721,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -771,7 +771,7 @@ int main() { // CHECK1-NEXT: [[TMP26:%.*]] = zext i8 [[TMP25]] to i32 // CHECK1-NEXT: [[TMP27:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP26]], 0 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP29]], align 4 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -1903,7 +1903,7 @@ int main() { // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 
0, i32 0 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1967,7 +1967,7 @@ int main() { // CHECK5-NEXT: [[TMP35:%.*]] = zext i8 [[TMP34]] to i32 // CHECK5-NEXT: [[TMP36:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP35]], 0 // CHECK5-NEXT: [[TMP37:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP37]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP37]], align 4 // CHECK5-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP38]], align 4 // CHECK5-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2414,7 +2414,7 @@ int main() { // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store 
i32 1, ptr [[TMP6]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2455,7 +2455,7 @@ int main() { // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK5-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK5-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2514,7 +2514,7 @@ int main() { // CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2564,7 +2564,7 @@ int main() { // CHECK5-NEXT: [[TMP26:%.*]] = zext i8 [[TMP25]] to i32 // CHECK5-NEXT: [[TMP27:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP26]], 0 // CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK5-NEXT: store i32 1, ptr [[TMP29]], align 4 // CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp index 0e0c43c7536c9..cd32dcf83d136 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_private_codegen.cpp @@ -276,7 +276,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -572,7 +572,7 @@ int main() { // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 
+// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1062,7 +1062,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1352,7 +1352,7 @@ int main() { // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp 
b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp index 3d8e4eda79b88..2d90f72cbc7c9 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_proc_bind_codegen.cpp @@ -76,7 +76,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -117,7 +117,7 @@ int main() { // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP25]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP26]], align 4 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -478,7 +478,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // 
CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp index 1cfb686f15caf..b94c7966f96e9 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_reduction_codegen.cpp @@ -126,7 +126,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -442,7 +442,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -755,7 +755,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1067,7 +1067,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, 
ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp index bfc126a5c8e8b..8f2bf80f413cc 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -273,7 +273,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -321,7 +321,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP32]], 
align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -369,7 +369,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -417,7 +417,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK1-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK1-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK1-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -465,7 +465,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK1-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x 
ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK1-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK1-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK1-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -1412,7 +1412,7 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK2-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1460,7 +1460,7 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK2-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -1508,7 +1508,7 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK2-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK2-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK2-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1556,7 +1556,7 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK2-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK2-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK2-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK2-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK2-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -1604,7 +1604,7 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[TMP98:%.*]] = getelementptr 
inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK2-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK2-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK2-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK2-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK2-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK2-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK2-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -2551,7 +2551,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2599,7 +2599,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK5-NEXT: 
store i32 5, ptr [[TMP31]], align 4 // CHECK5-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -2647,7 +2647,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK5-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK5-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK5-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK5-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -2695,7 +2695,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK5-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK5-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK5-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK5-NEXT: [[TMP79:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -2743,7 
+2743,7 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK5-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK5-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK5-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK5-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK5-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK5-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK5-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -3663,7 +3663,7 @@ int main (int argc, char **argv) { // CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK6-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK6-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK6-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3711,7 +3711,7 @@ int main (int argc, char **argv) { // CHECK6-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK6-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK6-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK6-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK6-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK6-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK6-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK6-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -3759,7 +3759,7 @@ int main (int argc, char **argv) { // CHECK6-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK6-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK6-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK6-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK6-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK6-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK6-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK6-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -3807,7 +3807,7 @@ int main (int argc, char **argv) { // CHECK6-NEXT: [[TMP75:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS19]], i32 0, i32 0 // CHECK6-NEXT: [[TMP76:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS20]], i32 0, i32 0 // CHECK6-NEXT: [[TMP77:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 0 -// CHECK6-NEXT: store i32 4, ptr [[TMP77]], align 4 +// CHECK6-NEXT: store i32 5, ptr [[TMP77]], align 4 // CHECK6-NEXT: [[TMP78:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 1 // CHECK6-NEXT: store i32 2, ptr [[TMP78]], align 4 // CHECK6-NEXT: [[TMP79:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS23]], i32 0, i32 2 @@ -3855,7 +3855,7 @@ int main (int argc, char **argv) { // CHECK6-NEXT: [[TMP98:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS27]], i32 0, i32 0 // CHECK6-NEXT: [[TMP99:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS28]], i32 0, i32 0 // CHECK6-NEXT: [[TMP100:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 0 -// CHECK6-NEXT: store i32 4, ptr [[TMP100]], align 4 +// CHECK6-NEXT: store i32 5, ptr [[TMP100]], align 4 // CHECK6-NEXT: [[TMP101:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 1 // CHECK6-NEXT: store i32 2, ptr [[TMP101]], align 4 // CHECK6-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS31]], i32 0, i32 2 @@ -5218,7 +5218,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK13-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK13-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK13-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK13-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK13-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -5295,7 +5295,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK13-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK13-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP64]], align 
4 +// CHECK13-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK13-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK13-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK13-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -5381,7 +5381,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP105]], 1 // CHECK13-NEXT: [[TMP106:%.*]] = zext i32 [[ADD29]] to i64 // CHECK13-NEXT: [[TMP107:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP107]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP107]], align 4 // CHECK13-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK13-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK13-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -5458,7 +5458,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD44:%.*]] = add nsw i32 [[TMP143]], 1 // CHECK13-NEXT: [[TMP144:%.*]] = zext i32 [[ADD44]] to i64 // CHECK13-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP145]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP145]], align 4 // CHECK13-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1 // CHECK13-NEXT: store i32 4, ptr [[TMP146]], align 4 // CHECK13-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 2 @@ -5544,7 +5544,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[ADD60:%.*]] = add nsw i32 [[TMP186]], 1 // CHECK13-NEXT: [[TMP187:%.*]] = zext 
i32 [[ADD60]] to i64 // CHECK13-NEXT: [[TMP188:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP188]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP188]], align 4 // CHECK13-NEXT: [[TMP189:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 // CHECK13-NEXT: store i32 5, ptr [[TMP189]], align 4 // CHECK13-NEXT: [[TMP190:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 @@ -6878,7 +6878,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK13-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK13-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK13-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -6925,7 +6925,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK13-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK13-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK13-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK13-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK13-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -6981,7 +6981,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK13-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK13-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP59]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP59]], align 4 // CHECK13-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK13-NEXT: store i32 3, ptr [[TMP60]], align 4 // CHECK13-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -7028,7 +7028,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP80:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK13-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK13-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP82]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP82]], align 4 // CHECK13-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK13-NEXT: store i32 2, ptr [[TMP83]], align 4 // CHECK13-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -7084,7 +7084,7 @@ int main (int argc, char **argv) { // CHECK13-NEXT: [[TMP108:%.*]] = getelementptr inbounds [3 x ptr], 
ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0 // CHECK13-NEXT: [[TMP109:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0 // CHECK13-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0 -// CHECK13-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK13-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK13-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1 // CHECK13-NEXT: store i32 3, ptr [[TMP111]], align 4 // CHECK13-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 2 @@ -8117,7 +8117,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK14-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK14-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK14-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK14-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK14-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK14-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -8194,7 +8194,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK14-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK14-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK14-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK14-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK14-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK14-NEXT: store 
i32 4, ptr [[TMP65]], align 4 // CHECK14-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -8280,7 +8280,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP105]], 1 // CHECK14-NEXT: [[TMP106:%.*]] = zext i32 [[ADD29]] to i64 // CHECK14-NEXT: [[TMP107:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK14-NEXT: store i32 4, ptr [[TMP107]], align 4 +// CHECK14-NEXT: store i32 5, ptr [[TMP107]], align 4 // CHECK14-NEXT: [[TMP108:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK14-NEXT: store i32 5, ptr [[TMP108]], align 4 // CHECK14-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -8357,7 +8357,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[ADD44:%.*]] = add nsw i32 [[TMP143]], 1 // CHECK14-NEXT: [[TMP144:%.*]] = zext i32 [[ADD44]] to i64 // CHECK14-NEXT: [[TMP145:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0 -// CHECK14-NEXT: store i32 4, ptr [[TMP145]], align 4 +// CHECK14-NEXT: store i32 5, ptr [[TMP145]], align 4 // CHECK14-NEXT: [[TMP146:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1 // CHECK14-NEXT: store i32 4, ptr [[TMP146]], align 4 // CHECK14-NEXT: [[TMP147:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 2 @@ -8443,7 +8443,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[ADD60:%.*]] = add nsw i32 [[TMP186]], 1 // CHECK14-NEXT: [[TMP187:%.*]] = zext i32 [[ADD60]] to i64 // CHECK14-NEXT: [[TMP188:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 -// CHECK14-NEXT: store i32 4, ptr [[TMP188]], align 4 +// 
CHECK14-NEXT: store i32 5, ptr [[TMP188]], align 4 // CHECK14-NEXT: [[TMP189:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 // CHECK14-NEXT: store i32 5, ptr [[TMP189]], align 4 // CHECK14-NEXT: [[TMP190:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 @@ -9777,7 +9777,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK14-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK14-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK14-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK14-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK14-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK14-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -9824,7 +9824,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK14-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK14-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK14-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK14-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK14-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK14-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK14-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, 
i32 2 @@ -9880,7 +9880,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK14-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK14-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK14-NEXT: store i32 4, ptr [[TMP59]], align 4 +// CHECK14-NEXT: store i32 5, ptr [[TMP59]], align 4 // CHECK14-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK14-NEXT: store i32 3, ptr [[TMP60]], align 4 // CHECK14-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -9927,7 +9927,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP80:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK14-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK14-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK14-NEXT: store i32 4, ptr [[TMP82]], align 4 +// CHECK14-NEXT: store i32 5, ptr [[TMP82]], align 4 // CHECK14-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK14-NEXT: store i32 2, ptr [[TMP83]], align 4 // CHECK14-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -9983,7 +9983,7 @@ int main (int argc, char **argv) { // CHECK14-NEXT: [[TMP108:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0 // CHECK14-NEXT: [[TMP109:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0 // CHECK14-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0 -// CHECK14-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK14-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK14-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1 // CHECK14-NEXT: store i32 3, ptr [[TMP111]], align 4 // CHECK14-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 2 @@ -11016,7 +11016,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK17-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK17-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK17-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK17-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -11094,7 +11094,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK17-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK17-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK17-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK17-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -11181,7 +11181,7 @@ int main (int argc, char 
**argv) { // CHECK17-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP107]], 1 // CHECK17-NEXT: [[TMP108:%.*]] = zext i32 [[ADD29]] to i64 // CHECK17-NEXT: [[TMP109:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK17-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK17-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK17-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -11259,7 +11259,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD44:%.*]] = add nsw i32 [[TMP146]], 1 // CHECK17-NEXT: [[TMP147:%.*]] = zext i32 [[ADD44]] to i64 // CHECK17-NEXT: [[TMP148:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP148]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP148]], align 4 // CHECK17-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP149]], align 4 // CHECK17-NEXT: [[TMP150:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 2 @@ -11346,7 +11346,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[ADD60:%.*]] = add nsw i32 [[TMP190]], 1 // CHECK17-NEXT: [[TMP191:%.*]] = zext i32 [[ADD60]] to i64 // CHECK17-NEXT: [[TMP192:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP192]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP192]], align 4 // CHECK17-NEXT: [[TMP193:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 // CHECK17-NEXT: 
store i32 5, ptr [[TMP193]], align 4 // CHECK17-NEXT: [[TMP194:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 @@ -12655,7 +12655,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -12702,7 +12702,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK17-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK17-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK17-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK17-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -12758,7 +12758,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK17-NEXT: 
[[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK17-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP59]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP59]], align 4 // CHECK17-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK17-NEXT: store i32 3, ptr [[TMP60]], align 4 // CHECK17-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -12805,7 +12805,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP80:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK17-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK17-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP82]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP82]], align 4 // CHECK17-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP83]], align 4 // CHECK17-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -12861,7 +12861,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP108:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0 // CHECK17-NEXT: [[TMP109:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0 // CHECK17-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP110]], align 4 // 
CHECK17-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1 // CHECK17-NEXT: store i32 3, ptr [[TMP111]], align 4 // CHECK17-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 2 @@ -13867,7 +13867,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK19-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK19-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK19-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK19-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -13945,7 +13945,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK19-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK19-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK19-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK19-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -14032,7 +14032,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP107]], 1 // CHECK19-NEXT: [[TMP108:%.*]] = zext i32 [[ADD29]] to i64 // CHECK19-NEXT: [[TMP109:%.*]] = getelementptr inbounds 
nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP109]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP109]], align 4 // CHECK19-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK19-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK19-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -14110,7 +14110,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD44:%.*]] = add nsw i32 [[TMP146]], 1 // CHECK19-NEXT: [[TMP147:%.*]] = zext i32 [[ADD44]] to i64 // CHECK19-NEXT: [[TMP148:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP148]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP148]], align 4 // CHECK19-NEXT: [[TMP149:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP149]], align 4 // CHECK19-NEXT: [[TMP150:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS45]], i32 0, i32 2 @@ -14197,7 +14197,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[ADD60:%.*]] = add nsw i32 [[TMP190]], 1 // CHECK19-NEXT: [[TMP191:%.*]] = zext i32 [[ADD60]] to i64 // CHECK19-NEXT: [[TMP192:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP192]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP192]], align 4 // CHECK19-NEXT: [[TMP193:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 1 // CHECK19-NEXT: store i32 5, ptr [[TMP193]], align 4 // CHECK19-NEXT: [[TMP194:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS61]], i32 0, i32 2 @@ -15506,7 +15506,7 
@@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -15553,7 +15553,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK19-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK19-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK19-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK19-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -15609,7 +15609,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP57:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK19-NEXT: [[TMP58:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK19-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP59]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP59]], align 4 // CHECK19-NEXT: [[TMP60:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK19-NEXT: store i32 3, ptr [[TMP60]], align 4 // CHECK19-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -15656,7 +15656,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP80:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS15]], i32 0, i32 0 // CHECK19-NEXT: [[TMP81:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS16]], i32 0, i32 0 // CHECK19-NEXT: [[TMP82:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP82]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP82]], align 4 // CHECK19-NEXT: [[TMP83:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP83]], align 4 // CHECK19-NEXT: [[TMP84:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS19]], i32 0, i32 2 @@ -15712,7 +15712,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP108:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_BASEPTRS23]], i32 0, i32 0 // CHECK19-NEXT: [[TMP109:%.*]] = getelementptr inbounds [3 x ptr], ptr [[DOTOFFLOAD_PTRS24]], i32 0, i32 0 // CHECK19-NEXT: [[TMP110:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP110]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP110]], align 4 // CHECK19-NEXT: [[TMP111:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 1 // CHECK19-NEXT: store i32 3, ptr [[TMP111]], align 4 // 
CHECK19-NEXT: [[TMP112:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS27]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_private_codegen.cpp index d177bc933e1ae..afda66cc1462b 100644 --- a/clang/test/OpenMP/teams_distribute_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_private_codegen.cpp @@ -254,7 +254,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -435,7 +435,7 @@ int main() { // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -807,7 +807,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -986,7 +986,7 @@ int main() { // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp index ba4522565d75b..32c5fe3b4a42f 100644 --- a/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_reduction_codegen.cpp @@ -111,7 +111,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -299,7 +299,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -484,7 +484,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -672,7 +672,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp index 8c0790b37c9f4..d5000a5b18a53 100644 --- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp @@ -308,7 +308,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP37:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP29]], 0 // CHECK1-NEXT: [[TMP38:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP32]], 0 // CHECK1-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP40]], align 4 // CHECK1-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -374,7 +374,7 @@ int main (int argc, char **argv) { // 
CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP69]], 1 // CHECK1-NEXT: [[TMP70:%.*]] = zext i32 [[ADD14]] to i64 // CHECK1-NEXT: [[TMP71:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP71]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP71]], align 4 // CHECK1-NEXT: [[TMP72:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP72]], align 4 // CHECK1-NEXT: [[TMP73:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -759,7 +759,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP37:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP29]], 0 // CHECK3-NEXT: [[TMP38:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP32]], 0 // CHECK3-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP40]], align 4 // CHECK3-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -825,7 +825,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP69]], 1 // CHECK3-NEXT: [[TMP70:%.*]] = zext i32 [[ADD14]] to i64 // CHECK3-NEXT: [[TMP71:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP71]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP71]], align 4 // CHECK3-NEXT: [[TMP72:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, 
ptr [[TMP72]], align 4 // CHECK3-NEXT: [[TMP73:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1445,7 +1445,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1675,7 +1675,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2046,7 +2046,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: 
store i32 4, ptr [[TMP20]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP20]], align 4 // CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 4, ptr [[TMP21]], align 4 // CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2232,7 +2232,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK19-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 4, ptr [[TMP22]], align 4 // CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2416,7 +2416,7 @@ int main (int argc, char **argv) { // CHECK21-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK21-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK21-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK21-NEXT: store i32 4, ptr [[TMP20]], align 4 +// CHECK21-NEXT: store i32 5, ptr [[TMP20]], align 4 // CHECK21-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK21-NEXT: store i32 4, ptr [[TMP21]], align 4 // CHECK21-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2652,7 +2652,7 @@ int main (int argc, char **argv) { // CHECK23-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK23-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK23-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK23-NEXT: store i32 4, ptr [[TMP21]], align 4 +// CHECK23-NEXT: store i32 5, ptr [[TMP21]], align 4 // CHECK23-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK23-NEXT: store i32 4, ptr [[TMP22]], align 4 // CHECK23-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3214,7 +3214,7 @@ int main (int argc, char **argv) { // CHECK33-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK33-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK33-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK33-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK33-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK33-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK33-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK33-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3435,7 +3435,7 @@ int main (int argc, char **argv) { // CHECK33-NEXT: [[TMP22:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK33-NEXT: [[TMP23:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP21]], 0 // CHECK33-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK33-NEXT: store i32 4, ptr [[TMP24]], align 4 +// 
CHECK33-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK33-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK33-NEXT: store i32 4, ptr [[TMP25]], align 4 // CHECK33-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3637,7 +3637,7 @@ int main (int argc, char **argv) { // CHECK35-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK35-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK35-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK35-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK35-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK35-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK35-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK35-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3857,7 +3857,7 @@ int main (int argc, char **argv) { // CHECK35-NEXT: [[TMP22:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK35-NEXT: [[TMP23:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP21]], 0 // CHECK35-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK35-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK35-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK35-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK35-NEXT: store i32 4, ptr [[TMP25]], align 4 // CHECK35-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4068,7 +4068,7 @@ int main (int argc, char **argv) { // CHECK37-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK37-NEXT: 
[[TMP30:%.*]] = zext i32 [[ADD]] to i64 // CHECK37-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK37-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK37-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK37-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK37-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK37-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4336,7 +4336,7 @@ int main (int argc, char **argv) { // CHECK37-NEXT: [[TMP22:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK37-NEXT: [[TMP23:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP21]], 0 // CHECK37-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK37-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK37-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK37-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK37-NEXT: store i32 4, ptr [[TMP25]], align 4 // CHECK37-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4548,7 +4548,7 @@ int main (int argc, char **argv) { // CHECK39-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP29]], 1 // CHECK39-NEXT: [[TMP30:%.*]] = zext i32 [[ADD]] to i64 // CHECK39-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK39-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK39-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK39-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK39-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK39-NEXT: [[TMP33:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -4814,7 +4814,7 @@ int main (int argc, char **argv) { // CHECK39-NEXT: [[TMP22:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK39-NEXT: [[TMP23:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP21]], 0 // CHECK39-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK39-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK39-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK39-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK39-NEXT: store i32 4, ptr [[TMP25]], align 4 // CHECK39-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp index b59b7fc57a80e..b4988768c5b17 100644 --- a/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_collapse_codegen.cpp @@ -134,7 +134,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ 
-311,7 +311,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -696,7 +696,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], 1 // CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP40]], align 4 // CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -952,7 +952,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 
5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1193,7 +1193,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP37]], 1 // CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP38]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP38]], align 4 // CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP39]], align 4 // CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1447,7 +1447,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp 
b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp index 90ea6aaac0fea..f722d6b00f514 100644 --- a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp @@ -176,7 +176,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -224,7 +224,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -272,7 +272,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr 
[[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK1-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -633,7 +633,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -681,7 +681,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS3]], i32 0, i32 0 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS4]], i32 0, i32 0 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP31]], align 4 
// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS7]], i32 0, i32 2 @@ -729,7 +729,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS11]], i32 0, i32 0 // CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS12]], i32 0, i32 0 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK3-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1380,7 +1380,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1457,7 +1457,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK9-NEXT: [[TMP63:%.*]] = zext i32 
[[ADD14]] to i64 // CHECK9-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK9-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP65]], align 4 // CHECK9-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1534,7 +1534,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP100]], 1 // CHECK9-NEXT: [[TMP101:%.*]] = zext i32 [[ADD29]] to i64 // CHECK9-NEXT: [[TMP102:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP102]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP102]], align 4 // CHECK9-NEXT: [[TMP103:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP103]], align 4 // CHECK9-NEXT: [[TMP104:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -2010,7 +2010,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] 
= getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2057,7 +2057,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK9-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK9-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2104,7 +2104,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK9-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK9-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP54]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK9-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK9-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 @@ -2505,7 +2505,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2583,7 +2583,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP63]], 1 // CHECK11-NEXT: [[TMP64:%.*]] = zext i32 [[ADD14]] to i64 // CHECK11-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP65]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP65]], align 4 // CHECK11-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP66]], align 4 // CHECK11-NEXT: [[TMP67:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -2661,7 +2661,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD29:%.*]] = add nsw i32 [[TMP102]], 1 // CHECK11-NEXT: [[TMP103:%.*]] = zext i32 [[ADD29]] to i64 // CHECK11-NEXT: [[TMP104:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP104]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP104]], align 4 // CHECK11-NEXT: [[TMP105:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP105]], align 4 // CHECK11-NEXT: [[TMP106:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS30]], i32 0, i32 2 @@ -3134,7 +3134,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: 
[[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -3181,7 +3181,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS1]], i32 0, i32 0 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP31]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP31]], align 4 // CHECK11-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP32]], align 4 // CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -3228,7 +3228,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS8]], i32 0, i32 0 // CHECK11-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS9]], i32 0, i32 0 // CHECK11-NEXT: [[TMP54:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, 
ptr [[TMP54]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP54]], align 4 // CHECK11-NEXT: [[TMP55:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP55]], align 4 // CHECK11-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS12]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp index ff6b6130cfe68..fb63d78a3dec0 100644 --- a/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp @@ -312,7 +312,7 @@ int main() { // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -609,7 +609,7 @@ int main() { // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP23]], align 4 // 
CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1145,7 +1145,7 @@ int main() { // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1440,7 +1440,7 @@ int main() { // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp 
b/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp index 249838037867e..b2785f61892cc 100644 --- a/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_lastprivate_codegen.cpp @@ -577,7 +577,7 @@ int main() { // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK9-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK9-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -902,7 +902,7 @@ int main() { // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1301,7 +1301,7 @@ int main() { // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP27:%.*]] = 
getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP28]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP28]], align 4 // CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP29]], align 4 // CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1624,7 +1624,7 @@ int main() { // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP23]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP23]], align 4 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp index 7b4304954d7ac..c4a40603dff59 100644 --- a/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_private_codegen.cpp @@ -255,7 +255,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -443,7 +443,7 @@ int main() { // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -822,7 +822,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], 
i32 0, i32 2 @@ -1008,7 +1008,7 @@ int main() { // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp index 1707da3a5524b..e43655d5b4b84 100644 --- a/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp @@ -111,7 +111,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -306,7 +306,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // 
CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -498,7 +498,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -693,7 +693,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], 
ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_firstprivate_codegen.cpp b/clang/test/OpenMP/teams_firstprivate_codegen.cpp index 5fdf78f7664eb..2febaaf46ef87 100644 --- a/clang/test/OpenMP/teams_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/teams_firstprivate_codegen.cpp @@ -336,7 +336,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK9-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -386,7 +386,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK9-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP49]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP49]], align 4 // CHECK9-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // 
CHECK9-NEXT: store i32 2, ptr [[TMP50]], align 4 // CHECK9-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -687,7 +687,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 5, ptr [[TMP20]], align 4 // CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -737,7 +737,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK9-NEXT: [[TMP42:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK9-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK9-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK9-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP45]], align 4 // CHECK9-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -1160,7 +1160,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK11-NEXT: [[TMP22:%.*]] = 
getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [6 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP24]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP24]], align 4 // CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP25]], align 4 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1210,7 +1210,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK11-NEXT: [[TMP47:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP48:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK11-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP49]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP49]], align 4 // CHECK11-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP50]], align 4 // CHECK11-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -1511,7 +1511,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [5 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP19]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP19]], align 4 // CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 5, ptr [[TMP20]], align 4 // CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1561,7 +1561,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK11-NEXT: [[TMP42:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS2]], i32 0, i32 0 // CHECK11-NEXT: [[TMP43:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS3]], i32 0, i32 0 // CHECK11-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK11-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP45]], align 4 // CHECK11-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS5]], i32 0, i32 2 @@ -2012,7 +2012,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK17-NEXT: [[TMP44:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP45:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK17-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP46]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP46]], align 4 // CHECK17-NEXT: [[TMP47:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 
9, ptr [[TMP47]], align 4 // CHECK17-NEXT: [[TMP48:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2257,7 +2257,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK17-NEXT: [[TMP55:%.*]] = getelementptr inbounds [11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP56:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK17-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP57]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP57]], align 4 // CHECK17-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 11, ptr [[TMP58]], align 4 // CHECK17-NEXT: [[TMP59:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2490,7 +2490,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK19-NEXT: [[TMP42:%.*]] = getelementptr inbounds [9 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP43:%.*]] = getelementptr inbounds [9 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK19-NEXT: [[TMP44:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP44]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP44]], align 4 // CHECK19-NEXT: [[TMP45:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 9, ptr [[TMP45]], align 4 // CHECK19-NEXT: [[TMP46:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2734,7 +2734,7 @@ void array_func(float a[3], St s[2], int n, long double vla1[n]) { // CHECK19-NEXT: [[TMP54:%.*]] = getelementptr inbounds 
[11 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP55:%.*]] = getelementptr inbounds [11 x i64], ptr [[DOTOFFLOAD_SIZES]], i32 0, i32 0 // CHECK19-NEXT: [[TMP56:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP56]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP56]], align 4 // CHECK19-NEXT: [[TMP57:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 11, ptr [[TMP57]], align 4 // CHECK19-NEXT: [[TMP58:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp b/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp index e0fcd6863e7a0..a2ee43621b377 100644 --- a/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp +++ b/clang/test/OpenMP/teams_generic_loop_codegen-1.cpp @@ -259,7 +259,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP30:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0 // CHECK1-NEXT: [[TMP31:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -325,7 +325,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK1-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK1-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK1-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK1-NEXT: store i32 3, ptr [[TMP65]], align 4 // CHECK1-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -666,7 +666,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP30:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP24]], 0 // CHECK3-NEXT: [[TMP31:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP25]], 0 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP32]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP32]], align 4 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 5, ptr [[TMP33]], align 4 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -732,7 +732,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP62]], 1 // CHECK3-NEXT: [[TMP63:%.*]] = zext i32 [[ADD14]] to i64 // CHECK3-NEXT: [[TMP64:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP64]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP64]], align 4 // CHECK3-NEXT: [[TMP65:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 1 // CHECK3-NEXT: store i32 3, ptr [[TMP65]], align 4 // CHECK3-NEXT: [[TMP66:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS15]], i32 0, i32 2 @@ -1053,7 +1053,7 
@@ int main (int argc, char **argv) { // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK9-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK9-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK9-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK9-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1271,7 +1271,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK11-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1456,7 +1456,7 @@ int main (int argc, char **argv) { // CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK17-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK17-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK17-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1609,7 +1609,7 @@ int main (int argc, char **argv) { // CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK19-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK19-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK19-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1799,7 +1799,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK25-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK25-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK25-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK25-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK25-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK25-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK25-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2006,7 +2006,7 @@ int main (int argc, char **argv) { // CHECK25-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK25-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, 
i32 [[TMP19]], 0 // CHECK25-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK25-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK25-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK25-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK25-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK25-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2201,7 +2201,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK27-NEXT: [[TMP25:%.*]] = zext i32 [[ADD]] to i64 // CHECK27-NEXT: [[TMP26:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK27-NEXT: store i32 4, ptr [[TMP26]], align 4 +// CHECK27-NEXT: store i32 5, ptr [[TMP26]], align 4 // CHECK27-NEXT: [[TMP27:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK27-NEXT: store i32 4, ptr [[TMP27]], align 4 // CHECK27-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -2407,7 +2407,7 @@ int main (int argc, char **argv) { // CHECK27-NEXT: [[TMP20:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP18]], 0 // CHECK27-NEXT: [[TMP21:%.*]] = insertvalue [3 x i32] zeroinitializer, i32 [[TMP19]], 0 // CHECK27-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK27-NEXT: store i32 4, ptr [[TMP22]], align 4 +// CHECK27-NEXT: store i32 5, ptr [[TMP22]], align 4 // CHECK27-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK27-NEXT: store i32 4, ptr [[TMP23]], align 4 // CHECK27-NEXT: [[TMP24:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp index f2e27b9bca288..3af1208803916 100644 --- a/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp +++ b/clang/test/OpenMP/teams_generic_loop_collapse_codegen.cpp @@ -138,7 +138,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -307,7 +307,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -544,7 +544,7 @@ int 
main (int argc, char **argv) { // CHECK9-NEXT: [[TMP38:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP38]], 1 // CHECK9-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP39]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP39]], align 4 // CHECK9-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 6, ptr [[TMP40]], align 4 // CHECK9-NEXT: [[TMP41:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -782,7 +782,7 @@ int main (int argc, char **argv) { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1015,7 +1015,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP37:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR_3]], align 8 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP37]], 1 // CHECK11-NEXT: [[TMP38:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP38]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP38]], align 4 // CHECK11-NEXT: [[TMP39:%.*]] = getelementptr inbounds nuw 
[[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 6, ptr [[TMP39]], align 4 // CHECK11-NEXT: [[TMP40:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1251,7 +1251,7 @@ int main (int argc, char **argv) { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp index 22e3144fe802d..9bce27a883db6 100644 --- a/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp +++ b/clang/test/OpenMP/teams_generic_loop_private_codegen.cpp @@ -274,7 +274,7 @@ int main() { // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr 
[[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -455,7 +455,7 @@ int main() { // CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: store ptr undef, ptr [[_TMP1]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -827,7 +827,7 @@ int main() { // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1006,7 +1006,7 @@ int main() { // CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: store ptr undef, ptr [[_TMP1]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 
4, ptr [[TMP5]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp b/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp index 168df88ec1d7c..8e75b30bf7233 100644 --- a/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp +++ b/clang/test/OpenMP/teams_generic_loop_reduction_codegen.cpp @@ -122,7 +122,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -309,7 +309,7 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr 
inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -493,7 +493,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -680,7 +680,7 @@ int main() { // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP10]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP11]], align 4 // CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git a/clang/test/OpenMP/teams_private_codegen.cpp b/clang/test/OpenMP/teams_private_codegen.cpp index 24900cf8e2adc..87e5d7bc855db 100644 --- 
a/clang/test/OpenMP/teams_private_codegen.cpp +++ b/clang/test/OpenMP/teams_private_codegen.cpp @@ -225,7 +225,7 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK1-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CHECK1-NEXT: store i32 5, ptr [[TMP9]], align 4 // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK1-NEXT: store i32 2, ptr [[TMP10]], align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -421,7 +421,7 @@ int main() { // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK3-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CHECK3-NEXT: store i32 5, ptr [[TMP9]], align 4 // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK3-NEXT: store i32 2, ptr [[TMP10]], align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -587,7 +587,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr 
[[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -766,7 +766,7 @@ int main() { // CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -852,7 +852,7 @@ int main() { // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP10]], align 4 // CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 
@@ -1095,7 +1095,7 @@ int main() { // CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK9-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK9-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK9-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1217,7 +1217,7 @@ int main() { // CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1396,7 +1396,7 @@ int main() { // CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP5]], align 4 
+// CHECK11-NEXT: store i32 5, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 1, ptr [[TMP6]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1482,7 +1482,7 @@ int main() { // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP10]], align 4 // CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 @@ -1725,7 +1725,7 @@ int main() { // CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_BASEPTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x ptr], ptr [[DOTOFFLOAD_PTRS]], i32 0, i32 0 // CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 0 -// CHECK11-NEXT: store i32 4, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 5, ptr [[TMP8]], align 4 // CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 1 // CHECK11-NEXT: store i32 2, ptr [[TMP9]], align 4 // CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS]], i32 0, i32 2 diff --git 
a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h index bc0e69af4071d..99be75c7e9673 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPConstants.h @@ -72,7 +72,7 @@ enum class IdentFlag { #include "llvm/Frontend/OpenMP/OMPKinds.def" // Version of the kernel argument format used by the omp runtime. -#define OMP_KERNEL_ARG_VERSION 4 +#define OMP_KERNEL_ARG_VERSION 5 // Minimum version of the compiler that generates a kernel dynamic pointer. #define OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR 3 diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h index 888870a9dc5c5..965ac358c259e 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -2047,7 +2047,7 @@ class OpenMPIRBuilder { /// /// \param DescriptorAddr Address of the descriptor to initialize /// \param DataPtr Pointer to the actual data the descriptor should reference - /// \param ElemType Type of elements in the array (may be array type) + /// \param SrcDescriptorAddr Address of the descriptor to copy metadata from /// \param DescriptorType Type of the descriptor structure /// \param DataPtrPtrGen Callback to get the base_ptr field in the descriptor /// @@ -2058,6 +2058,22 @@ class OpenMPIRBuilder { function_ref DataPtrPtrGen); + /// Allocate a by-ref reduction descriptor, copy \p SrcDescriptorAddr into it, + /// and update its data pointer to reference \p DataPtr. + /// + /// \param AllocaIP Insertion point for the descriptor allocation. + /// \param RI Reduction info containing descriptor type and access callback. + /// \param DataPtr Pointer to the actual data the descriptor should reference. + /// \param SrcDescriptorAddr Address of the descriptor to copy metadata from. + /// \param DescriptorPtrTy Pointer type expected by the descriptor consumer. 
+ /// + /// \return The new descriptor address, or an Error if descriptor generation + /// fails. + Expected createReductionDescriptorCopy( + InsertPointTy AllocaIP, const ReductionInfo &RI, Value *DataPtr, + Value *SrcDescriptorAddr, Type *DescriptorPtrTy, + const Twine &Name = ".omp.reduction.byref_descriptor"); + /// Emits reduction function. /// \param ReducerName Name of the function calling the reduction. /// \param ReductionInfos Array type containing the ReductionOps. @@ -2166,7 +2182,7 @@ class OpenMPIRBuilder { /// 4. Call the OpenMP runtime on the GPU to reduce across teams. /// The last team writes the global reduced value to memory. /// - /// ret = __kmpc_nvptx_teams_reduce_nowait(..., + /// ret = __kmpc_gpu_xteam_reduce_nowait(..., /// reduceData, shuffleReduceFn, interWarpCpyFn, /// scratchpadCopyFn, loadAndReduceFn) /// @@ -2331,18 +2347,26 @@ class OpenMPIRBuilder { /// \param IsByRef For each reduction clause, whether the reduction is by-ref. /// \param IsTeamsReduction Optional flag set if it is a teams /// reduction. + /// \param IsSPMD Optional flag set when the surrounding kernel + /// is compiled in SPMD execution mode (every + /// reduction private is then known to be a + /// per-thread scratch alloca). When false, the + /// teams-reduction call site emits per-thread + /// scratch and copies the team-local value in so + /// the runtime's cross-team work cannot race on + /// team-shared LDS storage produced by Generic + /// globalization (Generic-SPMD case after + /// OpenMPOpt SPMD-ization). /// \param GridValue Optional GPU grid value. - /// \param ReductionBufNum Optional OpenMPCUDAReductionBufNumValue to be /// used for teams reduction. /// \param SrcLocInfo Source location information global.
LLVM_ABI InsertPointOrErrorTy createReductionsGPU( const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef ReductionInfos, ArrayRef IsByRef, bool IsNoWait = false, - bool IsTeamsReduction = false, + bool IsTeamsReduction = false, bool IsSPMD = false, ReductionGenCBKind ReductionGenCBKind = ReductionGenCBKind::MLIR, - std::optional GridValue = {}, unsigned ReductionBufNum = 1024, - Value *SrcLocInfo = nullptr); + std::optional GridValue = {}, Value *SrcLocInfo = nullptr); // TODO: provide atomic and non-atomic reduction generators for reduction // operators defined by the OpenMP specification. @@ -2729,7 +2753,6 @@ class OpenMPIRBuilder { SmallVector MaxThreads = {-1}; int32_t MinThreads = 1; int32_t ReductionDataSize = 0; - int32_t ReductionBufferLength = 0; }; /// Container to pass LLVM IR runtime values or constants related to the @@ -3414,11 +3437,8 @@ class OpenMPIRBuilder { /// \param Loc The insert and source location description. /// \param TeamsReductionDataSize The maximal size of all the reduction data /// for teams reduction. - /// \param TeamsReductionBufferLength The number of elements (each of up to - /// \p TeamsReductionDataSize size), in the teams reduction buffer. 
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, - int32_t TeamsReductionDataSize = 0, - int32_t TeamsReductionBufferLength = 1024); + int32_t TeamsReductionDataSize = 0); ///} diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index ae19490de0f46..e253c838e5e28 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -96,7 +96,7 @@ __OMP_STRUCT_TYPE(AsyncInfo, __tgt_async_info, false, Int8Ptr) __OMP_STRUCT_TYPE(DependInfo, kmp_dep_info, false, SizeTy, SizeTy, Int8) __OMP_STRUCT_TYPE(Task, kmp_task_ompbuilder_t, false, VoidPtr, VoidPtr, Int32, VoidPtr, VoidPtr) __OMP_STRUCT_TYPE(ConfigurationEnvironment, ConfigurationEnvironmentTy, false, - Int8, Int8, Int8, Int32, Int32, Int32, Int32, Int32, Int32) + Int8, Int8, Int8, Int32, Int32, Int32, Int32, Int32) __OMP_STRUCT_TYPE(DynamicEnvironment, DynamicEnvironmentTy, false, Int16) __OMP_STRUCT_TYPE(KernelEnvironment, KernelEnvironmentTy, false, ConfigurationEnvironment, IdentPtr, DynamicEnvironmentPtr) @@ -492,10 +492,9 @@ __OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_shuffle_int32, false, Int32, Int32, Int16, Int16) __OMP_RTL(__kmpc_nvptx_parallel_reduce_nowait_v2, false, Int32, IdentPtr, Int64, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr) -__OMP_RTL(__kmpc_nvptx_teams_reduce_nowait_v2, false, Int32, IdentPtr, - VoidPtr, Int32, Int64, VoidPtr, ShuffleReducePtr, InterWarpCopyPtr, - GlobalListPtr, GlobalListPtr, GlobalListPtr, GlobalListPtr) -__OMP_RTL(__kmpc_reduction_get_fixed_buffer, false, VoidPtr, ) +__OMP_RTL(__kmpc_gpu_xteam_reduce_nowait, false, Int32, IdentPtr, VoidPtr, + ShuffleReducePtr, InterWarpCopyPtr, GlobalListPtr, GlobalListPtr, + GlobalListPtr) __OMP_RTL(__kmpc_shuffle_int64, false, Int64, Int64, Int16, Int16) @@ -1098,9 +1097,8 @@ __OMP_RTL_ATTRS(__kmpc_shuffle_int32, AttributeSet(), SExt, ParamAttrs(SExt, SExt, SExt)) 
__OMP_RTL_ATTRS(__kmpc_nvptx_parallel_reduce_nowait_v2, AttributeSet(), SExt, ParamAttrs()) -__OMP_RTL_ATTRS(__kmpc_nvptx_teams_reduce_nowait_v2, AttributeSet(), SExt, - ParamAttrs(AttributeSet(), AttributeSet(), ZExt)) -__OMP_RTL_ATTRS(__kmpc_reduction_get_fixed_buffer, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_gpu_xteam_reduce_nowait, AttributeSet(), SExt, + ParamAttrs()) __OMP_RTL_ATTRS(__kmpc_shuffle_int64, AttributeSet(), AttributeSet(), ParamAttrs(AttributeSet(), SExt, SExt)) diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 92eb7de0d882f..f8c1999fe1b89 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3896,6 +3896,31 @@ OpenMPIRBuilder::generateReductionDescriptor( return Builder.saveIP(); } +Expected OpenMPIRBuilder::createReductionDescriptorCopy( + InsertPointTy AllocaIP, const ReductionInfo &RI, Value *DataPtr, + Value *SrcDescriptorAddr, Type *DescriptorPtrTy, const Twine &Name) { + InsertPointTy OldIP = Builder.saveIP(); + Builder.restoreIP(AllocaIP); + + AllocaInst *DescriptorAlloca = + Builder.CreateAlloca(RI.ByRefAllocatedType, nullptr, Name); + DescriptorAlloca->setAlignment( + M.getDataLayout().getPrefTypeAlign(RI.ByRefAllocatedType)); + Value *DescriptorAddr = Builder.CreatePointerBitCastOrAddrSpaceCast( + DescriptorAlloca, DescriptorPtrTy, + DescriptorAlloca->getName() + ".ascast"); + + Builder.restoreIP(OldIP); + + InsertPointOrErrorTy GenResult = + generateReductionDescriptor(DescriptorAddr, DataPtr, SrcDescriptorAddr, + RI.ByRefAllocatedType, RI.DataPtrPtrGen); + if (!GenResult) + return GenResult.takeError(); + + return DescriptorAddr; +} + Expected OpenMPIRBuilder::emitListToGlobalCopyFunction( ArrayRef ReductionInfos, Type *ReductionsBufferTy, AttributeList FuncAttrs, ArrayRef IsByRef) { @@ -4098,15 +4123,6 @@ Expected OpenMPIRBuilder::emitListToGlobalReduceFunction( ReductionsBufferTy, BufferVD, 0, 
En.index()); if (!IsByRef.empty() && IsByRef[En.index()] && RI.DataPtrPtrGen) { - InsertPointTy OldIP = Builder.saveIP(); - Builder.restoreIP(AllocaIP); - - Value *ByRefAlloc = Builder.CreateAlloca(RI.ByRefAllocatedType); - ByRefAlloc = Builder.CreatePointerBitCastOrAddrSpaceCast( - ByRefAlloc, Builder.getPtrTy(), ByRefAlloc->getName() + ".ascast"); - - Builder.restoreIP(OldIP); - // Get source descriptor from the reduce list argument Value *ReduceList = Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast); @@ -4118,14 +4134,12 @@ Expected OpenMPIRBuilder::emitListToGlobalReduceFunction( Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrPtr); // Copy descriptor from source and update base_ptr to global buffer data - InsertPointOrErrorTy GenResult = - generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr, - RI.ByRefAllocatedType, RI.DataPtrPtrGen); + Expected ByRefAlloc = createReductionDescriptorCopy( + AllocaIP, RI, GlobValPtr, SrcDescriptorAddr, Builder.getPtrTy()); + if (!ByRefAlloc) + return ByRefAlloc.takeError(); - if (!GenResult) - return GenResult.takeError(); - - Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr); + Builder.CreateStore(*ByRefAlloc, TargetElementPtrPtr); } else { Builder.CreateStore(GlobValPtr, TargetElementPtrPtr); } @@ -4340,15 +4354,6 @@ Expected OpenMPIRBuilder::emitGlobalToListReduceFunction( ReductionsBufferTy, BufferVD, 0, En.index()); if (!IsByRef.empty() && IsByRef[En.index()] && RI.DataPtrPtrGen) { - InsertPointTy OldIP = Builder.saveIP(); - Builder.restoreIP(AllocaIP); - - Value *ByRefAlloc = Builder.CreateAlloca(RI.ByRefAllocatedType); - ByRefAlloc = Builder.CreatePointerBitCastOrAddrSpaceCast( - ByRefAlloc, Builder.getPtrTy(), ByRefAlloc->getName() + ".ascast"); - - Builder.restoreIP(OldIP); - // Get source descriptor from the reduce list Value *ReduceListVal = Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast); @@ -4360,13 +4365,12 @@ Expected 
OpenMPIRBuilder::emitGlobalToListReduceFunction( Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrPtr); // Copy descriptor from source and update base_ptr to global buffer data - InsertPointOrErrorTy GenResult = - generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr, - RI.ByRefAllocatedType, RI.DataPtrPtrGen); - if (!GenResult) - return GenResult.takeError(); + Expected ByRefAlloc = createReductionDescriptorCopy( + AllocaIP, RI, GlobValPtr, SrcDescriptorAddr, Builder.getPtrTy()); + if (!ByRefAlloc) + return ByRefAlloc.takeError(); - Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr); + Builder.CreateStore(*ByRefAlloc, TargetElementPtrPtr); } else { Builder.CreateStore(GlobValPtr, TargetElementPtrPtr); } @@ -4532,9 +4536,9 @@ checkReductionInfos(ArrayRef ReductionInfos, OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU( const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef ReductionInfos, - ArrayRef IsByRef, bool IsNoWait, bool IsTeamsReduction, + ArrayRef IsByRef, bool IsNoWait, bool IsTeamsReduction, bool IsSPMD, ReductionGenCBKind ReductionGenCBKind, std::optional GridValue, - unsigned ReductionBufNum, Value *SrcLocInfo) { + Value *SrcLocInfo) { if (!updateToLocation(Loc)) return InsertPointTy(); Builder.restoreIP(CodeGenIP); @@ -4637,16 +4641,13 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU( Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy); - // NOTE: ReductionDataSize is passed as the reduce_data_size - // argument to __kmpc_nvptx_{parallel,teams}_reduce_nowait_v2, but - // the runtime implementations do not currently use it. The teams - // runtime reads ReductionDataSize from KernelEnvironmentTy instead - // (set separately via TargetKernelDefaultAttrs). 
It is computed - // here conservatively as max(element sizes) * N rather than the - // exact sum, which over-calculates the size for mixed reduction - // types but is harmless given the argument is unused. - // TODO: Consider dropping this computation if the runtime API is - // ever revised to remove the unused parameter. + // NOTE: ReductionDataSize is passed as the reduce_data_size argument to + // __kmpc_nvptx_parallel_reduce_nowait_v2, but the runtime implementations do + // not currently use it. It is computed here conservatively as max(element + // sizes) * N rather than the exact sum, which over-calculates the size for + // mixed reduction types but is harmless given the argument is unused. + // TODO: Consider dropping this computation if the runtime API is ever revised + // to remove the unused parameter. unsigned MaxDataSize = 0; SmallVector ReductionTypeArgs; for (auto En : enumerate(ReductionInfos)) { @@ -4663,6 +4664,16 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU( } Value *ReductionDataSize = Builder.getInt64(MaxDataSize * ReductionInfos.size()); + + // Helper function to copy thread-local data back to the original reduction + // list. + Function *CopyScratchToListFunc = nullptr; + // Thread-local storage for the reduction variables. + Value *ScratchForCopyBack = nullptr; + // RL pointer to which the final value from the per-thread scratch should be + // copied back. (Basically RL, appropriately cast if necessary.)
+ Value *RLForCopyBack = RL; + if (!IsTeamsReduction) { Value *SarFuncCast = Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy); @@ -4677,19 +4688,12 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU( CodeGenIP = Builder.saveIP(); StructType *ReductionsBufferTy = StructType::create( Ctx, ReductionTypeArgs, "struct._globalized_locals_ty"); - Function *RedFixedBufferFn = getOrCreateRuntimeFunctionPtr( - RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer); Expected LtGCFunc = emitListToGlobalCopyFunction( ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef); if (!LtGCFunc) return LtGCFunc.takeError(); - Expected LtGRFunc = emitListToGlobalReduceFunction( - ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef); - if (!LtGRFunc) - return LtGRFunc.takeError(); - Expected GtLCFunc = emitGlobalToListCopyFunction( ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef); if (!GtLCFunc) @@ -4702,23 +4706,93 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU( Builder.restoreIP(CodeGenIP); - Value *KernelTeamsReductionPtr = createRuntimeFunctionCall( - RedFixedBufferFn, {}, "_openmp_teams_reductions_buffer_$_$ptr"); - - Value *Args3[] = {SrcLocInfo, - KernelTeamsReductionPtr, - Builder.getInt32(ReductionBufNum), - ReductionDataSize, - RL, - *SarFunc, - WcFunc, - *LtGCFunc, - *LtGRFunc, - *GtLCFunc, - *GtLRFunc}; + // The runtime's cross-team final aggregate uses the storage pointed at by + // its reduce-list argument as per-thread scratch. When the surrounding + // kernel is already in SPMD execution mode, clang emitted each reduction + // private as a per-thread `alloca addrspace(5)`, so the original red_list + // (RL) is already per-thread and nothing else is needed. + // + // When the kernel is in Non-SPMD execution mode at codegen time, clang's + // Generic-mode globalization put the reduction private into team-shared + // LDS. 
OpenMPOpt may later upgrade the kernel to Generic-SPMD, at which + // point all threads of the last team would race on the shared LDS slot. + // Emit a per-thread scratch buffer and a per-thread RL, copy the team-local + // value in, and hand the per-thread RL to the runtime instead. The writer + // thread copies the final value from that per-thread scratch back to RL + // before running the existing combine path below. + + // Thread-local RL (might need localization below before being passed to the + // runtime). + Value *RuntimeRL = RL; + + if (!IsSPMD) { + CodeGenIP = Builder.saveIP(); + Builder.restoreIP(AllocaIP); + // Allocate thread-local buffer for the reduction variables. + Value *PerThreadScratchAlloca = Builder.CreateAlloca( + ReductionsBufferTy, /*ArraySize=*/nullptr, ".omp.reduction.scratch"); + Value *PerThreadScratch = Builder.CreatePointerBitCastOrAddrSpaceCast( + PerThreadScratchAlloca, PtrTy, + PerThreadScratchAlloca->getName() + ".ascast"); + // Allocate thread-local buffer for the pointers to the reduction + // variables. + Value *PerThreadRedListAlloca = + Builder.CreateAlloca(RedArrayTy, /*ArraySize=*/nullptr, + ".omp.reduction.per_thread_red_list"); + RuntimeRL = Builder.CreatePointerBitCastOrAddrSpaceCast( + PerThreadRedListAlloca, PtrTy, + PerThreadRedListAlloca->getName() + ".ascast"); + Builder.restoreIP(CodeGenIP); + + // Iterate over the reduction variables and copy the team-local value to + // the thread-local buffer. 
+ for (auto En : enumerate(ReductionInfos)) { + const ReductionInfo &RI = En.value(); + bool IsByRefElem = !IsByRef.empty() && IsByRef[En.index()]; + + Value *FieldPtr = Builder.CreateConstInBoundsGEP2_32( + ReductionsBufferTy, PerThreadScratch, 0, En.index()); + Value *Slot = Builder.CreateConstInBoundsGEP2_32(RedArrayTy, RuntimeRL, + 0, En.index()); + + Value *RuntimeListEntry = FieldPtr; + if (IsByRefElem && RI.DataPtrPtrGen) { + Value *SrcDescriptor = + Builder.CreateLoad(RI.ElementType, RI.PrivateVariable); + Expected Descriptor = createReductionDescriptorCopy( + AllocaIP, RI, FieldPtr, SrcDescriptor, PtrTy); + if (!Descriptor) + return Descriptor.takeError(); + RuntimeListEntry = *Descriptor; + } + Builder.CreateStore(RuntimeListEntry, Slot); + } + // The copy helpers were emitted with default-AS (AS 0) pointer params + // (see emitListToGlobalCopyFunction / emitGlobalToListCopyFunction), + // but PerThreadScratch and RL live in the target's default AS, which + // is non-zero on e.g. SPIRV. (See Config.getDefaultTargetAS().) + Type *CopyArg0Ty = (*LtGCFunc)->getFunctionType()->getParamType(0); + Type *CopyArg2Ty = (*LtGCFunc)->getFunctionType()->getParamType(2); + ScratchForCopyBack = Builder.CreatePointerBitCastOrAddrSpaceCast( + PerThreadScratch, CopyArg0Ty); + RLForCopyBack = + Builder.CreatePointerBitCastOrAddrSpaceCast(RL, CopyArg2Ty); + // Use index 0 because there is no array of target values to index into, + // there is only one thread-local memory slot. + // restoreIP above left a stale/empty debug location; this inlinable call + // to a debug-info-bearing helper needs one or the verifier rejects the + // module ("!dbg attachment points at wrong subprogram") after inlining. 
+ Builder.SetCurrentDebugLocation(Loc.DL); + Builder.CreateCall( + *LtGCFunc, {ScratchForCopyBack, Builder.getInt32(0), RLForCopyBack}); + CopyScratchToListFunc = *GtLCFunc; + } + + Value *Args3[] = {SrcLocInfo, RuntimeRL, *SarFunc, WcFunc, + *LtGCFunc, *GtLCFunc, *GtLRFunc}; Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr( - RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2); + RuntimeFunction::OMPRTL___kmpc_gpu_xteam_reduce_nowait); Res = createRuntimeFunctionCall(TeamsReduceFn, Args3); } @@ -4734,11 +4808,23 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU( // break; emitBlock(ThenBB, CurFunc); + // Copy the writer thread's per-thread scratch result back into the original + // red-list storage before the existing combine path reads RI.PrivateVariable. + // Set a debug location: this inlinable call to a debug-info-bearing helper + // needs one or the verifier rejects the module after inlining. + if (ScratchForCopyBack) { + Builder.SetCurrentDebugLocation(Loc.DL); + Builder.CreateCall( + CopyScratchToListFunc, + {ScratchForCopyBack, Builder.getInt32(0), RLForCopyBack}); + } + // Add emission of __kmpc_end_reduce{_nowait}(); for (auto En : enumerate(ReductionInfos)) { const ReductionInfo &RI = En.value(); Type *ValueType = RI.ElementType; Value *RedValue = RI.Variable; + Value *RHS = Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy); @@ -8197,8 +8283,6 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit( Constant *MaxTeams = ConstantInt::getSigned(Int32, Attrs.MaxTeams.front()); Constant *ReductionDataSize = ConstantInt::getSigned(Int32, Attrs.ReductionDataSize); - Constant *ReductionBufferLength = - ConstantInt::getSigned(Int32, Attrs.ReductionBufferLength); Function *Fn = getOrCreateRuntimeFunctionPtr( omp::RuntimeFunction::OMPRTL___kmpc_target_init); @@ -8230,7 +8314,6 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit( MinTeams, MaxTeams, ReductionDataSize, - 
ReductionBufferLength, }); Constant *KernelEnvironmentInitializer = ConstantStruct::get( KernelEnvironment, { @@ -8295,8 +8378,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit( } void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, - int32_t TeamsReductionDataSize, - int32_t TeamsReductionBufferLength) { + int32_t TeamsReductionDataSize) { if (!updateToLocation(Loc)) return; @@ -8305,7 +8387,7 @@ void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, createRuntimeFunctionCall(Fn, {}); - if (!TeamsReductionBufferLength || !TeamsReductionDataSize) + if (!TeamsReductionDataSize) return; Function *Kernel = Builder.GetInsertBlock()->getParent(); @@ -8321,9 +8403,6 @@ void OpenMPIRBuilder::createTargetDeinit(const LocationDescription &Loc, auto *NewInitializer = ConstantFoldInsertValueInstruction( KernelEnvironmentInitializer, ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7}); - NewInitializer = ConstantFoldInsertValueInstruction( - NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength), - {0, 8}); KernelEnvironmentGV->setInitializer(NewInitializer); } diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 31e9d41ca3410..e1287e1353c86 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -4948,7 +4948,7 @@ struct AAKernelInfoCallSite : AAKernelInfo { case OMPRTL___kmpc_end_master: case OMPRTL___kmpc_barrier: case OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2: - case OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2: + case OMPRTL___kmpc_gpu_xteam_reduce_nowait: case OMPRTL___kmpc_error: case OMPRTL___kmpc_flush: case OMPRTL___kmpc_get_hardware_thread_id_in_block: diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index e5259b7e91411..f2afc67426ed5 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ 
-724,7 +724,7 @@ declare i32 @__kmpc_masked(ptr, i32, i32); declare i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr, i64, ptr, ptr, ptr); -declare i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr, ptr, i32, i64, ptr, ptr, ptr, ptr, ptr, ptr, ptr); +declare i32 @__kmpc_gpu_xteam_reduce_nowait(ptr, ptr, ptr, ptr, ptr, ptr, ptr); declare i32 @__kmpc_omp_reg_task_with_affinity(ptr, i32, ptr, i32, ptr); @@ -1366,7 +1366,7 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr, ; CHECK: declare i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr, i64, ptr, ptr, ptr) ; CHECK-NOT: Function Attrs -; CHECK: declare i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr, ptr, i32, i64, ptr, ptr, ptr, ptr, ptr, ptr, ptr) +; CHECK: declare i32 @__kmpc_gpu_xteam_reduce_nowait(ptr, ptr, ptr, ptr, ptr, ptr, ptr) ; CHECK: ; Function Attrs: nounwind ; CHECK: declare i32 @__kmpc_omp_reg_task_with_affinity(ptr, i32, ptr, i32, ptr) @@ -2008,7 +2008,7 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr, ; OPTIMISTIC: declare i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr, i64, ptr, ptr, ptr) ; OPTIMISTIC-NOT: Function Attrs -; OPTIMISTIC: declare i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr, ptr, i32, i64, ptr, ptr, ptr, ptr, ptr, ptr, ptr) +; OPTIMISTIC: declare i32 @__kmpc_gpu_xteam_reduce_nowait(ptr, ptr, ptr, ptr, ptr, ptr, ptr) ; OPTIMISTIC: nofree nosync nounwind willreturn ; OPTIMISTIC: declare i32 @__kmpc_omp_reg_task_with_affinity(ptr nofree readonly captures(none), i32, ptr nofree readonly captures(none), i32, ptr nofree readonly captures(none)) @@ -2666,7 +2666,7 @@ declare i32 @__tgt_target_kernel_nowait(ptr, i64, i32, i32, ptr, ptr, i32, ptr, ; EXT: declare signext i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(ptr, i64, ptr, ptr, ptr) ; EXT-NOT: Function Attrs -; EXT: declare signext i32 @__kmpc_nvptx_teams_reduce_nowait_v2(ptr, ptr, i32 zeroext, i64, ptr, ptr, ptr, ptr, ptr, ptr, ptr) +; EXT: declare signext i32 
@__kmpc_gpu_xteam_reduce_nowait(ptr, ptr, ptr, ptr, ptr, ptr, ptr) ; EXT: ; Function Attrs: nounwind ; EXT: declare signext i32 @__kmpc_omp_reg_task_with_affinity(ptr, i32 signext, ptr, i32 signext, ptr) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 703f72d1ab5bc..77cc7a388a984 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -8194,10 +8194,6 @@ initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp, attrs.MinThreads = 1; attrs.MaxThreads.front() = combinedMaxThreadsVal; attrs.ReductionDataSize = reductionDataSize; - // TODO: Allow modified buffer length similar to - // fopenmp-cuda-teams-reduction-recs-num flag in clang. - if (attrs.ReductionDataSize != 0) - attrs.ReductionBufferLength = 1024; } /// Gather LLVM runtime values for all clauses evaluated in the host that are diff --git a/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir b/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir index afa07c93851df..ee289d59c7172 100644 --- a/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir +++ b/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir @@ -83,18 +83,10 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 : // CHECK: store float %[[ALLOC_VAL]], ptr %[[GLOB_ELEM_PTR]], align 4 // CHECK: } -// CHECK: define internal void @_omp_reduction_list_to_global_reduce_func({{.*}}) {{.*}} { -// Allocate a descriptor to manage the element retrieved from the globalized local array. 
-// CHECK: %[[ALLOC_DESC:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8 }, align 8, addrspace(5) -// CHECK: %[[ALLOC_DESC_ASCAST:.*]] = addrspacecast ptr addrspace(5) %[[ALLOC_DESC]] to ptr - -// CHECK: %[[RED_ARR_LIST:.*]] = getelementptr inbounds [1 x ptr], ptr %{{.*}}, i64 0, i64 0 -// CHECK: %[[GLOB_ELEM_PTR:.*]] = getelementptr inbounds %[[GLOBALIZED_LOCALS]], ptr %{{.*}}, i32 0, i32 0 -// CHECK: %[[ALLOC_PTR_PTR:.*]] = getelementptr { ptr, i64, i32, i8, i8, i8, i8 }, ptr %[[ALLOC_DESC_ASCAST]], i32 0, i32 0 -// Store the pointer to the gloalized local element into the locally allocated descriptor. -// CHECK: store ptr %[[GLOB_ELEM_PTR]], ptr %[[ALLOC_PTR_PTR]], align 8 -// CHECK: store ptr %[[ALLOC_DESC_ASCAST]], ptr %[[RED_ARR_LIST]], align 8 -// CHECK: } +// Note: the _omp_reduction_list_to_global_reduce_func helper is intentionally +// no longer emitted by OpenMPIRBuilder::createReductionsGPU after the move to +// __kmpc_gpu_xteam_reduce_nowait, which does not take that callback. 
+// CHECK-NOT: define internal void @_omp_reduction_list_to_global_reduce_func // CHECK: define internal void @_omp_reduction_global_to_list_copy_func({{.*}}) {{.*}} { // CHECK: %[[RED_ARR_LIST:.*]] = getelementptr inbounds [1 x ptr], ptr %{{.*}}, i64 0, i64 0 diff --git a/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir index 8950db3fc48aa..36785b5040aa0 100644 --- a/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir @@ -103,12 +103,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo } // CHECK: kernel_environment = -// CHECK-SAME: i32 24, i32 1024 +// CHECK-SAME: i32 24 // CHECK: call void @[[OUTLINED:__omp_offloading_[A-Za-z0-9_.]*]] -// CHECK: %[[MASTER:.+]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2 +// CHECK: %[[MASTER:.+]] = call i32 @__kmpc_gpu_xteam_reduce_nowait // CHECK: icmp eq i32 %[[MASTER]], 1 // CHECK: i1 %{{.+}}, label %[[THEN:[A-Za-z0-9_.]*]], label %[[DONE:[A-Za-z0-9_.]*]] // CHECK: [[THEN]]: +// CHECK-NEXT: call void @_omp_reduction_global_to_list_copy_func // CHECK-NEXT: %[[FINAL_LHS0:[A-Za-z0-9_.]*]] = load double // CHECK-NEXT: %[[FINAL_RHS0:[A-Za-z0-9_.]*]] = load double // CHECK-NEXT: %[[FINAL_RESULT0:[A-Za-z0-9_.]*]] = fadd contract double %[[FINAL_LHS0]], %[[FINAL_RHS0]] diff --git a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir index 76a842e7138f2..5fefa6869340f 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir @@ -32,7 +32,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo // CHECK: @[[SRC_LOC:.*]] = private unnamed_addr constant [23 x i8] c"{{[^"]*}}", align 1 // CHECK: @[[IDENT:.*]] = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @[[SRC_LOC]] }, 
align 8 // CHECK: @[[DYNA_ENV:.*]] = weak_odr protected global %struct.DynamicEnvironmentTy zeroinitializer -// CHECK: @[[KERNEL_ENV:.*]] = weak_odr protected constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 1, i32 256, i32 -1, i32 -1, i32 0, i32 0 }, ptr @[[IDENT]], ptr @[[DYNA_ENV]] } +// CHECK: @[[KERNEL_ENV:.*]] = weak_odr protected constant %struct.KernelEnvironmentTy { %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 1, i32 1, i32 256, i32 -1, i32 -1, i32 0 }, ptr @[[IDENT]], ptr @[[DYNA_ENV]] } // CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{[^_]+}}_{{[^_]+}}_omp_target_region__l{{[0-9]+}}(ptr %[[ADDR_A:.*]], ptr %[[ADDR_B:.*]], ptr %[[ADDR_C:.*]], ptr %[[DYN_PTR:.*]]) // CHECK: %[[TMP_A:.*]] = alloca ptr, align 8, addrspace(5) // CHECK: %[[ASCAST_A:.*]] = addrspacecast ptr addrspace(5) %[[TMP_A]] to ptr diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction-array-descriptor.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction-array-descriptor.mlir index 663b78261e06c..e4ac990942f76 100644 --- a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction-array-descriptor.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction-array-descriptor.mlir @@ -52,11 +52,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 : // reduction. The by-ref element type is [4 x i32] = 16 bytes, so the // struct should be {[4 x i32]} = 16 bytes. Failing to account for the by-ref // indirection would result in a struct of {ptr} = 8 bytes. -// AMDGCN: @{{.*}}_kernel_environment = {{.*}} %struct.ConfigurationEnvironmentTy { {{.*}}i32 16, i32 1024 } - -// Verify the reduce_data_size argument to __kmpc_nvptx_teams_reduce_nowait_v2 -// matches the by-ref element type size (16), not the pointer size (8). 
-// AMDGCN: call i32 @__kmpc_nvptx_teams_reduce_nowait_v2({{.*}}, i32 1024, i64 16, +// ReductionBufferLength no longer exists: the offload plugin sizes the teams +// reduction buffer at launch from the actual number of teams. +// AMDGCN: @{{.*}}_kernel_environment = {{.*}} %struct.ConfigurationEnvironmentTy { {{.*}}i32 16 } // Verify descriptor is copied via memcpy and base_ptr is updated in all helpers // AMDGCN-LABEL: define internal void @_omp_reduction_shuffle_and_reduce_func @@ -64,10 +62,9 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 : // AMDGCN: getelementptr {{.*}} ptr {{%.*}}, i32 0, i32 0 // AMDGCN: store ptr {{%.*}}, ptr -// AMDGCN-LABEL: define internal void @_omp_reduction_list_to_global_reduce_func -// AMDGCN: call void @llvm.memcpy{{.*}}(ptr {{.*}}, ptr {{.*}}, i64 {{[0-9]+}}, i1 false) -// AMDGCN: getelementptr {{.*}} ptr {{%.*}}, i32 0, i32 0 -// AMDGCN: store ptr {{%.*}}, ptr +// No longer emitted: __kmpc_gpu_xteam_reduce_nowait does not take the +// list-to-global reduce callback, so createReductionsGPU stops emitting it. 
+// AMDGCN-NOT: define internal void @_omp_reduction_list_to_global_reduce_func // AMDGCN-LABEL: define internal void @_omp_reduction_global_to_list_copy_func // AMDGCN: call void @llvm.memcpy{{.*}}(ptr {{.*}}, ptr {{.*}}, i64 {{[0-9]+}}, i1 false) @@ -121,8 +118,7 @@ module attributes {llvm.target_triple = "nvptx64-nvidia-cuda", omp.is_gpu = true } } -// NVPTX: @{{.*}}_kernel_environment = {{.*}} %struct.ConfigurationEnvironmentTy { {{.*}}i32 16, i32 1024 } -// NVPTX: call i32 @__kmpc_nvptx_teams_reduce_nowait_v2({{.*}}, i32 1024, i64 16, +// NVPTX: @{{.*}}_kernel_environment = {{.*}} %struct.ConfigurationEnvironmentTy { {{.*}}i32 16 } // Verify descriptor is copied via memcpy and base_ptr is updated in all helpers // NVPTX-LABEL: define internal void @_omp_reduction_shuffle_and_reduce_func @@ -130,10 +126,9 @@ module attributes {llvm.target_triple = "nvptx64-nvidia-cuda", omp.is_gpu = true // NVPTX: getelementptr {{.*}} ptr {{%.*}}, i32 0, i32 0 // NVPTX: store ptr {{%.*}}, ptr -// NVPTX-LABEL: define internal void @_omp_reduction_list_to_global_reduce_func -// NVPTX: call void @llvm.memcpy{{.*}}(ptr {{.*}}, ptr {{.*}}, i64 {{[0-9]+}}, i1 false) -// NVPTX: getelementptr {{.*}} ptr {{%.*}}, i32 0, i32 0 -// NVPTX: store ptr {{%.*}}, ptr +// No longer emitted: __kmpc_gpu_xteam_reduce_nowait does not take the +// list-to-global reduce callback, so createReductionsGPU stops emitting it. 
+// NVPTX-NOT: define internal void @_omp_reduction_list_to_global_reduce_func // NVPTX-LABEL: define internal void @_omp_reduction_global_to_list_copy_func // NVPTX: call void @llvm.memcpy{{.*}}(ptr {{.*}}, ptr {{.*}}, i64 {{[0-9]+}}, i1 false) diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir index 8cb6594f9130e..42c11a7c756d8 100644 --- a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir @@ -52,11 +52,12 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo // CHECK: call i32 @__kmpc_target_init // CHECK: call void @[[OUTLINED:__omp_offloading_[A-Za-z0-9_.]*]] // CHECK: define internal void @[[OUTLINED]] -// CHECK: %[[MASTER:.+]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2 +// CHECK: %[[MASTER:.+]] = call i32 @__kmpc_gpu_xteam_reduce_nowait // CHECK: icmp eq i32 %[[MASTER]], 1 // CHECK: i1 %{{.+}}, label %[[THEN:[A-Za-z0-9_.]*]], label %[[DONE:[A-Za-z0-9_.]*]] // CHECK: [[THEN]]: +// CHECK-NEXT: call void @_omp_reduction_global_to_list_copy_func // CHECK-NEXT: %[[FINAL_LHS:[A-Za-z0-9_.]*]] = load i32 // CHECK-NEXT: %[[FINAL_RHS:[A-Za-z0-9_.]*]] = load i32 // CHECK-NEXT: %[[FINAL_RESULT:[A-Za-z0-9_.]*]] = add i32 %[[FINAL_LHS]], %[[FINAL_RHS]] diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir index a6154235ec874..865a2701a5eb5 100644 --- a/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir @@ -58,10 +58,11 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo // CHECK: call i32 @__kmpc_target_init // CHECK: call void @[[OUTLINED:__omp_offloading_[A-Za-z0-9_.]*]] -// CHECK: %[[MASTER:.+]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2 +// CHECK: %[[MASTER:.+]] = call i32 
@__kmpc_gpu_xteam_reduce_nowait // CHECK: icmp eq i32 %[[MASTER]], 1 // CHECK: i1 %{{.+}}, label %[[THEN:[A-Za-z0-9_.]*]], label %[[DONE:[A-Za-z0-9_.]*]] // CHECK: [[THEN]]: +// CHECK-NEXT: call void @_omp_reduction_global_to_list_copy_func // CHECK-NEXT: %[[FINAL_LHS:[A-Za-z0-9_.]*]] = load i32 // CHECK-NEXT: %[[FINAL_RHS:[A-Za-z0-9_.]*]] = load i32 // CHECK-NEXT: %[[FINAL_RESULT:[A-Za-z0-9_.]*]] = add i32 %[[FINAL_LHS]], %[[FINAL_RHS]] diff --git a/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir b/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir index e27f7fe4b2e7e..7183b55bedc86 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir @@ -3,13 +3,13 @@ // CHECK: @[[EXEC_MODE1:.*]] = weak protected constant i8 1 // CHECK: @llvm.compiler.used{{.*}} = appending global [1 x ptr] [ptr @[[EXEC_MODE1]]], section "llvm.metadata" // CHECK: @[[KERNEL1_ENV:.*_kernel_environment]] = weak_odr protected constant %struct.KernelEnvironmentTy { -// CHECK-SAME: %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 [[EXEC_MODE1:1]], i32 [[MIN_THREADS1:1]], i32 [[MAX_THREADS1:10]], i32 [[MIN_TEAMS1:1]], i32 [[MAX_TEAMS1:-1]], i32 0, i32 0 }, +// CHECK-SAME: %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 [[EXEC_MODE1:1]], i32 [[MIN_THREADS1:1]], i32 [[MAX_THREADS1:10]], i32 [[MIN_TEAMS1:1]], i32 [[MAX_TEAMS1:-1]], i32 0 }, // CHECK-SAME: ptr @{{.*}}, ptr @{{.*}} } // CHECK: @[[EXEC_MODE2:.*]] = weak protected constant i8 1 // CHECK: @llvm.compiler.used{{.*}} = appending global [1 x ptr] [ptr @[[EXEC_MODE2]]], section "llvm.metadata" // CHECK: @[[KERNEL2_ENV:.*_kernel_environment]] = weak_odr protected constant %struct.KernelEnvironmentTy { -// CHECK-SAME: %struct.ConfigurationEnvironmentTy { i8 1, i8 1, i8 [[EXEC_MODE2:1]], i32 [[MIN_THREADS2:1]], i32 [[MAX_THREADS2:30]], i32 [[MIN_TEAMS2:40]], i32 [[MAX_TEAMS2:40]], i32 0, i32 0 }, +// CHECK-SAME: %struct.ConfigurationEnvironmentTy { i8 
1, i8 1, i8 [[EXEC_MODE2:1]], i32 [[MIN_THREADS2:1]], i32 [[MAX_THREADS2:30]], i32 [[MIN_TEAMS2:40]], i32 [[MAX_TEAMS2:40]], i32 0 }, // CHECK-SAME: ptr @{{.*}}, ptr @{{.*}} } module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} { diff --git a/offload/include/Shared/Environment.h b/offload/include/Shared/Environment.h index 142fba40340e6..35cba3b47b186 100644 --- a/offload/include/Shared/Environment.h +++ b/offload/include/Shared/Environment.h @@ -58,7 +58,6 @@ struct ConfigurationEnvironmentTy { int32_t MinTeams = -1; int32_t MaxTeams = -1; int32_t ReductionDataSize = 0; - int32_t ReductionBufferLength = 0; //} }; @@ -85,8 +84,7 @@ enum class DynCGroupMemFallbackType : uint8_t { struct KernelLaunchEnvironmentTy { void *ReductionBuffer = nullptr; void *DynCGroupMemFbPtr = nullptr; - uint32_t ReductionCnt = 0; - uint32_t ReductionIterCnt = 0; + uint32_t ReductionTeamsDone = 0; uint32_t DynCGroupMemSize = 0; DynCGroupMemFallbackType DynCGroupMemFb = DynCGroupMemFallbackType::None; }; diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index dad061ae3c238..9c83d2c9319fb 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -464,11 +464,13 @@ struct GenericKernelTy { } /// Return a device pointer to a new kernel launch environment. - Expected - getKernelLaunchEnvironment(GenericDeviceTy &GenericDevice, - const KernelArgsTy &KernelArgs, - const DynBlockMemConfTy &DynBlockMemConf, - AsyncInfoWrapperTy &AsyncInfoWrapper) const; + /// + /// \p NumBlocks0 is the number of blocks for this launch and is used to size + /// the reduction buffer. 
+ Expected getKernelLaunchEnvironment( + GenericDeviceTy &GenericDevice, const KernelArgsTy &KernelArgs, + const DynBlockMemConfTy &DynBlockMemConf, + AsyncInfoWrapperTy &AsyncInfoWrapper, uint32_t NumBlocks0) const; /// Indicate whether an execution mode is valid. static bool isValidExecutionMode(OMPTgtExecModeFlags ExecutionMode) { diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 0c345d84fa907..0e0e1163d6e39 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -110,7 +110,7 @@ Expected GenericKernelTy::getKernelLaunchEnvironment( GenericDeviceTy &GenericDevice, const KernelArgsTy &KernelArgs, const DynBlockMemConfTy &DynBlockMemConf, - AsyncInfoWrapperTy &AsyncInfoWrapper) const { + AsyncInfoWrapperTy &AsyncInfoWrapper, uint32_t NumBlocks0) const { // Ctor/Dtor have no arguments, replaying uses the original kernel launch // environment. Older versions of the compiler do not generate a kernel // launch environment. 
@@ -119,9 +119,15 @@ GenericKernelTy::getKernelLaunchEnvironment( KernelArgs.Version < OMP_KERNEL_ARG_MIN_VERSION_WITH_DYN_PTR) return nullptr; - if ((!KernelEnvironment.Configuration.ReductionDataSize || - !KernelEnvironment.Configuration.ReductionBufferLength) && - KernelArgs.DynCGroupMem == 0) + const auto &RedCfg = KernelEnvironment.Configuration; + const bool NeedsReductionBuffer = RedCfg.ReductionDataSize != 0; + if (NeedsReductionBuffer && KernelArgs.Version < OMP_KERNEL_ARG_VERSION) + return Plugin::error(ErrorCode::INVALID_BINARY, + "kernel was built against an older OpenMP " + "kernel-launch-environment ABI (v%u); current " + "runtime requires v%u for cross-team reductions", + KernelArgs.Version, OMP_KERNEL_ARG_VERSION); + if (!NeedsReductionBuffer && !KernelArgs.DynCGroupMem) return reinterpret_cast(~0); auto AllocOrErr = GenericDevice.dataAlloc(sizeof(KernelLaunchEnvironmentTy), @@ -143,11 +149,10 @@ GenericKernelTy::getKernelLaunchEnvironment( LocalKLE.DynCGroupMemFb = DynBlockMemConf.Fallback; LocalKLE.ReductionBuffer = nullptr; - if (KernelEnvironment.Configuration.ReductionDataSize && - KernelEnvironment.Configuration.ReductionBufferLength) { + if (NeedsReductionBuffer) { + // Allocate one reduction buffer element per team. 
auto AllocOrErr = GenericDevice.dataAlloc( - KernelEnvironment.Configuration.ReductionDataSize * - KernelEnvironment.Configuration.ReductionBufferLength, + uint64_t(RedCfg.ReductionDataSize) * NumBlocks0, /*HostPtr=*/nullptr, TargetAllocTy::TARGET_ALLOC_DEVICE); if (!AllocOrErr) return AllocOrErr.takeError(); @@ -284,8 +289,9 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs, AsyncInfoWrapper.freeAllocationAfterSynchronization( DynBlockMemConf.FallbackPtr); - auto KernelLaunchEnvOrErr = getKernelLaunchEnvironment( - GenericDevice, KernelArgs, DynBlockMemConf, AsyncInfoWrapper); + auto KernelLaunchEnvOrErr = + getKernelLaunchEnvironment(GenericDevice, KernelArgs, DynBlockMemConf, + AsyncInfoWrapper, EffectiveNumBlocks[0]); if (!KernelLaunchEnvOrErr) return KernelLaunchEnvOrErr.takeError(); diff --git a/openmp/device/include/Interface.h b/openmp/device/include/Interface.h index 6a33ea2432c89..cf455bf030270 100644 --- a/openmp/device/include/Interface.h +++ b/openmp/device/include/Interface.h @@ -235,19 +235,18 @@ void __kmpc_target_deinit(); /// Reduction /// ///{ -void *__kmpc_reduction_get_fixed_buffer(); - int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(IdentTy *Loc, uint64_t reduce_data_size, void *reduce_data, ShuffleReductFnTy shflFct, InterWarpCopyFnTy cpyFct); -int32_t __kmpc_nvptx_teams_reduce_nowait_v2( - IdentTy *Loc, void *GlobalBuffer, uint32_t num_of_records, - uint64_t reduce_data_size, void *reduce_data, ShuffleReductFnTy shflFct, - InterWarpCopyFnTy cpyFct, ListGlobalFnTy lgcpyFct, ListGlobalFnTy lgredFct, - ListGlobalFnTy glcpyFct, ListGlobalFnTy glredFct); +int32_t __kmpc_gpu_xteam_reduce_nowait(IdentTy *Loc, void *reduce_data, + ShuffleReductFnTy shflFct, + InterWarpCopyFnTy cpyFct, + ListGlobalFnTy lgcpyFct, + ListGlobalFnTy glcpyFct, + ListGlobalFnTy glredFct); ///} /// Synchronization diff --git a/openmp/device/src/Reduction.cpp b/openmp/device/src/Reduction.cpp index 1295b5a508059..ec772d357a425 100644 --- 
a/openmp/device/src/Reduction.cpp +++ b/openmp/device/src/Reduction.cpp @@ -10,9 +10,7 @@ // //===----------------------------------------------------------------------===// -#include "Debug.h" #include "DeviceTypes.h" -#include "DeviceUtils.h" #include "Interface.h" #include "Mapping.h" #include "State.h" @@ -20,6 +18,18 @@ using namespace ompx; +static constexpr uint32_t kmpc_min(uint32_t a, uint32_t b) { + return a < b ? a : b; +} + +// Round down to the nearest multiple of the warp size. Return 1 if the value is +// less than the warp size. +static uint32_t round_down_to_warpsize(uint32_t s) { + if (s < mapping::getWarpSize()) + return 1; + return (s & ~static_cast(mapping::getWarpSize() - 1u)); +} + static void gpu_regular_warp_reduce(void *reduce_data, ShuffleReductFnTy shflFct) { for (uint32_t mask = mapping::getWarpSize() / 2; mask > 0; mask /= 2) { @@ -61,6 +71,69 @@ static uint32_t gpu_irregular_simd_reduce(void *reduce_data, return (logical_lane_id == 0); } +// Reduction within a block on the GPU. +// +// Template parameters: +// - checkLiveness: Whether to check the liveness of the lanes. This is +// useful if gpu_block_reduce is called in a context where +// partial warps or L2 parallel regions are possible. +// Parameters: +// - reduce_data: Pointer to the reduction data +// - shflFct: Shuffle reduction function +// - cpyFct: Inter-warp copy function (copies data from each warp's thread +// 0 to the lanes of the zeroth warp) +// - NumThreads: Number of threads to consider / values to reduce +// - ThreadId: Thread ID in block (getThreadIdInBlock() in SPMD and 0 in +// Generic mode) +// +// Returns: +// - 1 if the thread is the zeroth thread of the block +// - 0 otherwise +// +// Note that it is expected that the caller checks for NumThreads <= 1 and acts +// in a way that suits the callers situation. If checkLiveness is false, this +// function performs a regular warp reduce unconditionally. 
+// +template +static uint32_t gpu_block_reduce(void *reduce_data, ShuffleReductFnTy shflFct, + InterWarpCopyFnTy cpyFct, uint32_t NumThreads, + uint32_t BlockThreadId) { + if constexpr (checkLiveness) { + __kmpc_impl_lanemask_t Liveness = mapping::activemask(); + // Check for partial warp with non-contiguous lanes. + if (Liveness == lanes::All) { + gpu_regular_warp_reduce(reduce_data, shflFct); + } else if (!(Liveness & (Liveness + 1))) { + // Partial warp but contiguous lanes. + gpu_irregular_warp_reduce(reduce_data, shflFct, utils::popc(Liveness), + BlockThreadId % mapping::getWarpSize()); + } else { + // Dispersed lanes. Only threads in L2 parallel region may enter here. + return gpu_irregular_simd_reduce(reduce_data, shflFct); + } + } else { + gpu_regular_warp_reduce(reduce_data, shflFct); + } + + // When we have more than [mapping::getWarpSize()] number of threads + // a block reduction is performed here. + // + // Only L1 parallel region can enter this if condition. + + if (NumThreads > mapping::getWarpSize()) { + uint32_t WarpsNeeded = utils::roundUp(NumThreads, mapping::getWarpSize()); + // Gather all the reduced values from each warp to the first warp. 
+ cpyFct(reduce_data, WarpsNeeded); + + uint32_t WarpId = BlockThreadId / mapping::getWarpSize(); + if (WarpId == 0) + gpu_irregular_warp_reduce(reduce_data, shflFct, WarpsNeeded, + BlockThreadId); + } + + return BlockThreadId == 0; +} + static int32_t nvptx_parallel_reduce_nowait(void *reduce_data, ShuffleReductFnTy shflFct, InterWarpCopyFnTy cpyFct) { @@ -116,51 +189,9 @@ static int32_t nvptx_parallel_reduce_nowait(void *reduce_data, return BlockThreadId == 0; } #endif - __kmpc_impl_lanemask_t Liveness = mapping::activemask(); - if (Liveness == lanes::All) // Full warp - gpu_regular_warp_reduce(reduce_data, shflFct); - else if (!(Liveness & (Liveness + 1))) // Partial warp but contiguous lanes - gpu_irregular_warp_reduce(reduce_data, shflFct, - /*LaneCount=*/utils::popc(Liveness), - /*LaneId=*/mapping::getThreadIdInBlock() % - mapping::getWarpSize()); - else { // Dispersed lanes. Only threads in L2 - // parallel region may enter here; return - // early. - return gpu_irregular_simd_reduce(reduce_data, shflFct); - } - - // When we have more than [mapping::getWarpSize()] number of threads - // a block reduction is performed here. - // - // Only L1 parallel region can enter this if condition. - if (NumThreads > mapping::getWarpSize()) { - uint32_t WarpsNeeded = utils::roundUp(NumThreads, mapping::getWarpSize()); - // Gather all the reduced values from each warp - // to the first warp. - cpyFct(reduce_data, WarpsNeeded); - uint32_t WarpId = BlockThreadId / mapping::getWarpSize(); - if (WarpId == 0) - gpu_irregular_warp_reduce(reduce_data, shflFct, WarpsNeeded, - BlockThreadId); - - return BlockThreadId == 0; - } - - // Get the OMP thread Id. This is different from BlockThreadId in the case - // of an L2 parallel region. 
- return BlockThreadId == 0; -} - -static uint32_t roundToWarpsize(uint32_t s) { - if (s < mapping::getWarpSize()) - return 1; - return utils::alignDown(s, mapping::getWarpSize()); -} - -static constexpr uint32_t kmpcMin(uint32_t x, uint32_t y) { - return x < y ? x : y; + return gpu_block_reduce(reduce_data, shflFct, cpyFct, NumThreads, + BlockThreadId); } extern "C" { @@ -173,144 +204,99 @@ int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(IdentTy *Loc, return nvptx_parallel_reduce_nowait(reduce_data, shflFct, cpyFct); } +// Reduction across teams on the GPU. +// +// Parameters: +// - Loc: Location of the reduction +// - reduce_data: Pointer to the reduction data +// - shflFct: Shuffle reduction function +// - cpyFct: Inter-warp copy function (copies data from each warp's thread 0 +// to the lanes of the zeroth warp) +// - lgcpyFct: List-global copy function (copies the reduction data from the +// local thread to the global buffer) +// - glcpyFct: Global copy function (copies the reduction data from the global +// buffer to the local thread) +// - glredFct: Global reduce function (reduces the reduction data from the +// global buffer to the local thread) +// +// Returns: +// - 1 if this thread must write the final reduced value back to the shared +// reduction variable (i.e. thread 0 of the single team when NumTeams == 1, +// or thread 0 of the last team to finish its partial reduction otherwise). +// - 0 otherwise. +// [[clang::always_inline]] -int32_t __kmpc_nvptx_teams_reduce_nowait_v2( - IdentTy *Loc, void *GlobalBuffer, uint32_t num_of_records, - uint64_t reduce_data_size, void *reduce_data, ShuffleReductFnTy shflFct, - InterWarpCopyFnTy cpyFct, ListGlobalFnTy lgcpyFct, ListGlobalFnTy lgredFct, - ListGlobalFnTy glcpyFct, ListGlobalFnTy glredFct) { - // Terminate all threads in non-SPMD mode except for the master thread. 
- uint32_t ThreadId = mapping::getThreadIdInBlock(); - if (mapping::isGenericMode()) { +int32_t __kmpc_gpu_xteam_reduce_nowait(IdentTy *Loc, void *reduce_data, + ShuffleReductFnTy shflFct, + InterWarpCopyFnTy cpyFct, + ListGlobalFnTy lgcpyFct, + ListGlobalFnTy glcpyFct, + ListGlobalFnTy glredFct) { + uint32_t ThreadId; + uint32_t NumThreads; + + if (mapping::isSPMDMode()) { + // In SPMD mode all workers participate in the teams reduction. + ThreadId = mapping::getThreadIdInBlock(); + NumThreads = mapping::getNumberOfThreadsInBlock(); + } else { + // In generic mode, only the team master participates in the teams + // reduction because the workers are waiting for parallel work. if (!mapping::isMainThreadInGenericMode()) return 0; ThreadId = 0; + NumThreads = 1; } - uint32_t &IterCnt = state::getKernelLaunchEnvironment().ReductionIterCnt; - uint32_t &Cnt = state::getKernelLaunchEnvironment().ReductionCnt; - - // In non-generic mode all workers participate in the teams reduction. - // In generic mode only the team master participates in the teams - // reduction because the workers are waiting for parallel work. - uint32_t NumThreads = omp_get_num_threads(); uint32_t TeamId = omp_get_team_num(); uint32_t NumTeams = omp_get_num_teams(); - [[clang::loader_uninitialized]] static Local Bound; - [[clang::loader_uninitialized]] static Local ChunkTeamCount; - - // Block progress for teams greater than the current upper - // limit. We always only allow a number of teams less or equal - // to the number of slots in the buffer. - bool IsMaster = (ThreadId == 0); - while (IsMaster) { - Bound = atomic::load(&IterCnt, atomic::acquire); - if (TeamId < Bound + num_of_records) - break; - } - if (IsMaster) { - int ModBockId = TeamId % num_of_records; - if (TeamId < num_of_records) { - lgcpyFct(GlobalBuffer, ModBockId, reduce_data); - } else - lgredFct(GlobalBuffer, ModBockId, reduce_data); - - // Propagate the memory writes above to the world. 
- fence::kernel(atomic::release); - - // Increment team counter. - // This counter is incremented by all teams in the current - // num_of_records chunk. - ChunkTeamCount = atomic::inc(&Cnt, num_of_records - 1u, atomic::seq_cst, - atomic::MemScopeTy::device); + // Fast path for single-team kernels: no cross-team work required, + // the team-local reduction already produced the final result. + if (NumTeams <= 1) + return ThreadId == 0; + + uint32_t &TeamsDone = state::getKernelLaunchEnvironment().ReductionTeamsDone; + void *GlobalBuffer = state::getKernelLaunchEnvironment().ReductionBuffer; + [[clang::loader_uninitialized]] static Local TeamsDoneResult; + + // Save the team's reduced value in the global buffer and atomically + // increment the teams-done counter. + if (ThreadId == 0) { + lgcpyFct(GlobalBuffer, TeamId, reduce_data); + // We let the atomic inc wrap around if the value gets larger than + // NumTeams-1, which makes the counter self-reset. + TeamsDoneResult = atomic::inc(&TeamsDone, NumTeams - 1u, atomic::acq_rel, + atomic::MemScopeTy::device); } - // Synchronize in SPMD mode as in generic mode all but 1 threads are in the - // state machine. + // This sync is needed so that all threads from last team see the shared teams + // done counter value and know that they are in the last team. if (mapping::isSPMDMode()) synchronize::threadsAligned(atomic::acq_rel); - // reduce_data is global or shared so before being reduced within the - // warp we need to bring it in local memory: - // local_reduce_data = reduce_data[i] - // - // Example for 3 reduction variables a, b, c (of potentially different - // types): - // - // buffer layout (struct of arrays): - // a, a, ..., a, b, b, ... b, c, c, ... c - // |__________| - // num_of_records - // - // local_data_reduce layout (struct): - // a, b, c - // - // Each thread will have a local struct containing the values to be - // reduced: - // 1. do reduction within each warp. - // 2. do reduction across warps. - // 3. 
write the final result to the main reduction variable - // by returning 1 in the thread holding the reduction result. - - // Check if this is the very last team. - unsigned NumRecs = kmpcMin(NumTeams, uint32_t(num_of_records)); - if (ChunkTeamCount == NumTeams - Bound - 1) { - // Ensure we see the global memory writes by other teams - fence::kernel(atomic::acquire); - - // - // Last team processing. - // - if (ThreadId >= NumRecs) - return 0; - NumThreads = roundToWarpsize(kmpcMin(NumThreads, NumRecs)); - if (ThreadId >= NumThreads) - return 0; - - // Load from buffer and reduce. - glcpyFct(GlobalBuffer, ThreadId, reduce_data); - for (uint32_t i = NumThreads + ThreadId; i < NumRecs; i += NumThreads) - glredFct(GlobalBuffer, i, reduce_data); - - // Reduce across warps to the warp master. - if (NumThreads > 1) { - gpu_regular_warp_reduce(reduce_data, shflFct); - - // When we have more than [mapping::getWarpSize()] number of threads - // a block reduction is performed here. - uint32_t ActiveThreads = kmpcMin(NumRecs, NumThreads); - if (ActiveThreads > mapping::getWarpSize()) { - uint32_t WarpsNeeded = - utils::roundUp(ActiveThreads, mapping::getWarpSize()); - // Gather all the reduced values from each warp - // to the first warp. - cpyFct(reduce_data, WarpsNeeded); - - uint32_t WarpId = ThreadId / mapping::getWarpSize(); - if (WarpId == 0) - gpu_irregular_warp_reduce(reduce_data, shflFct, WarpsNeeded, - ThreadId); - } - } + // If teams done counter reaches NumTeams-1, this is the last team. + if (TeamsDoneResult != NumTeams - 1u) + return 0; - if (IsMaster) { - Cnt = 0; - IterCnt = 0; - return 1; - } + // The last team performs final reduction across all team values. + NumThreads = kmpc_min(NumThreads, round_down_to_warpsize(NumTeams)); + if (ThreadId >= NumThreads) return 0; - } - if (IsMaster && ChunkTeamCount == num_of_records - 1) { - // Allow SIZE number of teams to proceed writing their - // intermediate results to the global buffer. 
- atomic::add(&IterCnt, uint32_t(num_of_records), atomic::seq_cst); - } - return 0; -} -} + // Make sure that global buffer is fresh. + fence::kernel(atomic::acquire); + // Get the team values from the global buffer. + glcpyFct(GlobalBuffer, ThreadId, reduce_data); + // In case we have more teams than threads, we need to iterate over the + // remaining teams. + for (uint32_t I = NumThreads + ThreadId; I < NumTeams; I += NumThreads) + glredFct(GlobalBuffer, I, reduce_data); + + if (NumThreads == 1) + return 1; -void *__kmpc_reduction_get_fixed_buffer() { - return state::getKernelLaunchEnvironment().ReductionBuffer; + return gpu_block_reduce(reduce_data, shflFct, cpyFct, NumThreads, + ThreadId); } +} // extern "C" From 1bdb07a82863dd4ed747bdd5e8fac9185622ce5a Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Mon, 22 Jun 2026 17:52:44 +0200 Subject: [PATCH 031/511] [InstCombine] Merge consecutive assumes (#204983) This should make assumes a bit more efficient, since it removes a few instructions. This should also help with optimizations that are limited in how many instructions they step through. --- .../InstCombine/InstCombineCalls.cpp | 22 ++++++++++++++++--- .../InstCombine/assume-loop-align.ll | 3 +-- llvm/test/Transforms/InstCombine/assume.ll | 20 +++++------------ .../PhaseOrdering/AArch64/std-find.ll | 3 +-- 4 files changed, 27 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index ce9e4b836a56e..880d896e12d6e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3844,10 +3844,26 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } } - // If the assume has operand bundles, the folds below will never work, so - // don't bother trying. 
- if (II->hasOperandBundles()) + if (II->hasOperandBundles()) { + // Merge consecutive assumes to save some resources + if (auto *PrevAI = dyn_cast_or_null(II->getPrevNode()); + PrevAI && PrevAI->hasOperandBundles()) { + SmallVector Bundles; + Bundles.reserve(II->getNumOperandBundles() + + PrevAI->getNumOperandBundles()); + for (auto Bundle : PrevAI->operand_bundles()) + Bundles.emplace_back(Bundle); + for (auto Bundle : II->operand_bundles()) + Bundles.emplace_back(Bundle); + Builder.CreateAssumption(Bundles); + eraseInstFromFunction(*PrevAI); + return eraseInstFromFunction(*II); + } + + // If the assume has operand bundles, the folds below will never work, so + // don't bother trying. break; + } Value *IIOperand = II->getArgOperand(0); diff --git a/llvm/test/Transforms/InstCombine/assume-loop-align.ll b/llvm/test/Transforms/InstCombine/assume-loop-align.ll index 0c5e403ca54a9..2701775f011e8 100644 --- a/llvm/test/Transforms/InstCombine/assume-loop-align.ll +++ b/llvm/test/Transforms/InstCombine/assume-loop-align.ll @@ -10,8 +10,7 @@ target triple = "x86_64-unknown-linux-gnu" define void @foo(ptr %a, ptr %b) #0 { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A:%.*]], i64 64) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[B:%.*]], i64 64) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A:%.*]], i64 64), "align"(ptr [[B:%.*]], i64 64) ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index 69220811ac206..1902fcbb95afd 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -134,8 +134,7 @@ define i1 @align_with_offset_on_gep(ptr %base) { define void @align_with_constant_offset_0(ptr %ptr) { ; CHECK-LABEL: 
@align_with_constant_offset_0( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 0) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 0) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -145,8 +144,7 @@ define void @align_with_constant_offset_0(ptr %ptr) { define void @align_with_constant_offset_1(ptr %ptr) { ; CHECK-LABEL: @align_with_constant_offset_1( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 -8) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 -8) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -157,8 +155,7 @@ define void @align_with_constant_offset_1(ptr %ptr) { define void @align_with_constant_offset_4(ptr %ptr) { ; CHECK-LABEL: @align_with_constant_offset_4( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 0) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 0) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -169,8 +166,7 @@ define void @align_with_constant_offset_4(ptr %ptr) { define void @align_with_constant_offset_8(ptr %ptr) { ; CHECK-LABEL: @align_with_constant_offset_8( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 8) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 8) ] ; CHECK-NEXT: ret void 
; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -180,8 +176,7 @@ define void @align_with_constant_offset_8(ptr %ptr) { define void @align_with_variable_offset(ptr %ptr, i64 %offset) { ; CHECK-LABEL: @align_with_variable_offset( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 [[OFFSET:%.*]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 [[OFFSET:%.*]]) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -627,10 +622,7 @@ define void @redundant_nonnull3(ptr %ptr) { define void @partially_redundant(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr5) { ; CHECK-LABEL: @partially_redundant( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR2:%.*]]) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR:%.*]]) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR4:%.*]]), "nonnull"(ptr [[PTR3:%.*]]) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR5:%.*]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR5:%.*]]), "nonnull"(ptr [[PTR4:%.*]]), "nonnull"(ptr [[PTR3:%.*]]), "nonnull"(ptr [[PTR:%.*]]), "nonnull"(ptr [[PTR2:%.*]]) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "nonnull"(ptr %ptr), "nonnull"(ptr %ptr2) ] diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll index 69b23200b239b..4ca7f780cdc5e 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll @@ -244,8 +244,7 @@ define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) { ; CHECK-LABEL: define noundef ptr @std_find_caller( ; CHECK-SAME: ptr noundef [[FIRST:%.*]], ptr noundef [[LAST:%.*]]) local_unnamed_addr 
#[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[LAST]], i64 2) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2), "align"(ptr [[LAST]], i64 2) ] ; CHECK-NEXT: [[PRE_I:%.*]] = icmp eq ptr [[FIRST]], [[LAST]] ; CHECK-NEXT: br i1 [[PRE_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT:.*]], label %[[LOOP_HEADER_I_PREHEADER:.*]] ; CHECK: [[LOOP_HEADER_I_PREHEADER]]: From 4c50a343695d7cb585ab5d96f25cea512dd6d351 Mon Sep 17 00:00:00 2001 From: Petar Avramovic Date: Mon, 22 Jun 2026 18:03:06 +0200 Subject: [PATCH 032/511] AMDGPU/GlobalISel: RegBankLegalize rules for pk_u64 add and sub (#205079) --- .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 4 ++- llvm/test/CodeGen/AMDGPU/packed-u64.ll | 30 ++++++++++++++----- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index 0f0c9557e4e60..b0a8c1dd11e6e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -593,7 +593,9 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackAExt}) .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}}) .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}}) - .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}}); + .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}}) + .Any({{UniV2S64}, {{UniInVgprV2S64}, {VgprV2S64, VgprV2S64}}}) + .Any({{DivV2S64}, {{VgprV2S64}, {VgprV2S64, VgprV2S64}}}); addRulesForGOpcs({G_UADDO, G_USUBO}, Standard) .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}}) diff --git a/llvm/test/CodeGen/AMDGPU/packed-u64.ll b/llvm/test/CodeGen/AMDGPU/packed-u64.ll index ea3c4ce56ae52..c1140977339dc 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-u64.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-u64.ll @@ -1,6 +1,6 
@@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1251 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1251,GFX1251-SDAG %s -; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1251 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1251,GFX1251-GISEL %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1251 < %s | FileCheck -enable-var-scope -check-prefixes=GFX1251,GFX1251-GISEL %s define amdgpu_kernel void @add_v2_vv(ptr addrspace(1) %a) { ; GFX1251-LABEL: add_v2_vv: @@ -70,16 +70,24 @@ define amdgpu_kernel void @add_v2_ss(ptr addrspace(1) %a, <2 x i64> %x, <2 x i64 ; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-GISEL-NEXT: s_clause 0x1 ; GFX1251-GISEL-NEXT: s_load_b256 s[8:15], s[4:5], 0x34 nv -; GFX1251-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv ; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] ; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; GFX1251-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1251-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1251-GISEL-NEXT: v_pk_add_nc_u64 v[0:3], v[0:3], v[4:7] ; GFX1251-GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GFX1251-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX1251-GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1251-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1251-GISEL-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1251-GISEL-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1251-GISEL-NEXT: v_readfirstlane_b32 s3, v3 +; GFX1251-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1251-GISEL-NEXT: v_mov_b64_e32 
v[0:1], s[0:1] +; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1251-GISEL-NEXT: global_store_b128 v4, v[0:3], s[6:7] ; GFX1251-GISEL-NEXT: s_endpgm %add = add <2 x i64> %x, %y store <2 x i64> %add, ptr addrspace(1) %a, align 8 @@ -705,16 +713,24 @@ define amdgpu_kernel void @sub_v2_ss(ptr addrspace(1) %a, <2 x i64> %x, <2 x i64 ; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-GISEL-NEXT: s_clause 0x1 ; GFX1251-GISEL-NEXT: s_load_b256 s[8:15], s[4:5], 0x34 nv -; GFX1251-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv ; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 ; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9] ; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13] ; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11] ; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15] -; GFX1251-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1251-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1251-GISEL-NEXT: v_pk_sub_nc_u64 v[0:3], v[0:3], v[4:7] ; GFX1251-GISEL-NEXT: v_mov_b32_e32 v4, 0 -; GFX1251-GISEL-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX1251-GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX1251-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX1251-GISEL-NEXT: v_readfirstlane_b32 s1, v1 +; GFX1251-GISEL-NEXT: v_readfirstlane_b32 s2, v2 +; GFX1251-GISEL-NEXT: v_readfirstlane_b32 s3, v3 +; GFX1251-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[0:1] +; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[2:3] +; GFX1251-GISEL-NEXT: global_store_b128 v4, v[0:3], s[6:7] ; GFX1251-GISEL-NEXT: s_endpgm %sub = sub <2 x i64> %x, %y store <2 x i64> %sub, ptr addrspace(1) %a, align 8 From 1bf80d8af7ce50dcf2283ab05e4d4e2dd3c5ec24 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Mon, 22 Jun 2026 
17:04:39 +0100 Subject: [PATCH 033/511] [AMDGPU] Add a test for sext i1 feeding into icmp (#205121) This is a precommit of a test case that causes problems for #204238. --- llvm/test/CodeGen/AMDGPU/sign_extend.ll | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/sign_extend.ll b/llvm/test/CodeGen/AMDGPU/sign_extend.ll index ece46b59ba49e..373618f3da1b8 100644 --- a/llvm/test/CodeGen/AMDGPU/sign_extend.ll +++ b/llvm/test/CodeGen/AMDGPU/sign_extend.ll @@ -604,6 +604,34 @@ define amdgpu_kernel void @v_sext_v4i16_to_v4i32(ptr addrspace(1) %out, ptr addr ret void } +define i32 @s_ext_cmp_ext(i32 %arg, float inreg %x) { +; SI-LABEL: s_ext_cmp_ext: +; SI: ; %bb.0: +; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; SI-NEXT: v_cvt_u32_f32_e32 v1, s16 +; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; SI-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; SI-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-NEXT: s_setpc_b64 s[30:31] +; +; VI-LABEL: s_ext_cmp_ext: +; VI: ; %bb.0: +; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; VI-NEXT: v_cvt_u32_f32_e32 v1, s16 +; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; VI-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc +; VI-NEXT: v_cmp_eq_u32_e32 vcc, v0, v1 +; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-NEXT: s_setpc_b64 s[30:31] + %cvt = fptoui float %x to i32 + %ne = icmp ne i32 %cvt, 0 + %sext = sext i1 %ne to i32 + %eq = icmp eq i32 %arg, %sext + %zext = zext i1 %eq to i32 + ret i32 %zext +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 attributes #1 = { nounwind readnone } From 1c4283015c8433c591daf0e4c2f0050d8eb2d45b Mon Sep 17 00:00:00 2001 From: Zorojuro Date: Mon, 22 Jun 2026 21:36:55 +0530 Subject: [PATCH 034/511] [libc][math] Add missing freebsd entrypoint for cbrtf16 (#205011) Adds missing freebsd entrypoint in [5429667](https://github.com/llvm/llvm-project/pull/205011/commits/5429667e053398d39e180f084e0dd5c4c416aa27) and fixes the wrong entry point for 
`cbrtf16` in [cdc394f](https://github.com/llvm/llvm-project/pull/205011/commits/cdc394f8ba7dffb9f49563d601def7cf31561d6f), which was `cbrtf` in `baremetal/riscv` --- libc/config/baremetal/riscv/entrypoints.txt | 2 +- libc/config/freebsd/x86_64/entrypoints.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt index 948b870e37987..aa1f92a3ce6c9 100644 --- a/libc/config/baremetal/riscv/entrypoints.txt +++ b/libc/config/baremetal/riscv/entrypoints.txt @@ -610,7 +610,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.asinhf16 libc.src.math.atan2f16 libc.src.math.canonicalizef16 - libc.src.math.cbrtf + libc.src.math.cbrtf16 libc.src.math.ceilf16 libc.src.math.copysignf16 libc.src.math.cosf16 diff --git a/libc/config/freebsd/x86_64/entrypoints.txt b/libc/config/freebsd/x86_64/entrypoints.txt index b4745d9400335..394b83130f067 100644 --- a/libc/config/freebsd/x86_64/entrypoints.txt +++ b/libc/config/freebsd/x86_64/entrypoints.txt @@ -375,6 +375,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.atanhf16 libc.src.math.atanpif16 libc.src.math.canonicalizef16 + libc.src.math.cbrtf16 libc.src.math.ceilf16 libc.src.math.copysignf16 libc.src.math.cosf16 From 73ea5d80a5821d1fb0d4c557d96d02d28a16dc2a Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Mon, 22 Jun 2026 18:12:37 +0200 Subject: [PATCH 035/511] [flang-rt][openmp] Add file-level dependencies for builtin mod files (#204260) CMake currently intentionally ignores intrinsic dependencies. flang-rt already had a workaround using target-level dependencies, but it does not know about dependencies between .mod files created within the same add_library that CMake ignores. As a result, as reported in #203549, updating a .mod did not trigger rebuilding the .mod files that depend on it. Specifically, .mod files store the checksum of used .mod files which need to be updated and therefore require transitive rebuilding. 
As mentioned, CMake already adds this file-level dependency itself for non-intrinsic module dependencies. In this PR we are injecting the additional dependencies that CMake does not add via OBJECT_DEPENDS. Three caveats: 1. Using OBJECT_DEPENDS for dependencies between modules of the same OBJECT library makes Ninja complain about circular dependencies. To avoid this, split __fortran_builtins.f90 and __cuda_builtins.f90 into their own OBJECT libraries each. 2. The dependency cannot be on the .mod files. Because of how CMake's dependency mechanism works, the dependency scanning runs only after ensuring that the OBJECT_DEPENDS files exist. So if they don't exist yet, we get a missing dependency error. To avoid this, we depend on the .o file instead. 3. Generator expressions do not work in OBJECT_DEPENDS. That is, we cannot use `$`, but fortunately we can use OBJECT_OUTPUTS to make the location predictable. At some point in the future, https://gitlab.kitware.com/cmake/cmake/-/issues/26803 should be resolved and a CMake release containing the fix should become the minimum required version to build LLVM, in which case these workarounds can be removed. --- flang-rt/lib/runtime/CMakeLists.txt | 39 +++++++++++++++++++++++------ openmp/module/CMakeLists.txt | 6 +++++ 2 files changed, 38 insertions(+), 7 deletions(-) diff --git a/flang-rt/lib/runtime/CMakeLists.txt b/flang-rt/lib/runtime/CMakeLists.txt index a956cda31b52d..9db7037355498 100644 --- a/flang-rt/lib/runtime/CMakeLists.txt +++ b/flang-rt/lib/runtime/CMakeLists.txt @@ -266,7 +266,7 @@ endif () # When a target depends on an object library, CMake seems to try to only build # the object files that the target actual needs. If we are only interested -# in the module files, nothing get is built at all. To ensure that the module +# in the module files, nothing gets built at all. To ensure that the module # files are built, insert a custom target that is opaque to CMake so it cannot # apply this optimization. Dependees on module files must depend on this # barrier instead. 
An actual COMMAND (that does nothing) seems to be necessary @@ -291,18 +291,43 @@ if (FLANG_RT_FORTRAN_MODULES) # to disable this behavior, unfortunately it does not work with Ninja # (https://gitlab.kitware.com/cmake/cmake/-/issues/26803) # As a workaround, we build those intrinsic modules first such that the main - # runtime can depend on it. - add_flangrt_library(flang_rt.mod.intrinsics OBJECT - ${intrinsics_sources} + # runtime can depend on it. To ensure that modules files are also transitively + # updated if a USE'd .mod file changes (a .mod stores the checksums of all + # .mod files it depends on and therefore needs to be updated as well), inject + # an file-level dependency via OBJECT_DEPENDS. + + add_flangrt_library(flang_rt.mod.fortran.builtins OBJECT + __fortran_builtins.f90 + ) + set_property(SOURCE __fortran_builtins.f90 PROPERTY OBJECT_OUTPUTS "${CMAKE_BINARY_DIR}/modules/${CMAKE_CFG_INTDIR}/__fortran_builtins${CMAKE_Fortran_OUTPUT_EXTENSION}") + flang_module_target(flang_rt.mod.fortran.builtins PUBLIC) + add_module_barrier(flang_rt.mod.fortran.builtins.barrier flang_rt.mod.fortran.builtins) + + add_flangrt_library(flang_rt.mod.cuda.builtins OBJECT + __cuda_builtins.f90 ) - flang_module_target(flang_rt.mod.intrinsics PUBLIC) - add_module_barrier(flang_rt.mod.intrinsics.barrier flang_rt.mod.intrinsics) + set_property(SOURCE __cuda_builtins.f90 PROPERTY OBJECT_OUTPUTS "${CMAKE_BINARY_DIR}/modules/${CMAKE_CFG_INTDIR}/__cuda_builtins${CMAKE_Fortran_OUTPUT_EXTENSION}") + add_dependencies(flang_rt.mod.cuda.builtins flang_rt.mod.fortran.builtins.barrier) + set_property(SOURCE __cuda_builtins.f90 + APPEND PROPERTY OBJECT_DEPENDS + "${CMAKE_BINARY_DIR}/modules/${CMAKE_CFG_INTDIR}/__fortran_builtins${CMAKE_Fortran_OUTPUT_EXTENSION}" + ) + flang_module_target(flang_rt.mod.cuda.builtins PUBLIC) + add_module_barrier(flang_rt.mod.cuda.builtins.barrier flang_rt.mod.cuda.builtins) # The modules themselves add_flangrt_library(flang_rt.mod OBJECT ${module_sources} ) - 
add_dependencies(flang_rt.mod flang_rt.mod.intrinsics.barrier) + add_dependencies(flang_rt.mod flang_rt.mod.fortran.builtins.barrier flang_rt.mod.cuda.builtins.barrier) + foreach(_srcfile IN LISTS module_sources) + set_property(SOURCE ${_srcfile} + APPEND PROPERTY OBJECT_DEPENDS + "${CMAKE_BINARY_DIR}/modules/${CMAKE_CFG_INTDIR}/__fortran_builtins${CMAKE_Fortran_OUTPUT_EXTENSION}" + "${CMAKE_BINARY_DIR}/modules/${CMAKE_CFG_INTDIR}/__cuda_builtins${CMAKE_Fortran_OUTPUT_EXTENSION}" + ) + endforeach() + flang_module_target(flang_rt.mod PUBLIC) add_module_barrier(flang-rt-mod flang_rt.mod) diff --git a/openmp/module/CMakeLists.txt b/openmp/module/CMakeLists.txt index c4b9554bdffa4..451cbf9cc8fc8 100644 --- a/openmp/module/CMakeLists.txt +++ b/openmp/module/CMakeLists.txt @@ -34,6 +34,12 @@ endif () flang_module_target(libomp-mod PUBLIC) add_dependencies(libomp-mod ${RUNTIMES_FORTRAN_BUILD_DEPS}) +if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES) + set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/omp_lib.F90" + APPEND PROPERTY OBJECT_DEPENDS + "${CMAKE_BINARY_DIR}/modules/${CMAKE_CFG_INTDIR}/__fortran_builtins${CMAKE_Fortran_OUTPUT_EXTENSION}" + ) +endif () install(FILES "${RUNTIMES_OUTPUT_RESOURCE_MOD_DIR}/omp_lib.h" From c5c22910644ba2bd18be393fe28267288223a0c4 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 22 Jun 2026 18:19:29 +0200 Subject: [PATCH 036/511] gitignore: Add emacs lock files (#205055) --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index 9d4e86ab10caa..a4382c9ea7390 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,8 @@ #==============================================================================# # Temp files created by most text editors. *~ +# Emacs lock files +.#* # Merge files created by git. *.orig # Reject files created by patch. 
From f08c8eedee38d148acfe123c84f6d26e850b537c Mon Sep 17 00:00:00 2001 From: Charles Zablit Date: Mon, 22 Jun 2026 17:21:38 +0100 Subject: [PATCH 037/511] [lldb][Windows] Remember server's primary stop thread on gdb-remote stops (#203525) The client kept the previously-selected thread across stops, ignoring the primary tid from the server in `Tthread:`. On Windows, lldb-server halts the inferior by injecting a thread called `DbgUiRemoteBreakin` whose only job is to execute an int 3. After an interrupt, the process has `N+1` threads and the new one is what caused the stop. The server reports that thread as primary, but the client ignored it and stayed on whichever thread was selected before. In `TestExpressionInSyscall` that thread is main, paused just past `NtDelayExecution`'s syscall with a garbage value in the RSI register. Evaluating an expression there crashes the JIT trampoline. This patch makes `ProcessGDBRemote::RefreshStateAfterStop` record the primary tid from the most recent T-packet's `thread:` and select it after applying per thread stop infos. Fixes `TestBreakpointSetRestart` and `TestExpressionInSyscall` with LLDB_USE_LLDB_SERVER=1. 
rdar://180307914 --- .../Plugins/Process/gdb-remote/ProcessGDBRemote.cpp | 11 ++++++++++- .../Plugins/Process/gdb-remote/ProcessGDBRemote.h | 3 +++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index a986d0350bf57..85b9516a416fb 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -2636,6 +2636,8 @@ StateType ProcessGDBRemote::SetThreadStopInfo(StringExtractor &stop_packet) { SetAddressableBitMasks(addressable_bits); + m_last_stop_primary_tid = tid; + ThreadSP thread_sp = SetThreadStopInfo( tid, expedited_register_map, signo, thread_name, reason, description, exc_type, exc_data, thread_dispatch_qaddr, queue_vars_valid, @@ -2684,7 +2686,14 @@ void ProcessGDBRemote::RefreshStateAfterStop() { if (m_initial_tid != LLDB_INVALID_THREAD_ID) { m_thread_list.SetSelectedThreadByID(m_initial_tid); m_initial_tid = LLDB_INVALID_THREAD_ID; - } + } else if (m_last_stop_primary_tid != LLDB_INVALID_THREAD_ID && + StateIsRunningState(m_last_broadcast_state)) { + if (ThreadSP primary_thread_sp = + m_thread_list.FindThreadByProtocolID(m_last_stop_primary_tid, + /*can_update=*/false)) + m_thread_list.SetSelectedThreadByID(primary_thread_sp->GetID()); + } + m_last_stop_primary_tid = LLDB_INVALID_THREAD_ID; // Let all threads recover from stopping and do any clean up based on the // previous thread state (if any). 
diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h index 0a3386082c388..7a030cd966a18 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h @@ -322,6 +322,9 @@ class ProcessGDBRemote : public Process, lldb::CommandObjectSP m_command_sp; int64_t m_breakpoint_pc_offset; lldb::tid_t m_initial_tid; // The initial thread ID, given by stub on attach + lldb::tid_t m_last_stop_primary_tid = + LLDB_INVALID_THREAD_ID; // Thread ID extracted from the most recent + // T-packet's "thread:" key. bool m_use_g_packet_for_reading; bool m_allow_flash_writes; From 1c84f2992e9357a490094b9c1d9017fe4943a3e3 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 22 Jun 2026 09:26:56 -0700 Subject: [PATCH 038/511] [AMDGPU] Prevent folding of immediates larger than 64 bit (#204434) --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 3 ++ llvm/test/CodeGen/AMDGPU/fold-imm-pk64.mir | 37 ++++++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/fold-imm-pk64.mir diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index cd057355b1f1d..76944081477de 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1098,6 +1098,9 @@ SIFoldOperandsImpl::isRegSeqSplat(MachineInstr &RegSeq) const { TRI->getChannelFromSubReg(SubReg1)) return {}; + if (TRI->getSubRegIdxSize(SubReg0) != 32) + return {}; + int64_t MergedVal = Make_64(Op1->getImm(), Op0->getImm()); if (I == 0) SplatVal64 = MergedVal; diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-pk64.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-pk64.mir new file mode 100644 index 0000000000000..f2e9c2f96183a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/fold-imm-pk64.mir @@ -0,0 +1,37 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 
UTC_ARGS: --version 6 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1251 -run-pass=si-fold-operands -o - %s | FileCheck --check-prefix=GFX1251 %s + +--- +name: pk_add_f64_no_fold_imm +body: | + bb.0: + ; GFX1251-LABEL: name: pk_add_f64_no_fold_imm + ; GFX1251: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GFX1251-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4611686018427387904, implicit $exec + ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 + ; GFX1251-NEXT: [[V_PK_ADD_F64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed [[DEF]], 8, [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + %2:vreg_64_align2 = V_MOV_B64_PSEUDO 4611686018427387904, implicit $exec + %3:vreg_128_align2 = REG_SEQUENCE killed %1, %subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 + %4:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec +... 
+ +# FIXME: This should be foldable +--- +name: pk_add_f64_fold_splat +body: | + bb.0: + ; GFX1251-LABEL: name: pk_add_f64_fold_splat + ; GFX1251: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GFX1251-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 + ; GFX1251-NEXT: [[V_PK_ADD_F64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed [[DEF]], 8, [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + %2:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + %3:vreg_128_align2 = REG_SEQUENCE killed %1, %subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 + %4:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec +... 
From 2693cc173d9dc80ab70de1d583fc3c3ab33262e1 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Mon, 22 Jun 2026 09:28:47 -0700 Subject: [PATCH 039/511] [AMDGPU] Make v2x64 BUILD_VECTOR legal on gfx1251 (#204470) --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 17 +- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 5 +- llvm/test/CodeGen/AMDGPU/packed-fp64.ll | 294 +++++++----------- llvm/test/CodeGen/AMDGPU/packed-u64.ll | 176 +++++------ llvm/test/CodeGen/AMDGPU/pk-lshl-add-u64.ll | 31 +- llvm/test/CodeGen/AMDGPU/shl.v2i64.ll | 50 +-- 6 files changed, 233 insertions(+), 340 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 81053507b1ef5..ccdff3c0de381 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -554,13 +554,16 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); bool IsRegSeq = true; unsigned NOps = N->getNumOperands(); + unsigned EltSizeInRegs = EltVT.getSizeInBits() / 32; + assert(IsGCN || EltSizeInRegs == 1); for (unsigned i = 0; i < NOps; i++) { // XXX: Why is this here? if (isa(N->getOperand(i))) { IsRegSeq = false; break; } - unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i) + unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel( + i * EltSizeInRegs, EltSizeInRegs) : R600RegisterInfo::getSubRegFromChannel(i); RegSeqArgs[1 + (2 * i)] = N->getOperand(i); RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32); @@ -571,7 +574,8 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, EltVT); for (unsigned i = NOps; i < NumVectorElts; ++i) { - unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i) + unsigned Sub = IsGCN ? 
SIRegisterInfo::getSubRegFromChannel( + i * EltSizeInRegs, EltSizeInRegs) : R600RegisterInfo::getSubRegFromChannel(i); RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); RegSeqArgs[1 + (2 * i) + 1] = @@ -738,11 +742,12 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { } const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); - assert(VT.getVectorElementType().bitsEq(MVT::i32)); + EVT EltTy = VT.getVectorElementType(); + assert(EltTy.bitsEq(MVT::i32) || EltTy.bitsEq(MVT::i64)); + unsigned VecInBits = NumVectorElts * EltTy.getScalarSizeInBits(); const TargetRegisterClass *RegClass = - N->isDivergent() - ? TRI->getDefaultVectorSuperClassForBitWidth(NumVectorElts * 32) - : SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32); + N->isDivergent() ? TRI->getDefaultVectorSuperClassForBitWidth(VecInBits) + : SIRegisterInfo::getSGPRClassForBitWidth(VecInBits); SelectBuildVector(N, RegClass->getID()); return; diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 0267f696a8a93..076bd7c97c44c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -905,7 +905,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, if (Subtarget->hasPackedFP64Ops()) { setOperationAction({ISD::FADD, ISD::FMUL, ISD::FMA, ISD::FNEG, ISD::FMINNUM_IEEE, ISD::FMAXNUM_IEEE, - ISD::FCANONICALIZE}, + ISD::FCANONICALIZE, ISD::BUILD_VECTOR}, MVT::v2f64, Legal); setOperationAction( {ISD::FMINNUM, ISD::FMAXNUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM}, @@ -918,7 +918,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, } if (Subtarget->hasPackedU64Ops()) { - setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL}, MVT::v2i64, Legal); + setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL, ISD::BUILD_VECTOR}, + MVT::v2i64, Legal); setOperationAction({ISD::ADD, ISD::SUB, ISD::SHL}, {MVT::v4i64, MVT::v8i64, MVT::v16i64, MVT::v32i64}, Custom); diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp64.ll 
b/llvm/test/CodeGen/AMDGPU/packed-fp64.ll index 1ec6d41ee67dd..4b7386a6475e7 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-fp64.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-fp64.ll @@ -55,10 +55,10 @@ define amdgpu_kernel void @fadd_v2_ss(ptr addrspace(1) %a, <2 x double> %x, <2 x ; GFX1251-SDAG-NEXT: s_load_b256 s[8:15], s[4:5], 0x34 nv ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v1, s9 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v2, s10 :: v_dual_mov_b32 v3, s11 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v5, s13 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v7, s15 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] ; GFX1251-SDAG-NEXT: v_mov_b32_e32 v4, 0 @@ -107,12 +107,12 @@ define amdgpu_kernel void @fadd_v4_vs(ptr addrspace(1) %a, <4 x double> %x) { ; GFX1251-SDAG-NEXT: s_clause 0x1 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v16, s[0:1] ; GFX1251-SDAG-NEXT: global_load_b128 v[4:7], v16, s[0:1] offset:16 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v10, s14 :: v_dual_mov_b32 v12, s8 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v13, s9 :: v_dual_mov_b32 v14, s10 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v15, s11 :: v_dual_mov_b32 v11, s15 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v8, s12 :: v_dual_mov_b32 v9, s13 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[12:13], s[8:9] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[14:15], s[10:11] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x1 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1251-SDAG-NEXT: 
s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[12:15] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1251-SDAG-NEXT: v_pk_add_f64 v[4:7], v[4:7], v[8:11] @@ -406,9 +406,9 @@ define amdgpu_kernel void @fadd_v2_v_imm(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40590000 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x4059000000000000 ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, v4 :: v_dual_mov_b32 v7, v5 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -441,35 +441,20 @@ define amdgpu_kernel void @fadd_v2_v_imm(ptr addrspace(1) %a) { } define amdgpu_kernel void @fadd_v2_v_v_splat(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: fadd_v2_v_v_splat: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, v0 -; GFX1251-SDAG-NEXT: v_mov_b32_e32 v3, v1 -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_add_f64 v[2:5], v[4:7], v[0:3] -; GFX1251-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: fadd_v2_v_v_splat: -; 
GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1251-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1251-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[2:3], v[0:1] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_add_f64 v[2:5], v[4:7], v[0:3] -; GFX1251-GISEL-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: fadd_v2_v_v_splat: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1251-NEXT: v_mov_b32_e32 v1, 0 +; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1251-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_add_f64 v[2:5], v[4:7], v[0:3] +; GFX1251-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x double>, ptr addrspace(1) %a, i32 %id %load = load <2 x double>, ptr addrspace(1) %gep, align 8 @@ -490,9 +475,9 @@ define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x3ff00000 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1.0 ; 
GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, v4 :: v_dual_mov_b32 v7, v5 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -530,9 +515,8 @@ define amdgpu_kernel void @fadd_v2_v_lit_hi0(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x3ff00000 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, v4 :: v_dual_mov_b32 v7, v4 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1.0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], 0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -569,9 +553,8 @@ define amdgpu_kernel void @fadd_v2_v_lit_lo0(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v7, 0x3ff00000 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v6, v4 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], 1.0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -608,9 +591,8 @@ define amdgpu_kernel void @fadd_v2_v_unfoldable_lit(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 
src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x3ff00000 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v7, 2.0 :: v_dual_mov_b32 v6, v4 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1.0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], 2.0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -653,8 +635,8 @@ define amdgpu_kernel void @fadd_v2_v_fneg(ptr addrspace(1) %a, double %x) { ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_xor_b32 s3, s3, 0x80000000 ; GFX1251-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] ; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset @@ -698,11 +680,12 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo(ptr addrspace(1) %a, double %x) { ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_xor_b32 s4, s3, 0x80000000 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s4 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 +; GFX1251-SDAG-NEXT: s_xor_b32 s5, s3, 0x80000000 +; GFX1251-SDAG-NEXT: s_mov_b32 s4, s2 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3] +; GFX1251-SDAG-NEXT: 
v_mov_b64_e32 v[4:5], s[4:5] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 +; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] ; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_endpgm @@ -745,9 +728,10 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi(ptr addrspace(1) %a, double %x) { ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_xor_b32 s4, s3, 0x80000000 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s4 +; GFX1251-SDAG-NEXT: s_xor_b32 s5, s3, 0x80000000 +; GFX1251-SDAG-NEXT: s_mov_b32 s4, s2 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[4:5] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] @@ -795,10 +779,10 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo2(ptr addrspace(1) %a, double %x, do ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_xor_b32 s3, s3, 0x80000000 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[6:7] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[2:3] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 +; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] ; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_endpgm @@ -845,8 +829,8 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi2(ptr 
addrspace(1) %a, double %x, do ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_xor_b32 s3, s3, 0x80000000 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, s6 :: v_dual_mov_b32 v5, s7 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[6:7] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] @@ -937,10 +921,10 @@ define amdgpu_kernel void @fmul_v2_ss(ptr addrspace(1) %a, <2 x double> %x, <2 x ; GFX1251-SDAG-NEXT: s_load_b256 s[8:15], s[4:5], 0x34 nv ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v0, s8 :: v_dual_mov_b32 v1, s9 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v2, s10 :: v_dual_mov_b32 v3, s11 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, s12 :: v_dual_mov_b32 v5, s13 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, s14 :: v_dual_mov_b32 v7, s15 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1251-SDAG-NEXT: v_pk_mul_f64 v[0:3], v[0:3], v[4:7] ; GFX1251-SDAG-NEXT: v_mov_b32_e32 v4, 0 @@ -989,12 +973,12 @@ define amdgpu_kernel void @fmul_v4_vs(ptr addrspace(1) %a, <4 x double> %x) { ; GFX1251-SDAG-NEXT: s_clause 0x1 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v16, s[0:1] ; GFX1251-SDAG-NEXT: global_load_b128 v[4:7], v16, s[0:1] offset:16 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v10, s14 :: v_dual_mov_b32 v12, s8 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v13, s9 :: v_dual_mov_b32 v14, s10 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v15, s11 :: v_dual_mov_b32 v11, s15 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v8, s12 
:: v_dual_mov_b32 v9, s13 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[12:13], s[8:9] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[14:15], s[10:11] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x1 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1251-SDAG-NEXT: v_pk_mul_f64 v[0:3], v[0:3], v[12:15] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1251-SDAG-NEXT: v_pk_mul_f64 v[4:7], v[4:7], v[8:11] @@ -1288,9 +1272,9 @@ define amdgpu_kernel void @fmul_v2_v_imm(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40590000 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x4059000000000000 ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, v4 :: v_dual_mov_b32 v7, v5 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -1323,35 +1307,20 @@ define amdgpu_kernel void @fmul_v2_v_imm(ptr addrspace(1) %a) { } define amdgpu_kernel void @fmul_v2_v_v_splat(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: fmul_v2_v_v_splat: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, v0 -; 
GFX1251-SDAG-NEXT: v_mov_b32_e32 v3, v1 -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_mul_f64 v[2:5], v[4:7], v[0:3] -; GFX1251-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: fmul_v2_v_v_splat: -; GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1251-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1251-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[2:3], v[0:1] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_mul_f64 v[2:5], v[4:7], v[0:3] -; GFX1251-GISEL-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: fmul_v2_v_v_splat: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1251-NEXT: v_mov_b32_e32 v1, 0 +; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1251-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_mul_f64 v[2:5], v[4:7], v[0:3] +; GFX1251-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x double>, ptr addrspace(1) %a, i32 %id %load = load <2 x double>, ptr addrspace(1) %gep, align 8 @@ -1370,9 +1339,9 @@ define 
amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40100000 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 4.0 ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, v4 :: v_dual_mov_b32 v7, v5 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -1410,9 +1379,8 @@ define amdgpu_kernel void @fmul_v2_v_unfoldable_lit(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40100000 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v7, 0x40080000 :: v_dual_mov_b32 v6, v4 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 4.0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], 0x4008000000000000 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -1453,8 +1421,8 @@ define amdgpu_kernel void @fmul_v2_v_fneg(ptr addrspace(1) %a, double %x) { ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_xor_b32 s3, s3, 0x80000000 ; GFX1251-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, s2 :: v_dual_mov_b32 v5, s3 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, s2 :: v_dual_mov_b32 v7, s3 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 
s[2:3] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1251-SDAG-NEXT: v_pk_mul_f64 v[0:3], v[0:3], v[4:7] ; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset @@ -1874,12 +1842,11 @@ define amdgpu_kernel void @fma_v2_v_imm(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v12, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x40690000 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v9, 0x40590000 :: v_dual_mov_b32 v6, v4 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v7, v5 :: v_dual_mov_b32 v8, v4 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v10, v4 :: v_dual_mov_b32 v11, v9 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x4069000000000000 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[8:9], 0x4059000000000000 +; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[10:11], v[8:9] ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v12, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -1916,35 +1883,20 @@ define amdgpu_kernel void @fma_v2_v_imm(ptr addrspace(1) %a) { } define amdgpu_kernel void @fma_v2_v_v_splat(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: fma_v2_v_v_splat: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1251-SDAG-NEXT: 
v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, v0 -; GFX1251-SDAG-NEXT: v_mov_b32_e32 v3, v1 -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_fma_f64 v[2:5], v[4:7], v[0:3], v[0:3] -; GFX1251-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: fma_v2_v_v_splat: -; GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1251-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1251-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[2:3], v[0:1] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_fma_f64 v[2:5], v[4:7], v[0:3], v[0:3] -; GFX1251-GISEL-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: fma_v2_v_v_splat: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1251-NEXT: v_mov_b32_e32 v1, 0 +; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1251-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_fma_f64 v[2:5], v[4:7], v[0:3], v[0:3] +; GFX1251-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x double>, ptr addrspace(1) %a, i32 %id %load = load <2 x 
double>, ptr addrspace(1) %gep, align 8 @@ -1963,12 +1915,11 @@ define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v12, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x3ff00000 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v9, 0x40100000 :: v_dual_mov_b32 v6, v4 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v7, v5 :: v_dual_mov_b32 v8, v4 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v10, v4 :: v_dual_mov_b32 v11, v9 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1.0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[8:9], 4.0 +; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[10:11], v[8:9] ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v12, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -2010,12 +1961,10 @@ define amdgpu_kernel void @fma_v2_v_unfoldable_lit(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v12, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0x3ff00000 -; GFX1251-SDAG-NEXT: v_mov_b32_e32 v7, 2.0 -; GFX1251-SDAG-NEXT: v_mov_b32_e32 v9, 0x40100000 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v11, 0x40080000 :: v_dual_mov_b32 v10, v4 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, v4 :: v_dual_mov_b32 v8, v4 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1.0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 
v[6:7], 2.0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[8:9], 4.0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[10:11], 0x4008000000000000 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v12, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -2059,14 +2008,13 @@ define amdgpu_kernel void @fma_v2_v_fneg(ptr addrspace(1) %a, double %x) { ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_xor_b32 s5, s3, 0x80000000 -; GFX1251-SDAG-NEXT: s_mov_b32 s4, s2 -; GFX1251-SDAG-NEXT: s_mov_b32 s6, s2 -; GFX1251-SDAG-NEXT: s_mov_b32 s7, s5 +; GFX1251-SDAG-NEXT: s_xor_b32 s3, s3, 0x80000000 +; GFX1251-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) +; GFX1251-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] +; GFX1251-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3] ; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[4:5] ; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[6:7] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1251-SDAG-NEXT: v_pk_fma_f64 v[0:3], v[0:3], v[4:7], v[4:7] ; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_endpgm @@ -2493,19 +2441,17 @@ define amdgpu_kernel void @fneg_v2f64_pkfma(ptr addrspace(1) %out) { ; GFX1251-SDAG-LABEL: fneg_v2f64_pkfma: ; GFX1251-SDAG: ; %bb.0: ; %entry ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1251-SDAG-NEXT: v_and_b32_e32 v1, 0x3ff, v0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: s_mov_b32 s2, 0 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v2, s2 -; GFX1251-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; 
GFX1251-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; GFX1251-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX1251-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1 ; GFX1251-SDAG-NEXT: v_cndmask_b32_e64 v1, 0x3ff00000, 0, vcc_lo -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_mov_b32_e32 v3, v1 -; GFX1251-SDAG-NEXT: v_pk_fma_f64 v[0:3], v[0:3], 0, v[0:3] neg_lo:[0,0,1] neg_hi:[0,0,1] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1251-SDAG-NEXT: v_pk_fma_f64 v[2:5], v[0:3], 0, v[0:3] neg_lo:[0,0,1] neg_hi:[0,0,1] ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_store_b128 v4, v[0:3], s[0:1] +; GFX1251-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1] ; GFX1251-SDAG-NEXT: s_endpgm ; ; GFX1251-GISEL-LABEL: fneg_v2f64_pkfma: diff --git a/llvm/test/CodeGen/AMDGPU/packed-u64.ll b/llvm/test/CodeGen/AMDGPU/packed-u64.ll index c1140977339dc..2ae10b67a5d62 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-u64.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-u64.ll @@ -54,12 +54,12 @@ define amdgpu_kernel void @add_v2_ss(ptr addrspace(1) %a, <2 x i64> %x, <2 x i64 ; GFX1251-SDAG-NEXT: s_clause 0x1 ; GFX1251-SDAG-NEXT: s_load_b256 s[8:15], s[4:5], 0x34 nv ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-SDAG-NEXT: v_mov_b32_e32 v8, 0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v0, s8 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v1, s9 :: v_dual_mov_b32 v2, s10 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v3, s11 :: v_dual_mov_b32 v4, s12 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v5, s13 :: v_dual_mov_b32 v6, s14 -; GFX1251-SDAG-NEXT: v_mov_b32_e32 v7, s15 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; 
GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1251-SDAG-NEXT: v_pk_add_nc_u64 v[0:3], v[0:3], v[4:7] ; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] @@ -107,12 +107,12 @@ define amdgpu_kernel void @add_v4_vs(ptr addrspace(1) %a, <4 x i64> %x) { ; GFX1251-SDAG-NEXT: s_clause 0x1 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v16, s[0:1] ; GFX1251-SDAG-NEXT: global_load_b128 v[4:7], v16, s[0:1] offset:16 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v10, s14 :: v_dual_mov_b32 v12, s8 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v13, s9 :: v_dual_mov_b32 v14, s10 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v15, s11 :: v_dual_mov_b32 v11, s15 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v8, s12 :: v_dual_mov_b32 v9, s13 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[12:13], s[8:9] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[14:15], s[10:11] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x1 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1251-SDAG-NEXT: v_pk_add_nc_u64 v[0:3], v[0:3], v[12:15] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1251-SDAG-NEXT: v_pk_add_nc_u64 v[4:7], v[4:7], v[8:11] @@ -406,9 +406,9 @@ define amdgpu_kernel void @add_v2_v_imm(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0x64 :: v_dual_mov_b32 v5, 0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x64 ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, v4 :: v_dual_mov_b32 v7, v5 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX1251-SDAG-NEXT: s_wait_kmcnt 
0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -441,35 +441,20 @@ define amdgpu_kernel void @add_v2_v_imm(ptr addrspace(1) %a) { } define amdgpu_kernel void @add_v2_v_v_splat(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: add_v2_v_v_splat: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, v0 -; GFX1251-SDAG-NEXT: v_mov_b32_e32 v3, v1 -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_add_nc_u64 v[2:5], v[4:7], v[0:3] -; GFX1251-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: add_v2_v_v_splat: -; GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1251-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1251-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[2:3], v[0:1] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_add_nc_u64 v[2:5], v[4:7], v[0:3] -; GFX1251-GISEL-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: add_v2_v_v_splat: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; 
GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1251-NEXT: v_mov_b32_e32 v1, 0 +; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1251-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_add_nc_u64 v[2:5], v[4:7], v[0:3] +; GFX1251-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x i64>, ptr addrspace(1) %a, i32 %id %load = load <2 x i64>, ptr addrspace(1) %gep, align 8 @@ -490,9 +475,9 @@ define amdgpu_kernel void @add_v2_v_lit_splat(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1 ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, v4 :: v_dual_mov_b32 v7, v5 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -530,9 +515,8 @@ define amdgpu_kernel void @add_v2_v_lit_hi0(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 1 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, v5 :: v_dual_mov_b32 v7, v5 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 
v[6:7], 0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -569,9 +553,8 @@ define amdgpu_kernel void @add_v2_v_lit_lo0(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v6, 1 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v7, v4 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], 1 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -608,9 +591,8 @@ define amdgpu_kernel void @add_v2_v_unfoldable_lit(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 1 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, 2 :: v_dual_mov_b32 v7, v5 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], 2 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -697,12 +679,12 @@ define amdgpu_kernel void @sub_v2_ss(ptr addrspace(1) %a, <2 x i64> %x, <2 x i64 ; GFX1251-SDAG-NEXT: s_clause 0x1 ; GFX1251-SDAG-NEXT: s_load_b256 s[8:15], s[4:5], 0x34 nv ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-SDAG-NEXT: v_mov_b32_e32 v8, 0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v8, 0 
:: v_dual_mov_b32 v0, s8 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v1, s9 :: v_dual_mov_b32 v2, s10 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v3, s11 :: v_dual_mov_b32 v4, s12 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v5, s13 :: v_dual_mov_b32 v6, s14 -; GFX1251-SDAG-NEXT: v_mov_b32_e32 v7, s15 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15] ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1251-SDAG-NEXT: v_pk_sub_nc_u64 v[0:3], v[0:3], v[4:7] ; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] @@ -750,12 +732,12 @@ define amdgpu_kernel void @sub_v4_vs(ptr addrspace(1) %a, <4 x i64> %x) { ; GFX1251-SDAG-NEXT: s_clause 0x1 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v16, s[0:1] ; GFX1251-SDAG-NEXT: global_load_b128 v[4:7], v16, s[0:1] offset:16 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v10, s14 :: v_dual_mov_b32 v12, s8 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v13, s9 :: v_dual_mov_b32 v14, s10 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v15, s11 :: v_dual_mov_b32 v11, s15 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v8, s12 :: v_dual_mov_b32 v9, s13 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[12:13], s[8:9] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[14:15], s[10:11] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[8:9], s[12:13] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[10:11], s[14:15] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x1 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_2) ; GFX1251-SDAG-NEXT: v_pk_sub_nc_u64 v[0:3], v[0:3], v[12:15] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1251-SDAG-NEXT: v_pk_sub_nc_u64 v[4:7], v[4:7], v[8:11] @@ -1049,9 +1031,9 @@ define amdgpu_kernel void @sub_v2_v_imm(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 
src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0x64 :: v_dual_mov_b32 v5, 0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x64 ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, v4 :: v_dual_mov_b32 v7, v5 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -1089,9 +1071,9 @@ define amdgpu_kernel void @sub_v2_imm_v(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0x64 :: v_dual_mov_b32 v5, 0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x64 ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, v4 :: v_dual_mov_b32 v7, v5 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -1124,35 +1106,20 @@ define amdgpu_kernel void @sub_v2_imm_v(ptr addrspace(1) %a) { } define amdgpu_kernel void @sub_v2_v_v_splat(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: sub_v2_v_v_splat: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, v0 -; GFX1251-SDAG-NEXT: v_mov_b32_e32 v3, v1 -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; 
GFX1251-SDAG-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_sub_nc_u64 v[2:5], v[4:7], v[0:3] -; GFX1251-SDAG-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: sub_v2_v_v_splat: -; GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 -; GFX1251-GISEL-NEXT: v_mov_b32_e32 v1, 0 -; GFX1251-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[2:3], v[0:1] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_sub_nc_u64 v[2:5], v[4:7], v[0:3] -; GFX1251-GISEL-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: sub_v2_v_v_splat: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v0, 0x3ff, v0 +; GFX1251-NEXT: v_mov_b32_e32 v1, 0 +; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1251-NEXT: v_mov_b64_e32 v[2:3], v[0:1] +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: global_load_b128 v[4:7], v0, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_sub_nc_u64 v[2:5], v[4:7], v[0:3] +; GFX1251-NEXT: global_store_b128 v0, v[2:5], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x i64>, ptr addrspace(1) %a, i32 %id %load = load <2 x i64>, ptr addrspace(1) %gep, align 8 @@ -1173,9 +1140,9 @@ define amdgpu_kernel void @sub_v2_v_lit_splat(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: 
s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 1 :: v_dual_mov_b32 v5, 0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1 ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, v4 :: v_dual_mov_b32 v7, v5 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -1213,9 +1180,8 @@ define amdgpu_kernel void @sub_v2_v_lit_hi0(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 1 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, v5 :: v_dual_mov_b32 v7, v5 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], 0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -1252,9 +1218,8 @@ define amdgpu_kernel void @sub_v2_v_lit_lo0(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v6, 1 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v7, v4 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], 1 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; 
GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 @@ -1291,9 +1256,8 @@ define amdgpu_kernel void @sub_v2_v_unfoldable_lit(ptr addrspace(1) %a) { ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 1 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_dual_mov_b32 v6, 2 :: v_dual_mov_b32 v7, v5 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], 2 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 diff --git a/llvm/test/CodeGen/AMDGPU/pk-lshl-add-u64.ll b/llvm/test/CodeGen/AMDGPU/pk-lshl-add-u64.ll index a045572322ee8..ee7e164057938 100644 --- a/llvm/test/CodeGen/AMDGPU/pk-lshl-add-u64.ll +++ b/llvm/test/CodeGen/AMDGPU/pk-lshl-add-u64.ll @@ -102,8 +102,8 @@ define amdgpu_kernel void @pk_lshl_add_u64_s2s(<2 x i64> %v, <2 x i64> %a) { ; GFX1251-NEXT: s_wait_kmcnt 0x0 ; GFX1251-NEXT: s_lshl_b64 s[0:1], s[8:9], 2 ; GFX1251-NEXT: s_lshl_b64 s[2:3], s[10:11], 2 -; GFX1251-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13 -; GFX1251-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s15 +; GFX1251-NEXT: v_mov_b64_e32 v[0:1], s[12:13] +; GFX1251-NEXT: v_mov_b64_e32 v[2:3], s[14:15] ; GFX1251-NEXT: v_mov_b64_e32 v[4:5], s[0:1] ; GFX1251-NEXT: v_mov_b64_e32 v[6:7], s[2:3] ; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -147,14 +147,13 @@ define i32 @pk_lshl_add_u64_maybe_oob(<2 x ptr> %p, <2 x i32> %i) { ; GFX1251-NEXT: s_wait_kmcnt 0x0 ; GFX1251-NEXT: v_dual_mov_b32 v6, v5 :: v_dual_ashrrev_i32 v5, 31, v4 ; GFX1251-NEXT: s_mov_b32 s0, 2 -; GFX1251-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) -; 
GFX1251-NEXT: v_dual_mov_b32 v8, 12 :: v_dual_mov_b32 v11, s0 -; GFX1251-NEXT: v_dual_mov_b32 v10, s0 :: v_dual_ashrrev_i32 v7, 31, v6 -; GFX1251-NEXT: v_mov_b32_e32 v9, 0 -; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1251-NEXT: v_pk_lshl_add_u64 v[0:3], v[4:7], v[10:11], v[0:3] -; GFX1251-NEXT: v_dual_mov_b32 v10, v8 :: v_dual_mov_b32 v11, v9 -; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1251-NEXT: v_mov_b64_e32 v[8:9], 12 +; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX1251-NEXT: v_dual_mov_b32 v13, s0 :: v_dual_ashrrev_i32 v7, 31, v6 +; GFX1251-NEXT: v_mov_b32_e32 v12, s0 +; GFX1251-NEXT: v_mov_b64_e32 v[10:11], v[8:9] +; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1251-NEXT: v_pk_lshl_add_u64 v[0:3], v[4:7], v[12:13], v[0:3] ; GFX1251-NEXT: v_pk_add_nc_u64 v[0:3], v[0:3], v[8:11] ; GFX1251-NEXT: flat_load_b32 v4, v[0:1] ; GFX1251-NEXT: flat_load_b32 v5, v[2:3] @@ -180,8 +179,8 @@ define amdgpu_kernel void @pk_lshl_add_u64_s2s_shift2_3(<2 x i64> %v, <2 x i64> ; GFX1251-NEXT: s_wait_kmcnt 0x0 ; GFX1251-NEXT: s_lshl_b64 s[0:1], s[8:9], 2 ; GFX1251-NEXT: s_lshl_b64 s[2:3], s[10:11], 3 -; GFX1251-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13 -; GFX1251-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s15 +; GFX1251-NEXT: v_mov_b64_e32 v[0:1], s[12:13] +; GFX1251-NEXT: v_mov_b64_e32 v[2:3], s[14:15] ; GFX1251-NEXT: v_mov_b64_e32 v[4:5], s[0:1] ; GFX1251-NEXT: v_mov_b64_e32 v[6:7], s[2:3] ; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -204,8 +203,8 @@ define amdgpu_kernel void @pk_lshl_add_u64_s2s_shift2_4(<2 x i64> %v, <2 x i64> ; GFX1251-NEXT: s_wait_kmcnt 0x0 ; GFX1251-NEXT: s_lshl_b64 s[0:1], s[8:9], 2 ; GFX1251-NEXT: s_lshl_b64 s[2:3], s[10:11], 4 -; GFX1251-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13 -; GFX1251-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s15 +; GFX1251-NEXT: 
v_mov_b64_e32 v[0:1], s[12:13] +; GFX1251-NEXT: v_mov_b64_e32 v[2:3], s[14:15] ; GFX1251-NEXT: v_mov_b64_e32 v[4:5], s[0:1] ; GFX1251-NEXT: v_mov_b64_e32 v[6:7], s[2:3] ; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_1) @@ -226,8 +225,8 @@ define amdgpu_kernel void @pk_lshl_add_u64_s2s_shift2_5(<2 x i64> %v, <2 x i64> ; GFX1251-NEXT: s_wait_kmcnt 0x0 ; GFX1251-NEXT: s_lshl_b64 s[0:1], s[8:9], 2 ; GFX1251-NEXT: s_lshl_b64 s[2:3], s[10:11], 5 -; GFX1251-NEXT: v_dual_mov_b32 v0, s12 :: v_dual_mov_b32 v1, s13 -; GFX1251-NEXT: v_dual_mov_b32 v2, s14 :: v_dual_mov_b32 v3, s15 +; GFX1251-NEXT: v_mov_b64_e32 v[0:1], s[12:13] +; GFX1251-NEXT: v_mov_b64_e32 v[2:3], s[14:15] ; GFX1251-NEXT: v_mov_b64_e32 v[4:5], s[0:1] ; GFX1251-NEXT: v_mov_b64_e32 v[6:7], s[2:3] ; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_1) diff --git a/llvm/test/CodeGen/AMDGPU/shl.v2i64.ll b/llvm/test/CodeGen/AMDGPU/shl.v2i64.ll index ecbb578957232..9b57faa81d46f 100644 --- a/llvm/test/CodeGen/AMDGPU/shl.v2i64.ll +++ b/llvm/test/CodeGen/AMDGPU/shl.v2i64.ll @@ -94,14 +94,10 @@ define amdgpu_kernel void @s_shl_v2i64_imm_s(ptr addrspace(1) %out, <2 x i64> %r ; GFX1251-NEXT: s_clause 0x1 ; GFX1251-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 nv ; GFX1251-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv -; GFX1251-NEXT: s_mov_b64 s[8:9], 0x4d2 -; GFX1251-NEXT: s_wait_xcnt 0x0 -; GFX1251-NEXT: s_movk_i32 s4, 0x162e -; GFX1251-NEXT: s_mov_b32 s5, s9 ; GFX1251-NEXT: v_mov_b32_e32 v4, 0 ; GFX1251-NEXT: s_wait_kmcnt 0x0 -; GFX1251-NEXT: s_lshl_b64 s[0:1], s[8:9], s0 -; GFX1251-NEXT: s_lshl_b64 s[2:3], s[4:5], s2 +; GFX1251-NEXT: s_lshl_b64 s[0:1], 0x4d2, s0 +; GFX1251-NEXT: s_lshl_b64 s[2:3], 0x162e, s2 ; GFX1251-NEXT: v_mov_b64_e32 v[0:1], s[0:1] ; GFX1251-NEXT: v_mov_b64_e32 v[2:3], s[2:3] ; GFX1251-NEXT: global_store_b128 v4, v[0:3], s[6:7] @@ -185,36 +181,18 @@ define amdgpu_kernel void @shl_s_v_v2i64(ptr addrspace(1) %out, ptr addrspace(1) } define amdgpu_kernel void @shl_imm_v_v2i64(ptr addrspace(1) %out, ptr addrspace(1) 
%in) #0 { -; GFX1250-LABEL: shl_imm_v_v2i64: -; GFX1250: ; %bb.0: -; GFX1250-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv -; GFX1250-NEXT: v_and_b32_e32 v4, 0x3ff, v0 -; GFX1250-NEXT: s_wait_kmcnt 0x0 -; GFX1250-NEXT: global_load_b128 v[0:3], v4, s[2:3] scale_offset -; GFX1250-NEXT: s_wait_loadcnt 0x0 -; GFX1250-NEXT: v_lshlrev_b64_e64 v[2:3], v2, 8 -; GFX1250-NEXT: v_lshlrev_b64_e64 v[0:1], v0, 8 -; GFX1250-NEXT: global_store_b128 v4, v[0:3], s[0:1] scale_offset -; GFX1250-NEXT: s_endpgm -; -; GFX1251-LABEL: shl_imm_v_v2i64: -; GFX1251: ; %bb.0: -; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv -; GFX1251-NEXT: v_and_b32_e32 v4, 0x3ff, v0 -; GFX1251-NEXT: s_wait_kmcnt 0x0 -; GFX1251-NEXT: global_load_b128 v[0:3], v4, s[2:3] scale_offset -; GFX1251-NEXT: s_wait_xcnt 0x0 -; GFX1251-NEXT: s_mov_b64 s[2:3], 8 -; GFX1251-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX1251-NEXT: s_mov_b32 s4, s2 -; GFX1251-NEXT: s_mov_b32 s5, s3 -; GFX1251-NEXT: s_wait_loadcnt 0x0 -; GFX1251-NEXT: v_lshlrev_b64_e64 v[2:3], v2, s[4:5] -; GFX1251-NEXT: v_lshlrev_b64_e64 v[0:1], v0, s[2:3] -; GFX1251-NEXT: global_store_b128 v4, v[0:3], s[0:1] scale_offset -; GFX1251-NEXT: s_endpgm +; GCN-LABEL: shl_imm_v_v2i64: +; GCN: ; %bb.0: +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GCN-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 nv +; GCN-NEXT: v_and_b32_e32 v4, 0x3ff, v0 +; GCN-NEXT: s_wait_kmcnt 0x0 +; GCN-NEXT: global_load_b128 v[0:3], v4, s[2:3] scale_offset +; GCN-NEXT: s_wait_loadcnt 0x0 +; GCN-NEXT: v_lshlrev_b64_e64 v[2:3], v2, 8 +; GCN-NEXT: v_lshlrev_b64_e64 v[0:1], v0, 8 +; GCN-NEXT: global_store_b128 v4, v[0:3], s[0:1] scale_offset +; GCN-NEXT: s_endpgm %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64 
%in.gep = getelementptr inbounds <2 x i64>, ptr addrspace(1) %in, i64 %tid.ext From 89e8dd9a17b620a902f13da8b8d0322a2d823dd9 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 22 Jun 2026 18:30:29 +0200 Subject: [PATCH 040/511] Revert "runtimes: Pass CMAKE_SYSTEM_NAME based on target triple" (#205133) Reverts llvm/llvm-project#203504 Breaks build pending #205130 --- clang/cmake/modules/ClangConfig.cmake.in | 5 +- cmake/Modules/GetTripleCMakeSystemName.cmake | 89 ------------------- cmake/Modules/NormalizeTriple.cmake | 36 -------- llvm/cmake/modules/LLVMConfig.cmake.in | 69 +++++++------- .../modules/LLVMExternalProjectUtils.cmake | 64 ++++--------- llvm/runtimes/CMakeLists.txt | 4 + runtimes/CMakeLists.txt | 18 +++- 7 files changed, 73 insertions(+), 212 deletions(-) delete mode 100644 cmake/Modules/GetTripleCMakeSystemName.cmake delete mode 100644 cmake/Modules/NormalizeTriple.cmake diff --git a/clang/cmake/modules/ClangConfig.cmake.in b/clang/cmake/modules/ClangConfig.cmake.in index e199c7e17b6b7..68f723d050117 100644 --- a/clang/cmake/modules/ClangConfig.cmake.in +++ b/clang/cmake/modules/ClangConfig.cmake.in @@ -13,10 +13,7 @@ set(CLANG_LINK_CLANG_DYLIB "@CLANG_LINK_CLANG_DYLIB@") set(CLANG_DEFAULT_LINKER "@CLANG_DEFAULT_LINKER@") # Provide all our library targets to users. -# Skip when cross-compiling, as host library targets are not usable. 
-if(NOT CMAKE_CROSSCOMPILING) - @CLANG_CONFIG_INCLUDE_EXPORTS@ -endif() +@CLANG_CONFIG_INCLUDE_EXPORTS@ # By creating clang-tablegen-targets here, subprojects that depend on Clang's # tablegen-generated headers can always depend on this target whether building diff --git a/cmake/Modules/GetTripleCMakeSystemName.cmake b/cmake/Modules/GetTripleCMakeSystemName.cmake deleted file mode 100644 index 6cd8d3c59324e..0000000000000 --- a/cmake/Modules/GetTripleCMakeSystemName.cmake +++ /dev/null @@ -1,89 +0,0 @@ -#===--------------------------------------------------------------------===// -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for details. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -#===--------------------------------------------------------------------===// - -# Extract the OS component from a target triple and map it to the -# corresponding CMake system name. -# -# Usage: -# get_triple_cmake_system_name( ) -# -# Parses the triple (arch-vendor-os[-env]) and sets to the -# CMake-style system name (e.g. "Darwin", "Linux", "Windows"). -# Unrecognized OS values are mapped to "Generic". This expects a -# normalized triple. - -function(get_triple_cmake_system_name triple out_var) - string(REPLACE "-" ";" _components "${triple}") - list(LENGTH _components _len) - if(_len LESS 3) - set(${out_var} "${CMAKE_HOST_SYSTEM_NAME}" PARENT_SCOPE) - return() - endif() - - list(GET _components 1 _vendor) - list(GET _components 2 _os) - set(_env "") - if(_len GREATER_EQUAL 4) - list(GET _components 3 _env) - endif() - - # Check the special environment components first, since it can - # override the usual OS mapping. 
- if("${_env}" MATCHES "^android") - set(${out_var} "Android" PARENT_SCOPE) - elseif("${_env}" MATCHES "^cygnus") - set(${out_var} "CYGWIN" PARENT_SCOPE) - elseif("${_os}" MATCHES "^darwin|^macos") - set(${out_var} "Darwin" PARENT_SCOPE) - elseif("${_os}" MATCHES "^ios") - set(${out_var} "iOS" PARENT_SCOPE) - elseif("${_os}" MATCHES "^tvos") - set(${out_var} "tvOS" PARENT_SCOPE) - elseif("${_os}" MATCHES "^watchos") - set(${out_var} "watchOS" PARENT_SCOPE) - elseif("${_os}" MATCHES "^xros|^visionos") - set(${out_var} "visionOS" PARENT_SCOPE) - elseif("${_vendor}" STREQUAL "apple") - # Catch-all for other Apple triples (e.g. driverkit, bridgeos). - set(${out_var} "Darwin" PARENT_SCOPE) - elseif("${_os}" MATCHES "^linux") - set(${out_var} "Linux" PARENT_SCOPE) - elseif("${_os}" MATCHES "^windows") - set(${out_var} "Windows" PARENT_SCOPE) - elseif("${_os}" MATCHES "^freebsd|^kfreebsd") - set(${out_var} "FreeBSD" PARENT_SCOPE) - elseif("${_os}" MATCHES "^netbsd") - set(${out_var} "NetBSD" PARENT_SCOPE) - elseif("${_os}" MATCHES "^openbsd") - set(${out_var} "OpenBSD" PARENT_SCOPE) - elseif("${_os}" MATCHES "^dragonfly") - set(${out_var} "DragonFly" PARENT_SCOPE) - elseif("${_os}" MATCHES "^solaris") - set(${out_var} "SunOS" PARENT_SCOPE) - elseif("${_os}" MATCHES "^aix") - set(${out_var} "AIX" PARENT_SCOPE) - elseif("${_os}" MATCHES "^fuchsia") - set(${out_var} "Fuchsia" PARENT_SCOPE) - elseif("${_os}" MATCHES "^haiku") - set(${out_var} "Haiku" PARENT_SCOPE) - elseif("${_os}" MATCHES "^emscripten") - set(${out_var} "Emscripten" PARENT_SCOPE) - elseif("${_os}" MATCHES "^wasi") - set(${out_var} "WASI" PARENT_SCOPE) - elseif("${_os}" MATCHES "^rtems") - set(${out_var} "RTEMS" PARENT_SCOPE) - elseif("${_os}" MATCHES "^zos") - set(${out_var} "OS390" PARENT_SCOPE) - elseif("${_os}" MATCHES "^hurd") - set(${out_var} "GNU" PARENT_SCOPE) - elseif("${_os}" MATCHES "^serenity") - set(${out_var} "SerenityOS" PARENT_SCOPE) - else() - set(${out_var} "Generic" PARENT_SCOPE) - endif() 
-endfunction() diff --git a/cmake/Modules/NormalizeTriple.cmake b/cmake/Modules/NormalizeTriple.cmake deleted file mode 100644 index 08f09a22bdbb0..0000000000000 --- a/cmake/Modules/NormalizeTriple.cmake +++ /dev/null @@ -1,36 +0,0 @@ -#===--------------------------------------------------------------------===// -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for details. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -#===--------------------------------------------------------------------===// - -# Normalize a target triple using clang's -print-target-triple. -# -# Usage: -# normalize_triple( ) -# -# Runs --target= -print-target-triple to produce a -# canonical triple. If the compiler invocation fails (e.g. the compiler -# is not clang), is returned unchanged. - -function(normalize_triple compiler triple out_var) - set(_prefix "") - if(CMAKE_C_COMPILER_FRONTEND_VARIANT MATCHES "MSVC") - set(_prefix "/clang:") - endif() - execute_process( - COMMAND "${compiler}" "${_prefix}--target=${triple}" "${_prefix}-print-target-triple" - RESULT_VARIABLE _result - OUTPUT_VARIABLE _output - OUTPUT_STRIP_TRAILING_WHITESPACE - ERROR_QUIET) - if(_result EQUAL 0 AND _output) - set(${out_var} "${_output}" PARENT_SCOPE) - else() - # TODO(#97876): Report an error. 
- message(WARNING "Failed to execute `${compiler} ${_prefix}--target=${triple} ${_prefix}-print-target-triple` to normalize target triple.") - set(${out_var} "${triple}" PARENT_SCOPE) - endif() -endfunction() diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in index 6ef0cef7d0296..300c25e7c6101 100644 --- a/llvm/cmake/modules/LLVMConfig.cmake.in +++ b/llvm/cmake/modules/LLVMConfig.cmake.in @@ -56,47 +56,52 @@ set(LLVM_ENABLE_ASSERTIONS @LLVM_ENABLE_ASSERTIONS@) set(LLVM_ENABLE_EH @LLVM_ENABLE_EH@) set(LLVM_ENABLE_FFI @LLVM_ENABLE_FFI@) +if(LLVM_ENABLE_FFI) + find_package(FFI) +endif() + set(LLVM_ENABLE_RTTI @LLVM_ENABLE_RTTI@) + +set(LLVM_ENABLE_LIBEDIT @HAVE_LIBEDIT@) +if(LLVM_ENABLE_LIBEDIT) + find_package(LibEdit) +endif() + set(LLVM_ENABLE_THREADS @LLVM_ENABLE_THREADS@) + set(LLVM_ENABLE_UNWIND_TABLES @LLVM_ENABLE_UNWIND_TABLES@) + set(LLVM_ENABLE_ZLIB @LLVM_ENABLE_ZLIB@) +if(LLVM_ENABLE_ZLIB) + set(ZLIB_ROOT @ZLIB_ROOT@) + find_package(ZLIB) +endif() + set(LLVM_ENABLE_ZSTD @LLVM_ENABLE_ZSTD@) +if(LLVM_ENABLE_ZSTD) + find_package(zstd) +endif() + set(LLVM_ENABLE_LIBXML2 @LLVM_ENABLE_LIBXML2@) +if(LLVM_ENABLE_LIBXML2) + find_package(LibXml2) +endif() + set(LLVM_ENABLE_CURL @LLVM_ENABLE_CURL@) +if(LLVM_ENABLE_CURL) + find_package(CURL) +endif() + set(LLVM_ENABLE_HTTPLIB @LLVM_ENABLE_HTTPLIB@) +if(LLVM_ENABLE_HTTPLIB) + find_package(httplib) +endif() + set(LLVM_WITH_Z3 @LLVM_WITH_Z3@) -set(LLVM_ENABLE_DIA_SDK @LLVM_ENABLE_DIA_SDK@) -set(LLVM_ENABLE_LIBEDIT @HAVE_LIBEDIT@) -# These are host libraries that LLVM was built with. Only find them when the -# consumer can actually use them (i.e. not when cross-compiling for an -# incompatible target). 
-if(NOT CMAKE_CROSSCOMPILING) - if(LLVM_ENABLE_FFI) - find_package(FFI) - endif() - if(LLVM_ENABLE_LIBEDIT) - find_package(LibEdit) - endif() - if(LLVM_ENABLE_ZLIB) - set(ZLIB_ROOT @ZLIB_ROOT@) - find_package(ZLIB) - endif() - if(LLVM_ENABLE_ZSTD) - find_package(zstd) - endif() - if(LLVM_ENABLE_LIBXML2) - find_package(LibXml2) - endif() - if(LLVM_ENABLE_CURL) - find_package(CURL) - endif() - if(LLVM_ENABLE_HTTPLIB) - find_package(httplib) - endif() - if(LLVM_ENABLE_DIA_SDK) - find_package(DIASDK) - endif() +set(LLVM_ENABLE_DIA_SDK @LLVM_ENABLE_DIA_SDK@) +if(LLVM_ENABLE_DIA_SDK) + find_package(DIASDK) endif() set(LLVM_NATIVE_ARCH @LLVM_NATIVE_ARCH@) @@ -147,7 +152,7 @@ set(LLVM_ENABLE_SHARED_LIBS @BUILD_SHARED_LIBS@) set(LLVM_DEFAULT_EXTERNAL_LIT "@LLVM_CONFIG_DEFAULT_EXTERNAL_LIT@") set(LLVM_LIT_ARGS "@LLVM_LIT_ARGS@") -if(NOT TARGET LLVMSupport AND NOT CMAKE_CROSSCOMPILING) +if(NOT TARGET LLVMSupport) @LLVM_CONFIG_INCLUDE_EXPORTS@ @llvm_config_include_buildtree_only_exports@ endif() diff --git a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake index 9567792e664e4..ee270d70a778d 100644 --- a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake +++ b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake @@ -84,6 +84,12 @@ function(llvm_ExternalProject_Add name source_dir) endif() endforeach() + # If CMAKE_SYSTEM_NAME is not set explicitly in the arguments passed to us, + # reflect CMake's own default. 
+ if (NOT _cmake_system_name) + set(_cmake_system_name "${CMAKE_HOST_SYSTEM_NAME}") + endif() + if(NOT ARG_TARGET_TRIPLE) set(target_triple ${LLVM_DEFAULT_TARGET_TRIPLE}) else() @@ -92,36 +98,6 @@ function(llvm_ExternalProject_Add name source_dir) is_msvc_triple(is_msvc_target "${target_triple}") - if(ARG_USE_TOOLCHAIN AND NOT CMAKE_CROSSCOMPILING) - set(_cmake_c_compiler "${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX}") - set(_cmake_cxx_compiler "${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++${CMAKE_EXECUTABLE_SUFFIX}") - set(_cmake_asm_compiler "${_cmake_c_compiler}") - if(is_msvc_target) - set(_cmake_c_compiler "${LLVM_RUNTIME_OUTPUT_INTDIR}/clang-cl${CMAKE_EXECUTABLE_SUFFIX}") - set(_cmake_cxx_compiler "${_cmake_c_compiler}") - set(_cmake_asm_compiler "${_cmake_c_compiler}") - endif() - else() - set(_cmake_c_compiler "${CMAKE_C_COMPILER}") - set(_cmake_cxx_compiler "${CMAKE_CXX_COMPILER}") - set(_cmake_asm_compiler "${CMAKE_C_COMPILER}") - endif() - - # If CMAKE_SYSTEM_NAME is not set explicitly in the arguments passed to us, - # derive it from the target triple if available, otherwise reflect CMake's - # own default. This ensures that cross-compilation targets get the correct - # platform files (e.g. AMDGPU targets on a Darwin host won't get macOS flags). 
- if (NOT _cmake_system_name) - if(ARG_TARGET_TRIPLE) - include(NormalizeTriple) - normalize_triple("${_cmake_c_compiler}" "${ARG_TARGET_TRIPLE}" _normalized_triple) - include(GetTripleCMakeSystemName) - get_triple_cmake_system_name("${_normalized_triple}" _cmake_system_name) - else() - set(_cmake_system_name "${CMAKE_HOST_SYSTEM_NAME}") - endif() - endif() - if(NOT ARG_TOOLCHAIN_TOOLS) set(ARG_TOOLCHAIN_TOOLS clang) if (ARG_ENABLE_FORTRAN) @@ -255,9 +231,15 @@ function(llvm_ExternalProject_Add name source_dir) if(ARG_USE_TOOLCHAIN AND NOT CMAKE_CROSSCOMPILING) if(CLANG_IN_TOOLCHAIN) - set(compiler_args -DCMAKE_C_COMPILER=${_cmake_c_compiler} - -DCMAKE_CXX_COMPILER=${_cmake_cxx_compiler} - -DCMAKE_ASM_COMPILER=${_cmake_asm_compiler}) + if(is_msvc_target) + set(compiler_args -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang-cl${CMAKE_EXECUTABLE_SUFFIX} + -DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang-cl${CMAKE_EXECUTABLE_SUFFIX} + -DCMAKE_ASM_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang-cl${CMAKE_EXECUTABLE_SUFFIX}) + else() + set(compiler_args -DCMAKE_C_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX} + -DCMAKE_CXX_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang++${CMAKE_EXECUTABLE_SUFFIX} + -DCMAKE_ASM_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/clang${CMAKE_EXECUTABLE_SUFFIX}) + endif() endif() if(FLANG_IN_TOOLCHAIN) list(APPEND compiler_args -DCMAKE_Fortran_COMPILER=${LLVM_RUNTIME_OUTPUT_INTDIR}/flang${CMAKE_EXECUTABLE_SUFFIX}) @@ -397,22 +379,6 @@ function(llvm_ExternalProject_Add name source_dir) list(APPEND compiler_args -DCMAKE_CXX_COMPILER_TARGET=${ARG_TARGET_TRIPLE}) list(APPEND compiler_args -DCMAKE_Fortran_COMPILER_TARGET=${ARG_TARGET_TRIPLE}) list(APPEND compiler_args -DCMAKE_ASM_COMPILER_TARGET=${ARG_TARGET_TRIPLE}) - - # Pass CMAKE_SYSTEM_NAME derived from the target triple so the sub-build - # loads the correct platform files instead of the host's. 
- if(NOT "${_cmake_system_name}" STREQUAL "${CMAKE_HOST_SYSTEM_NAME}") - list(APPEND compiler_args -DCMAKE_SYSTEM_NAME=${_cmake_system_name}) - endif() - - # Forward Darwin-specific variables only when targeting Darwin. - if("${_cmake_system_name}" STREQUAL "Darwin") - if(CMAKE_OSX_SYSROOT) - list(APPEND compiler_args -DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT}) - endif() - if(CMAKE_OSX_DEPLOYMENT_TARGET) - list(APPEND compiler_args -DCMAKE_OSX_DEPLOYMENT_TARGET=${CMAKE_OSX_DEPLOYMENT_TARGET}) - endif() - endif() endif() if(CMAKE_VERBOSE_MAKEFILE) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index af50413c9b0b7..6d81b26d2d416 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -4,6 +4,10 @@ # the two files. set(COMMON_CMAKE_ARGS "-DHAVE_LLVM_LIT=ON;-DCLANG_RESOURCE_DIR=${CLANG_RESOURCE_DIR}") +if(APPLE AND CMAKE_OSX_SYSROOT AND (LLVM_TARGET_TRIPLE STREQUAL LLVM_HOST_TRIPLE)) + # Only propagate the host sysroot for native runtimes builds. 
+ list(APPEND RUNTIMES_CMAKE_ARGS "-DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT}") +endif() foreach(proj ${LLVM_ENABLE_RUNTIMES}) string(TOUPPER "${proj}" canon_name) STRING(REGEX REPLACE "-" "_" canon_name ${canon_name}) diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt index 36ebe594edc0d..0a84ef3957f76 100644 --- a/runtimes/CMakeLists.txt +++ b/runtimes/CMakeLists.txt @@ -228,8 +228,22 @@ message(STATUS "LLVM default target triple: ${LLVM_DEFAULT_TARGET_TRIPLE}") set(LLVM_TARGET_TRIPLE "${LLVM_DEFAULT_TARGET_TRIPLE}") if(CMAKE_C_COMPILER_ID MATCHES "Clang") - include(NormalizeTriple) - normalize_triple("${CMAKE_C_COMPILER}" "${LLVM_DEFAULT_TARGET_TRIPLE}" LLVM_DEFAULT_TARGET_TRIPLE) + set(option_prefix "") + if (CMAKE_C_COMPILER_FRONTEND_VARIANT MATCHES "MSVC") + set(option_prefix "/clang:") + endif() + set(print_target_triple ${CMAKE_C_COMPILER} ${option_prefix}--target=${LLVM_DEFAULT_TARGET_TRIPLE} ${option_prefix}-print-target-triple) + execute_process(COMMAND ${print_target_triple} + RESULT_VARIABLE result + OUTPUT_VARIABLE output + OUTPUT_STRIP_TRAILING_WHITESPACE) + if(result EQUAL 0) + set(LLVM_DEFAULT_TARGET_TRIPLE ${output}) + else() + string(REPLACE ";" " " print_target_triple "${print_target_triple}") + # TODO(#97876): Report an error. + message(WARNING "Failed to execute `${print_target_triple}` to normalize target triple.") + endif() endif() # Determine output and install paths based on LLVM_TARGET_TRIPLE From 3573e17c622b35f12d04abf8876a00053beedc7c Mon Sep 17 00:00:00 2001 From: youngd007 Date: Mon, 22 Jun 2026 12:47:26 -0400 Subject: [PATCH 041/511] [lldb][bazel] Add the Windows process plugin to the bazel build (#203146) Add a cc_library for the native Windows process plugin (ProcessWindowsCommon), gated to @platforms//os:windows, and register it via the dedicated @LLDB_PROCESS_WINDOWS_PLUGIN@ slot in the generated Plugins.def. 
This mirrors the CMake build, which special-cases ProcessWindowsCommon into that slot so it is initialized after all other process plugins but before ProcessGDBRemote. With the help of claude. Tested internally at Meta by converting Bazel -> BUCK and confirming matches working BUCK contents for windows lldb build. --- .../llvm-project-overlay/lldb/BUILD.bazel | 3 ++ .../lldb/source/Plugins/BUILD.bazel | 34 ++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel index eb9384476972e..0245d01cfe32d 100644 --- a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel @@ -267,6 +267,9 @@ cc_library( "@platforms//os:linux": [ "//lldb/source/Plugins:PluginProcessLinux", ], + "@platforms//os:windows": [ + "//lldb/source/Plugins:PluginProcessWindowsCommon", + ], "//conditions:default": [], }), alwayslink = True, diff --git a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel index 52e36eb703729..49edf59fed59a 100644 --- a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel @@ -2422,6 +2422,30 @@ cc_library( ], ) +cc_library( + name = "PluginProcessWindowsCommon", + srcs = glob(["Process/Windows/Common/*.cpp"]), + hdrs = glob([ + "Process/Windows/Common/*.h", + "Process/Windows/Common/**/*.h", + ]), + includes = [".."], + target_compatible_with = select({ + "@platforms//os:windows": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [ + ":PluginDynamicLoaderWindowsDYLD", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Target", + "//lldb:Utility", + "//llvm:Support", + ], +) + _DEFAULT_LOAD_PLUGINS = "\n".join(["LLDB_PLUGIN({})".format(x) for x in DEFAULT_PLUGINS]) + \ "\n" + 
"\n".join(["LLDB_SCRIPT_PLUGIN({})".format(x) for x in DEFAULT_SCRIPT_PLUGINS]) @@ -2429,9 +2453,17 @@ expand_template( name = "plugins_config_gen", out = "Plugins.def", substitutions = { - "@LLDB_PROCESS_WINDOWS_PLUGIN@": "", "@LLDB_PROCESS_GDB_PLUGIN@": "LLDB_PLUGIN(ProcessGDBRemote)", } | select({ + # ProcessWindowsCommon registers via its dedicated slot in Plugins.def.in + # (ordered after all other process plugins, before ProcessGDBRemote). + "@platforms//os:windows": { + "@LLDB_PROCESS_WINDOWS_PLUGIN@": "LLDB_PLUGIN(ProcessWindowsCommon)", + }, + "//conditions:default": { + "@LLDB_PROCESS_WINDOWS_PLUGIN@": "", + }, + }) | select({ "@platforms//os:macos": { "@LLDB_ENUM_PLUGINS@": _DEFAULT_LOAD_PLUGINS + """ LLDB_PLUGIN(ProcessMacOSXKernel) From 2a0d56add89abed083bb209f6add7ca5a2aa9237 Mon Sep 17 00:00:00 2001 From: Sungbin Jo Date: Tue, 23 Jun 2026 01:58:09 +0900 Subject: [PATCH 042/511] [yaml2obj][MachO] Fix byte order of the indirect symbol table (#205044) This is a follow-up of PR #203680 that added the test case `linkedit-alignment.test`, which currently fails on big-endian buildbots (see: https://lab.llvm.org/buildbot/#/builders/98/builds/3084 and https://lab.llvm.org/buildbot/#/builders/114/builds/906). The failure seems to be on `yaml2obj`, where `writeDynamicSymbolTable` emits an indirect symbol table in host byte order rather than the specified object's byte order (i.e. the `IsLittleEndian` field value). This PR adds the missing swap and a regression test that round-trips all endian-sensitive fields with both endianness values. 
--- llvm/lib/ObjectYAML/MachOEmitter.cpp | 9 +- llvm/test/ObjectYAML/MachO/endianness.yaml | 207 +++++++++++++++++++++ 2 files changed, 213 insertions(+), 3 deletions(-) create mode 100644 llvm/test/ObjectYAML/MachO/endianness.yaml diff --git a/llvm/lib/ObjectYAML/MachOEmitter.cpp b/llvm/lib/ObjectYAML/MachOEmitter.cpp index a4d17dfe1e320..cf7202c7da949 100644 --- a/llvm/lib/ObjectYAML/MachOEmitter.cpp +++ b/llvm/lib/ObjectYAML/MachOEmitter.cpp @@ -625,9 +625,12 @@ void MachOWriter::writeStringTable(raw_ostream &OS) { } void MachOWriter::writeDynamicSymbolTable(raw_ostream &OS) { - for (auto Data : Obj.LinkEdit.IndirectSymbols) - OS.write(reinterpret_cast(&Data), - sizeof(yaml::Hex32::BaseType)); + for (auto Data : Obj.LinkEdit.IndirectSymbols) { + uint32_t Value = Data; + if (Obj.IsLittleEndian != sys::IsLittleEndianHost) + MachO::swapStruct(Value); + OS.write(reinterpret_cast(&Value), sizeof(uint32_t)); + } } void MachOWriter::writeFunctionStarts(raw_ostream &OS) { diff --git a/llvm/test/ObjectYAML/MachO/endianness.yaml b/llvm/test/ObjectYAML/MachO/endianness.yaml new file mode 100644 index 0000000000000..a0880e4642274 --- /dev/null +++ b/llvm/test/ObjectYAML/MachO/endianness.yaml @@ -0,0 +1,207 @@ +# RUN: yaml2obj -DENDIAN=false %s | obj2yaml | FileCheck %s +# RUN: yaml2obj -DENDIAN=true %s | obj2yaml | FileCheck %s + +## Check that yaml2obj writes all endian-sensitive Mach-O fields in object +## endianness rather than host endianness. 
+ +# CHECK: FileHeader: +# CHECK: magic: 0xFEEDFACF +# CHECK: cputype: 0x1000012 +# CHECK: cpusubtype: 0x34 +# CHECK: filetype: 0x1 +# CHECK: ncmds: 5 +# CHECK: sizeofcmds: 304 +# CHECK: flags: 0x1020304 + +# CHECK: LoadCommands: +# CHECK: - cmd: LC_SEGMENT_64 +# CHECK: cmdsize: 152 +# CHECK: segname: __TEXT +# CHECK: vmaddr: 2387509390608836384 +# CHECK: vmsize: 256 +# CHECK: fileoff: 336 +# CHECK: filesize: 4 +# CHECK: maxprot: 16909060 +# CHECK: initprot: 84281096 +# CHECK: nsects: 1 +# CHECK: flags: 151653132 +# CHECK: Sections: +# CHECK: - sectname: __text +# CHECK: segname: __TEXT +# CHECK: addr: 0x2122232425262728 +# CHECK: size: 4 +# CHECK: offset: 0x150 +# CHECK: align: 2 +# CHECK: reloff: 0x154 +# CHECK: nreloc: 1 +# CHECK: flags: 0x80000400 +# CHECK: reserved1: 0x1020304 +# CHECK: reserved2: 0x5060708 +# CHECK: reserved3: 0x90A0B0C +# CHECK: content: DEADBEEF +# CHECK: relocations: +# CHECK: - address: 0x1020304 +# CHECK: symbolnum: 1 +# CHECK: pcrel: false +# CHECK: length: 3 +# CHECK: extern: true +# CHECK: type: 2 +# CHECK: scattered: false +# CHECK: value: 0 +# CHECK: - cmd: LC_SYMTAB +# CHECK: cmdsize: 24 +# CHECK: symoff: 356 +# CHECK: nsyms: 2 +# CHECK: stroff: 396 +# CHECK: strsize: 8 +# CHECK: - cmd: LC_DYSYMTAB +# CHECK: cmdsize: 80 +# CHECK: ilocalsym: 0 +# CHECK: nlocalsym: 1 +# CHECK: iextdefsym: 1 +# CHECK: nextdefsym: 0 +# CHECK: iundefsym: 1 +# CHECK: nundefsym: 1 +# CHECK: indirectsymoff: 388 +# CHECK: nindirectsyms: 2 +# CHECK: - cmd: LC_DATA_IN_CODE +# CHECK: cmdsize: 16 +# CHECK: dataoff: 348 +# CHECK: datasize: 8 +# CHECK: - cmd: LC_BUILD_VERSION +# CHECK: cmdsize: 32 +# CHECK: platform: 16909060 +# CHECK: minos: 84281096 +# CHECK: sdk: 151653132 +# CHECK: ntools: 1 +# CHECK: Tools: +# CHECK: - tool: 219025168 +# CHECK: version: 286397204 + +# CHECK: LinkEditData: +# CHECK: NameList: +# CHECK: - n_strx: 1 +# CHECK: n_type: 0x1 +# CHECK: n_sect: 1 +# CHECK: n_desc: 4660 +# CHECK: n_value: 72623859790382856 +# CHECK: - n_strx: 4 +# 
CHECK: n_type: 0x1 +# CHECK: n_sect: 0 +# CHECK: n_desc: 22136 +# CHECK: n_value: 1230066625199609624 +# CHECK: StringTable: +# CHECK: - '' +# CHECK: - _a +# CHECK: - _b +# CHECK: - '' +# CHECK: IndirectSymbols: [ 0x1, 0x40000000 ] +# CHECK: DataInCode: +# CHECK: - Offset: 0x1020304 +# CHECK: Length: 1286 +# CHECK: Kind: 0x708 + +--- !mach-o +IsLittleEndian: [[ENDIAN]] +FileHeader: + magic: 0xFEEDFACF + cputype: 0x01000012 + cpusubtype: 0x00000034 + filetype: 0x00000001 + ncmds: 5 + sizeofcmds: 304 + flags: 0x01020304 + reserved: 0x05060708 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __TEXT + vmaddr: 0x2122232425262720 + vmsize: 0x0000000000000100 + fileoff: 336 + filesize: 4 + maxprot: 0x01020304 + initprot: 0x05060708 + nsects: 1 + flags: 0x090A0B0C + Sections: + - sectname: __text + segname: __TEXT + addr: 0x2122232425262728 + size: 4 + offset: 336 + align: 2 + reloff: 340 + nreloc: 1 + flags: 0x80000400 + reserved1: 0x01020304 + reserved2: 0x05060708 + reserved3: 0x090A0B0C + content: DEADBEEF + relocations: + - address: 0x01020304 + symbolnum: 1 + pcrel: false + length: 3 + extern: true + type: 2 + scattered: false + value: 0 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 356 + nsyms: 2 + stroff: 396 + strsize: 8 + - cmd: LC_DYSYMTAB + cmdsize: 80 + ilocalsym: 0 + nlocalsym: 1 + iextdefsym: 1 + nextdefsym: 0 + iundefsym: 1 + nundefsym: 1 + tocoff: 0 + ntoc: 0 + modtaboff: 0 + nmodtab: 0 + extrefsymoff: 0 + nextrefsyms: 0 + indirectsymoff: 388 + nindirectsyms: 2 + extreloff: 0 + nextrel: 0 + locreloff: 0 + nlocrel: 0 + - cmd: LC_DATA_IN_CODE + cmdsize: 16 + dataoff: 348 + datasize: 8 + - cmd: LC_BUILD_VERSION + cmdsize: 32 + platform: 0x01020304 + minos: 0x05060708 + sdk: 0x090A0B0C + ntools: 1 + Tools: + - tool: 0x0D0E0F10 + version: 0x11121314 +LinkEditData: + NameList: + - n_strx: 1 + n_type: 0x01 + n_sect: 1 + n_desc: 0x1234 + n_value: 0x0102030405060708 + - n_strx: 4 + n_type: 0x01 + n_sect: 0 + n_desc: 0x5678 + n_value: 0x1112131415161718 
+ StringTable: [ '', _a, _b, '' ] + IndirectSymbols: [ 0x1, 0x40000000 ] + DataInCode: + - Offset: 0x01020304 + Length: 0x0506 + Kind: 0x0708 +... From 82021b97e56e5b765b89432939cce23a3c59538b Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Mon, 22 Jun 2026 10:09:11 -0700 Subject: [PATCH 043/511] [bazel] Disable buildkite for PluginProcessWindowsCommon (#205141) --- utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel index 49edf59fed59a..2ac0097b64bb6 100644 --- a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel @@ -2430,6 +2430,7 @@ cc_library( "Process/Windows/Common/**/*.h", ]), includes = [".."], + tags = ["nobuildkite"], target_compatible_with = select({ "@platforms//os:windows": [], "//conditions:default": ["@platforms//:incompatible"], From b47af2cc4991dca89e6a27d15a9c953a4578dd01 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 22 Jun 2026 13:13:14 -0400 Subject: [PATCH 044/511] [libc++][lnt] Allow retaining build artifacts in run-benchbot (#205146) Also, as a drive-by, introduce `--results-dir` to specify where to put the JSON results instead of using `--build-dir` for that. Assisted by Claude --- libcxx/utils/ci/lnt/README.md | 8 +++-- libcxx/utils/ci/lnt/run-benchbot | 51 +++++++++++++++++++-------- libcxx/utils/ci/lnt/run-benchmarks | 55 +++++++++++++++++++++++------- 3 files changed, 85 insertions(+), 29 deletions(-) diff --git a/libcxx/utils/ci/lnt/README.md b/libcxx/utils/ci/lnt/README.md index eedd98a082c2f..ac5cbc11f4c40 100644 --- a/libcxx/utils/ci/lnt/README.md +++ b/libcxx/utils/ci/lnt/README.md @@ -11,8 +11,12 @@ commits: libcxx/utils/ci/lnt/run-benchbot --llvm-root -- ... ``` -Results are stored as JSON files in `/build//` by default. 
Use -`--build-dir ` to override the output directory. +Results are stored as LNT JSON files in `/build//` by default. +Use `--results-dir ` to override where these reports are written. + +By default, build artifacts are stored in a temporary directory and discarded after +each run. Pass `--build-dir ` to keep them; artifacts for each run are then stored +under `//`. To continuously poll for un-benchmarked commits and submit results to a LNT instance: diff --git a/libcxx/utils/ci/lnt/run-benchbot b/libcxx/utils/ci/lnt/run-benchbot index 52fb813b9321e..387853ac52b17 100755 --- a/libcxx/utils/ci/lnt/run-benchbot +++ b/libcxx/utils/ci/lnt/run-benchbot @@ -22,9 +22,12 @@ ${PROGNAME} [options] [-- commit ...] --llvm-root Path to the root of the LLVM monorepo. By default, we try to figure it out based on the current working directory. ---build-dir The directory to use for storing benchmark results. By default, - this is '/build/'. Note that intermediate - build results are never kept around. +--results-dir The directory to use for storing benchmark results (LNT JSON files). + By default, this is '/build/'. + +--build-dir Optional directory in which to keep build artifacts. Artifacts for each + run are stored under '//'. By default, build + artifacts are stored in a temporary directory and discarded after each run. [--lnt-url ] The optional URL of the LNT instance to submit results to. By default, results are not submitted to any LNT instance. 
@@ -56,6 +59,10 @@ while [[ $# -gt 0 ]]; do MONOREPO_ROOT="${2}" shift; shift ;; + --results-dir) + RESULTS_DIR="${2}" + shift; shift + ;; --build-dir) BUILD_DIR="${2}" shift; shift @@ -81,7 +88,7 @@ while [[ $# -gt 0 ]]; do done MONOREPO_ROOT="${MONOREPO_ROOT:="$(git rev-parse --show-toplevel)"}" -BUILD_DIR="${BUILD_DIR:=${MONOREPO_ROOT}/build/${BUILDER}}" +RESULTS_DIR="${RESULTS_DIR:=${MONOREPO_ROOT}/build/${BUILDER}}" case "${BUILDER}" in apple-m5-clang21) @@ -109,19 +116,21 @@ echo "***********************************************" LNT_TEST_SUITE=libcxx2 -mkdir -p "${BUILD_DIR}" +mkdir -p "${RESULTS_DIR}" -# Given path/to/abcdef.json, figures out the next available path, -# considering path/to/abcdef.{1,2,3,etc}.json. -next_available_path() { - local path="${1}" - local base="${path%.json}" +# Find the smallest suffix such that both the artifacts and the results directory names are available. +# This ensures that we can always match artifact directories with their results. +# The first candidate is unsuffixed ('foo.json' / 'foo'), then '.1', '.2', and so on. +select_run_suffix() { + local results_base="${1}" # e.g. / + local artifacts_base="${2}" # e.g. //, or empty when not keeping artifacts + local suffix="" local i=1 - while [ -f "${path}" ]; do - path="${base}.${i}.json" + while [ -f "${results_base}${suffix}.json" ] || { [ -n "${artifacts_base}" ] && [ -e "${artifacts_base}${suffix}" ]; }; do + suffix=".${i}" i=$((i + 1)) done - echo "${path}" + echo "${suffix}" } run_benchmarks() { @@ -135,7 +144,20 @@ run_benchmarks() { filter_arg="--filter ${FILTER}" fi - local output=$(next_available_path "${BUILD_DIR}/${1}.json") + # Pick a suffix that is free for both the result report and (when keeping build artifacts) the + # artifacts directory, so 'foo.N.json' and its build artifacts in 'foo.N' always correspond. 
+ local artifacts_base="" + if [ -n "${BUILD_DIR}" ]; then + artifacts_base="${BUILD_DIR}/${BUILDER}/${1}" + fi + local suffix=$(select_run_suffix "${RESULTS_DIR}/${1}" "${artifacts_base}") + + local build_dir_arg="" + if [ -n "${BUILD_DIR}" ]; then + build_dir_arg="--build-dir ${artifacts_base}${suffix}" + fi + + local output="${RESULTS_DIR}/${1}${suffix}.json" ${MONOREPO_ROOT}/libcxx/utils/ci/lnt/run-benchmarks \ --test-suite-commit ${BENCHMARK_SUITE_VERSION} \ --git-repo ${MONOREPO_ROOT} \ @@ -144,6 +166,7 @@ run_benchmarks() { --benchmark-commit ${1} \ ${spec_arg} \ ${filter_arg} \ + ${build_dir_arg} \ --output ${output} if [ -n "${LNT_URL}" ]; then diff --git a/libcxx/utils/ci/lnt/run-benchmarks b/libcxx/utils/ci/lnt/run-benchmarks index 611bd9c4d45b9..ce4705c0fafa1 100755 --- a/libcxx/utils/ci/lnt/run-benchmarks +++ b/libcxx/utils/ci/lnt/run-benchmarks @@ -8,6 +8,7 @@ # ===----------------------------------------------------------------------===## import argparse +import contextlib import json import logging import os @@ -77,12 +78,17 @@ def main(argv): parser.add_argument('--machine', type=str, required=True, help='The name of the machine for reporting LNT results.') parser.add_argument('--output', type=pathlib.Path, required=True, - help='Path where the resulting LNT JSON report is written. The file is overwritten if it already exists.') + help='Path where the resulting LNT JSON report is written. It is an error for the file to ' + 'already exist.') parser.add_argument('--filter', type=str, required=False, help="Optional test filter to pass to lit when running the benchmarks. This allows " "running only a subset of the benchmarks.") parser.add_argument('--spec-dir', type=pathlib.Path, required=False, help='Optional path to a SPEC installation to use for benchmarking.') + parser.add_argument('--build-dir', type=pathlib.Path, required=False, + help='Optional directory in which to keep build artifacts. 
By default, a temporary directory ' + 'is used and the build artifacts are discarded after the run. It is an error to specify ' + 'a build directory that already exists.') parser.add_argument('--git-repo', type=directory_path, default=os.getcwd(), help='Optional path to the Git repository to use. By default, the current working directory is used.') parser.add_argument('--dry-run', action='store_true', @@ -121,17 +127,36 @@ def main(argv): if enforce_success: raise - with tempfile.TemporaryDirectory() as build_dir: - build_dir = pathlib.Path(build_dir) + # Fail fast before doing any expensive work: refuse to overwrite existing results or artifacts. + if args.output.exists(): + sys.exit(f'error: output report {args.output} already exists; not overwriting it') + if args.build_dir is not None and args.build_dir.exists(): + sys.exit(f'error: build directory {args.build_dir} already exists; not overwriting it') + + with contextlib.ExitStack() as stack: + # LNT tooling is throwaway: always install it into a temporary directory. + venv = pathlib.Path(stack.enter_context(tempfile.TemporaryDirectory())) + + # The build artifacts are kept in --build-dir when it is given, and stored in a temporary directory + # otherwise. 
+ if args.build_dir is not None: + artifacts = args.build_dir + else: + artifacts = pathlib.Path(stack.enter_context(tempfile.TemporaryDirectory())) + if not args.dry_run: + artifacts.mkdir(parents=True, exist_ok=True) + logging.info(f'Storing build artifacts in {artifacts}') logging.info('Installing LNT') - run(['python3', '-m', 'venv', build_dir / '.venv']) - run([build_dir / '.venv/bin/pip', 'install', 'llvm-lnt']) + run(['python3', '-m', 'venv', venv]) + run([venv / 'bin/pip', 'install', 'llvm-lnt']) logging.info(f'Building libc++ at commit {args.benchmark_commit}') build_cmd = [args.git_repo / 'libcxx/utils/build-at-commit', '--git-repo', args.git_repo, - '--install-dir', build_dir / 'install', + '--install-dir', artifacts / 'libcxx-install', + '--tmp-src-dir', artifacts / 'libcxx-src', + '--tmp-build-dir', artifacts / 'libcxx-build', '--commit', args.benchmark_commit, '--', '-DCMAKE_BUILD_TYPE=RelWithDebInfo', f'-DCMAKE_CXX_COMPILER={args.compiler}'] run(build_cmd, enforce_success=False) # if the build fails, carry on: we'll fail later and submit empty LNT results @@ -139,32 +164,36 @@ def main(argv): logging.info(f'Running benchmarks from {args.test_suite_commit} against libc++ {args.benchmark_commit}') cmd = [args.git_repo / 'libcxx/utils/test-at-commit', '--git-repo', args.git_repo, - '--build-dir', build_dir / 'bench', + '--build-dir', artifacts / 'benchmarks', '--test-suite-commit', args.test_suite_commit, - '--libcxx-installation', build_dir / 'install', + '--libcxx-installation', artifacts / 'libcxx-install', '--', '-j1', '--time-tests', '--test-output=failed', '--param', f'compiler={args.compiler}', '--param', 'optimization=speed', '--param', 'std=c++26', - build_dir / 'bench/libcxx/test/benchmarks'] + artifacts / 'benchmarks/libcxx/test/benchmarks'] if args.spec_dir is not None: cmd += ['--param', f'spec_dir={args.spec_dir}'] if args.filter is not None: cmd += ['--filter', args.filter] run(cmd, enforce_success=False) # some benchmarks may fail to 
build/run at some commits, and that's okay - with open(build_dir / 'benchmarks.lnt', 'w') as f: - run([args.git_repo / 'libcxx/utils/consolidate-benchmarks', build_dir / 'bench'], stdout=f) + consolidate = [args.git_repo / 'libcxx/utils/consolidate-benchmarks', artifacts / 'benchmarks'] + if args.dry_run: + run(consolidate) + else: + with open(artifacts / 'benchmarks.lnt', 'w') as f: + run(consolidate, stdout=f) logging.info('Creating JSON report for LNT') order = len(subprocess.check_output(['git', '-C', args.git_repo, 'rev-list', args.benchmark_commit]).splitlines()) - importreport = [build_dir / '.venv/bin/lnt', 'importreport', '--order', str(order), '--machine', args.machine] + importreport = [venv / 'bin/lnt', 'importreport', '--order', str(order), '--machine', args.machine] for arg in dict_to_params(gather_run_information(args)): importreport += ['--run-info', arg] for arg in dict_to_params(gather_machine_information(args)): importreport += ['--machine-info', arg] output = args.output.resolve() - importreport += [build_dir / 'benchmarks.lnt', output] + importreport += [artifacts / 'benchmarks.lnt', output] run(importreport) logging.info(f'Report written to {output}') From f48db982479a5c8361aa3036e6098a985cd32699 Mon Sep 17 00:00:00 2001 From: Yihan Wang Date: Tue, 23 Jun 2026 01:17:53 +0800 Subject: [PATCH 045/511] [libc++] Implement P4206R0 Revert string support in std::constant_wrapper (#203338) Fixes https://github.com/llvm/llvm-project/issues/203336 --------- Signed-off-by: yronglin Co-authored-by: A. 
Jiang --- libcxx/docs/FeatureTestMacroTable.rst | 2 +- libcxx/docs/ReleaseNotes/23.rst | 1 + libcxx/docs/Status/Cxx29Papers.csv | 2 +- libcxx/include/__utility/constant_wrapper.h | 88 ++++++++----------- libcxx/include/utility | 7 +- libcxx/include/version | 4 +- .../utility.version.compile.pass.cpp | 4 +- .../version.version.compile.pass.cpp | 4 +- .../const.wrap.class/convert.pass.cpp | 18 +--- .../const.wrap.class/ctad.compile.pass.cpp | 32 ------- .../utilities/const.wrap.class/cw.pass.cpp | 16 +--- .../cw_fixed.array.ctor.pass.cpp | 86 ------------------ .../const.wrap.class/cw_fixed.ctor.pass.cpp | 70 --------------- .../const.wrap.class/subscript.pass.cpp | 6 -- .../const.wrap.class/template.verify.cpp | 23 +++++ .../const.wrap.class/types.compile.pass.cpp | 23 +++-- .../const.wrap.class/unary_ops.pass.cpp | 5 ++ .../generate_feature_test_macro_components.py | 2 +- 18 files changed, 97 insertions(+), 296 deletions(-) delete mode 100644 libcxx/test/std/utilities/const.wrap.class/ctad.compile.pass.cpp delete mode 100644 libcxx/test/std/utilities/const.wrap.class/cw_fixed.array.ctor.pass.cpp delete mode 100644 libcxx/test/std/utilities/const.wrap.class/cw_fixed.ctor.pass.cpp create mode 100644 libcxx/test/std/utilities/const.wrap.class/template.verify.cpp diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 8e26bdcd860c1..429cf2a7250fb 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -432,7 +432,7 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_bitset`` ``202306L`` ---------------------------------------------------------- ----------------- - ``__cpp_lib_constant_wrapper`` ``202603L`` + ``__cpp_lib_constant_wrapper`` ``202606L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_constexpr_algorithms`` ``202306L`` ---------------------------------------------------------- 
----------------- diff --git a/libcxx/docs/ReleaseNotes/23.rst b/libcxx/docs/ReleaseNotes/23.rst index e7eb42207db42..8055792f697ea 100644 --- a/libcxx/docs/ReleaseNotes/23.rst +++ b/libcxx/docs/ReleaseNotes/23.rst @@ -52,6 +52,7 @@ Implemented Papers - P3383R3: ``mdspan.at()`` (`Github `__) - P3369R0: constexpr for ``uninitialized_default_construct`` (`Github `__) - P3508R0: Wording for "constexpr for specialized memory algorithms" (`Github `__) +- P4206R0: Revert string support in ``std::constant_wrapper`` (`Github `__) Improvements and New Features ----------------------------- diff --git a/libcxx/docs/Status/Cxx29Papers.csv b/libcxx/docs/Status/Cxx29Papers.csv index a5d896f260657..3c48fe89be33d 100644 --- a/libcxx/docs/Status/Cxx29Papers.csv +++ b/libcxx/docs/Status/Cxx29Papers.csv @@ -5,7 +5,7 @@ "`P3319R6 `__","Add an ``iota`` object for ``simd`` (and more)","2026-06 (Brno)","","","`#204393 `__","" "`P3798R1 `__","The unexpected in ``std::expected``","2026-06 (Brno)","","","`#204394 `__","" "`P3052R2 `__","``view_interface::at()``","2026-06 (Brno)","","","`#204395 `__","" -"`P4206R0 `__","Revert string support in ``std::constant_wrapper``","2026-06 (Brno)","","","`#203336 `__","To be applied as a Defect Report." +"`P4206R0 `__","Revert string support in ``std::constant_wrapper``","2026-06 (Brno)","|Complete|","23","`#203336 `__","Applied as a Defect Report." "`P3395R6 `__","Fix encoding issues and add a formatter for ``std::error_code``","2026-06 (Brno)","","","`#204396 `__","" "`P3505R4 `__","Fix the default floating-point representation in ``std::format``","2026-06 (Brno)","","","`#204397 `__","To be applied as a Defect Report." 
"`P3154R3 `__","Deprecating signed character types in iostreams","2026-06 (Brno)","","","`#204398 `__","" diff --git a/libcxx/include/__utility/constant_wrapper.h b/libcxx/include/__utility/constant_wrapper.h index 6bae95fc1878a..5e6e815f03555 100644 --- a/libcxx/include/__utility/constant_wrapper.h +++ b/libcxx/include/__utility/constant_wrapper.h @@ -14,6 +14,7 @@ #include <__functional/invoke.h> #include <__type_traits/invoke.h> #include <__type_traits/is_constructible.h> +#include <__type_traits/is_same.h> #include <__type_traits/remove_cvref.h> #include <__utility/declval.h> #include <__utility/forward.h> @@ -27,44 +28,13 @@ _LIBCPP_BEGIN_NAMESPACE_STD #if _LIBCPP_STD_VER >= 26 -template -struct __cw_fixed_value { - using __type _LIBCPP_NODEBUG = _Tp; - _LIBCPP_HIDE_FROM_ABI constexpr __cw_fixed_value(__type __v) noexcept : __data(__v) {} - _Tp __data; -}; - -template -struct __cw_fixed_value<_Tp[_Extent]> { - using __type _LIBCPP_NODEBUG = _Tp[_Extent]; - _Tp __data[_Extent]; - - _LIBCPP_HIDE_FROM_ABI constexpr __cw_fixed_value(_Tp (&__arr)[_Extent]) noexcept - : __cw_fixed_value(__arr, make_index_sequence<_Extent>{}) {} - -private: - template - _LIBCPP_HIDE_FROM_ABI constexpr __cw_fixed_value(_Tp (&__arr)[_Extent], index_sequence<_Idxs...>) noexcept - : __data{__arr[_Idxs]...} {} -}; - -template -__cw_fixed_value(_Tp (&)[_Extent]) -> __cw_fixed_value<_Tp[_Extent]>; - -template <__cw_fixed_value _Xp, -# ifdef _LIBCPP_COMPILER_GCC - // gcc bug: https://gcc.gnu.org/PR117392 - class = typename decltype(__cw_fixed_value(_Xp))::__type -# else - class = typename decltype(_Xp)::__type -# endif - > +template > struct constant_wrapper; template concept __constexpr_param = requires { typename constant_wrapper<_Tp::value>; }; -template <__cw_fixed_value _Xp> +template constexpr auto cw = constant_wrapper<_Xp>{}; struct __cw_operators { @@ -202,7 +172,8 @@ struct __cw_operators { template <__constexpr_param _Lp, __constexpr_param _Rp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI 
friend constexpr auto operator->*(_Lp, _Rp) noexcept - -> constant_wrapper<(_Lp::value->*_Rp::value)> { + // TODO: Remove `auto` when all support versions of Clang have https://llvm.org/PR202693 fixed. + -> constant_wrapper*_Rp::value)> { return {}; } @@ -228,52 +199,62 @@ struct __cw_operators { template <__constexpr_param _Tp, __constexpr_param _Rp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator+=(this _Tp, _Rp) noexcept - -> constant_wrapper<(_Tp::value += _Rp::value)> { + // TODO: Remove `auto` when all support versions of Clang have https://llvm.org/PR202693 fixed. + -> constant_wrapper { return {}; } template <__constexpr_param _Tp, __constexpr_param _Rp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator-=(this _Tp, _Rp) noexcept - -> constant_wrapper<(_Tp::value -= _Rp::value)> { + // TODO: Remove `auto` when all support versions of Clang have https://llvm.org/PR202693 fixed. + -> constant_wrapper { return {}; } template <__constexpr_param _Tp, __constexpr_param _Rp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator*=(this _Tp, _Rp) noexcept - -> constant_wrapper<(_Tp::value *= _Rp::value)> { + // TODO: Remove `auto` when all support versions of Clang have https://llvm.org/PR202693 fixed. + -> constant_wrapper { return {}; } template <__constexpr_param _Tp, __constexpr_param _Rp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator/=(this _Tp, _Rp) noexcept - -> constant_wrapper<(_Tp::value /= _Rp::value)> { + // TODO: Remove `auto` when all support versions of Clang have https://llvm.org/PR202693 fixed. + -> constant_wrapper { return {}; } template <__constexpr_param _Tp, __constexpr_param _Rp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator%=(this _Tp, _Rp) noexcept - -> constant_wrapper<(_Tp::value %= _Rp::value)> { + // TODO: Remove `auto` when all support versions of Clang have https://llvm.org/PR202693 fixed. 
+ -> constant_wrapper { return {}; } template <__constexpr_param _Tp, __constexpr_param _Rp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator&=(this _Tp, _Rp) noexcept - -> constant_wrapper<(_Tp::value &= _Rp::value)> { + // TODO: Remove `auto` when all support versions of Clang have https://llvm.org/PR202693 fixed. + -> constant_wrapper { return {}; } template <__constexpr_param _Tp, __constexpr_param _Rp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator|=(this _Tp, _Rp) noexcept - -> constant_wrapper<(_Tp::value |= _Rp::value)> { + // TODO: Remove `auto` when all support versions of Clang have https://llvm.org/PR202693 fixed. + -> constant_wrapper { return {}; } template <__constexpr_param _Tp, __constexpr_param _Rp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator^=(this _Tp, _Rp) noexcept - -> constant_wrapper<(_Tp::value ^= _Rp::value)> { + // TODO: Remove `auto` when all support versions of Clang have https://llvm.org/PR202693 fixed. + -> constant_wrapper { return {}; } template <__constexpr_param _Tp, __constexpr_param _Rp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator<<=(this _Tp, _Rp) noexcept - -> constant_wrapper<(_Tp::value <<= _Rp::value)> { + // TODO: Remove `auto` when all support versions of Clang have https://llvm.org/PR202693 fixed. + -> constant_wrapper { return {}; } template <__constexpr_param _Tp, __constexpr_param _Rp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator>>=(this _Tp, _Rp) noexcept - -> constant_wrapper<(_Tp::value >>= _Rp::value)> { + // TODO: Remove `auto` when all support versions of Clang have https://llvm.org/PR202693 fixed. + -> constant_wrapper>= _Rp::value)> { return {}; } }; @@ -285,18 +266,23 @@ concept __constexpr_callable = (__constexpr_param> && ...) template concept __constexpr_indexable = (__constexpr_param> && ...) 
&& requires { - typename constant_wrapper<_Obj[remove_cvref_t<_Args>::value...]>; + typename constant_wrapper::value...])>; }; -template <__cw_fixed_value _Xp, class> +template struct constant_wrapper : __cw_operators { - static constexpr const auto& value = _Xp.__data; - using type = constant_wrapper; - using value_type = decltype(_Xp)::__type; + static constexpr decltype((_Xp)) value = (_Xp); + + using type = constant_wrapper; + using value_type = decltype(_Xp); + + static_assert(is_same_v<_Tp, value_type>, + "the second template parameter of std::constant_wrapper must be its value_type"); template <__constexpr_param _Rp> [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator=(_Rp) const noexcept - -> constant_wrapper<(value = _Rp::value)> { + // TODO: Remove `auto` when all support versions of Clang have https://llvm.org/PR202693 fixed. + -> constant_wrapper { return {}; } @@ -320,7 +306,7 @@ struct constant_wrapper : __cw_operators { template requires __constexpr_indexable [[nodiscard]] - _LIBCPP_HIDE_FROM_ABI static constexpr constant_wrapper::value...]> + _LIBCPP_HIDE_FROM_ABI static constexpr constant_wrapper::value...])> operator[](_Args&&...) 
noexcept { return {}; } diff --git a/libcxx/include/utility b/libcxx/include/utility index e0c97efc6424d..940dc4f354984 100644 --- a/libcxx/include/utility +++ b/libcxx/include/utility @@ -196,10 +196,7 @@ template constexpr const T1&& get(const pair&&) noexcept; // C++14 // [const.wrap.class], class template constant_wrapper -template - struct cw-fixed-value; // exposition only, since C++26 - -template +template struct constant_wrapper; // since C++26 template @@ -208,7 +205,7 @@ template struct cw-operators; // exposition only, since C++26 -template +template constexpr auto cw = constant_wrapper{}; // since C++26 // C++14 diff --git a/libcxx/include/version b/libcxx/include/version index 7f2dc9e4b72ab..ae02006d9d1c9 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -64,7 +64,7 @@ __cpp_lib_common_reference 202302L __cpp_lib_common_reference_wrapper 202302L __cpp_lib_complex_udls 201309L __cpp_lib_concepts 202207L -__cpp_lib_constant_wrapper 202603L +__cpp_lib_constant_wrapper 202606L __cpp_lib_constexpr_algorithms 202306L 201806L // C++20 __cpp_lib_constexpr_bitset 202207L @@ -563,7 +563,7 @@ __cpp_lib_void_t 201411L # undef __cpp_lib_bind_front # define __cpp_lib_bind_front 202306L # define __cpp_lib_bitset 202306L -# define __cpp_lib_constant_wrapper 202603L +# define __cpp_lib_constant_wrapper 202606L # undef __cpp_lib_constexpr_algorithms # define __cpp_lib_constexpr_algorithms 202306L # define __cpp_lib_constexpr_flat_map 202502L diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.compile.pass.cpp index 8cbd16d242f74..bff6e905e4dee 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.compile.pass.cpp @@ -404,8 +404,8 @@ # ifndef 
__cpp_lib_constant_wrapper # error "__cpp_lib_constant_wrapper should be defined in c++26" # endif -# if __cpp_lib_constant_wrapper != 202603L -# error "__cpp_lib_constant_wrapper should have the value 202603L in c++26" +# if __cpp_lib_constant_wrapper != 202606L +# error "__cpp_lib_constant_wrapper should have the value 202606L in c++26" # endif # ifndef __cpp_lib_constexpr_algorithms diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index d9c78b73f7e23..282936a0b3f63 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -6609,8 +6609,8 @@ # ifndef __cpp_lib_constant_wrapper # error "__cpp_lib_constant_wrapper should be defined in c++26" # endif -# if __cpp_lib_constant_wrapper != 202603L -# error "__cpp_lib_constant_wrapper should have the value 202603L in c++26" +# if __cpp_lib_constant_wrapper != 202606L +# error "__cpp_lib_constant_wrapper should have the value 202606L in c++26" # endif # ifndef __cpp_lib_constexpr_algorithms diff --git a/libcxx/test/std/utilities/const.wrap.class/convert.pass.cpp b/libcxx/test/std/utilities/const.wrap.class/convert.pass.cpp index a9567cb08b009..f7c59401dcd2a 100644 --- a/libcxx/test/std/utilities/const.wrap.class/convert.pass.cpp +++ b/libcxx/test/std/utilities/const.wrap.class/convert.pass.cpp @@ -27,11 +27,10 @@ constexpr bool test() { { // int conversion std::constant_wrapper<6> cw6; - const int& result = cw6; + int result = cw6; assert(result == 6); - assert(&result == &cw6.value); - static_assert(noexcept(static_cast(cw6))); + static_assert(noexcept(static_cast(cw6))); } { @@ -45,19 +44,6 @@ constexpr bool test() { static_assert(noexcept(static_cast(cws))); } - { - // array 
conversion - constexpr int arr[] = {1, 2, 3}; - std::constant_wrapper cwArr; - const int (&result)[3] = cwArr; - assert(result[0] == 1); - assert(result[1] == 2); - assert(result[2] == 3); - assert(&result == &cwArr.value); - - static_assert(noexcept(static_cast(cwArr))); - } - { // function pointer conversion constexpr int (*fptr)(int) = [](int x) constexpr { return x * 2; }; diff --git a/libcxx/test/std/utilities/const.wrap.class/ctad.compile.pass.cpp b/libcxx/test/std/utilities/const.wrap.class/ctad.compile.pass.cpp deleted file mode 100644 index 7210d7e2152d5..0000000000000 --- a/libcxx/test/std/utilities/const.wrap.class/ctad.compile.pass.cpp +++ /dev/null @@ -1,32 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// REQUIRES: std-at-least-c++26 - -// constant_wrapper - -// template -// cw-fixed-value(T (&)[Extent]) -> cw-fixed-value; // exposition only - -#include -#include - -constexpr int arr[] = {1, 2, 3}; -using T1 = std::constant_wrapper; -static_assert(std::is_same_v); - -using T2 = std::constant_wrapper<"hello world">; -static_assert(std::is_same_v); - -struct S { - int value; -}; - -constexpr S s[] = {{1}, {2}, {3}}; -using T3 = std::constant_wrapper; -static_assert(std::is_same_v); diff --git a/libcxx/test/std/utilities/const.wrap.class/cw.pass.cpp b/libcxx/test/std/utilities/const.wrap.class/cw.pass.cpp index 4f660ee378239..8d8371a0a68a1 100644 --- a/libcxx/test/std/utilities/const.wrap.class/cw.pass.cpp +++ b/libcxx/test/std/utilities/const.wrap.class/cw.pass.cpp @@ -10,12 +10,13 @@ // constant_wrapper -// template +// template // constexpr auto cw = constant_wrapper{}; #include #include #include +#include struct S { 
int value; @@ -41,7 +42,7 @@ constexpr bool test() { { // array constant - constexpr int arr[] = {1, 2, 3}; + constexpr static int arr[] = {1, 2, 3}; // gcc complains that cw_val is unused [[maybe_unused]] std::same_as> decltype(auto) cw_val = std::cw; static_assert(cw_val[0] == 1); @@ -49,17 +50,6 @@ constexpr bool test() { static_assert(cw_val[2] == 3); } - { - // string literals - [[maybe_unused]] std::same_as> decltype(auto) cw_val = std::cw<"hello">; - static_assert(cw_val[0] == 'h'); - static_assert(cw_val[1] == 'e'); - static_assert(cw_val[2] == 'l'); - static_assert(cw_val[3] == 'l'); - static_assert(cw_val[4] == 'o'); - static_assert(cw_val[5] == '\0'); - } - return true; } diff --git a/libcxx/test/std/utilities/const.wrap.class/cw_fixed.array.ctor.pass.cpp b/libcxx/test/std/utilities/const.wrap.class/cw_fixed.array.ctor.pass.cpp deleted file mode 100644 index 8bccfcfaaaa43..0000000000000 --- a/libcxx/test/std/utilities/const.wrap.class/cw_fixed.array.ctor.pass.cpp +++ /dev/null @@ -1,86 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// REQUIRES: std-at-least-c++26 - -// constant_wrapper - -// constexpr cw-fixed-value(T (&arr)[Extent]) noexcept; - -#include -#include - -template -auto helper(std::constant_wrapper) -> decltype(v); - -template -using cw_fixed_value = decltype(helper(std::constant_wrapper{})); - -struct S { - int value; - - constexpr S(int v = 0) : value(v) {} - - constexpr bool operator==(const S& other) const { return value == other.value; } -}; - -constexpr bool test() { - { - // int array construction - // the conversion from int array to cw-fixed-value uses the constructor - constexpr int arr[] = {1, 2, 3}; - std::constant_wrapper cw{}; - assert(cw.value[0] == 1); - assert(cw.value[1] == 2); - assert(cw.value[2] == 3); - } - - { - // struct array construction - constexpr S s[] = {{1}, {2}, {3}}; - std::constant_wrapper cw{}; - assert(cw.value[0] == S{1}); - assert(cw.value[1] == S{2}); - assert(cw.value[2] == S{3}); - } - - { - // calling the constructor - constexpr int arr[] = {1, 2, 3, 4, 5}; - constexpr cw_fixed_value ci(arr); - std::constant_wrapper cw; - assert(cw.value[0] == 1); - assert(cw.value[1] == 2); - assert(cw.value[2] == 3); - assert(cw.value[3] == 4); - assert(cw.value[4] == 5); - - static_assert(noexcept(cw_fixed_value{arr})); - } - - { - // the constructor is implicit - constexpr int arr[] = {1, 2, 3, 4, 5}; - constexpr cw_fixed_value ci = arr; - std::constant_wrapper cw; - assert(cw.value[0] == 1); - assert(cw.value[1] == 2); - assert(cw.value[2] == 3); - assert(cw.value[3] == 4); - assert(cw.value[4] == 5); - } - - return true; -} - -int main(int, char**) { - test(); - static_assert(test()); - - return 0; -} diff --git a/libcxx/test/std/utilities/const.wrap.class/cw_fixed.ctor.pass.cpp b/libcxx/test/std/utilities/const.wrap.class/cw_fixed.ctor.pass.cpp deleted file mode 100644 index 
30c2952f57d4c..0000000000000 --- a/libcxx/test/std/utilities/const.wrap.class/cw_fixed.ctor.pass.cpp +++ /dev/null @@ -1,70 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// REQUIRES: std-at-least-c++26 - -// constant_wrapper - -// constexpr cw-fixed-value(type v) noexcept : data(v) {} - -#include -#include - -template -auto helper(std::constant_wrapper) -> decltype(v); - -template -using cw_fixed_value = decltype(helper(std::constant_wrapper{})); - -struct S { - int value; - - constexpr S(int v = 0) : value(v) {} - - constexpr bool operator==(const S& other) const { return value == other.value; } -}; - -constexpr bool test() { - { - // int construction - // the conversion from int to cw-fixed-value uses the constructor - std::constant_wrapper<42> cw{}; - assert(cw.value == 42); - } - - { - // struct construction - std::constant_wrapper cw{}; - assert(cw.value == S{13}); - } - - { - // calling the constructor - constexpr cw_fixed_value ci{42}; - std::constant_wrapper cw; - assert(cw == 42); - - static_assert(noexcept(cw_fixed_value{42})); - } - - { - // the constructor is implicit - constexpr cw_fixed_value ci = 42; - std::constant_wrapper cw; - assert(cw == 42); - } - - return true; -} - -int main(int, char**) { - test(); - static_assert(test()); - - return 0; -} diff --git a/libcxx/test/std/utilities/const.wrap.class/subscript.pass.cpp b/libcxx/test/std/utilities/const.wrap.class/subscript.pass.cpp index 7efefa85fed95..bfa77a4662afc 100644 --- a/libcxx/test/std/utilities/const.wrap.class/subscript.pass.cpp +++ b/libcxx/test/std/utilities/const.wrap.class/subscript.pass.cpp @@ -171,12 +171,6 @@ constexpr bool test() { 
assert(result == 42); } - { - // just use the index operator - assert(std::cw<"abcd">[2] == 'c'); - assert(std::cw<"abcd">[std::cw<3>] == 'd'); - } - { // integral_constant using T = std::constant_wrapper; diff --git a/libcxx/test/std/utilities/const.wrap.class/template.verify.cpp b/libcxx/test/std/utilities/const.wrap.class/template.verify.cpp new file mode 100644 index 0000000000000..f8902212299ec --- /dev/null +++ b/libcxx/test/std/utilities/const.wrap.class/template.verify.cpp @@ -0,0 +1,23 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +#include + +// expected-error@+1 {{pointer to subobject of string literal is not allowed in a template argument}} +std::constant_wrapper<"hello"> string_literal; + +// expected-error-re@*:* {{static assertion failed{{.*}}the second template parameter of std::constant_wrapper must be its value_type}} +std::constant_wrapper<1, float> wrong_type1; // expected-note {{in instantiation of template class}} +// expected-error-re@*:* {{static assertion failed{{.*}}the second template parameter of std::constant_wrapper must be its value_type}} +std::constant_wrapper<1.0, int> wrong_type2; // expected-note {{in instantiation of template class}} +// expected-error-re@*:* {{static assertion failed{{.*}}the second template parameter of std::constant_wrapper must be its value_type}} +std::constant_wrapper<1, const int> wrong_type3; // expected-note {{in instantiation of template class}} +// expected-error-re@*:* {{static assertion failed{{.*}}the second template parameter of std::constant_wrapper must be its value_type}} +std::constant_wrapper<1, const int&> wrong_type4; // 
expected-note {{in instantiation of template class}} diff --git a/libcxx/test/std/utilities/const.wrap.class/types.compile.pass.cpp b/libcxx/test/std/utilities/const.wrap.class/types.compile.pass.cpp index 96dce9e055209..a85a44efd5cfd 100644 --- a/libcxx/test/std/utilities/const.wrap.class/types.compile.pass.cpp +++ b/libcxx/test/std/utilities/const.wrap.class/types.compile.pass.cpp @@ -10,16 +10,15 @@ // constant_wrapper -// static constexpr const auto & value = X.data; +// static constexpr decltype(auto) value = (X); // using type = constant_wrapper; -// using value_type = decltype(X)::type; +// using value_type = decltype(X); -#include #include #include static_assert(std::constant_wrapper<42>::value == 42); -static_assert(std::same_as::value), const int&>); +static_assert(std::same_as::value), const int>); static_assert(std::same_as::type, std::constant_wrapper<42>>); static_assert(std::same_as::value_type, int>); @@ -32,7 +31,15 @@ static_assert(std::same_as::value), const S static_assert(std::same_as::type, std::constant_wrapper>); static_assert(std::same_as::value_type, S>); -static_assert(std::ranges::equal(std::constant_wrapper<"abcd">::value, "abcd")); -static_assert(std::same_as::value), const char (&)[5]>); -static_assert(std::same_as::type, std::constant_wrapper<"abcd">>); -static_assert(std::same_as::value_type, const char[5]>); +template +consteval bool value_ref_to_template_parameter_object() { + return &V == &std::constant_wrapper::value; +} + +static_assert(value_ref_to_template_parameter_object()); + +constexpr int arr[] = {1, 2, 3, 4, 5}; + +static_assert(std::constant_wrapper::value == arr); +static_assert(std::same_as::type, std::constant_wrapper>); +static_assert(std::same_as::value_type, const int*>); diff --git a/libcxx/test/std/utilities/const.wrap.class/unary_ops.pass.cpp b/libcxx/test/std/utilities/const.wrap.class/unary_ops.pass.cpp index 5a474265a1719..8cd27b75f64c2 100644 --- 
a/libcxx/test/std/utilities/const.wrap.class/unary_ops.pass.cpp +++ b/libcxx/test/std/utilities/const.wrap.class/unary_ops.pass.cpp @@ -148,7 +148,12 @@ static_assert(!HasPlus>); static_assert(!HasMinus>); static_assert(!HasBitNot>); static_assert(!HasNot>); + +// TODO: Remove this guard when Clang 21 is no longer supported. +#if defined(TEST_CLANG_VER) && TEST_CLANG_VER >= 2200 // https://llvm.org/PR151531 static_assert(HasBitAnd>); +#endif + static_assert(!HasDeref>); // The operators from constant_wrapper do not exist, but they can be implicited converted diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index d764a1f677ba6..776689b3d3935 100644 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -337,7 +337,7 @@ def add_version_header(tc): { "name": "__cpp_lib_constant_wrapper", "values": { - "c++26": 202603, + "c++26": 202606, }, "headers": ["utility"], }, From f4d4fe72622b3f6aae63a7fe35e7fad86215c685 Mon Sep 17 00:00:00 2001 From: Xavier Roche Date: Mon, 22 Jun 2026 19:19:19 +0200 Subject: [PATCH 046/511] [libc++] std::abs support for _BitInt(N) and __int128 (#196532) `std::abs` does not accept `__int128` or signed `_BitInt(N)`: the call is ambiguous and fails to compile (#204212). This adds an explicit `abs(__int128_t)` overload and an `abs(_BitInt(N))` overload that deduces the width, so every signed `_BitInt` gets a same-type result. `_BitInt` does not integer-promote, so without this overload a narrow signed `_BitInt` would be an ambiguous call against `abs(int/long/long long)` instead of promoting the way `signed char` and `short` do. Standard narrow types are unchanged: they still go through `abs(int)`. Part of the [_BitInt(N) libc++ effort](https://discourse.llvm.org/t/bitint-n-support-in-libc-investigations-possible-improvements-looking-for-guidance/90063). 
Fixes #204212 Assisted-by: Claude (Anthropic) --------- Co-authored-by: Claude Opus 4.6 --- libcxx/include/__math/abs.h | 13 +++ libcxx/test/std/numerics/c.math/abs.pass.cpp | 99 ++++++++++++++++++++ 2 files changed, 112 insertions(+) diff --git a/libcxx/include/__math/abs.h b/libcxx/include/__math/abs.h index b780159f11ebf..a478d955d0b56 100644 --- a/libcxx/include/__math/abs.h +++ b/libcxx/include/__math/abs.h @@ -63,6 +63,19 @@ template return __builtin_llabs(__x); } +#if _LIBCPP_HAS_INT128 +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI inline __int128_t abs(__int128_t __x) _NOEXCEPT { return __x < 0 ? -__x : __x; } +#endif + +#if defined(__BITINT_MAXWIDTH__) +// _BitInt does not integer-promote, so without a same-type overload a narrow +// signed _BitInt would be an ambiguous call against abs(int/long/long long). +template +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _BitInt(_Np) abs(_BitInt(_Np) __x) _NOEXCEPT { + return __x < 0 ? -__x : __x; +} +#endif + } // namespace __math _LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/test/std/numerics/c.math/abs.pass.cpp b/libcxx/test/std/numerics/c.math/abs.pass.cpp index 51aee6e986836..9d9d2c86e4b11 100644 --- a/libcxx/test/std/numerics/c.math/abs.pass.cpp +++ b/libcxx/test/std/numerics/c.math/abs.pass.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// XFAIL: FROZEN-CXX03-HEADERS-FIXME + #include #include #include @@ -37,6 +39,65 @@ void test_big() { assert(std::abs(negative_big_value) == big_value); // make sure it doesn't get casted to a smaller type } +// std::abs has __int128/_BitInt(N) overloads as a libc++ extension. +// Narrow signed types stay on the abs(int) promotion path. +template +struct unpromoted_abs : std::is_same {}; +static_assert(!unpromoted_abs::value, ""); +static_assert(!unpromoted_abs::value, ""); + +#if TEST_HAS_BITINT +// Every signed _BitInt(N) gets a same-type abs, including widths narrower than +// int. 
unsigned _BitInt is excluded by the signed-only contract. +template +struct has_abs : std::false_type {}; +template +struct has_abs : std::true_type {}; +static_assert(has_abs::value, ""); +static_assert(has_abs::value, ""); +static_assert(has_abs::value, ""); +static_assert(has_abs::value, ""); +static_assert(!has_abs::value, ""); // signed-only contract +static_assert(!has_abs::value, ""); + +template +void test_signed_bitint() { + typedef signed _BitInt(N) T; + ASSERT_SAME_TYPE(decltype(std::abs(T(0))), T); + assert(std::abs(T(0)) == T(0)); + assert(std::abs(T(1)) == T(1)); + assert(std::abs(T(42)) == T(42)); + assert(std::abs(T(-1)) == T(1)); + assert(std::abs(T(-42)) == T(42)); + + // T_MIN omitted: -T_MIN is UB, same as abs(INT_MIN). + T t_max = static_cast(~static_cast(0) >> 1); + T t_min_plus1 = -t_max; + assert(std::abs(t_max) == t_max); + assert(std::abs(t_min_plus1) == t_max); +} +#endif // TEST_HAS_BITINT + +#ifndef TEST_HAS_NO_INT128 +void test_int128() { + ASSERT_SAME_TYPE(decltype(std::abs(static_cast<__int128_t>(0))), __int128_t); + assert(std::abs(static_cast<__int128_t>(0)) == 0); + assert(std::abs(static_cast<__int128_t>(42)) == 42); + assert(std::abs(static_cast<__int128_t>(-42)) == 42); + assert(std::abs(static_cast<__int128_t>(-1)) == 1); + + // INT128_MIN omitted: -__x is UB, same as abs(INT_MIN). 
+ __int128_t big_neg = -((static_cast<__int128_t>(1) << 100)); + __int128_t big_pos = static_cast<__int128_t>(1) << 100; + assert(std::abs(big_neg) == big_pos); + + __int128_t int128_max = static_cast<__int128_t>(~static_cast<__uint128_t>(0) >> 1); + __int128_t int128_min_plus1 = -int128_max; + assert(std::abs(int128_max) == int128_max); + assert(std::abs(int128_min_plus1) == int128_max); +} +#endif // TEST_HAS_NO_INT128 + // The following is helpful to keep in mind: // 1byte == char <= short <= int <= long <= long long @@ -68,5 +129,43 @@ int main(int, char**) { test_big(); +#if TEST_HAS_BITINT + // Non-byte-aligned _BitInt(N) has uninitialized padding bits; passing such a + // value to std::abs trips a false-positive MSan report (#204217). Gate those + // widths out under MSan; byte-aligned widths still run. + test_signed_bitint<8>(); + test_signed_bitint<16>(); + test_signed_bitint<32>(); + test_signed_bitint<64>(); +# if !TEST_HAS_FEATURE(memory_sanitizer) + test_signed_bitint<7>(); + test_signed_bitint<33>(); + test_signed_bitint<63>(); + test_signed_bitint<65>(); +# endif +# if __BITINT_MAXWIDTH__ >= 128 + test_signed_bitint<128>(); +# endif +# if __BITINT_MAXWIDTH__ >= 256 +# if !TEST_HAS_FEATURE(memory_sanitizer) + test_signed_bitint<129>(); +# endif + test_signed_bitint<256>(); + + // Large value: |-2^200| == 2^200. 
+ signed _BitInt(256) v = -(static_cast(1) << 200); + signed _BitInt(256) expected = static_cast(1) << 200; + assert(std::abs(v) == expected); +# endif +# if __BITINT_MAXWIDTH__ >= 1024 + test_signed_bitint<512>(); + test_signed_bitint<1024>(); +# endif +#endif // TEST_HAS_BITINT + +#ifndef TEST_HAS_NO_INT128 + test_int128(); +#endif + return 0; } From a018e201738b19363407da7c09760cbaf86f283f Mon Sep 17 00:00:00 2001 From: John Paul Jepko Date: Mon, 22 Jun 2026 12:30:11 -0500 Subject: [PATCH 047/511] [Clang][Sema] Add -Wstringop-overread warning for source buffer overreads (#183004) This PR adds a new `-Wstringop-overread` warning that diagnoses calls to memory functions where the specified size exceeds the size of the source buffer, increasing parity with GCC's `-Wstringop-overread`. The warning is emitted when the read size is a compile-time constant that is greater than the size of the source buffer (when known statically). This check applies to the following functions: - `memcpy`, `memmove`, `mempcpy` (and `__builtin_` / `__builtin___*_chk` variants) - `memchr` - `memcmp`, `bcmp` Some of the existing code for `-Wfortify-source` was refactored into a helper class to make its lambdas accessible to other functions. 
Fixes #83728 Assisted-by: claude-opus-4.6 --------- Co-authored-by: Erich Keane --- clang/docs/ReleaseNotes.rst | 3 + .../clang/Basic/DiagnosticSemaKinds.td | 4 + clang/lib/Sema/SemaChecking.cpp | 245 ++++++++++++------ clang/test/AST/ByteCode/builtin-functions.cpp | 20 +- clang/test/Analysis/bstring.c | 4 + clang/test/Analysis/malloc.c | 1 + clang/test/Analysis/pr22954.c | 2 +- clang/test/Sema/builtin-memcpy.c | 3 +- clang/test/Sema/builtin-object-size.c | 2 +- clang/test/Sema/warn-fortify-source.c | 8 +- clang/test/Sema/warn-stringop-overread.c | 175 +++++++++++++ .../asan/TestCases/Windows/issue64990.cpp | 2 +- 12 files changed, 370 insertions(+), 99 deletions(-) create mode 100644 clang/test/Sema/warn-stringop-overread.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 37428df0974f4..42c5dc16ea2e1 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -664,6 +664,9 @@ Improvements to Clang's diagnostics - Clang now rejects inline asm constraints and clobbers that contain an embedded null character, instead of silently truncating them. (#GH173900) +- Added ``-Wstringop-overread`` to warn when ``memcpy``, ``memmove``, ``memcmp``, + and related builtins read more bytes than the source buffer size (#GH83728). + - Diagnostics for the C++11 range-based for statement now report the correct iterator type in notes for invalid iterator types. 
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index f7fba8df1e4d7..922c74b0857fc 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -968,6 +968,10 @@ def warn_fortify_source_size_mismatch : Warning< "'%0' size argument is too large; destination buffer has size %1," " but size argument is %2">, InGroup; +def warn_stringop_overread + : Warning<"'%0' reading %1 byte%s1 from a region of size %2">, + InGroup>; + def warn_fortify_strlen_overflow: Warning< "'%0' will always overflow; destination buffer has size %1," " but the source string has length %2 (including NUL byte)">, diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index ec4a9037f5c23..fa0ec55a63bb7 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -1147,36 +1147,23 @@ static bool ProcessFormatStringLiteral(const Expr *FormatExpr, return false; } -void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, - CallExpr *TheCall) { - if (TheCall->isValueDependent() || TheCall->isTypeDependent() || - isConstantEvaluatedContext()) - return; - - bool UseDABAttr = false; - const FunctionDecl *UseDecl = FD; - - const auto *DABAttr = FD->getAttr(); - if (DABAttr) { - UseDecl = DABAttr->getFunction(); - assert(UseDecl && "Missing FunctionDecl in DiagnoseAsBuiltin attribute!"); - UseDABAttr = true; +namespace { +/// Helper class for buffer overflow/overread checking in fortified functions. +class FortifiedBufferChecker { +public: + FortifiedBufferChecker(Sema &S, FunctionDecl *FD, CallExpr *TheCall) + : S(S), TheCall(TheCall), FD(FD), + DABAttr(FD ? 
FD->getAttr() : nullptr) { + const TargetInfo &TI = S.getASTContext().getTargetInfo(); + SizeTypeWidth = TI.getTypeWidth(TI.getSizeType()); } - unsigned BuiltinID = UseDecl->getBuiltinID(/*ConsiderWrappers=*/true); - - if (!BuiltinID) - return; - - const TargetInfo &TI = getASTContext().getTargetInfo(); - unsigned SizeTypeWidth = TI.getTypeWidth(TI.getSizeType()); - - auto TranslateIndex = [&](unsigned Index) -> std::optional { + std::optional TranslateIndex(unsigned Index) { // If we refer to a diagnose_as_builtin attribute, we need to change the // argument index to refer to the arguments of the called function. Unless // the index is out of bounds, which presumably means it's a variadic // function. - if (!UseDABAttr) + if (!DABAttr) return Index; unsigned DABIndices = DABAttr->argIndices_size(); unsigned NewIndex = Index < DABIndices @@ -1185,25 +1172,25 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, if (NewIndex >= TheCall->getNumArgs()) return std::nullopt; return NewIndex; - }; + } - auto ComputeExplicitObjectSizeArgument = - [&](unsigned Index) -> std::optional { + std::optional + ComputeExplicitObjectSizeArgument(unsigned Index) { std::optional IndexOptional = TranslateIndex(Index); if (!IndexOptional) return std::nullopt; unsigned NewIndex = *IndexOptional; Expr::EvalResult Result; Expr *SizeArg = TheCall->getArg(NewIndex); - if (!SizeArg->EvaluateAsInt(Result, getASTContext())) + if (!SizeArg->EvaluateAsInt(Result, S.getASTContext())) return std::nullopt; llvm::APSInt Integer = Result.Val.getInt(); - Integer.setIsUnsigned(true); + assert(Integer.isUnsigned() && + "size arg should be unsigned after implicit conversion to size_t"); return Integer; - }; + } - auto ComputeSizeArgument = - [&](unsigned Index) -> std::optional { + std::optional ComputeSizeArgument(unsigned Index) { // If the parameter has a pass_object_size attribute, then we should use its // (potentially) more strict checking mode. 
Otherwise, conservatively assume // type 0. @@ -1225,15 +1212,14 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, const Expr *ObjArg = TheCall->getArg(NewIndex); if (std::optional ObjSize = - ObjArg->tryEvaluateObjectSize(getASTContext(), BOSType)) { + ObjArg->tryEvaluateObjectSize(S.getASTContext(), BOSType)) { // Get the object size in the target's size_t width. return llvm::APSInt::getUnsigned(*ObjSize).extOrTrunc(SizeTypeWidth); } return std::nullopt; - }; + } - auto ComputeStrLenArgument = - [&](unsigned Index) -> std::optional { + std::optional ComputeStrLenArgument(unsigned Index) { std::optional IndexOptional = TranslateIndex(Index); if (!IndexOptional) return std::nullopt; @@ -1242,33 +1228,95 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, const Expr *ObjArg = TheCall->getArg(NewIndex); if (std::optional Result = - ObjArg->tryEvaluateStrLen(getASTContext())) { + ObjArg->tryEvaluateStrLen(S.getASTContext())) { // Add 1 for null byte. return llvm::APSInt::getUnsigned(*Result + 1).extOrTrunc(SizeTypeWidth); } return std::nullopt; - }; + } + + unsigned getSizeTypeWidth() const { return SizeTypeWidth; } + + unsigned getBuiltinID() const { + const FunctionDecl *UseDecl = FD; + if (DABAttr) { + UseDecl = DABAttr->getFunction(); + assert(UseDecl && "Missing FunctionDecl in DiagnoseAsBuiltin attribute!"); + } + return UseDecl->getBuiltinID(/*ConsiderWrappers=*/true); + } + + /// Return function name after stripping __builtin_ and _chk affixes. + std::string getFunctionName() const { + unsigned ID = getBuiltinID(); + if (!ID) { + // Use callee name directly if not a builtin. + const FunctionDecl *Callee = TheCall->getDirectCallee(); + assert(Callee && "expected callee"); + return Callee->getName().str(); + } + std::string Name = S.getASTContext().BuiltinInfo.getName(ID); + StringRef Ref = Name; + // Strip __builtin___*_chk or __builtin_ prefix. 
+ if (!(Ref.consume_front("__builtin___") && Ref.consume_back("_chk"))) + Ref.consume_front("__builtin_"); + assert(!Ref.empty() && "expected non-empty function name"); + return Ref.str(); + } + + /// Check for source buffer overread in memory functions. + void checkSourceOverread(unsigned SrcArgIdx, unsigned SizeArgIdx) { + if (S.isConstantEvaluatedContext()) + return; + + const Expr *SrcArg = TheCall->getArg(SrcArgIdx); + const Expr *SizeArg = TheCall->getArg(SizeArgIdx); + if (SrcArg->isInstantiationDependent() || + SizeArg->isInstantiationDependent()) + return; + + std::optional CopyLen = + ComputeExplicitObjectSizeArgument(SizeArgIdx); + std::optional SrcBufSize = ComputeSizeArgument(SrcArgIdx); + + if (!CopyLen || !SrcBufSize) + return; + + // Warn only if copy length exceeds source buffer size. + if (llvm::APSInt::compareValues(*CopyLen, *SrcBufSize) <= 0) + return; + + S.DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall, + S.PDiag(diag::warn_stringop_overread) + << getFunctionName() << CopyLen->getZExtValue() + << SrcBufSize->getZExtValue()); + } + +private: + Sema &S; + CallExpr *TheCall; + FunctionDecl *FD; + const DiagnoseAsBuiltinAttr *DABAttr; + unsigned SizeTypeWidth; +}; +} // anonymous namespace + +void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, + CallExpr *TheCall) { + if (TheCall->isInstantiationDependent() || isConstantEvaluatedContext()) + return; + + FortifiedBufferChecker Checker(*this, FD, TheCall); + + unsigned BuiltinID = Checker.getBuiltinID(); + if (!BuiltinID) + return; + + unsigned SizeTypeWidth = Checker.getSizeTypeWidth(); std::optional SourceSize; std::optional DestinationSize; unsigned DiagID = 0; - bool IsChkVariant = false; - - auto GetFunctionName = [&]() { - std::string FunctionNameStr = - getASTContext().BuiltinInfo.getName(BuiltinID); - llvm::StringRef FunctionName = FunctionNameStr; - // Skim off the details of whichever builtin was called to produce a better - // diagnostic, as it's unlikely that the user 
wrote the __builtin - // explicitly. - if (IsChkVariant) { - FunctionName = FunctionName.drop_front(std::strlen("__builtin___")); - FunctionName = FunctionName.drop_back(std::strlen("_chk")); - } else { - FunctionName.consume_front("__builtin_"); - } - return FunctionName.str(); - }; switch (BuiltinID) { default: @@ -1280,8 +1328,8 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, case Builtin::BI__builtin_strcpy: case Builtin::BIstrcpy: { DiagID = diag::warn_fortify_strlen_overflow; - SourceSize = ComputeStrLenArgument(1); - DestinationSize = ComputeSizeArgument(0); + SourceSize = Checker.ComputeStrLenArgument(1); + DestinationSize = Checker.ComputeSizeArgument(0); break; } @@ -1289,9 +1337,8 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, case Builtin::BI__builtin___stpcpy_chk: case Builtin::BI__builtin___strcpy_chk: { DiagID = diag::warn_fortify_strlen_overflow; - SourceSize = ComputeStrLenArgument(1); - DestinationSize = ComputeExplicitObjectSizeArgument(2); - IsChkVariant = true; + SourceSize = Checker.ComputeStrLenArgument(1); + DestinationSize = Checker.ComputeExplicitObjectSizeArgument(2); break; } @@ -1317,14 +1364,14 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, unsigned SourceSize) { DiagID = diag::warn_fortify_scanf_overflow; unsigned Index = ArgIndex + DataIndex; - std::string FunctionName = GetFunctionName(); + std::string FunctionName = Checker.getFunctionName(); DiagRuntimeBehavior(TheCall->getArg(Index)->getBeginLoc(), TheCall, PDiag(DiagID) << FunctionName << (Index + 1) << DestSize << SourceSize); }; auto ShiftedComputeSizeArgument = [&](unsigned Index) { - return ComputeSizeArgument(Index + DataIndex); + return Checker.ComputeSizeArgument(Index + DataIndex); }; ScanfDiagnosticFormatHandler H(ShiftedComputeSizeArgument, Diagnose); const char *FormatBytes = FormatStrRef.data(); @@ -1357,10 +1404,9 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, SourceSize = 
llvm::APSInt::getUnsigned(H.getSizeLowerBound()) .extOrTrunc(SizeTypeWidth); if (BuiltinID == Builtin::BI__builtin___sprintf_chk) { - DestinationSize = ComputeExplicitObjectSizeArgument(2); - IsChkVariant = true; + DestinationSize = Checker.ComputeExplicitObjectSizeArgument(2); } else { - DestinationSize = ComputeSizeArgument(0); + DestinationSize = Checker.ComputeSizeArgument(0); } break; } @@ -1378,19 +1424,24 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, case Builtin::BI__builtin___memccpy_chk: case Builtin::BI__builtin___mempcpy_chk: { DiagID = diag::warn_builtin_chk_overflow; - SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 2); + SourceSize = + Checker.ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 2); DestinationSize = - ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1); - IsChkVariant = true; + Checker.ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1); + + if (BuiltinID == Builtin::BI__builtin___memcpy_chk || + BuiltinID == Builtin::BI__builtin___memmove_chk || + BuiltinID == Builtin::BI__builtin___mempcpy_chk) { + Checker.checkSourceOverread(/*SrcArgIdx=*/1, /*SizeArgIdx=*/2); + } break; } case Builtin::BI__builtin___snprintf_chk: case Builtin::BI__builtin___vsnprintf_chk: { DiagID = diag::warn_builtin_chk_overflow; - SourceSize = ComputeExplicitObjectSizeArgument(1); - DestinationSize = ComputeExplicitObjectSizeArgument(3); - IsChkVariant = true; + SourceSize = Checker.ComputeExplicitObjectSizeArgument(1); + DestinationSize = Checker.ComputeExplicitObjectSizeArgument(3); break; } @@ -1406,8 +1457,9 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, // size larger than the destination buffer though; this is a runtime abort // in _FORTIFY_SOURCE mode, and is quite suspicious otherwise. 
DiagID = diag::warn_fortify_source_size_mismatch; - SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1); - DestinationSize = ComputeSizeArgument(0); + SourceSize = + Checker.ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1); + DestinationSize = Checker.ComputeSizeArgument(0); break; } @@ -1422,23 +1474,52 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, case Builtin::BImempcpy: case Builtin::BI__builtin_mempcpy: { DiagID = diag::warn_fortify_source_overflow; - SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1); - DestinationSize = ComputeSizeArgument(0); + SourceSize = + Checker.ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1); + DestinationSize = Checker.ComputeSizeArgument(0); + + // Buffer overread doesn't make sense for memset/bzero. + if (BuiltinID != Builtin::BImemset && + BuiltinID != Builtin::BI__builtin_memset && + BuiltinID != Builtin::BIbzero && + BuiltinID != Builtin::BI__builtin_bzero) { + Checker.checkSourceOverread(/*SrcArgIdx=*/1, /*SizeArgIdx=*/2); + } break; } case Builtin::BIbcopy: case Builtin::BI__builtin_bcopy: { DiagID = diag::warn_fortify_source_overflow; - SourceSize = ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1); - DestinationSize = ComputeSizeArgument(1); + SourceSize = + Checker.ComputeExplicitObjectSizeArgument(TheCall->getNumArgs() - 1); + DestinationSize = Checker.ComputeSizeArgument(1); + Checker.checkSourceOverread(/*SrcArgIdx=*/0, /*SizeArgIdx=*/2); break; } + + // memchr(buf, val, size) + case Builtin::BImemchr: + case Builtin::BI__builtin_memchr: { + Checker.checkSourceOverread(/*SrcArgIdx=*/0, /*SizeArgIdx=*/2); + return; + } + + // memcmp/bcmp(buf0, buf1, size) + // Two checks since each buffer is read + case Builtin::BImemcmp: + case Builtin::BI__builtin_memcmp: + case Builtin::BIbcmp: + case Builtin::BI__builtin_bcmp: { + Checker.checkSourceOverread(/*SrcArgIdx=*/0, /*SizeArgIdx=*/2); + 
Checker.checkSourceOverread(/*SrcArgIdx=*/1, /*SizeArgIdx=*/2); + return; + } case Builtin::BIsnprintf: case Builtin::BI__builtin_snprintf: case Builtin::BIvsnprintf: case Builtin::BI__builtin_vsnprintf: { DiagID = diag::warn_fortify_source_size_mismatch; - SourceSize = ComputeExplicitObjectSizeArgument(1); + SourceSize = Checker.ComputeExplicitObjectSizeArgument(1); const auto *FormatExpr = TheCall->getArg(2)->IgnoreParenImpCasts(); StringRef FormatStrRef; size_t StrLen; @@ -1462,12 +1543,12 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, FormatSize.toString(FormatSizeStr, /*Radix=*/10); DiagRuntimeBehavior(TheCall->getBeginLoc(), TheCall, PDiag(TruncationDiagID) - << GetFunctionName() << SpecifiedSizeStr - << FormatSizeStr); + << Checker.getFunctionName() + << SpecifiedSizeStr << FormatSizeStr); } } } - DestinationSize = ComputeSizeArgument(0); + DestinationSize = Checker.ComputeSizeArgument(0); const Expr *LenArg = TheCall->getArg(1)->IgnoreCasts(); const Expr *Dest = TheCall->getArg(0)->IgnoreCasts(); IdentifierInfo *FnInfo = FD->getIdentifier(); @@ -1479,7 +1560,7 @@ void Sema::checkFortifiedBuiltinMemoryFunction(FunctionDecl *FD, llvm::APSInt::compareValues(*SourceSize, *DestinationSize) <= 0) return; - std::string FunctionName = GetFunctionName(); + std::string FunctionName = Checker.getFunctionName(); SmallString<16> DestinationStr; SmallString<16> SourceStr; diff --git a/clang/test/AST/ByteCode/builtin-functions.cpp b/clang/test/AST/ByteCode/builtin-functions.cpp index d6990d1725072..b14ae91c753cb 100644 --- a/clang/test/AST/ByteCode/builtin-functions.cpp +++ b/clang/test/AST/ByteCode/builtin-functions.cpp @@ -1,17 +1,17 @@ -// RUN: %clang_cc1 -Wno-string-plus-int -fexperimental-new-constant-interpreter -triple x86_64 %s -verify=expected,both -// RUN: %clang_cc1 -Wno-string-plus-int -triple x86_64 %s -verify=ref,both +// RUN: %clang_cc1 -Wno-string-plus-int -Wno-stringop-overread -fexperimental-new-constant-interpreter -triple x86_64 %s 
-verify=expected,both +// RUN: %clang_cc1 -Wno-string-plus-int -Wno-stringop-overread -triple x86_64 %s -verify=ref,both // -// RUN: %clang_cc1 -Wno-string-plus-int -fexperimental-new-constant-interpreter -triple i686 %s -verify=expected,both -// RUN: %clang_cc1 -Wno-string-plus-int -triple i686 %s -verify=ref,both +// RUN: %clang_cc1 -Wno-string-plus-int -Wno-stringop-overread -fexperimental-new-constant-interpreter -triple i686 %s -verify=expected,both +// RUN: %clang_cc1 -Wno-string-plus-int -Wno-stringop-overread -triple i686 %s -verify=ref,both // -// RUN: %clang_cc1 -std=c++20 -Wno-string-plus-int -fexperimental-new-constant-interpreter -triple x86_64 %s -verify=expected,both -// RUN: %clang_cc1 -std=c++20 -Wno-string-plus-int -triple x86_64 %s -verify=ref,both +// RUN: %clang_cc1 -std=c++20 -Wno-string-plus-int -Wno-stringop-overread -fexperimental-new-constant-interpreter -triple x86_64 %s -verify=expected,both +// RUN: %clang_cc1 -std=c++20 -Wno-string-plus-int -Wno-stringop-overread -triple x86_64 %s -verify=ref,both // -// RUN: %clang_cc1 -std=c++20 -Wno-string-plus-int -fexperimental-new-constant-interpreter -triple i686 %s -verify=expected,both -// RUN: %clang_cc1 -std=c++20 -Wno-string-plus-int -triple i686 %s -verify=ref,both +// RUN: %clang_cc1 -std=c++20 -Wno-string-plus-int -Wno-stringop-overread -fexperimental-new-constant-interpreter -triple i686 %s -verify=expected,both +// RUN: %clang_cc1 -std=c++20 -Wno-string-plus-int -Wno-stringop-overread -triple i686 %s -verify=ref,both // -// RUN: %clang_cc1 -triple avr -std=c++20 -Wno-string-plus-int -fexperimental-new-constant-interpreter %s -verify=expected,both -// RUN: %clang_cc1 -triple avr -std=c++20 -Wno-string-plus-int -verify=ref,both %s +// RUN: %clang_cc1 -triple avr -std=c++20 -Wno-string-plus-int -Wno-stringop-overread -fexperimental-new-constant-interpreter %s -verify=expected,both +// RUN: %clang_cc1 -triple avr -std=c++20 -Wno-string-plus-int -Wno-stringop-overread -verify=ref,both %s 
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ #define LITTLE_END 1 diff --git a/clang/test/Analysis/bstring.c b/clang/test/Analysis/bstring.c index b337c71eb02c7..7c33b09409725 100644 --- a/clang/test/Analysis/bstring.c +++ b/clang/test/Analysis/bstring.c @@ -1,4 +1,5 @@ // RUN: %clang_analyze_cc1 -verify %s \ +// RUN: -Wno-stringop-overread \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=unix.cstring \ // RUN: -analyzer-disable-checker=unix.cstring.UninitializedRead \ @@ -7,6 +8,7 @@ // RUN: -analyzer-config eagerly-assume=false // // RUN: %clang_analyze_cc1 -verify %s -DUSE_BUILTINS \ +// RUN: -Wno-stringop-overread \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=unix.cstring \ // RUN: -analyzer-disable-checker=unix.cstring.UninitializedRead \ @@ -15,6 +17,7 @@ // RUN: -analyzer-config eagerly-assume=false // // RUN: %clang_analyze_cc1 -verify %s -DVARIANT \ +// RUN: -Wno-stringop-overread \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=unix.cstring \ // RUN: -analyzer-disable-checker=unix.cstring.UninitializedRead \ @@ -23,6 +26,7 @@ // RUN: -analyzer-config eagerly-assume=false // // RUN: %clang_analyze_cc1 -verify %s -DUSE_BUILTINS -DVARIANT \ +// RUN: -Wno-stringop-overread \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=unix.cstring \ // RUN: -analyzer-disable-checker=unix.cstring.UninitializedRead \ diff --git a/clang/test/Analysis/malloc.c b/clang/test/Analysis/malloc.c index 849ab3a3a0f37..efb2d52d3a321 100644 --- a/clang/test/Analysis/malloc.c +++ b/clang/test/Analysis/malloc.c @@ -1,5 +1,6 @@ // RUN: %clang_analyze_cc1 -Wno-strict-prototypes -Wno-error=implicit-int -verify %s \ // RUN: -Wno-alloc-size \ +// RUN: -Wno-stringop-overread \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=alpha.deadcode.UnreachableCode \ // RUN: -analyzer-checker=unix \ diff --git a/clang/test/Analysis/pr22954.c b/clang/test/Analysis/pr22954.c index 3d1cac1972066..b3910da6c70ab 100644 --- 
a/clang/test/Analysis/pr22954.c +++ b/clang/test/Analysis/pr22954.c @@ -3,7 +3,7 @@ // At the moment the whole of the destination array content is invalidated. // If a.s1 region has a symbolic offset, the whole region of 'a' is invalidated. // Specific triple set to test structures of size 0. -// RUN: %clang_analyze_cc1 -triple x86_64-pc-linux-gnu -analyzer-checker=core,unix.Malloc,debug.ExprInspection -Wno-error=int-conversion -verify -analyzer-config eagerly-assume=false %s +// RUN: %clang_analyze_cc1 -triple x86_64-pc-linux-gnu -analyzer-checker=core,unix.Malloc,debug.ExprInspection -Wno-error=int-conversion -Wno-stringop-overread -verify -analyzer-config eagerly-assume=false %s typedef __typeof(sizeof(int)) size_t; diff --git a/clang/test/Sema/builtin-memcpy.c b/clang/test/Sema/builtin-memcpy.c index 2a55e78034a02..94f71e4c42a58 100644 --- a/clang/test/Sema/builtin-memcpy.c +++ b/clang/test/Sema/builtin-memcpy.c @@ -7,7 +7,8 @@ /// Zero-sized structs should not crash. int b() { struct { } a[10]; - __builtin_memcpy(&a[2], a, 2); // c-warning {{buffer has size 0, but size argument is 2}} + __builtin_memcpy(&a[2], a, 2); // c-warning {{buffer has size 0, but size argument is 2}} \ + // c-warning {{'memcpy' reading 2 bytes from a region of size 0}} return 0; } diff --git a/clang/test/Sema/builtin-object-size.c b/clang/test/Sema/builtin-object-size.c index a763c24fd6620..8d48d3f569d91 100644 --- a/clang/test/Sema/builtin-object-size.c +++ b/clang/test/Sema/builtin-object-size.c @@ -50,7 +50,7 @@ void f5(void) { char buf[10]; memset((void *)0x100000000ULL, 0, 0x1000); - memcpy((char *)NULL + 0x10000, buf, 0x10); + memcpy((char *)NULL + 0x10000, buf, 0x10); // expected-warning {{'memcpy' reading 16 bytes from a region of size 10}} memcpy1((char *)NULL + 0x10000, buf, 0x10); // expected-error {{argument value 4 is outside the valid range [0, 3]}} } diff --git a/clang/test/Sema/warn-fortify-source.c b/clang/test/Sema/warn-fortify-source.c index 
d0b519a516545..55a54b46a1175 100644 --- a/clang/test/Sema/warn-fortify-source.c +++ b/clang/test/Sema/warn-fortify-source.c @@ -96,7 +96,7 @@ void call_stpcpy(void) { void call_memmove(void) { char s1[10], s2[20]; - __builtin_memmove(s2, s1, 20); + __builtin_memmove(s2, s1, 20); // expected-warning {{'memmove' reading 20 bytes from a region of size 10}} __builtin_memmove(s1, s2, 20); // expected-warning {{'memmove' will always overflow; destination buffer has size 10, but size argument is 20}} } @@ -255,11 +255,13 @@ template void call_memcpy_dep() { char bufferA[A]; char bufferB[B]; - memcpy(bufferA, bufferB, 10); // expected-warning{{'memcpy' will always overflow; destination buffer has size 9, but size argument is 10}} + memcpy(bufferA, bufferB, 10); } void call_call_memcpy() { - call_memcpy_dep<10, 9>(); + call_memcpy_dep<10, 9>(); // expected-note {{in instantiation of function template specialization 'call_memcpy_dep<10, 9>' requested here}} + // expected-warning@-5 {{'memcpy' reading 10 bytes from a region of size 9}} call_memcpy_dep<9, 10>(); // expected-note {{in instantiation of function template specialization 'call_memcpy_dep<9, 10>' requested here}} + // expected-warning@-7 {{'memcpy' will always overflow; destination buffer has size 9, but size argument is 10}} } #endif diff --git a/clang/test/Sema/warn-stringop-overread.c b/clang/test/Sema/warn-stringop-overread.c new file mode 100644 index 0000000000000..e4a6289a1df2a --- /dev/null +++ b/clang/test/Sema/warn-stringop-overread.c @@ -0,0 +1,175 @@ +// RUN: %clang_cc1 %s -verify +// RUN: %clang_cc1 %s -verify -DUSE_BUILTINS +// RUN: %clang_cc1 -xc++ %s -verify +// RUN: %clang_cc1 -xc++ %s -verify -DUSE_BUILTINS + +typedef __SIZE_TYPE__ size_t; + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(USE_BUILTINS) +#define memcpy(x,y,z) __builtin_memcpy(x,y,z) +#define memmove(x,y,z) __builtin_memmove(x,y,z) +#define memchr(x,y,z) __builtin_memchr(x,y,z) +#define memcmp(x,y,z) __builtin_memcmp(x,y,z) 
+#else +void *memcpy(void *dst, const void *src, size_t c); +void *memmove(void *dst, const void *src, size_t c); +void *memchr(const void *s, int c, size_t n); +int memcmp(const void *s1, const void *s2, size_t n); +#endif + +int bcmp(const void *s1, const void *s2, size_t n); +void bcopy(const void *src, void *dst, size_t n); + +#ifdef __cplusplus +} +#endif + +void test_memcpy_overread(void) { + char dst[100]; + int src = 0; + memcpy(dst, &src, sizeof(src) + 1); // expected-warning {{'memcpy' reading 5 bytes from a region of size 4}} +} + +void test_memcpy_array_overread(void) { + int dest[10]; + int src[5] = {1, 2, 3, 4, 5}; + memcpy(dest, src, 10 * sizeof(int)); // expected-warning {{'memcpy' reading 40 bytes from a region of size 20}} +} + +void test_memcpy_struct_overread(void) { + struct S { + int x; + int y; + }; + char dst[100]; + struct S src = {1, 2}; + memcpy(dst, &src, sizeof(struct S) + 1); // expected-warning {{'memcpy' reading 9 bytes from a region of size 8}} +} + +void test_memmove_overread(void) { + char dst[100]; + char src[10]; + memmove(dst, src, 20); // expected-warning {{'memmove' reading 20 bytes from a region of size 10}} +} + +void test_memcpy_no_warning_exact_size(void) { + char dst[100]; + int src = 0; + memcpy(dst, &src, sizeof(src)); // no warning +} + +void test_memcpy_no_warning_smaller_size(void) { + char dst[100]; + int src[10]; + memcpy(dst, src, 5 * sizeof(int)); // no warning +} + +void test_memcpy_both_overflow(void) { + char dst[5]; + int src = 0; + memcpy(dst, &src, 10); // expected-warning {{'memcpy' reading 10 bytes from a region of size 4}} + // expected-warning@-1 {{'memcpy' will always overflow; destination buffer has size 5, but size argument is 10}} +} + +void test_memchr_overread(void) { + char buf[4]; + memchr(buf, 'a', 8); // expected-warning {{'memchr' reading 8 bytes from a region of size 4}} +} + +void test_memchr_no_warning(void) { + char buf[10]; + memchr(buf, 'a', 10); // no warning +} + +void 
test_memcmp_overread_first(void) { + char a[4], b[100]; + memcmp(a, b, 8); // expected-warning {{'memcmp' reading 8 bytes from a region of size 4}} +} + +void test_memcmp_overread_second(void) { + char a[100], b[4]; + memcmp(a, b, 8); // expected-warning {{'memcmp' reading 8 bytes from a region of size 4}} +} + +void test_memcmp_overread_both(void) { + char a[4], b[2]; + memcmp(a, b, 8); // expected-warning {{'memcmp' reading 8 bytes from a region of size 4}} \ + // expected-warning {{'memcmp' reading 8 bytes from a region of size 2}} +} + +void test_memcmp_no_warning(void) { + char a[10], b[10]; + memcmp(a, b, 10); // no warning +} + +void test_memcpy_src_offset_overread(void) { + char src[] = {1, 2, 3, 4}; + char dst[10]; + memcpy(dst, src + 2, 3); // expected-warning {{'memcpy' reading 3 bytes from a region of size 2}} +} + +void test_memcpy_src_offset_no_warning(void) { + char src[] = {1, 2, 3, 4}; + char dst[10]; + memcpy(dst, src + 2, 2); // no warning +} + +void test_bcmp_overread(void) { + char a[4], b[100]; + bcmp(a, b, 8); // expected-warning {{'bcmp' reading 8 bytes from a region of size 4}} +} + +void test_bcmp_no_warning(void) { + char a[10], b[10]; + bcmp(a, b, 10); // no warning +} + +void test_bcopy_overread(void) { + char src[4], dst[100]; + bcopy(src, dst, 8); // expected-warning {{'bcopy' reading 8 bytes from a region of size 4}} +} + +void test_bcopy_no_warning(void) { + char src[10], dst[10]; + bcopy(src, dst, 10); // no warning +} + +void test_memcpy_chk_overread(void) { + char dst[100]; + char src[4]; + __builtin___memcpy_chk(dst, src, 8, sizeof(dst)); // expected-warning {{'memcpy' reading 8 bytes from a region of size 4}} +} + +void test_memmove_chk_overread(void) { + char dst[100]; + char src[4]; + __builtin___memmove_chk(dst, src, 8, sizeof(dst)); // expected-warning {{'memmove' reading 8 bytes from a region of size 4}} +} + +#ifdef __cplusplus +template +void test_memcpy_dependent_dest() { + char dst[N]; + int src = 0; + memcpy(dst, 
&src, sizeof(src) + 1); // expected-warning {{'memcpy' reading 5 bytes from a region of size 4}} +} + +void call_test_memcpy_dependent_dest() { + test_memcpy_dependent_dest<100>(); // expected-note {{in instantiation}} +} + +// FIXME: We should warn here at the template definition since src and size are +// not dependent, but checkFortifiedBuiltinMemoryFunction exits when any part of +// the call is dependent (and thus uninstantiated). +template +void test_memcpy_dependent_dest_uninstantiated() { + char dst[N]; + int src = 0; + memcpy(dst, &src, sizeof(src) + 1); // missing-warning {{'memcpy' reading 5 bytes from a region of size 4}} +} + +#endif diff --git a/compiler-rt/test/asan/TestCases/Windows/issue64990.cpp b/compiler-rt/test/asan/TestCases/Windows/issue64990.cpp index 5222ec6e08191..7fccff0313c62 100644 --- a/compiler-rt/test/asan/TestCases/Windows/issue64990.cpp +++ b/compiler-rt/test/asan/TestCases/Windows/issue64990.cpp @@ -1,5 +1,5 @@ // Repro for the issue #64990: Asan with Windows EH generates __asan_xxx runtime calls without required funclet tokens -// RUN: %clang_cl_asan %Od %if MSVC %{ /Oi %} %s -EHsc %Fe%t +// RUN: %clang_cl_asan %Od %if MSVC %{ /Oi %} %s -EHsc -Wno-stringop-overread %Fe%t // RUN: not %run %t 2>&1 | FileCheck %s // UNSUPPORTED: target={{.*-windows-gnu}} From 629833a84ba49b352a326d6ba1b60cf5755d84fc Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Mon, 22 Jun 2026 18:30:43 +0100 Subject: [PATCH 048/511] [ValueTracking] Mark frexp, ALM propagates poison (#205139) --- llvm/lib/Analysis/ConstantFolding.cpp | 3 --- llvm/lib/Analysis/ValueTracking.cpp | 2 ++ .../Transforms/InstSimplify/ConstProp/active-lane-mask.ll | 4 +--- 3 files changed, 3 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index c7210b2e0c2d1..3f663bc61243d 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -4569,9 +4569,6 @@ static Constant 
*ConstantFoldScalableVectorCall( static std::pair ConstantFoldScalarFrexpCall(Constant *Op, Type *IntTy) { - if (isa(Op)) - return {Op, PoisonValue::get(IntTy)}; - auto *ConstFP = dyn_cast(Op); if (!ConstFP) return {}; diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 70345319ed57d..5cbf9f628867f 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -8188,6 +8188,8 @@ bool llvm::intrinsicPropagatesPoison(Intrinsic::ID IID) { case Intrinsic::llrint: case Intrinsic::fshl: case Intrinsic::fshr: + case Intrinsic::frexp: + case Intrinsic::get_active_lane_mask: return true; default: return false; diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll index e9d9ac040ea1d..e272b3f0682ef 100644 --- a/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll +++ b/llvm/test/Transforms/InstSimplify/ConstProp/active-lane-mask.ll @@ -297,9 +297,7 @@ entry: define <4 x float> @poisonc(<4 x float> %a, i32 %n) { ; CHECK-LABEL: @poisonc( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[VAR27:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 poison, i32 1024) -; CHECK-NEXT: [[VAR33:%.*]] = select <4 x i1> [[VAR27]], <4 x float> [[A:%.*]], <4 x float> zeroinitializer -; CHECK-NEXT: ret <4 x float> [[VAR33]] +; CHECK-NEXT: ret <4 x float> poison ; entry: %new0 = shl i1 0, 1 From 84773feeb3de16acc191e737d5ef1a039534078f Mon Sep 17 00:00:00 2001 From: Sam Elliott Date: Mon, 22 Jun 2026 10:35:18 -0700 Subject: [PATCH 049/511] [RISCV] Add a Pass for adding %qc.access specifiers (#201938) Qualcomm's ABI has Access Relocation Markers, which are used to enable more linker relaxations. This change implements a pass which will annotate loads and stores (accesses) which are the single user of a `qc.e.li`-materialized address with these markers so they can be relaxed in the linker. This is a follow-up to #188671. 
--- llvm/lib/Target/RISCV/CMakeLists.txt | 1 + .../Target/RISCV/MCTargetDesc/RISCVBaseInfo.h | 1 + llvm/lib/Target/RISCV/RISCV.h | 3 + llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 3 + llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 4 +- llvm/lib/Target/RISCV/RISCVInstrPredicates.td | 25 + llvm/lib/Target/RISCV/RISCVQCRelaxMarking.cpp | 220 ++++++ llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 6 + llvm/test/CodeGen/RISCV/O3-pipeline.ll | 3 +- llvm/test/CodeGen/RISCV/qc-access-marking.ll | 637 ++++++++++++++++++ 10 files changed, 901 insertions(+), 2 deletions(-) create mode 100644 llvm/lib/Target/RISCV/RISCVQCRelaxMarking.cpp create mode 100644 llvm/test/CodeGen/RISCV/qc-access-marking.ll diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 4a1a21cc9b5cd..afab81b2cf7b0 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -61,6 +61,7 @@ add_llvm_target(RISCVCodeGen RISCVPostRAExpandPseudoInsts.cpp RISCVPromoteConstant.cpp RISCVPushPopOptimizer.cpp + RISCVQCRelaxMarking.cpp RISCVRedundantCopyElimination.cpp RISCVRegisterInfo.cpp RISCVSelectionDAGInfo.cpp diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index 99bfef4a78241..1b76f4c1c5311 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -474,6 +474,7 @@ enum { MO_TLSDESC_LOAD_LO = 14, MO_TLSDESC_ADD_LO = 15, MO_TLSDESC_CALL = 16, + MO_QC_ACCESS = 17, // Used to differentiate between target-specific "direct" flags and "bitmask" // flags. 
A machine operand can only have one "direct" flag, but can have diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index 929a8d8f17b4f..8db90ee51c441 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -133,6 +133,9 @@ void initializeRISCVVLOptimizerPass(PassRegistry &); FunctionPass *createRISCVVMV0EliminationPass(); void initializeRISCVVMV0EliminationPass(PassRegistry &); +FunctionPass *createRISCVQCRelaxMarkingPass(); +void initializeRISCVQCRelaxMarkingPass(PassRegistry &); + void initializeRISCVAsmPrinterPass(PassRegistry &); } // namespace llvm diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 13b54caed4d21..a5e35977f8407 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -1083,6 +1083,9 @@ static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, case RISCVII::MO_TLSDESC_CALL: Kind = ELF::R_RISCV_TLSDESC_CALL; break; + case RISCVII::MO_QC_ACCESS: + Kind = RISCV::S_QC_ACCESS; + break; } const MCExpr *ME = MCSymbolRefExpr::create(Sym, Ctx); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index b31a3a7760d3b..3636325cead24 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -3609,7 +3609,9 @@ RISCVInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { {MO_TLSDESC_HI, "riscv-tlsdesc-hi"}, {MO_TLSDESC_LOAD_LO, "riscv-tlsdesc-load-lo"}, {MO_TLSDESC_ADD_LO, "riscv-tlsdesc-add-lo"}, - {MO_TLSDESC_CALL, "riscv-tlsdesc-call"}}; + {MO_TLSDESC_CALL, "riscv-tlsdesc-call"}, + {MO_QC_ACCESS, "riscv-qc-access"}, + }; return ArrayRef(TargetFlags); } bool RISCVInstrInfo::isFunctionSafeToOutlineFrom( diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td index 60d8657d7000c..367f91bb9902f 100644 --- 
a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td +++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td @@ -217,3 +217,28 @@ def ignoresVXRM ]), MCReturnStatement>>], MCReturnStatement>>; + +// RVI Base Loads, only those using simm12(reg) for addressing +def isBaseLoad + : TIIPredicate<"isBaseLoad", + MCReturnStatement< + CheckOpcode<[ + LD, + LW, + LWU, + LH, + LHU, + LB, + LBU + ]>>>; + +// RVI Base Stores, only those using simm12(reg) for addressing +def isBaseStore + : TIIPredicate<"isBaseStore", + MCReturnStatement< + CheckOpcode<[ + SD, + SW, + SH, + SB, + ]>>>; diff --git a/llvm/lib/Target/RISCV/RISCVQCRelaxMarking.cpp b/llvm/lib/Target/RISCV/RISCVQCRelaxMarking.cpp new file mode 100644 index 0000000000000..e7166e92f7ad7 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVQCRelaxMarking.cpp @@ -0,0 +1,220 @@ +//===-- RISCVQCRelaxMarking.cpp - Mark Instructions for QC Relaxations ----===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass adds access tags to some instructions which are used by the +// assembler to emit marker relocations, which enable some code-size relaxations +// for Xqcilo/Xqcili. +// +// The pass is looking for the following sequences: +// +// $dst1 = QC_E_LI sym +// $dst2 = Load killed $dst1, 0 +// +// $dst1 = QC_E_LI sym +// Store $dst2, killed $dst1, 0 +// +// In either case, the Load/Store is modified to become a +// PseudoQCAccess, with an additional operand that represents the +// accessed symbolic address, which will become the contents of a +// `R_RISCV_QC_ACCESS_*` relocation on the emitted instruction. 
+// +// FIXME: The intention is this pass does not change the size of any +// instructions, but right now it has to do instruction compression as the +// CompressPat infrastructure cannot handle compressing the `%qc.access(...)` +// operand. Symbolic operands are not usually compressible, but this one is as +// we have relocations for both 32-bit and 16-bit instructions (and the +// relocation does not care about the fields of the instruction). + +#include "RISCV.h" +#include "RISCVSubtarget.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-qc-relax-marking" +#define RISCV_QC_RELAX_MARKING_NAME "RISC-V QC Relaxation Marking" + +STATISTIC(NumMarked, "Number of Loads/Stores Marked"); + +namespace { + +struct RISCVQCRelaxMarking : public MachineFunctionPass { + static char ID; + + bool runOnMachineFunction(MachineFunction &) override; + + RISCVQCRelaxMarking() : MachineFunctionPass(ID) {} + + StringRef getPassName() const override { return RISCV_QC_RELAX_MARKING_NAME; } +}; + +} // end namespace + +char RISCVQCRelaxMarking::ID = 0; + +INITIALIZE_PASS(RISCVQCRelaxMarking, DEBUG_TYPE, RISCV_QC_RELAX_MARKING_NAME, + false, false) + +/// Returns an instance of the RISC-V QC Relaxation Marking pass.
+FunctionPass *llvm::createRISCVQCRelaxMarkingPass() { + return new RISCVQCRelaxMarking(); +} + +static bool isUImm7LSB000(const MachineOperand &MO) { + return MO.isImm() && isShiftedUInt<4, 3>(MO.getImm()); +} + +static bool isUImm2LSB0(const MachineOperand &MO) { + return MO.isImm() && isShiftedUInt<1, 1>(MO.getImm()); +} + +static bool isUImm2(const MachineOperand &MO) { + return MO.isImm() && isUInt<2>(MO.getImm()); +} + +static bool isGPRC(const MachineOperand &MO) { + return RISCV::GPRCRegClass.contains(MO.getReg()); +} + +static unsigned getQCMarkedOpcode(const MachineInstr &MI, + const RISCVSubtarget &STI) { + switch (MI.getOpcode()) { + case RISCV::LB: + // No c.lb + return RISCV::PseudoQCAccessLB; + case RISCV::LBU: + if (STI.hasStdExtZcb() && isGPRC(MI.getOperand(0)) && + isGPRC(MI.getOperand(1)) && isUImm2(MI.getOperand(2))) + return RISCV::PseudoQCAccessC_LBU; + return RISCV::PseudoQCAccessLBU; + case RISCV::LH: + if (STI.hasStdExtZcb() && isGPRC(MI.getOperand(0)) && + isGPRC(MI.getOperand(1)) && isUImm2LSB0(MI.getOperand(2))) + return RISCV::PseudoQCAccessC_LH; + return RISCV::PseudoQCAccessLH; + case RISCV::LHU: + if (STI.hasStdExtZcb() && isGPRC(MI.getOperand(0)) && + isGPRC(MI.getOperand(1)) && isUImm2LSB0(MI.getOperand(2))) + return RISCV::PseudoQCAccessC_LHU; + return RISCV::PseudoQCAccessLHU; + case RISCV::LW: + if (STI.hasStdExtZca() && isGPRC(MI.getOperand(0)) && + isGPRC(MI.getOperand(1)) && isUImm7LSB000(MI.getOperand(2))) + return RISCV::PseudoQCAccessC_LW; + return RISCV::PseudoQCAccessLW; + case RISCV::SB: + if (STI.hasStdExtZcb() && isGPRC(MI.getOperand(0)) && + isGPRC(MI.getOperand(1)) && isUImm2(MI.getOperand(2))) + return RISCV::PseudoQCAccessC_SB; + return RISCV::PseudoQCAccessSB; + case RISCV::SH: + if (STI.hasStdExtZcb() && isGPRC(MI.getOperand(0)) && + isGPRC(MI.getOperand(1)) && isUImm2LSB0(MI.getOperand(2))) + return RISCV::PseudoQCAccessC_SH; + return RISCV::PseudoQCAccessSH; + case RISCV::SW: + if (STI.hasStdExtZca() && 
isGPRC(MI.getOperand(0)) && + isGPRC(MI.getOperand(1)) && isUImm7LSB000(MI.getOperand(2))) + return RISCV::PseudoQCAccessC_SW; + return RISCV::PseudoQCAccessSW; + default: + reportFatalInternalError( + "Unhandled Opcode: No Corresponding Marked Opcode"); + } +} + +bool RISCVQCRelaxMarking::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + // This is only relevant for QC.E.LI with a symbol, which we only use in the + // small code model. + if (MF.getTarget().getCodeModel() != CodeModel::Small) + return false; + + auto &STI = MF.getSubtarget(); + // We need QC.E.LI instructions to perform this optimisation, which needs + // 32-bit and Xqcili. The markers are only needed when linker relaxations are + // enabled. + if (STI.is64Bit() || !STI.hasVendorXqcili() || !STI.enableLinkerRelax()) + return false; + + const RISCVInstrInfo *TII = STI.getInstrInfo(); + + bool Changed = false; + for (MachineBasicBlock &MBB : MF) { + for (auto MI = MBB.begin(), E = MBB.end(); MI != E; MI++) { + auto NextMI = std::next(MI); + if (NextMI == E) + break; + + // Looking for QC.E.LI followed by a load or store + if (MI->getOpcode() != RISCV::QC_E_LI || + !(RISCVInstrInfo::isBaseLoad(*NextMI) || + RISCVInstrInfo::isBaseStore(*NextMI))) + continue; + + LLVM_DEBUG(dbgs() << "Found QC_E_LI " << *MI); + LLVM_DEBUG(dbgs() << "Followed by Load/Store " << *NextMI); + + if (MI->getOperand(0).getReg() != NextMI->getOperand(1).getReg()) + continue; + if (!NextMI->getOperand(1).isKill()) + continue; + + // This is unsafe for stores where the access address is being stored. 
+ if (RISCVInstrInfo::isBaseStore(*NextMI) && + MI->getOperand(0).getReg() == NextMI->getOperand(0).getReg()) + continue; + + MachineOperand &SymOp = MI->getOperand(1); + if (!SymOp.isSymbol() && !SymOp.isGlobal() && !SymOp.isMCSymbol() && + !SymOp.isCPI()) + continue; + + unsigned NewOpc = getQCMarkedOpcode(*NextMI, STI); + LLVM_DEBUG(dbgs() << "Load/Store " << TII->getName(NextMI->getOpcode()) + << " will become " << TII->getName(NewOpc) << "\n"); + MachineInstrBuilder MIB = + BuildMI(MBB, NextMI, NextMI->getDebugLoc(), TII->get(NewOpc)) + .add(NextMI->getOperand(0)) + .add(NextMI->getOperand(1)) + .add(NextMI->getOperand(2)) + .cloneMemRefs(*NextMI); + + if (SymOp.isSymbol()) { + MIB.addExternalSymbol(SymOp.getSymbolName(), RISCVII::MO_QC_ACCESS); + } else if (SymOp.isGlobal()) { + MIB.addGlobalAddress(SymOp.getGlobal(), SymOp.getOffset(), + RISCVII::MO_QC_ACCESS); + } else if (SymOp.isMCSymbol()) { + MachineOperand MO = MachineOperand::CreateMCSymbol( + SymOp.getMCSymbol(), RISCVII::MO_QC_ACCESS); + MO.setOffset(SymOp.getOffset()); + MIB.add(MO); + } else if (SymOp.isCPI()) { + MIB.addConstantPoolIndex(SymOp.getIndex(), SymOp.getOffset(), + RISCVII::MO_QC_ACCESS); + } else { + reportFatalInternalError("Unhandled SymOp Kind"); + } + + NextMI->removeFromParent(); + NumMarked++; + Changed |= true; + } + } + + return Changed; +} diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index e13012a94711d..88bc7ae9c1994 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -128,6 +128,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeRISCVDeadRegisterDefinitionsPass(*PR); initializeRISCVLateBranchOptPass(*PR); initializeRISCVMakeCompressibleOptPass(*PR); + initializeRISCVQCRelaxMarkingPass(*PR); initializeRISCVGatherScatterLoweringPass(*PR); initializeRISCVCodeGenPrepareLegacyPassPass(*PR); 
initializeRISCVPostRAExpandPseudoPass(*PR); @@ -600,6 +601,11 @@ void RISCVPassConfig::addPreEmitPass2() { } addPass(createRISCVExpandPseudoPass()); + // Add QC Relaxation Markers as late as possible, and only for RV32 + if (TM->getOptLevel() != CodeGenOptLevel::None && + TM->getTargetTriple().isRISCV32()) + addPass(createRISCVQCRelaxMarkingPass()); + // Schedule the expansion of AMOs at the last possible moment, avoiding the // possibility for other passes to break the requirements for forward // progress in the LR/SC block. diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 149764ffedf9e..12b0895e0f158 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=riscv32 -O3 -debug-pass=Structure < %s -o /dev/null 2>&1 | \ ; RUN: grep -v "Verify generated machine code" | \ -; RUN: FileCheck %s --check-prefixes=CHECK +; RUN: FileCheck %s --check-prefixes=CHECK,RV32 ; RUN: llc -mtriple=riscv64 -O3 -debug-pass=Structure < %s -o /dev/null 2>&1 | \ ; RUN: grep -v "Verify generated machine code" | \ ; RUN: FileCheck %s --check-prefixes=CHECK,RV64 @@ -222,6 +222,7 @@ ; CHECK-NEXT: RISC-V Zcmp move merging pass ; CHECK-NEXT: RISC-V Zcmp Push/Pop optimization pass ; CHECK-NEXT: RISC-V pseudo instruction expansion pass +; RV32-NEXT: RISC-V QC Relaxation Marking ; CHECK-NEXT: RISC-V atomic pseudo instruction expansion pass ; CHECK-NEXT: Unpack machine instruction bundles ; CHECK-NEXT: Lazy Machine Block Frequency Analysis diff --git a/llvm/test/CodeGen/RISCV/qc-access-marking.ll b/llvm/test/CodeGen/RISCV/qc-access-marking.ll new file mode 100644 index 0000000000000..de580c23e4394 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/qc-access-marking.ll @@ -0,0 +1,637 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=riscv32 -mattr=+xqcili,+relax -code-model=small \ +; RUN: 
-verify-machineinstrs < %s | FileCheck %s --check-prefix=NO-C +; RUN: llc -mtriple=riscv32 -mattr=+xqcili,+relax,+zca,+zcb -code-model=small \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefix=WITH-C + +@global = external global [8 x i8] + +define zeroext i8 @load_unsigned_byte() nounwind optsize { +; NO-C-LABEL: load_unsigned_byte: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global +; NO-C-NEXT: lbu a0, 0(a0), %qc.access(global) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_unsigned_byte: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global +; WITH-C-NEXT: c.lbu a0, 0(a0), %qc.access(global) +; WITH-C-NEXT: ret + %1 = load i8, ptr @global, align 1 + ret i8 %1 +} + +define zeroext i8 @load_unsigned_byte_offset() nounwind optsize { +; NO-C-LABEL: load_unsigned_byte_offset: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global+1 +; NO-C-NEXT: lbu a0, 0(a0), %qc.access(global+1) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_unsigned_byte_offset: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global+1 +; WITH-C-NEXT: c.lbu a0, 0(a0), %qc.access(global+1) +; WITH-C-NEXT: ret + %1 = getelementptr i8, ptr @global, i32 1 + %2 = load i8, ptr %1, align 1 + ret i8 %2 +} + +define zeroext i16 @load_two_unsigned_byte() nounwind optsize { +; NO-C-LABEL: load_two_unsigned_byte: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global +; NO-C-NEXT: lbu a1, 1(a0) +; NO-C-NEXT: lbu a0, 0(a0) +; NO-C-NEXT: slli a1, a1, 8 +; NO-C-NEXT: or a0, a0, a1 +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_two_unsigned_byte: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global +; WITH-C-NEXT: lbu a1, 1(a0) +; WITH-C-NEXT: lbu a0, 0(a0) +; WITH-C-NEXT: slli a1, a1, 8 +; WITH-C-NEXT: or a0, a0, a1 +; WITH-C-NEXT: ret + %1 = load i16, ptr @global, align 1 + ret i16 %1 +} + +define zeroext i8 @load_unsigned_byte_twice() nounwind optsize { +; NO-C-LABEL: load_unsigned_byte_twice: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global +; NO-C-NEXT: lbu a1, 0(a0) +; NO-C-NEXT: lbu a0, 0(a0) +; NO-C-NEXT: 
or a0, a0, a1 +; NO-C-NEXT: zext.b a0, a0 +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_unsigned_byte_twice: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global +; WITH-C-NEXT: lbu a1, 0(a0) +; WITH-C-NEXT: lbu a0, 0(a0) +; WITH-C-NEXT: or a0, a0, a1 +; WITH-C-NEXT: zext.b a0, a0 +; WITH-C-NEXT: ret + %1 = load volatile i8, ptr @global, align 1 + %2 = load volatile i8, ptr @global, align 1 + %3 = or i8 %1, %2 + ret i8 %3 +} + +define zeroext i8 @load_unsigned_byte_twice_offset() nounwind optsize { +; NO-C-LABEL: load_unsigned_byte_twice_offset: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global+1 +; NO-C-NEXT: lbu a1, 0(a0) +; NO-C-NEXT: lbu a0, 0(a0) +; NO-C-NEXT: or a0, a0, a1 +; NO-C-NEXT: zext.b a0, a0 +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_unsigned_byte_twice_offset: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global+1 +; WITH-C-NEXT: lbu a1, 0(a0) +; WITH-C-NEXT: lbu a0, 0(a0) +; WITH-C-NEXT: or a0, a0, a1 +; WITH-C-NEXT: zext.b a0, a0 +; WITH-C-NEXT: ret + %1 = getelementptr i8, ptr @global, i32 1 + %2 = load volatile i8, ptr %1, align 1 + %3 = load volatile i8, ptr %1, align 1 + %4 = or i8 %2, %3 + ret i8 %4 +} + + +define i16 @load_halfword() nounwind optsize { +; NO-C-LABEL: load_halfword: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global +; NO-C-NEXT: lh a0, 0(a0), %qc.access(global) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_halfword: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global +; WITH-C-NEXT: c.lh a0, 0(a0), %qc.access(global) +; WITH-C-NEXT: ret + %1 = load i16, ptr @global, align 2 + ret i16 %1 +} + +define i16 @load_halfword_offset() nounwind optsize { +; NO-C-LABEL: load_halfword_offset: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global+2 +; NO-C-NEXT: lh a0, 0(a0), %qc.access(global+2) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_halfword_offset: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global+2 +; WITH-C-NEXT: c.lh a0, 0(a0), %qc.access(global+2) +; WITH-C-NEXT: ret + %1 = getelementptr i8, ptr @global, i32 2 + %2 = load 
i16, ptr %1, align 2 + ret i16 %2 +} + +define i32 @load_two_halfword() nounwind optsize { +; NO-C-LABEL: load_two_halfword: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global +; NO-C-NEXT: lhu a1, 2(a0) +; NO-C-NEXT: lhu a0, 0(a0) +; NO-C-NEXT: slli a1, a1, 16 +; NO-C-NEXT: or a0, a0, a1 +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_two_halfword: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global +; WITH-C-NEXT: lhu a1, 2(a0) +; WITH-C-NEXT: lhu a0, 0(a0) +; WITH-C-NEXT: slli a1, a1, 16 +; WITH-C-NEXT: or a0, a0, a1 +; WITH-C-NEXT: ret + %1 = load i32, ptr @global, align 2 + ret i32 %1 +} + +define i16 @load_halfword_twice() nounwind optsize { +; NO-C-LABEL: load_halfword_twice: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global +; NO-C-NEXT: lh a1, 0(a0) +; NO-C-NEXT: lh a0, 0(a0) +; NO-C-NEXT: or a0, a0, a1 +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_halfword_twice: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global +; WITH-C-NEXT: lh a1, 0(a0) +; WITH-C-NEXT: lh a0, 0(a0) +; WITH-C-NEXT: or a0, a0, a1 +; WITH-C-NEXT: ret + %1 = load volatile i16, ptr @global, align 2 + %2 = load volatile i16, ptr @global, align 2 + %3 = or i16 %1, %2 + ret i16 %3 +} + +define i16 @load_halfword_twice_offset() nounwind optsize { +; NO-C-LABEL: load_halfword_twice_offset: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global+4 +; NO-C-NEXT: lh a1, 0(a0) +; NO-C-NEXT: lh a0, 0(a0) +; NO-C-NEXT: or a0, a0, a1 +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_halfword_twice_offset: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global+4 +; WITH-C-NEXT: lh a1, 0(a0) +; WITH-C-NEXT: lh a0, 0(a0) +; WITH-C-NEXT: or a0, a0, a1 +; WITH-C-NEXT: ret + %1 = getelementptr i16, ptr @global, i32 2 + %2 = load volatile i16, ptr %1, align 2 + %3 = load volatile i16, ptr %1, align 2 + %4 = or i16 %2, %3 + ret i16 %4 +} + + + +define i32 @load_word() nounwind optsize { +; NO-C-LABEL: load_word: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global +; NO-C-NEXT: c.lw a0, 0(a0), %qc.access(global) +; 
NO-C-NEXT: ret +; +; WITH-C-LABEL: load_word: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global +; WITH-C-NEXT: c.lw a0, 0(a0), %qc.access(global) +; WITH-C-NEXT: ret + %1 = load i32, ptr @global, align 4 + ret i32 %1 +} + +define i32 @load_word_offset() nounwind optsize { +; NO-C-LABEL: load_word_offset: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global+4 +; NO-C-NEXT: c.lw a0, 0(a0), %qc.access(global+4) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_word_offset: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global+4 +; WITH-C-NEXT: c.lw a0, 0(a0), %qc.access(global+4) +; WITH-C-NEXT: ret + %1 = getelementptr i8, ptr @global, i32 4 + %2 = load i32, ptr %1, align 4 + ret i32 %2 +} + +define i64 @load_two_word() nounwind optsize { +; NO-C-LABEL: load_two_word: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global +; NO-C-NEXT: lw a0, 0(a1) +; NO-C-NEXT: lw a1, 4(a1) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_two_word: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global +; WITH-C-NEXT: lw a0, 0(a1) +; WITH-C-NEXT: lw a1, 4(a1) +; WITH-C-NEXT: ret + %1 = load i64, ptr @global, align 4 + ret i64 %1 +} + +define i32 @load_word_twice() nounwind optsize { +; NO-C-LABEL: load_word_twice: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global +; NO-C-NEXT: lw a1, 0(a0) +; NO-C-NEXT: lw a0, 0(a0) +; NO-C-NEXT: or a0, a0, a1 +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_word_twice: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global +; WITH-C-NEXT: lw a1, 0(a0) +; WITH-C-NEXT: lw a0, 0(a0) +; WITH-C-NEXT: or a0, a0, a1 +; WITH-C-NEXT: ret + %1 = load volatile i32, ptr @global, align 4 + %2 = load volatile i32, ptr @global, align 4 + %3 = or i32 %1, %2 + ret i32 %3 +} + +define i32 @load_word_twice_offset() nounwind optsize { +; NO-C-LABEL: load_word_twice_offset: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global+8 +; NO-C-NEXT: lw a1, 0(a0) +; NO-C-NEXT: lw a0, 0(a0) +; NO-C-NEXT: or a0, a0, a1 +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_word_twice_offset: +; WITH-C: # 
%bb.0: +; WITH-C-NEXT: qc.e.li a0, global+8 +; WITH-C-NEXT: lw a1, 0(a0) +; WITH-C-NEXT: lw a0, 0(a0) +; WITH-C-NEXT: or a0, a0, a1 +; WITH-C-NEXT: ret + %1 = getelementptr i32, ptr @global, i32 2 + %2 = load volatile i32, ptr %1, align 4 + %3 = load volatile i32, ptr %1, align 4 + %4 = or i32 %2, %3 + ret i32 %4 +} + +define zeroext i8 @load_unsigned_byte_optnone() nounwind optnone noinline { +; NO-C-LABEL: load_unsigned_byte_optnone: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global +; NO-C-NEXT: lbu a0, 0(a0) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: load_unsigned_byte_optnone: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global +; WITH-C-NEXT: lbu a0, 0(a0) +; WITH-C-NEXT: ret + %1 = load i8, ptr @global, align 1 + ret i8 %1 +} + +define void @store_byte(i8 %a) nounwind { +; NO-C-LABEL: store_byte: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global +; NO-C-NEXT: sb a0, 0(a1), %qc.access(global) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_byte: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global +; WITH-C-NEXT: c.sb a0, 0(a1), %qc.access(global) +; WITH-C-NEXT: ret + store i8 %a, ptr @global, align 1 + ret void +} + +define void @store_byte_offset(i8 %a) nounwind { +; NO-C-LABEL: store_byte_offset: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global+1 +; NO-C-NEXT: sb a0, 0(a1), %qc.access(global+1) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_byte_offset: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global+1 +; WITH-C-NEXT: c.sb a0, 0(a1), %qc.access(global+1) +; WITH-C-NEXT: ret + %1 = getelementptr i8, ptr @global, i32 1 + store i8 %a, ptr %1, align 1 + ret void +} + +define void @store_two_byte(i16 %a) nounwind { +; NO-C-LABEL: store_two_byte: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global +; NO-C-NEXT: srli a2, a0, 8 +; NO-C-NEXT: sb a0, 0(a1) +; NO-C-NEXT: sb a2, 1(a1) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_two_byte: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global +; WITH-C-NEXT: srli a2, a0, 8 +; WITH-C-NEXT: sb a0, 0(a1) +; 
WITH-C-NEXT: sb a2, 1(a1) +; WITH-C-NEXT: ret + store i16 %a, ptr @global, align 1 + ret void +} + +define void @store_byte_twice(i8 %a) nounwind { +; NO-C-LABEL: store_byte_twice: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global +; NO-C-NEXT: sb a0, 0(a1) +; NO-C-NEXT: sb a0, 0(a1) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_byte_twice: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global +; WITH-C-NEXT: sb a0, 0(a1) +; WITH-C-NEXT: sb a0, 0(a1) +; WITH-C-NEXT: ret + store volatile i8 %a, ptr @global, align 1 + store volatile i8 %a, ptr @global, align 1 + ret void +} + +define void @store_byte_twice_offset(i8 %a) nounwind { +; NO-C-LABEL: store_byte_twice_offset: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global+1 +; NO-C-NEXT: sb a0, 0(a1) +; NO-C-NEXT: sb a0, 0(a1) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_byte_twice_offset: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global+1 +; WITH-C-NEXT: sb a0, 0(a1) +; WITH-C-NEXT: sb a0, 0(a1) +; WITH-C-NEXT: ret + %1 = getelementptr i8, ptr @global, i32 1 + store volatile i8 %a, ptr %1, align 1 + store volatile i8 %a, ptr %1, align 1 + ret void +} + + +define void @store_halfword(i16 %a) nounwind { +; NO-C-LABEL: store_halfword: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global +; NO-C-NEXT: sh a0, 0(a1), %qc.access(global) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_halfword: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global +; WITH-C-NEXT: c.sh a0, 0(a1), %qc.access(global) +; WITH-C-NEXT: ret + store i16 %a, ptr @global, align 2 + ret void +} + +define void @store_halfword_offset(i16 %a) nounwind { +; NO-C-LABEL: store_halfword_offset: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global+2 +; NO-C-NEXT: sh a0, 0(a1), %qc.access(global+2) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_halfword_offset: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global+2 +; WITH-C-NEXT: c.sh a0, 0(a1), %qc.access(global+2) +; WITH-C-NEXT: ret + %1 = getelementptr i8, ptr @global, i32 2 + store i16 %a, ptr %1, align 2 + 
ret void +} + +define void @store_two_halfword(i32 %a) nounwind { +; NO-C-LABEL: store_two_halfword: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global +; NO-C-NEXT: srli a2, a0, 16 +; NO-C-NEXT: sh a0, 0(a1) +; NO-C-NEXT: sh a2, 2(a1) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_two_halfword: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global +; WITH-C-NEXT: srli a2, a0, 16 +; WITH-C-NEXT: sh a0, 0(a1) +; WITH-C-NEXT: sh a2, 2(a1) +; WITH-C-NEXT: ret + store i32 %a, ptr @global, align 2 + ret void +} + +define void @store_halfword_twice(i16 %a) nounwind { +; NO-C-LABEL: store_halfword_twice: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global +; NO-C-NEXT: sh a0, 0(a1) +; NO-C-NEXT: sh a0, 0(a1) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_halfword_twice: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global +; WITH-C-NEXT: sh a0, 0(a1) +; WITH-C-NEXT: sh a0, 0(a1) +; WITH-C-NEXT: ret + store volatile i16 %a, ptr @global, align 2 + store volatile i16 %a, ptr @global, align 2 + ret void +} + +define void @store_halfword_twice_offset(i16 %a) nounwind { +; NO-C-LABEL: store_halfword_twice_offset: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global+4 +; NO-C-NEXT: sh a0, 0(a1) +; NO-C-NEXT: sh a0, 0(a1) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_halfword_twice_offset: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global+4 +; WITH-C-NEXT: sh a0, 0(a1) +; WITH-C-NEXT: sh a0, 0(a1) +; WITH-C-NEXT: ret + %1 = getelementptr i16, ptr @global, i32 2 + store volatile i16 %a, ptr %1, align 2 + store volatile i16 %a, ptr %1, align 2 + ret void +} + + +define void @store_word(i32 %a) nounwind { +; NO-C-LABEL: store_word: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global +; NO-C-NEXT: c.sw a0, 0(a1), %qc.access(global) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_word: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global +; WITH-C-NEXT: c.sw a0, 0(a1), %qc.access(global) +; WITH-C-NEXT: ret + store i32 %a, ptr @global, align 4 + ret void +} + +define void 
@store_word_offset(i32 %a) nounwind { +; NO-C-LABEL: store_word_offset: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global+4 +; NO-C-NEXT: c.sw a0, 0(a1), %qc.access(global+4) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_word_offset: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global+4 +; WITH-C-NEXT: c.sw a0, 0(a1), %qc.access(global+4) +; WITH-C-NEXT: ret + %1 = getelementptr i32, ptr @global, i32 1 + store i32 %a, ptr %1, align 4 + ret void +} + +define void @store_two_word(i64 %a) nounwind { +; NO-C-LABEL: store_two_word: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a2, global +; NO-C-NEXT: sw a0, 0(a2) +; NO-C-NEXT: sw a1, 4(a2) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_two_word: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a2, global +; WITH-C-NEXT: sw a0, 0(a2) +; WITH-C-NEXT: sw a1, 4(a2) +; WITH-C-NEXT: ret + store i64 %a, ptr @global, align 4 + ret void +} + +define void @store_word_twice(i32 %a) nounwind { +; NO-C-LABEL: store_word_twice: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global +; NO-C-NEXT: sw a0, 0(a1) +; NO-C-NEXT: sw a0, 0(a1) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_word_twice: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global +; WITH-C-NEXT: sw a0, 0(a1) +; WITH-C-NEXT: sw a0, 0(a1) +; WITH-C-NEXT: ret + store volatile i32 %a, ptr @global, align 4 + store volatile i32 %a, ptr @global, align 4 + ret void +} + +define void @store_word_twice_offset(i32 %a) nounwind { +; NO-C-LABEL: store_word_twice_offset: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a1, global+8 +; NO-C-NEXT: sw a0, 0(a1) +; NO-C-NEXT: sw a0, 0(a1) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_word_twice_offset: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a1, global+8 +; WITH-C-NEXT: sw a0, 0(a1) +; WITH-C-NEXT: sw a0, 0(a1) +; WITH-C-NEXT: ret + %1 = getelementptr i32, ptr @global, i32 2 + store volatile i32 %a, ptr %1, align 4 + store volatile i32 %a, ptr %1, align 4 + ret void +} + +define void @store_word_self() nounwind { +; NO-C-LABEL: store_word_self: +; NO-C: # 
%bb.0: +; NO-C-NEXT: qc.e.li a0, global +; NO-C-NEXT: sw a0, 0(a0) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_word_self: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global +; WITH-C-NEXT: sw a0, 0(a0) +; WITH-C-NEXT: ret + store ptr @global, ptr @global, align 4 + ret void +} + +define void @store_word_self_offset() nounwind { +; NO-C-LABEL: store_word_self_offset: +; NO-C: # %bb.0: +; NO-C-NEXT: qc.e.li a0, global +; NO-C-NEXT: addi a1, a0, 4 +; NO-C-NEXT: sw a1, 4(a0) +; NO-C-NEXT: ret +; +; WITH-C-LABEL: store_word_self_offset: +; WITH-C: # %bb.0: +; WITH-C-NEXT: qc.e.li a0, global +; WITH-C-NEXT: addi a1, a0, 4 +; WITH-C-NEXT: sw a1, 4(a0) +; WITH-C-NEXT: ret + %1 = getelementptr i32, ptr @global, i32 1 + store ptr %1, ptr %1, align 4 + ret void +} From 2452a646f4f00a81a46f5db202574979bfcf3506 Mon Sep 17 00:00:00 2001 From: Hristo Hristov Date: Mon, 22 Jun 2026 20:40:07 +0300 Subject: [PATCH 050/511] [libc++][random] Applied `[[nodiscard]]` to `` (#204970) Towards: #172124 - https://libcxx.llvm.org/CodingGuidelines.html#apply-nodiscard-where-relevant - https://wg21.link/rand Co-authored-by: Hristo Hristov --- .../include/__random/bernoulli_distribution.h | 14 +- .../include/__random/binomial_distribution.h | 18 +- libcxx/include/__random/cauchy_distribution.h | 18 +- .../__random/chi_squared_distribution.h | 14 +- .../include/__random/discard_block_engine.h | 10 +- .../include/__random/discrete_distribution.h | 14 +- .../__random/exponential_distribution.h | 14 +- .../__random/extreme_value_distribution.h | 18 +- .../include/__random/fisher_f_distribution.h | 18 +- libcxx/include/__random/gamma_distribution.h | 18 +- libcxx/include/__random/generate_canonical.h | 2 +- .../include/__random/geometric_distribution.h | 14 +- .../__random/independent_bits_engine.h | 10 +- .../__random/linear_congruential_engine.h | 8 +- .../include/__random/lognormal_distribution.h | 18 +- .../__random/mersenne_twister_engine.h | 8 +- 
.../__random/negative_binomial_distribution.h | 18 +- libcxx/include/__random/normal_distribution.h | 18 +- .../piecewise_constant_distribution.h | 18 +- .../__random/piecewise_linear_distribution.h | 18 +- .../include/__random/poisson_distribution.h | 14 +- libcxx/include/__random/random_device.h | 8 +- libcxx/include/__random/seed_seq.h | 4 +- .../include/__random/shuffle_order_engine.h | 10 +- .../include/__random/student_t_distribution.h | 14 +- .../__random/subtract_with_carry_engine.h | 8 +- .../__random/uniform_int_distribution.h | 18 +- .../__random/uniform_real_distribution.h | 18 +- .../include/__random/weibull_distribution.h | 18 +- .../libcxx/numerics/rand/nodiscard.verify.cpp | 607 ++++++++++++++++++ .../bad_engine.verify.cpp | 4 +- .../rand.dist.bern.bin/bad_engine.verify.cpp | 4 +- .../rand.dist.bern.geo/bad_engine.verify.cpp | 4 +- .../bad_engine.verify.cpp | 4 +- .../bad_engine.verify.cpp | 4 +- .../bad_engine.verify.cpp | 4 +- .../rand.dist.norm.f/bad_engine.verify.cpp | 4 +- .../bad_engine.verify.cpp | 4 +- .../bad_engine.verify.cpp | 4 +- .../rand.dist.norm.t/bad_engine.verify.cpp | 4 +- .../rand.dist.pois.exp/bad_engine.verify.cpp | 4 +- .../bad_engine.verify.cpp | 4 +- .../bad_engine.verify.cpp | 4 +- .../bad_engine.verify.cpp | 4 +- .../bad_engine.verify.cpp | 4 +- .../bad_engine.verify.cpp | 4 +- .../bad_engine.verify.cpp | 4 +- .../bad_engine.verify.cpp | 4 +- .../rand.dist.uni.int/bad_engine.verify.cpp | 4 +- .../rand.dist.uni.real/bad_engine.verify.cpp | 4 +- 50 files changed, 847 insertions(+), 240 deletions(-) create mode 100644 libcxx/test/libcxx/numerics/rand/nodiscard.verify.cpp diff --git a/libcxx/include/__random/bernoulli_distribution.h b/libcxx/include/__random/bernoulli_distribution.h index 8d1bfad9e7d29..54ca0ee0f7504 100644 --- a/libcxx/include/__random/bernoulli_distribution.h +++ b/libcxx/include/__random/bernoulli_distribution.h @@ -36,7 +36,7 @@ class bernoulli_distribution { _LIBCPP_HIDE_FROM_ABI explicit 
param_type(double __p = 0.5) : __p_(__p) {} - _LIBCPP_HIDE_FROM_ABI double p() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double p() const { return __p_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__p_ == __y.__p_; @@ -60,20 +60,20 @@ class bernoulli_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI double p() const { return __p_.p(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double p() const { return __p_.p(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return false; } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return true; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return false; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return true; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const bernoulli_distribution& __x, const bernoulli_distribution& __y) { return __x.__p_ == __y.__p_; diff --git a/libcxx/include/__random/binomial_distribution.h b/libcxx/include/__random/binomial_distribution.h index 76996abacb3ac..e4fe92143741a 100644 --- a/libcxx/include/__random/binomial_distribution.h +++ b/libcxx/include/__random/binomial_distribution.h @@ -45,8 +45,8 @@ class binomial_distribution { _LIBCPP_HIDE_FROM_ABI explicit param_type(result_type __t = 1, double __p = 0.5); - _LIBCPP_HIDE_FROM_ABI 
result_type t() const { return __t_; } - _LIBCPP_HIDE_FROM_ABI double p() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type t() const { return __t_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double p() const { return __p_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__t_ == __y.__t_ && __x.__p_ == __y.__p_; @@ -74,21 +74,21 @@ class binomial_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI result_type t() const { return __p_.t(); } - _LIBCPP_HIDE_FROM_ABI double p() const { return __p_.p(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type t() const { return __p_.t(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double p() const { return __p_.p(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return t(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return t(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const binomial_distribution& __x, const binomial_distribution& __y) { return __x.__p_ == __y.__p_; diff --git a/libcxx/include/__random/cauchy_distribution.h b/libcxx/include/__random/cauchy_distribution.h index 0ad887dd8d645..65e83095c5445 100644 --- 
a/libcxx/include/__random/cauchy_distribution.h +++ b/libcxx/include/__random/cauchy_distribution.h @@ -43,8 +43,8 @@ class cauchy_distribution { _LIBCPP_HIDE_FROM_ABI explicit param_type(result_type __a = 0, result_type __b = 1) : __a_(__a), __b_(__b) {} - _LIBCPP_HIDE_FROM_ABI result_type a() const { return __a_; } - _LIBCPP_HIDE_FROM_ABI result_type b() const { return __b_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type a() const { return __a_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type b() const { return __b_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_; @@ -70,21 +70,21 @@ class cauchy_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI result_type a() const { return __p_.a(); } - _LIBCPP_HIDE_FROM_ABI result_type b() const { return __p_.b(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type a() const { return __p_.a(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type b() const { return __p_.b(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return -numeric_limits::infinity(); } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return -numeric_limits::infinity(); } + [[__nodiscard__]] 
_LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const cauchy_distribution& __x, const cauchy_distribution& __y) { return __x.__p_ == __y.__p_; diff --git a/libcxx/include/__random/chi_squared_distribution.h b/libcxx/include/__random/chi_squared_distribution.h index 4ce2c17c354c5..8088dfe9d65bd 100644 --- a/libcxx/include/__random/chi_squared_distribution.h +++ b/libcxx/include/__random/chi_squared_distribution.h @@ -41,7 +41,7 @@ class chi_squared_distribution { _LIBCPP_HIDE_FROM_ABI explicit param_type(result_type __n = 1) : __n_(__n) {} - _LIBCPP_HIDE_FROM_ABI result_type n() const { return __n_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type n() const { return __n_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__n_ == __y.__n_; @@ -65,22 +65,22 @@ class chi_squared_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p) { return gamma_distribution(__p.n() / 2, 2)(__g); } // property functions - _LIBCPP_HIDE_FROM_ABI result_type n() const { return __p_.n(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type n() const { return __p_.n(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() 
const { return 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const chi_squared_distribution& __x, const chi_squared_distribution& __y) { diff --git a/libcxx/include/__random/discard_block_engine.h b/libcxx/include/__random/discard_block_engine.h index a6e1997fc1c2d..ba9dae56806d9 100644 --- a/libcxx/include/__random/discard_block_engine.h +++ b/libcxx/include/__random/discard_block_engine.h @@ -54,8 +54,8 @@ class discard_block_engine { static constexpr result_type _Max = _Engine::max(); #endif - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type min() { return _Engine::min(); } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type max() { return _Engine::max(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type min() { return _Engine::min(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type max() { return _Engine::max(); } // constructors and seeding functions _LIBCPP_HIDE_FROM_ABI discard_block_engine() : __n_(0) {} @@ -83,14 +83,14 @@ class discard_block_engine { } // generating functions - _LIBCPP_HIDE_FROM_ABI result_type operator()(); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(); _LIBCPP_HIDE_FROM_ABI void discard(unsigned long long __z) { for (; __z; --__z) - operator()(); + (void)operator()(); } // property functions - _LIBCPP_HIDE_FROM_ABI const _Engine& base() const _NOEXCEPT { return __e_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const _Engine& base() const _NOEXCEPT { return __e_; } template friend bool diff --git a/libcxx/include/__random/discrete_distribution.h b/libcxx/include/__random/discrete_distribution.h index 528de4245ef51..1ade6fd8bf1af 100644 --- a/libcxx/include/__random/discrete_distribution.h +++ b/libcxx/include/__random/discrete_distribution.h @@ -52,7 +52,7 @@ class discrete_distribution { template _LIBCPP_HIDE_FROM_ABI 
param_type(size_t __nw, double __xmin, double __xmax, _UnaryOperation __fw); - _LIBCPP_HIDE_FROM_ABI vector probabilities() const; + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI vector probabilities() const; friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__p_ == __y.__p_; @@ -92,20 +92,20 @@ class discrete_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI vector probabilities() const { return __p_.probabilities(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI vector probabilities() const { return __p_.probabilities(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return __p_.__p_.size(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return __p_.__p_.size(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const discrete_distribution& __x, const discrete_distribution& __y) { return __x.__p_ == __y.__p_; diff --git a/libcxx/include/__random/exponential_distribution.h b/libcxx/include/__random/exponential_distribution.h index a5ce6ce332f9f..17f566ec6a2f8 100644 --- a/libcxx/include/__random/exponential_distribution.h +++ b/libcxx/include/__random/exponential_distribution.h @@ -43,7 +43,7 @@ class exponential_distribution { 
_LIBCPP_HIDE_FROM_ABI explicit param_type(result_type __lambda = 1) : __lambda_(__lambda) {} - _LIBCPP_HIDE_FROM_ABI result_type lambda() const { return __lambda_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type lambda() const { return __lambda_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__lambda_ == __y.__lambda_; @@ -67,20 +67,20 @@ class exponential_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI result_type lambda() const { return __p_.lambda(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type lambda() const { return __p_.lambda(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const exponential_distribution& __x, const exponential_distribution& __y) { diff --git a/libcxx/include/__random/extreme_value_distribution.h b/libcxx/include/__random/extreme_value_distribution.h index 26d4bc67c2b60..e59079b7d6c22 100644 --- a/libcxx/include/__random/extreme_value_distribution.h +++ b/libcxx/include/__random/extreme_value_distribution.h @@ -43,8 
+43,8 @@ class extreme_value_distribution { _LIBCPP_HIDE_FROM_ABI explicit param_type(result_type __a = 0, result_type __b = 1) : __a_(__a), __b_(__b) {} - _LIBCPP_HIDE_FROM_ABI result_type a() const { return __a_; } - _LIBCPP_HIDE_FROM_ABI result_type b() const { return __b_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type a() const { return __a_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type b() const { return __b_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_; @@ -70,21 +70,21 @@ class extreme_value_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI result_type a() const { return __p_.a(); } - _LIBCPP_HIDE_FROM_ABI result_type b() const { return __p_.b(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type a() const { return __p_.a(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type b() const { return __p_.b(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return -numeric_limits::infinity(); } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return -numeric_limits::infinity(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } friend 
_LIBCPP_HIDE_FROM_ABI bool operator==(const extreme_value_distribution& __x, const extreme_value_distribution& __y) { diff --git a/libcxx/include/__random/fisher_f_distribution.h b/libcxx/include/__random/fisher_f_distribution.h index 84362fd0b4414..f36817dea5030 100644 --- a/libcxx/include/__random/fisher_f_distribution.h +++ b/libcxx/include/__random/fisher_f_distribution.h @@ -42,8 +42,8 @@ class fisher_f_distribution { _LIBCPP_HIDE_FROM_ABI explicit param_type(result_type __m = 1, result_type __n = 1) : __m_(__m), __n_(__n) {} - _LIBCPP_HIDE_FROM_ABI result_type m() const { return __m_; } - _LIBCPP_HIDE_FROM_ABI result_type n() const { return __n_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type m() const { return __m_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type n() const { return __n_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__m_ == __y.__m_ && __x.__n_ == __y.__n_; @@ -69,21 +69,21 @@ class fisher_f_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI result_type m() const { return __p_.m(); } - _LIBCPP_HIDE_FROM_ABI result_type n() const { return __p_.n(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type m() const { return __p_.m(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type n() const { return __p_.n(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type 
min() const { return 0; } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const fisher_f_distribution& __x, const fisher_f_distribution& __y) { return __x.__p_ == __y.__p_; diff --git a/libcxx/include/__random/gamma_distribution.h b/libcxx/include/__random/gamma_distribution.h index 10a6cb8a8eb2c..aa418fd66702e 100644 --- a/libcxx/include/__random/gamma_distribution.h +++ b/libcxx/include/__random/gamma_distribution.h @@ -45,8 +45,8 @@ class gamma_distribution { _LIBCPP_HIDE_FROM_ABI explicit param_type(result_type __alpha = 1, result_type __beta = 1) : __alpha_(__alpha), __beta_(__beta) {} - _LIBCPP_HIDE_FROM_ABI result_type alpha() const { return __alpha_; } - _LIBCPP_HIDE_FROM_ABI result_type beta() const { return __beta_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type alpha() const { return __alpha_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type beta() const { return __beta_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__alpha_ == __y.__alpha_ && __x.__beta_ == __y.__beta_; @@ -72,21 +72,21 @@ class gamma_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI result_type alpha() const { return __p_.alpha(); } - _LIBCPP_HIDE_FROM_ABI result_type beta() const { return __p_.beta(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI 
result_type alpha() const { return __p_.alpha(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type beta() const { return __p_.beta(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const gamma_distribution& __x, const gamma_distribution& __y) { return __x.__p_ == __y.__p_; diff --git a/libcxx/include/__random/generate_canonical.h b/libcxx/include/__random/generate_canonical.h index 738de1517e286..091ff92dfa144 100644 --- a/libcxx/include/__random/generate_canonical.h +++ b/libcxx/include/__random/generate_canonical.h @@ -27,7 +27,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD // generate_canonical template -_LIBCPP_HIDE_FROM_ABI _RealType generate_canonical(_URNG& __g) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _RealType generate_canonical(_URNG& __g) { const size_t __dt = numeric_limits<_RealType>::digits; const size_t __b = __dt < __bits ? 
__dt : __bits; #ifdef _LIBCPP_CXX03_LANG diff --git a/libcxx/include/__random/geometric_distribution.h b/libcxx/include/__random/geometric_distribution.h index c04ec2acb5607..bb50e34592717 100644 --- a/libcxx/include/__random/geometric_distribution.h +++ b/libcxx/include/__random/geometric_distribution.h @@ -40,7 +40,7 @@ class geometric_distribution { _LIBCPP_HIDE_FROM_ABI explicit param_type(double __p = 0.5) : __p_(__p) {} - _LIBCPP_HIDE_FROM_ABI double p() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double p() const { return __p_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__p_ == __y.__p_; @@ -64,22 +64,22 @@ class geometric_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p) { return negative_binomial_distribution(1, __p.p())(__g); } // property functions - _LIBCPP_HIDE_FROM_ABI double p() const { return __p_.p(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double p() const { return __p_.p(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::max(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::max(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const geometric_distribution& __x, const 
geometric_distribution& __y) { return __x.__p_ == __y.__p_; diff --git a/libcxx/include/__random/independent_bits_engine.h b/libcxx/include/__random/independent_bits_engine.h index c8faa9a5bfa46..51f61edd9988e 100644 --- a/libcxx/include/__random/independent_bits_engine.h +++ b/libcxx/include/__random/independent_bits_engine.h @@ -82,8 +82,8 @@ class independent_bits_engine { static_assert(_Min < _Max, "independent_bits_engine invalid parameters"); // engine characteristics - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type min() { return _Min; } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type max() { return _Max; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type min() { return _Min; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type max() { return _Max; } // constructors and seeding functions _LIBCPP_HIDE_FROM_ABI independent_bits_engine() {} @@ -105,7 +105,7 @@ class independent_bits_engine { } // generating functions - _LIBCPP_HIDE_FROM_ABI result_type operator()() { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()() { if _LIBCPP_CONSTEXPR (_Rp != 0) { result_type __sp = 0; for (size_t __k = 0; __k < __n0; ++__k) { @@ -130,11 +130,11 @@ class independent_bits_engine { _LIBCPP_HIDE_FROM_ABI void discard(unsigned long long __z) { for (; __z; --__z) - operator()(); + (void)operator()(); } // property functions - _LIBCPP_HIDE_FROM_ABI const _Engine& base() const _NOEXCEPT { return __e_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const _Engine& base() const _NOEXCEPT { return __e_; } template friend bool operator==(const independent_bits_engine<_Eng, _Wp, _UInt>& __x, diff --git a/libcxx/include/__random/linear_congruential_engine.h b/libcxx/include/__random/linear_congruential_engine.h index 0c0bc42a5cbfa..03beea93050e4 100644 --- a/libcxx/include/__random/linear_congruential_engine.h +++ b/libcxx/include/__random/linear_congruential_engine.h @@ -254,8 +254,8 @@ 
class linear_congruential_engine { static inline _LIBCPP_CONSTEXPR const result_type multiplier = __a; static inline _LIBCPP_CONSTEXPR const result_type increment = __c; static inline _LIBCPP_CONSTEXPR const result_type modulus = __m; - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type min() { return _Min; } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type max() { return _Max; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type min() { return _Min; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type max() { return _Max; } static inline _LIBCPP_CONSTEXPR const result_type default_seed = 1u; // constructors and seeding functions @@ -296,12 +296,12 @@ class linear_congruential_engine { } // generating functions - _LIBCPP_HIDE_FROM_ABI result_type operator()() { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()() { return __x_ = static_cast(__lce_ta<__a, __c, __m, _Mp>::next(__x_)); } _LIBCPP_HIDE_FROM_ABI void discard(unsigned long long __z) { for (; __z; --__z) - operator()(); + (void)operator()(); } friend _LIBCPP_HIDE_FROM_ABI bool diff --git a/libcxx/include/__random/lognormal_distribution.h b/libcxx/include/__random/lognormal_distribution.h index e6ca5b4612a2d..a94918a6a6a44 100644 --- a/libcxx/include/__random/lognormal_distribution.h +++ b/libcxx/include/__random/lognormal_distribution.h @@ -43,8 +43,8 @@ class lognormal_distribution { _LIBCPP_HIDE_FROM_ABI explicit param_type(result_type __m = 0, result_type __s = 1) : __m_(__m), __s_(__s) {} - _LIBCPP_HIDE_FROM_ABI result_type m() const { return __m_; } - _LIBCPP_HIDE_FROM_ABI result_type s() const { return __s_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type m() const { return __m_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type s() const { return __s_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__m_ == __y.__m_ && __x.__s_ == __y.__s_; 
@@ -68,28 +68,28 @@ class lognormal_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return std::exp(__nd_(__g)); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p) { typename normal_distribution::param_type __pn(__p.m(), __p.s()); return std::exp(__nd_(__g, __pn)); } // property functions - _LIBCPP_HIDE_FROM_ABI result_type m() const { return __nd_.mean(); } - _LIBCPP_HIDE_FROM_ABI result_type s() const { return __nd_.stddev(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type m() const { return __nd_.mean(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type s() const { return __nd_.stddev(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return param_type(__nd_.mean(), __nd_.stddev()); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return param_type(__nd_.mean(), __nd_.stddev()); } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { typename normal_distribution::param_type __pn(__p.m(), __p.s()); __nd_.param(__pn); } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const lognormal_distribution& __x, const lognormal_distribution& __y) { return __x.__nd_ == __y.__nd_; diff --git a/libcxx/include/__random/mersenne_twister_engine.h b/libcxx/include/__random/mersenne_twister_engine.h index 7e26e9987ed92..ce82b3387dbd2 100644 --- a/libcxx/include/__random/mersenne_twister_engine.h +++ 
b/libcxx/include/__random/mersenne_twister_engine.h @@ -161,8 +161,8 @@ class mersenne_twister_engine { static inline _LIBCPP_CONSTEXPR const result_type tempering_c = __c; static inline _LIBCPP_CONSTEXPR const size_t tempering_l = __l; static inline _LIBCPP_CONSTEXPR const result_type initialization_multiplier = __f; - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type min() { return _Min; } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type max() { return _Max; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type min() { return _Min; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type max() { return _Max; } static inline _LIBCPP_CONSTEXPR const result_type default_seed = 5489u; // constructors and seeding functions @@ -206,7 +206,7 @@ class mersenne_twister_engine { } // generating functions - _LIBCPP_HIDE_FROM_ABI result_type operator()() { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()() { const size_t __j = (__i_ + 1) % __n; const result_type __mask = __r == _Dt ? 
result_type(~0) : (result_type(1) << __r) - result_type(1); const result_type __yp = (__x_[__i_] & ~__mask) | (__x_[__j] & __mask); @@ -221,7 +221,7 @@ class mersenne_twister_engine { _LIBCPP_HIDE_FROM_ABI void discard(unsigned long long __z) { for (; __z; --__z) - operator()(); + (void)operator()(); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI result_type k() const { return __p_.k(); } - _LIBCPP_HIDE_FROM_ABI double p() const { return __p_.p(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type k() const { return __p_.k(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double p() const { return __p_.p(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::max(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::max(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const negative_binomial_distribution& __x, const negative_binomial_distribution& __y) { diff --git a/libcxx/include/__random/normal_distribution.h b/libcxx/include/__random/normal_distribution.h index a735d8307a3e0..4ebfd750a5f8a 100644 --- a/libcxx/include/__random/normal_distribution.h +++ b/libcxx/include/__random/normal_distribution.h @@ -44,8 +44,8 @@ class normal_distribution { _LIBCPP_HIDE_FROM_ABI explicit 
param_type(result_type __mean = 0, result_type __stddev = 1) : __mean_(__mean), __stddev_(__stddev) {} - _LIBCPP_HIDE_FROM_ABI result_type mean() const { return __mean_; } - _LIBCPP_HIDE_FROM_ABI result_type stddev() const { return __stddev_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type mean() const { return __mean_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type stddev() const { return __stddev_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__mean_ == __y.__mean_ && __x.__stddev_ == __y.__stddev_; @@ -73,21 +73,21 @@ class normal_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI result_type mean() const { return __p_.mean(); } - _LIBCPP_HIDE_FROM_ABI result_type stddev() const { return __p_.stddev(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type mean() const { return __p_.mean(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type stddev() const { return __p_.stddev(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return -numeric_limits::infinity(); } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return -numeric_limits::infinity(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } 
friend _LIBCPP_HIDE_FROM_ABI bool operator==(const normal_distribution& __x, const normal_distribution& __y) { return __x.__p_ == __y.__p_ && __x.__v_hot_ == __y.__v_hot_ && (!__x.__v_hot_ || __x.__v_ == __y.__v_); diff --git a/libcxx/include/__random/piecewise_constant_distribution.h b/libcxx/include/__random/piecewise_constant_distribution.h index 3faf339325f74..54e327857c783 100644 --- a/libcxx/include/__random/piecewise_constant_distribution.h +++ b/libcxx/include/__random/piecewise_constant_distribution.h @@ -59,8 +59,8 @@ class piecewise_constant_distribution { _LIBCPP_HIDE_FROM_ABI param_type(param_type const&) = default; _LIBCPP_HIDE_FROM_ABI param_type& operator=(const param_type& __rhs); - _LIBCPP_HIDE_FROM_ABI vector intervals() const { return __b_; } - _LIBCPP_HIDE_FROM_ABI vector densities() const { return __densities_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI vector intervals() const { return __b_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI vector densities() const { return __densities_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__densities_ == __y.__densities_ && __x.__b_ == __y.__b_; @@ -109,21 +109,21 @@ class piecewise_constant_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI vector intervals() const { return __p_.intervals(); } - _LIBCPP_HIDE_FROM_ABI vector densities() const { return __p_.densities(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI vector intervals() const { return __p_.intervals(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI vector densities() const { return 
__p_.densities(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return __p_.__b_.front(); } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return __p_.__b_.back(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return __p_.__b_.front(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return __p_.__b_.back(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const piecewise_constant_distribution& __x, const piecewise_constant_distribution& __y) { diff --git a/libcxx/include/__random/piecewise_linear_distribution.h b/libcxx/include/__random/piecewise_linear_distribution.h index 8aa3f19ca9004..ddf6416704395 100644 --- a/libcxx/include/__random/piecewise_linear_distribution.h +++ b/libcxx/include/__random/piecewise_linear_distribution.h @@ -60,8 +60,8 @@ class piecewise_linear_distribution { _LIBCPP_HIDE_FROM_ABI param_type(param_type const&) = default; _LIBCPP_HIDE_FROM_ABI param_type& operator=(const param_type& __rhs); - _LIBCPP_HIDE_FROM_ABI vector intervals() const { return __b_; } - _LIBCPP_HIDE_FROM_ABI vector densities() const { return __densities_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI vector intervals() const { return __b_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI vector densities() const { return __densities_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__densities_ == __y.__densities_ && __x.__b_ == __y.__b_; @@ -110,21 +110,21 @@ class piecewise_linear_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type 
operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI vector intervals() const { return __p_.intervals(); } - _LIBCPP_HIDE_FROM_ABI vector densities() const { return __p_.densities(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI vector intervals() const { return __p_.intervals(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI vector densities() const { return __p_.densities(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return __p_.__b_.front(); } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return __p_.__b_.back(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return __p_.__b_.front(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return __p_.__b_.back(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const piecewise_linear_distribution& __x, const piecewise_linear_distribution& __y) { diff --git a/libcxx/include/__random/poisson_distribution.h b/libcxx/include/__random/poisson_distribution.h index 85398c751b453..118f148e9b6da 100644 --- a/libcxx/include/__random/poisson_distribution.h +++ b/libcxx/include/__random/poisson_distribution.h @@ -53,7 +53,7 @@ class poisson_distribution { _LIBCPP_HIDE_FROM_ABI explicit param_type(double __mean = 1.0); - _LIBCPP_HIDE_FROM_ABI double mean() const { return __mean_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double mean() const { return __mean_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__mean_ == __y.__mean_; @@ -79,20 +79,20 @@ class poisson_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) 
{ + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI double mean() const { return __p_.mean(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI double mean() const { return __p_.mean(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::max(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::max(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const poisson_distribution& __x, const poisson_distribution& __y) { return __x.__p_ == __y.__p_; diff --git a/libcxx/include/__random/random_device.h b/libcxx/include/__random/random_device.h index 17ca2fc7499d0..a8bb1e68e72f0 100644 --- a/libcxx/include/__random/random_device.h +++ b/libcxx/include/__random/random_device.h @@ -51,8 +51,8 @@ class _LIBCPP_EXPORTED_FROM_ABI random_device { static _LIBCPP_CONSTEXPR const result_type _Min = 0; static _LIBCPP_CONSTEXPR const result_type _Max = 0xFFFFFFFFu; - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type min() { return _Min; } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type max() { return _Max; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type min() { return _Min; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type max() { return _Max; } // constructors # ifndef _LIBCPP_CXX03_LANG @@ 
-64,10 +64,10 @@ class _LIBCPP_EXPORTED_FROM_ABI random_device { ~random_device(); // generating functions - result_type operator()(); + [[__nodiscard__]] result_type operator()(); // property functions - double entropy() const _NOEXCEPT; + [[__nodiscard__]] double entropy() const _NOEXCEPT; random_device(const random_device&) = delete; void operator=(const random_device&) = delete; diff --git a/libcxx/include/__random/seed_seq.h b/libcxx/include/__random/seed_seq.h index b1ccc8329b8f9..f5748b7193048 100644 --- a/libcxx/include/__random/seed_seq.h +++ b/libcxx/include/__random/seed_seq.h @@ -56,7 +56,7 @@ class seed_seq { _LIBCPP_HIDE_FROM_ABI void generate(_RandomAccessIterator __first, _RandomAccessIterator __last); // property functions - _LIBCPP_HIDE_FROM_ABI size_t size() const _NOEXCEPT { return __v_.size(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t size() const _NOEXCEPT { return __v_.size(); } template _LIBCPP_HIDE_FROM_ABI void param(_OutputIterator __dest) const { std::copy(__v_.begin(), __v_.end(), __dest); @@ -65,7 +65,7 @@ class seed_seq { seed_seq(const seed_seq&) = delete; void operator=(const seed_seq&) = delete; - _LIBCPP_HIDE_FROM_ABI static result_type _Tp(result_type __x) { return __x ^ (__x >> 27); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static result_type _Tp(result_type __x) { return __x ^ (__x >> 27); } private: template diff --git a/libcxx/include/__random/shuffle_order_engine.h b/libcxx/include/__random/shuffle_order_engine.h index 72fd27042e724..3d8c323d096b5 100644 --- a/libcxx/include/__random/shuffle_order_engine.h +++ b/libcxx/include/__random/shuffle_order_engine.h @@ -76,8 +76,8 @@ class shuffle_order_engine { static _LIBCPP_CONSTEXPR const result_type _Max = _Engine::max(); #endif static_assert(_Min < _Max, "shuffle_order_engine invalid parameters"); - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type min() { return _Min; } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type max() { return _Max; } + 
[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type min() { return _Min; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type max() { return _Max; } static _LIBCPP_CONSTEXPR const unsigned long long _Rp = _Max - _Min + 1ull; @@ -109,7 +109,7 @@ class shuffle_order_engine { } // generating functions - _LIBCPP_HIDE_FROM_ABI result_type operator()() { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()() { if _LIBCPP_CONSTEXPR (_Rp != 0 || !(__k & 1)) { using _Ratio = __uratio<__k, _Rp != 0 ? _Rp : 0x8000000000000000ull>; if _LIBCPP_CONSTEXPR (_Ratio::num > 0xFFFFFFFFFFFFFFFFull / (_Max - _Min)) { @@ -126,11 +126,11 @@ class shuffle_order_engine { _LIBCPP_HIDE_FROM_ABI void discard(unsigned long long __z) { for (; __z; --__z) - operator()(); + (void)operator()(); } // property functions - _LIBCPP_HIDE_FROM_ABI const _Engine& base() const _NOEXCEPT { return __e_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI const _Engine& base() const _NOEXCEPT { return __e_; } private: template diff --git a/libcxx/include/__random/student_t_distribution.h b/libcxx/include/__random/student_t_distribution.h index dc199506862b8..16579962ce925 100644 --- a/libcxx/include/__random/student_t_distribution.h +++ b/libcxx/include/__random/student_t_distribution.h @@ -43,7 +43,7 @@ class student_t_distribution { _LIBCPP_HIDE_FROM_ABI explicit param_type(result_type __n = 1) : __n_(__n) {} - _LIBCPP_HIDE_FROM_ABI result_type n() const { return __n_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type n() const { return __n_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__n_ == __y.__n_; @@ -68,20 +68,20 @@ class student_t_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI 
result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI result_type n() const { return __p_.n(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type n() const { return __p_.n(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return -numeric_limits::infinity(); } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return -numeric_limits::infinity(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const student_t_distribution& __x, const student_t_distribution& __y) { return __x.__p_ == __y.__p_; diff --git a/libcxx/include/__random/subtract_with_carry_engine.h b/libcxx/include/__random/subtract_with_carry_engine.h index 755c7d9346eb5..4f9b31a074ee0 100644 --- a/libcxx/include/__random/subtract_with_carry_engine.h +++ b/libcxx/include/__random/subtract_with_carry_engine.h @@ -75,8 +75,8 @@ class subtract_with_carry_engine { static inline _LIBCPP_CONSTEXPR const size_t word_size = __w; static inline _LIBCPP_CONSTEXPR const size_t short_lag = __s; static inline _LIBCPP_CONSTEXPR const size_t long_lag = __r; - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type min() { return _Min; } - _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type max() { return _Max; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type min() { return _Min; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI static _LIBCPP_CONSTEXPR result_type max() { return _Max; } static inline 
_LIBCPP_CONSTEXPR const result_type default_seed = 19780503u; // constructors and seeding functions @@ -122,10 +122,10 @@ class subtract_with_carry_engine { } // generating functions - _LIBCPP_HIDE_FROM_ABI result_type operator()(); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(); _LIBCPP_HIDE_FROM_ABI void discard(unsigned long long __z) { for (; __z; --__z) - operator()(); + (void)operator()(); } template diff --git a/libcxx/include/__random/uniform_int_distribution.h b/libcxx/include/__random/uniform_int_distribution.h index fa2c33755b739..22b172094d571 100644 --- a/libcxx/include/__random/uniform_int_distribution.h +++ b/libcxx/include/__random/uniform_int_distribution.h @@ -151,8 +151,8 @@ class uniform_int_distribution { _LIBCPP_HIDE_FROM_ABI explicit param_type(result_type __a = 0, result_type __b = numeric_limits::max()) : __a_(__a), __b_(__b) {} - _LIBCPP_HIDE_FROM_ABI result_type a() const { return __a_; } - _LIBCPP_HIDE_FROM_ABI result_type b() const { return __b_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type a() const { return __a_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type b() const { return __b_; } _LIBCPP_HIDE_FROM_ABI friend bool operator==(const param_type& __x, const param_type& __y) { return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_; @@ -179,21 +179,21 @@ class uniform_int_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI result_type a() const { return __p_.a(); } - _LIBCPP_HIDE_FROM_ABI result_type b() const { return __p_.b(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type a() const { return 
__p_.a(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type b() const { return __p_.b(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return a(); } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return b(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return a(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return b(); } _LIBCPP_HIDE_FROM_ABI friend bool operator==(const uniform_int_distribution& __x, const uniform_int_distribution& __y) { diff --git a/libcxx/include/__random/uniform_real_distribution.h b/libcxx/include/__random/uniform_real_distribution.h index 9407827b2a5d5..f648bc0b8e076 100644 --- a/libcxx/include/__random/uniform_real_distribution.h +++ b/libcxx/include/__random/uniform_real_distribution.h @@ -42,8 +42,8 @@ class uniform_real_distribution { _LIBCPP_HIDE_FROM_ABI explicit param_type(result_type __a = 0, result_type __b = 1) : __a_(__a), __b_(__b) {} - _LIBCPP_HIDE_FROM_ABI result_type a() const { return __a_; } - _LIBCPP_HIDE_FROM_ABI result_type b() const { return __b_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type a() const { return __a_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type b() const { return __b_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_; @@ -69,21 +69,21 @@ class uniform_real_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI 
result_type operator()(_URNG& __g, const param_type& __p); // property functions - _LIBCPP_HIDE_FROM_ABI result_type a() const { return __p_.a(); } - _LIBCPP_HIDE_FROM_ABI result_type b() const { return __p_.b(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type a() const { return __p_.a(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type b() const { return __p_.b(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return a(); } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return b(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return a(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return b(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const uniform_real_distribution& __x, const uniform_real_distribution& __y) { diff --git a/libcxx/include/__random/weibull_distribution.h b/libcxx/include/__random/weibull_distribution.h index 1d65b09ca663f..333699cac1be4 100644 --- a/libcxx/include/__random/weibull_distribution.h +++ b/libcxx/include/__random/weibull_distribution.h @@ -43,8 +43,8 @@ class weibull_distribution { _LIBCPP_HIDE_FROM_ABI explicit param_type(result_type __a = 1, result_type __b = 1) : __a_(__a), __b_(__b) {} - _LIBCPP_HIDE_FROM_ABI result_type a() const { return __a_; } - _LIBCPP_HIDE_FROM_ABI result_type b() const { return __b_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type a() const { return __a_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type b() const { return __b_; } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const param_type& __x, const param_type& __y) { return __x.__a_ == __y.__a_ && __x.__b_ == __y.__b_; @@ -70,23 +70,23 @@ class weibull_distribution { // generating functions template - _LIBCPP_HIDE_FROM_ABI result_type 
operator()(_URNG& __g) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g) { return (*this)(__g, __p_); } template - _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type operator()(_URNG& __g, const param_type& __p) { return __p.b() * std::pow(exponential_distribution()(__g), 1 / __p.a()); } // property functions - _LIBCPP_HIDE_FROM_ABI result_type a() const { return __p_.a(); } - _LIBCPP_HIDE_FROM_ABI result_type b() const { return __p_.b(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type a() const { return __p_.a(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type b() const { return __p_.b(); } - _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI param_type param() const { return __p_; } _LIBCPP_HIDE_FROM_ABI void param(const param_type& __p) { __p_ = __p; } - _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } - _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type min() const { return 0; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI result_type max() const { return numeric_limits::infinity(); } friend _LIBCPP_HIDE_FROM_ABI bool operator==(const weibull_distribution& __x, const weibull_distribution& __y) { return __x.__p_ == __y.__p_; diff --git a/libcxx/test/libcxx/numerics/rand/nodiscard.verify.cpp b/libcxx/test/libcxx/numerics/rand/nodiscard.verify.cpp new file mode 100644 index 0000000000000..27bc199212684 --- /dev/null +++ b/libcxx/test/libcxx/numerics/rand/nodiscard.verify.cpp @@ -0,0 +1,607 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// Check that functions are marked [[nodiscard]] + +#include + +#include "test_macros.h" + +void test() { + std::mt19937_64 gen; + + { + std::bernoulli_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.p(); + + std::bernoulli_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.p(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::binomial_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.t(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.p(); + + std::binomial_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.p(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + 
d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::cauchy_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.a(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.b(); + + std::cauchy_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.a(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.b(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::chi_squared_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.n(); + + std::chi_squared_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.n(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value 
of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::discard_block_engine e; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.max(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.base(); + } + { + std::discrete_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.probabilities(); + + std::discrete_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.probabilities(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::exponential_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.lambda(); + + std::exponential_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.lambda(); + // expected-warning@+1 
{{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::extreme_value_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.a(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.b(); + + std::extreme_value_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.a(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.b(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::fisher_f_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.m(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.n(); + + std::fisher_f_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 
'nodiscard' attribute}} + d.m(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.n(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::gamma_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.alpha(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.beta(); + + std::gamma_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.alpha(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.beta(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::generate_canonical(gen); + } + { + std::geometric_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.p(); + + std::geometric_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + 
d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.p(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::independent_bits_engine e; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.max(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.base(); + } + { + std::linear_congruential_engine e(94); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.max(); + } + { + std::lognormal_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.m(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.s(); + + std::lognormal_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return 
value of function declared with 'nodiscard' attribute}} + d.m(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.s(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::mt19937_64 e; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.max(); + } + { + std::negative_binomial_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.k(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.p(); + + std::negative_binomial_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.k(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.p(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + 
std::normal_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.mean(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.stddev(); + + std::normal_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.mean(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.stddev(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::piecewise_constant_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.intervals(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.densities(); + + std::piecewise_constant_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.intervals(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.densities(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // 
expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::piecewise_linear_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.intervals(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.densities(); + + std::piecewise_linear_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.intervals(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.densities(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::poisson_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.mean(); + + std::poisson_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.mean(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // 
expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } +#if !defined(TEST_HAS_NO_RANDOM_DEVICE) + { + std::random_device d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.entropy(); + } +#endif + { + std::seed_seq s; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + s.size(); + } + { + std::shuffle_order_engine e; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.max(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.base(); + } + { + std::student_t_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.n(); + + std::student_t_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.n(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + 
d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::subtract_with_carry_engine e; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + e.max(); + } + { + std::uniform_int_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.a(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.b(); + + std::uniform_int_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.a(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.b(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::uniform_real_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.a(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.b(); + + std::uniform_real_distribution 
d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.a(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.b(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } + { + std::weibull_distribution::param_type p; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.a(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + p.b(); + + std::weibull_distribution d; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d(gen, p); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.a(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.b(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.param(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.min(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + d.max(); + } +} diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/bad_engine.verify.cpp 
b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/bad_engine.verify.cpp index 9855d8ae6f866..118f9aae74e08 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bernoulli/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void test(std::bernoulli_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/bad_engine.verify.cpp index 1b1bb4428f80d..3ff9292180ad7 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.bin/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void test(std::binomial_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/bad_engine.verify.cpp index d9b692ec94727..b8118932abf72 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.geo/bad_engine.verify.cpp @@ -26,6 +26,6 @@ void test(std::geometric_distribution 
dist) G badg; G okg; - dist(badg); //expected-error@*:* 7 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 7 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/bad_engine.verify.cpp index e526168d6d4ec..d4e7729d07beb 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.bern/rand.dist.bern.negbin/bad_engine.verify.cpp @@ -26,6 +26,6 @@ void test(std::negative_binomial_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 7 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 7 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/bad_engine.verify.cpp index 59e5b50a862dc..e5d135fcc5a63 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.cauchy/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void test(std::cauchy_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/bad_engine.verify.cpp 
b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/bad_engine.verify.cpp index b0265148debf8..5c804043ab309 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.chisq/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void test(std::chi_squared_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 3 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 3 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/bad_engine.verify.cpp index 77c97fb3351ad..61f7610f472d7 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.f/bad_engine.verify.cpp @@ -26,6 +26,6 @@ void test(std::fisher_f_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 4 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 4 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/bad_engine.verify.cpp index 80c34580bc40d..5ecc20c4e6bd4 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.lognormal/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void 
test(std::lognormal_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/bad_engine.verify.cpp index cc697ab5e7ae2..bcfa9e818965f 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.normal/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void test(std::normal_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/bad_engine.verify.cpp index 151a2bd954c8b..8a4a6b7a6cab4 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.norm/rand.dist.norm.t/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void test(std::student_t_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 5 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 5 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/bad_engine.verify.cpp 
b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/bad_engine.verify.cpp index b1241214de76f..b177b76905bc0 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.exp/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void test(std::exponential_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/bad_engine.verify.cpp index c002716a5316a..d083a2e010c9b 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.extreme/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void test(std::extreme_value_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/bad_engine.verify.cpp index 8bf87dd8615c6..be1fad1e07677 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.gamma/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void 
test(std::gamma_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 3 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 3 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/bad_engine.verify.cpp index ef56d9635fc49..c4a55f033263a 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.poisson/bad_engine.verify.cpp @@ -26,6 +26,6 @@ void test(std::poisson_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 4 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 4 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/bad_engine.verify.cpp index e40c1972abeb4..156af700cb349 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.pois/rand.dist.pois.weibull/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void test(std::weibull_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git 
a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/bad_engine.verify.cpp index a4def062f83f1..36ed7cd770bac 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.discrete/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void test(std::discrete_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/bad_engine.verify.cpp index 002071129d03e..309d71a61f918 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.pconst/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void test(std::piecewise_constant_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/bad_engine.verify.cpp index 0fb54a403ac9e..736acb2b3707d 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/bad_engine.verify.cpp +++ 
b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.samp/rand.dist.samp.plinear/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void test(std::piecewise_linear_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* 2 {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/bad_engine.verify.cpp index 2432843942e29..3b5cf901cef27 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.int/bad_engine.verify.cpp @@ -26,6 +26,6 @@ void test(std::uniform_int_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } diff --git a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/bad_engine.verify.cpp b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/bad_engine.verify.cpp index fa5c3a3ebb484..66b51a724899d 100644 --- a/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/bad_engine.verify.cpp +++ b/libcxx/test/libcxx/numerics/rand/rand.dist/rand.dist.uni/rand.dist.uni.real/bad_engine.verify.cpp @@ -25,6 +25,6 @@ void test(std::uniform_real_distribution dist) G badg; G okg; - dist(badg); //expected-error@*:* {{static assertion failed}} //expected-note {{in instantiation}} - dist(okg); + (void)dist(badg); //expected-error@*:* {{static assertion failed}} //expected-note {{in instantiation}} + (void)dist(okg); } From 
9ea4135b05a9748313ebfcca0474e874df0094f5 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Mon, 22 Jun 2026 12:47:58 -0500 Subject: [PATCH 051/511] [flang][OpenMP] Use early exit to unindent some code, NFC (#205150) --- flang/lib/Semantics/check-omp-structure.cpp | 92 +++++++++++---------- 1 file changed, 47 insertions(+), 45 deletions(-) diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index e643ceb25ce34..d87b2f1983de6 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -4317,54 +4317,56 @@ void OmpStructureChecker::Enter(const parser::OmpClause::If &x) { return false; }}; - if (OmpVerifyModifiers( + if (!OmpVerifyModifiers( x.v, llvm::omp::OMPC_if, GetContext().clauseSource, context_)) { - auto &modifiers{OmpGetModifiers(x.v)}; - if (auto *dnm{OmpGetUniqueModifier( - modifiers)}) { - llvm::omp::Directive sub{dnm->v}; - std::string subName{parser::omp::GetUpperName(sub, version)}; - std::string dirName{parser::omp::GetUpperName(dir, version)}; + return; + } - parser::CharBlock modifierSource{OmpGetModifierSource(modifiers, dnm)}; - auto desc{OmpGetDescriptor()}; - std::string modName{desc.name.str()}; + auto &modifiers{OmpGetModifiers(x.v)}; + if (auto *dnm{ + OmpGetUniqueModifier(modifiers)}) { + llvm::omp::Directive sub{dnm->v}; + std::string subName{parser::omp::GetUpperName(sub, version)}; + std::string dirName{parser::omp::GetUpperName(dir, version)}; - if (!isConstituent(dir, sub)) { - context_ - .Say(modifierSource, - "%s is not a constituent of the %s directive"_err_en_US, - subName, dirName) - .Attach(GetContext().directiveSource, - "Cannot apply to directive"_en_US); - } else { - static llvm::omp::Directive valid45[]{ - llvm::omp::OMPD_cancel, // - llvm::omp::OMPD_parallel, // - /* OMP 5.0+ also allows OMPD_simd */ - llvm::omp::OMPD_target, // - llvm::omp::OMPD_target_data, // - llvm::omp::OMPD_target_enter_data, // - 
llvm::omp::OMPD_target_exit_data, // - llvm::omp::OMPD_target_update, // - llvm::omp::OMPD_task, // - llvm::omp::OMPD_taskloop, // - /* OMP 5.2+ also allows OMPD_teams */ - }; - if (version < 50 && sub == llvm::omp::OMPD_simd) { - context_.Say(modifierSource, - "%s is not allowed as '%s' in %s, %s"_warn_en_US, subName, - modName, ThisVersion(version), TryVersion(50)); - } else if (version < 52 && sub == llvm::omp::OMPD_teams) { - context_.Say(modifierSource, - "%s is not allowed as '%s' in %s, %s"_warn_en_US, subName, - modName, ThisVersion(version), TryVersion(52)); - } else if (!llvm::is_contained(valid45, sub) && - sub != llvm::omp::OMPD_simd && sub != llvm::omp::OMPD_teams) { - context_.Say(modifierSource, - "%s is not allowed as '%s' in %s"_err_en_US, subName, modName, - ThisVersion(version)); - } + parser::CharBlock modifierSource{OmpGetModifierSource(modifiers, dnm)}; + auto desc{OmpGetDescriptor()}; + std::string modName{desc.name.str()}; + + if (!isConstituent(dir, sub)) { + context_ + .Say(modifierSource, + "%s is not a constituent of the %s directive"_err_en_US, subName, + dirName) + .Attach( + GetContext().directiveSource, "Cannot apply to directive"_en_US); + } else { + static llvm::omp::Directive valid45[]{ + llvm::omp::OMPD_cancel, // + llvm::omp::OMPD_parallel, // + /* OMP 5.0+ also allows OMPD_simd */ + llvm::omp::OMPD_target, // + llvm::omp::OMPD_target_data, // + llvm::omp::OMPD_target_enter_data, // + llvm::omp::OMPD_target_exit_data, // + llvm::omp::OMPD_target_update, // + llvm::omp::OMPD_task, // + llvm::omp::OMPD_taskloop, // + /* OMP 5.2+ also allows OMPD_teams */ + }; + if (version < 50 && sub == llvm::omp::OMPD_simd) { + context_.Say(modifierSource, + "%s is not allowed as '%s' in %s, %s"_warn_en_US, subName, modName, + ThisVersion(version), TryVersion(50)); + } else if (version < 52 && sub == llvm::omp::OMPD_teams) { + context_.Say(modifierSource, + "%s is not allowed as '%s' in %s, %s"_warn_en_US, subName, modName, + 
ThisVersion(version), TryVersion(52)); + } else if (!llvm::is_contained(valid45, sub) && + sub != llvm::omp::OMPD_simd && sub != llvm::omp::OMPD_teams) { + context_.Say(modifierSource, + "%s is not allowed as '%s' in %s"_err_en_US, subName, modName, + ThisVersion(version)); } } } From 38c31182ab01eb74ccd4c7be7152e60b876fa80e Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 22 Jun 2026 10:48:02 -0700 Subject: [PATCH 052/511] [MC] emitCodeAlignment: take MCSubtargetInfo by reference. NFC (#205140) The fragment member cannot be null, and the sibling streamer hooks (emitInstruction, initSections, emitPrefAlign) already take it by reference. --- bolt/lib/Core/BinaryEmitter.cpp | 10 +++++----- llvm/include/llvm/MC/MCObjectStreamer.h | 2 +- llvm/include/llvm/MC/MCStreamer.h | 2 +- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 6 +++--- llvm/lib/MC/MCAsmStreamer.cpp | 4 ++-- llvm/lib/MC/MCELFStreamer.cpp | 2 +- llvm/lib/MC/MCObjectStreamer.cpp | 4 ++-- llvm/lib/MC/MCParser/AsmParser.cpp | 4 ++-- llvm/lib/MC/MCParser/MasmParser.cpp | 2 +- llvm/lib/MC/MCStreamer.cpp | 2 +- llvm/lib/MC/MCWinCOFFStreamer.cpp | 6 +++--- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 2 +- llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 2 +- llvm/lib/Target/ARM/ARMMCInstLower.cpp | 2 +- llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 8 ++++---- .../Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp | 2 +- llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp | 2 +- llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp | 4 ++-- llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp | 2 +- .../Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp | 2 +- llvm/lib/Target/Mips/MipsAsmPrinter.cpp | 2 +- .../lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp | 2 +- .../Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp | 2 +- llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 2 +- llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 4 ++-- llvm/lib/Target/Sparc/SparcAsmPrinter.cpp | 2 +- .../SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp | 2 
+- .../SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.h | 2 +- llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 2 +- llvm/lib/Target/X86/X86MCInstLower.cpp | 10 +++++----- 30 files changed, 50 insertions(+), 50 deletions(-) diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index 6affddafd3bf5..43b42703c86d7 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -319,14 +319,14 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, // tentative layout. Section->ensureMinAlignment(Align(opts::AlignFunctions)); - Streamer.emitCodeAlignment(Function.getMinAlign(), &*BC.STI); + Streamer.emitCodeAlignment(Function.getMinAlign(), *BC.STI); uint16_t MaxAlignBytes = FF.isSplitFragment() ? Function.getMaxColdAlignmentBytes() : Function.getMaxAlignmentBytes(); if (MaxAlignBytes > 0) - Streamer.emitCodeAlignment(Function.getAlign(), &*BC.STI, MaxAlignBytes); + Streamer.emitCodeAlignment(Function.getAlign(), *BC.STI, MaxAlignBytes); } else { - Streamer.emitCodeAlignment(Function.getAlign(), &*BC.STI); + Streamer.emitCodeAlignment(Function.getAlign(), *BC.STI); } if (size_t Padding = opts::padFunctionBefore(Function)) { @@ -459,7 +459,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF, for (BinaryBasicBlock *const BB : FF) { if ((opts::AlignBlocks || opts::PreserveBlocksAlignment) && BB->getAlignment() > 1) - Streamer.emitCodeAlignment(BB->getAlign(), &*BC.STI, + Streamer.emitCodeAlignment(BB->getAlign(), *BC.STI, BB->getAlignmentMaxBytes()); Streamer.emitLabel(BB->getLabel()); if (!EmitCodeOnly) { @@ -537,7 +537,7 @@ void BinaryEmitter::emitConstantIslands(BinaryFunction &BF, bool EmitColdPart, const uint16_t Alignment = OnBehalfOf ? 
OnBehalfOf->getConstantIslandAlignment() : BF.getConstantIslandAlignment(); - Streamer.emitCodeAlignment(Align(Alignment), &*BC.STI); + Streamer.emitCodeAlignment(Align(Alignment), *BC.STI); if (!OnBehalfOf) { if (!EmitColdPart) diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h index cb2694b231d5b..003f6106d15ba 100644 --- a/llvm/include/llvm/MC/MCObjectStreamer.h +++ b/llvm/include/llvm/MC/MCObjectStreamer.h @@ -137,7 +137,7 @@ class LLVM_ABI MCObjectStreamer : public MCStreamer { void emitValueToAlignment(Align Alignment, int64_t Fill = 0, uint8_t FillLen = 1, unsigned MaxBytesToEmit = 0) override; - void emitCodeAlignment(Align ByteAlignment, const MCSubtargetInfo *STI, + void emitCodeAlignment(Align ByteAlignment, const MCSubtargetInfo &STI, unsigned MaxBytesToEmit = 0) override; void emitPrefAlign(Align Alignment, const MCSymbol &End, bool EmitNops, uint8_t Fill, const MCSubtargetInfo &STI) override; diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index 3b25cc5440c52..f1479f860e885 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -853,7 +853,7 @@ class LLVM_ABI MCStreamer { /// \param MaxBytesToEmit - The maximum numbers of bytes to emit, or 0. If /// the alignment cannot be reached in this many bytes, no bytes are /// emitted. 
- virtual void emitCodeAlignment(Align Alignment, const MCSubtargetInfo *STI, + virtual void emitCodeAlignment(Align Alignment, const MCSubtargetInfo &STI, unsigned MaxBytesToEmit = 0); virtual void emitPrefAlign(Align A, const MCSymbol &End, bool EmitNops, diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index cdb9d760606f6..d93e06f0300ed 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2754,12 +2754,12 @@ void AsmPrinter::emitGlobalIFunc(Module &M, const GlobalIFunc &GI) { MCSymbol *Stub = getSymbol(&GI); EmitLinkage(Stub); - OutStreamer->emitCodeAlignment(TextAlign, getIFuncMCSubtargetInfo()); + OutStreamer->emitCodeAlignment(TextAlign, *getIFuncMCSubtargetInfo()); OutStreamer->emitLabel(Stub); emitVisibility(Stub, GI.getVisibility()); emitMachOIFuncStubBody(M, GI, LazyPointer); - OutStreamer->emitCodeAlignment(TextAlign, getIFuncMCSubtargetInfo()); + OutStreamer->emitCodeAlignment(TextAlign, *getIFuncMCSubtargetInfo()); OutStreamer->emitLabel(StubHelper); emitVisibility(StubHelper, GI.getVisibility()); emitMachOIFuncStubHelperBody(M, GI, LazyPointer); @@ -3833,7 +3833,7 @@ Align AsmPrinter::emitAlignment(Align Alignment, const GlobalObject *GV, STI = &getSubtargetInfo(); else STI = &TM.getMCSubtargetInfo(); - OutStreamer->emitCodeAlignment(Alignment, STI, MaxBytesToEmit); + OutStreamer->emitCodeAlignment(Alignment, *STI, MaxBytesToEmit); } else OutStreamer->emitValueToAlignment(Alignment, 0, 1, MaxBytesToEmit); return Alignment; diff --git a/llvm/lib/MC/MCAsmStreamer.cpp b/llvm/lib/MC/MCAsmStreamer.cpp index 0344d1c0ead03..a518daa997945 100644 --- a/llvm/lib/MC/MCAsmStreamer.cpp +++ b/llvm/lib/MC/MCAsmStreamer.cpp @@ -284,7 +284,7 @@ class MCAsmStreamer final : public MCAsmBaseStreamer { uint8_t FillLen = 1, unsigned MaxBytesToEmit = 0) override; - void emitCodeAlignment(Align Alignment, const MCSubtargetInfo *STI, + void emitCodeAlignment(Align 
Alignment, const MCSubtargetInfo &STI, unsigned MaxBytesToEmit = 0) override; void emitPrefAlign(Align Alignment, const MCSymbol &End, bool EmitNops, uint8_t Fill, const MCSubtargetInfo &STI) override; @@ -1573,7 +1573,7 @@ void MCAsmStreamer::emitValueToAlignment(Align Alignment, int64_t Fill, } void MCAsmStreamer::emitCodeAlignment(Align Alignment, - const MCSubtargetInfo *STI, + const MCSubtargetInfo &STI, unsigned MaxBytesToEmit) { // Emit with a text fill value. if (MAI->getTextAlignFillValue()) diff --git a/llvm/lib/MC/MCELFStreamer.cpp b/llvm/lib/MC/MCELFStreamer.cpp index a7e450de4f1bc..a18b27b2dc132 100644 --- a/llvm/lib/MC/MCELFStreamer.cpp +++ b/llvm/lib/MC/MCELFStreamer.cpp @@ -54,7 +54,7 @@ void MCELFStreamer::initSections(const MCSubtargetInfo &STI) { MCContext &Ctx = getContext(); switchSection(Ctx.getObjectFileInfo()->getTextSection()); emitCodeAlignment(Align(Ctx.getObjectFileInfo()->getTextSectionAlignment()), - &STI); + STI); } void MCELFStreamer::emitLabel(MCSymbol *S, SMLoc Loc) { diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp index 81410b37069fb..d89e5df706b21 100644 --- a/llvm/lib/MC/MCObjectStreamer.cpp +++ b/llvm/lib/MC/MCObjectStreamer.cpp @@ -683,12 +683,12 @@ void MCObjectStreamer::emitValueToAlignment(Align Alignment, int64_t Fill, } void MCObjectStreamer::emitCodeAlignment(Align Alignment, - const MCSubtargetInfo *STI, + const MCSubtargetInfo &STI, unsigned MaxBytesToEmit) { auto *F = getCurrentFragment(); emitValueToAlignment(Alignment, 0, 1, MaxBytesToEmit); F->u.align.EmitNops = true; - F->STI = STI; + F->STI = &STI; } void MCObjectStreamer::emitPrefAlign(Align Alignment, const MCSymbol &End, diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp index c4cc4d870ccf6..e0ad48f6e1932 100644 --- a/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/llvm/lib/MC/MCParser/AsmParser.cpp @@ -3480,8 +3480,8 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, uint8_t ValueSize) { // Check 
whether we should use optimal code alignment for this .align // directive. if (MAI.useCodeAlign(*Section) && !HasFillExpr) { - getStreamer().emitCodeAlignment( - Align(Alignment), &getTargetParser().getSTI(), MaxBytesToFill); + getStreamer().emitCodeAlignment(Align(Alignment), + getTargetParser().getSTI(), MaxBytesToFill); } else { // FIXME: Target specific behavior about how the "extra" bytes are filled. getStreamer().emitValueToAlignment(Align(Alignment), FillExpr, ValueSize, diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp index 14443a63dda63..0a4ab8a552dad 100644 --- a/llvm/lib/MC/MCParser/MasmParser.cpp +++ b/llvm/lib/MC/MCParser/MasmParser.cpp @@ -4227,7 +4227,7 @@ bool MasmParser::emitAlignTo(int64_t Alignment) { const MCSection *Section = getStreamer().getCurrentSectionOnly(); if (MAI.useCodeAlign(*Section)) { getStreamer().emitCodeAlignment(Align(Alignment), - &getTargetParser().getSTI(), + getTargetParser().getSTI(), /*MaxBytesToEmit=*/0); } else { // FIXME: Target specific behavior about how the "extra" bytes are filled. 
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp index 1f0d915fae8ab..1cd94715b2592 100644 --- a/llvm/lib/MC/MCStreamer.cpp +++ b/llvm/lib/MC/MCStreamer.cpp @@ -1523,7 +1523,7 @@ void MCStreamer::emitFill(const MCExpr &NumValues, int64_t Size, int64_t Expr, void MCStreamer::emitValueToAlignment(Align, int64_t, uint8_t, unsigned) {} void MCStreamer::emitPrefAlign(Align A, const MCSymbol &End, bool EmitNops, uint8_t Fill, const MCSubtargetInfo &STI) {} -void MCStreamer::emitCodeAlignment(Align Alignment, const MCSubtargetInfo *STI, +void MCStreamer::emitCodeAlignment(Align Alignment, const MCSubtargetInfo &STI, unsigned MaxBytesToEmit) {} void MCStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value, SMLoc Loc) {} diff --git a/llvm/lib/MC/MCWinCOFFStreamer.cpp b/llvm/lib/MC/MCWinCOFFStreamer.cpp index 7b4faad7ad88f..467b8e634f03e 100644 --- a/llvm/lib/MC/MCWinCOFFStreamer.cpp +++ b/llvm/lib/MC/MCWinCOFFStreamer.cpp @@ -138,13 +138,13 @@ void MCWinCOFFStreamer::initSections(const MCSubtargetInfo &STI) { // This emulates the same behavior of GNU as. This makes it easier // to compare the output as the major sections are in the same order. 
switchSection(getContext().getObjectFileInfo()->getTextSection()); - emitCodeAlignment(Align(4), &STI); + emitCodeAlignment(Align(4), STI); switchSection(getContext().getObjectFileInfo()->getDataSection()); - emitCodeAlignment(Align(4), &STI); + emitCodeAlignment(Align(4), STI); switchSection(getContext().getObjectFileInfo()->getBSSSection()); - emitCodeAlignment(Align(4), &STI); + emitCodeAlignment(Align(4), STI); switchSection(getContext().getObjectFileInfo()->getTextSection()); } diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index b16c0460adf38..156c12c5ab8dd 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -510,7 +510,7 @@ void AArch64AsmPrinter::emitSled(const MachineInstr &MI, SledKind Kind) { // ;DATA: higher 32 bits of the address of the trampoline // LDP X0, X30, [SP], #16 ; pop X0 and the link register from the stack // - OutStreamer->emitCodeAlignment(Align(4), &getSubtargetInfo()); + OutStreamer->emitCodeAlignment(Align(4), getSubtargetInfo()); auto CurSled = OutContext.createTempSymbol("xray_sled_", true); OutStreamer->emitLabel(CurSled); auto Target = OutContext.createTempSymbol(); diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 8b8d0bc9ada11..c25a2cfeff8d6 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -2403,7 +2403,7 @@ void ARMAsmPrinter::emitInstruction(const MachineInstr *MI) { // FIXME: Ideally we could vary the LDRB index based on the padding // between the sequence and jump table, however that relies on MCExprs // for load indexes which are currently not supported. 
- OutStreamer->emitCodeAlignment(Align(4), &getSubtargetInfo()); + OutStreamer->emitCodeAlignment(Align(4), getSubtargetInfo()); EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr) .addReg(Idx) .addReg(Idx) diff --git a/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/llvm/lib/Target/ARM/ARMMCInstLower.cpp index 79ee0f22f3c1e..6f948f4cb33ad 100644 --- a/llvm/lib/Target/ARM/ARMMCInstLower.cpp +++ b/llvm/lib/Target/ARM/ARMMCInstLower.cpp @@ -213,7 +213,7 @@ void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) // BLX ip // POP{ r0, lr } // - OutStreamer->emitCodeAlignment(Align(4), &getSubtargetInfo()); + OutStreamer->emitCodeAlignment(Align(4), getSubtargetInfo()); auto CurSled = OutContext.createTempSymbol("xray_sled_", true); OutStreamer->emitLabel(CurSled); auto Target = OutContext.createTempSymbol(); diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index a4ac7f61713e0..1cac96a744733 100644 --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -11650,7 +11650,7 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) { SwitchMode(); getTargetStreamer().emitCode16(); - getParser().getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0); + getParser().getStreamer().emitCodeAlignment(Align(2), getSTI(), 0); return false; } @@ -11663,7 +11663,7 @@ bool ARMAsmParser::parseDirectiveARM(SMLoc L) { if (isThumb()) SwitchMode(); getTargetStreamer().emitCode32(); - getParser().getStreamer().emitCodeAlignment(Align(4), &getSTI(), 0); + getParser().getStreamer().emitCodeAlignment(Align(4), getSTI(), 0); return false; } @@ -12320,7 +12320,7 @@ bool ARMAsmParser::parseDirectiveEven(SMLoc L) { assert(Section && "must have section to emit alignment"); if (getContext().getAsmInfo().useCodeAlign(*Section)) - getStreamer().emitCodeAlignment(Align(2), &getSTI()); + getStreamer().emitCodeAlignment(Align(2), getSTI()); else 
getStreamer().emitValueToAlignment(Align(2)); @@ -12518,7 +12518,7 @@ bool ARMAsmParser::parseDirectiveAlign(SMLoc L) { const MCSection *Section = getStreamer().getCurrentSectionOnly(); assert(Section && "must have section to emit alignment"); if (getContext().getAsmInfo().useCodeAlign(*Section)) - getStreamer().emitCodeAlignment(Align(4), &getSTI(), 0); + getStreamer().emitCodeAlignment(Align(4), getSTI(), 0); else getStreamer().emitValueToAlignment(Align(4), 0, 1, 0); return false; diff --git a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp index c2a389515b856..d787f251087ee 100644 --- a/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp +++ b/llvm/lib/Target/CSKY/MCTargetDesc/CSKYTargetStreamer.cpp @@ -31,7 +31,7 @@ void CSKYConstantPool::emitAll(MCStreamer &Streamer) { for (const ConstantPoolEntry &Entry : Entries) { Streamer.emitCodeAlignment( Align(Entry.Size), - Streamer.getContext().getSubtargetInfo()); // align naturally + *Streamer.getContext().getSubtargetInfo()); // align naturally Streamer.emitLabel(Entry.Label); Streamer.emitValue(Entry.Value, Entry.Size, Entry.Loc); } diff --git a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index dd27d05e2a368..a0927d5e58a18 100644 --- a/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -1559,7 +1559,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, MES->switchSection(mySection); unsigned byteSize = is32bit ? 
4 : 8; - getStreamer().emitCodeAlignment(Align(byteSize), &getSTI(), byteSize); + getStreamer().emitCodeAlignment(Align(byteSize), getSTI(), byteSize); MCSymbol *Sym; diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 89291a05cc05e..2a1427b38142c 100644 --- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -208,7 +208,7 @@ static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI, OutStreamer.emitLabel(Sym); OutStreamer.emitSymbolAttribute(Sym, MCSA_Global); OutStreamer.emitIntValue(Value, AlignSize); - OutStreamer.emitCodeAlignment(Align(AlignSize), &STI); + OutStreamer.emitCodeAlignment(Align(AlignSize), STI); } } else { assert(Imm.isExpr() && "Expected expression and found none"); @@ -236,7 +236,7 @@ static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI, OutStreamer.emitLabel(Sym); OutStreamer.emitSymbolAttribute(Sym, MCSA_Local); OutStreamer.emitValue(Imm.getExpr(), AlignSize); - OutStreamer.emitCodeAlignment(Align(AlignSize), &STI); + OutStreamer.emitCodeAlignment(Align(AlignSize), STI); } } return Sym; diff --git a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp index df1037469a078..52afb2a8af199 100644 --- a/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp @@ -250,7 +250,7 @@ void LoongArchAsmPrinter::emitSled(const MachineInstr &MI, SledKind Kind) { // The count here should be adjusted accordingly if the implementation // changes. 
const int8_t NoopsInSledCount = 11; - OutStreamer->emitCodeAlignment(Align(4), &getSubtargetInfo()); + OutStreamer->emitCodeAlignment(Align(4), getSubtargetInfo()); MCSymbol *BeginOfSled = OutContext.createTempSymbol("xray_sled_begin"); MCSymbol *EndOfSled = OutContext.createTempSymbol("xray_sled_end"); OutStreamer->emitLabel(BeginOfSled); diff --git a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index e0725b713a5aa..abdb9778e8233 100644 --- a/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -971,7 +971,7 @@ void MipsTargetELFStreamer::finish() { Align Alignment = Section.getAlign(); S.switchSection(&Section); if (getContext().getAsmInfo().useCodeAlign(Section)) - S.emitCodeAlignment(Alignment, &STI, Alignment.value()); + S.emitCodeAlignment(Alignment, STI, Alignment.value()); else S.emitValueToAlignment(Alignment, 0, 1, Alignment.value()); } diff --git a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp index 715b96e52e6c8..62fd0ef319e1a 100644 --- a/llvm/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/llvm/lib/Target/Mips/MipsAsmPrinter.cpp @@ -1185,7 +1185,7 @@ void MipsAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { // LD RA, 8(SP) // DADDIU SP, SP, 16 // - OutStreamer->emitCodeAlignment(Align(4), &getSubtargetInfo()); + OutStreamer->emitCodeAlignment(Align(4), getSubtargetInfo()); auto CurSled = OutContext.createTempSymbol("xray_sled_", true); OutStreamer->emitLabel(CurSled); auto Target = OutContext.createTempSymbol(); diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp index b9e03f087082d..7b2d2be503d72 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFStreamer.cpp @@ -52,7 +52,7 @@ void PPCELFStreamer::emitPrefixedInstruction(const 
MCInst &Inst, // all of the nops required as part of the alignment operation. In the cases // when no nops are added then The fragment is still created but it remains // empty. - emitCodeAlignment(Align(64), &STI, 4); + emitCodeAlignment(Align(64), STI, 4); // Emit the instruction. // Since the previous emit created a new fragment then adding this instruction diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp index 54163ee306cba..fae35c4248e7e 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCXCOFFStreamer.cpp @@ -42,7 +42,7 @@ void PPCXCOFFStreamer::emitPrefixedInstruction(const MCInst &Inst, // prefixed instruction. Align to 64 bytes if possible but add a maximum of 4 // bytes when trying to do that. If alignment requires adding more than 4 // bytes then the instruction won't be aligned. - emitCodeAlignment(Align(64), &STI, 4); + emitCodeAlignment(Align(64), STI, 4); // Emit the instruction. // Since the previous emit created a new fragment then adding this instruction diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 0a2c7a1719042..51f2ff4a68be7 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1901,7 +1901,7 @@ void PPCLinuxAsmPrinter::emitInstruction(const MachineInstr *MI) { // // Update compiler-rt/lib/xray/xray_powerpc64.cc accordingly when number // of instructions change. 
- OutStreamer->emitCodeAlignment(Align(8), &getSubtargetInfo()); + OutStreamer->emitCodeAlignment(Align(8), getSubtargetInfo()); MCSymbol *BeginOfSled = OutContext.createTempSymbol(); OutStreamer->emitLabel(BeginOfSled); EmitToStreamer(*OutStreamer, RetInst); diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index a5e35977f8407..ef140fc79ffd9 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -292,7 +292,7 @@ void RISCVAsmPrinter::emitLpadAlignedCall(const MachineInstr &MI) { HasRelax = MCSTI.hasFeature(RISCV::FeatureRelax); if (HasZca) - OutStreamer->emitCodeAlignment(Align(4), &MCSTI); + OutStreamer->emitCodeAlignment(Align(4), MCSTI); if (OutStreamer->hasRawTextSupport()) { // Assembly path: wrap call with .option push/exact/pop and emit LPAD @@ -597,7 +597,7 @@ void RISCVAsmPrinter::emitSled(const MachineInstr *MI, SledKind Kind) { // is a chance that we'll use C.JAL instead, so an additional NOP is needed. const uint8_t NoopsInSledCount = STI->is64Bit() ? 
33 : 21; - OutStreamer->emitCodeAlignment(Align(4), STI); + OutStreamer->emitCodeAlignment(Align(4), *STI); auto CurSled = OutContext.createTempSymbol("xray_sled_", true); OutStreamer->emitLabel(CurSled); auto Target = OutContext.createTempSymbol(); diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp index 96cbb262aa170..c311af40ef142 100644 --- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -345,7 +345,7 @@ void SparcAsmPrinter::emitInstruction(const MachineInstr *MI) { case SP::SWAPrr: case SP::SWAPri: if (MF->getSubtarget().fixTN0011()) - OutStreamer->emitCodeAlignment(Align(16), &getSubtargetInfo()); + OutStreamer->emitCodeAlignment(Align(16), getSubtargetInfo()); break; case SP::GETPCX: LowerGETPCXAndEmitMCInsts(MI, getSubtargetInfo()); diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp index b38bbb09fb8fa..9e7c35b5c3ce9 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.cpp @@ -154,7 +154,7 @@ void SystemZHLASMAsmStreamer::emitValueToAlignment(Align Alignment, } void SystemZHLASMAsmStreamer::emitCodeAlignment(Align Alignment, - const MCSubtargetInfo *STI, + const MCSubtargetInfo &STI, unsigned MaxBytesToEmit) { // Emit with a text fill value. 
if (MAI->getTextAlignFillValue()) diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.h index ee7c5dcbf552c..d3fe8d791dd0f 100644 --- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.h +++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZHLASMAsmStreamer.h @@ -87,7 +87,7 @@ class SystemZHLASMAsmStreamer final : public MCStreamer { void emitValueToAlignment(Align Alignment, int64_t Fill, uint8_t FillLen, unsigned MaxBytesToEmit) override; - void emitCodeAlignment(Align Alignment, const MCSubtargetInfo *STI, + void emitCodeAlignment(Align Alignment, const MCSubtargetInfo &STI, unsigned MaxBytesToEmit = 0) override; /// Return true if this streamer supports verbose assembly at all. diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp index 33ce25425b29e..45aff90a9998c 100644 --- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -4951,7 +4951,7 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) { Section = getStreamer().getCurrentSectionOnly(); } if (getContext().getAsmInfo().useCodeAlign(*Section)) - getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0); + getStreamer().emitCodeAlignment(Align(2), getSTI(), 0); else getStreamer().emitValueToAlignment(Align(2), 0, 1, 0); return false; diff --git a/llvm/lib/Target/X86/X86MCInstLower.cpp b/llvm/lib/Target/X86/X86MCInstLower.cpp index 9d285b5e2dcf0..2219d0a222cfc 100644 --- a/llvm/lib/Target/X86/X86MCInstLower.cpp +++ b/llvm/lib/Target/X86/X86MCInstLower.cpp @@ -1127,7 +1127,7 @@ void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, // First we emit the label and the jump. 
auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true); OutStreamer->AddComment("# XRay Custom Event Log"); - OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); + OutStreamer->emitCodeAlignment(Align(2), getSubtargetInfo()); OutStreamer->emitLabel(CurSled); // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as @@ -1225,7 +1225,7 @@ void X86AsmPrinter::LowerPATCHABLE_TYPED_EVENT_CALL(const MachineInstr &MI, // First we emit the label and the jump. auto CurSled = OutContext.createTempSymbol("xray_typed_event_sled_", true); OutStreamer->AddComment("# XRay Typed Event Log"); - OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); + OutStreamer->emitCodeAlignment(Align(2), getSubtargetInfo()); OutStreamer->emitLabel(CurSled); // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as @@ -1325,7 +1325,7 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI, // call // 5 bytes // auto CurSled = OutContext.createTempSymbol("xray_sled_", true); - OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); + OutStreamer->emitCodeAlignment(Align(2), getSubtargetInfo()); OutStreamer->emitLabel(CurSled); // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as @@ -1355,7 +1355,7 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI, // // This just makes sure that the alignment for the next instruction is 2. auto CurSled = OutContext.createTempSymbol("xray_sled_", true); - OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); + OutStreamer->emitCodeAlignment(Align(2), getSubtargetInfo()); OutStreamer->emitLabel(CurSled); unsigned OpCode = MI.getOperand(0).getImm(); MCInst Ret; @@ -1408,7 +1408,7 @@ void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual // tail call much like how we have it in PATCHABLE_RET. 
auto CurSled = OutContext.createTempSymbol("xray_sled_", true); - OutStreamer->emitCodeAlignment(Align(2), &getSubtargetInfo()); + OutStreamer->emitCodeAlignment(Align(2), getSubtargetInfo()); OutStreamer->emitLabel(CurSled); auto Target = OutContext.createTempSymbol(); From a65576ac4e2cb1397f26ab80189f110479cc7a6b Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Mon, 22 Jun 2026 10:52:29 -0700 Subject: [PATCH 053/511] [CIR] Lower const arrays as a single llvm.mlir.constant (#203590) When compiling the blender benchmark for SPEC CPU2017, we hit a case where a very large array (more than 400k elements) is initialized with constant values. However, because it contains trailing zeros, CIR generates a constant record initializer (an array of elements, plus a zero-initialized trailing array). We were lowering this to the LLVM dialect using a global initializer function with a huge number of calls to insertelement. The subsequent lowering to LLVM IR constant folded back to a constant initializer, but it took about 40 minutes to compile. The recent fix to avoid calling insertelement for the array initialization didn't fix this case because it handled only arrays, not records. This change updates the lowering to the LLVM dialect to lower constant array attributes to a single llvm.mlir.constant value rather than attempting to build a chain of insertvalue ops whenever possible.
--- clang/include/clang/CIR/LoweringHelpers.h | 5 ++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 32 ++++++++-- clang/lib/CIR/Lowering/LoweringHelpers.cpp | 63 +++++++++++++++++++ .../const-array-bulk-lowering-fallbacks.cir | 13 ++++ .../CIR/Lowering/const-array-of-pointers.cir | 12 ++++ 5 files changed, 120 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/CIR/LoweringHelpers.h b/clang/include/clang/CIR/LoweringHelpers.h index 3f3e939621c37..b4567828392c9 100644 --- a/clang/include/clang/CIR/LoweringHelpers.h +++ b/clang/include/clang/CIR/LoweringHelpers.h @@ -38,6 +38,11 @@ lowerConstArrayAttr(cir::ConstArrayAttr constArr, const mlir::TypeConverter *converter, mlir::ModuleOp moduleOp = {}); +std::optional +lowerConstRecordAttr(cir::ConstRecordAttr constRecord, + const mlir::TypeConverter *converter, + mlir::ModuleOp moduleOp = {}); + mlir::Value getConstAPInt(mlir::OpBuilder &bld, mlir::Location loc, mlir::Type typ, const llvm::APInt &val); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index c844375a000e0..25fa6d1625301 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -481,6 +481,12 @@ mlir::Value CIRAttrToValue::visitCirAttr(cir::ConstArrayAttr attr) { mlir::Location loc = parentOp->getLoc(); mlir::Value result; + // When the array can be represented as a single dense constant, emit one + // llvm.mlir.constant instead of a chain of llvm.insertvalue ops. 
+ if (std::optional denseAttr = + lowerConstArrayAttr(attr, converter)) + return mlir::LLVM::ConstantOp::create(rewriter, loc, llvmTy, *denseAttr); + if (attr.hasTrailingZeros()) { mlir::Type arrayTy = attr.getType(); result = mlir::LLVM::ZeroOp::create(rewriter, loc, @@ -2503,11 +2509,27 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite( } } return matchAndRewriteRegionInitializedGlobal(op, init.value(), rewriter); - } else if (mlir::isa( - init.value())) { + } else if (auto constRecord = + mlir::dyn_cast(init.value())) { + // Bulk-emit llvm.mlir.global when every member of the record can be + // lowered to a constant attribute. The LLVM dialect global translation + // turns an ArrayAttr (one element per struct field) into an + // llvm::ConstantStruct, so the whole initializer becomes a single + // attribute on the global instead of an insertvalue region. + mlir::ModuleOp modOp = op->getParentOfType(); + if (std::optional bulkInit = + lowerConstRecordAttr(constRecord, typeConverter, modOp)) { + mlir::SymbolRefAttr comdatAttr = getComdatAttr(op, rewriter); + rewriter.replaceOpWithNewOp( + op, llvmType, isConst, linkage, symbol, bulkInit.value(), alignment, + addrSpace, isDsoLocal, isThreadLocal, comdatAttr, attributes); + return mlir::success(); + } + return matchAndRewriteRegionInitializedGlobal(op, init.value(), rewriter); + } else if (mlir::isa(init.value())) { // TODO(cir): once LLVM's dialect has proper equivalent attributes this // should be updated. For now, we use a custom op to initialize globals // to the appropriate value. 
diff --git a/clang/lib/CIR/Lowering/LoweringHelpers.cpp b/clang/lib/CIR/Lowering/LoweringHelpers.cpp index 92e64e188633e..f298095e56824 100644 --- a/clang/lib/CIR/Lowering/LoweringHelpers.cpp +++ b/clang/lib/CIR/Lowering/LoweringHelpers.cpp @@ -272,6 +272,69 @@ lowerConstArrayAttr(cir::ConstArrayAttr constArr, return std::nullopt; } +/// Lower a constant attribute that initializes a single member of a record (or +/// a leaf of a nested aggregate) to an LLVM-dialect attribute that can be +/// attached directly to an \c llvm.mlir.global, avoiding an insertvalue +/// initializer region. Returns \c std::nullopt when the attribute cannot be +/// represented as a single constant attribute (e.g. an indexed +/// \c GlobalViewAttr), in which case the caller falls back to the region-based +/// lowering. +static std::optional +lowerConstRecordMemberAttr(mlir::Attribute attr, + const mlir::TypeConverter *converter, + mlir::ModuleOp moduleOp) { + mlir::MLIRContext *ctx = attr.getContext(); + + if (auto arrayAttr = mlir::dyn_cast(attr)) + return lowerConstArrayAttr(arrayAttr, converter, moduleOp); + + if (auto recordAttr = mlir::dyn_cast(attr)) + return lowerConstRecordAttr(recordAttr, converter, moduleOp); + + if (mlir::isa(attr)) + return mlir::LLVM::ZeroAttr::get(ctx); + + if (mlir::isa(attr)) + return mlir::LLVM::UndefAttr::get(ctx); + + if (auto intAttr = mlir::dyn_cast(attr)) + return mlir::IntegerAttr::get(converter->convertType(intAttr.getType()), + intAttr.getValue()); + + if (auto boolAttr = mlir::dyn_cast(attr)) + return mlir::IntegerAttr::get(converter->convertType(boolAttr.getType()), + boolAttr.getValue() ? 1 : 0); + + if (auto fpAttr = mlir::dyn_cast(attr)) + return mlir::FloatAttr::get(converter->convertType(fpAttr.getType()), + fpAttr.getValue()); + + // Null pointers and simple address-of-global references can be represented + // as constant attributes; anything more complex uses the region fallback. 
+ return lowerPointerElementAttr(attr, ctx, moduleOp, converter); +} + +std::optional +lowerConstRecordAttr(cir::ConstRecordAttr constRecord, + const mlir::TypeConverter *converter, + mlir::ModuleOp moduleOp) { + // Build one constant attribute per record member. The LLVM dialect global + // translation accepts an ArrayAttr (one element per struct field) and emits + // an llvm::ConstantStruct, so the whole initializer can be a single + // attribute on the global instead of an insertvalue region. + mlir::ArrayAttr memberAttrs = constRecord.getMembers(); + llvm::SmallVector loweredMembers; + loweredMembers.reserve(memberAttrs.size()); + for (mlir::Attribute member : memberAttrs) { + std::optional lowered = + lowerConstRecordMemberAttr(member, converter, moduleOp); + if (!lowered) + return std::nullopt; + loweredMembers.push_back(*lowered); + } + return mlir::ArrayAttr::get(constRecord.getContext(), loweredMembers); +} + mlir::Value getConstAPInt(mlir::OpBuilder &bld, mlir::Location loc, mlir::Type typ, const llvm::APInt &val) { return mlir::LLVM::ConstantOp::create(bld, loc, typ, val); diff --git a/clang/test/CIR/Lowering/const-array-bulk-lowering-fallbacks.cir b/clang/test/CIR/Lowering/const-array-bulk-lowering-fallbacks.cir index 56d80c655cb72..2c894a87a4cb6 100644 --- a/clang/test/CIR/Lowering/const-array-bulk-lowering-fallbacks.cir +++ b/clang/test/CIR/Lowering/const-array-bulk-lowering-fallbacks.cir @@ -2,6 +2,8 @@ !s32i = !cir.int !s8i = !cir.int +!rec_data = !cir.struct<{!cir.array, !cir.array}> +!rec_mixed = !cir.struct<{!s32i, !cir.ptr}> module attributes {cir.triple = "x86_64-unknown-linux-gnu"} { cir.global external @tz_arr = @@ -34,6 +36,11 @@ module attributes {cir.triple = "x86_64-unknown-linux-gnu"} { #cir.global_view<@s1> : !cir.ptr ]> : !cir.array x 2> ]> : !cir.array x 2> x 2> + + cir.global constant external dso_local @blob = #cir.const_record<{ + #cir.const_array<[#cir.int<66> : !s8i, #cir.int<76> : !s8i, #cir.int<69> : !s8i, #cir.int<78> : !s8i]> 
: !cir.array, + #cir.zero : !cir.array + }> : !rec_data } // CHECK-LABEL: llvm.mlir.global external @tz_arr( @@ -53,3 +60,9 @@ module attributes {cir.triple = "x86_64-unknown-linux-gnu"} { // CHECK-LABEL: llvm.mlir.global external @ptr2d // CHECK: llvm.insertvalue + +// CHECK-LABEL: llvm.mlir.global external constant @blob( +// CHECK-SAME: [dense<[66, 76, 69, 78]> : tensor<4xi8>, #llvm.zero] +// CHECK-SAME: : !llvm.struct<(array<4 x i8>, array<2 x i8>)> +// CHECK-NOT: llvm.insertvalue +// CHECK-NOT: llvm.return diff --git a/clang/test/CIR/Lowering/const-array-of-pointers.cir b/clang/test/CIR/Lowering/const-array-of-pointers.cir index 47797c0cf3271..5d590e086fe8b 100644 --- a/clang/test/CIR/Lowering/const-array-of-pointers.cir +++ b/clang/test/CIR/Lowering/const-array-of-pointers.cir @@ -1,6 +1,7 @@ // RUN: cir-opt %s --cir-to-llvm -o - | FileCheck %s !s32i = !cir.int +!rec_mixed = !cir.struct<{!s32i, !cir.ptr}> module attributes {cir.triple = "x86_64-unknown-linux-gnu"} { cir.global "private" constant cir_private dso_local @g0 = #cir.int<0> : !s32i @@ -12,7 +13,18 @@ module attributes {cir.triple = "x86_64-unknown-linux-gnu"} { #cir.global_view<@g1> : !cir.ptr, #cir.global_view<@g2> : !cir.ptr ]> : !cir.array x 3> + + cir.global constant external dso_local @mixed = #cir.const_record<{ + #cir.int<7> : !s32i, + #cir.global_view<@g0> : !cir.ptr + }> : !rec_mixed } // CHECK-LABEL: llvm.mlir.global external constant @ptrs( // CHECK-NOT: llvm.insertvalue + +// CHECK-LABEL: llvm.mlir.global external constant @mixed( +// CHECK-SAME: [7 : i32, @g0] +// CHECK-SAME: : !llvm.struct<(i32, ptr)> +// CHECK-NOT: llvm.insertvalue +// CHECK-NOT: llvm.return From 9e477bca3a3de8f3c7bc9e27d5b4add4a2145517 Mon Sep 17 00:00:00 2001 From: Bidhan Date: Mon, 22 Jun 2026 23:52:38 +0545 Subject: [PATCH 054/511] [BPF] override getFrameIndexReference for frame object offsets (#204722) ### Summary The BPF backend currently does not override `getFrameIndexReference()`. 
Since BPF uses a fixed frame pointer (R10), frame object offsets are already expressed relative to the frame pointer. The generic `TargetFrameLowering::getFrameIndexReference()` implementation adjusts offsets using the stack size, which is not appropriate for BPF. This PR overrides `getFrameIndexReference()` to return the correct frame object offsets for the BPF frame model, resulting in accurate debug locations for stack variables. For example, the stack variable `local` in the reproducer below previously received: ``` DW_AT_location (DW_OP_fbreg +0) ``` and now correctly receives: ``` DW_AT_location (DW_OP_fbreg -32) ``` which matches the actual location of the stack object relative to the frame pointer. ----- **Rust code snippet** ``` #[repr(C)] #[derive(Copy, Clone)] pub struct S { pub a: u64, pub b: u64, pub c: u64, } #[unsafe(no_mangle)] pub extern "C" fn entry(x: u64) -> u64 { let mut local = S { a: x, b: x + 1, c: x + 2 }; let ptr_to_b = &mut local.a as *mut u64; let ptr_to_b = core::hint::black_box(ptr_to_b); unsafe { core::ptr::write_volatile(ptr_to_b, x + 42); } unsafe { core::ptr::read_volatile(ptr_to_b) } } ``` **Build commands** ``` > rustc +nightly repro.rs --emit=llvm-ir -g -O --crate-type=lib -o repro.ll > ./build/bin/llc repro.ll -march=bpfel -O2 -filetype=obj -o repro.o > ./build/bin/llvm-dwarfdump --debug-info --debug-loc --debug-loclists repro.o > repro.debug ``` **DW_TAG_variable info for variable "local"** Before patch: ``` 0x00000114: DW_TAG_variable DW_AT_location (DW_OP_fbreg +0) DW_AT_name ("local") DW_AT_alignment (8) DW_AT_decl_file ("/home/LLVM/llvm-project/repro.rs") DW_AT_decl_line (11) DW_AT_type (0x000001b8 "repro::S") ``` After patch: ``` 0x00000114: DW_TAG_variable DW_AT_location (DW_OP_fbreg -32) DW_AT_name ("local") DW_AT_alignment (8) DW_AT_decl_file ("/home/LLVM/llvm-project/repro.rs") DW_AT_decl_line (11) DW_AT_type (0x000001bc "repro::S") ``` --- llvm/lib/Target/BPF/BPFFrameLowering.cpp | 11 ++
llvm/lib/Target/BPF/BPFFrameLowering.h | 3 + llvm/test/DebugInfo/BPF/dbg-location-fbreg.ll | 127 ++++++++++++++++++ 3 files changed, 141 insertions(+) create mode 100644 llvm/test/DebugInfo/BPF/dbg-location-fbreg.ll diff --git a/llvm/lib/Target/BPF/BPFFrameLowering.cpp b/llvm/lib/Target/BPF/BPFFrameLowering.cpp index 00780b4d1406f..b9df6faa101d4 100644 --- a/llvm/lib/Target/BPF/BPFFrameLowering.cpp +++ b/llvm/lib/Target/BPF/BPFFrameLowering.cpp @@ -36,3 +36,14 @@ void BPFFrameLowering::determineCalleeSaves(MachineFunction &MF, SavedRegs.reset(BPF::R8); SavedRegs.reset(BPF::R9); } + +StackOffset BPFFrameLowering::getFrameIndexReference(const MachineFunction &MF, + int FI, + Register &FrameReg) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); + + FrameReg = RI->getFrameRegister(MF); + + return StackOffset::getFixed(MFI.getObjectOffset(FI)); +} diff --git a/llvm/lib/Target/BPF/BPFFrameLowering.h b/llvm/lib/Target/BPF/BPFFrameLowering.h index 6beffcbe69dd0..76ddce740d383 100644 --- a/llvm/lib/Target/BPF/BPFFrameLowering.h +++ b/llvm/lib/Target/BPF/BPFFrameLowering.h @@ -35,6 +35,9 @@ class BPFFrameLowering : public TargetFrameLowering { return MBB.erase(MI); } + StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, + Register &FrameReg) const override; + protected: bool hasFPImpl(const MachineFunction &MF) const override; }; diff --git a/llvm/test/DebugInfo/BPF/dbg-location-fbreg.ll b/llvm/test/DebugInfo/BPF/dbg-location-fbreg.ll new file mode 100644 index 0000000000000..b806e573f50ec --- /dev/null +++ b/llvm/test/DebugInfo/BPF/dbg-location-fbreg.ll @@ -0,0 +1,127 @@ +; RUN: llc -O2 -mtriple=bpfel -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -debug-info %t | FileCheck %s +; RUN: llc -O2 -mtriple=bpfeb -filetype=obj %s -o %t +; RUN: llvm-dwarfdump -debug-info %t | FileCheck %s +; +; Source: +; #![no_std] +; use core::hint::black_box; +; +; #[inline(never)] +; pub extern "C" 
fn entrypoint() -> u64 { +; let mut var_a: u64 = 0x1111_2222_3333_4444; +; let mut var_b: u64 = 0xAAAA_BBBB_CCCC_DDDD; +; black_box(&mut var_a); +; black_box(&mut var_b); +; var_a ^ var_b +; } +; Compilation flag: +; rustc lib.rs --emit=llvm-ir -g -O --crate-type=lib + +; ModuleID = 'lib.1f96fa33bef8cf6b-cgu.0' +source_filename = "lib.1f96fa33bef8cf6b-cgu.0" +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.12.0" + +; lib::entrypoint +; Function Attrs: noinline nounwind uwtable +define noundef i64 @_ZN3lib10entrypoint17hbcd54b7b97a1f371E() unnamed_addr #0 !dbg !6 { +start: + %0 = alloca [8 x i8], align 8 + %1 = alloca [8 x i8], align 8 + %var_b = alloca [8 x i8], align 8 + %var_a = alloca [8 x i8], align 8 + #dbg_declare(ptr %var_a, !13, !DIExpression(), !18) + #dbg_declare(ptr %var_b, !15, !DIExpression(), !19) + call void @llvm.lifetime.start.p0(ptr nonnull %var_a), !dbg !20 + store i64 1229801703532086340, ptr %var_a, align 8, !dbg !21 + call void @llvm.lifetime.start.p0(ptr nonnull %var_b), !dbg !22 + store i64 -6148895925951734307, ptr %var_b, align 8, !dbg !23 + #dbg_value(ptr %var_a, !24, !DIExpression(), !35) + call void @llvm.lifetime.start.p0(ptr nonnull %1), !dbg !37 + store ptr %var_a, ptr %1, align 8, !dbg !37 + call void asm sideeffect "", "r,~{memory}"(ptr nonnull %1) #2, !dbg !37, !srcloc !38 + call void @llvm.lifetime.end.p0(ptr nonnull %1), !dbg !37 + #dbg_value(ptr %var_b, !24, !DIExpression(), !39) + call void @llvm.lifetime.start.p0(ptr nonnull %0), !dbg !41 + store ptr %var_b, ptr %0, align 8, !dbg !41 + call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0) #2, !dbg !41, !srcloc !38 + call void @llvm.lifetime.end.p0(ptr nonnull %0), !dbg !41 + %_7 = load i64, ptr %var_a, align 8, !dbg !42, !noundef !17 + %_8 = load i64, ptr %var_b, align 8, !dbg !43, !noundef !17 + %_0 = xor i64 %_8, %_7, !dbg !42 + call void @llvm.lifetime.end.p0(ptr nonnull 
%var_b), !dbg !44 + call void @llvm.lifetime.end.p0(ptr nonnull %var_a), !dbg !45 + ret i64 %_0, !dbg !46 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(ptr captures(none)) #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(ptr captures(none)) #1 + +attributes #0 = { noinline nounwind uwtable "frame-pointer"="all" "probe-stack"="inline-asm" "target-cpu"="penryn" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2} +!llvm.ident = !{!3} +!llvm.dbg.cu = !{!4} + +!0 = !{i32 8, !"PIC Level", i32 2} +!1 = !{i32 7, !"Dwarf Version", i32 4} +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !{!"rustc version 1.96.0 (ac68faa20 2026-05-25)"} +!4 = distinct !DICompileUnit(language: DW_LANG_Rust, file: !5, producer: "clang LLVM (rustc version 1.96.0 (ac68faa20 2026-05-25))", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!5 = !DIFile(filename: "lib.rs/@/lib.1f96fa33bef8cf6b-cgu.0", directory: "/tmp/llvm-test/src") +!6 = distinct !DISubprogram(name: "entrypoint", linkageName: "_ZN3lib10entrypoint17hbcd54b7b97a1f371E", scope: !8, file: !7, line: 6, type: !9, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !4, templateParams: !17, retainedNodes: !12) +!7 = !DIFile(filename: "lib.rs", directory: "/tmp/llvm-test/src", checksumkind: CSK_MD5, checksum: "47ba490b7ac3b9eeccd8e8f3180f2b08") +!8 = !DINamespace(name: "lib", scope: null) +!9 = !DISubroutineType(types: !10) +!10 = !{!11} +!11 = !DIBasicType(name: "u64", size: 64, encoding: DW_ATE_unsigned) +!12 = !{!13, !15} +!13 = !DILocalVariable(name: "var_a", scope: !14, file: !7, line: 7, type: !11, align: 64) +!14 = 
distinct !DILexicalBlock(scope: !6, file: !7, line: 7, column: 5) +!15 = !DILocalVariable(name: "var_b", scope: !16, file: !7, line: 8, type: !11, align: 64) +!16 = distinct !DILexicalBlock(scope: !14, file: !7, line: 8, column: 5) +!17 = !{} +!18 = !DILocation(line: 7, column: 9, scope: !14) +!19 = !DILocation(line: 8, column: 9, scope: !16) +!20 = !DILocation(line: 7, column: 9, scope: !6) +!21 = !DILocation(line: 7, column: 26, scope: !6) +!22 = !DILocation(line: 8, column: 9, scope: !14) +!23 = !DILocation(line: 8, column: 26, scope: !14) +!24 = !DILocalVariable(name: "dummy", arg: 1, scope: !25, file: !26, line: 490, type: !31) +!25 = distinct !DISubprogram(name: "black_box<&mut u64>", linkageName: "_ZN4core4hint9black_box17h799a64f36faa5e88E", scope: !27, file: !26, line: 490, type: !29, scopeLine: 490, flags: DIFlagPrototyped, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition | DISPFlagOptimized, unit: !4, templateParams: !33, retainedNodes: !32) +!26 = !DIFile(filename: "/Users/b/.rustup/toolchains/aarch64-apple-darwin/lib/rustlib/src/rust/library/core/src/hint.rs", directory: "", checksumkind: CSK_MD5, checksum: "3bdbac5c7616d584a36b114744411911") +!27 = !DINamespace(name: "hint", scope: !28) +!28 = !DINamespace(name: "core", scope: null) +!29 = !DISubroutineType(types: !30) +!30 = !{!31, !31} +!31 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&mut u64", baseType: !11, size: 64, align: 64, dwarfAddressSpace: 0) +!32 = !{!24} +!33 = !{!34} +!34 = !DITemplateTypeParameter(name: "T", type: !31) +!35 = !DILocation(line: 0, scope: !25, inlinedAt: !36) +!36 = !DILocation(line: 10, column: 5, scope: !16) +!37 = !DILocation(line: 491, column: 5, scope: !25, inlinedAt: !36) +!38 = !{i64 872325037889853} +!39 = !DILocation(line: 0, scope: !25, inlinedAt: !40) +!40 = !DILocation(line: 11, column: 5, scope: !16) +!41 = !DILocation(line: 491, column: 5, scope: !25, inlinedAt: !40) +!42 = !DILocation(line: 13, column: 5, scope: !16) +!43 = !DILocation(line: 13, 
column: 13, scope: !16) +!44 = !DILocation(line: 14, column: 1, scope: !14) +!45 = !DILocation(line: 14, column: 1, scope: !6) +!46 = !DILocation(line: 14, column: 2, scope: !6) + +; CHECK: DW_TAG_subprogram +; CHECK: DW_AT_name ("entrypoint") +; CHECK: DW_TAG_variable +; CHECK: DW_AT_location{{.*}}(DW_OP_fbreg -24) +; CHECK: DW_AT_name ("var_a") +; CHECK: DW_TAG_variable +; CHECK: DW_AT_location{{.*}}(DW_OP_fbreg -16) +; CHECK: DW_AT_name ("var_b") From fa714ad3b70d14ff633446503f29b95ad2f484b1 Mon Sep 17 00:00:00 2001 From: Akhil Goel Date: Mon, 22 Jun 2026 11:09:27 -0700 Subject: [PATCH 055/511] [mlir][Math][XeVM] Add Math to OCL conversion patterns (#198370) This PR adds conversion patterns to convert supported math ops to SPIR-V OpenCL builtin calls. These lowerings correspond to `OpExtInst` calls into the OpenCL SPIR-V extended instruction set via mangled `__spirv_ocl_` entry points for f32/f64 variants. --- .../mlir/Conversion/MathToXeVM/MathToXeVM.h | 9 +- mlir/include/mlir/Conversion/Passes.td | 12 +- .../GPUCommon/OpToFuncCallLowering.h | 18 +- mlir/lib/Conversion/MathToXeVM/CMakeLists.txt | 2 + mlir/lib/Conversion/MathToXeVM/MathToXeVM.cpp | 130 ++++- .../Conversion/MathToXeVM/math-to-ocl.mlir | 533 ++++++++++++++++++ 6 files changed, 673 insertions(+), 31 deletions(-) create mode 100644 mlir/test/Conversion/MathToXeVM/math-to-ocl.mlir diff --git a/mlir/include/mlir/Conversion/MathToXeVM/MathToXeVM.h b/mlir/include/mlir/Conversion/MathToXeVM/MathToXeVM.h index 91d3c92fd6296..1bc6b095f9ad0 100644 --- a/mlir/include/mlir/Conversion/MathToXeVM/MathToXeVM.h +++ b/mlir/include/mlir/Conversion/MathToXeVM/MathToXeVM.h @@ -21,7 +21,14 @@ class Pass; /// Populate the given list with patterns that convert from Math to XeVM calls. 
void populateMathToXeVMConversionPatterns(RewritePatternSet &patterns, - bool convertArith); + bool convertArith, + PatternBenefit benefit = 1); + +/// Populate the given list with patterns that convert from Math to OCL LLVM-SPV +/// builtin calls. +void populateMathToScalarOCLExtSetConversionPatterns( + const LLVMTypeConverter &converter, RewritePatternSet &patterns, + PatternBenefit benefit = 1); } // namespace mlir #endif // MLIR_CONVERSION_MATHTOXEVM_MATHTOXEVM_H_ diff --git a/mlir/include/mlir/Conversion/Passes.td b/mlir/include/mlir/Conversion/Passes.td index ae93769a66762..550e6b853057d 100644 --- a/mlir/include/mlir/Conversion/Passes.td +++ b/mlir/include/mlir/Conversion/Passes.td @@ -904,11 +904,15 @@ def ConvertMathToXeVM : Pass<"convert-math-to-xevm"> { are typically mapped directly to native device instructions, often resulting in better performance. However, the precision/error of these intrinsics are implementation-defined, and thus math ops are only converted when they - have the `afn` fastmath flag enabled. + have the `afn` fastmath flag enabled. However, if the `convertToOCL` flag is + set then all supported math ops would be lowered to OpenCL math intrinsics. }]; - let options = [Option< - "convertArith", "convert-arith", "bool", /*default=*/"true", - "Convert supported Arith ops (e.g. arith.divf) as well.">]; + let options = [ + Option<"convertArith", "convert-arith", "bool", /*default=*/"true", + "Convert supported Arith ops (e.g. 
arith.divf) as well.">, + Option<"convertToOCL", "convert-to-ocl", "bool", /*default=*/"false", + "Convert supported Math ops to OCL intrinsics."> + ]; let dependentDialects = [ "arith::ArithDialect", "xevm::XeVMDialect", diff --git a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h index 9f36e5c369d06..cb9b6da071839 100644 --- a/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h +++ b/mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h @@ -54,14 +54,14 @@ using has_get_fastmath_t = decltype(std::declval().getFastmath()); template struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { public: - explicit OpToFuncCallLowering(const LLVMTypeConverter &lowering, - StringRef f32Func, StringRef f64Func, - StringRef f32ApproxFunc, StringRef f16Func, - StringRef i32Func = "", - PatternBenefit benefit = 1) + explicit OpToFuncCallLowering( + const LLVMTypeConverter &lowering, StringRef f32Func, StringRef f64Func, + StringRef f32ApproxFunc, StringRef f16Func, StringRef i32Func = "", + PatternBenefit benefit = 1, + LLVM::cconv::CConv cconv = LLVM::cconv::CConv::C) : ConvertOpToLLVMPattern(lowering, benefit), f32Func(f32Func), f64Func(f64Func), f32ApproxFunc(f32ApproxFunc), f16Func(f16Func), - i32Func(i32Func) {} + i32Func(i32Func), cconv(cconv) {} LogicalResult matchAndRewrite(SourceOp op, typename SourceOp::Adaptor adaptor, @@ -104,6 +104,7 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { LLVMFuncOp funcOp = appendOrGetFuncOp(funcName, funcType, op); auto callOp = LLVM::CallOp::create(rewriter, op->getLoc(), funcOp, castedOperands); + callOp.setCConv(cconv); if (resultType == adaptor.getOperands().front().getType()) { rewriter.replaceOp(op, {callOp.getResult()}); @@ -171,7 +172,9 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { // location as debug info metadata inside of a function cannot be used // outside of that function. 
auto globalloc = op->getLoc()->findInstanceOfOrUnknown(); - return LLVMFuncOp::create(b, globalloc, funcName, funcType); + auto newFuncOp = LLVMFuncOp::create(b, globalloc, funcName, funcType); + newFuncOp.setCConv(cconv); + return newFuncOp; } StringRef getFunctionName(Type type, SourceOp op) const { @@ -202,6 +205,7 @@ struct OpToFuncCallLowering : public ConvertOpToLLVMPattern { const std::string f32ApproxFunc; const std::string f16Func; const std::string i32Func; + const LLVM::cconv::CConv cconv; }; } // namespace mlir diff --git a/mlir/lib/Conversion/MathToXeVM/CMakeLists.txt b/mlir/lib/Conversion/MathToXeVM/CMakeLists.txt index 050c0ed90e383..ca6c07bddeee4 100644 --- a/mlir/lib/Conversion/MathToXeVM/CMakeLists.txt +++ b/mlir/lib/Conversion/MathToXeVM/CMakeLists.txt @@ -11,8 +11,10 @@ add_mlir_conversion_library(MLIRMathToXeVM Core LINK_LIBS PUBLIC + MLIRAnalysis MLIRArithAttrToLLVMConversion MLIRArithDialect + MLIRGPUToGPURuntimeTransforms MLIRLLVMCommonConversion MLIRLLVMDialect MLIRMathDialect diff --git a/mlir/lib/Conversion/MathToXeVM/MathToXeVM.cpp b/mlir/lib/Conversion/MathToXeVM/MathToXeVM.cpp index 0fe31d000237d..4ced4a83a628f 100644 --- a/mlir/lib/Conversion/MathToXeVM/MathToXeVM.cpp +++ b/mlir/lib/Conversion/MathToXeVM/MathToXeVM.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/Conversion/MathToXeVM/MathToXeVM.h" +#include "mlir/Analysis/DataLayoutAnalysis.h" #include "mlir/Conversion/ArithCommon/AttrToLLVMConverter.h" #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" @@ -15,6 +16,9 @@ #include "mlir/Pass/Pass.h" #include "llvm/Support/FormatVariadic.h" +#include "../GPUCommon/GPUOpsLowering.h" +#include "../GPUCommon/OpToFuncCallLowering.h" + namespace mlir { #define GEN_PASS_DEF_CONVERTMATHTOXEVM #include "mlir/Conversion/Passes.h.inc" @@ -119,33 +123,104 @@ struct ConvertNativeFuncPattern final : public OpConversionPattern { const 
StringRef nativeFunc; }; +template +static void populateOCLExtSetOpPatterns(const LLVMTypeConverter &converter, + RewritePatternSet &patterns, + PatternBenefit benefit, + StringRef opName) { + std::string prefix = "__spirv_ocl_"; + std::string mangledName = "_Z" + + std::to_string(prefix.size() + opName.size()) + + prefix + opName.str(); + + patterns.add>(converter, benefit); + patterns.add>( + converter, mangledName + "f", mangledName + "d", + /*f32ApproxFunc=*/"", /*f16Func=*/"", + /*i32Func=*/"", benefit, LLVM::cconv::CConv::SPIR_FUNC); +} + +void mlir::populateMathToScalarOCLExtSetConversionPatterns( + const LLVMTypeConverter &converter, RewritePatternSet &patterns, + PatternBenefit benefit) { + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "acos"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "acosh"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "asin"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "asinh"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "atan"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "atan2"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "atanh"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "cbrt"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "copysign"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, "cos"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "cosh"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, "erf"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "erfc"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, "exp"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "exp2"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "expm1"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, "log"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "log10"); + 
populateOCLExtSetOpPatterns(converter, patterns, benefit, + "log1p"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "log2"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "pow"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "rsqrt"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, "sin"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "sinh"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "sqrt"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, "tan"); + populateOCLExtSetOpPatterns(converter, patterns, benefit, + "tanh"); +} + void mlir::populateMathToXeVMConversionPatterns(RewritePatternSet &patterns, - bool convertArith) { - patterns.add>(patterns.getContext(), - "__spirv_ocl_native_exp"); - patterns.add>(patterns.getContext(), - "__spirv_ocl_native_cos"); + bool convertArith, + PatternBenefit benefit) { + patterns.add>( + patterns.getContext(), "__spirv_ocl_native_exp", benefit); + patterns.add>( + patterns.getContext(), "__spirv_ocl_native_cos", benefit); patterns.add>( - patterns.getContext(), "__spirv_ocl_native_exp2"); - patterns.add>(patterns.getContext(), - "__spirv_ocl_native_log"); + patterns.getContext(), "__spirv_ocl_native_exp2", benefit); + patterns.add>( + patterns.getContext(), "__spirv_ocl_native_log", benefit); patterns.add>( - patterns.getContext(), "__spirv_ocl_native_log2"); + patterns.getContext(), "__spirv_ocl_native_log2", benefit); patterns.add>( - patterns.getContext(), "__spirv_ocl_native_log10"); + patterns.getContext(), "__spirv_ocl_native_log10", benefit); patterns.add>( - patterns.getContext(), "__spirv_ocl_native_powr"); + patterns.getContext(), "__spirv_ocl_native_powr", benefit); patterns.add>( - patterns.getContext(), "__spirv_ocl_native_rsqrt"); - patterns.add>(patterns.getContext(), - "__spirv_ocl_native_sin"); + patterns.getContext(), "__spirv_ocl_native_rsqrt", benefit); + patterns.add>( + patterns.getContext(), 
"__spirv_ocl_native_sin", benefit); patterns.add>( - patterns.getContext(), "__spirv_ocl_native_sqrt"); - patterns.add>(patterns.getContext(), - "__spirv_ocl_native_tan"); + patterns.getContext(), "__spirv_ocl_native_sqrt", benefit); + patterns.add>( + patterns.getContext(), "__spirv_ocl_native_tan", benefit); if (convertArith) patterns.add>( - patterns.getContext(), "__spirv_ocl_native_divide"); + patterns.getContext(), "__spirv_ocl_native_divide", benefit); } namespace { @@ -157,9 +232,26 @@ struct ConvertMathToXeVMPass } // namespace void ConvertMathToXeVMPass::runOnOperation() { + Operation *op = getOperation(); + MLIRContext *ctx = op->getContext(); + + const auto &dl = getAnalysis(); + RewritePatternSet patterns(&getContext()); - populateMathToXeVMConversionPatterns(patterns, convertArith); + LowerToLLVMOptions options(ctx, dl.getAtOrAbove(op)); + LLVMTypeConverter converter(ctx, options); ConversionTarget target(getContext()); + + // Native OCL patterns should take precedence for `fast` ops even when + // convertToOCL is set. 
+ populateMathToXeVMConversionPatterns(patterns, convertArith, + convertToOCL + 1); + if (convertToOCL) { + populateMathToScalarOCLExtSetConversionPatterns(converter, patterns, 1); + target + .addIllegalOp(); + } target.addLegalDialect(); if (failed( applyPartialConversion(getOperation(), target, std::move(patterns)))) diff --git a/mlir/test/Conversion/MathToXeVM/math-to-ocl.mlir b/mlir/test/Conversion/MathToXeVM/math-to-ocl.mlir new file mode 100644 index 0000000000000..61efa3515657e --- /dev/null +++ b/mlir/test/Conversion/MathToXeVM/math-to-ocl.mlir @@ -0,0 +1,533 @@ +// RUN: mlir-opt %s -split-input-file -convert-math-to-xevm | FileCheck %s -check-prefixes='CHECK,CHECK-NO-OCL' +// RUN: mlir-opt %s -split-input-file -convert-math-to-xevm='convert-to-ocl=true convert-arith=true' | FileCheck %s -check-prefixes='CHECK,CHECK-OCL' + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_copysignf(f32, f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_atan2f(f32, f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_powf(f32, f32) -> f32 + // CHECK-LABEL: func @math_bin_f32 + func.func @math_bin_f32(%arg_f32_1 : f32, %arg_f32_2 : f32) -> (f32, f32, f32) { + %result1 = math.copysign %arg_f32_1, %arg_f32_2 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_copysignf(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + // CHECK-NO-OCL: math.copysign + %result2 = math.atan2 %arg_f32_1, %arg_f32_2 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_atan2f(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + // CHECK-NO-OCL: math.atan2 + %result3 = math.powf %arg_f32_1, %arg_f32_2 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_powf(%{{.*}}, %{{.*}}) : (f32, f32) -> f32 + // CHECK-NO-OCL: math.powf + func.return %result1, %result2, %result3 : f32, f32, f32 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_copysignd(f64, f64) -> f64 + // 
CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_atan2d(f64, f64) -> f64 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_powd(f64, f64) -> f64 + // CHECK-LABEL: func @math_bin_f64 + func.func @math_bin_f64(%arg_f64_1 : f64, %arg_f64_2 : f64) -> (f64, f64, f64) { + %result1 = math.copysign %arg_f64_1, %arg_f64_2 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_copysignd(%{{.*}}, %{{.*}}) : (f64, f64) -> f64 + // CHECK-NO-OCL: math.copysign + %result2 = math.atan2 %arg_f64_1, %arg_f64_2 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_atan2d(%{{.*}}, %{{.*}}) : (f64, f64) -> f64 + // CHECK-NO-OCL: math.atan2 + %result3 = math.powf %arg_f64_1, %arg_f64_2 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_powd(%{{.*}}, %{{.*}}) : (f64, f64) -> f64 + // CHECK-NO-OCL: math.powf + func.return %result1, %result2, %result3 : f64, f64, f64 + } +} + +// ----- + +module @test_module { +// CHECK-OCL-DAG: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_expm1f(f32) +// CHECK-NO-OCL-NOT: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_expm1f(f32) +// CHECK-LABEL: func.func @expm1_vector + func.func @expm1_vector(%arg0: memref<32xvector<4xf32>>, + %arg1: memref<32xvector<4xf32>>, + %idx : index) { + // CHECK: %[[ARG0:.*]] = memref.load %arg0 + %v = memref.load %arg0[%idx] : memref<32xvector<4xf32>> + // CHECK-OCL: %[[EXT_0:.*]] = llvm.extractelement %[[ARG0]] + // CHECK-OCL: %[[VAL_0:.*]] = llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_expm1f(%[[EXT_0]]) + // CHECK-OCL: llvm.insertelement %[[VAL_0]] + // CHECK-OCL: %[[EXT_1:.*]] = llvm.extractelement %[[ARG0]] + // CHECK-OCL: %[[VAL_1:.*]] = llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_expm1f(%[[EXT_1]]) + // CHECK-OCL: llvm.insertelement %[[VAL_1]] + // CHECK-OCL: %[[EXT_2:.*]] = llvm.extractelement %[[ARG0]] + // CHECK-OCL: %[[VAL_2:.*]] = llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_expm1f(%[[EXT_2]]) + // CHECK-OCL: llvm.insertelement %[[VAL_2]] + // CHECK-OCL: %[[EXT_3:.*]] = 
llvm.extractelement %[[ARG0]] + // CHECK-OCL: %[[VAL_3:.*]] = llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_expm1f(%[[EXT_3]]) + // CHECK-OCL: %[[INS:.*]] = llvm.insertelement %[[VAL_3]] + // CHECK-NO-OCL: %[[INS:.*]] = math.expm1 %[[ARG0]] + %e = math.expm1 %v : vector<4xf32> + // CHECK: memref.store %[[INS]], %arg1 + memref.store %e, %arg1[%idx] : memref<32xvector<4xf32>> + return + } +} + +// ----- + +module @test_module { + // CHECK-DAG: llvm.func @_Z{{.*}}__spirv_ocl_native_sinf(f32) -> f32 + // CHECK-DAG: llvm.func @_Z{{.*}}__spirv_ocl_native_sind(f64) -> f64 + // CHECK-LABEL: func @math_fast_sin + func.func @math_fast_sin(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.sin %arg_f32 fastmath : f32 + // CHECK: llvm.call @_Z{{.*}}__spirv_ocl_native_sinf(%{{.*}}) {fastmathFlags = #llvm.fastmath} : (f32) -> f32 + %result64 = math.sin %arg_f64 fastmath : f64 + // CHECK: llvm.call @_Z{{.*}}__spirv_ocl_native_sind(%{{.*}}) {fastmathFlags = #llvm.fastmath} : (f64) -> f64 + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_acosf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_acosd(f64) -> f64 + // CHECK-LABEL: func @math_acos + func.func @math_acos(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.acos %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_acosf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.acos + %result64 = math.acos %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_acosd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.acos + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_acoshf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_acoshd(f64) -> f64 + // CHECK-LABEL: func @math_acosh + func.func @math_acosh(%arg_f32 : f32, %arg_f64 : f64) 
-> (f32, f64) { + %result32 = math.acosh %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_acoshf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.acosh + %result64 = math.acosh %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_acoshd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.acosh + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_asinf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_asind(f64) -> f64 + // CHECK-LABEL: func @math_asin + func.func @math_asin(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.asin %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_asinf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.asin + %result64 = math.asin %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_asind(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.asin + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_asinhf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_asinhd(f64) -> f64 + // CHECK-LABEL: func @math_asinh + func.func @math_asinh(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.asinh %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_asinhf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.asinh + %result64 = math.asinh %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_asinhd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.asinh + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_atanf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_atand(f64) -> f64 + // CHECK-LABEL: func @math_atan + func.func @math_atan(%arg_f32 : f32, %arg_f64 : 
f64) -> (f32, f64) { + %result32 = math.atan %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_atanf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.atan + %result64 = math.atan %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_atand(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.atan + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_atanhf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_atanhd(f64) -> f64 + // CHECK-LABEL: func @math_atanh + func.func @math_atanh(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.atanh %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_atanhf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.atanh + %result64 = math.atanh %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_atanhd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.atanh + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_cbrtf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_cbrtd(f64) -> f64 + // CHECK-LABEL: func @math_cbrt + func.func @math_cbrt(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.cbrt %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_cbrtf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.cbrt + %result64 = math.cbrt %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_cbrtd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.cbrt + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_cosf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_cosd(f64) -> f64 + // CHECK-LABEL: func @math_cos + func.func @math_cos(%arg_f32 : f32, %arg_f64 : f64) 
-> (f32, f64) { + %result32 = math.cos %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_cosf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.cos + %result64 = math.cos %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_cosd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.cos + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_coshf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_coshd(f64) -> f64 + // CHECK-LABEL: func @math_cosh + func.func @math_cosh(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.cosh %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_coshf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.cosh + %result64 = math.cosh %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_coshd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.cosh + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_erff(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_erfd(f64) -> f64 + // CHECK-LABEL: func @math_erf + func.func @math_erf(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.erf %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_erff(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.erf + %result64 = math.erf %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_erfd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.erf + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_erfcf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_erfcd(f64) -> f64 + // CHECK-LABEL: func @math_erfc + func.func @math_erfc(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + 
%result32 = math.erfc %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_erfcf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.erfc + %result64 = math.erfc %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_erfcd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.erfc + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_expf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_expd(f64) -> f64 + // CHECK-LABEL: func @math_exp + func.func @math_exp(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.exp %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_expf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.exp + %result64 = math.exp %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_expd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.exp + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_exp2f(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_exp2d(f64) -> f64 + // CHECK-LABEL: func @math_exp2 + func.func @math_exp2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.exp2 %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_exp2f(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.exp2 + %result64 = math.exp2 %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_exp2d(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.exp2 + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_expm1f(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_expm1d(f64) -> f64 + // CHECK-LABEL: func @math_expm1 + func.func @math_expm1(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = 
math.expm1 %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_expm1f(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.expm1 + %result64 = math.expm1 %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_expm1d(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.expm1 + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_logf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_logd(f64) -> f64 + // CHECK-LABEL: func @math_log + func.func @math_log(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.log %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_logf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.log + %result64 = math.log %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_logd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.log + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_log10f(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_log10d(f64) -> f64 + // CHECK-LABEL: func @math_log10 + func.func @math_log10(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.log10 %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_log10f(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.log10 + %result64 = math.log10 %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_log10d(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.log10 + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_log1pf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_log1pd(f64) -> f64 + // CHECK-LABEL: func @math_log1p + func.func @math_log1p(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = 
math.log1p %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_log1pf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.log1p + %result64 = math.log1p %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_log1pd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.log1p + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_log2f(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_log2d(f64) -> f64 + // CHECK-LABEL: func @math_log2 + func.func @math_log2(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.log2 %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_log2f(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.log2 + %result64 = math.log2 %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_log2d(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.log2 + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_rsqrtf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_rsqrtd(f64) -> f64 + // CHECK-LABEL: func @math_rsqrt + func.func @math_rsqrt(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.rsqrt %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_rsqrtf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.rsqrt + %result64 = math.rsqrt %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_rsqrtd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.rsqrt + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_sinf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_sind(f64) -> f64 + // CHECK-LABEL: func @math_sin + func.func @math_sin(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 
= math.sin %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_sinf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.sin + %result64 = math.sin %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_sind(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.sin + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_sinhf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_sinhd(f64) -> f64 + // CHECK-LABEL: func @math_sinh + func.func @math_sinh(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.sinh %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_sinhf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.sinh + %result64 = math.sinh %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_sinhd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.sinh + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_sqrtf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_sqrtd(f64) -> f64 + // CHECK-LABEL: func @math_sqrt + func.func @math_sqrt(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.sqrt %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_sqrtf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.sqrt + %result64 = math.sqrt %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_sqrtd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.sqrt + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_tanf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_tand(f64) -> f64 + // CHECK-LABEL: func @math_tan + func.func @math_tan(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.tan %arg_f32 
: f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_tanf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.tan + %result64 = math.tan %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_tand(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.tan + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_tanhf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_tanhd(f64) -> f64 + // CHECK-LABEL: func @math_tanh + func.func @math_tanh(%arg_f32 : f32, %arg_f64 : f64) -> (f32, f64) { + %result32 = math.tanh %arg_f32 : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_tanhf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.tanh + %result64 = math.tanh %arg_f64 : f64 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_tanhd(%{{.*}}) : (f64) -> f64 + // CHECK-NO-OCL: math.tanh + func.return %result32, %result64 : f32, f64 + } +} + +// ----- + +module @test_module { + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_cbrtf(f32) -> f32 + // CHECK-OCL: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_erfcf(f32) -> f32 + // CHECK-LABEL: func @math_unary_16bit + func.func @math_unary_16bit(%arg_f16 : f16, %arg_bf16 : bf16) -> (f16, bf16) { + %resultf16 = math.cbrt %arg_f16 : f16 + // CHECK-OCL: %[[F16:.+]] = llvm.fpext %{{.*}} : f16 to f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_cbrtf(%[[F16]]) : (f32) -> f32 + // CHECK-OCL: llvm.fptrunc %{{.*}} : f32 to f16 + // CHECK-NO-OCL: math.cbrt + %resultbf16 = math.erfc %arg_bf16 : bf16 + // CHECK-OCL: %[[BF16:.+]] = llvm.fpext %{{.*}} : bf16 to f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_erfcf(%[[BF16]]) : (f32) -> f32 + // CHECK-OCL: llvm.fptrunc %{{.*}} : f32 to bf16 + // CHECK-NO-OCL: math.erfc + func.return %resultf16, %resultbf16 : f16, bf16 + } +} + +// ----- + +module @test_module { + // CHECK-DAG: llvm.func 
@_Z{{.*}}__spirv_ocl_native_divideff(f32, f32) -> f32 + // CHECK-OCL-DAG: llvm.func spir_funccc @_Z{{.*}}__spirv_ocl_sqrtf(f32) -> f32 + // CHECK-LABEL: func @math_sqrt_div + func.func @math_sqrt_div(%arg : f32) -> f32 { + %sqrt = math.sqrt %arg : f32 + // CHECK-OCL: llvm.call spir_funccc @_Z{{.*}}__spirv_ocl_sqrtf(%{{.*}}) : (f32) -> f32 + // CHECK-NO-OCL: math.sqrt + %result = arith.divf %arg, %sqrt fastmath : f32 + // CHECK: llvm.call @_Z{{.*}}__spirv_ocl_native_divideff(%{{.*}}) {fastmathFlags = #llvm.fastmath} : (f32, f32) -> f32 + func.return %result : f32 + } +} From 85ec4367a9ce753a9603b645879bfa4244947133 Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Mon, 22 Jun 2026 14:12:00 -0400 Subject: [PATCH 056/511] [RFC][NFCI][IR] Extract AMDGPU-specific verification logic into `VerifierAMDGPU.cpp` (#204284) `Verifier.cpp` is large and already mixes generic IR verification with target-specific checks. We also have a growing amount of AMDGPU verifier logic downstream, which would all end up in the same file if we don't address this, and that is not ideal. This patch extracts AMDGPU-specific verification logic into a separate `VerifierAMDGPU.cpp` file, with shared infrastructure (`VerifierSupport`) moved into `VerifierInternal.h`. This is purely a code organization change, not a target-dependent IR verifier. All checks remain compiled and linked into `LLVMCore` regardless of the target triple. The extracted functions are called unconditionally at well-defined extension points in `Verifier.cpp`, and each function internally gates on target-specific conditions (for example, triple checks or intrinsic IDs) as needed. The file is strictly limited to AMDGPU-specific IR constructs (amdgcn intrinsics, AMDGPU module flags, etc.), and does not contain generic IR rules that vary by target. This PR deliberately avoided introducing polymorphism, since this is not a target-dependent verification framework. 
Instead, this follows a pattern similar to TargetParser (for example `AMDGPUTargetParser.cpp`): flat file layout, free functions, no registration, and unconditional compilation. Other targets that want similar separation can follow the same pattern. Open to suggestions on whether there is a better long-term way to structure this. --- llvm/lib/IR/CMakeLists.txt | 1 + llvm/lib/IR/Verifier.cpp | 555 +------------------ llvm/lib/IR/VerifierAMDGPU.cpp | 406 ++++++++++++++ llvm/lib/IR/VerifierInternal.h | 233 ++++++++ llvm/test/Verifier/callbr-intrinsic.ll | 12 +- llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn | 1 + 6 files changed, 671 insertions(+), 537 deletions(-) create mode 100644 llvm/lib/IR/VerifierAMDGPU.cpp create mode 100644 llvm/lib/IR/VerifierInternal.h diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt index 9cc45ef0e1773..3037f01083308 100644 --- a/llvm/lib/IR/CMakeLists.txt +++ b/llvm/lib/IR/CMakeLists.txt @@ -79,6 +79,7 @@ add_llvm_component_library(LLVMCore ValueSymbolTable.cpp VectorTypeUtils.cpp Verifier.cpp + VerifierAMDGPU.cpp VFABIDemangler.cpp RuntimeLibcalls.cpp diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 8207e60857eba..2a0892d1af11a 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -48,6 +48,7 @@ //===----------------------------------------------------------------------===// #include "llvm/IR/Verifier.h" +#include "VerifierInternal.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" @@ -95,7 +96,6 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsAArch64.h" -#include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsARM.h" #include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/IntrinsicsWebAssembly.h" @@ -115,7 +115,6 @@ #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/ProfileData/InstrProf.h" -#include "llvm/Support/AMDGPUAddrSpace.h" #include 
"llvm/Support/AtomicOrdering.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -143,189 +142,6 @@ static cl::opt VerifyNoAliasScopeDomination( cl::desc("Ensure that llvm.experimental.noalias.scope.decl for identical " "scopes are not dominating")); -struct llvm::VerifierSupport { - raw_ostream *OS; - const Module &M; - ModuleSlotTracker MST; - const Triple &TT; - const DataLayout &DL; - LLVMContext &Context; - - /// Track the brokenness of the module while recursively visiting. - bool Broken = false; - /// Broken debug info can be "recovered" from by stripping the debug info. - bool BrokenDebugInfo = false; - /// Whether to treat broken debug info as an error. - bool TreatBrokenDebugInfoAsError = true; - - explicit VerifierSupport(raw_ostream *OS, const Module &M) - : OS(OS), M(M), MST(&M), TT(M.getTargetTriple()), DL(M.getDataLayout()), - Context(M.getContext()) {} - -private: - void Write(const Module *M) { - *OS << "; ModuleID = '" << M->getModuleIdentifier() << "'\n"; - } - - void Write(const Value *V) { - if (V) - Write(*V); - } - - void Write(const Value &V) { - if (isa(V)) { - V.print(*OS, MST); - *OS << '\n'; - } else { - V.printAsOperand(*OS, true, MST); - *OS << '\n'; - } - } - - void Write(const DbgRecord *DR) { - if (DR) { - DR->print(*OS, MST, false); - *OS << '\n'; - } - } - - void Write(DbgVariableRecord::LocationType Type) { - switch (Type) { - case DbgVariableRecord::LocationType::Value: - *OS << "value"; - break; - case DbgVariableRecord::LocationType::Declare: - *OS << "declare"; - break; - case DbgVariableRecord::LocationType::DeclareValue: - *OS << "declare_value"; - break; - case DbgVariableRecord::LocationType::Assign: - *OS << "assign"; - break; - case DbgVariableRecord::LocationType::End: - *OS << "end"; - break; - case DbgVariableRecord::LocationType::Any: - *OS << "any"; - break; - }; - } - - void Write(const Metadata *MD) { - if (!MD) - return; - MD->print(*OS, MST, &M); - *OS << '\n'; - } - - template void 
Write(const MDTupleTypedArrayWrapper &MD) { - Write(MD.get()); - } - - void Write(const NamedMDNode *NMD) { - if (!NMD) - return; - NMD->print(*OS, MST); - *OS << '\n'; - } - - void Write(Type *T) { - if (!T) - return; - *OS << ' ' << *T; - } - - void Write(const Comdat *C) { - if (!C) - return; - *OS << *C; - } - - void Write(const APInt *AI) { - if (!AI) - return; - *OS << *AI << '\n'; - } - - void Write(const unsigned i) { *OS << i << '\n'; } - - // NOLINTNEXTLINE(readability-identifier-naming) - void Write(const Attribute *A) { - if (!A) - return; - *OS << A->getAsString() << '\n'; - } - - // NOLINTNEXTLINE(readability-identifier-naming) - void Write(const AttributeSet *AS) { - if (!AS) - return; - *OS << AS->getAsString() << '\n'; - } - - // NOLINTNEXTLINE(readability-identifier-naming) - void Write(const AttributeList *AL) { - if (!AL) - return; - AL->print(*OS); - } - - void Write(Printable P) { *OS << P << '\n'; } - - template void Write(ArrayRef Vs) { - for (const T &V : Vs) - Write(V); - } - - template - void WriteTs(const T1 &V1, const Ts &... Vs) { - Write(V1); - WriteTs(Vs...); - } - - template void WriteTs() {} - -public: - /// A check failed, so printout out the condition and the message. - /// - /// This provides a nice place to put a breakpoint if you want to see why - /// something is not correct. - void CheckFailed(const Twine &Message) { - if (OS) - *OS << Message << '\n'; - Broken = true; - } - - /// A check failed (with values to print). - /// - /// This calls the Message-only version so that the above is easier to set a - /// breakpoint on. - template - void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) { - CheckFailed(Message); - if (OS) - WriteTs(V1, Vs...); - } - - /// A debug info check failed. - void DebugInfoCheckFailed(const Twine &Message) { - if (OS) - *OS << Message << '\n'; - Broken |= TreatBrokenDebugInfoAsError; - BrokenDebugInfo = true; - } - - /// A debug info check failed (with values to print). 
- template - void DebugInfoCheckFailed(const Twine &Message, const T1 &V1, - const Ts &... Vs) { - DebugInfoCheckFailed(Message); - if (OS) - WriteTs(V1, Vs...); - } -}; - namespace { class Verifier : public InstVisitor, VerifierSupport { @@ -647,7 +463,6 @@ class Verifier : public InstVisitor, VerifierSupport { void verifyFunctionAttrs(FunctionType *FT, AttributeList Attrs, const Value *V, bool IsIntrinsic, bool IsInlineAsm); void verifyFunctionMetadata(ArrayRef> MDs); - void verifyAMDGPUReqdWorkGroupSize(const Function &F); void verifyUnknownProfileMetadata(MDNode *MD); void visitConstantExprsRecursively(const Constant *EntryC); void visitConstantExpr(const ConstantExpr *CE); @@ -2147,25 +1962,13 @@ Verifier::visitModuleFlag(const MDNode *Op, "SemanticInterposition metadata requires constant integer argument"); } - if (ID->getString() == "amdgpu.buffer.oob.mode" || - ID->getString() == "amdgpu.tbuffer.oob.mode") { - Check(MFB == Module::Max, - "'" + ID->getString() + - "' module flag must use 'max' merge behaviour"); - ConstantInt *Value = - mdconst::dyn_extract_or_null(Op->getOperand(2)); - Check(Value, "'" + ID->getString() + - "' module flag must have a constant integer value"); - if (Value) { - Check(Value->getZExtValue() <= 2, - "'" + ID->getString() + "' module flag must be 0, 1, or 2"); - } - } - if (ID->getString() == "CG Profile") { for (const MDOperand &MDO : cast(Op->getOperand(2))->operands()) visitModuleFlagCGProfileEntry(MDO); } + + // Target-specific module flag checks. 
+ verifyAMDGPUModuleFlag(*this, ID, MFB, Op); } void Verifier::visitModuleFlagCGProfileEntry(const MDOperand &MDO) { @@ -2858,57 +2661,6 @@ void Verifier::verifyFunctionMetadata( } } -void Verifier::verifyAMDGPUReqdWorkGroupSize(const Function &F) { - if (!TT.isAMDGPU()) - return; - - MDNode *ReqdWorkGroupSize = F.getMetadata("reqd_work_group_size"); - if (!ReqdWorkGroupSize || ReqdWorkGroupSize->getNumOperands() != 3) - return; - - uint64_t Product = 1; - for (const MDOperand &Op : ReqdWorkGroupSize->operands()) { - ConstantInt *C = mdconst::dyn_extract(Op); - if (!C || C->getValue().getActiveBits() > 64) - return; - uint64_t Dim = C->getZExtValue(); - if (Dim != 0 && Product > std::numeric_limits::max() / Dim) - return; - Product *= Dim; - } - - Attribute FlatWorkGroupSize = F.getFnAttribute("amdgpu-flat-work-group-size"); - if (!FlatWorkGroupSize.isValid()) { - CheckFailed("reqd_work_group_size requires amdgpu-flat-work-group-size", &F, - ReqdWorkGroupSize); - return; - } - - if (!FlatWorkGroupSize.isStringAttribute()) { - CheckFailed("amdgpu-flat-work-group-size must be a string attribute", &F); - return; - } - - StringRef AttrValue = FlatWorkGroupSize.getValueAsString(); - std::pair Values = AttrValue.split(','); - uint64_t Min = 0; - uint64_t Max = 0; - bool Parsed = !Values.second.contains(',') && - llvm::to_integer(Values.first.trim(), Min) && - llvm::to_integer(Values.second.trim(), Max); - if (!Parsed) { - CheckFailed("amdgpu-flat-work-group-size must be a pair of unsigned " - "integers", - &F); - return; - } - - Check(Min == Product && Max == Product, - "amdgpu-flat-work-group-size must equal the product of " - "reqd_work_group_size operands", - &F, ReqdWorkGroupSize); -} - void Verifier::visitConstantExprsRecursively(const Constant *EntryC) { if (EntryC->getNumOperands() == 0) return; @@ -3379,7 +3131,9 @@ void Verifier::visitFunction(const Function &F) { F.getAllMetadata(MDs); assert(F.hasMetadata() != MDs.empty() && "Bit out-of-sync"); 
verifyFunctionMetadata(MDs); - verifyAMDGPUReqdWorkGroupSize(F); + + // Target-specific function metadata checks. + verifyAMDGPUFunctionMetadata(*this, F); // Check validity of the personality function if (F.hasPersonalityFn()) { @@ -3678,27 +3432,22 @@ void Verifier::visitIndirectBrInst(IndirectBrInst &BI) { visitTerminator(BI); } +static bool isSupportedCallBrIntrinsic(Intrinsic::ID ID) { + // Currently we only support callbr for amdgcn.kill. Add more checks here as + // needed. + return isAMDGPUCallBrIntrinsic(ID); +} + void Verifier::visitCallBrInst(CallBrInst &CBI) { if (!CBI.isInlineAsm()) { Check(CBI.getCalledFunction(), - "Callbr: indirect function / invalid signature"); + "callbr: indirect function / invalid signature"); Check(!CBI.hasOperandBundles(), - "Callbr for intrinsics currently doesn't support operand bundles"); - - switch (CBI.getIntrinsicID()) { - case Intrinsic::amdgcn_kill: { - Check(CBI.getNumIndirectDests() == 1, - "Callbr amdgcn_kill only supports one indirect dest"); - bool Unreachable = isa(CBI.getIndirectDest(0)->begin()); - CallInst *Call = dyn_cast(CBI.getIndirectDest(0)->begin()); - Check(Unreachable || (Call && Call->getIntrinsicID() == - Intrinsic::amdgcn_unreachable), - "Callbr amdgcn_kill indirect dest needs to be unreachable"); - break; - } - default: + "callbr for intrinsics currently doesn't support operand bundles"); + + if (!isSupportedCallBrIntrinsic(CBI.getIntrinsicID())) { CheckFailed( - "Callbr currently only supports asm-goto and selected intrinsics"); + "callbr currently only supports asm-goto and selected intrinsics"); } visitIntrinsicCall(CBI.getIntrinsicID(), CBI); } else { @@ -4886,12 +4635,10 @@ void Verifier::visitAllocaInst(AllocaInst &AI) { verifySwiftErrorValue(&AI); } - if (TT.isAMDGPU()) { - Check(AI.getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS, - "alloca on amdgpu must be in addrspace(5)", &AI); - } - visitInstruction(AI); + + // Target-specific alloca checks. 
+ verifyAMDGPUAlloca(*this, AI); } void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) { @@ -7163,263 +6910,6 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { "@llvm.structured.alloca calls require elementtype attribute.", &Call); break; - case Intrinsic::amdgcn_cs_chain: { - auto CallerCC = Call.getCaller()->getCallingConv(); - switch (CallerCC) { - case CallingConv::AMDGPU_CS: - case CallingConv::AMDGPU_CS_Chain: - case CallingConv::AMDGPU_CS_ChainPreserve: - case CallingConv::AMDGPU_ES: - case CallingConv::AMDGPU_GS: - case CallingConv::AMDGPU_HS: - case CallingConv::AMDGPU_LS: - case CallingConv::AMDGPU_VS: - break; - default: - CheckFailed("Intrinsic cannot be called from functions with this " - "calling convention", - &Call); - break; - } - - Check(Call.paramHasAttr(2, Attribute::InReg), - "SGPR arguments must have the `inreg` attribute", &Call); - Check(!Call.paramHasAttr(3, Attribute::InReg), - "VGPR arguments must not have the `inreg` attribute", &Call); - - auto *FlagsArg = cast(Call.getArgOperand(4)); - Check(FlagsArg->getValue().ult(2), - "flags must be 0 or 1 for llvm.amdgcn.cs.chain", &Call); - - auto *Next = Call.getNextNode(); - bool IsAMDUnreachable = Next && isa(Next) && - cast(Next)->getIntrinsicID() == - Intrinsic::amdgcn_unreachable; - Check(Next && (isa(Next) || IsAMDUnreachable), - "llvm.amdgcn.cs.chain must be followed by unreachable", &Call); - break; - } - case Intrinsic::amdgcn_init_exec_from_input: { - const Argument *Arg = dyn_cast(Call.getOperand(0)); - Check(Arg && Arg->hasInRegAttr(), - "only inreg arguments to the parent function are valid as inputs to " - "this intrinsic", - &Call); - break; - } - case Intrinsic::amdgcn_set_inactive_chain_arg: { - auto CallerCC = Call.getCaller()->getCallingConv(); - switch (CallerCC) { - case CallingConv::AMDGPU_CS_Chain: - case CallingConv::AMDGPU_CS_ChainPreserve: - break; - default: - CheckFailed("Intrinsic can only be used from functions with the " - 
"amdgpu_cs_chain or amdgpu_cs_chain_preserve " - "calling conventions", - &Call); - break; - } - - unsigned InactiveIdx = 1; - Check(!Call.paramHasAttr(InactiveIdx, Attribute::InReg), - "Value for inactive lanes must not have the `inreg` attribute", - &Call); - Check(isa(Call.getArgOperand(InactiveIdx)), - "Value for inactive lanes must be a function argument", &Call); - Check(!cast(Call.getArgOperand(InactiveIdx))->hasInRegAttr(), - "Value for inactive lanes must be a VGPR function argument", &Call); - break; - } - case Intrinsic::amdgcn_call_whole_wave: { - auto F = dyn_cast(Call.getArgOperand(0)); - Check(F, "Indirect whole wave calls are not allowed", &Call); - - CallingConv::ID CC = F->getCallingConv(); - Check(CC == CallingConv::AMDGPU_Gfx_WholeWave, - "Callee must have the amdgpu_gfx_whole_wave calling convention", - &Call); - - Check(!F->isVarArg(), "Variadic whole wave calls are not allowed", &Call); - - Check(Call.arg_size() == F->arg_size(), - "Call argument count must match callee argument count", &Call); - - // The first argument of the call is the callee, and the first argument of - // the callee is the active mask. The rest of the arguments must match. 
- Check(F->arg_begin()->getType()->isIntegerTy(1), - "Callee must have i1 as its first argument", &Call); - for (auto [CallArg, FuncArg] : - drop_begin(zip_equal(Call.args(), F->args()))) { - Check(CallArg->getType() == FuncArg.getType(), - "Argument types must match", &Call); - - // Check that inreg attributes match between call site and function - Check(Call.paramHasAttr(FuncArg.getArgNo(), Attribute::InReg) == - FuncArg.hasInRegAttr(), - "Argument inreg attributes must match", &Call); - } - break; - } - case Intrinsic::amdgcn_s_prefetch_data: { - Check( - AMDGPU::isFlatGlobalAddrSpace( - Call.getArgOperand(0)->getType()->getPointerAddressSpace()), - "llvm.amdgcn.s.prefetch.data only supports global or constant memory"); - break; - } - case Intrinsic::amdgcn_load_to_lds: - case Intrinsic::amdgcn_load_async_to_lds: - case Intrinsic::amdgcn_global_load_lds: - case Intrinsic::amdgcn_global_load_async_lds: - case Intrinsic::amdgcn_raw_buffer_load_lds: - case Intrinsic::amdgcn_raw_buffer_load_async_lds: - case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: - case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds: - case Intrinsic::amdgcn_struct_buffer_load_lds: - case Intrinsic::amdgcn_struct_buffer_load_async_lds: - case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: - case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds: { - // The data byte size immarg is operand 2 for every load-to-LDS intrinsic. 
- uint64_t Size = cast(Call.getArgOperand(2))->getZExtValue(); - Check(Size == 1 || Size == 2 || Size == 4 || Size == 12 || Size == 16, - "invalid data size for load-to-LDS intrinsic; must be 1, 2, 4, 12, " - "or 16", - &Call); - break; - } - case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4: - case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { - Value *Src0 = Call.getArgOperand(0); - Value *Src1 = Call.getArgOperand(1); - - uint64_t CBSZ = cast(Call.getArgOperand(3))->getZExtValue(); - uint64_t BLGP = cast(Call.getArgOperand(4))->getZExtValue(); - Check(CBSZ <= 4, "invalid value for cbsz format", Call, - Call.getArgOperand(3)); - Check(BLGP <= 4, "invalid value for blgp format", Call, - Call.getArgOperand(4)); - - // AMDGPU::MFMAScaleFormats values - auto getFormatNumRegs = [](unsigned FormatVal) { - switch (FormatVal) { - case 0: - case 1: - return 8u; - case 2: - case 3: - return 6u; - case 4: - return 4u; - default: - llvm_unreachable("invalid format value"); - } - }; - - auto isValidSrcASrcBVector = [](FixedVectorType *Ty) { - if (!Ty || !Ty->getElementType()->isIntegerTy(32)) - return false; - unsigned NumElts = Ty->getNumElements(); - return NumElts == 4 || NumElts == 6 || NumElts == 8; - }; - - auto *Src0Ty = dyn_cast(Src0->getType()); - auto *Src1Ty = dyn_cast(Src1->getType()); - Check(isValidSrcASrcBVector(Src0Ty), - "operand 0 must be 4, 6 or 8 element i32 vector", &Call, Src0); - Check(isValidSrcASrcBVector(Src1Ty), - "operand 1 must be 4, 6 or 8 element i32 vector", &Call, Src1); - - // Permit excess registers for the format. 
- Check(Src0Ty->getNumElements() >= getFormatNumRegs(CBSZ), - "invalid vector type for format", &Call, Src0, Call.getArgOperand(3)); - Check(Src1Ty->getNumElements() >= getFormatNumRegs(BLGP), - "invalid vector type for format", &Call, Src1, Call.getArgOperand(5)); - break; - } - case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4: - case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4: - case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: { - Value *Src0 = Call.getArgOperand(1); - Value *Src1 = Call.getArgOperand(3); - - unsigned FmtA = cast(Call.getArgOperand(0))->getZExtValue(); - unsigned FmtB = cast(Call.getArgOperand(2))->getZExtValue(); - Check(FmtA <= 4, "invalid value for matrix format", Call, - Call.getArgOperand(0)); - Check(FmtB <= 4, "invalid value for matrix format", Call, - Call.getArgOperand(2)); - - // AMDGPU::MatrixFMT values - auto getFormatNumRegs = [](unsigned FormatVal) { - switch (FormatVal) { - case 0: - case 1: - return 16u; - case 2: - case 3: - return 12u; - case 4: - return 8u; - default: - llvm_unreachable("invalid format value"); - } - }; - - auto isValidSrcASrcBVector = [](FixedVectorType *Ty) { - if (!Ty || !Ty->getElementType()->isIntegerTy(32)) - return false; - unsigned NumElts = Ty->getNumElements(); - return NumElts == 16 || NumElts == 12 || NumElts == 8; - }; - - auto *Src0Ty = dyn_cast(Src0->getType()); - auto *Src1Ty = dyn_cast(Src1->getType()); - Check(isValidSrcASrcBVector(Src0Ty), - "operand 1 must be 8, 12 or 16 element i32 vector", &Call, Src0); - Check(isValidSrcASrcBVector(Src1Ty), - "operand 3 must be 8, 12 or 16 element i32 vector", &Call, Src1); - - // Permit excess registers for the format. 
- Check(Src0Ty->getNumElements() >= getFormatNumRegs(FmtA), - "invalid vector type for format", &Call, Src0, Call.getArgOperand(0)); - Check(Src1Ty->getNumElements() >= getFormatNumRegs(FmtB), - "invalid vector type for format", &Call, Src1, Call.getArgOperand(2)); - break; - } - case Intrinsic::amdgcn_cooperative_atomic_load_32x4B: - case Intrinsic::amdgcn_cooperative_atomic_load_16x8B: - case Intrinsic::amdgcn_cooperative_atomic_load_8x16B: - case Intrinsic::amdgcn_cooperative_atomic_store_32x4B: - case Intrinsic::amdgcn_cooperative_atomic_store_16x8B: - case Intrinsic::amdgcn_cooperative_atomic_store_8x16B: { - // Check we only use this intrinsic on the FLAT or GLOBAL address spaces. - Value *PtrArg = Call.getArgOperand(0); - const unsigned AS = PtrArg->getType()->getPointerAddressSpace(); - Check(AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS, - "cooperative atomic intrinsics require a generic or global pointer", - &Call, PtrArg); - - // Last argument must be a MD string - auto *Op = cast(Call.getArgOperand(Call.arg_size() - 1)); - MDNode *MD = cast(Op->getMetadata()); - Check((MD->getNumOperands() == 1) && isa(MD->getOperand(0)), - "cooperative atomic intrinsics require that the last argument is a " - "metadata string", - &Call, Op); - break; - } - case Intrinsic::amdgcn_av_load_b128: - case Intrinsic::amdgcn_av_store_b128: { - // Last argument must be a MD string - auto *Op = cast(Call.getArgOperand(Call.arg_size() - 1)); - auto *MD = dyn_cast(Op->getMetadata()); - Check(MD && (MD->getNumOperands() == 1) && isa(MD->getOperand(0)), - "the last argument to av load/store intrinsics must be a " - "metadata string", - &Call, Op); - break; - } case Intrinsic::nvvm_setmaxnreg_inc_sync_aligned_u32: case Intrinsic::nvvm_setmaxnreg_dec_sync_aligned_u32: { Value *V = Call.getArgOperand(0); @@ -7531,6 +7021,9 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { Check(HasToken, "Missing funclet token on intrinsic call", &Call); } } + + 
// Target-specific intrinsic call checks. + verifyAMDGPUIntrinsicCall(*this, ID, Call); } /// Carefully grab the subprogram from a local scope. diff --git a/llvm/lib/IR/VerifierAMDGPU.cpp b/llvm/lib/IR/VerifierAMDGPU.cpp new file mode 100644 index 0000000000000..04cb214ef2520 --- /dev/null +++ b/llvm/lib/IR/VerifierAMDGPU.cpp @@ -0,0 +1,406 @@ +//===-- VerifierAMDGPU.cpp - AMDGPU-specific IR verification ---------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains AMDGPU-specific IR verification logic that was extracted +// from Verifier.cpp for code organization purposes only. These checks are +// always compiled and linked as part of LLVMCore — this is not a target- +// dependent IR verifier, which would require a different design. +// +// This file should only contain checks for AMDGPU-specific IR constructs +// (e.g. amdgcn intrinsics, AMDGPU address spaces). It must not contain +// checks for generic IR that might behave differently under AMDGPU. +// +//===----------------------------------------------------------------------===// + +#include "VerifierInternal.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/Support/AMDGPUAddrSpace.h" + +using namespace llvm; + +#define Check(C, ...) 
\ + do { \ + if (!(C)) { \ + VS.CheckFailed(__VA_ARGS__); \ + return; \ + } \ + } while (false) + +void llvm::verifyAMDGPUModuleFlag(VerifierSupport &VS, const MDString *ID, + Module::ModFlagBehavior MFB, + const MDNode *Op) { + if (ID->getString() != "amdgpu.buffer.oob.mode" && + ID->getString() != "amdgpu.tbuffer.oob.mode") + return; + + Check(MFB == Module::Max, + "'" + ID->getString() + "' module flag must use 'max' merge behaviour"); + ConstantInt *Value = + mdconst::dyn_extract_or_null(Op->getOperand(2)); + Check(Value, "'" + ID->getString() + + "' module flag must have a constant integer value"); + Check(Value->getZExtValue() <= 2, + "'" + ID->getString() + "' module flag must be 0, 1, or 2"); +} + +// Verify that when a function has !reqd_work_group_size metadata, it also has +// an amdgpu-flat-work-group-size attribute that matches the product of the +// reqd_work_group_size operands. +static void verifyAMDGPUReqdWorkGroupSize(VerifierSupport &VS, + const Function &F) { + // This is not required for other targets so we only check for AMDGPU. 
+ if (!VS.TT.isAMDGPU()) + return; + + MDNode *ReqdWorkGroupSize = F.getMetadata("reqd_work_group_size"); + if (!ReqdWorkGroupSize || ReqdWorkGroupSize->getNumOperands() != 3) + return; + + uint64_t Product = 1; + for (const MDOperand &Op : ReqdWorkGroupSize->operands()) { + ConstantInt *C = mdconst::dyn_extract(Op); + if (!C || C->getValue().getActiveBits() > 64) + return; + uint64_t Dim = C->getZExtValue(); + if (Dim != 0 && Product > std::numeric_limits::max() / Dim) + return; + Product *= Dim; + } + + Attribute FlatWorkGroupSize = F.getFnAttribute("amdgpu-flat-work-group-size"); + if (!FlatWorkGroupSize.isValid()) { + VS.CheckFailed("reqd_work_group_size requires amdgpu-flat-work-group-size", + &F, ReqdWorkGroupSize); + return; + } + + if (!FlatWorkGroupSize.isStringAttribute()) { + VS.CheckFailed("amdgpu-flat-work-group-size must be a string attribute", + &F); + return; + } + + StringRef AttrValue = FlatWorkGroupSize.getValueAsString(); + std::pair Values = AttrValue.split(','); + uint64_t Min = 0; + uint64_t Max = 0; + bool Parsed = !Values.second.contains(',') && + llvm::to_integer(Values.first.trim(), Min) && + llvm::to_integer(Values.second.trim(), Max); + if (!Parsed) { + VS.CheckFailed("amdgpu-flat-work-group-size must be a pair of unsigned " + "integers", + &F); + return; + } + + if (Min != Product || Max != Product) { + VS.CheckFailed("amdgpu-flat-work-group-size must equal the product of " + "reqd_work_group_size operands", + &F, ReqdWorkGroupSize); + } +} + +void llvm::verifyAMDGPUFunctionMetadata(VerifierSupport &VS, + const Function &F) { + verifyAMDGPUReqdWorkGroupSize(VS, F); +} + +void llvm::verifyAMDGPUAlloca(VerifierSupport &VS, const AllocaInst &AI) { + // This is not required for other targets so we only check for AMDGPU. 
+ if (!VS.TT.isAMDGPU()) + return; + + if (AI.getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) + VS.CheckFailed("alloca on amdgpu must be in addrspace(5)", &AI); +} + +bool llvm::isAMDGPUCallBrIntrinsic(Intrinsic::ID ID) { + switch (ID) { + default: + return false; + case Intrinsic::amdgcn_kill: + return true; + } +} + +void llvm::verifyAMDGPUIntrinsicCall(VerifierSupport &VS, Intrinsic::ID ID, + CallBase &Call) { + switch (ID) { + default: + return; + case Intrinsic::amdgcn_kill: { + if (auto *CBI = dyn_cast(&Call)) { + Check(CBI->getNumIndirectDests() == 1, + "callbr amdgcn_kill only supports one indirect dest"); + bool Unreachable = isa(CBI->getIndirectDest(0)->begin()); + CallInst *CI = dyn_cast(CBI->getIndirectDest(0)->begin()); + Check(Unreachable || + (CI && CI->getIntrinsicID() == Intrinsic::amdgcn_unreachable), + "callbr amdgcn_kill indirect dest needs to be unreachable"); + } + break; + } + case Intrinsic::amdgcn_cs_chain: { + CallingConv::ID CallerCC = Call.getCaller()->getCallingConv(); + switch (CallerCC) { + case CallingConv::AMDGPU_CS: + case CallingConv::AMDGPU_CS_Chain: + case CallingConv::AMDGPU_CS_ChainPreserve: + case CallingConv::AMDGPU_ES: + case CallingConv::AMDGPU_GS: + case CallingConv::AMDGPU_HS: + case CallingConv::AMDGPU_LS: + case CallingConv::AMDGPU_VS: + break; + default: + VS.CheckFailed("Intrinsic cannot be called from functions with this " + "calling convention", + &Call); + break; + } + + Check(Call.paramHasAttr(2, Attribute::InReg), + "SGPR arguments must have the `inreg` attribute", &Call); + Check(!Call.paramHasAttr(3, Attribute::InReg), + "VGPR arguments must not have the `inreg` attribute", &Call); + + ConstantInt *FlagsArg = cast(Call.getArgOperand(4)); + Check(FlagsArg->getValue().ult(2), + "flags must be 0 or 1 for llvm.amdgcn.cs.chain", &Call); + + Instruction *Next = Call.getNextNode(); + bool IsAMDUnreachable = isa_and_nonnull(Next) && + cast(Next)->getIntrinsicID() == + Intrinsic::amdgcn_unreachable; + Check(Next && 
(isa(Next) || IsAMDUnreachable), + "llvm.amdgcn.cs.chain must be followed by unreachable", &Call); + break; + } + case Intrinsic::amdgcn_init_exec_from_input: { + const Argument *Arg = dyn_cast(Call.getOperand(0)); + Check(Arg && Arg->hasInRegAttr(), + "only inreg arguments to the parent function are valid as inputs to " + "this intrinsic", + &Call); + break; + } + case Intrinsic::amdgcn_set_inactive_chain_arg: { + CallingConv::ID CallerCC = Call.getCaller()->getCallingConv(); + switch (CallerCC) { + case CallingConv::AMDGPU_CS_Chain: + case CallingConv::AMDGPU_CS_ChainPreserve: + break; + default: + VS.CheckFailed("Intrinsic can only be used from functions with the " + "amdgpu_cs_chain or amdgpu_cs_chain_preserve " + "calling conventions", + &Call); + break; + } + + unsigned InactiveIdx = 1; + Check(!Call.paramHasAttr(InactiveIdx, Attribute::InReg), + "Value for inactive lanes must not have the `inreg` attribute", + &Call); + Check(isa(Call.getArgOperand(InactiveIdx)), + "Value for inactive lanes must be a function argument", &Call); + Check(!cast(Call.getArgOperand(InactiveIdx))->hasInRegAttr(), + "Value for inactive lanes must be a VGPR function argument", &Call); + break; + } + case Intrinsic::amdgcn_call_whole_wave: { + Function *F = dyn_cast(Call.getArgOperand(0)); + Check(F, "Indirect whole wave calls are not allowed", &Call); + + CallingConv::ID CC = F->getCallingConv(); + Check(CC == CallingConv::AMDGPU_Gfx_WholeWave, + "Callee must have the amdgpu_gfx_whole_wave calling convention", + &Call); + + Check(!F->isVarArg(), "Variadic whole wave calls are not allowed", &Call); + + Check(Call.arg_size() == F->arg_size(), + "Call argument count must match callee argument count", &Call); + + Check(F->arg_begin()->getType()->isIntegerTy(1), + "Callee must have i1 as its first argument", &Call); + for (auto [CallArg, FuncArg] : + drop_begin(zip_equal(Call.args(), F->args()))) { + Check(CallArg->getType() == FuncArg.getType(), + "Argument types must match", &Call); + 
+ Check(Call.paramHasAttr(FuncArg.getArgNo(), Attribute::InReg) == + FuncArg.hasInRegAttr(), + "Argument inreg attributes must match", &Call); + } + break; + } + case Intrinsic::amdgcn_s_prefetch_data: { + Check( + AMDGPU::isFlatGlobalAddrSpace( + Call.getArgOperand(0)->getType()->getPointerAddressSpace()), + "llvm.amdgcn.s.prefetch.data only supports global or constant memory"); + break; + } + case Intrinsic::amdgcn_load_to_lds: + case Intrinsic::amdgcn_load_async_to_lds: + case Intrinsic::amdgcn_global_load_lds: + case Intrinsic::amdgcn_global_load_async_lds: + case Intrinsic::amdgcn_raw_buffer_load_lds: + case Intrinsic::amdgcn_raw_buffer_load_async_lds: + case Intrinsic::amdgcn_raw_ptr_buffer_load_lds: + case Intrinsic::amdgcn_raw_ptr_buffer_load_async_lds: + case Intrinsic::amdgcn_struct_buffer_load_lds: + case Intrinsic::amdgcn_struct_buffer_load_async_lds: + case Intrinsic::amdgcn_struct_ptr_buffer_load_lds: + case Intrinsic::amdgcn_struct_ptr_buffer_load_async_lds: { + uint64_t Size = cast(Call.getArgOperand(2))->getZExtValue(); + Check(Size == 1 || Size == 2 || Size == 4 || Size == 12 || Size == 16, + "invalid data size for load-to-LDS intrinsic; must be 1, 2, 4, 12, " + "or 16", + &Call); + break; + } + case Intrinsic::amdgcn_mfma_scale_f32_16x16x128_f8f6f4: + case Intrinsic::amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { + Value *Src0 = Call.getArgOperand(0); + Value *Src1 = Call.getArgOperand(1); + + uint64_t CBSZ = cast(Call.getArgOperand(3))->getZExtValue(); + uint64_t BLGP = cast(Call.getArgOperand(4))->getZExtValue(); + Check(CBSZ <= 4, "invalid value for cbsz format", Call, + Call.getArgOperand(3)); + Check(BLGP <= 4, "invalid value for blgp format", Call, + Call.getArgOperand(4)); + + auto GetFormatNumRegs = [](unsigned FormatVal) { + switch (FormatVal) { + case 0: + case 1: + return 8u; + case 2: + case 3: + return 6u; + case 4: + return 4u; + default: + llvm_unreachable("invalid format value"); + } + }; + + auto IsValidSrcASrcBVector = 
[](FixedVectorType *Ty) { + if (!Ty || !Ty->getElementType()->isIntegerTy(32)) + return false; + unsigned NumElts = Ty->getNumElements(); + return NumElts == 4 || NumElts == 6 || NumElts == 8; + }; + + FixedVectorType *Src0Ty = dyn_cast(Src0->getType()); + FixedVectorType *Src1Ty = dyn_cast(Src1->getType()); + Check(IsValidSrcASrcBVector(Src0Ty), + "operand 0 must be 4, 6 or 8 element i32 vector", &Call, Src0); + Check(IsValidSrcASrcBVector(Src1Ty), + "operand 1 must be 4, 6 or 8 element i32 vector", &Call, Src1); + + Check(Src0Ty->getNumElements() >= GetFormatNumRegs(CBSZ), + "invalid vector type for format", &Call, Src0, Call.getArgOperand(3)); + Check(Src1Ty->getNumElements() >= GetFormatNumRegs(BLGP), + "invalid vector type for format", &Call, Src1, Call.getArgOperand(5)); + break; + } + case Intrinsic::amdgcn_wmma_f32_16x16x128_f8f6f4: + case Intrinsic::amdgcn_wmma_scale_f32_16x16x128_f8f6f4: + case Intrinsic::amdgcn_wmma_scale16_f32_16x16x128_f8f6f4: { + Value *Src0 = Call.getArgOperand(1); + Value *Src1 = Call.getArgOperand(3); + + unsigned FmtA = cast(Call.getArgOperand(0))->getZExtValue(); + unsigned FmtB = cast(Call.getArgOperand(2))->getZExtValue(); + Check(FmtA <= 4, "invalid value for matrix format", Call, + Call.getArgOperand(0)); + Check(FmtB <= 4, "invalid value for matrix format", Call, + Call.getArgOperand(2)); + + auto GetFormatNumRegs = [](unsigned FormatVal) { + switch (FormatVal) { + case 0: + case 1: + return 16u; + case 2: + case 3: + return 12u; + case 4: + return 8u; + default: + llvm_unreachable("invalid format value"); + } + }; + + auto IsValidSrcASrcBVector = [](FixedVectorType *Ty) { + if (!Ty || !Ty->getElementType()->isIntegerTy(32)) + return false; + unsigned NumElts = Ty->getNumElements(); + return NumElts == 16 || NumElts == 12 || NumElts == 8; + }; + + FixedVectorType *Src0Ty = dyn_cast(Src0->getType()); + FixedVectorType *Src1Ty = dyn_cast(Src1->getType()); + Check(IsValidSrcASrcBVector(Src0Ty), + "operand 1 must be 8, 12 or 16 
element i32 vector", &Call, Src0); + Check(IsValidSrcASrcBVector(Src1Ty), + "operand 3 must be 8, 12 or 16 element i32 vector", &Call, Src1); + + Check(Src0Ty->getNumElements() >= GetFormatNumRegs(FmtA), + "invalid vector type for format", &Call, Src0, Call.getArgOperand(0)); + Check(Src1Ty->getNumElements() >= GetFormatNumRegs(FmtB), + "invalid vector type for format", &Call, Src1, Call.getArgOperand(2)); + break; + } + case Intrinsic::amdgcn_cooperative_atomic_load_32x4B: + case Intrinsic::amdgcn_cooperative_atomic_load_16x8B: + case Intrinsic::amdgcn_cooperative_atomic_load_8x16B: + case Intrinsic::amdgcn_cooperative_atomic_store_32x4B: + case Intrinsic::amdgcn_cooperative_atomic_store_16x8B: + case Intrinsic::amdgcn_cooperative_atomic_store_8x16B: { + Value *PtrArg = Call.getArgOperand(0); + const unsigned AS = PtrArg->getType()->getPointerAddressSpace(); + Check(AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS, + "cooperative atomic intrinsics require a generic or global pointer", + &Call, PtrArg); + + MetadataAsValue *Op = + cast(Call.getArgOperand(Call.arg_size() - 1)); + MDNode *MD = cast(Op->getMetadata()); + Check((MD->getNumOperands() == 1) && isa(MD->getOperand(0)), + "cooperative atomic intrinsics require that the last argument is a " + "metadata string", + &Call, Op); + break; + } + case Intrinsic::amdgcn_av_load_b128: + case Intrinsic::amdgcn_av_store_b128: { + MetadataAsValue *Op = + cast(Call.getArgOperand(Call.arg_size() - 1)); + MDNode *MD = dyn_cast(Op->getMetadata()); + Check(MD && (MD->getNumOperands() == 1) && isa(MD->getOperand(0)), + "the last argument to av load/store intrinsics must be a " + "metadata string", + &Call, Op); + break; + } + } +} + +#undef Check diff --git a/llvm/lib/IR/VerifierInternal.h b/llvm/lib/IR/VerifierInternal.h new file mode 100644 index 0000000000000..922385230179b --- /dev/null +++ b/llvm/lib/IR/VerifierInternal.h @@ -0,0 +1,233 @@ +//===-- VerifierInternal.h - Internal verifier infrastructure 
--------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Shared definitions used by the verifier implementation files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_IR_VERIFIERINTERNAL_H +#define LLVM_LIB_IR_VERIFIERINTERNAL_H + +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ModuleSlotTracker.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Printable.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/TargetParser/Triple.h" + +namespace llvm { + +struct VerifierSupport { + raw_ostream *OS; + const Module &M; + ModuleSlotTracker MST; + const Triple &TT; + const DataLayout &DL; + LLVMContext &Context; + + /// Track the brokenness of the module while recursively visiting. + bool Broken = false; + /// Broken debug info can be "recovered" from by stripping the debug info. + bool BrokenDebugInfo = false; + /// Whether to treat broken debug info as an error. 
+ bool TreatBrokenDebugInfoAsError = true; + + explicit VerifierSupport(raw_ostream *OS, const Module &M) + : OS(OS), M(M), MST(&M), TT(M.getTargetTriple()), DL(M.getDataLayout()), + Context(M.getContext()) {} + +private: + void Write(const Module *M) { + *OS << "; ModuleID = '" << M->getModuleIdentifier() << "'\n"; + } + + void Write(const Value *V) { + if (V) + Write(*V); + } + + void Write(const Value &V) { + if (isa(V)) { + V.print(*OS, MST); + *OS << '\n'; + } else { + V.printAsOperand(*OS, true, MST); + *OS << '\n'; + } + } + + void Write(const DbgRecord *DR) { + if (DR) { + DR->print(*OS, MST, false); + *OS << '\n'; + } + } + + void Write(DbgVariableRecord::LocationType Type) { + switch (Type) { + case DbgVariableRecord::LocationType::Value: + *OS << "value"; + break; + case DbgVariableRecord::LocationType::Declare: + *OS << "declare"; + break; + case DbgVariableRecord::LocationType::DeclareValue: + *OS << "declare_value"; + break; + case DbgVariableRecord::LocationType::Assign: + *OS << "assign"; + break; + case DbgVariableRecord::LocationType::End: + *OS << "end"; + break; + case DbgVariableRecord::LocationType::Any: + *OS << "any"; + break; + }; + } + + void Write(const Metadata *MD) { + if (!MD) + return; + MD->print(*OS, MST, &M); + *OS << '\n'; + } + + template void Write(const MDTupleTypedArrayWrapper &MD) { + Write(MD.get()); + } + + void Write(const NamedMDNode *NMD) { + if (!NMD) + return; + NMD->print(*OS, MST); + *OS << '\n'; + } + + void Write(Type *T) { + if (!T) + return; + *OS << ' ' << *T; + } + + void Write(const Comdat *C) { + if (!C) + return; + *OS << *C; + } + + void Write(const APInt *AI) { + if (!AI) + return; + *OS << *AI << '\n'; + } + + void Write(const unsigned i) { *OS << i << '\n'; } + + // NOLINTNEXTLINE(readability-identifier-naming) + void Write(const Attribute *A) { + if (!A) + return; + *OS << A->getAsString() << '\n'; + } + + // NOLINTNEXTLINE(readability-identifier-naming) + void Write(const AttributeSet *AS) { + if (!AS) 
+ return; + *OS << AS->getAsString() << '\n'; + } + + // NOLINTNEXTLINE(readability-identifier-naming) + void Write(const AttributeList *AL) { + if (!AL) + return; + AL->print(*OS); + } + + void Write(Printable P) { *OS << P << '\n'; } + + template void Write(ArrayRef Vs) { + for (const T &V : Vs) + Write(V); + } + + template + void WriteTs(const T1 &V1, const Ts &...Vs) { + Write(V1); + WriteTs(Vs...); + } + + template void WriteTs() {} + +public: + /// A check failed, so print out the condition and the message. + /// + /// This provides a nice place to put a breakpoint if you want to see why + /// something is not correct. + void CheckFailed(const Twine &Message) { + if (OS) + *OS << Message << '\n'; + Broken = true; + } + + /// A check failed (with values to print). + /// + /// This calls the Message-only version so that the above is easier to set a + /// breakpoint on. + template + void CheckFailed(const Twine &Message, const T1 &V1, const Ts &...Vs) { + CheckFailed(Message); + if (OS) + WriteTs(V1, Vs...); + } + + /// A debug info check failed. + void DebugInfoCheckFailed(const Twine &Message) { + if (OS) + *OS << Message << '\n'; + Broken |= TreatBrokenDebugInfoAsError; + BrokenDebugInfo = true; + } + + /// A debug info check failed (with values to print). 
+ template + void DebugInfoCheckFailed(const Twine &Message, const T1 &V1, + const Ts &...Vs) { + DebugInfoCheckFailed(Message); + if (OS) + WriteTs(V1, Vs...); + } +}; + +//============================================================================== +// AMDGPU-specific verification functions + +void verifyAMDGPUModuleFlag(VerifierSupport &VS, const MDString *ID, + Module::ModFlagBehavior MFB, const MDNode *Op); + +void verifyAMDGPUFunctionMetadata(VerifierSupport &VS, const Function &F); + +void verifyAMDGPUAlloca(VerifierSupport &VS, const AllocaInst &AI); + +void verifyAMDGPUIntrinsicCall(VerifierSupport &VS, Intrinsic::ID ID, + CallBase &Call); + +bool isAMDGPUCallBrIntrinsic(Intrinsic::ID ID); + +//============================================================================== + +} // namespace llvm + +#endif // LLVM_LIB_IR_VERIFIERINTERNAL_H diff --git a/llvm/test/Verifier/callbr-intrinsic.ll b/llvm/test/Verifier/callbr-intrinsic.ll index 5cc9e7034e2bc..eec73fb796698 100644 --- a/llvm/test/Verifier/callbr-intrinsic.ll +++ b/llvm/test/Verifier/callbr-intrinsic.ll @@ -2,7 +2,7 @@ declare void @llvm.amdgcn.kill(i1) -; CHECK: Callbr amdgcn_kill only supports one indirect dest +; CHECK: callbr amdgcn_kill only supports one indirect dest define void @test_callbr_intrinsic_indirect0(i1 %c) { callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [] kill: @@ -11,7 +11,7 @@ cont: ret void } -; CHECK-NEXT: Callbr amdgcn_kill only supports one indirect dest +; CHECK-NEXT: callbr amdgcn_kill only supports one indirect dest define void @test_callbr_intrinsic_indirect2(i1 %c) { callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill1, label %kill2] kill1: @@ -22,7 +22,7 @@ cont: ret void } -; CHECK-NEXT: Callbr amdgcn_kill indirect dest needs to be unreachable +; CHECK-NEXT: callbr amdgcn_kill indirect dest needs to be unreachable define void @test_callbr_intrinsic_no_unreachable(i1 %c) { callbr void @llvm.amdgcn.kill(i1 %c) to label %cont [label %kill] kill: @@ 
-31,7 +31,7 @@ cont: ret void } -; CHECK-NEXT: Callbr currently only supports asm-goto and selected intrinsics +; CHECK-NEXT: callbr currently only supports asm-goto and selected intrinsics declare i32 @llvm.amdgcn.workitem.id.x() define void @test_callbr_intrinsic_unsupported() { callbr i32 @llvm.amdgcn.workitem.id.x() to label %cont [] @@ -39,7 +39,7 @@ cont: ret void } -; CHECK-NEXT: Callbr: indirect function / invalid signature +; CHECK-NEXT: callbr: indirect function / invalid signature define void @test_callbr_intrinsic_wrong_signature(ptr %ptr) { %func = load ptr, ptr %ptr, align 8 callbr void %func() to label %cont [] @@ -47,7 +47,7 @@ cont: ret void } -; CHECK-NEXT: Callbr for intrinsics currently doesn't support operand bundles +; CHECK-NEXT: callbr for intrinsics currently doesn't support operand bundles define void @test_callbr_intrinsic_no_operand_bundles(i1 %c) { callbr void @llvm.amdgcn.kill(i1 %c) [ "foo"(i1 %c) ] to label %cont [label %kill] kill: diff --git a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn index b91e7f72cf712..1135884bdfd68 100644 --- a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn @@ -95,5 +95,6 @@ static_library("IR") { "ValueSymbolTable.cpp", "VectorTypeUtils.cpp", "Verifier.cpp", + "VerifierAMDGPU.cpp", ] } From a386da560e7fa2eb3fcf9c293a9ae300b863a05e Mon Sep 17 00:00:00 2001 From: Zhen Wang Date: Mon, 22 Jun 2026 11:25:35 -0700 Subject: [PATCH 057/511] [flang][cuda] Apply implicit managed attribute to pointer variables under -gpu=mem:managed (#204634) When -gpu=mem:managed is active with CUDA Fortran enabled, only allocatable variables were implicitly given the managed CUDA data attribute. Pointer variables were left without it, causing their allocations to use host memory instead of cudaMallocManaged. This patch extends the implicit managed attribute in FinishSpecificationPart to also cover pointer symbols. 
A LanguageFeature::CUDA guard is added so the attribute is only applied when CUDA Fortran semantics are active. The implicit pinned attribute (-gpu=mem:pinned) remains allocatable-only. --- flang/lib/Semantics/resolve-names.cpp | 14 ++++++++------ flang/test/Lower/CUDA/cuda-gpu-managed.cuf | 13 ++++++------- 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index 6d2d0bf24b194..c6800e15be9dc 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -10397,10 +10397,11 @@ void ResolveNamesVisitor::FinishSpecificationPart( } if (auto *object{symbol.detailsIf()}) { - if (IsAllocatable(symbol) && !object->cudaDataAttr()) { - // Implicitly treat allocatable arrays as managed when feature is - // enabled. This is done after all explicit CUDA attributes have been - // processed. Only applies when CUDA Fortran is enabled; otherwise + if ((IsAllocatable(symbol) || IsPointer(symbol)) && + !object->cudaDataAttr()) { + // Implicitly treat allocatable/pointer arrays as managed when feature + // is enabled. This is done after all explicit CUDA attributes have + // been processed. Only applies when CUDA Fortran is enabled; otherwise // -gpu=mem:managed on a non-CUDA-Fortran translation unit (e.g. pure // OpenACC) would incorrectly route every allocatable through the CUDA // Fortran managed descriptor pipeline. @@ -10411,8 +10412,9 @@ void ResolveNamesVisitor::FinishSpecificationPart( object->set_cudaDataAttr(common::CUDADataAttr::Managed); // Implicitly treat allocatable arrays as pinned when feature is // enabled. 
- else if (context().languageFeatures().IsEnabled( - common::LanguageFeature::CudaPinned)) + else if (IsAllocatable(symbol) && + context().languageFeatures().IsEnabled( + common::LanguageFeature::CudaPinned)) object->set_cudaDataAttr(common::CUDADataAttr::Pinned); } } diff --git a/flang/test/Lower/CUDA/cuda-gpu-managed.cuf b/flang/test/Lower/CUDA/cuda-gpu-managed.cuf index a9ae310978e19..e9b2df1d71ed5 100644 --- a/flang/test/Lower/CUDA/cuda-gpu-managed.cuf +++ b/flang/test/Lower/CUDA/cuda-gpu-managed.cuf @@ -65,20 +65,19 @@ end subroutine ! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_explicit_managedEm"} ! ----------------------------------------------------------------------------- -! Test 5: Pointer variables are NOT affected by -gpu=managed +! Test 5: Pointer variables are also implicitly managed with -gpu=managed ! ----------------------------------------------------------------------------- -subroutine test_pointer_not_managed() +subroutine test_pointer_managed() real, pointer :: ptr(:) allocate(ptr(100)) ptr = 1.0 deallocate(ptr) end subroutine -! CHECK-LABEL: func.func @_QPtest_pointer_not_managed() -! CHECK: %[[BOX:.*]] = fir.alloca !fir.box>> {bindc_name = "ptr", uniq_name = "_QFtest_pointer_not_managedEptr"} -! CHECK-NOT: data_attr = #cuf.cuda -! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_pointer_not_managedEptr"} -! CHECK: fir.call @_FortranAPointerAllocate +! CHECK-LABEL: func.func @_QPtest_pointer_managed() +! CHECK: %[[BOX:.*]] = cuf.alloc !fir.box>> {bindc_name = "ptr", data_attr = #cuf.cuda, uniq_name = "_QFtest_pointer_managedEptr"} +! CHECK: %[[BOX_DECL:.*]]:2 = hlfir.declare %[[BOX]] {data_attr = #cuf.cuda, fortran_attrs = #fir.var_attrs, uniq_name = "_QFtest_pointer_managedEptr"} +! CHECK: cuf.allocate %[[BOX_DECL]]#0 : {{.*}} {data_attr = #cuf.cuda, pointer} ! 
----------------------------------------------------------------------------- ! Test 6: Multiple allocatables - mix of implicit and explicit From 3af5c779f56a6bf9b372098e42f05a1c68cf1008 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Mon, 22 Jun 2026 11:37:45 -0700 Subject: [PATCH 058/511] [SDAG][LegalizeType] Implement result vector widening for VECTOR_DEINTERLEAVE (#203105) I accidentally found that we haven't implemented result vector widening for `ISD::VECTOR_DEINTERLEAVE`. This patch implements such type legalization. --------- Co-authored-by: Simon Pilgrim Co-authored-by: Craig Topper --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 44 +++++++++++++++++++ .../RISCV/rvv/vector-deinterleave-fixed.ll | 26 +++++++++++ .../CodeGen/RISCV/rvv/vector-deinterleave.ll | 19 ++++++++ 4 files changed, 90 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index a83b964e5cc36..71d3e1c66be86 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1078,6 +1078,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N); SDValue WidenVecRes_VECTOR_REVERSE(SDNode *N); SDValue WidenVecRes_GET_ACTIVE_LANE_MASK(SDNode *N); + void WidenVecRes_VECTOR_DEINTERLEAVE(SDNode *N); SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 80532ed8011f2..9e24ae1807ca1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -5313,6 +5313,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::GET_ACTIVE_LANE_MASK: Res = WidenVecRes_GET_ACTIVE_LANE_MASK(N); break; + case ISD::VECTOR_DEINTERLEAVE: + 
WidenVecRes_VECTOR_DEINTERLEAVE(N); + break; case ISD::ADD: case ISD::VP_ADD: case ISD::AND: case ISD::VP_AND: @@ -7467,6 +7470,47 @@ SDValue DAGTypeLegalizer::WidenVecRes_GET_ACTIVE_LANE_MASK(SDNode *N) { return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, SDLoc(N), NVT, N->ops()); } +void DAGTypeLegalizer::WidenVecRes_VECTOR_DEINTERLEAVE(SDNode *N) { + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + ElementCount OrigEC = VT.getVectorElementCount(); + unsigned Factor = N->getNumOperands(); + SDLoc DL(N); + + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + ElementCount WidenEC = WidenVT.getVectorElementCount(); + // We cannot just use the widened operands directly: since they might be + // individually widened, using them directly will result in de-interleaving + // the "padded" lanes that sit in the middle of the vector. Instead, we should + // not just concat but also "re-pack" these operands before extracting new + // operand vectors with the widened type. + EVT PackedWidenVT = EVT::getVectorVT(*DAG.getContext(), EltVT, + WidenEC.multiplyCoefficientBy(Factor)); + SDValue PackedWidenVec = DAG.getUNDEF(PackedWidenVT); + for (unsigned Idx = 0U; Idx < Factor; ++Idx) { + const SDValue Op = N->getOperand(Idx); + // Note that we insert these widened operands with offsets derived from + // the original vector length. + PackedWidenVec = DAG.getInsertSubvector( + DL, PackedWidenVec, GetWidenedVector(Op), + OrigEC.multiplyCoefficientBy(Idx).getKnownMinValue()); + } + + // Extract the new widened operand vectors. + SmallVector NewOps(Factor, SDValue()); + for (unsigned Idx = 0U; Idx < Factor; ++Idx) { + NewOps[Idx] = DAG.getExtractSubvector( + DL, WidenVT, PackedWidenVec, + WidenEC.multiplyCoefficientBy(Idx).getKnownMinValue()); + } + + SmallVector NewVTs(Factor, WidenVT); + SDValue NewRes = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, NewVTs, NewOps); + // Set the widened results manually. 
+ for (unsigned Idx = 0U; Idx < Factor; ++Idx) + SetWidenedVector(SDValue(N, Idx), NewRes.getValue(Idx)); +} + SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { assert(N->getValueType(0).isVector() && N->getOperand(0).getValueType().isVector() && diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll index f76b0122e15a9..07fb8675d85e8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -208,6 +208,32 @@ define {<2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave3_v2i32_v6i32(<6 x ret {<2 x i32>, <2 x i32>, <2 x i32>} %res } +define {<3 x i32>, <3 x i32>, <3 x i32>} @vector_deinterleave3_v3i32_v9i32(<9 x i32> %v) nounwind { +; CHECK-LABEL: vector_deinterleave3_v3i32_v9i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: vsetivli zero, 4, e32, m4, ta, ma +; CHECK-NEXT: vslidedown.vi v12, v8, 8 +; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma +; CHECK-NEXT: vslidedown.vi v10, v8, 4 +; CHECK-NEXT: vmv1r.v v9, v10 +; CHECK-NEXT: vmv2r.v v10, v12 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs4r.v v8, (a0) +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vlseg3e32.v v8, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %d = call {<3 x i32>, <3 x i32>, <3 x i32>} @llvm.vector.deinterleave3(<9 x i32> %v) + ret {<3 x i32>, <3 x i32>, <3 x i32>} %d +} + define {<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>} @vector_deinterleave4_v2i32_v8i32(<8 x i32> %v) nounwind { ; CHECK-LABEL: vector_deinterleave4_v2i32_v8i32: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index a8b637a14f1a9..aa6615061d2ed 100644 --- 
a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -505,6 +505,25 @@ define {, , } @vector_dein ret {, , } %retval } +define {, , } @vector_deinterleave3_nxv3i32_nxv9i32( %v) nounwind { +; CHECK-LABEL: vector_deinterleave3_nxv3i32_nxv9i32: +; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a0) +; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vlseg3e32.v v8, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 +; CHECK-NEXT: ret + %d = call {, , } @llvm.vector.deinterleave3( %v) + ret {, , } %d +} define {, , } @vector_deinterleave_nxv2i64_nxv6i64( %vec) nounwind { ; CHECK-LABEL: vector_deinterleave_nxv2i64_nxv6i64: From 01bfc80347c77e805172ba89bc379441b94da8e6 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 22 Jun 2026 11:39:28 -0700 Subject: [PATCH 059/511] [llvm-profgen] Enable all AArch64 instructions for disassembly (#204619) llvm-profgen builds its MCSubtargetInfo from `ObjectFile::getFeatures()`. For AArch64 ELF objects this often produces an empty feature set, so the disassembler falls back to the baseline Armv8.0-A ISA and rejects valid feature-gated instructions such as LSE atomics and RCPC loads. `llvm-objdump` already handles this by [adding +all for AArch64 disassembly](https://github.com/llvm/llvm-project/blob/1e2d1bbc12f6a5f5931c77d39894ee1b8679f5f8/llvm/tools/llvm-objdump/llvm-objdump.cpp#L2823-L2824) when neither -mattr nor -mcpu is specified. Match that behavior in `llvm-profgen` so valid AArch64 instructions are not reported as invalid and their addresses are preserved in profgen's code and branch maps. Add a regression test covering an AArch64 binary containing `ldaddal` and `ldapr` without object-level feature metadata. 
--------- Co-authored-by: Kunal Pathak --- .../aarch64-disassemble-all-features.test | 46 +++++++++++++++++++ llvm/tools/llvm-profgen/ProfiledBinary.cpp | 10 ++++ 2 files changed, 56 insertions(+) create mode 100644 llvm/test/tools/llvm-profgen/aarch64-disassemble-all-features.test diff --git a/llvm/test/tools/llvm-profgen/aarch64-disassemble-all-features.test b/llvm/test/tools/llvm-profgen/aarch64-disassemble-all-features.test new file mode 100644 index 0000000000000..b4c185bbd431d --- /dev/null +++ b/llvm/test/tools/llvm-profgen/aarch64-disassemble-all-features.test @@ -0,0 +1,46 @@ +# REQUIRES: aarch64-registered-target + +# RUN: yaml2obj %s -o %t.exe +# RUN: llvm-profgen --binary=%t.exe --output=/dev/null --show-disassembly-only 2>&1 \ +# RUN: | FileCheck %s + +# CHECK-NOT: invalid instructions +# CHECK: Disassembly of section .text +# CHECK-NOT: +# CHECK: : +# CHECK-NEXT: {{[0-9a-f]+}}: ldaddal w0, w1, [x2] +# CHECK-NEXT: {{[0-9a-f]+}}: ldapr x3, [x4] +# CHECK-NEXT: {{[0-9a-f]+}}: ret +# CHECK-NOT: + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_AARCH64 + Entry: 0x0000000000401000 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x0000000000401000 + AddressAlign: 0x4 + ## ldaddal w0, w1, [x2] + ## ldapr x3, [x4] + ## ret + Content: 4100E0B883C0BFF8C0035FD6 +ProgramHeaders: + - Type: PT_LOAD + Flags: [ PF_X, PF_R ] + VAddr: 0x0000000000400000 + Align: 0x1000 + FirstSec: .text + LastSec: .text +Symbols: + - Name: foo + Type: STT_FUNC + Section: .text + Binding: STB_GLOBAL + Value: 0x0000000000401000 + Size: 0x000000000000000C diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp index ab452d78062e6..08fd6917c9f3a 100644 --- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp +++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp @@ -745,6 +745,16 @@ void ProfiledBinary::setUpDisassembler(const ObjectFile *Obj) { Expected Features = 
Obj->getFeatures(); if (!Features) exitWithError(Features.takeError(), FileName); + // AArch64 object files do not generally carry complete ISA feature metadata, + // so the subtarget would default to the baseline (Armv8.0-A) feature set. + // That disassembler cannot decode feature-gated instructions (LSE atomics, + // RCPC loads, SVE, ...) that are pervasive in modern AArch64 binaries; they + // would be miscounted as "invalid instructions" and, worse, their addresses + // would be absent from the code/branch maps used for sample attribution. + // Enable all instructions so the disassembler recognizes whatever the + // compiler emitted, matching llvm-objdump's default for AArch64. + if (TheTriple.isAArch64()) + Features->AddFeature("+all"); STI.reset( TheTarget->createMCSubtargetInfo(TheTriple, "", Features->getString())); if (!STI) From 80a02fa4dd769770ee1713885fd3a7e086a2f0d7 Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Mon, 22 Jun 2026 14:42:43 -0400 Subject: [PATCH 060/511] Bump minimum required sphinx Python to 3.8 (#203963) There seems to be de-facto use of at least 3.6 in docs, namely: * Use of pathlib (3.4) in various places * Format f-strings (3.6) and used in clang/docs/ghlinks.py I don't see a strong reason to maintain the divide in minimum version between test/docs, especially considering the "FIXME" indicating the 3.0 lower bound was just a guess to begin with. 
Change-Id: I11e00295ae0a13ec0f1c5cefbb2fdd2db272b152 --- llvm/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index ad69c4b719a3f..cec5462ae02a1 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -1070,7 +1070,7 @@ set(LLVM_PROFDATA_FILE "" CACHE FILEPATH set(LLVM_SPROFDATA_FILE "" CACHE FILEPATH "Sampling profiling data file to use when compiling in order to improve runtime performance.") -if(LLVM_INCLUDE_TESTS) +if(LLVM_INCLUDE_TESTS OR LLVM_ENABLE_SPHINX) # All LLVM Python files should be compatible down to this minimum version. set(LLVM_MINIMUM_PYTHON_VERSION 3.8) else() From 91e17f6a83291b6e0a66cf0df2665f6ede5300e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?I=C3=B1aki=20Amatria=20Barral?= <140811900+inaki-amatria@users.noreply.github.com> Date: Mon, 22 Jun 2026 19:50:12 +0100 Subject: [PATCH 061/511] [llvm-cov] Init `ViewOpts.Colors` before `error()` (#205001) The `commandLineParser` lambda calls `error()` at several points before `ViewOpts.Colors` is set. `error()` uses `ViewOpts.colored_ostream()` which reads `Colors`, triggering undefined behavior (load of uninitialized `bool`). Fix by moving the `Colors` initialization block to just after `ParseCommandLineOptions`, before any `error()` call in the lambda. This ensures error messages are always rendered with properly initialized color settings. 
--- .../tools/llvm-cov/show-colors-uninit.test | 7 ++++ llvm/tools/llvm-cov/CodeCoverage.cpp | 42 ++++++++++--------- 2 files changed, 30 insertions(+), 19 deletions(-) create mode 100644 llvm/test/tools/llvm-cov/show-colors-uninit.test diff --git a/llvm/test/tools/llvm-cov/show-colors-uninit.test b/llvm/test/tools/llvm-cov/show-colors-uninit.test new file mode 100644 index 0000000000000..73caeb9e8899e --- /dev/null +++ b/llvm/test/tools/llvm-cov/show-colors-uninit.test @@ -0,0 +1,7 @@ +; Regression test: calling `llvm-cov show` without `-instr-profile` or +; `-empty-profile` triggered a UBSan error because `error()` reads `Colors` via +; `colored_ostream()` before `Colors` was initialized. + +; RUN: not llvm-cov show 2>&1 | FileCheck %s + +; CHECK: error: exactly one of -instr-profile and -empty-profile must be specified diff --git a/llvm/tools/llvm-cov/CodeCoverage.cpp b/llvm/tools/llvm-cov/CodeCoverage.cpp index dbba0a3d73726..892d25750004f 100644 --- a/llvm/tools/llvm-cov/CodeCoverage.cpp +++ b/llvm/tools/llvm-cov/CodeCoverage.cpp @@ -814,6 +814,29 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) { auto commandLineParser = [&, this](int argc, const char **argv) -> int { cl::ParseCommandLineOptions(argc, argv, "LLVM code coverage tool\n"); ViewOpts.Debug = DebugDump; + + // Initialize `Format` and `Colors` before any call to `error()` or + // `warning()`, which use `ViewOpts.colored_ostream()` and would read + // uninitialized `Colors`. + ViewOpts.Format = Format; + switch (ViewOpts.Format) { + case CoverageViewOptions::OutputFormat::Text: + ViewOpts.Colors = UseColor == cl::boolOrDefault::BOU_UNSET + ? 
sys::Process::StandardOutHasColors() + : UseColor == cl::boolOrDefault::BOU_TRUE; + break; + case CoverageViewOptions::OutputFormat::HTML: + if (UseColor == cl::boolOrDefault::BOU_FALSE) + errs() << "Color output cannot be disabled when generating html.\n"; + ViewOpts.Colors = true; + break; + case CoverageViewOptions::OutputFormat::Lcov: + if (UseColor == cl::boolOrDefault::BOU_TRUE) + errs() << "Color output cannot be enabled when generating lcov.\n"; + ViewOpts.Colors = false; + break; + } + if (Debuginfod) { HTTPClient::initialize(); BIDFetcher = std::make_unique(DebugFileDirectory); @@ -846,25 +869,6 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) { ::exit(0); } - ViewOpts.Format = Format; - switch (ViewOpts.Format) { - case CoverageViewOptions::OutputFormat::Text: - ViewOpts.Colors = UseColor == cl::boolOrDefault::BOU_UNSET - ? sys::Process::StandardOutHasColors() - : UseColor == cl::boolOrDefault::BOU_TRUE; - break; - case CoverageViewOptions::OutputFormat::HTML: - if (UseColor == cl::boolOrDefault::BOU_FALSE) - errs() << "Color output cannot be disabled when generating html.\n"; - ViewOpts.Colors = true; - break; - case CoverageViewOptions::OutputFormat::Lcov: - if (UseColor == cl::boolOrDefault::BOU_TRUE) - errs() << "Color output cannot be enabled when generating lcov.\n"; - ViewOpts.Colors = false; - break; - } - if (!PathRemaps.empty()) { std::vector> Remappings; From e7b38a448130b81bb3b0482b0e22c9106f212e49 Mon Sep 17 00:00:00 2001 From: "forking-google-bazel-bot[bot]" <265904573+forking-google-bazel-bot[bot]@users.noreply.github.com> Date: Mon, 22 Jun 2026 14:03:36 -0500 Subject: [PATCH 062/511] [Bazel] Fixes 46995fb (#205155) This fixes 46995fb32b999c6332fbd1bdfb84d79cad47195f. 
Co-authored-by: Google Bazel Bot --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 45d25d06f35a1..f6c845fefc915 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -7351,9 +7351,11 @@ cc_library( "lib/Conversion/MathToXeVM", ], deps = [ + ":Analysis", ":ArithAttrToLLVMConversion", ":ArithDialect", ":ConversionPassIncGen", + ":GPUCommonTransforms", ":IR", ":LLVMCommonConversion", ":LLVMDialect", From 3e3b3382ccdcd8a1737e28f6a2777248312cfd8f Mon Sep 17 00:00:00 2001 From: Larry Meadows Date: Mon, 22 Jun 2026 12:12:49 -0700 Subject: [PATCH 063/511] [PGO][HIP] HSA-introspection device profile drain + GPU PGO tests (#203056) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Follow-up to #202095 (now landed). #202095's host-shadow device-profile drain can only collect device counters for kernels that registered a host-side shadow via `__hipRegisterVar`. Device-linked programs (e.g. RCCL), whose instrumented code objects are linked directly into the device image with no host shadow, are never drained. This adds a **supplemental, Linux-only HSA-introspection drain** that runs after the host-shadow drain: it walks each GPU agent, enumerates only the code objects actually resident there, reads each one's `__llvm_profile_sections` table on the device, and routes them through the existing `processDeviceOffloadPrf()` path so the emitted `.profraw` layout is identical. A content-dedup set keyed on the `(data, counters, names)` device-pointer triple ensures a section already drained by the host-shadow pass is not drained twice, so the two passes compose without double-counting. It is purely additive — it does not modify #202095's host-shadow drain or its launch-tracking. 
Highlights: - `compiler-rt/lib/profile/InstrProfilingPlatformROCmHSA.cpp`: HSA agent/segment/ symbol walk + dedup; record drained bounds after each host-shadow drain; lazy HSA init (no library constructor, for fork-safety). - Because the HSA walk only touches resident code objects, it lets us avoid the host-shadow drain's collect-all fallback on Linux. When **no** kernel launch was tracked (program never launches, collects before its first launch, or launches only via an untracked API), the host-shadow pass is skipped and the HSA drain covers it safely — instead of faulting/hanging reading a non-resident device on a multi-GPU host. This also closes the silent-data-loss gap for untracked launch APIs (`hipExtLaunchKernel`, cooperative/graph launches). - `clang/lib/Driver/ToolChains/Clang.cpp` / `HIPAMD.cpp`: link the device profile runtime on both the new-offload-driver (`LinkerWrapper::ConstructJob`) and traditional (`lld`) link paths, guarded by `needsProfileRT` + VFS existence. - New GPU/AMDGPU HIP device-PGO lit tests, gated by `hip`/`amdgpu` features (auto-detected from the toolchain + a visible GPU) so they report UNSUPPORTED rather than fail when no GPU is present. Plus GPU-free host unit tests for the device-profile host helpers that run everywhere, including upstream CI. ## Test plan - 4x gfx90a (`gfx90a:sramecc+:xnack-`), ROCm 7.1. - GPU device tests run through standard lit: `llvm-lit -sv /.../compiler-rt/test/profile/Profile-x86_64` over the `GPU/` and `AMDGPU/` subdirectories. The profile `lit.cfg.py` auto-detects a visible GPU (`amdgpu-arch`) and the HIP runtime (`libamdhip64`) and exposes the `hip`/`amdgpu`/`multi-device` features and the `%amdgpu_arch` / `%hip_lib_path` substitutions; both are overridable via `--param amdgpu_arch=… / hip_lib_path=…`. 
- **15 device tests passed, 0 failed.** Covers: basic/coverage/pgo-use, multiple-kernels, device-branching, multi-gpu and non-default-device drain, early-collect / no-kernel edges, RDC vs non-RDC `__llvm_profile_sections`, dedup (host-shadow drains the used device, HSA finds it and dedups), and fork-safety (the RCCL parent-no-HIP / kernel-in-forked-child pattern). - On a host without a GPU + ROCm/HIP (e.g. upstream CI) those device tests report UNSUPPORTED instead of failing, and the GPU subdirectories serialize via a size-1 `gpu` lit parallelism group when they do run. - GPU-free host unit tests run anywhere the profile suite runs (including upstream CI): `instrprof-rocm-grow-array.cpp` (the dynamic-array helper) and `instrprof-rocm-bounds-dedup.cpp` (the `(data, counters, names)` dedup table that backs the "drain each counter set once" guarantee). - Build is warning-clean and `clang-format` clean. --------- Co-authored-by: Cursor --- compiler-rt/lib/profile/CMakeLists.txt | 26 +- .../profile/InstrProfilingPlatformROCm.cpp | 183 +++---- .../profile/InstrProfilingPlatformROCmHSA.cpp | 516 ++++++++++++++++++ .../InstrProfilingPlatformROCmHSADefs.h | 102 ++++ .../InstrProfilingPlatformROCmInternal.h | 131 +++++ .../test/profile/AMDGPU/device-basic.hip | 67 +++ .../profile/AMDGPU/device-early-collect.hip | 68 +++ .../test/profile/AMDGPU/device-no-kernel.hip | 44 ++ .../test/profile/AMDGPU/device-symbols.hip | 42 ++ .../test/profile/AMDGPU/lit.local.cfg.py | 7 + .../test/profile/GPU/instrprof-hip-basic.hip | 51 ++ .../GPU/instrprof-hip-collect-after.hip | 63 +++ .../GPU/instrprof-hip-counter-correctness.hip | 56 ++ .../profile/GPU/instrprof-hip-coverage.hip | 51 ++ .../GPU/instrprof-hip-device-branching.hip | 67 +++ .../profile/GPU/instrprof-hip-fork-safety.hip | 61 +++ .../profile/GPU/instrprof-hip-multi-gpu.hip | 57 ++ .../GPU/instrprof-hip-multi-process-merge.hip | 63 +++ .../GPU/instrprof-hip-multiple-kernels.hip | 58 ++ .../GPU/instrprof-hip-nondefault-device.hip 
| 60 ++ .../profile/GPU/instrprof-hip-pgo-use.hip | 63 +++ compiler-rt/test/profile/GPU/lit.local.cfg.py | 7 + .../profile/instrprof-rocm-bounds-dedup.cpp | 108 ++++ .../profile/instrprof-rocm-grow-array.cpp | 115 ++++ compiler-rt/test/profile/lit.cfg.py | 67 +++ .../compiler-rt/BUILD.bazel | 1 + 26 files changed, 2029 insertions(+), 105 deletions(-) create mode 100644 compiler-rt/lib/profile/InstrProfilingPlatformROCmHSA.cpp create mode 100644 compiler-rt/lib/profile/InstrProfilingPlatformROCmHSADefs.h create mode 100644 compiler-rt/lib/profile/InstrProfilingPlatformROCmInternal.h create mode 100644 compiler-rt/test/profile/AMDGPU/device-basic.hip create mode 100644 compiler-rt/test/profile/AMDGPU/device-early-collect.hip create mode 100644 compiler-rt/test/profile/AMDGPU/device-no-kernel.hip create mode 100644 compiler-rt/test/profile/AMDGPU/device-symbols.hip create mode 100644 compiler-rt/test/profile/AMDGPU/lit.local.cfg.py create mode 100644 compiler-rt/test/profile/GPU/instrprof-hip-basic.hip create mode 100644 compiler-rt/test/profile/GPU/instrprof-hip-collect-after.hip create mode 100644 compiler-rt/test/profile/GPU/instrprof-hip-counter-correctness.hip create mode 100644 compiler-rt/test/profile/GPU/instrprof-hip-coverage.hip create mode 100644 compiler-rt/test/profile/GPU/instrprof-hip-device-branching.hip create mode 100644 compiler-rt/test/profile/GPU/instrprof-hip-fork-safety.hip create mode 100644 compiler-rt/test/profile/GPU/instrprof-hip-multi-gpu.hip create mode 100644 compiler-rt/test/profile/GPU/instrprof-hip-multi-process-merge.hip create mode 100644 compiler-rt/test/profile/GPU/instrprof-hip-multiple-kernels.hip create mode 100644 compiler-rt/test/profile/GPU/instrprof-hip-nondefault-device.hip create mode 100644 compiler-rt/test/profile/GPU/instrprof-hip-pgo-use.hip create mode 100644 compiler-rt/test/profile/GPU/lit.local.cfg.py create mode 100644 compiler-rt/test/profile/instrprof-rocm-bounds-dedup.cpp create mode 100644 
compiler-rt/test/profile/instrprof-rocm-grow-array.cpp diff --git a/compiler-rt/lib/profile/CMakeLists.txt b/compiler-rt/lib/profile/CMakeLists.txt index b41843ad555b2..59cf523eef63a 100644 --- a/compiler-rt/lib/profile/CMakeLists.txt +++ b/compiler-rt/lib/profile/CMakeLists.txt @@ -233,7 +233,9 @@ if(COMPILER_RT_BUILD_PROFILE_ROCM AND NOT COMPILER_RT_PROFILE_BAREMETAL AND TARGET RTSanitizerCommon.${COMPILER_RT_DEFAULT_TARGET_ARCH} AND TARGET RTSanitizerCommonLibc.${COMPILER_RT_DEFAULT_TARGET_ARCH}) - set(PROFILE_ROCM_SOURCES ${PROFILE_SOURCES} InstrProfilingPlatformROCm.cpp) + set(PROFILE_ROCM_SOURCES ${PROFILE_SOURCES} + InstrProfilingPlatformROCm.cpp + InstrProfilingPlatformROCmHSA.cpp) # Enables the device-collection call in InstrProfilingFile.c. set(PROFILE_ROCM_FLAGS ${EXTRA_FLAGS} -DCOMPILER_RT_BUILD_PROFILE_ROCM=1) @@ -243,6 +245,28 @@ if(COMPILER_RT_BUILD_PROFILE_ROCM AND NOT COMPILER_RT_PROFILE_BAREMETAL append_list_if(COMPILER_RT_HAS_FNO_EXCEPTIONS_FLAG -fno-exceptions PROFILE_ROCM_FLAGS) + # Optional build-time verification of the mirrored HSA ABI in + # InstrProfilingPlatformROCmHSA.cpp. HSA is dlopened (never linked), so the + # declarations are hand-mirrored; when the real ROCm headers happen to be + # available, compile the static_assert cross-checks against them. This is + # never a build requirement -- if the package is absent, the checks are simply + # skipped. Linux only, matching the supplemental HSA drain. 
+ if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux") + find_package(hsa-runtime64 QUIET HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm) + if(hsa-runtime64_FOUND) + get_target_property(_profile_hsa_inc hsa-runtime64::hsa-runtime64 + INTERFACE_INCLUDE_DIRECTORIES) + if(_profile_hsa_inc) + message(STATUS "clang_rt.profile_rocm: verifying HSA ABI against " + "${_profile_hsa_inc}") + list(APPEND PROFILE_ROCM_FLAGS -DPROFILE_VERIFY_HSA_ABI=1) + foreach(_inc ${_profile_hsa_inc}) + list(APPEND PROFILE_ROCM_FLAGS "-isystem${_inc}") + endforeach() + endif() + endif() + endif() + # The interceptor path needs sanitizer_common symbols; merge the same object # libs as clang_rt.cfi so the archive stays self-contained. set(PROFILE_ROCM_OBJECT_LIBS diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformROCm.cpp b/compiler-rt/lib/profile/InstrProfilingPlatformROCm.cpp index d0d9b1ea8f61d..5b3db7defa420 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformROCm.cpp +++ b/compiler-rt/lib/profile/InstrProfilingPlatformROCm.cpp @@ -8,7 +8,6 @@ extern "C" { #include "InstrProfiling.h" -#include "InstrProfilingInternal.h" #include "InstrProfilingPort.h" } @@ -33,6 +32,11 @@ extern "C" { #include #endif +#include "InstrProfilingPlatformROCmInternal.h" + +// shortcut to shared helper names +using namespace __prof_rocm; + /* Serialize one-time HIP loader resolution and DynamicModules mutations. * Inline to avoid a sanitizer_common dependency. 
*/ #ifdef _WIN32 @@ -62,11 +66,7 @@ static void unlockDynamicModules(void) { } #endif -struct OffloadSectionShadowGroup; -static int processDeviceOffloadPrf(void *DeviceOffloadPrf, const char *Target, - const OffloadSectionShadowGroup *Sections); - -static int isVerboseMode() { +int __prof_rocm::isVerboseMode() { static int IsVerbose = -1; if (IsVerbose == -1) IsVerbose = getenv("LLVM_PROFILE_VERBOSE") != nullptr; @@ -265,7 +265,7 @@ static BOOL CALLBACK ensureHipLoadedCb(PINIT_ONCE, PVOID, PVOID *) { } #endif -static void ensureHipLoaded(void) { +void __prof_rocm::ensureHipLoaded(void) { #ifdef _WIN32 InitOnceExecuteOnce(&HipLoadedOnce, ensureHipLoadedCb, NULL, NULL); #else @@ -273,6 +273,10 @@ static void ensureHipLoaded(void) { #endif } +// Accessor for the HSA drain: true once the loaded HIP runtime exposes +// hipMemcpy. Kept here so pHipMemcpy stays file-private to this TU. +int __prof_rocm::hipMemcpyAvailable() { return pHipMemcpy != nullptr; } + /* -------------------------------------------------------------------------- */ /* Public wrappers that forward to the loaded HIP symbols */ /* -------------------------------------------------------------------------- */ @@ -295,7 +299,7 @@ static int hipMemcpy(void *dest, const void *src, size_t len, /* Device section symbols must be registered with CLR first; otherwise * hipMemcpy may take a CPU path and crash. */ -static int memcpyDeviceToHost(void *Dst, const void *Src, size_t Size) { +int __prof_rocm::memcpyDeviceToHost(void *Dst, const void *Src, size_t Size) { return hipMemcpy(Dst, Src, Size, 2 /* DToH */); } @@ -498,16 +502,10 @@ static int registerPrfSymbol(const char *Name, void *UserData) { return 0; /* continue */ } - if (MI->NumTUs >= MI->CapTUs) { - int NewCap = MI->CapTUs ? 
MI->CapTUs * 2 : 4; - OffloadDynamicTUInfo *New = (OffloadDynamicTUInfo *)realloc( - MI->TUs, NewCap * sizeof(OffloadDynamicTUInfo)); - if (!New) { - PROF_ERR("%s\n", "failed to grow TU array"); - return 0; - } - MI->TUs = New; - MI->CapTUs = NewCap; + if (growArray((void **)&MI->TUs, &MI->CapTUs, MI->NumTUs + 1, 4, + sizeof(*MI->TUs))) { + PROF_ERR("%s\n", "failed to grow TU array"); + return 0; } OffloadDynamicTUInfo *TU = &MI->TUs[MI->NumTUs++]; TU->DeviceVar = DeviceVar; @@ -535,16 +533,10 @@ __llvm_profile_offload_register_dynamic_module(int ModuleLoadRc, void **Ptr, PROF_NOTE("Registering loaded module %d: rc=%d, module=%p, image=%p\n", NumDynamicModules, ModuleLoadRc, *Ptr, Image); - if (NumDynamicModules >= CapDynamicModules) { - int NewCap = CapDynamicModules ? CapDynamicModules * 2 : 64; - OffloadDynamicModuleInfo *New = (OffloadDynamicModuleInfo *)realloc( - DynamicModules, NewCap * sizeof(OffloadDynamicModuleInfo)); - if (!New) { - unlockDynamicModules(); - return; - } - DynamicModules = New; - CapDynamicModules = NewCap; + if (growArray((void **)&DynamicModules, &CapDynamicModules, + NumDynamicModules + 1, 64, sizeof(*DynamicModules))) { + unlockDynamicModules(); + return; } OffloadDynamicModuleInfo *MI = &DynamicModules[NumDynamicModules++]; @@ -624,19 +616,6 @@ extern "C" void __llvm_profile_offload_unregister_dynamic_module(void *Ptr) { unlockDynamicModules(); } -/* Grow a void* array, doubling capacity (or starting at InitCap). */ -static int growPtrArray(void ***Arr, int *Num, int *Cap, int InitCap) { - if (*Num < *Cap) - return 0; - int NewCap = *Cap ? 
*Cap * 2 : InitCap; - void **New = (void **)realloc(*Arr, NewCap * sizeof(void *)); - if (!New) - return -1; - *Arr = New; - *Cap = NewCap; - return 0; -} - static void **OffloadShadowVariables = nullptr; static int NumShadowVariables = 0; static int CapShadowVariables = 0; @@ -658,41 +637,20 @@ static OffloadSectionShadowGroup *OffloadSectionShadowGroups = nullptr; static int CapSectionShadowGroups = 0; static int ensureSectionShadowGroupCapacity(void) { - if (CapSectionShadowGroups >= CapShadowVariables) - return 0; - OffloadSectionShadowGroup *New = (OffloadSectionShadowGroup *)realloc( - OffloadSectionShadowGroups, CapShadowVariables * sizeof(*New)); - if (!New) - return -1; - __builtin_memset(New + CapSectionShadowGroups, 0, - (CapShadowVariables - CapSectionShadowGroups) * - sizeof(*New)); - OffloadSectionShadowGroups = New; - CapSectionShadowGroups = CapShadowVariables; - return 0; + return growArray((void **)&OffloadSectionShadowGroups, + &CapSectionShadowGroups, CapShadowVariables, + CapShadowVariables, sizeof(*OffloadSectionShadowGroups)); } static int ensureSectionShadowCapacity(OffloadSectionShadowGroup *Group, int MinCapacity) { - if (Group->CapShadows >= MinCapacity) - return 0; - int NewCap = Group->CapShadows ? 
Group->CapShadows * 2 : 4; - while (NewCap < MinCapacity) - NewCap *= 2; - OffloadSectionShadow *New = - (OffloadSectionShadow *)realloc(Group->Shadows, NewCap * sizeof(*New)); - if (!New) - return -1; - __builtin_memset(New + Group->CapShadows, 0, - (NewCap - Group->CapShadows) * sizeof(*New)); - Group->Shadows = New; - Group->CapShadows = NewCap; - return 0; + return growArray((void **)&Group->Shadows, &Group->CapShadows, MinCapacity, 4, + sizeof(*Group->Shadows)); } extern "C" void __llvm_profile_offload_register_shadow_variable(void *ptr) { - if (growPtrArray(&OffloadShadowVariables, &NumShadowVariables, - &CapShadowVariables, 64)) + if (growArray((void **)&OffloadShadowVariables, &CapShadowVariables, + NumShadowVariables + 1, 64, sizeof(*OffloadShadowVariables))) return; if (ensureSectionShadowGroupCapacity()) return; @@ -731,28 +689,8 @@ __llvm_profile_offload_register_section_shadow_variable(void *ptr) { ++Group->NumSections; } -namespace { - -// free()-based scope guard. Use .release() to transfer ownership. -struct UniqueFree { - void *Ptr; - explicit UniqueFree(void *P = nullptr) : Ptr(P) {} - ~UniqueFree() { free(Ptr); } - UniqueFree(const UniqueFree &) = delete; - UniqueFree &operator=(const UniqueFree &) = delete; - char *get() const { return static_cast(Ptr); } - void reset(void *P) { - free(Ptr); - Ptr = P; - } - void *release() { - void *P = Ptr; - Ptr = nullptr; - return P; - } -}; - -} // namespace +// UniqueFree (free()-based scope guard) lives in +// InstrProfilingPlatformROCmInternal.h so the HSA drain can share it. 
static int getRegisteredSectionBounds(void *Shadow, void **DevicePtr, size_t *Size) { @@ -787,8 +725,9 @@ hasCompleteSectionShadows(const OffloadSectionShadowGroup *Sections) { return 1; } -static int processDeviceOffloadPrf(void *DeviceOffloadPrf, const char *Target, - const OffloadSectionShadowGroup *Sections) { +int __prof_rocm::processDeviceOffloadPrf( + void *DeviceOffloadPrf, const char *Target, + const OffloadSectionShadowGroup *Sections) { __llvm_profile_gpu_sections HostSections; if (hipMemcpy(&HostSections, DeviceOffloadPrf, sizeof(HostSections), @@ -1119,8 +1058,14 @@ static int processDeviceOffloadPrf(void *DeviceOffloadPrf, const char *Target, if (ret != 0) { PROF_ERR("%s\n", "failed to write device profile using shared API"); - } else if (isVerboseMode()) { - PROF_NOTE("%s\n", "Successfully wrote device profile using shared API"); + } else { +#if defined(__linux__) && !defined(_WIN32) + // Dedup against the supplemental HSA pass: this section is now drained, so + // the HSA walk must not drain the same device code object again. + profRecordDrainedBounds(DevDataBegin, DevCntsBegin, DevNamesBegin); +#endif + if (isVerboseMode()) + PROF_NOTE("%s\n", "Successfully wrote device profile using shared API"); } return ret; @@ -1152,13 +1097,11 @@ static int isHipAvailable(void) { /* Collect device-side profile data */ /* -------------------------------------------------------------------------- */ -extern "C" int __llvm_profile_hip_collect_device_data(void) { - if (NumShadowVariables == 0 && NumDynamicModules == 0) - return 0; - - if (!isHipAvailable()) - return 0; - +/* Host-shadow drain: static-linked kernels (host __hipRegisterVar shadows) and + * intercepted dynamic modules. The caller gates this on + * (NumShadowVariables || NumDynamicModules) && isHipAvailable(); pure + * device-linked programs (RCCL) are handled by the supplemental HSA pass. 
*/ +static int collectHostShadowData(void) { int Ret = 0; /* Shadow variables (static-linked kernels): drain from every device. */ @@ -1172,6 +1115,18 @@ extern "C" int __llvm_profile_hip_collect_device_data(void) { PROF_NOTE("Skipping unused device %d\n", Dev); continue; } +#if defined(__linux__) && !defined(_WIN32) + /* When no kernel launch was tracked at all, shouldCollectDevice() falls + * back to collect-all, which can fault/hang reading a non-resident + * device's sections on a multi-GPU host. On Linux the supplemental HSA + * drain covers those cases safely. */ + if (!__atomic_load_n(&AnyDeviceUsed, __ATOMIC_ACQUIRE)) { + if (isVerboseMode()) + PROF_NOTE("No tracked launch; deferring device %d to HSA drain\n", + Dev); + continue; + } +#endif if (hipSetDevice(Dev) != 0) { if (isVerboseMode()) PROF_NOTE("Failed to set device %d, skipping\n", Dev); @@ -1182,8 +1137,9 @@ extern "C" int __llvm_profile_hip_collect_device_data(void) { PROF_NOTE("Collecting static profile data from device %d (%s)\n", Dev, ArchName); for (int i = 0; i < NumShadowVariables; ++i) { - /* RDC-mode multi-shadow drains need a distinct profraw per TU; - * single-TU programs keep the bare arch target. */ + /* Stable name per shadow so a repeated drain (explicit collect plus the + * atexit drain) overwrites its own profraw rather than emitting a + * second one: bare arch for a single TU, arch. for RDC multi-TU. 
*/ const char *Target = ArchName; char TargetWithIdx[64]; if (NumShadowVariables > 1) { @@ -1215,6 +1171,22 @@ extern "C" int __llvm_profile_hip_collect_device_data(void) { } unlockDynamicModules(); + return Ret; +} + +extern "C" int __llvm_profile_hip_collect_device_data(void) { + int Ret = 0; + + if ((NumShadowVariables != 0 || NumDynamicModules != 0) && isHipAvailable() && + collectHostShadowData() != 0) + Ret = -1; + +#if defined(__linux__) && !defined(_WIN32) + /* Supplemental HSA-introspection drain */ + if (drainDevicesViaHsa() != 0) + Ret = -1; +#endif + if (Ret != 0) PROF_WARN("%s\n", "failed to collect device profile data"); return Ret; @@ -1268,6 +1240,8 @@ static int recordHipMultiDeviceLaunchResult(int Rc, return Rc; } +// interceptors must have external linkage +// NOLINTBEGIN(misc-use-internal-linkage) INTERCEPTOR(int, hipLaunchKernel, const void *Function, HipDim3 GridDim, HipDim3 BlockDim, void **Args, size_t SharedMemBytes, HipStream Stream) { @@ -1398,6 +1372,7 @@ INTERCEPTOR(int, hipModuleUnload, void *module) { __llvm_profile_offload_unregister_dynamic_module(module); return REAL(hipModuleUnload)(module); } +// NOLINTEND(misc-use-internal-linkage) __attribute__((constructor)) static void installHipInterceptors() { /* Avoid interception unless the HIP runtime is already loaded. */ diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformROCmHSA.cpp b/compiler-rt/lib/profile/InstrProfilingPlatformROCmHSA.cpp new file mode 100644 index 0000000000000..5e365ff24bb14 --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingPlatformROCmHSA.cpp @@ -0,0 +1,516 @@ +//===- InstrProfilingPlatformROCmHSA.cpp - ROCm HSA device drain ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Supplemental HSA-introspection drain (Linux only). +// +// The host-shadow drain in InstrProfilingPlatformROCm.cpp only sees device code +// objects with a host-side shadow (__hipRegisterVar) or an intercepted +// hipModuleLoad*. Device-linked code with no host shadow (e.g. RCCL) is +// invisible to it. This pass walks every GPU agent's loaded executables via +// HSA, finds each __llvm_profile_sections table on the device, and drains the +// ones the host-shadow pass missed (deduped by the section-bounds tuple). It +// reuses processDeviceOffloadPrf() so the profraw layout is identical. +// +//===----------------------------------------------------------------------===// + +#if defined(__linux__) + +extern "C" { +#include "InstrProfiling.h" +#include "InstrProfilingPort.h" +} + +#include "InstrProfilingPlatformROCmInternal.h" +#include "interception/interception.h" +// C (not C++) headers: clang_rt.profile is built -nostdinc++. +#include +#include +#include +#include +#include + +using namespace __prof_rocm; + +// Mirrored HSA declarations the drain needs (dlopen'd, not linked). See the +// header for the rationale; the values are HSA's stable C ABI. +#include "InstrProfilingPlatformROCmHSADefs.h" + +#ifdef PROFILE_VERIFY_HSA_ABI +// When the real ROCm headers are available at build time (developer installs +// and the downstream GPU CI), check that the mirror above still matches them. 
+#include +#include + +static_assert(PROF_HSA_STATUS_SUCCESS == HSA_STATUS_SUCCESS, "HSA ABI drift"); +static_assert(PROF_HSA_STATUS_INFO_BREAK == HSA_STATUS_INFO_BREAK, + "HSA ABI drift"); +static_assert(PROF_HSA_AGENT_INFO_NAME == HSA_AGENT_INFO_NAME, "HSA ABI drift"); +static_assert(PROF_HSA_AGENT_INFO_DEVICE == HSA_AGENT_INFO_DEVICE, + "HSA ABI drift"); +static_assert(PROF_HSA_DEVICE_TYPE_GPU == HSA_DEVICE_TYPE_GPU, "HSA ABI drift"); +static_assert(PROF_HSA_SYMBOL_KIND_VARIABLE == HSA_SYMBOL_KIND_VARIABLE, + "HSA ABI drift"); +static_assert(PROF_HSA_EXECUTABLE_SYMBOL_INFO_TYPE == + HSA_EXECUTABLE_SYMBOL_INFO_TYPE, + "HSA ABI drift"); +static_assert(PROF_HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH == + HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, + "HSA ABI drift"); +static_assert(PROF_HSA_EXECUTABLE_SYMBOL_INFO_NAME == + HSA_EXECUTABLE_SYMBOL_INFO_NAME, + "HSA ABI drift"); +static_assert(PROF_HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS == + HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, + "HSA ABI drift"); +static_assert(PROF_HSA_EXTENSION_AMD_LOADER == HSA_EXTENSION_AMD_LOADER, + "HSA ABI drift"); + +static_assert(sizeof(prof_hsa_agent_t) == sizeof(hsa_agent_t), "HSA ABI drift"); +static_assert(sizeof(prof_hsa_executable_t) == sizeof(hsa_executable_t), + "HSA ABI drift"); +static_assert(sizeof(prof_hsa_executable_symbol_t) == + sizeof(hsa_executable_symbol_t), + "HSA ABI drift"); + +static_assert(sizeof(prof_hsa_loader_segment_descriptor_t) == + sizeof(hsa_ven_amd_loader_segment_descriptor_t), + "HSA ABI drift"); +static_assert(offsetof(prof_hsa_loader_segment_descriptor_t, agent) == + offsetof(hsa_ven_amd_loader_segment_descriptor_t, agent), + "HSA ABI drift"); +static_assert(offsetof(prof_hsa_loader_segment_descriptor_t, executable) == + offsetof(hsa_ven_amd_loader_segment_descriptor_t, executable), + "HSA ABI drift"); +static_assert(offsetof(prof_hsa_loader_segment_descriptor_t, segment_base) == + offsetof(hsa_ven_amd_loader_segment_descriptor_t, + segment_base), + 
"HSA ABI drift"); +static_assert(offsetof(prof_hsa_loader_segment_descriptor_t, segment_size) == + offsetof(hsa_ven_amd_loader_segment_descriptor_t, + segment_size), + "HSA ABI drift"); + +// We fetch the loader pfn table by raw layout, so query_segment_descriptors +// must sit at the same offset as in the real table. +static_assert(offsetof(prof_hsa_loader_pfn_t, query_segment_descriptors) == + offsetof(hsa_ven_amd_loader_1_00_pfn_t, + hsa_ven_amd_loader_query_segment_descriptors), + "HSA ABI drift"); +#endif // PROFILE_VERIFY_HSA_ABI + +static hsa_iterate_agents_ty pHsaIterateAgents = nullptr; +static hsa_agent_get_info_ty pHsaAgentGetInfo = nullptr; +static hsa_executable_iterate_agent_symbols_ty pHsaExecIterAgentSyms = nullptr; +static hsa_executable_symbol_get_info_ty pHsaSymGetInfo = nullptr; +static hsa_loader_query_segment_descriptors_ty pQuerySegDescs = nullptr; + +/* Status-check shorthands, in the spirit of the thin HIP wrappers in + * InstrProfilingPlatformROCm.cpp: every HSA entry point returns + * prof_hsa_status_t. hsaOkOrBreak() also accepts INFO_BREAK, which the + * iterate_* callbacks use to stop early and is not an error. */ +static inline bool hsaOk(prof_hsa_status_t St) { + return St == PROF_HSA_STATUS_SUCCESS; +} +static inline bool hsaOkOrBreak(prof_hsa_status_t St) { + return St == PROF_HSA_STATUS_SUCCESS || St == PROF_HSA_STATUS_INFO_BREAK; +} + +/* 0 = not attempted, 1 = ready, -1 = unavailable. Acquire/release atomics: a + * thread observing HsaRuntimeState==1 also sees the published p* pointers. */ +static int HsaRuntimeState = 0; + +static int setHsaRuntimeState(int S) { + __atomic_store_n(&HsaRuntimeState, S, __ATOMIC_RELEASE); + return S > 0 ? 0 : -1; +} + +/* Resolve HSA entry points and the AMD loader extension once, and confirm HIP's + * hipMemcpy is reachable for the device-to-host copies. 
*/ +static int loadHsaRuntimePointers(void) { + int State = __atomic_load_n(&HsaRuntimeState, __ATOMIC_ACQUIRE); + if (State) + return State > 0 ? 0 : -1; + + if (!__interception::DynamicLoaderAvailable()) { + if (isVerboseMode()) + PROF_NOTE("%s", "Dynamic library loading not available - " + "HSA device profiling disabled\n"); + return setHsaRuntimeState(-1); + } + + void *Hsa = __interception::OpenLibrary("libhsa-runtime64.so"); + if (!Hsa) + Hsa = __interception::OpenLibrary("libhsa-runtime64.so.1"); + if (!Hsa) { + if (isVerboseMode()) + PROF_NOTE("%s", "libhsa-runtime64.so not loadable - " + "HSA device profiling disabled\n"); + return setHsaRuntimeState(-1); + } + + hsa_init_ty pHsaInit = + (hsa_init_ty)__interception::LookupSymbol(Hsa, "hsa_init"); + hsa_system_get_major_extension_table_ty pGetExtTable = + (hsa_system_get_major_extension_table_ty)__interception::LookupSymbol( + Hsa, "hsa_system_get_major_extension_table"); + pHsaIterateAgents = (hsa_iterate_agents_ty)__interception::LookupSymbol( + Hsa, "hsa_iterate_agents"); + pHsaAgentGetInfo = (hsa_agent_get_info_ty)__interception::LookupSymbol( + Hsa, "hsa_agent_get_info"); + pHsaExecIterAgentSyms = + (hsa_executable_iterate_agent_symbols_ty)__interception::LookupSymbol( + Hsa, "hsa_executable_iterate_agent_symbols"); + pHsaSymGetInfo = + (hsa_executable_symbol_get_info_ty)__interception::LookupSymbol( + Hsa, "hsa_executable_symbol_get_info"); + + if (!pHsaInit || !pGetExtTable || !pHsaIterateAgents || !pHsaAgentGetInfo || + !pHsaExecIterAgentSyms || !pHsaSymGetInfo) { + PROF_WARN("%s", + "required HSA symbols missing - HSA device profiling disabled\n"); + return setHsaRuntimeState(-1); + } + + /* Bring HSA up lazily on the first drain (idempotent, refcounted), never from + * a library constructor -- see the fork-safety note at end of file. 
*/ + prof_hsa_status_t St = pHsaInit(); + if (!hsaOkOrBreak(St)) { + if (isVerboseMode()) + PROF_NOTE("hsa_init failed (0x%x) - HSA device profiling disabled\n", St); + return setHsaRuntimeState(-1); + } + + prof_hsa_loader_pfn_t LoaderApi; + __builtin_memset(&LoaderApi, 0, sizeof(LoaderApi)); + St = pGetExtTable(PROF_HSA_EXTENSION_AMD_LOADER, 1, sizeof(LoaderApi), + &LoaderApi); + if (!hsaOk(St) || !LoaderApi.query_segment_descriptors) { + PROF_WARN("AMD loader extension unavailable (0x%x) - " + "HSA device profiling disabled\n", + St); + return setHsaRuntimeState(-1); + } + pQuerySegDescs = LoaderApi.query_segment_descriptors; + + /* The device-to-host copies go through the shared HIP loader. */ + ensureHipLoaded(); + if (!hipMemcpyAvailable()) { + PROF_WARN("%s", "hipMemcpy unavailable - HSA device profiling disabled\n"); + return setHsaRuntimeState(-1); + } + + if (isVerboseMode()) + PROF_NOTE("%s", "HSA + HIP runtime resolved for device profiling\n"); + return setHsaRuntimeState(1); +} + +/* The canonical device bounds-table symbol from InstrProfilingPlatformGPU.c. */ +static const char ProfileSectionsSymbol[] = "__llvm_profile_sections"; + +/* Dedup of drained section-bounds tuples, shared with the host-shadow path + * (processDeviceOffloadPrf records here on every successful drain) so each + * unique counter set is drained exactly once across both paths. + */ +static ProfBoundsSet SeenBounds; + +/* Has this bounds tuple already been drained? Pure check, no state mutation. */ +static int profBoundsAlreadyDrained(const void *D, const void *C, + const void *N) { + return SeenBounds.contains(D, C, N); +} + +/* Record a drained bounds tuple. Idempotent; call only after a successful drain + * so a failed attempt stays retryable. 
*/ +void __prof_rocm::profRecordDrainedBounds(const void *D, const void *C, + const void *N) { + SeenBounds.record(D, C, N); +} + +#define PROF_MAX_GPU_AGENTS 64 + +/* Buffer size for HSA agent names and symbol names we read back; both the + * device arch string and the __llvm_profile_sections symbol are far shorter. */ +#define PROF_HSA_NAME_MAX 64 + +namespace { +struct GpuAgent { + prof_hsa_agent_t agent; + char arch[PROF_HSA_NAME_MAX]; +}; + +struct WalkState { + GpuAgent agents[PROF_MAX_GPU_AGENTS]; + int num_agents; + int total_found; + int total_drained; +}; + +/* Per (agent, executable) symbol-iteration state. */ +struct SymbolState { + const char *arch; + int found; + int drained; +}; +} // namespace + +/* HSA per-symbol callback: when it finds a __llvm_profile_sections variable, + * drain it via processDeviceOffloadPrf() unless the host-shadow path (or an + * earlier agent) already handled the same bounds. */ +static prof_hsa_status_t onSymbol(prof_hsa_executable_t, prof_hsa_agent_t, + prof_hsa_executable_symbol_t Sym, + void *Data) { + SymbolState *S = (SymbolState *)Data; + + prof_hsa_symbol_kind_t Kind; + if (!hsaOk( + pHsaSymGetInfo(Sym, PROF_HSA_EXECUTABLE_SYMBOL_INFO_TYPE, &Kind)) || + Kind != PROF_HSA_SYMBOL_KIND_VARIABLE) + return PROF_HSA_STATUS_SUCCESS; + + uint32_t NameLen = 0; + if (!hsaOk(pHsaSymGetInfo(Sym, PROF_HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH, + &NameLen)) || + NameLen != sizeof(ProfileSectionsSymbol) - 1) + return PROF_HSA_STATUS_SUCCESS; + + char NameBuf[PROF_HSA_NAME_MAX]; + if (NameLen + 1 > sizeof(NameBuf)) + return PROF_HSA_STATUS_SUCCESS; + if (!hsaOk( + pHsaSymGetInfo(Sym, PROF_HSA_EXECUTABLE_SYMBOL_INFO_NAME, NameBuf))) + return PROF_HSA_STATUS_SUCCESS; + NameBuf[NameLen] = '\0'; + + if (strcmp(NameBuf, ProfileSectionsSymbol) != 0) + return PROF_HSA_STATUS_SUCCESS; + + uint64_t Addr = 0; + if (!hsaOk(pHsaSymGetInfo( + Sym, PROF_HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS, &Addr)) || + Addr == 0) { + if (isVerboseMode()) + 
PROF_NOTE("%s", "failed to read __llvm_profile_sections address\n"); + return PROF_HSA_STATUS_SUCCESS; + } + + S->found++; + + // Read the bounds table first to dedup (and detect empty sections) before + // the full copy/relocate done by processDeviceOffloadPrf. + __llvm_profile_gpu_sections Sec; + if (memcpyDeviceToHost(&Sec, (void *)(uintptr_t)Addr, sizeof(Sec)) != 0) { + PROF_WARN("%s", "failed to copy device bounds table\n"); + return PROF_HSA_STATUS_SUCCESS; + } + if (profBoundsAlreadyDrained(Sec.DataStart, Sec.CountersStart, + Sec.NamesStart)) { + if (isVerboseMode()) + PROF_NOTE("%s", "device bounds already drained, skipping\n"); + return PROF_HSA_STATUS_SUCCESS; + } + + size_t DataBytes = (const char *)Sec.DataStop - (const char *)Sec.DataStart; + size_t CntsBytes = + (const char *)Sec.CountersStop - (const char *)Sec.CountersStart; + if (DataBytes == 0 || CntsBytes == 0) { + // Empty code object: nothing to write. Mark seen so we don't revisit it. + profRecordDrainedBounds(Sec.DataStart, Sec.CountersStart, Sec.NamesStart); + return PROF_HSA_STATUS_SUCCESS; + } + + // Name HSA-drained objects in their own ".hsaN" suffix space so they never + // collide with the host-shadow path's "arch"/"arch." filenames. The drain + // latch (HsaDrainCompleted) already prevents re-draining an object, so a + // plain per-drain counter is enough for uniqueness. + static int DrainIndex = 0; + char Target[96]; + snprintf(Target, sizeof(Target), "%s.hsa%d", S->arch, DrainIndex); + + // Record the bounds (and advance the index) only on a successful write so a + // transient error stays retryable on a later agent or collect call. 
+ if (processDeviceOffloadPrf((void *)(uintptr_t)Addr, Target, nullptr) == 0) { + S->drained++; + DrainIndex++; + profRecordDrainedBounds(Sec.DataStart, Sec.CountersStart, Sec.NamesStart); + } + + return PROF_HSA_STATUS_SUCCESS; +} + +static prof_hsa_status_t collectAgent(prof_hsa_agent_t Agent, void *Data) { + prof_hsa_device_type_t DevType; + if (!hsaOk(pHsaAgentGetInfo(Agent, PROF_HSA_AGENT_INFO_DEVICE, &DevType)) || + DevType != PROF_HSA_DEVICE_TYPE_GPU) + return PROF_HSA_STATUS_SUCCESS; + + WalkState *W = (WalkState *)Data; + if (W->num_agents >= PROF_MAX_GPU_AGENTS) + return PROF_HSA_STATUS_SUCCESS; + + GpuAgent &GA = W->agents[W->num_agents++]; + GA.agent = Agent; + char Name[PROF_HSA_NAME_MAX]; + __builtin_memset(Name, 0, sizeof(Name)); + pHsaAgentGetInfo(Agent, PROF_HSA_AGENT_INFO_NAME, Name); + size_t N = strnlen(Name, sizeof(GA.arch) - 1); + __builtin_memcpy(GA.arch, Name, N); + GA.arch[N] = '\0'; + if (!GA.arch[0]) + strncpy(GA.arch, "amdgpu", sizeof(GA.arch) - 1); + + if (isVerboseMode()) + PROF_NOTE("GPU agent %d: %s\n", W->num_agents - 1, GA.arch); + return PROF_HSA_STATUS_SUCCESS; +} + +/* Reentrancy guard and "drained at least once" latch (both acquire/release). */ +static int HsaDrainInProgress = 0; +static int HsaDrainCompleted = 0; + +int __prof_rocm::drainDevicesViaHsa(void) { + if (__atomic_load_n(&HsaDrainCompleted, __ATOMIC_ACQUIRE)) + return 0; + + int Expected = 0; + if (!__atomic_compare_exchange_n(&HsaDrainInProgress, &Expected, 1, + /*weak=*/0, __ATOMIC_ACQ_REL, + __ATOMIC_ACQUIRE)) + return 0; + + struct InProgressGuard { + ~InProgressGuard() { + __atomic_store_n(&HsaDrainInProgress, 0, __ATOMIC_RELEASE); + } + } _Guard; + + if (loadHsaRuntimePointers() != 0) + return 0; /* Runtime unavailable: stay retryable. 
*/ + + WalkState W; + __builtin_memset(&W, 0, sizeof(W)); + prof_hsa_status_t St = pHsaIterateAgents(collectAgent, &W); + if (!hsaOkOrBreak(St)) { + PROF_WARN("hsa_iterate_agents failed (0x%x)\n", St); + return -1; + } + if (W.num_agents == 0) { + if (isVerboseMode()) + PROF_NOTE("%s", "no GPU agents present; nothing to drain (will retry)\n"); + return 0; + } + + /* query_segment_descriptors ships in every loader-extension version, is more + * permissive than iterate_executables on ROCm, and yields the loaded + * (agent, executable) pairs directly. */ + size_t NumSegs = 0; + St = pQuerySegDescs(nullptr, &NumSegs); + if (!hsaOk(St)) { + PROF_WARN("query_segment_descriptors(count) failed (0x%x)\n", St); + return -1; + } + if (NumSegs == 0) { + if (isVerboseMode()) + PROF_NOTE("%s", "no loaded segments; nothing to drain (will retry)\n"); + return 0; + } + + prof_hsa_loader_segment_descriptor_t *Segs = + (prof_hsa_loader_segment_descriptor_t *)calloc(NumSegs, sizeof(*Segs)); + if (!Segs) { + PROF_ERR("%s\n", "failed to allocate segment descriptor array"); + return -1; + } + UniqueFree SegsOwner(Segs); + + St = pQuerySegDescs(Segs, &NumSegs); + if (!hsaOk(St)) { + PROF_WARN("query_segment_descriptors(fetch) failed (0x%x)\n", St); + return -1; + } + + if (isVerboseMode()) + PROF_NOTE("query_segment_descriptors: %zu segments\n", NumSegs); + + // Walk each unique (agent, executable) pair once. 
+ struct SeenPair { + uint64_t agent; + uint64_t exec; + }; + enum { kSeenPairsInitCap = 64 }; + SeenPair *Seen = nullptr; + int NumPairs = 0; + int CapPairs = 0; + int IterFailures = 0; + + for (size_t i = 0; i < NumSegs; ++i) { + if (Segs[i].executable.handle == 0 || Segs[i].agent.handle == 0) + continue; + + bool AlreadySeen = false; + for (int j = 0; j < NumPairs; ++j) + if (Seen[j].agent == Segs[i].agent.handle && + Seen[j].exec == Segs[i].executable.handle) { + AlreadySeen = true; + break; + } + if (AlreadySeen) + continue; + if (growArray((void **)&Seen, &CapPairs, NumPairs + 1, kSeenPairsInitCap, + sizeof(*Seen)) == 0) { + Seen[NumPairs].agent = Segs[i].agent.handle; + Seen[NumPairs].exec = Segs[i].executable.handle; + NumPairs++; + } + + const char *Arch = nullptr; + for (int k = 0; k < W.num_agents; ++k) + if (W.agents[k].agent.handle == Segs[i].agent.handle) { + Arch = W.agents[k].arch; + break; + } + if (!Arch) + continue; /* not a GPU agent we collected */ + + SymbolState S; + __builtin_memset(&S, 0, sizeof(S)); + S.arch = Arch; + if (isVerboseMode()) + PROF_NOTE("walking executable 0x%llx on %s\n", + (unsigned long long)Segs[i].executable.handle, Arch); + prof_hsa_status_t IterSt = + pHsaExecIterAgentSyms(Segs[i].executable, Segs[i].agent, onSymbol, &S); + if (!hsaOkOrBreak(IterSt)) { + PROF_WARN("hsa_executable_iterate_agent_symbols on executable 0x%llx " + "failed (0x%x)\n", + (unsigned long long)Segs[i].executable.handle, IterSt); + IterFailures++; + } + W.total_found += S.found; + W.total_drained += S.drained; + } + + if (isVerboseMode()) + PROF_NOTE("HSA walk complete: agents=%d pairs=%d found=%d drained=%d " + "iter-failures=%d\n", + W.num_agents, NumPairs, W.total_found, W.total_drained, + IterFailures); + + free(Seen); + + /* Latch only when we actually drained data. A "found nothing new" walk is + * deliberately not latched: an early collect can precede any kernel launch, + * and latching it would suppress the real exit-time drain. 
No-op walks are + * cheap to repeat. */ + if (W.total_drained > 0) + __atomic_store_n(&HsaDrainCompleted, 1, __ATOMIC_RELEASE); + return (IterFailures > 0) ? -1 : 0; +} + +/* Fork-safety: deliberately no library constructor calling hsa_init(). */ + +#endif /* defined(__linux__) && !defined(_WIN32) -- HSA drain */ diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformROCmHSADefs.h b/compiler-rt/lib/profile/InstrProfilingPlatformROCmHSADefs.h new file mode 100644 index 0000000000000..68ff6e18e3359 --- /dev/null +++ b/compiler-rt/lib/profile/InstrProfilingPlatformROCmHSADefs.h @@ -0,0 +1,102 @@ +//===- InstrProfilingPlatformROCmHSADefs.h - mirrored HSA decls ----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Minimal HSA type/enum/function-pointer declarations used by the Linux-only +// supplemental HSA drain (InstrProfilingPlatformROCmHSA.cpp). compiler-rt +// cannot depend on the ROCm headers at build time, and the runtime dlopens +// libhsa-runtime64.so rather than linking it, so the handful of declarations +// the drain needs are mirrored here under a prof_hsa_* prefix. +// +// Values mirror hsa/hsa.h and hsa/hsa_ven_amd_loader.h. These are part of HSA's +// stable, versioned C ABI (libhsa-runtime64.so.1), so they do not shift. 
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PROFILE_INSTRPROFILINGPLATFORMROCMHSADEFS_H
+#define PROFILE_INSTRPROFILINGPLATFORMROCMHSADEFS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef uint32_t prof_hsa_status_t;
+#define PROF_HSA_STATUS_SUCCESS ((prof_hsa_status_t)0x0)
+#define PROF_HSA_STATUS_INFO_BREAK ((prof_hsa_status_t)0x1)
+
+typedef struct {
+  uint64_t handle;
+} prof_hsa_agent_t;
+typedef struct {
+  uint64_t handle;
+} prof_hsa_executable_t;
+typedef struct {
+  uint64_t handle;
+} prof_hsa_executable_symbol_t;
+
+typedef uint32_t prof_hsa_agent_info_t;
+#define PROF_HSA_AGENT_INFO_NAME ((prof_hsa_agent_info_t)0)
+#define PROF_HSA_AGENT_INFO_DEVICE ((prof_hsa_agent_info_t)17)
+
+typedef uint32_t prof_hsa_device_type_t;
+#define PROF_HSA_DEVICE_TYPE_GPU ((prof_hsa_device_type_t)1)
+
+typedef uint32_t prof_hsa_symbol_kind_t;
+#define PROF_HSA_SYMBOL_KIND_VARIABLE ((prof_hsa_symbol_kind_t)0)
+
+typedef uint32_t prof_hsa_executable_symbol_info_t;
+#define PROF_HSA_EXECUTABLE_SYMBOL_INFO_TYPE                                   \
+  ((prof_hsa_executable_symbol_info_t)0)
+#define PROF_HSA_EXECUTABLE_SYMBOL_INFO_NAME_LENGTH                            \
+  ((prof_hsa_executable_symbol_info_t)1)
+#define PROF_HSA_EXECUTABLE_SYMBOL_INFO_NAME                                   \
+  ((prof_hsa_executable_symbol_info_t)2)
+#define PROF_HSA_EXECUTABLE_SYMBOL_INFO_VARIABLE_ADDRESS                       \
+  ((prof_hsa_executable_symbol_info_t)21)
+
+#define PROF_HSA_EXTENSION_AMD_LOADER ((uint16_t)0x201)
+
+typedef uint32_t prof_hsa_loader_storage_type_t;
+
+typedef struct {
+  prof_hsa_agent_t agent;
+  prof_hsa_executable_t executable;
+  prof_hsa_loader_storage_type_t code_object_storage_type;
+  const void *code_object_storage_base;
+  size_t code_object_storage_size;
+  size_t code_object_storage_offset;
+  const void *segment_base;
+  size_t segment_size;
+} prof_hsa_loader_segment_descriptor_t;
+
+typedef prof_hsa_status_t (*hsa_init_ty)(void);
+typedef prof_hsa_status_t (*hsa_iterate_agents_ty)(
+    prof_hsa_status_t (*)(prof_hsa_agent_t, void *), void *);
+typedef prof_hsa_status_t (*hsa_agent_get_info_ty)(prof_hsa_agent_t,
+                                                   prof_hsa_agent_info_t,
+                                                   void *);
+typedef prof_hsa_status_t (*hsa_executable_iterate_agent_symbols_ty)(
+    prof_hsa_executable_t, prof_hsa_agent_t,
+    prof_hsa_status_t (*)(prof_hsa_executable_t, prof_hsa_agent_t,
+                          prof_hsa_executable_symbol_t, void *),
+    void *);
+typedef prof_hsa_status_t (*hsa_executable_symbol_get_info_ty)(
+    prof_hsa_executable_symbol_t, prof_hsa_executable_symbol_info_t, void *);
+typedef prof_hsa_status_t (*hsa_system_get_major_extension_table_ty)(uint16_t,
+                                                                     uint16_t,
+                                                                     size_t,
+                                                                     void *);
+typedef prof_hsa_status_t (*hsa_loader_query_segment_descriptors_ty)(
+    prof_hsa_loader_segment_descriptor_t *, size_t *);
+
+/* First two members of hsa_ven_amd_loader_1_00_pfn_t; query_host_address only
+ * pads the offset to query_segment_descriptors. */
+typedef struct {
+  void *query_host_address;
+  hsa_loader_query_segment_descriptors_ty query_segment_descriptors;
+} prof_hsa_loader_pfn_t;
+
+#endif // PROFILE_INSTRPROFILINGPLATFORMROCMHSADEFS_H
diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformROCmInternal.h b/compiler-rt/lib/profile/InstrProfilingPlatformROCmInternal.h
new file mode 100644
index 0000000000000..e1531fddd5524
--- /dev/null
+++ b/compiler-rt/lib/profile/InstrProfilingPlatformROCmInternal.h
@@ -0,0 +1,131 @@
+//===- InstrProfilingPlatformROCmInternal.h - ROCm shared interface -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Private interface shared between the ROCm host-shadow drain
+// (InstrProfilingPlatformROCm.cpp) and the Linux-only supplemental
+// HSA-introspection drain (InstrProfilingPlatformROCmHSA.cpp).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PROFILE_INSTRPROFILINGPLATFORMROCMINTERNAL_H
+#define PROFILE_INSTRPROFILINGPLATFORMROCMINTERNAL_H
+
+#include <stddef.h>
+#include <stdlib.h>
+
+// For prototype declarations
+struct OffloadSectionShadowGroup;
+
+namespace __prof_rocm {
+
+// free()-based scope guard. Use .release() to transfer ownership.
+struct UniqueFree {
+  void *Ptr;
+  explicit UniqueFree(void *P = nullptr) : Ptr(P) {}
+  ~UniqueFree() { free(Ptr); }
+  UniqueFree(const UniqueFree &) = delete;
+  UniqueFree &operator=(const UniqueFree &) = delete;
+  char *get() const { return static_cast<char *>(Ptr); }
+  void reset(void *P) {
+    free(Ptr);
+    Ptr = P;
+  }
+  void *release() {
+    void *P = Ptr;
+    Ptr = nullptr;
+    return P;
+  }
+};
+
+// Grow a heap array (doubling from InitCap) to hold at least MinCount elements
+// of ElemSize bytes each. A non-positive InitCap is treated as 1.
+// Success: zero new memory, update pointer, return 0.
+// Failure: return -1, data is still intact.
+inline int growArray(void **Arr, int *Cap, int MinCount, int InitCap,
+                     size_t ElemSize) {
+  if (*Cap >= MinCount)
+    return 0;
+  int NewCap = *Cap ? *Cap : (InitCap > 0 ? InitCap : 1);
+  while (NewCap < MinCount)
+    NewCap *= 2;
+  void *New = realloc(*Arr, (size_t)NewCap * ElemSize);
+  if (!New)
+    return -1;
+  __builtin_memset((char *)New + (size_t)*Cap * ElemSize, 0,
+                   (size_t)(NewCap - *Cap) * ElemSize);
+  *Arr = New;
+  *Cap = NewCap;
+  return 0;
+}
+
+// Set of (data, counters, names) device section-bounds tuples that have already
+// been drained. Both ROCm drains record here so each unique device counter set
+// is written exactly once.
+// See test/profile/instrprof-rocm-bounds-dedup.cpp.
+struct ProfBoundsSet {
+  struct Tuple {
+    const void *Data;
+    const void *Counters;
+    const void *Names;
+  };
+  enum { kInitCap = 64 };
+
+  Tuple *Items = nullptr;
+  int Count = 0;
+  int Cap = 0;
+
+  // True iff this exact (Data, Counters, Names) tuple was already recorded. All
+  // three fields must match: two code objects can share, e.g., a names section.
+  bool contains(const void *D, const void *C, const void *N) const {
+    for (int I = 0; I < Count; ++I)
+      if (Items[I].Data == D && Items[I].Counters == C && Items[I].Names == N)
+        return true;
+    return false;
+  }
+
+  // Record a tuple unless already present. Returns true only when a new tuple
+  // was added (false for a duplicate or when the growth failed under OOM).
+  bool record(const void *D, const void *C, const void *N) {
+    if (contains(D, C, N))
+      return false;
+    if (growArray((void **)&Items, &Cap, Count + 1, kInitCap, sizeof(*Items)))
+      return false;
+    Items[Count].Data = D;
+    Items[Count].Counters = C;
+    Items[Count].Names = N;
+    ++Count;
+    return true;
+  }
+};
+
+// HIP/host-shadow helpers defined in InstrProfilingPlatformROCm.cpp and reused
+// by the HSA drain.
+int isVerboseMode();
+void ensureHipLoaded();
+// True once the loaded HIP runtime exposes hipMemcpy (device-to-host copies).
+int hipMemcpyAvailable();
+int memcpyDeviceToHost(void *Dst, const void *Src, size_t Size);
+int processDeviceOffloadPrf(void *DeviceOffloadPrf, const char *Target,
+                            const ::OffloadSectionShadowGroup *Sections);
+
+#if defined(__linux__)
+// Implemented in InstrProfilingPlatformROCmHSA.cpp.
+
+// Record a drained section-bounds tuple so the supplemental HSA pass skips any
+// code object the host-shadow path already drained.
+void profRecordDrainedBounds(const void *Data, const void *Counters,
+                             const void *Names);
+
+// Walk every GPU agent's loaded executables via HSA and drain each
+// __llvm_profile_sections table the host-shadow pass did not already handle.
+int drainDevicesViaHsa(void);
+#endif
+
+} // namespace __prof_rocm
+
+#endif // PROFILE_INSTRPROFILINGPLATFORMROCMINTERNAL_H
diff --git a/compiler-rt/test/profile/AMDGPU/device-basic.hip b/compiler-rt/test/profile/AMDGPU/device-basic.hip
new file mode 100644
index 0000000000000..4fcf044802240
--- /dev/null
+++ b/compiler-rt/test/profile/AMDGPU/device-basic.hip
@@ -0,0 +1,67 @@
+// Basic HIP device PGO drain end-to-end: a host + device .profraw are written
+// at exit (the device one arch-prefixed), they merge, the merged profile
+// contains the device kernel's counters, and llvm-cov reports device-side
+// coverage. Covers both non-RDC and RDC device compiles.
+//
+// REQUIRES: hip, amdgpu
+
+// RUN: rm -rf %t.dir && mkdir -p %t.dir
+
+// --- non-RDC ---
+// RUN: %clang -x hip --offload-arch=%amdgpu_arch -fno-gpu-rdc \
+// RUN:   -fprofile-instr-generate -fcoverage-mapping %s -o %t.dir/a.out \
+// RUN:   -L%hip_lib_path -lamdhip64
+// RUN: env LLVM_PROFILE_FILE=%t.dir/host.%%p.profraw \
+// RUN:   LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \
+// RUN:   %t.dir/a.out
+// A device profraw (arch-prefixed) must have been drained alongside the host one.
+// RUN: ls %t.dir/gfx*.profraw
+// RUN: llvm-profdata merge %t.dir/*.profraw -o %t.dir/a.profdata
+// RUN: llvm-profdata show --all-functions %t.dir/a.profdata \
+// RUN:   | FileCheck --check-prefix=FUNCS %s
+// Confirm the embedded device image is extractable (failure here is the real
+// cause of any downstream llvm-cov failure, so let it propagate).
+// RUN: llvm-objdump --offloading %t.dir/a.out > /dev/null
+// RUN: llvm-cov report %t.dir/a.out.0.hip-amdgcn-amd-amdhsa--*gfx* \
+// RUN:   -instr-profile=%t.dir/a.profdata 2>&1 | FileCheck --check-prefix=COV %s
+
+// --- RDC ---
+// RUN: rm -f %t.dir/*.profraw
+// RUN: %clang -x hip --offload-arch=%amdgpu_arch -fgpu-rdc \
+// RUN:   -fprofile-instr-generate -fcoverage-mapping %s -o %t.dir/b.out \
+// RUN:   -L%hip_lib_path -lamdhip64
+// RUN: env LLVM_PROFILE_FILE=%t.dir/host.%%p.profraw \
+// RUN:   LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \
+// RUN:   %t.dir/b.out
+// RUN: ls %t.dir/gfx*.profraw
+// RUN: llvm-profdata merge %t.dir/*.profraw -o %t.dir/b.profdata
+// RUN: llvm-profdata show --all-functions %t.dir/b.profdata \
+// RUN:   | FileCheck --check-prefix=FUNCS %s
+
+#include <hip/hip_runtime.h>
+
+__global__ void addk(int *p) {
+  if (*p > 0)
+    *p += 1;
+  else
+    *p -= 1;
+}
+
+int main() {
+  int *d = nullptr;
+  if (hipMalloc(&d, sizeof(int)) != hipSuccess)
+    return 2;
+  int h = 5;
+  (void)hipMemcpy(d, &h, sizeof(int), hipMemcpyHostToDevice);
+  addk<<<1, 1>>>(d);
+  (void)hipMemcpy(&h, d, sizeof(int), hipMemcpyDeviceToHost);
+  (void)hipFree(d);
+  return h > 0 ? 0 : 1;
+}
+
+// The merged profile contains both the host main and the device kernel,
+// proving the device counters were drained and merged.
+// FUNCS-DAG: addk
+// FUNCS-DAG: main
+
+// COV: TOTAL
diff --git a/compiler-rt/test/profile/AMDGPU/device-early-collect.hip b/compiler-rt/test/profile/AMDGPU/device-early-collect.hip
new file mode 100644
index 0000000000000..3e2c6e84e26c2
--- /dev/null
+++ b/compiler-rt/test/profile/AMDGPU/device-early-collect.hip
@@ -0,0 +1,68 @@
+// M1 regression: calling __llvm_profile_hip_collect_device_data() before any
+// kernel has been launched must not poison the later atexit drain. The early
+// call sees "no instrumented code object loaded yet" (a transient no-op) and
+// must not latch the drain as completed; otherwise the post-launch atexit
+// pass produces no device .profraw and we silently lose device counters.
+//
+// REQUIRES: hip, amdgpu
+// Guards the Linux introspection drain's DrainCompleted latch; the Windows
+// host-shadow drain has no such latch (it tracks per-TU Processed flags).
+// UNSUPPORTED: windows
+
+// RUN: rm -rf %t.dir && mkdir -p %t.dir
+// RUN: %clang -x hip --offload-arch=%amdgpu_arch -fno-gpu-rdc \
+// RUN:   -fprofile-instr-generate -fcoverage-mapping %s -o %t.dir/a.out \
+// RUN:   -L%hip_lib_path -lamdhip64
+// RUN: env LLVM_PROFILE_FILE=%t.dir/host.%%p.profraw \
+// RUN:   LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \
+// RUN:   %t.dir/a.out
+// Both the host profraw and at least one device profraw (gfx-prefixed) must
+// have been produced, despite the early collection attempt.
+// RUN: ls %t.dir/host.*.profraw
+// RUN: ls %t.dir/gfx*.profraw
+// And the merged profile must contain the device kernel that was launched
+// *after* the early collect.
+// RUN: llvm-profdata merge %t.dir/*.profraw -o %t.dir/a.profdata
+// RUN: llvm-profdata show --all-functions %t.dir/a.profdata \
+// RUN:   | FileCheck %s
+
+#include <hip/hip_runtime.h>
+
+// Declared by libclang_rt.profile-<arch>.a; we call it directly to
+// simulate any caller that drains device counters at an arbitrary point in
+// the program lifetime (e.g. a per-iteration profile dump).
+extern "C" int __llvm_profile_hip_collect_device_data(void);
+
+__global__ void post_collect_kernel(int *p) {
+  if (*p > 0)
+    *p += 1;
+  else
+    *p -= 1;
+}
+
+int main() {
+  // (1) Early collection -- runs before any kernel launch. The drainer
+  //     finds either no GPU agents, no loaded segments, or no instrumented
+  //     bounds table, and returns 0 without latching DrainCompleted.
+  (void)__llvm_profile_hip_collect_device_data();
+
+  // (2) Now launch a kernel. HIP loads the device code object that carries
+  //     the __llvm_profile_sections bounds table, executes our kernel, and
+  //     populates the device-side counters.
+  int *d = nullptr;
+  if (hipMalloc(&d, sizeof(int)) != hipSuccess)
+    return 2;
+  int h = 5;
+  (void)hipMemcpy(d, &h, sizeof(int), hipMemcpyHostToDevice);
+  post_collect_kernel<<<1, 1>>>(d);
+  (void)hipMemcpy(&h, d, sizeof(int), hipMemcpyDeviceToHost);
+  (void)hipFree(d);
+
+  // (3) Exit normally. The atexit drain runs and -- because step (1) did
+  //     not latch DrainCompleted -- it walks the (now loaded) code object,
+  //     finds __llvm_profile_sections, and emits the device .profraw.
+  return h > 0 ? 0 : 1;
+}
+
+// CHECK-DAG: post_collect_kernel
+// CHECK-DAG: main
diff --git a/compiler-rt/test/profile/AMDGPU/device-no-kernel.hip b/compiler-rt/test/profile/AMDGPU/device-no-kernel.hip
new file mode 100644
index 0000000000000..a154308d725d8
--- /dev/null
+++ b/compiler-rt/test/profile/AMDGPU/device-no-kernel.hip
@@ -0,0 +1,44 @@
+// Independence / robustness: an instrumented HIP program that never launches a
+// kernel still writes its host .profraw, and the device drain is a clean no-op
+// (no crash, no spurious device .profraw). We assert the no-op condition
+// directly via the runtime's verbose log rather than rely on HIP lazy-loading
+// to leave the device code object unloaded -- the loader may load it for
+// other reasons (e.g. eager registration), and in that case the drain
+// legitimately walks it and reports zero instrumented sections / zero
+// drained. Either outcome is correct.
+//
+// REQUIRES: hip, amdgpu
+// The terminal conditions checked below ("no GPU agents", "no loaded
+// segments", "drained=0") are Linux HSA-drain strings with no Windows analog.
+// UNSUPPORTED: windows
+
+// RUN: rm -rf %t.dir && mkdir -p %t.dir
+// RUN: %clang -x hip --offload-arch=%amdgpu_arch \
+// RUN:   -fprofile-instr-generate -fcoverage-mapping %s -o %t.dir/a.out \
+// RUN:   -L%hip_lib_path -lamdhip64
+// RUN: env LLVM_PROFILE_FILE=%t.dir/host.%%p.profraw \
+// RUN:   LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \
+// RUN:   LLVM_PROFILE_VERBOSE=1 \
+// RUN:   %t.dir/a.out 2> %t.dir/verbose.log
+// RUN: ls %t.dir/host.*.profraw
+// No arch-prefixed device profraw should have been produced.
+// RUN: not ls %t.dir/gfx*.profraw
+// The drain must have run; one of these three terminal conditions must hold:
+//   - no GPU agents enumerated (test host has /dev/kfd but no usable agent)
+//   - no loaded code object segments at exit
+//   - the walk completed and drained=0 (no instrumented kernel was launched
+//     so the device code object either wasn't loaded or its bounds were
+//     empty/already drained)
+// RUN: FileCheck --input-file=%t.dir/verbose.log %s
+// CHECK: {{no GPU agents present|no loaded segments|drained=0}}
+
+#include <hip/hip_runtime.h>
+
+// Defined but never launched.
+__global__ void unused(int *p) { *p += 1; }
+
+int main() {
+  int n = 0;
+  (void)hipGetDeviceCount(&n);
+  return 0;
+}
diff --git a/compiler-rt/test/profile/AMDGPU/device-symbols.hip b/compiler-rt/test/profile/AMDGPU/device-symbols.hip
new file mode 100644
index 0000000000000..f12283b7da636
--- /dev/null
+++ b/compiler-rt/test/profile/AMDGPU/device-symbols.hip
@@ -0,0 +1,42 @@
+// The decoupled drain reads only the canonical __llvm_profile_sections bounds
+// table provided by the device profile runtime (InstrProfilingPlatformGPU.c),
+// since clang no longer emits a per-TU struct. Assert that symbol is present
+// in the device ELF's dynamic symbol table (protected visibility) for both
+// non-RDC and RDC device compiles. This is the contract the drainer depends on.
+//
+// REQUIRES: hip, amdgpu
+
+// RUN: rm -rf %t.dir && mkdir -p %t.dir
+
+// --- non-RDC ---
+// RUN: %clang -x hip --offload-arch=%amdgpu_arch -fno-gpu-rdc \
+// RUN:   -fprofile-instr-generate -fcoverage-mapping %s -o %t.dir/a.out \
+// RUN:   -L%hip_lib_path -lamdhip64
+// Extraction failure here would make the readelf invocation succeed against
+// an empty/missing file; surface it instead of hiding it behind `|| true`.
+// RUN: llvm-objdump --offloading %t.dir/a.out > /dev/null
+// RUN: llvm-readelf --dyn-syms %t.dir/a.out.0.hip-amdgcn-amd-amdhsa--*gfx* \
+// RUN:   | FileCheck %s
+
+// --- RDC ---
+// RUN: %clang -x hip --offload-arch=%amdgpu_arch -fgpu-rdc \
+// RUN:   -fprofile-instr-generate -fcoverage-mapping %s -o %t.dir/b.out \
+// RUN:   -L%hip_lib_path -lamdhip64
+// RUN: llvm-objdump --offloading %t.dir/b.out > /dev/null
+// RUN: llvm-readelf --dyn-syms %t.dir/b.out.0.hip-amdgcn-amd-amdhsa--*gfx* \
+// RUN:   | FileCheck %s
+
+// CHECK: PROTECTED {{.*}} __llvm_profile_sections
+
+#include <hip/hip_runtime.h>
+
+__global__ void k(int *p) { *p += 1; }
+
+int main() {
+  int *d = nullptr;
+  if (hipMalloc(&d, sizeof(int)) != hipSuccess)
+    return 2;
+  k<<<1, 1>>>(d);
+  (void)hipFree(d);
+  return 0;
+}
diff --git a/compiler-rt/test/profile/AMDGPU/lit.local.cfg.py b/compiler-rt/test/profile/AMDGPU/lit.local.cfg.py
new file mode 100644
index 0000000000000..3ad624f258ddf
--- /dev/null
+++ b/compiler-rt/test/profile/AMDGPU/lit.local.cfg.py
@@ -0,0 +1,7 @@
+# Device-profile drain tests: require an AMD GPU (and, implicitly, the amdgcn
+# device profile runtime in the resource directory and a ROCm/HIP install).
+if not {"hip", "amdgpu"}.issubset(config.available_features):
+    config.unsupported = True
+else:
+    # Tests share the GPU(s) and pin HIP_VISIBLE_DEVICES; serialize them.
+    config.parallelism_group = "gpu"
diff --git a/compiler-rt/test/profile/GPU/instrprof-hip-basic.hip b/compiler-rt/test/profile/GPU/instrprof-hip-basic.hip
new file mode 100644
index 0000000000000..8cbe7c970052c
--- /dev/null
+++ b/compiler-rt/test/profile/GPU/instrprof-hip-basic.hip
@@ -0,0 +1,51 @@
+// Test basic HIP PGO instrumentation and profile collection.
+//
+// REQUIRES: hip, amdgpu
+// RUN: %clang -x hip -fprofile-instr-generate -fcoverage-mapping \
+// RUN:   --offload-arch=%amdgpu_arch %s -o %t -L%hip_lib_path -lamdhip64
+// RUN: rm -rf %t.dir && mkdir -p %t.dir
+// RUN: env LLVM_PROFILE_FILE=%t.dir/prof.profraw \
+// RUN:   LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \
+// RUN:   HIP_VISIBLE_DEVICES=0 %run %t
+// RUN: ls %t.dir/prof.profraw
+// RUN: llvm-profdata merge -o %t.profdata %t.dir/
+// RUN: llvm-profdata show --all-functions %t.profdata \
+// RUN:   | FileCheck %s --check-prefix=PROF
+//
+// PROF: _Z6squarePiPKii
+// PROF: main
+// PROF: Functions shown: 2
+// PROF: Total functions: 2
+
+#include <hip/hip_runtime.h>
+#include <cstdio>
+
+__global__ void square(int *out, const int *in, int n) {
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx < n)
+    out[idx] = in[idx] * in[idx];
+}
+
+int main() {
+  constexpr int N = 64;
+  int h_in[N], h_out[N];
+  for (int i = 0; i < N; ++i) h_in[i] = i;
+
+  int *d_in, *d_out;
+  (void)hipMalloc(&d_in, N * sizeof(int));
+  (void)hipMalloc(&d_out, N * sizeof(int));
+  (void)hipMemcpy(d_in, h_in, N * sizeof(int), hipMemcpyHostToDevice);
+
+  square<<<1, N>>>(d_out, d_in, N);
+
+  (void)hipMemcpy(h_out, d_out, N * sizeof(int), hipMemcpyDeviceToHost);
+
+  int ok = 1;
+  for (int i = 0; i < N; ++i)
+    if (h_out[i] != i * i) ok = 0;
+
+  printf("%s\n", ok ? "PASS" : "FAIL");
+  (void)hipFree(d_in);
+  (void)hipFree(d_out);
+  return !ok;
+}
diff --git a/compiler-rt/test/profile/GPU/instrprof-hip-collect-after.hip b/compiler-rt/test/profile/GPU/instrprof-hip-collect-after.hip
new file mode 100644
index 0000000000000..5a2393f8dcc47
--- /dev/null
+++ b/compiler-rt/test/profile/GPU/instrprof-hip-collect-after.hip
@@ -0,0 +1,63 @@
+// Explicit-collect idempotency: a program that calls
+// __llvm_profile_hip_collect_device_data() itself *after* a launch (e.g. a
+// periodic profile dump) and then also exits normally must not double-count the
+// device counters. The explicit drain and the atexit drain write the same
+// arch-named device profraw, so the merged profile must reflect a single launch
+// (function count 64, even-branch 32), not two. Complements device-early-collect
+// (which covers a collect *before* the first launch).
+//
+// REQUIRES: hip, amdgpu
+// RUN: rm -rf %t.dir && mkdir -p %t.dir
+// RUN: %clang -x hip --offload-arch=%amdgpu_arch -fno-gpu-rdc \
+// RUN:   -fprofile-instr-generate -fcoverage-mapping %s -o %t.dir/a.out \
+// RUN:   -L%hip_lib_path -lamdhip64
+// RUN: env LLVM_PROFILE_FILE=%t.dir/host.%%p.profraw \
+// RUN:   LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \
+// RUN:   HIP_VISIBLE_DEVICES=0 %t.dir/a.out
+// RUN: llvm-profdata merge %t.dir/*.profraw -o %t.dir/a.profdata
+// RUN: llvm-profdata show --all-functions --counts %t.dir/a.profdata \
+// RUN:   | FileCheck %s
+//
+// A single launch of 64 threads, drained twice (explicit + atexit), must still
+// merge to exactly one launch's worth of counts.
+// CHECK: _Z8classifyPii:
+// CHECK: Function count: 64
+// CHECK: Block counts: [0, 32]
+
+#include <hip/hip_runtime.h>
+#include <cstdio>
+
+extern "C" int __llvm_profile_hip_collect_device_data(void);
+
+__global__ void classify(int *out, int n) {
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n)
+    return;
+  if (idx % 2 == 0)
+    out[idx] = 1;
+  else
+    out[idx] = 0;
+}
+
+int main() {
+  constexpr int N = 64;
+  int *d = nullptr;
+  if (hipMalloc(&d, N * sizeof(int)) != hipSuccess)
+    return 2;
+  classify<<<1, N>>>(d, N);
+  (void)hipDeviceSynchronize();
+
+  // Explicit mid-program collect after the launch. The atexit drain runs again
+  // at exit; together they must not double the device counters.
+  (void)__llvm_profile_hip_collect_device_data();
+
+  int h[N];
+  (void)hipMemcpy(h, d, N * sizeof(int), hipMemcpyDeviceToHost);
+  (void)hipFree(d);
+
+  int evens = 0;
+  for (int i = 0; i < N; ++i)
+    evens += h[i];
+  printf("%s\n", evens == 32 ? "PASS" : "FAIL");
+  return evens == 32 ? 0 : 1;
+}
diff --git a/compiler-rt/test/profile/GPU/instrprof-hip-counter-correctness.hip b/compiler-rt/test/profile/GPU/instrprof-hip-counter-correctness.hip
new file mode 100644
index 0000000000000..c2bfc9ac9dc66
--- /dev/null
+++ b/compiler-rt/test/profile/GPU/instrprof-hip-counter-correctness.hip
@@ -0,0 +1,56 @@
+// Quantitative device-counter correctness: the drained device profile must carry
+// the *exact* per-region execution counts produced by the kernel, not merely
+// "some counts are present". A single launch of 64 threads over classify() must
+// record a function entry count of 64 and an even-branch block count of 32
+// (idx % 2 == 0 holds for exactly half of idx in [0, 64)). This pins the drain +
+// dedup path against silent under/over-counting (e.g. a dedup bug that dropped
+// or doubled a section would change these numbers).
+//
+// REQUIRES: hip, amdgpu
+// RUN: rm -rf %t.dir && mkdir -p %t.dir
+// RUN: %clang -x hip --offload-arch=%amdgpu_arch -fno-gpu-rdc \
+// RUN:   -fprofile-instr-generate -fcoverage-mapping %s -o %t.dir/a.out \
+// RUN:   -L%hip_lib_path -lamdhip64
+// RUN: env LLVM_PROFILE_FILE=%t.dir/host.%%p.profraw \
+// RUN:   LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \
+// RUN:   HIP_VISIBLE_DEVICES=0 %t.dir/a.out
+// RUN: llvm-profdata merge %t.dir/*.profraw -o %t.dir/a.profdata
+// RUN: llvm-profdata show --all-functions --counts %t.dir/a.profdata \
+// RUN:   | FileCheck %s
+//
+// The device kernel ran with exactly 64 threads, all of which entered the
+// function; the even branch was taken 32 times and the early-return path 0.
+// CHECK: _Z8classifyPii:
+// CHECK: Function count: 64
+// CHECK: Block counts: [0, 32]
+
+#include <hip/hip_runtime.h>
+#include <cstdio>
+
+__global__ void classify(int *out, int n) {
+  int idx = blockIdx.x * blockDim.x + threadIdx.x;
+  if (idx >= n)
+    return;
+  if (idx % 2 == 0)
+    out[idx] = 1;
+  else
+    out[idx] = 0;
+}
+
+int main() {
+  constexpr int N = 64;
+  int *d = nullptr;
+  if (hipMalloc(&d, N * sizeof(int)) != hipSuccess)
+    return 2;
+  classify<<<1, N>>>(d, N);
+  (void)hipDeviceSynchronize();
+  int h[N];
+  (void)hipMemcpy(h, d, N * sizeof(int), hipMemcpyDeviceToHost);
+  (void)hipFree(d);
+
+  int evens = 0;
+  for (int i = 0; i < N; ++i)
+    evens += h[i];
+  printf("%s\n", evens == 32 ? "PASS" : "FAIL");
+  return evens == 32 ? 0 : 1;
+}
diff --git a/compiler-rt/test/profile/GPU/instrprof-hip-coverage.hip b/compiler-rt/test/profile/GPU/instrprof-hip-coverage.hip
new file mode 100644
index 0000000000000..a867c30f0edfb
--- /dev/null
+++ b/compiler-rt/test/profile/GPU/instrprof-hip-coverage.hip
@@ -0,0 +1,51 @@
+// Test HIP coverage mapping produces source-level coverage for host code.
+// +// REQUIRES: hip, amdgpu +// RUN: %clang -x hip -fprofile-instr-generate -fcoverage-mapping \ +// RUN: --offload-arch=%amdgpu_arch %s -o %t -L%hip_lib_path -lamdhip64 +// RUN: rm -rf %t.dir && mkdir -p %t.dir +// RUN: env LLVM_PROFILE_FILE=%t.dir/prof.profraw \ +// RUN: LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \ +// RUN: HIP_VISIBLE_DEVICES=0 %run %t +// RUN: llvm-profdata merge -o %t.profdata %t.dir/ +// RUN: llvm-cov report %t -instr-profile=%t.profdata 2>&1 \ +// RUN: | FileCheck %s --check-prefix=REPORT +// +// REPORT: instrprof-hip-coverage.hip +// No coverage column should be fully uncovered. Anchor on a non-digit before +// the "0.00%" so this does not spuriously match e.g. "80.00%". +// REPORT-NOT: {{[^.0-9]0[.]00%}} + +#include +#include + +__device__ int gpu_abs(int x) { + return x < 0 ? -x : x; +} + +__global__ void abs_kernel(int *data, int n) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) + data[idx] = gpu_abs(data[idx]); +} + +int main() { + constexpr int N = 16; + int h[N]; + for (int i = 0; i < N; ++i) + h[i] = (i % 2 == 0) ? i : -i; + + int *d; + (void)hipMalloc(&d, N * sizeof(int)); + (void)hipMemcpy(d, h, N * sizeof(int), hipMemcpyHostToDevice); + abs_kernel<<<1, N>>>(d, N); + (void)hipMemcpy(h, d, N * sizeof(int), hipMemcpyDeviceToHost); + (void)hipFree(d); + + int ok = 1; + for (int i = 0; i < N; ++i) + if (h[i] != i) ok = 0; + + printf("%s\n", ok ? "PASS" : "FAIL"); + return !ok; +} diff --git a/compiler-rt/test/profile/GPU/instrprof-hip-device-branching.hip b/compiler-rt/test/profile/GPU/instrprof-hip-device-branching.hip new file mode 100644 index 0000000000000..a24b28ec9af0a --- /dev/null +++ b/compiler-rt/test/profile/GPU/instrprof-hip-device-branching.hip @@ -0,0 +1,67 @@ +// Test that device-side branching is captured in profile counters. +// Exercises the classify-style pattern where different branches are taken +// by different threads, verifying that counter values reflect actual execution. 
+// +// REQUIRES: hip, amdgpu +// RUN: %clang -x hip -fprofile-instr-generate -fcoverage-mapping \ +// RUN: --offload-arch=%amdgpu_arch %s -o %t -L%hip_lib_path -lamdhip64 +// RUN: rm -rf %t.dir && mkdir -p %t.dir +// RUN: env LLVM_PROFILE_FILE=%t.dir/prof.profraw \ +// RUN: LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \ +// RUN: HIP_VISIBLE_DEVICES=0 %run %t +// RUN: llvm-profdata merge -o %t.profdata %t.dir/ +// RUN: llvm-profdata show --all-functions %t.profdata \ +// RUN: | FileCheck %s --check-prefix=PROF +// +// Device functions should appear with non-zero counters. The __device__ +// classify() helper is inlined into the histogram kernel, so it does not get a +// separate profile record; its branching is captured within the kernel's +// counters instead. +// PROF-DAG: _Z9histogramPKiPii +// PROF-DAG: main +// PROF: Total functions: 2 +// PROF: Maximum function count: {{[1-9][0-9]*}} + +#include +#include + +__device__ int classify(int x) { + if (x > 100) return 2; + else if (x > 0) return 1; + else return 0; +} + +__global__ void histogram(const int *input, int *bins, int n) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) { + int cls = classify(input[idx]); + atomicAdd(&bins[cls], 1); + } +} + +int main() { + constexpr int N = 256; + constexpr int NBINS = 3; + + int h_in[N], h_bins[NBINS] = {}; + for (int i = 0; i < N; ++i) + h_in[i] = (i % 3 == 0) ? -1 : (i % 3 == 1) ? 50 : 200; + + int *d_in, *d_bins; + (void)hipMalloc(&d_in, N * sizeof(int)); + (void)hipMalloc(&d_bins, NBINS * sizeof(int)); + (void)hipMemcpy(d_in, h_in, N * sizeof(int), hipMemcpyHostToDevice); + (void)hipMemset(d_bins, 0, NBINS * sizeof(int)); + + histogram<<<1, N>>>(d_in, d_bins, N); + + (void)hipMemcpy(h_bins, d_bins, NBINS * sizeof(int), hipMemcpyDeviceToHost); + printf("bins: [%d, %d, %d]\n", h_bins[0], h_bins[1], h_bins[2]); + + int ok = (h_bins[0] > 0 && h_bins[1] > 0 && h_bins[2] > 0); + printf("%s\n", ok ? 
"PASS" : "FAIL"); + + (void)hipFree(d_in); + (void)hipFree(d_bins); + return !ok; +} diff --git a/compiler-rt/test/profile/GPU/instrprof-hip-fork-safety.hip b/compiler-rt/test/profile/GPU/instrprof-hip-fork-safety.hip new file mode 100644 index 0000000000000..c79cf568f88bc --- /dev/null +++ b/compiler-rt/test/profile/GPU/instrprof-hip-fork-safety.hip @@ -0,0 +1,61 @@ +// Fork safety: loading the profile-instrumented library must NOT initialize +// HSA in a process that itself never touches HIP and only runs device work in +// forked children. RCCL's unit tests follow exactly this pattern -- the parent +// deliberately keeps HIP/HSA uninitialized and launches kernels only inside +// forked children. If the profile runtime's library constructor eagerly called +// hsa_init(), the child would inherit invalid HSA state across fork() and crash +// inside HSA (e.g. hsa_amd_signal_create -> SharedSignalPool::alloc). The HSA +// drain therefore brings HSA up lazily, never from a constructor. +// +// REQUIRES: hip, amdgpu +// The eager-hsa_init fork hazard and the lazy HSA drain are Linux-only. +// UNSUPPORTED: windows +// RUN: %clang -x hip -fprofile-instr-generate -fcoverage-mapping \ +// RUN: --offload-arch=%amdgpu_arch %s -o %t -L%hip_lib_path -lamdhip64 +// RUN: env LLVM_PROFILE_FILE=%t.profraw \ +// RUN: LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \ +// RUN: %run %t 2>&1 | FileCheck %s +// +// The forked child must complete its kernel without crashing in HSA. +// CHECK: PASS + +#include +#include +#include +#include + +__global__ void increment(int *p) { *p += 1; } + +static int doChildWork() { + int *d = nullptr; + if (hipMalloc(&d, sizeof(int)) != hipSuccess) + return 1; + int h = 41; + (void)hipMemcpy(d, &h, sizeof(int), hipMemcpyHostToDevice); + increment<<<1, 1>>>(d); + if (hipDeviceSynchronize() != hipSuccess) + return 1; + (void)hipMemcpy(&h, d, sizeof(int), hipMemcpyDeviceToHost); + (void)hipFree(d); + return h == 42 ? 
0 : 1; +} + +int main() { + // The parent intentionally performs no HIP/HSA work before forking. + pid_t pid = fork(); + if (pid < 0) { + printf("FAIL (fork failed)\n"); + return 1; + } + if (pid == 0) { + // Child runs the device work; _exit avoids flushing the parent's profile + // handlers from the child (the RCCL test pattern). + _exit(doChildWork()); + } + + int status = 0; + (void)waitpid(pid, &status, 0); + int ok = WIFEXITED(status) && WEXITSTATUS(status) == 0; + printf("%s\n", ok ? "PASS" : "FAIL"); + return ok ? 0 : 1; +} diff --git a/compiler-rt/test/profile/GPU/instrprof-hip-multi-gpu.hip b/compiler-rt/test/profile/GPU/instrprof-hip-multi-gpu.hip new file mode 100644 index 0000000000000..6a99546d34bdb --- /dev/null +++ b/compiler-rt/test/profile/GPU/instrprof-hip-multi-gpu.hip @@ -0,0 +1,57 @@ +// Test that HIP PGO works on multi-GPU systems. The kernel runs on the default +// device, so the host-shadow drain (guarded by upstream's launch tracking) +// collects only that device and the supplemental HSA agent-walk then finds the +// same code object and dedups it out. The point of the test is that neither +// pass crashes or hangs reading a non-resident device on a host with several +// GPUs (the failure mode that the launch tracking + HSA residency walk fix). +// +// REQUIRES: hip, amdgpu +// The "walk complete" / dedup notes are Linux-only HSA-drain strings; the +// Windows host-shadow drain collects only the current device. +// UNSUPPORTED: windows +// RUN: %clang -x hip -fprofile-instr-generate -fcoverage-mapping \ +// RUN: --offload-arch=%amdgpu_arch %s -o %t -L%hip_lib_path -lamdhip64 +// RUN: env LLVM_PROFILE_FILE=%t.profraw \ +// RUN: LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \ +// RUN: LLVM_PROFILE_VERBOSE=1 %run %t 2>&1 | FileCheck %s +// +// The host-shadow pass drains the launched device, the HSA walk finds that same +// code object and dedups it (drained=0), and the program does not crash. 
+// CHECK: Copied device sections: +// CHECK: device bounds already drained, skipping +// CHECK: walk complete: agents={{[0-9]+}} pairs={{[0-9]+}} found={{[1-9][0-9]*}} drained={{[0-9]+}} +// CHECK: PASS + +#include +#include + +__global__ void add_one(int *data, int n) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) + data[idx] += 1; +} + +int main() { + int ndev = 0; + (void)hipGetDeviceCount(&ndev); + + constexpr int N = 32; + int h_data[N]; + for (int i = 0; i < N; ++i) h_data[i] = i; + + int *d_data; + (void)hipMalloc(&d_data, N * sizeof(int)); + (void)hipMemcpy(d_data, h_data, N * sizeof(int), hipMemcpyHostToDevice); + + add_one<<<1, N>>>(d_data, N); + + (void)hipMemcpy(h_data, d_data, N * sizeof(int), hipMemcpyDeviceToHost); + (void)hipFree(d_data); + + int ok = 1; + for (int i = 0; i < N; ++i) + if (h_data[i] != i + 1) ok = 0; + + printf("%s (devices=%d)\n", ok ? "PASS" : "FAIL", ndev); + return !ok; +} diff --git a/compiler-rt/test/profile/GPU/instrprof-hip-multi-process-merge.hip b/compiler-rt/test/profile/GPU/instrprof-hip-multi-process-merge.hip new file mode 100644 index 0000000000000..8cf1258a63535 --- /dev/null +++ b/compiler-rt/test/profile/GPU/instrprof-hip-multi-process-merge.hip @@ -0,0 +1,63 @@ +// Multi-process accumulation: device counters from several independent runs of an +// instrumented HIP program must accumulate when their profraws are offline-merged +// (llvm-profdata merge), the common real-world feedback-collection path. Three +// runs of a 64-thread launch must sum to a function count of 192 and an +// even-branch block count of 96 (3 x 64 / 3 x 32). +// +// Note: on-the-fly merge-pooling via LLVM_PROFILE_FILE=...%m does NOT currently +// accumulate *device* counters -- the device profraw is rewritten per process +// rather than merged in place -- so each process must write a distinct file +// (here via %p) and the accumulation is done by llvm-profdata merge. 
+// +// REQUIRES: hip, amdgpu +// RUN: rm -rf %t.dir && mkdir -p %t.dir +// RUN: %clang -x hip --offload-arch=%amdgpu_arch -fno-gpu-rdc \ +// RUN: -fprofile-instr-generate -fcoverage-mapping %s -o %t.dir/a.out \ +// RUN: -L%hip_lib_path -lamdhip64 +// RUN: env LLVM_PROFILE_FILE=%t.dir/run1.%%p.profraw \ +// RUN: LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \ +// RUN: HIP_VISIBLE_DEVICES=0 %t.dir/a.out +// RUN: env LLVM_PROFILE_FILE=%t.dir/run2.%%p.profraw \ +// RUN: LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \ +// RUN: HIP_VISIBLE_DEVICES=0 %t.dir/a.out +// RUN: env LLVM_PROFILE_FILE=%t.dir/run3.%%p.profraw \ +// RUN: LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \ +// RUN: HIP_VISIBLE_DEVICES=0 %t.dir/a.out +// RUN: llvm-profdata merge %t.dir/*.profraw -o %t.dir/a.profdata +// RUN: llvm-profdata show --all-functions --counts %t.dir/a.profdata \ +// RUN: | FileCheck %s +// +// CHECK: _Z8classifyPii: +// CHECK: Function count: 192 +// CHECK: Block counts: [0, 96] + +#include +#include + +__global__ void classify(int *out, int n) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx >= n) + return; + if (idx % 2 == 0) + out[idx] = 1; + else + out[idx] = 0; +} + +int main() { + constexpr int N = 64; + int *d = nullptr; + if (hipMalloc(&d, N * sizeof(int)) != hipSuccess) + return 2; + classify<<<1, N>>>(d, N); + (void)hipDeviceSynchronize(); + int h[N]; + (void)hipMemcpy(h, d, N * sizeof(int), hipMemcpyDeviceToHost); + (void)hipFree(d); + + int evens = 0; + for (int i = 0; i < N; ++i) + evens += h[i]; + printf("%s\n", evens == 32 ? "PASS" : "FAIL"); + return evens == 32 ? 0 : 1; +} diff --git a/compiler-rt/test/profile/GPU/instrprof-hip-multiple-kernels.hip b/compiler-rt/test/profile/GPU/instrprof-hip-multiple-kernels.hip new file mode 100644 index 0000000000000..0fd6185b82441 --- /dev/null +++ b/compiler-rt/test/profile/GPU/instrprof-hip-multiple-kernels.hip @@ -0,0 +1,58 @@ +// Test PGO with multiple kernel launches from a single TU. 
+// Verifies that counters from all device functions are collected correctly. +// +// REQUIRES: hip, amdgpu +// RUN: %clang -x hip -fprofile-instr-generate -fcoverage-mapping \ +// RUN: --offload-arch=%amdgpu_arch %s -o %t -L%hip_lib_path -lamdhip64 +// RUN: rm -rf %t.dir && mkdir -p %t.dir +// RUN: env LLVM_PROFILE_FILE=%t.dir/prof.profraw \ +// RUN: LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \ +// RUN: HIP_VISIBLE_DEVICES=0 %run %t +// RUN: llvm-profdata merge -o %t.profdata %t.dir/ +// RUN: llvm-profdata show --all-functions %t.profdata \ +// RUN: | FileCheck %s --check-prefix=PROF +// +// All three kernels plus main should be profiled. +// PROF-DAG: _Z4fillPii +// PROF-DAG: _Z5scalePii +// PROF-DAG: _Z6negatePii +// PROF-DAG: main +// PROF: Total functions: 4 + +#include +#include + +__global__ void fill(int *data, int val) { + data[threadIdx.x] = val; +} + +__global__ void scale(int *data, int factor) { + data[threadIdx.x] *= factor; +} + +__global__ void negate(int *data, int n) { + int idx = threadIdx.x; + if (idx < n) + data[idx] = -data[idx]; +} + +int main() { + constexpr int N = 16; + int h[N]; + int *d; + (void)hipMalloc(&d, N * sizeof(int)); + + fill<<<1, N>>>(d, 5); + scale<<<1, N>>>(d, 3); + negate<<<1, N>>>(d, N); + + (void)hipMemcpy(h, d, N * sizeof(int), hipMemcpyDeviceToHost); + (void)hipFree(d); + + int ok = 1; + for (int i = 0; i < N; ++i) + if (h[i] != -15) ok = 0; + + printf("%s\n", ok ? "PASS" : "FAIL"); + return !ok; +} diff --git a/compiler-rt/test/profile/GPU/instrprof-hip-nondefault-device.hip b/compiler-rt/test/profile/GPU/instrprof-hip-nondefault-device.hip new file mode 100644 index 0000000000000..5d3dea671047b --- /dev/null +++ b/compiler-rt/test/profile/GPU/instrprof-hip-nondefault-device.hip @@ -0,0 +1,60 @@ +// Test PGO when the kernel runs on a non-default device (here the program +// selects device 1). 
Upstream's launch tracking records that device 1 was used, +// so the host-shadow drain skips the other devices and collects device 1, and +// the supplemental HSA agent-walk then finds that same code object and dedups it +// out. This exercises both that the correct device is drained and that an +// unused device is never read (which would fault/hang on a multi-GPU host). +// +// REQUIRES: hip, amdgpu, multi-device +// The "walk complete" / dedup notes are Linux-only HSA-drain strings; the +// Windows host-shadow drain only collects the current device. +// UNSUPPORTED: windows +// RUN: %clang -x hip -fprofile-instr-generate -fcoverage-mapping \ +// RUN: --offload-arch=%amdgpu_arch %s -o %t -L%hip_lib_path -lamdhip64 +// RUN: env LLVM_PROFILE_FILE=%t.profraw \ +// RUN: LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \ +// RUN: LLVM_PROFILE_VERBOSE=1 %run %t 2>&1 | FileCheck %s +// +// The launched device (1) is drained, the unused default device is skipped, and +// the HSA walk finds the same code object and dedups it (drained=0). +// CHECK: Skipping unused device 0 +// CHECK: Collecting static profile data from device 1 +// CHECK: Copied device sections: +// CHECK: device bounds already drained, skipping +// CHECK: walk complete: agents={{[0-9]+}} pairs={{[0-9]+}} found={{[1-9][0-9]*}} drained={{[0-9]+}} +// CHECK: PASS + +#include +#include + +__global__ void fill(int *data, int val, int n) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) + data[idx] = val; +} + +int main() { + int ndev = 0; + (void)hipGetDeviceCount(&ndev); + if (ndev < 2) { + printf("PASS (skipped: only %d device)\n", ndev); + return 0; + } + + (void)hipSetDevice(1); + + constexpr int N = 32; + int h[N] = {}; + int *d; + (void)hipMalloc(&d, N * sizeof(int)); + fill<<<1, N>>>(d, 99, N); + (void)hipMemcpy(h, d, N * sizeof(int), hipMemcpyDeviceToHost); + (void)hipFree(d); + + int ok = 1; + for (int i = 0; i < N; ++i) + if (h[i] != 99) ok = 0; + + printf("%s\n", ok ? 
"PASS" : "FAIL"); + return !ok; +} diff --git a/compiler-rt/test/profile/GPU/instrprof-hip-pgo-use.hip b/compiler-rt/test/profile/GPU/instrprof-hip-pgo-use.hip new file mode 100644 index 0000000000000..9a8a8187f8e77 --- /dev/null +++ b/compiler-rt/test/profile/GPU/instrprof-hip-pgo-use.hip @@ -0,0 +1,63 @@ +// Test the full PGO cycle: instrument, collect, merge, optimize. +// Verifies that the optimized binary produces correct output and that +// profile data is consumed without errors. +// +// REQUIRES: hip, amdgpu +// +// Step 1: Build instrumented binary. +// RUN: %clang -x hip -fprofile-instr-generate -fcoverage-mapping \ +// RUN: --offload-arch=%amdgpu_arch %s -o %t.instr \ +// RUN: -L%hip_lib_path -lamdhip64 +// +// Step 2: Run to collect profile data. +// RUN: rm -rf %t.dir && mkdir -p %t.dir +// RUN: env LLVM_PROFILE_FILE=%t.dir/prof.profraw \ +// RUN: LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \ +// RUN: HIP_VISIBLE_DEVICES=0 %run %t.instr 2>&1 | FileCheck %s +// +// Step 3: Merge profile data. +// RUN: llvm-profdata merge -o %t.profdata %t.dir/ +// +// Step 4: Build optimized binary with profile data. +// RUN: %clang -x hip -fprofile-instr-use=%t.profdata \ +// RUN: --offload-arch=%amdgpu_arch %s -o %t.opt \ +// RUN: -L%hip_lib_path -lamdhip64 -O2 +// +// Step 5: Run optimized binary. 
+// RUN: env LD_LIBRARY_PATH=%hip_lib_path:$LD_LIBRARY_PATH \ +// RUN: HIP_VISIBLE_DEVICES=0 %run %t.opt 2>&1 | FileCheck %s +// +// CHECK: PASS + +#include +#include + +__global__ void scale(float *data, float factor, int n) { + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < n) + data[idx] *= factor; +} + +int main() { + constexpr int N = 128; + float h[N]; + for (int i = 0; i < N; ++i) h[i] = (float)i; + + float *d; + (void)hipMalloc(&d, N * sizeof(float)); + (void)hipMemcpy(d, h, N * sizeof(float), hipMemcpyHostToDevice); + + scale<<<1, N>>>(d, 2.0f, N); + + (void)hipMemcpy(h, d, N * sizeof(float), hipMemcpyDeviceToHost); + (void)hipFree(d); + + int ok = 1; + for (int i = 0; i < N; ++i) { + float expected = (float)(i * 2); + if (h[i] != expected) ok = 0; + } + + printf("%s\n", ok ? "PASS" : "FAIL"); + return !ok; +} diff --git a/compiler-rt/test/profile/GPU/lit.local.cfg.py b/compiler-rt/test/profile/GPU/lit.local.cfg.py new file mode 100644 index 0000000000000..4a3fd1c138302 --- /dev/null +++ b/compiler-rt/test/profile/GPU/lit.local.cfg.py @@ -0,0 +1,7 @@ +# HIP device-PGO tests for host-shadowed (statically linked) kernels: require an +# AMD GPU plus a ROCm/HIP install (features set in ../lit.cfg.py). +if not {"hip", "amdgpu"}.issubset(config.available_features): + config.unsupported = True +else: + # Tests share the GPU(s) and pin HIP_VISIBLE_DEVICES; serialize them. + config.parallelism_group = "gpu" diff --git a/compiler-rt/test/profile/instrprof-rocm-bounds-dedup.cpp b/compiler-rt/test/profile/instrprof-rocm-bounds-dedup.cpp new file mode 100644 index 0000000000000..430a8cc5cce26 --- /dev/null +++ b/compiler-rt/test/profile/instrprof-rocm-bounds-dedup.cpp @@ -0,0 +1,108 @@ +// Host unit test for __prof_rocm::ProfBoundsSet, the section-bounds dedup table +// shared by the ROCm device-profile drains (InstrProfilingPlatformROCm.cpp and +// InstrProfilingPlatformROCmHSA.cpp). 
This is the bookkeeping that guarantees a +// device counter set is drained exactly once -- across the host-shadow and HSA +// paths and across the multiple GPU agents that may share a code object (the +// "device bounds already drained, skipping" behavior exercised by the multi-GPU +// device test). It is pure host logic with no GPU/HIP/HSA dependency, so unlike +// the device drain tests under GPU/ and AMDGPU/ it runs anywhere the profile +// runtime is tested, including upstream CI on machines without an AMD GPU. +// +// RUN: %clangxx %s -o %t +// RUN: %run %t | FileCheck %s + +#include "../../lib/profile/InstrProfilingPlatformROCmInternal.h" + +#include +#include +#include + +using __prof_rocm::ProfBoundsSet; + +static int Failures = 0; + +#define EXPECT(Cond) \ + do { \ + if (!(Cond)) { \ + fprintf(stderr, "FAIL: %s:%d: %s\n", __FILE__, __LINE__, #Cond); \ + ++Failures; \ + } \ + } while (0) + +// Distinct, non-null fake section pointers derived from an integer. +static const void *P(uintptr_t V) { return (const void *)(V * 8 + 8); } + +int main() { + // 1. A fresh set contains nothing. + { + ProfBoundsSet S; + EXPECT(S.Count == 0); + EXPECT(!S.contains(P(1), P(2), P(3))); + free(S.Items); + } + + // 2. record() is idempotent: the first insert reports "new", repeats do not, + // and the element count never double-counts. + { + ProfBoundsSet S; + EXPECT(S.record(P(1), P(2), P(3)) == true); + EXPECT(S.contains(P(1), P(2), P(3))); + EXPECT(S.Count == 1); + EXPECT(S.record(P(1), P(2), P(3)) == false); + EXPECT(S.record(P(1), P(2), P(3)) == false); + EXPECT(S.Count == 1); + free(S.Items); + } + + // 3. All three fields are part of the key: differing in any single field + // (data, counters, or names) is a distinct tuple. Guards against a dedup + // that keys on only a subset and would drop a real counter set. 
+ { + ProfBoundsSet S; + EXPECT(S.record(P(1), P(2), P(3)) == true); + EXPECT(!S.contains(P(9), P(2), P(3))); // data differs + EXPECT(!S.contains(P(1), P(9), P(3))); // counters differ + EXPECT(!S.contains(P(1), P(2), P(9))); // names differ + EXPECT(S.record(P(9), P(2), P(3)) == true); + EXPECT(S.record(P(1), P(9), P(3)) == true); + EXPECT(S.record(P(1), P(2), P(9)) == true); + EXPECT(S.Count == 4); + free(S.Items); + } + + // 4. Many distinct tuples grow the table past its initial capacity; all stay + // recorded and re-recording any of them is still a no-op. + { + ProfBoundsSet S; + const int N = 4 * ProfBoundsSet::kInitCap + 7; // forces several doublings + for (int I = 0; I < N; ++I) + EXPECT(S.record(P(3 * I + 1), P(3 * I + 2), P(3 * I + 3)) == true); + EXPECT(S.Count == N); + EXPECT(S.Cap >= N); + for (int I = 0; I < N; ++I) { + EXPECT(S.contains(P(3 * I + 1), P(3 * I + 2), P(3 * I + 3))); + EXPECT(S.record(P(3 * I + 1), P(3 * I + 2), P(3 * I + 3)) == false); + } + EXPECT(S.Count == N); // duplicates did not grow the table + free(S.Items); + } + + // 5. Null pointers are valid keys (an empty/zero code object is recorded so a + // later agent skips it rather than reprocessing it). 
+ { + ProfBoundsSet S; + EXPECT(S.record(nullptr, nullptr, nullptr) == true); + EXPECT(S.contains(nullptr, nullptr, nullptr)); + EXPECT(S.record(nullptr, nullptr, nullptr) == false); + EXPECT(S.Count == 1); + free(S.Items); + } + + if (Failures == 0) + printf("PASS\n"); + else + printf("%d FAILURE(S)\n", Failures); + return Failures != 0; +} + +// CHECK: PASS diff --git a/compiler-rt/test/profile/instrprof-rocm-grow-array.cpp b/compiler-rt/test/profile/instrprof-rocm-grow-array.cpp new file mode 100644 index 0000000000000..92a8b932fbb45 --- /dev/null +++ b/compiler-rt/test/profile/instrprof-rocm-grow-array.cpp @@ -0,0 +1,115 @@ +// Host unit test for __prof_rocm::growArray, the dynamic-array helper shared by +// the ROCm device-profile drains (InstrProfilingPlatformROCm.cpp and +// InstrProfilingPlatformROCmHSA.cpp). It is pure host logic with no GPU, HIP, or +// HSA dependency, so -- unlike the device drain tests under GPU/ and AMDGPU/, +// which require a real AMD GPU -- it runs anywhere the profile runtime is +// tested, including upstream CI on machines without a GPU. +// +// RUN: %clangxx %s -o %t +// RUN: %run %t | FileCheck %s + +#include "../../lib/profile/InstrProfilingPlatformROCmInternal.h" + +#include +#include +#include + +using __prof_rocm::growArray; + +static int Failures = 0; + +#define EXPECT(Cond) \ + do { \ + if (!(Cond)) { \ + fprintf(stderr, "FAIL: %s:%d: %s\n", __FILE__, __LINE__, #Cond); \ + ++Failures; \ + } \ + } while (0) + +static int allZero(const int *P, int Begin, int End) { + for (int I = Begin; I < End; ++I) + if (P[I] != 0) + return 0; + return 1; +} + +int main() { + // 1. Allocating from empty uses InitCap and zero-initializes every slot. + { + int *A = nullptr; + int Cap = 0; + EXPECT(growArray((void **)&A, &Cap, /*MinCount=*/1, /*InitCap=*/4, + sizeof(int)) == 0); + EXPECT(A != nullptr); + EXPECT(Cap == 4); + EXPECT(allZero(A, 0, Cap)); + free(A); + } + + // 2. 
Doubling continues until the capacity covers MinCount (4 -> 8 -> 16). + { + int *A = nullptr; + int Cap = 0; + EXPECT(growArray((void **)&A, &Cap, /*MinCount=*/10, /*InitCap=*/4, + sizeof(int)) == 0); + EXPECT(Cap == 16); + EXPECT(allZero(A, 0, Cap)); + free(A); + } + + // 3. When the capacity already suffices the array is left untouched. + { + int *A = (int *)malloc(8 * sizeof(int)); + for (int I = 0; I < 8; ++I) + A[I] = I + 1; + int *Before = A; + int Cap = 8; + EXPECT(growArray((void **)&A, &Cap, /*MinCount=*/8, /*InitCap=*/4, + sizeof(int)) == 0); + EXPECT(A == Before); + EXPECT(Cap == 8); + EXPECT(A[0] == 1 && A[7] == 8); + free(A); + } + + // 4. Growth preserves existing elements and zero-fills the new tail, with + // doubling resuming from the current capacity rather than InitCap. + { + int *A = (int *)malloc(4 * sizeof(int)); + for (int I = 0; I < 4; ++I) + A[I] = 100 + I; + int Cap = 4; + EXPECT(growArray((void **)&A, &Cap, /*MinCount=*/5, /*InitCap=*/4, + sizeof(int)) == 0); + EXPECT(Cap == 8); + EXPECT(A[0] == 100 && A[1] == 101 && A[2] == 102 && A[3] == 103); + EXPECT(allZero(A, 4, Cap)); + free(A); + } + + // 5. ElemSize byte math is honored for wider element types. 
+ { + struct Pair { + uint64_t A, B; + }; + Pair *P = nullptr; + int Cap = 0; + EXPECT(growArray((void **)&P, &Cap, /*MinCount=*/3, /*InitCap=*/2, + sizeof(Pair)) == 0); + EXPECT(Cap == 4); + int Zeroed = 1; + for (int I = 0; I < Cap; ++I) + if (P[I].A != 0 || P[I].B != 0) + Zeroed = 0; + EXPECT(Zeroed); + free(P); + } + + if (Failures == 0) + printf("PASS\n"); + else + printf("%d FAILURE(S)\n", Failures); + return Failures != 0; +} + +// CHECK: PASS diff --git a/compiler-rt/test/profile/lit.cfg.py b/compiler-rt/test/profile/lit.cfg.py index df7f11e2b286b..a6168cc4c4ceb 100644 --- a/compiler-rt/test/profile/lit.cfg.py +++ b/compiler-rt/test/profile/lit.cfg.py @@ -1,7 +1,9 @@ # -*- Python -*- +import glob import os import re +import subprocess def get_required_attr(config, attr_name): @@ -186,3 +188,68 @@ def exclude_unsupported_files_for_aix(dirname): if config.target_os in ("AIX", "Darwin", "Linux"): config.available_features.add("continuous-mode") + +# GPU (HIP/AMDGPU) device-profile tests. +# +# The GPU/ and AMDGPU/ subdirectories exercise the device-PGO drain end to end +# and need a real AMD GPU plus a ROCm/HIP install. Detect that here and, when +# present, expose the features ('hip', 'amdgpu', 'multi-device') and +# substitutions ('%amdgpu_arch', '%hip_lib_path') those tests use. Without a GPU +# the subdirectory lit.local.cfg.py files mark themselves unsupported, so the +# tests report UNSUPPORTED instead of failing. +# +# Both knobs are overridable from the command line, e.g.: +# llvm-lit --param amdgpu_arch=gfx90a --param hip_lib_path=/opt/rocm/lib ... +config.suffixes.append(".hip") + + +def _amdgpu_archs(): + # config.clang is a wrapped command (" "); the clang path is + # the last token. amdgpu-arch ships next to it and prints one line per GPU. 
+ clang_path = config.clang.split()[-1] if config.clang else "" + tool = os.path.join(os.path.dirname(clang_path), "amdgpu-arch") + if not os.path.exists(tool): + return [] + try: + proc = subprocess.run(tool, capture_output=True, text=True, timeout=60) + except (OSError, subprocess.SubprocessError): + return [] + if proc.returncode != 0: + return [] + return [line.strip() for line in proc.stdout.splitlines() if line.strip()] + + +def _hip_lib_path(): + # An explicit --param hip_lib_path=DIR is authoritative; otherwise probe the + # usual ROCm locations for libamdhip64. + explicit = lit_config.params.get("hip_lib_path") + if explicit: + candidates = [explicit] + else: + candidates = [] + for var in ("ROCM_PATH", "HIP_PATH"): + if os.environ.get(var): + candidates.append(os.path.join(os.environ[var], "lib")) + candidates.append("/opt/rocm/lib") + for directory in candidates: + if directory and glob.glob(os.path.join(directory, "libamdhip64.so*")): + return directory + return None + + +_amdgpu_arch_list = _amdgpu_archs() +_hip_lib_dir = _hip_lib_path() +if _amdgpu_arch_list and _hip_lib_dir: + config.available_features.add("hip") + config.available_features.add("amdgpu") + if len(_amdgpu_arch_list) >= 2: + config.available_features.add("multi-device") + config.substitutions.append( + ("%amdgpu_arch", lit_config.params.get("amdgpu_arch", "native")) + ) + config.substitutions.append(("%hip_lib_path", _hip_lib_dir)) + # The GPU tests share the device(s) and pin HIP_VISIBLE_DEVICES, so they must + # not run concurrently with each other. The subdirectories opt into this + # group; the size-1 cap serializes them while leaving the CPU profile tests + # fully parallel. 
+ lit_config.parallelism_groups["gpu"] = 1 diff --git a/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel b/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel index ff4b381abe064..0c5e0af4cb483 100644 --- a/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/compiler-rt/BUILD.bazel @@ -33,6 +33,7 @@ WIN32_ONLY_FILES = [ PROFILE_ROCM_FILES = [ "lib/profile/InstrProfilingPlatformROCm.cpp", + "lib/profile/InstrProfilingPlatformROCmHSA.cpp", ] cc_library( From c1773a5a14a2f488b2b6d10845394e9eda3ee72e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 22 Jun 2026 21:14:58 +0200 Subject: [PATCH 064/511] AMDGPU: Refactor AMDGPUTargetID to not store MCSubtargetInfo (#204315) Store the triple string and GPUKind instead. The dependence on checking AMDHSA seems like an anti-feature, but maintain the behavior of not printing the modifiers for other OSes. Start parsing the target ID instead of performing a direct string comparison. Also improve test coverage for the treatment of the environment component of the triple. The main behavioral change is this will now produce normalized triples in the output and diagnostics. Practically, this means all of the places that currently emit "--" will be expanded into "-unknown-".
Co-Authored-By: Claude Opus 4.6 --- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 43 +++- .../Target/AMDGPU/Disassembler/CMakeLists.txt | 1 - .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 99 ++++++--- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 28 ++- llvm/lib/TargetParser/AMDGPUTargetParser.cpp | 2 + .../CodeGen/AMDGPU/directive-amdgcn-target.ll | 204 +++++++++--------- llvm/test/CodeGen/AMDGPU/elf-notes.ll | 12 +- .../CodeGen/AMDGPU/gfx902-without-xnack.ll | 2 +- .../test/CodeGen/AMDGPU/hsa-default-device.ll | 2 +- llvm/test/CodeGen/AMDGPU/hsa-func.ll | 14 +- llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll | 44 ++-- llvm/test/CodeGen/AMDGPU/hsa.ll | 15 +- .../AMDGPU/target-id-xnack-always-on.ll | 2 +- .../AMDGPU/tid-mul-func-xnack-all-any.ll | 4 +- .../tid-mul-func-xnack-all-not-supported.ll | 4 +- .../AMDGPU/tid-mul-func-xnack-all-off.ll | 4 +- .../AMDGPU/tid-mul-func-xnack-all-on.ll | 4 +- .../AMDGPU/tid-mul-func-xnack-any-off-1.ll | 4 +- .../AMDGPU/tid-mul-func-xnack-any-off-2.ll | 4 +- .../AMDGPU/tid-mul-func-xnack-any-on-1.ll | 4 +- .../AMDGPU/tid-mul-func-xnack-any-on-2.ll | 4 +- .../CodeGen/AMDGPU/tid-one-func-xnack-any.ll | 4 +- .../tid-one-func-xnack-not-supported.ll | 4 +- .../CodeGen/AMDGPU/tid-one-func-xnack-off.ll | 4 +- .../CodeGen/AMDGPU/tid-one-func-xnack-on.ll | 4 +- .../amd-amdgpu-isa-malformed-target-id.s | 5 + .../amdgcn-target-directive-triple-env.s | 17 ++ .../amdgcn-target-malformed-target-id.s | 5 + llvm/test/MC/AMDGPU/buffer-op-swz-operand.s | 2 +- llvm/test/MC/AMDGPU/hsa-diag-v4.s | 10 +- llvm/test/MC/AMDGPU/hsa-exp.s | 2 +- llvm/test/MC/AMDGPU/hsa-gfx12-v4.s | 2 +- llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s | 2 +- llvm/test/MC/AMDGPU/hsa-gfx1251-v4.s | 2 +- llvm/test/MC/AMDGPU/hsa-gfx13-v4.s | 2 +- llvm/test/MC/AMDGPU/hsa-tg-split.s | 2 +- llvm/test/MC/AMDGPU/hsa-v4.s | 2 +- .../MC/AMDGPU/hsa-v5-uses-dynamic-stack.s | 2 +- llvm/test/MC/AMDGPU/isa-version-hsa.s | 8 +- llvm/test/MC/AMDGPU/isa-version-pal.s | 8 +- 
llvm/test/MC/AMDGPU/isa-version-unk.s | 8 +- llvm/test/MC/AMDGPU/user-sgpr-count.s | 2 +- 42 files changed, 359 insertions(+), 238 deletions(-) create mode 100644 llvm/test/MC/AMDGPU/amd-amdgpu-isa-malformed-target-id.s create mode 100644 llvm/test/MC/AMDGPU/amdgcn-target-directive-triple-env.s create mode 100644 llvm/test/MC/AMDGPU/amdgcn-target-malformed-target-id.s diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 2c33b0a2ec32f..a7191c4411336 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5980,14 +5980,22 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { if (getParser().parseEscapedString(TargetIDDirective)) return true; - SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); - if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) + std::optional MaybeParsed = + AMDGPU::IsaInfo::AMDGPUTargetID::parseTargetIDString(TargetIDDirective); + if (!MaybeParsed) + return getParser().Error(TargetStart, "malformed target ID"); + + const AMDGPU::IsaInfo::AMDGPUTargetID &ParsedTargetID = *MaybeParsed; + const std::optional &CurrentTargetID = + getTargetStreamer().getTargetID(); + + if (*CurrentTargetID != ParsedTargetID) { return getParser().Error( - TargetRange.Start, - (Twine(".amdgcn_target directive's target id ") + - Twine(TargetIDDirective) + - Twine(" does not match the specified target id ") + - Twine(getTargetStreamer().getTargetID()->toString()))); + TargetStart, Twine(".amdgcn_target directive's target id ") + + Twine(ParsedTargetID.toString()) + + Twine(" does not match the specified target id ") + + Twine(CurrentTargetID->toString())); + } return false; } @@ -6682,9 +6690,24 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() { "architectures"); } - auto TargetIDDirective = getLexer().getTok().getStringContents(); - if (getTargetStreamer().getTargetID()->toString() != 
TargetIDDirective) - return Error(getParser().getTok().getLoc(), "target id must match options"); + StringRef TargetIDDirective = getLexer().getTok().getStringContents(); + + std::optional MaybeParsed = + AMDGPU::IsaInfo::AMDGPUTargetID::parseTargetIDString(TargetIDDirective); + if (!MaybeParsed) + return Error(getParser().getTok().getLoc(), "malformed target id"); + + const AMDGPU::IsaInfo::AMDGPUTargetID &ParsedTargetID = *MaybeParsed; + const std::optional &CurrentTargetID = + getTargetStreamer().getTargetID(); + + if (*CurrentTargetID != ParsedTargetID) { + return Error(getParser().getTok().getLoc(), + Twine(".amd_amdgpu_isa directive's target id ") + + Twine(ParsedTargetID.toString()) + + Twine(" does not match the specified target id ") + + Twine(CurrentTargetID->toString())); + } getTargetStreamer().EmitISAVersion(); Lex(); diff --git a/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt index aeede04081fc7..aa96d67c527a4 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt @@ -10,7 +10,6 @@ add_llvm_component_library(LLVMAMDGPUDisassembler CodeGenTypes MC MCDisassembler - TargetParser Support ADD_TO_COMPONENT diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index e1e83ece32ad0..b59e8ddf2b282 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1101,12 +1101,16 @@ namespace IsaInfo { AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI, StringRef FeatureString) - : STI(STI), XnackSetting(STI.getFeatureBits().test(FeatureSupportsXNACK) - ? TargetIDSetting::Any - : TargetIDSetting::Unsupported), + : Arch(parseArchAMDGCN(STI.getCPU())), + TargetTripleString( + STI.getTargetTriple().normalize(Triple::CanonicalForm::FOUR_IDENT)), + XnackSetting(STI.getFeatureBits().test(FeatureSupportsXNACK) + ? 
TargetIDSetting::Any + : TargetIDSetting::Unsupported), SramEccSetting(STI.getFeatureBits().test(FeatureSupportsSRAMECC) ? TargetIDSetting::Any - : TargetIDSetting::Unsupported) { + : TargetIDSetting::Unsupported), + IsAMDHSA(STI.getTargetTriple().getOS() == Triple::AMDHSA) { // Check if xnack or sramecc is explicitly enabled or disabled. In the // absence of the target features we assume we must generate code that can run @@ -1168,6 +1172,13 @@ AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI, } } +AMDGPUTargetID::AMDGPUTargetID(GPUKind Arch, StringRef TargetTripleString, + TargetIDSetting XnackSetting, + TargetIDSetting SramEccSetting, bool IsAMDHSA) + : Arch(Arch), TargetTripleString(TargetTripleString), + XnackSetting(XnackSetting), SramEccSetting(SramEccSetting), + IsAMDHSA(IsAMDHSA) {} + static TargetIDSetting getTargetIDSettingFromFeatureString(StringRef FeatureString) { if (FeatureString.ends_with("-")) @@ -1190,40 +1201,66 @@ void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) { } } -void AMDGPUTargetID::print(raw_ostream &StreamRep) const { - const Triple &TargetTriple = STI.getTargetTriple(); - auto Version = getIsaVersion(STI.getCPU()); +std::optional +AMDGPUTargetID::parseTargetIDString(StringRef TargetIDDirective) { + // Split on '-' to get arch-vendor-os-environment-processor:features + // There is a single dash separator after the 4-component triple + SmallVector Parts; + TargetIDDirective.split(Parts, '-', /*MaxSplit=*/4); + if (Parts.size() < 4) + return std::nullopt; - StreamRep << TargetTriple.getArchName() << '-' << TargetTriple.getVendorName() - << '-' << TargetTriple.getOSName() << '-' - << TargetTriple.getEnvironmentName() << '-'; + Triple TT(Parts[0], Parts[1], Parts[2], Parts[3]); + if (!TT.isAMDGCN()) + return std::nullopt; - std::string Processor; - // TODO: Following else statement is present here because we used various - // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803'). 
- // Remove once all aliases are removed from GCNProcessors.td. - if (Version.Major >= 9) - Processor = STI.getCPU().str(); - else - Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) + - Twine(Version.Stepping)) - .str(); + SmallVector FeatureSplit; + Parts[4].split(FeatureSplit, ':'); + if (FeatureSplit.empty()) + return std::nullopt; + + StringRef CPUName = FeatureSplit[0]; + + // Determine xnack/sramecc support based on the architecture attributes + GPUKind Arch = parseArchAMDGCN(CPUName); + unsigned ArchAttr = getArchAttrAMDGCN(Arch); - std::string Features; - if (TargetTriple.getOS() == Triple::AMDHSA) { + TargetIDSetting XnackSetting = (ArchAttr & FEATURE_XNACK) + ? TargetIDSetting::Any + : TargetIDSetting::Unsupported; + TargetIDSetting SramEccSetting = (ArchAttr & FEATURE_SRAMECC) + ? TargetIDSetting::Any + : TargetIDSetting::Unsupported; + + for (StringRef FeatureString : + ArrayRef(FeatureSplit).drop_front(1)) { + if (FeatureString.starts_with("xnack")) + XnackSetting = getTargetIDSettingFromFeatureString(FeatureString); + else if (FeatureString.starts_with("sramecc")) + SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString); + } + + return AMDGPUTargetID(Arch, TT.normalize(Triple::CanonicalForm::FOUR_IDENT), + XnackSetting, SramEccSetting, + TT.getOS() == Triple::AMDHSA); +} + +void AMDGPUTargetID::print(raw_ostream &StreamRep) const { + StreamRep << TargetTripleString << '-' << getArchNameAMDGCN(Arch); + + if (IsAMDHSA) { // sramecc. if (getSramEccSetting() == TargetIDSetting::Off) - Features += ":sramecc-"; + StreamRep << ":sramecc-"; else if (getSramEccSetting() == TargetIDSetting::On) - Features += ":sramecc+"; + StreamRep << ":sramecc+"; + // xnack. 
if (getXnackSetting() == TargetIDSetting::Off) - Features += ":xnack-"; + StreamRep << ":xnack-"; else if (getXnackSetting() == TargetIDSetting::On) - Features += ":xnack+"; + StreamRep << ":xnack+"; } - - StreamRep << Processor << Features; } std::string AMDGPUTargetID::toString() const { @@ -1233,6 +1270,12 @@ std::string AMDGPUTargetID::toString() const { return Str; } +bool AMDGPUTargetID::operator==(const AMDGPUTargetID &Other) const { + return Arch == Other.Arch && XnackSetting == Other.XnackSetting && + SramEccSetting == Other.SramEccSetting && IsAMDHSA == Other.IsAMDHSA && + TargetTripleString == Other.TargetTripleString; +} + unsigned getInstCacheLineSize(const MCSubtargetInfo &STI) { if (STI.getFeatureBits().test(FeatureInstCacheLineSize128)) return 128; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 93bc40807399e..81b0fd56e5bc5 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -16,6 +16,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Module.h" #include "llvm/Support/Alignment.h" +#include "llvm/TargetParser/AMDGPUTargetParser.h" #include #include #include @@ -155,12 +156,19 @@ enum class TargetIDSetting { Unsupported, Any, Off, On }; class AMDGPUTargetID { private: - const MCSubtargetInfo &STI; + GPUKind Arch; + std::string TargetTripleString; TargetIDSetting XnackSetting; TargetIDSetting SramEccSetting; + bool IsAMDHSA; public: explicit AMDGPUTargetID(const MCSubtargetInfo &STI, StringRef FeatureString); + + AMDGPUTargetID(GPUKind Arch, StringRef TargetTripleString, + TargetIDSetting XnackSetting, TargetIDSetting SramEccSetting, + bool IsAMDHSA); + ~AMDGPUTargetID() = default; /// \return True if the current xnack setting is not "Unsupported". 
@@ -219,11 +227,29 @@ class AMDGPUTargetID { void setTargetIDFromTargetIDStream(StringRef TargetID); + GPUKind getGPUKind() const { return Arch; } + + StringRef getTargetTripleString() const { return TargetTripleString; } + + /// \returns True if this is an AMDHSA target. + bool isAMDHSA() const { return IsAMDHSA; } + + /// Parse a target ID directive string (e.g., + /// "amdgcn-amd-amdhsa--gfx1010:xnack-") and return an AMDGPUTargetID. + /// \returns AMDGPUTargetID or std::nullopt if malformed. + static std::optional + parseTargetIDString(StringRef TargetIDDirective); + /// Write string representation to \p OS void print(raw_ostream &OS) const; /// \returns String representation of an object. std::string toString() const; + + bool operator==(const AMDGPUTargetID &Other) const; + bool operator!=(const AMDGPUTargetID &Other) const { + return !(*this == Other); + } }; inline raw_ostream &operator<<(raw_ostream &OS, diff --git a/llvm/lib/TargetParser/AMDGPUTargetParser.cpp b/llvm/lib/TargetParser/AMDGPUTargetParser.cpp index d9ebd4f9ffd6d..77f1f2d795167 100644 --- a/llvm/lib/TargetParser/AMDGPUTargetParser.cpp +++ b/llvm/lib/TargetParser/AMDGPUTargetParser.cpp @@ -58,6 +58,8 @@ AMDGPU::GPUKind llvm::AMDGPU::parseArchAMDGCN(StringRef CPU) { #define AMDGCN_GPU(NAME, ENUM, ISAVERSION, FEATURES) .Case(NAME, ENUM) #define AMDGCN_GPU_ALIAS(NAME, ENUM) .Case(NAME, ENUM) #include "llvm/TargetParser/AMDGPUTargetParser.def" + .Case("generic", AMDGPU::GPUKind::GK_GFX600) + .Case("generic-hsa", AMDGPU::GPUKind::GK_GFX700) .Default(AMDGPU::GPUKind::GK_NONE); } diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll index a7ac3bd81cd9f..562f2672f3d2a 100644 --- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll +++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll @@ -124,109 +124,109 @@ ; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx12-generic < %s | FileCheck 
--check-prefixes=GFX12_GENERIC %s ; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx12-5-generic < %s | FileCheck --check-prefixes=GFX12_5_GENERIC %s -; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600" -; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601" -; GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602" -; GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" -; GFX701: .amdgcn_target "amdgcn-amd-amdhsa--gfx701" -; GFX702: .amdgcn_target "amdgcn-amd-amdhsa--gfx702" -; GFX703: .amdgcn_target "amdgcn-amd-amdhsa--gfx703" -; GFX704: .amdgcn_target "amdgcn-amd-amdhsa--gfx704" -; GFX705: .amdgcn_target "amdgcn-amd-amdhsa--gfx705" -; GFX801: .amdgcn_target "amdgcn-amd-amdhsa--gfx801" -; GFX801-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801:xnack-" -; GFX801-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801:xnack+" -; GFX802: .amdgcn_target "amdgcn-amd-amdhsa--gfx802" -; GFX803: .amdgcn_target "amdgcn-amd-amdhsa--gfx803" -; GFX805: .amdgcn_target "amdgcn-amd-amdhsa--gfx805" -; GFX810: .amdgcn_target "amdgcn-amd-amdhsa--gfx810" -; GFX810-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack-" -; GFX810-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack+" -; GFX900: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" -; GFX900-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" -; GFX900-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" -; GFX902: .amdgcn_target "amdgcn-amd-amdhsa--gfx902" -; GFX902-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902:xnack-" -; GFX902-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902:xnack+" -; GFX904: .amdgcn_target "amdgcn-amd-amdhsa--gfx904" -; GFX904-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack-" -; GFX904-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+" -; GFX906: .amdgcn_target "amdgcn-amd-amdhsa--gfx906" -; GFX906-NOSRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc-" -; GFX906-SRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc+" -; GFX906-NOXNACK: 
.amdgcn_target "amdgcn-amd-amdhsa--gfx906:xnack-" -; GFX906-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:xnack+" -; GFX906-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc-:xnack-" -; GFX906-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc+:xnack-" -; GFX906-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc-:xnack+" -; GFX906-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906:sramecc+:xnack+" -; GFX908: .amdgcn_target "amdgcn-amd-amdhsa--gfx908" -; GFX908-NOSRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc-" -; GFX908-SRAMECC: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc+" -; GFX908-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:xnack-" -; GFX908-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:xnack+" -; GFX908-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc-:xnack-" -; GFX908-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc+:xnack-" -; GFX908-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc-:xnack+" -; GFX908-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908:sramecc+:xnack+" -; GFX909: .amdgcn_target "amdgcn-amd-amdhsa--gfx909" -; GFX909-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909:xnack-" -; GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909:xnack+" -; GFX90C: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c" -; GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack-" -; GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack+" -; GFX942: .amdgcn_target "amdgcn-amd-amdhsa--gfx942" -; GFX942-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack-" -; GFX942-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack+" -; GFX950: .amdgcn_target "amdgcn-amd-amdhsa--gfx950" -; GFX950-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx950:xnack-" -; GFX950-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx950:xnack+" -; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010" -; GFX1010-NOXNACK: .amdgcn_target 
"amdgcn-amd-amdhsa--gfx1010:xnack-" -; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+" -; GFX1011: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011" -; GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011:xnack-" -; GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011:xnack+" -; GFX1012: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012" -; GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012:xnack-" -; GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012:xnack+" -; GFX1013: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013" -; GFX1013-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013:xnack-" -; GFX1013-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013:xnack+" -; GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030" -; GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031" -; GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032" -; GFX1033: .amdgcn_target "amdgcn-amd-amdhsa--gfx1033" -; GFX1034: .amdgcn_target "amdgcn-amd-amdhsa--gfx1034" -; GFX1035: .amdgcn_target "amdgcn-amd-amdhsa--gfx1035" -; GFX1036: .amdgcn_target "amdgcn-amd-amdhsa--gfx1036" -; GFX1100: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100" -; GFX1101: .amdgcn_target "amdgcn-amd-amdhsa--gfx1101" -; GFX1102: .amdgcn_target "amdgcn-amd-amdhsa--gfx1102" -; GFX1103: .amdgcn_target "amdgcn-amd-amdhsa--gfx1103" -; GFX1150: .amdgcn_target "amdgcn-amd-amdhsa--gfx1150" -; GFX1151: .amdgcn_target "amdgcn-amd-amdhsa--gfx1151" -; GFX1152: .amdgcn_target "amdgcn-amd-amdhsa--gfx1152" -; GFX1153: .amdgcn_target "amdgcn-amd-amdhsa--gfx1153" -; GFX1170: .amdgcn_target "amdgcn-amd-amdhsa--gfx1170" -; GFX1171: .amdgcn_target "amdgcn-amd-amdhsa--gfx1171" -; GFX1172: .amdgcn_target "amdgcn-amd-amdhsa--gfx1172" -; GFX1200: .amdgcn_target "amdgcn-amd-amdhsa--gfx1200" -; GFX1201: .amdgcn_target "amdgcn-amd-amdhsa--gfx1201" -; GFX1250: .amdgcn_target "amdgcn-amd-amdhsa--gfx1250" -; GFX1251: .amdgcn_target "amdgcn-amd-amdhsa--gfx1251" -; GFX1310: .amdgcn_target "amdgcn-amd-amdhsa--gfx1310" +; GFX600: 
.amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx600" +; GFX601: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx601" +; GFX602: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx602" +; GFX700: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx700" +; GFX701: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx701" +; GFX702: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx702" +; GFX703: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx703" +; GFX704: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx704" +; GFX705: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx705" +; GFX801: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx801" +; GFX801-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx801:xnack-" +; GFX801-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx801:xnack+" +; GFX802: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx802" +; GFX803: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx803" +; GFX805: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx805" +; GFX810: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx810" +; GFX810-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx810:xnack-" +; GFX810-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx810:xnack+" +; GFX900: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx900" +; GFX900-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx900:xnack-" +; GFX900-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx900:xnack+" +; GFX902: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx902" +; GFX902-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx902:xnack-" +; GFX902-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx902:xnack+" +; GFX904: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx904" +; GFX904-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx904:xnack-" +; GFX904-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx904:xnack+" +; GFX906: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx906" +; GFX906-NOSRAMECC: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx906:sramecc-" +; GFX906-SRAMECC: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx906:sramecc+" +; 
GFX906-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx906:xnack-" +; GFX906-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx906:xnack+" +; GFX906-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx906:sramecc-:xnack-" +; GFX906-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx906:sramecc+:xnack-" +; GFX906-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx906:sramecc-:xnack+" +; GFX906-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx906:sramecc+:xnack+" +; GFX908: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx908" +; GFX908-NOSRAMECC: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx908:sramecc-" +; GFX908-SRAMECC: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx908:sramecc+" +; GFX908-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx908:xnack-" +; GFX908-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx908:xnack+" +; GFX908-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx908:sramecc-:xnack-" +; GFX908-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx908:sramecc+:xnack-" +; GFX908-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx908:sramecc-:xnack+" +; GFX908-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx908:sramecc+:xnack+" +; GFX909: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx909" +; GFX909-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx909:xnack-" +; GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx909:xnack+" +; GFX90C: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx90c" +; GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx90c:xnack-" +; GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx90c:xnack+" +; GFX942: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx942" +; GFX942-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx942:xnack-" +; GFX942-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx942:xnack+" +; GFX950: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx950" +; GFX950-NOXNACK: .amdgcn_target 
"amdgcn-amd-amdhsa-unknown-gfx950:xnack-" +; GFX950-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx950:xnack+" +; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1010" +; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1010:xnack-" +; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1010:xnack+" +; GFX1011: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1011" +; GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1011:xnack-" +; GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1011:xnack+" +; GFX1012: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1012" +; GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1012:xnack-" +; GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1012:xnack+" +; GFX1013: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1013" +; GFX1013-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1013:xnack-" +; GFX1013-XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1013:xnack+" +; GFX1030: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1030" +; GFX1031: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1031" +; GFX1032: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1032" +; GFX1033: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1033" +; GFX1034: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1034" +; GFX1035: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1035" +; GFX1036: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1036" +; GFX1100: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1100" +; GFX1101: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1101" +; GFX1102: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1102" +; GFX1103: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1103" +; GFX1150: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1150" +; GFX1151: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1151" +; GFX1152: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1152" +; GFX1153: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1153" +; GFX1170: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1170" +; 
GFX1171: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1171" +; GFX1172: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1172" +; GFX1200: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1200" +; GFX1201: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1201" +; GFX1250: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1250" +; GFX1251: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1251" +; GFX1310: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1310" -; GFX9_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack-" -; GFX9_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack+" -; GFX9_4_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-4-generic:xnack-" -; GFX9_4_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-4-generic:xnack+" -; GFX10_1_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10-1-generic:xnack-" -; GFX10_1_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10-1-generic:xnack+" -; GFX10_3_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx10-3-generic" -; GFX11_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx11-generic" -; GFX12_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx12-generic" -; GFX12_5_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx12-5-generic" +; GFX9_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx9-generic:xnack-" +; GFX9_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx9-generic:xnack+" +; GFX9_4_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx9-4-generic:xnack-" +; GFX9_4_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx9-4-generic:xnack+" +; GFX10_1_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx10-1-generic:xnack-" +; GFX10_1_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx10-1-generic:xnack+" +; GFX10_3_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx10-3-generic" +; GFX11_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx11-generic" +; GFX12_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx12-generic" +; 
GFX12_5_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx12-5-generic" define amdgpu_kernel void @directive_amdgcn_target() { ret void diff --git a/llvm/test/CodeGen/AMDGPU/elf-notes.ll b/llvm/test/CodeGen/AMDGPU/elf-notes.ll index d0dec1f1fe7e4..245ddfe68bba6 100644 --- a/llvm/test/CodeGen/AMDGPU/elf-notes.ll +++ b/llvm/test/CodeGen/AMDGPU/elf-notes.ll @@ -11,21 +11,21 @@ ; OSABI-UNK-NOT: .hsa_code_object_version ; OSABI-UNK-NOT: .hsa_code_object_isa -; OSABI-UNK: .amd_amdgpu_isa "amdgcn-amd-unknown--gfx802" +; OSABI-UNK: .amd_amdgpu_isa "amdgcn-amd-unknown-unknown-gfx802" ; OSABI-UNK-NOT: .amd_amdgpu_hsa_metadata ; OSABI-UNK-NOT: .amd_amdgpu_pal_metadata ; OSABI-UNK-ELF-NOT: Unknown note type ; OSABI-UNK-ELF: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name) ; OSABI-UNK-ELF: AMD HSA ISA Name: -; OSABI-UNK-ELF: amdgcn-amd-unknown--gfx802 +; OSABI-UNK-ELF: amdgcn-amd-unknown-unknown-gfx802 ; OSABI-UNK-ELF-NOT: Unknown note type ; OSABI-UNK-ELF-NOT: NT_AMD_HSA_METADATA (AMD HSA Metadata) ; OSABI-UNK-ELF-NOT: Unknown note type ; OSABI-UNK-ELF-NOT: NT_AMD_PAL_METADATA (AMD PAL Metadata) ; OSABI-UNK-ELF-NOT: Unknown note type -; OSABI-HSA: amdhsa.target: amdgcn-amd-amdhsa--gfx802 +; OSABI-HSA: amdhsa.target: amdgcn-amd-amdhsa-unknown-gfx802 ; OSABI-HSA: amdhsa.version: ; OSABI-HSA: .end_amdgpu_metadata ; OSABI-HSA-NOT: .amd_amdgpu_pal_metadata @@ -46,20 +46,20 @@ ; OSABI-HSA-ELF: .vgpr_count: 0 ; OSABI-HSA-ELF: .vgpr_spill_count: 0 ; OSABI-HSA-ELF: .wavefront_size: 64 -; OSABI-HSA-ELF: amdhsa.target: amdgcn-amd-amdhsa--gfx802 +; OSABI-HSA-ELF: amdhsa.target: amdgcn-amd-amdhsa-unknown-gfx802 ; OSABI-HSA-ELF: amdhsa.version: ; OSABI-HSA-ELF: - 1 ; OSABI-HSA-ELF: - 1 ; OSABI-HSA-ELF: ... 
; OSABI-HSA-ELF-NOT: NT_AMD_PAL_METADATA (AMD PAL Metadata) -; OSABI-PAL: .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802" +; OSABI-PAL: .amd_amdgpu_isa "amdgcn-amd-amdpal-unknown-gfx802" ; OSABI-PAL: .amdgpu_pal_metadata ; OSABI-PAL-NOT: .amd_amdgpu_hsa_metadata ; OSABI-PAL-ELF: NT_AMD_HSA_ISA_NAME (AMD HSA ISA Name) ; OSABI-PAL-ELF: AMD HSA ISA Name: -; OSABI-PAL-ELF: amdgcn-amd-amdpal--gfx802 +; OSABI-PAL-ELF: amdgcn-amd-amdpal-unknown-gfx802 ; OSABI-PAL-ELF-NOT: NT_AMD_HSA_METADATA (AMD HSA Metadata) ; OSABI-PAL-ELF: NT_AMDGPU_METADATA (AMDGPU Metadata) ; OSABI-PAL-ELF: AMDGPU Metadata: diff --git a/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll b/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll index a83cde14892b5..6dc0aa3d7e3b0 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx902-without-xnack.ll @@ -1,6 +1,6 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck %s -; CHECK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902:xnack-" +; CHECK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx902:xnack-" define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind { store float 0.0, ptr addrspace(1) %out0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll b/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll index 0f1a784eba19e..e10010d256a30 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-default-device.ll @@ -3,7 +3,7 @@ ; Make sure that with an HSA triple, we don't default to an ; unsupported device. 
-; CHECK: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700" +; CHECK: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx700" define amdgpu_kernel void @test_kernel(ptr addrspace(1) %out0, ptr addrspace(1) %out1) nounwind { store float 0.0, ptr addrspace(1) %out0 ret void diff --git a/llvm/test/CodeGen/AMDGPU/hsa-func.ll b/llvm/test/CodeGen/AMDGPU/hsa-func.ll index b8bdacf450fb3..86c8db598fcd2 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-func.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-func.ll @@ -18,13 +18,13 @@ ; ELF: } ; ELF: SHT_NOTE -; ELF: 0000: 07000000 4F000000 20000000 414D4447 +; ELF: 0000: 07000000 57000000 20000000 414D4447 ; ELF: 0010: 50550000 83AE616D 64687361 2E6B6572 ; ELF: 0020: 6E656C73 90AD616D 64687361 2E746172 -; ELF: 0030: 676574BD 616D6467 636E2D75 6E6B6E6F -; ELF: 0040: 776E2D61 6D646873 612D2D67 66783730 -; ELF: 0050: 30AE616D 64687361 2E766572 73696F6E -; ELF: 0060: 92010100 +; ELF: 0030: 676574D9 24616D64 67636E2D 756E6B6E +; ELF: 0040: 6F776E2D 616D6468 73612D75 6E6B6E6F +; ELF: 0050: 776E2D67 66783730 30AE616D 64687361 +; ELF: 0060: 2E766572 73696F6E 92010100 ; ELF: Symbol { ; ELF: Name: simple @@ -33,8 +33,8 @@ ; ELF: } ; HSA: .text -; HSA-CI: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700" -; HSA-VI: .amdgcn_target "amdgcn-unknown-amdhsa--gfx801" +; HSA-CI: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx700" +; HSA-VI: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx801" ; HSA-NOT: .amdgpu_hsa_kernel simple ; HSA: .globl simple diff --git a/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll b/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll index ea578fc64c699..a1ace8e61680a 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa-note-no-func.ll @@ -34,28 +34,28 @@ ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 -mattr=-xnack | FileCheck --check-prefix=HSA-GFX906 %s ; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 | FileCheck --check-prefix=HSA-GFX907 %s -; NONHSA-SI600: .amd_amdgpu_isa 
"amdgcn-unknown-unknown--gfx600" -; NONHSA-SI601: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx601" -; NONHSA-SI602: .amd_amdgpu_isa "amdgcn-unknown-unknown--gfx602" -; HSA-CI700: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700" -; HSA-CI701: .amdgcn_target "amdgcn-unknown-amdhsa--gfx701" -; HSA-CI702: .amdgcn_target "amdgcn-unknown-amdhsa--gfx702" -; HSA-CI703: .amdgcn_target "amdgcn-unknown-amdhsa--gfx703" -; HSA-CI704: .amdgcn_target "amdgcn-unknown-amdhsa--gfx704" -; HSA-CI705: .amdgcn_target "amdgcn-unknown-amdhsa--gfx705" -; HSA-VI801: .amdgcn_target "amdgcn-unknown-amdhsa--gfx801" -; HSA-VI802: .amdgcn_target "amdgcn-unknown-amdhsa--gfx802" -; HSA-VI803: .amdgcn_target "amdgcn-unknown-amdhsa--gfx803" -; HSA-VI805: .amdgcn_target "amdgcn-unknown-amdhsa--gfx805" -; HSA-VI810: .amdgcn_target "amdgcn-unknown-amdhsa--gfx810" -; HSA-GFX900: .amdgcn_target "amdgcn-unknown-amdhsa--gfx900:xnack-" -; HSA-GFX901: .amdgcn_target "amdgcn-unknown-amdhsa--gfx900" -; HSA-GFX902: .amdgcn_target "amdgcn-unknown-amdhsa--gfx902:xnack-" -; HSA-GFX903: .amdgcn_target "amdgcn-unknown-amdhsa--gfx902" -; HSA-GFX904: .amdgcn_target "amdgcn-unknown-amdhsa--gfx904:xnack-" -; HSA-GFX905: .amdgcn_target "amdgcn-unknown-amdhsa--gfx904" -; HSA-GFX906: .amdgcn_target "amdgcn-unknown-amdhsa--gfx906:xnack-" -; HSA-GFX907: .amdgcn_target "amdgcn-unknown-amdhsa--gfx906" +; NONHSA-SI600: .amd_amdgpu_isa "amdgcn-unknown-unknown-unknown-gfx600" +; NONHSA-SI601: .amd_amdgpu_isa "amdgcn-unknown-unknown-unknown-gfx601" +; NONHSA-SI602: .amd_amdgpu_isa "amdgcn-unknown-unknown-unknown-gfx602" +; HSA-CI700: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx700" +; HSA-CI701: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx701" +; HSA-CI702: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx702" +; HSA-CI703: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx703" +; HSA-CI704: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx704" +; HSA-CI705: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx705" +; 
HSA-VI801: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx801" +; HSA-VI802: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx802" +; HSA-VI803: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx803" +; HSA-VI805: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx805" +; HSA-VI810: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx810" +; HSA-GFX900: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx900:xnack-" +; HSA-GFX901: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx900" +; HSA-GFX902: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx902:xnack-" +; HSA-GFX903: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx902" +; HSA-GFX904: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx904:xnack-" +; HSA-GFX905: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx904" +; HSA-GFX906: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx906:xnack-" +; HSA-GFX907: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx906" !llvm.module.flags = !{!0} !0 = !{i32 1, !"amdhsa_code_object_version", i32 400} diff --git a/llvm/test/CodeGen/AMDGPU/hsa.ll b/llvm/test/CodeGen/AMDGPU/hsa.ll index 9f3702cda9d1e..779d54065f84c 100644 --- a/llvm/test/CodeGen/AMDGPU/hsa.ll +++ b/llvm/test/CodeGen/AMDGPU/hsa.ll @@ -26,7 +26,7 @@ ; ELF: SHF_ALLOC (0x2) ; ELF: ] ; ELF: SectionData ( -; ELF: 0000: 07000000 A8020000 20000000 414D4447 +; ELF: 0000: 07000000 B0020000 20000000 414D4447 ; ELF: 0010: 50550000 83AE616D 64687361 2E6B6572 ; ELF: 0020: 6E656C73 928DA52E 61726773 9185AE2E ; ELF: 0030: 61646472 6573735F 73706163 65A6676C @@ -66,10 +66,11 @@ ; ELF: 0250: 6770725F 636F756E 7402B12E 76677072 ; ELF: 0260: 5F737069 6C6C5F63 6F756E74 00AF2E77 ; ELF: 0270: 61766566 726F6E74 5F73697A 6540AD61 -; ELF: 0280: 6D646873 612E7461 72676574 BD616D64 -; ELF: 0290: 67636E2D 756E6B6E 6F776E2D 616D6468 -; ELF: 02A0: 73612D2D 67667837 3030AE61 6D646873 -; ELF: 02B0: 612E7665 7273696F 6E920101 +; ELF: 0280: 6D646873 612E7461 72676574 D924616D +; ELF: 0290: 6467636E 2D756E6B 6E6F776E 2D616D64 +; ELF: 02A0: 6873612D 
756E6B6E 6F776E2D 67667837 +; ELF: 02B0: 3030AE61 6D646873 612E7665 7273696F +; ELF: 02C0: 6E920101 ; ELF: ) ; ELF: Symbol { @@ -79,8 +80,8 @@ ; HSA-NOT: .AMDGPU.config ; HSA: .text -; HSA-CI: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700" -; HSA-VI: .amdgcn_target "amdgcn-unknown-amdhsa--gfx801" +; HSA-CI: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx700" +; HSA-VI: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx801" ; HSA-LABEL: {{^}}simple: diff --git a/llvm/test/CodeGen/AMDGPU/target-id-xnack-always-on.ll b/llvm/test/CodeGen/AMDGPU/target-id-xnack-always-on.ll index 13d13c875b8aa..4469f55fbac07 100644 --- a/llvm/test/CodeGen/AMDGPU/target-id-xnack-always-on.ll +++ b/llvm/test/CodeGen/AMDGPU/target-id-xnack-always-on.ll @@ -14,7 +14,7 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx12-5-generic -mattr=+xnack < %s | FileCheck %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx12-5-generic -mattr=-xnack < %s | FileCheck %s -; CHECK: .amdgcn_target "amdgcn-amd-amdhsa--gfx{{1250|1251|12-5-generic}}" +; CHECK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx{{1250|1251|12-5-generic}}" define void @func0() { entry: diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll index 560b0e2c81cf2..88e2b87919789 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll @@ -10,8 +10,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s -; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" -; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900 +; ASM: .amdgcn_target 
"amdgcn-amd-amdhsa-unknown-gfx900" +; ASM: amdhsa.target: amdgcn-amd-amdhsa-unknown-gfx900 ; ASM: amdhsa.version: ; ASM: - 1 ; ASM4: - 1 diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll index 0741ec4ffac42..bec91dd968f3a 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll @@ -10,8 +10,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s -; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" -; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700 +; ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx700" +; ASM: amdhsa.target: amdgcn-amd-amdhsa-unknown-gfx700 ; ASM: amdhsa.version: ; ASM: - 1 ; ASM4: - 1 diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll index 08dd90250d0b4..8dee8cdba04bc 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll @@ -10,8 +10,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s -; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" -; ASM: amdhsa.target: 
'amdgcn-amd-amdhsa--gfx900:xnack-' +; ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx900:xnack-" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa-unknown-gfx900:xnack-' ; ASM: amdhsa.version: ; ASM: - 1 ; ASM4: - 1 diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll index a8340ddadaaf7..6fe3485179f23 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll @@ -10,8 +10,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s -; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" -; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' +; ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx900:xnack+" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa-unknown-gfx900:xnack+' ; ASM: amdhsa.version: ; ASM: - 1 ; ASM4: - 1 diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll index aefcfac23ff5d..9b0afa226d964 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll @@ -10,8 +10,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s -; ASM: .amdgcn_target 
"amdgcn-amd-amdhsa--gfx900:xnack-" -; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' +; ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx900:xnack-" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa-unknown-gfx900:xnack-' ; ASM: amdhsa.version: ; ASM: - 1 ; ASM4: - 1 diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll index 6005c31622405..23638debcd086 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll @@ -10,8 +10,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s -; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" -; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' +; ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx900:xnack-" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa-unknown-gfx900:xnack-' ; ASM: amdhsa.version: ; ASM: - 1 ; ASM4: - 1 diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll index 328f56fb841b8..2bbed1fbf2703 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll @@ -10,8 +10,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck 
--check-prefixes=ELF,ELF6 %s -; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" -; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' +; ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx900:xnack+" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa-unknown-gfx900:xnack+' ; ASM: amdhsa.version: ; ASM: - 1 ; ASM4: - 1 diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll index c50dd8b2fec7a..05c091e1c0af7 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll @@ -10,8 +10,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s -; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" -; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' +; ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx900:xnack+" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa-unknown-gfx900:xnack+' ; ASM: amdhsa.version: ; ASM: - 1 ; ASM4: - 1 diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll index cb2c07c7f9f4e..bd0603e91ec99 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-any.ll @@ -6,8 +6,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj 
--file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s -; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900" -; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900 +; ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx900" +; ASM: amdhsa.target: amdgcn-amd-amdhsa-unknown-gfx900 ; ASM: amdhsa.version: ; ASM: - 1 ; ASM4: - 1 diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll index fed493b630a4d..e8f29b71afedb 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll @@ -10,8 +10,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s -; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700" -; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700 +; ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx700" +; ASM: amdhsa.target: amdgcn-amd-amdhsa-unknown-gfx700 ; ASM: amdhsa.version: ; ASM: - 1 ; ASM4: - 1 diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll index 60ff8b2dbb5eb..87c9f28272fc8 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll @@ -10,8 +10,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | 
FileCheck --check-prefixes=ELF,ELF6 %s -; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-" -; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-' +; ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx900:xnack-" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa-unknown-gfx900:xnack-' ; ASM: amdhsa.version: ; ASM: - 1 ; ASM4: - 1 diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll index e04629a24209e..b2a9acb457ef9 100644 --- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll +++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll @@ -10,8 +10,8 @@ ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s ; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s -; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+" -; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+' +; ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx900:xnack+" +; ASM: amdhsa.target: 'amdgcn-amd-amdhsa-unknown-gfx900:xnack+' ; ASM: amdhsa.version: ; ASM: - 1 ; ASM4: - 1 diff --git a/llvm/test/MC/AMDGPU/amd-amdgpu-isa-malformed-target-id.s b/llvm/test/MC/AMDGPU/amd-amdgpu-isa-malformed-target-id.s new file mode 100644 index 0000000000000..efce3612f0300 --- /dev/null +++ b/llvm/test/MC/AMDGPU/amd-amdgpu-isa-malformed-target-id.s @@ -0,0 +1,5 @@ +// RUN: not llvm-mc -triple amdgcn-amd-unknown -mcpu=gfx900 %s 2>&1 | FileCheck %s + +// Test malformed target ID error handling in .amd_amdgpu_isa directive +// CHECK: [[#@LINE+1]]:17: error: malformed target id +.amd_amdgpu_isa "not-a-valid-target-id" diff --git a/llvm/test/MC/AMDGPU/amdgcn-target-directive-triple-env.s b/llvm/test/MC/AMDGPU/amdgcn-target-directive-triple-env.s new file 
mode 100644 index 0000000000000..13f9a500d5c2f --- /dev/null +++ b/llvm/test/MC/AMDGPU/amdgcn-target-directive-triple-env.s @@ -0,0 +1,17 @@ +// RUN: split-file %s %t +// RUN: llvm-mc -triple amdgcn-amd-amdhsa-llvm -mcpu=gfx802 %t/amdhsa-llvm.s | FileCheck --check-prefix=AMDHSA-LLVM %s +// RUN: llvm-mc -triple amdgcn-amd-amdpal-llvm -mcpu=gfx802 %t/amdpal-llvm.s | FileCheck --check-prefix=AMDPAL-LLVM %s +// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx802 %t/amdhsa-llvm.s -filetype=null 2>&1 | FileCheck --check-prefix=AMDHSA-ERR %s +// RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=gfx802 %t/amdpal-llvm.s -filetype=null 2>&1 | FileCheck --check-prefix=AMDPAL-ERR %s + +// Test that the environment component of the triple is preserved and validated + +//--- amdhsa-llvm.s +// AMDHSA-LLVM: .amdgcn_target "amdgcn-amd-amdhsa-llvm-gfx802" +// AMDHSA-ERR: {{.*}}:16: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa-llvm-gfx802 does not match the specified target id amdgcn-amd-amdhsa-unknown-gfx802 +.amdgcn_target "amdgcn-amd-amdhsa-llvm-gfx802" + +//--- amdpal-llvm.s +// AMDPAL-LLVM: .amd_amdgpu_isa "amdgcn-amd-amdpal-llvm-gfx802" +// AMDPAL-ERR: {{.*}}:17: error: .amd_amdgpu_isa directive's target id amdgcn-amd-amdpal-llvm-gfx802 does not match the specified target id amdgcn-amd-amdpal-unknown-gfx802 +.amd_amdgpu_isa "amdgcn-amd-amdpal-llvm-gfx802" diff --git a/llvm/test/MC/AMDGPU/amdgcn-target-malformed-target-id.s b/llvm/test/MC/AMDGPU/amdgcn-target-malformed-target-id.s new file mode 100644 index 0000000000000..d4f89b5ac0142 --- /dev/null +++ b/llvm/test/MC/AMDGPU/amdgcn-target-malformed-target-id.s @@ -0,0 +1,5 @@ +// RUN: not llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 %s -filetype=null 2>&1 | FileCheck %s + +// Test malformed target ID error handling in .amdgcn_target directive +// CHECK: [[#@LINE+1]]:16: error: malformed target ID +.amdgcn_target "not-a-valid-target-id" diff --git a/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s 
b/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s index bf5a30e0f209e..899e136f93735 100644 --- a/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s +++ b/llvm/test/MC/AMDGPU/buffer-op-swz-operand.s @@ -1,6 +1,6 @@ // RUN: llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1100 --show-inst < %s | FileCheck %s -// CHECK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100" +// CHECK: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1100" buffer_load_dwordx4 v[0:3], v0, s[0:3], 0, offen offset:4092 slc // CHECK: buffer_load_b128 v[0:3], v0, s[0:3], 0 offen offset:4092 slc ; diff --git a/llvm/test/MC/AMDGPU/hsa-diag-v4.s b/llvm/test/MC/AMDGPU/hsa-diag-v4.s index cdc621ca00733..c4bf7c3e169a4 100644 --- a/llvm/test/MC/AMDGPU/hsa-diag-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-diag-v4.s @@ -11,11 +11,11 @@ // GCN-LABEL: warning: test_target // GFX8-NOT: error: -// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1010:xnack+ -// GFX11: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1100 -// GFX12: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--[[MCPU]] -// GFX1170: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1170 -// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-unknown--gfx810 +// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa-unknown-gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa-unknown-gfx1010:xnack+ +// GFX11: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa-unknown-gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa-unknown-gfx1100 +// GFX12: error: .amdgcn_target directive's 
target id amdgcn-amd-amdhsa-unknown-gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa-unknown-[[MCPU]] +// GFX1170: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa-unknown-gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa-unknown-gfx1170 +// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa-unknown-gfx810:xnack+ does not match the specified target id amdgcn-amd-unknown-unknown-gfx810 .warning "test_target" .amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack+" diff --git a/llvm/test/MC/AMDGPU/hsa-exp.s b/llvm/test/MC/AMDGPU/hsa-exp.s index 3cc35b781507b..035b55f8f98bd 100644 --- a/llvm/test/MC/AMDGPU/hsa-exp.s +++ b/llvm/test/MC/AMDGPU/hsa-exp.s @@ -14,7 +14,7 @@ // ELF: } .amdgcn_target "amdgcn-unknown-amdhsa--gfx700" -// ASM: .amdgcn_target "amdgcn-unknown-amdhsa--gfx700" +// ASM: .amdgcn_target "amdgcn-unknown-amdhsa-unknown-gfx700" .amdhsa_code_object_version 4 // ASM: .amdhsa_code_object_version 4 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s index 1ad2510422f76..664db2b8359fd 100644 --- a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s @@ -49,7 +49,7 @@ .text .amdgcn_target "amdgcn-amd-amdhsa--gfx1200" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1200" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1200" .amdhsa_code_object_version 4 // ASM: .amdhsa_code_object_version 4 diff --git a/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s index 2021bb05c3f15..294f46cbde48f 100644 --- a/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx1250-v4.s @@ -65,7 +65,7 @@ .text .amdgcn_target "amdgcn-amd-amdhsa--gfx1250" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1250" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1250" .p2align 8 .type minimal,@function diff --git a/llvm/test/MC/AMDGPU/hsa-gfx1251-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx1251-v4.s index 
157715b82f3c2..8e1e34651be6c 100644 --- a/llvm/test/MC/AMDGPU/hsa-gfx1251-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx1251-v4.s @@ -65,7 +65,7 @@ .text .amdgcn_target "amdgcn-amd-amdhsa--gfx1251" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1251" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1251" .p2align 8 .type minimal,@function diff --git a/llvm/test/MC/AMDGPU/hsa-gfx13-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx13-v4.s index 205afbd35a432..b700cec253fb2 100644 --- a/llvm/test/MC/AMDGPU/hsa-gfx13-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-gfx13-v4.s @@ -49,7 +49,7 @@ .text .amdgcn_target "amdgcn-amd-amdhsa--gfx1310" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1310" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1310" .amdhsa_code_object_version 4 // ASM: .amdhsa_code_object_version 4 diff --git a/llvm/test/MC/AMDGPU/hsa-tg-split.s b/llvm/test/MC/AMDGPU/hsa-tg-split.s index ca3de214a64a0..bbe72cc7e73e4 100644 --- a/llvm/test/MC/AMDGPU/hsa-tg-split.s +++ b/llvm/test/MC/AMDGPU/hsa-tg-split.s @@ -9,7 +9,7 @@ // OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000 .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx90a:xnack+" .amdhsa_code_object_version 4 // ASM: .amdhsa_code_object_version 4 diff --git a/llvm/test/MC/AMDGPU/hsa-v4.s b/llvm/test/MC/AMDGPU/hsa-v4.s index 931b4e874630b..07e648e3a9982 100644 --- a/llvm/test/MC/AMDGPU/hsa-v4.s +++ b/llvm/test/MC/AMDGPU/hsa-v4.s @@ -47,7 +47,7 @@ // OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000 .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx904:xnack+" .amdhsa_code_object_version 4 // ASM: .amdhsa_code_object_version 4 diff --git a/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s b/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s index b71d384b47613..5ee020c0fd132 100644 --- 
a/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s +++ b/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s @@ -52,7 +52,7 @@ // OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000 .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904:xnack+" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx904:xnack+" .amdhsa_code_object_version 5 // ASM: .amdhsa_code_object_version 5 diff --git a/llvm/test/MC/AMDGPU/isa-version-hsa.s b/llvm/test/MC/AMDGPU/isa-version-hsa.s index cdf25501bf7fc..ac7c0e8e46cd5 100644 --- a/llvm/test/MC/AMDGPU/isa-version-hsa.s +++ b/llvm/test/MC/AMDGPU/isa-version-hsa.s @@ -8,7 +8,7 @@ // RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s -filetype=null 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s .amdgcn_target "amdgcn-amd-amdhsa--gfx802" -// OSABI-HSA: .amdgcn_target "amdgcn-amd-amdhsa--gfx802" -// OSABI-HSA-ERR: :[[@LINE-2]]:16: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx802 does not match the specified target id amdgcn-amd-amdhsa--gfx803 -// OSABI-PAL-ERR: :[[@LINE-3]]:16: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx802 does not match the specified target id amdgcn-amd-amdpal--gfx802 -// OSABI-UNK-ERR: :[[@LINE-4]]:16: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx802 does not match the specified target id amdgcn-amd-unknown--gfx802 +// OSABI-HSA: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx802" +// OSABI-HSA-ERR: :[[@LINE-2]]:16: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa-unknown-gfx802 does not match the specified target id amdgcn-amd-amdhsa-unknown-gfx803 +// OSABI-PAL-ERR: :[[@LINE-3]]:16: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa-unknown-gfx802 does not match the specified target id amdgcn-amd-amdpal-unknown-gfx802 +// OSABI-UNK-ERR: :[[@LINE-4]]:16: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa-unknown-gfx802 does not match the specified target id 
amdgcn-amd-unknown-unknown-gfx802 diff --git a/llvm/test/MC/AMDGPU/isa-version-pal.s b/llvm/test/MC/AMDGPU/isa-version-pal.s index 81f68523b08b2..9c99b3b5936df 100644 --- a/llvm/test/MC/AMDGPU/isa-version-pal.s +++ b/llvm/test/MC/AMDGPU/isa-version-pal.s @@ -6,9 +6,9 @@ // RUN: llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s | FileCheck --check-prefix=OSABI-PAL %s // RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=gfx803 %s -filetype=null 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s -// OSABI-PAL: .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802" -// OSABI-UNK-ERR: error: target id must match options -// OSABI-HSA-ERR: error: .amdgcn_target directive's target id amdgcn-amd-amdpal--gfx802 does not match the specified target id amdgcn-amd-amdhsa--gfx802 -// OSABI-PAL-ERR: error: target id must match options +// OSABI-PAL: .amd_amdgpu_isa "amdgcn-amd-amdpal-unknown-gfx802" +// OSABI-UNK-ERR: error: .amd_amdgpu_isa directive's target id amdgcn-amd-amdpal-unknown-gfx802 does not match the specified target id amdgcn-amd-unknown-unknown-gfx802 +// OSABI-HSA-ERR: error: .amdgcn_target directive's target id amdgcn-amd-amdpal-unknown-gfx802 does not match the specified target id amdgcn-amd-amdhsa-unknown-gfx802 +// OSABI-PAL-ERR: error: .amd_amdgpu_isa directive's target id amdgcn-amd-amdpal-unknown-gfx802 does not match the specified target id amdgcn-amd-amdpal-unknown-gfx803 .amd_amdgpu_isa "amdgcn-amd-amdpal--gfx802" .amdgcn_target "amdgcn-amd-amdpal--gfx802" diff --git a/llvm/test/MC/AMDGPU/isa-version-unk.s b/llvm/test/MC/AMDGPU/isa-version-unk.s index e34fa1059c311..855dae8a0aa35 100644 --- a/llvm/test/MC/AMDGPU/isa-version-unk.s +++ b/llvm/test/MC/AMDGPU/isa-version-unk.s @@ -6,9 +6,9 @@ // RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=gfx802 %s -filetype=null 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s // RUN: not llvm-mc -triple amdgcn-amd-amdpal -mcpu=iceland %s -filetype=null 2>&1 | FileCheck --check-prefix=OSABI-PAL-ERR %s -// OSABI-UNK: .amd_amdgpu_isa 
"amdgcn-amd-unknown--gfx802" -// OSABI-UNK-ERR: error: target id must match options -// OSABI-HSA-ERR: error: .amdgcn_target directive's target id amdgcn-amd-unknown--gfx802 does not match the specified target id amdgcn-amd-amdhsa--gfx802 -// OSABI-PAL-ERR: error: target id must match options +// OSABI-UNK: .amd_amdgpu_isa "amdgcn-amd-unknown-unknown-gfx802" +// OSABI-UNK-ERR: error: .amd_amdgpu_isa directive's target id amdgcn-amd-unknown-unknown-gfx802 does not match the specified target id amdgcn-amd-unknown-unknown-gfx803 +// OSABI-HSA-ERR: error: .amdgcn_target directive's target id amdgcn-amd-unknown-unknown-gfx802 does not match the specified target id amdgcn-amd-amdhsa-unknown-gfx802 +// OSABI-PAL-ERR: error: .amdgcn_target directive's target id amdgcn-amd-unknown-unknown-gfx802 does not match the specified target id amdgcn-amd-amdpal-unknown-gfx802 .amd_amdgpu_isa "amdgcn-amd-unknown--gfx802" .amdgcn_target "amdgcn-amd-unknown--gfx802" diff --git a/llvm/test/MC/AMDGPU/user-sgpr-count.s b/llvm/test/MC/AMDGPU/user-sgpr-count.s index 8a12741920dd4..01458a3ea3e1d 100644 --- a/llvm/test/MC/AMDGPU/user-sgpr-count.s +++ b/llvm/test/MC/AMDGPU/user-sgpr-count.s @@ -1,7 +1,7 @@ // RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a --amdhsa-code-object-version=4 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+" -// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+" +// ASM: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx90a:xnack+" // ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count From b7be82bed224da3a8fae6811e25fe3f6d0c1ccd3 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Mon, 22 Jun 2026 12:16:05 -0700 Subject: [PATCH 065/511] [AMDGPU] Update packed FP32 intrinsic cost model (#205145) Intrinsics will not have packed vector benefit if they don't have the corresponding packed instructions. 
--- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 9 ++++- .../Analysis/CostModel/AMDGPU/maximumnum.ll | 40 +++++++++---------- .../Analysis/CostModel/AMDGPU/minimumnum.ll | 40 +++++++++---------- 3 files changed, 47 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 03a046bcb9142..fe66a1a5d7242 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -895,10 +895,15 @@ GCNTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, if ((ST->hasVOP3PInsts() && (SLT == MVT::f16 || SLT == MVT::i16 || (SLT == MVT::bf16 && ST->hasBF16PackedInsts()))) || - (ST->hasPackedFP32Ops() && SLT == MVT::f32) || (ST->hasPackedFP64Ops() && SLT == MVT::f64) || - (ST->hasPackedU64Ops() && SLT == MVT::i64)) + (ST->hasPackedU64Ops() && SLT == MVT::i64)) { NElts = (NElts + 1) / 2; + } else if (SLT == MVT::f32) { + bool HasPk2FP32Op = ST->hasPackedFP32Ops() && + IID != Intrinsic::minimumnum && + IID != Intrinsic::maximumnum; + NElts = HasPk2FP32Op ? (NElts + 1) / 2 : NElts; + } // TODO: Get more refined intrinsic costs? 
unsigned InstRate = getQuarterRateInstrCost(CostKind); diff --git a/llvm/test/Analysis/CostModel/AMDGPU/maximumnum.ll b/llvm/test/Analysis/CostModel/AMDGPU/maximumnum.ll index 4537bd54d1067..4db1c0fc76547 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/maximumnum.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/maximumnum.ll @@ -250,11 +250,11 @@ define void @maximumnum_f32() { ; ; GFX1251-LABEL: 'maximumnum_f32' ; GFX1251-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32 = call float @llvm.maximumnum.f32(float poison, float poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32 = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> poison, <2 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> poison, <3 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32 = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> poison, <4 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32 = call <8 x float> @llvm.maximumnum.v8f32(<8 x float> poison, <8 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v16f32 = call <16 x float> @llvm.maximumnum.v16f32(<16 x float> poison, <16 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> poison, <2 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f32 = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> poison, <3 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> poison, <4 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f32 = call <8 x float> 
@llvm.maximumnum.v8f32(<8 x float> poison, <8 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v16f32 = call <16 x float> @llvm.maximumnum.v16f32(<16 x float> poison, <16 x float> poison) ; GFX1251-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX7-SIZE-LABEL: 'maximumnum_f32' @@ -295,11 +295,11 @@ define void @maximumnum_f32() { ; ; GFX1251-SIZE-LABEL: 'maximumnum_f32' ; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32 = call float @llvm.maximumnum.f32(float poison, float poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32 = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> poison, <2 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> poison, <3 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32 = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> poison, <4 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32 = call <8 x float> @llvm.maximumnum.v8f32(<8 x float> poison, <8 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v16f32 = call <16 x float> @llvm.maximumnum.v16f32(<16 x float> poison, <16 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> poison, <2 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f32 = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> poison, <3 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> poison, <4 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an 
estimated cost of 24 for instruction: %v8f32 = call <8 x float> @llvm.maximumnum.v8f32(<8 x float> poison, <8 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v16f32 = call <16 x float> @llvm.maximumnum.v16f32(<16 x float> poison, <16 x float> poison) ; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = call float @llvm.maximumnum.f32(float poison, float poison) @@ -650,11 +650,11 @@ define void @maximumnum_f32_no_ieee() #0 { ; ; GFX1251-LABEL: 'maximumnum_f32_no_ieee' ; GFX1251-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32 = call float @llvm.maximumnum.f32(float poison, float poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32 = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> poison, <2 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> poison, <3 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32 = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> poison, <4 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32 = call <8 x float> @llvm.maximumnum.v8f32(<8 x float> poison, <8 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v16f32 = call <16 x float> @llvm.maximumnum.v16f32(<16 x float> poison, <16 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> poison, <2 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f32 = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> poison, <3 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = call <4 x float> @llvm.maximumnum.v4f32(<4 x 
float> poison, <4 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f32 = call <8 x float> @llvm.maximumnum.v8f32(<8 x float> poison, <8 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v16f32 = call <16 x float> @llvm.maximumnum.v16f32(<16 x float> poison, <16 x float> poison) ; GFX1251-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX7-SIZE-LABEL: 'maximumnum_f32_no_ieee' @@ -695,11 +695,11 @@ define void @maximumnum_f32_no_ieee() #0 { ; ; GFX1251-SIZE-LABEL: 'maximumnum_f32_no_ieee' ; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32 = call float @llvm.maximumnum.f32(float poison, float poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32 = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> poison, <2 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> poison, <3 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32 = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> poison, <4 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32 = call <8 x float> @llvm.maximumnum.v8f32(<8 x float> poison, <8 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v16f32 = call <16 x float> @llvm.maximumnum.v16f32(<16 x float> poison, <16 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> poison, <2 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f32 = call <3 x float> @llvm.maximumnum.v3f32(<3 x float> poison, <3 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an 
estimated cost of 12 for instruction: %v4f32 = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> poison, <4 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f32 = call <8 x float> @llvm.maximumnum.v8f32(<8 x float> poison, <8 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v16f32 = call <16 x float> @llvm.maximumnum.v16f32(<16 x float> poison, <16 x float> poison) ; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; %f32 = call float @llvm.maximumnum.f32(float poison, float poison) diff --git a/llvm/test/Analysis/CostModel/AMDGPU/minimumnum.ll b/llvm/test/Analysis/CostModel/AMDGPU/minimumnum.ll index 5ba3e2ce16ddb..b1824b6be5c2e 100644 --- a/llvm/test/Analysis/CostModel/AMDGPU/minimumnum.ll +++ b/llvm/test/Analysis/CostModel/AMDGPU/minimumnum.ll @@ -266,11 +266,11 @@ define void @minimumnum_f32() { ; ; GFX1251-LABEL: 'minimumnum_f32' ; GFX1251-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32 = call float @llvm.minimumnum.f32(float poison, float poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32 = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> poison, <3 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v16f32 = call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison) +; GFX1251-NEXT: Cost Model: Found an 
estimated cost of 6 for instruction: %v2f32 = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f32 = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> poison, <3 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f32 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v16f32 = call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison) ; GFX1251-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX7-SIZE-LABEL: 'minimumnum_f32' @@ -311,11 +311,11 @@ define void @minimumnum_f32() { ; ; GFX1251-SIZE-LABEL: 'minimumnum_f32' ; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32 = call float @llvm.minimumnum.f32(float poison, float poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32 = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> poison, <3 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v16f32 = call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> 
poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f32 = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> poison, <3 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f32 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v16f32 = call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison) ; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'minimumnum_f32' @@ -698,11 +698,11 @@ define void @minimumnum_f32_no_ieee() #0 { ; ; GFX1251-LABEL: 'minimumnum_f32_no_ieee' ; GFX1251-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32 = call float @llvm.minimumnum.f32(float poison, float poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32 = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> poison, <3 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) -; GFX1251-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v16f32 = call <16 x float> 
@llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f32 = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> poison, <3 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f32 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) +; GFX1251-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v16f32 = call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison) ; GFX1251-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void ; ; GFX7-SIZE-LABEL: 'minimumnum_f32_no_ieee' @@ -743,11 +743,11 @@ define void @minimumnum_f32_no_ieee() #0 { ; ; GFX1251-SIZE-LABEL: 'minimumnum_f32_no_ieee' ; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f32 = call float @llvm.minimumnum.f32(float poison, float poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32 = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f32 = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> poison, <3 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v4f32 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v8f32 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) -; GFX1251-SIZE-NEXT: Cost Model: 
Found an estimated cost of 72 for instruction: %v16f32 = call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f32 = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v3f32 = call <3 x float> @llvm.minimumnum.v3f32(<3 x float> poison, <3 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f32 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f32 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) +; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 144 for instruction: %v16f32 = call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison) ; GFX1251-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void ; ; SIZE-LABEL: 'minimumnum_f32_no_ieee' From b60611921603d7364ceecc71812fc614a0d368b6 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Mon, 22 Jun 2026 12:16:34 -0700 Subject: [PATCH 066/511] [AMDGPU] Add MC omod support for bf16 trans instructions (#205144) Based on recent gfx1250 sp3 update. 
Refer to DEGFXSP3-664 --- llvm/lib/Target/AMDGPU/VOP1Instructions.td | 3 -- llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s | 40 ------------------- .../gfx1250_asm_vop3_from_vop1-fake16.s | 24 +++++++++++ .../MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s | 24 +++++++++++ .../AMDGPU/gfx1250_dasm_vop3_from_vop1.txt | 32 +++++++++++++++ 5 files changed, 80 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td index d504b440b4fe4..bb112ddb329df 100644 --- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -533,12 +533,9 @@ defm V_CVT_U16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_u16_f16", defm V_CVT_I16_F16 : VOP1Inst_t16_with_profiles <"v_cvt_i16_f16", VOP_I16_F16_SPECIAL_OMOD, VOP_I16_F16_SPECIAL_OMOD_t16, VOP_I16_F16_SPECIAL_OMOD_fake16, fp_to_sint>; - -let HasOMod = 0 in { def V_TRANS_BF16_Profile : VOPProfile <[bf16, bf16, untyped, untyped]>; def V_TRANS_BF16_t16_Profile : VOPProfile_True16 ; def V_TRANS_BF16_fake16_Profile : VOPProfile_Fake16 ; -} let TRANS = 1, SchedRW = [WriteTrans32] in { defm V_RCP_F16 : VOP1Inst_t16 <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>; diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s index 43733c28be642..35375d4e62597 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop1_err.s @@ -34,43 +34,3 @@ v_cvt_f32_bf16 v5, v1 div:2 // GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. // GFX1250-ERR-NEXT:{{^}}v_cvt_f32_bf16 v5, v1 div:2 // GFX1250-ERR-NEXT:{{^}} ^ - -v_cos_bf16 v1, v2 mul:2 -// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. -// GFX1250-ERR-NEXT:{{^}}v_cos_bf16 v1, v2 mul:2 -// GFX1250-ERR-NEXT:{{^}} ^ - -v_exp_bf16 v1, v2 mul:2 -// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
-// GFX1250-ERR-NEXT:{{^}}v_exp_bf16 v1, v2 mul:2 -// GFX1250-ERR-NEXT:{{^}} ^ - -v_log_bf16 v1, v2 mul:2 -// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. -// GFX1250-ERR-NEXT:{{^}}v_log_bf16 v1, v2 mul:2 -// GFX1250-ERR-NEXT:{{^}} ^ - -v_rcp_bf16 v1, v2 mul:2 -// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. -// GFX1250-ERR-NEXT:{{^}}v_rcp_bf16 v1, v2 mul:2 -// GFX1250-ERR-NEXT:{{^}} ^ - -v_rsq_bf16 v1, v2 mul:2 -// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. -// GFX1250-ERR-NEXT:{{^}}v_rsq_bf16 v1, v2 mul:2 -// GFX1250-ERR-NEXT:{{^}} ^ - -v_sin_bf16 v1, v2 mul:2 -// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. -// GFX1250-ERR-NEXT:{{^}}v_sin_bf16 v1, v2 mul:2 -// GFX1250-ERR-NEXT:{{^}} ^ - -v_sqrt_bf16 v1, v2 mul:2 -// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. -// GFX1250-ERR-NEXT:{{^}}v_sqrt_bf16 v1, v2 mul:2 -// GFX1250-ERR-NEXT:{{^}} ^ - -v_tanh_bf16 v1, v2 mul:2 -// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: not a valid operand. 
-// GFX1250-ERR-NEXT:{{^}}v_tanh_bf16 v1, v2 mul:2 -// GFX1250-ERR-NEXT:{{^}} ^ diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s index 61497adad6206..da3974cc30115 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1-fake16.s @@ -3751,6 +3751,9 @@ v_tanh_bf16_e64 v5, v1 v_tanh_bf16_e64 v5, v1 clamp // GFX1250: v_tanh_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xca,0xd5,0x01,0x01,0x01,0x02] +v_tanh_bf16_e64 v5, v1 mul:4 +// GFX1250: v_tanh_bf16_e64 v5, v1 mul:4 ; encoding: [0x05,0x00,0xca,0xd5,0x01,0x01,0x01,0x12] + v_tanh_bf16_e64 v5, v255 // GFX1250: v_tanh_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xca,0xd5,0xff,0x01,0x01,0x02] @@ -3826,6 +3829,9 @@ v_rcp_bf16_e64 v5, v1 v_rcp_bf16_e64 v5, v1 clamp // GFX1250: v_rcp_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xf9,0xd5,0x01,0x01,0x01,0x02] +v_rcp_bf16_e64 v5, v1 mul:2 +// GFX1250: v_rcp_bf16_e64 v5, v1 mul:2 ; encoding: [0x05,0x00,0xf9,0xd5,0x01,0x01,0x01,0x0a] + v_rcp_bf16_e64 v5, v255 // GFX1250: v_rcp_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xf9,0xd5,0xff,0x01,0x01,0x02] @@ -3865,6 +3871,9 @@ v_sqrt_bf16_e64 v5, v1 v_sqrt_bf16_e64 v5, v1 clamp // GFX1250: v_sqrt_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xfa,0xd5,0x01,0x01,0x01,0x02] +v_sqrt_bf16_e64 v5, v1 mul:4 +// GFX1250: v_sqrt_bf16_e64 v5, v1 mul:4 ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x01,0x01,0x12] + v_sqrt_bf16_e64 v5, v255 // GFX1250: v_sqrt_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xfa,0xd5,0xff,0x01,0x01,0x02] @@ -3904,6 +3913,9 @@ v_rsq_bf16_e64 v5, v1 v_rsq_bf16_e64 v5, v1 clamp // GFX1250: v_rsq_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xfb,0xd5,0x01,0x01,0x01,0x02] +v_rsq_bf16_e64 v5, v1 div:2 +// GFX1250: v_rsq_bf16_e64 v5, v1 div:2 ; encoding: [0x05,0x00,0xfb,0xd5,0x01,0x01,0x01,0x1a] + v_rsq_bf16_e64 v5, v255 // GFX1250: v_rsq_bf16_e64 v5, v255 ; encoding: 
[0x05,0x00,0xfb,0xd5,0xff,0x01,0x01,0x02] @@ -3943,6 +3955,9 @@ v_log_bf16_e64 v5, v1 v_log_bf16_e64 v5, v1 clamp // GFX1250: v_log_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xfc,0xd5,0x01,0x01,0x01,0x02] +v_log_bf16_e64 v5, v1 mul:2 +// GFX1250: v_log_bf16_e64 v5, v1 mul:2 ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x01,0x01,0x0a] + v_log_bf16_e64 v5, v255 // GFX1250: v_log_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xfc,0xd5,0xff,0x01,0x01,0x02] @@ -3982,6 +3997,9 @@ v_exp_bf16_e64 v5, v1 v_exp_bf16_e64 v5, v1 clamp // GFX1250: v_exp_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xfd,0xd5,0x01,0x01,0x01,0x02] +v_exp_bf16_e64 v5, v1 mul:4 +// GFX1250: v_exp_bf16_e64 v5, v1 mul:4 ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x01,0x01,0x12] + v_exp_bf16_e64 v5, v255 // GFX1250: v_exp_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xfd,0xd5,0xff,0x01,0x01,0x02] @@ -4021,6 +4039,9 @@ v_sin_bf16_e64 v5, v1 v_sin_bf16_e64 v5, v1 clamp // GFX1250: v_sin_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xfe,0xd5,0x01,0x01,0x01,0x02] +v_sin_bf16_e64 v5, v1 div:2 +// GFX1250: v_sin_bf16_e64 v5, v1 div:2 ; encoding: [0x05,0x00,0xfe,0xd5,0x01,0x01,0x01,0x1a] + v_sin_bf16_e64 v5, v255 // GFX1250: v_sin_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xfe,0xd5,0xff,0x01,0x01,0x02] @@ -4060,6 +4081,9 @@ v_cos_bf16_e64 v5, v1 v_cos_bf16_e64 v5, v1 clamp // GFX1250: v_cos_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xff,0xd5,0x01,0x01,0x01,0x02] +v_cos_bf16_e64 v5, v1 mul:2 +// GFX1250: v_cos_bf16_e64 v5, v1 mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x01,0x0a] + v_cos_bf16_e64 v5, v255 // GFX1250: v_cos_bf16_e64 v5, v255 ; encoding: [0x05,0x00,0xff,0xd5,0xff,0x01,0x01,0x02] diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s index e31a644345d70..5828742ae751f 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vop3_from_vop1.s @@ -3922,6 +3922,9 @@ v_tanh_bf16_e64 v5.l, v1.l v_tanh_bf16_e64 
v5.l, v1.l clamp // GFX1250: v_tanh_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xca,0xd5,0x01,0x01,0x01,0x02] +v_tanh_bf16_e64 v5.l, v1.l mul:4 +// GFX1250: v_tanh_bf16_e64 v5.l, v1.l mul:4 ; encoding: [0x05,0x00,0xca,0xd5,0x01,0x01,0x01,0x12] + v_tanh_bf16_e64 v5.l, v255.l // GFX1250: v_tanh_bf16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0xca,0xd5,0xff,0x01,0x01,0x02] @@ -4000,6 +4003,9 @@ v_rcp_bf16_e64 v5.l, v1.l v_rcp_bf16_e64 v5.l, v1.l clamp // GFX1250: v_rcp_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xf9,0xd5,0x01,0x01,0x01,0x02] +v_rcp_bf16_e64 v5.l, v1.l mul:2 +// GFX1250: v_rcp_bf16_e64 v5.l, v1.l mul:2 ; encoding: [0x05,0x00,0xf9,0xd5,0x01,0x01,0x01,0x0a] + v_rcp_bf16_e64 v5.l, v255.l // GFX1250: v_rcp_bf16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0xf9,0xd5,0xff,0x01,0x01,0x02] @@ -4042,6 +4048,9 @@ v_sqrt_bf16_e64 v5.l, v1.l v_sqrt_bf16_e64 v5.l, v1.l clamp // GFX1250: v_sqrt_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xfa,0xd5,0x01,0x01,0x01,0x02] +v_sqrt_bf16_e64 v5.l, v1.l mul:4 +// GFX1250: v_sqrt_bf16_e64 v5.l, v1.l mul:4 ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x01,0x01,0x12] + v_sqrt_bf16_e64 v5.l, v255.l // GFX1250: v_sqrt_bf16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0xfa,0xd5,0xff,0x01,0x01,0x02] @@ -4084,6 +4093,9 @@ v_rsq_bf16_e64 v5.l, v1.l v_rsq_bf16_e64 v5.l, v1.l clamp // GFX1250: v_rsq_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xfb,0xd5,0x01,0x01,0x01,0x02] +v_rsq_bf16_e64 v5.l, v1.l div:2 +// GFX1250: v_rsq_bf16_e64 v5.l, v1.l div:2 ; encoding: [0x05,0x00,0xfb,0xd5,0x01,0x01,0x01,0x1a] + v_rsq_bf16_e64 v5.l, v255.l // GFX1250: v_rsq_bf16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0xfb,0xd5,0xff,0x01,0x01,0x02] @@ -4126,6 +4138,9 @@ v_log_bf16_e64 v5.l, v1.l v_log_bf16_e64 v5.l, v1.l clamp // GFX1250: v_log_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xfc,0xd5,0x01,0x01,0x01,0x02] +v_log_bf16_e64 v5.l, v1.l mul:2 +// GFX1250: v_log_bf16_e64 v5.l, v1.l mul:2 ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x01,0x01,0x0a] + 
v_log_bf16_e64 v5.l, v255.l // GFX1250: v_log_bf16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0xfc,0xd5,0xff,0x01,0x01,0x02] @@ -4168,6 +4183,9 @@ v_exp_bf16_e64 v5.l, v1.l v_exp_bf16_e64 v5.l, v1.l clamp // GFX1250: v_exp_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xfd,0xd5,0x01,0x01,0x01,0x02] +v_exp_bf16_e64 v5.l, v1.l mul:4 +// GFX1250: v_exp_bf16_e64 v5.l, v1.l mul:4 ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x01,0x01,0x12] + v_exp_bf16_e64 v5.l, v255.l // GFX1250: v_exp_bf16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0xfd,0xd5,0xff,0x01,0x01,0x02] @@ -4210,6 +4228,9 @@ v_sin_bf16_e64 v5.l, v1.l v_sin_bf16_e64 v5.l, v1.l clamp // GFX1250: v_sin_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xfe,0xd5,0x01,0x01,0x01,0x02] +v_sin_bf16_e64 v5.l, v1.l div:2 +// GFX1250: v_sin_bf16_e64 v5.l, v1.l div:2 ; encoding: [0x05,0x00,0xfe,0xd5,0x01,0x01,0x01,0x1a] + v_sin_bf16_e64 v5.l, v255.l // GFX1250: v_sin_bf16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0xfe,0xd5,0xff,0x01,0x01,0x02] @@ -4252,6 +4273,9 @@ v_cos_bf16_e64 v5.l, v1.l v_cos_bf16_e64 v5.l, v1.l clamp // GFX1250: v_cos_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xff,0xd5,0x01,0x01,0x01,0x02] +v_cos_bf16_e64 v5.l, v1.l mul:2 +// GFX1250: v_cos_bf16_e64 v5.l, v1.l mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x01,0x0a] + v_cos_bf16_e64 v5.l, v255.l // GFX1250: v_cos_bf16_e64 v5.l, v255.l ; encoding: [0x05,0x00,0xff,0xd5,0xff,0x01,0x01,0x02] diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt index 73a38bfcce68d..56857abc530d9 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vop3_from_vop1.txt @@ -4134,6 +4134,10 @@ # GFX1250-FAKE16: v_tanh_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xca,0xd5,0x01,0x01,0x01,0x02] # GFX1250-REAL16: v_tanh_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xca,0xd5,0x01,0x01,0x01,0x02] 
+0x05,0x00,0xca,0xd5,0x01,0x01,0x01,0x12 +# GFX1250-FAKE16: v_tanh_bf16_e64 v5, v1 mul:4 ; encoding: [0x05,0x00,0xca,0xd5,0x01,0x01,0x01,0x12] +# GFX1250-REAL16: v_tanh_bf16_e64 v5.l, v1.l mul:4 ; encoding: [0x05,0x00,0xca,0xd5,0x01,0x01,0x01,0x12] + 0x05,0x00,0xca,0xd5,0x7f,0x00,0x00,0x00 # GFX1250-FAKE16: v_tanh_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xca,0xd5,0x7f,0x00,0x01,0x02] # GFX1250-REAL16: v_tanh_bf16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xca,0xd5,0x7f,0x00,0x01,0x02] @@ -4226,6 +4230,10 @@ # GFX1250-FAKE16: v_rcp_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xf9,0xd5,0x01,0x01,0x01,0x02] # GFX1250-REAL16: v_rcp_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xf9,0xd5,0x01,0x01,0x01,0x02] +0x05,0x00,0xf9,0xd5,0x01,0x01,0x01,0x0a +# GFX1250-FAKE16: v_rcp_bf16_e64 v5, v1 mul:2 ; encoding: [0x05,0x00,0xf9,0xd5,0x01,0x01,0x01,0x0a] +# GFX1250-REAL16: v_rcp_bf16_e64 v5.l, v1.l mul:2 ; encoding: [0x05,0x00,0xf9,0xd5,0x01,0x01,0x01,0x0a] + 0x05,0x00,0xf9,0xd5,0x7f,0x00,0x00,0x00 # GFX1250-FAKE16: v_rcp_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xf9,0xd5,0x7f,0x00,0x01,0x02] # GFX1250-REAL16: v_rcp_bf16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xf9,0xd5,0x7f,0x00,0x01,0x02] @@ -4282,6 +4290,10 @@ # GFX1250-FAKE16: v_sqrt_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xfa,0xd5,0x01,0x01,0x01,0x02] # GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xfa,0xd5,0x01,0x01,0x01,0x02] +0x05,0x00,0xfa,0xd5,0x01,0x01,0x01,0x12 +# GFX1250-FAKE16: v_sqrt_bf16_e64 v5, v1 mul:4 ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x01,0x01,0x12] +# GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, v1.l mul:4 ; encoding: [0x05,0x00,0xfa,0xd5,0x01,0x01,0x01,0x12] + 0x05,0x00,0xfa,0xd5,0x7f,0x00,0x00,0x00 # GFX1250-FAKE16: v_sqrt_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xfa,0xd5,0x7f,0x00,0x01,0x02] # GFX1250-REAL16: v_sqrt_bf16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xfa,0xd5,0x7f,0x00,0x01,0x02] @@ -4338,6 +4350,10 @@ # GFX1250-FAKE16: v_rsq_bf16_e64 v5, v1 clamp 
; encoding: [0x05,0x80,0xfb,0xd5,0x01,0x01,0x01,0x02] # GFX1250-REAL16: v_rsq_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xfb,0xd5,0x01,0x01,0x01,0x02] +0x05,0x00,0xfb,0xd5,0x01,0x01,0x01,0x1a +# GFX1250-FAKE16: v_rsq_bf16_e64 v5, v1 div:2 ; encoding: [0x05,0x00,0xfb,0xd5,0x01,0x01,0x01,0x1a] +# GFX1250-REAL16: v_rsq_bf16_e64 v5.l, v1.l div:2 ; encoding: [0x05,0x00,0xfb,0xd5,0x01,0x01,0x01,0x1a] + 0x05,0x00,0xfb,0xd5,0x7f,0x00,0x00,0x00 # GFX1250-FAKE16: v_rsq_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xfb,0xd5,0x7f,0x00,0x01,0x02] # GFX1250-REAL16: v_rsq_bf16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xfb,0xd5,0x7f,0x00,0x01,0x02] @@ -4394,6 +4410,10 @@ # GFX1250-FAKE16: v_log_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xfc,0xd5,0x01,0x01,0x01,0x02] # GFX1250-REAL16: v_log_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xfc,0xd5,0x01,0x01,0x01,0x02] +0x05,0x00,0xfc,0xd5,0x01,0x01,0x01,0x0a +# GFX1250-FAKE16: v_log_bf16_e64 v5, v1 mul:2 ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x01,0x01,0x0a] +# GFX1250-REAL16: v_log_bf16_e64 v5.l, v1.l mul:2 ; encoding: [0x05,0x00,0xfc,0xd5,0x01,0x01,0x01,0x0a] + 0x05,0x00,0xfc,0xd5,0x7f,0x00,0x00,0x00 # GFX1250-FAKE16: v_log_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xfc,0xd5,0x7f,0x00,0x01,0x02] # GFX1250-REAL16: v_log_bf16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xfc,0xd5,0x7f,0x00,0x01,0x02] @@ -4450,6 +4470,10 @@ # GFX1250-FAKE16: v_exp_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xfd,0xd5,0x01,0x01,0x01,0x02] # GFX1250-REAL16: v_exp_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xfd,0xd5,0x01,0x01,0x01,0x02] +0x05,0x00,0xfd,0xd5,0x01,0x01,0x01,0x12 +# GFX1250-FAKE16: v_exp_bf16_e64 v5, v1 mul:4 ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x01,0x01,0x12] +# GFX1250-REAL16: v_exp_bf16_e64 v5.l, v1.l mul:4 ; encoding: [0x05,0x00,0xfd,0xd5,0x01,0x01,0x01,0x12] + 0x05,0x00,0xfd,0xd5,0x7f,0x00,0x00,0x00 # GFX1250-FAKE16: v_exp_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xfd,0xd5,0x7f,0x00,0x01,0x02] # GFX1250-REAL16: 
v_exp_bf16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xfd,0xd5,0x7f,0x00,0x01,0x02] @@ -4506,6 +4530,10 @@ # GFX1250-FAKE16: v_sin_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xfe,0xd5,0x01,0x01,0x01,0x02] # GFX1250-REAL16: v_sin_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xfe,0xd5,0x01,0x01,0x01,0x02] +0x05,0x00,0xfe,0xd5,0x01,0x01,0x01,0x1a +# GFX1250-FAKE16: v_sin_bf16_e64 v5, v1 div:2 ; encoding: [0x05,0x00,0xfe,0xd5,0x01,0x01,0x01,0x1a] +# GFX1250-REAL16: v_sin_bf16_e64 v5.l, v1.l div:2 ; encoding: [0x05,0x00,0xfe,0xd5,0x01,0x01,0x01,0x1a] + 0x05,0x00,0xfe,0xd5,0x7f,0x00,0x00,0x00 # GFX1250-FAKE16: v_sin_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xfe,0xd5,0x7f,0x00,0x01,0x02] # GFX1250-REAL16: v_sin_bf16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xfe,0xd5,0x7f,0x00,0x01,0x02] @@ -4562,6 +4590,10 @@ # GFX1250-FAKE16: v_cos_bf16_e64 v5, v1 clamp ; encoding: [0x05,0x80,0xff,0xd5,0x01,0x01,0x01,0x02] # GFX1250-REAL16: v_cos_bf16_e64 v5.l, v1.l clamp ; encoding: [0x05,0x80,0xff,0xd5,0x01,0x01,0x01,0x02] +0x05,0x00,0xff,0xd5,0x01,0x01,0x01,0x0a +# GFX1250-FAKE16: v_cos_bf16_e64 v5, v1 mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x01,0x0a] +# GFX1250-REAL16: v_cos_bf16_e64 v5.l, v1.l mul:2 ; encoding: [0x05,0x00,0xff,0xd5,0x01,0x01,0x01,0x0a] + 0x05,0x00,0xff,0xd5,0x7f,0x00,0x00,0x00 # GFX1250-FAKE16: v_cos_bf16_e64 v5, exec_hi ; encoding: [0x05,0x00,0xff,0xd5,0x7f,0x00,0x01,0x02] # GFX1250-REAL16: v_cos_bf16_e64 v5.l, exec_hi ; encoding: [0x05,0x00,0xff,0xd5,0x7f,0x00,0x01,0x02] From cc177cec6f000c3acef22e973d56da01c0e9611f Mon Sep 17 00:00:00 2001 From: Henry Jiang Date: Mon, 22 Jun 2026 12:17:20 -0700 Subject: [PATCH 067/511] [SimplifyCFG] Allow hoisting in the presence of pseudoprobes (#199753) Fix regressions in the presence of pseudoprobes that prevents SimplifyCFG from hoisting instructions into the predecessor. Teach `hoistCommonCodeFromSuccessors` and `foldBranchToCommonDest` to ignore pseudo probes and drop them when the BB is eliminated. 
The minor loss of profile quality for these cases is justified, as not performing these hoists degrades performance more and blocks downstream passes like loop-vectorize (can be up to 30% in 526.blender_r and 525.x264_r). --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 11 ++ .../fold-branch-to-common-dest-pseudoprobe.ll | 102 +++++++++++++++++ .../hoist-common-skip-pseudoprobe.ll | 103 ++++++++++++++++++ 3 files changed, 216 insertions(+) create mode 100644 llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-pseudoprobe.ll create mode 100644 llvm/test/Transforms/SimplifyCFG/hoist-common-skip-pseudoprobe.ll diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index 524947dd2e95d..b1478b7d10b8f 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1174,6 +1174,11 @@ static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses( if (BonusInst.isTerminator()) continue; + // Skip cloning pseudo probes into the predecessor, as it would overcount + // otherwise. + if (isa(BonusInst)) + continue; + Instruction *NewBonusInst = BonusInst.clone(); if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) { @@ -1524,6 +1529,9 @@ enum SkipFlags { }; static unsigned skippedInstrFlags(Instruction *I) { + // Pseudo probes don't constrain reordering of other instructions. + if (isa(I)) + return 0; unsigned Flags = 0; if (I->mayReadFromMemory()) Flags |= SkipReadMem; @@ -4206,6 +4214,9 @@ bool llvm::foldBranchToCommonDest(CondBrInst *BI, DomTreeUpdater *DTU, // Ignore the terminator. if (isa(I)) continue; + // Pseudo probes aren't speculatable but can be dropped on fold. + if (isa(I)) + continue; // I must be safe to execute unconditionally. 
if (!isSafeToSpeculativelyExecute(&I, CxtI, AC)) return false; diff --git a/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-pseudoprobe.ll b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-pseudoprobe.ll new file mode 100644 index 0000000000000..c3fafd5806823 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/fold-branch-to-common-dest-pseudoprobe.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=simplifycfg %s | FileCheck %s + +declare void @llvm.pseudoprobe(i64, i64, i32, i64) + +; Check that probe 2 does not prevent folding and is dropped +define i32 @chained_cmp(float %v, ptr %out) { +; CHECK-LABEL: @chained_cmp( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt float [[V:%.*]], f0x35B28000 +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 -503749374543619030, i64 1, i32 0, i64 -1) +; CHECK-NEXT: [[CMP2:%.*]] = fcmp ogt float [[V]], f0xBF800001 +; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_THEN:%.*]], label [[COMMON_RET:%.*]] +; CHECK: common.ret: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 1, [[IF_THEN]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; CHECK: if.then: +; CHECK-NEXT: store float [[V]], ptr [[OUT:%.*]], align 4 +; CHECK-NEXT: br label [[COMMON_RET]] +; +entry: + %cmp1 = fcmp olt float %v, 0x3EB6500000000000 + call void @llvm.pseudoprobe(i64 -503749374543619030, i64 1, i32 0, i64 -1) + br i1 %cmp1, label %land.lhs.true, label %ret_zero + +land.lhs.true: + %cmp2 = fcmp ogt float %v, 0xBFF0000020000000 + call void @llvm.pseudoprobe(i64 -503749374543619030, i64 2, i32 0, i64 -1) + br i1 %cmp2, label %if.then, label %ret_zero + +if.then: + store float %v, ptr %out, align 4 + ret i32 1 + +ret_zero: + ret i32 0 +} + +; Probe should not enable folding when another bonus instruction is not +; speculatable like udiv +define i32 @no_fold_with_unsafe_bonus(i32 %x, i32 %y, i32 
%d) { +; CHECK-LABEL: @no_fold_with_unsafe_bonus( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: br i1 [[C0]], label [[BB:%.*]], label [[COMMON:%.*]] +; CHECK: bb: +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 100, i64 1, i32 0, i64 -1) +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[Y:%.*]], [[D:%.*]] +; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[DIV]], 0 +; CHECK-NEXT: br i1 [[C1]], label [[COMMON_RET:%.*]], label [[COMMON]] +; CHECK: common.ret: +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = phi i32 [ 0, [[COMMON]] ], [ 1, [[BB]] ] +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; CHECK: common: +; CHECK-NEXT: br label [[COMMON_RET]] +; +entry: + %c0 = icmp eq i32 %x, 0 + br i1 %c0, label %bb, label %common + +bb: + call void @llvm.pseudoprobe(i64 100, i64 1, i32 0, i64 -1) + %div = udiv i32 %y, %d + %c1 = icmp eq i32 %div, 0 + br i1 %c1, label %if.then, label %common + +if.then: + ret i32 1 + +common: + ret i32 0 +} + +; Multiple probes in BB are all dropped on fold and predecessor probe is preserved. 
+define i32 @multiple_probes_in_bb(i32 %x, i32 %y) { +; CHECK-LABEL: @multiple_probes_in_bb( +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 200, i64 1, i32 0, i64 -1) +; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[Y:%.*]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[C0]], i1 [[C1]], i1 false +; CHECK-NEXT: [[COMMON_RET_OP:%.*]] = select i1 [[OR_COND]], i32 1, i32 0 +; CHECK-NEXT: ret i32 [[COMMON_RET_OP]] +; +entry: + call void @llvm.pseudoprobe(i64 200, i64 1, i32 0, i64 -1) + %c0 = icmp eq i32 %x, 0 + br i1 %c0, label %bb, label %common + +bb: + call void @llvm.pseudoprobe(i64 200, i64 2, i32 0, i64 -1) + %c1 = icmp eq i32 %y, 0 + call void @llvm.pseudoprobe(i64 200, i64 3, i32 0, i64 -1) + br i1 %c1, label %if.then, label %common + +if.then: + ret i32 1 + +common: + ret i32 0 +} diff --git a/llvm/test/Transforms/SimplifyCFG/hoist-common-skip-pseudoprobe.ll b/llvm/test/Transforms/SimplifyCFG/hoist-common-skip-pseudoprobe.ll new file mode 100644 index 0000000000000..9b5e8798072c2 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/hoist-common-skip-pseudoprobe.ll @@ -0,0 +1,103 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S --passes='simplifycfg' %s | FileCheck %s + +declare void @llvm.pseudoprobe(i64, i64, i32, i64) + +; Check that instructions past the pseudoprobe instrs are still hoisted into the predecessor. 
+define void @hoist_loads_past_pseudoprobe(i1 %c, ptr %d, ptr %m, ptr %b, i32 %v) { +; CHECK-LABEL: @hoist_loads_past_pseudoprobe( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[M:%.*]], align 2 +; CHECK-NEXT: [[CONV0:%.*]] = zext i16 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[M1:%.*]], align 2 +; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[TMP3]] to i32 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 1, i64 1, i32 0, i64 -1) +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[CONV0]], [[V:%.*]] +; CHECK-NEXT: [[MUL:%.*]] = mul i32 [[ADD]], [[CONV1]] +; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[MUL]], 16 +; CHECK-NEXT: [[CONV12:%.*]] = trunc i32 [[SHR]] to i16 +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 1, i64 2, i32 0, i64 -1) +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[CONV0]], [[V]] +; CHECK-NEXT: [[MUL24:%.*]] = mul i32 [[SUB]], [[CONV1]] +; CHECK-NEXT: [[SHR25:%.*]] = lshr i32 [[MUL24]], 16 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[SHR25]] to i16 +; CHECK-NEXT: [[CONV27:%.*]] = sub i16 0, [[TMP2]] +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i16 [ [[CONV27]], [[IF_ELSE]] ], [ [[CONV12]], [[IF_THEN]] ] +; CHECK-NEXT: store i16 [[STOREMERGE]], ptr [[D:%.*]], align 2 +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + call void @llvm.pseudoprobe(i64 1, i64 1, i32 0, i64 -1) + %0 = load i16, ptr %b, align 2 + %conv0a = zext i16 %0 to i32 + %add = add i32 %conv0a, %v + %1 = load i16, ptr %m, align 2 + %conv1a = zext i16 %1 to i32 + %mul = mul i32 %add, %conv1a + %shr = lshr i32 %mul, 16 + %conv12 = trunc i32 %shr to i16 + br label %if.end + +if.else: + call void @llvm.pseudoprobe(i64 1, i64 2, i32 0, i64 -1) + %2 = load i16, ptr %b, align 2 + %conv0b = zext i16 %2 to i32 + %sub = sub i32 %conv0b, %v + %3 = load 
i16, ptr %m, align 2 + %conv1b = zext i16 %3 to i32 + %mul24 = mul i32 %sub, %conv1b + %shr25 = lshr i32 %mul24, 16 + %4 = trunc i32 %shr25 to i16 + %conv27 = sub i16 0, %4 + br label %if.end + +if.end: + %storemerge = phi i16 [ %conv27, %if.else ], [ %conv12, %if.then ] + store i16 %storemerge, ptr %d, align 2 + ret void +} + +; Stores past pseudo probes are also hoisted into the predecessor. +define void @hoist_stores_past_pseudoprobe(i1 %c, ptr %d, ptr %d2, i32 %v) { +; CHECK-LABEL: @hoist_stores_past_pseudoprobe( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 [[V:%.*]], ptr [[D:%.*]], align 4 +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 2, i64 1, i32 0, i64 -1) +; CHECK-NEXT: store i32 0, ptr [[D2:%.*]], align 4 +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.else: +; CHECK-NEXT: call void @llvm.pseudoprobe(i64 2, i64 2, i32 0, i64 -1) +; CHECK-NEXT: store i32 1, ptr [[D2]], align 4 +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: ret void +; +entry: + br i1 %c, label %if.then, label %if.else + +if.then: + call void @llvm.pseudoprobe(i64 2, i64 1, i32 0, i64 -1) + store i32 %v, ptr %d, align 4 + store i32 0, ptr %d2, align 4 + br label %if.end + +if.else: + call void @llvm.pseudoprobe(i64 2, i64 2, i32 0, i64 -1) + store i32 %v, ptr %d, align 4 + store i32 1, ptr %d2, align 4 + br label %if.end + +if.end: + ret void +} From d9ae4ede7dcce8591b7636161bf00cd3e9d15c41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ingo=20M=C3=BCller?= Date: Mon, 22 Jun 2026 21:21:44 +0200 Subject: [PATCH 068/511] [lldb][tests] Fix FS timing issue in `TestRerunAndExprDylib`. (#205116) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR fixes a timing issue that made `TestRerunAndExprDylib` fail with a small probability. 
The test rebuilds a library; however, the build and the re-build may fall into the same timestamp if the underlying filesystem only has second granularity such that LLDB doesn't reload the rebuilt library for the second execution. The fix consists of artificially aging the library file from the first build, i.e., setting its timestamp 10 seconds into the past. This not only guarantees that LLDB reloads the file but also that it is rebuilt, so the explicit removal is now unnecessary and has been removed. This issue has existed for at least six months, possibly since the test was added; I was not able to test older versions. However, we have recently seen frequent failures, probably due to some change in our underlying testing infrastructure. Signed-off-by: Ingo Müller --- .../rerun_and_expr_dylib/TestRerunAndExprDylib.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/lldb/test/API/functionalities/rerun_and_expr_dylib/TestRerunAndExprDylib.py b/lldb/test/API/functionalities/rerun_and_expr_dylib/TestRerunAndExprDylib.py index af413642632b9..9f6c3cfbe1b34 100644 --- a/lldb/test/API/functionalities/rerun_and_expr_dylib/TestRerunAndExprDylib.py +++ b/lldb/test/API/functionalities/rerun_and_expr_dylib/TestRerunAndExprDylib.py @@ -4,6 +4,9 @@ TypeSystems tied to that process. """ +import os +import time + import lldb from lldbsuite.test.lldbtest import * from lldbsuite.test import lldbutil @@ -61,6 +64,12 @@ def test(self): } ) + # Age the library file to 10 seconds in the past so that it is rebuilt + # and reloaded even on filesystems with 1s resolution. + fpath = self.getBuildArtifact(FULL_DYLIB_NAME) + old_mtime = time.time() - 10 + os.utime(fpath, (old_mtime, old_mtime)) + # Build a.out self.build( dictionary={ @@ -84,9 +93,6 @@ def test(self): result_children=[ValueCheck(name="m_val", value="42")], ) - # Delete the dylib to force make to rebuild it. 
- remove_file(self.getBuildArtifact(FULL_DYLIB_NAME)) - # Re-build libfoo.dylib self.build( dictionary={ From a5523a055d38729a8dd94ab05f2e4c2b2692f96b Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 22 Jun 2026 21:36:57 +0200 Subject: [PATCH 069/511] AMDGPU: Temporarily restore disassembler's dependency on TargetParser (#205175) Reverts part of #204315 --- llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt index aa96d67c527a4..aeede04081fc7 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt @@ -10,6 +10,7 @@ add_llvm_component_library(LLVMAMDGPUDisassembler CodeGenTypes MC MCDisassembler + TargetParser Support ADD_TO_COMPONENT From 136469a203d0d69f0cb40b742dbe8010e2aba0d1 Mon Sep 17 00:00:00 2001 From: Walter Lee <49250218+googlewalt@users.noreply.github.com> Date: Mon, 22 Jun 2026 15:38:02 -0400 Subject: [PATCH 070/511] [GitHub] Add googlewalt to Bazel codeowners (#205174) --- .github/CODEOWNERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index e12de8e336a91..24edb8bd9973a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -178,7 +178,7 @@ /bolt/ @aaupov @maksfb @rafaelauler @ayermolo @yota9 @paschalis-mpeis @yozhu @yavtuk # Bazel build system. -/utils/bazel/ @rupprecht @keith @aaronmondal +/utils/bazel/ @rupprecht @keith @aaronmondal @googlewalt # InstallAPI and TextAPI /llvm/**/TextAPI/ @cyndyishida From 9b233a9b5f92e7561e1f536d0877bed8ea275acc Mon Sep 17 00:00:00 2001 From: newgre Date: Mon, 22 Jun 2026 21:40:24 +0200 Subject: [PATCH 071/511] [ProfileData] Avoid unnecessary copies. (#204875) Make `Frame` moveable and avoid some unnecessary copies in `RawMemProfReader`. 
Unnecessary copies fixed in this PR were found by the CSan prototype described in the RFC [1] CopySanitizer (CSan): Detecting unneccessary object copies at runtime. [1] https://discourse.llvm.org/t/rfc-copysanitizer-csan-detecting-unneccessary-object-copies-at-runtime/91038 Co-authored-by: Jan Newger --- llvm/include/llvm/ProfileData/MemProf.h | 3 +++ llvm/lib/ProfileData/MemProfReader.cpp | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 391ac0f339f67..d36b48e68a72d 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -255,6 +255,9 @@ struct Frame { bool IsInlineFrame = false; Frame() = default; + Frame(Frame &&) = default; + Frame &operator=(Frame &&) = default; + Frame(const Frame &Other) { Function = Other.Function; SymbolName = Other.SymbolName diff --git a/llvm/lib/ProfileData/MemProfReader.cpp b/llvm/lib/ProfileData/MemProfReader.cpp index 3fc0dbfd8e69d..c476e1a08cd59 100644 --- a/llvm/lib/ProfileData/MemProfReader.cpp +++ b/llvm/lib/ProfileData/MemProfReader.cpp @@ -275,7 +275,7 @@ std::string getBuildIdString(const SegmentEntry &Entry) { for (size_t I = 0; I < Entry.BuildIdSize; I++) { OS << format_hex_no_prefix(Entry.BuildId[I], 2); } - return OS.str(); + return Str; } } // namespace @@ -612,7 +612,7 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames( getModuleOffset(VAddr), Specifier, /*UseSymbolTable=*/false); if (!DIOr) return DIOr.takeError(); - DIInliningInfo DI = DIOr.get(); + DIInliningInfo &DI = DIOr.get(); // Drop frames which we can't symbolize or if they belong to the runtime. 
if (DI.getFrame(0).FunctionName == DILineInfo::BadString || From f39c8da49e5fc2989cfe77695de1a57a899aa754 Mon Sep 17 00:00:00 2001 From: Mohammed Ashraf <125150223+Holo-xy@users.noreply.github.com> Date: Mon, 22 Jun 2026 22:49:48 +0300 Subject: [PATCH 072/511] [BoundsSafety] unify ParseLexedAttribute (#186033) Resolves #93263 --- clang/include/clang/Parse/Parser.h | 19 ++--- clang/lib/Parse/ParseCXXInlineMethods.cpp | 86 ++++++++++++----------- clang/lib/Parse/ParseDecl.cpp | 27 +------ 3 files changed, 53 insertions(+), 79 deletions(-) diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 5e7af97feeb6c..f0e06473bf615 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -1377,15 +1377,17 @@ class Parser : public CodeCompletionHandler { /// Parse all attributes in LAs, and attach them to Decl D. void ParseLexedAttributeList(LateParsedAttrList &LAs, Decl *D, - bool EnterScope, bool OnDefinition); + bool EnterScope, bool OnDefinition, + ParsedAttributes *OutAttrs = nullptr); /// Finish parsing an attribute for which parsing was delayed. /// This will be called at the end of parsing a class declaration /// for each LateParsedAttribute. We consume the saved tokens and /// create an attribute with the arguments filled in. We add this /// to the Attribute list for the decl. - void ParseLexedAttribute(LateParsedAttribute &LA, bool EnterScope, - bool OnDefinition); + void ParseLexedAttribute(LateParsedAttribute &LPA, bool EnterScope, + bool OnDefinition, + ParsedAttributes *OutAttrs = nullptr); /// ParseLexedMethodDeclarations - We finished parsing the member /// specification of a top (non-nested) C++ class. 
Now go over the @@ -1518,17 +1520,6 @@ class Parser : public CodeCompletionHandler { const char *&PrevSpec, unsigned &DiagID, bool &isInvalid); - void ParseLexedCAttributeList(LateParsedAttrList &LA, - ParsedAttributes *OutAttrs = nullptr); - - /// Finish parsing an attribute for which parsing was delayed. - /// This will be called at the end of parsing a class declaration - /// for each LateParsedAttribute. We consume the saved tokens and - /// create an attribute with the arguments filled in. We add this - /// to the Attribute list for the decl. - void ParseLexedCAttribute(LateParsedAttribute &LA, - ParsedAttributes *OutAttrs = nullptr); - void ParseLexedTypeAttribute(LateParsedTypeAttribute &LA, ParsedAttributes &OutAttrs); diff --git a/clang/lib/Parse/ParseCXXInlineMethods.cpp b/clang/lib/Parse/ParseCXXInlineMethods.cpp index 6189c854e5fbf..d13f73641218b 100644 --- a/clang/lib/Parse/ParseCXXInlineMethods.cpp +++ b/clang/lib/Parse/ParseCXXInlineMethods.cpp @@ -725,83 +725,86 @@ void Parser::ParseLexedAttributes(ParsingClass &Class) { } void Parser::ParseLexedAttributeList(LateParsedAttrList &LAs, Decl *D, - bool EnterScope, bool OnDefinition) { + bool EnterScope, bool OnDefinition, + ParsedAttributes *OutAttrs) { assert(LAs.parseSoon() && "Attribute list should be marked for immediate parsing."); for (unsigned i = 0, ni = LAs.size(); i < ni; ++i) { if (D) LAs[i]->addDecl(D); - ParseLexedAttribute(*LAs[i], EnterScope, OnDefinition); + ParseLexedAttribute(*LAs[i], EnterScope, OnDefinition, OutAttrs); delete LAs[i]; } LAs.clear(); } -void Parser::ParseLexedAttribute(LateParsedAttribute &LA, - bool EnterScope, bool OnDefinition) { +void Parser::ParseLexedAttribute(LateParsedAttribute &LPA, bool EnterScope, + bool OnDefinition, + ParsedAttributes *OutAttrs) { // Create a fake EOF so that attribute parsing won't go off the end of the // attribute. 
Token AttrEnd; AttrEnd.startToken(); AttrEnd.setKind(tok::eof); AttrEnd.setLocation(Tok.getLocation()); - AttrEnd.setEofData(LA.Toks.data()); - LA.Toks.push_back(AttrEnd); + AttrEnd.setEofData(LPA.Toks.data()); + LPA.Toks.push_back(AttrEnd); // Append the current token at the end of the new token stream so that it // doesn't get lost. - LA.Toks.push_back(Tok); - PP.EnterTokenStream(LA.Toks, true, /*IsReinject=*/true); + LPA.Toks.push_back(Tok); + PP.EnterTokenStream(LPA.Toks, true, /*IsReinject=*/true); // Consume the previously pushed token. ConsumeAnyToken(/*ConsumeCodeCompletionTok=*/true); ParsedAttributes Attrs(AttrFactory); - if (LA.Decls.size() > 0) { - Decl *D = LA.Decls[0]; - NamedDecl *ND = dyn_cast(D); + if (LPA.Decls.size() > 0) { + Decl *D = LPA.Decls[0]; + bool HasFuncScope = EnterScope && LPA.Decls.size() == 1 && + D->isFunctionOrFunctionTemplate(); + bool IsCPlusPlus = getLangOpts().CPlusPlus; + + NamedDecl *ND = dyn_cast(D); RecordDecl *RD = dyn_cast_or_null(D->getDeclContext()); // Allow 'this' within late-parsed attributes. Sema::CXXThisScopeRAII ThisScope(Actions, RD, Qualifiers(), - ND && ND->isCXXInstanceMember()); - - if (LA.Decls.size() == 1) { - // If the Decl is templatized, add template parameters to scope. - ReenterTemplateScopeRAII InDeclScope(*this, D, EnterScope); - - // If the Decl is on a function, add function parameters to the scope. - bool HasFunScope = EnterScope && D->isFunctionOrFunctionTemplate(); - if (HasFunScope) { - InDeclScope.Scopes.Enter(Scope::FnScope | Scope::DeclScope | - Scope::CompoundStmtScope); - Actions.ActOnReenterFunctionContext(Actions.CurScope, D); - } + IsCPlusPlus && ND && + ND->isCXXInstanceMember()); - ParseGNUAttributeArgs(&LA.AttrName, LA.AttrNameLoc, Attrs, nullptr, - nullptr, SourceLocation(), ParsedAttr::Form::GNU(), - nullptr); + // If the Decl is templatized, add template parameters to the scope. 
+ ReenterTemplateScopeRAII InDeclScope(*this, D, IsCPlusPlus && EnterScope); - if (HasFunScope) - Actions.ActOnExitFunctionContext(); - } else { - // If there are multiple decls, then the decl cannot be within the - // function scope. - ParseGNUAttributeArgs(&LA.AttrName, LA.AttrNameLoc, Attrs, nullptr, - nullptr, SourceLocation(), ParsedAttr::Form::GNU(), - nullptr); + // If the Decl is on a function, add function parameters to the scope. + if (HasFuncScope) { + InDeclScope.Scopes.Enter(Scope::FnScope | Scope::DeclScope | + Scope::CompoundStmtScope); + Actions.ActOnReenterFunctionContext(Actions.CurScope, D); } + + ParseGNUAttributeArgs(&LPA.AttrName, LPA.AttrNameLoc, Attrs, + /*EndLoc=*/nullptr, /*ScopeName=*/nullptr, + SourceLocation(), ParsedAttr::Form::GNU(), + /*D=*/nullptr); + + if (HasFuncScope) + Actions.ActOnExitFunctionContext(); + } else if (OutAttrs) { + ParseGNUAttributeArgs(&LPA.AttrName, LPA.AttrNameLoc, Attrs, + /*EndLoc=*/nullptr, /*ScopeName=*/nullptr, + SourceLocation(), ParsedAttr::Form::GNU(), + /*D=*/nullptr); } else { - Diag(Tok, diag::warn_attribute_no_decl) << LA.AttrName.getName(); + Diag(Tok, diag::warn_attribute_no_decl) << LPA.AttrName.getName(); } if (OnDefinition && !Attrs.empty() && !Attrs.begin()->isCXX11Attribute() && Attrs.begin()->isKnownToGCC()) - Diag(Tok, diag::warn_attribute_on_function_definition) - << &LA.AttrName; + Diag(Tok, diag::warn_attribute_on_function_definition) << &LPA.AttrName; - for (unsigned i = 0, ni = LA.Decls.size(); i < ni; ++i) - Actions.ActOnFinishDelayedAttribute(getCurScope(), LA.Decls[i], Attrs); + for (auto *D : LPA.Decls) + Actions.ActOnFinishDelayedAttribute(getCurScope(), D, Attrs); // Due to a parsing error, we either went over the cached tokens or // there are still cached tokens left, so we skip the leftover tokens. 
@@ -810,6 +813,9 @@ void Parser::ParseLexedAttribute(LateParsedAttribute &LA, if (Tok.is(tok::eof) && Tok.getEofData() == AttrEnd.getEofData()) ConsumeAnyToken(); + + if (OutAttrs) + OutAttrs->takeAllAppendingFrom(Attrs); } void Parser::ParseLexedPragmas(ParsingClass &Class) { diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 405dddf7991b4..3f41e7c5c6f0d 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -4847,19 +4847,6 @@ void Parser::ParseStructDeclaration( } } -// TODO: All callers of this function should be moved to -// `Parser::ParseLexedAttributeList`. -void Parser::ParseLexedCAttributeList(LateParsedAttrList &LAs, - ParsedAttributes *OutAttrs) { - assert(LAs.parseSoon() && - "Attribute list should be marked for immediate parsing."); - for (auto *LA : LAs) { - ParseLexedCAttribute(*LA, OutAttrs); - delete LA; - } - LAs.clear(); -} - ParsedAttributes Parser::ParseLexedCAttributeTokens(LateParsedAttribute &LA) { // Create a fake EOF so that attribute parsing won't go off the end of the // attribute. @@ -4900,17 +4887,6 @@ ParsedAttributes Parser::ParseLexedCAttributeTokens(LateParsedAttribute &LA) { return Attrs; } -void Parser::ParseLexedCAttribute(LateParsedAttribute &LA, - ParsedAttributes *OutAttrs) { - ParsedAttributes Attrs = ParseLexedCAttributeTokens(LA); - - for (Decl *D : LA.Decls) - Actions.ActOnFinishDelayedAttribute(getCurScope(), D, Attrs); - - if (OutAttrs) - OutAttrs->takeAllAppendingFrom(Attrs); -} - void Parser::ParseLexedTypeAttribute(LateParsedTypeAttribute &LA, ParsedAttributes &OutAttrs) { ParsedAttributes Attrs = ParseLexedCAttributeTokens(LA); @@ -5066,7 +5042,8 @@ void Parser::ParseStructUnionBody(SourceLocation RecordLoc, T.getOpenLocation(), T.getCloseLocation(), attrs); // Late parse field attributes if necessary. 
- ParseLexedCAttributeList(LateFieldAttrs); + ParseLexedAttributeList(LateFieldAttrs, /*D=*/nullptr, /*EnterScope=*/false, + /*OnDefinition=*/false); StructScope.Exit(); Actions.ActOnTagFinishDefinition(getCurScope(), TagDecl, T.getRange()); } From 6c1fa7c7cb05bcb94671a4f47fe14a06c1456f50 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Mon, 22 Jun 2026 12:55:07 -0700 Subject: [PATCH 073/511] Fix SectionList::ReplaceSection to not replace incorrect section. (#204677) We use SectionList::ReplaceSection to check for some sections in the main object file and in separate debug info files. It was relying on section IDs being consistent between different individual section lists in different object files which does not work. I fixed this by not using a section ID when replacing a section, but using the section shared pointer so there can be no errors. --- lldb/include/lldb/Core/Section.h | 4 +- lldb/source/Core/Section.cpp | 13 +- .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 2 +- .../SymbolVendor/ELF/SymbolVendorELF.cpp | 3 +- .../PECOFF/SymbolVendorPECOFF.cpp | 3 +- .../SymbolVendor/wasm/SymbolVendorWasm.cpp | 3 +- .../ELF/build-id-case-debug-only.yaml | 135 ++++++++++++++++++ 7 files changed, 149 insertions(+), 14 deletions(-) create mode 100644 lldb/test/Shell/ObjectFile/ELF/build-id-case-debug-only.yaml diff --git a/lldb/include/lldb/Core/Section.h b/lldb/include/lldb/Core/Section.h index 84022eae7f56f..5823a1719b32f 100644 --- a/lldb/include/lldb/Core/Section.h +++ b/lldb/include/lldb/Core/Section.h @@ -79,8 +79,8 @@ class SectionList { // Get the number of sections in this list, and any contained child sections size_t GetNumSections(uint32_t depth) const; - bool ReplaceSection(lldb::user_id_t sect_id, - const lldb::SectionSP &section_sp, + bool ReplaceSection(const lldb::SectionSP &remove_section_sp, + const lldb::SectionSP &replace_section_sp, uint32_t depth = UINT32_MAX); // Warning, this can be slow as it's removing items from a std::vector. 
diff --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp index e2f6f99b22593..515e2589fdea5 100644 --- a/lldb/source/Core/Section.cpp +++ b/lldb/source/Core/Section.cpp @@ -517,18 +517,21 @@ size_t SectionList::AddUniqueSection(const lldb::SectionSP &sect_sp) { return sect_idx; } -bool SectionList::ReplaceSection(user_id_t sect_id, - const lldb::SectionSP &sect_sp, +bool SectionList::ReplaceSection(const lldb::SectionSP &remove_sect_sp, + const lldb::SectionSP &replace_sect_sp, uint32_t depth) { + // Make sure this isn't the same section pointer. + if (remove_sect_sp == replace_sect_sp) + return false; iterator sect_iter, end = m_sections.end(); for (sect_iter = m_sections.begin(); sect_iter != end; ++sect_iter) { - if ((*sect_iter)->GetID() == sect_id) { - *sect_iter = sect_sp; + if (*sect_iter == remove_sect_sp) { + *sect_iter = replace_sect_sp; return true; } else if (depth > 0) { if ((*sect_iter) ->GetChildren() - .ReplaceSection(sect_id, sect_sp, depth - 1)) + .ReplaceSection(remove_sect_sp, replace_sect_sp, depth - 1)) return true; } } diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index 16dd2fc122906..7e4190d673fc5 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -2184,7 +2184,7 @@ void ObjectFileELF::CreateSections(SectionList &unified_section_list) { SectionSP module_section_sp = unified_section_list.FindSectionByType( eSectionTypeELFSymbolTable, true); if (module_section_sp) - unified_section_list.ReplaceSection(module_section_sp->GetID(), + unified_section_list.ReplaceSection(module_section_sp, symtab_section_sp); else unified_section_list.AddSection(symtab_section_sp); diff --git a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp index d245f05bc2e29..4ddfed4f25bea 100644 --- 
a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp +++ b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp @@ -160,8 +160,7 @@ SymbolVendorELF::CreateInstance(const lldb::ModuleSP &module_sp, objfile_section_list->FindSectionByType(section_type, true)) { if (SectionSP module_section_sp = module_section_list->FindSectionByType(section_type, true)) - module_section_list->ReplaceSection(module_section_sp->GetID(), - section_sp); + module_section_list->ReplaceSection(module_section_sp, section_sp); else module_section_list->AddSection(section_sp); } diff --git a/lldb/source/Plugins/SymbolVendor/PECOFF/SymbolVendorPECOFF.cpp b/lldb/source/Plugins/SymbolVendor/PECOFF/SymbolVendorPECOFF.cpp index 87436da443d91..2e3a3647d0601 100644 --- a/lldb/source/Plugins/SymbolVendor/PECOFF/SymbolVendorPECOFF.cpp +++ b/lldb/source/Plugins/SymbolVendor/PECOFF/SymbolVendorPECOFF.cpp @@ -131,8 +131,7 @@ SymbolVendorPECOFF::CreateInstance(const lldb::ModuleSP &module_sp, objfile_section_list->FindSectionByType(section_type, true)) { if (SectionSP module_section_sp = module_section_list->FindSectionByType(section_type, true)) - module_section_list->ReplaceSection(module_section_sp->GetID(), - section_sp); + module_section_list->ReplaceSection(module_section_sp, section_sp); else module_section_list->AddSection(section_sp); } diff --git a/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.cpp b/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.cpp index 62fb7fb4db13d..58a7128ee48bc 100644 --- a/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.cpp +++ b/lldb/source/Plugins/SymbolVendor/wasm/SymbolVendorWasm.cpp @@ -127,8 +127,7 @@ SymbolVendorWasm::CreateInstance(const lldb::ModuleSP &module_sp, objfile_section_list->FindSectionByType(section_type, true)) { if (SectionSP module_section_sp = module_section_list->FindSectionByType(section_type, true)) - module_section_list->ReplaceSection(module_section_sp->GetID(), - section_sp); + 
module_section_list->ReplaceSection(module_section_sp, section_sp); else module_section_list->AddSection(section_sp); } diff --git a/lldb/test/Shell/ObjectFile/ELF/build-id-case-debug-only.yaml b/lldb/test/Shell/ObjectFile/ELF/build-id-case-debug-only.yaml new file mode 100644 index 0000000000000..e518febd4871d --- /dev/null +++ b/lldb/test/Shell/ObjectFile/ELF/build-id-case-debug-only.yaml @@ -0,0 +1,135 @@ +# This test makes sure that LLDB correctly merges the section list of files +# that have different section layouts in the main executable and in the +# .debug file. Prior to this fix a bug would cause some sections to be +# replaced in the module's unified list and would replace the .symtab section +# due to section IDs being used when replacing sections. + +# RUN: mkdir -p %t/.build-id/1b +# RUN: yaml2obj %s -o %t/full.out +# RUN: llvm-objcopy --strip-debug %t/full.out %t/strip-debug.out +# RUN: cd %t +# RUN: llvm-objcopy --only-keep-debug %t/full.out --strip-symbol=main %t/.build-id/1b/8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug +# RUN: %lldb -b -o "image dump symtab" -o "image dump sections" -o "quit" %t/strip-debug.out | FileCheck %s + +# Make sure the symbol table is still in LLDB. Prior to this fix the ELF +# symbol table section was being replaced. +# CHECK: [ 0] 1 Code 0x00000000004003d0 0x0000000000000008 0x00000002 main + +# Make sure we see the .symtab section and .strtab section in the section +# list. Prior to this fix they were being replaced. 
+# CHECK: 0x0000000000000001 regular [0x0000000000400274-0x0000000000400298) r-- 0x00000040 0x00000024 0x00000002 strip-debug.out..note.gnu.build-id +# CHECK: 0x0000000000000002 code [0x00000000004003d0-0x00000000004003d8) r-x 0x00000070 0x00000008 0x00000006 strip-debug.out..text +# CHECK: 0x0000000000000003 data [0x00000000004003e0-0x00000000004003e8) r-- 0x00000080 0x00000008 0x00000002 strip-debug.out..data +# CHECK: 0x0000000000000004 elf-symbol-table --- 0x00000088 0x00000030 0x00000000 strip-debug.out..symtab +# CHECK: 0x0000000000000005 regular --- 0x000000b8 0x00000006 0x00000000 strip-debug.out..strtab +# CHECK: 0x0000000000000017 regular --- 0x000000fc 0x00000135 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..shstrtab +# CHECK: 0x0000000000000004 dwarf-abbrev --- 0x00000064 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_abbrev +# CHECK: 0x0000000000000005 dwarf-addr --- 0x0000006c 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_addr +# CHECK: 0x0000000000000006 dwarf-aranges --- 0x00000074 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_aranges +# CHECK: 0x0000000000000007 dwarf-frame --- 0x0000007c 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_frame +# CHECK: 0x0000000000000008 dwarf-info --- 0x00000084 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_info +# CHECK: 0x0000000000000009 dwarf-line --- 0x0000008c 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_line +# CHECK: 0x000000000000000a dwarf-line-str --- 0x00000094 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_line_str +# CHECK: 0x000000000000000b dwarf-loc --- 0x0000009c 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_loc +# CHECK: 0x000000000000000c dwarf-loclists --- 0x000000a4 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_loclists +# CHECK: 
0x000000000000000d dwarf-macinfo --- 0x000000ac 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_macinfo +# CHECK: 0x000000000000000e dwarf-macro --- 0x000000b4 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_macro +# CHECK: 0x000000000000000f dwarf-names --- 0x000000bc 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_names +# CHECK: 0x0000000000000010 dwarf-pubnames --- 0x000000c4 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_pubnames +# CHECK: 0x0000000000000011 dwarf-pubtypes --- 0x000000cc 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_pubtypes +# CHECK: 0x0000000000000012 dwarf-ranges --- 0x000000d4 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_ranges +# CHECK: 0x0000000000000013 dwarf-rnglists --- 0x000000dc 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_rnglists +# CHECK: 0x0000000000000014 dwarf-str --- 0x000000e4 0x00000008 0x00000030 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_str +# CHECK: 0x0000000000000015 dwarf-str-offsets --- 0x000000ec 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_str_offsets +# CHECK: 0x0000000000000016 dwarf-types --- 0x000000f4 0x00000008 0x00000000 8a73ac238390e32a7ff4ac8ebe4d6a41ecf5c9.debug..debug_types + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 + Entry: 0x00000000004003D0 +Sections: + - Name: .note.gnu.build-id + Type: SHT_NOTE + Flags: [ SHF_ALLOC ] + Address: 0x0000000000400274 + AddressAlign: 0x0000000000000004 + Content: 040000001400000003000000474E55001B8A73AC238390E32A7FF4AC8EBE4D6A41ECF5C9 + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x00000000004003D0 + AddressAlign: 0x0000000000000010 + Content: DEADBEEFBAADF00D + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x00000000004003E0 
+ AddressAlign: 0x0000000000000010 + Content: DDDDDDDDDDDDDDDD + - Name: .debug_abbrev + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_addr + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_aranges + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_frame + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_info + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_line + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_line_str + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_loc + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_loclists + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_macinfo + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_macro + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_names + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_pubnames + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_pubtypes + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_ranges + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_rnglists + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_str + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_str_offsets + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D + - Name: .debug_types + Type: SHT_PROGBITS + Content: DEADBEEFBAADF00D +Symbols: + - Name: main + Type: STT_FUNC + Section: .text + Value: 0x00000000004003D0 + Size: 0x0000000000000008 +... From 5c9811dc9ef6afed5235df67cc906dbc55c18ea6 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 22 Jun 2026 20:03:57 +0000 Subject: [PATCH 074/511] [LSR] Preserve LCSSA in SCEVRewriter This is necessary to fix some regressions when switching to the NewPM and seems to improve optimization quality in some cases due to LSR currently not understanding loop nests (usage of getSCEV vs getSCEVScoped).
This patch just enables LCSSA preservation for SCEVRewriter and updates all the relevant tests. There are some further fixes that are needed to get this fully working that will be included in follow up patches. This patch also only changes behavior in the NewPM path to get that unblocked while more work is done on ensuring LCSSA preservation/requirements do not regress LSR. Similar to #185373 (although without follow up fixes and a regression test). Regression test added for the specific NewPM case noticed is in Transforms/LoopStrengthReduce/X86/lcssa-preservation-regression.ll (does not reproduce without the target triple). Reviewers: arsenm, Meinersbur, fhahn, nikic, vikramRH Pull Request: https://github.com/llvm/llvm-project/pull/191665 --- .../Transforms/Scalar/LoopStrengthReduce.cpp | 22 ++++++--- .../X86/lcssa-preservation-regression.ll | 49 +++++++++++++++++++ 2 files changed, 64 insertions(+), 7 deletions(-) create mode 100644 llvm/test/Transforms/LoopStrengthReduce/X86/lcssa-preservation-regression.ll diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 6a88740b8ad51..534fcd9f7145e 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2326,9 +2326,14 @@ class LSRInstance { void ImplementSolution(const SmallVectorImpl &Solution); public: + // TODO(boomanaiden154): The PreserveLCSSA flag is a hack to allow + // experimentation with the NewPM which requires LCSSA preservation while + // some of the details are worked out in LSR. Eventually it should be set + // to true and removed. 
LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, - TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU); + TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU, + bool PreserveLCSSA); bool getChanged() const { return Changed; } const SmallVectorImpl &getScalarEvolutionIVs() const { @@ -6283,12 +6288,13 @@ void LSRInstance::ImplementSolution( LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, - TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU) + TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU, + bool PreserveLCSSA) : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L), MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0 ? PreferredAddresingMode : TTI.getPreferredAddressingMode(L, &SE)), - Rewriter(SE, "lsr", false), BaselineCost(L, SE, TTI, AMK) { + Rewriter(SE, "lsr", PreserveLCSSA), BaselineCost(L, SE, TTI, AMK) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) return; @@ -7162,7 +7168,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, TargetLibraryInfo &TLI, - MemorySSA *MSSA) { + MemorySSA *MSSA, bool PreserveLCSSA) { // Debug preservation - before we start removing anything identify which DVI // meet the salvageable criteria and store their DIExpression and SCEVs. @@ -7176,7 +7182,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE, // Run the main LSR transformation. const LSRInstance &Reducer = - LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get()); + LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get(), PreserveLCSSA); Changed |= Reducer.getChanged(); // Remove any extra phis created by processing inner loops. 
@@ -7254,14 +7260,16 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { MemorySSA *MSSA = nullptr; if (MSSAAnalysis) MSSA = &MSSAAnalysis->getMSSA(); - return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA); + return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA, + /*PreserveLCSSA=*/false); } PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR, LPMUpdater &) { if (!ReduceLoopStrength(&L, AM.getResult(L, AR), AR.SE, - AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI, AR.MSSA)) + AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI, AR.MSSA, + /*PreserveLCSSA=*/true)) return PreservedAnalyses::all(); auto PA = getLoopPassPreservedAnalyses(); diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/lcssa-preservation-regression.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/lcssa-preservation-regression.ll new file mode 100644 index 0000000000000..d2eb71bfb1700 --- /dev/null +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/lcssa-preservation-regression.ll @@ -0,0 +1,49 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes="loop-reduce" -S | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @regression1() { +; CHECK-LABEL: define void @regression1() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[LOOP1_HEADER:.*]] +; CHECK: [[LOOP1_HEADER]]: +; CHECK-NEXT: br label %[[LOOP1:.*]] +; CHECK: [[LOOP1]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[LOOP1_HEADER]] ], [ 1, %[[LOOP1]] ] +; CHECK-NEXT: br i1 false, label %[[LOOP2_HEADER:.*]], label %[[LOOP1]] +; CHECK: [[LOOP2_HEADER]]: +; CHECK-NEXT: [[PHI_LCSSA1:%.*]] = phi i32 [ [[PHI]], %[[LOOP1]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i32 [[PHI_LCSSA1]], 1 +; CHECK-NEXT: br label %[[LOOP2:.*]] +; CHECK: [[LOOP2]]: +; CHECK-NEXT: 
[[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], %[[LOOP2]] ], [ [[TMP0]], %[[LOOP2_HEADER]] ] +; CHECK-NEXT: [[SELECT:%.*]] = select i1 false, i32 [[LSR_IV]], i32 0 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1 +; CHECK-NEXT: br i1 false, label %[[EXIT:.*]], label %[[LOOP2]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: br label %[[LOOP1_HEADER]] +; +entry: + br label %loop1.header + +loop1.header: ; preds = %exit, %entry + br label %loop1 + +loop1: ; preds = %loop1, %loop1.header + %phi = phi i32 [ 0, %loop1.header ], [ 1, %loop1 ] + br i1 false, label %loop2.header, label %loop1 + +loop2.header: ; preds = %loop1 + br label %loop2 + +loop2: ; preds = %loop2.header, %loop2 + %phi5 = phi i32 [ %add, %loop2 ], [ %phi, %loop2.header ] + %add = add i32 %phi5, 1 + %select = select i1 false, i32 %add, i32 0 + br i1 false, label %exit, label %loop2 + +exit: ; preds = %loop2 + br label %loop1.header +} From d770312d89a279c323fcdda90f15fecd813fc5db Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 22 Jun 2026 20:05:34 +0000 Subject: [PATCH 075/511] [LSR] Preserve LCSSA in critical edge splitting This was another case where LSR failed to preserve LCSSA, which will trigger an assertion under the NewPM. This is currently only enabled under the NewPM to avoid pessimizing any optimizations while unblocking work on the NewPM. 
Reviewers: fhahn, vikramRH, nikic, arsenm Pull Request: https://github.com/llvm/llvm-project/pull/192371 --- .../Transforms/Scalar/LoopStrengthReduce.cpp | 17 +-- .../X86/lcssa-preservation-regression.ll | 114 ++++++++++++++++++ .../LoopStrengthReduce/preserve-lcssa.ll | 7 +- 3 files changed, 129 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 534fcd9f7145e..57a76565bfbb7 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2180,6 +2180,7 @@ class LSRInstance { mutable SCEVExpander Rewriter; bool Changed = false; bool HardwareLoopProfitable = false; + bool ShouldPreserveLCSSA = false; /// This is the insert position that the current loop's induction variable /// increment should be placed. In simple loops, this is the latch block's @@ -6029,13 +6030,14 @@ void LSRInstance::RewriteForPHI(PHINode *PN, const LSRUse &LU, // Split the critical edge. BasicBlock *NewBB = nullptr; if (!Parent->isLandingPad()) { - NewBB = - SplitCriticalEdge(BB, Parent, - CriticalEdgeSplittingOptions(&DT, &LI, MSSAU) - .setMergeIdenticalEdges() - .setKeepOneInputPHIs()); + CriticalEdgeSplittingOptions SplitOptions(&DT, &LI, MSSAU); + SplitOptions = + SplitOptions.setMergeIdenticalEdges().setKeepOneInputPHIs(); + if (ShouldPreserveLCSSA) + SplitOptions = SplitOptions.setPreserveLCSSA(); + NewBB = SplitCriticalEdge(BB, Parent, SplitOptions); } else { - SmallVector NewBBs; + SmallVector NewBBs; DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DTU, &LI); NewBB = NewBBs[0]; @@ -6294,7 +6296,8 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0 ? 
PreferredAddresingMode : TTI.getPreferredAddressingMode(L, &SE)), - Rewriter(SE, "lsr", PreserveLCSSA), BaselineCost(L, SE, TTI, AMK) { + Rewriter(SE, "lsr", PreserveLCSSA), ShouldPreserveLCSSA(PreserveLCSSA), + BaselineCost(L, SE, TTI, AMK) { // If LoopSimplify form is not available, stay out of trouble. if (!L->isLoopSimplifyForm()) return; diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/lcssa-preservation-regression.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/lcssa-preservation-regression.ll index d2eb71bfb1700..a07637159d0ac 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/lcssa-preservation-regression.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/lcssa-preservation-regression.ll @@ -47,3 +47,117 @@ loop2: ; preds = %loop2.header, %lo exit: ; preds = %loop2 br label %loop1.header } + +define void @regression2() { +; CHECK-LABEL: define void @regression2() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[LOOP1_HEADER:.*]] +; CHECK: [[LOOP1_HEADER]]: +; CHECK-NEXT: switch i8 0, label %[[LOOP1_HEADER_BACKEDGE:.*]] [ +; CHECK-NEXT: i8 63, label %[[LOOP1_LATCH1_PREHEADER:.*]] +; CHECK-NEXT: i8 43, label %[[LOOP1_LATCH1_PREHEADER]] +; CHECK-NEXT: i8 42, label %[[LOOP1_LATCH1_PREHEADER]] +; CHECK-NEXT: ] +; CHECK: [[LOOP1_HEADER_BACKEDGE]]: +; CHECK-NEXT: br label %[[LOOP1_HEADER]] +; CHECK: [[LOOP1_LATCH1_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP1_LATCH1:.*]] +; CHECK: [[LOOP1_LATCH1]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[ADD4:%.*]], %[[LOOP1_LATCH2:.*]] ], [ 0, %[[LOOP1_LATCH1_PREHEADER]] ] +; CHECK-NEXT: br i1 false, label %[[LOOP1_LATCH2]], label %[[LOOP1_LATCH1_LOOP1_LATCH3_CRIT_EDGE:.*]] +; CHECK: [[LOOP1_LATCH2]]: +; CHECK-NEXT: [[ADD:%.*]] = add i32 0, 0 +; CHECK-NEXT: [[ADD4]] = add i32 [[PHI]], 1 +; CHECK-NEXT: br i1 false, label %[[LOOP1_LATCH1]], label %[[LOOP1_LATCH3SPLIT:.*]] +; CHECK: [[LOOP1_LATCH3SPLIT]]: +; CHECK-NEXT: [[PHI6_PH:%.*]] = phi i32 [ [[ADD]], %[[LOOP1_LATCH2]] ] +; CHECK-NEXT: [[PHI7_PH:%.*]] 
= phi i32 [ 0, %[[LOOP1_LATCH2]] ] +; CHECK-NEXT: br label %[[LOOP1_LATCH3:.*]] +; CHECK: [[LOOP1_LATCH1_LOOP1_LATCH3_CRIT_EDGE]]: +; CHECK-NEXT: [[PHI_LCSSA:%.*]] = phi i32 [ [[PHI]], %[[LOOP1_LATCH1]] ] +; CHECK-NEXT: [[SPLIT:%.*]] = phi i32 [ 0, %[[LOOP1_LATCH1]] ] +; CHECK-NEXT: [[SPLIT1:%.*]] = phi i32 [ [[PHI]], %[[LOOP1_LATCH1]] ] +; CHECK-NEXT: br label %[[LOOP1_LATCH3]] +; CHECK: [[LOOP1_LATCH3]]: +; CHECK-NEXT: [[PHI6:%.*]] = phi i32 [ [[SPLIT]], %[[LOOP1_LATCH1_LOOP1_LATCH3_CRIT_EDGE]] ], [ [[PHI6_PH]], %[[LOOP1_LATCH3SPLIT]] ] +; CHECK-NEXT: [[PHI7:%.*]] = phi i32 [ [[PHI_LCSSA]], %[[LOOP1_LATCH1_LOOP1_LATCH3_CRIT_EDGE]] ], [ [[PHI7_PH]], %[[LOOP1_LATCH3SPLIT]] ] +; CHECK-NEXT: br label %[[LOOP1_HEADER_BACKEDGE]] +; +entry: + br label %loop1.header + +loop1.header: ; preds = %loop1.latch3, %loop1.header, %entry + switch i8 0, label %loop1.header [ + i8 63, label %loop1.latch1 + i8 43, label %loop1.latch1 + i8 42, label %loop1.latch1 + ] + +loop1.latch1: ; preds = %loop1.latch2, %loop1.header, %loop1.header, %loop1.header + %phi = phi i32 [ %add4, %loop1.latch2 ], [ 0, %loop1.header ], [ 0, %loop1.header ], [ 0, %loop1.header ] + br i1 false, label %loop1.latch2, label %loop1.latch3 + +loop1.latch2: ; preds = %loop1.latch1 + %add = add i32 0, 0 + %add4 = add i32 %phi, 1 + br i1 false, label %loop1.latch1, label %loop1.latch3 + +loop1.latch3: ; preds = %loop1.latch2, %loop1.latch1 + %phi6 = phi i32 [ %add, %loop1.latch2 ], [ 0, %loop1.latch1 ] + %phi7 = phi i32 [ 0, %loop1.latch2 ], [ %phi, %loop1.latch1 ] + br label %loop1.header +} + +define i64 @regression3() { +; CHECK-LABEL: define i64 @regression3() { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ASHR:%.*]] = ashr i64 0, 1 +; CHECK-NEXT: br label %[[LOOP2_HEADER:.*]] +; CHECK: [[FUNCEXITSPLIT:.*]]: +; CHECK-NEXT: [[PHI_PH:%.*]] = phi i64 [ [[PHI6:%.*]], %[[LOOP1_HEADER:.*]] ] +; CHECK-NEXT: br label %[[FUNCEXIT:.*]] +; CHECK: [[FUNCEXIT]]: +; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[ASHR]], 
%[[LOOP1_LATCH_FUNCEXIT_CRIT_EDGE:.*]] ], [ [[PHI_PH]], %[[FUNCEXITSPLIT]] ] +; CHECK-NEXT: ret i64 [[PHI]] +; CHECK: [[LOOP2_HEADER]]: +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[LOOP2_LATCH:.*]] ], [ [[ASHR]], %[[ENTRY]] ] +; CHECK-NEXT: br i1 false, label %[[LOOP2_LATCH]], label %[[LOOP1_HEADER_PREHEADER:.*]] +; CHECK: [[LOOP1_HEADER_PREHEADER]]: +; CHECK-NEXT: br label %[[LOOP1_HEADER]] +; CHECK: [[LOOP2_LATCH]]: +; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i64 [[LSR_IV]], 0 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw nsw i64 [[LSR_IV]], -1 +; CHECK-NEXT: br label %[[LOOP2_HEADER]] +; CHECK: [[LOOP1_HEADER]]: +; CHECK-NEXT: [[PHI6]] = phi i64 [ 0, %[[LOOP1_LATCH:.*]] ], [ 1, %[[LOOP1_HEADER_PREHEADER]] ] +; CHECK-NEXT: br i1 false, label %[[LOOP1_LATCH]], label %[[FUNCEXITSPLIT]] +; CHECK: [[LOOP1_LATCH]]: +; CHECK-NEXT: br i1 false, label %[[LOOP1_LATCH_FUNCEXIT_CRIT_EDGE]], label %[[LOOP1_HEADER]] +; CHECK: [[LOOP1_LATCH_FUNCEXIT_CRIT_EDGE]]: +; CHECK-NEXT: [[SPLIT:%.*]] = phi i64 [ [[ASHR]], %[[LOOP1_LATCH]] ] +; CHECK-NEXT: br label %[[FUNCEXIT]] +; +entry: + %ashr = ashr i64 0, 1 + br label %loop2.header + +funcexit: ; preds = %loop1.latch, %loop1.header + %phi = phi i64 [ %phi6, %loop1.header ], [ %ashr, %loop1.latch ] + ret i64 %phi + +loop2.header: ; preds = %loop2.latch, %entry + %phi3 = phi i64 [ 0, %entry ], [ %add, %loop2.latch ] + br i1 false, label %loop2.latch, label %loop1.header + +loop2.latch: ; preds = %loop2.header + %add = add i64 %phi3, 1 + %icmp = icmp eq i64 %phi3, %ashr + br label %loop2.header + +loop1.header: ; preds = %loop1.latch, %loop2.header + %phi6 = phi i64 [ 0, %loop1.latch ], [ 1, %loop2.header ] + br i1 false, label %loop1.latch, label %funcexit + +loop1.latch: ; preds = %loop1.header + br i1 false, label %funcexit, label %loop1.header +} diff --git a/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll b/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll index 0add19e286f58..e4890fc00355f 100644 --- 
a/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/preserve-lcssa.ll @@ -47,6 +47,8 @@ define void @function_0(i32 %val_i32_8, i32 %val_i32_9) { ; NEWPM-NEXT: br label [[LOOP_4:%.*]] ; NEWPM: loop_4: ; NEWPM-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BE_6:%.*]] ], [ 7851, [[PRHDR_LOOP_3]] ] +; NEWPM-NEXT: [[LOOP_CNT_I32_11:%.*]] = phi i32 [ 7850, [[PRHDR_LOOP_3]] ], [ [[VAL_I32_24:%.*]], [[BE_6]] ] +; NEWPM-NEXT: [[VAL_I32_24]] = add i32 [[LOOP_CNT_I32_11]], 1 ; NEWPM-NEXT: br i1 [[VAL_I1_22]], label [[BE_6]], label [[LOOP_EXIT_7SPLIT:%.*]] ; NEWPM: bb_5: ; NEWPM-NEXT: [[VAL_I32_40:%.*]] = mul i32 [[VAL_I32_9]], [[VAL_I32_24_LCSSA:%.*]] @@ -55,13 +57,14 @@ define void @function_0(i32 %val_i32_8, i32 %val_i32_9) { ; NEWPM-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1 ; NEWPM-NEXT: br i1 [[VAL_I1_22]], label [[LOOP_4]], label [[BE_6_LOOP_EXIT_7_CRIT_EDGE:%.*]] ; NEWPM: loop_exit_7split: -; NEWPM-NEXT: [[LSR_IV_LCSSA:%.*]] = phi i32 [ [[LSR_IV]], [[LOOP_4]] ] +; NEWPM-NEXT: [[VAL_I32_24_LCSSA_PH:%.*]] = phi i32 [ [[VAL_I32_24]], [[LOOP_4]] ] ; NEWPM-NEXT: br label [[LOOP_EXIT_7:%.*]] ; NEWPM: be_6.loop_exit_7_crit_edge: ; NEWPM-NEXT: [[LSR_IV_LCSSA1:%.*]] = phi i32 [ [[LSR_IV]], [[BE_6]] ] +; NEWPM-NEXT: [[SPLIT:%.*]] = phi i32 [ [[VAL_I32_24]], [[BE_6]] ] ; NEWPM-NEXT: br label [[LOOP_EXIT_7]] ; NEWPM: loop_exit_7: -; NEWPM-NEXT: [[VAL_I32_24_LCSSA]] = phi i32 [ [[LSR_IV_LCSSA1]], [[BE_6_LOOP_EXIT_7_CRIT_EDGE]] ], [ [[LSR_IV_LCSSA]], [[LOOP_EXIT_7SPLIT]] ] +; NEWPM-NEXT: [[VAL_I32_24_LCSSA]] = phi i32 [ [[LSR_IV_LCSSA1]], [[BE_6_LOOP_EXIT_7_CRIT_EDGE]] ], [ [[VAL_I32_24_LCSSA_PH]], [[LOOP_EXIT_7SPLIT]] ] ; NEWPM-NEXT: br label [[BB_5:%.*]] ; %val_i1_22 = trunc i8 -66 to i1 From 97469cb3d28f6bb9f8550a1fc89a7b6727f6828e Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 22 Jun 2026 22:07:49 +0200 Subject: [PATCH 076/511] [LAA] Collect no-wrap predicates separately in getPtrStride/isNoWrap. 
(#203787) Add an overload of getPtrStride and update isNoWrap to collect no-wrap SCEV predicates in a vector, and have the caller add them to PSE if needed. This also requires explicitly building the wrap predicate via ScalarEvolution::getWrapPredicate (clearing statically implied flags) rather than going through PredicatedScalarEvolution::setNoOverflow, which can now be removed together with the flags map. This also requires generalizing SCEVUnionPredicate::implies so a wrap predicate can be recognized as implied by an existing wrap predicate after applying equal predicates, avoiding redundant predicates when the collected predicates are committed. The only functional change is the re-ordering of runtime check groups in a single test case; no functional IR changes across a large IR corpus (32k modules). To be used to avoid adding unnecessary wrap predicates during interleave group analysis: https://github.com/llvm/llvm-project/pull/200807 PR: https://github.com/llvm/llvm-project/pull/203787 --- .../llvm/Analysis/LoopAccessAnalysis.h | 16 +++- llvm/include/llvm/Analysis/ScalarEvolution.h | 16 ++-- llvm/lib/Analysis/LoopAccessAnalysis.cpp | 77 +++++++++++++------ llvm/lib/Analysis/ScalarEvolution.cpp | 61 ++++++++------- ...untime-checks-after-dependence-analysis.ll | 48 ++++++------ 5 files changed, 127 insertions(+), 91 deletions(-) diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h index ce2d92c4e01c0..85901ebddc7f0 100644 --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -888,8 +888,8 @@ replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, /// /// If necessary this method will version the stride of the pointer according /// to \p PtrToStride and therefore add further predicates to \p PSE. -/// The \p Assume parameter indicates if we are allowed to make additional -/// run-time assumptions.
+/// +/// If \p Predicates is non-null, add no-wrap SCEV predicates if needed. /// /// Note that the analysis results are defined if-and-only-if the original /// memory access was defined. If that access was dead, or UB, then the @@ -899,7 +899,17 @@ getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const DominatorTree &DT, const DenseMap &StridesMap = DenseMap(), - bool Assume = false, bool ShouldCheckWrap = true); + bool ShouldCheckWrap = true, + SmallVectorImpl *Predicates = nullptr); + +/// Overload of \ref getPtrStride that adds the no-wrap predicates directly to +/// \p PSE. The \p Assume parameter indicates whether such additional run-time +/// assumptions are allowed. +LLVM_ABI std::optional +getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, + const Loop *Lp, const DominatorTree &DT, + const DenseMap &StridesMap, bool Assume, + bool ShouldCheckWrap = true); /// Returns the distance between the pointers \p PtrA and \p PtrB iff they are /// compatible and it is possible to calculate the distance between them. This diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 50af763614a31..37da037ffcae8 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -2644,6 +2644,9 @@ class PredicatedScalarEvolution { /// Adds a new predicate. LLVM_ABI void addPredicate(const SCEVPredicate &Pred); + /// Adds all predicates in \p Preds. + LLVM_ABI void addPredicates(ArrayRef Preds); + /// Attempts to produce an AddRecExpr for V by adding additional SCEV /// predicates. If we can't transform the expression into an AddRecExpr we /// return nullptr and not add additional SCEV predicates to the current @@ -2653,19 +2656,15 @@ class PredicatedScalarEvolution { getAsAddRec(Value *V, SmallVectorImpl *WrapPredsAdded = nullptr); - /// Proves that V doesn't overflow by adding SCEV predicate. 
- LLVM_ABI void setNoOverflow(Value *V, - SCEVWrapPredicate::IncrementWrapFlags Flags); - - /// Returns true if we've proved that V doesn't wrap by means of a SCEV - /// predicate. + /// Returns true if we've statically proved that V doesn't wrap. LLVM_ABI bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags); /// Returns the ScalarEvolution analysis used. ScalarEvolution *getSE() const { return &SE; } - /// We need to explicitly define the copy constructor because of FlagsMap. + /// We need to explicitly define the copy constructor due to the ownership of + /// the SCEVUnionPredicate Preds. LLVM_ABI PredicatedScalarEvolution(const PredicatedScalarEvolution &); /// Print the SCEV mappings done by the Predicated Scalar Evolution. @@ -2694,9 +2693,6 @@ class PredicatedScalarEvolution { /// SCEV. DenseMap RewriteMap; - /// Records what NoWrap flags we've added to a Value *. - ValueMap FlagsMap; - /// The ScalarEvolution analysis. ScalarEvolution &SE; diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp index 35471f4cb454e..1a1f458d00253 100644 --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1019,11 +1019,13 @@ getStrideFromAddRec(const SCEVAddRecExpr *AR, const Loop *Lp, Type *AccessTy, } /// Check whether \p AR is a non-wrapping AddRec. If \p Ptr is not nullptr, use -/// informating from the IR pointer value to determine no-wrap. -static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR, - Value *Ptr, Type *AccessTy, const Loop *L, bool Assume, - const DominatorTree &DT, - std::optional Stride = std::nullopt) { +/// information from the IR pointer value to determine no-wrap. If \p Predicates +/// is not nullptr add no-wrap assumptions if needed. 
+static bool +isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR, Value *Ptr, + Type *AccessTy, const Loop *L, const DominatorTree &DT, + std::optional Stride = std::nullopt, + SmallVectorImpl *Predicates = nullptr) { // FIXME: This should probably only return true for NUW. if (any(AR->getNoWrapFlags(SCEV::NoWrapMask))) return true; @@ -1064,8 +1066,12 @@ static bool isNoWrap(PredicatedScalarEvolution &PSE, const SCEVAddRecExpr *AR, return true; } - if (Ptr && Assume) { - PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); + if (Ptr && Predicates) { + ScalarEvolution &SE = *PSE.getSE(); + SCEVWrapPredicate::IncrementWrapFlags Flags = SCEVWrapPredicate::clearFlags( + SCEVWrapPredicate::IncrementNUSW, + SCEVWrapPredicate::getImpliedFlags(AR, SE)); + Predicates->push_back(SE.getWrapPredicate(AR, Flags)); LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n" << "LAA: Pointer: " << *Ptr << "\n" << "LAA: SCEV: " << *AR << "\n" @@ -1293,6 +1299,7 @@ bool AccessAnalysis::createCheckForAccess( /// Check whether all pointers can participate in a runtime bounds check. They /// must either be invariant or non-wrapping affine AddRecs. + SmallVector Predicates; for (auto &P : RTCheckPtrs) { // The bounds for loop-invariant pointer is trivial. if (SE->isLoopInvariant(P.getPointer(), TheLoop)) @@ -1300,22 +1307,27 @@ bool AccessAnalysis::createCheckForAccess( const SCEVAddRecExpr *AR = dyn_cast(P.getPointer()); if (!AR && Assume) - AR = PSE.getAsAddRec(Ptr); + AR = PSE.getAsAddRec(Ptr, &Predicates); if (!AR || !AR->isAffine()) return false; - // If there's only one option for Ptr, look it up after bounds and wrap - // checking, because assumptions might have been added to PSE. + // If there's only one option for Ptr, commit the predicates collected by + // getAsAddRec and look Ptr up again afterwards: the lookup below reads the + // assumptions back from PSE, so they need to be committed first. 
if (RTCheckPtrs.size() == 1) { + PSE.addPredicates(Predicates); + Predicates.clear(); AR = cast(replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr)); P.setPointer(AR); } if (!isNoWrap(PSE, AR, RTCheckPtrs.size() == 1 ? Ptr : nullptr, AccessTy, - TheLoop, Assume, DT)) + TheLoop, DT, /*Stride=*/std::nullopt, + Assume ? &Predicates : nullptr)) return false; } + PSE.addPredicates(Predicates); for (const auto &[PtrExpr, NeedsFreeze] : RTCheckPtrs) { // The id of the dependence set. @@ -1644,11 +1656,10 @@ void AccessAnalysis::buildDependenceSets() { } /// Check whether the access through \p Ptr has a constant stride. -std::optional -llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, - const Loop *Lp, const DominatorTree &DT, - const DenseMap &StridesMap, - bool Assume, bool ShouldCheckWrap) { +std::optional llvm::getPtrStride( + PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, + const DominatorTree &DT, const DenseMap &StridesMap, + bool ShouldCheckWrap, SmallVectorImpl *Predicates) { const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); if (PSE.getSE()->isLoopInvariant(PtrScev, Lp)) return 0; @@ -1656,8 +1667,10 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); const SCEVAddRecExpr *AR = dyn_cast(PtrScev); - if (Assume && !AR) - AR = PSE.getAsAddRec(Ptr); + if (Predicates && !AR) { + AR = PSE.getSE()->convertSCEVToAddRecWithPredicates(PtrScev, Lp, + *Predicates); + } if (!AR) { LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr @@ -1670,7 +1683,7 @@ llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, if (!ShouldCheckWrap || !Stride) return Stride; - if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, Assume, DT, Stride)) + if (isNoWrap(PSE, AR, Ptr, AccessTy, Lp, DT, Stride, Predicates)) return Stride; LLVM_DEBUG( @@ -1679,6 +1692,20 @@ 
llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, return std::nullopt; } +/// Check whether the access through \p Ptr has a constant stride. +std::optional +llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, + const Loop *Lp, const DominatorTree &DT, + const DenseMap &StridesMap, + bool Assume, bool ShouldCheckWrap) { + SmallVector Predicates; + std::optional Stride = + getPtrStride(PSE, AccessTy, Ptr, Lp, DT, StridesMap, ShouldCheckWrap, + Assume ? &Predicates : nullptr); + PSE.addPredicates(Predicates); + return Stride; +} + std::optional llvm::getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, @@ -2091,10 +2118,14 @@ MemoryDepChecker::getDependenceDistanceStrideAndSize( BPtr->getType()->getPointerAddressSpace()) return MemoryDepChecker::Dependence::Unknown; - std::optional StrideAPtr = getPtrStride( - PSE, ATy, APtr, InnermostLoop, *DT, SymbolicStrides, true, true); - std::optional StrideBPtr = getPtrStride( - PSE, BTy, BPtr, InnermostLoop, *DT, SymbolicStrides, true, true); + SmallVector Predicates; + std::optional StrideAPtr = + getPtrStride(PSE, ATy, APtr, InnermostLoop, *DT, SymbolicStrides, + /*ShouldCheckWrap=*/true, &Predicates); + std::optional StrideBPtr = + getPtrStride(PSE, BTy, BPtr, InnermostLoop, *DT, SymbolicStrides, + /*ShouldCheckWrap=*/true, &Predicates); + PSE.addPredicates(Predicates); const SCEV *Src = PSE.getSCEV(APtr); const SCEV *Sink = PSE.getSCEV(BPtr); diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index c0cdce982e623..6c419a9895ef5 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -15476,8 +15476,25 @@ bool SCEVUnionPredicate::implies(const SCEVPredicate *N, return this->implies(I, SE); }); - return any_of(Preds, - [N, &SE](const SCEVPredicate *I) { return I->implies(N, SE); }); + if (any_of(Preds, + [N, &SE](const SCEVPredicate *I) { return 
I->implies(N, SE); })) + return true; + + // A wrap predicate may be implied by a wrap predicate in Preds after applying + // equal predicates. + const auto *NWrap = dyn_cast(N); + if (!NWrap) + return false; + const Loop *L = NWrap->getExpr()->getLoop(); + return any_of(Preds, [&](const SCEVPredicate *I) { + const auto *IWrap = dyn_cast(I); + if (!IWrap) + return false; + const auto *RewrittenAR = dyn_cast( + SE.rewriteUsingPredicate(IWrap->getExpr(), L, *this)); + return RewrittenAR && + SE.getWrapPredicate(RewrittenAR, IWrap->getFlags())->implies(N, SE); + }); } void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const { @@ -15603,6 +15620,12 @@ void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { updateGeneration(); } +void PredicatedScalarEvolution::addPredicates( + ArrayRef Preds) { + for (const SCEVPredicate *P : Preds) + addPredicate(*P); +} + const SCEVPredicate &PredicatedScalarEvolution::getPredicate() const { return *Preds; } @@ -15617,35 +15640,15 @@ void PredicatedScalarEvolution::updateGeneration() { } } -void PredicatedScalarEvolution::setNoOverflow( - Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) { - const SCEV *Expr = getSCEV(V); - const auto *AR = cast(Expr); - - auto ImpliedFlags = SCEVWrapPredicate::getImpliedFlags(AR, SE); - - // Clear the statically implied flags. 
- Flags = SCEVWrapPredicate::clearFlags(Flags, ImpliedFlags); - addPredicate(*SE.getWrapPredicate(AR, Flags)); - - auto II = FlagsMap.insert({V, Flags}); - if (!II.second) - II.first->second = SCEVWrapPredicate::setFlags(Flags, II.first->second); -} - bool PredicatedScalarEvolution::hasNoOverflow( Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) { - const SCEV *Expr = getSCEV(V); - const auto *AR = cast(Expr); + const auto *AR = dyn_cast(getSCEV(V)); + if (!AR) + return false; Flags = SCEVWrapPredicate::clearFlags( Flags, SCEVWrapPredicate::getImpliedFlags(AR, SE)); - auto II = FlagsMap.find(V); - - if (II != FlagsMap.end()) - Flags = SCEVWrapPredicate::clearFlags(Flags, II->second); - return Flags == SCEVWrapPredicate::IncrementAnyWrap; } @@ -15663,8 +15666,7 @@ const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec( return New; } - for (const auto *P : NewPreds) - addPredicate(*P); + addPredicates(NewPreds); RewriteMap[SE.getSCEV(V)] = {Generation, New}; return New; @@ -15675,10 +15677,7 @@ PredicatedScalarEvolution::PredicatedScalarEvolution( : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(std::make_unique(Init.Preds->getPredicates(), SE)), - Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { - for (auto I : Init.FlagsMap) - FlagsMap.insert(I); -} + Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) {} void PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const { // For each block. 
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll index 63abd4ef70d63..88b16d0f32534 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/retry-runtime-checks-after-dependence-analysis.ll @@ -72,27 +72,27 @@ define void @dependency_check_and_runtime_checks_needed_gepb_not_inbounds_iv2_st ; CHECK-NEXT: Comparing group GRP0: ; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv ; CHECK-NEXT: Against group GRP1: -; CHECK-NEXT: %gep.b = getelementptr i8, ptr %b, i64 %iv2 +; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset ; CHECK-NEXT: Check 1: ; CHECK-NEXT: Comparing group GRP0: ; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv ; CHECK-NEXT: Against group GRP2: -; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset +; CHECK-NEXT: %gep.b = getelementptr i8, ptr %b, i64 %iv2 ; CHECK-NEXT: Check 2: ; CHECK-NEXT: Comparing group GRP1: -; CHECK-NEXT: %gep.b = getelementptr i8, ptr %b, i64 %iv2 -; CHECK-NEXT: Against group GRP2: ; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset +; CHECK-NEXT: Against group GRP2: +; CHECK-NEXT: %gep.b = getelementptr i8, ptr %b, i64 %iv2 ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: ; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a)) ; CHECK-NEXT: Member: {%a,+,4}<%loop> ; CHECK-NEXT: Group GRP1: -; CHECK-NEXT: (Low: %b High: (-1 + (5 * %n) + %b)) -; CHECK-NEXT: Member: {%b,+,5}<%loop> -; CHECK-NEXT: Group GRP2: ; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a)) ; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop> +; CHECK-NEXT: Group GRP2: +; CHECK-NEXT: (Low: %b High: (-1 + (5 * %n) + %b)) +; CHECK-NEXT: Member: {%b,+,5}<%loop> ; CHECK-EMPTY: 
; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. ; CHECK-NEXT: SCEV assumptions: @@ -265,27 +265,27 @@ define void @dependency_check_and_runtime_checks_needed_gepb_may_wrap(ptr %a, pt ; CHECK-NEXT: Comparing group GRP0: ; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv ; CHECK-NEXT: Against group GRP1: -; CHECK-NEXT: %gep.b = getelementptr float, ptr %b, i64 %iv2 +; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset ; CHECK-NEXT: Check 1: ; CHECK-NEXT: Comparing group GRP0: ; CHECK-NEXT: %gep.a.iv = getelementptr inbounds float, ptr %a, i64 %iv ; CHECK-NEXT: Against group GRP2: -; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset +; CHECK-NEXT: %gep.b = getelementptr float, ptr %b, i64 %iv2 ; CHECK-NEXT: Check 2: ; CHECK-NEXT: Comparing group GRP1: -; CHECK-NEXT: %gep.b = getelementptr float, ptr %b, i64 %iv2 -; CHECK-NEXT: Against group GRP2: ; CHECK-NEXT: %gep.a.iv.off = getelementptr inbounds float, ptr %a, i64 %iv.offset +; CHECK-NEXT: Against group GRP2: +; CHECK-NEXT: %gep.b = getelementptr float, ptr %b, i64 %iv2 ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: ; CHECK-NEXT: (Low: %a High: ((4 * %n) + %a)) ; CHECK-NEXT: Member: {%a,+,4}<%loop> ; CHECK-NEXT: Group GRP1: -; CHECK-NEXT: (Low: %b High: (-4 + (8 * %n) + %b)) -; CHECK-NEXT: Member: {%b,+,8}<%loop> -; CHECK-NEXT: Group GRP2: ; CHECK-NEXT: (Low: ((4 * %offset) + %a) High: ((4 * %offset) + (4 * %n) + %a)) ; CHECK-NEXT: Member: {((4 * %offset) + %a),+,4}<%loop> +; CHECK-NEXT: Group GRP2: +; CHECK-NEXT: (Low: %b High: (-4 + (8 * %n) + %b)) +; CHECK-NEXT: Member: {%b,+,8}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -325,29 +325,29 @@ define void @retry_after_dep_check_with_unknown_offset(ptr %A, i32 %offset) { ; CHECK-NEXT: Run-time memory checks: ; CHECK-NEXT: Check 0: ; CHECK-NEXT: Comparing group GRP0: -; CHECK-NEXT: %A.100.iv = getelementptr { float, float }, ptr %A.100, i64 %iv +; CHECK-NEXT: ptr %A ; CHECK-NEXT: Against group GRP1: -; CHECK-NEXT: %A.100.iv.offset.3 = getelementptr i8, ptr %A.100, i64 %iv.offset.3 +; CHECK-NEXT: %A.100.iv = getelementptr { float, float }, ptr %A.100, i64 %iv ; CHECK-NEXT: Check 1: ; CHECK-NEXT: Comparing group GRP0: -; CHECK-NEXT: %A.100.iv = getelementptr { float, float }, ptr %A.100, i64 %iv -; CHECK-NEXT: Against group GRP2: ; CHECK-NEXT: ptr %A +; CHECK-NEXT: Against group GRP2: +; CHECK-NEXT: %A.100.iv.offset.3 = getelementptr i8, ptr %A.100, i64 %iv.offset.3 ; CHECK-NEXT: Check 2: ; CHECK-NEXT: Comparing group GRP1: -; CHECK-NEXT: %A.100.iv.offset.3 = getelementptr i8, ptr %A.100, i64 %iv.offset.3 +; CHECK-NEXT: %A.100.iv = getelementptr { float, float }, ptr %A.100, i64 %iv ; CHECK-NEXT: Against group GRP2: -; CHECK-NEXT: ptr %A +; CHECK-NEXT: %A.100.iv.offset.3 = getelementptr i8, ptr %A.100, i64 %iv.offset.3 ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: +; CHECK-NEXT: (Low: %A High: (4 + %A)) +; CHECK-NEXT: Member: %A +; CHECK-NEXT: Group GRP1: ; CHECK-NEXT: (Low: (100 + %A) High: (96 + (8 * (zext i32 %offset to i64)) + %A)) ; CHECK-NEXT: Member: {(100 + %A),+,8}<%loop> -; CHECK-NEXT: Group GRP1: +; CHECK-NEXT: Group GRP2: ; CHECK-NEXT: (Low: (100 + (8 * (zext i32 %offset to i64)) + %A) High: (96 + (16 * (zext i32 %offset to i64)) + %A)) ; CHECK-NEXT: Member: {(100 + (8 * (zext i32 %offset to i64)) + %A),+,8}<%loop> -; CHECK-NEXT: Group GRP2: -; CHECK-NEXT: (Low: %A High: (4 + %A)) -; CHECK-NEXT: Member: %A ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: From d32fc0d6ccf95afbd12d732492a11b729f5b0f4c Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Mon, 22 Jun 2026 13:16:26 -0700 Subject: [PATCH 077/511] Revert "[InstCombine] Merge consecutive assumes" (#205177) Reverts llvm/llvm-project#204983 due to buildbot breakage (see discussion in https://github.com/llvm/llvm-project/pull/204983#issuecomment-4771424564) --- .../InstCombine/InstCombineCalls.cpp | 22 +++---------------- .../InstCombine/assume-loop-align.ll | 3 ++- llvm/test/Transforms/InstCombine/assume.ll | 20 ++++++++++++----- .../PhaseOrdering/AArch64/std-find.ll | 3 ++- 4 files changed, 21 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 880d896e12d6e..ce9e4b836a56e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3844,26 +3844,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } } - if (II->hasOperandBundles()) { - // Merge consecutive assumes to save some resources - if (auto *PrevAI = dyn_cast_or_null(II->getPrevNode()); - PrevAI && PrevAI->hasOperandBundles()) { - SmallVector Bundles; - Bundles.reserve(II->getNumOperandBundles() + - PrevAI->getNumOperandBundles()); - for (auto Bundle : PrevAI->operand_bundles()) - Bundles.emplace_back(Bundle); - for (auto Bundle : II->operand_bundles()) - Bundles.emplace_back(Bundle); - Builder.CreateAssumption(Bundles); - eraseInstFromFunction(*PrevAI); - return eraseInstFromFunction(*II); - } - - // If the assume has operand bundles, the folds below will never work, so - // don't bother trying. + // If the assume has operand bundles, the folds below will never work, so + // don't bother trying. 
+ if (II->hasOperandBundles()) break; - } Value *IIOperand = II->getArgOperand(0); diff --git a/llvm/test/Transforms/InstCombine/assume-loop-align.ll b/llvm/test/Transforms/InstCombine/assume-loop-align.ll index 2701775f011e8..0c5e403ca54a9 100644 --- a/llvm/test/Transforms/InstCombine/assume-loop-align.ll +++ b/llvm/test/Transforms/InstCombine/assume-loop-align.ll @@ -10,7 +10,8 @@ target triple = "x86_64-unknown-linux-gnu" define void @foo(ptr %a, ptr %b) #0 { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A:%.*]], i64 64), "align"(ptr [[B:%.*]], i64 64) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A:%.*]], i64 64) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[B:%.*]], i64 64) ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index 1902fcbb95afd..69220811ac206 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -134,7 +134,8 @@ define i1 @align_with_offset_on_gep(ptr %base) { define void @align_with_constant_offset_0(ptr %ptr) { ; CHECK-LABEL: @align_with_constant_offset_0( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 0) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 0) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -144,7 +145,8 @@ define void @align_with_constant_offset_0(ptr %ptr) { define void @align_with_constant_offset_1(ptr %ptr) { ; CHECK-LABEL: @align_with_constant_offset_1( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), 
"align"(ptr [[PTR]], i64 8, i64 -8) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 -8) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -155,7 +157,8 @@ define void @align_with_constant_offset_1(ptr %ptr) { define void @align_with_constant_offset_4(ptr %ptr) { ; CHECK-LABEL: @align_with_constant_offset_4( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 0) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 0) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -166,7 +169,8 @@ define void @align_with_constant_offset_4(ptr %ptr) { define void @align_with_constant_offset_8(ptr %ptr) { ; CHECK-LABEL: @align_with_constant_offset_8( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 8) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 8) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -176,7 +180,8 @@ define void @align_with_constant_offset_8(ptr %ptr) { define void @align_with_variable_offset(ptr %ptr, i64 %offset) { ; CHECK-LABEL: @align_with_variable_offset( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 [[OFFSET:%.*]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 [[OFFSET:%.*]]) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -622,7 
+627,10 @@ define void @redundant_nonnull3(ptr %ptr) { define void @partially_redundant(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr5) { ; CHECK-LABEL: @partially_redundant( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR5:%.*]]), "nonnull"(ptr [[PTR4:%.*]]), "nonnull"(ptr [[PTR3:%.*]]), "nonnull"(ptr [[PTR:%.*]]), "nonnull"(ptr [[PTR2:%.*]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR2:%.*]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR:%.*]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR4:%.*]]), "nonnull"(ptr [[PTR3:%.*]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR5:%.*]]) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "nonnull"(ptr %ptr), "nonnull"(ptr %ptr2) ] diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll index 4ca7f780cdc5e..69b23200b239b 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll @@ -244,7 +244,8 @@ define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) { ; CHECK-LABEL: define noundef ptr @std_find_caller( ; CHECK-SAME: ptr noundef [[FIRST:%.*]], ptr noundef [[LAST:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2), "align"(ptr [[LAST]], i64 2) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[LAST]], i64 2) ] ; CHECK-NEXT: [[PRE_I:%.*]] = icmp eq ptr [[FIRST]], [[LAST]] ; CHECK-NEXT: br i1 [[PRE_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT:.*]], label %[[LOOP_HEADER_I_PREHEADER:.*]] ; CHECK: [[LOOP_HEADER_I_PREHEADER]]: From b48fc193e84f0bc3452b0d79d8c06833a7f3f294 Mon Sep 17 00:00:00 2001 From: adams381 Date: Mon, 22 Jun 2026 15:29:05 -0500 Subject: 
[PATCH 078/511] [CIR] Compute union base subobject for tail-padding reuse (#201428) A [[no_unique_address]] field whose type is a union with reusable tail padding made CIRGen trip the insertPadding "offset >= size" assertion in CIRGenRecordLayoutBuilder. CIR sized the union member at its full size, so a following field that the ABI places in the union's tail padding overlapped the union and insertPadding asserted. CIR already computed a distinct, smaller base subobject type for structs and classes whose tail padding can be reused, but not for unions. A union has reusable tail padding when one of its members is itself a [[no_unique_address]] field with tail padding, which makes the union's data size smaller than its size. computeRecordLayout now builds that base type for unions too, sized from getDataSize(), and lowerUnion emits the storage type plus trailing padding as ordinary struct fields so the reusable padding is exposed, mirroring classic CodeGen. The base-type gate also no longer skips records marked final, which classic CodeGen never skipped and which otherwise hit the same assertion for a final union with reusable tail padding. This unblocks libcxx's std::expected tests, which lean on [[no_unique_address]] over unions. 
--- .../CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp | 75 +++++++++---- clang/test/CIR/CodeGen/no-unique-address.cpp | 101 ++++++++++++++++++ 2 files changed, 154 insertions(+), 22 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp index 525ae4be4207b..e33b2065ecb67 100644 --- a/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenRecordLayoutBuilder.cpp @@ -71,8 +71,8 @@ struct CIRRecordLowering final { void setBitFieldInfo(const FieldDecl *fd, CharUnits startOffset, mlir::Type storageType); - void lower(bool NonVirtualBaseType); - void lowerUnion(); + void lower(bool nonVirtualBaseType); + void lowerUnion(bool nonVirtualBaseType); /// Determines if we need a packed llvm struct. void determinePacked(bool nvBaseType); @@ -280,7 +280,7 @@ void CIRRecordLowering::setBitFieldInfo(const FieldDecl *fd, void CIRRecordLowering::lower(bool nonVirtualBaseType) { if (recordDecl->isUnion()) { - lowerUnion(); + lowerUnion(nonVirtualBaseType); computeVolatileBitfields(); return; } @@ -692,23 +692,28 @@ CIRGenTypes::computeRecordLayout(const RecordDecl *rd, cir::RecordType *ty) { assert(ty->isIncomplete() && "recomputing record layout?"); lowering.lower(/*nonVirtualBaseType=*/false); - // If we're in C++, compute the base subobject type. For C++ records the base - // subobject type is always set (matching classic CodeGen). For unions and - // final classes the base subobject and complete object types are identical - // (no tail padding can be reused), so baseTy points at the same record as - // ty. We must still populate baseTy in those cases because callers such as - // getStorageType(const CXXRecordDecl *) used to lay out potentially- - // overlapping ([[no_unique_address]]) fields read it unconditionally; a - // null baseTy would otherwise propagate as a null mlir::Type into the - // members vector and trip the !empty() assertion in fillOutputFields. 
+ // If we're in C++, compute the base subobject type. For C++ records baseTy + // defaults to the complete object type and is replaced by a distinct, + // smaller record only when the record has tail padding an enclosing + // [[no_unique_address]] field can reuse. We must populate baseTy even when + // it equals ty because callers such as getStorageType(const CXXRecordDecl *) + // read it unconditionally when laying out potentially-overlapping + // ([[no_unique_address]]) fields; a null baseTy would otherwise propagate as + // a null mlir::Type into the members vector and trip the !empty() assertion + // in fillOutputFields. cir::RecordType baseTy; if (llvm::isa(rd)) { baseTy = *ty; - if (!rd->isUnion() && !rd->hasAttr() && - lowering.astRecordLayout.getNonVirtualSize() != - lowering.astRecordLayout.getSize()) { + // A record needs a distinct base-subobject type when its tail padding can + // be reused by an enclosing [[no_unique_address]] field, i.e. when the + // non-virtual size differs from the complete size. This matches classic + // CodeGen and covers unions too: a union's non-virtual size already tracks + // its reusable tail padding (and stays at the minimum union size when the + // union is empty, so a zero-data union does not spuriously qualify). + if (lowering.astRecordLayout.getNonVirtualSize() != + lowering.astRecordLayout.getSize()) { CIRRecordLowering baseLowering(*this, rd, /*Packed=*/lowering.packed); - baseLowering.lower(/*NonVirtualBaseType=*/true); + baseLowering.lower(/*nonVirtualBaseType=*/true); std::string baseIdentifier = getRecordTypeName(rd, ".base"); baseTy = builder.getCompleteNamedRecordType( baseLowering.fieldTypes, baseLowering.packed, baseLowering.padded, @@ -716,8 +721,12 @@ CIRGenTypes::computeRecordLayout(const RecordDecl *rd, cir::RecordType *ty) { // TODO(cir): add something like addRecordTypeName // BaseTy and Ty must agree on their packedness for getCIRFieldNo to work - // on both of them with the same index. 
- assert(lowering.packed == baseLowering.packed && + // on both of them with the same index. Unions are exempt: CIR derives a + // union's packedness from its layout size, which is the data size for the + // base subobject but the full size for the complete object, so the two + // can legitimately disagree. (Classic CodeGen derives both from the data + // size and so needs no such exemption.) + assert((rd->isUnion() || lowering.packed == baseLowering.packed) && "Non-virtual and complete types must agree on packedness"); } } @@ -809,8 +818,13 @@ void CIRGenRecordLayout::dump() const { print(llvm::errs()); } void CIRGenBitFieldInfo::dump() const { print(llvm::errs()); } -void CIRRecordLowering::lowerUnion() { - CharUnits layoutSize = astRecordLayout.getSize(); +void CIRRecordLowering::lowerUnion(bool nonVirtualBaseType) { + // The base-subobject layout of a union is sized to its data size rather than + // its full size. A union can have reusable tail padding when one of its + // members is a [[no_unique_address]] field that itself has tail padding, so + // an enclosing [[no_unique_address]] union field must use this smaller type. + CharUnits layoutSize = nonVirtualBaseType ? astRecordLayout.getDataSize() + : astRecordLayout.getSize(); mlir::Type storageType = nullptr; bool seenNamedMember = false; @@ -860,7 +874,12 @@ void CIRRecordLowering::lowerUnion() { // NOTE(cir): Track all union member's types, not just the largest one. It // allows for proper type-checking and retain more info for analisys. - fieldTypes.push_back(fieldType); + // + // The base-subobject type instead uses a single (possibly clipped) storage + // type, mirroring classic CodeGen, so that it exposes the union's reusable + // tail padding. 
+ if (!nonVirtualBaseType) + fieldTypes.push_back(fieldType); } if (!storageType) { @@ -870,8 +889,20 @@ void CIRRecordLowering::lowerUnion() { if (layoutSize < getSize(storageType)) storageType = getByteArrayType(layoutSize); - else + + if (nonVirtualBaseType) { + // The base-subobject record is built as a struct from fieldTypes, so add + // the storage type and any trailing padding as ordinary fields rather than + // routing padding through the union's single tail-padding slot. + fieldTypes.push_back(storageType); + CharUnits padding = layoutSize - getSize(storageType); + if (!padding.isZero()) { + fieldTypes.push_back(getByteArrayType(padding)); + padded = true; + } + } else { appendPaddingBytes(layoutSize - getSize(storageType)); + } // Set packed if we need it. if (!layoutSize.isMultipleOf(getAlignment(storageType))) diff --git a/clang/test/CIR/CodeGen/no-unique-address.cpp b/clang/test/CIR/CodeGen/no-unique-address.cpp index 89a2c3f1b6ec0..08e2f1e34d8c4 100644 --- a/clang/test/CIR/CodeGen/no-unique-address.cpp +++ b/clang/test/CIR/CodeGen/no-unique-address.cpp @@ -46,14 +46,36 @@ struct Outer { // LLVM-DAG: %union.UnionForNUA = type { i64 } // LLVM-DAG: %struct.OuterFinal = type { %struct.FinalForNUA, i8 } // LLVM-DAG: %struct.FinalForNUA = type { i32, i8 } +// LLVM-DAG: %struct.OuterUnionPad = type { %struct.UnionWithPadding.base, i8 } +// LLVM-DAG: %struct.UnionWithPadding.base = type { i8 } +// LLVM-DAG: %struct.OuterFinalUnionPad = type { %struct.FinalUnionWithPadding.base, i8 } +// LLVM-DAG: %struct.FinalUnionWithPadding.base = type { i8 } // LLVM-DAG: @ou = {{(dso_local )?}}global %struct.OuterUnion zeroinitializer, align 8 // LLVM-DAG: @of = {{(dso_local )?}}global %struct.OuterFinal zeroinitializer, align 4 +// LLVM-DAG: @oup = {{(dso_local )?}}global %struct.OuterUnionPad zeroinitializer, align 2 +// LLVM-DAG: @ofup = {{(dso_local )?}}global %struct.OuterFinalUnionPad zeroinitializer, align 2 +// LLVM-DAG: %struct.OuterZeroData = type { 
%union.UnionZeroDataSize, i8 } +// LLVM-DAG: %union.UnionZeroDataSize = type { i32 } +// LLVM-DAG: @ozd = {{(dso_local )?}}global %struct.OuterZeroData zeroinitializer, align 4 +// LLVM-DAG: %struct.OuterAllEmpty = type { i8 } +// LLVM-DAG: @oae = {{(dso_local )?}}global %struct.OuterAllEmpty zeroinitializer, align 1 // OGCG-DAG: %struct.OuterUnion = type { %union.UnionForNUA, i32 } // OGCG-DAG: %union.UnionForNUA = type { i64 } // OGCG-DAG: %struct.OuterFinal = type { %struct.FinalForNUA, i8 } // OGCG-DAG: %struct.FinalForNUA = type { i32, i8 } +// OGCG-DAG: %struct.OuterUnionPad = type { %union.UnionWithPadding.base, i8 } +// OGCG-DAG: %union.UnionWithPadding.base = type { i8 } +// OGCG-DAG: %struct.OuterFinalUnionPad = type { %union.FinalUnionWithPadding.base, i8 } +// OGCG-DAG: %union.FinalUnionWithPadding.base = type { i8 } // OGCG-DAG: @ou = {{(dso_local )?}}global %struct.OuterUnion zeroinitializer, align 8 // OGCG-DAG: @of = {{(dso_local )?}}global %struct.OuterFinal zeroinitializer, align 4 +// OGCG-DAG: @oup = {{(dso_local )?}}global %struct.OuterUnionPad zeroinitializer, align 2 +// OGCG-DAG: @ofup = {{(dso_local )?}}global %struct.OuterFinalUnionPad zeroinitializer, align 2 +// OGCG-DAG: %struct.OuterZeroData = type { %union.UnionZeroDataSize, i8 } +// OGCG-DAG: %union.UnionZeroDataSize = type { i32 } +// OGCG-DAG: @ozd = {{(dso_local )?}}global %struct.OuterZeroData zeroinitializer, align 4 +// OGCG-DAG: %struct.OuterAllEmpty = type { i8 } +// OGCG-DAG: @oae = {{(dso_local )?}}global %struct.OuterAllEmpty zeroinitializer, align 1 // LLVM-LABEL: define {{.*}} void @_ZN5OuterC2ERK6Middlec( // LLVM: %[[GEP:.*]] = getelementptr inbounds nuw %struct.Outer, ptr %{{.+}}, i32 0, i32 0 @@ -98,10 +120,89 @@ struct OuterFinal { OuterFinal of; +// A [[no_unique_address]] union field whose union has reusable tail padding. 
+struct Padded { + Padded(); + +private: + alignas(2) bool b; +}; + +union UnionWithPadding { + UnionWithPadding(); + [[no_unique_address]] Padded p; + bool flag; +}; + +struct OuterUnionPad { + [[no_unique_address]] UnionWithPadding u; + bool tail; +}; + +OuterUnionPad oup; + +// A final union also gets a base-subobject type for tail-padding reuse. +union FinalUnionWithPadding final { + FinalUnionWithPadding(); + [[no_unique_address]] Padded p; + bool flag; +}; + +struct OuterFinalUnionPad { + [[no_unique_address]] FinalUnionWithPadding u; + bool tail; +}; + +OuterFinalUnionPad ofup; + // CIR-NUA-DAG: !rec_FinalForNUA = !cir.struct<"FinalForNUA" {!s32i, !s8i}> // CIR-NUA-DAG: !rec_UnionForNUA = !cir.union<"UnionForNUA" {!s32i, !s64i}> // CIR-NUA-DAG: !rec_OuterFinal = !cir.struct<"OuterFinal" {!rec_FinalForNUA, !s8i}> // CIR-NUA-DAG: !rec_OuterUnion = !cir.struct<"OuterUnion" {!rec_UnionForNUA, !s32i}> +// CIR-NUA-DAG: !rec_UnionWithPadding2Ebase = !cir.struct<"UnionWithPadding.base" {!u8i}> +// CIR-NUA-DAG: !rec_OuterUnionPad = !cir.struct<"OuterUnionPad" {!rec_UnionWithPadding2Ebase, !cir.bool}> +// CIR-NUA-DAG: !rec_FinalUnionWithPadding2Ebase = !cir.struct<"FinalUnionWithPadding.base" {!u8i}> +// CIR-NUA-DAG: !rec_OuterFinalUnionPad = !cir.struct<"OuterFinalUnionPad" {!rec_FinalUnionWithPadding2Ebase, !cir.bool}> // CIR-NUA-DAG: cir.global external @ou = #cir.zero : !rec_OuterUnion // CIR-NUA-DAG: cir.global external @of = #cir.zero : !rec_OuterFinal +// CIR-NUA-DAG: cir.global external @oup = #cir.zero : !rec_OuterUnionPad +// CIR-NUA-DAG: cir.global external @ofup = #cir.zero : !rec_OuterFinalUnionPad + +struct EmptyForNUA {}; + +union UnionZeroDataSize { + [[no_unique_address]] EmptyForNUA e; + [[no_unique_address]] int i; +}; + +struct OuterZeroData { + [[no_unique_address]] UnionZeroDataSize u; + bool flag; +}; + +OuterZeroData ozd; + +// A union whose members are all [[no_unique_address]] empty types has data +// size 0, but its non-virtual size stays at 
the 1-byte minimum, so the gate +// does not fire and it needs no distinct base-subobject type. +struct EmptyA {}; +struct EmptyB {}; + +union UnionAllEmpty { + [[no_unique_address]] EmptyA a; + [[no_unique_address]] EmptyB b; +}; + +struct OuterAllEmpty { + [[no_unique_address]] UnionAllEmpty u; + bool flag; +}; + +OuterAllEmpty oae; + +// CIR-NUA-DAG: !rec_OuterAllEmpty = !cir.struct<"OuterAllEmpty" {!cir.bool}> +// CIR-NUA-DAG: cir.global external @oae = #cir.zero : !rec_OuterAllEmpty +// CIR-NUA-DAG: !rec_UnionZeroDataSize = !cir.union<"UnionZeroDataSize" {!rec_EmptyForNUA, !s32i}> +// CIR-NUA-DAG: !rec_OuterZeroData = !cir.struct<"OuterZeroData" {!rec_UnionZeroDataSize, !cir.bool}> +// CIR-NUA-DAG: cir.global external @ozd = #cir.zero : !rec_OuterZeroData From 33ee918aa00676526758552b65c8bb32ac292e30 Mon Sep 17 00:00:00 2001 From: David Zbarsky Date: Mon, 22 Jun 2026 16:31:01 -0400 Subject: [PATCH 079/511] [BOLT][Bazel] Configure Rewrite target backends (#205179) `RewriteInstance.cpp` only creates target-specific `MCPlusBuilder` instances when `X86_AVAILABLE` or `AARCH64_AVAILABLE` is defined. The Bazel `Rewrite` target defines neither macro and hard-codes `TargetX86`, so `createMCPlusBuilder()` returns null for both target backends defined by the Bazel overlay. Derive the definitions and dependencies from the generated `bolt_targets` list, restricted to the `TargetAArch64` and `TargetX86` rules defined in this BUILD file, and use the same target dependencies for `llvm-bolt`. Validation: `buildifier -mode=check -lint=warn utils/bazel/llvm-project-overlay/bolt/BUILD.bazel`; an equivalent overlay change built and executed BOLT for Linux x86_64 and Linux AArch64 in [hermetic-llvm's remote `//prebuilt/llvm:all` build](https://app.buildbuddy.io/invocation/7943a005-c159-4cb9-af0e-00ffbaa40bb5). AI tool disclosure: Co-authored with OpenAI Codex. 
--- .../llvm-project-overlay/bolt/BUILD.bazel | 24 ++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/bolt/BUILD.bazel b/utils/bazel/llvm-project-overlay/bolt/BUILD.bazel index 3e087aeec4435..40395f2bce540 100644 --- a/utils/bazel/llvm-project-overlay/bolt/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/bolt/BUILD.bazel @@ -6,6 +6,15 @@ load("@bazel_skylib//rules:expand_template.bzl", "expand_template") load("@rules_cc//cc:defs.bzl", "cc_binary", "cc_library") load(":targets.bzl", "bolt_targets") +_BOLT_REWRITE_TARGETS = [ + target + for target in bolt_targets + if target in [ + "AArch64", + "X86", + ] +] + package( default_visibility = ["//visibility:public"], ) @@ -71,9 +80,11 @@ cc_binary( ":Profile", ":Rewrite", ":RuntimeLibs", - ":TargetAArch64", ":TargetConfig", - ":TargetX86", + ] + [ + ":Target" + target + for target in _BOLT_REWRITE_TARGETS + ] + [ ":Utils", "//llvm:AllTargetsAsmParsers", "//llvm:AllTargetsCodeGens", @@ -94,12 +105,19 @@ cc_library( "include/bolt/Rewrite/*.h", ]), includes = ["include"], + local_defines = [ + target.upper() + "_AVAILABLE" + for target in _BOLT_REWRITE_TARGETS + ], deps = [ ":Core", ":Passes", ":Profile", ":RuntimeLibs", - ":TargetX86", + ] + [ + ":Target" + target + for target in _BOLT_REWRITE_TARGETS + ] + [ ":Utils", "//llvm:Analysis", "//llvm:BinaryFormat", From 0034f76768a15b44fa68c25ca68496c547a01435 Mon Sep 17 00:00:00 2001 From: Harald van Dijk Date: Mon, 22 Jun 2026 21:32:57 +0100 Subject: [PATCH 080/511] [Clang][RAV] Simplify TraverseTemplateArgumentLocsHelper (#199131) We were checking the result of getTemplateArgsAsWritten() to skip over implicit instantiations, with an assert to ensure that it has the desired effect, before checking getTemplateSpecializationKind() == TSK_ExplicitSpecialization which would skip over implicit instantiations anyway. 
As the included tests show, the invariant that we were relying on did not hold. We no longer need to rely on it: we can now just check the result of getTemplateSpecializationKind() directly. Fixes: #198903 Fixes: #169302 --- clang/include/clang/AST/RecursiveASTVisitor.h | 21 ++++++---------- clang/test/AST/pr198903.cpp | 25 +++++++++++++++++++ clang/test/Analysis/pr169302.cpp | 25 +++++++++++++++++++ 3 files changed, 58 insertions(+), 13 deletions(-) create mode 100644 clang/test/AST/pr198903.cpp create mode 100644 clang/test/Analysis/pr169302.cpp diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 529d657fc01f5..b000a34043696 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -2222,25 +2222,20 @@ bool RecursiveASTVisitor::TraverseTemplateArgumentLocsHelper( handles traversal of template args and qualifier. \ For explicit specializations ("template<> set {...};"), \ we traverse template args here since there is no EID.
*/ \ - if (const auto *ArgsWritten = D->getTemplateArgsAsWritten()) { \ - assert(D->getTemplateSpecializationKind() != TSK_ImplicitInstantiation); \ - if (D->getTemplateSpecializationKind() == TSK_ExplicitSpecialization) { \ - TRY_TO(TraverseTemplateArgumentLocsHelper( \ - ArgsWritten->getTemplateArgs(), ArgsWritten->NumTemplateArgs)); \ - } \ - } \ - \ - if (getDerived().shouldVisitTemplateInstantiations() || \ - D->getTemplateSpecializationKind() == TSK_ExplicitSpecialization) { \ - /* Traverse base definition for explicit specializations */ \ - TRY_TO(Traverse##DECLKIND##Helper(D)); \ - } else { \ + if (D->getTemplateSpecializationKind() == TSK_ExplicitSpecialization) { \ + const auto *ArgsWritten = D->getTemplateArgsAsWritten(); \ + TRY_TO(TraverseTemplateArgumentLocsHelper( \ + ArgsWritten->getTemplateArgs(), ArgsWritten->NumTemplateArgs)); \ + } else if (!getDerived().shouldVisitTemplateInstantiations()) { \ /* Returning from here skips traversing the \ declaration context of the *TemplateSpecializationDecl \ (embedded in the DEF_TRAVERSE_DECL() macro) \ which contains the instantiated members of the template. 
*/ \ return true; \ } \ + \ + /* Traverse base definition for explicit specializations */ \ + TRY_TO(Traverse##DECLKIND##Helper(D)); \ }) DEF_TRAVERSE_TMPL_SPEC_DECL(Class, CXXRecord) diff --git a/clang/test/AST/pr198903.cpp b/clang/test/AST/pr198903.cpp new file mode 100644 index 0000000000000..1f0f68f92b7e4 --- /dev/null +++ b/clang/test/AST/pr198903.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_cc1 -ast-list %s | FileCheck -strict-whitespace %s + +template +struct Tpl { + template + static int var; +}; +// CHECK: Tpl +// CHECK-NEXT: Tpl::(anonymous) +// CHECK-NEXT: Tpl +// CHECK-NEXT: Tpl::var +// CHECK-NEXT: Tpl::(anonymous) +// CHECK-NEXT: Tpl::var + +template +template +int Tpl::var; +// CHECK-NEXT: Tpl::var +// CHECK-NEXT: Tpl::(anonymous) +// CHECK-NEXT: Tpl::var +// CHECK-NEXT: T + +int i = Tpl::var; +// CHECK-NEXT: i +// CHECK-NEXT: Tpl::var diff --git a/clang/test/Analysis/pr169302.cpp b/clang/test/Analysis/pr169302.cpp new file mode 100644 index 0000000000000..9ff73bc8535e3 --- /dev/null +++ b/clang/test/Analysis/pr169302.cpp @@ -0,0 +1,25 @@ +// RUN: %clang_analyze_cc1 -std=c++14 -analyzer-checker=core -verify %s + +// expected-no-diagnostics + +template struct S; + +class Sp { +public: + template void M() {} + template struct I { + static void IM(); + }; +}; + +template <> struct S { + using F = void (Sp::*)(); + template static constexpr F SpM = &Sp::template M

; +}; + +template constexpr S::F S::SpM; + +template void Sp::I::IM() { + using Spec = S; + typename Spec::F E = Spec::template SpM; +} From 06fee0798410e06cdb51ee45dfa26ea976870307 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 22 Jun 2026 22:40:25 +0200 Subject: [PATCH 081/511] [VPlan] Handle single-scalar casts in replicateByVF. (#205181) Fixes a crash after https://github.com/llvm/llvm-project/pull/203057. --- llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 5 +- .../X86/predicated-replicate-feeding-cast.ll | 234 ++++++++++++++++++ 2 files changed, 238 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LoopVectorize/X86/predicated-replicate-feeding-cast.ll diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index 3c4dc12950823..ae4beb5b71874 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -963,7 +963,10 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { /// Users that only demand the first lane can use the definition for lane /// 0. 
DefR->replaceUsesWithIf(LaneDefs[0], [DefR](VPUser &U, unsigned) { - return U.usesFirstLaneOnly(DefR); + if (U.usesFirstLaneOnly(DefR)) + return true; + auto *VPI = dyn_cast(&U); + return VPI && Instruction::isCast(VPI->getOpcode()); }); // Update each build vector user that currently has DefR as its only diff --git a/llvm/test/Transforms/LoopVectorize/X86/predicated-replicate-feeding-cast.ll b/llvm/test/Transforms/LoopVectorize/X86/predicated-replicate-feeding-cast.ll new file mode 100644 index 0000000000000..ca6aca088ccde --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/predicated-replicate-feeding-cast.ll @@ -0,0 +1,234 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -p loop-vectorize -force-vector-width=2 -force-vector-interleave=1 -S %s | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +define i8 @predicated_replicate_feeding_cast(i16 %n, i1 %c1, i1 %c2, i16 %a, i8 %b) { +; CHECK-LABEL: define i8 @predicated_replicate_feeding_cast( +; CHECK-SAME: i16 [[N:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]], i16 [[A:%.*]], i8 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[N]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i32 [[TMP0]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i32 [[N_VEC]] to i16 +; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[C1]], true +; CHECK-NEXT: [[TMP4:%.*]] = xor i1 [[C2]], true +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE6:.*]] ] +; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_SDIV_IF:.*]], label 
%[[PRED_SDIV_CONTINUE:.*]] +; CHECK: [[PRED_SDIV_IF]]: +; CHECK-NEXT: [[TMP5:%.*]] = sdiv i16 1, [[A]] +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> poison, i16 [[TMP5]], i64 0 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]] +; CHECK: [[PRED_SDIV_CONTINUE]]: +; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_SDIV_IF]] ] +; CHECK-NEXT: br i1 [[TMP3]], label %[[PRED_SDIV_IF1:.*]], label %[[PRED_SDIV_CONTINUE2:.*]] +; CHECK: [[PRED_SDIV_IF1]]: +; CHECK-NEXT: [[TMP8:%.*]] = sdiv i16 1, [[A]] +; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x i16> [[TMP7]], i16 [[TMP8]], i64 1 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE2]] +; CHECK: [[PRED_SDIV_CONTINUE2]]: +; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ [[TMP7]], %[[PRED_SDIV_CONTINUE]] ], [ [[TMP9]], %[[PRED_SDIV_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C1]], <2 x i16> zeroinitializer, <2 x i16> [[TMP10]] +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i16> [[PREDPHI]], i64 0 +; CHECK-NEXT: [[TMP12:%.*]] = trunc i16 [[TMP11]] to i8 +; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SDIV_IF3:.*]], label %[[PRED_SDIV_CONTINUE4:.*]] +; CHECK: [[PRED_SDIV_IF3]]: +; CHECK-NEXT: [[TMP13:%.*]] = sdiv i8 [[TMP12]], [[B]] +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i8> poison, i8 [[TMP13]], i64 0 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE4]] +; CHECK: [[PRED_SDIV_CONTINUE4]]: +; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i8> [ poison, %[[PRED_SDIV_CONTINUE2]] ], [ [[TMP14]], %[[PRED_SDIV_IF3]] ] +; CHECK-NEXT: br i1 [[TMP4]], label %[[PRED_SDIV_IF5:.*]], label %[[PRED_SDIV_CONTINUE6]] +; CHECK: [[PRED_SDIV_IF5]]: +; CHECK-NEXT: [[TMP18:%.*]] = sdiv i8 [[TMP12]], [[B]] +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x i8> [[TMP15]], i8 [[TMP18]], i64 1 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE6]] +; CHECK: [[PRED_SDIV_CONTINUE6]]: +; CHECK-NEXT: [[TMP20:%.*]] = phi <2 x i8> [ [[TMP15]], %[[PRED_SDIV_CONTINUE4]] ], [ [[TMP19]], %[[PRED_SDIV_IF5]] ] +; CHECK-NEXT: 
[[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[C2]], <2 x i8> zeroinitializer, <2 x i8> [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i8> [[PREDPHI7]], i64 1 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ [[TMP2]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] +; CHECK-NEXT: br i1 [[C1]], label %[[MERGE:.*]], label %[[IF1:.*]] +; CHECK: [[IF1]]: +; CHECK-NEXT: [[DIV:%.*]] = sdiv i16 1, [[A]] +; CHECK-NEXT: br label %[[MERGE]] +; CHECK: [[MERGE]]: +; CHECK-NEXT: [[COND:%.*]] = phi i16 [ [[DIV]], %[[IF1]] ], [ 0, %[[LOOP]] ] +; CHECK-NEXT: br i1 [[C2]], label %[[LATCH]], label %[[IF2:.*]] +; CHECK: [[IF2]]: +; CHECK-NEXT: [[T:%.*]] = trunc i16 [[COND]] to i8 +; CHECK-NEXT: [[DIV2:%.*]] = sdiv i8 [[T]], [[B]] +; CHECK-NEXT: br label %[[LATCH]] +; CHECK: [[LATCH]]: +; CHECK-NEXT: [[RES:%.*]] = phi i8 [ [[DIV2]], %[[IF2]] ], [ 0, %[[MERGE]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i16 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i16 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i8 [ [[RES]], %[[LATCH]] ], [ [[TMP22]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i8 [[RES_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i16 [ 0, %entry ], [ %iv.next, %latch ] + br i1 %c1, label %merge, label %if1 + +if1: + %div = sdiv i16 1, %a + br label %merge + +merge: + %cond = phi i16 [ 
%div, %if1 ], [ 0, %loop ] + br i1 %c2, label %latch, label %if2 + +if2: + %t = trunc i16 %cond to i8 + %div2 = sdiv i8 %t, %b + br label %latch + +latch: + %res = phi i8 [ %div2, %if2 ], [ 0, %merge ] + %iv.next = add i16 %iv, 1 + %ec = icmp eq i16 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret i8 %res +} + +define i8 @predicated_replicate_feeding_cast_non_uniform(i64 %n, i1 %c1, i1 %c2, ptr %a.ptr, i8 %b) { +; CHECK-LABEL: define i8 @predicated_replicate_feeding_cast_non_uniform( +; CHECK-SAME: i64 [[N:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]], ptr [[A_PTR:%.*]], i8 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 2 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[C1]], true +; CHECK-NEXT: [[TMP2:%.*]] = xor i1 [[C2]], true +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_SDIV_CONTINUE6:.*]] ] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i16, ptr [[A_PTR]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i16>, ptr [[TMP3]], align 2 +; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_SDIV_IF:.*]], label %[[PRED_SDIV_CONTINUE:.*]] +; CHECK: [[PRED_SDIV_IF]]: +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i64 0 +; CHECK-NEXT: [[TMP5:%.*]] = sdiv i16 1, [[TMP4]] +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i16> poison, i16 [[TMP5]], i64 0 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE]] +; CHECK: [[PRED_SDIV_CONTINUE]]: +; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i16> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_SDIV_IF]] ] +; CHECK-NEXT: br i1 [[TMP1]], label %[[PRED_SDIV_IF1:.*]], label 
%[[PRED_SDIV_CONTINUE2:.*]] +; CHECK: [[PRED_SDIV_IF1]]: +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i16> [[WIDE_LOAD]], i64 1 +; CHECK-NEXT: [[TMP9:%.*]] = sdiv i16 1, [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x i16> [[TMP7]], i16 [[TMP9]], i64 1 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE2]] +; CHECK: [[PRED_SDIV_CONTINUE2]]: +; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x i16> [ [[TMP7]], %[[PRED_SDIV_CONTINUE]] ], [ [[TMP10]], %[[PRED_SDIV_IF1]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C1]], <2 x i16> zeroinitializer, <2 x i16> [[TMP11]] +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SDIV_IF3:.*]], label %[[PRED_SDIV_CONTINUE4:.*]] +; CHECK: [[PRED_SDIV_IF3]]: +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i16> [[PREDPHI]], i64 0 +; CHECK-NEXT: [[TMP13:%.*]] = trunc i16 [[TMP12]] to i8 +; CHECK-NEXT: [[TMP14:%.*]] = sdiv i8 [[TMP13]], [[B]] +; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x i8> poison, i8 [[TMP14]], i64 0 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE4]] +; CHECK: [[PRED_SDIV_CONTINUE4]]: +; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i8> [ poison, %[[PRED_SDIV_CONTINUE2]] ], [ [[TMP15]], %[[PRED_SDIV_IF3]] ] +; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_SDIV_IF5:.*]], label %[[PRED_SDIV_CONTINUE6]] +; CHECK: [[PRED_SDIV_IF5]]: +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <2 x i16> [[PREDPHI]], i64 1 +; CHECK-NEXT: [[TMP18:%.*]] = trunc i16 [[TMP17]] to i8 +; CHECK-NEXT: [[TMP19:%.*]] = sdiv i8 [[TMP18]], [[B]] +; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x i8> [[TMP16]], i8 [[TMP19]], i64 1 +; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE6]] +; CHECK: [[PRED_SDIV_CONTINUE6]]: +; CHECK-NEXT: [[TMP21:%.*]] = phi <2 x i8> [ [[TMP16]], %[[PRED_SDIV_CONTINUE4]] ], [ [[TMP20]], %[[PRED_SDIV_IF5]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; 
CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[PREDPHI7:%.*]] = select i1 [[C2]], <2 x i8> zeroinitializer, <2 x i8> [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i8> [[PREDPHI7]], i64 1 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LATCH:.*]] ] +; CHECK-NEXT: [[A_GEP:%.*]] = getelementptr i16, ptr [[A_PTR]], i64 [[IV]] +; CHECK-NEXT: [[A:%.*]] = load i16, ptr [[A_GEP]], align 2 +; CHECK-NEXT: br i1 [[C1]], label %[[MERGE:.*]], label %[[IF1:.*]] +; CHECK: [[IF1]]: +; CHECK-NEXT: [[DIV:%.*]] = sdiv i16 1, [[A]] +; CHECK-NEXT: br label %[[MERGE]] +; CHECK: [[MERGE]]: +; CHECK-NEXT: [[COND:%.*]] = phi i16 [ [[DIV]], %[[IF1]] ], [ 0, %[[LOOP]] ] +; CHECK-NEXT: br i1 [[C2]], label %[[LATCH]], label %[[IF2:.*]] +; CHECK: [[IF2]]: +; CHECK-NEXT: [[T:%.*]] = trunc i16 [[COND]] to i8 +; CHECK-NEXT: [[DIV2:%.*]] = sdiv i8 [[T]], [[B]] +; CHECK-NEXT: br label %[[LATCH]] +; CHECK: [[LATCH]]: +; CHECK-NEXT: [[RES:%.*]] = phi i8 [ [[DIV2]], %[[IF2]] ], [ 0, %[[MERGE]] ] +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i8 [ [[RES]], %[[LATCH]] ], [ [[TMP23]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i8 [[RES_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] + %a.gep = getelementptr i16, ptr %a.ptr, i64 %iv + %a = load i16, ptr %a.gep, align 2 + br i1 %c1, label %merge, label %if1 + +if1: + %div = sdiv i16 1, %a + br label %merge + +merge: + %cond = phi i16 [ %div, 
%if1 ], [ 0, %loop ] + br i1 %c2, label %latch, label %if2 + +if2: + %t = trunc i16 %cond to i8 + %div2 = sdiv i8 %t, %b + br label %latch + +latch: + %res = phi i8 [ %div2, %if2 ], [ 0, %merge ] + %iv.next = add i64 %iv, 1 + %ec = icmp eq i64 %iv, %n + br i1 %ec, label %exit, label %loop + +exit: + ret i8 %res +} From 35ca78a39712de33131ea0b764da8e674878dabf Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Mon, 22 Jun 2026 13:41:46 -0700 Subject: [PATCH 082/511] [libunwind] Fix an uninitialized read of __ra_sign_state (#205152) The Arm DWARF spec defines UNW_AARCH64_RA_SIGN_STATE as being zeroed until the first .cfi_negate_ra_state / .cfi_set_ra_state [1]. The GPRs struct containing __ra_sign_state is memcpy'd directly from the unw_context_t, which in turn is initialized by __unw_getcontext. Since it is a pseudo register, there is no corresponding state to restore in __unw_resume. [1] https://github.com/ARM-software/abi-aa/blob/main/aadwarf64/aadwarf64.rst#44call-frame-instructions --- libunwind/src/UnwindRegistersSave.S | 1 + 1 file changed, 1 insertion(+) diff --git a/libunwind/src/UnwindRegistersSave.S b/libunwind/src/UnwindRegistersSave.S index ca9a97b18e764..a82ebd1ead23b 100644 --- a/libunwind/src/UnwindRegistersSave.S +++ b/libunwind/src/UnwindRegistersSave.S @@ -807,6 +807,7 @@ DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) mov x1,sp str x1, [x0, #0x0F8] str x30, [x0, #0x100] // store return address as pc + str xzr, [x0, #0x108] // zero __ra_sign_state // skip cpsr #if defined(__ARM_FP) && __ARM_FP != 0 stp d0, d1, [x0, #0x110] From 387872583345f439bf7342b8081f7f0c40972b99 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Mon, 22 Jun 2026 13:44:08 -0700 Subject: [PATCH 083/511] [lldb][NFC] FileSpec::GetFileNameStrippingExtension should return a StringRef (#205178) No need to create a ConstString of a filename without its file extension.
--- lldb/include/lldb/Utility/FileSpec.h | 6 +++--- lldb/source/Core/ModuleList.cpp | 8 +++----- lldb/source/Core/PluginManager.cpp | 3 +-- .../Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp | 9 ++++----- lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp | 2 +- lldb/source/Target/Platform.cpp | 2 +- lldb/source/Utility/FileSpec.cpp | 4 ++-- 7 files changed, 15 insertions(+), 19 deletions(-) diff --git a/lldb/include/lldb/Utility/FileSpec.h b/lldb/include/lldb/Utility/FileSpec.h index 0a3ac62ae8d07..3cdf340cd8034 100644 --- a/lldb/include/lldb/Utility/FileSpec.h +++ b/lldb/include/lldb/Utility/FileSpec.h @@ -336,12 +336,12 @@ class FileSpec { /// Return the filename without the extension part /// - /// Returns a ConstString that represents the filename of this object + /// Returns a StringRef that represents the filename of this object /// without the extension part (e.g. for a file named "foo.bar", "foo" is /// returned) /// - /// \return Returns the filename without extension as a ConstString object. - ConstString GetFileNameStrippingExtension() const; + /// \return Returns the filename without extension as a StringRef object. + llvm::StringRef GetFileNameStrippingExtension() const; /// Get the memory cost of this object. 
/// diff --git a/lldb/source/Core/ModuleList.cpp b/lldb/source/Core/ModuleList.cpp index d3c52cfe7d8f3..6a22320e4d97b 100644 --- a/lldb/source/Core/ModuleList.cpp +++ b/lldb/source/Core/ModuleList.cpp @@ -1442,12 +1442,10 @@ bool ModuleList::LoadScriptingResourcesInTarget(Target *target, Status error; if (!LoadScriptingResourceInTargetForModule(*module, *target, error)) { if (error.Fail() && error.AsCString()) { - error = Status::FromErrorStringWithFormat( + error = Status::FromErrorStringWithFormatv( "unable to load scripting data for " - "module %s - error reported was %s", - module->GetFileSpec() - .GetFileNameStrippingExtension() - .GetCString(), + "module {0} - error reported was {1}", + module->GetFileSpec().GetFileNameStrippingExtension(), error.AsCString()); errors.push_back(std::move(error)); if (!continue_on_error) diff --git a/lldb/source/Core/PluginManager.cpp b/lldb/source/Core/PluginManager.cpp index d1a2f41ca99a2..1b13279940d26 100644 --- a/lldb/source/Core/PluginManager.cpp +++ b/lldb/source/Core/PluginManager.cpp @@ -171,8 +171,7 @@ llvm::Expected PluginInfo::Create(const FileSpec &path) { // Look for files that follow the convention ., in // which case we need to call lldb_initialize_ and // lldb_terminate_. 
- llvm::StringRef file_name = - path.GetFileNameStrippingExtension().GetStringRef(); + llvm::StringRef file_name = path.GetFileNameStrippingExtension(); if (file_name.starts_with(g_plugin_prefix)) { llvm::StringRef plugin_name = file_name.substr(g_plugin_prefix.size()); std::string init_symbol = diff --git a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp index 2b8ef1c9f23cf..aaa56d55ba903 100644 --- a/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp +++ b/lldb/source/Plugins/Platform/MacOSX/PlatformDarwinKernel.cpp @@ -621,11 +621,11 @@ bool PlatformDarwinKernel::KextHasdSYMSibling( // CFBundleCopyExecutableURL // Look for a deep bundle foramt - ConstString executable_name = + llvm::StringRef executable_name = kext_bundle_filepath.GetFileNameStrippingExtension(); std::string deep_bundle_str = kext_bundle_filepath.GetPath() + "/Contents/MacOS/"; - deep_bundle_str += executable_name.GetStringRef(); + deep_bundle_str += executable_name; deep_bundle_str += ".dSYM"; dsym_fspec.SetFile(deep_bundle_str, FileSpec::Style::native); FileSystem::Instance().Resolve(dsym_fspec); @@ -636,7 +636,7 @@ bool PlatformDarwinKernel::KextHasdSYMSibling( // look for a shallow bundle format // std::string shallow_bundle_str = kext_bundle_filepath.GetPath() + "/"; - shallow_bundle_str += executable_name.GetStringRef(); + shallow_bundle_str += executable_name; shallow_bundle_str += ".dSYM"; dsym_fspec.SetFile(shallow_bundle_str, FileSpec::Style::native); FileSystem::Instance().Resolve(dsym_fspec); @@ -696,8 +696,7 @@ PlatformDarwinKernel::GetDWARFBinaryInDSYMBundle(const FileSpec &dsym_bundle) { return results; } // Drop the '.dSYM' from the filename - std::string filename = - dsym_bundle.GetFileNameStrippingExtension().GetCString(); + llvm::StringRef filename = dsym_bundle.GetFileNameStrippingExtension(); std::string dirname = dsym_bundle.GetDirectory().GetCString(); std::string binary_filepath = 
dsym_bundle.GetPath(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index c81fd1c83be85..56fbf3fd771b5 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -4512,7 +4512,7 @@ const std::shared_ptr &SymbolFileDWARF::GetDwpSymbolFile() { // If we don't have a separate debug info file, then try stripping the // extension. The main module could be "a.debug" and the .dwp file could // be "a.dwp" instead of "a.debug.dwp". - ConstString filename_no_ext = + llvm::StringRef filename_no_ext = module_fspec.GetFileNameStrippingExtension(); if (filename_no_ext != module_fspec.GetFilename()) { FileSpec module_spec_no_ext(module_fspec); diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp index 1c04cbbabda03..78119936edf69 100644 --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -189,7 +189,7 @@ Platform::LocateExecutableScriptingResourcesFromSafePaths( target.GetDebugger() .GetScriptInterpreter() ->GetSanitizedScriptingModuleName( - module_spec.GetFileNameStrippingExtension().GetStringRef()); + module_spec.GetFileNameStrippingExtension()); FileSpecList paths = target.GetSafeAutoLoadPaths(); diff --git a/lldb/source/Utility/FileSpec.cpp b/lldb/source/Utility/FileSpec.cpp index 01b83f3e4c684..0498dddb37458 100644 --- a/lldb/source/Utility/FileSpec.cpp +++ b/lldb/source/Utility/FileSpec.cpp @@ -407,8 +407,8 @@ llvm::StringRef FileSpec::GetFileNameExtension() const { return llvm::sys::path::extension(m_filename.GetStringRef(), m_style); } -ConstString FileSpec::GetFileNameStrippingExtension() const { - return ConstString(llvm::sys::path::stem(m_filename.GetStringRef(), m_style)); +llvm::StringRef FileSpec::GetFileNameStrippingExtension() const { + return llvm::sys::path::stem(m_filename.GetStringRef(), m_style); } // Return the size in bytes that this object takes 
in memory. This returns the From 7e5684882a34985a63e75f7f5895d8f5db7b5bf6 Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Mon, 22 Jun 2026 13:45:07 -0700 Subject: [PATCH 084/511] [SSAF] Let UnsafeBufferUsageExtractor & PointerFlowExtractor ignore templates (#198927) Templates are ignored for two reasons: - Template instantiations are still handled. Template facts can be inferred from their instantiations. - Templates are inherently difficult to reason about. Their ASTs can contain dependent expression types (such as ParenListExpr) that complicate analysis. --- .../PointerFlow/PointerFlowExtractor.cpp | 6 ++ .../Analyses/SSAFAnalysesCommon.cpp | 5 ++ .../UnsafeBufferUsageExtractor.cpp | 6 ++ .../Analyses/PointerFlow/PointerFlowTest.cpp | 72 ++++++++++++++++++- .../UnsafeBufferUsageTest.cpp | 65 +++++++++++++++++ .../FindDecl.h | 6 +- 6 files changed, 158 insertions(+), 2 deletions(-) diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp b/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp index 39cee76440de4..38e3e3ec3ab9e 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp @@ -336,6 +336,12 @@ class PointerFlowTUSummaryExtractor : public TUSummaryExtractor { findContributors(Ctx, Contributors); for (auto *CD : Contributors) { + // Templates are skipped, but their instantiations are handled. The idea + // is that we can conclude facts about a template through all of its + // instantiations. 
+ if (CD->isTemplated()) + continue; + auto EntitySummary = extractEntitySummary(CD, Ctx, *this); assert(EntitySummary); diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.cpp b/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.cpp index 68a7374639300..660bc424fb32f 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.cpp @@ -34,6 +34,11 @@ class ContributorFinder : public DynamicRecursiveASTVisitor { public: std::set Contributors; + ContributorFinder() { + ShouldVisitTemplateInstantiations = true; + ShouldVisitImplicitCode = false; + } + bool VisitFunctionDecl(FunctionDecl *D) override { Contributors.insert(D); return true; diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp b/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp index be5e1f58fc019..f4067e5f315ff 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp @@ -74,6 +74,12 @@ void clang::ssaf::UnsafeBufferUsageTUSummaryExtractor::HandleTranslationUnit( findContributors(Ctx, Contributors); for (auto *CD : Contributors) { + // Templates are skipped, but their instantiations are handled. The idea + // is that we can conclude facts about a template through all of its + // instantiations. 
+ if (CD->isTemplated()) + continue; + auto EntitySummary = extractEntitySummary(CD, Ctx); assert(EntitySummary); diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp index ad1a654c5b241..44465a59d4cfd 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp @@ -104,9 +104,13 @@ const SomeDecl *findEntityByName(FindEntityByName Name, ASTContext &Ctx) { const SomeDecl *FoundDecl = nullptr; NamedDeclFinder(FindEntityByName SearchingName) - : SearchingName(SearchingName) {} + : SearchingName(SearchingName) { + ShouldVisitTemplateInstantiations = true; + } bool VisitDecl(Decl *D) override { + if (D->isTemplated()) + return true; if (auto *ND = dyn_cast(D)) { FoundDecl = llvm::dyn_cast_or_null( matchNamedDeclByFindEntityByName(SearchingName, ND)); @@ -1265,6 +1269,72 @@ TEST_F(PointerFlowTest, CXXConstructExprArrayInit) { EXPECT_EQ(*Sum, makeEdges(__LINE__, {{{"q", 1U}, {"arr", 1U}}})); } +////////////////////////////////////////////////////////////// +// Template is ignored but instantiations are visited. 
// +////////////////////////////////////////////////////////////// +TEST_F(PointerFlowTest, FunctionTemplate) { + ASSERT_TRUE(setUpTest(R"cpp( + template + T* f(T *p) { + int *q = p; + return q; + } + )cpp")); + ASSERT_FALSE(findEntityByName("f", AST->getASTContext())); +} + +TEST_F(PointerFlowTest, MethodInClassTemplate) { + ASSERT_TRUE(setUpTest(R"cpp( + template + struct Wrapper { + T *ptr; + void set(T *p) { ptr = p; } + }; + )cpp")); + ASSERT_FALSE(findEntityByName("set", AST->getASTContext())); +} + +TEST_F(PointerFlowTest, FunctionTemplateInstantiation) { + ASSERT_TRUE(setUpTest(R"cpp( + template + T* f(T *p) { + int *q = p; + return q; + } + + void test(int *p) { + f(p); + } + )cpp")); + + auto *Sum = getEntitySummary("f"); + + ASSERT_TRUE(Sum); + ASSERT_EQ(*Sum, makeEdges(__LINE__, {{{"q", 1U}, {"p", 1U}}, + {{"f", 1U, 1}, {"q", 1U}}})); +} + +TEST_F(PointerFlowTest, MethodInClassTemplateInstantiation) { + ASSERT_TRUE(setUpTest(R"cpp( + template + struct Wrapper { + T *ptr; + void set(T *p) { ptr = p; } + }; + + void test(int *p) { + Wrapper W; + + W.set(p); + } + )cpp")); + + auto *Sum = getEntitySummary("set"); + + ASSERT_TRUE(Sum); + ASSERT_EQ(*Sum, makeEdges(__LINE__, {{{"ptr", 1U}, {"p", 1U}}})); +} + ////////////////////////////////////////////////////////////// // Robustness Tests (No Crash Tests) // ////////////////////////////////////////////////////////////// diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageTest.cpp b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageTest.cpp index a47d802a9f48b..c2354e0c1ac1b 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageTest.cpp @@ -688,6 +688,71 @@ TEST_F(UnsafeBufferUsageTest, CXXScalarValueInitExpr) { EXPECT_EQ(*Sum, makeSet(__LINE__, {{"q", 1U}})); 
} +////////////////////////////////////////////////////////////// +// Template is ignored but instantiations are visited. // +////////////////////////////////////////////////////////////// + +TEST_F(UnsafeBufferUsageTest, FunctionTemplate) { + ASSERT_TRUE(setUpTest(R"cpp( + template + T* f(T *p) { + return &p[5]; + } + )cpp")); + ASSERT_FALSE(findDeclByName("f", AST->getASTContext())); +} + +TEST_F(UnsafeBufferUsageTest, MethodInClassTemplate) { + ASSERT_TRUE(setUpTest(R"cpp( + template + struct Wrapper { + T *ptr; + void set(T *p) { ptr = p[5]; } + }; + )cpp")); + ASSERT_FALSE(findDeclByName("set", AST->getASTContext())); +} + +TEST_F(UnsafeBufferUsageTest, FunctionTemplateInstantiation) { + ASSERT_TRUE(setUpTest(R"cpp( + template + void unsafe(T p) { + p[1] = p[2] + p[3]; + } + + void f(int *p) { + unsafe(p); + } + )cpp")); + + auto *Sum = getEntitySummary("unsafe"); + + ASSERT_TRUE(Sum); + ASSERT_EQ(*Sum, makeSet(__LINE__, {{"p", 1U}})); +} + +TEST_F(UnsafeBufferUsageTest, MethodInClassTemplateInstantiation) { + ASSERT_TRUE(setUpTest(R"cpp( + template + struct UnsafeClass { + T p; + void unsafe_method() { + p[1] = p[2] + p[3]; + } + }; + + void f(int *p) { + UnsafeClass UC; + + UC.unsafe_method(); + } + )cpp")); + + auto *Sum = getEntitySummary("unsafe_method"); + + ASSERT_TRUE(Sum); + EXPECT_EQ(*Sum, makeSet(__LINE__, {{"p", 1U}})); +} // Robustness test: unsupported constructs will not cause crash #ifndef NDEBUG TEST_F(UnsafeBufferUsageTest, StmtExprArrayAccess) { diff --git a/clang/unittests/ScalableStaticAnalysisFramework/FindDecl.h b/clang/unittests/ScalableStaticAnalysisFramework/FindDecl.h index 07c28c610380c..5ce50fe538cda 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/FindDecl.h +++ b/clang/unittests/ScalableStaticAnalysisFramework/FindDecl.h @@ -21,9 +21,13 @@ const SomeDecl *findDeclByName(StringRef Name, ASTContext &Ctx) { StringRef SearchingName; const NamedDecl *FoundDecl = nullptr; - NamedDeclFinder(StringRef SearchingName) : 
SearchingName(SearchingName) {} + NamedDeclFinder(StringRef SearchingName) : SearchingName(SearchingName) { + ShouldVisitTemplateInstantiations = true; + } bool VisitDecl(Decl *D) override { + if (D->isTemplated()) + return true; if (const auto *ND = dyn_cast(D)) { if (ND->getNameAsString() == SearchingName) { FoundDecl = ND; From 31a324a7e8b856fc07fbe13633b6cd170b91459f Mon Sep 17 00:00:00 2001 From: YongKang Zhu Date: Mon, 22 Jun 2026 13:47:07 -0700 Subject: [PATCH 085/511] [BOLT] Route alignment options through BinaryContext (NFC) (#204902) Consolidate all alignment-related `cl::opt` definitions into `CommandLineOpts.cpp`, expose matching public members on `BinaryContext`, and populate them from `RewriteInstance::adjustCommandLineOptions` (and mirrored in `MachORewriteInstance`). Switch all readers in Aligner, BinaryEmitter, LongJmp, BinaryFunction and the use-old-text logs to `BC.*` instead of `opts::*`. --- bolt/include/bolt/Core/BinaryContext.h | 16 ++++++ bolt/include/bolt/Utils/CommandLineOpts.h | 8 +++ bolt/lib/Core/BinaryEmitter.cpp | 20 ++----- bolt/lib/Core/BinaryFunction.cpp | 9 +--- bolt/lib/Passes/Aligner.cpp | 66 +++++------------------ bolt/lib/Passes/LongJmp.cpp | 13 ++--- bolt/lib/Rewrite/MachORewriteInstance.cpp | 14 ++++- bolt/lib/Rewrite/RewriteInstance.cpp | 17 +++++- bolt/lib/Utils/CommandLineOpts.cpp | 42 +++++++++++++++ 9 files changed, 120 insertions(+), 85 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryContext.h b/bolt/include/bolt/Core/BinaryContext.h index 3362b74e3f303..5785941d54320 100644 --- a/bolt/include/bolt/Core/BinaryContext.h +++ b/bolt/include/bolt/Core/BinaryContext.h @@ -816,6 +816,22 @@ class BinaryContext { std::atomic MaxMainCodeAlignment{1}; std::atomic MaxColdCodeAlignment{1}; + /// Alignment-related options sourced from CommandLineOpts. Populated by + /// RewriteInstance::adjustCommandLineOptions() so passes and the emitter + /// can read them via BinaryContext instead of touching opts::* directly. 
+ /// Defaults must stay in sync with the cl::init values in + /// bolt/lib/Utils/CommandLineOpts.cpp. + unsigned AlignText{0}; + unsigned AlignFunctions{64}; + unsigned AlignBlocksMinSize{0}; + unsigned AlignBlocksThreshold{800}; + unsigned AlignFunctionsMaxBytes{32}; + unsigned BlockAlignment{16}; + bool AlignBlocks{false}; + bool PreserveBlocksAlignment{false}; + bool UseCompactAligner{true}; + bool X86AlignBranchBoundaryHotOnly{true}; + /// Fold \p Alignment into the running max for the main code section (when /// \p InMainSection) and/or the cold code section (when \p InColdSection), /// reflecting which output section(s) the object is emitted into. Safe to diff --git a/bolt/include/bolt/Utils/CommandLineOpts.h b/bolt/include/bolt/Utils/CommandLineOpts.h index 994e352e16218..e11b18d3489cf 100644 --- a/bolt/include/bolt/Utils/CommandLineOpts.h +++ b/bolt/include/bolt/Utils/CommandLineOpts.h @@ -72,6 +72,14 @@ extern llvm::cl::OptionCategory BinaryAnalysisCategory; extern llvm::cl::opt AlignText; extern llvm::cl::opt AlignFunctions; +extern llvm::cl::opt AlignBlocks; +extern llvm::cl::opt AlignBlocksMinSize; +extern llvm::cl::opt AlignBlocksThreshold; +extern llvm::cl::opt AlignFunctionsMaxBytes; +extern llvm::cl::opt BlockAlignment; +extern llvm::cl::opt PreserveBlocksAlignment; +extern llvm::cl::opt UseCompactAligner; +extern llvm::cl::opt X86AlignBranchBoundaryHotOnly; extern llvm::cl::opt AggregateOnly; extern llvm::cl::opt ArmSPE; extern llvm::cl::opt BucketsPerLine; diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp index 43b42703c86d7..a555c7185448e 100644 --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -33,10 +33,6 @@ using namespace bolt; namespace opts { extern cl::opt JumpTables; -extern cl::opt PreserveBlocksAlignment; - -cl::opt AlignBlocks("align-blocks", cl::desc("align basic blocks"), - cl::cat(BoltOptCategory)); static cl::list BreakFunctionNames("break-funcs", @@ -68,12 +64,6 @@ static 
cl::opt PrintJumpTables("print-jump-tables", cl::desc("print jump tables"), cl::Hidden, cl::cat(BoltCategory)); -static cl::opt -X86AlignBranchBoundaryHotOnly("x86-align-branch-boundary-hot-only", - cl::desc("only apply branch boundary alignment in hot code"), - cl::init(true), - cl::cat(BoltOptCategory)); - size_t padFunction(std::map &FunctionPadding, const cl::list &Spec, const BinaryFunction &Function) { @@ -214,7 +204,7 @@ void BinaryEmitter::emitAll(StringRef OrgSecPrefix) { if (RuntimeLibrary *RtLibrary = BC.getRuntimeLibrary()) RtLibrary->emitBinary(BC, Streamer); - BC.getTextSection()->setAlignment(Align(opts::AlignText)); + BC.getTextSection()->setAlignment(Align(BC.AlignText)); emitFunctions(); @@ -246,7 +236,7 @@ void BinaryEmitter::emitFunctions() { bool Emitted = false; // Turn off Intel JCC Erratum mitigation for cold code if requested - if (HasProfile && opts::X86AlignBranchBoundaryHotOnly && + if (HasProfile && BC.X86AlignBranchBoundaryHotOnly && !Function->hasValidProfile()) Streamer.setAllowAutoPadding(false); @@ -254,7 +244,7 @@ void BinaryEmitter::emitFunctions() { Emitted |= emitFunction(*Function, Layout.getMainFragment()); if (Function->isSplit()) { - if (opts::X86AlignBranchBoundaryHotOnly) + if (BC.X86AlignBranchBoundaryHotOnly) Streamer.setAllowAutoPadding(false); assert((Layout.fragment_size() == 1 || Function->isSimple()) && @@ -317,7 +307,7 @@ bool BinaryEmitter::emitFunction(BinaryFunction &Function, // Set section alignment to at least maximum possible object alignment. // We need this to support LongJmp and other passes that calculates // tentative layout. - Section->ensureMinAlignment(Align(opts::AlignFunctions)); + Section->ensureMinAlignment(Align(BC.AlignFunctions)); Streamer.emitCodeAlignment(Function.getMinAlign(), *BC.STI); uint16_t MaxAlignBytes = FF.isSplitFragment() @@ -457,7 +447,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF, // Track the first emitted instruction with debug info. 
bool FirstInstr = true; for (BinaryBasicBlock *const BB : FF) { - if ((opts::AlignBlocks || opts::PreserveBlocksAlignment) && + if ((BC.AlignBlocks || BC.PreserveBlocksAlignment) && BB->getAlignment() > 1) Streamer.emitCodeAlignment(BB->getAlign(), *BC.STI, BB->getAlignmentMaxBytes()); diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 0e538fa48907a..79e92b79f6fee 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -111,11 +111,6 @@ static cl::opt NoScan( "slower binary)"), cl::Hidden, cl::cat(BoltOptCategory)); -cl::opt - PreserveBlocksAlignment("preserve-blocks-alignment", - cl::desc("try to preserve basic block alignment"), - cl::cat(BoltOptCategory)); - static cl::opt PrintOutputAddressRange( "print-output-address-range", cl::desc( @@ -2351,7 +2346,7 @@ Error BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) { // Always create new BB at branch destination. PrevBB = InsertBB ? InsertBB : PrevBB; InsertBB = addBasicBlockAt(LI->first, LI->second); - if (opts::PreserveBlocksAlignment && IsLastInstrNop) + if (BC.PreserveBlocksAlignment && IsLastInstrNop) InsertBB->setDerivedAlignment(); if (PrevBB) @@ -2388,7 +2383,7 @@ Error BinaryFunction::buildCFG(MCPlusBuilder::AllocatorIdTy AllocatorId) { Label = BC.Ctx->createNamedTempSymbol("FT"); } InsertBB = addBasicBlockAt(Offset, Label); - if (opts::PreserveBlocksAlignment && IsLastInstrNop) + if (BC.PreserveBlocksAlignment && IsLastInstrNop) InsertBB->setDerivedAlignment(); updateOffset(LastInstrOffset); } diff --git a/bolt/lib/Passes/Aligner.cpp b/bolt/lib/Passes/Aligner.cpp index 3157af1fc5530..798148491abef 100644 --- a/bolt/lib/Passes/Aligner.cpp +++ b/bolt/lib/Passes/Aligner.cpp @@ -17,54 +17,16 @@ using namespace llvm; -namespace opts { - -extern cl::OptionCategory BoltOptCategory; - -extern cl::opt AlignBlocks; -extern cl::opt PreserveBlocksAlignment; -extern cl::opt AlignFunctions; - -static cl::opt AlignBlocksMinSize( - 
"align-blocks-min-size", - cl::desc("minimal size of the basic block that should be aligned"), - cl::init(0), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); - -static cl::opt AlignBlocksThreshold( - "align-blocks-threshold", - cl::desc( - "align only blocks with frequency larger than containing function " - "execution frequency specified in percent. E.g. 1000 means aligning " - "blocks that are 10 times more frequently executed than the " - "containing function."), - cl::init(800), cl::Hidden, cl::cat(BoltOptCategory)); - -static cl::opt AlignFunctionsMaxBytes( - "align-functions-max-bytes", - cl::desc("maximum number of bytes to use to align functions"), cl::init(32), - cl::cat(BoltOptCategory)); - -static cl::opt - BlockAlignment("block-alignment", - cl::desc("boundary to use for alignment of basic blocks"), - cl::init(16), cl::ZeroOrMore, cl::cat(BoltOptCategory)); - -static cl::opt - UseCompactAligner("use-compact-aligner", - cl::desc("Use compact approach for aligning functions"), - cl::init(true), cl::cat(BoltOptCategory)); - -} // end namespace opts - namespace llvm { namespace bolt { // Align function to the specified byte-boundary (typically, 64) offsetting // the function by not more than the corresponding value static void alignMaxBytes(BinaryFunction &Function) { - Function.setAlignment(opts::AlignFunctions); - Function.setMaxAlignmentBytes(opts::AlignFunctionsMaxBytes); - Function.setMaxColdAlignmentBytes(opts::AlignFunctionsMaxBytes); + const BinaryContext &BC = Function.getBinaryContext(); + Function.setAlignment(BC.AlignFunctions); + Function.setMaxAlignmentBytes(BC.AlignFunctionsMaxBytes); + Function.setMaxColdAlignmentBytes(BC.AlignFunctionsMaxBytes); } // Align function to the specified byte-boundary (typically, 64) offsetting @@ -90,16 +52,16 @@ static void alignCompact(BinaryFunction &Function, else HotSize += BC.computeCodeSize(BB.begin(), BB.end(), Emitter); - Function.setAlignment(opts::AlignFunctions); + 
Function.setAlignment(BC.AlignFunctions); if (HotSize > 0) Function.setMaxAlignmentBytes( - std::min(size_t(opts::AlignFunctionsMaxBytes), HotSize)); + std::min(size_t(BC.AlignFunctionsMaxBytes), HotSize)); // using the same option, max-align-bytes, both for cold and hot parts of the // functions, as aligning cold functions typically does not affect performance if (ColdSize > 0) Function.setMaxColdAlignmentBytes( - std::min(size_t(opts::AlignFunctionsMaxBytes), ColdSize)); + std::min(size_t(BC.AlignFunctionsMaxBytes), ColdSize)); } void AlignerPass::alignBlocks(BinaryFunction &Function, @@ -115,7 +77,7 @@ void AlignerPass::alignBlocks(BinaryFunction &Function, for (BinaryBasicBlock *BB : Function.getLayout().blocks()) { uint64_t Count = BB->getKnownExecutionCount(); - if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) { + if (Count <= FuncCount * BC.AlignBlocksThreshold / 100) { PrevBB = BB; continue; } @@ -132,12 +94,12 @@ void AlignerPass::alignBlocks(BinaryFunction &Function, const uint64_t BlockSize = BC.computeCodeSize(BB->begin(), BB->end(), Emitter); const uint64_t BytesToUse = - std::min(opts::BlockAlignment - 1, BlockSize); + std::min(BC.BlockAlignment - 1, BlockSize); - if (opts::AlignBlocksMinSize && BlockSize < opts::AlignBlocksMinSize) + if (BC.AlignBlocksMinSize && BlockSize < BC.AlignBlocksMinSize) continue; - BB->setAlignment(opts::BlockAlignment); + BB->setAlignment(BC.BlockAlignment); BB->setAlignmentMaxBytes(BytesToUse); // Update stats. 
@@ -153,14 +115,14 @@ Error AlignerPass::runOnFunctions(BinaryContext &BC) { if (!BC.HasRelocations) return Error::success(); - AlignHistogram.resize(opts::BlockAlignment); + AlignHistogram.resize(BC.BlockAlignment); ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { // Create a separate MCCodeEmitter to allow lock free execution BinaryContext::IndependentCodeEmitter Emitter = BC.createIndependentMCCodeEmitter(); - if (opts::UseCompactAligner) + if (BC.UseCompactAligner) alignCompact(BF, Emitter.MCE.get()); else alignMaxBytes(BF); @@ -185,7 +147,7 @@ Error AlignerPass::runOnFunctions(BinaryContext &BC) { BC.getColdCodeSectionName(); BC.updateMaxCodeAlignment(Align, InMainSection, InColdSection); - if (opts::AlignBlocks && !opts::PreserveBlocksAlignment) + if (BC.AlignBlocks && !BC.PreserveBlocksAlignment) alignBlocks(BF, Emitter.MCE.get()); }; diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp index 63368443c46b8..b771e6a8b120a 100644 --- a/bolt/lib/Passes/LongJmp.cpp +++ b/bolt/lib/Passes/LongJmp.cpp @@ -22,8 +22,6 @@ using namespace llvm; namespace opts { extern cl::OptionCategory BoltCategory; extern cl::OptionCategory BoltOptCategory; -extern llvm::cl::opt AlignText; -extern cl::opt AlignFunctions; extern cl::opt UseOldText; extern cl::opt HotFunctionsAtEnd; @@ -318,7 +316,7 @@ uint64_t LongJmpPass::tentativeLayoutRelocColdPart( const BinaryContext &BC, BinaryFunctionListType &SortedFunctions, uint64_t DotAddress) { DotAddress = - alignTo(DotAddress, std::max(opts::AlignFunctions, + alignTo(DotAddress, std::max(BC.AlignFunctions, BC.MaxColdCodeAlignment.load())); for (BinaryFunction *Func : SortedFunctions) { if (!Func->isSplit()) @@ -375,11 +373,11 @@ LongJmpPass::tentativeLayoutRelocMode(const BinaryContext &BC, // after the last non-cold section. Account for it before assigning cold // fragment addresses so range checks see the hot-to-cold gap. 
if (opts::Hugify && !BC.HasFixedLoadAddress && !opts::HotFunctionsAtEnd) - DotAddress = alignTo(DotAddress, opts::AlignText); + DotAddress = alignTo(DotAddress, BC.AlignText); DotAddress = tentativeLayoutRelocColdPart(BC, SortedFunctions, DotAddress); ColdLayoutDone = true; if (opts::HotFunctionsAtEnd) - DotAddress = alignTo(DotAddress, opts::AlignText); + DotAddress = alignTo(DotAddress, BC.AlignText); }; for (BinaryFunction *Func : SortedFunctions) { if (!BC.shouldEmit(*Func)) { @@ -446,8 +444,7 @@ void LongJmpPass::tentativeLayout(const BinaryContext &BC, // Initial padding if (EstimatedTextSize <= BC.OldTextSectionSize) { DotAddress = BC.OldTextSectionAddress; - uint64_t Pad = - offsetToAlignment(DotAddress, llvm::Align(opts::AlignText)); + uint64_t Pad = offsetToAlignment(DotAddress, llvm::Align(BC.AlignText)); if (Pad + EstimatedTextSize <= BC.OldTextSectionSize) { DotAddress += Pad; } @@ -456,7 +453,7 @@ void LongJmpPass::tentativeLayout(const BinaryContext &BC, if (!EstimatedTextSize || EstimatedTextSize > BC.OldTextSectionSize) { uint64_t TextAlign = - std::max(opts::AlignText, BC.MaxMainCodeAlignment.load()); + std::max(BC.AlignText, BC.MaxMainCodeAlignment.load()); DotAddress = alignTo(BC.LayoutStartAddress, TextAlign); } diff --git a/bolt/lib/Rewrite/MachORewriteInstance.cpp b/bolt/lib/Rewrite/MachORewriteInstance.cpp index 17f726ae13945..8762572c6cf76 100644 --- a/bolt/lib/Rewrite/MachORewriteInstance.cpp +++ b/bolt/lib/Rewrite/MachORewriteInstance.cpp @@ -32,7 +32,6 @@ namespace opts { using namespace llvm; -extern cl::opt AlignText; // FIXME! 
Upstream change // extern cl::opt CheckOverlappingElements; extern cl::opt Instrument; @@ -557,6 +556,19 @@ void MachORewriteInstance::adjustCommandLineOptions() { opts::JumpTables = JTS_MOVE; opts::InstrumentCalls = false; opts::RuntimeInstrumentationLib = "libbolt_rt_instr_osx.a"; + + // Mirror alignment-related command line options onto BinaryContext so passes + // and the emitter can read them via BC instead of touching opts::*. + BC->AlignText = opts::AlignText; + BC->AlignFunctions = opts::AlignFunctions; + BC->AlignBlocks = opts::AlignBlocks; + BC->AlignBlocksMinSize = opts::AlignBlocksMinSize; + BC->AlignBlocksThreshold = opts::AlignBlocksThreshold; + BC->AlignFunctionsMaxBytes = opts::AlignFunctionsMaxBytes; + BC->BlockAlignment = opts::BlockAlignment; + BC->PreserveBlocksAlignment = opts::PreserveBlocksAlignment; + BC->UseCompactAligner = opts::UseCompactAligner; + BC->X86AlignBranchBoundaryHotOnly = opts::X86AlignBranchBoundaryHotOnly; } void MachORewriteInstance::run() { diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 9fcfadbfd752c..73d57c5c95221 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -2447,6 +2447,19 @@ void RewriteInstance::adjustCommandLineOptions() { if (opts::AlignText < opts::AlignFunctions) opts::AlignText = (unsigned)opts::AlignFunctions; + // Mirror alignment-related command line options onto BinaryContext so passes + // and the emitter can read them via BC instead of touching opts::*. 
+ BC->AlignText = opts::AlignText; + BC->AlignFunctions = opts::AlignFunctions; + BC->AlignBlocks = opts::AlignBlocks; + BC->AlignBlocksMinSize = opts::AlignBlocksMinSize; + BC->AlignBlocksThreshold = opts::AlignBlocksThreshold; + BC->AlignFunctionsMaxBytes = opts::AlignFunctionsMaxBytes; + BC->BlockAlignment = opts::BlockAlignment; + BC->PreserveBlocksAlignment = opts::PreserveBlocksAlignment; + BC->UseCompactAligner = opts::UseCompactAligner; + BC->X86AlignBranchBoundaryHotOnly = opts::X86AlignBranchBoundaryHotOnly; + if (BC->isX86() && opts::Lite.getNumOccurrences() == 0 && !opts::StrictMode && !opts::UseOldText) opts::Lite = true; @@ -4323,7 +4336,7 @@ void RewriteInstance::mapCodeSections(BOLTLinker::SectionMapper MapSection) { const uint64_t CodeSize = EndAddress - StartAddress; if (CodeSize <= BC->OldTextSectionSize) { BC->outs() << "BOLT-INFO: using original .text for new code with 0x" - << Twine::utohexstr(opts::AlignText) << " alignment"; + << Twine::utohexstr(BC->AlignText) << " alignment"; if (StartAddress != BC->OldTextSectionAddress) BC->outs() << " at 0x" << Twine::utohexstr(StartAddress); BC->outs() << '\n'; @@ -4331,7 +4344,7 @@ void RewriteInstance::mapCodeSections(BOLTLinker::SectionMapper MapSection) { } else { BC->errs() << "BOLT-WARNING: --use-old-text failed. The original .text " "too small to fit the new code using 0x" - << Twine::utohexstr(opts::AlignText) << " alignment. " + << Twine::utohexstr(BC->AlignText) << " alignment. " << CodeSize << " bytes needed, have " << BC->OldTextSectionSize << " bytes available. 
Rebuilding without --use-old-text may " "produce a smaller binary\n"; diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index ee34b7075ee31..20b24c3b4acc5 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -51,6 +51,48 @@ cl::opt AlignFunctions( cl::desc("align functions at a given value (relocation mode)"), cl::init(64), cl::cat(BoltOptCategory)); +cl::opt AlignBlocks("align-blocks", cl::desc("align basic blocks"), + cl::cat(BoltOptCategory)); + +cl::opt AlignBlocksMinSize( + "align-blocks-min-size", + cl::desc("minimal size of the basic block that should be aligned"), + cl::init(0), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory)); + +cl::opt AlignBlocksThreshold( + "align-blocks-threshold", + cl::desc( + "align only blocks with frequency larger than containing function " + "execution frequency specified in percent. E.g. 1000 means aligning " + "blocks that are 10 times more frequently executed than the " + "containing function."), + cl::init(800), cl::Hidden, cl::cat(BoltOptCategory)); + +cl::opt AlignFunctionsMaxBytes( + "align-functions-max-bytes", + cl::desc("maximum number of bytes to use to align functions"), cl::init(32), + cl::cat(BoltOptCategory)); + +cl::opt + BlockAlignment("block-alignment", + cl::desc("boundary to use for alignment of basic blocks"), + cl::init(16), cl::ZeroOrMore, cl::cat(BoltOptCategory)); + +cl::opt + PreserveBlocksAlignment("preserve-blocks-alignment", + cl::desc("try to preserve basic block alignment"), + cl::cat(BoltOptCategory)); + +cl::opt + UseCompactAligner("use-compact-aligner", + cl::desc("Use compact approach for aligning functions"), + cl::init(true), cl::cat(BoltOptCategory)); + +cl::opt X86AlignBranchBoundaryHotOnly( + "x86-align-branch-boundary-hot-only", + cl::desc("only apply branch boundary alignment in hot code"), + cl::init(true), cl::cat(BoltOptCategory)); + cl::opt AggregateOnly("aggregate-only", cl::desc("exit after writing 
aggregated data file"), From fc9be81ee1d3edb911e8f6a67fedab6af7d95a6b Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 22 Jun 2026 21:48:16 +0100 Subject: [PATCH 086/511] [AArch64][GlobalISel] Update some scalar types to integer in tests. NFC (#205190) --- .../AArch64/GlobalISel/legalize-and.mir | 64 ++-- .../AArch64/GlobalISel/legalize-or.mir | 321 +++++++++--------- .../AArch64/GlobalISel/legalize-xor.mir | 314 ++++++++--------- 3 files changed, 345 insertions(+), 354 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir index 252960568fe94..64dc779687dd5 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-and.mir @@ -79,17 +79,16 @@ tracksRegLiveness: true body: | bb.0: liveins: $x0 - ; CHECK-LABEL: name: test_s318_and ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[DEF]], [[DEF]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[DEF]], [[DEF]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[DEF]], [[DEF]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[DEF]], [[DEF]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[DEF]], [[DEF]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[DEF]], [[DEF]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[DEF]], [[DEF]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[DEF]], [[DEF]] + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[DEF]], [[DEF]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i64) = G_AND [[DEF]], [[DEF]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4611686018427387903 ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i64) = G_AND [[AND]], [[C]] @@ -111,11 +110,11 @@ body: | ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds 
G_PTR_ADD %ptr, [[C5]](i64) ; CHECK-NEXT: G_STORE [[AND9]](i64), [[PTR_ADD3]](p0) :: (store (i64) into unknown-address + 32, align 32) ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %a:_(s318) = G_IMPLICIT_DEF - %b:_(s318) = G_IMPLICIT_DEF + %a:_(i318) = G_IMPLICIT_DEF + %b:_(i318) = G_IMPLICIT_DEF %ptr:_(p0) = COPY $x0 - %and:_(s318) = G_AND %a, %b - G_STORE %and(s318), %ptr(p0) :: (store (s318)) + %and:_(i318) = G_AND %a, %b + G_STORE %and(i318), %ptr(p0) :: (store (i318)) RET_ReallyLR implicit $x0 ... --- @@ -127,39 +126,32 @@ body: | ; CHECK-LABEL: name: test_s158_and ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[DEF]], [[DEF]] - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s64) = G_AND [[DEF]], [[DEF]] - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s64) = G_AND [[DEF]], [[DEF]] - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s64) = G_AND [[DEF]], [[DEF]] - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s64) = G_AND [[DEF]], [[DEF]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[DEF]], [[DEF]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(i64) = G_AND [[DEF]], [[DEF]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(i64) = G_AND [[DEF]], [[DEF]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4611686018427387903 - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i64) = G_AND [[AND]], [[C]] - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(i64) = G_AND [[AND1]], [[C]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(i64) = G_AND [[AND2]], [[C]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(i64) = G_AND [[AND3]], [[C]] - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(i64) = G_AND [[AND4]], [[C1]] - ; CHECK-NEXT: G_STORE [[AND5]](i64), %ptr(p0) :: (store (i64), align 64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1073741823 + ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(i64) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(i64) = G_AND [[AND1]], 
[[C]] + ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(i64) = G_AND [[AND2]], [[C1]] + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY [[AND3]](i64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY [[AND4]](i64) + ; CHECK-NEXT: [[EXTRACT:%[0-9]+]]:_(i32) = G_EXTRACT [[AND5]](i64), 0 + ; CHECK-NEXT: G_STORE [[COPY]](i64), %ptr(p0) :: (store (i64), align 32) ; CHECK-NEXT: [[C2:%[0-9]+]]:_(i64) = G_CONSTANT i64 8 ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C2]](i64) - ; CHECK-NEXT: G_STORE [[AND6]](i64), [[PTR_ADD]](p0) :: (store (i64) into unknown-address + 8) + ; CHECK-NEXT: G_STORE [[COPY1]](i64), [[PTR_ADD]](p0) :: (store (i64) into unknown-address + 8) ; CHECK-NEXT: [[C3:%[0-9]+]]:_(i64) = G_CONSTANT i64 16 ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C3]](i64) - ; CHECK-NEXT: G_STORE [[AND7]](i64), [[PTR_ADD1]](p0) :: (store (i64) into unknown-address + 16, align 16) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(i64) = G_CONSTANT i64 24 - ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C4]](i64) - ; CHECK-NEXT: G_STORE [[AND8]](i64), [[PTR_ADD2]](p0) :: (store (i64) into unknown-address + 24) - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(i64) = G_CONSTANT i64 32 - ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C5]](i64) - ; CHECK-NEXT: G_STORE [[AND9]](i64), [[PTR_ADD3]](p0) :: (store (i64) into unknown-address + 32, align 32) + ; CHECK-NEXT: G_STORE [[EXTRACT]](i32), [[PTR_ADD1]](p0) :: (store (i32) into unknown-address + 16, align 16) ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %a:_(s318) = G_IMPLICIT_DEF - %b:_(s318) = G_IMPLICIT_DEF + %a:_(i158) = G_IMPLICIT_DEF + %b:_(i158) = G_IMPLICIT_DEF %ptr:_(p0) = COPY $x0 - %and:_(s318) = G_AND %a, %b - G_STORE %and(s318), %ptr(p0) :: (store (s318)) + %and:_(i158) = G_AND %a, %b + G_STORE %and(i158), %ptr(p0) :: (store (i158)) RET_ReallyLR implicit $x0 ... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-or.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-or.mir index c28f91bc266b4..111d3cba57b53 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-or.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-or.mir @@ -9,19 +9,19 @@ body: | ; CHECK-LABEL: name: test_scalar_or_small ; CHECK: liveins: $x0, $x1, $x2, $x3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32) - ; CHECK-NEXT: $x0 = COPY [[ANYEXT]](s64) - %0:_(s64) = COPY $x0 - %1:_(s64) = COPY $x1 - %2:_(s8) = G_TRUNC %0 - %3:_(s8) = G_TRUNC %1 - %4:_(s8) = G_OR %2, %3 - %5:_(s64) = G_ANYEXT %4 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $x1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i32) = G_OR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[OR]](i32) + ; CHECK-NEXT: $x0 = COPY [[ANYEXT]](i64) + %0:_(i64) = COPY $x0 + %1:_(i64) = COPY $x1 + %2:_(i8) = G_TRUNC %0 + %3:_(i8) = G_TRUNC %1 + %4:_(i8) = G_OR %2, %3 + %5:_(i64) = G_ANYEXT %4 $x0 = COPY %5 ... 
@@ -37,44 +37,43 @@ body: | ; CHECK-LABEL: name: test_big_scalar_power_of_2 ; CHECK: liveins: $x0, $x1, $x2, $x3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[COPY]], [[COPY2]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[COPY1]], [[COPY3]] - ; CHECK-NEXT: $x0 = COPY [[OR]](s64) - ; CHECK-NEXT: $x1 = COPY [[OR1]](s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i64) = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(i64) = COPY $x3 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[COPY1]], [[COPY3]] + ; CHECK-NEXT: $x0 = COPY [[OR]](i64) + ; CHECK-NEXT: $x1 = COPY [[OR1]](i64) ; CHECK-NEXT: RET_ReallyLR implicit $x0, implicit $x1 - %0:_(s64) = COPY $x0 - %1:_(s64) = COPY $x1 - %2:_(s64) = COPY $x2 - %3:_(s64) = COPY $x3 - %4:_(s128) = G_MERGE_VALUES %0, %1 - %5:_(s128) = G_MERGE_VALUES %2, %3 - %6:_(s128) = G_OR %4, %5 - %7:_(s64), %8:_(s64) = G_UNMERGE_VALUES %6 + %0:_(i64) = COPY $x0 + %1:_(i64) = COPY $x1 + %2:_(i64) = COPY $x2 + %3:_(i64) = COPY $x3 + %4:_(i128) = G_MERGE_VALUES %0, %1 + %5:_(i128) = G_MERGE_VALUES %2, %3 + %6:_(i128) = G_OR %4, %5 + %7:_(i64), %8:_(i64) = G_UNMERGE_VALUES %6 $x0 = COPY %7 $x1 = COPY %8 RET_ReallyLR implicit $x0, implicit $x1 ... 
--- -name: test_s318_or +name: test_i318_or tracksRegLiveness: true body: | bb.0: liveins: $x0 - - ; CHECK-LABEL: name: test_s318_or + ; CHECK-LABEL: name: test_i318_or ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s64) = G_OR [[DEF]], [[DEF]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s64) = G_OR [[DEF]], [[DEF]] - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s64) = G_OR [[DEF]], [[DEF]] - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s64) = G_OR [[DEF]], [[DEF]] - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s64) = G_OR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(i64) = G_OR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(i64) = G_OR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(i64) = G_OR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(i64) = G_OR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(i64) = G_OR [[DEF]], [[DEF]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4611686018427387903 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[OR]], [[C]] @@ -96,11 +95,11 @@ body: | ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C5]](i64) ; CHECK-NEXT: G_STORE [[AND4]](i64), [[PTR_ADD3]](p0) :: (store (i64) into unknown-address + 32, align 32) ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %a:_(s318) = G_IMPLICIT_DEF - %b:_(s318) = G_IMPLICIT_DEF + %a:_(i318) = G_IMPLICIT_DEF + %b:_(i318) = G_IMPLICIT_DEF %ptr:_(p0) = COPY $x0 - %or:_(s318) = G_OR %a, %b - G_STORE %or(s318), %ptr(p0) :: (store (s318)) + %or:_(i318) = G_OR %a, %b + G_STORE %or(i318), %ptr(p0) :: (store (i318)) RET_ReallyLR implicit $x0 ... 
@@ -109,19 +108,19 @@ name: test_vector_or_v16s16 body: | bb.0.entry: ; CHECK-LABEL: name: test_vector_or_v16s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<8 x s16>) = G_OR [[COPY]], [[COPY]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<8 x s16>) = G_OR [[COPY1]], [[COPY1]] - ; CHECK-NEXT: $q0 = COPY [[OR]](<8 x s16>) - ; CHECK-NEXT: $q1 = COPY [[OR1]](<8 x s16>) - %1:_(<8 x s16>) = COPY $q0 - %2:_(<8 x s16>) = COPY $q1 - %0:_(<16 x s16>) = G_CONCAT_VECTORS %1(<8 x s16>), %2(<8 x s16>) - %3:_(<16 x s16>) = G_OR %0, %0 - %4:_(<8 x s16>), %5:_(<8 x s16>) = G_UNMERGE_VALUES %3(<16 x s16>) - $q0 = COPY %4(<8 x s16>) - $q1 = COPY %5(<8 x s16>) + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x i16>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $q1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<8 x i16>) = G_OR [[COPY]], [[COPY]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<8 x i16>) = G_OR [[COPY1]], [[COPY1]] + ; CHECK-NEXT: $q0 = COPY [[OR]](<8 x i16>) + ; CHECK-NEXT: $q1 = COPY [[OR1]](<8 x i16>) + %1:_(<8 x i16>) = COPY $q0 + %2:_(<8 x i16>) = COPY $q1 + %0:_(<16 x i16>) = G_CONCAT_VECTORS %1(<8 x i16>), %2(<8 x i16>) + %3:_(<16 x i16>) = G_OR %0, %0 + %4:_(<8 x i16>), %5:_(<8 x i16>) = G_UNMERGE_VALUES %3(<16 x i16>) + $q0 = COPY %4(<8 x i16>) + $q1 = COPY %5(<8 x i16>) ... 
--- @@ -129,19 +128,19 @@ name: test_vector_or_v32s8 body: | bb.0.entry: ; CHECK-LABEL: name: test_vector_or_v32s8 - ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<16 x s8>) = G_OR [[COPY]], [[COPY]] - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<16 x s8>) = G_OR [[COPY1]], [[COPY1]] - ; CHECK-NEXT: $q0 = COPY [[OR]](<16 x s8>) - ; CHECK-NEXT: $q1 = COPY [[OR1]](<16 x s8>) - %0:_(<16 x s8>) = COPY $q0 - %1:_(<16 x s8>) = COPY $q1 - %2:_(<32 x s8>) = G_CONCAT_VECTORS %0, %1 - %3:_(<32 x s8>) = G_OR %2, %2 - %7:_(<16 x s8>), %8:_(<16 x s8>) = G_UNMERGE_VALUES %3(<32 x s8>) - $q0 = COPY %7(<16 x s8>) - $q1 = COPY %8(<16 x s8>) + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x i8>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x i8>) = COPY $q1 + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<16 x i8>) = G_OR [[COPY]], [[COPY]] + ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(<16 x i8>) = G_OR [[COPY1]], [[COPY1]] + ; CHECK-NEXT: $q0 = COPY [[OR]](<16 x i8>) + ; CHECK-NEXT: $q1 = COPY [[OR1]](<16 x i8>) + %0:_(<16 x i8>) = COPY $q0 + %1:_(<16 x i8>) = COPY $q1 + %2:_(<32 x i8>) = G_CONCAT_VECTORS %0, %1 + %3:_(<32 x i8>) = G_OR %2, %2 + %7:_(<16 x i8>), %8:_(<16 x i8>) = G_UNMERGE_VALUES %3(<32 x i8>) + $q0 = COPY %7(<16 x i8>) + $q1 = COPY %8(<16 x i8>) ... 
--- @@ -154,24 +153,24 @@ body: | ; CHECK-LABEL: name: or_v2s1 ; CHECK: liveins: $d0, $d1, $d2, $d3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $d2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $d3 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY]](<2 x s32>), [[COPY1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY2]](<2 x s32>), [[COPY3]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: $d0 = COPY [[OR]](<2 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $d2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i32>) = COPY $d3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x i32>) = G_ICMP intpred(eq), [[COPY]](<2 x i32>), [[COPY1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<2 x i32>) = G_ICMP intpred(eq), [[COPY2]](<2 x i32>), [[COPY3]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<2 x i32>) = G_OR [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: $d0 = COPY [[OR]](<2 x i32>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 - %0:_(<2 x s32>) = COPY $d0 - %1:_(<2 x s32>) = COPY $d1 - %2:_(<2 x s32>) = COPY $d2 - %3:_(<2 x s32>) = COPY $d3 - %4:_(<2 x s1>) = G_ICMP intpred(eq), %0(<2 x s32>), %1 - %5:_(<2 x s1>) = G_ICMP intpred(eq), %2(<2 x s32>), %3 - %6:_(<2 x s1>) = G_OR %4, %5 - %7:_(<2 x s32>) = G_ANYEXT %6 - $d0 = COPY %7:_(<2 x s32>) + %0:_(<2 x i32>) = COPY $d0 + %1:_(<2 x i32>) = COPY $d1 + %2:_(<2 x i32>) = COPY $d2 + %3:_(<2 x i32>) = COPY $d3 + %4:_(<2 x i1>) = G_ICMP intpred(eq), %0(<2 x i32>), %1 + %5:_(<2 x i1>) = G_ICMP intpred(eq), %2(<2 x i32>), %3 + %6:_(<2 x i1>) = G_OR %4, %5 + %7:_(<2 x i32>) = G_ANYEXT %6 + $d0 = COPY %7:_(<2 x i32>) RET_ReallyLR implicit $d0 ... 
--- @@ -184,32 +183,32 @@ body: | ; CHECK-LABEL: name: or_v3s1 ; CHECK: liveins: $b0, $b1, $b2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16) - ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) - ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8) - ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8) - ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT3]](s16), [[ANYEXT4]](s16), [[ANYEXT5]](s16), [[DEF]](s16) - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR]](<4 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16) - ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i8) = COPY $b0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i8) = COPY $b1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i8) = COPY $b2 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[COPY]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[COPY1]](i8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i16) = G_ANYEXT [[COPY2]](i8) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i16>) = G_BUILD_VECTOR [[ANYEXT]](i16), [[ANYEXT1]](i16), [[ANYEXT2]](i16), [[DEF]](i16) + ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(i16) = G_ANYEXT [[COPY]](i8) + ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(i16) 
= G_ANYEXT [[COPY1]](i8) + ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(i16) = G_ANYEXT [[COPY2]](i8) + ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x i16>) = G_BUILD_VECTOR [[ANYEXT3]](i16), [[ANYEXT4]](i16), [[ANYEXT5]](i16), [[DEF]](i16) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x i16>) = G_OR [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16), [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[OR]](<4 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[UV]](i16) + ; CHECK-NEXT: $b0 = COPY [[TRUNC]](i8) ; CHECK-NEXT: RET_ReallyLR implicit $b0 - %1:_(s8) = COPY $b0 - %2:_(s8) = COPY $b1 - %3:_(s8) = COPY $b2 - %4:_(<3 x s8>) = G_BUILD_VECTOR %1(s8), %2(s8), %3(s8) - %0:_(<3 x s1>) = G_TRUNC %4(<3 x s8>) - %5:_(<3 x s1>) = G_OR %0, %0 - %7:_(<3 x s8>) = G_ANYEXT %5(<3 x s1>) - %8:_(s8), %9:_(s8), %10:_(s8) = G_UNMERGE_VALUES %7(<3 x s8>) - $b0 = COPY %8:_(s8) + %1:_(i8) = COPY $b0 + %2:_(i8) = COPY $b1 + %3:_(i8) = COPY $b2 + %4:_(<3 x i8>) = G_BUILD_VECTOR %1(i8), %2(i8), %3(i8) + %0:_(<3 x i1>) = G_TRUNC %4(<3 x i8>) + %5:_(<3 x i1>) = G_OR %0, %0 + %7:_(<3 x i8>) = G_ANYEXT %5(<3 x i1>) + %8:_(i8), %9:_(i8), %10:_(i8) = G_UNMERGE_VALUES %7(<3 x i8>) + $b0 = COPY %8:_(i8) RET_ReallyLR implicit $b0 ... 
--- @@ -222,24 +221,24 @@ body: | ; CHECK-LABEL: name: or_v4s1 ; CHECK: liveins: $d0, $d1, $d2, $d3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $d2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s16>) = COPY $d3 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY]](<4 x s16>), [[COPY1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY2]](<4 x s16>), [[COPY3]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: $d0 = COPY [[OR]](<4 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $d2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x i16>) = COPY $d3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x i16>) = G_ICMP intpred(eq), [[COPY]](<4 x i16>), [[COPY1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x i16>) = G_ICMP intpred(eq), [[COPY2]](<4 x i16>), [[COPY3]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x i16>) = G_OR [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: $d0 = COPY [[OR]](<4 x i16>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 - %0:_(<4 x s16>) = COPY $d0 - %1:_(<4 x s16>) = COPY $d1 - %2:_(<4 x s16>) = COPY $d2 - %3:_(<4 x s16>) = COPY $d3 - %4:_(<4 x s1>) = G_ICMP intpred(eq), %0(<4 x s16>), %1 - %5:_(<4 x s1>) = G_ICMP intpred(eq), %2(<4 x s16>), %3 - %6:_(<4 x s1>) = G_OR %4, %5 - %7:_(<4 x s16>) = G_ANYEXT %6 - $d0 = COPY %7:_(<4 x s16>) + %0:_(<4 x i16>) = COPY $d0 + %1:_(<4 x i16>) = COPY $d1 + %2:_(<4 x i16>) = COPY $d2 + %3:_(<4 x i16>) = COPY $d3 + %4:_(<4 x i1>) = G_ICMP intpred(eq), %0(<4 x i16>), %1 + %5:_(<4 x i1>) = G_ICMP intpred(eq), %2(<4 x i16>), %3 + %6:_(<4 x i1>) = G_OR %4, %5 + %7:_(<4 x i16>) = G_ANYEXT %6 + $d0 = COPY %7:_(<4 x i16>) RET_ReallyLR implicit $d0 ... 
--- @@ -252,24 +251,24 @@ body: | ; CHECK-LABEL: name: or_v8s1 ; CHECK: liveins: $d0, $d1, $d2, $d3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<8 x s8>) = COPY $d2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<8 x s8>) = COPY $d3 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[COPY]](<8 x s8>), [[COPY1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[COPY2]](<8 x s8>), [[COPY3]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<8 x s8>) = G_OR [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: $d0 = COPY [[OR]](<8 x s8>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i8>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i8>) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<8 x i8>) = COPY $d2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<8 x i8>) = COPY $d3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x i8>) = G_ICMP intpred(eq), [[COPY]](<8 x i8>), [[COPY1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<8 x i8>) = G_ICMP intpred(eq), [[COPY2]](<8 x i8>), [[COPY3]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<8 x i8>) = G_OR [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: $d0 = COPY [[OR]](<8 x i8>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 - %0:_(<8 x s8>) = COPY $d0 - %1:_(<8 x s8>) = COPY $d1 - %2:_(<8 x s8>) = COPY $d2 - %3:_(<8 x s8>) = COPY $d3 - %4:_(<8 x s1>) = G_ICMP intpred(eq), %0(<8 x s8>), %1 - %5:_(<8 x s1>) = G_ICMP intpred(eq), %2(<8 x s8>), %3 - %6:_(<8 x s1>) = G_OR %4, %5 - %7:_(<8 x s8>) = G_ANYEXT %6 - $d0 = COPY %7:_(<8 x s8>) + %0:_(<8 x i8>) = COPY $d0 + %1:_(<8 x i8>) = COPY $d1 + %2:_(<8 x i8>) = COPY $d2 + %3:_(<8 x i8>) = COPY $d3 + %4:_(<8 x i1>) = G_ICMP intpred(eq), %0(<8 x i8>), %1 + %5:_(<8 x i1>) = G_ICMP intpred(eq), %2(<8 x i8>), %3 + %6:_(<8 x i1>) = G_OR %4, %5 + %7:_(<8 x i8>) = G_ANYEXT %6 + $d0 = COPY %7:_(<8 x i8>) RET_ReallyLR implicit $d0 ... 
--- @@ -282,23 +281,23 @@ body: | ; CHECK-LABEL: name: or_v16s1 ; CHECK: liveins: $q0, $q1, $q2, $q3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<16 x s8>) = COPY $q2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<16 x s8>) = COPY $q3 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(eq), [[COPY]](<16 x s8>), [[COPY1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(eq), [[COPY2]](<16 x s8>), [[COPY3]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<16 x s8>) = G_OR [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: $q0 = COPY [[OR]](<16 x s8>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x i8>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x i8>) = COPY $q1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<16 x i8>) = COPY $q2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<16 x i8>) = COPY $q3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<16 x i8>) = G_ICMP intpred(eq), [[COPY]](<16 x i8>), [[COPY1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<16 x i8>) = G_ICMP intpred(eq), [[COPY2]](<16 x i8>), [[COPY3]] + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(<16 x i8>) = G_OR [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: $q0 = COPY [[OR]](<16 x i8>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 - %0:_(<16 x s8>) = COPY $q0 - %1:_(<16 x s8>) = COPY $q1 - %2:_(<16 x s8>) = COPY $q2 - %3:_(<16 x s8>) = COPY $q3 - %4:_(<16 x s1>) = G_ICMP intpred(eq), %0(<16 x s8>), %1 - %5:_(<16 x s1>) = G_ICMP intpred(eq), %2(<16 x s8>), %3 - %6:_(<16 x s1>) = G_OR %4, %5 - %7:_(<16 x s8>) = G_ANYEXT %6 - $q0 = COPY %7:_(<16 x s8>) + %0:_(<16 x i8>) = COPY $q0 + %1:_(<16 x i8>) = COPY $q1 + %2:_(<16 x i8>) = COPY $q2 + %3:_(<16 x i8>) = COPY $q3 + %4:_(<16 x i1>) = G_ICMP intpred(eq), %0(<16 x i8>), %1 + %5:_(<16 x i1>) = G_ICMP intpred(eq), %2(<16 x i8>), %3 + %6:_(<16 x i1>) = G_OR %4, %5 + %7:_(<16 x i8>) = G_ANYEXT %6 + $q0 = COPY %7:_(<16 x i8>) RET_ReallyLR implicit $q0 ... 
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir index 862ad0ccd2fa7..dfcf0398fa3dc 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xor.mir @@ -5,20 +5,20 @@ name: test_scalar_xor_small body: | bb.0.entry: ; CHECK-LABEL: name: test_scalar_xor_small - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1 - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[XOR]](s32) - ; CHECK-NEXT: $x0 = COPY [[ANYEXT]](s64) - %0:_(s64) = COPY $x0 - %1:_(s64) = COPY $x1 - %2:_(s8) = G_TRUNC %0(s64) - %3:_(s8) = G_TRUNC %1(s64) - %4:_(s8) = G_XOR %2, %3 - %5:_(s64) = G_ANYEXT %4(s8) - $x0 = COPY %5(s64) + ; CHECK: [[COPY:%[0-9]+]]:_(i64) = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i64) = COPY $x1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i32) = G_TRUNC [[COPY]](i64) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(i32) = G_TRUNC [[COPY1]](i64) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i32) = G_XOR [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i64) = G_ANYEXT [[XOR]](i32) + ; CHECK-NEXT: $x0 = COPY [[ANYEXT]](i64) + %0:_(i64) = COPY $x0 + %1:_(i64) = COPY $x1 + %2:_(i8) = G_TRUNC %0(i64) + %3:_(i8) = G_TRUNC %1(i64) + %4:_(i8) = G_XOR %2, %3 + %5:_(i64) = G_ANYEXT %4(i8) + $x0 = COPY %5(i64) ... 
--- @@ -30,13 +30,13 @@ body: | ; CHECK-LABEL: name: test_s318_xor ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] - ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] - ; CHECK-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR4:%[0-9]+]]:_(i64) = G_XOR [[DEF]], [[DEF]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 4611686018427387903 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[XOR]], [[C]] @@ -58,11 +58,11 @@ body: | ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C5]](i64) ; CHECK-NEXT: G_STORE [[AND4]](i64), [[PTR_ADD3]](p0) :: (store (i64) into unknown-address + 32, align 32) ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %a:_(s318) = G_IMPLICIT_DEF - %b:_(s318) = G_IMPLICIT_DEF + %a:_(i318) = G_IMPLICIT_DEF + %b:_(i318) = G_IMPLICIT_DEF %ptr:_(p0) = COPY $x0 - %xor:_(s318) = G_XOR %a, %b - G_STORE %xor(s318), %ptr(p0) :: (store (s318)) + %xor:_(i318) = G_XOR %a, %b + G_STORE %xor(i318), %ptr(p0) :: (store (i318)) RET_ReallyLR implicit $x0 ... 
--- @@ -74,13 +74,13 @@ body: | ; CHECK-LABEL: name: test_s319_xor ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] - ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] - ; CHECK-NEXT: [[XOR4:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR3:%[0-9]+]]:_(i64) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR4:%[0-9]+]]:_(i64) = G_XOR [[DEF]], [[DEF]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 9223372036854775807 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[XOR]], [[C]] @@ -102,11 +102,11 @@ body: | ; CHECK-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C5]](i64) ; CHECK-NEXT: G_STORE [[AND4]](i64), [[PTR_ADD3]](p0) :: (store (i64) into unknown-address + 32, align 32) ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %a:_(s319) = G_IMPLICIT_DEF - %b:_(s319) = G_IMPLICIT_DEF + %a:_(i319) = G_IMPLICIT_DEF + %b:_(i319) = G_IMPLICIT_DEF %ptr:_(p0) = COPY $x0 - %xor:_(s319) = G_XOR %a, %b - G_STORE %xor(s319), %ptr(p0) :: (store (s319)) + %xor:_(i319) = G_XOR %a, %b + G_STORE %xor(i319), %ptr(p0) :: (store (i319)) RET_ReallyLR implicit $x0 ... 
--- @@ -118,11 +118,11 @@ body: | ; CHECK-LABEL: name: test_s158_xor ; CHECK: liveins: $x0 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i64) = G_IMPLICIT_DEF ; CHECK-NEXT: %ptr:_(p0) = COPY $x0 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] - ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(s64) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(i64) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(i64) = G_XOR [[DEF]], [[DEF]] + ; CHECK-NEXT: [[XOR2:%[0-9]+]]:_(i64) = G_XOR [[DEF]], [[DEF]] ; CHECK-NEXT: [[C:%[0-9]+]]:_(i64) = G_CONSTANT i64 -1 ; CHECK-NEXT: [[C1:%[0-9]+]]:_(i64) = G_CONSTANT i64 1073741823 ; CHECK-NEXT: [[AND:%[0-9]+]]:_(i64) = G_AND [[XOR]], [[C]] @@ -139,11 +139,11 @@ body: | ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = nuw inbounds G_PTR_ADD %ptr, [[C3]](i64) ; CHECK-NEXT: G_STORE [[EXTRACT]](i32), [[PTR_ADD1]](p0) :: (store (i32) into unknown-address + 16, align 16) ; CHECK-NEXT: RET_ReallyLR implicit $x0 - %a:_(s158) = G_IMPLICIT_DEF - %b:_(s158) = G_IMPLICIT_DEF + %a:_(i158) = G_IMPLICIT_DEF + %b:_(i158) = G_IMPLICIT_DEF %ptr:_(p0) = COPY $x0 - %xor:_(s158) = G_XOR %a, %b - G_STORE %xor(s158), %ptr(p0) :: (store (s158)) + %xor:_(i158) = G_XOR %a, %b + G_STORE %xor(i158), %ptr(p0) :: (store (i158)) RET_ReallyLR implicit $x0 ... 
@@ -152,19 +152,19 @@ name: test_vector_xor_v16s16 body: | bb.0.entry: ; CHECK-LABEL: name: test_vector_xor_v16s16 - ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s16>) = COPY $q1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<8 x s16>) = G_XOR [[COPY]], [[COPY]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<8 x s16>) = G_XOR [[COPY1]], [[COPY1]] - ; CHECK-NEXT: $q0 = COPY [[XOR]](<8 x s16>) - ; CHECK-NEXT: $q1 = COPY [[XOR1]](<8 x s16>) - %1:_(<8 x s16>) = COPY $q0 - %2:_(<8 x s16>) = COPY $q1 - %0:_(<16 x s16>) = G_CONCAT_VECTORS %1(<8 x s16>), %2(<8 x s16>) - %3:_(<16 x s16>) = G_XOR %0, %0 - %4:_(<8 x s16>), %5:_(<8 x s16>) = G_UNMERGE_VALUES %3(<16 x s16>) - $q0 = COPY %4(<8 x s16>) - $q1 = COPY %5(<8 x s16>) + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x i16>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i16>) = COPY $q1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<8 x i16>) = G_XOR [[COPY]], [[COPY]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<8 x i16>) = G_XOR [[COPY1]], [[COPY1]] + ; CHECK-NEXT: $q0 = COPY [[XOR]](<8 x i16>) + ; CHECK-NEXT: $q1 = COPY [[XOR1]](<8 x i16>) + %1:_(<8 x i16>) = COPY $q0 + %2:_(<8 x i16>) = COPY $q1 + %0:_(<16 x i16>) = G_CONCAT_VECTORS %1(<8 x i16>), %2(<8 x i16>) + %3:_(<16 x i16>) = G_XOR %0, %0 + %4:_(<8 x i16>), %5:_(<8 x i16>) = G_UNMERGE_VALUES %3(<16 x i16>) + $q0 = COPY %4(<8 x i16>) + $q1 = COPY %5(<8 x i16>) ... 
--- @@ -172,19 +172,19 @@ name: test_vector_xor_v32s8 body: | bb.0.entry: ; CHECK-LABEL: name: test_vector_xor_v32s8 - ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<16 x s8>) = G_XOR [[COPY]], [[COPY]] - ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<16 x s8>) = G_XOR [[COPY1]], [[COPY1]] - ; CHECK-NEXT: $q0 = COPY [[XOR]](<16 x s8>) - ; CHECK-NEXT: $q1 = COPY [[XOR1]](<16 x s8>) - %0:_(<16 x s8>) = COPY $q0 - %1:_(<16 x s8>) = COPY $q1 - %2:_(<32 x s8>) = G_CONCAT_VECTORS %0, %1 - %3:_(<32 x s8>) = G_XOR %2, %2 - %7:_(<16 x s8>), %8:_(<16 x s8>) = G_UNMERGE_VALUES %3(<32 x s8>) - $q0 = COPY %7(<16 x s8>) - $q1 = COPY %8(<16 x s8>) + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x i8>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x i8>) = COPY $q1 + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<16 x i8>) = G_XOR [[COPY]], [[COPY]] + ; CHECK-NEXT: [[XOR1:%[0-9]+]]:_(<16 x i8>) = G_XOR [[COPY1]], [[COPY1]] + ; CHECK-NEXT: $q0 = COPY [[XOR]](<16 x i8>) + ; CHECK-NEXT: $q1 = COPY [[XOR1]](<16 x i8>) + %0:_(<16 x i8>) = COPY $q0 + %1:_(<16 x i8>) = COPY $q1 + %2:_(<32 x i8>) = G_CONCAT_VECTORS %0, %1 + %3:_(<32 x i8>) = G_XOR %2, %2 + %7:_(<16 x i8>), %8:_(<16 x i8>) = G_UNMERGE_VALUES %3(<32 x i8>) + $q0 = COPY %7(<16 x i8>) + $q1 = COPY %8(<16 x i8>) ... 
--- @@ -197,24 +197,24 @@ body: | ; CHECK-LABEL: name: xor_v2s1 ; CHECK: liveins: $d0, $d1, $d2, $d3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY $d2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s32>) = COPY $d3 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY]](<2 x s32>), [[COPY1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<2 x s32>) = G_ICMP intpred(eq), [[COPY2]](<2 x s32>), [[COPY3]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x s32>) = G_XOR [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: $d0 = COPY [[XOR]](<2 x s32>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x i32>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x i32>) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<2 x i32>) = COPY $d2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<2 x i32>) = COPY $d3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<2 x i32>) = G_ICMP intpred(eq), [[COPY]](<2 x i32>), [[COPY1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<2 x i32>) = G_ICMP intpred(eq), [[COPY2]](<2 x i32>), [[COPY3]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<2 x i32>) = G_XOR [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: $d0 = COPY [[XOR]](<2 x i32>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 - %0:_(<2 x s32>) = COPY $d0 - %1:_(<2 x s32>) = COPY $d1 - %2:_(<2 x s32>) = COPY $d2 - %3:_(<2 x s32>) = COPY $d3 - %4:_(<2 x s1>) = G_ICMP intpred(eq), %0(<2 x s32>), %1 - %5:_(<2 x s1>) = G_ICMP intpred(eq), %2(<2 x s32>), %3 - %6:_(<2 x s1>) = G_XOR %4, %5 - %7:_(<2 x s32>) = G_ANYEXT %6 - $d0 = COPY %7:_(<2 x s32>) + %0:_(<2 x i32>) = COPY $d0 + %1:_(<2 x i32>) = COPY $d1 + %2:_(<2 x i32>) = COPY $d2 + %3:_(<2 x i32>) = COPY $d3 + %4:_(<2 x i1>) = G_ICMP intpred(eq), %0(<2 x i32>), %1 + %5:_(<2 x i1>) = G_ICMP intpred(eq), %2(<2 x i32>), %3 + %6:_(<2 x i1>) = G_XOR %4, %5 + %7:_(<2 x i32>) = G_ANYEXT %6 + $d0 = COPY %7:_(<2 x i32>) RET_ReallyLR implicit $d0 ... 
--- @@ -227,28 +227,28 @@ body: | ; CHECK-LABEL: name: xor_v3s1 ; CHECK: liveins: $b0, $b1, $b2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s8) = COPY $b0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s8) = COPY $b1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s8) = COPY $b2 - ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY]](s8) - ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY1]](s8) - ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s16) = G_ANYEXT [[COPY2]](s8) - ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[ANYEXT]](s16), [[ANYEXT1]](s16), [[ANYEXT2]](s16), [[DEF]](s16) - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR]] - ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[XOR]](<4 x s16>) - ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16) - ; CHECK-NEXT: $b0 = COPY [[TRUNC]](s8) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(i8) = COPY $b0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(i8) = COPY $b1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(i8) = COPY $b2 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(i16) = G_ANYEXT [[COPY]](i8) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(i16) = G_ANYEXT [[COPY1]](i8) + ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(i16) = G_ANYEXT [[COPY2]](i8) + ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(i16) = G_IMPLICIT_DEF + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x i16>) = G_BUILD_VECTOR [[ANYEXT]](i16), [[ANYEXT1]](i16), [[ANYEXT2]](i16), [[DEF]](i16) + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x i16>) = G_XOR [[BUILD_VECTOR]], [[BUILD_VECTOR]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:_(i16), [[UV1:%[0-9]+]]:_(i16), [[UV2:%[0-9]+]]:_(i16), [[UV3:%[0-9]+]]:_(i16) = G_UNMERGE_VALUES [[XOR]](<4 x i16>) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(i8) = G_TRUNC [[UV]](i16) + ; CHECK-NEXT: $b0 = COPY [[TRUNC]](i8) ; CHECK-NEXT: RET_ReallyLR implicit $b0 - %1:_(s8) = COPY $b0 - %2:_(s8) = COPY $b1 - %3:_(s8) = COPY $b2 - 
%4:_(<3 x s8>) = G_BUILD_VECTOR %1(s8), %2(s8), %3(s8) - %0:_(<3 x s1>) = G_TRUNC %4(<3 x s8>) - %5:_(<3 x s1>) = G_XOR %0, %0 - %7:_(<3 x s8>) = G_ANYEXT %5(<3 x s1>) - %8:_(s8), %9:_(s8), %10:_(s8) = G_UNMERGE_VALUES %7(<3 x s8>) - $b0 = COPY %8:_(s8) + %1:_(i8) = COPY $b0 + %2:_(i8) = COPY $b1 + %3:_(i8) = COPY $b2 + %4:_(<3 x i8>) = G_BUILD_VECTOR %1(i8), %2(i8), %3(i8) + %0:_(<3 x i1>) = G_TRUNC %4(<3 x i8>) + %5:_(<3 x i1>) = G_XOR %0, %0 + %7:_(<3 x i8>) = G_ANYEXT %5(<3 x i1>) + %8:_(i8), %9:_(i8), %10:_(i8) = G_UNMERGE_VALUES %7(<3 x i8>) + $b0 = COPY %8:_(i8) RET_ReallyLR implicit $b0 ... --- @@ -261,24 +261,24 @@ body: | ; CHECK-LABEL: name: xor_v4s1 ; CHECK: liveins: $d0, $d1, $d2, $d3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $d0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $d1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x s16>) = COPY $d2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x s16>) = COPY $d3 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY]](<4 x s16>), [[COPY1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s16>) = G_ICMP intpred(eq), [[COPY2]](<4 x s16>), [[COPY3]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: $d0 = COPY [[XOR]](<4 x s16>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x i16>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x i16>) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<4 x i16>) = COPY $d2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<4 x i16>) = COPY $d3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x i16>) = G_ICMP intpred(eq), [[COPY]](<4 x i16>), [[COPY1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x i16>) = G_ICMP intpred(eq), [[COPY2]](<4 x i16>), [[COPY3]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x i16>) = G_XOR [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: $d0 = COPY [[XOR]](<4 x i16>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 - %0:_(<4 x s16>) = COPY $d0 - %1:_(<4 x s16>) = COPY $d1 - %2:_(<4 x s16>) = COPY $d2 - %3:_(<4 x s16>) = COPY $d3 - %4:_(<4 x 
s1>) = G_ICMP intpred(eq), %0(<4 x s16>), %1 - %5:_(<4 x s1>) = G_ICMP intpred(eq), %2(<4 x s16>), %3 - %6:_(<4 x s1>) = G_XOR %4, %5 - %7:_(<4 x s16>) = G_ANYEXT %6 - $d0 = COPY %7:_(<4 x s16>) + %0:_(<4 x i16>) = COPY $d0 + %1:_(<4 x i16>) = COPY $d1 + %2:_(<4 x i16>) = COPY $d2 + %3:_(<4 x i16>) = COPY $d3 + %4:_(<4 x i1>) = G_ICMP intpred(eq), %0(<4 x i16>), %1 + %5:_(<4 x i1>) = G_ICMP intpred(eq), %2(<4 x i16>), %3 + %6:_(<4 x i1>) = G_XOR %4, %5 + %7:_(<4 x i16>) = G_ANYEXT %6 + $d0 = COPY %7:_(<4 x i16>) RET_ReallyLR implicit $d0 ... --- @@ -291,24 +291,24 @@ body: | ; CHECK-LABEL: name: xor_v8s1 ; CHECK: liveins: $d0, $d1, $d2, $d3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<8 x s8>) = COPY $d2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<8 x s8>) = COPY $d3 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[COPY]](<8 x s8>), [[COPY1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<8 x s8>) = G_ICMP intpred(eq), [[COPY2]](<8 x s8>), [[COPY3]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<8 x s8>) = G_XOR [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: $d0 = COPY [[XOR]](<8 x s8>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x i8>) = COPY $d0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x i8>) = COPY $d1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<8 x i8>) = COPY $d2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<8 x i8>) = COPY $d3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x i8>) = G_ICMP intpred(eq), [[COPY]](<8 x i8>), [[COPY1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<8 x i8>) = G_ICMP intpred(eq), [[COPY2]](<8 x i8>), [[COPY3]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<8 x i8>) = G_XOR [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: $d0 = COPY [[XOR]](<8 x i8>) ; CHECK-NEXT: RET_ReallyLR implicit $d0 - %0:_(<8 x s8>) = COPY $d0 - %1:_(<8 x s8>) = COPY $d1 - %2:_(<8 x s8>) = COPY $d2 - %3:_(<8 x s8>) = COPY $d3 - %4:_(<8 x s1>) = G_ICMP intpred(eq), %0(<8 x s8>), %1 - %5:_(<8 x s1>) = G_ICMP intpred(eq), 
%2(<8 x s8>), %3 - %6:_(<8 x s1>) = G_XOR %4, %5 - %7:_(<8 x s8>) = G_ANYEXT %6 - $d0 = COPY %7:_(<8 x s8>) + %0:_(<8 x i8>) = COPY $d0 + %1:_(<8 x i8>) = COPY $d1 + %2:_(<8 x i8>) = COPY $d2 + %3:_(<8 x i8>) = COPY $d3 + %4:_(<8 x i1>) = G_ICMP intpred(eq), %0(<8 x i8>), %1 + %5:_(<8 x i1>) = G_ICMP intpred(eq), %2(<8 x i8>), %3 + %6:_(<8 x i1>) = G_XOR %4, %5 + %7:_(<8 x i8>) = G_ANYEXT %6 + $d0 = COPY %7:_(<8 x i8>) RET_ReallyLR implicit $d0 ... --- @@ -321,23 +321,23 @@ body: | ; CHECK-LABEL: name: xor_v16s1 ; CHECK: liveins: $q0, $q1, $q2, $q3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<16 x s8>) = COPY $q2 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<16 x s8>) = COPY $q3 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(eq), [[COPY]](<16 x s8>), [[COPY1]] - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<16 x s8>) = G_ICMP intpred(eq), [[COPY2]](<16 x s8>), [[COPY3]] - ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<16 x s8>) = G_XOR [[ICMP]], [[ICMP1]] - ; CHECK-NEXT: $q0 = COPY [[XOR]](<16 x s8>) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x i8>) = COPY $q0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x i8>) = COPY $q1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(<16 x i8>) = COPY $q2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(<16 x i8>) = COPY $q3 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<16 x i8>) = G_ICMP intpred(eq), [[COPY]](<16 x i8>), [[COPY1]] + ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<16 x i8>) = G_ICMP intpred(eq), [[COPY2]](<16 x i8>), [[COPY3]] + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<16 x i8>) = G_XOR [[ICMP]], [[ICMP1]] + ; CHECK-NEXT: $q0 = COPY [[XOR]](<16 x i8>) ; CHECK-NEXT: RET_ReallyLR implicit $q0 - %0:_(<16 x s8>) = COPY $q0 - %1:_(<16 x s8>) = COPY $q1 - %2:_(<16 x s8>) = COPY $q2 - %3:_(<16 x s8>) = COPY $q3 - %4:_(<16 x s1>) = G_ICMP intpred(eq), %0(<16 x s8>), %1 - %5:_(<16 x s1>) = G_ICMP intpred(eq), %2(<16 x s8>), %3 - %6:_(<16 x s1>) = G_XOR %4, %5 - %7:_(<16 x s8>) 
= G_ANYEXT %6 - $q0 = COPY %7:_(<16 x s8>) + %0:_(<16 x i8>) = COPY $q0 + %1:_(<16 x i8>) = COPY $q1 + %2:_(<16 x i8>) = COPY $q2 + %3:_(<16 x i8>) = COPY $q3 + %4:_(<16 x i1>) = G_ICMP intpred(eq), %0(<16 x i8>), %1 + %5:_(<16 x i1>) = G_ICMP intpred(eq), %2(<16 x i8>), %3 + %6:_(<16 x i1>) = G_XOR %4, %5 + %7:_(<16 x i8>) = G_ANYEXT %6 + $q0 = COPY %7:_(<16 x i8>) RET_ReallyLR implicit $q0 ... From 85846143f6e98df265db284415d9f36af43563cb Mon Sep 17 00:00:00 2001 From: Nikita Kornev Date: Mon, 22 Jun 2026 23:05:58 +0200 Subject: [PATCH 087/511] [Clang][SYCL] Add new comp targets to new offload (#22307) These targets were missing from the new offload model, which prevented correct recognition and parsing of the corresponding Intel GPU architectures (resulting in ocloc missing the -device flag value). See https://github.com/intel/llvm/pull/22170 fixes: https://github.com/intel/llvm/issues/22295 --- clang/include/clang/Basic/OffloadArch.h | 4 ++++ clang/lib/Basic/OffloadArch.cpp | 4 ++++ clang/unittests/Basic/OffloadArchTest.cpp | 14 ++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h index 95dbce7f4ad6a..af06960d08300 100644 --- a/clang/include/clang/Basic/OffloadArch.h +++ b/clang/include/clang/Basic/OffloadArch.h @@ -163,6 +163,7 @@ enum class OffloadArch { ADL_P, ADL_N, DG1, + DG2, ACM_G10, DG2_G10, ACM_G11, @@ -171,13 +172,16 @@ enum class OffloadArch { DG2_G12, PVC, PVC_VG, + MTL, MTL_U, MTL_S, ARL_U, ARL_S, MTL_H, ARL_H, + BMG, BMG_G21, + PTL, LNL_M, LAST = LNL_M, diff --git a/clang/lib/Basic/OffloadArch.cpp b/clang/lib/Basic/OffloadArch.cpp index c93acc77b9ff6..1c33f3fc8f529 100644 --- a/clang/lib/Basic/OffloadArch.cpp +++ b/clang/lib/Basic/OffloadArch.cpp @@ -156,6 +156,7 @@ static const OffloadArchToStringMap ArchNames[] = { {OffloadArch::ADL_P, "adl_p", ""}, {OffloadArch::ADL_N, "adl_n", ""}, {OffloadArch::DG1, "dg1", ""}, + {OffloadArch::DG2, "dg2", ""}, 
{OffloadArch::ACM_G10, "acm_g10", ""}, {OffloadArch::DG2_G10, "dg2_g10", ""}, {OffloadArch::ACM_G11, "acm_g11", ""}, @@ -164,13 +165,16 @@ static const OffloadArchToStringMap ArchNames[] = { {OffloadArch::DG2_G12, "dg2_g12", ""}, {OffloadArch::PVC, "pvc", ""}, {OffloadArch::PVC_VG, "pvc_vg", ""}, + {OffloadArch::MTL, "mtl", ""}, {OffloadArch::MTL_U, "mtl_u", ""}, {OffloadArch::MTL_S, "mtl_s", ""}, {OffloadArch::ARL_U, "arl_u", ""}, {OffloadArch::ARL_S, "arl_s", ""}, {OffloadArch::MTL_H, "mtl_h", ""}, {OffloadArch::ARL_H, "arl_h", ""}, + {OffloadArch::BMG, "bmg", ""}, {OffloadArch::BMG_G21, "bmg_g21", ""}, + {OffloadArch::PTL, "ptl", ""}, {OffloadArch::LNL_M, "lnl_m", ""}, {OffloadArch::Generic, "generic", ""}, // clang-format on diff --git a/clang/unittests/Basic/OffloadArchTest.cpp b/clang/unittests/Basic/OffloadArchTest.cpp index c19ad0043d774..6e47cc2cb1744 100644 --- a/clang/unittests/Basic/OffloadArchTest.cpp +++ b/clang/unittests/Basic/OffloadArchTest.cpp @@ -34,3 +34,17 @@ TEST(OffloadArchTest, basic) { EXPECT_FALSE(IsAMDOffloadArch(OffloadArch::Generic)); EXPECT_FALSE(IsIntelOffloadArch(OffloadArch::Generic)); } + +TEST(OffloadArchTest, IntelGPUFamilyArchitectures) { + EXPECT_TRUE(IsIntelGPUOffloadArch(OffloadArch::DG2)); + EXPECT_TRUE(IsIntelGPUOffloadArch(OffloadArch::MTL)); + EXPECT_TRUE(IsIntelGPUOffloadArch(OffloadArch::BMG)); + EXPECT_TRUE(IsIntelGPUOffloadArch(OffloadArch::PTL)); +} + +TEST(OffloadArchTest, IntelGPUFamilyArchParsing) { + EXPECT_EQ(StringToOffloadArch("dg2"), OffloadArch::DG2); + EXPECT_EQ(StringToOffloadArch("mtl"), OffloadArch::MTL); + EXPECT_EQ(StringToOffloadArch("bmg"), OffloadArch::BMG); + EXPECT_EQ(StringToOffloadArch("ptl"), OffloadArch::PTL); +} From c635857485a23b204c93cca95ace64670399aac7 Mon Sep 17 00:00:00 2001 From: Bhavesh M <85327930+beamandala@users.noreply.github.com> Date: Tue, 23 Jun 2026 02:37:42 +0530 Subject: [PATCH 088/511] [mlir][EmitC] Make `GlobalOps` `FieldOps` in wrap-emitc-func-in-class pass (#203641) 
Update the `WrapFuncInClassPass` pass so that `GlobalOp`s are moved into the `ClassOp` as `FieldOp`s. This respects MLIR's behavior of resolving references to the closest parent operation that defines a symbol table, which is the `ClassOp` that we are creating in this pass. Without this change, references to a `GlobalOp` in `GetGlobalOp` fail to resolve. Details: - Identify `GlobalOp`s - Create a `FieldOp` within the `ClassOp` for each `GlobalOp` - Delete the `GlobalOp`s after all functions have been wrapped in a class. Doing this after every function can cause an error when multiple functions refer to the same `GlobalOp`(s), which would be deleted after the first function is wrapped in a class. Also renamed the `fName` parameter in `populateWrapFuncInClass` to `funcName` to match naming in `WrapFuncInClass`. Based on PR #153452. Key differences: - No size is set for the `globalsToMove` `SmallVector` type because I'm not sure if the number of global variables is consistent across different models. - `GlobalOp`s are deleted after all functions have been processed. 
- Instead of directly cloning the `GlobalOp`, an equivalent `FieldOp` is created - `GetGlobalOp`s are translated to `GetFieldOp`s Co-authored-by: [Jaddyen](https://github.com/Jaddyen) --- .../mlir/Dialect/EmitC/Transforms/Passes.td | 2 +- .../Dialect/EmitC/Transforms/Transforms.h | 5 +- .../EmitC/Transforms/WrapFuncInClass.cpp | 76 +++++++-- .../Dialect/EmitC/wrap-func-in-class.mlir | 150 ++++++++++++++++++ 4 files changed, 216 insertions(+), 17 deletions(-) diff --git a/mlir/include/mlir/Dialect/EmitC/Transforms/Passes.td b/mlir/include/mlir/Dialect/EmitC/Transforms/Passes.td index 40ecef33448d7..c34c3303a6ab3 100644 --- a/mlir/include/mlir/Dialect/EmitC/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/EmitC/Transforms/Passes.td @@ -20,7 +20,7 @@ def FormExpressionsPass : Pass<"form-expressions"> { let dependentDialects = ["emitc::EmitCDialect"]; } -def WrapFuncInClassPass : Pass<"wrap-emitc-func-in-class"> { +def WrapFuncInClassPass : Pass<"wrap-emitc-func-in-class", "ModuleOp"> { let summary = "Wrap functions in classes, using arguments as fields."; let description = [{ This pass transforms `emitc.func` operations into `emitc.class` operations. diff --git a/mlir/include/mlir/Dialect/EmitC/Transforms/Transforms.h b/mlir/include/mlir/Dialect/EmitC/Transforms/Transforms.h index 962bdb3c032bf..791e545a8edcf 100644 --- a/mlir/include/mlir/Dialect/EmitC/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/EmitC/Transforms/Transforms.h @@ -11,6 +11,7 @@ #include "mlir/Dialect/EmitC/IR/EmitC.h" #include "mlir/IR/PatternMatch.h" +#include "llvm/ADT/DenseMap.h" namespace mlir { namespace emitc { @@ -32,7 +33,9 @@ void populateExpressionPatterns(RewritePatternSet &patterns); // The WrapFuncInClass pass. 
//===----------------------------------------------------------------------===// -void populateWrapFuncInClass(RewritePatternSet &patterns, StringRef fName); +void populateWrapFuncInClass( + RewritePatternSet &patterns, StringRef funcName, + DenseMap> &globalsToMove); } // namespace emitc } // namespace mlir diff --git a/mlir/lib/Dialect/EmitC/Transforms/WrapFuncInClass.cpp b/mlir/lib/Dialect/EmitC/Transforms/WrapFuncInClass.cpp index fc8acd616ba70..aaceddb1e1b61 100644 --- a/mlir/lib/Dialect/EmitC/Transforms/WrapFuncInClass.cpp +++ b/mlir/lib/Dialect/EmitC/Transforms/WrapFuncInClass.cpp @@ -13,7 +13,10 @@ #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/PatternMatch.h" +#include "mlir/IR/SymbolTable.h" #include "mlir/Transforms/WalkPatternRewriteDriver.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" using namespace mlir; using namespace emitc; @@ -28,12 +31,32 @@ struct WrapFuncInClassPass : public impl::WrapFuncInClassPassBase { using WrapFuncInClassPassBase::WrapFuncInClassPassBase; void runOnOperation() override { - Operation *rootOp = getOperation(); + mlir::ModuleOp moduleOp = getOperation(); + + DenseMap> globalsUsedByFuncs; + + SymbolTableCollection symbolTable; + moduleOp.walk([&globalsUsedByFuncs, &symbolTable](FuncOp funcOp) { + funcOp.walk([&globalsUsedByFuncs, &symbolTable, + &funcOp](GetGlobalOp getGlobalOp) { + if (auto globalOp = symbolTable.lookupNearestSymbolFrom( + getGlobalOp, getGlobalOp.getNameAttr())) { + globalsUsedByFuncs[funcOp].insert(globalOp); + } + }); + }); RewritePatternSet patterns(&getContext()); - populateWrapFuncInClass(patterns, funcName); + populateWrapFuncInClass(patterns, funcName, globalsUsedByFuncs); + + walkAndApplyPatterns(moduleOp, std::move(patterns)); - walkAndApplyPatterns(rootOp, std::move(patterns)); + DenseSet globalsToErase; + for (auto &[_, globals] : globalsUsedByFuncs) + globalsToErase.insert_range(globals); + + for (GlobalOp globalOp : globalsToErase) + 
globalOp.erase(); } }; @@ -41,12 +64,15 @@ struct WrapFuncInClassPass } // namespace emitc } // namespace mlir -class WrapFuncInClass : public OpRewritePattern { +class WrapFuncInClass : public OpRewritePattern { public: - WrapFuncInClass(MLIRContext *context, StringRef funcName) - : OpRewritePattern(context), funcName(funcName) {} + WrapFuncInClass( + MLIRContext *context, StringRef funcName, + const DenseMap> &globalsToMove) + : OpRewritePattern(context), funcName(funcName), + globalsToMove(globalsToMove) {} - LogicalResult matchAndRewrite(emitc::FuncOp funcOp, + LogicalResult matchAndRewrite(FuncOp funcOp, PatternRewriter &rewriter) const override { auto className = funcOp.getSymNameAttr().str() + "Class"; @@ -64,19 +90,26 @@ class WrapFuncInClass : public OpRewritePattern { TypeAttr typeAttr = TypeAttr::get(val.getType()); fields.push_back({fieldName, typeAttr}); - FieldOp fieldop = emitc::FieldOp::create(rewriter, funcOp->getLoc(), - fieldName, typeAttr, nullptr); + FieldOp fieldop = FieldOp::create(rewriter, funcOp->getLoc(), fieldName, + typeAttr, nullptr); if (argAttrs && idx < argAttrs->size()) { fieldop->setDiscardableAttrs(funcOp.getArgAttrDict(idx)); } } + auto globalsIt = globalsToMove.find(funcOp); + if (globalsIt != globalsToMove.end()) { + for (auto global : globalsIt->second) { + FieldOp::create(rewriter, funcOp->getLoc(), global.getSymNameAttr(), + global.getTypeAttr(), global.getInitialValueAttr()); + } + } + rewriter.setInsertionPointToEnd(&newClassOp.getBody().front()); FunctionType funcType = funcOp.getFunctionType(); Location loc = funcOp.getLoc(); - FuncOp newFuncOp = - emitc::FuncOp::create(rewriter, loc, (funcName), funcType); + FuncOp newFuncOp = FuncOp::create(rewriter, loc, (funcName), funcType); rewriter.createBlock(&newFuncOp.getBody()); newFuncOp.getBody().takeBody(funcOp.getBody()); @@ -86,7 +119,7 @@ class WrapFuncInClass : public OpRewritePattern { newArguments.reserve(fields.size()); for (auto &[fieldName, attr] : fields) { 
GetFieldOp arg = - emitc::GetFieldOp::create(rewriter, loc, attr.getValue(), fieldName); + GetFieldOp::create(rewriter, loc, attr.getValue(), fieldName); newArguments.push_back(arg); } @@ -99,6 +132,14 @@ class WrapFuncInClass : public OpRewritePattern { if (failed(newFuncOp.eraseArguments(argsToErase))) newFuncOp->emitOpError("failed to erase all arguments using BitVector"); + newFuncOp.walk([&](GetGlobalOp getGlobalOp) { + rewriter.setInsertionPoint(getGlobalOp); + GetFieldOp getFieldOp = + GetFieldOp::create(rewriter, getGlobalOp.getLoc(), + getGlobalOp.getType(), getGlobalOp.getNameAttr()); + rewriter.replaceOp(getGlobalOp, getFieldOp); + }); + rewriter.replaceOp(funcOp, newClassOp); return success(); } @@ -107,9 +148,14 @@ class WrapFuncInClass : public OpRewritePattern { /// Name of the newly generated member function with body matching the input /// function. std::string funcName; + + /// Map of FuncOp and the GlobalOps it uses which need to be moved into the + /// ClassOp wrapper. 
+ DenseMap> globalsToMove; }; -void mlir::emitc::populateWrapFuncInClass(RewritePatternSet &patterns, - StringRef funcName) { - patterns.add(patterns.getContext(), funcName); +void mlir::emitc::populateWrapFuncInClass( + RewritePatternSet &patterns, StringRef funcName, + DenseMap> &globalsToMove) { + patterns.add(patterns.getContext(), funcName, globalsToMove); } diff --git a/mlir/test/Dialect/EmitC/wrap-func-in-class.mlir b/mlir/test/Dialect/EmitC/wrap-func-in-class.mlir index cb5f99d31e9da..7d5b3d30a64ba 100644 --- a/mlir/test/Dialect/EmitC/wrap-func-in-class.mlir +++ b/mlir/test/Dialect/EmitC/wrap-func-in-class.mlir @@ -58,3 +58,153 @@ module attributes { } { // EXECUTE-NOT: operator // EXECUTE: execute() + +// ----- +// Tests that GlobalOps are moved into the ClassOp wrapper correctly as fields + +module attributes { } { + emitc.global static const @global_arr : !emitc.array<1xi8> = dense<0> + emitc.func @foo() { + %0 = emitc.get_global @global_arr : !emitc.array<1xi8> + emitc.return + } +} + +// CHECK: emitc.class @fooClass { +// CHECK: emitc.field @global_arr : !emitc.array<1xi8> = dense<0> +// CHECK: emitc.func @"operator()"() { +// CHECK: %0 = get_field @global_arr : !emitc.array<1xi8> +// CHECK: return +// CHECK: } +// CHECK: } + +// EXECUTE-NOT: operator +// EXECUTE: execute() + +// ----- +// Tests that only GlobalOps that are used within a function are moved into the +// ClassOp wrapper as fields + +module attributes { } { + emitc.global static const @global_arr : !emitc.array<1xi8> = dense<0> + emitc.global static const @global_arr2 : !emitc.array<1xi8> = dense<0> + emitc.func @foo() { + %0 = emitc.get_global @global_arr : !emitc.array<1xi8> + emitc.return + } +} + +// CHECK: module { +// CHECK-NEXT: emitc.global static const @global_arr2 : !emitc.array<1xi8> = dense<0> +// CHECK-NEXT: emitc.class @fooClass { +// CHECK-NEXT: emitc.field @global_arr : !emitc.array<1xi8> = dense<0> +// CHECK-NEXT: emitc.func @"operator()"() { +// CHECK-NEXT: %0 = 
get_field @global_arr : !emitc.array<1xi8> +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: } + +// EXECUTE-NOT: operator +// EXECUTE: execute() + +// ----- +// Tests that when multiple functions use different globals, only the used globals +// are moved into their respective ClassOp wrappers as fields. + +module attributes { } { + emitc.global static const @global_arr1 : !emitc.array<1xi8> = dense<0> + emitc.global static const @global_arr2 : !emitc.array<1xi8> = dense<0> + emitc.global static const @global_arr3 : !emitc.array<1xi8> = dense<0> + emitc.func @foo() { + %0 = emitc.get_global @global_arr1 : !emitc.array<1xi8> + emitc.return + } + emitc.func @bar() { + %0 = emitc.get_global @global_arr2 : !emitc.array<1xi8> + emitc.return + } +} + +// CHECK: module { +// CHECK-NEXT: emitc.global static const @global_arr3 : !emitc.array<1xi8> = dense<0> +// CHECK-NEXT: emitc.class @fooClass { +// CHECK-NEXT: emitc.field @global_arr1 : !emitc.array<1xi8> = dense<0> +// CHECK-NEXT: emitc.func @"operator()"() { +// CHECK-NEXT: %0 = get_field @global_arr1 : !emitc.array<1xi8> +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: emitc.class @barClass { +// CHECK-NEXT: emitc.field @global_arr2 : !emitc.array<1xi8> = dense<0> +// CHECK-NEXT: emitc.func @"operator()"() { +// CHECK-NEXT: %0 = get_field @global_arr2 : !emitc.array<1xi8> +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: } + +// EXECUTE-NOT: operator +// EXECUTE: execute() + +// ----- +// Tests that when multiple functions use the same global, the global is moved +// into each ClassOp wrapper as a field and erased from the module. 
+ +module attributes { } { + emitc.global static const @global_arr : !emitc.array<1xi8> = dense<0> + emitc.func @foo() { + %0 = emitc.get_global @global_arr : !emitc.array<1xi8> + emitc.return + } + emitc.func @bar() { + %0 = emitc.get_global @global_arr : !emitc.array<1xi8> + emitc.return + } +} + +// CHECK: module { +// CHECK-NEXT: emitc.class @fooClass { +// CHECK-NEXT: emitc.field @global_arr : !emitc.array<1xi8> = dense<0> +// CHECK-NEXT: emitc.func @"operator()"() { +// CHECK-NEXT: %0 = get_field @global_arr : !emitc.array<1xi8> +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: emitc.class @barClass { +// CHECK-NEXT: emitc.field @global_arr : !emitc.array<1xi8> = dense<0> +// CHECK-NEXT: emitc.func @"operator()"() { +// CHECK-NEXT: %0 = get_field @global_arr : !emitc.array<1xi8> +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: } + +// EXECUTE-NOT: operator +// EXECUTE: execute() + +// ----- +// Tests that multiple uses of the same global in a function result in a single field. 
+ +module attributes { } { + emitc.global static const @global_arr : !emitc.array<1xi8> = dense<0> + emitc.func @foo() { + %0 = emitc.get_global @global_arr : !emitc.array<1xi8> + %1 = emitc.get_global @global_arr : !emitc.array<1xi8> + emitc.return + } +} + +// CHECK: module { +// CHECK-NEXT: emitc.class @fooClass { +// CHECK-NEXT: emitc.field @global_arr : !emitc.array<1xi8> = dense<0> +// CHECK-NEXT: emitc.func @"operator()"() { +// CHECK-NEXT: %0 = get_field @global_arr : !emitc.array<1xi8> +// CHECK-NEXT: %1 = get_field @global_arr : !emitc.array<1xi8> +// CHECK-NEXT: return +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: } + +// EXECUTE-NOT: operator +// EXECUTE: execute() From 14d07dd943cc8c2c5de1f1c509fd514ad8df0a5e Mon Sep 17 00:00:00 2001 From: Daniel Hill Date: Mon, 22 Jun 2026 17:15:51 -0400 Subject: [PATCH 089/511] [BOLT] Emit .eh_frame_hdr before .eh_frame to prevent table scans from libdw (#201917) resolves https://github.com/llvm/llvm-project/issues/201701 by moving the `.eh_frame_hdr` section-header entry before `.eh_frame` before assigning final section indices. This lets BOLT binaries work with existing [libdw library logic expecting the index before the table](https://github.com/sourceware-org/elfutils/blob/67199e1c974db37f2bd200dcca7d7103f42ed06e/libdw/dwarf_getcfi_elf.c#L301), preventing linear `.eh_frame` scanning. 
--- bolt/lib/Rewrite/RewriteInstance.cpp | 16 ++++++++++++++++ bolt/test/eh-frame-hdr.test | 11 ++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp index 73d57c5c95221..5e2ecd66b7f9d 100644 --- a/bolt/lib/Rewrite/RewriteInstance.cpp +++ b/bolt/lib/Rewrite/RewriteInstance.cpp @@ -5104,6 +5104,22 @@ RewriteInstance::getOutputSections(ELFObjectFile *File, addSection(NewSection, Section); } + // Some consumers, including elfutils/libdw, stop scanning section headers + // once they find .eh_frame and only use .eh_frame_hdr if it appeared first. + // Keep layout-driven ordering for size calculations above, but preserve the + // conventional section-header order before assigning final indices. + auto HasOutputName = [](StringRef Name) { + return [Name](const auto &SectionKV) { + return SectionKV.first && SectionKV.first->getOutputName() == Name; + }; + }; + auto EHFrameHdrIt = + llvm::find_if(OutputSections, HasOutputName(getEHFrameHdrSectionName())); + auto EHFrameIt = llvm::find_if(OutputSections, HasOutputName(".eh_frame")); + if (EHFrameHdrIt != OutputSections.end() && + EHFrameIt != OutputSections.end() && EHFrameIt < EHFrameHdrIt) + std::rotate(EHFrameIt, EHFrameHdrIt, std::next(EHFrameHdrIt)); + // Assign indices to sections. for (uint32_t Index = 1; Index < OutputSections.size(); ++Index) OutputSections[Index].first->setIndex(Index); diff --git a/bolt/test/eh-frame-hdr.test b/bolt/test/eh-frame-hdr.test index 4d718c850e2f2..51f4100718bad 100644 --- a/bolt/test/eh-frame-hdr.test +++ b/bolt/test/eh-frame-hdr.test @@ -1,4 +1,6 @@ -# Check that llvm-bolt overwrites .eh_frame_hdr in-place. +# Check that llvm-bolt overwrites .eh_frame_hdr in-place and keeps the +# .eh_frame_hdr section header before .eh_frame when it has to create a new +# header. 
REQUIRES: system-linux @@ -6,7 +8,14 @@ RUN: %clang %cflags %p/Inputs/hello.c -o %t -Wl,-q RUN: llvm-bolt %t -o %t.bolt --use-old-text \ RUN: | FileCheck %s --check-prefix=CHECK-BOLT RUN: llvm-readelf -WS %t.bolt | FileCheck %s +RUN: touch %t.empty +RUN: llvm-objcopy --update-section .eh_frame_hdr=%t.empty %t %t.small-hdr +RUN: llvm-bolt %t.small-hdr -o %t.small-hdr.bolt +RUN: llvm-readelf -WS %t.small-hdr.bolt | FileCheck %s --check-prefix=ORDER CHECK-BOLT: rewriting .eh_frame_hdr in-place CHECK-NOT: .bolt.org.eh_frame_hdr + +ORDER: ] .eh_frame_hdr +ORDER-NEXT: ] .eh_frame From 00aa7e835312bd6964d7f9a3ebb473eec1e6f3ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 22 Jun 2026 14:27:33 -0700 Subject: [PATCH 090/511] [flang][cuda] Do not emit data transfer for constant read on the rhs (#205185) --- flang/include/flang/Evaluate/tools.h | 27 ++++++++++++++++++++ flang/lib/Evaluate/tools.cpp | 3 +-- flang/test/Lower/CUDA/cuda-data-transfer.cuf | 14 +++++++++- 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h index d2d0b69e6337d..08468f304914b 100644 --- a/flang/include/flang/Evaluate/tools.h +++ b/flang/include/flang/Evaluate/tools.h @@ -1321,6 +1321,15 @@ inline bool IsCUDAManagedOrUnifiedSymbol(const Symbol &sym) { return false; } +inline bool IsCUDAConstantSymbol(const Symbol &sym) { + if (const auto *details = + sym.GetUltimate().detailsIf()) { + return details->cudaDataAttr() && + (*details->cudaDataAttr() == common::CUDADataAttr::Constant); + } + return false; +} + // Non-allocatable module-level managed/unified variables use pointer // indirection through a companion global in __nv_managed_data__. 
// Explicit data transfers (cudaMemcpy) must be avoided for these @@ -1371,6 +1380,16 @@ inline int GetNbOfCUDAManagedOrUnifiedSymbols(const A &expr) { return symbols.size(); } +template inline int GetNbOfCUDAConstantSymbols(const A &expr) { + semantics::UnorderedSymbolSet symbols; + for (const Symbol &sym : CollectCudaSymbols(expr)) { + if (IsCUDAConstantSymbol(sym)) { + symbols.insert(sym); + } + } + return symbols.size(); +} + // Check if any of the symbols part of the expression has a CUDA device // attribute. template inline bool HasCUDADeviceAttrs(const A &expr) { @@ -1383,11 +1402,19 @@ template inline bool IsCUDADataTransfer(const A &lhs, const B &rhs) { int lhsNbManagedSymbols{GetNbOfCUDAManagedOrUnifiedSymbols(lhs)}; int rhsNbManagedSymbols{GetNbOfCUDAManagedOrUnifiedSymbols(rhs)}; + int rhsNbConstantSymbols{GetNbOfCUDAConstantSymbols(rhs)}; int rhsNbSymbols{GetNbOfCUDADeviceSymbols(rhs)}; if (HasNonAllocatableModuleCUDAManagedSymbols(lhs)) return false; + // If only constant symbols are present on the rhs, and no device symbols on + // the lhs, then no data transfer is needed because the constant have a host + // value. + if (rhsNbConstantSymbols == rhsNbSymbols && !HasCUDADeviceAttrs(lhs)) { + return false; + } + if (lhsNbManagedSymbols >= 1 && lhs.Rank() > 0 && rhsNbSymbols == 0 && rhsNbManagedSymbols == 0 && (IsVariable(rhs) || IsConstantExpr(rhs))) { return true; // Managed arrays initialization is performed on the device. 
diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index 82dcd1e795f49..a60e36654ca34 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1210,8 +1210,7 @@ bool IsCUDADeviceOnlySymbol(const Symbol &sym) { if (const auto *details = sym.GetUltimate().detailsIf()) { return details->cudaDataAttr() && - (*details->cudaDataAttr() == common::CUDADataAttr::Device || - *details->cudaDataAttr() == common::CUDADataAttr::Constant); + (*details->cudaDataAttr() == common::CUDADataAttr::Device); } return false; } diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf index a1006437485ca..f236e829072ee 100644 --- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -3,6 +3,8 @@ ! Test CUDA Fortran data transfer using assignment statements. module mod1 + real, constant :: c1 = 1.0 + type :: t1 integer :: i end type @@ -495,7 +497,7 @@ subroutine sub25() end ! CHECK-LABEL: func.func @_QPsub25() -! CHECK: fir.allocmem !fir.array, %15#1 {bindc_name = ".tmp", uniq_name = ""} +! CHECK: fir.allocmem !fir.array, %{{.*}} {bindc_name = ".tmp", uniq_name = ""} ! CHECK: cuf.data_transfer %{{.*}} to %{{.*}} {transfer_kind = #cuf.cuda_transfer} : !fir.ref>>>, !fir.box> ! CHECK: hlfir.assign %{{.*}} to %{{.*}} : f64, !fir.ref ! CHECK: fir.freemem %{{.*}} : !fir.heap> @@ -724,3 +726,13 @@ subroutine sub41() lm(1:5) = a%m(1:5) end subroutine + +subroutine sub42() + use mod1 + real :: a + a = c1 * c1 +end subroutine + +! CHECK-LABEL: func.func @_QPsub42() +! CHECK-NOT: cuf.data_transfer +! 
CHECK: hlfir.assign From 12fba5cd1f37d36672be5129332bd3c8a7693f13 Mon Sep 17 00:00:00 2001 From: Eugene Epshteyn Date: Mon, 22 Jun 2026 17:35:32 -0400 Subject: [PATCH 091/511] [flang][Semantics] Do not require explicit interface checks for statement functions (#205023) https://github.com/llvm/llvm-project/pull/198610 caused a regression, where the code path for explicit interface checks was also used for the statement functions arg check. Refactor the code to avoid explicit interface checks. Fixes #203500 Assisted-by: AI --- flang/lib/Semantics/check-call.cpp | 9 +++++++-- flang/lib/Semantics/check-call.h | 7 ++++++- flang/lib/Semantics/expression.cpp | 8 ++++---- flang/test/Semantics/call47.f90 | 14 ++++++++++++++ 4 files changed, 31 insertions(+), 7 deletions(-) diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index dcd7dc4288f6f..97bb346cc72bb 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -27,8 +27,8 @@ namespace characteristics = Fortran::evaluate::characteristics; namespace Fortran::semantics { -void CheckImplicitInterfaceArg(evaluate::ActualArgument &arg, - parser::ContextualMessages &messages, SemanticsContext &context) { +void CheckImplicitInterfaceArgKeywords( + const evaluate::ActualArgument &arg, parser::ContextualMessages &messages) { auto restorer{ messages.SetLocation(arg.sourceLocation().value_or(messages.at()))}; if (auto kw{arg.keyword()}) { @@ -36,6 +36,11 @@ void CheckImplicitInterfaceArg(evaluate::ActualArgument &arg, "Keyword '%s=' may not appear in a reference to a procedure with an implicit interface"_err_en_US, *kw); } +} + +void CheckImplicitInterfaceArg(evaluate::ActualArgument &arg, + parser::ContextualMessages &messages, SemanticsContext &context) { + CheckImplicitInterfaceArgKeywords(arg, messages); auto type{arg.GetType()}; if (type) { if (type->IsAssumedType()) { diff --git a/flang/lib/Semantics/check-call.h b/flang/lib/Semantics/check-call.h index 
fb021d23dabc8..cdfecd58423a7 100644 --- a/flang/lib/Semantics/check-call.h +++ b/flang/lib/Semantics/check-call.h @@ -25,8 +25,13 @@ namespace Fortran::semantics { class Scope; class SemanticsContext; +// Check keyword constraints on actual arguments for procedures with implicit +// interfaces. +void CheckImplicitInterfaceArgKeywords( + const evaluate::ActualArgument &, parser::ContextualMessages &); + // Check constraints on actual arguments for procedures with implicit -// interfaces. Used for statement function calls and external procedures. +// interfaces. void CheckImplicitInterfaceArg(evaluate::ActualArgument &, parser::ContextualMessages &, SemanticsContext &); diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 709292e62f97c..6c0a21cc769c1 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -3791,7 +3791,6 @@ std::optional ExpressionAnalyzer::CheckCall( bool treatExternalAsImplicit{ IsExternalCalledImplicitly(callSite, proc.GetSymbol())}; const Symbol *procSymbol{proc.GetSymbol()}; - // Statement functions have implicit interfaces and require the same checks bool isStatementFunction{ procSymbol && procSymbol->flags().test(Symbol::Flag::StmtFunction)}; std::optional chars; @@ -3864,13 +3863,14 @@ std::optional ExpressionAnalyzer::CheckCall( } } if (isStatementFunction) { - // Statement functions have implicit interfaces; check for - // keyword arguments and other implicit interface constraints + // Statement functions have implicit interfaces, so keyword actual + // arguments are not allowed. They are exempt from the explicit-interface + // requirements of F2023 15.4.2.2. 
parser::ContextualMessages &messages{ context_.foldingContext().messages()}; for (auto &arg : arguments) { if (arg) { - semantics::CheckImplicitInterfaceArg(*arg, messages, context_); + semantics::CheckImplicitInterfaceArgKeywords(*arg, messages); } } } diff --git a/flang/test/Semantics/call47.f90 b/flang/test/Semantics/call47.f90 index 0c6e9071f33f0..ddd80129be8f7 100644 --- a/flang/test/Semantics/call47.f90 +++ b/flang/test/Semantics/call47.f90 @@ -24,3 +24,17 @@ program test_stmt_func_keyword !ERROR: Keyword 'y=' may not appear in a reference to a procedure with an implicit interface c = f2(x=10, y=20) end program + +! A parameterized derived type actual argument to a statement function does not +! require an explicit interface, so the reference below is valid (no error). +subroutine pdt_actual_to_stmt_func() + type t(k) + integer, kind :: k = 1 + end type + + type(t) :: x + integer :: f + + f(x) = 0 + print *, f(x) +end subroutine From 5b81236db407871fc56b38b712a4f05f98d6c52a Mon Sep 17 00:00:00 2001 From: adams381 Date: Mon, 22 Jun 2026 16:35:39 -0500 Subject: [PATCH 092/511] [CIR] DataMemberAttr: replace flat index with GEP-style path (#200854) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `DataMemberAttr` stored a single field index relative to the immediately containing class, which broke when the member is inherited: `int Derived::*p = &Derived::x` with `x` in Base produced a spurious `errorNYI` because Derived's CIR record doesn't directly hold `x`. The attribute now stores a GEP-style `member_path` — a sequence of CIR field indices stepping from the pointer's class type down to the member, one level per inheritance hop. `lowerDataMemberConstant` walks the path accumulating element offsets to produce the Itanium ABI byte value. `buildMemberPath` searches the `destClass` record tree for the target field (`findFieldMemberPath`). 
`CK_BaseToDerivedMemberPointer` and `CK_DerivedToBaseMemberPointer` return `{}` in ConstExprEmitter, delegating to the APValue path which builds the correct path via `buildMemberPath`. Virtual bases are not yet handled. `CK_ReinterpretMemberPointer` remains `errorNYI` on this branch; a follow-up PR will add that separately. --- .../include/clang/CIR/Dialect/IR/CIRAttrs.td | 38 +++++---- clang/lib/CIR/CodeGen/CIRGenBuilder.h | 4 +- clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp | 22 +++--- clang/lib/CIR/CodeGen/CIRGenModule.cpp | 77 ++++++++++++++++++- clang/lib/CIR/CodeGen/CIRGenModule.h | 11 +++ clang/lib/CIR/Dialect/IR/CIRAttrs.cpp | 68 ++++++++++++---- .../TargetLowering/LowerItaniumCXXABI.cpp | 13 +++- clang/test/CIR/CodeGen/nonzeroinit-struct.cpp | 4 +- .../CIR/CodeGen/pointer-to-data-member.cpp | 63 ++++++++++++++- clang/test/CIR/IR/invalid-data-member.cir | 13 ++-- 10 files changed, 255 insertions(+), 58 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td index e3c992ed327ac..356fac33a5733 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td @@ -538,45 +538,55 @@ def CIR_DataMemberAttr : CIR_ValueLikeAttr<"DataMember", "data_member"> { let parameters = (ins AttributeSelfTypeParameter< "", "cir::DataMemberType">:$type, OptionalParameter< - "std::optional">:$member_index); + "mlir::DenseI32ArrayAttr">:$member_path); let description = [{ A data member attribute is a literal attribute that represents a constant pointer-to-data-member value. - The `member_index` parameter represents the index of the pointed-to member - within its containing record. It is an optional parameter; lack of this - parameter indicates a null pointer-to-data-member value. + The `member_path` parameter is a GEP-like sequence of field indices + navigating from `classTy` down to the pointed-to member. 
An absent + `member_path` represents a null pointer-to-data-member. - Example: + Examples: ``` - #ptr = #cir.data_member<1> : !cir.data_member + // int Point::*p = &Point::z (z is field 2) + #cir.data_member<[2]> : !cir.data_member - #null = #cir.data_member : !cir.data_member + // int Derived::*p = &Derived::x (Base subobject at [0], x at [0]) + #cir.data_member<[0, 0]> : !cir.data_member + + // null + #cir.data_member : !cir.data_member ``` }]; let builders = [ + // Null pointer-to-data-member. AttrBuilderWithInferredContext<(ins "cir::DataMemberType":$type), [{ - return $_get(type.getContext(), type, std::nullopt); + return $_get(type.getContext(), type, mlir::DenseI32ArrayAttr{}); }]>, + // Non-null pointer-to-data-member with an explicit field-index path. AttrBuilderWithInferredContext<(ins "cir::DataMemberType":$type, - "unsigned":$member_index), [{ - return $_get(type.getContext(), type, member_index); + "llvm::ArrayRef":$path), [{ + return $_get(type.getContext(), type, + mlir::DenseI32ArrayAttr::get(type.getContext(), path)); }]>, ]; - // This attribute gets lowered during CXXABILowering + // This attribute gets lowered during CXXABILowering. let hasAttrToValueLowering = 0; let genVerifyDecl = 1; let assemblyFormat = [{ - `<` ($member_index^):(`null`)? 
`>` + `<` custom($member_path) `>` }]; let extraClassDeclaration = [{ - bool isNullPtr() const { - return !getMemberIndex().has_value(); + bool isNullPtr() const { return !getMemberPath(); } + llvm::ArrayRef getPath() const { + assert(!isNullPtr() && "getPath() called on null data member pointer"); + return getMemberPath().asArrayRef(); } }]; } diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index 3188b096579be..3204ba1a319f0 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -264,8 +264,8 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { // --------------------------- cir::DataMemberAttr getDataMemberAttr(cir::DataMemberType ty, - unsigned memberIndex) { - return cir::DataMemberAttr::get(ty, memberIndex); + llvm::ArrayRef path) { + return cir::DataMemberAttr::get(ty, path); } cir::DataMemberAttr getNullDataMemberAttr(cir::DataMemberType ty) { diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp index 5208af44412a3..8ee29484ce64b 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp @@ -934,9 +934,14 @@ class ConstExprEmitter case CK_ToUnion: case CK_AddressSpaceConversion: case CK_ReinterpretMemberPointer: + cgm.errorNYI(e->getBeginLoc(), "ConstExprEmitter::VisitCastExpr"); + return {}; + case CK_DerivedToBaseMemberPointer: case CK_BaseToDerivedMemberPointer: - cgm.errorNYI(e->getBeginLoc(), "ConstExprEmitter::VisitCastExpr"); + // Return {} to let the APValue evaluator handle member pointer type + // conversions. The APValue::MemberPointer case in tryEmitPrivate + // already builds the correct GEP path for cross-class member pointers. 
return {}; case CK_LValueToRValue: @@ -1951,12 +1956,6 @@ mlir::Attribute ConstantEmitter::tryEmitPrivate(const APValue &value, if (!memberDecl) return builder.getZeroInitAttr(cgm.convertType(destType)); - if (value.isMemberPointerToDerivedMember()) { - cgm.errorNYI( - "ConstExprEmitter::tryEmitPrivate member pointer to derived member"); - return {}; - } - if (auto const *cxxDecl = dyn_cast(memberDecl)) { auto ty = mlir::cast(cgm.convertType(destType)); if (cxxDecl->isVirtual()) @@ -1968,9 +1967,14 @@ mlir::Attribute ConstantEmitter::tryEmitPrivate(const APValue &value, } auto cirTy = mlir::cast(cgm.convertType(destType)); - const auto *fieldDecl = cast(memberDecl); - return builder.getDataMemberAttr(cirTy, fieldDecl->getFieldIndex()); + const auto *mpt = destType->castAs(); + const auto *destClass = mpt->getMostRecentCXXRecordDecl(); + std::optional> path = + cgm.buildMemberPath(destClass, fieldDecl); + if (!path) + return {}; + return builder.getDataMemberAttr(cirTy, *path); } case APValue::LValue: return ConstantLValueEmitter(*this, value, destType).tryEmit(); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index b377f84e8d370..0897a3a897580 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -2277,8 +2277,81 @@ mlir::Value CIRGenModule::emitMemberPointerConstant(const UnaryOperator *e) { // Otherwise, a member data pointer. 
auto ty = mlir::cast(convertType(e->getType())); const auto *fieldDecl = cast(decl); - return cir::ConstantOp::create( - builder, loc, builder.getDataMemberAttr(ty, fieldDecl->getFieldIndex())); + const auto *mpt = e->getType()->castAs(); + const auto *destClass = mpt->getMostRecentCXXRecordDecl(); + std::optional> path = + buildMemberPath(destClass, fieldDecl); + if (!path) + return {}; + return cir::ConstantOp::create(builder, loc, + builder.getDataMemberAttr(ty, *path)); +} + +std::optional> +CIRGenModule::buildMemberPath(const CXXRecordDecl *destClass, + const FieldDecl *field) { + llvm::SmallVector path; + if (!findFieldMemberPath(destClass, field, path)) + return std::nullopt; + return path; +} + +bool CIRGenModule::findFieldMemberPath(const CXXRecordDecl *currentClass, + const FieldDecl *field, + llvm::SmallVectorImpl &path) { + const CIRGenRecordLayout &layout = + getTypes().getCIRGenRecordLayout(currentClass); + + // The field is declared directly in this class. + if (field->getParent() == currentClass) { + int32_t fieldIdx; + if (currentClass->isUnion()) { + // For unions, getCIRFieldNo always returns 0 for every union member (all + // members share offset 0 in the CIR record). Use the declaration-order + // index to distinguish members with the same type at the same offset. + if (!layout.isZeroInitializable()) { + errorNYI(field->getLocation(), + "data member pointer for non-zero-initializable union"); + return false; + } + fieldIdx = static_cast(field->getFieldIndex()); + } else { + fieldIdx = static_cast(layout.getCIRFieldNo(field)); + } + path.push_back(fieldIdx); + return true; + } + + // Otherwise search the base subobjects. A virtual base only blocks lowering + // when the field actually lives within it; a virtual base elsewhere in the + // hierarchy must not stop us from reaching a member through a non-virtual + // path. 
+ for (const CXXBaseSpecifier &base : currentClass->bases()) { + const auto *baseDecl = + cast(base.getType()->getAsRecordDecl()); + + if (base.isVirtual()) { + // A pointer to a data member that traverses a virtual base is ill-formed, + // so this guard only fires defensively if the member is reached through + // the virtual base. An unrelated virtual base is skipped so it does not + // block members reached through a non-virtual path. + llvm::SmallVector discardedPath; + if (findFieldMemberPath(baseDecl, field, discardedPath)) { + errorNYI(field->getLocation(), + "data member pointer through virtual base"); + return false; + } + continue; + } + + auto baseFieldIdx = + static_cast(layout.getNonVirtualBaseCIRFieldNo(baseDecl)); + path.push_back(baseFieldIdx); + if (findFieldMemberPath(baseDecl, field, path)) + return true; + path.pop_back(); + } + return false; } void CIRGenModule::emitDeclContext(const DeclContext *dc) { diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index 0883a6b891c61..ed956b7585f95 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -722,6 +722,11 @@ class CIRGenModule : public CIRGenTypeCache { /// member, depending on the type of mpt. mlir::TypedAttr emitNullMemberAttr(QualType t, const MemberPointerType *mpt); + /// Build a GEP-style field-index path from \p destClass to \p field. + /// Returns std::nullopt and emits errorNYI for virtual-base paths. + std::optional> + buildMemberPath(const CXXRecordDecl *destClass, const FieldDecl *field); + llvm::StringRef getMangledName(clang::GlobalDecl gd); // This function is to support the OpenACC 'bind' clause, which names an // alternate name for the function to be called by. 
This function mangles @@ -930,6 +935,12 @@ class CIRGenModule : public CIRGenTypeCache { void addGlobalAnnotations(const clang::ValueDecl *d, mlir::Operation *gv); private: + /// Search \p currentClass and its non-virtual base subobjects for \p field, + /// appending CIR field indices along the path from \p currentClass. + bool findFieldMemberPath(const CXXRecordDecl *currentClass, + const FieldDecl *field, + llvm::SmallVectorImpl &path); + // An ordered map of canonical GlobalDecls to their mangled names. llvm::MapVector mangledDeclNames; llvm::StringMap manglings; diff --git a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp index 9b1593ffde154..eba5aec0ca142 100644 --- a/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRAttrs.cpp @@ -70,6 +70,13 @@ static mlir::ParseResult parseConstPtr(mlir::AsmParser &parser, static void printConstPtr(mlir::AsmPrinter &p, mlir::IntegerAttr value); +static mlir::ParseResult +parseDataMemberPath(mlir::AsmParser &parser, + mlir::DenseI32ArrayAttr &memberPath); + +static void printDataMemberPath(mlir::AsmPrinter &p, + mlir::DenseI32ArrayAttr memberPath); + #define GET_ATTRDEF_CLASSES #include "clang/CIR/Dialect/IR/CIROpsAttributes.cpp.inc" @@ -262,6 +269,26 @@ static void printConstPtr(AsmPrinter &p, mlir::IntegerAttr value) { p << value; } +static ParseResult parseDataMemberPath(AsmParser &parser, + mlir::DenseI32ArrayAttr &memberPath) { + if (parser.parseOptionalKeyword("null").succeeded()) + return success(); + + auto parsed = mlir::FieldParser::parse(parser); + if (mlir::failed(parsed)) + return failure(); + memberPath = *parsed; + return success(); +} + +static void printDataMemberPath(AsmPrinter &p, + mlir::DenseI32ArrayAttr memberPath) { + if (!memberPath) + p << "null"; + else + p.printStrippedAttrOrType(memberPath); +} + //===----------------------------------------------------------------------===// // IntAttr definitions 
//===----------------------------------------------------------------------===// @@ -509,27 +536,34 @@ Attribute CUDAVarRegistrationInfoAttr::parse(AsmParser &parser, Type odsType) { LogicalResult DataMemberAttr::verify(function_ref emitError, cir::DataMemberType ty, - std::optional memberIndex) { - // DataMemberAttr without a given index represents a null value. - if (!memberIndex.has_value()) - return success(); + mlir::DenseI32ArrayAttr memberPath) { + if (!memberPath) + return success(); // null pointer — always valid - cir::RecordType recTy = ty.getClassTy(); - if (recTy.isIncomplete()) - return emitError() - << "incomplete 'cir.record' cannot be used to build a non-null " - "data member pointer"; + if (memberPath.empty()) + return emitError() << "#cir.data_member path must not be empty"; - unsigned memberIndexValue = memberIndex.value(); - if (memberIndexValue >= recTy.getNumElements()) - return emitError() - << "member index of a #cir.data_member attribute is out of range"; + mlir::Type currentTy = ty.getClassTy(); + for (auto [step, idx] : llvm::enumerate(memberPath.asArrayRef())) { + auto recTy = mlir::dyn_cast(currentTy); + if (!recTy) + return emitError() << "#cir.data_member path step " << step + << " reaches a non-record type"; + + if (recTy.isIncomplete()) + return success(); // cannot validate further; trust the builder + + if (idx < 0 || static_cast(idx) >= recTy.getNumElements()) + return emitError() << "#cir.data_member path index " << idx << " at step " + << step << " is out of range"; + + currentTy = recTy.getMembers()[idx]; + } - mlir::Type memberTy = recTy.getMembers()[memberIndexValue]; - if (memberTy != ty.getMemberTy()) + if (currentTy != ty.getMemberTy()) return emitError() - << "member type of a #cir.data_member attribute must match the " - "attribute type"; + << "member type of a #cir.data_member attribute must match " + "the attribute type"; return success(); } diff --git 
a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp index 0e246c8612f25..cccbe70876c3f 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp @@ -202,10 +202,15 @@ mlir::TypedAttr LowerItaniumCXXABI::lowerDataMemberConstant( } else { // Itanium C++ ABI 2.3: // A pointer to data member is an offset from the base address of - // the class object containing it, represented as a ptrdiff_t - unsigned memberIndex = attr.getMemberIndex().value(); - memberOffset = - attr.getType().getClassTy().getElementOffset(layout, memberIndex); + // the class object containing it, represented as a ptrdiff_t. + // Walk the GEP-style path, accumulating the byte offset at each step. + memberOffset = 0; + mlir::Type currentTy = attr.getType().getClassTy(); + for (int32_t idx : attr.getPath()) { + auto recTy = mlir::cast(currentTy); + memberOffset += static_cast(recTy.getElementOffset(layout, idx)); + currentTy = recTy.getMembers()[idx]; + } } mlir::Type abiTy = lowerDataMemberType(attr.getType(), typeConverter); diff --git a/clang/test/CIR/CodeGen/nonzeroinit-struct.cpp b/clang/test/CIR/CodeGen/nonzeroinit-struct.cpp index e945f5147bcf3..3540ad0d71d2f 100644 --- a/clang/test/CIR/CodeGen/nonzeroinit-struct.cpp +++ b/clang/test/CIR/CodeGen/nonzeroinit-struct.cpp @@ -55,7 +55,7 @@ Trivial t; // LLVM-DAG: @t = global %struct.Trivial { i32 0, double 0.000000e+00, i64 -1 }, align 8 Trivial t_init{1,2.2, &Other::x}; -// CIR-BEFORE-DAG: cir.global external @t_init = #cir.const_record<{#cir.int<1> : !s32i, #cir.fp<2.200000e+00> : !cir.double, #cir.data_member<0> : !cir.data_member}> : !rec_Trivial {alignment = 8 : i64} +// CIR-BEFORE-DAG: cir.global external @t_init = #cir.const_record<{#cir.int<1> : !s32i, #cir.fp<2.200000e+00> : !cir.double, #cir.data_member<[0]> : !cir.data_member}> : !rec_Trivial 
{alignment = 8 : i64} // CIR-AFTER-DAG: cir.global external @t_init = #cir.const_record<{#cir.int<1> : !s32i, #cir.fp<2.200000e+00> : !cir.double, #cir.int<0> : !s64i}> : !rec_Trivial {alignment = 8 : i64} // LLVM-DAG: @t_init = global %struct.Trivial { i32 1, double 2.200000e+00, i64 0 }, align 8 @@ -84,7 +84,7 @@ extern "C" void local() { // LLVM: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}%[[MPT_INIT]], ptr {{.*}}@__const.local.localMpt_init, i64 32, i1 false) Trivial localT_init{1,2.2, &Other::x}; - // CIR-BEFORE: %[[T_INIT_VAL:.*]] = cir.const #cir.const_record<{#cir.int<1> : !s32i, #cir.fp<2.200000e+00> : !cir.double, #cir.data_member<0> : !cir.data_member}> : !rec_Trivial + // CIR-BEFORE: %[[T_INIT_VAL:.*]] = cir.const #cir.const_record<{#cir.int<1> : !s32i, #cir.fp<2.200000e+00> : !cir.double, #cir.data_member<[0]> : !cir.data_member}> : !rec_Trivial // CIR-BEFORE: cir.store align(8) %[[T_INIT_VAL]], %[[T_INIT]] : !rec_Trivial, !cir.ptr // CIR-AFTER: %[[T_INIT_VAL:.*]] = cir.get_global @__const.local.localT_init : !cir.ptr diff --git a/clang/test/CIR/CodeGen/pointer-to-data-member.cpp b/clang/test/CIR/CodeGen/pointer-to-data-member.cpp index b710f26f96ba7..957bc1131f403 100644 --- a/clang/test/CIR/CodeGen/pointer-to-data-member.cpp +++ b/clang/test/CIR/CodeGen/pointer-to-data-member.cpp @@ -19,7 +19,7 @@ int Point::*ptr_none = nullptr; // OGCG: @ptr_none = global i64 -1 int Point::*pt_member = &Point::z; -// CIR-BEFORE: cir.global external @pt_member = #cir.data_member<2> : !cir.data_member +// CIR-BEFORE: cir.global external @pt_member = #cir.data_member<[2]> : !cir.data_member // CIR-AFTER: cir.global external @pt_member = #cir.int<8> : !s64i // LLVM: @pt_member = global i64 8 // OGCG: @pt_member = global i64 8 @@ -41,20 +41,79 @@ int Point::*pt_member_nested_region = test1(); // CIR-AFTER: %[[MEMBER_PTR_ADDR:.*]] = cir.get_global @pt_member_nested_region : !cir.ptr // CIR-AFTER: %[[MEMBER_PTR:.*]] = cir.call @_Z5test1v() : () -> !s64i // CIR-AFTER: 
cir.store align(8) %[[MEMBER_PTR]], %[[MEMBER_PTR_ADDR]] : !s64i, !cir.ptr +// CIR-AFTER: cir.global external @p_inherit_single = #cir.int<0> : !s64i +// CIR-AFTER: cir.global external @p_multi_inherit = #cir.int<8> : !s64i +// CIR-AFTER: cir.global external @p_depth3 = #cir.int<8> : !s64i +// CIR-AFTER: cir.global external @p_vbase_sibling_nv = #cir.int<8> : !s64i +// CIR-AFTER: cir.global external @p_vbase_sibling_direct = #cir.int<12> : !s64i // LLVM: @pt_member_nested_region = global i64 -1, align 8 +// LLVM: @p_inherit_single = global i64 0, align 8 +// LLVM: @p_multi_inherit = global i64 8, align 8 +// LLVM: @p_depth3 = global i64 8, align 8 +// LLVM: @p_vbase_sibling_nv = global i64 8, align 8 +// LLVM: @p_vbase_sibling_direct = global i64 12, align 8 // LLVM: define internal void @__cxx_global_var_init() // LLVM: %[[MEMBER_PTR:.*]] = call i64 @_Z5test1v() // LLVM: store i64 %[[MEMBER_PTR]], ptr @pt_member_nested_region, align 8 // OGCG: @pt_member_nested_region = global i64 -1, align 8 +// OGCG: @p_inherit_single = global i64 0, align 8 +// OGCG: @p_multi_inherit = global i64 8, align 8 +// OGCG: @p_depth3 = global i64 8, align 8 +// OGCG: @p_vbase_sibling_nv = global i64 8, align 8 +// OGCG: @p_vbase_sibling_direct = global i64 12, align 8 // OGCG emits __cxx_global_var_init between test1() and test2(). See checks below. 
+struct InheritBase { + int x; +}; + +struct InheritDerived : InheritBase { + double y; +}; + +int InheritDerived::*p_inherit_single = &InheritDerived::x; +// CIR-BEFORE: cir.global external @p_inherit_single = #cir.data_member<[0, 0]> : !cir.data_member + +struct MultiA { + double a; +}; + +struct MultiB { + int b; +}; + +struct MultiM : MultiA, MultiB {}; + +int MultiM::*p_multi_inherit = &MultiM::b; +// CIR-BEFORE: cir.global external @p_multi_inherit = #cir.data_member<[1, 0]> : !cir.data_member + +struct DepthA { double a; int x; }; +struct DepthB : DepthA { double y; }; +struct DepthC : DepthB { float z; }; + +int DepthC::*p_depth3 = &DepthC::x; +// CIR-BEFORE: cir.global external @p_depth3 = #cir.data_member<[0, 0, 1]> : !cir.data_member + +struct VBaseField { int v; }; +struct NVBaseField { int x; }; +struct Diamond : virtual VBaseField, NVBaseField { int d; }; + +// A virtual base elsewhere in the hierarchy must not block a member reached +// through a non-virtual base. +int Diamond::*p_vbase_sibling_nv = &NVBaseField::x; +// CIR-BEFORE: cir.global external @p_vbase_sibling_nv = #cir.data_member<[1, 0]> : !cir.data_member + +// A direct member of a class with a virtual base still resolves. 
+int Diamond::*p_vbase_sibling_direct = &Diamond::d; +// CIR-BEFORE: cir.global external @p_vbase_sibling_direct = #cir.data_member<[2]> : !cir.data_member + // Checks for test1() // CIR-BEFORE: cir.func {{.*}} @_Z5test1v() -> !cir.data_member attributes {{{.*}}nothrow} { // CIR-BEFORE: %[[RETVAL:.*]] = cir.alloca "__retval" {{.*}} : !cir.ptr> -// CIR-BEFORE: %[[MEMBER:.*]] = cir.const #cir.data_member<1> : !cir.data_member +// CIR-BEFORE: %[[MEMBER:.*]] = cir.const #cir.data_member<[1]> : !cir.data_member // CIR-BEFORE: cir.store %[[MEMBER]], %[[RETVAL]] : !cir.data_member, !cir.ptr> // CIR-BEFORE: %[[RET:.*]] = cir.load %[[RETVAL]] : !cir.ptr>, !cir.data_member // CIR-BEFORE: cir.return %[[RET]] : !cir.data_member diff --git a/clang/test/CIR/IR/invalid-data-member.cir b/clang/test/CIR/IR/invalid-data-member.cir index 97bde1bc42288..d072d164d17b1 100644 --- a/clang/test/CIR/IR/invalid-data-member.cir +++ b/clang/test/CIR/IR/invalid-data-member.cir @@ -7,15 +7,16 @@ !struct1 = !cir.struct<"Struct1" {!u16i, !u32i}> // expected-error@+1 {{member type of a #cir.data_member attribute must match the attribute type}} -#invalid_member_ty = #cir.data_member<0> : !cir.data_member +#invalid_member_ty = #cir.data_member<[0]> : !cir.data_member // ----- !u16i = !cir.int -!incomplete_struct = !cir.struct<"Incomplete" incomplete> +!u32i = !cir.int +!struct1 = !cir.struct<"Struct1" {!u16i, !u32i}> -// expected-error@+1 {{incomplete 'cir.record' cannot be used to build a non-null data member pointer}} -#incomplete_cls_member = #cir.data_member<0> : !cir.data_member +// expected-error@+1 {{#cir.data_member path index 2 at step 0 is out of range}} +#invalid_member_ty = #cir.data_member<[2]> : !cir.data_member // ----- @@ -23,8 +24,8 @@ !u32i = !cir.int !struct1 = !cir.struct<"Struct1" {!u16i, !u32i}> -// expected-error@+1 {{member index of a #cir.data_member attribute is out of range}} -#invalid_member_ty = #cir.data_member<2> : !cir.data_member +// expected-error@+1 
{{#cir.data_member path must not be empty}} +#empty_path = #cir.data_member<[]> : !cir.data_member // ----- From 7c75868c1ea127ae13a2b9eca24820d252d48435 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Mon, 22 Jun 2026 14:53:24 -0700 Subject: [PATCH 093/511] [flang] Reduce FIR AA overhead for functions with one scope. (#204009) Avoid overheads of collectScopedOrigins and getDeclarationScope/DominanceInfo when the values passed to FIR AA belong to a function with a single dummy scope. --- .../flang/Optimizer/Analysis/AliasAnalysis.h | 15 ++++++ .../lib/Optimizer/Analysis/AliasAnalysis.cpp | 52 +++++++++++++++++-- .../alias-analysis-host-assoc.fir | 9 ++-- .../alias-analysis-scoped-origins.fir | 37 +++++++++---- 4 files changed, 96 insertions(+), 17 deletions(-) diff --git a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h index fa4a673683df4..832634a708dba 100644 --- a/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h +++ b/flang/include/flang/Optimizer/Analysis/AliasAnalysis.h @@ -359,6 +359,16 @@ struct AliasAnalysis { /// POINTER object or a raw fir::PointerType. static bool isPointerReference(mlir::Type ty); + /// Return true if the function containing \p v has more than one + /// fir.dummy_scope op (e.g. the function body has been inlined into). + /// Scope-aware disambiguation in alias(lhs, rhs) is only meaningful in + /// that case; skipping it for functions with just one scope avoids the + /// getDeclarationScope/DominanceInfo overhead in getSource. + /// Both true and false results are cached in multiScopeCache so the + /// function walk is paid at most once per funcOp per AliasAnalysis + /// instance. + bool functionHasMultipleScopes(mlir::Value v); + private: /// Build an intermediate Source rooted at the declare captured by the /// snapshot. 
Reuses getSource(declValue) for the SourceKind / origin @@ -431,6 +441,11 @@ struct AliasAnalysis { domInfoCache; llvm::DenseMap> sortedScopeCache; + /// Per-function cache: true iff the function contains more than one + /// fir.dummy_scope op (i.e. has been inlined into). Populated by + /// functionHasMultipleScopes(); both true and false are cached so that + /// repeated queries are O(1) without re-walking the function body. + llvm::DenseMap multiScopeCache; }; inline bool operator==(const AliasAnalysis::Source::SourceOrigin &lhs, diff --git a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp index 838fcffc5fa66..a30d54841dd2e 100644 --- a/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp +++ b/flang/lib/Optimizer/Analysis/AliasAnalysis.cpp @@ -599,9 +599,17 @@ static mlir::Value getZeroOffsetViewRoot(mlir::Value val) { AliasResult AliasAnalysis::alias(mlir::Value lhs, mlir::Value rhs) { // A wrapper around alias(Source lhsSrc, Source rhsSrc, mlir::Value lhs, // mlir::Value rhs) This allows a user to provide Source that may be obtained - // through other dialects - auto lhsSrc = getSource(lhs); - auto rhsSrc = getSource(rhs); + // through other dialects. + // + // Scope-aware refinement is only meaningful after inlining, when the + // function contains more than one fir.dummy_scope op. Skip + // collectScopedOrigins and the scope-pair loop for non-inlined functions + // to avoid the per-query getDeclarationScope/DominanceInfo overhead. 
+ bool multiScopes = functionHasMultipleScopes(lhs); + auto lhsSrc = + getSource(lhs, /*getLastInstantiationPoint=*/false, multiScopes); + auto rhsSrc = + getSource(rhs, /*getLastInstantiationPoint=*/false, multiScopes); AliasResult result = alias(lhsSrc, rhsSrc, lhs, rhs); // Scope-aware refinement after inlining: if both walks crossed declares @@ -615,7 +623,8 @@ AliasResult AliasAnalysis::alias(mlir::Value lhs, mlir::Value rhs) { // and pointer-dereferenced paths remain correctly reported as MayAlias. // Short-circuit on NoAlias since any pair that disambiguates is // decisive. - if (result == AliasResult::NoAlias || result == AliasResult::MustAlias) + if (!multiScopes || result == AliasResult::NoAlias || + result == AliasResult::MustAlias) return result; for (const auto &lhsScopedOrigin : lhsSrc.scopedOrigins) { if (!lhsScopedOrigin.scope) @@ -1295,6 +1304,18 @@ AliasAnalysis::Source AliasAnalysis::getSource(mlir::Value v, type = SourceKind::Allocate; v = def; defOp = nullptr; + } else if (boxSrc.kind == SourceKind::HostAssoc) { + // Box loaded from a host-associated descriptor: classify + // the dereferenced target as HostAssoc (not Indirect) so + // alias() can apply the host-assoc/pointer rules instead + // of coarsening to MayAlias. The access path (PointerDeref/ + // AllocDeref step) and Pointer attribute were already set + // above, so the resulting Source matches the one that + // buildSourceAtDeclare() rebuilds during scope-aware + // refinement. 
+ type = SourceKind::HostAssoc; + v = def; + defOp = nullptr; } else if (isDummyArgument(def)) { defOp = nullptr; v = def; @@ -1758,4 +1779,27 @@ fir::AliasAnalysis::Source fir::AliasAnalysis::buildSourceAtDeclare( return source; } +bool fir::AliasAnalysis::functionHasMultipleScopes(mlir::Value v) { + mlir::func::FuncOp funcOp; + if (mlir::Operation *defOp = v.getDefiningOp()) + funcOp = defOp->getParentOfType(); + else if (auto bArg = mlir::dyn_cast(v)) + if (mlir::Region *region = bArg.getOwner()->getParent()) + funcOp = region->getParentOfType(); + if (!funcOp) + return true; // conservative + mlir::Operation *funcOpPtr = funcOp.getOperation(); + auto it = multiScopeCache.find(funcOpPtr); + if (it != multiScopeCache.end()) + return it->second; + // Walk counting DummyScopeOps, stop early at 2. + unsigned count = 0; + funcOp.walk([&](fir::DummyScopeOp) -> mlir::WalkResult { + return ++count >= 2 ? mlir::WalkResult::interrupt() + : mlir::WalkResult::advance(); + }); + // Cache both true and false so subsequent queries are O(1). + return multiScopeCache.try_emplace(funcOpPtr, count >= 2).first->second; +} + } // namespace fir diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-host-assoc.fir b/flang/test/Analysis/AliasAnalysis/alias-analysis-host-assoc.fir index 7f90384ac99c5..d570d039432bd 100644 --- a/flang/test/Analysis/AliasAnalysis/alias-analysis-host-assoc.fir +++ b/flang/test/Analysis/AliasAnalysis/alias-analysis-host-assoc.fir @@ -184,9 +184,12 @@ func.func @_QFtest5Pinner(%arg0: !fir.ref> {fir.bindc_name = // end subroutine inner // end subroutine test6 -// F18 15.5.2.13 (4): -// FIXME: 'x' is classified as Indirect access leading to a conservative reply: -// CHECK: test6_y(1)#0 <-> test6_x(1)#0: MayAlias +// F18 15.5.2.13 (4): 'x' is a host-associated POINTER and 'y' is a non-TARGET +// dummy, so 'x' cannot be associated with 'y' and they do not alias. 
The +// host-associated pointer descriptor load is now classified as HostAssoc +// rather than Indirect, so this no longer falls back to a conservative +// MayAlias. +// CHECK: test6_y(1)#0 <-> test6_x(1)#0: NoAlias func.func @_QFtest6Pinner(%arg0: !fir.ref> {fir.bindc_name = "y"}, %arg1: !fir.ref>>>>> {fir.host_assoc}) attributes {fir.internal_proc} { %c0_i32 = arith.constant 0 : i32 %0 = fir.coordinate_of %arg1, %c0_i32 : (!fir.ref>>>>>, i32) -> !fir.llvm_ptr>>>> diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-scoped-origins.fir b/flang/test/Analysis/AliasAnalysis/alias-analysis-scoped-origins.fir index 6291ba86caccc..228c720d0450a 100644 --- a/flang/test/Analysis/AliasAnalysis/alias-analysis-scoped-origins.fir +++ b/flang/test/Analysis/AliasAnalysis/alias-analysis-scoped-origins.fir @@ -131,11 +131,17 @@ func.func @_QPtest_two_dummies_fir( // SourceKind::Unknown and the underlying alias() reports MayAlias // ("indirect access"). The ONLY way to disambiguate is via the // ScopedOrigin snapshots taken at the inner-frame declares (which share -// one fir.dummy_scope). This is a regression for buildSourceAtDeclare: -// it must classify each rebuilt Source AT the captured declare's own -// scope (getLastInstantiationPoint=true, yielding SourceKind::Argument) -// rather than walking past the declare back into the fir.if and -// collapsing to Unknown. +// one fir.dummy_scope). +// +// This models REAL two-level inlining: the caller is itself a procedure +// with its own dummy arguments, so it has its own fir.dummy_scope +// (%outer) in addition to the inlined callee's (%inner). With two +// dummy_scope ops, functionHasMultipleScopes() is true and the +// scope-aware refinement is enabled. buildSourceAtDeclare must classify +// each rebuilt Source AT the captured declare's own scope +// (getLastInstantiationPoint=true, yielding SourceKind::Argument) rather +// than walking past the declare back into the fir.if and collapsing to +// Unknown. 
// // CHECK-LABEL: Testing : "_QPtest_nested_inline_region_branch" // CHECK-DAG: field#0 <-> value#0: NoAlias @@ -143,17 +149,28 @@ func.func @_QPtest_nested_inline_region_branch( %arg0: !fir.ref {fir.bindc_name = "field"}, %arg1: !fir.ref {fir.bindc_name = "value", fir.optional}, %cond: i1) { - // field: contiguity copy-in select -> original actual or a local temp. + // Outer (caller) frame: the caller has its own dummy arguments, hence + // its own fir.dummy_scope. + %outer = fir.dummy_scope : !fir.dscope + %f_outer = fir.declare %arg0 dummy_scope %outer + {fortran_attrs = #fir.var_attrs, + uniq_name = "_QFcallerEfield"} + : (!fir.ref, !fir.dscope) -> !fir.ref + %v_outer = fir.declare %arg1 dummy_scope %outer + {fortran_attrs = #fir.var_attrs, + uniq_name = "_QFcallerEvalue"} + : (!fir.ref, !fir.dscope) -> !fir.ref + // field: contiguity copy-in select -> caller actual or a local temp. %ftmp = fir.alloca f32 %f_sel = fir.if %cond -> (!fir.ref) { - fir.result %arg0 : !fir.ref + fir.result %f_outer : !fir.ref } else { fir.result %ftmp : !fir.ref } - // value: OPTIONAL presence select -> actual or absent. - %present = fir.is_present %arg1 : (!fir.ref) -> i1 + // value: OPTIONAL presence select -> caller actual or absent. + %present = fir.is_present %v_outer : (!fir.ref) -> i1 %v_sel = fir.if %present -> (!fir.ref) { - fir.result %arg1 : !fir.ref + fir.result %v_outer : !fir.ref } else { %absent = fir.absent !fir.ref fir.result %absent : !fir.ref From ac1a260461a0cc23aad76d5eced73488e6fa9a0a Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Mon, 22 Jun 2026 14:55:07 -0700 Subject: [PATCH 094/511] [llvm-cas] Fix validation test on Ubuntu / uutils v0.2.2, NFC (#205199) Work around uutils/coreutils#9128 by implementing the 40 byte truncation in Python. Otherwise, this test fails out of the box on Ubuntu 25.10. GNU coreutils supports -s=arg, but Mac truncate does not. Resorting to Python seemed like the cleanest solution. 
The next best idea was to use subshells or other techniques to calculate the file size and subtract 40, but that seemed excessive. --- llvm/test/tools/llvm-cas/validation.test | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/test/tools/llvm-cas/validation.test b/llvm/test/tools/llvm-cas/validation.test index a41cb8a3faad8..54bb7ab11e814 100644 --- a/llvm/test/tools/llvm-cas/validation.test +++ b/llvm/test/tools/llvm-cas/validation.test @@ -42,5 +42,7 @@ RUN: llvm-cas --cas %t/ac --validate # Note: records are 40 bytes (32 hash bytes + 8 byte value), so trim the last # allocated record, leaving it invalid. -RUN: truncate -s -40 %t/ac/v1.1/actions.v1 +# FIXME: Use `truncate -s -40` once Ubuntu 26.04 is the LTS support baseline, +# see https://github.com/uutils/coreutils/issues/9128 +RUN: %python -c "with open(r'%t/ac/v1.1/actions.v1', 'r+b') as f: f.truncate(f.seek(0, 2) - 40)" RUN: not llvm-cas --cas %t/ac --validate From f6a580e102e71ee6b8f1d1ddd63785d6955c1cf9 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Mon, 22 Jun 2026 15:13:40 -0700 Subject: [PATCH 095/511] [docs] Minor edits to project governace docs (#203149) s/non-private/public/ Pencil in the 2027 calendar year election dates, so they are posted well in advance and we have a link anchor we can share. --- llvm/docs/ProjectGovernance.rst | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/llvm/docs/ProjectGovernance.rst b/llvm/docs/ProjectGovernance.rst index 96419384b87db..170f3b8e20123 100644 --- a/llvm/docs/ProjectGovernance.rst +++ b/llvm/docs/ProjectGovernance.rst @@ -63,9 +63,9 @@ Elections and Composition To be a *voting contributor* an individual must be a member of the LLVM GitHub Organization, and either have a public email address on their GitHub profile or -have made a commit to the LLVM project using a non-private email address. 
The -email address on the GitHub public profile or retrieved via commit metadata -will be used for all election-related communication. +have made a commit to the LLVM project using a public email address. The email +address on the GitHub public profile or retrieved via commit metadata will be +used for all election-related communication. Each *area team* will have an odd number of members with a minimum of three (3) members and a maximum of nine (9) elected by the *voting contributors*. @@ -218,6 +218,15 @@ Area Teams * Petr Hosek * David Blaikie +Next Election Cycle +------------------- + +* **January 11, 2027:** Nominations begin. +* **January 25, 2027:** Nominations close and voting begins. +* **February 8, 2027:** Voting closes. +* **February 10, 2027:** Election results announced no later than this date. +* **March 1, 2027:** New area team terms begin. +* **March 1-5, 2027:** Area teams meet to elect chairs and secretaries. Meetings and Contact Information ================================ From e8543d53045c10c63d20bfae9ffe649174a8b75c Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Mon, 22 Jun 2026 22:46:51 +0000 Subject: [PATCH 096/511] [SCEV] Preserve LCSSA when reusing dominating variable This seems to just be a missing case in SCEV. This came up when looking at making LSR preserve LCSSA for the NewPM. Regression test of that nature has been added. 
Reviewers: fhahn, nikic, arsenm Pull Request: https://github.com/llvm/llvm-project/pull/192831 --- .../Utils/ScalarEvolutionExpander.cpp | 2 + .../X86/lcssa-preservation-regression.ll | 40 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index d09d6bc57ee11..125571b18dddc 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -1237,6 +1237,8 @@ SCEVExpander::expandAddRecExprLiterally(SCEVUseT S) { // We have decided to reuse an induction variable of a dominating loop. Apply // truncation and/or inversion of the step. if (TruncTy) { + if (TruncTy != Result->getType() || InvertStep) + Result = fixupLCSSAFormFor(Result); // Truncate the result. if (TruncTy != Result->getType()) Result = Builder.CreateTrunc(Result, TruncTy); diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/lcssa-preservation-regression.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/lcssa-preservation-regression.ll index a07637159d0ac..8f343305bb4f5 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/lcssa-preservation-regression.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/lcssa-preservation-regression.ll @@ -161,3 +161,43 @@ loop1.header: ; preds = %loop1.latc loop1.latch: ; preds = %loop1.header br i1 false, label %funcexit, label %loop1.header } + +define i32 @regression4() { +; CHECK-LABEL: define i32 @regression4() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[WHILE_COND_OUTER:.*]] +; CHECK: [[WHILE_COND_OUTER_LOOPEXIT:.*]]: +; CHECK-NEXT: br label %[[WHILE_COND_OUTER]] +; CHECK: [[WHILE_COND_OUTER]]: +; CHECK-NEXT: br label %[[WHILE_COND:.*]] +; CHECK: [[WHILE_COND]]: +; CHECK-NEXT: br i1 false, label %[[WHILE_COND]], label %[[FOR_BODY_PREHEADER:.*]] +; CHECK: [[FOR_BODY_PREHEADER]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: 
[[LSR_IV:%.*]] = phi i32 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ] +; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i32 [[LSR_IV]], 0 +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1 +; CHECK-NEXT: br i1 false, label %[[WHILE_COND_OUTER_LOOPEXIT]], label %[[FOR_BODY]] +; +entry: + br label %while.cond.outer + +while.cond.outer: ; preds = %for.body, %entry + %phi = phi i32 [ 0, %entry ], [ %add, %for.body ] + br label %while.cond + +while.cond: ; preds = %while.cond, %while.cond.outer + %phi3 = phi i32 [ 0, %while.cond.outer ], [ %add5, %while.cond ] + %phi4 = phi i32 [ 0, %while.cond.outer ], [ %add, %while.cond ] + %add = add i32 %phi4, 1 + %add5 = add i32 %phi3, 1 + br i1 false, label %while.cond, label %for.body + +for.body: ; preds = %for.body, %while.cond + %phi7 = phi i32 [ %add8, %for.body ], [ 0, %while.cond ] + %add8 = add i32 %phi7, 1 + %sub = sub i32 0, %phi3 + %icmp = icmp eq i32 %phi7, %sub + br i1 false, label %while.cond.outer, label %for.body +} From 5d75698ecf99a185b991ec587464a1e94d2bed6e Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Mon, 22 Jun 2026 16:00:56 -0700 Subject: [PATCH 097/511] [docs] Rename 22 popular LLVM docs .rst -> .md (#201243) Tracking issue: #201242 RFC: https://discourse.llvm.org/t/rfc-make-myst-markdown-the-llvm-docs-format-rip-rest/90840 Migration guide: https://llvm.org/docs/SphinxQuickstartTemplate.html#markdown-migration-guidelines Update filename references, but leave the docs with reST syntax to ensure rename detection works. This intentionally breaks the documentation build, but I will immediately follow up by merging the PR that fixes it. 
--- llvm/README.txt | 2 +- llvm/docs/{CMake.rst => CMake.md} | 0 llvm/docs/{CodeGenerator.rst => CodeGenerator.md} | 0 llvm/docs/{CodingStandards.rst => CodingStandards.md} | 0 llvm/docs/CommandGuide/{index.rst => index.md} | 0 llvm/docs/{Contributing.rst => Contributing.md} | 0 llvm/docs/{DeveloperPolicy.rst => DeveloperPolicy.md} | 0 llvm/docs/{GettingInvolved.rst => GettingInvolved.md} | 0 llvm/docs/{GettingStarted.rst => GettingStarted.md} | 0 ...GettingStartedTutorials.rst => GettingStartedTutorials.md} | 0 llvm/docs/{GitHub.rst => GitHub.md} | 0 llvm/docs/GitRepositoryPolicy.md | 2 +- llvm/docs/InstrRefDebugInfo.md | 2 +- llvm/docs/{Passes.rst => Passes.md} | 0 llvm/docs/{ProgrammersManual.rst => ProgrammersManual.md} | 0 llvm/docs/{RFCProcess.rst => RFCProcess.md} | 0 llvm/docs/{Reference.rst => Reference.md} | 0 llvm/docs/RemoveDIsDebugInfo.md | 2 +- .../{SourceLevelDebugging.rst => SourceLevelDebugging.md} | 0 llvm/docs/SphinxQuickstartTemplate.md | 4 ++-- llvm/docs/TableGen/{index.rst => index.md} | 0 llvm/docs/{TestingGuide.rst => TestingGuide.md} | 0 llvm/docs/{UserGuides.rst => UserGuides.md} | 0 .../{WritingAnLLVMBackend.rst => WritingAnLLVMBackend.md} | 0 .../{WritingAnLLVMNewPMPass.rst => WritingAnLLVMNewPMPass.md} | 0 llvm/docs/{WritingAnLLVMPass.rst => WritingAnLLVMPass.md} | 0 llvm/docs/conf.py | 2 +- llvm/docs/{index.rst => index.md} | 0 llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp | 2 +- llvm/lib/IR/DebugInfo.cpp | 2 +- llvm/tools/reduce-chunk-list/reduce-chunk-list.cpp | 2 +- .../lit/tests/Inputs/shtest-define/examples/param-subst.txt | 2 +- 32 files changed, 11 insertions(+), 11 deletions(-) rename llvm/docs/{CMake.rst => CMake.md} (100%) rename llvm/docs/{CodeGenerator.rst => CodeGenerator.md} (100%) rename llvm/docs/{CodingStandards.rst => CodingStandards.md} (100%) rename llvm/docs/CommandGuide/{index.rst => index.md} (100%) rename llvm/docs/{Contributing.rst => Contributing.md} (100%) rename llvm/docs/{DeveloperPolicy.rst => 
DeveloperPolicy.md} (100%) rename llvm/docs/{GettingInvolved.rst => GettingInvolved.md} (100%) rename llvm/docs/{GettingStarted.rst => GettingStarted.md} (100%) rename llvm/docs/{GettingStartedTutorials.rst => GettingStartedTutorials.md} (100%) rename llvm/docs/{GitHub.rst => GitHub.md} (100%) rename llvm/docs/{Passes.rst => Passes.md} (100%) rename llvm/docs/{ProgrammersManual.rst => ProgrammersManual.md} (100%) rename llvm/docs/{RFCProcess.rst => RFCProcess.md} (100%) rename llvm/docs/{Reference.rst => Reference.md} (100%) rename llvm/docs/{SourceLevelDebugging.rst => SourceLevelDebugging.md} (100%) rename llvm/docs/TableGen/{index.rst => index.md} (100%) rename llvm/docs/{TestingGuide.rst => TestingGuide.md} (100%) rename llvm/docs/{UserGuides.rst => UserGuides.md} (100%) rename llvm/docs/{WritingAnLLVMBackend.rst => WritingAnLLVMBackend.md} (100%) rename llvm/docs/{WritingAnLLVMNewPMPass.rst => WritingAnLLVMNewPMPass.md} (100%) rename llvm/docs/{WritingAnLLVMPass.rst => WritingAnLLVMPass.md} (100%) rename llvm/docs/{index.rst => index.md} (100%) diff --git a/llvm/README.txt b/llvm/README.txt index b9b71a3b6daff..aa43033da462e 100644 --- a/llvm/README.txt +++ b/llvm/README.txt @@ -9,7 +9,7 @@ LLVM is open source software. You may freely distribute it under the terms of the license agreement found in LICENSE.txt. Please see the documentation provided in docs/ for further -assistance with LLVM, and in particular docs/GettingStarted.rst for getting +assistance with LLVM, and in particular docs/GettingStarted.md for getting started with LLVM and docs/README.txt for an overview of LLVM's documentation setup. 
diff --git a/llvm/docs/CMake.rst b/llvm/docs/CMake.md similarity index 100% rename from llvm/docs/CMake.rst rename to llvm/docs/CMake.md diff --git a/llvm/docs/CodeGenerator.rst b/llvm/docs/CodeGenerator.md similarity index 100% rename from llvm/docs/CodeGenerator.rst rename to llvm/docs/CodeGenerator.md diff --git a/llvm/docs/CodingStandards.rst b/llvm/docs/CodingStandards.md similarity index 100% rename from llvm/docs/CodingStandards.rst rename to llvm/docs/CodingStandards.md diff --git a/llvm/docs/CommandGuide/index.rst b/llvm/docs/CommandGuide/index.md similarity index 100% rename from llvm/docs/CommandGuide/index.rst rename to llvm/docs/CommandGuide/index.md diff --git a/llvm/docs/Contributing.rst b/llvm/docs/Contributing.md similarity index 100% rename from llvm/docs/Contributing.rst rename to llvm/docs/Contributing.md diff --git a/llvm/docs/DeveloperPolicy.rst b/llvm/docs/DeveloperPolicy.md similarity index 100% rename from llvm/docs/DeveloperPolicy.rst rename to llvm/docs/DeveloperPolicy.md diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.md similarity index 100% rename from llvm/docs/GettingInvolved.rst rename to llvm/docs/GettingInvolved.md diff --git a/llvm/docs/GettingStarted.rst b/llvm/docs/GettingStarted.md similarity index 100% rename from llvm/docs/GettingStarted.rst rename to llvm/docs/GettingStarted.md diff --git a/llvm/docs/GettingStartedTutorials.rst b/llvm/docs/GettingStartedTutorials.md similarity index 100% rename from llvm/docs/GettingStartedTutorials.rst rename to llvm/docs/GettingStartedTutorials.md diff --git a/llvm/docs/GitHub.rst b/llvm/docs/GitHub.md similarity index 100% rename from llvm/docs/GitHub.rst rename to llvm/docs/GitHub.md diff --git a/llvm/docs/GitRepositoryPolicy.md b/llvm/docs/GitRepositoryPolicy.md index 10859243fc1cc..3ee81f7996dfa 100644 --- a/llvm/docs/GitRepositoryPolicy.md +++ b/llvm/docs/GitRepositoryPolicy.md @@ -22,7 +22,7 @@ Requirements for *new* repositories as part of the If you want to 
integrate your project as part of the Monorepo, please take a look at the -[Developer Policy](project:DeveloperPolicy.rst#Adding an Established Project To the LLVM Monorepo). +[Developer Policy](project:DeveloperPolicy.md#Adding an Established Project To the LLVM Monorepo). To request a new repository, please create an issue with in [LLVM's Github Issue Tracker](https://github.com/llvm/llvm-project/issues) and diff --git a/llvm/docs/InstrRefDebugInfo.md b/llvm/docs/InstrRefDebugInfo.md index e0a85f99cb4f3..7124ee26de68c 100644 --- a/llvm/docs/InstrRefDebugInfo.md +++ b/llvm/docs/InstrRefDebugInfo.md @@ -191,4 +191,4 @@ instruction number of any cloned instruction, to avoid duplicate numbers appearing to `LiveDebugValues`. Dealing with duplicated instructions is a natural extension to instruction referencing that's currently unimplemented. -[LiveDebugValues]: project:SourceLevelDebugging.rst#LiveDebugValues expansion of variable locations +[LiveDebugValues]: project:SourceLevelDebugging.md#LiveDebugValues expansion of variable locations diff --git a/llvm/docs/Passes.rst b/llvm/docs/Passes.md similarity index 100% rename from llvm/docs/Passes.rst rename to llvm/docs/Passes.md diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.md similarity index 100% rename from llvm/docs/ProgrammersManual.rst rename to llvm/docs/ProgrammersManual.md diff --git a/llvm/docs/RFCProcess.rst b/llvm/docs/RFCProcess.md similarity index 100% rename from llvm/docs/RFCProcess.rst rename to llvm/docs/RFCProcess.md diff --git a/llvm/docs/Reference.rst b/llvm/docs/Reference.md similarity index 100% rename from llvm/docs/Reference.rst rename to llvm/docs/Reference.md diff --git a/llvm/docs/RemoveDIsDebugInfo.md b/llvm/docs/RemoveDIsDebugInfo.md index 7d07352f99420..cb787c72bce2c 100644 --- a/llvm/docs/RemoveDIsDebugInfo.md +++ b/llvm/docs/RemoveDIsDebugInfo.md @@ -206,7 +206,7 @@ Below is a brief overview of the new representation that replaces debug intrinsi ### What 
exactly have you replaced debug intrinsics with? -We're using a dedicated C++ class called `DbgRecord` to store debug info, with a one-to-one relationship between each instance of a debug intrinsic and each `DbgRecord` object in any LLVM IR program; these `DbgRecord`s are represented in the IR as non-instruction debug records, as described in the [Source Level Debugging](project:SourceLevelDebugging.rst#Debug Records) document. This class has a set of subclasses that store exactly the same information as is stored in debugging intrinsics. Each one also has almost entirely the same set of methods, that behave in the same way: +We're using a dedicated C++ class called `DbgRecord` to store debug info, with a one-to-one relationship between each instance of a debug intrinsic and each `DbgRecord` object in any LLVM IR program; these `DbgRecord`s are represented in the IR as non-instruction debug records, as described in the [Source Level Debugging](project:SourceLevelDebugging.md#Debug Records) document. This class has a set of subclasses that store exactly the same information as is stored in debugging intrinsics. Each one also has almost entirely the same set of methods, that behave in the same way: https://llvm.org/docs/doxygen/classllvm_1_1DbgRecord.html https://llvm.org/docs/doxygen/classllvm_1_1DbgVariableRecord.html diff --git a/llvm/docs/SourceLevelDebugging.rst b/llvm/docs/SourceLevelDebugging.md similarity index 100% rename from llvm/docs/SourceLevelDebugging.rst rename to llvm/docs/SourceLevelDebugging.md diff --git a/llvm/docs/SphinxQuickstartTemplate.md b/llvm/docs/SphinxQuickstartTemplate.md index ceeb297bedf3a..9b17c332e807b 100644 --- a/llvm/docs/SphinxQuickstartTemplate.md +++ b/llvm/docs/SphinxQuickstartTemplate.md @@ -23,7 +23,7 @@ See the {ref}`migration ` section for more inform This article is located in `docs/SphinxQuickstartTemplate.md`. To use it as a template, make a copy and open it in a text editor. 
-You can then write your docs, and open a [GitHub PR](project:GitHub.rst) to request a review. +You can then write your docs, and open a [GitHub PR](project:GitHub.md) to request a review. To view the Markdown source file for this article, click **Show Source** on the right sidebar. @@ -167,7 +167,7 @@ without any syntax highlighting like this: You can generate the HTML documentation from the sources locally if you want to see what they would look like. In addition to the normal -[build tools](project:GettingStarted.rst) +[build tools](project:GettingStarted.md) you need to install [Sphinx] and the necessary extensions using the following command inside the `llvm-project` checkout: diff --git a/llvm/docs/TableGen/index.rst b/llvm/docs/TableGen/index.md similarity index 100% rename from llvm/docs/TableGen/index.rst rename to llvm/docs/TableGen/index.md diff --git a/llvm/docs/TestingGuide.rst b/llvm/docs/TestingGuide.md similarity index 100% rename from llvm/docs/TestingGuide.rst rename to llvm/docs/TestingGuide.md diff --git a/llvm/docs/UserGuides.rst b/llvm/docs/UserGuides.md similarity index 100% rename from llvm/docs/UserGuides.rst rename to llvm/docs/UserGuides.md diff --git a/llvm/docs/WritingAnLLVMBackend.rst b/llvm/docs/WritingAnLLVMBackend.md similarity index 100% rename from llvm/docs/WritingAnLLVMBackend.rst rename to llvm/docs/WritingAnLLVMBackend.md diff --git a/llvm/docs/WritingAnLLVMNewPMPass.rst b/llvm/docs/WritingAnLLVMNewPMPass.md similarity index 100% rename from llvm/docs/WritingAnLLVMNewPMPass.rst rename to llvm/docs/WritingAnLLVMNewPMPass.md diff --git a/llvm/docs/WritingAnLLVMPass.rst b/llvm/docs/WritingAnLLVMPass.md similarity index 100% rename from llvm/docs/WritingAnLLVMPass.rst rename to llvm/docs/WritingAnLLVMPass.md diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py index 29b1024ff482e..ca750dded019f 100644 --- a/llvm/docs/conf.py +++ b/llvm/docs/conf.py @@ -292,7 +292,7 @@ def process_rst(name): for name in 
os.listdir(command_guide_path): # Process Markdown files - if name.endswith(".md"): + if name.endswith(".md") and name != "index.md": process_md(name) # Process ReST files apart from the index page. elif name.endswith(".rst") and name != "index.rst": diff --git a/llvm/docs/index.rst b/llvm/docs/index.md similarity index 100% rename from llvm/docs/index.rst rename to llvm/docs/index.md diff --git a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp index 2a6014194316d..e467e9f976e80 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp @@ -57,7 +57,7 @@ Error AppleAcceleratorTable::extract() { FormParams = {Hdr.Version, 0, dwarf::DwarfFormat::DWARF32}; // Check that we can read all the hashes and offsets from the - // section (see SourceLevelDebugging.rst for the structure of the index). + // section (see SourceLevelDebugging.md for the structure of the index). if (!AccelSection.isValidOffset(getIthBucketBase(Hdr.BucketCount - 1))) return createStringError( errc::illegal_byte_sequence, diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp index 7889de7ebb49a..0cd8351c52469 100644 --- a/llvm/lib/IR/DebugInfo.cpp +++ b/llvm/lib/IR/DebugInfo.cpp @@ -2381,7 +2381,7 @@ bool AssignmentTrackingPass::runOnFunction(Function &F) { // Note: trackAssignments doesn't respect dbg.declare's IR positions (as it // doesn't "understand" dbg.declares). 
However, this doesn't appear to break // any rules given this description of dbg.declare from - // llvm/docs/SourceLevelDebugging.rst: + // llvm/docs/SourceLevelDebugging.md: // // It is not control-dependent, meaning that if a call to llvm.dbg.declare // exists and has a valid location argument, that address is considered to diff --git a/llvm/tools/reduce-chunk-list/reduce-chunk-list.cpp b/llvm/tools/reduce-chunk-list/reduce-chunk-list.cpp index eaf173d4a88a2..b41cad32a5c28 100644 --- a/llvm/tools/reduce-chunk-list/reduce-chunk-list.cpp +++ b/llvm/tools/reduce-chunk-list/reduce-chunk-list.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// See the llvm-project/llvm/docs/ProgrammersManual.rst to see how to use this +// See the llvm-project/llvm/docs/ProgrammersManual.md to see how to use this // tool // //===----------------------------------------------------------------------===// diff --git a/llvm/utils/lit/tests/Inputs/shtest-define/examples/param-subst.txt b/llvm/utils/lit/tests/Inputs/shtest-define/examples/param-subst.txt index 91418cd5e88c4..14e43772f50f9 100644 --- a/llvm/utils/lit/tests/Inputs/shtest-define/examples/param-subst.txt +++ b/llvm/utils/lit/tests/Inputs/shtest-define/examples/param-subst.txt @@ -1,4 +1,4 @@ -; This example originally appeared in TestingGuide.rst except here we've added +; This example originally appeared in TestingGuide.md except here we've added ; echo to the clang/FileCheck command line to be executed. ; DEFINE: %{cflags} = From 429e592dfc03844f78e4a406b66129aae5fc1396 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Mon, 22 Jun 2026 16:02:50 -0700 Subject: [PATCH 098/511] [docs] Migrate 22 popular LLVM docs to MyST (#201244) This was done with LLM assistance. I rebuilt the LLVM docs, opened all 22 docs in a browser and scrolled through them, and caught and fixed a few errors. 
Tracking issue: #201242 Migration guide docs: https://llvm.org/docs/SphinxQuickstartTemplate.html#markdown-migration-guidelines RFC: https://discourse.llvm.org/t/rfc-make-myst-markdown-the-llvm-docs-format-rip-rest/90840 --- llvm/docs/CMake.md | 1675 +++++----- llvm/docs/CodeGenerator.md | 1966 ++++++------ llvm/docs/CodingStandards.md | 1771 +++++------ llvm/docs/CommandGuide/index.md | 197 +- llvm/docs/Contributing.md | 221 +- llvm/docs/DeveloperPolicy.md | 1327 ++++---- llvm/docs/GettingInvolved.md | 534 ++-- llvm/docs/GettingStarted.md | 1051 +++---- llvm/docs/GettingStartedTutorials.md | 76 +- llvm/docs/GitHub.md | 497 ++- llvm/docs/Passes.md | 705 ++--- llvm/docs/ProgrammersManual.md | 4375 ++++++++++++-------------- llvm/docs/RFCProcess.md | 70 +- llvm/docs/Reference.md | 409 ++- llvm/docs/SourceLevelDebugging.md | 2861 ++++++++--------- llvm/docs/TableGen/index.md | 345 +- llvm/docs/TestingGuide.md | 1185 ++++--- llvm/docs/UserGuides.md | 567 ++-- llvm/docs/WritingAnLLVMBackend.md | 2625 ++++++++-------- llvm/docs/WritingAnLLVMNewPMPass.md | 310 +- llvm/docs/WritingAnLLVMPass.md | 965 +++--- llvm/docs/conf.py | 9 +- llvm/docs/index.md | 128 +- 23 files changed, 11468 insertions(+), 12401 deletions(-) diff --git a/llvm/docs/CMake.md b/llvm/docs/CMake.md index 4d6aa2b99b9c9..d97af85057a32 100644 --- a/llvm/docs/CMake.md +++ b/llvm/docs/CMake.md @@ -1,997 +1,1104 @@ -======================== -Building LLVM with CMake -======================== +# Building LLVM with CMake -.. contents:: - :local: +```{contents} +:local: +``` -Introduction -============ +## Introduction -`CMake `_ is a cross-platform build-generator tool. CMake +[CMake](http://www.cmake.org/) is a cross-platform build-generator tool. CMake does not build the project; it generates the files needed by your build tool (GNU make, Visual Studio, etc.) for building LLVM. -If **you are a new contributor**, please start with the :doc:`GettingStarted` -page. 
This page is geared for existing contributors moving from the -legacy configure/make system. +If **you are a new contributor**, please start with the {doc}`GettingStarted` +page. This page is geared for existing contributors moving from the legacy +configure/make system. If you are really anxious about getting a functional LLVM build, go to the -`Quick start`_ section. If you are a CMake novice, start with `Basic CMake usage`_ -and then go back to the `Quick start`_ section once you know what you are doing. The -`Options and variables`_ section is a reference for customizing your build. If -you already have experience with CMake, this is the recommended starting point. +[Quick start](#quick-start) section. If you are a CMake novice, start with +[Basic CMake usage](#basic-cmake-usage) and then go back to the [Quick +start](#quick-start) section once you know what you are doing. The [Options and +variables](#options-and-variables) section is a reference for customizing your +build. If you already have experience with CMake, this is the recommended +starting point. -This page is geared towards users of the LLVM CMake build. If you're looking for -information about modifying the LLVM CMake build system, you may want to see the -:doc:`CMakePrimer` page. It has a basic overview of the CMake language. +This page is geared towards users of the LLVM CMake build. If you're looking +for information about modifying the LLVM CMake build system, you may want to +see the {doc}`CMakePrimer` page. It has a basic overview of the CMake language. -.. _Quick start: - -Quick start -=========== +(Quick start)= +## Quick start We use here the command-line, non-interactive CMake interface. -#. `Download `_ and install +1. [Download](http://www.cmake.org/cmake/resources/software.html) and install CMake. Version 3.20.0 is the minimum required. -#. Open a shell. Your development tools must be reachable from this shell - through the ``PATH`` environment variable. - -#. 
Create a build directory. Building LLVM in the source - directory is not supported. ``cd`` to this directory: +2. Open a shell. Your development tools must be reachable from this shell + through the `PATH` environment variable. - .. code-block:: console +3. Create a build directory. Building LLVM in the source + directory is not supported. `cd` to this directory: - $ mkdir mybuilddir - $ cd mybuilddir + ``` console + $ mkdir mybuilddir + $ cd mybuilddir + ``` -#. Execute this command in the shell replacing `path/to/llvm/source/root` with +4. Execute this command in the shell replacing `path/to/llvm/source/root` with the path to the root of your LLVM source tree: - .. code-block:: console - - $ cmake path/to/llvm/source/root + ``` console + $ cmake path/to/llvm/source/root + ``` - CMake will detect your development environment, perform a series of tests, and - generate the files required for building LLVM. CMake will use default values - for all build parameters. See the `Options and variables`_ section for - a list of build parameters that you can modify. + CMake will detect your development environment, perform a series of tests, + and generate the files required for building LLVM. CMake will use default + values for all build parameters. See the [Options and + variables](#options-and-variables) section for a list of build parameters + that you can modify. This can fail if CMake can't detect your toolset, or if it thinks that the - environment is not sane enough. In this case, make sure that the toolset that - you intend to use is the only one reachable from the shell, and that the shell - itself is the correct one for your development environment. CMake will refuse - to build MinGW makefiles if you have a POSIX shell reachable through the PATH - environment variable, for instance. You can force CMake to use a given build - tool; for instructions, see the `Usage`_ section, below. 
You may - also wish to control which targets LLVM enables, or which LLVM - components are built; see the `Frequently Used LLVM-related - variables`_ below. - -#. After CMake has finished running, use IDE project files, or start + environment is not sane enough. In this case, make sure that the toolset + that you intend to use is the only one reachable from the shell, and that + the shell itself is the correct one for your development environment. CMake + will refuse to build MinGW makefiles if you have a POSIX shell reachable + through the PATH environment variable, for instance. You can force CMake to + use a given build tool; for instructions, see the [Usage](#Usage) section, + below. You may also wish to control which targets LLVM enables, or which + LLVM components are built; see the [Frequently Used LLVM-related + variables](#frequently-used-llvm-related-variables) below. + +5. After CMake has finished running, use IDE project files, or start the build from the build directory: - .. code-block:: console + ``` console + $ cmake --build . + ``` - $ cmake --build . - - The ``--build`` option tells ``cmake`` to invoke the underlying build - tool (``make``, ``ninja``, ``xcodebuild``, ``msbuild``, etc.) + The `--build` option tells `cmake` to invoke the underlying build + tool (`make`, `ninja`, `xcodebuild`, `msbuild`, etc.) The underlying build tool can be invoked directly, of course, but - the ``--build`` option is portable. - -#. After LLVM has finished building, install it from the build directory: - - .. code-block:: console + the `--build` option is portable. - $ cmake --build . --target install +6. After LLVM has finished building, install it from the build directory: - The ``--target`` option with ``install`` parameter in addition to - the ``--build`` option tells ``cmake`` to build the ``install`` target. + ``` console + $ cmake --build . 
--target install + ``` - It is possible to set a different install prefix at installation time - by invoking the ``cmake_install.cmake`` script generated in the - build directory: + The `--target` option with `install` parameter in addition to + the `--build` option tells `cmake` to build the `install` target. - .. code-block:: console + It is possible to set a different install prefix at installation time by + invoking the `cmake_install.cmake` script generated in the build directory: - $ cmake -DCMAKE_INSTALL_PREFIX=/tmp/llvm -P cmake_install.cmake + ``` console + $ cmake -DCMAKE_INSTALL_PREFIX=/tmp/llvm -P cmake_install.cmake + ``` -.. _Basic CMake usage: -.. _Usage: - -Basic CMake usage -================= +(Basic CMake usage)= +(Usage)= +## Basic CMake usage This section explains basic aspects of CMake for daily use. CMake comes with extensive documentation, in the form of HTML files, and as -online help accessible via the ``cmake`` executable itself. Execute ``cmake ---help`` for further help options. +online help accessible via the `cmake` executable itself. Execute `cmake +--help` for further help options. CMake allows you to specify a build tool (e.g., GNU make, Visual Studio, or Xcode). If not specified on the command line, CMake tries to guess which build tool to use based on your environment. Once it has identified your build tool, CMake uses the corresponding *Generator* to create files for your build tool (e.g., Makefiles or Visual Studio or Xcode project files). You can -explicitly specify the generator with the command line option ``-G "Name of the -generator"``. To see a list of the available generators on your system, execute: - -.. code-block:: console +explicitly specify the generator with the command line option `-G "Name of the +generator"`. To see a list of the available generators on your system, execute: - $ cmake --help +``` console +$ cmake --help +``` This will list the generator names at the end of the help text. 
Generators' names are case-sensitive and may contain spaces. For this reason, -you should enter them exactly as they are listed in the ``cmake --help`` +you should enter them exactly as they are listed in the `cmake --help` output, in quotes. For example, to generate project files specifically for Visual Studio 12, you can execute: -.. code-block:: console - - $ cmake -G "Visual Studio 12" path/to/llvm/source/root +``` console +$ cmake -G "Visual Studio 12" path/to/llvm/source/root +``` A given development platform can have more than one adequate generator. If you use Visual Studio, "NMake Makefiles" is a generator you can use for building with NMake. By default, CMake chooses the most specific generator supported by your development environment. If you want an alternative generator, -you must specify this to CMake with the ``-G`` option. - -.. todo:: - - Explain variables and cache. Move explanation here from #options section. +you must specify this to CMake with the `-G` option. -.. _Options and variables: +```{todo} +Explain variables and cache. Move explanation here from #options section. +``` -Options and variables -===================== +(Options and variables)= +## Options and variables Variables customize how the build will be generated. Options are boolean variables, with possible values ON/OFF. Options and variables are defined on the CMake command line like this: -.. code-block:: console - - $ cmake -DVARIABLE=value path/to/llvm/source +``` console +$ cmake -DVARIABLE=value path/to/llvm/source +``` You can set a variable after the initial CMake invocation to change its value. You can also undefine a variable: -.. code-block:: console - - $ cmake -UVARIABLE path/to/llvm/source +``` console +$ cmake -UVARIABLE path/to/llvm/source +``` -Variables are stored in the CMake cache. This is a file named ``CMakeCache.txt`` -stored at the root of your build directory that is generated by ``cmake``. +Variables are stored in the CMake cache. 
This is a file named `CMakeCache.txt` +stored at the root of your build directory that is generated by `cmake`. Editing it yourself is not recommended. Variables are listed in the CMake cache and later in this document with the variable name and type separated by a colon. You can also specify the variable and type on the CMake command line: -.. code-block:: console +``` console +$ cmake -DVARIABLE:TYPE=value path/to/llvm/source +``` - $ cmake -DVARIABLE:TYPE=value path/to/llvm/source +(cmake_frequently_used_variables)= +### Frequently-used CMake variables -.. _cmake_frequently_used_variables: +Here are some of the CMake variables that are used often, along with a brief +explanation. For full documentation, consult the CMake manual, or execute +`cmake --help-variable VARIABLE_NAME`. See [Frequently Used LLVM-related +Variables](#frequently-used-llvm-related-variables) below for information about +commonly used variables that control features of LLVM and enabled subprojects. -Frequently-used CMake variables -------------------------------- +(cmake_build_type)= -Here are some of the CMake variables that are used often, along with a -brief explanation. For full documentation, consult the CMake manual, -or execute ``cmake --help-variable VARIABLE_NAME``. See `Frequently -Used LLVM-related Variables`_ below for information about commonly -used variables that control features of LLVM and enabled subprojects. +**CMAKE_BUILD_TYPE**:STRING -.. _cmake_build_type: +: This configures the optimization level for `make` or `ninja` builds. -**CMAKE_BUILD_TYPE**:STRING - This configures the optimization level for ``make`` or ``ninja`` builds. 
- - Possible values: - - =========================== ============= ========== ========== ========================== - Build Type Optimizations Debug Info Assertions Best suited for - =========================== ============= ========== ========== ========================== - **Release** For Speed No No Users of LLVM and Clang - **Debug** None Yes Yes Developers of LLVM - **RelWithDebInfo** For Speed Yes No Users that also need Debug - **MinSizeRel** For Size No No When disk space matters - =========================== ============= ========== ========== ========================== - - * Optimizations make LLVM/Clang run faster but can be an impediment for - step-by-step debugging. - * Builds with debug information can use a lot of RAM and disk space and are - usually slower to run. You can improve RAM usage by using ``lld``, see - the :ref:`LLVM_USE_LINKER <llvm_use_linker>` option. - * Assertions are internal checks to help you find bugs. They typically slow - down LLVM and Clang when enabled but can be useful during development. - You can manually set :ref:`LLVM_ENABLE_ASSERTIONS <llvm_enable_assertions>` - to override the default from `CMAKE_BUILD_TYPE`. - - If you are using an IDE such as Visual Studio or Xcode, use - the IDE settings to set the build type. - - Note: on Windows (building with MSVC or clang-cl), CMake's **RelWithDebInfo** - setting does not enable the same optimizations as **Release**. Using the - **Release** build type with :ref:`LLVM_ENABLE_PDB <llvm_enable_pdb>` set - may be a better option.
+ Possible values: + + | Build Type | Optimizations | Debug Info | Assertions | Best suited for | + |--------------------|---------------|------------|------------|----------------------------| + | **Release** | For Speed | No | No | Users of LLVM and Clang | + | **Debug** | None | Yes | Yes | Developers of LLVM | + | **RelWithDebInfo** | For Speed | Yes | No | Users that also need Debug | + | **MinSizeRel** | For Size | No | No | When disk space matters | + + - Optimizations make LLVM/Clang run faster but can be an impediment for + step-by-step debugging. + - Builds with debug information can use a lot of RAM and disk space and are + usually slower to run. You can improve RAM usage by using `lld`, see + the {ref}`LLVM_USE_LINKER <llvm_use_linker>` option. + - Assertions are internal checks to help you find bugs. They typically + slow down LLVM and Clang when enabled but can be useful during + development. You can manually set {ref}`LLVM_ENABLE_ASSERTIONS + <llvm_enable_assertions>` to override the default from + `CMAKE_BUILD_TYPE`. + + If you are using an IDE such as Visual Studio or Xcode, use the IDE + settings to set the build type. + + Note: on Windows (building with MSVC or clang-cl), CMake's + **RelWithDebInfo** setting does not enable the same optimizations as + **Release**. Using the **Release** build type with {ref}`LLVM_ENABLE_PDB + <llvm_enable_pdb>` set may be a better option. **CMAKE_INSTALL_PREFIX**:PATH - Path where LLVM will be installed when the "install" target is built. -**CMAKE_{C,CXX}_FLAGS**:STRING - Extra flags to use when compiling C and C++ source files respectively. +: Path where LLVM will be installed when the "install" target is built. + +**CMAKE\_{C,CXX}\_FLAGS**:STRING + +: Extra flags to use when compiling C and C++ source files respectively. -**CMAKE_{C,CXX}_COMPILER**:STRING - Specify the C and C++ compilers to use. If you have multiple - compilers installed, CMake might not default to the one you wish to - use. +**CMAKE\_{C,CXX}\_COMPILER**:STRING -..
_Frequently Used LLVM-related variables: +: Specify the C and C++ compilers to use. If you have multiple compilers + installed, CMake might not default to the one you wish to use. -Frequently Used LLVM-related variables --------------------------------------- +(Frequently Used LLVM-related variables)= +### Frequently Used LLVM-related variables -The default configuration may not match your requirements. Here are -LLVM variables that are frequently used to control that. The full -description is in `LLVM-related variables`_ below. +The default configuration may not match your requirements. Here are LLVM +variables that are frequently used to control that. The full description is in +[LLVM-related variables](#llvm-related-variables) below. **LLVM_ENABLE_PROJECTS**:STRING - Control which projects are enabled. For example, you may want to work on clang - or lldb by specifying ``-DLLVM_ENABLE_PROJECTS="clang;lldb"``. + +: Control which projects are enabled. For example, you may want to work on + clang or lldb by specifying `-DLLVM_ENABLE_PROJECTS="clang;lldb"`. **LLVM_ENABLE_RUNTIMES**:STRING - Control which runtimes are enabled. For example, you may want to work on - libc++ or libc++abi by specifying ``-DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi"``. + +: Control which runtimes are enabled. For example, you may want to work on + libc++ or libc++abi by specifying + `-DLLVM_ENABLE_RUNTIMES="libcxx;libcxxabi"`. **LLVM_LIBDIR_SUFFIX**:STRING - Extra suffix to append to the directory where libraries are to be - installed. On a 64-bit architecture, one could use ``-DLLVM_LIBDIR_SUFFIX=64`` - to install libraries to ``/usr/lib64``. -**LLVM_PARALLEL_{COMPILE,LINK}_JOBS**:STRING - Building the llvm toolchain can use a lot of resources, particularly - during linking. These options, when you use the Ninja generator, allow you - to restrict the parallelism. 
For example, to avoid OOMs or going - into swap, permit only one link job per 15 GB of RAM available on a - 32 GB machine, specify ``-G Ninja -DLLVM_PARALLEL_LINK_JOBS=2``. +: Extra suffix to append to the directory where libraries are to be + installed. On a 64-bit architecture, one could use + `-DLLVM_LIBDIR_SUFFIX=64` to install libraries to `/usr/lib64`. + +**LLVM_PARALLEL\_{COMPILE,LINK}\_JOBS**:STRING + +: Building the llvm toolchain can use a lot of resources, particularly during + linking. These options, when you use the Ninja generator, allow you to + restrict the parallelism. For example, to avoid OOMs or going into swap, + permit only one link job per 15 GB of RAM available on a 32 GB machine, + specify `-G Ninja -DLLVM_PARALLEL_LINK_JOBS=2`. **LLVM_TARGETS_TO_BUILD**:STRING - Control which targets are enabled. For example, you may only need to enable - your native target with, for example, ``-DLLVM_TARGETS_TO_BUILD=X86``. -.. _llvm_use_linker: +: Control which targets are enabled. For example, you may only need to enable + your native target with, for example, `-DLLVM_TARGETS_TO_BUILD=X86`. + +(llvm_use_linker)= **LLVM_USE_LINKER**:STRING - Override the system's default linker. For instance, use ``lld`` with - ``-DLLVM_USE_LINKER=lld``. -Rarely-used CMake variables ---------------------------- +: Override the system's default linker. For instance, use `lld` with + `-DLLVM_USE_LINKER=lld`. + +### Rarely-used CMake variables Here are some of the CMake variables that are rarely used, along with a brief -explanation and LLVM-related notes. For full documentation, consult the CMake -manual, or execute ``cmake --help-variable VARIABLE_NAME``. +explanation and LLVM-related notes. For full documentation, consult the CMake +manual, or execute `cmake --help-variable VARIABLE_NAME`. **CMAKE_CXX_STANDARD**:STRING - Sets the C++ standard to conform to when building LLVM. - LLVM requires C++17 or higher. This defaults to 17. 
+ +: Sets the C++ standard to conform to when building LLVM. LLVM requires C++17 + or higher. This defaults to 17. **CMAKE_INSTALL_BINDIR**:PATH - The path to install executables, relative to the *CMAKE_INSTALL_PREFIX*. - Defaults to "bin". + +: The path to install executables, relative to the *CMAKE_INSTALL_PREFIX*. + Defaults to "bin". **CMAKE_INSTALL_DOCDIR**:PATH - The path to install documentation, relative to the *CMAKE_INSTALL_PREFIX*. - Defaults to "share/doc". + +: The path to install documentation, relative to the *CMAKE_INSTALL_PREFIX*. + Defaults to "share/doc". **CMAKE_INSTALL_INCLUDEDIR**:PATH - The path to install header files, relative to the *CMAKE_INSTALL_PREFIX*. - Defaults to "include". + +: The path to install header files, relative to the *CMAKE_INSTALL_PREFIX*. + Defaults to "include". **CMAKE_INSTALL_MANDIR**:PATH - The path to install manpage files, relative to the *CMAKE_INSTALL_PREFIX*. - Defaults to "share/man". -.. _LLVM-related variables: +: The path to install manpage files, relative to the *CMAKE_INSTALL_PREFIX*. + Defaults to "share/man". -LLVM-related variables ------------------------ +(LLVM-related variables)= +### LLVM-related variables -These variables provide fine control over the build of LLVM and -its enabled sub-projects. Nearly all of these variable names begin with -``LLVM_``. +These variables provide fine control over the build of LLVM and its enabled +sub-projects. Nearly all of these variable names begin with `LLVM_`. -.. _LLVM-related variables BUILD_SHARED_LIBS: +(LLVM-related variables BUILD_SHARED_LIBS)= **BUILD_SHARED_LIBS**:BOOL - Flag indicating if each LLVM component (e.g. Support) is built as a shared - library (ON) or as a static library (OFF). Its default value is OFF. On - Windows, shared libraries may be used when building with MinGW, including - mingw-w64, but not when building with the Microsoft toolchain. - .. note:: ``BUILD_SHARED_LIBS`` is only recommended for use by LLVM developers. 
- If you want to build LLVM as a shared library, you should use the - ``LLVM_BUILD_LLVM_DYLIB`` option. +: Flag indicating if each LLVM component (e.g. Support) is built as a shared + library (ON) or as a static library (OFF). Its default value is OFF. On + Windows, shared libraries may be used when building with MinGW, including + mingw-w64, but not when building with the Microsoft toolchain. + + ```{note} + `BUILD_SHARED_LIBS` is only recommended for use by LLVM developers. If you want to build LLVM as a shared library, you should use the `LLVM_BUILD_LLVM_DYLIB` option. + ``` **LLVM_ABI_BREAKING_CHECKS**:STRING - Used to decide if LLVM should be built with ABI breaking checks or - not. Allowed values are `WITH_ASSERTS` (default), `FORCE_ON` and - `FORCE_OFF`. `WITH_ASSERTS` turns on ABI breaking checks in an - assertion-enabled build. `FORCE_ON` (`FORCE_OFF`) turns them on - (off) irrespective of whether normal (`NDEBUG`-based) assertions are - enabled or not. A version of LLVM built with ABI breaking checks - is not ABI compatible with a version built without it. + +: Used to decide if LLVM should be built with ABI breaking checks or not. + Allowed values are `WITH_ASSERTS` (default), `FORCE_ON` and `FORCE_OFF`. + `WITH_ASSERTS` turns on ABI breaking checks in an assertion-enabled build. + `FORCE_ON` (`FORCE_OFF`) turns them on (off) irrespective of whether normal + (`NDEBUG`-based) assertions are enabled or not. A version of LLVM built + with ABI breaking checks is not ABI compatible with a version built without + it. **LLVM_ADDITIONAL_BUILD_TYPES**:LIST - Adding a semicolon-separated list of additional build types to this flag - allows for them to be specified as values in ``CMAKE_BUILD_TYPE`` without - encountering a fatal error during the configuration process. 
+ +: Adding a semicolon-separated list of additional build types to this flag + allows for them to be specified as values in `CMAKE_BUILD_TYPE` without + encountering a fatal error during the configuration process. **LLVM_APPEND_VC_REV**:BOOL - Embed version control revision info (Git revision id). - The version info is provided by the ``LLVM_REVISION`` macro in - ``llvm/include/llvm/Support/VCSRevision.h``. Developers using git who don't - need revision info can disable this option to avoid re-linking most binaries - after a branch switch. Defaults to ON. + +: Embed version control revision info (Git revision id). The version info is + provided by the `LLVM_REVISION` macro in + `llvm/include/llvm/Support/VCSRevision.h`. Developers using git who don't + need revision info can disable this option to avoid re-linking most + binaries after a branch switch. Defaults to ON. **LLVM_FORCE_VC_REPOSITORY**:STRING - Set the git repository to include in version info rather than calling git to - determine it. + +: Set the git repository to include in version info rather than calling git + to determine it. **LLVM_FORCE_VC_REVISION**:STRING - Force a specific Git revision id rather than calling git to determine it. - This is useful in environments where git is not available or non-functional - but the VC revision is available through other means. + +: Force a specific Git revision id rather than calling git to determine it. + This is useful in environments where git is not available or non-functional + but the VC revision is available through other means. **LLVM_BUILD_32_BITS**:BOOL - Build 32-bit executables and libraries on 64-bit systems. This option is - available only on some 64-bit Unix systems. Defaults to OFF. + +: Build 32-bit executables and libraries on 64-bit systems. This option is + available only on some 64-bit Unix systems. Defaults to OFF. **LLVM_BUILD_BENCHMARKS**:BOOL - Adds benchmarks to the list of default targets. Defaults to OFF. 
+ +: Adds benchmarks to the list of default targets. Defaults to OFF. **LLVM_BUILD_DOCS**:BOOL - Adds all *enabled* documentation targets (i.e., Doxygen and Sphinx targets) as - dependencies of the default build targets. This results in all of the (enabled) - documentation targets being built as part of a normal build. If the ``install`` - target is run, then this also enables all built documentation targets to be - installed. Defaults to OFF. To enable a particular documentation target, see - ``LLVM_ENABLE_SPHINX`` and ``LLVM_ENABLE_DOXYGEN``. + +: Adds all *enabled* documentation targets (i.e., Doxygen and Sphinx targets) + as dependencies of the default build targets. This results in all of the + (enabled) documentation targets being built as part of a normal build. If + the `install` target is run, then this also enables all built documentation + targets to be installed. Defaults to OFF. To enable a particular + documentation target, see `LLVM_ENABLE_SPHINX` and `LLVM_ENABLE_DOXYGEN`. **LLVM_BUILD_EXAMPLES**:BOOL - Include LLVM examples in the 'all' build target and install them as part of - the ``install`` target. Defaults to OFF. Targets for building examples are - still generated, this is controlled by *LLVM_INCLUDE_EXAMPLES*. Note that some - examples might still be built as dependencies for tests. + +: Include LLVM examples in the 'all' build target and install them as part of + the `install` target. Defaults to OFF. Targets for building examples are + still generated, this is controlled by *LLVM_INCLUDE_EXAMPLES*. Note that + some examples might still be built as dependencies for tests. **LLVM_BUILD_INSTRUMENTED_COVERAGE**:BOOL - If enabled, `source-based code coverage - `_ instrumentation - is enabled while building llvm. 
If CMake can locate the code coverage - scripts and the llvm-cov and llvm-profdata tools that pair with your compiler, - the build will also generate the `generate-coverage-report` target to generate - the code coverage report for LLVM, and the `clear-profile-data` utility target - to delete captured profile data. See documentation for - *LLVM_CODE_COVERAGE_TARGETS* and *LLVM_COVERAGE_SOURCE_DIRS* for more - information on configuring code coverage reports. + +: If enabled, [source-based code + coverage](https://clang.llvm.org/docs/SourceBasedCodeCoverage.html) + instrumentation is enabled while building llvm. If CMake can locate the + code coverage scripts and the llvm-cov and llvm-profdata tools that pair + with your compiler, the build will also generate the + `generate-coverage-report` target to generate the code coverage report for + LLVM, and the `clear-profile-data` utility target to delete captured + profile data. See documentation for *LLVM_CODE_COVERAGE_TARGETS* and + *LLVM_COVERAGE_SOURCE_DIRS* for more information on configuring code + coverage reports. **LLVM_BUILD_LLVM_DYLIB**:BOOL - If enabled, the target for building the libLLVM shared library is added. - This library contains all of LLVM's components in a single shared library. - Defaults to OFF. This cannot be used in conjunction with ``BUILD_SHARED_LIBS``. - Tools will only be linked to the libLLVM shared library if ``LLVM_LINK_LLVM_DYLIB`` - is also ON. - The components in the library can be customised by setting ``LLVM_DYLIB_COMPONENTS`` - to a list of the desired components. - This option is not available on Windows. + +: If enabled, the target for building the libLLVM shared library is added. + This library contains all of LLVM's components in a single shared library. + Defaults to OFF. This cannot be used in conjunction with + `BUILD_SHARED_LIBS`. Tools will only be linked to the libLLVM shared + library if `LLVM_LINK_LLVM_DYLIB` is also ON. 
The components in the library + can be customised by setting `LLVM_DYLIB_COMPONENTS` to a list of the + desired components. This option is not available on Windows. **LLVM_BUILD_TESTS**:BOOL - Include LLVM unit tests in the 'all' build target. Defaults to OFF. Targets - for building each unit test are generated in any case. You can build a - specific unit test using the targets defined under *unittests*, such as - ADTTests, IRTests, SupportTests, etc. (Search for ``add_llvm_unittest`` in - the subdirectories of *unittests* for a complete list of unit tests.) It is - possible to build all unit tests with the target *UnitTests*. + +: Include LLVM unit tests in the 'all' build target. Defaults to OFF. Targets + for building each unit test are generated in any case. You can build a + specific unit test using the targets defined under *unittests*, such as + ADTTests, IRTests, SupportTests, etc. (Search for `add_llvm_unittest` in + the subdirectories of *unittests* for a complete list of unit tests.) It is + possible to build all unit tests with the target *UnitTests*. **LLVM_BUILD_TOOLS**:BOOL - Build LLVM tools. Defaults to ON. Targets for building each tool are generated - in any case. You can build a tool separately by invoking its target. For - example, you can build *llvm-as* with a Makefile-based system by executing *make - llvm-as* at the root of your build directory. + +: Build LLVM tools. Defaults to ON. Targets for building each tool are + generated in any case. You can build a tool separately by invoking its + target. For example, you can build *llvm-as* with a Makefile-based system + by executing *make llvm-as* at the root of your build directory. **LLVM_CCACHE_BUILD**:BOOL - If enabled and the ``ccache`` program is available, then LLVM will be - built using ``ccache`` to speed up rebuilds of LLVM and its components. - Defaults to OFF. 
The size and location of the cache maintained - by ``ccache`` can be adjusted via the ``LLVM_CCACHE_MAXSIZE`` and ``LLVM_CCACHE_DIR`` - options, which are passed to the ``CCACHE_MAXSIZE`` and ``CCACHE_DIR`` environment - variables, respectively. + +: If enabled and the `ccache` program is available, then LLVM will be built + using `ccache` to speed up rebuilds of LLVM and its components. Defaults to + OFF. The size and location of the cache maintained by `ccache` can be + adjusted via the `LLVM_CCACHE_MAXSIZE` and `LLVM_CCACHE_DIR` options, which + are passed to the `CCACHE_MAXSIZE` and `CCACHE_DIR` environment variables, + respectively. **LLVM_CODE_COVERAGE_TARGETS**:STRING - If set to a semicolon-separated list of targets, those targets will be used - to drive the code coverage reports. If unset, the target list will be - constructed using the LLVM build's CMake export list. + +: If set to a semicolon-separated list of targets, those targets will be used + to drive the code coverage reports. If unset, the target list will be + constructed using the LLVM build's CMake export list. **LLVM_COVERAGE_SOURCE_DIRS**:STRING - If set to a semicolon-separated list of directories, the coverage reports - will limit code coverage summaries to just the listed directories. If unset, - coverage reports will include all sources identified by the tooling. + +: If set to a semicolon-separated list of directories, the coverage reports + will limit code coverage summaries to just the listed directories. If + unset, coverage reports will include all sources identified by the tooling. **LLVM_CREATE_XCODE_TOOLCHAIN**:BOOL - macOS only: If enabled, CMake will generate a target named - 'install-xcode-toolchain'. This target will create a directory at - ``$CMAKE_INSTALL_PREFIX/Toolchains`` containing an xctoolchain directory which can - be used to override the default system tools. + +: macOS only: If enabled, CMake will generate a target named + 'install-xcode-toolchain'. 
This target will create a directory at + `$CMAKE_INSTALL_PREFIX/Toolchains` containing an xctoolchain directory + which can be used to override the default system tools. **LLVM_DEFAULT_TARGET_TRIPLE**:STRING - LLVM target to use for code generation when no target is explicitly specified. - It defaults to "host", meaning that it shall pick the architecture - of the machine where LLVM is being built. If you are building a cross-compiler, - set it to the target triple of your desired architecture. + +: LLVM target to use for code generation when no target is explicitly + specified. It defaults to "host", meaning that it shall pick the + architecture of the machine where LLVM is being built. If you are building + a cross-compiler, set it to the target triple of your desired architecture. **LLVM_DOXYGEN_QCH_FILENAME**:STRING - The filename of the Qt Compressed Help file that will be generated when - ``-DLLVM_ENABLE_DOXYGEN=ON`` and - ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON`` are given. Defaults to - ``org.llvm.qch``. - This option is only useful in combination with - ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; - otherwise it has no effect. + +: The filename of the Qt Compressed Help file that will be generated when + `-DLLVM_ENABLE_DOXYGEN=ON` and `-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON` are + given. Defaults to `org.llvm.qch`. This option is only useful in + combination with `-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON`; otherwise it has no + effect. **LLVM_DOXYGEN_QHELPGENERATOR_PATH**:STRING - The path to the ``qhelpgenerator`` executable. Defaults to whatever CMake's - ``find_program()`` can find. This option is only useful in combination with - ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; otherwise it has no - effect. + +: The path to the `qhelpgenerator` executable. Defaults to whatever CMake's + `find_program()` can find. This option is only useful in combination with + `-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON`; otherwise it has no effect. 
**LLVM_DOXYGEN_QHP_CUST_FILTER_NAME**:STRING - See `Qt Help Project`_ for - more information. Defaults to the CMake variable ``${PACKAGE_STRING}`` which - is a combination of the package name and version string. This filter can then - be used in Qt Creator to select only documentation from LLVM when browsing - through all the help files that you might have loaded. This option is only - useful in combination with ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; - otherwise it has no effect. -.. _Qt Help Project: http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters +: See [Qt Help + Project](http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters) + for more information. Defaults to the CMake variable `${PACKAGE_STRING}` + which is a combination of the package name and version string. This filter + can then be used in Qt Creator to select only documentation from LLVM when + browsing through all the help files that you might have loaded. This option + is only useful in combination with `-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON`; + otherwise it has no effect. **LLVM_DOXYGEN_QHP_NAMESPACE**:STRING - Namespace under which the intermediate Qt Help Project file lives. See `Qt - Help Project`_ - for more information. Defaults to "org.llvm". This option is only useful in - combination with ``-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON``; otherwise - it has no effect. +: Namespace under which the intermediate Qt Help Project file lives. See [Qt + Help + Project](http://qt-project.org/doc/qt-4.8/qthelpproject.html#custom-filters) + for more information. Defaults to "org.llvm". This option is only useful + in combination with `-DLLVM_ENABLE_DOXYGEN_QT_HELP=ON`; otherwise it has no + effect. -.. _llvm_enable_assertions: +(llvm_enable_assertions)= **LLVM_ENABLE_ASSERTIONS**:BOOL - Enables code assertions. Defaults to ON if and only if ``CMAKE_BUILD_TYPE`` - is *Debug*. + +: Enables code assertions. Defaults to ON if and only if `CMAKE_BUILD_TYPE` + is *Debug*. 
**LLVM_ENABLE_BINDINGS**:BOOL - If disabled, do not try to build the OCaml bindings. + +: If disabled, do not try to build the OCaml bindings. **LLVM_ENABLE_CURL**: - Used to decide if LLVM tools, should support downloading information - (particularly debug info from ``llvm-debuginfod``) over HTTP. Allowed - values are ``OFF`` (default), ``ON``, and ``FORCE_ON`` (error if libcurl - is not found). + +: Used to decide if LLVM tools should support downloading information + (particularly debug info from `llvm-debuginfod`) over HTTP. Allowed values + are `OFF` (default), `ON`, and `FORCE_ON` (error if libcurl is not found). **LLVM_ENABLE_DEBUGLOC_COVERAGE_TRACKING**:STRING - Enhances Debugify's ability to detect line number errors by storing extra - information inside Instructions, removing false positives from Debugify's - results at the cost of performance. Allowed values are `DISABLED` (default), - `COVERAGE`, and `COVERAGE_AND_ORIGIN`. `COVERAGE` tracks whether and why a - line number was intentionally dropped or not generated for an instruction, - allowing Debugify to avoid reporting these as errors; this comes with a small - performance cost of ~0.1%. `COVERAGE_AND_ORIGIN` additionally stores a - stacktrace of the point where each DebugLoc is unintentionally dropped, - allowing for much easier bug triaging at the cost of a ~10x performance - slowdown. `COVERAGE` and `COVERAGE_AND_ORIGIN` are ABI-breaking options. + +: Enhances Debugify's ability to detect line number errors by storing extra + information inside Instructions, removing false positives from Debugify's + results at the cost of performance. Allowed values are `DISABLED` + (default), `COVERAGE`, and `COVERAGE_AND_ORIGIN`. `COVERAGE` tracks whether + and why a line number was intentionally dropped or not generated for an + instruction, allowing Debugify to avoid reporting these as errors; this + comes with a small performance cost of \~0.1%.
`COVERAGE_AND_ORIGIN` + additionally stores a stacktrace of the point where each DebugLoc is + unintentionally dropped, allowing for much easier bug triaging at the cost + of a \~10x performance slowdown. `COVERAGE` and `COVERAGE_AND_ORIGIN` are + ABI-breaking options. **LLVM_ENABLE_DIA_SDK**:BOOL - Enable building with MSVC DIA SDK for PDB debugging support. Available - only with MSVC. Defaults to ON. + +: Enable building with MSVC DIA SDK for PDB debugging support. Available only + with MSVC. Defaults to ON. **LLVM_ENABLE_DOXYGEN**:BOOL - Enables the generation of browsable HTML documentation using doxygen. - Defaults to OFF. + +: Enables the generation of browsable HTML documentation using doxygen. + Defaults to OFF. **LLVM_ENABLE_DOXYGEN_QT_HELP**:BOOL - Enables the generation of a Qt Compressed Help file. Defaults to OFF. - This affects the make target ``doxygen-llvm``. When enabled, apart from - the normal HTML output generated by doxygen, this will produce a QCH file - named ``org.llvm.qch``. You can then load this file into Qt Creator. - This option is only useful in combination with ``-DLLVM_ENABLE_DOXYGEN=ON``; - otherwise this has no effect. + +: Enables the generation of a Qt Compressed Help file. Defaults to OFF. This + affects the make target `doxygen-llvm`. When enabled, apart from the normal + HTML output generated by doxygen, this will produce a QCH file named + `org.llvm.qch`. You can then load this file into Qt Creator. This option is + only useful in combination with `-DLLVM_ENABLE_DOXYGEN=ON`; otherwise this + has no effect. **LLVM_ENABLE_EH**:BOOL - Build LLVM with exception-handling support. This is necessary if you wish to - link against LLVM libraries and make use of C++ exceptions in your own code - that need to propagate through LLVM code. Defaults to OFF. + +: Build LLVM with exception-handling support. 
This is necessary if you wish + to link against LLVM libraries and make use of C++ exceptions in your own + code that need to propagate through LLVM code. Defaults to OFF. **LLVM_ENABLE_EXPENSIVE_CHECKS**:BOOL - Enable additional time/memory expensive checking. Defaults to OFF. + +: Enable additional time/memory expensive checking. Defaults to OFF. **LLVM_ENABLE_FFI**:BOOL - Indicates whether the LLVM Interpreter will be linked with the Foreign Function - Interface library (libffi) in order to enable calling external functions. - If the library or its headers are installed in a custom - location, you can also set the variables ``FFI_INCLUDE_DIR`` and - ``FFI_LIBRARY_DIR`` to the directories where ``ffi.h`` and ``libffi.so`` can be found, - respectively. Defaults to OFF. + +: Indicates whether the LLVM Interpreter will be linked with the Foreign + Function Interface library (libffi) in order to enable calling external + functions. If the library or its headers are installed in a custom + location, you can also set the variables `FFI_INCLUDE_DIR` and + `FFI_LIBRARY_DIR` to the directories where `ffi.h` and `libffi.so` can be + found, respectively. Defaults to OFF. **LLVM_ENABLE_HTTPLIB**:BOOL - Enables the optional cpp-httplib dependency which is used by llvm-debuginfod - to serve debug info over HTTP. `cpp-httplib `_ - must be installed, or `httplib_ROOT` must be set. Defaults to OFF. + +: Enables the optional cpp-httplib dependency which is used by + llvm-debuginfod to serve debug info over HTTP. + [cpp-httplib](https://github.com/yhirose/cpp-httplib) must be installed, or + `httplib_ROOT` must be set. Defaults to OFF. **LLVM_ENABLE_IDE**:BOOL - Tell the build system that an IDE is being used. This in turn disables the - creation of certain convenience build system targets, such as the various - ``install-*`` and ``check-*`` targets, since IDEs don't always deal well with - a large number of targets. 
This is usually autodetected, but it can be - configured manually to explicitly control the generation of those targets. + +: Tell the build system that an IDE is being used. This in turn disables the + creation of certain convenience build system targets, such as the various + `install-*` and `check-*` targets, since IDEs don't always deal well with a + large number of targets. This is usually autodetected, but it can be + configured manually to explicitly control the generation of those targets. **LLVM_ENABLE_LIBCXX**:BOOL - If the host compiler and linker support the stdlib flag, ``-stdlib=libc++`` is - passed to invocations of both so that the project is built using libc++ - instead of stdlibc++. Defaults to OFF. + +: If the host compiler and linker support the stdlib flag, `-stdlib=libc++` + is passed to invocations of both so that the project is built using libc++ + instead of stdlibc++. Defaults to OFF. **LLVM_ENABLE_LIBEDIT**:BOOL - Controls whether to enable libedit support for command-line editing and history - in LLVM tools. When ``ON``, forces libedit support to be enabled and will cause a - CMake configuration error if libedit cannot be found. When ``OFF``, disables - libedit support entirely. If not specified, LLVM will auto-detect libedit - availability. Defaults to auto-detection. + +: Controls whether to enable libedit support for command-line editing and + history in LLVM tools. When `ON`, forces libedit support to be enabled and + will cause a CMake configuration error if libedit cannot be found. When + `OFF`, disables libedit support entirely. If not specified, LLVM will + auto-detect libedit availability. Defaults to auto-detection. **LLVM_ENABLE_LIBPFM**:BOOL - Enable building with libpfm to support hardware counter measurements in LLVM - tools. - Defaults to ON. + +: Enable building with libpfm to support hardware counter measurements in + LLVM tools. Defaults to ON. 
**LLVM_ENABLE_LLD**:BOOL - This option is equivalent to `-DLLVM_USE_LINKER=lld`, except during a 2-stage - build where a dependency is added from the first stage to the second ensuring - that lld is built before stage2 begins. + +: This option is equivalent to `-DLLVM_USE_LINKER=lld`, except during a + 2-stage build where a dependency is added from the first stage to the + second ensuring that lld is built before stage2 begins. **LLVM_ENABLE_LLVM_LIBC**: BOOL - If the LLVM libc overlay is installed in a location where the host linker - can access it, all built executables will be linked against the LLVM libc - overlay before linking against the system libc. Defaults to OFF. + +: If the LLVM libc overlay is installed in a location where the host linker + can access it, all built executables will be linked against the LLVM libc + overlay before linking against the system libc. Defaults to OFF. **LLVM_ENABLE_LTO**:STRING - Add ``-flto`` or ``-flto=`` flags to the compile and link command - lines, enabling link-time optimization. Possible values are ``Off``, - ``On``, ``Thin`` and ``Full``. Defaults to OFF. + +: Add `-flto` or `-flto=` flags to the compile and link command lines, + enabling link-time optimization. Possible values are `Off`, `On`, `Thin` + and `Full`. Defaults to OFF. **LLVM_ENABLE_MODULES**:BOOL - Compile with `Clang Header Modules - <https://clang.llvm.org/docs/Modules.html>`_. -.. _llvm_enable_pdb: +: Compile with [Clang Header + Modules](https://clang.llvm.org/docs/Modules.html). + +(llvm_enable_pdb)= **LLVM_ENABLE_PDB**:BOOL - For Windows builds using MSVC or clang-cl, generate PDB files when - :ref:`CMAKE_BUILD_TYPE <cmake_build_type>` is set to Release. + +: For Windows builds using MSVC or clang-cl, generate PDB files when + {ref}`CMAKE_BUILD_TYPE <cmake_build_type>` is set to Release. **LLVM_ENABLE_PEDANTIC**:BOOL - Enable pedantic mode. This disables compiler-specific extensions, if - possible. Defaults to ON. + +: Enable pedantic mode. This disables compiler-specific extensions, if + possible. Defaults to ON.
**LLVM_ENABLE_PIC**:BOOL - Add the ``-fPIC`` flag to the compiler command-line, if the compiler supports - this flag. Some systems, like Windows, do not need this flag. Defaults to ON. + +: Add the `-fPIC` flag to the compiler command-line, if the compiler supports + this flag. Some systems, like Windows, do not need this flag. Defaults to + ON. **LLVM_ENABLE_PROJECTS**:STRING - Semicolon-separated list of projects to build, or *all* for building all - (clang, lldb, lld, polly, etc) projects. This flag assumes that projects - are checked out side-by-side and not nested, i.e. clang needs to be in - parallel to llvm instead of nested in ``llvm/tools``. This feature allows - having one build for only LLVM and another for clang+llvm using the same - source checkout. - The full list is: +: Semicolon-separated list of projects to build, or *all* for building all + (clang, lldb, lld, polly, etc) projects. This flag assumes that projects + are checked out side-by-side and not nested, i.e. clang needs to be in + parallel to llvm instead of nested in `llvm/tools`. This feature allows + having one build for only LLVM and another for clang+llvm using the same + source checkout. + + The full list is: - ``bolt;clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;lld;lldb;mlir;openmp;polly`` + `bolt;clang;clang-tools-extra;compiler-rt;cross-project-tests;libc;libclc;lld;lldb;mlir;openmp;polly` - .. note:: - Some projects listed here can also go in ``LLVM_ENABLE_RUNTIMES``. They - should only appear in one of the two lists. If a project is a valid possibility - for both, prefer putting it in ``LLVM_ENABLE_RUNTIMES``. + ```{note} + Some projects listed here can also go in `LLVM_ENABLE_RUNTIMES`. They should only appear in one of the two lists. If a project is a valid possibility for both, prefer putting it in `LLVM_ENABLE_RUNTIMES`. + ``` **LLVM_ENABLE_RTTI**:BOOL - Build LLVM with run-time type information. Defaults to OFF. 
+ +: Build LLVM with run-time type information. Defaults to OFF. **LLVM_ENABLE_RUNTIMES**:STRING - Build libc++, libc++abi, libunwind or compiler-rt using the just-built compiler. - This is the correct way to build runtimes when putting together a toolchain. - It will build the builtins separately from the other runtimes to preserve - correct dependency ordering. If you want to build the runtimes using a system - compiler, see the `libc++ documentation <https://libcxx.llvm.org/VendorDocumentation.html>`_. - .. note:: - The list should not have duplicates with ``LLVM_ENABLE_PROJECTS``. +: Build libc++, libc++abi, libunwind or compiler-rt using the just-built + compiler. This is the correct way to build runtimes when putting together a + toolchain. It will build the builtins separately from the other runtimes to + preserve correct dependency ordering. If you want to build the runtimes + using a system compiler, see the [libc++ + documentation](https://libcxx.llvm.org/VendorDocumentation.html). - To list all possible runtimes, include an invalid name. For example - ``-DLLVM_ENABLE_RUNTIMES=notaruntime``. The resulting CMake error will list - the possible runtime names. + ```{note} + The list should not have duplicates with `LLVM_ENABLE_PROJECTS`. + ``` - To enable all of the runtimes, use: + To list all possible runtimes, include an invalid name. For example + `-DLLVM_ENABLE_RUNTIMES=notaruntime`. The resulting CMake error will list + the possible runtime names. - ``LLVM_ENABLE_RUNTIMES=all`` + To enable all of the runtimes, use: + + `LLVM_ENABLE_RUNTIMES=all` **LLVM_ENABLE_SPHINX**:BOOL - If specified, CMake will search for the ``sphinx-build`` executable and will make - the ``SPHINX_OUTPUT_HTML`` and ``SPHINX_OUTPUT_MAN`` CMake options available. - Defaults to OFF. + +: If specified, CMake will search for the `sphinx-build` executable and will + make the `SPHINX_OUTPUT_HTML` and `SPHINX_OUTPUT_MAN` CMake options + available. Defaults to OFF. **LLVM_ENABLE_THREADS**:BOOL - Build with threads support, if available.
Defaults to ON. + +: Build with threads support, if available. Defaults to ON. **LLVM_ENABLE_UNWIND_TABLES**:BOOL - Enable unwind tables in the binary. Disabling unwind tables can reduce the - size of the libraries. Defaults to ON. + +: Enable unwind tables in the binary. Disabling unwind tables can reduce the + size of the libraries. Defaults to ON. **LLVM_ENABLE_WARNINGS**:BOOL - Enable all compiler warnings. Defaults to ON. + +: Enable all compiler warnings. Defaults to ON. **LLVM_ENABLE_WARNING_SUPPRESSIONS**:BOOL - Suppress specific compiler warnings. When disabled, this - prevents suppressing warnings with flags such as MSVC's ``-wd`` or GCC/Clang's ``-Wno-...``. - Defaults to ON. + +: Suppress specific compiler warnings. When disabled, this prevents + suppressing warnings with flags such as MSVC's `-wd` or GCC/Clang's + `-Wno-...`. Defaults to ON. **LLVM_ENABLE_WERROR**:BOOL - Stop and fail the build, if a compiler warning is triggered. Defaults to OFF. + +: Stop and fail the build, if a compiler warning is triggered. Defaults to + OFF. **LLVM_ENABLE_Z3_SOLVER**:BOOL - If enabled, the Z3 constraint solver is activated for the Clang static analyzer. - A recent version of the z3 library must be available on the system. + +: If enabled, the Z3 constraint solver is activated for the Clang static + analyzer. A recent version of the z3 library must be available on the + system. **LLVM_ENABLE_ZLIB**:STRING - Used to decide if LLVM tools should support compression/decompression with - zlib. Allowed values are ``OFF``, ``ON`` (default, enable if zlib is found), - and ``FORCE_ON`` (error if zlib is not found). + +: Used to decide if LLVM tools should support compression/decompression with + zlib. Allowed values are `OFF`, `ON` (default, enable if zlib is found), + and `FORCE_ON` (error if zlib is not found). **LLVM_ENABLE_ZSTD**:STRING - Used to decide if LLVM tools should support compression/decompression with - zstd. 
Allowed values are ``OFF``, ``ON`` (default, enable if zstd is found), - and ``FORCE_ON`` (error if zstd is not found). + +: Used to decide if LLVM tools should support compression/decompression with + zstd. Allowed values are `OFF`, `ON` (default, enable if zstd is found), + and `FORCE_ON` (error if zstd is not found). **LLVM_EXPERIMENTAL_TARGETS_TO_BUILD**:STRING - Semicolon-separated list of experimental targets to build and linked into - llvm. This will build the experimental target without needing it to add to the - list of all the targets available in the LLVM's main ``CMakeLists.txt``. + +: Semicolon-separated list of experimental targets to build and linked into + llvm. This will build the experimental target without needing it to add to + the list of all the targets available in the LLVM's main `CMakeLists.txt`. **LLVM_EXTERNAL_PROJECTS**:STRING - Semicolon-separated list of additional external projects to build as part of - llvm. For each project, ``LLVM_EXTERNAL_<NAME>_SOURCE_DIR`` has to be specified - with the path for the source code of the project. Example: - ``-DLLVM_EXTERNAL_PROJECTS="Foo;Bar" - -DLLVM_EXTERNAL_FOO_SOURCE_DIR=/src/foo - -DLLVM_EXTERNAL_BAR_SOURCE_DIR=/src/bar``. - -**LLVM_EXTERNAL_{CLANG,LLD,POLLY}_SOURCE_DIR**:PATH - These variables specify the path to the source directory for the external - LLVM projects Clang, lld, and Polly, respectively, relative to the top-level - source directory. If the in-tree subdirectory for an external project - exists (e.g., ``llvm/tools/clang`` for Clang), then the corresponding variable - will not be used. If the variable for an external project does not point - to a valid path, then that project will not be built. + +: Semicolon-separated list of additional external projects to build as part + of llvm. For each project, `LLVM_EXTERNAL_<NAME>_SOURCE_DIR` has to be + specified with the path for the source code of the project.
Example: + `-DLLVM_EXTERNAL_PROJECTS="Foo;Bar" -DLLVM_EXTERNAL_FOO_SOURCE_DIR=/src/foo + -DLLVM_EXTERNAL_BAR_SOURCE_DIR=/src/bar`. + +**LLVM_EXTERNAL\_{CLANG,LLD,POLLY}\_SOURCE_DIR**:PATH + +: These variables specify the path to the source directory for the external + LLVM projects Clang, lld, and Polly, respectively, relative to the + top-level source directory. If the in-tree subdirectory for an external + project exists (e.g., `llvm/tools/clang` for Clang), then the corresponding + variable will not be used. If the variable for an external project does not + point to a valid path, then that project will not be built. **LLVM_EXTERNALIZE_DEBUGINFO**:BOOL - Generate dSYM files and strip executables and libraries (Darwin only). - Defaults to OFF. + +: Generate dSYM files and strip executables and libraries (Darwin only). + Defaults to OFF. **LLVM_ENABLE_EXPORTED_SYMBOLS_IN_EXECUTABLES**:BOOL - When building executables, preserve symbol exports. Defaults to ON. - You can use this option to disable exported symbols from all - executables (Darwin Only). + +: When building executables, preserve symbol exports. Defaults to ON. You can + use this option to disable exported symbols from all executables (Darwin + Only). **LLVM_FORCE_USE_OLD_TOOLCHAIN**:BOOL - If enabled, the compiler and standard library versions won't be checked. LLVM - may not compile at all, or might fail at runtime due to known bugs in these - toolchains. + +: If enabled, the compiler and standard library versions won't be checked. + LLVM may not compile at all, or might fail at runtime due to known bugs in + these toolchains. **LLVM_INCLUDE_BENCHMARKS**:BOOL - Generate build targets for the LLVM benchmarks. Defaults to ON. + +: Generate build targets for the LLVM benchmarks. Defaults to ON. **LLVM_INCLUDE_EXAMPLES**:BOOL - Generate build targets for the LLVM examples. Defaults to ON. You can use this - option to disable the generation of build targets for the LLVM examples. 
+ +: Generate build targets for the LLVM examples. Defaults to ON. You can use + this option to disable the generation of build targets for the LLVM + examples. **LLVM_INCLUDE_TESTS**:BOOL - Generate build targets for the LLVM unit tests. Defaults to ON. You can use - this option to disable the generation of build targets for the LLVM unit - tests. + +: Generate build targets for the LLVM unit tests. Defaults to ON. You can use + this option to disable the generation of build targets for the LLVM unit + tests. **LLVM_INCLUDE_TOOLS**:BOOL - Generate build targets for the LLVM tools. Defaults to ON. You can use this - option to disable the generation of build targets for the LLVM tools. + +: Generate build targets for the LLVM tools. Defaults to ON. You can use this + option to disable the generation of build targets for the LLVM tools. **LLVM_INDIVIDUAL_TEST_COVERAGE**:BOOL - Enable individual test case coverage. When set to ON, code coverage data for - each test case will be generated and stored in a separate directory under the - config.test_exec_root path. This feature allows code coverage analysis of each - individual test case. Defaults to OFF. + +: Enable individual test case coverage. When set to ON, code coverage data + for each test case will be generated and stored in a separate directory + under the config.test_exec_root path. This feature allows code coverage + analysis of each individual test case. Defaults to OFF. **LLVM_INSTALL_BINUTILS_SYMLINKS**:BOOL - Install symlinks from the binutils tool names to the corresponding LLVM tools. - For example, ar will be symlinked to llvm-ar. + +: Install symlinks from the binutils tool names to the corresponding LLVM + tools. For example, ar will be symlinked to llvm-ar. **LLVM_INSTALL_CCTOOLS_SYMLINKS**:BOOL - Install symlinks from the cctools tool names to the corresponding LLVM tools. - For example, lipo will be symlinked to llvm-lipo. 
+ +: Install symlinks from the cctools tool names to the corresponding LLVM + tools. For example, lipo will be symlinked to llvm-lipo. **LLVM_INSTALL_OCAMLDOC_HTML_DIR**:STRING - The path to install OCamldoc-generated HTML documentation to. This path can - either be absolute or relative to the ``CMAKE_INSTALL_PREFIX``. Defaults to - ``${CMAKE_INSTALL_DOCDIR}/llvm/ocaml-html``. + +: The path to install OCamldoc-generated HTML documentation to. This path can + either be absolute or relative to the `CMAKE_INSTALL_PREFIX`. Defaults to + `${CMAKE_INSTALL_DOCDIR}/llvm/ocaml-html`. **LLVM_INSTALL_SPHINX_HTML_DIR**:STRING - The path to install Sphinx-generated HTML documentation to. This path can - either be absolute or relative to the ``CMAKE_INSTALL_PREFIX``. Defaults to - ``${CMAKE_INSTALL_DOCDIR}/llvm/html``. + +: The path to install Sphinx-generated HTML documentation to. This path can + either be absolute or relative to the `CMAKE_INSTALL_PREFIX`. Defaults to + `${CMAKE_INSTALL_DOCDIR}/llvm/html`. **LLVM_INSTALL_UTILS**:BOOL - If enabled, utility binaries like ``FileCheck`` and ``not`` will be installed - to ``CMAKE_INSTALL_PREFIX``. + +: If enabled, utility binaries like `FileCheck` and `not` will be installed + to `CMAKE_INSTALL_PREFIX`. **LLVM_INSTALL_DOXYGEN_HTML_DIR**:STRING - The path to install Doxygen-generated HTML documentation to. This path can - either be absolute or relative to the *CMAKE_INSTALL_PREFIX*. Defaults to - ``${CMAKE_INSTALL_DOCDIR}/llvm/doxygen-html``. + +: The path to install Doxygen-generated HTML documentation to. This path can + either be absolute or relative to the *CMAKE_INSTALL_PREFIX*. Defaults to + `${CMAKE_INSTALL_DOCDIR}/llvm/doxygen-html`. **LLVM_INTEGRATED_CRT_ALLOC**:PATH - On Windows, allows embedding a different C runtime allocator into the LLVM - tools and libraries. Using a lock-free allocator such as the ones listed below - greatly decreases ThinLTO link time by about an order of magnitude. 
It also - mildly improves Clang build times, by about 5-10%. At the moment, rpmalloc, - snmalloc and mimalloc are supported. Use the path to `git clone` to select - the respective allocator, for example: - .. code-block:: console +: On Windows, allows embedding a different C runtime allocator into the LLVM + tools and libraries. Using a lock-free allocator such as the ones listed + below greatly decreases ThinLTO link time by about an order of magnitude. + It also mildly improves Clang build times, by about 5-10%. At the moment, + rpmalloc, snmalloc and mimalloc are supported. Use the path to `git clone` + to select the respective allocator, for example: + ``` console $ D:\git> git clone https://github.com/mjansson/rpmalloc $ D:\llvm-project> cmake ... -DLLVM_INTEGRATED_CRT_ALLOC=D:\git\rpmalloc + ``` - This option needs to be used along with the static CRT, i.e., if building the - Release target, add ``-DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded``. - Note that rpmalloc is also supported natively in-tree, see option below. + This option needs to be used along with the static CRT, i.e., if building + the Release target, add `-DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreaded`. Note + that rpmalloc is also supported natively in-tree, see option below. **LLVM_ENABLE_RPMALLOC**:BOOL - Similar to LLVM_INTEGRATED_CRT_ALLOC, embeds the in-tree rpmalloc into the - host toolchain as a C runtime allocator. The version currently used is - rpmalloc 1.4.5. This option also implies linking with the static CRT, there's - no need to provide CMAKE_MSVC_RUNTIME_LIBRARY. + +: Similar to LLVM_INTEGRATED_CRT_ALLOC, embeds the in-tree rpmalloc into the + host toolchain as a C runtime allocator. The version currently used is + rpmalloc 1.4.5. This option also implies linking with the static CRT, + there's no need to provide CMAKE_MSVC_RUNTIME_LIBRARY. **LLVM_LINK_LLVM_DYLIB**:BOOL - If enabled, tools will be linked with the libLLVM shared library. Defaults - to OFF. 
Setting ``LLVM_LINK_LLVM_DYLIB`` to ON also sets ``LLVM_BUILD_LLVM_DYLIB`` - to ON. - This option is not available on Windows. -**LLVM_<target>_LINKER_FLAGS**:STRING - Defines the set of linker flags that should be applied to a <target>. +: If enabled, tools will be linked with the libLLVM shared library. Defaults + to OFF. Setting `LLVM_LINK_LLVM_DYLIB` to ON also sets + `LLVM_BUILD_LLVM_DYLIB` to ON. This option is not available on Windows. + +**LLVM\_\<target\>\_LINKER_FLAGS**:STRING + +: Defines the set of linker flags that should be applied to a \<target\>. **LLVM_LIT_ARGS**:STRING - Arguments given to lit. ``make check`` and ``make clang-test`` are affected. - By default, ``'-sv --no-progress-bar'`` on Visual C++ and Xcode, ``'-sv'`` on - others. + +: Arguments given to lit. `make check` and `make clang-test` are affected. By + default, `'-sv --no-progress-bar'` on Visual C++ and Xcode, `'-sv'` on + others. **LLVM_LIT_TOOLS_DIR**:PATH - The path to GnuWin32 tools for tests. Valid on Windows host. Defaults to - the empty string, in which case lit will look for tools needed for tests - (e.g., ``grep``, ``sort``, etc.) in your ``%PATH%``. If GnuWin32 is not in your - ``%PATH%``, then you can set this variable to the GnuWin32 directory so that - lit can find tools needed for tests in that directory. + +: The path to GnuWin32 tools for tests. Valid on Windows host. Defaults to + the empty string, in which case lit will look for tools needed for tests + (e.g., `grep`, `sort`, etc.) in your `%PATH%`. If GnuWin32 is not in your + `%PATH%`, then you can set this variable to the GnuWin32 directory so that + lit can find tools needed for tests in that directory. **LLVM_NATIVE_TOOL_DIR**:STRING - Full path to a directory containing executables for the build host - (containing binaries such as ``llvm-tblgen`` and ``clang-tblgen``).
This is - intended for cross-compiling: if the user sets this variable and the - directory contains executables with the expected names, no separate - native versions of those executables will be built. + +: Full path to a directory containing executables for the build host + (containing binaries such as `llvm-tblgen` and `clang-tblgen`). This is + intended for cross-compiling: if the user sets this variable and the + directory contains executables with the expected names, no separate native + versions of those executables will be built. **LLVM_NO_INSTALL_NAME_DIR_FOR_BUILD_TREE**:BOOL - Defaults to ``OFF``. If set to ``ON``, CMake's default logic for library IDs - on Darwin in the build tree will be used. Otherwise the install-time library - IDs will be used in the build tree as well. Mainly useful when other CMake - library ID control variables (e.g., ``CMAKE_INSTALL_NAME_DIR``) are being - set to non-standard values. + +: Defaults to `OFF`. If set to `ON`, CMake's default logic for library IDs on + Darwin in the build tree will be used. Otherwise the install-time library + IDs will be used in the build tree as well. Mainly useful when other CMake + library ID control variables (e.g., `CMAKE_INSTALL_NAME_DIR`) are being set + to non-standard values. **LLVM_VERSIONED_DYLIB_NAME_ON_DARWIN**:BOOL - Defaults to ``ON``. If set to ``ON``, Darwin shared libraries built through - LLVM's CMake helpers use versioned dylib filenames and install names, matching - the behavior on other Unix platforms more closely. If set to ``OFF``, Darwin - keeps the legacy unversioned dylib install name, for compatibility with - existing consumers that expect ``@rpath/libLLVM.dylib``. - -**LLVM_UNVERSIONED_{LIBLTO,LIBCLANG}_ON_DARWIN**:BOOL - Default to ``ON``. When ``LLVM_VERSIONED_DYLIB_NAME_ON_DARWIN`` is ``ON``, - these keep ``libLTO`` and ``libclang``'s Darwin dylib identities - unversioned. 
Set ``LLVM_UNVERSIONED_LIBLTO_ON_DARWIN`` to ``OFF`` to - version ``libLTO`` using its Darwin ``LTO_VERSION`` policy instead. Set - ``LLVM_UNVERSIONED_LIBCLANG_ON_DARWIN`` to ``OFF`` to version - ``libclang`` using its existing ABI version policy instead. + +: Defaults to `ON`. If set to `ON`, Darwin shared libraries built through + LLVM's CMake helpers use versioned dylib filenames and install names, + matching the behavior on other Unix platforms more closely. If set to + `OFF`, Darwin keeps the legacy unversioned dylib install name, for + compatibility with existing consumers that expect `@rpath/libLLVM.dylib`. + +**LLVM_UNVERSIONED\_{LIBLTO,LIBCLANG}\_ON_DARWIN**:BOOL + +: Default to `ON`. When `LLVM_VERSIONED_DYLIB_NAME_ON_DARWIN` is `ON`, these + keep `libLTO` and `libclang`'s Darwin dylib identities unversioned. Set + `LLVM_UNVERSIONED_LIBLTO_ON_DARWIN` to `OFF` to version `libLTO` using its + Darwin `LTO_VERSION` policy instead. Set + `LLVM_UNVERSIONED_LIBCLANG_ON_DARWIN` to `OFF` to version `libclang` using + its existing ABI version policy instead. **LLVM_OPTIMIZED_TABLEGEN**:BOOL - If enabled and building a debug or assert build, the CMake build system will - generate a Release build tree to build a fully optimized tablegen for use - during the build. Enabling this option can significantly speed up build times, - especially when building LLVM in Debug configurations. -**LLVM_PARALLEL_{COMPILE,LINK,TABLEGEN}_JOBS**:STRING - Limit the maximum number of concurrent compilation, link or - tablegen jobs respectively. The default total number of parallel jobs is - determined by the number of logical CPUs. +: If enabled and building a debug or assert build, the CMake build system + will generate a Release build tree to build a fully optimized tablegen for + use during the build. Enabling this option can significantly speed up build + times, especially when building LLVM in Debug configurations. 
+ +**LLVM_PARALLEL\_{COMPILE,LINK,TABLEGEN}\_JOBS**:STRING + +: Limit the maximum number of concurrent compilation, link or tablegen jobs + respectively. The default total number of parallel jobs is determined by + the number of logical CPUs. **LLVM_PROFDATA_FILE**:PATH - Path to a profdata file to pass into clang's ``-fprofile-instr-use`` flag. This - can only be specified if you're building with clang. - -**LLVM_RAM_PER_{COMPILE,LINK,TABLEGEN}_JOB**:STRING - Limit the number of concurrent compile, link or tablegen jobs - respectively, depending on available physical memory. The value - specified is in MB. The respective - ``LLVM_PARALLEL_{COMPILE,LINK,TABLEGEN}_JOBS`` variable is - overwritten by computing the memory size divided by the - specified value. The largest memory user is linking, but remember - that jobs in the other categories might run in parallel with the link - jobs, and you need to consider their memory requirements when - in a memory-limited environment. Using a - ``-DLLVM_RAM_PER_LINK_JOB=10000`` is a good approximation. On ELF - platforms debug builds can reduce link-time memory pressure by also - using ``LLVM_USE_SPLIT_DWARF``. + +: Path to a profdata file to pass into clang's `-fprofile-instr-use` flag. + This can only be specified if you're building with clang. + +**LLVM_RAM_PER\_{COMPILE,LINK,TABLEGEN}\_JOB**:STRING + +: Limit the number of concurrent compile, link or tablegen jobs respectively, + depending on available physical memory. The value specified is in MB. The + respective `LLVM_PARALLEL_{COMPILE,LINK,TABLEGEN}_JOBS` variable is + overwritten by computing the memory size divided by the specified value. + The largest memory user is linking, but remember that jobs in the other + categories might run in parallel with the link jobs, and you need to + consider their memory requirements when in a memory-limited environment. + Using a `-DLLVM_RAM_PER_LINK_JOB=10000` is a good approximation. 
On ELF + platforms debug builds can reduce link-time memory pressure by also using + `LLVM_USE_SPLIT_DWARF`. **LLVM_REVERSE_ITERATION**:BOOL - If enabled, all supported unordered llvm containers would be iterated in - reverse order. This is useful for uncovering non-determinism caused by - iteration of unordered containers. + +: If enabled, all supported unordered llvm containers would be iterated in + reverse order. This is useful for uncovering non-determinism caused by + iteration of unordered containers. **LLVM_STATIC_LINK_CXX_STDLIB**:BOOL - Statically link to the C++ standard library if possible. This uses the flag - ``-static-libstdc++``, but a Clang host compiler will statically link to libc++ - if used in conjunction with the **LLVM_ENABLE_LIBCXX** flag. Defaults to OFF. + +: Statically link to the C++ standard library if possible. This uses the flag + `-static-libstdc++`, but a Clang host compiler will statically link to + libc++ if used in conjunction with the **LLVM_ENABLE_LIBCXX** flag. + Defaults to OFF. **LLVM_TABLEGEN**:STRING - Full path to a native TableGen executable (usually named ``llvm-tblgen``). This is - intended for cross-compiling: if the user sets this variable, no native - TableGen will be created. + +: Full path to a native TableGen executable (usually named `llvm-tblgen`). + This is intended for cross-compiling: if the user sets this variable, no + native TableGen will be created. **LLVM_TARGET_ARCH**:STRING - LLVM target to use for native code generation. This is required for JIT - generation. It defaults to "host", meaning that it shall pick the architecture - of the machine where LLVM is being built. If you are cross-compiling, set it - to the target architecture name. + +: LLVM target to use for native code generation. This is required for JIT + generation. It defaults to "host", meaning that it shall pick the + architecture of the machine where LLVM is being built. 
If you are + cross-compiling, set it to the target architecture name. **LLVM_TARGETS_TO_BUILD**:STRING - Semicolon-separated list of targets to build, or *all* for building all - targets. Case-sensitive. Defaults to *all*. Example: - ``-DLLVM_TARGETS_TO_BUILD="X86;PowerPC"``. - The full list, as of August 2025, is: - ``AArch64;AMDGPU;ARM;AVR;BPF;Hexagon;Lanai;LoongArch;Mips;MSP430;NVPTX;PowerPC;RISCV;Sparc;SPIRV;SystemZ;VE;WebAssembly;X86;XCore`` - - You can also specify ``host`` or ``Native`` to automatically detect and - include the target corresponding to the host machine's architecture, or - use ``all`` to include all available targets. - For example, on an x86_64 machine, specifying ``-DLLVM_TARGETS_TO_BUILD=host`` - will include the ``X86`` target. + +: Semicolon-separated list of targets to build, or *all* for building all + targets. Case-sensitive. Defaults to *all*. Example: + `-DLLVM_TARGETS_TO_BUILD="X86;PowerPC"`. The full list, as of August 2025, + is: + `AArch64;AMDGPU;ARM;AVR;BPF;Hexagon;Lanai;LoongArch;Mips;MSP430;NVPTX;PowerPC;RISCV;Sparc;SPIRV;SystemZ;VE;WebAssembly;X86;XCore` + + You can also specify `host` or `Native` to automatically detect and include + the target corresponding to the host machine's architecture, or use `all` + to include all available targets. For example, on an x86_64 machine, + specifying `-DLLVM_TARGETS_TO_BUILD=host` will include the `X86` target. **LLVM_TEMPORARILY_ALLOW_OLD_TOOLCHAIN**:BOOL - If enabled, the compiler version check will only warn when using a toolchain - which is about to be deprecated, instead of emitting an error. + +: If enabled, the compiler version check will only warn when using a + toolchain which is about to be deprecated, instead of emitting an error. **LLVM_UBSAN_FLAGS**:STRING - Defines the set of compile flags used to enable UBSan. Only used if - ``LLVM_USE_SANITIZER`` contains ``Undefined``. This can be used to override - the default set of UBSan flags. 
+ +: Defines the set of compile flags used to enable UBSan. Only used if + `LLVM_USE_SANITIZER` contains `Undefined`. This can be used to override the + default set of UBSan flags. **LLVM_UNREACHABLE_OPTIMIZE**:BOOL - This flag controls the behavior of ``llvm_unreachable()`` in a release build - (when assertions are disabled in general). When ON (default) then - ``llvm_unreachable()`` is considered "undefined behavior" and optimized as - such. When OFF it is instead replaced with a guaranteed "trap". + +: This flag controls the behavior of `llvm_unreachable()` in a release build + (when assertions are disabled in general). When ON (default) then + `llvm_unreachable()` is considered "undefined behavior" and optimized as + such. When OFF it is instead replaced with a guaranteed "trap". **LLVM_USE_INTEL_JITEVENTS**:BOOL - Enable building support for Intel JIT Events API. Defaults to OFF. + +: Enable building support for Intel JIT Events API. Defaults to OFF. **LLVM_USE_LINKER**:STRING - Add ``-fuse-ld={name}`` to the link invocation. The possible values depend on - your compiler. For clang, the value can be an absolute path to your custom - linker, otherwise clang will prefix the name with ``ld.`` and apply its usual - search. For example, to link LLVM with the Gold linker, cmake can be invoked - with ``-DLLVM_USE_LINKER=gold``. + +: Add `-fuse-ld={name}` to the link invocation. The possible values depend on + your compiler. For clang, the value can be an absolute path to your custom + linker, otherwise clang will prefix the name with `ld.` and apply its usual + search. For example, to link LLVM with the Gold linker, cmake can be + invoked with `-DLLVM_USE_LINKER=gold`. **LLVM_USE_OPROFILE**:BOOL - Enable building OProfile JIT support. Defaults to OFF. + +: Enable building OProfile JIT support. Defaults to OFF. **LLVM_USE_PERF**:BOOL - Enable building support for Perf (linux profiling tool) JIT support. Defaults to OFF. 
+ +: Enable building support for Perf (linux profiling tool) JIT support. + Defaults to OFF. **LLVM_USE_RELATIVE_PATHS_IN_FILES**:BOOL - Rewrite absolute source paths in sources and debug info to relative ones. The - source prefix can be adjusted via the ``LLVM_SOURCE_PREFIX`` variable. + +: Rewrite absolute source paths in sources and debug info to relative ones. + The source prefix can be adjusted via the `LLVM_SOURCE_PREFIX` variable. **LLVM_USE_RELATIVE_PATHS_IN_DEBUG_INFO**:BOOL - Rewrite absolute source paths in debug info to relative ones. The source prefix - can be adjusted via the ``LLVM_SOURCE_PREFIX`` variable. + +: Rewrite absolute source paths in debug info to relative ones. The source + prefix can be adjusted via the `LLVM_SOURCE_PREFIX` variable. **LLVM_USE_SANITIZER**:STRING - Define the sanitizer used to build LLVM binaries and tests. Possible values - are ``Address``, ``HWAddress``, ``Memory``, ``MemoryWithOrigins``, ``Undefined``, - ``Thread``, ``DataFlow``, ``Leaks``, and ``Address;Undefined``. Defaults to - empty string. + +: Define the sanitizer used to build LLVM binaries and tests. Possible values + are `Address`, `HWAddress`, `Memory`, `MemoryWithOrigins`, `Undefined`, + `Thread`, `DataFlow`, `Leaks`, and `Address;Undefined`. Defaults to empty + string. **LLVM_USE_SPLIT_DWARF**:BOOL - If enabled CMake will pass ``-gsplit-dwarf`` to the compiler. This option - reduces link-time memory usage by reducing the amount of debug information that - the linker needs to resolve. It is recommended for platforms using the ELF object - format, like Linux systems when linker memory usage is too high. + +: If enabled CMake will pass `-gsplit-dwarf` to the compiler. This option + reduces link-time memory usage by reducing the amount of debug information + that the linker needs to resolve. It is recommended for platforms using the + ELF object format, like Linux systems when linker memory usage is too high. 
**SPHINX_EXECUTABLE**:STRING - The path to the ``sphinx-build`` executable detected by CMake. - For installation instructions, see - https://www.sphinx-doc.org/en/master/usage/installation.html + +: The path to the `sphinx-build` executable detected by CMake. For + installation instructions, see + **SPHINX_OUTPUT_HTML**:BOOL - If enabled (and ``LLVM_ENABLE_SPHINX`` is enabled) then the targets for - building the documentation as HTML are added (but not built by default unless - ``LLVM_BUILD_DOCS`` is enabled). There is a target for each project in the - source tree that uses sphinx (e.g., ``docs-llvm-html``, ``docs-clang-html`` - and ``docs-lld-html``). Defaults to ON. + +: If enabled (and `LLVM_ENABLE_SPHINX` is enabled) then the targets for + building the documentation as HTML are added (but not built by default + unless `LLVM_BUILD_DOCS` is enabled). There is a target for each project in + the source tree that uses sphinx (e.g., `docs-llvm-html`, `docs-clang-html` + and `docs-lld-html`). Defaults to ON. **SPHINX_OUTPUT_MAN**:BOOL - If enabled (and ``LLVM_ENABLE_SPHINX`` is enabled) the targets for building - the man pages are added (but not built by default unless ``LLVM_BUILD_DOCS`` - is enabled). Currently the only target added is ``docs-llvm-man``. Defaults - to ON. + +: If enabled (and `LLVM_ENABLE_SPHINX` is enabled) the targets for building + the man pages are added (but not built by default unless `LLVM_BUILD_DOCS` + is enabled). Currently the only target added is `docs-llvm-man`. Defaults + to ON. **SPHINX_WARNINGS_AS_ERRORS**:BOOL - If enabled, then sphinx documentation warnings will be treated as - errors. Defaults to ON. -Advanced variables -~~~~~~~~~~~~~~~~~~ +: If enabled, then sphinx documentation warnings will be treated as errors. + Defaults to ON. + +#### Advanced variables These are niche, and changing them from their defaults is more likely to cause -things to go wrong. They are also unstable across LLVM versions. +things to go wrong. 
They are also unstable across LLVM versions. **LLVM_EXAMPLES_INSTALL_DIR**:STRING - The path for examples of using LLVM, relative to the *CMAKE_INSTALL_PREFIX*. - Only matters if *LLVM_BUILD_EXAMPLES* is enabled. - Defaults to "examples". + +: The path for examples of using LLVM, relative to the + *CMAKE_INSTALL_PREFIX*. Only matters if *LLVM_BUILD_EXAMPLES* is enabled. + Defaults to "examples". **LLVM_TOOLS_INSTALL_DIR**:STRING - The path to install the main LLVM tools, relative to the *CMAKE_INSTALL_PREFIX*. - Defaults to *CMAKE_INSTALL_BINDIR*. + +: The path to install the main LLVM tools, relative to the + *CMAKE_INSTALL_PREFIX*. Defaults to *CMAKE_INSTALL_BINDIR*. **LLVM_UTILS_INSTALL_DIR**:STRING - The path to install auxiliary LLVM utilities, relative to the *CMAKE_INSTALL_PREFIX*. - Only matters if *LLVM_INSTALL_UTILS* is enabled. - Defaults to *LLVM_TOOLS_INSTALL_DIR*. -CMake Caches -============ +: The path to install auxiliary LLVM utilities, relative to the + *CMAKE_INSTALL_PREFIX*. Only matters if *LLVM_INSTALL_UTILS* is enabled. + Defaults to *LLVM_TOOLS_INSTALL_DIR*. + +## CMake Caches Recently, LLVM and Clang have been adding some more complicated build system features. Utilizing these new features often involves a complicated chain of CMake variables passed on the command line. Clang provides a collection of CMake cache scripts to make these features more approachable. -CMake cache files are utilized using CMake's ``-C`` flag: +CMake cache files are utilized using CMake's `-C` flag: -.. code-block:: console - - $ cmake -C +``` console +$ cmake -C +``` CMake cache scripts are processed in an isolated scope, only cached variables remain set when the main configuration runs. CMake cached variables do not reset @@ -1000,242 +1107,240 @@ variables that are already set unless the FORCE option is specified. 
A few notes about CMake Caches: - Order of command line arguments is important - - - ``-D`` arguments specified before ``-C`` are set before the cache is processed and + - `-D` arguments specified before `-C` are set before the cache is processed and can be read inside the cache file - - ``-D`` arguments specified after ``-C`` are set after the cache is processed and + - `-D` arguments specified after `-C` are set after the cache is processed and are unset inside the cache file - -- All ``-D`` arguments will override cache file settings +- All `-D` arguments will override cache file settings - CMAKE_TOOLCHAIN_FILE is evaluated after both the cache file and the command line arguments -- It is recommended that all ``-D`` options be specified *before* ``-C`` +- It is recommended that all `-D` options be specified *before* `-C` For more information about some of the advanced build configurations supported -via Cache files see :doc:`AdvancedBuilds`. +via Cache files see {doc}`AdvancedBuilds`. -Executing the Tests -=================== +## Executing the Tests Testing is performed when the *check-all* target is built. For instance, if you are using Makefiles, execute this command in the root of your build directory: -.. code-block:: console - - $ make check-all +``` console +$ make check-all +``` On Visual Studio, you may run tests by building the project "check-all". -For more information about testing, see the :doc:`TestingGuide`. +For more information about testing, see the {doc}`TestingGuide`. -Cross compiling -=============== +## Cross compiling -See `this wiki page `_ for -generic instructions on how to cross-compile with CMake. It goes into detailed -explanations and may seem daunting, but it is not. The wiki page has -several examples including toolchain files. Go directly to the -``Information how to set up various cross compiling toolchains`` section -for a quick solution. 
+See [this wiki +page](https://gitlab.kitware.com/cmake/community/wikis/doc/cmake/CrossCompiling) +for generic instructions on how to cross-compile with CMake. It goes into +detailed explanations and may seem daunting, but it is not. The wiki page has +several examples including toolchain files. Go directly to the `Information how +to set up various cross compiling toolchains` section for a quick solution. -Also see the `LLVM-related variables`_ section for variables used when -cross-compiling. +Also see the [LLVM-related variables](#llvm-related-variables) section for +variables used when cross-compiling. -Embedding LLVM in your project -============================== +## Embedding LLVM in your project From LLVM 3.5 onward, the CMake build system exports LLVM libraries as importable CMake targets. This means that clients of LLVM can now reliably use CMake to develop their own LLVM-based projects against an installed version of LLVM regardless of how it was built. -Here is a simple example of a ``CMakeLists.txt`` file that imports the LLVM libraries -and uses them to build a simple application ``simple-tool``. - -.. code-block:: cmake +Here is a simple example of a `CMakeLists.txt` file that imports the LLVM libraries +and uses them to build a simple application `simple-tool`. - cmake_minimum_required(VERSION 3.20.0) - project(SimpleProject) +``` cmake +cmake_minimum_required(VERSION 3.20.0) +project(SimpleProject) - find_package(LLVM REQUIRED CONFIG) +find_package(LLVM REQUIRED CONFIG) - message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") - message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") +message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}") +message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") - # Set your project compile flags. - # E.g. if using the C++ header files - # you will need to enable C++11 support - # for your compiler. +# Set your project compile flags. +# E.g. 
if using the C++ header files +# you will need to enable C++11 support +# for your compiler. - include_directories(${LLVM_INCLUDE_DIRS}) - separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) - add_definitions(${LLVM_DEFINITIONS_LIST}) +include_directories(${LLVM_INCLUDE_DIRS}) +separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) +add_definitions(${LLVM_DEFINITIONS_LIST}) - # Now build our tools - add_executable(simple-tool tool.cpp) +# Now build our tools +add_executable(simple-tool tool.cpp) - # Find the libraries that correspond to the LLVM components - # that we wish to use - llvm_map_components_to_libnames(llvm_libs support core irreader) +# Find the libraries that correspond to the LLVM components +# that we wish to use +llvm_map_components_to_libnames(llvm_libs support core irreader) - # Link against LLVM libraries - target_link_libraries(simple-tool ${llvm_libs}) +# Link against LLVM libraries +target_link_libraries(simple-tool ${llvm_libs}) +``` -The ``find_package(...)`` directive when used in CONFIG mode (as in the above -example) will look for the ``LLVMConfig.cmake`` file in various locations (see -CMake manual for details). It creates an ``LLVM_DIR`` cache entry to save the -directory where ``LLVMConfig.cmake`` is found or allows the user to specify the -directory (e.g., by passing ``-DLLVM_DIR=/usr/lib/cmake/llvm`` to -the ``cmake`` command or by setting it directly in ``ccmake`` or ``cmake-gui``). +The `find_package(...)` directive when used in CONFIG mode (as in the above +example) will look for the `LLVMConfig.cmake` file in various locations (see +CMake manual for details). It creates an `LLVM_DIR` cache entry to save the +directory where `LLVMConfig.cmake` is found or allows the user to specify the +directory (e.g., by passing `-DLLVM_DIR=/usr/lib/cmake/llvm` to +the `cmake` command or by setting it directly in `ccmake` or `cmake-gui`). This file is available in two different locations. 
-* ``<LLVM_INSTALL_PACKAGE_DIR>/LLVMConfig.cmake`` where - ``<LLVM_INSTALL_PACKAGE_DIR>`` is the location where LLVM CMake modules are +- `<LLVM_INSTALL_PACKAGE_DIR>/LLVMConfig.cmake` where + `<LLVM_INSTALL_PACKAGE_DIR>` is the location where LLVM CMake modules are installed as part of an installed version of LLVM. This is typically - ``cmake/llvm/`` within the lib directory. On Linux, this is typically - ``/usr/lib/cmake/llvm/LLVMConfig.cmake``. - -* ``<LLVM_BUILD_ROOT>/lib/cmake/llvm/LLVMConfig.cmake`` where - ``<LLVM_BUILD_ROOT>`` is the root of the LLVM build tree. **Note: this is only - available when building LLVM with CMake.** + `cmake/llvm/` within the lib directory. On Linux, this is typically + `/usr/lib/cmake/llvm/LLVMConfig.cmake`. +- `<LLVM_BUILD_ROOT>/lib/cmake/llvm/LLVMConfig.cmake` where + `<LLVM_BUILD_ROOT>` is the root of the LLVM build tree. **Note: this is + only available when building LLVM with CMake.** If LLVM is installed in your operating system's normal installation prefix (e.g. -on Linux this is usually ``/usr/``) ``find_package(LLVM ...)`` will +on Linux this is usually `/usr/`) `find_package(LLVM ...)` will automatically find LLVM if it is installed correctly. If LLVM is not installed or you wish to build directly against the LLVM build tree you can use -``LLVM_DIR`` as previously mentioned. +`LLVM_DIR` as previously mentioned. -The ``LLVMConfig.cmake`` file sets various useful variables. Notable variables +The `LLVMConfig.cmake` file sets various useful variables. Notable variables include: -``LLVM_CMAKE_DIR`` - The path to the LLVM CMake directory (i.e., the directory containing - ``LLVMConfig.cmake``). +`LLVM_CMAKE_DIR` -``LLVM_DEFINITIONS`` - A list of preprocessor defines that should be used when building against LLVM. +: The path to the LLVM CMake directory (i.e., the directory containing + `LLVMConfig.cmake`). -``LLVM_ENABLE_ASSERTIONS`` - This is set to ON if LLVM was built with assertions, otherwise OFF. +`LLVM_DEFINITIONS` -``LLVM_ENABLE_EH`` - This is set to ON if LLVM was built with exception handling (EH) enabled, - otherwise OFF.
+: A list of preprocessor defines that should be used when building against + LLVM. -``LLVM_ENABLE_RTTI`` - This is set to ON if LLVM was built with run time type information (RTTI), - otherwise OFF. +`LLVM_ENABLE_ASSERTIONS` -``LLVM_INCLUDE_DIRS`` - A list of include paths to directories containing LLVM header files. +: This is set to ON if LLVM was built with assertions, otherwise OFF. -``LLVM_PACKAGE_VERSION`` - The LLVM version. This string can be used with CMake conditionals, e.g., ``if - (${LLVM_PACKAGE_VERSION} VERSION_LESS "3.5")``. +`LLVM_ENABLE_EH` -``LLVM_TOOLS_BINARY_DIR`` - The path to the directory containing the LLVM tools (e.g., ``llvm-as``). +: This is set to ON if LLVM was built with exception handling (EH) enabled, + otherwise OFF. -Notice that in the above example we link ``simple-tool`` against several LLVM -libraries. The list of libraries is determined by using the -``llvm_map_components_to_libnames()`` CMake function. For a list of available -components look at the output of running ``llvm-config --components``. +`LLVM_ENABLE_RTTI` -Note that for LLVM < 3.5 ``llvm_map_components_to_libraries()`` was -used instead of ``llvm_map_components_to_libnames()``. This is now deprecated -and will be removed in a future version of LLVM. +: This is set to ON if LLVM was built with run time type information (RTTI), + otherwise OFF. -.. _cmake-out-of-source-pass: +`LLVM_INCLUDE_DIRS` -Developing LLVM passes out of source ------------------------------------- +: A list of include paths to directories containing LLVM header files. -You can develop LLVM passes out of LLVM's source tree (i.e., against an -installed or built LLVM). An example of a project layout is provided below. +`LLVM_PACKAGE_VERSION` -.. code-block:: none +: The LLVM version. This string can be used with CMake conditionals, e.g., + `if (${LLVM_PACKAGE_VERSION} VERSION_LESS "3.5")`. - / - | - CMakeLists.txt - / - | - CMakeLists.txt - Pass.cpp - ... 
+`LLVM_TOOLS_BINARY_DIR` -Contents of ``/CMakeLists.txt``: +: The path to the directory containing the LLVM tools (e.g., `llvm-as`). -.. code-block:: cmake +Notice that in the above example we link `simple-tool` against several LLVM +libraries. The list of libraries is determined by using the +`llvm_map_components_to_libnames()` CMake function. For a list of available +components look at the output of running `llvm-config --components`. - find_package(LLVM REQUIRED CONFIG) +Note that for LLVM \< 3.5 `llvm_map_components_to_libraries()` was +used instead of `llvm_map_components_to_libnames()`. This is now deprecated +and will be removed in a future version of LLVM. - separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) - add_definitions(${LLVM_DEFINITIONS_LIST}) - include_directories(${LLVM_INCLUDE_DIRS}) +(cmake-out-of-source-pass)= +### Developing LLVM passes out of source - add_subdirectory() +You can develop LLVM passes out of LLVM's source tree (i.e., against an +installed or built LLVM). An example of a project layout is provided below. -Contents of ``//CMakeLists.txt``: +``` none +/ + | + CMakeLists.txt + / + | + CMakeLists.txt + Pass.cpp + ... +``` -.. code-block:: cmake +Contents of `/CMakeLists.txt`: - add_library(LLVMPassname MODULE Pass.cpp) +``` cmake +find_package(LLVM REQUIRED CONFIG) -Note if you intend for this pass to be merged into the LLVM source tree at some -point in the future it might make more sense to use LLVM's internal -``add_llvm_library`` function with the MODULE argument instead by... +separate_arguments(LLVM_DEFINITIONS_LIST NATIVE_COMMAND ${LLVM_DEFINITIONS}) +add_definitions(${LLVM_DEFINITIONS_LIST}) +include_directories(${LLVM_INCLUDE_DIRS}) +add_subdirectory() +``` -Adding the following to ``/CMakeLists.txt`` (after -``find_package(LLVM ...)``) +Contents of `//CMakeLists.txt`: -.. 
code-block:: cmake +``` cmake +add_library(LLVMPassname MODULE Pass.cpp) +``` - list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") - include(AddLLVM) +Note if you intend for this pass to be merged into the LLVM source tree at some +point in the future it might make more sense to use LLVM's internal +`add_llvm_library` function with the MODULE argument instead by adding the +following to `<project dir>/CMakeLists.txt` (after `find_package(LLVM ...)`): -And then changing ``<project dir>/<pass name>/CMakeLists.txt`` to +``` cmake +list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") +include(AddLLVM) +``` -.. code-block:: cmake +And then changing `<project dir>/<pass name>/CMakeLists.txt` to - add_llvm_library(LLVMPassname MODULE - Pass.cpp - ) +``` cmake +add_llvm_library(LLVMPassname MODULE + Pass.cpp + ) +``` When you are done developing your pass, you may wish to integrate it into the LLVM source tree. You can achieve it in two easy steps: -#. Copying ``<pass name>`` folder into ``<LLVM root>/lib/Transforms`` directory. - -#. Adding ``add_subdirectory(<pass name>)`` line into - ``<LLVM root>/lib/Transforms/CMakeLists.txt``. +1. Copying `<pass name>` folder into `<LLVM root>/lib/Transforms` directory. +2. Adding `add_subdirectory(<pass name>)` line into `<LLVM root>/lib/Transforms/CMakeLists.txt`. -Compiler/Platform-specific topics -================================= +## Compiler/Platform-specific topics Notes for specific compilers and/or platforms. -Windows -------- +### Windows **LLVM_COMPILER_JOBS**:STRING - Specifies the maximum number of parallel compiler jobs to use per project - when building with msbuild or Visual Studio. Only supported for the Visual - Studio 2010 CMake generator. 0 means use all processors. Default is 0. + +: Specifies the maximum number of parallel compiler jobs to use per project + when building with msbuild or Visual Studio. Only supported for the Visual + Studio 2010 CMake generator. 0 means use all processors. Default is 0. **CMAKE_MT**:STRING - When compiling with clang-cl, CMake may use ``llvm-mt`` as the Manifest Tool - when available.
```llvm-mt``` is only present when libxml2 is found at build-time. - To ensure using Microsoft's Manifest Tool set `CMAKE_MT=mt`. -Apple/OSX ---------- +: When compiling with clang-cl, CMake may use `llvm-mt` as the Manifest Tool + when available. `llvm-mt` is only present when libxml2 is found at + build-time. To ensure using Microsoft's Manifest Tool set `CMAKE_MT=mt`. + +### Apple/OSX **CMAKE_OSX_SYSROOT**:STRING - When compiling for OSX, in order for the test suite to find libSystem to link - dylib tests you'll need to run CMake with ```xcrun --show-sdk-path``` as the - string to pass in so that the testsuite can find your os libraries. - This will show up as ```ld: library not found for -lSystem``` when running - tests. +: When compiling for OSX, in order for the test suite to find libSystem to + link dylib tests you'll need to run CMake with `xcrun --show-sdk-path` + as the string to pass in so that the testsuite can find your os + libraries. + + This will show up as `ld: library not found for -lSystem` when + running tests. diff --git a/llvm/docs/CodeGenerator.md b/llvm/docs/CodeGenerator.md index d5a019dcb06ba..0dc033d4e5cb6 100644 --- a/llvm/docs/CodeGenerator.md +++ b/llvm/docs/CodeGenerator.md @@ -1,32 +1,28 @@ -========================================== -The LLVM Target-Independent Code Generator -========================================== - -.. role:: raw-html(raw) - :format: html - -.. raw:: html - - - -.. contents:: - :local: - -.. warning:: - This is a work in progress. - -Introduction -============ +# The LLVM Target-Independent Code Generator + + +```{raw} html + +``` + +```{contents} +:local: +``` + +```{warning} +This is a work in progress. +``` +## Introduction The LLVM target-independent code generator is a framework that provides a suite of reusable components for translating the LLVM internal representation to the @@ -35,54 +31,53 @@ static compiler) or in binary machine code format (usable for a JIT compiler). 
The LLVM target-independent code generator consists of six main components: -1. `Abstract target description`_ interfaces which capture important properties +1. {ref}`Abstract target description ` interfaces which capture important properties about various aspects of the machine, independently of how they will be used. - These interfaces are defined in ``include/llvm/Target/``. + These interfaces are defined in `include/llvm/Target/`. -2. Classes used to represent the `code being generated`_ for a target. These +2. Classes used to represent the {ref}`code being generated ` for a target. These classes are intended to be abstract enough to represent the machine code for *any* target machine. These classes are defined in - ``include/llvm/CodeGen/``. At this level, concepts like "constant pool + `include/llvm/CodeGen/`. At this level, concepts like "constant pool entries" and "jump tables" are explicitly exposed. 3. Classes and algorithms used to represent code at the object file level, the - `MC Layer`_. These classes represent assembly level constructs like labels, + {ref}`MC Layer `. These classes represent assembly level constructs like labels, sections, and instructions. At this level, concepts like "constant pool entries" and "jump tables" don't exist. -4. `Target-independent algorithms`_ used to implement various phases of native +4. {ref}`Target-independent algorithms ` used to implement various phases of native code generation (register allocation, scheduling, stack frame representation, - etc). This code lives in ``lib/CodeGen/``. + etc). This code lives in `lib/CodeGen/`. -5. `Implementations of the abstract target description interfaces`_ for +5. {ref}`Implementations of the abstract target description interfaces ` for particular targets. These machine descriptions make use of the components provided by LLVM, and can optionally provide custom target-specific passes, to build complete code generators for a specific target. 
Target descriptions - live in ``lib/Target/``. + live in `lib/Target/`. 6. The target-independent JIT components. The LLVM JIT is completely target - independent (it uses the ``TargetJITInfo`` structure to interface for + independent (it uses the `TargetJITInfo` structure to interface for target-specific issues. The code for the target-independent JIT lives in - ``lib/ExecutionEngine/JIT``. + `lib/ExecutionEngine/JIT`. Depending on which part of the code generator you are interested in working on, different pieces of this will be useful to you. In any case, you should be -familiar with the `target description`_ and `machine code representation`_ +familiar with the {ref}`target description ` and {ref}`machine code representation ` classes. If you want to add a backend for a new target, you will need to -`implement the target description`_ classes for your new target and understand -the :doc:`LLVM code representation `. If you are interested in -implementing a new `code generation algorithm`_, it should only depend on the +{ref}`implement the target description ` classes for your new target and understand +the {doc}`LLVM code representation `. If you are interested in +implementing a new {ref}`code generation algorithm `, it should only depend on the target-description and machine code representation classes, ensuring that it is portable. -Required components in the code generator ------------------------------------------ +### Required components in the code generator The two pieces of the LLVM code generator are the high-level interface to the code generator and the set of reusable components that can be used to build -target-specific backends. The two most important interfaces (:raw-html:`` -`TargetMachine`_ :raw-html:`` and :raw-html:`` `DataLayout`_ -:raw-html:``) are the only ones that are required to be defined for a +target-specific backends. 
The two most important interfaces ( +{ref}`TargetMachine ` and {ref}`DataLayout ` +) are the only ones that are required to be defined for a backend to fit into the LLVM system, but the others must be defined if the reusable code generator components are going to be used. @@ -101,16 +96,15 @@ built-in components. Doing so is not recommended at all, but could be required for radically different targets that do not fit into the LLVM machine description model: FPGAs for example. -.. _high-level design of the code generator: +(high-level design of the code generator)= -The high-level design of the code generator -------------------------------------------- +### The high-level design of the code generator The LLVM target-independent code generator is designed to support efficient and quality code generation for standard register-based microprocessors. Code generation in this model is divided into the following stages: -1. `Instruction Selection`_ --- This phase determines an efficient way to +1. {ref}`Instruction Selection ` --- This phase determines an efficient way to express the input LLVM code in the target instruction set. This stage produces the initial code for the program in the target instruction set, then makes use of virtual registers in SSA form and physical registers that @@ -118,35 +112,35 @@ generation in this model is divided into the following stages: calling conventions. This step turns the LLVM code into a DAG of target instructions. -2. `Scheduling and Formation`_ --- This phase takes the DAG of target +2. {ref}`Scheduling and Formation ` --- This phase takes the DAG of target instructions produced by the instruction selection phase, determines an - ordering of the instructions, then emits the instructions as :raw-html:`` - `MachineInstr`_\s :raw-html:`` with that ordering. Note that we - describe this in the `instruction selection section`_ because it operates on - a `SelectionDAG`_. 
+ ordering of the instructions, then emits the instructions as + {ref}`MachineInstr `s with that ordering. Note that we + describe this in the {ref}`instruction selection section ` because it operates on + a {ref}`SelectionDAG `. -3. `SSA-based Machine Code Optimizations`_ --- This optional stage consists of a +3. {ref}`SSA-based Machine Code Optimizations ` --- This optional stage consists of a series of machine-code optimizations that operate on the SSA-form produced by the instruction selector. Optimizations like modulo-scheduling or peephole optimization work here. -4. `Register Allocation`_ --- The target code is transformed from an infinite +4. {ref}`Register Allocation ` --- The target code is transformed from an infinite virtual register file in SSA form to the concrete register file used by the target. This phase introduces spill code and eliminates all virtual register references from the program. -5. `Prolog/Epilog Code Insertion`_ --- Once the machine code has been generated +5. {ref}`Prolog/Epilog Code Insertion ` --- Once the machine code has been generated for the function and the amount of stack space required is known (used for LLVM alloca's and spill slots), the prolog and epilog code for the function can be inserted and "abstract stack location references" can be eliminated. This stage is responsible for implementing optimizations like frame-pointer elimination and stack packing. -6. `Late Machine Code Optimizations`_ --- Optimizations that operate on "final" +6. {ref}`Late Machine Code Optimizations ` --- Optimizations that operate on "final" machine code can go here, such as spill code scheduling and peephole optimizations. -7. `Code Emission`_ --- The final stage actually puts out the code for the +7. {ref}`Code Emission ` --- The final stage actually puts out the code for the current function, either in the target assembler format or in machine code. @@ -164,191 +158,179 @@ target-specific passes into the flow. 
For example, the X86 target uses a special pass to handle the 80x87 floating point stack architecture. Other targets with unusual requirements can be supported with custom passes as needed. -Using TableGen for target description -------------------------------------- +### Using TableGen for target description The target description classes require a detailed description of the target architecture. These target descriptions often have a large amount of common -information (e.g., an ``add`` instruction is almost identical to a ``sub`` +information (e.g., an `add` instruction is almost identical to a `sub` instruction). In order to allow the maximum amount of commonality to be factored out, the LLVM code generator uses the -:doc:`TableGen/index` tool to describe big chunks of the +{doc}`TableGen/index` tool to describe big chunks of the target machine, which allows the use of domain-specific and target-specific abstractions to reduce the amount of repetition. As LLVM continues to be developed and refined, we plan to move more and more of -the target description to the ``.td`` form. Doing so gives us a number of +the target description to the `.td` form. Doing so gives us a number of advantages. The most important is that it makes it easier to port LLVM because it reduces the amount of C++ code that has to be written, and the surface area of the code generator that needs to be understood before someone can get something working. Second, it makes it easier to change things. In particular, -if tables and other things are all emitted by ``tblgen``, we only need a change -in one place (``tblgen``) to update all of the targets to a new interface. +if tables and other things are all emitted by `tblgen`, we only need a change +in one place (`tblgen`) to update all of the targets to a new interface. -.. _Abstract target description: -.. 
_target description: +(Abstract target description)= +(target description)= -Target description classes -========================== +## Target description classes -The LLVM target description classes (located in the ``include/llvm/Target`` +The LLVM target description classes (located in the `include/llvm/Target` directory) provide an abstract description of the target machine independent of any particular client. These classes are designed to capture the *abstract* properties of the target (such as the instructions and registers it has), and do not incorporate any particular pieces of code generation algorithms. -All of the target description classes (except the :raw-html:`` `DataLayout`_ -:raw-html:`` class) are designed to be subclassed by the concrete target +All of the target description classes (except the {ref}`DataLayout ` +class) are designed to be subclassed by the concrete target implementation, and have virtual methods implemented. To get to these -implementations, the :raw-html:`` `TargetMachine`_ :raw-html:`` class +implementations, the {ref}`TargetMachine ` class provides accessors that should be implemented by the target. -.. _TargetMachine: +(TargetMachine)= -The ``TargetMachine`` class ---------------------------- +### The `TargetMachine` class -The ``TargetMachine`` class provides virtual methods that are used to access the +The `TargetMachine` class provides virtual methods that are used to access the target-specific implementations of the various target description classes via -the ``get*Info`` methods (``getInstrInfo``, ``getRegisterInfo``, -``getFrameInfo``, etc.). This class is designed to be specialized by a concrete -target implementation (e.g., ``X86TargetMachine``) which implements the various +the `get*Info` methods (`getInstrInfo`, `getRegisterInfo`, +`getFrameInfo`, etc.). This class is designed to be specialized by a concrete +target implementation (e.g., `X86TargetMachine`) which implements the various virtual methods. 
The only required target description class is the -:raw-html:`` `DataLayout`_ :raw-html:`` class, but if the code +{ref}`DataLayout ` class, but if the code generator components are to be used, the other interfaces should be implemented as well. -.. _DataLayout: +(DataLayout)= -The ``DataLayout`` class ------------------------- +### The `DataLayout` class -The ``DataLayout`` class is the only required target description class, and it +The `DataLayout` class is the only required target description class, and it is the only class that is not extensible (you cannot derive a new class from -it). ``DataLayout`` specifies information about how the target lays out memory +it). `DataLayout` specifies information about how the target lays out memory for structures, the alignment requirements for various data types, the size of pointers in the target, and whether the target is little-endian or big-endian. -.. _TargetLowering: +(TargetLowering)= -The ``TargetLowering`` class ----------------------------- +### The `TargetLowering` class -The ``TargetLowering`` class is used by SelectionDAG based instruction selectors +The `TargetLowering` class is used by SelectionDAG based instruction selectors primarily to describe how LLVM code should be lowered to SelectionDAG operations. Among other things, this class indicates: -* an initial register class to use for various ``ValueType``\s, +* an initial register class to use for various `ValueType`s, * which operations are natively supported by the target machine, -* the return type of ``setcc`` operations, +* the return type of `setcc` operations, * the type to use for shift amounts, and * various high-level characteristics, like whether it is profitable to turn division by a constant into a multiplication sequence. -.. 
_TargetRegisterInfo: +(TargetRegisterInfo)= -The ``TargetRegisterInfo`` class --------------------------------- +### The `TargetRegisterInfo` class -The ``TargetRegisterInfo`` class is used to describe the register file of the +The `TargetRegisterInfo` class is used to describe the register file of the target and any interactions between the registers. Registers are represented in the code generator by unsigned integers. Physical registers (those that actually exist in the target description) are unique small numbers, and virtual registers are generally large. Note that -register ``#0`` is reserved as a flag value. +register `#0` is reserved as a flag value. Each register in the processor description has an associated -``TargetRegisterDesc`` entry, which provides a textual name for the register +`TargetRegisterDesc` entry, which provides a textual name for the register (used for assembly output and debugging dumps) and a set of aliases (used to indicate whether one register overlaps with another). -In addition to the per-register description, the ``TargetRegisterInfo`` class +In addition to the per-register description, the `TargetRegisterInfo` class exposes a set of processor-specific register classes (instances of the -``TargetRegisterClass`` class). Each register class contains sets of registers +`TargetRegisterClass` class). Each register class contains sets of registers that have the same properties (for example, they are all 32-bit integer registers). Each SSA virtual register created by the instruction selector has an associated register class. When the register allocator runs, it replaces virtual registers with a physical register in the set. The target-specific implementations of these classes is auto-generated from a -:doc:`TableGen/index` description of the register file. +{doc}`TableGen/index` description of the register file. -.. 
_TargetInstrInfo: +(TargetInstrInfo)= -The ``TargetInstrInfo`` class ------------------------------ +### The `TargetInstrInfo` class -The ``TargetInstrInfo`` class is used to describe the machine instructions +The `TargetInstrInfo` class is used to describe the machine instructions supported by the target. Descriptions define things like the mnemonic for the opcode, the number of operands, the list of implicit register uses and defs, whether the instruction has certain target-independent properties (accesses memory, is commutable, etc), and holds any target-specific flags. -The ``TargetFrameLowering`` class ---------------------------------- +### The `TargetFrameLowering` class -The ``TargetFrameLowering`` class is used to provide information about the stack +The `TargetFrameLowering` class is used to provide information about the stack frame layout of the target. It holds the direction of stack growth, the known stack alignment on entry to each function, and the offset to the local area. The offset to the local area is the offset from the stack pointer on function entry to the first location where function data (local variables, spill locations) can be stored. -The ``TargetSubtarget`` class ------------------------------ +### The `TargetSubtarget` class -The ``TargetSubtarget`` class is used to provide information about the specific +The `TargetSubtarget` class is used to provide information about the specific chip set being targeted. A sub-target informs code generation of which instructions are supported, instruction latencies and instruction execution itinerary; i.e., which processing units are used, in what order, and for how long. -The ``TargetJITInfo`` class ---------------------------- +### The `TargetJITInfo` class -The ``TargetJITInfo`` class exposes an abstract interface used by the +The `TargetJITInfo` class exposes an abstract interface used by the Just-In-Time code generator to perform target-specific activities, such as -emitting stubs. 
If a ``TargetMachine`` supports JIT code generation, it should -provide one of these objects through the ``getJITInfo`` method. +emitting stubs. If a `TargetMachine` supports JIT code generation, it should +provide one of these objects through the `getJITInfo` method. -.. _code being generated: -.. _machine code representation: +(code being generated)= +(machine code representation)= -Machine code description classes -================================ +## Machine code description classes At the high-level, LLVM code is translated to a machine-specific representation -formed out of :raw-html:`` `MachineFunction`_ :raw-html:``, -:raw-html:`` `MachineBasicBlock`_ :raw-html:``, and :raw-html:`` -`MachineInstr`_ :raw-html:`` instances (defined in -``include/llvm/CodeGen``). This representation is completely target agnostic, +formed out of {ref}`MachineFunction `, +{ref}`MachineBasicBlock `, and +{ref}`MachineInstr ` instances (defined in +`include/llvm/CodeGen`). This representation is completely target agnostic, representing instructions in their most abstract form: an opcode and a series of operands. This representation is designed to support both an SSA representation for machine code, as well as a register allocated, non-SSA form. -.. _MachineInstr: +(MachineInstr)= -The ``MachineInstr`` class --------------------------- +### The `MachineInstr` class -Target machine instructions are represented as instances of the ``MachineInstr`` +Target machine instructions are represented as instances of the `MachineInstr` class. This class is an extremely abstract way of representing machine instructions. In particular, it only keeps track of an opcode number and a set of operands. The opcode number is a simple unsigned integer that only has meaning to a specific backend. All of the instructions for a target should be defined in the -``*InstrInfo.td`` file for the target. The opcode enum values are auto-generated -from this description. 
The ``MachineInstr`` class does not have any information +`*InstrInfo.td` file for the target. The opcode enum values are auto-generated +from this description. The `MachineInstr` class does not have any information about how to interpret the instruction (i.e., what the semantics of the -instruction are); for that you must refer to the :raw-html:`` -`TargetInstrInfo`_ :raw-html:`` class. +instruction are); for that you must refer to the +{ref}`TargetInstrInfo ` class. The operands of a machine instruction can be of several different types: a register reference, a constant integer, a basic block reference, etc. In @@ -358,109 +340,101 @@ addition, a machine operand should be marked as a def or a use of the value By convention, the LLVM code generator orders instruction operands so that all register definitions come before the register uses, even on architectures that are normally printed in other orders. For example, the SPARC add instruction: -"``add %i1, %i2, %i3``" adds the "%i1", and "%i2" registers and stores the +"`add %i1, %i2, %i3`" adds the "%i1", and "%i2" registers and stores the result into the "%i3" register. In the LLVM code generator, the operands should -be stored as "``%i3, %i1, %i2``": with the destination first. +be stored as "`%i3, %i1, %i2`": with the destination first. Keeping destination (definition) operands at the beginning of the operand list has several advantages. In particular, the debugging printer will print the instruction like this: -.. code-block:: llvm - - %r3 = add %i1, %i2 - -Also if the first operand is a def, it is easier to `create instructions`_ whose +```llvm +%r3 = add %i1, %i2 +``` +Also if the first operand is a def, it is easier to {ref}`create instructions ` whose only def is the first operand. -.. 
_create instructions: +(create instructions)= -Using the ``MachineInstrBuilder.h`` functions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Using the `MachineInstrBuilder.h` functions -Machine instructions are created by using the ``BuildMI`` functions, located in -the ``include/llvm/CodeGen/MachineInstrBuilder.h`` file. The ``BuildMI`` +Machine instructions are created by using the `BuildMI` functions, located in +the `include/llvm/CodeGen/MachineInstrBuilder.h` file. The `BuildMI` functions make it easy to build arbitrary machine instructions. Usage of the -``BuildMI`` functions look like this: - -.. code-block:: c++ +`BuildMI` functions look like this: - // Create a 'DestReg = mov 42' (rendered in X86 assembly as 'mov DestReg, 42') - // instruction and insert it at the end of the given MachineBasicBlock. - const TargetInstrInfo &TII = ... - MachineBasicBlock &MBB = ... - DebugLoc DL; - MachineInstr *MI = BuildMI(MBB, DL, TII.get(X86::MOV32ri), DestReg).addImm(42); +```c++ +// Create a 'DestReg = mov 42' (rendered in X86 assembly as 'mov DestReg, 42') +// instruction and insert it at the end of the given MachineBasicBlock. +const TargetInstrInfo &TII = ... +MachineBasicBlock &MBB = ... +DebugLoc DL; +MachineInstr *MI = BuildMI(MBB, DL, TII.get(X86::MOV32ri), DestReg).addImm(42); - // Create the same instr, but insert it before a specified iterator point. - MachineBasicBlock::iterator MBBI = ... - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), DestReg).addImm(42); +// Create the same instr, but insert it before a specified iterator point. +MachineBasicBlock::iterator MBBI = ... +BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), DestReg).addImm(42); - // Create a 'cmp Reg, 0' instruction, no destination reg. - MI = BuildMI(MBB, DL, TII.get(X86::CMP32ri8)).addReg(Reg).addImm(42); +// Create a 'cmp Reg, 0' instruction, no destination reg. 
+MI = BuildMI(MBB, DL, TII.get(X86::CMP32ri8)).addReg(Reg).addImm(42); - // Create an 'sahf' instruction which takes no operands and stores nothing. - MI = BuildMI(MBB, DL, TII.get(X86::SAHF)); - - // Create a self looping branch instruction. - BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(&MBB); +// Create an 'sahf' instruction which takes no operands and stores nothing. +MI = BuildMI(MBB, DL, TII.get(X86::SAHF)); +// Create a self looping branch instruction. +BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(&MBB); +``` If you need to add a definition operand (other than the optional destination register), you must explicitly mark it as such: -.. code-block:: c++ - - MI.addReg(Reg, RegState::Define); - -Fixed (preassigned) registers -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +```c++ +MI.addReg(Reg, RegState::Define); +``` +#### Fixed (preassigned) registers One important issue that the code generator needs to be aware of is the presence of fixed registers. In particular, there are often places in the instruction stream where the register allocator *must* arrange for a particular value to be in a particular register. This can occur due to limitations of the instruction -set (e.g., the X86 can only do a 32-bit divide with the ``EAX``/``EDX`` +set (e.g., the X86 can only do a 32-bit divide with the `EAX`/`EDX` registers), or external factors like calling conventions. In any case, the instruction selector should emit code that copies a virtual register into or out of a physical register when needed. For example, consider this simple LLVM example: -.. code-block:: llvm - - define i32 @test(i32 %X, i32 %Y) { - %Z = sdiv i32 %X, %Y - ret i32 %Z - } - -The X86 instruction selector might produce this machine code for the ``div`` and -``ret``: - -.. 
code-block:: text - - ;; Start of div - %EAX = mov %reg1024 ;; Copy X (in reg1024) into EAX - %reg1027 = sar %reg1024, 31 - %EDX = mov %reg1027 ;; Sign extend X into EDX - idiv %reg1025 ;; Divide by Y (in reg1025) - %reg1026 = mov %EAX ;; Read the result (Z) out of EAX - - ;; Start of ret - %EAX = mov %reg1026 ;; 32-bit return value goes in EAX - ret - +```llvm +define i32 @test(i32 %X, i32 %Y) { + %Z = sdiv i32 %X, %Y + ret i32 %Z +} +``` +The X86 instruction selector might produce this machine code for the `div` and +`ret`: + +```text +;; Start of div +%EAX = mov %reg1024 ;; Copy X (in reg1024) into EAX +%reg1027 = sar %reg1024, 31 +%EDX = mov %reg1027 ;; Sign extend X into EDX +idiv %reg1025 ;; Divide by Y (in reg1025) +%reg1026 = mov %EAX ;; Read the result (Z) out of EAX + +;; Start of ret +%EAX = mov %reg1026 ;; 32-bit return value goes in EAX +ret +``` By the end of code generation, the register allocator would coalesce the registers and delete the resultant identity moves producing the following code: -.. code-block:: text - - ;; X is in EAX, Y is in ECX - mov %EAX, %EDX - sar %EDX, 31 - idiv %ECX - ret - +```text +;; X is in EAX, Y is in ECX +mov %EAX, %EDX +sar %EDX, 31 +idiv %ECX +ret +``` This approach is extremely general (if it can handle the X86 architecture, it can handle anything!) and allows all of the target-specific knowledge about the instruction stream to be isolated in the instruction selector. Note that @@ -469,19 +443,17 @@ all physical registers are assumed dead on entry to and exit from basic blocks (before register allocation). Thus, if you need a value to be live across basic block boundaries, it *must* live in a virtual register. -Call-clobbered registers -^^^^^^^^^^^^^^^^^^^^^^^^ +#### Call-clobbered registers Some machine instructions, like calls, clobber a large number of physical -registers. Rather than adding ```` operands for all of them, it is -possible to use an ``MO_RegisterMask`` operand instead. The register mask +registers. 
Rather than adding `<def,dead>` operands for all of them, it is +possible to use an `MO_RegisterMask` operand instead. The register mask operand holds a bit mask of preserved registers, and everything else is considered to be clobbered by the instruction. -Machine code in SSA form -^^^^^^^^^^^^^^^^^^^^^^^^ +#### Machine code in SSA form -``MachineInstr``'s are initially selected in SSA-form, and are maintained in +`MachineInstr`'s are initially selected in SSA-form, and are maintained in SSA-form until register allocation happens. For the most part, this is trivially simple since LLVM is already in SSA form; LLVM PHI nodes become machine code PHI nodes, and virtual registers are only allowed to have a single @@ -490,33 +462,30 @@ definition. After register allocation, machine code is no longer in SSA-form because there are no virtual registers left in the code. -.. _MachineBasicBlock: +(MachineBasicBlock)= -The ``MachineBasicBlock`` class -------------------------------- +### The `MachineBasicBlock` class -The ``MachineBasicBlock`` class contains a list of machine instructions -(:raw-html:`<tt>` `MachineInstr`_ :raw-html:`</tt>` instances). It roughly +The `MachineBasicBlock` class contains a list of machine instructions +( {ref}`MachineInstr <MachineInstr>` instances). It roughly corresponds to the LLVM code input to the instruction selector, but there can be a one-to-many mapping (i.e., one LLVM basic block can map to multiple machine -basic blocks). The ``MachineBasicBlock`` class has a "``getBasicBlock``" method, +basic blocks). The `MachineBasicBlock` class has a "`getBasicBlock`" method, which returns the LLVM basic block that it comes from. -.. _MachineFunction: +(MachineFunction)= -The ``MachineFunction`` class ------------------------------ +### The `MachineFunction` class -The ``MachineFunction`` class contains a list of machine basic blocks -(:raw-html:`<tt>` `MachineBasicBlock`_ :raw-html:`</tt>` instances).
It +The `MachineFunction` class contains a list of machine basic blocks +( {ref}`MachineBasicBlock ` instances). It corresponds one-to-one with the LLVM function input to the instruction selector. -In addition to a list of basic blocks, the ``MachineFunction`` contains a -``MachineConstantPool``, a ``MachineFrameInfo``, a ``MachineFunctionInfo``, and -a ``MachineRegisterInfo``. See ``include/llvm/CodeGen/MachineFunction.h`` for +In addition to a list of basic blocks, the `MachineFunction` contains a +`MachineConstantPool`, a `MachineFrameInfo`, a `MachineFunctionInfo`, and +a `MachineRegisterInfo`. See `include/llvm/CodeGen/MachineFunction.h` for more information. -``MachineInstr Bundles`` ------------------------- +### `MachineInstr Bundles` LLVM code generator can model sequences of instructions as MachineInstr bundles. A MI bundle can model a VLIW group / pack which contains an arbitrary @@ -526,41 +495,41 @@ separated (e.g., ARM Thumb2 IT blocks). Conceptually a MI bundle is a MI with a number of other MIs nested within: -:: - - -------------- - | Bundle | --------- - -------------- \ - | ---------------- - | | MI | - | ---------------- - | | - | ---------------- - | | MI | - | ---------------- - | | - | ---------------- - | | MI | - | ---------------- - | - -------------- - | Bundle | -------- - -------------- \ - | ---------------- - | | MI | - | ---------------- - | | - | ---------------- - | | MI | - | ---------------- - | | - | ... - | - -------------- - | Bundle | -------- - -------------- \ - | - ... +``` +-------------- +| Bundle | --------- +-------------- \ + | ---------------- + | | MI | + | ---------------- + | | + | ---------------- + | | MI | + | ---------------- + | | + | ---------------- + | | MI | + | ---------------- + | +-------------- +| Bundle | -------- +-------------- \ + | ---------------- + | | MI | + | ---------------- + | | + | ---------------- + | | MI | + | ---------------- + | | + | ... 
+ | +-------------- +| Bundle | -------- +-------------- \ + | + ... +``` MI bundle support does not change the physical representations of MachineBasicBlock and MachineInstr. All the MIs (including top level and nested @@ -592,31 +561,29 @@ effectively double the virtual register def and use lists. Bundles may use virtual registers and be formed in SSA form, but may not be appropriate for all use cases. -.. _MC Layer: +(MC Layer)= -The "MC" Layer -============== +## The "MC" Layer The MC Layer is used to represent and process code at the raw machine code level, devoid of "high level" information like "constant pools", "jump tables", "global variables" or anything like that. At this level, LLVM handles things like label names, machine instructions, and sections in the object file. The code in this layer is used for a number of important purposes: the tail end of -the code generator uses it to write a ``.s`` or ``.o`` file, and it is also used by the +the code generator uses it to write a `.s` or `.o` file, and it is also used by the llvm-mc tool to implement standalone machine code assemblers and disassemblers. This section describes some of the important classes. There are also a number of important subsystems that interact at this layer, they are described later in this manual. -.. _MCStreamer: +(MCStreamer)= -The ``MCStreamer`` API ----------------------- +### The `MCStreamer` API MCStreamer is best thought of as an assembler API. It is an abstract API which -is *implemented* in different ways (e.g., to output a ``.s`` file, output an ELF ``.o`` -file, etc) but whose API corresponds directly to what you see in a ``.s`` file. +is *implemented* in different ways (e.g., to output a `.s` file, output an ELF `.o` +file, etc) but whose API corresponds directly to what you see in a `.s` file. 
MCStreamer has one method per directive, such as EmitLabel, EmitSymbolAttribute, switchSection, emitValue (for .byte, .word), etc, which directly correspond to assembly level directives. It also has an EmitInstruction method, which is used @@ -624,21 +591,21 @@ to output an MCInst to the streamer. This API is most important for two clients: the llvm-mc stand-alone assembler is effectively a parser that parses a line, then invokes a method on MCStreamer. In -the code generator, the `Code Emission`_ phase of the code generator lowers +the code generator, the {ref}`Code Emission ` phase of the code generator lowers higher level LLVM IR and Machine* constructs down to the MC layer, emitting directives through MCStreamer. On the implementation side of MCStreamer, there are two major implementations: -one for writing out a ``.s`` file (MCAsmStreamer), and one for writing out a ``.o`` +one for writing out a `.s` file (MCAsmStreamer), and one for writing out a `.o` file (MCObjectStreamer). MCAsmStreamer is a straightforward implementation -that prints out a directive for each method (e.g., ``EmitValue -> .byte``), but +that prints out a directive for each method (e.g., `EmitValue -> .byte`), but MCObjectStreamer implements a full assembler. For target-specific directives, the MCStreamer has a MCTargetStreamer instance. Each target that needs it defines a class that inherits from it and is a lot like MCStreamer itself: It has one method per directive and two classes that inherit from it, a target object streamer and a target asm streamer. The target -asm streamer just prints it (``emitFnStart -> .fnstart``), and the object +asm streamer just prints it (`emitFnStart -> .fnstart`), and the object streamer implements the assembler logic for it. 
To make llvm use these classes, the target initialization must call @@ -646,15 +613,13 @@ TargetRegistry::RegisterAsmStreamer and TargetRegistry::RegisterMCObjectStreamer passing callbacks that allocate the corresponding target streamer and pass it to createAsmStreamer or to the appropriate object streamer constructor. -The ``MCContext`` class ------------------------ +### The `MCContext` class The MCContext class is the owner of a variety of uniqued data structures at the MC layer, including symbols, sections, etc. As such, this is the class that you interact with to create symbols and sections. This class can not be subclassed. -The ``MCSymbol`` class ----------------------- +### The `MCSymbol` class The MCSymbol class represents a symbol (aka label) in the assembly file. There are two interesting kinds of symbols: assembler temporary symbols, and normal @@ -667,45 +632,42 @@ MCSymbols are created by MCContext and uniqued there. This means that MCSymbols can be compared for pointer equivalence to find out if they are the same symbol. Note that pointer inequality does not guarantee the labels will end up at different addresses though. It's perfectly legal to output something like this -to the ``.s`` file: - -:: +to the `.s` file: - foo: - bar: - .byte 4 +``` +foo: +bar: + .byte 4 +``` In this case, both the foo and bar symbols will have the same address. -The ``MCSection`` class ------------------------ +### The `MCSection` class -The ``MCSection`` class represents an object-file specific section. It is -subclassed by object file specific implementations (e.g., ``MCSectionMachO``, -``MCSectionCOFF``, ``MCSectionELF``) and these are created and uniqued by +The `MCSection` class represents an object-file specific section. It is +subclassed by object file specific implementations (e.g., `MCSectionMachO`, +`MCSectionCOFF`, `MCSectionELF`) and these are created and uniqued by MCContext. 
The MCStreamer has a notion of the current section, which can be changed with the SwitchToSection method (which corresponds to a ".section" -directive in a ``.s`` file). +directive in a `.s` file). -.. _MCInst: +(MCInst)= -The ``MCInst`` class --------------------- +### The `MCInst` class -The ``MCInst`` class is a target-independent representation of an instruction. -It is a simple class (much more so than `MachineInstr`_) that holds a +The `MCInst` class is a target-independent representation of an instruction. +It is a simple class (much more so than {ref}`MachineInstr `) that holds a target-specific opcode and a vector of MCOperands. MCOperand, in turn, is a simple discriminated union of three cases: 1) a simple immediate, 2) a target -register ID, 3) a symbolic expression (e.g., "``Lfoo-Lbar+42``") as an MCExpr. +register ID, 3) a symbolic expression (e.g., "`Lfoo-Lbar+42`") as an MCExpr. MCInst is the common currency used to represent machine instructions at the MC layer. It is the type used by the instruction encoder, the instruction printer, and the type generated by the assembly parser and disassembler. -.. _ObjectFormats: +(ObjectFormats)= -Object File Format ------------------- +### Object File Format The MC layer's object writers support a variety of object formats. Because of target-specific aspects of object formats each target only supports a subset of @@ -718,36 +680,31 @@ and WebAssembly). The table below captures a snapshot of object file support in LLVM: - .. 
table:: Object File Formats +```{table} Object File Formats +| Format | Supported Targets | +| --- | --- | +| `COFF` | AArch64, ARM, X86 | +| `DXContainer` | DirectX | +| `ELF` | AArch64, AMDGPU, ARM, AVR, BPF, CSKY, Hexagon, Lanai, LoongArch, M68k, MSP430, MIPS, PowerPC, RISCV, SPARC, SystemZ, VE, X86 | +| `GOFF` | SystemZ | +| `MachO` | AArch64, ARM, X86 | +| `SPIR-V` | SPIRV | +| `WASM` | WebAssembly | +| `XCOFF` | PowerPC | +``` +(Target-independent algorithms)= +(code generation algorithm)= - ================== ======================================================== - Format Supported Targets - ================== ======================================================== - ``COFF`` AArch64, ARM, X86 - ``DXContainer`` DirectX - ``ELF`` AArch64, AMDGPU, ARM, AVR, BPF, CSKY, Hexagon, Lanai, LoongArch, M86k, MSP430, MIPS, PowerPC, RISCV, SPARC, SystemZ, VE, X86 - ``GOFF`` SystemZ - ``MachO`` AArch64, ARM, X86 - ``SPIR-V`` SPIRV - ``WASM`` WebAssembly - ``XCOFF`` PowerPC - ================== ======================================================== +## Target-independent code generation algorithms -.. _Target-independent algorithms: -.. _code generation algorithm: +This section documents the phases described in the +{ref}`high-level design of the code generator <high-level design of the code generator>`. +It explains how they work and some of the rationale behind their design. -Target-independent code generation algorithms -============================================= +(Instruction Selection)= +(instruction selection section)= -This section documents the phases described in the `high-level design of the -code generator`_. It explains how they work and some of the rationale behind -their design. - -.. _Instruction Selection: -.. _instruction selection section: - -Instruction Selection ---------------------- +### Instruction Selection Instruction Selection is the process of translating LLVM code presented to the code generator into target-specific machine instructions.
There are several @@ -755,17 +712,16 @@ well-known ways to do this in the literature. LLVM uses a SelectionDAG based instruction selector. Portions of the DAG instruction selector are generated from the target -description (``*.td``) files. Our goal is for the entire instruction selector -to be generated from these ``.td`` files, though currently there are still +description (`*.td`) files. Our goal is for the entire instruction selector +to be generated from these `.td` files, though currently there are still things that require custom C++ code. -`GlobalISel `_ is another +[GlobalISel](https://llvm.org/docs/GlobalISel/index.html) is another instruction selection framework. -.. _SelectionDAG: +(SelectionDAG)= -Introduction to SelectionDAGs -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Introduction to SelectionDAGs The SelectionDAG provides an abstraction for code representation in a way that is amenable to instruction selection using automatic techniques @@ -773,30 +729,30 @@ is amenable to instruction selection using automatic techniques well-suited to other phases of code generation; in particular, instruction scheduling (SelectionDAG's are very close to scheduling DAGs post-selection). Additionally, the SelectionDAG provides a host representation where a large -variety of very-low-level (but target-independent) `optimizations`_ may be +variety of very-low-level (but target-independent) {ref}`optimizations ` may be performed; ones which require extensive information about the instructions efficiently supported by the target. The SelectionDAG is a Directed-Acyclic-Graph whose nodes are instances of the -``SDNode`` class. The primary payload of the ``SDNode`` is its operation code +`SDNode` class. The primary payload of the `SDNode` is its operation code (Opcode) that indicates what operation the node performs and the operands to the operation. The various operation node types are described at the top of the -``include/llvm/CodeGen/ISDOpcodes.h`` file. 
+`include/llvm/CodeGen/ISDOpcodes.h` file. Although most operations define a single value, each node in the graph may define multiple values. For example, a combined div/rem operation will define both the dividend and the remainder. Many other situations require multiple values as well. Each node also has some number of operands, which are edges to the node defining the used value. Because nodes may define multiple values, -edges are represented by instances of the ``SDValue`` class, which is a -``<SDNode, unsigned>`` pair, indicating the node and result value being used, -respectively. Each value produced by an ``SDNode`` has an associated ``MVT`` +edges are represented by instances of the `SDValue` class, which is a +`<SDNode, unsigned>` pair, indicating the node and result value being used, +respectively. Each value produced by an `SDNode` has an associated `MVT` (Machine Value Type) indicating what the type of the value is. SelectionDAGs contain two different kinds of values: those that represent data flow and those that represent control flow dependencies. Data values are simple edges with an integer or floating point value type. Control edges are -represented as "chain" edges which are of type ``MVT::Other``. These edges +represented as "chain" edges which are of type `MVT::Other`. These edges provide an ordering between nodes that have side effects (such as loads, stores, calls, returns, etc). All nodes that have side effects should take a token chain as input and produce a new one as output. By convention, token chain @@ -806,7 +762,7 @@ machine nodes have their chain after the instruction's operands, and may be followed by glue nodes. A SelectionDAG has designated "Entry" and "Root" nodes. The Entry node is -always a marker node with an Opcode of ``ISD::EntryToken``. The Root node is +always a marker node with an Opcode of `ISD::EntryToken`. The Root node is the final side-effecting node in the token chain. For example, in a single basic block function it would be the return node.
@@ -814,42 +770,42 @@ One important concept for SelectionDAGs is the notion of a "legal" vs. "illegal" DAG. A legal DAG for a target is one that only uses supported operations and supported types. On a 32-bit PowerPC, for example, a DAG with a value of type i1, i8, i16, or i64 would be illegal, as would a DAG that uses a -SREM or UREM operation. The `legalize types`_ and `legalize operations`_ phases +SREM or UREM operation. The {ref}`legalize types ` and {ref}`legalize operations ` phases are responsible for turning an illegal DAG into a legal DAG. -.. _SelectionDAG-Process: +(SelectionDAG-Process)= -SelectionDAG Instruction Selection Process -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### SelectionDAG Instruction Selection Process SelectionDAG-based instruction selection consists of the following steps: -#. `Build initial DAG`_ --- This stage performs a simple translation from the +1. {ref}`Build initial DAG ` --- This stage performs a simple translation from the input LLVM code to an illegal SelectionDAG. -#. `Optimize SelectionDAG`_ --- This stage performs simple optimizations on the +1. {ref}`Optimize SelectionDAG ` --- This stage performs simple optimizations on the SelectionDAG to simplify it, and recognize meta instructions (like rotates - and ``div``/``rem`` pairs) for targets that support these meta operations. - This makes the resultant code more efficient and the `select instructions - from DAG`_ phase (below) simpler. + and `div`/`rem` pairs) for targets that support these meta operations. + This makes the resultant code more efficient and the + {ref}`select instructions from DAG ` --- Finally, the target instruction selector matches the DAG operations to target instructions. This process translates the target-independent input DAG into another DAG of target instructions. -#. `SelectionDAG Scheduling and Formation`_ --- The last phase assigns a linear +1. 
{ref}`SelectionDAG Scheduling and Formation ` --- The last phase assigns a linear order to the instructions in the target-instruction DAG and emits them into the MachineFunction being compiled. This step uses traditional prepass scheduling techniques. @@ -857,59 +813,57 @@ SelectionDAG-based instruction selection consists of the following steps: After all of these steps are complete, the SelectionDAG is destroyed and the rest of the code generation passes are run. -One of the most common ways to debug these steps is using ``-debug-only=isel``, +One of the most common ways to debug these steps is using `-debug-only=isel`, which prints out the DAG, along with other information like debug info, -after each of these steps. Alternatively, ``-debug-only=isel-dump`` shows only +after each of these steps. Alternatively, `-debug-only=isel-dump` shows only the DAG dumps, but the results can be filtered by function names using -``-filter-print-funcs=``. +`-filter-print-funcs=`. One great way to visualize what is going on here is to take advantage of a few LLC command line options. The following options pop up a window displaying the SelectionDAG at specific times (if you only get errors printed to the console -while using this, you probably `need to configure your -system `_ to add support for it). +while using this, you probably {ref}`need to configure your system ` +to add support for it). -* ``-view-dag-combine1-dags`` displays the DAG after being built, before the +* `-view-dag-combine1-dags` displays the DAG after being built, before the first optimization pass. -* ``-view-legalize-dags`` displays the DAG before Legalization. +* `-view-legalize-dags` displays the DAG before Legalization. -* ``-view-dag-combine2-dags`` displays the DAG before the second optimization +* `-view-dag-combine2-dags` displays the DAG before the second optimization pass. -* ``-view-isel-dags`` displays the DAG before the Select phase. 
+* `-view-isel-dags` displays the DAG before the Select phase. -* ``-view-sched-dags`` displays the DAG before Scheduling. +* `-view-sched-dags` displays the DAG before Scheduling. -The ``-view-sunit-dags`` displays the Scheduler's dependency graph. This graph +The `-view-sunit-dags` displays the Scheduler's dependency graph. This graph is based on the final SelectionDAG, with nodes that must be scheduled together bundled into a single scheduling-unit node, and with immediate operands and other nodes that aren't relevant for scheduling omitted. -The option ``-filter-view-dags`` allows to select the name of the basic block +The option `-filter-view-dags` allows to select the name of the basic block that you are interested in visualizing and filters all the previous -``view-*-dags`` options. +`view-*-dags` options. -.. _Build initial DAG: +(Build initial DAG)= -Initial SelectionDAG Construction -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Initial SelectionDAG Construction -The initial SelectionDAG is na\ :raw-html:`ï`\ vely peephole expanded from -the LLVM input by the ``SelectionDAGBuilder`` class. The intent of this pass +The initial SelectionDAG is naïvely peephole expanded from +the LLVM input by the `SelectionDAGBuilder` class. The intent of this pass is to expose as much low-level, target-specific details to the SelectionDAG as -possible. This pass is mostly hard-coded (e.g., an LLVM ``add`` turns into an -``SDNode add`` while a ``getelementptr`` is expanded into the obvious +possible. This pass is mostly hard-coded (e.g., an LLVM `add` turns into an +`SDNode add` while a `getelementptr` is expanded into the obvious arithmetic). This pass requires target-specific hooks to lower calls, returns, -varargs, etc. For these features, the :raw-html:`` `TargetLowering`_ -:raw-html:`` interface is used. +varargs, etc. For these features, the {ref}`TargetLowering ` + interface is used. -.. _legalize types: -.. _Legalize SelectionDAG Types: -.. 
_Legalize SelectionDAG Ops: +(legalize types)= +(Legalize SelectionDAG Types)= +(Legalize SelectionDAG Ops)= -SelectionDAG LegalizeTypes Phase -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### SelectionDAG LegalizeTypes Phase The Legalize phase is in charge of converting a DAG to only use the types that are natively supported by the target. @@ -931,14 +885,13 @@ all the way down to single-element parts with no supported vector type being found, the elements are converted to scalars ("scalarizing"). A target implementation tells the legalizer which types are supported (and which -register class to use for them) by calling the ``addRegisterClass`` method in -its ``TargetLowering`` constructor. +register class to use for them) by calling the `addRegisterClass` method in +its `TargetLowering` constructor. -.. _legalize operations: -.. _Legalizer: +(legalize operations)= +(Legalizer)= -SelectionDAG Legalize Phase -^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### SelectionDAG Legalize Phase The Legalize phase is in charge of converting a DAG to only use the operations that are natively supported by the target. @@ -953,7 +906,7 @@ implement the legalization ("custom"). A target implementation tells the legalizer which operations are not supported (and which of the above three actions to take) by calling the -``setOperationAction`` method in its ``TargetLowering`` constructor. +`setOperationAction` method in its `TargetLowering` constructor. If a target has legal vector types, it is expected to produce efficient machine code for common forms of the shufflevector IR instruction using those types. @@ -964,34 +917,33 @@ handled include: * Vector select --- Each element of the vector is chosen from either of the corresponding elements of the 2 input vectors. This operation may also be known as a "blend" or "bitwise select" in target assembly. This type of shuffle - maps directly to the ``shuffle_vector`` SelectionDAG node. + maps directly to the `shuffle_vector` SelectionDAG node. 
* Insert subvector --- A vector is placed into a longer vector type starting - at index 0. This type of shuffle maps directly to the ``insert_subvector`` - SelectionDAG node with the ``index`` operand set to 0. + at index 0. This type of shuffle maps directly to the `insert_subvector` + SelectionDAG node with the `index` operand set to 0. * Extract subvector --- A vector is pulled from a longer vector type starting - at index 0. This type of shuffle maps directly to the ``extract_subvector`` - SelectionDAG node with the ``index`` operand set to 0. + at index 0. This type of shuffle maps directly to the `extract_subvector` + SelectionDAG node with the `index` operand set to 0. * Splat --- All elements of the vector have identical scalar elements. This operation may also be known as a "broadcast" or "duplicate" in target assembly. The shufflevector IR instruction may change the vector length, so this operation - may map to multiple SelectionDAG nodes including ``shuffle_vector``, - ``concat_vectors``, ``insert_subvector``, and ``extract_subvector``. + may map to multiple SelectionDAG nodes including `shuffle_vector`, + `concat_vectors`, `insert_subvector`, and `extract_subvector`. Prior to the existence of the Legalize passes, we required that every target -`selector`_ supported and handled every operator and type even if they are not +{ref}`selector ` supported and handled every operator and type even if they are not natively supported. The introduction of the Legalize phases allows all of the canonicalization patterns to be shared across targets, and makes it very easy to optimize the canonicalized code because it is still in the form of a DAG. -.. _optimizations: -.. _Optimize SelectionDAG: -.. 
_selector: +(optimizations)= +(Optimize SelectionDAG)= +(selector)= -SelectionDAG Optimization Phase: the DAG Combiner -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### SelectionDAG Optimization Phase: the DAG Combiner The SelectionDAG optimization phase is run multiple times for code generation, immediately after the DAG is built and once after each legalization. The first @@ -1006,71 +958,68 @@ zero extension instructions. We currently use ad-hoc techniques, but could move to more rigorous techniques in the future. Here are some good papers on the subject: -"`Widening integer arithmetic `_" :raw-html:`
` -Kevin Redwine and Norman Ramsey :raw-html:`
` +"[Widening integer arithmetic](http://www.eecs.harvard.edu/~nr/pubs/widen-abstract.html)" \ +Kevin Redwine and Norman Ramsey \ International Conference on Compiler Construction (CC) 2004 -"`Effective sign extension elimination `_" :raw-html:`
` -Motohiro Kawahito, Hideaki Komatsu, and Toshio Nakatani :raw-html:`
` +"[Effective sign extension elimination](http://portal.acm.org/citation.cfm?doid=512529.512552)" \ +Motohiro Kawahito, Hideaki Komatsu, and Toshio Nakatani \ Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language Design and Implementation. -.. _Select instructions from DAG: +(Select instructions from DAG)= -SelectionDAG Select Phase -^^^^^^^^^^^^^^^^^^^^^^^^^ +#### SelectionDAG Select Phase The Select phase is the bulk of the target-specific code for instruction selection. This phase takes a legal SelectionDAG as input, pattern matches the instructions supported by the target to this DAG, and produces a new DAG of target code. For example, consider the following LLVM fragment: -.. code-block:: llvm - - %t1 = fadd float %W, %X - %t2 = fmul float %t1, %Y - %t3 = fadd float %t2, %Z - +```llvm +%t1 = fadd float %W, %X +%t2 = fmul float %t1, %Y +%t3 = fadd float %t2, %Z +``` This LLVM code corresponds to a SelectionDAG that looks basically like this: -.. code-block:: text - - (fadd:f32 (fmul:f32 (fadd:f32 W, X), Y), Z) - +```text +(fadd:f32 (fmul:f32 (fadd:f32 W, X), Y), Z) +``` If a target supports floating point multiply-and-add (FMA) operations, one of the adds can be merged with the multiply. On the PowerPC, for example, the output of the instruction selector might look like this DAG: -:: +``` +(FMADDS (FADDS W, X), Y, Z) +``` - (FMADDS (FADDS W, X), Y, Z) - -The ``FMADDS`` instruction is a ternary instruction that multiplies its first +The `FMADDS` instruction is a ternary instruction that multiplies its first two operands and adds the third (as single-precision floating-point numbers). -The ``FADDS`` instruction is a simple binary single-precision add instruction. +The `FADDS` instruction is a simple binary single-precision add instruction. To perform this pattern match, the PowerPC backend includes the following instruction definitions: -.. 
code-block:: text - :emphasize-lines: 4-5,9 - - def FMADDS : AForm_1<59, 29, - (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), - "fmadds $FRT, $FRA, $FRC, $FRB", - [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC), - F4RC:$FRB))]>; - def FADDS : AForm_2<59, 21, - (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRB), - "fadds $FRT, $FRA, $FRB", - [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>; - +```{code-block} text +:emphasize-lines: 4-5,9 + +def FMADDS : AForm_1<59, 29, + (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRC, F4RC:$FRB), + "fmadds $FRT, $FRA, $FRC, $FRB", + [(set F4RC:$FRT, (fadd (fmul F4RC:$FRA, F4RC:$FRC), + F4RC:$FRB))]>; +def FADDS : AForm_2<59, 21, + (ops F4RC:$FRT, F4RC:$FRA, F4RC:$FRB), + "fadds $FRT, $FRA, $FRB", + [(set F4RC:$FRT, (fadd F4RC:$FRA, F4RC:$FRB))]>; +``` The highlighted portion of the instruction definitions indicates the pattern -used to match the instructions. The DAG operators (like ``fmul``/``fadd``) -are defined in the ``include/llvm/Target/TargetSelectionDAG.td`` file. -"``F4RC``" is the register class of the input and result values. +used to match the instructions. The DAG operators (like `fmul`/`fadd`) +are defined in the `include/llvm/Target/TargetSelectionDAG.td` file. +"`F4RC`" is the register class of the input and result values. The TableGen DAG instruction selector generator reads the instruction patterns -in the ``.td`` file and automatically builds parts of the pattern matching code +in the `.td` file and automatically builds parts of the pattern matching code for your target. It has the following strengths: * At compiler-compile time, it analyzes your instruction patterns and tells you @@ -1078,64 +1027,65 @@ for your target. It has the following strengths: * It can handle arbitrary constraints on operands for the pattern match. In particular, it is straightforward to say things like "match any immediate - that is a 13-bit sign-extended value". For examples, see the ``immSExt16`` - and related ``tblgen`` classes in the PowerPC backend. 
+ that is a 13-bit sign-extended value". For examples, see the `immSExt16` + and related `tblgen` classes in the PowerPC backend. * It knows several important identities for the patterns defined. For example, - it knows that addition is commutative, so it allows the ``FMADDS`` pattern - above to match "``(fadd X, (fmul Y, Z))``" as well as "``(fadd (fmul X, Y), - Z)``", without the target author having to specially handle this case. + it knows that addition is commutative, so it allows the `FMADDS` pattern + above to match "`(fadd X, (fmul Y, Z))`" as well as + "`(fadd (fmul X, Y), Z)`", without the target author having to specially + handle this case. * It has a full-featured type-inferencing system. In particular, you should rarely have to explicitly tell the system what type parts of your patterns - are. In the ``FMADDS`` case above, we didn't have to tell ``tblgen`` that all + are. In the `FMADDS` case above, we didn't have to tell `tblgen` that all of the nodes in the pattern are of type 'f32'. It was able to infer and - propagate this knowledge from the fact that ``F4RC`` has type 'f32'. + propagate this knowledge from the fact that `F4RC` has type 'f32'. * Targets can define their own (and rely on built-in) "pattern fragments". Pattern fragments are chunks of reusable patterns that get inlined into your - patterns during compiler-compile time. For example, the integer "``(not - x)``" operation is actually defined as a pattern fragment that expands as - "``(xor x, -1)``", since the SelectionDAG does not have a native '``not``' + patterns during compiler-compile time. For example, the integer "`(not x)`" + operation is actually defined as a pattern fragment that expands as + "`(xor x, -1)`", since the SelectionDAG does not have a native '`not`' operation. Targets can define their own short-hand fragments as they see fit. - See the definition of '``not``' and '``ineg``' for examples. + See the definition of '`not`' and '`ineg`' for examples. 
* In addition to instructions, targets can specify arbitrary patterns that map to one or more instructions using the 'Pat' class. For example, the PowerPC has no way to load an arbitrary integer immediate into a register in one instruction. To tell tblgen how to do this, it defines: - :: - - // Arbitrary immediate support. Implement in terms of LIS/ORI. - def : Pat<(i32 imm:$imm), - (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>; + ``` + // Arbitrary immediate support. Implement in terms of LIS/ORI. + def : Pat<(i32 imm:$imm), + (ORI (LIS (HI16 imm:$imm)), (LO16 imm:$imm))>; + ``` If none of the single-instruction patterns for loading an immediate into a register match, this will be used. This rule says "match an arbitrary i32 - immediate, turning it into an ``ORI`` ('or a 16-bit immediate') and an ``LIS`` + immediate, turning it into an `ORI` ('or a 16-bit immediate') and an `LIS` ('load 16-bit immediate, where the immediate is shifted to the left 16 bits') - instruction". To make this work, the ``LO16``/``HI16`` node transformations + instruction". To make this work, the `LO16`/`HI16` node transformations are used to manipulate the input immediate (in this case, take the high or low 16-bits of the immediate). * When using the 'Pat' class to map a pattern to an instruction that has one - or more complex operands (like e.g., `X86 addressing mode`_), the pattern may - either specify the operand as a whole using a ``ComplexPattern``, or else it + or more complex operands (like e.g., {ref}`X86 addressing mode `), the pattern may + either specify the operand as a whole using a `ComplexPattern`, or else it may specify the components of the complex operand separately. 
The latter is done e.g., for pre-increment instructions by the PowerPC back end: - :: - - def STWU : DForm_1<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, memri:$dst), - "stwu $rS, $dst", LdStStoreUpd, []>, - RegConstraint<"$dst.reg = $ea_res">; + ``` + def STWU : DForm_1<37, (outs ptr_rc:$ea_res), (ins GPRC:$rS, memri:$dst), + "stwu $rS, $dst", LdStStoreUpd, []>, + RegConstraint<"$dst.reg = $ea_res">; - def : Pat<(pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff), - (STWU GPRC:$rS, iaddroff:$ptroff, ptr_rc:$ptrreg)>; + def : Pat<(pre_store GPRC:$rS, ptr_rc:$ptrreg, iaddroff:$ptroff), + (STWU GPRC:$rS, iaddroff:$ptroff, ptr_rc:$ptrreg)>; + ``` - Here, the pair of ``ptroff`` and ``ptrreg`` operands is matched onto the - complex operand ``dst`` of class ``memri`` in the ``STWU`` instruction. + Here, the pair of `ptroff` and `ptrreg` operands is matched onto the + complex operand `dst` of class `memri` in the `STWU` instruction. * While the system does automate a lot, it still allows you to write custom C++ code to match special cases if there is something that is hard to @@ -1145,20 +1095,20 @@ While it has many strengths, the system currently has some limitations, primarily because it is a work in progress and is not yet finished: * Overall, there is no way to define or match SelectionDAG nodes that define - multiple values (e.g., ``SMUL_LOHI``, ``LOAD``, ``CALL``, etc). This is the + multiple values (e.g., `SMUL_LOHI`, `LOAD`, `CALL`, etc). This is the biggest reason that you currently still *have to* write custom C++ code for your instruction selector. * There is no great way to support matching complex addressing modes yet. In the future, we will extend pattern fragments to allow them to define multiple - values (e.g., the four operands of the `X86 addressing mode`_, which are + values (e.g., the four operands of the {ref}`X86 addressing mode `, which are currently matched with custom C++ code). 
In addition, we'll extend fragments so that a fragment can match multiple different patterns. -* We don't automatically infer flags like ``isStore``/``isLoad`` yet. +* We don't automatically infer flags like `isStore`/`isLoad` yet. * We don't automatically generate the set of supported registers and operations - for the `Legalizer`_ yet. + for the {ref}`Legalizer ` yet. * We don't have a way of tying in custom legalized nodes yet. @@ -1167,47 +1117,42 @@ useful for most of the binary and logical operations in typical instruction sets. If you run into any problems or can't figure out how to do something, please let Chris know! -.. _Scheduling and Formation: -.. _SelectionDAG Scheduling and Formation: +(Scheduling and Formation)= +(SelectionDAG Scheduling and Formation)= -SelectionDAG Scheduling and Formation Phase -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### SelectionDAG Scheduling and Formation Phase The scheduling phase takes the DAG of target instructions from the selection phase and assigns an order. The scheduler can pick an order depending on various constraints of the machines (i.e., order for minimal register pressure or try to cover instruction latencies). Once an order is established, the DAG is -converted to a list of :raw-html:`` `MachineInstr`_\s :raw-html:`` and +converted to a list of {ref}`MachineInstr `s and the SelectionDAG is destroyed. Note that this phase is logically separate from the instruction selection phase, but is tied to it closely in the code because it operates on SelectionDAGs. -Future directions for the SelectionDAG -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Future directions for the SelectionDAG -#. Optional function-at-a-time selection. +1. Optional function-at-a-time selection. -#. Auto-generate entire selector from ``.td`` file. +1. Auto-generate entire selector from `.td` file. -.. 
_SSA-based Machine Code Optimizations: +(SSA-based Machine Code Optimizations)= -SSA-based Machine Code Optimizations ------------------------------------- +### SSA-based Machine Code Optimizations To Be Written -Live Intervals --------------- +### Live Intervals Live Intervals are the ranges (intervals) where a variable is *live*. They are -used by some `register allocator`_ passes to determine if two or more virtual +used by some {ref}`register allocator ` passes to determine if two or more virtual registers which require the same physical register are live at the same point in the program (i.e., they conflict). When this situation occurs, one virtual register must be *spilled*. -Live Variable Analysis -^^^^^^^^^^^^^^^^^^^^^^ +#### Live Variable Analysis The first step in determining the live intervals of variables is to calculate the set of registers that are immediately dead after the instruction (i.e., the @@ -1228,67 +1173,63 @@ Physical registers may be live in to or out of a function. Live in values are typically arguments in registers. Live out values are typically return values in registers. Live in values are marked as such, and are given a dummy "defining" instruction during live intervals analysis. If the last basic block of a -function is a ``return``, then it's marked as using all live out values in the +function is a `return`, then it's marked as using all live out values in the function. -``PHI`` nodes need to be handled specially, because the calculation of the live +`PHI` nodes need to be handled specially, because the calculation of the live variable information from a depth first traversal of the CFG of the function -won't guarantee that a virtual register used by the ``PHI`` node is defined -before it's used. When a ``PHI`` node is encountered, only the definition is +won't guarantee that a virtual register used by the `PHI` node is defined +before it's used. 
When a `PHI` node is encountered, only the definition is handled, because the uses will be handled in other basic blocks. -For each ``PHI`` node of the current basic block, we simulate an assignment at +For each `PHI` node of the current basic block, we simulate an assignment at the end of the current basic block and traverse the successor basic blocks. If a -successor basic block has a ``PHI`` node and one of the ``PHI`` node's operands +successor basic block has a `PHI` node and one of the `PHI` node's operands is coming from the current basic block, then the variable is marked as *alive* within the current basic block and all of its predecessor basic blocks, until the basic block with the defining instruction is encountered. -Live Intervals Analysis -^^^^^^^^^^^^^^^^^^^^^^^ +#### Live Intervals Analysis We now have the information available to perform the live intervals analysis and build the live intervals themselves. We start off by numbering the basic blocks and machine instructions. We then handle the "live-in" values. These are in physical registers, so the physical register is assumed to be killed by the end of the basic block. Live intervals for virtual registers are computed for some -ordering of the machine instructions ``[1, N]``. A live interval is an interval -``[i, j)``, where ``1 >= i >= j > N``, for which a variable is live. - -.. note:: - More to come... +ordering of the machine instructions `[1, N]`. A live interval is an interval +`[i, j)`, where `1 <= i <= j < N`, for which a variable is live. -.. _Register Allocation: -.. _register allocator: +```{note} +More to come...
+``` +(Register Allocation)= +(register allocator)= -Register Allocation -------------------- +### Register Allocation The *Register Allocation problem* consists in mapping a program -:raw-html:`` P\ :sub:`v`\ :raw-html:``, that can use an unbounded -number of virtual registers, to a program :raw-html:`` P\ :sub:`p`\ -:raw-html:`` that contains a finite (possibly small) number of physical +P{sub}`v`, that can use an unbounded +number of virtual registers, to a program P{sub}`p` that contains a finite (possibly small) number of physical registers. Each target architecture has a different number of physical registers. If the number of physical registers is not enough to accommodate all the virtual registers, some of them will have to be mapped into memory. These virtuals are called *spilled virtuals*. -How registers are represented in LLVM -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### How registers are represented in LLVM In LLVM, physical registers are denoted by integer numbers that normally range from 1 to 1023. To see how this numbering is defined for a particular -architecture, you can read the ``GenRegisterNames.inc`` file for that +architecture, you can read the `GenRegisterNames.inc` file for that architecture. For instance, by inspecting -``lib/Target/X86/X86GenRegisterInfo.inc`` we see that the 32-bit register -``EAX`` is denoted by 43, and the MMX register ``MM0`` is mapped to 65. +`lib/Target/X86/X86GenRegisterInfo.inc` we see that the 32-bit register +`EAX` is denoted by 43, and the MMX register `MM0` is mapped to 65. Some architectures contain registers that share the same physical location. A notable example is the X86 platform. For instance, in the X86 architecture, the -registers ``EAX``, ``AX`` and ``AL`` share the first eight bits. These physical +registers `EAX`, `AX` and `AL` share the first eight bits. These physical registers are marked as *aliased* in LLVM. 
Given a particular architecture, you -can check which registers are aliased by inspecting its ``RegisterInfo.td`` -file. Moreover, the class ``MCRegAliasIterator`` enumerates all the physical +can check which registers are aliased by inspecting its `RegisterInfo.td` +file. Moreover, the class `MCRegAliasIterator` enumerates all the physical registers aliased to a register. Physical registers, in LLVM, are grouped in *Register Classes*. Elements in the @@ -1296,56 +1237,54 @@ same register class are functionally equivalent, and can be interchangeably used. Each virtual register can only be mapped to physical registers of a particular class. For instance, in the X86 architecture, some virtuals can only be allocated to 8-bit registers. A register class is described by -``TargetRegisterClass`` objects. To discover if a virtual register is +`TargetRegisterClass` objects. To discover if a virtual register is compatible with a given physical, this code can be used: -.. code-block:: c++ - - bool RegMapping_Fer::compatible_class(MachineFunction &mf, - unsigned v_reg, - unsigned p_reg) { - assert(TargetRegisterInfo::isPhysicalRegister(p_reg) && - "Target register must be physical"); - const TargetRegisterClass *trc = mf.getRegInfo().getRegClass(v_reg); - return trc->contains(p_reg); - } - +```c++ +bool RegMapping_Fer::compatible_class(MachineFunction &mf, + unsigned v_reg, + unsigned p_reg) { + assert(TargetRegisterInfo::isPhysicalRegister(p_reg) && + "Target register must be physical"); + const TargetRegisterClass *trc = mf.getRegInfo().getRegClass(v_reg); + return trc->contains(p_reg); +} +``` Sometimes, mostly for debugging purposes, it is useful to change the number of physical registers available in the target architecture. This must be done -statically, inside the ``TargetRegisterInfo.td`` file. Just ``grep`` for -``RegisterClass``, the last parameter of which is a list of registers. Just +statically, inside the `TargetRegisterInfo.td` file. 
Just `grep` for +`RegisterClass`, the last parameter of which is a list of registers. Just commenting some out is one simple way to avoid them being used. A more polite way is to explicitly exclude some registers from the *allocation order*. See the -definition of the ``GR8`` register class in -``lib/Target/X86/X86RegisterInfo.td`` for an example of this. +definition of the `GR8` register class in +`lib/Target/X86/X86RegisterInfo.td` for an example of this. Virtual registers are also denoted by integer numbers. Contrary to physical registers, different virtual registers never share the same number. Whereas -physical registers are statically defined in a ``TargetRegisterInfo.td`` file +physical registers are statically defined in a `TargetRegisterInfo.td` file and cannot be created by the application developer, that is not the case with virtual registers. In order to create new virtual registers, use the method -``MachineRegisterInfo::createVirtualRegister()``. This method will return a new -virtual register. Use an ``IndexedMap`` to hold +`MachineRegisterInfo::createVirtualRegister()`. This method will return a new +virtual register. Use an `IndexedMap` to hold information per virtual register. If you need to enumerate all virtual -registers, use the function ``TargetRegisterInfo::index2VirtReg()`` to find the +registers, use the function `TargetRegisterInfo::index2VirtReg()` to find the virtual register numbers: -.. code-block:: c++ - - for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned VirtReg = TargetRegisterInfo::index2VirtReg(i); - stuff(VirtReg); - } - +```c++ + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned VirtReg = TargetRegisterInfo::index2VirtReg(i); + stuff(VirtReg); + } +``` Before register allocation, the operands of an instruction are mostly virtual registers, although physical registers may also be used. 
In order to check if a given machine operand is a register, use the boolean function -``MachineOperand::isRegister()``. To obtain the integer code of a register, use -``MachineOperand::getReg()``. An instruction may define or use a register. For -instance, ``ADD reg:1026 := reg:1025 reg:1024`` defines the registers 1024, and +`MachineOperand::isRegister()`. To obtain the integer code of a register, use +`MachineOperand::getReg()`. An instruction may define or use a register. For +instance, `ADD reg:1026 := reg:1025 reg:1024` defines the registers 1024, and uses registers 1025 and 1026. Given a register operand, the method -``MachineOperand::isUse()`` informs if that register is being used by the -instruction. The method ``MachineOperand::isDef()`` informs if that registers is +`MachineOperand::isUse()` informs if that register is being used by the +instruction. The method `MachineOperand::isDef()` informs if that register is being defined. We will call physical registers present in the LLVM bitcode before register @@ -1354,27 +1293,26 @@ different situations, for instance, to pass parameters of functions calls, and to store results of particular instructions. There are two types of pre-colored registers: the ones *implicitly* defined, and those *explicitly* defined. Explicitly defined registers are normal operands, and can be accessed -with ``MachineInstr::getOperand(int)::getReg()``. In order to check which +with `MachineInstr::getOperand(int)::getReg()`. In order to check which registers are implicitly defined by an instruction, use the -``TargetInstrInfo::get(opcode)::ImplicitDefs``, where ``opcode`` is the opcode +`TargetInstrInfo::get(opcode)::ImplicitDefs`, where `opcode` is the opcode of the target instruction. One important difference between explicit and implicit physical registers is that the latter are defined statically for each instruction, whereas the former may vary depending on the program being compiled.
For example, an instruction that represents a function call will always implicitly define or use the same set of physical registers. To read the registers implicitly used by an instruction, use -``TargetInstrInfo::get(opcode)::ImplicitUses``. Pre-colored registers impose +`TargetInstrInfo::get(opcode)::ImplicitUses`. Pre-colored registers impose constraints on any register allocation algorithm. The register allocator must make sure that none of them are overwritten by the values of virtual registers while still alive. -Mapping virtual registers to physical registers -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Mapping virtual registers to physical registers There are two ways to map virtual registers to physical registers (or to memory slots). The first way, that we will call *direct mapping*, is based on the use -of methods of the classes ``TargetRegisterInfo``, and ``MachineOperand``. The -second way, that we will call *indirect mapping*, relies on the ``VirtRegMap`` +of methods of the classes `TargetRegisterInfo`, and `MachineOperand`. The +second way, that we will call *indirect mapping*, relies on the `VirtRegMap` class in order to insert loads and stores sending and getting values to and from memory. @@ -1383,18 +1321,18 @@ allocator; however, it is more error prone, and demands more implementation work. Basically, the programmer will have to specify where load and store instructions should be inserted in the target function being compiled in order to get and store values in memory. To assign a physical register to a virtual -register present in a given operand, use ``MachineOperand::setReg(p_reg)``. To -insert a store instruction, use ``TargetInstrInfo::storeRegToStackSlot(...)``, -and to insert a load instruction, use ``TargetInstrInfo::loadRegFromStackSlot``. +register present in a given operand, use `MachineOperand::setReg(p_reg)`. 
To +insert a store instruction, use `TargetInstrInfo::storeRegToStackSlot(...)`, +and to insert a load instruction, use `TargetInstrInfo::loadRegFromStackSlot`. The indirect mapping shields the application developer from the complexities of inserting load and store instructions. In order to map a virtual register to a -physical one, use ``VirtRegMap::assignVirt2Phys(vreg, preg)``. In order to map +physical one, use `VirtRegMap::assignVirt2Phys(vreg, preg)`. In order to map a certain virtual register to memory, use -``VirtRegMap::assignVirt2StackSlot(vreg)``. This method will return the stack -slot where ``vreg``'s value will be located. If it is necessary to map another +`VirtRegMap::assignVirt2StackSlot(vreg)`. This method will return the stack +slot where `vreg`'s value will be located. If it is necessary to map another virtual register to the same stack slot, use -``VirtRegMap::assignVirt2StackSlot(vreg, stack_location)``. One important point +`VirtRegMap::assignVirt2StackSlot(vreg, stack_location)`. One important point to consider when using the indirect mapping, is that even if a virtual register is mapped to memory, it still needs to be mapped to a physical register. This physical register is the location where the virtual register is supposed to be @@ -1406,39 +1344,37 @@ object to place load and store instructions in the code. Every virtual that has been mapped to a stack slot will be stored to memory after being defined and will be loaded before being used. The implementation of the spiller tries to recycle load/store instructions, avoiding unnecessary instructions. For an example of -how to invoke the spiller, see ``RegAllocLinearScan::runOnMachineFunction`` in -``lib/CodeGen/RegAllocLinearScan.cpp``. +how to invoke the spiller, see `RegAllocLinearScan::runOnMachineFunction` in +`lib/CodeGen/RegAllocLinearScan.cpp`. 
-Handling two address instructions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Handling two address instructions With very rare exceptions (e.g., function calls), the LLVM machine code instructions are three address instructions. That is, each instruction is expected to define at most one register, and to use at most two registers. However, some architectures use two address instructions. In this case, the defined register is also one of the used registers. For instance, an instruction -such as ``ADD %EAX, %EBX``, in X86 is actually equivalent to ``%EAX = %EAX + -%EBX``. +such as `ADD %EAX, %EBX`, in X86 is actually equivalent to +`%EAX = %EAX + %EBX`. In order to produce correct code, LLVM must convert three address instructions that represent two address instructions into true two address instructions. LLVM -provides the pass ``TwoAddressInstructionPass`` for this specific purpose. It +provides the pass `TwoAddressInstructionPass` for this specific purpose. It must be run before register allocation takes place. After its execution, the resulting code may no longer be in SSA form. This happens, for instance, in -situations where an instruction such as ``%a = ADD %b %c`` is converted to two +situations where an instruction such as `%a = ADD %b %c` is converted to two instructions such as: -:: +``` +%a = MOVE %b +%a = ADD %a %c +``` - %a = MOVE %b - %a = ADD %a %c - -Notice that, internally, the second instruction is represented as ``ADD -%a[def/use] %c``. I.e., the register operand ``%a`` is both used and defined by +Notice that, internally, the second instruction is represented as +`ADD %a[def/use] %c`. I.e., the register operand `%a` is both used and defined by the instruction. -The SSA deconstruction phase -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### The SSA deconstruction phase An important transformation that happens during register allocation is called the *SSA Deconstruction Phase*. 
The SSA form simplifies many analyses that are @@ -1451,36 +1387,34 @@ There are many ways in which PHI instructions can safely be removed from the target code. The most traditional PHI deconstruction algorithm replaces PHI instructions with copy instructions. That is the strategy adopted by LLVM. The SSA deconstruction algorithm is implemented in -``lib/CodeGen/PHIElimination.cpp``. In order to invoke this pass, the identifier -``PHIEliminationID`` must be marked as required in the code of the register +`lib/CodeGen/PHIElimination.cpp`. In order to invoke this pass, the identifier +`PHIEliminationID` must be marked as required in the code of the register allocator. -Instruction folding -^^^^^^^^^^^^^^^^^^^ +#### Instruction folding *Instruction folding* is an optimization performed during register allocation that removes unnecessary copy instructions. For instance, a sequence of instructions such as: -:: - - %EBX = LOAD %mem_address - %EAX = COPY %EBX +``` +%EBX = LOAD %mem_address +%EAX = COPY %EBX +``` can be safely substituted by the single instruction: -:: - - %EAX = LOAD %mem_address +``` +%EAX = LOAD %mem_address +``` Instructions can be folded with the -``TargetRegisterInfo::foldMemoryOperand(...)`` method. Care must be taken when +`TargetRegisterInfo::foldMemoryOperand(...)` method. Care must be taken when folding instructions; a folded instruction can be quite different from the -original instruction. See ``LiveIntervals::addIntervalsForSpills`` in -``lib/CodeGen/LiveIntervalAnalysis.cpp`` for an example of its use. +original instruction. See `LiveIntervals::addIntervalsForSpills` in +`lib/CodeGen/LiveIntervalAnalysis.cpp` for an example of its use. 
-Built in register allocators -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Built in register allocators The LLVM infrastructure provides the application developer with three different register allocators: @@ -1505,26 +1439,22 @@ register allocators: the register allocation problem under consideration, solving this using a PBQP solver, and mapping the solution back to a register assignment. -The type of register allocator used in ``llc`` can be chosen with the command -line option ``-regalloc=...``: - -.. code-block:: bash - - $ llc -regalloc=linearscan file.bc -o ln.s - $ llc -regalloc=fast file.bc -o fa.s - $ llc -regalloc=pbqp file.bc -o pbqp.s - -.. _Prolog/Epilog Code Insertion: +The type of register allocator used in `llc` can be chosen with the command +line option `-regalloc=...`: -Prolog/Epilog Code Insertion ----------------------------- +```bash +$ llc -regalloc=linearscan file.bc -o ln.s +$ llc -regalloc=fast file.bc -o fa.s +$ llc -regalloc=pbqp file.bc -o pbqp.s +``` +(Prolog/Epilog Code Insertion)= -.. note:: +### Prolog/Epilog Code Insertion - To Be Written - -Compact Unwind --------------- +```{note} +To Be Written +``` +### Compact Unwind Throwing an exception requires *unwinding* out of a function. The information on how to unwind a given function is traditionally expressed in DWARF unwind @@ -1537,76 +1467,74 @@ unwind* and requires just 4-bytes per function. The compact unwind encoding is a 32-bit value, which is encoded in an architecture-specific way. It specifies which registers to restore and from where, and how to unwind out of the function. When the linker creates a final -linked image, it will create a ``__TEXT,__unwind_info`` section. This section is +linked image, it will create a `__TEXT,__unwind_info` section. This section is a small and fast way for the runtime to access unwind info for any given function. If we emit compact unwind info for the function, that compact unwind -info will be encoded in the ``__TEXT,__unwind_info`` section. 
If we emit DWARF -unwind info, the ``__TEXT,__unwind_info`` section will contain the offset of the -FDE in the ``__TEXT,__eh_frame`` section in the final linked image. +info will be encoded in the `__TEXT,__unwind_info` section. If we emit DWARF +unwind info, the `__TEXT,__unwind_info` section will contain the offset of the +FDE in the `__TEXT,__eh_frame` section in the final linked image. For X86, there are three modes for the compact unwind encoding: -*Function with a Frame Pointer (``EBP`` or ``RBP``)* - ``EBP/RBP``-based frame, where ``EBP/RBP`` is pushed onto the stack - immediately after the return address, then ``ESP/RSP`` is moved to - ``EBP/RBP``. Thus to unwind, ``ESP/RSP`` is restored with the current - ``EBP/RBP`` value, then ``EBP/RBP`` is restored by popping the stack, and the - return is done by popping the stack once more into the PC. All non-volatile - registers that need to be restored must have been saved in a small range on - the stack that starts ``EBP-4`` to ``EBP-1020`` (``RBP-8`` to - ``RBP-1020``). The offset (divided by 4 in 32-bit mode and 8 in 64-bit mode) - is encoded in bits 16-23 (mask: ``0x00FF0000``). The registers saved are - encoded in bits 0-14 (mask: ``0x00007FFF``) as five 3-bit entries from the - following table: - - ============== ============= =============== - Compact Number i386 Register x86-64 Register - ============== ============= =============== - 1 ``EBX`` ``RBX`` - 2 ``ECX`` ``R12`` - 3 ``EDX`` ``R13`` - 4 ``EDI`` ``R14`` - 5 ``ESI`` ``R15`` - 6 ``EBP`` ``RBP`` - ============== ============= =============== - -*Frameless with a Small Constant Stack Size (``EBP`` or ``RBP`` is not used as a frame pointer)* - To return, a constant (encoded in the compact unwind encoding) is added to the - ``ESP/RSP``. Then the return is done by popping the stack into the PC. All - non-volatile registers that need to be restored must have been saved on the - stack immediately after the return address. 
The stack size (divided by 4 in - 32-bit mode and 8 in 64-bit mode) is encoded in bits 16-23 (mask: - ``0x00FF0000``). There is a maximum stack size of 1024 bytes in 32-bit mode - and 2048 in 64-bit mode. The number of registers saved is encoded in bits 9-12 - (mask: ``0x00001C00``). Bits 0-9 (mask: ``0x000003FF``) contain which - registers were saved and their order. (See the - ``encodeCompactUnwindRegistersWithoutFrame()`` function in - ``lib/Target/X86FrameLowering.cpp`` for the encoding algorithm.) - -*Frameless with a Large Constant Stack Size (``EBP`` or ``RBP`` is not used as a frame pointer)* - This case is like the "Frameless with a Small Constant Stack Size" case, but - the stack size is too large to encode in the compact unwind encoding. Instead - it requires that the function contains "``subl $nnnnnn, %esp``" in its - prolog. The compact encoding contains the offset to the ``$nnnnnn`` value in - the function in bits 9-12 (mask: ``0x00001C00``). - -.. _Late Machine Code Optimizations: - -Late Machine Code Optimizations -------------------------------- - -.. note:: - - To Be Written - -.. _Code Emission: - -Code Emission -------------- +*Function with a Frame Pointer (`EBP` or `RBP`)* + +`EBP/RBP`-based frame, where `EBP/RBP` is pushed onto the stack +immediately after the return address, then `ESP/RSP` is moved to +`EBP/RBP`. Thus to unwind, `ESP/RSP` is restored with the current +`EBP/RBP` value, then `EBP/RBP` is restored by popping the stack, and the +return is done by popping the stack once more into the PC. All non-volatile +registers that need to be restored must have been saved in a small range on +the stack that starts `EBP-4` to `EBP-1020` (`RBP-8` to +`RBP-1020`). The offset (divided by 4 in 32-bit mode and 8 in 64-bit mode) +is encoded in bits 16-23 (mask: `0x00FF0000`). 
The registers saved are +encoded in bits 0-14 (mask: `0x00007FFF`) as five 3-bit entries from the +following table: + +| Compact Number | i386 Register | x86-64 Register | +| --- | --- | --- | +| 1 | `EBX` | `RBX` | +| 2 | `ECX` | `R12` | +| 3 | `EDX` | `R13` | +| 4 | `EDI` | `R14` | +| 5 | `ESI` | `R15` | +| 6 | `EBP` | `RBP` | + +*Frameless with a Small Constant Stack Size (`EBP` or `RBP` is not used as a frame pointer)* + +To return, a constant (encoded in the compact unwind encoding) is added to the +`ESP/RSP`. Then the return is done by popping the stack into the PC. All +non-volatile registers that need to be restored must have been saved on the +stack immediately after the return address. The stack size (divided by 4 in +32-bit mode and 8 in 64-bit mode) is encoded in bits 16-23 (mask: +`0x00FF0000`). There is a maximum stack size of 1024 bytes in 32-bit mode +and 2048 in 64-bit mode. The number of registers saved is encoded in bits 10-12 +(mask: `0x00001C00`). Bits 0-9 (mask: `0x000003FF`) contain which +registers were saved and their order. (See the +`encodeCompactUnwindRegistersWithoutFrame()` function in +`lib/Target/X86/X86FrameLowering.cpp` for the encoding algorithm.) + +*Frameless with a Large Constant Stack Size (`EBP` or `RBP` is not used as a frame pointer)* + +This case is like the "Frameless with a Small Constant Stack Size" case, but +the stack size is too large to encode in the compact unwind encoding. Instead +it requires that the function contains "`subl $nnnnnn, %esp`" in its +prolog. The compact encoding contains the offset to the `$nnnnnn` value in +the function in bits 16-23 (mask: `0x00FF0000`).
+ +(Late Machine Code Optimizations)= + +### Late Machine Code Optimizations + +```{note} +To Be Written +``` +(Code Emission)= + +### Code Emission The code emission step of code generation is responsible for lowering from the -code generator abstractions (like `MachineFunction`_, `MachineInstr`_, etc) down -to the abstractions used by the MC layer (`MCInst`_, `MCStreamer`_, etc). This +code generator abstractions (like {ref}`MachineFunction <MachineFunction>`, {ref}`MachineInstr <MachineInstr>`, etc) down +to the abstractions used by the MC layer ({ref}`MCInst <MCInst>`, {ref}`MCStreamer <MCStreamer>`, etc). This is done with a combination of several different classes: the (misnamed) target-independent AsmPrinter class, target-specific subclasses of AsmPrinter (such as SparcAsmPrinter), and the TargetLoweringObjectFile class. @@ -1615,7 +1543,7 @@ Since the MC layer works at the level of abstraction of object files, it doesn't have a notion of functions, global variables etc. Instead, it thinks about labels, directives, and instructions. A key class used at this time is the MCStreamer class. This is an abstract API that is implemented in different ways -(e.g., to output a ``.s`` file, output an ELF ``.o`` file, etc) that is effectively an +(e.g., to output a `.s` file, output an ELF `.o` file, etc) that is effectively an "assembler API". MCStreamer has one method per directive, such as EmitLabel, EmitSymbolAttribute, switchSection, etc, which directly correspond to assembly level directives. @@ -1623,7 +1551,7 @@ level directives. If you are interested in implementing a code generator for a target, there are three important things that you have to implement for your target: -#. First, you need a subclass of AsmPrinter for your target. This class +1. First, you need a subclass of AsmPrinter for your target. This class implements the general lowering process converting MachineFunction's into MC label constructs.
The AsmPrinter base class provides a number of useful methods and routines, and also allows you to override the lowering process in @@ -1631,14 +1559,14 @@ three important things that you have to implement for your target: implementing an ELF, COFF, or MachO target, because the TargetLoweringObjectFile class implements much of the common logic. -#. Second, you need to implement an instruction printer for your target. The - instruction printer takes an `MCInst`_ and renders it to a raw_ostream as +1. Second, you need to implement an instruction printer for your target. The + instruction printer takes an {ref}`MCInst <MCInst>` and renders it to a raw_ostream as text. Most of this is automatically generated from the .td file (when you - specify something like "``add $dst, $src1, $src2``" in the instructions), but + specify something like "`add $dst, $src1, $src2`" in the instructions), but you need to implement routines to print operands. -#. Third, you need to implement code that lowers a `MachineInstr`_ to an MCInst, - usually implemented in "MCInstLower.cpp". This lowering process is +1. Third, you need to implement code that lowers a {ref}`MachineInstr <MachineInstr>` to an MCInst, + usually implemented in `MCInstLower.cpp`. This lowering process is often target specific, and is responsible for turning jump table entries, constant pool indices, global variable addresses, etc into MCLabels as appropriate. This translation layer is also responsible for expanding pseudo @@ -1648,29 +1576,26 @@ three important things that you have to implement for your target: Finally, at your choosing, you can also implement a subclass of MCCodeEmitter which lowers MCInst's into machine code bytes and relocations. This is -important if you want to support direct ``.o`` file emission, or would like to +important if you want to support direct `.o` file emission, or would like to implement an assembler for your target.
-Emitting function stack size information -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Emitting function stack size information A section containing metadata on function stack sizes will be emitted when -``TargetLoweringObjectFile::StackSizesSection`` is not null, and -``TargetOptions::EmitStackSizeSection`` is set (-stack-size-section). The +`TargetLoweringObjectFile::StackSizesSection` is not null, and +`TargetOptions::EmitStackSizeSection` is set (-stack-size-section). The section will contain an array of pairs of function symbol values (pointer size) and stack sizes (unsigned LEB128). The stack size values only include the space allocated in the function prologue. Functions with dynamic stack allocations are not included. -Emitting function call graph information -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Emitting function call graph information A section containing metadata on function call graph will be emitted when -``TargetOptions::EmitCallGraphSection`` is set (--call-graph-section). Layout of -this section is documented in detail at :doc:`CallGraphSection`. +`TargetOptions::EmitCallGraphSection` is set (--call-graph-section). Layout of +this section is documented in detail at {doc}`CallGraphSection`. -VLIW Packetizer ---------------- +### VLIW Packetizer In a Very Long Instruction Word (VLIW) architecture, the compiler is responsible for mapping instructions to functional-units available on the architecture. To @@ -1678,8 +1603,7 @@ that end, the compiler creates groups of instructions called *packets* or *bundles*. The VLIW packetizer in LLVM is a target-independent mechanism to enable the packetization of machine instructions. -Mapping from instructions to functional units -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Mapping from instructions to functional units Instructions in a VLIW target can typically be mapped to multiple functional units. 
During the process of packetizing, the compiler must be able to reason @@ -1691,8 +1615,7 @@ at compiler build time. These tables can then be queried by the provided machine-independent API to determine if an instruction can be accommodated in a packet. -How the packetization tables are generated and used -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### How the packetization tables are generated and used The packetizer reads instruction classes from a target's itineraries and creates a deterministic finite automaton (DFA) to represent the state of a packet. A DFA @@ -1705,17 +1628,16 @@ legal mapping of functional units to instructions, then the DFA contains a corresponding transition. The absence of a transition indicates that a legal mapping does not exist and that the instruction cannot be added to the packet. -To generate tables for a VLIW target, add *Target*\ GenDFAPacketizer.inc as a +To generate tables for a VLIW target, add *Target*GenDFAPacketizer.inc as a target to the Makefile in the target directory. The exported API provides three -functions: ``DFAPacketizer::clearResources()``, -``DFAPacketizer::reserveResources(MachineInstr *MI)``, and -``DFAPacketizer::canReserveResources(MachineInstr *MI)``. These functions allow +functions: `DFAPacketizer::clearResources()`, +`DFAPacketizer::reserveResources(MachineInstr *MI)`, and +`DFAPacketizer::canReserveResources(MachineInstr *MI)`. These functions allow a target packetizer to add an instruction to an existing packet and to check whether an instruction can be added to a packet. See -``llvm/CodeGen/DFAPacketizer.h`` for more information. +`llvm/CodeGen/DFAPacketizer.h` for more information. -Implementing a Native Assembler -=============================== +## Implementing a Native Assembler Though you're probably reading this because you want to write or maintain a compiler backend, LLVM also fully supports building a native assembler. 
@@ -1724,16 +1646,12 @@ We've tried hard to automate the generation of the assembler from the .td files part of the manual and repetitive data entry can be factored and shared with the compiler. -Instruction Parsing -------------------- +### Instruction Parsing -.. note:: - - To Be Written - - -Instruction Alias Processing ----------------------------- +```{note} +To Be Written +``` +### Instruction Alias Processing Once the instruction is parsed, it enters the MatchInstructionImpl function. The MatchInstructionImpl function performs alias processing and then performs actual @@ -1747,8 +1665,7 @@ complex/powerful). Generally you want to use the first alias mechanism that meets the needs of your instruction, because it will allow a more concise description. -Mnemonic Aliases -^^^^^^^^^^^^^^^^ +#### Mnemonic Aliases The first phase of alias processing is simple instruction mnemonic remapping for classes of instructions which are allowed with two different mnemonics. This @@ -1757,44 +1674,43 @@ output mnemonic. It isn't possible for this form of alias to look at the operands at all, so the remapping must apply for all forms of a given mnemonic. Mnemonic aliases are defined simply, for example X86 has: -:: - - def : MnemonicAlias<"cbw", "cbtw">; - def : MnemonicAlias<"smovq", "movsq">; - def : MnemonicAlias<"fldcww", "fldcw">; - def : MnemonicAlias<"fucompi", "fucomip">; - def : MnemonicAlias<"ud2a", "ud2">; +``` +def : MnemonicAlias<"cbw", "cbtw">; +def : MnemonicAlias<"smovq", "movsq">; +def : MnemonicAlias<"fldcww", "fldcw">; +def : MnemonicAlias<"fucompi", "fucomip">; +def : MnemonicAlias<"ud2a", "ud2">; +``` ... and many others. With a MnemonicAlias definition, the mnemonic is remapped simply and directly. 
Though MnemonicAlias's can't look at any aspect of the instruction (such as the operands) they can depend on global modes (the same ones supported by the matcher), through a Requires clause: -:: - - def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>; - def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>; +``` +def : MnemonicAlias<"pushf", "pushfq">, Requires<[In64BitMode]>; +def : MnemonicAlias<"pushf", "pushfl">, Requires<[In32BitMode]>; +``` In this example, the mnemonic gets mapped into a different one depending on the current instruction set. -Instruction Aliases -^^^^^^^^^^^^^^^^^^^ +#### Instruction Aliases The most general phase of alias processing occurs while matching is happening: it provides new forms for the matcher to match along with a specific instruction to generate. An instruction alias has two parts: the string to match and the instruction to generate. For example: -:: - - def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8 :$src)>; - def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>; - def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8 :$src)>; - def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16 :$src)>; - def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8 :$src)>; - def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16 :$src)>; - def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32 :$src)>; +``` +def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8 :$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8 :$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16 :$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8 :$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16 :$src)>; +def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32 :$src)>; +``` This shows a 
powerful example of the instruction aliases, matching the same mnemonic in multiple different ways depending on what operands are present in @@ -1802,12 +1718,12 @@ the assembly. The result of instruction aliases can include operands in a different order than the destination instruction, and can use an input multiple times, for example: -:: - - def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg)>; - def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>; - def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>; - def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>; +``` +def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg)>; +def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg)>; +def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg)>; +def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg)>; +``` This example also shows that tied operands are only listed once. In the X86 backend, XOR8rr has two input GR8's and one output GR8 (where an input is tied @@ -1816,16 +1732,16 @@ for tied operands. The result of an instruction alias can also use immediates and fixed physical registers which are added as simple immediate operands in the result, for example: -:: - - // Fixed Immediate operand. - def : InstAlias<"aad", (AAD8i8 10)>; +``` +// Fixed Immediate operand. +def : InstAlias<"aad", (AAD8i8 10)>; - // Fixed register operand. - def : InstAlias<"fcomi", (COM_FIr ST1)>; +// Fixed register operand. +def : InstAlias<"fcomi", (COM_FIr ST1)>; - // Simple alias. - def : InstAlias<"fcomi $reg", (COM_FIr RST:$reg)>; +// Simple alias. +def : InstAlias<"fcomi $reg", (COM_FIr RST:$reg)>; +``` Instruction aliases can also have a Requires clause to make them subtarget specific. @@ -1835,39 +1751,35 @@ alias rather than what's being aliased. It typically leads to better, more readable code. If it's better to print out what's being aliased, then pass a '0' as the third parameter to the InstAlias definition. 
-Instruction Matching --------------------- - -.. note:: - - To Be Written +### Instruction Matching -.. _Implementations of the abstract target description interfaces: -.. _implement the target description: +```{note} +To Be Written +``` +(Implementations of the abstract target description interfaces)= +(implement the target description)= -Target-specific Implementation Notes -==================================== +## Target-specific Implementation Notes This section of the document explains features or design decisions that are specific to the code generator for a particular target. -.. _tail call section: +(tail call section)= -Tail call optimization ----------------------- +### Tail call optimization Tail call optimization, callee reusing the stack of the caller, is currently supported on x86/x86-64, PowerPC, AArch64, and WebAssembly. It is performed on x86/x86-64, PowerPC, and AArch64 if: -* Caller and callee have the calling convention ``fastcc``, ``cc 10`` (GHC - calling convention), ``cc 11`` (HiPE calling convention), ``tailcc``, or - ``swifttailcc``. +* Caller and callee have the calling convention `fastcc`, `cc 10` (GHC + calling convention), `cc 11` (HiPE calling convention), `tailcc`, or + `swifttailcc`. * The call is a tail call - in tail position (ret immediately follows call and ret uses value of call or is void). -* Option ``-tailcallopt`` is enabled or the calling convention is ``tailcc``. +* Option `-tailcallopt` is enabled or the calling convention is `tailcc`. * Platform-specific constraints are met. @@ -1902,39 +1814,37 @@ AArch64 constraints: Example: -Call as ``llc -tailcallopt test.ll``. - -.. code-block:: llvm - - declare fastcc i32 @tailcallee(i32 inreg %a1, i32 inreg %a2, i32 %a3, i32 %a4) +Call as `llc -tailcallopt test.ll`. 
- define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { - %l1 = add i32 %in1, %in2 - %tmp = tail call fastcc i32 @tailcallee(i32 inreg %in1, i32 inreg %in2, i32 %in1, i32 %l1) - ret i32 %tmp - } +```llvm +declare fastcc i32 @tailcallee(i32 inreg %a1, i32 inreg %a2, i32 %a3, i32 %a4) -Implications of ``-tailcallopt``: +define fastcc i32 @tailcaller(i32 %in1, i32 %in2) { + %l1 = add i32 %in1, %in2 + %tmp = tail call fastcc i32 @tailcallee(i32 inreg %in1, i32 inreg %in2, i32 %in1, i32 %l1) + ret i32 %tmp +} +``` +Implications of `-tailcallopt`: To support tail call optimization in situations where the callee has more arguments than the caller a 'callee pops arguments' convention is used. This -currently causes each ``fastcc`` call that is not tail call optimized (because +currently causes each `fastcc` call that is not tail call optimized (because one or more of above constraints are not met) to be followed by a readjustment of the stack. So performance might be worse in such cases. -Sibling call optimization -------------------------- +### Sibling call optimization Sibling call optimization is a restricted form of tail call optimization. Unlike tail call optimization described in the previous section, it can be -performed automatically on any tail calls when ``-tailcallopt`` option is not +performed automatically on any tail calls when `-tailcallopt` option is not specified. Sibling call optimization is currently performed on x86/x86-64 when the following constraints are met: -* Caller and callee have the same calling convention. It can be either ``c`` or - ``fastcc``. +* Caller and callee have the same calling convention. It can be either `c` or + `fastcc`. * The call is a tail call - in tail position (ret immediately follows call and ret uses value of call or is void). @@ -1947,25 +1857,22 @@ following constraints are met: Example: -.. 
code-block:: llvm - - declare i32 @bar(i32, i32) +```llvm +declare i32 @bar(i32, i32) - define i32 @foo(i32 %a, i32 %b, i32 %c) { - entry: - %0 = tail call i32 @bar(i32 %a, i32 %b) - ret i32 %0 - } +define i32 @foo(i32 %a, i32 %b, i32 %c) { +entry: + %0 = tail call i32 @bar(i32 %a, i32 %b) + ret i32 %0 +} +``` +### The X86 backend -The X86 backend ---------------- - -The X86 code generator lives in the ``lib/Target/X86`` directory. This code +The X86 code generator lives in the `lib/Target/X86` directory. This code generator is capable of targeting a variety of x86-32 and x86-64 processors, and includes support for ISA extensions such as MMX and SSE. -X86 Target Triples supported -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### X86 Target Triples supported The following are the known target triples that are supported by the X86 backend. This is not an exhaustive list, and it would be useful to add those @@ -1985,8 +1892,7 @@ that people test. * **x86_64-unknown-linux-gnu** --- Linux -X86 Calling Conventions supported -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### X86 Calling Conventions supported The following target-specific calling conventions are known to backend: @@ -2000,36 +1906,34 @@ The following target-specific calling conventions are known to backend: others via stack. Callee is responsible for stack cleaning. This convention is used by MSVC by default for methods in its ABI (CC ID = 70). -.. _X86 addressing mode: +(X86 addressing mode)= -Representing X86 addressing modes in MachineInstrs -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Representing X86 addressing modes in MachineInstrs The x86 has a very flexible way of accessing memory. 
It is capable of forming memory addresses of the following expression directly in integer instructions (which use ModR/M addressing): -:: - - SegmentReg: Base + [1,2,4,8] * IndexReg + Disp32 +``` +SegmentReg: Base + [1,2,4,8] * IndexReg + Disp32 +``` In order to represent this, LLVM tracks no less than 5 operands for each memory -operand of this form. This means that the "load" form of '``mov``' has the -following ``MachineOperand``\s in this order: - -:: +operand of this form. This means that the "load" form of '`mov`' has the +following `MachineOperand`s in this order: - Index: 0 | 1 2 3 4 5 - Meaning: DestReg, | BaseReg, Scale, IndexReg, Displacement Segment - OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg, SignExtImm PhysReg +``` +Index: 0 | 1 2 3 4 5 +Meaning: DestReg, | BaseReg, Scale, IndexReg, Displacement Segment +OperandTy: VirtReg, | VirtReg, UnsImm, VirtReg, SignExtImm PhysReg +``` Stores, and all other instructions, treat the four memory operands in the same way and in the same order. If the segment register is unspecified (regno = 0), then no segment override is generated. "Lea" operations do not have a segment register specified, so they only have 4 operands for their memory reference. -X86 address spaces supported -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### X86 address spaces supported x86 has a feature which provides the ability to perform loads and stores to different address spaces via the x86 segment registers. A segment override @@ -2041,14 +1945,14 @@ represented by address space 256, the FS-segment is represented by address space 257, and the SS-segment is represented by address space 258. Other x86 segments have yet to be allocated address space numbers. -While these address spaces may seem similar to TLS via the ``thread_local`` +While these address spaces may seem similar to TLS via the `thread_local` keyword, and often use the same underlying hardware, there are some fundamental differences. 
-The ``thread_local`` keyword applies to global variables and specifies that they +The `thread_local` keyword applies to global variables and specifies that they are to be allocated in thread-local memory. There are no type qualifiers involved, and these variables can be pointed to with normal pointers and -accessed with normal loads and stores. The ``thread_local`` keyword is +accessed with normal loads and stores. The `thread_local` keyword is target-independent at the LLVM IR level (though LLVM doesn't yet have implementations of it for some configurations) @@ -2065,41 +1969,35 @@ Some operating systems and runtime environments use (or may in the future use) the FS/GS-segment registers for various low-level purposes, so care should be taken when considering them. -Instruction naming -^^^^^^^^^^^^^^^^^^ +#### Instruction naming An instruction name consists of the base name, a default operand size, and a character per operand with an optional special size. For example: -:: +``` +ADD8rr -> add, 8-bit register, 8-bit register +IMUL16rmi -> imul, 16-bit register, 16-bit memory, 16-bit immediate +IMUL16rmi8 -> imul, 16-bit register, 16-bit memory, 8-bit immediate +MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory +``` - ADD8rr -> add, 8-bit register, 8-bit register - IMUL16rmi -> imul, 16-bit register, 16-bit memory, 16-bit immediate - IMUL16rmi8 -> imul, 16-bit register, 16-bit memory, 8-bit immediate - MOVSX32rm16 -> movsx, 32-bit register, 16-bit memory +### The PowerPC backend -The PowerPC backend -------------------- - -The PowerPC code generator lives in the ``lib/Target/PowerPC`` directory. The code +The PowerPC code generator lives in the `lib/Target/PowerPC` directory. The code generation is retargetable to several variations or *subtargets* of the PowerPC ISA; including ppc32, ppc64 and altivec. -LLVM PowerPC ABI -^^^^^^^^^^^^^^^^ +#### LLVM PowerPC ABI LLVM follows the AIX PowerPC ABI, with two deviations. 
LLVM uses a PC relative (PIC) or static addressing for accessing global values, so no TOC (r2) is used. Second, r31 is used as a frame pointer to allow dynamic growth of a stack frame. LLVM takes advantage of having no TOC to provide space to save the frame pointer in the PowerPC linkage area of the caller frame. Other details of -PowerPC ABI can be found at `PowerPC ABI -`_\ -. Note: This link describes the 32-bit ABI. The 64-bit ABI is similar except +PowerPC ABI can be found at [PowerPC ABI](http://developer.apple.com/documentation/DeveloperTools/Conceptual/LowLevelABI/Articles/32bitPowerPC.html). Note: This link describes the 32-bit ABI. The 64-bit ABI is similar except space for GPRs are 8 bytes wide (not 4) and r13 is reserved for system use. -Frame Layout -^^^^^^^^^^^^ +#### Frame Layout The size of a PowerPC frame is usually fixed for the duration of a function's invocation. Since the frame is fixed size, all references into the frame can be @@ -2112,29 +2010,15 @@ that space allocated for altivec vectors will be properly aligned. An invocation frame is laid out as follows (low memory at top): -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`
Linkage

Parameter area

Dynamic area

Locals area

Saved registers area


Previous Frame

` +| Stack frame area | +| --- | +| Linkage | +| Parameter area | +| Dynamic area | +| Locals area | +| Saved registers area | +| | +| Previous Frame | The *linkage* area is used by a callee to save special registers prior to allocating its own frame. Only three entries are relevant to LLVM. The first @@ -2149,61 +2033,25 @@ GPR, thus the linkage area is 24 bytes long in 32-bit mode and 48 bytes in 32-bit linkage area: -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`
0 | Saved SP (r1)
4 | Saved CR
8 | Saved LR
12 | Reserved
16 | Reserved
20 | Saved FP (r31)
` +| Offset | Contents | +| --- | --- | +| 0 | Saved SP (r1) | +| 4 | Saved CR | +| 8 | Saved LR | +| 12 | Reserved | +| 16 | Reserved | +| 20 | Saved FP (r31) | 64-bit linkage area: -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`
0 | Saved SP (r1)
8 | Saved CR
16 | Saved LR
24 | Reserved
32 | Reserved
40 | Saved FP (r31)
` +| Offset | Contents | +| --- | --- | +| 0 | Saved SP (r1) | +| 8 | Saved CR | +| 16 | Saved LR | +| 24 | Reserved | +| 32 | Reserved | +| 40 | Saved FP (r31) | The *parameter area* is used to store arguments being passed to a callee function. Following the PowerPC ABI, the first few arguments are actually @@ -2235,8 +2083,7 @@ The *locals area* is where the llvm compiler reserves space for local variables. The *saved registers area* is where the llvm compiler spills callee saved registers on entry to the callee. -Prolog/Epilog -^^^^^^^^^^^^^ +#### Prolog/Epilog The llvm prolog and epilog are the same as described in the PowerPC ABI, with the following exceptions. Callee saved registers are spilled after the frame is @@ -2245,15 +2092,12 @@ targets. The base pointer callee saved register r31 is saved in the TOC slot of linkage area. This simplifies allocation of space for the base pointer and makes it convenient to locate programmatically and during debugging. -Dynamic Allocation -^^^^^^^^^^^^^^^^^^ - -.. note:: +#### Dynamic Allocation - TODO - More to come. - -The NVPTX backend ------------------ +```{note} +TODO - More to come. +``` +### The NVPTX backend The NVPTX code generator under lib/Target/NVPTX is an open-source version of the NVIDIA NVPTX code generator for LLVM. It is contributed by NVIDIA and is @@ -2266,43 +2110,20 @@ the official NVIDIA toolchain. Code Generator Options: -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`` -:raw-html:`
Option | Description
sm_20 | Set shader model/compute capability to 2.0
sm_21 | Set shader model/compute capability to 2.1
sm_30 | Set shader model/compute capability to 3.0
sm_35 | Set shader model/compute capability to 3.5
ptx30 | Target PTX 3.0
ptx31 | Target PTX 3.1
` - -The extended Berkeley Packet Filter (eBPF) backend --------------------------------------------------- +| Option | Description | +| --- | --- | +| sm_20 | Set shader model/compute capability to 2.0 | +| sm_21 | Set shader model/compute capability to 2.1 | +| sm_30 | Set shader model/compute capability to 3.0 | +| sm_35 | Set shader model/compute capability to 3.5 | +| ptx30 | Target PTX 3.0 | +| ptx31 | Target PTX 3.1 | + +### The extended Berkeley Packet Filter (eBPF) backend Extended BPF (or eBPF) is similar to the original ("classic") BPF (cBPF) used to filter network packets. The -`bpf() system call `_ +[bpf() system call](http://man7.org/linux/man-pages/man2/bpf.2.html) performs a range of operations related to eBPF. For both cBPF and eBPF programs, the Linux kernel statically analyzes the programs before loading them, in order to ensure that they cannot harm the running system. eBPF is @@ -2310,134 +2131,133 @@ a 64-bit RISC instruction set designed for one to one mapping to 64-bit CPUs. Opcodes are 8-bit encoded, and 87 instructions are defined. There are 10 registers, grouped by function as outlined below. -:: - - R0 return value from in-kernel functions; exit value for eBPF program - R1 - R5 function call arguments to in-kernel functions - R6 - R9 callee-saved registers preserved by in-kernel functions - R10 stack frame pointer (read only) +``` +R0 return value from in-kernel functions; exit value for eBPF program +R1 - R5 function call arguments to in-kernel functions +R6 - R9 callee-saved registers preserved by in-kernel functions +R10 stack frame pointer (read only) +``` -Instruction encoding (arithmetic and jump) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Instruction encoding (arithmetic and jump) eBPF is reusing most of the opcode encoding from classic to simplify conversion of classic BPF to eBPF. 
For arithmetic and jump instructions the 8-bit 'code' field is divided into three parts: -:: - - +----------------+--------+--------------------+ - | 4 bits | 1 bit | 3 bits | - | operation code | source | instruction class | - +----------------+--------+--------------------+ - (MSB) (LSB) +``` ++----------------+--------+--------------------+ +| 4 bits | 1 bit | 3 bits | +| operation code | source | instruction class | ++----------------+--------+--------------------+ +(MSB) (LSB) +``` Three LSB bits store instruction class which is one of: -:: - - BPF_LD 0x0 - BPF_LDX 0x1 - BPF_ST 0x2 - BPF_STX 0x3 - BPF_ALU 0x4 - BPF_JMP 0x5 - (unused) 0x6 - BPF_ALU64 0x7 +``` +BPF_LD 0x0 +BPF_LDX 0x1 +BPF_ST 0x2 +BPF_STX 0x3 +BPF_ALU 0x4 +BPF_JMP 0x5 +(unused) 0x6 +BPF_ALU64 0x7 +``` When BPF_CLASS(code) == BPF_ALU or BPF_ALU64 or BPF_JMP, 4th bit encodes source operand -:: - - BPF_X 0x1 use src_reg register as source operand - BPF_K 0x0 use 32-bit immediate as source operand +``` +BPF_X 0x1 use src_reg register as source operand +BPF_K 0x0 use 32-bit immediate as source operand +``` and four MSB bits store operation code -:: - - BPF_ADD 0x0 add - BPF_SUB 0x1 subtract - BPF_MUL 0x2 multiply - BPF_DIV 0x3 divide - BPF_OR 0x4 bitwise logical OR - BPF_AND 0x5 bitwise logical AND - BPF_LSH 0x6 left shift - BPF_RSH 0x7 right shift (zero extended) - BPF_NEG 0x8 arithmetic negation - BPF_MOD 0x9 modulo - BPF_XOR 0xa bitwise logical XOR - BPF_MOV 0xb move register to register - BPF_ARSH 0xc right shift (sign extended) - BPF_END 0xd endianness conversion +``` +BPF_ADD 0x0 add +BPF_SUB 0x1 subtract +BPF_MUL 0x2 multiply +BPF_DIV 0x3 divide +BPF_OR 0x4 bitwise logical OR +BPF_AND 0x5 bitwise logical AND +BPF_LSH 0x6 left shift +BPF_RSH 0x7 right shift (zero extended) +BPF_NEG 0x8 arithmetic negation +BPF_MOD 0x9 modulo +BPF_XOR 0xa bitwise logical XOR +BPF_MOV 0xb move register to register +BPF_ARSH 0xc right shift (sign extended) +BPF_END 0xd endianness conversion +``` If BPF_CLASS(code) 
== BPF_JMP, BPF_OP(code) is one of -:: - - BPF_JA 0x0 unconditional jump - BPF_JEQ 0x1 jump == - BPF_JGT 0x2 jump > - BPF_JGE 0x3 jump >= - BPF_JSET 0x4 jump if (DST & SRC) - BPF_JNE 0x5 jump != - BPF_JSGT 0x6 jump signed > - BPF_JSGE 0x7 jump signed >= - BPF_CALL 0x8 function call - BPF_EXIT 0x9 function return - -Instruction encoding (load, store) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +``` +BPF_JA 0x0 unconditional jump +BPF_JEQ 0x1 jump == +BPF_JGT 0x2 jump > +BPF_JGE 0x3 jump >= +BPF_JSET 0x4 jump if (DST & SRC) +BPF_JNE 0x5 jump != +BPF_JSGT 0x6 jump signed > +BPF_JSGE 0x7 jump signed >= +BPF_CALL 0x8 function call +BPF_EXIT 0x9 function return +``` + +#### Instruction encoding (load, store) For load and store instructions the 8-bit 'code' field is divided as: -:: - - +--------+--------+-------------------+ - | 3 bits | 2 bits | 3 bits | - | mode | size | instruction class | - +--------+--------+-------------------+ - (MSB) (LSB) +``` ++--------+--------+-------------------+ +| 3 bits | 2 bits | 3 bits | +| mode | size | instruction class | ++--------+--------+-------------------+ +(MSB) (LSB) +``` Size modifier is one of -:: - - BPF_W 0x0 word - BPF_H 0x1 half word - BPF_B 0x2 byte - BPF_DW 0x3 double word +``` +BPF_W 0x0 word +BPF_H 0x1 half word +BPF_B 0x2 byte +BPF_DW 0x3 double word +``` Mode modifier is one of -:: - - BPF_IMM 0x0 immediate - BPF_ABS 0x1 used to access packet data - BPF_IND 0x2 used to access packet data - BPF_MEM 0x3 memory - (reserved) 0x4 - (reserved) 0x5 - BPF_XADD 0x6 exclusive add +``` +BPF_IMM 0x0 immediate +BPF_ABS 0x1 used to access packet data +BPF_IND 0x2 used to access packet data +BPF_MEM 0x3 memory +(reserved) 0x4 +(reserved) 0x5 +BPF_XADD 0x6 exclusive add +``` -Packet data access (BPF_ABS, BPF_IND) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Packet data access (BPF_ABS, BPF_IND) -Two non-generic instructions: (BPF_ABS | | BPF_LD) and -(BPF_IND | | BPF_LD) which are used to access packet data. 
+Two non-generic instructions: (`BPF_ABS | <size> | BPF_LD`) and +(`BPF_IND | <size> | BPF_LD`) which are used to access packet data. Register R6 is an implicit input that must contain pointer to sk_buff. Register R0 is an implicit output which contains the data fetched from the packet. Registers R1-R5 are scratch registers and must not -be used to store the data across BPF_ABS | BPF_LD or BPF_IND | BPF_LD +be used to store the data across `BPF_ABS | BPF_LD` or `BPF_IND | BPF_LD` instructions. These instructions have implicit program exit condition as well. When eBPF program is trying to access the data beyond the packet boundary, the interpreter will abort the execution of the program. -BPF_IND | BPF_W | BPF_LD is equivalent to: - R0 = ntohl(\*(u32 \*) (((struct sk_buff \*) R6)->data + src_reg + imm32)) +`BPF_IND | BPF_W | BPF_LD` is equivalent to: -eBPF maps -^^^^^^^^^ +``` +R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32)) +``` + +#### eBPF maps eBPF maps are provided for sharing data between kernel and user-space. Currently implemented types are hash and array, with potential extension to @@ -2445,8 +2265,7 @@ support bloom filters, radix trees, etc. A map is defined by its type, maximum number of elements, key size and value size in bytes. eBPF syscall supports create, update, find and delete functions on maps. -Function calls -^^^^^^^^^^^^^^ +#### Function calls Function call arguments are passed using up to five registers (R1 - R5). The return value is passed in a dedicated register (R0). Four additional @@ -2458,25 +2277,24 @@ using the read-only frame pointer R10. eBPF registers map 1:1 to hardware registers on x86_64 and other 64-bit architectures.
For example, x86_64 in-kernel JIT maps them as -:: - - R0 - rax - R1 - rdi - R2 - rsi - R3 - rdx - R4 - rcx - R5 - r8 - R6 - rbx - R7 - r13 - R8 - r14 - R9 - r15 - R10 - rbp +``` +R0 - rax +R1 - rdi +R2 - rsi +R3 - rdx +R4 - rcx +R5 - r8 +R6 - rbx +R7 - r13 +R8 - r14 +R9 - r15 +R10 - rbp +``` since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing and rbx, r12 - r15 are callee saved. -Program start -^^^^^^^^^^^^^ +#### Program start An eBPF program receives a single argument and contains a single eBPF main routine; the program does not contain eBPF functions. @@ -2486,17 +2304,15 @@ a limited number of kernel function calls. Prior to running an eBPF program, a verifier performs static analysis to prevent loops in the code and to ensure valid register usage and operand types. -The AMDGPU backend ------------------- +### The AMDGPU backend -The AMDGPU code generator lives in the ``lib/Target/AMDGPU`` +The AMDGPU code generator lives in the `lib/Target/AMDGPU` directory. This code generator is capable of targeting a variety of -AMD GPU processors. Refer to :doc:`AMDGPUUsage` for more information. +AMD GPU processors. Refer to {doc}`AMDGPUUsage` for more information. -The Lightweight Fault Isolation (LFI) sub-architecture ------------------------------------------------------- +### The Lightweight Fault Isolation (LFI) sub-architecture LFI is a sub-architecture available for certain backends that allows programs compiled for the target to run in a sandboxed environment that is within the -same address space as host code. Refer to :doc:`LFI` for more information about +same address space as host code. Refer to {doc}`LFI` for more information about LFI. diff --git a/llvm/docs/CodingStandards.md b/llvm/docs/CodingStandards.md index 2cffcdd96b1dd..aac3f32c9c8de 100644 --- a/llvm/docs/CodingStandards.md +++ b/llvm/docs/CodingStandards.md @@ -1,12 +1,10 @@ -===================== -LLVM Coding Standards -===================== +# LLVM Coding Standards -.. 
contents:: - :local: +```{contents} +:local: +``` -Introduction -============ +## Introduction This document describes coding standards that are used in the LLVM project. Although no coding standards should be regarded as absolute requirements to be @@ -18,14 +16,14 @@ While this document may provide guidance for some mechanical formatting issues, whitespace, or other "microscopic details", these are not fixed standards. Always follow the golden rule: -.. _Golden Rule: +(Golden Rule)= - **If you are extending, enhancing, or bug fixing already implemented code, - use the style that is already being used so that the source is uniform and - easy to follow.** +> **If you are extending, enhancing, or bug fixing already implemented code, +> use the style that is already being used so that the source is uniform and +> easy to follow.** -Note that some code bases (e.g. ``libc++``) have special reasons to deviate -from the coding standards. For example, in the case of ``libc++``, this is +Note that some code bases (e.g. `libc++`) have special reasons to deviate +from the coding standards. For example, in the case of `libc++`, this is because the naming and other conventions are dictated by the C++ standard. There are some conventions that are not uniformly followed in the code base @@ -40,8 +38,7 @@ make code review easier. The ultimate goal of these guidelines is to increase the readability and maintainability of our common source base. -Languages, Libraries, and Standards -=================================== +## Languages, Libraries, and Standards Most source code in LLVM and other LLVM projects using these coding standards is C++ code. There are some places where C code is used either due to @@ -53,15 +50,14 @@ choice. For automation, build systems, and utility scripts, Python is preferred and is widely used in the LLVM repository already. 
-C++ Standard Versions ---------------------- +### C++ Standard Versions Unless otherwise documented, LLVM subprojects are written using standard C++17 code and avoid unnecessary vendor-specific extensions. Nevertheless, we restrict ourselves to features which are available in the -major toolchains supported as host compilers (see :doc:`GettingStarted` page, -section `Software`). +major toolchains supported as host compilers (see {doc}`GettingStarted` page, +section [Software](project:GettingStarted.md#software)). Each toolchain provides a good reference for what it accepts: @@ -76,7 +72,7 @@ Each toolchain provides a good reference for what it accepts: * MSVC: https://learn.microsoft.com/cpp/overview/visual-cpp-language-conformance Additionally, there are compiler comparison tables of supported C++ features on -`cppreference.com `_. +[cppreference.com](https://en.cppreference.com/w/cpp/compiler_support/17). To keep track with the evolution of the standard, newer C++ versions can be used to build LLVM. However, our support focuses on the minimum supported C++ @@ -85,8 +81,7 @@ latest version of the supported toolchains and possibly not across all the subprojects. -C++ Standard Library --------------------- +### C++ Standard Library Instead of implementing custom data structures, we encourage the use of C++ standard library facilities or LLVM support libraries whenever they are @@ -94,60 +89,63 @@ available for a particular task. LLVM and related projects emphasize and rely on the standard library facilities and the LLVM support libraries as much as possible. -LLVM support libraries (for example, `ADT -`_) +LLVM support libraries (for example, [ADT]) implement specialized data structures or functionality missing in the standard -library. Such libraries are usually implemented in the ``llvm`` namespace and +library. Such libraries are usually implemented in the `llvm` namespace and follow the expected standard interface when there is one. 
+[ADT]: https://github.com/llvm/llvm-project/tree/main/llvm/include/llvm/ADT + When both C++ and the LLVM support libraries provide similar functionality, and there isn't a specific reason to favor the C++ implementation, it is generally -preferable to use the LLVM library. For example, ``llvm::DenseMap`` should -almost always be used instead of ``std::map`` or ``std::unordered_map``, and -``llvm::SmallVector`` should usually be used instead of ``std::vector``. +preferable to use the LLVM library. For example, `llvm::DenseMap` should +almost always be used instead of `std::map` or `std::unordered_map`, and +`llvm::SmallVector` should usually be used instead of `std::vector`. We explicitly avoid some standard facilities, like the I/O streams, and instead -use LLVM's streams library (raw_ostream_). More detailed information on these -subjects is available in the :doc:`ProgrammersManual`. +use LLVM's streams library ({ref}`raw_ostream `). More detailed information on these +subjects is available in the {doc}`ProgrammersManual`. For more information about LLVM's data structures and the tradeoffs they make, -please consult `that section of the programmer's manual -`_. +please consult [that section of the programmer's manual]. + +[that section of the programmer's manual]: https://llvm.org/docs/ProgrammersManual.html#picking-the-right-data-structure-for-a-task -Python version and Source Code Formatting ------------------------------------------ +### Python version and Source Code Formatting -The current minimum version of Python required is documented in the :doc:`GettingStarted` +The current minimum version of Python required is documented in the {doc}`GettingStarted` section. Python code in the LLVM repository should only use language features available in this version of Python. The Python code within the LLVM repository should adhere to the formatting guidelines -outlined in `PEP 8 `_. +outlined in [PEP 8](https://peps.python.org/pep-0008/). 
For consistency and to limit churn, code should be automatically formatted with -the `black `_ utility, which is PEP 8 compliant. -Use its default rules. For example, avoid specifying ``--line-length`` even +the [black](https://github.com/psf/black) utility, which is PEP 8 compliant. +Use its default rules. For example, avoid specifying `--line-length` even though it does not default to 80. The default rules can change between major versions of black. In order to avoid unnecessary churn in the formatting rules, we currently use black version 23.x in LLVM. When contributing a patch unrelated to formatting, you should format only the -Python code that the patch modifies. For this purpose, use the `darker -`_ utility, which runs default black rules +Python code that the patch modifies. For this purpose, use the [darker] +utility, which runs default black rules over only the modified Python code. Doing so should ensure the patch will pass the Python format checks in LLVM's pre-commit CI, which also uses darker. When contributing a patch specifically for reformatting Python files, use black, which currently only supports formatting entire files. +[darker]: https://pypi.org/project/darker/ + Here are some quick examples, but see the black and darker documentation for details: -.. code-block:: bash - - $ pip install black=='23.*' darker # install black 23.x and darker - $ darker test.py # format uncommitted changes - $ darker -r HEAD^ test.py # also format changes from last commit - $ black test.py # format entire file +```bash +$ pip install black=='23.*' darker # install black 23.x and darker +$ darker test.py # format uncommitted changes +$ darker -r HEAD^ test.py # also format changes from last commit +$ black test.py # format entire file +``` Instead of individual file names, you can specify directories to darker, and it will find the changed files. 
However, if a directory is @@ -155,79 +153,72 @@ large, like a clone of the LLVM repository, darker can be painfully slow. In that case, you might wish to use git to list changed files. For example: -.. code-block:: bash - - $ darker -r HEAD^ $(git diff --name-only --diff-filter=d HEAD^) +```bash +$ darker -r HEAD^ $(git diff --name-only --diff-filter=d HEAD^) +``` -Mechanical Source Issues -======================== +## Mechanical Source Issues -Source Code Formatting ----------------------- +### Source Code Formatting -Commenting -^^^^^^^^^^ +#### Commenting Comments are important for readability and maintainability. When writing comments, write them as English prose, using proper capitalization, punctuation, etc. Aim to describe what the code is trying to do and why, not *how* it does it at a micro level. Here are a few important things to document: -.. _header file comment: +(header file comment)= -File Headers -"""""""""""" +##### File Headers Every source file should have a header on it that describes the basic purpose of the file. The standard header looks like this: -.. code-block:: c++ - - //===----------------------------------------------------------------------===// - // - // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - // See https://llvm.org/LICENSE.txt for license information. - // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - // - //===----------------------------------------------------------------------===// - /// - /// \file - /// This file contains the declaration of the Instruction class, which is the - /// base class for all of the VM instructions. - /// - //===----------------------------------------------------------------------===// +```c++ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of the Instruction class, which is the +/// base class for all of the VM instructions. +/// +//===----------------------------------------------------------------------===// +``` The first section in the file is a concise note that defines the license that the file is released under. This makes it perfectly clear what terms the source code can be distributed under and should not be modified in any way. -The main body is a `Doxygen `_ comment (identified by -the ``///`` comment marker instead of the usual ``//``) describing the purpose -of the file. The first sentence (or a passage beginning with ``\brief``) is +The main body is a [Doxygen](http://www.doxygen.nl/) comment (identified by +the `///` comment marker instead of the usual `//`) describing the purpose +of the file. The first sentence (or a passage beginning with `\brief`) is used as an abstract. Any additional information should be separated by a blank line. If an algorithm is based on a paper or is described in another source, provide a reference. -Header Guard -"""""""""""" +##### Header Guard The header file's guard should be the all-caps path that a user of this header would #include, using '_' instead of path separator and extension marker. For example, the header file -``llvm/include/llvm/Analysis/Utils/Local.h`` would be ``#include``-ed as -``#include "llvm/Analysis/Utils/Local.h"``, so its guard is -``LLVM_ANALYSIS_UTILS_LOCAL_H``. +`llvm/include/llvm/Analysis/Utils/Local.h` would be `#include`-ed as +`#include "llvm/Analysis/Utils/Local.h"`, so its guard is +`LLVM_ANALYSIS_UTILS_LOCAL_H`. -Class overviews -""""""""""""""" +##### Class overviews Classes are a fundamental part of an object-oriented design. 
As such, a class definition should have a comment block that explains what the class is used for and how it works. Every non-trivial class is expected to have a -``doxygen`` comment block. +`doxygen` comment block. -Method information -"""""""""""""""""" +##### Method information Methods and global functions should also be documented. A quick note about what it does and a description of the edge cases is all that is necessary here. @@ -237,97 +228,95 @@ the code itself. Good things to talk about here are what happens when something unexpected happens, for instance, does the method return null? -Comment Formatting -^^^^^^^^^^^^^^^^^^ +#### Comment Formatting -In general, prefer C++-style comments (``//`` for normal comments, ``///`` for -``doxygen`` documentation comments). There are a few cases when it is -useful to use C-style (``/* */``) comments, however: +In general, prefer C++-style comments (`//` for normal comments, `///` for +`doxygen` documentation comments). There are a few cases when it is +useful to use C-style (`/* */`) comments, however: -#. When writing C code to be compatible with C89. +1. When writing C code to be compatible with C89. -#. When writing a header file that may be ``#include``\d by a C source file. +1. When writing a header file that may be `#include`d by a C source file. -#. When writing a source file that is used by a tool that only accepts C-style +1. When writing a source file that is used by a tool that only accepts C-style comments. -#. When documenting the significance of constants used as actual parameters in - a call. This is most helpful for ``bool`` parameters, or passing ``0`` or - ``nullptr``. The comment should contain the parameter name, which ought to be +1. When documenting the significance of constants used as actual parameters in + a call. This is most helpful for `bool` parameters, or passing `0` or + `nullptr`. The comment should contain the parameter name, which ought to be meaningful. 
For example, it's not clear what the parameter means in this call: - .. code-block:: c++ - - Object.emitName(nullptr); + ```c++ + Object.emitName(nullptr); + ``` An in-line C-style comment makes the intent obvious: - .. code-block:: c++ - - Object.emitName(/*Prefix=*/nullptr); + ```c++ + Object.emitName(/*Prefix=*/nullptr); + ``` Commenting out large blocks of code is discouraged, but if you really have to do this (for documentation purposes or as a suggestion for debug printing), use -``#if 0`` and ``#endif``. These nest properly and are better behaved in general +`#if 0` and `#endif`. These nest properly and are better behaved in general than C-style comments. -Doxygen Use in Documentation Comments -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Doxygen Use in Documentation Comments -Use the ``\file`` command to turn the standard file header into a file-level +Use the `\file` command to turn the standard file header into a file-level comment. Include descriptive paragraphs for all public interfaces (public classes, member and non-member functions). Avoid restating the information that can be inferred from the API name or signature. The first sentence (or a paragraph -beginning with ``\brief``) is used as an abstract. Try to use a single -sentence as the ``\brief`` adds visual clutter. Put detailed discussion into +beginning with `\brief`) is used as an abstract. Try to use a single +sentence as the `\brief` adds visual clutter. Put detailed discussion into separate paragraphs. A minimal documentation comment: -.. code-block:: c++ - - /// Sets the xyzzy property to \p Baz. - void setXyzzy(bool Baz); +```c++ +/// Sets the xyzzy property to \p Baz. +void setXyzzy(bool Baz); +``` Only include code examples, function parameters and return values when it provides additional information, such as intent, usage, or behavior that’s non-obvious. Use descriptive function and argument names to eliminate the need for documentation comments when possible. 
-To refer to parameter names inside a paragraph, use the ``\p name`` command. -Don't use the ``\arg name`` command since it starts a new paragraph that +To refer to parameter names inside a paragraph, use the `\p name` command. +Don't use the `\arg name` command since it starts a new paragraph that contains documentation for the parameter. -Wrap non-inline code examples in ``\code ... \endcode``. +Wrap non-inline code examples in `\code ... \endcode`. To document a function parameter, start a new paragraph with the -``\param name`` command. If the parameter is used as an out or an in/out -parameter, use the ``\param [out] name`` or ``\param [in,out] name`` command, +`\param name` command. If the parameter is used as an out or an in/out +parameter, use the `\param [out] name` or `\param [in,out] name` command, respectively. -To describe function return value, start a new paragraph with the ``\returns`` +To describe function return value, start a new paragraph with the `\returns` command. A documentation comment that uses all Doxygen features in a preferred way: -.. code-block:: c++ - - /// Does foo and bar. - /// - /// Does not do foo the usual way if \p Baz is true. - /// - /// Typical usage: - /// \code - /// fooBar(false, "quux", Res); - /// \endcode - /// - /// \param Quux kind of foo to do. - /// \param [out] Result filled with bar sequence on foo success. - /// - /// \returns true on success. - bool fooBar(bool Baz, StringRef Quux, std::vector<int> &Result); +```c++ +/// Does foo and bar. +/// +/// Does not do foo the usual way if \p Baz is true. +/// +/// Typical usage: +/// \code +/// fooBar(false, "quux", Res); +/// \endcode +/// +/// \param Quux kind of foo to do. +/// \param [out] Result filled with bar sequence on foo success. +/// +/// \returns true on success. +bool fooBar(bool Baz, StringRef Quux, std::vector<int> &Result); +``` Don't duplicate the documentation comment in the header file and in the implementation file.
Put the documentation comments for public APIs into the @@ -343,34 +332,33 @@ to the correct declaration. Avoid: -.. code-block:: c++ - - // Example.h: +```c++ +// Example.h: - // example - Does something important. - void example(); +// example - Does something important. +void example(); - // Example.cpp: +// Example.cpp: - // example - Does something important. - void example() { ... } +// example - Does something important. +void example() { ... } +``` Preferred: -.. code-block:: c++ +```c++ +// Example.h: - // Example.h: +/// Does something important. +void example(); - /// Does something important. - void example(); +// Example.cpp: - // Example.cpp: +/// Builds a B-tree in order to do foo. See paper by... +void example() { ... } +``` - /// Builds a B-tree in order to do foo. See paper by... - void example() { ... } - -Error and Warning Messages -^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Error and Warning Messages Clear diagnostic messages are important to help users identify and fix issues in their inputs. Use succinct but correct English prose that gives the user the @@ -382,16 +370,16 @@ end in one otherwise. Sentences which end with different punctuation, such as For example, this is a good error message: -.. code-block:: none - - error: file.o: section header 3 is corrupt. Size is 10 when it should be 20 +```text +error: file.o: section header 3 is corrupt. Size is 10 when it should be 20 +``` This is a bad message, since it does not provide useful information and uses the wrong style: -.. code-block:: none - - error: file.o: Corrupt section header. +```text +error: file.o: Corrupt section header. +``` As with other coding standards, individual projects, such as the Clang Static Analyzer, may have preexisting styles that do not conform to this. If a @@ -400,54 +388,52 @@ that style instead. Otherwise, this standard applies to all LLVM tools, including clang, clang-tidy, and so on. 
If the tool or project does not have existing functions to emit warnings or -errors, use the error and warning handlers provided in ``Support/WithColor.h`` +errors, use the error and warning handlers provided in `Support/WithColor.h` to ensure they are printed in the appropriate style, rather than printing to stderr directly. -When using ``report_fatal_error``, follow the same standards for the message as -regular error messages. Assertion messages and ``llvm_unreachable`` calls do not +When using `report_fatal_error`, follow the same standards for the message as +regular error messages. Assertion messages and `llvm_unreachable` calls do not necessarily need to follow these same styles as they are automatically formatted, and thus these guidelines may not be suitable. -``#include`` Style -^^^^^^^^^^^^^^^^^^ +#### `#include` Style -Immediately after the `header file comment`_ (and include guards if working on a -header file), the `minimal list of #includes`_ required by the file should be -listed. We prefer these ``#include``\s to be listed in this order: +Immediately after the {ref}`header file comment <header file comment>` (and include guards if working on a +header file), the {ref}`minimal list of #includes <minimal list of #includes>` required by the file should be +listed. We prefer these `#include`s to be listed in this order: -.. _Main Module Header: -.. _Local/Private Headers: +(Main Module Header)= +(Local/Private Headers)= -#. Main Module Header -#. Local/Private Headers -#. LLVM project/subproject headers (``clang/...``, ``lldb/...``, ``llvm/...``, etc) -#. System ``#include``\s +1. Main Module Header +1. Local/Private Headers +1. LLVM project/subproject headers (`clang/...`, `lldb/...`, `llvm/...`, etc) +1. System `#include`s and each category should be sorted lexicographically by the full path. -The `Main Module Header`_ file applies to ``.cpp`` files which implement an -interface defined by a ``.h`` file. This ``#include`` should always be included +The {ref}`Main Module Header <Main Module Header>` file applies to `.cpp` files which implement an +interface defined by a `.h` file. This `#include` should always be included **first** regardless of where it lives on the file system. By including a -header file first in the ``.cpp`` files that implement the interfaces, we ensure +header file first in the `.cpp` files that implement the interfaces, we ensure that the header does not have any hidden dependencies which are not explicitly -``#include``\d in the header, but should be. It is also a form of documentation -in the ``.cpp`` file to indicate where the interfaces it implements are defined. +`#include`d in the header, but should be. It is also a form of documentation +in the `.cpp` file to indicate where the interfaces it implements are defined. LLVM project and subproject headers should be grouped from most specific to least specific, for the same reasons described above. For example, LLDB depends on both clang and LLVM, and clang depends on LLVM. So an LLDB source file should -include ``lldb`` headers first, followed by ``clang`` headers, followed by -``llvm`` headers, to reduce the possibility (for example) of an LLDB header +include `lldb` headers first, followed by `clang` headers, followed by +`llvm` headers, to reduce the possibility (for example) of an LLDB header accidentally picking up a missing include due to the previous inclusion of that header in the main source file or some earlier header file. clang should similarly include its own headers before including llvm headers. This rule applies to all LLVM subprojects. -.. _fit into 80 columns: +(fit into 80 columns)= -Source Code Width -^^^^^^^^^^^^^^^^^ +#### Source Code Width Write your code to fit within 80 columns. @@ -472,8 +458,7 @@ If your reformatting is causing unnecessary changes in subsequent lines, please Unnecessary changes in TD files cause unnecessary churn in the file history and force recompilation of many unnecessary files.
It also causes unnecessary changes in forks working on the same TD files, which makes it harder to rebase and merge later. -Whitespace -^^^^^^^^^^ +#### Whitespace In all cases, prefer spaces to tabs in source files. People have different preferred indentation levels, and different styles of indentation that they @@ -481,15 +466,14 @@ like; this is fine. What isn't fine is that different editors/viewers expand tabs out to different tab stops. This can cause your code to look completely unreadable, and it is not worth dealing with. -As always, follow the `Golden Rule`_ above: follow the style of existing code +As always, follow the {ref}`Golden Rule <Golden Rule>` above: follow the style of existing code if you are modifying and extending it. Do not add trailing whitespace. Some common editors will automatically remove trailing whitespace when saving a file which causes unrelated changes to appear in diffs and commits. -Format Lambdas Like Blocks Of Code -"""""""""""""""""""""""""""""""""" +##### Format Lambdas Like Blocks Of Code When formatting a multi-line lambda, format it like a block of code. If there is only one multi-line lambda in a statement, and there are no expressions @@ -497,42 +481,41 @@ lexically after it in the statement, drop the indent to the standard two space indent for a block of code, as if it were an if-block opened by the preceding part of the statement: -.. code-block:: c++ - - std::sort(foo.begin(), foo.end(), [&](Foo a, Foo b) -> bool { - if (a.blah < b.blah) - return true; - if (a.baz < b.baz) - return true; - return a.bam < b.bam; - }); +```c++ +std::sort(foo.begin(), foo.end(), [&](Foo a, Foo b) -> bool { + if (a.blah < b.blah) + return true; + if (a.baz < b.baz) + return true; + return a.bam < b.bam; +}); +``` To take best advantage of this formatting, if you are designing an API which accepts a continuation or single callable argument (be it a function object, or -a ``std::function``), it should be the last argument if at all possible.
+a `std::function`), it should be the last argument if at all possible. If there are multiple multi-line lambdas in a statement, or additional parameters after the lambda, indent the block two spaces from the indent of the -``[]``: - -.. code-block:: c++ - - dyn_switch(V->stripPointerCasts(), - [] (PHINode *PN) { - // process phis... - }, - [] (SelectInst *SI) { - // process selects... - }, - [] (LoadInst *LI) { - // process loads... - }, - [] (AllocaInst *AI) { - // process allocas... - }); - -Braced Initializer Lists -"""""""""""""""""""""""" +`[]`: + +```c++ +dyn_switch(V->stripPointerCasts(), + [] (PHINode *PN) { + // process phis... + }, + [] (SelectInst *SI) { + // process selects... + }, + [] (LoadInst *LI) { + // process loads... + }, + [] (AllocaInst *AI) { + // process allocas... + }); +``` + +##### Braced Initializer Lists Starting from C++11, there are significantly more uses of braced lists to perform initialization. For example, they can be used to construct aggregate @@ -547,141 +530,136 @@ formatting braced initialization lists: act as if the braces were parentheses in a function call. The formatting rules exactly match those already well understood for formatting nested function calls. Examples: -.. code-block:: c++ - - foo({a, b, c}, {1, 2, 3}); +```c++ +foo({a, b, c}, {1, 2, 3}); - llvm::Constant *Mask[] = { - llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 0), - llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 1), - llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 2)}; +llvm::Constant *Mask[] = { + llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 0), + llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 1), + llvm::ConstantInt::get(llvm::Type::getInt32Ty(getLLVMContext()), 2)}; +``` This formatting scheme also makes it particularly easy to get predictable, -consistent, and automatic formatting with tools like `Clang Format`_. 
+consistent, and automatic formatting with tools like [Clang Format]. -.. _Clang Format: https://clang.llvm.org/docs/ClangFormat.html +[Clang Format]: https://clang.llvm.org/docs/ClangFormat.html -Language and Compiler Issues ----------------------------- +### Language and Compiler Issues -Treat Compiler Warnings Like Errors -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Treat Compiler Warnings Like Errors Compiler warnings are often useful and help improve the code. Those that are not useful, can be often suppressed with a small code change. For example, an -assignment in the ``if`` condition is often a typo: +assignment in the `if` condition is often a typo: -.. code-block:: c++ - - if (V = getValue()) { - ... - } +```c++ +if (V = getValue()) { + ... +} +``` Several compilers will print a warning for the code above. It can be suppressed by adding parentheses: -.. code-block:: c++ - - if ((V = getValue())) { - ... - } +```c++ +if ((V = getValue())) { + ... +} +``` -Write Portable Code -^^^^^^^^^^^^^^^^^^^ +#### Write Portable Code In almost all cases, it is possible to write completely portable code. When you need to rely on non-portable code, put it behind a well-defined and well-documented interface. -Do not use RTTI or Exceptions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Do not use RTTI or Exceptions In an effort to reduce code and executable size, LLVM does not use exceptions -or RTTI (`runtime type information -<https://en.wikipedia.org/wiki/Run-time_type_information>`_, for example, -``dynamic_cast<>``). +or RTTI ([runtime type information], for example, +`dynamic_cast<>`). + +[runtime type information]: https://en.wikipedia.org/wiki/Run-time_type_information That said, LLVM does make extensive use of a hand-rolled form of RTTI that uses -templates like :ref:`isa\<>, cast\<>, and dyn_cast\<> <isa>`. +templates like [isa<>, cast<>, and dyn_cast<>](project:ProgrammersManual.md#the-isa-cast-and-dyn-cast-templates). This form of RTTI is opt-in and can be -:doc:`added to any class <HowToSetUpLLVMStyleRTTI>`. +{doc}`added to any class <HowToSetUpLLVMStyleRTTI>`.
-Prefer C++-style casts -^^^^^^^^^^^^^^^^^^^^^^ +#### Prefer C++-style casts -When casting, use ``static_cast``, ``reinterpret_cast``, and ``const_cast``, +When casting, use `static_cast`, `reinterpret_cast`, and `const_cast`, rather than C-style casts. There are two exceptions to this: -* When casting to ``void`` to suppress warnings about unused variables (as an - alternative to ``[[maybe_unused]]``). Prefer C-style casts in this instance. - Note that if the variable is unused because it's used only in ``assert``, use - ``[[maybe_unused]]`` instead of a C-style void cast. +* When casting to `void` to suppress warnings about unused variables (as an + alternative to `[[maybe_unused]]`). Prefer C-style casts in this instance. + Note that if the variable is unused because it's used only in `assert`, use + `[[maybe_unused]]` instead of a C-style void cast. * When casting between integral types (including enums that are not strongly- typed), functional-style casts are permitted as an alternative to - ``static_cast``. + `static_cast`. -.. _static constructor: +(static constructor)= -Do not use Static Constructors -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Do not use Static Constructors Static constructors and destructors (e.g., global variables whose types have a constructor or destructor) should not be added to the code base, and should be removed wherever possible. -Globals in different source files are initialized in an `arbitrary order -<https://yosefk.com/c++fqa/ctors.html#fqa-10.12>`_, making the code more +Globals in different source files are initialized in an [arbitrary order], +making the code more difficult to reason about. +[arbitrary order]: https://yosefk.com/c++fqa/ctors.html#fqa-10.12 + Static constructors have a negative impact on the launch time of programs that use LLVM as a library. We would really like for there to be zero cost for linking in an additional LLVM target or other library into an application, but static constructors undermine this goal.
-Use of ``class`` and ``struct`` Keywords -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Use of `class` and `struct` Keywords -In C++, the ``class`` and ``struct`` keywords can be used almost +In C++, the `class` and `struct` keywords can be used almost interchangeably. The only difference is when they are used to declare a class: -``class`` makes all members private by default while ``struct`` makes all +`class` makes all members private by default while `struct` makes all members public by default. -* All declarations and definitions of a given ``class`` or ``struct`` must use +* All declarations and definitions of a given `class` or `struct` must use the same keyword. For example: -.. code-block:: c++ +```c++ +// Avoid if `Example` is defined as a struct. +class Example; - // Avoid if `Example` is defined as a struct. - class Example; +// OK. +struct Example; - // OK. - struct Example; +struct Example { ... }; +``` - struct Example { ... }; +* `struct` should be used when *all* members are declared public. -* ``struct`` should be used when *all* members are declared public. +```c++ +// Avoid using `struct` here, use `class` instead. +struct Foo { +private: + int Data; +public: + Foo() : Data(0) { } + int getData() const { return Data; } + void setData(int D) { Data = D; } +}; -.. code-block:: c++ +// OK to use `struct`: all members are public. +struct Bar { + int Data; + Bar() : Data(0) { } +}; +``` - // Avoid using `struct` here, use `class` instead. - struct Foo { - private: - int Data; - public: - Foo() : Data(0) { } - int getData() const { return Data; } - void setData(int D) { Data = D; } - }; - - // OK to use `struct`: all members are public. 
- struct Bar { - int Data; - Bar() : Data(0) { } - }; - -Do not use Braced Initializer Lists to Call a Constructor -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Do not use Braced Initializer Lists to Call a Constructor Starting from C++11 there is a "generalized initialization syntax" which allows calling constructors using braced initializer lists. Do not use these to call @@ -693,70 +671,67 @@ don't use a braced initializer list. Instead, use a braced initializer list (without any type for temporaries) when doing aggregate initialization or something notionally equivalent. Examples: -.. code-block:: c++ +```c++ +class Foo { +public: + // Construct a Foo by reading data from the disk in the whizbang format, ... + Foo(std::string filename); - class Foo { - public: - // Construct a Foo by reading data from the disk in the whizbang format, ... - Foo(std::string filename); + // Construct a Foo by looking up the Nth element of some global data ... + Foo(int N); - // Construct a Foo by looking up the Nth element of some global data ... - Foo(int N); + // ... +}; - // ... - }; +// The Foo constructor call is reading a file, don't use braces to call it. +llvm::fill(foo, Foo("name")); - // The Foo constructor call is reading a file, don't use braces to call it. - llvm::fill(foo, Foo("name")); - - // The pair is being constructed like an aggregate, use braces. - bar_map.insert({my_key, my_value}); +// The pair is being constructed like an aggregate, use braces. +bar_map.insert({my_key, my_value}); +``` If you use a braced initializer list when initializing a variable, use an equals before the open curly brace: -.. 
code-block:: c++ - - int data[] = {0, 1, 2, 3}; +```c++ +int data[] = {0, 1, 2, 3}; +``` -Use ``auto`` Type Deduction to Make Code More Readable -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Use `auto` Type Deduction to Make Code More Readable -Some are advocating a policy of "almost always ``auto``" in C++11; however, LLVM -uses a more moderate stance. Use ``auto`` if and only if it makes the code more -readable or easier to maintain. Don't "almost always" use ``auto``, but do use -``auto`` with initializers like ``cast<Foo>(...)`` or other places where the -type is already obvious from the context. Another time when ``auto`` works well +Some are advocating a policy of "almost always `auto`" in C++11; however, LLVM +uses a more moderate stance. Use `auto` if and only if it makes the code more +readable or easier to maintain. Don't "almost always" use `auto`, but do use +`auto` with initializers like `cast<Foo>(...)` or other places where the +type is already obvious from the context. Another time when `auto` works well for these purposes is when the type would have been abstracted away anyway, -often behind a container's typedef such as ``std::vector<T>::iterator``. +often behind a container's typedef such as `std::vector<T>::iterator`. Similarly, C++14 adds generic lambda expressions where parameter types can be -``auto``. Use these where you would have used a template. +`auto`. Use these where you would have used a template. -Beware unnecessary copies with ``auto`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Beware unnecessary copies with `auto` -The convenience of ``auto`` makes it easy to forget that its default behavior -is a copy. Particularly in range-based ``for`` loops, careless copies are +The convenience of `auto` makes it easy to forget that its default behavior +is a copy. Particularly in range-based `for` loops, careless copies are expensive.
-Use ``auto &`` for values and ``auto *`` for pointers unless you need to make a +Use `auto &` for values and `auto *` for pointers unless you need to make a copy. -.. code-block:: c++ +```c++ +// Typically there's no reason to copy. +for (const auto &Val : Container) observe(Val); +for (auto &Val : Container) Val.change(); - // Typically there's no reason to copy. - for (const auto &Val : Container) observe(Val); - for (auto &Val : Container) Val.change(); +// Remove the reference if you really want a new copy. +for (auto Val : Container) { Val.change(); saveSomewhere(Val); } - // Remove the reference if you really want a new copy. - for (auto Val : Container) { Val.change(); saveSomewhere(Val); } +// Copy pointers, but make it clear that they're pointers. +for (const auto *Ptr : Container) observe(*Ptr); +for (auto *Ptr : Container) Ptr->change(); +``` - // Copy pointers, but make it clear that they're pointers. - for (const auto *Ptr : Container) observe(*Ptr); - for (auto *Ptr : Container) Ptr->change(); - -Beware of non-determinism due to ordering of pointers -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Beware of non-determinism due to ordering of pointers In general, there is no relative ordering among pointers. As a result, when unordered containers like sets and maps are used with pointer keys @@ -767,31 +742,27 @@ debug the compiler. In case an ordered result is expected, remember to sort an unordered container before iteration. Or use ordered containers -like ``vector``/``MapVector``/``SetVector`` if you want to iterate pointer +like `vector`/`MapVector`/`SetVector` if you want to iterate pointer keys. -Beware of non-deterministic sorting order of equal elements -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Beware of non-deterministic sorting order of equal elements -``std::sort`` uses a non-stable sorting algorithm in which the order of equal -elements is not guaranteed to be preserved. 
Thus using ``std::sort`` for a +`std::sort` uses a non-stable sorting algorithm in which the order of equal +elements is not guaranteed to be preserved. Thus using `std::sort` for a container having equal elements may result in non-deterministic behavior. To uncover such instances of non-determinism, LLVM has introduced a new -``llvm::sort`` wrapper function. For an ``EXPENSIVE_CHECKS`` build this will randomly -shuffle the container before sorting. Default to using ``llvm::sort`` instead -of ``std::sort``. +`llvm::sort` wrapper function. For an `EXPENSIVE_CHECKS` build this will randomly +shuffle the container before sorting. Default to using `llvm::sort` instead +of `std::sort`. -Style Issues -============ +## Style Issues -The High-Level Issues ---------------------- +### The High-Level Issues -Self-contained Headers -^^^^^^^^^^^^^^^^^^^^^^ +#### Self-contained Headers -Header files should be self-contained (compile on their own) and end in ``.h``. -Non-header files that are meant for inclusion should end in ``.inc`` and be +Header files should be self-contained (compile on their own) and end in `.h`. +Non-header files that are meant for inclusion should end in `.inc` and be used sparingly. All header files should be self-contained. Users and refactoring tools should @@ -804,17 +775,16 @@ locations, such as the middle of another file. They might not use header guards, and might not include their prerequisites. Name such files with the .inc extension. Use sparingly, and prefer self-contained headers when possible. -In general, a header should be implemented by one or more ``.cpp`` files. Each -of these ``.cpp`` files should include the header that defines their interface +In general, a header should be implemented by one or more `.cpp` files. Each +of these `.cpp` files should include the header that defines their interface first. This ensures that all of the dependencies of the header have been properly added to the header itself, and are not implicit. 
System headers should be included after user headers for a translation unit. -Library Layering -^^^^^^^^^^^^^^^^ +#### Library Layering -A directory of header files (for example, ``include/llvm/Foo``) defines a -library (``Foo``). One library (both +A directory of header files (for example, `include/llvm/Foo`) defines a +library (`Foo`). One library (both its headers and implementation) should only use things from the libraries listed in its dependencies. @@ -829,27 +799,26 @@ doesn't enforce header file circular dependencies created by inline functions. A good way to answer the "is this layered correctly" would be to consider whether a Unix linker would succeed at linking the program if all inline functions were defined out-of-line. (& for all valid orderings of dependencies -- since linking resolution is linear, it's possible that some implicit +— since linking resolution is linear, it's possible that some implicit dependencies can sneak through: A depends on B and C, so valid orderings are "C B A" or "B C A", in both cases the explicit dependencies come before their use. But in the first case, B could still link successfully if it implicitly depended on C, or the opposite in the second case) -.. _minimal list of #includes: +(minimal list of #includes)= -``#include`` as Little as Possible -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### `#include` as Little as Possible -``#include`` hurts compile time performance. Don't do it unless you have to, +`#include` hurts compile time performance. Don't do it unless you have to, especially in header files. But wait! Sometimes you need to have the definition of a class to use it, or to -inherit from it. In these cases go ahead and ``#include`` that header file. Be +inherit from it. In these cases go ahead and `#include` that header file. Be aware, however, that there are many cases where you don't need to have the full definition of a class. If you are using a pointer or reference to a class, you don't need the header file. 
If you are simply returning a class instance from a prototyped function or method, you don't need it. In fact, for most cases, you -simply don't need the definition of a class. And not ``#include``\ing speeds up +simply don't need the definition of a class. And not `#include`ing speeds up compilation. It is easy to try to go overboard on this recommendation, however. You @@ -860,11 +829,10 @@ header, make sure to include your module header **first** in the implementation file (as mentioned above). This way there won't be any hidden dependencies that you'll find out about later. -Keep "Internal" Headers Private -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Keep "Internal" Headers Private Many modules have a complex implementation that causes them to use more than one -implementation (``.cpp``) file. It is often tempting to put the internal +implementation (`.cpp`) file. It is often tempting to put the internal communication interface (helper classes, extra functions, etc) in the public module header file. Don't do this! @@ -872,58 +840,57 @@ If you really need to do something like this, put a private header file in the same directory as the source files, and include it locally. This ensures that your private interface remains private and undisturbed by outsiders. -.. note:: - - It's okay to put extra implementation methods in a public class itself. Just - make them private (or protected) and all is well. +```{note} +It's okay to put extra implementation methods in a public class itself. Just +make them private (or protected) and all is well. +``` -Use Namespace Qualifiers to Define Previously Declared Symbols -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Use Namespace Qualifiers to Define Previously Declared Symbols When providing an out-of-line definition for various symbols (variables, functions, opaque classes) in a source file, do not open namespace blocks in the source file. 
Instead, use namespace qualifiers to help ensure that your definition matches an existing declaration. Do this: -.. code-block:: c++ - - // Foo.h - namespace llvm { - extern int FooVal; - int foo(const char *s); +```c++ +// Foo.h +namespace llvm { +extern int FooVal; +int foo(const char *s); - namespace detail { - class FooImpl; - } // namespace detail - } // namespace llvm +namespace detail { +class FooImpl; +} // namespace detail +} // namespace llvm - // Foo.cpp - #include "Foo.h" - using namespace llvm; +// Foo.cpp +#include "Foo.h" +using namespace llvm; - int llvm::FooVal; +int llvm::FooVal; - int llvm::foo(const char *s) { - // ... - } +int llvm::foo(const char *s) { + // ... +} - class detail::FooImpl { - // ... - } +class detail::FooImpl { + // ... +} +``` Doing this helps to avoid bugs where the definition does not match the declaration from the header. For example, the following C++ code defines a new -overload of ``llvm::foo`` instead of providing a definition for the existing +overload of `llvm::foo` instead of providing a definition for the existing function declared in the header: -.. code-block:: c++ - - // Foo.cpp - #include "Foo.h" - namespace llvm { - int foo(char *s) { // Mismatch between "const char *" and "char *" - } - } // namespace llvm +```c++ +// Foo.cpp +#include "Foo.h" +namespace llvm { +int foo(char *s) { // Mismatch between "const char *" and "char *" +} +} // namespace llvm +``` This error will not be caught until the build is nearly complete, when the linker fails to find a definition for any uses of the original function. If the @@ -933,34 +900,33 @@ been caught immediately when the definition was compiled. Class method implementations must already name the class and new overloads cannot be introduced out of line, so this recommendation does not apply to them. -.. 
_early exits: +(early exits)= -Use Early Exits and ``continue`` to Simplify Code -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Use Early Exits and `continue` to Simplify Code When reading code, keep in mind how much state and how many previous decisions have to be remembered by the reader to understand a block of code. Aim to reduce indentation where possible when it doesn't make it more difficult to understand the code. One great way to do this is by making use of early exits -and the ``continue`` keyword in long loops. Consider this code that does not +and the `continue` keyword in long loops. Consider this code that does not use an early exit: -.. code-block:: c++ - - Value *doSomething(Instruction *I) { - if (!I->isTerminator() && - I->hasOneUse() && doOtherThing(I)) { - ... some long code .... - } - - return 0; +```c++ +Value *doSomething(Instruction *I) { + if (!I->isTerminator() && + I->hasOneUse() && doOtherThing(I)) { + ... some long code .... } -This code has several problems if the body of the ``'if'`` is large. When + return 0; +} +``` + +This code has several problems if the body of the `'if'` is large. When you're looking at the top of the function, it isn't immediately clear that this *only* does interesting things with non-terminator instructions, and only applies to things with the other predicates. Second, it is relatively difficult -to describe (in comments) why these predicates are important because the ``if`` +to describe (in comments) why these predicates are important because the `if` statement makes it difficult to lay out the comments. Third, when you're deep within the body of the code, it is indented an extra level. Finally, when reading the top of the function, it isn't clear what the result is if the @@ -969,191 +935,189 @@ it returns null. It is much preferred to format the code like this: -.. code-block:: c++ - - Value *doSomething(Instruction *I) { - // Terminators never need 'something' done to them because ... 
- if (I->isTerminator()) - return 0; +```c++ +Value *doSomething(Instruction *I) { + // Terminators never need 'something' done to them because ... + if (I->isTerminator()) + return 0; - // We conservatively avoid transforming instructions with multiple uses - // because goats like cheese. - if (!I->hasOneUse()) - return 0; + // We conservatively avoid transforming instructions with multiple uses + // because goats like cheese. + if (!I->hasOneUse()) + return 0; - // This is really just here for example. - if (!doOtherThing(I)) - return 0; + // This is really just here for example. + if (!doOtherThing(I)) + return 0; - ... some long code .... - } + ... some long code .... +} +``` -This fixes these problems. A similar problem frequently happens in ``for`` +This fixes these problems. A similar problem frequently happens in `for` loops. A silly example is something like this: -.. code-block:: c++ - - for (Instruction &I : BB) { - if (auto *BO = dyn_cast(&I)) { - Value *LHS = BO->getOperand(0); - Value *RHS = BO->getOperand(1); - if (LHS != RHS) { - ... - } +```c++ +for (Instruction &I : BB) { + if (auto *BO = dyn_cast(&I)) { + Value *LHS = BO->getOperand(0); + Value *RHS = BO->getOperand(1); + if (LHS != RHS) { + ... } } +} +``` When you have very, very small loops, this sort of structure is fine. But if it exceeds more than 10-15 lines, it becomes difficult for people to read and understand at a glance. The problem with this sort of code is that it gets very nested very quickly. This means that the reader of the code has to keep a lot of context in their brain to remember what is going immediately on in the loop, -because they don't know if/when the ``if`` conditions will have ``else``\s etc. +because they don't know if/when the `if` conditions will have `else`s etc. It is strongly preferred to structure the loop like this: -.. 
code-block:: c++ +```c++ +for (Instruction &I : BB) { + auto *BO = dyn_cast(&I); + if (!BO) continue; - for (Instruction &I : BB) { - auto *BO = dyn_cast(&I); - if (!BO) continue; - - Value *LHS = BO->getOperand(0); - Value *RHS = BO->getOperand(1); - if (LHS == RHS) continue; + Value *LHS = BO->getOperand(0); + Value *RHS = BO->getOperand(1); + if (LHS == RHS) continue; - ... - } + ... +} +``` This has all the benefits of using early exits for functions: it reduces the nesting of the loop, it makes it easier to describe why the conditions are true, and it -makes it obvious to the reader that there is no ``else`` coming up that they +makes it obvious to the reader that there is no `else` coming up that they have to push context into their brain for. If a loop is large, this can be a big understandability win. -Don't use ``else`` after a ``return`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Don't use `else` after a `return` For similar reasons as above (reduction of indentation and easier reading), please -do not use ``'else'`` or ``'else if'`` after something that interrupts control -flow --- like ``return``, ``break``, ``continue``, ``goto``, etc. For example: - -.. code-block:: c++ - - case 'J': { - if (Signed) { - Type = Context.getsigjmp_bufType(); - if (Type.isNull()) { - Error = ASTContext::GE_Missing_sigjmp_buf; - return QualType(); - } else { - break; // Unnecessary. - } +do not use `'else'` or `'else if'` after something that interrupts control +flow --- like `return`, `break`, `continue`, `goto`, etc. For example: + +```c++ +case 'J': { + if (Signed) { + Type = Context.getsigjmp_bufType(); + if (Type.isNull()) { + Error = ASTContext::GE_Missing_sigjmp_buf; + return QualType(); + } else { + break; // Unnecessary. 
+ } + } else { + Type = Context.getjmp_bufType(); + if (Type.isNull()) { + Error = ASTContext::GE_Missing_jmp_buf; + return QualType(); } else { - Type = Context.getjmp_bufType(); - if (Type.isNull()) { - Error = ASTContext::GE_Missing_jmp_buf; - return QualType(); - } else { - break; // Unnecessary. - } + break; // Unnecessary. } } +} +``` It is better to write it like this: -.. code-block:: c++ - - case 'J': - if (Signed) { - Type = Context.getsigjmp_bufType(); - if (Type.isNull()) { - Error = ASTContext::GE_Missing_sigjmp_buf; - return QualType(); - } - } else { - Type = Context.getjmp_bufType(); - if (Type.isNull()) { - Error = ASTContext::GE_Missing_jmp_buf; - return QualType(); - } +```c++ +case 'J': + if (Signed) { + Type = Context.getsigjmp_bufType(); + if (Type.isNull()) { + Error = ASTContext::GE_Missing_sigjmp_buf; + return QualType(); } - break; - -Or better yet (in this case) as: - -.. code-block:: c++ - - case 'J': - if (Signed) - Type = Context.getsigjmp_bufType(); - else - Type = Context.getjmp_bufType(); - + } else { + Type = Context.getjmp_bufType(); if (Type.isNull()) { - Error = Signed ? ASTContext::GE_Missing_sigjmp_buf : - ASTContext::GE_Missing_jmp_buf; + Error = ASTContext::GE_Missing_jmp_buf; return QualType(); } - break; + } + break; +``` + +Or better yet (in this case) as: + +```c++ +case 'J': + if (Signed) + Type = Context.getsigjmp_bufType(); + else + Type = Context.getjmp_bufType(); + + if (Type.isNull()) { + Error = Signed ? ASTContext::GE_Missing_sigjmp_buf : + ASTContext::GE_Missing_jmp_buf; + return QualType(); + } + break; +``` The idea is to reduce indentation and the amount of code you have to keep track of when reading the code. -Note: this advice does not apply to a ``constexpr if`` statement. The -substatement of the ``else`` clause may be a discarded statement, so removing -the ``else`` can cause unexpected template instantiations. Thus, the following +Note: this advice does not apply to a `constexpr if` statement. 
The +substatement of the `else` clause may be a discarded statement, so removing +the `else` can cause unexpected template instantiations. Thus, the following example is correct: -.. code-block:: c++ +```c++ +template<typename T> +static constexpr bool VarTempl = true; - template<typename T> - static constexpr bool VarTempl = true; +template<typename T> +int func() { + if constexpr (VarTempl<T>) + return 1; + else + static_assert(!VarTempl<T>); +} +``` - template<typename T> - int func() { - if constexpr (VarTempl<T>) - return 1; - else - static_assert(!VarTempl<T>); - } - -Turn Predicate Loops into Predicate Functions -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Turn Predicate Loops into Predicate Functions It is very common to write small loops that just compute a boolean value. There are a number of ways that people commonly write these, but an example of this sort of thing is: -.. code-block:: c++ - - bool FoundFoo = false; - for (unsigned I = 0, E = BarList.size(); I != E; ++I) - if (BarList[I]->isFoo()) { - FoundFoo = true; - break; - } - - if (FoundFoo) { - ... +```c++ +bool FoundFoo = false; +for (unsigned I = 0, E = BarList.size(); I != E; ++I) + if (BarList[I]->isFoo()) { + FoundFoo = true; + break; } +if (FoundFoo) { + ... +} +``` + Instead of this sort of loop, we prefer to use a predicate function (which may -be `static`_) that uses `early exits`_: +be {ref}`static <static>`) that uses {ref}`early exits <early exits>`: -.. code-block:: c++ +```c++ +/// \returns true if the specified list has an element that is a foo. +static bool containsFoo(const std::vector<Bar*> &List) { + for (unsigned I = 0, E = List.size(); I != E; ++I) + if (List[I]->isFoo()) + return true; + return false; +} +... - /// \returns true if the specified list has an element that is a foo. - static bool containsFoo(const std::vector<Bar*> &List) { - for (unsigned I = 0, E = List.size(); I != E; ++I) - if (List[I]->isFoo()) - return true; - return false; - } +if (containsFoo(BarList)) { ... - - if (containsFoo(BarList)) { - ...
- } +} +``` There are many reasons for doing this: it reduces indentation and factors out code which can often be shared by other code that checks for the same predicate. @@ -1165,11 +1129,9 @@ being faced with the in-line details of how we check to see if the BarList contains a foo, we can trust the function name and continue reading with better locality. -The Low-Level Issues --------------------- +### The Low-Level Issues -Name Types, Functions, Variables, and Enumerators Properly -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Name Types, Functions, Variables, and Enumerators Properly Poorly-chosen names can mislead the reader and cause bugs. We cannot stress enough how important it is to use *descriptive* names. Pick names that match @@ -1178,74 +1140,73 @@ abbreviations unless they are well known. After picking a good name, make sure to use consistent capitalization for the name, as inconsistency requires clients to either memorize the APIs or to look it up to find the exact spelling. -In general, names should be in camel case (e.g. ``TextFileReader`` and -``isLValue()``). Different kinds of declarations have different rules: +In general, names should be in camel case (e.g. `TextFileReader` and +`isLValue()`). Different kinds of declarations have different rules: * **Type names** (including classes, structs, enums, typedefs, etc) should be - nouns and start with an upper-case letter (e.g. ``TextFileReader``). + nouns and start with an upper-case letter (e.g. `TextFileReader`). * **Variable names** should be nouns (as they represent state). The name should - be camel case, and start with an upper-case letter (e.g. ``Leader`` or - ``Boats``). + be camel case, and start with an upper-case letter (e.g. `Leader` or + `Boats`). * **Function names** should be verb phrases (as they represent actions), and command-like function should be imperative. The name should be camel case, - and start with a lowercase letter (e.g. 
``openFile()`` or ``isFoo()``). + and start with a lowercase letter (e.g. `openFile()` or `isFoo()`). -* **Enum declarations** (e.g. ``enum Foo {...}``) are types, so they should +* **Enum declarations** (e.g. `enum Foo {...}`) are types, so they should follow the naming conventions for types. A common use for enums is as a discriminator for a union, or an indicator of a subclass. When an enum is - used for something like this, it should have a ``Kind`` suffix - (e.g. ``ValueKind``). + used for something like this, it should have a `Kind` suffix + (e.g. `ValueKind`). -* **Enumerators** (e.g. ``enum { Foo, Bar }``) and **public member variables** +* **Enumerators** (e.g. `enum { Foo, Bar }`) and **public member variables** should start with an upper-case letter, just like types. Unless the enumerators are defined in their own small namespace or inside a class, enumerators should have a prefix corresponding to the enum declaration name. - For example, ``enum ValueKind { ... };`` may contain enumerators like - ``VK_Argument``, ``VK_BasicBlock``, etc. Enumerators that are just + For example, `enum ValueKind { ... };` may contain enumerators like + `VK_Argument`, `VK_BasicBlock`, etc. Enumerators that are just convenience constants are exempt from the requirement for a prefix. For instance: - .. code-block:: c++ - - enum { - MaxSize = 42, - Density = 12 - }; + ```c++ + enum { + MaxSize = 42, + Density = 12 + }; + ``` As an exception, classes that mimic STL classes can have member names in STL's -style of lowercase words separated by underscores (e.g. ``begin()``, -``push_back()``, and ``empty()``). Classes that provide multiple -iterators should add a singular prefix to ``begin()`` and ``end()`` -(e.g. ``global_begin()`` and ``use_begin()``). +style of lowercase words separated by underscores (e.g. `begin()`, +`push_back()`, and `empty()`). Classes that provide multiple +iterators should add a singular prefix to `begin()` and `end()` +(e.g. 
`global_begin()` and `use_begin()`). Here are some examples: -.. code-block:: c++ - - class VehicleMaker { - ... - Factory F; // Avoid: a non-descriptive abbreviation. - Factory Factory; // Better: more descriptive. - Factory TireFactory; // Even better: if VehicleMaker has more than one - // kind of factories. - }; - - Vehicle makeVehicle(VehicleType Type) { - VehicleMaker M; // Might be OK if scope is small. - Tire Tmp1 = M.makeTire(); // Avoid: 'Tmp1' provides no information. - Light Headlight = M.makeLight("head"); // Good: descriptive. - ... - } +```c++ +class VehicleMaker { + ... + Factory F; // Avoid: a non-descriptive abbreviation. + Factory Factory; // Better: more descriptive. + Factory TireFactory; // Even better: if VehicleMaker has more than one + // kind of factories. +}; + +Vehicle makeVehicle(VehicleType Type) { + VehicleMaker M; // Might be OK if scope is small. + Tire Tmp1 = M.makeTire(); // Avoid: 'Tmp1' provides no information. + Light Headlight = M.makeLight("head"); // Good: descriptive. + ... +} +``` -Assert Liberally -^^^^^^^^^^^^^^^^ +#### Assert Liberally -Use the "``assert``" macro to its fullest. Check all of your preconditions and +Use the "`assert`" macro to its fullest. Check all of your preconditions and assumptions. You never know when a bug (not necessarily even yours) might be caught early by an assertion, which reduces debugging time dramatically. The -"``<cassert>``" header file is probably already included by the header files you +"`<cassert>`" header file is probably already included by the header files you are using, so it doesn't cost anything to use it. To further assist with debugging, make sure to put some kind of error message in @@ -1253,147 +1214,144 @@ the assertion statement, which is printed if the assertion is tripped. This helps the poor debugger make sense of why an assertion is being made and enforced, and hopefully what to do about it. Here is one complete example: -..
code-block:: c++ - - inline Value *getOperand(unsigned I) { - assert(I < Operands.size() && "getOperand() out of range!"); - return Operands[I]; - } +```c++ +inline Value *getOperand(unsigned I) { + assert(I < Operands.size() && "getOperand() out of range!"); + return Operands[I]; +} +``` Here are more examples: -.. code-block:: c++ - - assert(Ty->isPointerType() && "Can't allocate a non-pointer type!"); +```c++ +assert(Ty->isPointerType() && "Can't allocate a non-pointer type!"); - assert((Opcode == Shl || Opcode == Shr) && "ShiftInst Opcode invalid!"); +assert((Opcode == Shl || Opcode == Shr) && "ShiftInst Opcode invalid!"); - assert(idx < getNumSuccessors() && "Successor # out of range!"); +assert(idx < getNumSuccessors() && "Successor # out of range!"); - assert(V1.getType() == V2.getType() && "Constant types must be identical!"); +assert(V1.getType() == V2.getType() && "Constant types must be identical!"); - assert(isa(Succ->front()) && "Only works on PHId BBs!"); +assert(isa(Succ->front()) && "Only works on PHId BBs!"); +``` You get the idea. In the past, asserts were used to indicate a piece of code that should not be reached. These were typically of the form: -.. code-block:: c++ - - assert(0 && "Invalid radix for integer literal"); +```c++ +assert(0 && "Invalid radix for integer literal"); +``` This has a few issues, the main one being that some compilers might not understand the assertion, or warn about a missing return in builds where assertions are compiled out. -Today, we have something much better: ``llvm_unreachable``: +Today, we have something much better: `llvm_unreachable`: -.. code-block:: c++ - - llvm_unreachable("Invalid radix for integer literal"); +```c++ +llvm_unreachable("Invalid radix for integer literal"); +``` When assertions are enabled, this will print the message if it's ever reached and then exit the program. When assertions are disabled (i.e. 
in release -builds), ``llvm_unreachable`` becomes a hint to compilers to skip generating +builds), `llvm_unreachable` becomes a hint to compilers to skip generating code for this branch. If the compiler does not support this, it will fall back to the "abort" implementation. -Use ``llvm_unreachable`` to mark a specific point in code that should never be +Use `llvm_unreachable` to mark a specific point in code that should never be reached. This is especially desirable for addressing warnings about unreachable branches, etc., but can be used whenever reaching a particular code path is unconditionally a bug (not originating from user input; see below) of some kind. -Use of ``assert`` should always include a testable predicate (as opposed to -``assert(false)``). +Use of `assert` should always include a testable predicate (as opposed to +`assert(false)`). If the error condition can be triggered by user input then the -recoverable error mechanism described in :doc:`ProgrammersManual` should be -used instead. In cases where this is not practical, ``report_fatal_error`` may +recoverable error mechanism described in {doc}`ProgrammersManual` should be +used instead. In cases where this is not practical, `report_fatal_error` may be used. Another issue is that values used only by assertions will produce an "unused value" warning when assertions are disabled. For example, this code will warn: -.. code-block:: c++ - - unsigned Size = V.size(); - assert(Size > 42 && "Vector smaller than it should be"); +```c++ +unsigned Size = V.size(); +assert(Size > 42 && "Vector smaller than it should be"); - bool NewToSet = Myset.insert(Value); - assert(NewToSet && "The value shouldn't be in the set yet"); +bool NewToSet = Myset.insert(Value); +assert(NewToSet && "The value shouldn't be in the set yet"); +``` These are two interesting different cases. 
In the first case, the call to -``V.size()`` is only useful for the assert, and we don't want it executed when +`V.size()` is only useful for the assert, and we don't want it executed when assertions are disabled. Code like this should move the call into the assert itself. In the second case, the side effects of the call must happen whether the assert is enabled or not. In this case, the value should be defined using -the ``[[maybe_unused]]`` attribute to suppress the warning. To be specific, it is +the `[[maybe_unused]]` attribute to suppress the warning. To be specific, it is preferred to write the code like this: -.. code-block:: c++ - - assert(V.size() > 42 && "Vector smaller than it should be"); +```c++ +assert(V.size() > 42 && "Vector smaller than it should be"); - [[maybe_unused]] bool NewToSet = Myset.insert(Value); - assert(NewToSet && "The value shouldn't be in the set yet"); +[[maybe_unused]] bool NewToSet = Myset.insert(Value); +assert(NewToSet && "The value shouldn't be in the set yet"); +``` -In C code where ``[[maybe_unused]]`` is not supported, use ``void`` cast to +In C code where `[[maybe_unused]]` is not supported, use `void` cast to suppress an unused variable warning as follows: -.. code-block:: c +```c +LLVMValueRef Value = LLVMMetadataAsValue(Context, NodeMD); +assert(LLVMIsAValueAsMetadata(Value) != NULL); +(void)Value; +``` - LLVMValueRef Value = LLVMMetadataAsValue(Context, NodeMD); - assert(LLVMIsAValueAsMetadata(Value) != NULL); - (void)Value; - -Do Not Use ``using namespace std`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Do Not Use `using namespace std` In LLVM, we prefer to explicitly prefix all identifiers from the standard -namespace with an "``std::``" prefix, rather than rely on "``using namespace -std;``". +namespace with an "`std::`" prefix, rather than rely on "`using namespace +std;`". 
-In header files, adding a ``'using namespace XXX'`` directive pollutes the -namespace of any source file that ``#include``\s the header, creating +In header files, adding a `'using namespace XXX'` directive pollutes the +namespace of any source file that `#include`s the header, creating maintenance issues. -In implementation files (e.g. ``.cpp`` files), the rule is more of a stylistic +In implementation files (e.g. `.cpp` files), the rule is more of a stylistic rule, but is still important. Basically, using explicit namespace prefixes makes the code **clearer**, because it is immediately obvious what facilities are being used and where they are coming from. And **more portable**, because namespace clashes cannot occur between LLVM code and other namespaces. The portability rule is important because different standard library implementations expose different symbols (potentially ones they shouldn't), and future revisions -to the C++ standard will add more symbols to the ``std`` namespace. As such, we -never use ``'using namespace std;'`` in LLVM. +to the C++ standard will add more symbols to the `std` namespace. As such, we +never use `'using namespace std;'` in LLVM. -The exception to the general rule (i.e. it's not an exception for the ``std`` +The exception to the general rule (i.e. it's not an exception for the `std` namespace) is for implementation files. For example, all of the code in the LLVM project implements code that lives in the 'llvm' namespace. As such, it is -ok, and actually clearer, for the ``.cpp`` files to have a ``'using namespace -llvm;'`` directive at the top, after the ``#include``\s. This reduces +ok, and actually clearer, for the `.cpp` files to have a `'using namespace +llvm;'` directive at the top, after the `#include`s. This reduces indentation in the body of the file for source editors that indent based on braces, and keeps the conceptual context cleaner. 
The general form of this rule -is that any ``.cpp`` file that implements code in any namespace may use that +is that any `.cpp` file that implements code in any namespace may use that namespace (and its parents'), but should not use any others. -Provide a Virtual Method Anchor for Classes in Headers -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Provide a Virtual Method Anchor for Classes in Headers If a class is defined in a header file and has a vtable (either it has virtual methods or it derives from classes with virtual methods), it must always have at least one out-of-line virtual method in the class. Without this, the compiler -will copy the vtable and RTTI into every ``.o`` file that ``#include``\s the -header, bloating ``.o`` file sizes and increasing link times. +will copy the vtable and RTTI into every `.o` file that `#include`s the +header, bloating `.o` file sizes and increasing link times. -Don't use default labels in fully covered switches over enumerations -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Don't use default labels in fully covered switches over enumerations -``-Wswitch`` warns if a switch, without a default label, over an enumeration +`-Wswitch` warns if a switch, without a default label, over an enumeration does not cover every enumeration value. If you write a default label on a fully -covered switch over an enumeration then the ``-Wswitch`` warning won't fire +covered switch over an enumeration then the `-Wswitch` warning won't fire when new elements are added to that enumeration. To help avoid adding these -kinds of defaults, Clang has the warning ``-Wcovered-switch-default`` which is +kinds of defaults, Clang has the warning `-Wcovered-switch-default` which is off by default but turned on when building LLVM with a version of Clang that supports the warning. 
@@ -1401,53 +1359,51 @@ A knock-on effect of this stylistic requirement is that when building LLVM with GCC you may get warnings related to "control may reach end of non-void function" if you return from each case of a covered switch-over-enum because GCC assumes that the enum expression may take any representable value, not just those of -individual enumerators. To suppress this warning, use ``llvm_unreachable`` after +individual enumerators. To suppress this warning, use `llvm_unreachable` after the switch. -Use range-based ``for`` loops wherever possible -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Use range-based `for` loops wherever possible -The introduction of range-based ``for`` loops in C++11 means that explicit -manipulation of iterators is rarely necessary. We use range-based ``for`` +The introduction of range-based `for` loops in C++11 means that explicit +manipulation of iterators is rarely necessary. We use range-based `for` loops wherever possible for all newly added code. For example: -.. code-block:: c++ - - BasicBlock *BB = ... - for (Instruction &I : *BB) - ... use I ... +```c++ +BasicBlock *BB = ... +for (Instruction &I : *BB) + ... use I ... +``` -Usage of ``std::for_each()``/``llvm::for_each()`` functions is discouraged, +Usage of `std::for_each()`/`llvm::for_each()` functions is discouraged, unless the callable object already exists. -Don't evaluate ``end()`` every time through a loop -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Don't evaluate `end()` every time through a loop -In cases where range-based ``for`` loops can't be used and it is necessary +In cases where range-based `for` loops can't be used and it is necessary to write an explicit iterator-based loop, pay close attention to whether -``end()`` is re-evaluated on each loop iteration. One common mistake is to +`end()` is re-evaluated on each loop iteration. One common mistake is to write a loop in this style: -.. code-block:: c++ +```c++ +BasicBlock *BB = ... 
+for (auto I = BB->begin(); I != BB->end(); ++I) + ... use I ... +``` - BasicBlock *BB = ... - for (auto I = BB->begin(); I != BB->end(); ++I) - ... use I ... - -The problem with this construct is that it evaluates "``BB->end()``" every time +The problem with this construct is that it evaluates "`BB->end()`" every time through the loop. Instead of writing the loop like this, we strongly prefer loops to be written so that they evaluate it once before the loop starts. A convenient way to do this is like so: -.. code-block:: c++ - - BasicBlock *BB = ... - for (auto I = BB->begin(), E = BB->end(); I != E; ++I) - ... use I ... +```c++ +BasicBlock *BB = ... +for (auto I = BB->begin(), E = BB->end(); I != E; ++I) + ... use I ... +``` The observant may quickly point out that these two loops may have different semantics: if the container (a basic block in this case) is being mutated, then -"``BB->end()``" may change its value every time through the loop and the second +"`BB->end()`" may change its value every time through the loop and the second loop may not in fact be correct. If you actually do depend on this behavior, please write the loop in the first form and add a comment indicating that you did it intentionally. @@ -1457,7 +1413,7 @@ form has two problems. First, it may be less efficient than evaluating it at the start of the loop. In this case, the cost is probably minor --- a few extra loads every time through the loop. However, if the base expression is more complex, then the cost can rise quickly. I've seen loops where the end -expression was actually something like: "``SomeMap[X]->end()``" and map lookups +expression was actually something like: "`SomeMap[X]->end()`" and map lookups really aren't cheap. By writing it in the second form consistently, you eliminate the issue entirely and don't even have to think about it. @@ -1471,112 +1427,105 @@ understand what it does. While the second form of the loop is a few extra keystrokes, we do strongly prefer it. 
-``#include <iostream>`` is Forbidden -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### `#include <iostream>` is Forbidden -The use of ``#include <iostream>`` in library files is hereby **forbidden**, -because many common implementations transparently inject a `static constructor`_ +The use of `#include <iostream>` in library files is hereby **forbidden**, +because many common implementations transparently inject a {ref}`static constructor <static constructor>` into every translation unit that includes it. -Note that using the other stream headers (``<sstream>`` for example) is not -problematic in this regard --- just ``<iostream>``. However, ``raw_ostream`` +Note that using the other stream headers (`<sstream>` for example) is not +problematic in this regard --- just `<iostream>`. However, `raw_ostream` provides various APIs that are better performing for almost every use than -``std::ostream`` style APIs. - -.. note:: +`std::ostream` style APIs. - New code should always use `raw_ostream`_ for writing, or the - ``llvm::MemoryBuffer`` API for reading files. +```{note} +New code should always use {ref}`raw_ostream <raw_ostream>` for writing, or the +`llvm::MemoryBuffer` API for reading files. +``` -.. _raw_ostream: +(raw_ostream)= -Use ``raw_ostream`` -^^^^^^^^^^^^^^^^^^^ +#### Use `raw_ostream` LLVM includes a lightweight, simple, and efficient stream implementation in -``llvm/Support/raw_ostream.h``, which provides all of the common features of -``std::ostream``. All new code should use ``raw_ostream`` instead of -``ostream``. +`llvm/Support/raw_ostream.h`, which provides all of the common features of +`std::ostream`. All new code should use `raw_ostream` instead of +`ostream`. -Unlike ``std::ostream``, ``raw_ostream`` is not a template and can be forward -declared as ``class raw_ostream``. Public headers should generally not include -the ``raw_ostream`` header, but use forward declarations and constant references -to ``raw_ostream`` instances. +Unlike `std::ostream`, `raw_ostream` is not a template and can be forward +declared as `class raw_ostream`.
Public headers should generally not include +the `raw_ostream` header, but use forward declarations and constant references +to `raw_ostream` instances. -Avoid ``std::endl`` -^^^^^^^^^^^^^^^^^^^ +#### Avoid `std::endl` -The ``std::endl`` modifier, when used with ``iostreams`` outputs a newline to +The `std::endl` modifier, when used with `iostreams` outputs a newline to the output stream specified. In addition to doing this, however, it also flushes the output stream. In other words, these are equivalent: -.. code-block:: c++ - - std::cout << std::endl; - std::cout << '\n' << std::flush; +```c++ +std::cout << std::endl; +std::cout << '\n' << std::flush; +``` Most of the time, you probably have no reason to flush the output stream, so -it's better to use a literal ``'\n'``. +it's better to use a literal `'\n'`. -Don't use ``inline`` when defining a function in a class definition -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Don't use `inline` when defining a function in a class definition A member function defined in a class definition is implicitly inline, so don't -put the ``inline`` keyword in this case. +put the `inline` keyword in this case. Don't: -.. code-block:: c++ - - class Foo { - public: - inline void bar() { - // ... - } - }; +```c++ +class Foo { +public: + inline void bar() { + // ... + } +}; +``` Do: -.. code-block:: c++ - - class Foo { - public: - void bar() { - // ... - } - }; +```c++ +class Foo { +public: + void bar() { + // ... + } +}; +``` -Microscopic Details -------------------- +### Microscopic Details This section describes preferred low-level formatting guidelines along with reasoning on why we prefer them. -Spaces Before Parentheses -^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Spaces Before Parentheses Put a space before an open parenthesis only in control flow statements, but not in normal function call expressions and function-like macros. For example: -.. code-block:: c++ - - if (X) ... - for (I = 0; I != 100; ++I) ... 
- while (LLVMRocks) ... +```c++ +if (X) ... +for (I = 0; I != 100; ++I) ... +while (LLVMRocks) ... - somefunc(42); - assert(3 != 4 && "laws of math are failing me"); +somefunc(42); +assert(3 != 4 && "laws of math are failing me"); - A = foo(42, 92) + bar(X); +A = foo(42, 92) + bar(X); +``` The reason for doing this is not completely arbitrary. This style makes control flow operators stand out more, and makes expressions flow better. -Prefer Preincrement -^^^^^^^^^^^^^^^^^^^ +#### Prefer Preincrement -Hard fast rule: Preincrement (``++X``) may be no slower than postincrement -(``X++``) and could very well be a lot faster than it. Use preincrementation +Hard fast rule: Preincrement (`++X`) may be no slower than postincrement +(`X++`) and could very well be a lot faster than it. Use preincrementation whenever possible. The semantics of postincrement include making a copy of the value being @@ -1587,36 +1536,35 @@ copying an iterator could invoke the copy ctor's of these as well). In general, get in the habit of always using preincrement, and you won't have a problem. -Namespace Indentation -^^^^^^^^^^^^^^^^^^^^^ +#### Namespace Indentation In general, we strive to reduce indentation wherever possible. This is useful -because we want code to `fit into 80 columns`_ without excessive wrapping, but +because we want code to {ref}`fit into 80 columns ` without excessive wrapping, but also because it makes it easier to understand the code. To facilitate this and avoid some insanely deep nesting on occasion, don't indent namespaces. If it helps readability, feel free to add a comment indicating what namespace is -being closed by a ``}``. For example: +being closed by a `}`. For example: -.. code-block:: c++ +```c++ +namespace llvm { +namespace knowledge { - namespace llvm { - namespace knowledge { +/// This class represents things that Smith can have an intimate +/// understanding of and contains the data associated with it. +class Grokable { +... 
+public: + explicit Grokable() { ... } + virtual ~Grokable() = 0; - /// This class represents things that Smith can have an intimate - /// understanding of and contains the data associated with it. - class Grokable { ... - public: - explicit Grokable() { ... } - virtual ~Grokable() = 0; - ... +}; - }; - - } // namespace knowledge - } // namespace llvm +} // namespace knowledge +} // namespace llvm +``` Feel free to skip the closing comment when the namespace being closed is obvious for any reason. For example, the outer-most namespace in a header file @@ -1624,21 +1572,20 @@ is rarely a source of confusion. But namespaces both anonymous and named in source files that are being closed half way through the file probably could use clarification. -.. _static: +(static)= -Restrict Visibility -^^^^^^^^^^^^^^^^^^^ +#### Restrict Visibility Functions and variables should have the most restricted visibility possible. -For class members, that means using appropriate ``private``, ``protected``, or -``public`` keyword to restrict their access. +For class members, that means using appropriate `private`, `protected`, or +`public` keyword to restrict their access. For non-member functions, variables, and classes, that means restricting -visibility to a single ``.cpp`` file if it is not referenced outside that file. +visibility to a single `.cpp` file if it is not referenced outside that file. Visibility of file-scope non-member variables and functions can be restricted to -the current translation unit by using either the ``static`` keyword or an anonymous +the current translation unit by using either the `static` keyword or an anonymous namespace. Anonymous namespaces are a great language feature that tells the C++ @@ -1646,8 +1593,8 @@ compiler that the contents of the namespace are only visible within the current translation unit, allowing more aggressive optimization and eliminating the possibility of symbol name collisions. 
-Anonymous namespaces are to C++ as ``static`` is to C functions and global -variables. While ``static`` is available in C++, anonymous namespaces are more +Anonymous namespaces are to C++ as `static` is to C functions and global +variables. While `static` is available in C++, anonymous namespaces are more general: they can make entire classes private to a file. The problem with anonymous namespaces is that they naturally want to encourage @@ -1659,57 +1606,56 @@ chunk of the file. Because of this, we have a simple guideline: make anonymous namespaces as small as possible, and only use them for class declarations. For example: -.. code-block:: c++ - - namespace { - class StringSort { +```c++ +namespace { +class StringSort { +... +public: + StringSort(...) + bool operator<(const char *RHS) const; +}; +} // namespace + +static void runHelper() { ... - public: - StringSort(...) - bool operator<(const char *RHS) const; - }; - } // namespace - - static void runHelper() { - ... - } +} - bool StringSort::operator<(const char *RHS) const { - ... - } +bool StringSort::operator<(const char *RHS) const { + ... +} +``` Avoid putting declarations other than classes into anonymous namespaces: -.. code-block:: c++ - - namespace { +```c++ +namespace { - // ... many declarations ... +// ... many declarations ... - void runHelper() { - ... - } +void runHelper() { + ... +} - // ... many declarations ... +// ... many declarations ... - } // namespace +} // namespace +``` -When you are looking at ``runHelper`` in the middle of a large C++ file, +When you are looking at `runHelper` in the middle of a large C++ file, you have no immediate way to tell if this function is local to the file. In contrast, when the function is marked static, you don't need to cross-reference faraway places in the file to tell that the function is local: -.. code-block:: c++ - - static void runHelper() { - ... - } +```c++ +static void runHelper() { + ... 
+} +``` -Don't Use Braces on Simple Single-Statement Bodies of if/else/loop Statements -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Don't Use Braces on Simple Single-Statement Bodies of if/else/loop Statements -When writing the body of an ``if``, ``else``, or ``for``/``while`` loop +When writing the body of an `if`, `else`, or `for`/`while` loop statement, we aim to reduce unnecessary line noise. **Omit braces when:** @@ -1717,7 +1663,7 @@ statement, we aim to reduce unnecessary line noise. * The body consists of a single **simple** statement. * The single statement is not preceded by a comment. (Hoist comments above the control statement if you can.) -* An ``else`` clause, if present, also meets the above criteria (single +* An `else` clause, if present, also meets the above criteria (single simple statement, no associated comments). **Use braces in all other cases, including:** @@ -1726,121 +1672,120 @@ statement, we aim to reduce unnecessary line noise. * Single-statement bodies with non-hoistable comments * Complex single-statement bodies (e.g., deep nesting, complex nested loops) -* Inconsistent bracing within ``if``/``else if``/``else`` chains (if one +* Inconsistent bracing within `if`/`else if`/`else` chains (if one block requires braces, all must) -* ``if`` statements ending with a nested ``if`` lacking an ``else`` (to +* `if` statements ending with a nested `if` lacking an `else` (to prevent "dangling else") The examples below provide guidelines for these cases: -.. code-block:: c++ - - // Omit the braces since the body is simple and clearly associated with the - // `if`. - if (isa(D)) - handleFunctionDecl(D); - else if (isa(D)) - handleVarDecl(D); - - // Here we document the condition itself and not the body. 
- if (isa(D)) { - // It is necessary that we explain the situation with this surprisingly long - // comment, so it would be unclear without the braces whether the following - // statement is in the scope of the `if`. - // Because the condition is documented, we can't really hoist this - // comment that applies to the body above the `if`. - handleOtherDecl(D); - } - - // Use braces on the outer `if` to avoid a potential dangling `else` - // situation. - if (isa(D)) { - if (shouldProcessAttr(A)) - handleAttr(A); - } - - // Use braces for the `if` block to keep it uniform with the `else` block. - if (isa(D)) { - handleFunctionDecl(D); - } else { - // In this `else` case, it is necessary that we explain the situation with - // this surprisingly long comment, so it would be unclear without the braces - // whether the following statement is in the scope of the `if`. - handleOtherDecl(D); - } - - // Use braces for the `else if` and `else` block to keep it uniform with the - // `if` block. - if (isa(D)) { - verifyFunctionDecl(D); - handleFunctionDecl(D); - } else if (isa(D)) { - handleGlobalVarDecl(D); - } else { - handleOtherDecl(D); - } - - // This should also omit braces. The `for` loop contains only a single - // statement, so it shouldn't have braces. The `if` also only contains a - // single simple statement (the `for` loop), so it also should omit braces. - if (isa(D)) - for (auto *A : D.attrs()) - handleAttr(A); - - // Use braces for a `do-while` loop and its enclosing statement. - if (Tok->is(tok::l_brace)) { - do { - Tok = Tok->Next; - } while (Tok); - } - - // Use braces for the outer `if` since the nested `for` is braced. - if (isa(D)) { - for (auto *A : D.attrs()) { - // In this `for` loop body, it is necessary that we explain the situation - // with this surprisingly long comment, forcing braces on the `for` block. - handleAttr(A); - } - } - - // Use braces on the outer block because there are more than two levels of - // nesting. 
- if (isa(D)) { - for (auto *A : D.attrs()) - for (ssize_t i : llvm::seq(count)) - handleAttrOnDecl(D, A, i); - } - - // Use braces on the outer block because of a nested `if`; otherwise, the - // compiler would warn: `add explicit braces to avoid dangling else` - if (auto *D = dyn_cast(D)) { - if (shouldProcess(D)) - handleVarDecl(D); - else - markAsIgnored(D); +```c++ +// Omit the braces since the body is simple and clearly associated with the +// `if`. +if (isa(D)) + handleFunctionDecl(D); +else if (isa(D)) + handleVarDecl(D); + +// Here we document the condition itself and not the body. +if (isa(D)) { + // It is necessary that we explain the situation with this surprisingly long + // comment, so it would be unclear without the braces whether the following + // statement is in the scope of the `if`. + // Because the condition is documented, we can't really hoist this + // comment that applies to the body above the `if`. + handleOtherDecl(D); +} + +// Use braces on the outer `if` to avoid a potential dangling `else` +// situation. +if (isa(D)) { + if (shouldProcessAttr(A)) + handleAttr(A); +} + +// Use braces for the `if` block to keep it uniform with the `else` block. +if (isa(D)) { + handleFunctionDecl(D); +} else { + // In this `else` case, it is necessary that we explain the situation with + // this surprisingly long comment, so it would be unclear without the braces + // whether the following statement is in the scope of the `if`. + handleOtherDecl(D); +} + +// Use braces for the `else if` and `else` block to keep it uniform with the +// `if` block. +if (isa(D)) { + verifyFunctionDecl(D); + handleFunctionDecl(D); +} else if (isa(D)) { + handleGlobalVarDecl(D); +} else { + handleOtherDecl(D); +} + +// This should also omit braces. The `for` loop contains only a single +// statement, so it shouldn't have braces. The `if` also only contains a +// single simple statement (the `for` loop), so it also should omit braces. 
+if (isa(D)) + for (auto *A : D.attrs()) + handleAttr(A); + +// Use braces for a `do-while` loop and its enclosing statement. +if (Tok->is(tok::l_brace)) { + do { + Tok = Tok->Next; + } while (Tok); +} + +// Use braces for the outer `if` since the nested `for` is braced. +if (isa(D)) { + for (auto *A : D.attrs()) { + // In this `for` loop body, it is necessary that we explain the situation + // with this surprisingly long comment, forcing braces on the `for` block. + handleAttr(A); } +} + +// Use braces on the outer block because there are more than two levels of +// nesting. +if (isa(D)) { + for (auto *A : D.attrs()) + for (ssize_t i : llvm::seq(count)) + handleAttrOnDecl(D, A, i); +} + +// Use braces on the outer block because of a nested `if`; otherwise, the +// compiler would warn: `add explicit braces to avoid dangling else` +if (auto *D = dyn_cast(D)) { + if (shouldProcess(D)) + handleVarDecl(D); + else + markAsIgnored(D); +} +``` -Use Unix line endings for files -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Use Unix line endings for files Use Unix line endings for all files. CRLF line endings are allowed as an exception for test files that intend to test CRLF handling or when the file -format requires it (like ``.bat`` or ``.rc`` files). +format requires it (like `.bat` or `.rc` files). -See Also -======== +## See Also A lot of these comments and recommendations have been culled from other sources. Two particularly important books for our work are: -#. `Effective C++ - `_ +1. [Effective C++] by Scott Meyers. Also interesting and useful are "More Effective C++" and "Effective STL" by the same author. -#. `Large-Scale C++ Software Design - `_ +1. 
[Large-Scale C++ Software Design] by John Lakos +[Effective C++]: https://www.amazon.com/Effective-Specific-Addison-Wesley-Professional-Computing/dp/0321334876 +[Large-Scale C++ Software Design]: https://www.amazon.com/Large-Scale-Software-Design-John-Lakos/dp/0201633620 + If you get some free time, and you haven't read them: do so, you might learn something. diff --git a/llvm/docs/CommandGuide/index.md b/llvm/docs/CommandGuide/index.md index 5421b133e96c3..6b126837343e2 100644 --- a/llvm/docs/CommandGuide/index.md +++ b/llvm/docs/CommandGuide/index.md @@ -1,105 +1,104 @@ -LLVM Command Guide ------------------- +# LLVM Command Guide The following documents are command descriptions for all of the LLVM tools. These pages describe how to use the LLVM commands and what their options are. Note that these pages do not describe all of the options available for all -tools. To get a complete listing, pass the ``--help`` (general options) or -``--help-hidden`` (general and debugging options) arguments to the tool you are +tools. To get a complete listing, pass the `--help` (general options) or +`--help-hidden` (general and debugging options) arguments to the tool you are interested in. -Basic Commands -~~~~~~~~~~~~~~ - -.. toctree:: - :maxdepth: 1 - - dsymutil - llc - lli - llubi - llvm-as - llvm-cgdata - llvm-config - llvm-cov - llvm-cxxmap - llvm-debuginfo-analyzer - llvm-diff - llvm-dis - llvm-dwarfdump - llvm-dwarfutil - llvm-extract-bundle-entry - llvm-ir2vec - llvm-lib - llvm-libtool-darwin - llvm-link - llvm-lipo - llvm-mc - llvm-mca - llvm-opt-report - llvm-otool - llvm-profdata - llvm-readobj - llvm-reduce - llvm-stress - llvm-symbolizer - opt - -GNU binutils replacements -~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. toctree:: - :maxdepth: 1 - - llvm-addr2line - llvm-ar - llvm-cxxfilt - llvm-install-name-tool - llvm-nm - llvm-objcopy - llvm-objdump - llvm-ranlib - llvm-readelf - llvm-size - llvm-strings - llvm-strip - -Debugging Tools -~~~~~~~~~~~~~~~ - -.. 
toctree:: - :maxdepth: 1 - - llvm-extract - llvm-bcanalyzer - llvm-reduce - -Developer Tools -~~~~~~~~~~~~~~~ - -.. toctree:: - :maxdepth: 1 - - FileCheck - tblgen - clang-tblgen - lldb-tblgen - llvm-tblgen - mlir-tblgen - lit - llvm-exegesis - llvm-ifs - llvm-locstats - llvm-test-mustache-spec - llvm-pdbutil - llvm-profgen - llvm-tli-checker - llvm-offload-binary - -Remarks Tools -~~~~~~~~~~~~~~ - -.. toctree:: - :maxdepth: 1 - - llvm-remarkutil +## Basic Commands + +```{toctree} +:maxdepth: 1 + +dsymutil +llc +lli +llubi +llvm-as +llvm-cgdata +llvm-config +llvm-cov +llvm-cxxmap +llvm-debuginfo-analyzer +llvm-diff +llvm-dis +llvm-dwarfdump +llvm-dwarfutil +llvm-extract-bundle-entry +llvm-ir2vec +llvm-lib +llvm-libtool-darwin +llvm-link +llvm-lipo +llvm-mc +llvm-mca +llvm-opt-report +llvm-otool +llvm-profdata +llvm-readobj +llvm-reduce +llvm-stress +llvm-symbolizer +opt +``` + +## GNU binutils replacements + +```{toctree} +:maxdepth: 1 + +llvm-addr2line +llvm-ar +llvm-cxxfilt +llvm-install-name-tool +llvm-nm +llvm-objcopy +llvm-objdump +llvm-ranlib +llvm-readelf +llvm-size +llvm-strings +llvm-strip +``` + +## Debugging Tools + +```{toctree} +:maxdepth: 1 + +llvm-extract +llvm-bcanalyzer +llvm-reduce +``` + +## Developer Tools + +```{toctree} +:maxdepth: 1 + +FileCheck +tblgen +clang-tblgen +lldb-tblgen +llvm-tblgen +mlir-tblgen +lit +llvm-exegesis +llvm-ifs +llvm-locstats +llvm-test-mustache-spec +llvm-pdbutil +llvm-profgen +llvm-tli-checker +llvm-offload-binary +``` + +## Remarks Tools + +```{toctree} +:maxdepth: 1 + +llvm-remarkutil +``` diff --git a/llvm/docs/Contributing.md b/llvm/docs/Contributing.md index b6475e2f6c3a7..3dba6cdc22fed 100644 --- a/llvm/docs/Contributing.md +++ b/llvm/docs/Contributing.md @@ -1,124 +1,119 @@ -================================== -Contributing to LLVM -================================== +# Contributing to LLVM Thank you for your interest in contributing to LLVM! 
There are multiple ways to contribute, and we appreciate all contributions. If you have questions, -you can either use the `Forum`_ or, for a more interactive chat, go to our -`Discord server`_. +you can either use the [Forum] or, for a more interactive chat, go to our +[Discord server]. -If you want to contribute code, please familiarize yourself with the :doc:`DeveloperPolicy`. +If you want to contribute code, please familiarize yourself with the {doc}`DeveloperPolicy`. -.. contents:: - :local: +```{contents} +:local: +``` -Ways to Contribute -================== +## Ways to Contribute -Bug Reports ------------ +### Bug Reports If you are working with LLVM and run into a bug, we definitely want to know about it. Please follow the instructions in -:doc:`HowToSubmitABug` to create a bug report. +{doc}`HowToSubmitABug` to create a bug report. -Bug Fixes ---------- +### Bug Fixes If you are interested in contributing code to LLVM, bugs labeled with the -`good first issue`_ keyword in the `bug tracker`_ are a good way to get familiar with +[good first issue] keyword in the [bug tracker] are a good way to get familiar with the code base. If you are interested in fixing a bug, please comment on it to let people know you are working on it. Then try to reproduce and fix the bug with upstream LLVM. Start by building -LLVM from source as described in :doc:`GettingStarted` and +LLVM from source as described in {doc}`GettingStarted` and use the built binaries to reproduce the failure described in the bug. Use a debug build (`-DCMAKE_BUILD_TYPE=Debug`) or a build with assertions (`-DLLVM_ENABLE_ASSERTIONS=On`, enabled for Debug builds). -Reporting a Security Issue --------------------------- +### Reporting a Security Issue -There is a separate process to submit security-related bugs, see :ref:`report-security-issue`. +There is a separate process to submit security-related bugs, see {ref}`report-security-issue`. 
-Bigger Pieces of Work ---------------------- +### Bigger Pieces of Work If you are interested in taking on a bigger piece of work, a list of -interesting projects is maintained at the `LLVM's Open Projects page`_. If +interesting projects is maintained at the [LLVM's Open Projects page]. If you are interested in working on any of these projects, please post on the -`Forum`_, so that we know the project is being worked on. +[Forum], so that we know the project is being worked on. -.. _submit_patch: +(submit_patch)= -How to Submit a Patch -===================== +## How to Submit a Patch Once you have a patch ready, it is time to submit it. The patch should: * include a small unit test -* conform to the :doc:`CodingStandards`. You can use the `clang-format-diff.py`_ or `git-clang-format`_ tools to automatically format your patch properly. +* conform to the {doc}`CodingStandards`. You can use the [clang-format-diff.py] or [git-clang-format] tools to automatically format your patch properly. * not contain any unrelated changes * be an isolated change. Independent changes should be submitted as separate patches as this makes reviewing easier. -* have a single commit, up-to-date with the upstream ``origin/main`` branch, and don't have merges. +* have a single commit, up-to-date with the upstream `origin/main` branch, and don't have merges. -.. _format patches: +(format patches)= Before sending a patch for review, please also ensure it is -formatted properly. We use ``clang-format`` for this, which has git integration -through the ``git-clang-format`` script. On some systems, it may already be +formatted properly. We use `clang-format` for this, which has git integration +through the `git-clang-format` script. On some systems, it may already be installed (or be installable via your package manager). If so, you can simply run it -- the following command will format only the code changed in the most recent commit: -.. 
code-block:: console +```console +% git clang-format HEAD~1 +``` - % git clang-format HEAD~1 +```{note} +For some patches, formatting them may add changes that obscure the intent of +the patch. For example, adding to an enum that was not previously formatted +may result in the entire enum being reformatted. This happens because not all +of the LLVM Project conforms to LLVM's clang-format style at this time. -.. note:: - For some patches, formatting them may add changes that obscure the intent of - the patch. For example, adding to an enum that was not previously formatted - may result in the entire enum being reformatted. This happens because not all - of the LLVM Project conforms to LLVM's clang-format style at this time. +If you think that this might be the case for your changes, or are unsure, we +recommend that you add the formatting changes as a **separate commit** within +the Pull Request. - If you think that this might be the case for your changes, or are unsure, we - recommend that you add the formatting changes as a **separate commit** within - the Pull Request. +Reviewers may request that this formatting commit be made into a separate Pull +Request that will be merged before your actual changes. - Reviewers may request that this formatting commit be made into a separate Pull - Request that will be merged before your actual changes. +This means that if the formatting changes are the first commit, you will have +an easier time doing this. If they are not, that is ok too, but you will have +to do a bit more work to separate it out. +``` - This means that if the formatting changes are the first commit, you will have - an easier time doing this. If they are not, that is ok too, but you will have - to do a bit more work to separate it out. 
- -Note that ``git clang-format`` modifies the files, but does not commit them -- +Note that `git clang-format` modifies the files, but does not commit them -- you will likely want to run one of the following to add the changes to a commit: -.. code-block:: console - - # To create a new commit. - % git commit -a - # To add to the most recent commit. - % git commit --amend -a +```console +# To create a new commit. +% git commit -a +# To add to the most recent commit. +% git commit --amend -a +``` -.. note:: - If you don't already have ``clang-format`` or ``git clang-format`` installed - on your system, the ``clang-format`` binary will be built alongside clang, and - the git integration can be run from - ``clang/tools/clang-format/git-clang-format``. +```{note} +If you don't already have `clang-format` or `git clang-format` installed +on your system, the `clang-format` binary will be built alongside clang, and +the git integration can be run from +`clang/tools/clang-format/git-clang-format`. +``` The LLVM project has migrated to GitHub Pull Requests as its review process. For more information about the workflow of using GitHub Pull Requests see our -:ref:`GitHub ` documentation. We still have a read-only -`LLVM's Phabricator `_ instance. +{ref}`GitHub ` documentation. We still have a read-only +[LLVM's Phabricator](https://reviews.llvm.org) instance. To make sure the right people see your patch, please select suitable reviewers and add them to your patch when requesting a review. Suitable reviewers are the maintainers of the project you are modifying, and anyone else working in the area your patch touches. To find maintainers, look for -the ``Maintainers.md`` file in the root of the project's -sub-directory. For example, LLVM's is ``llvm/Maintainers.md`` and -clang-tools-extra's is ``clang-tools-extra/Maintainers.md``. +the `Maintainers.md` file in the root of the project's +sub-directory. 
For example, LLVM's is `llvm/Maintainers.md` and +clang-tools-extra's is `clang-tools-extra/Maintainers.md`. If you are a new contributor, you will not be able to select reviewers in such a way, in which case you can still get the attention of potential reviewers by CC'ing @@ -135,32 +130,32 @@ explicitly, as reviewers' default assumption is that you are able to merge your own PR. For more information on LLVM's code-review process, please see -:doc:`CodeReview`. +{doc}`CodeReview`. -.. _commit_from_git: +(commit_from_git)= -For developers to commit changes from Git ------------------------------------------ +### For developers to commit changes from Git -.. note:: - See also :ref:`GitHub <github-reviews>` for more details on merging your changes - into LLVM project monorepo. +```{note} +See also {ref}`GitHub <github-reviews>` for more details on merging your changes +into LLVM project monorepo. +``` Once a pull request is approved, you can select the "Squash and merge" button in the GitHub web interface. -When pushing directly from the command-line to the ``main`` branch, you will need +When pushing directly from the command-line to the `main` branch, you will need to rebase your change. LLVM has a linear-history policy, which means -that merge commits are not allowed, and the ``main`` branch is configured to reject +that merge commits are not allowed, and the `main` branch is configured to reject pushes that include merges. GitHub will display a message that looks like this: -.. code-block:: console - - remote: Bypassed rule violations for refs/heads/main: - remote: - remote: - Required status check “buildkite/github-pull-requests” is expected. +```console +remote: Bypassed rule violations for refs/heads/main: +remote: +remote: - Required status check “buildkite/github-pull-requests” is expected. +``` This can seem scary, but this is just an artifact of the GitHub setup: it is intended as a warning for people merging pull-requests with failing CI.
We can't @@ -168,56 +163,52 @@ disable it for people pushing on the command-line. Please ask for help if you're having trouble with your particular git workflow. -.. _git_pre_push_hook: +(git_pre_push_hook)= -Git pre-push hook -^^^^^^^^^^^^^^^^^ +#### Git pre-push hook We include an optional pre-push hook that runs some sanity checks on the revisions you are about to push and asks for confirmation if you push multiple commits at once. You can set it up (on Unix systems) by running from the repository root: -.. code-block:: console - - % ln -sf ../../llvm/utils/git/pre-push.py .git/hooks/pre-push +```console +% ln -sf ../../llvm/utils/git/pre-push.py .git/hooks/pre-push +``` -Helpful Information About LLVM -============================== -:doc:`LLVM's documentation <index>` provides a wealth of information about LLVM's internals as +## Helpful Information About LLVM +{doc}`LLVM's documentation <index>` provides a wealth of information about LLVM's internals as well as various user guides. The pages listed below should provide a good overview of LLVM's high-level design, as well as its internals: -:doc:`GettingStarted` - Discusses how to get up and running quickly with the LLVM infrastructure. - Everything from unpacking and compilation of the distribution to execution - of some tools. +{doc}`GettingStarted` +: Discusses how to get up and running quickly with the LLVM infrastructure. + Everything from unpacking and compilation of the distribution to execution + of some tools. -:doc:`LangRef` - Defines the LLVM intermediate representation. +{doc}`LangRef` +: Defines the LLVM intermediate representation. -:doc:`ProgrammersManual` - Introduction to the general layout of the LLVM sourcebase, important classes +{doc}`ProgrammersManual` +: Introduction to the general layout of the LLVM sourcebase, important classes and APIs, and some tips & tricks. -`LLVM for Grad Students`__ - This is an introduction to the LLVM infrastructure by Adrian Sampson.
While it +[LLVM for Grad Students] +: This is an introduction to the LLVM infrastructure by Adrian Sampson. While it has been written for grad students, it provides a good, compact overview of LLVM's architecture, LLVM's IR and how to write a new pass. - .. __: http://www.cs.cornell.edu/~asampson/blog/llvm.html - -`Intro to LLVM`__ - Book chapter providing a compiler hacker's introduction to LLVM. - - .. __: http://www.aosabook.org/en/llvm.html - -.. _Forum: https://discourse.llvm.org -.. _Discord server: https://discord.gg/xS7Z362 -.. _irc.oftc.net: irc://irc.oftc.net/llvm -.. _good first issue: https://github.com/llvm/llvm-project/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22 -.. _bug tracker: https://github.com/llvm/llvm-project/issues -.. _clang-format-diff.py: https://github.com/llvm/llvm-project/blob/main/clang/tools/clang-format/clang-format-diff.py -.. _git-clang-format: https://github.com/llvm/llvm-project/blob/main/clang/tools/clang-format/git-clang-format -.. _LLVM's GitHub: https://github.com/llvm/llvm-project -.. _LLVM's Phabricator (read-only): https://reviews.llvm.org/ -.. _LLVM's Open Projects page: https://llvm.org/OpenProjects.html#what +[Intro to LLVM] +: Book chapter providing a compiler hacker's introduction to LLVM. 
+ +[Forum]: https://discourse.llvm.org +[Discord server]: https://discord.gg/xS7Z362 +[irc.oftc.net]: irc://irc.oftc.net/llvm +[good first issue]: https://github.com/llvm/llvm-project/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22 +[bug tracker]: https://github.com/llvm/llvm-project/issues +[clang-format-diff.py]: https://github.com/llvm/llvm-project/blob/main/clang/tools/clang-format/clang-format-diff.py +[git-clang-format]: https://github.com/llvm/llvm-project/blob/main/clang/tools/clang-format/git-clang-format +[LLVM's GitHub]: https://github.com/llvm/llvm-project +[LLVM's Phabricator (read-only)]: https://reviews.llvm.org/ +[LLVM's Open Projects page]: https://llvm.org/OpenProjects.html#what +[LLVM for Grad Students]: http://www.cs.cornell.edu/~asampson/blog/llvm.html +[Intro to LLVM]: http://www.aosabook.org/en/llvm.html diff --git a/llvm/docs/DeveloperPolicy.md b/llvm/docs/DeveloperPolicy.md index e302b2d79dc1b..a447903167131 100644 --- a/llvm/docs/DeveloperPolicy.md +++ b/llvm/docs/DeveloperPolicy.md @@ -1,112 +1,99 @@ -.. _developer_policy: +(developer_policy)= +# LLVM Developer Policy -===================== -LLVM Developer Policy -===================== +```{contents} +:local: +``` -.. contents:: - :local: - -Introduction -============ +## Introduction This document contains the LLVM Developer Policy which defines the project's policy towards developers and their contributions. The intent of this policy is to eliminate miscommunication, rework, and confusion that might arise from the -distributed nature of LLVM's development. By stating the policy in clear terms, +distributed nature of LLVM's development. By stating the policy in clear terms, we hope each developer can know ahead of time what to expect when making LLVM -contributions. This policy covers all llvm.org subprojects, including Clang, +contributions. This policy covers all llvm.org subprojects, including Clang, LLDB, libc++, MLIR, etc. 
The developer policy supports the following LLVM project objectives: -#. Attract both users and new contributors to the LLVM project. - -#. Help people contribute to LLVM by documenting our development practices. - -#. Maintain the stability, performance, and quality of the ``main`` branch. - -#. Establish the project's :ref:`copyright, license, and patent +1. Attract both users and new contributors to the LLVM project. +2. Help people contribute to LLVM by documenting our development practices. +3. Maintain the stability, performance, and quality of the `main` branch. +4. Establish the project's {ref}`copyright, license, and patent policies ` policies. -Developer Policies -================== +## Developer Policies -Communication Channels ----------------------- +### Communication Channels LLVM is a large project with many subcomponents, and it has a wide array of communication channels that you can use to keep track of recent developments, upcoming projects, new designs, enhancements, and other community business. -First and foremost is the `LLVM Discourse forums`_, which is the successor -to our former mailing lists (llvm-dev@, cfe-dev@, lldb-dev@, etc). This is -probably the most vital and active communication channel to our highly -distributed open source project. It enables long-form asynchronous text -communication, and this is where people tend to propose major changes or -propose new designs in the form of RFCs (Request For Comment), which are -described later. Please be aware that the Discourse forums are public and +First and foremost is the [LLVM Discourse forums](https://discourse.llvm.org), +which is the successor to our former mailing lists (llvm-dev@, cfe-dev@, +lldb-dev@, etc). This is probably the most vital and active communication +channel to our highly distributed open source project. 
It enables long-form +asynchronous text communication, and this is where people tend to propose major +changes or propose new designs in the form of RFCs (Request For Comment), which +are described later. Please be aware that the Discourse forums are public and archived, and that notices of confidentiality or non-disclosure cannot be respected. -We accept code contributions as :ref:`GitHub Pull Requests `. +We accept code contributions as {ref}`GitHub Pull Requests `. Our project is generally too large to subscribe to all github notifications, so if you want to be notified of pull requests affecting specific parts of the -code, you can join -one of the `pr-subscribers-* `_ -GitHub teams. This `mapping `_ +code, you can join one of the +[pr-subscribers-\*](https://github.com/orgs/llvm/teams?query=pr-subscribers) +GitHub teams. This +[mapping](https://github.com/llvm/llvm-project/blob/main/.github/new-prs-labeler.yml) documents the paths that trigger notifications for each of the listed teams. -Missing features and bugs are tracked through our `GitHub issue tracker -`_. You can subscribe for -notification for specific components by joining one of the `issue-subscribers-* -`_ teams. You may -also subscribe to the `llvm-bugs -`_ email list to subscribe to -the firehose of all issue notifications, which some community members use to -perform custom filtering. +Missing features and bugs are tracked through our [GitHub issue +tracker](https://github.com/llvm/llvm-project/issues). You can subscribe for +notification for specific components by joining one of the +[issue-subscribers-\*](https://github.com/orgs/llvm/teams?query=issue-subscribers) +teams. You may also subscribe to the +[llvm-bugs](http://lists.llvm.org/mailman/listinfo/llvm-bugs) email list to +subscribe to the firehose of all issue notifications, which some community +members use to perform custom filtering. 
Beyond the asynchronous written communication channels, LLVM has a Discord server for real-time chat communication, as well as a community calendar with -many regular workgroup video calls and office hours. See :doc:`GettingInvolved` +many regular workgroup video calls and office hours. See {doc}`GettingInvolved` for more information on other ways to engage with the community. -.. _patch: - -Making and Submitting a Patch ------------------------------ +(patch)= +### Making and Submitting a Patch Patches are submitted to GitHub and reviewed using Pull Requests. Follow the -:ref:`Getting Started Guide ` to check out sources, make a patch, and -then follow the :ref:`GitHub Pull Request ` guide to upload a +{ref}`Getting Started Guide ` to check out sources, make a patch, and +then follow the {ref}`GitHub Pull Request ` guide to upload a pull request. Here are some tips to enable a successful code review: -* :ref:`Include a test `. This tends to be one of the first +- {ref}`Include a test `. This tends to be one of the first things a reviewer will ask for and look at to understand what a new patch does. - -* Identify 2-3 individuals to review the patch. Look through the relevant - :ref:`Maintainers` file or browse git blame for likely stakeholders for the - code you want to modify, and add ``@username`` to a PR comment to notify them +- Identify 2-3 individuals to review the patch. Look through the relevant + {ref}`Maintainers` file or browse git blame for likely stakeholders for the + code you want to modify, and add `@username` to a PR comment to notify them of your PR if you are unable to add reviewers yourself due to GitHub permissions. +- To avoid precommit CI failures due to merge conflicts, base your patches on + a recent commit from `main`. If you want to make changes to a release + branch, land a change in `main` first and then follow the {ref}`backporting + instructions `. 
-* To avoid precommit CI failures due to merge conflicts, base your patches on a - recent commit from ``main``. If you want to make changes to a release branch, - land a change in ``main`` first and then follow the - :ref:`backporting instructions `. - -See :doc:`CodeReview` for more info on what to expect. +See {doc}`CodeReview` for more info on what to expect. When submitting patches, please do not add confidentiality or non-disclosure -notices to the patches themselves. These notices conflict with the LLVM +notices to the patches themselves. These notices conflict with the LLVM licensing terms and may result in your contribution being excluded. -.. _github-email-address: - -Email Addresses ---------------- +(github-email-address)= +### Email Addresses The LLVM project uses email to communicate to contributors outside of the GitHub platform about their past contributions. Primarily, our buildbot @@ -115,23 +102,19 @@ failures. Therefore, the LLVM community requires contributors to have a public email address associated with their GitHub commits, so please ensure that "Keep my -email addresses private" is disabled in your `account settings -`_. There are many free email forwarding -services available if you wish to keep your identity private. - -.. _code review: +email addresses private" is disabled in your [account +settings](https://github.com/settings/emails). There are many free email +forwarding services available if you wish to keep your identity private. -Code Reviews ------------- +(code review)= +### Code Reviews LLVM uses code review, which is a generally accepted software engineering best -practice for maintaining high code quality. Please see :doc:`CodeReview` for +practice for maintaining high code quality. Please see {doc}`CodeReview` for more information on LLVM's code review process. -.. 
_maintainers: - -Maintainers ------------ +(maintainers)= +### Maintainers The LLVM Project aims to evolve features quickly while continually being in a release-ready state. In order to accomplish this, the project needs volunteers @@ -141,21 +124,20 @@ products. Maintainers are those volunteers; they are regular contributors who volunteer to take on additional community responsibilities beyond code contributions. Community members can find active and inactive maintainers for a project in the -``Maintainers.md`` file at the root directory of the individual project. +`Maintainers.md` file at the root directory of the individual project. Maintainers are volunteering to take on the following shared responsibilities within an area of a project: -* ensure that commits receive high-quality review, either by the maintainer +- ensure that commits receive high-quality review, either by the maintainer or by someone else, -* help to confirm and comment on issues, -* mediate code review disagreements through collaboration with other +- help to confirm and comment on issues, +- mediate code review disagreements through collaboration with other maintainers (and other reviewers) to come to a consensus on how best to proceed with disputed changes, -* actively engage with relevant RFCs, -* aid release managers with backporting and other release-related - activities, -* be a point of contact for contributors who need help (answering questions +- actively engage with relevant RFCs, +- aid release managers with backporting and other release-related activities, +- be a point of contact for contributors who need help (answering questions on Discord/Discourse or holding office hours). Each top-level project in the monorepo will specify one or more @@ -172,10 +154,10 @@ project should be discontinued. All contributors with commit access to the LLVM Project are eligible to be a maintainer. 
However, we are looking for people who can commit to: -* engaging in their responsibilities the majority of the days in a month, -* ensuring that they, and the community members they interact with, abide by the - :ref:`LLVM Community Code of Conduct`, and -* performing these duties for at least three months. +- engaging in their responsibilities the majority of the days in a month, +- ensuring that they, and the community members they interact with, abide by + the {ref}`LLVM Community Code of Conduct`, and +- performing these duties for at least three months. We recognize that priorities shift, job changes happen, burnout is real, extended vacations are a blessing, and people's lives are generally complex. @@ -191,7 +173,7 @@ the same project vouches for their ability to perform the responsibilities and there are no explicit objections raised by the community. *To step down as a maintainer*, you can move your name to the "inactive -maintainers" section of the ``Maintainers.md`` file for +maintainers" section of the `Maintainers.md` file for the project, or remove your name entirely; no PR review is necessary. Additionally, any maintainer who has not been actively performing their responsibilities over an extended period of time can be moved to the "inactive @@ -206,133 +188,122 @@ as a maintainer is normal and does not prevent someone from resuming their activities as a maintainer in the future. *To resume activities as a maintainer*, you can post a PR moving your name from -the "inactive maintainers" section of the ``Maintainers.md`` +the "inactive maintainers" section of the `Maintainers.md` file to the active maintainers list. Because the volunteer was already previously accepted, they will be re-accepted so long as at least one maintainer in the same project approves the PR and there are no explicit objections raised by the community. -.. 
_include a testcase: - -Test Cases ----------- +(include a testcase)= +### Test Cases Developers are required to create test cases for any bugs fixed and any new -features added. Some tips for getting your testcase approved: - -* All feature and regression test cases are added to the ``test`` subdirectory - of each LLVM subproject, i.e. ``llvm-project/llvm/test`` for LLVM itself. The - appropriate sub-directory should be selected (see the - :doc:`Testing Guide ` for details). - -* We prefer that functional changes are tested using ``FileCheck`` and the tool - that fits most closely with the code being modified. For example, ``opt`` is - used to test IR transformations, ``llc`` for backend changes, and ``clang`` +features added. Some tips for getting your testcase approved: + +- All feature and regression test cases are added to the `test` subdirectory + of each LLVM subproject, i.e. `llvm-project/llvm/test` for LLVM itself. The + appropriate sub-directory should be selected (see the {doc}`Testing Guide + ` for details). +- We prefer that functional changes are tested using `FileCheck` and the tool + that fits most closely with the code being modified. For example, `opt` is + used to test IR transformations, `llc` for backend changes, and `clang` for frontend changes. Some components have scripts for generating and - updating golden tests in the ``utils/`` subproject directory, i.e. - `mlir/utils/generate-test-checks.py `_ - and `llvm/utils/update_llc_test_checks.py `_ - -* Some subprojects such as ``clang`` and ``clangd`` have project specific - testing tools, like the ``clang -verify`` flag (`docs - `_) - and the ``clangd -lit-test`` - flag, which are preferred over ``FileCheck``. - -* Changes to libraries, such as Support, which are not directly observable + updating golden tests in the `utils/` subproject directory, i.e. 
+ [mlir/utils/generate-test-checks.py](https://github.com/llvm/llvm-project/blob/main/mlir/utils/generate-test-checks.py) + and + [llvm/utils/update_llc_test_checks.py](https://github.com/llvm/llvm-project/blob/main/llvm/utils/update_llc_test_checks.py) +- Some subprojects such as `clang` and `clangd` have project specific testing + tools, like the `clang -verify` flag + ([docs](https://clang.llvm.org/docs/InternalsManual.html#verifying-diagnostics)) + and the `clangd -lit-test` flag, which are preferred over `FileCheck`. +- Changes to libraries, such as Support, which are not directly observable through tool invocations, are often best tested with unit tests. Unit tests - are located under the ``unittests`` subdirectory of each subproject. - -* Test cases should be targeted. Large inputs exhibiting bugs should be reduced - with tools like ``llvm-reduce`` before committing them to the suite. It is not - acceptable to place an entire failing program into ``llvm/test`` as this - creates a *time-to-test* burden on all developers. Please keep them short. - -* Avoid adding links to resources that are not available to the entire + are located under the `unittests` subdirectory of each subproject. +- Test cases should be targeted. Large inputs exhibiting bugs should be + reduced with tools like `llvm-reduce` before committing them to the suite. + It is not acceptable to place an entire failing program into `llvm/test` as + this creates a *time-to-test* burden on all developers. Please keep them + short. +- Avoid adding links to resources that are not available to the entire community, such as links to private bug trackers, internal corporate documentation, etc. Instead, add sufficient comments to the test to provide the context behind such links. As a project, we prefer to separate tests into small in-tree tests, and large out-of-tree integration tests. 
More extensive integration test cases (e.g., -entire applications, benchmarks, etc) should be added to the `llvm-test-suite -<https://github.com/llvm/llvm-test-suite>`_ repository. The -``llvm-test-suite`` repository is for integration and application testing +entire applications, benchmarks, etc) should be added to the +[llvm-test-suite](https://github.com/llvm/llvm-test-suite) repository. The +`llvm-test-suite` repository is for integration and application testing (correctness, performance, etc), not feature or regression testing. It also serves to separate out third party code that falls under a different license. -Release Notes -------------- +### Release Notes Many projects in LLVM communicate important changes to users through release -notes, typically found in ``docs/ReleaseNotes.rst`` for the project. Changes to +notes, typically found in `docs/ReleaseNotes.rst` for the project. Changes to a project that are user-facing, or that users may wish to know about, should be added to the project's release notes at the author's or code reviewer's discretion, preferably as part of the commit landing the changes. Examples of changes that would typically warrant adding a release note (this list is not exhaustive): -* Adding, removing, or modifying command-line options. -* Adding, removing, or regrouping a diagnostic. -* Fixing a bug that potentially has significant user-facing impact (please link +- Adding, removing, or modifying command-line options. +- Adding, removing, or regrouping a diagnostic. +- Fixing a bug that potentially has significant user-facing impact (please link to the issue fixed in the bug database). -* Adding or removing optimizations that have widespread impact or enables new +- Adding or removing optimizations that have widespread impact or enable new programming paradigms. -* Modifying a C stable API. -* Notifying users about a potentially disruptive change expected to be made in - a future release, such as removal of a deprecated feature.
In this case, the - release note should be added to a ``Potentially Breaking Changes`` section of - the notes with sufficient information and examples to demonstrate the - potential disruption. Additionally, any new entries to this section should be - announced in the `Announcements `_ - channel on Discourse. See :ref:`breaking` for more details. +- Modifying a C stable API. +- Notifying users about a potentially disruptive change expected to be made + in a future release, such as removal of a deprecated feature. In this case, + the release note should be added to a `Potentially Breaking Changes` + section of the notes with sufficient information and examples to + demonstrate the potential disruption. Additionally, any new entries to this + section should be announced in the + [Announcements](https://discourse.llvm.org/c/announce/) channel on + Discourse. See {ref}`breaking` for more details. Code reviewers are encouraged to request a release note if they think one is warranted when performing a code review. -Quality -------- +### Quality The minimum quality standards that any change must satisfy before being committed to the main development branch are: -#. Code must adhere to the :doc:`LLVM Coding Standards `. - -#. Code must compile cleanly (no errors, no warnings) on at least one platform. - -#. Bug fixes and new features should `include a testcase`_ so we know if the - fix/feature ever regresses in the future. - -#. Pull requests should build and pass premerge checks. For first-time - contributors, this will require an initial cursory review to run the checks. - -#. Ensure that links in source code and test files point to publicly available +1. Code must adhere to the {doc}`LLVM Coding Standards `. +2. Code must compile cleanly (no errors, no warnings) on at least one + platform. +3. Bug fixes and new features should {ref}`include a testcase ` so we know if the fix/feature ever regresses in the future. +4. Pull requests should build and pass premerge checks. 
For first-time + contributors, this will require an initial cursory review to run the + checks. +5. Ensure that links in source code and test files point to publicly available resources and are used primarily to add additional information rather than to supply critical context. The surrounding comments should be sufficient to provide the context behind such links. Additionally, the committer is responsible for addressing any problems found in -the future that the change is responsible for. For example: - -* The code needs to compile cleanly and pass tests on all stable `LLVM - buildbots `_. - -* The changes should not cause any correctness regressions in the - `llvm-test-suite `_ - and must not cause any major performance regressions. - -* The change set should not cause performance or correctness regressions for the - LLVM tools. See `llvm-compile-time-tracker.com `_ - -* The changes should not cause performance or correctness regressions in code +the future that the change is responsible for. For example: + +- The code needs to compile cleanly and pass tests on all stable [LLVM + buildbots](https://lab.llvm.org/buildbot/). +- The changes should not cause any correctness regressions in the + [llvm-test-suite](https://github.com/llvm/llvm-test-suite) and must not + cause any major performance regressions. +- The change set should not cause performance or correctness regressions for + the LLVM tools. See + [llvm-compile-time-tracker.com](https://llvm-compile-time-tracker.com) +- The changes should not cause performance or correctness regressions in code compiled by LLVM on all applicable targets. +- You are expected to address any [GitHub + Issues](https://github.com/llvm/llvm-project/issues) that result from your + change. -* You are expected to address any `GitHub Issues - `_ that result from your change. - -Our build bots and `nightly testing infrastructure -`_ find many of these issues. 
Build bots +Our build bots and [nightly testing +infrastructure](https://llvm.org/docs/lnt/intro.html) find many of these issues. Build bots will directly email you if a group of commits that included yours caused a failure. You are expected to check the build bot messages to see if they are your fault and, if so, fix the breakage. However, keep in mind that if you @@ -344,11 +315,8 @@ Commits that violate these quality standards may be reverted (see below). This is necessary when the change blocks other developers from making progress. The developer is welcome to re-commit the change after the problem has been fixed. - -.. _commit messages: - -Commit messages ---------------- +(commit messages)= +### Commit messages Although we don't enforce the format of commit messages, we prefer that you follow these guidelines to help review, search in logs, email formatting @@ -364,55 +332,48 @@ TargetInfo" conveys almost all there is to the change. Below are some guidelines about the format of the message itself: -* Separate the commit message into title and body separated by a blank line. - -* In the situation where there are multiple authors, or in the rare situation +- Separate the commit message into title and body separated by a blank line. +- In the situation where there are multiple authors, or in the rare situation you are submitting a change for someone else (e.g., after putting an old - patch from someone else through review yourself), please use the `git - tag 'Co-authored-by:' to list the additional authors - `_. - See `Attribution of Changes`_ for more information including the method we - used for attribution before the project migrated to git. - -* The title should be concise. Because all commits are emailed to the list with - the first line as the subject, long titles are frowned upon. 
Short titles + patch from someone else through review yourself), please use the [git tag + 'Co-authored-by:' to list the additional + authors](https://github.blog/2018-01-29-commit-together-with-co-authors/). + See [Attribution of Changes](#attribution-of-changes) for more information + including the method we used for attribution before the project migrated to + git. +- The title should be concise. Because all commits are emailed to the list with + the first line as the subject, long titles are frowned upon. Short titles also look better in `git log`. - -* When the changes are restricted to a specific part of the code (e.g. a +- When the changes are restricted to a specific part of the code (e.g. a back-end or optimization pass), it is customary to add a tag to the - beginning of the line in square brackets. For example, "[SCEV] ..." - or "[OpenMP] ...". This helps email filters and searches for post-commit + beginning of the line in square brackets. For example, "\[SCEV\] \..." or + "\[OpenMP\] \...". This helps email filters and searches for post-commit reviews. - -* The body should be concise, but explanatory, including a complete - rationale. Unless it is required to understand the change, examples, - code snippets and gory details should be left to bug comments, web - review or the mailing list. - -* Text formatting and spelling should follow the same rules as documentation +- The body should be concise, but explanatory, including a complete + rationale. Unless it is required to understand the change, examples, code + snippets and gory details should be left to bug comments, web review or the + mailing list. +- Text formatting and spelling should follow the same rules as documentation and in-code comments, ex. capitalization, full stop, etc. 
- -* If the commit is a bug fix on top of another recently committed patch, or a +- If the commit is a bug fix on top of another recently committed patch, or a revert or reapply of a patch, include the git commit hash of the prior related commit. This could be as simple as "Revert commit NNNN because it caused issue #". - -* If the patch has been reviewed, add a link to its review page, as shown - `here `__. - If the patch fixes a bug in GitHub Issues, we encourage adding a reference to +- If the patch has been reviewed, add a link to its review page, as shown + [here](https://www.llvm.org/docs/Phabricator.html#committing-a-change). If + the patch fixes a bug in GitHub Issues, we encourage adding a reference to the issue being closed, as described - `here `__. - -* It is also acceptable to add other metadata to the commit message to automate + [here](https://llvm.org/docs/BugLifeCycle.html#resolving-closing-bugs). +- It is also acceptable to add other metadata to the commit message to automate processes, including for downstream consumers. This metadata can include links to resources that are not available to the entire community. However, such links and/or metadata should not be used in place of making the commit message self-explanatory. Note that such non-public links should not be included in the submitted code. - -* Avoid 'tagging' someone's username in your commits and PR descriptions - (e.g., `@`), doing so results in that account receiving a notification - every time the commit is cherry-picked and/or pushed to a fork. +- Avoid 'tagging' someone's username in your commits and PR descriptions + (e.g., `@`), doing so results in that account receiving a + notification every time the commit is cherry-picked and/or pushed to a + fork. LLVM uses a squash workflow for pull requests, so as the pull request evolves during review, it's important to update the pull request description over the @@ -424,10 +385,8 @@ squashing and merging PRs. 
For minor violations of these recommendations, the community normally favors reminding the contributor of this policy over reverting. -.. _revert_policy: - -Patch reversion policy ----------------------- +(revert_policy)= +### Patch reversion policy As a community, we strongly value having the tip of tree in a good state while allowing rapid iterative development. As such, we tend to make much heavier @@ -436,86 +395,86 @@ and our norms are a bit different. How should you respond if someone reverted your change? -* Remember, it is normal and healthy to have patches reverted. Having a patch +- Remember, it is normal and healthy to have patches reverted. Having a patch reverted does not necessarily mean you did anything wrong. -* We encourage explicitly thanking the person who reverted the patch for doing +- We encourage explicitly thanking the person who reverted the patch for doing the task on your behalf. -* If you need more information to address the problem, please follow up in the +- If you need more information to address the problem, please follow up in the original commit thread with the reverting patch author. When should you revert your own change? -* Any time you learn of a serious problem with a change, you should revert it. - We strongly encourage "revert to green" as opposed to "fixing forward". We - encourage reverting first, investigating offline, and then reapplying the - fixed patch - possibly after another round of review if warranted. -* If you break a buildbot in a way which can't be quickly fixed, please revert. -* If a test case that demonstrates a problem is reported in the commit thread, +- Any time you learn of a serious problem with a change, you should revert + it. We strongly encourage "revert to green" as opposed to "fixing + forward". We encourage reverting first, investigating offline, and then + reapplying the fixed patch - possibly after another round of review if + warranted. 
+- If you break a buildbot in a way which can't be quickly fixed, please + revert. +- If a test case that demonstrates a problem is reported in the commit thread, please revert and investigate offline. -* If you receive substantial :ref:`post-commit review ` +- If you receive substantial {ref}`post-commit review ` feedback, please revert and address said feedback before recommitting. (Possibly after another round of review.) -* If you are asked to revert by another contributor, please revert and discuss - the merits of the request offline (unless doing so would further destabilize - tip of tree). +- If you are asked to revert by another contributor, please revert and + discuss the merits of the request offline (unless doing so would further + destabilize tip of tree). When should you revert someone else's change? -* In general, if the author themselves would revert the change per these +- In general, if the author themselves would revert the change per these guidelines, we encourage other contributors to do so as a courtesy to the author. This is one of the major cases where our norms differ from others; - we generally consider reverting a normal part of development. We don't + we generally consider reverting a normal part of development. We don't expect contributors to be always available, and the assurance that a problematic patch will be reverted and we can return to it at our next opportunity enables this. What are the expectations around a revert? -* Use your best judgment. If you're uncertain, please start an email on - the commit thread asking for assistance. We aren't trying to enumerate +- Use your best judgment. If you're uncertain, please start an email on + the commit thread asking for assistance. We aren't trying to enumerate every case, but rather give a set of guidelines. -* You should be sure that reverting the change improves the stability of tip +- You should be sure that reverting the change improves the stability of tip of tree. 
Sometimes, reverting one change in a series can worsen things - instead of improving them. We expect reasonable judgment to ensure that + instead of improving them. We expect reasonable judgment to ensure that the proper patch or set of patches is being reverted. -* The commit message for the reverting commit should explain why patch - is being reverted. -* It is customary to respond to the original commit email mentioning the +- The commit message for the reverting commit should explain why the patch is + being reverted. +- It is customary to respond to the original commit email mentioning the revert. This serves as both a notice to the original author that their patch was reverted, and helps others following llvm-commits track context. -* Ideally, you should have a publicly reproducible test case ready to share. +- Ideally, you should have a publicly reproducible test case ready to share. Where possible, we encourage sharing of test cases in commit threads, or - in PRs. We encourage the reverter to minimize the test case and to prune + in PRs. We encourage the reverter to minimize the test case and to prune dependencies where practical. This even applies when reverting your own - patch; documenting the reasons for others who might be following along - is critical. -* It is not considered reasonable to revert without at least the promise to + patch; documenting the reasons for others who might be following along is + critical. +- It is not considered reasonable to revert without at least the promise to provide a means for the patch author to debug the root issue. If a situation arises where a public reproducer can not be shared for some reason (e.g. requires hardware patch author doesn't have access to, sharp regression in compile time of internal workload, etc.), the reverter is expected to be proactive about working with the patch author to debug and test candidate patches. -* Reverts should be reasonably timely.
A change submitted two hours ago - can be reverted without prior discussion. A change submitted two years ago - should not be. Where exactly the transition point is is hard to say, but - it's probably in the handful of days in tree territory. If you are unsure, +- Reverts should be reasonably timely. A change submitted two hours ago + can be reverted without prior discussion. A change submitted two years ago + should not be. Where exactly the transition point is is hard to say, but + it's probably in the handful of days in tree territory. If you are unsure, we encourage you to reply to the commit thread, give the author a bit to respond, and then proceed with the revert if the author doesn't seem to be actively responding. -* When re-applying a reverted patch, the commit message should be updated to +- When re-applying a reverted patch, the commit message should be updated to indicate the problem that was addressed and how it was addressed. -.. _obtaining_commit_access: +(obtaining_commit_access)= +### Obtaining Commit Access -Obtaining Commit Access ------------------------ - -Once you have 3 or more merged pull requests, you may use `this link -<https://github.com/llvm/llvm-project/issues/new?title=Request%20Commit%20Access%20For%20%3Cuser%3E&body=%23%23%23%20Why%20Are%20you%20requesting%20commit%20access%20?>`_ -to file an issue and request commit access. Replace the <user> string in the title +Once you have 3 or more merged pull requests, you may use [this +link](https://github.com/llvm/llvm-project/issues/new?title=Request%20Commit%20Access%20For%20%3Cuser%3E&body=%23%23%23%20Why%20Are%20you%20requesting%20commit%20access%20?) +to file an issue and request commit access. Replace the \<user\> string in the title with your github username, and explain why you are requesting commit access in -the issue description. Once the issue is created, you will need to get two +the issue description. Once the issue is created, you will need to get two current contributors to support your request before commit access will be granted. Reviewers of your committed patches will automatically be CCed upon creating the issue.
@@ -527,10 +486,9 @@ adhering to our Developer Policy and Code of Conduct. Reviewers should clearly s reasoning for accepting or rejecting the request, and finish with a clear statement such as "I approve of this request", "LGTM", or "I do not approve of this request". - If approved, a GitHub invitation will be sent to your GitHub account. In case you don't get notification from GitHub, go to -`Invitation Link `_ directly. Once +[Invitation Link](https://github.com/orgs/llvm/invitation) directly. Once you accept the invitation, you'll get commit access. Prior to obtaining commit access, it is common practice to request that @@ -540,20 +498,19 @@ property of the commit. For external tracking purposes, committed changes are automatically reflected on a commits mailing list soon after the commit lands (e.g. -llvm-commits@lists.llvm.org). Note that these mailing lists are moderated, and +<llvm-commits@lists.llvm.org>). Note that these mailing lists are moderated, and it is not unusual for a large commit to require a moderator to approve the email, so do not be concerned if a commit does not immediately appear in the archives. If you have recently been granted commit access, these policies apply: -#. You are granted *commit-after-approval* to all parts of LLVM. For - information on how to get approval for a patch, please see :doc:`CodeReview`. - When approved, you may commit it yourself. - -#. You are allowed to commit patches without approval which you think are - obvious. This is clearly a subjective decision --- we simply expect you to - use good judgement. Examples include: fixing build breakage, reverting +1. You are granted *commit-after-approval* to all parts of LLVM. For + information on how to get approval for a patch, please see + {doc}`CodeReview`. When approved, you may commit it yourself. +2. You are allowed to commit patches without approval which you think are + obvious. This is clearly a subjective decision \-\-- we simply expect you to + use good judgement.
Examples include: fixing build breakage, reverting obviously broken patches, documentation/comment changes, any other minor changes. Avoid committing formatting- or whitespace-only changes outside of code you plan to make subsequent changes to. Also, try to separate @@ -561,24 +518,21 @@ If you have recently been granted commit access, these policies apply: correcting the format first (ideally) or afterward. Such changes should be highly localized and the commit message should clearly state that the commit is not intended to change functionality, usually by stating it is - :ref:`NFC `. - -#. You are allowed to commit patches without approval to those portions of LLVM + {ref}`NFC `. +3. You are allowed to commit patches without approval to those portions of LLVM that you have contributed or maintain (i.e., have been assigned responsibility for), with the proviso that such commits must not break the - build. This is a "trust but verify" policy, and commits of this nature are + build. This is a "trust but verify" policy, and commits of this nature are reviewed after they are committed. - -#. Multiple violations of these policies or a single egregious violation may +4. Multiple violations of these policies or a single egregious violation may cause commit access to be revoked. -In any case, your changes are still subject to `code review`_ (either before or -after they are committed, depending on the nature of the change). You are -encouraged to review other peoples' patches as well, but you aren't required -to do so. +In any case, your changes are still subject to {ref}`code review ` +(either before or after they are committed, depending on the nature of the +change). You are encouraged to review other peoples' patches as well, but you +aren't required to do so. 
-Obtaining Other Access or Permissions -------------------------------------- +### Obtaining Other Access or Permissions To obtain access other than commit access, you can raise an issue like the one for obtaining commit access. However, instead of including PRs you have authored, @@ -588,10 +542,8 @@ For example, if you are helping to triage issues and want the ability to add labels, include links to issues you have triaged previously and explain how having this ability would help that work. -.. _discuss the change/gather consensus: - -Proposing Major Changes (RFCs) ------------------------------- +(discuss the change/gather consensus)= +### Proposing Major Changes (RFCs) The design of LLVM is carefully controlled to ensure that all the pieces fit together well and are as consistent as possible. If you plan to make a major @@ -603,14 +555,12 @@ what is possible. LLVM is a large community with many stakeholders, and before landing any major change, it is important to discuss the design of a change publicly with the -community. This is done by posting a Request For Comments (RFC) on the `LLVM -Discourse forums`_. See the :doc:`RFC process ` documentation for -more details. +community. This is done by posting a Request For Comments (RFC) on the [LLVM +Discourse forums](https://discourse.llvm.org). See the {doc}`RFC process +` documentation for more details. -.. _incremental-changes: - -Incremental Development ------------------------ +(incremental-changes)= +### Incremental Development In the LLVM project, we prefer the incremental development approach, where significant changes are developed in-tree incrementally. The alternative @@ -618,58 +568,49 @@ approach of implementing features in long-lived development branches or forks is discouraged, although we have accepted features developed this way in the past. Long-term development branches have a number of drawbacks: -#. Branches must have mainline merged into them periodically. If the branch +1. 
Branches must have mainline merged into them periodically. If the branch development and mainline development occur in the same pieces of code, resolving merge conflicts can take a lot of time. - -#. Other people in the community tend to ignore work on branches. - -#. Huge changes (produced when a branch is merged back onto mainline) are - extremely difficult to `code review`_. - -#. Branches are not routinely tested by our nightly tester infrastructure. - -#. Changes developed as monolithic large changes often don't work until the - entire set of changes is done. Breaking it down into a set of smaller - changes increases the odds that any of the work will be committed to the main - repository. +2. Other people in the community tend to ignore work on branches. +3. Huge changes (produced when a branch is merged back onto mainline) are + extremely difficult to {ref}`code review `. +4. Branches are not routinely tested by our nightly tester infrastructure. +5. Changes developed as monolithic large changes often don't work until the + entire set of changes is done. Breaking it down into a set of smaller + changes increases the odds that any of the work will be committed to the + main repository. To address these problems, LLVM uses an incremental development style and we require contributors to follow this practice when making a large/invasive -change. Some tips: +change. Some tips: -* Large/invasive changes usually have a number of secondary changes that are - required before the big change can be made (e.g. API cleanup, etc). These +- Large/invasive changes usually have a number of secondary changes that are + required before the big change can be made (e.g. API cleanup, etc). These sorts of changes can often be done before the major change is done, independently of that work. - -* The remaining inter-related work should be decomposed into unrelated sets of - changes if possible. 
Once this is done, define the first increment and get +- The remaining inter-related work should be decomposed into unrelated sets of + changes if possible. Once this is done, define the first increment and get consensus on what the end goal of the change is. - -* Each change in the set can be stand alone (e.g. to fix a bug), or part of a +- Each change in the set can be stand alone (e.g. to fix a bug), or part of a planned series of changes that works towards the development goal. - -* Each change should be kept as small as possible. This simplifies your work +- Each change should be kept as small as possible. This simplifies your work (into a logical progression), simplifies code review and reduces the chance that you will get negative feedback on the change. Small increments also facilitate the maintenance of a high quality code base. - -* Often, an independent precursor to a big change is to add a new API and slowly +- Often, an independent precursor to a big change is to add a new API and slowly migrate clients to use the new API. Each change to use the new API is often "obvious" and can be committed without review. Once the new API is in place and used, it is much easier to replace the underlying implementation of the API. This implementation change is logically separate from the API change. -If you are interested in making a large change, and this scares you, please make -sure to first `discuss the change/gather consensus`_ then ask about the best way -to go about making the change. - -.. _breaking: +If you are interested in making a large change, and this scares you, please +make sure to first {ref}`discuss the change/gather consensus ` then ask about the best way to go about making the +change. -Making Potentially Breaking Changes ------------------------------------ +(breaking)= +### Making Potentially Breaking Changes Please help notify users and vendors of potential disruptions when upgrading to a newer version of a tool. 
For example, deprecating a feature that is expected @@ -678,52 +619,51 @@ a diagnostic from a warning to an error, switching important default behavior, or any other potentially disruptive situation thought to be worth raising awareness of. For such changes, the following should be done: -* When performing the code review for the change, please add any applicable +- When performing the code review for the change, please add any applicable "vendors" github team to the review for their awareness. The purpose of these groups is to give vendors early notice that potentially disruptive changes are being considered but have not yet been accepted. Vendors can give early testing feedback on the changes to alert us to unacceptable breakages. The current list of vendor groups is: - * `Clang vendors `_ - * `libc++ vendors `_ + - [Clang vendors](https://github.com/orgs/llvm/teams/clang-vendors) + - [libc++ vendors](https://github.com/orgs/llvm/teams/libcxx-vendors) People interested in joining the vendors group can do so by clicking the "Join team" button on the linked github pages above. -* When committing the change to the repository, add appropriate information - about the potentially breaking changes to the ``Potentially Breaking Changes`` +- When committing the change to the repository, add appropriate information + about the potentially breaking changes to the `Potentially Breaking Changes` section of the project's release notes. The release note should have information about what the change is, what is potentially disruptive about it, as well as any code examples, links, and motivation that is appropriate to share with users. This helps users to learn about potential issues with upgrading to that release. -* After the change has been committed to the repository, the potentially +- After the change has been committed to the repository, the potentially disruptive changes described in the release notes should be posted to the - `Announcements `_ channel on - Discourse. 
The post should be tagged with the ``potentially-breaking`` label - and a label specific to the project (such as ``clang``, ``llvm``, etc). This + [Announcements](https://discourse.llvm.org/c/announce/) channel on + Discourse. The post should be tagged with the `potentially-breaking` label + and a label specific to the project (such as `clang`, `llvm`, etc). This is another mechanism by which we can give pre-release notice to users about potentially disruptive changes. It is a lower-traffic alternative to the joining "vendors" group. To automatically be notified of new announcements - with the ``potentially-breaking`` label, go to your user preferences page in + with the `potentially-breaking` label, go to your user preferences page in Discourse, and add the label to one of the watch categories under - ``Notifications->Tags``. + `Notifications->Tags`. -Attribution of Changes ----------------------- +### Attribution of Changes When contributors submit a patch to an LLVM project, other developers with -commit access may merge the PR for the author (based on the -progression of code review, etc.). GitHub will automatically ensure that -authorship is preserved, and one does not need to take any further action. We -do not want the source code to be littered with random attributions "this code -written by J. Random Hacker" (this is noisy and distracting). In practice, the -revision control system keeps a perfect history of who changed what, and the -CREDITS.txt file describes higher-level contributions. If you need to adjust -authorship for any reason, please follow the attribution of changes in the -simple manner as outlined by the `commit messages`_ section. Overall, please do +commit access may merge the PR for the author (based on the progression of code +review, etc.). GitHub will automatically ensure that authorship is preserved, +and one does not need to take any further action. 
We do not want the source +code to be littered with random attributions "this code written by J. Random +Hacker" (this is noisy and distracting). In practice, the revision control +system keeps a perfect history of who changed what, and the CREDITS.txt file +describes higher-level contributions. If you need to adjust authorship for any +reason, please follow the attribution of changes in the simple manner as +outlined by the [commit messages](#commit-messages) section. Overall, please do not add contributor names to the source code. Also, don't commit patches authored by others unless they have submitted the @@ -739,12 +679,11 @@ attribution mechanism. The previous method was to include "Patch by John Doe." in a separate line of the commit message and there are automated processes that rely on this format. -Bans ----- +### Bans The goal of a ban is to protect people in the community from having to interact with people who are consistently not respecting the -:ref:`LLVM Community Code of Conduct` in LLVM project spaces. Contributions of +{ref}`LLVM Community Code of Conduct` in LLVM project spaces. Contributions of any variety (pull requests, issue reports, forum posts, etc.) require interacting with the community. Therefore, we do not accept any form of direct contribution from a banned individual. @@ -756,122 +695,99 @@ the community regarding that contribution. Trying to evade a non-permanent ban results in getting banned permanently. When in doubt how to act in a specific instance, please reach out to -conduct@llvm.org for advice. +<conduct@llvm.org> for advice. - -.. _IR backwards compatibility: - -IR Backwards Compatibility --------------------------- +(IR backwards compatibility)= +### IR Backwards Compatibility When the IR format has to be changed, keep in mind that we try to maintain some backwards compatibility.
The rules are intended as a balance between convenience for llvm users and not imposing a big burden on llvm developers: -* The textual format is not backwards compatible. We don't change it too often, +- The textual format is not backwards compatible. We don't change it too often, but there are no specific promises. - -* Additions and changes to the IR should be reflected in - ``test/Bitcode/compatibility.ll``. - -* The current LLVM version supports loading any bitcode since version 3.0. - -* After each X.Y release, ``compatibility.ll`` must be copied to - ``compatibility-X.Y.ll``. The corresponding bitcode file should be assembled - using the X.Y build and committed as ``compatibility-X.Y.ll.bc``. - -* Newer releases can ignore features from older releases, but they cannot +- Additions and changes to the IR should be reflected in + `test/Bitcode/compatibility.ll`. +- The current LLVM version supports loading any bitcode since version 3.0. +- After each X.Y release, `compatibility.ll` must be copied to + `compatibility-X.Y.ll`. The corresponding bitcode file should be assembled + using the X.Y build and committed as `compatibility-X.Y.ll.bc`. +- Newer releases can ignore features from older releases, but they cannot miscompile them. For example, if nsw is ever replaced with something else, dropping it would be a valid way to upgrade the IR. - -* Debug metadata is special in that it is currently dropped during upgrades. - -* Non-debug metadata is defined to be safe to drop, so a valid way to upgrade +- Debug metadata is special in that it is currently dropped during upgrades. +- Non-debug metadata is defined to be safe to drop, so a valid way to upgrade it is to drop it. That is not very user friendly and a bit more effort is expected, but no promises are made. - -* Legacy bitcode may have degraded performance when compared to +- Legacy bitcode may have degraded performance when compared to the compiled output with the legacy compiler. 
-C API Changes -------------- +### C API Changes -* Stability Guarantees: The C API is, in general, a "best effort" for stability. +- Stability Guarantees: The C API is, in general, a "best effort" for stability. This means that we make every attempt to keep the C API stable, but that stability will be limited by the abstractness of the interface and the stability of the C++ API that it wraps. In practice, this means that things like "create debug info" or "create this type of instruction" are likely to be less stable than "take this IR file and JIT it for my current machine". - -* Release stability: We won't break the C API on the release branch with patches +- Release stability: We won't break the C API on the release branch with patches that go on that branch, with the exception that we will fix an unintentional C API break that will keep the release consistent with both the previous and next release. - -* Testing: Patches to the C API are expected to come with tests just like any +- Testing: Patches to the C API are expected to come with tests just like any other patch. - -* Including new things into the API: If an LLVM subcomponent has a C API already +- Including new things into the API: If an LLVM subcomponent has a C API already included, then expanding that C API is acceptable. Adding C API for subcomponents that don't currently have one needs to be discussed on the - `LLVM Discourse forums`_ for design and maintainability feedback prior to implementation. - -* Documentation: Any changes to the C API are required to be documented in the + [LLVM Discourse forums](https://discourse.llvm.org) for design and + maintainability feedback prior to implementation. +- Documentation: Any changes to the C API are required to be documented in the release notes so that it's clear to external users who do not follow the project how the C API is changing and evolving. -.. 
_toolchain: - -Updating Toolchain Requirements -------------------------------- +(toolchain)= +### Updating Toolchain Requirements We intend to require newer toolchains as time goes by. This means LLVM's codebase can use newer versions of C++ as they get standardized. Requiring newer toolchains to build LLVM can be painful for those building LLVM; therefore, it will only be done through the following process: - * It is a general goal to support LLVM and GCC versions from the last 3 years - at a minimum. This time-based guideline is not strict: we may support much - older compilers, or decide to support fewer versions. - - * An RFC is sent to the `LLVM Discourse forums`_ - - - Detail upsides of the version increase (e.g. which newer C++ language or - library features LLVM should use; avoid miscompiles in particular compiler - versions, etc). - - Detail downsides on important platforms (e.g. Ubuntu LTS status). - - See the :doc:`RFC process ` documentation for more details. - - * Once the RFC reaches consensus, update the CMake toolchain version checks as - well as the :doc:`getting started` guide. This provides a - softer transition path for developers compiling LLVM, because the - error can be turned into a warning using a CMake flag. This is an important - step: LLVM still doesn't have code which requires the new toolchains, but it - soon will. If you compile LLVM but don't read the forums, we should - tell you! - - * Ensure that at least one LLVM release has had this soft-error. Not all - developers compile LLVM top-of-tree. These release-bound developers should - also be told about upcoming changes. - - * Turn the soft-error into a hard-error after said LLVM release has branched. - - * Update the :doc:`coding standards` to allow the new - features we've explicitly approved in the RFC. - - * Start using the new features in LLVM's codebase. - -Here's a `sample RFC -`_ and the -`corresponding change `_. - -.. 
_ci-usage: - -Working with the CI system --------------------------- +- It is a general goal to support LLVM and GCC versions from the last 3 years + at a minimum. This time-based guideline is not strict: we may support much + older compilers, or decide to support fewer versions. +- An RFC is sent to the [LLVM Discourse forums](https://discourse.llvm.org) + - Detail upsides of the version increase (e.g. which newer C++ language or + library features LLVM should use; avoid miscompiles in particular compiler + versions, etc). + - Detail downsides on important platforms (e.g. Ubuntu LTS status). + - See the {doc}`RFC process ` documentation for more + details. +- Once the RFC reaches consensus, update the CMake toolchain version checks + as well as the {doc}`getting started` guide. This + provides a softer transition path for developers compiling LLVM, because + the error can be turned into a warning using a CMake flag. This is an + important step: LLVM still doesn't have code which requires the new + toolchains, but it soon will. If you compile LLVM but don't read the + forums, we should tell you! +- Ensure that at least one LLVM release has had this soft-error. Not all + developers compile LLVM top-of-tree. These release-bound developers + should also be told about upcoming changes. +- Turn the soft-error into a hard-error after said LLVM release has + branched. +- Update the {doc}`coding standards` to allow the new + features we've explicitly approved in the RFC. +- Start using the new features in LLVM's codebase. + +Here's a [sample +RFC](https://discourse.llvm.org/t/rfc-migrating-past-c-11/50943) and the +[corresponding change](https://reviews.llvm.org/D57264). + +(ci-usage)= +### Working with the CI system The main continuous integration (CI) tool for the LLVM project is the -`LLVM Buildbot `_. It uses different *builders* +[LLVM Buildbot](https://lab.llvm.org/buildbot/). It uses different *builders* to cover a wide variety of sub-projects and configurations. 
The builds are executed on different *workers*. Builders and workers are configured and provided by community members. @@ -884,44 +800,45 @@ their patch with every possible configuration. *If your commit broke the build:* -* Fix the build as soon as possible as this might block other contributors or +- Fix the build as soon as possible as this might block other contributors or downstream users. -* If you need more time to analyze and fix the bug, please revert your change to - unblock others. +- If you need more time to analyze and fix the bug, please revert your change + to unblock others. *If someone else broke the build and this blocks your work* -* Comment on the code review in `GitHub `_ - (if available) or email the author, explain the problem and how this impacts - you. Add a link to the broken build and the error message so folks can - understand the problem. -* Revert the commit if this blocks your work, see revert_policy_ . +- Comment on the code review in + [GitHub](https://github.com/llvm/llvm-project/pulls) (if available) or + email the author, explain the problem and how this impacts you. Add a link + to the broken build and the error message so folks can understand the + problem. +- Revert the commit if this blocks your work, see + [revert_policy](#revert_policy) . *If a build/worker is permanently broken* -* 1st step: contact the owner of the worker. You can find the name and contact - information for the *Admin* of worker on the page of the build in the - *Worker* tab: +- 1st step: contact the owner of the worker. You can find the name and + contact information for the *Admin* of worker on the page of the build in + the *Worker* tab: - .. image:: buildbot_worker_contact.png + ![image](buildbot_worker_contact.png) -* 2nd step: If the owner does not respond or fix the worker, please escalate +- 2nd step: If the owner does not respond or fix the worker, please escalate to Galina Kostanova, the maintainer of the BuildBot master. 
-* 3rd step: If Galina could not help you, please escalate to the - `Infrastructure Working Group `_. -.. _new-llvm-components: +- 3rd step: If Galina could not help you, please escalate to the + [Infrastructure Working Group](mailto:iwg@llvm.org). -Introducing New Components into LLVM -==================================== +(new-llvm-components)= +## Introducing New Components into LLVM The LLVM community is a vibrant and exciting place to be, and we look to be inclusive of new projects and foster new communities, and increase collaboration across industry and academia. That said, we need to strike a balance between being inclusive of new ideas and -people and the cost of ongoing maintenance that new code requires. As such, we -have a general :doc:`support policy` for introducing major new +people and the cost of ongoing maintenance that new code requires. As such, we +have a general {doc}`support policy` for introducing major new components into the LLVM world, depending on the degree of detail and responsibility required. *Core* projects need a higher degree of scrutiny than *peripheral* projects, and the latter may have additional differences. @@ -929,16 +846,15 @@ than *peripheral* projects, and the latter may have additional differences. However, this is really only intended to cover common cases that we have seen arise: different situations are different, and we are open to discussing unusual cases as well - just start an RFC thread on the -`LLVM Discourse forums`_. +[LLVM Discourse forums](https://discourse.llvm.org). -Adding a New Target -------------------- +### Adding a New Target LLVM is very receptive to new targets, even experimental ones, but a number of problems can appear when adding new large portions of code, and back-ends are normally added in bulk. New targets need the same level of support as other *core* parts of the compiler, so they are covered in the *core tier* of our -:doc:`support policy`. +{doc}`support policy`. 
We have found that landing large pieces of new code and then trying to fix emergent problems in-tree is problematic for a variety of reasons. For these @@ -947,72 +863,62 @@ proven stable, and later moved to non-experimental. The differences between both classes are: -* Experimental targets are not built by default (they need to be explicitly +- Experimental targets are not built by default (they need to be explicitly enabled at CMake time). - -* Test failures, bugs, and build breakages that only appear when the +- Test failures, bugs, and build breakages that only appear when the experimental target is enabled, caused by changes unrelated to the target, are the responsibility of the community behind the target to fix. The basic rules for a back-end to be upstreamed in **experimental** mode are: -* Every target must have at least one :ref:`maintainer`. The +- Every target must have at least one {ref}`maintainer`. The `Maintainers.md` file has to be updated as part of the first merge. These - maintainers make sure that changes to the target get reviewed and steers the - overall effort. - -* There must be an active community behind the target. This community - will help maintain the target by providing buildbots, fixing - bugs, answering the LLVM community's questions and making sure the new - target doesn't break any of the other targets, or generic code. This - behavior is expected to continue throughout the lifetime of the - target's code. - -* The code must be free of contentious issues, for example, large + maintainers make sure that changes to the target get reviewed and steers + the overall effort. +- There must be an active community behind the target. This community will + help maintain the target by providing buildbots, fixing bugs, answering the + LLVM community's questions and making sure the new target doesn't break any + of the other targets, or generic code. This behavior is expected to + continue throughout the lifetime of the target's code. 
+- The code must be free of contentious issues, for example, large changes in how the IR behaves or should be formed by the front-ends, unless agreed by the majority of the community via refactoring of the - (:doc:`IR standard`) **before** the merge of the new target changes, - following the :ref:`IR backwards compatibility`. - -* The code conforms to all of the policies laid out in this developer policy + ({doc}`IR standard`) **before** the merge of the new target changes, + following the {ref}`IR backwards compatibility`. +- The code conforms to all of the policies laid out in this developer policy document, including license, patent, and coding standards. - -* The target should have either reasonable documentation on how it - works (ISA, ABI, etc.) or a publicly available simulator/hardware - (either free or cheap enough) - preferably both. This allows - developers to validate assumptions, understand constraints and review code - that can affect the target. +- The target should have either reasonable documentation on how it works + (ISA, ABI, etc.) or a publicly available simulator/hardware (either free or + cheap enough) - preferably both. This allows developers to validate + assumptions, understand constraints and review code that can affect the + target. In addition, the rules for a back-end to be promoted to **official** are: -* The target must have addressed every other minimum requirement and +- The target must have addressed every other minimum requirement and have been stable in tree for at least 3 months. This cool down period is to make sure that the back-end and the target community can endure continuous upstream development for the foreseeable future. - -* The target's code must have been completely adapted to this policy - as well as the :doc:`coding standards`. Any exceptions that +- The target's code must have been completely adapted to this policy + as well as the {doc}`coding standards`. 
Any exceptions that were made to move into experimental mode must have been fixed **before** becoming official. - -* The test coverage needs to be broad and well written (small tests, - well documented). The build target ``check-all`` must pass with the - new target built, and where applicable, the ``test-suite`` must also +- The test coverage needs to be broad and well written (small tests, + well documented). The build target `check-all` must pass with the + new target built, and where applicable, the `test-suite` must also pass without errors, in at least one configuration (publicly demonstrated, for example, via buildbots). - -* Public buildbots need to be created and actively maintained, unless - the target requires no additional buildbots (ex. ``check-all`` covers +- Public buildbots need to be created and actively maintained, unless + the target requires no additional buildbots (ex. `check-all` covers all tests). The more relevant and public the new target's CI infrastructure is, the more the LLVM community will embrace it. To **continue** as a supported and official target: -* The maintainer(s) must continue following these rules throughout the lifetime +- The maintainer(s) must continue following these rules throughout the lifetime of the target. Continuous violations of aforementioned rules and policies could lead to complete removal of the target from the code base. - -* Degradation in support, documentation or test coverage will make the target as +- Degradation in support, documentation or test coverage will make the target as nuisance to other targets and be considered a candidate for deprecation and ultimately removed. @@ -1025,11 +931,12 @@ Those wishing to add a new target to LLVM must follow the procedure below: 1. Read this section and make sure your target follows all requirements. For minor issues, your community will be responsible for making all necessary adjustments soon after the initial merge. -2. 
Send a request for comment (RFC) to the `LLVM Discourse forums`_ describing +2. Send a request for comment (RFC) to the [LLVM Discourse + forums](https://discourse.llvm.org) describing your target and how it follows all the requirements and what work has been done and will need to be done to accommodate the official target requirements. Make sure to expose any and all controversial issues, changes needed in the - base code, table gen, etc. See the :doc:`RFC process ` + base code, table gen, etc. See the {doc}`RFC process ` documentation for more details. 3. Once the response is positive, the LLVM community can start reviewing the actual patches (but they can be prepared before, to support the RFC). Create @@ -1049,51 +956,50 @@ Those wishing to add a new target to LLVM must follow the procedure below: sure the progress is still consistent. 7. Once all official requirements have been fulfilled (as above), the maintainers should request the target to be enabled by default by sending another RFC to - the `LLVM Discourse forums`_. + the [LLVM Discourse forums](https://discourse.llvm.org). -Adding an Established Project To the LLVM Monorepo --------------------------------------------------- +### Adding an Established Project To the LLVM Monorepo -The `LLVM monorepo `_ is the centerpoint +The [LLVM monorepo](https://github.com/llvm/llvm-project) is the centerpoint of development in the LLVM world, and has all of the primary LLVM components, -including the LLVM optimizer and code generators, Clang, LLDB, etc. `Monorepos -in general `_ are great because they +including the LLVM optimizer and code generators, Clang, LLDB, etc. [Monorepos +in general](https://en.wikipedia.org/wiki/Monorepo) are great because they allow atomic commits to the project, simplify CI, and make it easier for subcommunities to collaborate. Like new targets, most projects already in the monorepo are considered to be in -the *core tier* of our :doc:`support policy`. 
The burden to add +the *core tier* of our {doc}`support policy`. The burden to add things to the LLVM monorepo needs to be very high - code that is added to this repository is checked out by everyone in the community. As such, we hold components to a high bar similar to "official targets", they: - * Must be generally aligned with the mission of the LLVM project to advance - compilers, languages, tools, runtimes, etc. - * Must conform to all of the policies laid out in this developer policy - document, including license, patent, coding standards, and code of conduct. - * Must have an active community that maintains the code, including established - maintainers. - * Should have reasonable documentation about how it works, including a high - quality README file. - * Should have CI to catch breakage within the project itself or due to - underlying LLVM dependencies. - * Should have code free of issues the community finds contentious, or be on a - clear path to resolving them. - * Must be proposed through the LLVM RFC process, and have its addition approved - by the LLVM community - this ultimately mediates the resolution of the - "should" concerns above. See the :doc:`RFC process ` - documentation for more details. +- Must be generally aligned with the mission of the LLVM project to advance + compilers, languages, tools, runtimes, etc. +- Must conform to all of the policies laid out in this developer policy + document, including license, patent, coding standards, and code of conduct. +- Must have an active community that maintains the code, including established + maintainers. +- Should have reasonable documentation about how it works, including a high + quality README file. +- Should have CI to catch breakage within the project itself or due to + underlying LLVM dependencies. +- Should have code free of issues the community finds contentious, or be on a + clear path to resolving them. 
+- Must be proposed through the LLVM RFC process, and have its addition approved + by the LLVM community - this ultimately mediates the resolution of the + "should" concerns above. See the {doc}`RFC process ` + documentation for more details. If you have a project that you think would make sense to add to the LLVM -monorepo, please start an RFC topic on the `LLVM Discourse forums`_ to kick off -the discussion. This process can take some time and iteration - please don’t -be discouraged or intimidated by that! +monorepo, please start an RFC topic on the [LLVM Discourse +forums](https://discourse.llvm.org) to kick off the discussion. This process +can take some time and iteration - please don't be discouraged or intimidated +by that! If you have an earlier stage project that you think is aligned with LLVM, please see the "Incubating New Projects" section. -Incubating New Projects ------------------------ +### Incubating New Projects The burden to add a new project to the LLVM monorepo is intentionally very high, but that can have a chilling effect on new and innovative projects. To help @@ -1107,27 +1013,27 @@ to projects under the LLVM umbrella. Projects which can be considered for the LLVM incubator meet the following criteria: - * Must be generally aligned with the mission of the LLVM project to advance - compilers, languages, tools, runtimes, etc. - * Must conform to the license, patent, and code of conduct policies laid out - in this developer policy document. - * Must have a documented charter and development plan, e.g. in the form of a - README file, mission statement, and/or manifesto. - * Should conform to coding standards, incremental development process, and - other expectations. - * Should have a sense of the community that it hopes to eventually foster, and - there should be interest from members with different affiliations / - organizations. 
- * Should have a feasible path to eventually graduate as a dedicated top-level - or sub-project within the `LLVM monorepo - `_. - * Should include a notice (e.g. in the project README or web page) that the - project is in ‘incubation status’ and is not included in LLVM releases (see - suggested wording below). - * Must be proposed through the LLVM RFC process, and have its addition - approved by the LLVM community - this ultimately mediates the resolution of - the "should" concerns above. See the :doc:`RFC process ` - documentation for more details. +- Must be generally aligned with the mission of the LLVM project to advance + compilers, languages, tools, runtimes, etc. +- Must conform to the license, patent, and code of conduct policies laid + out in this developer policy document. +- Must have a documented charter and development plan, e.g. in the form of + a README file, mission statement, and/or manifesto. +- Should conform to coding standards, incremental development process, and + other expectations. +- Should have a sense of the community that it hopes to eventually foster, + and there should be interest from members with different affiliations / + organizations. +- Should have a feasible path to eventually graduate as a dedicated + top-level or sub-project within the [LLVM + monorepo](https://github.com/llvm/llvm-project). +- Should include a notice (e.g. in the project README or web page) that the + project is in 'incubation status' and is not included in LLVM releases + (see suggested wording below). +- Must be proposed through the LLVM RFC process, and have its addition + approved by the LLVM community - this ultimately mediates the resolution + of the "should" concerns above. See the {doc}`RFC process ` + documentation for more details. That said, the project need not have any code to get started, and need not have an established community at all! 
Furthermore, incubating projects may pass @@ -1137,30 +1043,30 @@ dependencies that have not yet been factored appropriately, leveraging experimental components or APIs that are not yet upstream, etc). When approved, the llvm-admin group can grant the new project: - * A new repository in the LLVM Github Organization - but not the LLVM monorepo. - * New mailing list, discourse forum, and/or discord chat hosted with other LLVM - forums. - * Other infrastructure integration can be discussed on a case-by-case basis. + +- A new repository in the LLVM Github Organization - but not the LLVM monorepo. +- New mailing list, discourse forum, and/or discord chat hosted with other LLVM + forums. +- Other infrastructure integration can be discussed on a case-by-case basis. Graduation to the mono-repo would follow existing processes and standards for becoming a first-class part of the monorepo. Similarly, an incubating project may be eventually retired, but no process has been established for that yet. If -and when this comes up, please start an RFC discussion on the `LLVM Discourse forums`_. +and when this comes up, please start an RFC discussion on the [LLVM Discourse +forums](https://discourse.llvm.org). This process is very new - please expect the details to change, it is always -safe to ask on the `LLVM Discourse forums`_ about this. +safe to ask on the [LLVM Discourse forums](https://discourse.llvm.org) about +this. Suggested disclaimer for the project README and the main project web page: -:: - - This project is participating in the LLVM Incubator process: as such, it is - not part of any official LLVM release. While incubation status is not - necessarily a reflection of the completeness or stability of the code, it - does indicate that the project is not yet endorsed as a component of LLVM. + This project is participating in the LLVM Incubator process: as such, it is + not part of any official LLVM release. 
While incubation status is not + necessarily a reflection of the completeness or stability of the code, it + does indicate that the project is not yet endorsed as a component of LLVM. -Adding or enabling a new LLVM pass ----------------------------------- +### Adding or enabling a new LLVM pass The guidelines here are primarily targeted at the enablement of new major passes in the target-independent optimization pipeline. Small additions, or @@ -1185,62 +1091,58 @@ The recommended workflow is: enabled, it becomes easier to identify the specific change that has caused a regression in correctness, optimization quality or compile-time. -When enabling a pass, certain requirements must be met (in no particular order): - - * **Maintenance:** The pass (and any analyses it depends on) must have at - least one maintainer. - * **Usefulness:** There should be evidence that the pass improves performance - (or whatever metric it optimizes for) on real-world workloads. Improvements - seen only on synthetic benchmarks may be insufficient. - * **Compile-Time:** The pass should not have a large impact on compile-time, - where the evaluation of what "large" means is up to reviewer discretion, and - may differ based on the value the pass provides. In any case, it is expected - that a concerted effort has been made to mitigate the compile-time impact, - both for the average case, and for pathological cases. - * **Correctness:** The pass should have no known correctness issues (except - global correctness issues that affect all of LLVM). If an old pass is being - enabled (rather than implementing a new one incrementally), additional due - diligence is required. The pass should be fully reviewed to ensure that it - still complies with current quality standards. Fuzzing with disabled - profitability checks may help gain additional confidence in the - implementation. 
+When enabling a pass, certain requirements must be met (in no particular +order): + +- **Maintenance:** The pass (and any analyses it depends on) must have at + least one maintainer. +- **Usefulness:** There should be evidence that the pass improves performance + (or whatever metric it optimizes for) on real-world workloads. Improvements + seen only on synthetic benchmarks may be insufficient. +- **Compile-Time:** The pass should not have a large impact on compile-time, + where the evaluation of what "large" means is up to reviewer discretion, and + may differ based on the value the pass provides. In any case, it is expected + that a concerted effort has been made to mitigate the compile-time impact, + both for the average case, and for pathological cases. +- **Correctness:** The pass should have no known correctness issues (except + global correctness issues that affect all of LLVM). If an old pass is being + enabled (rather than implementing a new one incrementally), additional due + diligence is required. The pass should be fully reviewed to ensure that it + still complies with current quality standards. Fuzzing with disabled + profitability checks may help gain additional confidence in the + implementation. If non-trivial issues are found in a newly enabled pass, it may be temporarily disabled again, until the issues have been resolved. -.. _copyright-license-patents: - -Copyright, License, and Patents -=============================== +(copyright-license-patents)= +## Copyright, License, and Patents -.. note:: +```{note} - This section deals with legal matters but does not provide legal advice. We - are not lawyers --- please seek legal counsel from a licensed attorney. +This section deals with legal matters but does not provide legal advice. We are not lawyers --- please seek legal counsel from a licensed attorney. +``` This section addresses the issues of copyright, license and patents for the LLVM
The copyright for the code is held by the contributors of -the code. The code is licensed under permissive `open source licensing terms`_, -namely the Apache-2.0 with LLVM-exception license, which includes a copyright -and `patent license`_. When you contribute code to the LLVM project, you -license it under these terms. +project. The copyright for the code is held by the contributors of the code. +The code is licensed under permissive {ref}`open source licensing terms `, namely the Apache-2.0 with LLVM-exception license, +which includes a copyright and {ref}`patent license `. When you +contribute code to the LLVM project, you license it under these terms. In certain circumstances, code licensed under other licenses can be added to the codebase. However, this may only be done with approval of the LLVM Foundation Board of Directors, and contributors should plan for the approval process to take at least 4-6 weeks. If you would like to contribute code under a different license, please create a pull request with the code -you want to contribute and email board@llvm.org requesting a review. - -If you have questions or comments about these topics, please ask on the -`LLVM Discourse forums`_. However, -please realize that most compiler developers are not lawyers, and therefore you -will not be getting official legal advice. +you want to contribute and email requesting a review. -.. _LLVM Discourse forums: https://discourse.llvm.org +If you have questions or comments about these topics, please ask on the [LLVM +Discourse forums](https://discourse.llvm.org). However, please realize that +most compiler developers are not lawyers, and therefore you will not be getting +official legal advice. -Copyright ---------- +### Copyright The LLVM project does not collect copyright assignments, which means that the copyright for the code in the project is held by the respective contributors. @@ -1256,40 +1158,38 @@ acceptable for their contributions. 
We feel that a high burden for relicensing is good for the project, because contributors do not have to fear that their code will be used in a way with which they disagree. -Embedded Copyright or 'Contributed by' Statements -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Embedded Copyright or 'Contributed by' Statements The LLVM project does not accept contributions that include in-source copyright notices except where such notices are part of a larger external project being added as a vendored dependency. LLVM source code lives for a long time and is edited by many people, the best -way to track contributions is through revision control history. -See the `Attribution of Changes`_ section for more information about attributing -changes to authors other than the committer. +way to track contributions is through revision control history. See the +[Attribution of Changes](#attribution-of-changes) section for more information +about attributing changes to authors other than the committer. -Relicensing ------------ +### Relicensing The last paragraph notwithstanding, the LLVM Project is in the middle of a large effort to change licenses, which aims to solve several problems: -* The old licenses made it difficult to move code from (e.g.) the compiler to - runtime libraries, because runtime libraries used a different license from the - rest of the compiler. -* Some contributions were not submitted to LLVM due to concerns that +- The old licenses made it difficult to move code from (e.g.) the compiler to + runtime libraries, because runtime libraries used a different license from + the rest of the compiler. +- Some contributions were not submitted to LLVM due to concerns that the patent grant required by the project was overly broad. -* The patent grant was unique to the LLVM Project, not written by a lawyer, and +- The patent grant was unique to the LLVM Project, not written by a lawyer, and was difficult to determine what protection was provided (if any). 
The scope of relicensing is all code that is considered part of the LLVM project, including the main LLVM repository, runtime libraries (compiler_rt, -OpenMP, etc), Polly, and all other subprojects. There are a few exceptions: +OpenMP, etc), Polly, and all other subprojects. There are a few exceptions: -* Code imported from other projects (e.g. Google Test, Autoconf, etc) will - remain as it is. This code isn't developed as part of the LLVM project, it +- Code imported from other projects (e.g. Google Test, Autoconf, etc) will + remain as it is. This code isn't developed as part of the LLVM project, it is used by LLVM. -* Some subprojects are impractical or uninteresting to relicense (e.g. llvm-gcc +- Some subprojects are impractical or uninteresting to relicense (e.g. llvm-gcc and dragonegg). These will be split off from the LLVM project (e.g. to separate GitHub projects), allowing interested people to continue their development elsewhere. @@ -1307,39 +1207,33 @@ both the new license and the legacy license. If you are a contributor to LLVM with contributions committed before 2019-01-19 and have not done so already, please do follow the instructions at -https://foundation.llvm.org/docs/relicensing/, under section "Individual +<https://foundation.llvm.org/docs/relicensing/>, under section "Individual Relicensing Agreement" to relicense your contributions under the new license. +(open source licensing terms)= +### New LLVM Project License Framework -.. _open source licensing terms: - -New LLVM Project License Framework ----------------------------------- - -Contributions to LLVM are licensed under the `Apache License, Version 2.0 -`_, with two limited +Contributions to LLVM are licensed under the [Apache License, Version +2.0](https://www.apache.org/licenses/LICENSE-2.0), with two limited exceptions intended to ensure that LLVM is very permissively licensed. Collectively, the name of this license is "Apache 2.0 License with LLVM -exceptions".
The exceptions read: - ---- LLVM Exceptions to the Apache 2.0 License ---- + ---- LLVM Exceptions to the Apache 2.0 License ---- - As an exception, if, as a result of your compiling your source code, portions - of this Software are embedded into an Object form of such source code, you - may redistribute such embedded portions in such Object form without complying - with the conditions of Sections 4(a), 4(b) and 4(d) of the License. - - In addition, if you combine or link compiled forms of this Software with - software that is licensed under the GPLv2 ("Combined Software") and if a - court of competent jurisdiction determines that the patent provision (Section - 3), the indemnity provision (Section 9) or other Section of the License - conflicts with the conditions of the GPLv2, you may retroactively and - prospectively choose to deem waived or otherwise exclude such Section(s) of - the License, but only in their entirety and only with respect to the Combined - Software. + As an exception, if, as a result of your compiling your source code, portions + of this Software are embedded into an Object form of such source code, you + may redistribute such embedded portions in such Object form without complying + with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + In addition, if you combine or link compiled forms of this Software with + software that is licensed under the GPLv2 ("Combined Software") and if a + court of competent jurisdiction determines that the patent provision (Section + 3), the indemnity provision (Section 9) or other Section of the License + conflicts with the conditions of the GPLv2, you may retroactively and + prospectively choose to deem waived or otherwise exclude such Section(s) of + the License, but only in their entirety and only with respect to the Combined + Software. 
We intend to keep LLVM perpetually open source and available under a permissive license - this fosters the widest adoption of LLVM by @@ -1349,135 +1243,130 @@ particular, LLVM's license is not a "copyleft" license like the GPL. The "Apache 2.0 License with LLVM exceptions" allows you to: -* freely download and use LLVM (in whole or in part) for personal, internal, or - commercial purposes. -* include LLVM in packages or distributions you create. -* combine LLVM with code licensed under every other major open source - license (including BSD, MIT, GPLv2, GPLv3...). -* make changes to LLVM code without being required to contribute it back +- freely download and use LLVM (in whole or in part) for personal, internal, + or commercial purposes. +- include LLVM in packages or distributions you create. +- combine LLVM with code licensed under every other major open source + license (including BSD, MIT, GPLv2, GPLv3\...). +- make changes to LLVM code without being required to contribute it back to the project - contributions are appreciated though! However, it imposes these limitations on you: -* You must retain the copyright notice if you redistribute LLVM: You cannot +- You must retain the copyright notice if you redistribute LLVM: You cannot strip the copyright headers off or replace them with your own. -* Binaries that include LLVM must reproduce the copyright notice (e.g. in an +- Binaries that include LLVM must reproduce the copyright notice (e.g. in an included README file or in an "About" box), unless the LLVM code was added as a by-product of compilation. For example, if an LLVM runtime library like compiler_rt or libc++ was automatically included into your application by the compiler, you do not need to attribute it. 
-* You can't use our names to promote your products (LLVM derived or not) - +- You can't use our names to promote your products (LLVM derived or not) - though you can make truthful statements about your use of the LLVM code, without implying our sponsorship. -* There's no warranty on LLVM at all. +- There's no warranty on LLVM at all. -We want LLVM code to be widely used, and believe that this provides a model that -is great for contributors and users of the project. For more information about -the Apache 2.0 License, please see the `Apache License FAQ -`_, maintained by the +We want LLVM code to be widely used, and believe that this provides a model +that is great for contributors and users of the project. For more information +about the Apache 2.0 License, please see the [Apache License +FAQ](http://www.apache.org/foundation/license-faq.html), maintained by the Apache Project. -.. _patent license: - -Patents -------- +(patent license)= +### Patents Section 3 of the Apache 2.0 license is a patent grant under which contributors of code to the project contribute the rights to use any of their patents that would otherwise be infringed by that code contribution -(protecting uses of that code). Further, the patent grant is revoked +(protecting uses of that code). Further, the patent grant is revoked from anyone who files a patent lawsuit about code in LLVM - this protects the community by providing a "patent commons" for the code base and reducing the odds of patent lawsuits in general. The license specifically scopes which patents are included with code -contributions. To help explain this, the `Apache License FAQ -`_ explains this scope using +contributions. 
To help explain this, the [Apache License +FAQ](http://www.apache.org/foundation/license-faq.html) explains this scope using some questions and answers, which we reproduce here for your convenience (for reference, the "ASF" is the Apache Software Foundation, the guidance still -holds though):: - - Q1: If I own a patent and contribute to a Work, and, at the time my - contribution is included in that Work, none of my patent's claims are subject - to Apache's Grant of Patent License, is there a way any of those claims would - later become subject to the Grant of Patent License solely due to subsequent - contributions by other parties who are not licensees of that patent. - - A1: No. - - Q2: If at any time after my contribution, I am able to license other patent - claims that would have been subject to Apache's Grant of Patent License if - they were licensable by me at the time of my contribution, do those other - claims become subject to the Grant of Patent License? - - A2: Yes. - - Q3: If I own or control a licensable patent and contribute code to a specific - Apache product, which of my patent claims are subject to Apache's Grant of - Patent License? - - A3: The only patent claims that are licensed to the ASF are those you own or - have the right to license that read on your contribution or on the - combination of your contribution with the specific Apache product to which - you contributed as it existed at the time of your contribution. No additional - patent claims become licensed as a result of subsequent combinations of your - contribution with any other software. Note, however, that licensable patent - claims include those that you acquire in the future, as long as they read on - your original contribution as made at the original time. 
Once a patent claim - is subject to Apache's Grant of Patent License, it is licensed under the - terms of that Grant to the ASF and to recipients of any software distributed - by the ASF for any Apache software product whatsoever. - -.. _legacy: - -Legacy License Structure ------------------------- - -.. note:: - The code base was previously licensed under the Terms described here. - We are in the middle of relicensing to a new approach (described above). - More than 99% of all contributions made to LLVM are covered by the Apache-2.0 - WITH LLVM-exception license. A small portion of LLVM code remains exclusively - covered by the legacy license. Contributions after 2024-06-01 are covered - exclusively by the new license._ +holds though): + + Q1: If I own a patent and contribute to a Work, and, at the time my + contribution is included in that Work, none of my patent's claims are subject + to Apache's Grant of Patent License, is there a way any of those claims would + later become subject to the Grant of Patent License solely due to subsequent + contributions by other parties who are not licensees of that patent. + + A1: No. + + Q2: If at any time after my contribution, I am able to license other patent + claims that would have been subject to Apache's Grant of Patent License if + they were licensable by me at the time of my contribution, do those other + claims become subject to the Grant of Patent License? + + A2: Yes. + + Q3: If I own or control a licensable patent and contribute code to a specific + Apache product, which of my patent claims are subject to Apache's Grant of + Patent License? + + A3: The only patent claims that are licensed to the ASF are those you own or + have the right to license that read on your contribution or on the + combination of your contribution with the specific Apache product to which + you contributed as it existed at the time of your contribution. 
No additional + patent claims become licensed as a result of subsequent combinations of your + contribution with any other software. Note, however, that licensable patent + claims include those that you acquire in the future, as long as they read on + your original contribution as made at the original time. Once a patent claim + is subject to Apache's Grant of Patent License, it is licensed under the + terms of that Grant to the ASF and to recipients of any software distributed + by the ASF for any Apache software product whatsoever. + +(legacy)= +### Legacy License Structure + +```{note} +The code base was previously licensed under the Terms described here. +We are in the middle of relicensing to a new approach (described above). +More than 99% of all contributions made to LLVM are covered by the Apache-2.0 +WITH LLVM-exception license. A small portion of LLVM code remains exclusively +covered by the legacy license. Contributions after 2024-06-01 are covered +exclusively by the new license. +``` We intend to keep LLVM perpetually open source and to use a permissive open source license. The code in -LLVM is available under the `University of Illinois/NCSA Open Source License -`_, which boils down to +LLVM is available under the [University of Illinois/NCSA Open Source +License](http://www.opensource.org/licenses/UoI-NCSA.php), which boils down to this: -* You can freely distribute LLVM. -* You must retain the copyright notice if you redistribute LLVM. -* Binaries derived from LLVM must reproduce the copyright notice (e.g. in an +- You can freely distribute LLVM. +- You must retain the copyright notice if you redistribute LLVM. +- Binaries derived from LLVM must reproduce the copyright notice (e.g. in an included README file). -* You can't use our names to promote your LLVM derived products. -* There's no warranty on LLVM at all. +- You can't use our names to promote your LLVM derived products. +- There's no warranty on LLVM at all. 
We believe this fosters the widest adoption of LLVM because it **allows commercial products to be derived from LLVM** with few restrictions and without a requirement for making any derived works also open source (i.e. LLVM's license is not a "copyleft" license like the GPL). We suggest that you read the -`License `_ if further +[License](http://www.opensource.org/licenses/UoI-NCSA.php) if further clarification is needed. In addition to the UIUC license, the runtime library components of LLVM -(**compiler_rt, libc++, and libclc**) are also licensed under the `MIT License -`_, which does not contain -the binary redistribution clause. As a user of these runtime libraries, it +(**compiler_rt, libc++, and libclc**) are also licensed under the [MIT +License](http://www.opensource.org/licenses/mit-license.php), which does not contain +the binary redistribution clause. As a user of these runtime libraries, it means that you can choose to use the code under either license (and thus don't need the binary redistribution clause), and as a contributor to the code that you agree that any contributions to these libraries be licensed under both -licenses. We feel that this is important for runtime libraries, because they +licenses. We feel that this is important for runtime libraries, because they are implicitly linked into applications and therefore should not subject those applications to the binary redistribution clause. This also means that it is ok -to move code from (e.g.) libc++ to the LLVM core without concern, but that code +to move code from (e.g.) libc++ to the LLVM core without concern, but that code cannot be moved from the LLVM core to libc++ without the copyright owner's permission. -.. _ai contributions: - -AI generated contributions --------------------------- +(ai contributions)= +### AI generated contributions -This section has moved into a :doc:`separate policy document `. +This section has moved into a {doc}`separate policy document `. 
diff --git a/llvm/docs/GettingInvolved.md b/llvm/docs/GettingInvolved.md index 4803b31d77811..a172481f81df9 100644 --- a/llvm/docs/GettingInvolved.md +++ b/llvm/docs/GettingInvolved.md @@ -1,149 +1,160 @@ -Getting Involved -================ +# Getting Involved LLVM welcomes contributions of all kinds. To get started, please review the following topics: -.. contents:: - :local: +```{contents} +:local: +``` -.. toctree:: - :hidden: +```{toctree} +:hidden: - Contributing - DeveloperPolicy - CodeReview - SupportPolicy - SphinxQuickstartTemplate - HowToSubmitABug - BugLifeCycle - CodingStandards - GitHub - GitBisecting - GitRepositoryPolicy +Contributing +DeveloperPolicy +CodeReview +SupportPolicy +SphinxQuickstartTemplate +HowToSubmitABug +BugLifeCycle +CodingStandards +GitHub +GitBisecting +GitRepositoryPolicy +``` -:doc:`Contributing` - An overview on how to contribute to LLVM. +* {doc}`Contributing` -:doc:`DeveloperPolicy` - The LLVM project's policy towards developers and their contributions. + An overview on how to contribute to LLVM. -:doc:`CodeReview` - The LLVM project's code-review process. +* {doc}`DeveloperPolicy` -:doc:`SupportPolicy` - The LLVM support policy for core and non-core components. + The LLVM project's policy towards developers and their contributions. + +* {doc}`CodeReview` + + The LLVM project's code-review process. + +* {doc}`SupportPolicy` + + The LLVM support policy for core and non-core components. + +* {doc}`SphinxQuickstartTemplate` -:doc:`SphinxQuickstartTemplate` A template + tutorial for writing new Sphinx documentation. It is meant to be read in source form. -:doc:`HowToSubmitABug` - Instructions for properly submitting information about any bugs you run into - in the LLVM system. +* {doc}`HowToSubmitABug` + + Instructions for properly submitting information about any bugs you run into + in the LLVM system. + +* {doc}`BugLifeCycle` -:doc:`BugLifeCycle` - Describes how bugs are reported, triaged, and closed. 
+ Describes how bugs are reported, triaged, and closed. + +* {doc}`CodingStandards` -:doc:`CodingStandards` Details the LLVM coding standards and provides useful information on writing efficient C++ code. -:doc:`GitHub` +* {doc}`GitHub` + Describes how to use the llvm-project repository and code reviews on GitHub. -:doc:`GitBisecting` - Describes how to use ``git bisect`` on LLVM's repository. +* {doc}`GitBisecting` + + Describes how to use `git bisect` on LLVM's repository. -:doc:`GitRepositoryPolicy` - Collection of policies around the git repositories. +* {doc}`GitRepositoryPolicy` -.. _development-process: + Collection of policies around the git repositories. -Development Process -------------------- +(development-process)= + +## Development Process Information about LLVM's development process. -.. toctree:: - :hidden: +```{toctree} +:hidden: + +Projects +HowToReleaseLLVM +ReleaseProcess +HowToAddABuilder +ReleaseNotes +``` - Projects - HowToReleaseLLVM - ReleaseProcess - HowToAddABuilder - ReleaseNotes +* {doc}`Projects` -:doc:`Projects` How-to guide and templates for new projects that *use* the LLVM infrastructure. The templates (directory organization, Makefiles, and test - tree) allow the project code to be located outside (or inside) the ``llvm/`` + tree) allow the project code to be located outside (or inside) the `llvm/` tree, while using LLVM header files and libraries. -:doc:`HowToReleaseLLVM` +* {doc}`HowToReleaseLLVM` + This is a guide to preparing LLVM releases. Most developers can ignore it. -:doc:`ReleaseProcess` +* {doc}`ReleaseProcess` + This is a guide to validate a new release, during the release process. Most developers can ignore it. -:doc:`HowToAddABuilder` - Instructions for adding new builder to LLVM buildbot master. +* {doc}`HowToAddABuilder` + + Instructions for adding new builder to LLVM buildbot master. 
+ +* {doc}`Release notes for the current release <ReleaseNotes>` -:doc:`Release notes for the current release <ReleaseNotes>` - This describes new features, known bugs, and other limitations. + This describes new features, known bugs, and other limitations. -.. _lists-forums: +(lists-forums)= -Forums & Mailing Lists ----------------------- +## Forums & Mailing Lists If you can't find what you need in these docs, try consulting the Discourse forums. There are also commit mailing lists for all commits to the LLVM Project. -The :doc:`CodeOfConduct` applies to all these forums and mailing lists. +The {doc}`CodeOfConduct` applies to all these forums and mailing lists. + +* [LLVM Discourse](https://discourse.llvm.org/) -`LLVM Discourse`__ The forums for all things LLVM and related sub-projects. There are categories and subcategories for a wide variety of areas within LLVM. You can also view tags or search for a specific topic. - .. __: https://discourse.llvm.org/ +* [Commits Archive (llvm-commits)](http://lists.llvm.org/pipermail/llvm-commits/) -`Commits Archive (llvm-commits)`__ This list contains all commit messages that are made when LLVM developers commit code changes to the repository. It also serves as a forum for patch review (i.e., send patches here). It is useful for those who want to stay on the bleeding edge of LLVM development. This list is very high volume. - .. __: http://lists.llvm.org/pipermail/llvm-commits/ +* [Bugs & Patches Archive (llvm-bugs)](http://lists.llvm.org/pipermail/llvm-bugs/) -`Bugs & Patches Archive (llvm-bugs)`__ This list gets emailed every time a bug is opened and closed. It is higher volume than the LLVM-dev list. - .. __: http://lists.llvm.org/pipermail/llvm-bugs/ +* [LLVM Announcements](https://discourse.llvm.org/c/announce/46) -`LLVM Announcements`__ If you just want project-wide announcements such as releases, developers meetings, or blog posts, then you should check out the Announcement category on LLVM Discourse. - ..
__: https://discourse.llvm.org/c/announce/46 - -.. _online-sync-ups: +(online-sync-ups)= -Online Sync-Ups ---------------- +## Online Sync-Ups A number of regular calls are organized on specific topics. It should be expected that the range of topics will change over time. At the time of writing, the following sync-ups are organized. -The :doc:`CodeOfConduct` applies to all online sync-ups. +The {doc}`CodeOfConduct` applies to all online sync-ups. If you'd like to organize a new sync-up, please add the info in the table below. Please also create a calendar event for it and invite calendar@llvm.org -to the event, so that it'll show up on the :ref:`llvm-community-calendar`. -Please see :ref:`llvm-community-calendar-host-guidance` for more guidance on +to the event, so that it'll show up on the {ref}`llvm-community-calendar`. +Please see {ref}`llvm-community-calendar-host-guidance` for more guidance on what to add to your calendar invite. -.. list-table:: LLVM regular sync-up calls - :widths: 25 25 25 25 - :header-rows: 1 +```{list-table} LLVM regular sync-up calls +:widths: 25 25 25 25 +:header-rows: 1 * - Topic - Frequency @@ -151,116 +162,116 @@ what to add to your calendar invite. 
- Minutes/docs link * - Loop Optimization Working Group - Every first Wednesday of the month - - `ics <./_static/LoopOptWG_invite.ics>`__ - - `Minutes/docs `__ + - [ics](./_static/LoopOptWG_invite.ics) + - [Minutes/docs](https://docs.google.com/document/d/1sdzoyB11s0ccTZ3fobqctDpgJmRoFcz0sviKxqczs4g/edit) * - RISC-V - Every 2 weeks on Thursday - - `ics `__ - `gcal `__ - - `Minutes/docs `__ + - [ics](https://calendar.google.com/calendar/ical/lowrisc.org_0n5pkesfjcnp0bh5hps1p0bd80%40group.calendar.google.com/public/basic.ics) + [gcal](https://calendar.google.com/calendar/b/1?cid=bG93cmlzYy5vcmdfMG41cGtlc2ZqY25wMGJoNWhwczFwMGJkODBAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ) + - [Minutes/docs](https://docs.google.com/document/d/1G3ocHm2zE6AYTS2N3_3w2UxFnSEyKkcF57siLWe-NVs) * - ML Guided Compiler Optimizations - Monthly - - - `Minutes/docs `__ - * - `LLVM security group `__ + - [Minutes/docs](https://docs.google.com/document/d/1JecbplF09l3swTjze-UVeLh4L48svJxGVy4mz_e9Rhs/edit?usp=gmail#heading=h.ts9cmcjbir1j) + * - [LLVM security group](https://llvm.org/docs/Security.html) - Monthly, every 3rd Tuesday - - `ics `__ - `gcal `__ - - `Minutes/docs `__ - * - `CIRCT `__ + - [ics](https://calendar.google.com/calendar/ical/eoh3m9k1l6vqbd1fkp94fv5q74%40group.calendar.google.com/public/basic.ics) + [gcal](https://calendar.google.com/calendar/embed?src=eoh3m9k1l6vqbd1fkp94fv5q74%40group.calendar.google.com) + - [Minutes/docs](https://discourse.llvm.org/t/llvm-security-group-public-sync-ups/62735) + * - [CIRCT](https://github.com/llvm/circt) - Weekly, on Wednesday - - - `Minutes/docs `__ + - [Minutes/docs](https://docs.google.com/document/d/1fOSRdyZR2w75D87yU2Ma9h2-_lEPL4NxvhJGJd-s5pk/edit#heading=h.mulvhjtr8dk9) * - flang - - Multiple meeting series, `documented here `__ + - Multiple meeting series, [documented here](https://github.com/llvm/llvm-project/blob/main/flang/docs/GettingInvolved.md#calls) - - * - OpenMP - - Multiple meeting series, `documented here `__ + - Multiple meeting 
series, [documented here](https://openmp.llvm.org/docs/SupportAndFAQ.html) - - * - LLVM Alias Analysis - Every 4 weeks on Tuesdays - - `ics `__ - - `Minutes/docs `__ + - [ics](http://lists.llvm.org/pipermail/llvm-dev/attachments/20201103/a3499a67/attachment-0001.ics) + - [Minutes/docs](https://docs.google.com/document/d/17U-WvX8qyKc3S36YUKr3xfF-GHunWyYowXbxEdpHscw) * - LLVM Pointer Authentication - Every month on Mondays - - `ics `__ - - `Minutes/docs `__ + - [ics](https://calendar.google.com/calendar/ical/fr1qtmrmt2s9odufjvurkb6j70%40group.calendar.google.com/public/basic.ics) + - [Minutes/docs](https://discourse.llvm.org/t/llvm-pointer-authentication-sync-ups/62661) * - LLVM Embedded Toolchains - Every 4 weeks on Thursdays - - `ics `__ - `gcal `__ - - `Minutes/docs `__ + - [ics](https://drive.google.com/file/d/1uNa-PFYkhAfT83kR2Nc4Fi706TAQFBEL/view?usp=sharing) + [gcal](https://calendar.google.com/calendar/u/0?cid=ZDQyc3ZlajJmbjIzNG1jaTUybjFsdjA2dWNAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ) + - [Minutes/docs](https://docs.google.com/document/d/1GahxppHJ7o1O_fn1Mbidu1DHEg7V2aOr92LXCtNV1_o/edit?usp=sharing) * - Clang C and C++ Language Working Group - 1st and 3rd Wednesday of the month - - - `Minutes/docs `__ + - [Minutes/docs](https://docs.google.com/document/d/1x5-RbOC6-jnI_NcJ9Dp4pSmGhhNe7lUevuWUIB46TeM/edit?usp=sharing) * - LLVM SPIR-V Backend Working Group - Every week on Monday - - - `Meeting details/agenda `__ + - [Meeting details/agenda](https://docs.google.com/document/d/1UjX-LAwPjJ75Nmb8a5jz-Qrm-pPtKtQw0k1S1Lop9jU/edit?usp=sharing) * - SYCL Upstream Working Group - Every 2 weeks on Mondays - - `gcal `__ - - `Meeting details/agenda `__ + - [gcal](https://calendar.google.com/calendar/u/0?cid=c3ljbC5sbHZtLndnQGdtYWlsLmNvbQ) + - [Meeting details/agenda](https://docs.google.com/document/d/1ivYDSn_5ChTeiZ7TiO64WC_jYJnGwAUiT9Ngi9cAdFU/edit?usp=sharing) * - Formal Semantics Working Group - Every 2 weeks on Mondays - - `gcal `__ - - `Meeting notes `__ + - 
[gcal](https://calendar.google.com/calendar/event?eid=aHJpNnNzb3Zia3FtNzNuYjdpbmJtZG5nZGVfMjAyNjA1MDRUMTUzMDAwWiBjYWxlbmRhckBsbHZtLm9yZw) + - [Meeting notes](https://docs.google.com/document/d/1muS2gZ7PUhbypbl0bmjb2J-UUzY7K8AO9a8RHVw_Mjo/edit?tab=t.0) * - Vectorizer Improvement Working Group - 3rd Tuesday of the month - - `ics `__ - - `Meeting details/agenda: `__ - * - `LLVM Qualification Working Group `__ + - [ics](https://www.icloud.com/iclouddrive/032PeZzdN6U4uRMwJRJPrS2Lw#Vectorizer_Improvements) + - [Meeting details/agenda:](https://docs.google.com/document/d/1Glzy2JiWuysbD-HBWGUOkZqT09GJ4_Ljodr0lXD5XfQ/edit) + * - [LLVM Qualification Working Group](https://llvm.org/docs/QualGroup.html) - Monthly: 2nd Tuesday (EU/Asia) and 2nd Friday JST / Thursday (Americas) - - `ics `__ - `gcal `__ - - `Minutes/docs `__ + - [ics](https://calendar.google.com/calendar/ical/f731f5b57956a132f6c553ed30f496b16e1018f831be13eb6c4b896c108a6626%40group.calendar.google.com/public/basic.ics) + [gcal](https://calendar.google.com/calendar/embed?src=f731f5b57956a132f6c553ed30f496b16e1018f831be13eb6c4b896c108a6626%40group.calendar.google.com&ctz=Asia%2FTokyo) + - [Minutes/docs](https://discourse.llvm.org/t/llvm-qualification-wg-sync-ups-meeting-minutes/87148) * - MLIR C/C++ Frontend Working Group - Monthly, usually 1st Monday of the month - - `ics `__ - `gcal `__ - - `Minutes/docs `__ + - [ics](https://calendar.google.com/calendar/ical/jvceakm3kbpku3f4jrsv1lkigo%40group.calendar.google.com/public/basic.ics) + [gcal](https://calendar.google.com/calendar/embed?src=jvceakm3kbpku3f4jrsv1lkigo%40group.calendar.google.com&ctz=America%2FLos_Angeles) + - [Minutes/docs](https://docs.google.com/document/d/1-flHK3TjQUrkSO2Fdt4webZ2zCyeXxpTLMiRQbMW7hE) * - ClangIR Upstreaming Coordination Meeting - Every 2 weeks on Mondays - - `ics `__ - `gcal `__ + - 
[ics](https://calendar.google.com/calendar/ical/c_673c6cd64474c0aff173bf8fa609559f93d654e0984d9d91d71abd32d28c0486%40group.calendar.google.com/public/basic.ics) + [gcal](https://calendar.google.com/calendar/embed?src=c_673c6cd64474c0aff173bf8fa609559f93d654e0984d9d91d71abd32d28c0486%40group.calendar.google.com&ctz=America%2FLos_Angeles) - * - GlobalISel - Every 2nd Tuesday of the month - - `gcal `__ - - `Meeting details/agenda `__ + - [gcal](https://calendar.google.com/calendar/u/0?cid=YWZjNzhmMzE4MDNlNTAyNGY1NmE1MDIyODY0YTYwZmJmYzRjYTEwNTE1NmUxODA2NzBkYTliY2ZhYTVkNjk0NUBncm91cC5jYWxlbmRhci5nb29nbGUuY29t) + - [Meeting details/agenda](https://docs.google.com/document/d/1Ry8O4-Tm5BFj9AMjr8qTQFU80z-ptiNQ62687NaIvLs/edit?usp=sharing) * - Clang Static Analysis Working Group - Every 2 weeks on Tuesdays - - `ics `__ - `gcal `__ - - `Meeting notes `__ + - [ics](https://calendar.google.com/calendar/ical/9c23f3a54dbb4fbac3801c50094fc43118a37c186f5c65b2898cd0fc251c8610%40group.calendar.google.com/public/basic.ics) + [gcal](https://calendar.google.com/calendar/u/0?cid=OWMyM2YzYTU0ZGJiNGZiYWMzODAxYzUwMDk0ZmM0MzExOGEzN2MxODZmNWM2NWIyODk4Y2QwZmMyNTFjODYxMEBncm91cC5jYWxlbmRhci5nb29nbGUuY29t) + - [Meeting notes](https://docs.google.com/document/d/1ijI8pWeyidmhFOd5Ndgvr5AziZwrMCbt2oUehv8qHmw/edit?usp=sharing) * - LLVM Memory Safety Working Group - Every 4 weeks on Thursdays - - `ics `__ - `gcal `__ - - `Meeting notes `__ - * - `Lifetime Safety Breakout Group `__ + - [ics](https://calendar.google.com/calendar/ical/2d77f9a2624d18cd46e5299d15cc0fa0c90dca53fd68802261d52121d21a0573%40group.calendar.google.com/public/basic.ics) + [gcal](https://calendar.google.com/calendar/u/0?cid=MmQ3N2Y5YTI2MjRkMThjZDQ2ZTUyOTlkMTVjYzBmYTBjOTBkY2E1M2ZkNjg4MDIyNjFkNTIxMjFkMjFhMDU3M0Bncm91cC5jYWxlbmRhci5nb29nbGUuY29t) + - [Meeting notes](https://docs.google.com/document/d/1DkCik6BTnO-cox_9y_BTKzPaJJOo_hBxiNFP3lInvOM/edit?usp=sharing) + * - [Lifetime Safety Breakout 
Group](https://github.com/orgs/llvm/projects/39) - Every 2 weeks on Wednesdays - - `ics `__ - `gcal `__ - - `Meeting notes `__ + - [ics](https://calendar.google.com/calendar/ical/2d77f9a2624d18cd46e5299d15cc0fa0c90dca53fd68802261d52121d21a0573%40group.calendar.google.com/public/basic.ics) + [gcal](https://calendar.google.com/calendar/u/0?cid=MmQ3N2Y5YTI2MjRkMThjZDQ2ZTUyOTlkMTVjYzBmYTBjOTBkY2E1M2ZkNjg4MDIyNjFkNTIxMjFkMjFhMDU3M0Bncm91cC5jYWxlbmRhci5nb29nbGUuY29t) + - [Meeting notes](https://docs.google.com/document/d/1DkCik6BTnO-cox_9y_BTKzPaJJOo_hBxiNFP3lInvOM/edit?tab=t.nvvd6cfloi81) +``` For event owners, our Discord bot also supports sending automated announcements -of upcoming sync-ups. Please see the :ref:`discord-bot-event-pings` section for +of upcoming sync-ups. Please see the {ref}`discord-bot-event-pings` section for info. -Past online sync-ups -^^^^^^^^^^^^^^^^^^^^ +### Past online sync-ups Some online sync-ups are no longer happening. We keep pointing to them here to keep track of the meeting notes and in case anyone would want to revive them in the future. -.. list-table:: LLVM no-longer-happening sync-up calls - :widths: 25 25 25 25 - :header-rows: 1 +```{list-table} LLVM no-longer-happening sync-up calls +:widths: 25 25 25 25 +:header-rows: 1 * - Topic - Frequency @@ -268,42 +279,42 @@ the future. 
- Minutes/docs link * - Scalable Vectors and Arm SVE - Monthly, every 3rd Tuesday - - `ics `__ - `gcal `__ - - `Minutes/docs `__ + - [ics](https://calendar.google.com/calendar/ical/bjms39pe6k6bo5egtsp7don414%40group.calendar.google.com/public/basic.ics) + [gcal](https://calendar.google.com/calendar/u/0/embed?src=bjms39pe6k6bo5egtsp7don414@group.calendar.google.com) + - [Minutes/docs](https://docs.google.com/document/d/1UPH2Hzou5RgGT8XfO39OmVXKEibWPfdYLELSaHr3xzo/edit) * - MemorySSA in LLVM - Every 8 weeks on Mondays - - `ics `__ - `gcal `__ - - `Minutes/docs `__ + - [ics](https://calendar.google.com/calendar/ical/c_1mincouiltpa24ac14of14lhi4%40group.calendar.google.com/public/basic.ics) + [gcal](https://calendar.google.com/calendar/embed?src=c_1mincouiltpa24ac14of14lhi4%40group.calendar.google.com) + - [Minutes/docs](https://docs.google.com/document/d/1-uEEZfmRdPThZlctOq9eXlmUaSSAAi8oKxhrPY_lpjk/edit#) * - Vector Predication - Every 2 weeks on Tuesdays, 3pm UTC - - - `Minutes/docs `__ - * - `MLIR `__ design meetings + - [Minutes/docs](https://docs.google.com/document/d/1q26ToudQjnqN5x31zk8zgq_s0lem1-BF8pQmciLa4k8/edit?usp=sharing) + * - [MLIR](https://mlir.llvm.org) design meetings - Weekly, on Thursdays - - - `Minutes/docs `__ + - [Minutes/docs](https://docs.google.com/document/d/1y_9f1AbfgcoVdJh4_aM6-BaSHvrHl8zuA5G4jv_94K8/edit#heading=h.cite1kolful9) +``` -.. _office-hours: +(office-hours)= -Office hours ------------- +## Office hours A number of experienced LLVM contributors make themselves available for a chat on a regular schedule, to anyone who is looking for some guidance. Please find the list of who is available when, through which medium, and what their area of expertise is. Don't be too shy to dial in! -Office hours are also listed on the :ref:`llvm-community-calendar`. Of course, +Office hours are also listed on the {ref}`llvm-community-calendar`. 
Of course, people take time off from time to time, so if you dial in and you don't find anyone present, chances are they happen to be off that day. -The :doc:`CodeOfConduct` applies to all office hours. +The {doc}`CodeOfConduct` applies to all office hours. -.. list-table:: LLVM office hours - :widths: 15 40 15 15 15 - :header-rows: 1 +```{list-table} LLVM office hours +:widths: 15 40 15 15 15 +:header-rows: 1 * - Name - In-scope topics @@ -315,92 +326,92 @@ The :doc:`CodeOfConduct` applies to all office hours. submitting talks; and other general LLVM-related topics. Arm/AArch64 codegen. LLVM security group. LLVM Office Hours. - Every 2nd and 4th Wednesday of the month at 9.30am CET, for 30 minutes. - `ics `__ - - `Jitsi `__ + [ics](https://user.fm/calendar/v1-eac36694e3024854a9402da023f9e0fa/Kristof%20Beyls%20LLVM%20office%20hours.ics) + - [Jitsi](https://meet.jit.si/KristofBeylsLLVMOfficeHour) - English, Flemish, Dutch * - Alina Sbirlea - General questions on how to contribute to LLVM; women in compilers; MemorySSA, BatchAA, various loop passes, new pass manager. - Monthly, 2nd Tuesdays, 10.00am PT/7:00pm CET, for 30 minutes. - `ics `__ - `gcal `__ - - `GoogleMeet `__ + [ics](https://calendar.google.com/calendar/ical/c_pm6e7160iq7n5fcm1s6m3rjhh4%40group.calendar.google.com/public/basic.ics) + [gcal](https://calendar.google.com/calendar/embed?src=c_pm6e7160iq7n5fcm1s6m3rjhh4%40group.calendar.google.com) + - [GoogleMeet](https://meet.google.com/hhk-xpdj-gvx) - English, Romanian * - Aaron Ballman (he/him) - Clang internals; frontend attributes; clang-tidy; clang-query; AST matchers - Monthly, 2nd Monday and 3rd Friday of the month at 10:00am Eastern and again at 2:00pm Eastern, for 60 minutes. 
- `ics `__ - `gcal `__ - - `GoogleMeet `__ + [ics](https://calendar.google.com/calendar/ical/npgke5dug0uliud0qapptmps58%40group.calendar.google.com/public/basic.ics) + [gcal](https://calendar.google.com/calendar/embed?src=npgke5dug0uliud0qapptmps58%40group.calendar.google.com) + - [GoogleMeet](https://meet.google.com/xok-iqne-gmi) - English, Norwegian (not fluently) * - Johannes Doerfert (he/him) - OpenMP, LLVM-IR, interprocedural optimizations, Attributor, workshops, research, ... - Every week, Wednesdays 9:30am (Pacific Time), for 1 hour. - `ics `__ - - `MS Teams `__ + [ics](https://drive.google.com/file/d/1E_QkRvirmdJzlXf2EKBUX-v8Xj7-eW3v/view?usp=sharing) + - [MS Teams](https://teams.microsoft.com/l/meetup-join/19%3ameeting_MTMxNzU4MWYtYzViNS00OTM2LWJmNWQtMjg5ZWFhNGVjNzgw%40thread.v2/0?context=%7b%22Tid%22%3a%22a722dec9-ae4e-4ae3-9d75-fd66e2680a63%22%2c%22Oid%22%3a%22885bda30-ce8e-46db-aa7e-15de0474831a%22%7d) - English, German * - Tobias Grosser - General questions on how to contribute to LLVM/MLIR, Polly, Loop Optimization, FPL, Research in LLVM, PhD in CS, Summer of Code. - Monthly, last Monday of the month at 18:00 London time (typically 9am PT), for 30 minutes. - - `Video Call `__ + - [Video Call](https://meet.grosser.science/LLVMOfficeHours) - English, German, Spanish, French * - Alexey Bader - SYCL compiler, offload tools, OpenCL and SPIR-V, how to contribute. - Monthly, 2nd Monday of the month at 9:30am PT, for 30 minutes. - - `GoogleMeet `__ + - [GoogleMeet](https://meet.google.com/pdz-xhns-uus) - English, Russian * - Maksim Panchenko - BOLT internals, IR, new passes, proposals, etc. - Monthly, 2nd Wednesday of the month at 11:00am PT, for 30 minutes. 
- - `Zoom `__ + - [Zoom](https://fb.zoom.us/j/97065697120?pwd=NTFaUWJjZW9uVkJuaVlPTE9qclE3dz09) - English, Russian * - Quentin Colombet (he/him) - LLVM/MLIR; Codegen (Instruction selection (GlobalISel/SDISel), Machine IR, Register allocation, etc.); Optimizations; MCA - Monthly, 1st Wednesday of the month at 8.00am PT, for 30 minutes. - `ics `__ - `gcal `__ - - `Google meet `__ + [ics](https://calendar.google.com/calendar/ical/48c4ad60290a4df218e51e1ceec1106fe317b0ebc76938d9273592053f38204e%40group.calendar.google.com/public/basic.ics) + [gcal](https://calendar.google.com/calendar/embed?src=48c4ad60290a4df218e51e1ceec1106fe317b0ebc76938d9273592053f38204e%40group.calendar.google.com) + - [Google meet](https://meet.google.com/cbz-grrp-obs) - English, French * - Phoebe Wang (she/her) - X86 backend, General questions to X86, women in compilers. - Monthly, 3rd Wednesday of the month at 8:30am Beijing time, for 30 minutes. - - `MS Teams `__ + - [MS Teams](https://teams.microsoft.com/l/meetup-join/19%3ameeting_NWQ0MjU0NjYtZjUyMi00YTU3LThmM2EtY2Y2YTE4NGM3NmFi%40thread.v2/0?context=%7b%22Tid%22%3a%2246c98d88-e344-4ed4-8496-4ed7712e255d%22%2c%22Oid%22%3a%227b309d9c-a9bb-44c8-a940-ab97eef42d4d%22%7d) - English, Chinese * - Amara Emerson - GlobalISel questions. - Monthly, 4th Wednesday of the month at 9:30am PT, for 30 minutes. - - `Google meet `__ + - [Google meet](https://meet.google.com/pdd-dibg-cwv) - English * - Maksim Levental and Jeremy Kun - - MLIR newcomers and general discussion (`livestreamed `__) + - MLIR newcomers and general discussion ([livestreamed](https://www.youtube.com/playlist?list=PLhxO86S3jsX2k7kOhZaV-qKWm8tNsUdAE)) - Every two weeks, Wednesdays at 2:00pm US Pacific, for 90 minutes. - - Livestream chat or `Google meet `__ + - Livestream chat or [Google meet](https://meet.google.com/wit-tvzc-dwc) - English * - Renato Golin - General LLVM, MLIR & Linalg, distributed computing, research, socials. 
- Every first Friday of the month, 14:00 UK time, for 60 minutes. - - `Google meet `__ + - [Google meet](https://meet.google.com/jps-twgq-ivz) - English, Portuguese +``` For event owners, our Discord bot also supports sending automated announcements -of upcoming office hours. Please see the :ref:`discord-bot-event-pings` section +of upcoming office hours. Please see the {ref}`discord-bot-event-pings` section for info. -Guidance for office hours hosts -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +### Guidance for office hours hosts * If you're interested in becoming an office hours host, please add your information to the list above. Please create a calendar event for it and invite calendar@llvm.org to the event so that it'll show up on the - :ref:`llvm-community-calendar`. - Please see :ref:`llvm-community-calendar-host-guidance` for more guidance on + {ref}`llvm-community-calendar`. + Please see {ref}`llvm-community-calendar-host-guidance` for more guidance on what to add to your calendar invite. * When starting an office hours session, if you haven't set up the Discord bot integration, consider typing something like "*Hi, I'm available for chats in the next half hour at* video chat URL. *I'm looking forward to having conversations on the video chat or here.*" on the - `#office-hours Discord channel `__. + [#office-hours Discord channel](https://discord.com/channels/636084430946959380/976196303681896538). Doing this can help: * overcome potential anxiety to call in for a first time, @@ -411,66 +422,66 @@ Guidance for office hours hosts from the list above. -Discord -------- +## Discord Users and developers of the LLVM project (including subprojects such as Clang) -can be found on the community's `Discord `_ +can be found on the community's [Discord](https://discord.gg/xS7Z362) chat server. The server is actively moderated. The #buildbot-status channel has a bot for -`LLVM buildbot `_ status changes. The +[LLVM buildbot](http://lab.llvm.org/buildbot/#/console) status changes. 
The bot will update the channel with a link to a build bot when a build goes from passing to failing and again when the build goes from failing back to passing. It is a great way to actively monitor the status of the build. The bot also supports @mention-ing you when your email appears on a blamelist. -For more details, DM ``help`` to the bot. +For more details, DM `help` to the bot. -.. _discord-bot-event-pings: +(discord-bot-event-pings)= -Discord bot event pings -^^^^^^^^^^^^^^^^^^^^^^^ +### Discord bot event pings Our Discord bot supports automatically sending messages about upcoming events -on `the LLVM community calendar `_ +on [the LLVM community calendar](https://calendar.google.com/calendar/u/0/embed?src=calendar@llvm.org) to Discord. This behavior is controlled on a per-event basis, by metadata in the event's description. Each piece of metadata should be on its own line in the event description. The currently supported metadata is: -- ``discord-bot-event-type`` - **Required**. Specifies the event type. Valid - values are ``office-hours`` and ``sync-up``. -- ``discord-bot-channels-to-mention`` - **Sometimes required**. A +- `discord-bot-event-type` - **Required**. Specifies the event type. Valid + values are `office-hours` and `sync-up`. +- `discord-bot-channels-to-mention` - **Sometimes required**. A comma-separated list of Discord channels to post notifications in. If your - ``discord-bot-event-type`` is ``office-hours``, the ``#office-hours`` channel + `discord-bot-event-type` is `office-hours`, the `#office-hours` channel will be implicitly appended to this list (ergo, you don't need to specify this item). Otherwise, you must specify a value here. -- ``discord-bot-mention`` - **Optional**. A comma-separated list of people to +- `discord-bot-mention` - **Optional**. A comma-separated list of people to ping on each event notification. All names mentioned must be Discord - usernames, and must have a leading ``@``. e.g., ``@foo, @bar``. 
-- ``discord-bot-reminder-time-before-start`` - **Optional**. The number of + usernames, and must have a leading `@`. e.g., `@foo, @bar`. +- `discord-bot-reminder-time-before-start` - **Optional**. The number of minutes before the beginning of an event to send a ping. This should be formatted as an integer. Defaults to 30. -- ``discord-bot-message`` - **Optional**. Text to append to all event pings. +- `discord-bot-message` - **Optional**. Text to append to all event pings. -An example of an event description with valid metadata is:: +An example of an event description with valid metadata is: - Regular office hours to chat with people about LLVM! We can help with - questions, troubleshooting bugs, etc. +```text +Regular office hours to chat with people about LLVM! We can help with +questions, troubleshooting bugs, etc. - discord-bot-channels-to-mention: #beginners, #foo - discord-bot-event-type: office-hours - discord-bot-mention: @gburgessiv, @bar - discord-bot-message: Come join us for office hours! - discord-bot-reminder-time-before-start: 5 +discord-bot-channels-to-mention: #beginners, #foo +discord-bot-event-type: office-hours +discord-bot-mention: @gburgessiv, @bar +discord-bot-message: Come join us for office hours! +discord-bot-reminder-time-before-start: 5 +``` This metadata will prompt the Discord bot to: -* send pings 5 minutes before the given event starts, mentioning ``@gburgessiv`` - and ``@bar`` in the ping -* send the pings to the ``#beginners``, ``#foo``, and ``#office-hours`` channels +* send pings 5 minutes before the given event starts, mentioning `@gburgessiv` + and `@bar` in the ping +* send the pings to the `#beginners`, `#foo`, and `#office-hours` channels * include the text "Come join us for office hours!" in the ping A few minutes before sending a ping, the bot will double-check that the event @@ -479,79 +490,82 @@ hasn't been cancelled. 
If you need help troubleshooting, or have feature requests/questions, please feel free to ping @gburgessiv! -.. _meetups-social-events: +(meetups-social-events)= -Meetups and social events -------------------------- +## Meetups and social events -.. toctree:: - :hidden: +```{toctree} +:hidden: - MeetupGuidelines +MeetupGuidelines +``` -Besides developer `meetings and conferences `_, +Besides developer [meetings and conferences](https://llvm.org/devmtg/), there are several user groups called -`LLVM Socials `_. We greatly encourage you to +[LLVM Socials](https://www.meetup.com/pro/llvm/). We greatly encourage you to join one in your city. Or start a new one if there is none: -:doc:`MeetupGuidelines` +{doc}`MeetupGuidelines` -.. _community-proposals: +(community-proposals)= -Community wide proposals ------------------------- +## Community wide proposals Proposals for large-scale changes in how the community behaves and how the work flow can be better. -.. toctree:: - :hidden: +```{toctree} +:hidden: + +Proposals/GitHubMove +BugpointRedesign +Proposals/TestSuite +Proposals/VariableNames +Proposals/VectorPredication +``` - Proposals/GitHubMove - BugpointRedesign - Proposals/TestSuite - Proposals/VariableNames - Proposals/VectorPredication +* {doc}`Proposals/GitHubMove` -:doc:`Proposals/GitHubMove` - Proposal to move from SVN/Git to GitHub. + Proposal to move from SVN/Git to GitHub. -:doc:`BugpointRedesign` - Design doc for a redesign of the Bugpoint tool. +* {doc}`BugpointRedesign` -:doc:`Proposals/TestSuite` - Proposals for additional benchmarks/programs for llvm's test-suite. + Design doc for a redesign of the Bugpoint tool. -:doc:`Proposals/VariableNames` - Proposal to change the variable names coding standard. +* {doc}`Proposals/TestSuite` -:doc:`Proposals/VectorPredication` - Proposal for predicated vector instructions in LLVM. + Proposals for additional benchmarks/programs for llvm's test-suite. -.. 
_llvm-community-calendar: +* {doc}`Proposals/VariableNames` -LLVM community calendar ------------------------ + Proposal to change the variable names coding standard. + +* {doc}`Proposals/VectorPredication` + + Proposal for predicated vector instructions in LLVM. + +(llvm-community-calendar)= + +## LLVM community calendar We aim to maintain a public calendar view of all events happening in the LLVM -community such as :ref:`online-sync-ups` and :ref:`office-hours`. The calendar +community such as {ref}`online-sync-ups` and {ref}`office-hours`. The calendar can be found at https://calendar.google.com/calendar/u/0/embed?src=calendar@llvm.org and can also be seen inline below: -.. raw:: html - - +```{raw} html + +``` Note that the web view of the LLVM community calendar shows events in Coordinated Universal Time (UTC). If you use Google Calendar, consider subscribing to it with the + button in the bottom-right corner to view all events in your local time zone alongside your other calendars. -.. _llvm-community-calendar-host-guidance: +(llvm-community-calendar-host-guidance)= -Guidance on what to put into LLVM community calendar invites -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +### Guidance on what to put into LLVM community calendar invites To add your event, create a calendar event for it and invite calendar@llvm.org on it. Your event should then show up on the community calendar. @@ -560,7 +574,7 @@ Please put the following pieces of information in your calendar invite: * Write a single paragraph describing what the event is about. Include things such as who the event is for and what sort of topics are discussed. -* State explicitly that the :doc:`CodeOfConduct` applies to this event. +* State explicitly that the {doc}`CodeOfConduct` applies to this event. * Make it clear who: * the organizer is. 
@@ -573,25 +587,25 @@ Please put the following pieces of information in your calendar invite: * If you're hosting a sync-up or office hours event and would like it to be announced by the Discord bot, add the relevant metadata (full descriptions - available in the :ref:`discord-bot-event-pings` section). + available in the {ref}`discord-bot-event-pings` section). An example invite looks as follows -.. code-block:: none - - This event is a meetup for all developers of LLDB. Meeting agendas are posted - on Discourse before the event. +```text +This event is a meetup for all developers of LLDB. Meeting agendas are posted +on Discourse before the event. - Attendees must adhere to the LLVM Code of Conduct - (https://llvm.org/docs/CodeOfConduct.html). For any Code of Conduct reports, - please contact the organizers and also email conduct@llvm.org. +Attendees must adhere to the LLVM Code of Conduct +(https://llvm.org/docs/CodeOfConduct.html). For any Code of Conduct reports, +please contact the organizers and also email conduct@llvm.org. - Agenda/Meeting Minutes: Link to minutes +Agenda/Meeting Minutes: Link to minutes - Organizer(s): First Surname (name@email.com) +Organizer(s): First Surname (name@email.com) - discord-bot-channels-to-mention: #lldb - discord-bot-event-type: sync-up - discord-bot-mention: @host-username, @another-host - discord-bot-message: Come join us to chat about LLDB! - discord-bot-reminder-time-before-start: 30 +discord-bot-channels-to-mention: #lldb +discord-bot-event-type: sync-up +discord-bot-mention: @host-username, @another-host +discord-bot-message: Come join us to chat about LLDB! 
+discord-bot-reminder-time-before-start: 30 +``` diff --git a/llvm/docs/GettingStarted.md b/llvm/docs/GettingStarted.md index fffa5a0d8280e..ddc5fb73610e1 100644 --- a/llvm/docs/GettingStarted.md +++ b/llvm/docs/GettingStarted.md @@ -1,12 +1,10 @@ -==================================== -Getting Started with the LLVM System -==================================== +# Getting Started with the LLVM System -.. contents:: - :local: +```{contents} +:local: +``` -Overview -======== +## Overview Welcome to the LLVM project! @@ -16,127 +14,121 @@ files needed to process intermediate representations and convert them into object files. Tools include an assembler, disassembler, bitcode analyzer, and bitcode optimizer. It also contains basic regression tests. -C-like languages use the `Clang `_ front end. This +C-like languages use the [Clang](https://clang.llvm.org/) front end. This component compiles C, C++, Objective-C, and Objective-C++ code into LLVM bitcode -- and from there into object files, using LLVM. Other components include: -the `libc++ C++ standard library `_, -the `LLD linker `_, and more. +the [libc++ C++ standard library](https://libcxx.llvm.org), +the [LLD linker](https://lld.llvm.org), and more. -.. _sources: +(sources)= +## Getting the Source Code and Building LLVM -Getting the Source Code and Building LLVM -========================================= +1. Check out LLVM (including subprojects like Clang): -#. Check out LLVM (including subprojects like Clang): - - * ``git clone https://github.com/llvm/llvm-project.git`` + * `git clone https://github.com/llvm/llvm-project.git` * Or, on Windows: - ``git clone --config core.autocrlf=false - https://github.com/llvm/llvm-project.git`` + `git clone --config core.autocrlf=false https://github.com/llvm/llvm-project.git` * To save storage and speed up the checkout time, you may want to do a - `shallow clone `_. + [shallow clone](https://git-scm.com/docs/git-clone#Documentation/git-clone.txt---depthltdepthgt). 
For example, to get the latest revision of the LLVM project, use - ``git clone --depth 1 https://github.com/llvm/llvm-project.git`` + `git clone --depth 1 https://github.com/llvm/llvm-project.git` * You are likely not interested in the user branches in the repo (used for stacked pull requests and reverts), you can filter them from your `git fetch` (or `git pull`) with this configuration: - .. code-block:: console - - git config --add remote.origin.fetch '^refs/heads/users/*' - git config --add remote.origin.fetch '^refs/heads/revert-*' - -#. Configure and build LLVM and Clang: + ```console + git config --add remote.origin.fetch '^refs/heads/users/*' + git config --add remote.origin.fetch '^refs/heads/revert-*' + ``` +1. Configure and build LLVM and Clang: - * ``cd llvm-project`` - * ``cmake -S llvm -B build -G [options]`` + * `cd llvm-project` + * `cmake -S llvm -B build -G [options]` Some common build system generators are: - * ``Ninja`` --- for generating `Ninja `_ + * `Ninja` --- for generating [Ninja](https://ninja-build.org) build files. Most llvm developers use Ninja. - * ``Unix Makefiles`` --- for generating make-compatible parallel makefiles. - * ``Visual Studio`` --- for generating Visual Studio projects and + * `Unix Makefiles` --- for generating make-compatible parallel makefiles. + * `Visual Studio` --- for generating Visual Studio projects and solutions. - * ``Xcode`` --- for generating Xcode projects. + * `Xcode` --- for generating Xcode projects. - * See the `CMake docs - `_ + * See the [CMake docs](https://cmake.org/cmake/help/latest/manual/cmake-generators.7.html) for a more comprehensive list. Some common options: - * ``-DLLVM_ENABLE_PROJECTS='...'`` --- A semicolon-separated list of the LLVM + * `-DLLVM_ENABLE_PROJECTS='...'` --- A semicolon-separated list of the LLVM subprojects you'd like to additionally build. Can include any of: clang, clang-tools-extra, lldb, lld, polly, or cross-project-tests. 
For example, to build LLVM, Clang, and LLD, use - ``-DLLVM_ENABLE_PROJECTS="clang;lld"``. + `-DLLVM_ENABLE_PROJECTS="clang;lld"`. - * ``-DCMAKE_INSTALL_PREFIX=directory`` --- Specify for *directory* the full + * `-DCMAKE_INSTALL_PREFIX=directory` --- Specify for *directory* the full pathname of where you want the LLVM tools and libraries to be installed - (default ``/usr/local``). + (default `/usr/local`). - * ``-DCMAKE_BUILD_TYPE=type`` --- Controls the optimization level and debug - information of the build. Valid options for *type* are ``Debug``, - ``Release``, ``RelWithDebInfo``, and ``MinSizeRel``. For more detailed - information, see :ref:`CMAKE_BUILD_TYPE `. + * `-DCMAKE_BUILD_TYPE=type` --- Controls the optimization level and debug + information of the build. Valid options for *type* are `Debug`, + `Release`, `RelWithDebInfo`, and `MinSizeRel`. For more detailed + information, see {ref}`CMAKE_BUILD_TYPE `. - * ``-DLLVM_ENABLE_ASSERTIONS=ON`` --- Compile with assertion checks enabled + * `-DLLVM_ENABLE_ASSERTIONS=ON` --- Compile with assertion checks enabled (default is ON for Debug builds, OFF for all other build types). - * ``-DLLVM_USE_LINKER=lld`` --- Link with the `lld linker`_, assuming it + * `-DLLVM_USE_LINKER=lld` --- Link with the [lld linker](https://lld.llvm.org), assuming it is installed on your system. This can dramatically speed up link times if the default linker is slow. - * ``-DLLVM_PARALLEL_{COMPILE,LINK,TABLEGEN}_JOBS=N`` --- Limit the number of + * `-DLLVM_PARALLEL_{COMPILE,LINK,TABLEGEN}_JOBS=N` --- Limit the number of compile/link/tablegen jobs running in parallel at the same time. This is especially important for linking since linking can use lots of memory. If you run into memory issues building LLVM, try setting this to limit the maximum number of compile/link/tablegen jobs running at the same time. 
- * ``cmake --build build [--target ]`` or the build system specified + * `cmake --build build [--target ]` or the build system specified above directly. - * The default target (i.e. ``cmake --build build`` or ``make -C build``) + * The default target (i.e. `cmake --build build` or `make -C build`) will build all of LLVM. - * The ``check-all`` target (i.e. ``ninja check-all``) will run the + * The `check-all` target (i.e. `ninja check-all`) will run the regression tests to ensure everything is in working order. * CMake will generate build targets for each tool and library, and most - LLVM sub-projects generate their own ``check-`` target. + LLVM sub-projects generate their own `check-` target. * Running a serial build will be **slow**. To improve speed, try running a - parallel build. That's done by default in Ninja; for ``make``, use the - option ``-j NN``, where ``NN`` is the number of parallel jobs, e.g. the + parallel build. That's done by default in Ninja; for `make`, use the + option `-j NN`, where `NN` is the number of parallel jobs, e.g. the number of available CPUs. * A basic CMake and build/test invocation which only builds LLVM and no other subprojects: - ``cmake -S llvm -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug`` + `cmake -S llvm -B build -G Ninja -DCMAKE_BUILD_TYPE=Debug` - ``ninja -C build check-llvm`` + `ninja -C build check-llvm` This will set up an LLVM build with debugging info, then compile LLVM and run LLVM tests. - * For more detailed information on CMake options, see `CMake `__ + * For more detailed information on CMake options, see {doc}`CMake ` - * If you get build or test failures, see `below`_. + * If you get build or test failures, see {ref}`below <below>`. -Consult the `Getting Started with LLVM`_ section for detailed information on -configuring and compiling LLVM. Go to `Directory Layout`_ to learn about the +Consult the {ref}`Getting Started with LLVM <Getting Started with LLVM>` section for detailed information on +configuring and compiling LLVM.
Go to {ref}`Directory Layout ` to learn about the layout of the source code tree. -Stand-alone Builds ------------------- +### Stand-alone Builds Stand-alone builds allow you to build a sub-project against a pre-built version of the clang or llvm libraries that is already present on your @@ -144,8 +136,8 @@ system. You can use the source code from a standard checkout of the llvm-project (as described above) to do stand-alone builds, but you may also build -from a :ref:`sparse checkout` or from the -tarballs available on the `releases `_ +from a {ref}`sparse checkout ` or from the +tarballs available on the [releases](https://github.com/llvm/llvm-project/releases/) page. For stand-alone builds, you must have an llvm install that is configured @@ -153,125 +145,115 @@ properly to be consumable by stand-alone builds of the other projects. This could be a distro-provided LLVM install, or you can build it yourself, like this: -.. code-block:: console - - cmake -G Ninja -S path/to/llvm-project/llvm -B $builddir \ - -DLLVM_INSTALL_UTILS=ON \ - -DCMAKE_INSTALL_PREFIX=/path/to/llvm/install/prefix \ - < other options > - - ninja -C $builddir install +```console +cmake -G Ninja -S path/to/llvm-project/llvm -B $builddir \ + -DLLVM_INSTALL_UTILS=ON \ + -DCMAKE_INSTALL_PREFIX=/path/to/llvm/install/prefix \ + < other options > +ninja -C $builddir install +``` Once llvm is installed, to configure a project for a stand-alone build, invoke CMake like this: -.. code-block:: console - - cmake -G Ninja -S path/to/llvm-project/$subproj \ - -B $buildir_subproj \ - -DLLVM_EXTERNAL_LIT=/path/to/lit \ - -DLLVM_ROOT=/path/to/llvm/install/prefix - +```console +cmake -G Ninja -S path/to/llvm-project/$subproj \ + -B $buildir_subproj \ + -DLLVM_EXTERNAL_LIT=/path/to/lit \ + -DLLVM_ROOT=/path/to/llvm/install/prefix +``` Notice that: * The stand-alone build needs to happen in a folder that is not the original folder where LLVM was built (`$builddir!=$builddir_subproj`). 
-* ``LLVM_ROOT`` should point to the prefix of your llvm installation, - so for example, if llvm is installed into ``/usr/bin`` and - ``/usr/lib64``, then you should pass ``-DLLVM_ROOT=/usr/``. -* Both the ``LLVM_ROOT`` and ``LLVM_EXTERNAL_LIT`` options are +* `LLVM_ROOT` should point to the prefix of your llvm installation, + so for example, if llvm is installed into `/usr/bin` and + `/usr/lib64`, then you should pass `-DLLVM_ROOT=/usr/`. +* Both the `LLVM_ROOT` and `LLVM_EXTERNAL_LIT` options are required to do stand-alone builds for all sub-projects. Additional required options for each sub-project can be found in the table below. -The ``check-$subproj`` and ``install`` build targets are supported for the +The `check-$subproj` and `install` build targets are supported for the sub-projects listed in the table below. -============ ======================== ====================== -Sub-Project Required Sub-Directories Required CMake Options -============ ======================== ====================== -llvm llvm, cmake, third-party LLVM_INSTALL_UTILS=ON -clang clang, cmake CLANG_INCLUDE_TESTS=ON (Required for check-clang only) -lld lld, cmake -============ ======================== ====================== +| Sub-Project | Required Sub-Directories | Required CMake Options | +| --- | --- | --- | +| llvm | llvm, cmake, third-party | LLVM_INSTALL_UTILS=ON | +| clang | clang, cmake | CLANG_INCLUDE_TESTS=ON (Required for check-clang only) | +| lld | lld, cmake | | Example of building stand-alone `clang`: -.. 
code-block:: console - - #!/bin/sh +```console +#!/bin/sh - build_llvm=`pwd`/build-llvm - build_clang=`pwd`/build-clang - installprefix=`pwd`/install - llvm=`pwd`/llvm-project - mkdir -p $build_llvm - mkdir -p $installprefix +build_llvm=`pwd`/build-llvm +build_clang=`pwd`/build-clang +installprefix=`pwd`/install +llvm=`pwd`/llvm-project +mkdir -p $build_llvm +mkdir -p $installprefix - cmake -G Ninja -S $llvm/llvm -B $build_llvm \ - -DLLVM_INSTALL_UTILS=ON \ - -DCMAKE_INSTALL_PREFIX=$installprefix \ - -DCMAKE_BUILD_TYPE=Release +cmake -G Ninja -S $llvm/llvm -B $build_llvm \ + -DLLVM_INSTALL_UTILS=ON \ + -DCMAKE_INSTALL_PREFIX=$installprefix \ + -DCMAKE_BUILD_TYPE=Release - ninja -C $build_llvm install +ninja -C $build_llvm install - cmake -G Ninja -S $llvm/clang -B $build_clang \ - -DLLVM_EXTERNAL_LIT=$build_llvm/utils/lit \ - -DLLVM_ROOT=$installprefix +cmake -G Ninja -S $llvm/clang -B $build_clang \ + -DLLVM_EXTERNAL_LIT=$build_llvm/utils/lit \ + -DLLVM_ROOT=$installprefix - ninja -C $build_clang - -Requirements -============ +ninja -C $build_clang +``` +## Requirements Before you begin to use the LLVM system, review the requirements below. This may save you some trouble by knowing ahead of time what hardware and software you will need. 
-Hardware --------- +### Hardware LLVM is known to work on the following host platforms: -================== ===================== ============================== -OS Arch Compilers -================== ===================== ============================== -Linux x86\ :sup:`1` GCC, Clang -Linux amd64 GCC, Clang -Linux ARM GCC, Clang -Linux AArch64 GCC, Clang -Linux LoongArch GCC, Clang -Linux Mips GCC, Clang -Linux PowerPC GCC, Clang -Linux RISC-V GCC, Clang -Linux SystemZ GCC, Clang -Solaris V9 (Ultrasparc) GCC -DragonFlyBSD amd64 GCC, Clang -FreeBSD x86\ :sup:`1` GCC, Clang -FreeBSD amd64 GCC, Clang -FreeBSD AArch64 GCC, Clang -NetBSD x86\ :sup:`1` GCC, Clang -NetBSD amd64 GCC, Clang -OpenBSD x86\ :sup:`1` GCC, Clang -OpenBSD amd64 GCC, Clang -macOS\ :sup:`2` PowerPC GCC -macOS x86 GCC, Clang -macOS arm64 Clang -Cygwin/Win32 x86\ :sup:`1, 3` GCC -Windows x86\ :sup:`1` Visual Studio -Windows x64 x86-64 Visual Studio, Clang\ :sup:`4` -Windows on Arm ARM64 Visual Studio, Clang\ :sup:`4` -================== ===================== ============================== - -.. note:: - - #. Code generation supported for Pentium processors and up - #. Code generation supported for 32-bit ABI only - #. To use LLVM modules on a Win32-based system, you may configure LLVM - with ``-DBUILD_SHARED_LIBS=On``. - #. Visual Studio alone can compile LLVM. When using Clang, you - must also have Visual Studio installed. 
- +| OS | Arch | Compilers | +| --- | --- | --- | +| Linux | x86{sup}`1` | GCC, Clang | +| Linux | amd64 | GCC, Clang | +| Linux | ARM | GCC, Clang | +| Linux | AArch64 | GCC, Clang | +| Linux | LoongArch | GCC, Clang | +| Linux | Mips | GCC, Clang | +| Linux | PowerPC | GCC, Clang | +| Linux | RISC-V | GCC, Clang | +| Linux | SystemZ | GCC, Clang | +| Solaris | V9 (Ultrasparc) | GCC | +| DragonFlyBSD | amd64 | GCC, Clang | +| FreeBSD | x86{sup}`1` | GCC, Clang | +| FreeBSD | amd64 | GCC, Clang | +| FreeBSD | AArch64 | GCC, Clang | +| NetBSD | x86{sup}`1` | GCC, Clang | +| NetBSD | amd64 | GCC, Clang | +| OpenBSD | x86{sup}`1` | GCC, Clang | +| OpenBSD | amd64 | GCC, Clang | +| macOS{sup}`2` | PowerPC | GCC | +| macOS | x86 | GCC, Clang | +| macOS | arm64 | Clang | +| Cygwin/Win32 | x86{sup}`1, 3` | GCC | +| Windows | x86{sup}`1` | Visual Studio | +| Windows x64 | x86-64 | Visual Studio, Clang{sup}`4` | +| Windows on Arm | ARM64 | Visual Studio, Clang{sup}`4` | + +```{note} +1. Code generation supported for Pentium processors and up +1. Code generation supported for 32-bit ABI only +1. To use LLVM modules on a Win32-based system, you may configure LLVM + with `-DBUILD_SHARED_LIBS=On`. +1. Visual Studio alone can compile LLVM. When using Clang, you + must also have Visual Studio installed. +``` Note that Debug builds require a lot of time and disk space. An LLVM-only build will need about 1-3 GB of space. A full build of LLVM and Clang will need around 15-20 GB of disk space. The exact space requirements will vary by system. (It @@ -287,8 +269,7 @@ assemble, disassemble, analyze, and optimize LLVM bitcode. Code generation should work as well, although the generated native code may not work on your platform. -Software --------- +### Software Compiling LLVM requires that you have several software packages installed. The table below lists those required packages. The Package column is the usual name @@ -296,26 +277,23 @@ for the software package that LLVM depends on. 
The Version column provides "known to work" versions of the package. The Notes column describes how LLVM uses the package and provides other details. -=========================================================== ============ ========================================== -Package Version Notes -=========================================================== ============ ========================================== -`CMake `_ >=3.20.0 Makefile/workspace generator -`python `_ >=3.8 Automated test suite\ :sup:`1` -`zlib `_ >=1.2.3.4 Compression library\ :sup:`2` -`GNU Make `_ 3.79, 3.79.1 Makefile/build processor\ :sup:`3` -`PyYAML `_ >=5.1 Header generator\ :sup:`4` -=========================================================== ============ ========================================== - -.. note:: - - #. Only needed if you want to run the automated test suite in the - ``llvm/test`` directory, or if you plan to utilize any Python libraries, - utilities, or bindings. - #. Optional, adds compression/uncompression capabilities to selected LLVM - tools. - #. Optional, you can use any other build tool supported by CMake. - #. Only needed when building libc with New Headergen. Mainly used by libc. - +| Package | Version | Notes | +| --- | --- | --- | +| [CMake](http://cmake.org/) | >=3.20.0 | Makefile/workspace generator | +| [python](http://www.python.org/) | >=3.8 | Automated test suite{sup}`1` | +| [zlib](http://zlib.net) | >=1.2.3.4 | Compression library{sup}`2` | +| [GNU Make](http://savannah.gnu.org/projects/make) | 3.79, 3.79.1 | Makefile/build processor{sup}`3` | +| [PyYAML](https://pypi.org/project/PyYAML/) | >=5.1 | Header generator{sup}`4` | + +```{note} +1. Only needed if you want to run the automated test suite in the + `llvm/test` directory, or if you plan to utilize any Python libraries, + utilities, or bindings. +1. Optional, adds compression/uncompression capabilities to selected LLVM + tools. +1. Optional, you can use any other build tool supported by CMake. +1. 
Only needed when building libc with New Headergen. Mainly used by libc. +``` Additionally, your compilation host is expected to have the usual plethora of Unix utilities. Specifically: @@ -344,20 +322,17 @@ Unix utilities. Specifically: * **unzip** --- unzip command for distribution checking * **zip** --- zip command for distribution generation -.. _below: -.. _check here: - -.. _host_cpp_toolchain: - -Host C++ Toolchain, both Compiler and Standard Library ------------------------------------------------------- +(below)= +(check here)= +(host_cpp_toolchain)= +### Host C++ Toolchain, both Compiler and Standard Library LLVM is very demanding of the host C++ compiler, and as such tends to expose bugs in the compiler. We also attempt to follow improvements and developments in the C++ language and library reasonably closely. As such, we require a modern host C++ toolchain, both compiler and standard library, in order to build LLVM. -LLVM is written using the subset of C++ documented in :doc:`coding +LLVM is written using the subset of C++ documented in {doc}`coding standards`. To enforce this language version, we check the most popular host toolchains for specific minimum versions in our build systems: @@ -378,23 +353,23 @@ We track certain versions of software that are *known* to fail when used as part of the host toolchain. These even include linkers at times. **GNU ld 2.16.X**. Some 2.16.X versions of the ld linker will produce very long -warning messages complaining that some "``.gnu.linkonce.t.*``" symbol was +warning messages complaining that some "`.gnu.linkonce.t.*`" symbol was defined in a discarded section. You can safely ignore these messages as they are erroneous and the linkage is correct. These messages disappear using ld 2.17. -**GNU binutils 2.17**: Binutils 2.17 contains `a bug -`__ which causes huge link +**GNU binutils 2.17**: Binutils 2.17 contains [a bug] which causes huge link times (minutes instead of seconds) when building LLVM. 
We recommend upgrading to a newer version (2.17.50.0.4 or later). -**GNU Binutils 2.19.1 Gold**: This version of Gold contained `a bug -`__ which causes +**GNU Binutils 2.19.1 Gold**: This version of Gold contained [a bug][gold-bug] which causes intermittent failures when building LLVM with position independent code. The symptom is an error about cyclic dependencies. We recommend upgrading to a newer version of Gold. -Getting a Modern Host C++ Toolchain -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +[a bug]: http://sourceware.org/bugzilla/show_bug.cgi?id=3111 +[gold-bug]: http://sourceware.org/bugzilla/show_bug.cgi?id=9836 + +#### Getting a Modern Host C++ Toolchain This section mostly applies to Linux and older BSDs. On macOS, you should have a sufficiently modern Xcode, or you will likely need to upgrade until you @@ -415,98 +390,89 @@ initial host in a bootstrap, and then using Clang (and potentially libc++). The first step is to get a recent GCC toolchain installed. The most common distribution on which users have struggled with the version requirements is Ubuntu Precise, 12.04 LTS. For this distribution, one easy option is to install -the `toolchain testing PPA`_ and use it to install a modern GCC. There is -a really nice discussion of this on the `ask ubuntu stack exchange`_ and a -`github gist`_ with updated commands. However, not all users can use PPAs and +the [toolchain testing PPA] and use it to install a modern GCC. There is +a really nice discussion of this on the [ask ubuntu stack exchange] and a +[github gist] with updated commands. However, not all users can use PPAs and there are many other distributions, so it may be necessary (or just useful, if you're here you *are* doing compiler development after all) to build and install GCC from source. It is also quite easy to do these days. -.. _toolchain testing PPA: - https://launchpad.net/~ubuntu-toolchain-r/+archive/test -.. 
_ask ubuntu stack exchange: - https://askubuntu.com/questions/466651/how-do-i-use-the-latest-gcc-on-ubuntu/581497#58149 -.. _github gist: - https://gist.github.com/application2000/73fd6f4bf1be6600a2cf9f56315a2d91 +[toolchain testing PPA]: https://launchpad.net/~ubuntu-toolchain-r/+archive/test +[ask ubuntu stack exchange]: https://askubuntu.com/questions/466651/how-do-i-use-the-latest-gcc-on-ubuntu/581497#58149 +[github gist]: https://gist.github.com/application2000/73fd6f4bf1be6600a2cf9f56315a2d91 Easy steps for installing a specific version of GCC: -.. code-block:: console - - % gcc_version=7.4.0 - % wget https://ftp.gnu.org/gnu/gcc/gcc-${gcc_version}/gcc-${gcc_version}.tar.bz2 - % wget https://ftp.gnu.org/gnu/gcc/gcc-${gcc_version}/gcc-${gcc_version}.tar.bz2.sig - % wget https://ftp.gnu.org/gnu/gnu-keyring.gpg - % signature_invalid=`gpg --verify --no-default-keyring --keyring ./gnu-keyring.gpg gcc-${gcc_version}.tar.bz2.sig` - % if [ $signature_invalid ]; then echo "Invalid signature" ; exit 1 ; fi - % tar -xvjf gcc-${gcc_version}.tar.bz2 - % cd gcc-${gcc_version} - % ./contrib/download_prerequisites - % cd .. - % mkdir gcc-${gcc_version}-build - % cd gcc-${gcc_version}-build - % $PWD/../gcc-${gcc_version}/configure --prefix=$HOME/toolchains --enable-languages=c,c++ - % make -j$(nproc) - % make install - -For more details, check out the excellent `GCC wiki entry`_, where I got most +```console +% gcc_version=7.4.0 +% wget https://ftp.gnu.org/gnu/gcc/gcc-${gcc_version}/gcc-${gcc_version}.tar.bz2 +% wget https://ftp.gnu.org/gnu/gcc/gcc-${gcc_version}/gcc-${gcc_version}.tar.bz2.sig +% wget https://ftp.gnu.org/gnu/gnu-keyring.gpg +% signature_invalid=`gpg --verify --no-default-keyring --keyring ./gnu-keyring.gpg gcc-${gcc_version}.tar.bz2.sig` +% if [ $signature_invalid ]; then echo "Invalid signature" ; exit 1 ; fi +% tar -xvjf gcc-${gcc_version}.tar.bz2 +% cd gcc-${gcc_version} +% ./contrib/download_prerequisites +% cd .. 
+% mkdir gcc-${gcc_version}-build +% cd gcc-${gcc_version}-build +% $PWD/../gcc-${gcc_version}/configure --prefix=$HOME/toolchains --enable-languages=c,c++ +% make -j$(nproc) +% make install +``` +For more details, check out the excellent [GCC wiki entry], where I got most of this information from. -.. _GCC wiki entry: - https://gcc.gnu.org/wiki/InstallingGCC +[GCC wiki entry]: https://gcc.gnu.org/wiki/InstallingGCC Once you have a GCC toolchain, configure your build of LLVM to use the new toolchain for your host compiler and C++ standard library. Because the new version of libstdc++ is not on the system library search path, you need to pass -extra linker flags so that it can be found at link time (``-L``) and at runtime -(``-rpath``). If you are using CMake, this invocation should produce working +extra linker flags so that it can be found at link time (`-L`) and at runtime +(`-rpath`). If you are using CMake, this invocation should produce working binaries: -.. code-block:: console - - % mkdir build - % cd build - % CC=$HOME/toolchains/bin/gcc CXX=$HOME/toolchains/bin/g++ \ - cmake .. -DCMAKE_CXX_LINK_FLAGS="-Wl,-rpath,$HOME/toolchains/lib64 -L$HOME/toolchains/lib64" - +```console +% mkdir build +% cd build +% CC=$HOME/toolchains/bin/gcc CXX=$HOME/toolchains/bin/g++ \ + cmake .. -DCMAKE_CXX_LINK_FLAGS="-Wl,-rpath,$HOME/toolchains/lib64 -L$HOME/toolchains/lib64" +``` If you fail to set rpath, most LLVM binaries will fail on startup with a message from the loader similar to ``libstdc++.so.6: version `GLIBCXX_3.4.20' not -found``. This means you need to tweak the ``-rpath`` linker flag. +found``. This means you need to tweak the `-rpath` linker flag. This method will add an absolute path to the rpath of all executables. That's fine for local development. If you want to distribute the binaries you build -so that they can run on older systems, copy ``libstdc++.so.6`` into the -``lib/`` directory. 
All of LLVM's shipping binaries have an rpath pointing at -``$ORIGIN/../lib``, so they will find ``libstdc++.so.6`` there. Non-distributed -binaries don't have an rpath set and won't find ``libstdc++.so.6``. Pass -``-DLLVM_LOCAL_RPATH="$HOME/toolchains/lib64"`` to CMake to add an absolute -path to ``libstdc++.so.6`` as above. Since these binaries are not distributed, +so that they can run on older systems, copy `libstdc++.so.6` into the +`lib/` directory. All of LLVM's shipping binaries have an rpath pointing at +`$ORIGIN/../lib`, so they will find `libstdc++.so.6` there. Non-distributed +binaries don't have an rpath set and won't find `libstdc++.so.6`. Pass +`-DLLVM_LOCAL_RPATH="$HOME/toolchains/lib64"` to CMake to add an absolute +path to `libstdc++.so.6` as above. Since these binaries are not distributed, having an absolute local path is fine for them. When you build Clang, you will need to give *it* access to a modern C++ standard library in order to use it as your new host in part of a bootstrap. There are two easy ways to do this, either build (and install) libc++ along -with Clang and then use it with the ``-stdlib=libc++`` compile and link flag, -or install Clang into the same prefix (``$HOME/toolchains`` above) as GCC. +with Clang and then use it with the `-stdlib=libc++` compile and link flag, +or install Clang into the same prefix (`$HOME/toolchains` above) as GCC. Clang will look within its own prefix for libstdc++ and use it if found. You can also add an explicit prefix for Clang to look in for a GCC toolchain with -the ``--gcc-toolchain=/opt/my/gcc/prefix`` flag, passing it to both compile and +the `--gcc-toolchain=/opt/my/gcc/prefix` flag, passing it to both compile and link commands when using your just-built-Clang to bootstrap. -.. 
_Getting Started with LLVM: - -Getting Started with LLVM -========================= +(Getting Started with LLVM)= +## Getting Started with LLVM The remainder of this guide is meant to get you up and running with LLVM and to give you some basic information about the LLVM environment. -The later sections of this guide describe the `general layout`_ of the LLVM -source tree, a `simple example`_ using the LLVM toolchain, and `links`_ to find +The later sections of this guide describe the {ref}`general layout ` of the LLVM +source tree, a {ref}`simple example ` using the LLVM toolchain, and {ref}`links ` to find more information about LLVM or to get help via e-mail. -Terminology and Notation ------------------------- +### Terminology and Notation Throughout this manual, the following names are used to denote paths specific to the local system and working environment. *These are not environment variables @@ -514,157 +480,146 @@ you need to set but just strings used in the rest of this document below*. In any of the examples below, simply replace each of these names with the appropriate pathname on your local system. All these paths are absolute: -``SRC_ROOT`` +* `SRC_ROOT` This is the top-level directory of the LLVM source tree. -``OBJ_ROOT`` +* `OBJ_ROOT` This is the top-level directory of the LLVM object tree (i.e. the tree where object files and compiled programs will be placed. It can be the same as SRC_ROOT). -Sending patches -^^^^^^^^^^^^^^^ +#### Sending patches -See :ref:`Contributing `. +See {ref}`Contributing `. -Bisecting commits -^^^^^^^^^^^^^^^^^ +#### Bisecting commits -See `Bisecting LLVM code `_ for how to use ``git bisect`` +See {doc}`Bisecting LLVM code ` for how to use `git bisect` on LLVM. -Reverting a change -^^^^^^^^^^^^^^^^^^ +#### Reverting a change When reverting changes using git, the default message will say "This reverts commit XYZ". 
Leave this at the end of the commit message, but add some details before it as to why the commit is being reverted. A brief explanation and/or links to bots that demonstrate the problem are sufficient. -Local LLVM Configuration ------------------------- +### Local LLVM Configuration Once checked out repository, the LLVM suite source code must be configured -before being built. This process uses CMake. Unlike the normal ``configure`` +before being built. This process uses CMake. Unlike the normal `configure` script, CMake generates the build files in whatever format you request as well -as various ``*.inc`` files, and ``llvm/include/llvm/Config/config.h.cmake``. +as various `*.inc` files, and `llvm/include/llvm/Config/config.h.cmake`. -Variables are passed to ``cmake`` on the command line using the format -``-D=``. The following variables are some common options +Variables are passed to `cmake` on the command line using the format +`-D=`. The following variables are some common options used by people developing LLVM. -* ``CMAKE_C_COMPILER`` -* ``CMAKE_CXX_COMPILER`` -* ``CMAKE_BUILD_TYPE`` -* ``CMAKE_INSTALL_PREFIX`` -* ``Python3_EXECUTABLE`` -* ``LLVM_TARGETS_TO_BUILD`` -* ``LLVM_ENABLE_PROJECTS`` -* ``LLVM_ENABLE_RUNTIMES`` -* ``LLVM_ENABLE_DOXYGEN`` -* ``LLVM_ENABLE_SPHINX`` -* ``LLVM_BUILD_LLVM_DYLIB`` -* ``LLVM_LINK_LLVM_DYLIB`` -* ``LLVM_PARALLEL_LINK_JOBS`` -* ``LLVM_OPTIMIZED_TABLEGEN`` - -See :ref:`the list of frequently-used CMake variables ` +* `CMAKE_C_COMPILER` +* `CMAKE_CXX_COMPILER` +* `CMAKE_BUILD_TYPE` +* `CMAKE_INSTALL_PREFIX` +* `Python3_EXECUTABLE` +* `LLVM_TARGETS_TO_BUILD` +* `LLVM_ENABLE_PROJECTS` +* `LLVM_ENABLE_RUNTIMES` +* `LLVM_ENABLE_DOXYGEN` +* `LLVM_ENABLE_SPHINX` +* `LLVM_BUILD_LLVM_DYLIB` +* `LLVM_LINK_LLVM_DYLIB` +* `LLVM_PARALLEL_LINK_JOBS` +* `LLVM_OPTIMIZED_TABLEGEN` + +See {ref}`the list of frequently-used CMake variables ` for more information. To configure LLVM, follow these steps: -#. 
Change directory into the object root directory: - - .. code-block:: console - - % cd OBJ_ROOT +1. Change directory into the object root directory: -#. Run the ``cmake``: + ```console + % cd OBJ_ROOT + ``` +1. Run the `cmake`: - .. code-block:: console - - % cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE= -DCMAKE_INSTALL_PREFIX=/install/path - [other options] SRC_ROOT - -Compiling the LLVM Suite Source Code ------------------------------------- + ```console + % cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE= -DCMAKE_INSTALL_PREFIX=/install/path + [other options] SRC_ROOT + ``` +### Compiling the LLVM Suite Source Code Unlike with autotools, with CMake your build type is defined at configuration. If you want to change your build type, you can re-run CMake with the following invocation: - .. code-block:: console - - % cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE= SRC_ROOT - + ```console + % cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE= SRC_ROOT + ``` Between runs, CMake preserves the values set for all options. CMake has the following build types defined: -Debug +* Debug These builds are the default. The build system will compile the tools and libraries unoptimized, with debugging information, and asserts enabled. -Release +* Release For these builds, the build system will compile the tools and libraries with optimizations enabled and not generate debug info. CMakes default optimization level is -O3. This can be configured by setting the - ``CMAKE_CXX_FLAGS_RELEASE`` variable on the CMake command line. + `CMAKE_CXX_FLAGS_RELEASE` variable on the CMake command line. -RelWithDebInfo +* RelWithDebInfo These builds are useful when debugging. They generate optimized binaries with debug information. CMakes default optimization level is -O2. This can be - configured by setting the ``CMAKE_CXX_FLAGS_RELWITHDEBINFO`` variable on the + configured by setting the `CMAKE_CXX_FLAGS_RELWITHDEBINFO` variable on the CMake command line. 
Once you have LLVM configured, you can build it by entering the *OBJ_ROOT* directory and issuing the following command: -.. code-block:: console - - % make - -If the build fails, please `check here`_ to see if you are using a version of +```console +% make +``` +If the build fails, please {ref}`check here ` to see if you are using a version of GCC that is known not to compile LLVM. If you have multiple processors in your machine, you may wish to use some of the parallel build options provided by GNU Make. For example, you could use the command: -.. code-block:: console - - % make -j2 - +```console +% make -j2 +``` There are several special targets which are useful when working with the LLVM source code: -``make clean`` +* `make clean` Removes all files generated by the build. This includes object files, generated C/C++ files, libraries, and executables. -``make install`` +* `make install` Installs LLVM header files, libraries, tools, and documentation in a hierarchy - under ``$PREFIX``, specified with ``CMAKE_INSTALL_PREFIX``, which - defaults to ``/usr/local``. + under `$PREFIX`, specified with `CMAKE_INSTALL_PREFIX`, which + defaults to `/usr/local`. -``make docs-llvm-html`` +* `make docs-llvm-html` - If configured with ``-DLLVM_ENABLE_SPHINX=On``, this will generate a directory - at ``OBJ_ROOT/docs/html`` which contains the HTML formatted documentation. + If configured with `-DLLVM_ENABLE_SPHINX=On`, this will generate a directory + at `OBJ_ROOT/docs/html` which contains the HTML formatted documentation. -Cross-Compiling LLVM --------------------- +### Cross-Compiling LLVM It is possible to cross-compile LLVM itself. That is, you can create LLVM executables and libraries to be hosted on a platform different from the platform where they are built (a Canadian Cross build). 
To generate build files for -cross-compiling CMake provides a variable ``CMAKE_TOOLCHAIN_FILE`` which can +cross-compiling CMake provides a variable `CMAKE_TOOLCHAIN_FILE` which can define compiler flags and variables used during the CMake test operations. The result of such a build is executables that are not runnable on the build @@ -672,23 +627,22 @@ host but can be executed on the target. As an example, the following CMake invocation can generate build files targeting iOS. This will work on macOS with the latest Xcode: -.. code-block:: console - - % cmake -G "Ninja" -DCMAKE_OSX_ARCHITECTURES="armv7;armv7s;arm64" - -DCMAKE_TOOLCHAIN_FILE=/cmake/platforms/iOS.cmake - -DCMAKE_BUILD_TYPE=Release -DLLVM_BUILD_RUNTIME=Off -DLLVM_INCLUDE_TESTS=Off - -DLLVM_INCLUDE_EXAMPLES=Off -DLLVM_ENABLE_BACKTRACES=Off [options] - - +```console +% cmake -G "Ninja" -DCMAKE_OSX_ARCHITECTURES="armv7;armv7s;arm64" + -DCMAKE_TOOLCHAIN_FILE=/cmake/platforms/iOS.cmake + -DCMAKE_BUILD_TYPE=Release -DLLVM_BUILD_RUNTIME=Off -DLLVM_INCLUDE_TESTS=Off + -DLLVM_INCLUDE_EXAMPLES=Off -DLLVM_ENABLE_BACKTRACES=Off [options] + +``` Note: There are some additional flags that need to be passed when building for iOS due to limitations in the iOS SDK. -Check :doc:`HowToCrossCompileLLVM` and `Clang docs on how to cross-compile in general -`_ for more information -about cross-compiling. +Check {doc}`HowToCrossCompileLLVM` and [Clang docs on how to cross-compile in general] +for more information about cross-compiling. + +[Clang docs on how to cross-compile in general]: https://clang.llvm.org/docs/CrossCompilation.html -The Location of LLVM Object Files ---------------------------------- +### The Location of LLVM Object Files The LLVM build system is capable of sharing a single LLVM source tree among several LLVM builds. Hence, it is possible to build LLVM for several different @@ -696,78 +650,73 @@ platforms or configurations using the same source tree. 
* Change directory to where the LLVM object files should live: - .. code-block:: console - - % cd OBJ_ROOT - -* Run ``cmake``: - - .. code-block:: console - - % cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release SRC_ROOT + ```console + % cd OBJ_ROOT + ``` +* Run `cmake`: + ```console + % cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release SRC_ROOT + ``` The LLVM build will create a structure underneath *OBJ_ROOT* that matches the LLVM source tree. At each level where source files are present in the source -tree there will be a corresponding ``CMakeFiles`` directory in the *OBJ_ROOT*. +tree there will be a corresponding `CMakeFiles` directory in the *OBJ_ROOT*. Underneath that directory there is another directory with a name ending in -``.dir`` under which you'll find object files for each source. +`.dir` under which you'll find object files for each source. For example: - .. code-block:: console - - % cd llvm_build_dir - % find lib/Support/ -name APFloat* - lib/Support/CMakeFiles/LLVMSupport.dir/APFloat.cpp.o - -Optional Configuration Items ----------------------------- + ```console + % cd llvm_build_dir + % find lib/Support/ -name APFloat* + lib/Support/CMakeFiles/LLVMSupport.dir/APFloat.cpp.o + ``` +### Optional Configuration Items -If you're running on a Linux system that supports the `binfmt_misc -`_ +If you're running on a Linux system that supports the [binfmt_misc] module, and you have root access on the system, you can set your system up to execute LLVM bitcode files directly. To do this, use commands like this (the first command may not be required if you are already using the module): -.. 
code-block:: console - - % mount -t binfmt_misc none /proc/sys/fs/binfmt_misc - % echo ':llvm:M::BC::/path/to/lli:' > /proc/sys/fs/binfmt_misc/register - % chmod u+x hello.bc (if needed) - % ./hello.bc +[binfmt_misc]: http://en.wikipedia.org/wiki/binfmt_misc +```console +% mount -t binfmt_misc none /proc/sys/fs/binfmt_misc +% echo ':llvm:M::BC::/path/to/lli:' > /proc/sys/fs/binfmt_misc/register +% chmod u+x hello.bc (if needed) +% ./hello.bc +``` This allows you to execute LLVM bitcode files directly. On Debian, you can also use this command instead of the 'echo' command above: -.. code-block:: console - - % sudo update-binfmts --install llvm /path/to/lli --magic 'BC' +```console +% sudo update-binfmts --install llvm /path/to/lli --magic 'BC' +``` +(Program Layout)= +(general layout)= +## Directory Layout -.. _Program Layout: -.. _general layout: +One useful source of information about the LLVM source base is the LLVM [doxygen] +documentation available at . The following is a +brief introduction to code layout: -Directory Layout -================ +[doxygen]: http://www.doxygen.org/ -One useful source of information about the LLVM source base is the LLVM `doxygen -`_ documentation available at -``_. The following is a brief introduction to code -layout: +### `llvm/cmake` -``llvm/cmake`` --------------- Generates system build files. -``llvm/cmake/modules`` +* `llvm/cmake/modules` + Build configuration for llvm user defined options. Checks compiler version and linker flags. -``llvm/cmake/platforms`` +* `llvm/cmake/platforms` + Toolchain configuration for Android NDK, iOS systems and non-Windows hosts to target MSVC. -``llvm/examples`` ------------------ +### `llvm/examples` - Some simple examples showing how to use LLVM as a compiler for a custom language - including lowering, optimization, and code generation. @@ -777,416 +726,390 @@ Generates system build files. 
including a hand-written lexer, parser, AST, as well as code generation support using LLVM- both static (ahead of time) and various approaches to Just In Time (JIT) compilation. - `Kaleidoscope Tutorial for complete beginner - `_. + [Kaleidoscope Tutorial for complete beginner](https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/index.html). -- BuildingAJIT: Examples of the `BuildingAJIT tutorial - `_ that shows how LLVM’s +- BuildingAJIT: Examples of the [BuildingAJIT tutorial] that shows how LLVM’s ORC JIT APIs interact with other parts of LLVM. It also teaches how to recombine them to build a custom JIT that is suited to your use-case. -``llvm/include`` ----------------- +[BuildingAJIT tutorial]: https://llvm.org/docs/tutorial/BuildingAJIT1.html + +### `llvm/include` Public header files exported from the LLVM library. The three main subdirectories: -``llvm/include/llvm`` +* `llvm/include/llvm` All LLVM-specific header files, and subdirectories for different portions of - LLVM: ``Analysis``, ``CodeGen``, ``Target``, ``Transforms``, etc... + LLVM: `Analysis`, `CodeGen`, `Target`, `Transforms`, etc... -``llvm/include/llvm/Support`` +* `llvm/include/llvm/Support` Generic support libraries provided with LLVM but not necessarily specific to LLVM. For example, some C++ STL utilities and a Command Line option processing library store header files here. -``llvm/include/llvm/Config`` +* `llvm/include/llvm/Config` - Header files configured by ``cmake``. They wrap "standard" UNIX and + Header files configured by `cmake`. They wrap "standard" UNIX and C header files. Source code can include these header files which - automatically take care of the conditional #includes that ``cmake`` + automatically take care of the conditional #includes that `cmake` generates. -``llvm/lib`` ------------- +### `llvm/lib` Most source files are here. By putting code in libraries, LLVM makes it easy to -share code among the `tools`_. +share code among the [tools](#tools). 
-``llvm/lib/IR/`` +* `llvm/lib/IR/` Core LLVM source files that implement core classes like Instruction and BasicBlock. -``llvm/lib/AsmParser/`` +* `llvm/lib/AsmParser/` Source code for the LLVM assembly language parser library. -``llvm/lib/Bitcode/`` +* `llvm/lib/Bitcode/` Code for reading and writing bitcode. -``llvm/lib/Analysis/`` +* `llvm/lib/Analysis/` A variety of program analyses, such as Call Graphs, Induction Variables, Natural Loop Identification, etc. -``llvm/lib/Transforms/`` +* `llvm/lib/Transforms/` IR-to-IR program transformations, such as Aggressive Dead Code Elimination, Sparse Conditional Constant Propagation, Inlining, Loop Invariant Code Motion, Dead Global Elimination, and many others. -``llvm/lib/Target/`` +* `llvm/lib/Target/` Files describing target architectures for code generation. For example, - ``llvm/lib/Target/X86`` holds the X86 machine description. + `llvm/lib/Target/X86` holds the X86 machine description. -``llvm/lib/CodeGen/`` +* `llvm/lib/CodeGen/` The major parts of the code generator: Instruction Selector, Instruction Scheduling, and Register Allocation. -``llvm/lib/MC/`` +* `llvm/lib/MC/` The libraries represent and process code at machine code level. Handles assembly and object-file emission. -``llvm/lib/ExecutionEngine/`` +* `llvm/lib/ExecutionEngine/` Libraries for directly executing bitcode at runtime in interpreted and JIT-compiled scenarios. -``llvm/lib/Support/`` +* `llvm/lib/Support/` - Source code that corresponds to the header files in ``llvm/include/ADT/`` - and ``llvm/include/Support/``. + Source code that corresponds to the header files in `llvm/include/ADT/` + and `llvm/include/Support/`. -``llvm/bindings`` ----------------------- +### `llvm/bindings` Contains bindings for the LLVM compiler infrastructure to allow programs written in languages other than C or C++ to take advantage of the LLVM infrastructure. The LLVM project provides language bindings for OCaml and Python. 
-``llvm/projects`` ------------------ +### `llvm/projects` Projects not strictly part of LLVM but shipped with LLVM. This is also the directory for creating your own LLVM-based projects which leverage the LLVM build system. -``llvm/test`` -------------- +### `llvm/test` Feature and regression tests and other sanity checks on LLVM infrastructure. These are intended to run quickly and cover a lot of territory without being exhaustive. -``test-suite`` --------------- +### `test-suite` A comprehensive correctness, performance, and benchmarking test suite -for LLVM. This comes in a ``separate git repository -``, because it contains a +for LLVM. This comes in a [separate git repository], because it contains a large amount of third-party code under a variety of licenses. For -details see the :doc:`Testing Guide ` document. +details see the {doc}`Testing Guide ` document. -.. _tools: +[separate git repository]: https://github.com/llvm/llvm-test-suite -``llvm/tools`` --------------- +(tools)= +### `llvm/tools` Executables built out of the libraries above, which form the main part of the user interface. You can always get help -for a tool by typing ``tool_name -help``. The following is a brief introduction +for a tool by typing `tool_name -help`. The following is a brief introduction to the most important tools. More detailed information is in -the `Command Guide `_. +the {doc}`Command Guide `. -``llvm-reduce`` +* `llvm-reduce` - ``llvm-reduce`` is used to debug optimization passes or code generation backends + `llvm-reduce` is used to debug optimization passes or code generation backends by narrowing down the given test case to the minimum number of passes and/or instructions that still cause a problem, whether it is a crash or - miscompilation. See ``_ for more information on using - ``llvm-reduce``. + miscompilation. See {doc}`HowToSubmitABug.html ` for more information on using + `llvm-reduce`. 
-``llvm-ar`` +* `llvm-ar` The archiver produces an archive containing the given LLVM bitcode files, optionally with an index for faster lookup. -``llvm-as`` +* `llvm-as` The assembler transforms the human-readable LLVM assembly to LLVM bitcode. -``llvm-dis`` +* `llvm-dis` The disassembler transforms the LLVM bitcode to human-readable LLVM assembly. -``llvm-link`` +* `llvm-link` - ``llvm-link``, not surprisingly, links multiple LLVM modules into a single + `llvm-link`, not surprisingly, links multiple LLVM modules into a single program. -``lli`` +* `lli` - ``lli`` is the LLVM interpreter, which can directly execute LLVM bitcode + `lli` is the LLVM interpreter, which can directly execute LLVM bitcode (although very slowly...). For architectures that support it (currently x86, - Sparc, and PowerPC), by default, ``lli`` will function as a Just-In-Time + Sparc, and PowerPC), by default, `lli` will function as a Just-In-Time compiler (if the functionality was compiled in), and will execute the code *much* faster than the interpreter. -``llc`` +* `llc` - ``llc`` is the LLVM backend compiler, which translates LLVM bitcode to a + `llc` is the LLVM backend compiler, which translates LLVM bitcode to a native code assembly file. -``opt`` +* `opt` - ``opt`` reads LLVM bitcode, applies a series of LLVM to LLVM transformations + `opt` reads LLVM bitcode, applies a series of LLVM to LLVM transformations (which are specified on the command line), and outputs the resultant - bitcode. '``opt -help``' is a good way to get a list of the + bitcode. '`opt -help`' is a good way to get a list of the program transformations available in LLVM. - ``opt`` can also run a specific analysis on an input LLVM bitcode + `opt` can also run a specific analysis on an input LLVM bitcode file and print the results. Primarily useful for debugging analyses, or familiarizing yourself with what an analysis does. 
-``llvm/utils`` --------------- +### `llvm/utils` Utilities for working with LLVM source code; some are part of the build process because they are code generators for parts of the infrastructure. -``codegen-diff`` +* `codegen-diff` - ``codegen-diff`` finds differences between code that LLC + `codegen-diff` finds differences between code that LLC generates and code that LLI generates. This is useful if you are debugging one of them, assuming that the other generates correct output. For - the full user manual, run ```perldoc codegen-diff'``. + the full user manual, run `` `perldoc codegen-diff' ``. -``emacs/`` +* `emacs/` Emacs and XEmacs syntax highlighting for LLVM assembly files and TableGen - description files. See the ``README`` for information on using them. + description files. See the `README` for information on using them. -``getsrcs.sh`` +* `getsrcs.sh` Finds and outputs all non-generated source files, useful if one wishes to do a lot of development across directories and does not want to find each file. One way to use it is to run, - for example: ``xemacs `utils/getsources.sh``` from the top of the LLVM source + for example: ``xemacs `utils/getsources.sh` `` from the top of the LLVM source tree. -``llvmgrep`` +* `llvmgrep` - Performs an ``egrep -H -n`` on each source file in LLVM and - passes to it a regular expression provided on ``llvmgrep``'s command + Performs an `egrep -H -n` on each source file in LLVM and + passes to it a regular expression provided on `llvmgrep`'s command line. This is an efficient way of searching the source base for a particular regular expression. -``TableGen/`` +* `TableGen/` Contains the tool used to generate register descriptions, instruction set descriptions, and even assemblers from common TableGen description files. -``vim/`` +* `vim/` vim syntax-highlighting for LLVM assembly files - and TableGen description files. See the ``README`` for how to use them. + and TableGen description files. 
See the `README` for how to use them. -.. _simple example: - -An Example Using the LLVM Tool Chain -==================================== +(simple example)= +## An Example Using the LLVM Tool Chain This section gives an example of using LLVM with the Clang front end. -Example with clang ------------------- - -#. First, create a simple C file, name it 'hello.c': - - .. code-block:: c - - #include <stdio.h> - - int main() { - printf("hello world\n"); - return 0; - } - -#. Next, compile the C file into a native executable: - - .. code-block:: console - - % clang hello.c -o hello - - .. note:: - - Clang works just like GCC by default. The standard ``-S`` and ``-c`` arguments - work as usual (producing a native ``.s`` or ``.o`` file, respectively). - -#. Next, compile the C file into an LLVM bitcode file: - - .. code-block:: console - - % clang -O3 -emit-llvm hello.c -c -o hello.bc - - The ``-emit-llvm`` option can be used with the ``-S`` or ``-c`` options to emit an LLVM - ``.ll`` or ``.bc`` file (respectively) for the code. This allows you to use - the `standard LLVM tools `_ on the bitcode file. - -#. Run the program in both forms. To run the program, use: - - .. code-block:: console - - % ./hello - +### Example with clang + +1. First, create a simple C file, name it 'hello.c': + + ```c + #include <stdio.h> + + int main() { + printf("hello world\n"); + return 0; + } + ``` +1. Next, compile the C file into a native executable: + + ```console + % clang hello.c -o hello + ``` + ```{note} + Clang works just like GCC by default. The standard `-S` and `-c` arguments + work as usual (producing a native `.s` or `.o` file, respectively). + ``` +1. Next, compile the C file into an LLVM bitcode file: + + ```console + % clang -O3 -emit-llvm hello.c -c -o hello.bc + ``` + The `-emit-llvm` option can be used with the `-S` or `-c` options to emit an LLVM + `.ll` or `.bc` file (respectively) for the code. This allows you to use + the {doc}`standard LLVM tools ` on the bitcode file. + +1.
Run the program in both forms. To run the program, use: + + ```console + % ./hello + ``` and - .. code-block:: console - - % lli hello.bc - - The second example shows how to invoke the LLVM JIT, :doc:`lli - `. - -#. Use the ``llvm-dis`` utility to take a look at the LLVM assembly code: + ```console + % lli hello.bc + ``` + The second example shows how to invoke the LLVM JIT, {doc}`lli `. - .. code-block:: console +1. Use the `llvm-dis` utility to take a look at the LLVM assembly code: - % llvm-dis < hello.bc | less + ```console + % llvm-dis < hello.bc | less + ``` +1. Compile the program to native assembly using the LLC code generator: -#. Compile the program to native assembly using the LLC code generator: + ```console + % llc hello.bc -o hello.s + ``` +1. Assemble the native assembly language file into a program: - .. code-block:: console + ```console + % /opt/SUNWspro/bin/cc -xarch=v9 hello.s -o hello.native # On Solaris - % llc hello.bc -o hello.s - -#. Assemble the native assembly language file into a program: - - .. code-block:: console - - % /opt/SUNWspro/bin/cc -xarch=v9 hello.s -o hello.native # On Solaris - - % gcc hello.s -o hello.native # On others - -#. Execute the native code program: - - .. code-block:: console - - % ./hello.native + % gcc hello.s -o hello.native # On others + ``` +1. Execute the native code program: + ```console + % ./hello.native + ``` Note that using clang to compile directly to native code (i.e. when the - ``-emit-llvm`` option is not present) does steps 6/7/8 for you. + `-emit-llvm` option is not present) does steps 6/7/8 for you. -Common Problems -=============== +## Common Problems If you are having problems building or using LLVM, or if you have any other -general questions about LLVM, please consult the `Frequently Asked -Questions `_ page. +general questions about LLVM, please consult the {doc}`Frequently Asked Questions ` page. 
If you are having problems with limited memory and build time, please try -building with ``ninja`` instead of ``make``. Please consider configuring the +building with `ninja` instead of `make`. Please consider configuring the following options with CMake: - * ``-G Ninja`` + * `-G Ninja` Setting this option will allow you to build with ninja instead of make. Building with ninja significantly improves your build time, especially with incremental builds, and improves your memory usage. - * ``-DLLVM_USE_LINKER`` + * `-DLLVM_USE_LINKER` - Setting this option to ``lld`` will significantly reduce linking time for LLVM + Setting this option to `lld` will significantly reduce linking time for LLVM executables, particularly on Linux and Windows. If you are building LLVM for the first time and lld is not available to you as a binary package, then you may want to use the gold linker as a faster alternative to GNU ld. - * ``-DCMAKE_BUILD_TYPE`` + * `-DCMAKE_BUILD_TYPE` Controls optimization level and debug information of the build. This setting - can affect RAM and disk usage, see :ref:`CMAKE_BUILD_TYPE ` + can affect RAM and disk usage, see {ref}`CMAKE_BUILD_TYPE ` for more information. - * ``-DLLVM_ENABLE_ASSERTIONS`` + * `-DLLVM_ENABLE_ASSERTIONS` - This option defaults to ``ON`` for Debug builds and defaults to ``OFF`` for Release + This option defaults to `ON` for Debug builds and defaults to `OFF` for Release builds. As mentioned in the previous option, using the Release build type and enabling assertions may be a good alternative to using the Debug build type. - * ``-DLLVM_PARALLEL_LINK_JOBS`` + * `-DLLVM_PARALLEL_LINK_JOBS` Set this equal to number of jobs you wish to run simultaneously. This is - similar to the ``-j`` option used with ``make``, but only for link jobs. This option + similar to the `-j` option used with `make`, but only for link jobs. This option can only be used with ninja. 
You may wish to use a very low number of jobs, as this will greatly reduce the amount of memory used during the build - process. If you have limited memory, you may wish to set this to ``1``. + process. If you have limited memory, you may wish to set this to `1`. - * ``-DLLVM_TARGETS_TO_BUILD`` + * `-DLLVM_TARGETS_TO_BUILD` Set this equal to the target you wish to build. You may wish to set this to - only your host architecture. For example ``X86`` if you are using an Intel or + only your host architecture. For example `X86` if you are using an Intel or AMD machine. You will find a full list of targets within the - `llvm-project/llvm/lib/Target `_ + [llvm-project/llvm/lib/Target](https://github.com/llvm/llvm-project/tree/main/llvm/lib/Target) directory. - * ``-DLLVM_OPTIMIZED_TABLEGEN`` + * `-DLLVM_OPTIMIZED_TABLEGEN` - Set this to ``ON`` to generate a fully optimized TableGen compiler during your - build, even if that build is a ``Debug`` build. This will significantly improve + Set this to `ON` to generate a fully optimized TableGen compiler during your + build, even if that build is a `Debug` build. This will significantly improve your build time. You should not enable this if your intention is to debug the TableGen compiler. - * ``-DLLVM_ENABLE_PROJECTS`` + * `-DLLVM_ENABLE_PROJECTS` - Set this equal to the projects you wish to compile (e.g. ``clang``, ``lld``, etc.) If + Set this equal to the projects you wish to compile (e.g. `clang`, `lld`, etc.) If compiling more than one project, separate the items with a semicolon. Should you run into issues with the semicolon, try surrounding it with single quotes. - * ``-DLLVM_ENABLE_RUNTIMES`` + * `-DLLVM_ENABLE_RUNTIMES` - Set this equal to the runtimes you wish to compile (e.g. ``libcxx``, ``libcxxabi``, etc.) + Set this equal to the runtimes you wish to compile (e.g. `libcxx`, `libcxxabi`, etc.) If compiling more than one runtime, separate the items with a semicolon. 
Should you run into issues with the semicolon, try surrounding it with single quotes. - * ``-DCLANG_ENABLE_STATIC_ANALYZER`` + * `-DCLANG_ENABLE_STATIC_ANALYZER` - Set this option to ``OFF`` if you do not require the clang static analyzer. This + Set this option to `OFF` if you do not require the clang static analyzer. This should improve your build time slightly. - * ``-DLLVM_USE_SPLIT_DWARF`` + * `-DLLVM_USE_SPLIT_DWARF` - Consider setting this to ``ON`` if you require a debug build, as this will ease + Consider setting this to `ON` if you require a debug build, as this will ease memory pressure on the linker. This will make linking much faster, as the binaries will not contain any of the debug information. Instead, the debug - information is in a separate DWARF object file (with the extension ``.dwo``). + information is in a separate DWARF object file (with the extension `.dwo`). This only applies to host platforms using ELF, such as Linux. - * ``-DBUILD_SHARED_LIBS`` + * `-DBUILD_SHARED_LIBS` - Setting this to ``ON`` will build shared libraries instead of static + Setting this to `ON` will build shared libraries instead of static libraries. This will ease memory pressure on the linker. However, this should only be used when developing llvm. See - :ref:`BUILD_SHARED_LIBS ` + {ref}`BUILD_SHARED_LIBS ` for more information. -.. _links: - -Links -===== +(links)= +## Links This document is just an **introduction** on how to use LLVM to do some simple things... there are many more interesting and complicated things that you can do that aren't documented here (but we'll gladly accept a patch if you want to write something up!). 
For more information about LLVM, check out: -* `LLVM Homepage `_ -* `LLVM Doxygen Tree `_ -* `Starting a Project that Uses LLVM `_ +* [LLVM Homepage](https://llvm.org/) +* [LLVM Doxygen Tree](https://llvm.org/doxygen/) +* {doc}`Starting a Project that Uses LLVM ` diff --git a/llvm/docs/GettingStartedTutorials.md b/llvm/docs/GettingStartedTutorials.md index 61253e39c34d4..7a65486034ecb 100644 --- a/llvm/docs/GettingStartedTutorials.md +++ b/llvm/docs/GettingStartedTutorials.md @@ -1,47 +1,47 @@ -Getting Started/Tutorials -========================= +# Getting Started/Tutorials For those new to the LLVM system. -.. toctree:: - :hidden: - - CompilerWriterInfo - Frontend/PerformanceTips - GettingStarted - GettingStartedVS - ProgrammersManual - DebuggingLLVM - tutorial/index - MyFirstTypoFix - -:doc:`GettingStarted` - Discusses how to get up and running quickly with the LLVM infrastructure. - Everything from unpacking and compilation of the distribution to execution - of some tools. - -:doc:`tutorial/index` - Tutorials about using LLVM. Includes a tutorial about making a custom - language with LLVM. - -:doc:`ProgrammersManual` - Introduction to the general layout of the LLVM sourcebase, important classes +```{toctree} +:hidden: + +CompilerWriterInfo +Frontend/PerformanceTips +GettingStarted +GettingStartedVS +ProgrammersManual +DebuggingLLVM +tutorial/index +MyFirstTypoFix +``` + +{doc}`GettingStarted` +: Discusses how to get up and running quickly with the LLVM infrastructure. + Everything from unpacking and compilation of the distribution to execution + of some tools. + +{doc}`tutorial/index` +: Tutorials about using LLVM. Includes a tutorial about making a custom + language with LLVM. + +{doc}`ProgrammersManual` +: Introduction to the general layout of the LLVM sourcebase, important classes and APIs, and some tips & tricks. -:doc:`DebuggingLLVM` - Provides information about how to debug LLVM. +{doc}`DebuggingLLVM` +: Provides information about how to debug LLVM. 
-:doc:`Frontend/PerformanceTips` - A collection of tips for frontend authors on how to generate IR - which LLVM is able to effectively optimize. +{doc}`Frontend/PerformanceTips` +: A collection of tips for frontend authors on how to generate IR + which LLVM is able to effectively optimize. -:doc:`GettingStartedVS` - An addendum to the main Getting Started guide for those using Visual Studio - on Windows. +{doc}`GettingStartedVS` +: An addendum to the main Getting Started guide for those using Visual Studio + on Windows. -:doc:`CompilerWriterInfo` - A list of helpful links for compiler writers. +{doc}`CompilerWriterInfo` +: A list of helpful links for compiler writers. -:doc:`MyFirstTypoFix` - This tutorial will guide you through the process of making a change to - LLVM, and contributing it back to the LLVM project. +{doc}`MyFirstTypoFix` +: This tutorial will guide you through the process of making a change to + LLVM, and contributing it back to the LLVM project. diff --git a/llvm/docs/GitHub.md b/llvm/docs/GitHub.md index c3a9cb8560269..2d1df8518832e 100644 --- a/llvm/docs/GitHub.md +++ b/llvm/docs/GitHub.md @@ -1,51 +1,50 @@ -.. _github-reviews: +(github-reviews)= -====================== -LLVM GitHub User Guide -====================== +# LLVM GitHub User Guide -.. contents:: - :local: +```{contents} +:local: +``` -Introduction -============ -The LLVM Project uses `GitHub `_ for -`Source Code `_, -`Releases `_, -`Issue Tracking `_., and -`Code Reviews `_. +## Introduction + +The LLVM Project uses [GitHub](https://github.com/) for +[Source Code](https://github.com/llvm/llvm-project), +[Releases](https://github.com/llvm/llvm-project/releases), +[Issue Tracking](https://github.com/llvm/llvm-project/issues), and +[Code Reviews](https://github.com/llvm/llvm-project/pulls). This page describes how the LLVM Project users and developers can participate in the project using GitHub.
-Before your first PR -==================== +## Before your first PR Please ensure that you have set a valid email address in your GitHub account, -see :ref:`github-email-address`. +see {ref}`github-email-address`. + +## Pull Requests -Pull Requests -============= The LLVM project is using GitHub Pull Requests for Code Reviews. This document describes the typical workflow of creating a Pull Request and getting it reviewed and accepted. This is meant as an overview of the GitHub workflow, for complete -documentation refer to `GitHub's documentation `_. +documentation refer to [GitHub's documentation](https://docs.github.com/pull-requests). -.. note:: - If you are using a Pull Request for purposes other than review - (eg: precommit CI results, convenient web-based reverts, etc) - add the `skip-precommit-approval `_ - label to the PR. +```{note} +If you are using a Pull Request for purposes other than review +(eg: precommit CI results, convenient web-based reverts, etc) +add the [skip-precommit-approval](https://github.com/llvm/llvm-project/labels?q=skip-precommit-approval) +label to the PR. +``` + +### GitHub Tools -GitHub Tools ------------- You can interact with GitHub in several ways: via git command line tools, -the web browser, `GitHub Desktop `_, or the -`GitHub CLI `_. This guide will cover the git command line +the web browser, [GitHub Desktop](https://desktop.github.com/), or the +[GitHub CLI](https://cli.github.com). This guide will cover the git command line tools and the GitHub CLI. -Creating Pull Requests ----------------------- +### Creating Pull Requests + Keep in mind that when creating a pull request, it should generally only contain one self-contained commit initially. This makes it easier for reviewers to understand the introduced changes and @@ -54,32 +53,32 @@ for the project. If you have multiple changes you want to introduce, it's recommended to create separate pull requests for each change. 
Create a local branch per commit you want to submit and then push that branch -to your `fork `_ +to your [fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks) of the llvm-project and -`create a pull request from the fork `_. +[create a pull request from the fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork). As GitHub uses the first line of the commit message truncated to 72 characters as the pull request title, you may have to edit to reword or to undo this truncation. -Creating Pull Requests with GitHub CLI -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -With the CLI it's enough to create the branch locally and then run: +#### Creating Pull Requests with GitHub CLI -:: +With the CLI it's enough to create the branch locally and then run: - gh pr create +```console +gh pr create +``` When prompted select to create and use your own fork and follow the instructions to add more information needed. -.. note:: +```{note} +When you let the GitHub CLI create a fork of llvm-project to +your user, it will change the git "remotes" so that "origin" points +to your fork and "upstream" points to the main llvm-project repository. +``` - When you let the GitHub CLI create a fork of llvm-project to - your user, it will change the git "remotes" so that "origin" points - to your fork and "upstream" points to the main llvm-project repository. +### Updating Pull Requests -Updating Pull Requests ----------------------- In order to update your pull request, the only thing you need to do is to push your new commits to the branch in your fork. That will automatically update the pull request. You can also use the Update Branch button in GitHub's Pull @@ -88,7 +87,7 @@ Request UI, but be aware that it will create a merge commit on your branch. 
When updating a pull request, you should push additional "fix up" commits to your branch instead of force pushing. This makes it easier for GitHub to track the context of previous review comments. Consider using the -`built-in support for fixups `_ +[built-in support for fixups](https://git-scm.com/docs/git-commit#Documentation/git-commit.txt---fixupamendrewordltcommitgt) in git. If you create fix up or merge commits, you must squash and merge before @@ -100,14 +99,14 @@ fix below. When pushing to your branch, make sure you push to the correct fork. Check your remotes with: -:: - - git remote -v +```console +git remote -v +``` And make sure you push to the remote that's pointing to your fork. -Rebasing Pull Requests and Force Pushes ---------------------------------------- +### Rebasing Pull Requests and Force Pushes + In general, you should avoid rebasing a Pull Request and force pushing to the branch that's the root of the Pull Request during the review. This action will make the context of the old changes and comments harder to find and read. If @@ -120,37 +119,37 @@ or in some dependent code. After your PR is reviewed and accepted, you want to rebase your branch to ensure you won't encounter merge conflicts when landing the PR. -.. note:: - This guide assumes that the PR branch only has 1 author. If you are - collaborating with others on a single branch, be careful how and when you push - changes. ``--force-with-lease`` may be useful in this situation. +```{note} +This guide assumes that the PR branch only has 1 author. If you are +collaborating with others on a single branch, be careful how and when you push +changes. `--force-with-lease` may be useful in this situation. +``` -Approvals ---------- +### Approvals Before merging a PR you must have the required approvals. See -:ref:`lgtm_how_a_patch_is_accepted` for more details. +{ref}`lgtm_how_a_patch_is_accepted` for more details. 
-Landing your change -------------------- +### Landing your change After your PR is approved, ensure that: * The PR title and description describe the final changes. These will be used as the title and message of the final squashed commit. The titles and messages of commits in the PR will **not** be used. - * You have set a valid email address in your GitHub account, see :ref:`github-email-address`. + * You have set a valid email address in your GitHub account, see {ref}`github-email-address`. -.. note:: - The LLVM Project monorepo on GitHub is configured to always use "Squash - and Merge" as the pull request merge option when using the web interface. - With this option, GitHub uses the PR summary as the default commit - message. +```{note} +The LLVM Project monorepo on GitHub is configured to always use "Squash +and Merge" as the pull request merge option when using the web interface. +With this option, GitHub uses the PR summary as the default commit +message. - Users with write access who can merge PRs have a final opportunity to edit - the commit title and message before merging. However, this option is not - available to contributors without write access. +Users with write access who can merge PRs have a final opportunity to edit +the commit title and message before merging. However, this option is not +available to contributors without write access. +``` At this point, you can merge your changes. If you do not have write permissions for the repository, the merge button in GitHub's web interface will be @@ -170,7 +169,7 @@ commonly used first: Afterwards you can select the option `Delete branch` to delete the branch from your fork. -* `Interactive rebase `_ +* [Interactive rebase](https://git-scm.com/docs/git-rebase#_interactive_mode) with fixups. This is the recommended method since you can control the final commit message and check that the final commit looks as you expect. 
When your local state is correct, remember to force-push to your branch and press @@ -179,9 +178,9 @@ commonly used first: * Merge using the GitHub command line interface. Switch to your branch locally and run: - :: - - gh pr merge --squash --delete-branch + ```console + gh pr merge --squash --delete-branch + ``` If you observe an error message from the above informing you that your pull request is not mergeable, then that is likely because upstream has been @@ -189,26 +188,26 @@ commonly used first: merge conflict. You must first resolve this merge conflict in order to merge your pull request. In order to do that: - :: - - git fetch upstream - git rebase upstream/main + ```console + git fetch upstream + git rebase upstream/main + ``` Then fix the source files causing merge conflicts and make sure to rebuild and retest the result. Then: - :: - - git add - git rebase --continue + ```console + git add + git rebase --continue + ``` Finally, you'll need to force push to your branch one more time before you can merge: - :: - - git push --force - gh pr merge --squash --delete-branch + ```console + git push --force + gh pr merge --squash --delete-branch + ``` This force push may ask if you intend to push hundreds, or potentially thousands of patches (depending on how long it's been since your pull request @@ -217,10 +216,9 @@ commonly used first: request will understand that you're rebasing just your patches, and display this result correctly with a note that a force push did occur. -.. _github_branches: +(github_branches)= -Branches -======== +## Branches It is possible to create branches in `llvm/llvm-project/` that start with `users//`, however this is intended to be able to support "stacked" @@ -228,31 +226,31 @@ pull-request. Do not create any branches in the `llvm/llvm-project` repository otherwise, please use a fork (see above). User branches that aren't associated with a pull-request **will be deleted**. -.. 
_stacked_pull_requests: +(stacked_pull_requests)= -Stacked Pull Requests -===================== +## Stacked Pull Requests To separate related changes or to break down a larger PR into smaller, reviewable pieces, use "stacked pull requests" — this helps make the review process smoother. -.. note:: - The LLVM Project monorepo on GitHub is configured to always use "Squash and - Merge" as the pull request merge option. As a result, each PR results in - exactly one commit being merged into the project. +```{note} +The LLVM Project monorepo on GitHub is configured to always use "Squash and +Merge" as the pull request merge option. As a result, each PR results in +exactly one commit being merged into the project. - This means that stacked pull requests are the only available option for - landing a series of related changes. In contrast, submitting a PR with - multiple commits and merging them as-is (without squashing) is not supported - in LLVM. +This means that stacked pull requests are the only available option for +landing a series of related changes. In contrast, submitting a PR with +multiple commits and merging them as-is (without squashing) is not supported +in LLVM. +``` While GitHub does not natively support stacked pull requests, there are several common alternatives. To illustrate, assume that you are working on two branches in your fork of the -``llvm/llvm-project`` repository, and you want to eventually merge both into -``main``: +`llvm/llvm-project` repository, and you want to eventually merge both into +`main`: - `feature_1`, which contains commit `feature_commit_1` - `feature_2`, which contains commit `feature_commit_2` and depends on @@ -260,10 +258,10 @@ To illustrate, assume that you are working on two branches in your fork of the Your options are as follows: -#. Use user branches in ``llvm/llvm-project`` +1. Use user branches in `llvm/llvm-project` Create user branches in the main repository, as described - :ref:`above`. Then: + {ref}`above <github_branches>`.
Then: - Open a pull request from `users//feature_1` → `main` - Open another from `users//feature_2` → `users//feature_1` @@ -275,9 +273,9 @@ Your options are as follows: perform this step using the web interface. This approach requires commit access. See how to obtain it - `here `_. + [here](https://llvm.org/docs/DeveloperPolicy.html#obtaining-commit-access). -#. Two PRs with a dependency note +2. Two PRs with a dependency note Create PR_1 for `feature_1` and PR_2 for `feature_2`. In PR_2, include a note in the PR summary indicating that it depends on PR_1 (e.g., @@ -287,44 +285,43 @@ Your options are as follows: and which are new, e.g. "The first N commits are from the base PR". This helps reviewers focus only on the incremental changes. -#. Use a stacked PR tool +3. Use a stacked PR tool Use tools like SPR or Graphite (described below) to automate managing stacked PRs. These tools are also based on using user branches - in ``llvm/llvm-project``. + in `llvm/llvm-project`. -.. note:: - When not using user branches, GitHub will not display proper diffs for - subsequent PRs in a stack. Instead, it will show a combined diff that - includes all commits from earlier PRs. +```{note} +When not using user branches, GitHub will not display proper diffs for +subsequent PRs in a stack. Instead, it will show a combined diff that +includes all commits from earlier PRs. - As described above, it is the PR author’s responsibility to clearly indicate - which commits are relevant to the current PR. - For example: “The first N commits are from the base PR.” +As described above, it is the PR author’s responsibility to clearly indicate +which commits are relevant to the current PR. +For example: “The first N commits are from the base PR.” - You can avoid this issue by using user branches directly in the - ``llvm/llvm-project`` repository. +You can avoid this issue by using user branches directly in the +`llvm/llvm-project` repository. 
+``` -Using Graphite for stacked Pull Requests ----------------------------------------- +### Using Graphite for stacked Pull Requests -`Graphite `_ is a stacked pull request tool supported -by the LLVM repo (the other being `reviewable.io `_). +[Graphite](https://app.graphite.dev/) is a stacked pull request tool supported +by the LLVM repo (the other being [reviewable.io](https://reviewable.io)). -Graphite will want to create branches under ``llvm/llvm-project`` rather than your +Graphite will want to create branches under `llvm/llvm-project` rather than your private fork, so the guidance above, about branch naming, is critical, otherwise -``gt submit`` (i.e. publish your PRs for review) will fail. +`gt submit` (i.e. publish your PRs for review) will fail. -Use ``gt config`` then ``Branch naming settings`` and ``Set a prefix for branch names``. -Include the last ``/``. +Use `gt config` then `Branch naming settings` and `Set a prefix for branch names`. +Include the last `/`. If you didn't do the above and Graphite created non-prefixed branches, a simple way to -unblock is to rename (``git -m ``), and then checkout the branch -and ``gt track``. +unblock is to rename (`git branch -m <old name> <new name>`), and then checkout the branch +and `gt track`. -Pre-merge Continuous Integration (CI) -------------------------------------- +### Pre-merge Continuous Integration (CI) Multiple checks will be applied on a pull-request, either for linting/formatting or some build and tests. None of these are perfect and you will encounter @@ -342,112 +339,110 @@ project. However, please make sure you do not force-merge any changes that have clear test failures directly linked to your changes. Our policy is still to keep the -``main`` branch in a good condition, and introducing failures to be fixed later +`main` branch in a good condition, and introducing failures to be fixed later violates that policy.
-Problems After Landing Your Change -================================== +## Problems After Landing Your Change Even though your PR passed the pre-commit checks and is approved by reviewers, it may cause problems for some configurations after it lands. You will be notified if this happens and the community is ready to help you fix the problems. This process is described in detail -:ref:`here `. +{ref}`here `. + +### Checking out another PR locally -Checking out another PR locally -------------------------------- Sometimes you want to review another person's PR on your local machine to run tests or inspect code in your preferred editor. This is easily done with the CLI: -:: - - gh pr checkout +```console +gh pr checkout +``` This is also possible with the web interface and the normal git command line tools, but the process is a bit more complicated. See GitHub's -`documentation `_ +[documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/reviewing-changes-in-pull-requests/checking-out-pull-requests-locally?platform=linux&tool=webui#modifying-an-inactive-pull-request-locally) on the topic. 
-Example Pull Request with GitHub CLI -==================================== -Here is an example for creating a Pull Request with the GitHub CLI: +## Example Pull Request with GitHub CLI -:: +Here is an example for creating a Pull Request with the GitHub CLI: - # Clone the repo - gh repo clone llvm/llvm-project +```console +# Clone the repo +gh repo clone llvm/llvm-project - # Switch to the repo and create a new branch - cd llvm-project - git switch -c my_change +# Switch to the repo and create a new branch +cd llvm-project +git switch -c my_change - # Create your changes - $EDITOR file.cpp +# Create your changes +$EDITOR file.cpp - # Don't forget clang-format - git clang-format +# Don't forget clang-format +git clang-format - # and don't forget running your tests - ninja check-llvm +# and don't forget running your tests +ninja check-llvm - # Commit, use a good commit message - git commit file.cpp +# Commit, use a good commit message +git commit file.cpp - # Create the PR, select to use your own fork when prompted. - # If you don't have a fork, gh will create one for you. - gh pr create +# Create the PR, select to use your own fork when prompted. +# If you don't have a fork, gh will create one for you. +gh pr create - # If you get any review comments, come back to the branch and - # adjust them. - git switch my_change - $EDITOR file.cpp +# If you get any review comments, come back to the branch and +# adjust them. +git switch my_change +$EDITOR file.cpp - # Commit your changes - git commit file.cpp -m "Code Review adjustments" +# Commit your changes +git commit file.cpp -m "Code Review adjustments" - # Format changes - git clang-format HEAD~ +# Format changes +git clang-format HEAD~ - # Recommit if any formatting changes - git commit -a --amend +# Recommit if any formatting changes +git commit -a --amend - # Push your changes to your fork branch, be mindful of - # your remotes here, if you don't remember what points to your - # fork, use git remote -v to see. 
Usually origin points to your - # fork and upstream to llvm/llvm-project - git push origin my_change +# Push your changes to your fork branch, be mindful of +# your remotes here, if you don't remember what points to your +# fork, use git remote -v to see. Usually origin points to your +# fork and upstream to llvm/llvm-project +git push origin my_change +``` Before merging the PR, it is recommended that you rebase locally and re-run test checks: -:: - - # Add upstream as a remote (if you don't have it already) - git remote add upstream https://github.com/llvm/llvm-project.git +```console +# Add upstream as a remote (if you don't have it already) +git remote add upstream https://github.com/llvm/llvm-project.git - # Make sure you have all the latest changes - git fetch upstream && git rebase -i upstream/main +# Make sure you have all the latest changes +git fetch upstream && git rebase -i upstream/main - # Make sure tests pass with latest changes and your change - ninja check +# Make sure tests pass with latest changes and your change +ninja check - # Push the rebased changes to your fork. - git push origin my_change --force +# Push the rebased changes to your fork. +git push origin my_change --force - # Now merge it - gh pr merge --squash --delete-branch +# Now merge it +gh pr merge --squash --delete-branch +``` See more in-depth information about how to contribute in the following documentation: -* :doc:`Contributing` -* :doc:`MyFirstTypoFix` +* {doc}`Contributing` +* {doc}`MyFirstTypoFix` -Example Pull Request with git -==================================== +## Example Pull Request with git Instead of using the GitHub CLI to create a PR, you can push your code to a remote branch on your fork and create the PR to upstream using the GitHub web @@ -455,98 +450,97 @@ interface. Here is an example of making a PR using git and the GitHub web interface: -First follow the instructions to `fork the repository `_. 
+First follow the instructions to [fork the repository](https://docs.github.com/en/get-started/quickstart/fork-a-repo?tool=webui#forking-a-repository). -Next follow the instructions to `clone your forked repository `_. +Next follow the instructions to [clone your forked repository](https://docs.github.com/en/get-started/quickstart/fork-a-repo?tool=webui#cloning-your-forked-repository). Once you've cloned your forked repository, -:: - - # Switch to the forked repo - cd llvm-project +```console +# Switch to the forked repo +cd llvm-project - # Create a new branch - git switch -c my_change +# Create a new branch +git switch -c my_change - # Create your changes - $EDITOR file.cpp +# Create your changes +$EDITOR file.cpp - # Don't forget clang-format - git clang-format +# Don't forget clang-format +git clang-format - # and don't forget running your tests - ninja check-llvm +# and don't forget running your tests +ninja check-llvm - # Commit, use a good commit message - git commit file.cpp +# Commit, use a good commit message +git commit file.cpp - # Push your changes to your fork branch, be mindful of - # your remotes here, if you don't remember what points to your - # fork, use git remote -v to see. Usually origin points to your - # fork and upstream to llvm/llvm-project - git push origin my_change +# Push your changes to your fork branch, be mindful of +# your remotes here, if you don't remember what points to your +# fork, use git remote -v to see. Usually origin points to your +# fork and upstream to llvm/llvm-project +git push origin my_change +``` Navigate to the URL printed to the console from the git push command in the last step. Create a pull request from your branch to llvm::main. -:: - - # If you get any review comments, come back to the branch and - # adjust them. - git switch my_change - $EDITOR file.cpp +```console +# If you get any review comments, come back to the branch and +# adjust them. 
+git switch my_change +$EDITOR file.cpp - # Commit your changes - git commit file.cpp -m "Code Review adjustments" +# Commit your changes +git commit file.cpp -m "Code Review adjustments" - # Format changes - git clang-format HEAD~ +# Format changes +git clang-format HEAD~ - # Recommit if any formatting changes - git commit -a --amend +# Recommit if any formatting changes +git commit -a --amend - # Re-run tests and make sure nothing broke. - ninja check +# Re-run tests and make sure nothing broke. +ninja check - # Push your changes to your fork branch, be mindful of - # your remotes here, if you don't remember what points to your - # fork, use git remote -v to see. Usually origin points to your - # fork and upstream to llvm/llvm-project - git push origin my_change +# Push your changes to your fork branch, be mindful of +# your remotes here, if you don't remember what points to your +# fork, use git remote -v to see. Usually origin points to your +# fork and upstream to llvm/llvm-project +git push origin my_change +``` Before merging the PR, it is recommended that you rebase locally and re-run test checks: -:: +```console +# Add upstream as a remote (if you don't have it already) +git remote add upstream https://github.com/llvm/llvm-project.git - # Add upstream as a remote (if you don't have it already) - git remote add upstream https://github.com/llvm/llvm-project.git +# Make sure you have all the latest changes +git fetch upstream && git rebase -i upstream/main - # Make sure you have all the latest changes - git fetch upstream && git rebase -i upstream/main +# Make sure tests pass with latest changes and your change +ninja check - # Make sure tests pass with latest changes and your change - ninja check - - # Push the rebased changes to your fork. - git push origin my_change --force +# Push the rebased changes to your fork. 
+git push origin my_change --force +``` Once your PR is approved, rebased, and tests are passing, click `Squash and Merge` on your PR in the GitHub web interface. See more in-depth information about how to contribute in the following documentation: -* :doc:`Contributing` -* :doc:`MyFirstTypoFix` +* {doc}`Contributing` +* {doc}`MyFirstTypoFix` + +## Releases -Releases -======== +(backporting)= -.. _backporting: +### Backporting Fixes to the Release Branches -Backporting Fixes to the Release Branches ------------------------------------------ You can use special comments on issues or pull requests to make backport requests for the release branches. To do this, after your pull request has been merged: @@ -556,9 +550,9 @@ merged: 2. Add a comment to it in the following format: -:: - - /cherry-pick <...> +```console +/cherry-pick <...> +``` This command takes one or more git commit hashes as arguments and will attempt to cherry-pick the commit(s) to the release branch. If the commit(s) fail to @@ -570,14 +564,13 @@ If a commit you want to backport does not apply cleanly, you may resolve the conflicts locally and then create a pull request against the release branch. Just make sure to add the release milestone to the pull request. -Getting admin access to CI infrastructure -========================================= +## Getting admin access to CI infrastructure Any individual who is responsible for setting up and/or maintaining CI infrastructure for a LLVM project can request to be granted the CI/CD role by -the LLVM infrastructure area team. The request can be made by creating `a -Github issue `_ and using the -``infrastructure`` label. Applicants must include a justification for why the +the LLVM infrastructure area team. The request can be made by creating [a +GitHub issue](https://github.com/llvm/llvm-project/issues/new) and using the +`infrastructure` label. Applicants must include a justification for why the role is being requested.
Applications are reviewed on a case-by-case basis by the LLVM infrastructure area team and the role can be revoked at any point as the area team sees fit. diff --git a/llvm/docs/Passes.md b/llvm/docs/Passes.md index c89b549eb871a..28ea11eb994d9 100644 --- a/llvm/docs/Passes.md +++ b/llvm/docs/Passes.md @@ -1,21 +1,24 @@ -==================================== -LLVM's Analysis and Transform Passes -==================================== +# LLVM's Analysis and Transform Passes -.. contents:: - :local: +```{contents} +:local: +``` -.. toctree:: - :hidden: +```{toctree} +:hidden: - KernelInfo - LoopFusion +KernelInfo +LoopFusion -Introduction -============ -.. warning:: This document is not updated frequently, and the list of passes - is most likely incomplete. It is possible to list passes known by the opt - tool using ``opt -print-passes``. +``` + +## Introduction + +```{warning} +This document is not updated frequently, and the list of passes +is most likely incomplete. It is possible to list passes known by the opt +tool using `opt -print-passes`. +``` This document serves as a high-level summary of the optimization features that LLVM provides. Optimizations are implemented as Passes that traverse some @@ -30,13 +33,11 @@ are neither analysis nor transform passes. The table of contents above provides a quick summary of each pass and links to the more complete pass description later in the document. -Analysis Passes -=============== +## Analysis Passes This section describes the LLVM Analysis Passes. -``aa-eval``: Exhaustive Alias Analysis Precision Evaluator ----------------------------------------------------------- +### `aa-eval`: Exhaustive Alias Analysis Precision Evaluator This is a simple N^2 alias analysis accuracy evaluator. Basically, for each function in the program, it simply queries to see how the alias analysis @@ -46,127 +47,109 @@ function. This is inspired and adapted from code by: Naveen Neelakantam, Francesco Spadini, and Wojciech Stryjewski. 
-``basic-aa``: Basic Alias Analysis (stateless AA impl) ------------------------------------------------------- +### `basic-aa`: Basic Alias Analysis (stateless AA impl) A basic alias analysis pass that implements identities (two different globals cannot alias, etc), but does no stateful analysis. -``basiccg``: Basic CallGraph Construction ------------------------------------------ +### `basiccg`: Basic CallGraph Construction Yet to be written. -.. _passes-da: +(passes-da)= -``da``: Dependence Analysis ---------------------------- +### `da`: Dependence Analysis Dependence analysis framework, which is used to detect dependencies in memory accesses. -``domfrontier``: Dominance Frontier Construction ------------------------------------------------- +### `domfrontier`: Dominance Frontier Construction This pass is a simple dominator construction algorithm for finding forward dominator frontiers. -``domtree``: Dominator Tree Construction ----------------------------------------- +### `domtree`: Dominator Tree Construction This pass is a simple dominator construction algorithm for finding forward dominators. -``dot-callgraph``: Print Call Graph to "dot" file -------------------------------------------------- +### `dot-callgraph`: Print Call Graph to "dot" file -This pass, only available in ``opt``, prints the call graph into a ``.dot`` +This pass, only available in `opt`, prints the call graph into a `.dot` graph. This graph can then be processed with the "dot" tool to convert it to postscript or some other suitable format. -``dot-cfg``: Print CFG of function to "dot" file ------------------------------------------------- +### `dot-cfg`: Print CFG of function to "dot" file -This pass, only available in ``opt``, prints the control flow graph into a -``.dot`` graph. This graph can then be processed with the :program:`dot` tool +This pass, only available in `opt`, prints the control flow graph into a +`.dot` graph. 
This graph can then be processed with the {program}`dot` tool to convert it to postscript or some other suitable format. -Additionally, the ``-cfg-func-name=`` option can be used to filter the +Additionally, the `-cfg-func-name=` option can be used to filter the functions that are printed. All functions that contain the specified substring will be printed. -``dot-cfg-only``: Print CFG of function to "dot" file (with no function bodies) -------------------------------------------------------------------------------- +### `dot-cfg-only`: Print CFG of function to "dot" file (with no function bodies) -This pass, only available in ``opt``, prints the control flow graph into a -``.dot`` graph, omitting the function bodies. This graph can then be processed -with the :program:`dot` tool to convert it to postscript or some other suitable +This pass, only available in `opt`, prints the control flow graph into a +`.dot` graph, omitting the function bodies. This graph can then be processed +with the {program}`dot` tool to convert it to postscript or some other suitable format. -Additionally, the ``-cfg-func-name=`` option can be used to filter the +Additionally, the `-cfg-func-name=` option can be used to filter the functions that are printed. All functions that contain the specified substring will be printed. -``dot-dom``: Print dominance tree of function to "dot" file ------------------------------------------------------------ +### `dot-dom`: Print dominance tree of function to "dot" file -This pass, only available in ``opt``, prints the dominator tree into a ``.dot`` -graph. This graph can then be processed with the :program:`dot` tool to +This pass, only available in `opt`, prints the dominator tree into a `.dot` +graph. This graph can then be processed with the {program}`dot` tool to convert it to postscript or some other suitable format. 
-``dot-dom-only``: Print dominance tree of function to "dot" file (with no function bodies) ------------------------------------------------------------------------------------------- +### `dot-dom-only`: Print dominance tree of function to "dot" file (with no function bodies) -This pass, only available in ``opt``, prints the dominator tree into a ``.dot`` +This pass, only available in `opt`, prints the dominator tree into a `.dot` graph, omitting the function bodies. This graph can then be processed with the -:program:`dot` tool to convert it to postscript or some other suitable format. +{program}`dot` tool to convert it to postscript or some other suitable format. -``dot-post-dom``: Print postdominance tree of function to "dot" file --------------------------------------------------------------------- +### `dot-post-dom`: Print postdominance tree of function to "dot" file -This pass, only available in ``opt``, prints the post dominator tree into a -``.dot`` graph. This graph can then be processed with the :program:`dot` tool +This pass, only available in `opt`, prints the post dominator tree into a +`.dot` graph. This graph can then be processed with the {program}`dot` tool to convert it to postscript or some other suitable format. -``dot-post-dom-only``: Print postdominance tree of function to "dot" file (with no function bodies) ---------------------------------------------------------------------------------------------------- +### `dot-post-dom-only`: Print postdominance tree of function to "dot" file (with no function bodies) -This pass, only available in ``opt``, prints the post dominator tree into a -``.dot`` graph, omitting the function bodies. This graph can then be processed -with the :program:`dot` tool to convert it to postscript or some other suitable +This pass, only available in `opt`, prints the post dominator tree into a +`.dot` graph, omitting the function bodies. 
This graph can then be processed +with the {program}`dot` tool to convert it to postscript or some other suitable format. -``globals-aa``: Simple mod/ref analysis for globals ---------------------------------------------------- +### `globals-aa`: Simple mod/ref analysis for globals This simple pass provides alias and mod/ref information for global values that do not have their address taken, and keeps track of whether functions read or write memory (are "pure"). For this simple (but very common) case, we can provide pretty accurate and useful information. -``instcount``: Counts the various types of ``Instruction``\ s -------------------------------------------------------------- +### `instcount`: Counts the various types of `Instruction`s This pass collects the count of all instructions and reports them. -``iv-users``: Induction Variable Users --------------------------------------- +### `iv-users`: Induction Variable Users Bookkeeping for "interesting" users of expressions computed from induction variables. -``kernel-info``: GPU Kernel Info --------------------------------- +### `kernel-info`: GPU Kernel Info Reports various statistics for codes compiled for GPUs. This pass is -:doc:`documented separately`. +{doc}`documented separately `. -``lazy-value-info``: Lazy Value Information Analysis ----------------------------------------------------- +### `lazy-value-info`: Lazy Value Information Analysis Interface for lazy computation of value constraint information. -``lint``: Statically lint-checks LLVM IR ----------------------------------------- +### `lint`: Statically lint-checks LLVM IR This pass statically checks for common and easily-identified constructs which produce undefined or likely unintended behavior in LLVM IR. @@ -187,137 +170,119 @@ obvious. If an optimization pass appears to be introducing a warning, it may be that the optimization pass is merely exposing an existing condition in the code. -This code may be run before :ref:`instcombine `. 
In many +This code may be run before {ref}`instcombine `. In many cases, instcombine checks for the same kinds of things and turns instructions with undefined behavior into unreachable (or equivalent). Because of this, this pass makes some effort to look through bitcasts and so on. -``loops``: Natural Loop Information ------------------------------------ +### `loops`: Natural Loop Information This analysis is used to identify natural loops and determine the loop depth of various nodes of the CFG. Note that the loops identified may actually be several natural loops that share the same header node... not just a single natural loop. -``memdep``: Memory Dependence Analysis --------------------------------------- +### `memdep`: Memory Dependence Analysis An analysis that determines, for a given memory operation, what preceding memory operations it depends on. It builds on alias analysis information, and tries to provide a lazy, caching interface to a common kind of alias information query. -``print``: Decodes module-level debug info ------------------------------------------------------------- +### `print`: Decodes module-level debug info This pass decodes the debug info metadata in a module and prints it to standard output in a (sufficiently-prepared-) human-readable form. -``postdomtree``: Post-Dominator Tree Construction -------------------------------------------------- +### `postdomtree`: Post-Dominator Tree Construction This pass is a simple post-dominator construction algorithm for finding post-dominators. -``print``: Alias Set Printer ----------------------------------------- +### `print`: Alias Set Printer Yet to be written. -``print-callgraph``: Print a call graph ---------------------------------------- +### `print-callgraph`: Print a call graph -This pass, only available in ``opt``, prints the call graph to standard error +This pass, only available in `opt`, prints the call graph to standard error in a human-readable form. 
-``print-callgraph-sccs``: Print SCCs of the Call Graph ------------------------------------------------------- +### `print-callgraph-sccs`: Print SCCs of the Call Graph -This pass, only available in ``opt``, prints the SCCs of the call graph to +This pass, only available in `opt`, prints the SCCs of the call graph to standard error in a human-readable form. -``print``: Print SCCs of each function CFG ----------------------------------------------------- +### `print`: Print SCCs of each function CFG -This pass, only available in ``opt``, prints the SCCs of each function CFG to +This pass, only available in `opt`, prints the SCCs of each function CFG to standard error in a human-readable form. -``function(print)``: Print function to stderr ---------------------------------------------- +### `function(print)`: Print function to stderr -The ``PrintFunctionPass`` class is designed to be pipelined with other -``FunctionPasses``, and prints out the functions of the module as they are +The `PrintFunctionPass` class is designed to be pipelined with other +`FunctionPasses`, and prints out the functions of the module as they are processed. -``module(print)``: Print module to stderr ------------------------------------------ +### `module(print)`: Print module to stderr This pass simply prints out the entire module when it is executed. -``regions``: Detect single entry single exit regions ----------------------------------------------------- +### `regions`: Detect single entry single exit regions -The ``RegionInfo`` pass detects single entry single exit regions in a function, +The `RegionInfo` pass detects single entry single exit regions in a function, where a region is defined as any subgraph that is connected to the remaining graph at only two spots. Furthermore, a hierarchical region tree is built. -.. 
_passes-scalar-evolution: +(passes-scalar-evolution)= -``scalar-evolution``: Scalar Evolution Analysis ------------------------------------------------ +### `scalar-evolution`: Scalar Evolution Analysis -The ``ScalarEvolution`` analysis can be used to analyze and categorize scalar +The `ScalarEvolution` analysis can be used to analyze and categorize scalar expressions in loops. It specializes in recognizing general induction -variables, representing them with the abstract and opaque ``SCEV`` class. +variables, representing them with the abstract and opaque `SCEV` class. Given this analysis, trip counts of loops and other important properties can be obtained. This analysis is primarily useful for induction variable substitution and strength reduction. -``scev-aa``: ScalarEvolution-based Alias Analysis -------------------------------------------------- +### `scev-aa`: ScalarEvolution-based Alias Analysis -Simple alias analysis implemented in terms of ``ScalarEvolution`` queries. +Simple alias analysis implemented in terms of `ScalarEvolution` queries. This differs from traditional loop dependence analysis in that it tests for dependencies within a single iteration of a loop, rather than dependencies between different iterations. -``ScalarEvolution`` has a more complete understanding of pointer arithmetic -than ``BasicAliasAnalysis``' collection of ad-hoc analyses. +`ScalarEvolution` has a more complete understanding of pointer arithmetic +than `BasicAliasAnalysis`' collection of ad-hoc analyses. -``stack-safety``: Stack Safety Analysis ---------------------------------------- +### `stack-safety`: Stack Safety Analysis -The ``StackSafety`` analysis can be used to determine if stack allocated +The `StackSafety` analysis can be used to determine if stack allocated variables can be considered safe from memory access bugs. This analysis' primary purpose is to be used by sanitizers to avoid unnecessary instrumentation of safe variables. 
-Transform Passes -================ +## Transform Passes This section describes the LLVM Transform Passes. -``adce``: Aggressive Dead Code Elimination ------------------------------------------- +### `adce`: Aggressive Dead Code Elimination -ADCE aggressively tries to eliminate code. This pass is similar to :ref:`DCE -<passes-dce>` but it assumes that values are dead until proven otherwise. This -is similar to :ref:`SCCP <passes-sccp>`, except applied to the liveness of +ADCE aggressively tries to eliminate code. This pass is similar to +{ref}`DCE <passes-dce>` but it assumes that values are dead until proven otherwise. This +is similar to {ref}`SCCP <passes-sccp>`, except applied to the liveness of values. -``always-inline``: Inliner for ``always_inline`` functions ----------------------------------------------------------- +### `always-inline`: Inliner for `always_inline` functions A custom inliner that handles only functions that are marked as "always inline". -``argpromotion``: Promote 'by reference' arguments to scalars ------------------------------------------------------------- +### `argpromotion`: Promote 'by reference' arguments to scalars This pass promotes "by reference" arguments to be "by value" arguments. In practice, this means looking for internal functions that have pointer @@ -338,8 +303,7 @@ stored to (returning the value instead), but does not currently. This case would be best handled when and if LLVM starts supporting multiple return values from functions. -``block-placement``: Profile Guided Basic Block Placement ---------------------------------------------------------- +### `block-placement`: Profile Guided Basic Block Placement This pass is a very simple profile guided basic block placement algorithm. The idea is to put frequently executed blocks together at the start of the function @@ -347,40 +311,35 @@ and hopefully increase the number of fall-through conditional branches.
If there is no profile information for a particular function, this pass basically orders blocks in depth-first order. -``break-crit-edges``: Break critical edges in CFG -------------------------------------------------- +### `break-crit-edges`: Break critical edges in CFG Break all of the critical edges in the CFG by inserting a dummy basic block. It may be "required" by passes that cannot deal with critical edges. This transformation obviously invalidates the CFG, but can update forward dominator (set, immediate dominators, tree, and frontier) information. -``codegenprepare``: Optimize for code generation ------------------------------------------------- +### `codegenprepare`: Optimize for code generation This pass munges the code in the input function to better prepare it for SelectionDAG-based code generation. This works around limitations in its basic-block-at-a-time approach. It should eventually be removed. -``constmerge``: Merge Duplicate Global Constants ------------------------------------------------- +### `constmerge`: Merge Duplicate Global Constants Merges duplicate global constants together into a single constant that is shared. This is useful because some passes (i.e., TraceValues) insert a lot of string constants into the program, regardless of whether or not an existing string is available. -.. _passes-dce: +(passes-dce)= -``dce``: Dead Code Elimination ------------------------------- +### `dce`: Dead Code Elimination Dead code elimination is similar to dead instruction elimination, but it rechecks instructions that were used by removed instructions to see if they are newly dead. -``deadargelim``: Dead Argument Elimination ------------------------------------------- +### `deadargelim`: Dead Argument Elimination This pass deletes dead arguments from internal functions. Dead argument elimination removes arguments which are directly dead, as well as arguments @@ -390,28 +349,25 @@ pass also deletes dead arguments in a similar way. 
This pass is often useful as a cleanup pass to run after aggressive interprocedural passes, which add possibly-dead arguments. -``dse``: Dead Store Elimination -------------------------------- +### `dse`: Dead Store Elimination A trivial dead store elimination that only considers basic-block local redundant stores. -.. _passes-function-attrs: +(passes-function-attrs)= -``function-attrs``: Deduce function attributes ----------------------------------------------- +### `function-attrs`: Deduce function attributes A simple interprocedural pass which walks the call-graph, looking for functions which do not access or only read non-local memory, and marking them -``readnone``/``readonly``. In addition, it marks function arguments (of -pointer type) "``nocapture``" if a call to the function does not create any +`readnone`/`readonly`. In addition, it marks function arguments (of +pointer type) "`nocapture`" if a call to the function does not create any copies of the pointer value that outlive the call. This more or less means that the pointer is only dereferenced, and not returned from the function or stored in a global. This pass is implemented as a bottom-up traversal of the call-graph. -``globaldce``: Dead Global Elimination --------------------------------------- +### `globaldce`: Dead Global Elimination This transform is designed to eliminate unreachable internal globals from the program. It uses an aggressive algorithm, searching out globals that are known @@ -419,23 +375,20 @@ to be alive. After it finds all of the globals which are needed, it deletes whatever is left over. This allows it to delete recursive chunks of the program which are unreachable. -``globalopt``: Global Variable Optimizer ----------------------------------------- +### `globalopt`: Global Variable Optimizer This pass transforms simple global variables that never have their address taken. If obviously true, it marks read/write globals as constant, deletes variables only stored to, etc. 
-``gvn``: Global Value Numbering -------------------------------- +### `gvn`: Global Value Numbering This pass performs global value numbering to eliminate fully and partially redundant instructions. It also performs redundant load elimination. -.. _passes-indvars: +(passes-indvars)= -``indvars``: Canonicalize Induction Variables ---------------------------------------------- +### `indvars`: Canonicalize Induction Variables This transformation analyzes and transforms the induction variables (and computations derived from them) into simpler forms suitable for subsequent @@ -456,16 +409,14 @@ changes: * The exit condition for the loop is canonicalized to compare the induction value against the exit value. This turns loops like: - .. code-block:: c++ - - for (i = 7; i*i < 1000; ++i) - - into - - .. code-block:: c++ - - for (i = 0; i != 25; ++i) + ```c++ + for (i = 7; i*i < 1000; ++i) + ``` + into + ```c++ + for (i = 0; i != 25; ++i) + ``` * Any use outside of the loop of an expression derived from the indvar is changed to compute the derived value outside of the loop, eliminating the dependence on the exit value of the induction variable. If the only purpose @@ -477,94 +428,85 @@ desired loop transformations have been performed. Additionally, on targets where it is profitable, the loop could be transformed to count down to zero (the "do loop" optimization). -``inline``: Function Integration/Inlining ------------------------------------------ +### `inline`: Function Integration/Inlining Bottom-up inlining of functions into callees. -.. _passes-instcombine: +(passes-instcombine)= -``instcombine``: Combine redundant instructions ------------------------------------------------ +### `instcombine`: Combine redundant instructions Combine instructions to form fewer, simpler instructions. This pass does not modify the CFG. This pass is where algebraic simplification happens. This pass combines things like: -..
code-block:: llvm - - %Y = add i32 %X, 1 - %Z = add i32 %Y, 1 - +```llvm +%Y = add i32 %X, 1 +%Z = add i32 %Y, 1 +``` into: -.. code-block:: llvm - - %Z = add i32 %X, 2 - +```llvm +%Z = add i32 %X, 2 +``` This is a simple worklist-driven algorithm. This pass guarantees that the following canonicalizations are performed on the program: -#. If a binary operator has a constant operand, it is moved to the right-hand +1. If a binary operator has a constant operand, it is moved to the right-hand side. -#. Bitwise operators with constant operands are always grouped so that shifts - are performed first, then ``or``\ s, then ``and``\ s, then ``xor``\ s. -#. Compare instructions are converted from ``<``, ``>``, ``≤``, or ``≥`` to - ``=`` or ``≠`` if possible. -#. All ``cmp`` instructions on boolean values are replaced with logical +1. Bitwise operators with constant operands are always grouped so that shifts + are performed first, then `or`s, then `and`s, then `xor`s. +1. Compare instructions are converted from `<`, `>`, `≤`, or `≥` to + `=` or `≠` if possible. +1. All `cmp` instructions on boolean values are replaced with logical operations. -#. ``add X, X`` is represented as ``mul X, 2`` ⇒ ``shl X, 1`` -#. Multiplies with a constant power-of-two argument are transformed into +1. `add X, X` is represented as `mul X, 2` ⇒ `shl X, 1` +1. Multiplies with a constant power-of-two argument are transformed into shifts. -#. … etc. +1. … etc. This pass can also simplify calls to specific well-known function calls (e.g. -runtime library functions). For example, a call ``exit(3)`` that occurs within -the ``main()`` function can be transformed into simply ``return 3``. Whether or +runtime library functions). For example, a call `exit(3)` that occurs within +the `main()` function can be transformed into simply `return 3`. 
Whether or not library calls are simplified is controlled by the -:ref:`-function-attrs <passes-function-attrs>` pass and LLVM's knowledge of +{ref}`-function-attrs <passes-function-attrs>` pass and LLVM's knowledge of library calls on different targets. -.. _passes-aggressive-instcombine: +(passes-aggressive-instcombine)= -``aggressive-instcombine``: Combine expression patterns -------------------------------------------------------- +### `aggressive-instcombine`: Combine expression patterns Combine expression patterns to form expressions with fewer, simpler instructions. -For example, this pass reduces the width of expressions post-dominated by ``TruncInst`` +For example, this pass reduces the width of expressions post-dominated by `TruncInst` into smaller width when applicable. It differs from instcombine pass in that it can modify CFG and contains pattern optimization that requires higher complexity than the O(1), thus, it should run fewer times than instcombine pass. -``internalize``: Internalize Global Symbols ------------------------------------------- +### `internalize`: Internalize Global Symbols This pass loops over all of the functions in the input module, looking for a main function. If a main function is found, all other functions and all global variables with initializers are marked as internal. -``ipsccp``: Interprocedural Sparse Conditional Constant Propagation ------------------------------------------------------------------- +### `ipsccp`: Interprocedural Sparse Conditional Constant Propagation -An interprocedural variant of :ref:`Sparse Conditional Constant Propagation -<passes-sccp>`. +An interprocedural variant of +{ref}`Sparse Conditional Constant Propagation <passes-sccp>`.
-``normalize``: Transforms IR into a normal form that's easier to diff ---------------------------------------------------------------------- +### `normalize`: Transforms IR into a normal form that's easier to diff This pass aims to transform LLVM Modules into a normal form by reordering and renaming instructions while preserving the same semantics. The normalizer makes it easier to spot semantic differences while diffing two modules which have undergone two different passes. -``jump-threading``: Jump Threading ----------------------------------- +### `jump-threading`: Jump Threading Jump threading tries to find distinct threads of control flow running through a basic block. This pass looks at blocks that have multiple predecessors and @@ -575,46 +517,42 @@ block. An example of when this can occur is code like this: -.. code-block:: c++ - - if () { ... - X = 4; - } - if (X < 3) { - +```c++ +if () { ... + X = 4; +} +if (X < 3) { +``` In this case, the unconditional branch at the end of the first if can be revectored to the false side of the second if. -.. _passes-lcssa: +(passes-lcssa)= -``lcssa``: Loop-Closed SSA Form Pass ------------------------------------- +### `lcssa`: Loop-Closed SSA Form Pass This pass transforms loops by placing phi nodes at the end of the loops for all values that are live across the loop boundary. For example, it turns the left into the right code: -.. code-block:: c++ - - for (...) for (...) - if (c) if (c) - X1 = ... X1 = ... - else else - X2 = ... X2 = ... - X3 = phi(X1, X2) X3 = phi(X1, X2) - ... = X3 + 4 X4 = phi(X3) - ... = X4 + 4 - +```c++ +for (...) for (...) + if (c) if (c) + X1 = ... X1 = ... + else else + X2 = ... X2 = ... + X3 = phi(X1, X2) X3 = phi(X1, X2) +... = X3 + 4 X4 = phi(X3) + ... = X4 + 4 +``` This is still valid LLVM; the extra phi nodes are purely redundant, and will be -trivially eliminated by ``InstCombine``. The major benefit of this +trivially eliminated by `InstCombine`. 
The major benefit of this transformation is that it makes many other loop optimizations, such as -``LoopUnswitch``\ ing, simpler. You can read more in the -:ref:`loop terminology section for the LCSSA form <loop-terminology-lcssa>`. +`LoopUnswitch`ing, simpler. You can read more in the +{ref}`loop terminology section for the LCSSA form <loop-terminology-lcssa>`. -.. _passes-licm: +(passes-licm)= -``licm``: Loop Invariant Code Motion ------------------------------------- +### `licm`: Loop Invariant Code Motion This pass performs loop invariant code motion, attempting to remove as much code from the body of a loop as possible. It does this by either hoisting code @@ -630,49 +568,45 @@ also handles other optimizations than LICM that increase live-ranges. This pass uses alias analysis for two purposes: -#. Moving loop invariant loads and calls out of loops. If we can determine +1. Moving loop invariant loads and calls out of loops. If we can determine that a load or call inside of a loop never aliases anything stored to, we can hoist it or sink it like any other instruction. -#. Scalar Promotion of Memory. If there is a store instruction inside of the +1. Scalar Promotion of Memory. If there is a store instruction inside of the loop, we try to move the store to happen AFTER the loop instead of inside of the loop. This can only happen if a few conditions are true: - #. The pointer stored through is loop invariant. - #. There are no stores or loads in the loop which *may* alias the pointer. + 1. The pointer stored through is loop invariant. + 1. There are no stores or loads in the loop which *may* alias the pointer. There are no calls in the loop which mod/ref the pointer. If these conditions are true, we can promote the loads and stores in the loop of the pointer to use a temporary alloca'd variable. We then use the - :ref:`mem2reg <passes-mem2reg>` functionality to construct the appropriate + {ref}`mem2reg <passes-mem2reg>` functionality to construct the appropriate SSA form for the variable.
-``loop-deletion``: Delete dead loops ------------------------------------- +### `loop-deletion`: Delete dead loops This file implements the Dead Loop Deletion Pass. This pass is responsible for eliminating loops with non-infinite computable trip counts that have no side effects or volatile instructions, and do not contribute to the computation of the function's return value. -.. _passes-loop-extract: +(passes-loop-extract)= -``loop-extract``: Extract loops into new functions --------------------------------------------------- +### `loop-extract`: Extract loops into new functions -A pass wrapper around the ``ExtractLoop()`` scalar transformation to extract +A pass wrapper around the `ExtractLoop()` scalar transformation to extract each top-level loop into its own new function. If the loop is the *only* loop in a given function, it is not touched. This is a pass most useful for debugging via bugpoint. -``loop-fusion``: Loop Fusion ----------------------------- +### `loop-fusion`: Loop Fusion Merges adjacent loops when it can prove the transformation preserves the -program's semantics. This pass is :doc:`documented separately`. +program's semantics. This pass is {doc}`documented separately `. -``loop-reduce``: Loop Strength Reduction ----------------------------------------- +### `loop-reduce`: Loop Strength Reduction This pass performs a strength reduction on array references inside loops that have as one or more of their components the loop induction variable. This is @@ -680,28 +614,26 @@ accomplished by creating a new value to hold the initial value of the array access for the first iteration, and then creating a new GEP instruction in the loop to increment the value by the appropriate amount. -.. _passes-loop-rotate: +(passes-loop-rotate)= -``loop-rotate``: Rotate Loops ------------------------------ +### `loop-rotate`: Rotate Loops A simple loop rotation transformation. A summary of it can be found in -:ref:`Loop Terminology for Rotated Loops `. 
+{ref}`Loop Terminology for Rotated Loops `. -.. _passes-loop-simplify: +(passes-loop-simplify)= -``loop-simplify``: Canonicalize natural loops ---------------------------------------------- +### `loop-simplify`: Canonicalize natural loops This pass performs several transformations to transform natural loops into a simpler form, which makes subsequent analyses and transformations simpler and more effective. A summary of it can be found in -:ref:`Loop Terminology, Loop Simplify Form `. +{ref}`Loop Terminology, Loop Simplify Form `. Loop pre-header insertion guarantees that there is a single, non-critical entry edge from outside of the loop to the loop header. This simplifies a number of -analyses and transformations, such as :ref:`LICM `. +analyses and transformations, such as {ref}`LICM `. Loop exit-block insertion guarantees that all exit blocks from the loop (blocks which are outside of the loop that have predecessors inside of the loop) only @@ -711,51 +643,46 @@ into LICM. This pass also guarantees that loops will have exactly one backedge. -Note that the :ref:`simplifycfg ` pass will clean up blocks +Note that the {ref}`simplifycfg ` pass will clean up blocks which are split out but end up being unnecessary, so usage of this pass should not pessimize generated code. This pass obviously modifies the CFG, but updates loop information and dominator information. -``loop-unroll``: Unroll loops ------------------------------ +### `loop-unroll`: Unroll loops This pass implements a simple loop unroller. It works best when loops have -been canonicalized by the :ref:`indvars ` pass, allowing it to +been canonicalized by the {ref}`indvars ` pass, allowing it to determine the trip counts of loops easily. -``loop-unroll-and-jam``: Unroll and Jam loops ---------------------------------------------- +### `loop-unroll-and-jam`: Unroll and Jam loops This pass implements a simple unroll and jam classical loop optimisation pass. It transforms a loop from: -.. 
code-block:: c++ - - for i.. i+= 1 for i.. i+= 4 - for j.. for j.. - code(i, j) code(i, j) - code(i+1, j) - code(i+2, j) - code(i+3, j) - remainder loop - +```c++ +for i.. i+= 1 for i.. i+= 4 + for j.. for j.. + code(i, j) code(i, j) + code(i+1, j) + code(i+2, j) + code(i+3, j) + remainder loop +``` Which can be seen as unrolling the outer loop and "jamming" (fusing) the inner loops into one. When variables or loads can be shared in the new inner loop, this can lead to significant performance improvements. It uses -:ref:`Dependence Analysis ` for proving the transformations are safe. +{ref}`Dependence Analysis ` for proving the transformations are safe. -``lower-global-dtors``: Lower global destructors ------------------------------------------------- +### `lower-global-dtors`: Lower global destructors -This pass lowers global module destructors (``llvm.global_dtors``) by creating +This pass lowers global module destructors (`llvm.global_dtors`) by creating wrapper functions that are registered as global constructors in -``llvm.global_ctors`` and which contain a call to ``__cxa_atexit`` to register +`llvm.global_ctors` and which contain a call to `__cxa_atexit` to register their destructor functions. -``lower-atomic``: Lower atomic intrinsics to non-atomic form ------------------------------------------------------------- +### `lower-atomic`: Lower atomic intrinsics to non-atomic form This pass lowers atomic intrinsics to non-atomic form for use in a known non-preemptible environment. @@ -765,42 +692,37 @@ this would require knowledge of the entire call graph of the program including any libraries which may not be available in bitcode form); it simply lowers every atomic intrinsic. 
-``lower-invoke``: Lower invokes to calls, for unwindless code generators ------------------------------------------------------------------------- +### `lower-invoke`: Lower invokes to calls, for unwindless code generators This transformation is designed for use by code generators which do not yet -support stack unwinding. This pass converts ``invoke`` instructions to -``call`` instructions, so that any exception-handling ``landingpad`` blocks -become dead code (which can be removed by running the ``-simplifycfg`` pass +support stack unwinding. This pass converts `invoke` instructions to +`call` instructions, so that any exception-handling `landingpad` blocks +become dead code (which can be removed by running the `-simplifycfg` pass afterwards). -``lower-switch``: Lower ``SwitchInst``\ s to branches ------------------------------------------------------ +### `lower-switch`: Lower `SwitchInst`s to branches Rewrites switch instructions with a sequence of branches, which allows targets to get away with not implementing the switch instruction until it is convenient. -.. _passes-mem2reg: +(passes-mem2reg)= -``mem2reg``: Promote Memory to Register ---------------------------------------- +### `mem2reg`: Promote Memory to Register This file promotes memory references to be register references. It promotes -alloca instructions which only have loads and stores as uses. An ``alloca`` is +alloca instructions which only have loads and stores as uses. An `alloca` is transformed by using dominator frontiers to place phi nodes, then traversing the function in depth-first order to rewrite loads and stores as appropriate. This is just the standard SSA construction algorithm to construct "pruned" SSA form. -``memcpyopt``: MemCpy Optimization ----------------------------------- +### `memcpyopt`: MemCpy Optimization -This pass performs various transformations related to eliminating ``memcpy`` -calls, or transforming sets of stores into ``memset``\ s. 
+This pass performs various transformations related to eliminating `memcpy` +calls, or transforming sets of stores into `memset`s. -``mergefunc``: Merge Functions ------------------------------- +### `mergefunc`: Merge Functions This pass looks for equivalent functions that are mergeable and folds them. @@ -820,26 +742,23 @@ Lookup routine has O(log(n)) complexity, while whole merging process has complexity of O(n*log(n)). Read -:doc:`this ` +{doc}`this ` article for more details. -``mergereturn``: Unify function exit nodes ------------------------------------------- +### `mergereturn`: Unify function exit nodes -Ensure that functions have at most one ``ret`` instruction in them. +Ensure that functions have at most one `ret` instruction in them. Additionally, it keeps track of which node is the new exit node of the CFG. -``partial-inliner``: Partial Inliner ------------------------------------- +### `partial-inliner`: Partial Inliner -This pass performs partial inlining, typically by inlining an ``if`` statement +This pass performs partial inlining, typically by inlining an `if` statement that surrounds the body of the function. -``reassociate``: Reassociate expressions ----------------------------------------- +### `reassociate`: Reassociate expressions This pass reassociates commutative expressions in an order that is designed to -promote better constant propagation, GCSE, :ref:`LICM `, PRE, etc. +promote better constant propagation, GCSE, {ref}`LICM `, PRE, etc. For example: 4 + (x + 5) ⇒ x + (4 + 5) @@ -849,35 +768,31 @@ corresponding to the reverse post-order traversal of the current function (start at 2), which effectively gives values in deep loops higher rank than values not in loops. -``rel-lookup-table-converter``: Relative lookup table converter ---------------------------------------------------------------- +### `rel-lookup-table-converter`: Relative lookup table converter This pass converts lookup tables to PIC-friendly relative lookup tables. 
-``reg2mem``: Demote all values to stack slots ---------------------------------------------- +### `reg2mem`: Demote all values to stack slots This file demotes all registers to memory references. It is intended to be the -inverse of :ref:`mem2reg `. By converting to ``load`` -instructions, the only values live across basic blocks are ``alloca`` -instructions and ``load`` instructions before ``phi`` nodes. It is intended +inverse of {ref}`mem2reg `. By converting to `load` +instructions, the only values live across basic blocks are `alloca` +instructions and `load` instructions before `phi` nodes. It is intended that this should make CFG hacking much easier. To make later hacking easier, -the entry block is split into two, such that all introduced ``alloca`` +the entry block is split into two, such that all introduced `alloca` instructions (and nothing else) are in the entry block. -``sroa``: Scalar Replacement of Aggregates ------------------------------------------- +### `sroa`: Scalar Replacement of Aggregates The well-known scalar replacement of aggregates transformation. This transform -breaks up ``alloca`` instructions of aggregate type (structure or array) into -individual ``alloca`` instructions for each member if possible. Then, if -possible, it transforms the individual ``alloca`` instructions into nice clean +breaks up `alloca` instructions of aggregate type (structure or array) into +individual `alloca` instructions for each member if possible. Then, if +possible, it transforms the individual `alloca` instructions into nice clean scalar SSA form. -.. _passes-sccp: +(passes-sccp)= -``sccp``: Sparse Conditional Constant Propagation -------------------------------------------------- +### `sccp`: Sparse Conditional Constant Propagation Sparse conditional constant propagation and merging, which can be summarized as: @@ -888,12 +803,11 @@ as: * Proves conditional branches to be unconditional Note that this pass has a habit of making definitions be dead. 
It is a good -idea to run a :ref:`DCE ` pass sometime after running this pass. +idea to run a {ref}`DCE ` pass sometime after running this pass. -.. _passes-simplifycfg: +(passes-simplifycfg)= -``simplifycfg``: Simplify the CFG ---------------------------------- +### `simplifycfg`: Simplify the CFG Performs dead code elimination and basic block merging. Specifically: @@ -903,39 +817,35 @@ Performs dead code elimination and basic block merging. Specifically: * Eliminates PHI nodes for basic blocks with a single predecessor. * Eliminates a basic block that only contains an unconditional branch. -``sink``: Code sinking ----------------------- +### `sink`: Code sinking This pass moves instructions into successor blocks, when possible, so that they aren't executed on paths where their results aren't needed. -.. _passes-simple-loop-unswitch: +(passes-simple-loop-unswitch)= -``simple-loop-unswitch``: Unswitch loops ----------------------------------------- +### `simple-loop-unswitch`: Unswitch loops This pass transforms loops that contain branches on loop-invariant conditions to have multiple loops. For example, it turns the left into the right code: -.. code-block:: c++ - - for (...) if (lic) - A for (...) - if (lic) A; B; C - B else - C for (...) - A; C - +```c++ +for (...) if (lic) + A for (...) + if (lic) A; B; C + B else + C for (...) + A; C +``` This can increase the size of the code exponentially (doubling it every time a loop is unswitched) so we only unswitch if the resultant code will be smaller than a threshold. -This pass expects :ref:`LICM ` to be run before it to hoist +This pass expects {ref}`LICM ` to be run before it to hoist invariant conditions out of the loop, to make the unswitching opportunity obvious. -``strip``: Strip all symbols from a module ------------------------------------------- +### `strip`: Strip all symbols from a module Performs code stripping. 
This transformation can delete: @@ -947,78 +857,68 @@ Note that this transformation makes code much less readable, so it should only be used in situations where the strip utility would be used, such as reducing code size or making it harder to reverse engineer code. -``strip-dead-debug-info``: Strip debug info for unused symbols --------------------------------------------------------------- +### `strip-dead-debug-info`: Strip debug info for unused symbols Performs code stripping. Similar to strip, but only strips debug info for unused symbols. -``strip-dead-prototypes``: Strip Unused Function Prototypes ------------------------------------------------------------ +### `strip-dead-prototypes`: Strip Unused Function Prototypes This pass loops over all of the functions in the input module, looking for dead declarations and removes them. Dead declarations are declarations of functions for which no implementation is available (i.e., declarations for unused library functions). -``strip-debug-declare``: Strip all ``llvm.dbg.declare`` intrinsics and -``#dbg_declare`` records. -------------------------------------------------------------------- +### `strip-debug-declare`: Strip all `llvm.dbg.declare` intrinsics and `#dbg_declare` records. Performs code stripping. Similar to strip, but only strips -``llvm.dbg.declare`` intrinsics. +`llvm.dbg.declare` intrinsics. -``strip-nondebug``: Strip all symbols, except dbg symbols, from a module ------------------------------------------------------------------------- +### `strip-nondebug`: Strip all symbols, except dbg symbols, from a module Performs code stripping. Similar to strip, but dbg info is preserved. -``tailcallelim``: Tail Call Elimination ---------------------------------------- +### `tailcallelim`: Tail Call Elimination This file transforms calls of the current function (self recursion) followed by a return instruction with a branch to the entry of the function, creating a loop. 
This pass also implements the following extensions to the basic algorithm: -#. Trivial instructions between the call and return do not prevent the +1. Trivial instructions between the call and return do not prevent the transformation from taking place, though currently the analysis cannot support moving any really useful instructions (only dead ones). -#. This pass transforms functions that are prevented from being tail recursive +1. This pass transforms functions that are prevented from being tail recursive by an associative expression to use an accumulator variable, thus compiling the typical naive factorial or fib implementation into efficient code. -#. TRE is performed if the function returns void, if the return returns the +1. TRE is performed if the function returns void, if the return returns the result returned by the call, or if the function returns a run-time constant on all exits from the function. It is possible, though unlikely, that the return returns something else (like constant 0), and can still be TRE'd. It can be TRE'd if *all other* return instructions in the function return the exact same value. -#. If it can prove that callees do not access their caller stack frame, they +1. If it can prove that callees do not access their caller stack frame, they are marked as eligible for tail call elimination (by the code generator). -Utility Passes -============== +## Utility Passes This section describes the LLVM Utility Passes. -``extract-blocks``: Extract Basic Blocks From Module (for bugpoint use) ------------------------------------------------------------------------ +### `extract-blocks`: Extract Basic Blocks From Module (for bugpoint use) This pass is used by bugpoint to extract all blocks from the module into their own functions. 
-``instnamer``: Assign names to anonymous instructions ------------------------------------------------------ +### `instnamer`: Assign names to anonymous instructions This is a little utility pass that gives instructions names, this is mostly useful when diffing the effect of an optimization because deleting an unnamed instruction can change all other instruction numbering, making the diff very noisy. -.. _passes-verify: +(passes-verify)= -``verify``: Module Verifier ---------------------------- +### `verify`: Module Verifier Verifies LLVM IR code. This is useful to run after an optimization which is undergoing testing. Note that llvm-as verifies its input before emitting @@ -1026,77 +926,70 @@ bitcode, and also that malformed bitcode is likely to make LLVM crash. All language front-ends are therefore encouraged to verify their output before performing optimizing transformations. -#. Both of a binary operator's parameters are of the same type. -#. Verify that the indices of mem access instructions match other operands. -#. Verify that arithmetic and other things are only performed on first-class +1. Both of a binary operator's parameters are of the same type. +1. Verify that the indices of mem access instructions match other operands. +1. Verify that arithmetic and other things are only performed on first-class types. Verify that shifts and logicals only happen on integrals f.e. -#. All of the constants in a switch statement are of the correct type. -#. The code is in valid SSA form. -#. It is illegal to put a label into any other type (like a structure) or to +1. All of the constants in a switch statement are of the correct type. +1. The code is in valid SSA form. +1. It is illegal to put a label into any other type (like a structure) or to return one. -#. Only phi nodes can be self referential: ``%x = add i32 %x``, ``%x`` is +1. Only phi nodes can be self referential: `%x = add i32 %x`, `%x` is invalid. -#. 
PHI nodes must have an entry for each predecessor, with no extras. -#. PHI nodes must be the first thing in a basic block, all grouped together. -#. PHI nodes must have at least one entry. -#. All basic blocks should only end with terminator insts, not contain them. -#. The entry node to a function must not have predecessors. -#. All Instructions must be embedded into a basic block. -#. Functions cannot take a void-typed parameter. -#. Verify that a function's argument list agrees with its declared type. -#. It is illegal to specify a name for a void value. -#. It is illegal to have an internal global value with no initializer. -#. It is illegal to have a ``ret`` instruction that returns a value that does +1. PHI nodes must have an entry for each predecessor, with no extras. +1. PHI nodes must be the first thing in a basic block, all grouped together. +1. PHI nodes must have at least one entry. +1. All basic blocks should only end with terminator insts, not contain them. +1. The entry node to a function must not have predecessors. +1. All Instructions must be embedded into a basic block. +1. Functions cannot take a void-typed parameter. +1. Verify that a function's argument list agrees with its declared type. +1. It is illegal to specify a name for a void value. +1. It is illegal to have an internal global value with no initializer. +1. It is illegal to have a `ret` instruction that returns a value that does not agree with the function return value type. -#. Function call argument types match the function prototype. -#. All other things that are tested by asserts spread about the code. +1. Function call argument types match the function prototype. +1. All other things that are tested by asserts spread about the code. Note that this does not provide full security verification (like Java), but instead just tries to ensure that code is well-formed. -.. 
_passes-view-cfg: +(passes-view-cfg)= -``view-cfg``: View CFG of function ----------------------------------- +### `view-cfg`: View CFG of function Displays the control flow graph using the GraphViz tool. -Additionally, the ``-cfg-func-name=`` option can be used to filter the +Additionally, the `-cfg-func-name=` option can be used to filter the functions that are displayed. All functions that contain the specified substring will be displayed. -``view-cfg-only``: View CFG of function (with no function bodies) ------------------------------------------------------------------ +### `view-cfg-only`: View CFG of function (with no function bodies) Displays the control flow graph using the GraphViz tool, but omitting function bodies. -Additionally, the ``-cfg-func-name=`` option can be used to filter the +Additionally, the `-cfg-func-name=` option can be used to filter the functions that are displayed. All functions that contain the specified substring will be displayed. -``view-dom``: View dominance tree of function ---------------------------------------------- +### `view-dom`: View dominance tree of function Displays the dominator tree using the GraphViz tool. -``view-dom-only``: View dominance tree of function (with no function bodies) ----------------------------------------------------------------------------- +### `view-dom-only`: View dominance tree of function (with no function bodies) Displays the dominator tree using the GraphViz tool, but omitting function bodies. -``view-post-dom``: View postdominance tree of function ------------------------------------------------------- +### `view-post-dom`: View postdominance tree of function Displays the post dominator tree using the GraphViz tool. 
-``view-post-dom-only``: View postdominance tree of function (with no function bodies) -------------------------------------------------------------------------------------- +### `view-post-dom-only`: View postdominance tree of function (with no function bodies) Displays the post dominator tree using the GraphViz tool, but omitting function bodies. -``transform-warning``: Report missed forced transformations ------------------------------------------------------------ +### `transform-warning`: Report missed forced transformations Emits warnings about not yet applied forced transformations (e.g. from -``#pragma omp simd``). +`#pragma omp simd`). diff --git a/llvm/docs/ProgrammersManual.md b/llvm/docs/ProgrammersManual.md index 26a6c13edd094..5f6937d2ab9f5 100644 --- a/llvm/docs/ProgrammersManual.md +++ b/llvm/docs/ProgrammersManual.md @@ -1,17 +1,14 @@ -======================== -LLVM Programmer's Manual -======================== +# LLVM Programmer's Manual -.. contents:: - :local: +```{contents} +:local: +``` +````{warning} +This is always a work in progress. +```` +(introduction)= -.. warning:: - This is always a work in progress. - -.. _introduction: - -Introduction -============ +## Introduction This document is meant to highlight some of the important classes and interfaces available in the LLVM source-base. This manual is not intended to explain what @@ -23,29 +20,25 @@ This document should get you oriented so that you can find your way in the continuously growing source code that makes up the LLVM infrastructure. Note that this manual is not intended to serve as a replacement for reading the source code, so if you think there should be a method in one of these classes to -do something, but it's not listed, check the source. Links to the `doxygen -`__ sources are provided to make this as easy as +do something, but it's not listed, check the source. Links to the [doxygen](https://llvm.org/doxygen/) sources are provided to make this as easy as possible. 
The first section of this document describes general information that is useful to know when working in the LLVM infrastructure, and the second describes the Core LLVM classes. In the future this manual will be extended with information describing how to use extension libraries, such as dominator information, CFG -traversal routines, and useful utilities like the ``InstVisitor`` (`doxygen -`__) template. +traversal routines, and useful utilities like the `InstVisitor` ([doxygen](https://llvm.org/doxygen/InstVisitor_8h_source.html)) template. -.. _general: +(general)= -General Information -=================== +## General Information This section contains general information that is useful if you are working in the LLVM source-base, but that isn't specific to any particular API. -.. _stl: +(stl)= -The C++ Standard Template Library ---------------------------------- +### The C++ Standard Template Library LLVM makes heavy use of the C++ Standard Template Library (STL), perhaps much more than you are used to, or have seen before. Because of this, you might want @@ -55,367 +48,335 @@ the subject that you can get, so it will not be discussed in this document. Here are some useful links: -#. `cppreference.com - `_ - an excellent +1. [cppreference.com](https://en.cppreference.com/w/) - an excellent reference for the STL and other parts of the standard C++ library. -#. `cplusplus.com - `_ - another excellent +1. [cplusplus.com](https://cplusplus.com/reference/) - another excellent reference like the one above. -#. `C++ In a Nutshell `_ - This is an O'Reilly +1. [C++ In a Nutshell](http://www.tempest-sw.com/cpp/) - This is an O'Reilly book in the making. It has a decent Standard Library Reference that rivals Dinkumware's, and is unfortunately no longer free since the book has been published. -#. `C++ Frequently Asked Questions `_. +1. [C++ Frequently Asked Questions](https://www.parashift.com/c++-faq-lite/). -#. `Bjarne Stroustrup's C++ Page - `_. +1. 
[Bjarne Stroustrup's C++ Page](https://www.stroustrup.com/C++.html). -#. `Bruce Eckel's Thinking in C++, 2nd ed. Volume 2. - (even better, get the book) - `_. +1. [Bruce Eckel's Thinking in C++, 2nd ed. Volume 2. (even better, get the book)](https://archive.org/details/TICPP2ndEdVolTwo). -You are also encouraged to take a look at the :doc:`LLVM Coding Standards -` guide which focuses on how to write maintainable code more +You are also encouraged to take a look at the {doc}`LLVM Coding Standards ` guide which focuses on how to write maintainable code more than where to put your curly braces. -.. _resources: +(resources)= -Other useful references ------------------------ +### Other useful references -#. `Using static and shared libraries across platforms - `_ +1. [Using static and shared libraries across platforms](http://www.fortran-2000.com/ArnaudRecipes/sharedlib.html) -.. _apis: +(apis)= -Important and useful LLVM APIs -============================== +## Important and useful LLVM APIs Here we highlight some LLVM APIs that are generally useful and good to know about when writing transformations. -.. _isa: +(isa)= -The ``isa<>``, ``cast<>`` and ``dyn_cast<>`` templates ------------------------------------------------------- +### The `isa<>`, `cast<>` and `dyn_cast<>` templates The LLVM source-base makes extensive use of a custom form of RTTI. These -templates have many similarities to the C++ ``dynamic_cast<>`` operator, but +templates have many similarities to the C++ `dynamic_cast<>` operator, but they don't have some drawbacks (primarily stemming from the fact that -``dynamic_cast<>`` only works on classes that have a v-table). Because they are +`dynamic_cast<>` only works on classes that have a v-table). Because they are used so often, you must know what they do and how they work. 
All of these -templates are defined in the ``llvm/Support/Casting.h`` (`doxygen -<https://llvm.org/doxygen/Casting_8h_source.html>`__) file (note that you very +templates are defined in the `llvm/Support/Casting.h` ([doxygen](https://llvm.org/doxygen/Casting_8h_source.html)) file (note that you very rarely have to include this file directly). -``isa<>``: - The ``isa<>`` operator works exactly like the Java "``instanceof``" operator. - It returns ``true`` or ``false`` depending on whether a reference or pointer points to +`isa<>`: + The `isa<>` operator works exactly like the Java "`instanceof`" operator. + It returns `true` or `false` depending on whether a reference or pointer points to an instance of the specified class. This can be very useful for constraint checking of various sorts (example below). It's a variadic operator, so you can specify more than one class to check if the reference or pointer points to an instance of one of the classes specified. -``cast<>``: - The ``cast<>`` operator is a "checked cast" operation. It converts a pointer +`cast<>`: + The `cast<>` operator is a "checked cast" operation. It converts a pointer or reference from a base class to a derived class, causing an assertion failure if it is not really an instance of the right type. This should be used in cases where you have some information that makes you believe that - something is of the right type. An example of the ``isa<>`` and ``cast<>`` + something is of the right type. An example of the `isa<>` and `cast<>` template is: - .. code-block:: c++ - - static bool isLoopInvariant(const Value *V, const Loop *L) { - if (isa<Constant>(V) || isa<Argument>(V) || isa<GlobalValue>(V)) - return true; + ```cpp + static bool isLoopInvariant(const Value *V, const Loop *L) { + if (isa<Constant>(V) || isa<Argument>(V) || isa<GlobalValue>(V)) + return true; - // Alternate, more compact form. - if (isa<Constant, Argument, GlobalValue>(V)) - return true; + // Alternate, more compact form. + if (isa<Constant, Argument, GlobalValue>(V)) + return true; - // Otherwise, it must be an instruction. + // Otherwise, it must be an instruction. 
- return !L->contains(cast<Instruction>(V)->getParent()); - } - - Note that you should **not** use an ``isa<>`` test followed by a ``cast<>``; - for that use the ``dyn_cast<>`` operator. + // Otherwise, it must be an instruction. + return !L->contains(cast<Instruction>(V)->getParent()); + } + ``` + Note that you should **not** use an `isa<>` test followed by a `cast<>`; + for that use the `dyn_cast<>` operator. -``dyn_cast<>``: - The ``dyn_cast<>`` operator is a "checking cast" operation. It checks to see +`dyn_cast<>`: + The `dyn_cast<>` operator is a "checking cast" operation. It checks to see if the operand is of the specified type, and if so, returns a pointer to it (this operator does not work with references). If the operand is not of the correct type, a null pointer is returned. Thus, this works very much like - the ``dynamic_cast<>`` operator in C++, and should be used in the same - circumstances. Typically, the ``dyn_cast<>`` operator is used in an ``if`` + the `dynamic_cast<>` operator in C++, and should be used in the same + circumstances. Typically, the `dyn_cast<>` operator is used in an `if` statement or some other flow control statement like this: - .. code-block:: c++ - - if (auto *AI = dyn_cast<AllocationInst>(Val)) { - // ... - } - - This form of the ``if`` statement effectively combines together a call to - ``isa<>`` and a call to ``cast<>`` into one statement, which is very + ```cpp + if (auto *AI = dyn_cast<AllocationInst>(Val)) { + // ... + } + ``` + This form of the `if` statement effectively combines together a call to + `isa<>` and a call to `cast<>` into one statement, which is very convenient. - Note that the ``dyn_cast<>`` operator, like C++'s ``dynamic_cast<>`` or Java's - ``instanceof`` operator, can be abused. In particular, you should not use big - chained ``if/then/else`` blocks to check for lots of different variants of + Note that the `dyn_cast<>` operator, like C++'s `dynamic_cast<>` or Java's + `instanceof` operator, can be abused. 
In particular, you should not use big + chained `if/then/else` blocks to check for lots of different variants of classes. If you find yourself wanting to do this, it is much cleaner and more - efficient to use the ``InstVisitor`` class to dispatch over the instruction + efficient to use the `InstVisitor` class to dispatch over the instruction type directly. -``isa_and_present<>``: - The ``isa_and_present<>`` operator works just like the ``isa<>`` operator, +`isa_and_present<>`: + The `isa_and_present<>` operator works just like the `isa<>` operator, except that it allows for a null pointer as an argument (which it then - returns ``false``). This can sometimes be useful, allowing you to combine - several null checks into one. Similar to ``isa<>`` operator, you can specify + returns `false`). This can sometimes be useful, allowing you to combine + several null checks into one. Similar to `isa<>` operator, you can specify more than one class to check. -``cast_if_present<>``: - The ``cast_if_present<>`` operator works just like the ``cast<>`` operator, +`cast_if_present<>`: + The `cast_if_present<>` operator works just like the `cast<>` operator, except that it allows for a null pointer as an argument (which it then propagates). This can sometimes be useful, allowing you to combine several null checks into one. -``dyn_cast_if_present<>``: - The ``dyn_cast_if_present<>`` operator works just like the ``dyn_cast<>`` +`dyn_cast_if_present<>`: + The `dyn_cast_if_present<>` operator works just like the `dyn_cast<>` operator, except that it allows for a null pointer as an argument (which it then propagates). This can sometimes be useful, allowing you to combine several null checks into one. These five templates can be used with any classes, whether they have a v-table or not. If you want to add support for these templates, see the document -:doc:`How to set up LLVM-style RTTI for your class hierarchy -` +{doc}`How to set up LLVM-style RTTI for your class hierarchy ` -.. 
_string_apis: +(string_apis)= -Passing strings (the ``StringRef`` and ``Twine`` classes) ---------------------------------------------------------- +### Passing strings (the `StringRef` and `Twine` classes) Although LLVM generally does not do much string manipulation, we do have several important APIs which take strings. Two important examples are the Value class --- which has names for instructions, functions, etc. -- and the ``StringMap`` +-- which has names for instructions, functions, etc. -- and the `StringMap` class which is used extensively in LLVM and Clang. These are generic classes, and they need to be able to accept strings which may -have embedded null characters. Therefore, they cannot simply take a ``const -char *``, and taking a ``const std::string&`` requires clients to perform a heap +have embedded null characters. Therefore, they cannot simply take a `const +char *`, and taking a `const std::string&` requires clients to perform a heap allocation which is usually unnecessary. Instead, many LLVM APIs use a -``StringRef`` or a ``const Twine&`` for passing strings efficiently. +`StringRef` or a `const Twine&` for passing strings efficiently. -.. _StringRef: +(StringRef)= -The ``StringRef`` class -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### The `StringRef` class -The ``StringRef`` data type represents a reference to a constant string (a +The `StringRef` data type represents a reference to a constant string (a character array and a length) and supports the common operations available on -``std::string``, but does not require heap allocation. +`std::string`, but does not require heap allocation. It can be implicitly constructed using a C style null-terminated string, an -``std::string``, or explicitly with a character pointer and length. For -example, the ``StringMap`` find function is declared as: - -.. code-block:: c++ - - iterator find(StringRef Key); +`std::string`, or explicitly with a character pointer and length. 
For +example, the `StringMap` find function is declared as: +```cpp +iterator find(StringRef Key); +``` and clients can call it using any one of: -.. code-block:: c++ - - Map.find("foo"); // Lookup "foo" - Map.find(std::string("bar")); // Lookup "bar" - Map.find(StringRef("\0baz", 4)); // Lookup "\0baz" - -Similarly, APIs which need to return a string may return a ``StringRef`` -instance, which can be used directly or converted to an ``std::string`` using -the ``str`` member function. See ``llvm/ADT/StringRef.h`` (`doxygen -`__) for more +```cpp +Map.find("foo"); // Lookup "foo" +Map.find(std::string("bar")); // Lookup "bar" +Map.find(StringRef("\0baz", 4)); // Lookup "\0baz" +``` +Similarly, APIs which need to return a string may return a `StringRef` +instance, which can be used directly or converted to an `std::string` using +the `str` member function. See `llvm/ADT/StringRef.h` ([doxygen](https://llvm.org/doxygen/StringRef_8h_source.html)) for more information. -You should rarely use the ``StringRef`` class directly. Because it contains +You should rarely use the `StringRef` class directly. Because it contains pointers to external memory, it is not generally safe to store an instance of the class (unless you know that the external storage will not be freed). -``StringRef`` is small and pervasive enough in LLVM that it should always be +`StringRef` is small and pervasive enough in LLVM that it should always be passed by value. -The ``Twine`` class -^^^^^^^^^^^^^^^^^^^ +#### The `Twine` class -The ``Twine`` (`doxygen `__) +The `Twine` ([doxygen](https://llvm.org/doxygen/classllvm_1_1Twine.html)) class is an efficient way for APIs to accept concatenated strings. For example, a common LLVM paradigm is to name one instruction based on the name of another instruction with a suffix, for example: -.. 
code-block:: c++ - - New = CmpInst::Create(..., SO->getName() + ".cmp"); - -The ``Twine`` class is effectively a lightweight `rope -<http://en.wikipedia.org/wiki/Rope_(computer_science)>`_ which points to +```cpp +New = CmpInst::Create(..., SO->getName() + ".cmp"); +``` +The `Twine` class is effectively a lightweight [rope](http://en.wikipedia.org/wiki/Rope_(computer_science)) which points to temporary (stack allocated) objects. Twines can be implicitly constructed as the result of the plus operator applied to strings (i.e., a C string, an -``std::string``, or a ``StringRef``). The twine delays the actual concatenation +`std::string`, or a `StringRef`). The twine delays the actual concatenation of strings until it is actually required, at which point it can be efficiently rendered directly into a character array. This avoids unnecessary heap allocation involved in constructing the temporary results of string -concatenation. See ``llvm/ADT/Twine.h`` (`doxygen -<https://llvm.org/doxygen/Twine_8h_source.html>`__) and :ref:`here <dss_twine>` +concatenation. See `llvm/ADT/Twine.h` ([doxygen](https://llvm.org/doxygen/Twine_8h_source.html)) and {ref}`here <dss_twine>` for more information. -As with a ``StringRef``, ``Twine`` objects point to external memory and should +As with a `StringRef`, `Twine` objects point to external memory and should almost never be stored or mentioned directly. They are intended solely for use when defining a function which should be able to efficiently accept concatenated strings. -.. _formatting_strings: +(formatting_strings)= -Formatting strings (the ``formatv`` function) ---------------------------------------------- +### Formatting strings (the `formatv` function) While LLVM doesn't necessarily do a lot of string manipulation and parsing, it does do a lot of string formatting. From diagnostic messages, to llvm tool -outputs such as ``llvm-readobj`` to printing verbose disassembly listings and +outputs such as `llvm-readobj` to printing verbose disassembly listings and LLDB runtime logging, the need for string formatting is pervasive. 
-The ``formatv`` is similar in spirit to ``printf``, but uses a different syntax -which borrows heavily from Python and C#. Unlike ``printf`` it deduces the type +The `formatv` is similar in spirit to `printf`, but uses a different syntax +which borrows heavily from Python and C#. Unlike `printf` it deduces the type to be formatted at compile time, so it does not need a format specifier such as -``%d``. This reduces the mental overhead of trying to construct portable format -strings, especially for platform-specific types like ``size_t`` or pointer types. -Unlike both ``printf`` and Python, it additionally fails to compile if LLVM does +`%d`. This reduces the mental overhead of trying to construct portable format +strings, especially for platform-specific types like `size_t` or pointer types. +Unlike both `printf` and Python, it additionally fails to compile if LLVM does not know how to format the type. These two properties ensure that the function is both safer and simpler to use than traditional formatting methods such as -the ``printf`` family of functions. +the `printf` family of functions. -Simple formatting -^^^^^^^^^^^^^^^^^ +#### Simple formatting -A call to ``formatv`` involves a single **format string** consisting of 0 or more +A call to `formatv` involves a single **format string** consisting of 0 or more **replacement sequences**, followed by a variable length list of **replacement values**. -A replacement sequence is a string of the form ``{N[[,align]:style]}``. +A replacement sequence is a string of the form `{N[[,align]:style]}`. -``N`` refers to the 0-based index of the argument from the list of replacement +`N` refers to the 0-based index of the argument from the list of replacement values. Note that this means it is possible to reference the same parameter multiple times, possibly with different style and/or alignment options, in any order. 
-``align`` is an optional string specifying the width of the field to format +`align` is an optional string specifying the width of the field to format the value into, and the alignment of the value within the field. It is specified as an optional **alignment style** followed by a positive integral **field width**. The -alignment style can be one of the characters ``-`` (left align), ``=`` (center align), -or ``+`` (right align). The default is right aligned. +alignment style can be one of the characters `-` (left align), `=` (center align), +or `+` (right align). The default is right aligned. -``style`` is an optional string consisting of a type specific that controls the +`style` is an optional string consisting of a type specific that controls the formatting of the value. For example, to format a floating point value as a percentage, -you can use the style option ``P``. +you can use the style option `P`. -Custom formatting -^^^^^^^^^^^^^^^^^ +#### Custom formatting There are two ways to customize the formatting behavior for a type. -1. Provide a template specialization of ``llvm::format_provider`` for your - type ``T`` with the appropriate static format method. +1. Provide a template specialization of `llvm::format_provider` for your + type `T` with the appropriate static format method. - .. 
code-block:: c++ - - namespace llvm { - template<> - struct format_provider<MyFooBar> { - static void format(const MyFooBar &V, raw_ostream &Stream, StringRef Style) { - // Do whatever is necessary to format `V` into `Stream` - } - }; - void foo() { - MyFooBar X; - std::string S = formatv("{0}", X); + ```cpp + namespace llvm { + template<> + struct format_provider<MyFooBar> { + static void format(const MyFooBar &V, raw_ostream &Stream, StringRef Style) { + // Do whatever is necessary to format `V` into `Stream` } + }; + void foo() { + MyFooBar X; + std::string S = formatv("{0}", X); } - + } + ``` This is a useful extensibility mechanism for adding support for formatting your own custom types with your own custom Style options. But it does not help when you want to extend the mechanism for formatting a type that the library already knows how to format. For that, we need something else. -2. Provide a **format adapter** inheriting from ``llvm::FormatAdapter<T>``. - - .. code-block:: c++ +2. Provide a **format adapter** inheriting from `llvm::FormatAdapter<T>`.
- namespace anything { - struct format_int_custom : public llvm::FormatAdapter { - explicit format_int_custom(int N) : llvm::FormatAdapter(N) {} - void format(llvm::raw_ostream &Stream, StringRef Style) override { - // Do whatever is necessary to format ``this->Item`` into ``Stream`` - } - }; - } - namespace llvm { - void foo() { - std::string S = formatv("{0}", anything::format_int_custom(42)); + ```cpp + namespace anything { + struct format_int_custom : public llvm::FormatAdapter { + explicit format_int_custom(int N) : llvm::FormatAdapter(N) {} + void format(llvm::raw_ostream &Stream, StringRef Style) override { + // Do whatever is necessary to format ``this->Item`` into ``Stream`` } + }; + } + namespace llvm { + void foo() { + std::string S = formatv("{0}", anything::format_int_custom(42)); } - - If the type is detected to be derived from ``FormatAdapter``, ``formatv`` + } + ``` + If the type is detected to be derived from `FormatAdapter`, `formatv` will call the - ``format`` method on the argument passing in the specified style. This allows + `format` method on the argument passing in the specified style. This allows one to provide custom formatting of any type, including one which already has a builtin format provider. -``formatv`` Examples -^^^^^^^^^^^^^^^^^^^^ +#### `formatv` Examples Below is intended to provide an incomplete set of examples demonstrating -the usage of ``formatv``. More information can be found by reading the +the usage of `formatv`. More information can be found by reading the doxygen documentation or by looking at the unit test suite. -.. code-block:: c++ - - std::string S; - // Simple formatting of basic types and implicit string conversion. - S = formatv("{0} ({1:P})", 7, 0.35); // S == "7 (35.00%)" +```cpp +std::string S; +// Simple formatting of basic types and implicit string conversion. 
+S = formatv("{0} ({1:P})", 7, 0.35); // S == "7 (35.00%)" - // Out-of-order referencing and multi-referencing - outs() << formatv("{0} {2} {1} {0}", 1, "test", 3); // prints "1 3 test 1" +// Out-of-order referencing and multi-referencing +outs() << formatv("{0} {2} {1} {0}", 1, "test", 3); // prints "1 3 test 1" - // Left, right, and center alignment - S = formatv("{0,7}", 'a'); // S == "      a"; - S = formatv("{0,-7}", 'a'); // S == "a      "; - S = formatv("{0,=7}", 'a'); // S == "   a   "; - S = formatv("{0,+7}", 'a'); // S == "      a"; +// Left, right, and center alignment +S = formatv("{0,7}", 'a'); // S == "      a"; +S = formatv("{0,-7}", 'a'); // S == "a      "; +S = formatv("{0,=7}", 'a'); // S == "   a   "; +S = formatv("{0,+7}", 'a'); // S == "      a"; - // Custom styles - S = formatv("{0:N} - {0:x} - {1:E}", 12345, 123908342); // S == "12,345 - 0x3039 - 1.24E8" +// Custom styles +S = formatv("{0:N} - {0:x} - {1:E}", 12345, 123908342); // S == "12,345 - 0x3039 - 1.24E8" - // Adapters - S = formatv("{0}", fmt_align(42, AlignStyle::Center, 7)); // S == "  42   " - S = formatv("{0}", fmt_repeat("hi", 3)); // S == "hihihi" - S = formatv("{0}", fmt_pad("hi", 2, 6)); // S == "  hi      " +// Adapters +S = formatv("{0}", fmt_align(42, AlignStyle::Center, 7)); // S == "  42   " +S = formatv("{0}", fmt_repeat("hi", 3)); // S == "hihihi" +S = formatv("{0}", fmt_pad("hi", 2, 6)); // S == "  hi      " - // Ranges - std::vector<int> V = {8, 9, 10}; - S = formatv("{0}", make_range(V.begin(), V.end())); // S == "8, 9, 10" - S = formatv("{0:$[+]}", make_range(V.begin(), V.end())); // S == "8+9+10" - S = formatv("{0:$[ + ]@[x]}", make_range(V.begin(), V.end())); // S == "0x8 + 0x9 + 0xA" +// Ranges +std::vector<int> V = {8, 9, 10}; +S = formatv("{0}", make_range(V.begin(), V.end())); // S == "8, 9, 10" +S = formatv("{0:$[+]}", make_range(V.begin(), V.end())); // S == "8+9+10" +S = formatv("{0:$[ + ]@[x]}", make_range(V.begin(), V.end())); // S == "0x8 + 0x9 + 0xA" +``` +(error_apis)= -..
_error_apis: - -Error handling --------------- +### Error handling Proper error handling helps us identify bugs in our code, and helps end users understand errors in their tool usage. Errors fall into two broad categories: *programmatic* and *recoverable*, with different strategies for handling and reporting. -Programmatic Errors -^^^^^^^^^^^^^^^^^^^ +#### Programmatic Errors Programmatic errors are violations of program invariants or API contracts, and represent bugs within the program itself. Our aim is to document invariants, and @@ -423,47 +384,42 @@ to abort quickly at the point of failure (providing some basic diagnostic) when invariants are broken at runtime. The fundamental tools for handling programmatic errors are assertions and the -``llvm_unreachable`` function. Assertions are used to express invariant conditions, +`llvm_unreachable` function. Assertions are used to express invariant conditions, and should include a message describing the invariant: -.. code-block:: c++ - - assert(isPhysReg(R) && "All virt regs should have been allocated already."); - -The ``llvm_unreachable`` function can be used to document areas of control flow +```cpp +assert(isPhysReg(R) && "All virt regs should have been allocated already."); +``` +The `llvm_unreachable` function can be used to document areas of control flow that should never be entered if the program invariants hold: -.. 
code-block:: c++ - - enum { Foo, Bar, Baz } X = foo(); - - switch (X) { - case Foo: /* Handle Foo */; break; - case Bar: /* Handle Bar */; break; - default: - llvm_unreachable("X should be Foo or Bar here"); - } - -Additionally, ``reportFatalInternalError`` can be used to report invariant +```cpp +enum { Foo, Bar, Baz } X = foo(); + +switch (X) { + case Foo: /* Handle Foo */; break; + case Bar: /* Handle Bar */; break; + default: + llvm_unreachable("X should be Foo or Bar here"); +} +``` +Additionally, `reportFatalInternalError` can be used to report invariant violations even in builds that do not enable assertions: -.. code-block:: c++ - - if (VerifyFooAnalysis && !Foo.verify()) { - reportFatalInternalError("Analysis 'foo' not preserved"); - } - -Additionally, ``checkNotNull`` can be used to check/document that a pointer +```cpp +if (VerifyFooAnalysis && !Foo.verify()) { + reportFatalInternalError("Analysis 'foo' not preserved"); +} +``` +Additionally, `checkNotNull` can be used to check/document that a pointer is never supposed to be null inline. -.. code-block:: c++ - - setMyPointer("key", Pointer); - // [...] - Type *P = checkNotNull(getMyPointer("key")); - -Recoverable Errors -^^^^^^^^^^^^^^^^^^ +```cpp +setMyPointer("key", Pointer); +// [...] +Type *P = checkNotNull(getMyPointer("key")); +``` +#### Recoverable Errors Recoverable errors represent an error in the program's environment, for example, a resource failure (a missing file, a dropped network connection, etc.), or @@ -472,169 +428,161 @@ the program that can handle them appropriately. Handling the error may be as simple as reporting the issue to the user, or it may involve attempts at recovery. -.. note:: - - While it would be ideal to use this error handling scheme throughout - LLVM, there are places where this hasn't been practical to apply. 
In - situations where you absolutely must emit a non-programmatic error and - the ``Error`` model isn't workable you can call ``reportFatalUsageError``, - which will call installed error handlers, print a message, and exit the - program. The use of ``reportFatalUsageError`` in this case is discouraged. - -Recoverable errors are modeled using LLVM's ``Error`` scheme. This scheme +````{note} +While it would be ideal to use this error handling scheme throughout +LLVM, there are places where this hasn't been practical to apply. In +situations where you absolutely must emit a non-programmatic error and +the `Error` model isn't workable you can call `reportFatalUsageError`, +which will call installed error handlers, print a message, and exit the +program. The use of `reportFatalUsageError` in this case is discouraged. +```` +Recoverable errors are modeled using LLVM's `Error` scheme. This scheme represents errors using function return values, similar to classic C integer -error codes, or C++'s ``std::error_code``. However, the ``Error`` class is +error codes, or C++'s `std::error_code`. However, the `Error` class is actually a lightweight wrapper for user-defined error types, allowing arbitrary information to be attached to describe the error. This is similar to the way C++ exceptions allow throwing of user-defined types. -Success values are created by calling ``Error::success()``, E.g.: - -.. code-block:: c++ - - Error foo() { - // Do something. - // Return success. - return Error::success(); - } +Success values are created by calling `Error::success()`, E.g.: +```cpp +Error foo() { + // Do something. + // Return success. + return Error::success(); +} +``` Success values are very cheap to construct and return - they have minimal impact on program performance. -Failure values are constructed using ``make_error``, where ``T`` is any class -that inherits from the ``ErrorInfo`` utility, E.g.: - -.. 
code-block:: c++ +Failure values are constructed using `make_error<T>`, where `T` is any class +that inherits from the `ErrorInfo` utility, E.g.: - class BadFileFormat : public ErrorInfo<BadFileFormat> { - public: - static char ID; - std::string Path; - - BadFileFormat(StringRef Path) : Path(Path.str()) {} +```cpp +class BadFileFormat : public ErrorInfo<BadFileFormat> { +public: + static char ID; + std::string Path; - void log(raw_ostream &OS) const override { - OS << Path << " is malformed"; - } + BadFileFormat(StringRef Path) : Path(Path.str()) {} - std::error_code convertToErrorCode() const override { - return make_error_code(object_error::parse_failed); - } - }; - - char BadFileFormat::ID; // This should be declared in the C++ file. + void log(raw_ostream &OS) const override { + OS << Path << " is malformed"; + } - Error printFormattedFile(StringRef Path) { - if (<check for valid format>) - return make_error<BadFileFormat>(Path); - // print file contents. - return Error::success(); + std::error_code convertToErrorCode() const override { + return make_error_code(object_error::parse_failed); } +}; + +char BadFileFormat::ID; // This should be declared in the C++ file. +Error printFormattedFile(StringRef Path) { + if (<check for valid format>) + return make_error<BadFileFormat>(Path); + // print file contents. + return Error::success(); +} +``` Error values can be implicitly converted to bool: true for error, false for success, enabling the following idiom: -.. code-block:: c++ +```cpp +Error mayFail(); - Error mayFail(); - - Error foo() { - if (auto Err = mayFail()) - return Err; - // Success! We can proceed. - ... - -For functions that can fail but need to return a value the ``Expected<T>`` +Error foo() { + if (auto Err = mayFail()) + return Err; + // Success! We can proceed. + ... +``` +For functions that can fail but need to return a value the `Expected<T>` utility can be used. Values of this type can be constructed with either a -``T``, or an ``Error``.
Expected<T> values are also implicitly convertible to -boolean, but with the opposite convention to ``Error``: true for success, false -for error. If success, the ``T`` value can be accessed via the dereference -operator. If failure, the ``Error`` value can be extracted using the -``takeError()`` method. Idiomatic usage looks like: - -.. code-block:: c++ - - Expected<FormattedFile> openFormattedFile(StringRef Path) { - // If badly formatted, return an error. - if (auto Err = checkFormat(Path)) - return std::move(Err); - // Otherwise return a FormattedFile instance. - return FormattedFile(Path); - } - - Error processFormattedFile(StringRef Path) { - // Try to open a formatted file - if (auto FileOrErr = openFormattedFile(Path)) { - // On success, grab a reference to the file and continue. - auto &File = *FileOrErr; - ... - } else - // On error, extract the Error value and return it. - return FileOrErr.takeError(); - } - -If an ``Expected<T>`` value is in success mode then the ``takeError()`` method -will return a success value. Using this fact, the above function can be -rewritten as: - -.. code-block:: c++ - - Error processFormattedFile(StringRef Path) { - // Try to open a formatted file - auto FileOrErr = openFormattedFile(Path); - if (auto Err = FileOrErr.takeError()) - // On error, extract the Error value and return it. - return Err; +`T`, or an `Error`. `Expected<T>` values are also implicitly convertible to +boolean, but with the opposite convention to `Error`: true for success, false +for error. If success, the `T` value can be accessed via the dereference +operator. If failure, the `Error` value can be extracted using the +`takeError()` method. Idiomatic usage looks like: + +```cpp +Expected<FormattedFile> openFormattedFile(StringRef Path) { + // If badly formatted, return an error. + if (auto Err = checkFormat(Path)) + return std::move(Err); + // Otherwise return a FormattedFile instance.
+ return FormattedFile(Path); +} + +Error processFormattedFile(StringRef Path) { + // Try to open a formatted file + if (auto FileOrErr = openFormattedFile(Path)) { // On success, grab a reference to the file and continue. auto &File = *FileOrErr; ... - } + } else + // On error, extract the Error value and return it. + return FileOrErr.takeError(); +} +``` +If an `Expected` value is in success mode then the `takeError()` method +will return a success value. Using this fact, the above function can be +rewritten as: +```cpp +Error processFormattedFile(StringRef Path) { + // Try to open a formatted file + auto FileOrErr = openFormattedFile(Path); + if (auto Err = FileOrErr.takeError()) + // On error, extract the Error value and return it. + return Err; + // On success, grab a reference to the file and continue. + auto &File = *FileOrErr; + ... +} +``` This second form is often more readable for functions that involve multiple -``Expected`` values as it limits the indentation required. +`Expected` values as it limits the indentation required. -If an ``Expected`` value will be moved into an existing variable then the -``moveInto()`` method avoids the need to name an extra variable. This is -useful to enable ``operator->()`` if the ``Expected`` value has pointer-like +If an `Expected` value will be moved into an existing variable then the +`moveInto()` method avoids the need to name an extra variable. This is +useful to enable `operator->()` if the `Expected` value has pointer-like semantics. For example: -.. code-block:: c++ - - Expected> openBuffer(StringRef Path); - Error processBuffer(StringRef Buffer); +```cpp +Expected> openBuffer(StringRef Path); +Error processBuffer(StringRef Buffer); - Error processBufferAtPath(StringRef Path) { - // Try to open a buffer. - std::unique_ptr MB; - if (auto Err = openBuffer(Path).moveInto(MB)) +Error processBufferAtPath(StringRef Path) { + // Try to open a buffer. 
+ std::unique_ptr MB; + if (auto Err = openBuffer(Path).moveInto(MB)) + // On error, return the Error value. + return Err; + // On success, use MB. + return processBuffer(MB->getBuffer()); +} +``` +This third form works with any type that can be assigned to from `T&&`. This +can be useful if the `Expected` value needs to be stored in an already-declared +`std::optional`. For example: + +```cpp +Expected extractClassName(StringRef Definition); +struct ClassData { + StringRef Definition; + std::optional LazyName; + ... + Error initialize() { + if (auto Err = extractClassName(Path).moveInto(LazyName)) // On error, return the Error value. return Err; - // On success, use MB. - return processBuffer(MB->getBuffer()); - } - -This third form works with any type that can be assigned to from ``T&&``. This -can be useful if the ``Expected`` value needs to be stored in an already-declared -``std::optional``. For example: - -.. code-block:: c++ - - Expected extractClassName(StringRef Definition); - struct ClassData { - StringRef Definition; - std::optional LazyName; + // On success, LazyName has been initialized. ... - Error initialize() { - if (auto Err = extractClassName(Path).moveInto(LazyName)) - // On error, return the Error value. - return Err; - // On success, LazyName has been initialized. - ... - } - }; - -All ``Error`` instances, whether success or failure, must be either checked or -moved from (via ``std::move`` or a return) before they are destructed. + } +}; +``` +All `Error` instances, whether success or failure, must be either checked or +moved from (via `std::move` or a return) before they are destructed. Accidentally discarding an unchecked error will cause a program to abort at the point where the unchecked value's destructor is run, making it easy to identify and fix violations of this rule. @@ -642,525 +590,487 @@ and fix violations of this rule. 
Success values are considered checked once they have been tested (by invoking the boolean conversion operator): -.. code-block:: c++ - - if (auto Err = mayFail(...)) - return Err; // Failure value - move error to caller. +```cpp +if (auto Err = mayFail(...)) + return Err; // Failure value - move error to caller. - // Safe to continue: Err was checked. - -In contrast, the following code will always cause an abort, even if ``mayFail`` +// Safe to continue: Err was checked. +``` +In contrast, the following code will always cause an abort, even if `mayFail` returns a success value: -.. code-block:: c++ - - mayFail(); - // Program will always abort here, even if mayFail() returns Success, since - // the value is not checked. - +```cpp +mayFail(); +// Program will always abort here, even if mayFail() returns Success, since +// the value is not checked. +``` Failure values are considered checked once a handler for the error type has been activated: -.. code-block:: c++ - - handleErrors( - processFormattedFile(...), - [](const BadFileFormat &BFF) { - report("Unable to process " + BFF.Path + ": bad format"); - }, - [](const FileNotFound &FNF) { - report("File not found " + FNF.Path); - }); - -The ``handleErrors`` function takes an error as its first argument, followed by +```cpp +handleErrors( + processFormattedFile(...), + [](const BadFileFormat &BFF) { + report("Unable to process " + BFF.Path + ": bad format"); + }, + [](const FileNotFound &FNF) { + report("File not found " + FNF.Path); + }); +``` +The `handleErrors` function takes an error as its first argument, followed by a variadic list of "handlers", each of which must be a callable type (a function, lambda, or class with a call operator) with one argument. The -``handleErrors`` function will visit each handler in the sequence and check its +`handleErrors` function will visit each handler in the sequence and check its argument type against the dynamic type of the error, running the first handler that matches. 
This is the same decision process that is used to decide which catch clause to run for a C++ exception. -Since the list of handlers passed to ``handleErrors`` may not cover every error -type that can occur, the ``handleErrors`` function also returns an Error value +Since the list of handlers passed to `handleErrors` may not cover every error +type that can occur, the `handleErrors` function also returns an Error value that must be checked or propagated. If the error value that is passed to -``handleErrors`` does not match any of the handlers it will be returned from -``handleErrors``. Idiomatic use of ``handleErrors`` thus looks like: - -.. code-block:: c++ - - if (auto Err = - handleErrors( - processFormattedFile(...), - [](const BadFileFormat &BFF) { - report("Unable to process " + BFF.Path + ": bad format"); - }, - [](const FileNotFound &FNF) { - report("File not found " + FNF.Path); - })) - return Err; - +`handleErrors` does not match any of the handlers it will be returned from +`handleErrors`. Idiomatic use of `handleErrors` thus looks like: + +```cpp +if (auto Err = + handleErrors( + processFormattedFile(...), + [](const BadFileFormat &BFF) { + report("Unable to process " + BFF.Path + ": bad format"); + }, + [](const FileNotFound &FNF) { + report("File not found " + FNF.Path); + })) + return Err; +``` In cases where you truly know that the handler list is exhaustive, the -``handleAllErrors`` function can be used instead. This is identical to -``handleErrors`` except that it will terminate the program if an unhandled -error is passed in, and can therefore return void. The ``handleAllErrors`` +`handleAllErrors` function can be used instead. This is identical to +`handleErrors` except that it will terminate the program if an unhandled +error is passed in, and can therefore return void. 
The `handleAllErrors` function should generally be avoided: the introduction of a new error type elsewhere in the program can easily turn a formerly exhaustive list of errors into a non-exhaustive list, risking unexpected program termination. Where -possible, use ``handleErrors`` and propagate unknown errors up the stack instead. +possible, use `handleErrors` and propagate unknown errors up the stack instead. For tool code, where errors can be handled by printing an error message then -exiting with an error code, the :ref:`ExitOnError ` utility -may be a better choice than ``handleErrors``, as it simplifies control flow when +exiting with an error code, the {ref}`ExitOnError ` utility +may be a better choice than `handleErrors`, as it simplifies control flow when calling fallible functions. In situations where it is known that a particular call to a fallible function will always succeed (for example, a call to a function that can only fail on a subset of inputs with an input that is known to be safe) the -:ref:`cantFail ` functions can be used to remove the error type, +{ref}`cantFail ` functions can be used to remove the error type, simplifying control flow. -StringError -""""""""""" +##### StringError Many kinds of errors have no recovery strategy; the only action that can be taken is to report them to the user so that the user can attempt to fix the environment. In this case, representing the error as a string makes perfect -sense. LLVM provides the ``StringError`` class for this purpose. It takes two -arguments: A string error message, and an equivalent ``std::error_code`` for -interoperability. It also provides a ``createStringError`` function to simplify +sense. LLVM provides the `StringError` class for this purpose. It takes two +arguments: A string error message, and an equivalent `std::error_code` for +interoperability. It also provides a `createStringError` function to simplify common usage of this class: -.. 
code-block:: c++ - - // These two lines of code are equivalent: - make_error("Bad executable", errc::executable_format_error); - createStringError(errc::executable_format_error, "Bad executable"); - +```cpp +// These two lines of code are equivalent: +make_error("Bad executable", errc::executable_format_error); +createStringError(errc::executable_format_error, "Bad executable"); +``` If you're certain that the error you're building will never need to be converted -to a ``std::error_code``, you can use the ``inconvertibleErrorCode()`` function: - -.. code-block:: c++ - - createStringError(inconvertibleErrorCode(), "Bad executable"); +to a `std::error_code`, you can use the `inconvertibleErrorCode()` function: +```cpp +createStringError(inconvertibleErrorCode(), "Bad executable"); +``` This should be done only after careful consideration. If any attempt is made to -convert this error to a ``std::error_code`` it will trigger immediate program +convert this error to a `std::error_code` it will trigger immediate program termination. Unless you are certain that your errors will not need -interoperability you should look for an existing ``std::error_code`` that you +interoperability you should look for an existing `std::error_code` that you can convert to, and even (as painful as it is) consider introducing a new one as a stopgap measure. -``createStringError`` can take ``printf`` style format specifiers to provide a +`createStringError` can take `printf` style format specifiers to provide a formatted message: -.. 
code-block:: c++ - - createStringError(errc::executable_format_error, - "Bad executable: %s", FileName); +```cpp +createStringError(errc::executable_format_error, + "Bad executable: %s", FileName); +``` +##### Interoperability with std::error_code and ErrorOr -Interoperability with std::error_code and ErrorOr -""""""""""""""""""""""""""""""""""""""""""""""""" - -Many existing LLVM APIs use ``std::error_code`` and its partner ``ErrorOr`` -(which plays the same role as ``Expected``, but wraps a ``std::error_code`` -rather than an ``Error``). The infectious nature of error types means that an -attempt to change one of these functions to return ``Error`` or ``Expected`` +Many existing LLVM APIs use `std::error_code` and its partner `ErrorOr` +(which plays the same role as `Expected`, but wraps a `std::error_code` +rather than an `Error`). The infectious nature of error types means that an +attempt to change one of these functions to return `Error` or `Expected` instead often results in an avalanche of changes to callers, callers of callers, -and so on. (The first such attempt, returning an ``Error`` from +and so on. (The first such attempt, returning an `Error` from MachOObjectFile's constructor, was abandoned after the diff reached 3000 lines, impacted half a dozen libraries, and was still growing). -To solve this problem, the ``Error``/``std::error_code`` interoperability requirement was -introduced. Two pairs of functions allow any ``Error`` value to be converted to a -``std::error_code``, any ``Expected`` to be converted to an ``ErrorOr``, and vice +To solve this problem, the `Error`/`std::error_code` interoperability requirement was +introduced. Two pairs of functions allow any `Error` value to be converted to a +`std::error_code`, any `Expected` to be converted to an `ErrorOr`, and vice versa: -.. 
code-block:: c++ - - std::error_code errorToErrorCode(Error Err); - Error errorCodeToError(std::error_code EC); - - template <typename T> ErrorOr<T> expectedToErrorOr(Expected<T> TOrErr); - template <typename T> Expected<T> errorOrToExpected(ErrorOr<T> TOrEC); - +```cpp +std::error_code errorToErrorCode(Error Err); +Error errorCodeToError(std::error_code EC); +template <typename T> ErrorOr<T> expectedToErrorOr(Expected<T> TOrErr); +template <typename T> Expected<T> errorOrToExpected(ErrorOr<T> TOrEC); +``` Using these APIs it is easy to make surgical patches that update individual -functions from ``std::error_code`` to ``Error``, and from ``ErrorOr<T>`` to -``Expected<T>``. +functions from `std::error_code` to `Error`, and from `ErrorOr<T>` to +`Expected<T>`. -Returning Errors from error handlers -"""""""""""""""""""""""""""""""""""" +##### Returning Errors from error handlers -Error recovery attempts may themselves fail. For that reason, ``handleErrors`` +Error recovery attempts may themselves fail. For that reason, `handleErrors` actually recognises three different forms of handler signature: -.. code-block:: c++ - - // Error must be handled, no new errors produced: - void(UserDefinedError &E); +```cpp +// Error must be handled, no new errors produced: +void(UserDefinedError &E); - // Error must be handled, new errors can be produced: - Error(UserDefinedError &E); +// Error must be handled, new errors can be produced: +Error(UserDefinedError &E); - // Original error can be inspected, then re-wrapped and returned (or a new - // error can be produced): - Error(std::unique_ptr<UserDefinedError> E); - -Any error returned from a handler will be returned from the ``handleErrors`` +// Original error can be inspected, then re-wrapped and returned (or a new +// error can be produced): +Error(std::unique_ptr<UserDefinedError> E); +``` +Any error returned from a handler will be returned from the `handleErrors` function so that it can be handled itself or propagated up the stack. -..
_err_exitonerr: +(err_exitonerr)= -Using ExitOnError to simplify tool code -""""""""""""""""""""""""""""""""""""""" +##### Using ExitOnError to simplify tool code -Library code should never call ``exit`` for a recoverable error; however, in tool +Library code should never call `exit` for a recoverable error; however, in tool code (especially command line tools) this can be a reasonable approach. Calling -``exit`` upon encountering an error dramatically simplifies control flow as the +`exit` upon encountering an error dramatically simplifies control flow as the error no longer needs to be propagated up the stack. This allows code to be written in a straight-line style, as long as each fallible call is wrapped in a -check and call to exit. The ``ExitOnError`` class supports this pattern by -providing call operators that inspect ``Error`` values, stripping the error away -in the success case and logging to ``stderr`` then exiting in the failure case. - -To use this class, declare a global ``ExitOnError`` variable in your program: - -.. code-block:: c++ +check and call to exit. The `ExitOnError` class supports this pattern by +providing call operators that inspect `Error` values, stripping the error away +in the success case and logging to `stderr` then exiting in the failure case. - ExitOnError ExitOnErr; +To use this class, declare a global `ExitOnError` variable in your program: -Calls to fallible functions can then be wrapped with a call to ``ExitOnErr``, +```cpp +ExitOnError ExitOnErr; +``` +Calls to fallible functions can then be wrapped with a call to `ExitOnErr`, turning them into non-failing calls: -.. code-block:: c++ - - Error mayFail(); - Expected mayFail2(); - - void foo() { - ExitOnErr(mayFail()); - int X = ExitOnErr(mayFail2()); - } - -On failure, the error's log message will be written to ``stderr``, optionally -preceded by a string "banner" that can be set by calling the ``setBanner`` method. 
A -mapping can also be supplied from ``Error`` values to exit codes using the -``setExitCodeMapper`` method: - -.. code-block:: c++ - - int main(int argc, char *argv[]) { - ExitOnErr.setBanner(std::string(argv[0]) + " error:"); - ExitOnErr.setExitCodeMapper( - [](const Error &Err) { - if (Err.isA()) - return 2; - return 1; - }); - -Use ``ExitOnError`` in your tool code where possible as it can greatly improve +```cpp +Error mayFail(); +Expected mayFail2(); + +void foo() { + ExitOnErr(mayFail()); + int X = ExitOnErr(mayFail2()); +} +``` +On failure, the error's log message will be written to `stderr`, optionally +preceded by a string "banner" that can be set by calling the `setBanner` method. A +mapping can also be supplied from `Error` values to exit codes using the +`setExitCodeMapper` method: + +```cpp +int main(int argc, char *argv[]) { + ExitOnErr.setBanner(std::string(argv[0]) + " error:"); + ExitOnErr.setExitCodeMapper( + [](const Error &Err) { + if (Err.isA()) + return 2; + return 1; + }); +``` +Use `ExitOnError` in your tool code where possible as it can greatly improve readability. -.. _err_cantfail: +(err_cantfail)= -Using cantFail to simplify safe callsites -""""""""""""""""""""""""""""""""""""""""" +##### Using cantFail to simplify safe callsites Some functions may only fail for a subset of their inputs, so calls using known safe inputs can be assumed to succeed. The cantFail functions encapsulate this by wrapping an assertion that their -argument is a success value and, in the case of ``Expected``, unwrapping the -``T`` value: +argument is a success value and, in the case of `Expected`, unwrapping the +`T` value: -.. code-block:: c++ +```cpp +Error onlyFailsForSomeXValues(int X); +Expected onlyFailsForSomeXValues2(int X); - Error onlyFailsForSomeXValues(int X); - Expected onlyFailsForSomeXValues2(int X); - - void foo() { - cantFail(onlyFailsForSomeXValues(KnownSafeValue)); - int Y = cantFail(onlyFailsForSomeXValues2(KnownSafeValue)); - ... 
- } - -Like the ExitOnError utility, ``cantFail`` simplifies control flow. Their treatment +void foo() { + cantFail(onlyFailsForSomeXValues(KnownSafeValue)); + int Y = cantFail(onlyFailsForSomeXValues2(KnownSafeValue)); + ... +} +``` +Like the ExitOnError utility, `cantFail` simplifies control flow. Their treatment of error cases is very different, however: Where ExitOnError is guaranteed to -terminate the program on an error input, ``cantFail`` simply asserts that the result +terminate the program on an error input, `cantFail` simply asserts that the result is success. In debug builds this will result in an assertion failure if an error -is encountered. In release builds, the behavior of ``cantFail`` for failure values is -undefined. As such, care must be taken in the use of ``cantFail``: clients must be -certain that a ``cantFail`` wrapped call really can not fail with the given +is encountered. In release builds, the behavior of `cantFail` for failure values is +undefined. As such, care must be taken in the use of `cantFail`: clients must be +certain that a `cantFail` wrapped call really can not fail with the given arguments. -Use of the ``cantFail`` functions should be rare in library code, but they are +Use of the `cantFail` functions should be rare in library code, but they are likely to be of more use in tool and unit-test code where inputs and/or mocked-up classes or functions may be known to be safe. -Fallible constructors -""""""""""""""""""""" +##### Fallible constructors Some classes require resource acquisition or other complex initialization that can fail during construction. Unfortunately constructors can't return errors, and having clients test objects after they're constructed to ensure that they're valid is error prone as it's all too easy to forget the test. To work around -this, use the named constructor idiom and return an ``Expected``: +this, use the named constructor idiom and return an `Expected`: -.. 
code-block:: c++ +```cpp +class Foo { +public: - class Foo { - public: - - static Expected Create(Resource R1, Resource R2) { - Error Err = Error::success(); - Foo F(R1, R2, Err); - if (Err) - return std::move(Err); - return std::move(F); - } + static Expected Create(Resource R1, Resource R2) { + Error Err = Error::success(); + Foo F(R1, R2, Err); + if (Err) + return std::move(Err); + return std::move(F); + } - private: +private: - Foo(Resource R1, Resource R2, Error &Err) { - ErrorAsOutParameter EAO(&Err); - if (auto Err2 = R1.acquire()) { - Err = std::move(Err2); - return; - } - Err = R2.acquire(); + Foo(Resource R1, Resource R2, Error &Err) { + ErrorAsOutParameter EAO(&Err); + if (auto Err2 = R1.acquire()) { + Err = std::move(Err2); + return; } - }; - - -Here, the named constructor passes an ``Error`` by reference into the actual + Err = R2.acquire(); + } +}; +``` +Here, the named constructor passes an `Error` by reference into the actual constructor, which the constructor can then use to return errors. The -``ErrorAsOutParameter`` utility sets the ``Error`` value's checked flag on entry +`ErrorAsOutParameter` utility sets the `Error` value's checked flag on entry to the constructor so that the error can be assigned to, then resets it on exit to force the client (the named constructor) to check the error. By using this idiom, clients attempting to construct a Foo receive either a well-formed Foo or an Error, never an object in an invalid state. -Propagating and consuming errors based on types -""""""""""""""""""""""""""""""""""""""""""""""" +##### Propagating and consuming errors based on types In some contexts, certain types of errors are known to be benign. For example, when walking an archive, some clients may be happy to skip over badly formatted object files rather than terminating the walk immediately. 
Skipping badly formatted objects could be achieved using an elaborate handler method, but the -``Error.h`` header provides two utilities that make this idiom much cleaner: the -type inspection method, ``isA``, and the ``consumeError`` function: - -.. code-block:: c++ - - Error walkArchive(Archive A) { - for (unsigned I = 0; I != A.numMembers(); ++I) { - auto ChildOrErr = A.getMember(I); - if (auto Err = ChildOrErr.takeError()) { - if (Err.isA()) - consumeError(std::move(Err)) - else - return Err; - } - auto &Child = *ChildOrErr; - // Use Child - ... +`Error.h` header provides two utilities that make this idiom much cleaner: the +type inspection method, `isA`, and the `consumeError` function: + +```cpp +Error walkArchive(Archive A) { + for (unsigned I = 0; I != A.numMembers(); ++I) { + auto ChildOrErr = A.getMember(I); + if (auto Err = ChildOrErr.takeError()) { + if (Err.isA()) + consumeError(std::move(Err)) + else + return Err; } - return Error::success(); + auto &Child = *ChildOrErr; + // Use Child + ... } + return Error::success(); +} +``` +##### Concatenating Errors with joinErrors -Concatenating Errors with joinErrors -"""""""""""""""""""""""""""""""""""" - -In the archive walking example above, ``BadFileFormat`` errors are simply +In the archive walking example above, `BadFileFormat` errors are simply consumed and ignored. If the client had wanted to report these errors after -completing the walk over the archive they could use the ``joinErrors`` utility: - -.. code-block:: c++ - - Error walkArchive(Archive A) { - Error DeferredErrs = Error::success(); - for (unsigned I = 0; I != A.numMembers(); ++I) { - auto ChildOrErr = A.getMember(I); - if (auto Err = ChildOrErr.takeError()) - if (Err.isA()) - DeferredErrs = joinErrors(std::move(DeferredErrs), std::move(Err)); - else - return Err; - auto &Child = *ChildOrErr; - // Use Child - ... 
- } - return DeferredErrs; +completing the walk over the archive they could use the `joinErrors` utility: + +```cpp +Error walkArchive(Archive A) { + Error DeferredErrs = Error::success(); + for (unsigned I = 0; I != A.numMembers(); ++I) { + auto ChildOrErr = A.getMember(I); + if (auto Err = ChildOrErr.takeError()) + if (Err.isA()) + DeferredErrs = joinErrors(std::move(DeferredErrs), std::move(Err)); + else + return Err; + auto &Child = *ChildOrErr; + // Use Child + ... } - -The ``joinErrors`` routine builds a special error type called ``ErrorList``, -which holds a list of user-defined errors. The ``handleErrors`` routine + return DeferredErrs; +} +``` +The `joinErrors` routine builds a special error type called `ErrorList`, +which holds a list of user-defined errors. The `handleErrors` routine recognizes this type and will attempt to handle each of the contained errors in -order. If all contained errors can be handled, ``handleErrors`` will return -``Error::success()``; otherwise, ``handleErrors`` will concatenate the remaining -errors and return the resulting ``ErrorList``. +order. If all contained errors can be handled, `handleErrors` will return +`Error::success()`; otherwise, `handleErrors` will concatenate the remaining +errors and return the resulting `ErrorList`. -Building fallible iterators and iterator ranges -""""""""""""""""""""""""""""""""""""""""""""""" +##### Building fallible iterators and iterator ranges The archive walking examples above retrieve archive members by index; however, this requires considerable boilerplate for iteration and error checking. We can clean this up by using the "fallible iterator" pattern, which supports the following natural iteration idiom for fallible containers like Archive: -.. 
code-block:: c++ +```cpp +Error Err = Error::success(); +for (auto &Child : Ar->children(Err)) { + // Use Child - only enter the loop when it's valid - Error Err = Error::success(); - for (auto &Child : Ar->children(Err)) { - // Use Child - only enter the loop when it's valid - - // Allow early exit from the loop body, since we know that Err is success - // when we're inside the loop. - if (BailOutOn(Child)) - return; - - ... - } - // Check Err after the loop to ensure it didn't break due to an error. - if (Err) - return Err; + // Allow early exit from the loop body, since we know that Err is success + // when we're inside the loop. + if (BailOutOn(Child)) + return; + ... +} +// Check Err after the loop to ensure it didn't break due to an error. +if (Err) + return Err; +``` To enable this idiom, iterators over fallible containers are written in a -natural style, with their ``++`` and ``--`` operators replaced with fallible -``Error inc()`` and ``Error dec()`` functions. E.g.: - -.. code-block:: c++ - - class FallibleChildIterator { - public: - FallibleChildIterator(Archive &A, unsigned ChildIdx); - Archive::Child &operator*(); - friend bool operator==(const ArchiveIterator &LHS, - const ArchiveIterator &RHS); - - // operator++/operator-- replaced with fallible increment / decrement: - Error inc() { - if (!A.childValid(ChildIdx + 1)) - return make_error(...); - ++ChildIdx; - return Error::success(); - } - - Error dec() { ... } - }; +natural style, with their `++` and `--` operators replaced with fallible +`Error inc()` and `Error dec()` functions. 
E.g.: + +```cpp +class FallibleChildIterator { +public: + FallibleChildIterator(Archive &A, unsigned ChildIdx); + Archive::Child &operator*(); + friend bool operator==(const ArchiveIterator &LHS, + const ArchiveIterator &RHS); + + // operator++/operator-- replaced with fallible increment / decrement: + Error inc() { + if (!A.childValid(ChildIdx + 1)) + return make_error(...); + ++ChildIdx; + return Error::success(); + } + Error dec() { ... } +}; +``` Instances of this kind of fallible iterator interface are then wrapped with the -fallible_iterator utility which provides ``operator++`` and ``operator--``, +fallible_iterator utility which provides `operator++` and `operator--`, returning any errors via a reference passed in to the wrapper at construction time. The fallible_iterator wrapper takes care of (a) jumping to the end of the range on error, and (b) marking the error as checked whenever an iterator is -compared to ``end`` and found to be unequal (in particular, this marks the +compared to `end` and found to be unequal (in particular, this marks the error as checked throughout the body of a range-based for loop), enabling early exit from the loop without redundant error checking. Instances of the fallible iterator interface (e.g., FallibleChildIterator above) -are wrapped using the ``make_fallible_itr`` and ``make_fallible_end`` +are wrapped using the `make_fallible_itr` and `make_fallible_end` functions. E.g.: -.. 
code-block:: c++ +```cpp +class Archive { +public: + using child_iterator = fallible_iterator; - class Archive { - public: - using child_iterator = fallible_iterator; - - child_iterator child_begin(Error &Err) { - return make_fallible_itr(FallibleChildIterator(*this, 0), Err); - } - - child_iterator child_end() { - return make_fallible_end(FallibleChildIterator(*this, size())); - } + child_iterator child_begin(Error &Err) { + return make_fallible_itr(FallibleChildIterator(*this, 0), Err); + } - iterator_range children(Error &Err) { - return make_range(child_begin(Err), child_end()); - } - }; + child_iterator child_end() { + return make_fallible_end(FallibleChildIterator(*this, size())); + } + iterator_range children(Error &Err) { + return make_range(child_begin(Err), child_end()); + } +}; +``` Using the fallible_iterator utility allows for both natural construction of -fallible iterators (using failing ``inc`` and ``dec`` operations) and +fallible iterators (using failing `inc` and `dec` operations) and relatively natural use of C++ iterator/loop idioms. -.. _function_apis: +(function_apis)= More information on Error and its related utilities can be found in the -``Error.h`` header file. +`Error.h` header file. -Passing functions and other callable objects --------------------------------------------- +### Passing functions and other callable objects Sometimes you may want a function to be passed a callback object. In order to support lambda expressions and other function objects, you should not use the traditional C approach of taking a function pointer and an opaque cookie: -.. 
code-block:: c++ - - void takeCallback(bool (*Callback)(Function *, void *), void *Cookie); - +```cpp +void takeCallback(bool (*Callback)(Function *, void *), void *Cookie); +``` Instead, use one of the following approaches: -Function template -^^^^^^^^^^^^^^^^^ +#### Function template If you don't mind putting the definition of your function into a header file, make it a function template that is templated on the callable type. -.. code-block:: c++ - - template - void takeCallback(Callable Callback) { - Callback(1, 2, 3); - } +```cpp +template +void takeCallback(Callable Callback) { + Callback(1, 2, 3); +} +``` +#### The `function_ref` class template -The ``function_ref`` class template -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -The ``function_ref`` -(`doxygen `__) class +The `function_ref` +([doxygen](https://llvm.org/doxygen/classllvm_1_1function__ref_3_01Ret_07Params_8_8_8_08_4.html)) class template represents a reference to a callable object, templated over the type of the callable. This is a good choice for passing a callback to a function, if you don't need to hold onto the callback after the function returns. In this -way, ``function_ref`` is to ``std::function`` as ``StringRef`` is to -``std::string``. +way, `function_ref` is to `std::function` as `StringRef` is to +`std::string`. -``function_ref`` can be implicitly constructed from -any callable object that can be called with arguments of type ``Param1``, -``Param2``, ..., and returns a value that can be converted to type ``Ret``. +`function_ref` can be implicitly constructed from +any callable object that can be called with arguments of type `Param1`, +`Param2`, ..., and returns a value that can be converted to type `Ret`. For example: -.. 
code-block:: c++ - - void visitBasicBlocks(Function *F, function_ref Callback) { - for (BasicBlock &BB : *F) - if (Callback(&BB)) - return; - } - +```cpp +void visitBasicBlocks(Function *F, function_ref Callback) { + for (BasicBlock &BB : *F) + if (Callback(&BB)) + return; +} +``` can be called using: -.. code-block:: c++ - - visitBasicBlocks(F, [&](BasicBlock *BB) { - if (process(BB)) - return isEmpty(BB); - return false; - }); - -Note that a ``function_ref`` object contains pointers to external memory, so it +```cpp +visitBasicBlocks(F, [&](BasicBlock *BB) { + if (process(BB)) + return isEmpty(BB); + return false; +}); +``` +Note that a `function_ref` object contains pointers to external memory, so it is not generally safe to store an instance of the class (unless you know that the external storage will not be freed). If you need this ability, consider -using ``std::function``. ``function_ref`` is small enough that it should always +using `std::function`. `function_ref` is small enough that it should always be passed by value. -.. _DEBUG: +(DEBUG)= -The ``LDBG`` and ``LLVM_DEBUG()`` macros and ``-debug`` option --------------------------------------------------------------- +### The `LDBG` and `LLVM_DEBUG()` macros and `-debug` option Often, when working on your pass, you will put a bunch of debugging printouts and other code into your pass. After you get it working, you want to remove it, but @@ -1170,158 +1080,142 @@ Naturally, because of this, you don't want to delete the debug printouts, but you don't want them to always be noisy. A standard compromise is to comment them out, allowing you to enable them if you need them in the future. -The ``llvm/Support/DebugLog.h`` file provides a macro named ``LDBG`` that is a +The `llvm/Support/DebugLog.h` file provides a macro named `LDBG` that is a more convenient way to add debug output to your code. It is a macro that provides a raw_ostream that is used to write the debug output. -.. 
code-block:: c++ - - LDBG() << "I am here!"; - +```cpp +LDBG() << "I am here!"; +``` It'll only print the output if the debug output is enabled. It also supports a `level` argument to control the verbosity of the output. -.. code-block:: c++ - - LDBG(2) << "I am here!"; - -A ``DEBUG_TYPE`` macro may optionally be defined in the file before using -``LDBG()``, otherwise the file name is used as the debug type. +```cpp +LDBG(2) << "I am here!"; +``` +A `DEBUG_TYPE` macro may optionally be defined in the file before using +`LDBG()`, otherwise the file name is used as the debug type. The file name and line number are automatically added to the output, as well as a terminating newline. -The debug output can be enabled by passing the ``-debug`` command line argument. - -.. code-block:: none +The debug output can be enabled by passing the `-debug` command line argument. - $ opt < a.bc > /dev/null -mypass - - $ opt < a.bc > /dev/null -mypass -debug - [my-pass MyPass.cpp:123 2] I am here! - -While ``LDBG()`` is useful to add debug output to your code, there are cases +```none +$ opt < a.bc > /dev/null -mypass + +$ opt < a.bc > /dev/null -mypass -debug +[my-pass MyPass.cpp:123 2] I am here! +``` +While `LDBG()` is useful to add debug output to your code, there are cases where you may need to guard a block of code with a debug check. The -``llvm/Support/Debug.h`` (`doxygen -`__) file provides a macro named -``LLVM_DEBUG()`` that offers a solution to this problem. You can put arbitrary -code into the argument of the ``LLVM_DEBUG`` macro, and it is only executed if -'``opt``' (or any other tool) is run with the '``-debug``' command +`llvm/Support/Debug.h` ([doxygen](https://llvm.org/doxygen/Debug_8h_source.html)) file provides a macro named +`LLVM_DEBUG()` that offers a solution to this problem. You can put arbitrary +code into the argument of the `LLVM_DEBUG` macro, and it is only executed if +'`opt`' (or any other tool) is run with the '`-debug`' command line argument. -.. 
code-block:: c++ - - LLVM_DEBUG({ - llvm::ErrorOr> logBuffer = - llvm::MemoryBuffer::getFile(logFile->first); - if (logBuffer && !(*logBuffer)->getBuffer().empty()) { - LDBG() << "Output:\n" << (*logBuffer)->getBuffer(); - } - }); - - +```cpp +LLVM_DEBUG({ + llvm::ErrorOr> logBuffer = + llvm::MemoryBuffer::getFile(logFile->first); + if (logBuffer && !(*logBuffer)->getBuffer().empty()) { + LDBG() << "Output:\n" << (*logBuffer)->getBuffer(); + } +}); +``` Using these macros instead of a home-brewed solution allows you to not have to create "yet another" command-line option for the debug output for your pass. -Note that ``LDBG()`` and ``LLVM_DEBUG()`` macros are disabled for non-asserts +Note that `LDBG()` and `LLVM_DEBUG()` macros are disabled for non-asserts builds, so they do not cause a performance impact at all (for the same reason, they should also not contain side-effects!). -One additional nice thing about the ``LDBG()`` and ``LLVM_DEBUG()`` macros is +One additional nice thing about the `LDBG()` and `LLVM_DEBUG()` macros is that you can enable or disable it directly in gdb. Just use -"``set DebugFlag=0``" or "``set DebugFlag=1``" from the gdb if the program is +"`set DebugFlag=0`" or "`set DebugFlag=1`" from the gdb if the program is running. If the program hasn't been started yet, you can always just run it -with ``-debug``. +with `-debug`. -.. _DEBUG_TYPE: +(DEBUG_TYPE)= -Fine grained debug info with ``DEBUG_TYPE`` and the ``-debug-only`` option -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Fine grained debug info with `DEBUG_TYPE` and the `-debug-only` option -Sometimes, you may find yourself in a situation where enabling ``-debug`` just +Sometimes, you may find yourself in a situation where enabling `-debug` just turns on **too much** information (such as when working on the code generator). 
If you want to enable debug information with more fine-grained control, you can control the debug type and level associated with each logging statement as follows: -.. code-block:: c++ - - #define DEBUG_TYPE "foo" // Optional: the file name is used instead if not defined - LDBG(2) << "Hello,"; - // DEBUG_TYPE can be overridden locally, here with "bar" - LDBG("bar", 3) << "'bar' debug type"; - - +```cpp +#define DEBUG_TYPE "foo" // Optional: the file name is used instead if not defined +LDBG(2) << "Hello,"; +// DEBUG_TYPE can be overridden locally, here with "bar" +LDBG("bar", 3) << "'bar' debug type"; +``` A more fine-grained control of the output can be achieved by passing the -``-debug-only`` command line argument: - -.. code-block:: none - - $ opt < a.bc > /dev/null -mypass -debug-only=foo - [foo MyPass.cpp:123 2] Hello, - $ opt < a.bc > /dev/null -mypass -debug-only=foo,bar - [foo MyPass.cpp:123 2] Hello, - [bar MyPass.cpp:124 3] World! - $ opt < a.bc > /dev/null -mypass -debug-only=bar - [bar MyPass.cpp:124 3] World! - +`-debug-only` command line argument: + +```none +$ opt < a.bc > /dev/null -mypass -debug-only=foo +[foo MyPass.cpp:123 2] Hello, +$ opt < a.bc > /dev/null -mypass -debug-only=foo,bar +[foo MyPass.cpp:123 2] Hello, +[bar MyPass.cpp:124 3] World! +$ opt < a.bc > /dev/null -mypass -debug-only=bar +[bar MyPass.cpp:124 3] World! +``` The debug-only argument is a comma separated list of debug types and levels. The level is an optional integer setting the maximum debug level to enable: -.. code-block:: none - - $ opt < a.bc > /dev/null -mypass -debug-only=foo:2,bar:2 - [foo MyPass.cpp:123 2] Hello, - $ opt < a.bc > /dev/null -mypass -debug-only=foo:1,bar:3 - [bar MyPass.cpp:124 3] World! - -Instead of opting in specific debug types, the ``-debug-only`` option also +```none +$ opt < a.bc > /dev/null -mypass -debug-only=foo:2,bar:2 +[foo MyPass.cpp:123 2] Hello, +$ opt < a.bc > /dev/null -mypass -debug-only=foo:1,bar:3 +[bar MyPass.cpp:124 3] World!
+``` +Instead of opting in specific debug types, the `-debug-only` option also works to filter out debug output for specific debug types, by omitting the level (or setting it to 0): -.. code-block:: none - - $ opt < a.bc > /dev/null -mypass -debug-only=foo: - [bar MyPass.cpp:124 3] World! - $ opt < a.bc > /dev/null -mypass -debug-only=bar:0,foo: - - -In practice, you should only set ``DEBUG_TYPE`` at the top of a file, to +```none +$ opt < a.bc > /dev/null -mypass -debug-only=foo: +[bar MyPass.cpp:124 3] World! +$ opt < a.bc > /dev/null -mypass -debug-only=bar:0,foo: +``` +In practice, you should only set `DEBUG_TYPE` at the top of a file, to specify the debug type for the entire module. Be careful that you only do -this after you're done including headers (in particular ``Debug.h``/``DebugLog.h``). +this after you're done including headers (in particular `Debug.h`/`DebugLog.h`). Also, you should use names more meaningful than "foo" and "bar", because there is no system in place to ensure that names do not conflict. If two different modules use the same string, they will all be turned on when the name is specified. This allows, for example, all debug information for instruction scheduling to be -enabled with ``-debug-only=InstrSched``, even if the source lives in multiple +enabled with `-debug-only=InstrSched`, even if the source lives in multiple files. The name must not include a comma (,) as that is used to separate the -arguments of the ``-debug-only`` option. +arguments of the `-debug-only` option. For performance reasons, -debug-only is not available in non-asserts build of LLVM. -The ``DEBUG_WITH_TYPE`` macro is an alternative to the ``LLVM_DEBUG()`` macro -for situations where you would like to set ``DEBUG_TYPE``, but only for one -specific ``LLVM_DEBUG`` statement. 
It takes an additional first parameter, +The `DEBUG_WITH_TYPE` macro is an alternative to the `LLVM_DEBUG()` macro +for situations where you would like to set `DEBUG_TYPE`, but only for one +specific `LLVM_DEBUG` statement. It takes an additional first parameter, which is the type to use. The example from the previous section could be written as: -.. code-block:: c++ - - DEBUG_WITH_TYPE("special-type", { - llvm::ErrorOr> logBuffer = - llvm::MemoryBuffer::getFile(logFile->first); - if (logBuffer && !(*logBuffer)->getBuffer().empty()) { - LDBG("special-type") << "Output:\n" << (*logBuffer)->getBuffer(); - } - }); - -.. _Statistic: +```cpp +DEBUG_WITH_TYPE("special-type", { + llvm::ErrorOr> logBuffer = + llvm::MemoryBuffer::getFile(logFile->first); + if (logBuffer && !(*logBuffer)->getBuffer().empty()) { + LDBG("special-type") << "Output:\n" << (*logBuffer)->getBuffer(); + } +}); +``` +(Statistic)= -The ``Statistic`` class & ``-stats`` option -------------------------------------------- +### The `Statistic` class & `-stats` option -The ``llvm/ADT/Statistic.h`` (`doxygen -`__) file provides a class -named ``Statistic`` that is used as a unified way to keep track of what the LLVM +The `llvm/ADT/Statistic.h` ([doxygen](https://llvm.org/doxygen/Statistic_8h_source.html)) file provides a class +named `Statistic` that is used as a unified way to keep track of what the LLVM compiler is doing and how effective various optimizations are. It is useful to see what optimizations are contributing to making a particular program run faster. @@ -1329,81 +1223,76 @@ faster. Often you may run your pass on some big program, and you're interested to see how many times it makes a certain transformation. Although you can do this with hand inspection, or some ad-hoc method, this is a real pain and not very useful -for big programs. Using the ``Statistic`` class makes it very easy to keep +for big programs. 
Using the `Statistic` class makes it very easy to keep track of this information, and the calculated information is presented in a uniform manner with the rest of the passes being executed. -There are many examples of ``Statistic`` uses, but the basics of using it are as +There are many examples of `Statistic` uses, but the basics of using it are as follows: Define your statistic like this: -.. code-block:: c++ - - #define DEBUG_TYPE "mypassname" // This goes after any #includes. - STATISTIC(NumXForms, "The # of times I did stuff"); - -The ``STATISTIC`` macro defines a static variable, whose name is specified by -the first argument. The pass name is taken from the ``DEBUG_TYPE`` macro, and +```cpp +#define DEBUG_TYPE "mypassname" // This goes after any #includes. +STATISTIC(NumXForms, "The # of times I did stuff"); +``` +The `STATISTIC` macro defines a static variable, whose name is specified by +the first argument. The pass name is taken from the `DEBUG_TYPE` macro, and the description is taken from the second argument. The variable defined ("NumXForms" in this case) acts like an unsigned integer. Whenever you make a transformation, bump the counter: -.. code-block:: c++ - - ++NumXForms; // I did stuff! - -That's all you have to do. To get '``opt``' to print out the statistics -gathered, use the '``-stats``' option: - -.. code-block:: none - - $ opt -stats -mypassname < program.bc > /dev/null - ... statistics output ... - -Note that in order to use the '``-stats``' option, LLVM must be +```cpp +++NumXForms; // I did stuff! +``` +That's all you have to do. To get '`opt`' to print out the statistics +gathered, use the '`-stats`' option: + +```none +$ opt -stats -mypassname < program.bc > /dev/null +... statistics output ... +``` +Note that in order to use the '`-stats`' option, LLVM must be compiled with assertions enabled. 
-When running ``opt`` on a C file from the SPEC benchmark suite, it gives a +When running `opt` on a C file from the SPEC benchmark suite, it gives a report that looks like this: -.. code-block:: none - - 7646 bitcodewriter - Number of normal instructions - 725 bitcodewriter - Number of oversized instructions - 129996 bitcodewriter - Number of bitcode bytes written - 2817 raise - Number of insts DCEd or constprop'd - 3213 raise - Number of cast-of-self removed - 5046 raise - Number of expression trees converted - 75 raise - Number of other getelementptr's formed - 138 raise - Number of load/store peepholes - 42 deadtypeelim - Number of unused typenames removed from symtab - 392 funcresolve - Number of varargs functions resolved - 27 globaldce - Number of global variables removed - 2 adce - Number of basic blocks removed - 134 cee - Number of branches revectored - 49 cee - Number of setcc instruction eliminated - 532 gcse - Number of loads removed - 2919 gcse - Number of instructions removed - 86 indvars - Number of canonical indvars added - 87 indvars - Number of aux indvars removed - 25 instcombine - Number of dead inst eliminate - 434 instcombine - Number of insts combined - 248 licm - Number of load insts hoisted - 1298 licm - Number of insts hoisted to a loop pre-header - 3 licm - Number of insts hoisted to multiple loop preds (bad, no loop pre-header) - 75 mem2reg - Number of alloca's promoted - 1444 cfgsimplify - Number of blocks simplified - +```none + 7646 bitcodewriter - Number of normal instructions + 725 bitcodewriter - Number of oversized instructions +129996 bitcodewriter - Number of bitcode bytes written + 2817 raise - Number of insts DCEd or constprop'd + 3213 raise - Number of cast-of-self removed + 5046 raise - Number of expression trees converted + 75 raise - Number of other getelementptr's formed + 138 raise - Number of load/store peepholes + 42 deadtypeelim - Number of unused typenames removed from symtab + 392 funcresolve - Number of varargs 
functions resolved + 27 globaldce - Number of global variables removed + 2 adce - Number of basic blocks removed + 134 cee - Number of branches revectored + 49 cee - Number of setcc instruction eliminated + 532 gcse - Number of loads removed + 2919 gcse - Number of instructions removed + 86 indvars - Number of canonical indvars added + 87 indvars - Number of aux indvars removed + 25 instcombine - Number of dead inst eliminate + 434 instcombine - Number of insts combined + 248 licm - Number of load insts hoisted + 1298 licm - Number of insts hoisted to a loop pre-header + 3 licm - Number of insts hoisted to multiple loop preds (bad, no loop pre-header) + 75 mem2reg - Number of alloca's promoted + 1444 cfgsimplify - Number of blocks simplified +``` Obviously, with so many optimizations, having a unified framework for this stuff is very nice. Making your pass fit well into the framework makes it more maintainable and useful. -.. _DebugCounters: +(DebugCounters)= -Adding debug counters to aid in debugging your code ---------------------------------------------------- +### Adding debug counters to aid in debugging your code Sometimes, when writing new passes or trying to track down bugs, it is useful to be able to control whether certain things in your pass @@ -1414,50 +1303,46 @@ automatically, using bisection. This is where debug counters help. They provide a framework for making parts of your code only execute a certain number of times. -The ``llvm/Support/DebugCounter.h`` (`doxygen -`__) file -provides a class named ``DebugCounter`` that can be used to create +The `llvm/Support/DebugCounter.h` ([doxygen](https://llvm.org/doxygen/DebugCounter_8h_source.html)) file +provides a class named `DebugCounter` that can be used to create command-line counter options that control execution of parts of your code. -Define your ``DebugCounter`` like this: +Define your `DebugCounter` like this: -.. 
code-block:: c++ - - DEBUG_COUNTER(DeleteAnInstruction, "passname-delete-instruction", +```cpp +DEBUG_COUNTER(DeleteAnInstruction, "passname-delete-instruction", "Controls which instructions get delete"); +``` -The ``DEBUG_COUNTER`` macro defines a static variable, whose name +The `DEBUG_COUNTER` macro defines a static variable, whose name is specified by the first argument. The name of the counter (which is used on the command line) is specified by the second argument, and the description used in the help is specified by the third argument. -Whatever code you want to control, use ``DebugCounter::shouldExecute`` to control it. - -.. code-block:: c++ - - if (DebugCounter::shouldExecute(DeleteAnInstruction)) - I->eraseFromParent(); +Whatever code you want to control, use `DebugCounter::shouldExecute` to control it. +```cpp +if (DebugCounter::shouldExecute(DeleteAnInstruction)) + I->eraseFromParent(); +``` That's all you have to do. Now, using opt, you can control when this code triggers using -the '``--debug-counter``' Options. To specify when to execute the codepath. - -.. code-block:: none - - $ opt --debug-counter=passname-delete-instruction=2-3 -passname +the '`--debug-counter`' Options. To specify when to execute the codepath. +```none +$ opt --debug-counter=passname-delete-instruction=2-3 -passname +``` This will skip the above code the first two times we hit it, then execute it 2 times, then skip the rest of the executions. So if executed on the following code: -.. code-block:: llvm - - %1 = add i32 %a, %b - %2 = add i32 %a, %b - %3 = add i32 %a, %b - %4 = add i32 %a, %b - -It would delete number ``%2`` and ``%3``. +```llvm +%1 = add i32 %a, %b +%2 = add i32 %a, %b +%3 = add i32 %a, %b +%4 = add i32 %a, %b +``` +It would delete number `%2` and `%3`. A utility is provided in `utils/bisect-skip-count` to binary search the begin and end of the range argument.
It can be used to automatically minimize the @@ -1470,77 +1355,71 @@ First, Figure out the number of calls to the debug counter you want to minimize. To do so, run the compilation command causing you want to minimize with `-print-debug-counter` adding a `-mllvm` if needed. Then find the line with the counter of interest. it should look like: -.. code-block:: none - - my-counter : {5678,empty} - +```none +my-counter : {5678,empty} +``` The number of calls to `my-counter` is 5678 Then find the minimum set of chunks that is interesting, with `reduce-chunk-list`. Build a reproducer script like: -.. code-block:: bash - - #! /bin/bash - opt -debug-counter=my-counter=$1 - # ... Test result of the command. Failure of the script is considered interesting - +```bash +#! /bin/bash +opt -debug-counter=my-counter=$1 +# ... Test result of the command. Failure of the script is considered interesting +``` Then run `reduce-chunk-list my-script.sh 0-5678 2>&1 | tee dump_bisect` This command may take some time. but when it is done, it will print the result like: `Minimal Chunks = 0:1:5:11-12:33-34` -.. _ViewGraph: +(ViewGraph)= -Viewing graphs while debugging code ------------------------------------ +### Viewing graphs while debugging code Several of the important data structures in LLVM are graphs: for example CFGs -made out of LLVM :ref:`BasicBlocks `, CFGs made out of LLVM -:ref:`MachineBasicBlocks `, and :ref:`Instruction Selection -DAGs `. In many cases, while debugging various parts of the +made out of LLVM {ref}`BasicBlocks `, CFGs made out of LLVM +[MachineBasicBlocks](https://llvm.org/docs/CodeGenerator.html#machinebasicblock), and [Instruction Selection +DAGs](https://llvm.org/docs/CodeGenerator.html#selectiondag). In many cases, while debugging various parts of the compiler, it is nice to instantly visualize these graphs. LLVM provides several callbacks that are available in a debug build to do -exactly that. 
If you call the ``Function::viewCFG()`` method, for example, the +exactly that. If you call the `Function::viewCFG()` method, for example, the current LLVM tool will pop up a window containing the CFG for the function where each basic block is a node in the graph, and each node contains the instructions -in the block. Similarly, there also exists ``Function::viewCFGOnly()`` (does -not include the instructions), the ``MachineFunction::viewCFG()`` and -``MachineFunction::viewCFGOnly()``, and the ``SelectionDAG::viewGraph()`` -methods. Within GDB, for example, you can usually use something like ``call -DAG.viewGraph()`` to pop up a window. Alternatively, you can sprinkle calls to +in the block. Similarly, there also exists `Function::viewCFGOnly()` (does +not include the instructions), the `MachineFunction::viewCFG()` and +`MachineFunction::viewCFGOnly()`, and the `SelectionDAG::viewGraph()` +methods. Within GDB, for example, you can usually use something like `call +DAG.viewGraph()` to pop up a window. Alternatively, you can sprinkle calls to these functions in your code in places you want to debug. Getting this to work requires a small amount of setup. On Unix systems -with X11, install the `graphviz `_ toolkit, and make +with X11, install the [graphviz](http://www.graphviz.org) toolkit, and make sure 'dot' and 'gv' are in your path. If you are running on macOS, download -and install the macOS `Graphviz program -`_ and add -``/Applications/Graphviz.app/Contents/MacOS/`` (or wherever you install it) to +and install the macOS [Graphviz program](http://www.pixelglow.com/graphviz/) and add +`/Applications/Graphviz.app/Contents/MacOS/` (or wherever you install it) to your path. The programs need not be present when configuring, building or running LLVM and can simply be installed when needed during an active debug session. -``SelectionDAG`` has been extended to make it easier to locate *interesting* -nodes in large complex graphs. 
From gdb, if you ``call DAG.setGraphColor(node, -"color")``, then the next ``call DAG.viewGraph()`` would highlight the node in -the specified color (choices of colors can be found at `colors -`_.) More complex node attributes -can be provided with ``call DAG.setGraphAttrs(node, "attributes")`` (choices can -be found at `Graph attributes `_.) +`SelectionDAG` has been extended to make it easier to locate *interesting* +nodes in large complex graphs. From gdb, if you `call DAG.setGraphColor(node, +"color")`, then the next `call DAG.viewGraph()` would highlight the node in +the specified color (choices of colors can be found at [colors](http://www.graphviz.org/doc/info/colors.html).) More complex node attributes +can be provided with `call DAG.setGraphAttrs(node, "attributes")` (choices can +be found at [Graph attributes](http://www.graphviz.org/doc/info/attrs.html).) If you want to restart and clear all the current graph attributes, then you can -``call DAG.clearGraphAttrs()``. +`call DAG.clearGraphAttrs()`. Note that graph visualization features are compiled out of Release builds to reduce file size. This means that you need a Debug+Asserts or Release+Asserts build to use these features. -.. _datastructure: +(datastructure)= -Picking the Right Data Structure for a Task -=========================================== +## Picking the Right Data Structure for a Task -LLVM has a plethora of data structures in the ``llvm/ADT/`` directory, and we +LLVM has a plethora of data structures in the `llvm/ADT/` directory, and we commonly use STL data structures. This section describes the trade-offs you should consider when you pick one. @@ -1550,7 +1429,7 @@ thing when choosing a container is the algorithmic properties of how you plan to access the container. Based on that, you should use: -* a :ref:`map-like ` container if you need efficient look-up of a +* a {ref}`map-like ` container if you need efficient look-up of a value based on another value. 
Map-like containers also support efficient queries for containment (whether a key is in the map). Map-like containers generally do not support efficient reverse mapping (values to keys). If you @@ -1558,20 +1437,20 @@ access the container. Based on that, you should use: iteration through the keys in sorted order. Map-like containers are the most expensive sort, only use them if you need one of these capabilities. -* a :ref:`set-like ` container if you need to put a bunch of stuff into +* a {ref}`set-like ` container if you need to put a bunch of stuff into a container that automatically eliminates duplicates. Some set-like containers support efficient iteration through the elements in sorted order. Set-like containers are more expensive than sequential containers. -* a :ref:`sequential ` container provides the most efficient way +* a {ref}`sequential ` container provides the most efficient way to add elements and keeps track of the order they are added to the collection. They permit duplicates and support efficient iteration, but do not support efficient look-up based on a key. -* a :ref:`string ` container is a specialized sequential container or +* a {ref}`string ` container is a specialized sequential container or reference structure that is used for character or byte arrays. -* a :ref:`bit ` container provides an efficient way to store and +* a {ref}`bit ` container provides an efficient way to store and perform set operations on sets of numeric id's, while automatically eliminating duplicates. Bit containers require a maximum of 1 bit for each identifier you want to store. @@ -1581,283 +1460,266 @@ memory use, constant factors, and cache behaviors of access by intelligently picking a member of the category. Note that constant factors and cache behavior can be a big deal. If you have a vector that usually only contains a few elements (but could contain many), for example, it's much better to use -:ref:`SmallVector ` than :ref:`vector `. 
Doing so +{ref}`SmallVector ` than {ref}`vector `. Doing so avoids (relatively) expensive malloc/free calls, which dwarf the cost of adding the elements to the container. -.. _ds_sequential: +(ds_sequential)= -Sequential Containers (std::vector, std::list, etc) ---------------------------------------------------- +### Sequential Containers (std::vector, std::list, etc) There are a variety of sequential containers available for you, based on your needs. Pick the first in this section that will do what you want. -.. _dss_arrayref: +(dss_arrayref)= -llvm/ADT/ArrayRef.h -^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/ArrayRef.h -The ``llvm::ArrayRef`` class is the preferred class to use in an interface that +The `llvm::ArrayRef` class is the preferred class to use in an interface that accepts a sequential list of elements in memory and just reads from them. By -taking an ``ArrayRef``, the API can be passed a fixed size array, an -``std::vector``, an ``llvm::SmallVector`` and anything else that is contiguous +taking an `ArrayRef`, the API can be passed a fixed size array, an +`std::vector`, an `llvm::SmallVector` and anything else that is contiguous in memory. -.. _dss_fixedarrays: +(dss_fixedarrays)= -Fixed Size Arrays -^^^^^^^^^^^^^^^^^ +#### Fixed Size Arrays Fixed size arrays are very simple and very fast. They are good if you know exactly how many elements you have, or you have a (low) upper bound on how many you have. -.. _dss_heaparrays: +(dss_heaparrays)= -Heap Allocated Arrays -^^^^^^^^^^^^^^^^^^^^^ +#### Heap Allocated Arrays -Heap allocated arrays (``new[]`` + ``delete[]``) are also simple. They are good +Heap allocated arrays (`new[]` + `delete[]`) are also simple. They are good if the number of elements is variable, if you know how many elements you will need before the array is allocated, and if the array is usually large (if not, -consider a :ref:`SmallVector `). The cost of a heap allocated +consider a {ref}`SmallVector `). 
The cost of a heap allocated array is the cost of the new/delete (aka malloc/free). Also note that if you are allocating an array of a type with a constructor, the constructor and destructors will be run for every element in the array (re-sizable vectors only construct those elements actually used). -.. _dss_tinyptrvector: +(dss_tinyptrvector)= -llvm/ADT/TinyPtrVector.h -^^^^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/TinyPtrVector.h -``TinyPtrVector`` is a highly specialized collection class that is +`TinyPtrVector` is a highly specialized collection class that is optimized to avoid allocation in the case when a vector has zero or one elements. It has two major restrictions: 1) it can only hold values of pointer type, and 2) it cannot hold a null pointer. Since this container is highly specialized, it is rarely used. -.. _dss_smallvector: +(dss_smallvector)= -llvm/ADT/SmallVector.h -^^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/SmallVector.h -``SmallVector`` is a simple class that looks and smells just like -``vector``: it supports efficient iteration, lays out elements in memory +`SmallVector` is a simple class that looks and smells just like +`vector`: it supports efficient iteration, lays out elements in memory order (so you can do pointer arithmetic between elements), supports efficient -``push_back``/``pop_back`` operations, supports efficient random access to its elements, +`push_back`/`pop_back` operations, supports efficient random access to its elements, etc. -The main advantage of ``SmallVector`` is that it allocates space for some number of -elements (N) **in the object itself**. Because of this, if the ``SmallVector`` is +The main advantage of `SmallVector` is that it allocates space for some number of +elements (N) **in the object itself**. Because of this, if the `SmallVector` is dynamically smaller than N, no malloc is performed. This can be a big win in cases where the malloc/free call is far more expensive than the code that fiddles around with the elements. 
This is good for vectors that are "usually small" (e.g., the number of predecessors/successors of a block is usually less than 8). On the other hand, -this makes the size of the ``SmallVector`` itself large, so you don't want to +this makes the size of the `SmallVector` itself large, so you don't want to allocate lots of them (doing so will waste a lot of space). As such, SmallVectors are most useful when on the stack. In the absence of a well-motivated choice for the number of -inlined elements ``N``, it is recommended to use ``SmallVector`` (that is, -omitting the ``N``). This will choose a default number of +inlined elements `N`, it is recommended to use `SmallVector` (that is, +omitting the `N`). This will choose a default number of inlined elements reasonable for allocation on the stack (for example, trying -to keep ``sizeof(SmallVector)`` around 64 bytes). +to keep `sizeof(SmallVector)` around 64 bytes). -``SmallVector`` also provides a nice portable and efficient replacement for -``alloca``. +`SmallVector` also provides a nice portable and efficient replacement for +`alloca`. -``SmallVector`` has grown a few other minor advantages over ``std::vector``, causing -``SmallVector`` to be preferred over ``std::vector``. +`SmallVector` has grown a few other minor advantages over `std::vector`, causing +`SmallVector` to be preferred over `std::vector`. -#. ``std::vector`` is exception-safe, and some implementations have pessimizations - that copy elements when ``SmallVector`` would move them. +1. `std::vector` is exception-safe, and some implementations have pessimizations + that copy elements when `SmallVector` would move them. -#. ``SmallVector`` understands ``std::is_trivially_copyable`` and uses realloc aggressively. +1. `SmallVector` understands `std::is_trivially_copyable` and uses realloc aggressively. -#. Many LLVM APIs take a ``SmallVectorImpl`` as an out parameter (see the note +1. 
Many LLVM APIs take a `SmallVectorImpl` as an out parameter (see the note below). -#. ``SmallVector`` with N equal to 0 is smaller than ``std::vector`` on 64-bit - platforms, since it uses ``unsigned`` (instead of ``void*``) for its size +1. `SmallVector` with N equal to 0 is smaller than `std::vector` on 64-bit + platforms, since it uses `unsigned` (instead of `void*`) for its size and capacity. -.. note:: - - Prefer to use ``ArrayRef`` or ``SmallVectorImpl`` as a parameter type. - - It's rarely appropriate to use ``SmallVector`` as a parameter type. - If an API only reads from the vector, it should use :ref:`ArrayRef - `. Even if an API updates the vector the "small size" is - unlikely to be relevant; such an API should use the ``SmallVectorImpl`` - class, which is the "vector header" (and methods) without the elements - allocated after it. Note that ``SmallVector`` inherits from - ``SmallVectorImpl`` so the conversion is implicit and costs nothing. E.g. - - .. code-block:: c++ - - // DISCOURAGED: Clients cannot pass e.g., raw arrays. - hardcodedContiguousStorage(const SmallVectorImpl &In); - // ENCOURAGED: Clients can pass any contiguous storage of Foo. - allowsAnyContiguousStorage(ArrayRef In); - - void someFunc1() { - Foo Vec[] = { /* ... */ }; - hardcodedContiguousStorage(Vec); // Error. - allowsAnyContiguousStorage(Vec); // Works. - } - - // DISCOURAGED: Clients cannot pass e.g., SmallVector. - hardcodedSmallSize(SmallVector &Out); - // ENCOURAGED: Clients can pass any SmallVector. - allowsAnySmallSize(SmallVectorImpl &Out); - - void someFunc2() { - SmallVector Vec; - hardcodedSmallSize(Vec); // Error. - allowsAnySmallSize(Vec); // Works. - } - - Even though it has "``Impl``" in the name, SmallVectorImpl is widely used - and is no longer "private to the implementation". A name like - ``SmallVectorHeader`` might be more appropriate. - -.. 
_dss_pagedvector: - -llvm/ADT/PagedVector.h -^^^^^^^^^^^^^^^^^^^^^^ - -``PagedVector`` is a random access container that allocates -``PageSize`` elements of type ``Type`` when the first element of a page is -accessed via the ``operator[]``. This is useful for cases where the number of +````{note} +Prefer to use `ArrayRef` or `SmallVectorImpl` as a parameter type. + +It's rarely appropriate to use `SmallVector` as a parameter type. +If an API only reads from the vector, it should use {ref}`ArrayRef `. Even if an API updates the vector the "small size" is +unlikely to be relevant; such an API should use the `SmallVectorImpl` +class, which is the "vector header" (and methods) without the elements +allocated after it. Note that `SmallVector` inherits from +`SmallVectorImpl` so the conversion is implicit and costs nothing. E.g. + +```cpp +// DISCOURAGED: Clients cannot pass e.g., raw arrays. +hardcodedContiguousStorage(const SmallVectorImpl &In); +// ENCOURAGED: Clients can pass any contiguous storage of Foo. +allowsAnyContiguousStorage(ArrayRef In); + +void someFunc1() { + Foo Vec[] = { /* ... */ }; + hardcodedContiguousStorage(Vec); // Error. + allowsAnyContiguousStorage(Vec); // Works. +} + +// DISCOURAGED: Clients cannot pass e.g., SmallVector. +hardcodedSmallSize(SmallVector &Out); +// ENCOURAGED: Clients can pass any SmallVector. +allowsAnySmallSize(SmallVectorImpl &Out); + +void someFunc2() { + SmallVector Vec; + hardcodedSmallSize(Vec); // Error. + allowsAnySmallSize(Vec); // Works. +} +``` +Even though it has "`Impl`" in the name, SmallVectorImpl is widely used +and is no longer "private to the implementation". A name like +`SmallVectorHeader` might be more appropriate. +```` +(dss_pagedvector)= + +#### llvm/ADT/PagedVector.h + +`PagedVector` is a random access container that allocates +`PageSize` elements of type `Type` when the first element of a page is +accessed via the `operator[]`. 
This is useful for cases where the number of elements is known in advance; their actual initialization is expensive; and they are sparsely used. This utility uses page-granular lazy initialization when the element is accessed. When the number of used pages is small significant memory savings can be achieved. -The main advantage is that a ``PagedVector`` allows to delay the actual +The main advantage is that a `PagedVector` allows to delay the actual allocation of the page until it's needed, at the extra cost of one pointer per page and one extra indirection when accessing elements with their positional index. In order to minimise the memory footprint of this container, it's important to -balance the ``PageSize`` so that it's not too small (otherwise, the overhead of the +balance the `PageSize` so that it's not too small (otherwise, the overhead of the pointer per page might become too high) and not too big (otherwise, the memory is wasted if the page is not fully used). Moreover, while retaining the order of the elements based on their insertion -index, like a vector, iterating over the elements via ``begin()`` and ``end()`` +index, like a vector, iterating over the elements via `begin()` and `end()` is not provided in the API, due to the fact that accessing the elements in order would allocate all the iterated pages, defeating memory savings and the purpose -of the ``PagedVector``. +of the `PagedVector`. -Finally, ``materialized_begin()`` and ``materialized_end`` iterators are +Finally, `materialized_begin()` and `materialized_end` iterators are provided to access the elements associated to the accessed pages, which could speed up operations that need to iterate over initialized elements in a non-ordered manner. -.. _dss_vector: +(dss_vector)= -<vector> -^^^^^^^^ +#### `<vector>` -``std::vector`` is well loved and respected. However, ``SmallVector`` -is often a better option due to the advantages listed above.
``std::vector`` is -still useful when you need to store more than ``UINT32_MAX`` elements or when +`std::vector` is well loved and respected. However, `SmallVector` +is often a better option due to the advantages listed above. `std::vector` is +still useful when you need to store more than `UINT32_MAX` elements or when interfacing with code that expects vectors :). -One worthwhile note about ``std::vector``: avoid code like this: - -.. code-block:: c++ - - for ( ... ) { - std::vector<foo> V; - // make use of V. - } +One worthwhile note about `std::vector`: avoid code like this: +```cpp +for ( ... ) { + std::vector<foo> V; + // make use of V. +} +``` Instead, write this as: -.. code-block:: c++ - - std::vector<foo> V; - for ( ... ) { - // make use of V. - V.clear(); - } - +```cpp +std::vector<foo> V; +for ( ... ) { + // make use of V. + V.clear(); +} +``` Doing so will save (at least) one heap allocation and free per iteration of the loop. -.. _dss_deque: +(dss_deque)= -<deque> -^^^^^^^ +#### `<deque>` -``std::deque`` is, in some senses, a generalized version of ``std::vector``. -Like ``std::vector``, it provides constant-time random access and other similar +`std::deque` is, in some senses, a generalized version of `std::vector`. +Like `std::vector`, it provides constant-time random access and other similar properties, but it also provides efficient access to the front of the list. It does not guarantee the continuity of elements within memory. -In exchange for this extra flexibility, ``std::deque`` has significantly higher -constant factor costs than ``std::vector``. If possible, use ``std::vector`` or +In exchange for this extra flexibility, `std::deque` has significantly higher +constant factor costs than `std::vector`. If possible, use `std::vector` or something cheaper. -.. _dss_list: +(dss_list)= -<list> -^^^^^^ +#### `<list>` -``std::list`` is an extremely inefficient class that is rarely useful. It +`std::list` is an extremely inefficient class that is rarely useful.
It performs a heap allocation for every element inserted into it, thus having an extremely high constant factor, particularly for small data types. -``std::list`` also only supports bidirectional iteration, not random access +`std::list` also only supports bidirectional iteration, not random access iteration. -In exchange for this high cost, ``std::list`` supports efficient access to both ends -of the list (like ``std::deque``, but unlike ``std::vector`` or -``SmallVector``). In addition, the iterator invalidation characteristics of -``std::list`` are stronger than that of a vector class: inserting or removing an +In exchange for this high cost, `std::list` supports efficient access to both ends +of the list (like `std::deque`, but unlike `std::vector` or +`SmallVector`). In addition, the iterator invalidation characteristics of +`std::list` are stronger than that of a vector class: inserting or removing an element into the list does not invalidate iterator or pointers to other elements in the list. -.. _dss_ilist: +(dss_ilist)= -llvm/ADT/ilist.h -^^^^^^^^^^^^^^^^ +#### llvm/ADT/ilist.h -``ilist`` implements an 'intrusive' doubly-linked list. It is intrusive, +`ilist` implements an 'intrusive' doubly-linked list. It is intrusive, because it requires the element to store and provide access to the prev/next pointers for the list. -``ilist`` has the same drawbacks as ``std::list``, and additionally requires an -``ilist_traits`` implementation for the element type, but it provides some novel +`ilist` has the same drawbacks as `std::list`, and additionally requires an +`ilist_traits` implementation for the element type, but it provides some novel characteristics. In particular, it can efficiently store polymorphic objects, the traits class is informed when an element is inserted or removed from the -list, and ``ilist``\ s are guaranteed to support a constant-time splice +list, and `ilist`s are guaranteed to support a constant-time splice operation. 
-An ``ilist`` and an ``iplist`` are ``using`` aliases to one another and the +An `ilist` and an `iplist` are `using` aliases to one another and the latter only currently exists for historical purposes. -These properties are exactly what we want for things like ``Instruction``\ s and -basic blocks, which is why these are implemented with ``ilist``\ s. +These properties are exactly what we want for things like `Instruction`s and +basic blocks, which is why these are implemented with `ilist`s. Related classes of interest are explained in the following subsections: -* :ref:`ilist_traits ` +* {ref}`ilist_traits ` -* :ref:`llvm/ADT/ilist_node.h ` +* {ref}`llvm/ADT/ilist_node.h ` -* :ref:`Sentinels ` +* {ref}`Sentinels ` -.. _dss_packedvector: +(dss_packedvector)= -llvm/ADT/PackedVector.h -^^^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/PackedVector.h Useful for storing a vector of values using only a few bits for each value. Apart from the standard operations of a vector-like container, it can @@ -1865,155 +1727,146 @@ also perform an 'or' set operation. For example: -.. code-block:: c++ - - enum State { - None = 0x0, - FirstCondition = 0x1, - SecondCondition = 0x2, - Both = 0x3 - }; +```cpp +enum State { + None = 0x0, + FirstCondition = 0x1, + SecondCondition = 0x2, + Both = 0x3 +}; - State get() { - PackedVector Vec1; - Vec1.push_back(FirstCondition); +State get() { + PackedVector Vec1; + Vec1.push_back(FirstCondition); - PackedVector Vec2; - Vec2.push_back(SecondCondition); + PackedVector Vec2; + Vec2.push_back(SecondCondition); - Vec1 |= Vec2; - return Vec1[0]; // returns 'Both'. - } + Vec1 |= Vec2; + return Vec1[0]; // returns 'Both'. +} +``` +(dss_ilist_traits)= -.. _dss_ilist_traits: +#### ilist_traits -ilist_traits -^^^^^^^^^^^^ - -``ilist_traits`` is ``ilist``'s customization mechanism. ``ilist`` +`ilist_traits` is `ilist`'s customization mechanism. `ilist` publicly derives from this traits class. -.. 
_dss_ilist_node: +(dss_ilist_node)= -llvm/ADT/ilist_node.h -^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/ilist_node.h -``ilist_node`` implements the forward and backward links that are expected -by the ``ilist`` (and analogous containers) in the default manner. +`ilist_node` implements the forward and backward links that are expected +by the `ilist` (and analogous containers) in the default manner. -``ilist_node``\ s are meant to be embedded in the node type ``T``, usually -``T`` publicly derives from ``ilist_node``. +`ilist_node`s are meant to be embedded in the node type `T`, usually +`T` publicly derives from `ilist_node`. -.. _dss_ilist_sentinel: +(dss_ilist_sentinel)= -Sentinels -^^^^^^^^^ +#### Sentinels -``ilist``\ s have another specialty that must be considered. To be a good +`ilist`s have another specialty that must be considered. To be a good citizen in the C++ ecosystem, it needs to support the standard container -operations, such as ``begin`` and ``end`` iterators, etc. Also, the -``operator--`` must work correctly on the ``end`` iterator in the case of -non-empty ``ilist``\ s. +operations, such as `begin` and `end` iterators, etc. Also, the +`operator--` must work correctly on the `end` iterator in the case of +non-empty `ilist`s. The only sensible solution to this problem is to allocate a so-called *sentinel* -along with the intrusive list, which serves as the ``end`` iterator, providing +along with the intrusive list, which serves as the `end` iterator, providing the back-link to the last element. However, conforming to the C++ convention it -is illegal to ``operator++`` beyond the sentinel and it also must not be +is illegal to `operator++` beyond the sentinel and it also must not be dereferenced. -These constraints allow for some implementation freedom to the ``ilist`` how to +These constraints allow for some implementation freedom to the `ilist` how to allocate and store the sentinel. The corresponding policy is dictated by -``ilist_traits``. 
By default, a ``T`` gets heap-allocated whenever the need +`ilist_traits`. By default, a `T` gets heap-allocated whenever the need for a sentinel arises. While the default policy is sufficient in most cases, it may break down when -``T`` does not provide a default constructor. Also, in the case of many -instances of ``ilist``\ s, the memory overhead of the associated sentinels is +`T` does not provide a default constructor. Also, in the case of many +instances of `ilist`s, the memory overhead of the associated sentinels is wasted. To alleviate the situation with numerous and voluminous -``T``-sentinels, sometimes a trick is employed, leading to *ghostly sentinels*. +`T`-sentinels, sometimes a trick is employed, leading to *ghostly sentinels*. -Ghostly sentinels are obtained by specially-crafted ``ilist_traits`` which -superpose the sentinel with the ``ilist`` instance in memory. Pointer -arithmetic is used to obtain the sentinel, which is relative to the ``ilist``'s -``this`` pointer. The ``ilist`` is augmented by an extra pointer, which serves +Ghostly sentinels are obtained by specially-crafted `ilist_traits` which +superpose the sentinel with the `ilist` instance in memory. Pointer +arithmetic is used to obtain the sentinel, which is relative to the `ilist`'s +`this` pointer. The `ilist` is augmented by an extra pointer, which serves as the back-link of the sentinel. This is the only field in the ghostly sentinel which can be legally accessed. -.. _dss_other: +(dss_other)= -Other Sequential Container options -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Other Sequential Container options -Other STL containers are available, such as ``std::string``. +Other STL containers are available, such as `std::string`. -There are also various STL adapter classes such as ``std::queue``, -``std::priority_queue``, ``std::stack``, etc. These provide simplified access +There are also various STL adapter classes such as `std::queue`, +`std::priority_queue`, `std::stack`, etc. 
These provide simplified access to an underlying container but don't affect the cost of the container itself. -.. _ds_string: +(ds_string)= -String-like containers ----------------------- +### String-like containers There are a variety of ways to pass around and use strings in C and C++, and LLVM adds a few new options to choose from. Pick the first option on this list that will do what you need; they are ordered according to their relative cost. -Note that it is generally preferred to *not* pass strings around as ``const -char*``'s. These have a number of problems, including the fact that they +Note that it is generally preferred to *not* pass strings around as `const +char*`'s. These have a number of problems, including the fact that they cannot represent embedded nul ("\0") characters, and do not have a length -available efficiently. The general replacement for '``const char*``' is -``StringRef``. +available efficiently. The general replacement for '`const char*`' is +`StringRef`. For more information on choosing string containers for APIs, please see -:ref:`Passing Strings `. +{ref}`Passing Strings `. -.. _dss_stringref: +(dss_stringref)= -llvm/ADT/StringRef.h -^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/StringRef.h -The ``StringRef`` class is a simple value class that contains a pointer to a -character and a length, and is quite related to the :ref:`ArrayRef -` class (but specialized for arrays of characters). Because -``StringRef`` carries a length with it, it safely handles strings with embedded nul +The `StringRef` class is a simple value class that contains a pointer to a +character and a length, and is quite related to the {ref}`ArrayRef ` class (but specialized for arrays of characters). Because +`StringRef` carries a length with it, it safely handles strings with embedded nul characters in it, getting the length does not require a strlen call, and it even has very convenient APIs for slicing and dicing the character range that it represents. 
-``StringRef`` is ideal for passing simple strings around that are known to be live, -either because they are C string literals, ``std::string``, a C array, or a -``SmallVector``. Each of these cases has an efficient implicit conversion to -``StringRef``, which doesn't result in a dynamic ``strlen`` being executed. +`StringRef` is ideal for passing simple strings around that are known to be live, +either because they are C string literals, `std::string`, a C array, or a +`SmallVector`. Each of these cases has an efficient implicit conversion to +`StringRef`, which doesn't result in a dynamic `strlen` being executed. -``StringRef`` has a few major limitations which make more powerful string containers +`StringRef` has a few major limitations which make more powerful string containers useful: -#. You cannot directly convert a ``StringRef`` to a ``const char*`` because there is - no way to add a trailing nul (unlike the ``.c_str()`` method on various stronger +1. You cannot directly convert a `StringRef` to a `const char*` because there is + no way to add a trailing nul (unlike the `.c_str()` method on various stronger classes). -#. ``StringRef`` doesn't own or keep alive the underlying string bytes. +1. `StringRef` doesn't own or keep alive the underlying string bytes. As such, it can easily lead to dangling pointers, and is not suitable for - embedding in datastructures in most cases (instead, use an ``std::string`` or + embedding in datastructures in most cases (instead, use an `std::string` or something like that). -#. For the same reason, ``StringRef`` cannot be used as the return value of a - method if the method "computes" the result string. Instead, use ``std::string``. +1. For the same reason, `StringRef` cannot be used as the return value of a + method if the method "computes" the result string. Instead, use `std::string`. -#. ``StringRef``'s do not allow you to mutate the pointed-to string bytes and it +1. 
`StringRef`'s do not allow you to mutate the pointed-to string bytes and it doesn't allow you to insert or remove bytes from the range. For editing - operations like this, it interoperates with the :ref:`Twine ` + operations like this, it interoperates with the {ref}`Twine ` class. Because of its strengths and limitations, it is very common for a function to -take a ``StringRef`` and for a method on an object to return a ``StringRef`` that points +take a `StringRef` and for a method on an object to return a `StringRef` that points into some string that it owns. -.. _dss_twine: +(dss_twine)= -llvm/ADT/Twine.h -^^^^^^^^^^^^^^^^ +#### llvm/ADT/Twine.h The Twine class is used as an intermediary datatype for APIs that want to take a string that can be constructed inline with a series of concatenations. Twine @@ -2022,14 +1875,13 @@ object) on the stack as temporary objects, linking them together into a tree which is then linearized when the Twine is consumed. Twine is only safe to use as the argument to a function, and should always be a const reference, e.g.: -.. code-block:: c++ - - void foo(const Twine &T); - ... - StringRef X = ... - unsigned i = ... - foo(X + "." + Twine(i)); - +```cpp +void foo(const Twine &T); +... +StringRef X = ... +unsigned i = ... +foo(X + "." + Twine(i)); +``` This example forms a string like "blarg.42" by concatenating the values together, and does not form intermediate strings containing "blarg" or "blarg.". @@ -2038,159 +1890,147 @@ these instances are destroyed at the end of the current statement, it is an inherently dangerous API. For example, this simple variant contains undefined behavior and will probably crash: -.. code-block:: c++ - - void foo(const Twine &T); - ... - StringRef X = ... - unsigned i = ... - const Twine &Tmp = X + "." + Twine(i); - foo(Tmp); - -... because the temporaries are destroyed before the call. 
That said, ``Twine``'s -are much more efficient than intermediate ``std::string`` temporaries, and they work -really well with ``StringRef``. Just be aware of their limitations. +```cpp +void foo(const Twine &T); +... +StringRef X = ... +unsigned i = ... +const Twine &Tmp = X + "." + Twine(i); +foo(Tmp); +``` +... because the temporaries are destroyed before the call. That said, `Twine`'s +are much more efficient than intermediate `std::string` temporaries, and they work +really well with `StringRef`. Just be aware of their limitations. -.. _dss_smallstring: +(dss_smallstring)= -llvm/ADT/SmallString.h -^^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/SmallString.h -``SmallString`` is a subclass of :ref:`SmallVector ` that adds some -convenience APIs like += that takes ``StringRef``'s. ``SmallString`` avoids allocating +`SmallString` is a subclass of {ref}`SmallVector ` that adds some +convenience APIs like += that takes `StringRef`'s. `SmallString` avoids allocating memory in the case when the preallocated space is enough to hold its data, and it calls back to general heap allocation when required. Since it owns its data, it is very safe to use and supports full mutation of the string. -Like ``SmallVector``'s, the big downside to ``SmallString`` is their sizeof. While they +Like `SmallVector`'s, the big downside to `SmallString` is their sizeof. While they are optimized for small strings, they themselves are not particularly small. This means that they work great for temporary scratch buffers on the stack, but -should not generally be put into the heap: it is very rare to see a ``SmallString`` +should not generally be put into the heap: it is very rare to see a `SmallString` as the member of a frequently-allocated heap data structure or returned by-value. -.. _dss_stdstring: +(dss_stdstring)= -std::string -^^^^^^^^^^^ +#### std::string -The standard C++ ``std::string`` class is a very general class that (like -``SmallString``) owns its underlying data. 
sizeof(std::string) is very reasonable +The standard C++ `std::string` class is a very general class that (like +`SmallString`) owns its underlying data. sizeof(std::string) is very reasonable so it can be embedded into heap data structures and returned by-value. On the -other hand, ``std::string`` is highly inefficient for inline editing (e.g. +other hand, `std::string` is highly inefficient for inline editing (e.g. concatenating a bunch of stuff together) and because it is provided by the standard library, its performance characteristics depend a lot of the host standard library (e.g., libc++ and MSVC provide a highly optimized string class, GCC contains a really slow implementation). -The major disadvantage of ``std::string`` is that almost every operation that makes +The major disadvantage of `std::string` is that almost every operation that makes them larger can allocate memory, which is slow. As such, it is better to use -``SmallVector`` or ``Twine`` as a scratch buffer, but then use ``std::string`` to persist +`SmallVector` or `Twine` as a scratch buffer, but then use `std::string` to persist the result. -.. _ds_set: +(ds_set)= -Set-Like Containers (std::set, SmallSet, SetVector, etc) --------------------------------------------------------- +### Set-Like Containers (std::set, SmallSet, SetVector, etc) Set-like containers are useful when you need to canonicalize multiple values into a single representation. There are several different choices for how to do this, providing various trade-offs. -.. _dss_sortedvectorset: +(dss_sortedvectorset)= -A sorted 'vector' -^^^^^^^^^^^^^^^^^ +#### A sorted 'vector' If you intend to insert a lot of elements, then do a lot of queries, a great -approach is to use an ``std::vector`` (or other sequential container) with -``std::sort``+``std::unique`` to remove duplicates. This approach works really well if +approach is to use an `std::vector` (or other sequential container) with +`std::sort`+`std::unique` to remove duplicates. 
This approach works really well if your usage pattern has these two distinct phases (insert then query), and can be -coupled with a good choice of :ref:`sequential container `. +coupled with a good choice of {ref}`sequential container `. This combination provides the several nice properties: the result data is contiguous in memory (good for cache locality), has few allocations, is easy to address (iterators in the final vector are just indices or pointers), and can be efficiently queried with a standard binary search (e.g. -``std::lower_bound``; if you want the whole range of elements comparing -equal, use ``std::equal_range``). +`std::lower_bound`; if you want the whole range of elements comparing +equal, use `std::equal_range`). -.. _dss_smallset: +(dss_smallset)= -llvm/ADT/SmallSet.h -^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/SmallSet.h If you have a set-like data structure that is usually small and whose elements -are reasonably small, a ``SmallSet`` is a good choice. This set has +are reasonably small, a `SmallSet` is a good choice. This set has space for N elements in place (thus, if the set is dynamically smaller than N, no malloc traffic is required) and accesses them with a simple linear search. When the set grows beyond N elements, it allocates a more expensive representation that guarantees efficient access (for most types, it falls back -to :ref:`std::set `, but for pointers it uses something far better, -:ref:`SmallPtrSet `. +to {ref}`std::set `, but for pointers it uses something far better, +{ref}`SmallPtrSet `. The magic of this class is that it handles small sets extremely efficiently, but gracefully handles extremely large sets without loss of efficiency. -.. _dss_smallptrset: +(dss_smallptrset)= -llvm/ADT/SmallPtrSet.h -^^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/SmallPtrSet.h -``SmallPtrSet`` has all the advantages of ``SmallSet`` (and a ``SmallSet`` of -pointers is transparently implemented with a ``SmallPtrSet``). 
If more than N +`SmallPtrSet` has all the advantages of `SmallSet` (and a `SmallSet` of +pointers is transparently implemented with a `SmallPtrSet`). If more than N insertions are performed, a single quadratically probed hash table is allocated and grows as needed, providing extremely efficient access (constant time insertion/deleting/queries with low constant factors) and is very stingy with malloc traffic. -Note that, unlike :ref:`std::set `, the iterators of ``SmallPtrSet`` -are invalidated whenever an insertion or erasure occurs. The ``remove_if`` +Note that, unlike {ref}`std::set `, the iterators of `SmallPtrSet` +are invalidated whenever an insertion or erasure occurs. The `remove_if` method can be used to remove elements while iterating over the set. Also, the values visited by the iterators are not visited in sorted order. -.. _dss_stringset: +(dss_stringset)= -llvm/ADT/StringSet.h -^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/StringSet.h -``StringSet`` is a thin wrapper around :ref:`StringMap\ `, +`StringSet` is a thin wrapper around {ref}`StringMap `\, and it allows efficient storage and retrieval of unique strings. -Functionally analogous to ``SmallSet``, ``StringSet`` also supports -iteration. (The iterator dereferences to a ``StringMapEntry``, so you -need to call ``i->getKey()`` to access the item of the StringSet.) On the -other hand, ``StringSet`` doesn't support range-insertion and -copy-construction, which :ref:`SmallSet ` and :ref:`SmallPtrSet -` do support. +Functionally analogous to `SmallSet`, `StringSet` also supports +iteration. (The iterator dereferences to a `StringMapEntry`, so you +need to call `i->getKey()` to access the item of the StringSet.) On the +other hand, `StringSet` doesn't support range-insertion and +copy-construction, which {ref}`SmallSet ` and {ref}`SmallPtrSet ` do support. -.. _dss_denseset: +(dss_denseset)= -llvm/ADT/DenseSet.h -^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/DenseSet.h -``DenseSet`` is a simple linearly probed hash table. 
It excels at supporting +`DenseSet` is a simple linearly probed hash table. It excels at supporting small values: it uses a single allocation to hold all of the pairs that are -currently inserted in the set. ``DenseSet`` is a great way to unique small values -that are not simple pointers (use :ref:`SmallPtrSet ` for -pointers). Note that ``DenseSet`` has the same requirements for the value type that -:ref:`DenseMap ` has. +currently inserted in the set. `DenseSet` is a great way to unique small values +that are not simple pointers (use {ref}`SmallPtrSet ` for +pointers). Note that `DenseSet` has the same requirements for the value type that +{ref}`DenseMap ` has. -.. _dss_radixtree: +(dss_radixtree)= -llvm/ADT/RadixTree.h -^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/RadixTree.h -``RadixTree`` is a trie-based data structure that stores range-like keys and +`RadixTree` is a trie-based data structure that stores range-like keys and their associated values. It is particularly efficient for storing keys that share common prefixes, as it can compress these prefixes to save memory. It supports efficient search of matching prefixes. -.. _dss_sparseset: +(dss_sparseset)= -llvm/ADT/SparseSet.h -^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/SparseSet.h SparseSet holds a small number of objects identified by unsigned keys of moderate size. It uses a lot of memory, but provides operations that are almost @@ -2201,31 +2041,29 @@ SparseSet is useful for algorithms that need very fast clear/find/insert/erase and fast iteration over small sets. It is not intended for building composite data structures. -.. _dss_sparsemultiset: +(dss_sparsemultiset)= -llvm/ADT/SparseMultiSet.h -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/SparseMultiSet.h -``SparseMultiSet`` adds multiset behavior to ``SparseSet``, while retaining ``SparseSet``'s -desirable attributes. 
Like ``SparseSet``, it typically uses a lot of memory, but +`SparseMultiSet` adds multiset behavior to `SparseSet`, while retaining `SparseSet`'s +desirable attributes. Like `SparseSet`, it typically uses a lot of memory, but provides operations that are almost as fast as a vector. Typical keys are physical registers, virtual registers, or numbered basic blocks. -``SparseMultiSet`` is useful for algorithms that need very fast +`SparseMultiSet` is useful for algorithms that need very fast clear/find/insert/erase of the entire collection, and iteration over sets of elements sharing a key. It is often a more efficient choice than using composite data structures (e.g., vector-of-vectors, map-of-vectors). It is not intended for building composite data structures. -.. _dss_FoldingSet: +(dss_FoldingSet)= -llvm/ADT/FoldingSet.h -^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/FoldingSet.h -``FoldingSet`` is an aggregate class that is really good at uniquing +`FoldingSet` is an aggregate class that is really good at uniquing expensive-to-create or polymorphic objects. It is a combination of a chained hash table with intrusive links (uniqued objects are required to inherit from -``FoldingSetNode``) that uses :ref:`SmallVector ` as part of its ID +`FoldingSetNode`) that uses {ref}`SmallVector ` as part of its ID process. Consider a case where you want to implement a "getOrCreateFoo" method for a @@ -2235,25 +2073,24 @@ operands), but we don't want to 'new' a node, then try inserting it into a set only to find out it already exists, at which point we would have to delete it and return the node that already exists. -To support this style of client, ``FoldingSet`` perform a query with a -``FoldingSetNodeID`` (which wraps ``SmallVector``) that can be used to describe the +To support this style of client, `FoldingSet` performs a query with a +`FoldingSetNodeID` (which wraps `SmallVector`) that can be used to describe the element that we want to query for.
The query either returns the element matching the ID or it returns an opaque ID that indicates where insertion should take place. Construction of the ID usually does not require heap traffic. -Because ``FoldingSet`` uses intrusive links, it can support polymorphic objects in -the set (for example, you can have ``SDNode`` instances mixed with ``LoadSDNodes``). +Because `FoldingSet` uses intrusive links, it can support polymorphic objects in +the set (for example, you can have `SDNode` instances mixed with `LoadSDNodes`). Because the elements are individually allocated, pointers to the elements are stable: inserting or removing elements does not invalidate any pointers to other elements. -.. _dss_set: +(dss_set)= -<set> -^^^^^ +#### `<set>` -``std::set`` is a reasonable all-around set class, which is decent at many -things but great at nothing. ``std::set`` allocates memory for each element +`std::set` is a reasonable all-around set class, which is decent at many +things but great at nothing. `std::set` allocates memory for each element inserted (thus it is very malloc intensive) and typically stores three pointers per element in the set (thus adding a large amount of per-element space overhead). It offers guaranteed log(n) performance, which is not particularly @@ -2261,53 +2098,51 @@ fast from a complexity standpoint (particularly if the elements of the set are expensive to compare, like strings), and has extremely high constant factors for lookup, insertion and removal. -The advantages of ``std::set`` are that its iterators are stable (deleting or +The advantages of `std::set` are that its iterators are stable (deleting or inserting an element from the set does not affect iterators or pointers to other elements) and that iteration over the set is guaranteed to be in sorted order.
If the elements in the set are large, then the relative overhead of the pointers and malloc traffic is not a big deal, but if the elements of the set are small, -``std::set`` is almost never a good choice. +`std::set` is almost never a good choice. -.. _dss_setvector: +(dss_setvector)= -llvm/ADT/SetVector.h -^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/SetVector.h -LLVM's ``SetVector`` is an adapter class that combines your choice of a -set-like container along with a :ref:`Sequential Container ` The +LLVM's `SetVector` is an adapter class that combines your choice of a +set-like container along with a {ref}`Sequential Container `. The important property that this provides is efficient insertion with uniquing (duplicate elements are ignored) with iteration support. It implements this by inserting elements into both a set-like container and the sequential container, using the set-like container for uniquing and the sequential container for iteration. -The difference between ``SetVector`` and other sets is that the order of iteration -is guaranteed to match the order of insertion into the ``SetVector``. This property +The difference between `SetVector` and other sets is that the order of iteration +is guaranteed to match the order of insertion into the `SetVector`. This property is really important for things like sets of pointers. Because pointer values are non-deterministic (e.g., vary across runs of the program on different machines), iterating over the pointers in the set will not be in a well-defined order. -The drawback of ``SetVector`` is that it requires twice as much space as a normal +The drawback of `SetVector` is that it requires twice as much space as a normal set and has the sum of constant factors from the set-like container and the sequential container that it uses. Use it **only** if you need to iterate over -the elements in a deterministic order. ``SetVector`` is also expensive to delete +the elements in a deterministic order.
`SetVector` is also expensive to delete elements out of (linear time), unless you use its "pop_back" method, which is faster. -``SetVector`` is an adapter class that defaults to using ``std::vector`` and a -size 16 ``SmallSet`` for the underlying containers, so it is quite expensive. -However, ``"llvm/ADT/SetVector.h"`` also provides a ``SmallSetVector`` class, -which defaults to using a ``SmallVector`` and ``SmallSet`` of a specified size. -If you use this, and if your sets are dynamically smaller than ``N``, you will +`SetVector` is an adapter class that defaults to using `std::vector` and a +size 16 `SmallSet` for the underlying containers, so it is quite expensive. +However, `"llvm/ADT/SetVector.h"` also provides a `SmallSetVector` class, +which defaults to using a `SmallVector` and `SmallSet` of a specified size. +If you use this, and if your sets are dynamically smaller than `N`, you will save a lot of heap traffic. -.. _dss_uniquevector: +(dss_uniquevector)= -llvm/ADT/UniqueVector.h -^^^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/UniqueVector.h -UniqueVector is similar to :ref:`SetVector ` but it retains a +UniqueVector is similar to {ref}`SetVector ` but it retains a unique ID for each element inserted into the set. It internally contains a map and a vector, and it assigns a unique ID for each value inserted into the set. @@ -2315,63 +2150,57 @@ UniqueVector is very expensive: its cost is the sum of the cost of maintaining both the map and vector, it has high complexity, high constant factors, and produces a lot of malloc traffic. It should be avoided. -.. _dss_immutableset: +(dss_immutableset)= -llvm/ADT/ImmutableSet.h -^^^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/ImmutableSet.h -``ImmutableSet`` is an immutable (functional) set implementation based on an AVL +`ImmutableSet` is an immutable (functional) set implementation based on an AVL tree. Adding or removing elements is done through a Factory object and results -in the creation of a new ``ImmutableSet`` object. 
If an ``ImmutableSet`` already exists +in the creation of a new `ImmutableSet` object. If an `ImmutableSet` already exists with the given contents, then the existing one is returned; equality is compared -with a ``FoldingSetNodeID``. The time and space complexity of add or remove +with a `FoldingSetNodeID`. The time and space complexity of add or remove operations is logarithmic in the size of the original set. There is no method for returning an element of the set, you can only check for membership. -.. _dss_otherset: +(dss_otherset)= -Other Set-Like Container Options -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Other Set-Like Container Options -The STL provides several other options, such as ``std::multiset`` and -``std::unordered_set``. We never use containers like ``unordered_set`` because +The STL provides several other options, such as `std::multiset` and +`std::unordered_set`. We never use containers like `unordered_set` because they are generally very expensive (each insertion requires a malloc). -``std::multiset`` is useful if you're not interested in elimination of duplicates, -but has all the drawbacks of :ref:`std::set `. A sorted vector +`std::multiset` is useful if you're not interested in elimination of duplicates, +but has all the drawbacks of {ref}`std::set `. A sorted vector (where you don't delete duplicate entries) or some other approach is almost always better. -.. _ds_map: +(ds_map)= -Map-Like Containers (std::map, DenseMap, etc) ---------------------------------------------- +### Map-Like Containers (std::map, DenseMap, etc) Map-like containers are useful when you want to associate data to a key. As usual, there are a lot of different ways to do this. :) -.. _dss_sortedvectormap: +(dss_sortedvectormap)= -A sorted 'vector' -^^^^^^^^^^^^^^^^^ +#### A sorted 'vector' If your usage pattern follows a strict insert-then-query approach, you can -trivially use the same approach as :ref:`sorted vectors for set-like containers -`. 
The only difference is that your query function (which -uses ``std::lower_bound`` to get efficient log(n) lookup) should only compare the +trivially use the same approach as {ref}`sorted vectors for set-like containers `. The only difference is that your query function (which +uses `std::lower_bound` to get efficient log(n) lookup) should only compare the key, not both the key and value. This yields the same advantages as sorted vectors for sets. -.. _dss_stringmap: +(dss_stringmap)= -llvm/ADT/StringMap.h -^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/StringMap.h Strings are commonly used as keys in maps, and they are difficult to support efficiently: they are variable length, inefficient to hash and compare when -long, expensive to copy, etc. ``StringMap`` is a specialized container designed to +long, expensive to copy, etc. `StringMap` is a specialized container designed to cope with these issues. It supports mapping an arbitrary range of bytes to an arbitrary other object. @@ -2380,33 +2209,32 @@ buckets store a pointer to the heap allocated entries (and some other stuff). The entries in the map must be heap allocated because the strings are variable length. The string data (key) and the element object (value) are stored in the same allocation with the string data immediately after the element object. -This container guarantees the "``(char*)(&Value+1)``" points to the key string +This container guarantees the "`(char*)(&Value+1)`" points to the key string for a value. 
-The ``StringMap`` is very fast for several reasons: linear probing is very cache +The `StringMap` is very fast for several reasons: linear probing is very cache efficient for lookups, the hash value of strings in buckets is not recomputed -when looking up an element, ``StringMap`` rarely has to touch the memory for +when looking up an element, `StringMap` rarely has to touch the memory for unrelated objects when looking up a value (even when hash collisions happen), hash table growth does not recompute the hash values for strings already in the table, and each pair in the map is store in a single allocation (the string data is stored in the same allocation as the Value of a pair). -``StringMap`` also provides query methods that take byte ranges, so it only ever +`StringMap` also provides query methods that take byte ranges, so it only ever copies a string if a value is inserted into the table. -``StringMap`` iteration order, however, is not guaranteed to be deterministic, so -any uses which require that should instead use a ``std::map``. +`StringMap` iteration order, however, is not guaranteed to be deterministic, so +any uses which require that should instead use a `std::map`. -Like ``DenseMap``, ``StringMap`` iterators are invalidated whenever an insertion -or erasure occurs. To erase matching elements in a single pass, use the -``remove_if`` member instead of erasing while iterating. +Like `DenseMap`, `StringMap` iterators are invalidated whenever an insertion +or erasure occurs. To erase matching elements in a single pass, use the +`remove_if` member instead of erasing while iterating. -.. _dss_indexmap: +(dss_indexmap)= -llvm/ADT/IndexedMap.h -^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/IndexedMap.h -``IndexedMap`` is a specialized container for mapping small dense integers (or +`IndexedMap` is a specialized container for mapping small dense integers (or values that can be mapped to small dense integers) to some other type. 
It is internally implemented as a vector with a mapping function that maps the keys to the dense integer range. @@ -2415,94 +2243,88 @@ This is useful for cases like virtual registers in the LLVM code generator: they have a dense mapping that is offset by a compile-time constant (the first virtual register ID). -.. _dss_densemap: +(dss_densemap)= -llvm/ADT/DenseMap.h -^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/DenseMap.h -``DenseMap`` is a simple linearly probed hash table. It excels at supporting +`DenseMap` is a simple linearly probed hash table. It excels at supporting small keys and values: it uses a single allocation to hold all of the pairs -that are currently inserted in the map. ``DenseMap`` is a great way to map +that are currently inserted in the map. `DenseMap` is a great way to map pointers to pointers, or map other small types to each other. -There are several aspects of ``DenseMap`` that you should be aware of, however. -The iterators in a ``DenseMap`` are invalidated whenever an insertion or -erasure occurs, unlike ``map``. Also, because ``DenseMap`` allocates space for +There are several aspects of `DenseMap` that you should be aware of, however. +The iterators in a `DenseMap` are invalidated whenever an insertion or +erasure occurs, unlike `map`. Also, because `DenseMap` allocates space for a large number of key/value pairs (it starts with 64 by default), it will waste a lot of space if your keys or values are large. Finally, you must implement a -partial specialization of ``DenseMapInfo`` for the key that you want, if it -isn't already supported. This is required to tell ``DenseMap`` about two +partial specialization of `DenseMapInfo` for the key that you want, if it +isn't already supported. This is required to tell `DenseMap` about two special marker values (which can never be inserted into the map) that it needs internally. 
-``DenseMap``'s ``find_as()`` method supports lookup operations using an alternate key +`DenseMap`'s `find_as()` method supports lookup operations using an alternate key type. This is useful in cases where the normal key type is expensive to -construct, but cheap to compare against. The ``DenseMapInfo`` is responsible for +construct, but cheap to compare against. The `DenseMapInfo` is responsible for defining the appropriate comparison and hashing methods for each alternate key type used. -``DenseMap.h`` also contains a ``SmallDenseMap`` variant, that similar to -:ref:`SmallVector ` performs no heap allocation until the +`DenseMap.h` also contains a `SmallDenseMap` variant, that similar to +{ref}`SmallVector ` performs no heap allocation until the number of elements in the template parameter N are exceeded. -.. _dss_valuemap: +(dss_valuemap)= -llvm/IR/ValueMap.h -^^^^^^^^^^^^^^^^^^^ +#### llvm/IR/ValueMap.h -ValueMap is a wrapper around a :ref:`DenseMap ` mapping -``Value*``\ s (or subclasses) to another type. When a Value is deleted or -RAUW'ed, ``ValueMap`` will update itself so the new version of the key is mapped to +ValueMap is a wrapper around a {ref}`DenseMap ` mapping +`Value*`s (or subclasses) to another type. When a Value is deleted or +RAUW'ed, `ValueMap` will update itself so the new version of the key is mapped to the same value, just as if the key were a WeakVH. You can configure exactly how -this happens, and what else happens on these two events, by passing a ``Config`` -parameter to the ``ValueMap`` template. +this happens, and what else happens on these two events, by passing a `Config` +parameter to the `ValueMap` template. -.. _dss_intervalmap: +(dss_intervalmap)= -llvm/ADT/IntervalMap.h -^^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/IntervalMap.h -``IntervalMap`` is a compact map for small keys and values. It maps key intervals +`IntervalMap` is a compact map for small keys and values. 
It maps key intervals instead of single keys, and it will automatically coalesce adjacent intervals. When the map only contains a few intervals, they are stored in the map object itself to avoid allocations. -The ``IntervalMap`` iterators are quite big, so they should not be passed around as +The `IntervalMap` iterators are quite big, so they should not be passed around as STL iterators. The heavyweight iterators allow a smaller data structure. -.. _dss_intervaltree: +(dss_intervaltree)= -llvm/ADT/IntervalTree.h -^^^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/IntervalTree.h -``llvm::IntervalTree`` is a light tree data structure to hold intervals. It +`llvm::IntervalTree` is a light tree data structure to hold intervals. It allows finding all intervals that overlap with any given point. At this time, it does not support any deletion or rebalancing operations. -The ``IntervalTree`` is designed to be set up once, and then queried without any +The `IntervalTree` is designed to be set up once, and then queried without any further additions. -.. _dss_map: +(dss_map)= -<map> -^^^^^ +#### `<map>` -``std::map`` has similar characteristics to :ref:`std::set `: it uses a +`std::map` has similar characteristics to {ref}`std::set `: it uses a single allocation per pair inserted into the map, it offers log(n) lookup with an extremely large constant factor, imposes a space penalty of 3 pointers per pair in the map, etc. -``std::map`` is most useful when your keys or values are very large, if you need to +`std::map` is most useful when your keys or values are very large, if you need to iterate over the collection in sorted order, or if you need stable iterators into the map (i.e., they don't get invalidated if an insertion or deletion of another element takes place). -.. _dss_mapvector: +(dss_mapvector)= -llvm/ADT/MapVector.h -^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/MapVector.h -``MapVector`` provides a subset of the ``DenseMap`` interface.
The +`MapVector` provides a subset of the `DenseMap` interface. The main difference is that the iteration order is guaranteed to be the insertion order, making it an easy (but somewhat expensive) solution for non-deterministic iteration over maps of pointers. @@ -2511,17 +2333,16 @@ It is implemented by mapping from key to an index in a vector of key,value pairs. This provides fast lookup and iteration, but has two main drawbacks: the key is stored twice and removing elements takes linear time. If it is necessary to remove elements, it's best to remove them in bulk using -``remove_if()``. +`remove_if()`. -.. _dss_inteqclasses: +(dss_inteqclasses)= -llvm/ADT/IntEqClasses.h -^^^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/IntEqClasses.h -``IntEqClasses`` provides a compact representation of equivalence classes of small +`IntEqClasses` provides a compact representation of equivalence classes of small integers. Initially, each integer in the range 0..n-1 has its own equivalence class. Classes can be joined by passing two class representatives to the -``join(a, b)`` method. Two integers are in the same class when ``findLeader()`` returns +`join(a, b)` method. Two integers are in the same class when `findLeader()` returns the same representative. Once all equivalence classes are formed, the map can be compressed so each @@ -2529,61 +2350,56 @@ integer 0..n-1 maps to an equivalence class number in the range 0..m-1, where m is the total number of equivalence classes. The map must be uncompressed before it can be edited again. -.. _dss_immutablemap: +(dss_immutablemap)= -llvm/ADT/ImmutableMap.h -^^^^^^^^^^^^^^^^^^^^^^^ +#### llvm/ADT/ImmutableMap.h -``ImmutableMap`` is an immutable (functional) map implementation based on an AVL +`ImmutableMap` is an immutable (functional) map implementation based on an AVL tree. Adding or removing elements is done through a Factory object and results -in the creation of a new ``ImmutableMap`` object. 
If an ``ImmutableMap`` already exists +in the creation of a new `ImmutableMap` object. If an `ImmutableMap` already exists with the given key set, then the existing one is returned; equality is compared -with a ``FoldingSetNodeID``. The time and space complexity of add or remove +with a `FoldingSetNodeID`. The time and space complexity of add or remove operations is logarithmic in the size of the original map. -.. _dss_othermap: +(dss_othermap)= -Other Map-Like Container Options -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Other Map-Like Container Options -The STL provides several other options, such as ``std::multimap`` and -``std::unordered_map``. We never use containers like ``unordered_map`` because +The STL provides several other options, such as `std::multimap` and +`std::unordered_map`. We never use containers like `unordered_map` because they are generally very expensive (each insertion requires a malloc). -``std::multimap`` is useful if you want to map a key to multiple values, but has all -the drawbacks of ``std::map``. A sorted vector or some other approach is almost +`std::multimap` is useful if you want to map a key to multiple values, but has all +the drawbacks of `std::map`. A sorted vector or some other approach is almost always better. -.. _ds_bit: +(ds_bit)= -Bit storage containers ------------------------------------------------------------------------- +### Bit storage containers There are several bit storage containers, and choosing when to use each is relatively straightforward. -One additional option is ``std::vector``: we discourage its use for two +One additional option is `std::vector`: we discourage its use for two reasons 1) the implementation in many common compilers (e.g., commonly available versions of GCC) is extremely inefficient and 2) the C++ standards committee is likely to deprecate this container and/or change it significantly somehow. In any case, please don't use it. -.. 
_dss_bitvector: +(dss_bitvector)= -BitVector -^^^^^^^^^ +#### BitVector -The ``BitVector`` container provides a dynamic size set of bits for manipulation. +The `BitVector` container provides a dynamic size set of bits for manipulation. It supports individual bit setting/testing, as well as set operations. The set operations take time O(size of bitvector), but operations are performed one word -at a time, instead of one bit at a time. This makes the ``BitVector`` very fast for -set operations compared to other containers. Use the ``BitVector`` when you expect +at a time, instead of one bit at a time. This makes the `BitVector` very fast for +set operations compared to other containers. Use the `BitVector` when you expect the number of set bits to be high (i.e., a dense set). -.. _dss_smallbitvector: +(dss_smallbitvector)= -SmallBitVector -^^^^^^^^^^^^^^ +#### SmallBitVector The SmallBitVector container provides the same interface as BitVector, but it is optimized for the case where only a small number of bits, less than 25 or so, @@ -2594,164 +2410,150 @@ larger counts are rare. At this time, SmallBitVector does not support set operations (and, or, xor), and its operator[] does not provide an assignable lvalue. -.. _dss_sparsebitvector: +(dss_sparsebitvector)= -SparseBitVector -^^^^^^^^^^^^^^^ +#### SparseBitVector -The ``SparseBitVector`` container is much like ``BitVector``, with one major difference: -Only the bits that are set, are stored. This makes the ``SparseBitVector`` much -more space efficient than ``BitVector`` when the set is sparse, as well as making +The `SparseBitVector` container is much like `BitVector`, with one major difference: +Only the bits that are set, are stored. This makes the `SparseBitVector` much +more space efficient than `BitVector` when the set is sparse, as well as making set operations O(number of set bits) instead of O(size of universe). 
The -downside to the ``SparseBitVector`` is that setting and testing of random bits is -O(N), and on large ``SparseBitVectors``, this can be slower than ``BitVector``. In our +downside to the `SparseBitVector` is that setting and testing of random bits is +O(N), and on large `SparseBitVectors`, this can be slower than `BitVector`. In our implementation, setting or testing bits in sorted order (either forwards or reverse) is O(1) worst case. Testing and setting bits within 128 bits (depends on size) of the current bit is also O(1). As a general statement, -testing/setting bits in a ``SparseBitVector`` is O(distance away from last set bit). +testing/setting bits in a `SparseBitVector` is O(distance away from last set bit). -.. _dss_coalescingbitvector: +(dss_coalescingbitvector)= -CoalescingBitVector -^^^^^^^^^^^^^^^^^^^ +#### CoalescingBitVector -The ``CoalescingBitVector`` container is similar in principle to a ``SparseBitVector``, +The `CoalescingBitVector` container is similar in principle to a `SparseBitVector`, but is optimized to represent large contiguous ranges of set bits compactly. It does this by coalescing contiguous ranges of set bits into intervals. Searching -for a bit in a ``CoalescingBitVector`` is O(log(gaps between contiguous ranges)). +for a bit in a `CoalescingBitVector` is O(log(gaps between contiguous ranges)). -``CoalescingBitVector`` is a better choice than ``BitVector`` when gaps between ranges -of set bits are large. It's a better choice than ``SparseBitVector`` when find() +`CoalescingBitVector` is a better choice than `BitVector` when gaps between ranges +of set bits are large. It's a better choice than `SparseBitVector` when find() operations must have fast, predictable performance. However, it's not a good choice for representing sets which have lots of very short ranges. E.g. the set `{2*x : x \in [0, n)}` would be a pathological input. -.. 
_utility_functions: +(utility_functions)= -Useful Utility Functions -======================== +## Useful Utility Functions LLVM implements a number of general utility functions used across the -codebase. You can find the most common ones in ``STLExtras.h`` -(`doxygen `__). Some of these wrap +codebase. You can find the most common ones in `STLExtras.h` +([doxygen](https://llvm.org/doxygen/STLExtras_8h.html)). Some of these wrap well-known C++ standard library functions, while others are unique to LLVM. -.. _uf_iteration: +(uf_iteration)= -Iterating over ranges ---------------------- +### Iterating over ranges Sometimes you may want to iterate over more than range at a time or know the index of the index. LLVM provides custom utility functions to make that easier, without having to manually manage all iterators and/or indices: -.. _uf_zip: +(uf_zip)= -The ``zip``\ * functions -^^^^^^^^^^^^^^^^^^^^^^^^ +#### The `zip`* functions -``zip``\ * functions allow for iterating over elements from two or more ranges +`zip`* functions allow for iterating over elements from two or more ranges at the same time. For example: -.. code-block:: c++ - - SmallVector Counts = ...; - char Letters[26] = ...; - for (auto [Letter, Count] : zip_equal(Letters, Counts)) - errs() << Letter << ": " << Count << "\n"; - +```cpp +SmallVector Counts = ...; +char Letters[26] = ...; +for (auto [Letter, Count] : zip_equal(Letters, Counts)) + errs() << Letter << ": " << Count << "\n"; +``` Note that the elements are provided through a 'reference wrapper' proxy type (tuple of references), which combined with the structured bindings declaration -makes ``Letter`` and ``Count`` references to range elements. Any modification -to these references will affect the elements of ``Letters`` or ``Counts``. - -The ``zip``\ * functions support temporary ranges, for example: +makes `Letter` and `Count` references to range elements. 
Any modification +to these references will affect the elements of `Letters` or `Counts`. -.. code-block:: c++ +The `zip`* functions support temporary ranges, for example: - for (auto [Letter, Count] : zip(SmallVector{'a', 'b', 'c'}, Counts)) - errs() << Letter << ": " << Count << "\n"; - -The difference between the functions in the ``zip`` family is how they behave +```cpp +for (auto [Letter, Count] : zip(SmallVector{'a', 'b', 'c'}, Counts)) + errs() << Letter << ": " << Count << "\n"; +``` +The difference between the functions in the `zip` family is how they behave when the supplied ranges have different lengths: -* ``zip_equal`` -- requires all input ranges have the same length. -* ``zip`` -- iteration stops when the end of the shortest range is reached. -* ``zip_first`` -- requires the first range is the shortest one. -* ``zip_longest`` -- iteration continues until the end of the longest range is +* `zip_equal` -- requires all input ranges have the same length. +* `zip` -- iteration stops when the end of the shortest range is reached. +* `zip_first` -- requires the first range is the shortest one. +* `zip_longest` -- iteration continues until the end of the longest range is reached. The non-existent elements of shorter ranges are replaced with - ``std::nullopt``. + `std::nullopt`. -The length requirements are checked with ``assert``\ s. +The length requirements are checked with `assert`s. -As a rule of thumb, prefer to use ``zip_equal`` when you expect all -ranges to have the same lengths, and consider alternative ``zip`` functions only -when this is not the case. This is because ``zip_equal`` clearly communicates +As a rule of thumb, prefer to use `zip_equal` when you expect all +ranges to have the same lengths, and consider alternative `zip` functions only +when this is not the case. This is because `zip_equal` clearly communicates this same-length assumption and has the best (release-mode) runtime performance. -.. 
_uf_enumerate: +(uf_enumerate)= -``enumerate`` -^^^^^^^^^^^^^ +#### `enumerate` -The ``enumerate`` functions allows to iterate over one or more ranges while +The `enumerate` function allows iterating over one or more ranges while keeping track of the index of the current loop iteration. For example: -.. code-block:: c++ - - for (auto [Idx, BB, Value] : enumerate(Phi->blocks(), - Phi->incoming_values())) - errs() << "#" << Idx << " " << BB->getName() << ": " << *Value << "\n"; - +```cpp +for (auto [Idx, BB, Value] : enumerate(Phi->blocks(), + Phi->incoming_values())) + errs() << "#" << Idx << " " << BB->getName() << ": " << *Value << "\n"; +``` The current element index is provided as the first structured bindings element. Alternatively, the index and the element value can be obtained with the -``index()`` and ``value()`` member functions: - -.. code-block:: c++ - - char Letters[26] = ...; - for (auto En : enumerate(Letters)) - errs() << "#" << En.index() << " " << En.value() << "\n"; - -Note that ``enumerate`` has ``zip_equal`` semantics and provides elements +`index()` and `value()` member functions: + +```cpp +char Letters[26] = ...; +for (auto En : enumerate(Letters)) + errs() << "#" << En.index() << " " << En.value() << "\n"; +``` +Note that `enumerate` has `zip_equal` semantics and provides elements through a 'reference wrapper' proxy, which makes them modifiable when accessed -through structured bindings or the ``value()`` member function. When two or more -ranges are passed, ``enumerate`` requires them to have equal lengths (checked -with an ``assert``). +through structured bindings or the `value()` member function. When two or more +ranges are passed, `enumerate` requires them to have equal lengths (checked +with an `assert`). -.. _debugging: +(debugging)= -Debugging -========= +## Debugging -See :doc:`Debugging LLVM <DebuggingLLVM>`. +See {doc}`Debugging LLVM <DebuggingLLVM>`. -.. 
_common: +(common)= -Helpful Hints for Common Operations -=================================== +## Helpful Hints for Common Operations This section describes how to perform some very simple transformations of LLVM code. This is meant to give examples of common idioms used, showing the practical side of LLVM transformations. Because this is a "how-to" section, you should also read about the main classes -that you will be working with. The :ref:`Core LLVM Class Hierarchy Reference -<coreclasses>` contains details and descriptions of the main classes that you +that you will be working with. The {ref}`Core LLVM Class Hierarchy Reference <coreclasses>` contains details and descriptions of the main classes that you should know about. -.. _inspection: +(inspection)= -Basic Inspection and Traversal Routines ---------------------------------------- +### Basic Inspection and Traversal Routines The LLVM compiler infrastructure have many different data structures that may be traversed. Following the example of the C++ standard template library, the techniques used to traverse these various data structures are all basically the -same. For an enumerable sequence of values, the ``XXXbegin()`` function (or -method) returns an iterator to the start of the sequence, the ``XXXend()`` +same. For an enumerable sequence of values, the `XXXbegin()` function (or +method) returns an iterator to the start of the sequence, the `XXXend()` function returns an iterator pointing to one past the last valid element of the -sequence, and there is some ``XXXiterator`` data type that is common between the +sequence, and there is some `XXXiterator` data type that is common between the two operations. Because the pattern for iteration is common across many different aspects of the @@ -2760,265 +2562,239 @@ them, and it is easier to remember how to iterate. First we show a few common examples of the data structures that need to be traversed. Other data structures are traversed in very similar ways. -.. 
_iterate_function: - -Iterating over the ``BasicBlock`` in a ``Function`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -It's quite common to have a ``Function`` instance that you'd like to transform -in some way; in particular, you'd like to manipulate its ``BasicBlock``\ s. To -facilitate this, you'll need to iterate over all of the ``BasicBlock``\ s that -constitute the ``Function``. The following is an example that prints the name -of a ``BasicBlock`` and the number of ``Instruction``\ s it contains: - -.. code-block:: c++ - - Function &Func = ... - for (BasicBlock &BB : Func) - // Print out the name of the basic block if it has one, and then the - // number of instructions that it contains - errs() << "Basic block (name=" << BB.getName() << ") has " - << BB.size() << " instructions.\n"; - -.. _iterate_basicblock: - -Iterating over the ``Instruction`` in a ``BasicBlock`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Just like when dealing with ``BasicBlock``\ s in ``Function``\ s, it's easy to -iterate over the individual instructions that make up ``BasicBlock``\ s. Here's -a code snippet that prints out each instruction in a ``BasicBlock``: - -.. code-block:: c++ - - BasicBlock& BB = ... - for (Instruction &I : BB) - // The next statement works since operator<<(ostream&,...) - // is overloaded for Instruction& - errs() << I << "\n"; - - +(iterate_function)= + +#### Iterating over the `BasicBlock` in a `Function` + +It's quite common to have a `Function` instance that you'd like to transform +in some way; in particular, you'd like to manipulate its `BasicBlock`s. To +facilitate this, you'll need to iterate over all of the `BasicBlock`s that +constitute the `Function`. The following is an example that prints the name +of a `BasicBlock` and the number of `Instruction`s it contains: + +```cpp +Function &Func = ... 
+for (BasicBlock &BB : Func) + // Print out the name of the basic block if it has one, and then the + // number of instructions that it contains + errs() << "Basic block (name=" << BB.getName() << ") has " + << BB.size() << " instructions.\n"; +``` +(iterate_basicblock)= + +#### Iterating over the `Instruction` in a `BasicBlock` + +Just like when dealing with `BasicBlock`s in `Function`s, it's easy to +iterate over the individual instructions that make up `BasicBlock`s. Here's +a code snippet that prints out each instruction in a `BasicBlock`: + +```cpp +BasicBlock& BB = ... +for (Instruction &I : BB) + // The next statement works since operator<<(ostream&,...) + // is overloaded for Instruction& + errs() << I << "\n"; +``` However, this isn't really the best way to print out the contents of a -``BasicBlock``! Since the ostream operators are overloaded for virtually +`BasicBlock`! Since the ostream operators are overloaded for virtually anything you'll care about, you could have just invoked the print routine on the -basic block itself: ``errs() << BB << "\n";``. +basic block itself: `errs() << BB << "\n";`. -.. _iterate_insiter: +(iterate_insiter)= -Iterating over the ``Instruction`` in a ``Function`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Iterating over the `Instruction` in a `Function` -If you're finding that you commonly iterate over a ``Function``'s -``BasicBlock``\ s and then that ``BasicBlock``'s ``Instruction``\ s, -``InstIterator`` should be used instead. You'll need to include -``llvm/IR/InstIterator.h`` (`doxygen -`__) and then instantiate -``InstIterator``\ s explicitly in your code. Here's a small example that shows +If you're finding that you commonly iterate over a `Function`'s +`BasicBlock`s and then that `BasicBlock`'s `Instruction`s, +`InstIterator` should be used instead. 
You'll need to include +`llvm/IR/InstIterator.h` ([doxygen](https://llvm.org/doxygen/InstIterator_8h.html)) and then instantiate +`InstIterator`s explicitly in your code. Here's a small example that shows how to dump all instructions in a function to the standard error stream: -.. code-block:: c++ - - #include "llvm/IR/InstIterator.h" - - // F is a pointer to a Function instance - for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) - errs() << *I << "\n"; +```cpp +#include "llvm/IR/InstIterator.h" -Easy, isn't it? You can also use ``InstIterator``\ s to fill a work list with +// F is a pointer to a Function instance +for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + errs() << *I << "\n"; +``` +Easy, isn't it? You can also use `InstIterator`s to fill a work list with its initial contents. For example, if you wanted to initialize a work list to -contain all instructions in a ``Function`` F, all you would need to do is +contain all instructions in a `Function` F, all you would need to do is something like: -.. code-block:: c++ +```cpp +std::set worklist; +// or better yet, SmallPtrSet worklist; - std::set worklist; - // or better yet, SmallPtrSet worklist; - - for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) - worklist.insert(&*I); - -The STL set ``worklist`` would now contain all instructions in the ``Function`` +for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) + worklist.insert(&*I); +``` +The STL set `worklist` would now contain all instructions in the `Function` pointed to by F. -.. _iterate_convert: +(iterate_convert)= -Turning an iterator into a class pointer (and vice-versa) -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Turning an iterator into a class pointer (and vice-versa) Sometimes, it'll be useful to grab a reference (or pointer) to a class instance when all you've got at hand is an iterator. 
Well, extracting a reference or a -pointer from an iterator is very straightforward. Assuming that ``i`` is a -``BasicBlock::iterator`` and ``j`` is a ``BasicBlock::const_iterator``: - -.. code-block:: c++ - - Instruction& inst = *i; // Grab reference to instruction reference - Instruction* pinst = &*i; // Grab pointer to instruction reference - const Instruction& inst = *j; - +pointer from an iterator is very straightforward. Assuming that `i` is a +`BasicBlock::iterator` and `j` is a `BasicBlock::const_iterator`: + +```cpp +Instruction& inst = *i; // Grab reference to instruction reference +Instruction* pinst = &*i; // Grab pointer to instruction reference +const Instruction& inst = *j; +``` It's also possible to turn a class pointer into the corresponding iterator, and this is a constant time operation (very efficient). The following code snippet illustrates use of the conversion constructors provided by LLVM iterators. By using these, you can explicitly grab the iterator of something without actually obtaining it via iteration over some structure: -.. code-block:: c++ +```cpp +void printNextInstruction(Instruction* inst) { + BasicBlock::iterator it(inst); + ++it; // After this line, it refers to the instruction after *inst + if (it != inst->getParent()->end()) errs() << *it << "\n"; +} +``` +(iterate_complex)= - void printNextInstruction(Instruction* inst) { - BasicBlock::iterator it(inst); - ++it; // After this line, it refers to the instruction after *inst - if (it != inst->getParent()->end()) errs() << *it << "\n"; - } - -.. _iterate_complex: - -Finding call sites: a slightly more complex example -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Finding call sites: a slightly more complex example Say that you're writing a FunctionPass and would like to count all the locations -in the entire module (that is, across every ``Function``) where a certain -function (i.e., some ``Function *``) is already in scope. 
As you'll learn -later, you may want to use an ``InstVisitor`` to accomplish this in a much more +in the entire module (that is, across every `Function`) where a certain +function (i.e., some `Function *`) is already in scope. As you'll learn +later, you may want to use an `InstVisitor` to accomplish this in a much more straightforward manner, but this example will allow us to explore how you'd do -it if you didn't have ``InstVisitor`` around. In pseudo-code, this is what we +it if you didn't have `InstVisitor` around. In pseudo-code, this is what we want to do: -.. code-block:: none - - initialize callCounter to zero - for each Function f in the Module - for each BasicBlock b in f - for each Instruction i in b - if (i a Call and calls the given function) - increment callCounter - -And the actual code is (remember, because we're writing a ``FunctionPass``, our -``FunctionPass``-derived class simply has to override the ``runOnFunction`` +```none +initialize callCounter to zero +for each Function f in the Module + for each BasicBlock b in f + for each Instruction i in b + if (i a Call and calls the given function) + increment callCounter +``` +And the actual code is (remember, because we're writing a `FunctionPass`, our +`FunctionPass`-derived class simply has to override the `runOnFunction` method): -.. code-block:: c++ - - Function* targetFunc = ...; +```cpp +Function* targetFunc = ...; - class OurFunctionPass : public FunctionPass { - public: - OurFunctionPass(): callCounter(0) { } - - virtual runOnFunction(Function& F) { - for (BasicBlock &B : F) { - for (Instruction &I: B) { - if (auto *CB = dyn_cast(&I)) { - // We know we've encountered some kind of call instruction (call, - // invoke, or callbr), so we need to determine if it's a call to - // the function pointed to by m_func or not. 
- if (CB->getCalledFunction() == targetFunc) - ++callCounter; - } +class OurFunctionPass : public FunctionPass { + public: + OurFunctionPass(): callCounter(0) { } + + virtual runOnFunction(Function& F) { + for (BasicBlock &B : F) { + for (Instruction &I: B) { + if (auto *CB = dyn_cast(&I)) { + // We know we've encountered some kind of call instruction (call, + // invoke, or callbr), so we need to determine if it's a call to + // the function pointed to by m_func or not. + if (CB->getCalledFunction() == targetFunc) + ++callCounter; } } } - - private: - unsigned callCounter; - }; - -.. _iterate_chains: - -Iterating over def-use & use-def chains -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -Frequently, we might have an instance of the ``Value`` class (`doxygen -`__) and we want to determine -which ``User``\ s use the ``Value``. The list of all ``User``\ s of a particular -``Value`` is called a *def-use* chain. For example, let's say we have a -``Function*`` named ``F`` to a particular function ``foo``. Finding all of the -instructions that *use* ``foo`` is as simple as iterating over the *def-use* -chain of ``F``: - -.. code-block:: c++ - - Function *F = ...; - - for (User *U : F->users()) { - if (Instruction *Inst = dyn_cast(U)) { - errs() << "F is used in instruction:\n"; - errs() << *Inst << "\n"; } -Alternatively, it's common to have an instance of the ``User`` Class (`doxygen -`__) and need to know what -``Value``\ s are used by it. The list of all ``Value``\ s used by a ``User`` is -known as a *use-def* chain. Instances of class ``Instruction`` are common -``User`` s, so we might want to iterate over all of the values that a particular -instruction uses (that is, the operands of the particular ``Instruction``): - -.. code-block:: c++ - - Instruction *pi = ...; - - for (Use &U : pi->operands()) { - Value *v = U.get(); - // ... 
+ private: + unsigned callCounter; +}; +``` +(iterate_chains)= + +#### Iterating over def-use & use-def chains + +Frequently, we might have an instance of the `Value` class ([doxygen](https://llvm.org/doxygen/classllvm_1_1Value.html)) and we want to determine +which `User`s use the `Value`. The list of all `User`s of a particular +`Value` is called a *def-use* chain. For example, let's say we have a +`Function*` named `F` to a particular function `foo`. Finding all of the +instructions that *use* `foo` is as simple as iterating over the *def-use* +chain of `F`: + +```cpp +Function *F = ...; + +for (User *U : F->users()) { + if (Instruction *Inst = dyn_cast<Instruction>(U)) { + errs() << "F is used in instruction:\n"; + errs() << *Inst << "\n"; } - -Declaring objects as ``const`` is an important tool of enforcing mutation free +``` +Alternatively, it's common to have an instance of the `User` Class ([doxygen](https://llvm.org/doxygen/classllvm_1_1User.html)) and need to know what +`Value`s are used by it. The list of all `Value`s used by a `User` is +known as a *use-def* chain. Instances of class `Instruction` are common +`User`s, so we might want to iterate over all of the values that a particular +instruction uses (that is, the operands of the particular `Instruction`): + +```cpp +Instruction *pi = ...; + +for (Use &U : pi->operands()) { + Value *v = U.get(); + // ... +} +``` +Declaring objects as `const` is an important tool of enforcing mutation free algorithms (such as analyses, etc.). For this purpose above iterators come in -constant flavors as ``Value::const_use_iterator`` and -``Value::const_op_iterator``. They automatically arise when calling -``use/op_begin()`` on ``const Value*``\ s or ``const User*``\ s respectively. -Upon dereferencing, they return ``const Use*``\ s. Otherwise the above patterns +constant flavors as `Value::const_use_iterator` and +`Value::const_op_iterator`. 
They automatically arise when calling +`use/op_begin()` on `const Value*`s or `const User*`s respectively. +Upon dereferencing, they return `const Use*`s. Otherwise the above patterns remain unchanged. -.. _iterate_preds: +(iterate_preds)= -Iterating over predecessors & successors of blocks -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Iterating over predecessors & successors of blocks Iterating over the predecessors and successors of a block is quite easy with the -routines defined in ``"llvm/IR/CFG.h"``. Just use code like this to +routines defined in `"llvm/IR/CFG.h"`. Just use code like this to iterate over all predecessors of BB: -.. code-block:: c++ +```cpp +#include "llvm/IR/CFG.h" +BasicBlock *BB = ...; - #include "llvm/IR/CFG.h" - BasicBlock *BB = ...; +for (BasicBlock *Pred : predecessors(BB)) { + // ... +} +``` +Similarly, to iterate over successors use `successors`. - for (BasicBlock *Pred : predecessors(BB)) { - // ... - } - -Similarly, to iterate over successors use ``successors``. +(simplechanges)= -.. _simplechanges: - -Making simple changes ---------------------- +### Making simple changes There are some primitive transformation operations present in the LLVM infrastructure that are worth knowing about. When performing transformations, it's fairly common to manipulate the contents of basic blocks. This section describes some of the common methods for doing so and gives example code. -.. _schanges_creating: +(schanges_creating)= -Creating and inserting new ``Instruction``\ s -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Creating and inserting new `Instruction`s *Instantiating Instructions* -Creation of ``Instruction``\ s is straightforward: simply call the constructor +Creation of `Instruction`s is straightforward: simply call the constructor for the kind of instruction to instantiate and provide the necessary parameters. -For example, an ``AllocaInst`` only *requires* a (const-ptr-to) ``Type``. Thus: - -.. 
code-block:: c++ - - auto *ai = new AllocaInst(Type::Int32Ty); +For example, an `AllocaInst` only *requires* a (const-ptr-to) `Type`. Thus: -will create an ``AllocaInst`` instance that represents the allocation of one -integer in the current stack frame, at run time. Each ``Instruction`` subclass +```cpp +auto *ai = new AllocaInst(Type::Int32Ty); +``` +will create an `AllocaInst` instance that represents the allocation of one +integer in the current stack frame, at run time. Each `Instruction` subclass is likely to have varying default parameters which change the semantics of the -instruction, so refer to the `doxygen documentation for the subclass of -Instruction `_ that +instruction, so refer to the [doxygen documentation for the subclass of Instruction](https://llvm.org/doxygen/classllvm_1_1Instruction.html) that you're interested in instantiating. *Naming values* @@ -3027,194 +2803,173 @@ It is very useful to name the values of instructions when you're able to, as this facilitates the debugging of your transformations. If you end up looking at generated LLVM machine code, you definitely want to have logical names associated with the results of instructions! By supplying a value for the -``Name`` (default) parameter of the ``Instruction`` constructor, you associate a +`Name` (default) parameter of the `Instruction` constructor, you associate a logical name with the result of the instruction's execution at run time. For example, say that I'm writing a transformation that dynamically allocates space for an integer on the stack, and that integer is going to be used as some kind -of index by some other code. To accomplish this, I place an ``AllocaInst`` at -the first point in the first ``BasicBlock`` of some ``Function``, and I'm -intending to use it within the same ``Function``. I might do: - -.. 
code-block:: c++ - - auto *pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc"); - -where ``indexLoc`` is now the logical name of the instruction's execution value, +of index by some other code. To accomplish this, I place an `AllocaInst` at +the first point in the first `BasicBlock` of some `Function`, and I'm +intending to use it within the same `Function`. I might do: + +```cpp +auto *pa = new AllocaInst(Type::Int32Ty, 0, "indexLoc"); +``` +where `indexLoc` is now the logical name of the instruction's execution value, which is a pointer to an integer on the run time stack. *Inserting instructions* -There are essentially three ways to insert an ``Instruction`` into an existing -sequence of instructions that form a ``BasicBlock``: +There are essentially three ways to insert an `Instruction` into an existing +sequence of instructions that form a `BasicBlock`: -* Insertion into the instruction list of the ``BasicBlock`` +* Insertion into the instruction list of the `BasicBlock` - Given a ``BasicBlock* pb``, an ``Instruction* pi`` within that ``BasicBlock``, - and a newly-created instruction we wish to insert before ``*pi``, we do the + Given a `BasicBlock* pb`, an `Instruction* pi` within that `BasicBlock`, + and a newly-created instruction we wish to insert before `*pi`, we do the following: - .. code-block:: c++ - - BasicBlock *pb = ...; - Instruction *pi = ...; - auto *newInst = new Instruction(...); - - newInst->insertBefore(pi); // Inserts newInst before pi + ```cpp + BasicBlock *pb = ...; + Instruction *pi = ...; + auto *newInst = new Instruction(...); - Appending to the end of a ``BasicBlock`` is so common that the ``Instruction`` - class and ``Instruction``-derived classes provide constructors which take a - pointer to a ``BasicBlock`` to be appended to. 
For example code that looked + newInst->insertBefore(pi); // Inserts newInst before pi + ``` + Appending to the end of a `BasicBlock` is so common that the `Instruction` + class and `Instruction`-derived classes provide constructors which take a + pointer to a `BasicBlock` to be appended to. For example code that looked like: - .. code-block:: c++ - - BasicBlock *pb = ...; - auto *newInst = new Instruction(...); - - newInst->insertInto(pb, pb->end()); // Appends newInst to pb + ```cpp + BasicBlock *pb = ...; + auto *newInst = new Instruction(...); + newInst->insertInto(pb, pb->end()); // Appends newInst to pb + ``` becomes: - .. code-block:: c++ - - BasicBlock *pb = ...; - auto *newInst = new Instruction(..., pb); - + ```cpp + BasicBlock *pb = ...; + auto *newInst = new Instruction(..., pb); + ``` which is much cleaner, especially if you are creating long instruction streams. -* Insertion using an instance of ``IRBuilder`` +* Insertion using an instance of `IRBuilder` - Inserting several ``Instruction``\ s can be quite laborious using the previous - methods. The ``IRBuilder`` is a convenience class that can be used to add - several instructions to the end of a ``BasicBlock`` or before a particular - ``Instruction``. It also supports constant folding and renaming named - registers (see ``IRBuilder``'s template arguments). + Inserting several `Instruction`s can be quite laborious using the previous + methods. The `IRBuilder` is a convenience class that can be used to add + several instructions to the end of a `BasicBlock` or before a particular + `Instruction`. It also supports constant folding and renaming named + registers (see `IRBuilder`'s template arguments). - The example below demonstrates a very simple use of the ``IRBuilder`` where - three instructions are inserted before the instruction ``pi``. The first two + The example below demonstrates a very simple use of the `IRBuilder` where + three instructions are inserted before the instruction `pi`. 
The first two instructions are Call instructions and third instruction multiplies the return value of the two calls. - .. code-block:: c++ - - Instruction *pi = ...; - IRBuilder<> Builder(pi); - CallInst* callOne = Builder.CreateCall(...); - CallInst* callTwo = Builder.CreateCall(...); - Value* result = Builder.CreateMul(callOne, callTwo); - + ```cpp + Instruction *pi = ...; + IRBuilder<> Builder(pi); + CallInst* callOne = Builder.CreateCall(...); + CallInst* callTwo = Builder.CreateCall(...); + Value* result = Builder.CreateMul(callOne, callTwo); + ``` The example below is similar to the above example except that the created - ``IRBuilder`` inserts instructions at the end of the ``BasicBlock`` ``pb``. - - .. code-block:: c++ + `IRBuilder` inserts instructions at the end of the `BasicBlock` `pb`. - BasicBlock *pb = ...; - IRBuilder<> Builder(pb); - CallInst* callOne = Builder.CreateCall(...); - CallInst* callTwo = Builder.CreateCall(...); - Value* result = Builder.CreateMul(callOne, callTwo); + ```cpp + BasicBlock *pb = ...; + IRBuilder<> Builder(pb); + CallInst* callOne = Builder.CreateCall(...); + CallInst* callTwo = Builder.CreateCall(...); + Value* result = Builder.CreateMul(callOne, callTwo); + ``` + See {doc}`tutorial/LangImpl03` for a practical use of the `IRBuilder`. - See :doc:`tutorial/LangImpl03` for a practical use of the ``IRBuilder``. +(schanges_deleting)= -.. _schanges_deleting: - -Deleting Instructions -^^^^^^^^^^^^^^^^^^^^^ +#### Deleting Instructions Deleting an instruction from an existing sequence of instructions that form a -``BasicBlock`` is very straightforward: just call the instruction's -``eraseFromParent()`` method. For example: - -.. code-block:: c++ - - Instruction *I = .. ; - I->eraseFromParent(); +`BasicBlock` is very straightforward: just call the instruction's +`eraseFromParent()` method. For example: +```cpp +Instruction *I = .. 
; +I->eraseFromParent(); +``` This unlinks the instruction from its containing basic block and deletes it. If you'd just like to unlink the instruction from its containing basic block but -not delete it, you can use the ``removeFromParent()`` method. +not delete it, you can use the `removeFromParent()` method. -.. _schanges_replacing: +(schanges_replacing)= -Replacing an Instruction with another Value -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Replacing an Instruction with another Value -Replacing individual instructions -""""""""""""""""""""""""""""""""" +##### Replacing individual instructions -Including "`llvm/Transforms/Utils/BasicBlockUtils.h -<https://llvm.org/doxygen/BasicBlockUtils_8h_source.html>`_" permits use of two -very useful replace functions: ``ReplaceInstWithValue`` and -``ReplaceInstWithInst``. +Including "[llvm/Transforms/Utils/BasicBlockUtils.h](https://llvm.org/doxygen/BasicBlockUtils_8h_source.html)" permits use of two +very useful replace functions: `ReplaceInstWithValue` and +`ReplaceInstWithInst`. -.. _schanges_deleting_sub: +(schanges_deleting_sub)= -Deleting Instructions -""""""""""""""""""""" +##### Deleting Instructions -* ``ReplaceInstWithValue`` +* `ReplaceInstWithValue` This function replaces all uses of a given instruction with a value, and then removes the original instruction. The following example illustrates the - replacement of the result of a particular ``AllocaInst`` that allocates memory + replacement of the result of a particular `AllocaInst` that allocates memory for a single integer with a null pointer to an integer. - .. 
code-block:: c++ - - AllocaInst* instToReplace = ...; - BasicBlock::iterator ii(instToReplace); + ```cpp + AllocaInst* instToReplace = ...; + BasicBlock::iterator ii(instToReplace); - ReplaceInstWithValue(ii, Constant::getNullValue(PointerType::getUnqual(Type::Int32Ty))); - -* ``ReplaceInstWithInst`` + ReplaceInstWithValue(ii, Constant::getNullValue(PointerType::getUnqual(Type::Int32Ty))); + ``` +* `ReplaceInstWithInst` This function replaces a particular instruction with another instruction, inserting the new instruction into the basic block at the location where the old instruction was, and replacing any uses of the old instruction with the new instruction. The following example illustrates the replacement of one - ``AllocaInst`` with another. - - .. code-block:: c++ - - AllocaInst* instToReplace = ...; - BasicBlock::iterator ii(instToReplace); + `AllocaInst` with another. - ReplaceInstWithInst(instToReplace->getParent(), ii, - new AllocaInst(Type::Int32Ty, 0, "ptrToReplacedInt")); + ```cpp + AllocaInst* instToReplace = ...; + BasicBlock::iterator ii(instToReplace); + ReplaceInstWithInst(instToReplace->getParent(), ii, + new AllocaInst(Type::Int32Ty, 0, "ptrToReplacedInt")); + ``` +##### Replacing multiple uses of Users and Values -Replacing multiple uses of Users and Values -""""""""""""""""""""""""""""""""""""""""""" - -You can use ``Value::replaceAllUsesWith`` and ``User::replaceUsesOfWith`` to +You can use `Value::replaceAllUsesWith` and `User::replaceUsesOfWith` to change more than one use at a time. See the doxygen documentation for the -`Value Class <https://llvm.org/doxygen/classllvm_1_1Value.html>`_ and `User Class -<https://llvm.org/doxygen/classllvm_1_1User.html>`_, respectively, for more +[Value Class](https://llvm.org/doxygen/classllvm_1_1Value.html) and [User Class](https://llvm.org/doxygen/classllvm_1_1User.html), respectively, for more information. -.. 
_schanges_deletingGV: +(schanges_deletingGV)= -Deleting GlobalVariables -^^^^^^^^^^^^^^^^^^^^^^^^ +#### Deleting GlobalVariables Deleting a global variable from a module is just as easy as deleting an Instruction. First, you must have a pointer to the global variable that you wish to delete. You use this pointer to erase it from its parent, the module. For example: -.. code-block:: c++ - - GlobalVariable *GV = .. ; - - GV->eraseFromParent(); +```cpp +GlobalVariable *GV = .. ; +GV->eraseFromParent(); +``` +(threading)= -.. _threading: - -Threads and LLVM -================ +## Threads and LLVM This section describes the interaction of the LLVM APIs with multithreading, both on the part of client applications, and in the JIT, in the hosted @@ -3233,187 +2988,174 @@ compiler, consider compiling LLVM and LLVM-GCC in single-threaded mode, and using the resultant compiler to build a copy of LLVM with multithreading support. -.. _shutdown: +(shutdown)= -Ending Execution with ``llvm_shutdown()`` ------------------------------------------ +### Ending Execution with `llvm_shutdown()` -When you are done using the LLVM APIs, you should call ``llvm_shutdown()`` to +When you are done using the LLVM APIs, you should call `llvm_shutdown()` to deallocate memory used for internal structures. -.. _managedstatic: +(managedstatic)= -Lazy Initialization with ``ManagedStatic`` ------------------------------------------- +### Lazy Initialization with `ManagedStatic` -``ManagedStatic`` is a utility class in LLVM used to implement static +`ManagedStatic` is a utility class in LLVM used to implement static initialization of static resources, such as the global type tables. In a single-threaded environment, it implements a simple lazy initialization scheme. When LLVM is compiled with support for multi-threading, however, it uses double-checked locking to implement thread-safe lazy initialization. -.. 
_llvmcontext: +(llvmcontext)= -Achieving Isolation with ``LLVMContext`` ----------------------------------------- +### Achieving Isolation with `LLVMContext` -``LLVMContext`` is an opaque class in the LLVM API which clients can use to +`LLVMContext` is an opaque class in the LLVM API which clients can use to operate multiple, isolated instances of LLVM concurrently within the same address space. For instance, in a hypothetical compile-server, the compilation of an individual translation unit is conceptually independent from all the others, and it would be desirable to be able to compile incoming translation -units concurrently on independent server threads. Fortunately, ``LLVMContext`` +units concurrently on independent server threads. Fortunately, `LLVMContext` exists to enable just this kind of scenario! -Conceptually, ``LLVMContext`` provides isolation. Every LLVM entity -(``Module``\ s, ``Value``\ s, ``Type``\ s, ``Constant``\ s, etc.) in LLVM's -in-memory IR belongs to an ``LLVMContext``. Entities in different contexts -*cannot* interact with each other: ``Module``\ s in different contexts cannot be -linked together, ``Function``\ s cannot be added to ``Module``\ s in different +Conceptually, `LLVMContext` provides isolation. Every LLVM entity +(`Module`s, `Value`s, `Type`s, `Constant`s, etc.) in LLVM's +in-memory IR belongs to an `LLVMContext`. Entities in different contexts +*cannot* interact with each other: `Module`s in different contexts cannot be +linked together, `Function`s cannot be added to `Module`s in different contexts, etc. What this means is that is safe to compile on multiple threads simultaneously, as long as no two threads operate on entities within the same context. In practice, very few places in the API require the explicit specification of a -``LLVMContext``, other than the ``Type`` creation/lookup APIs. 
Because every -``Type`` carries a reference to its owning context, most other entities can -determine what context they belong to by looking at their own ``Type``. If you +`LLVMContext`, other than the `Type` creation/lookup APIs. Because every +`Type` carries a reference to its owning context, most other entities can +determine what context they belong to by looking at their own `Type`. If you are adding new entities to LLVM IR, please try to maintain this interface design. -.. _jitthreading: +(jitthreading)= -Threads and the JIT -------------------- +### Threads and the JIT LLVM's "eager" JIT compiler is safe to use in threaded programs. Multiple -threads can call ``ExecutionEngine::getPointerToFunction()`` or -``ExecutionEngine::runFunction()`` concurrently, and multiple threads can run +threads can call `ExecutionEngine::getPointerToFunction()` or +`ExecutionEngine::runFunction()` concurrently, and multiple threads can run code output by the JIT concurrently. The user must still ensure that only one -thread accesses IR in a given ``LLVMContext`` while another thread might be +thread accesses IR in a given `LLVMContext` while another thread might be modifying it. One way to do that is to always hold the JIT lock while accessing -IR outside the JIT (the JIT *modifies* the IR by adding ``CallbackVH``\ s). -Another way is to only call ``getPointerToFunction()`` from the -``LLVMContext``'s thread. +IR outside the JIT (the JIT *modifies* the IR by adding `CallbackVH`s). +Another way is to only call `getPointerToFunction()` from the +`LLVMContext`'s thread. When the JIT is configured to compile lazily (using -``ExecutionEngine::DisableLazyCompilation(false)``), there is currently a `race -condition <https://bugs.llvm.org/show_bug.cgi?id=5184>`_ in updating call sites +`ExecutionEngine::DisableLazyCompilation(false)`), there is currently a [race condition](https://bugs.llvm.org/show_bug.cgi?id=5184) in updating call sites after a function is lazily-jitted. 
It's still possible to use the lazy JIT in a threaded program if you ensure that only one thread at a time can call any particular lazy stub and that the JIT lock guards any IR access, but we suggest using only the eager JIT in threaded programs. -.. _advanced: +(advanced)= -Advanced Topics -=============== +## Advanced Topics This section describes some of the advanced or obscure API's that most clients do not need to be aware of. These API's tend manage the inner workings of the LLVM system, and only need to be accessed in unusual circumstances. -.. _SymbolTable: +(SymbolTable)= -The ``ValueSymbolTable`` class ------------------------------- +### The `ValueSymbolTable` class -The ``ValueSymbolTable`` (`doxygen -`__) class provides -a symbol table that the :ref:`Function ` and Module_ classes use for -naming value definitions. The symbol table can provide a name for any Value_. +The `ValueSymbolTable` ([doxygen](https://llvm.org/doxygen/classllvm_1_1ValueSymbolTable.html)) class provides +a symbol table that the {ref}`Function ` and {ref}`Module ` classes use for +naming value definitions. The symbol table can provide a name for any {ref}`Value `. -Note that the ``SymbolTable`` class should not be directly accessed by most +Note that the `SymbolTable` class should not be directly accessed by most clients. It should only be used when iteration over the symbol table names themselves are required, which is very special purpose. Note that not all LLVM -Value_\ s have names, and those without names (i.e., they have an empty name) do +{ref}`Value `s have names, and those without names (i.e., they have an empty name) do not exist in the symbol table. Symbol tables support iteration over the values in the symbol table with -``begin/end/iterator`` and supports querying to see if a specific name is in the -symbol table (with ``lookup``). 
The ``ValueSymbolTable`` class exposes no -public mutator methods, instead, simply call ``setName`` on a value, which will +`begin/end/iterator` and supports querying to see if a specific name is in the +symbol table (with `lookup`). The `ValueSymbolTable` class exposes no +public mutator methods, instead, simply call `setName` on a value, which will autoinsert it into the appropriate symbol table. -.. _UserLayout: +(UserLayout)= -The ``User`` and owned ``Use`` classes' memory layout ------------------------------------------------------ +### The `User` and owned `Use` classes' memory layout -The ``User`` (`doxygen `__) -class provides a basis for expressing the ownership of ``User`` towards other -`Value instance `_\ s. The -``Use`` (`doxygen `__) helper +The `User` ([doxygen](https://llvm.org/doxygen/classllvm_1_1User.html)) +class provides a basis for expressing the ownership of `User` towards other +[Value instance](https://llvm.org/doxygen/classllvm_1_1Value.html)s. The +`Use` ([doxygen](https://llvm.org/doxygen/classllvm_1_1Use.html)) helper class is employed to do the bookkeeping and to facilitate *O(1)* addition and removal. -.. _Use2User: +(Use2User)= -Interaction and relationship between ``User`` and ``Use`` objects -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Interaction and relationship between `User` and `Use` objects -A subclass of ``User`` can choose between incorporating its ``Use`` objects or -refer to them out-of-line by means of a pointer. A mixed variant (some ``Use`` +A subclass of `User` can choose between incorporating its `Use` objects or +refer to them out-of-line by means of a pointer. A mixed variant (some `Use` s inline others hung off) is impractical and breaks the invariant that the -``Use`` objects belonging to the same ``User`` form a contiguous array. +`Use` objects belonging to the same `User` form a contiguous array. 
-We have 2 different layouts in the ``User`` (sub)classes: +We have 2 different layouts in the `User` (sub)classes: * Layout a) - The ``Use`` object(s) are inside (resp. at fixed offset) of the ``User`` + The `Use` object(s) are inside (resp. at fixed offset) of the `User` object and there are a fixed number of them. * Layout b) - The ``Use`` object(s) are referenced by a pointer to an array from the - ``User`` object and there may be a variable number of them. + The `Use` object(s) are referenced by a pointer to an array from the + `User` object and there may be a variable number of them. As of v2.4 each layout still possesses a direct pointer to the start of the -array of ``Use``\ s. Though not mandatory for layout a), we stick to this -redundancy for the sake of simplicity. The ``User`` object also stores the -number of ``Use`` objects it has. (Theoretically this information can also be +array of `Use`s. Though not mandatory for layout a), we stick to this +redundancy for the sake of simplicity. The `User` object also stores the +number of `Use` objects it has. (Theoretically this information can also be calculated given the scheme presented below.) -Special forms of allocation operators (``operator new``) enforce the following +Special forms of allocation operators (`operator new`) enforce the following memory layouts: -* Layout a) is modelled by prepending the ``User`` object by the ``Use[]`` +* Layout a) is modelled by prepending the `User` object by the `Use[]` array. - .. code-block:: none - - ...---.---.---.---.-------... - | P | P | P | P | User - '''---'---'---'---'-------''' - -* Layout b) is modelled by pointing at the ``Use[]`` array. - - .. code-block:: none - - .-------... - | User - '-------''' - | - v - .---.---.---.---... - | P | P | P | P | - '---'---'---'---''' - -*(In the above figures* '``P``' *stands for the* ``Use**`` *that is stored in -each* ``Use`` *object in the member* ``Use::Prev`` *)* - -.. 
_polymorphism: - -Designing Type Hierarchies and Polymorphic Interfaces ------------------------------------------------------ + ```none + ...---.---.---.---.-------... + | P | P | P | P | User + '''---'---'---'---'-------''' + ``` +* Layout b) is modelled by pointing at the `Use[]` array. + + ```none + .-------... + | User + '-------''' + | + v + .---.---.---.---... + | P | P | P | P | + '---'---'---'---''' + ``` +*(In the above figures* '`P`' *stands for the* `Use**` *that is stored in +each* `Use` *object in the member* `Use::Prev` *)* + +(polymorphism)= + +### Designing Type Hierarchies and Polymorphic Interfaces There are two different design patterns that tend to result in the use of virtual dispatch for methods in a type hierarchy in C++ programs. The first is a genuine type hierarchy where different types in the hierarchy model a specific subset of the functionality and semantics, and these types nest -strictly within each other. Good examples of this can be seen in the ``Value`` -or ``Type`` type hierarchies. +strictly within each other. Good examples of this can be seen in the `Value` +or `Type` type hierarchies. A second is the desire to dispatch dynamically across a collection of polymorphic interface implementations. This latter use case can be modeled with @@ -3427,28 +3169,25 @@ implementations. The preferred implementation strategy for the second use case is that of generic programming (sometimes called "compile-time duck typing" or "static -polymorphism"). For example, a template over some type parameter ``T`` can be +polymorphism"). For example, a template over some type parameter `T` can be instantiated across any particular implementation that conforms to the interface or *concept*. A good example here is the highly generic properties of any type which models a node in a directed graph. LLVM models these primarily through templates and generic programming. Such templates include the -``LoopInfoBase`` and ``DominatorTreeBase``. 
When this type of polymorphism +`LoopInfoBase` and `DominatorTreeBase`. When this type of polymorphism truly needs **dynamic** dispatch you can generalize it using a technique called *concept-based polymorphism*. This pattern emulates the interfaces and behaviors of templates using a very limited form of virtual dispatch for type erasure inside its implementation. You can find examples of this technique in -the ``PassManager.h`` system, and there is a more detailed introduction to it +the `PassManager.h` system, and there is a more detailed introduction to it by Sean Parent in several of his talks and papers: -#. `Inheritance Is The Base Class of Evil - `_ +1. [Inheritance Is The Base Class of Evil](https://learn.microsoft.com/en-us/shows/goingnative-2013/inheritance-base-class-of-evil) - The GoingNative 2013 talk describing this technique, and probably the best place to start. -#. `Value Semantics and Concepts-based Polymorphism - `_ - The C++Now! 2012 talk +1. [Value Semantics and Concepts-based Polymorphism](http://www.youtube.com/watch?v=_BpMYeUFXv8) - The C++Now! 2012 talk describing this technique in more detail. -#. `Sean Parent's Papers and Presentations - `_ +1. [Sean Parent's Papers and Presentations](https://sean-parent.stlab.cc/papers-and-presentations) - Links to slides, videos, and sometimes code. When deciding between creating a type hierarchy (with either tagged or virtual @@ -3469,14 +3208,12 @@ generate significantly more efficient code. We have also found that a large amount of our usage of type hierarchies fits better with tag-based pattern matching rather than dynamic dispatch across a common interface. Within LLVM we have built custom helpers to facilitate this design. 
See this document's -section on :ref:`isa and dyn_cast ` and our :doc:`detailed document -` which describes how you can implement this +section on {ref}`isa and dyn_cast ` and our {doc}`detailed document ` which describes how you can implement this pattern for use with the LLVM helpers. -.. _abi_breaking_checks: +(abi_breaking_checks)= -ABI Breaking Checks -------------------- +### ABI Breaking Checks Checks and asserts that alter the LLVM C++ ABI are predicated on the preprocessor symbol `LLVM_ENABLE_ABI_BREAKING_CHECKS` -- LLVM @@ -3489,242 +3226,232 @@ between +Asserts and -Asserts builds should use the CMake build system to set `LLVM_ENABLE_ABI_BREAKING_CHECKS` independently of `LLVM_ENABLE_ASSERTIONS`. -.. _coreclasses: +(coreclasses)= -The Core LLVM Class Hierarchy Reference -======================================= +## The Core LLVM Class Hierarchy Reference -``#include "llvm/IR/Type.h"`` +`#include "llvm/IR/Type.h"` -header source: `Type.h `_ +header source: [Type.h](https://llvm.org/doxygen/Type_8h_source.html) -doxygen info: `Type Classes `_ +doxygen info: [Type Classes](https://llvm.org/doxygen/classllvm_1_1Type.html) The Core LLVM classes are the primary means of representing the program being inspected or transformed. The core LLVM classes are defined in header files in -the ``include/llvm/IR`` directory, and implemented in the ``lib/IR`` +the `include/llvm/IR` directory, and implemented in the `lib/IR` directory. It's worth noting that, for historical reasons, this library is -called ``libLLVMCore.so``, not ``libLLVMIR.so`` as you might expect. +called `libLLVMCore.so`, not `libLLVMIR.so` as you might expect. -.. _Type: +(Type)= -The Type class and Derived Types --------------------------------- +### The Type class and Derived Types -``Type`` is a superclass of all type classes. Every ``Value`` has a ``Type``. -``Type`` cannot be instantiated directly but only through its subclasses. 
-Certain primitive types (``VoidType``, ``LabelType``, ``FloatType`` and -``DoubleType``) have hidden subclasses. They are hidden because they offer no -useful functionality beyond what the ``Type`` class offers except to distinguish -themselves from other subclasses of ``Type``. +`Type` is a superclass of all type classes. Every `Value` has a `Type`. +`Type` cannot be instantiated directly but only through its subclasses. +Certain primitive types (`VoidType`, `LabelType`, `FloatType` and +`DoubleType`) have hidden subclasses. They are hidden because they offer no +useful functionality beyond what the `Type` class offers except to distinguish +themselves from other subclasses of `Type`. -All other types are subclasses of ``DerivedType``. Types can be named, but this +All other types are subclasses of `DerivedType`. Types can be named, but this is not a requirement. There exists exactly one instance of a given shape at any one time. This allows type equality to be performed with address equality of -the Type Instance. That is, given two ``Type*`` values, the types are identical +the Type Instance. That is, given two `Type*` values, the types are identical if the pointers are identical. -.. _m_Type: +(m_Type)= -Important Public Methods -^^^^^^^^^^^^^^^^^^^^^^^^ +#### Important Public Methods -* ``bool isIntegerTy() const``: Returns true for any integer type. +* `bool isIntegerTy() const`: Returns true for any integer type. -* ``bool isFloatingPointTy()``: Return true if this is one of the five +* `bool isFloatingPointTy()`: Return true if this is one of the five floating point types. -* ``bool isSized()``: Return true if the type has known size. Things +* `bool isSized()`: Return true if the type has known size. Things that don't have a size are abstract types, labels and void. -.. 
_derivedtypes: +(derivedtypes)= -Important Derived Types -^^^^^^^^^^^^^^^^^^^^^^^ +#### Important Derived Types -``IntegerType`` +`IntegerType` Subclass of DerivedType that represents integer types of any bit width. Any - bit width between ``IntegerType::MIN_INT_BITS`` (1) and - ``IntegerType::MAX_INT_BITS`` (~8 million) can be represented. + bit width between `IntegerType::MIN_INT_BITS` (1) and + `IntegerType::MAX_INT_BITS` (~8 million) can be represented. - * ``static const IntegerType* get(unsigned NumBits)``: get an integer + * `static const IntegerType* get(unsigned NumBits)`: get an integer type of a specific bit width. - * ``unsigned getBitWidth() const``: Get the bit width of an integer type. + * `unsigned getBitWidth() const`: Get the bit width of an integer type. -``SequentialType`` +`SequentialType` This is subclassed by ArrayType and VectorType. - * ``const Type * getElementType() const``: Returns the type of each + * `const Type * getElementType() const`: Returns the type of each of the elements in the sequential type. - * ``uint64_t getNumElements() const``: Returns the number of elements + * `uint64_t getNumElements() const`: Returns the number of elements in the sequential type. -``ArrayType`` +`ArrayType` This is a subclass of SequentialType and defines the interface for array types. -``PointerType`` +`PointerType` Subclass of Type for pointer types. -``VectorType`` +`VectorType` Subclass of SequentialType for vector types. A vector type is similar to an ArrayType but is distinguished because it is a first class type whereas ArrayType is not. Vector types are used for vector operations and are usually small vectors of an integer or floating point type. -``StructType`` +`StructType` Subclass of DerivedTypes for struct types. -.. _FunctionType: +(FunctionType)= -``FunctionType`` +`FunctionType` Subclass of DerivedTypes for function types. - * ``bool isVarArg() const``: Returns true if it's a vararg function. 
+ * `bool isVarArg() const`: Returns true if it's a vararg function. - * ``const Type * getReturnType() const``: Returns the return type of the + * `const Type * getReturnType() const`: Returns the return type of the function. - * ``const Type * getParamType (unsigned i)``: Returns the type of the ith + * `const Type * getParamType (unsigned i)`: Returns the type of the ith parameter. - * ``const unsigned getNumParams() const``: Returns the number of formal + * `const unsigned getNumParams() const`: Returns the number of formal parameters. -.. _Module: +(Module)= -The ``Module`` class --------------------- +### The `Module` class -``#include "llvm/IR/Module.h"`` +`#include "llvm/IR/Module.h"` -header source: `Module.h `_ +header source: [Module.h](https://llvm.org/doxygen/Module_8h_source.html) -doxygen info: `Module Class `_ +doxygen info: [Module Class](https://llvm.org/doxygen/classllvm_1_1Module.html) -The ``Module`` class represents the top level structure present in LLVM +The `Module` class represents the top level structure present in LLVM programs. An LLVM module is effectively either a translation unit of the original program or a combination of several translation units merged by the -linker. The ``Module`` class keeps track of a list of :ref:`Function -`\ s, a list of GlobalVariable_\ s, and a SymbolTable_. +linker. The `Module` class keeps track of a list of {ref}`Function `s, a list of {ref}`GlobalVariable `s, and a {ref}`SymbolTable `. Additionally, it contains a few helpful member functions that try to make common operations easy. -.. _m_Module: +(m_Module)= -Important Public Members of the ``Module`` class -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Important Public Members of the `Module` class -* ``Module::Module(std::string name = "")`` +* `Module::Module(std::string name = "")` - Constructing a Module_ is easy. You can optionally provide a name for it + Constructing a {ref}`Module ` is easy. 
You can optionally provide a name for it (probably based on the name of the translation unit). -* | ``Module::iterator`` - Typedef for function list iterator - | ``Module::const_iterator`` - Typedef for const_iterator. - | ``begin()``, ``end()``, ``size()``, ``empty()`` +* `Module::iterator` - Typedef for function list iterator\ + `Module::const_iterator` - Typedef for const_iterator.\ + `begin()`, `end()`, `size()`, `empty()` These are forwarding methods that make it easy to access the contents of a - ``Module`` object's :ref:`Function ` list. + `Module` object's {ref}`Function ` list. -* ``Module::FunctionListType &getFunctionList()`` +* `Module::FunctionListType &getFunctionList()` - Returns the list of :ref:`Function `\ s. This is necessary to use + Returns the list of {ref}`Function `s. This is necessary to use when you need to update the list or perform a complex action that doesn't have a forwarding method. ---------------- -* | ``Module::global_iterator`` - Typedef for global variable list iterator - | ``Module::const_global_iterator`` - Typedef for const_iterator. - | ``Module::insertGlobalVariable()`` - Inserts a global variable to the list. - | ``Module::removeGlobalVariable()`` - Removes a global variable from the list. - | ``Module::eraseGlobalVariable()`` - Removes a global variable from the list and deletes it. 
- | ``global_begin()``, ``global_end()``, ``global_size()``, ``global_empty()`` +* `Module::global_iterator` - Typedef for global variable list iterator\ + `Module::const_global_iterator` - Typedef for const_iterator.\ + `Module::insertGlobalVariable()` - Inserts a global variable to the list.\ + `Module::removeGlobalVariable()` - Removes a global variable from the list.\ + `Module::eraseGlobalVariable()` - Removes a global variable from the list and deletes it.\ + `global_begin()`, `global_end()`, `global_size()`, `global_empty()` These are forwarding methods that make it easy to access the contents of a - ``Module`` object's GlobalVariable_ list. + `Module` object's {ref}`GlobalVariable ` list. ---------------- -* ``SymbolTable *getSymbolTable()`` +* `SymbolTable *getSymbolTable()` - Return a reference to the SymbolTable_ for this ``Module``. + Return a reference to the {ref}`SymbolTable ` for this `Module`. ---------------- -* ``Function *getFunction(StringRef Name) const`` +* `Function *getFunction(StringRef Name) const` - Look up the specified function in the ``Module`` SymbolTable_. If it does not - exist, return ``null``. + Look up the specified function in the `Module` {ref}`SymbolTable `. If it does not + exist, return `null`. -* ``FunctionCallee getOrInsertFunction(const std::string &Name, - const FunctionType *T)`` +* `FunctionCallee getOrInsertFunction(const std::string &Name, + const FunctionType *T)` - Look up the specified function in the ``Module`` SymbolTable_. If + Look up the specified function in the `Module` {ref}`SymbolTable `. If it does not exist, add an external declaration for the function and return it. Note that the function signature already present may not match the requested signature. 
Thus, in order to enable the common usage of passing the result directly to EmitCall, the return type is - a struct of ``{FunctionType *T, Constant *FunctionPtr}``, rather - than simply the ``Function*`` with potentially an unexpected + a struct of `{FunctionType *T, Constant *FunctionPtr}`, rather + than simply the `Function*` with potentially an unexpected signature. -* ``std::string getTypeName(const Type *Ty)`` +* `std::string getTypeName(const Type *Ty)` - If there is at least one entry in the SymbolTable_ for the specified Type_, + If there is at least one entry in the {ref}`SymbolTable ` for the specified {ref}`Type `, return it. Otherwise return the empty string. -* ``bool addTypeName(const std::string &Name, const Type *Ty)`` +* `bool addTypeName(const std::string &Name, const Type *Ty)` - Insert an entry in the SymbolTable_ mapping ``Name`` to ``Ty``. If there is - already an entry for this name, true is returned and the SymbolTable_ is not + Insert an entry in the {ref}`SymbolTable ` mapping `Name` to `Ty`. If there is + already an entry for this name, true is returned and the {ref}`SymbolTable ` is not modified. -.. _Value: +(Value)= -The ``Value`` class -------------------- +### The `Value` class -``#include "llvm/IR/Value.h"`` +`#include "llvm/IR/Value.h"` -header source: `Value.h `_ +header source: [Value.h](https://llvm.org/doxygen/Value_8h_source.html) -doxygen info: `Value Class `_ +doxygen info: [Value Class](https://llvm.org/doxygen/classllvm_1_1Value.html) -The ``Value`` class is the most important class in the LLVM Source base. It +The `Value` class is the most important class in the LLVM Source base. It represents a typed value that may be used (among other things) as an operand to -an instruction. There are many different types of ``Value``\ s, such as -Constant_\ s, Argument_\ s. Even Instruction_\ s and :ref:`Function -`\ s are ``Value``\ s. +an instruction. 
There are many different types of `Value`s, such as +{ref}`Constant `s, {ref}`Argument `s. Even {ref}`Instruction `s and {ref}`Function `s are `Value`s. -A particular ``Value`` may be used many times in the LLVM representation for a +A particular `Value` may be used many times in the LLVM representation for a program. For example, an incoming argument to a function (represented with an -instance of the Argument_ class) is "used" by every instruction in the function -that references the argument. To keep track of this relationship, the ``Value`` -class keeps a list of all of the ``User``\ s that is using it (the User_ class -is a base class for all nodes in the LLVM graph that can refer to ``Value``\ s). +instance of the {ref}`Argument ` class) is "used" by every instruction in the function +that references the argument. To keep track of this relationship, the `Value` +class keeps a list of all of the `User`s that are using it (the {ref}`User ` class +is a base class for all nodes in the LLVM graph that can refer to `Value`s). This use list is how LLVM represents def-use information in the program, and is -accessible through the ``use_*`` methods, shown below. +accessible through the `use_*` methods, shown below. -Because LLVM is a typed representation, every LLVM ``Value`` is typed, and this -Type_ is available through the ``getType()`` method. In addition, all LLVM -values can be named. The "name" of the ``Value`` is a symbolic string printed +Because LLVM is a typed representation, every LLVM `Value` is typed, and this +{ref}`Type ` is available through the `getType()` method. In addition, all LLVM +values can be named. The "name" of the `Value` is a symbolic string printed in the LLVM code: -.. code-block:: llvm - - %foo = add i32 1, 2 - -.. _nameWarning: +```llvm +%foo = add i32 1, 2 +``` +(nameWarning)= The name of this instruction is "foo".
**NOTE** that the name of any value may be missing (an empty string), so names should **ONLY** be used for debugging (making the source code easier to read, debugging printouts), they should not be used to keep track of values or map between them. For this purpose, use a -``std::map`` of pointers to the ``Value`` itself instead. +`std::map` of pointers to the `Value` itself instead. One important aspect of LLVM is that there is no distinction between an SSA variable and the operation that produces it. Because of this, any reference to @@ -3733,429 +3460,405 @@ argument, for example) is represented as a direct pointer to the instance of the class that represents this value. Although this may take some getting used to, it simplifies the representation and makes it easier to manipulate. -.. _m_Value: +(m_Value)= -Important Public Members of the ``Value`` class -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Important Public Members of the `Value` class -* | ``Value::use_iterator`` - Typedef for iterator over the use-list - | ``Value::const_use_iterator`` - Typedef for const_iterator over the - use-list - | ``unsigned use_size()`` - Returns the number of users of the value. - | ``bool use_empty()`` - Returns true if there are no users. - | ``use_iterator use_begin()`` - Get an iterator to the start of the - use-list. - | ``use_iterator use_end()`` - Get an iterator to the end of the use-list. - | ``User *use_back()`` - Returns the last element in the list. +* `Value::use_iterator` - Typedef for iterator over the use-list\ + `Value::const_use_iterator` - Typedef for const_iterator over the + use-list\ + `unsigned use_size()` - Returns the number of users of the value.\ + `bool use_empty()` - Returns true if there are no users.\ + `use_iterator use_begin()` - Get an iterator to the start of the + use-list.\ + `use_iterator use_end()` - Get an iterator to the end of the use-list.\ + `User *use_back()` - Returns the last element in the list. 
These methods are the interface to access the def-use information in LLVM. As with all other iterators in LLVM, the naming conventions follow the - conventions defined by the STL_. + conventions defined by the {ref}`STL `. -* ``Type *getType() const`` +* `Type *getType() const` This method returns the Type of the Value. -* | ``bool hasName() const`` - | ``std::string getName() const`` - | ``void setName(const std::string &Name)`` +* `bool hasName() const`\ + `std::string getName() const`\ + `void setName(const std::string &Name)` - This family of methods is used to access and assign a name to a ``Value``, be - aware of the :ref:`precaution above `. + This family of methods is used to access and assign a name to a `Value`, be + aware of the {ref}`precaution above `. -* ``void replaceAllUsesWith(Value *V)`` +* `void replaceAllUsesWith(Value *V)` - This method traverses the use list of a ``Value`` changing all User_\ s of the - current value to refer to "``V``" instead. For example, if you detect that an + This method traverses the use list of a `Value` changing all {ref}`User `s of the + current value to refer to "`V`" instead. For example, if you detect that an instruction always produces a constant value (for example through constant folding), you can replace all uses of the instruction with the constant like this: - .. code-block:: c++ - - Inst->replaceAllUsesWith(ConstVal); - -.. _User: + ```cpp + Inst->replaceAllUsesWith(ConstVal); + ``` +(User)= -The ``User`` class ------------------- +### The `User` class -``#include "llvm/IR/User.h"`` +`#include "llvm/IR/User.h"` -header source: `User.h `_ +header source: [User.h](https://llvm.org/doxygen/User_8h_source.html) -doxygen info: `User Class `_ +doxygen info: [User Class](https://llvm.org/doxygen/classllvm_1_1User.html) -Superclass: Value_ +Superclass: {ref}`Value ` -The ``User`` class is the common base class of all LLVM nodes that may refer to -``Value``\ s. 
It exposes a list of "Operands" that are all of the ``Value``\ s -that the User is referring to. The ``User`` class itself is a subclass of -``Value``. +The `User` class is the common base class of all LLVM nodes that may refer to +`Value`s. It exposes a list of "Operands" that are all of the `Value`s +that the User is referring to. The `User` class itself is a subclass of +`Value`. -The operands of a ``User`` point directly to the LLVM ``Value`` that it refers +The operands of a `User` point directly to the LLVM `Value` that it refers to. Because LLVM uses Static Single Assignment (SSA) form, there can only be one definition referred to, allowing this direct connection. This connection provides the use-def information in LLVM. -.. _m_User: +(m_User)= -Important Public Members of the ``User`` class -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Important Public Members of the `User` class -The ``User`` class exposes the operand list in two ways: through an index access +The `User` class exposes the operand list in two ways: through an index access interface and through an iterator based interface. -* | ``Value *getOperand(unsigned i)`` - | ``unsigned getNumOperands()`` +* `Value *getOperand(unsigned i)`\ + `unsigned getNumOperands()` - These two methods expose the operands of the ``User`` in a convenient form for + These two methods expose the operands of the `User` in a convenient form for direct access. -* | ``User::op_iterator`` - Typedef for iterator over the operand list - | ``op_iterator op_begin()`` - Get an iterator to the start of the operand - list. - | ``op_iterator op_end()`` - Get an iterator to the end of the operand list. +* `User::op_iterator` - Typedef for iterator over the operand list\ + `op_iterator op_begin()` - Get an iterator to the start of the operand + list.\ + `op_iterator op_end()` - Get an iterator to the end of the operand list. Together, these methods make up the iterator based interface to the operands - of a ``User``. 
+ of a `User`. -.. _Instruction: +(Instruction)= -The ``Instruction`` class -------------------------- +### The `Instruction` class -``#include "llvm/IR/Instruction.h"`` +`#include "llvm/IR/Instruction.h"` -header source: `Instruction.h -`_ +header source: [Instruction.h](https://llvm.org/doxygen/Instruction_8h_source.html) -doxygen info: `Instruction Class -`_ +doxygen info: [Instruction Class](https://llvm.org/doxygen/classllvm_1_1Instruction.html) -Superclasses: User_, Value_ +Superclasses: {ref}`User `, {ref}`Value ` -The ``Instruction`` class is the common base class for all LLVM instructions. +The `Instruction` class is the common base class for all LLVM instructions. It provides only a few methods, but is a very commonly used class. The primary -data tracked by the ``Instruction`` class itself is the opcode (instruction -type) and the parent BasicBlock_ the ``Instruction`` is embedded into. To +data tracked by the `Instruction` class itself is the opcode (instruction +type) and the parent {ref}`BasicBlock ` the `Instruction` is embedded into. To represent a specific type of instruction, one of many subclasses of -``Instruction`` are used. +`Instruction` are used. -Because the ``Instruction`` class subclasses the User_ class, its operands can -be accessed in the same way as for other ``User``\ s (with the -``getOperand()``/``getNumOperands()`` and ``op_begin()``/``op_end()`` methods). -An important file for the ``Instruction`` class is the ``llvm/Instruction.def`` +Because the `Instruction` class subclasses the {ref}`User ` class, its operands can +be accessed in the same way as for other `User`s (with the +`getOperand()`/`getNumOperands()` and `op_begin()`/`op_end()` methods). +An important file for the `Instruction` class is the `llvm/Instruction.def` file. This file contains some meta-data about the various different types of instructions in LLVM. 
It describes the enum values that are used as opcodes -(for example ``Instruction::Add`` and ``Instruction::ICmp``), as well as the -concrete sub-classes of ``Instruction`` that implement the instruction (for -example BinaryOperator_ and CmpInst_). Unfortunately, the use of macros in this +(for example `Instruction::Add` and `Instruction::ICmp`), as well as the +concrete sub-classes of `Instruction` that implement the instruction (for +example {ref}`BinaryOperator ` and {ref}`CmpInst `). Unfortunately, the use of macros in this file confuses doxygen, so these enum values don't show up correctly in the -`doxygen output `_. +[doxygen output](https://llvm.org/doxygen/classllvm_1_1Instruction.html). -.. _s_Instruction: +(s_Instruction)= -Important Subclasses of the ``Instruction`` class -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Important Subclasses of the `Instruction` class -.. _BinaryOperator: +(BinaryOperator)= -* ``BinaryOperator`` +* `BinaryOperator` This subclass represents all two operand instructions whose operands must be the same type, except for the comparison instructions. -.. _CastInst: +(CastInst)= -* ``CastInst`` +* `CastInst` This subclass is the parent of the 12 casting instructions. It provides common operations on cast instructions. -.. _CmpInst: +(CmpInst)= -* ``CmpInst`` +* `CmpInst` This subclass represents the two comparison instructions, - `ICmpInst `_ (integer operands), and - `FCmpInst `_ (floating point operands). + {ref}`ICmpInst ` (integer operands), and + {ref}`FCmpInst ` (floating point operands). -.. _m_Instruction: +(m_Instruction)= -Important Public Members of the ``Instruction`` class -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Important Public Members of the `Instruction` class -* ``BasicBlock *getParent()`` +* `BasicBlock *getParent()` - Returns the BasicBlock_ that this - ``Instruction`` is embedded into. + Returns the {ref}`BasicBlock ` that this + `Instruction` is embedded into. 
-* ``bool mayWriteToMemory()`` +* `bool mayWriteToMemory()` - Returns true if the instruction writes to memory, i.e., it is a ``call``, - ``free``, ``invoke``, or ``store``. + Returns true if the instruction writes to memory, i.e., it is a `call`, + `free`, `invoke`, or `store`. -* ``unsigned getOpcode()`` +* `unsigned getOpcode()` - Returns the opcode for the ``Instruction``. + Returns the opcode for the `Instruction`. -* ``Instruction *clone() const`` +* `Instruction *clone() const` Returns another instance of the specified instruction, identical in all ways to the original except that the instruction has no parent (i.e., it's not - embedded into a BasicBlock_), and it has no name. + embedded into a {ref}`BasicBlock `), and it has no name. -.. _Constant: +(Constant)= -The ``Constant`` class and subclasses -------------------------------------- +### The `Constant` class and subclasses Constant represents a base class for different types of constants. It is subclassed by ConstantInt, ConstantArray, etc. for representing the various -types of Constants. GlobalValue_ is also a subclass, which represents the +types of Constants. {ref}`GlobalValue ` is also a subclass, which represents the address of a global variable or function. -.. _s_Constant: +(s_Constant)= -Important Subclasses of Constant -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Important Subclasses of Constant * ConstantInt : This subclass of Constant represents an integer constant of any width. - * ``const APInt& getValue() const``: Returns the underlying - value of this constant, an ``APInt`` value. + * `const APInt& getValue() const`: Returns the underlying + value of this constant, an `APInt` value. - * ``int64_t getSExtValue() const``: Converts the underlying APInt value to an - ``int64_t`` via sign extension. If the value (not the bit width) of the APInt - is too large to fit in an ``int64_t``, an assertion will result. 
For this + * `int64_t getSExtValue() const`: Converts the underlying APInt value to an + `int64_t` via sign extension. If the value (not the bit width) of the APInt + is too large to fit in an `int64_t`, an assertion will result. For this reason, use of this method is discouraged. - * ``uint64_t getZExtValue() const``: Converts the underlying ``APInt`` value - to a ``uint64_t`` via zero extension. If the value (not the bit width) of the - APInt is too large to fit in a ``uint64_t``, an assertion will result. For this + * `uint64_t getZExtValue() const`: Converts the underlying `APInt` value + to a `uint64_t` via zero extension. If the value (not the bit width) of the + APInt is too large to fit in a `uint64_t`, an assertion will result. For this reason, use of this method is discouraged. - * ``static ConstantInt* get(const APInt& Val)``: Returns the ConstantInt - object that represents the value provided by ``Val``. The type is implied - as the IntegerType that corresponds to the bit width of ``Val``. + * `static ConstantInt* get(const APInt& Val)`: Returns the ConstantInt + object that represents the value provided by `Val`. The type is implied + as the IntegerType that corresponds to the bit width of `Val`. - * ``static ConstantInt* get(const Type *Ty, uint64_t Val)``: Returns the - ConstantInt object that represents the value provided by ``Val`` for integer - type ``Ty``. + * `static ConstantInt* get(const Type *Ty, uint64_t Val)`: Returns the + ConstantInt object that represents the value provided by `Val` for integer + type `Ty`. * ConstantFP : This class represents a floating point constant. - * ``double getValue() const``: Returns the underlying value of this constant. + * `double getValue() const`: Returns the underlying value of this constant. * ConstantArray : This represents a constant array. 
- * ``const std::vector &getValues() const``: Returns a vector of + * `const std::vector &getValues() const`: Returns a vector of component constants that makeup this array. * ConstantStruct : This represents a constant struct. - * ``const std::vector &getValues() const``: Returns a vector of + * `const std::vector &getValues() const`: Returns a vector of component constants that makeup this array. * GlobalValue : This represents either a global variable or a function. In either case, the value is a constant fixed address (after linking). -.. _GlobalValue: +(GlobalValue)= -The ``GlobalValue`` class -------------------------- +### The `GlobalValue` class -``#include "llvm/IR/GlobalValue.h"`` +`#include "llvm/IR/GlobalValue.h"` -header source: `GlobalValue.h -`_ +header source: [GlobalValue.h](https://llvm.org/doxygen/GlobalValue_8h_source.html) -doxygen info: `GlobalValue Class -`_ +doxygen info: [GlobalValue Class](https://llvm.org/doxygen/classllvm_1_1GlobalValue.html) -Superclasses: Constant_, User_, Value_ +Superclasses: {ref}`Constant `, {ref}`User `, {ref}`Value ` -Global values ( GlobalVariable_\ s or :ref:`Function `\ s) are the -only LLVM values that are visible in the bodies of all :ref:`Function -`\ s. Because they are visible at global scope, they are also +Global values ( {ref}`GlobalVariable `s or {ref}`Function `s) are the +only LLVM values that are visible in the bodies of all {ref}`Function `s. Because they are visible at global scope, they are also subject to linking with other globals defined in different translation units. -To control the linking process, ``GlobalValue``\ s know their linkage rules. -Specifically, ``GlobalValue``\ s know whether they have internal or external -linkage, as defined by the ``LinkageTypes`` enumeration. +To control the linking process, `GlobalValue`s know their linkage rules. +Specifically, `GlobalValue`s know whether they have internal or external +linkage, as defined by the `LinkageTypes` enumeration. 
-If a ``GlobalValue`` has internal linkage (equivalent to being ``static`` in C), +If a `GlobalValue` has internal linkage (equivalent to being `static` in C), it is not visible to code outside the current translation unit, and does not participate in linking. If it has external linkage, it is visible to external code, and does participate in linking. In addition to linkage information, -``GlobalValue``\ s keep track of which Module_ they are currently part of. +`GlobalValue`s keep track of which {ref}`Module ` they are currently part of. -Because ``GlobalValue``\ s are memory objects, they are always referred to by -their **address**. As such, the Type_ of a global is always a pointer to its -contents. It is important to remember this when using the ``GetElementPtrInst`` +Because `GlobalValue`s are memory objects, they are always referred to by +their **address**. As such, the {ref}`Type ` of a global is always a pointer to its +contents. It is important to remember this when using the `GetElementPtrInst` instruction because this pointer must be dereferenced first. For example, if -you have a ``GlobalVariable`` (a subclass of ``GlobalValue)`` that is an array -of 24 ints, type ``[24 x i32]``, then the ``GlobalVariable`` is a pointer to +you have a `GlobalVariable` (a subclass of `GlobalValue`) that is an array +of 24 ints, type `[24 x i32]`, then the `GlobalVariable` is a pointer to that array. Although the address of the first element of this array and the -value of the ``GlobalVariable`` are the same, they have different types. The -``GlobalVariable``'s type is ``[24 x i32]``. The first element's type is -``i32.`` Because of this, accessing a global value requires you to dereference -the pointer with ``GetElementPtrInst`` first, then its elements can be accessed. -This is explained in the `LLVM Language Reference Manual -`_. +value of the `GlobalVariable` are the same, they have different types. The +`GlobalVariable`'s type is `[24 x i32]`.
The first element's type is +`i32`. Because of this, accessing a global value requires you to dereference +the pointer with `GetElementPtrInst` first, then its elements can be accessed. +This is explained in the {ref}`LLVM Language Reference Manual `. -.. _m_GlobalValue: +(m_GlobalValue)= -Important Public Members of the ``GlobalValue`` class -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Important Public Members of the `GlobalValue` class -* | ``bool hasInternalLinkage() const`` - | ``bool hasExternalLinkage() const`` - | ``void setInternalLinkage(bool HasInternalLinkage)`` +* `bool hasInternalLinkage() const`\ + `bool hasExternalLinkage() const`\ + `void setInternalLinkage(bool HasInternalLinkage)` - These methods manipulate the linkage characteristics of the ``GlobalValue``. + These methods manipulate the linkage characteristics of the `GlobalValue`. -* ``Module *getParent()`` +* `Module *getParent()` - This returns the Module_ that the + This returns the {ref}`Module ` that the GlobalValue is currently embedded into. -.. _c_Function: +(c_Function)= -The ``Function`` class ----------------------- +### The `Function` class -``#include "llvm/IR/Function.h"`` +`#include "llvm/IR/Function.h"` -header source: `Function.h `_ +header source: [Function.h](https://llvm.org/doxygen/Function_8h_source.html) -doxygen info: `Function Class -`_ +doxygen info: [Function Class](https://llvm.org/doxygen/classllvm_1_1Function.html) -Superclasses: GlobalValue_, Constant_, User_, Value_ +Superclasses: {ref}`GlobalValue `, {ref}`Constant `, {ref}`User `, {ref}`Value ` -The ``Function`` class represents a single procedure in LLVM. It is actually +The `Function` class represents a single procedure in LLVM. It is actually one of the more complex classes in the LLVM hierarchy because it must keep track -of a large amount of data. The ``Function`` class keeps track of a list of -BasicBlock_\ s, a list of formal Argument_\ s, and a SymbolTable_. +of a large amount of data.
The `Function` class keeps track of a list of +{ref}`BasicBlock `s, a list of formal {ref}`Argument `s, and a {ref}`SymbolTable `. -The list of BasicBlock_\ s is the most commonly used part of ``Function`` +The list of {ref}`BasicBlock `s is the most commonly used part of `Function` objects. The list imposes an implicit ordering of the blocks in the function, which indicate how the code will be laid out by the backend. Additionally, the -first BasicBlock_ is the implicit entry node for the ``Function``. It is not +first {ref}`BasicBlock ` is the implicit entry node for the `Function`. It is not legal in LLVM to explicitly branch to this initial block. There are no implicit exit nodes, and in fact there may be multiple exit nodes from a single -``Function``. If the BasicBlock_ list is empty, this indicates that the -``Function`` is actually a function declaration: the actual body of the function +`Function`. If the {ref}`BasicBlock ` list is empty, this indicates that the +`Function` is actually a function declaration: the actual body of the function hasn't been linked in yet. -In addition to a list of BasicBlock_\ s, the ``Function`` class also keeps track -of the list of formal Argument_\ s that the function receives. This container -manages the lifetime of the Argument_ nodes, just like the BasicBlock_ list does -for the BasicBlock_\ s. +In addition to a list of {ref}`BasicBlock `s, the `Function` class also keeps track +of the list of formal {ref}`Argument `s that the function receives. This container +manages the lifetime of the {ref}`Argument ` nodes, just like the {ref}`BasicBlock ` list does +for the {ref}`BasicBlock `s. -The SymbolTable_ is a very rarely used LLVM feature that is only used when you -have to look up a value by name. Aside from that, the SymbolTable_ is used +The {ref}`SymbolTable ` is a very rarely used LLVM feature that is only used when you +have to look up a value by name. 
Aside from that, the {ref}`SymbolTable ` is used internally to make sure that there are not conflicts between the names of -Instruction_\ s, BasicBlock_\ s, or Argument_\ s in the function body. +{ref}`Instruction `s, {ref}`BasicBlock `s, or {ref}`Argument `s in the function body. -Note that ``Function`` is a GlobalValue_ and therefore also a Constant_. The +Note that `Function` is a {ref}`GlobalValue ` and therefore also a {ref}`Constant `. The value of the function is its address (after linking) which is guaranteed to be constant. -.. _m_Function: +(m_Function)= -Important Public Members of the ``Function`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Important Public Members of the `Function` -* ``Function(const FunctionType *Ty, LinkageTypes Linkage, - const std::string &N = "", Module* Parent = 0)`` +* `Function(const FunctionType *Ty, LinkageTypes Linkage, + const std::string &N = "", Module* Parent = 0)` - Constructor used when you need to create new ``Function``\ s to add the + Constructor used when you need to create new `Function`s to add to the program. The constructor must specify the type of the function to create and - what type of linkage the function should have. The FunctionType_ argument + what type of linkage the function should have. The {ref}`FunctionType ` argument specifies the formal arguments and return value for the function. The same - FunctionType_ value can be used to create multiple functions. The ``Parent`` + {ref}`FunctionType ` value can be used to create multiple functions. The `Parent` argument specifies the Module in which the function is defined. If this argument is provided, the function will automatically be inserted into that module's list of functions. -* ``bool isDeclaration()`` +* `bool isDeclaration()` - Return whether or not the ``Function`` has a body defined. If the function is + Return whether or not the `Function` has a body defined.
If the function is "external", it does not have a body, and thus must be resolved by linking with a function defined in a different translation unit. -* | ``Function::iterator`` - Typedef for basic block list iterator - | ``Function::const_iterator`` - Typedef for const_iterator. - | ``begin()``, ``end()``, ``size()``, ``empty()``, ``insert()``, - ``splice()``, ``erase()`` +* `Function::iterator` - Typedef for basic block list iterator\ + `Function::const_iterator` - Typedef for const_iterator.\ + `begin()`, `end()`, `size()`, `empty()`, `insert()`, + `splice()`, `erase()` These are forwarding methods that make it easy to access the contents of a - ``Function`` object's BasicBlock_ list. + `Function` object's {ref}`BasicBlock ` list. -* | ``Function::arg_iterator`` - Typedef for the argument list iterator - | ``Function::const_arg_iterator`` - Typedef for const_iterator. - | ``arg_begin()``, ``arg_end()``, ``arg_size()``, ``arg_empty()`` +* `Function::arg_iterator` - Typedef for the argument list iterator\ + `Function::const_arg_iterator` - Typedef for const_iterator.\ + `arg_begin()`, `arg_end()`, `arg_size()`, `arg_empty()` These are forwarding methods that make it easy to access the contents of a - ``Function`` object's Argument_ list. + `Function` object's {ref}`Argument ` list. -* ``Function::ArgumentListType &getArgumentList()`` +* `Function::ArgumentListType &getArgumentList()` - Returns the list of Argument_. This is necessary to use when you need to + Returns the list of {ref}`Argument `. This is necessary to use when you need to update the list or perform a complex action that doesn't have a forwarding method. -* ``BasicBlock &getEntryBlock()`` +* `BasicBlock &getEntryBlock()` - Returns the entry ``BasicBlock`` for the function. Because the entry block + Returns the entry `BasicBlock` for the function. Because the entry block for the function is always the first block, this returns the first block of - the ``Function``. + the `Function`. 
-* | ``Type *getReturnType()`` - | ``FunctionType *getFunctionType()`` +* `Type *getReturnType()`\ + `FunctionType *getFunctionType()` - This traverses the Type_ of the ``Function`` and returns the return type of - the function, or the FunctionType_ of the actual function. + This traverses the {ref}`Type ` of the `Function` and returns the return type of + the function, or the {ref}`FunctionType ` of the actual function. -* ``SymbolTable *getSymbolTable()`` +* `SymbolTable *getSymbolTable()` - Return a pointer to the SymbolTable_ for this ``Function``. + Return a pointer to the {ref}`SymbolTable ` for this `Function`. -.. _GlobalVariable: +(GlobalVariable)= -The ``GlobalVariable`` class ----------------------------- +### The `GlobalVariable` class -``#include "llvm/IR/GlobalVariable.h"`` +`#include "llvm/IR/GlobalVariable.h"` -header source: `GlobalVariable.h -`_ +header source: [GlobalVariable.h](https://llvm.org/doxygen/GlobalVariable_8h_source.html) -doxygen info: `GlobalVariable Class -`_ +doxygen info: [GlobalVariable Class](https://llvm.org/doxygen/classllvm_1_1GlobalVariable.html) -Superclasses: GlobalValue_, Constant_, User_, Value_ +Superclasses: {ref}`GlobalValue `, {ref}`Constant `, {ref}`User `, {ref}`Value ` -Global variables are represented with the (surprise surprise) ``GlobalVariable`` -class. Like functions, ``GlobalVariable``\ s are also subclasses of -GlobalValue_, and as such are always referenced by their address (global values +Global variables are represented with the (surprise surprise) `GlobalVariable` +class. Like functions, `GlobalVariable`s are also subclasses of +{ref}`GlobalValue `, and as such are always referenced by their address (global values must live in memory, so their "name" refers to their constant address). See -GlobalValue_ for more on this. Global variables may have an initial value -(which must be a Constant_), and if they have an initializer, they may be marked +{ref}`GlobalValue ` for more on this. 
Global variables may have an initial value +(which must be a {ref}`Constant `), and if they have an initializer, they may be marked as "constant" themselves (indicating that their contents never change at runtime). -.. _m_GlobalVariable: +(m_GlobalVariable)= -Important Public Members of the ``GlobalVariable`` class -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Important Public Members of the `GlobalVariable` class -* ``GlobalVariable(const Type *Ty, bool isConstant, LinkageTypes &Linkage, - Constant *Initializer = 0, const std::string &Name = "", Module* Parent = 0)`` +* `GlobalVariable(const Type *Ty, bool isConstant, LinkageTypes &Linkage, + Constant *Initializer = 0, const std::string &Name = "", Module* Parent = 0)` - Create a new global variable of the specified type. If ``isConstant`` is true + Create a new global variable of the specified type. If `isConstant` is true then the global variable will be marked as unchanging for the program. The Linkage parameter specifies the type of linkage (internal, external, weak, linkonce, appending) for the variable. If the linkage is InternalLinkage, @@ -4163,72 +3866,67 @@ Important Public Members of the ``GlobalVariable`` class the resultant global variable will have internal linkage. AppendingLinkage concatenates together all instances (in different translation units) of the variable into a single variable but is only applicable to arrays. See the - `LLVM Language Reference `_ for further details + [LLVM Language Reference](https://llvm.org/docs/LangRef.html#modulestructure) for further details on linkage types. Optionally an initializer, a name, and the module to put the variable into may be specified for the global variable as well. -* ``bool isConstant() const`` +* `bool isConstant() const` Returns true if this is a global variable that is known not to be modified at runtime. -* ``bool hasInitializer()`` +* `bool hasInitializer()` - Returns true if this ``GlobalVariable`` has an initializer. 
+ Returns true if this `GlobalVariable` has an initializer. -* ``Constant *getInitializer()`` +* `Constant *getInitializer()` - Returns the initial value for a ``GlobalVariable``. It is not legal to call + Returns the initial value for a `GlobalVariable`. It is not legal to call this method if there is no initializer. -.. _BasicBlock: +(BasicBlock)= -The ``BasicBlock`` class ------------------------- +### The `BasicBlock` class -``#include "llvm/IR/BasicBlock.h"`` +`#include "llvm/IR/BasicBlock.h"` -header source: `BasicBlock.h -`_ +header source: [BasicBlock.h](https://llvm.org/doxygen/BasicBlock_8h_source.html) -doxygen info: `BasicBlock Class -`_ +doxygen info: [BasicBlock Class](https://llvm.org/doxygen/classllvm_1_1BasicBlock.html) -Superclass: Value_ +Superclass: {ref}`Value ` This class represents a single entry single exit section of the code, commonly -known as a basic block by the compiler community. The ``BasicBlock`` class -maintains a list of Instruction_\ s, which form the body of the block. Matching +known as a basic block by the compiler community. The `BasicBlock` class +maintains a list of {ref}`Instruction `s, which form the body of the block. Matching the language definition, the last element of this list of instructions is always a terminator instruction. In addition to tracking the list of instructions that make up the block, the -``BasicBlock`` class also keeps track of the :ref:`Function ` that +`BasicBlock` class also keeps track of the {ref}`Function ` that it is embedded into. -Note that ``BasicBlock``\ s themselves are Value_\ s, because they are +Note that `BasicBlock`s themselves are {ref}`Value `s, because they are referenced by instructions like branches and can go in the switch tables. -``BasicBlock``\ s have type ``label``. +`BasicBlock`s have type `label`. -.. 
_m_BasicBlock: +(m_BasicBlock)= -Important Public Members of the ``BasicBlock`` class -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Important Public Members of the `BasicBlock` class -* ``BasicBlock(const std::string &Name = "", Function *Parent = 0)`` +* `BasicBlock(const std::string &Name = "", Function *Parent = 0)` - The ``BasicBlock`` constructor is used to create new basic blocks for + The `BasicBlock` constructor is used to create new basic blocks for insertion into a function. The constructor optionally takes a name for the - new block, and a :ref:`Function ` to insert it into. If the - ``Parent`` parameter is specified, the new ``BasicBlock`` is automatically - inserted at the end of the specified :ref:`Function `, if not - specified, the ``BasicBlock`` must be manually inserted into the :ref:`Function - `. - -* | ``BasicBlock::iterator`` - Typedef for instruction list iterator - | ``BasicBlock::const_iterator`` - Typedef for const_iterator. - | ``begin()``, ``end()``, ``front()``, ``back()``, - ``size()``, ``empty()``, ``splice()`` + new block, and a {ref}`Function ` to insert it into. If the + `Parent` parameter is specified, the new `BasicBlock` is automatically + inserted at the end of the specified {ref}`Function `, if not + specified, the `BasicBlock` must be manually inserted into the {ref}`Function `. + +* `BasicBlock::iterator` - Typedef for instruction list iterator\ + `BasicBlock::const_iterator` - Typedef for const_iterator.\ + `begin()`, `end()`, `front()`, `back()`, + `size()`, `empty()`, `splice()` STL-style functions for accessing the instruction list. These methods and typedefs are forwarding functions that have the same @@ -4236,21 +3934,20 @@ Important Public Members of the ``BasicBlock`` class expose the underlying instruction list of a basic block in a way that is easy to manipulate. 
-* ``Function *getParent()`` +* `Function *getParent()` - Returns a pointer to :ref:`Function ` the block is embedded into, + Returns a pointer to {ref}`Function ` the block is embedded into, or a null pointer if it is homeless. -* ``Instruction *getTerminator()`` +* `Instruction *getTerminator()` Returns a pointer to the terminator instruction that appears at the end of the - ``BasicBlock``. If there is no terminator instruction, or if the last + `BasicBlock`. If there is no terminator instruction, or if the last instruction in the block is not a terminator, then a null pointer is returned. -.. _Argument: +(Argument)= -The ``Argument`` class ----------------------- +### The `Argument` class This subclass of Value defines the interface for incoming formal arguments to a function. A Function maintains a list of its formal arguments. An argument has diff --git a/llvm/docs/RFCProcess.md b/llvm/docs/RFCProcess.md index 2c339eb0cd557..e45c2782301dc 100644 --- a/llvm/docs/RFCProcess.md +++ b/llvm/docs/RFCProcess.md @@ -1,22 +1,20 @@ -================================= -Request For Comment (RFC) process -================================= +# Request For Comment (RFC) process -.. contents:: - :local: - :depth: 1 +```{contents} +:local: +:depth: 1 +``` + +## Introduction -Introduction -============ Substantive changes to LLVM projects need to be acceptable to the wider community, which requires gaining community consensus to adopt the changes. This is done by posting an RFC and obtaining feedback about the proposal. -Process -======= +## Process + +### Writing an RFC -Writing an RFC --------------- The process begins with writing a proposal for the changes you'd like to see made. The proposal should include: @@ -26,10 +24,10 @@ made. The proposal should include: * any open questions the community should address. The proposal should be posted to the appropriate forum on -`Discourse `_. +[Discourse](https://discourse.llvm.org/). 
+ +### Feedback Period -Feedback Period ---------------- Once the RFC is posted, the community will provide feedback on the proposal. The feedback period is a collaborative effort between the community and the proposal authors. Authors should take the community's feedback into @@ -45,14 +43,13 @@ discussion is actively continuing on the proposal. After posting a major proposal, it is common to receive lots of conflicting feedback from different parties, or no feedback at all, leaving authors without -clear next steps. As a community, we are aiming for `"rough consensus" -`_, similar in spirit to what is -described in `IETF RFC7282 `_. +clear next steps. As a community, we are aiming for ["rough consensus"][rc], +similar in spirit to what is +described in [IETF RFC7282][rfc7282]. This requires considering and addressing all of the objections to the RFC, and confirming that we can all live with the tradeoffs embodied in the proposal. -The LLVM Area Teams (defined in `LP0004 -`_) +The LLVM Area Teams (defined in [LP0004][lp0004]) are responsible for facilitating project decision making. In cases where there isn't obvious agreement, area teams should step in to restate their perceived consensus. In cases of deeper disagreement, area teams should try to identify @@ -60,20 +57,23 @@ the next steps for the proposal, such as gathering more data, changing the proposal, or rejecting it in the absence of major changes in the design or context. They can also act as moderators by scheduling calls for participants to speak directly to resolve disagreements, subject to normal -:ref:`Code of Conduct ` guidelines. +{ref}`Code of Conduct ` guidelines. Once the design of the new feature is finalized, the work itself should be done -as a series of :ref:`incremental changes `, not as a long-term development branch. +as a series of {ref}`incremental changes `, not as a long-term development branch. 
+[rc]: https://en.wikipedia.org/wiki/Rough_consensus +[rfc7282]: https://datatracker.ietf.org/doc/html/rfc7282 +[lp0004]: https://github.com/llvm/llvm-www/blob/main/proposals/LP0004-project-governance.md + +### Trivial Acceptance or Rejection -Trivial Acceptance or Rejection -------------------------------- Some proposals have obvious consensus (for or against) after discussion in the community. It is acceptable to presume a post which appears to have obvious consensus has been accepted. -Non-trivial Acceptance or Rejection ------------------------------------ +### Non-trivial Acceptance or Rejection + If the proposal does not have obvious consensus after community discussion, a maintainer for each of the impacted parts of the project should explicitly accept or reject the RFC by leaving a comment stating their decision and @@ -81,16 +81,16 @@ possibly detailing any provisions for their acceptance. Overall consensus is determined once a maintainer from each impacted part of the project has accepted the proposal. -Low Engagement Level -~~~~~~~~~~~~~~~~~~~~ +#### Low Engagement Level + If the proposal gets little or no engagement by the community, it is a sign that the proposal does not have consensus and is rejected. Engagement means comments on the proposal. If there are few or no comments but the are a lot of people pressing the like/heart button on the post, the appropriate area team can make a value judgement on whether to accept or reject. -After Acceptance ----------------- +### After Acceptance + Once an RFC has been accepted, the authors may begin merging pull requests related to the proposal. While the RFC process typically makes reviewing the pull requests go more smoothly, the review process may identify additional @@ -98,16 +98,16 @@ necessary changes to the proposal. Minor changes to the proposal do not require an additional RFC. However, if the proposal changes significantly in a material way, the authors may be asked to run another RFC. 
-After Rejection ---------------- +### After Rejection + Any rejected RFC can be brought back to the community as a new RFC in the future. The new RFC should either clearly identify new information that may change the community's perception of the proposal and/or explicitly address the concerns previously raised by the community. It is helpful to explicitly call out such information in the subsequent RFC. -Suggestions on Getting a Change Accepted ----------------------------------------- +### Suggestions on Getting a Change Accepted + These are some suggestions for how to get a major change accepted: * Make it targeted, and avoid touching components irrelevant to the task. @@ -122,4 +122,4 @@ These are some suggestions for how to get a major change accepted: * Compilers are foundational infrastructure, so there is a high quality bar, and the burden of proof is on the proposer. If reviewers repeatedly ask for an unreasonable amount of evidence or data, proposal authors can escalate to - the area team to resolve disagreements. \ No newline at end of file + the area team to resolve disagreements. diff --git a/llvm/docs/Reference.md b/llvm/docs/Reference.md index 56e367388b1a8..cddcc9056128d 100644 --- a/llvm/docs/Reference.md +++ b/llvm/docs/Reference.md @@ -1,258 +1,243 @@ -Reference -========= +# Reference LLVM and API reference documentation. -.. contents:: - :local: - -.. 
toctree:: - :hidden: - - HowToUseAttributes - CommandGuide/index - CommandGuide/llvm-reduce - OptBisect - SymbolizerMarkupFormat - PDB/index - GarbageCollection - Statepoints - LibFuzzer - FuzzingLLVM - LangRef - UndefinedBehavior - InAlloca - BitCodeFormat - MIRLangRef - GlobalISel/index - ConvergentOperations - TestingGuide - TestSuiteGuide - GwpAsan - XRay - XRayExample - FaultMaps - Atomics - ExceptionHandling - Extensions - HowToSetUpLLVMStyleRTTI - BlockFrequencyTerminology - BranchWeightMetadata - GetElementPtr - ScudoHardenedAllocator - MemoryModelRelaxationAnnotations - MemTagSanitizer - DependenceGraphs/index - SpeculativeLoadHardening - SegmentedStacks - MarkedUpDisassembly - StackMaps - Coroutines - PointerAuth - YamlIO - ConvergenceAndUniformity - MLGO - ContentAddressableStorage - CIBestPractices - AIToolPolicy - CalleeTypeMetadata - CallGraphSection - InterfaceExportAnnotations - PCSectionsMetadata - QualGroup - Security - SecurityTransparencyReports - SystemLibrary - TransformMetadata - TypeMetadata - XRayFDRFormat - -API Reference -------------- - -`Doxygen generated documentation `_ - (`classes `_) - -:doc:`HowToUseAttributes` - Answers some questions about the new Attributes infrastructure. - -LLVM Reference --------------- - -====================== -Command Line Utilities -====================== - -:doc:`LLVM Command Guide ` - A reference manual for the LLVM command line utilities ("man" pages for LLVM - tools). - -:doc:`llvm-reduce ` - Automatic bug finder and test-case reducer description and usage - information. - -:doc:`OptBisect` - A command line option for debugging optimization-induced failures. - -:doc:`SymbolizerMarkupFormat` - A reference for the log symbolizer markup accepted by ``llvm-symbolizer``. - -:doc:`The Microsoft PDB File Format ` - A detailed description of the Microsoft PDB (Program Database) file format. 
- -================== -Garbage Collection -================== - -:doc:`GarbageCollection` - The interfaces source-language compilers should use for compiling GC'd - programs. - -:doc:`Statepoints` - This describes a set of experimental extensions for garbage - collection support. - -========= +```{contents} +:local: +``` + +```{toctree} +:hidden: + +HowToUseAttributes +CommandGuide/index +CommandGuide/llvm-reduce +OptBisect +SymbolizerMarkupFormat +PDB/index +GarbageCollection +Statepoints LibFuzzer -========= +FuzzingLLVM +LangRef +UndefinedBehavior +InAlloca +BitCodeFormat +MIRLangRef +GlobalISel/index +ConvergentOperations +TestingGuide +TestSuiteGuide +GwpAsan +XRay +XRayExample +FaultMaps +Atomics +ExceptionHandling +Extensions +HowToSetUpLLVMStyleRTTI +BlockFrequencyTerminology +BranchWeightMetadata +GetElementPtr +ScudoHardenedAllocator +MemoryModelRelaxationAnnotations +MemTagSanitizer +DependenceGraphs/index +SpeculativeLoadHardening +SegmentedStacks +MarkedUpDisassembly +StackMaps +Coroutines +PointerAuth +YamlIO +ConvergenceAndUniformity +MLGO +ContentAddressableStorage +CIBestPractices +AIToolPolicy +CalleeTypeMetadata +CallGraphSection +InterfaceExportAnnotations +PCSectionsMetadata +QualGroup +Security +SecurityTransparencyReports +SystemLibrary +TransformMetadata +TypeMetadata +XRayFDRFormat +``` -:doc:`LibFuzzer` - A library for writing in-process guided fuzzers. +## API Reference -:doc:`FuzzingLLVM` - Information on writing and using Fuzzers to find bugs in LLVM. +[Doxygen generated documentation](https://llvm.org/doxygen/) +: ([classes](https://llvm.org/doxygen/inherits.html)) -======== -LLVM IR -======== +{doc}`HowToUseAttributes` +: Answers some questions about the new Attributes infrastructure. -:doc:`LLVM Language Reference Manual ` - Defines the LLVM intermediate representation and the assembly form of the - different nodes. +## LLVM Reference -:doc:`Undefined Behavior (UB) ` - A guide on what UB/undef/poison are and when to use each one. 
+### Command Line Utilities -:doc:`InAlloca` - Description of the ``inalloca`` argument attribute. +{doc}`LLVM Command Guide ` +: A reference manual for the LLVM command line utilities ("man" pages for LLVM + tools). -:doc:`BitCodeFormat` - This describes the file format and encoding used for LLVM "bc" files. +{doc}`llvm-reduce ` +: Automatic bug finder and test-case reducer description and usage + information. -:doc:`Machine IR (MIR) Format Reference Manual ` - A reference manual for the MIR serialization format, which is used to test - LLVM's code generation passes. +{doc}`OptBisect` +: A command line option for debugging optimization-induced failures. -:doc:`GlobalISel/index` - This describes the prototype instruction selection replacement, GlobalISel. +{doc}`SymbolizerMarkupFormat` +: A reference for the log symbolizer markup accepted by `llvm-symbolizer`. -:doc:`ConvergentOperations` - Description of ``convergent`` operation semantics and related intrinsics. +{doc}`The Microsoft PDB File Format ` +: A detailed description of the Microsoft PDB (Program Database) file format. -===================== -Testing and Debugging -===================== +### Garbage Collection -:doc:`LLVM Testing Infrastructure Guide ` - A reference manual for using the LLVM testing infrastructure. +{doc}`GarbageCollection` +: The interfaces source-language compilers should use for compiling GC'd + programs. -:doc:`TestSuiteGuide` - Describes how to compile and run the test-suite benchmarks. +{doc}`Statepoints` +: This describes a set of experimental extensions for garbage + collection support. +### LibFuzzer -:doc:`GwpAsan` - A sampled heap memory error detection toolkit designed for production use. +{doc}`LibFuzzer` +: A library for writing in-process guided fuzzers. -==== -XRay -==== +{doc}`FuzzingLLVM` +: Information on writing and using Fuzzers to find bugs in LLVM. 
+ +### LLVM IR + +{doc}`LLVM Language Reference Manual ` +: Defines the LLVM intermediate representation and the assembly form of the + different nodes. + +{doc}`Undefined Behavior (UB) ` +: A guide on what UB/undef/poison are and when to use each one. + +{doc}`InAlloca` +: Description of the `inalloca` argument attribute. + +{doc}`BitCodeFormat` +: This describes the file format and encoding used for LLVM "bc" files. + +{doc}`Machine IR (MIR) Format Reference Manual ` +: A reference manual for the MIR serialization format, which is used to test + LLVM's code generation passes. + +{doc}`GlobalISel/index` +: This describes the prototype instruction selection replacement, GlobalISel. + +{doc}`ConvergentOperations` +: Description of `convergent` operation semantics and related intrinsics. + +### Testing and Debugging + +{doc}`LLVM Testing Infrastructure Guide ` +: A reference manual for using the LLVM testing infrastructure. + +{doc}`TestSuiteGuide` +: Describes how to compile and run the test-suite benchmarks. + + +{doc}`GwpAsan` +: A sampled heap memory error detection toolkit designed for production use. + +### XRay -:doc:`XRay` - High-level documentation of how to use XRay in LLVM. +{doc}`XRay` +: High-level documentation of how to use XRay in LLVM. -:doc:`XRayExample` - An example of how to debug an application with XRay. +{doc}`XRayExample` +: An example of how to debug an application with XRay. -================= -Additional Topics -================= +### Additional Topics -:doc:`FaultMaps` - LLVM support for folding control flow into faulting machine instructions. +{doc}`FaultMaps` +: LLVM support for folding control flow into faulting machine instructions. -:doc:`Atomics` - Information about LLVM's concurrency model. +{doc}`Atomics` +: Information about LLVM's concurrency model. -:doc:`ExceptionHandling` - This document describes the design and implementation of exception handling - in LLVM. 
+{doc}`ExceptionHandling` +: This document describes the design and implementation of exception handling + in LLVM. -:doc:`Extensions` - LLVM-specific extensions to tools and formats LLVM seeks compatibility with. +{doc}`Extensions` +: LLVM-specific extensions to tools and formats LLVM seeks compatibility with. -:doc:`HowToSetUpLLVMStyleRTTI` - How to make ``isa<>``, ``dyn_cast<>``, etc. available for clients of your - class hierarchy. +{doc}`HowToSetUpLLVMStyleRTTI` +: How to make `isa<>`, `dyn_cast<>`, etc. available for clients of your + class hierarchy. -:doc:`BlockFrequencyTerminology` - Provides information about terminology used in the ``BlockFrequencyInfo`` - analysis pass. +{doc}`BlockFrequencyTerminology` +: Provides information about terminology used in the `BlockFrequencyInfo` + analysis pass. -:doc:`BranchWeightMetadata` - Provides information about Branch Prediction Information. +{doc}`BranchWeightMetadata` +: Provides information about Branch Prediction Information. -:doc:`GetElementPtr` - Answers to some very frequent questions about LLVM's most frequently - misunderstood instruction. +{doc}`GetElementPtr` +: Answers to some very frequent questions about LLVM's most frequently + misunderstood instruction. -:doc:`ScudoHardenedAllocator` - A library that implements a security-hardened `malloc()`. +{doc}`ScudoHardenedAllocator` +: A library that implements a security-hardened `malloc()`. -:doc:`MemoryModelRelaxationAnnotations` - Target-defined relaxation to LLVM's concurrency model. +{doc}`MemoryModelRelaxationAnnotations` +: Target-defined relaxation to LLVM's concurrency model. -:doc:`MemTagSanitizer` - Security hardening for production code aiming to mitigate memory - related vulnerabilities. Based on the Armv8.5-A Memory Tagging Extension. +{doc}`MemTagSanitizer` +: Security hardening for production code aiming to mitigate memory + related vulnerabilities. Based on the Armv8.5-A Memory Tagging Extension. 
-:doc:`Dependence Graphs ` - A description of the design of the various dependence graphs such as - the DDG (Data Dependence Graph). +{doc}`Dependence Graphs ` +: A description of the design of the various dependence graphs such as + the DDG (Data Dependence Graph). -:doc:`SpeculativeLoadHardening` - A description of the Speculative Load Hardening mitigation for Spectre v1. +{doc}`SpeculativeLoadHardening` +: A description of the Speculative Load Hardening mitigation for Spectre v1. -:doc:`SegmentedStacks` - This document describes segmented stacks and how they are used in LLVM. +{doc}`SegmentedStacks` +: This document describes segmented stacks and how they are used in LLVM. -:doc:`MarkedUpDisassembly` - This document describes the optional rich disassembly output syntax. +{doc}`MarkedUpDisassembly` +: This document describes the optional rich disassembly output syntax. -:doc:`StackMaps` - LLVM support for mapping instruction addresses to the location of - values and allowing code to be patched. +{doc}`StackMaps` +: LLVM support for mapping instruction addresses to the location of + values and allowing code to be patched. -:doc:`Coroutines` - LLVM support for coroutines. +{doc}`Coroutines` +: LLVM support for coroutines. -:doc:`PointerAuth` - A description of pointer authentication, its LLVM IR representation, and its - support in the backend. +{doc}`PointerAuth` +: A description of pointer authentication, its LLVM IR representation, and its + support in the backend. -:doc:`YamlIO` - A reference guide for using LLVM's YAML I/O library. +{doc}`YamlIO` +: A reference guide for using LLVM's YAML I/O library. -:doc:`ConvergenceAndUniformity` - A description of uniformity analysis in the presence of irreducible - control flow, and its implementation. +{doc}`ConvergenceAndUniformity` +: A description of uniformity analysis in the presence of irreducible + control flow, and its implementation. 
-:doc:`MLGO` - Facilities for ML-Guided Optimization, such as collecting IR corpora from a - build, interfacing with ML models, an exposing features for training. +{doc}`MLGO` +: Facilities for ML-Guided Optimization, such as collecting IR corpora from a + build, interfacing with ML models, and exposing features for training. -:doc:`ContentAddressableStorage` - A reference guide for using LLVM's CAS library. +{doc}`ContentAddressableStorage` +: A reference guide for using LLVM's CAS library. -:doc:`CIBestPractices` - A list of guidelines and best practices to use when working on LLVM's - CI systems. +{doc}`CIBestPractices` +: A list of guidelines and best practices to use when working on LLVM's + CI systems. diff --git a/llvm/docs/SourceLevelDebugging.md b/llvm/docs/SourceLevelDebugging.md index 815fccec7aa40..74bb8840a56e4 100644 --- a/llvm/docs/SourceLevelDebugging.md +++ b/llvm/docs/SourceLevelDebugging.md @@ -1,21 +1,18 @@ -================================ -Source Level Debugging with LLVM -================================ +# Source Level Debugging with LLVM -.. contents:: - :local: +```{contents} +:local: +``` -Introduction -============ +## Introduction This document is the central repository for all information pertaining to debug -information in LLVM. It describes the :ref:`actual format that the LLVM debug +information in LLVM. It describes the {ref}`actual format that the LLVM debug information takes `, which is useful for those interested in creating front-ends or dealing directly with the information. Further, this document provides specific examples of what debug information for C/C++ looks like. -Philosophy behind LLVM debugging information --------------------------------------------- +### Philosophy behind LLVM debugging information The idea of the LLVM debugging information is to capture how the important pieces of the source-language's Abstract Syntax Tree map onto LLVM code. @@ -26,7 +23,7 @@ important ones are: compiler.
No transformations, analyses, or code generators should need to be modified because of debugging information. -* LLVM optimizations should interact in :ref:`well-defined and easily described +* LLVM optimizations should interact in {ref}`well-defined and easily described ways ` with the debugging information. * Because LLVM is designed to support arbitrary programming languages, @@ -43,22 +40,20 @@ important ones are: debuggers, like GDB or DBX. The approach used by the LLVM implementation is to use a small set of -:ref:`debug records ` to define a mapping +{ref}`debug records ` to define a mapping between LLVM program objects and the source-level objects. The description of the source-level program is maintained in LLVM metadata in an -:ref:`implementation-defined format ` (the C/C++ front-end -currently uses working draft 7 of the `DWARF 3 standard -`_). +{ref}`implementation-defined format ` (the C/C++ front-end +currently uses working draft 7 of the [DWARF 3 standard](http://www.eagercon.com/dwarf/dwarf3std.htm)). When a program is being debugged, a debugger interacts with the user and turns the stored debug information into source-language specific information. As such, a debugger must be aware of the source-language, and is thus tied to a specific language or family of languages. -.. _intro_consumers: +(intro_consumers)= -Debug information consumers ---------------------------- +### Debug information consumers The role of debug information is to provide meta information normally stripped away during the compilation process. This meta information provides an LLVM @@ -67,20 +62,20 @@ code. Currently, there are two backend consumers of debug info: DwarfDebug and CodeViewDebug. DwarfDebug produces DWARF suitable for use with GDB, LLDB, and -other DWARF-based debuggers. :ref:`CodeViewDebug ` produces CodeView, +other DWARF-based debuggers. 
{ref}`CodeViewDebug ` produces CodeView, the Microsoft debug info format, which is usable with Microsoft debuggers such as Visual Studio and WinDBG. LLVM's debug information format is mostly derived from and inspired by DWARF, but it is feasible to translate into other target debug info formats such as STABS. -SamplePGO (also known as `AutoFDO `_) +SamplePGO (also known as [AutoFDO](https://gcc.gnu.org/wiki/AutoFDO)) is a variant of profile-guided optimizations which uses hardware sampling based profilers to collect branch frequency data with low overhead in production environments. It relies on debug information to associate profile information with LLVM IR which is then used to guide optimization heuristics. Maintaining deterministic and distinct source locations is necessary to maximize the accuracy of mapping hardware sample counts to LLVM IR. For example, DWARF -`discriminators `_ allow +[discriminators](https://wiki.dwarfstd.org/Path_Discriminators.md) allow SamplePGO to distinguish between multiple paths of execution which map to the same source line. @@ -88,10 +83,9 @@ It would also be reasonable to use debug information to feed profiling tools for analysis of generated code, or, tools for reconstructing the original source from generated code. -.. _intro_debugopt: +(intro_debugopt)= -Debug information and optimizations ------------------------------------ +### Debug information and optimizations An extremely high priority of LLVM debugging information is to make it interact well with optimizations and analysis. In particular, the LLVM debug @@ -99,7 +93,7 @@ information provides the following guarantees: * LLVM debug information **always provides information to accurately read the source-level state of the program**, regardless of which LLVM - optimizations have been run. :doc:`HowToUpdateDebugInfo` specifies how debug + optimizations have been run. 
{doc}`HowToUpdateDebugInfo` specifies how debug info should be updated in various kinds of code transformations to avoid breaking this guarantee, and how to preserve as much useful debug info as possible. Note that some optimizations may impact the ability to modify the @@ -121,18 +115,17 @@ information provides the following guarantees: is automatically removed. Basically, the debug information allows you to compile a program with -"``-O0 -g``" and get full debug information, allowing you to arbitrarily modify +"`-O0 -g`" and get full debug information, allowing you to arbitrarily modify the program as it executes from a debugger. Compiling a program with -"``-O3 -g``" gives you full debug information that is always available and +"`-O3 -g`" gives you full debug information that is always available and accurate for reading (e.g., you get accurate stack traces despite tail call elimination and inlining), but you might lose the ability to modify the program and call functions which were optimized out of the program, or inlined away completely. -.. _variables_and_variable_fragments: +(variables_and_variable_fragments)= -Variables and Variable Fragments -================================ +## Variables and Variable Fragments In this document "variable" refers generally to any source language object which can have a value, including at least: @@ -141,34 +134,31 @@ which can have a value, including at least: - Constants - Formal parameters -.. note:: - - There is no special provision for "true" constants in LLVM today, and - they are instead treated as local or global variables. - -A variable is represented by a :ref:`local variable ` or -:ref:`global variable ` metadata node. +```{note} +There is no special provision for "true" constants in LLVM today, and +they are instead treated as local or global variables. +``` +A variable is represented by a {ref}`local variable ` or +{ref}`global variable ` metadata node. 
A "variable fragment" (or just "fragment") is a contiguous span of bits of a variable. -A :ref:`debug record ` which refers to a :ref:`diexpression` -ending with a ``DW_OP_LLVM_fragment`` operation describes a fragment of the +A {ref}`debug record ` which refers to a {ref}`diexpression` +ending with a `DW_OP_LLVM_fragment` operation describes a fragment of the variable it refers to. -The operands of the ``DW_OP_LLVM_fragment`` operation encode the bit offset of +The operands of the `DW_OP_LLVM_fragment` operation encode the bit offset of the fragment relative to the start of the variable, and the size of the fragment in bits, respectively. -.. note:: - - The ``DW_OP_LLVM_fragment`` operation acts only to encode the fragment - information, and does not have an effect on the semantics of the expression. +```{note} +The `DW_OP_LLVM_fragment` operation acts only to encode the fragment +information, and does not have an effect on the semantics of the expression. +``` +(format)= -.. _format: - -Debugging information format -============================ +## Debugging information format LLVM debugging information has been carefully designed to make it possible for the optimizer to optimize the program and debugging information without @@ -191,31 +181,30 @@ debugger to interpret the information. To provide basic functionality, the LLVM debugger does have to make some assumptions about the source-level language being debugged, though it keeps these to a minimum. The only common features that the LLVM debugger assumes -exist are :ref:`source files `, and :ref:`program objects +exist are {ref}`source files `, and {ref}`program objects `. These abstract objects are used by a debugger to form stack traces, show information about local variables, etc. This section of the documentation first describes the representation aspects -common to any source-language. :ref:`ccxx_frontend` describes the data layout +common to any source-language. 
{ref}`ccxx_frontend` describes the data layout conventions used by the C and C++ front-ends. -Debug information descriptors are :ref:`specialized metadata nodes -`, first-class subclasses of ``Metadata``. +Debug information descriptors are {ref}`specialized metadata nodes +`, first-class subclasses of `Metadata`. There are two models for defining the values of source variables at different states of the program and tracking these values through optimization and code -generation: :ref:`debug records `, the current default, and -:ref:`intrinsic function calls `, which are +generation: {ref}`debug records `, the current default, and +{ref}`intrinsic function calls `, which are non-default but currently supported for backwards compatibility - though these two models must never be mixed within an IR module. For an explanation of why we changed to the new model, how it works, and guidance on how to update old -code or IR to use debug records, see the `RemoveDIs `_ +code or IR to use debug records, see the {doc}`RemoveDIs ` document. -.. _debug_records: +(debug_records)= -Debug Records -------------- +### Debug Records Debug records define the value that a source variable has during execution of the program; they appear interleaved with instructions, although they are not @@ -225,207 +214,185 @@ compiler. LLVM uses several types of debug records to define source variables. The common syntax for these records is: -.. code-block:: llvm - - #dbg_([, ]* ) - ; Using the intrinsic model, the above is equivalent to: - call void llvm.dbg.([metadata , ]*), !dbg - +```llvm + #dbg_([, ]* ) +; Using the intrinsic model, the above is equivalent to: +call void llvm.dbg.([metadata , ]*), !dbg +``` Debug records are always printed with an extra level of indentation compared to instructions, and always have the prefix `#dbg_` and a list of comma-separated arguments in parentheses, as with a `call`. -``#dbg_declare`` -^^^^^^^^^^^^^^^^ - -.. 
code-block:: llvm - - #dbg_declare([Value|MDNode], DILocalVariable, DIExpression, DILocation) +#### `#dbg_declare` +```llvm +#dbg_declare([Value|MDNode], DILocalVariable, DIExpression, DILocation) +``` This record provides information about a local element (e.g., variable). The -first argument is an SSA ``ptr`` value corresponding to a variable address, and -is typically a static ``alloca`` in the function entry block. The second -argument is a :ref:`local variable ` containing a description -of the variable. The third argument is a :ref:`complex expression -`. The fourth argument is a :ref:`source location `. -A ``#dbg_declare`` record describes the *address* of a source variable. - -.. code-block:: llvm - - %i.addr = alloca i32, align 4 - #dbg_declare(ptr %i.addr, !1, !DIExpression(), !2) - ; ... - !1 = !DILocalVariable(name: "i", ...) ; int i - !2 = !DILocation(...) - ; ... - %buffer = alloca [256 x i8], align 8 - ; The address of i is buffer+64. - #dbg_declare(ptr %buffer, !3, !DIExpression(DW_OP_plus, 64), !4) - ; ... - !3 = !DILocalVariable(name: "i", ...) ; int i - !4 = !DILocation(...) - -A frontend should generate exactly one ``#dbg_declare`` record at the point +first argument is an SSA `ptr` value corresponding to a variable address, and +is typically a static `alloca` in the function entry block. The second +argument is a {ref}`local variable ` containing a description +of the variable. The third argument is a {ref}`complex expression +`. The fourth argument is a {ref}`source location `. +A `#dbg_declare` record describes the *address* of a source variable. + +```llvm +%i.addr = alloca i32, align 4 + #dbg_declare(ptr %i.addr, !1, !DIExpression(), !2) +; ... +!1 = !DILocalVariable(name: "i", ...) ; int i +!2 = !DILocation(...) +; ... +%buffer = alloca [256 x i8], align 8 +; The address of i is buffer+64. + #dbg_declare(ptr %buffer, !3, !DIExpression(DW_OP_plus, 64), !4) +; ... +!3 = !DILocalVariable(name: "i", ...) ; int i +!4 = !DILocation(...) 
+``` +A frontend should generate exactly one `#dbg_declare` record at the point of declaration of a source variable. Optimization passes that fully promote the variable from memory to SSA values will replace this record with possibly -multiple ``#dbg_value``` records. Passes that delete stores are effectively -partial promotion, and they will insert a mix of ``#dbg_value`` records to +multiple `#dbg_value` records. Passes that delete stores are effectively +partial promotion, and they will insert a mix of `#dbg_value` records to track the source variable value when it is available. After optimization, there -may be multiple ``#dbg_declare`` records describing the program points where +may be multiple `#dbg_declare` records describing the program points where the variables lives in memory. All calls for the same concrete source variable must agree on the memory location. -``#dbg_value`` -^^^^^^^^^^^^^^ - -.. code-block:: llvm - - #dbg_value([Value|DIArgList|MDNode], DILocalVariable, DIExpression, DILocation) +#### `#dbg_value` +```llvm +#dbg_value([Value|DIArgList|MDNode], DILocalVariable, DIExpression, DILocation) +``` This record provides information when a user source variable is set to a new value. The first argument is the new value. The second argument is a -:ref:`local variable ` containing a description of the -variable. The third argument is a :ref:`complex expression `. -The fourth argument is a :ref:`source location `. +{ref}`local variable ` containing a description of the +variable. The third argument is a {ref}`complex expression `. +The fourth argument is a {ref}`source location `. -A ``#dbg_value`` record describes the *value* of a source variable +A `#dbg_value` record describes the *value* of a source variable directly, not its address. Note that the value operand of this intrinsic may be indirect (i.e, a pointer to the source variable), provided that interpreting the complex expression derives the direct value. 
-``#dbg_declare_value`` -^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: llvm - - #dbg_declare_value([Value|MDNode], DILocalVariable, DIExpression, DILocation) +#### `#dbg_declare_value` +```llvm +#dbg_declare_value([Value|MDNode], DILocalVariable, DIExpression, DILocation) +``` This record provides information about a local element (e.g., variable). The -first argument is used to compute the value of the variable throughout the -entire function. The second argument is a -:ref:`local variable ` containing a description of the -variable. The third argument is a :ref:`complex expression `. The -foruth argument is a :ref:`source location `. A -``#dbg_declare_value`` record describes describes the *value* of a source -variable directly, not its address. The difference between a ``#dbg_value`` and -a ``#dbg_declare_value`` is that, just like a ``#dbg_declare``, a frontend -should generate exactly one ``#dbg_declare_value`` record. The idea is to have -``#dbg_declare`` guarantees but be able to describe a value rather than the +first argument is used to compute the value of the variable throughout the +entire function. The second argument is a +{ref}`local variable ` containing a description of the +variable. The third argument is a {ref}`complex expression `. The +fourth argument is a {ref}`source location `. A +`#dbg_declare_value` record describes the *value* of a source +variable directly, not its address. The difference between a `#dbg_value` and +a `#dbg_declare_value` is that, just like a `#dbg_declare`, a frontend +should generate exactly one `#dbg_declare_value` record. The idea is to have +`#dbg_declare` guarantees but be able to describe a value rather than the address of a value. -``#dbg_assign`` -^^^^^^^^^^^^^^^ -.. toctree:: - :hidden: - - AssignmentTracking - -.. 
code-block:: llvm +#### `#dbg_assign` - #dbg_assign( [Value|DIArgList|MDNode] Value, - DILocalVariable Variable, - DIExpression ValueExpression, - DIAssignID ID, - [Value|MDNode] Address, - DIExpression AddressExpression, - DILocation SourceLocation ) +```{toctree} +:hidden: +AssignmentTracking +``` +```llvm +#dbg_assign( [Value|DIArgList|MDNode] Value, + DILocalVariable Variable, + DIExpression ValueExpression, + DIAssignID ID, + [Value|MDNode] Address, + DIExpression AddressExpression, + DILocation SourceLocation ) +``` This record marks the position in IR where a source assignment occurred. It encodes the value of the variable. It references the store, if any, that performs the assignment, and the destination address. -The first three arguments are the same as for a ``#dbg_value``. The fourth -argument is a ``DIAssignID`` used to reference a store. The fifth is the -destination of the store, the sixth is a :ref:`complex expression -` that modifies it, and the seventh is a :ref:`source location +The first three arguments are the same as for a `#dbg_value`. The fourth +argument is a `DIAssignID` used to reference a store. The fifth is the +destination of the store, the sixth is a {ref}`complex expression +` that modifies it, and the seventh is a {ref}`source location `. -See :doc:`AssignmentTracking` for more info. +See {doc}`AssignmentTracking` for more info. -Debugger intrinsic functions ----------------------------- +### Debugger intrinsic functions -.. warning:: +```{warning} +These intrinsics are deprecated, please use {ref}`debug records +` instead. For more details see {doc}`RemoveDIs `. +``` +(format_common_intrinsics)= - These intrinsics are deprecated, please use :ref:`debug records - ` instead. For more details see `RemoveDIs - `_. - -.. 
_format_common_intrinsics: - -In intrinsic-mode, LLVM uses several intrinsic functions (name prefixed with "``llvm.dbg``") to +In intrinsic-mode, LLVM uses several intrinsic functions (name prefixed with "`llvm.dbg`") to track source local variables through optimization and code generation. These intrinsic functions each correspond to one of the debug records above, with a few syntactic differences: each argument to a debugger intrinsic must be wrapped -as metadata, meaning it must be prefixed with ``metadata``, and the -``DILocation`` argument in each record must be a metadata attachment to the +as metadata, meaning it must be prefixed with `metadata`, and the +`DILocation` argument in each record must be a metadata attachment to the call instruction, meaning it appears after the argument list with the prefix -``!dbg``. - -``llvm.dbg.declare`` -^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: llvm - - void @llvm.dbg.declare(metadata, metadata, metadata) - -This intrinsic is equivalent to ``#dbg_declare``: - -.. code-block:: llvm - - #dbg_declare(i32* %i.addr, !1, !DIExpression(), !2) - call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !1, - metadata !DIExpression()), !dbg !2 - -``llvm.dbg.value`` -^^^^^^^^^^^^^^^^^^ - -.. code-block:: llvm - - void @llvm.dbg.value(metadata, metadata, metadata) - -This intrinsic is equivalent to ``#dbg_value``: - -.. code-block:: llvm - - #dbg_value(i32 %i, !1, !DIExpression(), !2) - call void @llvm.dbg.value(metadata i32 %i, metadata !1, - metadata !DIExpression()), !dbg !2 - -``llvm.dbg.assign`` -^^^^^^^^^^^^^^^^^^^ - -.. code-block:: llvm - - void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) - -This intrinsic is equivalent to ``#dbg_assign``: - -.. 
code-block:: llvm - - #dbg_assign(i32 %i, !1, !DIExpression(), !2, - ptr %i.addr, !DIExpression(), !3) - call void @llvm.dbg.assign( - metadata i32 %i, metadata !1, metadata !DIExpression(), metadata !2, - metadata ptr %i.addr, metadata !DIExpression(), metadata !3), !dbg !3 - -.. _diexpression: - -DIExpression ------------- - -Debug expressions are represented as :ref:`specialized-metadata`. +`!dbg`. + +#### `llvm.dbg.declare` + +```llvm +void @llvm.dbg.declare(metadata, metadata, metadata) +``` +This intrinsic is equivalent to `#dbg_declare`: + +```llvm + #dbg_declare(i32* %i.addr, !1, !DIExpression(), !2) +call void @llvm.dbg.declare(metadata i32* %i.addr, metadata !1, + metadata !DIExpression()), !dbg !2 +``` +#### `llvm.dbg.value` + +```llvm +void @llvm.dbg.value(metadata, metadata, metadata) +``` +This intrinsic is equivalent to `#dbg_value`: + +```llvm + #dbg_value(i32 %i, !1, !DIExpression(), !2) +call void @llvm.dbg.value(metadata i32 %i, metadata !1, + metadata !DIExpression()), !dbg !2 +``` +#### `llvm.dbg.assign` + +```llvm +void @llvm.dbg.assign(metadata, metadata, metadata, metadata, metadata, metadata) +``` +This intrinsic is equivalent to `#dbg_assign`: + +```llvm + #dbg_assign(i32 %i, !1, !DIExpression(), !2, + ptr %i.addr, !DIExpression(), !3) +call void @llvm.dbg.assign( + metadata i32 %i, metadata !1, metadata !DIExpression(), metadata !2, + metadata ptr %i.addr, metadata !DIExpression(), metadata !3), !dbg !3 +``` +(diexpression)= + +### DIExpression + +Debug expressions are represented as {ref}`specialized-metadata`. Debug expressions are interpreted left-to-right: start by pushing the value/address operand of the record onto a stack, then repeatedly push and -evaluate opcodes from the ``DIExpression`` until the final variable description +evaluate opcodes from the `DIExpression` until the final variable description is produced. The opcodes available in these expressions are described in -:ref:`dwarf-opcodes` and :ref:`internal-opcodes`. 
+{ref}`dwarf-opcodes` and {ref}`internal-opcodes`. DWARF specifies three kinds of simple location descriptions: register, memory, and implicit location descriptions. Note that a location description is @@ -434,206 +401,200 @@ change over the course of the program. Register and memory location descriptions describe the *concrete location* of a source variable (in the sense that a debugger might modify its value), whereas *implicit locations* describe merely the actual *value* of a source variable which might not exist -in registers or in memory (see ``DW_OP_stack_value``). +in registers or in memory (see `DW_OP_stack_value`). -A ``#dbg_declare`` record describes an indirect value (the address) of a source +A `#dbg_declare` record describes an indirect value (the address) of a source variable. The first operand of the record must be an address of some kind. A -``DIExpression`` operand to the record refines this address to produce a +`DIExpression` operand to the record refines this address to produce a concrete location for the source variable. -A ``#dbg_value`` record describes the direct value of a source variable. The +A `#dbg_value` record describes the direct value of a source variable. The first operand of the record may be a direct or indirect value. A -``DIExpression`` operand to the record refines the first operand to produce a +`DIExpression` operand to the record refines the first operand to produce a direct value. For example, if the first operand is an indirect value, it may be -necessary to insert ``DW_OP_deref`` into the ``DIExpression`` in order to +necessary to insert `DW_OP_deref` into the `DIExpression` in order to produce a valid debug record. -.. note:: - - A ``DIExpression`` is interpreted in the same way regardless of which kind - of debug record it's attached to. - - ``DIExpression``\s are always printed and parsed inline; they can never be - referenced by an ID (e.g. ``!1``). 
+```{note} +A `DIExpression` is interpreted in the same way regardless of which kind +of debug record it's attached to. -.. _dwarf-opcodes: +`DIExpression`s are always printed and parsed inline; they can never be +referenced by an ID (e.g. `!1`). +``` +(dwarf-opcodes)= -DWARF Opcodes -^^^^^^^^^^^^^ +#### DWARF Opcodes When possible LLVM reuses DWARF opcodes and gives them identical semantics in LLVM expressions as in DWARF expressions. The current supported opcode vocabulary is limited, but includes at least: -- ``DW_OP_deref`` dereferences the top of the expression stack. -- ``DW_OP_plus`` pops the last two entries from the expression stack, adds +- `DW_OP_deref` dereferences the top of the expression stack. +- `DW_OP_plus` pops the last two entries from the expression stack, adds them together and pushes the result to the expression stack. -- ``DW_OP_minus`` pops the last two entries from the expression stack, subtracts +- `DW_OP_minus` pops the last two entries from the expression stack, subtracts the last entry from the second last entry and appends the result to the expression stack. -- ``DW_OP_plus_uconst, 93`` adds ``93`` to the value on top of the stack. -- ``DW_OP_swap`` swaps top two stack entries. -- ``DW_OP_xderef`` provides extended dereference mechanism. The entry at the top +- `DW_OP_plus_uconst, 93` adds `93` to the value on top of the stack. +- `DW_OP_swap` swaps top two stack entries. +- `DW_OP_xderef` provides extended dereference mechanism. The entry at the top of the stack is treated as an address. The second stack entry is treated as an address space identifier. The two entries are popped and then an implementation defined value is pushed on the stack. -- ``DW_OP_stack_value`` may appear at most once in an expression, and must be - the last opcode if ``DW_OP_LLVM_fragment`` is not present, or the second last - opcode if ``DW_OP_LLVM_fragment`` is present. 
It pops the top value of the +- `DW_OP_stack_value` may appear at most once in an expression, and must be + the last opcode if `DW_OP_LLVM_fragment` is not present, or the second last + opcode if `DW_OP_LLVM_fragment` is present. It pops the top value of the expression stack and makes an implicit value location with that value. -- ``DW_OP_breg`` (or ``DW_OP_bregx``) represents a content on the provided +- `DW_OP_breg` (or `DW_OP_bregx`) represents a content on the provided signed offset of the specified register. The opcode is only generated by the - ``AsmPrinter`` pass to describe call site parameter value which requires an + `AsmPrinter` pass to describe call site parameter value which requires an expression over two registers. -- ``DW_OP_push_object_address`` pushes the address of the object which can then +- `DW_OP_push_object_address` pushes the address of the object which can then serve as a descriptor in subsequent calculation. This opcode can be used to calculate bounds of a Fortran allocatable array which has array descriptors. -- ``DW_OP_over`` duplicates the entry currently second in the stack at the top +- `DW_OP_over` duplicates the entry currently second in the stack at the top of the stack. This opcode can be used to calculate bounds of a Fortran assumed rank array which has rank known at run time and current dimension number is implicitly first element of the stack. -.. _internal-opcodes: +(internal-opcodes)= -Internal Opcodes -^^^^^^^^^^^^^^^^ +#### Internal Opcodes Where the DWARF equivalent is not suitable, or no DWARF equivalent exists, LLVM defines internal-only opcodes which have no direct analog in DWARF. -.. note:: - - Some opcodes do not influence the final DWARF expression directly, instead - encoding information logically belonging to the debug records which use - them. 
- -- ``DW_OP_LLVM_fragment, , `` may appear at most once in an +```{note} +Some opcodes do not influence the final DWARF expression directly, instead +encoding information logically belonging to the debug records which use +them. +``` +- `DW_OP_LLVM_fragment, , ` may appear at most once in an expression, and must be the last opcode. It specifies the bit offset and bit size of the variable fragment being described by the record or intrinsic - using the expression. Note that contrary to ``DW_OP_bit_piece``, the offset + using the expression. Note that contrary to `DW_OP_bit_piece`, the offset is describing the location within the described source variable. At DWARF generation time all fragments for the same variable are collected together - and DWARF ``DW_OP_piece`` and ``DW_OP_bit_piece`` opcodes are used to + and DWARF `DW_OP_piece` and `DW_OP_bit_piece` opcodes are used to describe a composite with pieces corresponding to the fragments. (This does not affect the semantics of the expression containing it.) -- ``DW_OP_LLVM_convert, 16, DW_ATE_signed`` specifies a bit size and encoding - (``16`` and ``DW_ATE_signed`` here, respectively) to which the top of the - expression stack is to be converted. Maps into a ``DW_OP_convert`` operation +- `DW_OP_LLVM_convert, 16, DW_ATE_signed` specifies a bit size and encoding + (`16` and `DW_ATE_signed` here, respectively) to which the top of the + expression stack is to be converted. Maps into a `DW_OP_convert` operation that references a base type constructed from the supplied values. -- ``DW_OP_LLVM_tag_offset, tag_offset`` specifies that a memory tag should be +- `DW_OP_LLVM_tag_offset, tag_offset` specifies that a memory tag should be optionally applied to the pointer. The memory tag is derived from the given tag offset in an implementation-defined manner. (This does not affect the semantics of the expression containing it.) 
-- ``DW_OP_LLVM_entry_value, N`` evaluates a sub-expression as-if it were +- `DW_OP_LLVM_entry_value, N` evaluates a sub-expression as-if it were evaluated upon entry to the current call frame. The sub-expression replaces the operations which comprise it, i.e. all such operations are evaluated only in the frame entry context. The sub-expression begins with the operation which immediately precedes - ``DW_OP_LLVM_entry_value, N`` in the ``DIExpression``. If no such operation - exists (i.e. the expression begins with ``DW_OP_LLVM_entry_value, N``), the + `DW_OP_LLVM_entry_value, N` in the `DIExpression`. If no such operation + exists (i.e. the expression begins with `DW_OP_LLVM_entry_value, N`), the implicit operation which pushes the first debug argument of the containing - marker/pseudo is used instead. The value ``N`` must always be at least ``1``, - as this first operation cannot be omitted and is counted in ``N``. + marker/pseudo is used instead. The value `N` must always be at least `1`, + as this first operation cannot be omitted and is counted in `N`. - The rest of the sub-expression comprises the ``(N - 1)`` operations following - ``DW_OP_LLVM_entry_value, N`` in the ``DIExpression``. + The rest of the sub-expression comprises the `(N - 1)` operations following + `DW_OP_LLVM_entry_value, N` in the `DIExpression`. Due to framework limitations: - - ``N`` must not be greater than ``1``. In other words, ``N`` must equal - ``1``, and the sub-expression comprises only the operation immediately - preceding ``DW_OP_LLVM_entry_value, N``. - - ``DW_OP_LLVM_entry_value, N`` must be either the first operation of a - ``DIExpression`` or the second operation if the expression begins with - ``DW_OP_LLVM_arg, 0``. + - `N` must not be greater than `1`. In other words, `N` must equal + `1`, and the sub-expression comprises only the operation immediately + preceding `DW_OP_LLVM_entry_value, N`. 
+ - `DW_OP_LLVM_entry_value, N` must be either the first operation of a + `DIExpression` or the second operation if the expression begins with + `DW_OP_LLVM_arg, 0`. - The first operation must refer to a register value. - Taken together, these limitations mean that ``DW_OP_LLVM_entry_value`` can + Taken together, these limitations mean that `DW_OP_LLVM_entry_value` can only currently be used to push the value a single register had on entry to the current stack frame. - For example, ``!DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_entry_value, 1, - DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value)`` specifies an expression - where the entry value of the first argument to the ``DIExpression`` is added + For example, `!DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_entry_value, 1, + DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value)` specifies an expression + where the entry value of the first argument to the `DIExpression` is added to the non-entry value of the second argument, and the result is used as the value for an implicit value location. - When targeting DWARF, a ``DBG_VALUE(reg, ..., - DIExpression(DW_OP_LLVM_entry_value, 1, ...)`` is lowered to - ``DW_OP_entry_value [reg], ...``, which pushes the value ``reg`` had upon + When targeting DWARF, a `DBG_VALUE(reg, ..., + DIExpression(DW_OP_LLVM_entry_value, 1, ...)` is lowered to + `DW_OP_entry_value [reg], ...`, which pushes the value `reg` had upon frame entry onto the DWARF expression stack. - Because ``DW_OP_LLVM_entry_value`` is currently limited to registers, it is + Because `DW_OP_LLVM_entry_value` is currently limited to registers, it is usually used in MIR, but it is also allowed in LLVM IR when targeting a - :ref:`swiftasync ` argument. The operation is introduced by: + {ref}`swiftasync ` argument. 
The operation is introduced by: - - ``LiveDebugValues`` pass, which applies it to function parameters that + - `LiveDebugValues` pass, which applies it to function parameters that are unmodified throughout the function. Support is limited to simple register location descriptions, or as indirect locations (e.g., parameters passed-by-value to a callee via a pointer to a temporary copy made in the caller). - - ``AsmPrinter`` pass when a call site parameter value - (``DW_AT_call_site_parameter_value``) is represented as entry value of + - `AsmPrinter` pass when a call site parameter value + (`DW_AT_call_site_parameter_value`) is represented as entry value of the parameter. - - ``CoroSplit`` pass, which may move variables from ``alloca``\s into a + - `CoroSplit` pass, which may move variables from `alloca`s into a coroutine frame. If the coroutine frame is a - :ref:`swiftasync ` argument, the variable is described with - an ``DW_OP_LLVM_entry_value`` operation. + {ref}`swiftasync ` argument, the variable is described with + a `DW_OP_LLVM_entry_value` operation. -- ``DW_OP_LLVM_implicit_pointer`` It specifies the dereferenced value. It can +- `DW_OP_LLVM_implicit_pointer` It specifies the dereferenced value. It can be used to represent pointer variables which are optimized out but the value it points to is known. This operator is required as it is different than - DWARF operator ``DW_OP_implicit_pointer`` in representation and specification + DWARF operator `DW_OP_implicit_pointer` in representation and specification (number and types of operands) and later can not be used as multiple level. - Examples using ``DW_OP_LLVM_implicit_pointer``: - - .. 
code-block:: text - - IR for "*ptr = 4;" - -------------- - #dbg_value(i32 4, !17, !DIExpression(DW_OP_LLVM_implicit_pointer), !20) - !17 = !DILocalVariable(name: "ptr", scope: !12, file: !3, line: 5, - type: !18) - !18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64) - !19 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) - !20 = !DILocation(line: 10, scope: !12) - - IR for "**ptr = 4;" - -------------- - #dbg_value(i32 4, !17, - !DIExpression(DW_OP_LLVM_implicit_pointer, DW_OP_LLVM_implicit_pointer), - !21) - !17 = !DILocalVariable(name: "ptr", scope: !12, file: !3, line: 5, - type: !18) - !18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64) - !19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64) - !20 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) - !21 = !DILocation(line: 10, scope: !12) - -- ``DW_OP_LLVM_arg, N`` is used in debug intrinsics that refer to more than one + Examples using `DW_OP_LLVM_implicit_pointer`: + + ```text + IR for "*ptr = 4;" + -------------- + #dbg_value(i32 4, !17, !DIExpression(DW_OP_LLVM_implicit_pointer), !20) + !17 = !DILocalVariable(name: "ptr", scope: !12, file: !3, line: 5, + type: !18) + !18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64) + !19 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !20 = !DILocation(line: 10, scope: !12) + + IR for "**ptr = 4;" + -------------- + #dbg_value(i32 4, !17, + !DIExpression(DW_OP_LLVM_implicit_pointer, DW_OP_LLVM_implicit_pointer), + !21) + !17 = !DILocalVariable(name: "ptr", scope: !12, file: !3, line: 5, + type: !18) + !18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64) + !19 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64) + !20 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !21 = !DILocation(line: 10, scope: !12) + ``` +- `DW_OP_LLVM_arg, N` is used in debug intrinsics that refer to more than one value, such as 
one that calculates the sum of two registers. This is always used in combination with an ordered list of values, such that - ``DW_OP_LLVM_arg, N`` refers to the ``N``\ :sup:`th` element in that list. - For example, ``!DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, - DW_OP_minus, DW_OP_stack_value)`` used with the list ``(%reg1, %reg2)`` would + `DW_OP_LLVM_arg, N` refers to the `N`{sup}`th` element in that list. + For example, `!DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, + DW_OP_minus, DW_OP_stack_value)` used with the list `(%reg1, %reg2)` would evaluate to an implicit value location that has the value of - ``%reg1 - reg2``. This list of values should be provided by the containing + `%reg1 - %reg2`. This list of values should be provided by the containing intrinsic/instruction. -- ``DW_OP_LLVM_extract_bits_sext, 16, 8,`` specifies the offset and size - (``16`` and ``8`` here, respectively) of bits that are to be extracted and +- `DW_OP_LLVM_extract_bits_sext, 16, 8,` specifies the offset and size + (`16` and `8` here, respectively) of bits that are to be extracted and sign-extended from the value at the top of the expression stack. If the top of the expression stack is a memory location then these bits are extracted from - the value pointed to by that memory location. Maps into a ``DW_OP_shl`` - followed by ``DW_OP_shra``. -- ``DW_OP_LLVM_extract_bits_zext`` behaves similarly to - ``DW_OP_LLVM_extract_bits_sext``, but zero-extends instead of sign-extending. - Maps into a ``DW_OP_shl`` followed by ``DW_OP_shr``. + the value pointed to by that memory location. Maps into a `DW_OP_shl` + followed by `DW_OP_shra`. +- `DW_OP_LLVM_extract_bits_zext` behaves similarly to + `DW_OP_LLVM_extract_bits_sext`, but zero-extends instead of sign-extending. + Maps into a `DW_OP_shl` followed by `DW_OP_shr`. 
-Object lifetimes and scoping -============================ +## Object lifetimes and scoping In many languages, the local variables in functions can have their lifetimes or scopes limited to a subset of a function. In the C family of languages, for @@ -647,157 +608,149 @@ In order to handle this, the LLVM debug format uses the metadata attached to LLVM instructions to encode line number and scoping information. Consider the following C fragment, for example: -.. code-block:: c - - 1. void foo() { - 2. int X = 21; - 3. int Y = 22; - 4. { - 5. int Z = 23; - 6. Z = X; - 7. } - 8. X = Y; - 9. } - +```c +1. void foo() { +2. int X = 21; +3. int Y = 22; +4. { +5. int Z = 23; +6. Z = X; +7. } +8. X = Y; +9. } +``` Compiled to LLVM, this function would be represented like this: -.. code-block:: text - - ; Function Attrs: nounwind ssp uwtable - define void @foo() #0 !dbg !4 { - entry: - %X = alloca i32, align 4 - %Y = alloca i32, align 4 - %Z = alloca i32, align 4 - #dbg_declare(ptr %X, !11, !DIExpression(), !13) - store i32 21, i32* %X, align 4, !dbg !13 - #dbg_declare(ptr %Y, !14, !DIExpression(), !15) - store i32 22, i32* %Y, align 4, !dbg !15 - #dbg_declare(ptr %Z, !16, !DIExpression(), !18) - store i32 23, i32* %Z, align 4, !dbg !18 - %0 = load i32, i32* %X, align 4, !dbg !20 - store i32 %0, i32* %Z, align 4, !dbg !21 - %1 = load i32, i32* %Y, align 4, !dbg !22 - store i32 %1, i32* %X, align 4, !dbg !23 - ret void, !dbg !24 - } - - attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } - attributes #1 = { nounwind readnone } - - !llvm.dbg.cu = !{!0} - !llvm.module.flags = !{!7, !8, !9} - !llvm.ident = !{!10} - - !0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 231150) (llvm/trunk 231154)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, 
subprograms: !3, globals: !2, imports: !2) - !1 = !DIFile(filename: "/dev/stdin", directory: "/Users/dexonsmith/data/llvm/debug-info") - !2 = !{} - !3 = !{!4} - !4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, retainedNodes: !2) - !5 = !DISubroutineType(types: !6) - !6 = !{null} - !7 = !{i32 2, !"Dwarf Version", i32 2} - !8 = !{i32 2, !"Debug Info Version", i32 3} - !9 = !{i32 1, !"PIC Level", i32 2} - !10 = !{!"clang version 3.7.0 (trunk 231150) (llvm/trunk 231154)"} - !11 = !DILocalVariable(name: "X", scope: !4, file: !1, line: 2, type: !12) - !12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) - !13 = !DILocation(line: 2, column: 9, scope: !4) - !14 = !DILocalVariable(name: "Y", scope: !4, file: !1, line: 3, type: !12) - !15 = !DILocation(line: 3, column: 9, scope: !4) - !16 = !DILocalVariable(name: "Z", scope: !18, file: !1, line: 5, type: !12) - !17 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5) - !18 = !DILocation(line: 5, column: 11, scope: !17) - !29 = !DILocation(line: 6, column: 11, scope: !17) - !20 = !DILocation(line: 6, column: 9, scope: !17) - !21 = !DILocation(line: 8, column: 9, scope: !4) - !22 = !DILocation(line: 8, column: 7, scope: !4) - !23 = !DILocation(line: 9, column: 3, scope: !4) - - +```text +; Function Attrs: nounwind ssp uwtable +define void @foo() #0 !dbg !4 { +entry: + %X = alloca i32, align 4 + %Y = alloca i32, align 4 + %Z = alloca i32, align 4 + #dbg_declare(ptr %X, !11, !DIExpression(), !13) + store i32 21, i32* %X, align 4, !dbg !13 + #dbg_declare(ptr %Y, !14, !DIExpression(), !15) + store i32 22, i32* %Y, align 4, !dbg !15 + #dbg_declare(ptr %Z, !16, !DIExpression(), !18) + store i32 23, i32* %Z, align 4, !dbg !18 + %0 = load i32, i32* %X, align 4, !dbg !20 + store i32 %0, i32* %Z, align 4, !dbg !21 + %1 = load i32, i32* %Y, align 4, !dbg !22 + store i32 %1, i32* %X, align 4, !dbg 
!23 + ret void, !dbg !24 +} + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8, !9} +!llvm.ident = !{!10} + +!0 = !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 231150) (llvm/trunk 231154)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2) +!1 = !DIFile(filename: "/dev/stdin", directory: "/Users/dexonsmith/data/llvm/debug-info") +!2 = !{} +!3 = !{!4} +!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, retainedNodes: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{null} +!7 = !{i32 2, !"Dwarf Version", i32 2} +!8 = !{i32 2, !"Debug Info Version", i32 3} +!9 = !{i32 1, !"PIC Level", i32 2} +!10 = !{!"clang version 3.7.0 (trunk 231150) (llvm/trunk 231154)"} +!11 = !DILocalVariable(name: "X", scope: !4, file: !1, line: 2, type: !12) +!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!13 = !DILocation(line: 2, column: 9, scope: !4) +!14 = !DILocalVariable(name: "Y", scope: !4, file: !1, line: 3, type: !12) +!15 = !DILocation(line: 3, column: 9, scope: !4) +!16 = !DILocalVariable(name: "Z", scope: !18, file: !1, line: 5, type: !12) +!17 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5) +!18 = !DILocation(line: 5, column: 11, scope: !17) +!29 = !DILocation(line: 6, column: 11, scope: !17) +!20 = !DILocation(line: 6, column: 9, scope: !17) +!21 = !DILocation(line: 8, column: 9, scope: !4) +!22 = !DILocation(line: 8, column: 7, scope: !4) +!23 = !DILocation(line: 9, column: 3, scope: !4) + +``` This example illustrates a few important details about LLVM 
debugging -information. In particular, it shows how the ``#dbg_declare`` record and +information. In particular, it shows how the `#dbg_declare` record and location information, which are attached to an instruction, are applied together to allow a debugger to analyze the relationship between statements, variable definitions, and the code used to implement the function. -.. code-block:: llvm - - #dbg_declare(ptr %X, !11, !DIExpression(), !13) - ; [debug line = 2:9] [debug variable = X] - -The first record ``#dbg_declare`` encodes debugging information for the -variable ``X``. The location ``!13`` at the end of the record provides -scope information for the variable ``X``. - -.. code-block:: text - - !13 = !DILocation(line: 2, column: 9, scope: !4) - !4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, - isLocal: false, isDefinition: true, scopeLine: 1, - isOptimized: false, retainedNodes: !2) - -Here ``!13`` is metadata providing :ref:`location information `. -In this example, scope is encoded by ``!4``, a :ref:`subprogram descriptor +```llvm +#dbg_declare(ptr %X, !11, !DIExpression(), !13) +; [debug line = 2:9] [debug variable = X] +``` +The first record `#dbg_declare` encodes debugging information for the +variable `X`. The location `!13` at the end of the record provides +scope information for the variable `X`. + +```text +!13 = !DILocation(line: 2, column: 9, scope: !4) +!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, + isLocal: false, isDefinition: true, scopeLine: 1, + isOptimized: false, retainedNodes: !2) +``` +Here `!13` is metadata providing {ref}`location information `. +In this example, scope is encoded by `!4`, a {ref}`subprogram descriptor `. This way the location information parameter to the records -indicates that the variable ``X`` is declared at line number 2 at a function -level scope in function ``foo``. 
+indicates that the variable `X` is declared at line number 2 at a function +level scope in function `foo`. Now, let's take another example. -.. code-block:: llvm - - #dbg_declare(ptr %Z, !16, !DIExpression(), !18) - ; [debug line = 5:11] [debug variable = Z] - -The third record ``#dbg_declare`` encodes debugging information for -variable ``Z``. The metadata ``!18`` at the end of the record provides -scope information for the variable ``Z``. - -.. code-block:: text - - !17 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5) - !18 = !DILocation(line: 5, column: 11, scope: !17) - -Here ``!18`` indicates that ``Z`` is declared at line number 5 and column -number 11 inside of lexical scope ``!17``. The lexical scope itself resides -inside of subprogram ``!4`` described above. +```llvm +#dbg_declare(ptr %Z, !16, !DIExpression(), !18) +; [debug line = 5:11] [debug variable = Z] +``` +The third record `#dbg_declare` encodes debugging information for +variable `Z`. The metadata `!18` at the end of the record provides +scope information for the variable `Z`. + +```text +!17 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 5) +!18 = !DILocation(line: 5, column: 11, scope: !17) +``` +Here `!18` indicates that `Z` is declared at line number 5 and column +number 11 inside of lexical scope `!17`. The lexical scope itself resides +inside of subprogram `!4` described above. The scope information attached to each instruction provides a straightforward way to find instructions covered by a scope. -Object lifetime in optimized code -================================= +## Object lifetime in optimized code In the example above, every variable assignment uniquely corresponds to a memory store to the variable's position on the stack. However, in heavily optimized code LLVM promotes most variables into SSA values, which can eventually be placed in physical registers or memory locations. 
To track SSA values through compilation, when objects are promoted to SSA values a -``#dbg_value`` record is created for each assignment, recording the -variable's new location. Compared with the ``#dbg_declare`` record: +`#dbg_value` record is created for each assignment, recording the +variable's new location. Compared with the `#dbg_declare` record: -* A ``#dbg_value`` terminates the effects that any preceding records have on +* A `#dbg_value` terminates the effects that any preceding records have on any common bits of a common variable. - .. note:: - - The current implementation generally terminates the effect of every - record in its entirety if any of its effects would be terminated, rather - than carrying forward the effect of previous records for non-overlapping - bits as it would be permitted to do by this definition. This is allowed - just as dropping any debug information at any point in the compilation is - allowed. - - One exception to this is :doc:`AssignmentTracking` where certain - memory-based locations are carried forward partially in some situations. - -* The ``#dbg_value``'s position in the IR defines where in the instruction + ```{note} + The current implementation generally terminates the effect of every + record in its entirety if any of its effects would be terminated, rather + than carrying forward the effect of previous records for non-overlapping + bits as it would be permitted to do by this definition. This is allowed + just as dropping any debug information at any point in the compilation is + allowed. + + One exception to this is {doc}`AssignmentTracking` where certain + memory-based locations are carried forward partially in some situations. + ``` +* The `#dbg_value`'s position in the IR defines where in the instruction stream the variable's value changes. * Operands can be constants, indicating the variable is assigned a constant value. 
-Care must be taken to update ``#dbg_value`` records when optimization +Care must be taken to update `#dbg_value` records when optimization passes alter or move instructions and blocks -- the developer could observe such changes reflected in the value of variables when debugging the program. For any execution of the optimized program, the set of variable values presented to the @@ -808,128 +761,123 @@ damaging their understanding of the optimized program and undermining their trust in the debugger. Sometimes perfectly preserving variable locations is not possible, often when a -redundant calculation is optimized out. In such cases, a ``#dbg_value`` -with operand ``poison`` should be used, to terminate earlier variable locations -and let the debugger present ``optimized out`` to the developer. Withholding +redundant calculation is optimized out. In such cases, a `#dbg_value` +with operand `poison` should be used, to terminate earlier variable locations +and let the debugger present `optimized out` to the developer. Withholding these potentially stale variable values from the developer diminishes the amount of available debug information, but increases the reliability of the remaining information. To illustrate some potential issues, consider the following example: -.. 
code-block:: llvm - - define i32 @foo(i32 %bar, i1 %cond) { - entry: - #dbg_value(i32 0, !1, !DIExpression(), !4) - br i1 %cond, label %truebr, label %falsebr - truebr: - %tval = add i32 %bar, 1 - #dbg_value(i32 %tval, !1, !DIExpression(), !4) - %g1 = call i32 @gazonk() - br label %exit - falsebr: - %fval = add i32 %bar, 2 - #dbg_value(i32 %fval, !1, !DIExpression(), !4) - %g2 = call i32 @gazonk() - br label %exit - exit: - %merge = phi [ %tval, %truebr ], [ %fval, %falsebr ] - %g = phi [ %g1, %truebr ], [ %g2, %falsebr ] - #dbg_value(i32 %merge, !1, !DIExpression(), !4) - #dbg_value(i32 %g, !3, !DIExpression(), !4) - %plusten = add i32 %merge, 10 - %toret = add i32 %plusten, %g - #dbg_value(i32 %toret, !1, !DIExpression(), !4) - ret i32 %toret - } - -Containing two source-level variables in ``!1`` and ``!3``. The function could, +```llvm +define i32 @foo(i32 %bar, i1 %cond) { +entry: + #dbg_value(i32 0, !1, !DIExpression(), !4) + br i1 %cond, label %truebr, label %falsebr +truebr: + %tval = add i32 %bar, 1 + #dbg_value(i32 %tval, !1, !DIExpression(), !4) + %g1 = call i32 @gazonk() + br label %exit +falsebr: + %fval = add i32 %bar, 2 + #dbg_value(i32 %fval, !1, !DIExpression(), !4) + %g2 = call i32 @gazonk() + br label %exit +exit: + %merge = phi [ %tval, %truebr ], [ %fval, %falsebr ] + %g = phi [ %g1, %truebr ], [ %g2, %falsebr ] + #dbg_value(i32 %merge, !1, !DIExpression(), !4) + #dbg_value(i32 %g, !3, !DIExpression(), !4) + %plusten = add i32 %merge, 10 + %toret = add i32 %plusten, %g + #dbg_value(i32 %toret, !1, !DIExpression(), !4) + ret i32 %toret +} +``` +Containing two source-level variables in `!1` and `!3`. The function could, perhaps, be optimized into the following code: -.. 
code-block:: llvm - - define i32 @foo(i32 %bar, i1 %cond) { - entry: - %g = call i32 @gazonk() - %addoper = select i1 %cond, i32 11, i32 12 - %plusten = add i32 %bar, %addoper - %toret = add i32 %plusten, %g - ret i32 %toret - } - -What ``#dbg_value`` records should be placed to represent the original variable +```llvm +define i32 @foo(i32 %bar, i1 %cond) { +entry: + %g = call i32 @gazonk() + %addoper = select i1 %cond, i32 11, i32 12 + %plusten = add i32 %bar, %addoper + %toret = add i32 %plusten, %g + ret i32 %toret +} +``` +What `#dbg_value` records should be placed to represent the original variable locations in this code? Unfortunately the second, third, and fourth -``#dbg_value``\s for ``!1`` in the source function have had their operands -(``%tval``, ``%fval``, ``%merge``) optimized out. Assuming we cannot recover -them, we might consider this placement of ``#dbg_value``\s: - -.. code-block:: llvm - - define i32 @foo(i32 %bar, i1 %cond) { - entry: - #dbg_value(i32 0, !1, !DIExpression(), !4) - %g = call i32 @gazonk() - #dbg_value(i32 %g, !3, !DIExpression(), !4) - %addoper = select i1 %cond, i32 11, i32 12 - %plusten = add i32 %bar, %addoper - %toret = add i32 %plusten, %g - #dbg_value(i32 %toret, !1, !DIExpression(), !4) - ret i32 %toret - } - -However, this will cause ``!3`` to have the return value of ``@gazonk()`` at -the same time as ``!1`` has the constant value zero -- a pair of assignments +`#dbg_value`s for `!1` in the source function have had their operands +(`%tval`, `%fval`, `%merge`) optimized out. 
Assuming we cannot recover +them, we might consider this placement of `#dbg_value`s: + +```llvm +define i32 @foo(i32 %bar, i1 %cond) { +entry: + #dbg_value(i32 0, !1, !DIExpression(), !4) + %g = call i32 @gazonk() + #dbg_value(i32 %g, !3, !DIExpression(), !4) + %addoper = select i1 %cond, i32 11, i32 12 + %plusten = add i32 %bar, %addoper + %toret = add i32 %plusten, %g + #dbg_value(i32 %toret, !1, !DIExpression(), !4) + ret i32 %toret +} +``` +However, this will cause `!3` to have the return value of `@gazonk()` at +the same time as `!1` has the constant value zero -- a pair of assignments that never occurred in the unoptimized program. To avoid this, we must terminate -the range that ``!1`` has the constant value assignment by inserting an poison -``#dbg_value`` before the ``#dbg_value`` for ``!3``: - -.. code-block:: llvm - - define i32 @foo(i32 %bar, i1 %cond) { - entry: - #dbg_value(i32 0, !1, !DIExpression(), !2) - %g = call i32 @gazonk() - #dbg_value(i32 poison, !1, !DIExpression(), !2) - #dbg_value(i32 %g, !3, !DIExpression(), !2) - %addoper = select i1 %cond, i32 11, i32 12 - %plusten = add i32 %bar, %addoper - %toret = add i32 %plusten, %g - #dbg_value(i32 %toret, !1, !DIExpression(), !2) - ret i32 %toret - } - -There are a few other ``#dbg_value`` configurations that mean it terminates +the range that `!1` has the constant value assignment by inserting a poison +`#dbg_value` before the `#dbg_value` for `!3`: + +```llvm +define i32 @foo(i32 %bar, i1 %cond) { +entry: + #dbg_value(i32 0, !1, !DIExpression(), !2) + %g = call i32 @gazonk() + #dbg_value(i32 poison, !1, !DIExpression(), !2) + #dbg_value(i32 %g, !3, !DIExpression(), !2) + %addoper = select i1 %cond, i32 11, i32 12 + %plusten = add i32 %bar, %addoper + %toret = add i32 %plusten, %g + #dbg_value(i32 %toret, !1, !DIExpression(), !2) + ret i32 %toret +} +``` +There are a few other `#dbg_value` configurations that mean it terminates dominating location definitions without adding a new location.
The complete list is: -* Any location operand is ``poison`` (or ``undef``). -* Any location operand is an empty metadata tuple (``!{}``) (which cannot - occur in a ``!DIArgList``). -* There are no location operands (empty ``DIArgList``) and the ``DIExpression`` +* Any location operand is `poison` (or `undef`). +* Any location operand is an empty metadata tuple (`!{}`) (which cannot + occur in a `!DIArgList`). +* There are no location operands (empty `DIArgList`) and the `DIExpression` is empty. -This class of ``#dbg_value`` that kills variable locations is called a "kill -``#dbg_value``" or "kill location", and for legacy reasons the term "``undef -#dbg_value``" may be used in existing code. The ``DbgVariableIntrinsic`` -methods ``isKillLocation`` and ``setKillLocation`` should be used where +This class of `#dbg_value` that kills variable locations is called a "kill +`#dbg_value`" or "kill location", and for legacy reasons the term "`undef +#dbg_value`" may be used in existing code. The `DbgVariableIntrinsic` +methods `isKillLocation` and `setKillLocation` should be used where possible rather than inspecting location operands directly to check or set -whether a ``#dbg_value`` is a kill location. +whether a `#dbg_value` is a kill location. -In general, if any ``#dbg_value`` has its operand optimized out and cannot be -recovered, then a kill ``#dbg_value`` is necessary to terminate earlier -variable locations. Additional kill ``#dbg_values`` may be necessary when the +In general, if any `#dbg_value` has its operand optimized out and cannot be +recovered, then a kill `#dbg_value` is necessary to terminate earlier +variable locations. Additional kill `#dbg_value`s may be necessary when the debugger can observe re-ordering of assignments.
-How variable location metadata is transformed during CodeGen -============================================================ +## How variable location metadata is transformed during CodeGen LLVM preserves debug information throughout mid-level and backend passes, ultimately producing a mapping between source-level information and instruction ranges. This is relatively straightforward for line number information, as mapping instructions to line numbers is a simple association. For variable locations -however the story is more complex. As each ``#dbg_value`` record +however the story is more complex. As each `#dbg_value` record represents a source-level assignment of a value to a source variable, the debug records effectively embed a small imperative program within the LLVM IR. By the end of CodeGen, this becomes a mapping from each @@ -946,19 +894,18 @@ significantly change the ordering of the program, and occurs in a number of different passes. Some variable locations are not transformed during CodeGen. Stack locations -specified by ``#dbg_declare`` are valid and unchanging for the entire duration -of the function, and are recorded in a simple ``MachineFunction`` table. +specified by `#dbg_declare` are valid and unchanging for the entire duration +of the function, and are recorded in a simple `MachineFunction` table. Location changes in the prologue and epilogue of a function are also ignored: frame setup and destruction may take several instructions, require a disproportionate amount of debugging information in the output binary to describe, and should be stepped over by debuggers anyway. 
-Variable locations in Instruction Selection and MIR ---------------------------------------------------- +### Variable locations in Instruction Selection and MIR Instruction selection creates a MIR function from an IR function, and just as -it transforms ``intermediate`` instructions into machine instructions, so must -``intermediate`` variable locations become machine variable locations. Within +it transforms `intermediate` instructions into machine instructions, so must +`intermediate` variable locations become machine variable locations. Within IR, variable locations are always identified by a Value, but in MIR there can be different types of variable locations. In addition, some IR locations become unavailable, for example if the operation of multiple IR instructions are @@ -966,58 +913,56 @@ combined into one machine instruction (such as multiply-and-accumulate) then intermediate Values are lost. To track variable locations through instruction selection, they are first separated into locations that do not depend on code generation (constants, stack locations, allocated virtual registers) and those -that do. For those that do, debug metadata is attached to ``SDNode``\s in -``SelectionDAG``\s. After instruction selection has occurred and a MIR function -is created, if the ``SDNode`` associated with debug metadata is allocated a +that do. For those that do, debug metadata is attached to `SDNode`s in +`SelectionDAG`s. After instruction selection has occurred and a MIR function +is created, if the `SDNode` associated with debug metadata is allocated a virtual register, that virtual register is used as the variable location. If -the ``SDNode`` is folded into a machine instruction or otherwise transformed +the `SDNode` is folded into a machine instruction or otherwise transformed into a non-register, the variable location becomes unavailable. 
Locations that are unavailable are treated as if they have been optimized out: -in IR the location would be assigned ``undef`` by a debug record, and in MIR +in IR the location would be assigned `undef` by a debug record, and in MIR the equivalent location is used. After MIR locations are assigned to each variable, machine pseudo-instructions -corresponding to each ``#dbg_value`` record are inserted. There are two +corresponding to each `#dbg_value` record are inserted. There are two forms of this type of instruction. -The first form, ``DBG_VALUE``, appears thus: - -.. code-block:: text - - DBG_VALUE %1, $noreg, !123, !DIExpression() +The first form, `DBG_VALUE`, appears thus: +```text +DBG_VALUE %1, $noreg, !123, !DIExpression() +``` And has the following operands: * The first operand can record the variable location as a register, a frame index, an immediate, or the base address register if the original - debug record referred to memory. ``$noreg`` indicates the variable - location is undefined, equivalent to an ``undef #dbg_value`` operand. + debug record referred to memory. `$noreg` indicates the variable + location is undefined, equivalent to an `undef #dbg_value` operand. * The type of the second operand indicates whether the variable location is - directly referred to by the ``DBG_VALUE``, or whether it is indirect. The - ``$noreg`` register signifies the former, an immediate operand (0) the + directly referred to by the `DBG_VALUE`, or whether it is indirect. The + `$noreg` register signifies the former, an immediate operand (0) the latter. * Operand 3 is the Variable field of the original debug record. * Operand 4 is the Expression field of the original debug record. -The second form, ``DBG_VALUE_LIST``, appears thus: - -.. 
code-block:: text - - DBG_VALUE_LIST !123, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus), %1, %2 +The second form, `DBG_VALUE_LIST`, appears thus: +```text +DBG_VALUE_LIST !123, !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_LLVM_arg, 1, DW_OP_plus), %1, %2 +``` And has the following operands: * The first operand is the Variable field of the original debug record. * The second operand is the Expression field of the original debug record. * Any number of operands, from the 3rd onwards, record a sequence of variable location operands, which may take any of the same values as the first - operand of the ``DBG_VALUE`` instruction above. These variable location + operand of the `DBG_VALUE` instruction above. These variable location operands are inserted into the final DWARF Expression in positions indicated - by the ``DW_OP_LLVM_arg`` operator in the :ref:`diexpression`. + by the `DW_OP_LLVM_arg` operator in the {ref}`diexpression`. -The position at which the ``DBG_VALUE``\s are inserted should correspond to the -positions of their matching ``#dbg_value`` records in the IR block. As with +The position at which the `DBG_VALUE`s are inserted should correspond to the +positions of their matching `#dbg_value` records in the IR block. As with optimization, LLVM aims to preserve the order in which variable assignments -occurred in the source program. However, ``SelectionDAG`` performs some +occurred in the source program. However, `SelectionDAG` performs some instruction scheduling, which can reorder assignments (discussed below). Function parameter locations are moved to the beginning of the function if they're not already, to ensure they're immediately available on function entry. @@ -1025,82 +970,79 @@ they're not already, to ensure they're immediately available on function entry. To demonstrate variable locations during instruction selection, consider the following example: -.. 
code-block:: llvm - - define i32 @foo(i32* %addr) { - entry: - #dbg_value(i32 0, !3, !DIExpression(), !5) - br label %bb1, !dbg !5 - - bb1: ; preds = %bb1, %entry - %bar.0 = phi i32 [ 0, %entry ], [ %add, %bb1 ] - #dbg_value(i32 %bar.0, !3, !DIExpression(), !5) - %addr1 = getelementptr i32, i32 *%addr, i32 1, !dbg !5 - #dbg_value(i32 *%addr1, !3, !DIExpression(), !5) - %loaded1 = load i32, i32* %addr1, !dbg !5 - %addr2 = getelementptr i32, i32 *%addr, i32 %bar.0, !dbg !5 - #dbg_value(i32 *%addr2, !3, !DIExpression(), !5) - %loaded2 = load i32, i32* %addr2, !dbg !5 - %add = add i32 %bar.0, 1, !dbg !5 - #dbg_value(i32 %add, !3, !DIExpression(), !5) - %added = add i32 %loaded1, %loaded2 - %cond = icmp ult i32 %added, %bar.0, !dbg !5 - br i1 %cond, label %bb1, label %bb2, !dbg !5 - - bb2: ; preds = %bb1 - ret i32 0, !dbg !5 - } - -If one compiles this IR with ``llc -o - -start-after=codegen-prepare -stop-after=expand-isel-pseudos -mtriple=x86_64--``, the following MIR is produced: - -.. code-block:: text - - bb.0.entry: - successors: %bb.1(0x80000000) - liveins: $rdi - - %2:gr64 = COPY $rdi - %3:gr32 = MOV32r0 implicit-def dead $eflags - DBG_VALUE 0, $noreg, !3, !DIExpression(), debug-location !5 - - bb.1.bb1: - successors: %bb.1(0x7c000000), %bb.2(0x04000000) - - %0:gr32 = PHI %3, %bb.0, %1, %bb.1 - DBG_VALUE %0, $noreg, !3, !DIExpression(), debug-location !5 - DBG_VALUE %2, $noreg, !3, !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value), debug-location !5 - %4:gr32 = MOV32rm %2, 1, $noreg, 4, $noreg, debug-location !5 :: (load 4 from %ir.addr1) - %5:gr64_nosp = MOVSX64rr32 %0, debug-location !5 - DBG_VALUE $noreg, $noreg, !3, !DIExpression(), debug-location !5 - %1:gr32 = INC32r %0, implicit-def dead $eflags, debug-location !5 - DBG_VALUE %1, $noreg, !3, !DIExpression(), debug-location !5 - %6:gr32 = ADD32rm %4, %2, 4, killed %5, 0, $noreg, implicit-def dead $eflags :: (load 4 from %ir.addr2) - %7:gr32 = SUB32rr %6, %0, implicit-def $eflags, debug-location !5 - 
JB_1 %bb.1, implicit $eflags, debug-location !5 - JMP_1 %bb.2, debug-location !5 - - bb.2.bb2: - %8:gr32 = MOV32r0 implicit-def dead $eflags - $eax = COPY %8, debug-location !5 - RET 0, $eax, debug-location !5 - -Observe first that there is a ``DBG_VALUE`` instruction for every ``#dbg_value`` +```llvm +define i32 @foo(i32* %addr) { +entry: + #dbg_value(i32 0, !3, !DIExpression(), !5) + br label %bb1, !dbg !5 + +bb1: ; preds = %bb1, %entry + %bar.0 = phi i32 [ 0, %entry ], [ %add, %bb1 ] + #dbg_value(i32 %bar.0, !3, !DIExpression(), !5) + %addr1 = getelementptr i32, i32 *%addr, i32 1, !dbg !5 + #dbg_value(i32 *%addr1, !3, !DIExpression(), !5) + %loaded1 = load i32, i32* %addr1, !dbg !5 + %addr2 = getelementptr i32, i32 *%addr, i32 %bar.0, !dbg !5 + #dbg_value(i32 *%addr2, !3, !DIExpression(), !5) + %loaded2 = load i32, i32* %addr2, !dbg !5 + %add = add i32 %bar.0, 1, !dbg !5 + #dbg_value(i32 %add, !3, !DIExpression(), !5) + %added = add i32 %loaded1, %loaded2 + %cond = icmp ult i32 %added, %bar.0, !dbg !5 + br i1 %cond, label %bb1, label %bb2, !dbg !5 + +bb2: ; preds = %bb1 + ret i32 0, !dbg !5 +} +``` +If one compiles this IR with `llc -o - -start-after=codegen-prepare -stop-after=expand-isel-pseudos -mtriple=x86_64--`, the following MIR is produced: + +```text +bb.0.entry: + successors: %bb.1(0x80000000) + liveins: $rdi + + %2:gr64 = COPY $rdi + %3:gr32 = MOV32r0 implicit-def dead $eflags + DBG_VALUE 0, $noreg, !3, !DIExpression(), debug-location !5 + +bb.1.bb1: + successors: %bb.1(0x7c000000), %bb.2(0x04000000) + + %0:gr32 = PHI %3, %bb.0, %1, %bb.1 + DBG_VALUE %0, $noreg, !3, !DIExpression(), debug-location !5 + DBG_VALUE %2, $noreg, !3, !DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value), debug-location !5 + %4:gr32 = MOV32rm %2, 1, $noreg, 4, $noreg, debug-location !5 :: (load 4 from %ir.addr1) + %5:gr64_nosp = MOVSX64rr32 %0, debug-location !5 + DBG_VALUE $noreg, $noreg, !3, !DIExpression(), debug-location !5 + %1:gr32 = INC32r %0, implicit-def dead 
$eflags, debug-location !5 + DBG_VALUE %1, $noreg, !3, !DIExpression(), debug-location !5 + %6:gr32 = ADD32rm %4, %2, 4, killed %5, 0, $noreg, implicit-def dead $eflags :: (load 4 from %ir.addr2) + %7:gr32 = SUB32rr %6, %0, implicit-def $eflags, debug-location !5 + JB_1 %bb.1, implicit $eflags, debug-location !5 + JMP_1 %bb.2, debug-location !5 + +bb.2.bb2: + %8:gr32 = MOV32r0 implicit-def dead $eflags + $eax = COPY %8, debug-location !5 + RET 0, $eax, debug-location !5 +``` +Observe first that there is a `DBG_VALUE` instruction for every `#dbg_value` record in the source IR, ensuring no source level assignments go missing. Then consider the different ways in which variable locations have been recorded: -* For the first ``#dbg_value`` an immediate operand is used to record a zero value. -* The ``#dbg_value`` of the PHI instruction leads to a ``DBG_VALUE`` of virtual register - ``%0``. +* For the first `#dbg_value` an immediate operand is used to record a zero value. +* The `#dbg_value` of the PHI instruction leads to a `DBG_VALUE` of virtual register + `%0`. * The first GEP has its effect folded into the first load instruction (as a 4-byte offset), but the variable location is salvaged by folding - the GEPs effect into the ``DIExpression``. + the GEP's effect into the `DIExpression`. * The second GEP is also folded into the corresponding load. However, it is - insufficiently simple to be salvaged, and is emitted as a ``$noreg`` - ``DBG_VALUE``, indicating that the variable takes on an undefined location. -* The final ``#dbg_value`` has its Value placed in virtual register ``%1``. + insufficiently simple to be salvaged, and is emitted as a `$noreg` + `DBG_VALUE`, indicating that the variable takes on an undefined location. +* The final `#dbg_value` has its Value placed in virtual register `%1`.
-Instruction Scheduling ----------------------- +### Instruction Scheduling A number of passes can reschedule instructions, notably instruction selection and the pre-and-post RA machine schedulers. Instruction scheduling can @@ -1109,83 +1051,78 @@ case the instruction sequence could be completely reversed. In such circumstances LLVM follows the principle applied to optimizations, that it is better for the debugger not to display any state than a misleading state. Thus, whenever instructions are advanced in order of execution, any -corresponding ``DBG_VALUE`` is kept in its original position, and if an instruction +corresponding `DBG_VALUE` is kept in its original position, and if an instruction is delayed then the variable is given an undefined location for the duration of the delay. To illustrate, consider this pseudo-MIR: -.. code-block:: text - - %1:gr32 = MOV32rm %0, 1, $noreg, 4, $noreg, debug-location !5 :: (load 4 from %ir.addr1) - DBG_VALUE %1, $noreg, !1, !2 - %4:gr32 = ADD32rr %3, %2, implicit-def dead $eflags - DBG_VALUE %4, $noreg, !3, !4 - %7:gr32 = SUB32rr %6, %5, implicit-def dead $eflags - DBG_VALUE %7, $noreg, !5, !6 - -Imagine that the ``SUB32rr`` were moved forward to give us the following MIR: - -.. 
code-block:: text - - %7:gr32 = SUB32rr %6, %5, implicit-def dead $eflags - %1:gr32 = MOV32rm %0, 1, $noreg, 4, $noreg, debug-location !5 :: (load 4 from %ir.addr1) - DBG_VALUE %1, $noreg, !1, !2 - %4:gr32 = ADD32rr %3, %2, implicit-def dead $eflags - DBG_VALUE %4, $noreg, !3, !4 - DBG_VALUE %7, $noreg, !5, !6 - +```text +%1:gr32 = MOV32rm %0, 1, $noreg, 4, $noreg, debug-location !5 :: (load 4 from %ir.addr1) +DBG_VALUE %1, $noreg, !1, !2 +%4:gr32 = ADD32rr %3, %2, implicit-def dead $eflags +DBG_VALUE %4, $noreg, !3, !4 +%7:gr32 = SUB32rr %6, %5, implicit-def dead $eflags +DBG_VALUE %7, $noreg, !5, !6 +``` +Imagine that the `SUB32rr` were moved forward to give us the following MIR: + +```text +%7:gr32 = SUB32rr %6, %5, implicit-def dead $eflags +%1:gr32 = MOV32rm %0, 1, $noreg, 4, $noreg, debug-location !5 :: (load 4 from %ir.addr1) +DBG_VALUE %1, $noreg, !1, !2 +%4:gr32 = ADD32rr %3, %2, implicit-def dead $eflags +DBG_VALUE %4, $noreg, !3, !4 +DBG_VALUE %7, $noreg, !5, !6 +``` In this circumstance LLVM would leave the MIR as shown above. Were we to move -the ``DBG_VALUE`` of virtual register %7 upwards with the ``SUB32rr``, we would re-order +the `DBG_VALUE` of virtual register %7 upwards with the `SUB32rr`, we would re-order assignments and introduce a new state of the program. Whereas with the solution above, the debugger will see one fewer combination of variable values, because -``!3`` and ``!5`` will change value at the same time. This is preferred over +`!3` and `!5` will change value at the same time. This is preferred over misrepresenting the original program. -In comparison, if one sunk the ``MOV32rm``, LLVM would produce the following: - -.. 
code-block:: text - - DBG_VALUE $noreg, $noreg, !1, !2 - %4:gr32 = ADD32rr %3, %2, implicit-def dead $eflags - DBG_VALUE %4, $noreg, !3, !4 - %7:gr32 = SUB32rr %6, %5, implicit-def dead $eflags - DBG_VALUE %7, $noreg, !5, !6 - %1:gr32 = MOV32rm %0, 1, $noreg, 4, $noreg, debug-location !5 :: (load 4 from %ir.addr1) - DBG_VALUE %1, $noreg, !1, !2 - -Here, to avoid presenting a state in which the first assignment to ``!1`` -disappears, the ``DBG_VALUE`` at the top of the block assigns the variable the +In comparison, if one sunk the `MOV32rm`, LLVM would produce the following: + +```text +DBG_VALUE $noreg, $noreg, !1, !2 +%4:gr32 = ADD32rr %3, %2, implicit-def dead $eflags +DBG_VALUE %4, $noreg, !3, !4 +%7:gr32 = SUB32rr %6, %5, implicit-def dead $eflags +DBG_VALUE %7, $noreg, !5, !6 +%1:gr32 = MOV32rm %0, 1, $noreg, 4, $noreg, debug-location !5 :: (load 4 from %ir.addr1) +DBG_VALUE %1, $noreg, !1, !2 +``` +Here, to avoid presenting a state in which the first assignment to `!1` +disappears, the `DBG_VALUE` at the top of the block assigns the variable the undefined location, until its value is available at the end of the block where -an additional ``DBG_VALUE`` is added. Were any other ``DBG_VALUE`` for ``!1`` to occur -in the instructions that the ``MOV32rm`` was sunk past, the ``DBG_VALUE`` for ``%1`` +an additional `DBG_VALUE` is added. Were any other `DBG_VALUE` for `!1` to occur +in the instructions that the `MOV32rm` was sunk past, the `DBG_VALUE` for `%1` would be dropped and the debugger would never observe it in the variable. This accurately reflects that the value is not available during the corresponding portion of the original program. 
-Variable locations during Register Allocation ---------------------------------------------- +### Variable locations during Register Allocation To avoid debug instructions interfering with the register allocator, the -``LiveDebugVariables`` pass extracts variable locations from a MIR function and -deletes the corresponding ``DBG_VALUE`` instructions. Some localized copy +`LiveDebugVariables` pass extracts variable locations from a MIR function and +deletes the corresponding `DBG_VALUE` instructions. Some localized copy propagation is performed within blocks. After register allocation, the -``VirtRegRewriter`` pass re-inserts ``DBG_VALUE`` instructions in their +`VirtRegRewriter` pass re-inserts `DBG_VALUE` instructions in their original positions, translating virtual register references into their physical machine locations. To avoid encoding incorrect variable locations, in this pass -any ``DBG_VALUE`` of a virtual register that is not live, is replaced by the -undefined location. The ``LiveDebugVariables`` may insert redundant -``DBG_VALUE``\s because of virtual register rewriting. These will be -subsequently removed by the ``RemoveRedundantDebugValues`` pass. +any `DBG_VALUE` of a virtual register that is not live, is replaced by the +undefined location. The `LiveDebugVariables` pass may insert redundant +`DBG_VALUE`s because of virtual register rewriting. These will be +subsequently removed by the `RemoveRedundantDebugValues` pass. -``LiveDebugValues`` expansion of variable locations ---------------------------------------------------- +### `LiveDebugValues` expansion of variable locations After all optimizations have run and shortly before emission, the -``LiveDebugValue``\s pass runs to achieve two aims: +`LiveDebugValues` pass runs to achieve two aims: * To propagate the location of variables through copies and register spills, * For every block, to record every valid variable location in that block. 
-After this pass the ``DBG_VALUE`` instruction changes meaning: rather than +After this pass the `DBG_VALUE` instruction changes meaning: rather than corresponding to a source-level assignment where the variable may change value, it asserts the location of a variable in a block, and loses effect outside the block. Propagating variable locations through copies and spills is @@ -1193,78 +1130,76 @@ straightforward: determining the variable location in every basic block requires the consideration of control flow. Consider the following IR, which presents several difficulties: -.. code-block:: text - - define dso_local i32 @foo(i1 %cond, i32 %input) !dbg !12 { - entry: - br i1 %cond, label %truebr, label %falsebr - - bb1: - %value = phi i32 [ %value1, %truebr ], [ %value2, %falsebr ] - br label %exit, !dbg !26 - - truebr: - #dbg_value(i32 %input, !30, !DIExpression(), !24) - #dbg_value(i32 1, !23, !DIExpression(), !24) - %value1 = add i32 %input, 1 - br label %bb1 - - falsebr: - #dbg_value(i32 %input, !30, !DIExpression(), !24) - #dbg_value(i32 2, !23, !DIExpression(), !24) - %value2 = add i32 %input, 2 - br label %bb1 - - exit: - ret i32 %value, !dbg !30 - } - +```text +define dso_local i32 @foo(i1 %cond, i32 %input) !dbg !12 { +entry: + br i1 %cond, label %truebr, label %falsebr + +bb1: + %value = phi i32 [ %value1, %truebr ], [ %value2, %falsebr ] + br label %exit, !dbg !26 + +truebr: + #dbg_value(i32 %input, !30, !DIExpression(), !24) + #dbg_value(i32 1, !23, !DIExpression(), !24) + %value1 = add i32 %input, 1 + br label %bb1 + +falsebr: + #dbg_value(i32 %input, !30, !DIExpression(), !24) + #dbg_value(i32 2, !23, !DIExpression(), !24) + %value2 = add i32 %input, 2 + br label %bb1 + +exit: + ret i32 %value, !dbg !30 +} +``` Here the difficulties are: * The control flow is roughly the opposite of basic block order -* The value of the ``!23`` variable merges into ``%bb1``, but there is no PHI +* The value of the `!23` variable merges into `%bb1`, but there is no PHI 
node -As mentioned above, the ``#dbg_value`` records essentially form an +As mentioned above, the `#dbg_value` records essentially form an imperative program embedded in the IR, with each record defining a variable -location. This *could* be converted to an SSA form by ``mem2reg``, in the same way +location. This *could* be converted to an SSA form by `mem2reg`, in the same way that it uses use-def chains to identify control flow merges and insert phi nodes for IR Values. However, because debug variable locations are defined for every machine instruction, in effect every IR instruction uses every variable location, which would lead to a large number of debugging records being generated. -Examining the example above, variable ``!30`` is assigned ``%input`` on both -conditional paths through the function, while ``!23`` is assigned differing -constant values on either path. Where control flow merges in ``%bb1`` we would -want ``!30`` to keep its location (``%input``), but ``!23`` to become undefined -as we cannot determine at runtime what value it should have in ``%bb1`` without -inserting a PHI node. ``mem2reg`` does not insert the PHI node to avoid changing -CodeGen when debugging is enabled, and does not insert the other ``#dbg_values`` +Examining the example above, variable `!30` is assigned `%input` on both +conditional paths through the function, while `!23` is assigned differing +constant values on either path. Where control flow merges in `%bb1` we would +want `!30` to keep its location (`%input`), but `!23` to become undefined +as we cannot determine at runtime what value it should have in `%bb1` without +inserting a PHI node. `mem2reg` does not insert the PHI node to avoid changing +CodeGen when debugging is enabled, and does not insert the other `#dbg_values` to avoid adding very large numbers of records. 
-Instead, ``LiveDebugValue``\s determines variable locations when control +Instead, `LiveDebugValues` determines variable locations when control flow merges. A dataflow analysis is used to propagate locations between blocks: when control flow merges, if a variable has the same location in all predecessors then that location is propagated into the successor. If the predecessor locations disagree, the location becomes undefined. -Once ``LiveDebugValue``\s has run, every block should have all valid variable -locations described by ``DBG_VALUE`` instructions within the block. Very little +Once `LiveDebugValues` has run, every block should have all valid variable +locations described by `DBG_VALUE` instructions within the block. Very little effort is then required by supporting classes (such as -``DbgEntityHistoryCalculator``) to build a map of each instruction to every +`DbgEntityHistoryCalculator`) to build a map of each instruction to every valid variable location, without the need to consider control flow. From the example above, it is otherwise difficult to determine that the location -of variable ``!30`` should flow "up" into block ``%bb1``, but that the location -of variable ``!23`` should not flow "down" into the ``%exit`` block. +of variable `!30` should flow "up" into block `%bb1`, but that the location +of variable `!23` should not flow "down" into the `%exit` block. -.. _ccxx_frontend: +(ccxx_frontend)= -C/C++ front-end specific debug information -========================================== +## C/C++ front-end specific debug information The C and C++ front-ends represent information about the program in a -format that is effectively identical to `DWARF `_ +format that is effectively identical to [DWARF](http://www.dwarfstd.org/) in terms of information content. 
This allows code generators to trivially support native debuggers by generating standard dwarf information, and contains enough information for non-dwarf targets to @@ -1279,261 +1214,238 @@ source-language front-ends, the information used should be documented here. The following sections provide examples of a few C/C++ constructs and the debug information that would best describe those constructs. The -canonical references are the ``DINode`` classes defined in -``include/llvm/IR/DebugInfoMetadata.h`` and the implementations of the -helper functions in ``lib/IR/DIBuilder.cpp``. +canonical references are the `DINode` classes defined in +`include/llvm/IR/DebugInfoMetadata.h` and the implementations of the +helper functions in `lib/IR/DIBuilder.cpp`. -C/C++ source file information ------------------------------ +### C/C++ source file information -``llvm::Instruction`` provides easy access to metadata attached to an +`llvm::Instruction` provides easy access to metadata attached to an instruction. One can extract line number information encoded in LLVM IR using -``Instruction::getDebugLoc()`` and ``DILocation::getLine()``. - -.. code-block:: c++ - - if (DILocation *Loc = I->getDebugLoc()) { // Here I is an LLVM instruction - unsigned Line = Loc->getLine(); - StringRef File = Loc->getFilename(); - StringRef Dir = Loc->getDirectory(); - bool ImplicitCode = Loc->isImplicitCode(); - } - -When the flag ``ImplicitCode`` is true then it means that the Instruction has been +`Instruction::getDebugLoc()` and `DILocation::getLine()`. + +```c++ +if (DILocation *Loc = I->getDebugLoc()) { // Here I is an LLVM instruction + unsigned Line = Loc->getLine(); + StringRef File = Loc->getFilename(); + StringRef Dir = Loc->getDirectory(); + bool ImplicitCode = Loc->isImplicitCode(); +} +``` +When the flag `ImplicitCode` is true then it means that the Instruction has been added by the front-end but doesn't correspond to source code written by the user. For example -.. 
code-block:: c++ - - if (MyBoolean) { - MyObject MO; - ... - } - -At the end of the scope the ``MyObject``'s destructor is called but it isn't written +```c++ +if (MyBoolean) { + MyObject MO; + ... +} +``` +At the end of the scope the `MyObject`'s destructor is called but it isn't written explicitly. This information is useful to avoid having counters on brackets when making code coverage. -C/C++ global variable information ---------------------------------- +### C/C++ global variable information Given an integer global variable declared as follows: -.. code-block:: c - - _Alignas(8) int MyGlobal = 100; - +```c +_Alignas(8) int MyGlobal = 100; +``` a C/C++ front-end would generate the following descriptors: -.. code-block:: text - - ;; - ;; Define the global itself. - ;; - @MyGlobal = global i32 100, align 8, !dbg !0 +```text +;; +;; Define the global itself. +;; +@MyGlobal = global i32 100, align 8, !dbg !0 - ;; - ;; List of debug info of globals - ;; - !llvm.dbg.cu = !{!1} +;; +;; List of debug info of globals +;; +!llvm.dbg.cu = !{!1} - ;; Some unrelated metadata. - !llvm.module.flags = !{!6, !7} - !llvm.ident = !{!8} +;; Some unrelated metadata. +!llvm.module.flags = !{!6, !7} +!llvm.ident = !{!8} - ;; Define the global variable itself - !0 = distinct !DIGlobalVariable(name: "MyGlobal", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true, align: 64) +;; Define the global variable itself +!0 = distinct !DIGlobalVariable(name: "MyGlobal", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true, align: 64) - ;; Define the compile unit. - !1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, - producer: "clang version 4.0.0", - isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, - enums: !3, globals: !4) +;; Define the compile unit. 
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, + producer: "clang version 4.0.0", + isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, + enums: !3, globals: !4) - ;; - ;; Define the file - ;; - !2 = !DIFile(filename: "/dev/stdin", - directory: "/Users/dexonsmith/data/llvm/debug-info") +;; +;; Define the file +;; +!2 = !DIFile(filename: "/dev/stdin", + directory: "/Users/dexonsmith/data/llvm/debug-info") - ;; An empty array. - !3 = !{} +;; An empty array. +!3 = !{} - ;; The Array of Global Variables - !4 = !{!0} +;; The Array of Global Variables +!4 = !{!0} - ;; - ;; Define the type - ;; - !5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +;; +;; Define the type +;; +!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) - ;; Dwarf version to output. - !6 = !{i32 2, !"Dwarf Version", i32 4} +;; Dwarf version to output. +!6 = !{i32 2, !"Dwarf Version", i32 4} - ;; Debug info schema version. - !7 = !{i32 2, !"Debug Info Version", i32 3} +;; Debug info schema version. +!7 = !{i32 2, !"Debug Info Version", i32 3} - ;; Compiler identification - !8 = !{!"clang version 4.0.0"} +;; Compiler identification +!8 = !{!"clang version 4.0.0"} - -The align value in ``DIGlobalVariable`` description specifies variable alignment in -case it was forced by C11 ``_Alignas()``, C++11 ``alignas()`` keywords or compiler -attribute ``__attribute__((aligned ()))``. In other case (when this field is missing) +``` +The align value in `DIGlobalVariable` description specifies variable alignment in +case it was forced by C11 `_Alignas()`, C++11 `alignas()` keywords or compiler +attribute `__attribute__((aligned ()))`. In other case (when this field is missing) alignment is considered default. This is used when producing DWARF output -for ``DW_AT_alignment`` value. +for `DW_AT_alignment` value. -C/C++ function information --------------------------- +### C/C++ function information Given a function declared as follows: -.. 
code-block:: c - - int main(int argc, char *argv[]) { - return 0; - } - +```c +int main(int argc, char *argv[]) { + return 0; +} +``` a C/C++ front-end would generate the following descriptors: -.. code-block:: text +```text +;; +;; Define the anchor for subprograms. +;; +!4 = !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !5, + isLocal: false, isDefinition: true, scopeLine: 1, + flags: DIFlagPrototyped, isOptimized: false, + retainedNodes: !2) - ;; - ;; Define the anchor for subprograms. - ;; - !4 = !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !5, - isLocal: false, isDefinition: true, scopeLine: 1, - flags: DIFlagPrototyped, isOptimized: false, - retainedNodes: !2) +;; +;; Define the subprogram itself. +;; +define i32 @main(i32 %argc, i8** %argv) !dbg !4 { +... +} +``` +## C++ specific debug information - ;; - ;; Define the subprogram itself. - ;; - define i32 @main(i32 %argc, i8** %argv) !dbg !4 { - ... - } - -C++ specific debug information -============================== - -C++ special member functions information ----------------------------------------- +### C++ special member functions information -DWARF v5 introduces attributes defined to enhance debugging information of C++ programs. LLVM can generate (or omit) these appropriate DWARF attributes. In C++ a special member function Ctors, Dtors, Copy/Move Ctors, assignment operators can be declared with C++11 keyword deleted. This is represented in LLVM using ``spFlags`` value ``DISPFlagDeleted``. +DWARF v5 introduces attributes defined to enhance debugging information of C++ programs. LLVM can generate (or omit) these appropriate DWARF attributes. In C++ a special member function Ctors, Dtors, Copy/Move Ctors, assignment operators can be declared with C++11 keyword deleted. This is represented in LLVM using `spFlags` value `DISPFlagDeleted`. Given a class declaration with copy constructor declared as deleted: -.. 
code-block:: c - - class foo { - public: - foo(const foo&) = deleted; - }; - +```c++ +class foo { + public: + foo(const foo&) = delete; +}; +``` A C++ frontend would generate the following: -.. code-block:: text - - !17 = !DISubprogram(name: "foo", scope: !11, file: !1, line: 5, type: !18, scopeLine: 5, flags: DIFlagPublic | DIFlagPrototyped, spFlags: DISPFlagDeleted) - +```text +!17 = !DISubprogram(name: "foo", scope: !11, file: !1, line: 5, type: !18, scopeLine: 5, flags: DIFlagPublic | DIFlagPrototyped, spFlags: DISPFlagDeleted) +``` and this will produce an additional DWARF attribute as: -.. code-block:: text - - DW_TAG_subprogram [7] * - DW_AT_name [DW_FORM_strx1] (indexed (00000006) string = "foo") - DW_AT_decl_line [DW_FORM_data1] (5) - ... - DW_AT_deleted [DW_FORM_flag_present] (true) - -Fortran specific debug information -================================== - -Fortran function information ----------------------------- - -There are a few DWARF attributes defined to support client debugging of Fortran programs. LLVM can generate (or omit) the appropriate DWARF attributes for the prefix-specs of ELEMENTAL, PURE, IMPURE, RECURSIVE, and NON_RECURSIVE. This is done by using the ``spFlags`` values: ``DISPFlagElemental``, ``DISPFlagPure``, and ``DISPFlagRecursive``. +```text +DW_TAG_subprogram [7] * + DW_AT_name [DW_FORM_strx1] (indexed (00000006) string = "foo") + DW_AT_decl_line [DW_FORM_data1] (5) + ... + DW_AT_deleted [DW_FORM_flag_present] (true) +``` +## Fortran specific debug information -.. code-block:: fortran +### Fortran function information - elemental function elem_func(a) +There are a few DWARF attributes defined to support client debugging of Fortran programs. LLVM can generate (or omit) the appropriate DWARF attributes for the prefix-specs of ELEMENTAL, PURE, IMPURE, RECURSIVE, and NON_RECURSIVE. This is done by using the `spFlags` values: `DISPFlagElemental`, `DISPFlagPure`, and `DISPFlagRecursive`. 
+```fortran +elemental function elem_func(a) +``` a Fortran front-end would generate the following descriptors: -.. code-block:: text - - !11 = distinct !DISubprogram(name: "subroutine2", scope: !1, file: !1, - line: 5, type: !8, scopeLine: 6, - spFlags: DISPFlagDefinition | DISPFlagElemental, unit: !0, - retainedNodes: !2) - +```text +!11 = distinct !DISubprogram(name: "subroutine2", scope: !1, file: !1, + line: 5, type: !8, scopeLine: 6, + spFlags: DISPFlagDefinition | DISPFlagElemental, unit: !0, + retainedNodes: !2) +``` and this will materialize an additional DWARF attribute as: -.. code-block:: text - - DW_TAG_subprogram [3] - DW_AT_low_pc [DW_FORM_addr] (0x0000000000000010 ".text") - DW_AT_high_pc [DW_FORM_data4] (0x00000001) - ... - DW_AT_elemental [DW_FORM_flag_present] (true) - -There are a few DWARF tags defined to represent Fortran specific constructs i.e ``DW_TAG_string_type`` for representing Fortran character(n). In LLVM, this is represented as ``DIStringType``. - -.. code-block:: fortran - - character(len=*), intent(in) :: string - +```text +DW_TAG_subprogram [3] + DW_AT_low_pc [DW_FORM_addr] (0x0000000000000010 ".text") + DW_AT_high_pc [DW_FORM_data4] (0x00000001) + ... + DW_AT_elemental [DW_FORM_flag_present] (true) +``` +There are a few DWARF tags defined to represent Fortran-specific constructs, e.g. `DW_TAG_string_type` for representing Fortran character(n). In LLVM, this is represented as `DIStringType`. + +```fortran +character(len=*), intent(in) :: string +``` a Fortran front-end would generate the following descriptors: -.. code-block:: text +```text +!DILocalVariable(name: "string", arg: 1, scope: !10, file: !3, line: 4, type: !15) +!DIStringType(name: "character(*)!2", stringLength: !16, stringLengthExpression: !DIExpression(), size: 32) +``` +A Fortran deferred-length character can also contain the information of raw storage of the characters in addition to the length of the string. 
This information is encoded in the stringLocationExpression field. Based on this information, `DW_AT_data_location` attribute is emitted in a `DW_TAG_string_type` debug info. - !DILocalVariable(name: "string", arg: 1, scope: !10, file: !3, line: 4, type: !15) - !DIStringType(name: "character(*)!2", stringLength: !16, stringLengthExpression: !DIExpression(), size: 32) - -A fortran deferred-length character can also contain the information of raw storage of the characters in addition to the length of the string. This information is encoded in the stringLocationExpression field. Based on this information, ``DW_AT_data_location`` attribute is emitted in a ``DW_TAG_string_type`` debug info. - - !DIStringType(name: "character(*)!2", stringLengthExpression: !DIExpression(), stringLocationExpression: !DIExpression(DW_OP_push_object_address, DW_OP_deref), size: 32) +```llvm +!DIStringType(name: "character(*)!2", stringLengthExpression: !DIExpression(), + stringLocationExpression: !DIExpression(DW_OP_push_object_address, DW_OP_deref), + size: 32) +``` and this will materialize in DWARF tags as: -.. code-block:: text - - DW_TAG_string_type - DW_AT_name ("character(*)!2") - DW_AT_string_length (0x00000064) - 0x00000064: DW_TAG_variable - DW_AT_location (DW_OP_fbreg +16) - DW_AT_type (0x00000083 "integer*8") - DW_AT_data_location (DW_OP_push_object_address, DW_OP_deref) - ... - DW_AT_artificial (true) - +```text +DW_TAG_string_type + DW_AT_name ("character(*)!2") + DW_AT_string_length (0x00000064) +0x00000064: DW_TAG_variable + DW_AT_location (DW_OP_fbreg +16) + DW_AT_type (0x00000083 "integer*8") + DW_AT_data_location (DW_OP_push_object_address, DW_OP_deref) + ... + DW_AT_artificial (true) +``` A Fortran front-end may need to generate a *trampoline* function to call a function defined in a different compilation unit. In this case, the front-end can emit the following descriptor for the trampoline function: -.. 
code-block:: text - - !DISubprogram(name: "sub1_.t0p", linkageName: "sub1_.t0p", scope: !4, file: !4, type: !5, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !7, retainedNodes: !24, targetFuncName: "sub1_") - +```text +!DISubprogram(name: "sub1_.t0p", linkageName: "sub1_.t0p", scope: !4, file: !4, type: !5, spFlags: DISPFlagLocalToUnit | DISPFlagDefinition, unit: !7, retainedNodes: !24, targetFuncName: "sub1_") +``` The targetFuncName field is the name of the function that the trampoline calls. This descriptor results in the following DWARF tag: -.. code-block:: text - - DW_TAG_subprogram - ... - DW_AT_linkage_name ("sub1_.t0p") - DW_AT_name ("sub1_.t0p") - DW_AT_trampoline ("sub1_") - -Debugging information format -============================ +```text +DW_TAG_subprogram + ... + DW_AT_linkage_name ("sub1_.t0p") + DW_AT_name ("sub1_.t0p") + DW_AT_trampoline ("sub1_") +``` +## Debugging information format -Debugging Information Extension for Objective-C Properties ----------------------------------------------------------- +### Debugging Information Extension for Objective-C Properties -Introduction -^^^^^^^^^^^^ +#### Introduction Objective-C provides a simpler way to declare and define accessor methods using declared properties. The language provides features to declare a property and @@ -1547,8 +1459,7 @@ encoding of Objective-C properties. This proposal describes DWARF extensions to encode Objective-C properties, which the debugger can use to let developers inspect Objective-C properties. -Proposal -^^^^^^^^ +#### Proposal Objective-C properties exist separately from class members. A property can be defined only by "setter" and "getter" selectors, and be calculated anew on each @@ -1556,69 +1467,67 @@ access. Or a property can just be a direct access to some declared ivar. 
Finally it can have an ivar "automatically synthesized" for it by the compiler, in which case the property can be referred to in user code directly using the standard C dereference syntax as well as through the property "dot" syntax, but -there is no entry in the ``@interface`` declaration corresponding to this ivar. +there is no entry in the `@interface` declaration corresponding to this ivar. To facilitate debugging, these properties we will add a new DWARF TAG into the -``DW_TAG_structure_type`` definition for the class to hold the description of a +`DW_TAG_structure_type` definition for the class to hold the description of a given property, and a set of DWARF attributes that provide said description. The property tag will also contain the name and declared type of the property. If there is a related ivar, there will also be a DWARF property attribute placed -in the ``DW_TAG_member`` DIE for that ivar referring back to the property TAG +in the `DW_TAG_member` DIE for that ivar referring back to the property TAG for that property. And in the case where the compiler synthesizes the ivar -directly, the compiler is expected to generate a ``DW_TAG_member`` for that -ivar (with the ``DW_AT_artificial`` set to 1), whose name will be the name used +directly, the compiler is expected to generate a `DW_TAG_member` for that +ivar (with the `DW_AT_artificial` set to 1), whose name will be the name used to access this ivar directly in code, and with the property attribute pointing back to the property it is backing. The following examples will serve as illustration for our discussion: -.. 
code-block:: objc - - @interface I1 { - int n2; - } - - @property int p1; - @property int p2; - @end +```objc +@interface I1 { + int n2; +} - @implementation I1 - @synthesize p1; - @synthesize p2 = n2; - @end +@property int p1; +@property int p2; +@end +@implementation I1 +@synthesize p1; +@synthesize p2 = n2; +@end +``` This produces the following DWARF (this is a "pseudo dwarfdump" output): -.. code-block:: none - - 0x00000100: TAG_structure_type [7] * - AT_APPLE_runtime_class( 0x10 ) - AT_name( "I1" ) - AT_decl_file( "Objc_Property.m" ) - AT_decl_line( 3 ) - - 0x00000110 TAG_APPLE_property - AT_name ( "p1" ) - AT_type ( {0x00000150} ( int ) ) - - 0x00000120: TAG_APPLE_property - AT_name ( "p2" ) - AT_type ( {0x00000150} ( int ) ) - - 0x00000130: TAG_member [8] - AT_name( "_p1" ) - AT_APPLE_property ( {0x00000110} "p1" ) - AT_type( {0x00000150} ( int ) ) - AT_artificial ( 0x1 ) - - 0x00000140: TAG_member [8] - AT_name( "n2" ) - AT_APPLE_property ( {0x00000120} "p2" ) - AT_type( {0x00000150} ( int ) ) - - 0x00000150: AT_type( ( int ) ) - +```text +0x00000100: TAG_structure_type [7] * + AT_APPLE_runtime_class( 0x10 ) + AT_name( "I1" ) + AT_decl_file( "Objc_Property.m" ) + AT_decl_line( 3 ) + +0x00000110 TAG_APPLE_property + AT_name ( "p1" ) + AT_type ( {0x00000150} ( int ) ) + +0x00000120: TAG_APPLE_property + AT_name ( "p2" ) + AT_type ( {0x00000150} ( int ) ) + +0x00000130: TAG_member [8] + AT_name( "_p1" ) + AT_APPLE_property ( {0x00000110} "p1" ) + AT_type( {0x00000150} ( int ) ) + AT_artificial ( 0x1 ) + +0x00000140: TAG_member [8] + AT_name( "n2" ) + AT_APPLE_property ( {0x00000120} "p2" ) + AT_type( {0x00000150} ( int ) ) + +0x00000150: AT_type( ( int ) ) +``` Note, the current convention is that the name of the ivar for an auto-synthesized property is the name of the property from which it derives with an underscore prepended, as is shown in the example. 
But we actually @@ -1626,151 +1535,119 @@ don't need to know this convention, since we are given the name of the ivar directly. Also, it is common practice in ObjC to have different property declarations in -the ``@interface`` and ``@implementation`` - e.g. to provide a read-only property in +the `@interface` and `@implementation` - e.g. to provide a read-only property in the interface, and a read-write interface in the implementation. In that case, the compiler should emit whichever property declaration will be in force in the current translation unit. Developers can decorate a property with attributes which are encoded using -``DW_AT_APPLE_property_attribute``. - -.. code-block:: objc - - @property (readonly, nonatomic) int pr; - -.. code-block:: none - - TAG_APPLE_property [8] - AT_name( "pr" ) - AT_type ( {0x00000147} (int) ) - AT_APPLE_property_attribute (DW_APPLE_PROPERTY_readonly, DW_APPLE_PROPERTY_nonatomic) - +`DW_AT_APPLE_property_attribute`. + +```objc +@property (readonly, nonatomic) int pr; +``` +```text +TAG_APPLE_property [8] + AT_name( "pr" ) + AT_type ( {0x00000147} (int) ) + AT_APPLE_property_attribute (DW_APPLE_PROPERTY_readonly, DW_APPLE_PROPERTY_nonatomic) +``` The setter and getter method names are attached to the property using -``DW_AT_APPLE_property_setter`` and ``DW_AT_APPLE_property_getter`` attributes. - -.. code-block:: objc - - @interface I1 - @property (setter=myOwnP3Setter:) int p3; - -(void)myOwnP3Setter:(int)a; - @end - - @implementation I1 - @synthesize p3; - -(void)myOwnP3Setter:(int)a{ } - @end - +`DW_AT_APPLE_property_setter` and `DW_AT_APPLE_property_getter` attributes. + +```objc +@interface I1 +@property (setter=myOwnP3Setter:) int p3; +-(void)myOwnP3Setter:(int)a; +@end + +@implementation I1 +@synthesize p3; +-(void)myOwnP3Setter:(int)a{ } +@end +``` The DWARF for this would be: -.. 
code-block:: none - - 0x000003bd: TAG_structure_type [7] * - AT_APPLE_runtime_class( 0x10 ) - AT_name( "I1" ) - AT_decl_file( "Objc_Property.m" ) - AT_decl_line( 3 ) - - 0x000003cd TAG_APPLE_property - AT_name ( "p3" ) - AT_APPLE_property_setter ( "myOwnP3Setter:" ) - AT_type( {0x00000147} ( int ) ) - - 0x000003f3: TAG_member [8] - AT_name( "_p3" ) - AT_type ( {0x00000147} ( int ) ) - AT_APPLE_property ( {0x000003cd} ) - AT_artificial ( 0x1 ) - -New DWARF Tags -^^^^^^^^^^^^^^ +```text +0x000003bd: TAG_structure_type [7] * + AT_APPLE_runtime_class( 0x10 ) + AT_name( "I1" ) + AT_decl_file( "Objc_Property.m" ) + AT_decl_line( 3 ) + +0x000003cd TAG_APPLE_property + AT_name ( "p3" ) + AT_APPLE_property_setter ( "myOwnP3Setter:" ) + AT_type( {0x00000147} ( int ) ) + +0x000003f3: TAG_member [8] + AT_name( "_p3" ) + AT_type ( {0x00000147} ( int ) ) + AT_APPLE_property ( {0x000003cd} ) + AT_artificial ( 0x1 ) +``` +#### New DWARF Tags -+-----------------------+--------+ | TAG | Value | -+=======================+========+ +|-----------------------|--------| | DW_TAG_APPLE_property | 0x4200 | -+-----------------------+--------+ -New DWARF Attributes -^^^^^^^^^^^^^^^^^^^^ +#### New DWARF Attributes -+--------------------------------+--------+-----------+ | Attribute | Value | Classes | -+================================+========+===========+ +|--------------------------------|--------|-----------| | DW_AT_APPLE_property | 0x3fed | Reference | -+--------------------------------+--------+-----------+ | DW_AT_APPLE_property_getter | 0x3fe9 | String | -+--------------------------------+--------+-----------+ | DW_AT_APPLE_property_setter | 0x3fea | String | -+--------------------------------+--------+-----------+ | DW_AT_APPLE_property_attribute | 0x3feb | Constant | -+--------------------------------+--------+-----------+ -New DWARF Constants -^^^^^^^^^^^^^^^^^^^ +#### New DWARF Constants -+--------------------------------------+-------+ | Name | Value | 
-+======================================+=======+ +|--------------------------------------|-------| | DW_APPLE_PROPERTY_readonly | 0x01 | -+--------------------------------------+-------+ | DW_APPLE_PROPERTY_getter | 0x02 | -+--------------------------------------+-------+ | DW_APPLE_PROPERTY_assign | 0x04 | -+--------------------------------------+-------+ | DW_APPLE_PROPERTY_readwrite | 0x08 | -+--------------------------------------+-------+ | DW_APPLE_PROPERTY_retain | 0x10 | -+--------------------------------------+-------+ | DW_APPLE_PROPERTY_copy | 0x20 | -+--------------------------------------+-------+ | DW_APPLE_PROPERTY_nonatomic | 0x40 | -+--------------------------------------+-------+ | DW_APPLE_PROPERTY_setter | 0x80 | -+--------------------------------------+-------+ | DW_APPLE_PROPERTY_atomic | 0x100 | -+--------------------------------------+-------+ | DW_APPLE_PROPERTY_weak | 0x200 | -+--------------------------------------+-------+ | DW_APPLE_PROPERTY_strong | 0x400 | -+--------------------------------------+-------+ | DW_APPLE_PROPERTY_unsafe_unretained | 0x800 | -+--------------------------------------+-------+ | DW_APPLE_PROPERTY_nullability | 0x1000| -+--------------------------------------+-------+ | DW_APPLE_PROPERTY_null_resettable | 0x2000| -+--------------------------------------+-------+ | DW_APPLE_PROPERTY_class | 0x4000| -+--------------------------------------+-------+ -Name Accelerator Tables ------------------------ +### Name Accelerator Tables -Introduction -^^^^^^^^^^^^ +#### Introduction -The "``.debug_pubnames``" and "``.debug_pubtypes``" formats are not what a -debugger needs. The "``pub``" in the section name indicates that the entries +The "`.debug_pubnames`" and "`.debug_pubtypes`" formats are not what a +debugger needs. The "`pub`" in the section name indicates that the entries in the table are publicly visible names only. This means no static or hidden -functions show up in the "``.debug_pubnames``". 
No static variables or private -class variables are in the "``.debug_pubtypes``". Many compilers add different +functions show up in the "`.debug_pubnames`". No static variables or private +class variables are in the "`.debug_pubtypes`". Many compilers add different things to these tables, so we can't rely upon the contents between gcc, icc, or clang. The typical query given by users tends not to match up with the contents of these tables. For example, the DWARF spec states that "In the case of the name of a function member or static data member of a C++ structure, class or union, -the name presented in the "``.debug_pubnames``" section is not the simple name -given by the ``DW_AT_name attribute`` of the referenced debugging information +the name presented in the "`.debug_pubnames`" section is not the simple name +given by the `DW_AT_name` attribute of the referenced debugging information entry, but rather the fully qualified name of the data or function member." So the only names in these tables for complex C++ entries is a fully qualified name. Debugger users tend not to enter their search strings as -"``a::b::c(int,const Foo&) const``", but rather as "``c``", "``b::c``" , or -"``a::b::c``". So the name entered in the name table must be demangled in +"`a::b::c(int,const Foo&) const`", but rather as "`c`", "`b::c`", or +"`a::b::c`". So the name entered in the name table must be demangled in order to chop it up appropriately and additional names must be manually entered into the table to make it effective as a name lookup table for debuggers to use. -All debuggers currently ignore the "``.debug_pubnames``" table as a result of +All debuggers currently ignore the "`.debug_pubnames`" table as a result of its inconsistent and useless public-only name content making it a waste of space in the object file. These tables, when they are written to disk, are not sorted in any way, leaving every debugger to do its own parsing and sorting. @@ -1786,8 +1663,8 @@ need.
These tables are also insufficient for what a debugger like LLDB needs. LLDB uses clang for its expression parsing where LLDB acts as a PCH. LLDB is then -often asked to look for type "``foo``" or namespace "``bar``", or list items in -namespace "``baz``". Namespaces are not included in the pubnames or pubtypes +often asked to look for type "`foo`" or namespace "`bar`", or list items in +namespace "`baz`". Namespaces are not included in the pubnames or pubtypes tables. Since clang asks a lot of questions when it is parsing an expression, we need to be very fast when looking up names, as it happens a lot. Having new accelerator tables that are optimized for very quick lookups will benefit this @@ -1819,298 +1696,283 @@ case of debuggers we optimized for lookups that fail most of the time. Each table that is defined should have strict rules on exactly what is in the accelerator tables and documented so clients can rely on the content. -Hash Tables -^^^^^^^^^^^ +#### Hash Tables -Standard Hash Tables -"""""""""""""""""""" +##### Standard Hash Tables Typical hash tables have a header, buckets, and each bucket points to the bucket contents: -.. code-block:: none - - .------------. - | HEADER | - |------------| - | BUCKETS | - |------------| - | DATA | - `------------' - +```text +.------------. +| HEADER | +|------------| +| BUCKETS | +|------------| +| DATA | +`------------' +``` The BUCKETS are an array of offsets to DATA for each hash: -.. code-block:: none - - .------------. - | 0x00001000 | BUCKETS[0] - | 0x00002000 | BUCKETS[1] - | 0x00002200 | BUCKETS[2] - | 0x000034f0 | BUCKETS[3] - | | ... - | 0xXXXXXXXX | BUCKETS[n_buckets] - '------------' - -So for ``bucket[3]`` in the example above, we have an offset into the table +```text +.------------. +| 0x00001000 | BUCKETS[0] +| 0x00002000 | BUCKETS[1] +| 0x00002200 | BUCKETS[2] +| 0x000034f0 | BUCKETS[3] +| | ... 
+| 0xXXXXXXXX | BUCKETS[n_buckets] +'------------' +``` +So for `bucket[3]` in the example above, we have an offset into the table 0x000034f0 which points to a chain of entries for the bucket. Each bucket must contain a next pointer, full 32-bit hash value, the string itself, and the data for the current string value. -.. code-block:: none - - .------------. - 0x000034f0: | 0x00003500 | next pointer - | 0x12345678 | 32-bit hash - | "erase" | string value - | data[n] | HashData for this bucket - |------------| - 0x00003500: | 0x00003550 | next pointer - | 0x29273623 | 32-bit hash - | "dump" | string value - | data[n] | HashData for this bucket - |------------| - 0x00003550: | 0x00000000 | next pointer - | 0x82638293 | 32-bit hash - | "main" | string value - | data[n] | HashData for this bucket - `------------' - +```text + .------------. +0x000034f0: | 0x00003500 | next pointer + | 0x12345678 | 32-bit hash + | "erase" | string value + | data[n] | HashData for this bucket + |------------| +0x00003500: | 0x00003550 | next pointer + | 0x29273623 | 32-bit hash + | "dump" | string value + | data[n] | HashData for this bucket + |------------| +0x00003550: | 0x00000000 | next pointer + | 0x82638293 | 32-bit hash + | "main" | string value + | data[n] | HashData for this bucket + `------------' +``` The problem with this layout for debuggers is that we need to optimize for the negative lookup case where the symbol we're searching for is not present. So -if we were to lookup "``printf``" in the table above, we would make a 32-bit -hash for "``printf``", it might match ``bucket[3]``. We would need to go to +if we were to lookup "`printf`" in the table above, we would make a 32-bit +hash for "`printf`", it might match `bucket[3]`. We would need to go to the offset 0x000034f0 and start looking to see if our 32-bit hash matches. To do so, we need to read the next pointer, then read the hash, compare it, and skip to the next bucket. 
Each time we are skipping many bytes in memory and touching new pages just to do the compare on the full 32-bit hash. All of these accesses then tell us that we didn't have a match. -Name Hash Tables -"""""""""""""""" +##### Name Hash Tables To solve the issues mentioned above, we have structured the hash tables a bit differently: a header, buckets, an array of all unique 32-bit hash values, followed by an array of hash value data offsets, one for each hash value, then the data for all hash values: -.. code-block:: none - - .-------------. - | HEADER | - |-------------| - | BUCKETS | - |-------------| - | HASHES | - |-------------| - | OFFSETS | - |-------------| - | DATA | - `-------------' - -The ``BUCKETS`` in the name tables are an index into the ``HASHES`` array. By +```text +.-------------. +| HEADER | +|-------------| +| BUCKETS | +|-------------| +| HASHES | +|-------------| +| OFFSETS | +|-------------| +| DATA | +`-------------' +``` +The `BUCKETS` in the name tables are an index into the `HASHES` array. By making all of the full 32-bit hash values contiguous in memory, we allow ourselves to efficiently check for a match while touching as little memory as possible. Most often checking the 32-bit hash values is as far as the lookup goes. If it does match, it usually is a match with no collisions. So for a -table with "``n_buckets``" buckets, and "``n_hashes``" unique 32-bit hash -values, we can clarify the contents of the ``BUCKETS``, ``HASHES`` and -``OFFSETS`` as: - -.. code-block:: none - - .-------------------------. 
- | HEADER.magic | uint32_t - | HEADER.version | uint16_t - | HEADER.hash_function | uint16_t - | HEADER.bucket_count | uint32_t - | HEADER.hashes_count | uint32_t - | HEADER.header_data_len | uint32_t - | HEADER_DATA | HeaderData - |-------------------------| - | BUCKETS | uint32_t[n_buckets] // 32-bit hash indexes - |-------------------------| - | HASHES | uint32_t[n_hashes] // 32-bit hash values - |-------------------------| - | OFFSETS | uint32_t[n_hashes] // 32-bit offsets to hash value data - |-------------------------| - | ALL HASH DATA | - `-------------------------' - +table with "`n_buckets`" buckets, and "`n_hashes`" unique 32-bit hash +values, we can clarify the contents of the `BUCKETS`, `HASHES` and +`OFFSETS` as: + +```text +.-------------------------. +| HEADER.magic | uint32_t +| HEADER.version | uint16_t +| HEADER.hash_function | uint16_t +| HEADER.bucket_count | uint32_t +| HEADER.hashes_count | uint32_t +| HEADER.header_data_len | uint32_t +| HEADER_DATA | HeaderData +|-------------------------| +| BUCKETS | uint32_t[n_buckets] // 32-bit hash indexes +|-------------------------| +| HASHES | uint32_t[n_hashes] // 32-bit hash values +|-------------------------| +| OFFSETS | uint32_t[n_hashes] // 32-bit offsets to hash value data +|-------------------------| +| ALL HASH DATA | +`-------------------------' +``` So taking the exact same data from the standard hash example above, we end up with: -.. code-block:: none - - .------------. - | HEADER | - |------------| - | 0 | BUCKETS[0] - | 2 | BUCKETS[1] - | 5 | BUCKETS[2] - | 6 | BUCKETS[3] - | | ... - | ... | BUCKETS[n_buckets] - |------------| - | 0x........ | HASHES[0] - | 0x........ | HASHES[1] - | 0x........ | HASHES[2] - | 0x........ | HASHES[3] - | 0x........ | HASHES[4] - | 0x........ | HASHES[5] - | 0x12345678 | HASHES[6] hash for BUCKETS[3] - | 0x29273623 | HASHES[7] hash for BUCKETS[3] - | 0x82638293 | HASHES[8] hash for BUCKETS[3] - | 0x........ | HASHES[9] - | 0x........ 
| HASHES[10] - | 0x........ | HASHES[11] - | 0x........ | HASHES[12] - | 0x........ | HASHES[13] - | 0x........ | HASHES[n_hashes] - |------------| - | 0x........ | OFFSETS[0] - | 0x........ | OFFSETS[1] - | 0x........ | OFFSETS[2] - | 0x........ | OFFSETS[3] - | 0x........ | OFFSETS[4] - | 0x........ | OFFSETS[5] - | 0x000034f0 | OFFSETS[6] offset for BUCKETS[3] - | 0x00003500 | OFFSETS[7] offset for BUCKETS[3] - | 0x00003550 | OFFSETS[8] offset for BUCKETS[3] - | 0x........ | OFFSETS[9] - | 0x........ | OFFSETS[10] - | 0x........ | OFFSETS[11] - | 0x........ | OFFSETS[12] - | 0x........ | OFFSETS[13] - | 0x........ | OFFSETS[n_hashes] - |------------| - | | - | | - | | - | | - | | - |------------| - 0x000034f0: | 0x00001203 | .debug_str ("erase") - | 0x00000004 | A 32-bit array count - number of HashData with name "erase" - | 0x........ | HashData[0] - | 0x........ | HashData[1] - | 0x........ | HashData[2] - | 0x........ | HashData[3] - | 0x00000000 | String offset into .debug_str (terminate data for hash) - |------------| - 0x00003500: | 0x00001203 | String offset into .debug_str ("collision") - | 0x00000002 | A 32-bit array count - number of HashData with name "collision" - | 0x........ | HashData[0] - | 0x........ | HashData[1] - | 0x00001203 | String offset into .debug_str ("dump") - | 0x00000003 | A 32-bit array count - number of HashData with name "dump" - | 0x........ | HashData[0] - | 0x........ | HashData[1] - | 0x........ | HashData[2] - | 0x00000000 | String offset into .debug_str (terminate data for hash) - |------------| - 0x00003550: | 0x00001203 | String offset into .debug_str ("main") - | 0x00000009 | A 32-bit array count - number of HashData with name "main" - | 0x........ | HashData[0] - | 0x........ | HashData[1] - | 0x........ | HashData[2] - | 0x........ | HashData[3] - | 0x........ | HashData[4] - | 0x........ | HashData[5] - | 0x........ | HashData[6] - | 0x........ | HashData[7] - | 0x........ 
| HashData[8] - | 0x00000000 | String offset into .debug_str (terminate data for hash) - `------------' - +```text + .------------. + | HEADER | + |------------| + | 0 | BUCKETS[0] + | 2 | BUCKETS[1] + | 5 | BUCKETS[2] + | 6 | BUCKETS[3] + | | ... + | ... | BUCKETS[n_buckets] + |------------| + | 0x........ | HASHES[0] + | 0x........ | HASHES[1] + | 0x........ | HASHES[2] + | 0x........ | HASHES[3] + | 0x........ | HASHES[4] + | 0x........ | HASHES[5] + | 0x12345678 | HASHES[6] hash for BUCKETS[3] + | 0x29273623 | HASHES[7] hash for BUCKETS[3] + | 0x82638293 | HASHES[8] hash for BUCKETS[3] + | 0x........ | HASHES[9] + | 0x........ | HASHES[10] + | 0x........ | HASHES[11] + | 0x........ | HASHES[12] + | 0x........ | HASHES[13] + | 0x........ | HASHES[n_hashes] + |------------| + | 0x........ | OFFSETS[0] + | 0x........ | OFFSETS[1] + | 0x........ | OFFSETS[2] + | 0x........ | OFFSETS[3] + | 0x........ | OFFSETS[4] + | 0x........ | OFFSETS[5] + | 0x000034f0 | OFFSETS[6] offset for BUCKETS[3] + | 0x00003500 | OFFSETS[7] offset for BUCKETS[3] + | 0x00003550 | OFFSETS[8] offset for BUCKETS[3] + | 0x........ | OFFSETS[9] + | 0x........ | OFFSETS[10] + | 0x........ | OFFSETS[11] + | 0x........ | OFFSETS[12] + | 0x........ | OFFSETS[13] + | 0x........ | OFFSETS[n_hashes] + |------------| + | | + | | + | | + | | + | | + |------------| +0x000034f0: | 0x00001203 | .debug_str ("erase") + | 0x00000004 | A 32-bit array count - number of HashData with name "erase" + | 0x........ | HashData[0] + | 0x........ | HashData[1] + | 0x........ | HashData[2] + | 0x........ | HashData[3] + | 0x00000000 | String offset into .debug_str (terminate data for hash) + |------------| +0x00003500: | 0x00001203 | String offset into .debug_str ("collision") + | 0x00000002 | A 32-bit array count - number of HashData with name "collision" + | 0x........ | HashData[0] + | 0x........ 
| HashData[1] + | 0x00001203 | String offset into .debug_str ("dump") + | 0x00000003 | A 32-bit array count - number of HashData with name "dump" + | 0x........ | HashData[0] + | 0x........ | HashData[1] + | 0x........ | HashData[2] + | 0x00000000 | String offset into .debug_str (terminate data for hash) + |------------| +0x00003550: | 0x00001203 | String offset into .debug_str ("main") + | 0x00000009 | A 32-bit array count - number of HashData with name "main" + | 0x........ | HashData[0] + | 0x........ | HashData[1] + | 0x........ | HashData[2] + | 0x........ | HashData[3] + | 0x........ | HashData[4] + | 0x........ | HashData[5] + | 0x........ | HashData[6] + | 0x........ | HashData[7] + | 0x........ | HashData[8] + | 0x00000000 | String offset into .debug_str (terminate data for hash) + `------------' +``` So we still have all of the same data, we just organize it more efficiently for -debugger lookup. If we repeat the same "``printf``" lookup from above, we -would hash "``printf``" and find it matches ``BUCKETS[3]`` by taking the 32-bit -hash value and modulo it by ``n_buckets``. ``BUCKETS[3]`` contains "6" which -is the index into the ``HASHES`` table. We would then compare any consecutive -32-bit hash values in the ``HASHES`` array as long as the hashes would be in -``BUCKETS[3]``. We do this by verifying that each subsequent hash value modulo -``n_buckets`` is still 3. In the case of a failed lookup we would access the -memory for ``BUCKETS[3]``, and then compare a few consecutive 32-bit hashes +debugger lookup. If we repeat the same "`printf`" lookup from above, we +would hash "`printf`" and find it matches `BUCKETS[3]` by taking the 32-bit +hash value and modulo it by `n_buckets`. `BUCKETS[3]` contains "6" which +is the index into the `HASHES` table. We would then compare any consecutive +32-bit hash values in the `HASHES` array as long as the hashes would be in +`BUCKETS[3]`. 
We do this by verifying that each subsequent hash value modulo +`n_buckets` is still 3. In the case of a failed lookup we would access the +memory for `BUCKETS[3]`, and then compare a few consecutive 32-bit hashes before we know that we have no match. We don't end up marching through multiple words of memory and we really keep the number of processor data cache lines being accessed as small as possible. The string hash that is used for these lookup tables is the Daniel J. -Bernstein hash which is also used in the ELF ``GNU_HASH`` sections. It is a +Bernstein hash which is also used in the ELF `GNU_HASH` sections. It is a very good hash for all kinds of names in programs with very few hash collisions. -Empty buckets are designated by using an invalid hash index of ``UINT32_MAX``. +Empty buckets are designated by using an invalid hash index of `UINT32_MAX`. -Details -^^^^^^^ +#### Details These name hash tables are designed to be generic where specializations of the -table get to define additional data that goes into the header ("``HeaderData``"), -how the string value is stored ("``KeyType``") and the content of the data for each +table get to define additional data that goes into the header ("`HeaderData`"), +how the string value is stored ("`KeyType`") and the content of the data for each hash value. -Header Layout -""""""""""""" +##### Header Layout The header has a fixed part, and the specialized part. The exact format of the header is: -.. 
code-block:: c - - struct Header - { - uint32_t magic; // 'HASH' magic value to allow endian detection - uint16_t version; // Version number - uint16_t hash_function; // The hash function enumeration that was used - uint32_t bucket_count; // The number of buckets in this hash table - uint32_t hashes_count; // The total number of unique hash values and hash data offsets in this table - uint32_t header_data_len; // The bytes to skip to get to the hash indexes (buckets) for correct alignment - // Specifically the length of the following HeaderData field - this does not - // include the size of the preceding fields - HeaderData header_data; // Implementation specific header data - }; - -The header starts with a 32-bit "``magic``" value which must be ``'HASH'`` +```c +struct Header +{ + uint32_t magic; // 'HASH' magic value to allow endian detection + uint16_t version; // Version number + uint16_t hash_function; // The hash function enumeration that was used + uint32_t bucket_count; // The number of buckets in this hash table + uint32_t hashes_count; // The total number of unique hash values and hash data offsets in this table + uint32_t header_data_len; // The bytes to skip to get to the hash indexes (buckets) for correct alignment + // Specifically the length of the following HeaderData field - this does not + // include the size of the preceding fields + HeaderData header_data; // Implementation specific header data +}; +``` +The header starts with a 32-bit "`magic`" value which must be `'HASH'` encoded as an ASCII integer. This allows the detection of the start of the hash table and also allows the table's byte order to be determined so the table -can be correctly extracted. The "``magic``" value is followed by a 16-bit -``version`` number which allows the table to be revised and modified in the -future. The current version number is 1. ``hash_function`` is a ``uint16_t`` +can be correctly extracted. 
The "`magic`" value is followed by a 16-bit +`version` number which allows the table to be revised and modified in the +future. The current version number is 1. `hash_function` is a `uint16_t` enumeration that specifies which hash function was used to produce this table. The current values for the hash function enumerations include: -.. code-block:: c - - enum HashFunctionType - { - eHashFunctionDJB = 0u, // Daniel J Bernstein hash function - }; - -``bucket_count`` is a 32-bit unsigned integer that represents how many buckets -are in the ``BUCKETS`` array. ``hashes_count`` is the number of unique 32-bit -hash values that are in the ``HASHES`` array, and is the same number of offsets -are contained in the ``OFFSETS`` array. ``header_data_len`` specifies the size -in bytes of the ``HeaderData`` that is filled in by specialized versions of +```c +enum HashFunctionType +{ + eHashFunctionDJB = 0u, // Daniel J Bernstein hash function +}; +``` +`bucket_count` is a 32-bit unsigned integer that represents how many buckets +are in the `BUCKETS` array. `hashes_count` is the number of unique 32-bit +hash values that are in the `HASHES` array, and is also the number of offsets +contained in the `OFFSETS` array. `header_data_len` specifies the size +in bytes of the `HeaderData` that is filled in by specialized versions of this table. -Fixed Lookup -"""""""""""" +##### Fixed Lookup The header is followed by the buckets, hashes, offsets, and hash value data. -.. code-block:: c - - struct FixedTable - { - uint32_t buckets[Header.bucket_count]; // An array of hash indexes into the "hashes[]" array below - uint32_t hashes [Header.hashes_count]; // Every unique 32-bit hash for the entire table is in this table - uint32_t offsets[Header.hashes_count]; // An offset that corresponds to each item in the "hashes[]" array above - }; - -``buckets`` is an array of 32-bit indexes into the ``hashes`` array.
The -``hashes`` array contains all of the 32-bit hash values for all names in the -hash table. Each hash in the ``hashes`` table has an offset in the ``offsets`` +```c +struct FixedTable +{ + uint32_t buckets[Header.bucket_count]; // An array of hash indexes into the "hashes[]" array below + uint32_t hashes [Header.hashes_count]; // Every unique 32-bit hash for the entire table is in this table + uint32_t offsets[Header.hashes_count]; // An offset that corresponds to each item in the "hashes[]" array above +}; +``` +`buckets` is an array of 32-bit indexes into the `hashes` array. The +`hashes` array contains all of the 32-bit hash values for all names in the +hash table. Each hash in the `hashes` table has an offset in the `offsets` array that points to the data for the hash value. This table setup makes it very easy to repurpose these tables to contain @@ -2124,79 +1986,74 @@ able to store the data efficiently so we have used some of the DWARF features that enable efficient data storage to define exactly what kind of data we store for each name. -The ``HeaderData`` contains a definition of the contents of each HashData chunk. +The `HeaderData` contains a definition of the contents of each HashData chunk. We might want to store an offset to all of the debug information entries (DIEs) for each name. To keep things extensible, we create a list of items, or Atoms, that are contained in the data for each name. First comes the type of the data in each atom: -.. 
code-block:: c - - enum AtomType - { - eAtomTypeNULL = 0u, - eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding - eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that contains the item in question - eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2 - eAtomTypeNameFlags = 4u, // Flags from enum NameFlags - eAtomTypeTypeFlags = 5u, // Flags from enum TypeFlags - }; - +```c +enum AtomType +{ + eAtomTypeNULL = 0u, + eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding + eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that contains the item in question + eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as DW_FORM_data1 (if no tags exceed 255) or DW_FORM_data2 + eAtomTypeNameFlags = 4u, // Flags from enum NameFlags + eAtomTypeTypeFlags = 5u, // Flags from enum TypeFlags +}; +``` The enumeration values and their meanings are: -.. code-block:: none - - eAtomTypeNULL - a termination atom that specifies the end of the atom list - eAtomTypeDIEOffset - an offset into the .debug_info section for the DWARF DIE for this name - eAtomTypeCUOffset - an offset into the .debug_info section for the CU that contains the DIE - eAtomTypeDIETag - The DW_TAG_XXX enumeration value so you don't have to parse the DWARF to see what it is - eAtomTypeNameFlags - Flags for functions and global variables (isFunction, isInlined, isExternal...) - eAtomTypeTypeFlags - Flags for types (isCXXClass, isObjCClass, ...) 
- +```text +eAtomTypeNULL - a termination atom that specifies the end of the atom list +eAtomTypeDIEOffset - an offset into the .debug_info section for the DWARF DIE for this name +eAtomTypeCUOffset - an offset into the .debug_info section for the CU that contains the DIE +eAtomTypeTag - The DW_TAG_XXX enumeration value so you don't have to parse the DWARF to see what it is +eAtomTypeNameFlags - Flags for functions and global variables (isFunction, isInlined, isExternal...) +eAtomTypeTypeFlags - Flags for types (isCXXClass, isObjCClass, ...) +``` Then we allow each atom type to define the atom type and how the data for each atom type data is encoded: -.. code-block:: c - - struct Atom - { - uint16_t type; // AtomType enum value - uint16_t form; // DWARF DW_FORM_XXX defines - }; - -The ``form`` type above is from the DWARF specification and defines the exact +```c +struct Atom +{ + uint16_t type; // AtomType enum value + uint16_t form; // DWARF DW_FORM_XXX defines +}; +``` +The `form` type above is from the DWARF specification and defines the exact encoding of the data for the Atom type. See the DWARF specification for the -``DW_FORM_`` definitions. - -.. code-block:: c - - struct HeaderData - { - uint32_t die_offset_base; - uint32_t atom_count; - Atoms atoms[atom_count0]; - }; - -``HeaderData`` defines the base DIE offset that should be added to any atoms -that are encoded using the ``DW_FORM_ref1``, ``DW_FORM_ref2``, -``DW_FORM_ref4``, ``DW_FORM_ref8`` or ``DW_FORM_ref_udata``. It also defines -what is contained in each ``HashData`` object -- ``Atom.form`` tells us how large -each field will be in the ``HashData`` and the ``Atom.type`` tells us how this data +`DW_FORM_` definitions.
+ +```c +struct HeaderData +{ + uint32_t die_offset_base; + uint32_t atom_count; + Atom atoms[atom_count]; +}; +``` +`HeaderData` defines the base DIE offset that should be added to any atoms +that are encoded using the `DW_FORM_ref1`, `DW_FORM_ref2`, +`DW_FORM_ref4`, `DW_FORM_ref8` or `DW_FORM_ref_udata`. It also defines +what is contained in each `HashData` object -- `Atom.form` tells us how large +each field will be in the `HashData` and the `Atom.type` tells us how this data should be interpreted. -For the current implementations of the "``.apple_names``" (all functions + -globals), the "``.apple_types``" (names of all types that are defined), and -the "``.apple_namespaces``" (all namespaces), we currently set the ``Atom`` +For the current implementations of the "`.apple_names`" (all functions + +globals), the "`.apple_types`" (names of all types that are defined), and +the "`.apple_namespaces`" (all namespaces), we currently set the `Atom` array to be: -.. code-block:: c - - HeaderData.atom_count = 1; - HeaderData.atoms[0].type = eAtomTypeDIEOffset; - HeaderData.atoms[0].form = DW_FORM_data4; - -This defines the contents to be the DIE offset (``eAtomTypeDIEOffset``) that is -encoded as a 32-bit value (``DW_FORM_data4``). This allows a single name to have +```c +HeaderData.atom_count = 1; +HeaderData.atoms[0].type = eAtomTypeDIEOffset; +HeaderData.atoms[0].form = DW_FORM_data4; +``` +This defines the contents to be the DIE offset (`eAtomTypeDIEOffset`) that is +encoded as a 32-bit value (`DW_FORM_data4`). This allows a single name to have multiple matching DIEs in a single file, which could come up with an inlined function for instance. Future tables could include more information about the DIE such as flags indicating if the DIE is a function, method, block, @@ -2207,89 +2064,84 @@ The KeyType for the DWARF table is a 32-bit string table offset into the may already contain copies of all of the strings.
This helps make sure, with help from the compiler, that we reuse the strings between all of the DWARF sections and keeps the hash table size down. Another benefit to having the -compiler generate all strings as ``DW_FORM_strp`` in the debug info, is that +compiler generate all strings as `DW_FORM_strp` in the debug info, is that DWARF parsing can be made much faster. After a lookup is made, we get an offset into the hash data. The hash data needs to be able to deal with 32-bit hash collisions, so the chunk of data at the offset in the hash data consists of a triple: -.. code-block:: c - - uint32_t str_offset - uint32_t hash_data_count - HashData[hash_data_count] - +```c +uint32_t str_offset +uint32_t hash_data_count +HashData[hash_data_count] +``` If "str_offset" is zero, then the bucket contents are done. 99.9% of the hash data chunks contain a single item (no 32-bit hash collision): -.. code-block:: none - - .------------. - | 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main") - | 0x00000004 | uint32_t HashData count - | 0x........ | uint32_t HashData[0] DIE offset - | 0x........ | uint32_t HashData[1] DIE offset - | 0x........ | uint32_t HashData[2] DIE offset - | 0x........ | uint32_t HashData[3] DIE offset - | 0x00000000 | uint32_t KeyType (end of hash chain) - `------------' - +```text +.------------. +| 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main") +| 0x00000004 | uint32_t HashData count +| 0x........ | uint32_t HashData[0] DIE offset +| 0x........ | uint32_t HashData[1] DIE offset +| 0x........ | uint32_t HashData[2] DIE offset +| 0x........ | uint32_t HashData[3] DIE offset +| 0x00000000 | uint32_t KeyType (end of hash chain) +`------------' +``` If there are collisions, you will have multiple valid string offsets: -.. code-block:: none - - .------------. - | 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main") - | 0x00000004 | uint32_t HashData count - | 0x........ | uint32_t HashData[0] DIE offset - | 0x........ 
| uint32_t HashData[1] DIE offset - | 0x........ | uint32_t HashData[2] DIE offset - | 0x........ | uint32_t HashData[3] DIE offset - | 0x00002023 | uint32_t KeyType (.debug_str[0x0002023] => "print") - | 0x00000002 | uint32_t HashData count - | 0x........ | uint32_t HashData[0] DIE offset - | 0x........ | uint32_t HashData[1] DIE offset - | 0x00000000 | uint32_t KeyType (end of hash chain) - `------------' - +```text +.------------. +| 0x00001023 | uint32_t KeyType (.debug_str[0x0001023] => "main") +| 0x00000004 | uint32_t HashData count +| 0x........ | uint32_t HashData[0] DIE offset +| 0x........ | uint32_t HashData[1] DIE offset +| 0x........ | uint32_t HashData[2] DIE offset +| 0x........ | uint32_t HashData[3] DIE offset +| 0x00002023 | uint32_t KeyType (.debug_str[0x0002023] => "print") +| 0x00000002 | uint32_t HashData count +| 0x........ | uint32_t HashData[0] DIE offset +| 0x........ | uint32_t HashData[1] DIE offset +| 0x00000000 | uint32_t KeyType (end of hash chain) +`------------' +``` Current testing with real world C++ binaries has shown that there is around 1 32-bit hash collision per 100,000 name entries. -Contents -^^^^^^^^ +#### Contents As we said, we want to strictly define exactly what is included in the -different tables. For DWARF, we have 3 tables: "``.apple_names``", -"``.apple_types``", and "``.apple_namespaces``". - -"``.apple_names``" sections should contain an entry for each DWARF DIE whose -``DW_TAG`` is a ``DW_TAG_label``, ``DW_TAG_inlined_subroutine``, or -``DW_TAG_subprogram`` that has address attributes: ``DW_AT_low_pc``, -``DW_AT_high_pc``, ``DW_AT_ranges`` or ``DW_AT_entry_pc``. It also contains -``DW_TAG_variable`` DIEs that have a ``DW_OP_addr`` in the location (global and +different tables. For DWARF, we have 3 tables: "`.apple_names`", +"`.apple_types`", and "`.apple_namespaces`". 
+ +"`.apple_names`" sections should contain an entry for each DWARF DIE whose +`DW_TAG` is a `DW_TAG_label`, `DW_TAG_inlined_subroutine`, or +`DW_TAG_subprogram` that has address attributes: `DW_AT_low_pc`, +`DW_AT_high_pc`, `DW_AT_ranges` or `DW_AT_entry_pc`. It also contains +`DW_TAG_variable` DIEs that have a `DW_OP_addr` in the location (global and static variables). All global and static variables should be included, including those scoped within functions and classes. For example using the following code: -.. code-block:: c +```c +static int var = 0; +void f () +{ static int var = 0; - - void f () - { - static int var = 0; - } - -Both of the static ``var`` variables would be included in the table. All +} +``` +Both of the static `var` variables would be included in the table. All functions should emit both their full names and their basenames. For C or C++, the full name is the mangled name (if available) which is usually in the -``DW_AT_MIPS_linkage_name`` attribute, and the ``DW_AT_name`` contains the +`DW_AT_MIPS_linkage_name` attribute, and the `DW_AT_name` contains the function basename. If global or static variables have a mangled name in a -``DW_AT_MIPS_linkage_name`` attribute, this should be emitted along with the -simple name found in the ``DW_AT_name`` attribute. +`DW_AT_MIPS_linkage_name` attribute, this should be emitted along with the +simple name found in the `DW_AT_name` attribute. 
-"``.apple_types``" sections should contain an entry for each DWARF DIE whose +"`.apple_types`" sections should contain an entry for each DWARF DIE whose tag is one of: * DW_TAG_array_type @@ -2318,75 +2170,70 @@ tag is one of: * DW_TAG_unspecified_type * DW_TAG_shared_type -Only entries with a ``DW_AT_name`` attribute are included, and the entry must -not be a forward declaration (``DW_AT_declaration`` attribute with a non-zero +Only entries with a `DW_AT_name` attribute are included, and the entry must +not be a forward declaration (`DW_AT_declaration` attribute with a non-zero value). For example, using the following code: -.. code-block:: c - - int main () - { - int *b = 0; - return *b; - } - +```c +int main () +{ + int *b = 0; + return *b; +} +``` We get a few type DIEs: -.. code-block:: none - - 0x00000067: TAG_base_type [5] - AT_encoding( DW_ATE_signed ) - AT_name( "int" ) - AT_byte_size( 0x04 ) - - 0x0000006e: TAG_pointer_type [6] - AT_type( {0x00000067} ( int ) ) - AT_byte_size( 0x08 ) +```text +0x00000067: TAG_base_type [5] + AT_encoding( DW_ATE_signed ) + AT_name( "int" ) + AT_byte_size( 0x04 ) -The ``DW_TAG_pointer_type`` is not included because it does not have a ``DW_AT_name``. +0x0000006e: TAG_pointer_type [6] + AT_type( {0x00000067} ( int ) ) + AT_byte_size( 0x08 ) +``` +The `DW_TAG_pointer_type` is not included because it does not have a `DW_AT_name`. -"``.apple_namespaces``" section should contain all ``DW_TAG_namespace`` DIEs. +"`.apple_namespaces`" section should contain all `DW_TAG_namespace` DIEs. If we run into a namespace that has no name this is an anonymous namespace, and -the name should be output as "``(anonymous namespace)``" (without the quotes). -Why? This matches the output of the ``abi::cxa_demangle()`` that is in the +the name should be output as "`(anonymous namespace)`" (without the quotes). +Why? This matches the output of the `abi::cxa_demangle()` that is in the standard C++ library that demangles mangled names. 
-Language Extensions and File Format Changes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Language Extensions and File Format Changes -.. _llvm_language_dialect: +(llvm_language_dialect)= -LLVM Language Dialect -""""""""""""""""""""" +##### LLVM Language Dialect -LLVM emits an optional ``DW_AT_LLVM_language_dialect`` attribute on -``DW_TAG_compile_unit`` to identify an execution-model dialect of the -source-level language declared by ``DW_AT_language`` / -``DW_AT_LLVM_source_language_name``. +LLVM emits an optional `DW_AT_LLVM_language_dialect` attribute on +`DW_TAG_compile_unit` to identify an execution-model dialect of the +source-level language declared by `DW_AT_language` / +`DW_AT_LLVM_source_language_name`. The attribute value is encoded as an unsigned integer from the -``DW_LLVM_LANG_DIALECT_*`` enumeration: +`DW_LLVM_LANG_DIALECT_*` enumeration: -* ``DW_LLVM_LANG_DIALECT_simt`` (``0x01``) -- single-instruction, +* `DW_LLVM_LANG_DIALECT_simt` (`0x01`) -- single-instruction, multiple-thread execution model. -* ``DW_LLVM_LANG_DIALECT_tile`` (``0x02``) -- tile-based execution model. +* `DW_LLVM_LANG_DIALECT_tile` (`0x02`) -- tile-based execution model. -To express "no dialect specified", the ``dialect:`` field on -:ref:`DICompileUnit ` is simply omitted; no -``DW_AT_LLVM_language_dialect`` attribute is emitted in that case. +To express "no dialect specified", the `dialect:` field on +{ref}`DICompileUnit ` is simply omitted; no +`DW_AT_LLVM_language_dialect` attribute is emitted in that case. -Objective-C Extensions -"""""""""""""""""""""" +##### Objective-C Extensions -"``.apple_objc``" section should contain all ``DW_TAG_subprogram`` DIEs for an +"`.apple_objc`" section should contain all `DW_TAG_subprogram` DIEs for an Objective-C class. The name used in the hash table is the name of the Objective-C class itself. 
If the Objective-C class has a category, then an entry is made for both the class name without the category, and for the class name with the category. So if we have a DIE at offset 0x1234 with a name of -method "``-[NSString(my_additions) stringWithSpecialString:]``", we would add -an entry for "``NSString``" that points to DIE 0x1234, and an entry for -"``NSString(my_additions)``" that points to 0x1234. This allows us to quickly +method "`-[NSString(my_additions) stringWithSpecialString:]`", we would add +an entry for "`NSString`" that points to DIE 0x1234, and an entry for +"`NSString(my_additions)`" that points to 0x1234. This allows us to quickly track down all Objective-C methods for an Objective-C class when doing expressions. It is needed because of the dynamic nature of Objective-C where anyone can add methods to a class. The DWARF for Objective-C methods is also @@ -2398,35 +2245,32 @@ given the Objective-C class name, or quickly find all methods and class functions for a class + category name. This table does not contain any selector names, it just maps Objective-C class names (or class names + category) to all of the methods and class functions. The selectors are added -as function basenames in the "``.debug_names``" section. +as function basenames in the "`.debug_names`" section. -In the "``.apple_names``" section for Objective-C functions, the full name is -the entire function name with the brackets ("``-[NSString -stringWithCString:]``") and the basename is the selector only -("``stringWithCString:``"). +In the "`.apple_names`" section for Objective-C functions, the full name is +the entire function name with the brackets ("`-[NSString +stringWithCString:]`") and the basename is the selector only +("`stringWithCString:`"). -Mach-O Changes -"""""""""""""" +##### Mach-O Changes The sections names for the apple hash tables are for non-mach-o files. 
For -mach-o files, the sections should be contained in the ``__DWARF`` segment with +mach-o files, the sections should be contained in the `__DWARF` segment with names as follows: -* "``.apple_names``" -> "``__apple_names``" -* "``.apple_types``" -> "``__apple_types``" -* "``.apple_namespaces``" -> "``__apple_namespac``" (16 character limit) -* "``.apple_objc``" -> "``__apple_objc``" +* "`.apple_names`" -> "`__apple_names`" +* "`.apple_types`" -> "`__apple_types`" +* "`.apple_namespaces`" -> "`__apple_namespac`" (16 character limit) +* "`.apple_objc`" -> "`__apple_objc`" -.. _codeview: +(codeview)= -CodeView Debug Info Format -========================== +## CodeView Debug Info Format LLVM supports emitting CodeView, the Microsoft debug info format, and this section describes the design and implementation of that support. -Format Background ------------------ +### Format Background CodeView as a format is clearly oriented around C++ debugging, and in C++, the majority of debug information tends to be type information. Therefore, the @@ -2436,19 +2280,19 @@ merged across translation units. Both type information and symbol information is generally stored as a sequence of records, where each record begins with a 16-bit record size and a 16-bit record kind. -Type information is usually stored in the ``.debug$T`` section of the object +Type information is usually stored in the `.debug$T` section of the object file. All other debug info, such as line info, string table, symbol info, and -inlinee info, is stored in one or more ``.debug$S`` sections. There may only be -one ``.debug$T`` section per object file, since all other debug info refers to -it. If a PDB (enabled by the ``/Zi`` MSVC option) was used during compilation, -the ``.debug$T`` section will contain only an ``LF_TYPESERVER2`` record pointing +inlinee info, is stored in one or more `.debug$S` sections. There may only be +one `.debug$T` section per object file, since all other debug info refers to +it. 
If a PDB (enabled by the `/Zi` MSVC option) was used during compilation, +the `.debug$T` section will contain only an `LF_TYPESERVER2` record pointing to the PDB. When using PDBs, symbol information appears to remain in the object -file ``.debug$S`` sections. +file `.debug$S` sections. Type records are referred to by their index, which is the number of records in -the stream before a given record plus ``0x1000``. Many common basic types, such +the stream before a given record plus `0x1000`. Many common basic types, such as the basic integral types and unqualified pointers to them, are represented -using type indices less than ``0x1000``. Such basic types are built in to +using type indices less than `0x1000`. Such basic types are built in to CodeView consumers and do not require type records. Each type record may only contain type indices that are less than its own type @@ -2456,31 +2300,36 @@ index. This ensures that the graph of type stream references is acyclic. While the source-level type graph may contain cycles through pointer types (consider a linked list struct), these cycles are removed from the type stream by always referring to the forward declaration record of user-defined record types. Only -"symbol" records in the ``.debug$S`` streams may refer to complete, +"symbol" records in the `.debug$S` streams may refer to complete, non-forward-declaration type records. -Working with CodeView ---------------------- +### Working with CodeView These are instructions for some common tasks for developers working to improve LLVM's CodeView support. Most of them revolve around using the CodeView dumper -embedded in ``llvm-readobj``. +embedded in `llvm-readobj`. 
-* Testing MSVC's output:: +* Testing MSVC's output: - $ cl -c -Z7 foo.cpp # Use /Z7 to keep types in the object file - $ llvm-readobj --codeview foo.obj + ```console + $ cl -c -Z7 foo.cpp # Use /Z7 to keep types in the object file + $ llvm-readobj --codeview foo.obj + ``` -* Getting LLVM IR debug info out of Clang:: +* Getting LLVM IR debug info out of Clang: - $ clang -g -gcodeview --target=x86_64-windows-msvc foo.cpp -S -emit-llvm + ```console + $ clang -g -gcodeview --target=x86_64-windows-msvc foo.cpp -S -emit-llvm + ``` Use this to generate LLVM IR for LLVM test cases. -* Generate and dump CodeView from LLVM IR metadata:: +* Generate and dump CodeView from LLVM IR metadata: - $ llc foo.ll -filetype=obj -o foo.obj - $ llvm-readobj --codeview foo.obj > foo.txt + ```console + $ llc foo.ll -filetype=obj -o foo.obj + $ llvm-readobj --codeview foo.obj > foo.txt + ``` Use this pattern in lit test cases and FileCheck the output of llvm-readobj diff --git a/llvm/docs/TableGen/index.md b/llvm/docs/TableGen/index.md index 0f1190c54627a..a85fdf3f39766 100644 --- a/llvm/docs/TableGen/index.md +++ b/llvm/docs/TableGen/index.md @@ -1,19 +1,18 @@ -================= -TableGen Overview -================= +# TableGen Overview -.. contents:: - :local: +```{contents} +:local: +``` -.. toctree:: - :hidden: +```{toctree} +:hidden: - BackEnds - BackGuide - ProgRef +BackEnds +BackGuide +ProgRef +``` -Introduction -============ +## Introduction TableGen's purpose is to help a human develop and maintain records of domain-specific information. Because there may be a large number of these @@ -23,141 +22,140 @@ amount of duplication in the description, reduces the chance of error, and makes it easier to structure domain-specific information. The TableGen front end parses a file, instantiates the declarations, and -hands the result off to a domain-specific `backend`_ for processing. See -the :doc:`TableGen Programmer's Reference <./ProgRef>` for an in-depth -description of TableGen. 
See :doc:`tblgen - Description to C++ Code -<../CommandGuide/tblgen>` for details on the ``*-tblgen`` commands -that run the various flavors of TableGen. +hands the result off to a domain-specific {ref}`backend ` for processing. See +the {doc}`TableGen Programmer's Reference <./ProgRef>` for an in-depth +description of TableGen. See +{doc}`tblgen - Description to C++ Code <../CommandGuide/tblgen>` for details on the +`*-tblgen` commands that run the various flavors of TableGen. -The current major users of TableGen are :doc:`The LLVM Target-Independent -Code Generator <../CodeGenerator>` and the `Clang diagnostics and attributes -`_. +The current major users of TableGen are +{doc}`The LLVM Target-Independent Code Generator <../CodeGenerator>` and the +[Clang diagnostics and attributes][clang-diagnostics-and-attributes]. + +[clang-diagnostics-and-attributes]: https://clang.llvm.org/docs/UsersManual.html#controlling-errors-and-warnings Note that if you work with TableGen frequently and use emacs or vim, you can find an emacs "TableGen mode" and a vim language file in the -``llvm/utils/emacs`` and ``llvm/utils/vim`` directories of your LLVM +`llvm/utils/emacs` and `llvm/utils/vim` directories of your LLVM distribution, respectively. -.. _intro: +(intro)= -The TableGen program -==================== +## The TableGen program -TableGen files are interpreted by the TableGen program: ``llvm-tblgen`` available -in your build directory under ``bin``. It is not installed in the system (or where +TableGen files are interpreted by the TableGen program: `llvm-tblgen` available +in your build directory under `bin`. It is not installed in the system (or where your sysroot is set to), since it has no use beyond LLVM's build process. -Running TableGen ----------------- +### Running TableGen TableGen runs just like any other LLVM tool. The first (optional) argument -specifies the file to read. If a filename is not specified, ``llvm-tblgen`` +specifies the file to read. 
If a filename is not specified, `llvm-tblgen` reads from standard input. -The ``-o`` option specifies the output file or ``-`` to output to +The `-o` option specifies the output file or `-` to output to stdout. Where TableGen produces multiple output files, the option specifies the name of the main output file, which also works as the name prefix for other output files. -To be useful, one of the `backends`_ must be used. These backends are -selectable on the command line (type '``llvm-tblgen -help``' for a list). For +To be useful, one of the {ref}`backends ` must be used. These backends are +selectable on the command line (type '`llvm-tblgen -help`' for a list). For example, to get a list of all of the definitions that subclass a particular type (which can be useful for building up an enum list of these records), use the -``-print-enums`` option: - -.. code-block:: bash - - $ llvm-tblgen X86.td -print-enums -class=Register - AH, AL, AX, BH, BL, BP, BPL, BX, CH, CL, CX, DH, DI, DIL, DL, DX, EAX, EBP, EBX, - ECX, EDI, EDX, EFLAGS, EIP, ESI, ESP, FP0, FP1, FP2, FP3, FP4, FP5, FP6, IP, - MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, R10, R10B, R10D, R10W, R11, R11B, R11D, - R11W, R12, R12B, R12D, R12W, R13, R13B, R13D, R13W, R14, R14B, R14D, R14W, R15, - R15B, R15D, R15W, R8, R8B, R8D, R8W, R9, R9B, R9D, R9W, RAX, RBP, RBX, RCX, RDI, - RDX, RIP, RSI, RSP, SI, SIL, SP, SPL, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, - XMM0, XMM1, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5, - XMM6, XMM7, XMM8, XMM9, - - $ llvm-tblgen X86.td -print-enums -class=Instruction - ABS_F, ABS_Fp32, ABS_Fp64, ABS_Fp80, ADC32mi, ADC32mi8, ADC32mr, ADC32ri, - ADC32ri8, ADC32rm, ADC32rr, ADC64mi32, ADC64mi8, ADC64mr, ADC64ri32, ADC64ri8, - ADC64rm, ADC64rr, ADD16mi, ADD16mi8, ADD16mr, ADD16ri, ADD16ri8, ADD16rm, - ADD16rr, ADD32mi, ADD32mi8, ADD32mr, ADD32ri, ADD32ri8, ADD32rm, ADD32rr, - ADD64mi32, ADD64mi8, ADD64mr, ADD64ri32, ... 
+`-print-enums` option: + +```console +$ llvm-tblgen X86.td -print-enums -class=Register +AH, AL, AX, BH, BL, BP, BPL, BX, CH, CL, CX, DH, DI, DIL, DL, DX, EAX, EBP, EBX, +ECX, EDI, EDX, EFLAGS, EIP, ESI, ESP, FP0, FP1, FP2, FP3, FP4, FP5, FP6, IP, +MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, R10, R10B, R10D, R10W, R11, R11B, R11D, +R11W, R12, R12B, R12D, R12W, R13, R13B, R13D, R13W, R14, R14B, R14D, R14W, R15, +R15B, R15D, R15W, R8, R8B, R8D, R8W, R9, R9B, R9D, R9W, RAX, RBP, RBX, RCX, RDI, +RDX, RIP, RSI, RSP, SI, SIL, SP, SPL, ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7, +XMM0, XMM1, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM2, XMM3, XMM4, XMM5, +XMM6, XMM7, XMM8, XMM9, + +$ llvm-tblgen X86.td -print-enums -class=Instruction +ABS_F, ABS_Fp32, ABS_Fp64, ABS_Fp80, ADC32mi, ADC32mi8, ADC32mr, ADC32ri, +ADC32ri8, ADC32rm, ADC32rr, ADC64mi32, ADC64mi8, ADC64mr, ADC64ri32, ADC64ri8, +ADC64rm, ADC64rr, ADD16mi, ADD16mi8, ADD16mr, ADD16ri, ADD16ri8, ADD16rm, +ADD16rr, ADD32mi, ADD32mi8, ADD32mr, ADD32ri, ADD32ri8, ADD32rm, ADD32rr, +ADD64mi32, ADD64mi8, ADD64mr, ADD64ri32, ... +``` The default backend prints out all of the records. There is also a general backend which outputs all the records as a JSON data structure, enabled using the `-dump-json` option. -If you plan to use TableGen, you will most likely have to write a `backend`_ +If you plan to use TableGen, you will most likely have to write a {ref}`backend ` that extracts the information specific to what you need and formats it in the appropriate way. You can do this by extending TableGen itself in C++ or by writing a script in any language that can consume the JSON output. -Example -------- +### Example With no other arguments, `llvm-tblgen` parses the specified file and prints out all of the classes, then all of the definitions. This is a good way to see what the -various definitions expand to fully. Running this on the ``X86.td`` file prints +various definitions expand to fully. 
Running this on the `X86.td` file prints this (at the time of this writing): -.. code-block:: text - - ... - def ADD32rr { // Instruction X86Inst I - string Namespace = "X86"; - dag OutOperandList = (outs GR32:$dst); - dag InOperandList = (ins GR32:$src1, GR32:$src2); - string AsmString = "add{l}\t{$src2, $dst|$dst, $src2}"; - list Pattern = [(set GR32:$dst, (add GR32:$src1, GR32:$src2))]; - list Uses = []; - list Defs = [EFLAGS]; - list Predicates = []; - int CodeSize = 3; - int AddedComplexity = 0; - bit isReturn = 0; - bit isBranch = 0; - bit isIndirectBranch = 0; - bit isBarrier = 0; - bit isCall = 0; - bit canFoldAsLoad = 0; - bit mayLoad = 0; - bit mayStore = 0; - bit isImplicitDef = 0; - bit isConvertibleToThreeAddress = 1; - bit isCommutable = 1; - bit isTerminator = 0; - bit isReMaterializable = 0; - bit isPredicable = 0; - bit hasDelaySlot = 0; - bit usesCustomInserter = 0; - bit hasCtrlDep = 0; - bit isNotDuplicable = 0; - bit hasSideEffects = 0; - InstrItinClass Itinerary = NoItinerary; - string Constraints = ""; - string DisableEncoding = ""; - bits<8> Opcode = { 0, 0, 0, 0, 0, 0, 0, 1 }; - Format Form = MRMDestReg; - bits<6> FormBits = { 0, 0, 0, 0, 1, 1 }; - ImmType ImmT = NoImm; - bits<3> ImmTypeBits = { 0, 0, 0 }; - bit hasOpSizePrefix = 0; - bit hasAdSizePrefix = 0; - bits<4> Prefix = { 0, 0, 0, 0 }; - bit hasREX_WPrefix = 0; - FPFormat FPForm = ?; - bits<3> FPFormBits = { 0, 0, 0 }; - } - ... - -This definition corresponds to the 32-bit register-register ``add`` instruction -of the x86 architecture. ``def ADD32rr`` defines a record named -``ADD32rr``, and the comment at the end of the line indicates the superclasses +```text +... 
+def ADD32rr { // Instruction X86Inst I + string Namespace = "X86"; + dag OutOperandList = (outs GR32:$dst); + dag InOperandList = (ins GR32:$src1, GR32:$src2); + string AsmString = "add{l}\t{$src2, $dst|$dst, $src2}"; + list Pattern = [(set GR32:$dst, (add GR32:$src1, GR32:$src2))]; + list Uses = []; + list Defs = [EFLAGS]; + list Predicates = []; + int CodeSize = 3; + int AddedComplexity = 0; + bit isReturn = 0; + bit isBranch = 0; + bit isIndirectBranch = 0; + bit isBarrier = 0; + bit isCall = 0; + bit canFoldAsLoad = 0; + bit mayLoad = 0; + bit mayStore = 0; + bit isImplicitDef = 0; + bit isConvertibleToThreeAddress = 1; + bit isCommutable = 1; + bit isTerminator = 0; + bit isReMaterializable = 0; + bit isPredicable = 0; + bit hasDelaySlot = 0; + bit usesCustomInserter = 0; + bit hasCtrlDep = 0; + bit isNotDuplicable = 0; + bit hasSideEffects = 0; + InstrItinClass Itinerary = NoItinerary; + string Constraints = ""; + string DisableEncoding = ""; + bits<8> Opcode = { 0, 0, 0, 0, 0, 0, 0, 1 }; + Format Form = MRMDestReg; + bits<6> FormBits = { 0, 0, 0, 0, 1, 1 }; + ImmType ImmT = NoImm; + bits<3> ImmTypeBits = { 0, 0, 0 }; + bit hasOpSizePrefix = 0; + bit hasAdSizePrefix = 0; + bits<4> Prefix = { 0, 0, 0, 0 }; + bit hasREX_WPrefix = 0; + FPFormat FPForm = ?; + bits<3> FPFormBits = { 0, 0, 0 }; +} +... +``` + +This definition corresponds to the 32-bit register-register `add` instruction +of the x86 architecture. `def ADD32rr` defines a record named +`ADD32rr`, and the comment at the end of the line indicates the superclasses of the definition. The body of the record contains all of the data that TableGen assembled for the record, indicating that the instruction is part of -the ``X86`` namespace, the pattern indicating how the instruction is selected by +the `X86` namespace, the pattern indicating how the instruction is selected by the code generator, that it is a two-address instruction, has a particular encoding, etc. 
The contents and semantics of the information in the record are specific to the needs of the X86 backend, and are only shown as an example. @@ -167,31 +165,29 @@ by the code generator, and specifying it all manually would be unmaintainable, prone to bugs, and tiring to do in the first place. Because we are using TableGen, all of the information was derived from the following definition: -.. code-block:: text - - let Defs = [EFLAGS], - isCommutable = 1, // X = ADD Y,Z --> X = ADD Z,Y - isConvertibleToThreeAddress = 1 in // Can transform into LEA. - def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst), - (ins GR32:$src1, GR32:$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", - [(set GR32:$dst, (add GR32:$src1, GR32:$src2))]>; - -This definition makes use of the custom class ``I`` (extended from the custom -class ``X86Inst``), which is defined in the X86-specific TableGen file, to +```text +let Defs = [EFLAGS], + isCommutable = 1, // X = ADD Y,Z --> X = ADD Z,Y + isConvertibleToThreeAddress = 1 in // Can transform into LEA. +def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst), + (ins GR32:$src1, GR32:$src2), + "add{l}\t{$src2, $dst|$dst, $src2}", + [(set GR32:$dst, (add GR32:$src1, GR32:$src2))]>; +``` + +This definition makes use of the custom class `I` (extended from the custom +class `X86Inst`), which is defined in the X86-specific TableGen file, to factor out the common features that instructions of its class share. A key feature of TableGen is that it allows the end user to define the abstractions they prefer to use when describing their information. -Syntax -====== +## Syntax TableGen has a syntax loosely based on C++ templates, with built-in types and specification. In addition, TableGen's syntax introduces some automation concepts like multiclass, foreach, let, etc. -Basic concepts --------------- +### Basic concepts TableGen files consist of two key parts: 'classes' and 'definitions', both of which are considered 'records'. 
@@ -199,40 +195,40 @@ which are considered 'records'. **TableGen records** have a unique name, a list of values, and a list of superclasses. The list of values is the main data that TableGen builds for each record; it is this that holds the domain-specific information for the -application. The interpretation of this data is left to a specific `backend`_, +application. The interpretation of this data is left to a specific {ref}`backend `, but the structure and format rules are taken care of and fixed by TableGen. **TableGen definitions** are the concrete form of 'records'. These generally do -not have any undefined values and are marked with the '``def``' keyword. +not have any undefined values and are marked with the '`def`' keyword. -.. code-block:: text +```text +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", + "Enable ARMv8 FP">; +``` - def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", - "Enable ARMv8 FP">; - -In this example, ``FeatureFPARMv8`` is ``SubtargetFeature`` record initialised +In this example, `FeatureFPARMv8` is `SubtargetFeature` record initialised with some values. The names of the classes are defined via the keyword `class` either in the same file or some other included. Most target -TableGen files include the generic ones in ``include/llvm/Target``. +TableGen files include the generic ones in `include/llvm/Target`. **TableGen classes** are abstract records that are used to build and describe other records. 
These classes allow the end-user to build abstractions for -either the domain they are targeting (such as ``Register``, ``RegisterClass``, and -``Instruction`` in the LLVM code generator) or for the implementor to help factor -out common properties of records (such as ``FPInst``, which is used to represent +either the domain they are targeting (such as `Register`, `RegisterClass`, and +`Instruction` in the LLVM code generator) or for the implementor to help factor +out common properties of records (such as `FPInst`, which is used to represent floating point instructions in the X86 backend). TableGen keeps track of all of the classes that are used to build up a definition, so the backend can find all -definitions of a particular class, such as ``Instruction``. - -.. code-block:: text +definitions of a particular class, such as `Instruction`. - class ProcNoItin Features> - : Processor; +```text +class ProcNoItin Features> + : Processor; +``` -Here, the class ``ProcNoItin``, receiving parameters ``Name`` of type ``string`` and -a list of target features is specializing the class ``Processor`` by passing the -arguments down as well as hard-coding ``NoItineraries``. +Here, the class `ProcNoItin`, receiving parameters `Name` of type `string` and +a list of target features is specializing the class `Processor` by passing the +arguments down as well as hard-coding `NoItineraries`. **TableGen multiclasses** are groups of abstract records that are instantiated all at once. Each instantiation can result in multiple TableGen definitions. @@ -240,36 +236,35 @@ If a multiclass inherits from another multiclass, the definitions in the sub-multiclass become part of the current multiclass, as if they were declared in the current multiclass. -.. 
code-block:: text - - multiclass ro_signed_pats { - def : Pat<(i32 (!cast("sextload" # sty) address)), - (!cast("LDRS" # T # "w_" # Rm # "_RegOffset") - Base, Offset, Extend)>; - - def : Pat<(i64 (!cast("sextload" # sty) address)), - (!cast("LDRS" # T # "x_" # Rm # "_RegOffset") - Base, Offset, Extend)>; - } - - defm : ro_signed_pats<"B", Rm, Base, Offset, Extend, - !foreach(decls.pattern, address, - !subst(SHIFT, imm_eq0, decls.pattern)), - i8>; - -See the :doc:`TableGen Programmer's Reference <./ProgRef>` for an in-depth +```text +multiclass ro_signed_pats { +def : Pat<(i32 (!cast("sextload" # sty) address)), + (!cast("LDRS" # T # "w_" # Rm # "_RegOffset") + Base, Offset, Extend)>; + +def : Pat<(i64 (!cast("sextload" # sty) address)), + (!cast("LDRS" # T # "x_" # Rm # "_RegOffset") + Base, Offset, Extend)>; +} + +defm : ro_signed_pats<"B", Rm, Base, Offset, Extend, + !foreach(decls.pattern, address, + !subst(SHIFT, imm_eq0, decls.pattern)), + i8>; +``` + +See the {doc}`TableGen Programmer's Reference <./ProgRef>` for an in-depth description of TableGen. -.. _backend: -.. _backends: +(backend)= +(backends)= -TableGen backends -================= +## TableGen backends TableGen files have no real meaning without a backend. The default operation -when running ``*-tblgen`` is to print the information in a textual format, but +when running `*-tblgen` is to print the information in a textual format, but that's only useful for debugging the TableGen files themselves. The power in TableGen is, however, to interpret the source files into an internal representation that can be generated into anything you want. @@ -284,19 +279,17 @@ Pre-processed output should be used if the same information needs to be used in different contexts (like Instruction names), so your backend should print a meta-information list that can be shaped into different compile-time formats. 
-See :doc:`TableGen BackEnds <./BackEnds>` for a list of available -backends, and see the :doc:`TableGen Backend Developer's Guide <./BackGuide>` +See {doc}`TableGen BackEnds <./BackEnds>` for a list of available +backends, and see the {doc}`TableGen Backend Developer's Guide <./BackGuide>` for information on how to write and debug a new backend. -Tools and Resources -=================== +## Tools and Resources In addition to this documentation, a list of tools and resources for TableGen can be found in TableGen's -`README `_. +[README](https://github.com/llvm/llvm-project/blob/main/llvm/utils/TableGen/README.md). -TableGen Deficiencies -===================== +## TableGen Deficiencies Despite being very generic, TableGen has some deficiencies that have been pointed out numerous times. The common theme is that, while TableGen allows diff --git a/llvm/docs/TestingGuide.md b/llvm/docs/TestingGuide.md index 9a53838f7172c..67ddd69f08ae9 100644 --- a/llvm/docs/TestingGuide.md +++ b/llvm/docs/TestingGuide.md @@ -1,83 +1,76 @@ -================================= -LLVM Testing Infrastructure Guide -================================= +# LLVM Testing Infrastructure Guide -.. contents:: - :local: +```{contents} +:local: +``` -.. toctree:: - :hidden: +```{toctree} +:hidden: - TestSuiteGuide +TestSuiteGuide +``` -Overview -======== +## Overview This document is the reference manual for the LLVM testing infrastructure. It documents the structure of the LLVM testing infrastructure, the tools needed to use it, and how to add and run tests. -Requirements -============ +## Requirements In order to use the LLVM testing infrastructure, you will need all of the -software required to build LLVM, as well as `Python `_ 3.8 or +software required to build LLVM, as well as [Python](http://python.org) 3.8 or later. 
-LLVM Testing Infrastructure Organization -======================================== +## LLVM Testing Infrastructure Organization The LLVM testing infrastructure contains three major categories of tests: unit tests, regression tests, and whole programs. The unit tests and regression -tests are contained inside the LLVM repository itself under ``llvm/unittests`` -and ``llvm/test`` respectively and are expected to always pass. They should be +tests are contained inside the LLVM repository itself under `llvm/unittests` +and `llvm/test` respectively and are expected to always pass. They should be run before every commit. The whole-program tests are referred to as the "LLVM test suite" (or -"test-suite") and are in the ``test-suite`` -`repository on GitHub `_. +"test-suite") and are in the `test-suite` +[repository on GitHub](https://github.com/llvm/llvm-test-suite.git). For historical reasons, these tests are also referred to as the "nightly tests" in places, which is less ambiguous than "test-suite" and remains in use although we run them much more often than nightly. -Unit tests ----------- +### Unit tests -Unit tests are written using `Google Test `_ -and `Google Mock `_ -and are located in the ``llvm/unittests`` directory. +Unit tests are written using [Google Test](https://github.com/google/googletest/blob/master/docs/primer.md) +and [Google Mock](https://github.com/google/googletest/blob/master/docs/gmock_for_dummies.md) +and are located in the `llvm/unittests` directory. In general, unit tests are reserved for targeting the support library and other generic data structure. We prefer relying on regression tests for testing transformations and analysis on the IR. -Regression tests ----------------- +### Regression tests The regression tests are small pieces of code that test a specific feature of LLVM or trigger a specific bug in LLVM. The language they are written in depends on the part of LLVM being tested. 
These tests are driven by -the :doc:`Lit <CommandGuide/lit>` testing tool (which is part of LLVM), and -are located in the ``llvm/test`` directory. +the {doc}`Lit <CommandGuide/lit>` testing tool (which is part of LLVM), and +are located in the `llvm/test` directory. Typically, when a bug is found in LLVM, a regression test containing just enough code to reproduce the problem should be written and placed somewhere underneath this directory. For example, it can be a small piece of LLVM IR distilled from an actual application or benchmark. -Testing Analysis ----------------- +### Testing Analysis An analysis is a pass to infer properties on some part of the IR without transforming it. They are tested in general using the same infrastructure as the regression tests, by creating a separate "Printer" pass to consume the analysis result and print it on the standard output in a textual format suitable for FileCheck. -See `llvm/test/Analysis/BranchProbabilityInfo/loop.ll <https://github.com/llvm/llvm-project/blob/main/llvm/test/Analysis/BranchProbabilityInfo/loop.ll>`_ +See [llvm/test/Analysis/BranchProbabilityInfo/loop.ll](https://github.com/llvm/llvm-project/blob/main/llvm/test/Analysis/BranchProbabilityInfo/loop.ll) for an example of such test. -``test-suite`` --------------- +### `test-suite` The test suite contains whole programs, which are pieces of code which can be compiled and linked into a stand-alone program that can be @@ -94,244 +87,237 @@ serve as a way of benchmarking LLVM performance, both in terms of the efficiency of the programs generated as well as the speed with which LLVM compiles, optimizes, and generates code. -The test-suite is located in the ``test-suite`` -`repository on GitHub <https://github.com/llvm/llvm-test-suite.git>`_. +The test-suite is located in the `test-suite` +[repository on GitHub](https://github.com/llvm/llvm-test-suite.git). -See the :doc:`TestSuiteGuide` for details. +See the {doc}`TestSuiteGuide` for details. -Debugging Information tests ---------------------------- +### Debugging Information tests The test suite contains tests to check the quality of debugging information.
The tests are written in C-based languages or in LLVM assembly language. These tests are compiled and run under a debugger. The debugger output -is checked to validate the debugging information. See ``README.txt`` in the +is checked to validate the debugging information. See `README.txt` in the test suite for more information. This test suite is located in the -``cross-project-tests/debuginfo-tests`` directory. +`cross-project-tests/debuginfo-tests` directory. -Quick start -=========== +## Quick start The tests are located in two separate repositories. The unit and regression tests are in the main "llvm"/ directory under the directories -``llvm/unittests`` and ``llvm/test`` (so you get these tests for free with the -main LLVM tree). Use ``make check-all`` to run the unit and regression tests +`llvm/unittests` and `llvm/test` (so you get these tests for free with the +main LLVM tree). Use `make check-all` to run the unit and regression tests after building LLVM. -The ``test-suite`` module contains more comprehensive tests including whole C -and C++ programs. See the :doc:`TestSuiteGuide` for details. +The `test-suite` module contains more comprehensive tests including whole C +and C++ programs. See the {doc}`TestSuiteGuide` for details. -Unit and Regression tests -------------------------- +### Unit and Regression tests -To run all of the LLVM unit tests, use the ``check-llvm-unit`` target: +To run all of the LLVM unit tests, use the `check-llvm-unit` target: -.. code-block:: bash +```bash +% make check-llvm-unit +``` - % make check-llvm-unit +To run all of the LLVM regression tests, use the `check-llvm` target: -To run all of the LLVM regression tests, use the ``check-llvm`` target: - -.. code-block:: bash - - % make check-llvm +```bash +% make check-llvm +``` In order to get reasonable testing performance, build LLVM and subprojects in release mode, i.e., -.. 
code-block:: bash +```bash +% cmake -DCMAKE_BUILD_TYPE="Release" -DLLVM_ENABLE_ASSERTIONS=On +``` - % cmake -DCMAKE_BUILD_TYPE="Release" -DLLVM_ENABLE_ASSERTIONS=On - -If you have `Clang `_ checked out and built, you +If you have [Clang](https://clang.llvm.org/) checked out and built, you can run the LLVM and Clang tests simultaneously using: -.. code-block:: bash - - % make check-all +```bash +% make check-all +``` -To run the tests with Valgrind (Memcheck by default), use the ``LIT_OPTS`` make +To run the tests with Valgrind (Memcheck by default), use the `LIT_OPTS` make variable to pass the required options to lit. For example, you can use: -.. code-block:: bash - - % make check LIT_OPTS="-v --vg --vg-leak" +```bash +% make check LIT_OPTS="-v --vg --vg-leak" +``` to enable testing with Valgrind and with leak checking enabled. -To run individual tests or subsets of tests, you can use the ``llvm-lit`` +To run individual tests or subsets of tests, you can use the `llvm-lit` script which is built as part of LLVM. For example, to run the -``Integer/BitPacked.ll`` test by itself, you can run: - -.. code-block:: bash +`Integer/BitPacked.ll` test by itself, you can run: - % llvm-lit /llvm/test/Integer/BitPacked.ll +```bash +% llvm-lit /llvm/test/Integer/BitPacked.ll +``` -.. note:: - The test files are in the ``llvm-project`` directory, not the directory you - are building LLVM in. +```{note} +The test files are in the `llvm-project` directory, not the directory you +are building LLVM in. +``` Or you can run a whole folder of tests. To run all of the ARM CodeGen tests: -.. code-block:: bash - - % llvm-lit /llvm/test/CodeGen/ARM +```bash +% llvm-lit /llvm/test/CodeGen/ARM +``` The regression tests will use the Python psutil module only if installed in a **non-user** location. Under Linux, install with sudo or within a virtual environment. Under Windows, install Python for all users and then run -``pip install psutil`` in an elevated command prompt. 
+`pip install psutil` in an elevated command prompt. -For more information on using the :program:`lit` tool, see ``llvm-lit --help`` -or the :doc:`lit man page <CommandGuide/lit>`. +For more information on using the {program}`lit` tool, see `llvm-lit --help` +or the {doc}`lit man page <CommandGuide/lit>`. -Debugging Information tests ---------------------------- +### Debugging Information tests -To run debugging information tests simply add the ``cross-project-tests`` -project to your ``LLVM_ENABLE_PROJECTS`` define on the cmake +To run debugging information tests simply add the `cross-project-tests` +project to your `LLVM_ENABLE_PROJECTS` define on the cmake command-line. -Regression test structure -========================= +## Regression test structure -The LLVM regression tests are driven by :program:`lit` and are located in the -``llvm/test`` directory. +The LLVM regression tests are driven by {program}`lit` and are located in the +`llvm/test` directory. This directory contains a large array of small tests that exercise various features of LLVM and to ensure that regressions do not occur. The directory is broken into several subdirectories, each focused on a particular area of LLVM. -Writing new regression tests ----------------------------- +### Writing new regression tests The regression test structure is very simple but does require some -information to be set. This information is gathered via ``cmake`` -and is written to a file, ``test/lit.site.cfg.py`` in the build directory. -The ``llvm/test`` Makefile does this work for you. +information to be set. This information is gathered via `cmake` +and is written to a file, `test/lit.site.cfg.py` in the build directory. +The `llvm/test` Makefile does this work for you. In order for the regression tests to work, each directory of tests must -have a ``lit.local.cfg`` file. :program:`lit` looks for this file to determine +have a `lit.local.cfg` file. {program}`lit` looks for this file to determine how to run the tests.
This file is just Python code and thus is very flexible, but we've standardized it for the LLVM regression tests. If -you're adding a directory of tests, just copy ``lit.local.cfg`` from -another directory to get running. The standard ``lit.local.cfg`` simply +you're adding a directory of tests, just copy `lit.local.cfg` from +another directory to get running. The standard `lit.local.cfg` simply specifies which files to look in for tests. Any directory that contains -only directories does not need the ``lit.local.cfg`` file. Read the :doc:`Lit -documentation <CommandGuide/lit>` for more information. +only directories does not need the `lit.local.cfg` file. Read the +{doc}`Lit documentation <CommandGuide/lit>` for more information. -Each test file must contain lines starting with "RUN:" that tell :program:`lit` -how to run it. If there are no ``RUN`` lines, :program:`lit` will issue an error +Each test file must contain lines starting with "RUN:" that tell {program}`lit` +how to run it. If there are no `RUN` lines, {program}`lit` will issue an error while running a test. -``RUN`` lines are specified in the comments of the test program using the -keyword ``RUN`` followed by a colon, and lastly the command (pipeline) -to execute. Together, these lines form the "script" that :program:`lit` -executes to run the test case. The syntax of the ``RUN`` lines is similar to a +`RUN` lines are specified in the comments of the test program using the +keyword `RUN` followed by a colon, and lastly the command (pipeline) +to execute. Together, these lines form the "script" that {program}`lit` +executes to run the test case. The syntax of the `RUN` lines is similar to a shell's syntax for pipelines including I/O redirection and variable substitution. However, even though these lines may *look* like a shell -script, they are not. ``RUN`` lines are interpreted by :program:`lit`. +script, they are not. `RUN` lines are interpreted by {program}`lit`. Consequently, the syntax differs from shell in a few ways.
You can specify -as many ``RUN`` lines as needed. +as many `RUN` lines as needed. -:program:`lit` performs substitution on each ``RUN`` line to replace LLVM tool names +{program}`lit` performs substitution on each `RUN` line to replace LLVM tool names with the full paths to the executable built for each tool (in -``$(LLVM_OBJ_ROOT)/bin``). This ensures that :program:`lit` does +`$(LLVM_OBJ_ROOT)/bin`). This ensures that {program}`lit` does not invoke any stray LLVM tools in the user's path during testing. -Each ``RUN`` line is executed on its own, distinct from other lines unless -its last character is ``\``. This continuation character causes the ``RUN`` +Each `RUN` line is executed on its own, distinct from other lines unless +its last character is `\`. This continuation character causes the `RUN` line to be concatenated with the next one. In this way, you can build up long pipelines of commands without making huge line lengths. The lines -ending in ``\`` are concatenated until a ``RUN`` line that doesn't end in -``\`` is found. This concatenated set of ``RUN`` lines then constitutes one -execution. :program:`lit` will substitute variables and arrange for the pipeline +ending in `\` are concatenated until a `RUN` line that doesn't end in +`\` is found. This concatenated set of `RUN` lines then constitutes one +execution. {program}`lit` will substitute variables and arrange for the pipeline to be executed. If any process in the pipeline fails, the entire line (and test case) fails too. -Below is an example of legal ``RUN`` lines in a ``.ll`` file: - -.. 
code-block:: llvm +Below is an example of legal `RUN` lines in a `.ll` file: - ; RUN: llvm-as < %s | llvm-dis > %t1 - ; RUN: llvm-dis < %s.bc-13 > %t2 - ; RUN: diff %t1 %t2 +```llvm +; RUN: llvm-as < %s | llvm-dis > %t1 +; RUN: llvm-dis < %s.bc-13 > %t2 +; RUN: diff %t1 %t2 +``` -As with a Unix shell, the ``RUN`` lines permit pipelines and I/O +As with a Unix shell, the `RUN` lines permit pipelines and I/O redirection to be used. There are some quoting rules that you must pay attention to when writing -your ``RUN`` lines. In general, nothing needs to be quoted. :program:`lit` won't +your `RUN` lines. In general, nothing needs to be quoted. {program}`lit` won't strip off any quote characters, so they will get passed to the invoked program. -To avoid this use curly braces to tell :program:`lit` that it should treat +To avoid this use curly braces to tell {program}`lit` that it should treat everything enclosed as one value. -In general, you should strive to keep your ``RUN`` lines as simple as possible, +In general, you should strive to keep your `RUN` lines as simple as possible, using them only to run tools that generate textual output you can then examine. The recommended way to examine output to figure out if the test passes is using -the :doc:`FileCheck tool <CommandGuide/FileCheck>`. *[The usage of grep in ``RUN`` +the {doc}`FileCheck tool <CommandGuide/FileCheck>`. *[The usage of grep in `RUN` lines is deprecated - please do not send or commit patches that use it.]* Put related tests into a single file rather than having a separate file per test. Check if there are files already covering your feature and consider adding your code there instead of creating a new file. -Generating assertions in regression tests ------------------------------------------ +### Generating assertions in regression tests Some regression test cases are very large and complex to write/update by hand. In that case, to reduce the manual work, we can use the scripts available in -``llvm/utils/`` to generate the assertions.
+`llvm/utils/` to generate the assertions. -For example, to generate assertions in an :program:`llc`-based test, after -adding one or more ``RUN`` lines, use: +For example, to generate assertions in an {program}`llc`-based test, after +adding one or more `RUN` lines, use: - .. code-block:: bash +```bash +% llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc test.ll +``` - % llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc test.ll - -This will generate FileCheck assertions, and insert a ``NOTE:`` line at the +This will generate FileCheck assertions, and insert a `NOTE:` line at the top to indicate that assertions were automatically generated. If you want to update assertions in an existing test case, pass the `-u` option -which first checks the ``NOTE:`` line exists and matches the script name. +which first checks the `NOTE:` line exists and matches the script name. Sometimes, a test absolutely depends on hand-written assertions and should not -have assertions automatically generated. In that case, add the text ``NOTE: Do -not autogenerate`` to the first line, and the scripts will skip that test. It +have assertions automatically generated. In that case, add the text `NOTE: Do +not autogenerate` to the first line, and the scripts will skip that test. It is a good idea to explain why generated assertions will not work for the test so future developers will understand what is going on. These are the most common scripts and their purposes/applications in generating assertions: -.. 
code-block:: none - - update_analyze_test_checks.py - opt -passes='print' +``` +update_analyze_test_checks.py +opt -passes='print' - update_cc_test_checks.py - C/C++, or clang/clang++ (IR checks) +update_cc_test_checks.py +C/C++, or clang/clang++ (IR checks) - update_llc_test_checks.py - llc (assembly checks) +update_llc_test_checks.py +llc (assembly checks) - update_mca_test_checks.py - llvm-mca +update_mca_test_checks.py +llvm-mca - update_mir_test_checks.py - llc (MIR checks) +update_mir_test_checks.py +llc (MIR checks) - update_test_checks.py - opt +update_test_checks.py +opt - update_llubi_test_checks.py - llubi +update_llubi_test_checks.py +llubi +``` -Precommit workflow for tests ----------------------------- +### Precommit workflow for tests If the test does not crash, assert, or infinite loop, commit the test with baseline check-lines first. That is, the test will show a miscompile or @@ -345,8 +331,7 @@ Remove TODO/FIXME comments added in the previous step if a problem is solved. Baseline tests (no-functional-change or NFC patch) may be pushed to main without pre-commit review if you have commit access. -Best practices for regression tests ------------------------------------ +### Best practices for regression tests - Use auto-generated check lines (produced by the scripts mentioned above) whenever feasible. @@ -354,19 +339,19 @@ Best practices for regression tests are relevant issues in the bug tracker, add references to those bug reports (for example, "See PR999 for more details"). - Avoid undefined behavior and poison/undef values unless necessary. For - example, do not use patterns like ``br i1 undef``, which are likely to break + example, do not use patterns like `br i1 undef`, which are likely to break as a result of future optimizations. - Minimize tests by removing unnecessary instructions, metadata, attributes, - etc. Tools like ``llvm-reduce`` can partially automate this, but usually + etc. 
Tools like `llvm-reduce` can partially automate this, but usually some manual cleanup is still required. - Outside PhaseOrdering tests, only run a minimal set of passes. For example, - prefer ``opt -S -passes=instcombine`` over ``opt -S -O3``. -- Avoid unnamed instructions/blocks (such as ``%0`` or ``1:``), because they may + prefer `opt -S -passes=instcombine` over `opt -S -O3`. +- Avoid unnamed instructions/blocks (such as `%0` or `1:`), because they may require renumbering on future test modifications. These can be removed by - running the test through ``opt -S -passes=instnamer``. + running the test through `opt -S -passes=instnamer`. - Try to give values (including variables, blocks and functions) meaningful names, and avoid retaining complex names generated by the optimization - pipeline (such as ``%foo.0.0.0.0.0.0``). + pipeline (such as `%foo.0.0.0.0.0.0`). - Omit target triples and datalayouts when possible. If the test case only exercises the code path of interest with a specific target triple and/or datalayout, then they should be in the test, but otherwise they @@ -376,203 +361,201 @@ Best practices for regression tests an existing file rather than creating a new one. If your test requires a specific target, prefer putting it in a target-specific test subdirectory rather than requiring a specific backend to be built with a - ``REQUIRES:`` line. -- Use a minimal number of non-default (``CHECK-``) check prefixes. - Avoid setting a check prefix if all ``FileCheck`` invocations in the test + `REQUIRES:` line. +- Use a minimal number of non-default (`CHECK-`) check prefixes. + Avoid setting a check prefix if all `FileCheck` invocations in the test expect the same output. 
-Extra files ------------ +### Extra files -If your test requires extra files besides the file containing the ``RUN:`` lines, +If your test requires extra files besides the file containing the `RUN:` lines, and the extra files are small, consider specifying them in the same file and -using ``split-file`` to extract them. For example, +using `split-file` to extract them. For example, -.. code-block:: llvm +```llvm +; RUN: split-file %s %t +; RUN: llvm-link -S %t/a.ll %t/b.ll | FileCheck %s - ; RUN: split-file %s %t - ; RUN: llvm-link -S %t/a.ll %t/b.ll | FileCheck %s +; CHECK: ... - ; CHECK: ... +;--- a.ll +... +;--- b.ll +... +``` - ;--- a.ll - ... - ;--- b.ll - ... +The parts are separated by the regex `^(.|//)--- `. -The parts are separated by the regex ``^(.|//)--- ``. +If you want to test relative line numbers like `[[#@LINE+1]]`, specify +`--leading-lines` to add leading empty lines to preserve line numbers. -If you want to test relative line numbers like ``[[#@LINE+1]]``, specify -``--leading-lines`` to add leading empty lines to preserve line numbers. +If the extra files are large, the idiomatic place to put them is in a subdirectory `Inputs`. +You can then refer to the extra files as `%S/Inputs/foo.bar`. -If the extra files are large, the idiomatic place to put them is in a subdirectory ``Inputs``. -You can then refer to the extra files as ``%S/Inputs/foo.bar``. +For example, consider `test/Linker/ident.ll`. The directory structure is +as follows: -For example, consider ``test/Linker/ident.ll``. The directory structure is -as follows:: - - test/ - Linker/ - ident.ll - Inputs/ - ident.a.ll - ident.b.ll +``` +test/ + Linker/ + ident.ll + Inputs/ + ident.a.ll + ident.b.ll +``` For convenience, these are the contents: -.. 
code-block:: llvm - - ;;;;; ident.ll: +```llvm +;;;;; ident.ll: - ; RUN: llvm-link %S/Inputs/ident.a.ll %S/Inputs/ident.b.ll -S | FileCheck %s +; RUN: llvm-link %S/Inputs/ident.a.ll %S/Inputs/ident.b.ll -S | FileCheck %s - ; Verify that multiple input llvm.ident metadata are linked together. +; Verify that multiple input llvm.ident metadata are linked together. - ; CHECK-DAG: !llvm.ident = !{!0, !1, !2} - ; CHECK-DAG: "Compiler V1" - ; CHECK-DAG: "Compiler V2" - ; CHECK-DAG: "Compiler V3" +; CHECK-DAG: !llvm.ident = !{!0, !1, !2} +; CHECK-DAG: "Compiler V1" +; CHECK-DAG: "Compiler V2" +; CHECK-DAG: "Compiler V3" - ;;;;; Inputs/ident.a.ll: +;;;;; Inputs/ident.a.ll: - !llvm.ident = !{!0, !1} - !0 = metadata !{metadata !"Compiler V1"} - !1 = metadata !{metadata !"Compiler V2"} +!llvm.ident = !{!0, !1} +!0 = metadata !{metadata !"Compiler V1"} +!1 = metadata !{metadata !"Compiler V2"} - ;;;;; Inputs/ident.b.ll: +;;;;; Inputs/ident.b.ll: - !llvm.ident = !{!0} - !0 = metadata !{metadata !"Compiler V3"} +!llvm.ident = !{!0} +!0 = metadata !{metadata !"Compiler V3"} +``` -For symmetry, ``ident.ll`` is just a dummy file that doesn't -actually participate in the test besides holding the ``RUN:`` lines. +For symmetry, `ident.ll` is just a dummy file that doesn't +actually participate in the test besides holding the `RUN:` lines. -.. note:: +```{note} +Some existing tests use `RUN: true` in extra files instead of just +putting the extra files in an `Inputs/` directory. This pattern is +deprecated. +``` - Some existing tests use ``RUN: true`` in extra files instead of just - putting the extra files in an ``Inputs/`` directory. This pattern is - deprecated. - -Elaborated tests ----------------- +### Elaborated tests Generally, IR and assembly test files benefit from being cleaned to remove unnecessary details. 
However, for tests requiring elaborate IR or assembly files where cleanup is less practical (e.g., a large amount of debug information output from Clang), you can include generation instructions within -``split-file`` part called ``gen``. Then, run -``llvm/utils/update_test_body.py`` on the test file to generate the needed +`split-file` part called `gen`. Then, run +`llvm/utils/update_test_body.py` on the test file to generate the needed content. -.. code-block:: none - - ; RUN: rm -rf %t && split-file %s %t && cd %t - ; RUN: opt -S a.ll ... | FileCheck %s +``` +; RUN: rm -rf %t && split-file %s %t && cd %t +; RUN: opt -S a.ll ... | FileCheck %s - ; CHECK: hello +; CHECK: hello - ;--- a.cc - int va; - ;--- gen - clang --target=x86_64-linux -S -emit-llvm -g a.cc -o - +;--- a.cc +int va; +;--- gen +clang --target=x86_64-linux -S -emit-llvm -g a.cc -o - - ;--- a.ll - # content generated by the script 'gen' +;--- a.ll +# content generated by the script 'gen' +``` -.. code-block:: bash +```bash +PATH=/path/to/clang_build/bin:$PATH llvm/utils/update_test_body.py path/to/test.ll +``` - PATH=/path/to/clang_build/bin:$PATH llvm/utils/update_test_body.py path/to/test.ll - -The script will prepare extra files with ``split-file``, invoke ``gen``, and -then rewrite the part after ``gen`` with its stdout. +The script will prepare extra files with `split-file`, invoke `gen`, and +then rewrite the part after `gen` with its stdout. For convenience, if the test needs a single assembly file, you can also wrap -``gen`` and its required files with ``.ifdef`` and ``.endif``. Then you can -skip ``split-file`` in ``RUN`` lines. - -.. code-block:: none - - # RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o a.o - # RUN: ... | FileCheck %s - - # CHECK: hello - - .ifdef GEN - #--- a.cc - int va; - #--- gen - clang --target=x86_64-linux -S -g a.cc -o - - .endif - # content generated by the script 'gen' - -.. 
note:: - - Consider specifying an explicit target triple to avoid differences when - regeneration is needed on another machine. - - ``gen`` is invoked with ``PWD`` set to ``/proc/self/cwd``. Clang commands - don't need ``-fdebug-compilation-dir=`` since its default value is ``PWD``. - - Check prefixes should be placed before ``.endif`` since the part after - ``.endif`` is replaced. - -If the test body contains multiple files, you can print ``---`` separators and -utilize ``split-file`` in ``RUN`` lines. - -.. code-block:: none - - # RUN: rm -rf %t && split-file %s %t && cd %t - ... - - #--- a.cc - int va; - #--- b.cc - int vb; - #--- gen - clang --target=x86_64-linux -S -O1 -g a.cc -o - - echo '#--- b.s' - clang --target=x86_64-linux -S -O1 -g b.cc -o - - #--- a.s - -Fragile tests -------------- +`gen` and its required files with `.ifdef` and `.endif`. Then you can +skip `split-file` in `RUN` lines. + +``` +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o a.o +# RUN: ... | FileCheck %s + +# CHECK: hello + +.ifdef GEN +#--- a.cc +int va; +#--- gen +clang --target=x86_64-linux -S -g a.cc -o - +.endif +# content generated by the script 'gen' +``` + +```{note} +Consider specifying an explicit target triple to avoid differences when +regeneration is needed on another machine. + +`gen` is invoked with `PWD` set to `/proc/self/cwd`. Clang commands +don't need `-fdebug-compilation-dir=` since its default value is `PWD`. + +Check prefixes should be placed before `.endif` since the part after +`.endif` is replaced. +``` + +If the test body contains multiple files, you can print `---` separators and +utilize `split-file` in `RUN` lines. + +``` +# RUN: rm -rf %t && split-file %s %t && cd %t +... 
+ +#--- a.cc +int va; +#--- b.cc +int vb; +#--- gen +clang --target=x86_64-linux -S -O1 -g a.cc -o - +echo '#--- b.s' +clang --target=x86_64-linux -S -O1 -g b.cc -o - +#--- a.s +``` + +### Fragile tests It is easy to write a fragile test that could fail spuriously if the tool being -tested outputs a full path to the input file. For example, :program:`opt` by -default outputs a ``ModuleID``: +tested outputs a full path to the input file. For example, {program}`opt` by +default outputs a `ModuleID`: -.. code-block:: console +```console +$ cat example.ll +define i32 @main() nounwind { + ret i32 0 +} - $ cat example.ll - define i32 @main() nounwind { - ret i32 0 - } +$ opt -S /path/to/example.ll +; ModuleID = '/path/to/example.ll' - $ opt -S /path/to/example.ll - ; ModuleID = '/path/to/example.ll' +define i32 @main() nounwind { + ret i32 0 +} +``` - define i32 @main() nounwind { - ret i32 0 - } +`ModuleID` can unexpectedly match against `CHECK` lines. For example: -``ModuleID`` can unexpectedly match against ``CHECK`` lines. For example: +```llvm +; RUN: opt -S %s | FileCheck -.. code-block:: llvm +define i32 @main() nounwind { + ; CHECK-NOT: load + ret i32 0 +} +``` - ; RUN: opt -S %s | FileCheck +This test will fail if placed into a `download` directory. - define i32 @main() nounwind { - ; CHECK-NOT: load - ret i32 0 - } +To make your tests robust, always use `opt ... < %s` in the `RUN` line. +{program}`opt` does not output a `ModuleID` when input comes from stdin. -This test will fail if placed into a ``download`` directory. - -To make your tests robust, always use ``opt ... < %s`` in the ``RUN`` line. -:program:`opt` does not output a ``ModuleID`` when input comes from stdin. 
- -Platform-Specific Tests ------------------------ +### Platform-Specific Tests Whenever adding tests that require the knowledge of a specific platform, either related to code generated, specific output or back-end features, @@ -584,45 +567,45 @@ The first problem is to check for target-specific output, for example sizes of structures, paths and architecture names, for example: * Tests containing Windows paths will fail on Linux and vice versa. -* Tests that check for ``x86_64`` somewhere in the text will fail anywhere else. +* Tests that check for `x86_64` somewhere in the text will fail anywhere else. * Tests where the debug information calculates the size of types and structures. Also, if the test relies on any behaviour that is coded in any back-end, it must go in its own directory. So, for instance, code generator tests for ARM go -into ``test/CodeGen/ARM`` and so on. Those directories contain a special -``lit`` configuration file that ensures all tests in that directory will +into `test/CodeGen/ARM` and so on. Those directories contain a special +`lit` configuration file that ensures all tests in that directory will only run if a specific back-end is compiled and available. -For instance, on ``test/CodeGen/ARM``, the ``lit.local.cfg`` is: - -.. code-block:: python +For instance, on `test/CodeGen/ARM`, the `lit.local.cfg` is: - config.suffixes = ['.ll', '.c', '.cpp', '.test'] - if not 'ARM' in config.root.targets: - config.unsupported = True +```python +config.suffixes = ['.ll', '.c', '.cpp', '.test'] +if not 'ARM' in config.root.targets: + config.unsupported = True +``` Other platform-specific tests are those that depend on a specific feature -of a specific sub-architecture, for example only to Intel chips that support ``AVX2``. +of a specific sub-architecture, for example only to Intel chips that support `AVX2`. 
-For instance, ``test/CodeGen/X86/psubus.ll`` tests three sub-architecture +For instance, `test/CodeGen/X86/psubus.ll` tests three sub-architecture variants: -.. code-block:: llvm - - ; RUN: llc -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2 - ; RUN: llc -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1 - ; RUN: llc -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2 +```llvm +; RUN: llc -mcpu=core2 < %s | FileCheck %s -check-prefix=SSE2 +; RUN: llc -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1 +; RUN: llc -mcpu=core-avx2 < %s | FileCheck %s -check-prefix=AVX2 +``` And the checks are different: -.. code-block:: llvm - - ; SSE2: @test1 - ; SSE2: psubusw LCPI0_0(%rip), %xmm0 - ; AVX1: @test1 - ; AVX1: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0 - ; AVX2: @test1 - ; AVX2: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0 +```llvm +; SSE2: @test1 +; SSE2: psubusw LCPI0_0(%rip), %xmm0 +; AVX1: @test1 +; AVX1: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0 +; AVX2: @test1 +; AVX2: vpsubusw LCPI0_0(%rip), %xmm0, %xmm0 +``` So, if you're testing for a behaviour that you know is platform-specific or depends on special features of sub-architectures, you must add the specific @@ -630,479 +613,495 @@ triple, test with the specific FileCheck and put it into the specific directory that will filter out all other architectures. -Constraining test execution ---------------------------- +### Constraining test execution Some tests can be run only in specific configurations, such as -with debug builds or on particular platforms. Use ``REQUIRES`` -and ``UNSUPPORTED`` to control when the test is enabled. +with debug builds or on particular platforms. Use `REQUIRES` +and `UNSUPPORTED` to control when the test is enabled. Some tests are expected to fail. For example, there may be a known bug -that the test detects. Use ``XFAIL`` to mark a test as an expected failure. -An ``XFAIL`` test will be successful if its execution fails, and +that the test detects. 
Use `XFAIL` to mark a test as an expected failure. +An `XFAIL` test will be successful if its execution fails, and will be a failure if its execution succeeds. -.. code-block:: llvm +```llvm +; This test will be only enabled in the build with asserts. +; REQUIRES: asserts +; This test is disabled when running on Linux. +; UNSUPPORTED: system-linux +; This test is expected to fail when targeting PowerPC. +; XFAIL: target=powerpc{{.*}} +``` - ; This test will be only enabled in the build with asserts. - ; REQUIRES: asserts - ; This test is disabled when running on Linux. - ; UNSUPPORTED: system-linux - ; This test is expected to fail when targeting PowerPC. - ; XFAIL: target=powerpc{{.*}} - -``REQUIRES`` and ``UNSUPPORTED`` and ``XFAIL`` all accept a comma-separated +`REQUIRES` and `UNSUPPORTED` and `XFAIL` all accept a comma-separated list of boolean expressions. The values in each expression may be: -- Features added to ``config.available_features`` by configuration files such as ``lit.cfg``. +- Features added to `config.available_features` by configuration files such as `lit.cfg`. String comparison of features is case-sensitive. Furthermore, a boolean expression can - contain any Python regular expression enclosed in ``{{ }}``, in which case the boolean + contain any Python regular expression enclosed in `{{ }}`, in which case the boolean expression is satisfied if any feature matches the regular expression. Regular - expressions can appear inside an identifier, so for example ``he{{l+}}o`` would match - ``helo``, ``hello``, ``helllo``, and so on. -- The default target triple, preceded by the string ``target=`` (for example, - ``target=x86_64-pc-windows-msvc``). Typically, regular expressions are used - to match parts of the triple (for example, ``target={{.*}}-windows{{.*}}`` + expressions can appear inside an identifier, so for example `he{{l+}}o` would match + `helo`, `hello`, `helllo`, and so on. 
+- The default target triple, preceded by the string `target=` (for example, + `target=x86_64-pc-windows-msvc`). Typically, regular expressions are used + to match parts of the triple (for example, `target={{.*}}-windows{{.*}}` to match any Windows target triple). -| ``REQUIRES`` enables the test if all expressions are true. -| ``UNSUPPORTED`` disables the test if any expression is true. -| ``XFAIL`` expects the test to fail if any expression is true. - -Use, ``XFAIL: *`` if the test is expected to fail everywhere. Similarly, use -``UNSUPPORTED: target={{.*}}`` to disable the test everywhere. +- `REQUIRES` enables the test if all expressions are true. +- `UNSUPPORTED` disables the test if any expression is true. +- `XFAIL` expects the test to fail if any expression is true. -.. code-block:: llvm +Use, `XFAIL: *` if the test is expected to fail everywhere. Similarly, use +`UNSUPPORTED: target={{.*}}` to disable the test everywhere. - ; This test is disabled when running on Windows, - ; and is disabled when targeting Linux, except for Android Linux. - ; UNSUPPORTED: system-windows, target={{.*linux.*}} && !target={{.*android.*}} - ; This test is expected to fail when targeting PowerPC or running on Darwin. - ; XFAIL: target=powerpc{{.*}}, system-darwin +```llvm +; This test is disabled when running on Windows, +; and is disabled when targeting Linux, except for Android Linux. +; UNSUPPORTED: system-windows, target={{.*linux.*}} && !target={{.*android.*}} +; This test is expected to fail when targeting PowerPC or running on Darwin. +; XFAIL: target=powerpc{{.*}}, system-darwin +``` +### Tips for writing constraints -Tips for writing constraints ----------------------------- +**`REQUIRES` and `UNSUPPORTED`** -**``REQUIRES`` and ``UNSUPPORTED``** - -These are logical inverses. In principle, ``UNSUPPORTED`` isn't absolutely -necessary (the logical negation could be used with ``REQUIRES`` to get +These are logical inverses. 
In principle, `UNSUPPORTED` isn't absolutely +necessary (the logical negation could be used with `REQUIRES` to get exactly the same effect), but it can make these clauses easier to read and -understand. Generally, people use ``REQUIRES`` to state things that the test -depends on to operate correctly, and ``UNSUPPORTED`` to exclude cases where +understand. Generally, people use `REQUIRES` to state things that the test +depends on to operate correctly, and `UNSUPPORTED` to exclude cases where the test is expected never to work. -**``UNSUPPORTED`` and ``XFAIL``** +**`UNSUPPORTED` and `XFAIL`** Both of these indicate that the test isn't expected to work; however, they -have different effects. ``UNSUPPORTED`` causes the test to be skipped; +have different effects. `UNSUPPORTED` causes the test to be skipped; this saves execution time, but then you'll never know whether the test -actually would start working. Conversely, ``XFAIL`` actually runs the test +actually would start working. Conversely, `XFAIL` actually runs the test but expects a failure output, taking extra execution time but alerting you -if/when the test begins to behave correctly (an ``XPASS`` test result). You +if/when the test begins to behave correctly (an `XPASS` test result). You need to decide which is more appropriate in each case. -**Using ``target=...``** +**Using `target=...`** Checking the target triple can be tricky; it's easy to mis-specify. For -example, ``target=mips{{.*}}`` will match not only mips, but also mipsel, -mips64, and mips64el. ``target={{.*}}-linux-gnu`` will match +example, `target=mips{{.*}}` will match not only mips, but also mipsel, +mips64, and mips64el. `target={{.*}}-linux-gnu` will match x86_64-unknown-linux-gnu, but not armv8l-unknown-linux-gnueabihf. 
-Prefer to use hyphens to delimit triple components (``target=mips-{{.*}}``) +Prefer to use hyphens to delimit triple components (`target=mips-{{.*}}`) and it's generally a good idea to use a trailing wildcard to allow for unexpected suffixes. Also, it's generally better to write regular expressions that use entire triple components than to do something clever to shorten them. For example, to match both freebsd and netbsd in an expression, you could write -``target={{.*(free|net)bsd.*}}`` and that would work. However, it would -prevent a ``grep freebsd`` from finding this test. Better to use: -``target={{.+-freebsd.*}} || target={{.+-netbsd.*}}`` +`target={{.*(free|net)bsd.*}}` and that would work. However, it would +prevent a `grep freebsd` from finding this test. Better to use: +`target={{.+-freebsd.*}} || target={{.+-netbsd.*}}` -Substitutions -------------- +### Substitutions Besides replacing LLVM tool names, the following substitutions are performed in -``RUN`` lines: +`RUN` lines: + +`%%` + +: Replaced by a single `%`. This allows escaping other substitutions. + +`%s` -``%%`` - Replaced by a single ``%``. This allows escaping other substitutions. +: File path to the test case's source. This is suitable for passing on the + command line as the input to an LLVM tool. -``%s`` - File path to the test case's source. This is suitable for passing on the - command line as the input to an LLVM tool. + Example: `/home/user/llvm/test/MC/ELF/foo_test.s` - Example: ``/home/user/llvm/test/MC/ELF/foo_test.s`` +`%S` -``%S`` - Directory path to the test case's source. +: Directory path to the test case's source. - Example: ``/home/user/llvm/test/MC/ELF`` + Example: `/home/user/llvm/test/MC/ELF` -``%t`` - File path to a temporary file name that can be used for this test case. - The file name won't conflict with other test cases. You can append to it - if you need multiple temporaries. This is useful as the destination of - some redirected output. 
+`%t` - Example: ``/home/user/llvm.build/test/MC/ELF/Output/foo_test.s.tmp`` +: File path to a temporary file name that can be used for this test case. + The file name won't conflict with other test cases. You can append to it + if you need multiple temporaries. This is useful as the destination of + some redirected output. -``%T`` - Directory of ``%t``. Deprecated. Shouldn't be used, because it can be easily - misused and cause race conditions between tests. + Example: `/home/user/llvm.build/test/MC/ELF/Output/foo_test.s.tmp` - Use ``rm -rf %t && mkdir %t`` instead if a temporary directory is necessary. +`%T` - Example: ``/home/user/llvm.build/test/MC/ELF/Output`` +: Directory of `%t`. Deprecated. Shouldn't be used, because it can be easily + misused and cause race conditions between tests. -``%{pathsep}`` + Use `rm -rf %t && mkdir %t` instead if a temporary directory is necessary. - Expands to the path separator, i.e. ``:`` (or ``;`` on Windows). + Example: `/home/user/llvm.build/test/MC/ELF/Output` -``%{fs-src-root}`` - Expands to the root component of file system paths for the source directory, - i.e. ``/`` on Unix systems or ``C:\`` (or another drive) on Windows. +`%{pathsep}` -``%{fs-tmp-root}`` - Expands to the root component of file system paths for the test's temporary - directory, i.e. ``/`` on Unix systems or ``C:\`` (or another drive) on - Windows. +: Expands to the path separator, i.e. `:` (or `;` on Windows). -``%{fs-sep}`` - Expands to the file system separator, i.e. ``/`` or ``\`` on Windows. +`%{fs-src-root}` -``%/s, %/S, %/t, %/T`` +: Expands to the root component of file system paths for the source directory, + i.e. `/` on Unix systems or `C:\` (or another drive) on Windows. - Act like the corresponding substitution above but replace any ``\`` - character with a ``/``. This is useful to normalize path separators. 
+`%{fs-tmp-root}` - Example: ``%s: C:\Desktop Files/foo_test.s.tmp`` +: Expands to the root component of file system paths for the test's temporary + directory, i.e. `/` on Unix systems or `C:\` (or another drive) on + Windows. - Example: ``%/s: C:/Desktop Files/foo_test.s.tmp`` +`%{fs-sep}` -``%{s:real}, %{S:real}, %{t:real}, %{T:real}`` -``%{/s:real}, %{/S:real}, %{/t:real}, %{/T:real}`` +: Expands to the file system separator, i.e. `/` or `\` on Windows. - Act like the corresponding substitution, including with ``/``, but use - the real path by expanding all symbolic links and substitute drives. +`%/s, %/S, %/t, %/T` - Example: ``%s: S:\foo_test.s.tmp`` +: Act like the corresponding substitution above but replace any `\` + character with a `/`. This is useful to normalize path separators. - Example: ``%{/s:real}: C:/SDrive/foo_test.s.tmp`` + Example: `%s: C:\Desktop Files/foo_test.s.tmp` -``%:s, %:S, %:t, %:T`` + Example: `%/s: C:/Desktop Files/foo_test.s.tmp` - Act like the corresponding substitution above but remove colons at - the beginning of Windows paths. This is useful to allow concatenation - of absolute paths on Windows to produce a legal path. +`%{s:real}, %{S:real}, %{t:real}, %{T:real}` \ +`%{/s:real}, %{/S:real}, %{/t:real}, %{/T:real}` - Example: ``%s: C:\Desktop Files\foo_test.s.tmp`` +: Act like the corresponding substitution, including with `/`, but use + the real path by expanding all symbolic links and substitute drives. - Example: ``%:s: C\Desktop Files\foo_test.s.tmp`` + Example: `%s: S:\foo_test.s.tmp` -``%errc_`` + Example: `%{/s:real}: C:/SDrive/foo_test.s.tmp` - Some error messages may be substituted to allow different spellings - based on the host platform. +`%:s, %:S, %:t, %:T` - The following error codes are currently supported: - ENOENT, EISDIR, EINVAL, EACCES. +: Act like the corresponding substitution above but remove colons at + the beginning of Windows paths. 
This is useful to allow concatenation + of absolute paths on Windows to produce a legal path. - Example: ``Linux %errc_ENOENT: No such file or directory`` + Example: `%s: C:\Desktop Files\foo_test.s.tmp` - Example: ``Windows %errc_ENOENT: no such file or directory`` + Example: `%:s: C\Desktop Files\foo_test.s.tmp` -``%if feature %{<if branch>%} %else %{<else branch>%}`` +`%errc_<ERRCODE>` - Conditional substitution: if ``feature`` is available it expands to - ``<if branch>``, otherwise it expands to ``<else branch>``. - ``%else %{<else branch>%}`` is optional and treated like ``%else %{%}`` - if not present. +: Some error messages may be substituted to allow different spellings + based on the host platform. -``%(line)``, ``%(line+<number>)``, ``%(line-<number>)`` + The following error codes are currently supported: + ENOENT, EISDIR, EINVAL, EACCES. - The number of the line where this substitution is used, with an - optional integer offset. These expand only if they appear - immediately in ``RUN:``, ``DEFINE:``, and ``REDEFINE:`` directives. - Occurrences in substitutions defined elsewhere are never expanded. - For example, this can be used in tests with multiple ``RUN`` lines, - which reference the test file's line numbers. + Example: `Linux %errc_ENOENT: No such file or directory` + + Example: `Windows %errc_ENOENT: no such file or directory` + +`%if feature %{<if branch>%} %else %{<else branch>%}` + +: Conditional substitution: if `feature` is available it expands to + `<if branch>`, otherwise it expands to `<else branch>`. + `%else %{<else branch>%}` is optional and treated like `%else %{%}` + if not present. + +`%(line)`, `%(line+<number>)`, `%(line-<number>)` + +: The number of the line where this substitution is used, with an + optional integer offset. These expand only if they appear + immediately in `RUN:`, `DEFINE:`, and `REDEFINE:` directives. + Occurrences in substitutions defined elsewhere are never expanded. + For example, this can be used in tests with multiple `RUN` lines, + which reference the test file's line numbers.
**LLVM-specific substitutions:** -``%shlibext`` - The suffix for the host platforms shared library files. This includes the - period as the first character. +`%shlibext` + +: The suffix for the host platforms shared library files. This includes the + period as the first character. - Example: ``.so`` (Linux), ``.dylib`` (macOS), ``.dll`` (Windows) + Example: `.so` (Linux), `.dylib` (macOS), `.dll` (Windows) -``%exeext`` - The suffix for the host platforms executable files. This includes the - period as the first character. +`%exeext` - Example: ``.exe`` (Windows), empty on Linux. +: The suffix for the host platforms executable files. This includes the + period as the first character. + + Example: `.exe` (Windows), empty on Linux. **Clang-specific substitutions:** -``%clang`` - Invokes the Clang driver. +`%clang` + +: Invokes the Clang driver. -``%clang_cpp`` - Invokes the Clang driver as the preprocessor. +`%clang_cpp` -``%clang_cl`` - Invokes the CL-compatible Clang driver. +: Invokes the Clang driver as the preprocessor. -``%clangxx`` - Invokes the G++-compatible Clang driver. +`%clang_cl` -``%clang_cc1`` - Invokes the Clang frontend. +: Invokes the CL-compatible Clang driver. -``%itanium_abi_triple``, ``%ms_abi_triple`` - These substitutions can be used to get the current target triple adjusted to - the desired ABI. For example, if the test suite is running with the - ``i686-pc-win32`` target, ``%itanium_abi_triple`` will expand to - ``i686-pc-mingw32``. This allows a test to run with a specific ABI without - constraining it to a specific triple. +`%clangxx` + +: Invokes the G++-compatible Clang driver. + +`%clang_cc1` + +: Invokes the Clang frontend. + +`%itanium_abi_triple`, `%ms_abi_triple` + +: These substitutions can be used to get the current target triple adjusted to + the desired ABI. For example, if the test suite is running with the + `i686-pc-win32` target, `%itanium_abi_triple` will expand to + `i686-pc-mingw32`. 
This allows a test to run with a specific ABI without + constraining it to a specific triple. **FileCheck-specific substitutions:** -``%ProtectFileCheckOutput`` - This should precede a ``FileCheck`` call if and only if the call's textual - output affects test results. It's usually easy to tell: just look for - redirection or piping of the ``FileCheck`` call's stdout or stderr. +`%ProtectFileCheckOutput` + +: This should precede a `FileCheck` call if and only if the call's textual + output affects test results. It's usually easy to tell: just look for + redirection or piping of the `FileCheck` call's stdout or stderr. -.. _Test-specific substitutions: +(Test-specific substitutions)= **Test-specific substitutions:** Additional substitutions can be defined as follows: -- Lit configuration files (e.g., ``lit.cfg`` or ``lit.local.cfg``) can define +- Lit configuration files (e.g., `lit.cfg` or `lit.local.cfg`) can define substitutions for all tests in a test directory. They do so by extending the - substitution list, ``config.substitutions``. Each item in the list is a tuple + substitution list, `config.substitutions`. Each item in the list is a tuple consisting of a pattern and its replacement, which lit applies as plain text - (even if it contains sequences that Python's ``re.sub`` considers to be + (even if it contains sequences that Python's `re.sub` considers to be escape sequences). - To define substitutions within a single test file, lit supports the - ``DEFINE:`` and ``REDEFINE:`` directives, described in detail below. So that + `DEFINE:` and `REDEFINE:` directives, described in detail below. So that they have no effect on other test files, these directives modify a copy of the substitution list that is produced by lit configuration files. 
For example, the following directives can be inserted into a test file to define -``%{cflags}`` and ``%{fcflags}`` substitutions with empty initial values, which -serve as the parameters of another newly defined ``%{check}`` substitution: +`%{cflags}` and `%{fcflags}` substitutions with empty initial values, which +serve as the parameters of another newly defined `%{check}` substitution: -.. code-block:: llvm +```llvm +; DEFINE: %{cflags} = +; DEFINE: %{fcflags} = - ; DEFINE: %{cflags} = - ; DEFINE: %{fcflags} = - - ; DEFINE: %{check} = \ - ; DEFINE: %clang_cc1 -verify -fopenmp -fopenmp-version=51 %{cflags} \ - ; DEFINE: -emit-llvm -o - %s | \ - ; DEFINE: FileCheck %{fcflags} %s +; DEFINE: %{check} = \ +; DEFINE: %clang_cc1 -verify -fopenmp -fopenmp-version=51 %{cflags} \ +; DEFINE: -emit-llvm -o - %s | \ +; DEFINE: FileCheck %{fcflags} %s +``` Alternatively, the above substitutions can be defined in a lit configuration file to be shared with other test files. Either way, the test file can then specify directives like the following to redefine the parameter substitutions as -desired before each use of ``%{check}`` in a ``RUN:`` line: - -.. 
code-block:: llvm +desired before each use of `%{check}` in a `RUN:` line: - ; REDEFINE: %{cflags} = -triple x86_64-apple-darwin10.6.0 -fopenmp-simd - ; REDEFINE: %{fcflags} = -check-prefix=SIMD - ; RUN: %{check} +```llvm +; REDEFINE: %{cflags} = -triple x86_64-apple-darwin10.6.0 -fopenmp-simd +; REDEFINE: %{fcflags} = -check-prefix=SIMD +; RUN: %{check} - ; REDEFINE: %{cflags} = -triple x86_64-unknown-linux-gnu -fopenmp-simd - ; REDEFINE: %{fcflags} = -check-prefix=SIMD - ; RUN: %{check} +; REDEFINE: %{cflags} = -triple x86_64-unknown-linux-gnu -fopenmp-simd +; REDEFINE: %{fcflags} = -check-prefix=SIMD +; RUN: %{check} - ; REDEFINE: %{cflags} = -triple x86_64-apple-darwin10.6.0 - ; REDEFINE: %{fcflags} = -check-prefix=NO-SIMD - ; RUN: %{check} +; REDEFINE: %{cflags} = -triple x86_64-apple-darwin10.6.0 +; REDEFINE: %{fcflags} = -check-prefix=NO-SIMD +; RUN: %{check} - ; REDEFINE: %{cflags} = -triple x86_64-unknown-linux-gnu - ; REDEFINE: %{fcflags} = -check-prefix=NO-SIMD - ; RUN: %{check} +; REDEFINE: %{cflags} = -triple x86_64-unknown-linux-gnu +; REDEFINE: %{fcflags} = -check-prefix=NO-SIMD +; RUN: %{check} +``` -Besides providing initial values, the initial ``DEFINE:`` directives for the +Besides providing initial values, the initial `DEFINE:` directives for the parameter substitutions in the above example serve a second purpose: they -establish the substitution order so that both ``%{check}`` and its parameters +establish the substitution order so that both `%{check}` and its parameters expand as desired. There's a simple way to remember the required definition order in a test file: define a substitution before any substitution that might refer to it. In general, substitution expansion behaves as follows: -- Upon arriving at each ``RUN:`` line, lit expands all substitutions in that - ``RUN:`` line using their current values from the substitution list. 
No - substitution expansion is performed immediately at ``DEFINE:`` and - ``REDEFINE:`` directives except ``%(line)``, ``%(line+<number>)``, and - ``%(line-<number>)``. -- When expanding substitutions in a ``RUN:`` line, lit makes only one pass +- Upon arriving at each `RUN:` line, lit expands all substitutions in that + `RUN:` line using their current values from the substitution list. No + substitution expansion is performed immediately at `DEFINE:` and + `REDEFINE:` directives except `%(line)`, `%(line+<number>)`, and + `%(line-<number>)`. +- When expanding substitutions in a `RUN:` line, lit makes only one pass through the substitution list by default. In this case, a substitution must have been inserted earlier in the substitution list than any substitution appearing in its value in order for the latter to expand. (For greater flexibility, you can enable multiple passes through the substitution list by - setting `recursiveExpansionLimit`_ in a lit configuration file.) + setting {ref}`recursiveExpansionLimit <recursiveExpansionLimit>` in a lit + configuration file.) - While lit configuration files can insert anywhere in the substitution list, - the insertion behavior of the ``DEFINE:`` and ``REDEFINE:`` directives is + the insertion behavior of the `DEFINE:` and `REDEFINE:` directives is specified below and is designed specifically for the use case presented in the example above. - Defining a substitution in terms of itself, whether directly or via other substitutions, should be avoided. It usually produces an infinitely recursive definition that cannot be fully expanded. It does *not* define the - substitution in terms of its previous value, even when using ``REDEFINE:``. + substitution in terms of its previous value, even when using `REDEFINE:`.
-The relationship between the ``DEFINE:`` and ``REDEFINE:`` directive is +The relationship between the `DEFINE:` and `REDEFINE:` directive is analogous to the relationship between a variable declaration and variable assignment in many programming languages: -- ``DEFINE: %{name} = value`` +- `DEFINE: %{name} = value` This directive assigns the specified value to a new substitution whose - pattern is ``%{name}``, or it reports an error if there is already a - substitution whose pattern contains ``%{name}`` because that could produce + pattern is `%{name}`, or it reports an error if there is already a + substitution whose pattern contains `%{name}` because that could produce confusing expansions (e.g., a lit configuration file might define a - substitution with the pattern ``%{name}\[0\]``). The new substitution is + substitution with the pattern `%{name}\[0\]`). The new substitution is inserted at the start of the substitution list so that it will expand first. Thus, its value can contain any substitution previously defined, whether in the same test file or in a lit configuration file, and both will expand. -- ``REDEFINE: %{name} = value`` +- `REDEFINE: %{name} = value` This directive assigns the specified value to an existing substitution whose - pattern is ``%{name}``, or it reports an error if there are no substitutions + pattern is `%{name}`, or it reports an error if there are no substitutions with that pattern or if there are multiple substitutions whose patterns - contain ``%{name}``. The substitution's current position in the substitution + contain `%{name}`. The substitution's current position in the substitution list does not change so that expansion order relative to other existing substitutions is preserved. 
-The following properties apply to both the ``DEFINE:`` and ``REDEFINE:`` +The following properties apply to both the `DEFINE:` and `REDEFINE:` directives: - **Substitution name**: In the directive, whitespace immediately before or - after ``%{name}`` is optional and discarded. ``%{name}`` must start with - ``%{``, it must end with ``}``, and the rest must start with a letter or + after `%{name}` is optional and discarded. `%{name}` must start with + `%{`, it must end with `}`, and the rest must start with a letter or underscore and contain only alphanumeric characters, hyphens, underscores, and colons. This syntax has a few advantages: - - It is impossible for ``%{name}`` to contain sequences that are special in - Python's ``re.sub`` patterns. Otherwise, attempting to specify - ``%{name}`` as a substitution pattern in a lit configuration file could + - It is impossible for `%{name}` to contain sequences that are special in + Python's `re.sub` patterns. Otherwise, attempting to specify + `%{name}` as a substitution pattern in a lit configuration file could produce confusing expansions. - The braces help avoid the possibility that another substitution's pattern - will match part of ``%{name}`` or vice-versa, producing confusing + will match part of `%{name}` or vice-versa, producing confusing expansions. However, the patterns of substitutions defined by lit configuration files and by lit itself are not restricted to this form, so overlaps are still theoretically possible. - **Substitution value**: The value includes all text from the first - non-whitespace character after ``=`` to the last non-whitespace character. If - there is no non-whitespace character after ``=``, the value is the empty - string. Escape sequences that can appear in Python ``re.sub`` replacement + non-whitespace character after `=` to the last non-whitespace character. If + there is no non-whitespace character after `=`, the value is the empty + string. 
Escape sequences that can appear in Python `re.sub` replacement strings are treated as plain text in the value. - **Line continuations**: If the last non-whitespace character on the line after - ``:`` is ``\``, then the next directive must use the same directive keyword - (e.g., ``DEFINE:``) , and it is an error if there is no additional directive. + `:` is `\`, then the next directive must use the same directive keyword + (e.g., `DEFINE:`) , and it is an error if there is no additional directive. That directive serves as a continuation. That is, before following the rules - above to parse the text after ``:`` in either directive, lit joins that text - together to form a single directive, replaces the ``\`` with a single space, + above to parse the text after `:` in either directive, lit joins that text + together to form a single directive, replaces the `\` with a single space, and removes any other whitespace that is now adjacent to that space. A continuation can be continued in the same manner. A continuation containing - only whitespace after its ``:`` is an error. + only whitespace after its `:` is an error. -.. _recursiveExpansionLimit: +(recursiveExpansionLimit)= **recursiveExpansionLimit:** -As described in the previous section, when expanding substitutions in a ``RUN:`` +As described in the previous section, when expanding substitutions in a `RUN:` line, lit makes only one pass through the substitution list by default. Thus, if substitutions are not defined in the proper order, some will remain in the -``RUN:`` line unexpanded. For example, the following directives refer to -``%{inner}`` within ``%{outer}`` but do not define ``%{inner}`` until after -``%{outer}``: +`RUN:` line unexpanded. For example, the following directives refer to +`%{inner}` within `%{outer}` but do not define `%{inner}` until after +`%{outer}`: -.. code-block:: llvm +```llvm +; By default, this definition order does not enable full expansion. 
- ; By default, this definition order does not enable full expansion. +; DEFINE: %{outer} = %{inner} +; DEFINE: %{inner} = expanded - ; DEFINE: %{outer} = %{inner} - ; DEFINE: %{inner} = expanded +; RUN: echo '%{outer}' +``` - ; RUN: echo '%{outer}' +`DEFINE:` inserts substitutions at the start of the substitution list, so +`%{inner}` expands first but has no effect because the original `RUN:` line +does not contain `%{inner}`. Next, `%{outer}` expands, and the output of +the `echo` command becomes: -``DEFINE:`` inserts substitutions at the start of the substitution list, so -``%{inner}`` expands first but has no effect because the original ``RUN:`` line -does not contain ``%{inner}``. Next, ``%{outer}`` expands, and the output of -the ``echo`` command becomes: - -.. code-block:: shell - - %{inner} +```shell +%{inner} +``` Of course, one way to fix this simple case is to reverse the definitions of -``%{outer}`` and ``%{inner}``. However, if a test has a complex set of +`%{outer}` and `%{inner}`. However, if a test has a complex set of substitutions that can all reference each other, there might not exist a sufficient substitution order. To address such use cases, lit configuration files support -``config.recursiveExpansionLimit``, which can be set to a non-negative integer +`config.recursiveExpansionLimit`, which can be set to a non-negative integer to specify the maximum number of passes through the substitution list. Thus, in the above example, setting the limit to 2 would cause lit to make a second pass -that expands ``%{inner}`` in the ``RUN:`` line, and the output from the ``echo`` +that expands `%{inner}` in the `RUN:` line, and the output from the `echo` command would then be: -.. code-block:: shell +```shell +expanded +``` - expanded - -To improve performance, lit will stop making passes when it notices the ``RUN:`` +To improve performance, lit will stop making passes when it notices the `RUN:` line has stopped changing. 
In the above example, setting the limit higher than 2 is thus harmless. To facilitate debugging, after reaching the limit, lit will make one extra pass -and report an error if the ``RUN:`` line changes again. In the above example, +and report an error if the `RUN:` line changes again. In the above example, setting the limit to 1 will thus cause lit to report an error instead of producing incorrect output. -Options -------- +### Options The llvm lit configuration allows some things to be customized with user options: -``llc``, ``opt``, ... - Substitute the respective llvm tool name with a custom command line. This +`llc`, `opt`, ... + +: Substitute the respective llvm tool name with a custom command line. This allows to specify custom paths and default arguments for these tools. Example: % llvm-lit "-Dllc=llc -verify-machineinstrs" -``run_long_tests`` - Enable the execution of long running tests. +`run_long_tests` -``llvm_site_config`` - Load the specified lit configuration instead of the default one. +: Enable the execution of long running tests. +`llvm_site_config` -Other Features --------------- +: Load the specified lit configuration instead of the default one. -To make ``RUN`` line writing easier, several helper programs are available. These -helpers are in the ``PATH`` when running tests, so you can just call them using + +### Other Features + +To make `RUN` line writing easier, several helper programs are available. These +helpers are in the `PATH` when running tests, so you can just call them using their name. For example: -``not`` - This program runs its arguments and then inverts the result code from it. - Zero result codes become 1. Non-zero result codes become 0. +`not` + +: This program runs its arguments and then inverts the result code from it. + Zero result codes become 1. Non-zero result codes become 0. 
-To make the output more useful, :program:`lit` will scan +To make the output more useful, {program}`lit` will scan the lines of the test case for ones that contain a pattern that matches -``PR[0-9]+``. This is the syntax for specifying a PR (Problem Report) number +`PR[0-9]+`. This is the syntax for specifying a PR (Problem Report) number that is related to the test case. The number after "PR" specifies the LLVM Bugzilla number. When a PR number is specified, it will be used in the pass/fail reporting. This is useful to quickly get some context when @@ -1110,7 +1109,7 @@ a test fails. Finally, any line that contains "END." will cause the special interpretation of lines to terminate. This is generally done right after -the last ``RUN:`` line. This has two side effects: +the last `RUN:` line. This has two side effects: (a) it prevents special interpretation of lines that are part of the test program, not the instructions to the test case, and diff --git a/llvm/docs/UserGuides.md b/llvm/docs/UserGuides.md index 1a5ff1e0e06dd..72ebd3b9cedce 100644 --- a/llvm/docs/UserGuides.md +++ b/llvm/docs/UserGuides.md @@ -1,343 +1,398 @@ -User Guides -=========== +# User Guides NOTE: If you are a user who is only interested in using an LLVM-based compiler, -you should look into `Clang `_ instead. The +you should look into [Clang](https://clang.llvm.org) instead. The documentation here is intended for users who have a need to work with the intermediate LLVM representation. -.. contents:: - :local: - -.. 
toctree:: - :hidden: - - HowToBuildOnARM - HowToBuildWithPGO - HowToCrossCompileLLVM - CoverageMappingFormat - CFIVerify - BuildingADistribution - CMake - Docker - SupportLibrary - AdvancedBuilds - WritingAnLLVMNewPMPass - WritingAnLLVMPass - Passes - StackSafetyAnalysis - MergeFunctions - AliasAnalysis - MemorySSA - MemProf - LoopTerminology - CycleTerminology - Vectorizers - LinkTimeOptimization - DTLTO - GoldPlugin - Remarks - SourceLevelDebugging - HowToUpdateDebugInfo - Instrumentor - InstrRefDebugInfo - RemoveDIsDebugInfo - KeyInstructionsDebugInfo - InstrProfileFormat - InstCombineContributorGuide - WritingAnLLVMBackend - CodeGenerator - TableGen/index - GlobalISel/MIRPatterns - MCJITDesignAndImplementation - ORCv2 - JITLink - DebuggingJITedCode - CommandLine - ExtendingLLVM - AddingConstrainedIntrinsics - HowToBuildWindowsItaniumPrograms - HowToCrossCompileBuiltinsOnArm - BigEndianNEON - AArch64SME - CompileCudaWithLLVM - NVPTXUsage - AMDGPUUsage - AMDGPUAsyncOperations - AMDGPUDwarfExtensionsForHeterogeneousDebugging - AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack/AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack - AMDGPUExecutionSynchronization - AMDGPUMemoryModel - SPIRVUsage - DirectXUsage - RISCVUsage - RISCV/RISCVVectorExtension - RISCV/RISCVVCIX - SandboxIR - Telemetry - LFI - AdminTasks - Benchmarking - CMakePrimer - CodeOfConduct - FatLTO - GitHub - MarkdownQuickstartTemplate - MisExpect - OpaquePointers - NewPassManager - ReportingGuide - ResponseGuide - TableGenFundamentals - yaml2obj - -Clang ------ - -:doc:`HowToBuildOnARM` - Notes on building and testing LLVM/Clang on ARM. - -:doc:`HowToBuildWithPGO` - Notes on building LLVM/Clang with PGO. - -:doc:`HowToCrossCompileLLVM` - Notes on cross-building and testing LLVM/Clang. - -`How to build the C, C++, ObjC, and ObjC++ front end`__ - Instructions for building the clang front-end from source. - - .. 
__: https://clang.llvm.org/get_started.html - -:doc:`CoverageMappingFormat` +```{contents} +:local: +``` + +```{toctree} +:hidden: + +HowToBuildOnARM +HowToBuildWithPGO +HowToCrossCompileLLVM +CoverageMappingFormat +CFIVerify +BuildingADistribution +CMake +Docker +SupportLibrary +AdvancedBuilds +WritingAnLLVMNewPMPass +WritingAnLLVMPass +Passes +StackSafetyAnalysis +MergeFunctions +AliasAnalysis +MemorySSA +MemProf +LoopTerminology +CycleTerminology +Vectorizers +LinkTimeOptimization +DTLTO +GoldPlugin +Remarks +SourceLevelDebugging +HowToUpdateDebugInfo +Instrumentor +InstrRefDebugInfo +RemoveDIsDebugInfo +KeyInstructionsDebugInfo +InstrProfileFormat +InstCombineContributorGuide +WritingAnLLVMBackend +CodeGenerator +TableGen/index +GlobalISel/MIRPatterns +MCJITDesignAndImplementation +ORCv2 +JITLink +DebuggingJITedCode +CommandLine +ExtendingLLVM +AddingConstrainedIntrinsics +HowToBuildWindowsItaniumPrograms +HowToCrossCompileBuiltinsOnArm +BigEndianNEON +AArch64SME +CompileCudaWithLLVM +NVPTXUsage +AMDGPUUsage +AMDGPUAsyncOperations +AMDGPUDwarfExtensionsForHeterogeneousDebugging +AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack/AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack +AMDGPUExecutionSynchronization +AMDGPUMemoryModel +SPIRVUsage +DirectXUsage +RISCVUsage +RISCV/RISCVVectorExtension +RISCV/RISCVVCIX +SandboxIR +Telemetry +LFI +AdminTasks +Benchmarking +CMakePrimer +CodeOfConduct +FatLTO +GitHub +MarkdownQuickstartTemplate +MisExpect +OpaquePointers +NewPassManager +ReportingGuide +ResponseGuide +TableGenFundamentals +yaml2obj +``` + +## Clang + +- {doc}`HowToBuildOnARM` + + Notes on building and testing LLVM/Clang on ARM. + +- {doc}`HowToBuildWithPGO` + + Notes on building LLVM/Clang with PGO. + +- {doc}`HowToCrossCompileLLVM` + + Notes on cross-building and testing LLVM/Clang. 
+ +- [How to build the C, C++, ObjC, and ObjC++ front end](https://clang.llvm.org/get_started.html) + + Instructions for building the clang front-end from source. + +- {doc}`CoverageMappingFormat` + This describes the format and encoding used for LLVM’s code coverage mapping. -:doc:`CFIVerify` +- {doc}`CFIVerify` + A description of the verification tool for Control Flow Integrity. -LLVM Builds and Distributions ------------------------------ +## LLVM Builds and Distributions + +- {doc}`BuildingADistribution` -:doc:`BuildingADistribution` A best-practices guide for using LLVM's CMake build system to package and distribute LLVM-based tools. -:doc:`CMake` - An addendum to the main Getting Started guide for those using the `CMake - build system `_. +- {doc}`CMake` + + An addendum to the main Getting Started guide for those using the [CMake + build system](http://www.cmake.org). + +- {doc}`Docker` + + A reference for using Dockerfiles provided with LLVM. + +- {doc}`Support Library ` + + This document describes the LLVM Support Library (`lib/Support`) and + how to keep LLVM source code portable. -:doc:`Docker` - A reference for using Dockerfiles provided with LLVM. +- {doc}`AdvancedBuilds` -:doc:`Support Library ` - This document describes the LLVM Support Library (``lib/Support``) and - how to keep LLVM source code portable. + This document describes more advanced build configurations. -:doc:`AdvancedBuilds` - This document describes more advanced build configurations. +## Optimizations -Optimizations -------------- +- {doc}`WritingAnLLVMNewPMPass` -:doc:`WritingAnLLVMNewPMPass` - Information on how to write LLVM transformations under the new pass - manager. + Information on how to write LLVM transformations under the new pass + manager. -:doc:`WritingAnLLVMPass` - Information on how to write LLVM transformations and analyses under the - legacy pass manager. +- {doc}`WritingAnLLVMPass` -:doc:`Passes` - A list of optimizations and analyses implemented in LLVM. 
+ Information on how to write LLVM transformations and analyses under the + legacy pass manager. + +- {doc}`Passes` + + A list of optimizations and analyses implemented in LLVM. + +- {doc}`StackSafetyAnalysis` -:doc:`StackSafetyAnalysis` This document describes the design of the stack safety analysis of local variables. -:doc:`MergeFunctions` +- {doc}`MergeFunctions` + Describes functions merging optimization. -:doc:`AliasAnalysis` - Information on how to write a new alias analysis implementation or how to - use existing analyses. +- {doc}`AliasAnalysis` + + Information on how to write a new alias analysis implementation or how to + use existing analyses. + +- {doc}`MemorySSA` + + Information about the MemorySSA utility in LLVM, as well as how to use it. -:doc:`MemorySSA` - Information about the MemorySSA utility in LLVM, as well as how to use it. +- {doc}`MemProf` -:doc:`MemProf` - User guide and internals of MemProf, profile guided optimizations for memory. + User guide and internals of MemProf, profile guided optimizations for memory. + +- {doc}`LoopTerminology` -:doc:`LoopTerminology` A document describing Loops and associated terms as used in LLVM. -:doc:`CycleTerminology` +- {doc}`CycleTerminology` + A document describing cycles as a generalization of loops. -:doc:`Vectorizers` - This document describes the current status of vectorization in LLVM. +- {doc}`Vectorizers` + + This document describes the current status of vectorization in LLVM. + +- {doc}`LinkTimeOptimization` + + This document describes the interface between LLVM intermodular optimizer + and the linker and its design + +- {doc}`DTLTO` + + This document describes the DTLTO implementation, which allows for + distributing ThinLTO backend compilations without requiring support from + the build system. + +- {doc}`GoldPlugin` + + How to build your programs with link-time optimization on Linux. + +- {doc}`Remarks` + + A reference on the implementation of remarks in LLVM. 
+ +- {doc}`Source Level Debugging with LLVM ` + + This document describes the design and philosophy behind the LLVM + source-level debugger. + +- {doc}`How to Update Debug Info ` + + This document specifies how to correctly update debug info in various kinds + of code transformations. -:doc:`LinkTimeOptimization` - This document describes the interface between LLVM intermodular optimizer - and the linker and its design +- {doc}`InstrRefDebugInfo` -:doc:`DTLTO` - This document describes the DTLTO implementation, which allows for - distributing ThinLTO backend compilations without requiring support from - the build system. + This document explains how LLVM uses value tracking, or instruction + referencing, to determine variable locations for debug info in the final + stages of compilation. -:doc:`GoldPlugin` - How to build your programs with link-time optimization on Linux. +- {doc}`RemoveDIsDebugInfo` -:doc:`Remarks` - A reference on the implementation of remarks in LLVM. + This is a migration guide describing how to move from debug info using + intrinsics such as dbg.value to using the non-instruction DbgRecord object. -:doc:`Source Level Debugging with LLVM ` - This document describes the design and philosophy behind the LLVM - source-level debugger. +- {doc}`KeyInstructionsDebugInfo` -:doc:`How to Update Debug Info ` - This document specifies how to correctly update debug info in various kinds - of code transformations. + This document explains how the debug info feature Key Instructions is + implemented in LLVM. -:doc:`InstrRefDebugInfo` - This document explains how LLVM uses value tracking, or instruction - referencing, to determine variable locations for debug info in the final - stages of compilation. +- {doc}`InstrProfileFormat` -:doc:`RemoveDIsDebugInfo` - This is a migration guide describing how to move from debug info using - intrinsics such as dbg.value to using the non-instruction DbgRecord object. 
+ This document explains two binary formats of instrumentation-based profiles. -:doc:`KeyInstructionsDebugInfo` - This document explains how the debug info feature Key Instructions is - implemented in LLVM. +- {doc}`InstCombineContributorGuide` -:doc:`InstrProfileFormat` - This document explains two binary formats of instrumentation-based profiles. + This document specifies guidelines for contributions for InstCombine and + related passes. -:doc:`InstCombineContributorGuide` - This document specifies guidelines for contributions for InstCombine and - related passes. +- {doc}`Instrumentor` -:doc:`Instrumentor` - A comprehensive guide to the highly configurable Instrumentor pass for custom - program instrumentation, including the interactive configuration wizard. + A comprehensive guide to the highly configurable Instrumentor pass for custom + program instrumentation, including the interactive configuration wizard. +## Code Generation -Code Generation ---------------- +- {doc}`WritingAnLLVMBackend` -:doc:`WritingAnLLVMBackend` - Information on how to write LLVM backends for machine targets. + Information on how to write LLVM backends for machine targets. -:doc:`CodeGenerator` - The design and implementation of the LLVM code generator. Useful if you are - working on retargetting LLVM to a new architecture, designing a new codegen - pass, or enhancing existing components. +- {doc}`CodeGenerator` -:doc:`TableGen ` - Describes the TableGen tool, which is used heavily by the LLVM code - generator. + The design and implementation of the LLVM code generator. Useful if you are + working on retargetting LLVM to a new architecture, designing a new codegen + pass, or enhancing existing components. -========== -GlobalISel -========== +- {doc}`TableGen ` -:doc:`MIRPatterns ` - Describes the design of MIR Patterns and how to use them. + Describes the TableGen tool, which is used heavily by the LLVM code + generator. 
-=== -JIT -=== +### GlobalISel -:doc:`MCJITDesignAndImplementation` - Describes the inner workings of MCJIT execution engine. +- {doc}`MIRPatterns ` -:doc:`ORCv2` - Describes the design and implementation of the ORC APIs, including some - usage examples, and a guide for users transitioning from ORCv1 to ORCv2. + Describes the design of MIR Patterns and how to use them. -:doc:`JITLink` - Describes the design and APIs for the JITLink library, ORC's new JIT - linker. +### JIT -:doc:`DebuggingJITedCode` - How to debug JITed code with GDB. +- {doc}`MCJITDesignAndImplementation` -Additional Topics ------------------ + Describes the inner workings of MCJIT execution engine. + +- {doc}`ORCv2` + + Describes the design and implementation of the ORC APIs, including some + usage examples, and a guide for users transitioning from ORCv1 to ORCv2. + +- {doc}`JITLink` + + Describes the design and APIs for the JITLink library, ORC's new JIT + linker. + +- {doc}`DebuggingJITedCode` + + How to debug JITed code with GDB. + +## Additional Topics + +- {doc}`CommandLine` -:doc:`CommandLine` Provides information on using the command line parsing library. -:doc:`ExtendingLLVM` +- {doc}`ExtendingLLVM` + Look here to see how to add instructions and intrinsics to LLVM. -:doc:`AddingConstrainedIntrinsics` - Gives the steps necessary when adding a new constrained math intrinsic - to LLVM. +- {doc}`AddingConstrainedIntrinsics` + + Gives the steps necessary when adding a new constrained math intrinsic + to LLVM. + +- {doc}`HowToBuildWindowsItaniumPrograms` + + Notes on assembling a Windows Itanium environment. -:doc:`HowToBuildWindowsItaniumPrograms` - Notes on assembling a Windows Itanium environment. +- {doc}`HowToCrossCompileBuiltinsOnArm` -:doc:`HowToCrossCompileBuiltinsOnArm` - Notes on cross-building and testing the compiler-rt builtins for Arm. + Notes on cross-building and testing the compiler-rt builtins for Arm. 
+ +- {doc}`BigEndianNEON` -:doc:`BigEndianNEON` LLVM's support for generating NEON instructions on big endian ARM targets is somewhat nonintuitive. This document explains the implementation and rationale. -:doc:`AArch64SME` +- {doc}`AArch64SME` + LLVM's support for AArch64 SME ACLE and ABI. -:doc:`CompileCudaWithLLVM` +- {doc}`CompileCudaWithLLVM` + LLVM support for CUDA. -:doc:`NVPTXUsage` - This document describes using the NVPTX backend to compile GPU kernels. +- {doc}`NVPTXUsage` + + This document describes using the NVPTX backend to compile GPU kernels. + +- {doc}`AMDGPUUsage` + + This document describes using the AMDGPU backend to compile GPU kernels. + +- {doc}`AMDGPUAsyncOperations` + + Builtins for invoking asynchronous data transfer operations in AMD GPUs. + +- {doc}`AMDGPUMemoryModel` + + This document describes *AMDGPU Memory Model* which overrides the + {ref}`LLVM memory model ` when a program is compiled for the + AMDGPU target. + +- {doc}`AMDGPUDwarfExtensionsForHeterogeneousDebugging` + + This document describes DWARF extensions to support heterogeneous debugging + for targets such as the AMDGPU backend. + +- {doc}`AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack/AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack` + + This document describes a DWARF extension to allow location descriptions on + the DWARF expression stack. It is part of + {doc}`AMDGPUDwarfExtensionsForHeterogeneousDebugging`. + +- {doc}`AMDGPUExecutionSynchronization` + + This document describes how execution of threads can be synchronized on AMD GPUs. + +- {doc}`SPIRVUsage` + + This document describes using the SPIR-V target to compile GPU kernels. -:doc:`AMDGPUUsage` - This document describes using the AMDGPU backend to compile GPU kernels. +- {doc}`DirectXUsage` -:doc:`AMDGPUAsyncOperations` - Builtins for invoking asynchronous data transfer operations in AMD GPUs. 
+ This document describes using the DirectX target to compile GPU code for the + DirectX runtime. -:doc:`AMDGPUMemoryModel` - This document describes *AMDGPU Memory Model* which overrides the :ref:`LLVM - memory model` when a program is compiled for the AMDGPU target. +- {doc}`RISCVUsage` -:doc:`AMDGPUDwarfExtensionsForHeterogeneousDebugging` - This document describes DWARF extensions to support heterogeneous debugging - for targets such as the AMDGPU backend. + This document describes using the RISC-V target. -:doc:`AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack/AMDGPUDwarfExtensionAllowLocationDescriptionOnTheDwarfExpressionStack` - This document describes a DWARF extension to allow location descriptions on - the DWARF expression stack. It is part of - :doc:`AMDGPUDwarfExtensionsForHeterogeneousDebugging`. +- {doc}`RISCV/RISCVVectorExtension` -:doc:`AMDGPUExecutionSynchronization` - This document describes how execution of threads can be synchronized on AMD GPUs. + This document describes how the RISC-V Vector extension can be expressed in LLVM IR and how code is generated for it in the backend. -:doc:`SPIRVUsage` - This document describes using the SPIR-V target to compile GPU kernels. +- {doc}`RISCV/RISCVVCIX` -:doc:`DirectXUsage` - This document describes using the DirectX target to compile GPU code for the - DirectX runtime. + This document shows how the scheduling information for RISC-V's `XSfvcp` extension -- SiFive Vector Coprocessor Interface (VCIX) -- works and how to customize them. -:doc:`RISCVUsage` - This document describes using the RISC-V target. +- {doc}`Sandbox IR ` -:doc:`RISCV/RISCVVectorExtension` - This document describes how the RISC-V Vector extension can be expressed in LLVM IR and how code is generated for it in the backend. + This document describes the design and usage of Sandbox IR, a transactional layer over LLVM IR. 
-:doc:`RISCV/RISCVVCIX` - This document shows how the scheduling information for RISC-V's ``XSfvcp`` extension -- SiFive Vector Coprocessor Interface (VCIX) -- works and how to customize them. +- {doc}`Telemetry` -:doc:`Sandbox IR ` - This document describes the design and usage of Sandbox IR, a transactional layer over LLVM IR. + This document describes the Telemetry framework in LLVM. -:doc:`Telemetry` - This document describes the Telemetry framework in LLVM. +- {doc}`LFI ` -:doc:`LFI ` - This document describes the Lightweight Fault Isolation (LFI) target in LLVM. + This document describes the Lightweight Fault Isolation (LFI) target in LLVM. diff --git a/llvm/docs/WritingAnLLVMBackend.md b/llvm/docs/WritingAnLLVMBackend.md index cab647125742e..1a9def75e1cb1 100644 --- a/llvm/docs/WritingAnLLVMBackend.md +++ b/llvm/docs/WritingAnLLVMBackend.md @@ -1,17 +1,16 @@ -======================= -Writing an LLVM Backend -======================= +# Writing an LLVM Backend -.. toctree:: - :hidden: +```{toctree} +:hidden: - HowToUseInstrMappings +HowToUseInstrMappings +``` -.. contents:: - :local: +```{contents} +:local: +``` -Introduction -============ +## Introduction This document describes techniques for writing compiler backends that convert the LLVM Intermediate Representation (IR) to code for a specified machine or @@ -24,216 +23,214 @@ ARM, and SPARC. The backend may also be used to generate code targeted at SPUs of the Cell processor or GPUs to support the execution of compute kernels. The document focuses on existing examples found in subdirectories of -``llvm/lib/Target`` in a downloaded LLVM release. In particular, this document +`llvm/lib/Target` in a downloaded LLVM release. In particular, this document focuses on the example of creating a static compiler (one that emits text assembly) for a SPARC target, because SPARC has fairly standard characteristics, such as a RISC instruction set and straightforward calling conventions. 
-Audience --------- +### Audience The audience for this document is anyone who needs to write an LLVM backend to generate code for a specific hardware or software target. -Prerequisite Reading --------------------- +### Prerequisite Reading These essential documents must be read before reading this document: -* `LLVM Language Reference Manual <LangRef.html>`_ --- a reference manual for +* {doc}`LLVM Language Reference Manual <LangRef>` --- a reference manual for the LLVM assembly language. -* :doc:`CodeGenerator` --- a guide to the components (classes and code +* {doc}`CodeGenerator` --- a guide to the components (classes and code generation algorithms) for translating the LLVM internal representation into machine code for a specified target. Pay particular attention to the descriptions of code generation stages: Instruction Selection, Scheduling and Formation, SSA-based Optimization, Register Allocation, Prolog/Epilog Code Insertion, Late Machine Code Optimizations, and Code Emission. -* :doc:`TableGen/index` --- a document that describes the TableGen - (``tblgen``) application that manages domain-specific information to support +* {doc}`TableGen/index` --- a document that describes the TableGen + (`tblgen`) application that manages domain-specific information to support LLVM code generation. TableGen processes input from a target description - file (``.td`` suffix) and generates C++ code that can be used for code + file (`.td` suffix) and generates C++ code that can be used for code generation. -* :doc:`WritingAnLLVMPass` --- The assembly printer is a ``FunctionPass``, as - are several ``SelectionDAG`` processing steps. +* {doc}`WritingAnLLVMPass` --- The assembly printer is a `FunctionPass`, as + are several `SelectionDAG` processing steps. -To follow the SPARC examples in this document, have a copy of `The SPARC -Architecture Manual, Version 8 <http://www.sparc.org/standards/V8.pdf>`_ for -reference. For details about the ARM instruction set, refer to the `ARM -Architecture Reference Manual <http://infocenter.arm.com/>`_. 
For more about -the GNU Assembler format (``GAS``), see `Using As -`_, especially for the +To follow the SPARC examples in this document, have a copy of [The SPARC +Architecture Manual, Version 8] for +reference. For details about the ARM instruction set, refer to the [ARM +Architecture Reference Manual]. For more about +the GNU Assembler format (`GAS`), see [Using As], especially for the assembly printer. "Using As" contains a list of target machine dependent features. -Basic Steps ------------ +[The SPARC Architecture Manual, Version 8]: http://www.sparc.org/standards/V8.pdf +[ARM Architecture Reference Manual]: http://infocenter.arm.com/ +[Using As]: http://sourceware.org/binutils/docs/as/index.html + +### Basic Steps To write a compiler backend for LLVM that converts the LLVM IR to code for a specified target (machine or other language), follow these steps: -* Create a subclass of the ``TargetMachine`` class that describes +* Create a subclass of the `TargetMachine` class that describes characteristics of your target machine. Copy existing examples of specific - ``TargetMachine`` class and header files; for example, start with - ``SparcTargetMachine.cpp`` and ``SparcTargetMachine.h``, but change the file - names for your target. Similarly, change code that references "``Sparc``" to + `TargetMachine` class and header files; for example, start with + `SparcTargetMachine.cpp` and `SparcTargetMachine.h`, but change the file + names for your target. Similarly, change code that references "`Sparc`" to reference your target. * Describe the register set of the target. Use TableGen to generate code for register definition, register aliases, and register classes from a - target-specific ``RegisterInfo.td`` input file. You should also write - additional code for a subclass of the ``TargetRegisterInfo`` class that + target-specific `RegisterInfo.td` input file. 
You should also write + additional code for a subclass of the `TargetRegisterInfo` class that represents the class register file data used for register allocation and also describes the interactions between registers. * Describe the instruction set of the target. Use TableGen to generate code for target-specific instructions from target-specific versions of - ``TargetInstrFormats.td`` and ``TargetInstrInfo.td``. You should write - additional code for a subclass of the ``TargetInstrInfo`` class to represent + `TargetInstrFormats.td` and `TargetInstrInfo.td`. You should write + additional code for a subclass of the `TargetInstrInfo` class to represent machine instructions supported by the target machine. * Describe the selection and conversion of the LLVM IR from a Directed Acyclic Graph (DAG) representation of instructions to native target-specific instructions. Use TableGen to generate code that matches patterns and selects instructions based on additional information in a target-specific - version of ``TargetInstrInfo.td``. Write code for ``XXXISelDAGToDAG.cpp``, - where ``XXX`` identifies the specific target, to perform pattern matching and - DAG-to-DAG instruction selection. Also write code in ``XXXISelLowering.cpp`` + version of `TargetInstrInfo.td`. Write code for `XXXISelDAGToDAG.cpp`, + where `XXX` identifies the specific target, to perform pattern matching and + DAG-to-DAG instruction selection. Also write code in `XXXISelLowering.cpp` to replace or remove operations and data types that are not supported natively in a SelectionDAG. * Write code for an assembly printer that converts LLVM IR to a GAS format for your target machine. You should add assembly strings to the instructions - defined in your target-specific version of ``TargetInstrInfo.td``. You - should also write code for a subclass of ``AsmPrinter`` that performs the - LLVM-to-assembly conversion and a trivial subclass of ``TargetAsmInfo``. 
+ defined in your target-specific version of `TargetInstrInfo.td`. You + should also write code for a subclass of `AsmPrinter` that performs the + LLVM-to-assembly conversion and a trivial subclass of `TargetAsmInfo`. * Optionally, add support for subtargets (i.e., variants with different capabilities). You should also write code for a subclass of the - ``TargetSubtarget`` class, which allows you to use the ``-mcpu=`` and - ``-mattr=`` command-line options. + `TargetSubtarget` class, which allows you to use the `-mcpu=` and + `-mattr=` command-line options. * Optionally, add JIT support and create a machine code emitter (subclass of - ``TargetJITInfo``) that is used to emit binary code directly into memory. + `TargetJITInfo`) that is used to emit binary code directly into memory. -In the ``.cpp`` and ``.h``. files, initially stub up these methods and then +In the `.cpp` and `.h` files, initially stub up these methods and then implement them later. Initially, you may not know which private members that the class will need and which components will need to be subclassed. -Preliminaries ------------- +### Preliminaries To actually create your compiler backend, you need to create and modify a few files. The absolute minimum is discussed here. But to actually use the LLVM target-independent code generator, you must perform the steps described in the -:doc:`LLVM Target-Independent Code Generator <CodeGenerator>` document. +{doc}`LLVM Target-Independent Code Generator <CodeGenerator>` document. -First, you should create a subdirectory under ``lib/Target`` to hold all the +First, you should create a subdirectory under `lib/Target` to hold all the files related to your target. If your target is called "Dummy", create the -directory ``lib/Target/Dummy``. +directory `lib/Target/Dummy`. -In this new directory, create a ``CMakeLists.txt``. It is easiest to copy a -``CMakeLists.txt`` of another target and modify it. It should at least contain -the ``LLVM_TARGET_DEFINITIONS`` variable. 
The library can be named ``LLVMDummy`` +In this new directory, create a `CMakeLists.txt`. It is easiest to copy a +`CMakeLists.txt` of another target and modify it. It should at least contain +the `LLVM_TARGET_DEFINITIONS` variable. The library can be named `LLVMDummy` (for example, see the MIPS target). Alternatively, you can split the library -into ``LLVMDummyCodeGen`` and ``LLVMDummyAsmPrinter``, the latter of which -should be implemented in a subdirectory below ``lib/Target/Dummy`` (for example, +into `LLVMDummyCodeGen` and `LLVMDummyAsmPrinter`, the latter of which +should be implemented in a subdirectory below `lib/Target/Dummy` (for example, see the PowerPC target). -Note that these two naming schemes are hardcoded into ``llvm-config``. Using -any other naming scheme will confuse ``llvm-config`` and produce a lot of -(seemingly unrelated) linker errors when linking ``llc``. +Note that these two naming schemes are hardcoded into `llvm-config`. Using +any other naming scheme will confuse `llvm-config` and produce a lot of +(seemingly unrelated) linker errors when linking `llc`. To make your target actually do something, you need to implement a subclass of -``TargetMachine``. This implementation should typically be in the file -``lib/Target/DummyTargetMachine.cpp``, but any file in the ``lib/Target`` +`TargetMachine`. This implementation should typically be in the file +`lib/Target/DummyTargetMachine.cpp`, but any file in the `lib/Target` directory will be built and should work. To use LLVM's target-independent code generator, you should do what all current machine backends do: create a -subclass of ``CodeGenTargetMachineImpl``. (To create a target from scratch, create a -subclass of ``TargetMachine``.) +subclass of `CodeGenTargetMachineImpl`. (To create a target from scratch, create a +subclass of `TargetMachine`.) -To get LLVM to actually build and link your target, you need to run ``cmake`` -with ``-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=Dummy``. 
This will build your +To get LLVM to actually build and link your target, you need to run `cmake` +with `-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=Dummy`. This will build your target without needing to add it to the list of all the targets. -Once your target is stable, you can add it to the ``LLVM_ALL_TARGETS`` variable -located in the main ``CMakeLists.txt``. +Once your target is stable, you can add it to the `LLVM_ALL_TARGETS` variable +located in the main `CMakeLists.txt`. -Target Machine -============== +## Target Machine -``CodeGenTargetMachineImpl`` is designed as a base class for targets implemented with -the LLVM target-independent code generator. The ``CodeGenTargetMachineImpl`` class +`CodeGenTargetMachineImpl` is designed as a base class for targets implemented with +the LLVM target-independent code generator. The `CodeGenTargetMachineImpl` class should be specialized by a concrete target class that implements the various -virtual methods. ``CodeGenTargetMachineImpl`` is defined as a subclass of -``TargetMachine`` in ``include/llvm/CodeGen/CodeGenTargetMachineImpl.h``. The -``TargetMachine`` class implementation (``include/llvm/Target/TargetMachine.cpp``) +virtual methods. `CodeGenTargetMachineImpl` is defined as a subclass of +`TargetMachine` in `include/llvm/CodeGen/CodeGenTargetMachineImpl.h`. The +`TargetMachine` class implementation (`include/llvm/Target/TargetMachine.cpp`) also processes numerous command-line options. -To create a concrete target-specific subclass of ``CodeGenTargetMachineImpl``, start -by copying an existing ``TargetMachine`` class and header. You should name the +To create a concrete target-specific subclass of `CodeGenTargetMachineImpl`, start +by copying an existing `TargetMachine` class and header. You should name the files that you create to reflect your specific target. For instance, for the -SPARC target, name the files ``SparcTargetMachine.h`` and -``SparcTargetMachine.cpp``. 
+SPARC target, name the files `SparcTargetMachine.h` and +`SparcTargetMachine.cpp`. -For a target machine ``XXX``, the implementation of ``XXXTargetMachine`` must +For a target machine `XXX`, the implementation of `XXXTargetMachine` must have access methods to obtain objects that represent target components. These -methods are named ``get*Info``, and are intended to obtain the instruction set -(``getInstrInfo``), register set (``getRegisterInfo``), stack frame layout -(``getFrameInfo``), and similar information. ``XXXTargetMachine`` must also -implement the ``getDataLayout`` method to access an object with target-specific +methods are named `get*Info`, and are intended to obtain the instruction set +(`getInstrInfo`), register set (`getRegisterInfo`), stack frame layout +(`getFrameInfo`), and similar information. `XXXTargetMachine` must also +implement the `getDataLayout` method to access an object with target-specific data characteristics, such as data type size and alignment requirements. -For instance, for the SPARC target, the header file ``SparcTargetMachine.h`` -declares prototypes for several ``get*Info`` and ``getDataLayout`` methods that +For instance, for the SPARC target, the header file `SparcTargetMachine.h` +declares prototypes for several `get*Info` and `getDataLayout` methods that simply return a class member. -.. 
code-block:: c++ - - namespace llvm { +```c++ +namespace llvm { - class Module; +class Module; - class SparcTargetMachine : public CodeGenTargetMachineImpl { - const DataLayout DataLayout; // Calculates type size & alignment - SparcSubtarget Subtarget; - SparcInstrInfo InstrInfo; - TargetFrameInfo FrameInfo; +class SparcTargetMachine : public CodeGenTargetMachineImpl { + const DataLayout DataLayout; // Calculates type size & alignment + SparcSubtarget Subtarget; + SparcInstrInfo InstrInfo; + TargetFrameInfo FrameInfo; - protected: - virtual const TargetAsmInfo *createTargetAsmInfo() const; +protected: + virtual const TargetAsmInfo *createTargetAsmInfo() const; - public: - SparcTargetMachine(const Module &M, const std::string &FS); +public: + SparcTargetMachine(const Module &M, const std::string &FS); - virtual const SparcInstrInfo *getInstrInfo() const {return &InstrInfo; } - virtual const TargetFrameInfo *getFrameInfo() const {return &FrameInfo; } - virtual const TargetSubtarget *getSubtargetImpl() const{return &Subtarget; } - virtual const TargetRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); - } - virtual const DataLayout *getDataLayout() const { return &DataLayout; } + virtual const SparcInstrInfo *getInstrInfo() const {return &InstrInfo; } + virtual const TargetFrameInfo *getFrameInfo() const {return &FrameInfo; } + virtual const TargetSubtarget *getSubtargetImpl() const{return &Subtarget; } + virtual const TargetRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + virtual const DataLayout *getDataLayout() const { return &DataLayout; } - // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, bool Fast); - virtual bool addPreEmitPass(PassManagerBase &PM, bool Fast); - }; + // Pass Pipeline Configuration + virtual bool addInstSelector(PassManagerBase &PM, bool Fast); + virtual bool addPreEmitPass(PassManagerBase &PM, bool Fast); +}; - } // end namespace llvm +} // end 
namespace llvm +``` -* ``getInstrInfo()`` -* ``getRegisterInfo()`` -* ``getFrameInfo()`` -* ``getDataLayout()`` -* ``getSubtargetImpl()`` +* `getInstrInfo()` +* `getRegisterInfo()` +* `getFrameInfo()` +* `getDataLayout()` +* `getSubtargetImpl()` For some targets, you also need to support the following methods: -* ``getTargetLowering()`` -* ``getJITInfo()`` +* `getTargetLowering()` +* `getJITInfo()` Some architectures, such as GPUs, do not support jumping to an arbitrary program location and implement branching using masked execution and loop using @@ -241,81 +238,80 @@ special instructions around the loop body. In order to avoid CFG modifications that introduce irreducible control flow not handled by such hardware, a target must call `setRequiresStructuredCFG(true)` when being initialized. -In addition, the ``XXXTargetMachine`` constructor should specify a -``TargetDescription`` string that determines the data layout for the target +In addition, the `XXXTargetMachine` constructor should specify a +`TargetDescription` string that determines the data layout for the target machine, including characteristics such as pointer size, alignment, and -endianness. For example, the constructor for ``SparcTargetMachine`` contains +endianness. For example, the constructor for `SparcTargetMachine` contains the following: -.. code-block:: c++ +```c++ +SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &FS) + : DataLayout("E-p:32:32-f128:128:128"), + Subtarget(M, FS), InstrInfo(Subtarget), + FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) { +} +``` - SparcTargetMachine::SparcTargetMachine(const Module &M, const std::string &FS) - : DataLayout("E-p:32:32-f128:128:128"), - Subtarget(M, FS), InstrInfo(Subtarget), - FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) { - } - -Hyphens separate portions of the ``TargetDescription`` string. +Hyphens separate portions of the `TargetDescription` string. 
-* An upper-case "``E``" in the string indicates a big-endian target data model. - A lower-case "``e``" indicates little-endian. +* An upper-case "`E`" in the string indicates a big-endian target data model. + A lower-case "`e`" indicates little-endian. -* "``p:``" is followed by pointer information: size, ABI alignment, and - preferred alignment. If only two figures follow "``p:``", then the first +* "`p:`" is followed by pointer information: size, ABI alignment, and + preferred alignment. If only two figures follow "`p:`", then the first value is pointer size, and the second value is both ABI and preferred alignment. -* Then a letter for numeric type alignment: "``i``", "``f``", "``v``", or - "``a``" (corresponding to integer, floating point, vector, or aggregate). - "``i``", "``v``", or "``a``" are followed by ABI alignment and preferred - alignment. "``f``" is followed by three values: the first indicates the size +* Then a letter for numeric type alignment: "`i`", "`f`", "`v`", or + "`a`" (corresponding to integer, floating point, vector, or aggregate). + "`i`", "`v`", or "`a`" are followed by ABI alignment and preferred + alignment. "`f`" is followed by three values: the first indicates the size of a long double, then ABI alignment, and then ABI preferred alignment. -Target Registration -=================== +## Target Registration -You must also register your target with the ``TargetRegistry``, which is what +You must also register your target with the `TargetRegistry`, which is what other LLVM tools use to be able to lookup and use your target at runtime. The -``TargetRegistry`` can be used directly, but for most targets there are helper +`TargetRegistry` can be used directly, but for most targets there are helper templates which should take care of the work for you. -All targets should declare a global ``Target`` object which is used to -represent the target during registration. 
Then, in the target's ``TargetInfo`` -library, the target should define that object and use the ``RegisterTarget`` +All targets should declare a global `Target` object which is used to +represent the target during registration. Then, in the target's `TargetInfo` +library, the target should define that object and use the `RegisterTarget` template to register the target. For example, the Sparc registration code looks like this: -.. code-block:: c++ +```c++ +Target llvm::getTheSparcTarget(); - Target llvm::getTheSparcTarget(); +extern "C" void LLVMInitializeSparcTargetInfo() { + RegisterTarget + X(getTheSparcTarget(), "sparc", "Sparc"); +} +``` - extern "C" void LLVMInitializeSparcTargetInfo() { - RegisterTarget - X(getTheSparcTarget(), "sparc", "Sparc"); - } - -This allows the ``TargetRegistry`` to look up the target by name or by target +This allows the `TargetRegistry` to look up the target by name or by target triple. In addition, most targets will also register additional features which are available in separate libraries. These registration steps are separate, because some clients may wish to only link in some parts of the target --- the JIT code generator does not require the use of the assembler printer, for example. Here is an example of registering the Sparc assembly printer: -.. code-block:: c++ +```c++ +extern "C" void LLVMInitializeSparcAsmPrinter() { + RegisterAsmPrinter X(getTheSparcTarget()); +} +``` - extern "C" void LLVMInitializeSparcAsmPrinter() { - RegisterAsmPrinter X(getTheSparcTarget()); - } +For more information, see "[`llvm/Target/TargetRegistry.h`][TargetRegistry]". -For more information, see "`llvm/Target/TargetRegistry.h -`_". +[TargetRegistry]: doxygen:TargetRegistry_8h-source.html -Register Set and Register Classes -================================= +## Register Set and Register Classes You should describe a concrete target-specific class that represents the -register file of a target machine. 
This class is called ``XXXRegisterInfo`` -(where ``XXX`` identifies the target) and represents the class register file +register file of a target machine. This class is called `XXXRegisterInfo` +(where `XXX` identifies the target) and represents the class register file data that is used for register allocation. It also describes the interactions between registers. @@ -331,366 +327,362 @@ registers. Much of the code for registers, including register definition, register aliases, and register classes, is generated by TableGen from -``XXXRegisterInfo.td`` input files and placed in ``XXXGenRegisterInfo.h.inc`` -and ``XXXGenRegisterInfo.inc`` output files. Some of the code in the -implementation of ``XXXRegisterInfo`` requires hand-coding. +`XXXRegisterInfo.td` input files and placed in `XXXGenRegisterInfo.h.inc` +and `XXXGenRegisterInfo.inc` output files. Some of the code in the +implementation of `XXXRegisterInfo` requires hand-coding. -Defining a Register -------------------- +### Defining a Register -The ``XXXRegisterInfo.td`` file typically starts with register definitions for -a target machine. The ``Register`` class (specified in ``Target.td``) is used -to define an object for each register. The specified string ``n`` becomes the -``Name`` of the register. The basic ``Register`` object does not have any +The `XXXRegisterInfo.td` file typically starts with register definitions for +a target machine. The `Register` class (specified in `Target.td`) is used +to define an object for each register. The specified string `n` becomes the +`Name` of the register. The basic `Register` object does not have any subregisters and does not specify any aliases. -.. 
code-block:: text - - class Register { - string Namespace = ""; - string AsmName = n; - string Name = n; - int SpillSize = 0; - int SpillAlignment = 0; - list Aliases = []; - list SubRegs = []; - list DwarfNumbers = []; - } - -For example, in the ``X86RegisterInfo.td`` file, there are register definitions -that utilize the ``Register`` class, such as: - -.. code-block:: text - - def AL : Register<"AL">, DwarfRegNum<[0, 0, 0]>; - -This defines the register ``AL`` and assigns it values (with ``DwarfRegNum``) -that are used by ``gcc``, ``gdb``, or a debug information writer to identify a -register. For register ``AL``, ``DwarfRegNum`` takes an array of 3 values +```text +class Register { + string Namespace = ""; + string AsmName = n; + string Name = n; + int SpillSize = 0; + int SpillAlignment = 0; + list Aliases = []; + list SubRegs = []; + list DwarfNumbers = []; +} +``` + +For example, in the `X86RegisterInfo.td` file, there are register definitions +that utilize the `Register` class, such as: + +```text +def AL : Register<"AL">, DwarfRegNum<[0, 0, 0]>; +``` + +This defines the register `AL` and assigns it values (with `DwarfRegNum`) +that are used by `gcc`, `gdb`, or a debug information writer to identify a +register. For register `AL`, `DwarfRegNum` takes an array of 3 values representing 3 different modes: the first element is for X86-64, the second for exception handling (EH) on X86-32, and the third is generic. -1 is a special Dwarf number that indicates the gcc number is undefined, and -2 indicates the register number is invalid for this mode. -From the previously described line in the ``X86RegisterInfo.td`` file, TableGen -generates this code in the ``X86GenRegisterInfo.inc`` file: - -.. code-block:: c++ - - static const unsigned GR8[] = { X86::AL, ... }; - - const unsigned AL_AliasSet[] = { X86::AX, X86::EAX, X86::RAX, 0 }; - - const TargetRegisterDesc RegisterDescriptors[] = { - ... 
- { "AL", "AL", AL_AliasSet, Empty_SubRegsSet, Empty_SubRegsSet, AL_SuperRegsSet }, ... +From the previously described line in the `X86RegisterInfo.td` file, TableGen +generates this code in the `X86GenRegisterInfo.inc` file: -From the register info file, TableGen generates a ``TargetRegisterDesc`` object -for each register. ``TargetRegisterDesc`` is defined in -``include/llvm/Target/TargetRegisterInfo.h`` with the following fields: +```c++ +static const unsigned GR8[] = { X86::AL, ... }; -.. code-block:: c++ +const unsigned AL_AliasSet[] = { X86::AX, X86::EAX, X86::RAX, 0 }; - struct TargetRegisterDesc { - const char *AsmName; // Assembly language name for the register - const char *Name; // Printable name for the reg (for debugging) - const unsigned *AliasSet; // Register Alias Set - const unsigned *SubRegs; // Sub-register set - const unsigned *ImmSubRegs; // Immediate sub-register set - const unsigned *SuperRegs; // Super-register set - }; - -TableGen uses the entire target description file (``.td``) to determine text -names for the register (in the ``AsmName`` and ``Name`` fields of -``TargetRegisterDesc``) and the relationships of other registers to the defined -register (in the other ``TargetRegisterDesc`` fields). In this example, other -definitions establish the registers "``AX``", "``EAX``", and "``RAX``" as -aliases for one another, so TableGen generates a null-terminated array -(``AL_AliasSet``) for this register alias set. - -The ``Register`` class is commonly used as a base class for more complex -classes. In ``Target.td``, the ``Register`` class is the base for the -``RegisterWithSubRegs`` class that is used to define registers that need to -specify subregisters in the ``SubRegs`` list, as shown here: - -.. 
code-block:: text - - class RegisterWithSubRegs subregs> : Register { - let SubRegs = subregs; - } - -In ``SparcRegisterInfo.td``, additional register classes are defined for SPARC: -a ``Register`` subclass, ``SparcReg``, and further subclasses: ``Ri``, ``Rf``, -and ``Rd``. SPARC registers are identified by 5-bit ID numbers, which is a -feature common to these subclasses. Note the use of "``let``" expressions to -override values that are initially defined in a superclass (such as ``SubRegs`` -field in the ``Rd`` class). - -.. code-block:: text - - class SparcReg : Register { - field bits<5> Num; - let Namespace = "SP"; - } - // Ri - 32-bit integer registers - class Ri num, string n> : - SparcReg { - let Num = num; - } - // Rf - 32-bit floating-point registers - class Rf num, string n> : - SparcReg { - let Num = num; - } - // Rd - Slots in the FP register file for 64-bit floating-point values. - class Rd num, string n, list subregs> : SparcReg { - let Num = num; - let SubRegs = subregs; - } - -In the ``SparcRegisterInfo.td`` file, there are register definitions that -utilize these subclasses of ``Register``, such as: - -.. code-block:: text - - def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>; - def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>; +const TargetRegisterDesc RegisterDescriptors[] = { ... - def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>; - def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>; - ... - def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>; - def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[34]>; - -The last two registers shown above (``D0`` and ``D1``) are double-precision +{ "AL", "AL", AL_AliasSet, Empty_SubRegsSet, Empty_SubRegsSet, AL_SuperRegsSet }, ... +``` + +From the register info file, TableGen generates a `TargetRegisterDesc` object +for each register. 
`TargetRegisterDesc` is defined in +`include/llvm/Target/TargetRegisterInfo.h` with the following fields: + +```c++ +struct TargetRegisterDesc { + const char *AsmName; // Assembly language name for the register + const char *Name; // Printable name for the reg (for debugging) + const unsigned *AliasSet; // Register Alias Set + const unsigned *SubRegs; // Sub-register set + const unsigned *ImmSubRegs; // Immediate sub-register set + const unsigned *SuperRegs; // Super-register set +}; +``` + +TableGen uses the entire target description file (`.td`) to determine text +names for the register (in the `AsmName` and `Name` fields of +`TargetRegisterDesc`) and the relationships of other registers to the defined +register (in the other `TargetRegisterDesc` fields). In this example, other +definitions establish the registers "`AX`", "`EAX`", and "`RAX`" as +aliases for one another, so TableGen generates a null-terminated array +(`AL_AliasSet`) for this register alias set. + +The `Register` class is commonly used as a base class for more complex +classes. In `Target.td`, the `Register` class is the base for the +`RegisterWithSubRegs` class that is used to define registers that need to +specify subregisters in the `SubRegs` list, as shown here: + +```text +class RegisterWithSubRegs subregs> : Register { + let SubRegs = subregs; +} +``` + +In `SparcRegisterInfo.td`, additional register classes are defined for SPARC: +a `Register` subclass, `SparcReg`, and further subclasses: `Ri`, `Rf`, +and `Rd`. SPARC registers are identified by 5-bit ID numbers, which is a +feature common to these subclasses. Note the use of "`let`" expressions to +override values that are initially defined in a superclass (such as `SubRegs` +field in the `Rd` class). 
+ +```text +class SparcReg : Register { + field bits<5> Num; + let Namespace = "SP"; +} +// Ri - 32-bit integer registers +class Ri num, string n> : +SparcReg { + let Num = num; +} +// Rf - 32-bit floating-point registers +class Rf num, string n> : +SparcReg { + let Num = num; +} +// Rd - Slots in the FP register file for 64-bit floating-point values. +class Rd num, string n, list subregs> : SparcReg { + let Num = num; + let SubRegs = subregs; +} +``` + +In the `SparcRegisterInfo.td` file, there are register definitions that +utilize these subclasses of `Register`, such as: + +```text +def G0 : Ri< 0, "G0">, DwarfRegNum<[0]>; +def G1 : Ri< 1, "G1">, DwarfRegNum<[1]>; +... +def F0 : Rf< 0, "F0">, DwarfRegNum<[32]>; +def F1 : Rf< 1, "F1">, DwarfRegNum<[33]>; +... +def D0 : Rd< 0, "F0", [F0, F1]>, DwarfRegNum<[32]>; +def D1 : Rd< 2, "F2", [F2, F3]>, DwarfRegNum<[34]>; +``` + +The last two registers shown above (`D0` and `D1`) are double-precision floating-point registers that are aliases for pairs of single-precision floating-point sub-registers. In addition to aliases, the sub-register and super-register relationships of the defined register are in fields of a -register's ``TargetRegisterDesc``. +register's `TargetRegisterDesc`. -Defining a Register Class -------------------------- +### Defining a Register Class -The ``RegisterClass`` class (specified in ``Target.td``) is used to define an +The `RegisterClass` class (specified in `Target.td`) is used to define an object that represents a group of related registers and also defines the default allocation order of the registers. A target description file -``XXXRegisterInfo.td`` that uses ``Target.td`` can construct register classes +`XXXRegisterInfo.td` that uses `Target.td` can construct register classes using the following class: -.. 
code-block:: text - - class RegisterClass regTypes, int alignment, dag regList> { - string Namespace = namespace; - list RegTypes = regTypes; - int Size = 0; // spill size, in bits; zero lets tblgen pick the size - int Alignment = alignment; +```text +class RegisterClass regTypes, int alignment, dag regList> { + string Namespace = namespace; + list RegTypes = regTypes; + int Size = 0; // spill size, in bits; zero lets tblgen pick the size + int Alignment = alignment; - // CopyCost is the cost of copying a value between two registers - // default value 1 means a single instruction - // A negative value means copying is extremely expensive or impossible - int CopyCost = 1; - dag MemberList = regList; + // CopyCost is the cost of copying a value between two registers + // default value 1 means a single instruction + // A negative value means copying is extremely expensive or impossible + int CopyCost = 1; + dag MemberList = regList; - // for register classes that are subregisters of this class - list SubRegClassList = []; + // for register classes that are subregisters of this class + list SubRegClassList = []; - code MethodProtos = [{}]; // to insert arbitrary code - code MethodBodies = [{}]; - } + code MethodProtos = [{}]; // to insert arbitrary code + code MethodBodies = [{}]; +} +``` -To define a ``RegisterClass``, use the following 4 arguments: +To define a `RegisterClass`, use the following 4 arguments: * The first argument of the definition is the name of the namespace. -* The second argument is a list of ``ValueType`` register type values that are - defined in ``include/llvm/CodeGen/ValueTypes.td``. Defined values include - integer types (such as ``i16``, ``i32``, and ``i1`` for Boolean), - floating-point types (``f32``, ``f64``), and vector types (for example, - ``v8i16`` for an ``8 x i16`` vector). 
All registers in a ``RegisterClass`` - must have the same ``ValueType``, but some registers may store vector data in +* The second argument is a list of `ValueType` register type values that are + defined in `include/llvm/CodeGen/ValueTypes.td`. Defined values include + integer types (such as `i16`, `i32`, and `i1` for Boolean), + floating-point types (`f32`, `f64`), and vector types (for example, + `v8i16` for an `8 x i16` vector). All registers in a `RegisterClass` + must have the same `ValueType`, but some registers may store vector data in different configurations. For example a register that can process a 128-bit vector may be able to handle 16 8-bit integer elements, 8 16-bit integers, 4 32-bit integers, and so on. -* The third argument of the ``RegisterClass`` definition specifies the +* The third argument of the `RegisterClass` definition specifies the alignment required of the registers when they are stored or loaded to memory. -* The final argument, ``regList``, specifies which registers are in this class. - If an alternative allocation order method is not specified, then ``regList`` +* The final argument, `regList`, specifies which registers are in this class. + If an alternative allocation order method is not specified, then `regList` also defines the order of allocation used by the register allocator. Besides - simply listing registers with ``(add R0, R1, ...)``, more advanced set - operators are available. See ``include/llvm/Target/Target.td`` for more + simply listing registers with `(add R0, R1, ...)`, more advanced set + operators are available. See `include/llvm/Target/Target.td` for more information. -In ``SparcRegisterInfo.td``, three ``RegisterClass`` objects are defined: -``FPRegs``, ``DFPRegs``, and ``IntRegs``. For all three register classes, the -first argument defines the namespace with the string "``SP``". 
``FPRegs`` -defines a group of 32 single-precision floating-point registers (``F0`` to -``F31``); ``DFPRegs`` defines a group of 16 double-precision registers -(``D0-D15``). - -.. code-block:: text - - // F0, F1, F2, ..., F31 - def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>; - - def DFPRegs : RegisterClass<"SP", [f64], 64, - (add D0, D1, D2, D3, D4, D5, D6, D7, D8, - D9, D10, D11, D12, D13, D14, D15)>; - - def IntRegs : RegisterClass<"SP", [i32], 32, - (add L0, L1, L2, L3, L4, L5, L6, L7, - I0, I1, I2, I3, I4, I5, - O0, O1, O2, O3, O4, O5, O7, - G1, - // Non-allocatable regs: - G2, G3, G4, - O6, // stack ptr - I6, // frame ptr - I7, // return address - G0, // constant zero - G5, G6, G7 // reserved for kernel - )>; - -Using ``SparcRegisterInfo.td`` with TableGen generates several output files +In `SparcRegisterInfo.td`, three `RegisterClass` objects are defined: +`FPRegs`, `DFPRegs`, and `IntRegs`. For all three register classes, the +first argument defines the namespace with the string "`SP`". `FPRegs` +defines a group of 32 single-precision floating-point registers (`F0` to +`F31`); `DFPRegs` defines a group of 16 double-precision registers +(`D0-D15`). + +```text +// F0, F1, F2, ..., F31 +def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>; + +def DFPRegs : RegisterClass<"SP", [f64], 64, + (add D0, D1, D2, D3, D4, D5, D6, D7, D8, + D9, D10, D11, D12, D13, D14, D15)>; + +def IntRegs : RegisterClass<"SP", [i32], 32, + (add L0, L1, L2, L3, L4, L5, L6, L7, + I0, I1, I2, I3, I4, I5, + O0, O1, O2, O3, O4, O5, O7, + G1, + // Non-allocatable regs: + G2, G3, G4, + O6, // stack ptr + I6, // frame ptr + I7, // return address + G0, // constant zero + G5, G6, G7 // reserved for kernel + )>; +``` + +Using `SparcRegisterInfo.td` with TableGen generates several output files that are intended for inclusion in other source code that you write. 
-``SparcRegisterInfo.td`` generates ``SparcGenRegisterInfo.h.inc``, which should +`SparcRegisterInfo.td` generates `SparcGenRegisterInfo.h.inc`, which should be included in the header file for the implementation of the SPARC register -implementation that you write (``SparcRegisterInfo.h``). In -``SparcGenRegisterInfo.h.inc`` a new structure is defined called -``SparcGenRegisterInfo`` that uses ``TargetRegisterInfo`` as its base. It also -specifies types, based upon the defined register classes: ``DFPRegsClass``, -``FPRegsClass``, and ``IntRegsClass``. - -``SparcRegisterInfo.td`` also generates ``SparcGenRegisterInfo.inc``, which is -included at the bottom of ``SparcRegisterInfo.cpp``, the SPARC register +implementation that you write (`SparcRegisterInfo.h`). In +`SparcGenRegisterInfo.h.inc` a new structure is defined called +`SparcGenRegisterInfo` that uses `TargetRegisterInfo` as its base. It also +specifies types, based upon the defined register classes: `DFPRegsClass`, +`FPRegsClass`, and `IntRegsClass`. + +`SparcRegisterInfo.td` also generates `SparcGenRegisterInfo.inc`, which is +included at the bottom of `SparcRegisterInfo.cpp`, the SPARC register implementation. The code below shows only the generated integer registers and -associated register classes. The order of registers in ``IntRegs`` reflects -the order in the definition of ``IntRegs`` in the target description file. - -.. code-block:: c++ - - // IntRegs Register Class... - static const unsigned IntRegs[] = { - SP::L0, SP::L1, SP::L2, SP::L3, SP::L4, SP::L5, - SP::L6, SP::L7, SP::I0, SP::I1, SP::I2, SP::I3, - SP::I4, SP::I5, SP::O0, SP::O1, SP::O2, SP::O3, - SP::O4, SP::O5, SP::O7, SP::G1, SP::G2, SP::G3, - SP::G4, SP::O6, SP::I6, SP::I7, SP::G0, SP::G5, - SP::G6, SP::G7, +associated register classes. The order of registers in `IntRegs` reflects +the order in the definition of `IntRegs` in the target description file. + +```c++ +// IntRegs Register Class... 
+static const unsigned IntRegs[] = { + SP::L0, SP::L1, SP::L2, SP::L3, SP::L4, SP::L5, + SP::L6, SP::L7, SP::I0, SP::I1, SP::I2, SP::I3, + SP::I4, SP::I5, SP::O0, SP::O1, SP::O2, SP::O3, + SP::O4, SP::O5, SP::O7, SP::G1, SP::G2, SP::G3, + SP::G4, SP::O6, SP::I6, SP::I7, SP::G0, SP::G5, + SP::G6, SP::G7, +}; + +// IntRegsVTs Register Class Value Types... +static const MVT::ValueType IntRegsVTs[] = { + MVT::i32, MVT::Other +}; + +namespace SP { // Register class instances + DFPRegsClass DFPRegsRegClass; + FPRegsClass FPRegsRegClass; + IntRegsClass IntRegsRegClass; +... + // IntRegs Sub-register Classes... + static const TargetRegisterClass* const IntRegsSubRegClasses [] = { + NULL }; - - // IntRegsVTs Register Class Value Types... - static const MVT::ValueType IntRegsVTs[] = { - MVT::i32, MVT::Other +... + // IntRegs Super-register Classes.. + static const TargetRegisterClass* const IntRegsSuperRegClasses [] = { + NULL + }; +... + // IntRegs Register Class sub-classes... + static const TargetRegisterClass* const IntRegsSubclasses [] = { + NULL + }; +... + // IntRegs Register Class super-classes... + static const TargetRegisterClass* const IntRegsSuperclasses [] = { + NULL }; - namespace SP { // Register class instances - DFPRegsClass DFPRegsRegClass; - FPRegsClass FPRegsRegClass; - IntRegsClass IntRegsRegClass; - ... - // IntRegs Sub-register Classes... - static const TargetRegisterClass* const IntRegsSubRegClasses [] = { - NULL - }; - ... - // IntRegs Super-register Classes.. - static const TargetRegisterClass* const IntRegsSuperRegClasses [] = { - NULL - }; - ... - // IntRegs Register Class sub-classes... - static const TargetRegisterClass* const IntRegsSubclasses [] = { - NULL - }; - ... - // IntRegs Register Class super-classes... 
- static const TargetRegisterClass* const IntRegsSuperclasses [] = { - NULL - }; - - IntRegsClass::IntRegsClass() : TargetRegisterClass(IntRegsRegClassID, - IntRegsVTs, IntRegsSubclasses, IntRegsSuperclasses, IntRegsSubRegClasses, - IntRegsSuperRegClasses, 4, 4, 1, IntRegs, IntRegs + 32) {} - } + IntRegsClass::IntRegsClass() : TargetRegisterClass(IntRegsRegClassID, + IntRegsVTs, IntRegsSubclasses, IntRegsSuperclasses, IntRegsSubRegClasses, + IntRegsSuperRegClasses, 4, 4, 1, IntRegs, IntRegs + 32) {} +} +``` The register allocators will avoid using reserved registers, and callee saved registers are not used until all the volatile registers have been used. That is usually good enough, but in some cases it may be necessary to provide custom allocation orders. -Implement a subclass of ``TargetRegisterInfo`` ----------------------------------------------- +### Implement a subclass of `TargetRegisterInfo` -The final step is to hand code portions of ``XXXRegisterInfo``, which -implements the interface described in ``TargetRegisterInfo.h`` (see -:ref:`TargetRegisterInfo`). These functions return ``0``, ``NULL``, or -``false``, unless overridden. Here is a list of functions that are overridden -for the SPARC implementation in ``SparcRegisterInfo.cpp``: +The final step is to hand code portions of `XXXRegisterInfo`, which +implements the interface described in `TargetRegisterInfo.h` (see +{ref}`TargetRegisterInfo`). These functions return `0`, `NULL`, or +`false`, unless overridden. Here is a list of functions that are overridden +for the SPARC implementation in `SparcRegisterInfo.cpp`: -* ``getCalleeSavedRegs`` --- Returns a list of callee-saved registers in the +* `getCalleeSavedRegs` --- Returns a list of callee-saved registers in the order of the desired callee-save stack frame offset. 
-* ``getReservedRegs`` --- Returns a bitset indexed by physical register +* `getReservedRegs` --- Returns a bitset indexed by physical register numbers, indicating if a particular register is unavailable. -* ``hasFP`` --- Return a Boolean indicating if a function should have a +* `hasFP` --- Return a Boolean indicating if a function should have a dedicated frame pointer register. -* ``eliminateCallFramePseudoInstr`` --- If call frame setup or destroy pseudo +* `eliminateCallFramePseudoInstr` --- If call frame setup or destroy pseudo instructions are used, this can be called to eliminate them. -* ``eliminateFrameIndex`` --- Eliminate abstract frame indices from +* `eliminateFrameIndex` --- Eliminate abstract frame indices from instructions that may use them. -* ``emitPrologue`` --- Insert prologue code into the function. +* `emitPrologue` --- Insert prologue code into the function. -* ``emitEpilogue`` --- Insert epilogue code into the function. +* `emitEpilogue` --- Insert epilogue code into the function. -.. _instruction-set: +(instruction-set)= -Instruction Set -=============== +## Instruction Set During the early stages of code generation, the LLVM IR code is converted to a -``SelectionDAG`` with nodes that are instances of the ``SDNode`` class -containing target instructions. An ``SDNode`` has an opcode, operands, type +`SelectionDAG` with nodes that are instances of the `SDNode` class +containing target instructions. An `SDNode` has an opcode, operands, type requirements, and operation properties. For example, is an operation commutative, does an operation load from memory. The various operation node -types are described in the ``include/llvm/CodeGen/SelectionDAGNodes.h`` file -(values of the ``NodeType`` enum in the ``ISD`` namespace). +types are described in the `include/llvm/CodeGen/SelectionDAGNodes.h` file +(values of the `NodeType` enum in the `ISD` namespace). 
-TableGen uses the following target description (``.td``) input files to +TableGen uses the following target description (`.td`) input files to generate much of the code for instruction definition: -* ``Target.td`` --- Where the ``Instruction``, ``Operand``, ``InstrInfo``, and +* `Target.td` --- Where the `Instruction`, `Operand`, `InstrInfo`, and other fundamental classes are defined. -* ``TargetSelectionDAG.td`` --- Used by ``SelectionDAG`` instruction selection - generators, contains ``SDTC*`` classes (selection DAG type constraint), - definitions of ``SelectionDAG`` nodes (such as ``imm``, ``cond``, ``bb``, - ``add``, ``fadd``, ``sub``), and pattern support (``Pattern``, ``Pat``, - ``PatFrag``, ``PatLeaf``, ``ComplexPattern``. +* `TargetSelectionDAG.td` --- Used by `SelectionDAG` instruction selection + generators, contains `SDTC*` classes (selection DAG type constraint), + definitions of `SelectionDAG` nodes (such as `imm`, `cond`, `bb`, + `add`, `fadd`, `sub`), and pattern support (`Pattern`, `Pat`, + `PatFrag`, `PatLeaf`, `ComplexPattern`). -* ``XXXInstrFormats.td`` --- Patterns for definitions of target-specific +* `XXXInstrFormats.td` --- Patterns for definitions of target-specific instructions. -* ``XXXInstrInfo.td`` --- Target-specific definitions of instruction templates, +* `XXXInstrInfo.td` --- Target-specific definitions of instruction templates, condition codes, and instructions of an instruction set. For architecture modifications, a different file name may be used. For example, for Pentium - with SSE instruction, this file is ``X86InstrSSE.td``, and for Pentium with - MMX, this file is ``X86InstrMMX.td``. + with SSE instructions, this file is `X86InstrSSE.td`, and for Pentium with + MMX, this file is `X86InstrMMX.td`. -There is also a target-specific ``XXX.td`` file, where ``XXX`` is the name of -the target. 
The ``XXX.td`` file includes the other ``.td`` input files, but +There is also a target-specific `XXX.td` file, where `XXX` is the name of +the target. The `XXX.td` file includes the other `.td` input files, but its contents are only directly important for subtargets. -You should describe a concrete target-specific class ``XXXInstrInfo`` that +You should describe a concrete target-specific class `XXXInstrInfo` that represents machine instructions supported by a target machine. -``XXXInstrInfo`` contains an array of ``XXXInstrDescriptor`` objects, each of +`XXXInstrInfo` contains an array of `XXXInstrDescriptor` objects, each of which describes one instruction. An instruction descriptor defines: * Opcode mnemonic @@ -699,25 +691,25 @@ which describes one instruction. An instruction descriptor defines: * Target-independent properties (such as memory access, is commutable) * Target-specific flags -The Instruction class (defined in ``Target.td``) is mostly used as a base for +The Instruction class (defined in `Target.td`) is mostly used as a base for more complex instruction classes. -.. code-block:: text - - class Instruction { - string Namespace = ""; - dag OutOperandList; // A dag containing the MI def operand list. - dag InOperandList; // A dag containing the MI use operand list. - string AsmString = ""; // The .s format to print the instruction with. - list Pattern; // Set to the DAG pattern for this instruction. - list Uses = []; - list Defs = []; - list Predicates = []; // predicates turned into isel match code - ... remainder not shown for space ... - } - -A ``SelectionDAG`` node (``SDNode``) should contain an object representing a -target-specific instruction that is defined in ``XXXInstrInfo.td``. The +```text +class Instruction { + string Namespace = ""; + dag OutOperandList; // A dag containing the MI def operand list. + dag InOperandList; // A dag containing the MI use operand list. + string AsmString = ""; // The .s format to print the instruction with. 
+ list<dag> Pattern; // Set to the DAG pattern for this instruction. + list<Register> Uses = []; + list<Register> Defs = []; + list<Predicate> Predicates = []; // predicates turned into isel match code + ... remainder not shown for space ... +} +``` + +A `SelectionDAG` node (`SDNode`) should contain an object representing a +target-specific instruction that is defined in `XXXInstrInfo.td`. The instruction objects should represent instructions from the architecture manual of the target machine (such as the SPARC Architecture Manual for the SPARC target). @@ -725,8 +717,8 @@ target). A single instruction from the architecture manual is often modeled as multiple target instructions, depending upon its operands. For example, a manual might describe an add instruction that takes a register or an immediate operand. An -LLVM target could model this with two instructions named ``ADDri`` and -``ADDrr``. +LLVM target could model this with two instructions named `ADDri` and +`ADDrr`. You should define a class for each instruction category and define each opcode as a subclass of the category with appropriate parameters such as the fixed @@ -736,50 +728,50 @@ Also you should specify how the instruction should be printed when the automatic assembly printer is used. As is described in the SPARC Architecture Manual, Version 8, there are three -major 32-bit formats for instructions. Format 1 is only for the ``CALL`` -instruction. Format 2 is for branch on condition codes and ``SETHI`` (set high +major 32-bit formats for instructions. Format 1 is only for the `CALL` +instruction. Format 2 is for branch on condition codes and `SETHI` (set high bits of a register) instructions. Format 3 is for other instructions. -Each of these formats has corresponding classes in ``SparcInstrFormat.td``. -``InstSP`` is a base class for other instruction classes. Additional base +Each of these formats has corresponding classes in `SparcInstrFormat.td`. +`InstSP` is a base class for other instruction classes.
Additional base classes are specified for more precise formats: for example in -``SparcInstrFormat.td``, ``F2_1`` is for ``SETHI``, and ``F2_2`` is for -branches. There are three other base classes: ``F3_1`` for register/register -operations, ``F3_2`` for register/immediate operations, and ``F3_3`` for -floating-point operations. ``SparcInstrInfo.td`` also adds the base class -``Pseudo`` for synthetic SPARC instructions. - -``SparcInstrInfo.td`` largely consists of operand and instruction definitions -for the SPARC target. In ``SparcInstrInfo.td``, the following target -description file entry, ``LDrr``, defines the Load Integer instruction for a -Word (the ``LD`` SPARC opcode) from a memory address to a register. The first -parameter, the value 3 (``11``\ :sub:`2`), is the operation value for this -category of operation. The second parameter (``000000``\ :sub:`2`) is the -specific operation value for ``LD``/Load Word. The third parameter is the -output destination, which is a register operand and defined in the ``Register`` -target description file (``IntRegs``). - -.. code-block:: text - - def LDrr : F3_1 <3, 0b000000, (outs IntRegs:$rd), (ins (MEMrr $rs1, $rs2):$addr), - "ld [$addr], $dst", - [(set i32:$dst, (load ADDRrr:$addr))]>; +`SparcInstrFormat.td`, `F2_1` is for `SETHI`, and `F2_2` is for +branches. There are three other base classes: `F3_1` for register/register +operations, `F3_2` for register/immediate operations, and `F3_3` for +floating-point operations. `SparcInstrInfo.td` also adds the base class +`Pseudo` for synthetic SPARC instructions. + +`SparcInstrInfo.td` largely consists of operand and instruction definitions +for the SPARC target. In `SparcInstrInfo.td`, the following target +description file entry, `LDrr`, defines the Load Integer instruction for a +Word (the `LD` SPARC opcode) from a memory address to a register. The first +parameter, the value 3 (`11`{sub}`2`), is the operation value for this +category of operation. 
The second parameter (`000000`{sub}`2`) is the +specific operation value for `LD`/Load Word. The third parameter is the +output destination, which is a register operand and defined in the `Register` +target description file (`IntRegs`). + +```text +def LDrr : F3_1 <3, 0b000000, (outs IntRegs:$rd), (ins (MEMrr $rs1, $rs2):$addr), + "ld [$addr], $dst", + [(set i32:$dst, (load ADDRrr:$addr))]>; +``` The fourth parameter is the input source, which uses the address operand -``MEMrr`` that is defined earlier in ``SparcInstrInfo.td``: +`MEMrr` that is defined earlier in `SparcInstrInfo.td`: -.. code-block:: text - - def MEMrr : Operand { - let PrintMethod = "printMemOperand"; - let MIOperandInfo = (ops IntRegs, IntRegs); - } +```text +def MEMrr : Operand { + let PrintMethod = "printMemOperand"; + let MIOperandInfo = (ops IntRegs, IntRegs); +} +``` The fifth parameter is a string that is used by the assembly printer and can be left as an empty string until the assembly printer interface is implemented. The sixth and final parameter is the pattern used to match the instruction -during the SelectionDAG Select Phase described in :doc:`CodeGenerator`. -This parameter is detailed in the next section, :ref:`instruction-selector`. +during the SelectionDAG Select Phase described in {doc}`CodeGenerator`. +This parameter is detailed in the next section, {ref}`instruction-selector`. Instruction class definitions are not overloaded for different operand types, so separate versions of instructions are needed for register, memory, or @@ -787,169 +779,167 @@ immediate value operands. For example, to perform a Load Integer instruction for a Word from an immediate operand to a register, the following instruction class is defined: -.. 
code-block:: text - - def LDri : F3_2 <3, 0b000000, (outs IntRegs:$rd), (ins (MEMri $rs1, $simm13):$addr), - "ld [$addr], $dst", - [(set i32:$rd, (load ADDRri:$addr))]>; +```text +def LDri : F3_2 <3, 0b000000, (outs IntRegs:$rd), (ins (MEMri $rs1, $simm13):$addr), + "ld [$addr], $dst", + [(set i32:$rd, (load ADDRri:$addr))]>; +``` Writing these definitions for so many similar instructions can involve a lot of -cut and paste. In ``.td`` files, the ``multiclass`` directive enables the +cut and paste. In `.td` files, the `multiclass` directive enables the creation of templates to define several instruction classes at once (using the -``defm`` directive). For example in ``SparcInstrInfo.td``, the ``multiclass`` -pattern ``F3_12`` is defined to create 2 instruction classes each time -``F3_12`` is invoked: - -.. code-block:: text - - multiclass F3_12 Op3Val, SDNode OpNode> { - def rr : F3_1 <2, Op3Val, - (outs IntRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs1), - !strconcat(OpcStr, " $rs1, $rs2, $rd"), - [(set i32:$rd, (OpNode i32:$rs1, i32:$rs2))]>; - def ri : F3_2 <2, Op3Val, - (outs IntRegs:$rd), (ins IntRegs:$rs1, i32imm:$simm13), - !strconcat(OpcStr, " $rs1, $simm13, $rd"), - [(set i32:$rd, (OpNode i32:$rs1, simm13:$simm13))]>; - } - -So when the ``defm`` directive is used for the ``XOR`` and ``ADD`` -instructions, as seen below, it creates four instruction objects: ``XORrr``, -``XORri``, ``ADDrr``, and ``ADDri``. - -.. code-block:: text - - defm XOR : F3_12<"xor", 0b000011, xor>; - defm ADD : F3_12<"add", 0b000000, add>; - -``SparcInstrInfo.td`` also includes definitions for condition codes that are +`defm` directive). 
For example in `SparcInstrInfo.td`, the `multiclass` +pattern `F3_12` is defined to create 2 instruction classes each time +`F3_12` is invoked: + +```text +multiclass F3_12 Op3Val, SDNode OpNode> { + def rr : F3_1 <2, Op3Val, + (outs IntRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs1), + !strconcat(OpcStr, " $rs1, $rs2, $rd"), + [(set i32:$rd, (OpNode i32:$rs1, i32:$rs2))]>; + def ri : F3_2 <2, Op3Val, + (outs IntRegs:$rd), (ins IntRegs:$rs1, i32imm:$simm13), + !strconcat(OpcStr, " $rs1, $simm13, $rd"), + [(set i32:$rd, (OpNode i32:$rs1, simm13:$simm13))]>; +} +``` + +So when the `defm` directive is used for the `XOR` and `ADD` +instructions, as seen below, it creates four instruction objects: `XORrr`, +`XORri`, `ADDrr`, and `ADDri`. + +```text +defm XOR : F3_12<"xor", 0b000011, xor>; +defm ADD : F3_12<"add", 0b000000, add>; +``` + +`SparcInstrInfo.td` also includes definitions for condition codes that are referenced by branch instructions. The following definitions in -``SparcInstrInfo.td`` indicate the bit location of the SPARC condition code. -For example, the 10\ :sup:`th` bit represents the "greater than" condition for -integers, and the 22\ :sup:`nd` bit represents the "greater than" condition for +`SparcInstrInfo.td` indicate the bit location of the SPARC condition code. +For example, the 10{sup}`th` bit represents the "greater than" condition for +integers, and the 22{sup}`nd` bit represents the "greater than" condition for floats. -.. code-block:: text - - def ICC_NE : ICC_VAL< 9>; // Not Equal - def ICC_E : ICC_VAL< 1>; // Equal - def ICC_G : ICC_VAL<10>; // Greater - ... - def FCC_U : FCC_VAL<23>; // Unordered - def FCC_G : FCC_VAL<22>; // Greater - def FCC_UG : FCC_VAL<21>; // Unordered or Greater - ... +```text +def ICC_NE : ICC_VAL< 9>; // Not Equal +def ICC_E : ICC_VAL< 1>; // Equal +def ICC_G : ICC_VAL<10>; // Greater +... +def FCC_U : FCC_VAL<23>; // Unordered +def FCC_G : FCC_VAL<22>; // Greater +def FCC_UG : FCC_VAL<21>; // Unordered or Greater +... 
+``` -(Note that ``Sparc.h`` also defines enums that correspond to the same SPARC -condition codes. Care must be taken to ensure the values in ``Sparc.h`` -correspond to the values in ``SparcInstrInfo.td``. I.e., ``SPCC::ICC_NE = 9``, -``SPCC::FCC_U = 23`` and so on.) +(Note that `Sparc.h` also defines enums that correspond to the same SPARC +condition codes. Care must be taken to ensure the values in `Sparc.h` +correspond to the values in `SparcInstrInfo.td`. I.e., `SPCC::ICC_NE = 9`, +`SPCC::FCC_U = 23` and so on.) -Instruction Operand Mapping ---------------------------- +### Instruction Operand Mapping The code generator backend maps instruction operands to fields in the -instruction. Whenever a bit in the instruction encoding ``Inst`` is assigned -to field without a concrete value, an operand from the ``outs`` or ``ins`` list +instruction. Whenever a bit in the instruction encoding `Inst` is assigned +to field without a concrete value, an operand from the `outs` or `ins` list is expected to have a matching name. This operand then populates that undefined -field. For example, the Sparc target defines the ``XNORrr`` instruction as a -``F3_1`` format instruction having three operands: the output ``$rd``, and the -inputs ``$rs1``, and ``$rs2``. - -.. code-block:: text - - def XNORrr : F3_1<2, 0b000111, - (outs IntRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2), - "xnor $rs1, $rs2, $rd", - [(set i32:$rd, (not (xor i32:$rs1, i32:$rs2)))]>; - -The instruction templates in ``SparcInstrFormats.td`` show the base class for -``F3_1`` is ``InstSP``. - -.. code-block:: text - - class InstSP pattern> : Instruction { - field bits<32> Inst; - let Namespace = "SP"; - bits<2> op; - let Inst{31-30} = op; - dag OutOperandList = outs; - dag InOperandList = ins; - let AsmString = asmstr; - let Pattern = pattern; - } - -``InstSP`` defines the ``op`` field, and uses it to define bits 30 and 31 of the +field. 
For example, the Sparc target defines the `XNORrr` instruction as a +`F3_1` format instruction having three operands: the output `$rd`, and the +inputs `$rs1`, and `$rs2`. + +```text +def XNORrr : F3_1<2, 0b000111, + (outs IntRegs:$rd), (ins IntRegs:$rs1, IntRegs:$rs2), + "xnor $rs1, $rs2, $rd", + [(set i32:$rd, (not (xor i32:$rs1, i32:$rs2)))]>; +``` + +The instruction templates in `SparcInstrFormats.td` show the base class for +`F3_1` is `InstSP`. + +```text +class InstSP pattern> : Instruction { + field bits<32> Inst; + let Namespace = "SP"; + bits<2> op; + let Inst{31-30} = op; + dag OutOperandList = outs; + dag InOperandList = ins; + let AsmString = asmstr; + let Pattern = pattern; +} +``` + +`InstSP` defines the `op` field, and uses it to define bits 30 and 31 of the instruction, but does not assign a value to it. -.. code-block:: text - - class F3 pattern> - : InstSP { - bits<5> rd; - bits<6> op3; - bits<5> rs1; - let op{1} = 1; // Op = 2 or 3 - let Inst{29-25} = rd; - let Inst{24-19} = op3; - let Inst{18-14} = rs1; - } - -``F3`` defines the ``rd``, ``op3``, and ``rs1`` fields, and uses them in the +```text +class F3 pattern> + : InstSP { + bits<5> rd; + bits<6> op3; + bits<5> rs1; + let op{1} = 1; // Op = 2 or 3 + let Inst{29-25} = rd; + let Inst{24-19} = op3; + let Inst{18-14} = rs1; +} +``` + +`F3` defines the `rd`, `op3`, and `rs1` fields, and uses them in the instruction, and again does not assign values. -.. code-block:: text - - class F3_1 opVal, bits<6> op3val, dag outs, dag ins, - string asmstr, list pattern> : F3 { - bits<8> asi = 0; // asi not currently used - bits<5> rs2; - let op = opVal; - let op3 = op3val; - let Inst{13} = 0; // i field = 0 - let Inst{12-5} = asi; // address space identifier - let Inst{4-0} = rs2; - } - -``F3_1`` assigns a value to ``op`` and ``op3`` fields, and defines the ``rs2`` -field. 
Therefore, a ``F3_1`` format instruction will require a definition for -``rd``, ``rs1``, and ``rs2`` in order to fully specify the instruction encoding. - -The ``XNORrr`` instruction then provides those three operands in its +```text +class F3_1 opVal, bits<6> op3val, dag outs, dag ins, + string asmstr, list pattern> : F3 { + bits<8> asi = 0; // asi not currently used + bits<5> rs2; + let op = opVal; + let op3 = op3val; + let Inst{13} = 0; // i field = 0 + let Inst{12-5} = asi; // address space identifier + let Inst{4-0} = rs2; +} +``` + +`F3_1` assigns a value to `op` and `op3` fields, and defines the `rs2` +field. Therefore, a `F3_1` format instruction will require a definition for +`rd`, `rs1`, and `rs2` in order to fully specify the instruction encoding. + +The `XNORrr` instruction then provides those three operands in its OutOperandList and InOperandList, which bind to the corresponding fields, and thus complete the instruction encoding. For some instructions, a single operand may contain sub-operands. As shown -earlier, the instruction ``LDrr`` uses an input operand of type ``MEMrr``. This +earlier, the instruction `LDrr` uses an input operand of type `MEMrr`. This operand type contains two register sub-operands, defined by the -``MIOperandInfo`` value to be ``(ops IntRegs, IntRegs)``. +`MIOperandInfo` value to be `(ops IntRegs, IntRegs)`. -.. code-block:: text +```text +def LDrr : F3_1 <3, 0b000000, (outs IntRegs:$rd), (ins (MEMrr $rs1, $rs2):$addr), + "ld [$addr], $dst", + [(set i32:$dst, (load ADDRrr:$addr))]>; +``` - def LDrr : F3_1 <3, 0b000000, (outs IntRegs:$rd), (ins (MEMrr $rs1, $rs2):$addr), - "ld [$addr], $dst", - [(set i32:$dst, (load ADDRrr:$addr))]>; - -As this instruction is also the ``F3_1`` format, it will expect operands named -``rd``, ``rs1``, and ``rs2`` as well. In order to allow this, a complex operand +As this instruction is also the `F3_1` format, it will expect operands named +`rd`, `rs1`, and `rs2` as well. 
In order to allow this, a complex operand can optionally give names to each of its sub-operands. In this example -``MEMrr``'s first sub-operand is named ``$rs1``, the second ``$rs2``, and the -operand as a whole is also given the name ``$addr``. +`MEMrr`'s first sub-operand is named `$rs1`, the second `$rs2`, and the +operand as a whole is also given the name `$addr`. When a particular instruction doesn't use all the operands that the instruction format defines, a constant value may instead be bound to one or all. For -example, the ``RDASR`` instruction only takes a single register operand, so we -assign a constant zero to ``rs2``: - -.. code-block:: text +example, the `RDASR` instruction only takes a single register operand, so we +assign a constant zero to `rs2`: - let rs2 = 0 in - def RDASR : F3_1<2, 0b101000, - (outs IntRegs:$rd), (ins ASRRegs:$rs1), - "rd $rs1, $rd", []>; +```text +let rs2 = 0 in + def RDASR : F3_1<2, 0b101000, + (outs IntRegs:$rd), (ins ASRRegs:$rs1), + "rd $rs1, $rd", []>; +``` -Instruction Operand Name Mapping -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Instruction Operand Name Mapping TableGen will also generate a function called getNamedOperandIdx() which can be used to look up an operand's index in a MachineInstr based on its @@ -958,14 +948,14 @@ TableGen definition will add all of its operands to an enumeration llvm::XXX:OpName and also add an entry for it into the OperandMap table, which can be queried using getNamedOperandIdx() -.. 
code-block:: text +```text +int DstIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::dst); // => 0 +int BIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::b); // => 1 +int CIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::c); // => 2 +int DIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::d); // => -1 - int DstIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::dst); // => 0 - int BIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::b); // => 1 - int CIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::c); // => 2 - int DIndex = SP::getNamedOperandIdx(SP::XNORrr, SP::OpName::d); // => -1 - - ... +... +``` The entries in the OpName enum are taken verbatim from the TableGen definitions, so operands with lowercase names will have lower case entries in the enum. @@ -976,67 +966,65 @@ For example: XXXInstrInfo.cpp: -.. code-block:: c++ - - // For getNamedOperandIdx() function definition. - #define GET_INSTRINFO_NAMED_OPS - #include "XXXGenInstrInfo.inc" +```c++ +// For getNamedOperandIdx() function definition. +#define GET_INSTRINFO_NAMED_OPS +#include "XXXGenInstrInfo.inc" +``` XXXInstrInfo.h: -.. code-block:: c++ - - // For OpName enum and getNamedOperandIdx declaration. - #define GET_INSTRINFO_OPERAND_ENUM - #include "XXXGenInstrInfo.inc" +```c++ +// For OpName enum and getNamedOperandIdx declaration. +#define GET_INSTRINFO_OPERAND_ENUM +#include "XXXGenInstrInfo.inc" +``` -Instruction Operand Types -^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Instruction Operand Types TableGen will also generate an enumeration consisting of all named Operand types defined in the backend, in the llvm::XXX::OpTypes namespace. Some common immediate Operand types (for instance i8, i32, i64, f32, f64) -are defined for all targets in ``include/llvm/Target/Target.td``, and are +are defined for all targets in `include/llvm/Target/Target.td`, and are available in each Target's OpTypes enum. 
Also, only named Operand types appear in the enumeration: anonymous types are ignored. -For example, the X86 backend defines ``brtarget`` and ``brtarget8``, both -instances of the TableGen ``Operand`` class, which represent branch target +For example, the X86 backend defines `brtarget` and `brtarget8`, both +instances of the TableGen `Operand` class, which represent branch target operands: -.. code-block:: text - - def brtarget : Operand; - def brtarget8 : Operand; +```text +def brtarget : Operand; +def brtarget8 : Operand; +``` This results in: -.. code-block:: c++ - - namespace X86 { - namespace OpTypes { - enum OperandType { - ... - brtarget, - brtarget8, - ... - i32imm, - i64imm, - ... - OPERAND_TYPE_LIST_END - } // End namespace OpTypes - } // End namespace X86 +```c++ +namespace X86 { +namespace OpTypes { +enum OperandType { + ... + brtarget, + brtarget8, + ... + i32imm, + i64imm, + ... + OPERAND_TYPE_LIST_END +} // End namespace OpTypes +} // End namespace X86 +``` In typical TableGen fashion, to use the enum, you will need to define a preprocessor macro: -.. code-block:: c++ - - #define GET_INSTRINFO_OPERAND_TYPES_ENUM // For OpTypes enum - #include "XXXGenInstrInfo.inc" +```c++ +#define GET_INSTRINFO_OPERAND_TYPES_ENUM // For OpTypes enum +#include "XXXGenInstrInfo.inc" +``` -Instruction Scheduling ----------------------- +### Instruction Scheduling Instruction itineraries can be queried using MCDesc::getSchedClass(). The value can be named by an enumeration in llvm::XXX::Sched namespace generated @@ -1044,406 +1032,398 @@ by TableGen in XXXGenInstrInfo.inc. The name of the schedule classes are the same as provided in XXXSchedule.td plus a default NoItinerary class. The schedule models are generated by TableGen by the SubtargetEmitter, -using the ``CodeGenSchedModels`` class. This is distinct from the itinerary -method of specifying machine resource use. The tool ``utils/schedcover.py`` +using the `CodeGenSchedModels` class. 
This is distinct from the itinerary +method of specifying machine resource use. The tool `utils/schedcover.py` can be used to determine which instructions have been covered by the schedule model description and which haven't. The first step is to use the -instructions below to create an output file. Then run ``schedcover.py`` on the +instructions below to create an output file. Then run `schedcover.py` on the output file: -.. code-block:: shell - - $ /utils/schedcover.py /lib/Target/AArch64/tblGenSubtarget.with - instruction, default, CortexA53Model, CortexA57Model, CycloneModel, ExynosM3Model, FalkorModel, KryoModel, ThunderX2T99Model, ThunderXT8XModel - ABSv16i8, WriteV, , , CyWriteV3, M3WriteNMISC1, FalkorWr_2VXVY_2cyc, KryoWrite_2cyc_XY_XY_150ln, , - ABSv1i64, WriteV, , , CyWriteV3, M3WriteNMISC1, FalkorWr_1VXVY_2cyc, KryoWrite_2cyc_XY_noRSV_67ln, , - ... +```console +$ /utils/schedcover.py /lib/Target/AArch64/tblGenSubtarget.with +instruction, default, CortexA53Model, CortexA57Model, CycloneModel, ExynosM3Model, FalkorModel, KryoModel, ThunderX2T99Model, ThunderXT8XModel +ABSv16i8, WriteV, , , CyWriteV3, M3WriteNMISC1, FalkorWr_2VXVY_2cyc, KryoWrite_2cyc_XY_XY_150ln, , +ABSv1i64, WriteV, , , CyWriteV3, M3WriteNMISC1, FalkorWr_1VXVY_2cyc, KryoWrite_2cyc_XY_noRSV_67ln, , +... +``` To capture the debug output from generating a schedule model, change to the appropriate target directory and use the following command: -command with the ``subtarget-emitter`` debug option: - -.. code-block:: shell - - $ /bin/llvm-tblgen -debug-only=subtarget-emitter -gen-subtarget \ - -I /lib/Target/ -I /include \ - -I /lib/Target /lib/Target//.td \ - -o /lib/Target//GenSubtargetInfo.inc.tmp \ - > tblGenSubtarget.dbg 2>&1 - -Where ```` is the build directory, ``src`` is the source directory, -and ```` is the name of the target. 
+command with the `subtarget-emitter` debug option: + +```console +$ <build>/bin/llvm-tblgen -debug-only=subtarget-emitter -gen-subtarget \ + -I <src>/lib/Target/<target name> -I <src>/include \ + -I <src>/lib/Target <src>/lib/Target/<target name>/<target name>.td \ + -o <build>/lib/Target/<target name>/<target name>GenSubtargetInfo.inc.tmp \ + > tblGenSubtarget.dbg 2>&1 +``` + +Where `<build>` is the build directory, `<src>` is the source directory, +and `<target name>` is the name of the target. To double check that the above command is what is needed, one can capture the exact TableGen command from a build by using: -.. code-block:: shell +```console +$ VERBOSE=1 make ... +``` - $ VERBOSE=1 make ... +and search for `llvm-tblgen` commands in the output. -and search for ``llvm-tblgen`` commands in the output. -Instruction Relation Mapping ---------------------------- +### Instruction Relation Mapping This TableGen feature is used to relate instructions with each other. It is particularly useful when you have multiple instruction formats and need to switch between them after instruction selection. This entire feature is driven -by relation models which can be defined in ``XXXInstrInfo.td`` files +by relation models which can be defined in `XXXInstrInfo.td` files according to the target-specific instruction set. Relation models are defined -using ``InstrMapping`` class as a base. TableGen parses all the models +using `InstrMapping` class as a base. TableGen parses all the models and generates instruction relation maps using the specified information. -Relation maps are emitted as tables in the ``XXXGenInstrInfo.inc`` file +Relation maps are emitted as tables in the `XXXGenInstrInfo.inc` file along with the functions to query them. For the detailed information on how to -use this feature, please refer to :doc:`HowToUseInstrMappings`. +use this feature, please refer to {doc}`HowToUseInstrMappings`.
-Implement a subclass of ``TargetInstrInfo`` -------------------------------------------- +### Implement a subclass of `TargetInstrInfo` -The final step is to hand code portions of ``XXXInstrInfo``, which implements -the interface described in ``TargetInstrInfo.h`` (see :ref:`TargetInstrInfo`). -These functions return ``0`` or a Boolean or they assert, unless overridden. +The final step is to hand code portions of `XXXInstrInfo`, which implements +the interface described in `TargetInstrInfo.h` (see {ref}`TargetInstrInfo`). +These functions return `0` or a Boolean or they assert, unless overridden. Here's a list of functions that are overridden for the SPARC implementation in -``SparcInstrInfo.cpp``: +`SparcInstrInfo.cpp`: -* ``isLoadFromStackSlot`` --- If the specified machine instruction is a direct +* `isLoadFromStackSlot` --- If the specified machine instruction is a direct load from a stack slot, return the register number of the destination and the - ``FrameIndex`` of the stack slot. + `FrameIndex` of the stack slot. -* ``isStoreToStackSlot`` --- If the specified machine instruction is a direct +* `isStoreToStackSlot` --- If the specified machine instruction is a direct store to a stack slot, return the register number of the destination and the - ``FrameIndex`` of the stack slot. + `FrameIndex` of the stack slot. -* ``copyPhysReg`` --- Copy values between a pair of physical registers. +* `copyPhysReg` --- Copy values between a pair of physical registers. -* ``storeRegToStackSlot`` --- Store a register value to a stack slot. +* `storeRegToStackSlot` --- Store a register value to a stack slot. -* ``loadRegFromStackSlot`` --- Load a register value from a stack slot. +* `loadRegFromStackSlot` --- Load a register value from a stack slot. -* ``storeRegToAddr`` --- Store a register value to memory. +* `storeRegToAddr` --- Store a register value to memory. -* ``loadRegFromAddr`` --- Load a register value from memory. 
+* `loadRegFromAddr` --- Load a register value from memory. -* ``foldMemoryOperand`` --- Attempt to combine instructions of any load or +* `foldMemoryOperand` --- Attempt to combine instructions of any load or store instruction for the specified operand(s). -Branch Folding and If Conversion --------------------------------- +### Branch Folding and If Conversion Performance can be improved by combining instructions or by eliminating -instructions that are never reached. The ``analyzeBranch`` method in -``XXXInstrInfo`` may be implemented to examine conditional instructions and -remove unnecessary instructions. ``analyzeBranch`` looks at the end of a +instructions that are never reached. The `analyzeBranch` method in +`XXXInstrInfo` may be implemented to examine conditional instructions and +remove unnecessary instructions. `analyzeBranch` looks at the end of a machine basic block (MBB) for opportunities for improvement, such as branch -folding and if conversion. The ``BranchFolder`` and ``IfConverter`` machine -function passes (see the source files ``BranchFolding.cpp`` and -``IfConversion.cpp`` in the ``lib/CodeGen`` directory) call ``analyzeBranch`` +folding and if conversion. The `BranchFolder` and `IfConverter` machine +function passes (see the source files `BranchFolding.cpp` and +`IfConversion.cpp` in the `lib/CodeGen` directory) call `analyzeBranch` to improve the control flow graph that represents the instructions. -Several implementations of ``analyzeBranch`` (for ARM, Alpha, and X86) can be -examined as models for your own ``analyzeBranch`` implementation. Since SPARC -does not implement a useful ``analyzeBranch``, the ARM target implementation is +Several implementations of `analyzeBranch` (for ARM, Alpha, and X86) can be +examined as models for your own `analyzeBranch` implementation. Since SPARC +does not implement a useful `analyzeBranch`, the ARM target implementation is shown below. 
-``analyzeBranch`` returns a Boolean value and takes four parameters: +`analyzeBranch` returns a Boolean value and takes four parameters: -* ``MachineBasicBlock &MBB`` --- The incoming block to be examined. +* `MachineBasicBlock &MBB` --- The incoming block to be examined. -* ``MachineBasicBlock *&TBB`` --- A destination block that is returned. For a - conditional branch that evaluates to true, ``TBB`` is the destination. +* `MachineBasicBlock *&TBB` --- A destination block that is returned. For a + conditional branch that evaluates to true, `TBB` is the destination. -* ``MachineBasicBlock *&FBB`` --- For a conditional branch that evaluates to - false, ``FBB`` is returned as the destination. +* `MachineBasicBlock *&FBB` --- For a conditional branch that evaluates to + false, `FBB` is returned as the destination. -* ``std::vector &Cond`` --- List of operands to evaluate a +* `std::vector &Cond` --- List of operands to evaluate a condition for a conditional branch. In the simplest case, if a block ends without a branch, then it falls through -to the successor block. No destination blocks are specified for either ``TBB`` -or ``FBB``, so both parameters return ``NULL``. The start of the -``analyzeBranch`` (see code below for the ARM target) shows the function +to the successor block. No destination blocks are specified for either `TBB` +or `FBB`, so both parameters return `NULL`. The start of the +`analyzeBranch` (see code below for the ARM target) shows the function parameters and the code for the simplest case. -.. 
code-block:: c++ - - bool ARMInstrInfo::analyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - std::vector &Cond) const - { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) - return false; +```c++ +bool ARMInstrInfo::analyzeBranch(MachineBasicBlock &MBB, + MachineBasicBlock *&TBB, + MachineBasicBlock *&FBB, + std::vector &Cond) const +{ + MachineBasicBlock::iterator I = MBB.end(); + if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) + return false; +``` If a block ends with a single unconditional branch instruction, then -``analyzeBranch`` (shown below) should return the destination of that branch in -the ``TBB`` parameter. - -.. code-block:: c++ +`analyzeBranch` (shown below) should return the destination of that branch in +the `TBB` parameter. - if (LastOpc == ARM::B || LastOpc == ARM::tB) { - TBB = LastInst->getOperand(0).getMBB(); - return false; - } +```c++ + if (LastOpc == ARM::B || LastOpc == ARM::tB) { + TBB = LastInst->getOperand(0).getMBB(); + return false; + } +``` If a block ends with two unconditional branches, then the second branch is never reached. In that situation, as shown below, remove the last branch -instruction and return the penultimate branch in the ``TBB`` parameter. - -.. code-block:: c++ - - if ((SecondLastOpc == ARM::B || SecondLastOpc == ARM::tB) && - (LastOpc == ARM::B || LastOpc == ARM::tB)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - I->eraseFromParent(); - return false; - } +instruction and return the penultimate branch in the `TBB` parameter. + +```c++ + if ((SecondLastOpc == ARM::B || SecondLastOpc == ARM::tB) && + (LastOpc == ARM::B || LastOpc == ARM::tB)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + I = LastInst; + I->eraseFromParent(); + return false; + } +``` A block may end with a single conditional branch instruction that falls through to successor block if the condition evaluates to false. 
In that case, -``analyzeBranch`` (shown below) should return the destination of that -conditional branch in the ``TBB`` parameter and a list of operands in the -``Cond`` parameter to evaluate the condition. - -.. code-block:: c++ - - if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) { - // Block ends with fall-through condbranch. - TBB = LastInst->getOperand(0).getMBB(); - Cond.push_back(LastInst->getOperand(1)); - Cond.push_back(LastInst->getOperand(2)); - return false; - } +`analyzeBranch` (shown below) should return the destination of that +conditional branch in the `TBB` parameter and a list of operands in the +`Cond` parameter to evaluate the condition. + +```c++ + if (LastOpc == ARM::Bcc || LastOpc == ARM::tBcc) { + // Block ends with fall-through condbranch. + TBB = LastInst->getOperand(0).getMBB(); + Cond.push_back(LastInst->getOperand(1)); + Cond.push_back(LastInst->getOperand(2)); + return false; + } +``` If a block ends with both a conditional branch and an ensuing unconditional -branch, then ``analyzeBranch`` (shown below) should return the conditional +branch, then `analyzeBranch` (shown below) should return the conditional branch destination (assuming it corresponds to a conditional evaluation of -"``true``") in the ``TBB`` parameter and the unconditional branch destination -in the ``FBB`` (corresponding to a conditional evaluation of "``false``"). A -list of operands to evaluate the condition should be returned in the ``Cond`` +"`true`") in the `TBB` parameter and the unconditional branch destination +in the `FBB` (corresponding to a conditional evaluation of "`false`"). A +list of operands to evaluate the condition should be returned in the `Cond` parameter. -.. 
code-block:: c++ - - unsigned SecondLastOpc = SecondLastInst->getOpcode(); +```c++ + unsigned SecondLastOpc = SecondLastInst->getOpcode(); - if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) || - (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - Cond.push_back(SecondLastInst->getOperand(1)); - Cond.push_back(SecondLastInst->getOperand(2)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } + if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) || + (SecondLastOpc == ARM::tBcc && LastOpc == ARM::tB)) { + TBB = SecondLastInst->getOperand(0).getMBB(); + Cond.push_back(SecondLastInst->getOperand(1)); + Cond.push_back(SecondLastInst->getOperand(2)); + FBB = LastInst->getOperand(0).getMBB(); + return false; + } +``` For the last two cases (ending with a single conditional branch or ending with one conditional and one unconditional branch), the operands returned in the -``Cond`` parameter can be passed to methods of other instructions to create new -branches or perform other operations. An implementation of ``analyzeBranch`` -requires the helper methods ``removeBranch`` and ``insertBranch`` to manage +`Cond` parameter can be passed to methods of other instructions to create new +branches or perform other operations. An implementation of `analyzeBranch` +requires the helper methods `removeBranch` and `insertBranch` to manage subsequent operations. -``analyzeBranch`` should return false indicating success in most circumstances. -``analyzeBranch`` should only return true when the method is stumped about what +`analyzeBranch` should return false indicating success in most circumstances. +`analyzeBranch` should only return true when the method is stumped about what to do, for example, if a block has three terminating branches. -``analyzeBranch`` may return true if it encounters a terminator it cannot +`analyzeBranch` may return true if it encounters a terminator it cannot handle, such as an indirect branch. 
-.. _instruction-selector: +(instruction-selector)= -Instruction Selector -==================== +## Instruction Selector -LLVM uses a ``SelectionDAG`` to represent LLVM IR instructions, and nodes of -the ``SelectionDAG`` ideally represent native target instructions. During code +LLVM uses a `SelectionDAG` to represent LLVM IR instructions, and nodes of +the `SelectionDAG` ideally represent native target instructions. During code generation, instruction selection passes are performed to convert non-native DAG instructions into native target-specific instructions. The pass described -in ``XXXISelDAGToDAG.cpp`` is used to match patterns and perform DAG-to-DAG +in `XXXISelDAGToDAG.cpp` is used to match patterns and perform DAG-to-DAG instruction selection. Optionally, a pass may be defined (in -``XXXBranchSelector.cpp``) to perform similar DAG-to-DAG operations for branch -instructions. Later, the code in ``XXXISelLowering.cpp`` replaces or removes +`XXXBranchSelector.cpp`) to perform similar DAG-to-DAG operations for branch +instructions. Later, the code in `XXXISelLowering.cpp` replaces or removes operations and data types not supported natively (legalizes) in a -``SelectionDAG``. +`SelectionDAG`. TableGen generates code for instruction selection using the following target description input files: -* ``XXXInstrInfo.td`` --- Contains definitions of instructions in a - target-specific instruction set, generates ``XXXGenDAGISel.inc``, which is - included in ``XXXISelDAGToDAG.cpp``. +* `XXXInstrInfo.td` --- Contains definitions of instructions in a + target-specific instruction set, generates `XXXGenDAGISel.inc`, which is + included in `XXXISelDAGToDAG.cpp`. -* ``XXXCallingConv.td`` --- Contains the calling and return value conventions - for the target architecture, and it generates ``XXXGenCallingConv.inc``, - which is included in ``XXXISelLowering.cpp``. 
+* `XXXCallingConv.td` --- Contains the calling and return value conventions + for the target architecture, and it generates `XXXGenCallingConv.inc`, + which is included in `XXXISelLowering.cpp`. The implementation of an instruction selection pass must include a header that -declares the ``FunctionPass`` class or a subclass of ``FunctionPass``. In -``XXXTargetMachine.cpp``, a Pass Manager (PM) should add each instruction +declares the `FunctionPass` class or a subclass of `FunctionPass`. In +`XXXTargetMachine.cpp`, a Pass Manager (PM) should add each instruction selection pass into the queue of passes to run. -The LLVM static compiler (``llc``) is an excellent tool for visualizing the -contents of DAGs. To display the ``SelectionDAG`` before or after specific -processing phases, use the command line options for ``llc``, described at -:ref:`SelectionDAG-Process`. +The LLVM static compiler (`llc`) is an excellent tool for visualizing the +contents of DAGs. To display the `SelectionDAG` before or after specific +processing phases, use the command line options for `llc`, described at +{ref}`SelectionDAG-Process`. To describe instruction selector behavior, you should add patterns for lowering -LLVM code into a ``SelectionDAG`` as the last parameter of the instruction -definitions in ``XXXInstrInfo.td``. For example, in ``SparcInstrInfo.td``, +LLVM code into a `SelectionDAG` as the last parameter of the instruction +definitions in `XXXInstrInfo.td`. For example, in `SparcInstrInfo.td`, this entry defines a register store operation, and the last parameter describes a pattern with the store DAG operator. -.. 
code-block:: text +```text +def STrr : F3_1< 3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src), + "st $src, [$addr]", [(store i32:$src, ADDRrr:$addr)]>; +``` - def STrr : F3_1< 3, 0b000100, (outs), (ins MEMrr:$addr, IntRegs:$src), - "st $src, [$addr]", [(store i32:$src, ADDRrr:$addr)]>; +`ADDRrr` is a memory mode that is also defined in `SparcInstrInfo.td`: -``ADDRrr`` is a memory mode that is also defined in ``SparcInstrInfo.td``: +```text +def ADDRrr : ComplexPattern; +``` -.. code-block:: text - - def ADDRrr : ComplexPattern; - -The definition of ``ADDRrr`` refers to ``SelectADDRrr``, which is a function +The definition of `ADDRrr` refers to `SelectADDRrr`, which is a function defined in an implementation of the Instructor Selector (such as -``SparcISelDAGToDAG.cpp``). +`SparcISelDAGToDAG.cpp`). -In ``lib/Target/TargetSelectionDAG.td``, the DAG operator for store is defined +In `lib/Target/TargetSelectionDAG.td`, the DAG operator for store is defined below: -.. code-block:: text - - def store : PatFrag<(ops node:$val, node:$ptr), - (unindexedstore node:$val, node:$ptr)> { - let IsStore = true; - let IsTruncStore = false; - } - -``XXXInstrInfo.td`` also generates (in ``XXXGenDAGISel.inc``) the -``SelectCode`` method that is used to call the appropriate processing method -for an instruction. In this example, ``SelectCode`` calls ``Select_ISD_STORE`` -for the ``ISD::STORE`` opcode. - -.. code-block:: c++ - - SDNode *SelectCode(SDValue N) { - ... - MVT::ValueType NVT = N.getNode()->getValueType(0); - switch (N.getOpcode()) { - case ISD::STORE: { - switch (NVT) { - default: - return Select_ISD_STORE(N); - break; - } +```text +def store : PatFrag<(ops node:$val, node:$ptr), + (unindexedstore node:$val, node:$ptr)> { + let IsStore = true; + let IsTruncStore = false; +} +``` + +`XXXInstrInfo.td` also generates (in `XXXGenDAGISel.inc`) the +`SelectCode` method that is used to call the appropriate processing method +for an instruction. 
In this example, `SelectCode` calls `Select_ISD_STORE` +for the `ISD::STORE` opcode. + +```c++ +SDNode *SelectCode(SDValue N) { + ... + MVT::ValueType NVT = N.getNode()->getValueType(0); + switch (N.getOpcode()) { + case ISD::STORE: { + switch (NVT) { + default: + return Select_ISD_STORE(N); break; } - ... + break; + } + ... +``` -The pattern for ``STrr`` is matched, so elsewhere in ``XXXGenDAGISel.inc``, -code for ``STrr`` is created for ``Select_ISD_STORE``. The ``Emit_22`` method -is also generated in ``XXXGenDAGISel.inc`` to complete the processing of this +The pattern for `STrr` is matched, so elsewhere in `XXXGenDAGISel.inc`, +code for `STrr` is created for `Select_ISD_STORE`. The `Emit_22` method +is also generated in `XXXGenDAGISel.inc` to complete the processing of this instruction. -.. code-block:: c++ - - SDNode *Select_ISD_STORE(const SDValue &N) { - SDValue Chain = N.getOperand(0); - if (Predicate_store(N.getNode())) { - SDValue N1 = N.getOperand(1); - SDValue N2 = N.getOperand(2); - SDValue CPTmp0; - SDValue CPTmp1; - - // Pattern: (st:void i32:i32:$src, - // ADDRrr:i32:$addr)<> - // Emits: (STrr:void ADDRrr:i32:$addr, IntRegs:i32:$src) - // Pattern complexity = 13 cost = 1 size = 0 - if (SelectADDRrr(N, N2, CPTmp0, CPTmp1) && - N1.getNode()->getValueType(0) == MVT::i32 && - N2.getNode()->getValueType(0) == MVT::i32) { - return Emit_22(N, SP::STrr, CPTmp0, CPTmp1); - } - ... 
+```c++ +SDNode *Select_ISD_STORE(const SDValue &N) { + SDValue Chain = N.getOperand(0); + if (Predicate_store(N.getNode())) { + SDValue N1 = N.getOperand(1); + SDValue N2 = N.getOperand(2); + SDValue CPTmp0; + SDValue CPTmp1; + + // Pattern: (st:void i32:i32:$src, + // ADDRrr:i32:$addr)<> + // Emits: (STrr:void ADDRrr:i32:$addr, IntRegs:i32:$src) + // Pattern complexity = 13 cost = 1 size = 0 + if (SelectADDRrr(N, N2, CPTmp0, CPTmp1) && + N1.getNode()->getValueType(0) == MVT::i32 && + N2.getNode()->getValueType(0) == MVT::i32) { + return Emit_22(N, SP::STrr, CPTmp0, CPTmp1); + } +... +``` -The SelectionDAG Legalize Phase -------------------------------- +### The SelectionDAG Legalize Phase The Legalize phase converts a DAG to use types and operations that are natively supported by the target. For natively unsupported types and operations, you -need to add code to the target-specific ``XXXTargetLowering`` implementation to +need to add code to the target-specific `XXXTargetLowering` implementation to convert unsupported types and operations to supported ones. -In the constructor for the ``XXXTargetLowering`` class, first use the -``addRegisterClass`` method to specify which types are supported and which +In the constructor for the `XXXTargetLowering` class, first use the +`addRegisterClass` method to specify which types are supported and which register classes are associated with them. The code for the register classes -are generated by TableGen from ``XXXRegisterInfo.td`` and placed in -``XXXGenRegisterInfo.h.inc``. For example, the implementation of the -constructor for the SparcTargetLowering class (in ``SparcISelLowering.cpp``) +are generated by TableGen from `XXXRegisterInfo.td` and placed in +`XXXGenRegisterInfo.h.inc`. For example, the implementation of the +constructor for the SparcTargetLowering class (in `SparcISelLowering.cpp`) starts with the following code: -.. 
code-block:: c++ - - addRegisterClass(MVT::i32, SP::IntRegsRegisterClass); - addRegisterClass(MVT::f32, SP::FPRegsRegisterClass); - addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass); +```c++ +addRegisterClass(MVT::i32, SP::IntRegsRegisterClass); +addRegisterClass(MVT::f32, SP::FPRegsRegisterClass); +addRegisterClass(MVT::f64, SP::DFPRegsRegisterClass); +``` -You should examine the node types in the ``ISD`` namespace -(``include/llvm/CodeGen/SelectionDAGNodes.h``) and determine which operations +You should examine the node types in the `ISD` namespace +(`include/llvm/CodeGen/SelectionDAGNodes.h`) and determine which operations the target natively supports. For operations that do **not** have native -support, add a callback to the constructor for the ``XXXTargetLowering`` class, -so the instruction selection process knows what to do. The ``TargetLowering`` -class callback methods (declared in ``llvm/Target/TargetLowering.h``) are: - -* ``setOperationAction`` --- General operation. -* ``setLoadExtAction`` --- Load with extension. -* ``setTruncStoreAction`` --- Truncating store. -* ``setIndexedLoadAction`` --- Indexed load. -* ``setIndexedStoreAction`` --- Indexed store. -* ``setConvertAction`` --- Type conversion. -* ``setCondCodeAction`` --- Support for a given condition code. - -Note: on older releases, ``setLoadXAction`` is used instead of -``setLoadExtAction``. Also, on older releases, ``setCondCodeAction`` may not +support, add a callback to the constructor for the `XXXTargetLowering` class, +so the instruction selection process knows what to do. The `TargetLowering` +class callback methods (declared in `llvm/Target/TargetLowering.h`) are: + +* `setOperationAction` --- General operation. +* `setLoadExtAction` --- Load with extension. +* `setTruncStoreAction` --- Truncating store. +* `setIndexedLoadAction` --- Indexed load. +* `setIndexedStoreAction` --- Indexed store. +* `setConvertAction` --- Type conversion. 
+* `setCondCodeAction` --- Support for a given condition code. + +Note: on older releases, `setLoadXAction` is used instead of +`setLoadExtAction`. Also, on older releases, `setCondCodeAction` may not be supported. Examine your release to see what methods are specifically supported. These callbacks are used to determine that an operation does or does not work with a specified type (or types). And in all cases, the third parameter is a -``LegalAction`` type enum value: ``Promote``, ``Expand``, ``Custom``, or -``Legal``. ``SparcISelLowering.cpp`` contains examples of all four -``LegalAction`` values. +`LegalizeAction` type enum value: `Promote`, `Expand`, `Custom`, or +`Legal`. `SparcISelLowering.cpp` contains examples of all four +`LegalizeAction` values. -Promote -^^^^^^^ +#### Promote For an operation without native support for a given type, the specified type may be promoted to a larger type that is supported. For example, SPARC does -not support a sign-extending load for Boolean values (``i1`` type), so in -``SparcISelLowering.cpp`` the third parameter below, ``Promote``, changes -``i1`` type values to a large type before loading. +not support a sign-extending load for Boolean values (`i1` type), so in +`SparcISelLowering.cpp` the third parameter below, `Promote`, changes +`i1` type values to a larger type before loading. -.. code-block:: c++ +```c++ +setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); +``` - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - -Expand -^^^^^^ +#### Expand For a type without native support, a value may need to be broken down further, rather than promoted. For an operation without native support, a combination of other operations may be used to similar effect. In SPARC, the floating-point sine and cosine trig operations are supported by expansion to -other operations, as indicated by the third parameter, ``Expand``, to -``setOperationAction``: - -..
code-block:: c++ +other operations, as indicated by the third parameter, `Expand`, to +`setOperationAction`: - setOperationAction(ISD::FSIN, MVT::f32, Expand); - setOperationAction(ISD::FCOS, MVT::f32, Expand); +```c++ +setOperationAction(ISD::FSIN, MVT::f32, Expand); +setOperationAction(ISD::FCOS, MVT::f32, Expand); +``` -Custom -^^^^^^ +#### Custom For some operations, simple type promotion or operation expansion may be insufficient. In some cases, a special intrinsic function must be implemented. @@ -1452,67 +1432,65 @@ For example, a constant value may require special treatment, or an operation may require spilling and restoring registers in the stack and working with register allocators. -As seen in ``SparcISelLowering.cpp`` code below, to perform a type conversion +As seen in `SparcISelLowering.cpp` code below, to perform a type conversion from a floating point value to a signed integer, first the -``setOperationAction`` should be called with ``Custom`` as the third parameter: +`setOperationAction` should be called with `Custom` as the third parameter: -.. code-block:: c++ +```c++ +setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); +``` - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - -In the ``LowerOperation`` method, for each ``Custom`` operation, a case +In the `LowerOperation` method, for each `Custom` operation, a case statement should be added to indicate what function to call. In the following -code, an ``FP_TO_SINT`` opcode will call the ``LowerFP_TO_SINT`` method: - -.. code-block:: c++ +code, an `FP_TO_SINT` opcode will call the `LowerFP_TO_SINT` method: - SDValue SparcTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { - switch (Op.getOpcode()) { - case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); - ... - } +```c++ +SDValue SparcTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { + switch (Op.getOpcode()) { + case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); + ... 
} +} +``` -Finally, the ``LowerFP_TO_SINT`` method is implemented, using an FP register to +Finally, the `LowerFP_TO_SINT` method is implemented, using an FP register to convert the floating-point value to an integer. -.. code-block:: c++ - - static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { - assert(Op.getValueType() == MVT::i32); - Op = DAG.getNode(SPISD::FTOI, MVT::f32, Op.getOperand(0)); - return DAG.getNode(ISD::BITCAST, MVT::i32, Op); - } +```c++ +static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { + assert(Op.getValueType() == MVT::i32); + Op = DAG.getNode(SPISD::FTOI, MVT::f32, Op.getOperand(0)); + return DAG.getNode(ISD::BITCAST, MVT::i32, Op); +} +``` -Legal -^^^^^ +#### Legal -The ``Legal`` ``LegalizeAction`` enum value simply indicates that an operation -**is** natively supported. ``Legal`` represents the default condition, so it -is rarely used. In ``SparcISelLowering.cpp``, the action for ``CTPOP`` (an +The `Legal` `LegalizeAction` enum value simply indicates that an operation +**is** natively supported. `Legal` represents the default condition, so it +is rarely used. In `SparcISelLowering.cpp`, the action for `CTPOP` (an operation to count the bits set in an integer) is natively supported only for -SPARC v9. The following code enables the ``Expand`` conversion technique for +SPARC v9. The following code enables the `Expand` conversion technique for non-v9 SPARC implementations. -.. code-block:: c++ - - setOperationAction(ISD::CTPOP, MVT::i32, Expand); - ... - if (TM.getSubtarget().isV9()) - setOperationAction(ISD::CTPOP, MVT::i32, Legal); +```c++ +setOperationAction(ISD::CTPOP, MVT::i32, Expand); +... +if (TM.getSubtarget().isV9()) + setOperationAction(ISD::CTPOP, MVT::i32, Legal); +``` -.. 
_backend-calling-convs: +(backend-calling-convs)= -Calling Conventions -------------------- +### Calling Conventions -To support target-specific calling conventions, ``XXXGenCallingConv.td`` uses -interfaces (such as ``CCIfType`` and ``CCAssignToReg``) that are defined in -``lib/Target/TargetCallingConv.td``. TableGen can take the target descriptor -file ``XXXGenCallingConv.td`` and generate the header file -``XXXGenCallingConv.inc``, which is typically included in -``XXXISelLowering.cpp``. You can use the interfaces in -``TargetCallingConv.td`` to specify: +To support target-specific calling conventions, `XXXGenCallingConv.td` uses +interfaces (such as `CCIfType` and `CCAssignToReg`) that are defined in +`lib/Target/TargetCallingConv.td`. TableGen can take the target descriptor +file `XXXGenCallingConv.td` and generate the header file +`XXXGenCallingConv.inc`, which is typically included in +`XXXISelLowering.cpp`. You can use the interfaces in +`TargetCallingConv.td` to specify: * The order of parameter allocation. @@ -1523,115 +1501,114 @@ file ``XXXGenCallingConv.td`` and generate the header file * Whether the caller or callee unwinds the stack. -The following example demonstrates the use of the ``CCIfType`` and -``CCAssignToReg`` interfaces. If the ``CCIfType`` predicate is true (that is, -if the current argument is of type ``f32`` or ``f64``), then the action is -performed. In this case, the ``CCAssignToReg`` action assigns the argument -value to the first available register: either ``R0`` or ``R1``. +The following example demonstrates the use of the `CCIfType` and +`CCAssignToReg` interfaces. If the `CCIfType` predicate is true (that is, +if the current argument is of type `f32` or `f64`), then the action is +performed. In this case, the `CCAssignToReg` action assigns the argument +value to the first available register: either `R0` or `R1`. -.. 
code-block:: text +```text +CCIfType<[f32,f64], CCAssignToReg<[R0, R1]>> +``` - CCIfType<[f32,f64], CCAssignToReg<[R0, R1]>> - -``SparcCallingConv.td`` contains definitions for a target-specific return-value -calling convention (``RetCC_Sparc32``) and a basic 32-bit C calling convention -(``CC_Sparc32``). The definition of ``RetCC_Sparc32`` (shown below) indicates +`SparcCallingConv.td` contains definitions for a target-specific return-value +calling convention (`RetCC_Sparc32`) and a basic 32-bit C calling convention +(`CC_Sparc32`). The definition of `RetCC_Sparc32` (shown below) indicates which registers are used for specified scalar return types. A single-precision -float is returned to register ``F0``, and a double-precision float goes to -register ``D0``. A 32-bit integer is returned in register ``I0`` or ``I1``. - -.. code-block:: text - - def RetCC_Sparc32 : CallingConv<[ - CCIfType<[i32], CCAssignToReg<[I0, I1]>>, - CCIfType<[f32], CCAssignToReg<[F0]>>, - CCIfType<[f64], CCAssignToReg<[D0]>> - ]>; - -The definition of ``CC_Sparc32`` in ``SparcCallingConv.td`` introduces -``CCAssignToStack``, which assigns the value to a stack slot with the specified +float is returned to register `F0`, and a double-precision float goes to +register `D0`. A 32-bit integer is returned in register `I0` or `I1`. + +```text +def RetCC_Sparc32 : CallingConv<[ + CCIfType<[i32], CCAssignToReg<[I0, I1]>>, + CCIfType<[f32], CCAssignToReg<[F0]>>, + CCIfType<[f64], CCAssignToReg<[D0]>> +]>; +``` + +The definition of `CC_Sparc32` in `SparcCallingConv.td` introduces +`CCAssignToStack`, which assigns the value to a stack slot with the specified size and alignment. In the example below, the first parameter, 4, indicates the size of the slot, and the second parameter, also 4, indicates the stack alignment along 4-byte units. (Special cases: if size is zero, then the ABI size is used; if alignment is zero, then the ABI alignment is used.) -.. 
code-block:: text - - def CC_Sparc32 : CallingConv<[ - // All arguments get passed in integer registers if there is space. - CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, - CCAssignToStack<4, 4> - ]>; +```text +def CC_Sparc32 : CallingConv<[ + // All arguments get passed in integer registers if there is space. + CCIfType<[i32, f32, f64], CCAssignToReg<[I0, I1, I2, I3, I4, I5]>>, + CCAssignToStack<4, 4> +]>; +``` -``CCDelegateTo`` is another commonly used interface, which tries to find a +`CCDelegateTo` is another commonly used interface, which tries to find a specified sub-calling convention, and, if a match is found, it is invoked. In -the following example (in ``X86CallingConv.td``), the definition of -``RetCC_X86_32_C`` ends with ``CCDelegateTo``. After the current value is -assigned to the register ``ST0`` or ``ST1``, the ``RetCC_X86Common`` is +the following example (in `X86CallingConv.td`), the definition of +`RetCC_X86_32_C` ends with `CCDelegateTo`. After the current value is +assigned to the register `ST0` or `ST1`, the `RetCC_X86Common` is invoked. -.. code-block:: text +```text +def RetCC_X86_32_C : CallingConv<[ + CCIfType<[f32], CCAssignToReg<[ST0, ST1]>>, + CCIfType<[f64], CCAssignToReg<[ST0, ST1]>>, + CCDelegateTo +]>; +``` - def RetCC_X86_32_C : CallingConv<[ - CCIfType<[f32], CCAssignToReg<[ST0, ST1]>>, - CCIfType<[f64], CCAssignToReg<[ST0, ST1]>>, - CCDelegateTo - ]>; - -``CCIfCC`` is an interface that attempts to match the given name to the current +`CCIfCC` is an interface that attempts to match the given name to the current calling convention. If the name identifies the current calling convention, then a specified action is invoked. In the following example (in -``X86CallingConv.td``), if the ``Fast`` calling convention is in use, then -``RetCC_X86_32_Fast`` is invoked. If the ``SSECall`` calling convention is in -use, then ``RetCC_X86_32_SSE`` is invoked. - -.. 
code-block:: text - - def RetCC_X86_32 : CallingConv<[ - CCIfCC<"CallingConv::Fast", CCDelegateTo>, - CCIfCC<"CallingConv::X86_SSECall", CCDelegateTo>, - CCDelegateTo - ]>; - -``CCAssignToRegAndStack`` is the same as ``CCAssignToReg``, but also allocates +`X86CallingConv.td`), if the `Fast` calling convention is in use, then +`RetCC_X86_32_Fast` is invoked. If the `SSECall` calling convention is in +use, then `RetCC_X86_32_SSE` is invoked. + +```text +def RetCC_X86_32 : CallingConv<[ + CCIfCC<"CallingConv::Fast", CCDelegateTo>, + CCIfCC<"CallingConv::X86_SSECall", CCDelegateTo>, + CCDelegateTo +]>; +``` + +`CCAssignToRegAndStack` is the same as `CCAssignToReg`, but also allocates a stack slot, when some register is used. Basically, it works like: -``CCIf, CCAssignToStack>``. +`CCIf, CCAssignToStack>`. -.. code-block:: text - - class CCAssignToRegAndStack regList, int size, int align> - : CCAssignToReg { - int Size = size; - int Align = align; - } +```text +class CCAssignToRegAndStack regList, int size, int align> + : CCAssignToReg { + int Size = size; + int Align = align; +} +``` Other calling convention interfaces include: -* ``CCIf `` --- If the predicate matches, apply the action. +* `CCIf ` --- If the predicate matches, apply the action. -* ``CCIfInReg `` --- If the argument is marked with the "``inreg``" +* `CCIfInReg ` --- If the argument is marked with the "`inreg`" attribute, then apply the action. -* ``CCIfNest `` --- If the argument is marked with the "``nest``" +* `CCIfNest ` --- If the argument is marked with the "`nest`" attribute, then apply the action. -* ``CCIfNotVarArg `` --- If the current function does not take a +* `CCIfNotVarArg ` --- If the current function does not take a variable number of arguments, apply the action. -* ``CCAssignToRegWithShadow `` --- similar to - ``CCAssignToReg``, but with a shadow list of registers. +* `CCAssignToRegWithShadow ` --- similar to + `CCAssignToReg`, but with a shadow list of registers. 
-* ``CCPassByVal `` --- Assign value to a stack slot with the +* `CCPassByVal ` --- Assign value to a stack slot with the minimum specified size and alignment. -* ``CCPromoteToType `` --- Promote the current value to the specified +* `CCPromoteToType ` --- Promote the current value to the specified type. -* ``CallingConv <[actions]>`` --- Define each calling convention that is +* `CallingConv <[actions]>` --- Define each calling convention that is supported. -Assembly Printer -================ +## Assembly Printer During the code emission stage, the code generator may utilize an LLVM pass to produce assembly output. To do this, you want to implement the code for a @@ -1639,107 +1616,106 @@ printer that converts LLVM IR to a GAS-format assembly language for your target machine, using the following steps: * Define all the assembly strings for your target, adding them to the - instructions defined in the ``XXXInstrInfo.td`` file. (See - :ref:`instruction-set`.) TableGen will produce an output file - (``XXXGenAsmWriter.inc``) with an implementation of the ``printInstruction`` - method for the ``XXXAsmPrinter`` class. + instructions defined in the `XXXInstrInfo.td` file. (See + {ref}`instruction-set`.) TableGen will produce an output file + (`XXXGenAsmWriter.inc`) with an implementation of the `printInstruction` + method for the `XXXAsmPrinter` class. -* Write ``XXXTargetAsmInfo.h``, which contains the bare-bones declaration of - the ``XXXTargetAsmInfo`` class (a subclass of ``TargetAsmInfo``). +* Write `XXXTargetAsmInfo.h`, which contains the bare-bones declaration of + the `XXXTargetAsmInfo` class (a subclass of `TargetAsmInfo`). -* Write ``XXXTargetAsmInfo.cpp``, which contains target-specific values for - ``TargetAsmInfo`` properties and sometimes new implementations for methods. +* Write `XXXTargetAsmInfo.cpp`, which contains target-specific values for + `TargetAsmInfo` properties and sometimes new implementations for methods. 
-* Write ``XXXAsmPrinter.cpp``, which implements the ``AsmPrinter`` class that +* Write `XXXAsmPrinter.cpp`, which implements the `AsmPrinter` class that performs the LLVM-to-assembly conversion. -The code in ``XXXTargetAsmInfo.h`` is usually a trivial declaration of the -``XXXTargetAsmInfo`` class for use in ``XXXTargetAsmInfo.cpp``. Similarly, -``XXXTargetAsmInfo.cpp`` usually has a few declarations of ``XXXTargetAsmInfo`` -replacement values that override the default values in ``TargetAsmInfo.cpp``. -For example in ``SparcTargetAsmInfo.cpp``: - -.. code-block:: c++ - - SparcTargetAsmInfo::SparcTargetAsmInfo(const SparcTargetMachine &TM) { - Data16bitsDirective = "\t.half\t"; - Data32bitsDirective = "\t.word\t"; - Data64bitsDirective = 0; // .xword is only supported by V9. - ZeroDirective = "\t.skip\t"; - CommentString = "!"; - ConstantPoolSection = "\t.section \".rodata\",#alloc\n"; - } - -The X86 assembly printer implementation (``X86TargetAsmInfo``) is an example -where the target-specific ``TargetAsmInfo`` class uses an overridden methods: -``ExpandInlineAsm``. - -A target-specific implementation of ``AsmPrinter`` is written in -``XXXAsmPrinter.cpp``, which implements the ``AsmPrinter`` class that converts +The code in `XXXTargetAsmInfo.h` is usually a trivial declaration of the +`XXXTargetAsmInfo` class for use in `XXXTargetAsmInfo.cpp`. Similarly, +`XXXTargetAsmInfo.cpp` usually has a few declarations of `XXXTargetAsmInfo` +replacement values that override the default values in `TargetAsmInfo.cpp`. +For example in `SparcTargetAsmInfo.cpp`: + +```c++ +SparcTargetAsmInfo::SparcTargetAsmInfo(const SparcTargetMachine &TM) { + Data16bitsDirective = "\t.half\t"; + Data32bitsDirective = "\t.word\t"; + Data64bitsDirective = 0; // .xword is only supported by V9. 
+ ZeroDirective = "\t.skip\t"; + CommentString = "!"; + ConstantPoolSection = "\t.section \".rodata\",#alloc\n"; +} +``` + +The X86 assembly printer implementation (`X86TargetAsmInfo`) is an example +where the target-specific `TargetAsmInfo` class uses an overridden method: +`ExpandInlineAsm`. + +A target-specific implementation of `AsmPrinter` is written in +`XXXAsmPrinter.cpp`, which implements the `AsmPrinter` class that converts the LLVM to printable assembly. The implementation must include the following -headers that have declarations for the ``AsmPrinter`` and -``MachineFunctionPass`` classes. The ``MachineFunctionPass`` is a subclass of -``FunctionPass``. +headers that have declarations for the `AsmPrinter` and +`MachineFunctionPass` classes. The `MachineFunctionPass` is a subclass of +`FunctionPass`. -.. code-block:: c++ +```c++ +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +``` - #include "llvm/CodeGen/AsmPrinter.h" - #include "llvm/CodeGen/MachineFunctionPass.h" - -As a ``FunctionPass``, ``AsmPrinter`` first calls ``doInitialization`` to set -up the ``AsmPrinter``. In ``SparcAsmPrinter``, a ``Mangler`` object is +As a `FunctionPass`, `AsmPrinter` first calls `doInitialization` to set +up the `AsmPrinter`. In `SparcAsmPrinter`, a `Mangler` object is instantiated to process variable names. -In ``XXXAsmPrinter.cpp``, the ``runOnMachineFunction`` method (declared in -``MachineFunctionPass``) must be implemented for ``XXXAsmPrinter``. In -``MachineFunctionPass``, the ``runOnFunction`` method invokes -``runOnMachineFunction``. Target-specific implementations of -``runOnMachineFunction`` differ, but generally do the following to process each +In `XXXAsmPrinter.cpp`, the `runOnMachineFunction` method (declared in +`MachineFunctionPass`) must be implemented for `XXXAsmPrinter`. In +`MachineFunctionPass`, the `runOnFunction` method invokes +`runOnMachineFunction`.
Target-specific implementations of +`runOnMachineFunction` differ, but generally do the following to process each machine function: -* Call ``SetupMachineFunction`` to perform initialization. +* Call `SetupMachineFunction` to perform initialization. -* Call ``EmitConstantPool`` to print out (to the output stream) constants which +* Call `EmitConstantPool` to print out (to the output stream) constants which have been spilled to memory. -* Call ``EmitJumpTableInfo`` to print out jump tables used by the current +* Call `EmitJumpTableInfo` to print out jump tables used by the current function. * Print out the label for the current function. * Print out the code for the function, including basic block labels and the - assembly for the instruction (using ``printInstruction``) + assembly for the instruction (using `printInstruction`) -The ``XXXAsmPrinter`` implementation must also include the code generated by -TableGen that is output in the ``XXXGenAsmWriter.inc`` file. The code in -``XXXGenAsmWriter.inc`` contains an implementation of the ``printInstruction`` +The `XXXAsmPrinter` implementation must also include the code generated by +TableGen that is output in the `XXXGenAsmWriter.inc` file. 
The code in +`XXXGenAsmWriter.inc` contains an implementation of the `printInstruction` method that may call these methods: -* ``printOperand`` -* ``printMemOperand`` -* ``printCCOperand`` (for conditional statements) -* ``printDataDirective`` -* ``printDeclare`` -* ``printImplicitDef`` -* ``printInlineAsm`` +* `printOperand` +* `printMemOperand` +* `printCCOperand` (for conditional statements) +* `printDataDirective` +* `printDeclare` +* `printImplicitDef` +* `printInlineAsm` -The implementations of ``printDeclare``, ``printImplicitDef``, -``printInlineAsm``, and ``printLabel`` in ``AsmPrinter.cpp`` are generally +The implementations of `printDeclare`, `printImplicitDef`, +`printInlineAsm`, and `printLabel` in `AsmPrinter.cpp` are generally adequate for printing assembly and do not need to be overridden. -The ``printOperand`` method is implemented with a long ``switch``/``case`` +The `printOperand` method is implemented with a long `switch`/`case` statement for the type of operand: register, immediate, basic block, external symbol, global address, constant pool index, or jump table index. For an -instruction with a memory address operand, the ``printMemOperand`` method +instruction with a memory address operand, the `printMemOperand` method should be implemented to generate the proper output. Similarly, -``printCCOperand`` should be used to print a conditional operand. +`printCCOperand` should be used to print a conditional operand. -``doFinalization`` should be overridden in ``XXXAsmPrinter``, and it should be -called to shut down the assembly printer. During ``doFinalization``, global +`doFinalization` should be overridden in `XXXAsmPrinter`, and it should be +called to shut down the assembly printer. During `doFinalization`, global variables and constants are printed to output. -Subtarget Support -================= +## Subtarget Support Subtarget support is used to inform the code generation process of instruction set variations for a given chip set. 
For example, the LLVM SPARC @@ -1754,13 +1730,13 @@ The UltraSPARC architecture combines V9 with UltraSPARC Visual Instruction Set extensions. If subtarget support is needed, you should implement a target-specific -``XXXSubtarget`` class for your architecture. This class should process the -command-line options ``-mcpu=`` and ``-mattr=``. +`XXXSubtarget` class for your architecture. This class should process the +command-line options `-mcpu=` and `-mattr=`. -TableGen uses definitions in the ``Target.td`` and ``Sparc.td`` files to -generate code in ``SparcGenSubtarget.inc``. In ``Target.td``, shown below, the -``SubtargetFeature`` interface is defined. The first 4 string parameters of -the ``SubtargetFeature`` interface are a feature name, a XXXSubtarget field set +TableGen uses definitions in the `Target.td` and `Sparc.td` files to +generate code in `SparcGenSubtarget.inc`. In `Target.td`, shown below, the +`SubtargetFeature` interface is defined. The first 4 string parameters of +the `SubtargetFeature` interface are a feature name, a XXXSubtarget field set by the feature, the value of the XXXSubtarget field, and a description of the feature. (The fifth parameter is a list of features whose presence is implied, and its default value is an empty array.) @@ -1772,253 +1748,250 @@ of an enum constant. If multiple features use the same integer field, the field will be set to the maximum value of all enabled features that share the field. -.. code-block:: text - - class SubtargetFeature i = []> { - string Name = n; - string FieldName = f; - string Value = v; - string Desc = d; - list Implies = i; - } - -In the ``Sparc.td`` file, the ``SubtargetFeature`` is used to define the +```text +class SubtargetFeature i = []> { + string Name = n; + string FieldName = f; + string Value = v; + string Desc = d; + list Implies = i; +} +``` + +In the `Sparc.td` file, the `SubtargetFeature` is used to define the following features. -.. 
code-block:: text - - def FeatureV9 : SubtargetFeature<"v9", "IsV9", "true", - "Enable SPARC-V9 instructions">; - def FeatureV8Deprecated : SubtargetFeature<"deprecated-v8", - "UseV8DeprecatedInsts", "true", - "Enable deprecated V8 instructions in V9 mode">; - def FeatureVIS : SubtargetFeature<"vis", "IsVIS", "true", - "Enable UltraSPARC Visual Instruction Set extensions">; - -Elsewhere in ``Sparc.td``, the ``Proc`` class is defined and then is used to +```text +def FeatureV9 : SubtargetFeature<"v9", "IsV9", "true", + "Enable SPARC-V9 instructions">; +def FeatureV8Deprecated : SubtargetFeature<"deprecated-v8", + "UseV8DeprecatedInsts", "true", + "Enable deprecated V8 instructions in V9 mode">; +def FeatureVIS : SubtargetFeature<"vis", "IsVIS", "true", + "Enable UltraSPARC Visual Instruction Set extensions">; +``` + +Elsewhere in `Sparc.td`, the `Proc` class is defined and then is used to define particular SPARC processor subtypes that may have the previously described features. -.. code-block:: text - - class Proc Features> - : Processor; - - def : Proc<"generic", []>; - def : Proc<"v8", []>; - def : Proc<"supersparc", []>; - def : Proc<"sparclite", []>; - def : Proc<"f934", []>; - def : Proc<"hypersparc", []>; - def : Proc<"sparclite86x", []>; - def : Proc<"sparclet", []>; - def : Proc<"tsc701", []>; - def : Proc<"v9", [FeatureV9]>; - def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated]>; - def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated]>; - def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>; - -From ``Target.td`` and ``Sparc.td`` files, the resulting -``SparcGenSubtarget.inc`` specifies enum values to identify the features, +```text +class Proc Features> + : Processor; + +def : Proc<"generic", []>; +def : Proc<"v8", []>; +def : Proc<"supersparc", []>; +def : Proc<"sparclite", []>; +def : Proc<"f934", []>; +def : Proc<"hypersparc", []>; +def : Proc<"sparclite86x", []>; +def : Proc<"sparclet", []>; +def : Proc<"tsc701", []>; 
+def : Proc<"v9", [FeatureV9]>; +def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated]>; +def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated]>; +def : Proc<"ultrasparc3-vis", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>; +``` + +From `Target.td` and `Sparc.td` files, the resulting +`SparcGenSubtarget.inc` specifies enum values to identify the features, arrays of constants to represent the CPU features and CPU subtypes, and the -``ParseSubtargetFeatures`` method that parses the features string that sets -specified subtarget options. The generated ``SparcGenSubtarget.inc`` file -should be included in the ``SparcSubtarget.cpp``. The target-specific -implementation of the ``XXXSubtarget`` method should follow this pseudocode: - -.. code-block:: c++ - - XXXSubtarget::XXXSubtarget(const Module &M, const std::string &FS) { - // Set the default features - // Determine default and user specified characteristics of the CPU - // Call ParseSubtargetFeatures(FS, CPU) to parse the features string - // Perform any additional operations - } - -JIT Support -=========== +`ParseSubtargetFeatures` method that parses the features string that sets +specified subtarget options. The generated `SparcGenSubtarget.inc` file +should be included in the `SparcSubtarget.cpp`. The target-specific +implementation of the `XXXSubtarget` method should follow this pseudocode: + +```c++ +XXXSubtarget::XXXSubtarget(const Module &M, const std::string &FS) { + // Set the default features + // Determine default and user specified characteristics of the CPU + // Call ParseSubtargetFeatures(FS, CPU) to parse the features string + // Perform any additional operations +} +``` + +## JIT Support The implementation of a target machine optionally includes a Just-In-Time (JIT) code generator that emits machine code and auxiliary structures as binary output that can be written directly to memory. 
To do this, implement JIT code generation by performing the following steps: -* Write an ``XXXCodeEmitter.cpp`` file that contains a machine function pass +* Write an `XXXCodeEmitter.cpp` file that contains a machine function pass that transforms target-machine instructions into relocatable machine code. -* Write an ``XXXJITInfo.cpp`` file that implements the JIT interfaces for +* Write an `XXXJITInfo.cpp` file that implements the JIT interfaces for target-specific code-generation activities, such as emitting machine code and stubs. -* Modify ``XXXTargetMachine`` so that it provides a ``TargetJITInfo`` object - through its ``getJITInfo`` method. +* Modify `XXXTargetMachine` so that it provides a `TargetJITInfo` object + through its `getJITInfo` method. There are several different approaches to writing the JIT support code. For instance, TableGen and target descriptor files may be used for creating a JIT code generator, but are not mandatory. For the Alpha and PowerPC target -machines, TableGen is used to generate ``XXXGenCodeEmitter.inc``, which +machines, TableGen is used to generate `XXXGenCodeEmitter.inc`, which contains the binary coding of machine instructions and the -``getBinaryCodeForInstr`` method to access those codes. Other JIT +`getBinaryCodeForInstr` method to access those codes. Other JIT implementations do not. -Both ``XXXJITInfo.cpp`` and ``XXXCodeEmitter.cpp`` must include the -``llvm/CodeGen/MachineCodeEmitter.h`` header file that defines the -``MachineCodeEmitter`` class containing code for several callback functions +Both `XXXJITInfo.cpp` and `XXXCodeEmitter.cpp` must include the +`llvm/CodeGen/MachineCodeEmitter.h` header file that defines the +`MachineCodeEmitter` class containing code for several callback functions that write data (in bytes, words, strings, etc.) to the output stream. 
-Machine Code Emitter --------------------- - -In ``XXXCodeEmitter.cpp``, a target-specific of the ``Emitter`` class is -implemented as a function pass (subclass of ``MachineFunctionPass``). The -target-specific implementation of ``runOnMachineFunction`` (invoked by -``runOnFunction`` in ``MachineFunctionPass``) iterates through the -``MachineBasicBlock`` calls ``emitInstruction`` to process each instruction and -emit binary code. ``emitInstruction`` is largely implemented with case -statements on the instruction types defined in ``XXXInstrInfo.h``. For -example, in ``X86CodeEmitter.cpp``, the ``emitInstruction`` method is built -around the following ``switch``/``case`` statements: - -.. code-block:: c++ - - switch (Desc->TSFlags & X86::FormMask) { - case X86II::Pseudo: // for not yet implemented instructions - ... // or pseudo-instructions - break; - case X86II::RawFrm: // for instructions with a fixed opcode value - ... - break; - case X86II::AddRegFrm: // for instructions that have one register operand - ... // added to their opcode - break; - case X86II::MRMDestReg:// for instructions that use the Mod/RM byte - ... // to specify a destination (register) - break; - case X86II::MRMDestMem:// for instructions that use the Mod/RM byte - ... // to specify a destination (memory) - break; - case X86II::MRMSrcReg: // for instructions that use the Mod/RM byte - ... // to specify a source (register) - break; - case X86II::MRMSrcMem: // for instructions that use the Mod/RM byte - ... // to specify a source (memory) - break; - case X86II::MRM0r: case X86II::MRM1r: // for instructions that operate on - case X86II::MRM2r: case X86II::MRM3r: // a REGISTER r/m operand and - case X86II::MRM4r: case X86II::MRM5r: // use the Mod/RM byte and a field - case X86II::MRM6r: case X86II::MRM7r: // to hold extended opcode data - ... 
- break; - case X86II::MRM0m: case X86II::MRM1m: // for instructions that operate on - case X86II::MRM2m: case X86II::MRM3m: // a MEMORY r/m operand and - case X86II::MRM4m: case X86II::MRM5m: // use the Mod/RM byte and a field - case X86II::MRM6m: case X86II::MRM7m: // to hold extended opcode data - ... - break; - case X86II::MRMInitReg: // for instructions whose source and - ... // destination are the same register - break; - } +### Machine Code Emitter + +In `XXXCodeEmitter.cpp`, a target-specific of the `Emitter` class is +implemented as a function pass (subclass of `MachineFunctionPass`). The +target-specific implementation of `runOnMachineFunction` (invoked by +`runOnFunction` in `MachineFunctionPass`) iterates through the +`MachineBasicBlock` calls `emitInstruction` to process each instruction and +emit binary code. `emitInstruction` is largely implemented with case +statements on the instruction types defined in `XXXInstrInfo.h`. For +example, in `X86CodeEmitter.cpp`, the `emitInstruction` method is built +around the following `switch`/`case` statements: + +```c++ +switch (Desc->TSFlags & X86::FormMask) { +case X86II::Pseudo: // for not yet implemented instructions + ... // or pseudo-instructions + break; +case X86II::RawFrm: // for instructions with a fixed opcode value + ... + break; +case X86II::AddRegFrm: // for instructions that have one register operand + ... // added to their opcode + break; +case X86II::MRMDestReg:// for instructions that use the Mod/RM byte + ... // to specify a destination (register) + break; +case X86II::MRMDestMem:// for instructions that use the Mod/RM byte + ... // to specify a destination (memory) + break; +case X86II::MRMSrcReg: // for instructions that use the Mod/RM byte + ... // to specify a source (register) + break; +case X86II::MRMSrcMem: // for instructions that use the Mod/RM byte + ... 
// to specify a source (memory) + break; +case X86II::MRM0r: case X86II::MRM1r: // for instructions that operate on +case X86II::MRM2r: case X86II::MRM3r: // a REGISTER r/m operand and +case X86II::MRM4r: case X86II::MRM5r: // use the Mod/RM byte and a field +case X86II::MRM6r: case X86II::MRM7r: // to hold extended opcode data + ... + break; +case X86II::MRM0m: case X86II::MRM1m: // for instructions that operate on +case X86II::MRM2m: case X86II::MRM3m: // a MEMORY r/m operand and +case X86II::MRM4m: case X86II::MRM5m: // use the Mod/RM byte and a field +case X86II::MRM6m: case X86II::MRM7m: // to hold extended opcode data + ... + break; +case X86II::MRMInitReg: // for instructions whose source and + ... // destination are the same register + break; +} +``` The implementations of these case statements often first emit the opcode and then get the operand(s). Then depending upon the operand, helper methods may -be called to process the operand(s). For example, in ``X86CodeEmitter.cpp``, -for the ``X86II::AddRegFrm`` case, the first data emitted (by ``emitByte``) is +be called to process the operand(s). For example, in `X86CodeEmitter.cpp`, +for the `X86II::AddRegFrm` case, the first data emitted (by `emitByte`) is the opcode added to the register operand. Then an object representing the -machine operand, ``MO1``, is extracted. The helper methods such as -``isImmediate``, ``isGlobalAddress``, ``isExternalSymbol``, -``isConstantPoolIndex``, and ``isJumpTableIndex`` determine the operand type. -(``X86CodeEmitter.cpp`` also has private methods such as ``emitConstant``, -``emitGlobalAddress``, ``emitExternalSymbolAddress``, ``emitConstPoolAddress``, -and ``emitJumpTableAddress`` that emit the data into the output stream.) - -.. 
code-block:: c++ - - case X86II::AddRegFrm: - MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg())); - - if (CurOp != NumOps) { - const MachineOperand &MO1 = MI.getOperand(CurOp++); - unsigned Size = X86InstrInfo::sizeOfImm(Desc); - if (MO1.isImmediate()) - emitConstant(MO1.getImm(), Size); - else { - unsigned rt = Is64BitMode ? X86::reloc_pcrel_word - : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); - if (Opcode == X86::MOV64ri) - rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? - if (MO1.isGlobalAddress()) { - bool NeedStub = isa(MO1.getGlobal()); - bool isLazy = gvNeedsLazyPtr(MO1.getGlobal()); - emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - NeedStub, isLazy); - } else if (MO1.isExternalSymbol()) - emitExternalSymbolAddress(MO1.getSymbolName(), rt); - else if (MO1.isConstantPoolIndex()) - emitConstPoolAddress(MO1.getIndex(), rt); - else if (MO1.isJumpTableIndex()) - emitJumpTableAddress(MO1.getIndex(), rt); - } +machine operand, `MO1`, is extracted. The helper methods such as +`isImmediate`, `isGlobalAddress`, `isExternalSymbol`, +`isConstantPoolIndex`, and `isJumpTableIndex` determine the operand type. +(`X86CodeEmitter.cpp` also has private methods such as `emitConstant`, +`emitGlobalAddress`, `emitExternalSymbolAddress`, `emitConstPoolAddress`, +and `emitJumpTableAddress` that emit the data into the output stream.) + +```c++ +case X86II::AddRegFrm: + MCE.emitByte(BaseOpcode + getX86RegNum(MI.getOperand(CurOp++).getReg())); + + if (CurOp != NumOps) { + const MachineOperand &MO1 = MI.getOperand(CurOp++); + unsigned Size = X86InstrInfo::sizeOfImm(Desc); + if (MO1.isImmediate()) + emitConstant(MO1.getImm(), Size); + else { + unsigned rt = Is64BitMode ? X86::reloc_pcrel_word + : (IsPIC ? X86::reloc_picrel_word : X86::reloc_absolute_word); + if (Opcode == X86::MOV64ri) + rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? 
+ if (MO1.isGlobalAddress()) { + bool NeedStub = isa(MO1.getGlobal()); + bool isLazy = gvNeedsLazyPtr(MO1.getGlobal()); + emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, + NeedStub, isLazy); + } else if (MO1.isExternalSymbol()) + emitExternalSymbolAddress(MO1.getSymbolName(), rt); + else if (MO1.isConstantPoolIndex()) + emitConstPoolAddress(MO1.getIndex(), rt); + else if (MO1.isJumpTableIndex()) + emitJumpTableAddress(MO1.getIndex(), rt); } - break; + } + break; +``` -In the previous example, ``XXXCodeEmitter.cpp`` uses the variable ``rt``, which -is a ``RelocationType`` enum that may be used to relocate addresses (for -example, a global address with a PIC base offset). The ``RelocationType`` enum -for that target is defined in the short target-specific ``XXXRelocations.h`` -file. The ``RelocationType`` is used by the ``relocate`` method defined in -``XXXJITInfo.cpp`` to rewrite addresses for referenced global symbols. +In the previous example, `XXXCodeEmitter.cpp` uses the variable `rt`, which +is a `RelocationType` enum that may be used to relocate addresses (for +example, a global address with a PIC base offset). The `RelocationType` enum +for that target is defined in the short target-specific `XXXRelocations.h` +file. The `RelocationType` is used by the `relocate` method defined in +`XXXJITInfo.cpp` to rewrite addresses for referenced global symbols. -For example, ``X86Relocations.h`` specifies the following relocation types for +For example, `X86Relocations.h` specifies the following relocation types for the X86 addresses. In all four cases, the relocated value is added to the -value already in memory. For ``reloc_pcrel_word`` and ``reloc_picrel_word``, +value already in memory. For `reloc_pcrel_word` and `reloc_picrel_word`, there is an additional initial adjustment. -.. 
code-block:: c++ +```c++ +enum RelocationType { + reloc_pcrel_word = 0, // add reloc value after adjusting for the PC loc + reloc_picrel_word = 1, // add reloc value after adjusting for the PIC base + reloc_absolute_word = 2, // absolute relocation; no additional adjustment + reloc_absolute_dword = 3 // absolute relocation; no additional adjustment +}; +``` - enum RelocationType { - reloc_pcrel_word = 0, // add reloc value after adjusting for the PC loc - reloc_picrel_word = 1, // add reloc value after adjusting for the PIC base - reloc_absolute_word = 2, // absolute relocation; no additional adjustment - reloc_absolute_dword = 3 // absolute relocation; no additional adjustment - }; - -Target JIT Info ---------------- +### Target JIT Info -``XXXJITInfo.cpp`` implements the JIT interfaces for target-specific +`XXXJITInfo.cpp` implements the JIT interfaces for target-specific code-generation activities, such as emitting machine code and stubs. At -minimum, a target-specific version of ``XXXJITInfo`` implements the following: +minimum, a target-specific version of `XXXJITInfo` implements the following: -* ``getLazyResolverFunction`` --- Initializes the JIT, gives the target a +* `getLazyResolverFunction` --- Initializes the JIT, gives the target a function that is used for compilation. -* ``emitFunctionStub`` --- Returns a native function with a specified address +* `emitFunctionStub` --- Returns a native function with a specified address for a callback function. -* ``relocate`` --- Changes the addresses of referenced globals, based on +* `relocate` --- Changes the addresses of referenced globals, based on relocation types. * Callback function that are wrappers to a function stub that is used when the real target is not initially known. -``getLazyResolverFunction`` is generally trivial to implement. It makes the -incoming parameter as the global ``JITCompilerFunction`` and returns the +`getLazyResolverFunction` is generally trivial to implement. 
It makes the +incoming parameter as the global `JITCompilerFunction` and returns the callback function that will be used a function wrapper. For the Alpha target -(in ``AlphaJITInfo.cpp``), the ``getLazyResolverFunction`` implementation is +(in `AlphaJITInfo.cpp`), the `getLazyResolverFunction` implementation is simply: -.. code-block:: c++ - - TargetJITInfo::LazyResolverFn AlphaJITInfo::getLazyResolverFunction( - JITCompilerFn F) { - JITCompilerFunction = F; - return AlphaCompilationCallback; - } +```c++ +TargetJITInfo::LazyResolverFn AlphaJITInfo::getLazyResolverFunction( + JITCompilerFn F) { + JITCompilerFunction = F; + return AlphaCompilationCallback; +} +``` -For the X86 target, the ``getLazyResolverFunction`` implementation is a little +For the X86 target, the `getLazyResolverFunction` implementation is a little more complicated, because it returns a different callback function for processors with SSE instructions and XMM registers. diff --git a/llvm/docs/WritingAnLLVMNewPMPass.md b/llvm/docs/WritingAnLLVMNewPMPass.md index dd094e393d7a3..d430c06b38563 100644 --- a/llvm/docs/WritingAnLLVMNewPMPass.md +++ b/llvm/docs/WritingAnLLVMNewPMPass.md @@ -1,19 +1,19 @@ -==================== -Writing an LLVM Pass -==================== +# Writing an LLVM Pass -.. program:: opt +```{program} opt +``` -.. contents:: - :local: +```{contents} +:local: +``` -Introduction --- What is a pass? -================================ +## Introduction --- What is a pass? -.. warning:: - This document deals with the new pass manager. LLVM uses the legacy pass - manager for the codegen pipeline. For more details, see - :doc:`WritingAnLLVMPass` and :doc:`NewPassManager`. +```{warning} +This document deals with the new pass manager. LLVM uses the legacy pass +manager for the codegen pipeline. For more details, see +{doc}`WritingAnLLVMPass` and {doc}`NewPassManager`. 
+``` The LLVM pass framework is an important part of the LLVM system, because LLVM passes are where most of the interesting parts of the compiler exist. Passes @@ -24,19 +24,18 @@ are, above all, a structuring technique for compiler code. Unlike passes under the legacy pass manager where the pass interface is defined via inheritance, passes under the new pass manager rely on concept-based polymorphism, meaning there is no explicit interface (see -comments in ``PassManager.h`` for more details). All LLVM passes inherit from -the CRTP mix-in ``OptionalPassInfoMixin`` or -``RequiredPassInfoMixin``. The pass should have a ``run()`` -method which returns a ``PreservedAnalyses`` and takes in some unit of IR +comments in `PassManager.h` for more details). All LLVM passes inherit from +the CRTP mix-in `OptionalPassInfoMixin` or +`RequiredPassInfoMixin`. The pass should have a `run()` +method which returns a `PreservedAnalyses` and takes in some unit of IR along with an analysis manager. For example, a function pass would have a -``PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);`` method. +`PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);` method. We start by showing you how to construct a pass, from setting up the build, creating the pass, to executing and testing it. Looking at existing passes is always a great way to learn details. -Quick Start --- Writing hello world -=================================== +## Quick Start --- Writing hello world Here we describe how to write the "hello world" of passes. The "HelloWorld" pass is designed to simply print out the name of non-external functions that @@ -46,247 +45,240 @@ it just inspects it. The code below already exists; feel free to create a pass with a different name alongside the HelloWorld source files. -.. 
_writing-an-llvm-npm-pass-build: +(writing-an-llvm-npm-pass-build)= -Setting up the build --------------------- +### Setting up the build -First, configure and build LLVM as described in :doc:`GettingStarted`. +First, configure and build LLVM as described in {doc}`GettingStarted`. Next, we will reuse an existing directory (creating a new directory involves messing around with more CMake files than we want). For this example, we'll use -``llvm/lib/Transforms/Utils/HelloWorld.cpp``, which has already been created. +`llvm/lib/Transforms/Utils/HelloWorld.cpp`, which has already been created. If you'd like to create your own pass, add a new source file into -``llvm/lib/Transforms/Utils/CMakeLists.txt`` (assuming you want your pass in -the ``Transforms/Utils`` directory. +`llvm/lib/Transforms/Utils/CMakeLists.txt` (assuming you want your pass in +the `Transforms/Utils` directory). Now that we have the build set up for a new pass, we need to write the code for the pass itself. -.. _writing-an-llvm-npm-pass-basiccode: +(writing-an-llvm-npm-pass-basiccode)= -Basic code required -------------------- +### Basic code required Now that the build is setup for a new pass, we just have to write it. First we need to define the pass in a header file. We'll create -``llvm/include/llvm/Transforms/Utils/HelloWorld.h``. The file should +`llvm/include/llvm/Transforms/Utils/HelloWorld.h`. The file should contain the following boilerplate: -.. 
code-block:: c++ +```c++ +#ifndef LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H +#define LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H - #ifndef LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H - #define LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H +#include "llvm/IR/PassManager.h" - #include "llvm/IR/PassManager.h" +namespace llvm { - namespace llvm { +class HelloWorldPass : public OptionalPassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; - class HelloWorldPass : public OptionalPassInfoMixin { - public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); - }; +} // namespace llvm - } // namespace llvm +#endif // LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H +``` - #endif // LLVM_TRANSFORMS_HELLONEW_HELLOWORLD_H - -This creates the class for the pass with a declaration of the ``run()`` +This creates the class for the pass with a declaration of the `run()` method which actually runs the pass. Inheriting from -``OptionalPassInfoMixin`` or ``RequiredPassInfoMixin`` sets up +`OptionalPassInfoMixin` or `RequiredPassInfoMixin` sets up some more boilerplate so that we don't have to write it ourselves. -``RequiredPassInfoMixin`` should be used for passes that cannot be skipped -(e.g. ``AlwaysInlinerPass``), while ``OptionalPassInfoMixin`` should be used +`RequiredPassInfoMixin` should be used for passes that cannot be skipped +(e.g. `AlwaysInlinerPass`), while `OptionalPassInfoMixin` should be used for passes that can be skipped (e.g. optimization passes). -Our class is in the ``llvm`` namespace so that we don't pollute the global +Our class is in the `llvm` namespace so that we don't pollute the global namespace. -Next we'll create ``llvm/lib/Transforms/Utils/HelloWorld.cpp``, starting +Next we'll create `llvm/lib/Transforms/Utils/HelloWorld.cpp`, starting with -.. code-block:: c++ - - #include "llvm/Transforms/Utils/HelloWorld.h" +```c++ +#include "llvm/Transforms/Utils/HelloWorld.h" +``` ... to include the header file we just created. -.. 
code-block:: c++ - - using namespace llvm; +```c++ +using namespace llvm; +``` ... is required because the functions from the include files live in the llvm namespace. This should only be done in non-header files. -Next we have the pass's ``run()`` definition: +Next we have the pass's `run()` definition: -.. code-block:: c++ - - PreservedAnalyses HelloWorldPass::run(Function &F, - FunctionAnalysisManager &AM) { - errs() << F.getName() << "\n"; - return PreservedAnalyses::all(); - } +```c++ +PreservedAnalyses HelloWorldPass::run(Function &F, + FunctionAnalysisManager &AM) { + errs() << F.getName() << "\n"; + return PreservedAnalyses::all(); +} +``` ... which simply prints out the name of the function to stderr. The pass manager will ensure that the pass will be run on every function in a module. -The ``PreservedAnalyses`` return value says that all analyses (e.g. dominator +The `PreservedAnalyses` return value says that all analyses (e.g. dominator tree) are still valid after this pass since we didn't modify any functions. That's it for the pass itself. Now in order to "register" the pass, we need to add it to a couple places. Add the following to -``llvm/lib/Passes/PassRegistry.def`` in the ``FUNCTION_PASS`` section - -.. code-block:: c++ +`llvm/lib/Passes/PassRegistry.def` in the `FUNCTION_PASS` section - FUNCTION_PASS("helloworld", HelloWorldPass()) +```c++ +FUNCTION_PASS("helloworld", HelloWorldPass()) +``` ... which adds the pass under the name "helloworld". -``llvm/lib/Passes/PassRegistry.def`` is #include'd into -``llvm/lib/Passes/PassBuilder.cpp`` multiple times for various reasons. Since +`llvm/lib/Passes/PassRegistry.def` is #include'd into +`llvm/lib/Passes/PassBuilder.cpp` multiple times for various reasons. Since it constructs our pass, we need to also add the proper #include in -``llvm/lib/Passes/PassBuilder.cpp``: - -.. 
code-block:: c++ +`llvm/lib/Passes/PassBuilder.cpp`: - #include "llvm/Transforms/Utils/HelloWorld.h" +```c++ +#include "llvm/Transforms/Utils/HelloWorld.h" +``` This should be all the code necessary for our pass, now it's time to compile and run it. -Running a pass with ``opt`` ---------------------------- +### Running a pass with `opt` -Now that you have a brand new shiny pass, we can build :program:`opt` and use +Now that you have a brand new shiny pass, we can build {program}`opt` and use it to run some LLVM IR through the pass. -.. code-block:: console +```console +$ ninja -C build/ opt +# or whatever build system/build directory you are using - $ ninja -C build/ opt - # or whatever build system/build directory you are using +$ cat /tmp/a.ll +define i32 @foo() { + %a = add i32 2, 3 + ret i32 %a +} - $ cat /tmp/a.ll - define i32 @foo() { - %a = add i32 2, 3 - ret i32 %a - } +define void @bar() { + ret void +} - define void @bar() { - ret void - } - - $ build/bin/opt -disable-output /tmp/a.ll -passes=helloworld - foo - bar +$ build/bin/opt -disable-output /tmp/a.ll -passes=helloworld +foo +bar +``` Our pass ran and printed the names of functions as expected! -Testing a pass --------------- +### Testing a pass Testing our pass is important to prevent future regressions. We'll add a lit -test at ``llvm/test/Transforms/Utils/helloworld.ll``. See -:doc:`TestingGuide` for more information on testing. - -.. code-block:: llvm - - $ cat llvm/test/Transforms/Utils/helloworld.ll - ; RUN: opt -disable-output -passes=helloworld %s 2>&1 | FileCheck %s +test at `llvm/test/Transforms/Utils/helloworld.ll`. See +{doc}`TestingGuide` for more information on testing. 
- ; CHECK: {{^}}foo{{$}} - define i32 @foo() { - %a = add i32 2, 3 - ret i32 %a - } +```llvm +$ cat llvm/test/Transforms/Utils/helloworld.ll +; RUN: opt -disable-output -passes=helloworld %s 2>&1 | FileCheck %s - ; CHECK-NEXT: {{^}}bar{{$}} - define void @bar() { - ret void - } +; CHECK: {{^}}foo{{$}} +define i32 @foo() { + %a = add i32 2, 3 + ret i32 %a +} - $ ninja -C build check-llvm - # runs our new test alongside all other llvm lit tests +; CHECK-NEXT: {{^}}bar{{$}} +define void @bar() { + ret void +} -FAQs -==== +$ ninja -C build check-llvm +# runs our new test alongside all other llvm lit tests +``` -Required passes ---------------- +## FAQs -A pass that inherits from ``RequiredPassInfoMixin`` is a required pass. For example: +### Required passes -.. code-block:: c++ +A pass that inherits from `RequiredPassInfoMixin` is a required pass. For example: - class HelloWorldPass : public RequiredPassInfoMixin { - public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); - }; +```c++ +class HelloWorldPass : public RequiredPassInfoMixin { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; +``` A required pass is a pass that may not be skipped. An example of a required -pass is ``AlwaysInlinerPass``, which must always be run to preserve -``alwaysinline`` semantics. Pass managers are required since they may contain +pass is `AlwaysInlinerPass`, which must always be run to preserve +`alwaysinline` semantics. Pass managers are required since they may contain other required passes. -An example of how a pass can be skipped is the ``optnone`` function +An example of how a pass can be skipped is the `optnone` function attribute, which specifies that optimizations should not be run on the -function. Required passes will still be run on ``optnone`` functions. +function. Required passes will still be run on `optnone` functions. For more implementation details, see -``PassInstrumentation::runBeforePass()``. 
+`PassInstrumentation::runBeforePass()`. -Registering passes as plugins ------------------------------ +### Registering passes as plugins LLVM provides a mechanism to register pass plugins within various tools like -``clang`` or ``opt``. A pass plugin can add passes to default optimization -pipelines or to be manually run via tools like ``opt``. For more information, -see :doc:`NewPassManager`. +`clang` or `opt`. A pass plugin can add passes to default optimization +pipelines or to be manually run via tools like `opt`. For more information, +see {doc}`NewPassManager`. Create a CMake project at the root of the repo alongside other projects. This project must contain the following minimal -``CMakeLists.txt``: +`CMakeLists.txt`: -.. code-block:: cmake +```cmake +add_llvm_pass_plugin(MyPassName source.cpp) +``` - add_llvm_pass_plugin(MyPassName source.cpp) - -See the definition of ``add_llvm_pass_plugin`` for more CMake details. +See the definition of `add_llvm_pass_plugin` for more CMake details. The pass must provide at least one of two entry points for the new pass manager, one for static registration and one for dynamically loaded plugins: -- ``llvm::PassPluginLibraryInfo get##Name##PluginInfo();`` -- ``extern "C" ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() LLVM_ATTRIBUTE_WEAK;`` +- `llvm::PassPluginLibraryInfo get##Name##PluginInfo();` +- `extern "C" ::llvm::PassPluginLibraryInfo llvmGetPassPluginInfo() LLVM_ATTRIBUTE_WEAK;` Pass plugins are compiled and linked dynamically by default. Setting -``LLVM_${NAME}_LINK_INTO_TOOLS`` to ``ON`` turns the project into a statically +`LLVM_${NAME}_LINK_INTO_TOOLS` to `ON` turns the project into a statically linked extension. -For an in-tree example, see ``llvm/examples/Bye/``. - -To make ``PassBuilder`` aware of statically linked pass plugins: - -.. code-block:: c++ +For an in-tree example, see `llvm/examples/Bye/`. - // Declare plugin extension function declarations. 
- #define HANDLE_EXTENSION(Ext) llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); - #include "llvm/Support/Extension.def" +To make `PassBuilder` aware of statically linked pass plugins: - ... +```c++ +// Declare plugin extension function declarations. +#define HANDLE_EXTENSION(Ext) llvm::PassPluginLibraryInfo get##Ext##PluginInfo(); +#include "llvm/Support/Extension.def" - // Register plugin extensions in PassBuilder. - #define HANDLE_EXTENSION(Ext) get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); - #include "llvm/Support/Extension.def" +... -To make ``PassBuilder`` aware of dynamically linked pass plugins: +// Register plugin extensions in PassBuilder. +#define HANDLE_EXTENSION(Ext) get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); +#include "llvm/Support/Extension.def" +``` -.. code-block:: c++ +To make `PassBuilder` aware of dynamically linked pass plugins: - // Load plugin dynamically. - auto Plugin = PassPlugin::Load(PathToPlugin); - if (!Plugin) - report_error(); - // Register plugin extensions in PassBuilder. - Plugin.registerPassBuilderCallbacks(PB); +```c++ +// Load plugin dynamically. +auto Plugin = PassPlugin::Load(PathToPlugin); +if (!Plugin) + report_error(); +// Register plugin extensions in PassBuilder. +Plugin.registerPassBuilderCallbacks(PB); +``` diff --git a/llvm/docs/WritingAnLLVMPass.md b/llvm/docs/WritingAnLLVMPass.md index 216e693087d96..91f81a3e0c048 100644 --- a/llvm/docs/WritingAnLLVMPass.md +++ b/llvm/docs/WritingAnLLVMPass.md @@ -1,21 +1,21 @@ -======================================== -Writing an LLVM Pass (legacy PM version) -======================================== +# Writing an LLVM Pass (legacy PM version) -.. program:: opt +```{program} opt +``` -.. contents:: - :local: +```{contents} +:local: +``` -Introduction --- What is a pass? -================================ +## Introduction --- What is a pass? -.. warning:: - This document deals with the legacy pass manager. 
LLVM uses the new pass - manager for the optimization pipeline (the codegen pipeline - still uses the legacy pass manager), which has its own way of defining - passes. For more details, see :doc:`WritingAnLLVMNewPMPass` and - :doc:`NewPassManager`. +```{warning} +This document deals with the legacy pass manager. LLVM uses the new pass +manager for the optimization pipeline (the codegen pipeline +still uses the legacy pass manager), which has its own way of defining +passes. For more details, see {doc}`WritingAnLLVMNewPMPass` and +{doc}`NewPassManager`. +``` The LLVM Pass Framework is an important part of the LLVM system, because LLVM passes are where most of the interesting parts of the compiler exist. Passes @@ -23,40 +23,35 @@ perform the transformations and optimizations that make up the compiler, they build the analysis results that are used by these transformations, and they are, above all, a structuring technique for compiler code. -All LLVM passes are subclasses of the `Pass -`_ class, which implement -functionality by overriding virtual methods inherited from ``Pass``. Depending -on how your pass works, you should inherit from the :ref:`ModulePass -` , :ref:`CallGraphSCCPass -`, :ref:`FunctionPass -` , or :ref:`LoopPass -`, or :ref:`RegionPass -` classes, which gives the system more +All LLVM passes are subclasses of the [Pass](doxygen:classllvm_1_1Pass.html) class, which implement +functionality by overriding virtual methods inherited from `Pass`. Depending +on how your pass works, you should inherit from the +{ref}`ModulePass ` , {ref}`CallGraphSCCPass `, +{ref}`FunctionPass ` , or {ref}`LoopPass `, or +{ref}`RegionPass ` classes, which gives the system more information about what your pass does, and how it can be combined with other passes. One of the main features of the LLVM Pass Framework is that it schedules passes to run in an efficient way based on the constraints that your pass meets (which are indicated by which class they derive from). -.. 
_writing-an-llvm-pass-pass-classes: +(writing-an-llvm-pass-pass-classes)= -Pass classes and requirements -============================= +## Pass classes and requirements One of the first things that you should do when designing a new pass is to decide what class you should subclass for your pass. Here we talk about the classes available, from the most general to the most specific. -When choosing a superclass for your ``Pass``, you should choose the **most +When choosing a superclass for your `Pass`, you should choose the **most specific** class possible, while still being able to meet the requirements listed. This gives the LLVM Pass Infrastructure information necessary to optimize how passes are run, so that the resultant compiler isn't unnecessarily slow. -The ``ImmutablePass`` class ---------------------------- +### The `ImmutablePass` class -The most plain and boring type of pass is the "`ImmutablePass -`_" class. This pass +The most plain and boring type of pass is the +"[ImmutablePass](doxygen:classllvm_1_1ImmutablePass.html)" class. This pass type is used for passes that do not have to be run, do not change state, and never need to be updated. This is not a normal type of transformation or analysis, but can provide information about the current compiler configuration. @@ -65,413 +60,388 @@ Although this pass class is very infrequently used, it is important for providing information about the current target machine being compiled for, and other static information that can affect the various transformations. -``ImmutablePass``\ es never invalidate other transformations, are never +`ImmutablePass`es never invalidate other transformations, are never invalidated, and are never "run". -.. 
_writing-an-llvm-pass-ModulePass: +(writing-an-llvm-pass-ModulePass)= -The ``ModulePass`` class ------------------------- +### The `ModulePass` class -The `ModulePass `_ class +The [ModulePass](doxygen:classllvm_1_1ModulePass.html) class is the most general of all superclasses that you can use. Deriving from -``ModulePass`` indicates that your pass uses the entire program as a unit, +`ModulePass` indicates that your pass uses the entire program as a unit, referring to function bodies in no predictable order, or adding and removing -functions. Because nothing is known about the behavior of ``ModulePass`` +functions. Because nothing is known about the behavior of `ModulePass` subclasses, no optimization can be done for their execution. A module pass can use function level passes (e.g. dominators) using the -``getAnalysis`` interface ``getAnalysis(llvm::Function *)`` to +`getAnalysis` interface `getAnalysis(llvm::Function *)` to provide the function to retrieve analysis result for, if the function pass does not require any module or immutable passes. Note that this can only be done for functions for which the analysis ran, e.g. in the case of dominators you -should only ask for the ``DominatorTree`` for function definitions, not +should only ask for the `DominatorTree` for function definitions, not declarations. -To write a correct ``ModulePass`` subclass, derive from ``ModulePass`` and -override the ``runOnModule`` method with the following signature: +To write a correct `ModulePass` subclass, derive from `ModulePass` and +override the `runOnModule` method with the following signature: -The ``runOnModule`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### The `runOnModule` method -.. code-block:: c++ +```cpp +virtual bool runOnModule(Module &M) = 0; +``` - virtual bool runOnModule(Module &M) = 0; +The `runOnModule` method performs the interesting work of the pass. It +should return `true` if the module was modified by the transformation and +`false` otherwise. 
-The ``runOnModule`` method performs the interesting work of the pass. It -should return ``true`` if the module was modified by the transformation and -``false`` otherwise. +(writing-an-llvm-pass-CallGraphSCCPass)= -.. _writing-an-llvm-pass-CallGraphSCCPass: +### The `CallGraphSCCPass` class -The ``CallGraphSCCPass`` class ------------------------------- - -The `CallGraphSCCPass -`_ is used by +The [CallGraphSCCPass](doxygen:classllvm_1_1CallGraphSCCPass.html) is used by passes that need to traverse the program bottom-up on the call graph (callees -before callers). Deriving from ``CallGraphSCCPass`` provides some mechanics -for building and traversing the ``CallGraph``, but also allows the system to -optimize execution of ``CallGraphSCCPass``\ es. If your pass meets the +before callers). Deriving from `CallGraphSCCPass` provides some mechanics +for building and traversing the `CallGraph`, but also allows the system to +optimize execution of `CallGraphSCCPass`es. If your pass meets the requirements outlined below, and doesn't meet the requirements of a -:ref:`FunctionPass `, you should derive from -``CallGraphSCCPass``. +{ref}`FunctionPass `, you should derive from +`CallGraphSCCPass`. -``TODO``: explain briefly what SCC, Tarjan's algo, and B-U mean. +`TODO`: explain briefly what SCC, Tarjan's algo, and B-U mean. To be explicit, CallGraphSCCPass subclasses are: -#. ... *not allowed* to inspect or modify any ``Function``\ s other than those +1. ... *not allowed* to inspect or modify any `Function`s other than those in the current SCC and the direct callers and direct callees of the SCC. -#. ... *required* to preserve the current ``CallGraph`` object, updating it to +1. ... *required* to preserve the current `CallGraph` object, updating it to reflect any changes made to the program. -#. ... *not allowed* to add or remove SCC's from the current Module, though +1. ... 
*not allowed* to add or remove SCC's from the current Module, though they may change the contents of an SCC. -#. ... *allowed* to add or remove global variables from the current Module. -#. ... *allowed* to maintain state across invocations of :ref:`runOnSCC - ` (including global data). +1. ... *allowed* to add or remove global variables from the current Module. +1. ... *allowed* to maintain state across invocations of + {ref}`runOnSCC ` (including global data). -Implementing a ``CallGraphSCCPass`` is slightly tricky in some cases because it +Implementing a `CallGraphSCCPass` is slightly tricky in some cases because it has to handle SCCs with more than one node in it. All of the virtual methods -described below should return ``true`` if they modified the program, or -``false`` if they didn't. - -The ``doInitialization(CallGraph &)`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +described below should return `true` if they modified the program, or +`false` if they didn't. -.. code-block:: c++ +#### The `doInitialization(CallGraph &)` method - virtual bool doInitialization(CallGraph &CG); +```cpp +virtual bool doInitialization(CallGraph &CG); +``` -The ``doInitialization`` method is allowed to do most of the things that -``CallGraphSCCPass``\ es are not allowed to do. They can add and remove -functions, get pointers to functions, etc. The ``doInitialization`` method is +The `doInitialization` method is allowed to do most of the things that +`CallGraphSCCPass`es are not allowed to do. They can add and remove +functions, get pointers to functions, etc. The `doInitialization` method is designed to do simple initialization type of stuff that does not depend on the -SCCs being processed. The ``doInitialization`` method call is not scheduled to +SCCs being processed. The `doInitialization` method call is not scheduled to overlap with any other pass executions (thus it should be very fast). -.. 
_writing-an-llvm-pass-runOnSCC: - -The ``runOnSCC`` method -^^^^^^^^^^^^^^^^^^^^^^^ +(writing-an-llvm-pass-runOnSCC)= -.. code-block:: c++ +#### The `runOnSCC` method - virtual bool runOnSCC(CallGraphSCC &SCC) = 0; +```cpp +virtual bool runOnSCC(CallGraphSCC &SCC) = 0; +``` -The ``runOnSCC`` method performs the interesting work of the pass, and should -return ``true`` if the module was modified by the transformation, ``false`` +The `runOnSCC` method performs the interesting work of the pass, and should +return `true` if the module was modified by the transformation, `false` otherwise. -The ``doFinalization(CallGraph &)`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### The `doFinalization(CallGraph &)` method -.. code-block:: c++ +```cpp +virtual bool doFinalization(CallGraph &CG); +``` - virtual bool doFinalization(CallGraph &CG); +The `doFinalization` method is an infrequently used method that is called +when the pass framework has finished calling +{ref}`runOnSCC ` for every SCC in the program being compiled. -The ``doFinalization`` method is an infrequently used method that is called -when the pass framework has finished calling :ref:`runOnSCC -` for every SCC in the program being compiled. +(writing-an-llvm-pass-FunctionPass)= -.. _writing-an-llvm-pass-FunctionPass: +### The `FunctionPass` class -The ``FunctionPass`` class --------------------------- - -In contrast to ``ModulePass`` subclasses, `FunctionPass -`_ subclasses do have a +In contrast to `ModulePass` subclasses, +[FunctionPass](doxygen:classllvm_1_1Pass.html) subclasses do have a predictable, local behavior that can be expected by the system. All -``FunctionPass`` execute on each function in the program independent of all of -the other functions in the program. 
``FunctionPass``\ es do not require that -they are executed in a particular order, and ``FunctionPass``\ es do not modify +`FunctionPass` execute on each function in the program independent of all of +the other functions in the program. `FunctionPass`es do not require that +they are executed in a particular order, and `FunctionPass`es do not modify external functions. -To be explicit, ``FunctionPass`` subclasses are not allowed to: +To be explicit, `FunctionPass` subclasses are not allowed to: -#. Inspect or modify a ``Function`` other than the one currently being processed. -#. Add or remove ``Function``\ s from the current ``Module``. -#. Add or remove global variables from the current ``Module``. -#. Maintain state across invocations of :ref:`runOnFunction - ` (including global data). +1. Inspect or modify a `Function` other than the one currently being processed. +1. Add or remove `Function`s from the current `Module`. +1. Add or remove global variables from the current `Module`. +1. Maintain state across invocations of + {ref}`runOnFunction ` (including global data). -Implementing a ``FunctionPass`` is usually straightforward. ``FunctionPass``\ -es may override three virtual methods to do their work. All of these methods -should return ``true`` if they modified the program, or ``false`` if they +Implementing a `FunctionPass` is usually straightforward. `FunctionPass`es may +override three virtual methods to do their work. All of these methods +should return `true` if they modified the program, or `false` if they didn't. -.. _writing-an-llvm-pass-doInitialization-mod: - -The ``doInitialization(Module &)`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +(writing-an-llvm-pass-doInitialization-mod)= -.. 
code-block:: c++ +#### The `doInitialization(Module &)` method - virtual bool doInitialization(Module &M); +```cpp +virtual bool doInitialization(Module &M); +``` -The ``doInitialization`` method is allowed to do most of the things that -``FunctionPass``\ es are not allowed to do. They can add and remove functions, -get pointers to functions, etc. The ``doInitialization`` method is designed to +The `doInitialization` method is allowed to do most of the things that +`FunctionPass`es are not allowed to do. They can add and remove functions, +get pointers to functions, etc. The `doInitialization` method is designed to do simple initialization type of stuff that does not depend on the functions -being processed. The ``doInitialization`` method call is not scheduled to +being processed. The `doInitialization` method call is not scheduled to overlap with any other pass executions (thus it should be very fast). -A good example of how this method should be used is the `LowerAllocations -`_ pass. This pass -converts ``malloc`` and ``free`` instructions into platform dependent -``malloc()`` and ``free()`` function calls. It uses the ``doInitialization`` -method to get a reference to the ``malloc`` and ``free`` functions that it +A good example of how this method should be used is the +[LowerAllocations](doxygen:LowerAllocations_8cpp-source.html) pass. This pass +converts `malloc` and `free` instructions into platform dependent +`malloc()` and `free()` function calls. It uses the `doInitialization` +method to get a reference to the `malloc` and `free` functions that it needs, adding prototypes to the module if necessary. -.. _writing-an-llvm-pass-runOnFunction: +(writing-an-llvm-pass-runOnFunction)= -The ``runOnFunction`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### The `runOnFunction` method -.. 
code-block:: c++ +```cpp +virtual bool runOnFunction(Function &F) = 0; +``` - virtual bool runOnFunction(Function &F) = 0; - -The ``runOnFunction`` method must be implemented by your subclass to do the -transformation or analysis work of your pass. As usual, a ``true`` value +The `runOnFunction` method must be implemented by your subclass to do the +transformation or analysis work of your pass. As usual, a `true` value should be returned if the function is modified. -.. _writing-an-llvm-pass-doFinalization-mod: - -The ``doFinalization(Module &)`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +(writing-an-llvm-pass-doFinalization-mod)= -.. code-block:: c++ +#### The `doFinalization(Module &)` method - virtual bool doFinalization(Module &M); +```cpp +virtual bool doFinalization(Module &M); +``` -The ``doFinalization`` method is an infrequently used method that is called -when the pass framework has finished calling :ref:`runOnFunction -` for every function in the program being +The `doFinalization` method is an infrequently used method that is called +when the pass framework has finished calling +{ref}`runOnFunction ` for every function in the program being compiled. -.. _writing-an-llvm-pass-LoopPass: +(writing-an-llvm-pass-LoopPass)= -The ``LoopPass`` class ----------------------- +### The `LoopPass` class -All ``LoopPass`` execute on each :ref:`loop ` in the function -independent of all of the other loops in the function. ``LoopPass`` processes +All `LoopPass` execute on each {ref}`loop ` in the function +independent of all of the other loops in the function. `LoopPass` processes loops in loop nest order such that outer most loop is processed last. -``LoopPass`` subclasses are allowed to update loop nest using ``LPPassManager`` +`LoopPass` subclasses are allowed to update loop nest using `LPPassManager` interface. Implementing a loop pass is usually straightforward. -``LoopPass``\ es may override three virtual methods to do their work. 
All -these methods should return ``true`` if they modified the program, or ``false`` +`LoopPass`es may override three virtual methods to do their work. All +these methods should return `true` if they modified the program, or `false` if they didn't. -A ``LoopPass`` subclass which is intended to run as part of the main loop pass +A `LoopPass` subclass which is intended to run as part of the main loop pass pipeline needs to preserve all of the same *function* analyses that the other loop passes in its pipeline require. To make that easier, -a ``getLoopAnalysisUsage`` function is provided by ``LoopUtils.h``. It can be -called within the subclass's ``getAnalysisUsage`` override to get consistent -and correct behavior. Analogously, ``INITIALIZE_PASS_DEPENDENCY(LoopPass)`` +a `getLoopAnalysisUsage` function is provided by `LoopUtils.h`. It can be +called within the subclass's `getAnalysisUsage` override to get consistent +and correct behavior. Analogously, `INITIALIZE_PASS_DEPENDENCY(LoopPass)` will initialize this set of function analyses. -The ``doInitialization(Loop *, LPPassManager &)`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: c++ +#### The `doInitialization(Loop *, LPPassManager &)` method - virtual bool doInitialization(Loop *, LPPassManager &LPM); +```cpp +virtual bool doInitialization(Loop *, LPPassManager &LPM); +``` -The ``doInitialization`` method is designed to do simple initialization type of +The `doInitialization` method is designed to do simple initialization type of stuff that does not depend on the functions being processed. The -``doInitialization`` method call is not scheduled to overlap with any other -pass executions (thus it should be very fast). ``LPPassManager`` interface -should be used to access ``Function`` or ``Module`` level analysis information. +`doInitialization` method call is not scheduled to overlap with any other +pass executions (thus it should be very fast). 
`LPPassManager` interface +should be used to access `Function` or `Module` level analysis information. -.. _writing-an-llvm-pass-runOnLoop: +(writing-an-llvm-pass-runOnLoop)= -The ``runOnLoop`` method -^^^^^^^^^^^^^^^^^^^^^^^^ +#### The `runOnLoop` method -.. code-block:: c++ +```cpp +virtual bool runOnLoop(Loop *, LPPassManager &LPM) = 0; +``` - virtual bool runOnLoop(Loop *, LPPassManager &LPM) = 0; - -The ``runOnLoop`` method must be implemented by your subclass to do the -transformation or analysis work of your pass. As usual, a ``true`` value -should be returned if the function is modified. ``LPPassManager`` interface +The `runOnLoop` method must be implemented by your subclass to do the +transformation or analysis work of your pass. As usual, a `true` value +should be returned if the function is modified. `LPPassManager` interface should be used to update loop nest. -The ``doFinalization()`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: c++ +#### The `doFinalization()` method - virtual bool doFinalization(); +```cpp +virtual bool doFinalization(); +``` -The ``doFinalization`` method is an infrequently used method that is called -when the pass framework has finished calling :ref:`runOnLoop -` for every loop in the program being compiled. +The `doFinalization` method is an infrequently used method that is called +when the pass framework has finished calling +{ref}`runOnLoop ` for every loop in the program being compiled. -.. _writing-an-llvm-pass-RegionPass: +(writing-an-llvm-pass-RegionPass)= -The ``RegionPass`` class ------------------------- +### The `RegionPass` class -``RegionPass`` is similar to :ref:`LoopPass `, +`RegionPass` is similar to {ref}`LoopPass `, but executes on each single entry single exit region in the function. -``RegionPass`` processes regions in nested order such that the outer most +`RegionPass` processes regions in nested order such that the outer most region is processed last. 
-``RegionPass`` subclasses are allowed to update the region tree by using the -``RGPassManager`` interface. You may override three virtual methods of -``RegionPass`` to implement your own region pass. All these methods should -return ``true`` if they modified the program, or ``false`` if they did not. +`RegionPass` subclasses are allowed to update the region tree by using the +`RGPassManager` interface. You may override three virtual methods of +`RegionPass` to implement your own region pass. All these methods should +return `true` if they modified the program, or `false` if they did not. -The ``doInitialization(Region *, RGPassManager &)`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### The `doInitialization(Region *, RGPassManager &)` method -.. code-block:: c++ +```cpp +virtual bool doInitialization(Region *, RGPassManager &RGM); +``` - virtual bool doInitialization(Region *, RGPassManager &RGM); - -The ``doInitialization`` method is designed to do simple initialization type of +The `doInitialization` method is designed to do simple initialization type of stuff that does not depend on the functions being processed. The -``doInitialization`` method call is not scheduled to overlap with any other -pass executions (thus it should be very fast). ``RPPassManager`` interface -should be used to access ``Function`` or ``Module`` level analysis information. - -.. _writing-an-llvm-pass-runOnRegion: +`doInitialization` method call is not scheduled to overlap with any other +pass executions (thus it should be very fast). `RGPassManager` interface +should be used to access `Function` or `Module` level analysis information. -The ``runOnRegion`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^ +(writing-an-llvm-pass-runOnRegion)= -.. 
code-block:: c++ +#### The `runOnRegion` method - virtual bool runOnRegion(Region *, RGPassManager &RGM) = 0; +```cpp +virtual bool runOnRegion(Region *, RGPassManager &RGM) = 0; +``` -The ``runOnRegion`` method must be implemented by your subclass to do the +The `runOnRegion` method must be implemented by your subclass to do the transformation or analysis work of your pass. As usual, a true value should be -returned if the region is modified. ``RGPassManager`` interface should be used to +returned if the region is modified. `RGPassManager` interface should be used to update region tree. -The ``doFinalization()`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: c++ +#### The `doFinalization()` method - virtual bool doFinalization(); +```cpp +virtual bool doFinalization(); +``` -The ``doFinalization`` method is an infrequently used method that is called -when the pass framework has finished calling :ref:`runOnRegion -` for every region in the program being +The `doFinalization` method is an infrequently used method that is called +when the pass framework has finished calling +{ref}`runOnRegion ` for every region in the program being compiled. -The ``MachineFunctionPass`` class ---------------------------------- +### The `MachineFunctionPass` class -A ``MachineFunctionPass`` is a part of the LLVM code generator that executes on +A `MachineFunctionPass` is a part of the LLVM code generator that executes on the machine-dependent representation of each LLVM function in the program. Code generator passes are registered and initialized specially by -``TargetMachine::addPassesToEmitFile`` and similar routines, so they cannot -generally be run from the :program:`opt` or :program:`bugpoint` commands. +`TargetMachine::addPassesToEmitFile` and similar routines, so they cannot +generally be run from the {program}`opt` or {program}`bugpoint` commands. 
-A ``MachineFunctionPass`` is also a ``FunctionPass``, so all the restrictions -that apply to a ``FunctionPass`` also apply to it. ``MachineFunctionPass``\ es -also have additional restrictions. In particular, ``MachineFunctionPass``\ es +A `MachineFunctionPass` is also a `FunctionPass`, so all the restrictions +that apply to a `FunctionPass` also apply to it. `MachineFunctionPass`es +also have additional restrictions. In particular, `MachineFunctionPass`es are not allowed to do any of the following: -#. Modify or create any LLVM IR ``Instruction``\ s, ``BasicBlock``\ s, - ``Argument``\ s, ``Function``\ s, ``GlobalVariable``\ s, - ``GlobalAlias``\ es, or ``Module``\ s. -#. Modify a ``MachineFunction`` other than the one currently being processed. -#. Maintain state across invocations of :ref:`runOnMachineFunction - ` (including global data). +1. Modify or create any LLVM IR `Instruction`s, `BasicBlock`s, + `Argument`s, `Function`s, `GlobalVariable`s, + `GlobalAlias`es, or `Module`s. +1. Modify a `MachineFunction` other than the one currently being processed. +1. Maintain state across invocations of + {ref}`runOnMachineFunction ` (including global data). -.. _writing-an-llvm-pass-runOnMachineFunction: +(writing-an-llvm-pass-runOnMachineFunction)= -The ``runOnMachineFunction(MachineFunction &MF)`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### The `runOnMachineFunction(MachineFunction &MF)` method -.. code-block:: c++ +```cpp +virtual bool runOnMachineFunction(MachineFunction &MF) = 0; +``` - virtual bool runOnMachineFunction(MachineFunction &MF) = 0; +`runOnMachineFunction` can be considered the main entry point of a +`MachineFunctionPass`; that is, you should override this method to do the +work of your `MachineFunctionPass`. -``runOnMachineFunction`` can be considered the main entry point of a -``MachineFunctionPass``; that is, you should override this method to do the -work of your ``MachineFunctionPass``. 
- -The ``runOnMachineFunction`` method is called on every ``MachineFunction`` in a -``Module``, so that the ``MachineFunctionPass`` may perform optimizations on +The `runOnMachineFunction` method is called on every `MachineFunction` in a +`Module`, so that the `MachineFunctionPass` may perform optimizations on the machine-dependent representation of the function. If you want to get at -the LLVM ``Function`` for the ``MachineFunction`` you're working on, use -``MachineFunction``'s ``getFunction()`` accessor method --- but remember, you -may not modify the LLVM ``Function`` or its contents from a -``MachineFunctionPass``. +the LLVM `Function` for the `MachineFunction` you're working on, use +`MachineFunction`'s `getFunction()` accessor method --- but remember, you +may not modify the LLVM `Function` or its contents from a +`MachineFunctionPass`. -.. _writing-an-llvm-pass-registration: +(writing-an-llvm-pass-registration)= -Pass registration ------------------ +### Pass registration -Passes are registered with the ``RegisterPass`` template. The template +Passes are registered with the `RegisterPass` template. The template parameter is the name of the pass that is to be used on the command line to specify that the pass should be added to a program. The first argument is the -name of the pass, which is to be used for the :option:`-help` output of +name of the pass, which is to be used for the {option}`-help` output of programs, as well as for debug output generated by the `--debug-pass` option. If you want your pass to be easily dumpable, you should implement the virtual print method: -The ``print`` method -^^^^^^^^^^^^^^^^^^^^ - -.. 
code-block:: c++ +#### The `print` method - virtual void print(llvm::raw_ostream &O, const Module *M) const; +```cpp +virtual void print(llvm::raw_ostream &O, const Module *M) const; +``` -The ``print`` method must be implemented by "analyses" in order to print a +The `print` method must be implemented by "analyses" in order to print a human-readable version of the analysis results. This is useful for debugging an analysis itself, as well as for other people to figure out how an analysis -works. Use the opt ``-analyze`` argument to invoke this method. +works. Use the opt `-analyze` argument to invoke this method. -The ``llvm::raw_ostream`` parameter specifies the stream to write the results -on, and the ``Module`` parameter gives a pointer to the top level module of the -program that has been analyzed. Note however that this pointer may be ``NULL`` -in certain circumstances (such as calling the ``Pass::dump()`` from a +The `llvm::raw_ostream` parameter specifies the stream to write the results +on, and the `Module` parameter gives a pointer to the top level module of the +program that has been analyzed. Note however that this pointer may be `NULL` +in certain circumstances (such as calling the `Pass::dump()` from a debugger), so it should only be used to enhance debug output, it should not be depended on. -Scheduling a MachineFunctionPass -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Scheduling a MachineFunctionPass -Backends create a ``TargetPassConfig`` and use ``addPass`` to schedule -``MachineFunctionPass``\ es. External plugins can register a callback to modify +Backends create a `TargetPassConfig` and use `addPass` to schedule +`MachineFunctionPass`es. External plugins can register a callback to modify and insert additional passes: -.. code-block:: c++ - - RegisterTargetPassConfigCallback X{[](auto &TM, auto &PM, auto *TPC) { - TPC->insertPass(/* ... */); - TPC->substitutePass(/* ... 
*/); - }}; +```cpp +RegisterTargetPassConfigCallback X{[](auto &TM, auto &PM, auto *TPC) { + TPC->insertPass(/* ... */); + TPC->substitutePass(/* ... */); +}}; +``` Note that passes still have to be registered: -.. code-block:: c++ - - __attribute__((constructor)) static void initCodeGenPlugin() { - initializeExamplePass(*PassRegistry::getPassRegistry()); - } +```cpp +__attribute__((constructor)) static void initCodeGenPlugin() { + initializeExamplePass(*PassRegistry::getPassRegistry()); +} +``` -.. _writing-an-llvm-pass-interaction: +(writing-an-llvm-pass-interaction)= -Specifying interactions between passes --------------------------------------- +### Specifying interactions between passes -One of the main responsibilities of the ``PassManager`` is to make sure that -passes interact with each other correctly. Because ``PassManager`` tries to -:ref:`optimize the execution of passes ` it +One of the main responsibilities of the `PassManager` is to make sure that +passes interact with each other correctly. Because `PassManager` tries to +{ref}`optimize the execution of passes ` it must know how the passes interact with each other and what dependencies exist between the various passes. To track this, each pass can declare the set of passes that are required to be executed before the current pass, and the passes @@ -480,47 +450,43 @@ which are invalidated by the current pass. Typically this functionality is used to require that analysis results are computed before your pass is run. Running arbitrary transformation passes can invalidate the computed analysis results, which is what the invalidation set -specifies. If a pass does not implement the :ref:`getAnalysisUsage -` method, it defaults to not having any +specifies. If a pass does not implement the +{ref}`getAnalysisUsage ` method, it defaults to not having any prerequisite passes, and invalidating **all** other passes. -.. 
_writing-an-llvm-pass-getAnalysisUsage: +(writing-an-llvm-pass-getAnalysisUsage)= -The ``getAnalysisUsage`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### The `getAnalysisUsage` method -.. code-block:: c++ +```cpp +virtual void getAnalysisUsage(AnalysisUsage &Info) const; +``` - virtual void getAnalysisUsage(AnalysisUsage &Info) const; - -By implementing the ``getAnalysisUsage`` method, the required and invalidated +By implementing the `getAnalysisUsage` method, the required and invalidated sets may be specified for your transformation. The implementation should fill -in the `AnalysisUsage -`_ object with +in the [AnalysisUsage](doxygen:classllvm_1_1AnalysisUsage.html) object with information about which passes are required and not invalidated. To do this, a -pass may call any of the following methods on the ``AnalysisUsage`` object: +pass may call any of the following methods on the `AnalysisUsage` object: -The ``AnalysisUsage::addRequired<>`` and ``AnalysisUsage::addRequiredTransitive<>`` methods -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### The `AnalysisUsage::addRequired<>` and `AnalysisUsage::addRequiredTransitive<>` methods If your pass requires a previous pass to be executed (an analysis for example), it can use one of these methods to arrange for it to be run before your pass. LLVM has many different types of analyses and passes that can be required, -spanning the range from ``DominatorSet`` to ``BreakCriticalEdges``. Requiring -``BreakCriticalEdges``, for example, guarantees that there will be no critical +spanning the range from `DominatorSet` to `BreakCriticalEdges`. Requiring +`BreakCriticalEdges`, for example, guarantees that there will be no critical edges in the CFG when your pass has been run. Some analyses chain to other analyses to do their job. For example, an -`AliasAnalysis `_ implementation is required to :ref:`chain -` to other alias analysis passes. 
In cases where -analyses chain, the ``addRequiredTransitive`` method should be used instead of -the ``addRequired`` method. This informs the ``PassManager`` that the +{doc}`AliasAnalysis ` implementation is required to +{ref}`chain ` to other alias analysis passes. In cases where +analyses chain, the `addRequiredTransitive` method should be used instead of +the `addRequired` method. This informs the `PassManager` that the transitively required pass should be alive as long as the requiring pass is. -The ``AnalysisUsage::addPreserved<>`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### The `AnalysisUsage::addPreserved<>` method -One of the jobs of the ``PassManager`` is to optimize how and when analyses are +One of the jobs of the `PassManager` is to optimize how and when analyses are run. In particular, it attempts to avoid recomputing data unless it needs to. For this reason, passes are allowed to declare that they preserve (i.e., they don't invalidate) an existing analysis if it's available. For example, a @@ -528,187 +494,180 @@ simple constant folding pass would not modify the CFG, so it can't possibly affect the results of dominator analysis. By default, all passes are assumed to invalidate all others. -The ``AnalysisUsage`` class provides several methods which are useful in -certain circumstances that are related to ``addPreserved``. In particular, the -``setPreservesAll`` method can be called to indicate that the pass does not +The `AnalysisUsage` class provides several methods which are useful in +certain circumstances that are related to `addPreserved`. In particular, the +`setPreservesAll` method can be called to indicate that the pass does not modify the LLVM program at all (which is true for analyses), and the -``setPreservesCFG`` method can be used by transformations that change +`setPreservesCFG` method can be used by transformations that change instructions in the program but do not modify the CFG or terminator instructions. 
-``addPreserved`` is particularly useful for transformations like -``BreakCriticalEdges``. This pass knows how to update a small set of loop and +`addPreserved` is particularly useful for transformations like +`BreakCriticalEdges`. This pass knows how to update a small set of loop and dominator related analyses if they exist, so it can preserve them, despite the fact that it hacks on the CFG. -Example implementations of ``getAnalysisUsage`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -.. code-block:: c++ +#### Example implementations of `getAnalysisUsage` - // This example modifies the program, but does not modify the CFG - void LICM::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired(); - } +```cpp +// This example modifies the program, but does not modify the CFG +void LICM::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addRequired(); +} +``` -.. _writing-an-llvm-pass-getAnalysis: +(writing-an-llvm-pass-getAnalysis)= -The ``getAnalysis<>`` and ``getAnalysisIfAvailable<>`` methods -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### The `getAnalysis<>` and `getAnalysisIfAvailable<>` methods -The ``Pass::getAnalysis<>`` method is automatically inherited by your class, +The `Pass::getAnalysis<>` method is automatically inherited by your class, providing you with access to the passes that you declared that you required -with the :ref:`getAnalysisUsage ` +with the {ref}`getAnalysisUsage ` method. It takes a single template argument that specifies which pass class you want, and returns a reference to that pass. For example: -.. code-block:: c++ - - bool LICM::runOnFunction(Function &F) { - LoopInfo &LI = getAnalysis().getLoopInfo(); - //... - } +```cpp +bool LICM::runOnFunction(Function &F) { + LoopInfo &LI = getAnalysis().getLoopInfo(); + //... +} +``` This method call returns a reference to the pass desired. 
You may get a runtime assertion failure if you attempt to get an analysis that you did not -declare as required in your :ref:`getAnalysisUsage -` implementation. This method can be -called by your ``run*`` method implementation, or by any other local method -invoked by your ``run*`` method. +declare as required in your +{ref}`getAnalysisUsage ` implementation. This method can be +called by your `run*` method implementation, or by any other local method +invoked by your `run*` method. A module level pass can use function level analysis info using this interface. For example: -.. code-block:: c++ - - bool ModuleLevelPass::runOnModule(Module &M) { - //... - DominatorTree &DT = getAnalysis(Func); - //... - } +```cpp +bool ModuleLevelPass::runOnModule(Module &M) { + //... + DominatorTree &DT = getAnalysis(Func); + //... +} +``` -In above example, ``runOnFunction`` for ``DominatorTree`` is called by pass +In above example, `runOnFunction` for `DominatorTree` is called by pass manager before returning a reference to the desired pass. If your pass is capable of updating analyses if they exist (e.g., -``BreakCriticalEdges``, as described above), you can use the -``getAnalysisIfAvailable`` method, which returns a pointer to the analysis if +`BreakCriticalEdges`, as described above), you can use the +`getAnalysisIfAvailable` method, which returns a pointer to the analysis if it is active. For example: -.. code-block:: c++ - - if (DominatorSet *DS = getAnalysisIfAvailable()) { - // A DominatorSet is active. This code will update it. - } +```cpp +if (DominatorSet *DS = getAnalysisIfAvailable()) { + // A DominatorSet is active. This code will update it. +} +``` -Pass Statistics -=============== +## Pass Statistics -The `Statistic `_ class is +The [Statistic](doxygen:Statistic_8h_source.html) class is designed to be an easy way to expose various success metrics from passes. 
-These statistics are printed at the end of a run, when the :option:`-stats` -command line option is enabled on the command line. See the :ref:`Statistics -section ` in the Programmer's Manual for details. +These statistics are printed at the end of a run, when the {option}`-stats` +command line option is enabled on the command line. See the +{ref}`Statistics section ` in the Programmer's Manual for details. -.. _writing-an-llvm-pass-passmanager: +(writing-an-llvm-pass-passmanager)= -What PassManager does ---------------------- +### What PassManager does -The `PassManager `_ `class -`_ takes a list of -passes, ensures their :ref:`prerequisites ` +The [PassManager](doxygen:PassManager_8h_source.html) +[class](doxygen:classllvm_1_1PassManager.html) takes a list of +passes, ensures their {ref}`prerequisites ` are set up correctly, and then schedules passes to run efficiently. All of the LLVM tools that run passes use the PassManager for execution of these passes. The PassManager does two main things to try to reduce the execution time of a series of passes: -#. **Share analysis results.** The ``PassManager`` attempts to avoid +1. **Share analysis results.** The `PassManager` attempts to avoid recomputing analysis results as much as possible. This means keeping track of which analyses are available already, which analyses get invalidated, and which analyses are needed to be run for a pass. An important part of work - is that the ``PassManager`` tracks the exact lifetime of all analysis - results, allowing it to :ref:`free memory - ` allocated to holding analysis results + is that the `PassManager` tracks the exact lifetime of all analysis + results, allowing it to {ref}`free memory ` allocated to holding analysis results as soon as they are no longer needed. -#. **Pipeline the execution of passes on the program.** The ``PassManager`` +1. 
**Pipeline the execution of passes on the program.** The `PassManager` attempts to get better cache and memory usage behavior out of a series of passes by pipelining the passes together. This means that, given a series - of consecutive :ref:`FunctionPass `, it - will execute all of the :ref:`FunctionPass - ` on the first function, then all of the - :ref:`FunctionPasses ` on the second + of consecutive {ref}`FunctionPass `, it + will execute all of the + {ref}`FunctionPass ` on the first function, then all of the + {ref}`FunctionPasses ` on the second function, etc... until the entire program has been run through the passes. This improves the cache behavior of the compiler, because it is only touching the LLVM program representation for a single function at a time, instead of traversing the entire program. It reduces the memory consumption - of compiler, because, for example, only one `DominatorSet - `_ needs to be + of compiler, because, for example, only one + [DominatorSet](doxygen:classllvm_1_1DominatorSet.html) needs to be calculated at a time. -The effectiveness of the ``PassManager`` is influenced directly by how much +The effectiveness of the `PassManager` is influenced directly by how much information it has about the behaviors of the passes it is scheduling. For example, the "preserved" set is intentionally conservative in the face of an -unimplemented :ref:`getAnalysisUsage ` +unimplemented {ref}`getAnalysisUsage ` method. Not implementing when it should be implemented will have the effect of not allowing any analysis results to live across the execution of your pass. -The ``PassManager`` class exposes a ``--debug-pass`` command line options that +The `PassManager` class exposes a `--debug-pass` command line options that is useful for debugging pass execution, seeing how things work, and diagnosing when you should be preserving more analyses than you currently are. 
(To get -information about all of the variants of the ``--debug-pass`` option, just type -"``llc -help-hidden``"). +information about all of the variants of the `--debug-pass` option, just type +"`llc -help-hidden`"). By using the --debug-pass=Structure option, for example, we can see inspect the default optimization pipelines, e.g. (the output has been trimmed): -.. code-block:: console - - $ llc -mtriple=arm64-- -O3 -debug-pass=Structure file.ll > /dev/null - (...) - ModulePass Manager - Pre-ISel Intrinsic Lowering - FunctionPass Manager - Expand IR instructions - Expand Atomic instructions - SVE intrinsics optimizations - FunctionPass Manager - Dominator Tree Construction +```console +$ llc -mtriple=arm64-- -O3 -debug-pass=Structure file.ll > /dev/null +(...) +ModulePass Manager +Pre-ISel Intrinsic Lowering +FunctionPass Manager + Expand IR instructions + Expand Atomic instructions +SVE intrinsics optimizations FunctionPass Manager - Simplify the CFG Dominator Tree Construction - Natural Loop Information - Canonicalize natural loops - (...) +FunctionPass Manager + Simplify the CFG + Dominator Tree Construction + Natural Loop Information + Canonicalize natural loops +(...) +``` -.. _writing-an-llvm-pass-releaseMemory: +(writing-an-llvm-pass-releaseMemory)= -The ``releaseMemory`` method -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### The `releaseMemory` method -.. code-block:: c++ +```cpp +virtual void releaseMemory(); +``` - virtual void releaseMemory(); - -The ``PassManager`` automatically determines when to compute analysis results, +The `PassManager` automatically determines when to compute analysis results, and how long to keep them around for. Because the lifetime of the pass object itself is effectively the entire duration of the compilation process, we need some way to free analysis results when they are no longer useful. The -``releaseMemory`` virtual method is the way to do this. +`releaseMemory` virtual method is the way to do this. 
If you are writing an analysis or any other pass that retains a significant amount of state (for use by another pass which "requires" your pass and uses -the :ref:`getAnalysis ` method) you should -implement ``releaseMemory`` to, well, release the memory allocated to maintain -this internal state. This method is called after the ``run*`` method for the -class, before the next call of ``run*`` in your pass. +the {ref}`getAnalysis ` method) you should +implement `releaseMemory` to, well, release the memory allocated to maintain +this internal state. This method is called after the `run*` method for the +class, before the next call of `run*` in your pass. -Registering dynamically loaded passes -===================================== +## Registering dynamically loaded passes *Size matters* when constructing production quality tools using LLVM, both for the purposes of distribution, and for regulating the resident code size when @@ -718,109 +677,106 @@ configurations later on. You want to be able to do all this, and, provide feedback to the user. This is where pass registration comes into play. The fundamental mechanisms for pass registration are the -``MachinePassRegistry`` class and subclasses of ``MachinePassRegistryNode``. +`MachinePassRegistry` class and subclasses of `MachinePassRegistryNode`. -An instance of ``MachinePassRegistry`` is used to maintain a list of -``MachinePassRegistryNode`` objects. This instance maintains the list and +An instance of `MachinePassRegistry` is used to maintain a list of +`MachinePassRegistryNode` objects. This instance maintains the list and communicates additions and deletions to the command line interface. -An instance of ``MachinePassRegistryNode`` subclass is used to maintain +An instance of `MachinePassRegistryNode` subclass is used to maintain information provided about a particular pass. 
This information includes the command line name, the command help string and the address of the function used to create an instance of the pass. A global static constructor of one of these -instances *registers* with a corresponding ``MachinePassRegistry``, the static +instances *registers* with a corresponding `MachinePassRegistry`, the static destructor *unregisters*. Thus a pass that is statically linked in the tool will be registered at start up. A dynamically loaded pass will register on load and unregister at unload. -Using existing registries -------------------------- +### Using existing registries There are predefined registries to track instruction scheduling -(``RegisterScheduler``) and register allocation (``RegisterRegAlloc``) machine +(`RegisterScheduler`) and register allocation (`RegisterRegAlloc`) machine passes. Here we will describe how to *register* a register allocator machine pass. Implement your register allocator machine pass. In your register allocator -``.cpp`` file add the following include: - -.. code-block:: c++ +`.cpp` file add the following include: - #include "llvm/CodeGen/RegAllocRegistry.h" +```cpp +#include "llvm/CodeGen/RegAllocRegistry.h" +``` -Also in your register allocator ``.cpp`` file, define a creator function in the +Also in your register allocator `.cpp` file, define a creator function in the form: -.. code-block:: c++ - - FunctionPass *createMyRegisterAllocator() { - return new MyRegisterAllocator(); - } +```cpp +FunctionPass *createMyRegisterAllocator() { + return new MyRegisterAllocator(); +} +``` Note that the signature of this function should match the type of -``RegisterRegAlloc::FunctionPassCtor``. In the same file add the "installing" +`RegisterRegAlloc::FunctionPassCtor`. In the same file add the "installing" declaration, in the form: -.. 
code-block:: c++ - - static RegisterRegAlloc myRegAlloc("myregalloc", - "my register allocator help string", - createMyRegisterAllocator); +```cpp +static RegisterRegAlloc myRegAlloc("myregalloc", + "my register allocator help string", + createMyRegisterAllocator); +``` Note the two spaces prior to the help string produces a tidy result on the -:option:`-help` query. - -.. code-block:: console - - $ llc -help - ... - -regalloc - Register allocator to use (default=linearscan) - =linearscan - linear scan register allocator - =local - local register allocator - =simple - simple register allocator - =myregalloc - my register allocator help string - ... - -And that's it. The user is now free to use ``-regalloc=myregalloc`` as an +{option}`-help` query. + +```console +$ llc -help + ... + -regalloc - Register allocator to use (default=linearscan) + =linearscan - linear scan register allocator + =local - local register allocator + =simple - simple register allocator + =myregalloc - my register allocator help string + ... +``` + +And that's it. The user is now free to use `-regalloc=myregalloc` as an option. Registering instruction schedulers is similar except use the -``RegisterScheduler`` class. Note that the -``RegisterScheduler::FunctionPassCtor`` is significantly different from -``RegisterRegAlloc::FunctionPassCtor``. +`RegisterScheduler` class. Note that the +`RegisterScheduler::FunctionPassCtor` is significantly different from +`RegisterRegAlloc::FunctionPassCtor`. To force the load/linking of your register allocator into the -:program:`llc`/:program:`lli` tools, add your creator function's global -declaration to ``Passes.h`` and add a "pseudo" call line to -``llvm/Codegen/LinkAllCodegenComponents.h``. +{program}`llc`/{program}`lli` tools, add your creator function's global +declaration to `Passes.h` and add a "pseudo" call line to +`llvm/Codegen/LinkAllCodegenComponents.h`. 
-Creating new registries ------------------------ +### Creating new registries The easiest way to get started is to clone one of the existing registries; we -recommend ``llvm/CodeGen/RegAllocRegistry.h``. The key things to modify are -the class name and the ``FunctionPassCtor`` type. +recommend `llvm/CodeGen/RegAllocRegistry.h`. The key things to modify are +the class name and the `FunctionPassCtor` type. Then you need to declare the registry. Example: if your pass registry is -``RegisterMyPasses`` then define: - -.. code-block:: c++ +`RegisterMyPasses` then define: - MachinePassRegistry RegisterMyPasses::Registry; +```cpp +MachinePassRegistry RegisterMyPasses::Registry; +``` And finally, declare the command line option for your passes. Example: -.. code-block:: c++ +```cpp +cl::opt > +MyPassOpt("mypass", + cl::init(&createDefaultMyPass), + cl::desc("my pass option help")); +``` - cl::opt > - MyPassOpt("mypass", - cl::init(&createDefaultMyPass), - cl::desc("my pass option help")); - -Here the command option is "``mypass``", with ``createDefaultMyPass`` as the +Here the command option is "`mypass`", with `createDefaultMyPass` as the default creator. -Using GDB with dynamically loaded passes ----------------------------------------- +### Using GDB with dynamically loaded passes Unfortunately, using GDB with dynamically loaded passes is not as easy as it should be. First of all, you can't set a breakpoint in a shared object that @@ -829,49 +785,47 @@ functions in shared objects. Here are some suggestions to debugging your pass with GDB. For sake of discussion, I'm going to assume that you are debugging a -transformation invoked by :program:`opt`, although nothing described here +transformation invoked by {program}`opt`, although nothing described here depends on that. -Setting a breakpoint in your pass -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +#### Setting a breakpoint in your pass First thing you do is start gdb on the opt process: -.. 
code-block:: console - - $ gdb opt - GNU gdb 5.0 - Copyright 2000 Free Software Foundation, Inc. - GDB is free software, covered by the GNU General Public License, and you are - welcome to change it and/or distribute copies of it under certain conditions. - Type "show copying" to see the conditions. - There is absolutely no warranty for GDB. Type "show warranty" for details. - This GDB was configured as "sparc-sun-solaris2.6"... - (gdb) - -Note that :program:`opt` has a lot of debugging information in it, so it takes +```console +$ gdb opt +GNU gdb 5.0 +Copyright 2000 Free Software Foundation, Inc. +GDB is free software, covered by the GNU General Public License, and you are +welcome to change it and/or distribute copies of it under certain conditions. +Type "show copying" to see the conditions. +There is absolutely no warranty for GDB. Type "show warranty" for details. +This GDB was configured as "sparc-sun-solaris2.6"... +(gdb) +``` + +Note that {program}`opt` has a lot of debugging information in it, so it takes time to load. Be patient. Since we cannot set a breakpoint in our pass yet (the shared object isn't loaded until runtime), we must execute the process, and have it stop before it invokes our pass, but after it has loaded the shared object. The most foolproof way of doing this is to set a breakpoint in -``PassManager::run`` and then run the process with the arguments you want: - -.. code-block:: console - - $ (gdb) break llvm::PassManager::run - Breakpoint 1 at 0x2413bc: file Pass.cpp, line 70. 
- (gdb) run test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption] - Starting program: opt test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption] - Breakpoint 1, PassManager::run (this=0xffbef174, M=@0x70b298) at Pass.cpp:70 - 70 bool PassManager::run(Module &M) { return PM->run(M); } - (gdb) - -Once the :program:`opt` stops in the ``PassManager::run`` method you are now +`PassManager::run` and then run the process with the arguments you want: + +```console +$ (gdb) break llvm::PassManager::run +Breakpoint 1 at 0x2413bc: file Pass.cpp, line 70. +(gdb) run test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption] +Starting program: opt test.bc -load $(LLVMTOP)/llvm/Debug+Asserts/lib/[libname].so -[passoption] +Breakpoint 1, PassManager::run (this=0xffbef174, M=@0x70b298) at Pass.cpp:70 +70 bool PassManager::run(Module &M) { return PM->run(M); } +(gdb) +``` + +Once the {program}`opt` stops in the `PassManager::run` method you are now free to set breakpoints in your pass so that you can trace through execution or do other standard debugging stuff. -Miscellaneous Problems -^^^^^^^^^^^^^^^^^^^^^^ +#### Miscellaneous Problems Once you have the basics down, there are a couple of problems that GDB has, some with solutions, some without. @@ -880,16 +834,15 @@ some with solutions, some without. good job getting stack traces and stepping through inline functions. When a pass is dynamically loaded however, it somehow completely loses this capability. The only solution I know of is to de-inline a function (move it - from the body of a class to a ``.cpp`` file). + from the body of a class to a `.cpp` file). * Restarting the program breaks breakpoints. After following the information above, you have succeeded in getting some breakpoints planted in your pass. 
- Next thing you know, you restart the program (i.e., you type "``run``" again), + Next thing you know, you restart the program (i.e., you type "`run`" again), and you start getting errors about breakpoints being unsettable. The only way I have found to "fix" this problem is to delete the breakpoints that are already set in your pass, run the program, and re-set the breakpoints once - execution stops in ``PassManager::run``. + execution stops in `PassManager::run`. Hopefully these tips will help with common case debugging situations. If you'd -like to contribute some tips of your own, just contact `Chris -`_. +like to contribute some tips of your own, just contact [Chris](mailto:sabre@nondot.org). diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py index ca750dded019f..af9781b66c692 100644 --- a/llvm/docs/conf.py +++ b/llvm/docs/conf.py @@ -39,7 +39,14 @@ if not tags.has("builder-man"): raise else: - myst_enable_extensions = ["substitution"] + myst_enable_extensions = ["deflist", "substitution"] + myst_url_schemes = { + "http": None, + "https": None, + "mailto": None, + "ftp": None, + "doxygen": {"url": "/doxygen/{{path}}"}, + } # Automatic anchors for markdown titles myst_heading_anchors = 6 diff --git a/llvm/docs/index.md b/llvm/docs/index.md index 4fd63dcf61e94..e933810197c42 100644 --- a/llvm/docs/index.md +++ b/llvm/docs/index.md @@ -1,10 +1,11 @@ -About -======== +# About -.. warning:: +```{warning} +If you are using a released version of LLVM, see [the download page] +to find your documentation. +``` - If you are using a released version of LLVM, see `the download page - `_ to find your documentation. +[the download page]: https://llvm.org/releases/ The LLVM compiler infrastructure supports a wide range of projects, from industrial strength compilers to specialized JIT applications to small @@ -13,96 +14,91 @@ research projects. 
Similarly, documentation is broken down into several high-level groupings targeted at different audiences: -LLVM Design & Overview -====================== +# LLVM Design & Overview Several introductory papers and presentations. -.. toctree:: - :hidden: +```{toctree} +:hidden: - FAQ - Lexicon +FAQ +Lexicon +``` -:doc:`FAQ` - Frequently asked questions. +{doc}`FAQ` +: Frequently asked questions. -:doc:`Lexicon` - Glossary. +{doc}`Lexicon` +: Glossary. -`Introduction to the LLVM Compiler`__ - Presentation providing a users introduction to LLVM. +[Introduction to the LLVM Compiler] +: Presentation providing a users introduction to LLVM. - .. __: https://llvm.org/pubs/2008-10-04-ACAT-LLVM-Intro.html - -`Intro to LLVM`__ - A chapter from the book "The Architecture of Open Source Applications" that +[Intro to LLVM] +: A chapter from the book "The Architecture of Open Source Applications" that describes high-level design decisions that shaped LLVM. - .. __: http://www.aosabook.org/en/llvm.html - - -`LLVM: A Compilation Framework for Lifelong Program Analysis & Transformation`__ - Design overview. - - .. __: https://llvm.org/pubs/2004-01-30-CGO-LLVM.html +[LLVM: A Compilation Framework for Lifelong Program Analysis & Transformation][llvm-lifelong] +: Design overview. -`LLVM: An Infrastructure for Multi-Stage Optimization`__ - More details (quite old now). +[LLVM: An Infrastructure for Multi-Stage Optimization][llvm-multi-stage] +: More details (quite old now). - .. __: https://llvm.org/pubs/2002-12-LattnerMSThesis.html +[Introduction to the LLVM Compiler]: https://llvm.org/pubs/2008-10-04-ACAT-LLVM-Intro.html +[Intro to LLVM]: http://www.aosabook.org/en/llvm.html +[llvm-lifelong]: https://llvm.org/pubs/2004-01-30-CGO-LLVM.html +[llvm-multi-stage]: https://llvm.org/pubs/2002-12-LattnerMSThesis.html -Documentation -============= +# Documentation Getting Started, How-tos, Developer Guides, and Tutorials. -.. 
toctree:: - :hidden: +```{toctree} +:hidden: - GettingStartedTutorials - Reference - UserGuides - DiscourseMigrationGuide +GettingStartedTutorials +Reference +UserGuides +DiscourseMigrationGuide +``` -:doc:`GettingStartedTutorials` - For those new to the LLVM system. +{doc}`GettingStartedTutorials` +: For those new to the LLVM system. -:doc:`UserGuides` - User guides and How-tos. +{doc}`UserGuides` +: User guides and How-tos. -:doc:`Reference` - LLVM and API reference documentation. +{doc}`Reference` +: LLVM and API reference documentation. -:doc:`DiscourseMigrationGuide` - Guide for users to migrate to Discourse +{doc}`DiscourseMigrationGuide` +: Guide for users to migrate to Discourse -Community -========= +# Community LLVM welcomes contributions of all kinds. To learn more, see the following articles: -.. toctree:: - :hidden: +```{toctree} +:hidden: - GettingInvolved - RFCProcess - ProjectGovernance +GettingInvolved +RFCProcess +ProjectGovernance +``` -* :doc:`GettingInvolved` -* :ref:`development-process` -* :doc:`RFCProcess` -* :doc:`ProjectGovernance` -* :ref:`lists-forums` -* :ref:`meetups-social-events` -* :ref:`community-proposals` +* {doc}`GettingInvolved` +* {ref}`development-process` +* {doc}`RFCProcess` +* {doc}`ProjectGovernance` +* {ref}`lists-forums` +* {ref}`meetups-social-events` +* {ref}`community-proposals` Reporting a security issue -* :ref:`report-security-issue` +* {ref}`report-security-issue` -Indices and tables -================== +# Indices and tables -* :ref:`genindex` -* :ref:`search` +* {ref}`genindex` +* {ref}`search` From cff70078caa17c0dcea1f53f17cafad22e6bc974 Mon Sep 17 00:00:00 2001 From: Kevin Sala Penades Date: Mon, 22 Jun 2026 16:08:06 -0700 Subject: [PATCH 099/511] [offload] Add kernel replay tool to tests' dependencies (#205209) --- offload/test/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/offload/test/CMakeLists.txt b/offload/test/CMakeLists.txt index 13e8c8d93d3d3..771944d8aa8d7 
100644 --- a/offload/test/CMakeLists.txt +++ b/offload/test/CMakeLists.txt @@ -43,12 +43,14 @@ foreach(CURRENT_TARGET IN LISTS SYSTEM_TARGETS) configure_file(lit.site.cfg.in ${CURRENT_TARGET}/lit.site.cfg @ONLY) endforeach() +# The tools that are needed or used by tests. +set(OFFLOAD_TOOLS llvm-offload-device-info llvm-omp-kernel-replay) add_offload_testsuite(check-libomptarget "Running libomptarget tests" ${LIBOMPTARGET_LIT_TESTSUITES} EXCLUDE_FROM_CHECK_ALL - DEPENDS llvm-offload-device-info omptarget ${OMP_DEPEND} ${LIBOMPTARGET_TESTED_PLUGINS} + DEPENDS ${OFFLOAD_TOOLS} omptarget ${OMP_DEPEND} ${LIBOMPTARGET_TESTED_PLUGINS} ARGS ${LIBOMPTARGET_LIT_ARG_LIST}) # Add liboffload unit tests - the test binary will run on all available devices @@ -64,7 +66,7 @@ add_offload_testsuite(check-offload ${LIBOMPTARGET_LIT_TESTSUITES} ${CMAKE_CURRENT_BINARY_DIR}/unit EXCLUDE_FROM_CHECK_ALL - DEPENDS llvm-offload-device-info omptarget ${OMP_DEPEND} ${LIBOMPTARGET_TESTED_PLUGINS} + DEPENDS ${OFFLOAD_TOOLS} omptarget ${OMP_DEPEND} ${LIBOMPTARGET_TESTED_PLUGINS} LLVMOffload OffloadUnitTests ARGS ${LIBOMPTARGET_LIT_ARG_LIST}) From 4fb77076bef36922b11d82fc45f00f40bf49cae1 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 22 Jun 2026 16:09:52 -0700 Subject: [PATCH 100/511] [lldb] Use heuristics to extend rather than replace error message (#205196) When an attach fails, HandlePacket_A tries to explain why. The last two checks are heuristics that discard any error debugserver already produced for this specific failure. The guess can be wrong, for example the PT_DENY_ATTACH test case from #204688 is incorrectly reported as failing due to it running in a non-interactive debug session on the bots. Include debugserver's real error into the heuristic message, instead of replacing it, so the real reason is never lost. 
--- lldb/tools/debugserver/source/RNBRemote.cpp | 23 ++++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/lldb/tools/debugserver/source/RNBRemote.cpp b/lldb/tools/debugserver/source/RNBRemote.cpp index 1c7c3eb8e41d7..91d611f9ecbc5 100644 --- a/lldb/tools/debugserver/source/RNBRemote.cpp +++ b/lldb/tools/debugserver/source/RNBRemote.cpp @@ -4116,20 +4116,27 @@ rnb_err_t RNBRemote::HandlePacket_v(const char *p) { process_username + "'"; return SendErrorPacket("E96", msg); } + // The remaining checks can only guess at the cause from the session + // environment. When debugserver does have an error of its own, fold in + // the actual message so it's never lost. + auto with_err_str = [&err_str](std::string explanation) -> std::string { + if (err_str[0] != '\0') + return explanation + " (" + std::string(err_str) + ")"; + return explanation; + }; if (!login_session_has_gui_access() && !developer_mode_enabled()) { DNBLogError("Developer mode is not enabled and this is a " "non-interactive session"); - return SendErrorPacket("E96", "developer mode is " - "not enabled on this machine " - "and this is a non-interactive " - "debug session."); + return SendErrorPacket( + "E96", with_err_str("developer mode is not enabled on this " + "machine and this is a non-interactive " + "debug session.")); } if (!login_session_has_gui_access()) { DNBLogError("This is a non-interactive session"); - return SendErrorPacket("E96", "this is a " - "non-interactive debug session, " - "cannot get permission to debug " - "processes."); + return SendErrorPacket( + "E96", with_err_str("this is a non-interactive debug session, " + "cannot get permission to debug processes.")); } } From d6eea10e9638ea10a81f62737f061343b7439858 Mon Sep 17 00:00:00 2001 From: Akshay K Date: Mon, 22 Jun 2026 19:18:45 -0400 Subject: [PATCH 101/511] [CIR] Use the AST result type for sizeof/alignof constants (#203942) On targets where `size_t` is narrower than 64 bits (e.g. 
`i686`), CIR codegen for `sizeof`/`alignof`/`__builtin_vectorelements` crashes with a type/value bitwidth mismatch. The result of these expressions is `size_t`, but the emitted integer constant was built with a hardcoded 64-bit type. `EvaluateKnownConstInt` returns an `APSInt` with the width of the AST result type (32 bits on this target), so it no longer matches the `IntAttr`'s type and trips the `IntAttr` verifier. ### How to Reproduce ```c++ using size_t = decltype(sizeof(int)); size_t size_of_int() { return sizeof(int); } clang -cc1 -std=c++20 -triple i686-unknown-linux-gnu -fclangir \ -emit-cir test.cpp -o test.cir error: type and value bitwidth mismatch: 64 != 32 clang: mlir/include/mlir/IR/StorageUniquerSupport.h:180: ... Assertion `succeeded(ConcreteT::verifyInvariants(...))' failed. #11 cir::IntAttr::get(mlir::Type, llvm::APInt const&) #12 ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr(...) #13 CIRGenFunction::emitScalarExpr(...) ``` ### Fix Form the IntAttr using the converted AST result type (convertType(e->getType())) instead of a hardcoded 64-bit type, matching classic codegen. The same path covers the fixed-vector __builtin_vectorelements case. 
--- clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 8 ++-- .../unary-expr-or-type-trait-32bit.cpp | 38 +++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) create mode 100644 clang/test/CIR/CodeGen/unary-expr-or-type-trait-32bit.cpp diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index c7e19c38dbba1..5e8bb9df83ab3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -2773,16 +2773,18 @@ mlir::Value ScalarExprEmitter::VisitUnaryExprOrTypeTraitExpr( e->getSourceRange(), "VisitUnaryExprOrTypeTraitExpr: sizeOf scalable vector"); return builder.getConstant( - loc, cir::IntAttr::get(cgf.cgm.uInt64Ty, + loc, cir::IntAttr::get(cgf.cgm.sizeTy, e->EvaluateKnownConstInt(cgf.getContext()))); } return builder.getConstant( - loc, cir::IntAttr::get(cgf.cgm.uInt64Ty, vecTy.getSize())); + loc, cir::IntAttr::get(cgf.cgm.sizeTy, vecTy.getSize())); } + // The result type is size_t (target-dependent width); use it so the IntAttr + // width matches the APInt from EvaluateKnownConstInt. 
return builder.getConstant( - loc, cir::IntAttr::get(cgf.cgm.uInt64Ty, + loc, cir::IntAttr::get(cgf.cgm.sizeTy, e->EvaluateKnownConstInt(cgf.getContext()))); } diff --git a/clang/test/CIR/CodeGen/unary-expr-or-type-trait-32bit.cpp b/clang/test/CIR/CodeGen/unary-expr-or-type-trait-32bit.cpp new file mode 100644 index 0000000000000..e24278c09e622 --- /dev/null +++ b/clang/test/CIR/CodeGen/unary-expr-or-type-trait-32bit.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -std=c++20 -triple i686-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR +// RUN: %clang_cc1 -std=c++20 -triple i686-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s --check-prefix=LLVM +// RUN: %clang_cc1 -std=c++20 -triple i686-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM + +// The result of sizeof/alignof/__builtin_vectorelements is size_t, which is +// 32 bits wide on this target. The emitted constants must use that width +// rather than a hardcoded 64-bit type. 
+ +using size_t = decltype(sizeof(int)); + +size_t size_of_int() { return sizeof(int); } + +// CIR-LABEL: cir.func {{.*}}@_Z11size_of_intv() -> {{.*}}!u32i +// CIR: cir.const #cir.int<4> : !u32i + +// LLVM-LABEL: define {{.*}}i32 @_Z11size_of_intv() +// LLVM: {{.*}}i32 4 + +size_t align_of_double() { return alignof(double); } + +// CIR-LABEL: cir.func {{.*}}@_Z15align_of_doublev() -> {{.*}}!u32i +// CIR: cir.const #cir.int<4> : !u32i + +// LLVM-LABEL: define {{.*}}i32 @_Z15align_of_doublev() +// LLVM: {{.*}}i32 4 + +typedef int vi4 __attribute__((vector_size(16))); + +size_t vector_elements(vi4 v) { return __builtin_vectorelements(v); } + +// CIR-LABEL: cir.func {{.*}}@_Z15vector_elementsDv4_i({{.*}}) -> {{.*}}!u32i +// CIR: cir.const #cir.int<4> : !u32i + +// LLVM-LABEL: define {{.*}}i32 @_Z15vector_elementsDv4_i +// LLVM: {{.*}}i32 4 From cf8be6d76991b81516b7e3431312e2fb875471fb Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Mon, 22 Jun 2026 16:19:23 -0700 Subject: [PATCH 102/511] [lldb] Remove ConstString from Breakpoint::GetMatchingFileLine (#205210) --- lldb/include/lldb/Breakpoint/Breakpoint.h | 2 +- lldb/source/Breakpoint/Breakpoint.cpp | 3 ++- lldb/source/Commands/CommandObjectBreakpoint.cpp | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lldb/include/lldb/Breakpoint/Breakpoint.h b/lldb/include/lldb/Breakpoint/Breakpoint.h index c0bad98c95b29..67a741c6c9251 100644 --- a/lldb/include/lldb/Breakpoint/Breakpoint.h +++ b/lldb/include/lldb/Breakpoint/Breakpoint.h @@ -511,7 +511,7 @@ class Breakpoint : public std::enable_shared_from_this, /// size is 0 and true is returned, it means the breakpoint fully matches /// the /// description. 
- bool GetMatchingFileLine(ConstString filename, uint32_t line_number, + bool GetMatchingFileLine(llvm::StringRef filename, uint32_t line_number, BreakpointLocationCollection &loc_coll); void GetFilterDescription(Stream *s); diff --git a/lldb/source/Breakpoint/Breakpoint.cpp b/lldb/source/Breakpoint/Breakpoint.cpp index 201d8d20c4901..07412cd092f0d 100644 --- a/lldb/source/Breakpoint/Breakpoint.cpp +++ b/lldb/source/Breakpoint/Breakpoint.cpp @@ -1064,7 +1064,8 @@ void Breakpoint::GetResolverDescription(Stream *s) { m_resolver_sp->GetDescription(s); } -bool Breakpoint::GetMatchingFileLine(ConstString filename, uint32_t line_number, +bool Breakpoint::GetMatchingFileLine(llvm::StringRef filename, + uint32_t line_number, BreakpointLocationCollection &loc_coll) { // TODO: To be correct, this method needs to fill the breakpoint location // collection diff --git a/lldb/source/Commands/CommandObjectBreakpoint.cpp b/lldb/source/Commands/CommandObjectBreakpoint.cpp index f11af5fff2362..38b79d6c33922 100644 --- a/lldb/source/Commands/CommandObjectBreakpoint.cpp +++ b/lldb/source/Commands/CommandObjectBreakpoint.cpp @@ -2536,7 +2536,7 @@ class CommandObjectBreakpointClear : public CommandObjectParsed { switch (break_type) { case eClearTypeFileAndLine: // Breakpoint by source position { - const ConstString filename(m_options.m_filename); + llvm::StringRef filename(m_options.m_filename); BreakpointLocationCollection loc_coll; for (size_t i = 0; i < num_breakpoints; ++i) { From 66fe7d9dc0d86c3af47d7ee286d67390f36949f4 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 22 Jun 2026 16:53:11 -0700 Subject: [PATCH 103/511] Revert "[lldb][Windows] Remember server's primary stop thread on gdb-remote stops" (#205220) Reverts llvm/llvm-project#203525 because it breaks TestRealDefinition.py --- .../Plugins/Process/gdb-remote/ProcessGDBRemote.cpp | 11 +---------- .../Plugins/Process/gdb-remote/ProcessGDBRemote.h | 3 --- 2 files changed, 1 insertion(+), 13 deletions(-) diff --git 
a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index 85b9516a416fb..a986d0350bf57 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -2636,8 +2636,6 @@ StateType ProcessGDBRemote::SetThreadStopInfo(StringExtractor &stop_packet) { SetAddressableBitMasks(addressable_bits); - m_last_stop_primary_tid = tid; - ThreadSP thread_sp = SetThreadStopInfo( tid, expedited_register_map, signo, thread_name, reason, description, exc_type, exc_data, thread_dispatch_qaddr, queue_vars_valid, @@ -2686,14 +2684,7 @@ void ProcessGDBRemote::RefreshStateAfterStop() { if (m_initial_tid != LLDB_INVALID_THREAD_ID) { m_thread_list.SetSelectedThreadByID(m_initial_tid); m_initial_tid = LLDB_INVALID_THREAD_ID; - } else if (m_last_stop_primary_tid != LLDB_INVALID_THREAD_ID && - StateIsRunningState(m_last_broadcast_state)) { - if (ThreadSP primary_thread_sp = - m_thread_list.FindThreadByProtocolID(m_last_stop_primary_tid, - /*can_update=*/false)) - m_thread_list.SetSelectedThreadByID(primary_thread_sp->GetID()); - } - m_last_stop_primary_tid = LLDB_INVALID_THREAD_ID; + } // Let all threads recover from stopping and do any clean up based on the // previous thread state (if any). diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h index 7a030cd966a18..0a3386082c388 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h @@ -322,9 +322,6 @@ class ProcessGDBRemote : public Process, lldb::CommandObjectSP m_command_sp; int64_t m_breakpoint_pc_offset; lldb::tid_t m_initial_tid; // The initial thread ID, given by stub on attach - lldb::tid_t m_last_stop_primary_tid = - LLDB_INVALID_THREAD_ID; // Thread ID extracted from the most recent - // T-packet's "thread:" key. 
bool m_use_g_packet_for_reading; bool m_allow_flash_writes; From 75e83265982064b3d2504f7b8de4d4b2cea0eb44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 22 Jun 2026 16:53:26 -0700 Subject: [PATCH 104/511] [flang][cuda][openacc] Emit an error when CUDA symbols are imported with CUDA disabled (#205207) --- flang/lib/Semantics/mod-file.cpp | 29 +++++++++++++++++++++++++++++ flang/test/Semantics/modfile84.f90 | 17 +++++++++++++++++ 2 files changed, 46 insertions(+) create mode 100644 flang/test/Semantics/modfile84.f90 diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index 89a535c6ff6f9..ac44f17009344 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -11,6 +11,7 @@ #include "flang/Common/restorer.h" #include "flang/Evaluate/tools.h" #include "flang/Parser/message.h" +#include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parsing.h" #include "flang/Parser/unparse.h" #include "flang/Semantics/scope.h" @@ -72,6 +73,7 @@ static bool FileContentsMatch( const std::string &, const std::string &, const std::string &); static ModuleCheckSumType ComputeCheckSum(const std::string_view &); static std::string CheckSumString(ModuleCheckSumType); +static bool ProgramHasCUDAAttrs(const parser::Program &); // Collect symbols needed for a subprogram interface class SubprogramSymbolCollector { @@ -1703,6 +1705,13 @@ Scope *ModFileReader::Read(SourceName name, std::optional isIntrinsic, return nullptr; } parser::Program &parseTree{context_.SaveParseTree(std::move(*parsedProgram))}; + if (context_.languageFeatures().IsEnabled(common::LanguageFeature::OpenACC) && + !context_.languageFeatures().IsEnabled(common::LanguageFeature::CUDA) && + ProgramHasCUDAAttrs(parseTree)) { + Say("use", name, ancestorName, + "CUDA is not enabled, but '%s' defines CUDA 
symbols"_err_en_US, + sourceFile->path()); + } Scope *parentScope; // the scope this module/submodule goes into if (!isIntrinsic.has_value()) { for (const auto &dir : context_.intrinsicModuleDirectories()) { @@ -1823,6 +1832,26 @@ static std::optional GetSubmoduleParent( } } +struct CUDAAttrProgramVisitor { + template bool Pre(const A &) { return true; } + template void Post(const A &) {} + bool Pre(const common::CUDADataAttr &) { + foundCUDAAttrs = true; + return false; + } + bool Pre(const common::CUDASubprogramAttrs &) { + foundCUDAAttrs = true; + return false; + } + bool foundCUDAAttrs{false}; +}; + +static bool ProgramHasCUDAAttrs(const parser::Program &program) { + CUDAAttrProgramVisitor visitor; + parser::Walk(program, visitor); + return visitor.foundCUDAAttrs; +} + void SubprogramSymbolCollector::Collect() { const auto &details{symbol_.get()}; isInterface_ = details.isInterface(); diff --git a/flang/test/Semantics/modfile84.f90 b/flang/test/Semantics/modfile84.f90 new file mode 100644 index 0000000000000..2abcd8083eea6 --- /dev/null +++ b/flang/test/Semantics/modfile84.f90 @@ -0,0 +1,17 @@ +! RUN: split-file %s %t +! RUN: %flang_fc1 -fsyntax-only -x cuda -module-dir %t %t/m.cuf +! RUN: not %flang_fc1 -fsyntax-only -fopenacc -module-dir %t %t/use.f90 2>&1 | FileCheck %s + +!--- m.cuf +module modfile84m + real, device :: d +contains + attributes(device) subroutine s() + end subroutine +end module + +!--- use.f90 +use modfile84m +end + +! 
CHECK: error: Cannot use module file for module 'modfile84m': CUDA is not enabled, but '{{.*modfile84m.mod}}' defines CUDA symbols From 68b9c8039cf7cdfb26db8a87c620f5d0d9d967be Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 22 Jun 2026 17:10:30 -0700 Subject: [PATCH 105/511] [lldb] Fix LuaState after #205210 (#205219) --- lldb/source/Plugins/ScriptInterpreter/Lua/LuaState.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lldb/source/Plugins/ScriptInterpreter/Lua/LuaState.cpp b/lldb/source/Plugins/ScriptInterpreter/Lua/LuaState.cpp index 0a4001f625f43..672e694e6bad3 100644 --- a/lldb/source/Plugins/ScriptInterpreter/Lua/LuaState.cpp +++ b/lldb/source/Plugins/ScriptInterpreter/Lua/LuaState.cpp @@ -149,8 +149,8 @@ llvm::Error LuaState::LoadModule(llvm::StringRef filename) { return e; } - ConstString module_name = file.GetFileNameStrippingExtension(); - lua_setglobal(m_lua_state, module_name.GetCString()); + const std::string module_name = file.GetFileNameStrippingExtension().str(); + lua_setglobal(m_lua_state, module_name.c_str()); return llvm::Error::success(); } From a2932fbda44e279bea6cca5bf4ee40cf3e8c7ec2 Mon Sep 17 00:00:00 2001 From: superdusty Date: Tue, 23 Jun 2026 08:22:32 +0800 Subject: [PATCH 106/511] [Clang] Fix crash when comparing fixed point type with BitInt (#199912) Fixes #196948 Added checks in `handleFixedPointConversion`: reject fixed point/BitInt comparisons Now clang properly emits an error instead of crashing. 
--------- Co-authored-by: cry <2091136672@foxmail.com> --- clang/docs/ReleaseNotes.rst | 1 + clang/lib/Sema/SemaExpr.cpp | 4 ++++ clang/test/SemaCXX/ext-int.cpp | 7 ++++++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 42c5dc16ea2e1..e86f1d9602bed 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -685,6 +685,7 @@ Improvements to Coverage Mapping Bug Fixes in This Version ------------------------- +- Fixed an assertion when comparing a fixed point type with a ``_BitInt`` type. (#GH196948) - Fixed atomic boolean compound assignment; the conversion back to atomic bool would be miscompiled. (#GH33210) - Correctly handle default template argument when establishing subsumption. (#GH188640) - Fixed a failed assertion in the preprocessor when ``__has_embed`` parameters are missing parentheses. (#GH175088) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 675d0bd389e28..25df8fda7dc56 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1753,6 +1753,10 @@ QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS, // At this point, we have two different arithmetic types. + if ((LHSType->isFixedPointType() && RHSType->isBitIntType()) || + (LHSType->isBitIntType() && RHSType->isFixedPointType())) + return QualType(); + // Diagnose attempts to convert between __ibm128, __float128 and long double // where such conversions currently can't be handled. 
if (unsupportedTypeConversion(*this, LHSType, RHSType)) diff --git a/clang/test/SemaCXX/ext-int.cpp b/clang/test/SemaCXX/ext-int.cpp index 281ae3d3c1779..b2d0ac57e19ad 100644 --- a/clang/test/SemaCXX/ext-int.cpp +++ b/clang/test/SemaCXX/ext-int.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s -Wimplicit-int-conversion -Wno-unused -Wunevaluated-expression -triple aarch64-unknown-unknown +// RUN: %clang_cc1 -fsyntax-only -ffixed-point -verify %s -Wimplicit-int-conversion -Wno-unused -Wunevaluated-expression -triple aarch64-unknown-unknown template struct HasExtInt { @@ -316,3 +316,8 @@ void FromPaper1() { void FromPaper2(_BitInt(8) a1, _BitInt(24) a2) { static_assert(__is_same(decltype(a1 * (_BitInt(32))a2), _BitInt(32)), ""); } + +namespace GH196948{ + constexpr _BitInt(128) i = 42; + static_assert(i == 42.0k); // expected-error {{invalid operands to binary expression ('const _BitInt(128)' and '_Accum')}} +} From 46e4d161003abb8361e218201e4ad837ca9a349f Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 22 Jun 2026 17:24:30 -0700 Subject: [PATCH 107/511] [lldb] Survive ptrace(PT_DENY_ATTACH) when attaching (#204688) (#205198) A process can opt out of being debugged with ptrace(PT_DENY_ATTACH). The XNU kernel enforces this by delivering SIGSEGV to the *attaching* process while it is still inside the ptrace(PT_ATTACHEXC) syscall. This means debugserver gets killed before it can inspect the result. LLDB only sees the dropped connection ("error: attach failed: lost connection"). The condition can't be detected up front: the target's P_LNOATTACH flag is not exposed to userspace. 
To work around this, install a temporary SIGSEGV handler around the ptrace(PT_ATTACHEXC) call in AttachForDebug and siglongjmp back out if it fires, turning the fatal signal into an EPERM that propagates to lldb as a clear message: ``` error: attach failed: cannot attach to process N because it has disabled debugging via ptrace(PT_DENY_ATTACH) ``` The handler only acts on a SIGSEGV that arrives on the attaching thread while the guard is armed. Any other SIGSEGV restores the default handler and re-raises. Genuine, unrelated, crashes still report normally. rdar://44542907 Assisted-by: Claude --- lldb/test/API/macosx/deny-attach/Makefile | 3 + .../API/macosx/deny-attach/TestDenyAttach.py | 36 ++++++++ lldb/test/API/macosx/deny-attach/main.c | 60 ++++++++++++ .../debugserver/source/MacOSX/MachProcess.mm | 92 ++++++++++++++++++- 4 files changed, 186 insertions(+), 5 deletions(-) create mode 100644 lldb/test/API/macosx/deny-attach/Makefile create mode 100644 lldb/test/API/macosx/deny-attach/TestDenyAttach.py create mode 100644 lldb/test/API/macosx/deny-attach/main.c diff --git a/lldb/test/API/macosx/deny-attach/Makefile b/lldb/test/API/macosx/deny-attach/Makefile new file mode 100644 index 0000000000000..10495940055b6 --- /dev/null +++ b/lldb/test/API/macosx/deny-attach/Makefile @@ -0,0 +1,3 @@ +C_SOURCES := main.c + +include Makefile.rules diff --git a/lldb/test/API/macosx/deny-attach/TestDenyAttach.py b/lldb/test/API/macosx/deny-attach/TestDenyAttach.py new file mode 100644 index 0000000000000..f061bee51c31f --- /dev/null +++ b/lldb/test/API/macosx/deny-attach/TestDenyAttach.py @@ -0,0 +1,36 @@ +import time +import lldb +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil + + +class DenyAttachTestCase(TestBase): + NO_DEBUG_INFO_TESTCASE = True + + @skipUnlessDarwin + @skipIfDarwinEmbedded # PT_DENY_ATTACH attach behavior differs on ios/tvos/etc + @skipIfAsan # Attach tests time out inconsistently under 
asan. + def test_attach_to_deny_attach_process(self): + """Attaching to a PT_DENY_ATTACH process reports an error, not a crash.""" + self.build() + exe = self.getBuildArtifact("a.out") + + # Use a file as a synchronization point between test and inferior: the + # inferior writes its pid only after it has called PT_DENY_ATTACH. + pid_file_path = lldbutil.append_to_process_working_directory( + self, "pid_file_%d" % (int(time.time())) + ) + self.addTearDownHook( + lambda: self.run_platform_command("rm %s" % (pid_file_path)) + ) + + popen = self.spawnSubprocess(exe, [pid_file_path]) + pid = lldbutil.wait_for_file_on_target(self, pid_file_path) + + self.expect( + "process attach -p " + pid, + startstr="error: attach failed:", + substrs=["PT_DENY_ATTACH"], + error=True, + ) diff --git a/lldb/test/API/macosx/deny-attach/main.c b/lldb/test/API/macosx/deny-attach/main.c new file mode 100644 index 0000000000000..1f19f6928604a --- /dev/null +++ b/lldb/test/API/macosx/deny-attach/main.c @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +// Write our pid into file_name atomically (write to a temp file, then rename) +// so the test never observes a partially written pid. 
+static int write_pid(const char *file_name) { + char tmp_name[1024]; + snprintf(tmp_name, sizeof(tmp_name), "%s_tmp", file_name); + + int fd = open(tmp_name, O_CREAT | O_WRONLY | O_TRUNC, S_IRUSR | S_IWUSR); + if (fd == -1) { + fprintf(stderr, "open(%s) failed: %s\n", tmp_name, strerror(errno)); + return 1; + } + + char buffer[64]; + int len = snprintf(buffer, sizeof(buffer), "%ld", (long)getpid()); + int result = 0; + if (write(fd, buffer, len) == -1) { + fprintf(stderr, "write failed: %s\n", strerror(errno)); + result = 1; + } + close(fd); + + if (rename(tmp_name, file_name) == -1) { + fprintf(stderr, "rename failed: %s\n", strerror(errno)); + result = 1; + } + return result; +} + +int main(int argc, char const *argv[]) { + if (argc < 2) { + fprintf(stderr, "invalid number of command line arguments\n"); + return 1; + } + + // Tell the kernel to refuse all debugger attachments to this process. Any + // subsequent ptrace(PT_ATTACHEXC) against us makes the kernel deliver SIGSEGV + // to the attaching process (debugserver). + if (ptrace(PT_DENY_ATTACH, 0, 0, 0) == -1) { + fprintf(stderr, "ptrace(PT_DENY_ATTACH) failed: %s\n", strerror(errno)); + return 1; + } + + if (write_pid(argv[1]) != 0) + return 1; + + // Wait for the debugger to try (and fail) to attach. + while (1) + sleep(60); + + return 0; +} diff --git a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm index d79313846438b..3374267f7bde8 100644 --- a/lldb/tools/debugserver/source/MacOSX/MachProcess.mm +++ b/lldb/tools/debugserver/source/MacOSX/MachProcess.mm @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -2936,11 +2937,79 @@ static uint64_t bits(uint64_t value, uint32_t msbit, uint32_t lsbit) { return NULL; } +namespace { +// The XNU kernel enforces ptrace(PT_DENY_ATTACH) by delivering a SIGSEGV to the +// process that tries to attach, while it is still inside the ptrace() syscall. 
+// That kills debugserver outright instead of failing the call with an error. +// This leaves lldb unable to tell the user why the attach failed. The condition +// can't be detected up front because the target's P_LNOATTACH flag isn't +// exposed to userspace, so instead we install a temporary SIGSEGV handler +// around the ptrace() call and jump back out of it if the signal fires, turning +// the fatal signal into a clean error. + +sigjmp_buf g_deny_attach_jmpbuf; +// Only act on the SIGSEGV if it arrives on the thread that armed the guard +// while a PT_ATTACHEXC call is in flight; anything else is a genuine crash. +volatile sig_atomic_t g_deny_attach_armed = 0; +pthread_t g_deny_attach_thread; + +void DenyAttachSIGSEGVHandler(int signo) { + if (g_deny_attach_armed && + pthread_equal(pthread_self(), g_deny_attach_thread)) { + g_deny_attach_armed = 0; + siglongjmp(g_deny_attach_jmpbuf, 1); + } + // Not the deny-attach case: restore the default disposition and re-raise so a + // real crash is still reported the usual way. + signal(signo, SIG_DFL); + raise(signo); +} + +// Wrapper around ptrace(PT_ATTACHEXC, pid) that survives the SIGSEGV the kernel +// sends when `pid` has called ptrace(PT_DENY_ATTACH). On a normal attach it +// behaves exactly like ptrace() (returning its result with errno set). If the +// attach is rejected via the deny-attach signal it sets `denied_attach` and +// returns -1 with errno set to EPERM. +int PTraceAttachExcDenyAttachSafe(pid_t pid, bool &denied_attach) { + denied_attach = false; + + struct sigaction new_action = {}; + struct sigaction old_action = {}; + new_action.sa_handler = DenyAttachSIGSEGVHandler; + sigemptyset(&new_action.sa_mask); + // SA_NODEFER so a genuine fault inside the handler crashes normally instead + // of deadlocking with SIGSEGV blocked. + new_action.sa_flags = SA_NODEFER; + + if (::sigaction(SIGSEGV, &new_action, &old_action) != 0) { + // Couldn't install the handler; fall back to the unguarded call. 
+ return ::ptrace(PT_ATTACHEXC, pid, 0, 0); + } + + g_deny_attach_thread = pthread_self(); + int result; + int saved_errno; + if (sigsetjmp(g_deny_attach_jmpbuf, 1) == 0) { + g_deny_attach_armed = 1; + result = ::ptrace(PT_ATTACHEXC, pid, 0, 0); + saved_errno = errno; + g_deny_attach_armed = 0; + } else { + // The kernel delivered SIGSEGV: the target denied the attach. + denied_attach = true; + result = -1; + saved_errno = EPERM; + } + + ::sigaction(SIGSEGV, &old_action, nullptr); + errno = saved_errno; + return result; +} +} // namespace + pid_t MachProcess::AttachForDebug( - pid_t pid, - const RNBContext::IgnoredExceptions &ignored_exceptions, - char *err_str, - size_t err_len) { + pid_t pid, const RNBContext::IgnoredExceptions &ignored_exceptions, + char *err_str, size_t err_len) { // Clear out and clean up from any current state Clear(); if (pid != 0) { @@ -2973,7 +3042,8 @@ static uint64_t bits(uint64_t value, uint32_t msbit, uint32_t lsbit) { DNBLog("[LaunchAttach] (%d) About to ptrace(PT_ATTACHEXC, %d)...", getpid(), pid); errno = 0; - int ptrace_result = ::ptrace(PT_ATTACHEXC, pid, 0, 0); + bool denied_attach = false; + int ptrace_result = PTraceAttachExcDenyAttachSafe(pid, denied_attach); int ptrace_errno = errno; DNBLog("[LaunchAttach] (%d) Completed ptrace(PT_ATTACHEXC, %d) == %d", getpid(), pid, ptrace_result); @@ -2990,6 +3060,18 @@ static uint64_t bits(uint64_t value, uint32_t msbit, uint32_t lsbit) { m_flags |= eMachProcessFlagsAttached; DNBLogThreadedIf(LOG_PROCESS, "successfully attached to pid %d", pid); return m_pid; + } else if (denied_attach) { + // The target denied being debugged via ptrace(PT_DENY_ATTACH). The kernel + // would normally kill debugserver for attempting this; we caught the + // signal instead, so report a useful error rather than crashing. + snprintf(err_str, err_len, + "cannot attach to process %d because it has disabled debugging " + "via ptrace(PT_DENY_ATTACH). 
Attach earlier, put a breakpoint " + "on ptrace and return 0.", + pid); + DNBLogError("[LaunchAttach] (%d) MachProcess::AttachForDebug pid %d " + "denied attach via ptrace(PT_DENY_ATTACH)", + getpid(), pid); } else { ::snprintf(err_str, err_len, "%s", err.AsString()); DNBLogError( From 737772b31daf23b287f5c996447fe26c37b335a3 Mon Sep 17 00:00:00 2001 From: Alexandre Isoard Date: Mon, 22 Jun 2026 17:31:51 -0700 Subject: [PATCH 108/511] [HLSL] Emit lifetime.start before copy-in for inout parameters (#191917) For inout parameters, Clang was emitting lifetime.start after the copy-in store that initializes the temporary. Per LLVM's lifetime semantics, any access to memory outside its lifetime is undefined behavior, so the copy-in store was technically UB and the value was undefined after lifetime.start. Move EmitLifetimeStart into EmitHLSLOutArgLValues so that it is emitted before EmitInitializationToLValue, putting the copy-in store within the lifetime of the temporary. --------- Co-authored-by: Alexandre Isoard Co-authored-by: Deric C. --- clang/lib/CodeGen/CGExpr.cpp | 7 +- .../BasicFeatures/OutArgLifetime.hlsl | 91 +++++++++++++++++++ 2 files changed, 96 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGenHLSL/BasicFeatures/OutArgLifetime.hlsl diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 746746c828acc..465a020e38e74 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -6360,6 +6360,11 @@ CodeGenFunction::EmitHLSLOutArgLValues(const HLSLOutArgExpr *E, QualType Ty) { Address OutTemp = CreateIRTempWithoutCast(ExprTy); LValue TempLV = MakeAddrLValue(OutTemp, ExprTy); + // Start the lifetime before the copy-in so that the temporary is live when + // the initial value is written. This ensures the store is within the + // lifetime and is not killed by a store undef inserted at lifetime.start. 
+ EmitLifetimeStart(OutTemp.getBasePointer()); + if (E->isInOut()) EmitInitializationToLValue(E->getCastedTemporary()->getSourceExpr(), TempLV); @@ -6376,8 +6381,6 @@ LValue CodeGenFunction::EmitHLSLOutArgExpr(const HLSLOutArgExpr *E, llvm::Value *Addr = TempLV.getAddress().getBasePointer(); llvm::Type *ElTy = ConvertTypeForMem(TempLV.getType()); - EmitLifetimeStart(Addr); - Address TmpAddr(Addr, ElTy, TempLV.getAlignment()); Args.addWriteback(BaseLV, TmpAddr, nullptr, E->getWritebackCast()); Args.add(RValue::get(TmpAddr, *this), Ty); diff --git a/clang/test/CodeGenHLSL/BasicFeatures/OutArgLifetime.hlsl b/clang/test/CodeGenHLSL/BasicFeatures/OutArgLifetime.hlsl new file mode 100644 index 0000000000000..be7e58f9d224b --- /dev/null +++ b/clang/test/CodeGenHLSL/BasicFeatures/OutArgLifetime.hlsl @@ -0,0 +1,91 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -triple dxil-pc-shadermodel6.0-library -O1 -disable-llvm-passes -emit-llvm -finclude-default-header -o - %s | FileCheck %s + +// Check that lifetime.start for an inout argument temporary is emitted +// *before* the copy-in store, so that the store is within the lifetime +// and is not treated as undefined behavior. 
+ +// CHECK-LABEL: define hidden void @_Z9incrementRi( +// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4, !tbaa [[INTPTR_TBAA6:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[I_ADDR]], align 4, !tbaa [[INTPTR_TBAA6]], !nonnull [[META9:![0-9]+]], !align [[META10:![0-9]+]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 1 +// CHECK-NEXT: store i32 [[ADD]], ptr [[TMP1]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: ret void +// +void increment(inout int I) { I += 1; } + +// CHECK-LABEL: define hidden void @_Z5resetRi( +// CHECK-SAME: ptr noalias noundef nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[I_ADDR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: store ptr [[I]], ptr [[I_ADDR]], align 4, !tbaa [[INTPTR_TBAA6]] +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[I_ADDR]], align 4, !tbaa [[INTPTR_TBAA6]], !nonnull [[META9]], !align [[META10]] +// CHECK-NEXT: store i32 0, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: ret void +// +void reset(out int I) { I = 0; } + +// The lifetime.start must come before the copy-in load/store sequence. 
+// CHECK-LABEL: define hidden noundef i32 @_Z10inout_testi( +// CHECK-SAME: i32 noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[TMP]]) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X_ADDR]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: store i32 [[TMP1]], ptr [[TMP]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: call void @_Z9incrementRi(ptr noalias noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR4:[0-9]+]] [ "convergencectrl"(token [[TMP0]]) ] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: store i32 [[TMP2]], ptr [[X_ADDR]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[TMP]]) #[[ATTR3]] +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[X_ADDR]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: ret i32 [[TMP3]] +// +int inout_test(int X) { + increment(X); + return X; +} + +// For `out` parameters there is no copy-in, so lifetime.start just needs +// to appear before the call with no intervening store to the temporary. 
+// CHECK-LABEL: define hidden noundef i32 @_Z8out_testv( +// CHECK-SAME: ) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[TMP0:%.*]] = call token @llvm.experimental.convergence.entry() +// CHECK-NEXT: [[X:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[X]]) #[[ATTR3]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[TMP]]) #[[ATTR3]] +// CHECK-NEXT: call void @_Z5resetRi(ptr noalias noundef nonnull align 4 dereferenceable(4) [[TMP]]) #[[ATTR4]] [ "convergencectrl"(token [[TMP0]]) ] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TMP]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: store i32 [[TMP1]], ptr [[X]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[TMP]]) #[[ATTR3]] +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA2]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[X]]) #[[ATTR3]] +// CHECK-NEXT: ret i32 [[TMP2]] +// +int out_test() { + int X; + reset(X); + return X; +} + +//. +// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0} +// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK: [[INTPTR_TBAA6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"p1 int", [[META8:![0-9]+]], i64 0} +// CHECK: [[META8]] = !{!"any pointer", [[META4]], i64 0} +// CHECK: [[META9]] = !{} +// CHECK: [[META10]] = !{i64 4} +//. From 778cdd38a74652f3938d578ae1823c90c3750f1d Mon Sep 17 00:00:00 2001 From: Wenju He Date: Tue, 23 Jun 2026 02:41:44 +0200 Subject: [PATCH 109/511] [OpenCL] Warn if filter_mode is linear in read_image{i|ui} (#204086) Per OpenCL spec: The read_image{i|ui} calls support a nearest filter only. The filter_mode specified in sampler must be set to CLK_FILTER_NEAREST; otherwise the values returned are undefined. 
Warn users when they apply a linear filter accidentally. Address https://github.com/intel/compute-runtime/issues/379#issuecomment-4592083032 Assisted-by: Claude Sonnet 4.6 --- .../clang/Basic/DiagnosticSemaKinds.td | 2 + clang/include/clang/Sema/SemaOpenCL.h | 2 + clang/lib/Sema/SemaExpr.cpp | 7 ++ clang/lib/Sema/SemaOpenCL.cpp | 48 +++++++++++ .../read-image-integer-linear-filter.cl | 80 +++++++++++++++++++ 5 files changed, 139 insertions(+) create mode 100644 clang/test/SemaOpenCL/read-image-integer-linear-filter.cl diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 922c74b0857fc..cb5f124c609ce 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11838,6 +11838,8 @@ def err_sampler_initializer_not_integer : Error< "sampler_t initialization requires 32-bit integer, not %0">; def warn_sampler_initializer_invalid_bits : Warning< "sampler initializer has invalid %0 bits">, InGroup, DefaultIgnore; +def warn_sampler_argument_invalid_filter : Warning< + "'%0' sampler must use CLK_FILTER_NEAREST">, InGroup; def err_sampler_argument_required : Error< "sampler_t variable required - got %0">; def err_wrong_sampler_addressspace: Error< diff --git a/clang/include/clang/Sema/SemaOpenCL.h b/clang/include/clang/Sema/SemaOpenCL.h index 04b2b617fb12f..51c2e1703b504 100644 --- a/clang/include/clang/Sema/SemaOpenCL.h +++ b/clang/include/clang/Sema/SemaOpenCL.h @@ -100,6 +100,8 @@ class SemaOpenCL : public SemaBase { bool checkBuiltinKernelWorkGroupSize(CallExpr *TheCall); bool checkBuiltinNDRangeAndBlock(CallExpr *TheCall); + + void checkBuiltinReadImage(FunctionDecl *FDecl, CallExpr *Call); }; } // namespace clang diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 25df8fda7dc56..7c868d176e803 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -59,6 +59,7 @@ #include "clang/Sema/SemaFixItUtils.h" 
#include "clang/Sema/SemaHLSL.h" #include "clang/Sema/SemaObjC.h" +#include "clang/Sema/SemaOpenCL.h" #include "clang/Sema/SemaOpenMP.h" #include "clang/Sema/SemaPseudoObject.h" #include "clang/Sema/Template.h" @@ -7287,6 +7288,12 @@ ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, } } + // Check read_image{i|ui} sampler argument before ConvertArgumentsForCall + // replaces sampler DeclRefExprs with their integer initializers. + if (getLangOpts().OpenCL && FDecl) { + OpenCL().checkBuiltinReadImage(FDecl, TheCall); + } + if (Proto) { if (ConvertArgumentsForCall(TheCall, Fn, FDecl, Proto, Args, RParenLoc, IsExecConfig)) diff --git a/clang/lib/Sema/SemaOpenCL.cpp b/clang/lib/Sema/SemaOpenCL.cpp index f11a40e3964ff..4151972c67473 100644 --- a/clang/lib/Sema/SemaOpenCL.cpp +++ b/clang/lib/Sema/SemaOpenCL.cpp @@ -12,7 +12,9 @@ #include "clang/Sema/SemaOpenCL.h" #include "clang/AST/Attr.h" +#include "clang/AST/Decl.h" #include "clang/AST/DeclBase.h" +#include "clang/AST/Expr.h" #include "clang/Basic/DiagnosticSema.h" #include "clang/Sema/ParsedAttr.h" #include "clang/Sema/Sema.h" @@ -576,4 +578,50 @@ bool SemaOpenCL::checkBuiltinToAddr(unsigned BuiltinID, CallExpr *Call) { return false; } +void SemaOpenCL::checkBuiltinReadImage(FunctionDecl *FDecl, CallExpr *Call) { + IdentifierInfo *II = FDecl->getIdentifier(); + if (!II) + return; + StringRef Name = II->getName(); + if (Name != "read_imagei" && Name != "read_imageui") + return; + + if (FDecl->getNumParams() < 2) + return; + QualType ParamTy = FDecl->getParamDecl(1)->getType().getCanonicalType(); + if (!ParamTy->isSamplerT()) + return; + Expr *SamplerArg = Call->getArg(1); + + Expr *IntExpr = nullptr; + Expr *Inner = SamplerArg->IgnoreParenCasts(); + + if (auto *DRE = dyn_cast(Inner)) { + if (auto *Var = dyn_cast(DRE->getDecl())) { + if (const Expr *Init = Var->getAnyInitializer()) { + Init = Init->IgnoreParenImpCasts(); + if (Init->getType()->isIntegerType()) + IntExpr = const_cast(Init); + } + } + } 
else if (Inner->getType()->isIntegerType()) { + IntExpr = Inner; + } + + if (!IntExpr) + return; + + Expr::EvalResult EVResult; + if (!IntExpr->EvaluateAsInt(EVResult, getASTContext())) + return; + + uint64_t SamplerValue = EVResult.Val.getInt().getLimitedValue(); + // Must stay in sync with CLK_FILTER_* defines in opencl-c-base.h. + constexpr unsigned FilterModeMask = 0x30u; + constexpr unsigned FilterModeLinear = 0x20u; + if ((SamplerValue & FilterModeMask) == FilterModeLinear) + Diag(SamplerArg->getExprLoc(), diag::warn_sampler_argument_invalid_filter) + << Name << SamplerArg->getSourceRange(); +} + } // namespace clang diff --git a/clang/test/SemaOpenCL/read-image-integer-linear-filter.cl b/clang/test/SemaOpenCL/read-image-integer-linear-filter.cl new file mode 100644 index 0000000000000..a90f689a307d9 --- /dev/null +++ b/clang/test/SemaOpenCL/read-image-integer-linear-filter.cl @@ -0,0 +1,80 @@ +// RUN: %clang_cc1 %s -verify -fsyntax-only -cl-std=CL2.0 -fdeclare-opencl-builtins -finclude-default-header +// RUN: %clang_cc1 %s -verify=nowarn -fsyntax-only -cl-std=CL2.0 -fdeclare-opencl-builtins -finclude-default-header -Wno-spir-compat +// nowarn-no-diagnostics + +// OpenCL spec: read_imagei and read_imageui support nearest filter only. +// CLK_FILTER_LINEAR in the sampler results in undefined behavior; warn. + +// Program scope samplers. 
+__constant sampler_t glb_linear = + CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR | CLK_ADDRESS_MIRRORED_REPEAT; +__constant sampler_t glb_nearest = + CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_NEAREST | CLK_ADDRESS_CLAMP_TO_EDGE; + +kernel void test_read_imageui_global_sampler(read_only image2d_t img) { + int2 coord = (int2)(0, 0); + uint4 u = read_imageui(img, glb_linear, coord); // expected-warning{{'read_imageui' sampler must use CLK_FILTER_NEAREST}} + u = read_imageui(img, glb_nearest, coord); // no warning +} + +kernel void test_read_imagei_global_sampler(read_only image2d_t img) { + int2 coord = (int2)(0, 0); + int4 i = read_imagei(img, glb_linear, coord); // expected-warning{{'read_imagei' sampler must use CLK_FILTER_NEAREST}} +} + +kernel void test_read_imageui_local_constant(read_only image2d_t img) { + __constant sampler_t s_linear = + CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR | CLK_ADDRESS_CLAMP; + int2 coord = (int2)(0, 0); + uint4 u = read_imageui(img, s_linear, coord); // expected-warning{{'read_imageui' sampler must use CLK_FILTER_NEAREST}} +} + +kernel void test_read_imageui_nearest_constant(read_only image2d_t img) { + __constant sampler_t s_nearest = + CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_NONE; + int2 coord = (int2)(0, 0); + uint4 u = read_imageui(img, s_nearest, coord); // no warning +} + +kernel void test_read_imageui_local(read_only image2d_t img) { + sampler_t s_linear = + CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR | CLK_ADDRESS_CLAMP; + int2 coord = (int2)(0, 0); + uint4 u = read_imageui(img, s_linear, coord); // expected-warning{{'read_imageui' sampler must use CLK_FILTER_NEAREST}} +} + +kernel void test_read_imageui_nearest(read_only image2d_t img) { + sampler_t s_nearest = + CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_NONE; + int2 coord = (int2)(0, 0); + uint4 u = read_imageui(img, s_nearest, coord); // no warning +} + +kernel void test_read_imageui_literal(read_only image2d_t img) { + 
int2 coord = (int2)(0, 0); + // CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR | CLK_ADDRESS_CLAMP = 0x25 + uint4 u = read_imageui(img, CLK_NORMALIZED_COORDS_TRUE | CLK_FILTER_LINEAR | CLK_ADDRESS_CLAMP, coord); // expected-warning{{'read_imageui' sampler must use CLK_FILTER_NEAREST}} + // CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_NONE = 0x10 + u = read_imageui(img, CLK_NORMALIZED_COORDS_FALSE | CLK_FILTER_NEAREST | CLK_ADDRESS_NONE, coord); // no warning +} + +kernel void test_read_imageui_parameter(read_only image2d_t img, sampler_t smp) { + int2 coord = (int2)(0, 0); + uint4 u = read_imageui(img, smp, coord); // no warning +} + +kernel void test_read_imagef_linear(read_only image2d_t img) { + // read_imagef supports linear filtering: no warning. + float2 coord = (float2)(0.5f, 0.5f); + float4 f = read_imagef(img, glb_linear, coord); // no warning +} + +// Samplerless 1D image reads: integer coordinate must not be mistaken for a +// sampler value even when it looks like CLK_FILTER_LINEAR (e.g. 0x20). +kernel void test_read_imageui_samplerless(read_only image1d_t img) { + uint4 u = read_imageui(img, 0x10); // no warning + u = read_imageui(img, 0x20); // no warning + u = read_imageui(img, 0x30); // no warning + int4 i = read_imagei(img, 0x10); // no warning + i = read_imagei(img, 0x20); // no warning +} From 61df40d0a9cdfcb93cf6c903a1894b087c70217b Mon Sep 17 00:00:00 2001 From: yonghong-song Date: Mon, 22 Jun 2026 18:02:14 -0700 Subject: [PATCH 110/511] [BPF] Increase BPFMaxStoresPerMemFunc from 128 to 192 (#205222) With commits [1] and [2], memory operations like memcpy/memmove lower to a sequence of loads/stores whose width is the minimum of the source and destination alignment, and the store count is bounded by BPFMaxStoresPerMemFunc. For 1-byte alignment, the maximum copy length that can be inlined is therefore 128 bytes. This may regress cases that previously inlined. Consider a memcpy with src alignment 8, dst alignment 1 and size 136. 
After [1]/[2], the store width is the minimum alignment (1 byte), so the store count is 136, which exceeds the 128 limit and the copy falls back. Before [1]/[2], the store count was computed with a fixed 8-byte unit regardless of the actual alignment (each unit expands to 8 one-byte stores when the minimum alignment is 1), so the total count was only 17 (136/8 < 128) and the copy was inlined. Raise the limit from 128 to 192 to mitigate. Alternatively, users can increase alignment to avoid the regression. [1] https://github.com/llvm/llvm-project/pull/201119 [2] https://github.com/llvm/llvm-project/pull/204042 --- llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp index 8b7ac6a4730f7..3e39a7393f251 100644 --- a/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp +++ b/llvm/lib/Target/BPF/BPFSelectionDAGInfo.cpp @@ -22,7 +22,7 @@ using namespace llvm; #define DEBUG_TYPE "bpf-selectiondag-info" static cl::opt BPFMaxStoresPerMemFunc( - "bpf-max-stores-per-memfunc", cl::Hidden, cl::init(128), + "bpf-max-stores-per-memfunc", cl::Hidden, cl::init(192), cl::desc("Set the maximum number of stores for inlined BPF memory " "intrinsics")); From 86667c0c1452fee4b3bf84912dce6892e1bb17f6 Mon Sep 17 00:00:00 2001 From: SiHuaN Date: Tue, 23 Jun 2026 09:34:03 +0800 Subject: [PATCH 111/511] [RISCV][P-ext] packed exchanged add/sub codegen (#203473) Wire up the already-defined exchanged add/sub instructions pas/psa/psas/pssa/paas/pasa with llvm.riscv.* intrinsics and isel patterns. 
--- llvm/include/llvm/IR/IntrinsicsRISCV.td | 8 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 70 +++++++- llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 24 +++ llvm/test/CodeGen/RISCV/rvp-simd-32.ll | 54 ++++++ llvm/test/CodeGen/RISCV/rvp-simd-64.ll | 174 ++++++++++++++++++++ 5 files changed, 328 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index a82b17591f780..531f48c38622e 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -2060,6 +2060,14 @@ class RVPBinaryIntrinsic // Packed Absolute Value and Absolute Difference def int_riscv_pabd : RVPBinaryIntrinsic; def int_riscv_pabdu : RVPBinaryIntrinsic; + + // Packed Exchanged Addition and Subtraction. + def int_riscv_pas : RVPBinaryIntrinsic; + def int_riscv_psa : RVPBinaryIntrinsic; + def int_riscv_psas : RVPBinaryIntrinsic; + def int_riscv_pssa : RVPBinaryIntrinsic; + def int_riscv_paas : RVPBinaryIntrinsic; + def int_riscv_pasa : RVPBinaryIntrinsic; } // TargetPrefix = "riscv" //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index c9c807cfa78aa..958fdc3812408 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11711,6 +11711,59 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, EVT PtrVT = getPointerTy(DAG.getDataLayout()); return DAG.getRegister(RISCV::X4, PtrVT); } + case Intrinsic::riscv_pas: + case Intrinsic::riscv_psa: + case Intrinsic::riscv_psas: + case Intrinsic::riscv_pssa: + case Intrinsic::riscv_paas: + case Intrinsic::riscv_pasa: { + // v2i32 has no paired instruction on RV32; split into a pair of i32 ops + // with cross-lane operands. The exchange shape is: even result uses + // (S1[0], S2[1]); odd result uses (S1[1], S2[0]). 
+ if (Subtarget.is64Bit() || Op.getSimpleValueType() != MVT::v2i32) + break; + + unsigned EvenOpc, OddOpc; + switch (IntNo) { + case Intrinsic::riscv_pas: + EvenOpc = ISD::SUB; + OddOpc = ISD::ADD; + break; + case Intrinsic::riscv_psa: + EvenOpc = ISD::ADD; + OddOpc = ISD::SUB; + break; + case Intrinsic::riscv_psas: + EvenOpc = ISD::SSUBSAT; + OddOpc = ISD::SADDSAT; + break; + case Intrinsic::riscv_pssa: + EvenOpc = ISD::SADDSAT; + OddOpc = ISD::SSUBSAT; + break; + case Intrinsic::riscv_paas: + EvenOpc = RISCVISD::ASUB; + OddOpc = ISD::AVGFLOORS; + break; + case Intrinsic::riscv_pasa: + EvenOpc = ISD::AVGFLOORS; + OddOpc = RISCVISD::ASUB; + break; + default: + llvm_unreachable("Unexpected exchanged add/sub intrinsic"); + } + + SDValue S1 = Op.getOperand(1); + SDValue S2 = Op.getOperand(2); + SDValue S1Even = DAG.getExtractVectorElt(DL, MVT::i32, S1, 0); + SDValue S1Odd = DAG.getExtractVectorElt(DL, MVT::i32, S1, 1); + SDValue S2Even = DAG.getExtractVectorElt(DL, MVT::i32, S2, 0); + SDValue S2Odd = DAG.getExtractVectorElt(DL, MVT::i32, S2, 1); + + SDValue REven = DAG.getNode(EvenOpc, DL, MVT::i32, S1Even, S2Odd); + SDValue ROdd = DAG.getNode(OddOpc, DL, MVT::i32, S1Odd, S2Even); + return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i32, REven, ROdd); + } case Intrinsic::riscv_orc_b: case Intrinsic::riscv_brev8: case Intrinsic::riscv_sha256sig0: @@ -15700,7 +15753,13 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, case Intrinsic::riscv_pasub: case Intrinsic::riscv_pasubu: case Intrinsic::riscv_pabd: - case Intrinsic::riscv_pabdu: { + case Intrinsic::riscv_pabdu: + case Intrinsic::riscv_pas: + case Intrinsic::riscv_psa: + case Intrinsic::riscv_psas: + case Intrinsic::riscv_pssa: + case Intrinsic::riscv_paas: + case Intrinsic::riscv_pasa: { EVT VT = N->getValueType(0); if (!Subtarget.is64Bit() || (VT != MVT::v4i8 && VT != MVT::v2i16)) return; @@ -15725,6 +15784,9 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, case Intrinsic::riscv_pabdu: Opc = 
ISD::ABDU; break; + default: + Opc = ISD::INTRINSIC_WO_CHAIN; + break; } EVT WideVT = VT == MVT::v4i8 ? MVT::v8i8 : MVT::v4i16; @@ -15733,7 +15795,11 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, DAG.getNode(ISD::CONCAT_VECTORS, DL, WideVT, N->getOperand(1), Undef); SDValue Op1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideVT, N->getOperand(2), Undef); - SDValue Res = DAG.getNode(Opc, DL, WideVT, Op0, Op1); + SDValue Res; + if (Opc == ISD::INTRINSIC_WO_CHAIN) + Res = DAG.getNode(Opc, DL, WideVT, N->getOperand(0), Op0, Op1); + else + Res = DAG.getNode(Opc, DL, WideVT, Op0, Op1); Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, DAG.getVectorIdxConstant(0, DL))); return; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index e63eb9ce50e83..f82520e0dcc3b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -1992,6 +1992,14 @@ let Predicates = [HasStdExtP] in { def : PatPSh1Add; def : PatPSSh1SAdd; + // 16-bit exchanged add/sub patterns + def : PatGprGpr; + def : PatGprGpr; + def : PatGprGpr; + def : PatGprGpr; + def : PatGprGpr; + def : PatGprGpr; + // 8-bit averaging patterns def : PatGprGpr; def : PatGprGpr; @@ -2279,6 +2287,14 @@ let append Predicates = [IsRV32] in { def : PatPSh1AddPair; def : PatPSSh1SAddPair; + // 16-bit exchanged add/sub patterns + def : PatGprPairGprPair; + def : PatGprPairGprPair; + def : PatGprPairGprPair; + def : PatGprPairGprPair; + def : PatGprPairGprPair; + def : PatGprPairGprPair; + // 8-bit averaging patterns def : PatGprPairGprPair; def : PatGprPairGprPair; @@ -2502,6 +2518,14 @@ let append Predicates = [IsRV64] in { def : PatPSh1Add; def : PatPSSh1SAdd; + // 32-bit exchanged add/sub patterns + def : PatGprGpr; + def : PatGprGpr; + def : PatGprGpr; + def : PatGprGpr; + def : PatGprGpr; + def : PatGprGpr; + // 32-bit averaging patterns def : PatGprGpr; def : PatGprGpr; diff --git 
a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll index 561068b3a2e29..c10e54011d34b 100644 --- a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll +++ b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll @@ -2407,3 +2407,57 @@ define <2 x i16> @test_pabdu_v2i16(<2 x i16> %a, <2 x i16> %b) { %res = call <2 x i16> @llvm.riscv.pabdu.v2i16(<2 x i16> %a, <2 x i16> %b) ret <2 x i16> %res } + +define <2 x i16> @test_pas_x_h(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: test_pas_x_h: +; CHECK: # %bb.0: +; CHECK-NEXT: pas.hx a0, a0, a1 +; CHECK-NEXT: ret + %res = call <2 x i16> @llvm.riscv.pas.v2i16(<2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %res +} + +define <2 x i16> @test_psa_x_h(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: test_psa_x_h: +; CHECK: # %bb.0: +; CHECK-NEXT: psa.hx a0, a0, a1 +; CHECK-NEXT: ret + %res = call <2 x i16> @llvm.riscv.psa.v2i16(<2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %res +} + +define <2 x i16> @test_psas_x_h(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: test_psas_x_h: +; CHECK: # %bb.0: +; CHECK-NEXT: psas.hx a0, a0, a1 +; CHECK-NEXT: ret + %res = call <2 x i16> @llvm.riscv.psas.v2i16(<2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %res +} + +define <2 x i16> @test_pssa_x_h(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: test_pssa_x_h: +; CHECK: # %bb.0: +; CHECK-NEXT: pssa.hx a0, a0, a1 +; CHECK-NEXT: ret + %res = call <2 x i16> @llvm.riscv.pssa.v2i16(<2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %res +} + +define <2 x i16> @test_paas_x_h(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: test_paas_x_h: +; CHECK: # %bb.0: +; CHECK-NEXT: paas.hx a0, a0, a1 +; CHECK-NEXT: ret + %res = call <2 x i16> @llvm.riscv.paas.v2i16(<2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %res +} + +define <2 x i16> @test_pasa_x_h(<2 x i16> %a, <2 x i16> %b) { +; CHECK-LABEL: test_pasa_x_h: +; CHECK: # %bb.0: +; CHECK-NEXT: pasa.hx a0, a0, a1 +; CHECK-NEXT: ret + %res = call <2 x i16> @llvm.riscv.pasa.v2i16(<2 x i16> %a, <2 x i16> %b) + ret <2 x i16> %res +} diff 
--git a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll index 363b1dd33534e..b43f3bb47843f 100644 --- a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll +++ b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll @@ -5148,3 +5148,177 @@ define <4 x i16> @test_pabdu_v4i16(<4 x i16> %a, <4 x i16> %b) { %res = call <4 x i16> @llvm.riscv.pabdu.v4i16(<4 x i16> %a, <4 x i16> %b) ret <4 x i16> %res } + +define <4 x i16> @test_pas_x_h(<4 x i16> %a, <4 x i16> %b) { +; RV32-LABEL: test_pas_x_h: +; RV32: # %bb.0: +; RV32-NEXT: pas.dhx a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pas_x_h: +; RV64: # %bb.0: +; RV64-NEXT: pas.hx a0, a0, a1 +; RV64-NEXT: ret + %res = call <4 x i16> @llvm.riscv.pas.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %res +} + +define <4 x i16> @test_psa_x_h(<4 x i16> %a, <4 x i16> %b) { +; RV32-LABEL: test_psa_x_h: +; RV32: # %bb.0: +; RV32-NEXT: psa.dhx a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_psa_x_h: +; RV64: # %bb.0: +; RV64-NEXT: psa.hx a0, a0, a1 +; RV64-NEXT: ret + %res = call <4 x i16> @llvm.riscv.psa.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %res +} + +define <4 x i16> @test_psas_x_h(<4 x i16> %a, <4 x i16> %b) { +; RV32-LABEL: test_psas_x_h: +; RV32: # %bb.0: +; RV32-NEXT: psas.dhx a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_psas_x_h: +; RV64: # %bb.0: +; RV64-NEXT: psas.hx a0, a0, a1 +; RV64-NEXT: ret + %res = call <4 x i16> @llvm.riscv.psas.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %res +} + +define <4 x i16> @test_pssa_x_h(<4 x i16> %a, <4 x i16> %b) { +; RV32-LABEL: test_pssa_x_h: +; RV32: # %bb.0: +; RV32-NEXT: pssa.dhx a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pssa_x_h: +; RV64: # %bb.0: +; RV64-NEXT: pssa.hx a0, a0, a1 +; RV64-NEXT: ret + %res = call <4 x i16> @llvm.riscv.pssa.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %res +} + +define <4 x i16> @test_paas_x_h(<4 x i16> %a, <4 x i16> %b) { +; RV32-LABEL: test_paas_x_h: +; RV32: # %bb.0: +; RV32-NEXT: paas.dhx a0, a0, 
a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_paas_x_h: +; RV64: # %bb.0: +; RV64-NEXT: paas.hx a0, a0, a1 +; RV64-NEXT: ret + %res = call <4 x i16> @llvm.riscv.paas.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %res +} + +define <4 x i16> @test_pasa_x_h(<4 x i16> %a, <4 x i16> %b) { +; RV32-LABEL: test_pasa_x_h: +; RV32: # %bb.0: +; RV32-NEXT: pasa.dhx a0, a0, a2 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pasa_x_h: +; RV64: # %bb.0: +; RV64-NEXT: pasa.hx a0, a0, a1 +; RV64-NEXT: ret + %res = call <4 x i16> @llvm.riscv.pasa.v4i16(<4 x i16> %a, <4 x i16> %b) + ret <4 x i16> %res +} + +define <2 x i32> @test_pas_x_w(<2 x i32> %a, <2 x i32> %b) { +; RV32-LABEL: test_pas_x_w: +; RV32: # %bb.0: +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: sub a0, a0, a3 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pas_x_w: +; RV64: # %bb.0: +; RV64-NEXT: pas.wx a0, a0, a1 +; RV64-NEXT: ret + %res = call <2 x i32> @llvm.riscv.pas.v2i32(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %res +} + +define <2 x i32> @test_psa_x_w(<2 x i32> %a, <2 x i32> %b) { +; RV32-LABEL: test_psa_x_w: +; RV32: # %bb.0: +; RV32-NEXT: sub a1, a1, a2 +; RV32-NEXT: add a0, a0, a3 +; RV32-NEXT: ret +; +; RV64-LABEL: test_psa_x_w: +; RV64: # %bb.0: +; RV64-NEXT: psa.wx a0, a0, a1 +; RV64-NEXT: ret + %res = call <2 x i32> @llvm.riscv.psa.v2i32(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %res +} + +define <2 x i32> @test_psas_x_w(<2 x i32> %a, <2 x i32> %b) { +; RV32-LABEL: test_psas_x_w: +; RV32: # %bb.0: +; RV32-NEXT: sadd a1, a1, a2 +; RV32-NEXT: ssub a0, a0, a3 +; RV32-NEXT: ret +; +; RV64-LABEL: test_psas_x_w: +; RV64: # %bb.0: +; RV64-NEXT: psas.wx a0, a0, a1 +; RV64-NEXT: ret + %res = call <2 x i32> @llvm.riscv.psas.v2i32(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %res +} + +define <2 x i32> @test_pssa_x_w(<2 x i32> %a, <2 x i32> %b) { +; RV32-LABEL: test_pssa_x_w: +; RV32: # %bb.0: +; RV32-NEXT: ssub a1, a1, a2 +; RV32-NEXT: sadd a0, a0, a3 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pssa_x_w: +; RV64: # %bb.0: +; 
RV64-NEXT: pssa.wx a0, a0, a1 +; RV64-NEXT: ret + %res = call <2 x i32> @llvm.riscv.pssa.v2i32(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %res +} + +define <2 x i32> @test_paas_x_w(<2 x i32> %a, <2 x i32> %b) { +; RV32-LABEL: test_paas_x_w: +; RV32: # %bb.0: +; RV32-NEXT: aadd a1, a1, a2 +; RV32-NEXT: asub a0, a0, a3 +; RV32-NEXT: ret +; +; RV64-LABEL: test_paas_x_w: +; RV64: # %bb.0: +; RV64-NEXT: paas.wx a0, a0, a1 +; RV64-NEXT: ret + %res = call <2 x i32> @llvm.riscv.paas.v2i32(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %res +} + +define <2 x i32> @test_pasa_x_w(<2 x i32> %a, <2 x i32> %b) { +; RV32-LABEL: test_pasa_x_w: +; RV32: # %bb.0: +; RV32-NEXT: asub a1, a1, a2 +; RV32-NEXT: aadd a0, a0, a3 +; RV32-NEXT: ret +; +; RV64-LABEL: test_pasa_x_w: +; RV64: # %bb.0: +; RV64-NEXT: pasa.wx a0, a0, a1 +; RV64-NEXT: ret + %res = call <2 x i32> @llvm.riscv.pasa.v2i32(<2 x i32> %a, <2 x i32> %b) + ret <2 x i32> %res +} From fd13d6b4cc6cde96a6f378d8df72251dd67d0144 Mon Sep 17 00:00:00 2001 From: lijinpei-amd Date: Tue, 23 Jun 2026 09:46:04 +0800 Subject: [PATCH 112/511] [Verifier] Require !callsite with !memprof metadata (#205053) Fixes: https://github.com/llvm/llvm-project/issues/181237 --- llvm/lib/IR/Verifier.cpp | 3 +++ llvm/test/Verifier/memprof-metadata-bad.ll | 16 ++++++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 2a0892d1af11a..648446555793b 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5343,6 +5343,9 @@ void Verifier::visitCallStackMetadata(MDNode *MD) { void Verifier::visitMemProfMetadata(Instruction &I, MDNode *MD) { Check(isa(I), "!memprof metadata should only exist on calls", &I); + if (isa(I)) + Check(I.hasMetadata(LLVMContext::MD_callsite), + "!memprof metadata requires !callsite metadata", &I, MD); Check(MD->getNumOperands() >= 1, "!memprof annotations should have at least 1 metadata operand " "(MemInfoBlock)", diff --git 
a/llvm/test/Verifier/memprof-metadata-bad.ll b/llvm/test/Verifier/memprof-metadata-bad.ll index be212f062ef0b..408e47b12b12a 100644 --- a/llvm/test/Verifier/memprof-metadata-bad.ll +++ b/llvm/test/Verifier/memprof-metadata-bad.ll @@ -3,13 +3,14 @@ define ptr @test1() { entry: - %call1 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !0 - %call2 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !1 - %call3 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !3 - %call4 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !5 + %call1 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !0, !callsite !9 + %call2 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !1, !callsite !9 + %call3 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !3, !callsite !9 + %call4 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !5, !callsite !9 %call5 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !7, !callsite !9 - %call6 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !12 - %call7 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !15 + %call6 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !12, !callsite !9 + %call7 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !15, !callsite !9 + %call8 = call noalias dereferenceable_or_null(40) ptr @malloc(i64 noundef 40), !memprof !18 ret ptr %call5 } @@ -54,6 +55,9 @@ declare dso_local noalias noundef ptr @malloc(i64 noundef) ; CHECK: Not all !memprof MemInfoBlock operands 2 to N are MDNode with 2 operands !16 = !{!14, !"default", !17} !17 = !{i64 789} +!18 = !{!19} +; CHECK: !memprof metadata requires !callsite metadata +!19 = !{!14, !"default"} 
!9 = !{i64 123} ; CHECK: call stack metadata operand should be constant integer !10 = !{!"wrongtype"} From e10ad510eb0c7bae270dd5de79b278bf592b7ce7 Mon Sep 17 00:00:00 2001 From: Jianjian Guan Date: Tue, 23 Jun 2026 10:27:45 +0800 Subject: [PATCH 113/511] [CIR] Add support for __builtin_nontemporal_store and __builtin_nontemporal_load (#197872) Add nontemporal attribute to cir.load and cir.store ops. --- .../CIR/Dialect/Builder/CIRBaseBuilder.h | 16 ++-- clang/include/clang/CIR/Dialect/IR/CIROps.td | 5 ++ clang/include/clang/CIR/MissingFeatures.h | 1 - clang/lib/CIR/CodeGen/CIRGenAtomic.cpp | 1 + clang/lib/CIR/CodeGen/CIRGenBuilder.h | 12 +-- clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 20 ++++- clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp | 1 + clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 19 ++--- clang/lib/CIR/CodeGen/CIRGenFunction.h | 4 +- clang/lib/CIR/CodeGen/CIRGenValue.h | 7 ++ clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp | 1 + .../CIR/Dialect/Transforms/EHABILowering.cpp | 8 +- .../lib/CIR/Dialect/Transforms/FlattenCFG.cpp | 20 +++-- .../TargetLowering/LowerItaniumCXXABI.cpp | 5 ++ .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 11 +-- .../CodeGenBuiltins/builtin-nontemporal.cpp | 77 +++++++++++++++++++ 16 files changed, 161 insertions(+), 47 deletions(-) create mode 100644 clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index 121eed5f8ba9a..0db205f8d5b79 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -229,11 +229,12 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { } cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr, - bool isVolatile = false, uint64_t alignment = 0) { + bool isVolatile = false, uint64_t alignment = 0, + bool isNontemporal = false) { mlir::IntegerAttr alignmentAttr = getAlignmentAttr(alignment); return 
cir::LoadOp::create(*this, loc, ptr, /*isDeref=*/false, isVolatile, - alignmentAttr, cir::SyncScopeKindAttr{}, - cir::MemOrderAttr{}); + isNontemporal, alignmentAttr, + cir::SyncScopeKindAttr{}, cir::MemOrderAttr{}); } mlir::Value createAlignedLoad(mlir::Location loc, mlir::Value ptr, @@ -380,15 +381,15 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { } cir::StoreOp createStore(mlir::Location loc, mlir::Value val, mlir::Value dst, - bool isVolatile = false, + bool isVolatile = false, bool isNontemporal = false, mlir::IntegerAttr align = {}, cir::SyncScopeKindAttr scope = {}, cir::MemOrderAttr order = {}) { if (mlir::cast(dst.getType()).getPointee() != val.getType()) dst = createPtrBitcast(dst, val.getType()); - return cir::StoreOp::create(*this, loc, val, dst, isVolatile, align, scope, - order); + return cir::StoreOp::create(*this, loc, val, dst, isVolatile, isNontemporal, + align, scope, order); } /// Emit a load from an boolean flag variable. @@ -426,7 +427,8 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { mlir::IntegerAttr alignmentAttr = getAlignmentAttr(alignment); auto addr = createAlloca(loc, getPointerTo(type), {}, alignmentAttr); return cir::LoadOp::create(*this, loc, addr, /*isDeref=*/false, - /*isVolatile=*/false, alignmentAttr, + /*isVolatile=*/false, /*nontemporal=*/false, + alignmentAttr, /*sync_scope=*/{}, /*mem_order=*/{}); } diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index 9dae3534991e5..f4f22cd297ea6 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -694,6 +694,7 @@ def CIR_LoadOp : CIR_Op<"load", [ [MemRead]>:$addr, UnitAttr:$isDeref, UnitAttr:$is_volatile, + UnitAttr:$is_nontemporal, OptionalAttr:$alignment, OptionalAttr:$sync_scope, OptionalAttr:$mem_order); @@ -702,6 +703,7 @@ def CIR_LoadOp : CIR_Op<"load", [ let assemblyFormat = [{ (`deref` $isDeref^)? (`volatile` $is_volatile^)? 
+ (`nontemporal` $is_nontemporal^)? (`align` `(` $alignment^ `)`)? (`syncscope` `(` $sync_scope^ `)`)? (`atomic` `(` $mem_order^ `)`)? @@ -793,12 +795,14 @@ def CIR_StoreOp : CIR_Op<"store", [ Arg:$addr, UnitAttr:$is_volatile, + UnitAttr:$is_nontemporal, OptionalAttr:$alignment, OptionalAttr:$sync_scope, OptionalAttr:$mem_order); let assemblyFormat = [{ (`volatile` $is_volatile^)? + (`nontemporal` $is_nontemporal^)? (`align` `(` $alignment^ `)`)? (`syncscope` `(` $sync_scope^ `)`)? (`atomic` `(` $mem_order^ `)`)? @@ -809,6 +813,7 @@ def CIR_StoreOp : CIR_Op<"store", [ // Non-volatile, non-atomic store with default alignment. OpBuilder<(ins "mlir::Value":$value, "mlir::Value":$addr), [{ build($_builder, $_state, value, addr, /*is_volatile=*/mlir::UnitAttr(), + /*is_nontemporal=*/mlir::UnitAttr(), /*alignment=*/mlir::IntegerAttr(), /*sync_scope=*/cir::SyncScopeKindAttr(), /*mem_order=*/cir::MemOrderAttr()); diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index c09db49a955ac..9a1546fe14e65 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -47,7 +47,6 @@ struct MissingFeatures { // Load/store attributes static bool opLoadEmitScalarRangeCheck() { return false; } - static bool opLoadStoreNontemporal() { return false; } static bool opLoadStoreTbaa() { return false; } static bool opLoadStoreAtomic() { return false; } static bool opLoadStoreObjC() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp index 6ba6bc1c0405a..4ac6f4506b2cd 100644 --- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp @@ -705,6 +705,7 @@ static void emitAtomicOp(CIRGenFunction &cgf, AtomicExpr *expr, Address dest, assert(!cir::MissingFeatures::atomicSyncScopeID()); builder.createStore(loc, loadVal1, ptr, expr->isVolatile(), + /*isNontemporal=*/false, /*align=*/mlir::IntegerAttr{}, scopeAttr, 
orderAttr); return; } diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index 3204ba1a319f0..b8db0d9157aa6 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -577,10 +577,11 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { } cir::LoadOp createLoad(mlir::Location loc, Address addr, - bool isVolatile = false) { + bool isVolatile = false, bool isNontemporal = false) { mlir::IntegerAttr align = getAlignmentAttr(addr.getAlignment()); return cir::LoadOp::create(*this, loc, addr.getPointer(), /*isDeref=*/false, - isVolatile, /*alignment=*/align, + isVolatile, isNontemporal, + /*alignment=*/align, /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr{}); } @@ -592,7 +593,8 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { uint64_t alignment = align ? align->value() : 0; mlir::IntegerAttr alignAttr = getAlignmentAttr(alignment); return cir::LoadOp::create(*this, loc, ptr, /*isDeref=*/false, - /*isVolatile=*/false, alignAttr, + /*isVolatile=*/false, /*isNontemporal=*/false, + alignAttr, /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr{}); } @@ -604,14 +606,14 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { } cir::StoreOp createStore(mlir::Location loc, mlir::Value val, Address dst, - bool isVolatile = false, + bool isVolatile = false, bool isNontemporal = false, mlir::IntegerAttr align = {}, cir::SyncScopeKindAttr scope = {}, cir::MemOrderAttr order = {}) { if (!align) align = getAlignmentAttr(dst.getAlignment()); return CIRBaseBuilderTy::createStore(loc, val, dst.getPointer(), isVolatile, - align, scope, order); + isNontemporal, align, scope, order); } /// Create a cir.complex.real_ptr operation that derives a pointer to the real diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index a483eb635f0e2..4fb7ffc13a2ce 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ 
b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -2126,8 +2126,24 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, cir::SyncScopeKind::System)); return RValue::get(nullptr); } - case Builtin::BI__builtin_nontemporal_load: - case Builtin::BI__builtin_nontemporal_store: + case Builtin::BI__builtin_nontemporal_load: { + Address addr = emitPointerWithAlignment(e->getArg(0)); + LValue lv = makeAddrLValue(addr, e->getType(), + LValueBaseInfo(AlignmentSource::Type)); + lv.setNontemporal(true); + mlir::Value val = emitLoadOfScalar(lv, e->getExprLoc()); + return RValue::get(val); + } + case Builtin::BI__builtin_nontemporal_store: { + mlir::Value val = emitScalarExpr(e->getArg(0)); + Address addr = emitPointerWithAlignment(e->getArg(1)); + val = emitToMemory(val, e->getArg(0)->getType()); + LValue lv = makeAddrLValue(addr, e->getArg(0)->getType(), + LValueBaseInfo(AlignmentSource::Type)); + lv.setNontemporal(true); + emitStoreOfScalar(val, lv, /*isInit=*/false); + return RValue::get(nullptr); + } case Builtin::BI__c11_atomic_is_lock_free: case Builtin::BI__atomic_is_lock_free: case Builtin::BI__atomic_test_and_set: diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp index 3c654761b9903..4db2d7259c6ba 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinNVPTX.cpp @@ -1067,6 +1067,7 @@ static mlir::Value packArgsIntoNVPTXFormatBuffer(CIRGenFunction &cgf, dataLayout.getABITypeAlign(argTypes[i]).value()); cir::StoreOp::create(builder, loc, arg.getKnownRValue().getValue(), member, /*is_volatile=*/false, + /*isNontemporal=*/false, builder.getAlignmentAttr(abiAlign), /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr{}); diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index beaedd853f57b..fa14b45cbb015 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ 
b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -499,12 +499,7 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, Address addr, } assert(currSrcLoc && "must pass in source location"); - builder.createStore(*currSrcLoc, value, addr, isVolatile); - - if (isNontemporal) { - cgm.errorNYI(addr.getPointer().getLoc(), "emitStoreOfScalar nontemporal"); - return; - } + builder.createStore(*currSrcLoc, value, addr, isVolatile, isNontemporal); assert(!cir::MissingFeatures::opTBAA()); } @@ -736,12 +731,13 @@ void CIRGenFunction::emitStoreOfScalar(mlir::Value value, LValue lvalue, emitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(), lvalue.getType(), lvalue.getBaseInfo(), isInit, - /*isNontemporal=*/false); + lvalue.isNontemporal()); } mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, QualType ty, SourceLocation loc, - LValueBaseInfo baseInfo) { + LValueBaseInfo baseInfo, + bool isNontemporal) { // Traditional LLVM codegen handles thread local separately, CIR handles // as part of getAddrOfGlobalVar (GetGlobalOp). 
mlir::Type eltTy = addr.getElementType(); @@ -771,7 +767,8 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, assert(!cir::MissingFeatures::opLoadEmitScalarRangeCheck()); - mlir::Value loadOp = builder.createLoad(getLoc(loc), addr, isVolatile); + mlir::Value loadOp = + builder.createLoad(getLoc(loc), addr, isVolatile, isNontemporal); if (!ty->isBooleanType() && ty->hasBooleanRepresentation()) cgm.errorNYI("emitLoadOfScalar: boolean type with boolean representation"); @@ -780,10 +777,10 @@ mlir::Value CIRGenFunction::emitLoadOfScalar(Address addr, bool isVolatile, mlir::Value CIRGenFunction::emitLoadOfScalar(LValue lvalue, SourceLocation loc) { - assert(!cir::MissingFeatures::opLoadStoreNontemporal()); assert(!cir::MissingFeatures::opLoadStoreTbaa()); return emitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(), - lvalue.getType(), loc, lvalue.getBaseInfo()); + lvalue.getType(), loc, lvalue.getBaseInfo(), + lvalue.isNontemporal()); } /// Given an expression that represents a value lvalue, this diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 317151c8d61c6..d0b936f45378d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -2095,7 +2095,8 @@ class CIRGenFunction : public CIRGenTypeCache { /// l-value. mlir::Value emitLoadOfScalar(LValue lvalue, SourceLocation loc); mlir::Value emitLoadOfScalar(Address addr, bool isVolatile, QualType ty, - SourceLocation loc, LValueBaseInfo baseInfo); + SourceLocation loc, LValueBaseInfo baseInfo, + bool isNontemporal = false); /// Emit code to compute a designator that specifies the location /// of the expression. 
@@ -2305,6 +2306,7 @@ class CIRGenFunction : public CIRGenTypeCache { builder.restoreInsertionPoint(outermostConditional->getInsertPoint()); builder.createStore( value.getLoc(), value, addr, /*isVolatile=*/false, + /*isNontemporal=*/false, mlir::IntegerAttr::get( mlir::IntegerType::get(value.getContext(), 64), (uint64_t)addr.getAlignment().getAsAlign().value())); diff --git a/clang/lib/CIR/CodeGen/CIRGenValue.h b/clang/lib/CIR/CodeGen/CIRGenValue.h index e70dac5851189..b291b8c76f1ad 100644 --- a/clang/lib/CIR/CodeGen/CIRGenValue.h +++ b/clang/lib/CIR/CodeGen/CIRGenValue.h @@ -173,6 +173,9 @@ class LValue { mlir::Type elementType; LValueBaseInfo baseInfo; const CIRGenBitFieldInfo *bitFieldInfo{nullptr}; + // This flag shows whether nontemporal loads/stores should be used when accessing + // this lvalue. + bool nontemporal; void initialize(clang::QualType type, clang::Qualifiers quals, clang::CharUnits alignment, LValueBaseInfo baseInfo) { @@ -187,6 +190,7 @@ class LValue { assert(this->alignment == alignment.getQuantity() && "Alignment exceeds allowed max!"); this->baseInfo = baseInfo; + this->nontemporal = false; } public: @@ -200,6 +204,9 @@ class LValue { bool isVolatileQualified() const { return quals.hasVolatile(); } + bool isNontemporal() const { return nontemporal; } + void setNontemporal(bool v) { nontemporal = v; } + unsigned getVRQualifiers() const { return quals.getCVRQualifiers() & ~clang::Qualifiers::Const; } diff --git a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp index 8a82bcb19454e..73b35c7f00c2d 100644 --- a/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRMemorySlot.cpp @@ -142,6 +142,7 @@ DeletionKind cir::CopyOp::removeBlockingUses( if (loadsFrom(slot)) cir::StoreOp::create(builder, getLoc(), reachingDefinition, getDst(), /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr{}, /*sync_scope=*/cir::SyncScopeKindAttr(), /*mem-order=*/cir::MemOrderAttr());
diff --git a/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp b/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp index e6c76fed6f78a..0e39fa15d377b 100644 --- a/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp +++ b/clang/lib/CIR/Dialect/Transforms/EHABILowering.cpp @@ -678,7 +678,7 @@ ItaniumEHLowering::lowerConstructCatchParam(cir::ConstructCatchParamOp op, mlir::Value casted = cir::CastOp::create(builder, loc, paramAddrType.getPointee(), cir::CastKind::bitcast, exnObj); - cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}); + cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {}); op.erase(); return success(); } @@ -853,7 +853,7 @@ void ItaniumEHLowering::lowerInitCatchParam(cir::InitCatchParamOp op) { mlir::Value casted = cir::CastOp::create(builder, loc, elementType, cir::CastKind::bitcast, exnPtr); - cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}); + cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {}); break; } case InitCatchKind::TrivialCopy: { @@ -874,13 +874,13 @@ void ItaniumEHLowering::lowerInitCatchParam(cir::InitCatchParamOp op) { cir::CastKind::bitcast, exnPtr); auto loadOp = cir::LoadOp::create(builder, loc, elementType, srcPtr); cir::StoreOp::create(builder, loc, loadOp.getResult(), paramAddr, {}, {}, - {}, {}); + {}, {}, {}); break; } case InitCatchKind::Pointer: { mlir::Value casted = cir::CastOp::create(builder, loc, elementType, cir::CastKind::bitcast, exnPtr); - cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}); + cir::StoreOp::create(builder, loc, casted, paramAddr, {}, {}, {}, {}, {}); break; } case InitCatchKind::Objc: diff --git a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp index ddeeb98fee820..c487e645e30cd 100644 --- a/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp +++ b/clang/lib/CIR/Dialect/Transforms/FlattenCFG.cpp @@ -971,16 +971,18 @@ class CIRCleanupScopeOpFlattening 
rewriter.setInsertionPoint(exitOp); cir::StoreOp::create(rewriter, loc, operand, alloca, /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr(), cir::SyncScopeKindAttr(), cir::MemOrderAttr()); } // Reload the value from the temporary alloca in the destination block. rewriter.setInsertionPointToEnd(destBlock); - auto loaded = cir::LoadOp::create( - rewriter, loc, alloca, /*isDeref=*/false, - /*isVolatile=*/false, /*alignment=*/mlir::IntegerAttr(), - cir::SyncScopeKindAttr(), cir::MemOrderAttr()); + auto loaded = + cir::LoadOp::create(rewriter, loc, alloca, /*isDeref=*/false, + /*isVolatile=*/false, /*isNontemporal=*/false, + /*alignment=*/mlir::IntegerAttr(), + cir::SyncScopeKindAttr(), cir::MemOrderAttr()); returnValues.push_back(loaded); } } @@ -1290,10 +1292,11 @@ class CIRCleanupScopeOpFlattening rewriter.setInsertionPointToEnd(exitBlock); // Load the destination slot value. - auto slotValue = cir::LoadOp::create( - rewriter, loc, destSlot, /*isDeref=*/false, - /*isVolatile=*/false, /*alignment=*/mlir::IntegerAttr(), - cir::SyncScopeKindAttr(), cir::MemOrderAttr()); + auto slotValue = + cir::LoadOp::create(rewriter, loc, destSlot, /*isDeref=*/false, + /*isVolatile=*/false, /*isNontemporal=*/false, + /*alignment=*/mlir::IntegerAttr(), + cir::SyncScopeKindAttr(), cir::MemOrderAttr()); // Create destination blocks for each exit and collect switch case info. 
llvm::SmallVector caseValues; @@ -1322,6 +1325,7 @@ class CIRCleanupScopeOpFlattening rewriter, loc, cir::IntAttr::get(s32Type, exit.destinationId)); cir::StoreOp::create(rewriter, loc, destIdConst, destSlot, /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr(), cir::SyncScopeKindAttr(), cir::MemOrderAttr()); rewriter.replaceOpWithNewOp(exit.exitOp, cleanupEntry); diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp index cccbe70876c3f..6e12a13787a2a 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/LowerItaniumCXXABI.cpp @@ -382,6 +382,7 @@ void LowerItaniumCXXABI::lowerGetMethod( mlir::Value vtablePtr = cir::LoadOp::create(b, loc, vtablePtrPtr, /*isDeref=*/false, /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr(), /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr()); @@ -409,6 +410,7 @@ void LowerItaniumCXXABI::lowerGetMethod( cir::CastKind::bitcast, vfpAddr); auto fnPtr = cir::LoadOp::create(b, loc, vfpPtr, /*isDeref=*/false, /*isVolatile=*/false, + /*isNontemporal=*/false, /*alignment=*/mlir::IntegerAttr(), /*sync_scope=*/cir::SyncScopeKindAttr{}, /*mem_order=*/cir::MemOrderAttr()); @@ -782,6 +784,7 @@ static mlir::Value buildDynamicCastToVoidAfterNullCheck( builder, loc, vptrPtr, /*isDeref=*/false, /*is_volatile=*/false, + /*isNontemporal=*/false, /*alignment=*/builder.getI64IntegerAttr(vtableElemAlign), /*sync_scope=*/cir::SyncScopeKindAttr(), /*mem_order=*/cir::MemOrderAttr()); @@ -795,6 +798,7 @@ static mlir::Value buildDynamicCastToVoidAfterNullCheck( builder, loc, offsetToTopSlotPtr, /*isDeref=*/false, /*is_volatile=*/false, + /*isNontemporal=*/false, /*alignment=*/builder.getI64IntegerAttr(vtableElemAlign), /*sync_scope=*/cir::SyncScopeKindAttr(), /*mem_order=*/cir::MemOrderAttr()); 
@@ -904,6 +908,7 @@ mlir::Value LowerItaniumCXXABI::readArrayCookieImpl( builder, loc, countPtrTy, cir::CastKind::bitcast, countBytePtr); return cir::LoadOp::create( builder, loc, countPtr, /*isDeref=*/false, /*isVolatile=*/false, + /*isNontemporal=*/false, builder.getI64IntegerAttr(countAlignment.getQuantity()), cir::SyncScopeKindAttr(), cir::MemOrderAttr()); } diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 25fa6d1625301..27eba4ee326a5 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1860,15 +1860,12 @@ mlir::LogicalResult CIRToLLVMLoadOpLowering::matchAndRewrite( assert(!cir::MissingFeatures::lowerModeOptLevel()); - // TODO: nontemporal. - assert(!cir::MissingFeatures::opLoadStoreNontemporal()); - std::optional llvmSyncScope = getLLVMSyncScope(op.getSyncScope()); mlir::LLVM::LoadOp newLoad = mlir::LLVM::LoadOp::create( rewriter, op->getLoc(), llvmTy, adaptor.getAddr(), alignment, - op.getIsVolatile(), /*isNonTemporal=*/false, + op.getIsVolatile(), /*isNonTemporal=*/op.getIsNontemporal(), /*isInvariant=*/false, /*isInvariantGroup=*/false, ordering, llvmSyncScope.value_or(std::string())); @@ -1916,8 +1913,6 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite( // Convert adapted value to its memory type if needed. mlir::Value value = emitToMemory(rewriter, dataLayout, op.getValue().getType(), adaptor.getValue()); - // TODO: nontemporal. 
- assert(!cir::MissingFeatures::opLoadStoreNontemporal()); assert(!cir::MissingFeatures::opLoadStoreTbaa()); std::optional llvmSyncScope = @@ -1926,8 +1921,8 @@ mlir::LogicalResult CIRToLLVMStoreOpLowering::matchAndRewrite( mlir::LLVM::StoreOp storeOp = mlir::LLVM::StoreOp::create( rewriter, op->getLoc(), value, adaptor.getAddr(), alignment, op.getIsVolatile(), - /*isNonTemporal=*/false, /*isInvariantGroup=*/false, memorder, - llvmSyncScope.value_or(std::string())); + /*isNonTemporal=*/op.getIsNontemporal(), /*isInvariantGroup=*/false, + memorder, llvmSyncScope.value_or(std::string())); rewriter.replaceOp(op, storeOp); assert(!cir::MissingFeatures::opLoadStoreTbaa()); return mlir::LogicalResult::success(); diff --git a/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp b/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp new file mode 100644 index 0000000000000..ec834049ecc44 --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/builtin-nontemporal.cpp @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o - | FileCheck %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s -check-prefix=LLVM + +signed char sc; +unsigned char uc; +signed short ss; +unsigned short us; +signed int si; +unsigned int ui; +signed long long sll; +unsigned long long ull; +float f1, f2; +double d1, d2; + +void test_nontemporal_store() { +// CIR-LABEL: cir.func {{.*}}@_Z22test_nontemporal_storev +// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !u8i, !cir.ptr +// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !u8i, !cir.ptr +// CIR: cir.store nontemporal align(1) {{%.*}}, {{%.*}} : !s8i, !cir.ptr +// CIR: cir.store nontemporal align(2) {{%.*}}, {{%.*}} : !u16i, !cir.ptr +// CIR: cir.store nontemporal align(4) {{%.*}}, {{%.*}} : !s32i, !cir.ptr +// CIR: cir.store 
nontemporal align(8) {{%.*}}, {{%.*}} : !u64i, !cir.ptr +// CIR: cir.store nontemporal align(4) {{%.*}}, {{%.*}} : !cir.float, !cir.ptr +// CIR: cir.store nontemporal align(8) {{%.*}}, {{%.*}} : !cir.double, !cir.ptr +// CIR: cir.return + +// LLVM-LABEL: define dso_local void @_Z22test_nontemporal_storev +// LLVM: store i8 1, ptr @uc, align 1, !nontemporal +// LLVM: store i8 1, ptr @uc, align 1, !nontemporal +// LLVM: store i8 1, ptr @sc, align 1, !nontemporal +// LLVM: store i16 1, ptr @us, align 2, !nontemporal +// LLVM: store i32 1, ptr @si, align 4, !nontemporal +// LLVM: store i64 1, ptr @ull, align 8, !nontemporal +// LLVM: store float 1.0{{.*}}, ptr @f1, align 4, !nontemporal +// LLVM: store double 1.0{{.*}}, ptr @d1, align 8, !nontemporal +// LLVM: ret void + + __builtin_nontemporal_store(true, &uc); + __builtin_nontemporal_store(1, &uc); + __builtin_nontemporal_store(1, &sc); + __builtin_nontemporal_store(1, &us); + __builtin_nontemporal_store(1, &si); + __builtin_nontemporal_store(1, &ull); + __builtin_nontemporal_store(1.0, &f1); + __builtin_nontemporal_store(1.0, &d1); +} + +void test_nontemporal_load() { +// CIR-LABEL: cir.func {{.*}}@_Z21test_nontemporal_loadv +// CIR: cir.load nontemporal align(1) {{%.*}} : !cir.ptr, !s8i +// CIR: cir.load nontemporal align(1) {{%.*}} : !cir.ptr, !u8i +// CIR: cir.load nontemporal align(2) {{%.*}} : !cir.ptr, !s16i +// CIR: cir.load nontemporal align(4) {{%.*}} : !cir.ptr, !u32i +// CIR: cir.load nontemporal align(8) {{%.*}} : !cir.ptr, !s64i +// CIR: cir.load nontemporal align(4) {{%.*}} : !cir.ptr, !cir.float +// CIR: cir.load nontemporal align(8) {{%.*}} : !cir.ptr, !cir.double +// CIR: cir.return + +// LLVM-LABEL: define dso_local void @_Z21test_nontemporal_loadv +// LLVM: load i8, ptr @sc, align 1, !nontemporal +// LLVM: load i8, ptr @uc, align 1, !nontemporal +// LLVM: load i16, ptr @ss, align 2, !nontemporal +// LLVM: load i32, ptr @ui, align 4, !nontemporal +// LLVM: load i64, ptr @sll, align 8, !nontemporal 
+// LLVM: load float, ptr @f2, align 4, !nontemporal +// LLVM: load double, ptr @d2, align 8, !nontemporal +// LLVM: ret void + + uc = __builtin_nontemporal_load(&sc); + sc = __builtin_nontemporal_load(&uc); + us = __builtin_nontemporal_load(&ss); + si = __builtin_nontemporal_load(&ui); + ull = __builtin_nontemporal_load(&sll); + f1 = __builtin_nontemporal_load(&f2); + d1 = __builtin_nontemporal_load(&d2); +} From cba0e5d024f656eeab0e620a17c7754d6b75ea6a Mon Sep 17 00:00:00 2001 From: Wenju He Date: Tue, 23 Jun 2026 04:44:01 +0200 Subject: [PATCH 114/511] [Driver][SYCL] Fix crash with SYCL AMDGCN targets when LTO enabled by default (#22376) After commit 859ee9d83ef2, HIPAMDToolChain::getDefaultLTOMode() returns LTOK_Full, which is the toolchain used for SYCL AMDGCN targets. This caused IsUsingLTO=true even without an explicit -foffload-lto flag, triggering the SYCL LTO unsupported diagnostic on a null LTOArg. Guard the SYCL branch of the diagnostic with LTOArg being non-null, so the error only fires when the user explicitly passed -foffload-lto. 
Fixes: - clang/test/Driver/sycl-device-traits-macros-amdgcn.cpp CMPLRLLVM-76332 - libdevice build error: unsupported option '-foffload-lto' for target 'amdgcn-amd-amdhsa' Co-authored-by: Claude Sonnet 4.6 --- clang/lib/Driver/ToolChains/Clang.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 6f65db463bf81..c725d2b7dccbe 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -6187,9 +6187,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if ((IsDeviceOffloadAction && !JA.isDeviceOffloading(Action::OFK_OpenMP) && !Triple.isAMDGPU() && !Triple.isSPIRV() && !IsUsingOffloadNewDriver) || - (JA.isDeviceOffloading(Action::OFK_SYCL) && !IsSYCLLTOSupported)) { + (JA.isDeviceOffloading(Action::OFK_SYCL) && !IsSYCLLTOSupported && + LTOArg)) { D.Diag(diag::err_drv_unsupported_opt_for_target) - << (LTOArg ? LTOArg->getAsString(Args) : "-foffload-lto") + << LTOArg->getAsString(Args) << Triple.getTriple(); } else if (Triple.isNVPTX() && !IsRDCMode && JA.isDeviceOffloading(Action::OFK_Cuda)) { From 49d6ac146988cfb637968a16ed5b251133a25d4d Mon Sep 17 00:00:00 2001 From: Wenju He Date: Tue, 23 Jun 2026 04:44:23 +0200 Subject: [PATCH 115/511] [Clang][SYCL] Fix test clang/test/Driver/sycl-oneapi-gpu-nvidia.cpp (#22377) c52c817f08e1 switched GPU arch from --gpu-name to -target-cpu inside the llvm-foreach clang invocation. 
CMPLRLLVM-76332 --------- Co-authored-by: Claude Sonnet 4.6 --- clang/test/Driver/sycl-oneapi-gpu-nvidia.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/Driver/sycl-oneapi-gpu-nvidia.cpp b/clang/test/Driver/sycl-oneapi-gpu-nvidia.cpp index 1665d4253ff8f..d5f549d72df1c 100644 --- a/clang/test/Driver/sycl-oneapi-gpu-nvidia.cpp +++ b/clang/test/Driver/sycl-oneapi-gpu-nvidia.cpp @@ -33,7 +33,7 @@ // MACRO_NVIDIA: clang{{.*}} "-fsycl-is-host" // MACRO_NVIDIA: "-D__SYCL_TARGET_NVIDIA_GPU_[[MAC_STR]]__" // MACRO_NVIDIA: clang{{.*}} "-triple" "nvptx64-nvidia-cuda" -// DEVICE_NVIDIA: llvm-foreach{{.*}} "--gpu-name" "[[DEV_STR]]" +// DEVICE_NVIDIA: "-target-cpu" "[[DEV_STR]]" /// test for invalid nvidia arch From 2545b402c892fcf7c06e51abed21f067833a6d72 Mon Sep 17 00:00:00 2001 From: ZT Qin Date: Tue, 23 Jun 2026 11:07:53 +0800 Subject: [PATCH 116/511] [OpenMPOpt][Attributor] Selectively seed deglobalization AAs (#198710) This addresses a compile-time issue observed on a large generated C++ translation unit compiled with `-fopenmp`. The source code is not OpenMP-heavy. It mainly consists of generated function-registration wrappers, template instantiations, lambdas, and small helper functions. However, because the TU is compiled with OpenMP enabled, `OpenMPOptCGSCCPass` runs and drives Attributor on a module with many functions. `OpenMPOpt::registerAAsForFunction` currently eagerly creates the deglobalization AAs for every function in OpenMP device modules: * `AAHeapToShared` * `AAHeapToStack` Most generated wrapper/helper functions in the motivating workload do not contain `__kmpc_alloc_shared`, removable allocations, or free-like calls. In those cases, the deglobalization AAs have no useful candidates but still contribute to Attributor initialization and fixed-point work. 
This patch makes the deglobalization AA seeding more selective: * only create `AAHeapToShared` for functions with non-empty `__kmpc_alloc_shared` uses; * only create `AAHeapToStack` for functions containing removable allocation or free-like call candidates; * make `AAHeapToShared` use the per-function runtime use vector instead of scanning all users of the runtime declaration. I also measured the previously attempted pieces independently. The compile-time improvement comes from the selective seeding change. The Attributor use-caching change and the `registerAAsForFunction` registration cache did not show meaningful benefit on the motivating workload, so they have been removed from this PR. On the motivating TU, selective seeding reduced total compile time from about 3473s to 271s, and `OpenMPOptCGSCCPass` time from about 3232s to 111s. --------- Co-authored-by: jnudhf Co-authored-by: Shilei Tian --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 44 +++++++++++++---- .../reduced/openmp_opt_constant_type_crash.ll | 47 ++++++++++++++----- .../OpenMP/single_threaded_execution.ll | 6 +-- 3 files changed, 71 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index e1287e1353c86..012ea3aad490d 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -29,6 +29,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" @@ -3461,10 +3462,13 @@ struct AAHeapToSharedFunction : public AAHeapToShared { bool &) -> std::optional { return nullptr; }; Function *F = getAnchorScope(); - for (User *U : RFI.Declaration->users()) - if (CallBase *CB = dyn_cast(U)) { - if (CB->getFunction() != F) - continue; + const 
OMPInformationCache::RuntimeFunctionInfo::UseVector *Uses = + RFI.getUseVector(*F); + if (!Uses) + return; + + for (Use *U : *Uses) + if (CallBase *CB = dyn_cast(U->getUser())) { MallocCalls.insert(CB); A.registerSimplificationCallback(IRPosition::callsite_returned(*CB), SCB); @@ -5585,13 +5589,22 @@ void OpenMPOpt::registerAAs(bool IsModulePass) { } void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) { - if (!DisableOpenMPOptDeglobalization) - A.getOrCreateAAFor(IRPosition::function(F)); - A.getOrCreateAAFor(IRPosition::function(F)); - if (!DisableOpenMPOptDeglobalization) - A.getOrCreateAAFor(IRPosition::function(F)); + auto &OMPInfoCache = static_cast(A.getInfoCache()); + + IRPosition FPos = IRPosition::function(F); + A.getOrCreateAAFor(FPos); if (F.hasFnAttribute(Attribute::Convergent)) - A.getOrCreateAAFor(IRPosition::function(F)); + A.getOrCreateAAFor(FPos); + + bool FunctionUsesSharedAlloc = false; + if (!DisableOpenMPOptDeglobalization) { + const OMPInformationCache::RuntimeFunctionInfo::UseVector *SharedAllocUses = + OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared].getUseVector( + const_cast(F)); + FunctionUsesSharedAlloc = SharedAllocUses && !SharedAllocUses->empty(); + } + bool HasHeapToStackCandidate = false; + const TargetLibraryInfo *TLI = nullptr; for (auto &I : instructions(F)) { if (auto *LI = dyn_cast(&I)) { @@ -5603,6 +5616,12 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) { continue; } if (auto *CI = dyn_cast(&I)) { + if (!DisableOpenMPOptDeglobalization && !HasHeapToStackCandidate) { + if (!TLI) + TLI = A.getInfoCache().getTargetLibraryInfoForFunction(F); + HasHeapToStackCandidate = + isRemovableAlloc(CI, TLI) || getFreedOperand(CI, TLI); + } if (CI->isIndirectCall()) A.getOrCreateAAFor( IRPosition::callsite_function(*CI)); @@ -5625,6 +5644,11 @@ void OpenMPOpt::registerAAsForFunction(Attributor &A, const Function &F) { } } } + + if (FunctionUsesSharedAlloc) + A.getOrCreateAAFor(FPos); + if 
(HasHeapToStackCandidate) + A.getOrCreateAAFor(FPos); } const char AAICVTracker::ID = 0; diff --git a/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll b/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll index c9f0c3dacb9e1..065cd03b4f2dc 100644 --- a/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll +++ b/llvm/test/Transforms/Attributor/reduced/openmp_opt_constant_type_crash.ll @@ -69,13 +69,12 @@ cond.end: ; preds = %cond.true, %entry ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: norecurse -; CHECK-LABEL: define {{[^@]+}}@_ZN6Kokkos4Impl11ViewMappingIvJNS_10ViewTraitsIPPdJNS_11LayoutRightENS_18ScratchMemorySpaceINS_12Experimental12OpenMPTargetEEENS_12MemoryTraitsILj1EEEEEENS0_5ALL_tEiEE6assignINS2_IS3_JNS_12LayoutStrideENS_6DeviceIS8_S9_EESB_EEEEEvRNS1_IT_JvEEERKNS1_ISC_JvEEESD_i.internalized -; CHECK-SAME: () #[[ATTR0:[0-9]+]] { +; CHECK-LABEL: define {{[^@]+}}@_ZN6Kokkos4Impl11ViewMappingIvJNS_10ViewTraitsIPPdJNS_11LayoutRightENS_18ScratchMemorySpaceINS_12Experimental12OpenMPTargetEEENS_12MemoryTraitsILj1EEEEEENS0_5ALL_tEiEE6assignINS2_IS3_JNS_12LayoutStrideENS_6DeviceIS8_S9_EESB_EEEEEvRNS1_IT_JvEEERKNS1_ISC_JvEEESD_i.internalized() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[EXTENTS11:%.*]] = alloca [0 x [0 x %"struct.Kokkos::Impl::SubviewExtents.448"]], i32 0, align 8, addrspace(5) ; CHECK-NEXT: [[EXTENTS_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[EXTENTS11]] to ptr ; CHECK-NEXT: call void @_ZN6Kokkos4Impl14SubviewExtentsILj2ELj1EEC2IJLm0ELm0EEJNS0_5ALL_tEiEEERKNS0_13ViewDimensionIJXspT_EEEEDpT0_.internalized(ptr [[EXTENTS_ASCAST]]) +; CHECK-NEXT: call void @_ZN6Kokkos4Impl10ViewOffsetINS0_13ViewDimensionIJLm0EEEENS_12LayoutStrideEvEC2INS2_IJLm0ELm0EEEENS_11LayoutRightEEERKNS1_IT_T0_vEERKNS0_14SubviewExtentsIXsrS9_4rankELj1EEE.internalized(ptr [[EXTENTS_ASCAST]]) ; CHECK-NEXT: ret void ; ; @@ -88,11 +87,10 @@ cond.end: ; preds = %cond.true, %entry ; CHECK-NEXT: ret void ; ; -; 
CHECK: Function Attrs: norecurse nosync nounwind memory(argmem: write) ; CHECK-LABEL: define {{[^@]+}}@_ZN6Kokkos4Impl14SubviewExtentsILj2ELj1EEC2IJLm0ELm0EEJNS0_5ALL_tEiEEERKNS0_13ViewDimensionIJXspT_EEEEDpT0_.internalized -; CHECK-SAME: (ptr [[THIS:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-SAME: (ptr [[THIS:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = call i1 @_ZN6Kokkos4Impl14SubviewExtentsILj2ELj1EE3setIJLm0ELm0EEJiEEEbjjRKNS0_13ViewDimensionIJXspT_EEEENS0_5ALL_tEDpT0_.internalized(ptr writeonly [[THIS]]) +; CHECK-NEXT: [[CALL:%.*]] = call i1 @_ZN6Kokkos4Impl14SubviewExtentsILj2ELj1EE3setIJLm0ELm0EEJiEEEbjjRKNS0_13ViewDimensionIJXspT_EEEENS0_5ALL_tEDpT0_.internalized(ptr [[THIS]]) ; CHECK-NEXT: ret void ; ; @@ -103,6 +101,14 @@ cond.end: ; preds = %cond.true, %entry ; CHECK-NEXT: ret void ; ; +; CHECK-LABEL: define {{[^@]+}}@_ZN6Kokkos4Impl10ViewOffsetINS0_13ViewDimensionIJLm0EEEENS_12LayoutStrideEvEC2INS2_IJLm0ELm0EEEENS_11LayoutRightEEERKNS1_IT_T0_vEERKNS0_14SubviewExtentsIXsrS9_4rankELj1EEE.internalized +; CHECK-SAME: (ptr [[SUB:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CALL191:%.*]] = call i32 @_ZNK6Kokkos4Impl14SubviewExtentsILj2ELj1EE11range_indexIiEEjT_.internalized(ptr [[SUB]]) +; CHECK-NEXT: [[CALL201:%.*]] = call i64 @_ZN6Kokkos4Impl10ViewOffsetINS0_13ViewDimensionIJLm0EEEENS_12LayoutStrideEvE6strideINS2_IJLm0ELm0EEEENS_11LayoutRightEEEmjRKNS1_IT_T0_vEE.internalized(i32 [[CALL191]]) +; CHECK-NEXT: ret void +; +; ; CHECK-LABEL: define {{[^@]+}}@_ZN6Kokkos4Impl10ViewOffsetINS0_13ViewDimensionIJLm0EEEENS_12LayoutStrideEvEC2INS2_IJLm0ELm0EEEENS_11LayoutRightEEERKNS1_IT_T0_vEERKNS0_14SubviewExtentsIXsrS9_4rankELj1EEE ; CHECK-SAME: (ptr [[SUB:%.*]]) { ; CHECK-NEXT: entry: @@ -111,11 +117,10 @@ cond.end: ; preds = %cond.true, %entry ; CHECK-NEXT: ret void ; ; -; CHECK: Function Attrs: norecurse nosync nounwind memory(argmem: write) ; CHECK-LABEL: define 
{{[^@]+}}@_ZN6Kokkos4Impl14SubviewExtentsILj2ELj1EE3setIJLm0ELm0EEJiEEEbjjRKNS0_13ViewDimensionIJXspT_EEEENS0_5ALL_tEDpT0_.internalized -; CHECK-SAME: (ptr writeonly captures(none) [[THIS:%.*]]) #[[ATTR1]] { +; CHECK-SAME: (ptr [[THIS:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[THIS]] to ptr addrspace(5) +; CHECK-NEXT: store i64 0, ptr [[THIS]], align 8 ; CHECK-NEXT: ret i1 false ; ; @@ -126,6 +131,13 @@ cond.end: ; preds = %cond.true, %entry ; CHECK-NEXT: ret i1 false ; ; +; CHECK-LABEL: define {{[^@]+}}@_ZN6Kokkos4Impl10ViewOffsetINS0_13ViewDimensionIJLm0EEEENS_12LayoutStrideEvE6strideINS2_IJLm0ELm0EEEENS_11LayoutRightEEEmjRKNS1_IT_T0_vEE.internalized +; CHECK-SAME: (i32 [[R:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: store i32 [[R]], ptr null, align 4294967296 +; CHECK-NEXT: ret i64 0 +; +; ; CHECK-LABEL: define {{[^@]+}}@_ZN6Kokkos4Impl10ViewOffsetINS0_13ViewDimensionIJLm0EEEENS_12LayoutStrideEvE6strideINS2_IJLm0ELm0EEEENS_11LayoutRightEEEmjRKNS1_IT_T0_vEE ; CHECK-SAME: (i32 [[R:%.*]]) { ; CHECK-NEXT: entry: @@ -133,6 +145,19 @@ cond.end: ; preds = %cond.true, %entry ; CHECK-NEXT: ret i64 0 ; ; +; CHECK-LABEL: define {{[^@]+}}@_ZNK6Kokkos4Impl14SubviewExtentsILj2ELj1EE11range_indexIiEEjT_.internalized +; CHECK-SAME: (ptr [[THIS:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 1, 0 +; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]] +; CHECK: cond.true: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[THIS]], align 4 +; CHECK-NEXT: br label [[COND_END]] +; CHECK: cond.end: +; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TMP0]], [[COND_TRUE]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: ret i32 [[COND]] +; +; ; CHECK-LABEL: define {{[^@]+}}@_ZNK6Kokkos4Impl14SubviewExtentsILj2ELj1EE11range_indexIiEEjT_ ; CHECK-SAME: (ptr [[THIS:%.*]]) { ; CHECK-NEXT: entry: @@ -146,10 +171,6 @@ cond.end: ; preds = %cond.true, %entry ; CHECK-NEXT: ret i32 [[COND]] ; ;. 
-; CHECK: attributes #[[ATTR0]] = { norecurse } -; CHECK: attributes #[[ATTR1]] = { norecurse nosync nounwind memory(argmem: write) } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { nosync nounwind memory(write) } -;. ; CHECK: [[META0:![0-9]+]] = !{i32 7, !"openmp", i32 50} ; CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp-device", i32 50} ;. diff --git a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll index 70b9ce41c1a43..8b71a132e8541 100644 --- a/llvm/test/Transforms/OpenMP/single_threaded_execution.ll +++ b/llvm/test/Transforms/OpenMP/single_threaded_execution.ll @@ -16,6 +16,9 @@ ; CHECK: [openmp-opt] Basic block @kernel if.then is executed by a single thread. ; CHECK-NOT: [openmp-opt] Basic block @kernel if.else is executed by a single thread. ; CHECK-NOT: [openmp-opt] Basic block @kernel if.end is executed by a single thread. +; CHECK-NOT: [openmp-opt] Basic block @nvptx entry is executed by a single thread. +; CHECK: [openmp-opt] Basic block @nvptx if.then is executed by a single thread. +; CHECK-NOT: [openmp-opt] Basic block @nvptx if.end is executed by a single thread. define ptx_kernel void @kernel(ptr %dyn) "kernel" { %call = call i32 @__kmpc_target_init(ptr @kernel_kernel_environment, ptr %dyn) %cmp = icmp eq i32 %call, -1 @@ -46,9 +49,6 @@ entry: ; REMARKS: remark: single_threaded_execution.c:1:0: Could not internalize function. Some optimizations may not be possible. ; REMARKS-NOT: remark: single_threaded_execution.c:1:0: Could not internalize function. Some optimizations may not be possible. -; CHECK-NOT: [openmp-opt] Basic block @nvptx entry is executed by a single thread. -; CHECK-DAG: [openmp-opt] Basic block @nvptx if.then is executed by a single thread. -; CHECK-NOT: [openmp-opt] Basic block @nvptx if.end is executed by a single thread. 
; Function Attrs: noinline define void @nvptx() { entry: From 294e7224274e08661629340f18c6559b5e0fd042 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Mon, 22 Jun 2026 20:22:58 -0700 Subject: [PATCH 117/511] Reland [Allocator] Keep bump pointer at a minimum alignment (#205240) Reland #203718 (reverted in #205091) by making computation in integer domain to avoid UB (nullptr + non-zero offset). Add a `MinAlign` template parameter (default 8, sizeof(size_t) on 64-bit platforms) so that the common case `Alignment <= MinAlign` can skip realigning `CurPtr`. This is achieved by rounding each allocation's size up to MinAlign, so the bump pointer stays MinAlign-aligned between allocations. SpecificBumpPtrAllocator::DestroyAll() walks objects at a fixed sizeof(T) stride and needs tight packing, so it uses MinAlign=1. (alignof(T) would pack just as tightly and reuse the default instantiation, but T may be incomplete here, e.g. `SpecificBumpPtrAllocator`.) Its `Allocate` still skips the realign: the slab is max_align_t-aligned and every size is a multiple of alignof(T), so the bump pointer stays alignof(T)-aligned and we can just request alignment 1. Over-aligned types (alignof(T) > alignof(max_align_t)) keep requesting alignof(T). Also use `bit_ceil` instead of `NextPowerOf2` in the placement operator new alignment heuristic. NextPowerOf2 is strictly greater, so an 8-byte pointer-sized object asked for alignment 16 and missed the fast path; bit_ceil(8) == 8 keeps it on the fast path. 
Aided by Claude Opus 4.8 --- llvm/include/llvm/Support/Allocator.h | 58 +++++++++++++++++------- llvm/unittests/Support/AllocatorTest.cpp | 19 ++++++++ 2 files changed, 60 insertions(+), 17 deletions(-) diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h index fffcbd9f3c1d8..f58f73227a2a9 100644 --- a/llvm/include/llvm/Support/Allocator.h +++ b/llvm/include/llvm/Support/Allocator.h @@ -59,11 +59,16 @@ LLVM_ABI void printBumpPtrAllocatorStats(unsigned NumSlabs, /// /// The GrowthDelay specifies after how many allocated slabs the allocator /// increases the size of the slabs. +/// +/// MinAlign keeps the bump pointer aligned between allocations: each size is +/// rounded up to a multiple of MinAlign so the fast path can skip realigning +/// CurPtr when the requested alignment is no greater than MinAlign. template + size_t SizeThreshold = SlabSize, size_t GrowthDelay = 128, + size_t MinAlign = 8> class BumpPtrAllocatorImpl - : public AllocatorBase>, + : public AllocatorBase>, private detail::AllocatorHolder { using AllocTy = detail::AllocatorHolder; @@ -75,6 +80,10 @@ class BumpPtrAllocatorImpl static_assert(GrowthDelay > 0, "GrowthDelay must be at least 1 which already increases the" "slab size after each allocated slab."); + static_assert(MinAlign > 0 && (MinAlign & (MinAlign - 1)) == 0, + "MinAlign must be a power of two"); + static_assert(MinAlign <= alignof(std::max_align_t), + "MinAlign must not exceed the alignment of fresh slabs"); BumpPtrAllocatorImpl() = default; @@ -150,14 +159,17 @@ class BumpPtrAllocatorImpl // Keep track of how many bytes we've allocated. BytesAllocated += Size; - uintptr_t AlignedPtr = alignAddr(CurPtr, Alignment); - size_t SizeToAllocate = Size; #if LLVM_ADDRESS_SANITIZER_BUILD // Add trailing bytes as a "red zone" under ASan. 
SizeToAllocate += RedZoneSize; #endif + SizeToAllocate = alignToPowerOf2(SizeToAllocate, MinAlign); + // CurPtr is already MinAlign-aligned, so only a stricter request realigns. + uintptr_t AlignedPtr = uintptr_t(CurPtr); + if (Alignment.value() > MinAlign) + AlignedPtr = alignAddr(CurPtr, Alignment); uintptr_t AllocEndPtr = AlignedPtr + SizeToAllocate; assert(AllocEndPtr >= uintptr_t(CurPtr) && "Alignment + Size must not overflow"); @@ -388,7 +400,13 @@ using BumpPtrAllocator = BumpPtrAllocatorImpl<>; /// This allows calling the destructor in DestroyAll() and when the allocator is /// destroyed. template class SpecificBumpPtrAllocator { - BumpPtrAllocator Allocator; + // DestroyAll() walks objects at a fixed sizeof(T) stride, so it needs tight + // packing: MinAlign=1 disables the size rounding. (alignof(T) would pack just + // as tightly and reuse the default instantiation, but T may be incomplete + // here, e.g. SpecificBumpPtrAllocator.) + using BumpPtrAllocatorTy = + BumpPtrAllocatorImpl; + BumpPtrAllocatorTy Allocator; public: SpecificBumpPtrAllocator() { @@ -417,7 +435,7 @@ template class SpecificBumpPtrAllocator { for (auto I = Allocator.Slabs.begin(), E = Allocator.Slabs.end(); I != E; ++I) { - size_t AllocatedSlabSize = BumpPtrAllocator::computeSlabSize( + size_t AllocatedSlabSize = BumpPtrAllocatorTy::computeSlabSize( std::distance(Allocator.Slabs.begin(), I)); char *Begin = (char *)alignAddr(*I, Align::Of()); char *End = *I == Allocator.Slabs.back() ? Allocator.CurPtr @@ -437,7 +455,14 @@ template class SpecificBumpPtrAllocator { } /// Allocate space for an array of objects without constructing them. - T *Allocate(size_t num = 1) { return Allocator.Allocate(num); } + T *Allocate(size_t num = 1) { + // Slabs are max_align_t-aligned and every size is a multiple of alignof(T), + // so the bump pointer is already alignof(T)-aligned. Request alignment 1 so + // the fast path skips realigning CurPtr; over-aligned T still needs it. 
+ if constexpr (alignof(T) <= alignof(std::max_align_t)) + return static_cast(Allocator.Allocate(num * sizeof(T), Align())); + return Allocator.Allocate(num); + } /// \return An index uniquely and reproducibly identifying /// an input pointer \p Ptr in the given allocator. @@ -450,20 +475,19 @@ template class SpecificBumpPtrAllocator { } // end namespace llvm template + size_t GrowthDelay, size_t MinAlign> void * operator new(size_t Size, llvm::BumpPtrAllocatorImpl &Allocator) { - return Allocator.Allocate(Size, std::min((size_t)llvm::NextPowerOf2(Size), - alignof(std::max_align_t))); + GrowthDelay, MinAlign> &Allocator) { + return Allocator.Allocate( + Size, std::min(llvm::bit_ceil(Size), alignof(std::max_align_t))); } template -void operator delete(void *, - llvm::BumpPtrAllocatorImpl &) { -} + size_t GrowthDelay, size_t MinAlign> +void operator delete( + void *, llvm::BumpPtrAllocatorImpl &) {} #endif // LLVM_SUPPORT_ALLOCATOR_H diff --git a/llvm/unittests/Support/AllocatorTest.cpp b/llvm/unittests/Support/AllocatorTest.cpp index 2337f34143bad..d6f80e0948dc4 100644 --- a/llvm/unittests/Support/AllocatorTest.cpp +++ b/llvm/unittests/Support/AllocatorTest.cpp @@ -279,4 +279,23 @@ TEST(AllocatorTest, TestBigAlignment) { EXPECT_GT(MockSlabAllocator::GetLastSlabSize(), 4096u); } +// Over-aligned element type: Allocate() honors alignof(T) > MinAlign and +// DestroyAll() runs every destructor. 
+TEST(AllocatorTest, TestOverAlignedSpecific) { + unsigned NumDtorCalls = 0; + struct alignas(32) S { + unsigned *Calls; + ~S() { ++*Calls; } + }; + { + SpecificBumpPtrAllocator Alloc; + for (int I = 0; I != 4; ++I) { + S *P = Alloc.Allocate(); + EXPECT_EQ(0u, reinterpret_cast(P) & 31u); + P->Calls = &NumDtorCalls; + } + } + EXPECT_EQ(4u, NumDtorCalls); +} + } // anonymous namespace From e4bec898a3db3325385f2c5985ef825833f499fe Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 22 Jun 2026 21:18:13 -0700 Subject: [PATCH 118/511] [RISCV][P-ext] Rename pwcvt/pncvt pseudoinstructions for RV64. (#205227) We need to add a 'w' to the suffix to indicate it operates on a word and not a register pair like on RV32. See https://github.com/riscv/riscv-p-spec/pull/303 --- llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 19 +++++----- llvm/test/CodeGen/RISCV/rvp-simd-32.ll | 8 ++--- llvm/test/CodeGen/RISCV/rvp-simd-64.ll | 46 ++++++++++++++---------- llvm/test/MC/RISCV/rv64p-aliases-valid.s | 32 ++++++++--------- 4 files changed, 57 insertions(+), 48 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index f82520e0dcc3b..122f52c7e3dfe 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -1635,16 +1635,15 @@ let append Predicates = [IsRV64] in { def : InstAlias<"pmsgt.w $rd, $rs, $rt", (PMSLT_W GPR:$rd, GPR:$rt, GPR:$rs), 0>; def : InstAlias<"pmsgtu.w $rd, $rs, $rt", (PMSLTU_W GPR:$rd, GPR:$rt, GPR:$rs), 0>; - // No pwcvt.b/h for RV64. 
- def : InstAlias<"pwcvtu.b $rd, $rs", (ZIP8P GPR:$rd, GPR:$rs, X0)>; - def : InstAlias<"pwcvtu.h $rd, $rs", (ZIP16P GPR:$rd, GPR:$rs, X0)>; - def : InstAlias<"pwcvth.b $rd, $rs", (ZIP8P GPR:$rd, X0, GPR:$rs)>; - def : InstAlias<"pwcvth.h $rd, $rs", (ZIP16P GPR:$rd, X0, GPR:$rs)>; - - def : InstAlias<"pncvt.b $rd, $rs", (UNZIP8P GPR:$rd, GPR:$rs, X0)>; - def : InstAlias<"pncvt.h $rd, $rs", (UNZIP16P GPR:$rd, GPR:$rs, X0)>; - def : InstAlias<"pncvth.b $rd, $rs", (UNZIP8HP GPR:$rd, GPR:$rs, X0)>; - def : InstAlias<"pncvth.h $rd, $rs", (UNZIP16HP GPR:$rd, GPR:$rs, X0)>; + def : InstAlias<"pwcvtu.wb $rd, $rs", (ZIP8P GPR:$rd, GPR:$rs, X0)>; + def : InstAlias<"pwcvtu.wh $rd, $rs", (ZIP16P GPR:$rd, GPR:$rs, X0)>; + def : InstAlias<"pwcvth.wb $rd, $rs", (ZIP8P GPR:$rd, X0, GPR:$rs)>; + def : InstAlias<"pwcvth.wh $rd, $rs", (ZIP16P GPR:$rd, X0, GPR:$rs)>; + + def : InstAlias<"pncvt.wb $rd, $rs", (UNZIP8P GPR:$rd, GPR:$rs, X0)>; + def : InstAlias<"pncvt.wh $rd, $rs", (UNZIP16P GPR:$rd, GPR:$rs, X0)>; + def : InstAlias<"pncvth.wb $rd, $rs", (UNZIP8HP GPR:$rd, GPR:$rs, X0)>; + def : InstAlias<"pncvth.wh $rd, $rs", (UNZIP16HP GPR:$rd, GPR:$rs, X0)>; } // append Predicates = [IsRV64] let append Predicates = [IsRV32] in { diff --git a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll index c10e54011d34b..7e4c7d4317eac 100644 --- a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll +++ b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll @@ -1310,8 +1310,8 @@ define <4 x i8> @test_pmulhsu_b(<4 x i8> %a, <4 x i8> %b) { ; ; RV64-LABEL: test_pmulhsu_b: ; RV64: # %bb.0: -; RV64-NEXT: pwcvtu.b a0, a0 -; RV64-NEXT: pwcvtu.b a1, a1 +; RV64-NEXT: pwcvtu.wb a0, a0 +; RV64-NEXT: pwcvtu.wb a1, a1 ; RV64-NEXT: psext.h.b a0, a0 ; RV64-NEXT: pmul.w.h11 a2, a0, a1 ; RV64-NEXT: pmul.w.h00 a0, a0, a1 @@ -1346,8 +1346,8 @@ define <4 x i8> @test_pmulhsu_b_commuted(<4 x i8> %a, <4 x i8> %b) { ; ; RV64-LABEL: test_pmulhsu_b_commuted: ; RV64: # %bb.0: -; RV64-NEXT: pwcvtu.b a1, a1 -; RV64-NEXT: 
pwcvtu.b a0, a0 +; RV64-NEXT: pwcvtu.wb a1, a1 +; RV64-NEXT: pwcvtu.wb a0, a0 ; RV64-NEXT: psext.h.b a1, a1 ; RV64-NEXT: pmul.w.h11 a2, a0, a1 ; RV64-NEXT: pmul.w.h00 a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll index b43f3bb47843f..957b92a3fd607 100644 --- a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll +++ b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll @@ -2448,19 +2448,19 @@ define <8 x i8> @test_pmulhsu_b(<8 x i8> %a, <8 x i8> %b) { ; RV64-NEXT: ppaire.b a2, a3, a2 ; RV64-NEXT: ppaire.b a3, a5, a4 ; RV64-NEXT: ppaire.h a2, a3, a2 -; RV64-NEXT: pwcvtu.b a2, a2 +; RV64-NEXT: pwcvtu.wb a2, a2 ; RV64-NEXT: srli a3, a1, 56 ; RV64-NEXT: srli a4, a1, 48 ; RV64-NEXT: psext.h.b a2, a2 ; RV64-NEXT: ppaire.b a3, a4, a3 ; RV64-NEXT: srli a4, a1, 40 ; RV64-NEXT: srli a5, a1, 32 -; RV64-NEXT: pwcvtu.b a0, a0 +; RV64-NEXT: pwcvtu.wb a0, a0 ; RV64-NEXT: ppaire.b a4, a5, a4 ; RV64-NEXT: psext.h.b a0, a0 ; RV64-NEXT: ppaire.h a3, a4, a3 -; RV64-NEXT: pwcvtu.b a3, a3 -; RV64-NEXT: pwcvtu.b a1, a1 +; RV64-NEXT: pwcvtu.wb a3, a3 +; RV64-NEXT: pwcvtu.wb a1, a1 ; RV64-NEXT: pmul.w.h11 a4, a2, a3 ; RV64-NEXT: pmul.w.h00 a2, a2, a3 ; RV64-NEXT: pmul.w.h11 a3, a0, a1 @@ -2524,12 +2524,12 @@ define <8 x i8> @test_pmulhsu_b_commuted(<8 x i8> %a, <8 x i8> %b) { ; RV64-NEXT: ppaire.b a3, a5, a4 ; RV64-NEXT: srli a4, a1, 40 ; RV64-NEXT: srli a5, a1, 32 -; RV64-NEXT: pwcvtu.b a2, a2 +; RV64-NEXT: pwcvtu.wb a2, a2 ; RV64-NEXT: ppaire.b a4, a5, a4 -; RV64-NEXT: pwcvtu.b a0, a0 +; RV64-NEXT: pwcvtu.wb a0, a0 ; RV64-NEXT: ppaire.h a3, a4, a3 -; RV64-NEXT: pwcvtu.b a3, a3 -; RV64-NEXT: pwcvtu.b a1, a1 +; RV64-NEXT: pwcvtu.wb a3, a3 +; RV64-NEXT: pwcvtu.wb a1, a1 ; RV64-NEXT: psext.h.b a3, a3 ; RV64-NEXT: psext.h.b a1, a1 ; RV64-NEXT: pmul.w.h11 a4, a2, a3 @@ -4668,19 +4668,29 @@ define <2 x i32> @test_bitreverse_v2i32(<2 x i32> %a) { } define <4 x i16> @test_zext_v4i8_to_v4i16(<4 x i8> %a) { -; CHECK-LABEL: test_zext_v4i8_to_v4i16: -; CHECK: # %bb.0: 
-; CHECK-NEXT: pwcvtu.b a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: test_zext_v4i8_to_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: pwcvtu.b a0, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: test_zext_v4i8_to_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: pwcvtu.wb a0, a0 +; RV64-NEXT: ret %res = zext <4 x i8> %a to <4 x i16> ret <4 x i16> %res } define <2 x i32> @test_zext_v2i16_to_v2i32(<2 x i16> %a) { -; CHECK-LABEL: test_zext_v2i16_to_v2i32: -; CHECK: # %bb.0: -; CHECK-NEXT: pwcvtu.h a0, a0 -; CHECK-NEXT: ret +; RV32-LABEL: test_zext_v2i16_to_v2i32: +; RV32: # %bb.0: +; RV32-NEXT: pwcvtu.h a0, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: test_zext_v2i16_to_v2i32: +; RV64: # %bb.0: +; RV64-NEXT: pwcvtu.wh a0, a0 +; RV64-NEXT: ret %res = zext <2 x i16> %a to <2 x i32> ret <2 x i32> %res } @@ -4693,7 +4703,7 @@ define <4 x i16> @test_sext_v4i8_to_v4i16(<4 x i8> %a) { ; ; RV64-LABEL: test_sext_v4i8_to_v4i16: ; RV64: # %bb.0: -; RV64-NEXT: pwcvtu.b a0, a0 +; RV64-NEXT: pwcvtu.wb a0, a0 ; RV64-NEXT: psext.h.b a0, a0 ; RV64-NEXT: ret %res = sext <4 x i8> %a to <4 x i16> @@ -4708,7 +4718,7 @@ define <2 x i32> @test_sext_v2i16_to_v2i32(<2 x i16> %a) { ; ; RV64-LABEL: test_sext_v2i16_to_v2i32: ; RV64: # %bb.0: -; RV64-NEXT: pwcvtu.h a0, a0 +; RV64-NEXT: pwcvtu.wh a0, a0 ; RV64-NEXT: psext.w.h a0, a0 ; RV64-NEXT: ret %res = sext <2 x i16> %a to <2 x i32> diff --git a/llvm/test/MC/RISCV/rv64p-aliases-valid.s b/llvm/test/MC/RISCV/rv64p-aliases-valid.s index 16929bb2eef2a..bfbba59b1790b 100644 --- a/llvm/test/MC/RISCV/rv64p-aliases-valid.s +++ b/llvm/test/MC/RISCV/rv64p-aliases-valid.s @@ -126,36 +126,36 @@ pmsgt.w a0, a1, a2 pmsgtu.w a3, a4, a5 # CHECK-S-OBJ-NOALIAS: zip8p a4, a5, zero -# CHECK-S-OBJ: pwcvtu.b a4, a5 -pwcvtu.b a4, a5 +# CHECK-S-OBJ: pwcvtu.wb a4, a5 +pwcvtu.wb a4, a5 # CHECK-S-OBJ-NOALIAS: zip16p a6, a7, zero -# CHECK-S-OBJ: pwcvtu.h a6, a7 -pwcvtu.h a6, a7 +# CHECK-S-OBJ: pwcvtu.wh a6, a7 +pwcvtu.wh a6, a7 # CHECK-S-OBJ-NOALIAS: zip8p s0, zero, s1 -# CHECK-S-OBJ: pwcvth.b s0, s1 
-pwcvth.b s0, s1 +# CHECK-S-OBJ: pwcvth.wb s0, s1 +pwcvth.wb s0, s1 # CHECK-S-OBJ-NOALIAS: zip16p s2, zero, s3 -# CHECK-S-OBJ: pwcvth.h s2, s3 -pwcvth.h s2, s3 +# CHECK-S-OBJ: pwcvth.wh s2, s3 +pwcvth.wh s2, s3 # CHECK-S-OBJ-NOALIAS: unzip8p s3, s4, zero -# CHECK-S-OBJ: pncvt.b s3, s4 -pncvt.b s3, s4 +# CHECK-S-OBJ: pncvt.wb s3, s4 +pncvt.wb s3, s4 # CHECK-S-OBJ-NOALIAS: unzip16p s5, s6, zero -# CHECK-S-OBJ: pncvt.h s5, s6 -pncvt.h s5, s6 +# CHECK-S-OBJ: pncvt.wh s5, s6 +pncvt.wh s5, s6 # CHECK-S-OBJ-NOALIAS: unzip8hp s7, s8, zero -# CHECK-S-OBJ: pncvth.b s7, s8 -pncvth.b s7, s8 +# CHECK-S-OBJ: pncvth.wb s7, s8 +pncvth.wb s7, s8 # CHECK-S-OBJ-NOALIAS: unzip16hp s9, s10, zero -# CHECK-S-OBJ: pncvth.h s9, s10 -pncvth.h s9, s10 +# CHECK-S-OBJ: pncvth.wh s9, s10 +pncvth.wh s9, s10 # CHECK-S-OBJ-NOALIAS: pli.b s10, 17 # CHECK-S-OBJ: pli.b s10, 17 From 9a7248ebdfcfde1d60e967f61f3971c1fb05c820 Mon Sep 17 00:00:00 2001 From: Anutosh Bhat Date: Tue, 23 Jun 2026 10:04:11 +0530 Subject: [PATCH 119/511] [mlir][arith] Fix APInt bitwidth mismatch crash in int-range-optimizations (#205110) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes https://github.com/llvm/llvm-project/issues/204909 When an op's `areTypesCompatible()` hook accepts integers of different widths across a region boundary, the range analysis can propagate a constant range whose APInt bitwidth does not match the IR type of the destination value. This caused `IntegerAttr::get` to `assert` in `maybeReplaceWithConstant`. Fix by bailing out in `maybeReplaceWithConstant` when the bitwidths mismatch, and adding the same check to the needsReplacing lambda in matchAndRewrite. The second guard is necessary to mirror the existing isIntOrIndex() guard — without it the pattern claims success without changing the IR, causing the greedy rewrite driver to loop. 
We should now see ``` anutosh491@Anutoshs-MacBook-Air mlir-build % ./bin/mlir-opt -int-range-optimizations bin/a.mlir module { func.func @m0() -> i32 { %c1_i64 = arith.constant 1 : i64 %c0_i32 = arith.constant 0 : i32 %0 = "test.region_types_compat"(%c0_i32) ({ ^bb0(%arg0: i64): test.types_compat_yield %c1_i64 : i64 }) : (i32) -> i32 return %0 : i32 } } ``` --- .../Transforms/IntRangeOptimizations.cpp | 29 ++++++++++++++----- .../Dialect/Arith/int-range-opts-crash.mlir | 14 +++++++++ 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp b/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp index 9fcda39089b2c..298c0dc2f3bda 100644 --- a/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp +++ b/mlir/lib/Dialect/Arith/Transforms/IntRangeOptimizations.cpp @@ -75,6 +75,13 @@ LogicalResult maybeReplaceWithConstant(DataFlowSolver &solver, // will crash, so eagerly check for an integer type to avoid this. if (!getElementTypeOrSelf(type).isIntOrIndex()) return failure(); + + // Bail out if the inferred APInt bitwidth does not match the storage width + // of the IR type; IntegerAttr::get would assert otherwise. + unsigned storageWidth = ConstantIntRanges::getStorageBitwidth(type); + if (storageWidth != 0 && maybeConstValue->getBitWidth() != storageWidth) + return failure(); + Location loc = value.getLoc(); Operation *maybeDefiningOp = value.getDefiningOp(); Dialect *valueDialect = @@ -137,14 +144,22 @@ struct MaterializeKnownConstantValues : public RewritePattern { if (matchPattern(op, m_Constant())) return failure(); - // We need to check isIntOrIndex() here as well to avoid infinite loops in - // the greedy pattern rewriter. If we only check it in - // maybeReplaceWithConstant, this lambda might still return true for - // non-integral types, causing the pattern to match and claim success - // without making any changes, leading to non-convergence. 
+ // We need to check isIntOrIndex() and APInt bitwidth compatibility here + // as well to avoid infinite loops in the greedy pattern rewriter. If we + // only check in maybeReplaceWithConstant, this lambda might still return + // true for values that cannot be materialized, causing the pattern to + // match and claim success without making any changes, leading to + // non-convergence. auto needsReplacing = [&](Value v) { - return getElementTypeOrSelf(v.getType()).isIntOrIndex() && - getMaybeConstantValue(solver, v).has_value() && !v.use_empty(); + if (!getElementTypeOrSelf(v.getType()).isIntOrIndex()) + return false; + std::optional maybeConstValue = getMaybeConstantValue(solver, v); + if (!maybeConstValue.has_value() || v.use_empty()) + return false; + unsigned storageWidth = + ConstantIntRanges::getStorageBitwidth(v.getType()); + return storageWidth == 0 || + maybeConstValue->getBitWidth() == storageWidth; }; bool hasConstantResults = llvm::any_of(op->getResults(), needsReplacing); if (op->getNumRegions() == 0) diff --git a/mlir/test/Dialect/Arith/int-range-opts-crash.mlir b/mlir/test/Dialect/Arith/int-range-opts-crash.mlir index fa763c163160d..99aa03714edb9 100644 --- a/mlir/test/Dialect/Arith/int-range-opts-crash.mlir +++ b/mlir/test/Dialect/Arith/int-range-opts-crash.mlir @@ -1,5 +1,19 @@ // RUN: mlir-opt -int-range-optimizations %s | FileCheck %s +// CHECK-LABEL: func.func @repro_bitwidth_mismatch +func.func @repro_bitwidth_mismatch() -> i32 { + %c0_i32 = arith.constant 0 : i32 + // CHECK: test.region_types_compat + %0 = "test.region_types_compat"(%c0_i32) ({ + ^bb0(%arg0: i64): + %c1_i64 = arith.constant 1 : i64 + test.types_compat_yield %c1_i64 : i64 + }) : (i32) -> i32 + return %0 : i32 +} + +// ----- + // CHECK-LABEL: func.func @repro_crash() -> !test.i32 { func.func @repro_crash() -> !test.i32 { %cst = arith.constant 1 : i32 From e5387135645cf2845631481b1e88859a0f181191 Mon Sep 17 00:00:00 2001 From: Yonah Goldberg Date: Mon, 22 Jun 2026 21:36:44 -0700 
Subject: [PATCH 120/511] Patch tryCanonicalizeStructToVector to handle split slice tails (#201434) We choose a vector alloca over a struct alloca when all users of the alloca are memory or lifetime intrinsics. But we only accounted for slices that start in the corresponding partition. We have to also check that all split slice tails overlapping the partition are memory or lifetime intrinsics I also updated the `PassRegistry.def` to include the new pass option because we forgot to add that. --- llvm/lib/Passes/PassRegistry.def | 2 +- llvm/lib/Transforms/Scalar/SROA.cpp | 21 ++++++--- .../SROA/struct-to-vector-subpartition.ll | 44 +++++++++++++++++++ 3 files changed, 59 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 7bc0f71f106d3..370106e225a9b 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -725,7 +725,7 @@ FUNCTION_PASS_WITH_PARAMS( FUNCTION_PASS_WITH_PARAMS( "sroa", "SROAPass", [](SROAOptions PreserveCFG) { return SROAPass(PreserveCFG); }, - parseSROAOptions, "preserve-cfg;modify-cfg") + parseSROAOptions, "preserve-cfg;modify-cfg;aggregate-to-vector") FUNCTION_PASS_WITH_PARAMS( "structurizecfg", "StructurizeCFG", [](bool SkipUniformRegions) { diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp index 0563086539ddf..40f99e4341d47 100644 --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -5310,7 +5310,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { /// packed. This can sometimes eliminate allocas because structs cannot get /// promoted to LLVM values, but vectors can. /// -/// We only apply this transformation when all users of the alloca are memory +/// We only apply this transformation when all users of the partition are memory /// intrinsics. Otherwise, if there is a load or store of some other type to the /// partition, SROA would select that type. 
/// @@ -5347,20 +5347,27 @@ static FixedVectorType *tryCanonicalizeStructToVector(StructType *STy, if (StructSize != VectorSize) return nullptr; - for (const Slice &S : P) { + auto IsIgnorableOrMemIntrinsicSlice = [](const Slice &S) { if (S.isDead()) - continue; + return true; auto *U = S.getUse(); if (!U) - continue; + return true; User *Usr = U->getUser(); if (isa(Usr) || isa(Usr)) - continue; + return true; - if (!isa(Usr)) + return isa(Usr); + }; + + for (const Slice &S : P) + if (!IsIgnorableOrMemIntrinsicSlice(S)) + return nullptr; + + for (const Slice *S : P.splitSliceTails()) + if (!IsIgnorableOrMemIntrinsicSlice(*S)) return nullptr; - } return VTy; } diff --git a/llvm/test/Transforms/SROA/struct-to-vector-subpartition.ll b/llvm/test/Transforms/SROA/struct-to-vector-subpartition.ll index 9edb8492aa460..1599a1cae67ef 100644 --- a/llvm/test/Transforms/SROA/struct-to-vector-subpartition.ll +++ b/llvm/test/Transforms/SROA/struct-to-vector-subpartition.ll @@ -67,3 +67,47 @@ merge: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %sel, i64 16, i1 false) ret void } + +; SROA sees these slices: +; [0,8) load ptr +; [0,32) store i256, splittable +; [8,16) load i64, splittable +; [16,32) memcpy source, splittable +; +; These form three partitions: +; [0,8) contains the ptr load and the store i256 slice that starts at 0 +; [8,16) contains the i64 load, plus the store i256 split tail +; [16,32) contains the memcpy source, plus the store i256 split tail +; +; The [16,32) subpartition has type { i64, i64 }, and the only slice that +; starts in the partition is a memcpy. However, the whole-alloca i256 store is a +; split tail overlapping the subpartition, so it must block struct-to-vector +; fallback canonicalization. 
+ +; CHECK-LABEL: define void @test_split_tail_store_blocks_subpartition_type( +; CHECK-NOT: <2 x i64> +; CHECK: [[TAIL_SHIFT:%.*]] = lshr i256 %x, 128 +; CHECK-NOT: <2 x i64> +; CHECK: [[TAIL:%.*]] = trunc i256 [[TAIL_SHIFT]] to i128 +; CHECK-NOT: <2 x i64> +; CHECK: store i128 [[TAIL]], ptr %dst, align 8 +; CHECK-NOT: <2 x i64> +; CHECK: ret void +define void @test_split_tail_store_blocks_subpartition_type(ptr %dst, i256 %x) { +entry: + %a = alloca { ptr, i64, i64, i64 }, align 8 + store i256 %x, ptr %a, align 8 + + ; Force earlier partition boundaries so [16,32) is selected as a subpartition + ; whose type from getTypePartition is { i64, i64 }. + %p = load ptr, ptr %a, align 8 + %puse = ptrtoint ptr %p to i64 + %gep.a.8 = getelementptr inbounds i8, ptr %a, i64 8 + %v8 = load i64, ptr %gep.a.8, align 8 + %use = add i64 %puse, %v8 + store i64 %use, ptr %dst, align 8 + + %gep.a.16 = getelementptr inbounds i8, ptr %a, i64 16 + call void @llvm.memcpy.p0.p0.i64(ptr align 8 %dst, ptr align 8 %gep.a.16, i64 16, i1 false) + ret void +} From e3e3a78dc889e0e40d9f682ac2fe2e2fea6b72b6 Mon Sep 17 00:00:00 2001 From: Sam Elliott Date: Mon, 22 Jun 2026 22:19:15 -0700 Subject: [PATCH 121/511] [RISCV] Update Xqcilo Pseudos (#196422) This changes the Xqcilo pseudos to instead emit a sequence of `qc.e.li` followed by a standard load/store annotated with %qc.access. The new sequence is easier for our linker to relax. This Change was written with the assistance of AI. 
--- llvm/docs/ReleaseNotes.md | 1 + .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 131 ++++++++++++- llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td | 15 +- llvm/test/MC/RISCV/xqcilo-pseudos-invalid.s | 18 +- llvm/test/MC/RISCV/xqcilo-pseudos-valid.s | 179 ++++++++++++++---- 5 files changed, 288 insertions(+), 56 deletions(-) diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index cf0e8aeb97da1..213dd52d9a228 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -255,6 +255,7 @@ Makes programs 10x faster by doing Special New Thing. * Adds experimental assembler support for batched dot-product extensions(Zvqwbdota8i, Zvqwbdota16i, Zvfwbdota16bf, Zvfqwbdota8f and Zvfbdota32f). * Adds experimental assembler support for dot-product extensions(Zvqwdota8i, Zvqwdota16i, Zvfwdota16bf and Zvfqwdota8f). * `-mtune=generic` now uses the scheduling model from SpacemiT X60 instead of an empty scheduling model. +* The Xqcilo pseudos now emit sequences that can be relaxed. ### Changes to the WebAssembly Backend diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 9362a600b12b3..cfc943d3fe25f 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -160,6 +160,12 @@ class RISCVAsmParser : public MCTargetAsmParser { void emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, MCStreamer &Out, bool HasTmpReg); + // Helper to emit Xqcilo pseudo load/store as qc.e.li + PseudoQCAccess pair. + // For loads: qc.e.li rd, sym; lx rd, 0(rd), %qc.access(sym) + // For stores: qc.e.li rt, sym; sx rs, 0(rt), %qc.access(sym) + void emitQCELILoadStoreSymbol(MCInst &Inst, unsigned Opcode, SMLoc IDLoc, + MCStreamer &Out, bool HasTmpReg); + // Helper to emit pseudo sign/zero extend instruction. 
void emitPseudoExtend(MCInst &Inst, bool SignExtend, int64_t Width, SMLoc IDLoc, MCStreamer &Out); @@ -3735,6 +3741,91 @@ void RISCVAsmParser::emitLoadStoreSymbol(MCInst &Inst, unsigned Opcode, Out); } +void RISCVAsmParser::emitQCELILoadStoreSymbol(MCInst &Inst, unsigned Opcode, + SMLoc IDLoc, MCStreamer &Out, + bool HasTmpReg) { + // For loads (HasTmpReg=false): operands are [rd, symbol] + // qc.e.li rd, symbol + // lx rd, 0(rd), %qc.access(symbol) [possibly compressed] + // + // For stores (HasTmpReg=true): operands are [rt, rs, symbol] + // qc.e.li rt, symbol + // sx rs, 0(rt), %qc.access(symbol) [possibly compressed] + MCRegister AddrReg = Inst.getOperand(0).getReg(); + unsigned SymbolOpIdx = HasTmpReg ? 2 : 1; + const MCExpr *Symbol = Inst.getOperand(SymbolOpIdx).getExpr(); + + emitToStreamer(Out, + MCInstBuilder(RISCV::QC_E_LI).addReg(AddrReg).addExpr(Symbol)); + + MCContext &Ctx = getContext(); + const MCExpr *AccessExpr = + MCSpecifierExpr::create(Symbol, RISCV::S_QC_ACCESS, Ctx); + + // We have to manually compress the QCAccess pseudos as the current + // CompressPat mechanism does not support them. Each entry pairs the + // compressed opcode with the subtarget feature it requires. 
+ struct CompressedForm { + unsigned Opcode; + unsigned Feature; + }; + std::optional Compressed; + switch (Opcode) { + default: + break; + case RISCV::PseudoQCAccessLBU: + Compressed = {RISCV::PseudoQCAccessC_LBU, RISCV::FeatureStdExtZcb}; + break; + case RISCV::PseudoQCAccessLH: + Compressed = {RISCV::PseudoQCAccessC_LH, RISCV::FeatureStdExtZcb}; + break; + case RISCV::PseudoQCAccessLHU: + Compressed = {RISCV::PseudoQCAccessC_LHU, RISCV::FeatureStdExtZcb}; + break; + case RISCV::PseudoQCAccessLW: + Compressed = {RISCV::PseudoQCAccessC_LW, RISCV::FeatureStdExtZca}; + break; + case RISCV::PseudoQCAccessSB: + Compressed = {RISCV::PseudoQCAccessC_SB, RISCV::FeatureStdExtZcb}; + break; + case RISCV::PseudoQCAccessSH: + Compressed = {RISCV::PseudoQCAccessC_SH, RISCV::FeatureStdExtZcb}; + break; + case RISCV::PseudoQCAccessSW: + Compressed = {RISCV::PseudoQCAccessC_SW, RISCV::FeatureStdExtZca}; + break; + } + + // For stores, both the data register and the address register must be in + // GPRC for the compressed form; for loads AddrReg serves as both. + bool CanUseGPRC = + RISCVMCRegisterClasses[RISCV::GPRCRegClassID].contains(AddrReg); + if (HasTmpReg && CanUseGPRC) { + MCRegister DataReg = Inst.getOperand(1).getReg(); + CanUseGPRC = + RISCVMCRegisterClasses[RISCV::GPRCRegClassID].contains(DataReg); + } + + bool UseCompressed = + Compressed && getSTI().hasFeature(Compressed->Feature) && CanUseGPRC; + + unsigned ActualOpcode = UseCompressed ? 
Compressed->Opcode : Opcode; + if (HasTmpReg) { + MCRegister DataReg = Inst.getOperand(1).getReg(); + emitToStreamer(Out, MCInstBuilder(ActualOpcode) + .addReg(DataReg) + .addReg(AddrReg) + .addImm(0) + .addExpr(AccessExpr)); + } else { + emitToStreamer(Out, MCInstBuilder(ActualOpcode) + .addReg(AddrReg) + .addReg(AddrReg) + .addImm(0) + .addExpr(AccessExpr)); + } +} + void RISCVAsmParser::emitPseudoExtend(MCInst &Inst, bool SignExtend, int64_t Width, SMLoc IDLoc, MCStreamer &Out) { @@ -4197,23 +4288,18 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, emitLoadTLSGDAddress(Inst, IDLoc, Out); return false; case RISCV::PseudoLB: - case RISCV::PseudoQC_E_LB: emitLoadStoreSymbol(Inst, RISCV::LB, IDLoc, Out, /*HasTmpReg=*/false); return false; case RISCV::PseudoLBU: - case RISCV::PseudoQC_E_LBU: emitLoadStoreSymbol(Inst, RISCV::LBU, IDLoc, Out, /*HasTmpReg=*/false); return false; case RISCV::PseudoLH: - case RISCV::PseudoQC_E_LH: emitLoadStoreSymbol(Inst, RISCV::LH, IDLoc, Out, /*HasTmpReg=*/false); return false; case RISCV::PseudoLHU: - case RISCV::PseudoQC_E_LHU: emitLoadStoreSymbol(Inst, RISCV::LHU, IDLoc, Out, /*HasTmpReg=*/false); return false; case RISCV::PseudoLW: - case RISCV::PseudoQC_E_LW: emitLoadStoreSymbol(Inst, RISCV::LW, IDLoc, Out, /*HasTmpReg=*/false); return false; case RISCV::PseudoLWU: @@ -4238,15 +4324,12 @@ bool RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, emitLoadStoreSymbol(Inst, RISCV::FLQ, IDLoc, Out, /*HasTmpReg=*/true); return false; case RISCV::PseudoSB: - case RISCV::PseudoQC_E_SB: emitLoadStoreSymbol(Inst, RISCV::SB, IDLoc, Out, /*HasTmpReg=*/true); return false; case RISCV::PseudoSH: - case RISCV::PseudoQC_E_SH: emitLoadStoreSymbol(Inst, RISCV::SH, IDLoc, Out, /*HasTmpReg=*/true); return false; case RISCV::PseudoSW: - case RISCV::PseudoQC_E_SW: emitLoadStoreSymbol(Inst, RISCV::SW, IDLoc, Out, /*HasTmpReg=*/true); return false; case RISCV::PseudoSD: @@ -4255,6 +4338,38 @@ bool 
RISCVAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, case RISCV::PseudoSD_RV32: emitLoadStoreSymbol(Inst, RISCV::SD_RV32, IDLoc, Out, /*HasTmpReg=*/true); return false; + case RISCV::PseudoQC_E_LB: + emitQCELILoadStoreSymbol(Inst, RISCV::PseudoQCAccessLB, IDLoc, Out, + /*HasTmpReg=*/false); + return false; + case RISCV::PseudoQC_E_LBU: + emitQCELILoadStoreSymbol(Inst, RISCV::PseudoQCAccessLBU, IDLoc, Out, + /*HasTmpReg=*/false); + return false; + case RISCV::PseudoQC_E_LH: + emitQCELILoadStoreSymbol(Inst, RISCV::PseudoQCAccessLH, IDLoc, Out, + /*HasTmpReg=*/false); + return false; + case RISCV::PseudoQC_E_LHU: + emitQCELILoadStoreSymbol(Inst, RISCV::PseudoQCAccessLHU, IDLoc, Out, + /*HasTmpReg=*/false); + return false; + case RISCV::PseudoQC_E_LW: + emitQCELILoadStoreSymbol(Inst, RISCV::PseudoQCAccessLW, IDLoc, Out, + /*HasTmpReg=*/false); + return false; + case RISCV::PseudoQC_E_SB: + emitQCELILoadStoreSymbol(Inst, RISCV::PseudoQCAccessSB, IDLoc, Out, + /*HasTmpReg=*/true); + return false; + case RISCV::PseudoQC_E_SH: + emitQCELILoadStoreSymbol(Inst, RISCV::PseudoQCAccessSH, IDLoc, Out, + /*HasTmpReg=*/true); + return false; + case RISCV::PseudoQC_E_SW: + emitQCELILoadStoreSymbol(Inst, RISCV::PseudoQCAccessSW, IDLoc, Out, + /*HasTmpReg=*/true); + return false; case RISCV::PseudoFSH: emitLoadStoreSymbol(Inst, RISCV::FSH, IDLoc, Out, /*HasTmpReg=*/true); return false; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index d4a4295da76b9..20d48bf881029 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -1335,12 +1335,13 @@ def PseudoLongQC_E_BGEI : LongBcciPseudo; def PseudoLongQC_E_BLTUI : LongBcciPseudo; def PseudoLongQC_E_BGEUI : LongBcciPseudo; -// Load/Store pseudos with QC.E.* Mnemonics. These expand to an AUIPC + -// (Standard) Load/Store sequence, as this can materialize all 32-bit addresses, -// and is shorter than e.g. 
an AUIPC + Xqcilo Load/Store sequence. These -// sequences can be turned back into a single Xqcilo instruction using linker -// relaxation. -let Predicates = [HasVendorXqcilo, IsRV32] in { +// Load/Store pseudos with QC.E.* Mnemonics. These expand to a QC.E.LI + +// PseudoQCAccess Load/Store sequence. The QC_E_LI materializes the symbol +// address, and the PseudoQCAccess instruction carries a %qc.access relocation +// that enables linker relaxation back to a single Xqcilo instruction. +// Opportunistic compression is applied to the load/store when the registers +// are in the GPRC class and Zca/Zcb is available. +let Predicates = [HasVendorXqcilo, HasVendorXqcili, IsRV32] in { def PseudoQC_E_LB : PseudoLoad<"qc.e.lb">; def PseudoQC_E_LBU : PseudoLoad<"qc.e.lbu">; def PseudoQC_E_LH : PseudoLoad<"qc.e.lh">; @@ -1350,7 +1351,7 @@ def PseudoQC_E_LW : PseudoLoad<"qc.e.lw">; def PseudoQC_E_SB : PseudoStore<"qc.e.sb">; def PseudoQC_E_SH : PseudoStore<"qc.e.sh">; def PseudoQC_E_SW : PseudoStore<"qc.e.sw">; -} // Predicates = [HasVendorXqcilo, IsRV32] +} // Predicates = [HasVendorXqcilo, HasVendorXqcili, IsRV32] let Predicates = [HasShortForwardBranchIALU] in { def PseudoCCQC_LI : SFBQC_LI; diff --git a/llvm/test/MC/RISCV/xqcilo-pseudos-invalid.s b/llvm/test/MC/RISCV/xqcilo-pseudos-invalid.s index 6efc5a23345bc..d3f0ee5db90bc 100644 --- a/llvm/test/MC/RISCV/xqcilo-pseudos-invalid.s +++ b/llvm/test/MC/RISCV/xqcilo-pseudos-invalid.s @@ -1,5 +1,5 @@ # Xqcilo - Qualcomm uC Large Offset Load Store extension -# RUN: not llvm-mc %s -triple=riscv32 -mattr=+xqcilo \ +# RUN: not llvm-mc %s -triple=riscv32 -mattr=+xqcilo,+xqcili \ # RUN: 2>&1 | FileCheck -check-prefixes=CHECK-ENABLED %s # RUN: not llvm-mc %s -triple=riscv32 -mattr=-xqcilo \ # RUN: 2>&1 | FileCheck -check-prefixes=CHECK-DISABLED %s @@ -40,21 +40,21 @@ qc.e.sh a0, 0xf000, t0 # CHECK-DISABLED: [[@LINE+1]]:21: error: invalid operand for instruction qc.e.sw a0, 0xf000, t0 -# CHECK-DISABLED: [[@LINE+1]]:1: error: 
instruction requires the following: 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) +# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcili' (Qualcomm uC Load Large Immediate Extension), 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) qc.e.lb a0, undefined -# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) +# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcili' (Qualcomm uC Load Large Immediate Extension), 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) qc.e.lbu a0, undefined -# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) +# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcili' (Qualcomm uC Load Large Immediate Extension), 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) qc.e.lh a0, undefined -# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) +# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcili' (Qualcomm uC Load Large Immediate Extension), 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) qc.e.lhu a0, undefined -# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) +# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcili' (Qualcomm uC Load Large Immediate Extension), 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) qc.e.lw a0, undefined -# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) +# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcili' (Qualcomm uC Load Large Immediate Extension), 'Xqcilo' (Qualcomm uC Large Offset Load Store 
Extension) qc.e.sb a0, undefined, t0 -# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) +# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcili' (Qualcomm uC Load Large Immediate Extension), 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) qc.e.sh a0, undefined, t0 -# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) +# CHECK-DISABLED: [[@LINE+1]]:1: error: instruction requires the following: 'Xqcili' (Qualcomm uC Load Large Immediate Extension), 'Xqcilo' (Qualcomm uC Large Offset Load Store Extension) qc.e.sw a0, undefined, t0 # CHECK-ENABLED: [[@LINE+2]]:1: error: too few operands for instruction diff --git a/llvm/test/MC/RISCV/xqcilo-pseudos-valid.s b/llvm/test/MC/RISCV/xqcilo-pseudos-valid.s index 2330564351f15..647b2b6700364 100644 --- a/llvm/test/MC/RISCV/xqcilo-pseudos-valid.s +++ b/llvm/test/MC/RISCV/xqcilo-pseudos-valid.s @@ -1,43 +1,158 @@ # Xqcilo - Qualcomm uC Large Offset Load Store extension -# RUN: llvm-mc %s -triple=riscv32 -mattr=+xqcilo \ -# RUN: | FileCheck -check-prefixes=CHECK %s +# RUN: llvm-mc %s -triple=riscv32 -mattr=+xqcilo,+xqcili -M no-aliases \ +# RUN: | FileCheck -check-prefixes=CHECK,CHECK-BASE %s +# RUN: llvm-mc %s -triple=riscv32 -mattr=+xqcilo,+xqcili,+zca -M no-aliases \ +# RUN: | FileCheck -check-prefixes=CHECK,CHECK-ZCA %s +# RUN: llvm-mc %s -triple=riscv32 -mattr=+xqcilo,+xqcili,+zcb -M no-aliases \ +# RUN: | FileCheck -check-prefixes=CHECK,CHECK-ZCB %s +# RUN: llvm-mc %s -triple=riscv32 -mattr=+xqcilo,+xqcili,+zcb -filetype=obj \ +# RUN: | llvm-objdump -d -r --mattr=+xqcilo,+xqcili,+zcb \ +# RUN: --no-print-imm-hex - \ +# RUN: | FileCheck -check-prefix=CHECK-OBJ %s -# CHECK-LABEL: .Lpcrel_hi0 -# CHECK-NEXT: auipc a0, %pcrel_hi(undefined) -# CHECK-NEXT: lb a0, %pcrel_lo(.Lpcrel_hi0)(a0) -qc.e.lb a0, undefined +# Basic expansion — 
use t1 (x6, not in GPRC) so no Zcb compression occurs. +# +xqcilo implies Zca, so qc.e.li always compresses to c.li with -M no-aliases. -# CHECK-LABEL: .Lpcrel_hi1 -# CHECK-NEXT: auipc a0, %pcrel_hi(undefined) -# CHECK-NEXT: lbu a0, %pcrel_lo(.Lpcrel_hi1)(a0) +# CHECK: c.li t1, undefined +# CHECK-NEXT: lb t1, 0(t1), %qc.access(undefined) +# CHECK-OBJ: qc.e.li t1, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: lb t1, 0(t1) +qc.e.lb t1, undefined + +# CHECK: c.li t1, undefined +# CHECK-NEXT: lbu t1, 0(t1), %qc.access(undefined) +# CHECK-OBJ: qc.e.li t1, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: lbu t1, 0(t1) +qc.e.lbu t1, undefined + +# CHECK: c.li t1, undefined +# CHECK-NEXT: lh t1, 0(t1), %qc.access(undefined) +# CHECK-OBJ: qc.e.li t1, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: lh t1, 0(t1) +qc.e.lh t1, undefined + +# CHECK: c.li t1, undefined +# CHECK-NEXT: lhu t1, 0(t1), %qc.access(undefined) +# CHECK-OBJ: qc.e.li t1, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: lhu t1, 0(t1) +qc.e.lhu t1, undefined + +# CHECK: c.li t1, undefined +# CHECK-NEXT: lw t1, 0(t1), %qc.access(undefined) +# CHECK-OBJ: qc.e.li t1, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: lw t1, 0(t1) +qc.e.lw t1, undefined + +# Stores: address register t1 (non-GPRC), no Zcb compression. 
+ +# CHECK: c.li t1, undefined +# CHECK-NEXT: sb a0, 0(t1), %qc.access(undefined) +# CHECK-OBJ: qc.e.li t1, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: sb a0, 0(t1) +qc.e.sb a0, undefined, t1 + +# CHECK: c.li t1, undefined +# CHECK-NEXT: sh a0, 0(t1), %qc.access(undefined) +# CHECK-OBJ: qc.e.li t1, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: sh a0, 0(t1) +qc.e.sh a0, undefined, t1 + +# CHECK: c.li t1, undefined +# CHECK-NEXT: sw a0, 0(t1), %qc.access(undefined) +# CHECK-OBJ: qc.e.li t1, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: sw a0, 0(t1) +qc.e.sw a0, undefined, t1 + +# lw with a GPRC register (a0 = x10): +xqcilo implies Zca, so lw always +# compresses to c.lw in all runs. The objdump shows the c.lw alias as lw. + +# CHECK: c.li a0, undefined +# CHECK-NEXT: c.lw a0, 0(a0), %qc.access(undefined) +# CHECK-OBJ: qc.e.li a0, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: lw a0, 0(a0) +qc.e.lw a0, undefined + +# sw with GPRC registers: +xqcilo implies Zca, so sw always compresses to +# c.sw in all runs. The objdump shows the c.sw alias as sw. + +# CHECK: c.li a1, undefined +# CHECK-NEXT: c.sw a0, 0(a1), %qc.access(undefined) +# CHECK-OBJ: qc.e.li a1, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: sw a0, 0(a1) +qc.e.sw a0, undefined, a1 + +# lbu/lh/lhu with GPRC registers: compressed to c.lbu/c.lh/c.lhu only with Zcb. +# The objdump shows the Zcb compressed forms as their unaliased equivalents. 
+ +# CHECK: c.li a0, undefined +# CHECK-BASE-NEXT: lbu a0, 0(a0), %qc.access(undefined) +# CHECK-ZCA-NEXT: lbu a0, 0(a0), %qc.access(undefined) +# CHECK-ZCB-NEXT: c.lbu a0, 0(a0), %qc.access(undefined) +# CHECK-OBJ: qc.e.li a0, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: lbu a0, 0(a0) qc.e.lbu a0, undefined -# CHECK-LABEL: .Lpcrel_hi2 -# CHECK-NEXT: auipc a0, %pcrel_hi(undefined) -# CHECK-NEXT: lh a0, %pcrel_lo(.Lpcrel_hi2)(a0) +# CHECK: c.li a0, undefined +# CHECK-BASE-NEXT: lh a0, 0(a0), %qc.access(undefined) +# CHECK-ZCA-NEXT: lh a0, 0(a0), %qc.access(undefined) +# CHECK-ZCB-NEXT: c.lh a0, 0(a0), %qc.access(undefined) +# CHECK-OBJ: qc.e.li a0, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: lh a0, 0(a0) qc.e.lh a0, undefined -# CHECK-LABEL: .Lpcrel_hi3 -# CHECK-NEXT: auipc a0, %pcrel_hi(undefined) -# CHECK-NEXT: lhu a0, %pcrel_lo(.Lpcrel_hi3)(a0) +# CHECK: c.li a0, undefined +# CHECK-BASE-NEXT: lhu a0, 0(a0), %qc.access(undefined) +# CHECK-ZCA-NEXT: lhu a0, 0(a0), %qc.access(undefined) +# CHECK-ZCB-NEXT: c.lhu a0, 0(a0), %qc.access(undefined) +# CHECK-OBJ: qc.e.li a0, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: lhu a0, 0(a0) qc.e.lhu a0, undefined -# CHECK-LABEL: .Lpcrel_hi4 -# CHECK-NEXT: auipc a0, %pcrel_hi(undefined) -# CHECK-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi4)(a0) -qc.e.lw a0, undefined - -# CHECK-LABEL: .Lpcrel_hi5 -# CHECK-NEXT: auipc t0, %pcrel_hi(undefined) -# CHECK-NEXT: sb a0, %pcrel_lo(.Lpcrel_hi5)(t0) -qc.e.sb a0, undefined, t0 +# sb/sh with GPRC registers: compressed to c.sb/c.sh only with Zcb. +# The objdump shows the Zcb compressed forms as their unaliased equivalents. 
-# CHECK-LABEL: .Lpcrel_hi6 -# CHECK-NEXT: auipc t0, %pcrel_hi(undefined) -# CHECK-NEXT: sh a0, %pcrel_lo(.Lpcrel_hi6)(t0) -qc.e.sh a0, undefined, t0 +# CHECK: c.li a1, undefined +# CHECK-BASE-NEXT: sb a0, 0(a1), %qc.access(undefined) +# CHECK-ZCA-NEXT: sb a0, 0(a1), %qc.access(undefined) +# CHECK-ZCB-NEXT: c.sb a0, 0(a1), %qc.access(undefined) +# CHECK-OBJ: qc.e.li a1, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: sb a0, 0(a1) +qc.e.sb a0, undefined, a1 -# CHECK-LABEL: .Lpcrel_hi7 -# CHECK-NEXT: auipc t0, %pcrel_hi(undefined) -# CHECK-NEXT: sw a0, %pcrel_lo(.Lpcrel_hi7)(t0) -qc.e.sw a0, undefined, t0 +# CHECK: c.li a1, undefined +# CHECK-BASE-NEXT: sh a0, 0(a1), %qc.access(undefined) +# CHECK-ZCA-NEXT: sh a0, 0(a1), %qc.access(undefined) +# CHECK-ZCB-NEXT: c.sh a0, 0(a1), %qc.access(undefined) +# CHECK-OBJ: qc.e.li a1, 0 +# CHECK-OBJ-NEXT: R_RISCV_VENDOR QUALCOMM +# CHECK-OBJ-NEXT: R_RISCV_QC_E_32 undefined +# CHECK-OBJ-NEXT: sh a0, 0(a1) +qc.e.sh a0, undefined, a1 From 520a0326899aacc74f9249adf326478b2c14f513 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Mon, 22 Jun 2026 22:22:02 -0700 Subject: [PATCH 122/511] Revert "[flang][cuda][openacc] Emit an error when CUDA symbols are imported with CUDA disabled" (#205254) Reverts llvm/llvm-project#205207 --- flang/lib/Semantics/mod-file.cpp | 29 ----------------------------- flang/test/Semantics/modfile84.f90 | 17 ----------------- 2 files changed, 46 deletions(-) delete mode 100644 flang/test/Semantics/modfile84.f90 diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index ac44f17009344..89a535c6ff6f9 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -11,7 +11,6 @@ #include "flang/Common/restorer.h" #include "flang/Evaluate/tools.h" #include 
"flang/Parser/message.h" -#include "flang/Parser/parse-tree-visitor.h" #include "flang/Parser/parsing.h" #include "flang/Parser/unparse.h" #include "flang/Semantics/scope.h" @@ -73,7 +72,6 @@ static bool FileContentsMatch( const std::string &, const std::string &, const std::string &); static ModuleCheckSumType ComputeCheckSum(const std::string_view &); static std::string CheckSumString(ModuleCheckSumType); -static bool ProgramHasCUDAAttrs(const parser::Program &); // Collect symbols needed for a subprogram interface class SubprogramSymbolCollector { @@ -1705,13 +1703,6 @@ Scope *ModFileReader::Read(SourceName name, std::optional isIntrinsic, return nullptr; } parser::Program &parseTree{context_.SaveParseTree(std::move(*parsedProgram))}; - if (context_.languageFeatures().IsEnabled(common::LanguageFeature::OpenACC) && - !context_.languageFeatures().IsEnabled(common::LanguageFeature::CUDA) && - ProgramHasCUDAAttrs(parseTree)) { - Say("use", name, ancestorName, - "CUDA is not enabled, but '%s' defines CUDA symbols"_err_en_US, - sourceFile->path()); - } Scope *parentScope; // the scope this module/submodule goes into if (!isIntrinsic.has_value()) { for (const auto &dir : context_.intrinsicModuleDirectories()) { @@ -1832,26 +1823,6 @@ static std::optional GetSubmoduleParent( } } -struct CUDAAttrProgramVisitor { - template bool Pre(const A &) { return true; } - template void Post(const A &) {} - bool Pre(const common::CUDADataAttr &) { - foundCUDAAttrs = true; - return false; - } - bool Pre(const common::CUDASubprogramAttrs &) { - foundCUDAAttrs = true; - return false; - } - bool foundCUDAAttrs{false}; -}; - -static bool ProgramHasCUDAAttrs(const parser::Program &program) { - CUDAAttrProgramVisitor visitor; - parser::Walk(program, visitor); - return visitor.foundCUDAAttrs; -} - void SubprogramSymbolCollector::Collect() { const auto &details{symbol_.get()}; isInterface_ = details.isInterface(); diff --git a/flang/test/Semantics/modfile84.f90 
b/flang/test/Semantics/modfile84.f90 deleted file mode 100644 index 2abcd8083eea6..0000000000000 --- a/flang/test/Semantics/modfile84.f90 +++ /dev/null @@ -1,17 +0,0 @@ -! RUN: split-file %s %t -! RUN: %flang_fc1 -fsyntax-only -x cuda -module-dir %t %t/m.cuf -! RUN: not %flang_fc1 -fsyntax-only -fopenacc -module-dir %t %t/use.f90 2>&1 | FileCheck %s - -!--- m.cuf -module modfile84m - real, device :: d -contains - attributes(device) subroutine s() - end subroutine -end module - -!--- use.f90 -use modfile84m -end - -! CHECK: error: Cannot use module file for module 'modfile84m': CUDA is not enabled, but '{{.*modfile84m.mod}}' defines CUDA symbols From 4524e526e355d65043362e50a38e8cd97c284b30 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 23 Jun 2026 15:24:00 +1000 Subject: [PATCH 123/511] [orc-rt] Replace AAHandlerTraits with CallableArgInfo. NFCI. (#205257) CallableArgInfo provides a superset of AAHandlerTraits functionality, so we don't need the latter. --- orc-rt/include/orc-rt/AllocAction.h | 25 ++----------------------- 1 file changed, 2 insertions(+), 23 deletions(-) diff --git a/orc-rt/include/orc-rt/AllocAction.h b/orc-rt/include/orc-rt/AllocAction.h index 6d9c8ed2146cd..25d27c4748388 100644 --- a/orc-rt/include/orc-rt/AllocAction.h +++ b/orc-rt/include/orc-rt/AllocAction.h @@ -13,34 +13,13 @@ #ifndef ORC_RT_ALLOCACTION_H #define ORC_RT_ALLOCACTION_H +#include "orc-rt/CallableTraitsHelper.h" #include "orc-rt/Error.h" #include "orc-rt/WrapperFunction.h" #include namespace orc_rt { -namespace detail { - -template -struct AAHandlerTraits - : public AAHandlerTraits< - decltype(&std::remove_cv_t>:: - operator())> {}; - -template -struct AAHandlerTraits { - typedef std::tuple ArgTuple; -}; - -template -struct AAHandlerTraits - : public AAHandlerTraits {}; - -template -struct AAHandlerTraits - : public AAHandlerTraits {}; - -} // namespace detail /// An AllocActionFn is a function that takes an argument blob and returns an /// empty WrapperFunctionBuffer on 
success, or an out-of-band error on failure. @@ -52,7 +31,7 @@ struct AllocActionFunction { template static WrapperFunctionBuffer handle(const char *ArgData, size_t ArgSize, Deserializer &&D, Handler &&H) { - typename detail::AAHandlerTraits::ArgTuple Args; + typename CallableArgInfo::args_tuple_type Args; if (!D.deserialize(ArgData, ArgSize, Args)) return WrapperFunctionBuffer::createOutOfBandError( "Could not deserialize allocation action argument buffer"); From 7c6560e91a05426c0ebe3e82dc48af89cf74b980 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 22 Jun 2026 23:15:03 -0700 Subject: [PATCH 124/511] [RISCV] Avoid implicit conversions from MCRegister to MCPhysReg. NFC (#205260) --- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 50184f74f9634..986383d714472 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -1733,9 +1733,9 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, // Do not append a pair that's already in the CSR list. if (CSRSet.contains(Pair)) continue; - MCPhysReg EvenReg = TRI.getSubReg(Pair, RISCV::sub_gpr_even); - MCPhysReg OddReg = TRI.getSubReg(Pair, RISCV::sub_gpr_odd); - if (CSRSet.contains(EvenReg) && CSRSet.contains(OddReg)) { + MCRegister EvenReg = TRI.getSubReg(Pair, RISCV::sub_gpr_even); + MCRegister OddReg = TRI.getSubReg(Pair, RISCV::sub_gpr_odd); + if (CSRSet.contains(EvenReg.id()) && CSRSet.contains(OddReg.id())) { NewCSRs.push_back(Pair); CSRSet.insert(Pair); } @@ -1749,12 +1749,13 @@ void RISCVFrameLowering::determineCalleeSaves(MachineFunction &MF, // register bit. For GPRPair, only check sub_gpr_even and sub_gpr_odd, not // aliases like X8_W or X8_H which are not set in SavedRegs. 
for (unsigned i = 0; CSRegs[i]; ++i) { - unsigned CSReg = CSRegs[i]; + MCRegister CSReg = CSRegs[i]; bool CombineToSuperReg; if (RISCV::GPRPairRegClass.contains(CSReg)) { - MCPhysReg EvenReg = TRI.getSubReg(CSReg, RISCV::sub_gpr_even); - MCPhysReg OddReg = TRI.getSubReg(CSReg, RISCV::sub_gpr_odd); - CombineToSuperReg = SavedRegs.test(EvenReg) && SavedRegs.test(OddReg); + MCRegister EvenReg = TRI.getSubReg(CSReg, RISCV::sub_gpr_even); + MCRegister OddReg = TRI.getSubReg(CSReg, RISCV::sub_gpr_odd); + CombineToSuperReg = + SavedRegs.test(EvenReg.id()) && SavedRegs.test(OddReg.id()); // If s0(x8) is used as FP we can't generate load/store pair because it // breaks the frame chain. if (hasFP(MF) && CSReg == RISCV::X8_X9) From 98024e668a6d860b382c01f6f09df199ae9652ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Benics?= Date: Tue, 23 Jun 2026 08:24:53 +0200 Subject: [PATCH 125/511] Reland "[clang][ssaf][NFC] Move SSAF flags from FrontendOptions to a dedicated SSAFOptions" (#204798) Second attempt of #204686 This class will help keeping SSAF options apart from generic FrontendOptions. It is inspired by AnalyzerOptions. This way all of these SSAF (and future) options will be at a centralized place. 
In preparation of rdar://179151023 --- .../include/clang/Frontend/CompilerInstance.h | 5 ++ .../clang/Frontend/CompilerInvocation.h | 25 ++++++--- .../include/clang/Frontend/FrontendOptions.h | 24 +-------- clang/include/clang/Frontend/SSAFOptions.h | 52 ++++++++++++++++++ clang/include/clang/Options/Options.td | 12 +++-- clang/lib/Frontend/CompilerInvocation.cpp | 32 ++++++++++- .../ExecuteCompilerInvocation.cpp | 3 +- .../TUSummaryExtractorFrontendAction.cpp | 25 ++++----- .../TUSummaryExtractorFrontendActionTest.cpp | 53 ++++++++++--------- 9 files changed, 156 insertions(+), 75 deletions(-) create mode 100644 clang/include/clang/Frontend/SSAFOptions.h diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h index bb0eddb918623..24488e053c628 100644 --- a/clang/include/clang/Frontend/CompilerInstance.h +++ b/clang/include/clang/Frontend/CompilerInstance.h @@ -317,6 +317,11 @@ class CompilerInstance : public ModuleLoader { return Invocation->getFrontendOpts(); } + ssaf::SSAFOptions &getSSAFOpts() { return Invocation->getSSAFOpts(); } + const ssaf::SSAFOptions &getSSAFOpts() const { + return Invocation->getSSAFOpts(); + } + HeaderSearchOptions &getHeaderSearchOpts() { return Invocation->getHeaderSearchOpts(); } diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h index 6fa6cd5d95534..03097aefacf50 100644 --- a/clang/include/clang/Frontend/CompilerInvocation.h +++ b/clang/include/clang/Frontend/CompilerInvocation.h @@ -51,6 +51,10 @@ class HeaderSearchOptions; class PreprocessorOptions; class TargetOptions; +namespace ssaf { +class SSAFOptions; +} // namespace ssaf + // This lets us create the DiagnosticsEngine with a properly-filled-out // DiagnosticOptions instance. std::unique_ptr @@ -116,6 +120,9 @@ class CompilerInvocationBase { /// Options controlling preprocessed output. 
std::shared_ptr PreprocessorOutputOpts; + /// Options controlling the Scalable Static Analysis Framework (SSAF). + std::shared_ptr SSAFOpts; + /// Dummy tag type whose instance can be passed into the constructor to /// prevent creation of the reference-counted option objects. struct EmptyConstructor {}; @@ -150,6 +157,7 @@ class CompilerInvocationBase { const PreprocessorOutputOptions &getPreprocessorOutputOpts() const { return *PreprocessorOutputOpts; } + const ssaf::SSAFOptions &getSSAFOpts() const { return *SSAFOpts; } /// @} /// Visitation. @@ -247,19 +255,20 @@ class CompilerInvocation : public CompilerInvocationBase { /// @{ // Note: These need to be pulled in manually. Otherwise, they get hidden by // the mutable getters with the same names. - using CompilerInvocationBase::getLangOpts; - using CompilerInvocationBase::getTargetOpts; - using CompilerInvocationBase::getDiagnosticOpts; - using CompilerInvocationBase::getHeaderSearchOpts; - using CompilerInvocationBase::getPreprocessorOpts; using CompilerInvocationBase::getAnalyzerOpts; - using CompilerInvocationBase::getMigratorOpts; using CompilerInvocationBase::getAPINotesOpts; using CompilerInvocationBase::getCodeGenOpts; + using CompilerInvocationBase::getDependencyOutputOpts; + using CompilerInvocationBase::getDiagnosticOpts; using CompilerInvocationBase::getFileSystemOpts; using CompilerInvocationBase::getFrontendOpts; - using CompilerInvocationBase::getDependencyOutputOpts; + using CompilerInvocationBase::getHeaderSearchOpts; + using CompilerInvocationBase::getLangOpts; + using CompilerInvocationBase::getMigratorOpts; + using CompilerInvocationBase::getPreprocessorOpts; using CompilerInvocationBase::getPreprocessorOutputOpts; + using CompilerInvocationBase::getSSAFOpts; + using CompilerInvocationBase::getTargetOpts; /// @} /// Mutable getters. 
@@ -281,6 +290,7 @@ class CompilerInvocation : public CompilerInvocationBase { PreprocessorOutputOptions &getPreprocessorOutputOpts() { return *PreprocessorOutputOpts; } + ssaf::SSAFOptions &getSSAFOpts() { return *SSAFOpts; } /// @} /// Create a compiler invocation from a list of input options. @@ -392,6 +402,7 @@ class CowCompilerInvocation : public CompilerInvocationBase { FrontendOptions &getMutFrontendOpts(); DependencyOutputOptions &getMutDependencyOutputOpts(); PreprocessorOutputOptions &getMutPreprocessorOutputOpts(); + ssaf::SSAFOptions &getMutSSAFOpts(); /// @} }; diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index 7c242f6e94fe0..a8627ea5d47a4 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -543,27 +543,6 @@ class FrontendOptions { /// minimization hints. std::string DumpMinimizationHintsPath; - /// List of SSAF extractors to enable. - std::vector SSAFExtractSummaries; - - /// The TU summary output file with the file extension representing the file - /// format. - std::string SSAFTUSummaryFile; - - /// Stable identifier for this translation unit, used as the name of the - /// `CompilationUnit` `BuildNamespace` of every produced TU summary. The - /// caller (typically the build system) supplies a value that is constant - /// across stages of the SSAF pipeline. - std::string SSAFCompilationUnitId; - - /// Show available SSAF summary extractors. - LLVM_PREFERRED_TYPE(bool) - unsigned SSAFShowExtractors : 1; - - /// Show available SSAF serialization formats. 
- LLVM_PREFERRED_TYPE(bool) - unsigned SSAFShowFormats : 1; - public: FrontendOptions() : DisableFree(false), RelocatablePCH(false), ShowHelp(false), @@ -581,8 +560,7 @@ class FrontendOptions { EmitPrettySymbolGraphs(false), GenReducedBMI(false), UseClangIRPipeline(false), ClangIRDisablePasses(false), ClangIRDisableCIRVerifier(false), ClangIREnableIdiomRecognizer(false), - TimeTraceGranularity(500), TimeTraceVerbose(false), - SSAFShowExtractors(false), SSAFShowFormats(false) {} + TimeTraceGranularity(500), TimeTraceVerbose(false) {} /// getInputKindForExtension - Return the appropriate input kind for a file /// extension. For example, "c" would return Language::C. diff --git a/clang/include/clang/Frontend/SSAFOptions.h b/clang/include/clang/Frontend/SSAFOptions.h new file mode 100644 index 0000000000000..738262cc4a713 --- /dev/null +++ b/clang/include/clang/Frontend/SSAFOptions.h @@ -0,0 +1,52 @@ +//===- SSAFOptions.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FRONTEND_SSAFOPTIONS_H +#define LLVM_CLANG_FRONTEND_SSAFOPTIONS_H + +#include "llvm/Support/Compiler.h" +#include +#include + +namespace clang::ssaf { + +class SSAFOptions { +public: + /// List of SSAF extractors to enable. + /// Controlled by: --ssaf-extract-summaries + std::vector ExtractSummaries; + + /// The TU summary output file with the file extension representing the + /// serialization format. + /// Controlled by: --ssaf-tu-summary-file + std::string TUSummaryFile; + + /// Stable identifier used as the name of the `CompilationUnit` + /// `BuildNamespace` of every produced TU summary. 
+ /// Controlled by: --ssaf-compilation-unit-id + std::string CompilationUnitId; + + /// Show the list of available SSAF summary extractors and exit. + /// Controlled by: --ssaf-list-extractors + LLVM_PREFERRED_TYPE(bool) + unsigned ShowExtractors : 1; + + /// Show the list of available SSAF serialization formats and exit. + /// Controlled by: --ssaf-list-formats + LLVM_PREFERRED_TYPE(bool) + unsigned ShowFormats : 1; + + SSAFOptions() { + ShowExtractors = false; + ShowFormats = false; + }; +}; + +} // namespace clang::ssaf + +#endif // LLVM_CLANG_FRONTEND_SSAFOPTIONS_H diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 377897a15f746..fd13d0556083a 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -345,6 +345,8 @@ class FileSystemOpts : KeyPathAndMacro<"FileSystemOpts.", base, "FILE_SYSTEM_"> {} class AnalyzerOpts : KeyPathAndMacro<"AnalyzerOpts->", base, "ANALYZER_"> {} +class SSAFOpts + : KeyPathAndMacro<"SSAFOpts.", base, "SSAF_"> {} class MigratorOpts : KeyPathAndMacro<"MigratorOpts.", base, "MIGRATOR_"> {} @@ -947,7 +949,7 @@ def _ssaf_extract_summaries : Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Comma-separated list of summary names to extract">, - MarshallingInfoStringVector>; + MarshallingInfoStringVector>; def _ssaf_tu_summary_file : Joined<["--"], "ssaf-tu-summary-file=">, MetaVarName<".">, @@ -956,19 +958,19 @@ def _ssaf_tu_summary_file : HelpText< "The output file for the extracted summaries. 
" "The extension selects which file format to use.">, - MarshallingInfoString>; + MarshallingInfoString>; def _ssaf_list_extractors : Flag<["--"], "ssaf-list-extractors">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Display the list of available SSAF summary extractors">, - MarshallingInfoFlag>; + MarshallingInfoFlag>; def _ssaf_list_formats : Flag<["--"], "ssaf-list-formats">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Display the list of available SSAF serialization formats">, - MarshallingInfoFlag>; + MarshallingInfoFlag>; def _ssaf_compilation_unit_id : Joined<["--"], "ssaf-compilation-unit-id=">, MetaVarName<"">, @@ -978,7 +980,7 @@ def _ssaf_compilation_unit_id : "Stable identifier used as the CompilationUnit namespace name of every " "produced SSAF TU summary. Required when '--ssaf-tu-summary-file=' is " "set.">, - MarshallingInfoString>; + MarshallingInfoString>; def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index ca2d02c7dbd97..94ca546a5bcb6 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -32,6 +32,7 @@ #include "clang/Frontend/FrontendOptions.h" #include "clang/Frontend/MigratorOptions.h" #include "clang/Frontend/PreprocessorOutputOptions.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/Frontend/TextDiagnosticBuffer.h" #include "clang/Frontend/Utils.h" #include "clang/Lex/HeaderSearchOptions.h" @@ -133,7 +134,8 @@ CompilerInvocationBase::CompilerInvocationBase() FSOpts(std::make_shared()), FrontendOpts(std::make_shared()), DependencyOutputOpts(std::make_shared()), - PreprocessorOutputOpts(std::make_shared()) {} + PreprocessorOutputOpts(std::make_shared()), + SSAFOpts(std::make_shared()) {} CompilerInvocationBase & CompilerInvocationBase::deep_copy_assign(const CompilerInvocationBase &X) { @@ -151,6 +153,7 @@ 
CompilerInvocationBase::deep_copy_assign(const CompilerInvocationBase &X) { FrontendOpts = make_shared_copy(X.getFrontendOpts()); DependencyOutputOpts = make_shared_copy(X.getDependencyOutputOpts()); PreprocessorOutputOpts = make_shared_copy(X.getPreprocessorOutputOpts()); + SSAFOpts = make_shared_copy(X.getSSAFOpts()); } return *this; } @@ -171,6 +174,7 @@ CompilerInvocationBase::shallow_copy_assign(const CompilerInvocationBase &X) { FrontendOpts = X.FrontendOpts; DependencyOutputOpts = X.DependencyOutputOpts; PreprocessorOutputOpts = X.PreprocessorOutputOpts; + SSAFOpts = X.SSAFOpts; } return *this; } @@ -237,6 +241,10 @@ FrontendOptions &CowCompilerInvocation::getMutFrontendOpts() { return ensureOwned(FrontendOpts); } +ssaf::SSAFOptions &CowCompilerInvocation::getMutSSAFOpts() { + return ensureOwned(SSAFOpts); +} + DependencyOutputOptions &CowCompilerInvocation::getMutDependencyOutputOpts() { return ensureOwned(DependencyOutputOpts); } @@ -1034,6 +1042,26 @@ static void GenerateAnalyzerArgs(const AnalyzerOptions &Opts, // Nothing to generate for FullCompilerInvocation. } +static void GenerateSSAFArgs(const ssaf::SSAFOptions &SSAFOpts, + ArgumentConsumer Consumer) { +#define SSAF_OPTION_WITH_MARSHALLING(...) \ + GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) +#include "clang/Options/Options.inc" +#undef SSAF_OPTION_WITH_MARSHALLING +} + +static bool ParseSSAFArgs(ssaf::SSAFOptions &SSAFOpts, ArgList &Args, + DiagnosticsEngine &Diags) { + unsigned NumErrorsBefore = Diags.getNumErrors(); + +#define SSAF_OPTION_WITH_MARSHALLING(...) 
\ + PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) +#include "clang/Options/Options.inc" +#undef SSAF_OPTION_WITH_MARSHALLING + + return Diags.getNumErrors() == NumErrorsBefore; +} + static bool ParseAnalyzerArgs(AnalyzerOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); @@ -5083,6 +5111,7 @@ bool CompilerInvocation::CreateFromArgsImpl( ParseFileSystemArgs(Res.getFileSystemOpts(), Args, Diags); ParseMigratorArgs(Res.getMigratorOpts(), Args, Diags); ParseAnalyzerArgs(Res.getAnalyzerOpts(), Args, Diags); + ParseSSAFArgs(Res.getSSAFOpts(), Args, Diags); ParseDiagnosticArgs(Res.getDiagnosticOpts(), Args, &Diags, /*DefaultDiagColor=*/false); ParseFrontendArgs(Res.getFrontendOpts(), Args, Diags, LangOpts.IsHeaderFile); @@ -5435,6 +5464,7 @@ void CompilerInvocationBase::generateCC1CommandLine( GenerateFileSystemArgs(getFileSystemOpts(), Consumer); GenerateMigratorArgs(getMigratorOpts(), Consumer); GenerateAnalyzerArgs(getAnalyzerOpts(), Consumer); + GenerateSSAFArgs(getSSAFOpts(), Consumer); GenerateDiagnosticArgs(getDiagnosticOpts(), Consumer, /*DefaultDiagColor=*/false); GenerateFrontendArgs(getFrontendOpts(), Consumer, getLangOpts().IsHeaderFile); diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index e4622496758ac..997200619e599 100644 --- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -19,6 +19,7 @@ #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Frontend/FrontendPluginRegistry.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/Frontend/Utils.h" #include "clang/FrontendTool/Utils.h" #include "clang/Options/Options.h" @@ -209,7 +210,7 @@ CreateFrontendAction(CompilerInstance &CI) { Act = std::make_unique(std::move(Act), FEOpts.ASTMergeFiles); - if (!FEOpts.SSAFTUSummaryFile.empty()) { + if 
(!CI.getSSAFOpts().TUSummaryFile.empty()) { Act = std::make_unique( std::move(Act)); } diff --git a/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp b/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp index fe900f383ae31..b4b3e85386428 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp @@ -11,6 +11,7 @@ #include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/TargetInfo.h" #include "clang/Frontend/MultiplexConsumer.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" @@ -100,33 +101,33 @@ class TUSummaryRunner final : public MultiplexConsumer { private: TUSummaryRunner(llvm::Triple TargetTriple, std::unique_ptr Format, - const FrontendOptions &Opts); + const SSAFOptions &Opts); void HandleTranslationUnit(ASTContext &Ctx) override; TUSummary Summary; TUSummaryBuilder Builder = TUSummaryBuilder(Summary); std::unique_ptr Format; - const FrontendOptions &Opts; + const SSAFOptions &Opts; }; } // namespace std::unique_ptr TUSummaryRunner::create(CompilerInstance &CI) { - const FrontendOptions &Opts = CI.getFrontendOpts(); + const SSAFOptions &Opts = CI.getSSAFOpts(); DiagnosticsEngine &Diags = CI.getDiagnostics(); - if (Opts.SSAFCompilationUnitId.empty()) { + if (Opts.CompilationUnitId.empty()) { Diags.Report(diag::warn_ssaf_tu_summary_requires_compilation_unit_id); return nullptr; } auto MaybePair = - parseOutputFileFormatAndPathOrReportError(Diags, Opts.SSAFTUSummaryFile); + parseOutputFileFormatAndPathOrReportError(Diags, Opts.TUSummaryFile); if (!MaybePair.has_value()) return nullptr; auto 
[FormatName, OutputPath] = MaybePair.value(); - if (reportUnrecognizedExtractorNames(Diags, Opts.SSAFExtractSummaries)) + if (reportUnrecognizedExtractorNames(Diags, Opts.ExtractSummaries)) return nullptr; return std::unique_ptr{new TUSummaryRunner{ @@ -135,18 +136,18 @@ std::unique_ptr TUSummaryRunner::create(CompilerInstance &CI) { TUSummaryRunner::TUSummaryRunner(llvm::Triple TargetTriple, std::unique_ptr Format, - const FrontendOptions &Opts) + const SSAFOptions &Opts) : MultiplexConsumer(std::vector>{}), Summary(std::move(TargetTriple), BuildNamespace(BuildNamespaceKind::CompilationUnit, - Opts.SSAFCompilationUnitId)), + Opts.CompilationUnitId)), Format(std::move(Format)), Opts(Opts) { assert(this->Format); - assert(!Opts.SSAFCompilationUnitId.empty()); + assert(!Opts.CompilationUnitId.empty()); // Now the Summary and the builders are constructed, we can also construct the // extractors. - auto Extractors = makeTUSummaryExtractors(Builder, Opts.SSAFExtractSummaries); + auto Extractors = makeTUSummaryExtractors(Builder, Opts.ExtractSummaries); assert(!Extractors.empty()); // We must initialize the Consumers here because our extractors need a @@ -164,9 +165,9 @@ void TUSummaryRunner::HandleTranslationUnit(ASTContext &Ctx) { llvm::sys::sandbox::ScopedSetting Guard = llvm::sys::sandbox::scopedDisable(); // Then serialize the result. 
- if (auto Err = Format->writeTUSummary(Summary, Opts.SSAFTUSummaryFile)) { + if (auto Err = Format->writeTUSummary(Summary, Opts.TUSummaryFile)) { Ctx.getDiagnostics().Report(diag::warn_ssaf_write_tu_summary_failed) - << Opts.SSAFTUSummaryFile << llvm::toString(std::move(Err)); + << Opts.TUSummaryFile << llvm::toString(std::move(Err)); } } diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp b/clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp index 18d9e1735061d..cb5448e4860bc 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp @@ -11,6 +11,7 @@ #include "clang/AST/ASTContext.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendOptions.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/Frontend/TextDiagnosticBuffer.h" #include "clang/Lex/PreprocessorOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormat.h" @@ -299,9 +300,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, // Configure valid SSAF options so the failure is purely from the wrapped // action, not from runner creation. 
std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; TUSummaryExtractorFrontendAction ExtractorAction( std::make_unique()); @@ -315,9 +316,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, RunnerFailsWithInvalidFormat_WrappedConsumerStillRuns) { // Use an unregistered format extension so TUSummaryRunner::create fails. std::string Output = makePath("output.xyz"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; auto Wrapped = std::make_unique(); const EventLog &Log = Wrapped->getLog(); @@ -345,9 +346,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, TEST_F(TUSummaryExtractorFrontendActionTest, RunnerFailsWithUnknownExtractor_WrappedConsumerStillRuns) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NonExistentExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NonExistentExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; auto Wrapped = std::make_unique(); const EventLog &Log = Wrapped->getLog(); @@ -370,9 +371,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, TEST_F(TUSummaryExtractorFrontendActionTest, 
RunnerSucceeds_ASTConsumerCallbacksPropagate) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; auto Wrapped = std::make_unique(); const EventLog &Log = Wrapped->getLog(); @@ -425,9 +426,9 @@ struct OrderCheckingAction : public ASTFrontendAction { TEST_F(TUSummaryExtractorFrontendActionTest, RunnerSucceeds_WrappedRunsBeforeRunner) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; auto Wrapped = std::make_unique(); Wrapped->OutputPath = Output; @@ -447,9 +448,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, TEST_F(TUSummaryExtractorFrontendActionTest, RunnerFailsToWrite) { std::string Output = makePath("output.FailingSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; TUSummaryExtractorFrontendAction Action(std::make_unique()); @@ -469,8 +470,8 @@ TEST_F(TUSummaryExtractorFrontendActionTest, RunnerFailsToWrite) { TEST_F(TUSummaryExtractorFrontendActionTest, 
MissingCompilationUnitIdDiagnoses) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; // SSAFCompilationUnitId left empty. auto Wrapped = std::make_unique(); @@ -493,9 +494,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, TEST_F(TUSummaryExtractorFrontendActionTest, EmptyCompilationUnitIdDiagnoses) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = ""; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = ""; auto Wrapped = std::make_unique(); const EventLog &Log = Wrapped->getLog(); @@ -520,9 +521,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, const std::string CUId = "cu-X-test"; std::string Output = makePath("output.CapturingSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = CUId; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = CUId; TUSummaryExtractorFrontendAction Action(std::make_unique()); EXPECT_TRUE(Compiler->ExecuteAction(Action)); From 180c33c5d93af86913ee8c0ae1cd767531f8ca4a Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 23 Jun 2026 17:06:51 +1000 Subject: [PATCH 126/511] [orc-rt] Add return serialization to AllocActionFunction::handle. 
(#205271) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a Serializer template parameter to AllocActionFunction::handle and apply it to the handler's return value before forwarding as the action result. This lets handler authors return types other than WrapperFunctionBuffer. For SPS, AllocActionSPSSerializer is the default Serializer used by SPSAllocActionFunction::handle. It accepts either: - WrapperFunctionBuffer (identity pass-through, the existing behavior), or - Error (success → empty WFB; failure → out-of-band-error WFB carrying toString(Err)). Adds AllocActionTest coverage for both Error-return paths. --- orc-rt/include/orc-rt/AllocAction.h | 7 ++--- orc-rt/include/orc-rt/SPSAllocAction.h | 18 ++++++++++++- orc-rt/unittests/AllocActionTest.cpp | 36 ++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 4 deletions(-) diff --git a/orc-rt/include/orc-rt/AllocAction.h b/orc-rt/include/orc-rt/AllocAction.h index 25d27c4748388..e2362629eccd5 100644 --- a/orc-rt/include/orc-rt/AllocAction.h +++ b/orc-rt/include/orc-rt/AllocAction.h @@ -28,15 +28,16 @@ typedef orc_rt_WrapperFunctionBuffer (*AllocActionFn)(const char *ArgData, struct AllocActionFunction { - template + template static WrapperFunctionBuffer handle(const char *ArgData, size_t ArgSize, - Deserializer &&D, Handler &&H) { + Deserializer &&D, Serializer &&S, + Handler &&H) { typename CallableArgInfo::args_tuple_type Args; if (!D.deserialize(ArgData, ArgSize, Args)) return WrapperFunctionBuffer::createOutOfBandError( "Could not deserialize allocation action argument buffer"); - return std::apply(std::forward(H), std::move(Args)); + return S.serialize(std::apply(std::forward(H), std::move(Args))); } }; diff --git a/orc-rt/include/orc-rt/SPSAllocAction.h b/orc-rt/include/orc-rt/SPSAllocAction.h index ce9c11606338f..9737b1e37ab98 100644 --- a/orc-rt/include/orc-rt/SPSAllocAction.h +++ b/orc-rt/include/orc-rt/SPSAllocAction.h @@ -66,6 +66,22 @@ template <> 
class SPSSerializationTraits { } }; +struct AllocActionSPSSerializer { + + /// Pass-through for handlers returning WrapperFunctionBuffer. + static WrapperFunctionBuffer serialize(WrapperFunctionBuffer B) { return B; } + + /// Error serialization: + /// - success values converted to empty WrapperFunctionBuffers + /// - failure values converted to out-of-band errors. + static WrapperFunctionBuffer serialize(Error Err) { + if (!Err) + return WrapperFunctionBuffer(); + return WrapperFunctionBuffer::createOutOfBandError( + toString(std::move(Err)).c_str()); + } +}; + template struct AllocActionSPSDeserializer { template bool deserialize(const char *ArgData, size_t ArgSize, ArgTs &...Args) { @@ -84,7 +100,7 @@ template struct SPSAllocActionFunction { Handler &&H) { return AllocActionFunction::handle( ArgData, ArgSize, AllocActionSPSDeserializer>(), - std::forward(H)); + AllocActionSPSSerializer(), std::forward(H)); } }; diff --git a/orc-rt/unittests/AllocActionTest.cpp b/orc-rt/unittests/AllocActionTest.cpp index 08e2a64cde331..4e431d9099dc6 100644 --- a/orc-rt/unittests/AllocActionTest.cpp +++ b/orc-rt/unittests/AllocActionTest.cpp @@ -175,3 +175,39 @@ TEST(AllocActionTest, RunFinalizeActionsNullDealloc) { EXPECT_EQ(Val, 1); } + +// Handler that returns Error::success(). Exercises the +// AllocActionSPSSerializer::serialize(Error) overload's success path. +static orc_rt_WrapperFunctionBuffer +errorSuccess_sps_allocaction(const char *ArgData, size_t ArgSize) { + return SPSAllocActionFunction<>::handle( + ArgData, ArgSize, []() -> Error { return Error::success(); }) + .release(); +} + +// Handler that returns a StringError. Exercises the +// AllocActionSPSSerializer::serialize(Error) overload's failure path. 
+static orc_rt_WrapperFunctionBuffer +errorFailure_sps_allocaction(const char *ArgData, size_t ArgSize) { + return SPSAllocActionFunction<>::handle( + ArgData, ArgSize, + []() -> Error { return make_error("test failure"); }) + .release(); +} + +TEST(AllocActionTest, RunActionWithErrorSuccessReturn) { + // A handler returning Error::success() should produce a non-out-of-band + // result buffer. + AllocAction AA(errorSuccess_sps_allocaction, WrapperFunctionBuffer()); + auto B = AA(); + EXPECT_EQ(B.getOutOfBandError(), nullptr); +} + +TEST(AllocActionTest, RunActionWithErrorFailureReturn) { + // A handler returning a real Error should produce an out-of-band error + // result buffer carrying the Error's string form. + AllocAction AA(errorFailure_sps_allocaction, WrapperFunctionBuffer()); + auto B = AA(); + ASSERT_NE(B.getOutOfBandError(), nullptr); + EXPECT_STREQ(B.getOutOfBandError(), "test failure"); +} From de6266ef555c46d4945179bbb8e7fbb0575c8038 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 23 Jun 2026 09:08:09 +0200 Subject: [PATCH 127/511] [libc] Introduce the ioctl syscall wrapper and port all callers (#204640) This patch adds an ioctl syscall wrapper in linux_syscalls namespace and migrates all direct SYS_ioctl calls to use it. To handle the polymorphic nature of ioctl arguments (where some commands expect pointers, some expect scalar integers like queue_selector, and some expect no argument at all), I use a helper struct IoctlArg with implicit constructors. This avoids template bloat and overload ambiguities (particularly around literal 0) while keeping call sites clean. Assisted by Gemini. 
--- .../linux/syscall_wrappers/CMakeLists.txt | 13 +++++ .../OSUtil/linux/syscall_wrappers/ioctl.h | 49 +++++++++++++++++++ libc/src/sys/ioctl/linux/CMakeLists.txt | 3 +- libc/src/sys/ioctl/linux/ioctl.cpp | 15 +++--- libc/src/termios/linux/CMakeLists.txt | 21 +++----- libc/src/termios/linux/tcdrain.cpp | 9 ++-- libc/src/termios/linux/tcflow.cpp | 9 ++-- libc/src/termios/linux/tcflush.cpp | 10 ++-- libc/src/termios/linux/tcgetattr.cpp | 12 ++--- libc/src/termios/linux/tcgetsid.cpp | 9 ++-- libc/src/termios/linux/tcsendbreak.cpp | 9 ++-- libc/src/termios/linux/tcsetattr.cpp | 12 ++--- libc/src/unistd/linux/CMakeLists.txt | 5 +- libc/src/unistd/linux/isatty.cpp | 13 ++--- 14 files changed, 113 insertions(+), 76 deletions(-) create mode 100644 libc/src/__support/OSUtil/linux/syscall_wrappers/ioctl.h diff --git a/libc/src/__support/OSUtil/linux/syscall_wrappers/CMakeLists.txt b/libc/src/__support/OSUtil/linux/syscall_wrappers/CMakeLists.txt index a52be9676a3ca..f3282c315d9a9 100644 --- a/libc/src/__support/OSUtil/linux/syscall_wrappers/CMakeLists.txt +++ b/libc/src/__support/OSUtil/linux/syscall_wrappers/CMakeLists.txt @@ -706,6 +706,19 @@ add_header_library( libc.include.sys_syscall ) +add_header_library( + ioctl + HDRS + ioctl.h + DEPENDS + libc.src.__support.CPP.type_traits + libc.src.__support.OSUtil.osutil + libc.src.__support.error_or + libc.src.__support.macros.attributes + libc.src.__support.macros.config + libc.include.sys_syscall +) + add_header_library( readlink HDRS diff --git a/libc/src/__support/OSUtil/linux/syscall_wrappers/ioctl.h b/libc/src/__support/OSUtil/linux/syscall_wrappers/ioctl.h new file mode 100644 index 0000000000000..9410bbbcad8cd --- /dev/null +++ b/libc/src/__support/OSUtil/linux/syscall_wrappers/ioctl.h @@ -0,0 +1,49 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Syscall wrapper for ioctl. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_SYSCALL_WRAPPERS_IOCTL_H +#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_SYSCALL_WRAPPERS_IOCTL_H + +#include "src/__support/CPP/type_traits/enable_if.h" +#include "src/__support/CPP/type_traits/is_integral.h" +#include "src/__support/CPP/type_traits/is_null_pointer.h" +#include "src/__support/CPP/type_traits/is_pointer.h" +#include "src/__support/OSUtil/linux/syscall.h" // syscall_checked +#include "src/__support/error_or.h" +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/__support/macros/config.h" +#include // For syscall numbers + +namespace LIBC_NAMESPACE_DECL { +namespace linux_syscalls { + +struct IoctlArg { + unsigned long val; + + LIBC_INLINE IoctlArg(const void *ptr) + : val(reinterpret_cast(ptr)) {} + + template , int> = 0> + LIBC_INLINE constexpr IoctlArg(T num = 0) + : val(static_cast(num)) {} +}; + +LIBC_INLINE ErrorOr ioctl(int fd, unsigned long request, + IoctlArg arg = 0) { + return syscall_checked(SYS_ioctl, fd, request, arg.val); +} + +} // namespace linux_syscalls +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_SYSCALL_WRAPPERS_IOCTL_H diff --git a/libc/src/sys/ioctl/linux/CMakeLists.txt b/libc/src/sys/ioctl/linux/CMakeLists.txt index 876f35aaee66c..33ff2d4dce214 100644 --- a/libc/src/sys/ioctl/linux/CMakeLists.txt +++ b/libc/src/sys/ioctl/linux/CMakeLists.txt @@ -6,7 +6,6 @@ add_entrypoint_object( ../ioctl.h DEPENDS libc.include.sys_ioctl - libc.include.sys_syscall - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) diff --git 
a/libc/src/sys/ioctl/linux/ioctl.cpp b/libc/src/sys/ioctl/linux/ioctl.cpp index 9bb669c6a6f66..ec861ffc03ec6 100644 --- a/libc/src/sys/ioctl/linux/ioctl.cpp +++ b/libc/src/sys/ioctl/linux/ioctl.cpp @@ -8,11 +8,10 @@ #include "src/sys/ioctl/ioctl.h" -#include "src/__support/OSUtil/syscall.h" // For internal syscall function. +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include -#include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { @@ -20,16 +19,14 @@ LLVM_LIBC_FUNCTION(int, ioctl, (int fd, unsigned long request, ...)) { va_list vargs; va_start(vargs, request); void *data_pointer = va_arg(vargs, void *); - int ret = - LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, request, data_pointer); va_end(vargs); - // Some ioctls can be expected to return positive values - if (ret >= 0) - return ret; + auto ret = linux_syscalls::ioctl(fd, request, data_pointer); - // If there is an error, errno is set and -1 is returned. 
- libc_errno = -ret; + if (ret.has_value()) + return ret.value(); + + libc_errno = ret.error(); return -1; } diff --git a/libc/src/termios/linux/CMakeLists.txt b/libc/src/termios/linux/CMakeLists.txt index e990fba25eabe..5d5440ae69266 100644 --- a/libc/src/termios/linux/CMakeLists.txt +++ b/libc/src/termios/linux/CMakeLists.txt @@ -51,9 +51,8 @@ add_entrypoint_object( HDRS ../tcgetsid.h DEPENDS - libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) @@ -64,9 +63,8 @@ add_entrypoint_object( HDRS ../tcdrain.h DEPENDS - libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) @@ -77,9 +75,8 @@ add_entrypoint_object( HDRS ../tcflush.h DEPENDS - libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) @@ -90,9 +87,8 @@ add_entrypoint_object( HDRS ../tcflow.h DEPENDS - libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) @@ -103,9 +99,8 @@ add_entrypoint_object( HDRS ../tcsendbreak.h DEPENDS - libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) @@ -123,9 +118,8 @@ add_entrypoint_object( ../tcgetattr.h DEPENDS .kernel_termios - libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) @@ -137,8 +131,7 @@ add_entrypoint_object( ../tcsetattr.h DEPENDS .kernel_termios - libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) diff --git 
a/libc/src/termios/linux/tcdrain.cpp b/libc/src/termios/linux/tcdrain.cpp index 570b15c24fe7f..4fce89d65a76f 100644 --- a/libc/src/termios/linux/tcdrain.cpp +++ b/libc/src/termios/linux/tcdrain.cpp @@ -8,21 +8,20 @@ #include "src/termios/tcdrain.h" -#include "src/__support/OSUtil/syscall.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. -#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, tcdrain, (int fd)) { - int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCSBRK, 1); - if (ret < 0) { - libc_errno = -ret; + auto ret = linux_syscalls::ioctl(fd, TCSBRK, 1); + if (!ret.has_value()) { + libc_errno = ret.error(); return -1; } return 0; diff --git a/libc/src/termios/linux/tcflow.cpp b/libc/src/termios/linux/tcflow.cpp index 714ef6aa71298..4ffd294997ad4 100644 --- a/libc/src/termios/linux/tcflow.cpp +++ b/libc/src/termios/linux/tcflow.cpp @@ -8,21 +8,20 @@ #include "src/termios/tcflow.h" -#include "src/__support/OSUtil/syscall.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. 
-#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, tcflow, (int fd, int action)) { - int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCXONC, action); - if (ret < 0) { - libc_errno = -ret; + auto ret = linux_syscalls::ioctl(fd, TCXONC, action); + if (!ret.has_value()) { + libc_errno = ret.error(); return -1; } return 0; diff --git a/libc/src/termios/linux/tcflush.cpp b/libc/src/termios/linux/tcflush.cpp index 4c7b9fadc446d..8a4676d97454a 100644 --- a/libc/src/termios/linux/tcflush.cpp +++ b/libc/src/termios/linux/tcflush.cpp @@ -8,22 +8,20 @@ #include "src/termios/tcflush.h" -#include "src/__support/OSUtil/syscall.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. -#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, tcflush, (int fd, int queue_selector)) { - int ret = - LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCFLSH, queue_selector); - if (ret < 0) { - libc_errno = -ret; + auto ret = linux_syscalls::ioctl(fd, TCFLSH, queue_selector); + if (!ret.has_value()) { + libc_errno = ret.error(); return -1; } return 0; diff --git a/libc/src/termios/linux/tcgetattr.cpp b/libc/src/termios/linux/tcgetattr.cpp index 2e768269c874d..0569be4ae588f 100644 --- a/libc/src/termios/linux/tcgetattr.cpp +++ b/libc/src/termios/linux/tcgetattr.cpp @@ -7,24 +7,22 @@ //===----------------------------------------------------------------------===// #include "src/termios/tcgetattr.h" -#include "kernel_termios.h" - -#include "src/__support/OSUtil/syscall.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/termios/linux/kernel_termios.h" #include // Safe to include 
without the risk of name pollution. -#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, tcgetattr, (int fd, struct termios *t)) { LIBC_NAMESPACE::kernel_termios kt; - int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCGETS, &kt); - if (ret < 0) { - libc_errno = -ret; + auto ret = linux_syscalls::ioctl(fd, TCGETS, &kt); + if (!ret.has_value()) { + libc_errno = ret.error(); return -1; } t->c_iflag = kt.c_iflag; diff --git a/libc/src/termios/linux/tcgetsid.cpp b/libc/src/termios/linux/tcgetsid.cpp index 7487816cf2741..428ca83fb2ad0 100644 --- a/libc/src/termios/linux/tcgetsid.cpp +++ b/libc/src/termios/linux/tcgetsid.cpp @@ -8,22 +8,21 @@ #include "src/termios/tcgetsid.h" -#include "src/__support/OSUtil/syscall.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. -#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(pid_t, tcgetsid, (int fd)) { pid_t sid; - int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TIOCGSID, &sid); - if (ret < 0) { - libc_errno = -ret; + auto ret = linux_syscalls::ioctl(fd, TIOCGSID, &sid); + if (!ret.has_value()) { + libc_errno = ret.error(); return -1; } return sid; diff --git a/libc/src/termios/linux/tcsendbreak.cpp b/libc/src/termios/linux/tcsendbreak.cpp index 1d546c1d5953e..e91ec7b748582 100644 --- a/libc/src/termios/linux/tcsendbreak.cpp +++ b/libc/src/termios/linux/tcsendbreak.cpp @@ -8,13 +8,12 @@ #include "src/termios/tcsendbreak.h" -#include "src/__support/OSUtil/syscall.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. 
-#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { @@ -23,9 +22,9 @@ LLVM_LIBC_FUNCTION(pid_t, tcsendbreak, (int fd, int /* unused duration */)) { // POSIX leaves the behavior for non-zero duration implementation dependent. // Which means that the behavior can be the same as it is when duration is // zero. So, we just pass zero to the syscall. - int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCSBRK, 0); - if (ret < 0) { - libc_errno = -ret; + auto ret = linux_syscalls::ioctl(fd, TCSBRK, 0); + if (!ret.has_value()) { + libc_errno = ret.error(); return -1; } return 0; diff --git a/libc/src/termios/linux/tcsetattr.cpp b/libc/src/termios/linux/tcsetattr.cpp index 8a2c7290217ba..b2f08d078ee83 100644 --- a/libc/src/termios/linux/tcsetattr.cpp +++ b/libc/src/termios/linux/tcsetattr.cpp @@ -7,15 +7,13 @@ //===----------------------------------------------------------------------===// #include "src/termios/tcsetattr.h" -#include "kernel_termios.h" - -#include "src/__support/OSUtil/syscall.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/termios/linux/kernel_termios.h" #include // Safe to include without the risk of name pollution. 
-#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { @@ -52,9 +50,9 @@ LLVM_LIBC_FUNCTION(int, tcsetattr, kt.c_cc[i] = 0; } - int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, cmd, &kt); - if (ret < 0) { - libc_errno = -ret; + auto ret = linux_syscalls::ioctl(fd, cmd, &kt); + if (!ret.has_value()) { + libc_errno = ret.error(); return -1; } return 0; diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt index 04ccde414cd2f..af385e9bbed72 100644 --- a/libc/src/unistd/linux/CMakeLists.txt +++ b/libc/src/unistd/linux/CMakeLists.txt @@ -342,11 +342,10 @@ add_entrypoint_object( HDRS ../isatty.h DEPENDS - libc.hdr.fcntl_macros + libc.hdr.unistd_macros libc.include.unistd libc.include.sys_ioctl - libc.include.sys_syscall - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) diff --git a/libc/src/unistd/linux/isatty.cpp b/libc/src/unistd/linux/isatty.cpp index a4d17912b57b0..4418feb2229a3 100644 --- a/libc/src/unistd/linux/isatty.cpp +++ b/libc/src/unistd/linux/isatty.cpp @@ -8,13 +8,11 @@ #include "src/unistd/isatty.h" -#include "src/__support/OSUtil/syscall.h" // For internal syscall function. +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" - #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include // For ioctl numbers. -#include // For syscall numbers. +#include // For ioctl numbers. namespace LIBC_NAMESPACE_DECL { @@ -23,12 +21,11 @@ LLVM_LIBC_FUNCTION(int, isatty, (int fd)) { int line_d_val = INIT_VAL; // This gets the line dicipline of the terminal. When called on something that // isn't a terminal it doesn't change line_d_val and returns -1. 
- int result = - LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TIOCGETD, &line_d_val); - if (result == 0) + auto result = linux_syscalls::ioctl(fd, TIOCGETD, &line_d_val); + if (result.has_value()) return 1; - libc_errno = -result; + libc_errno = result.error(); return 0; } From 4395d602bb293fa4701c783d3615fb72ea812afe Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 23 Jun 2026 09:08:30 +0200 Subject: [PATCH 128/511] [Clang][ABI] Validate consistency between ABI lowering implementation (#203281) If the LLVM ABI library is used, and assertions are enabled, compute the ABI both using Clang's implementation and the LLVM ABI library, and verify that the results are the same. --- clang/lib/CodeGen/CGCall.cpp | 120 ++++++++++++++++++++++++++++++++++- 1 file changed, 117 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 82b374e50fd41..c4cd66d14f1dd 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -832,6 +832,32 @@ void computeSPIRKernelABIInfo(CodeGenModule &CGM, CGFunctionInfo &FI); } // namespace CodeGen } // namespace clang +#ifndef NDEBUG +static const char *abiKindToString(ABIArgInfo::Kind K) { + switch (K) { + case ABIArgInfo::Direct: + return "Direct"; + case ABIArgInfo::Extend: + return "Extend"; + case ABIArgInfo::Indirect: + return "Indirect"; + case ABIArgInfo::IndirectAliased: + return "IndirectAliased"; + case ABIArgInfo::Ignore: + return "Ignore"; + case ABIArgInfo::Expand: + return "Expand"; + case ABIArgInfo::CoerceAndExpand: + return "CoerceAndExpand"; + case ABIArgInfo::TargetSpecific: + return "TargetSpecific"; + case ABIArgInfo::InAlloca: + return "InAlloca"; + } + llvm_unreachable("Unknown kind"); +} +#endif + void CodeGenModule::computeABIInfoUsingLib(CGFunctionInfo &FI) { SmallVector MappedArgTypes; MappedArgTypes.reserve(FI.arg_size()); @@ -849,12 +875,100 @@ void CodeGenModule::computeABIInfoUsingLib(CGFunctionInfo &FI) {
getLLVMABITargetInfo(AbiMapper->getTypeBuilder()).computeInfo(*AbiFI); - FI.getReturnInfo() = - convertABIArgInfo(AbiFI->getReturnInfo(), FI.getReturnType()); +#ifndef NDEBUG + // With assertions enabled, also compute info using Clang ABI logic, + // so we can ensure the results are consistent. + getABIInfo().computeInfo(FI); + + auto ConvertABIArgInfo = [&](ABIArgInfo &Target, + const llvm::abi::ArgInfo &AbiInfo, QualType Type, + int ArgNo) { + auto Check = [&](bool Cond, llvm::function_ref MessageFn) { + if (Cond) + return; + if (ArgNo == -1) + llvm::dbgs() << "For return value of type "; + else + llvm::dbgs() << "For argument " << ArgNo << " of type "; + llvm::dbgs() << Type << ": "; + MessageFn(); + llvm::dbgs() << "\n"; + abort(); + }; + auto CheckSimple = [&](auto TargetVal, auto ResVal, StringRef What) { + Check(TargetVal == ResVal, [&]() { + llvm::dbgs() << What << " mismatch (expected: " << TargetVal + << ", given: " << ResVal << ")"; + }); + }; + + ABIArgInfo Res = convertABIArgInfo(AbiInfo, Type); + Check(Target.getKind() == Res.getKind(), [&]() { + llvm::dbgs() << "Kind mismatch (expected: " + << abiKindToString(Target.getKind()) + << ", given: " << abiKindToString(Res.getKind()) << ")"; + }); + + if (Res.canHaveCoerceToType()) { + // Normalize nullptr types. 
+ llvm::Type *TargetType = Target.getCoerceToType(); + llvm::Type *ResType = Res.getCoerceToType(); + if (!TargetType) + TargetType = getTypes().ConvertType(Type); + if (!ResType) + ResType = getTypes().ConvertType(Type); + + Check(TargetType == ResType, [&]() { + llvm::dbgs() << "CoerceToType mismatch (expected: " << *TargetType + << ", given: " << *ResType << ")"; + }); + } + + switch (Res.getKind()) { + case ABIArgInfo::Extend: + CheckSimple(Target.isSignExt(), Res.isSignExt(), "SignExt"); + CheckSimple(Target.isZeroExt(), Res.isZeroExt(), "ZeroExt"); + [[fallthrough]]; + case ABIArgInfo::Direct: + CheckSimple(Target.getDirectAlign(), Res.getDirectAlign(), "DirectAlign"); + CheckSimple(Target.getDirectOffset(), Res.getDirectOffset(), + "DirectOffset"); + break; + case ABIArgInfo::Indirect: + CheckSimple(Target.getIndirectByVal(), Res.getIndirectByVal(), + "IndirectByVal"); + [[fallthrough]]; + case ABIArgInfo::IndirectAliased: + CheckSimple(Target.getIndirectAddrSpace(), Res.getIndirectAddrSpace(), + "IndirectAddrSpace"); + CheckSimple(Target.getIndirectRealign(), Res.getIndirectRealign(), + "IndirectRealign"); + Check(Target.getIndirectAlign() == Res.getIndirectAlign(), [&]() { + llvm::dbgs() << "IndirectAlign mismatch (expected: " + << Target.getIndirectAlign().getQuantity() + << ", given: " << Res.getIndirectAlign().getQuantity() + << ")"; + }); + break; + default: + break; + } + + Target = Res; + }; +#else + auto ConvertABIArgInfo = + [&](ABIArgInfo &Target, const llvm::abi::ArgInfo &AbiInfo, QualType Type, + int ArgNo) { Target = convertABIArgInfo(AbiInfo, Type); }; +#endif + + ConvertABIArgInfo(FI.getReturnInfo(), AbiFI->getReturnInfo(), + FI.getReturnType(), -1); + int ArgNo = 0; for (auto [CGArg, AbiArg] : llvm::zip_equal(FI.arguments(), AbiFI->arguments())) - CGArg.info = convertABIArgInfo(AbiArg.Info, CGArg.type); + ConvertABIArgInfo(CGArg.info, AbiArg.Info, CGArg.type, ArgNo++); } ABIArgInfo CodeGenModule::convertABIArgInfo(const llvm::abi::ArgInfo 
&AbiInfo, From faa7d1096638a7d73af4a89ac2d4c3de1e76897c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Benics?= Date: Tue, 23 Jun 2026 09:08:59 +0200 Subject: [PATCH 129/511] [clang][ssaf][NFC] Make SSAFOptions available in Builders and Extractors (#204684) Now that we have SSAFOptions, it would make it a lot more ergonomic if it was accessible from builders and extractors. This PR does exactly that. Part of rdar://179151023 Co-authored-by: Jan Korous Co-authored-by: Claude Opus 4.7 --- .../Core/TUSummary/TUSummaryBuilder.h | 8 +++++++- .../Core/TUSummary/TUSummaryExtractor.h | 4 ++++ .../Analyses/PointerFlow/PointerFlowExtractor.cpp | 3 +-- .../Core/TUSummary/TUSummaryExtractor.cpp | 4 ++++ .../Frontend/TUSummaryExtractorFrontendAction.cpp | 9 ++++++--- .../Analyses/CallGraph/CallGraphExtractorTest.cpp | 4 +++- .../Analyses/PointerFlow/PointerFlowTest.cpp | 4 +++- .../UnsafeBufferUsage/UnsafeBufferUsageTest.cpp | 4 +++- .../Registries/SummaryExtractorRegistryTest.cpp | 10 +++++++--- .../TUSummaryBuilderTest.cpp | 4 +++- 10 files changed, 41 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h b/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h index f9ebe5358b585..38bd60718ed9c 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h +++ b/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h @@ -18,11 +18,13 @@ namespace clang::ssaf { class EntityName; +class SSAFOptions; class TUSummary; class TUSummaryBuilder { public: - explicit TUSummaryBuilder(TUSummary &Summary) : Summary(Summary) {} + TUSummaryBuilder(TUSummary &Summary, const SSAFOptions &Options) + : Summary(Summary), Options(Options) {} EntityId addEntity(const EntityName &EN, EntityLinkageType Linkage); @@ -35,8 +37,12 @@ class TUSummaryBuilder { std::pair addSummary(EntityId Entity, std::unique_ptr &&Data); + 
/// \returns the \c SSAFOptions of this builder. + const SSAFOptions &getOptions() const { return Options; } + private: TUSummary &Summary; + const SSAFOptions &Options; std::pair addSummaryImpl(EntityId Entity, std::unique_ptr &&Data); diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h b/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h index 46b0ae835d729..b943748873821 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h +++ b/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h @@ -15,6 +15,7 @@ #include namespace clang::ssaf { +class SSAFOptions; class TUSummaryBuilder; class TUSummaryExtractor : public ASTConsumer { @@ -32,6 +33,9 @@ class TUSummaryExtractor : public ASTConsumer { /// \returns the EntityId, or std::nullopt if EntityName creation fails. std::optional addEntityForReturn(const FunctionDecl *FD); + /// \returns the \c SSAFOptions of the builder. 
+ const SSAFOptions &getOptions() const; + protected: TUSummaryBuilder &SummaryBuilder; }; diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp b/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp index 38e3e3ec3ab9e..ef5932c52a6c3 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp @@ -311,8 +311,7 @@ PointerFlowMatcher::matchesInitializerList(const ValueDecl *Base, class PointerFlowTUSummaryExtractor : public TUSummaryExtractor { public: - PointerFlowTUSummaryExtractor(TUSummaryBuilder &Builder) - : TUSummaryExtractor(Builder) {} + using TUSummaryExtractor::TUSummaryExtractor; /// \return a non-null unique pointer to a PointerFlowEntitySummary std::unique_ptr diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp b/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp index f0602e0d5550f..ccd5eef377d2d 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp @@ -74,3 +74,7 @@ TUSummaryExtractor::addEntityForReturn(const FunctionDecl *FD) { return std::nullopt; return SummaryBuilder.addEntity(*Name, getLinkageForDecl(FD)); } + +const SSAFOptions &TUSummaryExtractor::getOptions() const { + return SummaryBuilder.getOptions(); +} diff --git a/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp b/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp index b4b3e85386428..4f290ccac3d16 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp @@ -106,9 +106,12 
@@ class TUSummaryRunner final : public MultiplexConsumer { void HandleTranslationUnit(ASTContext &Ctx) override; TUSummary Summary; - TUSummaryBuilder Builder = TUSummaryBuilder(Summary); - std::unique_ptr Format; + + /// Owned by the \c CompilerInstance. const SSAFOptions &Opts; + + TUSummaryBuilder Builder = TUSummaryBuilder(Summary, Opts); + std::unique_ptr Format; }; } // namespace @@ -141,7 +144,7 @@ TUSummaryRunner::TUSummaryRunner(llvm::Triple TargetTriple, Summary(std::move(TargetTriple), BuildNamespace(BuildNamespaceKind::CompilationUnit, Opts.CompilationUnitId)), - Format(std::move(Format)), Opts(Opts) { + Opts(Opts), Format(std::move(Format)) { assert(this->Format); assert(!Opts.CompilationUnitId.empty()); diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp index 69db546fbf93d..0a3685c4f1057 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp @@ -11,6 +11,7 @@ #include "clang/AST/Decl.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphSummary.h" #include "clang/ScalableStaticAnalysisFramework/Core/ASTEntityMapping.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" @@ -123,10 +124,11 @@ template auto hasSummaryThat(const Matchers &...Ms) { static const SummaryName CallGraphName{CallGraphSummary::Name.str()}; struct CallGraphExtractorTest : ssaf::TestFixture { + SSAFOptions Opts; TUSummary Summary{ llvm::Triple("arm64-apple-macosx"), BuildNamespace(BuildNamespaceKind::CompilationUnit, "Mock.cpp")}; - TUSummaryBuilder Builder = TUSummaryBuilder(Summary); + 
TUSummaryBuilder Builder = TUSummaryBuilder(Summary, Opts); /// Creates the AST and extractor, then extracts the summaries from the AST. /// This will update the \c AST \c Builder and \c Summary data members. diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp index 44465a59d4cfd..49e7bdc21738b 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp @@ -13,6 +13,7 @@ #include "clang/AST/DynamicRecursiveASTVisitor.h" #include "clang/AST/ExprCXX.h" #include "clang/Frontend/ASTUnit.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" @@ -146,6 +147,7 @@ struct EPLPair { class PointerFlowTest : public TestFixture { protected: + SSAFOptions Opts; TUSummary TUSum; TUSummaryBuilder Builder; std::unique_ptr Extractor; @@ -154,7 +156,7 @@ class PointerFlowTest : public TestFixture { PointerFlowTest() : TUSum(llvm::Triple("arm64-apple-macosx"), BuildNamespace(BuildNamespaceKind::CompilationUnit, "Mock.cpp")), - Builder(TUSum), Extractor(nullptr) {} + Builder(TUSum, Opts), Extractor(nullptr) {} template Extractor; @@ -43,7 +45,7 @@ class UnsafeBufferUsageTest : public TestFixture { UnsafeBufferUsageTest() : TUSum(llvm::Triple("arm64-apple-macosx"), BuildNamespace(BuildNamespaceKind::CompilationUnit, "Mock.cpp")), - Builder(TUSum) {} + Builder(TUSum, Opts) {} bool setUpTest(StringRef Code) { AST = tooling::buildASTFromCodeWithArgs( diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp 
b/clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp index fd6ad83225e10..2294c045d554b 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp @@ -8,6 +8,7 @@ #include "MockTUSummaryBuilder.h" #include "clang/Frontend/MultiplexConsumer.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" #include "clang/Tooling/Tooling.h" @@ -46,8 +47,9 @@ TEST(SummaryExtractorRegistryTest, EnumeratingRegistryEntries) { } TEST(SummaryExtractorRegistryTest, InstantiatingExtractor1) { + SSAFOptions Opts; TUSummary Summary = makeFakeSummary(); - MockTUSummaryBuilder FakeBuilder(Summary); + MockTUSummaryBuilder FakeBuilder(Summary, Opts); { auto Consumer = makeTUSummaryExtractor("MockSummaryExtractor1", FakeBuilder); @@ -60,8 +62,9 @@ TEST(SummaryExtractorRegistryTest, InstantiatingExtractor1) { } TEST(SummaryExtractorRegistryTest, InstantiatingExtractor2) { + SSAFOptions Opts; TUSummary Summary = makeFakeSummary(); - MockTUSummaryBuilder FakeBuilder(Summary); + MockTUSummaryBuilder FakeBuilder(Summary, Opts); { auto Consumer = makeTUSummaryExtractor("MockSummaryExtractor2", FakeBuilder); @@ -74,8 +77,9 @@ TEST(SummaryExtractorRegistryTest, InstantiatingExtractor2) { } TEST(SummaryExtractorRegistryTest, InvokingExtractors) { + SSAFOptions Opts; TUSummary Summary = makeFakeSummary(); - MockTUSummaryBuilder FakeBuilder(Summary); + MockTUSummaryBuilder FakeBuilder(Summary, Opts); std::vector> Consumers; for (std::string Name : {"MockSummaryExtractor1", "MockSummaryExtractor2"}) { auto Consumer = makeTUSummaryExtractor(Name, FakeBuilder); diff --git a/clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp 
b/clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp index ffcf068ce6956..55c5c781e42d9 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp @@ -9,6 +9,7 @@ #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" #include "FindDecl.h" #include "TestFixture.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" #include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" #include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" @@ -89,10 +90,11 @@ void PrintTo(const MockSummaryData3 &S, std::ostream *OS) { } struct TUSummaryBuilderTest : ssaf::TestFixture { + SSAFOptions Opts; TUSummary Summary{ llvm::Triple("arm64-apple-macosx"), BuildNamespace(BuildNamespaceKind::CompilationUnit, "Mock.cpp")}; - TUSummaryBuilder Builder{Summary}; + TUSummaryBuilder Builder{Summary, Opts}; TUSummaryExtractor Extractor{Builder}; [[nodiscard]] EntityId addTestEntity(llvm::StringRef USR) { From bb6f3eae146fcb12a30a3298b248b5f0cc444835 Mon Sep 17 00:00:00 2001 From: Yuxuan Shui Date: Tue, 23 Jun 2026 08:12:47 +0100 Subject: [PATCH 130/511] [symbolizer] Add a --pdb option. (#171053) Closes #142490 --- llvm/docs/CommandGuide/llvm-symbolizer.rst | 5 ++ .../llvm/DebugInfo/Symbolize/Symbolize.h | 1 + llvm/lib/DebugInfo/Symbolize/Symbolize.cpp | 88 +++++++++++-------- llvm/test/tools/llvm-symbolizer/pdb/pdb.test | 6 ++ llvm/tools/llvm-symbolizer/Opts.td | 1 + .../tools/llvm-symbolizer/llvm-symbolizer.cpp | 1 + 6 files changed, 65 insertions(+), 37 deletions(-) diff --git a/llvm/docs/CommandGuide/llvm-symbolizer.rst b/llvm/docs/CommandGuide/llvm-symbolizer.rst index cb5127e9f4e1c..e56a7f209d196 100644 --- a/llvm/docs/CommandGuide/llvm-symbolizer.rst +++ b/llvm/docs/CommandGuide/llvm-symbolizer.rst @@ -442,6 +442,11 @@ OPTIONS } ] +.. 
option:: --pdb + + Use the specified PDB file at ````, overriding the PDB info + contained in the COFF object. + .. option:: --pretty-print, -p Print human-readable output. If :option:`--inlining` is specified, the diff --git a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h index 4fd7462d52ceb..b1fad68834332 100644 --- a/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h +++ b/llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h @@ -66,6 +66,7 @@ class LLVMSymbolizer { std::vector DsymHints; std::string FallbackDebugPath; std::string DWPName; + std::string PDBName; std::vector DebugFileDirectory; std::vector GsymFileDirectory; size_t MaxCacheSize = diff --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp index c2e15d6b3def4..b9ab047fb1b76 100644 --- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp +++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp @@ -770,46 +770,60 @@ LLVMSymbolizer::getOrCreateModuleInfo(StringRef ModuleName) { ObjectPair Objects = ObjectsOrErr.get(); std::unique_ptr Context; - // If this is a COFF object containing PDB info and not containing DWARF - // section, use a PDBContext to symbolize. Otherwise, use DWARF. - // Create a DIContext to symbolize as follows: - // - If there is a GSYM file, create a GsymContext. - // - Otherwise, if this is a COFF object containing PDB info, create a - // PDBContext. - // - Otherwise, create a DWARFContext. - const auto GsymFile = lookUpGsymFile(BinaryName.str()); - if (!GsymFile.empty()) { - auto ReaderOrErr = gsym::GsymReader::openFile(GsymFile); - - if (ReaderOrErr) - Context = std::make_unique(std::move(*ReaderOrErr)); + pdb::PDB_ReaderType ReaderType = + Opts.UseDIA ? pdb::PDB_ReaderType::DIA : pdb::PDB_ReaderType::Native; + const auto *CoffObject = dyn_cast(Objects.first); + + // First, if the user specified a pdb file on the command line, use that. 
+ if (CoffObject && !Opts.PDBName.empty()) { + using namespace pdb; + std::unique_ptr Session; + if (auto Err = loadDataForPDB(ReaderType, Opts.PDBName, Session)) { + Modules.emplace(ModuleName, std::unique_ptr()); + return createFileError(Opts.PDBName, std::move(Err)); + } + Context.reset(new PDBContext(*CoffObject, std::move(Session))); } + if (!Context) { - if (auto CoffObject = dyn_cast(Objects.first)) { - const codeview::DebugInfo *DebugInfo; - StringRef PDBFileName; - auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName); - // Use DWARF if there're DWARF sections. - bool HasDwarf = llvm::any_of( - Objects.first->sections(), [](SectionRef Section) -> bool { - if (Expected SectionName = Section.getName()) - return SectionName.get() == ".debug_info"; - return false; - }); - if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) { - using namespace pdb; - std::unique_ptr Session; - - PDB_ReaderType ReaderType = - Opts.UseDIA ? PDB_ReaderType::DIA : PDB_ReaderType::Native; - if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(), - Session)) { - Modules.emplace(ModuleName, std::unique_ptr()); - // Return along the PDB filename to provide more context - return createFileError(PDBFileName, std::move(Err)); - } - Context.reset(new PDBContext(*CoffObject, std::move(Session))); + // If this is a COFF object containing PDB info and not containing DWARF + // section, use a PDBContext to symbolize. Otherwise, use DWARF. + // Create a DIContext to symbolize as follows: + // - If there is a GSYM file, create a GsymContext. + // - Otherwise, if this is a COFF object containing PDB info, create a + // PDBContext. + // - Otherwise, create a DWARFContext. 
+ const auto GsymFile = lookUpGsymFile(BinaryName.str()); + if (!GsymFile.empty()) { + auto ReaderOrErr = gsym::GsymReader::openFile(GsymFile); + + if (ReaderOrErr) + Context = std::make_unique(std::move(*ReaderOrErr)); + } + } + + if (!Context && CoffObject) { + const codeview::DebugInfo *DebugInfo; + StringRef PDBFileName; + auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName); + // Use DWARF if there're DWARF sections. + bool HasDwarf = + llvm::any_of(Objects.first->sections(), [](SectionRef Section) -> bool { + if (Expected SectionName = Section.getName()) + return SectionName.get() == ".debug_info"; + return false; + }); + if (!EC && !HasDwarf && DebugInfo != nullptr && !PDBFileName.empty()) { + using namespace pdb; + std::unique_ptr Session; + + if (auto Err = loadDataForEXE(ReaderType, Objects.first->getFileName(), + Session)) { + Modules.emplace(ModuleName, std::unique_ptr()); + // Return along the PDB filename to provide more context + return createFileError(PDBFileName, std::move(Err)); } + Context.reset(new PDBContext(*CoffObject, std::move(Session))); } } if (!Context) diff --git a/llvm/test/tools/llvm-symbolizer/pdb/pdb.test b/llvm/test/tools/llvm-symbolizer/pdb/pdb.test index 46a1ae9814e6d..7ac9f268763cd 100644 --- a/llvm/test/tools/llvm-symbolizer/pdb/pdb.test +++ b/llvm/test/tools/llvm-symbolizer/pdb/pdb.test @@ -1,3 +1,5 @@ +RUN: rm -rf %t.exe + RUN: echo 0x401380 > %t.input RUN: echo 0x401390 >> %t.input RUN: echo 0x4013A0 >> %t.input @@ -11,6 +13,10 @@ RUN: | FileCheck %s RUN: llvm-symbolizer --obj="%p/Inputs/test.exe" --no-demangle < %t.input \ RUN: | FileCheck %s --check-prefix=CHECK-NO-DEMANGLE +RUN: cp %p/Inputs/test.exe %t.exe +RUN: llvm-symbolizer --obj="%t.exe" --pdb="%p/Inputs/test.pdb" < %t.input \ +RUN: | FileCheck %s + ; Check that -dia works RUN: llvm-symbolizer --dia --obj="%p/Inputs/test.exe" < %t.input \ RUN: | FileCheck %s diff --git a/llvm/tools/llvm-symbolizer/Opts.td b/llvm/tools/llvm-symbolizer/Opts.td index 
ba22cfdde6235..bb7b35c658e95 100644 --- a/llvm/tools/llvm-symbolizer/Opts.td +++ b/llvm/tools/llvm-symbolizer/Opts.td @@ -40,6 +40,7 @@ def functions_EQ : Joined<["--"], "functions=">, HelpText<"Print function name f defm gsym_file_directory : Eq<"gsym-file-directory", "Path to directory where to look for GSYM files">, MetaVarName<"">, Group; def help : F<"help", "Display this help">; defm dwp : Eq<"dwp", "Path to DWP file to be use for any split CUs">, MetaVarName<"">; +defm pdb : Eq<"pdb", "Path to PDB file">, MetaVarName<"">; defm dsym_hint : Eq<"dsym-hint", "Path to .dSYM bundles to search for debug info for the object files">, diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index 4a75e14c6dd7a..0496df84fc8eb 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -496,6 +496,7 @@ int llvm_symbolizer_main(int argc, char **argv, const llvm::ToolContext &) { Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str(); Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line); Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str(); + Opts.PDBName = Args.getLastArgValue(OPT_pdb_EQ).str(); Opts.FallbackDebugPath = Args.getLastArgValue(OPT_fallback_debug_path_EQ).str(); Opts.GsymFileDirectory = Args.getAllArgValues(OPT_gsym_file_directory_EQ); From d56f0dae2d5b64a6d692bf3fa85c56326e18202f Mon Sep 17 00:00:00 2001 From: adalal-2441 Date: Tue, 23 Jun 2026 13:01:11 +0530 Subject: [PATCH 131/511] [X86] Prevent folding of volatile scalar loads into masked loads in selects (#205103) X86 select patterns were folding scalar FP loads into AVX-512 masked loads. Since masked loads suppress memory access when the mask is 0, this can incorrectly eliminate the observable access of volatile loads, leading to miscompilation. Non-volatile loads are unaffected. 
Multi-use loads already avoid folding, since folding consumes the load into the instruction's memory operand and leaves no value for the other users, forcing it to be materialized into a register. Single-use volatile loads did not, and this must also be prevented, as volatile loads are required to always perform their memory access. Fix this by using the isSimple()-guarded simple_load pattern instead of loadf32/loadf64, ensuring volatile loads are not folded. Found via @jlebar's X86 LLVM bug hunt / FuzzX effort: https://github.com/SemiAnalysisAI/FuzzX/blob/master/x86/bugs/093-avx512-vmovs-x86selects-load-fold-mask-suppress --- llvm/lib/Target/X86/X86InstrAVX512.td | 8 +-- llvm/test/CodeGen/X86/avx512-load-store.ll | 60 ++++++++++++++++++++++ 2 files changed, 64 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 44782de2b4648..2e17e164f2dce 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -4319,12 +4319,12 @@ def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), fp32imm0)), (COPY_TO_REGCLASS (v4f32 (VMOVSSZrrkz VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), (v4f32 (COPY_TO_REGCLASS FR32X:$src1, VR128X)))), FR32X)>; -def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), (f32 FR32X:$src0))), +def : Pat<(f32 (X86selects VK1WM:$mask, (f32 (simple_load addr:$src)), (f32 FR32X:$src0))), (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmk (v4f32 (COPY_TO_REGCLASS FR32X:$src0, VR128X)), VK1WM:$mask, addr:$src)), FR32X)>; -def : Pat<(f32 (X86selects VK1WM:$mask, (loadf32 addr:$src), fp32imm0)), +def : Pat<(f32 (X86selects VK1WM:$mask, (f32 (simple_load addr:$src)), fp32imm0)), (COPY_TO_REGCLASS (v4f32 (VMOVSSZrmkz VK1WM:$mask, addr:$src)), FR32X)>; def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), @@ -4337,12 +4337,12 @@ def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), fp64imm0)), (COPY_TO_REGCLASS (v2f64 (VMOVSDZrrkz 
VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (v2f64 (COPY_TO_REGCLASS FR64X:$src1, VR128X)))), FR64X)>; -def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), (f64 FR64X:$src0))), +def : Pat<(f64 (X86selects VK1WM:$mask, (f64 (simple_load addr:$src)), (f64 FR64X:$src0))), (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmk (v2f64 (COPY_TO_REGCLASS FR64X:$src0, VR128X)), VK1WM:$mask, addr:$src)), FR64X)>; -def : Pat<(f64 (X86selects VK1WM:$mask, (loadf64 addr:$src), fp64imm0)), +def : Pat<(f64 (X86selects VK1WM:$mask, (f64 (simple_load addr:$src)), fp64imm0)), (COPY_TO_REGCLASS (v2f64 (VMOVSDZrmkz VK1WM:$mask, addr:$src)), FR64X)>; diff --git a/llvm/test/CodeGen/X86/avx512-load-store.ll b/llvm/test/CodeGen/X86/avx512-load-store.ll index 52cffd38def78..f0677985621e1 100644 --- a/llvm/test/CodeGen/X86/avx512-load-store.ll +++ b/llvm/test/CodeGen/X86/avx512-load-store.ll @@ -588,3 +588,63 @@ define <80 x i32> @test_maskz_load_v80i32(ptr %p, <80 x i1> %mask) nounwind { %r = call <80 x i32> @llvm.masked.load.v80i32.p0(ptr %p, <80 x i1> %mask, <80 x i32> zeroinitializer) ret <80 x i32> %r } + +; A volatile scalar load must NOT be folded into the masked-load form: doing so +; would conditionally elide an observable memory access when the mask is 0. The +; load must remain unconditional, with a register-form masked move for the select. 
+ +define float @no_fold_volatile_load_f32(i1 %c, ptr %p, float %s0) nounwind { +; CHECK64-LABEL: no_fold_volatile_load_f32: +; CHECK64: # %bb.0: +; CHECK64-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK64-NEXT: kmovw %edi, %k1 +; CHECK64-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; CHECK64-NEXT: retq +; +; CHECK32-LABEL: no_fold_volatile_load_f32: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %eax +; CHECK32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK32-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; CHECK32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK32-NEXT: kmovw %eax, %k1 +; CHECK32-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1} +; CHECK32-NEXT: vmovss %xmm0, (%esp) +; CHECK32-NEXT: flds (%esp) +; CHECK32-NEXT: popl %eax +; CHECK32-NEXT: retl + %ld = load volatile float, ptr %p + %r = select i1 %c, float %ld, float %s0 + ret float %r +} + +define double @no_fold_volatile_load_f64(i1 %c, ptr %p, double %s0) nounwind { +; CHECK64-LABEL: no_fold_volatile_load_f64: +; CHECK64: # %bb.0: +; CHECK64-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; CHECK64-NEXT: kmovw %edi, %k1 +; CHECK64-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; CHECK64-NEXT: retq +; +; CHECK32-LABEL: no_fold_volatile_load_f64: +; CHECK32: # %bb.0: +; CHECK32-NEXT: pushl %ebp +; CHECK32-NEXT: movl %esp, %ebp +; CHECK32-NEXT: andl $-8, %esp +; CHECK32-NEXT: subl $8, %esp +; CHECK32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK32-NEXT: movzbl 8(%ebp), %eax +; CHECK32-NEXT: movl 12(%ebp), %ecx +; CHECK32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; CHECK32-NEXT: kmovw %eax, %k1 +; CHECK32-NEXT: vmovsd %xmm1, %xmm0, %xmm0 {%k1} +; CHECK32-NEXT: vmovsd %xmm0, (%esp) +; CHECK32-NEXT: fldl (%esp) +; CHECK32-NEXT: movl %ebp, %esp +; CHECK32-NEXT: popl %ebp +; CHECK32-NEXT: retl + %ld = load volatile double, ptr %p + %r = select i1 %c, double %ld, double %s0 + ret double %r +} From 85dd29fafaa5d287dea7836cd132fad21148f9ab Mon Sep 17 00:00:00 
2001 From: Pavel Labath Date: Tue, 23 Jun 2026 09:32:45 +0200 Subject: [PATCH 132/511] Revert "[libc] Introduce the ioctl syscall wrapper and port all callers" (#205277) Reverts llvm/llvm-project#204640 Breaks libc-x86_64-debian-fullbuild. Reverting while I investigate. --- .../linux/syscall_wrappers/CMakeLists.txt | 13 ----- .../OSUtil/linux/syscall_wrappers/ioctl.h | 49 ------------------- libc/src/sys/ioctl/linux/CMakeLists.txt | 3 +- libc/src/sys/ioctl/linux/ioctl.cpp | 15 +++--- libc/src/termios/linux/CMakeLists.txt | 21 +++++--- libc/src/termios/linux/tcdrain.cpp | 9 ++-- libc/src/termios/linux/tcflow.cpp | 9 ++-- libc/src/termios/linux/tcflush.cpp | 10 ++-- libc/src/termios/linux/tcgetattr.cpp | 12 +++-- libc/src/termios/linux/tcgetsid.cpp | 9 ++-- libc/src/termios/linux/tcsendbreak.cpp | 9 ++-- libc/src/termios/linux/tcsetattr.cpp | 12 +++-- libc/src/unistd/linux/CMakeLists.txt | 5 +- libc/src/unistd/linux/isatty.cpp | 13 +++-- 14 files changed, 76 insertions(+), 113 deletions(-) delete mode 100644 libc/src/__support/OSUtil/linux/syscall_wrappers/ioctl.h diff --git a/libc/src/__support/OSUtil/linux/syscall_wrappers/CMakeLists.txt b/libc/src/__support/OSUtil/linux/syscall_wrappers/CMakeLists.txt index f3282c315d9a9..a52be9676a3ca 100644 --- a/libc/src/__support/OSUtil/linux/syscall_wrappers/CMakeLists.txt +++ b/libc/src/__support/OSUtil/linux/syscall_wrappers/CMakeLists.txt @@ -706,19 +706,6 @@ add_header_library( libc.include.sys_syscall ) -add_header_library( - ioctl - HDRS - ioctl.h - DEPENDS - libc.src.__support.CPP.type_traits - libc.src.__support.OSUtil.osutil - libc.src.__support.error_or - libc.src.__support.macros.attributes - libc.src.__support.macros.config - libc.include.sys_syscall -) - add_header_library( readlink HDRS diff --git a/libc/src/__support/OSUtil/linux/syscall_wrappers/ioctl.h b/libc/src/__support/OSUtil/linux/syscall_wrappers/ioctl.h deleted file mode 100644 index 9410bbbcad8cd..0000000000000 --- 
a/libc/src/__support/OSUtil/linux/syscall_wrappers/ioctl.h +++ /dev/null @@ -1,49 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Syscall wrapper for ioctl. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_SYSCALL_WRAPPERS_IOCTL_H -#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_SYSCALL_WRAPPERS_IOCTL_H - -#include "src/__support/CPP/type_traits/enable_if.h" -#include "src/__support/CPP/type_traits/is_integral.h" -#include "src/__support/CPP/type_traits/is_null_pointer.h" -#include "src/__support/CPP/type_traits/is_pointer.h" -#include "src/__support/OSUtil/linux/syscall.h" // syscall_checked -#include "src/__support/error_or.h" -#include "src/__support/macros/attributes.h" // LIBC_INLINE -#include "src/__support/macros/config.h" -#include // For syscall numbers - -namespace LIBC_NAMESPACE_DECL { -namespace linux_syscalls { - -struct IoctlArg { - unsigned long val; - - LIBC_INLINE IoctlArg(const void *ptr) - : val(reinterpret_cast(ptr)) {} - - template , int> = 0> - LIBC_INLINE constexpr IoctlArg(T num = 0) - : val(static_cast(num)) {} -}; - -LIBC_INLINE ErrorOr ioctl(int fd, unsigned long request, - IoctlArg arg = 0) { - return syscall_checked(SYS_ioctl, fd, request, arg.val); -} - -} // namespace linux_syscalls -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_SYSCALL_WRAPPERS_IOCTL_H diff --git a/libc/src/sys/ioctl/linux/CMakeLists.txt b/libc/src/sys/ioctl/linux/CMakeLists.txt index 33ff2d4dce214..876f35aaee66c 100644 --- a/libc/src/sys/ioctl/linux/CMakeLists.txt +++ b/libc/src/sys/ioctl/linux/CMakeLists.txt @@ 
-6,6 +6,7 @@ add_entrypoint_object( ../ioctl.h DEPENDS libc.include.sys_ioctl - libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl + libc.include.sys_syscall + libc.src.__support.OSUtil.osutil libc.src.errno.errno ) diff --git a/libc/src/sys/ioctl/linux/ioctl.cpp b/libc/src/sys/ioctl/linux/ioctl.cpp index ec861ffc03ec6..9bb669c6a6f66 100644 --- a/libc/src/sys/ioctl/linux/ioctl.cpp +++ b/libc/src/sys/ioctl/linux/ioctl.cpp @@ -8,10 +8,11 @@ #include "src/sys/ioctl/ioctl.h" -#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include +#include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { @@ -19,14 +20,16 @@ LLVM_LIBC_FUNCTION(int, ioctl, (int fd, unsigned long request, ...)) { va_list vargs; va_start(vargs, request); void *data_pointer = va_arg(vargs, void *); + int ret = + LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, request, data_pointer); va_end(vargs); - auto ret = linux_syscalls::ioctl(fd, request, data_pointer); + // Some ioctls can be expected to return positive values + if (ret >= 0) + return ret; - if (ret.has_value()) - return ret.value(); - - libc_errno = ret.error(); + // If there is an error, errno is set and -1 is returned. 
+ libc_errno = -ret; return -1; } diff --git a/libc/src/termios/linux/CMakeLists.txt b/libc/src/termios/linux/CMakeLists.txt index 5d5440ae69266..e990fba25eabe 100644 --- a/libc/src/termios/linux/CMakeLists.txt +++ b/libc/src/termios/linux/CMakeLists.txt @@ -51,8 +51,9 @@ add_entrypoint_object( HDRS ../tcgetsid.h DEPENDS + libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl + libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -63,8 +64,9 @@ add_entrypoint_object( HDRS ../tcdrain.h DEPENDS + libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl + libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -75,8 +77,9 @@ add_entrypoint_object( HDRS ../tcflush.h DEPENDS + libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl + libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -87,8 +90,9 @@ add_entrypoint_object( HDRS ../tcflow.h DEPENDS + libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl + libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -99,8 +103,9 @@ add_entrypoint_object( HDRS ../tcsendbreak.h DEPENDS + libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl + libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -118,8 +123,9 @@ add_entrypoint_object( ../tcgetattr.h DEPENDS .kernel_termios + libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl + libc.src.__support.OSUtil.osutil libc.src.errno.errno ) @@ -131,7 +137,8 @@ add_entrypoint_object( ../tcsetattr.h DEPENDS .kernel_termios + libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl + libc.src.__support.OSUtil.osutil libc.src.errno.errno ) diff --git a/libc/src/termios/linux/tcdrain.cpp b/libc/src/termios/linux/tcdrain.cpp index 
4fce89d65a76f..570b15c24fe7f 100644 --- a/libc/src/termios/linux/tcdrain.cpp +++ b/libc/src/termios/linux/tcdrain.cpp @@ -8,20 +8,21 @@ #include "src/termios/tcdrain.h" -#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" +#include "src/__support/OSUtil/syscall.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. +#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, tcdrain, (int fd)) { - auto ret = linux_syscalls::ioctl(fd, TCSBRK, 1); - if (!ret.has_value()) { - libc_errno = ret.error(); + int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCSBRK, 1); + if (ret < 0) { + libc_errno = -ret; return -1; } return 0; diff --git a/libc/src/termios/linux/tcflow.cpp b/libc/src/termios/linux/tcflow.cpp index 4ffd294997ad4..714ef6aa71298 100644 --- a/libc/src/termios/linux/tcflow.cpp +++ b/libc/src/termios/linux/tcflow.cpp @@ -8,20 +8,21 @@ #include "src/termios/tcflow.h" -#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" +#include "src/__support/OSUtil/syscall.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. 
+#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, tcflow, (int fd, int action)) { - auto ret = linux_syscalls::ioctl(fd, TCXONC, action); - if (!ret.has_value()) { - libc_errno = ret.error(); + int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCXONC, action); + if (ret < 0) { + libc_errno = -ret; return -1; } return 0; diff --git a/libc/src/termios/linux/tcflush.cpp b/libc/src/termios/linux/tcflush.cpp index 8a4676d97454a..4c7b9fadc446d 100644 --- a/libc/src/termios/linux/tcflush.cpp +++ b/libc/src/termios/linux/tcflush.cpp @@ -8,20 +8,22 @@ #include "src/termios/tcflush.h" -#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" +#include "src/__support/OSUtil/syscall.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. +#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, tcflush, (int fd, int queue_selector)) { - auto ret = linux_syscalls::ioctl(fd, TCFLSH, queue_selector); - if (!ret.has_value()) { - libc_errno = ret.error(); + int ret = + LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCFLSH, queue_selector); + if (ret < 0) { + libc_errno = -ret; return -1; } return 0; diff --git a/libc/src/termios/linux/tcgetattr.cpp b/libc/src/termios/linux/tcgetattr.cpp index 0569be4ae588f..2e768269c874d 100644 --- a/libc/src/termios/linux/tcgetattr.cpp +++ b/libc/src/termios/linux/tcgetattr.cpp @@ -7,22 +7,24 @@ //===----------------------------------------------------------------------===// #include "src/termios/tcgetattr.h" -#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" +#include "kernel_termios.h" + +#include "src/__support/OSUtil/syscall.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include "src/termios/linux/kernel_termios.h" #include // Safe to include 
without the risk of name pollution. +#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, tcgetattr, (int fd, struct termios *t)) { LIBC_NAMESPACE::kernel_termios kt; - auto ret = linux_syscalls::ioctl(fd, TCGETS, &kt); - if (!ret.has_value()) { - libc_errno = ret.error(); + int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCGETS, &kt); + if (ret < 0) { + libc_errno = -ret; return -1; } t->c_iflag = kt.c_iflag; diff --git a/libc/src/termios/linux/tcgetsid.cpp b/libc/src/termios/linux/tcgetsid.cpp index 428ca83fb2ad0..7487816cf2741 100644 --- a/libc/src/termios/linux/tcgetsid.cpp +++ b/libc/src/termios/linux/tcgetsid.cpp @@ -8,21 +8,22 @@ #include "src/termios/tcgetsid.h" -#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" +#include "src/__support/OSUtil/syscall.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. +#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(pid_t, tcgetsid, (int fd)) { pid_t sid; - auto ret = linux_syscalls::ioctl(fd, TIOCGSID, &sid); - if (!ret.has_value()) { - libc_errno = ret.error(); + int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TIOCGSID, &sid); + if (ret < 0) { + libc_errno = -ret; return -1; } return sid; diff --git a/libc/src/termios/linux/tcsendbreak.cpp b/libc/src/termios/linux/tcsendbreak.cpp index e91ec7b748582..1d546c1d5953e 100644 --- a/libc/src/termios/linux/tcsendbreak.cpp +++ b/libc/src/termios/linux/tcsendbreak.cpp @@ -8,12 +8,13 @@ #include "src/termios/tcsendbreak.h" -#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" +#include "src/__support/OSUtil/syscall.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. 
+#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { @@ -22,9 +23,9 @@ LLVM_LIBC_FUNCTION(pid_t, tcsendbreak, (int fd, int /* unused duration */)) { // POSIX leaves the behavior for non-zero duration implementation dependent. // Which means that the behavior can be the same as it is when duration is // zero. So, we just pass zero to the syscall. - auto ret = linux_syscalls::ioctl(fd, TCSBRK, 0); - if (!ret.has_value()) { - libc_errno = ret.error(); + int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCSBRK, 0); + if (ret < 0) { + libc_errno = -ret; return -1; } return 0; diff --git a/libc/src/termios/linux/tcsetattr.cpp b/libc/src/termios/linux/tcsetattr.cpp index b2f08d078ee83..8a2c7290217ba 100644 --- a/libc/src/termios/linux/tcsetattr.cpp +++ b/libc/src/termios/linux/tcsetattr.cpp @@ -7,13 +7,15 @@ //===----------------------------------------------------------------------===// #include "src/termios/tcsetattr.h" -#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" +#include "kernel_termios.h" + +#include "src/__support/OSUtil/syscall.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include "src/termios/linux/kernel_termios.h" #include // Safe to include without the risk of name pollution. 
+#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { @@ -50,9 +52,9 @@ LLVM_LIBC_FUNCTION(int, tcsetattr, kt.c_cc[i] = 0; } - auto ret = linux_syscalls::ioctl(fd, cmd, &kt); - if (!ret.has_value()) { - libc_errno = ret.error(); + int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, cmd, &kt); + if (ret < 0) { + libc_errno = -ret; return -1; } return 0; diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt index af385e9bbed72..04ccde414cd2f 100644 --- a/libc/src/unistd/linux/CMakeLists.txt +++ b/libc/src/unistd/linux/CMakeLists.txt @@ -342,10 +342,11 @@ add_entrypoint_object( HDRS ../isatty.h DEPENDS - libc.hdr.unistd_macros + libc.hdr.fcntl_macros libc.include.unistd libc.include.sys_ioctl - libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl + libc.include.sys_syscall + libc.src.__support.OSUtil.osutil libc.src.errno.errno ) diff --git a/libc/src/unistd/linux/isatty.cpp b/libc/src/unistd/linux/isatty.cpp index 4418feb2229a3..a4d17912b57b0 100644 --- a/libc/src/unistd/linux/isatty.cpp +++ b/libc/src/unistd/linux/isatty.cpp @@ -8,11 +8,13 @@ #include "src/unistd/isatty.h" -#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" +#include "src/__support/OSUtil/syscall.h" // For internal syscall function. #include "src/__support/common.h" + #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include // For ioctl numbers. +#include // For ioctl numbers. +#include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { @@ -21,11 +23,12 @@ LLVM_LIBC_FUNCTION(int, isatty, (int fd)) { int line_d_val = INIT_VAL; // This gets the line dicipline of the terminal. When called on something that // isn't a terminal it doesn't change line_d_val and returns -1. 
- auto result = linux_syscalls::ioctl(fd, TIOCGETD, &line_d_val); - if (result.has_value()) + int result = + LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TIOCGETD, &line_d_val); + if (result == 0) return 1; - libc_errno = result.error(); + libc_errno = -result; return 0; } From 0a88184a3b8f15780c6a2c60b7051c5bfe230dec Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 23 Jun 2026 00:33:44 -0700 Subject: [PATCH 133/511] [AMDGPU] Fold v2{i|f}64 immediates (#205195) --- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 2 + llvm/test/CodeGen/AMDGPU/fold-imm-pk64.mir | 233 +++++++++++++++++- llvm/test/CodeGen/AMDGPU/packed-fp64.ll | 250 ++++++-------------- llvm/test/CodeGen/AMDGPU/packed-u64.ll | 214 +++++------------ llvm/test/CodeGen/AMDGPU/pk-lshl-add-u64.ll | 14 +- 5 files changed, 360 insertions(+), 353 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp index 76944081477de..1affca363ca7d 100644 --- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp +++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp @@ -1148,7 +1148,9 @@ bool SIFoldOperandsImpl::tryFoldRegSeqSplat( break; case AMDGPU::OPERAND_REG_INLINE_AC_FP64: case AMDGPU::OPERAND_REG_INLINE_C_FP64: + case AMDGPU::OPERAND_REG_IMM_V2FP64: case AMDGPU::OPERAND_REG_INLINE_C_INT64: + case AMDGPU::OPERAND_REG_IMM_V2INT64: OpRC = TRI->getSubRegisterClass(OpRC, AMDGPU::sub0_sub1); break; default: diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-pk64.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-pk64.mir index f2e9c2f96183a..e169590261792 100644 --- a/llvm/test/CodeGen/AMDGPU/fold-imm-pk64.mir +++ b/llvm/test/CodeGen/AMDGPU/fold-imm-pk64.mir @@ -3,6 +3,7 @@ --- name: pk_add_f64_no_fold_imm +tracksRegLiveness: true body: | bb.0: ; GFX1251-LABEL: name: pk_add_f64_no_fold_imm @@ -11,27 +12,251 @@ body: | ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4611686018427387904, implicit $exec ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = 
REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 ; GFX1251-NEXT: [[V_PK_ADD_F64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed [[DEF]], 8, [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX1251-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F64_]] %0:vreg_128_align2 = IMPLICIT_DEF %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec %2:vreg_64_align2 = V_MOV_B64_PSEUDO 4611686018427387904, implicit $exec %3:vreg_128_align2 = REG_SEQUENCE killed %1, %subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 %4:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0, implicit %4 ... -# FIXME: This should be foldable --- -name: pk_add_f64_fold_splat +name: pk_add_f64_fold_splat_1.0 +tracksRegLiveness: true body: | bb.0: - ; GFX1251-LABEL: name: pk_add_f64_fold_splat + ; GFX1251-LABEL: name: pk_add_f64_fold_splat_1.0 ; GFX1251: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF ; GFX1251-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 - ; GFX1251-NEXT: [[V_PK_ADD_F64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed [[DEF]], 8, [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX1251-NEXT: [[V_PK_ADD_F64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed [[DEF]], 8, 4607182418800017408, 0, 0, 0, implicit $mode, implicit $exec + ; GFX1251-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F64_]] %0:vreg_128_align2 = IMPLICIT_DEF %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec %2:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec %3:vreg_128_align2 = REG_SEQUENCE killed %1, 
%subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 %4:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0, implicit %4 +... + +--- +name: pk_add_f64_fold_splat_100.0 +tracksRegLiveness: true +body: | + bb.0: + ; GFX1251-LABEL: name: pk_add_f64_fold_splat_100.0 + ; GFX1251: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GFX1251-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4636737291354636288, implicit $exec + ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4636737291354636288, implicit $exec + ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 + ; GFX1251-NEXT: [[V_PK_ADD_F64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed [[DEF]], 8, 4636737291354636288, 0, 0, 0, implicit $mode, implicit $exec + ; GFX1251-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F64_]] + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4636737291354636288, implicit $exec + %2:vreg_64_align2 = V_MOV_B64_PSEUDO 4636737291354636288, implicit $exec + %3:vreg_128_align2 = REG_SEQUENCE killed %1, %subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 + %4:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: pk_add_f64_no_fold_splat_100.1 +tracksRegLiveness: true +body: | + bb.0: + ; GFX1251-LABEL: name: pk_add_f64_no_fold_splat_100.1 + ; GFX1251: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GFX1251-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4636744328229054054, implicit $exec + ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4636744328229054054, implicit $exec + ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 + ; GFX1251-NEXT: [[V_PK_ADD_F64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed [[DEF]], 8, [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX1251-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F64_]] + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4636744328229054054, implicit $exec + %2:vreg_64_align2 = V_MOV_B64_PSEUDO 4636744328229054054, implicit $exec + %3:vreg_128_align2 = REG_SEQUENCE killed %1, %subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 + %4:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: pk_add_f64_no_fold_splat_100 +tracksRegLiveness: true +body: | + bb.0: + ; GFX1251-LABEL: name: pk_add_f64_no_fold_splat_100 + ; GFX1251: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GFX1251-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 100, implicit $exec + ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 100, implicit $exec + ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 + ; GFX1251-NEXT: [[V_PK_ADD_F64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed [[DEF]], 8, [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX1251-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F64_]] + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 100, implicit $exec + %2:vreg_64_align2 = V_MOV_B64_PSEUDO 100, implicit $exec + %3:vreg_128_align2 = REG_SEQUENCE killed %1, %subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 + %4:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: pk_add_f64_fold_splat_1 +tracksRegLiveness: true +body: | + bb.0: + ; GFX1251-LABEL: name: pk_add_f64_fold_splat_1 + ; GFX1251: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GFX1251-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec + ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec + ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 + ; GFX1251-NEXT: [[V_PK_ADD_F64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed [[DEF]], 8, 1, 0, 0, 0, implicit $mode, implicit $exec + ; GFX1251-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_F64_]] + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec + %2:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec + %3:vreg_128_align2 = REG_SEQUENCE killed %1, %subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 + %4:vreg_128_align2 = nofpexcept V_PK_ADD_F64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: pk_add_nc_u64_no_fold_imm +tracksRegLiveness: true +body: | + bb.0: + ; GFX1251-LABEL: name: pk_add_nc_u64_no_fold_imm + ; GFX1251: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GFX1251-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec + ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 2, implicit $exec + ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 + ; GFX1251-NEXT: [[V_PK_ADD_NC_U64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_NC_U64 8, killed [[DEF]], 8, [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX1251-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_NC_U64_]] + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec + %2:vreg_64_align2 = V_MOV_B64_PSEUDO 2, implicit $exec + %3:vreg_128_align2 = REG_SEQUENCE killed %1, %subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 + %4:vreg_128_align2 = nofpexcept V_PK_ADD_NC_U64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: pk_add_nc_u64_fold_splat_1 +tracksRegLiveness: true +body: | + bb.0: + ; GFX1251-LABEL: name: pk_add_nc_u64_fold_splat_1 + ; GFX1251: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GFX1251-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec + ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec + ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 + ; GFX1251-NEXT: [[V_PK_ADD_NC_U64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_NC_U64 8, killed [[DEF]], 8, 1, 0, 0, 0, implicit $mode, implicit $exec + ; GFX1251-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_NC_U64_]] + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec + %2:vreg_64_align2 = V_MOV_B64_PSEUDO 1, implicit $exec + %3:vreg_128_align2 = REG_SEQUENCE killed %1, %subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 + %4:vreg_128_align2 = nofpexcept V_PK_ADD_NC_U64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: pk_add_nc_u64_fold_splat_100 +tracksRegLiveness: true +body: | + bb.0: + ; GFX1251-LABEL: name: pk_add_nc_u64_fold_splat_100 + ; GFX1251: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GFX1251-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 100, implicit $exec + ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 100, implicit $exec + ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 + ; GFX1251-NEXT: [[V_PK_ADD_NC_U64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_NC_U64 8, killed [[DEF]], 8, 100, 0, 0, 0, implicit $mode, implicit $exec + ; GFX1251-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_NC_U64_]] + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 100, implicit $exec + %2:vreg_64_align2 = V_MOV_B64_PSEUDO 100, implicit $exec + %3:vreg_128_align2 = REG_SEQUENCE killed %1, %subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 + %4:vreg_128_align2 = nofpexcept V_PK_ADD_NC_U64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: pk_add_nc_u64_no_fold_splat_0x100000000 +tracksRegLiveness: true +body: | + bb.0: + ; GFX1251-LABEL: name: pk_add_nc_u64_no_fold_splat_0x100000000 + ; GFX1251: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GFX1251-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4294967296, implicit $exec + ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4294967296, implicit $exec + ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 + ; GFX1251-NEXT: [[V_PK_ADD_NC_U64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_NC_U64 8, killed [[DEF]], 8, [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX1251-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_NC_U64_]] + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4294967296, implicit $exec + %2:vreg_64_align2 = V_MOV_B64_PSEUDO 4294967296, implicit $exec + %3:vreg_128_align2 = REG_SEQUENCE killed %1, %subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 + %4:vreg_128_align2 = nofpexcept V_PK_ADD_NC_U64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: pk_add_nc_u64_fold_splat_1.0 +tracksRegLiveness: true +body: | + bb.0: + ; GFX1251-LABEL: name: pk_add_nc_u64_fold_splat_1.0 + ; GFX1251: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GFX1251-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 + ; GFX1251-NEXT: [[V_PK_ADD_NC_U64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_NC_U64 8, killed [[DEF]], 8, 4607182418800017408, 0, 0, 0, implicit $mode, implicit $exec + ; GFX1251-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_NC_U64_]] + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + %2:vreg_64_align2 = V_MOV_B64_PSEUDO 4607182418800017408, implicit $exec + %3:vreg_128_align2 = REG_SEQUENCE killed %1, %subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 + %4:vreg_128_align2 = nofpexcept V_PK_ADD_NC_U64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: pk_add_nc_u64_no_fold_splat_100.0 +tracksRegLiveness: true +body: | + bb.0: + ; GFX1251-LABEL: name: pk_add_nc_u64_no_fold_splat_100.0 + ; GFX1251: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GFX1251-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4636737291354636288, implicit $exec + ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4636737291354636288, implicit $exec + ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 + ; GFX1251-NEXT: [[V_PK_ADD_NC_U64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_NC_U64 8, killed [[DEF]], 8, [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX1251-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_NC_U64_]] + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4636737291354636288, implicit $exec + %2:vreg_64_align2 = V_MOV_B64_PSEUDO 4636737291354636288, implicit $exec + %3:vreg_128_align2 = REG_SEQUENCE killed %1, %subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 + %4:vreg_128_align2 = nofpexcept V_PK_ADD_NC_U64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0, implicit %4 +... 
+ +--- +name: pk_add_nc_u64_no_fold_splat_100.1 +tracksRegLiveness: true +body: | + bb.0: + ; GFX1251-LABEL: name: pk_add_nc_u64_no_fold_splat_100.1 + ; GFX1251: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF + ; GFX1251-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4636744328229054054, implicit $exec + ; GFX1251-NEXT: [[V_MOV_B1:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 4636744328229054054, implicit $exec + ; GFX1251-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_128_align2 = REG_SEQUENCE killed [[V_MOV_B]], %subreg.sub0_sub1, killed [[V_MOV_B1]], %subreg.sub2_sub3 + ; GFX1251-NEXT: [[V_PK_ADD_NC_U64_:%[0-9]+]]:vreg_128_align2 = nofpexcept V_PK_ADD_NC_U64 8, killed [[DEF]], 8, [[REG_SEQUENCE]], 0, 0, 0, implicit $mode, implicit $exec + ; GFX1251-NEXT: S_ENDPGM 0, implicit [[V_PK_ADD_NC_U64_]] + %0:vreg_128_align2 = IMPLICIT_DEF + %1:vreg_64_align2 = V_MOV_B64_PSEUDO 4636744328229054054, implicit $exec + %2:vreg_64_align2 = V_MOV_B64_PSEUDO 4636744328229054054, implicit $exec + %3:vreg_128_align2 = REG_SEQUENCE killed %1, %subreg.sub0_sub1, killed %2, %subreg.sub2_sub3 + %4:vreg_128_align2 = nofpexcept V_PK_ADD_NC_U64 8, killed %0, 8, %3, 0, 0, 0, implicit $mode, implicit $exec + S_ENDPGM 0, implicit %4 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp64.ll b/llvm/test/CodeGen/AMDGPU/packed-fp64.ll index 4b7386a6475e7..b16a64dc59349 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-fp64.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-fp64.ll @@ -401,37 +401,17 @@ define amdgpu_kernel void @fadd_v32_vs(ptr addrspace(1) %a, <32 x double> %x) { } define amdgpu_kernel void @fadd_v2_v_imm(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: fadd_v2_v_imm: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x4059000000000000 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] -; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: fadd_v2_v_imm: -; GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-GISEL-NEXT: s_mov_b64 s[0:1], 0x4059000000000000 -; GFX1251-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1251-GISEL-NEXT: s_mov_b64 s[2:3], s[0:1] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[2:3] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] -; GFX1251-GISEL-NEXT: 
global_store_b128 v8, v[0:3], s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: fadd_v2_v_imm: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v4, 0x3ff, v0 +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_add_f64 v[0:3], 0x40590000, v[0:3] +; GFX1251-NEXT: global_store_b128 v4, v[0:3], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x double>, ptr addrspace(1) %a, i32 %id %load = load <2 x double>, ptr addrspace(1) %gep, align 8 @@ -467,40 +447,18 @@ define amdgpu_kernel void @fadd_v2_v_v_splat(ptr addrspace(1) %a) { ret void } -; TODO: splat literal can be folded, but it is a REG_SEQUENCE which we do not match - define amdgpu_kernel void @fadd_v2_v_lit_splat(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: fadd_v2_v_lit_splat: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1.0 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] -; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: fadd_v2_v_lit_splat: -; GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 
-; GFX1251-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-GISEL-NEXT: s_mov_b64 s[0:1], 1.0 -; GFX1251-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1251-GISEL-NEXT: s_mov_b64 s[2:3], s[0:1] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[2:3] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] -; GFX1251-GISEL-NEXT: global_store_b128 v8, v[0:3], s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: fadd_v2_v_lit_splat: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v4, 0x3ff, v0 +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_add_f64 v[0:3], v[0:3], 1.0 +; GFX1251-NEXT: global_store_b128 v4, v[0:3], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x double>, ptr addrspace(1) %a, i32 %id %load = load <2 x double>, ptr addrspace(1) %gep, align 8 @@ -1267,37 +1225,17 @@ define amdgpu_kernel void @fmul_v32_vs(ptr addrspace(1) %a, <32 x double> %x) { } define amdgpu_kernel void @fmul_v2_v_imm(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: fmul_v2_v_imm: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x4059000000000000 -; GFX1251-SDAG-NEXT: s_delay_alu 
instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_mul_f64 v[0:3], v[0:3], v[4:7] -; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: fmul_v2_v_imm: -; GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-GISEL-NEXT: s_mov_b64 s[0:1], 0x4059000000000000 -; GFX1251-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1251-GISEL-NEXT: s_mov_b64 s[2:3], s[0:1] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[2:3] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_mul_f64 v[0:3], v[0:3], v[4:7] -; GFX1251-GISEL-NEXT: global_store_b128 v8, v[0:3], s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: fmul_v2_v_imm: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v4, 0x3ff, v0 +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_mul_f64 v[0:3], 0x40590000, v[0:3] +; GFX1251-NEXT: global_store_b128 v4, v[0:3], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x double>, ptr addrspace(1) %a, i32 %id %load = load <2 x double>, ptr 
addrspace(1) %gep, align 8 @@ -1334,37 +1272,17 @@ define amdgpu_kernel void @fmul_v2_v_v_splat(ptr addrspace(1) %a) { } define amdgpu_kernel void @fmul_v2_v_lit_splat(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: fmul_v2_v_lit_splat: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 4.0 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_mul_f64 v[0:3], v[0:3], v[4:7] -; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: fmul_v2_v_lit_splat: -; GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-GISEL-NEXT: s_mov_b64 s[0:1], 4.0 -; GFX1251-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1251-GISEL-NEXT: s_mov_b64 s[2:3], s[0:1] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[2:3] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_mul_f64 v[0:3], v[0:3], v[4:7] -; GFX1251-GISEL-NEXT: global_store_b128 v8, v[0:3], s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: fmul_v2_v_lit_splat: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 
src0=0 src1=0 src2=0 +; GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v4, 0x3ff, v0 +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_mul_f64 v[0:3], v[0:3], 4.0 +; GFX1251-NEXT: global_store_b128 v4, v[0:3], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x double>, ptr addrspace(1) %a, i32 %id %load = load <2 x double>, ptr addrspace(1) %gep, align 8 @@ -1841,38 +1759,32 @@ define amdgpu_kernel void @fma_v2_v_imm(ptr addrspace(1) %a) { ; GFX1251-SDAG: ; %bb.0: ; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 ; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v12, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x4069000000000000 -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[8:9], 0x4059000000000000 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x4059000000000000 +; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) ; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[10:11], v[8:9] ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v12, s[0:1] scale_offset +; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_fma_f64 v[0:3], v[0:3], v[8:11], v[4:7] -; GFX1251-SDAG-NEXT: global_store_b128 v12, v[0:3], s[0:1] scale_offset +; GFX1251-SDAG-NEXT: v_pk_fma_f64 v[0:3], v[0:3], v[4:7], 0x40690000 +; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_endpgm ; ; GFX1251-GISEL-LABEL: fma_v2_v_imm: ; GFX1251-GISEL: ; %bb.0: ; 
GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[8:9], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v12, 0x3ff, v0 -; GFX1251-GISEL-NEXT: s_mov_b64 s[0:1], 0x4059000000000000 -; GFX1251-GISEL-NEXT: s_wait_xcnt 0x0 -; GFX1251-GISEL-NEXT: s_mov_b64 s[4:5], 0x4069000000000000 +; GFX1251-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv +; GFX1251-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0 +; GFX1251-GISEL-NEXT: s_mov_b64 s[0:1], 0x4069000000000000 +; GFX1251-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX1251-GISEL-NEXT: s_mov_b64 s[2:3], s[0:1] -; GFX1251-GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] ; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[2:3] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[6:7] ; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[4:5] ; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[0:3], v12, s[8:9] scale_offset +; GFX1251-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset ; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_fma_f64 v[0:3], v[0:3], v[4:7], v[8:11] -; GFX1251-GISEL-NEXT: global_store_b128 v12, v[0:3], s[8:9] scale_offset +; GFX1251-GISEL-NEXT: v_pk_fma_f64 v[0:3], 0x40590000, v[0:3], v[4:7] +; GFX1251-GISEL-NEXT: global_store_b128 v8, v[0:3], s[6:7] scale_offset ; GFX1251-GISEL-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x double>, ptr addrspace(1) %a, i32 %id @@ -1910,43 +1822,17 @@ define amdgpu_kernel void @fma_v2_v_v_splat(ptr addrspace(1) %a) { } define amdgpu_kernel void @fma_v2_v_lit_splat(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: fma_v2_v_lit_splat: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], 
s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v12, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1.0 -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[8:9], 4.0 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[10:11], v[8:9] -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v12, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_fma_f64 v[0:3], v[0:3], v[8:11], v[4:7] -; GFX1251-SDAG-NEXT: global_store_b128 v12, v[0:3], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: fma_v2_v_lit_splat: -; GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[8:9], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v12, 0x3ff, v0 -; GFX1251-GISEL-NEXT: s_mov_b64 s[0:1], 4.0 -; GFX1251-GISEL-NEXT: s_wait_xcnt 0x0 -; GFX1251-GISEL-NEXT: s_mov_b64 s[4:5], 1.0 -; GFX1251-GISEL-NEXT: s_mov_b64 s[2:3], s[0:1] -; GFX1251-GISEL-NEXT: s_mov_b64 s[6:7], s[4:5] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[2:3] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[10:11], s[6:7] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[8:9], s[4:5] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[0:3], v12, s[8:9] scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_fma_f64 v[0:3], v[0:3], v[4:7], v[8:11] -; GFX1251-GISEL-NEXT: global_store_b128 v12, v[0:3], s[8:9] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: fma_v2_v_lit_splat: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v4, 0x3ff, 
v0 +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_fma_f64 v[0:3], v[0:3], 4.0, 1.0 +; GFX1251-NEXT: global_store_b128 v4, v[0:3], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x double>, ptr addrspace(1) %a, i32 %id %load = load <2 x double>, ptr addrspace(1) %gep, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/packed-u64.ll b/llvm/test/CodeGen/AMDGPU/packed-u64.ll index 2ae10b67a5d62..77e45fb1560e3 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-u64.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-u64.ll @@ -401,37 +401,17 @@ define amdgpu_kernel void @add_v32_vs(ptr addrspace(1) %a, <32 x i64> %x) { } define amdgpu_kernel void @add_v2_v_imm(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: add_v2_v_imm: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x64 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_add_nc_u64 v[0:3], v[0:3], v[4:7] -; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: add_v2_v_imm: -; GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-GISEL-NEXT: s_mov_b64 s[0:1], 0x64 -; GFX1251-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | 
instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1251-GISEL-NEXT: s_mov_b64 s[2:3], s[0:1] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[2:3] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_add_nc_u64 v[0:3], v[0:3], v[4:7] -; GFX1251-GISEL-NEXT: global_store_b128 v8, v[0:3], s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: add_v2_v_imm: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v4, 0x3ff, v0 +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_add_nc_u64 v[0:3], 0x64, v[0:3] +; GFX1251-NEXT: global_store_b128 v4, v[0:3], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x i64>, ptr addrspace(1) %a, i32 %id %load = load <2 x i64>, ptr addrspace(1) %gep, align 8 @@ -467,40 +447,18 @@ define amdgpu_kernel void @add_v2_v_v_splat(ptr addrspace(1) %a) { ret void } -; TODO: splat literal can be folded, but it is a REG_SEQUENCE which we do not match - define amdgpu_kernel void @add_v2_v_lit_splat(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: add_v2_v_lit_splat: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 
v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_add_nc_u64 v[0:3], v[0:3], v[4:7] -; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: add_v2_v_lit_splat: -; GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-GISEL-NEXT: s_mov_b64 s[0:1], 1 -; GFX1251-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1251-GISEL-NEXT: s_mov_b64 s[2:3], s[0:1] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[2:3] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_add_nc_u64 v[0:3], v[0:3], v[4:7] -; GFX1251-GISEL-NEXT: global_store_b128 v8, v[0:3], s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: add_v2_v_lit_splat: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v4, 0x3ff, v0 +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_add_nc_u64 v[0:3], v[0:3], 1 +; GFX1251-NEXT: global_store_b128 v4, v[0:3], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x i64>, ptr addrspace(1) %a, i32 %id %load = load <2 x i64>, ptr addrspace(1) %gep, align 8 @@ -1026,37 +984,17 @@ define amdgpu_kernel void @sub_v32_vs(ptr addrspace(1) %a, <32 x i64> %x) { } define amdgpu_kernel void 
@sub_v2_v_imm(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: sub_v2_v_imm: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x64 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_sub_nc_u64 v[0:3], v[0:3], v[4:7] -; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: sub_v2_v_imm: -; GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-GISEL-NEXT: s_mov_b64 s[0:1], 0x64 -; GFX1251-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1251-GISEL-NEXT: s_mov_b64 s[2:3], s[0:1] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[2:3] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_sub_nc_u64 v[0:3], v[0:3], v[4:7] -; GFX1251-GISEL-NEXT: global_store_b128 v8, v[0:3], s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: sub_v2_v_imm: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v4, 0x3ff, v0 +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: 
global_load_b128 v[0:3], v4, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_sub_nc_u64 v[0:3], v[0:3], 0x64 +; GFX1251-NEXT: global_store_b128 v4, v[0:3], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x i64>, ptr addrspace(1) %a, i32 %id %load = load <2 x i64>, ptr addrspace(1) %gep, align 8 @@ -1066,37 +1004,17 @@ define amdgpu_kernel void @sub_v2_v_imm(ptr addrspace(1) %a) { } define amdgpu_kernel void @sub_v2_imm_v(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: sub_v2_imm_v: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 0x64 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_sub_nc_u64 v[0:3], v[4:7], v[0:3] -; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: sub_v2_imm_v: -; GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-GISEL-NEXT: s_mov_b64 s[0:1], 0x64 -; GFX1251-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1251-GISEL-NEXT: s_mov_b64 s[2:3], s[0:1] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[2:3] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] 
scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_sub_nc_u64 v[0:3], v[4:7], v[0:3] -; GFX1251-GISEL-NEXT: global_store_b128 v8, v[0:3], s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: sub_v2_imm_v: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v4, 0x3ff, v0 +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_sub_nc_u64 v[0:3], 0x64, v[0:3] +; GFX1251-NEXT: global_store_b128 v4, v[0:3], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x i64>, ptr addrspace(1) %a, i32 %id %load = load <2 x i64>, ptr addrspace(1) %gep, align 8 @@ -1132,40 +1050,18 @@ define amdgpu_kernel void @sub_v2_v_v_splat(ptr addrspace(1) %a) { ret void } -; TODO: splat literal can be folded, but it is a REG_SEQUENCE which we do not match - define amdgpu_kernel void @sub_v2_v_lit_splat(ptr addrspace(1) %a) { -; GFX1251-SDAG-LABEL: sub_v2_v_lit_splat: -; GFX1251-SDAG: ; %bb.0: -; GFX1251-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv -; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], 1 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], v[4:5] -; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 -; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_sub_nc_u64 v[0:3], v[0:3], v[4:7] -; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_endpgm -; -; GFX1251-GISEL-LABEL: sub_v2_v_lit_splat: -; 
GFX1251-GISEL: ; %bb.0: -; GFX1251-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 -; GFX1251-GISEL-NEXT: s_load_b64 s[6:7], s[4:5], 0x24 nv -; GFX1251-GISEL-NEXT: v_and_b32_e32 v8, 0x3ff, v0 -; GFX1251-GISEL-NEXT: s_mov_b64 s[0:1], 1 -; GFX1251-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX1251-GISEL-NEXT: s_mov_b64 s[2:3], s[0:1] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[6:7], s[2:3] -; GFX1251-GISEL-NEXT: v_mov_b64_e32 v[4:5], s[0:1] -; GFX1251-GISEL-NEXT: s_wait_kmcnt 0x0 -; GFX1251-GISEL-NEXT: global_load_b128 v[0:3], v8, s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_wait_loadcnt 0x0 -; GFX1251-GISEL-NEXT: v_pk_sub_nc_u64 v[0:3], v[0:3], v[4:7] -; GFX1251-GISEL-NEXT: global_store_b128 v8, v[0:3], s[6:7] scale_offset -; GFX1251-GISEL-NEXT: s_endpgm +; GFX1251-LABEL: sub_v2_v_lit_splat: +; GFX1251: ; %bb.0: +; GFX1251-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; msbs: dst=0 src0=0 src1=0 src2=0 +; GFX1251-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 nv +; GFX1251-NEXT: v_and_b32_e32 v4, 0x3ff, v0 +; GFX1251-NEXT: s_wait_kmcnt 0x0 +; GFX1251-NEXT: global_load_b128 v[0:3], v4, s[0:1] scale_offset +; GFX1251-NEXT: s_wait_loadcnt 0x0 +; GFX1251-NEXT: v_pk_sub_nc_u64 v[0:3], v[0:3], 1 +; GFX1251-NEXT: global_store_b128 v4, v[0:3], s[0:1] scale_offset +; GFX1251-NEXT: s_endpgm %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds <2 x i64>, ptr addrspace(1) %a, i32 %id %load = load <2 x i64>, ptr addrspace(1) %gep, align 8 diff --git a/llvm/test/CodeGen/AMDGPU/pk-lshl-add-u64.ll b/llvm/test/CodeGen/AMDGPU/pk-lshl-add-u64.ll index ee7e164057938..c4d2541e7fcf8 100644 --- a/llvm/test/CodeGen/AMDGPU/pk-lshl-add-u64.ll +++ b/llvm/test/CodeGen/AMDGPU/pk-lshl-add-u64.ll @@ -147,14 +147,12 @@ define i32 @pk_lshl_add_u64_maybe_oob(<2 x ptr> %p, <2 x i32> %i) { ; GFX1251-NEXT: s_wait_kmcnt 0x0 ; GFX1251-NEXT: v_dual_mov_b32 v6, v5 :: 
v_dual_ashrrev_i32 v5, 31, v4 ; GFX1251-NEXT: s_mov_b32 s0, 2 -; GFX1251-NEXT: v_mov_b64_e32 v[8:9], 12 -; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX1251-NEXT: v_dual_mov_b32 v13, s0 :: v_dual_ashrrev_i32 v7, 31, v6 -; GFX1251-NEXT: v_mov_b32_e32 v12, s0 -; GFX1251-NEXT: v_mov_b64_e32 v[10:11], v[8:9] -; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX1251-NEXT: v_pk_lshl_add_u64 v[0:3], v[4:7], v[12:13], v[0:3] -; GFX1251-NEXT: v_pk_add_nc_u64 v[0:3], v[0:3], v[8:11] +; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX1251-NEXT: v_dual_mov_b32 v9, s0 :: v_dual_ashrrev_i32 v7, 31, v6 +; GFX1251-NEXT: v_mov_b32_e32 v8, s0 +; GFX1251-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX1251-NEXT: v_pk_lshl_add_u64 v[0:3], v[4:7], v[8:9], v[0:3] +; GFX1251-NEXT: v_pk_add_nc_u64 v[0:3], v[0:3], 12 ; GFX1251-NEXT: flat_load_b32 v4, v[0:1] ; GFX1251-NEXT: flat_load_b32 v5, v[2:3] ; GFX1251-NEXT: s_wait_loadcnt_dscnt 0x0 From 9a23d6aeb58ac64eaee816315b49ff653d2af609 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Benics?= Date: Tue, 23 Jun 2026 09:39:58 +0200 Subject: [PATCH 134/511] Revert some SSAF patches (#205279) I've started seeing some failures on Windows permissive bots. I'll revert my patches for now until further investigation. 
errors: https://lab.llvm.org/buildbot/#/builders/107/builds/20548 ``` C:\b\slave\sanitizer-windows\llvm-project\clang\lib\Frontend\CompilerInvocation.cpp C:\b\slave\sanitizer-windows\build\tools\clang\include\clang/Options/Options.inc(9981): error C2065: 'SSAFOpts': undeclared identifier C:\b\slave\sanitizer-windows\build\tools\clang\include\clang/Options/Options.inc(9982): note: see reference to function template instantiation 'auto GenerateSSAFArgs::::operator ()(const T &) const' being compiled with [ T=std::vector> ] ``` Revert "Reland "[clang][ssaf][NFC] Move SSAF flags from FrontendOptions to a dedicated SSAFOptions" (#204798)" This reverts commit 194da0f3e774426dfa646e9879a1ad109db06c6c. Revert "[clang][ssaf][NFC] Make SSAFOptions available in Builders and Extractors (#204684)" This reverts commit ffeb9c1d2612cb5729889b6e82ade060192722dd. --- .../include/clang/Frontend/CompilerInstance.h | 5 -- .../clang/Frontend/CompilerInvocation.h | 25 +++------ .../include/clang/Frontend/FrontendOptions.h | 24 ++++++++- clang/include/clang/Frontend/SSAFOptions.h | 52 ------------------ clang/include/clang/Options/Options.td | 12 ++--- .../Core/TUSummary/TUSummaryBuilder.h | 8 +-- .../Core/TUSummary/TUSummaryExtractor.h | 4 -- clang/lib/Frontend/CompilerInvocation.cpp | 32 +---------- .../ExecuteCompilerInvocation.cpp | 3 +- .../PointerFlow/PointerFlowExtractor.cpp | 3 +- .../Core/TUSummary/TUSummaryExtractor.cpp | 4 -- .../TUSummaryExtractorFrontendAction.cpp | 32 +++++------ .../CallGraph/CallGraphExtractorTest.cpp | 4 +- .../Analyses/PointerFlow/PointerFlowTest.cpp | 4 +- .../UnsafeBufferUsageTest.cpp | 4 +- .../TUSummaryExtractorFrontendActionTest.cpp | 53 +++++++++---------- .../SummaryExtractorRegistryTest.cpp | 10 ++-- .../TUSummaryBuilderTest.cpp | 4 +- 18 files changed, 87 insertions(+), 196 deletions(-) delete mode 100644 clang/include/clang/Frontend/SSAFOptions.h diff --git a/clang/include/clang/Frontend/CompilerInstance.h 
b/clang/include/clang/Frontend/CompilerInstance.h index 24488e053c628..bb0eddb918623 100644 --- a/clang/include/clang/Frontend/CompilerInstance.h +++ b/clang/include/clang/Frontend/CompilerInstance.h @@ -317,11 +317,6 @@ class CompilerInstance : public ModuleLoader { return Invocation->getFrontendOpts(); } - ssaf::SSAFOptions &getSSAFOpts() { return Invocation->getSSAFOpts(); } - const ssaf::SSAFOptions &getSSAFOpts() const { - return Invocation->getSSAFOpts(); - } - HeaderSearchOptions &getHeaderSearchOpts() { return Invocation->getHeaderSearchOpts(); } diff --git a/clang/include/clang/Frontend/CompilerInvocation.h b/clang/include/clang/Frontend/CompilerInvocation.h index 03097aefacf50..6fa6cd5d95534 100644 --- a/clang/include/clang/Frontend/CompilerInvocation.h +++ b/clang/include/clang/Frontend/CompilerInvocation.h @@ -51,10 +51,6 @@ class HeaderSearchOptions; class PreprocessorOptions; class TargetOptions; -namespace ssaf { -class SSAFOptions; -} // namespace ssaf - // This lets us create the DiagnosticsEngine with a properly-filled-out // DiagnosticOptions instance. std::unique_ptr @@ -120,9 +116,6 @@ class CompilerInvocationBase { /// Options controlling preprocessed output. std::shared_ptr PreprocessorOutputOpts; - /// Options controlling the Scalable Static Analysis Framework (SSAF). - std::shared_ptr SSAFOpts; - /// Dummy tag type whose instance can be passed into the constructor to /// prevent creation of the reference-counted option objects. struct EmptyConstructor {}; @@ -157,7 +150,6 @@ class CompilerInvocationBase { const PreprocessorOutputOptions &getPreprocessorOutputOpts() const { return *PreprocessorOutputOpts; } - const ssaf::SSAFOptions &getSSAFOpts() const { return *SSAFOpts; } /// @} /// Visitation. @@ -255,20 +247,19 @@ class CompilerInvocation : public CompilerInvocationBase { /// @{ // Note: These need to be pulled in manually. Otherwise, they get hidden by // the mutable getters with the same names. 
+ using CompilerInvocationBase::getLangOpts; + using CompilerInvocationBase::getTargetOpts; + using CompilerInvocationBase::getDiagnosticOpts; + using CompilerInvocationBase::getHeaderSearchOpts; + using CompilerInvocationBase::getPreprocessorOpts; using CompilerInvocationBase::getAnalyzerOpts; + using CompilerInvocationBase::getMigratorOpts; using CompilerInvocationBase::getAPINotesOpts; using CompilerInvocationBase::getCodeGenOpts; - using CompilerInvocationBase::getDependencyOutputOpts; - using CompilerInvocationBase::getDiagnosticOpts; using CompilerInvocationBase::getFileSystemOpts; using CompilerInvocationBase::getFrontendOpts; - using CompilerInvocationBase::getHeaderSearchOpts; - using CompilerInvocationBase::getLangOpts; - using CompilerInvocationBase::getMigratorOpts; - using CompilerInvocationBase::getPreprocessorOpts; + using CompilerInvocationBase::getDependencyOutputOpts; using CompilerInvocationBase::getPreprocessorOutputOpts; - using CompilerInvocationBase::getSSAFOpts; - using CompilerInvocationBase::getTargetOpts; /// @} /// Mutable getters. @@ -290,7 +281,6 @@ class CompilerInvocation : public CompilerInvocationBase { PreprocessorOutputOptions &getPreprocessorOutputOpts() { return *PreprocessorOutputOpts; } - ssaf::SSAFOptions &getSSAFOpts() { return *SSAFOpts; } /// @} /// Create a compiler invocation from a list of input options. @@ -402,7 +392,6 @@ class CowCompilerInvocation : public CompilerInvocationBase { FrontendOptions &getMutFrontendOpts(); DependencyOutputOptions &getMutDependencyOutputOpts(); PreprocessorOutputOptions &getMutPreprocessorOutputOpts(); - ssaf::SSAFOptions &getMutSSAFOpts(); /// @} }; diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index a8627ea5d47a4..7c242f6e94fe0 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -543,6 +543,27 @@ class FrontendOptions { /// minimization hints. 
std::string DumpMinimizationHintsPath; + /// List of SSAF extractors to enable. + std::vector SSAFExtractSummaries; + + /// The TU summary output file with the file extension representing the file + /// format. + std::string SSAFTUSummaryFile; + + /// Stable identifier for this translation unit, used as the name of the + /// `CompilationUnit` `BuildNamespace` of every produced TU summary. The + /// caller (typically the build system) supplies a value that is constant + /// across stages of the SSAF pipeline. + std::string SSAFCompilationUnitId; + + /// Show available SSAF summary extractors. + LLVM_PREFERRED_TYPE(bool) + unsigned SSAFShowExtractors : 1; + + /// Show available SSAF serialization formats. + LLVM_PREFERRED_TYPE(bool) + unsigned SSAFShowFormats : 1; + public: FrontendOptions() : DisableFree(false), RelocatablePCH(false), ShowHelp(false), @@ -560,7 +581,8 @@ class FrontendOptions { EmitPrettySymbolGraphs(false), GenReducedBMI(false), UseClangIRPipeline(false), ClangIRDisablePasses(false), ClangIRDisableCIRVerifier(false), ClangIREnableIdiomRecognizer(false), - TimeTraceGranularity(500), TimeTraceVerbose(false) {} + TimeTraceGranularity(500), TimeTraceVerbose(false), + SSAFShowExtractors(false), SSAFShowFormats(false) {} /// getInputKindForExtension - Return the appropriate input kind for a file /// extension. For example, "c" would return Language::C. diff --git a/clang/include/clang/Frontend/SSAFOptions.h b/clang/include/clang/Frontend/SSAFOptions.h deleted file mode 100644 index 738262cc4a713..0000000000000 --- a/clang/include/clang/Frontend/SSAFOptions.h +++ /dev/null @@ -1,52 +0,0 @@ -//===- SSAFOptions.h --------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_FRONTEND_SSAFOPTIONS_H -#define LLVM_CLANG_FRONTEND_SSAFOPTIONS_H - -#include "llvm/Support/Compiler.h" -#include -#include - -namespace clang::ssaf { - -class SSAFOptions { -public: - /// List of SSAF extractors to enable. - /// Controlled by: --ssaf-extract-summaries - std::vector ExtractSummaries; - - /// The TU summary output file with the file extension representing the - /// serialization format. - /// Controlled by: --ssaf-tu-summary-file - std::string TUSummaryFile; - - /// Stable identifier used as the name of the `CompilationUnit` - /// `BuildNamespace` of every produced TU summary. - /// Controlled by: --ssaf-compilation-unit-id - std::string CompilationUnitId; - - /// Show the list of available SSAF summary extractors and exit. - /// Controlled by: --ssaf-list-extractors - LLVM_PREFERRED_TYPE(bool) - unsigned ShowExtractors : 1; - - /// Show the list of available SSAF serialization formats and exit. 
- /// Controlled by: --ssaf-list-formats - LLVM_PREFERRED_TYPE(bool) - unsigned ShowFormats : 1; - - SSAFOptions() { - ShowExtractors = false; - ShowFormats = false; - }; -}; - -} // namespace clang::ssaf - -#endif // LLVM_CLANG_FRONTEND_SSAFOPTIONS_H diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index fd13d0556083a..377897a15f746 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -345,8 +345,6 @@ class FileSystemOpts : KeyPathAndMacro<"FileSystemOpts.", base, "FILE_SYSTEM_"> {} class AnalyzerOpts : KeyPathAndMacro<"AnalyzerOpts->", base, "ANALYZER_"> {} -class SSAFOpts - : KeyPathAndMacro<"SSAFOpts.", base, "SSAF_"> {} class MigratorOpts : KeyPathAndMacro<"MigratorOpts.", base, "MIGRATOR_"> {} @@ -949,7 +947,7 @@ def _ssaf_extract_summaries : Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Comma-separated list of summary names to extract">, - MarshallingInfoStringVector>; + MarshallingInfoStringVector>; def _ssaf_tu_summary_file : Joined<["--"], "ssaf-tu-summary-file=">, MetaVarName<".">, @@ -958,19 +956,19 @@ def _ssaf_tu_summary_file : HelpText< "The output file for the extracted summaries. 
" "The extension selects which file format to use.">, - MarshallingInfoString>; + MarshallingInfoString>; def _ssaf_list_extractors : Flag<["--"], "ssaf-list-extractors">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Display the list of available SSAF summary extractors">, - MarshallingInfoFlag>; + MarshallingInfoFlag>; def _ssaf_list_formats : Flag<["--"], "ssaf-list-formats">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Display the list of available SSAF serialization formats">, - MarshallingInfoFlag>; + MarshallingInfoFlag>; def _ssaf_compilation_unit_id : Joined<["--"], "ssaf-compilation-unit-id=">, MetaVarName<"">, @@ -980,7 +978,7 @@ def _ssaf_compilation_unit_id : "Stable identifier used as the CompilationUnit namespace name of every " "produced SSAF TU summary. Required when '--ssaf-tu-summary-file=' is " "set.">, - MarshallingInfoString>; + MarshallingInfoString>; def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>, diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h b/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h index 38bd60718ed9c..f9ebe5358b585 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h +++ b/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h @@ -18,13 +18,11 @@ namespace clang::ssaf { class EntityName; -class SSAFOptions; class TUSummary; class TUSummaryBuilder { public: - TUSummaryBuilder(TUSummary &Summary, const SSAFOptions &Options) - : Summary(Summary), Options(Options) {} + explicit TUSummaryBuilder(TUSummary &Summary) : Summary(Summary) {} EntityId addEntity(const EntityName &EN, EntityLinkageType Linkage); @@ -37,12 +35,8 @@ class TUSummaryBuilder { std::pair addSummary(EntityId Entity, std::unique_ptr &&Data); - /// \returns the \c SSAFOptions of this builder. 
- const SSAFOptions &getOptions() const { return Options; } - private: TUSummary &Summary; - const SSAFOptions &Options; std::pair addSummaryImpl(EntityId Entity, std::unique_ptr &&Data); diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h b/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h index b943748873821..46b0ae835d729 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h +++ b/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h @@ -15,7 +15,6 @@ #include namespace clang::ssaf { -class SSAFOptions; class TUSummaryBuilder; class TUSummaryExtractor : public ASTConsumer { @@ -33,9 +32,6 @@ class TUSummaryExtractor : public ASTConsumer { /// \returns the EntityId, or std::nullopt if EntityName creation fails. std::optional addEntityForReturn(const FunctionDecl *FD); - /// \returns the \c SSAFOptions of the builder. - const SSAFOptions &getOptions() const; - protected: TUSummaryBuilder &SummaryBuilder; }; diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 94ca546a5bcb6..ca2d02c7dbd97 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -32,7 +32,6 @@ #include "clang/Frontend/FrontendOptions.h" #include "clang/Frontend/MigratorOptions.h" #include "clang/Frontend/PreprocessorOutputOptions.h" -#include "clang/Frontend/SSAFOptions.h" #include "clang/Frontend/TextDiagnosticBuffer.h" #include "clang/Frontend/Utils.h" #include "clang/Lex/HeaderSearchOptions.h" @@ -134,8 +133,7 @@ CompilerInvocationBase::CompilerInvocationBase() FSOpts(std::make_shared()), FrontendOpts(std::make_shared()), DependencyOutputOpts(std::make_shared()), - PreprocessorOutputOpts(std::make_shared()), - SSAFOpts(std::make_shared()) {} + PreprocessorOutputOpts(std::make_shared()) {} CompilerInvocationBase & 
CompilerInvocationBase::deep_copy_assign(const CompilerInvocationBase &X) { @@ -153,7 +151,6 @@ CompilerInvocationBase::deep_copy_assign(const CompilerInvocationBase &X) { FrontendOpts = make_shared_copy(X.getFrontendOpts()); DependencyOutputOpts = make_shared_copy(X.getDependencyOutputOpts()); PreprocessorOutputOpts = make_shared_copy(X.getPreprocessorOutputOpts()); - SSAFOpts = make_shared_copy(X.getSSAFOpts()); } return *this; } @@ -174,7 +171,6 @@ CompilerInvocationBase::shallow_copy_assign(const CompilerInvocationBase &X) { FrontendOpts = X.FrontendOpts; DependencyOutputOpts = X.DependencyOutputOpts; PreprocessorOutputOpts = X.PreprocessorOutputOpts; - SSAFOpts = X.SSAFOpts; } return *this; } @@ -241,10 +237,6 @@ FrontendOptions &CowCompilerInvocation::getMutFrontendOpts() { return ensureOwned(FrontendOpts); } -ssaf::SSAFOptions &CowCompilerInvocation::getMutSSAFOpts() { - return ensureOwned(SSAFOpts); -} - DependencyOutputOptions &CowCompilerInvocation::getMutDependencyOutputOpts() { return ensureOwned(DependencyOutputOpts); } @@ -1042,26 +1034,6 @@ static void GenerateAnalyzerArgs(const AnalyzerOptions &Opts, // Nothing to generate for FullCompilerInvocation. } -static void GenerateSSAFArgs(const ssaf::SSAFOptions &SSAFOpts, - ArgumentConsumer Consumer) { -#define SSAF_OPTION_WITH_MARSHALLING(...) \ - GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) -#include "clang/Options/Options.inc" -#undef SSAF_OPTION_WITH_MARSHALLING -} - -static bool ParseSSAFArgs(ssaf::SSAFOptions &SSAFOpts, ArgList &Args, - DiagnosticsEngine &Diags) { - unsigned NumErrorsBefore = Diags.getNumErrors(); - -#define SSAF_OPTION_WITH_MARSHALLING(...) 
\ - PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) -#include "clang/Options/Options.inc" -#undef SSAF_OPTION_WITH_MARSHALLING - - return Diags.getNumErrors() == NumErrorsBefore; -} - static bool ParseAnalyzerArgs(AnalyzerOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); @@ -5111,7 +5083,6 @@ bool CompilerInvocation::CreateFromArgsImpl( ParseFileSystemArgs(Res.getFileSystemOpts(), Args, Diags); ParseMigratorArgs(Res.getMigratorOpts(), Args, Diags); ParseAnalyzerArgs(Res.getAnalyzerOpts(), Args, Diags); - ParseSSAFArgs(Res.getSSAFOpts(), Args, Diags); ParseDiagnosticArgs(Res.getDiagnosticOpts(), Args, &Diags, /*DefaultDiagColor=*/false); ParseFrontendArgs(Res.getFrontendOpts(), Args, Diags, LangOpts.IsHeaderFile); @@ -5464,7 +5435,6 @@ void CompilerInvocationBase::generateCC1CommandLine( GenerateFileSystemArgs(getFileSystemOpts(), Consumer); GenerateMigratorArgs(getMigratorOpts(), Consumer); GenerateAnalyzerArgs(getAnalyzerOpts(), Consumer); - GenerateSSAFArgs(getSSAFOpts(), Consumer); GenerateDiagnosticArgs(getDiagnosticOpts(), Consumer, /*DefaultDiagColor=*/false); GenerateFrontendArgs(getFrontendOpts(), Consumer, getLangOpts().IsHeaderFile); diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index 997200619e599..e4622496758ac 100644 --- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -19,7 +19,6 @@ #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Frontend/FrontendPluginRegistry.h" -#include "clang/Frontend/SSAFOptions.h" #include "clang/Frontend/Utils.h" #include "clang/FrontendTool/Utils.h" #include "clang/Options/Options.h" @@ -210,7 +209,7 @@ CreateFrontendAction(CompilerInstance &CI) { Act = std::make_unique(std::move(Act), FEOpts.ASTMergeFiles); - if (!CI.getSSAFOpts().TUSummaryFile.empty()) { + if 
(!FEOpts.SSAFTUSummaryFile.empty()) { Act = std::make_unique( std::move(Act)); } diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp b/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp index ef5932c52a6c3..38e3e3ec3ab9e 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp @@ -311,7 +311,8 @@ PointerFlowMatcher::matchesInitializerList(const ValueDecl *Base, class PointerFlowTUSummaryExtractor : public TUSummaryExtractor { public: - using TUSummaryExtractor::TUSummaryExtractor; + PointerFlowTUSummaryExtractor(TUSummaryBuilder &Builder) + : TUSummaryExtractor(Builder) {} /// \return a non-null unique pointer to a PointerFlowEntitySummary std::unique_ptr diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp b/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp index ccd5eef377d2d..f0602e0d5550f 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp @@ -74,7 +74,3 @@ TUSummaryExtractor::addEntityForReturn(const FunctionDecl *FD) { return std::nullopt; return SummaryBuilder.addEntity(*Name, getLinkageForDecl(FD)); } - -const SSAFOptions &TUSummaryExtractor::getOptions() const { - return SummaryBuilder.getOptions(); -} diff --git a/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp b/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp index 4f290ccac3d16..fe900f383ae31 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp @@ -11,7 +11,6 @@ #include 
"clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/TargetInfo.h" #include "clang/Frontend/MultiplexConsumer.h" -#include "clang/Frontend/SSAFOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" @@ -101,36 +100,33 @@ class TUSummaryRunner final : public MultiplexConsumer { private: TUSummaryRunner(llvm::Triple TargetTriple, std::unique_ptr Format, - const SSAFOptions &Opts); + const FrontendOptions &Opts); void HandleTranslationUnit(ASTContext &Ctx) override; TUSummary Summary; - - /// Owned by the \c CompilerInstance. - const SSAFOptions &Opts; - - TUSummaryBuilder Builder = TUSummaryBuilder(Summary, Opts); + TUSummaryBuilder Builder = TUSummaryBuilder(Summary); std::unique_ptr Format; + const FrontendOptions &Opts; }; } // namespace std::unique_ptr TUSummaryRunner::create(CompilerInstance &CI) { - const SSAFOptions &Opts = CI.getSSAFOpts(); + const FrontendOptions &Opts = CI.getFrontendOpts(); DiagnosticsEngine &Diags = CI.getDiagnostics(); - if (Opts.CompilationUnitId.empty()) { + if (Opts.SSAFCompilationUnitId.empty()) { Diags.Report(diag::warn_ssaf_tu_summary_requires_compilation_unit_id); return nullptr; } auto MaybePair = - parseOutputFileFormatAndPathOrReportError(Diags, Opts.TUSummaryFile); + parseOutputFileFormatAndPathOrReportError(Diags, Opts.SSAFTUSummaryFile); if (!MaybePair.has_value()) return nullptr; auto [FormatName, OutputPath] = MaybePair.value(); - if (reportUnrecognizedExtractorNames(Diags, Opts.ExtractSummaries)) + if (reportUnrecognizedExtractorNames(Diags, Opts.SSAFExtractSummaries)) return nullptr; return std::unique_ptr{new TUSummaryRunner{ @@ -139,18 +135,18 @@ std::unique_ptr TUSummaryRunner::create(CompilerInstance &CI) { TUSummaryRunner::TUSummaryRunner(llvm::Triple TargetTriple, std::unique_ptr Format, - const 
SSAFOptions &Opts) + const FrontendOptions &Opts) : MultiplexConsumer(std::vector>{}), Summary(std::move(TargetTriple), BuildNamespace(BuildNamespaceKind::CompilationUnit, - Opts.CompilationUnitId)), - Opts(Opts), Format(std::move(Format)) { + Opts.SSAFCompilationUnitId)), + Format(std::move(Format)), Opts(Opts) { assert(this->Format); - assert(!Opts.CompilationUnitId.empty()); + assert(!Opts.SSAFCompilationUnitId.empty()); // Now the Summary and the builders are constructed, we can also construct the // extractors. - auto Extractors = makeTUSummaryExtractors(Builder, Opts.ExtractSummaries); + auto Extractors = makeTUSummaryExtractors(Builder, Opts.SSAFExtractSummaries); assert(!Extractors.empty()); // We must initialize the Consumers here because our extractors need a @@ -168,9 +164,9 @@ void TUSummaryRunner::HandleTranslationUnit(ASTContext &Ctx) { llvm::sys::sandbox::ScopedSetting Guard = llvm::sys::sandbox::scopedDisable(); // Then serialize the result. - if (auto Err = Format->writeTUSummary(Summary, Opts.TUSummaryFile)) { + if (auto Err = Format->writeTUSummary(Summary, Opts.SSAFTUSummaryFile)) { Ctx.getDiagnostics().Report(diag::warn_ssaf_write_tu_summary_failed) - << Opts.TUSummaryFile << llvm::toString(std::move(Err)); + << Opts.SSAFTUSummaryFile << llvm::toString(std::move(Err)); } } diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp index 0a3685c4f1057..69db546fbf93d 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp @@ -11,7 +11,6 @@ #include "clang/AST/Decl.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" -#include "clang/Frontend/SSAFOptions.h" #include 
"clang/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphSummary.h" #include "clang/ScalableStaticAnalysisFramework/Core/ASTEntityMapping.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" @@ -124,11 +123,10 @@ template auto hasSummaryThat(const Matchers &...Ms) { static const SummaryName CallGraphName{CallGraphSummary::Name.str()}; struct CallGraphExtractorTest : ssaf::TestFixture { - SSAFOptions Opts; TUSummary Summary{ llvm::Triple("arm64-apple-macosx"), BuildNamespace(BuildNamespaceKind::CompilationUnit, "Mock.cpp")}; - TUSummaryBuilder Builder = TUSummaryBuilder(Summary, Opts); + TUSummaryBuilder Builder = TUSummaryBuilder(Summary); /// Creates the AST and extractor, then extracts the summaries from the AST. /// This will update the \c AST \c Builder and \c Summary data members. diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp index 49e7bdc21738b..44465a59d4cfd 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp @@ -13,7 +13,6 @@ #include "clang/AST/DynamicRecursiveASTVisitor.h" #include "clang/AST/ExprCXX.h" #include "clang/Frontend/ASTUnit.h" -#include "clang/Frontend/SSAFOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" @@ -147,7 +146,6 @@ struct EPLPair { class PointerFlowTest : public TestFixture { protected: - SSAFOptions Opts; TUSummary TUSum; TUSummaryBuilder Builder; std::unique_ptr Extractor; @@ -156,7 +154,7 @@ class PointerFlowTest : public TestFixture { PointerFlowTest() : TUSum(llvm::Triple("arm64-apple-macosx"), 
BuildNamespace(BuildNamespaceKind::CompilationUnit, "Mock.cpp")), - Builder(TUSum, Opts), Extractor(nullptr) {} + Builder(TUSum), Extractor(nullptr) {} template Extractor; @@ -45,7 +43,7 @@ class UnsafeBufferUsageTest : public TestFixture { UnsafeBufferUsageTest() : TUSum(llvm::Triple("arm64-apple-macosx"), BuildNamespace(BuildNamespaceKind::CompilationUnit, "Mock.cpp")), - Builder(TUSum, Opts) {} + Builder(TUSum) {} bool setUpTest(StringRef Code) { AST = tooling::buildASTFromCodeWithArgs( diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp b/clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp index cb5448e4860bc..18d9e1735061d 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp @@ -11,7 +11,6 @@ #include "clang/AST/ASTContext.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendOptions.h" -#include "clang/Frontend/SSAFOptions.h" #include "clang/Frontend/TextDiagnosticBuffer.h" #include "clang/Lex/PreprocessorOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormat.h" @@ -300,9 +299,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, // Configure valid SSAF options so the failure is purely from the wrapped // action, not from runner creation. 
std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getSSAFOpts().TUSummaryFile = Output; - Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; - Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; + Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; + Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; + Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; TUSummaryExtractorFrontendAction ExtractorAction( std::make_unique()); @@ -316,9 +315,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, RunnerFailsWithInvalidFormat_WrappedConsumerStillRuns) { // Use an unregistered format extension so TUSummaryRunner::create fails. std::string Output = makePath("output.xyz"); - Compiler->getSSAFOpts().TUSummaryFile = Output; - Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; - Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; + Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; + Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; + Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; auto Wrapped = std::make_unique(); const EventLog &Log = Wrapped->getLog(); @@ -346,9 +345,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, TEST_F(TUSummaryExtractorFrontendActionTest, RunnerFailsWithUnknownExtractor_WrappedConsumerStillRuns) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getSSAFOpts().TUSummaryFile = Output; - Compiler->getSSAFOpts().ExtractSummaries = {"NonExistentExtractor"}; - Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; + Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; + Compiler->getFrontendOpts().SSAFExtractSummaries = {"NonExistentExtractor"}; + Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; auto Wrapped = std::make_unique(); const EventLog &Log = Wrapped->getLog(); @@ -371,9 +370,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, TEST_F(TUSummaryExtractorFrontendActionTest, 
RunnerSucceeds_ASTConsumerCallbacksPropagate) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getSSAFOpts().TUSummaryFile = Output; - Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; - Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; + Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; + Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; + Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; auto Wrapped = std::make_unique(); const EventLog &Log = Wrapped->getLog(); @@ -426,9 +425,9 @@ struct OrderCheckingAction : public ASTFrontendAction { TEST_F(TUSummaryExtractorFrontendActionTest, RunnerSucceeds_WrappedRunsBeforeRunner) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getSSAFOpts().TUSummaryFile = Output; - Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; - Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; + Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; + Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; + Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; auto Wrapped = std::make_unique(); Wrapped->OutputPath = Output; @@ -448,9 +447,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, TEST_F(TUSummaryExtractorFrontendActionTest, RunnerFailsToWrite) { std::string Output = makePath("output.FailingSerializationFormat"); - Compiler->getSSAFOpts().TUSummaryFile = Output; - Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; - Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; + Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; + Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; + Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; TUSummaryExtractorFrontendAction Action(std::make_unique()); @@ -470,8 +469,8 @@ TEST_F(TUSummaryExtractorFrontendActionTest, RunnerFailsToWrite) { TEST_F(TUSummaryExtractorFrontendActionTest, 
MissingCompilationUnitIdDiagnoses) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getSSAFOpts().TUSummaryFile = Output; - Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; + Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; // SSAFCompilationUnitId left empty. auto Wrapped = std::make_unique(); @@ -494,9 +493,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, TEST_F(TUSummaryExtractorFrontendActionTest, EmptyCompilationUnitIdDiagnoses) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getSSAFOpts().TUSummaryFile = Output; - Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; - Compiler->getSSAFOpts().CompilationUnitId = ""; + Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; + Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; + Compiler->getFrontendOpts().SSAFCompilationUnitId = ""; auto Wrapped = std::make_unique(); const EventLog &Log = Wrapped->getLog(); @@ -521,9 +520,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, const std::string CUId = "cu-X-test"; std::string Output = makePath("output.CapturingSerializationFormat"); - Compiler->getSSAFOpts().TUSummaryFile = Output; - Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; - Compiler->getSSAFOpts().CompilationUnitId = CUId; + Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; + Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; + Compiler->getFrontendOpts().SSAFCompilationUnitId = CUId; TUSummaryExtractorFrontendAction Action(std::make_unique()); EXPECT_TRUE(Compiler->ExecuteAction(Action)); diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp b/clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp index 2294c045d554b..fd6ad83225e10 100644 --- 
a/clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp @@ -8,7 +8,6 @@ #include "MockTUSummaryBuilder.h" #include "clang/Frontend/MultiplexConsumer.h" -#include "clang/Frontend/SSAFOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" #include "clang/Tooling/Tooling.h" @@ -47,9 +46,8 @@ TEST(SummaryExtractorRegistryTest, EnumeratingRegistryEntries) { } TEST(SummaryExtractorRegistryTest, InstantiatingExtractor1) { - SSAFOptions Opts; TUSummary Summary = makeFakeSummary(); - MockTUSummaryBuilder FakeBuilder(Summary, Opts); + MockTUSummaryBuilder FakeBuilder(Summary); { auto Consumer = makeTUSummaryExtractor("MockSummaryExtractor1", FakeBuilder); @@ -62,9 +60,8 @@ TEST(SummaryExtractorRegistryTest, InstantiatingExtractor1) { } TEST(SummaryExtractorRegistryTest, InstantiatingExtractor2) { - SSAFOptions Opts; TUSummary Summary = makeFakeSummary(); - MockTUSummaryBuilder FakeBuilder(Summary, Opts); + MockTUSummaryBuilder FakeBuilder(Summary); { auto Consumer = makeTUSummaryExtractor("MockSummaryExtractor2", FakeBuilder); @@ -77,9 +74,8 @@ TEST(SummaryExtractorRegistryTest, InstantiatingExtractor2) { } TEST(SummaryExtractorRegistryTest, InvokingExtractors) { - SSAFOptions Opts; TUSummary Summary = makeFakeSummary(); - MockTUSummaryBuilder FakeBuilder(Summary, Opts); + MockTUSummaryBuilder FakeBuilder(Summary); std::vector> Consumers; for (std::string Name : {"MockSummaryExtractor1", "MockSummaryExtractor2"}) { auto Consumer = makeTUSummaryExtractor(Name, FakeBuilder); diff --git a/clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp b/clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp index 55c5c781e42d9..ffcf068ce6956 100644 --- 
a/clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp @@ -9,7 +9,6 @@ #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" #include "FindDecl.h" #include "TestFixture.h" -#include "clang/Frontend/SSAFOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" #include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" #include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" @@ -90,11 +89,10 @@ void PrintTo(const MockSummaryData3 &S, std::ostream *OS) { } struct TUSummaryBuilderTest : ssaf::TestFixture { - SSAFOptions Opts; TUSummary Summary{ llvm::Triple("arm64-apple-macosx"), BuildNamespace(BuildNamespaceKind::CompilationUnit, "Mock.cpp")}; - TUSummaryBuilder Builder{Summary, Opts}; + TUSummaryBuilder Builder{Summary}; TUSummaryExtractor Extractor{Builder}; [[nodiscard]] EntityId addTestEntity(llvm::StringRef USR) { From 538e3adaef393ce25a8098a4926ee3a5f8d47e4f Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 23 Jun 2026 00:41:40 -0700 Subject: [PATCH 135/511] [AMDGPU] Select fneg modifier for v2f64 instructions (#205194) --- llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 45 ++++++++++++------- llvm/test/CodeGen/AMDGPU/packed-fp64.ll | 35 +++++---------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index ccdff3c0de381..7330f3b13f3cb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -3620,13 +3620,8 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, Src = Src.getOperand(0); } - // 64-bit VOP3P instructions do not have OPSEL or ABS. Bail on v2f64 or v2i64. - // TODO: Select NEG_LO and NEG_HI modifiers from BUILD_VECTOR. 
- if (Src.getValueSizeInBits() == 128) { - Mods |= SISrcMods::OP_SEL_1; // Just the default, OPSEL unsupported. - SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); - return true; - } + // 64-bit VOP3P instructions do not have OPSEL or ABS. + bool HasOpSel = Src.getValueSizeInBits() != 128; if (Src.getOpcode() == ISD::BUILD_VECTOR && Src.getNumOperands() == 2 && (!IsDOT || !Subtarget->hasDOTOpSelHazard())) { @@ -3645,11 +3640,13 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, Mods ^= SISrcMods::NEG_HI; } - if (isExtractHiElt(Lo, Lo)) - Mods |= SISrcMods::OP_SEL_0; + if (HasOpSel) { + if (isExtractHiElt(Lo, Lo)) + Mods |= SISrcMods::OP_SEL_0; - if (isExtractHiElt(Hi, Hi)) - Mods |= SISrcMods::OP_SEL_1; + if (isExtractHiElt(Hi, Hi)) + Mods |= SISrcMods::OP_SEL_1; + } unsigned VecSize = Src.getValueSizeInBits(); Lo = stripExtractLoElt(Lo); @@ -3679,18 +3676,29 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, } else if (VecSize == 32) { Src = createVOP3PSrc32FromLo16(Lo, Src, CurDAG, Subtarget); } else { - assert(Lo.getValueSizeInBits() == 32 && VecSize == 64); + assert((Lo.getValueSizeInBits() == 32 && VecSize == 64) || + (Lo.getValueSizeInBits() == 64 && VecSize == 128)); SDLoc SL(In); SDValue Undef = SDValue( CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, Lo.getValueType()), 0); - auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID - : AMDGPU::SReg_64RegClassID; + const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); + // <2 x 64> instructions do not have OPSEL and also replicate low 64 + // bits of a scalar input into high 64 bits. Use VGPRs in this case. + // TODO: This fact can be exploited but we need to set proper OPSEL for + // codegen folding purposes. It will not affect a final instruction. + auto RC = (Lo->isDivergent() || !HasOpSel) + ? TRI->getVGPRClassForBitWidth(VecSize) + : TRI->getSGPRClassForBitWidth(VecSize); + unsigned NumRegs = Lo.getValueSizeInBits() == 32 ? 
1 : 2; const SDValue Ops[] = { - CurDAG->getTargetConstant(RC, SL, MVT::i32), - Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32), - Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) }; + CurDAG->getTargetConstant(RC->getID(), SL, MVT::i32), Lo, + CurDAG->getTargetConstant(TRI->getSubRegFromChannel(0, NumRegs), SL, + MVT::i32), + HasOpSel ? Undef : Hi, + CurDAG->getTargetConstant( + TRI->getSubRegFromChannel(NumRegs, NumRegs), SL, MVT::i32)}; Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL, Src.getValueType(), Ops), 0); @@ -3716,6 +3724,9 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, // TODO: We should repeat the build_vector source check above for the // vector_shuffle for negates and casts of individual elements. + assert(Src.getValueSizeInBits() != 128 && + "<2 x 64> VECTOR_SHUFFLE should not be legal."); + auto *SVN = cast(Src); ArrayRef Mask = SVN->getMask(); diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp64.ll b/llvm/test/CodeGen/AMDGPU/packed-fp64.ll index b16a64dc59349..ff0c3edc9dcf0 100644 --- a/llvm/test/CodeGen/AMDGPU/packed-fp64.ll +++ b/llvm/test/CodeGen/AMDGPU/packed-fp64.ll @@ -581,8 +581,6 @@ define amdgpu_kernel void @fadd_v2_v_unfoldable_lit(ptr addrspace(1) %a) { ret void } -; TODO: fneg can be folded - define amdgpu_kernel void @fadd_v2_v_fneg(ptr addrspace(1) %a, double %x) { ; GFX1251-SDAG-LABEL: fadd_v2_v_fneg: ; GFX1251-SDAG: ; %bb.0: @@ -591,12 +589,11 @@ define amdgpu_kernel void @fadd_v2_v_fneg(ptr addrspace(1) %a, double %x) { ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_xor_b32 s3, s3, 0x80000000 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[2:3] ; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; 
GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] +; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] neg_lo:[0,1] neg_hi:[0,1] ; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_endpgm ; @@ -638,13 +635,11 @@ define amdgpu_kernel void @fadd_v2_v_fneg_lo(ptr addrspace(1) %a, double %x) { ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_xor_b32 s5, s3, 0x80000000 -; GFX1251-SDAG-NEXT: s_mov_b32 s4, s2 +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[2:3] ; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3] -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[4:5] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] +; GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] neg_lo:[0,1] ; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_endpgm ; @@ -686,13 +681,11 @@ define amdgpu_kernel void @fadd_v2_v_fneg_hi(ptr addrspace(1) %a, double %x) { ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_xor_b32 s5, s3, 0x80000000 -; GFX1251-SDAG-NEXT: s_mov_b32 s4, s2 ; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[2:3] -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[4:5] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 ; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] +; GFX1251-SDAG-NEXT: v_pk_add_f64 v[0:3], v[0:3], v[4:7] neg_hi:[0,1] ; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_endpgm ; @@ -1337,12 +1330,11 @@ define amdgpu_kernel void 
@fmul_v2_v_fneg(ptr addrspace(1) %a, double %x) { ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_xor_b32 s3, s3, 0x80000000 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) ; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[2:3] ; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_mul_f64 v[0:3], v[0:3], v[4:7] +; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1251-SDAG-NEXT: v_pk_mul_f64 v[0:3], v[0:3], v[4:7] neg_lo:[0,1] neg_hi:[0,1] ; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_endpgm ; @@ -1894,14 +1886,11 @@ define amdgpu_kernel void @fma_v2_v_fneg(ptr addrspace(1) %a, double %x) { ; GFX1251-SDAG-NEXT: v_and_b32_e32 v8, 0x3ff, v0 ; GFX1251-SDAG-NEXT: s_wait_kmcnt 0x0 ; GFX1251-SDAG-NEXT: global_load_b128 v[0:3], v8, s[0:1] scale_offset -; GFX1251-SDAG-NEXT: s_xor_b32 s3, s3, 0x80000000 -; GFX1251-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(VALU_DEP_1) -; GFX1251-SDAG-NEXT: s_mov_b64 s[4:5], s[2:3] -; GFX1251-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3] -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[4:5] -; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[6:7] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[4:5], s[2:3] +; GFX1251-SDAG-NEXT: v_mov_b64_e32 v[6:7], s[2:3] ; GFX1251-SDAG-NEXT: s_wait_loadcnt 0x0 -; GFX1251-SDAG-NEXT: v_pk_fma_f64 v[0:3], v[0:3], v[4:7], v[4:7] +; GFX1251-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX1251-SDAG-NEXT: v_pk_fma_f64 v[0:3], v[0:3], v[4:7], v[4:7] neg_lo:[0,1,1] neg_hi:[0,1,1] ; GFX1251-SDAG-NEXT: global_store_b128 v8, v[0:3], s[0:1] scale_offset ; GFX1251-SDAG-NEXT: s_endpgm ; From d38046132e4dfe93e9f176fa72940c0e77fbae76 Mon Sep 17 00:00:00 2001 From: Alexis Engelke Date: Tue, 23 Jun 2026 10:02:55 +0200 Subject: 
[PATCH 136/511] [IR] Remove IRBuilder AddMetadataToInst (#202280) This avoids an extra check for metadata on every instruction insertion, makes constructing an IRBuilder cheaper, and therefore slightly improves performance. As the C API doesn't expose CollectMetadataToCopy or any other way to add additional metadata to the IRBuilder, make LLVMAddMetadataToInst an alias for LLVMSetInstDebugLocation and undeprecate the latter. --- llvm/include/llvm-c/Core.h | 9 ++-- llvm/include/llvm/IR/IRBuilder.h | 54 +------------------ llvm/lib/CodeGen/AtomicExpandPass.cpp | 12 +++-- llvm/lib/IR/Core.cpp | 2 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 26 ++++++--- .../Instrumentation/BoundsChecking.cpp | 17 ++++-- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 15 +++--- 7 files changed, 53 insertions(+), 82 deletions(-) diff --git a/llvm/include/llvm-c/Core.h b/llvm/include/llvm-c/Core.h index 86f636c636783..f3b98a31c5bd7 100644 --- a/llvm/include/llvm-c/Core.h +++ b/llvm/include/llvm-c/Core.h @@ -4643,18 +4643,17 @@ LLVM_C_ABI void LLVMSetCurrentDebugLocation2(LLVMBuilderRef Builder, * current debug location for the given builder. If the builder has no current * debug location, this function is a no-op. * - * @deprecated LLVMSetInstDebugLocation is deprecated in favor of the more general - * LLVMAddMetadataToInst. - * * @see llvm::IRBuilder::SetInstDebugLocation() */ LLVM_C_ABI void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst); /** - * Adds the metadata registered with the given builder to the given instruction. + * Same as LLVMSetInstDebugLocation. + * + * @deprecated Use the identical LLVMSetInstDebugLocation. 
* - * @see llvm::IRBuilder::AddMetadataToInst() + * @see llvm::IRBuilder::SetInstDebugLocation() */ LLVM_C_ABI void LLVMAddMetadataToInst(LLVMBuilderRef Builder, LLVMValueRef Inst); diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index 400c3723e5d1c..692990f93c6fb 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -112,36 +112,10 @@ class FMFSource { /// Common base class shared among various IRBuilders. class IRBuilderBase { - /// Pairs of (metadata kind, MDNode *) that should be added to all newly - /// created instructions, excluding !dbg metadata, which is stored in the - /// StoredDL field. - SmallVector, 2> MetadataToCopy; /// The DebugLoc that will be applied to instructions inserted by this /// builder. DebugLoc StoredDL; - /// Add or update the an entry (Kind, MD) to MetadataToCopy, if \p MD is not - /// null. If \p MD is null, remove the entry with \p Kind. - void AddOrRemoveMetadataToCopy(unsigned Kind, MDNode *MD) { - assert(Kind != LLVMContext::MD_dbg && - "MD_dbg metadata must be stored in StoredDL"); - - if (!MD) { - erase_if(MetadataToCopy, [Kind](const std::pair &KV) { - return KV.first == Kind; - }); - return; - } - - for (auto &KV : MetadataToCopy) - if (KV.first == Kind) { - KV.second = MD; - return; - } - - MetadataToCopy.emplace_back(Kind, MD); - } - protected: BasicBlock *BB; BasicBlock::iterator InsertPt; @@ -171,7 +145,7 @@ class IRBuilderBase { template InstTy *Insert(InstTy *I, const Twine &Name = "") const { Inserter.InsertHelper(I, Name, InsertPt); - AddMetadataToInst(I); + SetInstDebugLocation(I); return I; } @@ -257,25 +231,6 @@ class IRBuilderBase { StoredDL = std::move(L); } - /// Set nosanitize metadata. - void SetNoSanitizeMetadata() { - AddOrRemoveMetadataToCopy(llvm::LLVMContext::MD_nosanitize, - llvm::MDNode::get(getContext(), {})); - } - - /// Collect metadata with IDs \p MetadataKinds from \p Src which should be - /// added to all created instructions. 
Entries present in MedataDataToCopy but - /// not on \p Src will be dropped from MetadataToCopy. - void CollectMetadataToCopy(Instruction *Src, - ArrayRef MetadataKinds) { - for (unsigned K : MetadataKinds) { - if (K == LLVMContext::MD_dbg) - SetCurrentDebugLocation(Src->getDebugLoc()); - else - AddOrRemoveMetadataToCopy(K, Src->getMetadata(K)); - } - } - /// Get location information used by debugging information. LLVM_ABI DebugLoc getCurrentDebugLocation() const; @@ -283,13 +238,6 @@ class IRBuilderBase { /// specified instruction. LLVM_ABI void SetInstDebugLocation(Instruction *I) const; - /// Add all entries in MetadataToCopy to \p I. - void AddMetadataToInst(Instruction *I) const { - for (const auto &KV : MetadataToCopy) - I->setMetadata(KV.first, KV.second); - SetInstDebugLocation(I); - } - /// Get the return type of the current function that we're emitting /// into. LLVM_ABI Type *getCurrentFunctionReturnType() const; diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp index db048e0c5ab5c..059c0d5cb7b8b 100644 --- a/llvm/lib/CodeGen/AtomicExpandPass.cpp +++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp @@ -174,24 +174,26 @@ class AtomicExpandLegacy : public FunctionPass { struct ReplacementIRBuilder : IRBuilder { MDNode *MMRAMD = nullptr; + MDNode *PCSectionsMD = nullptr; // Preserves the DebugLoc from I, and preserves still valid metadata. // Enable StrictFP builder mode when appropriate. 
explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL) - : IRBuilder(I->getContext(), InstSimplifyFolder(DL), - IRBuilderCallbackInserter( - [this](Instruction *I) { addMMRAMD(I); })) { + : IRBuilder( + I->getContext(), InstSimplifyFolder(DL), + IRBuilderCallbackInserter([this](Instruction *I) { addMD(I); })) { SetInsertPoint(I); - this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections}); if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP)) this->setIsFPConstrained(true); MMRAMD = I->getMetadata(LLVMContext::MD_mmra); + PCSectionsMD = I->getMetadata(LLVMContext::MD_pcsections); } - void addMMRAMD(Instruction *I) { + void addMD(Instruction *I) { if (canInstructionHaveMMRAs(*I)) I->setMetadata(LLVMContext::MD_mmra, MMRAMD); + I->setMetadata(LLVMContext::MD_pcsections, PCSectionsMD); } }; diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp index e64341aa0d1d7..6c6c3b5c84ede 100644 --- a/llvm/lib/IR/Core.cpp +++ b/llvm/lib/IR/Core.cpp @@ -3508,7 +3508,7 @@ void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) { } void LLVMAddMetadataToInst(LLVMBuilderRef Builder, LLVMValueRef Inst) { - unwrap(Builder)->AddMetadataToInst(unwrap(Inst)); + unwrap(Builder)->SetInstDebugLocation(unwrap(Inst)); } void LLVMBuilderSetDefaultFPMathTag(LLVMBuilderRef Builder, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7e9e82ae49f9e..291124bb485cb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -32803,8 +32803,12 @@ X86TargetLowering::shouldExpandLogicAtomicRMWInIR( } void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const { - IRBuilder<> Builder(AI); - Builder.CollectMetadataToCopy(AI, {LLVMContext::MD_pcsections}); + LLVMContext &Ctx = AI->getContext(); + IRBuilder Builder( + Ctx, ConstantFolder{}, IRBuilderCallbackInserter([&AI](Instruction *I) { + I->copyMetadata(*AI, LLVMContext::MD_pcsections); + })); 
+ Builder.SetInsertPoint(AI); Intrinsic::ID IID_C = Intrinsic::not_intrinsic; Intrinsic::ID IID_I = Intrinsic::not_intrinsic; switch (AI->getOperation()) { @@ -32824,7 +32828,6 @@ void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const { break; } Instruction *I = AI->user_back(); - LLVMContext &Ctx = AI->getContext(); Value *Addr = Builder.CreatePointerCast(AI->getPointerOperand(), PointerType::getUnqual(Ctx)); Value *Result = nullptr; @@ -33047,10 +33050,13 @@ static bool shouldExpandCmpArithRMWInIR(const AtomicRMWInst *AI) { void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic( AtomicRMWInst *AI) const { - IRBuilder<> Builder(AI); - Builder.CollectMetadataToCopy(AI, {LLVMContext::MD_pcsections}); - Instruction *TempI = nullptr; LLVMContext &Ctx = AI->getContext(); + IRBuilder Builder( + Ctx, ConstantFolder{}, IRBuilderCallbackInserter([&AI](Instruction *I) { + I->copyMetadata(*AI, LLVMContext::MD_pcsections); + })); + Builder.SetInsertPoint(AI); + Instruction *TempI = nullptr; ICmpInst *ICI = dyn_cast(AI->user_back()); if (!ICI) { TempI = AI->user_back(); @@ -33159,8 +33165,12 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { AI->use_empty()) return nullptr; - IRBuilder<> Builder(AI); - Builder.CollectMetadataToCopy(AI, {LLVMContext::MD_pcsections}); + IRBuilder Builder( + AI->getContext(), ConstantFolder{}, + IRBuilderCallbackInserter([&AI](Instruction *I) { + I->copyMetadata(*AI, LLVMContext::MD_pcsections); + })); + Builder.SetInsertPoint(AI); auto SSID = AI->getSyncScopeID(); // We must restrict the ordering to avoid generating loads with Release or // ReleaseAcquire orderings. 
diff --git a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp index 0506a9b44589b..58bc18b379048 100644 --- a/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/BoundsChecking.cpp @@ -41,14 +41,23 @@ STATISTIC(ChecksAdded, "Bounds checks added"); STATISTIC(ChecksSkipped, "Bounds checks skipped"); STATISTIC(ChecksUnable, "Bounds checks unable to add"); -class BuilderTy : public IRBuilder { +class NoSanitizeInserter final : public IRBuilderDefaultInserter { + mutable MDNode *NoSanitizeMD = nullptr; + public: - BuilderTy(BasicBlock *TheBB, BasicBlock::iterator IP, TargetFolder Folder) - : IRBuilder(TheBB, IP, Folder) { - SetNoSanitizeMetadata(); + NoSanitizeInserter() = default; + + void InsertHelper(Instruction *I, const Twine &Name, + BasicBlock::iterator InsertPt) const override { + IRBuilderDefaultInserter::InsertHelper(I, Name, InsertPt); + if (!NoSanitizeMD) + NoSanitizeMD = MDNode::get(I->getContext(), {}); + I->setMetadata(LLVMContext::MD_nosanitize, NoSanitizeMD); } }; +using BuilderTy = IRBuilder; + /// Gets the conditions under which memory accessing instructions will overflow. /// /// \p Ptr is the pointer that will be read/written, and \p InstVal is either diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index b1478b7d10b8f..1e9c66c5a660c 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -4024,12 +4024,15 @@ static bool performBranchToCommonDestFolding(CondBrInst *BI, CondBrInst *PBI, LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); - IRBuilder<> Builder(PBI); - // The builder is used to create instructions to eliminate the branch in BB. - // If BB's terminator has !annotation metadata, add it to the new - // instructions. 
- Builder.CollectMetadataToCopy(BB->getTerminator(), - {LLVMContext::MD_annotation}); + IRBuilder Builder( + BB->getContext(), ConstantFolder{}, + IRBuilderCallbackInserter([&BB](Instruction *I) { + // The builder is used to create instructions to eliminate the branch in + // BB. If BB's terminator has !annotation metadata, add it to the new + // instructions. + I->copyMetadata(*BB->getTerminator(), LLVMContext::MD_annotation); + })); + Builder.SetInsertPoint(PBI); // If we need to invert the condition in the pred block to match, do so now. if (InvertPredCond) { From b69ccd199b2987f7ca836a89fc81468de5cb53b4 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 23 Jun 2026 10:52:25 +0200 Subject: [PATCH 137/511] [X86][Inline] Make ABI compatibility check more precise (#205106) When inlining a function that contains calls with vector arguments, we have to be careful that inlining does not change the ABI of the call. E.g. we generally can't inline a function without `+avx` into a function with `+avx` if there are calls using vectors of size 256 or larger, because they'd switch from passing in two xmm registers to passing in a ymm register. However, the current check is very crude and only allows inlining with interior calls if the target features match *exactly* (via the base areTypesABICompatible implementation). This is unnecessarily conservative, as many target features do not affect the call ABI at all. Make this check more precise by checking the result of getRegisterTypeForCallingConv for the type between the TLI instances for the caller and callee. 
--- .../lib/Target/X86/X86TargetTransformInfo.cpp | 51 +++--- .../Inline/X86/call-abi-compatibility.ll | 153 ++++++++++++++++++ .../X86/loop-vectorizer-noalias.ll | 6 +- 3 files changed, 180 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 0e9d8679ca9ec..6ff621aca79d2 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -6719,22 +6719,25 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller, const TargetMachine &TM = getTLI()->getTargetMachine(); // Work this as a subsetting of subtarget features. - const FeatureBitset &CallerBits = - TM.getSubtargetImpl(*Caller)->getFeatureBits(); - const FeatureBitset &CalleeBits = - TM.getSubtargetImpl(*Callee)->getFeatureBits(); + const X86Subtarget &CallerSubtarget = TM.getSubtarget(*Caller); + const X86Subtarget &CalleeSubtarget = TM.getSubtarget(*Callee); + const FeatureBitset &CallerBits = CallerSubtarget.getFeatureBits(); + const FeatureBitset &CalleeBits = CalleeSubtarget.getFeatureBits(); - // Check whether features are the same (apart from the ignore list). + // Check whether callee features are a subset of caller features + // (apart from the ignore list). FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList; FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList; - if (RealCallerBits == RealCalleeBits) - return true; - - // If the features are a subset, we need to additionally check for calls - // that may become ABI-incompatible as a result of inlining. if ((RealCallerBits & RealCalleeBits) != RealCalleeBits) return false; + // If the features are not exactly the same (or there is a difference in + // AVX512 register usage), we need to additionally check for calls + // that may become ABI-incompatible as a result of inlining. 
+ if (RealCallerBits == RealCalleeBits && + CallerSubtarget.useAVX512Regs() == CalleeSubtarget.useAVX512Regs()) + return true; + for (const Instruction &I : instructions(Callee)) { if (const auto *CB = dyn_cast(&I)) { // Having more target features is fine for inline ASM and intrinsics. @@ -6765,23 +6768,19 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller, bool X86TTIImpl::areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef Types) const { - if (!BaseT::areTypesABICompatible(Caller, Callee, Types)) - return false; - - // If we get here, we know the target features match. If one function - // considers 512-bit vectors legal and the other does not, consider them - // incompatible. const TargetMachine &TM = getTLI()->getTargetMachine(); - - if (TM.getSubtarget(*Caller).useAVX512Regs() == - TM.getSubtarget(*Callee).useAVX512Regs()) - return true; - - // Consider the arguments compatible if they aren't vectors or aggregates. - // FIXME: Look at the size of vectors. - // FIXME: Look at the element types of aggregates to see if there are vectors. 
- return llvm::none_of(Types, - [](Type *T) { return T->isVectorTy() || T->isAggregateType(); }); + const TargetLowering *CallerTLI = + TM.getSubtargetImpl(*Caller)->getTargetLowering(); + const TargetLowering *CalleeTLI = + TM.getSubtargetImpl(*Callee)->getTargetLowering(); + + LLVMContext &Ctx = Caller->getContext(); + CallingConv::ID CC = Callee->getCallingConv(); + return all_of(Types, [&](Type *Ty) { + EVT VT = CallerTLI->getValueType(DL, Ty); + return CallerTLI->getRegisterTypeForCallingConv(Ctx, CC, VT) == + CalleeTLI->getRegisterTypeForCallingConv(Ctx, CC, VT); + }); } X86TTIImpl::TTI::MemCmpExpansionOptions diff --git a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll index 9d0981cd63756..7cd1685e631d4 100644 --- a/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll +++ b/llvm/test/Transforms/Inline/X86/call-abi-compatibility.ll @@ -143,5 +143,158 @@ define internal <8 x i64> @callee_inline_asm(ptr %p0, i64 %k, ptr %p1, ptr %p2) ret <8 x i64> %3 } +declare void @callee_128bit(<4 x float>) +declare void @callee_256bit(<8 x float>) +declare void @callee_512bit(<16 x float>) + +define void @callee_calls_128bit_baseline() { +; CHECK-LABEL: define {{[^@]+}}@callee_calls_128bit_baseline() { +; CHECK-NEXT: call void @callee_128bit(<4 x float> zeroinitializer) +; CHECK-NEXT: ret void +; + call void @callee_128bit(<4 x float> zeroinitializer) + ret void +} + +; Okay to inline as +sse3 does not change the ABI of 128 bit vectors relative +; to x86-64 baseline. 
+define void @caller_calls_128bit_sse3() "target-features"="+sse3" { +; CHECK-LABEL: define {{[^@]+}}@caller_calls_128bit_sse3 +; CHECK-SAME: () #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: call void @callee_128bit(<4 x float> zeroinitializer) +; CHECK-NEXT: ret void +; + call void @callee_calls_128bit_baseline() + ret void +} + +define void @callee_calls_256bit_baseline() { +; CHECK-LABEL: define {{[^@]+}}@callee_calls_256bit_baseline() { +; CHECK-NEXT: call void @callee_256bit(<8 x float> zeroinitializer) +; CHECK-NEXT: ret void +; + call void @callee_256bit(<8 x float> zeroinitializer) + ret void +} + +; Okay to inline as +sse3 does not change the ABI of 256 bit vectors relative +; to x86-64 baseline. +define void @caller_calls_256bit_sse3() "target-features"="+sse3" { +; CHECK-LABEL: define {{[^@]+}}@caller_calls_256bit_sse3 +; CHECK-SAME: () #[[ATTR3]] { +; CHECK-NEXT: call void @callee_256bit(<8 x float> zeroinitializer) +; CHECK-NEXT: ret void +; + call void @callee_calls_256bit_baseline() + ret void +} + +; Can NOT inline as +avx changes the ABI of 256 bit vectors. +define void @caller_calls_256bit_avx() "target-features"="+avx" { +; CHECK-LABEL: define {{[^@]+}}@caller_calls_256bit_avx +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: call void @callee_calls_256bit_baseline() +; CHECK-NEXT: ret void +; + call void @callee_calls_256bit_baseline() + ret void +} + +define void @callee_calls_256bit_avx() "target-features"="+avx" { +; CHECK-LABEL: define {{[^@]+}}@callee_calls_256bit_avx +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: call void @callee_256bit(<8 x float> zeroinitializer) +; CHECK-NEXT: ret void +; + call void @callee_256bit(<8 x float> zeroinitializer) + ret void +} + +; Okay to inline as +avx2 does not change the ABI of 256 bit vectors relative +; to +avx. 
+define void @caller_calls_256bit_avx2() "target-features"="+avx2" { +; CHECK-LABEL: define {{[^@]+}}@caller_calls_256bit_avx2 +; CHECK-SAME: () #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: call void @callee_256bit(<8 x float> zeroinitializer) +; CHECK-NEXT: ret void +; + call void @callee_calls_256bit_avx() + ret void +} + +define void @callee_calls_512bit_baseline() { +; CHECK-LABEL: define {{[^@]+}}@callee_calls_512bit_baseline() { +; CHECK-NEXT: call void @callee_512bit(<16 x float> zeroinitializer) +; CHECK-NEXT: ret void +; + call void @callee_512bit(<16 x float> zeroinitializer) + ret void +} + +; Okay to inline as +sse3 does not change the ABI of 512 bit vectors relative +; to x86-64 baseline. +define void @caller_calls_512bit_sse3() "target-features"="+sse3" { +; CHECK-LABEL: define {{[^@]+}}@caller_calls_512bit_sse3 +; CHECK-SAME: () #[[ATTR3]] { +; CHECK-NEXT: call void @callee_512bit(<16 x float> zeroinitializer) +; CHECK-NEXT: ret void +; + call void @callee_calls_512bit_baseline() + ret void +} + +; Can NOT inline as +avx changes the ABI of 512 bit vectors. +define void @caller_calls_512bit_avx() "target-features"="+avx" { +; CHECK-LABEL: define {{[^@]+}}@caller_calls_512bit_avx +; CHECK-SAME: () #[[ATTR0]] { +; CHECK-NEXT: call void @callee_calls_512bit_baseline() +; CHECK-NEXT: ret void +; + call void @callee_calls_512bit_baseline() + ret void +} + +define void @callee_calls_512bit_avx512f() "target-features"="+avx512f" { +; CHECK-LABEL: define {{[^@]+}}@callee_calls_512bit_avx512f +; CHECK-SAME: () #[[ATTR5:[0-9]+]] { +; CHECK-NEXT: call void @callee_512bit(<16 x float> zeroinitializer) +; CHECK-NEXT: ret void +; + call void @callee_512bit(<16 x float> zeroinitializer) + ret void +} + +; Okay to inline as +avx512bw does not change the ABI of 512 bit vectors +; relative to +avx512f. 
+define void @caller_calls_512bit_avx512bw() "target-features"="+avx512bw" { +; CHECK-LABEL: define {{[^@]+}}@caller_calls_512bit_avx512bw +; CHECK-SAME: () #[[ATTR6:[0-9]+]] { +; CHECK-NEXT: call void @callee_512bit(<16 x float> zeroinitializer) +; CHECK-NEXT: ret void +; + call void @callee_calls_512bit_avx512f() + ret void +} + +define void @callee_calls_512bit_vector_width_256() "target-features"="+avx512vl" "min-legal-vector-width"="256" "prefer-vector-width"="256" { +; CHECK-LABEL: define {{[^@]+}}@callee_calls_512bit_vector_width_256 +; CHECK-SAME: () #[[ATTR7:[0-9]+]] { +; CHECK-NEXT: call void @callee_512bit(<16 x float> zeroinitializer) +; CHECK-NEXT: ret void +; + call void @callee_512bit(<16 x float> zeroinitializer) + ret void +} + +define void @caller_calls_512bit_vector_width_512() "target-features"="+avx512vl" "min-legal-vector-width"="512" "prefer-vector-width"="512" { +; CHECK-LABEL: define {{[^@]+}}@caller_calls_512bit_vector_width_512 +; CHECK-SAME: () #[[ATTR8:[0-9]+]] { +; CHECK-NEXT: call void @callee_calls_512bit_vector_width_256() +; CHECK-NEXT: ret void +; + call void @callee_calls_512bit_vector_width_256() + ret void +} + attributes #0 = { "min-legal-vector-width"="512" "target-features"="+avx,+avx2,+avx512bw,+avx512dq,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" } attributes #1 = { "min-legal-vector-width"="512" "target-features"="+avx,+avx2,+avx512bw,+avx512f,+cmov,+crc32,+cx8,+evex512,+f16c,+fma,+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "tune-cpu"="generic" } diff --git a/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll b/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll index 112cc75b1cf05..96a7e2d4c52b3 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/loop-vectorizer-noalias.ll @@ -27,9 +27,9 @@ define 
void @accsum(ptr noundef %vals, i64 noundef %num) #0 { ; CHECK-NEXT: [[STORE_FORWARDED:%.*]] = phi i8 [ [[LOAD_INITIAL]], [[FOR_BODY_PREHEADER]] ], [ [[ADD_I:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[I_02:%.*]] = phi i64 [ 1, [[FOR_BODY_PREHEADER]] ], [ [[INC:%.*]], [[FOR_BODY]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[VALS]], i64 [[I_02]] -; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !alias.scope [[META0:![0-9]+]] ; CHECK-NEXT: [[ADD_I]] = add i8 [[TMP0]], [[STORE_FORWARDED]] -; CHECK-NEXT: store i8 [[ADD_I]], ptr [[ARRAYIDX]], align 1, !alias.scope [[META0]], !noalias [[META3]] +; CHECK-NEXT: store i8 [[ADD_I]], ptr [[ARRAYIDX]], align 1, !alias.scope [[META0]] ; CHECK-NEXT: [[INC]] = add nuw i64 [[I_02]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUM]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]] @@ -73,6 +73,4 @@ attributes #0 = { "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87"} ; CHECK: [[META0]] = !{[[META1:![0-9]+]]} ; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], !"acc: %val"} ; CHECK: [[META2]] = distinct !{[[META2]], !"acc"} -; CHECK: [[META3]] = !{[[META4:![0-9]+]]} -; CHECK: [[META4]] = distinct !{[[META4]], [[META2]], !"acc: %prev"} ;. From 51dd98bfa185ff8f56bb3153a3245d297d1ce5c6 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 23 Jun 2026 11:07:51 +0200 Subject: [PATCH 138/511] clang: Change TargetInfo::setCPU to take StringRef (#205278) The related APIs all use StringRef, so use StringRef for consistency. 
Co-Authored-By: Claude (Opus 4.8) --- clang/include/clang/Basic/TargetInfo.h | 4 +--- clang/lib/Basic/Targets/AArch64.cpp | 4 +--- clang/lib/Basic/Targets/AArch64.h | 2 +- clang/lib/Basic/Targets/AMDGPU.h | 2 +- clang/lib/Basic/Targets/ARM.cpp | 2 +- clang/lib/Basic/Targets/ARM.h | 2 +- clang/lib/Basic/Targets/AVR.cpp | 2 +- clang/lib/Basic/Targets/AVR.h | 2 +- clang/lib/Basic/Targets/BPF.h | 2 +- clang/lib/Basic/Targets/CSKY.cpp | 2 +- clang/lib/Basic/Targets/CSKY.h | 2 +- clang/lib/Basic/Targets/Hexagon.h | 2 +- clang/lib/Basic/Targets/Lanai.cpp | 2 +- clang/lib/Basic/Targets/Lanai.h | 2 +- clang/lib/Basic/Targets/LoongArch.h | 2 +- clang/lib/Basic/Targets/M68k.cpp | 5 ++--- clang/lib/Basic/Targets/M68k.h | 2 +- clang/lib/Basic/Targets/Mips.h | 2 +- clang/lib/Basic/Targets/NVPTX.h | 2 +- clang/lib/Basic/Targets/PPC.h | 2 +- clang/lib/Basic/Targets/RISCV.h | 2 +- clang/lib/Basic/Targets/Sparc.h | 4 ++-- clang/lib/Basic/Targets/SystemZ.h | 2 +- clang/lib/Basic/Targets/WebAssembly.h | 2 +- clang/lib/Basic/Targets/X86.h | 2 +- clang/lib/Basic/Targets/Xtensa.h | 2 +- 26 files changed, 28 insertions(+), 33 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index cc226403877e2..74030564c74b1 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1404,9 +1404,7 @@ class TargetInfo : public TransferrableTargetInfo, /// Target the specified CPU. /// /// \return False on error (invalid CPU name). - virtual bool setCPU(const std::string &Name) { - return false; - } + virtual bool setCPU(StringRef Name) { return false; } /// Fill a SmallVectorImpl with the valid values to setCPU. 
virtual void fillValidCPUList(SmallVectorImpl &Values) const {} diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp index 9afe6cb10729d..fbcff173297e6 100644 --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -285,9 +285,7 @@ bool AArch64TargetInfo::isValidCPUName(StringRef Name) const { return llvm::AArch64::parseCpu(Name).has_value(); } -bool AArch64TargetInfo::setCPU(const std::string &Name) { - return isValidCPUName(Name); -} +bool AArch64TargetInfo::setCPU(StringRef Name) { return isValidCPUName(Name); } void AArch64TargetInfo::fillValidCPUList( SmallVectorImpl &Values) const { diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h index 0a29bad81939b..b4b66db34b9b7 100644 --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -167,7 +167,7 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo { bool isValidCPUName(StringRef Name) const override; void fillValidCPUList(SmallVectorImpl &Values) const override; - bool setCPU(const std::string &Name) override; + bool setCPU(StringRef Name) override; llvm::APInt getFMVPriority(ArrayRef Features) const override; diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index 1761f32266d41..baad17487e9a1 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -280,7 +280,7 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { void fillValidCPUList(SmallVectorImpl &Values) const override; - bool setCPU(const std::string &Name) override { + bool setCPU(StringRef Name) override { if (getTriple().isAMDGCN()) { GPUKind = llvm::AMDGPU::parseArchAMDGCN(Name); GPUFeatures = llvm::AMDGPU::getArchAttrAMDGCN(GPUKind); diff --git a/clang/lib/Basic/Targets/ARM.cpp b/clang/lib/Basic/Targets/ARM.cpp index f21e9ebbc903a..b9744ff9009ca 100644 --- a/clang/lib/Basic/Targets/ARM.cpp +++ 
b/clang/lib/Basic/Targets/ARM.cpp @@ -647,7 +647,7 @@ void ARMTargetInfo::fillValidCPUList(SmallVectorImpl &Values) const { llvm::ARM::fillValidCPUArchList(Values); } -bool ARMTargetInfo::setCPU(const std::string &Name) { +bool ARMTargetInfo::setCPU(StringRef Name) { if (Name != "generic") setArchInfo(llvm::ARM::parseCPUArch(Name)); diff --git a/clang/lib/Basic/Targets/ARM.h b/clang/lib/Basic/Targets/ARM.h index 43c4718f4735b..8bd56c8855fe3 100644 --- a/clang/lib/Basic/Targets/ARM.h +++ b/clang/lib/Basic/Targets/ARM.h @@ -173,7 +173,7 @@ class LLVM_LIBRARY_VISIBILITY ARMTargetInfo : public TargetInfo { bool isValidCPUName(StringRef Name) const override; void fillValidCPUList(SmallVectorImpl &Values) const override; - bool setCPU(const std::string &Name) override; + bool setCPU(StringRef Name) override; bool setFPMath(StringRef Name) override; diff --git a/clang/lib/Basic/Targets/AVR.cpp b/clang/lib/Basic/Targets/AVR.cpp index 227651a2ab6fa..18db0a7807b53 100644 --- a/clang/lib/Basic/Targets/AVR.cpp +++ b/clang/lib/Basic/Targets/AVR.cpp @@ -501,7 +501,7 @@ void AVRTargetInfo::fillValidCPUList(SmallVectorImpl &Values) const { Values.push_back(Info.Name); } -bool AVRTargetInfo::setCPU(const std::string &Name) { +bool AVRTargetInfo::setCPU(StringRef Name) { // Set the ABI field based on the device or family name. 
auto It = llvm::find_if( AVRMcus, [&](const MCUInfo &Info) { return Info.Name == Name; }); diff --git a/clang/lib/Basic/Targets/AVR.h b/clang/lib/Basic/Targets/AVR.h index 211cf90fe9fb2..a49bbc0322816 100644 --- a/clang/lib/Basic/Targets/AVR.h +++ b/clang/lib/Basic/Targets/AVR.h @@ -173,7 +173,7 @@ class LLVM_LIBRARY_VISIBILITY AVRTargetInfo : public TargetInfo { bool isValidCPUName(StringRef Name) const override; void fillValidCPUList(SmallVectorImpl &Values) const override; - bool setCPU(const std::string &Name) override; + bool setCPU(StringRef Name) override; std::optional handleAsmEscapedChar(char EscChar) const override; StringRef getABI() const override { return ABI; } diff --git a/clang/lib/Basic/Targets/BPF.h b/clang/lib/Basic/Targets/BPF.h index 47bf1f94177d1..e51f37b851d48 100644 --- a/clang/lib/Basic/Targets/BPF.h +++ b/clang/lib/Basic/Targets/BPF.h @@ -99,7 +99,7 @@ class LLVM_LIBRARY_VISIBILITY BPFTargetInfo : public TargetInfo { void fillValidCPUList(SmallVectorImpl &Values) const override; - bool setCPU(const std::string &Name) override { + bool setCPU(StringRef Name) override { if (Name == "v3" || Name == "v4") { HasAlu32 = true; } diff --git a/clang/lib/Basic/Targets/CSKY.cpp b/clang/lib/Basic/Targets/CSKY.cpp index e698508a2370c..7c6cf0e016744 100644 --- a/clang/lib/Basic/Targets/CSKY.cpp +++ b/clang/lib/Basic/Targets/CSKY.cpp @@ -19,7 +19,7 @@ bool CSKYTargetInfo::isValidCPUName(StringRef Name) const { return llvm::CSKY::parseCPUArch(Name) != llvm::CSKY::ArchKind::INVALID; } -bool CSKYTargetInfo::setCPU(const std::string &Name) { +bool CSKYTargetInfo::setCPU(StringRef Name) { llvm::CSKY::ArchKind archKind = llvm::CSKY::parseCPUArch(Name); bool isValid = (archKind != llvm::CSKY::ArchKind::INVALID); diff --git a/clang/lib/Basic/Targets/CSKY.h b/clang/lib/Basic/Targets/CSKY.h index fbdec1ea6d92d..c4993db955cd6 100644 --- a/clang/lib/Basic/Targets/CSKY.h +++ b/clang/lib/Basic/Targets/CSKY.h @@ -65,7 +65,7 @@ class LLVM_LIBRARY_VISIBILITY 
CSKYTargetInfo : public TargetInfo { return false; } - bool setCPU(const std::string &Name) override; + bool setCPU(StringRef Name) override; bool isValidCPUName(StringRef Name) const override; diff --git a/clang/lib/Basic/Targets/Hexagon.h b/clang/lib/Basic/Targets/Hexagon.h index f034911d77b92..70e2cc0fa9737 100644 --- a/clang/lib/Basic/Targets/Hexagon.h +++ b/clang/lib/Basic/Targets/Hexagon.h @@ -123,7 +123,7 @@ class LLVM_LIBRARY_VISIBILITY HexagonTargetInfo : public TargetInfo { void fillValidCPUList(SmallVectorImpl &Values) const override; - bool setCPU(const std::string &Name) override { + bool setCPU(StringRef Name) override { if (!isValidCPUName(Name)) return false; CPU = Name; diff --git a/clang/lib/Basic/Targets/Lanai.cpp b/clang/lib/Basic/Targets/Lanai.cpp index 8722a369ed87f..51df5c951f660 100644 --- a/clang/lib/Basic/Targets/Lanai.cpp +++ b/clang/lib/Basic/Targets/Lanai.cpp @@ -44,7 +44,7 @@ void LanaiTargetInfo::fillValidCPUList( Values.emplace_back("v11"); } -bool LanaiTargetInfo::setCPU(const std::string &Name) { +bool LanaiTargetInfo::setCPU(StringRef Name) { CPU = llvm::StringSwitch(Name).Case("v11", CK_V11).Default(CK_NONE); return CPU != CK_NONE; diff --git a/clang/lib/Basic/Targets/Lanai.h b/clang/lib/Basic/Targets/Lanai.h index a49c425965445..6989cf8c3be10 100644 --- a/clang/lib/Basic/Targets/Lanai.h +++ b/clang/lib/Basic/Targets/Lanai.h @@ -58,7 +58,7 @@ class LLVM_LIBRARY_VISIBILITY LanaiTargetInfo : public TargetInfo { void fillValidCPUList(SmallVectorImpl &Values) const override; - bool setCPU(const std::string &Name) override; + bool setCPU(StringRef Name) override; bool hasFeature(StringRef Feature) const override; diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h index bb1c2edacf103..16124a87d4955 100644 --- a/clang/lib/Basic/Targets/LoongArch.h +++ b/clang/lib/Basic/Targets/LoongArch.h @@ -65,7 +65,7 @@ class LLVM_LIBRARY_VISIBILITY LoongArchTargetInfo : public TargetInfo { BitIntMaxAlign = 128; } 
- bool setCPU(const std::string &Name) override { + bool setCPU(StringRef Name) override { if (!isValidCPUName(Name)) return false; CPU = Name; diff --git a/clang/lib/Basic/Targets/M68k.cpp b/clang/lib/Basic/Targets/M68k.cpp index 293cbe5ce9272..fde1e3953a6c0 100644 --- a/clang/lib/Basic/Targets/M68k.cpp +++ b/clang/lib/Basic/Targets/M68k.cpp @@ -35,9 +35,8 @@ M68kTargetInfo::M68kTargetInfo(const llvm::Triple &Triple, IntAlign = LongAlign = PointerAlign = 16; } -bool M68kTargetInfo::setCPU(const std::string &Name) { - StringRef N = Name; - CPU = llvm::StringSwitch(N) +bool M68kTargetInfo::setCPU(StringRef Name) { + CPU = llvm::StringSwitch(Name) .Case("generic", CK_68000) .Case("M68000", CK_68000) .Case("M68010", CK_68010) diff --git a/clang/lib/Basic/Targets/M68k.h b/clang/lib/Basic/Targets/M68k.h index 729d79ff77fbf..98add6a6221d5 100644 --- a/clang/lib/Basic/Targets/M68k.h +++ b/clang/lib/Basic/Targets/M68k.h @@ -54,7 +54,7 @@ class LLVM_LIBRARY_VISIBILITY M68kTargetInfo : public TargetInfo { std::optional handleAsmEscapedChar(char EscChar) const override; std::string_view getClobbers() const override; BuiltinVaListKind getBuiltinVaListKind() const override; - bool setCPU(const std::string &Name) override; + bool setCPU(StringRef Name) override; CallingConvCheckResult checkCallingConvention(CallingConv CC) const override; std::pair hardwareInterferenceSizes() const override { diff --git a/clang/lib/Basic/Targets/Mips.h b/clang/lib/Basic/Targets/Mips.h index 4c09390c3c6d5..2f251c7eb8690 100644 --- a/clang/lib/Basic/Targets/Mips.h +++ b/clang/lib/Basic/Targets/Mips.h @@ -155,7 +155,7 @@ class LLVM_LIBRARY_VISIBILITY MipsTargetInfo : public TargetInfo { bool isValidCPUName(StringRef Name) const override; void fillValidCPUList(SmallVectorImpl &Values) const override; - bool setCPU(const std::string &Name) override { + bool setCPU(StringRef Name) override { CPU = Name; return isValidCPUName(Name); } diff --git a/clang/lib/Basic/Targets/NVPTX.h 
b/clang/lib/Basic/Targets/NVPTX.h index 69ee20f38343b..8af9cb91c47ab 100644 --- a/clang/lib/Basic/Targets/NVPTX.h +++ b/clang/lib/Basic/Targets/NVPTX.h @@ -158,7 +158,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXTargetInfo : public TargetInfo { Values.emplace_back(OffloadArchToString(static_cast(i))); } - bool setCPU(const std::string &Name) override { + bool setCPU(StringRef Name) override { GPU = StringToOffloadArch(Name); return GPU != OffloadArch::Unknown; } diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index a9f49aa3aebe1..e3bf5072d932d 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -98,7 +98,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { bool isValidCPUName(StringRef Name) const override; void fillValidCPUList(SmallVectorImpl &Values) const override; - bool setCPU(const std::string &Name) override { + bool setCPU(StringRef Name) override { bool CPUKnown = isValidCPUName(Name); if (CPUKnown) { CPU = Name; diff --git a/clang/lib/Basic/Targets/RISCV.h b/clang/lib/Basic/Targets/RISCV.h index 705ee0694038b..193f77fe68a0b 100644 --- a/clang/lib/Basic/Targets/RISCV.h +++ b/clang/lib/Basic/Targets/RISCV.h @@ -51,7 +51,7 @@ class RISCVTargetInfo : public TargetInfo { HasStrictFP = true; } - bool setCPU(const std::string &Name) override { + bool setCPU(StringRef Name) override { if (!isValidCPUName(Name)) return false; CPU = Name; diff --git a/clang/lib/Basic/Targets/Sparc.h b/clang/lib/Basic/Targets/Sparc.h index d9a879ca4d5b3..95b69ff2be4bb 100644 --- a/clang/lib/Basic/Targets/Sparc.h +++ b/clang/lib/Basic/Targets/Sparc.h @@ -136,7 +136,7 @@ class LLVM_LIBRARY_VISIBILITY SparcTargetInfo : public TargetInfo { void fillValidCPUList(SmallVectorImpl &Values) const override; - bool setCPU(const std::string &Name) override { + bool setCPU(StringRef Name) override { CPU = getCPUKind(Name); return CPU != CK_GENERIC; } @@ -234,7 +234,7 @@ class LLVM_LIBRARY_VISIBILITY SparcV9TargetInfo : public 
SparcTargetInfo { void fillValidCPUList(SmallVectorImpl &Values) const override; - bool setCPU(const std::string &Name) override { + bool setCPU(StringRef Name) override { if (!SparcTargetInfo::setCPU(Name)) return false; return getCPUGeneration(CPU) == CG_V9; diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h index 00f7d7a055b24..70b529e8ca854 100644 --- a/clang/lib/Basic/Targets/SystemZ.h +++ b/clang/lib/Basic/Targets/SystemZ.h @@ -185,7 +185,7 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo { fillValidCPUList(Values); } - bool setCPU(const std::string &Name) override { + bool setCPU(StringRef Name) override { ISARevision = getISARevision(Name); return ISARevision != -1; } diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index fa0a2b9b505e0..2732cafb3906d 100644 --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -142,7 +142,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo { bool isValidCPUName(StringRef Name) const final; void fillValidCPUList(SmallVectorImpl &Values) const final; - bool setCPU(const std::string &Name) final { return isValidCPUName(Name); } + bool setCPU(StringRef Name) final { return isValidCPUName(Name); } llvm::SmallVector getTargetBuiltins() const final; diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index c8c5d280754b4..f9c39b31f5e08 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -391,7 +391,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { void fillValidCPUList(SmallVectorImpl &Values) const override; void fillValidTuneCPUList(SmallVectorImpl &Values) const override; - bool setCPU(const std::string &Name) override { + bool setCPU(StringRef Name) override { bool Only64Bit = getTriple().getArch() != llvm::Triple::x86; CPU = llvm::X86::parseArchX86(Name, Only64Bit); return CPU != llvm::X86::CK_None; 
diff --git a/clang/lib/Basic/Targets/Xtensa.h b/clang/lib/Basic/Targets/Xtensa.h index 79783dddd4b64..fd76e38f797d2 100644 --- a/clang/lib/Basic/Targets/Xtensa.h +++ b/clang/lib/Basic/Targets/Xtensa.h @@ -100,7 +100,7 @@ class LLVM_LIBRARY_VISIBILITY XtensaTargetInfo : public TargetInfo { return llvm::StringSwitch(Name).Case("generic", true).Default(false); } - bool setCPU(const std::string &Name) override { + bool setCPU(StringRef Name) override { CPU = Name; return isValidCPUName(Name); } From 6aa38c89b579dd1e04132cf37e23e54b88936976 Mon Sep 17 00:00:00 2001 From: PushkarSingh <149073046+iitianpushkar@users.noreply.github.com> Date: Tue, 23 Jun 2026 14:41:00 +0530 Subject: [PATCH 139/511] [LifetimeSafety] Improve destroyed and invalidated diagnostic notes (#204900) ## Summary Improve Lifetime Safety diagnostic notes by identifying the affected storage in destruction and invalidation notes. Examples: ``` { int value; ptr = &value; } ``` Before: ``` note: destroyed here ``` After: ``` note: local variable 'value' is destroyed here ``` For temporaries: ``` note: temporary object is destroyed here ``` For invalidations: ``` note: local variable 'container' is invalidated here ``` For parameters: ``` note: parameter 'container' is invalidated here ``` For explicitly deallocated storage: ``` note: allocated object is freed here ``` ## Implementation The lifetime analysis already knows which object is responsible for a warning. This change reuses that information when producing the accompanying destruction, invalidation or deallocation note. All affected regression tests have been updated to check the complete diagnostic messages. 
Addresses #200234 --- .../clang/Basic/DiagnosticSemaKinds.td | 6 +- clang/lib/Sema/SemaLifetimeSafety.h | 75 ++-- .../LifetimeSafety/annotation-suggestions.cpp | 14 +- .../Sema/LifetimeSafety/invalidations.cpp | 148 +++---- clang/test/Sema/LifetimeSafety/safety-c.c | 19 +- clang/test/Sema/LifetimeSafety/safety.cpp | 377 +++++++++--------- 6 files changed, 316 insertions(+), 323 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index cb5f124c609ce..cde99dfb16ec5 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11056,9 +11056,9 @@ def warn_lifetime_safety_inapplicable_lifetimebound DefaultIgnore; def note_lifetime_safety_used_here : Note<"later used here">; -def note_lifetime_safety_invalidated_here : Note<"invalidated here">; -def note_lifetime_safety_destroyed_here : Note<"destroyed here">; -def note_lifetime_safety_freed_here : Note<"freed here">; +def note_lifetime_safety_invalidated_here : Note<"%0 is invalidated here">; +def note_lifetime_safety_destroyed_here : Note<"%0 is destroyed here">; +def note_lifetime_safety_freed_here : Note<"%0 is freed here">; def note_lifetime_safety_returned_here : Note<"returned here">; def note_lifetime_safety_moved_here : Note<"potentially moved here">; def note_lifetime_safety_dangling_field_here: Note<"this field dangles">; diff --git a/clang/lib/Sema/SemaLifetimeSafety.h b/clang/lib/Sema/SemaLifetimeSafety.h index 4bde272fb40a1..1047aecf863fb 100644 --- a/clang/lib/Sema/SemaLifetimeSafety.h +++ b/clang/lib/Sema/SemaLifetimeSafety.h @@ -87,13 +87,15 @@ class LifetimeSafetySemaHelperImpl : public LifetimeSafetySemaHelper { unsigned DiagID = MovedExpr ? 
diag::warn_lifetime_safety_use_after_scope_moved : diag::warn_lifetime_safety_use_after_scope; + std::string DestroyedSubject = getDiagSubjectDescription(IssueExpr); S.Diag(IssueExpr->getExprLoc(), DiagID) - << getDiagSubjectDescription(IssueExpr) << IssueExpr->getSourceRange(); + << DestroyedSubject << IssueExpr->getSourceRange(); if (MovedExpr) S.Diag(MovedExpr->getExprLoc(), diag::note_lifetime_safety_moved_here) << MovedExpr->getSourceRange(); - S.Diag(FreeLoc, diag::note_lifetime_safety_destroyed_here); + S.Diag(FreeLoc, diag::note_lifetime_safety_destroyed_here) + << DestroyedSubject; reportAliasingChain(ExprChain); @@ -167,13 +169,10 @@ class LifetimeSafetySemaHelperImpl : public LifetimeSafetySemaHelper { auto WarnDiag = isa(InvalidationExpr) ? diag::warn_lifetime_safety_use_after_free : diag::warn_lifetime_safety_invalidation; - auto UseDiag = isa(InvalidationExpr) - ? diag::note_lifetime_safety_freed_here - : diag::note_lifetime_safety_invalidated_here; + std::string InvalidatedSubject = getDiagSubjectDescription(IssueExpr); S.Diag(IssueExpr->getExprLoc(), WarnDiag) - << getDiagSubjectDescription(IssueExpr) << IssueExpr->getSourceRange(); - S.Diag(InvalidationExpr->getExprLoc(), UseDiag) - << InvalidationExpr->getSourceRange(); + << InvalidatedSubject << IssueExpr->getSourceRange(); + reportInvalidationSite(InvalidationExpr, InvalidatedSubject); S.Diag(UseExpr->getExprLoc(), diag::note_lifetime_safety_used_here) << UseExpr->getSourceRange(); } @@ -183,14 +182,11 @@ class LifetimeSafetySemaHelperImpl : public LifetimeSafetySemaHelper { auto WarnDiag = isa(InvalidationExpr) ? diag::warn_lifetime_safety_use_after_free : diag::warn_lifetime_safety_invalidation; - auto UseDiag = isa(InvalidationExpr) - ? 
diag::note_lifetime_safety_freed_here - : diag::note_lifetime_safety_invalidated_here; + std::string InvalidatedSubject = getDiagSubjectDescription(PVD); S.Diag(PVD->getSourceRange().getBegin(), WarnDiag) - << getDiagSubjectDescription(PVD) << PVD->getSourceRange(); - S.Diag(InvalidationExpr->getExprLoc(), UseDiag) - << InvalidationExpr->getSourceRange(); + << InvalidatedSubject << PVD->getSourceRange(); + reportInvalidationSite(InvalidationExpr, InvalidatedSubject); S.Diag(UseExpr->getExprLoc(), diag::note_lifetime_safety_used_here) << UseExpr->getSourceRange(); } @@ -198,16 +194,12 @@ class LifetimeSafetySemaHelperImpl : public LifetimeSafetySemaHelper { void reportInvalidatedField(const Expr *IssueExpr, const FieldDecl *DanglingField, const Expr *InvalidationExpr) override { - auto InvalidationDiag = isa(InvalidationExpr) - ? diag::note_lifetime_safety_freed_here - : diag::note_lifetime_safety_invalidated_here; + std::string InvalidatedSubject = getDiagSubjectDescription(IssueExpr); S.Diag(IssueExpr->getExprLoc(), diag::warn_lifetime_safety_invalidated_field) - << getDiagSubjectDescription(IssueExpr) - << getDiagSubjectDescription(DanglingField) + << InvalidatedSubject << getDiagSubjectDescription(DanglingField) << IssueExpr->getSourceRange(); - S.Diag(InvalidationExpr->getExprLoc(), InvalidationDiag) - << InvalidationExpr->getSourceRange(); + reportInvalidationSite(InvalidationExpr, InvalidatedSubject); S.Diag(DanglingField->getLocation(), diag::note_lifetime_safety_dangling_field_here) << DanglingField->getEndLoc(); @@ -216,15 +208,12 @@ class LifetimeSafetySemaHelperImpl : public LifetimeSafetySemaHelper { void reportInvalidatedField(const ParmVarDecl *PVD, const FieldDecl *DanglingField, const Expr *InvalidationExpr) override { - auto InvalidationDiag = isa(InvalidationExpr) - ? 
diag::note_lifetime_safety_freed_here - : diag::note_lifetime_safety_invalidated_here; + std::string InvalidatedSubject = getDiagSubjectDescription(PVD); S.Diag(PVD->getSourceRange().getBegin(), diag::warn_lifetime_safety_invalidated_field) - << getDiagSubjectDescription(PVD) - << getDiagSubjectDescription(DanglingField) << PVD->getSourceRange(); - S.Diag(InvalidationExpr->getExprLoc(), InvalidationDiag) - << InvalidationExpr->getSourceRange(); + << InvalidatedSubject << getDiagSubjectDescription(DanglingField) + << PVD->getSourceRange(); + reportInvalidationSite(InvalidationExpr, InvalidatedSubject); S.Diag(DanglingField->getLocation(), diag::note_lifetime_safety_dangling_field_here) << DanglingField->getEndLoc(); @@ -233,16 +222,12 @@ class LifetimeSafetySemaHelperImpl : public LifetimeSafetySemaHelper { void reportInvalidatedGlobal(const Expr *IssueExpr, const VarDecl *DanglingGlobal, const Expr *InvalidationExpr) override { - auto InvalidationDiag = isa(InvalidationExpr) - ? diag::note_lifetime_safety_freed_here - : diag::note_lifetime_safety_invalidated_here; + std::string InvalidatedSubject = getDiagSubjectDescription(IssueExpr); S.Diag(IssueExpr->getExprLoc(), diag::warn_lifetime_safety_invalidated_global) - << getDiagSubjectDescription(IssueExpr) - << getDiagSubjectDescription(DanglingGlobal) + << InvalidatedSubject << getDiagSubjectDescription(DanglingGlobal) << IssueExpr->getSourceRange(); - S.Diag(InvalidationExpr->getExprLoc(), InvalidationDiag) - << InvalidationExpr->getSourceRange(); + reportInvalidationSite(InvalidationExpr, InvalidatedSubject); if (DanglingGlobal->isStaticLocal() || DanglingGlobal->isStaticDataMember()) S.Diag(DanglingGlobal->getLocation(), diag::note_lifetime_safety_dangling_static_here) @@ -256,15 +241,12 @@ class LifetimeSafetySemaHelperImpl : public LifetimeSafetySemaHelper { void reportInvalidatedGlobal(const ParmVarDecl *PVD, const VarDecl *DanglingGlobal, const Expr *InvalidationExpr) override { - auto InvalidationDiag = 
isa(InvalidationExpr) - ? diag::note_lifetime_safety_freed_here - : diag::note_lifetime_safety_invalidated_here; + std::string InvalidatedSubject = getDiagSubjectDescription(PVD); S.Diag(PVD->getSourceRange().getBegin(), diag::warn_lifetime_safety_invalidated_global) - << getDiagSubjectDescription(PVD) - << getDiagSubjectDescription(DanglingGlobal) << PVD->getSourceRange(); - S.Diag(InvalidationExpr->getExprLoc(), InvalidationDiag) - << InvalidationExpr->getSourceRange(); + << InvalidatedSubject << getDiagSubjectDescription(DanglingGlobal) + << PVD->getSourceRange(); + reportInvalidationSite(InvalidationExpr, InvalidatedSubject); if (DanglingGlobal->isStaticLocal() || DanglingGlobal->isStaticDataMember()) S.Diag(DanglingGlobal->getLocation(), diag::note_lifetime_safety_dangling_static_here) @@ -442,6 +424,15 @@ class LifetimeSafetySemaHelperImpl : public LifetimeSafetySemaHelper { } private: + void reportInvalidationSite(const Expr *InvalidationExpr, + StringRef InvalidatedSubject) { + auto Diag = isa(InvalidationExpr) + ? diag::note_lifetime_safety_freed_here + : diag::note_lifetime_safety_invalidated_here; + S.Diag(InvalidationExpr->getExprLoc(), Diag) + << InvalidatedSubject << InvalidationExpr->getSourceRange(); + } + std::string getLifetimeBoundFixItText(SourceLocation Loc, bool LeadingSpace, bool AllowGNUAttrMacro = true) { StringRef Spelling = S.getLangOpts().LifetimeSafetyLifetimeBoundMacro; diff --git a/clang/test/Sema/LifetimeSafety/annotation-suggestions.cpp b/clang/test/Sema/LifetimeSafety/annotation-suggestions.cpp index cef3397b57a6f..98c528516b8e7 100644 --- a/clang/test/Sema/LifetimeSafety/annotation-suggestions.cpp +++ b/clang/test/Sema/LifetimeSafety/annotation-suggestions.cpp @@ -277,21 +277,21 @@ View return_view_field(const ViewProvider& v) { // expected-warning {{paramet void test_get_on_temporary_pointer() { const ReturnsSelf* s_ref = &ReturnsSelf().get(); // expected-warning {{temporary object does not live long enough}}. 
- // expected-note@-1 {{destroyed here}} + // expected-note@-1 {{temporary object is destroyed here}} // expected-note@-2 {{result of call to 'get' aliases the storage of temporary object}} (void)s_ref; // expected-note {{later used here}} } void test_get_on_temporary_ref() { const ReturnsSelf& s_ref = ReturnsSelf().get(); // expected-warning {{temporary object does not live long enough}}. - // expected-note@-1 {{destroyed here}} + // expected-note@-1 {{temporary object is destroyed here}} // expected-note@-2 {{result of call to 'get' aliases the storage of temporary object}} (void)s_ref; // expected-note {{later used here}} } void test_getView_on_temporary() { View sv = ViewProvider{1}.getView(); // expected-warning {{temporary object does not live long enough}}. - // expected-note@-1 {{destroyed here}} + // expected-note@-1 {{temporary object is destroyed here}} // expected-note@-2 {{result of call to 'getView' aliases the storage of temporary object}} (void)sv; // expected-note {{later used here}} } @@ -604,7 +604,7 @@ void uaf_via_inferred_lifetimebound() { int local; f = return_lambda_capturing_param(local); // expected-warning {{local variable 'local' does not live long enough}} \ // expected-note {{result of call to 'return_lambda_capturing_param' aliases the storage of local variable 'local'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'local' is destroyed here}} (void)f; // expected-note {{later used here}} } @@ -628,7 +628,7 @@ void test_inference() { MyObj obj; ptr = create_target(obj); // expected-warning {{local variable 'obj' does not live long enough}} \ // expected-note {{result of call to 'create_target' aliases the storage of local variable 'obj'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} (void)ptr; // expected-note {{later used here}} } } // namespace make_unique_suggestion @@ -636,12 +636,12 @@ void test_inference() { namespace 
new_allocation_suggestion { View* MakeView(const MyObj& in) { // expected-warning {{parameter in intra-TU function should be marked [[clang::lifetimebound]]}} - return new View(in); // expected-note {{param returned here}} {{destroyed here}} + return new View(in); // expected-note {{param returned here}} } void test_new_allocation() { View* v = MakeView(MyObj{}); // expected-warning {{temporary object does not live long enough}} \ - // expected-note {{destroyed here}} \ + // expected-note {{temporary object is destroyed here}} \ // expected-note {{result of call to 'MakeView' aliases the storage of temporary object}} (void)v; // expected-note {{later used here}} } diff --git a/clang/test/Sema/LifetimeSafety/invalidations.cpp b/clang/test/Sema/LifetimeSafety/invalidations.cpp index 301822f066de8..c2ac105855d07 100644 --- a/clang/test/Sema/LifetimeSafety/invalidations.cpp +++ b/clang/test/Sema/LifetimeSafety/invalidations.cpp @@ -8,7 +8,7 @@ namespace SimpleResize { void IteratorInvalidAfterResize(int new_size) { std::vector v; auto it = std::begin(v); // expected-warning {{local variable 'v' is later invalidated}} - v.resize(new_size); // expected-note {{invalidated here}} + v.resize(new_size); // expected-note {{local variable 'v' is invalidated here}} *it; // expected-note {{later used here}} } @@ -53,7 +53,7 @@ void InvalidateBeforeSwapIterators(std::vector v1, std::vector v2) { if (it1 == std::end(v1) || it2 == std::end(v2)) return; *it1 = 0; // ok *it2 = 0; // ok - v1.clear(); // expected-note {{invalidated here}} + v1.clear(); // expected-note {{parameter 'v1' is invalidated here}} *it1 = 0; // expected-note {{later used here}} // FIXME: Handle invalidating functions like std::swap. 
std::swap(it1, it2); @@ -67,7 +67,7 @@ void InvalidateBeforeSwapContainers(std::vector v1, std::vector v2) { if (it1 == std::end(v1) || it2 == std::end(v2)) return; *it1 = 0; // ok *it2 = 0; // ok - v1.clear(); // expected-note {{invalidated here}} + v1.clear(); // expected-note {{parameter 'v1' is invalidated here}} *it1 = 0; // expected-note {{later used here}} } } // namespace InvalidateBeforeSwap @@ -81,7 +81,7 @@ void SameConditionInvalidatesThenValidatesIterator() { if (it == container.end()) return; const bool a = A(); if (a) { - container.clear(); // expected-note {{invalidated here}} + container.clear(); // expected-note {{local variable 'container' is invalidated here}} } if (a) { it = container.begin(); @@ -112,7 +112,7 @@ void MergeWithDifferentContainerValuesInvalidated() { } else { it = std::find(v3.begin(), v3.end(), 10); } - v2.clear(); // expected-note {{invalidated here}} + v2.clear(); // expected-note {{local variable 'v2' is invalidated here}} *it = 20; // expected-note {{later used here}} } } // namespace IteratorWithMultipleContainers @@ -123,7 +123,7 @@ void IteratorInvalidationInAForLoop(std::vector v) { it != std::end(v); ++it) { // expected-note {{later used here}} if (Bool()) { - v.erase(it); // expected-note {{invalidated here}} + v.erase(it); // expected-note {{parameter 'v' is invalidated here}} } } } @@ -132,7 +132,7 @@ void IteratorInvalidationInAWhileLoop(std::vector v) { auto it = std::begin(v); // expected-warning {{parameter 'v' is later invalidated}} while (it != std::end(v)) { if (Bool()) { - v.erase(it); // expected-note {{invalidated here}} + v.erase(it); // expected-note {{parameter 'v' is invalidated here}} } ++it; // expected-note {{later used here}} } @@ -157,7 +157,7 @@ void IteratorInvalidationInAForeachLoop(std::vector v) { for (int& x : v) { // expected-warning {{parameter 'v' is later invalidated}} \ // expected-note {{later used here}} if (x % 2 == 0) { - v.erase(std::find(v.begin(), v.end(), 1)); // expected-note 
{{invalidated here}} + v.erase(std::find(v.begin(), v.end(), 1)); // expected-note {{parameter 'v' is invalidated here}} } } } @@ -185,7 +185,7 @@ void IteratorCheckedAfterFind(std::vector v) { void IteratorCheckedAfterFindThenErased(std::vector v) { auto it = std::find(std::begin(v), std::end(v), 3); // expected-warning {{parameter 'v' is later invalidated}} if (it != std::end(v)) { - v.erase(it); // expected-note {{invalidated here}} + v.erase(it); // expected-note {{parameter 'v' is invalidated here}} } *it; // expected-note {{later used here}} } @@ -202,7 +202,7 @@ void UseReturnedIteratorAfterInsert(std::vector v) { void UseInvalidIteratorAfterInsert(std::vector v) { auto it = std::begin(v); // expected-warning {{parameter 'v' is later invalidated}} - v.insert(it, 10); // expected-note {{invalidated here}} + v.insert(it, 10); // expected-note {{parameter 'v' is invalidated here}} if (it != std::end(v)) { // expected-note {{later used here}} *it; } @@ -221,7 +221,7 @@ void IteratorValidAfterInsert(std::vector v) { void IteratorInvalidAfterInsert(std::vector v, int value) { auto it = std::begin(v); // expected-warning {{parameter 'v' is later invalidated}} - v.insert(it, 0); // expected-note {{invalidated here}} + v.insert(it, 0); // expected-note {{parameter 'v' is invalidated here}} *it; // expected-note {{later used here}} } } // namespace SimpleStdInsert @@ -231,7 +231,7 @@ void IteratorUsedAfterErase(std::vector v) { auto it = std::begin(v); // expected-warning {{parameter 'v' is later invalidated}} for (; it != std::end(v); ++it) { // expected-note {{later used here}} if (*it > 3) { - v.erase(it); // expected-note {{invalidated here}} + v.erase(it); // expected-note {{parameter 'v' is invalidated here}} } } } @@ -239,7 +239,7 @@ void IteratorUsedAfterErase(std::vector v) { void IteratorUsedAfterPushBackParam(std::vector& v) { // expected-warning {{parameter 'v' is later invalidated}} auto it = std::begin(v); if (it != std::end(v) && *it == 3) { - 
v.push_back(4); // expected-note {{invalidated here}} + v.push_back(4); // expected-note {{parameter 'v' is invalidated here}} } ++it; // expected-note {{later used here}} } @@ -247,7 +247,7 @@ void IteratorUsedAfterPushBackParam(std::vector& v) { // expected-warning { void IteratorUsedAfterPushBack(std::vector v) { auto it = std::begin(v); // expected-warning {{parameter 'v' is later invalidated}} if (it != std::end(v) && *it == 3) { - v.push_back(4); // expected-note {{invalidated here}} + v.push_back(4); // expected-note {{parameter 'v' is invalidated here}} } ++it; // expected-note {{later used here}} } @@ -256,14 +256,14 @@ void IteratorUsedAfterPreIncrement() { std::vector v; auto it = v.begin(); // expected-warning {{local variable 'v' is later invalidated}} auto next = ++it; - v.push_back(1); // expected-note {{invalidated here}} + v.push_back(1); // expected-note {{local variable 'v' is invalidated here}} (void)*next; // expected-note {{later used here}} } void IteratorUsedAfterPostDecrement(std::vector v) { auto it = v.rbegin(); // expected-warning {{parameter 'v' is later invalidated}} auto prev = it--; - v.push_back(1); // expected-note {{invalidated here}} + v.push_back(1); // expected-note {{parameter 'v' is invalidated here}} (void)*prev; // expected-note {{later used here}} } @@ -271,21 +271,21 @@ void IteratorUsedAfterAddition() { std::vector v; auto it = v.cbegin(); // expected-warning {{local variable 'v' is later invalidated}} auto next = it + 5; - v.push_back(1); // expected-note {{invalidated here}} + v.push_back(1); // expected-note {{local variable 'v' is invalidated here}} (void)*next; // expected-note {{later used here}} } void IteratorUsedAfterReverseSubtraction(std::vector v) { auto it = v.crbegin(); // expected-warning {{parameter 'v' is later invalidated}} auto prev = 5 - it; - v.push_back(1); // expected-note {{invalidated here}} + v.push_back(1); // expected-note {{parameter 'v' is invalidated here}} (void)*prev; // expected-note 
{{later used here}} } void IteratorUsedAfterAddAdd(std::vector v) { auto it = v.cbegin(); // expected-warning {{parameter 'v' is later invalidated}} auto next = (it + 5) + 5; - v.push_back(1); // expected-note {{invalidated here}} + v.push_back(1); // expected-note {{parameter 'v' is invalidated here}} (void)*next; // expected-note {{later used here}} } @@ -293,21 +293,21 @@ void IteratorUsedAfterMixedAddition() { std::vector v; auto it = v.cbegin(); // expected-warning {{local variable 'v' is later invalidated}} auto next = 1 + it + 2 + 3; - v.push_back(1); // expected-note {{invalidated here}} + v.push_back(1); // expected-note {{local variable 'v' is invalidated here}} (void)*next; // expected-note {{later used here}} } void IteratorUsedAfterPreIncrementAddAssign(std::vector v) { auto it = v.begin(); // expected-warning {{parameter 'v' is later invalidated}} it = ++it + 1 + 2; - v.push_back(1); // expected-note {{invalidated here}} + v.push_back(1); // expected-note {{parameter 'v' is invalidated here}} (void)*it; // expected-note {{later used here}} } void IteratorUsedAfterBeginAddAssign() { std::vector v; auto it = v.begin() + 1; // expected-warning {{local variable 'v' is later invalidated}} - v.push_back(1); // expected-note {{invalidated here}} + v.push_back(1); // expected-note {{local variable 'v' is invalidated here}} (void)*it; // expected-note {{later used here}} } @@ -315,7 +315,7 @@ void IteratorUsedAfterStdBeginAddAssign() { std::vector v; std::vector::iterator it; it = std::begin(v) + 1; // expected-warning {{local variable 'v' is later invalidated}} - v.push_back(1); // expected-note {{invalidated here}} + v.push_back(1); // expected-note {{local variable 'v' is invalidated here}} (void)*it; // expected-note {{later used here}} } } // namespace SimpleInvalidIterators @@ -325,13 +325,13 @@ void IteratorInvalidatedThroughLocalReferenceAlias() { std::vector vv; std::vector &v = vv; auto it = vv.begin(); // expected-warning {{local variable 'vv' is 
later invalidated}} - v.push_back(42); // expected-note {{invalidated here}} + v.push_back(42); // expected-note {{local variable 'vv' is invalidated here}} (void)it; // expected-note {{later used here}} } void IteratorInvalidatedThroughPointerParameter(std::vector *v) { // expected-warning {{parameter 'v' is later invalidated}} auto it = v->begin(); - v->push_back(42); // expected-note {{invalidated here}} + v->push_back(42); // expected-note {{parameter 'v' is invalidated here}} (void)it; // expected-note {{later used here}} } @@ -348,7 +348,7 @@ void ParenthesizedContainerInvalidatesIterator() { namespace ContainerObjectAliases { // FIXME: Distinguish owner-borrow from content-borrow. void PointerParameterObjectUseIsOk(std::vector *v) { // expected-warning {{parameter 'v' is later invalidated}} - v->push_back(42); // expected-note {{invalidated here}} + v->push_back(42); // expected-note {{parameter 'v' is invalidated here}} (void)v; // expected-note {{later used here}} } @@ -356,7 +356,7 @@ void PointerParameterObjectUseIsOk(std::vector *v) { // expected-warning {{ void LocalPointerAliasObjectUseIsOk() { std::vector vv; std::vector *v = &vv; // expected-warning {{local variable 'vv' is later invalidated}} - v->push_back(42); // expected-note {{invalidated here}} + v->push_back(42); // expected-note {{local variable 'vv' is invalidated here}} (void)*v; // expected-note {{later used here}} } @@ -364,7 +364,7 @@ void LocalPointerAliasObjectUseIsOk() { void LocalReferenceAliasObjectUseIsOk() { std::vector vv; std::vector &v = vv; // expected-warning {{local variable 'vv' is later invalidated}} - v.push_back(42); // expected-note {{invalidated here}} + v.push_back(42); // expected-note {{local variable 'vv' is invalidated here}} (void)v; // expected-note {{later used here}} } } // namespace ContainerObjectAliases @@ -375,7 +375,7 @@ namespace ElementReferences { void ReferenceToVectorElement() { std::vector v = {1, 2, 3}; int& ref = v[0]; // expected-warning {{local 
variable 'v' is later invalidated}} - v.push_back(4); // expected-note {{invalidated here}} + v.push_back(4); // expected-note {{local variable 'v' is invalidated here}} ref = 10; // expected-note {{later used here}} (void)ref; } @@ -383,14 +383,14 @@ void ReferenceToVectorElement() { void PointerRefToVectorElement() { std::vector v = {nullptr, nullptr}; int*& ref = v[0]; // expected-warning {{local variable 'v' is later invalidated}} - v.push_back(nullptr); // expected-note {{invalidated here}} + v.push_back(nullptr); // expected-note {{local variable 'v' is invalidated here}} ref = nullptr; // expected-note {{later used here}} } void PointerToVectorElement() { std::vector v = {1, 2, 3}; int* ptr = &v[0]; // expected-warning {{local variable 'v' is later invalidated}} - v.resize(100); // expected-note {{invalidated here}} + v.resize(100); // expected-note {{local variable 'v' is invalidated here}} *ptr = 10; // expected-note {{later used here}} } @@ -403,7 +403,7 @@ void SelfInvalidatingMap() { // On the other hand, std::flat_map (since C++23) does not provide pointer stability on // insertion and following is unsafe for this container. mp[1] = "42"; - mp[2] // expected-note {{invalidated here}} + mp[2] // expected-note {{local variable 'mp' is invalidated here}} = mp[1]; // expected-warning {{local variable 'mp' is later invalidated}} expected-note {{later used here}} } @@ -412,7 +412,7 @@ void InvalidateErase() { std::flat_map mp; // None of these containers provide iterator stability. 
So following is unsafe: auto it = mp.find(3); // expected-warning {{local variable 'mp' is later invalidated}} - mp.erase(mp.find(4)); // expected-note {{invalidated here}} + mp.erase(mp.find(4)); // expected-note {{local variable 'mp' is invalidated here}} if (it != mp.end()) // expected-note {{later used here}} *it; } @@ -422,12 +422,12 @@ namespace Strings { void append(std::string str) { std::string_view view = str; // expected-warning {{parameter 'str' is later invalidated}} - str += "456"; // expected-note {{invalidated here}} + str += "456"; // expected-note {{parameter 'str' is invalidated here}} (void)view; // expected-note {{later used here}} } void reassign(std::string str, std::string str2) { std::string_view view = str; // expected-warning {{parameter 'str' is later invalidated}} - str = str2; // expected-note {{invalidated here}} + str = str2; // expected-note {{parameter 'str' is invalidated here}} (void)view; // expected-note {{later used here}} } } // namespace Strings @@ -437,7 +437,7 @@ void ReassigningAfterMove(std::string str, std::string str2) { std::string_view view = str; // expected-warning {{parameter 'str' is later invalidated}} std::vector someStorage; someStorage.push_back(std::move(str)); - str = str2; // expected-note {{invalidated here}} + str = str2; // expected-note {{parameter 'str' is invalidated here}} (void)view; // expected-note {{later used here}} } @@ -480,7 +480,7 @@ void Invalidate1Use2ViaRefIsOk() { S s; auto it = s.strings2.begin(); // expected-warning {{local variable 's' is later invalidated}} auto& strings1 = s.strings1; - strings1.push_back("1"); // expected-note {{invalidated here}} + strings1.push_back("1"); // expected-note {{local variable 's' is invalidated here}} *it; // expected-note {{later used here}} } void Invalidate1UseSIsOk() { @@ -493,14 +493,14 @@ void Invalidate1UseSIsOk() { void PointerToContainerIsOk() { std::vector s; std::vector* p = &s; // expected-warning {{local variable 's' is later 
invalidated}} - p->push_back("1"); // expected-note {{invalidated here}} + p->push_back("1"); // expected-note {{local variable 's' is invalidated here}} (void)*p; // expected-note {{later used here}} } void IteratorFromPointerToContainerIsInvalidated() { std::vector s; std::vector* p = &s; // expected-warning {{local variable 's' is later invalidated}} auto it = p->begin(); - p->push_back("1"); // expected-note {{invalidated here}} + p->push_back("1"); // expected-note {{local variable 's' is invalidated here}} *it; // expected-note {{later used here}} } // FIXME: Distinguish invalidating an element's contents from invalidating @@ -508,7 +508,7 @@ void IteratorFromPointerToContainerIsInvalidated() { void ChangingRegionOwnedByContainerIsOk() { std::vector subdirs; for (std::string& path : subdirs) // expected-warning {{local variable 'subdirs' is later invalidated}} expected-note {{later used here}} - path = std::string(); // expected-note {{invalidated here}} + path = std::string(); // expected-note {{local variable 'subdirs' is invalidated here}} } } // namespace ContainersAsFields @@ -522,7 +522,7 @@ struct SinkOwnerBorrow { SinkOwnerBorrow(std::string *dest, int n) : dest_(dest) { // expected-warning {{parameter 'dest' escapes to the field 'dest_' and is later invalidated}} if (n > 0) - dest->clear(); // expected-note {{invalidated here}} + dest->clear(); // expected-note {{parameter 'dest' is invalidated here}} } }; @@ -531,7 +531,7 @@ struct SinkInteriorBorrow { SinkInteriorBorrow(std::string *dest, int n) : dest_(dest->data()) { // expected-warning {{parameter 'dest' escapes to the field 'dest_' and is later invalidated}} if (n > 0) - dest->clear(); // expected-note {{invalidated here}} + dest->clear(); // expected-note {{parameter 'dest' is invalidated here}} } }; @@ -548,39 +548,39 @@ struct S { void InvalidatedFieldLocalVector() { std::vector strings; FieldFromLocalVector = *strings.begin(); // expected-warning {{local variable 'strings' escapes to the 
field 'FieldFromLocalVector' and is later invalidated}} - strings.push_back("1"); // expected-note {{invalidated here}} + strings.push_back("1"); // expected-note {{local variable 'strings' is invalidated here}} } void InvalidatedFieldByValueParamVector(std::vector strings) { FieldFromByValueParamVector = *strings.begin(); // expected-warning {{parameter 'strings' escapes to the field 'FieldFromByValueParamVector' and is later invalidated}} - strings.push_back("1"); // expected-note {{invalidated here}} + strings.push_back("1"); // expected-note {{parameter 'strings' is invalidated here}} } void InvalidatedFieldLocalString() { std::string s; FieldFromLocalString = s; // expected-warning {{local variable 's' escapes to the field 'FieldFromLocalString' and is later invalidated}} - s.clear(); // expected-note {{invalidated here}} + s.clear(); // expected-note {{local variable 's' is invalidated here}} } void InvalidatedFieldByValueParamString(std::string s) { FieldFromByValueParamString = s; // expected-warning {{parameter 's' escapes to the field 'FieldFromByValueParamString' and is later invalidated}} - s.clear(); // expected-note {{invalidated here}} + s.clear(); // expected-note {{parameter 's' is invalidated here}} } void InvalidatedFieldRefParamString(std::string &s) { // expected-warning {{parameter 's' escapes to the field 'FieldFromRefParamString' and is later invalidated}} FieldFromRefParamString = s; - s.~basic_string(); // expected-note {{invalidated here}} + s.~basic_string(); // expected-note {{parameter 's' is invalidated here}} } void InvalidatedFieldDelete() { int *p = new int; // expected-warning {{allocated object escapes to the field 'FieldFromNew' and is later invalidated}} FieldFromNew = p; - delete p; // expected-note {{freed here}} + delete p; // expected-note {{allocated object is freed here}} } void InvalidatedFieldDeleteParam(int *p) { // expected-warning {{parameter 'p' escapes to the field 'FieldFromPointerParam' and is later invalidated}} 
FieldFromPointerParam = p; - delete p; // expected-note {{freed here}} + delete p; // expected-note {{parameter 'p' is freed here}} } void FieldReassignedBeforeInvalidation() { @@ -608,41 +608,41 @@ struct S { void InvalidatedGlobalLocalVector() { std::vector strings; GlobalFromLocalVector = *strings.begin(); // expected-warning {{local variable 'strings' escapes to the global variable 'GlobalFromLocalVector' and is later invalidated}} - strings.push_back("1"); // expected-note {{invalidated here}} + strings.push_back("1"); // expected-note {{local variable 'strings' is invalidated here}} } void InvalidatedGlobalByValueParamString(std::string s) { GlobalFromByValueParamString = s; // expected-warning {{parameter 's' escapes to the global variable 'GlobalFromByValueParamString' and is later invalidated}} - s.clear(); // expected-note {{invalidated here}} + s.clear(); // expected-note {{parameter 's' is invalidated here}} } void InvalidatedGlobalRefParamString(std::string &s) { // expected-warning {{parameter 's' escapes to the global variable 'GlobalFromRefParamString' and is later invalidated}} GlobalFromRefParamString = s; - s.~basic_string(); // expected-note {{invalidated here}} + s.~basic_string(); // expected-note {{parameter 's' is invalidated here}} } void InvalidatedGlobalDelete() { int *p = new int; // expected-warning {{allocated object escapes to the global variable 'GlobalFromNew' and is later invalidated}} GlobalFromNew = p; - delete p; // expected-note {{freed here}} + delete p; // expected-note {{allocated object is freed here}} } void InvalidatedGlobalDeleteParam(int *p) { // expected-warning {{parameter 'p' escapes to the global variable 'GlobalFromPointerParam' and is later invalidated}} GlobalFromPointerParam = p; - delete p; // expected-note {{freed here}} + delete p; // expected-note {{parameter 'p' is freed here}} } void InvalidatedStaticLocalString() { static std::string_view StaticFromLocalString; // expected-note {{this static storage 
dangles}} std::string s; StaticFromLocalString = s; // expected-warning {{local variable 's' escapes to the static variable 'StaticFromLocalString' and is later invalidated}} - s.clear(); // expected-note {{invalidated here}} + s.clear(); // expected-note {{local variable 's' is invalidated here}} } void InvalidatedStaticMemberString() { std::string s; S::StaticMember = s; // expected-warning {{local variable 's' escapes to the static variable 'StaticMember' and is later invalidated}} - s.clear(); // expected-note {{invalidated here}} + s.clear(); // expected-note {{local variable 's' is invalidated here}} } void GlobalReassignedBeforeInvalidation() { @@ -715,14 +715,14 @@ void SetExtractDoesNotInvalidateOthers() { void SetClearInvalidates() { std::set s; auto it = s.begin(); // expected-warning {{local variable 's' is later invalidated}} - s.clear(); // expected-note {{invalidated here}} + s.clear(); // expected-note {{local variable 's' is invalidated here}} *it; // expected-note {{later used here}} } void MapClearInvalidates() { std::map m; auto it = m.begin(); // expected-warning {{local variable 'm' is later invalidated}} - m.clear(); // expected-note {{invalidated here}} + m.clear(); // expected-note {{local variable 'm' is invalidated here}} *it; // expected-note {{later used here}} } @@ -741,7 +741,7 @@ void MapSubscriptMultipleCallsDoesNotInvalidate(std::map mp, int a, in void FlatMapSubscriptMultipleCallsInvalidate(std::flat_map mp, int a, int b) { PrintMax(mp[a], mp[b]); // expected-warning {{parameter 'mp' is later invalidated}} \ - // expected-note {{invalidated here}} \ + // expected-note {{parameter 'mp' is invalidated here}} \ // expected-note {{later used here}} } @@ -752,7 +752,7 @@ void captured_view_invalidated_by_owner() { std::string s = "42"; std::string_view p = s; // expected-warning {{local variable 's' is later invalidated}} auto lambda = [=]() { return p; }; - s.push_back('c'); // expected-note {{invalidated here}} + s.push_back('c'); // 
expected-note {{local variable 's' is invalidated here}} lambda(); // expected-note {{later used here}} } @@ -760,7 +760,7 @@ void multiple_captures_one_invalidated() { std::string s1 = "a", s2 = "b"; std::string_view p1 = s1, p2 = s2; // expected-warning {{local variable 's1' is later invalidated}} auto lambda = [=]() { return p1.size() + p2.size(); }; - s1.clear(); // expected-note {{invalidated here}} + s1.clear(); // expected-note {{local variable 's1' is invalidated here}} lambda(); // expected-note {{later used here}} } @@ -794,7 +794,7 @@ struct S { void baz(){ std::vector vec = {"42"}; v = vec[0]; // expected-warning {{local variable 'vec' is later invalidated}} - vec.push_back("1"); // expected-note {{invalidated here}} + vec.push_back("1"); // expected-note {{local variable 'vec' is invalidated here}} bar(); // expected-note {{later used here}} v = nullptr; } @@ -807,7 +807,7 @@ void function_captured_ref_invalidated() { std::vector v; v.push_back(1); std::function f = [&r = v[0]]() { (void)r; }; // expected-warning {{local variable 'v' is later invalidated}} - v.push_back(2); // expected-note {{invalidated here}} + v.push_back(2); // expected-note {{local variable 'v' is invalidated here}} (void)f; // expected-note {{later used here}} } @@ -819,7 +819,7 @@ namespace explicit_destructor { void explicit_destructor_invalidates_pointer() { std::string s = "42"; const char *p = s.data(); // expected-warning {{local variable 's' is later invalidated}} - s.~basic_string(); // expected-note {{invalidated here}} + s.~basic_string(); // expected-note {{local variable 's' is invalidated here}} (void)*p; // expected-note {{later used here}} } @@ -827,7 +827,7 @@ void pointer_destructor_invalidates_pointer() { char storage[sizeof(std::string)]; std::string *obj = new (storage) std::string("42"); // expected-warning {{local variable 'storage' is later invalidated}} const char *p = obj->data(); - obj->~basic_string(); // expected-note {{invalidated here}} + 
obj->~basic_string(); // expected-note {{local variable 'storage' is invalidated here}} (void)*p; // expected-note {{later used here}} } @@ -835,7 +835,7 @@ void destroy_at_invalidates_pointer() { char storage[sizeof(std::string)]; std::string *obj = new (storage) std::string("42"); // expected-warning {{local variable 'storage' is later invalidated}} const char *p = obj->data(); - std::destroy_at(obj); // expected-note {{invalidated here}} + std::destroy_at(obj); // expected-note {{local variable 'storage' is invalidated here}} (void)*p; // expected-note {{later used here}} } @@ -853,7 +853,7 @@ void destroy_at_invalidates_array_pointer() { std::string arr[1] = {"42"}; std::string (&arr_ref)[1] = arr; const char *p = arr[0].data(); // expected-warning {{local variable 'arr' is later invalidated}} - std::destroy_at(&arr_ref); // expected-note {{invalidated here}} + std::destroy_at(&arr_ref); // expected-note {{local variable 'arr' is invalidated here}} (void)*p; // expected-note {{later used here}} } @@ -861,7 +861,7 @@ void reference_destructor_invalidates_pointer() { std::string s = "42"; std::string &ref = s; // expected-warning {{local variable 's' is later invalidated}} const char *p = ref.data(); - std::destroy_at(&ref); // expected-note {{invalidated here}} + std::destroy_at(&ref); // expected-note {{local variable 's' is invalidated here}} (void)*p; // expected-note {{later used here}} } @@ -869,7 +869,7 @@ void destroy_at_ternary_operator(bool flag) { std::string* str1 = new std::string; // expected-warning {{allocated object is later invalidated}} std::string* str2 = new std::string; const char *p = str1->data(); - std::destroy_at(flag ? str1 : str2); // expected-note {{invalidated here}} + std::destroy_at(flag ? 
str1 : str2); // expected-note {{allocated object is invalidated here}} (void)*p; // expected-note {{later used here}} } @@ -881,7 +881,7 @@ struct StringOwner { void member_destructor_invalidates_pointer() { StringOwner owner = {"42", "43"}; const char *p = owner.s.data(); // expected-warning {{local variable 'owner' is later invalidated}} - owner.t.~basic_string(); // expected-note {{invalidated here}} + owner.t.~basic_string(); // expected-note {{local variable 'owner' is invalidated here}} (void)*p; // expected-note {{later used here}} } @@ -892,7 +892,7 @@ namespace unique_ptr_invalidation { void invalid_after_reset() { std::unique_ptr up(new int); int *p = up.get(); // expected-warning {{local variable 'up' is later invalidated}} - up.reset(); // expected-note {{invalidated here}} + up.reset(); // expected-note {{local variable 'up' is invalidated here}} (void)*p; // expected-note {{later used here}} } @@ -900,14 +900,14 @@ void invalid_after_move_assign() { std::unique_ptr up(new int); std::unique_ptr other(new int); int *p = up.get(); // expected-warning {{local variable 'up' is later invalidated}} - up = std::move(other); // expected-note {{invalidated here}} + up = std::move(other); // expected-note {{local variable 'up' is invalidated here}} (void)*p; // expected-note {{later used here}} } void invalid_after_null_assign() { std::unique_ptr up(new int); int *p = up.get(); // expected-warning {{local variable 'up' is later invalidated}} - up = nullptr; // expected-note {{invalidated here}} + up = nullptr; // expected-note {{local variable 'up' is invalidated here}} (void)*p; // expected-note {{later used here}} } @@ -915,7 +915,7 @@ void invalid_after_ternary_reset(bool flag) { std::unique_ptr up(new int); std::unique_ptr other(new int); int *p = flag ? 
up.get() : other.get(); // expected-warning {{local variable 'up' is later invalidated}} - up.reset(); // expected-note {{invalidated here}} + up.reset(); // expected-note {{local variable 'up' is invalidated here}} (void)*p; // expected-note {{later used here}} } diff --git a/clang/test/Sema/LifetimeSafety/safety-c.c b/clang/test/Sema/LifetimeSafety/safety-c.c index 9ab2a57cb08a9..e9443899c9935 100644 --- a/clang/test/Sema/LifetimeSafety/safety-c.c +++ b/clang/test/Sema/LifetimeSafety/safety-c.c @@ -16,7 +16,7 @@ void simple_case(void) { { int i; p = &i; // expected-warning {{local variable 'i' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'i' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -25,7 +25,7 @@ void chained_assignment(void) { { int i; p = q = r = &i; // expected-warning {{local variable 'i' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'i' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -35,7 +35,7 @@ void conditional_branch(int cond) { if (cond) { int i; p = &i; // expected-warning {{local variable 'i' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'i' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -46,7 +46,7 @@ void loop_with_break(int cond) { if (cond) { int i; p = &i; // expected-warning {{local variable 'i' does not live long enough}} - break; // expected-note {{destroyed here}} + break; // expected-note {{local variable 'i' is destroyed here}} } } (void)*p; // expected-note {{later used here}} @@ -64,7 +64,7 @@ void lifetimebound_call(void) { int i; p = identity(&i); // expected-warning {{local variable 'i' does not live long enough}} \ // expected-note {{result of call to 'identity' aliases the storage of local variable 'i'}} - } // expected-note {{destroyed here}} + } // expected-note {{local 
variable 'i' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -85,7 +85,7 @@ void struct_address_of_field(void) { { struct IntField holder; p = &holder.field; // expected-warning {{local variable 'holder' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'holder' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -95,7 +95,8 @@ void conditional_operator_lifetimebound(int cond) { int a, b; p = identity(cond ? &a // expected-warning {{local variable 'a' does not live long enough}} : &b); // expected-warning {{local variable 'b' does not live long enough}} - } // expected-note 2 {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} \ + // expected-note {{local variable 'b' is destroyed here}} (void)*p; // expected-note 2 {{later used here}} } @@ -109,7 +110,7 @@ void union_member(void) { { union IntOrPtr u; p = &u.i; // expected-warning {{local variable 'u' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'u' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -125,7 +126,7 @@ void anonymous_union_member(void) { { struct AnonymousUnion u; p = &u.i; // expected-warning {{local variable 'u' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'u' is destroyed here}} (void)*p; // expected-note {{later used here}} } diff --git a/clang/test/Sema/LifetimeSafety/safety.cpp b/clang/test/Sema/LifetimeSafety/safety.cpp index 65bfe69e854ac..abd3d9c61b784 100644 --- a/clang/test/Sema/LifetimeSafety/safety.cpp +++ b/clang/test/Sema/LifetimeSafety/safety.cpp @@ -55,7 +55,7 @@ void simple_case() { { MyObj s; p = &s; // expected-warning {{local variable 's' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} (void)*p; // expected-note {{later used here}} 
} @@ -64,7 +64,7 @@ void simple_case_gsl() { { MyObj s; v = s; // expected-warning {{local variable 's' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} v.use(); // expected-note {{later used here}} } @@ -93,7 +93,7 @@ void pointer_chain() { MyObj s; p = &s; // expected-warning {{does not live long enough}} q = p; // expected-note {{local variable 'p' aliases the storage of local variable 's'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} (void)*q; // expected-note {{later used here}} } @@ -103,7 +103,7 @@ void propagation_gsl() { MyObj s; v1 = s; // expected-warning {{local variable 's' does not live long enough}} v2 = v1; // expected-note {{local variable 'v1' aliases the storage of local variable 's'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} v2.use(); // expected-note {{later used here}} } @@ -112,7 +112,7 @@ void multiple_uses_one_warning() { { MyObj s; p = &s; // expected-warning {{does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} (void)*p; // expected-note {{later used here}} // No second warning for the same loan. 
p->id = 1; @@ -127,7 +127,7 @@ void multiple_pointers() { p = &s; // expected-warning {{does not live long enough}} q = &s; // expected-warning {{does not live long enough}} r = &s; // expected-warning {{does not live long enough}} - } // expected-note 3 {{destroyed here}} + } // expected-note 3 {{local variable 's' is destroyed here}} (void)*p; // expected-note {{later used here}} (void)*q; // expected-note {{later used here}} (void)*r; // expected-note {{later used here}} @@ -139,7 +139,7 @@ void multiple_pointers_chained() { MyObj s; MyObj* obj1, *obj2; p = obj1 = obj2 = &s; // expected-warning {{does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -158,11 +158,11 @@ void single_pointer_multiple_loans(bool cond) { if (cond){ MyObj s; p = &s; // expected-warning {{does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} else { MyObj t; p = &t; // expected-warning {{does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 't' is destroyed here}} (void)*p; // expected-note 2 {{later used here}} } @@ -171,11 +171,11 @@ void single_pointer_multiple_loans_gsl(bool cond) { if (cond){ MyObj s; v = s; // expected-warning {{local variable 's' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} else { MyObj t; v = t; // expected-warning {{local variable 't' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 't' is destroyed here}} v.use(); // expected-note 2 {{later used here}} } @@ -185,7 +185,7 @@ void if_branch(bool cond) { if (cond) { MyObj temp; p = &temp; // expected-warning {{local variable 'temp' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note 
{{local variable 'temp' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -195,7 +195,7 @@ void if_branch_potential(bool cond) { if (cond) { MyObj temp; p = &temp; // expected-warning {{local variable 'temp' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'temp' is destroyed here}} if (!cond) (void)*p; // expected-note {{later used here}} else @@ -208,7 +208,7 @@ void if_branch_gsl(bool cond) { if (cond) { MyObj temp; v = temp; // expected-warning {{local variable 'temp' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'temp' is destroyed here}} v.use(); // expected-note {{later used here}} } @@ -223,7 +223,7 @@ void potential_together(bool cond) { p_definite = &s; // expected-warning {{does not live long enough}} if (cond) p_maybe = &s; // expected-warning {{does not live long enough}} - } // expected-note 2 {{destroyed here}} + } // expected-note 2 {{local variable 's' is destroyed here}} (void)*p_definite; // expected-note {{later used here}} if (!cond) (void)*p_maybe; // expected-note {{later used here}} @@ -237,7 +237,7 @@ void overrides_potential(bool cond) { MyObj s; q = &s; // expected-warning {{does not live long enough}} p = q; - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} if (cond) { // 'q' is conditionally "rescued". 'p' is not. @@ -256,7 +256,7 @@ void due_to_conditional_killing(bool cond) { { MyObj s; q = &s; // expected-warning {{does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} if (cond) { // 'q' is conditionally "rescued". 'p' is not. 
q = &safe; @@ -269,7 +269,7 @@ void for_loop_use_after_loop_body(MyObj safe) { for (int i = 0; i < 1; ++i) { MyObj s; p = &s; // expected-warning {{does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -289,7 +289,7 @@ void for_loop_gsl() { for (int i = 0; i < 1; ++i) { MyObj s; v = s; // expected-warning {{local variable 's' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} v.use(); // expected-note {{later used here}} } @@ -300,7 +300,7 @@ void for_loop_use_before_loop_body(MyObj safe) { (void)*p; // expected-note {{later used here}} MyObj s; p = &s; // expected-warning {{does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} (void)*p; } @@ -311,7 +311,7 @@ void loop_with_break(bool cond) { if (cond) { MyObj temp; p = &temp; // expected-warning {{does not live long enough}} - break; // expected-note {{destroyed here}} + break; // expected-note {{local variable 'temp' is destroyed here}} } } (void)*p; // expected-note {{later used here}} @@ -324,7 +324,7 @@ void loop_with_break_gsl(bool cond) { if (cond) { MyObj temp; v = temp; // expected-warning {{local variable 'temp' does not live long enough}} - break; // expected-note {{destroyed here}} + break; // expected-note {{local variable 'temp' is destroyed here}} } } v.use(); // expected-note {{later used here}} @@ -338,7 +338,7 @@ void multiple_expiry_of_same_loan(bool cond) { MyObj unsafe; if (cond) { p = &unsafe; // expected-warning {{does not live long enough}} - break; // expected-note {{destroyed here}} + break; // expected-note {{local variable 'unsafe' is destroyed here}} } } (void)*p; // expected-note {{later used here}} @@ -349,7 +349,7 @@ void multiple_expiry_of_same_loan(bool cond) { if (cond) { p = &unsafe; // 
expected-warning {{does not live long enough}} if (cond) - break; // expected-note {{destroyed here}} + break; // expected-note {{local variable 'unsafe' is destroyed here}} } } (void)*p; // expected-note {{later used here}} @@ -359,7 +359,7 @@ void multiple_expiry_of_same_loan(bool cond) { if (cond) { MyObj unsafe2; p = &unsafe2; // expected-warning {{does not live long enough}} - break; // expected-note {{destroyed here}} + break; // expected-note {{local variable 'unsafe2' is destroyed here}} } } (void)*p; // expected-note {{later used here}} @@ -370,7 +370,7 @@ void multiple_expiry_of_same_loan(bool cond) { if (cond) p = &unsafe; // expected-warning {{does not live long enough}} if (cond) - break; // expected-note {{destroyed here}} + break; // expected-note {{local variable 'unsafe' is destroyed here}} } (void)*p; // expected-note {{later used here}} } @@ -382,7 +382,7 @@ void switch_potential(int mode) { case 1: { MyObj temp; p = &temp; // expected-warning {{local variable 'temp' does not live long enough}} - break; // expected-note {{destroyed here}} + break; // expected-note {{local variable 'temp' is destroyed here}} } case 2: { p = &safe; // This path is okay. 
@@ -401,17 +401,17 @@ void switch_uaf(int mode) { case 1: { MyObj temp1; p = &temp1; // expected-warning {{does not live long enough}} - break; // expected-note {{destroyed here}} + break; // expected-note {{local variable 'temp1' is destroyed here}} } case 2: { MyObj temp2; p = &temp2; // expected-warning {{does not live long enough}} - break; // expected-note {{destroyed here}} + break; // expected-note {{local variable 'temp2' is destroyed here}} } default: { MyObj temp2; p = &temp2; // expected-warning {{does not live long enough}} - break; // expected-note {{destroyed here}} + break; // expected-note {{local variable 'temp2' is destroyed here}} } } (void)*p; // expected-note 3 {{later used here}} @@ -423,17 +423,17 @@ void switch_gsl(int mode) { case 1: { MyObj temp1; v = temp1; // expected-warning {{local variable 'temp1' does not live long enough}} - break; // expected-note {{destroyed here}} + break; // expected-note {{local variable 'temp1' is destroyed here}} } case 2: { MyObj temp2; v = temp2; // expected-warning {{local variable 'temp2' does not live long enough}} - break; // expected-note {{destroyed here}} + break; // expected-note {{local variable 'temp2' is destroyed here}} } default: { MyObj temp3; v = temp3; // expected-warning {{local variable 'temp3' does not live long enough}} - break; // expected-note {{destroyed here}} + break; // expected-note {{local variable 'temp3' is destroyed here}} } } v.use(); // expected-note 3 {{later used here}} @@ -451,7 +451,7 @@ void loan_from_previous_iteration(MyObj safe, bool condition) { q = p; (void)*p; (void)*q; // expected-note {{later used here}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'x' is destroyed here}} } void trivial_int_uaf() { @@ -459,7 +459,7 @@ void trivial_int_uaf() { { int b = 1; a = &b; // expected-warning {{local variable 'b' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'b' is destroyed 
here}} (void)*a; // expected-note {{later used here}} } @@ -468,7 +468,7 @@ void trivial_class_uaf() { { TriviallyDestructedClass s; ptr = &s; // expected-warning {{local variable 's' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} (void)ptr; // expected-note {{later used here}} } @@ -660,7 +660,7 @@ void test_view_pointer() { { View v; vp = &v; // expected-warning {{local variable 'v' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'v' is destroyed here}} vp->use(); // expected-note {{later used here}} } @@ -669,7 +669,7 @@ void test_view_double_pointer() { { View* vp = nullptr; vpp = &vp; // expected-warning {{local variable 'vp' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'vp' is destroyed here}} (**vpp).use(); // expected-note {{later used here}} } @@ -698,7 +698,7 @@ void test_lifetimebound_multi_level() { int*** ppp = &pp; // expected-warning {{local variable 'pp' does not live long enough}} result = return_inner_ptr_addr(ppp); // expected-note {{local variable 'ppp' aliases the storage of local variable 'pp'}} \ // expected-note {{result of call to 'return_inner_ptr_addr' aliases the storage of local variable 'pp'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'pp' is destroyed here}} (void)**result; // expected-note {{used here}} } @@ -731,7 +731,7 @@ MyObj* uaf_before_uar() { { MyObj local_obj; p = &local_obj; // expected-warning {{local variable 'local_obj' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'local_obj' is destroyed here}} return p; // expected-note {{later used here}} } @@ -822,7 +822,7 @@ void lifetimebound_simple_function() { MyObj obj; v = Identity(obj); // expected-warning {{local variable 'obj' does not live long enough}} \ // expected-note {{result 
of call to 'Identity' aliases the storage of local variable 'obj'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} v.use(); // expected-note {{later used here}} } @@ -834,7 +834,8 @@ void lifetimebound_multiple_args_definite() { // expected-note {{result of call to 'Choose' aliases the storage of local variable 'obj2'}} obj1, // expected-warning {{local variable 'obj1' does not live long enough}} obj2); // expected-warning {{local variable 'obj2' does not live long enough}} - } // expected-note 2 {{destroyed here}} + } // expected-note {{local variable 'obj1' is destroyed here}} \ + // expected-note {{local variable 'obj2' is destroyed here}} v.use(); // expected-note 2 {{later used here}} } @@ -848,8 +849,8 @@ void lifetimebound_multiple_args_potential(bool cond) { v = Choose(true, obj1, // expected-warning {{local variable 'obj1' does not live long enough}} obj2); // expected-warning {{local variable 'obj2' does not live long enough}} - } // expected-note {{destroyed here}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj2' is destroyed here}} + } // expected-note {{local variable 'obj1' is destroyed here}} v.use(); // expected-note 2 {{later used here}} } @@ -861,7 +862,7 @@ void lifetimebound_mixed_args() { v = SelectFirst(obj1, // expected-warning {{local variable 'obj1' does not live long enough}} \ // expected-note {{result of call to 'SelectFirst' aliases the storage of local variable 'obj1'}} obj2); - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj1' is destroyed here}} v.use(); // expected-note {{later used here}} } @@ -877,7 +878,7 @@ void lifetimebound_member_function() { MyObj obj; v = obj.getView(); // expected-warning {{local variable 'obj' does not live long enough}} \ // expected-note {{result of call to 'getView' aliases the storage of local variable 'obj'}} - } // expected-note {{destroyed here}} + } // expected-note {{local 
variable 'obj' is destroyed here}} v.use(); // expected-note {{later used here}} } @@ -892,7 +893,7 @@ void lifetimebound_conversion_operator() { { LifetimeBoundConversionView obj; v = obj; // expected-warning {{local variable 'obj' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} v.use(); // expected-note {{later used here}} } @@ -902,7 +903,7 @@ void lifetimebound_chained_calls() { MyObj obj; v = Identity(Identity(Identity(obj))); // expected-warning {{local variable 'obj' does not live long enough}} \ // expected-note 3 {{result of call to 'Identity' aliases the storage of local variable 'obj'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} v.use(); // expected-note {{later used here}} } @@ -912,7 +913,7 @@ void lifetimebound_with_pointers() { MyObj obj; ptr = GetPointer(obj); // expected-warning {{local variable 'obj' does not live long enough}} \ // expected-note {{result of call to 'GetPointer' aliases the storage of local variable 'obj'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} (void)*ptr; // expected-note {{later used here}} } @@ -922,7 +923,7 @@ void chained_assignment_lifetimebound_call() { MyObj s; p = Identity(obj = &s); // expected-warning {{does not live long enough}} \ // expected-note {{result of call to 'Identity' aliases the storage of local variable 's'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -942,7 +943,7 @@ void lifetimebound_partial_safety(bool cond) { v = Choose(true, safe_obj, temp_obj); // expected-warning {{local variable 'temp_obj' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'temp_obj' is destroyed here}} v.use(); // expected-note {{later used here}} } @@ -956,7 
+957,7 @@ void lifetimebound_return_reference() { const MyObj& ref = GetObject(temp_v); // expected-note {{local variable 'temp_v' aliases the storage of local variable 'obj'}} \ // expected-note {{result of call to 'GetObject' aliases the storage of local variable 'obj'}} ptr = &ref; - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} (void)*ptr; // expected-note {{later used here}} } @@ -973,7 +974,7 @@ void lifetimebound_ctor() { { MyObj obj; v = obj; // expected-warning {{local variable 'obj' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} (void)v; // expected-note {{later used here}} } @@ -982,7 +983,7 @@ void lifetimebound_ctor_functional_cast() { { MyObj obj; v = LifetimeBoundCtor(obj); // expected-warning {{local variable 'obj' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} (void)v; // expected-note {{later used here}} } @@ -991,7 +992,7 @@ void lifetimebound_ctor_c_style_cast() { { MyObj obj; v = (LifetimeBoundCtor)(obj); // expected-warning {{local variable 'obj' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} (void)v; // expected-note {{later used here}} } @@ -1000,7 +1001,7 @@ void lifetimebound_ctor_static_cast() { { MyObj obj; v = static_cast(obj); // expected-warning {{local variable 'obj' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} (void)v; // expected-note {{later used here}} } @@ -1010,7 +1011,7 @@ void lifetimebound_make_unique() { MyObj obj; ptr = std::make_unique(obj); // tu-warning {{local variable 'obj' does not live long enough}} \ // tu-note {{result of call to 'make_unique' aliases the storage of local variable 'obj'}} - } // tu-note {{destroyed here}} + 
} // tu-note {{local variable 'obj' is destroyed here}} (void)ptr; // tu-note {{later used here}} } @@ -1026,7 +1027,7 @@ void non_lifetimebound_make_unique() { void lifetimebound_make_unique_temp() { std::unique_ptr ptr = std::make_unique(MyObj()); // tu-warning {{temporary object does not live long enough}} \ - // tu-note {{destroyed here}} \ + // tu-note {{temporary object is destroyed here}} \ // tu-note {{result of call to 'make_unique' aliases the storage of temporary object}} (void)ptr; // tu-note {{later used here}} } @@ -1066,7 +1067,7 @@ void lifetimebound_make_unique_multi_params() { MyObj obj_short; ptr = std::make_unique(obj_short, obj_long); // tu-warning {{local variable 'obj_short' does not live long enough}} \ // tu-note {{result of call to 'make_unique' aliases the storage of local variable 'obj_short'}} - } // tu-note {{destroyed here}} + } // tu-note {{local variable 'obj_short' is destroyed here}} (void)ptr; // tu-note {{later used here}} } @@ -1077,7 +1078,7 @@ void lifetimebound_make_unique_multi_params2() { MyObj obj_short; ptr = std::make_unique(obj_long, obj_short, 1); // tu-warning {{local variable 'obj_short' does not live long enough}} \ // tu-note {{result of call to 'make_unique' aliases the storage of local variable 'obj_short'}} - } // tu-note {{destroyed here}} + } // tu-note {{local variable 'obj_short' is destroyed here}} (void)ptr; // tu-note {{later used here}} } @@ -1098,7 +1099,7 @@ void lifetimebound_make_unique_multi_params3_1() { MyObj obj_short; ptr = std::make_unique(obj_short, obj_long, 1.0); // tu-warning {{local variable 'obj_short' does not live long enough}} \ // tu-note {{result of call to 'make_unique' aliases the storage of local variable 'obj_short'}} - } // tu-note {{destroyed here}} + } // tu-note {{local variable 'obj_short' is destroyed here}} (void)ptr; // tu-note {{later used here}} } @@ -1109,7 +1110,7 @@ void lifetimebound_make_unique_multi_params3_2() { MyObj obj_short; ptr = std::make_unique(obj_long, 
obj_short, 1.0); // tu-warning {{local variable 'obj_short' does not live long enough}} \ // tu-note {{result of call to 'make_unique' aliases the storage of local variable 'obj_short'}} - } // tu-note {{destroyed here}} + } // tu-note {{local variable 'obj_short' is destroyed here}} (void)ptr; // tu-note {{later used here}} } @@ -1186,7 +1187,7 @@ void conditional_operator_one_unsafe_branch(bool cond) { MyObj temp; p = cond ? &temp // expected-warning {{local variable 'temp' does not live long enough}} : &safe; - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'temp' is destroyed here}} // This is not a use-after-free for any value of `cond` but the analysis // cannot reason this and marks the above as a false positive. This @@ -1202,7 +1203,7 @@ void conditional_operator_two_unsafe_branches(bool cond) { MyObj a, b; p = cond ? &a // expected-warning {{local variable 'a' does not live long enough}} : &b; // expected-warning {{local variable 'b' does not live long enough}} - } // expected-note 2 {{destroyed here}} + } // expected-note {{local variable 'b' is destroyed here}} expected-note {{local variable 'a' is destroyed here}} (void)*p; // expected-note 2 {{later used here}} } @@ -1214,7 +1215,7 @@ void conditional_operator_nested(bool cond) { : &b // expected-warning {{local variable 'b' does not live long enough}}. : cond ? &c // expected-warning {{local variable 'c' does not live long enough}}. : &d; // expected-warning {{local variable 'd' does not live long enough}}. - } // expected-note 4 {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} expected-note {{local variable 'd' is destroyed here}} expected-note {{local variable 'b' is destroyed here}} expected-note {{local variable 'c' is destroyed here}} (void)*p; // expected-note 4 {{later used here}} } @@ -1224,7 +1225,7 @@ void conditional_operator_lifetimebound(bool cond) { MyObj a, b; p = Identity(cond ? 
&a // expected-warning {{local variable 'a' does not live long enough}} : &b); // expected-warning {{local variable 'b' does not live long enough}} - } // expected-note 2 {{destroyed here}} + } // expected-note {{local variable 'b' is destroyed here}} expected-note {{local variable 'a' is destroyed here}} (void)*p; // expected-note 2 {{later used here}} } @@ -1234,7 +1235,7 @@ void conditional_operator_lifetimebound_nested(bool cond) { MyObj a, b; p = Identity(cond ? Identity(&a) // expected-warning {{local variable 'a' does not live long enough}} : Identity(&b)); // expected-warning {{local variable 'b' does not live long enough}} - } // expected-note 2 {{destroyed here}} + } // expected-note {{local variable 'b' is destroyed here}} expected-note {{local variable 'a' is destroyed here}} (void)*p; // expected-note 2 {{later used here}} } @@ -1246,7 +1247,7 @@ void conditional_operator_lifetimebound_nested_deep(bool cond) { : &b) // expected-warning {{local variable 'b' does not live long enough}} : Identity(cond ? 
&c // expected-warning {{local variable 'c' does not live long enough}} : &d)); // expected-warning {{local variable 'd' does not live long enough}} - } // expected-note 4 {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} expected-note {{local variable 'd' is destroyed here}} expected-note {{local variable 'b' is destroyed here}} expected-note {{local variable 'c' is destroyed here}} (void)*p; // expected-note 4 {{later used here}} } @@ -1257,7 +1258,7 @@ void comma_use_after_scope() { { MyObj temp; p = (side(), &temp); // expected-warning {{local variable 'temp' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'temp' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -1266,7 +1267,7 @@ void comma_nested() { { MyObj temp; p = (side(), (side(), &temp)); // expected-warning {{local variable 'temp' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'temp' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -1277,7 +1278,7 @@ void comma_masked_by_conditional(bool cond) { { MyObj temp; p = cond ? 
keep : (side(), &temp); // expected-warning {{local variable 'temp' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'temp' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -1293,7 +1294,7 @@ void binary_conditional_false_unsafe(MyObj* in) { { MyObj temp; p = in ?: &temp; // expected-warning {{local variable 'temp' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'temp' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -1303,7 +1304,7 @@ void binary_conditional_common_unsafe(MyObj* fallback) { MyObj temp; MyObj* t = &temp; // expected-warning {{local variable 'temp' does not live long enough}} p = t ?: fallback; - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'temp' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -1318,7 +1319,7 @@ void binary_conditional_nested(MyObj* a, MyObj* b) { { MyObj temp; p = a ?: b ?: &temp; // expected-warning {{local variable 'temp' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'temp' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -1329,14 +1330,14 @@ void binary_conditional_masked_by_conditional(bool cond, MyObj* in) { { MyObj temp; p = cond ? 
keep : (in ?: &temp); // expected-warning {{local variable 'temp' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'temp' is destroyed here}} (void)*p; // expected-note {{later used here}} } void binary_conditional_use_after_free(int* in) { int* h = new int; // expected-warning {{allocated object does not live long enough}} int* p = in ?: h; - delete h; // expected-note {{freed here}} + delete h; // expected-note {{allocated object is freed here}} (void)*p; // expected-note {{later used here}} } @@ -1377,7 +1378,7 @@ void simpleparen() { MyObj a; MyObj* b = &a; // expected-warning {{local variable 'a' does not live long enough}} p = (((b))); - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -1386,14 +1387,14 @@ void parentheses(bool cond) { { MyObj a; p = &((((a)))); // expected-warning {{local variable 'a' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} (void)*p; // expected-note {{later used here}} { MyObj a; p = ((GetPointer((a)))); // expected-warning {{local variable 'a' does not live long enough}} \ // expected-note {{result of call to 'GetPointer' aliases the storage of local variable 'a'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} (void)*p; // expected-note {{later used here}} { @@ -1402,14 +1403,14 @@ void parentheses(bool cond) { : b) // expected-warning {{local variable 'b' does not live long enough}}. : (cond ? c // expected-warning {{local variable 'c' does not live long enough}}. : d)); // expected-warning {{local variable 'd' does not live long enough}}. 
- } // expected-note 4 {{destroyed here}} + } // expected-note {{local variable 'b' is destroyed here}} expected-note {{local variable 'c' is destroyed here}} expected-note {{local variable 'a' is destroyed here}} expected-note {{local variable 'd' is destroyed here}} (void)*p; // expected-note 4 {{later used here}} { MyObj a, b, c, d; p = ((cond ? (((cond ? &a : &b))) // expected-warning {{local variable 'b' does not live long enough}} expected-warning {{local variable 'a' does not live long enough}}. : &(((cond ? c : d))))); // expected-warning {{local variable 'd' does not live long enough}} expected-warning {{local variable 'c' does not live long enough}}. - } // expected-note 4 {{destroyed here}} + } // expected-note {{local variable 'd' is destroyed here}} expected-note {{local variable 'b' is destroyed here}} expected-note {{local variable 'c' is destroyed here}} expected-note {{local variable 'a' is destroyed here}} (void)*p; // expected-note 4 {{later used here}} } @@ -1417,13 +1418,13 @@ void parentheses(bool cond) { void use_temporary_after_destruction() { View a; a = non_trivially_destructed_temporary(); // expected-warning {{temporary object does not live long enough}} \ - expected-note {{destroyed here}} + expected-note {{temporary object is destroyed here}} use(a); // expected-note {{later used here}} } void passing_temporary_to_lifetime_bound_function() { View a = construct_view(non_trivially_destructed_temporary()); // expected-warning {{temporary object does not live long enough}} \ - expected-note {{destroyed here}} \ + expected-note {{temporary object is destroyed here}} \ expected-note {{result of call to 'construct_view' aliases the storage of temporary object}} use(a); // expected-note {{later used here}} } @@ -1431,7 +1432,7 @@ void passing_temporary_to_lifetime_bound_function() { void use_trivial_temporary_after_destruction() { View a; a = trivially_destructed_temporary(); // expected-warning {{temporary object does not live long enough}} \ 
- expected-note {{destroyed here}} + expected-note {{temporary object is destroyed here}} use(a); // expected-note {{later used here}} } @@ -1467,7 +1468,7 @@ namespace FullExprCleanupLoc { void var_initializer() { View v = non_trivially_destructed_temporary() // expected-warning {{temporary object does not live long enough}} \ // expected-note {{result of call to 'getView' aliases the storage of temporary object}} - .getView(); // expected-note {{destroyed here}} + .getView(); // expected-note {{temporary object is destroyed here}} v.use(); // expected-note {{later used here}} } @@ -1475,7 +1476,7 @@ void expr_statement() { View v; v = non_trivially_destructed_temporary() // expected-warning {{temporary object does not live long enough}} \ // expected-note {{result of call to 'getView' aliases the storage of temporary object}} - .getView(); // expected-note {{destroyed here}} + .getView(); // expected-note {{temporary object is destroyed here}} v.use(); // expected-note {{later used here}} } } // namespace FullExprCleanupLoc @@ -1520,7 +1521,7 @@ void foobar() { view = string_or. 
// expected-warning {{local variable 'string_or' does not live long enough}} \ // expected-note {{result of call to 'value' aliases the storage of local variable 'string_or'}} value(); - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'string_or' is destroyed here}} (void)view; // expected-note {{later used here}} } } // namespace GH162834 @@ -1541,7 +1542,7 @@ void range_based_for_use_after_scope() { for (const MyObj &o : s) { // expected-warning {{local variable 's' does not live long enough}} v = o; } - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} v.use(); // expected-note {{later used here}} } @@ -1558,7 +1559,7 @@ void range_based_for_not_reference() { View v; { MyObjStorage s; - for (MyObj o : s) { // expected-note {{destroyed here}} + for (MyObj o : s) { // expected-note {{local variable 'o' is destroyed here}} v = o; // expected-warning {{local variable 'o' does not live long enough}} } } @@ -1594,7 +1595,7 @@ void test_user_defined_deref_uaf() { SmartPtr smart_ptr(&obj); p = &(*smart_ptr); // expected-warning {{local variable 'smart_ptr' does not live long enough}} \ // expected-note {{expression aliases the storage of local variable 'smart_ptr'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'smart_ptr' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -1612,7 +1613,7 @@ void test_user_defined_deref_with_view() { SmartPtr smart_ptr(&obj); v = *smart_ptr; // expected-warning {{local variable 'smart_ptr' does not live long enough}} \ // expected-note {{expression aliases the storage of local variable 'smart_ptr'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'smart_ptr' is destroyed here}} v.use(); // expected-note {{later used here}} } @@ -1623,7 +1624,7 @@ void test_user_defined_deref_arrow() { SmartPtr smart_ptr(&obj); p = smart_ptr.operator->(); // expected-warning {{local variable 
'smart_ptr' does not live long enough}} \ // expected-note {{expression aliases the storage of local variable 'smart_ptr'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'smart_ptr' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -1634,7 +1635,7 @@ void test_user_defined_deref_chained() { SmartPtr> double_ptr; p = &(**double_ptr); // expected-warning {{local variable 'double_ptr' does not live long enough}} \ // expected-note 2 {{expression aliases the storage of local variable 'double_ptr'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'double_ptr' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -1786,7 +1787,7 @@ void strict_warn_on_move() { MyObj a; v = a; // expected-warning {{local variable 'a' may not live long enough. This could be a false positive as the storage may have been moved later}} b = std::move(a); // expected-note {{potentially moved here}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} (void)v; // expected-note {{later used here}} } @@ -1799,7 +1800,7 @@ void flow_sensitive(bool c) { return; } v = a; // expected-warning {{local variable 'a' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} (void)v; // expected-note {{later used here}} } @@ -1811,7 +1812,7 @@ void detect_conditional(bool cond) { v = cond ? a : b; // expected-warning {{local variable 'a' may not live long enough. This could be a false positive as the storage may have been moved later}} \ // expected-warning {{local variable 'b' may not live long enough. This could be a false positive as the storage may have been moved later}} take(std::move(cond ? 
a : b)); // expected-note 2 {{potentially moved here}} - } // expected-note 2 {{destroyed here}} + } // expected-note {{local variable 'b' is destroyed here}} expected-note {{local variable 'a' is destroyed here}} (void)v; // expected-note 2 {{later used here}} } @@ -1821,7 +1822,7 @@ void wrong_use_of_move_is_permissive() { MyObj a; v = std::move(a); // expected-warning {{local variable 'a' does not live long enough}} \ // expected-note {{result of call to 'move' aliases the storage of local variable 'a'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} (void)v; // expected-note {{later used here}} const int* p; { @@ -1829,7 +1830,7 @@ void wrong_use_of_move_is_permissive() { p = std::move(a).getData(); // expected-warning {{local variable 'a' does not live long enough}} \ // expected-note {{result of call to 'move' aliases the storage of local variable 'a'}} \ // expected-note {{result of call to 'getData' aliases the storage of local variable 'a'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} (void)p; // expected-note {{later used here}} } @@ -1842,7 +1843,7 @@ void test_release_no_uaf() { r = p.get(); // expected-warning {{local variable 'p' may not live long enough. 
This could be a false positive as the storage may have been moved later}} \ // expected-note {{result of call to 'get' aliases the storage of local variable 'p'}} take(p.release()); // expected-note {{potentially moved here}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'p' is destroyed here}} (void)*r; // expected-note {{later used here}} } } // namespace strict_warn_on_move @@ -1865,10 +1866,10 @@ void bar() { x = s.x(); // expected-warning {{local variable 's' does not live long enough}} \ // expected-note {{result of call to 'x' aliases the storage of local variable 's'}} View y = S().x(); // expected-warning {{temporary object does not live long enough}} \ - expected-note {{destroyed here}} \ + expected-note {{temporary object is destroyed here}} \ expected-note {{result of call to 'x' aliases the storage of temporary object}} (void)y; // expected-note {{used here}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} (void)x; // expected-note {{used here}} } } @@ -1954,11 +1955,11 @@ const std::string& identity(const std::string& in [[clang::lifetimebound]]); const S& identity(const S& in [[clang::lifetimebound]]); void test_temporary() { - const std::string& x = S().x(); // expected-warning {{temporary object does not live long enough}} expected-note {{destroyed here}} \ + const std::string& x = S().x(); // expected-warning {{temporary object does not live long enough}} expected-note {{temporary object is destroyed here}} \ // expected-note {{result of call to 'x' aliases the storage of temporary object}} (void)x; // expected-note {{later used here}} - const std::string& y = identity(S().x()); // expected-warning {{temporary object does not live long enough}} expected-note {{destroyed here}} \ + const std::string& y = identity(S().x()); // expected-warning {{temporary object does not live long enough}} expected-note {{temporary object is destroyed here}} \ // expected-note 
{{result of call to 'x' aliases the storage of temporary object}} \ // expected-note {{result of call to 'identity' aliases the storage of temporary object}} (void)y; // expected-note {{later used here}} @@ -1969,20 +1970,20 @@ void test_temporary() { const std::string& zz = s.x(); // expected-warning {{local variable 's' does not live long enough}} \ // expected-note {{result of call to 'x' aliases the storage of local variable 's'}} z = zz; // expected-note {{expression aliases the storage of local variable 's'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} (void)z; // expected-note {{later used here}} } void test_lifetime_extension_ok() { const S& x = S(); (void)x; - const S& y = identity(S()); // expected-warning {{temporary object does not live long enough}} expected-note {{destroyed here}} \ + const S& y = identity(S()); // expected-warning {{temporary object does not live long enough}} expected-note {{temporary object is destroyed here}} \ // expected-note {{result of call to 'identity' aliases the storage of temporary object}} (void)y; // expected-note {{later used here}} } const std::string& test_return() { - const std::string& x = S().x(); // expected-warning {{temporary object does not live long enough}} expected-note {{destroyed here}} \ + const std::string& x = S().x(); // expected-warning {{temporary object does not live long enough}} expected-note {{temporary object is destroyed here}} \ // expected-note {{result of call to 'x' aliases the storage of temporary object}} return x; // expected-note {{later used here}} } @@ -2002,7 +2003,7 @@ void uaf() { S* p = &str; // expected-warning {{local variable 'str' does not live long enough}} view = p->s; // expected-note {{local variable 'p' aliases the storage of local variable 'str'}} \ // expected-note {{expression aliases the storage of local variable 'str'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'str' is 
destroyed here}} (void)view; // expected-note {{later used here}} } @@ -2029,7 +2030,7 @@ void uaf_union() { U* up = &u; // expected-warning {{local variable 'u' does not live long enough}} view = up->s; // expected-note {{local variable 'up' aliases the storage of local variable 'u'}} \ // expected-note {{expression aliases the storage of local variable 'u'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'u' is destroyed here}} (void)view; // expected-note {{later used here}} } @@ -2046,7 +2047,7 @@ void uaf_anonymous_union() { AnonymousUnion au; AnonymousUnion* up = &au; // expected-warning {{local variable 'au' does not live long enough}} ip = &up->x; // expected-note {{local variable 'up' aliases the storage of local variable 'au'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'au' is destroyed here}} (void)ip; // expected-note {{later used here}} } @@ -2103,11 +2104,11 @@ const T* MemberFuncsTpl::memberC(const T& x [[clang::lifetimebound]]) { void test() { MemberFuncsTpl mtf; - const MyObj* pTMA = mtf.memberA(MyObj()); // expected-warning {{temporary object does not live long enough}} // expected-note {{destroyed here}} \ + const MyObj* pTMA = mtf.memberA(MyObj()); // expected-warning {{temporary object does not live long enough}} // expected-note {{temporary object is destroyed here}} \ // expected-note {{result of call to 'memberA' aliases the storage of temporary object}} - const MyObj* pTMB = mtf.memberB(MyObj()); // tu-warning {{temporary object does not live long enough}} // tu-note {{destroyed here}} \ + const MyObj* pTMB = mtf.memberB(MyObj()); // tu-warning {{temporary object does not live long enough}} // tu-note {{temporary object is destroyed here}} \ // tu-note {{result of call to 'memberB' aliases the storage of temporary object}} - const MyObj* pTMC = mtf.memberC(MyObj()); // expected-warning {{temporary object does not live long enough}} // expected-note {{destroyed here}} \ + 
const MyObj* pTMC = mtf.memberC(MyObj()); // expected-warning {{temporary object does not live long enough}} // expected-note {{temporary object is destroyed here}} \ // expected-note {{result of call to 'memberC' aliases the storage of temporary object}} (void)pTMA; // expected-note {{later used here}} (void)pTMB; // tu-note {{later used here}} @@ -2146,7 +2147,7 @@ void test_optional_arrow() { p = opt->data(); // expected-warning {{local variable 'opt' does not live long enough}} \ // expected-note {{expression aliases the storage of local variable 'opt'}} \ // expected-note {{result of call to 'data' aliases the storage of local variable 'opt'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'opt' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -2157,7 +2158,7 @@ void test_optional_arrow_lifetimebound() { v = opt->getView(); // expected-warning {{local variable 'opt' does not live long enough}} \ // expected-note {{expression aliases the storage of local variable 'opt'}} \ // expected-note {{result of call to 'getView' aliases the storage of local variable 'opt'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'opt' is destroyed here}} v.use(); // expected-note {{later used here}} } @@ -2168,7 +2169,7 @@ void test_unique_ptr_arrow() { p = up->data(); // expected-warning {{local variable 'up' does not live long enough}} \ // expected-note {{expression aliases the storage of local variable 'up'}} \ // expected-note {{result of call to 'data' aliases the storage of local variable 'up'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'up' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -2362,7 +2363,7 @@ void multi_level_pointer_in_loop() { pp = &p; } (void)**pp; // expected-note {{later used here}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} } void 
outer_pointer_outlives_inner_pointee() { @@ -2371,7 +2372,7 @@ void outer_pointer_outlives_inner_pointee() { for (int i = 0; i < 10; ++i) { MyObj obj; view = &obj; // expected-warning {{local variable 'obj' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} (void)*view; // expected-note {{later used here}} } @@ -2384,7 +2385,7 @@ void element_use_after_scope() { { int a[10]{}; p = &a[2]; // expected-warning {{local variable 'a' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -2416,7 +2417,7 @@ void multidimensional_use_after_scope() { { int a[3][4]{}; p = &a[1][2]; // expected-warning {{local variable 'a' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -2429,7 +2430,7 @@ void member_array_element_use_after_scope() { { S s; p = &s.arr[0]; // expected-warning {{local variable 's' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 's' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -2438,7 +2439,7 @@ void array_of_pointers_use_after_scope() { { int* a[10]{}; p = a; // expected-warning {{local variable 'a' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -2447,7 +2448,7 @@ void reversed_subscript_use_after_scope() { { int a[10]{}; p = &(0[a]); // expected-warning {{local variable 'a' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -2475,7 +2476,7 @@ void 
pointer_arithmetic_use_after_scope() { p = a + 5; // expected-warning {{local variable 'a' does not live long enough}} p2 = a - 5; // expected-warning {{local variable 'a' does not live long enough}} p3 = 5 + a; // expected-warning {{local variable 'a' does not live long enough}} - } // expected-note 3 {{destroyed here}} + } // expected-note 3 {{local variable 'a' is destroyed here}} (void)*p; // expected-note {{later used here}} (void)*p2; // expected-note {{later used here}} (void)*p3; // expected-note {{later used here}} @@ -2521,7 +2522,7 @@ void indexing_with_static_operator() { S()(1, 2); S& x = S()("1", // expected-note 2 {{expression aliases the storage of temporary object}} 2, // expected-warning {{temporary object does not live long enough}} - 3); // expected-warning {{temporary object does not live long enough}} expected-note 2 {{destroyed here}} + 3); // expected-warning {{temporary object does not live long enough}} expected-note 2 {{temporary object is destroyed here}} (void)x; // expected-note 2 {{later used here}} @@ -2544,14 +2545,14 @@ S getS(const std::string &s [[clang::lifetimebound]]); void from_free_function() { S s = getS(std::string("temp")); // expected-warning {{temporary object does not live long enough}} \ - // expected-note {{destroyed here}} \ + // expected-note {{temporary object is destroyed here}} \ // expected-note {{result of call to 'getS' aliases the storage of temporary object}} use(s); // expected-note {{later used here}} } void from_constructor() { S s(std::string("temp")); // expected-warning {{temporary object does not live long enough}} \ - // expected-note {{destroyed here}} + // expected-note {{temporary object is destroyed here}} use(s); // expected-note {{later used here}} } @@ -2564,14 +2565,14 @@ struct Factory { void from_method() { Factory f; S s = f.make(std::string("temp")); // expected-warning {{temporary object does not live long enough}} \ - // expected-note {{destroyed here}} \ + // expected-note {{temporary 
object is destroyed here}} \ // expected-note {{result of call to 'make' aliases the storage of temporary object}} use(s); // expected-note {{later used here}} } void from_static_method() { S s = Factory::create(std::string("temp")); // expected-warning {{temporary object does not live long enough}} \ - // expected-note {{destroyed here}} \ + // expected-note {{temporary object is destroyed here}} \ // expected-note {{result of call to 'create' aliases the storage of temporary object}} use(s); // expected-note {{later used here}} } @@ -2582,7 +2583,7 @@ void from_lifetimebound_this_method() { Factory f; value = f.makeThis(); // expected-warning {{local variable 'f' does not live long enough}} \ // expected-note {{result of call to 'makeThis' aliases the storage of local variable 'f'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'f' is destroyed here}} use(value); // expected-note {{later used here}} } @@ -2592,7 +2593,7 @@ void across_scope() { std::string str{"abc"}; s = getS(str); // expected-warning {{local variable 'str' does not live long enough}} \ // expected-note {{result of call to 'getS' aliases the storage of local variable 'str'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'str' is destroyed here}} use(s); // expected-note {{later used here}} } @@ -2616,7 +2617,7 @@ void assignment_propagation() { a = getS(str); // expected-warning {{local variable 'str' does not live long enough}} \ // expected-note {{result of call to 'getS' aliases the storage of local variable 'str'}} b = a; // expected-note {{local variable 'a' aliases the storage of local variable 'str'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'str' is destroyed here}} use(b); // expected-note {{later used here}} } @@ -2628,7 +2629,7 @@ void chained_defaulted_assignment_propagation() { // expected-note {{result of call to 'getS' aliases the storage of local variable 'str'}} c = b = a; // 
expected-note {{local variable 'a' aliases the storage of local variable 'str'}}\ // expected-note {{expression aliases the storage of local variable 'str'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'str' is destroyed here}} use(c); // expected-note {{later used here}} } @@ -2641,7 +2642,7 @@ void no_annotation() { void mix_annotated_and_not() { S s1 = getS(std::string("temp")); // expected-warning {{temporary object does not live long enough}} \ - // expected-note {{destroyed here}} \ + // expected-note {{temporary object is destroyed here}} \ // expected-note {{result of call to 'getS' aliases the storage of temporary object}} S s2 = getSNoAnnotation(std::string("temp")); use(s1); // expected-note {{later used here}} @@ -2655,7 +2656,7 @@ S multiple_lifetimebound_params() { S s = getS2(str, std::string("temp")); // expected-warning {{stack memory associated with local variable 'str' is returned}} \ // expected-warning {{temporary object does not live long enough}} \ // expected-note {{result of call to 'getS2' aliases the storage of temporary object}} \ - // expected-note {{destroyed here}} + // expected-note {{temporary object is destroyed here}} return s; // expected-note {{returned here}} \ // expected-note {{later used here}} } @@ -2674,7 +2675,7 @@ T make(const std::string &s [[clang::lifetimebound]]); void from_template_instantiation() { S s = make(std::string("temp")); // expected-warning {{temporary object does not live long enough}} \ - // expected-note {{destroyed here}} \ + // expected-note {{temporary object is destroyed here}} \ // expected-note {{result of call to 'make' aliases the storage of temporary object}} use(s); // expected-note {{later used here}} } @@ -2738,7 +2739,7 @@ SAlias getSAlias(const std::string &s [[clang::lifetimebound]]); void from_typedef_return() { SAlias s = getSAlias(std::string("temp")); // expected-warning {{temporary object does not live long enough}} \ - // expected-note {{destroyed 
here}} \ + // expected-note {{temporary object is destroyed here}} \ // expected-note {{result of call to 'getSAlias' aliases the storage of temporary object}} use(s); // expected-note {{later used here}} } @@ -2813,7 +2814,7 @@ std::unique_ptr getUniqueS(const std::string &s [[clang::lifetimebound]]); void owner_return_unique_ptr_s() { auto ptr = getUniqueS(std::string("temp")); // expected-warning {{temporary object does not live long enough}} \ - // expected-note {{destroyed here}} \ + // expected-note {{temporary object is destroyed here}} \ // expected-note {{result of call to 'getUniqueS' aliases the storage of temporary object}} (void)ptr; // expected-note {{later used here}} } @@ -2832,7 +2833,7 @@ void owner_outlives_lifetimebound_source() { std::string local; ups = getUniqueS(local); // expected-warning {{local variable 'local' does not live long enough}} \ // expected-note {{result of call to 'getUniqueS' aliases the storage of local variable 'local'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'local' is destroyed here}} (void)ups; // expected-note {{later used here}} } @@ -2855,7 +2856,7 @@ void local_pointer() { { int v; p = Pointer(v); // expected-warning {{local variable 'v' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'v' is destroyed here}} use(*p); // expected-note {{later used here}} } @@ -2868,7 +2869,7 @@ void nested_local_pointer() { p = Pointer(v); // expected-warning {{local variable 'v' does not live long enough}} pp = Pointer(p); // expected-note {{local variable 'p' aliases the storage of local variable 'v'}} ppp = Pointer(pp); // expected-note {{local variable 'pp' aliases the storage of local variable 'v'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'v' is destroyed here}} use(***ppp); // expected-note {{later used here}} } @@ -2965,50 +2966,50 @@ void new_view_from_dead_scope() { { MyObj obj; p = new 
View(obj); // expected-warning {{local variable 'obj' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} p->use(); // expected-note {{later used here}} } void new_int_basic() { int *p = new int; // expected-warning {{allocated object does not live long enough}} - delete p; // expected-note {{freed here}} + delete p; // expected-note {{allocated object is freed here}} (void)*p; // expected-note {{later used here}} } void new_int_parens() { int *p = new int(); // expected-warning {{allocated object does not live long enough}} - delete p; // expected-note {{freed here}} + delete p; // expected-note {{allocated object is freed here}} (void)*p; // expected-note {{later used here}} } void new_int_braces() { int *p = new int{}; // expected-warning {{allocated object does not live long enough}} - delete p; // expected-note {{freed here}} + delete p; // expected-note {{allocated object is freed here}} (void)*p; // expected-note {{later used here}} } void new_int_aligned() { int *p = new (std::align_val_t(sizeof(int))) int{}; // expected-warning {{allocated object does not live long enough}} - delete p; // expected-note {{freed here}} + delete p; // expected-note {{allocated object is freed here}} (void)*p; // expected-note {{later used here}} } void new_int_nothrow() { int *p = new (std::nothrow) int{}; // expected-warning {{allocated object does not live long enough}} - delete p; // expected-note {{freed here}} + delete p; // expected-note {{allocated object is freed here}} (void)*p; // expected-note {{later used here}} } void new_int_aligned_nothrow() { int *p = new (std::align_val_t(sizeof(int)), std::nothrow) int{}; // expected-warning {{allocated object does not live long enough}} - delete p; // expected-note {{freed here}} + delete p; // expected-note {{allocated object is freed here}} (void)*p; // expected-note {{later used here}} } void conditional_delete(bool cond) { int *p1 = new int; 
// expected-warning {{allocated object does not live long enough}} int *p2 = new int; // expected-warning {{allocated object does not live long enough}} - delete (cond ? p1 : p2); // expected-note 2 {{freed here}} + delete (cond ? p1 : p2); // expected-note 2 {{allocated object is freed here}} (void)*p1; // expected-note {{later used here}} (void)*p2; // expected-note {{later used here}} } @@ -3018,7 +3019,7 @@ int* foo(int* x [[clang::lifetimebound]], int* y [[clang::lifetimebound]]); void delete_returned_from_call() { int* x = new int(1); // expected-warning {{allocated object does not live long enough}} int* y = new int(2); // expected-warning {{allocated object does not live long enough}} - delete foo(x, y); // expected-note 2 {{freed here}} + delete foo(x, y); // expected-note 2 {{allocated object is freed here}} (void)x; // expected-note {{later used here}} (void)y; // expected-note {{later used here}} } @@ -3029,7 +3030,7 @@ void new_pointer_from_pointer() { MyObj obj; MyObj *q = &obj; // expected-warning {{local variable 'obj' does not live long enough}} p = new MyObj *(q); // expected-note {{local variable 'q' aliases the storage of local variable 'obj'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} (void)**p; // expected-note {{later used here}} } @@ -3038,7 +3039,7 @@ void new_pointer_from_dead_object() { { MyObj obj; p = new MyObj *(&obj); // expected-warning {{local variable 'obj' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} (void)**p; // expected-note {{later used here}} } @@ -3052,25 +3053,25 @@ void new_multiview_from_mixed_scope() { { MyObj obj2; p = new MultiView(obj1, obj2); // expected-warning {{local variable 'obj2' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj2' is destroyed here}} (void)p; // expected-note {{later used here}} } void 
new_array_basic() { int *p = new int[2]; // expected-warning {{allocated object does not live long enough}} - delete[] p; // expected-note {{freed here}} + delete[] p; // expected-note {{allocated object is freed here}} (void)p[0]; // expected-note {{later used here}} } void new_array_parens() { int *p = new int[2](); // expected-warning {{allocated object does not live long enough}} - delete[] p; // expected-note {{freed here}} + delete[] p; // expected-note {{allocated object is freed here}} (void)p[0]; // expected-note {{later used here}} } void new_array_braces() { int *p = new int[2]{}; // expected-warning {{allocated object does not live long enough}} - delete[] p; // expected-note {{freed here}} + delete[] p; // expected-note {{allocated object is freed here}} (void)p[0]; // expected-note {{later used here}} } @@ -3105,26 +3106,26 @@ void pointer_array_field_sensitivity() { void delete_direct_use_after_free() { MyObj *p = new MyObj; // expected-warning {{allocated object does not live long enough}} - delete p; // expected-note {{freed here}} + delete p; // expected-note {{allocated object is freed here}} (void)p->id; // expected-note {{later used here}} } void delete_alias_use_after_free() { MyObj *p = new MyObj; // expected-warning {{allocated object does not live long enough}} MyObj *q = p; - delete p; // expected-note {{freed here}} + delete p; // expected-note {{allocated object is freed here}} (void)q->id; // expected-note {{later used here}} } void delete_pointer_propagation_use_after_free() { MyObj *p = new MyObj; // expected-warning {{allocated object does not live long enough}} MyObj **pp = &p; - delete p; // expected-note {{freed here}} + delete p; // expected-note {{allocated object is freed here}} (void)(*pp)->id; // expected-note {{later used here}} } void delete_param_pointer(int* x) { // expected-warning {{parameter 'x' does not live long enough}} - delete x; // expected-note {{freed here}} + delete x; // expected-note {{parameter 'x' is freed 
here}} (void)x; // expected-note {{later used here}} } @@ -3139,7 +3140,7 @@ struct S { void use_inner_origin_after_delete(MyObj* obj) { // expected-warning {{parameter 'obj' does not live long enough}} int* p = &obj->id; - delete obj; // expected-note {{freed here}} + delete obj; // expected-note {{parameter 'obj' is freed here}} (void)*p; // expected-note {{later used here}} } @@ -3160,7 +3161,7 @@ struct ClassSpecificDelete { void class_specific_operator_delete_use_after_free() { ClassSpecificDelete *p = new ClassSpecificDelete; // expected-warning {{allocated object does not live long enough}} - delete p; // expected-note {{freed here}} + delete p; // expected-note {{allocated object is freed here}} (void)p->X; // expected-note {{later used here}} } @@ -3172,7 +3173,7 @@ struct ClassSpecificNew { void class_specific_operator_new_use_after_free() { ClassSpecificNew *p = new ClassSpecificNew; // expected-warning {{allocated object does not live long enough}} - delete p; // expected-note {{freed here}} + delete p; // expected-note {{allocated object is freed here}} (void)p->X; // expected-note {{later used here}} } @@ -3190,14 +3191,14 @@ void delete_through_pointer_field() { void delete_stack_object() { MyObj obj; MyObj* p = &obj; // expected-warning {{local variable 'obj' does not live long enough}} - delete &obj; // expected-note {{freed here}} + delete &obj; // expected-note {{local variable 'obj' is freed here}} (void)p->id; // expected-note {{later used here}} } void delete_stack_object_int() { int obj; int* p = &obj; // expected-warning {{local variable 'obj' does not live long enough}} - delete &obj; // expected-note {{freed here}} + delete &obj; // expected-note {{local variable 'obj' is freed here}} (void)*p; // expected-note {{later used here}} } @@ -3215,7 +3216,7 @@ void placement_new_int_basic() { { int storage; p = new (&storage) int; // expected-warning {{local variable 'storage' does not live long enough}} - } // expected-note {{destroyed here}} + 
} // expected-note {{local variable 'storage' is destroyed here}} (void)*p; // expected-note {{later used here}} } @@ -3225,7 +3226,7 @@ void placement_new_view_from_dead_scope() { { MyObj obj; p = new (&storage) View(obj); // expected-warning {{local variable 'obj' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} p->use(); // expected-note {{later used here}} } @@ -3235,7 +3236,7 @@ void placement_new_pointer_from_dead_object() { { MyObj obj; p = new (&slot) MyObj *(&obj); // expected-warning {{local variable 'obj' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'obj' is destroyed here}} (void)**p; // expected-note {{later used here}} } @@ -3244,7 +3245,7 @@ void placement_new_array_basic() { { int storage[2]; p = new (&storage) int[2]; // expected-warning {{local variable 'storage' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'storage' is destroyed here}} (void)p[0]; // expected-note {{later used here}} } @@ -3253,14 +3254,14 @@ void placement_new_array_braces() { { int storage[2]; p = new (&storage) int[2]{}; // expected-warning {{local variable 'storage' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'storage' is destroyed here}} (void)p[0]; // expected-note {{later used here}} } void placement_new_heap_then_delete_use_after_free() { int *storage = new int(7); // expected-warning {{allocated object does not live long enough}} int *p = new (storage) int(42); - delete storage; // expected-note {{freed here}} + delete storage; // expected-note {{allocated object is freed here}} (void)*p; // expected-note {{later used here}} } @@ -3308,7 +3309,7 @@ void placement_new_delete_result_of_lifetimebound_call() { int *y = new int(2); // expected-warning {{allocated object does not live long enough}} int *slot = 
nullptr; int **p = new (&slot) int *(foo(x, y)); - delete foo(x, y); // expected-note 2 {{freed here}} + delete foo(x, y); // expected-note 2 {{allocated object is freed here}} (void)**p; // expected-note 2 {{later used here}} } @@ -3459,7 +3460,7 @@ struct S { { int num; this->p_ = # // expected-warning {{local variable 'num' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'num' is destroyed here}} bar(); // expected-note {{later used here}} this->p_ = &GLOBAL_INT; } @@ -3477,7 +3478,7 @@ struct T { std::string_view v; void bar(); void foo() { - v = std::string("tmp"); // expected-warning {{temporary object does not live long enough}} expected-note {{destroyed here}} + v = std::string("tmp"); // expected-warning {{temporary object does not live long enough}} expected-note {{temporary object is destroyed here}} bar(); // expected-note {{later used here}} } }; @@ -3499,7 +3500,7 @@ struct S2 : S { { int num; this->p_ = # // expected-warning {{local variable 'num' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'num' is destroyed here}} bar(); // expected-note {{later used here}} this->p_ = &GLOBAL_INT; } @@ -3507,7 +3508,7 @@ struct S2 : S { { int num; this->p_ = # // expected-warning {{local variable 'num' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'num' is destroyed here}} bar2(); // expected-note {{later used here}} this->p_ = nullptr; } @@ -3625,7 +3626,7 @@ void uaf_via_lifetimebound() { int local; f = capture_lifetimebound_param(local); // expected-warning {{local variable 'local' does not live long enough}} \ // expected-note {{result of call to 'capture_lifetimebound_param' aliases the storage of local variable 'local'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'local' is destroyed here}} (void)f; // expected-note {{later used here}} } @@ -3644,7 
+3645,7 @@ struct [[gsl::Pointer]] function_ref { // avoid this warning for non-capturing lambdas. void assign_non_capturing_to_function_ref(function_ref &r) { r = []() {}; // expected-warning {{temporary object does not live long enough}} \ - // expected-note {{destroyed here}} + // expected-note {{temporary object is destroyed here}} (void)r; // expected-note {{later used here}} } @@ -3689,7 +3690,7 @@ void deref_use_after_scope() { optional opt; p = &*opt; // expected-warning {{local variable 'opt' does not live long enough}} \ // expected-note {{expression aliases the storage of local variable 'opt'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'opt' is destroyed here}} (void)p->id; // expected-note {{later used here}} } @@ -3741,7 +3742,7 @@ void use_after_free_capture_by() { { MyObj a; setCaptureBy(res, a); // expected-warning {{local variable 'a' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'a' is destroyed here}} (void)res; // expected-note {{later used here}} } @@ -3759,7 +3760,7 @@ void transitive_capture() { MyObj local; setCaptureBy(v1, local); // expected-warning {{local variable 'local' does not live long enough}} setCaptureBy(v2, v1); // expected-note {{local variable 'v1' aliases the storage of local variable 'local'}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'local' is destroyed here}} (void)v2; // expected-note {{later used here}} } @@ -3770,7 +3771,7 @@ void test_reference_to_view() { { MyObj local; set1(v, local); // expected-warning {{local variable 'local' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'local' is destroyed here}} (void)v; // expected-note {{later used here}} } @@ -3801,7 +3802,7 @@ void test_reference_to_pointer() { { MyObj local; set3(ptr, local); // expected-warning {{local variable 'local' does not live long enough}} - } // 
expected-note {{destroyed here}} + } // expected-note {{local variable 'local' is destroyed here}} (void)ptr; // expected-note {{later used here}} } @@ -3815,7 +3816,7 @@ void member_capture() { { MyObj local; c.set(local); // expected-warning {{local variable 'local' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'local' is destroyed here}} (void)c.stored; // expected-note {{later used here}} } @@ -3844,7 +3845,7 @@ void multiple_captures() { { MyObj val2; captureTwo(res, val1, val2); // expected-warning {{local variable 'val2' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'val2' is destroyed here}} (void)res; // expected-note {{later used here}} } @@ -3854,7 +3855,7 @@ void multiple_local_captures() { MyObj val1; MyObj val2; captureTwo(res, val1, val2); // expected-warning {{local variable 'val1' does not live long enough}} // expected-warning {{local variable 'val2' does not live long enough}} - } // expected-note 2 {{destroyed here}} + } // expected-note {{local variable 'val2' is destroyed here}} expected-note {{local variable 'val1' is destroyed here}} (void)res; // expected-note 2 {{later used here}} } @@ -3866,7 +3867,7 @@ void captured_by_multiple_params() { { MyObj local; captureIntoTwo(v1, v2, local); // expected-warning {{local variable 'local' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'local' is destroyed here}} (void)v1; // expected-note {{later used here}} } @@ -3875,7 +3876,7 @@ void captured_by_multiple_params_2() { { MyObj local; captureIntoTwo(v1, v2, local); // expected-warning {{local variable 'local' does not live long enough}} - } // expected-note {{destroyed here}} + } // expected-note {{local variable 'local' is destroyed here}} (void)v2; // expected-note {{later used here}} } @@ -3886,7 +3887,7 @@ void capturing_multiple_locals() { setCaptureBy(v, local1); // 
expected-warning{{local variable 'local1' does not live long enough}} MyObj local2; setCaptureBy(v, local2); // expected-warning{{local variable 'local2' does not live long enough}} - } // expected-note 2 {{destroyed here}} + } // expected-note {{local variable 'local1' is destroyed here}} expected-note {{local variable 'local2' is destroyed here}} (void)v; // expected-note 2 {{later used here}} } From 44debc28f821870f9c2efcb3f4aa2d119fe28a1e Mon Sep 17 00:00:00 2001 From: Avhi Date: Tue, 23 Jun 2026 14:46:46 +0530 Subject: [PATCH 140/511] [FixIrreducible] Use reportFatalUsageError for unsupported terminators (#205244) `opt -passes=fix-irreducible` crashed via `llvm_unreachable` on a `switch` terminator incident to an irreducible cycle header. Such terminators must be lowered first (`lower-switch`); replace the `llvm_unreachable` at both sites with `reportFatalUsageError` so the pass fails gracefully instead of crashing. Fixes #191978 Signed-off-by: AvhiMaz --- llvm/lib/Transforms/Utils/FixIrreducible.cpp | 7 +++++-- .../FixIrreducible/unsupported-terminator.ll | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Transforms/FixIrreducible/unsupported-terminator.ll diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp index 8e6425adc2855..ead39f5991081 100644 --- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp +++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp @@ -134,6 +134,7 @@ #include "llvm/Analysis/LoopInfo.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ControlFlowUtils.h" @@ -320,7 +321,8 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT, << printBasicBlock(Succ) << '\n'); } } else { - llvm_unreachable("unsupported block terminator"); + 
reportFatalUsageError("unsupported block terminator: fix-irreducible " + "only supports br and callbr instructions"); } } @@ -364,7 +366,8 @@ static bool fixIrreducible(Cycle &C, CycleInfo &CI, DominatorTree &DT, << printBasicBlock(Succ) << '\n'); } } else { - llvm_unreachable("unsupported block terminator"); + reportFatalUsageError("unsupported block terminator: fix-irreducible " + "only supports br and callbr instructions"); } } diff --git a/llvm/test/Transforms/FixIrreducible/unsupported-terminator.ll b/llvm/test/Transforms/FixIrreducible/unsupported-terminator.ll new file mode 100644 index 0000000000000..f1be342c3d324 --- /dev/null +++ b/llvm/test/Transforms/FixIrreducible/unsupported-terminator.ll @@ -0,0 +1,19 @@ +; RUN: not opt < %s -passes=fix-irreducible -S 2>&1 | FileCheck %s +; CHECK: LLVM ERROR: unsupported block terminator: fix-irreducible only supports br and callbr instructions + +define void @loop_1(i32 %Value, i1 %PredEntry) { +entry: + br i1 %PredEntry, label %A, label %B + +A: + br label %B + +B: + switch i32 %Value, label %exit [ + i32 0, label %A + i32 1, label %B + ] + +exit: + ret void +} From 6bad3aac634f2aa32340007dcd4277d1df6fb5ff Mon Sep 17 00:00:00 2001 From: Ches Burks Date: Tue, 23 Jun 2026 05:18:07 -0400 Subject: [PATCH 141/511] [MLIR][ADT] Improve matcher compatability with C++20 STL (#205255) When building MLIR on C++20 in Visual Studio with clang-cl, there are several related compiler errors, grouped by project: MLIRQueryMatcher ```C type '_Mybase' (aka 'typename conditional, is_trivially_move_constructible, is_trivially_move_assignable>, typename conditional, is_trivially_copy_constructible, is_trivially_copy_assignable>, _Non_trivial_move<_Optional_construct_base, DynMatcher>, _Non_trivial_copy_assign<_Optional_construct_base, DynMatcher>>::type, _Non_trivial_move_assign<_Optional_construct_base, DynMatcher>>::type') is not a direct or virtual base of 'std::optional' no member named '_Value' in 'std::optional' no member named 
'_Has_value' in 'std::optional' no matching function for call to '_Destroy_range' invalid application of 'sizeof' to an incomplete type 'mlir::query::matcher::DynMatcher' invalid application of 'alignof' to an incomplete type 'mlir::query::matcher::DynMatcher' ``` MLIRQueryMatcher, MLIRQuery, MLIRQueryLib, and mlir-query ```C no viable conversion from 'std::vector' to 'ArrayRef' incomplete type 'mlir::query::matcher::DynMatcher' used in type trait expression ``` MLIRIR ```C no matching constructor for initialization of 'llvm::detail::indexed_accessor_range_base::iterator' invalid operands to binary expression ('const std::reverse_iterator::iterator>' and 'const std::reverse_iterator::iterator>') ``` std::vector operations require complete type T for pointer arithmetic, std::optional has a similar problem. std::reverse_iterator requires a default constructor for iterator. Adding a default constructor for iterator, and defining VariadicMatcher functions after DynMatcher is defined, resolves the errors. --- llvm/include/llvm/ADT/STLExtras.h | 1 + .../mlir/Query/Matcher/MatchersInternal.h | 29 +++++++++++++++---- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index fb9fdae1733f8..0081ce2da106b 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -1220,6 +1220,7 @@ class indexed_accessor_range_base { class iterator : public indexed_accessor_iterator { public: + iterator() : iterator::indexed_accessor_iterator(nullptr, 0) {} // Index into this iterator, invoking a static method on the derived type. 
ReferenceT operator*() const { return DerivedT::dereference_iterator(this->getBase(), this->getIndex()); diff --git a/mlir/include/mlir/Query/Matcher/MatchersInternal.h b/mlir/include/mlir/Query/Matcher/MatchersInternal.h index 88109430b6feb..4e7375283486f 100644 --- a/mlir/include/mlir/Query/Matcher/MatchersInternal.h +++ b/mlir/include/mlir/Query/Matcher/MatchersInternal.h @@ -102,13 +102,11 @@ using VariadicOperatorFunction = bool (*)(Operation *op, template class VariadicMatcher : public MatcherInterface { public: - VariadicMatcher(std::vector matchers) - : matchers(std::move(matchers)) {} + VariadicMatcher(std::vector matchers); + ~VariadicMatcher() override; - bool match(Operation *op) override { return Func(op, nullptr, matchers); } - bool match(Operation *op, SetVector &matchedOps) override { - return Func(op, &matchedOps, matchers); - } + bool match(Operation *op) override; + bool match(Operation *op, SetVector &matchedOps) override; private: std::vector matchers; @@ -168,6 +166,25 @@ class DynMatcher { std::string functionName; }; +// Implementation of VariadicMatcher functions after DynMatcher is defined +template +VariadicMatcher::VariadicMatcher(std::vector matchers) + : matchers(std::move(matchers)) {} + +template +VariadicMatcher::~VariadicMatcher() = default; + +template +bool VariadicMatcher::match(Operation *op) { + return Func(op, nullptr, matchers); +} + +template +bool VariadicMatcher::match(Operation *op, + SetVector &matchedOps) { + return Func(op, &matchedOps, matchers); +} + // VariadicOperatorMatcher related types. template class VariadicOperatorMatcher { From 14473af6209208ebddff771699380bdf9b7e247f Mon Sep 17 00:00:00 2001 From: SiHuaN Date: Tue, 23 Jun 2026 17:24:54 +0800 Subject: [PATCH 142/511] [Clang][RISCV] packed exchanged add/sub intrinsics (#205251) Add the `__riscv_{pas,psa,psas,pssa,paas,pasa}_x_*` header wrappers over new `__builtin_riscv_*` builtins. 
--- clang/include/clang/Basic/BuiltinsRISCV.td | 22 + clang/lib/CodeGen/TargetBuiltins/RISCV.cpp | 49 +++ clang/lib/Headers/riscv_packed_simd.h | 22 + clang/test/CodeGen/RISCV/rvp-intrinsics.c | 398 ++++++++++++++++++ .../riscv_packed_simd.c | 126 ++++++ 5 files changed, 617 insertions(+) diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td index 3a1b54763bae6..ee20fefadd7c3 100644 --- a/clang/include/clang/Basic/BuiltinsRISCV.td +++ b/clang/include/clang/Basic/BuiltinsRISCV.td @@ -181,6 +181,28 @@ def pasubu_u8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, unsigned ch def pasubu_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, unsigned short>, _Vector<4, unsigned short>)">; def pasubu_u32x2 : RISCVBuiltin<"_Vector<2, unsigned int>(_Vector<2, unsigned int>, _Vector<2, unsigned int>)">; +// Packed Exchanged Addition and Subtraction (32-bit) +def pas_x_i16x2 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, short>)">; +def psa_x_i16x2 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, short>)">; +def psas_x_i16x2 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, short>)">; +def pssa_x_i16x2 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, short>)">; +def paas_x_i16x2 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, short>)">; +def pasa_x_i16x2 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, short>)">; + +// Packed Exchanged Addition and Subtraction (64-bit) +def pas_x_i16x4 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<4, short>)">; +def pas_x_i32x2 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, int>)">; +def psa_x_i16x4 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<4, short>)">; +def psa_x_i32x2 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, int>)">; +def psas_x_i16x4 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<4, short>)">; +def psas_x_i32x2 
: RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, int>)">; +def pssa_x_i16x4 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<4, short>)">; +def pssa_x_i32x2 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, int>)">; +def paas_x_i16x4 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<4, short>)">; +def paas_x_i32x2 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, int>)">; +def pasa_x_i16x4 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<4, short>)">; +def pasa_x_i32x2 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, int>)">; + // Packed Absolute Value and Absolute Difference (32-bit) def pabd_i8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, signed char>, _Vector<4, signed char>)">; def pabd_i16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, short>, _Vector<2, short>)">; diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp index a1e9acb7ec2c8..d5b027fe5f8fe 100644 --- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp @@ -1220,6 +1220,25 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, case RISCV::BI__builtin_riscv_pasubu_u8x8: case RISCV::BI__builtin_riscv_pasubu_u16x4: case RISCV::BI__builtin_riscv_pasubu_u32x2: + // Packed Exchanged Addition and Subtraction + case RISCV::BI__builtin_riscv_pas_x_i16x2: + case RISCV::BI__builtin_riscv_pas_x_i16x4: + case RISCV::BI__builtin_riscv_pas_x_i32x2: + case RISCV::BI__builtin_riscv_psa_x_i16x2: + case RISCV::BI__builtin_riscv_psa_x_i16x4: + case RISCV::BI__builtin_riscv_psa_x_i32x2: + case RISCV::BI__builtin_riscv_psas_x_i16x2: + case RISCV::BI__builtin_riscv_psas_x_i16x4: + case RISCV::BI__builtin_riscv_psas_x_i32x2: + case RISCV::BI__builtin_riscv_pssa_x_i16x2: + case RISCV::BI__builtin_riscv_pssa_x_i16x4: + case RISCV::BI__builtin_riscv_pssa_x_i32x2: + case RISCV::BI__builtin_riscv_paas_x_i16x2: + case 
RISCV::BI__builtin_riscv_paas_x_i16x4: + case RISCV::BI__builtin_riscv_paas_x_i32x2: + case RISCV::BI__builtin_riscv_pasa_x_i16x2: + case RISCV::BI__builtin_riscv_pasa_x_i16x4: + case RISCV::BI__builtin_riscv_pasa_x_i32x2: // Packed Absolute Value and Absolute Difference case RISCV::BI__builtin_riscv_pabd_i8x4: case RISCV::BI__builtin_riscv_pabd_i16x2: @@ -1260,6 +1279,36 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, case RISCV::BI__builtin_riscv_pasubu_u32x2: ID = Intrinsic::riscv_pasubu; break; + case RISCV::BI__builtin_riscv_pas_x_i16x2: + case RISCV::BI__builtin_riscv_pas_x_i16x4: + case RISCV::BI__builtin_riscv_pas_x_i32x2: + ID = Intrinsic::riscv_pas; + break; + case RISCV::BI__builtin_riscv_psa_x_i16x2: + case RISCV::BI__builtin_riscv_psa_x_i16x4: + case RISCV::BI__builtin_riscv_psa_x_i32x2: + ID = Intrinsic::riscv_psa; + break; + case RISCV::BI__builtin_riscv_psas_x_i16x2: + case RISCV::BI__builtin_riscv_psas_x_i16x4: + case RISCV::BI__builtin_riscv_psas_x_i32x2: + ID = Intrinsic::riscv_psas; + break; + case RISCV::BI__builtin_riscv_pssa_x_i16x2: + case RISCV::BI__builtin_riscv_pssa_x_i16x4: + case RISCV::BI__builtin_riscv_pssa_x_i32x2: + ID = Intrinsic::riscv_pssa; + break; + case RISCV::BI__builtin_riscv_paas_x_i16x2: + case RISCV::BI__builtin_riscv_paas_x_i16x4: + case RISCV::BI__builtin_riscv_paas_x_i32x2: + ID = Intrinsic::riscv_paas; + break; + case RISCV::BI__builtin_riscv_pasa_x_i16x2: + case RISCV::BI__builtin_riscv_pasa_x_i16x4: + case RISCV::BI__builtin_riscv_pasa_x_i32x2: + ID = Intrinsic::riscv_pasa; + break; case RISCV::BI__builtin_riscv_pabd_i8x4: case RISCV::BI__builtin_riscv_pabd_i16x2: case RISCV::BI__builtin_riscv_pabd_i8x8: diff --git a/clang/lib/Headers/riscv_packed_simd.h b/clang/lib/Headers/riscv_packed_simd.h index 56f6b108d5f14..5aa00f1519671 100644 --- a/clang/lib/Headers/riscv_packed_simd.h +++ b/clang/lib/Headers/riscv_packed_simd.h @@ -206,6 +206,28 @@ __packed_sh1add(psh1add_u32x2, uint32x2_t) 
__packed_sh1sadd(pssh1sadd_i16x4, int16x4_t) __packed_sh1sadd(pssh1sadd_i32x2, int32x2_t) +/* Packed Exchanged Addition and Subtraction (32-bit) */ +__packed_binary_builtin(pas_x_i16x2, int16x2_t, __builtin_riscv_pas_x_i16x2) +__packed_binary_builtin(psa_x_i16x2, int16x2_t, __builtin_riscv_psa_x_i16x2) +__packed_binary_builtin(psas_x_i16x2, int16x2_t, __builtin_riscv_psas_x_i16x2) +__packed_binary_builtin(pssa_x_i16x2, int16x2_t, __builtin_riscv_pssa_x_i16x2) +__packed_binary_builtin(paas_x_i16x2, int16x2_t, __builtin_riscv_paas_x_i16x2) +__packed_binary_builtin(pasa_x_i16x2, int16x2_t, __builtin_riscv_pasa_x_i16x2) + +/* Packed Exchanged Addition and Subtraction (64-bit) */ +__packed_binary_builtin(pas_x_i16x4, int16x4_t, __builtin_riscv_pas_x_i16x4) +__packed_binary_builtin(psa_x_i16x4, int16x4_t, __builtin_riscv_psa_x_i16x4) +__packed_binary_builtin(psas_x_i16x4, int16x4_t, __builtin_riscv_psas_x_i16x4) +__packed_binary_builtin(pssa_x_i16x4, int16x4_t, __builtin_riscv_pssa_x_i16x4) +__packed_binary_builtin(paas_x_i16x4, int16x4_t, __builtin_riscv_paas_x_i16x4) +__packed_binary_builtin(pasa_x_i16x4, int16x4_t, __builtin_riscv_pasa_x_i16x4) +__packed_binary_builtin(pas_x_i32x2, int32x2_t, __builtin_riscv_pas_x_i32x2) +__packed_binary_builtin(psa_x_i32x2, int32x2_t, __builtin_riscv_psa_x_i32x2) +__packed_binary_builtin(psas_x_i32x2, int32x2_t, __builtin_riscv_psas_x_i32x2) +__packed_binary_builtin(pssa_x_i32x2, int32x2_t, __builtin_riscv_pssa_x_i32x2) +__packed_binary_builtin(paas_x_i32x2, int32x2_t, __builtin_riscv_paas_x_i32x2) +__packed_binary_builtin(pasa_x_i32x2, int32x2_t, __builtin_riscv_pasa_x_i32x2) + /* Packed Minimum and Maximum (32-bit) */ __packed_binary_builtin(pmin_i8x4, int8x4_t, __builtin_elementwise_min) __packed_binary_builtin(pmin_i16x2, int16x2_t, __builtin_elementwise_min) diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c index cc388d0ab0328..d3f153109b904 100644 --- 
a/clang/test/CodeGen/RISCV/rvp-intrinsics.c +++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c @@ -1664,6 +1664,404 @@ int32x2_t test_pssh1sadd_i32x2(int32x2_t a, int32x2_t b) { return __riscv_pssh1sadd_i32x2(a, b); } +/* Packed Exchanged Addition and Subtraction (32-bit) */ +// RV32-LABEL: define dso_local i32 @test_pas_x_i16x2( +// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pas.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_pas_x_i16x2( +// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pas.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +int16x2_t test_pas_x_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_pas_x_i16x2(a, b); +} + +// RV32-LABEL: define dso_local i32 @test_psa_x_i16x2( +// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.psa.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_psa_x_i16x2( +// RV64-SAME: i32 
noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.psa.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +int16x2_t test_psa_x_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_psa_x_i16x2(a, b); +} + +// RV32-LABEL: define dso_local i32 @test_psas_x_i16x2( +// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.psas.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_psas_x_i16x2( +// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.psas.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +int16x2_t test_psas_x_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_psas_x_i16x2(a, b); +} + +// RV32-LABEL: define dso_local i32 @test_pssa_x_i16x2( +// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16> +// RV32-NEXT: 
[[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pssa.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_pssa_x_i16x2( +// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pssa.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +int16x2_t test_pssa_x_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_pssa_x_i16x2(a, b); +} + +// RV32-LABEL: define dso_local i32 @test_paas_x_i16x2( +// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.paas.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_paas_x_i16x2( +// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.paas.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +int16x2_t test_paas_x_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_paas_x_i16x2(a, b); +} + +// RV32-LABEL: define dso_local i32 
@test_pasa_x_i16x2( +// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pasa.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV32-NEXT: ret i32 [[TMP3]] +// +// RV64-LABEL: define dso_local i32 @test_pasa_x_i16x2( +// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pasa.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32 +// RV64-NEXT: ret i32 [[TMP3]] +// +int16x2_t test_pasa_x_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_pasa_x_i16x2(a, b); +} + +/* Packed Exchanged Addition and Subtraction (64-bit) */ +// RV32-LABEL: define dso_local i64 @test_pas_x_i16x4( +// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pas.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pas_x_i16x4( +// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16> +// RV64-NEXT: 
[[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pas.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t test_pas_x_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_pas_x_i16x4(a, b); +} + +// RV32-LABEL: define dso_local i64 @test_psa_x_i16x4( +// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.psa.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_psa_x_i16x4( +// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.psa.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t test_psa_x_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_psa_x_i16x4(a, b); +} + +// RV32-LABEL: define dso_local i64 @test_psas_x_i16x4( +// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.psas.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_psas_x_i16x4( +// 
RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.psas.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t test_psas_x_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_psas_x_i16x4(a, b); +} + +// RV32-LABEL: define dso_local i64 @test_pssa_x_i16x4( +// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pssa.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pssa_x_i16x4( +// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pssa.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t test_pssa_x_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_pssa_x_i16x4(a, b); +} + +// RV32-LABEL: define dso_local i64 @test_paas_x_i16x4( +// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x 
i16> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.paas.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_paas_x_i16x4( +// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.paas.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t test_paas_x_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_paas_x_i16x4(a, b); +} + +// RV32-LABEL: define dso_local i64 @test_pasa_x_i16x4( +// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16> +// RV32-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pasa.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pasa_x_i16x4( +// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16> +// RV64-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pasa.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int16x4_t test_pasa_x_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_pasa_x_i16x4(a, b); +} + +// RV32-LABEL: define dso_local 
i64 @test_pas_x_i32x2( +// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.pas.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pas_x_i32x2( +// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.pas.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t test_pas_x_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_pas_x_i32x2(a, b); +} + +// RV32-LABEL: define dso_local i64 @test_psa_x_i32x2( +// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.psa.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_psa_x_i32x2( +// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.psa.v2i32(<2 x i32> 
[[TMP0]], <2 x i32> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t test_psa_x_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_psa_x_i32x2(a, b); +} + +// RV32-LABEL: define dso_local i64 @test_psas_x_i32x2( +// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.psas.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_psas_x_i32x2( +// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.psas.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t test_psas_x_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_psas_x_i32x2(a, b); +} + +// RV32-LABEL: define dso_local i64 @test_pssa_x_i32x2( +// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.pssa.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pssa_x_i32x2( +// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef 
[[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.pssa.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t test_pssa_x_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_pssa_x_i32x2(a, b); +} + +// RV32-LABEL: define dso_local i64 @test_paas_x_i32x2( +// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.paas.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_paas_x_i32x2( +// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.paas.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t test_paas_x_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_paas_x_i32x2(a, b); +} + +// RV32-LABEL: define dso_local i64 @test_pasa_x_i32x2( +// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32> +// RV32-NEXT: [[TMP2:%.*]] = call <2 x i32> 
@llvm.riscv.pasa.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV32-NEXT: ret i64 [[TMP3]] +// +// RV64-LABEL: define dso_local i64 @test_pasa_x_i32x2( +// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32> +// RV64-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.riscv.pasa.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]]) +// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64 +// RV64-NEXT: ret i64 [[TMP3]] +// +int32x2_t test_pasa_x_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_pasa_x_i32x2(a, b); +} + /* Packed Minimum and Maximum (32-bit) */ // RV32-LABEL: define dso_local i32 @test_pmin_i8x4( diff --git a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c index e9f90fc17e23d..020a6be70aadb 100644 --- a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c +++ b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c @@ -565,6 +565,132 @@ int32x2_t test_pssh1sadd_i32x2(int32x2_t a, int32x2_t b) { return __riscv_pssh1sadd_i32x2(a, b); } +// CHECK-LABEL: test_pas_x_i16x2: +// CHECK: pas.hx +int16x2_t test_pas_x_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_pas_x_i16x2(a, b); +} + +// CHECK-LABEL: test_psa_x_i16x2: +// CHECK: psa.hx +int16x2_t test_psa_x_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_psa_x_i16x2(a, b); +} + +// CHECK-LABEL: test_psas_x_i16x2: +// CHECK: psas.hx +int16x2_t test_psas_x_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_psas_x_i16x2(a, b); +} + +// CHECK-LABEL: test_pssa_x_i16x2: +// CHECK: pssa.hx +int16x2_t test_pssa_x_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_pssa_x_i16x2(a, b); +} + +// CHECK-LABEL: test_paas_x_i16x2: +// CHECK: paas.hx +int16x2_t 
test_paas_x_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_paas_x_i16x2(a, b); +} + +// CHECK-LABEL: test_pasa_x_i16x2: +// CHECK: pasa.hx +int16x2_t test_pasa_x_i16x2(int16x2_t a, int16x2_t b) { + return __riscv_pasa_x_i16x2(a, b); +} + +// CHECK-LABEL: test_pas_x_i16x4: +// RV32: pas.dhx +// RV64: pas.hx +int16x4_t test_pas_x_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_pas_x_i16x4(a, b); +} + +// CHECK-LABEL: test_psa_x_i16x4: +// RV32: psa.dhx +// RV64: psa.hx +int16x4_t test_psa_x_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_psa_x_i16x4(a, b); +} + +// CHECK-LABEL: test_psas_x_i16x4: +// RV32: psas.dhx +// RV64: psas.hx +int16x4_t test_psas_x_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_psas_x_i16x4(a, b); +} + +// CHECK-LABEL: test_pssa_x_i16x4: +// RV32: pssa.dhx +// RV64: pssa.hx +int16x4_t test_pssa_x_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_pssa_x_i16x4(a, b); +} + +// CHECK-LABEL: test_paas_x_i16x4: +// RV32: paas.dhx +// RV64: paas.hx +int16x4_t test_paas_x_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_paas_x_i16x4(a, b); +} + +// CHECK-LABEL: test_pasa_x_i16x4: +// RV32: pasa.dhx +// RV64: pasa.hx +int16x4_t test_pasa_x_i16x4(int16x4_t a, int16x4_t b) { + return __riscv_pasa_x_i16x4(a, b); +} + +// CHECK-LABEL: test_pas_x_i32x2: +// RV32: add{{[[:space:]]}} +// RV32: sub{{[[:space:]]}} +// RV64: pas.wx +int32x2_t test_pas_x_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_pas_x_i32x2(a, b); +} + +// CHECK-LABEL: test_psa_x_i32x2: +// RV32: sub{{[[:space:]]}} +// RV32: add{{[[:space:]]}} +// RV64: psa.wx +int32x2_t test_psa_x_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_psa_x_i32x2(a, b); +} + +// CHECK-LABEL: test_psas_x_i32x2: +// RV32: sadd{{[[:space:]]}} +// RV32: ssub{{[[:space:]]}} +// RV64: psas.wx +int32x2_t test_psas_x_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_psas_x_i32x2(a, b); +} + +// CHECK-LABEL: test_pssa_x_i32x2: +// RV32: ssub{{[[:space:]]}} +// RV32: sadd{{[[:space:]]}} +// RV64: 
pssa.wx +int32x2_t test_pssa_x_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_pssa_x_i32x2(a, b); +} + +// CHECK-LABEL: test_paas_x_i32x2: +// RV32: aadd{{[[:space:]]}} +// RV32: asub{{[[:space:]]}} +// RV64: paas.wx +int32x2_t test_paas_x_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_paas_x_i32x2(a, b); +} + +// CHECK-LABEL: test_pasa_x_i32x2: +// RV32: asub{{[[:space:]]}} +// RV32: aadd{{[[:space:]]}} +// RV64: pasa.wx +int32x2_t test_pasa_x_i32x2(int32x2_t a, int32x2_t b) { + return __riscv_pasa_x_i32x2(a, b); +} + // CHECK-LABEL: test_pmin_i8x4: // CHECK: pmin.b int8x4_t test_pmin_i8x4(int8x4_t a, int8x4_t b) { From f15be52566071365bb17c03351960c25536608f6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 23 Jun 2026 10:26:36 +0100 Subject: [PATCH 143/511] [X86] madd.ll - add SSE42 test coverage (#205299) --- llvm/test/CodeGen/X86/madd.ll | 1032 +++++++++++++++++++++++---------- 1 file changed, 730 insertions(+), 302 deletions(-) diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll index 201a0900e6eb9..39fbe706ff369 100644 --- a/llvm/test/CodeGen/X86/madd.ll +++ b/llvm/test/CodeGen/X86/madd.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX256,AVX2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX256,AVX512,AVX512F @@ -32,6 +33,29 @@ define i32 @_Z10test_shortPsS_i_128(ptr nocapture readonly, ptr nocapture readon ; SSE2-NEXT: 
movd %xmm1, %eax ; SSE2-NEXT: retq ; +; SSE42-LABEL: _Z10test_shortPsS_i_128: +; SSE42: # %bb.0: # %entry +; SSE42-NEXT: movl %edx, %eax +; SSE42-NEXT: pxor %xmm0, %xmm0 +; SSE42-NEXT: xorl %ecx, %ecx +; SSE42-NEXT: .p2align 4 +; SSE42-NEXT: .LBB0_1: # %vector.body +; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmaddwd %xmm1, %xmm2 +; SSE42-NEXT: paddd %xmm2, %xmm0 +; SSE42-NEXT: addq $8, %rcx +; SSE42-NEXT: cmpq %rcx, %rax +; SSE42-NEXT: jne .LBB0_1 +; SSE42-NEXT: # %bb.2: # %middle.block +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: movd %xmm0, %eax +; SSE42-NEXT: retq +; ; AVX-LABEL: _Z10test_shortPsS_i_128: ; AVX: # %bb.0: # %entry ; AVX-NEXT: movl %edx, %eax @@ -81,30 +105,30 @@ middle.block: } define i32 @_Z10test_shortPsS_i_256(ptr nocapture readonly, ptr nocapture readonly, i32) local_unnamed_addr #0 { -; SSE2-LABEL: _Z10test_shortPsS_i_256: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movl %edx, %eax -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: xorl %ecx, %ecx -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: .p2align 4 -; SSE2-NEXT: .LBB1_1: # %vector.body -; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 -; SSE2-NEXT: movdqu (%rdi,%rcx,2), %xmm2 -; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm3 -; SSE2-NEXT: pmaddwd %xmm2, %xmm3 -; SSE2-NEXT: paddd %xmm3, %xmm1 -; SSE2-NEXT: addq $8, %rcx -; SSE2-NEXT: cmpq %rcx, %rax -; SSE2-NEXT: jne .LBB1_1 -; SSE2-NEXT: # %bb.2: # %middle.block -; SSE2-NEXT: paddd %xmm0, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; SSE2-NEXT: paddd %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; SSE2-NEXT: paddd %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: retq +; 
SSE-LABEL: _Z10test_shortPsS_i_256: +; SSE: # %bb.0: # %entry +; SSE-NEXT: movl %edx, %eax +; SSE-NEXT: pxor %xmm0, %xmm0 +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: .p2align 4 +; SSE-NEXT: .LBB1_1: # %vector.body +; SSE-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE-NEXT: movdqu (%rdi,%rcx,2), %xmm2 +; SSE-NEXT: movdqu (%rsi,%rcx,2), %xmm3 +; SSE-NEXT: pmaddwd %xmm2, %xmm3 +; SSE-NEXT: paddd %xmm3, %xmm1 +; SSE-NEXT: addq $8, %rcx +; SSE-NEXT: cmpq %rcx, %rax +; SSE-NEXT: jne .LBB1_1 +; SSE-NEXT: # %bb.2: # %middle.block +; SSE-NEXT: paddd %xmm0, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE-NEXT: paddd %xmm1, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; SSE-NEXT: paddd %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: retq ; ; AVX1-LABEL: _Z10test_shortPsS_i_256: ; AVX1: # %bb.0: # %entry @@ -183,37 +207,37 @@ middle.block: } define i32 @_Z10test_shortPsS_i_512(ptr nocapture readonly, ptr nocapture readonly, i32) local_unnamed_addr #0 { -; SSE2-LABEL: _Z10test_shortPsS_i_512: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movl %edx, %eax -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: xorl %ecx, %ecx -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: .p2align 4 -; SSE2-NEXT: .LBB2_1: # %vector.body -; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 -; SSE2-NEXT: movdqu (%rdi,%rcx,2), %xmm3 -; SSE2-NEXT: movdqu 16(%rdi,%rcx,2), %xmm4 -; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm5 -; SSE2-NEXT: pmaddwd %xmm3, %xmm5 -; SSE2-NEXT: paddd %xmm5, %xmm2 -; SSE2-NEXT: movdqu 16(%rsi,%rcx,2), %xmm3 -; SSE2-NEXT: pmaddwd %xmm4, %xmm3 -; SSE2-NEXT: paddd %xmm3, %xmm1 -; SSE2-NEXT: addq $16, %rcx -; SSE2-NEXT: cmpq %rcx, %rax -; SSE2-NEXT: jne .LBB2_1 -; SSE2-NEXT: # %bb.2: # %middle.block -; SSE2-NEXT: paddd %xmm0, %xmm2 -; SSE2-NEXT: paddd %xmm0, %xmm1 -; SSE2-NEXT: paddd %xmm2, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; SSE2-NEXT: paddd %xmm1, %xmm0 -; SSE2-NEXT: pshufd 
{{.*#+}} xmm1 = xmm0[1,1,1,1] -; SSE2-NEXT: paddd %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: retq +; SSE-LABEL: _Z10test_shortPsS_i_512: +; SSE: # %bb.0: # %entry +; SSE-NEXT: movl %edx, %eax +; SSE-NEXT: pxor %xmm0, %xmm0 +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: pxor %xmm2, %xmm2 +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: .p2align 4 +; SSE-NEXT: .LBB2_1: # %vector.body +; SSE-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE-NEXT: movdqu (%rdi,%rcx,2), %xmm3 +; SSE-NEXT: movdqu 16(%rdi,%rcx,2), %xmm4 +; SSE-NEXT: movdqu (%rsi,%rcx,2), %xmm5 +; SSE-NEXT: pmaddwd %xmm3, %xmm5 +; SSE-NEXT: paddd %xmm5, %xmm2 +; SSE-NEXT: movdqu 16(%rsi,%rcx,2), %xmm3 +; SSE-NEXT: pmaddwd %xmm4, %xmm3 +; SSE-NEXT: paddd %xmm3, %xmm1 +; SSE-NEXT: addq $16, %rcx +; SSE-NEXT: cmpq %rcx, %rax +; SSE-NEXT: jne .LBB2_1 +; SSE-NEXT: # %bb.2: # %middle.block +; SSE-NEXT: paddd %xmm0, %xmm2 +; SSE-NEXT: paddd %xmm0, %xmm1 +; SSE-NEXT: paddd %xmm2, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE-NEXT: paddd %xmm1, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; SSE-NEXT: paddd %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: retq ; ; AVX1-LABEL: _Z10test_shortPsS_i_512: ; AVX1: # %bb.0: # %entry @@ -329,51 +353,51 @@ middle.block: } define i32 @_Z10test_shortPsS_i_1024(ptr nocapture readonly, ptr nocapture readonly, i32) local_unnamed_addr #0 { -; SSE2-LABEL: _Z10test_shortPsS_i_1024: -; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movl %edx, %eax -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: xorl %ecx, %ecx -; SSE2-NEXT: pxor %xmm2, %xmm2 -; SSE2-NEXT: pxor %xmm4, %xmm4 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pxor %xmm3, %xmm3 -; SSE2-NEXT: .p2align 4 -; SSE2-NEXT: .LBB3_1: # %vector.body -; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 -; SSE2-NEXT: movdqu (%rdi,%rcx,2), %xmm5 -; SSE2-NEXT: movdqu 16(%rdi,%rcx,2), %xmm6 -; SSE2-NEXT: movdqu 32(%rdi,%rcx,2), %xmm7 -; SSE2-NEXT: movdqu 48(%rdi,%rcx,2), %xmm8 -; SSE2-NEXT: movdqu (%rsi,%rcx,2), 
%xmm9 -; SSE2-NEXT: pmaddwd %xmm5, %xmm9 -; SSE2-NEXT: paddd %xmm9, %xmm2 -; SSE2-NEXT: movdqu 16(%rsi,%rcx,2), %xmm5 -; SSE2-NEXT: pmaddwd %xmm6, %xmm5 -; SSE2-NEXT: paddd %xmm5, %xmm4 -; SSE2-NEXT: movdqu 32(%rsi,%rcx,2), %xmm5 -; SSE2-NEXT: pmaddwd %xmm7, %xmm5 -; SSE2-NEXT: paddd %xmm5, %xmm1 -; SSE2-NEXT: movdqu 48(%rsi,%rcx,2), %xmm5 -; SSE2-NEXT: pmaddwd %xmm8, %xmm5 -; SSE2-NEXT: paddd %xmm5, %xmm3 -; SSE2-NEXT: addq $16, %rcx -; SSE2-NEXT: cmpq %rcx, %rax -; SSE2-NEXT: jne .LBB3_1 -; SSE2-NEXT: # %bb.2: # %middle.block -; SSE2-NEXT: paddd %xmm0, %xmm4 -; SSE2-NEXT: paddd %xmm0, %xmm3 -; SSE2-NEXT: paddd %xmm4, %xmm3 -; SSE2-NEXT: paddd %xmm0, %xmm2 -; SSE2-NEXT: paddd %xmm0, %xmm1 -; SSE2-NEXT: paddd %xmm2, %xmm1 -; SSE2-NEXT: paddd %xmm3, %xmm1 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] -; SSE2-NEXT: paddd %xmm1, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; SSE2-NEXT: paddd %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: retq +; SSE-LABEL: _Z10test_shortPsS_i_1024: +; SSE: # %bb.0: # %entry +; SSE-NEXT: movl %edx, %eax +; SSE-NEXT: pxor %xmm0, %xmm0 +; SSE-NEXT: xorl %ecx, %ecx +; SSE-NEXT: pxor %xmm2, %xmm2 +; SSE-NEXT: pxor %xmm4, %xmm4 +; SSE-NEXT: pxor %xmm1, %xmm1 +; SSE-NEXT: pxor %xmm3, %xmm3 +; SSE-NEXT: .p2align 4 +; SSE-NEXT: .LBB3_1: # %vector.body +; SSE-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE-NEXT: movdqu (%rdi,%rcx,2), %xmm5 +; SSE-NEXT: movdqu 16(%rdi,%rcx,2), %xmm6 +; SSE-NEXT: movdqu 32(%rdi,%rcx,2), %xmm7 +; SSE-NEXT: movdqu 48(%rdi,%rcx,2), %xmm8 +; SSE-NEXT: movdqu (%rsi,%rcx,2), %xmm9 +; SSE-NEXT: pmaddwd %xmm5, %xmm9 +; SSE-NEXT: paddd %xmm9, %xmm2 +; SSE-NEXT: movdqu 16(%rsi,%rcx,2), %xmm5 +; SSE-NEXT: pmaddwd %xmm6, %xmm5 +; SSE-NEXT: paddd %xmm5, %xmm4 +; SSE-NEXT: movdqu 32(%rsi,%rcx,2), %xmm5 +; SSE-NEXT: pmaddwd %xmm7, %xmm5 +; SSE-NEXT: paddd %xmm5, %xmm1 +; SSE-NEXT: movdqu 48(%rsi,%rcx,2), %xmm5 +; SSE-NEXT: pmaddwd %xmm8, %xmm5 +; SSE-NEXT: paddd %xmm5, %xmm3 +; SSE-NEXT: addq $16, 
%rcx +; SSE-NEXT: cmpq %rcx, %rax +; SSE-NEXT: jne .LBB3_1 +; SSE-NEXT: # %bb.2: # %middle.block +; SSE-NEXT: paddd %xmm0, %xmm4 +; SSE-NEXT: paddd %xmm0, %xmm3 +; SSE-NEXT: paddd %xmm4, %xmm3 +; SSE-NEXT: paddd %xmm0, %xmm2 +; SSE-NEXT: paddd %xmm0, %xmm1 +; SSE-NEXT: paddd %xmm2, %xmm1 +; SSE-NEXT: paddd %xmm3, %xmm1 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE-NEXT: paddd %xmm1, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; SSE-NEXT: paddd %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: retq ; ; AVX1-LABEL: _Z10test_shortPsS_i_1024: ; AVX1: # %bb.0: # %entry @@ -572,6 +596,29 @@ define i32 @_Z9test_charPcS_i_128(ptr nocapture readonly, ptr nocapture readonly ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; +; SSE42-LABEL: _Z9test_charPcS_i_128: +; SSE42: # %bb.0: # %entry +; SSE42-NEXT: movl %edx, %eax +; SSE42-NEXT: pxor %xmm0, %xmm0 +; SSE42-NEXT: xorl %ecx, %ecx +; SSE42-NEXT: .p2align 4 +; SSE42-NEXT: .LBB4_1: # %vector.body +; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE42-NEXT: pmovsxbd (%rdi,%rcx), %xmm1 +; SSE42-NEXT: pmovsxbd (%rsi,%rcx), %xmm2 +; SSE42-NEXT: pmulld %xmm1, %xmm2 +; SSE42-NEXT: paddd %xmm2, %xmm0 +; SSE42-NEXT: addq $16, %rcx +; SSE42-NEXT: cmpq %rcx, %rax +; SSE42-NEXT: jne .LBB4_1 +; SSE42-NEXT: # %bb.2: # %middle.block +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: movd %xmm0, %eax +; SSE42-NEXT: retq +; ; AVX-LABEL: _Z9test_charPcS_i_128: ; AVX: # %bb.0: # %entry ; AVX-NEXT: movl %edx, %eax @@ -650,6 +697,31 @@ define i32 @_Z9test_charPcS_i_256(ptr nocapture readonly, ptr nocapture readonly ; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: retq ; +; SSE42-LABEL: _Z9test_charPcS_i_256: +; SSE42: # %bb.0: # %entry +; SSE42-NEXT: movl %edx, %eax +; SSE42-NEXT: pxor %xmm0, %xmm0 +; SSE42-NEXT: xorl %ecx, %ecx +; SSE42-NEXT: pxor %xmm1, %xmm1 +; SSE42-NEXT: 
.p2align 4 +; SSE42-NEXT: .LBB5_1: # %vector.body +; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE42-NEXT: pmovsxbw (%rdi,%rcx), %xmm2 +; SSE42-NEXT: pmovsxbw (%rsi,%rcx), %xmm3 +; SSE42-NEXT: pmaddwd %xmm2, %xmm3 +; SSE42-NEXT: paddd %xmm3, %xmm1 +; SSE42-NEXT: addq $16, %rcx +; SSE42-NEXT: cmpq %rcx, %rax +; SSE42-NEXT: jne .LBB5_1 +; SSE42-NEXT: # %bb.2: # %middle.block +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: movd %xmm1, %eax +; SSE42-NEXT: retq +; ; AVX1-LABEL: _Z9test_charPcS_i_256: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: movl %edx, %eax @@ -767,6 +839,38 @@ define i32 @_Z9test_charPcS_i_512(ptr nocapture readonly, ptr nocapture readonly ; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: retq ; +; SSE42-LABEL: _Z9test_charPcS_i_512: +; SSE42: # %bb.0: # %entry +; SSE42-NEXT: movl %edx, %eax +; SSE42-NEXT: pxor %xmm0, %xmm0 +; SSE42-NEXT: xorl %ecx, %ecx +; SSE42-NEXT: pxor %xmm2, %xmm2 +; SSE42-NEXT: pxor %xmm1, %xmm1 +; SSE42-NEXT: .p2align 4 +; SSE42-NEXT: .LBB6_1: # %vector.body +; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE42-NEXT: pmovsxbw 8(%rdi,%rcx), %xmm3 +; SSE42-NEXT: pmovsxbw (%rdi,%rcx), %xmm4 +; SSE42-NEXT: pmovsxbw 8(%rsi,%rcx), %xmm5 +; SSE42-NEXT: pmaddwd %xmm3, %xmm5 +; SSE42-NEXT: paddd %xmm5, %xmm1 +; SSE42-NEXT: pmovsxbw (%rsi,%rcx), %xmm3 +; SSE42-NEXT: pmaddwd %xmm4, %xmm3 +; SSE42-NEXT: paddd %xmm3, %xmm2 +; SSE42-NEXT: addq $16, %rcx +; SSE42-NEXT: cmpq %rcx, %rax +; SSE42-NEXT: jne .LBB6_1 +; SSE42-NEXT: # %bb.2: # %middle.block +; SSE42-NEXT: paddd %xmm0, %xmm2 +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: paddd %xmm2, %xmm1 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: movd %xmm1, %eax +; SSE42-NEXT: 
retq +; ; AVX1-LABEL: _Z9test_charPcS_i_512: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: movl %edx, %eax @@ -943,6 +1047,52 @@ define i32 @_Z9test_charPcS_i_1024(ptr nocapture readonly, ptr nocapture readonl ; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: retq ; +; SSE42-LABEL: _Z9test_charPcS_i_1024: +; SSE42: # %bb.0: # %entry +; SSE42-NEXT: movl %edx, %eax +; SSE42-NEXT: pxor %xmm0, %xmm0 +; SSE42-NEXT: xorl %ecx, %ecx +; SSE42-NEXT: pxor %xmm2, %xmm2 +; SSE42-NEXT: pxor %xmm4, %xmm4 +; SSE42-NEXT: pxor %xmm1, %xmm1 +; SSE42-NEXT: pxor %xmm3, %xmm3 +; SSE42-NEXT: .p2align 4 +; SSE42-NEXT: .LBB7_1: # %vector.body +; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE42-NEXT: pmovsxbw 16(%rdi,%rcx), %xmm5 +; SSE42-NEXT: pmovsxbw 8(%rdi,%rcx), %xmm6 +; SSE42-NEXT: pmovsxbw (%rdi,%rcx), %xmm7 +; SSE42-NEXT: pmovsxbw 24(%rdi,%rcx), %xmm8 +; SSE42-NEXT: pmovsxbw 16(%rsi,%rcx), %xmm9 +; SSE42-NEXT: pmaddwd %xmm5, %xmm9 +; SSE42-NEXT: paddd %xmm9, %xmm1 +; SSE42-NEXT: pmovsxbw 8(%rsi,%rcx), %xmm5 +; SSE42-NEXT: pmaddwd %xmm6, %xmm5 +; SSE42-NEXT: paddd %xmm5, %xmm4 +; SSE42-NEXT: pmovsxbw (%rsi,%rcx), %xmm5 +; SSE42-NEXT: pmaddwd %xmm7, %xmm5 +; SSE42-NEXT: paddd %xmm5, %xmm2 +; SSE42-NEXT: pmovsxbw 24(%rsi,%rcx), %xmm5 +; SSE42-NEXT: pmaddwd %xmm8, %xmm5 +; SSE42-NEXT: paddd %xmm5, %xmm3 +; SSE42-NEXT: addq $32, %rcx +; SSE42-NEXT: cmpq %rcx, %rax +; SSE42-NEXT: jne .LBB7_1 +; SSE42-NEXT: # %bb.2: # %middle.block +; SSE42-NEXT: paddd %xmm0, %xmm4 +; SSE42-NEXT: paddd %xmm0, %xmm3 +; SSE42-NEXT: paddd %xmm4, %xmm3 +; SSE42-NEXT: paddd %xmm0, %xmm2 +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: paddd %xmm2, %xmm1 +; SSE42-NEXT: paddd %xmm3, %xmm1 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: movd %xmm1, %eax +; SSE42-NEXT: retq +; ; AVX1-LABEL: _Z9test_charPcS_i_1024: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: movl %edx, %eax @@ -1146,6 
+1296,29 @@ define i32 @test_unsigned_short_128(ptr nocapture readonly, ptr nocapture readon ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; +; SSE42-LABEL: test_unsigned_short_128: +; SSE42: # %bb.0: # %entry +; SSE42-NEXT: movl %edx, %eax +; SSE42-NEXT: pxor %xmm0, %xmm0 +; SSE42-NEXT: xorl %ecx, %ecx +; SSE42-NEXT: .p2align 4 +; SSE42-NEXT: .LBB8_1: # %vector.body +; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm1, %xmm2 +; SSE42-NEXT: paddd %xmm2, %xmm0 +; SSE42-NEXT: addq $16, %rcx +; SSE42-NEXT: cmpq %rcx, %rax +; SSE42-NEXT: jne .LBB8_1 +; SSE42-NEXT: # %bb.2: # %middle.block +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: movd %xmm0, %eax +; SSE42-NEXT: retq +; ; AVX-LABEL: test_unsigned_short_128: ; AVX: # %bb.0: # %entry ; AVX-NEXT: movl %edx, %eax @@ -1226,6 +1399,35 @@ define i32 @test_unsigned_short_256(ptr nocapture readonly, ptr nocapture readon ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: retq ; +; SSE42-LABEL: test_unsigned_short_256: +; SSE42: # %bb.0: # %entry +; SSE42-NEXT: movl %edx, %eax +; SSE42-NEXT: pxor %xmm0, %xmm0 +; SSE42-NEXT: xorl %ecx, %ecx +; SSE42-NEXT: pxor %xmm1, %xmm1 +; SSE42-NEXT: .p2align 4 +; SSE42-NEXT: .LBB9_1: # %vector.body +; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm2, %xmm4 +; SSE42-NEXT: paddd %xmm4, %xmm1 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm2 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm3, %xmm2 +; SSE42-NEXT: paddd %xmm2, %xmm0 +; SSE42-NEXT: addq $16, %rcx +; SSE42-NEXT: cmpq %rcx, %rax +; SSE42-NEXT: jne .LBB9_1 +; SSE42-NEXT: # %bb.2: # %middle.block +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: movd %xmm0, %eax +; SSE42-NEXT: retq +; ; AVX1-LABEL: test_unsigned_short_256: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: movl %edx, %eax @@ -1355,6 +1557,47 @@ define i32 @test_unsigned_short_512(ptr nocapture readonly, ptr nocapture readon ; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: retq ; +; SSE42-LABEL: test_unsigned_short_512: +; SSE42: # %bb.0: # %entry +; SSE42-NEXT: movl %edx, %eax +; SSE42-NEXT: pxor %xmm0, %xmm0 +; SSE42-NEXT: xorl %ecx, %ecx +; SSE42-NEXT: pxor %xmm1, %xmm1 +; SSE42-NEXT: pxor %xmm3, %xmm3 +; SSE42-NEXT: pxor %xmm2, %xmm2 +; SSE42-NEXT: .p2align 4 +; SSE42-NEXT: .LBB10_1: # %vector.body +; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm5 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm6 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm7 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm8 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm4, %xmm8 +; SSE42-NEXT: paddd %xmm8, %xmm3 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm5, %xmm4 +; SSE42-NEXT: paddd %xmm4, %xmm1 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm6, %xmm4 +; SSE42-NEXT: paddd %xmm4, %xmm0 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm4 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm7, %xmm4 +; SSE42-NEXT: paddd %xmm4, %xmm2 +; SSE42-NEXT: addq $16, %rcx +; SSE42-NEXT: cmpq %rcx, %rax +; SSE42-NEXT: jne .LBB10_1 +; SSE42-NEXT: # %bb.2: # %middle.block +; SSE42-NEXT: paddd %xmm3, %xmm0 +; SSE42-NEXT: paddd %xmm2, %xmm1 +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: movd %xmm1, %eax +; SSE42-NEXT: retq +; ; AVX1-LABEL: test_unsigned_short_512: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: movl %edx, %eax @@ -1560,6 +1803,71 @@ define i32 @test_unsigned_short_1024(ptr nocapture readonly, ptr nocapture reado ; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: retq ; +; SSE42-LABEL: test_unsigned_short_1024: +; SSE42: # %bb.0: # %entry +; SSE42-NEXT: movl %edx, %eax +; SSE42-NEXT: pxor %xmm0, %xmm0 +; SSE42-NEXT: xorl %ecx, %ecx +; SSE42-NEXT: pxor %xmm3, %xmm3 +; SSE42-NEXT: pxor %xmm1, %xmm1 +; SSE42-NEXT: pxor %xmm2, %xmm2 +; SSE42-NEXT: pxor %xmm4, %xmm4 +; SSE42-NEXT: pxor %xmm6, %xmm6 +; SSE42-NEXT: pxor %xmm5, %xmm5 +; SSE42-NEXT: pxor %xmm7, %xmm7 +; SSE42-NEXT: .p2align 4 +; SSE42-NEXT: .LBB11_1: # %vector.body +; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm8 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm9 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm8, %xmm9 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm8 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: paddd %xmm9, %xmm1 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm9 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm8, %xmm9 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm8 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: paddd %xmm9, %xmm3 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm9 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm8, %xmm9 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm8 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: paddd %xmm9, %xmm0 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm9 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm8, %xmm9 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm8 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: paddd %xmm9, %xmm2 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm9 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm8, %xmm9 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm8 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: paddd %xmm9, %xmm4 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm9 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm8, %xmm9 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm8 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: paddd %xmm9, %xmm6 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm9 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm8, %xmm9 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm8 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: paddd %xmm9, %xmm5 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm9 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; SSE42-NEXT: pmulld %xmm8, %xmm9 +; SSE42-NEXT: paddd %xmm9, %xmm7 +; SSE42-NEXT: addq $16, %rcx +; SSE42-NEXT: cmpq %rcx, %rax +; SSE42-NEXT: jne .LBB11_1 +; SSE42-NEXT: # %bb.2: # %middle.block +; SSE42-NEXT: paddd %xmm6, %xmm3 +; SSE42-NEXT: paddd %xmm7, %xmm2 +; SSE42-NEXT: paddd %xmm3, %xmm2 +; SSE42-NEXT: paddd %xmm4, %xmm0 +; SSE42-NEXT: paddd %xmm5, %xmm1 +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: paddd %xmm2, %xmm1 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: movd %xmm1, %eax +; SSE42-NEXT: retq +; ; AVX1-LABEL: test_unsigned_short_1024: ; AVX1: # %bb.0: # 
%entry ; AVX1-NEXT: movl %edx, %eax @@ -1738,10 +2046,10 @@ middle.block: } define <4 x i32> @pmaddwd_8(<8 x i16> %A, <8 x i16> %B) { -; SSE2-LABEL: pmaddwd_8: -; SSE2: # %bb.0: -; SSE2-NEXT: pmaddwd %xmm1, %xmm0 -; SSE2-NEXT: retq +; SSE-LABEL: pmaddwd_8: +; SSE: # %bb.0: +; SSE-NEXT: pmaddwd %xmm1, %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: pmaddwd_8: ; AVX: # %bb.0: @@ -1757,10 +2065,10 @@ define <4 x i32> @pmaddwd_8(<8 x i16> %A, <8 x i16> %B) { } define <4 x i32> @pmaddwd_8_swapped(<8 x i16> %A, <8 x i16> %B) { -; SSE2-LABEL: pmaddwd_8_swapped: -; SSE2: # %bb.0: -; SSE2-NEXT: pmaddwd %xmm1, %xmm0 -; SSE2-NEXT: retq +; SSE-LABEL: pmaddwd_8_swapped: +; SSE: # %bb.0: +; SSE-NEXT: pmaddwd %xmm1, %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: pmaddwd_8_swapped: ; AVX: # %bb.0: @@ -1775,7 +2083,7 @@ define <4 x i32> @pmaddwd_8_swapped(<8 x i16> %A, <8 x i16> %B) { ret <4 x i32> %ret } -; FIXME: SSE fails to match PMADDWD +; FIXME: SSE2 fails to match PMADDWD define <4 x i32> @larger_mul(<16 x i16> %A, <16 x i16> %B) { ; SSE2-LABEL: larger_mul: ; SSE2: # %bb.0: @@ -1791,6 +2099,19 @@ define <4 x i32> @larger_mul(<16 x i16> %A, <16 x i16> %B) { ; SSE2-NEXT: paddd %xmm1, %xmm0 ; SSE2-NEXT: retq ; +; SSE42-LABEL: larger_mul: +; SSE42: # %bb.0: +; SSE42-NEXT: pxor %xmm1, %xmm1 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero +; SSE42-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; SSE42-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] +; SSE42-NEXT: pmaddwd %xmm2, %xmm0 +; SSE42-NEXT: pmaddwd %xmm3, %xmm1 +; SSE42-NEXT: phaddd %xmm0, %xmm1 +; SSE42-NEXT: movdqa %xmm1, %xmm0 +; SSE42-NEXT: retq +; ; AVX1-LABEL: larger_mul: ; AVX1: # %bb.0: ; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 @@ -1822,11 +2143,11 @@ define <4 x i32> @larger_mul(<16 x i16> %A, <16 x i16> %B) { } define <8 x i32> 
@pmaddwd_16(<16 x i16> %A, <16 x i16> %B) { -; SSE2-LABEL: pmaddwd_16: -; SSE2: # %bb.0: -; SSE2-NEXT: pmaddwd %xmm2, %xmm0 -; SSE2-NEXT: pmaddwd %xmm3, %xmm1 -; SSE2-NEXT: retq +; SSE-LABEL: pmaddwd_16: +; SSE: # %bb.0: +; SSE-NEXT: pmaddwd %xmm2, %xmm0 +; SSE-NEXT: pmaddwd %xmm3, %xmm1 +; SSE-NEXT: retq ; ; AVX1-LABEL: pmaddwd_16: ; AVX1: # %bb.0: @@ -1851,13 +2172,13 @@ define <8 x i32> @pmaddwd_16(<16 x i16> %A, <16 x i16> %B) { } define <16 x i32> @pmaddwd_32(<32 x i16> %A, <32 x i16> %B) { -; SSE2-LABEL: pmaddwd_32: -; SSE2: # %bb.0: -; SSE2-NEXT: pmaddwd %xmm4, %xmm0 -; SSE2-NEXT: pmaddwd %xmm5, %xmm1 -; SSE2-NEXT: pmaddwd %xmm6, %xmm2 -; SSE2-NEXT: pmaddwd %xmm7, %xmm3 -; SSE2-NEXT: retq +; SSE-LABEL: pmaddwd_32: +; SSE: # %bb.0: +; SSE-NEXT: pmaddwd %xmm4, %xmm0 +; SSE-NEXT: pmaddwd %xmm5, %xmm1 +; SSE-NEXT: pmaddwd %xmm6, %xmm2 +; SSE-NEXT: pmaddwd %xmm7, %xmm3 +; SSE-NEXT: retq ; ; AVX1-LABEL: pmaddwd_32: ; AVX1: # %bb.0: @@ -1902,10 +2223,10 @@ define <16 x i32> @pmaddwd_32(<32 x i16> %A, <32 x i16> %B) { } define <4 x i32> @pmaddwd_const(<8 x i16> %A) { -; SSE2-LABEL: pmaddwd_const: -; SSE2: # %bb.0: -; SSE2-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32767,32768,0,0,1,7,42,32] -; SSE2-NEXT: retq +; SSE-LABEL: pmaddwd_const: +; SSE: # %bb.0: +; SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [32767,32768,0,0,1,7,42,32] +; SSE-NEXT: retq ; ; AVX-LABEL: pmaddwd_const: ; AVX: # %bb.0: @@ -1935,6 +2256,19 @@ define <4 x i32> @pmaddwd_negative1(<8 x i16> %A, <8 x i16> %B) { ; SSE2-NEXT: paddd %xmm2, %xmm0 ; SSE2-NEXT: retq ; +; SSE42-LABEL: pmaddwd_negative1: +; SSE42: # %bb.0: +; SSE42-NEXT: pxor %xmm3, %xmm3 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; SSE42-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; SSE42-NEXT: punpckhwd {{.*#+}} xmm1 = 
xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] +; SSE42-NEXT: pmulld %xmm0, %xmm1 +; SSE42-NEXT: pmulld %xmm4, %xmm2 +; SSE42-NEXT: phaddd %xmm1, %xmm2 +; SSE42-NEXT: movdqa %xmm2, %xmm0 +; SSE42-NEXT: retq +; ; AVX1-LABEL: pmaddwd_negative1: ; AVX1: # %bb.0: ; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 @@ -1983,6 +2317,16 @@ define <4 x i32> @pmaddwd_negative2(<8 x i16> %A) { ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: retq ; +; SSE42-LABEL: pmaddwd_negative2: +; SSE42: # %bb.0: +; SSE42-NEXT: pmovsxwd %xmm0, %xmm1 +; SSE42-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] +; SSE42-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,0,7,0,42,0,32,0] +; SSE42-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 # [32768,4294934528,0,0] +; SSE42-NEXT: phaddd %xmm0, %xmm1 +; SSE42-NEXT: movdqa %xmm1, %xmm0 +; SSE42-NEXT: retq +; ; AVX1-LABEL: pmaddwd_negative2: ; AVX1: # %bb.0: ; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 @@ -2009,10 +2353,10 @@ define <4 x i32> @pmaddwd_negative2(<8 x i16> %A) { } define <4 x i32> @jumbled_indices4(<8 x i16> %A, <8 x i16> %B) { -; SSE2-LABEL: jumbled_indices4: -; SSE2: # %bb.0: -; SSE2-NEXT: pmaddwd %xmm1, %xmm0 -; SSE2-NEXT: retq +; SSE-LABEL: jumbled_indices4: +; SSE: # %bb.0: +; SSE-NEXT: pmaddwd %xmm1, %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: jumbled_indices4: ; AVX: # %bb.0: @@ -2028,11 +2372,11 @@ define <4 x i32> @jumbled_indices4(<8 x i16> %A, <8 x i16> %B) { } define <8 x i32> @jumbled_indices8(<16 x i16> %A, <16 x i16> %B) { -; SSE2-LABEL: jumbled_indices8: -; SSE2: # %bb.0: -; SSE2-NEXT: pmaddwd %xmm2, %xmm0 -; SSE2-NEXT: pmaddwd %xmm3, %xmm1 -; SSE2-NEXT: retq +; SSE-LABEL: jumbled_indices8: +; SSE: # %bb.0: +; SSE-NEXT: pmaddwd %xmm2, %xmm0 +; SSE-NEXT: pmaddwd %xmm3, %xmm1 +; SSE-NEXT: retq ; ; AVX1-LABEL: jumbled_indices8: ; AVX1: # %bb.0: @@ -2057,13 +2401,13 @@ define <8 x i32> @jumbled_indices8(<16 x i16> %A, <16 x i16> %B) { } define <16 x i32> @jumbled_indices16(<32 x i16> %A, <32 x i16> %B) { -; 
SSE2-LABEL: jumbled_indices16: -; SSE2: # %bb.0: -; SSE2-NEXT: pmaddwd %xmm4, %xmm0 -; SSE2-NEXT: pmaddwd %xmm5, %xmm1 -; SSE2-NEXT: pmaddwd %xmm6, %xmm2 -; SSE2-NEXT: pmaddwd %xmm7, %xmm3 -; SSE2-NEXT: retq +; SSE-LABEL: jumbled_indices16: +; SSE: # %bb.0: +; SSE-NEXT: pmaddwd %xmm4, %xmm0 +; SSE-NEXT: pmaddwd %xmm5, %xmm1 +; SSE-NEXT: pmaddwd %xmm6, %xmm2 +; SSE-NEXT: pmaddwd %xmm7, %xmm3 +; SSE-NEXT: retq ; ; AVX1-LABEL: jumbled_indices16: ; AVX1: # %bb.0: @@ -2108,26 +2452,26 @@ define <16 x i32> @jumbled_indices16(<32 x i16> %A, <32 x i16> %B) { } define <32 x i32> @jumbled_indices32(<64 x i16> %A, <64 x i16> %B) { -; SSE2-LABEL: jumbled_indices32: -; SSE2: # %bb.0: -; SSE2-NEXT: movq %rdi, %rax -; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm0 -; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm1 -; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm2 -; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm3 -; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm4 -; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm5 -; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm6 -; SSE2-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm7 -; SSE2-NEXT: movdqa %xmm7, 112(%rdi) -; SSE2-NEXT: movdqa %xmm6, 96(%rdi) -; SSE2-NEXT: movdqa %xmm5, 80(%rdi) -; SSE2-NEXT: movdqa %xmm4, 64(%rdi) -; SSE2-NEXT: movdqa %xmm3, 48(%rdi) -; SSE2-NEXT: movdqa %xmm2, 32(%rdi) -; SSE2-NEXT: movdqa %xmm1, 16(%rdi) -; SSE2-NEXT: movdqa %xmm0, (%rdi) -; SSE2-NEXT: retq +; SSE-LABEL: jumbled_indices32: +; SSE: # %bb.0: +; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm0 +; SSE-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm1 +; SSE-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm2 +; SSE-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm3 +; SSE-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm4 +; SSE-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm5 +; SSE-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm6 +; SSE-NEXT: pmaddwd {{[0-9]+}}(%rsp), %xmm7 +; SSE-NEXT: movdqa %xmm7, 112(%rdi) +; SSE-NEXT: movdqa %xmm6, 96(%rdi) +; SSE-NEXT: movdqa %xmm5, 80(%rdi) +; SSE-NEXT: movdqa %xmm4, 64(%rdi) +; SSE-NEXT: movdqa 
%xmm3, 48(%rdi) +; SSE-NEXT: movdqa %xmm2, 32(%rdi) +; SSE-NEXT: movdqa %xmm1, 16(%rdi) +; SSE-NEXT: movdqa %xmm0, (%rdi) +; SSE-NEXT: retq ; ; AVX1-LABEL: jumbled_indices32: ; AVX1: # %bb.0: @@ -2192,11 +2536,11 @@ define <32 x i32> @jumbled_indices32(<64 x i16> %A, <64 x i16> %B) { ; NOTE: We're testing with loads because ABI lowering creates a concat_vectors that extract_vector_elt creation can see through. ; This would require the combine to recreate the concat_vectors. define <4 x i32> @pmaddwd_128(ptr %Aptr, ptr %Bptr) { -; SSE2-LABEL: pmaddwd_128: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa (%rdi), %xmm0 -; SSE2-NEXT: pmaddwd (%rsi), %xmm0 -; SSE2-NEXT: retq +; SSE-LABEL: pmaddwd_128: +; SSE: # %bb.0: +; SSE-NEXT: movdqa (%rdi), %xmm0 +; SSE-NEXT: pmaddwd (%rsi), %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: pmaddwd_128: ; AVX: # %bb.0: @@ -2220,13 +2564,13 @@ define <4 x i32> @pmaddwd_128(ptr %Aptr, ptr %Bptr) { } define <8 x i32> @pmaddwd_256(ptr %Aptr, ptr %Bptr) { -; SSE2-LABEL: pmaddwd_256: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa (%rdi), %xmm0 -; SSE2-NEXT: movdqa 16(%rdi), %xmm1 -; SSE2-NEXT: pmaddwd (%rsi), %xmm0 -; SSE2-NEXT: pmaddwd 16(%rsi), %xmm1 -; SSE2-NEXT: retq +; SSE-LABEL: pmaddwd_256: +; SSE: # %bb.0: +; SSE-NEXT: movdqa (%rdi), %xmm0 +; SSE-NEXT: movdqa 16(%rdi), %xmm1 +; SSE-NEXT: pmaddwd (%rsi), %xmm0 +; SSE-NEXT: pmaddwd 16(%rsi), %xmm1 +; SSE-NEXT: retq ; ; AVX1-LABEL: pmaddwd_256: ; AVX1: # %bb.0: @@ -2259,17 +2603,17 @@ define <8 x i32> @pmaddwd_256(ptr %Aptr, ptr %Bptr) { } define <16 x i32> @pmaddwd_512(ptr %Aptr, ptr %Bptr) { -; SSE2-LABEL: pmaddwd_512: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa (%rdi), %xmm0 -; SSE2-NEXT: movdqa 16(%rdi), %xmm1 -; SSE2-NEXT: movdqa 32(%rdi), %xmm2 -; SSE2-NEXT: movdqa 48(%rdi), %xmm3 -; SSE2-NEXT: pmaddwd (%rsi), %xmm0 -; SSE2-NEXT: pmaddwd 16(%rsi), %xmm1 -; SSE2-NEXT: pmaddwd 32(%rsi), %xmm2 -; SSE2-NEXT: pmaddwd 48(%rsi), %xmm3 -; SSE2-NEXT: retq +; SSE-LABEL: pmaddwd_512: +; SSE: # %bb.0: +; SSE-NEXT: movdqa 
(%rdi), %xmm0 +; SSE-NEXT: movdqa 16(%rdi), %xmm1 +; SSE-NEXT: movdqa 32(%rdi), %xmm2 +; SSE-NEXT: movdqa 48(%rdi), %xmm3 +; SSE-NEXT: pmaddwd (%rsi), %xmm0 +; SSE-NEXT: pmaddwd 16(%rsi), %xmm1 +; SSE-NEXT: pmaddwd 32(%rsi), %xmm2 +; SSE-NEXT: pmaddwd 48(%rsi), %xmm3 +; SSE-NEXT: retq ; ; AVX1-LABEL: pmaddwd_512: ; AVX1: # %bb.0: @@ -2324,34 +2668,34 @@ define <16 x i32> @pmaddwd_512(ptr %Aptr, ptr %Bptr) { } define <32 x i32> @pmaddwd_1024(ptr %Aptr, ptr %Bptr) { -; SSE2-LABEL: pmaddwd_1024: -; SSE2: # %bb.0: -; SSE2-NEXT: movq %rdi, %rax -; SSE2-NEXT: movdqa (%rsi), %xmm0 -; SSE2-NEXT: movdqa 16(%rsi), %xmm1 -; SSE2-NEXT: movdqa 32(%rsi), %xmm2 -; SSE2-NEXT: movdqa 48(%rsi), %xmm3 -; SSE2-NEXT: pmaddwd (%rdx), %xmm0 -; SSE2-NEXT: pmaddwd 16(%rdx), %xmm1 -; SSE2-NEXT: pmaddwd 32(%rdx), %xmm2 -; SSE2-NEXT: pmaddwd 48(%rdx), %xmm3 -; SSE2-NEXT: movdqa 64(%rsi), %xmm4 -; SSE2-NEXT: pmaddwd 64(%rdx), %xmm4 -; SSE2-NEXT: movdqa 80(%rsi), %xmm5 -; SSE2-NEXT: pmaddwd 80(%rdx), %xmm5 -; SSE2-NEXT: movdqa 96(%rsi), %xmm6 -; SSE2-NEXT: pmaddwd 96(%rdx), %xmm6 -; SSE2-NEXT: movdqa 112(%rsi), %xmm7 -; SSE2-NEXT: pmaddwd 112(%rdx), %xmm7 -; SSE2-NEXT: movdqa %xmm7, 112(%rdi) -; SSE2-NEXT: movdqa %xmm6, 96(%rdi) -; SSE2-NEXT: movdqa %xmm5, 80(%rdi) -; SSE2-NEXT: movdqa %xmm4, 64(%rdi) -; SSE2-NEXT: movdqa %xmm3, 48(%rdi) -; SSE2-NEXT: movdqa %xmm2, 32(%rdi) -; SSE2-NEXT: movdqa %xmm1, 16(%rdi) -; SSE2-NEXT: movdqa %xmm0, (%rdi) -; SSE2-NEXT: retq +; SSE-LABEL: pmaddwd_1024: +; SSE: # %bb.0: +; SSE-NEXT: movq %rdi, %rax +; SSE-NEXT: movdqa (%rsi), %xmm0 +; SSE-NEXT: movdqa 16(%rsi), %xmm1 +; SSE-NEXT: movdqa 32(%rsi), %xmm2 +; SSE-NEXT: movdqa 48(%rsi), %xmm3 +; SSE-NEXT: pmaddwd (%rdx), %xmm0 +; SSE-NEXT: pmaddwd 16(%rdx), %xmm1 +; SSE-NEXT: pmaddwd 32(%rdx), %xmm2 +; SSE-NEXT: pmaddwd 48(%rdx), %xmm3 +; SSE-NEXT: movdqa 64(%rsi), %xmm4 +; SSE-NEXT: pmaddwd 64(%rdx), %xmm4 +; SSE-NEXT: movdqa 80(%rsi), %xmm5 +; SSE-NEXT: pmaddwd 80(%rdx), %xmm5 +; SSE-NEXT: movdqa 96(%rsi), 
%xmm6 +; SSE-NEXT: pmaddwd 96(%rdx), %xmm6 +; SSE-NEXT: movdqa 112(%rsi), %xmm7 +; SSE-NEXT: pmaddwd 112(%rdx), %xmm7 +; SSE-NEXT: movdqa %xmm7, 112(%rdi) +; SSE-NEXT: movdqa %xmm6, 96(%rdi) +; SSE-NEXT: movdqa %xmm5, 80(%rdi) +; SSE-NEXT: movdqa %xmm4, 64(%rdi) +; SSE-NEXT: movdqa %xmm3, 48(%rdi) +; SSE-NEXT: movdqa %xmm2, 32(%rdi) +; SSE-NEXT: movdqa %xmm1, 16(%rdi) +; SSE-NEXT: movdqa %xmm0, (%rdi) +; SSE-NEXT: retq ; ; AVX1-LABEL: pmaddwd_1024: ; AVX1: # %bb.0: @@ -2427,11 +2771,11 @@ define <32 x i32> @pmaddwd_1024(ptr %Aptr, ptr %Bptr) { } define <4 x i32> @pmaddwd_commuted_mul(ptr %Aptr, ptr %Bptr) { -; SSE2-LABEL: pmaddwd_commuted_mul: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa (%rdi), %xmm0 -; SSE2-NEXT: pmaddwd (%rsi), %xmm0 -; SSE2-NEXT: retq +; SSE-LABEL: pmaddwd_commuted_mul: +; SSE: # %bb.0: +; SSE-NEXT: movdqa (%rdi), %xmm0 +; SSE-NEXT: pmaddwd (%rsi), %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: pmaddwd_commuted_mul: ; AVX: # %bb.0: @@ -2455,11 +2799,11 @@ define <4 x i32> @pmaddwd_commuted_mul(ptr %Aptr, ptr %Bptr) { } define <4 x i32> @pmaddwd_swapped_indices(ptr %Aptr, ptr %Bptr) { -; SSE2-LABEL: pmaddwd_swapped_indices: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa (%rdi), %xmm0 -; SSE2-NEXT: pmaddwd (%rsi), %xmm0 -; SSE2-NEXT: retq +; SSE-LABEL: pmaddwd_swapped_indices: +; SSE: # %bb.0: +; SSE-NEXT: movdqa (%rdi), %xmm0 +; SSE-NEXT: pmaddwd (%rsi), %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: pmaddwd_swapped_indices: ; AVX: # %bb.0: @@ -2484,12 +2828,12 @@ define <4 x i32> @pmaddwd_swapped_indices(ptr %Aptr, ptr %Bptr) { ; Negative test where indices aren't paired properly define <4 x i32> @pmaddwd_bad_indices(ptr %Aptr, ptr %Bptr) { -; SSE2-LABEL: pmaddwd_bad_indices: -; SSE2: # %bb.0: -; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = mem[1,0,2,3,4,5,6,7] -; SSE2-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,6,7] -; SSE2-NEXT: pmaddwd (%rsi), %xmm0 -; SSE2-NEXT: retq +; SSE-LABEL: pmaddwd_bad_indices: +; SSE: # %bb.0: +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = mem[1,0,2,3,4,5,6,7] 
+; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,6,7] +; SSE-NEXT: pmaddwd (%rsi), %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: pmaddwd_bad_indices: ; AVX: # %bb.0: @@ -2516,21 +2860,21 @@ define <4 x i32> @pmaddwd_bad_indices(ptr %Aptr, ptr %Bptr) { ; This test contains two multiplies joined by an add. The result of that add is then reduced to a single element. ; SelectionDAGBuilder should tag the joining add as a vector reduction. We need to recognize that both sides can use pmaddwd define i32 @madd_double_reduction(ptr %arg, ptr %arg1, ptr %arg2, ptr %arg3) { -; SSE2-LABEL: madd_double_reduction: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqu (%rdi), %xmm0 -; SSE2-NEXT: movdqu (%rsi), %xmm1 -; SSE2-NEXT: pmaddwd %xmm0, %xmm1 -; SSE2-NEXT: movdqu (%rdx), %xmm0 -; SSE2-NEXT: movdqu (%rcx), %xmm2 -; SSE2-NEXT: pmaddwd %xmm0, %xmm2 -; SSE2-NEXT: paddd %xmm1, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] -; SSE2-NEXT: paddd %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; SSE2-NEXT: paddd %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: retq +; SSE-LABEL: madd_double_reduction: +; SSE: # %bb.0: +; SSE-NEXT: movdqu (%rdi), %xmm0 +; SSE-NEXT: movdqu (%rsi), %xmm1 +; SSE-NEXT: pmaddwd %xmm0, %xmm1 +; SSE-NEXT: movdqu (%rdx), %xmm0 +; SSE-NEXT: movdqu (%rcx), %xmm2 +; SSE-NEXT: pmaddwd %xmm0, %xmm2 +; SSE-NEXT: paddd %xmm1, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] +; SSE-NEXT: paddd %xmm2, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; SSE-NEXT: paddd %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: retq ; ; AVX-LABEL: madd_double_reduction: ; AVX: # %bb.0: @@ -2561,31 +2905,31 @@ define i32 @madd_double_reduction(ptr %arg, ptr %arg1, ptr %arg2, ptr %arg3) { } define i32 @madd_quad_reduction(ptr %arg, ptr %arg1, ptr %arg2, ptr %arg3, ptr %arg4, ptr %arg5, ptr %arg6, ptr %arg7) { -; SSE2-LABEL: madd_quad_reduction: -; SSE2: # %bb.0: -; SSE2-NEXT: movq {{[0-9]+}}(%rsp), %rax -; SSE2-NEXT: movq {{[0-9]+}}(%rsp), 
%r10 -; SSE2-NEXT: movdqu (%rdi), %xmm0 -; SSE2-NEXT: movdqu (%rsi), %xmm1 -; SSE2-NEXT: pmaddwd %xmm0, %xmm1 -; SSE2-NEXT: movdqu (%rdx), %xmm0 -; SSE2-NEXT: movdqu (%rcx), %xmm2 -; SSE2-NEXT: pmaddwd %xmm0, %xmm2 -; SSE2-NEXT: paddd %xmm1, %xmm2 -; SSE2-NEXT: movdqu (%r8), %xmm0 -; SSE2-NEXT: movdqu (%r9), %xmm1 -; SSE2-NEXT: pmaddwd %xmm0, %xmm1 -; SSE2-NEXT: paddd %xmm2, %xmm1 -; SSE2-NEXT: movdqu (%r10), %xmm0 -; SSE2-NEXT: movdqu (%rax), %xmm2 -; SSE2-NEXT: pmaddwd %xmm0, %xmm2 -; SSE2-NEXT: paddd %xmm1, %xmm2 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] -; SSE2-NEXT: paddd %xmm2, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; SSE2-NEXT: paddd %xmm0, %xmm1 -; SSE2-NEXT: movd %xmm1, %eax -; SSE2-NEXT: retq +; SSE-LABEL: madd_quad_reduction: +; SSE: # %bb.0: +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %rax +; SSE-NEXT: movq {{[0-9]+}}(%rsp), %r10 +; SSE-NEXT: movdqu (%rdi), %xmm0 +; SSE-NEXT: movdqu (%rsi), %xmm1 +; SSE-NEXT: pmaddwd %xmm0, %xmm1 +; SSE-NEXT: movdqu (%rdx), %xmm0 +; SSE-NEXT: movdqu (%rcx), %xmm2 +; SSE-NEXT: pmaddwd %xmm0, %xmm2 +; SSE-NEXT: paddd %xmm1, %xmm2 +; SSE-NEXT: movdqu (%r8), %xmm0 +; SSE-NEXT: movdqu (%r9), %xmm1 +; SSE-NEXT: pmaddwd %xmm0, %xmm1 +; SSE-NEXT: paddd %xmm2, %xmm1 +; SSE-NEXT: movdqu (%r10), %xmm0 +; SSE-NEXT: movdqu (%rax), %xmm2 +; SSE-NEXT: pmaddwd %xmm0, %xmm2 +; SSE-NEXT: paddd %xmm1, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,2,3] +; SSE-NEXT: paddd %xmm2, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; SSE-NEXT: paddd %xmm0, %xmm1 +; SSE-NEXT: movd %xmm1, %eax +; SSE-NEXT: retq ; ; AVX-LABEL: madd_quad_reduction: ; AVX: # %bb.0: @@ -2679,6 +3023,44 @@ define i64 @sum_and_sum_of_squares(ptr %a, i32 %n) { ; SSE2-NEXT: orq %rcx, %rax ; SSE2-NEXT: retq ; +; SSE42-LABEL: sum_and_sum_of_squares: +; SSE42: # %bb.0: # %entry +; SSE42-NEXT: movl %esi, %eax +; SSE42-NEXT: pxor %xmm0, %xmm0 +; SSE42-NEXT: pxor %xmm1, %xmm1 +; SSE42-NEXT: pxor %xmm2, %xmm2 +; SSE42-NEXT: pxor %xmm3, %xmm3 +; 
SSE42-NEXT: .p2align 4 +; SSE42-NEXT: .LBB33_1: # %vector.body +; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE42-NEXT: pmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; SSE42-NEXT: pmovzxbd {{.*#+}} xmm5 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; SSE42-NEXT: paddd %xmm5, %xmm3 +; SSE42-NEXT: paddd %xmm4, %xmm2 +; SSE42-NEXT: pmaddwd %xmm4, %xmm4 +; SSE42-NEXT: paddd %xmm4, %xmm0 +; SSE42-NEXT: pmaddwd %xmm5, %xmm5 +; SSE42-NEXT: paddd %xmm5, %xmm1 +; SSE42-NEXT: addq $8, %rdi +; SSE42-NEXT: addq $-8, %rax +; SSE42-NEXT: jne .LBB33_1 +; SSE42-NEXT: # %bb.2: # %middle.block +; SSE42-NEXT: paddd %xmm3, %xmm2 +; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] +; SSE42-NEXT: paddd %xmm2, %xmm3 +; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,1,1] +; SSE42-NEXT: paddd %xmm3, %xmm2 +; SSE42-NEXT: movd %xmm2, %ecx +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: movd %xmm0, %eax +; SSE42-NEXT: shlq $32, %rcx +; SSE42-NEXT: orq %rcx, %rax +; SSE42-NEXT: retq +; ; AVX1-LABEL: sum_and_sum_of_squares: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: movl %esi, %eax @@ -2814,6 +3196,32 @@ define i32 @sum_of_square_differences(ptr %a, ptr %b, i32 %n) { ; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: retq ; +; SSE42-LABEL: sum_of_square_differences: +; SSE42: # %bb.0: # %entry +; SSE42-NEXT: movl %edx, %eax +; SSE42-NEXT: pxor %xmm0, %xmm0 +; SSE42-NEXT: xorl %ecx, %ecx +; SSE42-NEXT: pxor %xmm1, %xmm1 +; SSE42-NEXT: .p2align 4 +; SSE42-NEXT: .LBB34_1: # %vector.body +; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE42-NEXT: pmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; SSE42-NEXT: pmovzxbw {{.*#+}} xmm3 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; SSE42-NEXT: psubw %xmm2, %xmm3 +; SSE42-NEXT: pmaddwd %xmm3, %xmm3 +; SSE42-NEXT: paddd %xmm3, %xmm1 +; SSE42-NEXT: addq $8, %rcx +; SSE42-NEXT: cmpq %rcx, %rax +; SSE42-NEXT: jne .LBB34_1 +; SSE42-NEXT: # %bb.2: # %middle.block +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: movd %xmm1, %eax +; SSE42-NEXT: retq +; ; AVX1-LABEL: sum_of_square_differences: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: movl %edx, %eax @@ -2898,10 +3306,10 @@ middle.block: ; PR49716 - https://llvm.org/PR49716 define <4 x i32> @input_size_mismatch(<16 x i16> %x, ptr %p) { -; SSE2-LABEL: input_size_mismatch: -; SSE2: # %bb.0: -; SSE2-NEXT: pmaddwd (%rdi), %xmm0 -; SSE2-NEXT: retq +; SSE-LABEL: input_size_mismatch: +; SSE: # %bb.0: +; SSE-NEXT: pmaddwd (%rdi), %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: input_size_mismatch: ; AVX: # %bb.0: @@ -2924,10 +3332,10 @@ define <4 x i32> @input_size_mismatch(<16 x i16> %x, ptr %p) { } define <4 x i32> @output_size_mismatch(<16 x i16> %x, <16 x i16> %y) { -; SSE2-LABEL: output_size_mismatch: -; SSE2: # %bb.0: -; SSE2-NEXT: pmaddwd %xmm2, %xmm0 -; SSE2-NEXT: retq +; SSE-LABEL: output_size_mismatch: +; SSE: # %bb.0: +; SSE-NEXT: pmaddwd %xmm2, %xmm0 +; SSE-NEXT: retq ; ; AVX-LABEL: output_size_mismatch: ; AVX: # %bb.0: @@ -2949,11 +3357,11 @@ define <4 x i32> @output_size_mismatch(<16 x i16> %x, <16 x i16> %y) { } define <4 x i32> @output_size_mismatch_high_subvector(<16 x i16> %x, <16 x i16> %y) { -; SSE2-LABEL: output_size_mismatch_high_subvector: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: pmaddwd %xmm2, %xmm0 -; SSE2-NEXT: retq +; SSE-LABEL: output_size_mismatch_high_subvector: +; SSE: # %bb.0: +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: pmaddwd %xmm2, %xmm0 +; 
SSE-NEXT: retq ; ; AVX1-LABEL: output_size_mismatch_high_subvector: ; AVX1: # %bb.0: @@ -3022,6 +3430,39 @@ define i32 @add_used_by_loop_phi(ptr %a, ptr %b, i64 %offset_a, i64 %offset_b, i ; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: retq ; +; SSE42-LABEL: add_used_by_loop_phi: +; SSE42: # %bb.0: # %entry +; SSE42-NEXT: addq %rdx, %rdi +; SSE42-NEXT: addq %rcx, %rsi +; SSE42-NEXT: pxor %xmm0, %xmm0 +; SSE42-NEXT: xorl %eax, %eax +; SSE42-NEXT: pxor %xmm2, %xmm2 +; SSE42-NEXT: pxor %xmm1, %xmm1 +; SSE42-NEXT: .p2align 4 +; SSE42-NEXT: .LBB38_1: # %loop +; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 +; SSE42-NEXT: pmovsxbw 8(%rdi,%rax), %xmm3 +; SSE42-NEXT: pmovsxbw (%rdi,%rax), %xmm4 +; SSE42-NEXT: pmovsxbw 8(%rsi,%rax), %xmm5 +; SSE42-NEXT: pmaddwd %xmm3, %xmm5 +; SSE42-NEXT: paddd %xmm5, %xmm1 +; SSE42-NEXT: pmovsxbw (%rsi,%rax), %xmm3 +; SSE42-NEXT: pmaddwd %xmm4, %xmm3 +; SSE42-NEXT: paddd %xmm3, %xmm2 +; SSE42-NEXT: addq $16, %rax +; SSE42-NEXT: cmpq %r8, %rax +; SSE42-NEXT: jb .LBB38_1 +; SSE42-NEXT: # %bb.2: # %afterloop +; SSE42-NEXT: paddd %xmm0, %xmm2 +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: paddd %xmm2, %xmm1 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] +; SSE42-NEXT: paddd %xmm1, %xmm0 +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; SSE42-NEXT: paddd %xmm0, %xmm1 +; SSE42-NEXT: movd %xmm1, %eax +; SSE42-NEXT: retq +; ; AVX1-LABEL: add_used_by_loop_phi: ; AVX1: # %bb.0: # %entry ; AVX1-NEXT: addq %rdx, %rdi @@ -3145,26 +3586,13 @@ afterloop: } define <16 x i32> @extract_concat_pmaddwd(<32 x i16> %a, <32 x i16> %b) { -; CHECK-LABEL: extract_concat_pmaddwd: -; CHECK: # %bb.0: -; CHECK-NEXT: vextractf128 $1, %ymm2, %xmm4 -; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm5 -; CHECK-NEXT: vpmaddwd %xmm4, %xmm5, %xmm4 -; CHECK-NEXT: vpmaddwd %xmm2, %xmm0, %xmm0 -; CHECK-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 -; CHECK-NEXT: vextractf128 $1, %ymm3, %xmm2 -; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm4 -; CHECK-NEXT: vpmaddwd %xmm2, %xmm4, %xmm2 
-; CHECK-NEXT: vpmaddwd %xmm3, %xmm1, %xmm1 -; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 -; CHECK-NEXT: retq -; SSE2-LABEL: extract_concat_pmaddwd: -; SSE2: # %bb.0: -; SSE2-NEXT: pmaddwd %xmm4, %xmm0 -; SSE2-NEXT: pmaddwd %xmm5, %xmm1 -; SSE2-NEXT: pmaddwd %xmm6, %xmm2 -; SSE2-NEXT: pmaddwd %xmm7, %xmm3 -; SSE2-NEXT: retq +; SSE-LABEL: extract_concat_pmaddwd: +; SSE: # %bb.0: +; SSE-NEXT: pmaddwd %xmm4, %xmm0 +; SSE-NEXT: pmaddwd %xmm5, %xmm1 +; SSE-NEXT: pmaddwd %xmm6, %xmm2 +; SSE-NEXT: pmaddwd %xmm7, %xmm3 +; SSE-NEXT: retq ; ; AVX1-LABEL: extract_concat_pmaddwd: ; AVX1: # %bb.0: From fdab3f9e0d856583bfd530b6b1e11778314cda27 Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Tue, 23 Jun 2026 10:30:44 +0100 Subject: [PATCH 144/511] [lldb][test] Skip TestConcurrentManyBreakpoints (#205298) --- .../thread/concurrent_events/TestConcurrentManyBreakpoints.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyBreakpoints.py b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyBreakpoints.py index 3d1af61793104..4b8900f25a8e8 100644 --- a/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyBreakpoints.py +++ b/lldb/test/API/functionalities/thread/concurrent_events/TestConcurrentManyBreakpoints.py @@ -5,6 +5,7 @@ @skipIfTargetDoesNotSupportThreads() @skipIfWindows +@skipIf # See llvm/llvm-project/#205297 class ConcurrentManyBreakpoints(ConcurrentEventsBase): # Atomic sequences are not supported yet for MIPS in LLDB. 
@skipIf(triple="^mips") From d57f85eef3a225145264d951b9d95aa2de401508 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 23 Jun 2026 11:31:09 +0200 Subject: [PATCH 145/511] AMDGPU: Move AMDGPUTargetID to AMDGPUTargetParser (#205268) --- .../llvm/TargetParser/AMDGPUTargetParser.h | 107 +++++++++++++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 10 +- .../AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 5 +- .../Target/AMDGPU/AMDGPUHSAMetadataStreamer.h | 10 +- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 16 +- .../Target/AMDGPU/Disassembler/CMakeLists.txt | 1 - llvm/lib/Target/AMDGPU/GCNSubtarget.cpp | 2 +- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 6 +- .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 16 +- .../MCTargetDesc/AMDGPUTargetStreamer.h | 10 +- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 148 +++--------------- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 117 ++------------ llvm/lib/TargetParser/AMDGPUTargetParser.cpp | 106 +++++++++++++ 13 files changed, 276 insertions(+), 278 deletions(-) diff --git a/llvm/include/llvm/TargetParser/AMDGPUTargetParser.h b/llvm/include/llvm/TargetParser/AMDGPUTargetParser.h index 1288f4cd69ff0..b35128ba926e3 100644 --- a/llvm/include/llvm/TargetParser/AMDGPUTargetParser.h +++ b/llvm/include/llvm/TargetParser/AMDGPUTargetParser.h @@ -17,10 +17,13 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" #include +#include +#include #include namespace llvm { +class raw_ostream; template class SmallVectorImpl; class Triple; @@ -102,6 +105,110 @@ LLVM_ABI IsaVersion getIsaVersion(StringRef GPU); /// default target features with entries overridden by \p Features. 
LLVM_ABI std::pair fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, StringMap &Features); + +enum class TargetIDSetting { Unsupported, Any, Off, On }; + +class LLVM_ABI AMDGPUTargetID { +private: + GPUKind Arch; + std::string TargetTripleString; + TargetIDSetting XnackSetting; + TargetIDSetting SramEccSetting; + bool IsAMDHSA; + +public: + AMDGPUTargetID(GPUKind Arch, const Triple &TT, TargetIDSetting XnackSetting, + TargetIDSetting SramEccSetting); + + ~AMDGPUTargetID() = default; + + /// \return True if the current xnack setting is not "Unsupported". + bool isXnackSupported() const { + return XnackSetting != TargetIDSetting::Unsupported; + } + + /// \returns True if the current xnack setting is "On" or "Any". + bool isXnackOnOrAny() const { + return XnackSetting == TargetIDSetting::On || + XnackSetting == TargetIDSetting::Any; + } + + /// \returns True if current xnack setting is "On" or "Off", + /// false otherwise. + bool isXnackOnOrOff() const { + return getXnackSetting() == TargetIDSetting::On || + getXnackSetting() == TargetIDSetting::Off; + } + + /// \returns The current xnack TargetIDSetting, possible options are + /// "Unsupported", "Any", "Off", and "On". + TargetIDSetting getXnackSetting() const { return XnackSetting; } + + /// Sets xnack setting to \p NewXnackSetting. + void setXnackSetting(TargetIDSetting NewXnackSetting) { + XnackSetting = NewXnackSetting; + } + + /// \return True if the current sramecc setting is not "Unsupported". + bool isSramEccSupported() const { + return SramEccSetting != TargetIDSetting::Unsupported; + } + + /// \returns True if the current sramecc setting is "On" or "Any". + bool isSramEccOnOrAny() const { + return SramEccSetting == TargetIDSetting::On || + SramEccSetting == TargetIDSetting::Any; + } + + /// \returns True if current sramecc setting is "On" or "Off", + /// false otherwise. 
+ bool isSramEccOnOrOff() const { + return getSramEccSetting() == TargetIDSetting::On || + getSramEccSetting() == TargetIDSetting::Off; + } + + /// \returns The current sramecc TargetIDSetting, possible options are + /// "Unsupported", "Any", "Off", and "On". + TargetIDSetting getSramEccSetting() const { return SramEccSetting; } + + /// Sets sramecc setting to \p NewSramEccSetting. + void setSramEccSetting(TargetIDSetting NewSramEccSetting) { + SramEccSetting = NewSramEccSetting; + } + + void setTargetIDFromTargetIDStream(StringRef TargetID); + + GPUKind getGPUKind() const { return Arch; } + + StringRef getTargetTripleString() const { return TargetTripleString; } + + /// \returns True if this is an AMDHSA target. + bool isAMDHSA() const { return IsAMDHSA; } + + /// Parse a target ID directive string (e.g., + /// "amdgcn-amd-amdhsa--gfx1010:xnack-") and return an AMDGPUTargetID. + /// \returns AMDGPUTargetID or std::nullopt if malformed. + static std::optional + parseTargetIDString(StringRef TargetIDDirective); + + /// Write string representation to \p OS + void print(raw_ostream &OS) const; + + /// \returns String representation of an object. + std::string toString() const; + + bool operator==(const AMDGPUTargetID &Other) const; + bool operator!=(const AMDGPUTargetID &Other) const { + return !(*this == Other); + } +}; + +inline raw_ostream &operator<<(raw_ostream &OS, + const AMDGPUTargetID &TargetID) { + TargetID.print(OS); + return OS; +} + } // namespace AMDGPU } // namespace llvm diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 9f29934f5a4a5..a08f23f0f768a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -198,7 +198,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() { // Make sure function's xnack settings are compatible with module's // xnack settings. 
if (FunctionTargetID.isXnackSupported() && - FunctionTargetID.getXnackSetting() != IsaInfo::TargetIDSetting::Any && + FunctionTargetID.getXnackSetting() != AMDGPU::TargetIDSetting::Any && FunctionTargetID.getXnackSetting() != getTargetStreamer()->getTargetID()->getXnackSetting()) { OutContext.reportError( @@ -209,7 +209,7 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() { // Make sure function's sramecc settings are compatible with module's // sramecc settings. if (FunctionTargetID.isSramEccSupported() && - FunctionTargetID.getSramEccSetting() != IsaInfo::TargetIDSetting::Any && + FunctionTargetID.getSramEccSetting() != AMDGPU::TargetIDSetting::Any && FunctionTargetID.getSramEccSetting() != getTargetStreamer()->getTargetID()->getSramEccSetting()) { OutContext.reportError( @@ -1212,12 +1212,12 @@ void AMDGPUAsmPrinter::initializeTargetID(const Module &M) { break; const GCNSubtarget &STM = TM.getSubtarget(F); - const IsaInfo::AMDGPUTargetID &STMTargetID = STM.getTargetID(); + const AMDGPUTargetID &STMTargetID = STM.getTargetID(); if (TSTargetID->isXnackSupported()) - if (TSTargetID->getXnackSetting() == IsaInfo::TargetIDSetting::Any) + if (TSTargetID->getXnackSetting() == AMDGPU::TargetIDSetting::Any) TSTargetID->setXnackSetting(STMTargetID.getXnackSetting()); if (TSTargetID->isSramEccSupported()) - if (TSTargetID->getSramEccSetting() == IsaInfo::TargetIDSetting::Any) + if (TSTargetID->getSramEccSetting() == AMDGPU::TargetIDSetting::Any) TSTargetID->setSramEccSetting(STMTargetID.getSramEccSetting()); } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 35ecf98836678..78fe4df844714 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -215,8 +215,7 @@ void MetadataStreamerMsgPackV4::emitVersion() { getRootMetadata("amdhsa.version") = Version; } -void MetadataStreamerMsgPackV4::emitTargetID( - const 
IsaInfo::AMDGPUTargetID &TargetID) { +void MetadataStreamerMsgPackV4::emitTargetID(const AMDGPUTargetID &TargetID) { getRootMetadata("amdhsa.target") = HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true); } @@ -559,7 +558,7 @@ bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) { } void MetadataStreamerMsgPackV4::begin(const Module &Mod, - const IsaInfo::AMDGPUTargetID &TargetID) { + const AMDGPUTargetID &TargetID) { emitVersion(); emitTargetID(TargetID); emitPrintf(Mod); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h index 6bad4dbdf5321..f2cd8dd5882f8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -36,9 +36,7 @@ class Type; namespace AMDGPU { -namespace IsaInfo { class AMDGPUTargetID; -} namespace HSAMD { @@ -48,8 +46,7 @@ class MetadataStreamer { virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0; - virtual void begin(const Module &Mod, - const IsaInfo::AMDGPUTargetID &TargetID) = 0; + virtual void begin(const Module &Mod, const AMDGPUTargetID &TargetID) = 0; virtual void end() = 0; @@ -96,7 +93,7 @@ class LLVM_EXTERNAL_VISIBILITY MetadataStreamerMsgPackV4 void emitVersion() override; - void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID); + void emitTargetID(const AMDGPUTargetID &TargetID); void emitPrintf(const Module &Mod); @@ -135,8 +132,7 @@ class LLVM_EXTERNAL_VISIBILITY MetadataStreamerMsgPackV4 bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override; - void begin(const Module &Mod, - const IsaInfo::AMDGPUTargetID &TargetID) override; + void begin(const Module &Mod, const AMDGPUTargetID &TargetID) override; void end() override; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index a7191c4411336..b5a5285811da0 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ 
b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5980,13 +5980,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { if (getParser().parseEscapedString(TargetIDDirective)) return true; - std::optional MaybeParsed = - AMDGPU::IsaInfo::AMDGPUTargetID::parseTargetIDString(TargetIDDirective); + std::optional MaybeParsed = + AMDGPU::AMDGPUTargetID::parseTargetIDString(TargetIDDirective); if (!MaybeParsed) return getParser().Error(TargetStart, "malformed target ID"); - const AMDGPU::IsaInfo::AMDGPUTargetID &ParsedTargetID = *MaybeParsed; - const std::optional &CurrentTargetID = + const AMDGPU::AMDGPUTargetID &ParsedTargetID = *MaybeParsed; + const std::optional &CurrentTargetID = getTargetStreamer().getTargetID(); if (*CurrentTargetID != ParsedTargetID) { @@ -6692,13 +6692,13 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() { StringRef TargetIDDirective = getLexer().getTok().getStringContents(); - std::optional MaybeParsed = - AMDGPU::IsaInfo::AMDGPUTargetID::parseTargetIDString(TargetIDDirective); + std::optional MaybeParsed = + AMDGPU::AMDGPUTargetID::parseTargetIDString(TargetIDDirective); if (!MaybeParsed) return Error(getParser().getTok().getLoc(), "malformed target id"); - const AMDGPU::IsaInfo::AMDGPUTargetID &ParsedTargetID = *MaybeParsed; - const std::optional &CurrentTargetID = + const AMDGPU::AMDGPUTargetID &ParsedTargetID = *MaybeParsed; + const std::optional &CurrentTargetID = getTargetStreamer().getTargetID(); if (*CurrentTargetID != ParsedTargetID) { diff --git a/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt index aeede04081fc7..aa96d67c527a4 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt @@ -10,7 +10,6 @@ add_llvm_component_library(LLVMAMDGPUDisassembler CodeGenTypes MC MCDisassembler - TargetParser Support ADD_TO_COMPONENT diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp 
b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp index 1ec3e45cc0359..37efb3a51cb9d 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.cpp @@ -180,7 +180,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, : // clang-format off AMDGPUGenSubtargetInfo(TT, GPU, /*TuneCPU*/ GPU, FS), AMDGPUSubtarget(TT), - TargetID(*this, FS), + TargetID(AMDGPU::createAMDGPUTargetID(*this, FS)), InstrItins(getInstrItineraryForCPU(GPU)), BufferOOBRelaxed(BufferOOBRelaxed), TBufferOOBRelaxed(TBufferOOBRelaxed), diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index af47a8725c2d0..1ab883dea24ec 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -71,7 +71,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, protected: // Basic subtarget description. - AMDGPU::IsaInfo::AMDGPUTargetID TargetID; + AMDGPU::AMDGPUTargetID TargetID; unsigned Gen = INVALID; InstrItineraryData InstrItins; int LDSBankCount = 0; @@ -157,9 +157,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return RegBankInfo.get(); } - const AMDGPU::IsaInfo::AMDGPUTargetID &getTargetID() const { - return TargetID; - } + const AMDGPU::AMDGPUTargetID &getTargetID() const { return TargetID; } const InstrItineraryData *getInstrItineraryData() const override { return &InstrItins; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index e9d7cc3f1476d..d999e99fb7d12 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -914,31 +914,31 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV4() { // xnack. 
switch (getTargetID()->getXnackSetting()) { - case AMDGPU::IsaInfo::TargetIDSetting::Unsupported: + case AMDGPU::TargetIDSetting::Unsupported: EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4; break; - case AMDGPU::IsaInfo::TargetIDSetting::Any: + case AMDGPU::TargetIDSetting::Any: EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ANY_V4; break; - case AMDGPU::IsaInfo::TargetIDSetting::Off: + case AMDGPU::TargetIDSetting::Off: EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_OFF_V4; break; - case AMDGPU::IsaInfo::TargetIDSetting::On: + case AMDGPU::TargetIDSetting::On: EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_XNACK_ON_V4; break; } // sramecc. switch (getTargetID()->getSramEccSetting()) { - case AMDGPU::IsaInfo::TargetIDSetting::Unsupported: + case AMDGPU::TargetIDSetting::Unsupported: EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4; break; - case AMDGPU::IsaInfo::TargetIDSetting::Any: + case AMDGPU::TargetIDSetting::Any: EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ANY_V4; break; - case AMDGPU::IsaInfo::TargetIDSetting::Off: + case AMDGPU::TargetIDSetting::Off: EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_OFF_V4; break; - case AMDGPU::IsaInfo::TargetIDSetting::On: + case AMDGPU::TargetIDSetting::On: EFlagsV4 |= ELF::EF_AMDGPU_FEATURE_SRAMECC_ON_V4; break; } diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index dc9636c6c2105..c8135276b8c5f 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -57,7 +57,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { protected: // TODO: Move HSAMetadataStream to AMDGPUTargetStreamer. 
- std::optional TargetID; + std::optional TargetID; unsigned CodeObjectVersion; MCContext &getContext() const { return Streamer.getContext(); } @@ -133,15 +133,13 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { static StringRef getArchNameFromElfMach(unsigned ElfMach); static unsigned getElfMach(StringRef GPU); - const std::optional &getTargetID() const { - return TargetID; - } - std::optional &getTargetID() { + const std::optional &getTargetID() const { return TargetID; } + std::optional &getTargetID() { return TargetID; } void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString) { assert(TargetID == std::nullopt && "TargetID can only be initialized once"); - TargetID.emplace(STI, FeatureString); + TargetID = AMDGPU::createAMDGPUTargetID(STI, FeatureString); } }; diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index b59e8ddf2b282..efcb8d18bc503 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1097,20 +1097,15 @@ VOPD::InstInfo getVOPDInstInfo(unsigned VOPDOpcode, return VOPD::InstInfo(OpXInfo, OpYInfo); } -namespace IsaInfo { - -AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI, - StringRef FeatureString) - : Arch(parseArchAMDGCN(STI.getCPU())), - TargetTripleString( - STI.getTargetTriple().normalize(Triple::CanonicalForm::FOUR_IDENT)), - XnackSetting(STI.getFeatureBits().test(FeatureSupportsXNACK) - ? TargetIDSetting::Any - : TargetIDSetting::Unsupported), - SramEccSetting(STI.getFeatureBits().test(FeatureSupportsSRAMECC) - ? TargetIDSetting::Any - : TargetIDSetting::Unsupported), - IsAMDHSA(STI.getTargetTriple().getOS() == Triple::AMDHSA) { +AMDGPUTargetID createAMDGPUTargetID(const MCSubtargetInfo &STI, + StringRef FeatureString) { + AMDGPUTargetID TargetID(parseArchAMDGCN(STI.getCPU()), STI.getTargetTriple(), + STI.getFeatureBits().test(FeatureSupportsXNACK) + ? 
TargetIDSetting::Any + : TargetIDSetting::Unsupported, + STI.getFeatureBits().test(FeatureSupportsSRAMECC) + ? TargetIDSetting::Any + : TargetIDSetting::Unsupported); // Check if xnack or sramecc is explicitly enabled or disabled. In the // absence of the target features we assume we must generate code that can run @@ -1134,12 +1129,12 @@ AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI, // Targets without on/off mode support keep their initial setting (Any). bool XnackSupported = STI.getFeatureBits().test(FeatureXNACKOnOffModes); - bool SramEccSupported = isSramEccSupported(); + bool SramEccSupported = TargetID.isSramEccSupported(); if (XnackRequested) { if (XnackSupported) { - XnackSetting = - *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off; + TargetID.setXnackSetting(*XnackRequested ? TargetIDSetting::On + : TargetIDSetting::Off); } else { // If a specific xnack setting was requested and this GPU does not support // xnack emit a warning. Setting will remain set to "Unsupported". @@ -1155,8 +1150,8 @@ AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI, if (SramEccRequested) { if (SramEccSupported) { - SramEccSetting = - *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off; + TargetID.setSramEccSetting(*SramEccRequested ? TargetIDSetting::On + : TargetIDSetting::Off); } else { // If a specific sramecc setting was requested and this GPU does not // support sramecc emit a warning. 
Setting will remain set to @@ -1170,111 +1165,11 @@ AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI, } } } -} - -AMDGPUTargetID::AMDGPUTargetID(GPUKind Arch, StringRef TargetTripleString, - TargetIDSetting XnackSetting, - TargetIDSetting SramEccSetting, bool IsAMDHSA) - : Arch(Arch), TargetTripleString(TargetTripleString), - XnackSetting(XnackSetting), SramEccSetting(SramEccSetting), - IsAMDHSA(IsAMDHSA) {} - -static TargetIDSetting -getTargetIDSettingFromFeatureString(StringRef FeatureString) { - if (FeatureString.ends_with("-")) - return TargetIDSetting::Off; - if (FeatureString.ends_with("+")) - return TargetIDSetting::On; - - llvm_unreachable("Malformed feature string"); -} - -void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) { - SmallVector TargetIDSplit; - TargetID.split(TargetIDSplit, ':'); - for (const auto &FeatureString : TargetIDSplit) { - if (FeatureString.starts_with("xnack")) - XnackSetting = getTargetIDSettingFromFeatureString(FeatureString); - if (FeatureString.starts_with("sramecc")) - SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString); - } + return TargetID; } -std::optional -AMDGPUTargetID::parseTargetIDString(StringRef TargetIDDirective) { - // Split on '-' to get arch-vendor-os-environment-processor:features - // There is a single dash separator after the 4-component triple - SmallVector Parts; - TargetIDDirective.split(Parts, '-', /*MaxSplit=*/4); - if (Parts.size() < 4) - return std::nullopt; - - Triple TT(Parts[0], Parts[1], Parts[2], Parts[3]); - if (!TT.isAMDGCN()) - return std::nullopt; - - SmallVector FeatureSplit; - Parts[4].split(FeatureSplit, ':'); - if (FeatureSplit.empty()) - return std::nullopt; - - StringRef CPUName = FeatureSplit[0]; - - // Determine xnack/sramecc support based on the architecture attributes - GPUKind Arch = parseArchAMDGCN(CPUName); - unsigned ArchAttr = getArchAttrAMDGCN(Arch); - - TargetIDSetting XnackSetting = (ArchAttr & FEATURE_XNACK) - ? 
TargetIDSetting::Any - : TargetIDSetting::Unsupported; - TargetIDSetting SramEccSetting = (ArchAttr & FEATURE_SRAMECC) - ? TargetIDSetting::Any - : TargetIDSetting::Unsupported; - - for (StringRef FeatureString : - ArrayRef(FeatureSplit).drop_front(1)) { - if (FeatureString.starts_with("xnack")) - XnackSetting = getTargetIDSettingFromFeatureString(FeatureString); - else if (FeatureString.starts_with("sramecc")) - SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString); - } - - return AMDGPUTargetID(Arch, TT.normalize(Triple::CanonicalForm::FOUR_IDENT), - XnackSetting, SramEccSetting, - TT.getOS() == Triple::AMDHSA); -} - -void AMDGPUTargetID::print(raw_ostream &StreamRep) const { - StreamRep << TargetTripleString << '-' << getArchNameAMDGCN(Arch); - - if (IsAMDHSA) { - // sramecc. - if (getSramEccSetting() == TargetIDSetting::Off) - StreamRep << ":sramecc-"; - else if (getSramEccSetting() == TargetIDSetting::On) - StreamRep << ":sramecc+"; - - // xnack. - if (getXnackSetting() == TargetIDSetting::Off) - StreamRep << ":xnack-"; - else if (getXnackSetting() == TargetIDSetting::On) - StreamRep << ":xnack+"; - } -} - -std::string AMDGPUTargetID::toString() const { - std::string Str; - raw_string_ostream OS(Str); - OS << *this; - return Str; -} - -bool AMDGPUTargetID::operator==(const AMDGPUTargetID &Other) const { - return Arch == Other.Arch && XnackSetting == Other.XnackSetting && - SramEccSetting == Other.SramEccSetting && IsAMDHSA == Other.IsAMDHSA && - TargetTripleString == Other.TargetTripleString; -} +namespace IsaInfo { unsigned getInstCacheLineSize(const MCSubtargetInfo &STI) { if (STI.getFeatureBits().test(FeatureInstCacheLineSize128)) @@ -3963,19 +3858,18 @@ ClusterDimsAttr ClusterDimsAttr::get(const Function &F) { } // namespace AMDGPU -raw_ostream &operator<<(raw_ostream &OS, - const AMDGPU::IsaInfo::TargetIDSetting S) { +raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::TargetIDSetting S) { switch (S) { - case 
(AMDGPU::IsaInfo::TargetIDSetting::Unsupported): + case (AMDGPU::TargetIDSetting::Unsupported): OS << "Unsupported"; break; - case (AMDGPU::IsaInfo::TargetIDSetting::Any): + case (AMDGPU::TargetIDSetting::Any): OS << "Any"; break; - case (AMDGPU::IsaInfo::TargetIDSetting::Off): + case (AMDGPU::TargetIDSetting::Off): OS << "Off"; break; - case (AMDGPU::IsaInfo::TargetIDSetting::On): + case (AMDGPU::TargetIDSetting::On): OS << "On"; break; } diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 81b0fd56e5bc5..1fb0f6b1dbc30 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -143,6 +143,14 @@ struct WMMAInstInfo { #define GET_WMMAInstInfoTable_DECL #include "AMDGPUGenSearchableTables.inc" +using TargetIDSetting = AMDGPU::TargetIDSetting; +using AMDGPUTargetID = AMDGPU::AMDGPUTargetID; + +/// Construct AMDGPUTargetID from MCSubtargetInfo. \p FeatureString is used to +/// determine explicitly requested xnack/sramecc settings. +AMDGPUTargetID createAMDGPUTargetID(const MCSubtargetInfo &STI, + StringRef FeatureString); + namespace IsaInfo { enum { @@ -152,112 +160,6 @@ enum { TRAP_NUM_SGPRS = 16 }; -enum class TargetIDSetting { Unsupported, Any, Off, On }; - -class AMDGPUTargetID { -private: - GPUKind Arch; - std::string TargetTripleString; - TargetIDSetting XnackSetting; - TargetIDSetting SramEccSetting; - bool IsAMDHSA; - -public: - explicit AMDGPUTargetID(const MCSubtargetInfo &STI, StringRef FeatureString); - - AMDGPUTargetID(GPUKind Arch, StringRef TargetTripleString, - TargetIDSetting XnackSetting, TargetIDSetting SramEccSetting, - bool IsAMDHSA); - - ~AMDGPUTargetID() = default; - - /// \return True if the current xnack setting is not "Unsupported". - bool isXnackSupported() const { - return XnackSetting != TargetIDSetting::Unsupported; - } - - /// \returns True if the current xnack setting is "On" or "Any". 
- bool isXnackOnOrAny() const { - return XnackSetting == TargetIDSetting::On || - XnackSetting == TargetIDSetting::Any; - } - - /// \returns True if current xnack setting is "On" or "Off", - /// false otherwise. - bool isXnackOnOrOff() const { - return getXnackSetting() == TargetIDSetting::On || - getXnackSetting() == TargetIDSetting::Off; - } - - /// \returns The current xnack TargetIDSetting, possible options are - /// "Unsupported", "Any", "Off", and "On". - TargetIDSetting getXnackSetting() const { return XnackSetting; } - - /// Sets xnack setting to \p NewXnackSetting. - void setXnackSetting(TargetIDSetting NewXnackSetting) { - XnackSetting = NewXnackSetting; - } - - /// \return True if the current sramecc setting is not "Unsupported". - bool isSramEccSupported() const { - return SramEccSetting != TargetIDSetting::Unsupported; - } - - /// \returns True if the current sramecc setting is "On" or "Any". - bool isSramEccOnOrAny() const { - return SramEccSetting == TargetIDSetting::On || - SramEccSetting == TargetIDSetting::Any; - } - - /// \returns True if current sramecc setting is "On" or "Off", - /// false otherwise. - bool isSramEccOnOrOff() const { - return getSramEccSetting() == TargetIDSetting::On || - getSramEccSetting() == TargetIDSetting::Off; - } - - /// \returns The current sramecc TargetIDSetting, possible options are - /// "Unsupported", "Any", "Off", and "On". - TargetIDSetting getSramEccSetting() const { return SramEccSetting; } - - /// Sets sramecc setting to \p NewSramEccSetting. - void setSramEccSetting(TargetIDSetting NewSramEccSetting) { - SramEccSetting = NewSramEccSetting; - } - - void setTargetIDFromTargetIDStream(StringRef TargetID); - - GPUKind getGPUKind() const { return Arch; } - - StringRef getTargetTripleString() const { return TargetTripleString; } - - /// \returns True if this is an AMDHSA target. 
- bool isAMDHSA() const { return IsAMDHSA; } - - /// Parse a target ID directive string (e.g., - /// "amdgcn-amd-amdhsa--gfx1010:xnack-") and return an AMDGPUTargetID. - /// \returns AMDGPUTargetID or std::nullopt if malformed. - static std::optional - parseTargetIDString(StringRef TargetIDDirective); - - /// Write string representation to \p OS - void print(raw_ostream &OS) const; - - /// \returns String representation of an object. - std::string toString() const; - - bool operator==(const AMDGPUTargetID &Other) const; - bool operator!=(const AMDGPUTargetID &Other) const { - return !(*this == Other); - } -}; - -inline raw_ostream &operator<<(raw_ostream &OS, - const AMDGPUTargetID &TargetID) { - TargetID.print(OS); - return OS; -} - /// \returns Instruction cache line size in bytes for given subtarget \p STI. unsigned getInstCacheLineSize(const MCSubtargetInfo &STI); @@ -1930,8 +1832,7 @@ class ClusterDimsAttr { } // namespace AMDGPU -raw_ostream &operator<<(raw_ostream &OS, - const AMDGPU::IsaInfo::TargetIDSetting S); +raw_ostream &operator<<(raw_ostream &OS, const AMDGPU::TargetIDSetting S); } // end namespace llvm diff --git a/llvm/lib/TargetParser/AMDGPUTargetParser.cpp b/llvm/lib/TargetParser/AMDGPUTargetParser.cpp index 77f1f2d795167..fe6a62a9e9e97 100644 --- a/llvm/lib/TargetParser/AMDGPUTargetParser.cpp +++ b/llvm/lib/TargetParser/AMDGPUTargetParser.cpp @@ -11,9 +11,12 @@ //===----------------------------------------------------------------------===// #include "llvm/TargetParser/AMDGPUTargetParser.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" using namespace llvm; @@ -669,3 +672,106 @@ AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, } return {NO_ERROR, StringRef()}; } + +AMDGPUTargetID::AMDGPUTargetID(GPUKind Arch, const Triple &TT, + 
TargetIDSetting XnackSetting, + TargetIDSetting SramEccSetting) + : Arch(Arch), + TargetTripleString(TT.normalize(Triple::CanonicalForm::FOUR_IDENT)), + XnackSetting(XnackSetting), SramEccSetting(SramEccSetting), + IsAMDHSA(TT.getOS() == Triple::AMDHSA) {} + +static TargetIDSetting +getTargetIDSettingFromFeatureString(StringRef FeatureString) { + if (FeatureString.ends_with("-")) + return TargetIDSetting::Off; + if (FeatureString.ends_with("+")) + return TargetIDSetting::On; + + llvm_unreachable("Malformed feature string"); +} + +void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) { + SmallVector TargetIDSplit; + TargetID.split(TargetIDSplit, ':'); + + for (const auto &FeatureString : TargetIDSplit) { + if (FeatureString.starts_with("xnack")) + XnackSetting = getTargetIDSettingFromFeatureString(FeatureString); + if (FeatureString.starts_with("sramecc")) + SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString); + } +} + +std::optional +AMDGPUTargetID::parseTargetIDString(StringRef TargetIDDirective) { + // Split on '-' to get arch-vendor-os-environment-processor:features + // There is a single dash separator after the 4-component triple + SmallVector Parts; + TargetIDDirective.split(Parts, '-', /*MaxSplit=*/4); + if (Parts.size() < 4) + return std::nullopt; + + Triple TT(Parts[0], Parts[1], Parts[2], Parts[3]); + if (!TT.isAMDGCN()) + return std::nullopt; + + SmallVector FeatureSplit; + Parts[4].split(FeatureSplit, ':'); + if (FeatureSplit.empty()) + return std::nullopt; + + StringRef CPUName = FeatureSplit[0]; + + // Determine xnack/sramecc support based on the architecture attributes + GPUKind Arch = parseArchAMDGCN(CPUName); + unsigned ArchAttr = getArchAttrAMDGCN(Arch); + + TargetIDSetting XnackSetting = (ArchAttr & FEATURE_XNACK) + ? TargetIDSetting::Any + : TargetIDSetting::Unsupported; + TargetIDSetting SramEccSetting = (ArchAttr & FEATURE_SRAMECC) + ? 
TargetIDSetting::Any + : TargetIDSetting::Unsupported; + + for (StringRef FeatureString : + ArrayRef(FeatureSplit).drop_front(1)) { + if (FeatureString.starts_with("xnack")) + XnackSetting = getTargetIDSettingFromFeatureString(FeatureString); + else if (FeatureString.starts_with("sramecc")) + SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString); + } + + return AMDGPUTargetID(Arch, TT, XnackSetting, SramEccSetting); +} + +void AMDGPUTargetID::print(raw_ostream &StreamRep) const { + StreamRep << TargetTripleString << '-' << getArchNameAMDGCN(Arch); + + if (IsAMDHSA) { + // sramecc. + if (getSramEccSetting() == TargetIDSetting::Off) + StreamRep << ":sramecc-"; + else if (getSramEccSetting() == TargetIDSetting::On) + StreamRep << ":sramecc+"; + + // xnack. + if (getXnackSetting() == TargetIDSetting::Off) + StreamRep << ":xnack-"; + else if (getXnackSetting() == TargetIDSetting::On) + StreamRep << ":xnack+"; + } +} + +std::string AMDGPUTargetID::toString() const { + std::string Str; + raw_string_ostream OS(Str); + OS << *this; + return Str; +} + +bool AMDGPUTargetID::operator==(const AMDGPUTargetID &Other) const { + return Arch == Other.Arch && XnackSetting == Other.XnackSetting && + SramEccSetting == Other.SramEccSetting && IsAMDHSA == Other.IsAMDHSA && + TargetTripleString == Other.TargetTripleString; +} From 03d27ac35cb24800f6713f82ef496b6d3eb75ff5 Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Tue, 23 Jun 2026 02:34:26 -0700 Subject: [PATCH 146/511] [UR][L0] Add read-only flag to host memory registration (#22388) Replace the placeholder TBD flag in the host memory registration flags enum with UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY and regenerate the API headers. The flag indicates that device access to the registered range is read-only. In the Level Zero v2 adapter, map the flag onto ZE_HOST_MEM_ALLOC_FLAG_MEM_READ_ONLY when calling zeMemAllocHost so the driver registers the external system memory range in read-only device-access mode. 
Related to: https://github.com/intel/llvm/pull/22324 Assisted-By: Claude --- unified-runtime/include/unified-runtime/ur_api.h | 6 ++++-- unified-runtime/include/unified-runtime/ur_print.hpp | 12 ++++++------ .../scripts/core/exp-usm-host-alloc-register.yml | 7 +++++-- .../source/adapters/level_zero/v2/usm.cpp | 12 ++++++++++-- 4 files changed, 25 insertions(+), 12 deletions(-) diff --git a/unified-runtime/include/unified-runtime/ur_api.h b/unified-runtime/include/unified-runtime/ur_api.h index 2ee9c031c76fd..dcb0a60e76805 100644 --- a/unified-runtime/include/unified-runtime/ur_api.h +++ b/unified-runtime/include/unified-runtime/ur_api.h @@ -11852,8 +11852,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMContextMemcpyExp( /// @brief USM host memory registration flags. typedef uint32_t ur_exp_usm_host_alloc_register_flags_t; typedef enum ur_exp_usm_host_alloc_register_flag_t { - /// Reserved for future use. - UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_TBD = UR_BIT(0), + /// Device access to the registered range is read-only. The behavior + /// is undefined if device code writes to a range registered with this + /// flag. 
+ UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY = UR_BIT(0), /// @cond UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_FORCE_UINT32 = 0x7fffffff /// @endcond diff --git a/unified-runtime/include/unified-runtime/ur_print.hpp b/unified-runtime/include/unified-runtime/ur_print.hpp index c3659b4cacc3d..c741f8eb85ecc 100644 --- a/unified-runtime/include/unified-runtime/ur_print.hpp +++ b/unified-runtime/include/unified-runtime/ur_print.hpp @@ -12533,8 +12533,8 @@ inline ur_result_t printFlag(std::ostream &os, inline std::ostream & operator<<(std::ostream &os, enum ur_exp_usm_host_alloc_register_flag_t value) { switch (value) { - case UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_TBD: - os << "UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_TBD"; + case UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY: + os << "UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY"; break; default: os << "unknown enumerator"; @@ -12553,15 +12553,15 @@ printFlag(std::ostream &os, uint32_t val = flag; bool first = true; - if ((val & UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_TBD) == - (uint32_t)UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_TBD) { - val ^= (uint32_t)UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_TBD; + if ((val & UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY) == + (uint32_t)UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY) { + val ^= (uint32_t)UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY; if (!first) { os << " | "; } else { first = false; } - os << UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_TBD; + os << UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY; } if (val != 0) { std::bitset<32> bits(val); diff --git a/unified-runtime/scripts/core/exp-usm-host-alloc-register.yml b/unified-runtime/scripts/core/exp-usm-host-alloc-register.yml index 173da23cff408..88df311477b53 100644 --- a/unified-runtime/scripts/core/exp-usm-host-alloc-register.yml +++ b/unified-runtime/scripts/core/exp-usm-host-alloc-register.yml @@ -29,8 +29,11 @@ type: enum desc: "USM host memory registration flags." 
name: $x_exp_usm_host_alloc_register_flags_t etors: - - name: TBD - desc: "Reserved for future use." + - name: READ_ONLY + desc: |- + Device access to the registered range is read-only. The behavior + is undefined if device code writes to a range registered with this + flag. --- #-------------------------------------------------------------------------- type: struct desc: "USM host memory registration properties." diff --git a/unified-runtime/source/adapters/level_zero/v2/usm.cpp b/unified-runtime/source/adapters/level_zero/v2/usm.cpp index 0a201cef0b6b7..ead2ca4a95260 100644 --- a/unified-runtime/source/adapters/level_zero/v2/usm.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/usm.cpp @@ -903,7 +903,7 @@ ur_result_t UR_APICALL urUSMContextMemcpyExp(ur_context_handle_t hContext, ur_result_t urUSMHostAllocRegisterExp( ur_context_handle_t hContext, void *pHostMem, size_t size, - const ur_exp_usm_host_alloc_register_properties_t * /*pProperties*/) { + const ur_exp_usm_host_alloc_register_properties_t *pProperties) { if (!hContext->getPlatform()->ZeExternalMemoryMappingExtensionSupported) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } @@ -927,8 +927,16 @@ ur_result_t urUSMHostAllocRegisterExp( ZE_STRUCTURE_TYPE_EXTERNAL_MEMMAP_SYSMEM_EXT_DESC, nullptr, pHostMem, size}; + // Map the read-only registration flag onto the Level Zero host allocation + // flag, telling the driver that device access to the range is read-only. 
+ ze_host_mem_alloc_flags_t hostFlags = 0; + if (pProperties && + (pProperties->flags & UR_EXP_USM_HOST_ALLOC_REGISTER_FLAG_READ_ONLY)) { + hostFlags |= ZE_HOST_MEM_ALLOC_FLAG_MEM_READ_ONLY; + } + ze_host_mem_alloc_desc_t hostDesc = {ZE_STRUCTURE_TYPE_HOST_MEM_ALLOC_DESC, - &sysMemDesc, 0}; + &sysMemDesc, hostFlags}; void *mappedMem = nullptr; ZE2UR_CALL(zeMemAllocHost, From f8b2b1c7fd4f82ba94547bb4f3e7984932339690 Mon Sep 17 00:00:00 2001 From: kekaczma <162420515+kekaczma@users.noreply.github.com> Date: Tue, 23 Jun 2026 11:38:58 +0200 Subject: [PATCH 147/511] [UR][CUDA] Fix flaky urUSMContextMemcpyExp test with optimized D2D handling (#22090) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The urUSMContextMemcpyExpTestDevice::Success test was flaky on CUDA due to cuMemcpy() executing asynchronously for device-to-device copies. This caused race conditions where data was read before the copy completed. Solution uses hybrid approach: - H2D/D2H: cuMemcpy() (synchronous, fast path) - D2D: dedicated stream + cuMemcpyAsync() + cuStreamSynchronize() This provides correctness with minimal overhead (~15-70μs for stream create/destroy) while avoiding the performance cliff of context-wide synchronization (cuCtxSynchronize blocks all GPU work). Also adds comprehensive test coverage (large allocations, concurrent copies, multi-threaded sequential, unaligned pointers) and re-enables L0_V2 tests - (#19604) no repro on this configuration. 
Fixes #19688 --- unified-runtime/source/adapters/cuda/usm.cpp | 65 +++++- .../urUSMContextMemcpyExp.cpp | 212 +++++++++++++++++- 2 files changed, 266 insertions(+), 11 deletions(-) diff --git a/unified-runtime/source/adapters/cuda/usm.cpp b/unified-runtime/source/adapters/cuda/usm.cpp index 7a56030c8978a..1a65db90fe4b1 100644 --- a/unified-runtime/source/adapters/cuda/usm.cpp +++ b/unified-runtime/source/adapters/cuda/usm.cpp @@ -573,12 +573,65 @@ urUSMPoolTrimToExp(ur_context_handle_t hContext, ur_device_handle_t hDevice, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urUSMContextMemcpyExp(ur_context_handle_t, - void *pDst, - const void *pSrc, - size_t Size) { - UR_CHECK_ERROR(cuMemcpy((CUdeviceptr)pDst, (CUdeviceptr)pSrc, Size)); - return UR_RESULT_SUCCESS; +UR_APIEXPORT ur_result_t UR_APICALL urUSMContextMemcpyExp( + ur_context_handle_t hContext, void *pDst, const void *pSrc, size_t Size) { + // Detect memory types to determine if this is a device-to-device copy + CUmemorytype memTypeDst, memTypeSrc; + CUresult dstResult = cuPointerGetAttribute( + &memTypeDst, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)pDst); + CUresult srcResult = cuPointerGetAttribute( + &memTypeSrc, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, (CUdeviceptr)pSrc); + + bool isDstDevice = + (dstResult == CUDA_SUCCESS && memTypeDst == CU_MEMORYTYPE_DEVICE); + bool isSrcDevice = + (srcResult == CUDA_SUCCESS && memTypeSrc == CU_MEMORYTYPE_DEVICE); + + // For host-to-device or device-to-host, use simple synchronous copy + if (!isDstDevice || !isSrcDevice) { + UR_CHECK_ERROR(cuMemcpy((CUdeviceptr)pDst, (CUdeviceptr)pSrc, Size)); + return UR_RESULT_SUCCESS; + } + + // Get the device ordinal for the destination pointer + unsigned int devIdx = 0; + UR_CHECK_ERROR(cuPointerGetAttribute( + &devIdx, CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL, (CUdeviceptr)pDst)); + + if (devIdx >= hContext->getDevices().size()) { + return UR_RESULT_ERROR_INVALID_CONTEXT; + } + + ur_device_handle_t owningDev = 
hContext->getDevices()[devIdx]; + ScopedContext Active(owningDev); + + // Create a dedicated stream for this copy operation + CUstream copyStream = nullptr; + UR_CHECK_ERROR(cuStreamCreate(&copyStream, CU_STREAM_NON_BLOCKING)); + + auto streamCleanup = [&copyStream]() { + if (copyStream) { + cuStreamDestroy(copyStream); + copyStream = nullptr; + } + }; + + try { + UR_CHECK_ERROR( + cuMemcpyAsync((CUdeviceptr)pDst, (CUdeviceptr)pSrc, Size, copyStream)); + + UR_CHECK_ERROR(cuStreamSynchronize(copyStream)); + + streamCleanup(); + return UR_RESULT_SUCCESS; + + } catch (ur_result_t Err) { + streamCleanup(); + return Err; + } catch (...) { + streamCleanup(); + throw; + } } UR_APIEXPORT ur_result_t UR_APICALL urUSMHostAllocRegisterExp( diff --git a/unified-runtime/test/conformance/exp_usm_context_memcpy/urUSMContextMemcpyExp.cpp b/unified-runtime/test/conformance/exp_usm_context_memcpy/urUSMContextMemcpyExp.cpp index e051d8a33cd21..82044b6293047 100644 --- a/unified-runtime/test/conformance/exp_usm_context_memcpy/urUSMContextMemcpyExp.cpp +++ b/unified-runtime/test/conformance/exp_usm_context_memcpy/urUSMContextMemcpyExp.cpp @@ -6,11 +6,12 @@ #include "uur/utils.h" #include +#include +#include +#include + struct urUSMContextMemcpyExpTest : uur::urMultiQueueTypeTest { void SetUp() override { - // https://github.com/intel/llvm/issues/19604 - // this test uses urEnqueueUSMFill which looks to be bugged with latest driver - UUR_KNOWN_FAILURE_ON(uur::LevelZeroV2{}); UUR_RETURN_ON_FATAL_FAILURE(uur::urMultiQueueTypeTest::SetUp()); bool context_memcpy_support = false; @@ -80,8 +81,6 @@ struct urUSMContextMemcpyExpTestDevice : urUSMContextMemcpyExpTest { UUR_INSTANTIATE_DEVICE_TEST_SUITE_MULTI_QUEUE(urUSMContextMemcpyExpTestDevice); TEST_P(urUSMContextMemcpyExpTestDevice, Success) { - // https://github.com/intel/llvm/issues/19688 - UUR_KNOWN_FAILURE_ON(uur::CUDA{}); ASSERT_SUCCESS( urUSMContextMemcpyExp(context, dst_ptr, src_ptr, allocation_size)); verifyData(); @@ -166,3 +165,206 @@
TEST_P(urUSMContextMemcpyExpTestShared, Success) { urUSMContextMemcpyExp(context, dst_ptr, src_ptr, allocation_size)); verifyData(); } + +TEST_P(urUSMContextMemcpyExpTestDevice, LargeAllocation) { + constexpr size_t large_size = 64 * 1024 * 1024; + void *large_src = nullptr; + void *large_dst = nullptr; + + ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, large_size, + &large_src)); + ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, large_size, + &large_dst)); + + constexpr uint8_t pattern = 0xAB; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, large_src, sizeof(pattern), &pattern, + large_size, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ASSERT_SUCCESS( + urUSMContextMemcpyExp(context, large_dst, large_src, large_size)); + + uint8_t first = 0, last = 0; + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, true, &first, large_dst, 1, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urEnqueueUSMMemcpy( + queue, true, &last, static_cast(large_dst) + large_size - 1, 1, 0, + nullptr, nullptr)); + ASSERT_EQ(first, pattern); + ASSERT_EQ(last, pattern); + + EXPECT_SUCCESS(urUSMFree(context, large_src)); + EXPECT_SUCCESS(urUSMFree(context, large_dst)); +} + +TEST_P(urUSMContextMemcpyExpTestDevice, ConcurrentCopies) { + constexpr int num_threads = 4; + constexpr size_t size_per_thread = 1024; + + struct ThreadData { + void *src; + void *dst; + uint8_t pattern; + }; + + std::vector thread_data(num_threads); + + for (int i = 0; i < num_threads; ++i) { + ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, + size_per_thread, &thread_data[i].src)); + ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, + size_per_thread, &thread_data[i].dst)); + + thread_data[i].pattern = static_cast(i + 1); + ASSERT_SUCCESS(urEnqueueUSMFill(queue, thread_data[i].src, 1, + &thread_data[i].pattern, size_per_thread, 0, + nullptr, nullptr)); + } + ASSERT_SUCCESS(urQueueFinish(queue)); + + std::vector threads; + std::atomic errors{0}; + 
+ for (int i = 0; i < num_threads; ++i) { + threads.emplace_back([&, i]() { + auto result = urUSMContextMemcpyExp(context, thread_data[i].dst, + thread_data[i].src, size_per_thread); + if (result != UR_RESULT_SUCCESS) { + errors++; + } + }); + } + + for (auto &t : threads) { + t.join(); + } + + ASSERT_EQ(errors.load(), 0) << "Some concurrent copies failed"; + + for (int i = 0; i < num_threads; ++i) { + std::vector result(size_per_thread); + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, true, result.data(), + thread_data[i].dst, size_per_thread, 0, + nullptr, nullptr)); + + for (auto byte : result) { + ASSERT_EQ(byte, thread_data[i].pattern) + << "Thread " << i << " data corrupted"; + } + + EXPECT_SUCCESS(urUSMFree(context, thread_data[i].src)); + EXPECT_SUCCESS(urUSMFree(context, thread_data[i].dst)); + } +} + +TEST_P(urUSMContextMemcpyExpTestDevice, MultiThreadedSequential) { + constexpr int num_threads = 4; + constexpr size_t size_per_thread = 512; + + std::atomic errors{0}; + std::vector threads; + + for (int thread_id = 0; thread_id < num_threads; ++thread_id) { + threads.emplace_back([&, thread_id]() { + void *src = nullptr; + void *dst = nullptr; + + auto cleanup = [&]() { + if (src) + urUSMFree(context, src); + if (dst) + urUSMFree(context, dst); + }; + + if (urUSMDeviceAlloc(context, device, nullptr, nullptr, size_per_thread, + &src) != UR_RESULT_SUCCESS) { + errors++; + return; + } + if (urUSMDeviceAlloc(context, device, nullptr, nullptr, size_per_thread, + &dst) != UR_RESULT_SUCCESS) { + cleanup(); + errors++; + return; + } + + uint8_t pattern = static_cast(thread_id + 1); + if (urEnqueueUSMFill(queue, src, 1, &pattern, size_per_thread, 0, nullptr, + nullptr) != UR_RESULT_SUCCESS) { + cleanup(); + errors++; + return; + } + if (urQueueFinish(queue) != UR_RESULT_SUCCESS) { + cleanup(); + errors++; + return; + } + + if (urUSMContextMemcpyExp(context, dst, src, size_per_thread) != + UR_RESULT_SUCCESS) { + cleanup(); + errors++; + return; + } + + std::vector 
verify(size_per_thread); + if (urEnqueueUSMMemcpy(queue, true, verify.data(), dst, size_per_thread, + 0, nullptr, nullptr) != UR_RESULT_SUCCESS) { + cleanup(); + errors++; + return; + } + + for (auto byte : verify) { + if (byte != pattern) { + errors++; + break; + } + } + + cleanup(); + }); + } + + for (auto &t : threads) { + t.join(); + } + + ASSERT_EQ(errors.load(), 0) << "Multi-threaded sequential test failed"; +} + +TEST_P(urUSMContextMemcpyExpTestDevice, UnalignedPointers) { + constexpr size_t base_size = 1024; + constexpr size_t offset = 7; + void *src_base = nullptr; + void *dst_base = nullptr; + + ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, base_size, + &src_base)); + ASSERT_SUCCESS(urUSMDeviceAlloc(context, device, nullptr, nullptr, base_size, + &dst_base)); + + void *src_unaligned = static_cast(src_base) + offset; + void *dst_unaligned = static_cast(dst_base) + offset; + size_t copy_size = base_size - offset; + + constexpr uint8_t pattern = 0xCD; + ASSERT_SUCCESS(urEnqueueUSMFill(queue, src_base, sizeof(pattern), &pattern, + base_size, 0, nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ASSERT_SUCCESS( + urUSMContextMemcpyExp(context, dst_unaligned, src_unaligned, copy_size)); + + std::vector verify(copy_size); + ASSERT_SUCCESS(urEnqueueUSMMemcpy(queue, true, verify.data(), dst_unaligned, + copy_size, 0, nullptr, nullptr)); + + for (auto byte : verify) { + ASSERT_EQ(byte, pattern); + } + + EXPECT_SUCCESS(urUSMFree(context, src_base)); + EXPECT_SUCCESS(urUSMFree(context, dst_base)); +} From 5615c843e281a26a8214ed2ee2d78e3233039659 Mon Sep 17 00:00:00 2001 From: John Brawn Date: Tue, 23 Jun 2026 10:41:24 +0100 Subject: [PATCH 148/511] [LSR] Don't merge ICmpZero uses outside loop (#205131) In NarrowSearchSpaceByMergingUsesOutsideLoop don't merge ICmpZero uses outside the loop with uses inside the loop, as the resulting use will have a kind that's not ICmpZero, which will mean the compare won't be expanded correctly 
later. --- .../Transforms/Scalar/LoopStrengthReduce.cpp | 5 +- .../AArch64/use-outside-loop.ll | 171 ++++++++++++++++++ 2 files changed, 175 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 57a76565bfbb7..4aeb32bfd3b4b 100644 --- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -5238,7 +5238,10 @@ void LSRInstance::NarrowSearchSpaceByMergingUsesOutsideLoop() { for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { LSRUse &LU = Uses[LUIdx]; - if (!LU.AllFixupsOutsideLoop || LU.Formulae.empty()) + // Don't merge ICmpZero uses outside the loop, as ICmpZero needs to be + // handled specially when expanding. + if (!LU.AllFixupsOutsideLoop || LU.Formulae.empty() || + LU.Kind == LSRUse::ICmpZero) continue; LLVM_DEBUG(dbgs() << " Trying to eliminate use "; LU.print(dbgs()); diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/use-outside-loop.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/use-outside-loop.ll index 02ba9f23a4222..dcad054a7c4c8 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/use-outside-loop.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/use-outside-loop.ll @@ -627,3 +627,174 @@ exit: store i64 %iv_sub.next, ptr %arrayidx4, align 8 ret i32 %ret.4 } + +; Here the compare outside the loop has the same formula as the one inside the +; loop, but the one outside is ICmpZero whereas the one inside is Basic. We +; shouldn't merge these as the compare outside the loop wouldn't be transformed +; correctly when it's converted to down-counting form. 
+define i1 @icmpzero_outside_loop(ptr %p, i64 %n) { +; CHECK-LABEL: define i1 @icmpzero_outside_loop( +; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[P0_LOAD:%.*]] = load ptr, ptr [[P]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 1 +; CHECK-NEXT: [[P1_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 2 +; CHECK-NEXT: [[P2_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 3 +; CHECK-NEXT: [[P3_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr nuw i8, ptr [[P0_LOAD]], i64 128 +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr nuw i8, ptr [[P1_LOAD]], i64 128 +; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr nuw i8, ptr [[P2_LOAD]], i64 128 +; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr nuw i8, ptr [[P3_LOAD]], i64 128 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[LSR_IV10:%.*]] = phi ptr [ [[SCEVGEP11:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP9]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV7:%.*]] = phi ptr [ [[SCEVGEP8:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP6]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV4:%.*]] = phi ptr [ [[SCEVGEP5:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP3]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[TMP0]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1 +; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[SCEVGEP5]] = getelementptr i8, ptr [[LSR_IV4]], i64 4 +; CHECK-NEXT: [[SCEVGEP8]] = getelementptr i8, ptr [[LSR_IV7]], i64 4 +; CHECK-NEXT: [[SCEVGEP11]] = getelementptr i8, ptr 
[[LSR_IV10]], i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ult i64 [[LSR_IV_NEXT]], 1 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: store ptr [[SCEVGEP2]], ptr [[P]], align 8 +; CHECK-NEXT: store ptr [[SCEVGEP5]], ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: store ptr [[SCEVGEP8]], ptr [[ARRAYIDX2]], align 8 +; CHECK-NEXT: store ptr [[SCEVGEP11]], ptr [[ARRAYIDX3]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %p0.load = load ptr, ptr %p, align 8 + %arrayidx1 = getelementptr inbounds nuw ptr, ptr %p, i64 1 + %p1.load = load ptr, ptr %arrayidx1, align 8 + %arrayidx2 = getelementptr inbounds nuw ptr, ptr %p, i64 2 + %p2.load = load ptr, ptr %arrayidx2, align 8 + %arrayidx3 = getelementptr inbounds nuw ptr, ptr %p, i64 3 + %p3.load = load ptr, ptr %arrayidx3, align 8 + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %off = phi i64 [ %off.next, %for.body ], [ 32, %entry ] + %ret.0 = phi i32 [ %ret.4, %for.body ], [ 0, %entry ] + %p0 = getelementptr inbounds nuw i32, ptr %p0.load, i64 %off + %val0 = load i32, ptr %p0, align 4 + %ret.1 = add nsw i32 %val0, %ret.0 + %p1 = getelementptr inbounds nuw i32, ptr %p1.load, i64 %off + %val1 = load i32, ptr %p1, align 4 + %ret.2 = add nsw i32 %val1, %ret.1 + %p2 = getelementptr inbounds nuw i32, ptr %p2.load, i64 %off + %val2 = load i32, ptr %p2, align 4 + %ret.3 = add nsw i32 %val2, %ret.2 + %p3 = getelementptr inbounds nuw i32, ptr %p3.load, i64 %off + %val3 = load i32, ptr %p3, align 4 + %ret.4 = add nsw i32 %val3, %ret.3 + %iv.next = add nuw nsw i64 %iv, 1 + %off.next = add nuw nsw i64 %off, 1 + %sub = sub i64 %n, %iv + %exitcond = icmp ult i64 %sub, 1 + br i1 %exitcond, label %exit, label %for.body + +exit: + %p0.last = getelementptr inbounds nuw i32, ptr %p0.load, i64 %off.next + store ptr %p0.last, ptr %p, align 8 + %p1.last = getelementptr inbounds nuw i32, 
ptr %p1.load, i64 %off.next + store ptr %p1.last, ptr %arrayidx1, align 8 + %p2.last = getelementptr inbounds nuw i32, ptr %p2.load, i64 %off.next + store ptr %p2.last, ptr %arrayidx2, align 8 + %p3.last = getelementptr inbounds nuw i32, ptr %p3.load, i64 %off.next + store ptr %p3.last, ptr %arrayidx3, align 8 + %cmp = icmp eq i64 %iv, %n + ret i1 %cmp +} + +; Here the conditions inside and outside the loop are both icmpzero, and we +; expect them to be combined into the same LSRUse right from the start. +define i1 @icmpzero_inside_outside_loop(ptr %p, i64 %n) { +; CHECK-LABEL: define i1 @icmpzero_inside_outside_loop( +; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[P0_LOAD:%.*]] = load ptr, ptr [[P]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 1 +; CHECK-NEXT: [[P1_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 2 +; CHECK-NEXT: [[P2_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds nuw ptr, ptr [[P]], i64 3 +; CHECK-NEXT: [[P3_LOAD:%.*]] = load ptr, ptr [[ARRAYIDX3]], align 8 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr nuw i8, ptr [[P0_LOAD]], i64 128 +; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr nuw i8, ptr [[P1_LOAD]], i64 128 +; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr nuw i8, ptr [[P2_LOAD]], i64 128 +; CHECK-NEXT: [[SCEVGEP9:%.*]] = getelementptr nuw i8, ptr [[P3_LOAD]], i64 128 +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[LSR_IV10:%.*]] = phi ptr [ [[SCEVGEP11:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP9]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV7:%.*]] = phi ptr [ [[SCEVGEP8:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP6]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV4:%.*]] = phi ptr [ [[SCEVGEP5:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP3]], %[[ENTRY]] ] +; CHECK-NEXT: 
[[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], %[[FOR_BODY]] ], [ [[SCEVGEP]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], %[[FOR_BODY]] ], [ [[TMP0]], %[[ENTRY]] ] +; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1 +; CHECK-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i64 4 +; CHECK-NEXT: [[SCEVGEP5]] = getelementptr i8, ptr [[LSR_IV4]], i64 4 +; CHECK-NEXT: [[SCEVGEP8]] = getelementptr i8, ptr [[LSR_IV7]], i64 4 +; CHECK-NEXT: [[SCEVGEP11]] = getelementptr i8, ptr [[LSR_IV10]], i64 4 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 +; CHECK-NEXT: br i1 [[EXITCOND]], label %[[EXIT:.*]], label %[[FOR_BODY]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: store ptr [[SCEVGEP2]], ptr [[P]], align 8 +; CHECK-NEXT: store ptr [[SCEVGEP5]], ptr [[ARRAYIDX1]], align 8 +; CHECK-NEXT: store ptr [[SCEVGEP8]], ptr [[ARRAYIDX2]], align 8 +; CHECK-NEXT: store ptr [[SCEVGEP11]], ptr [[ARRAYIDX3]], align 8 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; +entry: + %p0.load = load ptr, ptr %p, align 8 + %arrayidx1 = getelementptr inbounds nuw ptr, ptr %p, i64 1 + %p1.load = load ptr, ptr %arrayidx1, align 8 + %arrayidx2 = getelementptr inbounds nuw ptr, ptr %p, i64 2 + %p2.load = load ptr, ptr %arrayidx2, align 8 + %arrayidx3 = getelementptr inbounds nuw ptr, ptr %p, i64 3 + %p3.load = load ptr, ptr %arrayidx3, align 8 + br label %for.body + +for.body: + %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ] + %off = phi i64 [ %off.next, %for.body ], [ 32, %entry ] + %ret.0 = phi i32 [ %ret.4, %for.body ], [ 0, %entry ] + %p0 = getelementptr inbounds nuw i32, ptr %p0.load, i64 %off + %val0 = load i32, ptr %p0, align 4 + %ret.1 = add nsw i32 %val0, %ret.0 + %p1 = getelementptr inbounds nuw i32, ptr %p1.load, i64 %off + %val1 = load i32, ptr %p1, align 4 + %ret.2 = add nsw i32 %val1, %ret.1 + %p2 = getelementptr inbounds nuw i32, ptr %p2.load, i64 %off + %val2 = load i32, ptr %p2, align 4 + 
%ret.3 = add nsw i32 %val2, %ret.2 + %p3 = getelementptr inbounds nuw i32, ptr %p3.load, i64 %off + %val3 = load i32, ptr %p3, align 4 + %ret.4 = add nsw i32 %val3, %ret.3 + %iv.next = add nuw nsw i64 %iv, 1 + %off.next = add nuw nsw i64 %off, 1 + %exitcond = icmp eq i64 %iv, %n + br i1 %exitcond, label %exit, label %for.body + +exit: + %p0.last = getelementptr inbounds nuw i32, ptr %p0.load, i64 %off.next + store ptr %p0.last, ptr %p, align 8 + %p1.last = getelementptr inbounds nuw i32, ptr %p1.load, i64 %off.next + store ptr %p1.last, ptr %arrayidx1, align 8 + %p2.last = getelementptr inbounds nuw i32, ptr %p2.load, i64 %off.next + store ptr %p2.last, ptr %arrayidx2, align 8 + %p3.last = getelementptr inbounds nuw i32, ptr %p3.load, i64 %off.next + store ptr %p3.last, ptr %arrayidx3, align 8 + %cmp = icmp eq i64 %iv, %n + ret i1 %cmp +} From 6f5a4808443a9be1dfa11f7017c402ef82c3f3b4 Mon Sep 17 00:00:00 2001 From: Jianjian Guan Date: Tue, 23 Jun 2026 17:45:07 +0800 Subject: [PATCH 149/511] [CIR] Fix LoadOp creation (#205294) --- .../Dialect/Transforms/TargetLowering/CIRABIRewriteContext.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRABIRewriteContext.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRABIRewriteContext.cpp index b252ed188c408..a0d3b5d39df89 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRABIRewriteContext.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRABIRewriteContext.cpp @@ -532,6 +532,7 @@ void rewriteIndirectReturnCall(cir::CallOp call, auto load = cir::LoadOp::create(builder, call.getLoc(), origRetTy, sretSlot, /*isDeref=*/mlir::UnitAttr(), /*isVolatile=*/mlir::UnitAttr(), + /*is_nontemporal=*/mlir::UnitAttr(), /*alignment=*/mlir::IntegerAttr(), /*sync_scope=*/cir::SyncScopeKindAttr(), /*mem_order=*/cir::MemOrderAttr()); From 7300c3dbfed7f0cdbe14390c06e462c471283fa2 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 23 Jun 2026 11:57:09 +0200 
Subject: [PATCH 150/511] [X86][TTI] Handle structs in areTypesABICompatible() (#205308) Fixes a regression from #205106. getValueType() asserts on aggregate types. Use ComputeValueVTs() to compute the de-aggregated VTs. Performing argument promotion for struct types seems pretty dubious to me, but it was previously allowed, so I'm retaining that behavior. We may want to disable promotion of aggregates in ArgPromotion entirely though. --- .../lib/Target/X86/X86TargetTransformInfo.cpp | 11 ++++++--- .../ArgumentPromotion/X86/struct-load.ll | 24 +++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 llvm/test/Transforms/ArgumentPromotion/X86/struct-load.ll diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 6ff621aca79d2..fba2f08912fed 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -51,6 +51,7 @@ #include "X86TargetTransformInfo.h" #include "llvm/ADT/SmallBitVector.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/BasicTTIImpl.h" #include "llvm/CodeGen/CostTable.h" #include "llvm/CodeGen/TargetLowering.h" @@ -6775,11 +6776,15 @@ bool X86TTIImpl::areTypesABICompatible(const Function *Caller, TM.getSubtargetImpl(*Callee)->getTargetLowering(); LLVMContext &Ctx = Caller->getContext(); + const DataLayout &DL = Caller->getDataLayout(); CallingConv::ID CC = Callee->getCallingConv(); return all_of(Types, [&](Type *Ty) { - EVT VT = CallerTLI->getValueType(DL, Ty); - return CallerTLI->getRegisterTypeForCallingConv(Ctx, CC, VT) == - CalleeTLI->getRegisterTypeForCallingConv(Ctx, CC, VT); + SmallVector VTs; + ComputeValueVTs(*CallerTLI, DL, Ty, VTs); + return all_of(VTs, [&](EVT VT) { + return CallerTLI->getRegisterTypeForCallingConv(Ctx, CC, VT) == + CalleeTLI->getRegisterTypeForCallingConv(Ctx, CC, VT); + }); }); } diff --git
a/llvm/test/Transforms/ArgumentPromotion/X86/struct-load.ll b/llvm/test/Transforms/ArgumentPromotion/X86/struct-load.ll new file mode 100644 index 0000000000000..72d4239ba4bed --- /dev/null +++ b/llvm/test/Transforms/ArgumentPromotion/X86/struct-load.ll @@ -0,0 +1,24 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes=argpromotion < %s | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +define { i32, i32 } @caller(ptr %p) { +; CHECK-LABEL: define { i32, i32 } @caller( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[P_VAL:%.*]] = load { i32, i32 }, ptr [[P]], align 4 +; CHECK-NEXT: [[RES:%.*]] = call { i32, i32 } @callee({ i32, i32 } [[P_VAL]]) +; CHECK-NEXT: ret { i32, i32 } [[RES]] +; + %res = call { i32, i32 } @callee(ptr %p) + ret { i32, i32 } %res +} + +define internal { i32, i32 } @callee(ptr %p) { +; CHECK-LABEL: define internal { i32, i32 } @callee( +; CHECK-SAME: { i32, i32 } [[P_0_VAL:%.*]]) { +; CHECK-NEXT: ret { i32, i32 } [[P_0_VAL]] +; + %res = load { i32, i32 }, ptr %p + ret { i32, i32 } %res +} From a55ecc2d05e66d757583d8099b40aa6e888661d2 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 23 Jun 2026 12:16:36 +0200 Subject: [PATCH 151/511] AMDGPU: Temporarily restore disassembler's dependency on TargetParser, again (#205309) Reverts part of #205268 --- llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt index aa96d67c527a4..aeede04081fc7 100644 --- a/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/Disassembler/CMakeLists.txt @@ -10,6 +10,7 @@ add_llvm_component_library(LLVMAMDGPUDisassembler CodeGenTypes MC MCDisassembler + TargetParser Support ADD_TO_COMPONENT From de8dd5908f75491f0f34dc8c29f4e467ffc69ac3 Mon Sep 17 00:00:00 2001 From: Konstantinos Parasyris Date: Tue, 23 Jun 
2026 03:18:33 -0700 Subject: [PATCH 152/511] [SYCL] Add header-reach layering guard; drop include_deps goldens (#22326) The previously existing include-dep tests frequently resulted in conflicts when merging intel/llvm PRs with our downstream compilers as they require order-sensitive inclusions. This PR replaces these order-sensitive, full-transitive-list include_deps golden tests with a contract check asserting lightweight top-level headers do not transitively pull in the heavy kernel-launch headers. Locks in ~46 already-lean headers and backstops the upcoming queue/usm/backend splits. The plan is for later PRs to extend the `FORBIDDEN_REACH` list with additional entries as appropriate. Test-only; no shipping headers changed. --- sycl/test/include_deps/deps_known.sh | 26 -- sycl/test/include_deps/header_reach.cpp | 1 + sycl/test/include_deps/sycl_accessor.hpp.cpp | 50 ---- sycl/test/include_deps/sycl_buffer.hpp.cpp | 49 ---- .../include_deps/sycl_detail_core.hpp.cpp | 147 ------------ .../sycl_detail_defines_elementary.hpp.cpp | 8 - .../include_deps/sycl_detail_export.hpp.cpp | 8 - .../sycl_khr_split_headers_accessor.hpp.cpp | 53 ----- .../sycl_khr_split_headers_atomic.hpp.cpp | 42 ---- .../sycl_khr_split_headers_bit.hpp.cpp | 12 - .../sycl_khr_split_headers_buffer.hpp.cpp | 52 ---- ..._khr_split_headers_builtins_common.hpp.cpp | 41 ---- ...r_split_headers_builtins_geometric.hpp.cpp | 41 ---- ...khr_split_headers_builtins_integer.hpp.cpp | 41 ---- ...cl_khr_split_headers_builtins_math.hpp.cpp | 43 ---- ..._split_headers_builtins_relational.hpp.cpp | 41 ---- .../sycl_khr_split_headers_byte.hpp.cpp | 12 - .../sycl_khr_split_headers_context.hpp.cpp | 29 --- .../sycl_khr_split_headers_device.hpp.cpp | 36 --- .../sycl_khr_split_headers_event.hpp.cpp | 21 -- .../sycl_khr_split_headers_exception.hpp.cpp | 15 -- .../sycl_khr_split_headers_functional.hpp.cpp | 12 - ...khr_split_headers_group_algorithms.hpp.cpp | 67 ------ .../sycl_khr_split_headers_groups.hpp.cpp | 70 ------
.../sycl_khr_split_headers_half.hpp.cpp | 20 -- .../sycl_khr_split_headers_handler.hpp.cpp | 132 ----------- ...t_headers_hierarchical_parallelism.hpp.cpp | 35 --- .../sycl_khr_split_headers_images.hpp.cpp | 102 -------- ...sycl_khr_split_headers_index_space.hpp.cpp | 37 --- ...l_khr_split_headers_interop_handle.hpp.cpp | 100 -------- ...cl_khr_split_headers_kernel_bundle.hpp.cpp | 141 ----------- ...l_khr_split_headers_kernel_handler.hpp.cpp | 16 -- .../sycl_khr_split_headers_marray.hpp.cpp | 19 -- .../sycl_khr_split_headers_math.hpp.cpp | 49 ---- .../sycl_khr_split_headers_multi_ptr.hpp.cpp | 22 -- .../sycl_khr_split_headers_platform.hpp.cpp | 33 --- ...cl_khr_split_headers_property_list.hpp.cpp | 18 -- .../sycl_khr_split_headers_queue.hpp.cpp | 150 ------------ .../sycl_khr_split_headers_reduction.hpp.cpp | 187 --------------- .../sycl_khr_split_headers_span.hpp.cpp | 13 - .../sycl_khr_split_headers_stream.hpp.cpp | 157 ------------ ...sycl_khr_split_headers_type_traits.hpp.cpp | 17 -- .../sycl_khr_split_headers_usm.hpp.cpp | 170 ------------- .../sycl_khr_split_headers_vec.hpp.cpp | 34 --- .../sycl_khr_split_headers_version.hpp.cpp | 10 - sycl/test/include_deps/update_test.sh | 62 ----- sycl/tools/header_reach_check.py | 224 ++++++++++++++++++ 47 files changed, 225 insertions(+), 2440 deletions(-) delete mode 100644 sycl/test/include_deps/deps_known.sh create mode 100644 sycl/test/include_deps/header_reach.cpp delete mode 100644 sycl/test/include_deps/sycl_accessor.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_buffer.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_detail_core.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_detail_defines_elementary.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_detail_export.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_accessor.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_atomic.hpp.cpp delete mode 100644 
sycl/test/include_deps/sycl_khr_split_headers_bit.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_buffer.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_builtins_common.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_builtins_geometric.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_builtins_integer.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_builtins_math.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_builtins_relational.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_byte.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_context.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_device.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_event.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_exception.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_functional.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_group_algorithms.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_groups.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_half.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_handler.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_hierarchical_parallelism.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_images.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_index_space.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_interop_handle.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_kernel_bundle.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_kernel_handler.hpp.cpp delete mode 100644 
sycl/test/include_deps/sycl_khr_split_headers_marray.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_math.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_multi_ptr.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_platform.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_property_list.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_queue.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_reduction.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_span.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_stream.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_type_traits.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_usm.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_vec.hpp.cpp delete mode 100644 sycl/test/include_deps/sycl_khr_split_headers_version.hpp.cpp delete mode 100755 sycl/test/include_deps/update_test.sh create mode 100644 sycl/tools/header_reach_check.py diff --git a/sycl/test/include_deps/deps_known.sh b/sycl/test/include_deps/deps_known.sh deleted file mode 100644 index bd58a73723f1f..0000000000000 --- a/sycl/test/include_deps/deps_known.sh +++ /dev/null @@ -1,26 +0,0 @@ -function deps() { - HEADER=$1 - echo "Dependencies for <$HEADER>:" - - # Format is something like this: - # - # $ clang++ -fsycl -fsycl-device-only -include "detail/defines_elementary.hpp" -c -x c++ /dev/null -MD -MF - - # /dev/null: /dev/null \ - # /localdisk2/aeloviko/sycl/build/bin/../include/sycl/detail/defines_elementary.hpp - # - # However, sometimes first header is on the same line with - # "null.o: /dev/null
", so add an explicit line break there. - - clang++ -fsycl -fsycl-device-only -include "$HEADER" -c -x c++ /dev/null -o /dev/null -MD -MF - \ - | sed 's@: /dev/null@: /dev/null\n@' \ - | grep 'include/sycl\|/dev/null\|CL/\|ur_\|:' \ - | grep -v 'stl_wrappers' \ - | sed 's@.*/include/sycl/@@' \ - | sed 's@.*/include/CL/@CL/@' \ - | sed 's@.*/include/ur_@ur_@' \ - | sed 's@.*/include/unified-runtime/@@' \ - | sed 's/ \\//' -} - -deps $1 -echo "" diff --git a/sycl/test/include_deps/header_reach.cpp b/sycl/test/include_deps/header_reach.cpp new file mode 100644 index 0000000000000..9bd0ed4c69c64 --- /dev/null +++ b/sycl/test/include_deps/header_reach.cpp @@ -0,0 +1 @@ +// RUN: %python %sycl_tools_src_dir/header_reach_check.py --clang %clangxx diff --git a/sycl/test/include_deps/sycl_accessor.hpp.cpp b/sycl/test/include_deps/sycl_accessor.hpp.cpp deleted file mode 100644 index ac222e28a048d..0000000000000 --- a/sycl/test/include_deps/sycl_accessor.hpp.cpp +++ /dev/null @@ -1,50 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/accessor.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: accessor.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: detail/accessor_iterator.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: detail/code_location.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/buffer.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: 
detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/handler_proxy.hpp -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: ext/oneapi/accessor_property_list.hpp -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: properties/property_traits.hpp -// CHECK-NEXT: multi_ptr.hpp -// CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: properties/accessor_properties.hpp -// CHECK-NEXT: properties/runtime_accessor_properties.def -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_buffer.hpp.cpp b/sycl/test/include_deps/sycl_buffer.hpp.cpp deleted file mode 100644 index 20980206267ab..0000000000000 --- a/sycl/test/include_deps/sycl_buffer.hpp.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/buffer.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: buffer.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: detail/code_location.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/fwd/buffer.hpp -// CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: 
memory_enums.hpp -// CHECK-NEXT: detail/is_device_copyable.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: detail/stl_type_traits.hpp -// CHECK-NEXT: detail/sycl_mem_obj_allocator.hpp -// CHECK-NEXT: detail/aligned_allocator.hpp -// CHECK-NEXT: detail/os_util.hpp -// CHECK-NEXT: ext/oneapi/accessor_property_list.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: properties/property_traits.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: properties/buffer_properties.hpp -// CHECK-NEXT: context.hpp -// CHECK-NEXT: async_handler.hpp -// CHECK-NEXT: info/context.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-NEXT: usm/usm_enums.hpp -// CHECK-NEXT: properties/buffer_properties.def -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_detail_core.hpp.cpp b/sycl/test/include_deps/sycl_detail_core.hpp.cpp deleted file mode 100644 index c37736d6d5623..0000000000000 --- a/sycl/test/include_deps/sycl_detail_core.hpp.cpp +++ /dev/null @@ -1,147 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/detail/core.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: detail/core.hpp -// CHECK-NEXT: accessor.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: detail/accessor_iterator.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: detail/code_location.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: 
__spirv/spirv_vars.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/buffer.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/handler_proxy.hpp -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: ext/oneapi/accessor_property_list.hpp -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: properties/property_traits.hpp -// CHECK-NEXT: multi_ptr.hpp -// CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: properties/accessor_properties.hpp -// CHECK-NEXT: properties/runtime_accessor_properties.def -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: buffer.hpp -// CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: detail/is_device_copyable.hpp -// CHECK-NEXT: detail/stl_type_traits.hpp -// CHECK-NEXT: detail/sycl_mem_obj_allocator.hpp -// CHECK-NEXT: detail/aligned_allocator.hpp -// CHECK-NEXT: detail/os_util.hpp -// CHECK-NEXT: properties/buffer_properties.hpp -// CHECK-NEXT: context.hpp -// CHECK-NEXT: async_handler.hpp -// CHECK-NEXT: info/context.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-NEXT: usm/usm_enums.hpp -// CHECK-NEXT: properties/buffer_properties.def -// CHECK-NEXT: queue.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def 
-// CHECK-NEXT: detail/cg_types.hpp -// CHECK-NEXT: kernel_handler.hpp -// CHECK-NEXT: detail/kernel_desc.hpp -// CHECK-NEXT: detail/nd_range_view.hpp -// CHECK-NEXT: nd_range.hpp -// CHECK-NEXT: detail/optional.hpp -// CHECK-NEXT: detail/range_rounding.hpp -// CHECK-NEXT: ext/oneapi/kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/forward_progress.hpp -// CHECK-NEXT: ext/oneapi/free_function_kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property_value.hpp -// CHECK-NEXT: ext/oneapi/properties/property.hpp -// CHECK-NEXT: ext/oneapi/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp -// CHECK-NEXT: item.hpp -// CHECK-NEXT: detail/item_base.hpp -// CHECK-NEXT: device.hpp -// CHECK-NEXT: detail/string_view.hpp -// CHECK-NEXT: detail/util.hpp -// CHECK-NEXT: detail/abi_neutral.hpp -// CHECK-NEXT: device_selector.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.def -// CHECK-NEXT: info/device.hpp -// CHECK-NEXT: detail/device_info_types.hpp -// CHECK-NEXT: kernel_bundle_enums.hpp -// CHECK-NEXT: event.hpp -// CHECK-NEXT: info/event.hpp -// CHECK-NEXT: exception_list.hpp -// CHECK-NEXT: ext/oneapi/device_global/device_global.hpp -// CHECK-NEXT: ext/oneapi/device_global/properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/event_mode_property.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def -// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp -// 
CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp -// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp -// CHECK-NEXT: handler.hpp -// CHECK-NEXT: detail/cl.h -// CHECK-NEXT: CL/cl.h -// CHECK-NEXT: CL/cl_version.h -// CHECK-NEXT: CL/cl_platform.h -// CHECK-NEXT: CL/cl_ext.h -// CHECK-NEXT: detail/get_device_kernel_info.hpp -// CHECK-NEXT: detail/compile_time_kernel_info.hpp -// CHECK-NEXT: detail/kernel_launch_helper.hpp -// CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp -// CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp -// CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp -// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp -// CHECK-NEXT: detail/reduction_forward.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp -// CHECK-NEXT: ext/oneapi/interop_common.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/experimental/free_function_traits.hpp -// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: group.hpp -// CHECK-NEXT: detail/async_work_group_copy_ptr.hpp -// CHECK-NEXT: device_event.hpp -// CHECK-NEXT: kernel.hpp -// CHECK-NEXT: info/kernel.hpp -// CHECK-NEXT: nd_item.hpp -// CHECK-NEXT: sampler.hpp -// CHECK-NEXT: info/queue.hpp -// CHECK-NEXT: sycl_span.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_detail_defines_elementary.hpp.cpp b/sycl/test/include_deps/sycl_detail_defines_elementary.hpp.cpp deleted file mode 100644 index 56df23e41cc36..0000000000000 --- a/sycl/test/include_deps/sycl_detail_defines_elementary.hpp.cpp +++ /dev/null @@ -1,8 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/detail/defines_elementary.hpp | FileCheck %s - -// 
CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_detail_export.hpp.cpp b/sycl/test/include_deps/sycl_detail_export.hpp.cpp deleted file mode 100644 index 3f0a04ea2769b..0000000000000 --- a/sycl/test/include_deps/sycl_detail_export.hpp.cpp +++ /dev/null @@ -1,8 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/detail/export.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: detail/export.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_accessor.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_accessor.hpp.cpp deleted file mode 100644 index 267c56c1eec4a..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_accessor.hpp.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/accessor.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/accessor.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: accessor.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/accessor_iterator.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: detail/code_location.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/buffer.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: 
detail/fwd/half.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/handler_proxy.hpp -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: ext/oneapi/accessor_property_list.hpp -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: properties/property_traits.hpp -// CHECK-NEXT: multi_ptr.hpp -// CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: properties/accessor_properties.hpp -// CHECK-NEXT: properties/runtime_accessor_properties.def -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_atomic.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_atomic.hpp.cpp deleted file mode 100644 index a079b6c58fc51..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_atomic.hpp.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/atomic.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/atomic.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: atomic_fence.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: detail/spirv.hpp -// CHECK-NEXT: __spirv/spirv_ops_atomic.hpp -// CHECK-NEXT: __spirv/spirv_ops_builtin_decls.hpp -// CHECK-NEXT: __spirv/spirv_ops_base.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// 
CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: multi_ptr.hpp -// CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-NEXT: atomic_ref.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: ext/oneapi/experimental/address_cast.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_bit.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_bit.hpp.cpp deleted file mode 100644 index 5aba64de649c4..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_bit.hpp.cpp +++ /dev/null @@ -1,12 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/bit.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/bit.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_buffer.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_buffer.hpp.cpp deleted file mode 100644 index a15c52222bd54..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_buffer.hpp.cpp +++ /dev/null @@ -1,52 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// 
REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/buffer.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/buffer.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: buffer.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: detail/code_location.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/fwd/buffer.hpp -// CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: detail/is_device_copyable.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: detail/stl_type_traits.hpp -// CHECK-NEXT: detail/sycl_mem_obj_allocator.hpp -// CHECK-NEXT: detail/aligned_allocator.hpp -// CHECK-NEXT: detail/os_util.hpp -// CHECK-NEXT: ext/oneapi/accessor_property_list.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: properties/property_traits.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: properties/buffer_properties.hpp -// CHECK-NEXT: context.hpp -// CHECK-NEXT: async_handler.hpp -// CHECK-NEXT: info/context.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-NEXT: usm/usm_enums.hpp -// CHECK-NEXT: properties/buffer_properties.def -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_builtins_common.hpp.cpp 
b/sycl/test/include_deps/sycl_khr_split_headers_builtins_common.hpp.cpp deleted file mode 100644 index 7f1cfad9156eb..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_builtins_common.hpp.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/builtins_common.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/builtins_common.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: detail/builtins/common_functions.hpp -// CHECK-NEXT: detail/builtins/builtin_helpers.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/half_type_impl.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/vector_core.hpp -// CHECK-NEXT: detail/named_swizzles_mixin.hpp -// CHECK-NEXT: detail/vector_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-NEXT: marray.hpp -// CHECK-NEXT: detail/builtins/helper_macros.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_builtins_geometric.hpp.cpp 
b/sycl/test/include_deps/sycl_khr_split_headers_builtins_geometric.hpp.cpp deleted file mode 100644 index 6357bf0808d2c..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_builtins_geometric.hpp.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/builtins_geometric.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/builtins_geometric.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: detail/builtins/geometric_functions.hpp -// CHECK-NEXT: detail/builtins/builtin_helpers.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/half_type_impl.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/vector_core.hpp -// CHECK-NEXT: detail/named_swizzles_mixin.hpp -// CHECK-NEXT: detail/vector_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-NEXT: marray.hpp -// CHECK-NEXT: detail/builtins/helper_macros.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_builtins_integer.hpp.cpp 
b/sycl/test/include_deps/sycl_khr_split_headers_builtins_integer.hpp.cpp deleted file mode 100644 index 3aebf823b6266..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_builtins_integer.hpp.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/builtins_integer.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/builtins_integer.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: detail/builtins/integer_functions.hpp -// CHECK-NEXT: detail/builtins/builtin_helpers.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/half_type_impl.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/vector_core.hpp -// CHECK-NEXT: detail/named_swizzles_mixin.hpp -// CHECK-NEXT: detail/vector_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-NEXT: marray.hpp -// CHECK-NEXT: detail/builtins/helper_macros.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_builtins_math.hpp.cpp 
b/sycl/test/include_deps/sycl_khr_split_headers_builtins_math.hpp.cpp deleted file mode 100644 index 6b156b4aefc09..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_builtins_math.hpp.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/builtins_math.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/builtins_math.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: detail/builtins/half_precision_math_functions.hpp -// CHECK-NEXT: detail/builtins/builtin_helpers.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/half_type_impl.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/vector_core.hpp -// CHECK-NEXT: detail/named_swizzles_mixin.hpp -// CHECK-NEXT: detail/vector_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-NEXT: marray.hpp -// CHECK-NEXT: detail/builtins/helper_macros.hpp -// CHECK-NEXT: detail/builtins/math_functions.hpp -// CHECK-NEXT: detail/builtins/native_math_functions.hpp -// CHECK-EMPTY: diff --git 
a/sycl/test/include_deps/sycl_khr_split_headers_builtins_relational.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_builtins_relational.hpp.cpp deleted file mode 100644 index 604899d75ba8a..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_builtins_relational.hpp.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/builtins_relational.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/builtins_relational.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: detail/builtins/relational_functions.hpp -// CHECK-NEXT: detail/builtins/builtin_helpers.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/half_type_impl.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/vector_core.hpp -// CHECK-NEXT: detail/named_swizzles_mixin.hpp -// CHECK-NEXT: detail/vector_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-NEXT: marray.hpp -// CHECK-NEXT: detail/builtins/helper_macros.hpp -// CHECK-EMPTY: diff --git 
a/sycl/test/include_deps/sycl_khr_split_headers_byte.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_byte.hpp.cpp deleted file mode 100644 index 8c7d2bf1a268e..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_byte.hpp.cpp +++ /dev/null @@ -1,12 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/byte.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/byte.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_context.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_context.hpp.cpp deleted file mode 100644 index 0038552ea83b5..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_context.hpp.cpp +++ /dev/null @@ -1,29 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/context.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/context.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: context.hpp -// CHECK-NEXT: async_handler.hpp -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: info/context.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/string.hpp 
-// CHECK-NEXT: properties/property_traits.hpp -// CHECK-NEXT: usm/usm_enums.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_device.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_device.hpp.cpp deleted file mode 100644 index 94ef8cd7be3dd..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_device.hpp.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/device.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/device.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: device.hpp -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: detail/string_view.hpp -// CHECK-NEXT: detail/util.hpp -// CHECK-NEXT: detail/abi_neutral.hpp -// CHECK-NEXT: device_selector.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.def -// CHECK-NEXT: info/device.hpp -// CHECK-NEXT: detail/device_info_types.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: kernel_bundle_enums.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_event.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_event.hpp.cpp deleted file mode 100644 index e8d81e02eeb63..0000000000000 --- 
a/sycl/test/include_deps/sycl_khr_split_headers_event.hpp.cpp +++ /dev/null @@ -1,21 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/event.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/event.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: event.hpp -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: info/event.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_exception.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_exception.hpp.cpp deleted file mode 100644 index 0c6033668fe43..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_exception.hpp.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/exception.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/exception.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: exception_list.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_functional.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_functional.hpp.cpp deleted file mode 100644 index 3eb8d79a870f6..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_functional.hpp.cpp 
+++ /dev/null @@ -1,12 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/functional.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/functional.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: functional.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_group_algorithms.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_group_algorithms.hpp.cpp deleted file mode 100644 index 3cdbaa72e6c7d..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_group_algorithms.hpp.cpp +++ /dev/null @@ -1,67 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/group_algorithms.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/group_algorithms.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: group_algorithm.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/item_base.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: functional.hpp -// CHECK-NEXT: group.hpp -// CHECK-NEXT: 
detail/assert.hpp -// CHECK-NEXT: detail/async_work_group_copy_ptr.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: device_event.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: half_type.hpp -// CHECK-NEXT: detail/half_type_impl.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: known_identity.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: marray.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: vector.hpp -// CHECK-NEXT: detail/vector_arith.hpp -// CHECK-NEXT: detail/vector_traits.hpp -// CHECK-NEXT: detail/vector_core.hpp -// CHECK-NEXT: detail/named_swizzles_mixin.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-NEXT: nd_item.hpp -// CHECK-NEXT: item.hpp -// CHECK-NEXT: nd_range.hpp -// CHECK-NEXT: ext/oneapi/functional.hpp -// CHECK-NEXT: detail/spirv.hpp -// CHECK-NEXT: __spirv/spirv_ops_atomic.hpp -// CHECK-NEXT: __spirv/spirv_ops_builtin_decls.hpp -// CHECK-NEXT: __spirv/spirv_ops_base.hpp -// CHECK-NEXT: multi_ptr.hpp -// CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_groups.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_groups.hpp.cpp deleted file mode 100644 index 42db0d19e3dce..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_groups.hpp.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/groups.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/groups.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// 
CHECK-NEXT: group.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/async_work_group_copy_ptr.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: device_event.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: group_algorithm.hpp -// CHECK-NEXT: detail/item_base.hpp -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: functional.hpp -// CHECK-NEXT: half_type.hpp -// CHECK-NEXT: detail/half_type_impl.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: known_identity.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: marray.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: vector.hpp -// CHECK-NEXT: detail/vector_arith.hpp -// CHECK-NEXT: detail/vector_traits.hpp -// CHECK-NEXT: detail/vector_core.hpp -// CHECK-NEXT: detail/named_swizzles_mixin.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-NEXT: nd_item.hpp -// CHECK-NEXT: item.hpp -// CHECK-NEXT: nd_range.hpp -// CHECK-NEXT: ext/oneapi/functional.hpp -// CHECK-NEXT: detail/spirv.hpp -// CHECK-NEXT: __spirv/spirv_ops_atomic.hpp -// CHECK-NEXT: __spirv/spirv_ops_builtin_decls.hpp -// CHECK-NEXT: __spirv/spirv_ops_base.hpp -// CHECK-NEXT: multi_ptr.hpp -// 
CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-NEXT: group_barrier.hpp -// CHECK-NEXT: sub_group.hpp -// CHECK-NEXT: __spirv/spirv_ops_subgroup.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_half.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_half.hpp.cpp deleted file mode 100644 index ff6f7b971a316..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_half.hpp.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/half.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/half.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: half_type.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/half_type_impl.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_handler.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_handler.hpp.cpp deleted file mode 100644 index 870d2946166e4..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_handler.hpp.cpp +++ /dev/null @@ -1,132 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/handler.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/handler.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: handler.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: accessor.hpp -// CHECK-NEXT: 
detail/accessor_iterator.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: detail/code_location.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/buffer.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/handler_proxy.hpp -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: ext/oneapi/accessor_property_list.hpp -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: properties/property_traits.hpp -// CHECK-NEXT: multi_ptr.hpp -// CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: properties/accessor_properties.hpp -// CHECK-NEXT: properties/runtime_accessor_properties.def -// CHECK-NEXT: detail/cl.h -// CHECK-NEXT: CL/cl.h -// CHECK-NEXT: CL/cl_version.h -// CHECK-NEXT: CL/cl_platform.h -// CHECK-NEXT: CL/cl_ext.h -// CHECK-NEXT: detail/get_device_kernel_info.hpp -// CHECK-NEXT: detail/compile_time_kernel_info.hpp -// CHECK-NEXT: detail/kernel_desc.hpp -// CHECK-NEXT: detail/string_view.hpp -// CHECK-NEXT: detail/kernel_launch_helper.hpp -// CHECK-NEXT: detail/cg_types.hpp -// CHECK-NEXT: 
kernel_handler.hpp -// CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: detail/is_device_copyable.hpp -// CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property.hpp -// CHECK-NEXT: ext/oneapi/properties/property_value.hpp -// CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp -// CHECK-NEXT: ext/oneapi/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp -// CHECK-NEXT: ext/oneapi/free_function_kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/kernel_properties.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: ext/oneapi/experimental/forward_progress.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp -// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp -// CHECK-NEXT: detail/nd_range_view.hpp -// CHECK-NEXT: nd_range.hpp -// CHECK-NEXT: detail/range_rounding.hpp -// CHECK-NEXT: item.hpp -// CHECK-NEXT: detail/item_base.hpp -// CHECK-NEXT: detail/reduction_forward.hpp -// CHECK-NEXT: device.hpp -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: detail/util.hpp -// CHECK-NEXT: detail/abi_neutral.hpp -// CHECK-NEXT: device_selector.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.def -// CHECK-NEXT: info/device.hpp -// CHECK-NEXT: detail/device_info_types.hpp -// CHECK-NEXT: kernel_bundle_enums.hpp -// CHECK-NEXT: event.hpp -// CHECK-NEXT: info/event.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp -// CHECK-NEXT: ext/oneapi/interop_common.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: 
ext/oneapi/device_global/device_global.hpp -// CHECK-NEXT: ext/oneapi/device_global/properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/free_function_traits.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def -// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp -// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp -// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: group.hpp -// CHECK-NEXT: detail/async_work_group_copy_ptr.hpp -// CHECK-NEXT: device_event.hpp -// CHECK-NEXT: kernel.hpp -// CHECK-NEXT: info/kernel.hpp -// CHECK-NEXT: nd_item.hpp -// CHECK-NEXT: sampler.hpp -// CHECK-NEXT: usm/usm_enums.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_hierarchical_parallelism.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_hierarchical_parallelism.hpp.cpp deleted file mode 100644 index 27f60417ac251..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_hierarchical_parallelism.hpp.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/hierarchical_parallelism.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/hierarchical_parallelism.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp 
-// CHECK-NEXT: group.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/async_work_group_copy_ptr.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: device_event.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: h_item.hpp -// CHECK-NEXT: detail/item_base.hpp -// CHECK-NEXT: item.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_images.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_images.hpp.cpp deleted file mode 100644 index 834fa80395952..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_images.hpp.cpp +++ /dev/null @@ -1,102 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/images.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/images.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: accessor_image.hpp -// CHECK-NEXT: accessor.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/accessor_iterator.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: detail/code_location.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/common.hpp -// 
CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/buffer.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/handler_proxy.hpp -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: ext/oneapi/accessor_property_list.hpp -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: properties/property_traits.hpp -// CHECK-NEXT: multi_ptr.hpp -// CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: properties/accessor_properties.hpp -// CHECK-NEXT: properties/runtime_accessor_properties.def -// CHECK-NEXT: detail/image_accessor_util.hpp -// CHECK-NEXT: device.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: detail/string_view.hpp -// CHECK-NEXT: detail/util.hpp -// CHECK-NEXT: detail/abi_neutral.hpp -// CHECK-NEXT: device_selector.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.def -// CHECK-NEXT: info/device.hpp -// CHECK-NEXT: detail/device_info_types.hpp -// CHECK-NEXT: kernel_bundle_enums.hpp -// CHECK-NEXT: image.hpp -// CHECK-NEXT: buffer.hpp -// 
CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: detail/is_device_copyable.hpp -// CHECK-NEXT: detail/stl_type_traits.hpp -// CHECK-NEXT: detail/sycl_mem_obj_allocator.hpp -// CHECK-NEXT: detail/aligned_allocator.hpp -// CHECK-NEXT: detail/os_util.hpp -// CHECK-NEXT: properties/buffer_properties.hpp -// CHECK-NEXT: context.hpp -// CHECK-NEXT: async_handler.hpp -// CHECK-NEXT: info/context.hpp -// CHECK-NEXT: usm/usm_enums.hpp -// CHECK-NEXT: properties/buffer_properties.def -// CHECK-NEXT: detail/backend_traits.hpp -// CHECK-NEXT: event.hpp -// CHECK-NEXT: info/event.hpp -// CHECK-NEXT: sampler.hpp -// CHECK-NEXT: vector.hpp -// CHECK-NEXT: detail/vector_arith.hpp -// CHECK-NEXT: detail/vector_traits.hpp -// CHECK-NEXT: detail/vector_core.hpp -// CHECK-NEXT: detail/named_swizzles_mixin.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-NEXT: detail/image_ocl_types.hpp -// CHECK-NEXT: __spirv/spirv_ops_image.hpp -// CHECK-NEXT: __spirv/spirv_ops_builtin_decls.hpp -// CHECK-NEXT: __spirv/spirv_ops_base.hpp -// CHECK-NEXT: properties/image_properties.hpp -// CHECK-NEXT: properties/image_properties.def -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_index_space.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_index_space.hpp.cpp deleted file mode 100644 index a9d924df408a7..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_index_space.hpp.cpp +++ /dev/null @@ -1,37 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/index_space.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/index_space.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: range.hpp -// 
CHECK-NEXT: item.hpp -// CHECK-NEXT: detail/item_base.hpp -// CHECK-NEXT: nd_item.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/async_work_group_copy_ptr.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: device_event.hpp -// CHECK-NEXT: group.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: nd_range.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_interop_handle.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_interop_handle.hpp.cpp deleted file mode 100644 index e48d70941170e..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_interop_handle.hpp.cpp +++ /dev/null @@ -1,100 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/interop_handle.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/interop_handle.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: interop_handle.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: accessor.hpp -// CHECK-NEXT: detail/accessor_iterator.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: detail/code_location.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: 
detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/buffer.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/handler_proxy.hpp -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: ext/oneapi/accessor_property_list.hpp -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: properties/property_traits.hpp -// CHECK-NEXT: multi_ptr.hpp -// CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: properties/accessor_properties.hpp -// CHECK-NEXT: properties/runtime_accessor_properties.def -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: buffer.hpp -// CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: detail/is_device_copyable.hpp -// CHECK-NEXT: detail/stl_type_traits.hpp -// CHECK-NEXT: detail/sycl_mem_obj_allocator.hpp -// CHECK-NEXT: detail/aligned_allocator.hpp -// CHECK-NEXT: detail/os_util.hpp -// CHECK-NEXT: properties/buffer_properties.hpp -// CHECK-NEXT: context.hpp -// CHECK-NEXT: async_handler.hpp -// CHECK-NEXT: info/context.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-NEXT: usm/usm_enums.hpp -// CHECK-NEXT: properties/buffer_properties.def -// CHECK-NEXT: ext/oneapi/experimental/graph.hpp -// CHECK-NEXT: 
ext/oneapi/experimental/graph/command_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def -// CHECK-NEXT: nd_range.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp -// CHECK-NEXT: detail/string_view.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp -// CHECK-NEXT: detail/kernel_desc.hpp -// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp -// CHECK-NEXT: ext/oneapi/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp -// CHECK-NEXT: ext/oneapi/properties/property.hpp -// CHECK-NEXT: ext/oneapi/properties/property_value.hpp -// CHECK-NEXT: image.hpp -// CHECK-NEXT: detail/backend_traits.hpp -// CHECK-NEXT: event.hpp -// CHECK-NEXT: info/event.hpp -// CHECK-NEXT: sampler.hpp -// CHECK-NEXT: vector.hpp -// CHECK-NEXT: detail/vector_arith.hpp -// CHECK-NEXT: detail/vector_traits.hpp -// CHECK-NEXT: detail/vector_core.hpp -// CHECK-NEXT: detail/named_swizzles_mixin.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_kernel_bundle.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_kernel_bundle.hpp.cpp deleted file mode 100644 index a4b8da97ee2cb..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_kernel_bundle.hpp.cpp +++ /dev/null @@ -1,141 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/kernel_bundle.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/kernel_bundle.hpp -// CHECK-NEXT: 
khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: kernel.hpp -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/util.hpp -// CHECK-NEXT: detail/abi_neutral.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: info/kernel.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-NEXT: range.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: kernel_bundle_enums.hpp -// CHECK-NEXT: kernel_bundle.hpp -// CHECK-NEXT: context.hpp -// CHECK-NEXT: async_handler.hpp -// CHECK-NEXT: info/context.hpp -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: properties/property_traits.hpp -// CHECK-NEXT: usm/usm_enums.hpp -// CHECK-NEXT: detail/kernel_desc.hpp -// CHECK-NEXT: detail/string_view.hpp -// CHECK-NEXT: detail/ur.hpp -// CHECK-NEXT: detail/os_util.hpp -// CHECK-NEXT: ur_api_funcs.def -// CHECK-NEXT: device.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: device_selector.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.def -// CHECK-NEXT: info/device.hpp -// CHECK-NEXT: detail/device_info_types.hpp -// CHECK-NEXT: handler.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: accessor.hpp -// CHECK-NEXT: detail/accessor_iterator.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/code_location.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: 
detail/fwd/buffer.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/handler_proxy.hpp -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: ext/oneapi/accessor_property_list.hpp -// CHECK-NEXT: multi_ptr.hpp -// CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: properties/accessor_properties.hpp -// CHECK-NEXT: properties/runtime_accessor_properties.def -// CHECK-NEXT: detail/cl.h -// CHECK-NEXT: CL/cl.h -// CHECK-NEXT: CL/cl_version.h -// CHECK-NEXT: CL/cl_platform.h -// CHECK-NEXT: CL/cl_ext.h -// CHECK-NEXT: detail/get_device_kernel_info.hpp -// CHECK-NEXT: detail/compile_time_kernel_info.hpp -// CHECK-NEXT: detail/kernel_launch_helper.hpp -// CHECK-NEXT: detail/cg_types.hpp -// CHECK-NEXT: kernel_handler.hpp -// CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: detail/is_device_copyable.hpp -// CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property.hpp -// CHECK-NEXT: ext/oneapi/properties/property_value.hpp -// CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp -// CHECK-NEXT: ext/oneapi/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp -// CHECK-NEXT: ext/oneapi/free_function_kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/forward_progress.hpp -// CHECK-NEXT: 
ext/oneapi/work_group_scratch_memory.hpp -// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp -// CHECK-NEXT: detail/nd_range_view.hpp -// CHECK-NEXT: nd_range.hpp -// CHECK-NEXT: detail/range_rounding.hpp -// CHECK-NEXT: item.hpp -// CHECK-NEXT: detail/item_base.hpp -// CHECK-NEXT: detail/reduction_forward.hpp -// CHECK-NEXT: event.hpp -// CHECK-NEXT: info/event.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp -// CHECK-NEXT: ext/oneapi/interop_common.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/device_global/device_global.hpp -// CHECK-NEXT: ext/oneapi/device_global/properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/free_function_traits.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def -// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp -// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp -// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: group.hpp -// CHECK-NEXT: detail/async_work_group_copy_ptr.hpp -// CHECK-NEXT: device_event.hpp -// CHECK-NEXT: nd_item.hpp -// CHECK-NEXT: sampler.hpp -// CHECK-NEXT: sycl_span.hpp -// CHECK-NEXT: specialization_id.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_kernel_handler.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_kernel_handler.hpp.cpp deleted file mode 100644 index 8d1eefeca6d55..0000000000000 --- 
a/sycl/test/include_deps/sycl_khr_split_headers_kernel_handler.hpp.cpp +++ /dev/null @@ -1,16 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/kernel_handler.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/kernel_handler.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: kernel_handler.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_marray.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_marray.hpp.cpp deleted file mode 100644 index 50bdc1b8fa69d..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_marray.hpp.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/marray.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/marray.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: marray.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_math.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_math.hpp.cpp deleted file mode 100644 index 75a0d00334c3c..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_math.hpp.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// Use update_test.sh to 
(re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/math.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/math.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: builtins.hpp -// CHECK-NEXT: detail/builtins/builtins.hpp -// CHECK-NEXT: detail/builtins/common_functions.hpp -// CHECK-NEXT: detail/builtins/builtin_helpers.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/half_type_impl.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/vector_core.hpp -// CHECK-NEXT: detail/named_swizzles_mixin.hpp -// CHECK-NEXT: detail/vector_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-NEXT: marray.hpp -// CHECK-NEXT: detail/builtins/helper_macros.hpp -// CHECK-NEXT: detail/builtins/geometric_functions.hpp -// CHECK-NEXT: detail/builtins/half_precision_math_functions.hpp -// CHECK-NEXT: detail/builtins/integer_functions.hpp -// CHECK-NEXT: detail/builtins/math_functions.hpp -// CHECK-NEXT: detail/builtins/native_math_functions.hpp -// CHECK-NEXT: detail/builtins/relational_functions.hpp -// CHECK-EMPTY: diff --git 
a/sycl/test/include_deps/sycl_khr_split_headers_multi_ptr.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_multi_ptr.hpp.cpp deleted file mode 100644 index 65f6dec370482..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_multi_ptr.hpp.cpp +++ /dev/null @@ -1,22 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/multi_ptr.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/multi_ptr.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: multi_ptr.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_platform.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_platform.hpp.cpp deleted file mode 100644 index 7a1b40b98c05a..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_platform.hpp.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/platform.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/platform.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: platform.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: info/aspects.def -// 
CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/abi_neutral.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/string_view.hpp -// CHECK-NEXT: device_selector.hpp -// CHECK-NEXT: info/device.hpp -// CHECK-NEXT: detail/device_info_types.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-NEXT: range.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: info/platform.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_property_list.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_property_list.hpp.cpp deleted file mode 100644 index 175a775731549..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_property_list.hpp.cpp +++ /dev/null @@ -1,18 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/property_list.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/property_list.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: properties/property_traits.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_queue.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_queue.hpp.cpp deleted file mode 100644 index f4b01f58da5bf..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_queue.hpp.cpp +++ /dev/null @@ -1,150 +0,0 @@ -// Use 
update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/queue.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/queue.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: properties/queue_properties.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: properties/property_traits.hpp -// CHECK-NEXT: properties/queue_properties.def -// CHECK-NEXT: queue.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: accessor.hpp -// CHECK-NEXT: detail/accessor_iterator.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: detail/code_location.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/buffer.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/handler_proxy.hpp -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: ext/oneapi/accessor_property_list.hpp -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: multi_ptr.hpp -// CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-NEXT: 
__spirv/spirv_types.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: properties/accessor_properties.hpp -// CHECK-NEXT: properties/runtime_accessor_properties.def -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: async_handler.hpp -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: buffer.hpp -// CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: detail/is_device_copyable.hpp -// CHECK-NEXT: detail/stl_type_traits.hpp -// CHECK-NEXT: detail/sycl_mem_obj_allocator.hpp -// CHECK-NEXT: detail/aligned_allocator.hpp -// CHECK-NEXT: detail/os_util.hpp -// CHECK-NEXT: properties/buffer_properties.hpp -// CHECK-NEXT: context.hpp -// CHECK-NEXT: info/context.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-NEXT: usm/usm_enums.hpp -// CHECK-NEXT: properties/buffer_properties.def -// CHECK-NEXT: detail/cg_types.hpp -// CHECK-NEXT: kernel_handler.hpp -// CHECK-NEXT: detail/kernel_desc.hpp -// CHECK-NEXT: detail/nd_range_view.hpp -// CHECK-NEXT: nd_range.hpp -// CHECK-NEXT: detail/optional.hpp -// CHECK-NEXT: detail/range_rounding.hpp -// CHECK-NEXT: ext/oneapi/kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/forward_progress.hpp -// CHECK-NEXT: ext/oneapi/free_function_kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property_value.hpp -// CHECK-NEXT: ext/oneapi/properties/property.hpp -// CHECK-NEXT: ext/oneapi/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp -// CHECK-NEXT: item.hpp -// CHECK-NEXT: detail/item_base.hpp -// CHECK-NEXT: device.hpp -// CHECK-NEXT: detail/string_view.hpp -// CHECK-NEXT: detail/util.hpp -// CHECK-NEXT: detail/abi_neutral.hpp -// CHECK-NEXT: device_selector.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.def -// CHECK-NEXT: info/device.hpp -// CHECK-NEXT: detail/device_info_types.hpp -// 
CHECK-NEXT: kernel_bundle_enums.hpp -// CHECK-NEXT: event.hpp -// CHECK-NEXT: info/event.hpp -// CHECK-NEXT: exception_list.hpp -// CHECK-NEXT: ext/oneapi/device_global/device_global.hpp -// CHECK-NEXT: ext/oneapi/device_global/properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/event_mode_property.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def -// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp -// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp -// CHECK-NEXT: handler.hpp -// CHECK-NEXT: detail/cl.h -// CHECK-NEXT: CL/cl.h -// CHECK-NEXT: CL/cl_version.h -// CHECK-NEXT: CL/cl_platform.h -// CHECK-NEXT: CL/cl_ext.h -// CHECK-NEXT: detail/get_device_kernel_info.hpp -// CHECK-NEXT: detail/compile_time_kernel_info.hpp -// CHECK-NEXT: detail/kernel_launch_helper.hpp -// CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp -// CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp -// CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp -// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp -// CHECK-NEXT: detail/reduction_forward.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp -// CHECK-NEXT: ext/oneapi/interop_common.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// 
CHECK-NEXT: ext/oneapi/experimental/free_function_traits.hpp -// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: group.hpp -// CHECK-NEXT: detail/async_work_group_copy_ptr.hpp -// CHECK-NEXT: device_event.hpp -// CHECK-NEXT: kernel.hpp -// CHECK-NEXT: info/kernel.hpp -// CHECK-NEXT: nd_item.hpp -// CHECK-NEXT: sampler.hpp -// CHECK-NEXT: info/queue.hpp -// CHECK-NEXT: sycl_span.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_reduction.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_reduction.hpp.cpp deleted file mode 100644 index 3bad713783ebc..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_reduction.hpp.cpp +++ /dev/null @@ -1,187 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/reduction.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/reduction.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: properties/reduction_properties.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: properties/reduction_properties.def -// CHECK-NEXT: reduction.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: accessor.hpp -// CHECK-NEXT: detail/accessor_iterator.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: detail/code_location.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/buffer.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: 
detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/handler_proxy.hpp -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: ext/oneapi/accessor_property_list.hpp -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: properties/property_traits.hpp -// CHECK-NEXT: multi_ptr.hpp -// CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: properties/accessor_properties.hpp -// CHECK-NEXT: properties/runtime_accessor_properties.def -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: atomic.hpp -// CHECK-NEXT: __spirv/spirv_ops_atomic.hpp -// CHECK-NEXT: __spirv/spirv_ops_builtin_decls.hpp -// CHECK-NEXT: __spirv/spirv_ops_base.hpp -// CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: atomic_ref.hpp -// CHECK-NEXT: ext/oneapi/experimental/address_cast.hpp -// CHECK-NEXT: detail/spirv.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-NEXT: buffer.hpp -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: detail/is_device_copyable.hpp -// CHECK-NEXT: detail/stl_type_traits.hpp -// CHECK-NEXT: detail/sycl_mem_obj_allocator.hpp -// CHECK-NEXT: detail/aligned_allocator.hpp -// CHECK-NEXT: detail/os_util.hpp -// CHECK-NEXT: properties/buffer_properties.hpp -// CHECK-NEXT: context.hpp -// CHECK-NEXT: async_handler.hpp -// CHECK-NEXT: info/context.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-NEXT: usm/usm_enums.hpp -// CHECK-NEXT: properties/buffer_properties.def 
-// CHECK-NEXT: builtins.hpp -// CHECK-NEXT: detail/builtins/builtins.hpp -// CHECK-NEXT: detail/builtins/common_functions.hpp -// CHECK-NEXT: detail/builtins/builtin_helpers.hpp -// CHECK-NEXT: detail/half_type_impl.hpp -// CHECK-NEXT: detail/vector_core.hpp -// CHECK-NEXT: detail/named_swizzles_mixin.hpp -// CHECK-NEXT: detail/vector_traits.hpp -// CHECK-NEXT: marray.hpp -// CHECK-NEXT: detail/builtins/helper_macros.hpp -// CHECK-NEXT: detail/builtins/geometric_functions.hpp -// CHECK-NEXT: detail/builtins/half_precision_math_functions.hpp -// CHECK-NEXT: detail/builtins/integer_functions.hpp -// CHECK-NEXT: detail/builtins/math_functions.hpp -// CHECK-NEXT: detail/builtins/native_math_functions.hpp -// CHECK-NEXT: detail/builtins/relational_functions.hpp -// CHECK-NEXT: detail/item_base.hpp -// CHECK-NEXT: detail/reduction_forward.hpp -// CHECK-NEXT: item.hpp -// CHECK-NEXT: nd_range.hpp -// CHECK-NEXT: detail/tuple.hpp -// CHECK-NEXT: device.hpp -// CHECK-NEXT: detail/string_view.hpp -// CHECK-NEXT: detail/util.hpp -// CHECK-NEXT: detail/abi_neutral.hpp -// CHECK-NEXT: device_selector.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.def -// CHECK-NEXT: info/device.hpp -// CHECK-NEXT: detail/device_info_types.hpp -// CHECK-NEXT: kernel_bundle_enums.hpp -// CHECK-NEXT: event.hpp -// CHECK-NEXT: info/event.hpp -// CHECK-NEXT: exception_list.hpp -// CHECK-NEXT: group.hpp -// CHECK-NEXT: detail/async_work_group_copy_ptr.hpp -// CHECK-NEXT: device_event.hpp -// CHECK-NEXT: group_algorithm.hpp -// CHECK-NEXT: functional.hpp -// CHECK-NEXT: half_type.hpp -// CHECK-NEXT: known_identity.hpp -// CHECK-NEXT: vector.hpp -// CHECK-NEXT: detail/vector_arith.hpp -// CHECK-NEXT: nd_item.hpp -// CHECK-NEXT: ext/oneapi/functional.hpp -// CHECK-NEXT: handler.hpp -// CHECK-NEXT: detail/cl.h -// CHECK-NEXT: CL/cl.h -// CHECK-NEXT: CL/cl_version.h -// CHECK-NEXT: CL/cl_platform.h -// CHECK-NEXT: 
CL/cl_ext.h -// CHECK-NEXT: detail/get_device_kernel_info.hpp -// CHECK-NEXT: detail/compile_time_kernel_info.hpp -// CHECK-NEXT: detail/kernel_desc.hpp -// CHECK-NEXT: detail/kernel_launch_helper.hpp -// CHECK-NEXT: detail/cg_types.hpp -// CHECK-NEXT: kernel_handler.hpp -// CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property.hpp -// CHECK-NEXT: ext/oneapi/properties/property_value.hpp -// CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp -// CHECK-NEXT: ext/oneapi/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp -// CHECK-NEXT: ext/oneapi/free_function_kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/forward_progress.hpp -// CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp -// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp -// CHECK-NEXT: detail/nd_range_view.hpp -// CHECK-NEXT: detail/range_rounding.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp -// CHECK-NEXT: ext/oneapi/interop_common.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/device_global/device_global.hpp -// CHECK-NEXT: ext/oneapi/device_global/properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/free_function_traits.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def -// CHECK-NEXT: 
ext/oneapi/experimental/detail/properties/node_properties.def -// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp -// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp -// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: kernel.hpp -// CHECK-NEXT: info/kernel.hpp -// CHECK-NEXT: sampler.hpp -// CHECK-NEXT: queue.hpp -// CHECK-NEXT: detail/optional.hpp -// CHECK-NEXT: ext/oneapi/experimental/event_mode_property.hpp -// CHECK-NEXT: info/queue.hpp -// CHECK-NEXT: sycl_span.hpp -// CHECK-NEXT: usm.hpp -// CHECK-NEXT: platform.hpp -// CHECK-NEXT: info/platform.hpp -// CHECK-NEXT: usm/usm_pointer_info.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_span.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_span.hpp.cpp deleted file mode 100644 index 885eafdfddc36..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_span.hpp.cpp +++ /dev/null @@ -1,13 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/span.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/span.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: sycl_span.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_stream.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_stream.hpp.cpp deleted file mode 100644 index 06c49f9cd1d86..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_stream.hpp.cpp +++ /dev/null @@ -1,157 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/stream.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: 
/dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/stream.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: stream.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: accessor.hpp -// CHECK-NEXT: detail/accessor_iterator.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: detail/code_location.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/buffer.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/handler_proxy.hpp -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: ext/oneapi/accessor_property_list.hpp -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: properties/property_traits.hpp -// CHECK-NEXT: multi_ptr.hpp -// CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: properties/accessor_properties.hpp -// CHECK-NEXT: properties/runtime_accessor_properties.def -// CHECK-NEXT: atomic.hpp -// CHECK-NEXT: __spirv/spirv_ops_atomic.hpp -// CHECK-NEXT: __spirv/spirv_ops_builtin_decls.hpp -// 
CHECK-NEXT: __spirv/spirv_ops_base.hpp -// CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: builtins.hpp -// CHECK-NEXT: detail/builtins/builtins.hpp -// CHECK-NEXT: detail/builtins/common_functions.hpp -// CHECK-NEXT: detail/builtins/builtin_helpers.hpp -// CHECK-NEXT: detail/half_type_impl.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: detail/vector_core.hpp -// CHECK-NEXT: detail/named_swizzles_mixin.hpp -// CHECK-NEXT: detail/vector_traits.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-NEXT: marray.hpp -// CHECK-NEXT: detail/builtins/helper_macros.hpp -// CHECK-NEXT: detail/builtins/geometric_functions.hpp -// CHECK-NEXT: detail/builtins/half_precision_math_functions.hpp -// CHECK-NEXT: detail/builtins/integer_functions.hpp -// CHECK-NEXT: detail/builtins/math_functions.hpp -// CHECK-NEXT: detail/builtins/native_math_functions.hpp -// CHECK-NEXT: detail/builtins/relational_functions.hpp -// CHECK-NEXT: ext/oneapi/bfloat16.hpp -// CHECK-NEXT: half_type.hpp -// CHECK-NEXT: group.hpp -// CHECK-NEXT: detail/async_work_group_copy_ptr.hpp -// CHECK-NEXT: device_event.hpp -// CHECK-NEXT: h_item.hpp -// CHECK-NEXT: detail/item_base.hpp -// CHECK-NEXT: item.hpp -// CHECK-NEXT: handler.hpp -// CHECK-NEXT: detail/cl.h -// CHECK-NEXT: CL/cl.h -// CHECK-NEXT: CL/cl_version.h -// CHECK-NEXT: CL/cl_platform.h -// CHECK-NEXT: CL/cl_ext.h -// CHECK-NEXT: detail/get_device_kernel_info.hpp -// CHECK-NEXT: detail/compile_time_kernel_info.hpp -// CHECK-NEXT: detail/kernel_desc.hpp -// CHECK-NEXT: detail/string_view.hpp -// CHECK-NEXT: detail/kernel_launch_helper.hpp -// CHECK-NEXT: detail/cg_types.hpp -// CHECK-NEXT: kernel_handler.hpp -// CHECK-NEXT: detail/is_device_copyable.hpp -// CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property.hpp -// CHECK-NEXT: ext/oneapi/properties/property_value.hpp -// CHECK-NEXT: 
ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp -// CHECK-NEXT: ext/oneapi/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp -// CHECK-NEXT: ext/oneapi/free_function_kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/forward_progress.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp -// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp -// CHECK-NEXT: detail/nd_range_view.hpp -// CHECK-NEXT: nd_range.hpp -// CHECK-NEXT: detail/range_rounding.hpp -// CHECK-NEXT: detail/reduction_forward.hpp -// CHECK-NEXT: device.hpp -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: detail/util.hpp -// CHECK-NEXT: detail/abi_neutral.hpp -// CHECK-NEXT: device_selector.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.def -// CHECK-NEXT: info/device.hpp -// CHECK-NEXT: detail/device_info_types.hpp -// CHECK-NEXT: kernel_bundle_enums.hpp -// CHECK-NEXT: event.hpp -// CHECK-NEXT: info/event.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp -// CHECK-NEXT: ext/oneapi/interop_common.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/device_global/device_global.hpp -// CHECK-NEXT: ext/oneapi/device_global/properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/free_function_traits.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp -// CHECK-NEXT: 
ext/oneapi/experimental/detail/properties/graph_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def -// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp -// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp -// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: kernel.hpp -// CHECK-NEXT: info/kernel.hpp -// CHECK-NEXT: nd_item.hpp -// CHECK-NEXT: sampler.hpp -// CHECK-NEXT: usm/usm_enums.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_type_traits.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_type_traits.hpp.cpp deleted file mode 100644 index a161e80055c6d..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_type_traits.hpp.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/type_traits.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/type_traits.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: device_aspect_traits.hpp -// CHECK-NEXT: device_aspect_macros.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_usm.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_usm.hpp.cpp deleted file mode 100644 index 7ae776a6c043b..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_usm.hpp.cpp +++ /dev/null @@ -1,170 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh 
sycl/khr/split_headers/usm.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/usm.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: usm.hpp -// CHECK-NEXT: builtins.hpp -// CHECK-NEXT: detail/builtins/builtins.hpp -// CHECK-NEXT: detail/builtins/common_functions.hpp -// CHECK-NEXT: detail/builtins/builtin_helpers.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: access/access.hpp -// CHECK-NEXT: detail/half_type_impl.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: aspects.hpp -// CHECK-NEXT: info/aspects.def -// CHECK-NEXT: info/aspects_deprecated.def -// CHECK-NEXT: detail/loop.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/vector_core.hpp -// CHECK-NEXT: detail/named_swizzles_mixin.hpp -// CHECK-NEXT: detail/vector_traits.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-NEXT: marray.hpp -// CHECK-NEXT: detail/builtins/helper_macros.hpp -// CHECK-NEXT: detail/builtins/geometric_functions.hpp -// CHECK-NEXT: detail/builtins/half_precision_math_functions.hpp -// CHECK-NEXT: detail/builtins/integer_functions.hpp -// CHECK-NEXT: detail/builtins/math_functions.hpp -// CHECK-NEXT: detail/builtins/native_math_functions.hpp -// CHECK-NEXT: detail/builtins/relational_functions.hpp -// CHECK-NEXT: device.hpp -// CHECK-NEXT: backend_types.hpp -// CHECK-NEXT: 
detail/owner_less_base.hpp -// CHECK-NEXT: detail/impl_utils.hpp -// CHECK-NEXT: ext/oneapi/weak_object_base.hpp -// CHECK-NEXT: detail/string.hpp -// CHECK-NEXT: detail/string_view.hpp -// CHECK-NEXT: detail/util.hpp -// CHECK-NEXT: detail/abi_neutral.hpp -// CHECK-NEXT: device_selector.hpp -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.hpp -// CHECK-NEXT: detail/info_desc_traits.hpp -// CHECK-NEXT: ur_api.h -// CHECK-NEXT: ext/oneapi/experimental/device_architecture.def -// CHECK-NEXT: info/device.hpp -// CHECK-NEXT: detail/device_info_types.hpp -// CHECK-NEXT: range.hpp -// CHECK-NEXT: detail/array.hpp -// CHECK-NEXT: kernel_bundle_enums.hpp -// CHECK-NEXT: platform.hpp -// CHECK-NEXT: info/platform.hpp -// CHECK-NEXT: property_list.hpp -// CHECK-NEXT: detail/property_helper.hpp -// CHECK-NEXT: detail/property_list_base.hpp -// CHECK-NEXT: exception.hpp -// CHECK-NEXT: properties/property_traits.hpp -// CHECK-NEXT: queue.hpp -// CHECK-NEXT: accessor.hpp -// CHECK-NEXT: detail/accessor_iterator.hpp -// CHECK-NEXT: id.hpp -// CHECK-NEXT: detail/code_location.hpp -// CHECK-NEXT: detail/fwd/buffer.hpp -// CHECK-NEXT: detail/handler_proxy.hpp -// CHECK-NEXT: ext/oneapi/accessor_property_list.hpp -// CHECK-NEXT: multi_ptr.hpp -// CHECK-NEXT: detail/address_space_cast.hpp -// CHECK-NEXT: __spirv/spirv_types.hpp -// CHECK-NEXT: pointers.hpp -// CHECK-NEXT: properties/accessor_properties.hpp -// CHECK-NEXT: properties/runtime_accessor_properties.def -// CHECK-NEXT: async_handler.hpp -// CHECK-NEXT: buffer.hpp -// CHECK-NEXT: detail/helpers.hpp -// CHECK-NEXT: memory_enums.hpp -// CHECK-NEXT: detail/is_device_copyable.hpp -// CHECK-NEXT: detail/stl_type_traits.hpp -// CHECK-NEXT: detail/sycl_mem_obj_allocator.hpp -// CHECK-NEXT: detail/aligned_allocator.hpp -// CHECK-NEXT: detail/os_util.hpp -// CHECK-NEXT: properties/buffer_properties.hpp -// CHECK-NEXT: context.hpp -// CHECK-NEXT: info/context.hpp -// CHECK-NEXT: usm/usm_enums.hpp -// CHECK-NEXT: 
properties/buffer_properties.def -// CHECK-NEXT: detail/cg_types.hpp -// CHECK-NEXT: kernel_handler.hpp -// CHECK-NEXT: detail/kernel_desc.hpp -// CHECK-NEXT: detail/nd_range_view.hpp -// CHECK-NEXT: nd_range.hpp -// CHECK-NEXT: detail/optional.hpp -// CHECK-NEXT: detail/range_rounding.hpp -// CHECK-NEXT: ext/oneapi/kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/forward_progress.hpp -// CHECK-NEXT: ext/oneapi/free_function_kernel_properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property_value.hpp -// CHECK-NEXT: ext/oneapi/properties/property.hpp -// CHECK-NEXT: ext/oneapi/properties.hpp -// CHECK-NEXT: ext/oneapi/properties/property_utils.hpp -// CHECK-NEXT: item.hpp -// CHECK-NEXT: detail/item_base.hpp -// CHECK-NEXT: event.hpp -// CHECK-NEXT: info/event.hpp -// CHECK-NEXT: exception_list.hpp -// CHECK-NEXT: ext/oneapi/device_global/device_global.hpp -// CHECK-NEXT: ext/oneapi/device_global/properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/event_mode_property.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/command_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/common.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/executable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/node.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/graph_properties.def -// CHECK-NEXT: ext/oneapi/experimental/detail/properties/node_properties.def -// CHECK-NEXT: ext/oneapi/experimental/graph/modifiable_graph.hpp -// CHECK-NEXT: ext/oneapi/experimental/graph/dynamic.hpp -// CHECK-NEXT: ext/oneapi/experimental/work_group_memory.hpp -// CHECK-NEXT: handler.hpp -// CHECK-NEXT: detail/cl.h -// CHECK-NEXT: CL/cl.h -// CHECK-NEXT: CL/cl_version.h -// CHECK-NEXT: CL/cl_platform.h -// CHECK-NEXT: CL/cl_ext.h -// CHECK-NEXT: detail/get_device_kernel_info.hpp -// CHECK-NEXT: detail/compile_time_kernel_info.hpp -// 
CHECK-NEXT: detail/kernel_launch_helper.hpp -// CHECK-NEXT: ext/intel/experimental/fp_control_kernel_properties.hpp -// CHECK-NEXT: ext/intel/experimental/kernel_execution_properties.hpp -// CHECK-NEXT: ext/oneapi/experimental/cluster_group_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/use_root_sync_prop.hpp -// CHECK-NEXT: ext/oneapi/experimental/virtual_functions.hpp -// CHECK-NEXT: ext/oneapi/work_group_scratch_memory.hpp -// CHECK-NEXT: detail/sycl_local_mem_builtins.hpp -// CHECK-NEXT: detail/reduction_forward.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_interop.hpp -// CHECK-NEXT: ext/oneapi/interop_common.hpp -// CHECK-NEXT: ext/oneapi/bindless_images_mem_handle.hpp -// CHECK-NEXT: ext/oneapi/experimental/free_function_traits.hpp -// CHECK-NEXT: ext/oneapi/experimental/raw_kernel_arg.hpp -// CHECK-NEXT: group.hpp -// CHECK-NEXT: detail/async_work_group_copy_ptr.hpp -// CHECK-NEXT: device_event.hpp -// CHECK-NEXT: kernel.hpp -// CHECK-NEXT: info/kernel.hpp -// CHECK-NEXT: nd_item.hpp -// CHECK-NEXT: sampler.hpp -// CHECK-NEXT: info/queue.hpp -// CHECK-NEXT: sycl_span.hpp -// CHECK-NEXT: usm/usm_pointer_info.hpp -// CHECK-NEXT: usm/usm_allocator.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_vec.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_vec.hpp.cpp deleted file mode 100644 index ac5112175bc83..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_vec.hpp.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/vec.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/vec.hpp -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-NEXT: vector.hpp -// CHECK-NEXT: detail/vector_arith.hpp -// CHECK-NEXT: detail/generic_type_traits.hpp -// 
CHECK-NEXT: access/access.hpp -// CHECK-NEXT: aliases.hpp -// CHECK-NEXT: bit_cast.hpp -// CHECK-NEXT: detail/fwd/half.hpp -// CHECK-NEXT: detail/type_traits.hpp -// CHECK-NEXT: detail/type_traits/bool_traits.hpp -// CHECK-NEXT: detail/type_traits/vec_marray_traits.hpp -// CHECK-NEXT: detail/fwd/multi_ptr.hpp -// CHECK-NEXT: detail/type_traits/integer_traits.hpp -// CHECK-NEXT: detail/vector_traits.hpp -// CHECK-NEXT: detail/vector_core.hpp -// CHECK-NEXT: detail/named_swizzles_mixin.hpp -// CHECK-NEXT: detail/common.hpp -// CHECK-NEXT: detail/assert.hpp -// CHECK-NEXT: __spirv/spirv_vars.hpp -// CHECK-NEXT: detail/export.hpp -// CHECK-NEXT: detail/nd_loop.hpp -// CHECK-NEXT: detail/fwd/accessor.hpp -// CHECK-NEXT: detail/defines.hpp -// CHECK-NEXT: detail/memcpy.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/sycl_khr_split_headers_version.hpp.cpp b/sycl/test/include_deps/sycl_khr_split_headers_version.hpp.cpp deleted file mode 100644 index e2252d703fad8..0000000000000 --- a/sycl/test/include_deps/sycl_khr_split_headers_version.hpp.cpp +++ /dev/null @@ -1,10 +0,0 @@ -// Use update_test.sh to (re-)generate the checks -// REQUIRES: linux -// RUN: bash %S/deps_known.sh sycl/khr/split_headers/version.hpp | FileCheck %s - -// CHECK-LABEL: Dependencies for : -// CHECK-NEXT: /dev/null: /dev/null -// CHECK-NEXT: khr/split_headers/version.hpp -// CHECK-NEXT: detail/defines_elementary.hpp -// CHECK-NEXT: feature_test.hpp -// CHECK-EMPTY: diff --git a/sycl/test/include_deps/update_test.sh b/sycl/test/include_deps/update_test.sh deleted file mode 100755 index 6bac1ab4b56be..0000000000000 --- a/sycl/test/include_deps/update_test.sh +++ /dev/null @@ -1,62 +0,0 @@ -HEADERS=( - sycl/detail/defines_elementary.hpp - sycl/detail/export.hpp - - sycl/buffer.hpp - sycl/accessor.hpp - - sycl/detail/core.hpp - - sycl/khr/split_headers/accessor.hpp - sycl/khr/split_headers/atomic.hpp - # backend header depends on how the project was configured and as such it - # is not exactly 
portable, so it is excluded - # sycl/khr/split_headers/backend - sycl/khr/split_headers/bit.hpp - sycl/khr/split_headers/buffer.hpp - sycl/khr/split_headers/byte.hpp - sycl/khr/split_headers/builtins_common.hpp - sycl/khr/split_headers/builtins_geometric.hpp - sycl/khr/split_headers/builtins_integer.hpp - sycl/khr/split_headers/builtins_math.hpp - sycl/khr/split_headers/builtins_relational.hpp - sycl/khr/split_headers/context.hpp - sycl/khr/split_headers/device.hpp - sycl/khr/split_headers/event.hpp - sycl/khr/split_headers/exception.hpp - sycl/khr/split_headers/functional.hpp - sycl/khr/split_headers/group_algorithms.hpp - sycl/khr/split_headers/groups.hpp - sycl/khr/split_headers/half.hpp - sycl/khr/split_headers/handler.hpp - sycl/khr/split_headers/hierarchical_parallelism.hpp - sycl/khr/split_headers/images.hpp - sycl/khr/split_headers/index_space.hpp - sycl/khr/split_headers/interop_handle.hpp - sycl/khr/split_headers/kernel_bundle.hpp - sycl/khr/split_headers/kernel_handler.hpp - sycl/khr/split_headers/marray.hpp - sycl/khr/split_headers/math.hpp - sycl/khr/split_headers/multi_ptr.hpp - sycl/khr/split_headers/platform.hpp - sycl/khr/split_headers/property_list.hpp - sycl/khr/split_headers/queue.hpp - sycl/khr/split_headers/reduction.hpp - sycl/khr/split_headers/span.hpp - sycl/khr/split_headers/stream.hpp - sycl/khr/split_headers/type_traits.hpp - sycl/khr/split_headers/usm.hpp - sycl/khr/split_headers/vec.hpp - sycl/khr/split_headers/version.hpp -) - -for x in ${HEADERS[@]} ; do - name="$(echo $x | sed 's@/@_@g').cpp" - echo -e "// Use update_test.sh to (re-)generate the checks" > $name - echo -e "// REQUIRES: linux" >> $name - echo -e "// RUN: bash %S/deps_known.sh $x | FileCheck %s\n" >> $name - bash deps_known.sh $x | \ - sed 's@^@// CHECK-NEXT: @' | \ - sed 's@CHECK-NEXT: Dependencies@CHECK-LABEL: Dependencies@' | \ - sed 's@CHECK-NEXT: $@CHECK-EMPTY:@' >> $name -done diff --git a/sycl/tools/header_reach_check.py b/sycl/tools/header_reach_check.py new 
file mode 100644 index 0000000000000..b8251e3e8f3c7 --- /dev/null +++ b/sycl/tools/header_reach_check.py @@ -0,0 +1,224 @@ +#!/usr/bin/env python +""" +Check that lightweight SYCL public headers do not transitively include +the heavy kernel-launch headers. Exits nonzero on any violation. +""" + +import argparse +import os +import subprocess # nosec B404 +import sys + +# ────────────────────────────────────────────────────────────────────── +# HEADER LAYERING CONTRACT — read before editing. +# +# These rules guarantee that lightweight public headers do not transitively +# pull in the heavy kernel-launch headers (handler/accessor/buffer/queue/...), +# protecting compile time across the include tree and the KHR split_headers +# surface. +# +# This table is HAND-MAINTAINED, not autogenerated. It encodes what SHOULD be +# true, not a snapshot of what currently is. +# +# If this check fails, the DEFAULT fix is to remove the offending #include or +# break the dependency — NOT to weaken a rule here. Relaxing a rule (removing +# a target or an entry) is a deliberate architectural regression requiring +# justification in the PR description and reviewer sign-off. +# ────────────────────────────────────────────────────────────────────── + +HEAVY = [ + "handler.hpp", + "accessor.hpp", + "buffer.hpp", + "queue.hpp", + "image.hpp", + "stream.hpp", + "reduction.hpp", + "kernel_bundle.hpp", +] + +FORBIDDEN_REACH = [ + # Heavy headers — currently unconstrained; add targets as splits land. + ("sycl/accessor.hpp", []), + ("sycl/buffer.hpp", []), + ("sycl/handler.hpp", []), + ("sycl/image.hpp", []), + ("sycl/kernel_bundle.hpp", []), + ("sycl/queue.hpp", []), + ("sycl/reduction.hpp", []), + ("sycl/stream.hpp", []), + # Lightweight headers — must not reach any heavy header. 
+ ("sycl/aliases.hpp", HEAVY), + ("sycl/aspects.hpp", HEAVY), + ("sycl/async_handler.hpp", HEAVY), + ("sycl/atomic_fence.hpp", HEAVY), + ("sycl/atomic.hpp", HEAVY), + ("sycl/atomic_ref.hpp", HEAVY), + ("sycl/backend_types.hpp", HEAVY), + ("sycl/bit_cast.hpp", HEAVY), + ("sycl/builtins.hpp", HEAVY), + ("sycl/builtins_esimd.hpp", HEAVY), + ("sycl/context.hpp", HEAVY), + ("sycl/define_vendors.hpp", HEAVY), + ("sycl/device_aspect_traits.hpp", HEAVY), + ("sycl/device_event.hpp", HEAVY), + ("sycl/device.hpp", HEAVY), + ("sycl/device_selector.hpp", HEAVY), + ("sycl/event.hpp", HEAVY), + ("sycl/exception.hpp", HEAVY), + ("sycl/exception_list.hpp", HEAVY), + ("sycl/functional.hpp", HEAVY), + ("sycl/group_algorithm.hpp", HEAVY), + ("sycl/group_barrier.hpp", HEAVY), + ("sycl/group.hpp", HEAVY), + ("sycl/half_type.hpp", HEAVY), + ("sycl/h_item.hpp", HEAVY), + ("sycl/id.hpp", HEAVY), + ("sycl/item.hpp", HEAVY), + ("sycl/kernel_bundle_enums.hpp", HEAVY), + ("sycl/kernel_handler.hpp", HEAVY), + ("sycl/kernel.hpp", HEAVY), + ("sycl/known_identity.hpp", HEAVY), + ("sycl/marray.hpp", HEAVY), + ("sycl/memory_enums.hpp", HEAVY), + ("sycl/multi_ptr.hpp", HEAVY), + ("sycl/nd_item.hpp", HEAVY), + ("sycl/nd_range.hpp", HEAVY), + ("sycl/platform.hpp", HEAVY), + ("sycl/pointers.hpp", HEAVY), + ("sycl/property_list_conversion.hpp", HEAVY), + ("sycl/property_list.hpp", HEAVY), + ("sycl/range.hpp", HEAVY), + ("sycl/sampler.hpp", HEAVY), + ("sycl/specialization_id.hpp", HEAVY), + ("sycl/sub_group.hpp", HEAVY), + ("sycl/sycl_span.hpp", HEAVY), + ("sycl/vector.hpp", HEAVY), +] + + +def get_transitive_includes(clang, header): + """Run device-only and host compiles, return union of reached sycl/ headers. + + ``clang`` is a list of tokens (the binary plus any leading driver flags, + e.g. ``--driver-mode=g++`` that lit's %clangxx expands to on Windows). 
+ """ + reached = set() + commands = [ + [ + *clang, + "-fsycl", + "-fsycl-device-only", + "-include", + header, + "-c", + "-x", + "c++", + os.devnull, + "-o", + os.devnull, + "-MD", + "-MF", + "-", + ], + [ + *clang, + "-fsycl", + "-include", + header, + "-c", + "-x", + "c++", + os.devnull, + "-o", + os.devnull, + "-MD", + "-MF", + "-", + ], + ] + for cmd in commands: + result = subprocess.run(cmd, capture_output=True) + if result.returncode != 0: + stderr = result.stderr.decode(errors="replace").strip() + print( + f"error: compile failed for {header}:\n" + f" command: {' '.join(cmd)}\n" + f" stderr: {stderr}", + file=sys.stderr, + ) + sys.exit(2) + output = result.stdout.decode(errors="replace") + if not output.strip(): + stderr = result.stderr.decode(errors="replace").strip() + print( + f"error: empty -MD output for {header}:\n" + f" command: {' '.join(cmd)}\n" + f" stderr: {stderr}", + file=sys.stderr, + ) + sys.exit(2) + # Normalize Windows backslash separators to '/' so the marker match + # works on both platforms. Make line-continuations ('\' at EOL) and + # path backslashes both become '/', which split() then tokenizes. + normalized = output.replace("\\", "/") + for tok in normalized.split(): + idx = tok.find("/include/sycl/") + if idx != -1: + rel = tok[idx + len("/include/sycl/") :] + reached.add(rel) + return reached + + +def main(): + parser = argparse.ArgumentParser( + description="Check SYCL header transitive-reach layering contract." + ) + parser.add_argument( + "--clang", + required=True, + nargs=argparse.REMAINDER, + help="clang++ invocation: binary path plus any trailing driver flags. " + "Must be the LAST argument. lit's %%clangxx expands to e.g. 
" + "'clang.exe --driver-mode=g++' on Windows; REMAINDER captures the " + "binary and all following tokens verbatim as one command list.", + ) + args = parser.parse_args() + if not args.clang: + parser.error("--clang requires at least the clang++ binary path") + + violations = [] + for entry_header, forbidden_targets in FORBIDDEN_REACH: + reached = get_transitive_includes(args.clang, entry_header) + entry_basename = entry_header.split("/")[-1] + reached.discard(entry_basename) + for target in forbidden_targets: + if target in reached: + violations.append((entry_header, target)) + + if violations: + for entry, target in violations: + print( + f"error: header layering violation: {entry} must not " + f"transitively include {target}, but it does.\n" + f" This coupling defeats the split-header compile-time " + f"guarantee.\n" + f" Default fix: remove the offending #include or break " + f"the dependency.\n" + f" Do NOT relax the rule table to silence this without " + f"PR justification.\n" + ) + print(f"FAILED: {len(violations)} header layering violation(s) found.") + sys.exit(1) + else: + constrained = sum(1 for _, t in FORBIDDEN_REACH if t) + print( + f"PASSED: all {constrained} constrained headers are clean " + f"({len(FORBIDDEN_REACH)} total entries, " + f"{len(HEAVY)} forbidden targets max)." + ) + sys.exit(0) + + +if __name__ == "__main__": + main() From 429a106ad2b8fb29f770f2cc08603f6fe8358b29 Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Tue, 23 Jun 2026 12:21:03 +0200 Subject: [PATCH 153/511] [SystemZ] Add serialization strings for some MO target flags. (#203053) These strings are needed for MIR textual representation: If one is missing it doesn't work to do "-stop-before=XXX and then -start-before=XXX". 
--- llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 2 ++ llvm/test/CodeGen/SystemZ/target-flags.ll | 14 ++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 llvm/test/CodeGen/SystemZ/target-flags.ll diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp index c16da52d09e62..d5bdb8ef3b595 100644 --- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -2424,6 +2424,8 @@ SystemZInstrInfo::getSerializableDirectMachineOperandTargetFlags() const { using namespace SystemZII; static const std::pair TargetFlags[] = { + {MO_GOT, "systemz-got"}, + {MO_INDNTPOFF, "systemz-indntpoff"}, {MO_ADA_DATA_SYMBOL_ADDR, "systemz-ada-datasymboladdr"}, {MO_ADA_INDIRECT_FUNC_DESC, "systemz-ada-indirectfuncdesc"}, {MO_ADA_DIRECT_FUNC_DESC, "systemz-ada-directfuncdesc"}}; diff --git a/llvm/test/CodeGen/SystemZ/target-flags.ll b/llvm/test/CodeGen/SystemZ/target-flags.ll new file mode 100644 index 0000000000000..e25c8972fb263 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/target-flags.ll @@ -0,0 +1,14 @@ +; RUN: llc -mtriple=s390x-linux-gnu -stop-after=systemz-isel --simplify-mir < %s | FileCheck %s + +@G = external global i64 +define i64 @fun0() { +; CHECK: %{{.*}}:addr64bit = LGRL target-flags(systemz-got) @G + %Res = load i64, ptr @G + ret i64 %Res +} + +@x = thread_local(initialexec) global i32 0 +define ptr@fun1() { +; CHECK: %{{.*}}:addr64bit = LARL target-flags(systemz-indntpoff) @x + ret ptr@x +} From b8ad0813857a5dba11de7c2aeab494d01b107f62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Benics?= Date: Tue, 23 Jun 2026 12:49:07 +0200 Subject: [PATCH 154/511] Reland "[clang][ssaf][NFC] Move SSAF flags from FrontendOptions to a dedicated SSAFOptions" (#205312) Third attempt of #204686 Previous attempt was: #204798 This was last reverted in #205279 This class will help keeping SSAF options apart from generic FrontendOptions. It is inspired by AnalyzerOptions. 
This way all of these SSAF (and future) options will be at a centralized place. In preparation of rdar://179151023 --- The previous attempt had issues on Windows with `/permissive` configs. The issue was that `GENERATE_OPTION_WITH_MARSHALLING` had a generic lambda capture and that does not constitute as an ODR-use of the captured name - because that name was only used in an unevaluated context. This meant that MSVC refused to find the function parameter. My speculative solution is to follow the established pattern of passing a pointer to the Opts structure instead of passing it directly as a reference - similar to how `AnalyzerOptions` are passed around that macro. --- .../include/clang/Frontend/CompilerInstance.h | 5 ++ .../clang/Frontend/CompilerInvocation.h | 25 ++++++--- .../include/clang/Frontend/FrontendOptions.h | 24 +-------- clang/include/clang/Frontend/SSAFOptions.h | 52 ++++++++++++++++++ clang/include/clang/Options/Options.td | 12 +++-- clang/lib/Frontend/CompilerInvocation.cpp | 36 ++++++++++++- .../ExecuteCompilerInvocation.cpp | 3 +- .../TUSummaryExtractorFrontendAction.cpp | 25 ++++----- .../TUSummaryExtractorFrontendActionTest.cpp | 53 ++++++++++--------- 9 files changed, 160 insertions(+), 75 deletions(-) create mode 100644 clang/include/clang/Frontend/SSAFOptions.h diff --git a/clang/include/clang/Frontend/CompilerInstance.h b/clang/include/clang/Frontend/CompilerInstance.h index bb0eddb918623..24488e053c628 100644 --- a/clang/include/clang/Frontend/CompilerInstance.h +++ b/clang/include/clang/Frontend/CompilerInstance.h @@ -317,6 +317,11 @@ class CompilerInstance : public ModuleLoader { return Invocation->getFrontendOpts(); } + ssaf::SSAFOptions &getSSAFOpts() { return Invocation->getSSAFOpts(); } + const ssaf::SSAFOptions &getSSAFOpts() const { + return Invocation->getSSAFOpts(); + } + HeaderSearchOptions &getHeaderSearchOpts() { return Invocation->getHeaderSearchOpts(); } diff --git a/clang/include/clang/Frontend/CompilerInvocation.h 
b/clang/include/clang/Frontend/CompilerInvocation.h index 6fa6cd5d95534..03097aefacf50 100644 --- a/clang/include/clang/Frontend/CompilerInvocation.h +++ b/clang/include/clang/Frontend/CompilerInvocation.h @@ -51,6 +51,10 @@ class HeaderSearchOptions; class PreprocessorOptions; class TargetOptions; +namespace ssaf { +class SSAFOptions; +} // namespace ssaf + // This lets us create the DiagnosticsEngine with a properly-filled-out // DiagnosticOptions instance. std::unique_ptr @@ -116,6 +120,9 @@ class CompilerInvocationBase { /// Options controlling preprocessed output. std::shared_ptr PreprocessorOutputOpts; + /// Options controlling the Scalable Static Analysis Framework (SSAF). + std::shared_ptr SSAFOpts; + /// Dummy tag type whose instance can be passed into the constructor to /// prevent creation of the reference-counted option objects. struct EmptyConstructor {}; @@ -150,6 +157,7 @@ class CompilerInvocationBase { const PreprocessorOutputOptions &getPreprocessorOutputOpts() const { return *PreprocessorOutputOpts; } + const ssaf::SSAFOptions &getSSAFOpts() const { return *SSAFOpts; } /// @} /// Visitation. @@ -247,19 +255,20 @@ class CompilerInvocation : public CompilerInvocationBase { /// @{ // Note: These need to be pulled in manually. Otherwise, they get hidden by // the mutable getters with the same names. 
- using CompilerInvocationBase::getLangOpts; - using CompilerInvocationBase::getTargetOpts; - using CompilerInvocationBase::getDiagnosticOpts; - using CompilerInvocationBase::getHeaderSearchOpts; - using CompilerInvocationBase::getPreprocessorOpts; using CompilerInvocationBase::getAnalyzerOpts; - using CompilerInvocationBase::getMigratorOpts; using CompilerInvocationBase::getAPINotesOpts; using CompilerInvocationBase::getCodeGenOpts; + using CompilerInvocationBase::getDependencyOutputOpts; + using CompilerInvocationBase::getDiagnosticOpts; using CompilerInvocationBase::getFileSystemOpts; using CompilerInvocationBase::getFrontendOpts; - using CompilerInvocationBase::getDependencyOutputOpts; + using CompilerInvocationBase::getHeaderSearchOpts; + using CompilerInvocationBase::getLangOpts; + using CompilerInvocationBase::getMigratorOpts; + using CompilerInvocationBase::getPreprocessorOpts; using CompilerInvocationBase::getPreprocessorOutputOpts; + using CompilerInvocationBase::getSSAFOpts; + using CompilerInvocationBase::getTargetOpts; /// @} /// Mutable getters. @@ -281,6 +290,7 @@ class CompilerInvocation : public CompilerInvocationBase { PreprocessorOutputOptions &getPreprocessorOutputOpts() { return *PreprocessorOutputOpts; } + ssaf::SSAFOptions &getSSAFOpts() { return *SSAFOpts; } /// @} /// Create a compiler invocation from a list of input options. @@ -392,6 +402,7 @@ class CowCompilerInvocation : public CompilerInvocationBase { FrontendOptions &getMutFrontendOpts(); DependencyOutputOptions &getMutDependencyOutputOpts(); PreprocessorOutputOptions &getMutPreprocessorOutputOpts(); + ssaf::SSAFOptions &getMutSSAFOpts(); /// @} }; diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index 7c242f6e94fe0..a8627ea5d47a4 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -543,27 +543,6 @@ class FrontendOptions { /// minimization hints. 
std::string DumpMinimizationHintsPath; - /// List of SSAF extractors to enable. - std::vector SSAFExtractSummaries; - - /// The TU summary output file with the file extension representing the file - /// format. - std::string SSAFTUSummaryFile; - - /// Stable identifier for this translation unit, used as the name of the - /// `CompilationUnit` `BuildNamespace` of every produced TU summary. The - /// caller (typically the build system) supplies a value that is constant - /// across stages of the SSAF pipeline. - std::string SSAFCompilationUnitId; - - /// Show available SSAF summary extractors. - LLVM_PREFERRED_TYPE(bool) - unsigned SSAFShowExtractors : 1; - - /// Show available SSAF serialization formats. - LLVM_PREFERRED_TYPE(bool) - unsigned SSAFShowFormats : 1; - public: FrontendOptions() : DisableFree(false), RelocatablePCH(false), ShowHelp(false), @@ -581,8 +560,7 @@ class FrontendOptions { EmitPrettySymbolGraphs(false), GenReducedBMI(false), UseClangIRPipeline(false), ClangIRDisablePasses(false), ClangIRDisableCIRVerifier(false), ClangIREnableIdiomRecognizer(false), - TimeTraceGranularity(500), TimeTraceVerbose(false), - SSAFShowExtractors(false), SSAFShowFormats(false) {} + TimeTraceGranularity(500), TimeTraceVerbose(false) {} /// getInputKindForExtension - Return the appropriate input kind for a file /// extension. For example, "c" would return Language::C. diff --git a/clang/include/clang/Frontend/SSAFOptions.h b/clang/include/clang/Frontend/SSAFOptions.h new file mode 100644 index 0000000000000..738262cc4a713 --- /dev/null +++ b/clang/include/clang/Frontend/SSAFOptions.h @@ -0,0 +1,52 @@ +//===- SSAFOptions.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_FRONTEND_SSAFOPTIONS_H +#define LLVM_CLANG_FRONTEND_SSAFOPTIONS_H + +#include "llvm/Support/Compiler.h" +#include <string> +#include <vector> + +namespace clang::ssaf { + +class SSAFOptions { +public: + /// List of SSAF extractors to enable. + /// Controlled by: --ssaf-extract-summaries + std::vector<std::string> ExtractSummaries; + + /// The TU summary output file with the file extension representing the + /// serialization format. + /// Controlled by: --ssaf-tu-summary-file + std::string TUSummaryFile; + + /// Stable identifier used as the name of the `CompilationUnit` + /// `BuildNamespace` of every produced TU summary. + /// Controlled by: --ssaf-compilation-unit-id + std::string CompilationUnitId; + + /// Show the list of available SSAF summary extractors and exit. + /// Controlled by: --ssaf-list-extractors + LLVM_PREFERRED_TYPE(bool) + unsigned ShowExtractors : 1; + + /// Show the list of available SSAF serialization formats and exit. 
+ /// Controlled by: --ssaf-list-formats + LLVM_PREFERRED_TYPE(bool) + unsigned ShowFormats : 1; + + SSAFOptions() { + ShowExtractors = false; + ShowFormats = false; + }; +}; + +} // namespace clang::ssaf + +#endif // LLVM_CLANG_FRONTEND_SSAFOPTIONS_H diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 377897a15f746..4fc9f4d4c3472 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -345,6 +345,8 @@ class FileSystemOpts : KeyPathAndMacro<"FileSystemOpts.", base, "FILE_SYSTEM_"> {} class AnalyzerOpts : KeyPathAndMacro<"AnalyzerOpts->", base, "ANALYZER_"> {} +class SSAFOpts + : KeyPathAndMacro<"SSAFOpts->", base, "SSAF_"> {} class MigratorOpts : KeyPathAndMacro<"MigratorOpts.", base, "MIGRATOR_"> {} @@ -947,7 +949,7 @@ def _ssaf_extract_summaries : Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Comma-separated list of summary names to extract">, - MarshallingInfoStringVector>; + MarshallingInfoStringVector>; def _ssaf_tu_summary_file : Joined<["--"], "ssaf-tu-summary-file=">, MetaVarName<".">, @@ -956,19 +958,19 @@ def _ssaf_tu_summary_file : HelpText< "The output file for the extracted summaries. 
" "The extension selects which file format to use.">, - MarshallingInfoString>; + MarshallingInfoString>; def _ssaf_list_extractors : Flag<["--"], "ssaf-list-extractors">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Display the list of available SSAF summary extractors">, - MarshallingInfoFlag>; + MarshallingInfoFlag>; def _ssaf_list_formats : Flag<["--"], "ssaf-list-formats">, Group, Visibility<[ClangOption, CC1Option]>, HelpText<"Display the list of available SSAF serialization formats">, - MarshallingInfoFlag>; + MarshallingInfoFlag>; def _ssaf_compilation_unit_id : Joined<["--"], "ssaf-compilation-unit-id=">, MetaVarName<"">, @@ -978,7 +980,7 @@ def _ssaf_compilation_unit_id : "Stable identifier used as the CompilationUnit namespace name of every " "produced SSAF TU summary. Required when '--ssaf-tu-summary-file=' is " "set.">, - MarshallingInfoString>; + MarshallingInfoString>; def Xarch__ : JoinedAndSeparate<["-"], "Xarch_">, Flags<[NoXarchOption]>, diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index ca2d02c7dbd97..55b344fc2da26 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -32,6 +32,7 @@ #include "clang/Frontend/FrontendOptions.h" #include "clang/Frontend/MigratorOptions.h" #include "clang/Frontend/PreprocessorOutputOptions.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/Frontend/TextDiagnosticBuffer.h" #include "clang/Frontend/Utils.h" #include "clang/Lex/HeaderSearchOptions.h" @@ -133,7 +134,8 @@ CompilerInvocationBase::CompilerInvocationBase() FSOpts(std::make_shared()), FrontendOpts(std::make_shared()), DependencyOutputOpts(std::make_shared()), - PreprocessorOutputOpts(std::make_shared()) {} + PreprocessorOutputOpts(std::make_shared()), + SSAFOpts(std::make_shared()) {} CompilerInvocationBase & CompilerInvocationBase::deep_copy_assign(const CompilerInvocationBase &X) { @@ -151,6 +153,7 @@ 
CompilerInvocationBase::deep_copy_assign(const CompilerInvocationBase &X) { FrontendOpts = make_shared_copy(X.getFrontendOpts()); DependencyOutputOpts = make_shared_copy(X.getDependencyOutputOpts()); PreprocessorOutputOpts = make_shared_copy(X.getPreprocessorOutputOpts()); + SSAFOpts = make_shared_copy(X.getSSAFOpts()); } return *this; } @@ -171,6 +174,7 @@ CompilerInvocationBase::shallow_copy_assign(const CompilerInvocationBase &X) { FrontendOpts = X.FrontendOpts; DependencyOutputOpts = X.DependencyOutputOpts; PreprocessorOutputOpts = X.PreprocessorOutputOpts; + SSAFOpts = X.SSAFOpts; } return *this; } @@ -237,6 +241,10 @@ FrontendOptions &CowCompilerInvocation::getMutFrontendOpts() { return ensureOwned(FrontendOpts); } +ssaf::SSAFOptions &CowCompilerInvocation::getMutSSAFOpts() { + return ensureOwned(SSAFOpts); +} + DependencyOutputOptions &CowCompilerInvocation::getMutDependencyOutputOpts() { return ensureOwned(DependencyOutputOpts); } @@ -1034,6 +1042,30 @@ static void GenerateAnalyzerArgs(const AnalyzerOptions &Opts, // Nothing to generate for FullCompilerInvocation. } +static void GenerateSSAFArgs(const ssaf::SSAFOptions &Opts, + ArgumentConsumer Consumer) { + const ssaf::SSAFOptions *SSAFOpts = &Opts; + +#define SSAF_OPTION_WITH_MARSHALLING(...) \ + GENERATE_OPTION_WITH_MARSHALLING(Consumer, __VA_ARGS__) +#include "clang/Options/Options.inc" +#undef SSAF_OPTION_WITH_MARSHALLING +} + +static bool ParseSSAFArgs(ssaf::SSAFOptions &Opts, ArgList &Args, + DiagnosticsEngine &Diags) { + unsigned NumErrorsBefore = Diags.getNumErrors(); + + ssaf::SSAFOptions *SSAFOpts = &Opts; + +#define SSAF_OPTION_WITH_MARSHALLING(...) 
\ + PARSE_OPTION_WITH_MARSHALLING(Args, Diags, __VA_ARGS__) +#include "clang/Options/Options.inc" +#undef SSAF_OPTION_WITH_MARSHALLING + + return Diags.getNumErrors() == NumErrorsBefore; +} + static bool ParseAnalyzerArgs(AnalyzerOptions &Opts, ArgList &Args, DiagnosticsEngine &Diags) { unsigned NumErrorsBefore = Diags.getNumErrors(); @@ -5083,6 +5115,7 @@ bool CompilerInvocation::CreateFromArgsImpl( ParseFileSystemArgs(Res.getFileSystemOpts(), Args, Diags); ParseMigratorArgs(Res.getMigratorOpts(), Args, Diags); ParseAnalyzerArgs(Res.getAnalyzerOpts(), Args, Diags); + ParseSSAFArgs(Res.getSSAFOpts(), Args, Diags); ParseDiagnosticArgs(Res.getDiagnosticOpts(), Args, &Diags, /*DefaultDiagColor=*/false); ParseFrontendArgs(Res.getFrontendOpts(), Args, Diags, LangOpts.IsHeaderFile); @@ -5435,6 +5468,7 @@ void CompilerInvocationBase::generateCC1CommandLine( GenerateFileSystemArgs(getFileSystemOpts(), Consumer); GenerateMigratorArgs(getMigratorOpts(), Consumer); GenerateAnalyzerArgs(getAnalyzerOpts(), Consumer); + GenerateSSAFArgs(getSSAFOpts(), Consumer); GenerateDiagnosticArgs(getDiagnosticOpts(), Consumer, /*DefaultDiagColor=*/false); GenerateFrontendArgs(getFrontendOpts(), Consumer, getLangOpts().IsHeaderFile); diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index e4622496758ac..997200619e599 100644 --- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -19,6 +19,7 @@ #include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendActions.h" #include "clang/Frontend/FrontendPluginRegistry.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/Frontend/Utils.h" #include "clang/FrontendTool/Utils.h" #include "clang/Options/Options.h" @@ -209,7 +210,7 @@ CreateFrontendAction(CompilerInstance &CI) { Act = std::make_unique(std::move(Act), FEOpts.ASTMergeFiles); - if (!FEOpts.SSAFTUSummaryFile.empty()) { + if 
(!CI.getSSAFOpts().TUSummaryFile.empty()) { Act = std::make_unique( std::move(Act)); } diff --git a/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp b/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp index fe900f383ae31..b4b3e85386428 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp @@ -11,6 +11,7 @@ #include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/TargetInfo.h" #include "clang/Frontend/MultiplexConsumer.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" @@ -100,33 +101,33 @@ class TUSummaryRunner final : public MultiplexConsumer { private: TUSummaryRunner(llvm::Triple TargetTriple, std::unique_ptr Format, - const FrontendOptions &Opts); + const SSAFOptions &Opts); void HandleTranslationUnit(ASTContext &Ctx) override; TUSummary Summary; TUSummaryBuilder Builder = TUSummaryBuilder(Summary); std::unique_ptr Format; - const FrontendOptions &Opts; + const SSAFOptions &Opts; }; } // namespace std::unique_ptr TUSummaryRunner::create(CompilerInstance &CI) { - const FrontendOptions &Opts = CI.getFrontendOpts(); + const SSAFOptions &Opts = CI.getSSAFOpts(); DiagnosticsEngine &Diags = CI.getDiagnostics(); - if (Opts.SSAFCompilationUnitId.empty()) { + if (Opts.CompilationUnitId.empty()) { Diags.Report(diag::warn_ssaf_tu_summary_requires_compilation_unit_id); return nullptr; } auto MaybePair = - parseOutputFileFormatAndPathOrReportError(Diags, Opts.SSAFTUSummaryFile); + parseOutputFileFormatAndPathOrReportError(Diags, Opts.TUSummaryFile); if (!MaybePair.has_value()) return nullptr; auto 
[FormatName, OutputPath] = MaybePair.value(); - if (reportUnrecognizedExtractorNames(Diags, Opts.SSAFExtractSummaries)) + if (reportUnrecognizedExtractorNames(Diags, Opts.ExtractSummaries)) return nullptr; return std::unique_ptr{new TUSummaryRunner{ @@ -135,18 +136,18 @@ std::unique_ptr TUSummaryRunner::create(CompilerInstance &CI) { TUSummaryRunner::TUSummaryRunner(llvm::Triple TargetTriple, std::unique_ptr Format, - const FrontendOptions &Opts) + const SSAFOptions &Opts) : MultiplexConsumer(std::vector>{}), Summary(std::move(TargetTriple), BuildNamespace(BuildNamespaceKind::CompilationUnit, - Opts.SSAFCompilationUnitId)), + Opts.CompilationUnitId)), Format(std::move(Format)), Opts(Opts) { assert(this->Format); - assert(!Opts.SSAFCompilationUnitId.empty()); + assert(!Opts.CompilationUnitId.empty()); // Now the Summary and the builders are constructed, we can also construct the // extractors. - auto Extractors = makeTUSummaryExtractors(Builder, Opts.SSAFExtractSummaries); + auto Extractors = makeTUSummaryExtractors(Builder, Opts.ExtractSummaries); assert(!Extractors.empty()); // We must initialize the Consumers here because our extractors need a @@ -164,9 +165,9 @@ void TUSummaryRunner::HandleTranslationUnit(ASTContext &Ctx) { llvm::sys::sandbox::ScopedSetting Guard = llvm::sys::sandbox::scopedDisable(); // Then serialize the result. 
- if (auto Err = Format->writeTUSummary(Summary, Opts.SSAFTUSummaryFile)) { + if (auto Err = Format->writeTUSummary(Summary, Opts.TUSummaryFile)) { Ctx.getDiagnostics().Report(diag::warn_ssaf_write_tu_summary_failed) - << Opts.SSAFTUSummaryFile << llvm::toString(std::move(Err)); + << Opts.TUSummaryFile << llvm::toString(std::move(Err)); } } diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp b/clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp index 18d9e1735061d..cb5448e4860bc 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp @@ -11,6 +11,7 @@ #include "clang/AST/ASTContext.h" #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendOptions.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/Frontend/TextDiagnosticBuffer.h" #include "clang/Lex/PreprocessorOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormat.h" @@ -299,9 +300,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, // Configure valid SSAF options so the failure is purely from the wrapped // action, not from runner creation. 
std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; TUSummaryExtractorFrontendAction ExtractorAction( std::make_unique()); @@ -315,9 +316,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, RunnerFailsWithInvalidFormat_WrappedConsumerStillRuns) { // Use an unregistered format extension so TUSummaryRunner::create fails. std::string Output = makePath("output.xyz"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; auto Wrapped = std::make_unique(); const EventLog &Log = Wrapped->getLog(); @@ -345,9 +346,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, TEST_F(TUSummaryExtractorFrontendActionTest, RunnerFailsWithUnknownExtractor_WrappedConsumerStillRuns) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NonExistentExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NonExistentExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; auto Wrapped = std::make_unique(); const EventLog &Log = Wrapped->getLog(); @@ -370,9 +371,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, TEST_F(TUSummaryExtractorFrontendActionTest, 
RunnerSucceeds_ASTConsumerCallbacksPropagate) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; auto Wrapped = std::make_unique(); const EventLog &Log = Wrapped->getLog(); @@ -425,9 +426,9 @@ struct OrderCheckingAction : public ASTFrontendAction { TEST_F(TUSummaryExtractorFrontendActionTest, RunnerSucceeds_WrappedRunsBeforeRunner) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; auto Wrapped = std::make_unique(); Wrapped->OutputPath = Output; @@ -447,9 +448,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, TEST_F(TUSummaryExtractorFrontendActionTest, RunnerFailsToWrite) { std::string Output = makePath("output.FailingSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = "test-cu"; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = "test-cu"; TUSummaryExtractorFrontendAction Action(std::make_unique()); @@ -469,8 +470,8 @@ TEST_F(TUSummaryExtractorFrontendActionTest, RunnerFailsToWrite) { TEST_F(TUSummaryExtractorFrontendActionTest, 
MissingCompilationUnitIdDiagnoses) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; // SSAFCompilationUnitId left empty. auto Wrapped = std::make_unique(); @@ -493,9 +494,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, TEST_F(TUSummaryExtractorFrontendActionTest, EmptyCompilationUnitIdDiagnoses) { std::string Output = makePath("output.MockSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = ""; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = ""; auto Wrapped = std::make_unique(); const EventLog &Log = Wrapped->getLog(); @@ -520,9 +521,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, const std::string CUId = "cu-X-test"; std::string Output = makePath("output.CapturingSerializationFormat"); - Compiler->getFrontendOpts().SSAFTUSummaryFile = Output; - Compiler->getFrontendOpts().SSAFExtractSummaries = {"NoOpExtractor"}; - Compiler->getFrontendOpts().SSAFCompilationUnitId = CUId; + Compiler->getSSAFOpts().TUSummaryFile = Output; + Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; + Compiler->getSSAFOpts().CompilationUnitId = CUId; TUSummaryExtractorFrontendAction Action(std::make_unique()); EXPECT_TRUE(Compiler->ExecuteAction(Action)); From 40c43718b03892d28e64d32adb57cb6995cf980e Mon Sep 17 00:00:00 2001 From: Julian Nagele Date: Tue, 23 Jun 2026 11:53:48 +0100 Subject: [PATCH 155/511] [NFC] UnicodeNameMappingGenerator: restore #include <unordered_map> (#205316) #204303 removed this include while converting `unordered_map` uses to `DenseMap`, but 
`loadDataFiles` still uses `unordered_multimap`. See https://ci.swift.org/job/llvm.org/job/clang-stage2-Rthinlto/job/main/360/ ``` [2026-06-23T05:46:26.519Z] /Users/ec2-user/jenkins/workspace/m.org_clang-stage2-Rthinlto_main/llvm-project/llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp:34:13: error: missing '#include <unordered_map>'; 'unordered_multimap' must be declared before it is used [2026-06-23T05:46:26.519Z] 34 | static std::unordered_multimap [2026-06-23T05:46:26.519Z] | ^ ``` --- llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp b/llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp index 366e7a035e064..84ef0daf0ec7a 100644 --- a/llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp +++ b/llvm/utils/UnicodeData/UnicodeNameMappingGenerator.cpp @@ -23,6 +23,7 @@ #include #include #include +#include <unordered_map> #include #include From 74ce4f4e852c7746640d301ea4efbec3cd44c85b Mon Sep 17 00:00:00 2001 From: DmitriiMartynov <31047826+DmitriiMartynov@users.noreply.github.com> Date: Tue, 23 Jun 2026 13:54:45 +0300 Subject: [PATCH 156/511] [BOLT][AArch64] reproducible output with constant islands (#204546) Optimized binaries from subsequent llvm-bolt runs may sometimes differ due to the unordered set (SmallPtrSet), even if the input binary and parameters are the same. Usage of SetVector guarantees a deterministic sequence of binary functions while keeping each function as a single instance. Below you can see two different main functions before the fix after two llvm-bolt runs (same input binaries, same arguments). ``` 0000000000210400 <main>
: 210400: 10000140 adr x0, 0x210428 210404: f9400000 ldr x0, [x0] 210408: 10000140 adr x0, 0x210430 21040c: f9400000 ldr x0, [x0] 210410: 10000180 adr x0, 0x210440 210414: f9400000 ldr x0, [x0] 210418: 10000180 adr x0, 0x210448 21041c: f9400000 ldr x0, [x0] 210420: d65f03c0 ret 210424: d503201f nop 210428: 00000010 udf #0x10 21042c: 00000000 udf #0x0 210430: 00000020 udf #0x20 210434: 00000000 udf #0x0 210438: d503201f nop 21043c: d503201f nop 210440: 00000030 udf #0x30 210444: 00000000 udf #0x0 210448: 00000040 udf #0x40 21044c: 00000000 udf #0x0 210450: d503201f nop 210454: d503201f nop 0000000000210400 <main>
: 210400: 100001c0 adr x0, 0x210438 210404: f9400000 ldr x0, [x0] 210408: 100001c0 adr x0, 0x210440 21040c: f9400000 ldr x0, [x0] 210410: 100000c0 adr x0, 0x210428 210414: f9400000 ldr x0, [x0] 210418: 100000c0 adr x0, 0x210430 21041c: f9400000 ldr x0, [x0] 210420: d65f03c0 ret 210424: d503201f nop 210428: 00000030 udf #0x30 21042c: 00000000 udf #0x0 210430: 00000040 udf #0x40 210434: 00000000 udf #0x0 210438: 00000010 udf #0x10 21043c: 00000000 udf #0x0 210440: 00000020 udf #0x20 210444: 00000000 udf #0x0 210448: d503201f nop 21044c: d503201f nop ``` --- bolt/include/bolt/Core/BinaryFunction.h | 5 +- .../AArch64/constant-island-reproducible.s | 56 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 bolt/test/AArch64/constant-island-reproducible.s diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 0fdfcc5d76597..4ae8cfe372855 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -37,6 +37,7 @@ #include "bolt/Utils/NameResolver.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator.h" @@ -193,7 +194,9 @@ class BinaryFunction { /// Keeps track of other functions we depend on because there is a reference /// to the constant islands in them. 
IslandProxiesType Proxies, ColdProxies; - SmallPtrSet Dependency; // The other way around + SetVector, + SmallPtrSet> + Dependency; // The other way around mutable MCSymbol *FunctionConstantIslandLabel{nullptr}; mutable MCSymbol *FunctionColdConstantIslandLabel{nullptr}; diff --git a/bolt/test/AArch64/constant-island-reproducible.s b/bolt/test/AArch64/constant-island-reproducible.s new file mode 100644 index 0000000000000..d00086ae4add6 --- /dev/null +++ b/bolt/test/AArch64/constant-island-reproducible.s @@ -0,0 +1,56 @@ +# This test checks that the sequence of generated constant islands is the same across +# every llvm-bolt run. +# The 1 KB alignment is used to place the main and dummy0 functions far enough apart. +# If the functions are close, the original constant islands are used instead of +# the generated ones (no binary difference in this case). + +# REQUIRES: system-linux + +# RUN: %clang %s %cflags -no-pie -o %t.exe -Wl,-q +# RUN: llvm-bolt %t.exe -o %t.bolt --lite=0 +# RUN: llvm-objdump --disassemble-symbols=main %t.bolt | FileCheck %s + +# CHECK: 00000010 udf #0x10 +# CHECK: 00000020 udf #0x20 +# CHECK: 00000030 udf #0x30 +# CHECK: 00000040 udf #0x40 + + .text + .align 10 + .global main + .type main, %function +main: + adr x0, CI0 + ldr x0, [x0] + adr x0, CI1 + ldr x0, [x0] + adr x0, CI2 + ldr x0, [x0] + adr x0, CI3 + ldr x0, [x0] + ret + .size main, .-main + + .align 10 + .global dummy0 + .type dummy0, %function +dummy0: + mov x0, #0 + ret + .size dummy0, .-dummy0 +CI0: + .xword 0x10 +CI1: + .xword 0x20 + + .align 4 + .global dummy1 + .type dummy1, %function +dummy1: + mov x0, #0 + ret + .size dummy1, .-dummy1 +CI2: + .xword 0x30 +CI3: + .xword 0x40 From 018992bf413e42b12bf0d095fbf3ed413db27e3d Mon Sep 17 00:00:00 2001 From: Lucas Date: Tue, 23 Jun 2026 08:03:20 -0300 Subject: [PATCH 157/511] [clang-tidy][NFC] Update CERT wiki link across all clang-tidy docs (#205086) This patch updates the outdated CMU wiki link in the clang-tidy documentation. 
The old link currently returns a `301 Moved Permanently` redirecting to the new GitHub Pages location. This patch updates the source file to point directly to the new destination to prevent future link rot. Closes #200277 --- .../checks/bugprone/assignment-in-selection-statement.rst | 2 +- .../checks/bugprone/bad-signal-to-kill-thread.rst | 2 +- .../checks/bugprone/copy-constructor-mutates-argument.rst | 2 +- .../bugprone/default-operator-new-on-overaligned-type.rst | 2 +- .../checks/bugprone/exception-copy-constructor-throws.rst | 2 +- .../docs/clang-tidy/checks/bugprone/macro-parentheses.rst | 2 +- .../bugprone/pointer-arithmetic-on-polymorphic-object.rst | 2 +- .../clang-tidy/checks/bugprone/random-generator-seed.rst | 4 ++-- .../bugprone/raw-memory-call-on-non-trivial-type.rst | 2 +- .../clang-tidy/checks/bugprone/reserved-identifier.rst | 4 ++-- .../checks/bugprone/shared-ptr-array-mismatch.rst | 2 +- .../docs/clang-tidy/checks/bugprone/signal-handler.rst | 4 ++-- .../clang-tidy/checks/bugprone/signed-char-misuse.rst | 2 +- .../docs/clang-tidy/checks/bugprone/sizeof-expression.rst | 2 +- .../checks/bugprone/spuriously-wake-up-functions.rst | 4 ++-- .../checks/bugprone/suspicious-memory-comparison.rst | 8 ++++---- .../checks/bugprone/unhandled-self-assignment.rst | 2 +- .../checks/bugprone/unique-ptr-array-mismatch.rst | 2 +- .../docs/clang-tidy/checks/bugprone/unsafe-functions.rst | 4 ++-- clang-tools-extra/docs/clang-tidy/checks/cert/err33-c.rst | 2 +- .../docs/clang-tidy/checks/cert/err60-cpp.rst | 2 +- .../docs/clang-tidy/checks/cert/mem57-cpp.rst | 2 +- clang-tools-extra/docs/clang-tidy/checks/cert/msc30-c.rst | 2 +- clang-tools-extra/docs/clang-tidy/checks/cert/msc32-c.rst | 2 +- .../docs/clang-tidy/checks/cert/msc50-cpp.rst | 2 +- .../docs/clang-tidy/checks/cert/msc51-cpp.rst | 2 +- .../docs/clang-tidy/checks/cert/oop57-cpp.rst | 2 +- .../checks/concurrency/thread-canceltype-asynchronous.rst | 2 +- .../checks/misc/anonymous-namespace-in-header.rst | 2 +- 
.../docs/clang-tidy/checks/misc/no-recursion.rst | 2 +- .../docs/clang-tidy/checks/misc/predictable-rand.rst | 4 ++-- .../checks/misc/static-initialization-cycle.rst | 2 +- .../checks/misc/throw-by-value-catch-by-reference.rst | 2 +- .../clang-tidy/checks/readability/enum-initial-value.rst | 2 +- 34 files changed, 43 insertions(+), 43 deletions(-) diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/assignment-in-selection-statement.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/assignment-in-selection-statement.rst index 0c302ccee7864..0513ce3e3f771 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/assignment-in-selection-statement.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/assignment-in-selection-statement.rst @@ -17,7 +17,7 @@ condition (of ``if`` or a loop statement). This check corresponds to the CERT rule `EXP45-C. Do not perform assignments in selection statements -`_. +`_. Examples ======== diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/bad-signal-to-kill-thread.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/bad-signal-to-kill-thread.rst index 365624a8b1a0a..2a09a804630a0 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/bad-signal-to-kill-thread.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/bad-signal-to-kill-thread.rst @@ -13,6 +13,6 @@ just the individual thread. Use any signal except ``SIGTERM``. This check corresponds to the CERT C Coding Standard rule `POS44-C. Do not use signals to terminate threads -`_. +`_. `cert-pos44-c` redirects here as an alias of this check. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/copy-constructor-mutates-argument.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/copy-constructor-mutates-argument.rst index e45a94a0c9c0a..b27045488eebf 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/copy-constructor-mutates-argument.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/copy-constructor-mutates-argument.rst @@ -8,4 +8,4 @@ in copy constructors and copy assignment operators. This check corresponds to the CERT C Coding Standard rule `OOP58-CPP. Copy operations must not mutate the source object -`_. +`_. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/default-operator-new-on-overaligned-type.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/default-operator-new-on-overaligned-type.rst index c9918120f0770..3fdef811e9d5b 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/default-operator-new-on-overaligned-type.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/default-operator-new-on-overaligned-type.rst @@ -17,4 +17,4 @@ References This check corresponds to the CERT C++ Coding Standard rule `MEM57-CPP. Avoid using default operator new for over-aligned types -`_. +`_. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/exception-copy-constructor-throws.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/exception-copy-constructor-throws.rst index 7170501328ade..9c45cac525f7e 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/exception-copy-constructor-throws.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/exception-copy-constructor-throws.rst @@ -28,4 +28,4 @@ References This check corresponds to the CERT C++ Coding Standard rule `ERR60-CPP. Exception objects must be nothrow copy constructible -`_. +`_. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/macro-parentheses.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/macro-parentheses.rst index bfeb7cfce4334..e70a3fd20865b 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/macro-parentheses.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/macro-parentheses.rst @@ -20,4 +20,4 @@ properly. This check corresponds to the CERT C Coding Standard rule `PRE02-C. Macro replacement lists should be parenthesized. -`_ +`_ diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/pointer-arithmetic-on-polymorphic-object.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/pointer-arithmetic-on-polymorphic-object.rst index 2641cfe72e18c..6412607971306 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/pointer-arithmetic-on-polymorphic-object.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/pointer-arithmetic-on-polymorphic-object.rst @@ -65,4 +65,4 @@ References This check corresponds to the SEI Cert rule `CTR56-CPP. Do not use pointer arithmetic on polymorphic objects -`_. +`_. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/random-generator-seed.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/random-generator-seed.rst index 25680994a58d2..c789f0fa6b27c 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/random-generator-seed.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/random-generator-seed.rst @@ -39,6 +39,6 @@ References This check corresponds to the CERT C++ Coding Standard rules `MSC51-CPP. Ensure your random number generator is properly seeded -`_ and +`_ and `MSC32-C. Properly seed pseudorandom number generators -`_. +`_. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/raw-memory-call-on-non-trivial-type.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/raw-memory-call-on-non-trivial-type.rst index db3844447b3fd..3385abdc39ab3 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/raw-memory-call-on-non-trivial-type.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/raw-memory-call-on-non-trivial-type.rst @@ -32,4 +32,4 @@ Options This check corresponds to the CERT C++ Coding Standard rule `OOP57-CPP. Prefer special member functions and overloaded operators to C Standard Library functions -`_. +`_. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/reserved-identifier.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/reserved-identifier.rst index 3f6cee9b3bb5a..f181659270a84 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/reserved-identifier.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/reserved-identifier.rst @@ -38,10 +38,10 @@ standards, e.g. C++ 'zombie names' and C future library directions. This check corresponds to CERT C Coding Standard rule `DCL37-C. Do not declare or define a reserved identifier -`_ +`_ as well as its C++ counterpart, `DCL51-CPP. Do not declare or define a reserved identifier -`_. +`_. Options ------- diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/shared-ptr-array-mismatch.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/shared-ptr-array-mismatch.rst index 0833195edbb79..003be010f359b 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/shared-ptr-array-mismatch.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/shared-ptr-array-mismatch.rst @@ -32,5 +32,5 @@ Example: This check partially covers the CERT C++ Coding Standard rule `MEM51-CPP. Properly deallocate dynamically allocated resources -`_ +`_ However, only the ``std::shared_ptr`` case is detected by this check. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/signal-handler.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/signal-handler.rst index 42cfdf0f29eeb..f5648654023c0 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/signal-handler.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/signal-handler.rst @@ -31,10 +31,10 @@ recursively. This check implements the CERT C Coding Standard rule `SIG30-C. Call only asynchronous-safe functions within signal handlers -`_ +`_ and the rule `MSC54-CPP. A signal handler must be a plain old function -`_. +`_. It has the alias names ``cert-sig30-c`` and ``cert-msc54-cpp``. Options diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/signed-char-misuse.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/signed-char-misuse.rst index 4b98c36ee84c9..2a728d1093f6f 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/signed-char-misuse.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/signed-char-misuse.rst @@ -45,7 +45,7 @@ Currently, this check warns in the following cases: See also: `STR34-C. Cast characters to unsigned char before converting to larger integer sizes -`_ +`_ A good example from the CERT description when a ``char`` variable is used to read from a file that might contain non-ASCII characters. The problem comes diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/sizeof-expression.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/sizeof-expression.rst index aa2e529628c0e..4ed7cdc8cab4b 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/sizeof-expression.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/sizeof-expression.rst @@ -264,7 +264,7 @@ this check. This check corresponds to the CERT C Coding Standard rule `ARR39-C. Do not add or subtract a scaled integer to a pointer -`_. +`_. 
Limitations diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/spuriously-wake-up-functions.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/spuriously-wake-up-functions.rst index 1b5bab2143a2d..961d468d89c22 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/spuriously-wake-up-functions.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/spuriously-wake-up-functions.rst @@ -23,7 +23,7 @@ condition parameter. This check corresponds to the CERT C++ Coding Standard rule `CON54-CPP. Wrap functions that can spuriously wake up in a loop -`_. +`_. and CERT C Coding Standard rule `CON36-C. Wrap functions that can spuriously wake up in a loop -`_. +`_. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-memory-comparison.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-memory-comparison.rst index 317f8e1839597..7babea1361e83 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-memory-comparison.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/suspicious-memory-comparison.rst @@ -18,18 +18,18 @@ This may be caused by padding or floating-point types. See also: `EXP42-C. Do not compare padding data -`_ +`_ and `FLP37-C. Do not use object representations to compare floating-point values -`_ +`_ This check is also related to and partially overlaps the CERT C++ Coding Standard rules `OOP57-CPP. Prefer special member functions and overloaded operators to C Standard Library functions -`_ +`_ and `EXP62-CPP. Do not access the bits of an object representation that are not part of the object's value representation -`_ +`_ `cert-exp42-c` redirects here as an alias of this check. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unhandled-self-assignment.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unhandled-self-assignment.rst index 07c4b33048add..6c83ccaf7c32f 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unhandled-self-assignment.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unhandled-self-assignment.rst @@ -17,7 +17,7 @@ operator was not written with care. See also: `OOP54-CPP. Gracefully handle self-copy assignment -`_ +`_ A copy assignment operator must prevent that self-copy assignment ruins the object state. A typical use case is when the class has a pointer field diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unique-ptr-array-mismatch.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unique-ptr-array-mismatch.rst index c00385954b23c..71a805543e1b9 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unique-ptr-array-mismatch.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unique-ptr-array-mismatch.rst @@ -35,5 +35,5 @@ Example: This check partially covers the CERT C++ Coding Standard rule `MEM51-CPP. Properly deallocate dynamically allocated resources -`_ +`_ However, only the ``std::unique_ptr`` case is detected by this check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst index 4f5f8b39ed406..989d637e8ebbe 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/unsafe-functions.rst @@ -10,9 +10,9 @@ The check heavily relies on the functions from the The check implements the following rules from the CERT C Coding Standard: - Recommendation `MSC24-C. Do not use deprecated or obsolescent functions - `_. + `_. - Rule `MSC33-C. Do not pass invalid data to the asctime() function - `_. + `_. 
`cert-msc24-c` and `cert-msc33-c` redirect here as aliases of this check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert/err33-c.rst b/clang-tools-extra/docs/clang-tidy/checks/cert/err33-c.rst index 75da669c0a2b3..ac974c5e93de6 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cert/err33-c.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cert/err33-c.rst @@ -194,7 +194,7 @@ disabled by setting `AllowCastToVoid` option to `false`. The check corresponds to a part of CERT C Coding Standard rule `ERR33-C. Detect and handle standard library errors -`_. +`_. The list of checked functions is taken from the rule, with following exception: * The check can not differentiate if a function is called with ``NULL`` diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert/err60-cpp.rst b/clang-tools-extra/docs/clang-tidy/checks/cert/err60-cpp.rst index 126b71cfc8461..2d96d3b43758a 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cert/err60-cpp.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cert/err60-cpp.rst @@ -10,4 +10,4 @@ for more information. This check corresponds to the CERT C++ Coding Standard rule `ERR60-CPP. Exception objects must be nothrow copy constructible -`_. +`_. diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst b/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst index aeeffecac0011..8cd27a724ad58 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cert/mem57-cpp.rst @@ -10,4 +10,4 @@ for more information. This check corresponds to the CERT C++ Coding Standard rule `MEM57-CPP. Avoid using default operator new for over-aligned types -`_. +`_. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert/msc30-c.rst b/clang-tools-extra/docs/clang-tidy/checks/cert/msc30-c.rst index edbe16d229885..f5045c3539a28 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cert/msc30-c.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cert/msc30-c.rst @@ -8,4 +8,4 @@ The `cert-msc30-c` check is an alias, please see This check corresponds to the CERT C Coding Standard rule `MSC30-C. Do not use the rand() function for generating pseudorandom numbers -`_. +`_. diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert/msc32-c.rst b/clang-tools-extra/docs/clang-tidy/checks/cert/msc32-c.rst index 304873d638496..5239a7940c54f 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cert/msc32-c.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cert/msc32-c.rst @@ -9,4 +9,4 @@ for more information. This check corresponds to the CERT C Coding Standard rule `MSC32-C. Properly seed pseudorandom number generators -`_. +`_. diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert/msc50-cpp.rst b/clang-tools-extra/docs/clang-tidy/checks/cert/msc50-cpp.rst index 4951dc7a4c6cc..639f2d72908e5 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cert/msc50-cpp.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cert/msc50-cpp.rst @@ -8,4 +8,4 @@ The `cert-msc50-cpp` check is an alias, please see This check corresponds to the CERT C Coding Standard rule `MSC50-CPP. Do not use std::rand() for generating pseudorandom numbers -`_. +`_. diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert/msc51-cpp.rst b/clang-tools-extra/docs/clang-tidy/checks/cert/msc51-cpp.rst index 77b584205f8de..57cb0646c5e22 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cert/msc51-cpp.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cert/msc51-cpp.rst @@ -10,4 +10,4 @@ for more information. This check corresponds to the CERT C++ Coding Standard rule `MSC51-CPP. Ensure your random number generator is properly seeded -`_. +`_. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/cert/oop57-cpp.rst b/clang-tools-extra/docs/clang-tidy/checks/cert/oop57-cpp.rst index 1ce3269ff73f0..dc599c3254441 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/cert/oop57-cpp.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/cert/oop57-cpp.rst @@ -11,4 +11,4 @@ for more information. This check corresponds to the CERT C++ Coding Standard rule `OOP57-CPP. Prefer special member functions and overloaded operators to C Standard Library functions -`_. +`_. diff --git a/clang-tools-extra/docs/clang-tidy/checks/concurrency/thread-canceltype-asynchronous.rst b/clang-tools-extra/docs/clang-tidy/checks/concurrency/thread-canceltype-asynchronous.rst index 5e4d980077d50..8085fdb728a89 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/concurrency/thread-canceltype-asynchronous.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/concurrency/thread-canceltype-asynchronous.rst @@ -16,6 +16,6 @@ be acted upon and the effect is as if it was an asynchronous cancellation. This check corresponds to the CERT C Coding Standard rule `POS47-C. Do not use threads that can be canceled asynchronously -`_. +`_. `cert-pos47-c` redirects here as an alias of this check. diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/anonymous-namespace-in-header.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/anonymous-namespace-in-header.rst index eef596da2e77c..447a6975157b3 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc/anonymous-namespace-in-header.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/anonymous-namespace-in-header.rst @@ -15,6 +15,6 @@ References This check corresponds to the CERT C++ Coding Standard rule `DCL59-CPP. Do not define an unnamed namespace in a header file -`_. +`_. Corresponding cpplint.py check name: `build/namespaces`. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/no-recursion.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/no-recursion.rst index 53753f1d029d0..ff2e851f4d31d 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc/no-recursion.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/no-recursion.rst @@ -10,7 +10,7 @@ and displays one example of a possible call graph loop (recursion). References: -* CERT C++ Coding Standard rule `DCL56-CPP. Avoid cycles during initialization of static objects `_. +* CERT C++ Coding Standard rule `DCL56-CPP. Avoid cycles during initialization of static objects `_. * JPL Institutional Coding Standard for the C Programming Language (JPL DOCID D-60411) rule `2.4 Do not use direct or indirect recursion`. * OpenCL Specification, Version 1.2 rule `6.9 Restrictions: i. Recursion is not supported. `_. diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/predictable-rand.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/predictable-rand.rst index 00156649cf220..303c30927d458 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc/predictable-rand.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/predictable-rand.rst @@ -15,6 +15,6 @@ References This check corresponds to the CERT C Coding Standard rules `MSC30-C. Do not use the rand() function for generating pseudorandom numbers -`_. +`_. `MSC50-CPP. Do not use std::rand() for generating pseudorandom numbers -`_. +`_. diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/static-initialization-cycle.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/static-initialization-cycle.rst index 7a50428b53df7..7296411377d59 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc/static-initialization-cycle.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/static-initialization-cycle.rst @@ -60,4 +60,4 @@ References ---------- * CERT C++ Coding Standard rule `DCL56-CPP. Avoid cycles during initialization - of static objects `_. + of static objects `_. 
diff --git a/clang-tools-extra/docs/clang-tidy/checks/misc/throw-by-value-catch-by-reference.rst b/clang-tools-extra/docs/clang-tidy/checks/misc/throw-by-value-catch-by-reference.rst index cfea1c81fe655..3d06efaa6f44d 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/misc/throw-by-value-catch-by-reference.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/misc/throw-by-value-catch-by-reference.rst @@ -8,7 +8,7 @@ misc-throw-by-value-catch-by-reference Finds violations of the rule "Throw by value, catch by reference" presented for example in "C++ Coding Standards" by H. Sutter and A. Alexandrescu, as well as the CERT C++ Coding Standard rule `ERR61-CPP. Catch exceptions by lvalue -reference `_. +reference `_. Exceptions: diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/enum-initial-value.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/enum-initial-value.rst index f59c433c51d0b..590863804630f 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/readability/enum-initial-value.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/readability/enum-initial-value.rst @@ -58,7 +58,7 @@ The following three cases are accepted: }; This check corresponds to the CERT C Coding Standard recommendation `INT09-C. Ensure enumeration constants map to unique values -`_. +`_. `cert-int09-c` redirects here as an alias of this check. From 2838237c9bf347521a0869f175b058eb5db5468e Mon Sep 17 00:00:00 2001 From: Zeyi Xu Date: Tue, 23 Jun 2026 19:06:04 +0800 Subject: [PATCH 158/511] [clang-tidy] Avoid token merging in redundant-parentheses fix-its (#202365) The readability-redundant-parentheses check emitted fix-its that simply removed both parentheses. Tools that apply those fix-its directly could join adjacent tokens and produce invalid code, e.g. `return(0)` becoming `return0`. Replace the opening parenthesis with a space when removing it would merge identifier characters across the removed token. AI Usage: Test assisted by Codex. 
Closes https://github.com/llvm/llvm-project/issues/185108 --- .../readability/RedundantParenthesesCheck.cpp | 21 ++++++++++++++++- .../clangd/unittests/DiagnosticsTests.cpp | 23 +++++++++++++++++++ clang-tools-extra/docs/ReleaseNotes.rst | 10 +++++--- 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.cpp index c177c07b95a75..639e183f434b2 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.cpp @@ -13,6 +13,7 @@ #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/ASTMatchers/ASTMatchersMacros.h" +#include "clang/Lex/Lexer.h" #include using namespace clang::ast_matchers; @@ -34,6 +35,23 @@ AST_MATCHER(ParenExpr, isInMacro) { } // namespace +static FixItHint createSpacedRemoval(SourceLocation Loc, + const SourceManager &SM, + const LangOptions &LangOpts) { + if (Loc.isValid() && !Loc.isMacroID()) { + auto LocInfo = SM.getDecomposedLoc(Loc); + bool Invalid = false; + StringRef Buffer = SM.getBufferData(LocInfo.first, &Invalid); + if (!Invalid && LocInfo.second > 0 && LocInfo.second + 1 < Buffer.size() && + Lexer::isAsciiIdentifierContinueChar(Buffer[LocInfo.second - 1], + LangOpts) && + Lexer::isAsciiIdentifierContinueChar(Buffer[LocInfo.second + 1], + LangOpts)) + return FixItHint::CreateReplacement(SourceRange(Loc, Loc), " "); + } + return FixItHint::CreateRemoval(Loc); +} + RedundantParenthesesCheck::RedundantParenthesesCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), @@ -66,7 +84,8 @@ void RedundantParenthesesCheck::registerMatchers(MatchFinder *Finder) { void RedundantParenthesesCheck::check(const MatchFinder::MatchResult &Result) { const auto *PE = Result.Nodes.getNodeAs("dup"); diag(PE->getBeginLoc(), "redundant parentheses 
around expression") - << FixItHint::CreateRemoval(PE->getLParen()) + << createSpacedRemoval(PE->getLParen(), *Result.SourceManager, + getLangOpts()) << FixItHint::CreateRemoval(PE->getRParen()); } diff --git a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp index 4258f7faf34fd..6d91ac1ef1e8e 100644 --- a/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp +++ b/clang-tools-extra/clangd/unittests/DiagnosticsTests.cpp @@ -359,6 +359,29 @@ TEST(DiagnosticsTest, ClangTidy) { "function 'bar' is within a recursive call chain")))); } +TEST(DiagnosticsTest, ClangTidyRedundantParenthesesFix) { + Annotations Test(R"cpp( + int func() { + return$lparen[[(]]0$rparen[[)]]; + } + )cpp"); + auto TU = TestTU::withCode(Test.code()); + TU.ClangTidyProvider = addTidyChecks("readability-redundant-parentheses"); + + clangd::Fix ExpectedFix; + ExpectedFix.Message = "redundant parentheses around expression"; + ExpectedFix.Edits.push_back(TextEdit{Test.range("lparen"), " "}); + ExpectedFix.Edits.push_back(TextEdit{Test.range("rparen"), ""}); + + EXPECT_THAT( + TU.build().getDiagnostics(), + ifTidyChecks(ElementsAre(AllOf( + Diag(Test.range("lparen"), "redundant parentheses around expression"), + diagSource(Diag::ClangTidy), + diagName("readability-redundant-parentheses"), + withFix(equalToFix(ExpectedFix)))))); +} + TEST(DiagnosticsTest, ClangTidyEOF) { // clang-format off Annotations Test(R"cpp( diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index bbd4f8f5b3c58..8871b37ddb1bf 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -848,9 +848,13 @@ Changes in existing checks macros that may expand differently in other configurations. - Improved :doc:`readability-redundant-parentheses - ` check by fixing a - false positive for parentheses present around an overloaded operator in the - context of a binary operation. 
+ ` check: + + - Fixed a false positive for parentheses present around an overloaded operator + in the context of a binary operation. + + - Fixed a bug where clients that apply fix-its without :program:`clang-tidy`'s + cleanup could produce invalid code by joining adjacent tokens. - Improved :doc:`readability-redundant-preprocessor ` check by fixing a From b2a68380fc73443db0278b595ad1a28b7a59f525 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 23 Jun 2026 12:07:16 +0100 Subject: [PATCH 159/511] [X86] phaddsub.ll - update PR39921/PR39936 test case to a vector.reduce.v8i32 call (#205310) Matches middle-end IR produced from the tests' C++ source since #199872 --- llvm/test/CodeGen/X86/phaddsub.ll | 54 ++++++++++++++----------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/llvm/test/CodeGen/X86/phaddsub.ll b/llvm/test/CodeGen/X86/phaddsub.ll index 12c7a092d6c18..1ec4738220b6c 100644 --- a/llvm/test/CodeGen/X86/phaddsub.ll +++ b/llvm/test/CodeGen/X86/phaddsub.ll @@ -615,28 +615,30 @@ define <8 x i16> @phaddw_single_source6(<8 x i16> %x) { define i32 @PR39936_v8i32(<8 x i32>) { ; SSSE3-SLOW-LABEL: PR39936_v8i32: ; SSSE3-SLOW: # %bb.0: -; SSSE3-SLOW-NEXT: phaddd %xmm1, %xmm0 -; SSSE3-SLOW-NEXT: phaddd %xmm0, %xmm0 -; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] +; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm0 +; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] ; SSSE3-SLOW-NEXT: paddd %xmm0, %xmm1 -; SSSE3-SLOW-NEXT: movd %xmm1, %eax +; SSSE3-SLOW-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1] +; SSSE3-SLOW-NEXT: paddd %xmm1, %xmm0 +; SSSE3-SLOW-NEXT: movd %xmm0, %eax ; SSSE3-SLOW-NEXT: retq ; ; SSSE3-FAST-LABEL: PR39936_v8i32: ; SSSE3-FAST: # %bb.0: -; SSSE3-FAST-NEXT: phaddd %xmm1, %xmm0 -; SSSE3-FAST-NEXT: phaddd %xmm0, %xmm0 -; SSSE3-FAST-NEXT: phaddd %xmm0, %xmm0 -; SSSE3-FAST-NEXT: movd %xmm0, %eax +; SSSE3-FAST-NEXT: phaddd %xmm0, %xmm1 +; SSSE3-FAST-NEXT: phaddd %xmm1, %xmm1 +; SSSE3-FAST-NEXT: phaddd %xmm1, %xmm1 +; SSSE3-FAST-NEXT: 
movd %xmm1, %eax ; SSSE3-FAST-NEXT: retq ; ; AVX1-SLOW-LABEL: PR39936_v8i32: ; AVX1-SLOW: # %bb.0: ; AVX1-SLOW-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-SLOW-NEXT: vphaddd %xmm1, %xmm0, %xmm0 -; AVX1-SLOW-NEXT: vphaddd %xmm0, %xmm0, %xmm0 +; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX1-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; AVX1-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0 +; AVX1-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX1-SLOW-NEXT: vmovd %xmm0, %eax ; AVX1-SLOW-NEXT: vzeroupper ; AVX1-SLOW-NEXT: retq @@ -644,7 +646,7 @@ define i32 @PR39936_v8i32(<8 x i32>) { ; AVX1-FAST-LABEL: PR39936_v8i32: ; AVX1-FAST: # %bb.0: ; AVX1-FAST-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX1-FAST-NEXT: vphaddd %xmm1, %xmm0, %xmm0 +; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm1, %xmm0 ; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX1-FAST-NEXT: vmovd %xmm0, %eax @@ -654,10 +656,11 @@ define i32 @PR39936_v8i32(<8 x i32>) { ; AVX2-SLOW-LABEL: PR39936_v8i32: ; AVX2-SLOW: # %bb.0: ; AVX2-SLOW-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-SLOW-NEXT: vphaddd %xmm1, %xmm0, %xmm0 -; AVX2-SLOW-NEXT: vphaddd %xmm0, %xmm0, %xmm0 +; AVX2-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX2-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; AVX2-SLOW-NEXT: vpaddd %xmm0, %xmm1, %xmm0 +; AVX2-SLOW-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX2-SLOW-NEXT: vmovd %xmm0, %eax ; AVX2-SLOW-NEXT: vzeroupper ; AVX2-SLOW-NEXT: retq @@ -665,7 +668,7 @@ define i32 @PR39936_v8i32(<8 x i32>) { ; AVX2-FAST-LABEL: PR39936_v8i32: ; AVX2-FAST: # %bb.0: ; AVX2-FAST-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-FAST-NEXT: vphaddd %xmm1, %xmm0, %xmm0 +; AVX2-FAST-NEXT: vphaddd %xmm0, %xmm1, %xmm0 ; AVX2-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0 ; AVX2-FAST-NEXT: vphaddd %xmm0, %xmm0, 
%xmm0 ; AVX2-FAST-NEXT: vmovd %xmm0, %eax @@ -675,22 +678,15 @@ define i32 @PR39936_v8i32(<8 x i32>) { ; AVX2-SHUF-LABEL: PR39936_v8i32: ; AVX2-SHUF: # %bb.0: ; AVX2-SHUF-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-SHUF-NEXT: vphaddd %xmm1, %xmm0, %xmm0 -; AVX2-SHUF-NEXT: vphaddd %xmm0, %xmm0, %xmm0 +; AVX2-SHUF-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX2-SHUF-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX2-SHUF-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX2-SHUF-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] -; AVX2-SHUF-NEXT: vpaddd %xmm0, %xmm1, %xmm0 +; AVX2-SHUF-NEXT: vpaddd %xmm1, %xmm0, %xmm0 ; AVX2-SHUF-NEXT: vmovd %xmm0, %eax ; AVX2-SHUF-NEXT: vzeroupper ; AVX2-SHUF-NEXT: retq - %2 = shufflevector <8 x i32> %0, <8 x i32> undef, <8 x i32> - %3 = shufflevector <8 x i32> %0, <8 x i32> undef, <8 x i32> - %4 = add <8 x i32> %2, %3 - %5 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> - %6 = shufflevector <8 x i32> %4, <8 x i32> undef, <8 x i32> - %7 = add <8 x i32> %5, %6 - %8 = shufflevector <8 x i32> %7, <8 x i32> undef, <8 x i32> - %9 = add <8 x i32> %8, %7 - %10 = extractelement <8 x i32> %9, i32 0 - ret i32 %10 + %res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %0) + ret i32 %res } From fd5640cc10134cf3b13f655be35ef2afa916923c Mon Sep 17 00:00:00 2001 From: Jonas Paulsson Date: Tue, 23 Jun 2026 13:07:39 +0200 Subject: [PATCH 160/511] [SystemZ] Enable liveness reduction in pre-RA sched strategy. (#188823) Add some handling of register pressure by scheduling an SU "low" if it closes a live range (under certain conditions). As this is checked before latency reduction, the "data-sequences" check that was used to selectively enable latency reduction can now be removed. This gives good improvements on several benchmarks and is also a simplification of the SystemZPreRASchedStrategy.
--- .../SystemZ/SystemZMachineScheduler.cpp | 125 ++-- .../Target/SystemZ/SystemZMachineScheduler.h | 21 +- llvm/test/CodeGen/SystemZ/args-22.ll | 56 +- .../CodeGen/SystemZ/atomicrmw-ops-i128.ll | 16 +- llvm/test/CodeGen/SystemZ/bswap-09.ll | 24 +- llvm/test/CodeGen/SystemZ/bswap-10.ll | 10 +- llvm/test/CodeGen/SystemZ/call-zos-vec.ll | 18 +- .../test/CodeGen/SystemZ/canonicalize-vars.ll | 36 +- .../SystemZ/codegenprepare-sink-and-for-tm.ll | 2 +- llvm/test/CodeGen/SystemZ/dag-combine-05.ll | 20 +- llvm/test/CodeGen/SystemZ/dag-combine-07.ll | 4 +- .../test/CodeGen/SystemZ/fold-masked-merge.ll | 18 +- llvm/test/CodeGen/SystemZ/fp-copysign-03.ll | 12 +- .../CodeGen/SystemZ/fp-half-vector-binops.ll | 20 +- .../SystemZ/fp-half-vector-fcmp-select.ll | 64 +- ...-asm-fp-int-casting-explicit-regs-zEC12.ll | 8 +- .../inline-asm-fp-int-casting-zEC12.ll | 8 +- llvm/test/CodeGen/SystemZ/int-cmp-65.ll | 8 +- llvm/test/CodeGen/SystemZ/int-conv-14.ll | 24 +- llvm/test/CodeGen/SystemZ/int-conv-15.ll | 12 +- llvm/test/CodeGen/SystemZ/int-mul-12.ll | 27 +- llvm/test/CodeGen/SystemZ/int-mul-13.ll | 8 +- llvm/test/CodeGen/SystemZ/int-uadd-14.ll | 50 +- llvm/test/CodeGen/SystemZ/int-usub-13.ll | 50 +- .../SystemZ/machine-combiner-reassoc-fp.ll | 192 +++--- .../SystemZ/misched-prera-cmp-elim.mir | 53 +- .../SystemZ/misched-prera-latencies.mir | 41 +- .../CodeGen/SystemZ/misched-prera-loads.mir | 627 ++++++++++++++++++ .../CodeGen/SystemZ/misched-prera-pdiffs.mir | 151 +++++ .../SystemZ/regcoal-subranges-update.mir | 8 +- .../SystemZ/regcoal_remat_empty_subrange.ll | 4 +- llvm/test/CodeGen/SystemZ/risbg-04.ll | 26 +- llvm/test/CodeGen/SystemZ/rot-03.ll | 22 +- llvm/test/CodeGen/SystemZ/shift-12.ll | 30 +- llvm/test/CodeGen/SystemZ/shift-13.ll | 58 +- llvm/test/CodeGen/SystemZ/shift-14.ll | 58 +- llvm/test/CodeGen/SystemZ/shift-15.ll | 58 +- llvm/test/CodeGen/SystemZ/shift-16.ll | 92 +-- llvm/test/CodeGen/SystemZ/shift-17.ll | 128 ++-- llvm/test/CodeGen/SystemZ/soft-float-args.ll | 10 +- 
.../SystemZ/store_nonbytesized_vecs.ll | 80 +-- .../SystemZ/vec-cmp-cmp-logic-select.ll | 340 +++++----- llvm/test/CodeGen/SystemZ/vec-cmpsel-01.ll | 70 +- llvm/test/CodeGen/SystemZ/vec-eval.ll | 80 +-- llvm/test/CodeGen/SystemZ/vec-move-23.ll | 56 +- llvm/test/CodeGen/SystemZ/vec-mul-07.ll | 16 +- llvm/test/CodeGen/SystemZ/vec-perm-12.ll | 12 +- llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll | 10 +- .../vector-constrained-fp-intrinsics.ll | 251 +++---- 49 files changed, 1950 insertions(+), 1164 deletions(-) create mode 100644 llvm/test/CodeGen/SystemZ/misched-prera-loads.mir create mode 100644 llvm/test/CodeGen/SystemZ/misched-prera-pdiffs.mir diff --git a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp index a1aa8bedbb2d2..fe22ceac5fce7 100644 --- a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp +++ b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp @@ -19,55 +19,6 @@ static bool isRegDef(const MachineOperand &MO) { return MO.isReg() && MO.isDef(); } -void SystemZPreRASchedStrategy::initializeLatencyReduction() { - // Enable latency reduction for a region that has a considerable amount of - // data sequences that should be interlaved. These are SUs that only have - // one data predecessor / successor edge(s) to their adjacent instruction(s) - // in the input order. Disable if region has many SUs relative to the - // overall height. 
- unsigned DAGHeight = 0; - for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) - DAGHeight = std::max(DAGHeight, DAG->SUnits[Idx].getHeight()); - RegionPolicy.DisableLatencyHeuristic = - DAG->SUnits.size() >= 3 * std::max(DAGHeight, 1u); - if ((HasDataSequences = !RegionPolicy.DisableLatencyHeuristic)) { - unsigned CurrSequence = 0, NumSeqNodes = 0; - auto countSequence = [&CurrSequence, &NumSeqNodes]() { - if (CurrSequence >= 2) - NumSeqNodes += CurrSequence; - CurrSequence = 0; - }; - for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { - const SUnit *SU = &DAG->SUnits[Idx]; - bool InDataSequence = true; - // One Data pred to MI just above, or no preds. - unsigned NumPreds = 0; - for (const SDep &Pred : SU->Preds) - if (++NumPreds != 1 || Pred.getKind() != SDep::Data || - Pred.getSUnit()->NodeNum != Idx - 1) - InDataSequence = false; - // One Data succ or no succs (ignoring ExitSU). - unsigned NumSuccs = 0; - for (const SDep &Succ : SU->Succs) - if (Succ.getSUnit() != &DAG->ExitSU && - (++NumSuccs != 1 || Succ.getKind() != SDep::Data)) - InDataSequence = false; - // Another type of node or one that does not have a single data pred - // ends any previous sequence. - if (!InDataSequence || !NumPreds) - countSequence(); - if (InDataSequence) - CurrSequence++; - } - countSequence(); - if (NumSeqNodes >= std::max(size_t(4), DAG->SUnits.size() / 4)) { - LLVM_DEBUG(dbgs() << "Number of nodes in def-use sequences: " - << NumSeqNodes << ". 
";); - } else - HasDataSequences = false; - } -} - bool SystemZPreRASchedStrategy::definesCmp0Src(const MachineInstr *MI, bool CCDef) const { if (Cmp0SrcReg != SystemZ::NoRegister && MI->getNumOperands() && @@ -79,6 +30,30 @@ bool SystemZPreRASchedStrategy::definesCmp0Src(const MachineInstr *MI, return false; } +bool SystemZPreRASchedStrategy::closesLiveRange(const SUnit *SU, + ScheduleDAGMILive *DAG) const { + if (SU->getInstr()->isCopy()) + return false; + + // Extract the PressureChanges that all fp/vector or GR64/GR32/GRH32 regs + // affect respectively. misched-prera-pdiffs.mir tests against any future + // change in the PressureSets modelling, so simply hard-code them here. + int VR16PChange = 0, GRX32PChange = 0; + const PressureDiff &PDiff = DAG->getPressureDiff(SU); + for (const PressureChange &PC : PDiff) { + if (!PC.isValid()) + break; + if (PC.getPSet() == SystemZ::VR16Bit) + VR16PChange = PC.getUnitInc(); + else if (PC.getPSet() == SystemZ::GRX32Bit) + GRX32PChange = PC.getUnitInc(); + } + + // Return true for a (vreg) def when register pressure is reduced. Prioritize + // FP/vector regs over GPRs. + return VR16PChange < 0 || (!VR16PChange && GRX32PChange < 0); +} + bool SystemZPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const { @@ -94,20 +69,26 @@ bool SystemZPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, if (tryBiasPhysRegs(TryCand, Cand, Zone, /*BiasPRegsExtra=*/true)) return TryCand.Reason != NoCand; - // Don't extend the scheduled latency in regions with many nodes in data - // sequences, or for (single block loop) regions that are acyclically - // (within a single loop iteration) latency limited. IsAcyclicLatencyLimited - // is set only after initialization in registerRoots(), which is why it is - // checked here instead of earlier. 
- if (!RegionPolicy.DisableLatencyHeuristic && - (HasDataSequences || Rem.IsAcyclicLatencyLimited)) + if (RegionPolicy.ShouldTrackPressure) { + auto schedLow = [&](const SUnit *SU) { + return SU->getHeight() <= Zone->getScheduledLatency() && + SU->getHeight() < LivenessHeightCutOff && closesLiveRange(SU, DAG); + }; + // One SU closes a live range while preserving the scheduled latency. + if (tryGreater(schedLow(TryCand.SU), schedLow(Cand.SU), TryCand, Cand, + RegExcess)) + return TryCand.Reason != NoCand; + } + + if (!RegionPolicy.DisableLatencyHeuristic) if (const SUnit *HigherSU = TryCand.SU->getHeight() > Cand.SU->getHeight() ? TryCand.SU : TryCand.SU->getHeight() < Cand.SU->getHeight() ? Cand.SU : nullptr) if (HigherSU->getHeight() > Zone->getScheduledLatency() && HigherSU->getDepth() < computeRemLatency(*Zone)) { - // One or both SUs increase the scheduled latency. + // The higher SU increases the scheduled latency but is not on the + // Critical Path by Depth, so put it above the other one. tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), TryCand, Cand, GenericSchedulerBase::BotHeightReduce); return TryCand.Reason != NoCand; @@ -135,16 +116,20 @@ bool SystemZPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, void SystemZPreRASchedStrategy::initPolicy(MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End, unsigned NumRegionInstrs) { - // Avoid setting up the register pressure tracker for small regions to save - // compile time. Currently only used for computeCyclicCriticalPath() which - // is used for single block loops. - MachineBasicBlock *MBB = Begin->getParent(); - RegionPolicy.ShouldTrackPressure = - MBB->isSuccessor(MBB) && NumRegionInstrs >= 8; + // TopRegionSUs is the number of SUs that is considered to be part of the + // "top" of a region. Liveness reduction is not done in regions smaller than + // this. 
The idea is to prioritize latency more after branches and help + // liveness only when the decoder is ahead of execution anyway. + static const unsigned TopRegionSUs = 36; + + // Avoid setting up the register pressure tracker unless needed to save + // compile time. + RegionPolicy.ShouldTrackPressure = NumRegionInstrs > TopRegionSUs; // These heuristics has so far seemed to work better without adding a // top-down boundary. RegionPolicy.OnlyBottomUp = true; + BotIdx = NumRegionInstrs - 1; this->NumRegionInstrs = NumRegionInstrs; } @@ -154,9 +139,17 @@ void SystemZPreRASchedStrategy::initialize(ScheduleDAGMI *dag) { Cmp0SrcReg = SystemZ::NoRegister; - initializeLatencyReduction(); - LLVM_DEBUG(dbgs() << "Latency scheduling " << (HasDataSequences ? "" : "not ") - << "enabled for data sequences.\n";); + unsigned DAGHeight = 0; + for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) + DAGHeight = std::max(DAGHeight, DAG->SUnits[Idx].getHeight()); + + if (RegionPolicy.ShouldTrackPressure) + LivenessHeightCutOff = DAGHeight / (DAG->SUnits.size() < 50 ? 4 : 2); + + // Disable latency reduction if region has many SUs relative to the + // overall height. + RegionPolicy.DisableLatencyHeuristic = + DAG->SUnits.size() >= 3 * std::max(DAGHeight, 1u); } void SystemZPreRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) { diff --git a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h index 4fdfd92d192c3..881e06b084c20 100644 --- a/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h +++ b/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h @@ -8,9 +8,12 @@ // -------------------------- Pre RA scheduling ----------------------------- // // -// SystemZPreRASchedStrategy performs latency scheduling in certain types of -// regions where this is beneficial, and also helps copy coalescing and -// comparison elimination. 
+// SystemZPreRASchedStrategy reduces register pressure by scheduling a (live) +// definition low if it does not cause another register to become live (all +// uses live). In most regions it then reduces the scheduled latency but only +// if the SU that is higher (by Height) than the scheduled latency is as well +// lower (by Depth) than the remaining latency. It also helps copy coalescing +// and comparison elimination. // // -------------------------- Post RA scheduling ---------------------------- // // @@ -34,16 +37,18 @@ namespace llvm { /// A MachineSchedStrategy implementation for SystemZ pre RA scheduling. class SystemZPreRASchedStrategy : public GenericScheduler { - void initializeLatencyReduction(); - Register Cmp0SrcReg; // Return true if MI defines the Cmp0SrcReg that is used by a scheduled // compare with 0. If CCDef is true MI must also have an implicit def of CC. bool definesCmp0Src(const MachineInstr *MI, bool CCDef = true) const; - // True if the region has many instructions in def-use sequences and would - // likely benefit from latency reduction. - bool HasDataSequences; + // SUs that have a Height of at least this value will not be scheduled + // "low" to reduce liveness. + unsigned LivenessHeightCutOff; + + // Return true if the instruction defines a register while all use operands + // are already live. 
+ bool closesLiveRange(const SUnit *SU, ScheduleDAGMILive *DAG) const; protected: bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, diff --git a/llvm/test/CodeGen/SystemZ/args-22.ll b/llvm/test/CodeGen/SystemZ/args-22.ll index ba422b65fc299..0d20cd1ebc7c1 100644 --- a/llvm/test/CodeGen/SystemZ/args-22.ll +++ b/llvm/test/CodeGen/SystemZ/args-22.ll @@ -124,8 +124,8 @@ define void @arg1(%Ty1 %A) { ; VECTOR-NEXT: lgrl %r1, Dst@GOT ; VECTOR-NEXT: vrepib %v1, 8 ; VECTOR-NEXT: vsteb %v0, 8(%r1), 15 -; VECTOR-NEXT: vsrlb %v0, %v0, %v1 -; VECTOR-NEXT: vsteg %v0, 0(%r1), 1 +; VECTOR-NEXT: vsrlb %v1, %v0, %v1 +; VECTOR-NEXT: vsteg %v1, 0(%r1), 1 ; VECTOR-NEXT: br %r14 store %Ty1 %A, ptr @Dst ret void @@ -351,14 +351,14 @@ define void @arg3(%Ty3 %A) { ; ; VECTOR-LABEL: arg3: ; VECTOR: # %bb.0: -; VECTOR-NEXT: vl %v0, 0(%r3), 3 +; VECTOR-NEXT: vl %v0, 0(%r2), 3 +; VECTOR-NEXT: vl %v1, 0(%r3), 3 ; VECTOR-NEXT: lgrl %r1, Dst@GOT -; VECTOR-NEXT: vl %v1, 0(%r2), 3 -; VECTOR-NEXT: vsteb %v1, 8(%r1), 15 -; VECTOR-NEXT: vst %v0, 16(%r1), 3 -; VECTOR-NEXT: vrepib %v0, 8 -; VECTOR-NEXT: vsrlb %v0, %v1, %v0 -; VECTOR-NEXT: vsteg %v0, 0(%r1), 1 +; VECTOR-NEXT: vsteb %v0, 8(%r1), 15 +; VECTOR-NEXT: vrepib %v2, 8 +; VECTOR-NEXT: vsrlb %v2, %v0, %v2 +; VECTOR-NEXT: vst %v1, 16(%r1), 3 +; VECTOR-NEXT: vsteg %v2, 0(%r1), 1 ; VECTOR-NEXT: br %r14 store %Ty3 %A, ptr @Dst ret void @@ -402,11 +402,11 @@ define void @call3() { ; VECTOR-NEXT: vlrepg %v1, 0(%r1) ; VECTOR-NEXT: vrepib %v2, 8 ; VECTOR-NEXT: vslb %v1, %v1, %v2 +; VECTOR-NEXT: vl %v2, 16(%r1), 3 ; VECTOR-NEXT: vo %v0, %v0, %v1 -; VECTOR-NEXT: vl %v1, 16(%r1), 3 ; VECTOR-NEXT: la %r2, 176(%r15) ; VECTOR-NEXT: la %r3, 160(%r15) -; VECTOR-NEXT: vst %v1, 160(%r15), 3 +; VECTOR-NEXT: vst %v2, 160(%r15), 3 ; VECTOR-NEXT: vst %v0, 176(%r15), 3 ; VECTOR-NEXT: brasl %r14, Fnptr@PLT ; VECTOR-NEXT: lmg %r14, %r15, 304(%r15) @@ -601,15 +601,15 @@ define %Ty4 @ret4() { ; VECTOR-NEXT: brasl %r14, Fnptr@PLT ; VECTOR-NEXT: lb %r0, 
164(%r15) ; VECTOR-NEXT: lh %r1, 166(%r15) -; VECTOR-NEXT: lb %r4, 200(%r15) +; VECTOR-NEXT: lb %r2, 200(%r15) ; VECTOR-NEXT: lde %f0, 160(%r15) -; VECTOR-NEXT: l %r2, 168(%r15) -; VECTOR-NEXT: lg %r3, 176(%r15) +; VECTOR-NEXT: l %r3, 168(%r15) +; VECTOR-NEXT: lg %r4, 176(%r15) ; VECTOR-NEXT: vl %v1, 184(%r15), 3 -; VECTOR-NEXT: stc %r4, 40(%r13) +; VECTOR-NEXT: stc %r2, 40(%r13) ; VECTOR-NEXT: vst %v1, 24(%r13), 3 -; VECTOR-NEXT: stg %r3, 16(%r13) -; VECTOR-NEXT: st %r2, 8(%r13) +; VECTOR-NEXT: stg %r4, 16(%r13) +; VECTOR-NEXT: st %r3, 8(%r13) ; VECTOR-NEXT: sth %r1, 6(%r13) ; VECTOR-NEXT: stc %r0, 4(%r13) ; VECTOR-NEXT: ste %f0, 0(%r13) @@ -810,10 +810,10 @@ define void @arg6(%Ty6 %A) { ; VECTOR-NEXT: vsteb %v1, 24(%r1), 15 ; VECTOR-NEXT: vrepib %v2, 8 ; VECTOR-NEXT: vsteb %v0, 8(%r1), 15 -; VECTOR-NEXT: vsrlb %v1, %v1, %v2 -; VECTOR-NEXT: vsrlb %v0, %v0, %v2 -; VECTOR-NEXT: vsteg %v1, 16(%r1), 1 -; VECTOR-NEXT: vsteg %v0, 0(%r1), 1 +; VECTOR-NEXT: vsrlb %v3, %v1, %v2 +; VECTOR-NEXT: vsrlb %v2, %v0, %v2 +; VECTOR-NEXT: vsteg %v3, 16(%r1), 1 +; VECTOR-NEXT: vsteg %v2, 0(%r1), 1 ; VECTOR-NEXT: br %r14 store %Ty6 %A, ptr @Dst ret void @@ -854,17 +854,17 @@ define void @call6() { ; VECTOR-NEXT: aghi %r15, -192 ; VECTOR-NEXT: .cfi_def_cfa_offset 352 ; VECTOR-NEXT: lgrl %r1, Src@GOT +; VECTOR-NEXT: vgbm %v0, 0 ; VECTOR-NEXT: vgbm %v1, 0 ; VECTOR-NEXT: vleb %v1, 8(%r1), 15 ; VECTOR-NEXT: vlrepg %v2, 0(%r1) -; VECTOR-NEXT: vrepib %v3, 8 -; VECTOR-NEXT: vslb %v2, %v2, %v3 -; VECTOR-NEXT: vgbm %v0, 0 -; VECTOR-NEXT: vo %v1, %v1, %v2 ; VECTOR-NEXT: vleb %v0, 24(%r1), 15 -; VECTOR-NEXT: vlrepg %v2, 16(%r1) -; VECTOR-NEXT: vslb %v2, %v2, %v3 -; VECTOR-NEXT: vo %v0, %v0, %v2 +; VECTOR-NEXT: vlrepg %v3, 16(%r1) +; VECTOR-NEXT: vrepib %v4, 8 +; VECTOR-NEXT: vslb %v2, %v2, %v4 +; VECTOR-NEXT: vslb %v3, %v3, %v4 +; VECTOR-NEXT: vo %v1, %v1, %v2 +; VECTOR-NEXT: vo %v0, %v0, %v3 ; VECTOR-NEXT: la %r2, 176(%r15) ; VECTOR-NEXT: la %r3, 160(%r15) ; VECTOR-NEXT: vst %v0, 160(%r15), 3 
diff --git a/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll b/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll index 09edd6eb227af..b7d76f4e12098 100644 --- a/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll +++ b/llvm/test/CodeGen/SystemZ/atomicrmw-ops-i128.ll @@ -111,11 +111,11 @@ define i128 @atomicrmw_nand(ptr %src, i128 %b) { ; CHECK-NEXT: vlvgp %v1, %r0, %r1 ; CHECK-NEXT: .LBB4_1: # %atomicrmw.start ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vnn %v2, %v1, %v0 ; CHECK-NEXT: vlgvg %r1, %v1, 1 ; CHECK-NEXT: vlgvg %r0, %v1, 0 -; CHECK-NEXT: vnn %v1, %v1, %v0 -; CHECK-NEXT: vlgvg %r5, %v1, 1 -; CHECK-NEXT: vlgvg %r4, %v1, 0 +; CHECK-NEXT: vlgvg %r5, %v2, 1 +; CHECK-NEXT: vlgvg %r4, %v2, 0 ; CHECK-NEXT: cdsg %r0, %r4, 0(%r3) ; CHECK-NEXT: vlvgp %v1, %r0, %r1 ; CHECK-NEXT: jl .LBB4_1 @@ -335,10 +335,10 @@ define i128 @atomicrmw_uinc_wrap(ptr %src, i128 %b) { ; CHECK-LABEL: atomicrmw_uinc_wrap: ; CHECK: # %bb.0: ; CHECK-NEXT: vl %v0, 0(%r4), 3 +; CHECK-NEXT: larl %r4, .LCPI11_0 +; CHECK-NEXT: vl %v1, 0(%r4), 3 ; CHECK-NEXT: lpq %r0, 0(%r3) ; CHECK-NEXT: vlvgp %v2, %r0, %r1 -; CHECK-NEXT: larl %r1, .LCPI11_0 -; CHECK-NEXT: vl %v1, 0(%r1), 3 ; CHECK-NEXT: j .LBB11_2 ; CHECK-NEXT: .LBB11_1: # %atomicrmw.start ; CHECK-NEXT: # in Loop: Header=BB11_2 Depth=1 @@ -376,11 +376,11 @@ define i128 @atomicrmw_udec_wrap(ptr %src, i128 %b) { ; CHECK-LABEL: atomicrmw_udec_wrap: ; CHECK: # %bb.0: ; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vgbm %v2, 65535 +; CHECK-NEXT: larl %r4, .LCPI12_0 +; CHECK-NEXT: vl %v1, 0(%r4), 3 ; CHECK-NEXT: lpq %r0, 0(%r3) ; CHECK-NEXT: vlvgp %v3, %r0, %r1 -; CHECK-NEXT: larl %r1, .LCPI12_0 -; CHECK-NEXT: vl %v1, 0(%r1), 3 +; CHECK-NEXT: vgbm %v2, 65535 ; CHECK-NEXT: j .LBB12_2 ; CHECK-NEXT: .LBB12_1: # %atomicrmw.start ; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1 diff --git a/llvm/test/CodeGen/SystemZ/bswap-09.ll b/llvm/test/CodeGen/SystemZ/bswap-09.ll index a2d8273c89695..e8468acaee431 100644 --- 
a/llvm/test/CodeGen/SystemZ/bswap-09.ll +++ b/llvm/test/CodeGen/SystemZ/bswap-09.ll @@ -9,14 +9,14 @@ declare i128 @llvm.bswap.i128(i128 %a) define i128 @f1(i128 %a, i128 %b, i128 %c) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v1, 0(%r4), 3 -; CHECK-NEXT: vl %v2, 0(%r3), 3 +; CHECK-NEXT: vl %v0, 0(%r4), 3 +; CHECK-NEXT: vl %v1, 0(%r3), 3 ; CHECK-NEXT: larl %r1, .LCPI0_0 -; CHECK-NEXT: vaq %v1, %v2, %v1 ; CHECK-NEXT: vl %v2, 0(%r1), 3 -; CHECK-NEXT: vl %v0, 0(%r5), 3 -; CHECK-NEXT: vperm %v1, %v1, %v1, %v2 +; CHECK-NEXT: vl %v3, 0(%r5), 3 ; CHECK-NEXT: vaq %v0, %v1, %v0 +; CHECK-NEXT: vperm %v0, %v0, %v0, %v2 +; CHECK-NEXT: vaq %v0, %v0, %v3 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %in = add i128 %a, %b @@ -32,9 +32,9 @@ define i128 @f2(i128 %a, i128 %b) { ; CHECK-NEXT: vl %v0, 0(%r4), 3 ; CHECK-NEXT: vl %v1, 0(%r3), 3 ; CHECK-NEXT: larl %r1, .LCPI1_0 +; CHECK-NEXT: vl %v2, 0(%r1), 3 ; CHECK-NEXT: vaq %v0, %v1, %v0 -; CHECK-NEXT: vl %v1, 0(%r1), 3 -; CHECK-NEXT: vperm %v0, %v0, %v0, %v1 +; CHECK-NEXT: vperm %v0, %v0, %v0, %v2 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %in = add i128 %a, %b @@ -47,11 +47,11 @@ define i128 @f3(i128 %a, i128 %b) { ; CHECK-LABEL: f3: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI2_0 -; CHECK-NEXT: vl %v1, 0(%r3), 3 -; CHECK-NEXT: vl %v2, 0(%r1), 3 -; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vperm %v1, %v1, %v1, %v2 -; CHECK-NEXT: vaq %v0, %v1, %v0 +; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r1), 3 +; CHECK-NEXT: vl %v2, 0(%r4), 3 +; CHECK-NEXT: vperm %v0, %v0, %v0, %v1 +; CHECK-NEXT: vaq %v0, %v0, %v2 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %swapped = call i128 @llvm.bswap.i128(i128 %a) diff --git a/llvm/test/CodeGen/SystemZ/bswap-10.ll b/llvm/test/CodeGen/SystemZ/bswap-10.ll index 6de2970b80e2e..465c666808958 100644 --- a/llvm/test/CodeGen/SystemZ/bswap-10.ll +++ b/llvm/test/CodeGen/SystemZ/bswap-10.ll @@ -9,14 +9,14 @@ declare i128 @llvm.bswap.i128(i128 %a) 
define i128 @f1(i128 %a, i128 %b, i128 %c) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v1, 0(%r4), 3 -; CHECK-NEXT: vl %v2, 0(%r3), 3 +; CHECK-NEXT: vl %v0, 0(%r4), 3 +; CHECK-NEXT: vl %v1, 0(%r3), 3 ; CHECK-NEXT: larl %r1, .LCPI0_0 -; CHECK-NEXT: vaq %v1, %v2, %v1 ; CHECK-NEXT: vl %v2, 0(%r1), 3 -; CHECK-NEXT: vl %v0, 0(%r5), 3 -; CHECK-NEXT: vperm %v1, %v1, %v1, %v2 +; CHECK-NEXT: vl %v3, 0(%r5), 3 ; CHECK-NEXT: vaq %v0, %v1, %v0 +; CHECK-NEXT: vperm %v0, %v0, %v0, %v2 +; CHECK-NEXT: vaq %v0, %v0, %v3 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %in = add i128 %a, %b diff --git a/llvm/test/CodeGen/SystemZ/call-zos-vec.ll b/llvm/test/CodeGen/SystemZ/call-zos-vec.ll index 20bf2687c957e..32d29cb8ebc08 100644 --- a/llvm/test/CodeGen/SystemZ/call-zos-vec.ll +++ b/llvm/test/CodeGen/SystemZ/call-zos-vec.ll @@ -9,15 +9,15 @@ entry: } ; CHECK-LABEL: sum_vecs1 -; CHECK: vaf 1,24,25 -; CHECK: vaf 1,1,26 -; CHECK: vaf 1,1,27 -; CHECK: vaf 1,1,28 -; CHECK: vaf 1,1,29 -; CHECK: vl 0,2304(4),4 -; CHECK: vaf 1,1,30 -; CHECK: vaf 1,1,31 -; CHECK: vaf 24,1,0 +; CHECK: vaf 0,24,25 +; CHECK: vaf 0,0,26 +; CHECK: vaf 0,0,27 +; CHECK: vaf 0,0,28 +; CHECK: vaf 0,0,29 +; CHECK: vl 1,2304(4),4 +; CHECK: vaf 0,0,30 +; CHECK: vaf 0,0,31 +; CHECK: vaf 24,0,1 define <4 x i32> @sum_vecs1(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4, <4 x i32> %v5, <4 x i32> %v6, <4 x i32> %v7, <4 x i32> %v8, <4 x i32> %v9) { entry: %add0 = add <4 x i32> %v1, %v2 diff --git a/llvm/test/CodeGen/SystemZ/canonicalize-vars.ll b/llvm/test/CodeGen/SystemZ/canonicalize-vars.ll index e6659d385ae5f..e7e9bbc207af2 100644 --- a/llvm/test/CodeGen/SystemZ/canonicalize-vars.ll +++ b/llvm/test/CodeGen/SystemZ/canonicalize-vars.ll @@ -129,13 +129,13 @@ define <8 x half> @canonicalize_v8f16(<8 x half> %a) nounwind { ; Z16-NEXT: vgmf %v1, 2, 8 ; Z16-NEXT: meebr %f0, %f1 ; Z16-NEXT: brasl %r14, __truncsfhf2@PLT -; Z16-NEXT: vl %v1, 192(%r15), 3 # 16-byte Reload +; Z16-NEXT: vl %v1, 
160(%r15), 3 # 16-byte Reload +; Z16-NEXT: vl %v2, 192(%r15), 3 # 16-byte Reload ; Z16-NEXT: # kill: def $f0h killed $f0h def $v0 -; Z16-NEXT: vmrhh %v0, %v0, %v1 +; Z16-NEXT: vreph %v1, %v1, 5 +; Z16-NEXT: vmrhh %v0, %v0, %v2 ; Z16-NEXT: vst %v0, 192(%r15), 3 # 16-byte Spill -; Z16-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload -; Z16-NEXT: vreph %v0, %v0, 5 -; Z16-NEXT: # kill: def $f0h killed $f0h killed $v0 +; Z16-NEXT: ldr %f0, %f1 ; Z16-NEXT: brasl %r14, __extendhfsf2@PLT ; Z16-NEXT: vgmf %v1, 2, 8 ; Z16-NEXT: meebr %f0, %f1 @@ -150,13 +150,13 @@ define <8 x half> @canonicalize_v8f16(<8 x half> %a) nounwind { ; Z16-NEXT: meebr %f0, %f1 ; Z16-NEXT: brasl %r14, __truncsfhf2@PLT ; Z16-NEXT: vl %v1, 176(%r15), 3 # 16-byte Reload +; Z16-NEXT: vl %v2, 192(%r15), 3 # 16-byte Reload ; Z16-NEXT: # kill: def $f0h killed $f0h def $v0 -; Z16-NEXT: vmrhh %v0, %v0, %v1 -; Z16-NEXT: vl %v1, 192(%r15), 3 # 16-byte Reload -; Z16-NEXT: vmrhf %v0, %v0, %v1 -; Z16-NEXT: vst %v0, 192(%r15), 3 # 16-byte Spill +; Z16-NEXT: vmrhh %v1, %v0, %v1 ; Z16-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload +; Z16-NEXT: vmrhf %v1, %v1, %v2 ; Z16-NEXT: vreph %v0, %v0, 3 +; Z16-NEXT: vst %v1, 192(%r15), 3 # 16-byte Spill ; Z16-NEXT: # kill: def $f0h killed $f0h killed $v0 ; Z16-NEXT: brasl %r14, __extendhfsf2@PLT ; Z16-NEXT: vgmf %v1, 2, 8 @@ -262,13 +262,13 @@ define void @canonicalize_ptr_v8f16(ptr %out) nounwind { ; Z16-NEXT: vgmf %v1, 2, 8 ; Z16-NEXT: meebr %f0, %f1 ; Z16-NEXT: brasl %r14, __truncsfhf2@PLT -; Z16-NEXT: vl %v1, 192(%r15), 3 # 16-byte Reload +; Z16-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload +; Z16-NEXT: vl %v2, 192(%r15), 3 # 16-byte Reload ; Z16-NEXT: # kill: def $f0h killed $f0h def $v0 -; Z16-NEXT: vmrhh %v0, %v0, %v1 +; Z16-NEXT: vreph %v1, %v1, 5 +; Z16-NEXT: vmrhh %v0, %v0, %v2 ; Z16-NEXT: vst %v0, 192(%r15), 3 # 16-byte Spill -; Z16-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload -; Z16-NEXT: vreph %v0, %v0, 5 -; Z16-NEXT: # kill: def $f0h killed $f0h killed $v0 +; Z16-NEXT: ldr 
%f0, %f1 ; Z16-NEXT: brasl %r14, __extendhfsf2@PLT ; Z16-NEXT: vgmf %v1, 2, 8 ; Z16-NEXT: meebr %f0, %f1 @@ -283,13 +283,13 @@ define void @canonicalize_ptr_v8f16(ptr %out) nounwind { ; Z16-NEXT: meebr %f0, %f1 ; Z16-NEXT: brasl %r14, __truncsfhf2@PLT ; Z16-NEXT: vl %v1, 176(%r15), 3 # 16-byte Reload +; Z16-NEXT: vl %v2, 192(%r15), 3 # 16-byte Reload ; Z16-NEXT: # kill: def $f0h killed $f0h def $v0 -; Z16-NEXT: vmrhh %v0, %v0, %v1 -; Z16-NEXT: vl %v1, 192(%r15), 3 # 16-byte Reload -; Z16-NEXT: vmrhf %v0, %v0, %v1 -; Z16-NEXT: vst %v0, 192(%r15), 3 # 16-byte Spill +; Z16-NEXT: vmrhh %v1, %v0, %v1 ; Z16-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload +; Z16-NEXT: vmrhf %v1, %v1, %v2 ; Z16-NEXT: vreph %v0, %v0, 3 +; Z16-NEXT: vst %v1, 192(%r15), 3 # 16-byte Spill ; Z16-NEXT: # kill: def $f0h killed $f0h killed $v0 ; Z16-NEXT: brasl %r14, __extendhfsf2@PLT ; Z16-NEXT: vgmf %v1, 2, 8 diff --git a/llvm/test/CodeGen/SystemZ/codegenprepare-sink-and-for-tm.ll b/llvm/test/CodeGen/SystemZ/codegenprepare-sink-and-for-tm.ll index 5f4b1dbf1bc10..40c138b84cd40 100644 --- a/llvm/test/CodeGen/SystemZ/codegenprepare-sink-and-for-tm.ll +++ b/llvm/test/CodeGen/SystemZ/codegenprepare-sink-and-for-tm.ll @@ -7,8 +7,8 @@ define void @fun(i32 %Arg) { ; CHECK-LABEL: fun: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: ahi %r2, 1 ; CHECK-NEXT: lhi %r0, 0 +; CHECK-NEXT: ahi %r2, 1 ; CHECK-NEXT: cijlh %r0, 0, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: tmll %r2, 16 diff --git a/llvm/test/CodeGen/SystemZ/dag-combine-05.ll b/llvm/test/CodeGen/SystemZ/dag-combine-05.ll index 60528ad67d039..692d74250c1a0 100644 --- a/llvm/test/CodeGen/SystemZ/dag-combine-05.ll +++ b/llvm/test/CodeGen/SystemZ/dag-combine-05.ll @@ -9,18 +9,18 @@ define void @fun(i16 %arg0, ptr %src, ptr %dst) { ; CHECK-LABEL: fun: ; CHECK: # %bb.0: # %bb -; CHECK-NEXT: llhr %r0, %r2 -; CHECK-NEXT: llh %r2, 0(%r3) -; CHECK-NEXT: chi %r0, 9616 +; CHECK-NEXT: llh %r0, 0(%r3) +; CHECK-NEXT: llhr %r1, %r2 +; CHECK-NEXT: chi %r1, 9616 ; 
CHECK-NEXT: lhi %r1, 0 ; CHECK-NEXT: lochil %r1, 1 -; CHECK-NEXT: afi %r2, 65535 -; CHECK-NEXT: llhr %r3, %r2 -; CHECK-NEXT: lhi %r0, 0 -; CHECK-NEXT: cr %r3, %r2 -; CHECK-NEXT: lochilh %r0, 1 -; CHECK-NEXT: ar %r0, %r1 -; CHECK-NEXT: st %r0, 0(%r4) +; CHECK-NEXT: afi %r0, 65535 +; CHECK-NEXT: llhr %r2, %r0 +; CHECK-NEXT: lhi %r3, 0 +; CHECK-NEXT: cr %r2, %r0 +; CHECK-NEXT: lochilh %r3, 1 +; CHECK-NEXT: ar %r1, %r3 +; CHECK-NEXT: st %r1, 0(%r4) ; CHECK-NEXT: br %r14 bb: %tmp = icmp ult i16 %arg0, 9616 diff --git a/llvm/test/CodeGen/SystemZ/dag-combine-07.ll b/llvm/test/CodeGen/SystemZ/dag-combine-07.ll index 7dd76b61846ef..4c65a5371583a 100644 --- a/llvm/test/CodeGen/SystemZ/dag-combine-07.ll +++ b/llvm/test/CodeGen/SystemZ/dag-combine-07.ll @@ -11,11 +11,11 @@ define void @func_5(ptr %Dst) { ; CHECK-LABEL: func_5: ; CHECK: # %bb.0: ; CHECK-NEXT: lgrl %r1, G2@GOT +; CHECK-NEXT: lgrl %r3, G1@GOT ; CHECK-NEXT: llihl %r0, 50 ; CHECK-NEXT: oill %r0, 2 ; CHECK-NEXT: stg %r0, 0(%r1) -; CHECK-NEXT: lgrl %r1, G1@GOT -; CHECK-NEXT: stg %r0, 0(%r1) +; CHECK-NEXT: stg %r0, 0(%r3) ; CHECK-NEXT: mvhi 0(%r2), 2 ; CHECK-NEXT: br %r14 store i64 214748364802, ptr @G2, align 8 diff --git a/llvm/test/CodeGen/SystemZ/fold-masked-merge.ll b/llvm/test/CodeGen/SystemZ/fold-masked-merge.ll index c014345507f69..86edaeda71e94 100644 --- a/llvm/test/CodeGen/SystemZ/fold-masked-merge.ll +++ b/llvm/test/CodeGen/SystemZ/fold-masked-merge.ll @@ -96,17 +96,17 @@ define i32 @not_a_masked_merge0(i32 %a0, i32 %a1, i32 %a2) { ; NO-MISC3-LABEL: not_a_masked_merge0: ; NO-MISC3: # %bb.0: ; NO-MISC3-NEXT: lcr %r0, %r2 -; NO-MISC3-NEXT: nr %r3, %r2 +; NO-MISC3-NEXT: nr %r2, %r3 ; NO-MISC3-NEXT: nr %r0, %r4 -; NO-MISC3-NEXT: ork %r2, %r3, %r0 +; NO-MISC3-NEXT: or %r2, %r0 ; NO-MISC3-NEXT: br %r14 ; ; MISC3-LABEL: not_a_masked_merge0: ; MISC3: # %bb.0: ; MISC3-NEXT: lcr %r0, %r2 -; MISC3-NEXT: nr %r3, %r2 +; MISC3-NEXT: nr %r2, %r3 ; MISC3-NEXT: nr %r0, %r4 -; MISC3-NEXT: ork %r2, %r3, %r0 +; MISC3-NEXT: or 
%r2, %r0 ; MISC3-NEXT: br %r14 %and0 = and i32 %a0, %a1 %not_a_not = sub i32 0, %a0 @@ -162,10 +162,10 @@ define i32 @not_a_masked_merge2(i32 %a0, i32 %a1, i32 %a2) { define i32 @not_a_masked_merge3(i32 %a0, i32 %a1, i32 %a2) { ; NO-MISC3-LABEL: not_a_masked_merge3: ; NO-MISC3: # %bb.0: -; NO-MISC3-NEXT: nr %r3, %r2 -; NO-MISC3-NEXT: xr %r2, %r4 -; NO-MISC3-NEXT: xilf %r2, 4294967295 -; NO-MISC3-NEXT: or %r2, %r3 +; NO-MISC3-NEXT: xr %r4, %r2 +; NO-MISC3-NEXT: nr %r2, %r3 +; NO-MISC3-NEXT: xilf %r4, 4294967295 +; NO-MISC3-NEXT: or %r2, %r4 ; NO-MISC3-NEXT: br %r14 ; ; MISC3-LABEL: not_a_masked_merge3: @@ -238,8 +238,8 @@ define i32 @masked_merge_no_transform1(i32 %a0, i32 %a1, i32 %a2, ptr %p1) { ; MISC3: # %bb.0: ; MISC3-NEXT: nrk %r0, %r2, %r3 ; MISC3-NEXT: ncrk %r1, %r4, %r2 -; MISC3-NEXT: xilf %r2, 4294967295 ; MISC3-NEXT: or %r0, %r1 +; MISC3-NEXT: xilf %r2, 4294967295 ; MISC3-NEXT: st %r2, 0(%r5) ; MISC3-NEXT: lr %r2, %r0 ; MISC3-NEXT: br %r14 diff --git a/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll b/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll index c2c5889a689e4..d1997e98fbe68 100644 --- a/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll +++ b/llvm/test/CodeGen/SystemZ/fp-copysign-03.ll @@ -209,16 +209,16 @@ define fp128 @f13(fp128 %a, float %b) { ; ; Z16-LABEL: f13: ; Z16: # %bb.0: -; Z16-NEXT: vl %v1, 0(%r3), 3 ; Z16-NEXT: vlgvf %r0, %v0, 0 +; Z16-NEXT: vl %v0, 0(%r3), 3 ; Z16-NEXT: tmlh %r0, 32768 ; Z16-NEXT: je .LBB13_2 ; Z16-NEXT: # %bb.1: -; Z16-NEXT: wflnxb %v0, %v1 +; Z16-NEXT: wflnxb %v0, %v0 ; Z16-NEXT: vst %v0, 0(%r2), 3 ; Z16-NEXT: br %r14 ; Z16-NEXT: .LBB13_2: -; Z16-NEXT: wflpxb %v0, %v1 +; Z16-NEXT: wflpxb %v0, %v0 ; Z16-NEXT: vst %v0, 0(%r2), 3 ; Z16-NEXT: br %r14 %b128 = fpext float %b to fp128 @@ -239,16 +239,16 @@ define fp128 @f14(fp128 %a, double %b) { ; ; Z16-LABEL: f14: ; Z16: # %bb.0: -; Z16-NEXT: vl %v1, 0(%r3), 3 ; Z16-NEXT: lgdr %r0, %f0 +; Z16-NEXT: vl %v0, 0(%r3), 3 ; Z16-NEXT: tmhh %r0, 32768 ; Z16-NEXT: je .LBB14_2 ; Z16-NEXT: # 
%bb.1: -; Z16-NEXT: wflnxb %v0, %v1 +; Z16-NEXT: wflnxb %v0, %v0 ; Z16-NEXT: vst %v0, 0(%r2), 3 ; Z16-NEXT: br %r14 ; Z16-NEXT: .LBB14_2: -; Z16-NEXT: wflpxb %v0, %v1 +; Z16-NEXT: wflpxb %v0, %v0 ; Z16-NEXT: vst %v0, 0(%r2), 3 ; Z16-NEXT: br %r14 %b128 = fpext double %b to fp128 diff --git a/llvm/test/CodeGen/SystemZ/fp-half-vector-binops.ll b/llvm/test/CodeGen/SystemZ/fp-half-vector-binops.ll index 825472299d028..f141ee0599222 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half-vector-binops.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half-vector-binops.ll @@ -198,10 +198,10 @@ define void @fun0(ptr %Src, ptr %Dst) { ; VECTOR-NEXT: std %f8, 240(%r15) # 8-byte Spill ; VECTOR-NEXT: .cfi_offset %f8, -168 ; VECTOR-NEXT: vl %v0, 16(%r2), 3 -; VECTOR-NEXT: mvc 160(16,%r15), 0(%r2) # 16-byte Folded Spill ; VECTOR-NEXT: lgr %r13, %r3 ; VECTOR-NEXT: vst %v0, 176(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: vreph %v0, %v0, 7 +; VECTOR-NEXT: mvc 160(16,%r15), 0(%r2) # 16-byte Folded Spill ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: ldr %f8, %f0 @@ -224,13 +224,13 @@ define void @fun0(ptr %Src, ptr %Dst) { ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: aebr %f0, %f8 ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT -; VECTOR-NEXT: vl %v1, 208(%r15), 3 # 16-byte Reload +; VECTOR-NEXT: vl %v1, 176(%r15), 3 # 16-byte Reload +; VECTOR-NEXT: vl %v2, 208(%r15), 3 # 16-byte Reload ; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0 -; VECTOR-NEXT: vmrhh %v0, %v0, %v1 +; VECTOR-NEXT: vreph %v1, %v1, 5 +; VECTOR-NEXT: vmrhh %v0, %v0, %v2 ; VECTOR-NEXT: vst %v0, 208(%r15), 3 # 16-byte Spill -; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload -; VECTOR-NEXT: vreph %v0, %v0, 5 -; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 +; VECTOR-NEXT: ldr %f0, %f1 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: ldr %f8, %f0 ; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload @@ -253,13 +253,13 @@ define void 
@fun0(ptr %Src, ptr %Dst) { ; VECTOR-NEXT: aebr %f0, %f8 ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT ; VECTOR-NEXT: vl %v1, 192(%r15), 3 # 16-byte Reload +; VECTOR-NEXT: vl %v2, 208(%r15), 3 # 16-byte Reload ; VECTOR-NEXT: # kill: def $f0h killed $f0h def $v0 -; VECTOR-NEXT: vmrhh %v0, %v0, %v1 -; VECTOR-NEXT: vl %v1, 208(%r15), 3 # 16-byte Reload -; VECTOR-NEXT: vmrhf %v0, %v0, %v1 -; VECTOR-NEXT: vst %v0, 208(%r15), 3 # 16-byte Spill +; VECTOR-NEXT: vmrhh %v1, %v0, %v1 ; VECTOR-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload +; VECTOR-NEXT: vmrhf %v1, %v1, %v2 ; VECTOR-NEXT: vreph %v0, %v0, 3 +; VECTOR-NEXT: vst %v1, 208(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: ldr %f8, %f0 diff --git a/llvm/test/CodeGen/SystemZ/fp-half-vector-fcmp-select.ll b/llvm/test/CodeGen/SystemZ/fp-half-vector-fcmp-select.ll index a453d29705ff2..8c23a06df8cf2 100644 --- a/llvm/test/CodeGen/SystemZ/fp-half-vector-fcmp-select.ll +++ b/llvm/test/CodeGen/SystemZ/fp-half-vector-fcmp-select.ll @@ -242,10 +242,10 @@ define void @fun0(ptr %Src, ptr %Dst) { ; VECTOR-NEXT: std %f8, 208(%r15) # 8-byte Spill ; VECTOR-NEXT: .cfi_offset %f8, -168 ; VECTOR-NEXT: vl %v0, 16(%r2), 3 -; VECTOR-NEXT: mvc 176(16,%r15), 0(%r2) # 16-byte Folded Spill ; VECTOR-NEXT: lgr %r13, %r3 ; VECTOR-NEXT: vst %v0, 192(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: vreph %v0, %v0, 7 +; VECTOR-NEXT: mvc 176(16,%r15), 0(%r2) # 16-byte Folded Spill ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: ldr %f8, %f0 @@ -256,9 +256,9 @@ define void @fun0(ptr %Src, ptr %Dst) { ; VECTOR-NEXT: cebr %f0, %f8 ; VECTOR-NEXT: vl %v0, 192(%r15), 3 # 16-byte Reload ; VECTOR-NEXT: lhi %r11, 0 +; VECTOR-NEXT: vreph %v0, %v0, 3 ; VECTOR-NEXT: lhi %r12, 0 ; VECTOR-NEXT: lochie %r11, -1 -; VECTOR-NEXT: vreph %v0, %v0, 3 ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 ; VECTOR-NEXT: brasl 
%r14, __extendhfsf2@PLT ; VECTOR-NEXT: ldr %f8, %f0 @@ -279,13 +279,13 @@ define void @fun0(ptr %Src, ptr %Dst) { ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: cebr %f0, %f8 -; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload +; VECTOR-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload ; VECTOR-NEXT: lhi %r0, 0 -; VECTOR-NEXT: lochie %r0, -1 -; VECTOR-NEXT: vlvgh %v0, %r0, 0 -; VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: vl %v0, 192(%r15), 3 # 16-byte Reload +; VECTOR-NEXT: lochie %r0, -1 +; VECTOR-NEXT: vlvgh %v1, %r0, 0 ; VECTOR-NEXT: vreph %v0, %v0, 1 +; VECTOR-NEXT: vst %v1, 160(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: ldr %f8, %f0 @@ -294,13 +294,13 @@ define void @fun0(ptr %Src, ptr %Dst) { ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: cebr %f0, %f8 -; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload +; VECTOR-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload ; VECTOR-NEXT: lhi %r0, 0 -; VECTOR-NEXT: lochie %r0, -1 -; VECTOR-NEXT: vlvgh %v0, %r0, 1 -; VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: vl %v0, 192(%r15), 3 # 16-byte Reload +; VECTOR-NEXT: lochie %r0, -1 +; VECTOR-NEXT: vlvgh %v1, %r0, 1 ; VECTOR-NEXT: vreph %v0, %v0, 2 +; VECTOR-NEXT: vst %v1, 160(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: ldr %f8, %f0 @@ -309,13 +309,13 @@ define void @fun0(ptr %Src, ptr %Dst) { ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: cebr %f0, %f8 -; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload +; VECTOR-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload ; VECTOR-NEXT: lhi %r0, 0 -; VECTOR-NEXT: lochie %r0, -1 -; VECTOR-NEXT: vlvgh %v0, %r0, 2 -; 
VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: vl %v0, 192(%r15), 3 # 16-byte Reload +; VECTOR-NEXT: lochie %r0, -1 +; VECTOR-NEXT: vlvgh %v1, %r0, 2 ; VECTOR-NEXT: vreph %v0, %v0, 4 +; VECTOR-NEXT: vst %v1, 160(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: ldr %f8, %f0 @@ -324,13 +324,13 @@ define void @fun0(ptr %Src, ptr %Dst) { ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: cebr %f0, %f8 -; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload +; VECTOR-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload ; VECTOR-NEXT: lhi %r0, 0 -; VECTOR-NEXT: lochie %r0, -1 -; VECTOR-NEXT: vlvgh %v0, %r0, 4 -; VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: vl %v0, 192(%r15), 3 # 16-byte Reload +; VECTOR-NEXT: lochie %r0, -1 +; VECTOR-NEXT: vlvgh %v1, %r0, 4 ; VECTOR-NEXT: vreph %v0, %v0, 5 +; VECTOR-NEXT: vst %v1, 160(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: ldr %f8, %f0 @@ -339,13 +339,13 @@ define void @fun0(ptr %Src, ptr %Dst) { ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: cebr %f0, %f8 -; VECTOR-NEXT: vl %v0, 160(%r15), 3 # 16-byte Reload +; VECTOR-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload ; VECTOR-NEXT: lhi %r0, 0 -; VECTOR-NEXT: lochie %r0, -1 -; VECTOR-NEXT: vlvgh %v0, %r0, 5 -; VECTOR-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: vl %v0, 192(%r15), 3 # 16-byte Reload +; VECTOR-NEXT: lochie %r0, -1 +; VECTOR-NEXT: vlvgh %v1, %r0, 5 ; VECTOR-NEXT: vreph %v0, %v0, 6 +; VECTOR-NEXT: vst %v1, 160(%r15), 3 # 16-byte Spill ; VECTOR-NEXT: # kill: def $f0h killed $f0h killed $v0 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: ldr %f8, %f0 @@ -460,31 +460,31 @@ define void @fun1(ptr %Src, ptr %Dst) { 
; VECTOR-NEXT: vlreph %v0, 4(%r2) ; VECTOR-NEXT: vlreph %v8, 2(%r2) ; VECTOR-NEXT: vlreph %v11, 0(%r2) -; VECTOR-NEXT: vlreph %v9, 6(%r2) +; VECTOR-NEXT: vlreph %v10, 6(%r2) ; VECTOR-NEXT: lgr %r13, %r3 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT -; VECTOR-NEXT: ldr %f10, %f0 +; VECTOR-NEXT: ldr %f9, %f0 ; VECTOR-NEXT: ldr %f0, %f11 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT -; VECTOR-NEXT: cebr %f0, %f10 +; VECTOR-NEXT: cebr %f0, %f9 ; VECTOR-NEXT: je .LBB1_2 ; VECTOR-NEXT: # %bb.1: -; VECTOR-NEXT: ldr %f0, %f10 +; VECTOR-NEXT: ldr %f0, %f9 ; VECTOR-NEXT: .LBB1_2: ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT -; VECTOR-NEXT: ldr %f10, %f0 -; VECTOR-NEXT: ldr %f0, %f9 -; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT ; VECTOR-NEXT: ldr %f9, %f0 +; VECTOR-NEXT: ldr %f0, %f10 +; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT +; VECTOR-NEXT: ldr %f10, %f0 ; VECTOR-NEXT: ldr %f0, %f8 ; VECTOR-NEXT: brasl %r14, __extendhfsf2@PLT -; VECTOR-NEXT: cebr %f0, %f9 +; VECTOR-NEXT: cebr %f0, %f10 ; VECTOR-NEXT: je .LBB1_4 ; VECTOR-NEXT: # %bb.3: -; VECTOR-NEXT: ldr %f0, %f9 +; VECTOR-NEXT: ldr %f0, %f10 ; VECTOR-NEXT: .LBB1_4: ; VECTOR-NEXT: brasl %r14, __truncsfhf2@PLT -; VECTOR-NEXT: vsteh %v10, 0(%r13), 0 +; VECTOR-NEXT: vsteh %v9, 0(%r13), 0 ; VECTOR-NEXT: ld %f8, 184(%r15) # 8-byte Reload ; VECTOR-NEXT: ld %f9, 176(%r15) # 8-byte Reload ; VECTOR-NEXT: ld %f10, 168(%r15) # 8-byte Reload diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs-zEC12.ll b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs-zEC12.ll index 6228ffaa35fa2..cb6e9ca81bcfa 100644 --- a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs-zEC12.ll +++ b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-explicit-regs-zEC12.ll @@ -212,13 +212,13 @@ define <4 x i32> @vec128_and_f(<4 x i32> %cc_dep1) { ; CHECK-NEXT: aghi %r15, -176 ; CHECK-NEXT: .cfi_def_cfa_offset 336 ; CHECK-NEXT: # kill: def $r4l killed $r4l def $r4d +; CHECK-NEXT: # kill: def $r2l killed 
$r2l def $r2d ; CHECK-NEXT: sllg %r0, %r4, 32 ; CHECK-NEXT: lr %r0, %r5 -; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d +; CHECK-NEXT: sllg %r1, %r2, 32 +; CHECK-NEXT: lr %r1, %r3 ; CHECK-NEXT: stg %r0, 168(%r15) -; CHECK-NEXT: sllg %r0, %r2, 32 -; CHECK-NEXT: lr %r0, %r3 -; CHECK-NEXT: stg %r0, 160(%r15) +; CHECK-NEXT: stg %r1, 160(%r15) ; CHECK-NEXT: ld %f0, 160(%r15) ; CHECK-NEXT: ld %f2, 168(%r15) ; CHECK-NEXT: #APP diff --git a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-zEC12.ll b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-zEC12.ll index 19969ccf4e297..7f06f66e6411e 100644 --- a/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-zEC12.ll +++ b/llvm/test/CodeGen/SystemZ/inline-asm-fp-int-casting-zEC12.ll @@ -205,13 +205,13 @@ define <4 x i32> @vec128_and_f(<4 x i32> %cc_dep1) { ; CHECK-NEXT: aghi %r15, -176 ; CHECK-NEXT: .cfi_def_cfa_offset 336 ; CHECK-NEXT: # kill: def $r4l killed $r4l def $r4d +; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d ; CHECK-NEXT: sllg %r0, %r4, 32 ; CHECK-NEXT: lr %r0, %r5 -; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d +; CHECK-NEXT: sllg %r1, %r2, 32 +; CHECK-NEXT: lr %r1, %r3 ; CHECK-NEXT: stg %r0, 168(%r15) -; CHECK-NEXT: sllg %r0, %r2, 32 -; CHECK-NEXT: lr %r0, %r3 -; CHECK-NEXT: stg %r0, 160(%r15) +; CHECK-NEXT: stg %r1, 160(%r15) ; CHECK-NEXT: ld %f0, 160(%r15) ; CHECK-NEXT: ld %f2, 168(%r15) ; CHECK-NEXT: #APP diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-65.ll b/llvm/test/CodeGen/SystemZ/int-cmp-65.ll index f1d850200fe41..d9aff41f597b4 100644 --- a/llvm/test/CodeGen/SystemZ/int-cmp-65.ll +++ b/llvm/test/CodeGen/SystemZ/int-cmp-65.ll @@ -247,10 +247,10 @@ define i128 @i128_addc_xor_inv(i128 %a, i128 %b) { ; ; Z13-LABEL: i128_addc_xor_inv: ; Z13: # %bb.0: -; Z13-NEXT: vl %v1, 0(%r4), 3 -; Z13-NEXT: vl %v0, 0(%r3), 3 -; Z13-NEXT: vno %v1, %v1, %v1 -; Z13-NEXT: vscbiq %v0, %v1, %v0 +; Z13-NEXT: vl %v0, 0(%r4), 3 +; Z13-NEXT: vl %v1, 0(%r3), 3 +; Z13-NEXT: vno %v0, %v0, %v0 +; Z13-NEXT: vscbiq 
%v0, %v0, %v1 ; Z13-NEXT: vst %v0, 0(%r2), 3 ; Z13-NEXT: br %r14 %b.not = xor i128 %b, -1 diff --git a/llvm/test/CodeGen/SystemZ/int-conv-14.ll b/llvm/test/CodeGen/SystemZ/int-conv-14.ll index baab5ac7f4b5c..b8e40379b07fc 100644 --- a/llvm/test/CodeGen/SystemZ/int-conv-14.ll +++ b/llvm/test/CodeGen/SystemZ/int-conv-14.ll @@ -332,9 +332,9 @@ define i128 @f25(i1 %a) { ; CHECK-LABEL: f25: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI24_0 -; CHECK-NEXT: vl %v1, 0(%r1), 3 -; CHECK-NEXT: vlvgp %v0, %r3, %r3 -; CHECK-NEXT: vn %v0, %v0, %v1 +; CHECK-NEXT: vl %v0, 0(%r1), 3 +; CHECK-NEXT: vlvgp %v1, %r3, %r3 +; CHECK-NEXT: vn %v0, %v1, %v0 ; CHECK-NEXT: vgbm %v1, 0 ; CHECK-NEXT: vsq %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 @@ -347,13 +347,13 @@ define i128 @f25(i1 %a) { define i128 @f26(ptr %ptr) { ; CHECK-LABEL: f26: ; CHECK: # %bb.0: -; CHECK-NEXT: vgbm %v1, 0 -; CHECK-NEXT: vleb %v1, 0(%r3), 15 -; CHECK-NEXT: larl %r1, .LCPI25_0 -; CHECK-NEXT: vl %v2, 0(%r1), 3 ; CHECK-NEXT: vgbm %v0, 0 -; CHECK-NEXT: vn %v1, %v1, %v2 -; CHECK-NEXT: vsq %v0, %v0, %v1 +; CHECK-NEXT: vleb %v0, 0(%r3), 15 +; CHECK-NEXT: larl %r1, .LCPI25_0 +; CHECK-NEXT: vl %v1, 0(%r1), 3 +; CHECK-NEXT: vgbm %v2, 0 +; CHECK-NEXT: vn %v0, %v0, %v1 +; CHECK-NEXT: vsq %v0, %v2, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %a = load i1, ptr %ptr @@ -366,9 +366,9 @@ define i128 @f27(i1 %a) { ; CHECK-LABEL: f27: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI26_0 -; CHECK-NEXT: vl %v1, 0(%r1), 3 -; CHECK-NEXT: vlvgp %v0, %r3, %r3 -; CHECK-NEXT: vn %v0, %v0, %v1 +; CHECK-NEXT: vl %v0, 0(%r1), 3 +; CHECK-NEXT: vlvgp %v1, %r3, %r3 +; CHECK-NEXT: vn %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %res = zext i1 %a to i128 diff --git a/llvm/test/CodeGen/SystemZ/int-conv-15.ll b/llvm/test/CodeGen/SystemZ/int-conv-15.ll index f2c9ee5fa1f57..da9a2be186a73 100644 --- a/llvm/test/CodeGen/SystemZ/int-conv-15.ll +++ b/llvm/test/CodeGen/SystemZ/int-conv-15.ll @@ -332,9 +332,9 @@ 
define i128 @f25(i1 %a) { ; CHECK-LABEL: f25: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI24_0 -; CHECK-NEXT: vl %v1, 0(%r1), 3 -; CHECK-NEXT: vlvgp %v0, %r3, %r3 -; CHECK-NEXT: vn %v0, %v0, %v1 +; CHECK-NEXT: vl %v0, 0(%r1), 3 +; CHECK-NEXT: vlvgp %v1, %r3, %r3 +; CHECK-NEXT: vn %v0, %v1, %v0 ; CHECK-NEXT: vlcq %v0, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 @@ -364,9 +364,9 @@ define i128 @f27(i1 %a) { ; CHECK-LABEL: f27: ; CHECK: # %bb.0: ; CHECK-NEXT: larl %r1, .LCPI26_0 -; CHECK-NEXT: vl %v1, 0(%r1), 3 -; CHECK-NEXT: vlvgp %v0, %r3, %r3 -; CHECK-NEXT: vn %v0, %v0, %v1 +; CHECK-NEXT: vl %v0, 0(%r1), 3 +; CHECK-NEXT: vlvgp %v1, %r3, %r3 +; CHECK-NEXT: vn %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %res = zext i1 %a to i128 diff --git a/llvm/test/CodeGen/SystemZ/int-mul-12.ll b/llvm/test/CodeGen/SystemZ/int-mul-12.ll index e7005f50a12fd..96132ad36f8b4 100644 --- a/llvm/test/CodeGen/SystemZ/int-mul-12.ll +++ b/llvm/test/CodeGen/SystemZ/int-mul-12.ll @@ -7,21 +7,20 @@ define i128 @f1(i128 %a, i128 %b) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: stmg %r12, %r15, 96(%r15) -; CHECK-NEXT: .cfi_offset %r12, -64 -; CHECK-NEXT: .cfi_offset %r13, -56 +; CHECK-NEXT: stmg %r14, %r15, 112(%r15) +; CHECK-NEXT: .cfi_offset %r14, -48 ; CHECK-NEXT: .cfi_offset %r15, -40 -; CHECK-NEXT: lg %r13, 8(%r3) -; CHECK-NEXT: lg %r0, 8(%r4) -; CHECK-NEXT: lgr %r1, %r13 -; CHECK-NEXT: mlgr %r12, %r0 -; CHECK-NEXT: msg %r1, 0(%r4) -; CHECK-NEXT: msg %r0, 0(%r3) -; CHECK-NEXT: agr %r1, %r12 -; CHECK-NEXT: agr %r0, %r1 -; CHECK-NEXT: stg %r13, 8(%r2) -; CHECK-NEXT: stg %r0, 0(%r2) -; CHECK-NEXT: lmg %r12, %r15, 96(%r15) +; CHECK-NEXT: lg %r1, 8(%r3) +; CHECK-NEXT: lg %r5, 8(%r4) +; CHECK-NEXT: lgr %r14, %r1 +; CHECK-NEXT: mlgr %r0, %r5 +; CHECK-NEXT: msg %r14, 0(%r4) +; CHECK-NEXT: msg %r5, 0(%r3) +; CHECK-NEXT: agr %r14, %r0 +; CHECK-NEXT: agr %r5, %r14 +; CHECK-NEXT: stg %r1, 8(%r2) +; CHECK-NEXT: stg %r5, 0(%r2) +; CHECK-NEXT: lmg %r14, 
%r15, 112(%r15) ; CHECK-NEXT: br %r14 %res = mul i128 %a, %b ret i128 %res diff --git a/llvm/test/CodeGen/SystemZ/int-mul-13.ll b/llvm/test/CodeGen/SystemZ/int-mul-13.ll index 82937cf66c629..db159187f7ec8 100644 --- a/llvm/test/CodeGen/SystemZ/int-mul-13.ll +++ b/llvm/test/CodeGen/SystemZ/int-mul-13.ll @@ -24,12 +24,12 @@ define i64 @f1(i64 %dummy, i64 %a, i64 %b) { define i64 @f2(i64 %dummy, i64 %a, i64 %b) { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: srag %r1, %r4, 63 +; CHECK-NEXT: srag %r0, %r4, 63 ; CHECK-NEXT: # kill: def $r3d killed $r3d def $r2q -; CHECK-NEXT: srag %r0, %r3, 63 -; CHECK-NEXT: ngr %r1, %r3 +; CHECK-NEXT: srag %r1, %r3, 63 +; CHECK-NEXT: ngr %r0, %r3 ; CHECK-NEXT: mlgr %r2, %r4 -; CHECK-NEXT: ngr %r0, %r4 +; CHECK-NEXT: ngr %r1, %r4 ; CHECK-NEXT: agr %r0, %r1 ; CHECK-NEXT: sgr %r2, %r0 ; CHECK-NEXT: br %r14 diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-14.ll b/llvm/test/CodeGen/SystemZ/int-uadd-14.ll index c8873a4dfadef..a3603111aa2dc 100644 --- a/llvm/test/CodeGen/SystemZ/int-uadd-14.ll +++ b/llvm/test/CodeGen/SystemZ/int-uadd-14.ll @@ -6,17 +6,17 @@ define zeroext i1 @f1(i256 %a, i256 %b, ptr %res) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v2, 16(%r3), 3 -; CHECK-NEXT: vl %v3, 16(%r2), 3 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vl %v1, 0(%r2), 3 -; CHECK-NEXT: vaccq %v4, %v3, %v2 -; CHECK-NEXT: vacccq %v5, %v1, %v0, %v4 +; CHECK-NEXT: vl %v0, 16(%r3), 3 +; CHECK-NEXT: vl %v1, 16(%r2), 3 +; CHECK-NEXT: vl %v2, 0(%r3), 3 +; CHECK-NEXT: vl %v3, 0(%r2), 3 +; CHECK-NEXT: vaccq %v4, %v1, %v0 +; CHECK-NEXT: vaq %v0, %v1, %v0 +; CHECK-NEXT: vacccq %v5, %v3, %v2, %v4 ; CHECK-NEXT: vlgvg %r2, %v5, 1 -; CHECK-NEXT: vacq %v0, %v1, %v0, %v4 -; CHECK-NEXT: vaq %v1, %v3, %v2 -; CHECK-NEXT: vst %v1, 16(%r4), 3 -; CHECK-NEXT: vst %v0, 0(%r4), 3 +; CHECK-NEXT: vacq %v2, %v3, %v2, %v4 +; CHECK-NEXT: vst %v0, 16(%r4), 3 +; CHECK-NEXT: vst %v2, 0(%r4), 3 ; CHECK-NEXT: br %r14 %t = call {i256, i1} 
@llvm.uadd.with.overflow.i256(i256 %a, i256 %b) %val = extractvalue {i256, i1} %t, 0 @@ -28,12 +28,12 @@ define zeroext i1 @f1(i256 %a, i256 %b, ptr %res) { define zeroext i1 @f2(i256 %a, i256 %b) { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v2, 16(%r3), 3 -; CHECK-NEXT: vl %v3, 16(%r2), 3 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vl %v1, 0(%r2), 3 -; CHECK-NEXT: vaccq %v2, %v3, %v2 -; CHECK-NEXT: vacccq %v0, %v1, %v0, %v2 +; CHECK-NEXT: vl %v0, 16(%r3), 3 +; CHECK-NEXT: vl %v1, 16(%r2), 3 +; CHECK-NEXT: vl %v2, 0(%r3), 3 +; CHECK-NEXT: vl %v3, 0(%r2), 3 +; CHECK-NEXT: vaccq %v0, %v1, %v0 +; CHECK-NEXT: vacccq %v0, %v3, %v2, %v0 ; CHECK-NEXT: vlgvg %r2, %v0, 1 ; CHECK-NEXT: br %r14 %t = call {i256, i1} @llvm.uadd.with.overflow.i256(i256 %a, i256 %b) @@ -44,15 +44,15 @@ define zeroext i1 @f2(i256 %a, i256 %b) { define i256 @f3(i256 %a, i256 %b) { ; CHECK-LABEL: f3: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v2, 16(%r4), 3 -; CHECK-NEXT: vl %v3, 16(%r3), 3 -; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vl %v1, 0(%r3), 3 -; CHECK-NEXT: vaccq %v4, %v3, %v2 -; CHECK-NEXT: vacq %v0, %v1, %v0, %v4 -; CHECK-NEXT: vaq %v1, %v3, %v2 -; CHECK-NEXT: vst %v1, 16(%r2), 3 -; CHECK-NEXT: vst %v0, 0(%r2), 3 +; CHECK-NEXT: vl %v0, 16(%r4), 3 +; CHECK-NEXT: vl %v1, 16(%r3), 3 +; CHECK-NEXT: vl %v2, 0(%r4), 3 +; CHECK-NEXT: vl %v3, 0(%r3), 3 +; CHECK-NEXT: vaccq %v4, %v1, %v0 +; CHECK-NEXT: vaq %v0, %v1, %v0 +; CHECK-NEXT: vacq %v2, %v3, %v2, %v4 +; CHECK-NEXT: vst %v0, 16(%r2), 3 +; CHECK-NEXT: vst %v2, 0(%r2), 3 ; CHECK-NEXT: br %r14 %t = call {i256, i1} @llvm.uadd.with.overflow.i256(i256 %a, i256 %b) %val = extractvalue {i256, i1} %t, 0 diff --git a/llvm/test/CodeGen/SystemZ/int-usub-13.ll b/llvm/test/CodeGen/SystemZ/int-usub-13.ll index 794af3b73fbe2..ebdda5c69ecbd 100644 --- a/llvm/test/CodeGen/SystemZ/int-usub-13.ll +++ b/llvm/test/CodeGen/SystemZ/int-usub-13.ll @@ -6,18 +6,18 @@ define zeroext i1 @f1(i256 %a, i256 %b, ptr %res) { ; CHECK-LABEL: f1: ; CHECK: # 
%bb.0: -; CHECK-NEXT: vl %v2, 16(%r3), 3 -; CHECK-NEXT: vl %v3, 16(%r2), 3 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vl %v1, 0(%r2), 3 -; CHECK-NEXT: vscbiq %v4, %v3, %v2 -; CHECK-NEXT: vsbcbiq %v5, %v1, %v0, %v4 +; CHECK-NEXT: vl %v0, 16(%r3), 3 +; CHECK-NEXT: vl %v1, 16(%r2), 3 +; CHECK-NEXT: vl %v2, 0(%r3), 3 +; CHECK-NEXT: vl %v3, 0(%r2), 3 +; CHECK-NEXT: vscbiq %v4, %v1, %v0 +; CHECK-NEXT: vsq %v0, %v1, %v0 +; CHECK-NEXT: vsbcbiq %v5, %v3, %v2, %v4 ; CHECK-NEXT: vlgvg %r2, %v5, 1 -; CHECK-NEXT: vsbiq %v0, %v1, %v0, %v4 -; CHECK-NEXT: vsq %v1, %v3, %v2 +; CHECK-NEXT: vsbiq %v2, %v3, %v2, %v4 ; CHECK-NEXT: xilf %r2, 1 -; CHECK-NEXT: vst %v1, 16(%r4), 3 -; CHECK-NEXT: vst %v0, 0(%r4), 3 +; CHECK-NEXT: vst %v0, 16(%r4), 3 +; CHECK-NEXT: vst %v2, 0(%r4), 3 ; CHECK-NEXT: br %r14 %t = call {i256, i1} @llvm.usub.with.overflow.i256(i256 %a, i256 %b) %val = extractvalue {i256, i1} %t, 0 @@ -29,12 +29,12 @@ define zeroext i1 @f1(i256 %a, i256 %b, ptr %res) { define zeroext i1 @f2(i256 %a, i256 %b) { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v2, 16(%r3), 3 -; CHECK-NEXT: vl %v3, 16(%r2), 3 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vl %v1, 0(%r2), 3 -; CHECK-NEXT: vscbiq %v2, %v3, %v2 -; CHECK-NEXT: vsbcbiq %v0, %v1, %v0, %v2 +; CHECK-NEXT: vl %v0, 16(%r3), 3 +; CHECK-NEXT: vl %v1, 16(%r2), 3 +; CHECK-NEXT: vl %v2, 0(%r3), 3 +; CHECK-NEXT: vl %v3, 0(%r2), 3 +; CHECK-NEXT: vscbiq %v0, %v1, %v0 +; CHECK-NEXT: vsbcbiq %v0, %v3, %v2, %v0 ; CHECK-NEXT: vlgvg %r2, %v0, 1 ; CHECK-NEXT: xilf %r2, 1 ; CHECK-NEXT: br %r14 @@ -46,15 +46,15 @@ define zeroext i1 @f2(i256 %a, i256 %b) { define i256 @f3(i256 %a, i256 %b) { ; CHECK-LABEL: f3: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v2, 16(%r4), 3 -; CHECK-NEXT: vl %v3, 16(%r3), 3 -; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vl %v1, 0(%r3), 3 -; CHECK-NEXT: vscbiq %v4, %v3, %v2 -; CHECK-NEXT: vsbiq %v0, %v1, %v0, %v4 -; CHECK-NEXT: vsq %v1, %v3, %v2 -; CHECK-NEXT: vst %v1, 16(%r2), 3 -; CHECK-NEXT: vst %v0, 
0(%r2), 3 +; CHECK-NEXT: vl %v0, 16(%r4), 3 +; CHECK-NEXT: vl %v1, 16(%r3), 3 +; CHECK-NEXT: vl %v2, 0(%r4), 3 +; CHECK-NEXT: vl %v3, 0(%r3), 3 +; CHECK-NEXT: vscbiq %v4, %v1, %v0 +; CHECK-NEXT: vsq %v0, %v1, %v0 +; CHECK-NEXT: vsbiq %v2, %v3, %v2, %v4 +; CHECK-NEXT: vst %v0, 16(%r2), 3 +; CHECK-NEXT: vst %v2, 0(%r2), 3 ; CHECK-NEXT: br %r14 %t = call {i256, i1} @llvm.usub.with.overflow.i256(i256 %a, i256 %b) %val = extractvalue {i256, i1} %t, 0 diff --git a/llvm/test/CodeGen/SystemZ/machine-combiner-reassoc-fp.ll b/llvm/test/CodeGen/SystemZ/machine-combiner-reassoc-fp.ll index fdf1be68a5430..ec2e3b946659e 100644 --- a/llvm/test/CodeGen/SystemZ/machine-combiner-reassoc-fp.ll +++ b/llvm/test/CodeGen/SystemZ/machine-combiner-reassoc-fp.ll @@ -11,11 +11,11 @@ define double @fun0_fadd(ptr %x) { ; CHECK-NEXT: adb %f0, 8(%r2) ; CHECK-NEXT: ld %f1, 24(%r2) ; CHECK-NEXT: adb %f1, 16(%r2) +; CHECK-NEXT: ld %f2, 40(%r2) +; CHECK-NEXT: adb %f2, 32(%r2) +; CHECK-NEXT: adb %f2, 48(%r2) ; CHECK-NEXT: adbr %f0, %f1 -; CHECK-NEXT: ld %f1, 40(%r2) -; CHECK-NEXT: adb %f1, 32(%r2) -; CHECK-NEXT: adb %f1, 48(%r2) -; CHECK-NEXT: adbr %f0, %f1 +; CHECK-NEXT: adbr %f0, %f2 ; CHECK-NEXT: adb %f0, 56(%r2) ; CHECK-NEXT: br %r14 entry: @@ -51,11 +51,11 @@ define float @fun1_fadd(ptr %x) { ; CHECK-NEXT: aeb %f0, 4(%r2) ; CHECK-NEXT: lde %f1, 12(%r2) ; CHECK-NEXT: aeb %f1, 8(%r2) +; CHECK-NEXT: lde %f2, 20(%r2) +; CHECK-NEXT: aeb %f2, 16(%r2) +; CHECK-NEXT: aeb %f2, 24(%r2) ; CHECK-NEXT: aebr %f0, %f1 -; CHECK-NEXT: lde %f1, 20(%r2) -; CHECK-NEXT: aeb %f1, 16(%r2) -; CHECK-NEXT: aeb %f1, 24(%r2) -; CHECK-NEXT: aebr %f0, %f1 +; CHECK-NEXT: aebr %f0, %f2 ; CHECK-NEXT: aeb %f0, 28(%r2) ; CHECK-NEXT: br %r14 entry: @@ -89,16 +89,16 @@ define fp128 @fun2_fadd(ptr %x) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl %v0, 0(%r3), 3 ; CHECK-NEXT: vl %v1, 16(%r3), 3 +; CHECK-NEXT: vl %v2, 32(%r3), 3 +; CHECK-NEXT: vl %v3, 48(%r3), 3 +; CHECK-NEXT: vl %v4, 64(%r3), 3 +; CHECK-NEXT: vl %v5, 80(%r3), 3 ; 
CHECK-NEXT: wfaxb %v0, %v1, %v0 -; CHECK-NEXT: vl %v1, 32(%r3), 3 -; CHECK-NEXT: vl %v2, 48(%r3), 3 -; CHECK-NEXT: wfaxb %v1, %v1, %v2 +; CHECK-NEXT: wfaxb %v1, %v2, %v3 +; CHECK-NEXT: wfaxb %v2, %v4, %v5 +; CHECK-NEXT: vl %v3, 96(%r3), 3 ; CHECK-NEXT: wfaxb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 64(%r3), 3 -; CHECK-NEXT: vl %v2, 80(%r3), 3 -; CHECK-NEXT: wfaxb %v1, %v1, %v2 -; CHECK-NEXT: vl %v2, 96(%r3), 3 -; CHECK-NEXT: wfaxb %v1, %v1, %v2 +; CHECK-NEXT: wfaxb %v1, %v2, %v3 ; CHECK-NEXT: wfaxb %v0, %v0, %v1 ; CHECK-NEXT: vl %v1, 112(%r3), 3 ; CHECK-NEXT: wfaxb %v0, %v0, %v1 @@ -135,16 +135,16 @@ define <2 x double> @fun3_fadd(ptr %x) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl %v0, 0(%r2), 3 ; CHECK-NEXT: vl %v1, 16(%r2), 3 +; CHECK-NEXT: vl %v2, 32(%r2), 3 +; CHECK-NEXT: vl %v3, 48(%r2), 3 +; CHECK-NEXT: vl %v4, 64(%r2), 3 +; CHECK-NEXT: vl %v5, 80(%r2), 3 ; CHECK-NEXT: vfadb %v0, %v1, %v0 -; CHECK-NEXT: vl %v1, 32(%r2), 3 -; CHECK-NEXT: vl %v2, 48(%r2), 3 -; CHECK-NEXT: vfadb %v1, %v1, %v2 +; CHECK-NEXT: vfadb %v1, %v2, %v3 +; CHECK-NEXT: vfadb %v2, %v4, %v5 +; CHECK-NEXT: vl %v3, 96(%r2), 3 ; CHECK-NEXT: vfadb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 64(%r2), 3 -; CHECK-NEXT: vl %v2, 80(%r2), 3 -; CHECK-NEXT: vfadb %v1, %v1, %v2 -; CHECK-NEXT: vl %v2, 96(%r2), 3 -; CHECK-NEXT: vfadb %v1, %v1, %v2 +; CHECK-NEXT: vfadb %v1, %v2, %v3 ; CHECK-NEXT: vfadb %v0, %v0, %v1 ; CHECK-NEXT: vl %v1, 112(%r2), 3 ; CHECK-NEXT: vfadb %v24, %v0, %v1 @@ -180,16 +180,16 @@ define <4 x float> @fun4_fadd(ptr %x) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl %v0, 0(%r2), 3 ; CHECK-NEXT: vl %v1, 16(%r2), 3 +; CHECK-NEXT: vl %v2, 32(%r2), 3 +; CHECK-NEXT: vl %v3, 48(%r2), 3 +; CHECK-NEXT: vl %v4, 64(%r2), 3 +; CHECK-NEXT: vl %v5, 80(%r2), 3 ; CHECK-NEXT: vfasb %v0, %v1, %v0 -; CHECK-NEXT: vl %v1, 32(%r2), 3 -; CHECK-NEXT: vl %v2, 48(%r2), 3 -; CHECK-NEXT: vfasb %v1, %v1, %v2 +; CHECK-NEXT: vfasb %v1, %v2, %v3 +; CHECK-NEXT: vfasb %v2, %v4, %v5 +; CHECK-NEXT: vl %v3, 96(%r2), 3 ; 
CHECK-NEXT: vfasb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 64(%r2), 3 -; CHECK-NEXT: vl %v2, 80(%r2), 3 -; CHECK-NEXT: vfasb %v1, %v1, %v2 -; CHECK-NEXT: vl %v2, 96(%r2), 3 -; CHECK-NEXT: vfasb %v1, %v1, %v2 +; CHECK-NEXT: vfasb %v1, %v2, %v3 ; CHECK-NEXT: vfasb %v0, %v0, %v1 ; CHECK-NEXT: vl %v1, 112(%r2), 3 ; CHECK-NEXT: vfasb %v24, %v0, %v1 @@ -227,11 +227,11 @@ define double @fun5_fsub(ptr %x) { ; CHECK-NEXT: sdb %f0, 8(%r2) ; CHECK-NEXT: ld %f1, 24(%r2) ; CHECK-NEXT: adb %f1, 16(%r2) +; CHECK-NEXT: ld %f2, 40(%r2) +; CHECK-NEXT: adb %f2, 32(%r2) +; CHECK-NEXT: adb %f2, 48(%r2) ; CHECK-NEXT: sdbr %f0, %f1 -; CHECK-NEXT: ld %f1, 40(%r2) -; CHECK-NEXT: adb %f1, 32(%r2) -; CHECK-NEXT: adb %f1, 48(%r2) -; CHECK-NEXT: sdbr %f0, %f1 +; CHECK-NEXT: sdbr %f0, %f2 ; CHECK-NEXT: sdb %f0, 56(%r2) ; CHECK-NEXT: br %r14 entry: @@ -267,11 +267,11 @@ define float @fun6_fsub(ptr %x) { ; CHECK-NEXT: seb %f0, 4(%r2) ; CHECK-NEXT: lde %f1, 12(%r2) ; CHECK-NEXT: aeb %f1, 8(%r2) +; CHECK-NEXT: lde %f2, 20(%r2) +; CHECK-NEXT: aeb %f2, 16(%r2) +; CHECK-NEXT: aeb %f2, 24(%r2) ; CHECK-NEXT: sebr %f0, %f1 -; CHECK-NEXT: lde %f1, 20(%r2) -; CHECK-NEXT: aeb %f1, 16(%r2) -; CHECK-NEXT: aeb %f1, 24(%r2) -; CHECK-NEXT: sebr %f0, %f1 +; CHECK-NEXT: sebr %f0, %f2 ; CHECK-NEXT: seb %f0, 28(%r2) ; CHECK-NEXT: br %r14 entry: @@ -305,16 +305,16 @@ define fp128 @fun7_fsub(ptr %x) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl %v0, 0(%r3), 3 ; CHECK-NEXT: vl %v1, 16(%r3), 3 +; CHECK-NEXT: vl %v2, 32(%r3), 3 +; CHECK-NEXT: vl %v3, 48(%r3), 3 +; CHECK-NEXT: vl %v4, 64(%r3), 3 +; CHECK-NEXT: vl %v5, 80(%r3), 3 ; CHECK-NEXT: wfsxb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 32(%r3), 3 -; CHECK-NEXT: vl %v2, 48(%r3), 3 -; CHECK-NEXT: wfaxb %v1, %v1, %v2 +; CHECK-NEXT: wfaxb %v1, %v2, %v3 +; CHECK-NEXT: wfaxb %v2, %v4, %v5 +; CHECK-NEXT: vl %v3, 96(%r3), 3 ; CHECK-NEXT: wfsxb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 64(%r3), 3 -; CHECK-NEXT: vl %v2, 80(%r3), 3 -; CHECK-NEXT: wfaxb %v1, %v1, %v2 -; CHECK-NEXT: vl %v2, 
96(%r3), 3 -; CHECK-NEXT: wfaxb %v1, %v1, %v2 +; CHECK-NEXT: wfaxb %v1, %v2, %v3 ; CHECK-NEXT: wfsxb %v0, %v0, %v1 ; CHECK-NEXT: vl %v1, 112(%r3), 3 ; CHECK-NEXT: wfsxb %v0, %v0, %v1 @@ -351,16 +351,16 @@ define <2 x double> @fun8_fsub(ptr %x) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl %v0, 0(%r2), 3 ; CHECK-NEXT: vl %v1, 16(%r2), 3 +; CHECK-NEXT: vl %v2, 32(%r2), 3 +; CHECK-NEXT: vl %v3, 48(%r2), 3 +; CHECK-NEXT: vl %v4, 64(%r2), 3 +; CHECK-NEXT: vl %v5, 80(%r2), 3 ; CHECK-NEXT: vfsdb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 32(%r2), 3 -; CHECK-NEXT: vl %v2, 48(%r2), 3 -; CHECK-NEXT: vfadb %v1, %v1, %v2 +; CHECK-NEXT: vfadb %v1, %v2, %v3 +; CHECK-NEXT: vfadb %v2, %v4, %v5 +; CHECK-NEXT: vl %v3, 96(%r2), 3 ; CHECK-NEXT: vfsdb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 64(%r2), 3 -; CHECK-NEXT: vl %v2, 80(%r2), 3 -; CHECK-NEXT: vfadb %v1, %v1, %v2 -; CHECK-NEXT: vl %v2, 96(%r2), 3 -; CHECK-NEXT: vfadb %v1, %v1, %v2 +; CHECK-NEXT: vfadb %v1, %v2, %v3 ; CHECK-NEXT: vfsdb %v0, %v0, %v1 ; CHECK-NEXT: vl %v1, 112(%r2), 3 ; CHECK-NEXT: vfsdb %v24, %v0, %v1 @@ -396,16 +396,16 @@ define <4 x float> @fun9_fsub(ptr %x) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl %v0, 0(%r2), 3 ; CHECK-NEXT: vl %v1, 16(%r2), 3 +; CHECK-NEXT: vl %v2, 32(%r2), 3 +; CHECK-NEXT: vl %v3, 48(%r2), 3 +; CHECK-NEXT: vl %v4, 64(%r2), 3 +; CHECK-NEXT: vl %v5, 80(%r2), 3 ; CHECK-NEXT: vfssb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 32(%r2), 3 -; CHECK-NEXT: vl %v2, 48(%r2), 3 -; CHECK-NEXT: vfasb %v1, %v1, %v2 +; CHECK-NEXT: vfasb %v1, %v2, %v3 +; CHECK-NEXT: vfasb %v2, %v4, %v5 +; CHECK-NEXT: vl %v3, 96(%r2), 3 ; CHECK-NEXT: vfssb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 64(%r2), 3 -; CHECK-NEXT: vl %v2, 80(%r2), 3 -; CHECK-NEXT: vfasb %v1, %v1, %v2 -; CHECK-NEXT: vl %v2, 96(%r2), 3 -; CHECK-NEXT: vfasb %v1, %v1, %v2 +; CHECK-NEXT: vfasb %v1, %v2, %v3 ; CHECK-NEXT: vfssb %v0, %v0, %v1 ; CHECK-NEXT: vl %v1, 112(%r2), 3 ; CHECK-NEXT: vfssb %v24, %v0, %v1 @@ -443,11 +443,11 @@ define double @fun10_fmul(ptr %x) { ; 
CHECK-NEXT: mdb %f0, 0(%r2) ; CHECK-NEXT: ld %f1, 24(%r2) ; CHECK-NEXT: mdb %f1, 16(%r2) +; CHECK-NEXT: ld %f2, 40(%r2) +; CHECK-NEXT: mdb %f2, 32(%r2) +; CHECK-NEXT: mdb %f2, 48(%r2) ; CHECK-NEXT: mdbr %f0, %f1 -; CHECK-NEXT: ld %f1, 40(%r2) -; CHECK-NEXT: mdb %f1, 32(%r2) -; CHECK-NEXT: mdb %f1, 48(%r2) -; CHECK-NEXT: mdbr %f0, %f1 +; CHECK-NEXT: mdbr %f0, %f2 ; CHECK-NEXT: mdb %f0, 56(%r2) ; CHECK-NEXT: br %r14 entry: @@ -483,11 +483,11 @@ define float @fun11_fmul(ptr %x) { ; CHECK-NEXT: meeb %f0, 0(%r2) ; CHECK-NEXT: lde %f1, 12(%r2) ; CHECK-NEXT: meeb %f1, 8(%r2) +; CHECK-NEXT: lde %f2, 20(%r2) +; CHECK-NEXT: meeb %f2, 16(%r2) +; CHECK-NEXT: meeb %f2, 24(%r2) ; CHECK-NEXT: meebr %f0, %f1 -; CHECK-NEXT: lde %f1, 20(%r2) -; CHECK-NEXT: meeb %f1, 16(%r2) -; CHECK-NEXT: meeb %f1, 24(%r2) -; CHECK-NEXT: meebr %f0, %f1 +; CHECK-NEXT: meebr %f0, %f2 ; CHECK-NEXT: meeb %f0, 28(%r2) ; CHECK-NEXT: br %r14 entry: @@ -521,16 +521,16 @@ define fp128 @fun12_fmul(ptr %x) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl %v0, 0(%r3), 3 ; CHECK-NEXT: vl %v1, 16(%r3), 3 +; CHECK-NEXT: vl %v2, 32(%r3), 3 +; CHECK-NEXT: vl %v3, 48(%r3), 3 +; CHECK-NEXT: vl %v4, 64(%r3), 3 +; CHECK-NEXT: vl %v5, 80(%r3), 3 ; CHECK-NEXT: wfmxb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 32(%r3), 3 -; CHECK-NEXT: vl %v2, 48(%r3), 3 -; CHECK-NEXT: wfmxb %v1, %v1, %v2 +; CHECK-NEXT: wfmxb %v1, %v2, %v3 +; CHECK-NEXT: wfmxb %v2, %v4, %v5 +; CHECK-NEXT: vl %v3, 96(%r3), 3 ; CHECK-NEXT: wfmxb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 64(%r3), 3 -; CHECK-NEXT: vl %v2, 80(%r3), 3 -; CHECK-NEXT: wfmxb %v1, %v1, %v2 -; CHECK-NEXT: vl %v2, 96(%r3), 3 -; CHECK-NEXT: wfmxb %v1, %v1, %v2 +; CHECK-NEXT: wfmxb %v1, %v2, %v3 ; CHECK-NEXT: wfmxb %v0, %v0, %v1 ; CHECK-NEXT: vl %v1, 112(%r3), 3 ; CHECK-NEXT: wfmxb %v0, %v0, %v1 @@ -567,16 +567,16 @@ define <2 x double> @fun13_fmul(ptr %x) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl %v0, 0(%r2), 3 ; CHECK-NEXT: vl %v1, 16(%r2), 3 +; CHECK-NEXT: vl %v2, 32(%r2), 3 +; CHECK-NEXT: vl 
%v3, 48(%r2), 3 +; CHECK-NEXT: vl %v4, 64(%r2), 3 +; CHECK-NEXT: vl %v5, 80(%r2), 3 ; CHECK-NEXT: vfmdb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 32(%r2), 3 -; CHECK-NEXT: vl %v2, 48(%r2), 3 -; CHECK-NEXT: vfmdb %v1, %v1, %v2 +; CHECK-NEXT: vfmdb %v1, %v2, %v3 +; CHECK-NEXT: vfmdb %v2, %v4, %v5 +; CHECK-NEXT: vl %v3, 96(%r2), 3 ; CHECK-NEXT: vfmdb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 64(%r2), 3 -; CHECK-NEXT: vl %v2, 80(%r2), 3 -; CHECK-NEXT: vfmdb %v1, %v1, %v2 -; CHECK-NEXT: vl %v2, 96(%r2), 3 -; CHECK-NEXT: vfmdb %v1, %v1, %v2 +; CHECK-NEXT: vfmdb %v1, %v2, %v3 ; CHECK-NEXT: vfmdb %v0, %v0, %v1 ; CHECK-NEXT: vl %v1, 112(%r2), 3 ; CHECK-NEXT: vfmdb %v24, %v0, %v1 @@ -612,16 +612,16 @@ define <4 x float> @fun14_fmul(ptr %x) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vl %v0, 0(%r2), 3 ; CHECK-NEXT: vl %v1, 16(%r2), 3 +; CHECK-NEXT: vl %v2, 32(%r2), 3 +; CHECK-NEXT: vl %v3, 48(%r2), 3 +; CHECK-NEXT: vl %v4, 64(%r2), 3 +; CHECK-NEXT: vl %v5, 80(%r2), 3 ; CHECK-NEXT: vfmsb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 32(%r2), 3 -; CHECK-NEXT: vl %v2, 48(%r2), 3 -; CHECK-NEXT: vfmsb %v1, %v1, %v2 +; CHECK-NEXT: vfmsb %v1, %v2, %v3 +; CHECK-NEXT: vfmsb %v2, %v4, %v5 +; CHECK-NEXT: vl %v3, 96(%r2), 3 ; CHECK-NEXT: vfmsb %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 64(%r2), 3 -; CHECK-NEXT: vl %v2, 80(%r2), 3 -; CHECK-NEXT: vfmsb %v1, %v1, %v2 -; CHECK-NEXT: vl %v2, 96(%r2), 3 -; CHECK-NEXT: vfmsb %v1, %v1, %v2 +; CHECK-NEXT: vfmsb %v1, %v2, %v3 ; CHECK-NEXT: vfmsb %v0, %v0, %v1 ; CHECK-NEXT: vl %v1, 112(%r2), 3 ; CHECK-NEXT: vfmsb %v24, %v0, %v1 diff --git a/llvm/test/CodeGen/SystemZ/misched-prera-cmp-elim.mir b/llvm/test/CodeGen/SystemZ/misched-prera-cmp-elim.mir index 73cc9fdb74e6e..421b2328c3612 100644 --- a/llvm/test/CodeGen/SystemZ/misched-prera-cmp-elim.mir +++ b/llvm/test/CodeGen/SystemZ/misched-prera-cmp-elim.mir @@ -3,37 +3,54 @@ # RUN: | FileCheck %s # REQUIRES: asserts -# Schedule the AGHIK that defines the compare source low to help comparison +# Schedule the NRK that defines the 
compare source low to help comparison # elimination. # CHECK: ********** MI Scheduling ********** # CHECK-NEXT: fun0:%bb.0 # CHECK: ********** MI Scheduling ********** # CHECK-NEXT: fun0:%bb.1 -# CHECK: Queue BotQ.A: 1 0 -# CHECK-NEXT: Cand SU(1) FIRST -# CHECK-NEXT: Cand SU(0) WEAK +# CHECK: Queue BotQ.A: 4 5 +# CHECK-NEXT: Cand SU(4) FIRST # CHECK-NEXT: Pick Bot WEAK [pre-RA] -# CHECK-NEXT: Scheduling SU(0) %2:gr64bit = AGHIK %0:gr64bit, -1, impl +# CHECK-NEXT: Scheduling SU(4) %7:gr32bit = NRK %6:gr32bit, %0:gr32bit, implicit-def dead $cc # CHECK: *** Final schedule for %bb.1 *** -# CHECK-NEXT: SU(1): dead %3:gr64bit = AGHIK %1:gr64bit, 1, implicit-def dead $cc -# CHECK-NEXT: SU(0): %2:gr64bit = AGHIK %0:gr64bit, -1, implicit-def dead $cc -# CHECK-NEXT: SU(2): CGHI %2:gr64bit, 0, implicit-def $cc +# CHECK-NEXT: SU(0): %5:gr32bit = NRK %2:gr32bit, %0:gr32bit, implicit-def dead $cc +# CHECK-NEXT: SU(1): CHIMux %5:gr32bit, 0, implicit-def $cc +# CHECK-NEXT: SU(2): %6:gr32bit = LHIMux 0 +# CHECK-NEXT: SU(3): %6:gr32bit = LOCHIMux %6:gr32bit(tied-def 0), 1, 14, 8, implicit $cc +# CHECK-NEXT: SU(5): %3:addr32bit = OILMux %3:addr32bit(tied-def 0), 1, implicit-def dead $cc +# CHECK-NEXT: SU(4): %7:gr32bit = NRK %6:gr32bit, %0:gr32bit, implicit-def dead $cc +# CHECK-NEXT: SU(6): %8:gr32bit = SLLK %1:gr32bit, %3:addr32bit, 0 +# CHECK-NEXT: SU(7): CHIMux %7:gr32bit, 0, implicit-def $cc +# CHECK-NEXT: SU(8): %8:gr32bit = LOCHIMux %8:gr32bit(tied-def 0), 0, 14, 8, implicit $cc +# CHECK-NEXT: SU(9): %4:gr32bit = ORK %8:gr32bit, %4:gr32bit, implicit-def dead $cc +# CHECK-NEXT: SU(10): %2:gr32bit = LHIMux 1 +# CHECK-NEXT: SU(11): %3:addr32bit = LHIMux 0 --- name: fun0 tracksRegLiveness: true body: | bb.0: - liveins: $r2d, $r3d + liveins: $r2l - %0:gr64bit = COPY $r2d - %1:gr64bit = COPY $r3d + %0:gr32bit = COPY $r2l + %1:gr32bit = LHIMux 1 + %2:gr32bit = LHIMux 0 + %3:addr32bit = LHIMux 1 + %4:gr32bit = LHIMux 0 bb.1: - %2:gr64bit = AGHIK %0, -1, implicit-def dead $cc - 
%3:gr64bit = AGHIK %1, 1, implicit-def dead $cc - CGHI %2:gr64bit, 0, implicit-def $cc - BRC 14, 8, %bb.1, implicit killed $cc - - bb.2: - Return + %5:gr32bit = NRK %2, %0, implicit-def dead $cc + CHIMux %5, 0, implicit-def $cc + %6:gr32bit = LHIMux 0 + %6:gr32bit = LOCHIMux %6, 1, 14, 8, implicit killed $cc + %7:gr32bit = NRK %6, %0, implicit-def dead $cc + %3:addr32bit = OILMux %3, 1, implicit-def dead $cc + %8:gr32bit = SLLK %1, %3, 0 + CHIMux %7, 0, implicit-def $cc + %8:gr32bit = LOCHIMux %8, 0, 14, 8, implicit killed $cc + %4:gr32bit = ORK %8, %4, implicit-def dead $cc + %2:gr32bit = LHIMux 1 + %3:addr32bit = LHIMux 0 + J %bb.1 ... diff --git a/llvm/test/CodeGen/SystemZ/misched-prera-latencies.mir b/llvm/test/CodeGen/SystemZ/misched-prera-latencies.mir index c0b229809abaa..974c959323ecf 100644 --- a/llvm/test/CodeGen/SystemZ/misched-prera-latencies.mir +++ b/llvm/test/CodeGen/SystemZ/misched-prera-latencies.mir @@ -8,7 +8,6 @@ # CHECK-NEXT: fun0:%bb.0 # CHECK: ********** MI Scheduling ********** # CHECK-NEXT: fun0:%bb.1 -# CHECK: Number of nodes in def-use sequences: 10. Latency scheduling enabled for data sequences. # CHECK: *** Final schedule for %bb.1 *** # CHECK-NEXT: SU(0): %4:fp64bit = LZDR # CHECK-NEXT: SU(5): %9:fp64bit = LZDR @@ -53,7 +52,6 @@ body: | # This function has a data flow sequence and latency scheduling puts the WFDDB high. # CHECK: ********** MI Scheduling ********** # CHECK-NEXT: fun1:%bb.0 -# CHECK: Number of nodes in def-use sequences: 4. Latency scheduling enabled for data sequences. # CHECK: *** Final schedule for %bb.0 *** # CHECK-NEXT: SU(1): undef %1.subreg_h64:vr128bit = WFDDB undef %2:fp64bit, undef %3:fp64bit, implicit $fpc # CHECK-NEXT: SU(2): %4:fp64bit = COPY %1.subreg_h64:vr128bit @@ -73,33 +71,12 @@ body: | Return ... -# Same, but there is no sequence, so no latency scheduling is done. +# Single block loop that should also have latency enabled. 
# CHECK: ********** MI Scheduling ********** -# CHECK-NEXT: fun2:%bb.0 -# CHECK: Latency scheduling not enabled for data sequences. -# CHECK: *** Final schedule for %bb.0 *** -# CHECK-NEXT: SU(0): dead %0:fp64bit = LZDR -# CHECK-NEXT: SU(1): undef %1.subreg_h64:vr128bit = WFDDB undef %2:fp64bit, undef %3:fp64bit, implicit $fpc -# CHECK-NEXT: SU(2): VST64 %1.subreg_h64:vr128bit, $noreg, 0, $noreg :: (store (s128) into `ptr null`, align 8) ---- -name: fun2 -tracksRegLiveness: true -body: | - bb.0: - %0:fp64bit = LZDR - undef %1.subreg_h64:vr128bit = WFDDB undef %2:fp64bit, undef %3:fp64bit, implicit $fpc - VST64 %1.subreg_h64:vr128bit , $noreg, 0, $noreg :: (store (s128) into `ptr null`, align 8) - Return -... - -# Use the GenericScheduler latency heuristic for this single block loop. -# CHECK: ********** MI Scheduling ********** -# CHECK-NEXT: fun3:%bb.1 -# CHECK: Latency scheduling not enabled for data sequences. -# CHECK: ACYCLIC LATENCY LIMIT +# CHECK-NEXT: fun2:%bb.1 # CHECK: Pick Bot BOT-HEIGHT [pre-RA] --- -name: fun3 +name: fun2 tracksRegLiveness: true body: | bb.0: @@ -128,8 +105,7 @@ body: | # "wide". Don't interleave the data flows in cases like this, as it could # result in too much ILP and spilling. # CHECK: ********** MI Scheduling ********** -# CHECK-NEXT: fun4:%bb.0 -# CHECK: Latency scheduling not enabled for data sequences. +# CHECK-NEXT: fun3:%bb.0 # CHECK: *** Final schedule for %bb.0 *** # CHECK-NEXT: SU(0): %0:gr64bit = COPY undef %1:gr64bit # CHECK-NEXT: SU(1): dead %2:gr64bit = AGRK %0:gr64bit, %0:gr64bit, @@ -144,7 +120,7 @@ body: | # CHECK-NEXT: SU(10): %11:gr64bit = COPY undef %1:gr64bit # CHECK-NEXT: SU(11): dead %12:gr64bit = AGRK %11:gr64bit, %11:gr64bit, --- -name: fun4 +name: fun3 tracksRegLiveness: true body: | bb.0: @@ -166,10 +142,9 @@ body: | # The TMLL64 should be scheduled first even though the LA is available and of # lesser height, because the TMLL64 Depth equals the remaining latency (on CP). 
# CHECK: ********** MI Scheduling ********** -# CHECK-NEXT: fun5:%bb.0 +# CHECK-NEXT: fun4:%bb.0 # CHECK: ********** MI Scheduling ********** -# CHECK-NEXT: fun5:%bb.1 -# CHECK: Number of nodes in def-use sequences: 4. Latency scheduling enabled for data sequences. +# CHECK-NEXT: fun4:%bb.1 # CHECK: SU(0): dead %2:addr64bit = LA %0:addr64bit, 1, $noreg # CHECK-NEXT: # preds left : 0 # CHECK-NEXT: # succs left : 0 @@ -192,7 +167,7 @@ body: | # CHECK-NEXT: SU(0): dead %2:addr64bit = LA %0:addr64bit, 1, $noreg # CHECK-NEXT: SU(4): TMLL64 %5:gr64bit, 1, implicit-def $cc --- -name: fun5 +name: fun4 tracksRegLiveness: true body: | bb.0: diff --git a/llvm/test/CodeGen/SystemZ/misched-prera-loads.mir b/llvm/test/CodeGen/SystemZ/misched-prera-loads.mir new file mode 100644 index 0000000000000..ef4b3a19cb781 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/misched-prera-loads.mir @@ -0,0 +1,627 @@ +# RUN: llc -o - %s -mtriple=s390x-linux-gnu -mcpu=z17 -verify-machineinstrs \ +# RUN: -run-pass=machine-scheduler -debug-only=machine-scheduler \ +# RUN: 2>&1 | FileCheck %s + +--- | + + define void @fun0() { ret void } + define void @fun1() { ret void } + define void @fun2() { ret void } + define void @fun3() { ret void } + define void @fun4() { ret void } + define void @fun5() { ret void } + define void @fun6(ptr %Arg) { ret void } +... + +# The LHIMux could be scheduled below the CGHI to avoid the overlap of %0 and +# %3, but this is not done in small regions like this. 
+# CHECK: ********** MI Scheduling ********** +# CHECK-NEXT: fun0:%bb.0 +# CHECK-NOT: Pick Bot REG-EXCESS [pre-RA] +# CHECK: *** Final schedule for %bb.0 *** +# CHECK-NEXT: SU(0): %0:gr64bit = COPY $r2d +# CHECK-NEXT: SU(2): %2:gr64bit = LCGR %0:gr64bit, implicit-def dead $cc +# CHECK-NEXT: SU(3): %3:gr64bit = RISBGN undef %3:gr64bit(tied-def 0), %2:gr64bit, 29, 188, 0 +# CHECK-NEXT: SU(1): %1:gr32bit = LHIMux 1 +# CHECK-NEXT: SU(4): CGHI %3:gr64bit, 0, implicit-def $cc +# CHECK: fun0:%bb.1 +--- +name: fun0 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.2(0x30000000), %bb.1(0x50000000) + liveins: $r2d + + %0:gr64bit = COPY $r2d + %1:gr32bit = LHIMux 1 + %2:gr64bit = LCGR %0, implicit-def dead $cc + %3:gr64bit = RISBGN undef %3, %2, 29, 188, 0 + CGHI %3, 0, implicit-def $cc + BRC 14, 8, %bb.2, implicit killed $cc + J %bb.1 + + bb.1: + %5:gr64bit = LLGFR %1 + $r2d = COPY %5 + Return implicit $r2d + + bb.2: + $r2d = LGHI 0 + Return implicit $r2d +... + +# This test tries to capture that one of the higher SUs is not scheduled low +# for the sake of closing a live range. The %4 DLGR is such a node where the +# result is live out and all operands are already live. It is still later +# scheduled relatively low (although not as low as possible), but for the +# reason of latency reduction, below SU(33) which is higher. 
+# CHECK: ********** MI Scheduling ********** +# CHECK-NEXT: fun1:%bb.0 +# CHECK: SU(5): %4:gr128bit = DLGR %3:gr128bit(tied-def 0), %1:gr64bit +# CHECK: Height : 29 +# CHECK: Pressure Diff : GRX32Bit -4 +# CHECK: SU(6): +# CHECK: SU(33): %32:gr64bit = AGFI %31:gr64bit(tied-def 0), 1, implicit-def $cc +# CHECK: Height : 35 +# CHECK: SU(34): +# +# CHECK: Move SU(5) into Available Q +# CHECK: Queue BotQ.A: 39 5 +# CHECK-NOT: Cand SU(5) REG-EXCESS +# CHECK-NOT: Scheduling SU(5) +# CHECK: Scheduling SU(39) +# +# CHECK: Queue BotQ.A: 33 5 +# CHECK: Pick Bot BOT-HEIGHT [pre-RA] +# CHECK-NEXT: Scheduling SU(5) +# +# CHECK: *** Final schedule for %bb.0 *** +# CHECK: SU(5): %4:gr128bit = DLGR %3:gr128bit(tied-def 0), %1:gr64bit +# CHECK-NEXT: SU(34): %33:gr64bit = AGFI %32:gr64bit(tied-def 0), 1, implicit-def $cc +# CHECK-NEXT: SU(35): %34:gr64bit = AGFI %33:gr64bit(tied-def 0), 1, implicit-def $cc +# CHECK-NEXT: SU(36): %35:gr64bit = AGFI %34:gr64bit(tied-def 0), 1, implicit-def $cc +# CHECK-NEXT: SU(37): undef %36.subreg_h64:gr128bit = LLILL 0 +# CHECK-NEXT: SU(38): %36.subreg_l64:gr128bit = COPY %35:gr64bit +# CHECK-NEXT: SU(39): %37:gr128bit = COPY %36:gr128bit +# CHECK-NEXT: SU(40): dead %38:gr128bit = DLGR %37:gr128bit(tied-def 0), %1:gr64bit +# CHECK: fun1:%bb.1 +--- +name: fun1 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $r2d, $r3d + %0:gr64bit = COPY $r2d + %1:gr64bit = COPY $r3d + + undef %2.subreg_h64:gr128bit = LLILL 0 + %2.subreg_l64:gr128bit = COPY %0 + %3:gr128bit = COPY %2 + %4:gr128bit = DLGR %3, %1 + + %5:gr64bit = AGFI %0, 1, implicit-def $cc + %6:gr64bit = AGFI %5, 1, implicit-def $cc + %7:gr64bit = AGFI %6, 1, implicit-def $cc + %8:gr64bit = AGFI %7, 1, implicit-def $cc + %9:gr64bit = AGFI %8, 1, implicit-def $cc + %10:gr64bit = AGFI %9, 1, implicit-def $cc + %11:gr64bit = AGFI %10, 1, implicit-def $cc + %12:gr64bit = AGFI %11, 1, implicit-def $cc + %13:gr64bit = AGFI %12, 1, implicit-def $cc + %14:gr64bit = AGFI %13, 
1, implicit-def $cc + %15:gr64bit = AGFI %14, 1, implicit-def $cc + %16:gr64bit = AGFI %15, 1, implicit-def $cc + %17:gr64bit = AGFI %16, 1, implicit-def $cc + %18:gr64bit = AGFI %17, 1, implicit-def $cc + %19:gr64bit = AGFI %18, 1, implicit-def $cc + %20:gr64bit = AGFI %19, 1, implicit-def $cc + %21:gr64bit = AGFI %20, 1, implicit-def $cc + %22:gr64bit = AGFI %21, 1, implicit-def $cc + %23:gr64bit = AGFI %22, 1, implicit-def $cc + %24:gr64bit = AGFI %23, 1, implicit-def $cc + %25:gr64bit = AGFI %24, 1, implicit-def $cc + %26:gr64bit = AGFI %25, 1, implicit-def $cc + %27:gr64bit = AGFI %26, 1, implicit-def $cc + %28:gr64bit = AGFI %27, 1, implicit-def $cc + %29:gr64bit = AGFI %28, 1, implicit-def $cc + %30:gr64bit = AGFI %29, 1, implicit-def $cc + %31:gr64bit = AGFI %30, 1, implicit-def $cc + %32:gr64bit = AGFI %31, 1, implicit-def $cc + %33:gr64bit = AGFI %32, 1, implicit-def $cc + %34:gr64bit = AGFI %33, 1, implicit-def $cc + %35:gr64bit = AGFI %34, 1, implicit-def $cc + + undef %36.subreg_h64:gr128bit = LLILL 0 + %36.subreg_l64:gr128bit = COPY %35 + %37:gr128bit = COPY %36 + %38:gr128bit = DLGR %37, %1 + + bb.1: + STG %35:gr64bit, undef %101:addr64bit, 0, undef %102:addr64bit + + STG %1:gr64bit, undef %101:addr64bit, 0, undef %102:addr64bit + %100:gr64bit = COPY %3.subreg_h64:gr128bit + STG %100:gr64bit, undef %101:addr64bit, 0, undef %102:addr64bit + %103:gr64bit = COPY %4.subreg_h64:gr128bit + STG %103:gr64bit, undef %101:addr64bit, 0, undef %102:addr64bit + Return +... + +# In this test, instead of a DLGR an MSGRKC with shorter latency is high in +# the input order with result and uses live. This node should be scheduled +# low to close the live range, but should also wait until it does not extend +# the scheduled latency. 
+# CHECK: ********** MI Scheduling ********** +# CHECK-NEXT: fun2:%bb.0 +# CHECK: SU(2): %2:gr64bit = MSGRKC %0:gr64bit, %1:gr64bit, implicit-def dead $cc +# CHECK: Height : 3 +# CHECK: Pressure Diff : GRX32Bit -2 +# CHECK: SU(3): +# CHECK: SU(33): %33:gr64bit = AGFI %32:gr64bit(tied-def 0), 1, implicit-def dead $cc +# CHECK: Height : 3 +# CHECK: SU(34): %34:gr64bit = AGFI %33:gr64bit(tied-def 0), 1, implicit-def dead $cc +# CHECK: Height : 2 +# CHECK: SU(35): +# CHECK: SU(36): %36:gr64bit = AGFI %35:gr64bit(tied-def 0), 1, implicit-def dead $cc +# CHECK: Height : 0 +# +# CHECK: Queue BotQ.A: 36 2 +# CHECK-NOT: Cand SU(2) REG-EXCESS +# CHECK-NOT: Scheduling SU(2) +# CHECK: Pick Bot BOT-HEIGHT [pre-RA] +# CHECK-NEXT: Scheduling SU(36) +# CHECK: Queue BotQ.A: 2 32 +# CHECK: Pick Bot REG-EXCESS [pre-RA] +# CHECK-NEXT: Scheduling SU(2) +# +# CHECK: *** Final schedule for %bb.0 *** +# CHECK: SU(32): %32:gr64bit = AGFI %31:gr64bit(tied-def 0), 1, implicit-def dead $cc +# CHECK-NEXT: SU(2): %2:gr64bit = MSGRKC %0:gr64bit, %1:gr64bit, implicit-def dead $cc +# CHECK-NEXT: SU(33): %33:gr64bit = AGFI %32:gr64bit(tied-def 0), 1, implicit-def dead $cc +# CHECK-NEXT: SU(34): %34:gr64bit = AGFI %33:gr64bit(tied-def 0), 1, implicit-def dead $cc +# CHECK-NEXT: SU(35): %35:gr64bit = AGFI %34:gr64bit(tied-def 0), 1, implicit-def dead $cc +# CHECK-NEXT: SU(36): %36:gr64bit = AGFI %35:gr64bit(tied-def 0), 1, implicit-def dead $cc +# CHECK: fun2:%bb.1 +--- +name: fun2 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $r2d, $r3d + %0:gr64bit = COPY $r2d + %1:gr64bit = COPY $r3d + + %2:gr64bit = MSGRKC %0, %1, implicit-def dead $cc + + %5:gr64bit = AGFI %0, 1, implicit-def dead $cc + %6:gr64bit = AGFI %5, 1, implicit-def dead $cc + %7:gr64bit = AGFI %6, 1, implicit-def dead $cc + %8:gr64bit = AGFI %7, 1, implicit-def dead $cc + %9:gr64bit = AGFI %8, 1, implicit-def dead $cc + %10:gr64bit = AGFI %9, 1, implicit-def dead $cc + %11:gr64bit = AGFI %10, 1, 
implicit-def dead $cc + %12:gr64bit = AGFI %11, 1, implicit-def dead $cc + %13:gr64bit = AGFI %12, 1, implicit-def dead $cc + %14:gr64bit = AGFI %13, 1, implicit-def dead $cc + %15:gr64bit = AGFI %14, 1, implicit-def dead $cc + %16:gr64bit = AGFI %15, 1, implicit-def dead $cc + %17:gr64bit = AGFI %16, 1, implicit-def dead $cc + %18:gr64bit = AGFI %17, 1, implicit-def dead $cc + %19:gr64bit = AGFI %18, 1, implicit-def dead $cc + %20:gr64bit = AGFI %19, 1, implicit-def dead $cc + %21:gr64bit = AGFI %20, 1, implicit-def dead $cc + %22:gr64bit = AGFI %21, 1, implicit-def dead $cc + %23:gr64bit = AGFI %22, 1, implicit-def dead $cc + %24:gr64bit = AGFI %23, 1, implicit-def dead $cc + %25:gr64bit = AGFI %24, 1, implicit-def dead $cc + %26:gr64bit = AGFI %25, 1, implicit-def dead $cc + %27:gr64bit = AGFI %26, 1, implicit-def dead $cc + %28:gr64bit = AGFI %27, 1, implicit-def dead $cc + %29:gr64bit = AGFI %28, 1, implicit-def dead $cc + %30:gr64bit = AGFI %29, 1, implicit-def dead $cc + %31:gr64bit = AGFI %30, 1, implicit-def dead $cc + %32:gr64bit = AGFI %31, 1, implicit-def dead $cc + %33:gr64bit = AGFI %32, 1, implicit-def dead $cc + %34:gr64bit = AGFI %33, 1, implicit-def dead $cc + %35:gr64bit = AGFI %34, 1, implicit-def dead $cc + %36:gr64bit = AGFI %35, 1, implicit-def dead $cc + %37:gr64bit = AGFI %36, 1, implicit-def dead $cc + %38:gr64bit = AGFI %37, 1, implicit-def dead $cc + + bb.1: + STG %0:gr64bit, undef %101:addr64bit, 0, undef %102:addr64bit + STG %1:gr64bit, undef %101:addr64bit, 0, undef %102:addr64bit + STG %2:gr64bit, undef %101:addr64bit, 0, undef %102:addr64bit + STG %38:gr64bit, undef %101:addr64bit, 0, undef %102:addr64bit + Return +... + +# Check a case (AGR) where a use is tied to the def which means the register +# is live into the instruction and there is no point in scheduling it low +# (pressure diff is empty). It still ends up low in the final schedule due to +# the latency reduction heuristic. 
+# CHECK: ********** MI Scheduling ********** +# CHECK-NEXT: fun3:%bb.0 +# +# CHECK: SU(3): %2:gr64bit = AGR %2:gr64bit(tied-def 0), %1:gr64bit, implicit-def dead $cc +# CHECK: Height : 0 +# CHECK: Pressure Diff : {{$}} +# CHECK: SU(36): %35:gr64bit = AGFI %34:gr64bit(tied-def 0), 1, implicit-def dead $cc +# CHECK: Height : 1 +# CHECK: Pressure Diff : {{$}} +# CHECK: SU(37): %36:gr64bit = AGFI %35:gr64bit(tied-def 0), 1, implicit-def dead $cc +# CHECK: Height : 0 +# CHECK: Pressure Diff : {{$}} +# +# CHECK: Queue BotQ.A: 37 3 +# CHECK: Scheduling SU(37) +# CHECK: Queue BotQ.A: 3 36 +# CHECK: Pick Bot BOT-HEIGHT [pre-RA] +# CHECK-NEXT: Scheduling SU(3) +# +# CHECK: *** Final schedule for %bb.0 *** +# CHECK: SU(36): %35:gr64bit = AGFI %34:gr64bit(tied-def 0), 1, implicit-def dead $cc +# CHECK-NEXT: SU(3): %2:gr64bit = AGR %2:gr64bit(tied-def 0), %1:gr64bit, implicit-def dead $cc +# CHECK-NEXT: SU(37): %36:gr64bit = AGFI %35:gr64bit(tied-def 0), 1, implicit-def dead $cc +# CHECK: fun3:%bb.1 +--- +name: fun3 +tracksRegLiveness: true +machineFunctionInfo: {} +body: | + bb.0: + liveins: $r2d, $r3d + %0:gr64bit = COPY $r2d + %1:gr64bit = COPY $r3d + + %2:gr64bit = COPY %1 + %2:gr64bit = AGR %2, %1, implicit-def dead $cc + + %3:gr64bit = AGFI %0, 1, implicit-def dead $cc + %4:gr64bit = AGFI %3, 1, implicit-def dead $cc + %5:gr64bit = AGFI %4, 1, implicit-def dead $cc + %6:gr64bit = AGFI %5, 1, implicit-def dead $cc + %7:gr64bit = AGFI %6, 1, implicit-def dead $cc + %8:gr64bit = AGFI %7, 1, implicit-def dead $cc + %9:gr64bit = AGFI %8, 1, implicit-def dead $cc + %10:gr64bit = AGFI %9, 1, implicit-def dead $cc + %11:gr64bit = AGFI %10, 1, implicit-def dead $cc + %12:gr64bit = AGFI %11, 1, implicit-def dead $cc + %13:gr64bit = AGFI %12, 1, implicit-def dead $cc + %14:gr64bit = AGFI %13, 1, implicit-def dead $cc + %15:gr64bit = AGFI %14, 1, implicit-def dead $cc + %16:gr64bit = AGFI %15, 1, implicit-def dead $cc + %17:gr64bit = AGFI %16, 1, implicit-def dead $cc + %18:gr64bit 
= AGFI %17, 1, implicit-def dead $cc + %19:gr64bit = AGFI %18, 1, implicit-def dead $cc + %20:gr64bit = AGFI %19, 1, implicit-def dead $cc + %21:gr64bit = AGFI %20, 1, implicit-def dead $cc + %22:gr64bit = AGFI %21, 1, implicit-def dead $cc + %23:gr64bit = AGFI %22, 1, implicit-def dead $cc + %24:gr64bit = AGFI %23, 1, implicit-def dead $cc + %25:gr64bit = AGFI %24, 1, implicit-def dead $cc + %26:gr64bit = AGFI %25, 1, implicit-def dead $cc + %27:gr64bit = AGFI %26, 1, implicit-def dead $cc + %28:gr64bit = AGFI %27, 1, implicit-def dead $cc + %29:gr64bit = AGFI %28, 1, implicit-def dead $cc + %30:gr64bit = AGFI %29, 1, implicit-def dead $cc + %31:gr64bit = AGFI %30, 1, implicit-def dead $cc + %32:gr64bit = AGFI %31, 1, implicit-def dead $cc + %33:gr64bit = AGFI %32, 1, implicit-def dead $cc + %34:gr64bit = AGFI %33, 1, implicit-def dead $cc + %35:gr64bit = AGFI %34, 1, implicit-def dead $cc + %36:gr64bit = AGFI %35, 1, implicit-def dead $cc + + bb.1: + STG %1:gr64bit, undef %101:addr64bit, 0, undef %102:addr64bit + STG %2:gr64bit, undef %101:addr64bit, 0, undef %102:addr64bit + STG %36:gr64bit, undef %101:addr64bit, 0, undef %102:addr64bit + Return +... + +# Test that an instruction gets scheduled low as soon as its use operands +# become live, and not before. 
+# CHECK: ********** MI Scheduling ********** +# CHECK-NEXT: fun4:%bb.0 +# +# CHECK: SU(30): %M0:vr64bit = nofpexcept WFMDB %Z0:fp64bit, %Z0:fp64bit, implicit $fpc +# CHECK: Height : 6 +# CHECK: Pressure Diff : FP16Bit 1 +# CHECK: SU(31): %M1:vr64bit = nofpexcept WFMDB %Z1:fp64bit, %Z1:fp64bit, implicit $fpc +# CHECK: Height : 6 +# CHECK: Pressure Diff : FP16Bit 1 +# CHECK: SU(32): %M2:vr64bit = nofpexcept WFMDB %Z1:fp64bit, %Z1:fp64bit, implicit $fpc +# CHECK: Height : 6 +# CHECK: Pressure Diff : FP16Bit 1 +# CHECK: SU(33): %M3:vr64bit = nofpexcept WFMDB %Z1:fp64bit, %Z1:fp64bit, implicit $fpc +# CHECK: Height : 6 +# CHECK: SU(36): %M6:vr64bit = nofpexcept WFMDB %M5:vr64bit, %Z0:fp64bit, implicit $fpc +# CHECK: Height : 6 +# CHECK: SU(37): +# CHECK: SU(38): %M9:vr64bit = nofpexcept WFMDB %M7:vr64bit, %M7:vr64bit, implicit $fpc +# CHECK: Height : 6 +# +# CHECK: Queue BotQ.A: 27 38 36 34 33 32 31 30 +# CHECK: Pick Bot ORDER [pre-RA] +# CHECK-NEXT: Scheduling SU(38) +# CHECK: Queue BotQ.A: 27 30 36 34 33 32 31 37 +# CHECK: Pick Bot BOT-HEIGHT [pre-RA] +# CHECK-NEXT: Scheduling SU(36) +# CHECK: Queue BotQ.A: 27 30 37 34 33 32 31 35 +# CHECK: Pick Bot REG-EXCESS [pre-RA] +# CHECK-NEXT: Scheduling SU(30) +# +# CHECK: *** Final schedule for %bb.0 *** +# CHECK: SU(28): %Z0:fp64bit = LZDR +# CHECK-NEXT: SU(29): %Z1:fp64bit = LZDR +# CHECK-NEXT: SU(31): %M1:vr64bit = nofpexcept WFMDB %Z1:fp64bit, %Z1:fp64bit, implicit $fpc +# CHECK-NEXT: SU(32): %M2:vr64bit = nofpexcept WFMDB %Z1:fp64bit, %Z1:fp64bit, implicit $fpc +# CHECK-NEXT: SU(33): %M3:vr64bit = nofpexcept WFMDB %Z1:fp64bit, %Z1:fp64bit, implicit $fpc +# CHECK-NEXT: SU(34): %M4:vr64bit = nofpexcept WFMDB %Z1:fp64bit, %Z1:fp64bit, implicit $fpc +# CHECK-NEXT: SU(30): %M0:vr64bit = nofpexcept WFMDB %Z0:fp64bit, %Z0:fp64bit, implicit $fpc +# CHECK-NEXT: SU(36): %M6:vr64bit = nofpexcept WFMDB %M5:vr64bit, %Z0:fp64bit, implicit $fpc +# CHECK-NEXT: SU(38): %M9:vr64bit = nofpexcept WFMDB %M7:vr64bit, %M7:vr64bit, implicit 
$fpc +# CHECK-NEXT: SU(39): VST64 %M0:vr64bit, $noreg, 0, $noreg :: (store (s64) into `ptr null`) +# CHECK-NEXT: SU(40): VST64 %M1:vr64bit, $noreg, 0, $noreg :: (store (s64) into `ptr null`) +# CHECK-NEXT: SU(41): VST64 %M2:vr64bit, $noreg, 0, $noreg :: (store (s64) into `ptr null`) +# CHECK-NEXT: SU(42): VST64 %M3:vr64bit, $noreg, 0, $noreg :: (store (s64) into `ptr null`) +# CHECK-NEXT: SU(43): VST64 %M4:vr64bit, $noreg, 0, $noreg :: (store (s64) into `ptr null`) +# CHECK-NEXT: SU(44): VST64 %M6:vr64bit, $noreg, 0, $noreg :: (store (s64) into `ptr null`) +# CHECK-NEXT: SU(45): VST64 %M9:vr64bit, $noreg, 0, $noreg :: (store (s64) into `ptr null`) +# CHECK-NEXT: SU(46): $f0d = LZDR +--- +name: fun4 +tracksRegLiveness: true +body: | + bb.0: + + ; Make for a top of region in terms of height, enabling heuristic for lower SUs. + %D0:vr64bit = nofpexcept WFDDB undef %LHS0:fp64bit, undef %RHS0:fp64bit, implicit $fpc + %D1:vr64bit = nofpexcept WFDDB %D0:vr64bit, undef %RHS1:fp64bit, implicit $fpc + %D2:vr64bit = nofpexcept WFDDB %D1:vr64bit, undef %RHS2:fp64bit, implicit $fpc + %D3:vr64bit = nofpexcept WFDDB %D2:vr64bit, undef %RHS3:fp64bit, implicit $fpc + %D4:vr64bit = nofpexcept WFDDB %D3:vr64bit, undef %RHS4:fp64bit, implicit $fpc + %D5:vr64bit = nofpexcept WFDDB %D4:vr64bit, undef %RHS5:fp64bit, implicit $fpc + %D6:vr64bit = nofpexcept WFDDB %D5:vr64bit, undef %RHS6:fp64bit, implicit $fpc + %D7:vr64bit = nofpexcept WFDDB %D6:vr64bit, undef %RHS7:fp64bit, implicit $fpc + %D8:vr64bit = nofpexcept WFDDB %D7:vr64bit, undef %RHS8:fp64bit, implicit $fpc + %D9:vr64bit = nofpexcept WFDDB %D8:vr64bit, undef %RHS9:fp64bit, implicit $fpc + %D10:vr64bit = nofpexcept WFDDB %D9:vr64bit, undef %RHS10:fp64bit, implicit $fpc + %D11:vr64bit = nofpexcept WFDDB %D10:vr64bit, undef %RHS11:fp64bit, implicit $fpc + %D12:vr64bit = nofpexcept WFDDB %D11:vr64bit, undef %RHS12:fp64bit, implicit $fpc + %D13:vr64bit = nofpexcept WFDDB %D12:vr64bit, undef %RHS13:fp64bit, implicit $fpc + 
%D14:vr64bit = nofpexcept WFDDB %D13:vr64bit, undef %RHS14:fp64bit, implicit $fpc + %D15:vr64bit = nofpexcept WFDDB %D14:vr64bit, undef %RHS15:fp64bit, implicit $fpc + %D16:vr64bit = nofpexcept WFDDB %D15:vr64bit, undef %RHS16:fp64bit, implicit $fpc + %D17:vr64bit = nofpexcept WFDDB %D16:vr64bit, undef %RHS17:fp64bit, implicit $fpc + %D18:vr64bit = nofpexcept WFDDB %D17:vr64bit, undef %RHS18:fp64bit, implicit $fpc + %D19:vr64bit = nofpexcept WFDDB %D18:vr64bit, undef %RHS19:fp64bit, implicit $fpc + %D20:vr64bit = nofpexcept WFDDB %D19:vr64bit, undef %RHS20:fp64bit, implicit $fpc + %D21:vr64bit = nofpexcept WFDDB %D20:vr64bit, undef %RHS21:fp64bit, implicit $fpc + %D22:vr64bit = nofpexcept WFDDB %D21:vr64bit, undef %RHS22:fp64bit, implicit $fpc + %D23:vr64bit = nofpexcept WFDDB %D22:vr64bit, undef %RHS23:fp64bit, implicit $fpc + %D24:vr64bit = nofpexcept WFDDB %D23:vr64bit, undef %RHS24:fp64bit, implicit $fpc + %D25:vr64bit = nofpexcept WFDDB %D24:vr64bit, undef %RHS25:fp64bit, implicit $fpc + %D26:vr64bit = nofpexcept WFDDB %D25:vr64bit, undef %RHS26:fp64bit, implicit $fpc + %D27:vr64bit = nofpexcept WFDDB %D26:vr64bit, undef %RHS27:fp64bit, implicit $fpc + + %Z0:fp64bit = LZDR + %Z1:fp64bit = LZDR + %M0:vr64bit = nofpexcept WFMDB %Z0, %Z0, implicit $fpc + %M1:vr64bit = nofpexcept WFMDB %Z1, %Z1, implicit $fpc + %M2:vr64bit = nofpexcept WFMDB %Z1, %Z1, implicit $fpc + %M3:vr64bit = nofpexcept WFMDB %Z1, %Z1, implicit $fpc + %M4:vr64bit = nofpexcept WFMDB %Z1, %Z1, implicit $fpc + + %M5:vr64bit = nofpexcept WFMDB undef %F0:vr64bit, undef %F0, implicit $fpc + %M6:vr64bit = nofpexcept WFMDB %M5, %Z0, implicit $fpc + + %M7:vr64bit = nofpexcept WFMDB undef %F1:vr64bit, undef %F1, implicit $fpc + %M9:vr64bit = nofpexcept WFMDB %M7, %M7, implicit $fpc + + VST64 %M0, $noreg, 0, $noreg :: (store (s64) into `ptr null`) + VST64 %M1, $noreg, 0, $noreg :: (store (s64) into `ptr null`) + VST64 %M2, $noreg, 0, $noreg :: (store (s64) into `ptr null`) + VST64 %M3, $noreg, 0, $noreg 
:: (store (s64) into `ptr null`) + VST64 %M4, $noreg, 0, $noreg :: (store (s64) into `ptr null`) + VST64 %M6, $noreg, 0, $noreg :: (store (s64) into `ptr null`) + VST64 %M9, $noreg, 0, $noreg :: (store (s64) into `ptr null`) + + $f0d = LZDR + Return implicit $f0d +... + +# Same, with GPR registers. +# CHECK: ********** MI Scheduling ********** +# CHECK-NEXT: fun5:%bb.0 +# +# CHECK: SU(30): %A0:gr64bit = AGRK %Z0:gr64bit, %Z0:gr64bit, implicit-def dead $cc +# CHECK: Height : 1 +# CHECK: SU(31): %A1:gr64bit = AGRK %Z1:gr64bit, %Z1:gr64bit, implicit-def dead $cc +# CHECK: Height : 1 +# CHECK: SU(32): +# CHECK: SU(36): %A6:gr64bit = AGRK %A5:gr64bit, %Z0:gr64bit, implicit-def dead $cc +# CHECK: Height : 1 +# CHECK: SU(37): +# CHECK: SU(38): %A9:gr64bit = AGRK %A7:gr64bit, %A7:gr64bit, implicit-def dead $cc +# CHECK: Height : 1 +# CHECK: SU(39): +# +# CHECK: Queue BotQ.A: 27 38 36 34 33 32 31 30 +# CHECK: Pick Bot ORDER [pre-RA] +# CHECK-NEXT: Scheduling SU(38) +# CHECK: Queue BotQ.A: 27 30 36 34 33 32 31 37 +# CHECK: Pick Bot BOT-HEIGHT [pre-RA] +# CHECK-NEXT: Scheduling SU(36) +# CHECK: Queue BotQ.A: 27 30 37 34 33 32 31 35 +# CHECK: Pick Bot REG-EXCESS [pre-RA] +# CHECK-NEXT: Scheduling SU(30) +# +# CHECK: *** Final schedule for %bb.0 *** +# CHECK: SU(28): %Z0:gr64bit = LGHI 1 +# CHECK-NEXT: SU(29): %Z1:gr64bit = LGHI 1 +# CHECK-NEXT: SU(31): %A1:gr64bit = AGRK %Z1:gr64bit, %Z1:gr64bit, implicit-def dead $cc +# CHECK-NEXT: SU(32): %A2:gr64bit = AGRK %Z1:gr64bit, %Z1:gr64bit, implicit-def dead $cc +# CHECK-NEXT: SU(33): %A3:gr64bit = AGRK %Z1:gr64bit, %Z1:gr64bit, implicit-def dead $cc +# CHECK-NEXT: SU(35): %A5:gr64bit = AGRK undef %F0:gr64bit, undef %F0:gr64bit, implicit-def dead $cc +# CHECK-NEXT: SU(37): %A7:gr64bit = AGRK undef %F1:gr64bit, undef %F1:gr64bit, implicit-def dead $cc +# CHECK-NEXT: SU(34): %A4:gr64bit = AGRK %Z1:gr64bit, %Z1:gr64bit, implicit-def dead $cc +# CHECK-NEXT: SU(30): %A0:gr64bit = AGRK %Z0:gr64bit, %Z0:gr64bit, implicit-def dead $cc +# 
CHECK-NEXT: SU(36): %A6:gr64bit = AGRK %A5:gr64bit, %Z0:gr64bit, implicit-def dead $cc +# CHECK-NEXT: SU(38): %A9:gr64bit = AGRK %A7:gr64bit, %A7:gr64bit, implicit-def dead $cc +# CHECK-NEXT: SU(39): STG %A0:gr64bit, $noreg, 0, $noreg :: (store (s64) into `ptr null`) +# CHECK-NEXT: SU(40): STG %A1:gr64bit, $noreg, 0, $noreg :: (store (s64) into `ptr null`) +# CHECK-NEXT: SU(41): STG %A2:gr64bit, $noreg, 0, $noreg :: (store (s64) into `ptr null`) +# CHECK-NEXT: SU(42): STG %A3:gr64bit, $noreg, 0, $noreg :: (store (s64) into `ptr null`) +# CHECK-NEXT: SU(43): STG %A4:gr64bit, $noreg, 0, $noreg :: (store (s64) into `ptr null`) +# CHECK-NEXT: SU(44): STG %A6:gr64bit, $noreg, 0, $noreg :: (store (s64) into `ptr null`) +# CHECK-NEXT: SU(45): STG %A9:gr64bit, $noreg, 0, $noreg :: (store (s64) into `ptr null`) +# CHECK-NEXT: SU(46): $r2d = LGHI 0 +--- +name: fun5 +tracksRegLiveness: true +body: | + bb.0: + + ; Make for a top of region in terms of height, enabling heuristic for lower SUs. + %D0:vr64bit = nofpexcept WFDDB undef %LHS0:fp64bit, undef %RHS0:fp64bit, implicit $fpc + %D1:vr64bit = nofpexcept WFDDB %D0:vr64bit, undef %RHS1:fp64bit, implicit $fpc + %D2:vr64bit = nofpexcept WFDDB %D1:vr64bit, undef %RHS2:fp64bit, implicit $fpc + %D3:vr64bit = nofpexcept WFDDB %D2:vr64bit, undef %RHS3:fp64bit, implicit $fpc + %D4:vr64bit = nofpexcept WFDDB %D3:vr64bit, undef %RHS4:fp64bit, implicit $fpc + %D5:vr64bit = nofpexcept WFDDB %D4:vr64bit, undef %RHS5:fp64bit, implicit $fpc + %D6:vr64bit = nofpexcept WFDDB %D5:vr64bit, undef %RHS6:fp64bit, implicit $fpc + %D7:vr64bit = nofpexcept WFDDB %D6:vr64bit, undef %RHS7:fp64bit, implicit $fpc + %D8:vr64bit = nofpexcept WFDDB %D7:vr64bit, undef %RHS8:fp64bit, implicit $fpc + %D9:vr64bit = nofpexcept WFDDB %D8:vr64bit, undef %RHS9:fp64bit, implicit $fpc + %D10:vr64bit = nofpexcept WFDDB %D9:vr64bit, undef %RHS10:fp64bit, implicit $fpc + %D11:vr64bit = nofpexcept WFDDB %D10:vr64bit, undef %RHS11:fp64bit, implicit $fpc + %D12:vr64bit = 
nofpexcept WFDDB %D11:vr64bit, undef %RHS12:fp64bit, implicit $fpc + %D13:vr64bit = nofpexcept WFDDB %D12:vr64bit, undef %RHS13:fp64bit, implicit $fpc + %D14:vr64bit = nofpexcept WFDDB %D13:vr64bit, undef %RHS14:fp64bit, implicit $fpc + %D15:vr64bit = nofpexcept WFDDB %D14:vr64bit, undef %RHS15:fp64bit, implicit $fpc + %D16:vr64bit = nofpexcept WFDDB %D15:vr64bit, undef %RHS16:fp64bit, implicit $fpc + %D17:vr64bit = nofpexcept WFDDB %D16:vr64bit, undef %RHS17:fp64bit, implicit $fpc + %D18:vr64bit = nofpexcept WFDDB %D17:vr64bit, undef %RHS18:fp64bit, implicit $fpc + %D19:vr64bit = nofpexcept WFDDB %D18:vr64bit, undef %RHS19:fp64bit, implicit $fpc + %D20:vr64bit = nofpexcept WFDDB %D19:vr64bit, undef %RHS20:fp64bit, implicit $fpc + %D21:vr64bit = nofpexcept WFDDB %D20:vr64bit, undef %RHS21:fp64bit, implicit $fpc + %D22:vr64bit = nofpexcept WFDDB %D21:vr64bit, undef %RHS22:fp64bit, implicit $fpc + %D23:vr64bit = nofpexcept WFDDB %D22:vr64bit, undef %RHS23:fp64bit, implicit $fpc + %D24:vr64bit = nofpexcept WFDDB %D23:vr64bit, undef %RHS24:fp64bit, implicit $fpc + %D25:vr64bit = nofpexcept WFDDB %D24:vr64bit, undef %RHS25:fp64bit, implicit $fpc + %D26:vr64bit = nofpexcept WFDDB %D25:vr64bit, undef %RHS26:fp64bit, implicit $fpc + %D27:vr64bit = nofpexcept WFDDB %D26:vr64bit, undef %RHS27:fp64bit, implicit $fpc + + %Z0:gr64bit = LGHI 1 + %Z1:gr64bit = LGHI 1 + %A0:gr64bit = AGRK %Z0, %Z0, implicit-def dead $cc + %A1:gr64bit = AGRK %Z1, %Z1, implicit-def dead $cc + %A2:gr64bit = AGRK %Z1, %Z1, implicit-def dead $cc + %A3:gr64bit = AGRK %Z1, %Z1, implicit-def dead $cc + %A4:gr64bit = AGRK %Z1, %Z1, implicit-def dead $cc + + %A5:gr64bit = AGRK undef %F0:gr64bit, undef %F0, implicit-def dead $cc + %A6:gr64bit = AGRK %A5, %Z0, implicit-def dead $cc + + %A7:gr64bit = AGRK undef %F1:gr64bit, undef %F1, implicit-def dead $cc + %A9:gr64bit = AGRK %A7, %A7, implicit-def dead $cc + + STG %A0, $noreg, 0, $noreg :: (store (s64) into `ptr null`) + STG %A1, $noreg, 0, $noreg :: (store 
(s64) into `ptr null`) + STG %A2, $noreg, 0, $noreg :: (store (s64) into `ptr null`) + STG %A3, $noreg, 0, $noreg :: (store (s64) into `ptr null`) + STG %A4, $noreg, 0, $noreg :: (store (s64) into `ptr null`) + STG %A6, $noreg, 0, $noreg :: (store (s64) into `ptr null`) + STG %A9, $noreg, 0, $noreg :: (store (s64) into `ptr null`) + + $r2d = LGHI 0 + Return implicit $r2d +... + +# Test the prioritization of vector (/fp) registers over GPRs. The VL64 is +# scheduled low to close the vr64 live range, even though the address +# register %1 becomes live. +# CHECK: ********** MI Scheduling ********** +# CHECK-NEXT: fun6:%bb.0 +# CHECK: ********** MI Scheduling ********** +# CHECK-NEXT: fun6:%bb.0 +# +# CHECK: SU(35): %L0:vr64bit = VL64 %1:addr64bit, 0, $noreg :: (load (s64) from %ir.Arg) +# CHECK: Height : 3 +# CHECK: Pressure Diff : VR16Bit -1 GRX32Bit 2 +# CHECK: SU(36): %L1:addr64bit = LG %1:addr64bit, 0, $noreg :: (load (s64) from `ptr null`) +# CHECK: Height : 3 +# CHECK: Pressure Diff : {{$}} +# CHECK: SU(37): %L2:addr64bit = LG %0:addr64bit, 0, $noreg :: (load (s64) from %ir.Arg) +# CHECK: Height : 3 +# CHECK: Pressure Diff : {{$}} +# +# CHECK: Queue BotQ.A: 37 36 35 34 +# CHECK: Cand SU(37) FIRST +# CHECK: Scheduling SU(37) +# CHECK: Queue BotQ.A: 34 36 35 0 +# CHECK: Cand SU(35) REG-EXCESS +# CHECK: Scheduling SU(35) +# +# CHECK: *** Final schedule for %bb.0 *** +# CHECK: SU(36): %L1:addr64bit = LG %1:addr64bit, 0, $noreg :: (load (s64) from `ptr null`) +# CHECK-NEXT: SU(35): %L0:vr64bit = VL64 %1:addr64bit, 0, $noreg :: (load (s64) from %ir.Arg) +# CHECK-NEXT: SU(37): %L2:addr64bit = LG %0:addr64bit, 0, $noreg :: (load (s64) from %ir.Arg) +--- +name: fun6 +tracksRegLiveness: true +body: | + bb.0: + liveins: $r2d, $r3d + + %0:addr64bit = COPY $r3d + %1:addr64bit = COPY $r2d + + ; Make for a top of region in terms of height, enabling heuristic for lower SUs.
+ %D0:vr64bit = nofpexcept WFDDB undef %LHS0:fp64bit, undef %RHS0:fp64bit, implicit $fpc + %D1:vr64bit = nofpexcept WFDDB %D0:vr64bit, undef %RHS1:fp64bit, implicit $fpc + %D2:vr64bit = nofpexcept WFDDB %D1:vr64bit, undef %RHS2:fp64bit, implicit $fpc + %D3:vr64bit = nofpexcept WFDDB %D2:vr64bit, undef %RHS3:fp64bit, implicit $fpc + %D4:vr64bit = nofpexcept WFDDB %D3:vr64bit, undef %RHS4:fp64bit, implicit $fpc + %D5:vr64bit = nofpexcept WFDDB %D4:vr64bit, undef %RHS5:fp64bit, implicit $fpc + %D6:vr64bit = nofpexcept WFDDB %D5:vr64bit, undef %RHS6:fp64bit, implicit $fpc + %D7:vr64bit = nofpexcept WFDDB %D6:vr64bit, undef %RHS7:fp64bit, implicit $fpc + %D8:vr64bit = nofpexcept WFDDB %D7:vr64bit, undef %RHS8:fp64bit, implicit $fpc + %D9:vr64bit = nofpexcept WFDDB %D8:vr64bit, undef %RHS9:fp64bit, implicit $fpc + %D10:vr64bit = nofpexcept WFDDB %D9:vr64bit, undef %RHS10:fp64bit, implicit $fpc + %D11:vr64bit = nofpexcept WFDDB %D10:vr64bit, undef %RHS11:fp64bit, implicit $fpc + %D12:vr64bit = nofpexcept WFDDB %D11:vr64bit, undef %RHS12:fp64bit, implicit $fpc + %D13:vr64bit = nofpexcept WFDDB %D12:vr64bit, undef %RHS13:fp64bit, implicit $fpc + %D14:vr64bit = nofpexcept WFDDB %D13:vr64bit, undef %RHS14:fp64bit, implicit $fpc + %D15:vr64bit = nofpexcept WFDDB %D14:vr64bit, undef %RHS15:fp64bit, implicit $fpc + %D16:vr64bit = nofpexcept WFDDB %D15:vr64bit, undef %RHS16:fp64bit, implicit $fpc + %D17:vr64bit = nofpexcept WFDDB %D16:vr64bit, undef %RHS17:fp64bit, implicit $fpc + %D18:vr64bit = nofpexcept WFDDB %D17:vr64bit, undef %RHS18:fp64bit, implicit $fpc + %D19:vr64bit = nofpexcept WFDDB %D18:vr64bit, undef %RHS19:fp64bit, implicit $fpc + %D20:vr64bit = nofpexcept WFDDB %D19:vr64bit, undef %RHS20:fp64bit, implicit $fpc + %D21:vr64bit = nofpexcept WFDDB %D20:vr64bit, undef %RHS21:fp64bit, implicit $fpc + %D22:vr64bit = nofpexcept WFDDB %D21:vr64bit, undef %RHS22:fp64bit, implicit $fpc + %D23:vr64bit = nofpexcept WFDDB %D22:vr64bit, undef %RHS23:fp64bit, implicit $fpc + 
%D24:vr64bit = nofpexcept WFDDB %D23:vr64bit, undef %RHS24:fp64bit, implicit $fpc + %D25:vr64bit = nofpexcept WFDDB %D24:vr64bit, undef %RHS25:fp64bit, implicit $fpc + %D26:vr64bit = nofpexcept WFDDB %D25:vr64bit, undef %RHS26:fp64bit, implicit $fpc + %D27:vr64bit = nofpexcept WFDDB %D26:vr64bit, undef %RHS27:fp64bit, implicit $fpc + %D28:vr64bit = nofpexcept WFDDB %D27:vr64bit, undef %RHS28:fp64bit, implicit $fpc + %D29:vr64bit = nofpexcept WFDDB %D28:vr64bit, undef %RHS29:fp64bit, implicit $fpc + %D30:vr64bit = nofpexcept WFDDB %D29:vr64bit, undef %RHS3:fp64bit, implicit $fpc + %D31:vr64bit = nofpexcept WFDDB %D30:vr64bit, undef %RHS31:fp64bit, implicit $fpc + %D32:vr64bit = nofpexcept WFDDB %D31:vr64bit, undef %RHS32:fp64bit, implicit $fpc + + %L0:vr64bit = VL64 %1, 0, $noreg :: (load (s64) from %ir.Arg) + %L1:addr64bit = LG %1, 0, $noreg :: (load (s64) from `ptr null`) + %L2:addr64bit = LG %0, 0, $noreg :: (load (s64) from %ir.Arg) + CallBASR implicit-def dead $r14d, implicit-def dead $cc, implicit $fpc + STG %L1, $noreg, 0, $noreg + VST64 %L0, $noreg, 0, $noreg + STG %L2, $noreg, 0, $noreg + Return +... diff --git a/llvm/test/CodeGen/SystemZ/misched-prera-pdiffs.mir b/llvm/test/CodeGen/SystemZ/misched-prera-pdiffs.mir new file mode 100644 index 0000000000000..048c7fa203e07 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/misched-prera-pdiffs.mir @@ -0,0 +1,151 @@ +# RUN: llc -o - %s -mtriple=s390x-linux-gnu -mcpu=z16 -verify-machineinstrs \ +# RUN: -run-pass=machine-scheduler -debug-only=machine-scheduler 2>&1\ +# RUN: | FileCheck %s + +# Some tests for Pressure Diffs of scheduling units. Each interesting register +# class is used in a def-use sequence and the initial Pressure Diff of each SU +# is checked. For all GPRs the GRX32Bit PressureSet should be present, and for +# all FP/Vector regs the VR16Bit PressureSet should be affected. + +--- | + + define void @fun0() { ret void } +... 
+ + +# GR64Bit => GRX32Bit 2 +# +# CHECK: ********** MI Scheduling ********** +# CHECK-NEXT: fun0:%bb.0 +# CHECK: SU(0): %0:gr64bit = LGHI 0 +# CHECK: Pressure Diff : GRX32Bit -2 +# CHECK: SU(1): STG %0:gr64bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : GRX32Bit 2 +# CHECK: SU(2): %1:grx32bit = LHIMux 0 +# CHECK: Pressure Diff : GRX32Bit -1 +# CHECK: SU(3): STMux %1:grx32bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : GRX32Bit 1 +# CHECK: SU(4): %2:gr32bit = LHI 0 +# CHECK: Pressure Diff : GR32Bit -1 GRX32Bit -1 +# CHECK: SU(5): ST %2:gr32bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : GR32Bit 1 GRX32Bit 1 +# CHECK: SU(6): %3:grh32bit = IIHF 0 +# CHECK: Pressure Diff : GRH32Bit -1 GRX32Bit -1 +# CHECK: SU(7): STFH %3:grh32bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : GRH32Bit 1 GRX32Bit 1 +# CHECK: SU(8): %4:addr64bit = LGHI 0 +# CHECK: Pressure Diff : GRX32Bit -2 +# CHECK: SU(9): STG %4:addr64bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : GRX32Bit 2 +# CHECK: SU(10): %5:addr32bit = LHI 0 +# CHECK: Pressure Diff : GR32Bit -1 GRX32Bit -1 +# CHECK: SU(11): ST %5:addr32bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : GR32Bit 1 GRX32Bit 1 +# CHECK: SU(12): %6:gr128bit = L128 $noreg, 0, $noreg +# CHECK: Pressure Diff : GRX32Bit -4 +# CHECK: SU(13): ST128 %6:gr128bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : GRX32Bit 4 +# CHECK: SU(14): %7:addr128bit = L128 $noreg, 0, $noreg +# CHECK: Pressure Diff : GRX32Bit -4 +# CHECK: SU(15): ST128 %7:addr128bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : GRX32Bit 4 +# CHECK: SU(16): %8:vr16bit = LEFR_16 undef %9:gr32bit +# CHECK: Pressure Diff : VR16Bit -1 +# CHECK: SU(17): dead %9:gr32bit = LFER_16 %8:vr16bit +# CHECK: Pressure Diff : VR16Bit 1 +# CHECK: SU(18): %10:vr32bit = LEFR undef %11:gr32bit +# CHECK: Pressure Diff : VR16Bit -1 +# CHECK: SU(19): dead %12:gr64bit = LFER %10:vr32bit +# CHECK: Pressure Diff : VR16Bit 1 +# CHECK: SU(20): %13:vr64bit = SelectVR64 undef %14:vr64bit, undef %15:vr64bit +# CHECK: 
Pressure Diff : VR16Bit -1 +# CHECK: SU(21): VST64 %13:vr64bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : VR16Bit 1 +# CHECK: SU(22): %16:vr128bit = VZERO +# CHECK: Pressure Diff : VR16Bit -1 +# CHECK: SU(23): VST %16:vr128bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : VR16Bit 1 +# CHECK: SU(24): %17:fp16bit = LZER_16 +# CHECK: Pressure Diff : FP16Bit -1 VR16Bit -1 +# CHECK: SU(25): STE16 %17:fp16bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : FP16Bit 1 VR16Bit 1 +# CHECK: SU(26): %18:fp32bit = LZER +# CHECK: Pressure Diff : FP16Bit -1 VR16Bit -1 +# CHECK: SU(27): STE %18:fp32bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : FP16Bit 1 VR16Bit 1 +# CHECK: SU(28): %19:fp64bit = LZDR +# CHECK: Pressure Diff : FP16Bit -1 VR16Bit -1 +# CHECK: SU(29): STD %19:fp64bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : FP16Bit 1 VR16Bit 1 +# CHECK: SU(30): %20:vf128bit = VL $noreg, 0, $noreg +# CHECK: Pressure Diff : FP16Bit -1 VR16Bit -1 +# CHECK: SU(31): VST %20:vf128bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : FP16Bit 1 VR16Bit 1 +# CHECK: SU(32): %21:fp128bit = LZXR +# CHECK: Pressure Diff : FP16Bit -2 VR16Bit -2 +# CHECK: SU(33): STX %21:fp128bit, $noreg, 0, $noreg +# CHECK: Pressure Diff : FP16Bit 2 VR16Bit 2 +--- +name: fun0 +tracksRegLiveness: true +body: | + bb.0: + + %0:gr64bit = LGHI 0 + STG %0, $noreg, 0, $noreg + + %1:grx32bit = LHIMux 0 + STMux %1, $noreg, 0, $noreg + + %2:gr32bit = LHI 0 + ST %2, $noreg, 0, $noreg + + %3:grh32bit = IIHF 0 + STFH %3, $noreg, 0, $noreg + + %4:addr64bit = LGHI 0 + STG %4, $noreg, 0, $noreg + + %5:addr32bit = LHI 0 + ST %5, $noreg, 0, $noreg + + %6:gr128bit = L128 $noreg, 0, $noreg + ST128 %6, $noreg, 0, $noreg + + %7:addr128bit = L128 $noreg, 0, $noreg + ST128 %7, $noreg, 0, $noreg + + %8:vr16bit = LEFR_16 undef %9:gr32bit + %9:gr32bit = LFER_16 %8 + + %11:vr32bit = LEFR undef %10:gr32bit + %12:gr64bit = LFER %11 + + %13:vr64bit = SelectVR64 undef %14:vr64bit, undef %15:vr64bit, 0, 0, implicit undef $cc + VST64 %13, $noreg, 
0, $noreg + + %16:vr128bit = VZERO + VST %16, $noreg, 0, $noreg + + %17:fp16bit = LZER_16 + STE16 %17, $noreg, 0, $noreg + + %18:fp32bit = LZER + STE %18, $noreg, 0, $noreg + + %19:fp64bit = LZDR + STD %19, $noreg, 0, $noreg + + %20:vf128bit = VL $noreg, 0, $noreg + VST %20, $noreg, 0, $noreg + + %21:fp128bit = LZXR + STX %21, $noreg, 0, $noreg + + NOP $noreg, 0, $noreg ; enable reg pressure tracking (>36 instrs). + NOP $noreg, 0, $noreg + NOP $noreg, 0, $noreg + NOP $noreg, 0, $noreg + + Return +... diff --git a/llvm/test/CodeGen/SystemZ/regcoal-subranges-update.mir b/llvm/test/CodeGen/SystemZ/regcoal-subranges-update.mir index c805d31aa11c6..7439b246b65b2 100644 --- a/llvm/test/CodeGen/SystemZ/regcoal-subranges-update.mir +++ b/llvm/test/CodeGen/SystemZ/regcoal-subranges-update.mir @@ -25,11 +25,11 @@ body: | ; CHECK-LABEL: name: main ; CHECK: [[LGHI:%[0-9]+]]:gr64bit = LGHI 43 + ; CHECK-NEXT: undef [[LGHI:%[0-9]+]].subreg_l32:gr64bit = MSR [[LGHI]].subreg_l32, [[LGHI]].subreg_l32 + ; CHECK-NEXT: [[LGHI:%[0-9]+]].subreg_l32:gr64bit = AHIMux [[LGHI]].subreg_l32, 9, implicit-def dead $cc ; CHECK-NEXT: [[LGHI1:%[0-9]+]]:gr64bit = LGHI 43 - ; CHECK-NEXT: undef [[LGHI1:%[0-9]+]].subreg_l32:gr64bit = MSR [[LGHI1]].subreg_l32, [[LGHI1]].subreg_l32 - ; CHECK-NEXT: [[LGHI1:%[0-9]+]].subreg_l32:gr64bit = AHIMux [[LGHI1]].subreg_l32, 9, implicit-def dead $cc - ; CHECK-NEXT: undef [[LGFI:%[0-9]+]].subreg_l64:gr128bit = LGFI -245143785, implicit [[LGHI1]].subreg_l32 - ; CHECK-NEXT: [[LGFI:%[0-9]+]]:gr128bit = DLGR [[LGFI]], [[LGHI]] + ; CHECK-NEXT: undef [[LGFI:%[0-9]+]].subreg_l64:gr128bit = LGFI -245143785, implicit [[LGHI]].subreg_l32 + ; CHECK-NEXT: [[LGFI:%[0-9]+]]:gr128bit = DLGR [[LGFI]], [[LGHI1]] ; CHECK-NEXT: Return implicit [[LGFI]] %0:gr64bit = LGHI 43 %1:gr32bit = COPY %0.subreg_l32 diff --git a/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll b/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll index ff9b6a34c1d53..f8f1542096b15 100644 --- 
a/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll +++ b/llvm/test/CodeGen/SystemZ/regcoal_remat_empty_subrange.ll @@ -14,10 +14,10 @@ define void @main(i16 %in) { ; CHECK-LABEL: main: ; CHECK: # %bb.0: -; CHECK-NEXT: lhr %r2, %r2 +; CHECK-NEXT: lhr %r0, %r2 ; CHECK-NEXT: larl %r1, g_151 ; CHECK-NEXT: lghi %r3, 0 -; CHECK-NEXT: chi %r2, 0 +; CHECK-NEXT: chi %r0, 0 ; CHECK-NEXT: lhi %r0, 1 ; CHECK-NEXT: locghile %r3, 1 ; CHECK-NEXT: o %r0, 0(%r1) diff --git a/llvm/test/CodeGen/SystemZ/risbg-04.ll b/llvm/test/CodeGen/SystemZ/risbg-04.ll index 5528c742ccc5a..c9f03b451d24a 100644 --- a/llvm/test/CodeGen/SystemZ/risbg-04.ll +++ b/llvm/test/CodeGen/SystemZ/risbg-04.ll @@ -252,14 +252,16 @@ define i64 @f20(i64 %foo) { } ; Now try an arithmetic right shift in which the sign bits aren't needed. -; Introduce a second use of %shr so that the ashr doesn't decompose to -; an lshr. +; Introduce a second use of %shr so that the ashr doesn't decompose to an +; lshr. TODO: Maybe better if pre-ra scheduler put the risblg below the +; other use of %r2. 
define i32 @f21(i32 %foo, ptr %dest) { ; CHECK-LABEL: f21: ; CHECK: # %bb.0: -; CHECK-NEXT: srak %r0, %r2, 28 -; CHECK-NEXT: risblg %r2, %r2, 28, 158, 36 -; CHECK-NEXT: st %r0, 0(%r3) +; CHECK-NEXT: risblg %r0, %r2, 28, 158, 36 +; CHECK-NEXT: sra %r2, 28 +; CHECK-NEXT: st %r2, 0(%r3) +; CHECK-NEXT: lr %r2, %r0 ; CHECK-NEXT: br %r14 %shr = ashr i32 %foo, 28 store i32 %shr, ptr %dest @@ -271,9 +273,10 @@ define i32 @f21(i32 %foo, ptr %dest) { define i64 @f22(i64 %foo, ptr %dest) { ; CHECK-LABEL: f22: ; CHECK: # %bb.0: -; CHECK-NEXT: srag %r0, %r2, 60 -; CHECK-NEXT: risbg %r2, %r2, 60, 190, 4 -; CHECK-NEXT: stg %r0, 0(%r3) +; CHECK-NEXT: risbg %r0, %r2, 60, 190, 4 +; CHECK-NEXT: srag %r1, %r2, 60 +; CHECK-NEXT: lgr %r2, %r0 +; CHECK-NEXT: stg %r1, 0(%r3) ; CHECK-NEXT: br %r14 %shr = ashr i64 %foo, 60 store i64 %shr, ptr %dest @@ -484,9 +487,10 @@ define i64 @f38(i64 %foo) { define i64 @f39(i64 %foo, ptr %dest) { ; CHECK-LABEL: f39: ; CHECK: # %bb.0: -; CHECK-NEXT: srag %r0, %r2, 35 -; CHECK-NEXT: risbg %r2, %r2, 33, 189, 31 -; CHECK-NEXT: stg %r0, 0(%r3) +; CHECK-NEXT: risbg %r0, %r2, 33, 189, 31 +; CHECK-NEXT: srag %r1, %r2, 35 +; CHECK-NEXT: lgr %r2, %r0 +; CHECK-NEXT: stg %r1, 0(%r3) ; CHECK-NEXT: br %r14 %ashr = ashr i64 %foo, 35 store i64 %ashr, ptr %dest diff --git a/llvm/test/CodeGen/SystemZ/rot-03.ll b/llvm/test/CodeGen/SystemZ/rot-03.ll index 8f42439dabdf8..403ea976ebcea 100644 --- a/llvm/test/CodeGen/SystemZ/rot-03.ll +++ b/llvm/test/CodeGen/SystemZ/rot-03.ll @@ -8,10 +8,10 @@ define i128 @f1(i128 %val) { ; CHECK: # %bb.0: ; CHECK-NEXT: vl %v0, 0(%r3), 3 ; CHECK-NEXT: vrepib %v1, 100 -; CHECK-NEXT: vsrlb %v2, %v0, %v1 -; CHECK-NEXT: vsrl %v1, %v2, %v1 ; CHECK-NEXT: vrepib %v2, 28 +; CHECK-NEXT: vsrlb %v3, %v0, %v1 ; CHECK-NEXT: vslb %v0, %v0, %v2 +; CHECK-NEXT: vsrl %v1, %v3, %v1 ; CHECK-NEXT: vsl %v0, %v0, %v2 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vst %v0, 0(%r2), 3 @@ -47,19 +47,19 @@ define i128 @f3(i128 %val, i128 %amt) { ; CHECK-LABEL: f3: ; 
CHECK: # %bb.0: ; CHECK-NEXT: l %r0, 12(%r4) -; CHECK-NEXT: vlvgp %v1, %r0, %r0 ; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vslb %v2, %v0, %v1 -; CHECK-NEXT: vsl %v1, %v2, %v1 -; CHECK-NEXT: vrepib %v2, 1 +; CHECK-NEXT: vlvgp %v1, %r0, %r0 ; CHECK-NEXT: xilf %r0, 4294967295 -; CHECK-NEXT: vsrl %v0, %v0, %v2 ; CHECK-NEXT: vlvgp %v2, %r0, %r0 +; CHECK-NEXT: vrepib %v3, 1 +; CHECK-NEXT: vrepb %v1, %v1, 15 +; CHECK-NEXT: vsrl %v3, %v0, %v3 ; CHECK-NEXT: vrepb %v2, %v2, 15 -; CHECK-NEXT: vsrlb %v0, %v0, %v2 -; CHECK-NEXT: vsrl %v0, %v0, %v2 -; CHECK-NEXT: vo %v0, %v1, %v0 +; CHECK-NEXT: vslb %v0, %v0, %v1 +; CHECK-NEXT: vsrlb %v3, %v3, %v2 +; CHECK-NEXT: vsl %v0, %v0, %v1 +; CHECK-NEXT: vsrl %v1, %v3, %v2 +; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 diff --git a/llvm/test/CodeGen/SystemZ/shift-12.ll b/llvm/test/CodeGen/SystemZ/shift-12.ll index 419caeebbbf9d..f3858b28cd060 100644 --- a/llvm/test/CodeGen/SystemZ/shift-12.ll +++ b/llvm/test/CodeGen/SystemZ/shift-12.ll @@ -122,11 +122,11 @@ define i32 @f10(i32 %a, i32 %sh) { define i128 @f11(i128 %a, i32 %sh) { ; CHECK-LABEL: f11: ; CHECK: # %bb.0: -; CHECK-NEXT: vlvgp %v1, %r4, %r4 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vslb %v0, %v0, %v1 -; CHECK-NEXT: vsl %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vslb %v1, %v1, %v0 +; CHECK-NEXT: vsl %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = and i32 %sh, 127 @@ -138,11 +138,11 @@ define i128 @f11(i128 %a, i32 %sh) { define i128 @f12(i128 %a, i32 %sh) { ; CHECK-LABEL: f12: ; CHECK: # %bb.0: -; CHECK-NEXT: vlvgp %v1, %r4, %r4 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vsrlb %v0, %v0, %v1 -; CHECK-NEXT: vsrl %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb 
%v0, %v0, 15 +; CHECK-NEXT: vsrlb %v1, %v1, %v0 +; CHECK-NEXT: vsrl %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = and i32 %sh, 127 @@ -154,11 +154,11 @@ define i128 @f12(i128 %a, i32 %sh) { define i128 @f13(i128 %a, i32 %sh) { ; CHECK-LABEL: f13: ; CHECK: # %bb.0: -; CHECK-NEXT: vlvgp %v1, %r4, %r4 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vsrab %v0, %v0, %v1 -; CHECK-NEXT: vsra %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r4, %r4 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vsrab %v1, %v1, %v0 +; CHECK-NEXT: vsra %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = and i32 %sh, 127 diff --git a/llvm/test/CodeGen/SystemZ/shift-13.ll b/llvm/test/CodeGen/SystemZ/shift-13.ll index 2e4899516d107..0ba08b34c48c8 100644 --- a/llvm/test/CodeGen/SystemZ/shift-13.ll +++ b/llvm/test/CodeGen/SystemZ/shift-13.ll @@ -48,11 +48,11 @@ define i128 @f4(i128 %a, i128 %sh) { ; CHECK-LABEL: f4: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r0, 12(%r4) -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vslb %v0, %v0, %v1 -; CHECK-NEXT: vsl %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vslb %v1, %v1, %v0 +; CHECK-NEXT: vsl %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %res = shl i128 %a, %sh @@ -64,11 +64,11 @@ define i128 @f5(i128 %a, i128 %sh) { ; CHECK-LABEL: f5: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r0, 12(%r4) -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vslb %v0, %v0, %v1 -; CHECK-NEXT: vsl %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vslb %v1, %v1, %v0 +; CHECK-NEXT: vsl %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = 
and i128 %sh, 127 @@ -81,11 +81,11 @@ define i128 @f6(i128 %a, i128 %sh) { ; CHECK-LABEL: f6: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r0, 12(%r4) -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vslb %v0, %v0, %v1 -; CHECK-NEXT: vsl %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vslb %v1, %v1, %v0 +; CHECK-NEXT: vsl %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = and i128 %sh, 511 @@ -99,11 +99,11 @@ define i128 @f7(i128 %a, i128 %sh) { ; CHECK: # %bb.0: ; CHECK-NEXT: lhi %r0, 63 ; CHECK-NEXT: n %r0, 12(%r4) -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vslb %v0, %v0, %v1 -; CHECK-NEXT: vsl %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vslb %v1, %v1, %v0 +; CHECK-NEXT: vsl %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = and i128 %sh, 63 @@ -115,15 +115,15 @@ define i128 @f7(i128 %a, i128 %sh) { define i128 @f8(i128 %a, i128 %b, i128 %sh) { ; CHECK-LABEL: f8: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v1, 0(%r4), 3 -; CHECK-NEXT: vl %v2, 0(%r5), 3 -; CHECK-NEXT: vn %v1, %v2, %v1 -; CHECK-NEXT: vlgvf %r0, %v1, 3 -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vslb %v0, %v0, %v1 -; CHECK-NEXT: vsl %v0, %v0, %v1 +; CHECK-NEXT: vl %v0, 0(%r4), 3 +; CHECK-NEXT: vl %v1, 0(%r5), 3 +; CHECK-NEXT: vn %v0, %v1, %v0 +; CHECK-NEXT: vlgvf %r0, %v0, 3 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vslb %v1, %v1, %v0 +; CHECK-NEXT: vsl %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = and i128 %sh, %b diff --git a/llvm/test/CodeGen/SystemZ/shift-14.ll 
b/llvm/test/CodeGen/SystemZ/shift-14.ll index 8f29d983c2344..321bbf3e6d478 100644 --- a/llvm/test/CodeGen/SystemZ/shift-14.ll +++ b/llvm/test/CodeGen/SystemZ/shift-14.ll @@ -48,11 +48,11 @@ define i128 @f4(i128 %a, i128 %sh) { ; CHECK-LABEL: f4: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r0, 12(%r4) -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vsrlb %v0, %v0, %v1 -; CHECK-NEXT: vsrl %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vsrlb %v1, %v1, %v0 +; CHECK-NEXT: vsrl %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %res = lshr i128 %a, %sh @@ -64,11 +64,11 @@ define i128 @f5(i128 %a, i128 %sh) { ; CHECK-LABEL: f5: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r0, 12(%r4) -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vsrlb %v0, %v0, %v1 -; CHECK-NEXT: vsrl %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vsrlb %v1, %v1, %v0 +; CHECK-NEXT: vsrl %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = and i128 %sh, 127 @@ -81,11 +81,11 @@ define i128 @f6(i128 %a, i128 %sh) { ; CHECK-LABEL: f6: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r0, 12(%r4) -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vsrlb %v0, %v0, %v1 -; CHECK-NEXT: vsrl %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vsrlb %v1, %v1, %v0 +; CHECK-NEXT: vsrl %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = and i128 %sh, 511 @@ -99,11 +99,11 @@ define i128 @f7(i128 %a, i128 %sh) { ; CHECK: # %bb.0: ; CHECK-NEXT: lhi %r0, 63 ; CHECK-NEXT: n %r0, 12(%r4) -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 
0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vsrlb %v0, %v0, %v1 -; CHECK-NEXT: vsrl %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vsrlb %v1, %v1, %v0 +; CHECK-NEXT: vsrl %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = and i128 %sh, 63 @@ -115,15 +115,15 @@ define i128 @f7(i128 %a, i128 %sh) { define i128 @f8(i128 %a, i128 %b, i128 %sh) { ; CHECK-LABEL: f8: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v1, 0(%r4), 3 -; CHECK-NEXT: vl %v2, 0(%r5), 3 -; CHECK-NEXT: vn %v1, %v2, %v1 -; CHECK-NEXT: vlgvf %r0, %v1, 3 -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vsrlb %v0, %v0, %v1 -; CHECK-NEXT: vsrl %v0, %v0, %v1 +; CHECK-NEXT: vl %v0, 0(%r4), 3 +; CHECK-NEXT: vl %v1, 0(%r5), 3 +; CHECK-NEXT: vn %v0, %v1, %v0 +; CHECK-NEXT: vlgvf %r0, %v0, 3 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vsrlb %v1, %v1, %v0 +; CHECK-NEXT: vsrl %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = and i128 %sh, %b diff --git a/llvm/test/CodeGen/SystemZ/shift-15.ll b/llvm/test/CodeGen/SystemZ/shift-15.ll index 01d1ae64682ec..6c32a39471efc 100644 --- a/llvm/test/CodeGen/SystemZ/shift-15.ll +++ b/llvm/test/CodeGen/SystemZ/shift-15.ll @@ -48,11 +48,11 @@ define i128 @f4(i128 %a, i128 %sh) { ; CHECK-LABEL: f4: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r0, 12(%r4) -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vsrab %v0, %v0, %v1 -; CHECK-NEXT: vsra %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vsrab %v1, %v1, %v0 +; CHECK-NEXT: vsra %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %res = ashr i128 %a, %sh @@ -64,11 +64,11 @@ define i128 @f5(i128 %a, 
i128 %sh) { ; CHECK-LABEL: f5: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r0, 12(%r4) -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vsrab %v0, %v0, %v1 -; CHECK-NEXT: vsra %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vsrab %v1, %v1, %v0 +; CHECK-NEXT: vsra %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = and i128 %sh, 127 @@ -81,11 +81,11 @@ define i128 @f6(i128 %a, i128 %sh) { ; CHECK-LABEL: f6: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r0, 12(%r4) -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vsrab %v0, %v0, %v1 -; CHECK-NEXT: vsra %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vsrab %v1, %v1, %v0 +; CHECK-NEXT: vsra %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = and i128 %sh, 511 @@ -99,11 +99,11 @@ define i128 @f7(i128 %a, i128 %sh) { ; CHECK: # %bb.0: ; CHECK-NEXT: lhi %r0, 63 ; CHECK-NEXT: n %r0, 12(%r4) -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vsrab %v0, %v0, %v1 -; CHECK-NEXT: vsra %v0, %v0, %v1 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vsrab %v1, %v1, %v0 +; CHECK-NEXT: vsra %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = and i128 %sh, 63 @@ -115,15 +115,15 @@ define i128 @f7(i128 %a, i128 %sh) { define i128 @f8(i128 %a, i128 %b, i128 %sh) { ; CHECK-LABEL: f8: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v1, 0(%r4), 3 -; CHECK-NEXT: vl %v2, 0(%r5), 3 -; CHECK-NEXT: vn %v1, %v2, %v1 -; CHECK-NEXT: vlgvf %r0, %v1, 3 -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepb %v1, %v1, 15 -; CHECK-NEXT: vsrab %v0, 
%v0, %v1 -; CHECK-NEXT: vsra %v0, %v0, %v1 +; CHECK-NEXT: vl %v0, 0(%r4), 3 +; CHECK-NEXT: vl %v1, 0(%r5), 3 +; CHECK-NEXT: vn %v0, %v1, %v0 +; CHECK-NEXT: vlgvf %r0, %v0, 3 +; CHECK-NEXT: vlvgp %v0, %r0, %r0 +; CHECK-NEXT: vl %v1, 0(%r3), 3 +; CHECK-NEXT: vrepb %v0, %v0, 15 +; CHECK-NEXT: vsrab %v1, %v1, %v0 +; CHECK-NEXT: vsra %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 %and = and i128 %sh, %b diff --git a/llvm/test/CodeGen/SystemZ/shift-16.ll b/llvm/test/CodeGen/SystemZ/shift-16.ll index d81c3546998be..68cd908c80a42 100644 --- a/llvm/test/CodeGen/SystemZ/shift-16.ll +++ b/llvm/test/CodeGen/SystemZ/shift-16.ll @@ -7,26 +7,26 @@ define i256 @f1(i256 %a, i256 %sh) { ; CHECK-LABEL: f1: ; CHECK: # %bb.0: +; CHECK-NEXT: l %r0, 28(%r4) ; CHECK-NEXT: vl %v1, 16(%r3), 3 ; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: l %r0, 28(%r4) ; CHECK-NEXT: clijhe %r0, 128, .LBB0_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lr %r1, %r0 ; CHECK-NEXT: xilf %r1, 4294967295 ; CHECK-NEXT: vlvgp %v2, %r0, %r0 -; CHECK-NEXT: vlvgp %v5, %r1, %r1 +; CHECK-NEXT: vlvgp %v3, %r1, %r1 ; CHECK-NEXT: vrepib %v4, 1 -; CHECK-NEXT: vrepb %v3, %v2, 15 -; CHECK-NEXT: vsrl %v4, %v1, %v4 -; CHECK-NEXT: vrepb %v5, %v5, 15 -; CHECK-NEXT: vslb %v2, %v0, %v3 -; CHECK-NEXT: vsrlb %v4, %v4, %v5 -; CHECK-NEXT: vslb %v1, %v1, %v3 -; CHECK-NEXT: vsl %v2, %v2, %v3 -; CHECK-NEXT: vsrl %v4, %v4, %v5 -; CHECK-NEXT: vo %v2, %v2, %v4 -; CHECK-NEXT: vsl %v1, %v1, %v3 +; CHECK-NEXT: vrepb %v5, %v2, 15 +; CHECK-NEXT: vsrl %v2, %v1, %v4 +; CHECK-NEXT: vrepb %v3, %v3, 15 +; CHECK-NEXT: vslb %v4, %v0, %v5 +; CHECK-NEXT: vsrlb %v2, %v2, %v3 +; CHECK-NEXT: vslb %v1, %v1, %v5 +; CHECK-NEXT: vsl %v4, %v4, %v5 +; CHECK-NEXT: vsrl %v2, %v2, %v3 +; CHECK-NEXT: vo %v2, %v4, %v2 +; CHECK-NEXT: vsl %v1, %v1, %v5 ; CHECK-NEXT: cijlh %r0, 0, .LBB0_3 ; CHECK-NEXT: j .LBB0_4 ; CHECK-NEXT: .LBB0_2: @@ -51,26 +51,26 @@ define i256 @f1(i256 %a, i256 %sh) { define i256 @f2(i256 %a, i256 %sh) { ; CHECK-LABEL: f2: ; CHECK: # 
%bb.0: +; CHECK-NEXT: l %r0, 28(%r4) ; CHECK-NEXT: vl %v1, 0(%r3), 3 ; CHECK-NEXT: vl %v0, 16(%r3), 3 -; CHECK-NEXT: l %r0, 28(%r4) ; CHECK-NEXT: clijhe %r0, 128, .LBB1_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lr %r1, %r0 ; CHECK-NEXT: xilf %r1, 4294967295 ; CHECK-NEXT: vlvgp %v2, %r0, %r0 -; CHECK-NEXT: vlvgp %v5, %r1, %r1 +; CHECK-NEXT: vlvgp %v3, %r1, %r1 ; CHECK-NEXT: vrepib %v4, 1 -; CHECK-NEXT: vrepb %v3, %v2, 15 -; CHECK-NEXT: vsl %v4, %v1, %v4 -; CHECK-NEXT: vrepb %v5, %v5, 15 -; CHECK-NEXT: vsrlb %v2, %v0, %v3 -; CHECK-NEXT: vslb %v4, %v4, %v5 -; CHECK-NEXT: vsrlb %v1, %v1, %v3 -; CHECK-NEXT: vsrl %v2, %v2, %v3 -; CHECK-NEXT: vsl %v4, %v4, %v5 -; CHECK-NEXT: vo %v2, %v4, %v2 -; CHECK-NEXT: vsrl %v1, %v1, %v3 +; CHECK-NEXT: vrepb %v5, %v2, 15 +; CHECK-NEXT: vsl %v2, %v1, %v4 +; CHECK-NEXT: vrepb %v3, %v3, 15 +; CHECK-NEXT: vsrlb %v4, %v0, %v5 +; CHECK-NEXT: vslb %v2, %v2, %v3 +; CHECK-NEXT: vsrlb %v1, %v1, %v5 +; CHECK-NEXT: vsrl %v4, %v4, %v5 +; CHECK-NEXT: vsl %v2, %v2, %v3 +; CHECK-NEXT: vo %v2, %v2, %v4 +; CHECK-NEXT: vsrl %v1, %v1, %v5 ; CHECK-NEXT: cijlh %r0, 0, .LBB1_3 ; CHECK-NEXT: j .LBB1_4 ; CHECK-NEXT: .LBB1_2: @@ -95,37 +95,37 @@ define i256 @f2(i256 %a, i256 %sh) { define i256 @f3(i256 %a, i256 %sh) { ; CHECK-LABEL: f3: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v0, 16(%r3), 3 ; CHECK-NEXT: l %r0, 28(%r4) -; CHECK-NEXT: vl %v2, 0(%r3), 3 +; CHECK-NEXT: vl %v0, 16(%r3), 3 +; CHECK-NEXT: vl %v1, 0(%r3), 3 ; CHECK-NEXT: clijhe %r0, 128, .LBB2_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vrepb %v3, %v1, 15 -; CHECK-NEXT: vsrab %v1, %v2, %v3 -; CHECK-NEXT: vsrlb %v4, %v0, %v3 -; CHECK-NEXT: vsra %v1, %v1, %v3 ; CHECK-NEXT: lr %r1, %r0 -; CHECK-NEXT: vsrl %v3, %v4, %v3 -; CHECK-NEXT: vrepib %v4, 1 ; CHECK-NEXT: xilf %r1, 4294967295 -; CHECK-NEXT: vsl %v2, %v2, %v4 -; CHECK-NEXT: vlvgp %v4, %r1, %r1 -; CHECK-NEXT: vrepb %v4, %v4, 15 -; CHECK-NEXT: vslb %v2, %v2, %v4 -; CHECK-NEXT: vsl %v2, %v2, %v4 -; CHECK-NEXT: vo %v2, %v2, %v3 
+; CHECK-NEXT: vlvgp %v2, %r0, %r0 +; CHECK-NEXT: vlvgp %v3, %r1, %r1 +; CHECK-NEXT: vrepib %v4, 1 +; CHECK-NEXT: vrepb %v2, %v2, 15 +; CHECK-NEXT: vsl %v4, %v1, %v4 +; CHECK-NEXT: vrepb %v3, %v3, 15 +; CHECK-NEXT: vsrlb %v5, %v0, %v2 +; CHECK-NEXT: vslb %v4, %v4, %v3 +; CHECK-NEXT: vsrab %v1, %v1, %v2 +; CHECK-NEXT: vsrl %v5, %v5, %v2 +; CHECK-NEXT: vsl %v3, %v4, %v3 +; CHECK-NEXT: vsra %v1, %v1, %v2 +; CHECK-NEXT: vo %v2, %v3, %v5 ; CHECK-NEXT: cijlh %r0, 0, .LBB2_3 ; CHECK-NEXT: j .LBB2_4 ; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: vrepib %v1, 127 -; CHECK-NEXT: vsrab %v3, %v2, %v1 ; CHECK-NEXT: ahik %r1, %r0, -128 -; CHECK-NEXT: vsra %v1, %v3, %v1 -; CHECK-NEXT: vlvgp %v3, %r1, %r1 -; CHECK-NEXT: vrepb %v3, %v3, 15 -; CHECK-NEXT: vsrab %v2, %v2, %v3 -; CHECK-NEXT: vsra %v2, %v2, %v3 +; CHECK-NEXT: vlvgp %v2, %r1, %r1 +; CHECK-NEXT: vrepib %v3, 127 +; CHECK-NEXT: vrepb %v2, %v2, 15 +; CHECK-NEXT: vsrab %v4, %v1, %v3 +; CHECK-NEXT: vsrab %v5, %v1, %v2 +; CHECK-NEXT: vsra %v1, %v4, %v3 +; CHECK-NEXT: vsra %v2, %v5, %v2 ; CHECK-NEXT: cije %r0, 0, .LBB2_4 ; CHECK-NEXT: .LBB2_3: ; CHECK-NEXT: vlr %v0, %v2 diff --git a/llvm/test/CodeGen/SystemZ/shift-17.ll b/llvm/test/CodeGen/SystemZ/shift-17.ll index 8f5f9abd0540b..474b1a9230c77 100644 --- a/llvm/test/CodeGen/SystemZ/shift-17.ll +++ b/llvm/test/CodeGen/SystemZ/shift-17.ll @@ -32,14 +32,14 @@ define i128 @f1(i128 %a, i128 %b) { define i128 @f2(i128 %a, i128 %b) { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v1, 0(%r3), 3 ; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vrepib %v2, 5 -; CHECK-NEXT: vsl %v1, %v1, %v2 -; CHECK-NEXT: vrepib %v2, 123 -; CHECK-NEXT: vsrlb %v0, %v0, %v2 -; CHECK-NEXT: vsrl %v0, %v0, %v2 -; CHECK-NEXT: vo %v0, %v1, %v0 +; CHECK-NEXT: vrepib %v1, 123 +; CHECK-NEXT: vl %v2, 0(%r3), 3 +; CHECK-NEXT: vsrlb %v0, %v0, %v1 +; CHECK-NEXT: vrepib %v3, 5 +; CHECK-NEXT: vsl %v2, %v2, %v3 +; CHECK-NEXT: vsrl %v0, %v0, %v1 +; CHECK-NEXT: vo %v0, %v2, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: 
br %r14 ; @@ -58,14 +58,14 @@ define i128 @f2(i128 %a, i128 %b) { define i128 @f3(i128 %a, i128 %b) { ; CHECK-LABEL: f3: ; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 0(%r3), 3 ; CHECK-NEXT: vl %v1, 0(%r4), 3 ; CHECK-NEXT: vrepib %v2, 86 +; CHECK-NEXT: vrepib %v3, 42 ; CHECK-NEXT: vsrlb %v1, %v1, %v2 -; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vslb %v0, %v0, %v3 ; CHECK-NEXT: vsrl %v1, %v1, %v2 -; CHECK-NEXT: vrepib %v2, 42 -; CHECK-NEXT: vslb %v0, %v0, %v2 -; CHECK-NEXT: vsl %v0, %v0, %v2 +; CHECK-NEXT: vsl %v0, %v0, %v3 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 @@ -88,18 +88,18 @@ define i128 @f4(i128 %a, i128 %b, i128 %sh) { ; CHECK-LABEL: f4: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r0, 12(%r5) -; CHECK-NEXT: vlvgp %v2, %r0, %r0 -; CHECK-NEXT: vl %v1, 0(%r3), 3 -; CHECK-NEXT: vrepb %v2, %v2, 15 -; CHECK-NEXT: vslb %v1, %v1, %v2 ; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vsl %v1, %v1, %v2 -; CHECK-NEXT: vrepib %v2, 1 +; CHECK-NEXT: vlvgp %v1, %r0, %r0 ; CHECK-NEXT: xilf %r0, 4294967295 -; CHECK-NEXT: vsrl %v0, %v0, %v2 ; CHECK-NEXT: vlvgp %v2, %r0, %r0 +; CHECK-NEXT: vl %v3, 0(%r3), 3 +; CHECK-NEXT: vrepib %v4, 1 +; CHECK-NEXT: vrepb %v1, %v1, 15 +; CHECK-NEXT: vsrl %v0, %v0, %v4 ; CHECK-NEXT: vrepb %v2, %v2, 15 +; CHECK-NEXT: vslb %v3, %v3, %v1 ; CHECK-NEXT: vsrlb %v0, %v0, %v2 +; CHECK-NEXT: vsl %v1, %v3, %v1 ; CHECK-NEXT: vsrl %v0, %v0, %v2 ; CHECK-NEXT: vo %v0, %v1, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 @@ -108,18 +108,18 @@ define i128 @f4(i128 %a, i128 %b, i128 %sh) { ; Z15-LABEL: f4: ; Z15: # %bb.0: ; Z15-NEXT: l %r0, 12(%r5) -; Z15-NEXT: vlvgp %v2, %r0, %r0 -; Z15-NEXT: vl %v1, 0(%r3), 3 -; Z15-NEXT: vrepb %v2, %v2, 15 ; Z15-NEXT: vl %v0, 0(%r4), 3 -; Z15-NEXT: vslb %v1, %v1, %v2 -; Z15-NEXT: vsl %v1, %v1, %v2 -; Z15-NEXT: vrepib %v2, 1 +; Z15-NEXT: vlvgp %v1, %r0, %r0 ; Z15-NEXT: xilf %r0, 4294967295 -; Z15-NEXT: vsrl %v0, %v0, %v2 ; Z15-NEXT: vlvgp %v2, %r0, %r0 +; Z15-NEXT: vl %v3, 0(%r3), 3 +; Z15-NEXT: vrepib 
%v4, 1 +; Z15-NEXT: vrepb %v1, %v1, 15 +; Z15-NEXT: vsrl %v0, %v0, %v4 ; Z15-NEXT: vrepb %v2, %v2, 15 +; Z15-NEXT: vslb %v3, %v3, %v1 ; Z15-NEXT: vsrlb %v0, %v0, %v2 +; Z15-NEXT: vsl %v1, %v3, %v1 ; Z15-NEXT: vsrl %v0, %v0, %v2 ; Z15-NEXT: vo %v0, %v1, %v0 ; Z15-NEXT: vst %v0, 0(%r2), 3 @@ -153,14 +153,14 @@ define i128 @f5(i128 %a, i128 %b) { define i128 @f6(i128 %a, i128 %b) { ; CHECK-LABEL: f6: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v1, 0(%r4), 3 ; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepib %v2, 5 -; CHECK-NEXT: vsrl %v1, %v1, %v2 -; CHECK-NEXT: vrepib %v2, 123 -; CHECK-NEXT: vslb %v0, %v0, %v2 -; CHECK-NEXT: vsl %v0, %v0, %v2 -; CHECK-NEXT: vo %v0, %v0, %v1 +; CHECK-NEXT: vrepib %v1, 123 +; CHECK-NEXT: vl %v2, 0(%r4), 3 +; CHECK-NEXT: vslb %v0, %v0, %v1 +; CHECK-NEXT: vrepib %v3, 5 +; CHECK-NEXT: vsrl %v2, %v2, %v3 +; CHECK-NEXT: vsl %v0, %v0, %v1 +; CHECK-NEXT: vo %v0, %v0, %v2 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 ; @@ -179,14 +179,14 @@ define i128 @f6(i128 %a, i128 %b) { define i128 @f7(i128 %a, i128 %b) { ; CHECK-LABEL: f7: ; CHECK: # %bb.0: +; CHECK-NEXT: vl %v0, 0(%r3), 3 ; CHECK-NEXT: vl %v1, 0(%r4), 3 ; CHECK-NEXT: vrepib %v2, 42 +; CHECK-NEXT: vrepib %v3, 86 ; CHECK-NEXT: vsrlb %v1, %v1, %v2 -; CHECK-NEXT: vl %v0, 0(%r3), 3 +; CHECK-NEXT: vslb %v0, %v0, %v3 ; CHECK-NEXT: vsrl %v1, %v1, %v2 -; CHECK-NEXT: vrepib %v2, 86 -; CHECK-NEXT: vslb %v0, %v0, %v2 -; CHECK-NEXT: vsl %v0, %v0, %v2 +; CHECK-NEXT: vsl %v0, %v0, %v3 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 @@ -209,18 +209,18 @@ define i128 @f8(i128 %a, i128 %b, i128 %sh) { ; CHECK-LABEL: f8: ; CHECK: # %bb.0: ; CHECK-NEXT: l %r0, 12(%r5) -; CHECK-NEXT: vlvgp %v2, %r0, %r0 -; CHECK-NEXT: vl %v1, 0(%r4), 3 -; CHECK-NEXT: vrepb %v2, %v2, 15 -; CHECK-NEXT: vsrlb %v1, %v1, %v2 ; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vsrl %v1, %v1, %v2 -; CHECK-NEXT: vrepib %v2, 1 +; CHECK-NEXT: vlvgp %v1, %r0, %r0 ; CHECK-NEXT: xilf %r0, 
4294967295 -; CHECK-NEXT: vsl %v0, %v0, %v2 ; CHECK-NEXT: vlvgp %v2, %r0, %r0 +; CHECK-NEXT: vl %v3, 0(%r4), 3 +; CHECK-NEXT: vrepib %v4, 1 +; CHECK-NEXT: vrepb %v1, %v1, 15 +; CHECK-NEXT: vsl %v0, %v0, %v4 ; CHECK-NEXT: vrepb %v2, %v2, 15 +; CHECK-NEXT: vsrlb %v3, %v3, %v1 ; CHECK-NEXT: vslb %v0, %v0, %v2 +; CHECK-NEXT: vsrl %v1, %v3, %v1 ; CHECK-NEXT: vsl %v0, %v0, %v2 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vst %v0, 0(%r2), 3 @@ -229,18 +229,18 @@ define i128 @f8(i128 %a, i128 %b, i128 %sh) { ; Z15-LABEL: f8: ; Z15: # %bb.0: ; Z15-NEXT: l %r0, 12(%r5) -; Z15-NEXT: vlvgp %v2, %r0, %r0 -; Z15-NEXT: vl %v1, 0(%r4), 3 -; Z15-NEXT: vrepb %v2, %v2, 15 ; Z15-NEXT: vl %v0, 0(%r3), 3 -; Z15-NEXT: vsrlb %v1, %v1, %v2 -; Z15-NEXT: vsrl %v1, %v1, %v2 -; Z15-NEXT: vrepib %v2, 1 +; Z15-NEXT: vlvgp %v1, %r0, %r0 ; Z15-NEXT: xilf %r0, 4294967295 -; Z15-NEXT: vsl %v0, %v0, %v2 ; Z15-NEXT: vlvgp %v2, %r0, %r0 +; Z15-NEXT: vl %v3, 0(%r4), 3 +; Z15-NEXT: vrepib %v4, 1 +; Z15-NEXT: vrepb %v1, %v1, 15 +; Z15-NEXT: vsl %v0, %v0, %v4 ; Z15-NEXT: vrepb %v2, %v2, 15 +; Z15-NEXT: vsrlb %v3, %v3, %v1 ; Z15-NEXT: vslb %v0, %v0, %v2 +; Z15-NEXT: vsrl %v1, %v3, %v1 ; Z15-NEXT: vsl %v0, %v0, %v2 ; Z15-NEXT: vo %v0, %v0, %v1 ; Z15-NEXT: vst %v0, 0(%r2), 3 @@ -253,14 +253,14 @@ define i128 @f8(i128 %a, i128 %b, i128 %sh) { define i128 @f9(i128 %a, i128 %b) { ; CHECK-LABEL: f9: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v1, 0(%r4), 3 ; CHECK-NEXT: vl %v0, 0(%r3), 3 -; CHECK-NEXT: vrepib %v2, 5 -; CHECK-NEXT: vsrl %v1, %v1, %v2 -; CHECK-NEXT: vrepib %v2, 123 -; CHECK-NEXT: vslb %v0, %v0, %v2 -; CHECK-NEXT: vsl %v0, %v0, %v2 -; CHECK-NEXT: vo %v0, %v0, %v1 +; CHECK-NEXT: vrepib %v1, 123 +; CHECK-NEXT: vl %v2, 0(%r4), 3 +; CHECK-NEXT: vslb %v0, %v0, %v1 +; CHECK-NEXT: vrepib %v3, 5 +; CHECK-NEXT: vsrl %v2, %v2, %v3 +; CHECK-NEXT: vsl %v0, %v0, %v1 +; CHECK-NEXT: vo %v0, %v0, %v2 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 ; @@ -279,14 +279,14 @@ define i128 @f9(i128 %a, i128 %b) { define 
i128 @f10(i128 %a, i128 %b) { ; CHECK-LABEL: f10: ; CHECK: # %bb.0: -; CHECK-NEXT: vl %v1, 0(%r3), 3 ; CHECK-NEXT: vl %v0, 0(%r4), 3 -; CHECK-NEXT: vrepib %v2, 5 -; CHECK-NEXT: vsl %v1, %v1, %v2 -; CHECK-NEXT: vrepib %v2, 123 -; CHECK-NEXT: vsrlb %v0, %v0, %v2 -; CHECK-NEXT: vsrl %v0, %v0, %v2 -; CHECK-NEXT: vo %v0, %v1, %v0 +; CHECK-NEXT: vrepib %v1, 123 +; CHECK-NEXT: vl %v2, 0(%r3), 3 +; CHECK-NEXT: vsrlb %v0, %v0, %v1 +; CHECK-NEXT: vrepib %v3, 5 +; CHECK-NEXT: vsl %v2, %v2, %v3 +; CHECK-NEXT: vsrl %v0, %v0, %v1 +; CHECK-NEXT: vo %v0, %v2, %v0 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: br %r14 ; diff --git a/llvm/test/CodeGen/SystemZ/soft-float-args.ll b/llvm/test/CodeGen/SystemZ/soft-float-args.ll index 06b362672b1f6..542c61b27ace0 100644 --- a/llvm/test/CodeGen/SystemZ/soft-float-args.ll +++ b/llvm/test/CodeGen/SystemZ/soft-float-args.ll @@ -56,8 +56,9 @@ define fp128 @f2_fp128(fp128 %arg) { define <2 x double> @f3(<2 x double> %arg) { ; CHECK-LABEL: f3: ; CHECK-NOT: %{{[fv]}} -; CHECK: lg %r13, 8(%r2) -; CHECK-NEXT: lg %r2, 0(%r2) +; CHECK: lg %r0, 0(%r2) +; CHECK-NEXT: lg %r13, 8(%r2) +; CHECK-NEXT: lgr %r2, %r0 ; CHECK-NEXT: llihh %r3, 16368 ; CHECK-NEXT: brasl %r14, __adddf3@PLT ; CHECK-NEXT: lgr %r12, %r2 @@ -156,10 +157,11 @@ define <2 x double> @f9(<2 x double> %A, <2 x double> %B, <2 x double> %C, ; CHECK: aghi %r15, -160 ; CHECK-NEXT: .cfi_def_cfa_offset 320 ; CHECK-NEXT: lg %r1, 344(%r15) -; CHECK-NEXT: lg %r13, 8(%r2) -; CHECK-NEXT: lg %r2, 0(%r2) +; CHECK-NEXT: lg %r0, 0(%r2) ; CHECK-NEXT: lg %r3, 0(%r1) +; CHECK-NEXT: lg %r13, 8(%r2) ; CHECK-NEXT: lg %r12, 8(%r1) +; CHECK-NEXT: lgr %r2, %r0 ; CHECK-NEXT: brasl %r14, __adddf3@PLT ; CHECK-NEXT: lgr %r11, %r2 ; CHECK-NEXT: lgr %r2, %r13 diff --git a/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll b/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll index f7bbad9055afd..4f2ec35868af7 100644 --- a/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll +++ 
b/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll @@ -5,34 +5,34 @@ define void @fun0(<4 x i31> %src, ptr %p) ; CHECK-LABEL: fun0: ; CHECK: # %bb.0: -; CHECK-NEXT: vlgvf %r0, %v24, 0 -; CHECK-NEXT: vlvgp %v0, %r0, %r0 -; CHECK-NEXT: vrepib %v1, 93 ; CHECK-NEXT: vlgvf %r0, %v24, 1 -; CHECK-NEXT: vslb %v0, %v0, %v1 -; CHECK-NEXT: larl %r1, .LCPI0_0 -; CHECK-NEXT: vl %v2, 0(%r1), 3 -; CHECK-NEXT: vsl %v0, %v0, %v1 -; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vn %v1, %v1, %v2 -; CHECK-NEXT: vrepib %v3, 62 -; CHECK-NEXT: vslb %v1, %v1, %v3 -; CHECK-NEXT: vlgvf %r0, %v24, 2 -; CHECK-NEXT: vsl %v1, %v1, %v3 -; CHECK-NEXT: vo %v0, %v0, %v1 +; CHECK-NEXT: vlgvf %r1, %v24, 0 +; CHECK-NEXT: larl %r3, .LCPI0_0 +; CHECK-NEXT: vlgvf %r4, %v24, 2 +; CHECK-NEXT: vl %v0, 0(%r3), 3 ; CHECK-NEXT: vlvgp %v1, %r0, %r0 -; CHECK-NEXT: vn %v1, %v1, %v2 -; CHECK-NEXT: vrepib %v3, 31 -; CHECK-NEXT: vslb %v1, %v1, %v3 +; CHECK-NEXT: vlvgp %v2, %r1, %r1 +; CHECK-NEXT: vlvgp %v3, %r4, %r4 +; CHECK-NEXT: vrepib %v4, 93 +; CHECK-NEXT: vn %v1, %v1, %v0 +; CHECK-NEXT: vrepib %v5, 62 +; CHECK-NEXT: vslb %v2, %v2, %v4 +; CHECK-NEXT: vslb %v1, %v1, %v5 +; CHECK-NEXT: vn %v3, %v3, %v0 +; CHECK-NEXT: vrepib %v6, 31 ; CHECK-NEXT: vlgvf %r0, %v24, 3 -; CHECK-NEXT: vsl %v1, %v1, %v3 -; CHECK-NEXT: vo %v0, %v0, %v1 -; CHECK-NEXT: vlvgp %v1, %r0, %r0 +; CHECK-NEXT: vslb %v3, %v3, %v6 +; CHECK-NEXT: vsl %v2, %v2, %v4 +; CHECK-NEXT: vsl %v1, %v1, %v5 +; CHECK-NEXT: vlvgp %v4, %r0, %r0 ; CHECK-NEXT: larl %r1, .LCPI0_1 -; CHECK-NEXT: vn %v1, %v1, %v2 -; CHECK-NEXT: vo %v0, %v0, %v1 -; CHECK-NEXT: vl %v1, 0(%r1), 3 -; CHECK-NEXT: vn %v0, %v0, %v1 +; CHECK-NEXT: vo %v1, %v2, %v1 +; CHECK-NEXT: vsl %v2, %v3, %v6 +; CHECK-NEXT: vo %v1, %v1, %v2 +; CHECK-NEXT: vl %v2, 0(%r1), 3 +; CHECK-NEXT: vn %v0, %v4, %v0 +; CHECK-NEXT: vo %v0, %v1, %v0 +; CHECK-NEXT: vn %v0, %v0, %v2 ; CHECK-NEXT: vst %v0, 0(%r2), 4 ; CHECK-NEXT: br %r14 { @@ -49,34 +49,34 @@ define i16 @fun1(<16 x i1> %src) ; CHECK-NEXT: vlgvb %r0, 
%v24, 0 ; CHECK-NEXT: vlgvb %r1, %v24, 1 ; CHECK-NEXT: risblg %r0, %r0, 16, 144, 15 +; CHECK-NEXT: vlgvb %r2, %v24, 2 ; CHECK-NEXT: rosbg %r0, %r1, 49, 49, 14 -; CHECK-NEXT: vlgvb %r1, %v24, 2 -; CHECK-NEXT: rosbg %r0, %r1, 50, 50, 13 ; CHECK-NEXT: vlgvb %r1, %v24, 3 +; CHECK-NEXT: rosbg %r0, %r2, 50, 50, 13 +; CHECK-NEXT: vlgvb %r2, %v24, 4 ; CHECK-NEXT: rosbg %r0, %r1, 51, 51, 12 -; CHECK-NEXT: vlgvb %r1, %v24, 4 -; CHECK-NEXT: rosbg %r0, %r1, 52, 52, 11 ; CHECK-NEXT: vlgvb %r1, %v24, 5 +; CHECK-NEXT: rosbg %r0, %r2, 52, 52, 11 +; CHECK-NEXT: vlgvb %r2, %v24, 6 ; CHECK-NEXT: rosbg %r0, %r1, 53, 53, 10 -; CHECK-NEXT: vlgvb %r1, %v24, 6 -; CHECK-NEXT: rosbg %r0, %r1, 54, 54, 9 ; CHECK-NEXT: vlgvb %r1, %v24, 7 +; CHECK-NEXT: rosbg %r0, %r2, 54, 54, 9 +; CHECK-NEXT: vlgvb %r2, %v24, 8 ; CHECK-NEXT: rosbg %r0, %r1, 55, 55, 8 -; CHECK-NEXT: vlgvb %r1, %v24, 8 -; CHECK-NEXT: rosbg %r0, %r1, 56, 56, 7 ; CHECK-NEXT: vlgvb %r1, %v24, 9 +; CHECK-NEXT: rosbg %r0, %r2, 56, 56, 7 +; CHECK-NEXT: vlgvb %r2, %v24, 10 ; CHECK-NEXT: rosbg %r0, %r1, 57, 57, 6 -; CHECK-NEXT: vlgvb %r1, %v24, 10 -; CHECK-NEXT: rosbg %r0, %r1, 58, 58, 5 ; CHECK-NEXT: vlgvb %r1, %v24, 11 +; CHECK-NEXT: rosbg %r0, %r2, 58, 58, 5 +; CHECK-NEXT: vlgvb %r2, %v24, 12 ; CHECK-NEXT: rosbg %r0, %r1, 59, 59, 4 -; CHECK-NEXT: vlgvb %r1, %v24, 12 -; CHECK-NEXT: rosbg %r0, %r1, 60, 60, 3 ; CHECK-NEXT: vlgvb %r1, %v24, 13 +; CHECK-NEXT: rosbg %r0, %r2, 60, 60, 3 +; CHECK-NEXT: vlgvb %r2, %v24, 14 ; CHECK-NEXT: rosbg %r0, %r1, 61, 61, 2 -; CHECK-NEXT: vlgvb %r1, %v24, 14 -; CHECK-NEXT: rosbg %r0, %r1, 62, 62, 1 ; CHECK-NEXT: vlgvb %r1, %v24, 15 +; CHECK-NEXT: rosbg %r0, %r2, 62, 62, 1 ; CHECK-NEXT: rosbg %r0, %r1, 63, 63, 0 ; CHECK-NEXT: llhr %r2, %r0 ; CHECK-NEXT: aghi %r15, 168 @@ -179,9 +179,9 @@ define void @fun3(ptr %src, ptr %p) ; CHECK-NEXT: vrepib %v2, 32 ; CHECK-NEXT: vslb %v0, %v0, %v2 ; CHECK-NEXT: vo %v0, %v1, %v0 +; CHECK-NEXT: vsrlb %v1, %v0, %v2 ; CHECK-NEXT: vstef %v0, 8(%r3), 3 -; CHECK-NEXT: vsrlb 
%v0, %v0, %v2 -; CHECK-NEXT: vsteg %v0, 0(%r3), 1 +; CHECK-NEXT: vsteg %v1, 0(%r3), 1 ; CHECK-NEXT: br %r14 { %tmp = load <3 x i31>, ptr %src diff --git a/llvm/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll b/llvm/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll index c13293bd64a16..5034358b675c7 100644 --- a/llvm/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll +++ b/llvm/test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll @@ -63,7 +63,7 @@ define <16 x i16> @fun3(<16 x i8> %val1, <16 x i8> %val2, <16 x i16> %val3, <16 ; CHECK-DAG: vceqh [[REG4:%v[0-9]+]], %v30, %v27 ; CHECK-DAG: vl [[REG5:%v[0-9]+]], 176(%r15) ; CHECK-DAG: vl [[REG6:%v[0-9]+]], 160(%r15) -; CHECK-DAG: vo [[REG7:%v[0-9]+]], %v2, [[REG4]] +; CHECK-DAG: vo [[REG7:%v[0-9]+]], %v0, [[REG4]] ; CHECK-DAG: vo [[REG8:%v[0-9]+]], [[REG2]], [[REG3]] ; CHECK-DAG: vsel %v24, %v29, [[REG6]], [[REG8]] ; CHECK-DAG: vsel %v26, %v31, [[REG5]], [[REG7]] @@ -117,10 +117,10 @@ define <2 x i8> @fun5(<2 x i16> %val1, <2 x i16> %val2, <2 x i8> %val3, <2 x i8> define <2 x i16> @fun6(<2 x i16> %val1, <2 x i16> %val2, <2 x i8> %val3, <2 x i8> %val4, <2 x i16> %val5, <2 x i16> %val6) { ; CHECK-LABEL: fun6: ; CHECK: # %bb.0: -; CHECK-NEXT: vceqb %v1, %v28, %v30 -; CHECK-NEXT: vceqh %v0, %v24, %v26 -; CHECK-NEXT: vuphb %v1, %v1 -; CHECK-NEXT: vo %v0, %v0, %v1 +; CHECK-NEXT: vceqb %v0, %v28, %v30 +; CHECK-NEXT: vceqh %v1, %v24, %v26 +; CHECK-NEXT: vuphb %v0, %v0 +; CHECK-NEXT: vo %v0, %v1, %v0 ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 %cmp0 = icmp eq <2 x i16> %val1, %val2 @@ -133,10 +133,10 @@ define <2 x i16> @fun6(<2 x i16> %val1, <2 x i16> %val2, <2 x i8> %val3, <2 x i8 define <2 x i32> @fun7(<2 x i16> %val1, <2 x i16> %val2, <2 x i8> %val3, <2 x i8> %val4, <2 x i32> %val5, <2 x i32> %val6) { ; CHECK-LABEL: fun7: ; CHECK: # %bb.0: -; CHECK-NEXT: vceqb %v1, %v28, %v30 -; CHECK-NEXT: vceqh %v0, %v24, %v26 -; CHECK-NEXT: vuphb %v1, %v1 -; CHECK-NEXT: vo %v0, %v0, %v1 +; CHECK-NEXT: vceqb %v0, %v28, %v30 +; 
CHECK-NEXT: vceqh %v1, %v24, %v26 +; CHECK-NEXT: vuphb %v0, %v0 +; CHECK-NEXT: vo %v0, %v1, %v0 ; CHECK-NEXT: vuphh %v0, %v0 ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 @@ -259,10 +259,10 @@ define <16 x i16> @fun12(<16 x i16> %val1, <16 x i16> %val2, <16 x i32> %val3, < define <2 x i16> @fun13(<2 x i32> %val1, <2 x i32> %val2, <2 x i64> %val3, <2 x i64> %val4, <2 x i16> %val5, <2 x i16> %val6) { ; CHECK-LABEL: fun13: ; CHECK: # %bb.0: -; CHECK-NEXT: vceqg %v1, %v28, %v30 -; CHECK-NEXT: vceqf %v0, %v24, %v26 -; CHECK-NEXT: vpkg %v1, %v1, %v1 -; CHECK-NEXT: vx %v0, %v0, %v1 +; CHECK-NEXT: vceqg %v0, %v28, %v30 +; CHECK-NEXT: vceqf %v1, %v24, %v26 +; CHECK-NEXT: vpkg %v0, %v0, %v0 +; CHECK-NEXT: vx %v0, %v1, %v0 ; CHECK-NEXT: vpkf %v0, %v0, %v0 ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 @@ -276,10 +276,10 @@ define <2 x i16> @fun13(<2 x i32> %val1, <2 x i32> %val2, <2 x i64> %val3, <2 x define <2 x i32> @fun14(<2 x i32> %val1, <2 x i32> %val2, <2 x i64> %val3, <2 x i64> %val4, <2 x i32> %val5, <2 x i32> %val6) { ; CHECK-LABEL: fun14: ; CHECK: # %bb.0: -; CHECK-NEXT: vceqg %v1, %v28, %v30 -; CHECK-NEXT: vceqf %v0, %v24, %v26 -; CHECK-NEXT: vpkg %v1, %v1, %v1 -; CHECK-NEXT: vx %v0, %v0, %v1 +; CHECK-NEXT: vceqg %v0, %v28, %v30 +; CHECK-NEXT: vceqf %v1, %v24, %v26 +; CHECK-NEXT: vpkg %v0, %v0, %v0 +; CHECK-NEXT: vx %v0, %v1, %v0 ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 %cmp0 = icmp eq <2 x i32> %val1, %val2 @@ -324,10 +324,10 @@ define <4 x i16> @fun16(<4 x i32> %val1, <4 x i32> %val2, <4 x i16> %val3, <4 x define <4 x i32> @fun17(<4 x i32> %val1, <4 x i32> %val2, <4 x i16> %val3, <4 x i16> %val4, <4 x i32> %val5, <4 x i32> %val6) { ; CHECK-LABEL: fun17: ; CHECK: # %bb.0: -; CHECK-NEXT: vceqh %v1, %v28, %v30 -; CHECK-NEXT: vceqf %v0, %v24, %v26 -; CHECK-NEXT: vuphh %v1, %v1 -; CHECK-NEXT: vn %v0, %v0, %v1 +; CHECK-NEXT: vceqh %v0, %v28, %v30 +; CHECK-NEXT: vceqf %v1, %v24, %v26 +; CHECK-NEXT: vuphh %v0, %v0 +; 
CHECK-NEXT: vn %v0, %v1, %v0 ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 %cmp0 = icmp eq <4 x i32> %val1, %val2 @@ -340,10 +340,10 @@ define <4 x i32> @fun17(<4 x i32> %val1, <4 x i32> %val2, <4 x i16> %val3, <4 x define <4 x i64> @fun18(<4 x i32> %val1, <4 x i32> %val2, <4 x i16> %val3, <4 x i16> %val4, <4 x i64> %val5, <4 x i64> %val6) { ; CHECK-LABEL: fun18: ; CHECK: # %bb.0: -; CHECK-NEXT: vceqh %v1, %v28, %v30 -; CHECK-NEXT: vceqf %v0, %v24, %v26 -; CHECK-NEXT: vuphh %v1, %v1 -; CHECK-NEXT: vn %v0, %v0, %v1 +; CHECK-NEXT: vceqh %v0, %v28, %v30 +; CHECK-NEXT: vceqf %v1, %v24, %v26 +; CHECK-NEXT: vuphh %v0, %v0 +; CHECK-NEXT: vn %v0, %v1, %v0 ; CHECK-DAG: vuphf [[REG0:%v[0-9]+]], %v0 ; CHECK-DAG: vuplf [[REG1:%v[0-9]+]], %v0 ; CHECK-NEXT: vsel %v24, %v25, %v29, [[REG0]] @@ -475,27 +475,27 @@ define <2 x float> @fun25(<2 x float> %val1, <2 x float> %val2, <2 x double> %va ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 +; CHECK-NEXT: vmrhf %v2, %v26, %v26 +; CHECK-NEXT: vmrhf %v3, %v24, %v24 ; CHECK-NEXT: vldeb %v0, %v0 ; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 ; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 +; CHECK-NEXT: vldeb %v3, %v3 +; CHECK-NEXT: vfchdb %v0, %v1, %v0 +; CHECK-NEXT: vfchdb %v1, %v3, %v2 +; CHECK-NEXT: vfchdb %v2, %v28, %v30 ; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vfchdb %v1, %v28, %v30 -; CHECK-NEXT: vpkg %v1, %v1, %v1 +; CHECK-NEXT: vpkg %v1, %v2, %v2 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 ; ; CHECK-Z14-LABEL: fun25: ; CHECK-Z14: # %bb.0: -; CHECK-Z14-NEXT: vfchdb %v1, %v28, %v30 -; CHECK-Z14-NEXT: vfchsb %v0, %v24, %v26 -; CHECK-Z14-NEXT: vpkg %v1, %v1, %v1 -; CHECK-Z14-NEXT: vo %v0, %v0, %v1 +; CHECK-Z14-NEXT: vfchdb %v0, %v28, %v30 +; CHECK-Z14-NEXT: vfchsb %v1, %v24, %v26 
+; CHECK-Z14-NEXT: vpkg %v0, %v0, %v0 +; CHECK-Z14-NEXT: vo %v0, %v1, %v0 ; CHECK-Z14-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-Z14-NEXT: br %r14 %cmp0 = fcmp ogt <2 x float> %val1, %val2 @@ -510,14 +510,14 @@ define <2 x double> @fun26(<2 x float> %val1, <2 x float> %val2, <2 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 +; CHECK-NEXT: vmrhf %v2, %v26, %v26 +; CHECK-NEXT: vmrhf %v3, %v24, %v24 ; CHECK-NEXT: vldeb %v0, %v0 ; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 ; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 +; CHECK-NEXT: vldeb %v3, %v3 +; CHECK-NEXT: vfchdb %v0, %v1, %v0 +; CHECK-NEXT: vfchdb %v1, %v3, %v2 ; CHECK-NEXT: vpkg %v0, %v1, %v0 ; CHECK-NEXT: vuphf %v0, %v0 ; CHECK-NEXT: vfchdb %v1, %v28, %v30 @@ -581,8 +581,8 @@ define <4 x float> @fun28(<4 x float> %val1, <4 x float> %val2, <4 x float> %val ; CHECK-DAG: vmrhf [[REG17:%v[0-9]+]], %v30, %v30 ; CHECK-DAG: vldeb [[REG19:%v[0-9]+]], [[REG17]] ; CHECK-DAG: vldeb [[REG20:%v[0-9]+]], [[REG8]] -; CHECK-NEXT: vfchdb %v2, [[REG20]], [[REG19]] -; CHECK-NEXT: vpkg [[REG21:%v[0-9]+]], %v2, [[REG16]] +; CHECK-NEXT: vfchdb %v3, [[REG20]], [[REG19]] +; CHECK-NEXT: vpkg [[REG21:%v[0-9]+]], %v3, [[REG16]] ; CHECK-NEXT: vx %v0, [[REG11]], [[REG21]] ; CHECK-NEXT: vsel %v24, %v25, %v27, %v0 ; CHECK-NEXT: br %r14 @@ -606,26 +606,26 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x float> %va ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 +; CHECK-NEXT: vmrhf %v2, %v26, %v26 +; CHECK-NEXT: vmrhf %v3, %v24, %v24 +; CHECK-NEXT: vmrlf %v4, %v30, %v30 +; CHECK-NEXT: vmrlf %v5, %v28, %v28 +; CHECK-NEXT: vmrhf %v6, %v30, %v30 +; CHECK-NEXT: vmrhf %v7, %v28, %v28 ; CHECK-NEXT: vldeb %v0, %v0 ; CHECK-NEXT: vldeb %v1, %v1 ; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; 
CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vmrhf %v3, %v28, %v28 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vmrlf %v1, %v30, %v30 -; CHECK-NEXT: vmrlf %v2, %v28, %v28 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vmrhf %v2, %v30, %v30 ; CHECK-NEXT: vldeb %v2, %v2 ; CHECK-NEXT: vldeb %v3, %v3 -; CHECK-NEXT: vfchdb %v2, %v3, %v2 -; CHECK-NEXT: vpkg %v1, %v2, %v1 +; CHECK-NEXT: vfchdb %v1, %v3, %v2 +; CHECK-NEXT: vpkg %v0, %v1, %v0 +; CHECK-NEXT: vldeb %v4, %v4 +; CHECK-NEXT: vldeb %v5, %v5 +; CHECK-NEXT: vfchdb %v2, %v5, %v4 +; CHECK-NEXT: vldeb %v6, %v6 +; CHECK-NEXT: vldeb %v7, %v7 +; CHECK-NEXT: vfchdb %v3, %v7, %v6 +; CHECK-NEXT: vpkg %v1, %v3, %v2 ; CHECK-NEXT: vx %v0, %v0, %v1 ; CHECK-NEXT: vuplf %v1, %v0 ; CHECK-NEXT: vuphf %v0, %v0 @@ -653,71 +653,71 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x float> %va define <8 x float> @fun30(<8 x float> %val1, <8 x float> %val2, <8 x double> %val3, <8 x double> %val4, <8 x float> %val5, <8 x float> %val6) { ; CHECK-LABEL: fun30: ; CHECK: # %bb.0: -; CHECK-NEXT: vmrlf %v16, %v28, %v28 -; CHECK-NEXT: vmrlf %v17, %v24, %v24 -; CHECK-NEXT: vldeb %v16, %v16 -; CHECK-NEXT: vldeb %v17, %v17 -; CHECK-NEXT: vfchdb %v16, %v17, %v16 -; CHECK-NEXT: vmrhf %v17, %v28, %v28 -; CHECK-NEXT: vmrhf %v18, %v24, %v24 -; CHECK-NEXT: vldeb %v17, %v17 -; CHECK-NEXT: vl %v4, 192(%r15) -; CHECK-NEXT: vldeb %v18, %v18 -; CHECK-NEXT: vl %v5, 208(%r15) -; CHECK-NEXT: vl %v6, 160(%r15) -; CHECK-NEXT: vl %v7, 176(%r15) -; CHECK-NEXT: vl %v0, 272(%r15) -; CHECK-NEXT: vl %v1, 240(%r15) -; CHECK-NEXT: vfchdb %v17, %v18, %v17 -; CHECK-NEXT: vl %v2, 256(%r15) -; CHECK-NEXT: vl %v3, 224(%r15) -; CHECK-NEXT: vpkg %v16, %v17, %v16 -; CHECK-NEXT: vmrlf %v17, %v30, %v30 -; CHECK-NEXT: vmrlf %v18, %v26, %v26 -; CHECK-NEXT: 
vmrhf %v19, %v26, %v26 -; CHECK-NEXT: vfchdb %v7, %v27, %v7 -; CHECK-NEXT: vfchdb %v6, %v25, %v6 -; CHECK-NEXT: vfchdb %v5, %v31, %v5 -; CHECK-NEXT: vfchdb %v4, %v29, %v4 -; CHECK-NEXT: vpkg %v6, %v6, %v7 -; CHECK-NEXT: vpkg %v4, %v4, %v5 -; CHECK-NEXT: vn %v5, %v16, %v6 -; CHECK-DAG: vsel %v24, %v3, %v2, %v5 -; CHECK-DAG: vldeb %v17, %v17 -; CHECK-NEXT: vldeb %v18, %v18 -; CHECK-NEXT: vfchdb %v17, %v18, %v17 -; CHECK-NEXT: vmrhf %v18, %v30, %v30 -; CHECK-NEXT: vldeb %v18, %v18 -; CHECK-NEXT: vldeb %v19, %v19 -; CHECK-NEXT: vfchdb %v18, %v19, %v18 -; CHECK-NEXT: vpkg %v17, %v18, %v17 -; CHECK-NEXT: vn %v4, %v17, %v4 -; CHECK-NEXT: vsel %v26, %v1, %v0, %v4 -; CHECK-NEXT: br %r14 +; CHECK-NEXT: vmrlf %v0, %v28, %v28 +; CHECK-NEXT: vmrlf %v1, %v24, %v24 +; CHECK-NEXT: vmrhf %v2, %v28, %v28 +; CHECK-NEXT: vmrhf %v3, %v24, %v24 +; CHECK-NEXT: vmrlf %v4, %v30, %v30 +; CHECK-NEXT: vmrlf %v5, %v26, %v26 +; CHECK-NEXT: vmrhf %v6, %v30, %v30 +; CHECK-NEXT: vmrhf %v7, %v26, %v26 +; CHECK-NEXT: vldeb %v0, %v0 +; CHECK-NEXT: vldeb %v1, %v1 +; CHECK-NEXT: vl %v16, 192(%r15), 3 +; CHECK-NEXT: vl %v17, 208(%r15), 3 +; CHECK-NEXT: vldeb %v2, %v2 +; CHECK-NEXT: vl %v18, 160(%r15), 3 +; CHECK-NEXT: vl %v19, 176(%r15), 3 +; CHECK-NEXT: vfchdb %v0, %v1, %v0 +; CHECK-NEXT: vldeb %v3, %v3 +; CHECK-NEXT: vfchdb %v1, %v3, %v2 +; CHECK-NEXT: vpkg %v0, %v1, %v0 +; CHECK-NEXT: vldeb %v4, %v4 +; CHECK-NEXT: vldeb %v5, %v5 +; CHECK-NEXT: vfchdb %v2, %v5, %v4 +; CHECK-NEXT: vfchdb %v4, %v27, %v19 +; CHECK-NEXT: vfchdb %v5, %v25, %v18 +; CHECK-NEXT: vldeb %v6, %v6 +; CHECK-NEXT: vldeb %v7, %v7 +; CHECK-NEXT: vfchdb %v3, %v7, %v6 +; CHECK-NEXT: vfchdb %v6, %v31, %v17 +; CHECK-NEXT: vfchdb %v7, %v29, %v16 +; CHECK-NEXT: vl %v16, 224(%r15), 3 +; CHECK-NEXT: vpkg %v1, %v3, %v2 +; CHECK-NEXT: vl %v3, 272(%r15), 3 +; CHECK-NEXT: vpkg %v2, %v5, %v4 +; CHECK-NEXT: vl %v4, 240(%r15), 3 +; CHECK-NEXT: vl %v5, 256(%r15), 3 +; CHECK-NEXT: vpkg %v6, %v7, %v6 +; CHECK-NEXT: vn %v1, %v1, %v6 +; CHECK-NEXT: vn 
%v0, %v0, %v2 +; CHECK-NEXT: vsel %v24, %v16, %v5, %v0 +; CHECK-NEXT: vsel %v26, %v4, %v3, %v1 +; CHECK-NEXT: br %r14 ; ; CHECK-Z14-LABEL: fun30: ; CHECK-Z14: # %bb.0: -; CHECK-Z14-NEXT: vl %v4, 192(%r15) -; CHECK-Z14-NEXT: vl %v5, 208(%r15) -; CHECK-Z14-NEXT: vl %v6, 160(%r15) -; CHECK-Z14-NEXT: vl %v7, 176(%r15) -; CHECK-Z14-NEXT: vfchdb %v7, %v27, %v7 -; CHECK-Z14-NEXT: vfchdb %v6, %v25, %v6 -; CHECK-Z14-NEXT: vfchdb %v5, %v31, %v5 -; CHECK-Z14-NEXT: vfchdb %v4, %v29, %v4 -; CHECK-Z14-DAG: vfchsb %v16, %v24, %v28 -; CHECK-Z14-DAG: vfchsb %v17, %v26, %v30 -; CHECK-Z14-DAG: vpkg %v6, %v6, %v7 -; CHECK-Z14-DAG: vpkg %v4, %v4, %v5 -; CHECK-Z14-DAG: vl %v0, 272(%r15) -; CHECK-Z14-DAG: vl %v1, 240(%r15) -; CHECK-Z14-DAG: vl %v2, 256(%r15) -; CHECK-Z14-DAG: vl %v3, 224(%r15) -; CHECK-Z14-NEXT: vn %v4, %v17, %v4 -; CHECK-Z14-NEXT: vn %v5, %v16, %v6 -; CHECK-Z14-NEXT: vsel %v24, %v3, %v2, %v5 -; CHECK-Z14-NEXT: vsel %v26, %v1, %v0, %v4 -; CHECK-Z14-NEXT: br %r14 +; CHECK-Z14-NEXT: vl %v0, 192(%r15), 3 +; CHECK-Z14-NEXT: vl %v1, 208(%r15), 3 +; CHECK-Z14-NEXT: vl %v2, 160(%r15), 3 +; CHECK-Z14-NEXT: vl %v3, 176(%r15), 3 +; CHECK-Z14-NEXT: vfchdb %v3, %v27, %v3 +; CHECK-Z14-NEXT: vfchdb %v2, %v25, %v2 +; CHECK-Z14-NEXT: vfchdb %v1, %v31, %v1 +; CHECK-Z14-NEXT: vfchdb %v0, %v29, %v0 +; CHECK-Z14-NEXT: vl %v4, 272(%r15), 3 +; CHECK-Z14-NEXT: vl %v5, 240(%r15), 3 +; CHECK-Z14-NEXT: vl %v6, 256(%r15), 3 +; CHECK-Z14-NEXT: vl %v7, 224(%r15), 3 +; CHECK-Z14-NEXT: vfchsb %v16, %v24, %v28 +; CHECK-Z14-NEXT: vfchsb %v17, %v26, %v30 +; CHECK-Z14-NEXT: vpkg %v2, %v2, %v3 +; CHECK-Z14-NEXT: vpkg %v0, %v0, %v1 +; CHECK-Z14-NEXT: vn %v0, %v17, %v0 +; CHECK-Z14-NEXT: vn %v1, %v16, %v2 +; CHECK-Z14-NEXT: vsel %v24, %v7, %v6, %v1 +; CHECK-Z14-NEXT: vsel %v26, %v5, %v4, %v0 +; CHECK-Z14-NEXT: br %r14 %cmp0 = fcmp ogt <8 x float> %val1, %val2 %cmp1 = fcmp ogt <8 x double> %val3, %val4 %and = and <8 x i1> %cmp0, %cmp1 @@ -759,22 +759,22 @@ define <2 x double> @fun32(<2 x double> %val1, <2 x 
double> %val2, <2 x double> define <4 x float> @fun33(<4 x double> %val1, <4 x double> %val2, <4 x float> %val3, <4 x float> %val4, <4 x float> %val5, <4 x float> %val6) { ; CHECK-LABEL: fun33: ; CHECK: # %bb.0: -; CHECK-NEXT: vfchdb %v0, %v26, %v30 -; CHECK-NEXT: vfchdb %v1, %v24, %v28 -; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vmrlf %v1, %v27, %v27 -; CHECK-NEXT: vmrlf %v2, %v25, %v25 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vmrhf %v2, %v27, %v27 -; CHECK-NEXT: vmrhf %v3, %v25, %v25 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vldeb %v3, %v3 -; CHECK-NEXT: vfchdb %v2, %v3, %v2 -; CHECK-NEXT: vpkg %v1, %v2, %v1 -; CHECK-NEXT: vn %v0, %v0, %v1 -; CHECK-NEXT: vsel %v24, %v29, %v31, %v0 +; CHECK-NEXT: vmrlf %v0, %v27, %v27 +; CHECK-NEXT: vmrlf %v1, %v25, %v25 +; CHECK-NEXT: vmrhf %v2, %v27, %v27 +; CHECK-NEXT: vmrhf %v3, %v25, %v25 +; CHECK-NEXT: vldeb %v0, %v0 +; CHECK-NEXT: vldeb %v1, %v1 +; CHECK-NEXT: vldeb %v2, %v2 +; CHECK-NEXT: vldeb %v3, %v3 +; CHECK-NEXT: vfchdb %v4, %v26, %v30 +; CHECK-NEXT: vfchdb %v5, %v24, %v28 +; CHECK-NEXT: vfchdb %v0, %v1, %v0 +; CHECK-NEXT: vfchdb %v1, %v3, %v2 +; CHECK-NEXT: vpkg %v2, %v5, %v4 +; CHECK-NEXT: vpkg %v0, %v1, %v0 +; CHECK-NEXT: vn %v0, %v2, %v0 +; CHECK-NEXT: vsel %v24, %v29, %v31, %v0 ; CHECK-NEXT: br %r14 ; ; CHECK-Z14-LABEL: fun33: @@ -796,43 +796,43 @@ define <4 x float> @fun33(<4 x double> %val1, <4 x double> %val2, <4 x float> %v define <4 x double> @fun34(<4 x double> %val1, <4 x double> %val2, <4 x float> %val3, <4 x float> %val4, <4 x double> %val5, <4 x double> %val6) { ; CHECK-LABEL: fun34: ; CHECK: # %bb.0: -; CHECK-NEXT: vmrlf [[REG0:%v[0-9]+]], %v27, %v27 -; CHECK-NEXT: vmrlf [[REG1:%v[0-9]+]], %v25, %v25 -; CHECK-NEXT: vldeb [[REG2:%v[0-9]+]], [[REG0]] -; CHECK-NEXT: vldeb [[REG3:%v[0-9]+]], [[REG1]] -; CHECK-NEXT: vfchdb [[REG4:%v[0-9]+]], [[REG3]], [[REG2]] -; CHECK-NEXT: vmrhf [[REG5:%v[0-9]+]], %v27, %v27 -; CHECK-NEXT: vmrhf 
[[REG6:%v[0-9]+]], %v25, %v25 -; CHECK-DAG: vldeb [[REG7:%v[0-9]+]], [[REG5]] -; CHECK-DAG: vl [[REG8:%v[0-9]+]], 176(%r15) -; CHECK-DAG: vldeb [[REG9:%v[0-9]+]], [[REG6]] -; CHECK-DAG: vl [[REG10:%v[0-9]+]], 160(%r15) -; CHECK-DAG: vfchdb [[REG11:%v[0-9]+]], [[REG9]], [[REG7]] -; CHECK-DAG: vpkg [[REG12:%v[0-9]+]], [[REG11]], [[REG4]] -; CHECK-DAG: vuphf [[REG13:%v[0-9]+]], [[REG12]] -; CHECK-DAG: vuplf [[REG14:%v[0-9]+]], [[REG12]] -; CHECK-DAG: vfchdb [[REG15:%v[0-9]+]], %v24, %v28 -; CHECK-DAG: vfchdb [[REG16:%v[0-9]+]], %v26, %v30 -; CHECK-NEXT: vn [[REG18:%v[0-9]+]], [[REG16]], [[REG14]] -; CHECK-NEXT: vn [[REG19:%v[0-9]+]], [[REG15]], [[REG13]] -; CHECK-NEXT: vsel %v24, %v29, [[REG10]], [[REG19]] -; CHECK-NEXT: vsel %v26, %v31, [[REG8]], [[REG18]] -; CHECK-NEXT: br %r14 +; CHECK-NEXT: vmrlf %v0, %v27, %v27 +; CHECK-NEXT: vmrlf %v1, %v25, %v25 +; CHECK-NEXT: vmrhf %v2, %v27, %v27 +; CHECK-NEXT: vmrhf %v3, %v25, %v25 +; CHECK-NEXT: vldeb %v0, %v0 +; CHECK-NEXT: vldeb %v1, %v1 +; CHECK-NEXT: vldeb %v2, %v2 +; CHECK-NEXT: vldeb %v3, %v3 +; CHECK-NEXT: vfchdb %v0, %v1, %v0 +; CHECK-NEXT: vfchdb %v1, %v3, %v2 +; CHECK-NEXT: vl %v2, 160(%r15), 3 +; CHECK-NEXT: vpkg %v0, %v1, %v0 +; CHECK-NEXT: vl %v1, 176(%r15), 3 +; CHECK-NEXT: vfchdb %v3, %v24, %v28 +; CHECK-NEXT: vfchdb %v4, %v26, %v30 +; CHECK-NEXT: vuphf %v5, %v0 +; CHECK-NEXT: vuplf %v0, %v0 +; CHECK-NEXT: vn %v0, %v4, %v0 +; CHECK-NEXT: vn %v3, %v3, %v5 +; CHECK-NEXT: vsel %v24, %v29, %v2, %v3 +; CHECK-NEXT: vsel %v26, %v31, %v1, %v0 +; CHECK-NEXT: br %r14 ; ; CHECK-Z14-LABEL: fun34: ; CHECK-Z14: # %bb.0: -; CHECK-Z14-NEXT: vfchsb %v4, %v25, %v27 -; CHECK-Z14-NEXT: vl %v0, 176(%r15) -; CHECK-Z14-NEXT: vl %v1, 160(%r15) -; CHECK-Z14-NEXT: vfchdb %v2, %v24, %v28 -; CHECK-Z14-NEXT: vfchdb %v3, %v26, %v30 -; CHECK-Z14-NEXT: vuphf %v5, %v4 -; CHECK-Z14-NEXT: vuplf %v4, %v4 -; CHECK-Z14-NEXT: vn %v3, %v3, %v4 -; CHECK-Z14-NEXT: vn %v2, %v2, %v5 -; CHECK-Z14-NEXT: vsel %v24, %v29, %v1, %v2 -; CHECK-Z14-NEXT: vsel 
%v26, %v31, %v0, %v3 -; CHECK-Z14-NEXT: br %r14 +; CHECK-Z14-NEXT: vfchsb %v0, %v25, %v27 +; CHECK-Z14-NEXT: vl %v1, 176(%r15), 3 +; CHECK-Z14-NEXT: vl %v2, 160(%r15), 3 +; CHECK-Z14-NEXT: vfchdb %v3, %v24, %v28 +; CHECK-Z14-NEXT: vfchdb %v4, %v26, %v30 +; CHECK-Z14-NEXT: vuphf %v5, %v0 +; CHECK-Z14-NEXT: vuplf %v0, %v0 +; CHECK-Z14-NEXT: vn %v0, %v4, %v0 +; CHECK-Z14-NEXT: vn %v3, %v3, %v5 +; CHECK-Z14-NEXT: vsel %v24, %v29, %v2, %v3 +; CHECK-Z14-NEXT: vsel %v26, %v31, %v1, %v0 +; CHECK-Z14-NEXT: br %r14 %cmp0 = fcmp ogt <4 x double> %val1, %val2 %cmp1 = fcmp ogt <4 x float> %val3, %val4 %and = and <4 x i1> %cmp0, %cmp1 diff --git a/llvm/test/CodeGen/SystemZ/vec-cmpsel-01.ll b/llvm/test/CodeGen/SystemZ/vec-cmpsel-01.ll index 7c887c0eb3278..fb0ef2eb0ff06 100644 --- a/llvm/test/CodeGen/SystemZ/vec-cmpsel-01.ll +++ b/llvm/test/CodeGen/SystemZ/vec-cmpsel-01.ll @@ -315,14 +315,14 @@ define <2 x float> @fun25(<2 x float> %val1, <2 x float> %val2, <2 x float> %val ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 +; CHECK-NEXT: vmrhf %v2, %v26, %v26 +; CHECK-NEXT: vmrhf %v3, %v24, %v24 ; CHECK-NEXT: vldeb %v0, %v0 ; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 ; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 +; CHECK-NEXT: vldeb %v3, %v3 +; CHECK-NEXT: vfchdb %v0, %v1, %v0 +; CHECK-NEXT: vfchdb %v1, %v3, %v2 ; CHECK-NEXT: vpkg %v0, %v1, %v0 ; CHECK-NEXT: vsel %v24, %v28, %v30, %v0 ; CHECK-NEXT: br %r14 @@ -336,14 +336,14 @@ define <2 x double> @fun26(<2 x float> %val1, <2 x float> %val2, <2 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 +; CHECK-NEXT: vmrhf %v2, %v26, %v26 +; CHECK-NEXT: vmrhf %v3, %v24, %v24 ; CHECK-NEXT: vldeb %v0, %v0 ; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; 
CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 ; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 +; CHECK-NEXT: vldeb %v3, %v3 +; CHECK-NEXT: vfchdb %v0, %v1, %v0 +; CHECK-NEXT: vfchdb %v1, %v3, %v2 ; CHECK-NEXT: vpkg %v0, %v1, %v0 ; CHECK-NEXT: vuphf %v0, %v0 ; CHECK-NEXT: vsel %v24, %v28, %v30, %v0 @@ -373,14 +373,14 @@ define <4 x float> @fun28(<4 x float> %val1, <4 x float> %val2, <4 x float> %val ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 +; CHECK-NEXT: vmrhf %v2, %v26, %v26 +; CHECK-NEXT: vmrhf %v3, %v24, %v24 ; CHECK-NEXT: vldeb %v0, %v0 ; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 ; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 +; CHECK-NEXT: vldeb %v3, %v3 +; CHECK-NEXT: vfchdb %v0, %v1, %v0 +; CHECK-NEXT: vfchdb %v1, %v3, %v2 ; CHECK-NEXT: vpkg %v0, %v1, %v0 ; CHECK-NEXT: vsel %v24, %v28, %v30, %v0 ; CHECK-NEXT: br %r14 @@ -394,14 +394,14 @@ define <4 x double> @fun29(<4 x float> %val1, <4 x float> %val2, <4 x double> %v ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v26, %v26 ; CHECK-NEXT: vmrlf %v1, %v24, %v24 +; CHECK-NEXT: vmrhf %v2, %v26, %v26 +; CHECK-NEXT: vmrhf %v3, %v24, %v24 ; CHECK-NEXT: vldeb %v0, %v0 ; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v26, %v26 -; CHECK-NEXT: vmrhf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 ; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 +; CHECK-NEXT: vldeb %v3, %v3 +; CHECK-NEXT: vfchdb %v0, %v1, %v0 +; CHECK-NEXT: vfchdb %v1, %v3, %v2 ; CHECK-NEXT: vpkg %v0, %v1, %v0 ; CHECK-NEXT: vuplf %v1, %v0 ; CHECK-NEXT: vuphf %v0, %v0 @@ -419,27 +419,27 @@ define <8 x float> @fun30(<8 x float> %val1, <8 x float> %val2, <8 x float> %val ; CHECK: # %bb.0: ; CHECK-NEXT: vmrlf %v0, %v30, %v30 ; CHECK-NEXT: vmrlf %v1, %v26, %v26 +; CHECK-NEXT: vmrhf 
%v2, %v30, %v30 +; CHECK-NEXT: vmrhf %v3, %v26, %v26 +; CHECK-NEXT: vmrlf %v4, %v28, %v28 +; CHECK-NEXT: vmrlf %v5, %v24, %v24 +; CHECK-NEXT: vmrhf %v6, %v28, %v28 +; CHECK-NEXT: vmrhf %v7, %v24, %v24 ; CHECK-NEXT: vldeb %v0, %v0 ; CHECK-NEXT: vldeb %v1, %v1 ; CHECK-NEXT: vfchdb %v0, %v1, %v0 -; CHECK-NEXT: vmrhf %v1, %v30, %v30 -; CHECK-NEXT: vmrhf %v2, %v26, %v26 -; CHECK-NEXT: vldeb %v1, %v1 -; CHECK-NEXT: vmrhf %v3, %v24, %v24 ; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 +; CHECK-NEXT: vldeb %v3, %v3 +; CHECK-NEXT: vfchdb %v1, %v3, %v2 ; CHECK-NEXT: vpkg %v0, %v1, %v0 -; CHECK-NEXT: vmrlf %v1, %v28, %v28 -; CHECK-NEXT: vmrlf %v2, %v24, %v24 -; CHECK-NEXT: vldeb %v1, %v1 ; CHECK-NEXT: vsel %v26, %v27, %v31, %v0 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vfchdb %v1, %v2, %v1 -; CHECK-NEXT: vmrhf %v2, %v28, %v28 -; CHECK-NEXT: vldeb %v2, %v2 -; CHECK-NEXT: vldeb %v3, %v3 -; CHECK-NEXT: vfchdb %v2, %v3, %v2 -; CHECK-NEXT: vpkg %v1, %v2, %v1 +; CHECK-NEXT: vldeb %v4, %v4 +; CHECK-NEXT: vldeb %v5, %v5 +; CHECK-NEXT: vfchdb %v2, %v5, %v4 +; CHECK-NEXT: vldeb %v6, %v6 +; CHECK-NEXT: vldeb %v7, %v7 +; CHECK-NEXT: vfchdb %v3, %v7, %v6 +; CHECK-NEXT: vpkg %v1, %v3, %v2 ; CHECK-NEXT: vsel %v24, %v25, %v29, %v1 ; CHECK-NEXT: br %r14 %cmp = fcmp ogt <8 x float> %val1, %val2 diff --git a/llvm/test/CodeGen/SystemZ/vec-eval.ll b/llvm/test/CodeGen/SystemZ/vec-eval.ll index bcdedcd3a407b..98f754880d0bd 100644 --- a/llvm/test/CodeGen/SystemZ/vec-eval.ll +++ b/llvm/test/CodeGen/SystemZ/vec-eval.ll @@ -596,10 +596,10 @@ entry: define <16 x i8> @eval45(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval45: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 -; CHECK-NEXT: vo %v0, %v28, %v24 -; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47 -; CHECK-NEXT: veval %v24, %v1, %v26, %v0, 47 +; CHECK-NEXT: veval %v0, %v26, %v24, %v28, 1 +; CHECK-NEXT: vo %v1, %v28, %v24 +; CHECK-NEXT: veval %v0, %v0, %v24, %v26, 47 +; CHECK-NEXT: 
veval %v24, %v0, %v26, %v1, 47 ; CHECK-NEXT: br %r14 entry: %0 = or <16 x i8> %src3, %src1 @@ -770,10 +770,10 @@ entry: define <16 x i8> @eval57(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval57: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 -; CHECK-NEXT: vo %v0, %v28, %v26 -; CHECK-NEXT: veval %v1, %v1, %v26, %v24, 47 -; CHECK-NEXT: veval %v24, %v1, %v24, %v0, 47 +; CHECK-NEXT: veval %v0, %v26, %v24, %v28, 1 +; CHECK-NEXT: vo %v1, %v28, %v26 +; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 47 +; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47 ; CHECK-NEXT: br %r14 entry: %not = xor <16 x i8> %src1, splat(i8 -1) @@ -1541,8 +1541,8 @@ define <16 x i8> @eval109(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vgbm %v0, 65535 ; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 -; CHECK-NEXT: vn %v2, %v26, %v24 -; CHECK-NEXT: veval %v0, %v28, %v0, %v2, 7 +; CHECK-NEXT: vn %v1, %v26, %v24 +; CHECK-NEXT: veval %v0, %v28, %v0, %v1, 7 ; CHECK-NEXT: vo %v1, %v28, %v24 ; CHECK-NEXT: veval %v0, %v0, %v24, %v26, 47 ; CHECK-NEXT: veval %v24, %v0, %v26, %v1, 47 @@ -1754,8 +1754,8 @@ define <16 x i8> @eval121(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vgbm %v0, 65535 ; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 -; CHECK-NEXT: vn %v2, %v26, %v24 -; CHECK-NEXT: veval %v0, %v28, %v0, %v2, 7 +; CHECK-NEXT: vn %v1, %v26, %v24 +; CHECK-NEXT: veval %v0, %v28, %v0, %v1, 7 ; CHECK-NEXT: vo %v1, %v28, %v26 ; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 47 ; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47 @@ -2084,10 +2084,10 @@ entry: define <16 x i8> @eval141(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval141: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 -; CHECK-NEXT: vo %v0, %v26, %v24 -; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47 -; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 143 +; CHECK-NEXT: veval %v0, %v26, %v24, 
%v28, 1 +; CHECK-NEXT: vo %v1, %v26, %v24 +; CHECK-NEXT: veval %v0, %v0, %v24, %v26, 47 +; CHECK-NEXT: veval %v24, %v0, %v1, %v28, 143 ; CHECK-NEXT: br %r14 entry: %not1 = xor <16 x i8> %src2, splat(i8 -1) @@ -2253,10 +2253,10 @@ entry: define <16 x i8> @eval151(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval151: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2 -; CHECK-NEXT: vx %v0, %v28, %v26 -; CHECK-NEXT: veval %v1, %v1, %v26, %v24, 31 -; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 143 +; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 2 +; CHECK-NEXT: vx %v1, %v28, %v26 +; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 31 +; CHECK-NEXT: veval %v24, %v0, %v1, %v24, 143 ; CHECK-NEXT: br %r14 entry: %not1 = xor <16 x i8> %src2, splat(i8 -1) @@ -2365,10 +2365,10 @@ entry: define <16 x i8> @eval157(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval157: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 -; CHECK-NEXT: vx %v0, %v28, %v26 -; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47 -; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 143 +; CHECK-NEXT: veval %v0, %v26, %v24, %v28, 1 +; CHECK-NEXT: vx %v1, %v28, %v26 +; CHECK-NEXT: veval %v0, %v0, %v24, %v26, 47 +; CHECK-NEXT: veval %v24, %v0, %v1, %v24, 143 ; CHECK-NEXT: br %r14 entry: %not1 = xor <16 x i8> %src2, splat(i8 -1) @@ -2778,10 +2778,10 @@ entry: define <16 x i8> @eval183(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval183: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2 -; CHECK-NEXT: voc %v0, %v26, %v28 -; CHECK-NEXT: veval %v1, %v1, %v26, %v24, 31 -; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 47 +; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 2 +; CHECK-NEXT: voc %v1, %v26, %v28 +; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 31 +; CHECK-NEXT: veval %v24, %v0, %v1, %v24, 47 ; CHECK-NEXT: br %r14 entry: %not = xor <16 x i8> %src1, splat(i8 -1) @@ -2884,10 +2884,10 @@ entry: define <16 x i8> @eval189(<16 
x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval189: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 -; CHECK-NEXT: voc %v0, %v26, %v28 -; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47 -; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 47 +; CHECK-NEXT: veval %v0, %v26, %v24, %v28, 1 +; CHECK-NEXT: voc %v1, %v26, %v28 +; CHECK-NEXT: veval %v0, %v0, %v24, %v26, 47 +; CHECK-NEXT: veval %v24, %v0, %v1, %v24, 47 ; CHECK-NEXT: br %r14 entry: %not = xor <16 x i8> %src1, splat(i8 -1) @@ -3480,10 +3480,10 @@ define <16 x i8> @eval228(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval228: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vno %v0, %v26, %v26 -; CHECK-NEXT: veval %v2, %v24, %v28, %v26, 2 -; CHECK-NEXT: vo %v1, %v28, %v24 -; CHECK-NEXT: veval %v0, %v2, %v0, %v24, 47 -; CHECK-NEXT: veval %v24, %v0, %v26, %v1, 47 +; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2 +; CHECK-NEXT: vo %v2, %v28, %v24 +; CHECK-NEXT: veval %v0, %v1, %v0, %v24, 47 +; CHECK-NEXT: veval %v24, %v0, %v26, %v2, 47 ; CHECK-NEXT: br %r14 entry: %not = xor <16 x i8> %src1, splat(i8 -1) @@ -3539,11 +3539,11 @@ define <16 x i8> @eval231(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval231: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vno %v0, %v26, %v26 -; CHECK-NEXT: vnc %v2, %v24, %v26 -; CHECK-NEXT: vo %v1, %v28, %v24 +; CHECK-NEXT: vnc %v1, %v24, %v26 +; CHECK-NEXT: vo %v2, %v28, %v24 ; CHECK-NEXT: vsel %v0, %v26, %v0, %v24 -; CHECK-NEXT: veval %v0, %v0, %v2, %v28, 31 -; CHECK-NEXT: veval %v24, %v0, %v26, %v1, 47 +; CHECK-NEXT: veval %v0, %v0, %v1, %v28, 31 +; CHECK-NEXT: veval %v24, %v0, %v26, %v2, 47 ; CHECK-NEXT: br %r14 entry: %not = xor <16 x i8> %src1, splat(i8 -1) diff --git a/llvm/test/CodeGen/SystemZ/vec-move-23.ll b/llvm/test/CodeGen/SystemZ/vec-move-23.ll index 1976e6710ecf9..d9a96165861b0 100644 --- a/llvm/test/CodeGen/SystemZ/vec-move-23.ll +++ b/llvm/test/CodeGen/SystemZ/vec-move-23.ll @@ -42,20 +42,20 @@ define void 
@fun2(<2 x i32> %Src, ptr %Dst) { define void @fun3(<4 x i16> %Src, ptr %Dst) { ; CHECK-LABEL: fun3: -; Z14: vuphh %v0, %v24 -; Z14-NEXT: vlgvf %r0, %v0, 3 -; Z14-NEXT: cefbr %f1, %r0 -; Z14-NEXT: vlgvf %r0, %v0, 2 -; Z14-NEXT: cefbr %f2, %r0 -; Z14-NEXT: vlgvf %r0, %v0, 1 -; Z14-NEXT: vmrhf %v1, %v2, %v1 -; Z14-NEXT: cefbr %f2, %r0 -; Z14-NEXT: vlgvf %r0, %v0, 0 -; Z14-NEXT: cefbr %f0, %r0 -; Z14-NEXT: vmrhf %v0, %v0, %v2 -; Z14-NEXT: vmrhg %v0, %v0, %v1 -; Z14-NEXT: vst %v0, 0(%r2), 3 -; Z14-NEXT: br %r14 +; Z14: vuphh %v0, %v24 +; Z14-NEXT: vlgvf %r0, %v0, 3 +; Z14-NEXT: vlgvf %r1, %v0, 2 +; Z14-NEXT: cefbr %f1, %r1 +; Z14-NEXT: vlgvf %r3, %v0, 1 +; Z14-NEXT: cefbr %f2, %r3 +; Z14-NEXT: vlgvf %r4, %v0, 0 +; Z14-NEXT: cefbr %f0, %r0 +; Z14-NEXT: vmrhf %v0, %v1, %v0 +; Z14-NEXT: cefbr %f3, %r4 +; Z14-NEXT: vmrhf %v1, %v3, %v2 +; Z14-NEXT: vmrhg %v0, %v1, %v0 +; Z14-NEXT: vst %v0, 0(%r2), 3 +; Z14-NEXT: br %r14 ; Z15: vuphh %v0, %v24 ; Z15-NEXT: vcefb %v0, %v0, 0, 0 @@ -106,20 +106,20 @@ define void @fun6(<2 x i32> %Src, ptr %Dst) { define void @fun7(<4 x i16> %Src, ptr %Dst) { ; CHECK-LABEL: fun7: -; Z14: vuplhh %v0, %v24 -; Z14-NEXT: vlgvf %r0, %v0, 3 -; Z14-NEXT: celfbr %f1, 0, %r0, 0 -; Z14-NEXT: vlgvf %r0, %v0, 2 -; Z14-NEXT: celfbr %f2, 0, %r0, 0 -; Z14-NEXT: vlgvf %r0, %v0, 1 -; Z14-NEXT: vmrhf %v1, %v2, %v1 -; Z14-NEXT: celfbr %f2, 0, %r0, 0 -; Z14-NEXT: vlgvf %r0, %v0, 0 -; Z14-NEXT: celfbr %f0, 0, %r0, 0 -; Z14-NEXT: vmrhf %v0, %v0, %v2 -; Z14-NEXT: vmrhg %v0, %v0, %v1 -; Z14-NEXT: vst %v0, 0(%r2), 3 -; Z14-NEXT: br %r14 +; Z14: vuplhh %v0, %v24 +; Z14-NEXT: vlgvf %r0, %v0, 3 +; Z14-NEXT: vlgvf %r1, %v0, 2 +; Z14-NEXT: celfbr %f1, 0, %r1, 0 +; Z14-NEXT: vlgvf %r3, %v0, 1 +; Z14-NEXT: celfbr %f2, 0, %r3, 0 +; Z14-NEXT: vlgvf %r4, %v0, 0 +; Z14-NEXT: celfbr %f0, 0, %r0, 0 +; Z14-NEXT: vmrhf %v0, %v1, %v0 +; Z14-NEXT: celfbr %f3, 0, %r4, 0 +; Z14-NEXT: vmrhf %v1, %v3, %v2 +; Z14-NEXT: vmrhg %v0, %v1, %v0 +; Z14-NEXT: vst %v0, 0(%r2), 3 +; Z14-NEXT: br %r14 ; 
Z15: vuplhh %v0, %v24 ; Z15-NEXT: vcelfb %v0, %v0, 0, 0 diff --git a/llvm/test/CodeGen/SystemZ/vec-mul-07.ll b/llvm/test/CodeGen/SystemZ/vec-mul-07.ll index 583561625cfc0..1888ad2169506 100644 --- a/llvm/test/CodeGen/SystemZ/vec-mul-07.ll +++ b/llvm/test/CodeGen/SystemZ/vec-mul-07.ll @@ -192,11 +192,11 @@ define <2 x i64> @f9_not(<4 x i32> %val1, <4 x i32> %val2) { ; CHECK-NEXT: vuplhf %v1, %v26 ; CHECK-NEXT: vlgvg %r0, %v1, 1 ; CHECK-NEXT: vlgvg %r1, %v0, 1 +; CHECK-NEXT: vlgvg %r2, %v1, 0 +; CHECK-NEXT: vlgvg %r3, %v0, 0 ; CHECK-NEXT: msgr %r1, %r0 -; CHECK-NEXT: vlgvg %r0, %v1, 0 -; CHECK-NEXT: vlgvg %r2, %v0, 0 -; CHECK-NEXT: msgr %r2, %r0 -; CHECK-NEXT: vlvgp %v24, %r2, %r1 +; CHECK-NEXT: msgr %r3, %r2 +; CHECK-NEXT: vlvgp %v24, %r3, %r1 ; CHECK-NEXT: br %r14 %shuf1 = shufflevector <4 x i32> %val1, <4 x i32> poison, <2 x i32> %zext1 = zext <2 x i32> %shuf1 to <2 x i64> @@ -243,11 +243,11 @@ define <2 x i64> @f11_not(<4 x i32> %val1, <4 x i32> %val2) { ; CHECK-NEXT: vuphf %v1, %v26 ; CHECK-NEXT: vlgvg %r0, %v1, 1 ; CHECK-NEXT: vlgvg %r1, %v0, 1 +; CHECK-NEXT: vlgvg %r2, %v1, 0 +; CHECK-NEXT: vlgvg %r3, %v0, 0 ; CHECK-NEXT: msgr %r1, %r0 -; CHECK-NEXT: vlgvg %r0, %v1, 0 -; CHECK-NEXT: vlgvg %r2, %v0, 0 -; CHECK-NEXT: msgr %r2, %r0 -; CHECK-NEXT: vlvgp %v24, %r2, %r1 +; CHECK-NEXT: msgr %r3, %r2 +; CHECK-NEXT: vlvgp %v24, %r3, %r1 ; CHECK-NEXT: br %r14 %shuf1 = shufflevector <4 x i32> %val1, <4 x i32> poison, <2 x i32> %sext1 = sext <2 x i32> %shuf1 to <2 x i64> diff --git a/llvm/test/CodeGen/SystemZ/vec-perm-12.ll b/llvm/test/CodeGen/SystemZ/vec-perm-12.ll index b59eafaf0768b..5f9f38d7ad038 100644 --- a/llvm/test/CodeGen/SystemZ/vec-perm-12.ll +++ b/llvm/test/CodeGen/SystemZ/vec-perm-12.ll @@ -10,17 +10,17 @@ define <4 x i32> @f1(<4 x i32> %x, i64 %y) { ; CHECK-CODE-LABEL: f1: ; CHECK-CODE: # %bb.0: ; CHECK-CODE-NEXT: larl %r1, .LCPI0_0 -; CHECK-CODE-NEXT: vl %v1, 0(%r1), 3 -; CHECK-CODE-NEXT: vlvgf %v0, %r2, 0 -; CHECK-CODE-NEXT: vperm %v24, %v24, %v0, %v1 +; 
CHECK-CODE-NEXT: vl %v0, 0(%r1), 3 +; CHECK-CODE-NEXT: vlvgf %v1, %r2, 0 +; CHECK-CODE-NEXT: vperm %v24, %v24, %v1, %v0 ; CHECK-CODE-NEXT: br %r14 ; ; CHECK-VECTOR-LABEL: f1: ; CHECK-VECTOR: # %bb.0: ; CHECK-VECTOR-NEXT: larl %r1, .LCPI0_0 -; CHECK-VECTOR-NEXT: vl %v1, 0(%r1), 3 -; CHECK-VECTOR-NEXT: vlvgf %v0, %r2, 0 -; CHECK-VECTOR-NEXT: vperm %v24, %v24, %v0, %v1 +; CHECK-VECTOR-NEXT: vl %v0, 0(%r1), 3 +; CHECK-VECTOR-NEXT: vlvgf %v1, %r2, 0 +; CHECK-VECTOR-NEXT: vperm %v24, %v24, %v1, %v0 ; CHECK-VECTOR-NEXT: br %r14 diff --git a/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll index 42d787d945145..e67f2f631bc93 100644 --- a/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll +++ b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i16.ll @@ -23,11 +23,11 @@ define fastcc void @test_shuffle_with_trunc() { ; CHECK: # %bb.0: ; CHECK-NEXT: lh %r1, 0 ; CHECK-NEXT: l %r0, 0 -; CHECK-NEXT: vlvgh %v1, %r1, 0 -; CHECK-NEXT: larl %r1, .LCPI1_0 -; CHECK-NEXT: vl %v2, 0(%r1), 3 -; CHECK-NEXT: vlvgf %v0, %r0, 0 -; CHECK-NEXT: vperm %v0, %v0, %v1, %v2 +; CHECK-NEXT: larl %r2, .LCPI1_0 +; CHECK-NEXT: vl %v0, 0(%r2), 3 +; CHECK-NEXT: vlvgf %v1, %r0, 0 +; CHECK-NEXT: vlvgh %v2, %r1, 0 +; CHECK-NEXT: vperm %v0, %v1, %v2, %v0 ; CHECK-NEXT: vst %v0, 0, 3 ; CHECK-NEXT: br %r14 %1 = load i32, ptr null, align 8 diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll index bde3635f48446..baa4c5adea6be 100644 --- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll @@ -76,9 +76,9 @@ define <3 x float> @constrained_vector_fdiv_v3f32() #0 { ; SZ13-LABEL: constrained_vector_fdiv_v3f32: ; SZ13: # %bb.0: # %entry ; SZ13-NEXT: larl %r1, .LCPI2_0 +; SZ13-NEXT: larl %r2, .LCPI2_1 ; SZ13-NEXT: lde %f0, 0(%r1) -; SZ13-NEXT: larl %r1, .LCPI2_1 -; SZ13-NEXT: lde %f1, 0(%r1) +; SZ13-NEXT: lde %f1, 0(%r2) ; 
SZ13-NEXT: debr %f1, %f0 ; SZ13-NEXT: vgmf %v2, 2, 8 ; SZ13-NEXT: vgmf %v3, 1, 1 @@ -116,15 +116,15 @@ define void @constrained_vector_fdiv_v3f64(ptr %a) #0 { ; ; SZ13-LABEL: constrained_vector_fdiv_v3f64: ; SZ13: # %bb.0: # %entry -; SZ13-NEXT: larl %r1, .LCPI3_0 -; SZ13-NEXT: ld %f1, 0(%r1) -; SZ13-NEXT: ddb %f1, 16(%r2) ; SZ13-NEXT: larl %r1, .LCPI3_1 ; SZ13-NEXT: vl %v0, 0(%r2), 4 -; SZ13-NEXT: vl %v2, 0(%r1), 3 -; SZ13-NEXT: std %f1, 16(%r2) -; SZ13-NEXT: vfddb %v0, %v2, %v0 +; SZ13-NEXT: vl %v1, 0(%r1), 3 +; SZ13-NEXT: vfddb %v0, %v1, %v0 +; SZ13-NEXT: larl %r1, .LCPI3_0 +; SZ13-NEXT: ld %f2, 0(%r1) +; SZ13-NEXT: ddb %f2, 16(%r2) ; SZ13-NEXT: vst %v0, 0(%r2), 4 +; SZ13-NEXT: std %f2, 16(%r2) ; SZ13-NEXT: br %r14 entry: %b = load <3 x double>, ptr %a @@ -432,12 +432,13 @@ define void @constrained_vector_frem_v3f64(ptr %a) #0 { ; SZ13-NEXT: vgmg %v0, 1, 1 ; SZ13-NEXT: # kill: def $f2d killed $f2d killed $v2 ; SZ13-NEXT: brasl %r14, fmod@PLT -; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload -; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 -; SZ13-NEXT: vmrhg %v0, %v1, %v0 +; SZ13-NEXT: vl %v2, 160(%r15), 3 # 16-byte Reload ; SZ13-NEXT: larl %r1, .LCPI8_0 +; SZ13-NEXT: ld %f1, 0(%r1) +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v2, %v0 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: ldr %f0, %f1 ; SZ13-NEXT: ldr %f2, %f8 ; SZ13-NEXT: brasl %r14, fmod@PLT ; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload @@ -525,12 +526,13 @@ define <4 x double> @constrained_vector_frem_v4f64() #0 { ; SZ13-NEXT: vgmg %v0, 2, 11 ; SZ13-NEXT: ldr %f2, %f8 ; SZ13-NEXT: brasl %r14, fmod@PLT -; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload -; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 -; SZ13-NEXT: vmrhg %v0, %v0, %v1 +; SZ13-NEXT: vl %v2, 160(%r15), 3 # 16-byte Reload ; SZ13-NEXT: larl %r1, .LCPI9_1 +; SZ13-NEXT: ld %f1, 0(%r1) +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; 
SZ13-NEXT: vmrhg %v0, %v0, %v2 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: ldr %f0, %f1 ; SZ13-NEXT: ldr %f2, %f8 ; SZ13-NEXT: brasl %r14, fmod@PLT ; SZ13-NEXT: larl %r1, .LCPI9_2 @@ -628,15 +630,15 @@ define <3 x float> @constrained_vector_fmul_v3f32() #0 { ; SZ13-LABEL: constrained_vector_fmul_v3f32: ; SZ13: # %bb.0: # %entry ; SZ13-NEXT: vgmf %v0, 1, 8 +; SZ13-NEXT: vgmf %v1, 2, 8 ; SZ13-NEXT: larl %r1, .LCPI12_0 -; SZ13-NEXT: vgmf %v2, 2, 8 -; SZ13-NEXT: vgmf %v1, 1, 8 -; SZ13-NEXT: meeb %f1, 0(%r1) -; SZ13-NEXT: larl %r1, .LCPI12_1 -; SZ13-NEXT: meebr %f2, %f0 -; SZ13-NEXT: meeb %f0, 0(%r1) -; SZ13-NEXT: vmrhf %v0, %v2, %v0 -; SZ13-NEXT: vrepf %v1, %v1, 0 +; SZ13-NEXT: larl %r2, .LCPI12_1 +; SZ13-NEXT: vgmf %v2, 1, 8 +; SZ13-NEXT: meeb %f2, 0(%r1) +; SZ13-NEXT: meebr %f1, %f0 +; SZ13-NEXT: meeb %f0, 0(%r2) +; SZ13-NEXT: vmrhf %v0, %v1, %v0 +; SZ13-NEXT: vrepf %v1, %v2, 0 ; SZ13-NEXT: vmrhg %v24, %v0, %v1 ; SZ13-NEXT: br %r14 entry: @@ -666,15 +668,15 @@ define void @constrained_vector_fmul_v3f64(ptr %a) #0 { ; ; SZ13-LABEL: constrained_vector_fmul_v3f64: ; SZ13: # %bb.0: # %entry -; SZ13-NEXT: larl %r1, .LCPI13_0 -; SZ13-NEXT: ld %f1, 0(%r1) ; SZ13-NEXT: larl %r1, .LCPI13_1 ; SZ13-NEXT: vl %v0, 0(%r2), 4 -; SZ13-NEXT: vl %v2, 0(%r1), 3 -; SZ13-NEXT: mdb %f1, 16(%r2) -; SZ13-NEXT: vfmdb %v0, %v2, %v0 +; SZ13-NEXT: larl %r3, .LCPI13_0 +; SZ13-NEXT: vl %v1, 0(%r1), 3 +; SZ13-NEXT: ld %f2, 0(%r3) +; SZ13-NEXT: mdb %f2, 16(%r2) +; SZ13-NEXT: vfmdb %v0, %v1, %v0 ; SZ13-NEXT: vst %v0, 0(%r2), 4 -; SZ13-NEXT: std %f1, 16(%r2) +; SZ13-NEXT: std %f2, 16(%r2) ; SZ13-NEXT: br %r14 entry: %b = load <3 x double>, ptr %a @@ -799,14 +801,14 @@ define <3 x float> @constrained_vector_fadd_v3f32() #0 { ; SZ13-LABEL: constrained_vector_fadd_v3f32: ; SZ13: # %bb.0: # %entry ; SZ13-NEXT: vgbm %v0, 61440 -; SZ13-NEXT: vgmf %v2, 1, 1 -; SZ13-NEXT: vgmf %v3, 2, 8 -; SZ13-NEXT: lzer %f1 +; SZ13-NEXT: vgmf %v1, 1, 1 +; SZ13-NEXT: vgmf 
%v2, 2, 8 +; SZ13-NEXT: lzer %f3 +; SZ13-NEXT: aebr %f3, %f0 ; SZ13-NEXT: aebr %f1, %f0 ; SZ13-NEXT: aebr %f2, %f0 -; SZ13-NEXT: aebr %f3, %f0 -; SZ13-NEXT: vmrhf %v0, %v2, %v3 -; SZ13-NEXT: vrepf %v1, %v1, 0 +; SZ13-NEXT: vmrhf %v0, %v1, %v2 +; SZ13-NEXT: vrepf %v1, %v3, 0 ; SZ13-NEXT: vmrhg %v24, %v0, %v1 ; SZ13-NEXT: br %r14 entry: @@ -836,15 +838,15 @@ define void @constrained_vector_fadd_v3f64(ptr %a) #0 { ; ; SZ13-LABEL: constrained_vector_fadd_v3f64: ; SZ13: # %bb.0: # %entry -; SZ13-NEXT: larl %r1, .LCPI18_0 -; SZ13-NEXT: ld %f1, 0(%r1) ; SZ13-NEXT: larl %r1, .LCPI18_1 ; SZ13-NEXT: vl %v0, 0(%r2), 4 -; SZ13-NEXT: vl %v2, 0(%r1), 3 -; SZ13-NEXT: adb %f1, 16(%r2) -; SZ13-NEXT: vfadb %v0, %v2, %v0 +; SZ13-NEXT: larl %r3, .LCPI18_0 +; SZ13-NEXT: vl %v1, 0(%r1), 3 +; SZ13-NEXT: ld %f2, 0(%r3) +; SZ13-NEXT: adb %f2, 16(%r2) +; SZ13-NEXT: vfadb %v0, %v1, %v0 ; SZ13-NEXT: vst %v0, 0(%r2), 4 -; SZ13-NEXT: std %f1, 16(%r2) +; SZ13-NEXT: std %f2, 16(%r2) ; SZ13-NEXT: br %r14 entry: %b = load <3 x double>, ptr %a @@ -968,17 +970,17 @@ define <3 x float> @constrained_vector_fsub_v3f32() #0 { ; ; SZ13-LABEL: constrained_vector_fsub_v3f32: ; SZ13: # %bb.0: # %entry -; SZ13-NEXT: vgbm %v2, 61440 -; SZ13-NEXT: lzer %f1 -; SZ13-NEXT: sebr %f2, %f1 -; SZ13-NEXT: vgmf %v1, 1, 1 -; SZ13-NEXT: vgbm %v3, 61440 ; SZ13-NEXT: vgbm %v0, 61440 -; SZ13-NEXT: sebr %f3, %f1 -; SZ13-NEXT: vgmf %v1, 2, 8 -; SZ13-NEXT: sebr %f0, %f1 +; SZ13-NEXT: vgbm %v1, 61440 +; SZ13-NEXT: vgmf %v2, 1, 1 +; SZ13-NEXT: vgbm %v3, 61440 +; SZ13-NEXT: vgmf %v4, 2, 8 +; SZ13-NEXT: lzer %f5 +; SZ13-NEXT: sebr %f1, %f5 +; SZ13-NEXT: sebr %f3, %f2 +; SZ13-NEXT: sebr %f0, %f4 ; SZ13-NEXT: vmrhf %v0, %v3, %v0 -; SZ13-NEXT: vrepf %v1, %v2, 0 +; SZ13-NEXT: vrepf %v1, %v1, 0 ; SZ13-NEXT: vmrhg %v24, %v0, %v1 ; SZ13-NEXT: br %r14 entry: @@ -1009,12 +1011,12 @@ define void @constrained_vector_fsub_v3f64(ptr %a) #0 { ; SZ13-LABEL: constrained_vector_fsub_v3f64: ; SZ13: # %bb.0: # %entry ; SZ13-NEXT: vl %v0, 0(%r2), 4 -; 
SZ13-NEXT: vgmg %v2, 12, 10 -; SZ13-NEXT: sdb %f2, 16(%r2) ; SZ13-NEXT: vgmg %v1, 12, 10 -; SZ13-NEXT: vfsdb %v0, %v1, %v0 +; SZ13-NEXT: sdb %f1, 16(%r2) +; SZ13-NEXT: vgmg %v2, 12, 10 +; SZ13-NEXT: vfsdb %v0, %v2, %v0 ; SZ13-NEXT: vst %v0, 0(%r2), 4 -; SZ13-NEXT: std %f2, 16(%r2) +; SZ13-NEXT: std %f1, 16(%r2) ; SZ13-NEXT: br %r14 entry: %b = load <3 x double>, ptr %a @@ -1450,10 +1452,10 @@ define void @constrained_vector_pow_v3f64(ptr %a) #0 { ; SZ13-NEXT: .cfi_offset %f8, -168 ; SZ13-NEXT: .cfi_offset %f9, -176 ; SZ13-NEXT: larl %r1, .LCPI33_0 -; SZ13-NEXT: ld %f9, 0(%r1) +; SZ13-NEXT: ld %f8, 0(%r1) ; SZ13-NEXT: vl %v0, 0(%r2), 4 -; SZ13-NEXT: ld %f8, 16(%r2) -; SZ13-NEXT: ldr %f2, %f9 +; SZ13-NEXT: ld %f9, 16(%r2) +; SZ13-NEXT: ldr %f2, %f8 ; SZ13-NEXT: lgr %r13, %r2 ; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Spill ; SZ13-NEXT: # kill: def $f0d killed $f0d killed $v0 @@ -1461,7 +1463,7 @@ define void @constrained_vector_pow_v3f64(ptr %a) #0 { ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; SZ13-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload -; SZ13-NEXT: ldr %f2, %f9 +; SZ13-NEXT: ldr %f2, %f8 ; SZ13-NEXT: vrepg %v0, %v0, 1 ; SZ13-NEXT: # kill: def $f0d killed $f0d killed $v0 ; SZ13-NEXT: brasl %r14, pow@PLT @@ -1469,8 +1471,8 @@ define void @constrained_vector_pow_v3f64(ptr %a) #0 { ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 ; SZ13-NEXT: vmrhg %v0, %v1, %v0 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: ldr %f0, %f8 -; SZ13-NEXT: ldr %f2, %f9 +; SZ13-NEXT: ldr %f0, %f9 +; SZ13-NEXT: ldr %f2, %f8 ; SZ13-NEXT: brasl %r14, pow@PLT ; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload ; SZ13-NEXT: ld %f8, 200(%r15) # 8-byte Reload @@ -1560,12 +1562,13 @@ define <4 x double> @constrained_vector_pow_v4f64() #0 { ; SZ13-NEXT: ld %f0, 0(%r1) ; SZ13-NEXT: ldr %f2, %f8 ; SZ13-NEXT: brasl %r14, pow@PLT -; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload -; SZ13-NEXT: # kill: def 
$f0d killed $f0d def $v0 -; SZ13-NEXT: vmrhg %v0, %v0, %v1 +; SZ13-NEXT: vl %v2, 160(%r15), 3 # 16-byte Reload ; SZ13-NEXT: larl %r1, .LCPI34_3 +; SZ13-NEXT: ld %f1, 0(%r1) +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v0, %v2 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: ldr %f0, %f1 ; SZ13-NEXT: ldr %f2, %f8 ; SZ13-NEXT: brasl %r14, pow@PLT ; SZ13-NEXT: larl %r1, .LCPI34_4 @@ -1819,12 +1822,13 @@ define void @constrained_vector_powi_v3f64(ptr %a) #0 { ; SZ13-NEXT: ld %f0, 0(%r1) ; SZ13-NEXT: lghi %r2, 3 ; SZ13-NEXT: brasl %r14, __powidf2@PLT -; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload -; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 -; SZ13-NEXT: vmrhg %v0, %v0, %v1 +; SZ13-NEXT: vl %v2, 160(%r15), 3 # 16-byte Reload ; SZ13-NEXT: larl %r1, .LCPI38_2 +; SZ13-NEXT: ld %f1, 0(%r1) +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v0, %v2 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: ldr %f0, %f1 ; SZ13-NEXT: lghi %r2, 3 ; SZ13-NEXT: brasl %r14, __powidf2@PLT ; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload @@ -1904,12 +1908,13 @@ define <4 x double> @constrained_vector_powi_v4f64() #0 { ; SZ13-NEXT: ld %f0, 0(%r1) ; SZ13-NEXT: lghi %r2, 3 ; SZ13-NEXT: brasl %r14, __powidf2@PLT -; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload -; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 -; SZ13-NEXT: vmrhg %v0, %v0, %v1 +; SZ13-NEXT: vl %v2, 160(%r15), 3 # 16-byte Reload ; SZ13-NEXT: larl %r1, .LCPI39_2 +; SZ13-NEXT: ld %f1, 0(%r1) +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v0, %v2 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: ldr %f0, %f1 ; SZ13-NEXT: lghi %r2, 3 ; SZ13-NEXT: brasl %r14, __powidf2@PLT ; SZ13-NEXT: larl %r1, .LCPI39_3 @@ -2223,12 +2228,13 @@ define <4 x double> @constrained_vector_sin_v4f64() #0 { ; 
SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; SZ13-NEXT: ld %f0, 0(%r1) ; SZ13-NEXT: brasl %r14, sin@PLT -; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload -; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 -; SZ13-NEXT: vmrhg %v0, %v0, %v1 +; SZ13-NEXT: vl %v2, 160(%r15), 3 # 16-byte Reload ; SZ13-NEXT: larl %r1, .LCPI44_2 +; SZ13-NEXT: ld %f1, 0(%r1) +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v0, %v2 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: ldr %f0, %f1 ; SZ13-NEXT: brasl %r14, sin@PLT ; SZ13-NEXT: larl %r1, .LCPI44_3 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 @@ -2539,12 +2545,13 @@ define <4 x double> @constrained_vector_cos_v4f64() #0 { ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; SZ13-NEXT: ld %f0, 0(%r1) ; SZ13-NEXT: brasl %r14, cos@PLT -; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload -; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 -; SZ13-NEXT: vmrhg %v0, %v0, %v1 +; SZ13-NEXT: vl %v2, 160(%r15), 3 # 16-byte Reload ; SZ13-NEXT: larl %r1, .LCPI49_2 +; SZ13-NEXT: ld %f1, 0(%r1) +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v0, %v2 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: ldr %f0, %f1 ; SZ13-NEXT: brasl %r14, cos@PLT ; SZ13-NEXT: larl %r1, .LCPI49_3 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 @@ -2855,12 +2862,13 @@ define <4 x double> @constrained_vector_exp_v4f64() #0 { ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; SZ13-NEXT: ld %f0, 0(%r1) ; SZ13-NEXT: brasl %r14, exp@PLT -; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload -; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 -; SZ13-NEXT: vmrhg %v0, %v0, %v1 +; SZ13-NEXT: vl %v2, 160(%r15), 3 # 16-byte Reload ; SZ13-NEXT: larl %r1, .LCPI54_2 +; SZ13-NEXT: ld %f1, 0(%r1) +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v0, %v2 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte 
Spill -; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: ldr %f0, %f1 ; SZ13-NEXT: brasl %r14, exp@PLT ; SZ13-NEXT: larl %r1, .LCPI54_3 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 @@ -3171,12 +3179,13 @@ define <4 x double> @constrained_vector_exp2_v4f64() #0 { ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; SZ13-NEXT: ld %f0, 0(%r1) ; SZ13-NEXT: brasl %r14, exp2@PLT -; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload -; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 -; SZ13-NEXT: vmrhg %v0, %v0, %v1 +; SZ13-NEXT: vl %v2, 160(%r15), 3 # 16-byte Reload ; SZ13-NEXT: larl %r1, .LCPI59_2 +; SZ13-NEXT: ld %f1, 0(%r1) +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v0, %v2 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: ldr %f0, %f1 ; SZ13-NEXT: brasl %r14, exp2@PLT ; SZ13-NEXT: larl %r1, .LCPI59_3 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 @@ -3487,12 +3496,13 @@ define <4 x double> @constrained_vector_log_v4f64() #0 { ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; SZ13-NEXT: ld %f0, 0(%r1) ; SZ13-NEXT: brasl %r14, log@PLT -; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload -; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 -; SZ13-NEXT: vmrhg %v0, %v0, %v1 +; SZ13-NEXT: vl %v2, 160(%r15), 3 # 16-byte Reload ; SZ13-NEXT: larl %r1, .LCPI64_2 +; SZ13-NEXT: ld %f1, 0(%r1) +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v0, %v2 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: ldr %f0, %f1 ; SZ13-NEXT: brasl %r14, log@PLT ; SZ13-NEXT: larl %r1, .LCPI64_3 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 @@ -3803,12 +3813,13 @@ define <4 x double> @constrained_vector_log10_v4f64() #0 { ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; SZ13-NEXT: ld %f0, 0(%r1) ; SZ13-NEXT: brasl %r14, log10@PLT -; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload -; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 -; SZ13-NEXT: 
vmrhg %v0, %v0, %v1 +; SZ13-NEXT: vl %v2, 160(%r15), 3 # 16-byte Reload ; SZ13-NEXT: larl %r1, .LCPI69_2 +; SZ13-NEXT: ld %f1, 0(%r1) +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v0, %v2 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: ldr %f0, %f1 ; SZ13-NEXT: brasl %r14, log10@PLT ; SZ13-NEXT: larl %r1, .LCPI69_3 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 @@ -4119,12 +4130,13 @@ define <4 x double> @constrained_vector_log2_v4f64() #0 { ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; SZ13-NEXT: ld %f0, 0(%r1) ; SZ13-NEXT: brasl %r14, log2@PLT -; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload -; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 -; SZ13-NEXT: vmrhg %v0, %v0, %v1 +; SZ13-NEXT: vl %v2, 160(%r15), 3 # 16-byte Reload ; SZ13-NEXT: larl %r1, .LCPI74_2 +; SZ13-NEXT: ld %f1, 0(%r1) +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v0, %v2 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: ldr %f0, %f1 ; SZ13-NEXT: brasl %r14, log2@PLT ; SZ13-NEXT: larl %r1, .LCPI74_3 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 @@ -4765,8 +4777,8 @@ define void @constrained_vector_log10_maxnum_v3f64(ptr %a) #0 { ; SZ13-NEXT: larl %r1, .LCPI88_1 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload ; SZ13-NEXT: ld %f2, 0(%r1) +; SZ13-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload ; SZ13-NEXT: vrepg %v0, %v0, 1 ; SZ13-NEXT: # kill: def $f0d killed $f0d killed $v0 ; SZ13-NEXT: brasl %r14, fmax@PLT @@ -5133,10 +5145,10 @@ define void @constrained_vector_minnum_v3f64(ptr %a) #0 { ; SZ13-NEXT: .cfi_offset %f8, -168 ; SZ13-NEXT: .cfi_offset %f9, -176 ; SZ13-NEXT: larl %r1, .LCPI93_0 -; SZ13-NEXT: ld %f9, 0(%r1) +; SZ13-NEXT: ld %f8, 0(%r1) ; SZ13-NEXT: vl %v0, 0(%r2), 4 -; SZ13-NEXT: ld %f8, 16(%r2) -; SZ13-NEXT: ldr %f2, %f9 
+; SZ13-NEXT: ld %f9, 16(%r2) +; SZ13-NEXT: ldr %f2, %f8 ; SZ13-NEXT: lgr %r13, %r2 ; SZ13-NEXT: vst %v0, 176(%r15), 3 # 16-byte Spill ; SZ13-NEXT: # kill: def $f0d killed $f0d killed $v0 @@ -5144,7 +5156,7 @@ define void @constrained_vector_minnum_v3f64(ptr %a) #0 { ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; SZ13-NEXT: vl %v0, 176(%r15), 3 # 16-byte Reload -; SZ13-NEXT: ldr %f2, %f9 +; SZ13-NEXT: ldr %f2, %f8 ; SZ13-NEXT: vrepg %v0, %v0, 1 ; SZ13-NEXT: # kill: def $f0d killed $f0d killed $v0 ; SZ13-NEXT: brasl %r14, fmin@PLT @@ -5152,8 +5164,8 @@ define void @constrained_vector_minnum_v3f64(ptr %a) #0 { ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 ; SZ13-NEXT: vmrhg %v0, %v1, %v0 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: ldr %f0, %f8 -; SZ13-NEXT: ldr %f2, %f9 +; SZ13-NEXT: ldr %f0, %f9 +; SZ13-NEXT: ldr %f2, %f8 ; SZ13-NEXT: brasl %r14, fmin@PLT ; SZ13-NEXT: mvc 0(16,%r13), 160(%r15) # 16-byte Folded Reload ; SZ13-NEXT: ld %f8, 200(%r15) # 8-byte Reload @@ -5344,15 +5356,15 @@ define void @constrained_vector_fptrunc_v3f64(ptr %src, ptr %dest) #0 { ; ; SZ13-LABEL: constrained_vector_fptrunc_v3f64: ; SZ13: # %bb.0: # %entry -; SZ13-NEXT: vl %v1, 0(%r2), 4 -; SZ13-NEXT: ld %f0, 16(%r2) -; SZ13-NEXT: vledb %v1, %v1, 0, 0 +; SZ13-NEXT: vl %v0, 0(%r2), 4 +; SZ13-NEXT: ld %f1, 16(%r2) +; SZ13-NEXT: vledb %v0, %v0, 0, 0 ; SZ13-NEXT: larl %r1, .LCPI97_0 -; SZ13-NEXT: ledbra %f0, 0, %f0, 0 +; SZ13-NEXT: ledbra %f1, 0, %f1, 0 ; SZ13-NEXT: vl %v2, 0(%r1), 3 -; SZ13-NEXT: vperm %v1, %v1, %v1, %v2 -; SZ13-NEXT: ste %f0, 8(%r3) -; SZ13-NEXT: vsteg %v1, 0(%r3), 0 +; SZ13-NEXT: vperm %v0, %v0, %v0, %v2 +; SZ13-NEXT: ste %f1, 8(%r3) +; SZ13-NEXT: vsteg %v0, 0(%r3), 0 ; SZ13-NEXT: br %r14 entry: %b = load <3 x double>, ptr %src @@ -6673,12 +6685,13 @@ define <4 x double> @constrained_vector_tan_v4f64() #0 { ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill ; SZ13-NEXT: ld %f0, 0(%r1) ; 
SZ13-NEXT: brasl %r14, tan@PLT -; SZ13-NEXT: vl %v1, 160(%r15), 3 # 16-byte Reload -; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 -; SZ13-NEXT: vmrhg %v0, %v0, %v1 +; SZ13-NEXT: vl %v2, 160(%r15), 3 # 16-byte Reload ; SZ13-NEXT: larl %r1, .LCPI127_2 +; SZ13-NEXT: ld %f1, 0(%r1) +; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 +; SZ13-NEXT: vmrhg %v0, %v0, %v2 ; SZ13-NEXT: vst %v0, 160(%r15), 3 # 16-byte Spill -; SZ13-NEXT: ld %f0, 0(%r1) +; SZ13-NEXT: ldr %f0, %f1 ; SZ13-NEXT: brasl %r14, tan@PLT ; SZ13-NEXT: larl %r1, .LCPI127_3 ; SZ13-NEXT: # kill: def $f0d killed $f0d def $v0 From 24d638a914cace6a4f6ea57219641a9618278aa2 Mon Sep 17 00:00:00 2001 From: adream307 Date: Tue, 23 Jun 2026 19:07:48 +0800 Subject: [PATCH 161/511] [clang][X86] Add constexpr support for mpsadbw128/256 intrinsics (#202257) Enable constexpr evaluation for `_mm_mpsadbw_epu8` and `_mm256_mpsadbw_epu8` (`__builtin_ia32_mpsadbw128`/`mpsadbw256`). Fixes #157522. --- clang/include/clang/Basic/BuiltinsX86.td | 7 +-- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 54 ++++++++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 43 +++++++++++++++++++ clang/test/CodeGen/X86/avx2-builtins.c | 19 +++++++++ clang/test/CodeGen/X86/sse41-builtins.c | 11 +++++ 5 files changed, 131 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index f0112a2db0f1d..b0f95d98b8471 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -315,10 +315,11 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">; def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2,double>, _Constant char)">; - def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, 
_Constant char)">; } let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def mpsadbw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>, _Constant char)">; + def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">; @@ -574,14 +575,14 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid } let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; - def psadbw256 : X86Builtin< "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; } let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def mpsadbw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; + def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">; def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long " diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index ab94bb96829ac..e59d14db896a2 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3019,6 +3019,56 @@ static bool interp__builtin_ia32_dbpsadbw(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_mpsadbw(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 3); + uint64_t Imm; + if (!popToUInt64(S, Call->getArg(2), Imm)) + return false; + + const Pointer &Src2 = S.Stk.pop(); + const Pointer &Src1 = S.Stk.pop(); + const Pointer &Dst = S.Stk.peek(); + + const auto *SrcVT = Call->getArg(0)->getType()->castAs(); + PrimType SrcElemT = *S.getContext().classify(SrcVT->getElementType()); + 
unsigned SourceLen = SrcVT->getNumElements(); + assert((SourceLen == 16 || SourceLen == 32) && + "MPSADBW operates on 128-bit or 256-bit vectors"); + + const auto *DestVT = Call->getType()->castAs(); + PrimType DestElemT = *S.getContext().classify(DestVT->getElementType()); + bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType(); + + constexpr unsigned LaneSize = 16; // 128-bit lane = 16 bytes + unsigned NumLanes = SourceLen / LaneSize; + + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + unsigned Ctrl = (Imm >> (3 * Lane)) & 0x7; + unsigned AOff = ((Ctrl >> 2) & 1) * 4; + unsigned BOff = (Ctrl & 3) * 4; + for (unsigned J = 0; J != 8; ++J) { + uint16_t Sad = 0; + for (unsigned K = 0; K != 4; ++K) { + uint8_t A, B; + INT_TYPE_SWITCH_NO_BOOL(SrcElemT, { + A = static_cast( + Src1.elem(Lane * LaneSize + AOff + J + K)); + B = static_cast(Src2.elem(Lane * LaneSize + BOff + K)); + }); + Sad += (A > B) ? (A - B) : (B - A); + } + INT_TYPE_SWITCH_NO_BOOL(DestElemT, { + Dst.elem(Lane * 8 + J) = + static_cast(APSInt(APInt(16, Sad), DestUnsigned)); + }); + } + } + + Dst.initializeAllElements(); + return true; +} + static bool interp_builtin_horizontal_int_binop( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref Fn) { @@ -5377,6 +5427,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_dbpsadbw512: return interp__builtin_ia32_dbpsadbw(S, OpPC, Call); + case clang::X86::BI__builtin_ia32_mpsadbw128: + case clang::X86::BI__builtin_ia32_mpsadbw256: + return interp__builtin_ia32_mpsadbw(S, OpPC, Call); + case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: case clang::X86::BI__builtin_ia32_pmulhuw512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 220a7a22c6992..8efceff7e8c31 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12738,6 +12738,49 @@ bool 
VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case clang::X86::BI__builtin_ia32_mpsadbw128: + case clang::X86::BI__builtin_ia32_mpsadbw256: { + APValue SourceA, SourceB; + APSInt SourceImm; + if (!EvaluateVector(E->getArg(0), SourceA, Info) || + !EvaluateVector(E->getArg(1), SourceB, Info) || + !EvaluateInteger(E->getArg(2), SourceImm, Info)) + return false; + unsigned SourceLen = SourceA.getVectorLength(); + constexpr unsigned LaneSize = 16; + assert((SourceLen == LaneSize || SourceLen == 2 * LaneSize) && + "MPSADBW operates on 128-bit or 256-bit vectors"); + unsigned NumLanes = SourceLen / LaneSize; + unsigned Imm = SourceImm.getZExtValue(); + + QualType DestEltTy = E->getType()->castAs()->getElementType(); + bool DestUnsigned = DestEltTy->isUnsignedIntegerOrEnumerationType(); + SmallVector ResultElements; + ResultElements.reserve(SourceLen / 2); + + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + unsigned Ctrl = (Imm >> (3 * Lane)) & 0x7; + unsigned AOff = ((Ctrl >> 2) & 1) * 4; + unsigned BOff = (Ctrl & 3) * 4; + for (unsigned J = 0; J != 8; ++J) { + uint16_t Sad = 0; + for (unsigned K = 0; K != 4; ++K) { + uint8_t A = static_cast( + SourceA.getVectorElt(Lane * LaneSize + AOff + J + K) + .getInt() + .getZExtValue()); + uint8_t B = static_cast( + SourceB.getVectorElt(Lane * LaneSize + BOff + K) + .getInt() + .getZExtValue()); + Sad += (A > B) ? 
(A - B) : (B - A); + } + ResultElements.push_back(APValue(APSInt(APInt(16, Sad), DestUnsigned))); + } + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case clang::X86::BI__builtin_ia32_pmulhuw128: case clang::X86::BI__builtin_ia32_pmulhuw256: case clang::X86::BI__builtin_ia32_pmulhuw512: diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index de3d92ea1c6cc..cb14d1aafedde 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1009,6 +1009,25 @@ __m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) { // CHECK: call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}, i8 3) return _mm256_mpsadbw_epu8(x, y, 3); } +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 0), 353,344,334,322,310,298,282,268,442,428,410,394,376,352,330,310)); +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 1), 415,406,396,384,372,360,344,330,442,428,410,394,376,352,330,310)); +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 2), 517,508,498,486,474,462,446,432,442,428,410,394,376,352,330,310)); 
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 3), 603,594,584,572,560,548,532,518,442,428,410,394,376,352,330,310)); +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 4), 310,298,282,268,250,232,218,202,442,428,410,394,376,352,330,310)); +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 5), 372,360,344,330,312,294,280,264,442,428,410,394,376,352,330,310)); +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 6), 474,462,446,432,414,396,382,366,442,428,410,394,376,352,330,310)); +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 7), 560,548,532,518,500,482,468,452,442,428,410,394,376,352,330,310)); 
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 0<<3), 353,344,334,322,310,298,282,268,442,428,410,394,376,352,330,310)); +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 1<<3), 353,344,334,322,310,298,282,268,522,508,490,474,456,432,410,390)); +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 2<<3), 353,344,334,322,310,298,282,268,632,618,600,584,566,542,520,500)); +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 3<<3), 353,344,334,322,310,298,282,268,706,692,674,658,640,616,594,574)); +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 4<<3), 353,344,334,322,310,298,282,268,376,352,330,310,292,280,268,244)); 
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 5<<3), 353,344,334,322,310,298,282,268,456,432,410,390,372,360,348,324)); +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 6<<3), 353,344,334,322,310,298,282,268,566,542,520,500,482,470,458,434)); +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97,101,103,107,109,113,127,131}), ((__m256i)(__v32qu){83,89,97,101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 7<<3), 353,344,334,322,310,298,282,268,640,616,594,574,556,544,532,508)); +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}), ((__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}), 0x3F), 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)); +TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}), ((__m256i)(__v32qu){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), 0x3F), 1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020)); 
+TEST_CONSTEXPR(match_v16hu(_mm256_mpsadbw_epu8(((__m256i)(__v32qu){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((__m256i)(__v32qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}), 0x3F), 1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020,1020)); __m256i test_mm256_mul_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_mul_epi32 diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 1be1aa71de737..a12fc724f1398 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -402,6 +402,17 @@ __m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) { // CHECK: call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 1) return _mm_mpsadbw_epu8(x, y, 1); } +TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}), ((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 0), 640,628,612,598,580,562,548,532)); +TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}), ((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 1), 720,708,692,678,660,642,628,612)); +TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}), ((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 2), 830,818,802,788,770,752,738,722)); +TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}), ((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 3), 904,892,876,862,844,826,812,796)); +TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}), 
((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 4), 580,562,548,532,516,498,480,460)); +TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}), ((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 5), 660,642,628,612,596,578,560,540)); +TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}), ((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 6), 770,752,738,722,706,688,670,650)); +TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71}), ((__m128i)(__v16qu){167,173,179,181,191,193,197,199,211,223,227,229,233,239,241,251}), 7), 844,826,812,796,780,762,744,724)); +TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}), ((__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}), 7), 0,0,0,0,0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}), ((__m128i)(__v16qu){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), 7), 1020,1020,1020,1020,1020,1020,1020,1020)); +TEST_CONSTEXPR(match_v8hu(_mm_mpsadbw_epu8(((__m128i)(__v16qu){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((__m128i)(__v16qu){255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255}), 7), 1020,1020,1020,1020,1020,1020,1020,1020)); __m128i test_mm_mul_epi32(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_mul_epi32 From 7020c142f15b4cd590a0fd3d89dcdf6f914329ed Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 23 Jun 2026 13:13:33 +0200 Subject: [PATCH 162/511] AMDGPU: Rename AMDGPUTargetID to TargetID (#205269) The AMDGPU prefix is redundant with the namespace. 
Co-Authored-By: Claude --- .../llvm/TargetParser/AMDGPUTargetParser.h | 24 +++++++------------ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 2 +- .../AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 4 ++-- .../Target/AMDGPU/AMDGPUHSAMetadataStreamer.h | 8 +++---- .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 16 ++++++------- llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 ++-- .../MCTargetDesc/AMDGPUTargetStreamer.h | 6 ++--- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 18 +++++++------- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 8 +++---- llvm/lib/TargetParser/AMDGPUTargetParser.cpp | 19 +++++++-------- 10 files changed, 50 insertions(+), 59 deletions(-) diff --git a/llvm/include/llvm/TargetParser/AMDGPUTargetParser.h b/llvm/include/llvm/TargetParser/AMDGPUTargetParser.h index b35128ba926e3..1376f0942852d 100644 --- a/llvm/include/llvm/TargetParser/AMDGPUTargetParser.h +++ b/llvm/include/llvm/TargetParser/AMDGPUTargetParser.h @@ -108,7 +108,7 @@ fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, StringMap &Features); enum class TargetIDSetting { Unsupported, Any, Off, On }; -class LLVM_ABI AMDGPUTargetID { +class LLVM_ABI TargetID { private: GPUKind Arch; std::string TargetTripleString; @@ -117,10 +117,10 @@ class LLVM_ABI AMDGPUTargetID { bool IsAMDHSA; public: - AMDGPUTargetID(GPUKind Arch, const Triple &TT, TargetIDSetting XnackSetting, - TargetIDSetting SramEccSetting); + TargetID(GPUKind Arch, const Triple &TT, TargetIDSetting XnackSetting, + TargetIDSetting SramEccSetting); - ~AMDGPUTargetID() = default; + ~TargetID() = default; /// \return True if the current xnack setting is not "Unsupported". bool isXnackSupported() const { @@ -185,26 +185,18 @@ class LLVM_ABI AMDGPUTargetID { /// \returns True if this is an AMDHSA target. bool isAMDHSA() const { return IsAMDHSA; } - /// Parse a target ID directive string (e.g., - /// "amdgcn-amd-amdhsa--gfx1010:xnack-") and return an AMDGPUTargetID. - /// \returns AMDGPUTargetID or std::nullopt if malformed. 
- static std::optional + static std::optional parseTargetIDString(StringRef TargetIDDirective); - /// Write string representation to \p OS void print(raw_ostream &OS) const; - /// \returns String representation of an object. std::string toString() const; - bool operator==(const AMDGPUTargetID &Other) const; - bool operator!=(const AMDGPUTargetID &Other) const { - return !(*this == Other); - } + bool operator==(const TargetID &Other) const; + bool operator!=(const TargetID &Other) const { return !(*this == Other); } }; -inline raw_ostream &operator<<(raw_ostream &OS, - const AMDGPUTargetID &TargetID) { +inline raw_ostream &operator<<(raw_ostream &OS, const TargetID &TargetID) { TargetID.print(OS); return OS; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index a08f23f0f768a..3d191125aac47 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -1212,7 +1212,7 @@ void AMDGPUAsmPrinter::initializeTargetID(const Module &M) { break; const GCNSubtarget &STM = TM.getSubtarget(F); - const AMDGPUTargetID &STMTargetID = STM.getTargetID(); + const AMDGPU::TargetID &STMTargetID = STM.getTargetID(); if (TSTargetID->isXnackSupported()) if (TSTargetID->getXnackSetting() == AMDGPU::TargetIDSetting::Any) TSTargetID->setXnackSetting(STMTargetID.getXnackSetting()); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp index 78fe4df844714..793554e9bb432 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp @@ -215,7 +215,7 @@ void MetadataStreamerMsgPackV4::emitVersion() { getRootMetadata("amdhsa.version") = Version; } -void MetadataStreamerMsgPackV4::emitTargetID(const AMDGPUTargetID &TargetID) { +void MetadataStreamerMsgPackV4::emitTargetID(const TargetID &TargetID) { getRootMetadata("amdhsa.target") = 
HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true); } @@ -558,7 +558,7 @@ bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) { } void MetadataStreamerMsgPackV4::begin(const Module &Mod, - const AMDGPUTargetID &TargetID) { + const TargetID &TargetID) { emitVersion(); emitTargetID(TargetID); emitPrintf(Mod); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h index f2cd8dd5882f8..f4b4eb97ec750 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h @@ -36,7 +36,7 @@ class Type; namespace AMDGPU { -class AMDGPUTargetID; +class TargetID; namespace HSAMD { @@ -46,7 +46,7 @@ class MetadataStreamer { virtual bool emitTo(AMDGPUTargetStreamer &TargetStreamer) = 0; - virtual void begin(const Module &Mod, const AMDGPUTargetID &TargetID) = 0; + virtual void begin(const Module &Mod, const TargetID &TargetID) = 0; virtual void end() = 0; @@ -93,7 +93,7 @@ class LLVM_EXTERNAL_VISIBILITY MetadataStreamerMsgPackV4 void emitVersion() override; - void emitTargetID(const AMDGPUTargetID &TargetID); + void emitTargetID(const TargetID &TargetID); void emitPrintf(const Module &Mod); @@ -132,7 +132,7 @@ class LLVM_EXTERNAL_VISIBILITY MetadataStreamerMsgPackV4 bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override; - void begin(const Module &Mod, const AMDGPUTargetID &TargetID) override; + void begin(const Module &Mod, const TargetID &TargetID) override; void end() override; diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index b5a5285811da0..161395fffca77 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -5980,13 +5980,13 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { if (getParser().parseEscapedString(TargetIDDirective)) return true; - std::optional MaybeParsed = - 
AMDGPU::AMDGPUTargetID::parseTargetIDString(TargetIDDirective); + std::optional MaybeParsed = + AMDGPU::TargetID::parseTargetIDString(TargetIDDirective); if (!MaybeParsed) return getParser().Error(TargetStart, "malformed target ID"); - const AMDGPU::AMDGPUTargetID &ParsedTargetID = *MaybeParsed; - const std::optional &CurrentTargetID = + const AMDGPU::TargetID &ParsedTargetID = *MaybeParsed; + const std::optional &CurrentTargetID = getTargetStreamer().getTargetID(); if (*CurrentTargetID != ParsedTargetID) { @@ -6692,13 +6692,13 @@ bool AMDGPUAsmParser::ParseDirectiveISAVersion() { StringRef TargetIDDirective = getLexer().getTok().getStringContents(); - std::optional MaybeParsed = - AMDGPU::AMDGPUTargetID::parseTargetIDString(TargetIDDirective); + std::optional MaybeParsed = + AMDGPU::TargetID::parseTargetIDString(TargetIDDirective); if (!MaybeParsed) return Error(getParser().getTok().getLoc(), "malformed target id"); - const AMDGPU::AMDGPUTargetID &ParsedTargetID = *MaybeParsed; - const std::optional &CurrentTargetID = + const AMDGPU::TargetID &ParsedTargetID = *MaybeParsed; + const std::optional &CurrentTargetID = getTargetStreamer().getTargetID(); if (*CurrentTargetID != ParsedTargetID) { diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h index 1ab883dea24ec..a0938564a9250 100644 --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -71,7 +71,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, protected: // Basic subtarget description. 
- AMDGPU::AMDGPUTargetID TargetID; + AMDGPU::TargetID TargetID; unsigned Gen = INVALID; InstrItineraryData InstrItins; int LDSBankCount = 0; @@ -157,7 +157,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo, return RegBankInfo.get(); } - const AMDGPU::AMDGPUTargetID &getTargetID() const { return TargetID; } + const AMDGPU::TargetID &getTargetID() const { return TargetID; } const InstrItineraryData *getInstrItineraryData() const override { return &InstrItins; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h index c8135276b8c5f..c42ec5c2683cf 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h @@ -57,7 +57,7 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { protected: // TODO: Move HSAMetadataStream to AMDGPUTargetStreamer. - std::optional TargetID; + std::optional TargetID; unsigned CodeObjectVersion; MCContext &getContext() const { return Streamer.getContext(); } @@ -133,10 +133,10 @@ class AMDGPUTargetStreamer : public MCTargetStreamer { static StringRef getArchNameFromElfMach(unsigned ElfMach); static unsigned getElfMach(StringRef GPU); - const std::optional &getTargetID() const { + const std::optional &getTargetID() const { return TargetID; } - std::optional &getTargetID() { return TargetID; } + std::optional &getTargetID() { return TargetID; } void initializeTargetID(const MCSubtargetInfo &STI, StringRef FeatureString) { assert(TargetID == std::nullopt && "TargetID can only be initialized once"); TargetID = AMDGPU::createAMDGPUTargetID(STI, FeatureString); diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index efcb8d18bc503..96571dd028b14 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1097,15 +1097,15 @@ VOPD::InstInfo getVOPDInstInfo(unsigned 
VOPDOpcode, return VOPD::InstInfo(OpXInfo, OpYInfo); } -AMDGPUTargetID createAMDGPUTargetID(const MCSubtargetInfo &STI, - StringRef FeatureString) { - AMDGPUTargetID TargetID(parseArchAMDGCN(STI.getCPU()), STI.getTargetTriple(), - STI.getFeatureBits().test(FeatureSupportsXNACK) - ? TargetIDSetting::Any - : TargetIDSetting::Unsupported, - STI.getFeatureBits().test(FeatureSupportsSRAMECC) - ? TargetIDSetting::Any - : TargetIDSetting::Unsupported); +TargetID createAMDGPUTargetID(const MCSubtargetInfo &STI, + StringRef FeatureString) { + TargetID TargetID(parseArchAMDGCN(STI.getCPU()), STI.getTargetTriple(), + STI.getFeatureBits().test(FeatureSupportsXNACK) + ? TargetIDSetting::Any + : TargetIDSetting::Unsupported, + STI.getFeatureBits().test(FeatureSupportsSRAMECC) + ? TargetIDSetting::Any + : TargetIDSetting::Unsupported); // Check if xnack or sramecc is explicitly enabled or disabled. In the // absence of the target features we assume we must generate code that can run diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 1fb0f6b1dbc30..1623dc72d2810 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -144,12 +144,12 @@ struct WMMAInstInfo { #include "AMDGPUGenSearchableTables.inc" using TargetIDSetting = AMDGPU::TargetIDSetting; -using AMDGPUTargetID = AMDGPU::AMDGPUTargetID; +using TargetID = AMDGPU::TargetID; -/// Construct AMDGPUTargetID from MCSubtargetInfo. \p FeatureString is used to +/// Construct TargetID from MCSubtargetInfo. \p FeatureString is used to /// determine explicitly requested xnack/sramecc settings. 
-AMDGPUTargetID createAMDGPUTargetID(const MCSubtargetInfo &STI, - StringRef FeatureString); +TargetID createAMDGPUTargetID(const MCSubtargetInfo &STI, + StringRef FeatureString); namespace IsaInfo { diff --git a/llvm/lib/TargetParser/AMDGPUTargetParser.cpp b/llvm/lib/TargetParser/AMDGPUTargetParser.cpp index fe6a62a9e9e97..5820d0c148ce6 100644 --- a/llvm/lib/TargetParser/AMDGPUTargetParser.cpp +++ b/llvm/lib/TargetParser/AMDGPUTargetParser.cpp @@ -673,9 +673,8 @@ AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T, return {NO_ERROR, StringRef()}; } -AMDGPUTargetID::AMDGPUTargetID(GPUKind Arch, const Triple &TT, - TargetIDSetting XnackSetting, - TargetIDSetting SramEccSetting) +TargetID::TargetID(GPUKind Arch, const Triple &TT, TargetIDSetting XnackSetting, + TargetIDSetting SramEccSetting) : Arch(Arch), TargetTripleString(TT.normalize(Triple::CanonicalForm::FOUR_IDENT)), XnackSetting(XnackSetting), SramEccSetting(SramEccSetting), @@ -691,7 +690,7 @@ getTargetIDSettingFromFeatureString(StringRef FeatureString) { llvm_unreachable("Malformed feature string"); } -void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) { +void TargetID::setTargetIDFromTargetIDStream(StringRef TargetID) { SmallVector TargetIDSplit; TargetID.split(TargetIDSplit, ':'); @@ -703,8 +702,8 @@ void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) { } } -std::optional -AMDGPUTargetID::parseTargetIDString(StringRef TargetIDDirective) { +std::optional +TargetID::parseTargetIDString(StringRef TargetIDDirective) { // Split on '-' to get arch-vendor-os-environment-processor:features // There is a single dash separator after the 4-component triple SmallVector Parts; @@ -742,10 +741,10 @@ AMDGPUTargetID::parseTargetIDString(StringRef TargetIDDirective) { SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString); } - return AMDGPUTargetID(Arch, TT, XnackSetting, SramEccSetting); + return TargetID(Arch, TT, XnackSetting, SramEccSetting); } -void 
AMDGPUTargetID::print(raw_ostream &StreamRep) const { +void TargetID::print(raw_ostream &StreamRep) const { StreamRep << TargetTripleString << '-' << getArchNameAMDGCN(Arch); if (IsAMDHSA) { @@ -763,14 +762,14 @@ void AMDGPUTargetID::print(raw_ostream &StreamRep) const { } } -std::string AMDGPUTargetID::toString() const { +std::string TargetID::toString() const { std::string Str; raw_string_ostream OS(Str); OS << *this; return Str; } -bool AMDGPUTargetID::operator==(const AMDGPUTargetID &Other) const { +bool TargetID::operator==(const TargetID &Other) const { return Arch == Other.Arch && XnackSetting == Other.XnackSetting && SramEccSetting == Other.SramEccSetting && IsAMDHSA == Other.IsAMDHSA && TargetTripleString == Other.TargetTripleString; From a15c36fb34ddc7bccf81e9f5ad68b1ac8d32dc1d Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Tue, 23 Jun 2026 12:18:37 +0100 Subject: [PATCH 163/511] [LLVM][ConstantFP] Replace uses of isExactlyValue(+/-1.0) with isOne/isMinusOne. (#204333) --- clang/lib/AST/ByteCode/Floating.h | 4 +-- llvm/include/llvm/ADT/APFloat.h | 16 +++++++++++ llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 6 ++++ llvm/include/llvm/IR/Constants.h | 6 ++++ llvm/lib/Analysis/ConstantFolding.cpp | 6 ++-- .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 2 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 28 +++++++++---------- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8 +++--- .../lib/Target/AArch64/AArch64InstrFormats.td | 2 +- .../Target/AMDGPU/AMDGPUCodeGenPrepare.cpp | 7 ++--- llvm/lib/Target/AMDGPU/AMDGPUInstructions.td | 2 +- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 8 +++--- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 4 +-- .../Target/AMDGPU/AMDGPURegBankCombiner.cpp | 6 ++-- llvm/lib/Target/AMDGPU/R600ISelLowering.cpp | 4 +-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 17 ++++++----- .../LoongArch/LoongArchISelLowering.cpp | 4 +-- .../Target/LoongArch/LoongArchInstrInfo.td | 2 +- llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 4 +-- 
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 6 ++-- llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp | 2 +- llvm/lib/Target/X86/X86InstrFragments.td | 4 +-- llvm/lib/Transforms/Scalar/Reassociate.cpp | 2 +- .../llvm-reduce/deltas/ReduceOperands.cpp | 3 +- llvm/unittests/ADT/APFloatTest.cpp | 26 +++++++++++++++++ llvm/unittests/IR/MDBuilderTest.cpp | 2 +- 26 files changed, 116 insertions(+), 65 deletions(-) diff --git a/clang/lib/AST/ByteCode/Floating.h b/clang/lib/AST/ByteCode/Floating.h index cc918dc12deb6..f848538a73b90 100644 --- a/clang/lib/AST/ByteCode/Floating.h +++ b/clang/lib/AST/ByteCode/Floating.h @@ -144,7 +144,7 @@ class Floating final { bool isZero() const { return getValue().isZero(); } bool isNonZero() const { return getValue().isNonZero(); } bool isMin() const { return getValue().isSmallest(); } - bool isMinusOne() const { return getValue().isExactlyValue(-1.0); } + bool isMinusOne() const { return getValue().isMinusOne(); } bool isNan() const { return getValue().isNaN(); } bool isSignaling() const { return getValue().isSignaling(); } bool isInf() const { return getValue().isInfinity(); } @@ -166,7 +166,7 @@ class Floating final { case llvm::APFloatBase::cmpUnordered: return ComparisonCategoryResult::Unordered; } - llvm_unreachable("Inavlid cmpResult value"); + llvm_unreachable("Invalid cmpResult value"); } static APFloat::opStatus fromIntegral(APSInt Val, diff --git a/llvm/include/llvm/ADT/APFloat.h b/llvm/include/llvm/ADT/APFloat.h index f76c1fd7e04f6..91bfc425a0ab0 100644 --- a/llvm/include/llvm/ADT/APFloat.h +++ b/llvm/include/llvm/ADT/APFloat.h @@ -1608,6 +1608,22 @@ class APFloat : public APFloatBase { return isNegative() ? INT_MIN : getExactLog2Abs(); } + // Returns true if this value is exactly 2^N. + LLVM_READONLY + bool isPowerOf2(int N) const { return N != INT_MIN && getExactLog2() == N; } + + // Returns true if this value is exactly -(2^N). 
+ LLVM_READONLY + bool isNegPowerOf2(int N) const { + return N != INT_MIN && isNegative() && getExactLog2Abs() == N; + } + + // Returns true if this value is exactly +1.0. + LLVM_READONLY bool isOne() const { return isPowerOf2(0); } + + // Returns true if this value is exactly -1.0. + LLVM_READONLY bool isMinusOne() const { return isNegPowerOf2(0); } + LLVM_ABI friend hash_code hash_value(const APFloat &Arg); friend int ilogb(const APFloat &Arg); friend APFloat scalbn(APFloat X, int Exp, roundingMode RM); diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 6292dcd97fc8d..b280bd48c94e9 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -1895,6 +1895,12 @@ class ConstantFPSDNode : public SDNode { /// Return true if the value is negative. bool isNegative() const { return Value->isNegative(); } + /// Returns true if this value is exactly +1.0. + bool isOne() const { return Value->isOne(); } + + /// Returns true if this value is exactly -1.0. + bool isMinusOne() const { return Value->isMinusOne(); } + /// We don't rely on operator== working on double values, as /// it returns true for things that are clearly not equal, like -0.0 and 0.0. /// As such, this method can be used to do an exact bit-for-bit comparison of diff --git a/llvm/include/llvm/IR/Constants.h b/llvm/include/llvm/IR/Constants.h index d1a65db0af31f..d5299aebd16fe 100644 --- a/llvm/include/llvm/IR/Constants.h +++ b/llvm/include/llvm/IR/Constants.h @@ -481,6 +481,12 @@ class ConstantFP final : public ConstantData { /// Return true if the value is a NaN. bool isNaN() const { return Val.isNaN(); } + /// Returns true if this value is exactly +1.0. + bool isOne() const { return Val.isOne(); } + + /// Returns true if this value is exactly -1.0. 
+ bool isMinusOne() const { return Val.isMinusOne(); } + /// We don't rely on operator== working on double values, as it returns true /// for things that are clearly not equal, like -0.0 and 0.0. /// As such, this method can be used to do an exact bit-for-bit comparison of diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 3f663bc61243d..35becaa728ba5 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -2849,7 +2849,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, return ConstantFP::getInfinity(Ty, true); if (U.isNegative()) return ConstantFP::getNaN(Ty); - if (U.isExactlyValue(1.0)) + if (U.isOne()) return ConstantFP::getZero(Ty); return ConstantFoldFP(log, APF, Ty); case Intrinsic::log2: @@ -2857,7 +2857,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, return ConstantFP::getInfinity(Ty, true); if (U.isNegative()) return ConstantFP::getNaN(Ty); - if (U.isExactlyValue(1.0)) + if (U.isOne()) return ConstantFP::getZero(Ty); // TODO: What about hosts that lack a C99 library? return ConstantFoldFP(log2, APF, Ty); @@ -2866,7 +2866,7 @@ static Constant *ConstantFoldScalarCall1(StringRef Name, return ConstantFP::getInfinity(Ty, true); if (U.isNegative()) return ConstantFP::getNaN(Ty); - if (U.isExactlyValue(1.0)) + if (U.isOne()) return ConstantFP::getZero(Ty); // TODO: What about hosts that lack a C99 library? 
return ConstantFoldFP(log10, APF, Ty); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 8853a941e137f..b4809c95f872c 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -7032,7 +7032,7 @@ bool CombinerHelper::matchRepeatedFPDivisor( auto IsOne = [this](Register X) { auto N0CFP = isConstantOrConstantSplatVectorFP(*MRI.getVRegDef(X), MRI); - return N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)); + return N0CFP && (N0CFP->isOne() || N0CFP->isMinusOne()); }; // Skip if current node is a reciprocal/fneg-reciprocal. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4fdef7d4afb5d..6c583c87b5b0f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18787,10 +18787,10 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { auto FuseFADD = [&](SDValue X, SDValue Y) { if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) { if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) { - if (C->isExactlyValue(+1.0)) + if (C->isOne()) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); - if (C->isExactlyValue(-1.0)) + if (C->isMinusOne()) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, DAG.getNode(ISD::FNEG, SL, VT, Y)); } @@ -18810,20 +18810,20 @@ SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) { auto FuseFSUB = [&](SDValue X, SDValue Y) { if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) { if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) { - if (C0->isExactlyValue(+1.0)) + if (C0->isOne()) return DAG.getNode(PreferredFusedOpcode, SL, VT, DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, Y); - if (C0->isExactlyValue(-1.0)) + if (C0->isMinusOne()) return DAG.getNode(PreferredFusedOpcode, SL, 
VT, DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y, DAG.getNode(ISD::FNEG, SL, VT, Y)); } if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) { - if (C1->isExactlyValue(+1.0)) + if (C1->isOne()) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, DAG.getNode(ISD::FNEG, SL, VT, Y)); - if (C1->isExactlyValue(-1.0)) + if (C1->isMinusOne()) return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y); } @@ -19306,7 +19306,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return DAG.getNode(ISD::FADD, DL, VT, N0, N0); // fold (fmul X, -1.0) -> (fsub -0.0, X) - if (N1CFP && N1CFP->isExactlyValue(-1.0)) { + if (N1CFP && N1CFP->isMinusOne()) { if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) { return DAG.getNode(ISD::FSUB, DL, VT, DAG.getConstantFP(-0.0, DL, VT), N0, Flags); @@ -19362,11 +19362,11 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { case ISD::SETUGE: case ISD::SETGT: case ISD::SETGE: - if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) && + if (TrueOpnd->isMinusOne() && FalseOpnd->isOne() && TLI.isOperationLegal(ISD::FNEG, VT)) return DAG.getNode(ISD::FNEG, DL, VT, DAG.getNode(ISD::FABS, DL, VT, X)); - if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0)) + if (TrueOpnd->isOne() && FalseOpnd->isMinusOne()) return DAG.getNode(ISD::FABS, DL, VT, X); break; @@ -19430,9 +19430,9 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } } - if (N0CFP && N0CFP->isExactlyValue(1.0)) + if (N0CFP && N0CFP->isOne()) return DAG.getNode(ISD::FADD, DL, VT, N1, N2); - if (N1CFP && N1CFP->isExactlyValue(1.0)) + if (N1CFP && N1CFP->isOne()) return DAG.getNode(ISD::FADD, DL, VT, N0, N2); // Canonicalize (fma c, x, y) -> (fma x, c, y) @@ -19462,10 +19462,10 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { // (fma x, -1, y) -> (fadd (fneg x), y) if (N1CFP) { - if (N1CFP->isExactlyValue(1.0)) + if (N1CFP->isOne()) return DAG.getNode(ISD::FADD, DL, VT, N0, N2); - if (N1CFP->isExactlyValue(-1.0) && + if 
(N1CFP->isMinusOne() && (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0); AddToWorklist(RHSNeg.getNode()); @@ -19554,7 +19554,7 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) { // Skip if current node is a reciprocal/fneg-reciprocal. SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true); - if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0))) + if (N0CFP && (N0CFP->isOne() || N0CFP->isMinusOne())) return SDValue(); // Exit early if the target does not want this transform or if there can't diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 76556531ada42..853d0712dd7c9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -11689,7 +11689,7 @@ SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y, // X * 1.0 --> X // X / 1.0 --> X if (Opcode == ISD::FMUL || Opcode == ISD::FDIV) - if (YC->getValueAPF().isExactlyValue(1.0)) + if (YC->getValueAPF().isOne()) return X; // X * 0.0 --> 0.0 @@ -13706,9 +13706,9 @@ bool SelectionDAG::isIdentityElement(unsigned Opcode, SDNodeFlags Flags, return OperandNo == 1 && ConstFP->isZero() && (Flags.hasNoSignedZeros() || !ConstFP->isNegative()); case ISD::FMUL: - return ConstFP->isExactlyValue(1.0); + return ConstFP->isOne(); case ISD::FDIV: - return OperandNo == 1 && ConstFP->isExactlyValue(1.0); + return OperandNo == 1 && ConstFP->isOne(); case ISD::FMINNUM: case ISD::FMAXNUM: { // Neutral element for fminnum is NaN, Inf or FLT_MAX, depending on FMF. 
@@ -13863,7 +13863,7 @@ bool llvm::isOneOrOneSplat(SDValue N, bool AllowUndefs) { bool llvm::isOneOrOneSplatFP(SDValue N, bool AllowUndefs) { ConstantFPSDNode *C = isConstOrConstSplatFP(N, AllowUndefs); - return C && C->isExactlyValue(1.0); + return C && C->isOne(); } bool llvm::isAllOnesOrAllOnesSplat(SDValue N, bool AllowUndefs) { diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 787a11401cb6e..daf53c99988b0 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1568,7 +1568,7 @@ def fpimm_half : FPImmLeaf; def fpimm_one : FPImmLeaf; def fpimm_two : FPImmLeafisExactlyValue(1.0) || (IsNegative = CLHS->isExactlyValue(-1.0))) { + if (CLHS->isOne() || (IsNegative = CLHS->isMinusOne())) { // Add in the sqrt flags. IRBuilder<>::FastMathFlagGuard Guard(Builder); Builder.setFastMathFlags(DivFMF | SqrtFMF); @@ -779,8 +779,7 @@ AMDGPUCodeGenPrepareImpl::optimizeWithRcp(IRBuilder<> &Builder, Value *Num, if (const ConstantFP *CLHS = dyn_cast(Num)) { bool IsNegative = false; - if (CLHS->isExactlyValue(1.0) || - (IsNegative = CLHS->isExactlyValue(-1.0))) { + if (CLHS->isOne() || (IsNegative = CLHS->isMinusOne())) { Value *Src = Den; if (HasFP32DenormalFlush || FMF.approxFunc()) { @@ -843,7 +842,7 @@ Value *AMDGPUCodeGenPrepareImpl::optimizeWithFDivFast( bool NumIsOne = false; if (const ConstantFP *CNum = dyn_cast(Num)) { - if (CNum->isExactlyValue(+1.0) || CNum->isExactlyValue(-1.0)) + if (CNum->isOne() || CNum->isMinusOne()) NumIsOne = true; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td index 11d8aef61c858..2387afe1cd362 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -766,7 +766,7 @@ int BF16_NEG_ONE = 0xBF80; def CONST : Constants; def fpimm_zero : FPImmLeaf ; -def fpimm_one : FPImmLeaf ; +def fpimm_one : FPImmLeaf ; def 
fpimm_half : FPImmLeaf ; /* Generic helper patterns for intrinsics */ diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 65ae77cec0846..57d7d42bb5b9c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -5514,7 +5514,7 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, // v_rcp_f16 and v_rsq_f16 DO support denormals and 0.51ulp. // 1 / x -> RCP(x) - if (CLHS->isExactlyValue(1.0)) { + if (CLHS->isOne()) { B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res) .addUse(RHS) .setMIFlags(Flags); @@ -5524,7 +5524,7 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI, } // -1 / x -> RCP( FNEG(x) ) - if (CLHS->isExactlyValue(-1.0)) { + if (CLHS->isMinusOne()) { auto FNeg = B.buildFNeg(ResTy, RHS, Flags); B.buildIntrinsic(Intrinsic::amdgcn_rcp, Res) .addUse(FNeg.getReg(0)) @@ -5566,7 +5566,7 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV64(MachineInstr &MI, return false; const ConstantFP *CLHS = getConstantFPVRegVal(X, MRI); - bool IsNegRcp = CLHS && CLHS->isExactlyValue(-1.0); + bool IsNegRcp = CLHS && CLHS->isMinusOne(); // Pull out the negation so it folds for free into the source modifiers. if (IsNegRcp) @@ -5588,7 +5588,7 @@ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV64(MachineInstr &MI, R = B.buildFMA(ResTy, Tmp1, R, R); // Skip the last 2 correction terms for reciprocal. 
- if (IsNegRcp || (CLHS && CLHS->isExactlyValue(1.0))) { + if (IsNegRcp || (CLHS && CLHS->isOne())) { B.buildCopy(Res, R); MI.eraseFromParent(); return true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index 2d1cc1c0c66c0..f01487effb8ad 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -908,7 +908,7 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, replaceCall(FPOp, cnval); return true; } - if ((CF && CF->isExactlyValue(1.0)) || (CINT && ci_opr1 == 1)) { + if ((CF && CF->isOne()) || (CINT && ci_opr1 == 1)) { // pow/powr/pown(x, 1.0) = x LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> " << *opr0 << "\n"); replaceCall(FPOp, opr0); @@ -922,7 +922,7 @@ bool AMDGPULibCalls::fold_pow(FPMathOperator *FPOp, IRBuilder<> &B, replaceCall(FPOp, nval); return true; } - if ((CF && CF->isExactlyValue(-1.0)) || (CINT && ci_opr1 == -1)) { + if ((CF && CF->isMinusOne()) || (CINT && ci_opr1 == -1)) { // pow/powr/pown(x, -1.0) = 1.0/x LLVM_DEBUG(errs() << "AMDIC: " << *FPOp << " ---> 1 / " << *opr0 << "\n"); Constant *cnval = ConstantFP::get(eltType, 1.0); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp index 74184333e5b69..f9d8a09e29b0d 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankCombiner.cpp @@ -297,7 +297,7 @@ bool AMDGPURegBankCombinerImpl::matchFPMinMaxToClamp(MachineInstr &MI, if (!matchMed(MI, MRI, OpcodeTriple, Val, K0, K1)) return false; - if (!K0->Value.isPosZero() || !K1->Value.isExactlyValue(1.0)) + if (!K0->Value.isPosZero() || !K1->Value.isOne()) return false; // For IEEE=false perform combine only when it's safe to assume that there are @@ -596,8 +596,8 @@ bool AMDGPURegBankCombinerImpl::isClampZeroToOne(MachineInstr *K0, if (isFCst(K0) && isFCst(K1)) { const ConstantFP *KO_FPImm = K0->getOperand(1).getFPImm(); 
const ConstantFP *K1_FPImm = K1->getOperand(1).getFPImm(); - return (KO_FPImm->isPosZero() && K1_FPImm->isExactlyValue(1.0)) || - (KO_FPImm->isExactlyValue(1.0) && K1_FPImm->isPosZero()); + return (KO_FPImm->isPosZero() && K1_FPImm->isOne()) || + (KO_FPImm->isOne() && K1_FPImm->isPosZero()); } return false; } diff --git a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp index a3825ac909881..981a30d4fb356 100644 --- a/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -786,7 +786,7 @@ bool R600TargetLowering::isZero(SDValue Op) const { bool R600TargetLowering::isHWTrueValue(SDValue Op) const { if (ConstantFPSDNode * CFP = dyn_cast(Op)) { - return CFP->isExactlyValue(1.0); + return CFP->isOne(); } return isAllOnesConstant(Op); } @@ -1581,7 +1581,7 @@ static SDValue CompactSwizzlableVector( if (C->isZero()) { RemapSwizzle[i] = 4; // SEL_0 NewBldVec[i] = DAG.getUNDEF(MVT::f32); - } else if (C->isExactlyValue(1.0)) { + } else if (C->isOne()) { RemapSwizzle[i] = 5; // SEL_1 NewBldVec[i] = DAG.getUNDEF(MVT::f32); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 076bd7c97c44c..145c24c6dc7c7 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -13273,7 +13273,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, if (!AllowInaccurateRcp && VT != MVT::f16 && VT != MVT::bf16) return SDValue(); - if (CLHS->isExactlyValue(1.0)) { + if (CLHS->isOne()) { // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to // the CI documentation has a worst case error of 1 ulp. // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to @@ -13290,7 +13290,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op, } // Same as for 1.0, but expand the sign out of the constant. 
- if (CLHS->isExactlyValue(-1.0)) { + if (CLHS->isMinusOne()) { // -1.0 / x -> rcp (fneg x) SDValue FNegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); return DAG.getNode(AMDGPUISD::RCP, SL, VT, FNegRHS); @@ -13322,7 +13322,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV64(SDValue Op, return SDValue(); const ConstantFPSDNode *CLHS = dyn_cast(X); - bool IsNegRcp = CLHS && CLHS->isExactlyValue(-1.0); + bool IsNegRcp = CLHS && CLHS->isMinusOne(); // Pull out the negation so it folds for free into the source modifiers. if (IsNegRcp) @@ -13342,7 +13342,7 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV64(SDValue Op, R = DAG.getNode(ISD::FMA, SL, VT, Tmp1, R, R); // Skip the last 2 correction terms for reciprocal. - if (IsNegRcp || (CLHS && CLHS->isExactlyValue(1.0))) + if (IsNegRcp || (CLHS && CLHS->isOne())) return R; SDValue Ret = DAG.getNode(ISD::FMUL, SL, VT, X, R); @@ -16227,7 +16227,7 @@ SDValue SITargetLowering::performFPMed3ImmCombine(SelectionDAG &DAG, // If dx10_clamp is enabled, NaNs clamp to 0.0. This is the same as the // hardware fmed3 behavior converting to a min. // FIXME: Should this be allowing -0.0? - if (K1->isExactlyValue(1.0) && K0->isPosZero()) + if (K1->isOne() && K0->isPosZero()) return DAG.getNode(AMDGPUISD::CLAMP, SL, VT, Op0.getOperand(0)); } @@ -16425,8 +16425,8 @@ static bool isClampZeroToOne(SDValue A, SDValue B) { if (ConstantFPSDNode *CA = dyn_cast(A)) { if (ConstantFPSDNode *CB = dyn_cast(B)) { // FIXME: Should this be allowing -0.0? 
- return (CA->isPosZero() && CB->isExactlyValue(1.0)) || - (CA->isExactlyValue(1.0) && CB->isPosZero()); + return (CA->isPosZero() && CB->isOne()) || + (CA->isOne() && CB->isPosZero()); } } @@ -17816,8 +17816,7 @@ SDValue SITargetLowering::performFDivCombine(SDNode *N, if (const ConstantFPSDNode *CLHS = dyn_cast(LHS)) { bool IsNegative = false; - if (CLHS->isExactlyValue(1.0) || - (IsNegative = CLHS->isExactlyValue(-1.0))) { + if (CLHS->isOne() || (IsNegative = CLHS->isMinusOne())) { // fdiv contract 1.0, (sqrt contract x) -> rsq // fdiv contract -1.0, (sqrt contract x) -> fneg(rsq) if (RHS.getOpcode() == ISD::FSQRT) { diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 8748f4723339b..61e77c35fef9d 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -929,7 +929,7 @@ SDValue LoongArchTargetLowering::lowerConstantFP(SDValue Op, // use floating point load from the constant pool. auto Seq = LoongArchMatInt::generateInstSeq(INTVal.getSExtValue()); int InsNum = Seq.size() + ((VT == MVT::f64 && !Subtarget.is64Bit()) ? 
2 : 1); - if (InsNum > MaterializeFPImmInsNum && !FPVal.isExactlyValue(+1.0)) + if (InsNum > MaterializeFPImmInsNum && !FPVal.isOne()) return SDValue(); switch (VT.getSimpleVT().SimpleTy) { @@ -10564,7 +10564,7 @@ bool LoongArchTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, return false; if (VT == MVT::f64 && !Subtarget.hasBasicD()) return false; - return (Imm.isZero() || Imm.isExactlyValue(1.0) || isFPImmVLDILegal(Imm, VT)); + return (Imm.isZero() || Imm.isOne() || isFPImmVLDILegal(Imm, VT)); } bool LoongArchTargetLowering::isCheapToSpeculateCttz(Type *) const { diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td index 019539fbeab47..6aacfd8138ff5 100644 --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -579,7 +579,7 @@ def NegImm : SDNodeXFormisPosZero();}]>; def fpimm0neg : PatLeaf<(fpimm), [{return N->isNegZero();}]>; -def fpimm1 : PatLeaf<(fpimm), [{return N->isExactlyValue(+1.0);}]>; +def fpimm1 : PatLeaf<(fpimm), [{return N->isOne();}]>; // Return an immediate subtracted from 32. def ImmSubFrom32 : SDNodeXForm : def fpimm_pos_inf : FPImmLeaf; def fpimm_0 : FPImmLeaf; -def fpimm_1 : FPImmLeaf; -def fpimm_neg_1 : FPImmLeaf; +def fpimm_1 : FPImmLeaf; +def fpimm_neg_1 : FPImmLeaf; // Operands which can hold a Register or an Immediate. 
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 958fdc3812408..7a2b9611683c6 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -10251,9 +10251,9 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { const ConstantFPSDNode *FPTV = dyn_cast(TrueV); const ConstantFPSDNode *FPFV = dyn_cast(FalseV); if (FPTV && FPFV) { - if (FPTV->isExactlyValue(1.0) && FPFV->isPosZero()) + if (FPTV->isOne() && FPFV->isPosZero()) return DAG.getNode(ISD::SINT_TO_FP, DL, VT, CondV); - if (FPTV->isPosZero() && FPFV->isExactlyValue(1.0)) { + if (FPTV->isPosZero() && FPFV->isOne()) { SDValue XOR = DAG.getNode(ISD::XOR, DL, XLenVT, CondV, DAG.getConstant(1, DL, XLenVT)); return DAG.getNode(ISD::SINT_TO_FP, DL, VT, XOR); @@ -21967,7 +21967,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, if (N0->getOpcode() != ISD::FCOPYSIGN) return SDValue(); ConstantFPSDNode *C = dyn_cast(N0->getOperand(0)); - if (!C || !C->getValueAPF().isExactlyValue(+1.0)) + if (!C || !C->getValueAPF().isOne()) return SDValue(); if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT)) return SDValue(); diff --git a/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp index e4af82b11e7d8..660bee0f7a2cd 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCombinerHelper.cpp @@ -150,7 +150,7 @@ bool SPIRVCombinerHelper::matchSelectToFaceForward(MachineInstr &MI) const { mi_match(FalseReg, MRI, m_GFMul(m_GFCstOrSplat(MulConstant), m_SpecificReg(TrueReg)))) { - if (!MulConstant || !MulConstant->Value.isExactlyValue(-1.0)) + if (!MulConstant || !MulConstant->Value.isMinusOne()) return false; } else if (!AreNegatedConstantsOrSplats(TrueReg, FalseReg)) return false; diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td index 
923b968382866..c183849d4f575 100644 --- a/llvm/lib/Target/X86/X86InstrFragments.td +++ b/llvm/lib/Target/X86/X86InstrFragments.td @@ -1052,9 +1052,9 @@ def fpimmneg0 : FPImmLeaf; def fpimm1 : FPImmLeaf; def fpimmneg1 : FPImmLeaf; diff --git a/llvm/lib/Transforms/Scalar/Reassociate.cpp b/llvm/lib/Transforms/Scalar/Reassociate.cpp index 1ef4d02127ed2..a5105b84c3117 100644 --- a/llvm/lib/Transforms/Scalar/Reassociate.cpp +++ b/llvm/lib/Transforms/Scalar/Reassociate.cpp @@ -2411,7 +2411,7 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) { cast(I->user_back())->getOpcode() == Instruction::FAdd && isa(Ops.back().Op) && - cast(Ops.back().Op)->isExactlyValue(-1.0)) { + cast(Ops.back().Op)->isMinusOne()) { ValueEntry Tmp = Ops.pop_back_val(); Ops.insert(Ops.begin(), Tmp); } diff --git a/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp b/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp index 79272fe2eaeb7..a85131e6e01dd 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceOperands.cpp @@ -57,8 +57,7 @@ static bool isZero(Use &Op) { static bool isZeroOrOneFP(Value *Op) { const APFloat *C; - return match(Op, m_APFloat(C)) && - ((C->isZero() && !C->isNegative()) || C->isExactlyValue(1.0)); + return match(Op, m_APFloat(C)) && (C->isPosZero() || C->isOne()); } static bool shouldReduceOperand(Use &Op) { diff --git a/llvm/unittests/ADT/APFloatTest.cpp b/llvm/unittests/ADT/APFloatTest.cpp index 4814f3d7e8638..8d8ba27d87a4c 100644 --- a/llvm/unittests/ADT/APFloatTest.cpp +++ b/llvm/unittests/ADT/APFloatTest.cpp @@ -9491,6 +9491,32 @@ TEST(APFloatTest, getExactLog2) { } } +TEST(APFloatTest, isPowerOf2) { + const fltSemantics &Semantics = APFloat::IEEEdouble(); + + EXPECT_TRUE(APFloat(Semantics, "1.0").isPowerOf2(0)); + EXPECT_TRUE(APFloat(Semantics, "8.0").isPowerOf2(3)); + EXPECT_TRUE(APFloat(Semantics, "0.25").isPowerOf2(-2)); + + EXPECT_FALSE(APFloat(Semantics, "3.0").isPowerOf2(1)); + 
EXPECT_FALSE(APFloat(Semantics, "-8.0").isPowerOf2(3)); + EXPECT_FALSE(APFloat(Semantics, "-8.0").isPowerOf2(INT_MIN)); + EXPECT_FALSE(APFloat::getZero(Semantics, false).isPowerOf2(0)); + EXPECT_FALSE(APFloat::getInf(Semantics).isPowerOf2(0)); + EXPECT_FALSE(APFloat::getNaN(Semantics, false).isPowerOf2(0)); + + EXPECT_TRUE(APFloat(Semantics, "-1.0").isNegPowerOf2(0)); + EXPECT_TRUE(APFloat(Semantics, "-8.0").isNegPowerOf2(3)); + EXPECT_TRUE(APFloat(Semantics, "-0.25").isNegPowerOf2(-2)); + + EXPECT_FALSE(APFloat(Semantics, "-3.0").isNegPowerOf2(1)); + EXPECT_FALSE(APFloat(Semantics, "8.0").isNegPowerOf2(3)); + EXPECT_FALSE(APFloat(Semantics, "8.0").isNegPowerOf2(INT_MIN)); + EXPECT_FALSE(APFloat::getZero(Semantics, false).isNegPowerOf2(0)); + EXPECT_FALSE(APFloat::getInf(Semantics).isNegPowerOf2(0)); + EXPECT_FALSE(APFloat::getNaN(Semantics, false).isNegPowerOf2(0)); +} + TEST(APFloatTest, Float8E8M0FNUGetZero) { #ifdef GTEST_HAS_DEATH_TEST #ifndef NDEBUG diff --git a/llvm/unittests/IR/MDBuilderTest.cpp b/llvm/unittests/IR/MDBuilderTest.cpp index 4656c70ce9cad..a923418a05d64 100644 --- a/llvm/unittests/IR/MDBuilderTest.cpp +++ b/llvm/unittests/IR/MDBuilderTest.cpp @@ -40,7 +40,7 @@ TEST_F(MDBuilderTest, createFPMath) { EXPECT_TRUE(mdconst::hasa(Op)); ConstantFP *Val = mdconst::extract(Op); EXPECT_TRUE(Val->getType()->isFloatingPointTy()); - EXPECT_TRUE(Val->isExactlyValue(1.0)); + EXPECT_TRUE(Val->isOne()); } TEST_F(MDBuilderTest, createRangeMetadata) { MDBuilder MDHelper(Context); From 4453b5debd777281d7889076f83a1fc8fa9dfd28 Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Tue, 23 Jun 2026 12:29:11 +0100 Subject: [PATCH 164/511] [LLVM][CodeGen] Remove +bf16 for ARM/AArch64 tests that don't strictly need the feature flag. (#204199) Tests that use bfloat purely as an opaque datatype should not use instructions that require the bf16 feature. 
--- .../CodeGen/AArch64/aarch64-bf16-ldst-intrinsics.ll | 2 +- llvm/test/CodeGen/AArch64/arm64-build-vector.ll | 4 ++-- llvm/test/CodeGen/AArch64/bf16-select.ll | 2 -- llvm/test/CodeGen/AArch64/bf16-shuffle.ll | 3 +-- llvm/test/CodeGen/AArch64/bf16-vector-bitcast.ll | 2 +- llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll | 2 +- llvm/test/CodeGen/AArch64/bf16.ll | 4 ++-- llvm/test/CodeGen/AArch64/load-insert-undef.ll | 2 +- llvm/test/CodeGen/AArch64/load-insert-zero.ll | 2 +- llvm/test/CodeGen/AArch64/luti-with-sme2.ll | 2 +- llvm/test/CodeGen/AArch64/neon-luti.ll | 2 +- llvm/test/CodeGen/AArch64/spillfill-sve.ll | 2 +- .../test/CodeGen/AArch64/sve-extract-fixed-vector.ll | 2 +- .../CodeGen/AArch64/sve-extract-scalable-vector.ll | 2 +- llvm/test/CodeGen/AArch64/sve-insert-element.ll | 2 +- llvm/test/CodeGen/AArch64/sve-insert-vector.ll | 2 +- llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll | 2 +- .../AArch64/sve-ld1-addressing-mode-reg-reg.ll | 4 ++-- .../AArch64/sve-st1-addressing-mode-reg-reg.ll | 4 ++-- .../sve2p1-intrinsics-gather-loads-128bit-index.ll | 2 +- ...intrinsics-gather-loads-128bit-unscaled-offset.ll | 2 +- .../sve2p1-intrinsics-scatter-stores-128bit-index.ll | 2 +- ...trinsics-scatter-stores-128bit-unscaled-offset.ll | 2 +- llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tblq.ll | 2 +- llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll | 2 +- llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq1.ll | 2 +- llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq2.ll | 2 +- llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq1.ll | 2 +- llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq2.ll | 2 +- llvm/test/CodeGen/AArch64/sve2p1-vector-shuffles.ll | 4 ++-- .../CodeGen/AArch64/vector-extract-last-active.ll | 4 ++-- .../test/CodeGen/AArch64/zeroing-forms-counts-not.ll | 12 ++++++------ llvm/test/CodeGen/ARM/bf16-create-get-set-dup.ll | 2 +- llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll | 2 +- llvm/test/CodeGen/ARM/bf16-imm.ll | 6 +++--- 
llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll | 2 +- llvm/test/CodeGen/ARM/bf16-shuffle.ll | 4 ++-- llvm/test/CodeGen/ARM/bfloat.ll | 4 ++-- 38 files changed, 52 insertions(+), 55 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/aarch64-bf16-ldst-intrinsics.ll b/llvm/test/CodeGen/AArch64/aarch64-bf16-ldst-intrinsics.ll index b2643dc8f9dcb..421a95a70fde3 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-bf16-ldst-intrinsics.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-bf16-ldst-intrinsics.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple aarch64 -asm-verbose=1 -mattr=+bf16 %s -o - | FileCheck %s +; RUN: llc -mtriple aarch64 -asm-verbose=1 %s -o - | FileCheck %s %struct.bfloat16x4x2_t = type { [2 x <4 x bfloat>] } %struct.bfloat16x8x2_t = type { [2 x <8 x bfloat>] } diff --git a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll index 914f431866cce..f43aa27823494 100644 --- a/llvm/test/CodeGen/AArch64/arm64-build-vector.ll +++ b/llvm/test/CodeGen/AArch64/arm64-build-vector.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+bf16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD -; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+bf16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI ; Check that building a vector from floats doesn't insert an unnecessary ; copy for lane zero. 
diff --git a/llvm/test/CodeGen/AArch64/bf16-select.ll b/llvm/test/CodeGen/AArch64/bf16-select.ll index e3479f49e86b6..a8cd1a5e0d527 100644 --- a/llvm/test/CodeGen/AArch64/bf16-select.ll +++ b/llvm/test/CodeGen/AArch64/bf16-select.ll @@ -1,8 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple aarch64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-BASE ; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-FP16 -; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+bf16 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-BASE -; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+bf16,+fullfp16 -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK-FP16 define bfloat @test_select(bfloat %a, bfloat %b, i1 zeroext %c) { ; CHECK-BASE-LABEL: test_select: diff --git a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll index 26a2f9d5299db..50ee7f52c8cf2 100644 --- a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64 -mattr=+v8.6a,+neon < %s | FileCheck %s -; RUN: llc -mtriple=aarch64 -mattr=+v8.6a,+neon,+bf16 < %s | FileCheck %s -; RUN: llc -mtriple=aarch64 -mattr=+v8.6a,+neon,+fullfp16,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64 -mattr=+v8.6a,+neon,+fullfp16 < %s | FileCheck %s %struct.float16x4x2_t = type { [2 x <4 x bfloat>] } %struct.float16x8x2_t = type { [2 x <8 x bfloat>] } diff --git a/llvm/test/CodeGen/AArch64/bf16-vector-bitcast.ll b/llvm/test/CodeGen/AArch64/bf16-vector-bitcast.ll index e371748a43b29..e0b316b1de3ed 100644 --- a/llvm/test/CodeGen/AArch64/bf16-vector-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/bf16-vector-bitcast.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+bf16 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 | FileCheck %s define <4 x i16> @v4bf16_to_v4i16(float, <4 x bfloat> %a) nounwind { ; CHECK-LABEL: v4bf16_to_v4i16: diff --git a/llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll index 9e13201692db6..f1fa8797565dd 100644 --- a/llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/bf16-vector-shuffle.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+bf16 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 | FileCheck %s ; bfloat16x4_t test_vcreate_bf16(uint64_t a) { return vcreate_bf16(a); } define <4 x bfloat> @test_vcreate_bf16(i64 %a) nounwind { diff --git a/llvm/test/CodeGen/AArch64/bf16.ll b/llvm/test/CodeGen/AArch64/bf16.ll index a36b367a56b75..cb584b90cdb35 100644 --- a/llvm/test/CodeGen/AArch64/bf16.ll +++ b/llvm/test/CodeGen/AArch64/bf16.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -mtriple=arm64-eabi -mattr=+bf16 | FileCheck %s -; RUN: llc < %s -mtriple=aarch64 -mattr=+bf16 | FileCheck %s +; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 | FileCheck %s ; test argument passing and simple load/store diff --git a/llvm/test/CodeGen/AArch64/load-insert-undef.ll b/llvm/test/CodeGen/AArch64/load-insert-undef.ll index b1b1289a0e53f..1c1c5f17417cf 100644 --- a/llvm/test/CodeGen/AArch64/load-insert-undef.ll +++ b/llvm/test/CodeGen/AArch64/load-insert-undef.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+bf16,+sve | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+sve | FileCheck %s define <8 x i8> @loadv8i8(ptr %p) { ; 
CHECK-LABEL: loadv8i8: diff --git a/llvm/test/CodeGen/AArch64/load-insert-zero.ll b/llvm/test/CodeGen/AArch64/load-insert-zero.ll index d6150b6dc4585..7841b29441eb0 100644 --- a/llvm/test/CodeGen/AArch64/load-insert-zero.ll +++ b/llvm/test/CodeGen/AArch64/load-insert-zero.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+bf16,+sve | FileCheck %s +; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16,+sve | FileCheck %s define <8 x i8> @loadv8i8(ptr %p) { ; CHECK-LABEL: loadv8i8: diff --git a/llvm/test/CodeGen/AArch64/luti-with-sme2.ll b/llvm/test/CodeGen/AArch64/luti-with-sme2.ll index ae8ed58bcf8f8..de8b086a239e7 100644 --- a/llvm/test/CodeGen/AArch64/luti-with-sme2.ll +++ b/llvm/test/CodeGen/AArch64/luti-with-sme2.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -enable-subreg-liveness -force-streaming -mattr=+sve2,+sme2,+lut,+bf16 | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -enable-subreg-liveness -force-streaming -mattr=+sve2,+sme2,+lut | FileCheck %s define { , } @test_luti4_lane_i16_x2_tuple(i64 %stride, ptr %ptr, %indices) { ; CHECK-LABEL: test_luti4_lane_i16_x2_tuple: diff --git a/llvm/test/CodeGen/AArch64/neon-luti.ll b/llvm/test/CodeGen/AArch64/neon-luti.ll index 5436662753762..85aa52e854f88 100644 --- a/llvm/test/CodeGen/AArch64/neon-luti.ll +++ b/llvm/test/CodeGen/AArch64/neon-luti.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon,+lut,+bf16 | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon,+lut | FileCheck %s define <16 x i8> @test_luti2_lane_i8(<8 x i8> %vn, <8 x i8> %vm){ ; 
CHECK-LABEL: test_luti2_lane_i8: diff --git a/llvm/test/CodeGen/AArch64/spillfill-sve.ll b/llvm/test/CodeGen/AArch64/spillfill-sve.ll index 60f5a60144237..2ed7edd2bcf30 100644 --- a/llvm/test/CodeGen/AArch64/spillfill-sve.ll +++ b/llvm/test/CodeGen/AArch64/spillfill-sve.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve -mattr=+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s | FileCheck %s ; This file checks that unpredicated load/store instructions to locals ; use the right instructions and offsets. diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll index 1cefe96962e29..0be662720c416 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK ; Should codegen to a nop, since idx is zero. 
define <2 x i64> @extract_v2i64_nxv2i64( %vec) nounwind { diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll index 1cfff7e239de4..4c37ed87a094c 100644 --- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK ; Extracting illegal subvectors diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll index fc4d27c5db57a..29da2a7db8dc4 100644 --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s define @test_lane0_16xi8( %a) { ; CHECK-LABEL: test_lane0_16xi8: diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll index 5b0f4b9982595..9aebf291c0b3e 100644 --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+bf16 < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK define @insert_v2i64_nxv2i64( %vec, <2 x i64> %subvec) nounwind { ; CHECK-LABEL: insert_v2i64_nxv2i64: diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll 
b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll index 723b217cf15a3..13b6d41f313be 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-loads.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s ; ; LD1RQB diff --git a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll index 29e94dd6c5242..a55b7632c716d 100644 --- a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll +++ b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-reg.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE -; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE +; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE ; LD1B diff --git a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll index 0bf6b12a5d020..17675fbc51d62 100644 --- a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll +++ b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-reg.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LE -; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck 
%s --check-prefixes=CHECK,CHECK-LE +; RUN: llc -mtriple=aarch64_be-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BE ; ST1B diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-index.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-index.ll index 4ad13ee97f010..65e34c94ee2e9 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-index.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-index.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s declare @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64(, , i64) declare @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64(, , i64) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-unscaled-offset.ll index 8bee44be9f0cd..6896db7743361 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-unscaled-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-gather-loads-128bit-unscaled-offset.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s ; ; LD1Q: vector base + unscaled offset diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-index.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-index.ll index 29c9c6a23235e..4e0bdf636faa6 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-index.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-index.ll @@ -1,5 
+1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(, , , i64) declare void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv4i32.nxv2i64(, , , i64) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-unscaled-offset.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-unscaled-offset.ll index 6493640c06abd..077cfa319a393 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-unscaled-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-scatter-stores-128bit-unscaled-offset.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s ; ; ST1Q: vector base + unscaled offset diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tblq.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tblq.ll index cc9bbcfe47102..9538f15f31212 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tblq.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tblq.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1 < %s | FileCheck %s define @test_tblq_i8 ( %zn, %zm) { ; CHECK-LABEL: test_tblq_i8: diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll index 29265b5d02e51..c0f5b5c2338e7 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll +++ 
b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-tbxq.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve2p1 < %s | FileCheck %s define @test_tbxq_i8 ( %passthru, %zn, %zm) { ; CHECK-LABEL: test_tbxq_i8: diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq1.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq1.ll index 7a83b15008462..a69e8ae55dea0 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq1.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq1.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s define @test_uzpq1_i8( %zn, %zm) { ; CHECK-LABEL: test_uzpq1_i8: diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq2.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq2.ll index 8695f1c3e1ce9..ad4be827cc985 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq2.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-uzpq2.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s define @test_uzpq2_i8( %zn, %zm) { ; CHECK-LABEL: test_uzpq2_i8: diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq1.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq1.ll index 89383c1a3f230..11d6e7f01809d 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq1.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq1.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s define @test_zipq1_i8( %zn, %zm) { ; CHECK-LABEL: test_zipq1_i8: diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq2.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq2.ll index c9aaae3371e80..f3a4f8bcd4b8c 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq2.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-zipq2.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s define @test_zipq2_i8( %zn, %zm) { ; CHECK-LABEL: test_zipq2_i8: diff --git a/llvm/test/CodeGen/AArch64/sve2p1-vector-shuffles.ll b/llvm/test/CodeGen/AArch64/sve2p1-vector-shuffles.ll index da83b27ce4d55..a2d4ababd919f 100644 --- a/llvm/test/CodeGen/AArch64/sve2p1-vector-shuffles.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-vector-shuffles.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s --check-prefixes=CHECK,SVE -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p1,+bf16 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SME +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,SVE +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p1 -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SME define void @dupq_i8_256b(ptr %addr) #0 { ; CHECK-LABEL: dupq_i8_256b: diff --git a/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll b/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll index a99b4a5f477d9..2c7fd22622853 100644 --- 
a/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll +++ b/llvm/test/CodeGen/AArch64/vector-extract-last-active.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -mtriple=aarch64 -mattr=+bf16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,NEON-FIXED -; RUN: llc -mtriple=aarch64 -mattr=+sve,+bf16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,SVE-FIXED +; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,NEON-FIXED +; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,SVE-FIXED define i8 @extract_last_i8(<16 x i8> %data, <16 x i8> %mask, i8 %passthru) { ; NEON-FIXED-LABEL: extract_last_i8: diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll index 07b3867962c40..767e5602a6cd9 100644 --- a/llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll +++ b/llvm/test/CodeGen/AArch64/zeroing-forms-counts-not.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mattr=+bf16,+sve < %s | FileCheck %s -check-prefixes=CHECK,SVE -; RUN: llc -mattr=+bf16,+sve2p2 < %s | FileCheck %s -check-prefix CHECK-2p2 -; RUN: llc -mattr=+bf16,+sve,+sme2p2 < %s | FileCheck %s -check-prefix CHECK-2p2 +; RUN: llc -mattr=+sve < %s | FileCheck %s -check-prefixes=CHECK,SVE +; RUN: llc -mattr=+sve2p2 < %s | FileCheck %s -check-prefix CHECK-2p2 +; RUN: llc -mattr=+sve,+sme2p2 < %s | FileCheck %s -check-prefix CHECK-2p2 -; RUN: llc -mattr=+bf16,+sme -force-streaming < %s | FileCheck %s -check-prefixes=CHECK,STREAMING-SVE -; RUN: llc -mattr=+bf16,+sme2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2 -; RUN: llc -mattr=+bf16,+sme,+sve2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2 +; RUN: llc -mattr=+sme 
-force-streaming < %s | FileCheck %s -check-prefixes=CHECK,STREAMING-SVE +; RUN: llc -mattr=+sme2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2 +; RUN: llc -mattr=+sme,+sve2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2 target triple = "aarch64-linux" diff --git a/llvm/test/CodeGen/ARM/bf16-create-get-set-dup.ll b/llvm/test/CodeGen/ARM/bf16-create-get-set-dup.ll index 39416ccdd7765..34eb403c1886b 100644 --- a/llvm/test/CodeGen/ARM/bf16-create-get-set-dup.ll +++ b/llvm/test/CodeGen/ARM/bf16-create-get-set-dup.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=armv8.6a-arm-none-eabi -mattr=+bf16,+neon,fullfp16 < %s | FileCheck %s +; RUN: llc -mtriple=armv8.6a-arm-none-eabi -mattr=+neon,fullfp16 < %s | FileCheck %s ; FIXME: Remove fullfp16 once bfloat arguments and returns lowering stops ; depending on it. diff --git a/llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll b/llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll index 3416e5032f88f..98b4e481edbc6 100644 --- a/llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll +++ b/llvm/test/CodeGen/ARM/bf16-getlane-with-fp16.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=armv8.6a-arm-none-eabi -mattr=+bf16,+neon,+fullfp16 < %s | FileCheck %s +; RUN: llc -mtriple=armv8.6a-arm-none-eabi -mattr=+neon,+fullfp16 < %s | FileCheck %s target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv8.6a-arm-none-eabi" diff --git a/llvm/test/CodeGen/ARM/bf16-imm.ll b/llvm/test/CodeGen/ARM/bf16-imm.ll index 7532bbcd09b3f..d7b3c9b6b363b 100644 --- a/llvm/test/CodeGen/ARM/bf16-imm.ll +++ b/llvm/test/CodeGen/ARM/bf16-imm.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=armv8.6a-none-none-eabi < %s | FileCheck %s --check-prefixes=CHECK -; RUN: llc -mtriple=armv8.6a-none-none-eabi 
-mattr=+bf16,+neon < %s | FileCheck %s --check-prefixes=CHECK -; RUN: llc -mtriple=armv8.6a-none-none-eabi -mattr=+bf16,+neon,+fullfp16 < %s | FileCheck %s --check-prefix=CHECK-FP16 -; RUN: llc -mtriple=armv8.6a-none-none-eabi -mattr=+bf16,+neon -float-abi=hard < %s | FileCheck %s --check-prefix=CHECK-HARD +; RUN: llc -mtriple=armv8.6a-none-none-eabi -mattr=+neon < %s | FileCheck %s --check-prefixes=CHECK +; RUN: llc -mtriple=armv8.6a-none-none-eabi -mattr=+neon,+fullfp16 < %s | FileCheck %s --check-prefix=CHECK-FP16 +; RUN: llc -mtriple=armv8.6a-none-none-eabi -mattr=+neon -float-abi=hard < %s | FileCheck %s --check-prefix=CHECK-HARD define bfloat @bitcast_zero() { ; CHECK-LABEL: bitcast_zero: diff --git a/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll b/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll index 846cf239e8987..21a6892c05fc6 100644 --- a/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll +++ b/llvm/test/CodeGen/ARM/bf16-intrinsics-ld-st.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=armv8.6a-arm-none-eabi -mattr=+bf16,+neon,+fullfp16 < %s | FileCheck %s +; RUN: llc -mtriple=armv8.6a-arm-none-eabi -mattr=+neon,+fullfp16 < %s | FileCheck %s ; FIXME: Remove fullfp16 once bfloat arguments and returns lowering stops ; depending on it. 
diff --git a/llvm/test/CodeGen/ARM/bf16-shuffle.ll b/llvm/test/CodeGen/ARM/bf16-shuffle.ll index a45ad8f698b23..3a8ef908ebbee 100644 --- a/llvm/test/CodeGen/ARM/bf16-shuffle.ll +++ b/llvm/test/CodeGen/ARM/bf16-shuffle.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=arm-eabi -mattr=+v8.6a,+neon -float-abi=hard < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 -; RUN: llc -mtriple=arm-eabi -mattr=+v8.6a,+neon,+bf16 -float-abi=hard < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 -; RUN: llc -mtriple=arm-eabi -mattr=+v8.6a,+neon,+fullfp16,+bf16 -float-abi=hard < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 +; RUN: llc -mtriple=arm-eabi -mattr=+v8.6a,+neon -float-abi=hard < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16 +; RUN: llc -mtriple=arm-eabi -mattr=+v8.6a,+neon,+fullfp16 -float-abi=hard < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FP16 %struct.float16x4x2_t = type { [2 x <4 x bfloat>] } %struct.float16x8x2_t = type { [2 x <8 x bfloat>] } diff --git a/llvm/test/CodeGen/ARM/bfloat.ll b/llvm/test/CodeGen/ARM/bfloat.ll index be62e076c5739..01b0786c036db 100644 --- a/llvm/test/CodeGen/ARM/bfloat.ll +++ b/llvm/test/CodeGen/ARM/bfloat.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -float-abi hard -mattr=+bf16,+fullfp16 < %s | FileCheck %s --check-prefix=HARD -; RUN: llc -float-abi soft -mattr=+bf16,+fullfp16 < %s | FileCheck %s --check-prefix=SOFT +; RUN: llc -float-abi hard -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=HARD +; RUN: llc -float-abi soft -mattr=+fullfp16 < %s | FileCheck %s --check-prefix=SOFT target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" target triple = "armv8.6a-arm-none-eabi" From 6bb90df5bd6e7430d6dd42eedc5dd8b7f20803d4 Mon Sep 17 00:00:00 2001 From: Charles Zablit Date: Tue, 23 Jun 2026 13:31:22 +0200 Subject: [PATCH 165/511] [CI] switch to BuildTools 
variant of Visual Studio (#201335) Follow up to: - https://github.com/swiftlang/swift-docker/pull/566 - https://github.com/swiftlang/swift-docker/pull/565 It's no longer necessary to install packaging at runtime and the new image will use `BuildTools`. --- .ci/green-dragon/lldb-windows.groovy | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.ci/green-dragon/lldb-windows.groovy b/.ci/green-dragon/lldb-windows.groovy index 48224479f1d28..de37f0118f239 100644 --- a/.ci/green-dragon/lldb-windows.groovy +++ b/.ci/green-dragon/lldb-windows.groovy @@ -49,9 +49,7 @@ pipeline { timeout(240) { catchError(buildResult: 'FAILURE', stageResult: 'FAILURE') { writeFile file: 'build.bat', text: '''@echo off -call "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\VC\\Auxiliary\\Build\\vcvars64.bat" || exit /b 1 - -"C:\\Program Files\\Python313\\python.exe" -m pip install packaging || exit /b 1 +call "C:\\Program Files (x86)\\Microsoft Visual Studio\\2022\\BuildTools\\VC\\Auxiliary\\Build\\vcvars64.bat" || exit /b 1 cmake -G Ninja ^ -S llvm ^ From 8e7d0ad0d3b78512d3df0227d421ff90718f933b Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 23 Jun 2026 21:57:07 +1000 Subject: [PATCH 166/511] [orc-rt] Split AllocAction tests by SPS dependency. (#205322) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rewrites AllocActionTest.cpp's integration tests (RunBasicAction, RunFinalize*) to drive AllocActionFunction::handle with a small local IntPtrDeserializer / IdentitySerializer pair instead of going through SPS, and moves the existing SPS-using AllocAction tests into SPSAllocActionTest.cpp. Also adds two new SPS tests covering previously-uncovered paths: - RunActionWithSPSArgsAndWFBReturn — SPS argument deserialization plus AllocActionSPSSerializer's identity (WrapperFunctionBuffer) overload. - RunActionWithUndecodableArgs — the deserialization-failure path in AllocActionFunction::handle. 
After the split, an AllocActionTest failure indicates problems with the AllocAction machinery, and an SPSAllocActionTest failure without a corresponding AllocActionTest failure indicates an SPS encoding / decoding issue for AllocAction. --- orc-rt/unittests/AllocActionTest.cpp | 178 +++++++++++------------- orc-rt/unittests/SPSAllocActionTest.cpp | 79 +++++++++++ 2 files changed, 161 insertions(+), 96 deletions(-) diff --git a/orc-rt/unittests/AllocActionTest.cpp b/orc-rt/unittests/AllocActionTest.cpp index 4e431d9099dc6..0777eb27aa79e 100644 --- a/orc-rt/unittests/AllocActionTest.cpp +++ b/orc-rt/unittests/AllocActionTest.cpp @@ -8,64 +8,95 @@ // // Tests for orc-rt's AllocAction.h APIs. // +// These tests exercise the AllocAction layer directly, using a small bespoke +// (de)serializer that exchanges raw int* values via memcpy. SPS-layer tests +// live in SPSAllocActionTest.cpp. +// //===----------------------------------------------------------------------===// #include "orc-rt/AllocAction.h" -#include "orc-rt/ExecutorAddress.h" -#include "orc-rt/SPSAllocAction.h" -#include "AllocActionTestUtils.h" #include "gtest/gtest.h" +#include + using namespace orc_rt; -TEST(AllocActionTest, DefaultConstruct) { - AllocAction AA; - EXPECT_FALSE(AA); +namespace { + +// A minimal AllocActionFunction (de)serializer pair that exchanges a single +// int* via memcpy. Used to drive AllocActionFunction::handle without pulling +// in SPS. 
+struct IntPtrDeserializer { + bool deserialize(const char *ArgData, size_t ArgSize, + std::tuple &Args) { + if (ArgSize != sizeof(int *)) + return false; + memcpy(&std::get<0>(Args), ArgData, sizeof(int *)); + return true; + } +}; + +struct IdentitySerializer { + static WrapperFunctionBuffer serialize(WrapperFunctionBuffer B) { return B; } +}; + +WrapperFunctionBuffer makeIntPtrArgBuffer(int *P) { + auto B = WrapperFunctionBuffer::allocate(sizeof(int *)); + memcpy(B.data(), &P, sizeof(int *)); + return B; } +} // anonymous namespace + static orc_rt_WrapperFunctionBuffer noopAction(const char *ArgData, size_t ArgSize) { return WrapperFunctionBuffer().release(); } -TEST(AllocActionTest, ConstructWithAction) { - AllocAction AA(noopAction, WrapperFunctionBuffer()); - EXPECT_TRUE(AA); -} - -// Increments int via pointer. +// Increments an int via pointer. static orc_rt_WrapperFunctionBuffer -increment_sps_allocaction(const char *ArgData, size_t ArgSize) { - return SPSAllocActionFunction::handle( - ArgData, ArgSize, - [](ExecutorAddr IntPtr) { - *IntPtr.toPtr() += 1; - return WrapperFunctionBuffer(); - }) +increment_int_ptr_action(const char *ArgData, size_t ArgSize) { + return AllocActionFunction::handle(ArgData, ArgSize, IntPtrDeserializer(), + IdentitySerializer(), + [](int *P) { + ++*P; + return WrapperFunctionBuffer(); + }) .release(); } -// Increments int via pointer. +// Decrements an int via pointer. 
static orc_rt_WrapperFunctionBuffer -decrement_sps_allocaction(const char *ArgData, size_t ArgSize) { - return SPSAllocActionFunction::handle( - ArgData, ArgSize, - [](ExecutorAddr IntPtr) { - *IntPtr.toPtr() -= 1; - return WrapperFunctionBuffer(); - }) +decrement_int_ptr_action(const char *ArgData, size_t ArgSize) { + return AllocActionFunction::handle(ArgData, ArgSize, IntPtrDeserializer(), + IdentitySerializer(), + [](int *P) { + --*P; + return WrapperFunctionBuffer(); + }) .release(); } -template -static WrapperFunctionBuffer makeExecutorAddrBuffer(T *P) { - return *spsSerialize>(ExecutorAddr::fromPtr(P)); +// Always returns an out-of-band error. +static orc_rt_WrapperFunctionBuffer fail_action(const char *ArgData, + size_t ArgSize) { + return WrapperFunctionBuffer::createOutOfBandError("failed").release(); +} + +TEST(AllocActionTest, DefaultConstruct) { + AllocAction AA; + EXPECT_FALSE(AA); +} + +TEST(AllocActionTest, ConstructWithAction) { + AllocAction AA(noopAction, WrapperFunctionBuffer()); + EXPECT_TRUE(AA); } TEST(AllocActionTest, RunBasicAction) { int Val = 0; - AllocAction IncVal(increment_sps_allocaction, makeExecutorAddrBuffer(&Val)); + AllocAction IncVal(increment_int_ptr_action, makeIntPtrArgBuffer(&Val)); EXPECT_TRUE(IncVal); auto B = IncVal(); EXPECT_TRUE(B.empty()); @@ -78,13 +109,12 @@ TEST(AllocActionTest, RunFinalizationActionsComplete) { std::vector InitialActions; auto MakeAAOnVal = [&](AllocActionFn Fn) { - return *MakeAllocAction::from(Fn, - ExecutorAddr::fromPtr(&Val)); + return AllocAction(Fn, makeIntPtrArgBuffer(&Val)); }; - InitialActions.push_back({MakeAAOnVal(increment_sps_allocaction), - MakeAAOnVal(decrement_sps_allocaction)}); - InitialActions.push_back({MakeAAOnVal(increment_sps_allocaction), - MakeAAOnVal(decrement_sps_allocaction)}); + InitialActions.push_back({MakeAAOnVal(increment_int_ptr_action), + MakeAAOnVal(decrement_int_ptr_action)}); + InitialActions.push_back({MakeAAOnVal(increment_int_ptr_action), + 
MakeAAOnVal(decrement_int_ptr_action)}); auto DeallocActions = cantFail(runFinalizeActions(std::move(InitialActions))); @@ -95,24 +125,18 @@ TEST(AllocActionTest, RunFinalizationActionsComplete) { EXPECT_EQ(Val, 0); } -static orc_rt_WrapperFunctionBuffer fail_sps_allocaction(const char *ArgData, - size_t ArgSize) { - return WrapperFunctionBuffer::createOutOfBandError("failed").release(); -} - TEST(AllocActionTest, RunFinalizeActionsFail) { int Val = 0; std::vector InitialActions; auto MakeAAOnVal = [&](AllocActionFn Fn) { - return *MakeAllocAction::from(Fn, - ExecutorAddr::fromPtr(&Val)); + return AllocAction(Fn, makeIntPtrArgBuffer(&Val)); }; - InitialActions.push_back({MakeAAOnVal(increment_sps_allocaction), - MakeAAOnVal(decrement_sps_allocaction)}); - InitialActions.push_back({*MakeAllocAction<>::from(fail_sps_allocaction), - MakeAAOnVal(decrement_sps_allocaction)}); + InitialActions.push_back({MakeAAOnVal(increment_int_ptr_action), + MakeAAOnVal(decrement_int_ptr_action)}); + InitialActions.push_back({AllocAction(fail_action, WrapperFunctionBuffer()), + MakeAAOnVal(decrement_int_ptr_action)}); auto DeallocActions = runFinalizeActions(std::move(InitialActions)); @@ -133,13 +157,12 @@ TEST(AllocActionTest, RunFinalizeActionsNullFinalize) { std::vector InitialActions; auto MakeAAOnVal = [&](AllocActionFn Fn) { - return *MakeAllocAction::from(Fn, - ExecutorAddr::fromPtr(&Val)); + return AllocAction(Fn, makeIntPtrArgBuffer(&Val)); }; - InitialActions.push_back({MakeAAOnVal(increment_sps_allocaction), - MakeAAOnVal(decrement_sps_allocaction)}); - InitialActions.push_back({*MakeAllocAction<>::from(nullptr), - MakeAAOnVal(decrement_sps_allocaction)}); + InitialActions.push_back({MakeAAOnVal(increment_int_ptr_action), + MakeAAOnVal(decrement_int_ptr_action)}); + InitialActions.push_back({AllocAction(nullptr, WrapperFunctionBuffer()), + MakeAAOnVal(decrement_int_ptr_action)}); auto DeallocActions = cantFail(runFinalizeActions(std::move(InitialActions))); @@ -158,13 
+181,12 @@ TEST(AllocActionTest, RunFinalizeActionsNullDealloc) { std::vector InitialActions; auto MakeAAOnVal = [&](AllocActionFn Fn) { - return *MakeAllocAction::from(Fn, - ExecutorAddr::fromPtr(&Val)); + return AllocAction(Fn, makeIntPtrArgBuffer(&Val)); }; - InitialActions.push_back({MakeAAOnVal(increment_sps_allocaction), - MakeAAOnVal(decrement_sps_allocaction)}); - InitialActions.push_back({MakeAAOnVal(increment_sps_allocaction), - *MakeAllocAction<>::from(nullptr)}); + InitialActions.push_back({MakeAAOnVal(increment_int_ptr_action), + MakeAAOnVal(decrement_int_ptr_action)}); + InitialActions.push_back({MakeAAOnVal(increment_int_ptr_action), + AllocAction(nullptr, WrapperFunctionBuffer())}); auto DeallocActions = cantFail(runFinalizeActions(std::move(InitialActions))); @@ -175,39 +197,3 @@ TEST(AllocActionTest, RunFinalizeActionsNullDealloc) { EXPECT_EQ(Val, 1); } - -// Handler that returns Error::success(). Exercises the -// AllocActionSPSSerializer::serialize(Error) overload's success path. -static orc_rt_WrapperFunctionBuffer -errorSuccess_sps_allocaction(const char *ArgData, size_t ArgSize) { - return SPSAllocActionFunction<>::handle( - ArgData, ArgSize, []() -> Error { return Error::success(); }) - .release(); -} - -// Handler that returns a StringError. Exercises the -// AllocActionSPSSerializer::serialize(Error) overload's failure path. -static orc_rt_WrapperFunctionBuffer -errorFailure_sps_allocaction(const char *ArgData, size_t ArgSize) { - return SPSAllocActionFunction<>::handle( - ArgData, ArgSize, - []() -> Error { return make_error("test failure"); }) - .release(); -} - -TEST(AllocActionTest, RunActionWithErrorSuccessReturn) { - // A handler returning Error::success() should produce a non-out-of-band - // result buffer. 
- AllocAction AA(errorSuccess_sps_allocaction, WrapperFunctionBuffer()); - auto B = AA(); - EXPECT_EQ(B.getOutOfBandError(), nullptr); -} - -TEST(AllocActionTest, RunActionWithErrorFailureReturn) { - // A handler returning a real Error should produce an out-of-band error - // result buffer carrying the Error's string form. - AllocAction AA(errorFailure_sps_allocaction, WrapperFunctionBuffer()); - auto B = AA(); - ASSERT_NE(B.getOutOfBandError(), nullptr); - EXPECT_STREQ(B.getOutOfBandError(), "test failure"); -} diff --git a/orc-rt/unittests/SPSAllocActionTest.cpp b/orc-rt/unittests/SPSAllocActionTest.cpp index b7791e3385382..2145ff251c0f5 100644 --- a/orc-rt/unittests/SPSAllocActionTest.cpp +++ b/orc-rt/unittests/SPSAllocActionTest.cpp @@ -12,6 +12,9 @@ #include "orc-rt/SPSAllocAction.h" +#include "orc-rt/ExecutorAddress.h" + +#include "AllocActionTestUtils.h" #include "SimplePackedSerializationTestUtils.h" #include "gtest/gtest.h" @@ -47,3 +50,79 @@ TEST(SPSAllocActionTest, AllocActionPairSerialization) { blobSerializationRoundTrip(AAP, AAPEQ); } + +// Handler that returns Error::success(). Exercises the +// AllocActionSPSSerializer::serialize(Error) overload's success path. +static orc_rt_WrapperFunctionBuffer +errorSuccess_sps_allocaction(const char *ArgData, size_t ArgSize) { + return SPSAllocActionFunction<>::handle( + ArgData, ArgSize, []() -> Error { return Error::success(); }) + .release(); +} + +// Handler that returns a StringError. Exercises the +// AllocActionSPSSerializer::serialize(Error) overload's failure path. +static orc_rt_WrapperFunctionBuffer +errorFailure_sps_allocaction(const char *ArgData, size_t ArgSize) { + return SPSAllocActionFunction<>::handle( + ArgData, ArgSize, + []() -> Error { return make_error("test failure"); }) + .release(); +} + +TEST(SPSAllocActionTest, RunActionWithErrorSuccessReturn) { + // A handler returning Error::success() should produce a non-out-of-band + // result buffer. 
+ AllocAction AA(errorSuccess_sps_allocaction, WrapperFunctionBuffer()); + auto B = AA(); + EXPECT_EQ(B.getOutOfBandError(), nullptr); +} + +TEST(SPSAllocActionTest, RunActionWithErrorFailureReturn) { + // A handler returning a real Error should produce an out-of-band error + // result buffer carrying the Error's string form. + AllocAction AA(errorFailure_sps_allocaction, WrapperFunctionBuffer()); + auto B = AA(); + ASSERT_NE(B.getOutOfBandError(), nullptr); + EXPECT_STREQ(B.getOutOfBandError(), "test failure"); +} + +// Handler that takes an SPS-encoded int* argument and increments the int. +// Exercises both the SPS argument-deserialization path and the identity +// (WrapperFunctionBuffer) overload of AllocActionSPSSerializer. +static orc_rt_WrapperFunctionBuffer +increment_sps_allocaction(const char *ArgData, size_t ArgSize) { + return SPSAllocActionFunction::handle( + ArgData, ArgSize, + [](ExecutorAddr IntPtr) { + *IntPtr.toPtr() += 1; + return WrapperFunctionBuffer(); + }) + .release(); +} + +TEST(SPSAllocActionTest, RunActionWithSPSArgsAndWFBReturn) { + // Verifies that handlers with SPS-encoded arguments work end-to-end: SPS + // deserializes the int*, the handler runs, and its empty WFB return is + // passed through by AllocActionSPSSerializer's identity overload. + int Val = 0; + auto IncVal = *MakeAllocAction::from( + increment_sps_allocaction, ExecutorAddr::fromPtr(&Val)); + EXPECT_TRUE(IncVal); + auto B = IncVal(); + EXPECT_EQ(B.getOutOfBandError(), nullptr); + EXPECT_EQ(Val, 1); +} + +TEST(SPSAllocActionTest, RunActionWithUndecodableArgs) { + // An arg buffer that's too small for the wrapper's declared SPS signature + // (SPSExecutorAddr expects 8 bytes here) should cause + // AllocActionFunction::handle to return its out-of-band deserialization + // error without invoking the handler. 
+ AllocAction AA(increment_sps_allocaction, + WrapperFunctionBuffer::allocate(/*Size=*/1)); + auto B = AA(); + ASSERT_NE(B.getOutOfBandError(), nullptr); + EXPECT_STREQ(B.getOutOfBandError(), + "Could not deserialize allocation action argument buffer"); +} From b6f76f21f23a33f34706f77dea43c4eb1678d18f Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 23 Jun 2026 08:09:57 -0400 Subject: [PATCH 167/511] Document the [[clang::annotate]] attribute (#203303) The new documentation mirrors the existing docs for annotate_type. --- clang/include/clang/Basic/Attr.td | 2 +- clang/include/clang/Basic/AttrDocs.td | 27 +++++++++++++++++++++++++++ clang/test/AST/undocumented-attrs.cpp | 1 - 3 files changed, 28 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index a222092cd42cf..12bc0732fc19e 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1039,7 +1039,7 @@ def Annotate : InheritableParamOrStmtAttr { }]; let PragmaAttributeSupport = 1; let AcceptsExprPack = 1; - let Documentation = [Undocumented]; + let Documentation = [AnnotateDocs]; } def AnnotateType : TypeAttr { diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 04362de2d5be2..0f1a66ec34197 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9172,6 +9172,33 @@ point." }]; } +def AnnotateDocs : Documentation { + let Category = DocCatDecl; + let Content = [{ +The `annotate` attribute is used to add annotations to declarations or statements, +typically for use by static analysis tools that are not integrated into the +core Clang compiler (e.g., Clang-Tidy checks or out-of-tree Clang-based tools). +It is a counterpart to the `annotate_type` attribute, which serves the same +purpose, but for types. 
+ +The attribute takes a mandatory string literal argument specifying the +annotation category and an arbitrary number of optional arguments that provide +additional information specific to the annotation category. The optional +arguments must be constant expressions of arbitrary type. + +For example: + +.. code-block:: c++ + + [[clang::annotate("category1", "foo", 1)]] void func(int val [[clang::annotate("category2")]]) { + [[clang::annotate("category3")]] if (val) { + + } + } + + }]; +} + def AnnotateTypeDocs : Documentation { let Category = DocCatType; let Heading = "annotate_type"; diff --git a/clang/test/AST/undocumented-attrs.cpp b/clang/test/AST/undocumented-attrs.cpp index eeebd7f938644..7f02fdb3622d6 100644 --- a/clang/test/AST/undocumented-attrs.cpp +++ b/clang/test/AST/undocumented-attrs.cpp @@ -11,7 +11,6 @@ CHECK-NEXT: AcquiredBefore CHECK-NEXT: Alias CHECK-NEXT: Aligned CHECK-NEXT: AnalyzerNoReturn -CHECK-NEXT: Annotate CHECK-NEXT: ArcWeakrefUnavailable CHECK-NEXT: AvailableOnlyInDefaultEvalMethod CHECK-NEXT: Blocks From 0f61b338dc499a255c9775177f478e215b32862b Mon Sep 17 00:00:00 2001 From: Nikita Kornev Date: Tue, 23 Jun 2026 14:12:15 +0200 Subject: [PATCH 168/511] [SYCL][DOC] Update sycl_ext_oneapi_filter_selector (#22396) Don't mention SYCL 1.2.1 device_selector::select_device, it's an implementation-specific detail, and filter_selector has no need to leverage this. --- .../supported/sycl_ext_oneapi_filter_selector.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/doc/extensions/supported/sycl_ext_oneapi_filter_selector.asciidoc b/sycl/doc/extensions/supported/sycl_ext_oneapi_filter_selector.asciidoc index c25d96b36238d..0268714824932 100644 --- a/sycl/doc/extensions/supported/sycl_ext_oneapi_filter_selector.asciidoc +++ b/sycl/doc/extensions/supported/sycl_ext_oneapi_filter_selector.asciidoc @@ -8,7 +8,7 @@ This document presents an extension on top of the SYCL specification. 
The goal == Filter Selector -The filter selector is a new device selector class that accepts a string of one or more filters that refine the set of devices that may be returned when the selector's `select_device` method is invoked. Devices that match the specified filter(s) are ranked by the `default_selector` to determine which device is ultimately selected. The `default_selector` is used to prefer an implementation's preferences for one device over another when multiple devices satisfy the provided filters. +The filter selector is a new device selector class that accepts a string of one or more filters that refine the set of devices that the selector selects. Devices that match the specified filter(s) are ranked by the `default_selector` to determine which device is ultimately selected. The `default_selector` is used to prefer an implementation's preferences for one device over another when multiple devices satisfy the provided filters. === DSL for Specifying Filters From eb3ebb319869c2e972605f31cfd5c8bf1b4fa752 Mon Sep 17 00:00:00 2001 From: Aditya Medhane Date: Tue, 23 Jun 2026 17:47:41 +0530 Subject: [PATCH 169/511] [OpenMP] Remove unused isStrictSubset template (NFC) (#202987) The `isStrictSubset` `ArrayRef` template has no callers, so it never instantiates and trips `-Wunused-template`. The `VariantMatchInfo` overload does the work that's actually used, and `isSubset` stays untouched. Removing the dead template. NFC. Part of #202945. --- llvm/lib/Frontend/OpenMP/OMPContext.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/OMPContext.cpp b/llvm/lib/Frontend/OpenMP/OMPContext.cpp index 6ced5c104c8ef..7443c4c338ada 100644 --- a/llvm/lib/Frontend/OpenMP/OMPContext.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPContext.cpp @@ -162,15 +162,6 @@ template static bool isSubset(ArrayRef C0, ArrayRef C1) { return true; } -/// Return true if \p C0 is a strict subset of \p C1. Note that both arrays are -/// expected to be sorted. 
-template -static bool isStrictSubset(ArrayRef C0, ArrayRef C1) { - if (C0.size() >= C1.size()) - return false; - return isSubset(C0, C1); -} - static bool isStrictSubset(const VariantMatchInfo &VMI0, const VariantMatchInfo &VMI1) { // If all required traits are a strict subset and the ordered vectors storing From 348168477afc7a0059e6f1ff321681b1677362a9 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 23 Jun 2026 08:30:49 -0400 Subject: [PATCH 170/511] Fix test after 844136348ef4c03e84bed0a51d8557d53f466b0d (#205328) --- clang/test/AST/undocumented-attrs.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/AST/undocumented-attrs.cpp b/clang/test/AST/undocumented-attrs.cpp index 7f02fdb3622d6..64c7d165915cc 100644 --- a/clang/test/AST/undocumented-attrs.cpp +++ b/clang/test/AST/undocumented-attrs.cpp @@ -89,4 +89,4 @@ CHECK-NEXT: Visibility CHECK-NEXT: WeakImport CHECK-NEXT: WeakRef CHECK-NEXT: WorkGroupSizeHint -CHECK-NEXT: Total: 84 +CHECK-NEXT: Total: 83 From 4dfe1cf39b075d209a69694de267f7f2fe61b49c Mon Sep 17 00:00:00 2001 From: Raphael Isemann Date: Tue, 23 Jun 2026 13:32:25 +0100 Subject: [PATCH 171/511] [lldb] Fix race/timeout in TestInternalThreadSuspension (#203202) This test launches a thread and then waits for a signal from the launched thread. Below is one possible interleaving, where the `pthread_cond_signal` (2) wins the race and becomes a no-op while (3) is blocked until the test times out. ``` void * suspend_func (void *unused) { [...] // 2. Created thread reaches this and signals. pthread_cond_signal(&signal_cond); [...] } int main() { pthread_mutex_lock(&signal_mutex); // 1. Thread is created pthread_create(&suspend_thread, NULL, suspend_func, NULL); // Enable this to make race reliable: // sleep(1); // 3. We start waiting on signal_cond, but 2. already executed.
pthread_cond_wait(&signal_cond, &signal_mutex); ``` This patch guards (2) with signal_mutex so it can only be executed after pthread_cond_wait unlocks signal_mutex. --- lldb/test/API/macosx/thread_suspend/main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lldb/test/API/macosx/thread_suspend/main.c b/lldb/test/API/macosx/thread_suspend/main.c index 03da7a71505c7..cb4c1f34ae7ff 100644 --- a/lldb/test/API/macosx/thread_suspend/main.c +++ b/lldb/test/API/macosx/thread_suspend/main.c @@ -16,7 +16,9 @@ function_to_call() { void * suspend_func (void *unused) { pthread_setname_np("Look for me"); + pthread_mutex_lock(&signal_mutex); pthread_cond_signal(&signal_cond); + pthread_mutex_unlock(&signal_mutex); pthread_mutex_lock(&suspend_mutex); return NULL; // We allowed the suspend thread to run @@ -41,7 +43,7 @@ main() pthread_create(&suspend_thread, NULL, suspend_func, NULL); pthread_cond_wait(&signal_cond, &signal_mutex); - + mach_port_t th_port = pthread_mach_thread_np(suspend_thread); thread_suspend(th_port); From 835598504558ba72545cfa7ef7b062ab80b91159 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Tue, 23 Jun 2026 14:44:02 +0200 Subject: [PATCH 172/511] Reapply "[InstCombine] Merge consecutive assumes" (#205177) (#205324) The crash was caused by using `getOperandBundle` for an assume, which requires that the operand bundles are unique. This isn't guaranteed by assume bundles. This patch adds `hasOperandBundle` instead, which doesn't have the same constraint. Original message: This should make assumes a bit more efficient, since it removes a few instructions. This should also help with optimizations that are limited in how many instructions they step through. This reverts commit 3f0ef1efb26206c3f5d5621d86d740c7f466c67b. 
--- llvm/include/llvm/IR/InstrTypes.h | 6 ++++ .../InstCombine/InstCombineCalls.cpp | 22 ++++++++++-- .../InstCombine/InstructionCombining.cpp | 2 +- .../InstCombine/assume-loop-align.ll | 3 +- llvm/test/Transforms/InstCombine/assume.ll | 36 +++++++++++-------- .../PhaseOrdering/AArch64/std-find.ll | 3 +- 6 files changed, 50 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 5f7df6a4eb6f8..681c4b18375f4 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -2359,6 +2359,12 @@ class CallBase : public Instruction { }); } + /// Return whether there exists an operand bundle of type ID + bool hasOperandBundle(uint32_t ID) const { + return any_of(operand_bundles(), + [&](OperandBundleUse OBU) { return OBU.getTagID() == ID; }); + } + /// Populate the BundleOpInfo instances and the Use& vector from \p /// Bundles. Return the op_iterator pointing to the Use& one past the last /// last bundle operand use. diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index ce9e4b836a56e..880d896e12d6e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3844,10 +3844,26 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } } - // If the assume has operand bundles, the folds below will never work, so - // don't bother trying. 
- if (II->hasOperandBundles()) + if (II->hasOperandBundles()) { + // Merge consecutive assumes to save some resources + if (auto *PrevAI = dyn_cast_or_null(II->getPrevNode()); + PrevAI && PrevAI->hasOperandBundles()) { + SmallVector Bundles; + Bundles.reserve(II->getNumOperandBundles() + + PrevAI->getNumOperandBundles()); + for (auto Bundle : PrevAI->operand_bundles()) + Bundles.emplace_back(Bundle); + for (auto Bundle : II->operand_bundles()) + Bundles.emplace_back(Bundle); + Builder.CreateAssumption(Bundles); + eraseInstFromFunction(*PrevAI); + return eraseInstFromFunction(*II); + } + + // If the assume has operand bundles, the folds below will never work, so + // don't bother trying. break; + } Value *IIOperand = II->getArgOperand(0); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 1e24ff8d51057..ece0b36a15b72 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -5849,7 +5849,7 @@ bool InstCombinerImpl::run() { // removed. 
auto II = dyn_cast(User); if (II->getIntrinsicID() != Intrinsic::assume || - !II->getOperandBundle("dereferenceable")) + !II->hasOperandBundle(LLVMContext::OB_Dereferenceable)) continue; } diff --git a/llvm/test/Transforms/InstCombine/assume-loop-align.ll b/llvm/test/Transforms/InstCombine/assume-loop-align.ll index 0c5e403ca54a9..2701775f011e8 100644 --- a/llvm/test/Transforms/InstCombine/assume-loop-align.ll +++ b/llvm/test/Transforms/InstCombine/assume-loop-align.ll @@ -10,8 +10,7 @@ target triple = "x86_64-unknown-linux-gnu" define void @foo(ptr %a, ptr %b) #0 { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A:%.*]], i64 64) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[B:%.*]], i64 64) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A:%.*]], i64 64), "align"(ptr [[B:%.*]], i64 64) ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index 69220811ac206..8c819ad17a1f3 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -134,8 +134,7 @@ define i1 @align_with_offset_on_gep(ptr %base) { define void @align_with_constant_offset_0(ptr %ptr) { ; CHECK-LABEL: @align_with_constant_offset_0( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 0) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 0) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -145,8 +144,7 @@ define void @align_with_constant_offset_0(ptr %ptr) { define void @align_with_constant_offset_1(ptr %ptr) { ; CHECK-LABEL: 
@align_with_constant_offset_1( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 -8) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 -8) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -157,8 +155,7 @@ define void @align_with_constant_offset_1(ptr %ptr) { define void @align_with_constant_offset_4(ptr %ptr) { ; CHECK-LABEL: @align_with_constant_offset_4( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 0) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 0) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -169,8 +166,7 @@ define void @align_with_constant_offset_4(ptr %ptr) { define void @align_with_constant_offset_8(ptr %ptr) { ; CHECK-LABEL: @align_with_constant_offset_8( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 8) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 8) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -180,8 +176,7 @@ define void @align_with_constant_offset_8(ptr %ptr) { define void @align_with_variable_offset(ptr %ptr, i64 %offset) { ; CHECK-LABEL: @align_with_variable_offset( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 [[OFFSET:%.*]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 
[[OFFSET:%.*]]) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -627,10 +622,7 @@ define void @redundant_nonnull3(ptr %ptr) { define void @partially_redundant(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr5) { ; CHECK-LABEL: @partially_redundant( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR2:%.*]]) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR:%.*]]) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR4:%.*]]), "nonnull"(ptr [[PTR3:%.*]]) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR5:%.*]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR5:%.*]]), "nonnull"(ptr [[PTR4:%.*]]), "nonnull"(ptr [[PTR3:%.*]]), "nonnull"(ptr [[PTR:%.*]]), "nonnull"(ptr [[PTR2:%.*]]) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "nonnull"(ptr %ptr), "nonnull"(ptr %ptr2) ] @@ -1392,6 +1384,22 @@ define i32 @assume_noundef_on_load_after_call(ptr %ptr) { ret i32 %val } +define ptr @avoid_get_operand_bundle() { +; CHECK-LABEL: @avoid_get_operand_bundle( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[LOAD:%.*]] = load volatile ptr, ptr null, align 8 +; CHECK-NEXT: [[PTRTOINT_I:%.*]] = ptrtoint ptr [[LOAD]] to i64 +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr null, i64 [[PTRTOINT_I]]), "dereferenceable"(ptr null, i64 [[PTRTOINT_I]]) ] +; CHECK-NEXT: ret ptr null +; +bb: + %load = load volatile ptr, ptr null, align 8 + %ptrtoint.i = ptrtoint ptr %load to i64 + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr null, i64 %ptrtoint.i) ] + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr null, i64 %ptrtoint.i) ] + ret ptr null +} + declare void @use(i1) declare void @block() declare void @llvm.dbg.value(metadata, metadata, metadata) diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll index 69b23200b239b..4ca7f780cdc5e 100644 
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll @@ -244,8 +244,7 @@ define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) { ; CHECK-LABEL: define noundef ptr @std_find_caller( ; CHECK-SAME: ptr noundef [[FIRST:%.*]], ptr noundef [[LAST:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2) ] -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[LAST]], i64 2) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2), "align"(ptr [[LAST]], i64 2) ] ; CHECK-NEXT: [[PRE_I:%.*]] = icmp eq ptr [[FIRST]], [[LAST]] ; CHECK-NEXT: br i1 [[PRE_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT:.*]], label %[[LOOP_HEADER_I_PREHEADER:.*]] ; CHECK: [[LOOP_HEADER_I_PREHEADER]]: From 12f7208b3624469ab7c489aa70cd06046a6e052d Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 23 Jun 2026 13:48:26 +0100 Subject: [PATCH 173/511] [ARM] Regenerate vcombine.ll. 
NFC (#205335) --- llvm/test/CodeGen/ARM/vcombine.ll | 180 +++++++++++++++++++++--------- 1 file changed, 130 insertions(+), 50 deletions(-) diff --git a/llvm/test/CodeGen/ARM/vcombine.ll b/llvm/test/CodeGen/ARM/vcombine.ll index 3871f8ceddce9..0fb998ac620f1 100644 --- a/llvm/test/CodeGen/ARM/vcombine.ll +++ b/llvm/test/CodeGen/ARM/vcombine.ll @@ -1,16 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE ; RUN: llc -mtriple=armeb-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE define <16 x i8> @vcombine8(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: vcombine8 -; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0] -; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] +; CHECK-LE-LABEL: vcombine8: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d16, [r1] +; CHECK-LE-NEXT: vldr d17, [r0] +; CHECK-LE-NEXT: vmov r2, r3, d16 +; CHECK-LE-NEXT: vmov r0, r1, d17 +; CHECK-LE-NEXT: mov pc, lr +; +; CHECK-BE-LABEL: vcombine8: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d16, [r1] +; CHECK-BE-NEXT: vldr d17, [r0] +; CHECK-BE-NEXT: vrev64.8 d19, d16 +; CHECK-BE-NEXT: vrev64.8 d18, d17 +; CHECK-BE-NEXT: vrev64.8 q8, q9 +; CHECK-BE-NEXT: vmov r1, r0, d16 +; CHECK-BE-NEXT: vmov r3, r2, d17 +; CHECK-BE-NEXT: mov pc, lr -; CHECK-LE-DAG: vmov r0, r1, [[LD0]] -; CHECK-LE-DAG: vmov r2, r3, [[LD1]] -; CHECK-BE-DAG: vmov r1, r0, d16 -; CHECK-BE-DAG: vmov r3, r2, d17 %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> @@ -18,15 +30,26 @@ define <16 x i8> @vcombine8(ptr %A, ptr %B) nounwind { } define <8 x i16> @vcombine16(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: vcombine16 -; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0] -; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] +; CHECK-LE-LABEL: vcombine16: +; CHECK-LE: @ %bb.0: +; 
CHECK-LE-NEXT: vldr d16, [r1] +; CHECK-LE-NEXT: vldr d17, [r0] +; CHECK-LE-NEXT: vmov r2, r3, d16 +; CHECK-LE-NEXT: vmov r0, r1, d17 +; CHECK-LE-NEXT: mov pc, lr +; +; CHECK-BE-LABEL: vcombine16: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d16, [r1] +; CHECK-BE-NEXT: vldr d17, [r0] +; CHECK-BE-NEXT: vrev64.16 d19, d16 +; CHECK-BE-NEXT: vrev64.16 d18, d17 +; CHECK-BE-NEXT: vrev64.16 q8, q9 +; CHECK-BE-NEXT: vmov r1, r0, d16 +; CHECK-BE-NEXT: vmov r3, r2, d17 +; CHECK-BE-NEXT: mov pc, lr -; CHECK-LE-DAG: vmov r0, r1, [[LD0]] -; CHECK-LE-DAG: vmov r2, r3, [[LD1]] -; CHECK-BE-DAG: vmov r1, r0, d16 -; CHECK-BE-DAG: vmov r3, r2, d17 %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> @@ -34,16 +57,27 @@ define <8 x i16> @vcombine16(ptr %A, ptr %B) nounwind { } define <4 x i32> @vcombine32(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: vcombine32 +; CHECK-LE-LABEL: vcombine32: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d16, [r1] +; CHECK-LE-NEXT: vldr d17, [r0] +; CHECK-LE-NEXT: vmov r2, r3, d16 +; CHECK-LE-NEXT: vmov r0, r1, d17 +; CHECK-LE-NEXT: mov pc, lr +; +; CHECK-BE-LABEL: vcombine32: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d16, [r1] +; CHECK-BE-NEXT: vldr d17, [r0] +; CHECK-BE-NEXT: vrev64.32 d19, d16 +; CHECK-BE-NEXT: vrev64.32 d18, d17 +; CHECK-BE-NEXT: vrev64.32 q8, q9 +; CHECK-BE-NEXT: vmov r1, r0, d16 +; CHECK-BE-NEXT: vmov r3, r2, d17 +; CHECK-BE-NEXT: mov pc, lr -; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0] -; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] -; CHECK-LE: vmov r2, r3, [[LD1]] -; CHECK-LE: vmov r0, r1, [[LD0]] -; CHECK-BE: vmov r1, r0, d16 -; CHECK-BE: vmov r3, r2, d17 %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> @@ -51,16 +85,27 @@ define <4 x i32> @vcombine32(ptr %A, ptr %B) nounwind { } define <4 x float> @vcombinefloat(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: vcombinefloat +; CHECK-LE-LABEL: 
vcombinefloat: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d16, [r1] +; CHECK-LE-NEXT: vldr d17, [r0] +; CHECK-LE-NEXT: vmov r2, r3, d16 +; CHECK-LE-NEXT: vmov r0, r1, d17 +; CHECK-LE-NEXT: mov pc, lr +; +; CHECK-BE-LABEL: vcombinefloat: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d16, [r1] +; CHECK-BE-NEXT: vldr d17, [r0] +; CHECK-BE-NEXT: vrev64.32 d19, d16 +; CHECK-BE-NEXT: vrev64.32 d18, d17 +; CHECK-BE-NEXT: vrev64.32 q8, q9 +; CHECK-BE-NEXT: vmov r1, r0, d16 +; CHECK-BE-NEXT: vmov r3, r2, d17 +; CHECK-BE-NEXT: mov pc, lr -; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0] -; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] -; CHECK-LE: vmov r2, r3, [[LD1]] -; CHECK-LE: vmov r0, r1, [[LD0]] -; CHECK-BE: vmov r1, r0, d16 -; CHECK-BE: vmov r3, r2, d17 %tmp1 = load <2 x float>, ptr %A %tmp2 = load <2 x float>, ptr %B %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> @@ -68,15 +113,23 @@ define <4 x float> @vcombinefloat(ptr %A, ptr %B) nounwind { } define <2 x i64> @vcombine64(ptr %A, ptr %B) nounwind { -; CHECK-LABEL: vcombine64 -; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0] -; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1] +; CHECK-LE-LABEL: vcombine64: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d16, [r1] +; CHECK-LE-NEXT: vldr d17, [r0] +; CHECK-LE-NEXT: vmov r2, r3, d16 +; CHECK-LE-NEXT: vmov r0, r1, d17 +; CHECK-LE-NEXT: mov pc, lr +; +; CHECK-BE-LABEL: vcombine64: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d16, [r1] +; CHECK-BE-NEXT: vldr d17, [r0] +; CHECK-BE-NEXT: vmov r3, r2, d16 +; CHECK-BE-NEXT: vmov r1, r0, d17 +; CHECK-BE-NEXT: mov pc, lr -; CHECK-LE: vmov r2, r3, [[LD1]] -; CHECK-LE: vmov r0, r1, [[LD0]] -; CHECK-BE: vmov r3, r2, [[LD1]] -; CHECK-BE: vmov r1, r0, [[LD0]] %tmp1 = load <1 x i64>, ptr %A %tmp2 = load <1 x i64>, ptr %B %tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> @@ -87,22 +140,34 @@ define <2 x i64> @vcombine64(ptr %A, ptr %B) nounwind { ; They should not require storing to the stack. 
define <4 x i16> @vget_low16(ptr %A) nounwind { -; CHECK: vget_low16 -; CHECK-NOT: vst -; CHECK-LE: vmov r0, r1, d16 -; CHECK-BE: vmov r1, r0, d16 +; CHECK-LE-LABEL: vget_low16: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d16, [r0] +; CHECK-LE-NEXT: vmov r0, r1, d16 +; CHECK-LE-NEXT: mov pc, lr +; +; CHECK-BE-LABEL: vget_low16: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d16, [r0] +; CHECK-BE-NEXT: vmov r1, r0, d16 +; CHECK-BE-NEXT: mov pc, lr %tmp1 = load <8 x i16>, ptr %A %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> ret <4 x i16> %tmp2 } define <8 x i8> @vget_high8(ptr %A) nounwind { -; CHECK: vget_high8 -; CHECK-NOT: vst -; CHECK-LE-NOT: vld1.64 {d16, d17}, [r0] -; CHECK-LE: vldr d16, [r0, #8] -; CHECK-LE: vmov r0, r1, d16 -; CHECK-BE: vmov r1, r0, d16 +; CHECK-LE-LABEL: vget_high8: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: vldr d16, [r0, #8] +; CHECK-LE-NEXT: vmov r0, r1, d16 +; CHECK-LE-NEXT: mov pc, lr +; +; CHECK-BE-LABEL: vget_high8: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: vldr d16, [r0, #8] +; CHECK-BE-NEXT: vmov r1, r0, d16 +; CHECK-BE-NEXT: mov pc, lr %tmp1 = load <16 x i8>, ptr %A %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> ret <8 x i8> %tmp2 @@ -110,11 +175,24 @@ define <8 x i8> @vget_high8(ptr %A) nounwind { ; vcombine(vld1_dup(p), vld1_dup(p2)) define <8 x i16> @vcombine_vdup(<8 x i16> %src, ptr nocapture readonly %p) { -; CHECK-LABEL: vcombine_vdup: -; CHECK: vld1.16 {d16[]}, -; CHECK: vld1.16 {d17[]}, -; CHECK-LE: vmov r0, r1, d16 -; CHECK-LE: vmov r2, r3, d17 +; CHECK-LE-LABEL: vcombine_vdup: +; CHECK-LE: @ %bb.0: +; CHECK-LE-NEXT: ldr r0, [sp] +; CHECK-LE-NEXT: vld1.16 {d16[]}, [r0:16]! +; CHECK-LE-NEXT: vld1.16 {d17[]}, [r0:16] +; CHECK-LE-NEXT: vmov r0, r1, d16 +; CHECK-LE-NEXT: vmov r2, r3, d17 +; CHECK-LE-NEXT: mov pc, lr +; +; CHECK-BE-LABEL: vcombine_vdup: +; CHECK-BE: @ %bb.0: +; CHECK-BE-NEXT: ldr r0, [sp] +; CHECK-BE-NEXT: vld1.16 {d16[]}, [r0:16]! 
+; CHECK-BE-NEXT: vld1.16 {d17[]}, [r0:16] +; CHECK-BE-NEXT: vrev64.16 q8, q8 +; CHECK-BE-NEXT: vmov r1, r0, d16 +; CHECK-BE-NEXT: vmov r3, r2, d17 +; CHECK-BE-NEXT: mov pc, lr %a1 = load i16, ptr %p, align 2 %a2 = insertelement <4 x i16> undef, i16 %a1, i32 0 %a3 = shufflevector <4 x i16> %a2, <4 x i16> undef, <4 x i32> zeroinitializer @@ -125,3 +203,5 @@ define <8 x i16> @vcombine_vdup(<8 x i16> %src, ptr nocapture readonly %p) { %shuffle = shufflevector <4 x i16> %a3, <4 x i16> %b3, <8 x i32> ret <8 x i16> %shuffle } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} From ebb68056b0766d8c0f787069da8b89a2b041ddbb Mon Sep 17 00:00:00 2001 From: Slawomir Ptak Date: Tue, 23 Jun 2026 14:53:42 +0200 Subject: [PATCH 174/511] [SYCL][DOC] Updates to sycl_ext_oneapi_reusable_events specification. (#22201) List of changes: - Counter based events can be enqueued for signaling on any device in the system, so there is no requirement for P2P access between devices. - The `device` argument is not required for make_event, since the event is associated with a device on the first call to enqueue_signal_event. Which device is associated with an event (if any) right after the make_event call is implementation specific. - A new device aspect for reusable events (since this extension might not be supported on all backends). --- .../sycl_ext_oneapi_reusable_events.asciidoc | 121 ++++-------------- 1 file changed, 24 insertions(+), 97 deletions(-) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_reusable_events.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_reusable_events.asciidoc index 60477fb81be9b..5a41e62db6950 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_reusable_events.asciidoc +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_reusable_events.asciidoc @@ -111,7 +111,7 @@ to determine which of the extension's features the implementation supports. 
=== New functions to create an event This extension adds the following factory functions which can create an event -with a specific context and device. +with a specific context or with associated properties. ''' @@ -120,7 +120,7 @@ with a specific context and device. namespace sycl::ext::oneapi::experimental { template -event make_event(const context &ctxt, const device &dev, PropertyListT props = {}); +event make_event(const context &ctxt, PropertyListT props = {}); } // namespace sycl::ext::oneapi::experimental ---- @@ -132,45 +132,13 @@ _Constraints:_ * `is_property_list_v` is `true` and contains no properties other than those listed below in section "New property for creating an event". -_Returns:_ An event that is associated with context `ctxt` and device `dev`. +_Returns:_ An event that is associated with context `ctxt`. The event has +`info::event_command_status::complete` status. -_Throws:_ - -* An `exception` with the `errc::feature_not_supported` error code if - `PropertyListT` contains an `enable_profiling` property that enables - profiling timestamps and if the platform containing `ctxt` does not support - creation of such events as reported by the `event_profiling` information - descriptor. - -* An `exception` with the `errc::invalid` error code if `ctxt` does not contain - `dev`. - -''' - -[source,c++] ----- -namespace sycl::ext::oneapi::experimental { - -template -event make_event(const device &dev, PropertyListT props = {}); - -} // namespace sycl::ext::oneapi::experimental ----- - -_Constraints:_ - -* `PropertyListT` is one of the properties listed below in section "New property - for creating an event"; or -* `is_property_list_v` is `true` and contains no properties other - than those listed below in section "New property for creating an event". 
- -_Effects:_ Equivalent to: - -[source,c++,indent=2] ----- -sycl::context ctxt = dev.get_platform().khr_get_default_context(); -return sycl::ext::oneapi::experimental::make_event(ctxt, dev, props); ----- +_Throws:_ An `exception` with the `errc::feature_not_supported` error code if +`PropertyListT` contains an `enable_profiling` property that enables profiling +timestamps and if the platform containing `ctxt` does not support creation of +such events as reported by the `event_profiling` information descriptor. ''' @@ -197,50 +165,11 @@ _Effects:_ Equivalent to: ---- sycl::device dev; sycl::context ctxt = dev.get_platform().khr_get_default_context(); -return sycl::ext::oneapi::experimental::make_event(ctxt, dev, props); +return sycl::ext::oneapi::experimental::make_event(ctxt, props); ---- ''' -=== New event member functions - -This extension adds the following new member functions to the event class: - -''' - -[source,c++] ----- -namespace sycl { - -class event { - // ... - device ext_oneapi_get_device() const; - context ext_oneapi_get_context() const; -}; - -} // namespace sycl ----- - -''' - -[source,c++] ----- -device ext_oneapi_get_device() const; ----- - -_Returns:_ The device object associated with this event. - -''' - -[source,c++] ----- -context ext_oneapi_get_context() const; ----- - -_Returns:_ The context object associated with this event. - -''' - === New property for creating an event This extension adds the following property, which can be used with `make_event`: @@ -313,7 +242,7 @@ Any commands submitted to the queue after this barrier cannot begin execution until all commands associated with `evt` or `evts` have completed. _Remarks:_ The event `evt` and the events in `evts` do _not_ need to have the -same context or the same device as `q`. +same context as `q`. 
''' @@ -327,9 +256,7 @@ void enqueue_signal_event(queue q, event& evt); ---- _Effects:_ The event `evt` is immediately disassociated with any previous -command, and its status is set to `info::event_command_status::submitted`. The -event is also disassociated with any previous device and is associated with the -device returned by `q.get_device()`. +command, and its status is set to `info::event_command_status::submitted`. If the queue `q` is in-order (i.e. was constructed with `property::queue::in_order`), this function enqueues a lightweight "tag" @@ -361,6 +288,15 @@ Implementations are encouraged to transition the event directly from the "submitted" status to the "complete" status and are encouraged to set the "command_start" timestamp to the same value as the "command_end" timestamp. +_Throws:_ + + * An `exception` with the `errc::invalid` error code if `evt` and `q` don't + have the same context. + * An `exception` with the `errc::feature_not_supported` error code if `evt` + was created with the `enable_profiling` property that enables profiling + timestamps and if the platform associated with `q` does not support creation + of such events as reported by the `event_profiling` information descriptor. + [_Note:_ In order to understand why the "command_start" and "command_end" timestamps are encouraged to be the same, think of the tag operation as an empty kernel with an implicit set of dependencies on all previous commands in the @@ -373,20 +309,11 @@ is implemented by submitting an actual kernel, which has non-zero execution time. _{endnote}_] -_Throws:_ An `exception` with the `errc::invalid` error code if the device -associated with `evt` differs from the device associated with `q` and the -devices cannot P2P access each other. 
The P2P access between devices can be -queried using the `device::ext_oneapi_can_access_peer` function and enabled -using the `device::ext_oneapi_enable_peer_access` function defined in the -link:../experimental/sycl_ext_oneapi_peer_access.asciidoc[ -sycl_ext_oneapi_peer_access] extension. - === Interaction with other event APIs An event _E_ created via `make_event` can be used as a command dependency (e.g. via `handler::depends_on`) for a command submitted to some queue _Q_. -It is _not_ necessary for the context or device of _E_ to match the context or -device of _Q_. +It is _not_ necessary for the context of _E_ to match the context of _Q_. If an event _E_ is used as a command dependency for some command _C_ (e.g. via `handler::depends_on`), the dependency is captured at the point when _C_ is @@ -405,8 +332,8 @@ class. The default constructor creates an event that is equivalent to calling `make_event` with no parameters. Several of the `queue` class member functions from the core SYCL specification -return an event. These events are associated with the queue's context and the -queue's device as though created by `make_event` with that context and device. +return an event. These events are associated with the queue's context as though +created by `make_event` with that context. These events may be passed to any of the functions in this extension that take an event parameter. @@ -426,7 +353,7 @@ int main() { sycl::context ctxt = dev.get_platform().khr_get_default_context(); sycl::queue q1{ctxt, dev, sycl::property::queue::in_order{}}; sycl::queue q2{ctxt, dev, sycl::property::queue::in_order{}}; - sycl::event e = syclex::make_event(ctxt, dev); + sycl::event e = syclex::make_event(ctxt); // Launch a kernel on `q1` and then signal an event when the kernel completes. syclex::parallel_for(q1, {N}, [=](sycl::item<> it) { /* ... 
*/ }); From 873de836408cc574070896ccdc85f93a7de6f05d Mon Sep 17 00:00:00 2001 From: Slawomir Ptak Date: Tue, 23 Jun 2026 14:53:53 +0200 Subject: [PATCH 175/511] [SYCL][DOC] Introduce IPC for events in sycl_ext_oneapi_inter_process_communication (#21334) Add support for events in sycl_ext_oneapi_inter_process_communication extension specification. --------- Co-authored-by: Greg Lueck --- ...neapi_inter_process_communication.asciidoc | 283 ++++++++++++++++++ 1 file changed, 283 insertions(+) diff --git a/sycl/doc/extensions/experimental/sycl_ext_oneapi_inter_process_communication.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_inter_process_communication.asciidoc index acc1d8830b6c5..979ee3692a29c 100644 --- a/sycl/doc/extensions/experimental/sycl_ext_oneapi_inter_process_communication.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_inter_process_communication.asciidoc @@ -61,6 +61,8 @@ This extension also depends on the following other SYCL extensions: * link:sycl_ext_oneapi_virtual_mem.asciidoc[sycl_ext_oneapi_virtual_mem] * link:sycl_ext_oneapi_properties.asciidoc[sycl_ext_oneapi_properties] +* link:../proposed/sycl_ext_oneapi_reusable_events.asciidoc[ + sycl_ext_oneapi_reusable_events] == Status @@ -1121,6 +1123,287 @@ return ipc::physical_memory::open(handle_data_view, ctxt, d); |==== +=== Inter-process exchange of events + +This extension allows for the exchange of an event object between processes. +The event object can then be signaled in one of the processes and waited on in +another process. +The following aspect and set of free functions are specific to the event object +sharing feature. + +==== Extension to `enum class aspect` for events + +[source] +---- +namespace sycl { +enum class aspect { + ... + ext_oneapi_ipc_event +} +} +---- + +If a SYCL device has this aspect, that device supports inter-process exchange +of events as described in this section. 
+ +==== Use of `enable_ipc` property for the make_event function + +This feature uses the `enable_ipc` property (defined above for `physical_mem`), +which can be used with `make_event` +(from the link:../proposed/sycl_ext_oneapi_reusable_events.asciidoc[ +sycl_ext_oneapi_reusable_events] extension): + +The `enable_ipc` property controls whether the event can be shared across +processes. + +_Remarks:_ + + * `make_event` throws an exception with the `errc::feature_not_supported` + error code if inter-process sharing is enabled via `enable_ipc` and not all + devices that are part of the context passed to the `make_event` function + have the aspect `aspect::ext_oneapi_ipc_event`. + + * `make_event` throws an exception with the `errc::invalid` error code if + both the `enable_profiling` and `enable_ipc` properties are set to true. + + * `enqueue_signal_event` function throws an exception with the `errc::invalid` + error code if inter-process sharing of the event is enabled via `enable_ipc` + and profiling is enabled for the queue with the + `property::queue::enable_profiling` property. + + * `enqueue_signal_event`, `enqueue_wait_event`, `enqueue_wait_events` (from + the sycl_ext_oneapi_reusable_events extension) and `handler::depends_on` + functions throw an exception with the `errc::feature_not_supported` error + code if inter-process sharing of the event is enabled via `enable_ipc` and a + device associated with the queue (passed to those functions or associated + with the `handler`) does not have the aspect `aspect::ext_oneapi_ipc_event`. 
+ + * `enqueue_signal_event`, `enqueue_wait_event`, `enqueue_wait_events` (from + the sycl_ext_oneapi_reusable_events extension) and `handler::depends_on` + functions throw an exception with the `errc::feature_not_supported` error + code if inter-process sharing of the event is enabled via `enable_ipc` and a + queue (passed to those functions or associated with the `handler`) has an + enqueued host task that has not completed (scheduled using the + `handler::host_task` function). + + * If the event has been enqueued for signaling (using `enqueue_signal_event`) + in some process, and the IPC handle associated with that event has been + passed to the `open` function in another process, then the resulting event + can be used as a dependency (using `handler::depends_on`) or passed to + `enqueue_wait_event` or `enqueue_wait_events` functions only if the device + associated with the queue passed to `enqueue_signal_event` and the device + associated with the queue where the dependency is set, can P2P access each + other. + +==== New event member function + +This extension adds a new event type member function, which can be used to +check if a given event can be passed between processes. + +[source,c++] +---- +namespace sycl { + +class event { + // ... + bool ext_oneapi_ipc_enabled() const; +}; + +} // namespace sycl +---- + +_Returns:_ True if the event was created with inter-process sharing enabled. + +==== New functions associated with an event object + +This extension adds new free functions under the `ipc::event` experimental +namespace. 
+ +``` +namespace sycl::ext::oneapi::experimental::ipc::event { + +handle get(const sycl::event &evt); + +void put(handle &ipc_handle, const sycl::context &ctx); + +void put(handle &ipc_handle); + +sycl::event open(const handle_data_t &handle_data, const sycl::context &ctx); + +sycl::event open(const handle_data_t &handle_data); + +// Requires C++20 +sycl::event open(const handle_data_view_t &handle_data_view, + const sycl::context &ctx); + +// Requires C++20 +sycl::event open(const handle_data_view_t &handle_data_view); + +} +``` + +|==== +a| +[frame=all,grid=none] +!==== +a! +[source] +---- +handle get(const sycl::event &evt) +---- +!==== + +_Returns:_ An IPC "handle" to this event. The bytes of this handle can be +transferred to another process on the same system, and the other process can +use the handle to get an event object synchronized with event `evt` through a +call to the `open` function. + +_Throws:_ An exception with the `errc::invalid` error code if event `evt` was +not created with the `enable_ipc` property. + +!==== +a! +[source] +---- +void put(handle &ipc_handle, const sycl::context &ctx) +---- +!==== + +_Preconditions:_ + + * `ipc_handle` is the IPC "handle" to an event that was returned from a call + to `get` in this same process. The `put` function has not been previously + called on the handle. + * `ctx` is a context associated with the event object, which was passed to the + `get` function call that produced `ipc_handle`. + +_Effects:_ Deallocates resources associated with the handle. After the call to +the `put` function, the handle data is invalid and using it in the `put` and +`open` functions will result in undefined behavior. + +[_Note:_ Any objects retrieved through a call to the `open` function in any +process on the system will still be valid after a call to the `put` function. +_{endnote}_] + +[_Note:_ A call to `put` is required for each handle received from the `get` +function. 
+_{endnote}_] + +[_Note:_ The `put` function can only be called in the same process where the +handle was generated using `get`. +_{endnote}_] + +!==== +a! +[source] +---- +void put(handle &ipc_handle) +---- +!==== + +_Effects_: Equivalent to: + +[source,c++,indent=2] +---- +sycl::device d; +sycl::context ctxt = d.get_platform().khr_get_default_context(); +ipc::event::put(ipc_handle, ctxt); +---- + +!==== +a! +[source] +---- +sycl::event open(const handle_data_t &handle_data, const sycl::context &ctx) +---- +!==== + +_Preconditions:_ + + * `handle_data` is the IPC "handle" to an event that was returned from a call + to the `get` function either in this process or in some other process on the + same system. + * `ctx` is the same context (contains the same set of devices) as the context + passed to the `make_event` function (from the + sycl_ext_oneapi_reusable_events extension) which was used to create the + event which produced `handle_data`. + +_Returns:_ An `event` object that represents the same event identified by +`handle_data`. The returned event is associated with context `ctx`. The +returned event and the event identified by `handle_data` share the same state. +When one is signaled, the state of the other event automatically becomes +signaled also. + +[_Note:_ The `open` function can be called multiple times on the same handle +within the same process. Each call to the `open` function may return a unique +event object even for the same handle. +_{endnote}_] + +[_Note:_ The event returned from a call to the `open` function is no longer +valid if the event associated with `handle_data` (passed to a `get` call) is +destroyed. +_{endnote}_] + +_Throws:_ + + * An exception with the `errc::feature_not_supported` error code if not all + devices that are part of `ctx` have `aspect::ext_oneapi_ipc_event`. + * An exception with the `errc::invalid` error code if the handle data + `handle_data` has an unexpected number of bytes. + +!==== +a! 
+[source] +---- +sycl::event open(const handle_data_t &handle_data) +---- +!==== + +_Effects:_ Equivalent to: + +[source,c++,indent=2] +---- +sycl::device d; +sycl::context ctxt = d.get_platform().khr_get_default_context(); +return ipc::event::open(handle_data, ctxt); +---- + +!==== +a! +[source] +---- +sycl::event open(const handle_data_view_t &handle_data_view, + const sycl::context &ctx) +---- +!==== + +_Effects:_ Equivalent to: + +[source,c++,indent=2] +---- +handle_data_t handle_data{handle_data_view.begin(), handle_data_view.end()}; +return ipc::event::open(handle_data, ctx); +---- + +!==== +a! +[source] +---- +sycl::event open(const handle_data_view_t &handle_data_view) +---- +!==== + +_Effects:_ Equivalent to: + +[source,c++,indent=2] +---- +sycl::device d; +sycl::context ctxt = d.get_platform().khr_get_default_context(); +return ipc::event::open(handle_data_view, ctxt); +---- + +|==== + == Issues === Level Zero file descriptor duplication dependency From a4903d872eb3cc75cd7a9eb47adb82917d6970bb Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 23 Jun 2026 08:06:43 -0500 Subject: [PATCH 176/511] [LLVM][Runtimes] Forward 'LLVM_LIBDIR_SUFFIX' to runtimes by default (#205182) Summary: This option controls the logical path of the installed libraries. The runtimes often reach into libraries, or want to install to the same location as the main build. Previously you had to set this per-runtime, but we should likely forward it by default. Fixes: https://github.com/llvm/llvm-project/issues/159762 --- llvm/runtimes/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/runtimes/CMakeLists.txt b/llvm/runtimes/CMakeLists.txt index 6d81b26d2d416..501ea55a327c7 100644 --- a/llvm/runtimes/CMakeLists.txt +++ b/llvm/runtimes/CMakeLists.txt @@ -3,7 +3,7 @@ # similar although simpler functionality. We should figure out how to merge # the two files.
-set(COMMON_CMAKE_ARGS "-DHAVE_LLVM_LIT=ON;-DCLANG_RESOURCE_DIR=${CLANG_RESOURCE_DIR}") +set(COMMON_CMAKE_ARGS "-DHAVE_LLVM_LIT=ON;-DCLANG_RESOURCE_DIR=${CLANG_RESOURCE_DIR};-DLLVM_LIBDIR_SUFFIX=${LLVM_LIBDIR_SUFFIX}") if(APPLE AND CMAKE_OSX_SYSROOT AND (LLVM_TARGET_TRIPLE STREQUAL LLVM_HOST_TRIPLE)) # Only propagate the host sysroot for native runtimes builds. list(APPEND RUNTIMES_CMAKE_ARGS "-DCMAKE_OSX_SYSROOT=${CMAKE_OSX_SYSROOT}") From 902a29152087e1732f48000c2889642873f8495a Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 23 Jun 2026 08:07:49 -0500 Subject: [PATCH 177/511] [Clang] Accept 'noconvergent' attributes outside of CUDA (#205247) Summary: There is no reason that `convergent` should be a generic attribute but not `noconvergent`. --- clang/include/clang/Basic/Attr.td | 1 - clang/test/CodeGen/convergent-functions.cpp | 13 +++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 12bc0732fc19e..f1ae66bd7f2bb 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2391,7 +2391,6 @@ def NoConvergent : InheritableAttr { let Spellings = [Clang<"noconvergent">, Declspec<"noconvergent">]; let Subjects = SubjectList<[Function, Stmt], WarnDiag, "functions and statements">; - let LangOpts = [CUDA]; let Documentation = [NoConvergentDocs]; let SimpleHandler = 1; } diff --git a/clang/test/CodeGen/convergent-functions.cpp b/clang/test/CodeGen/convergent-functions.cpp index 7290c505ec6fe..8734054462e19 100644 --- a/clang/test/CodeGen/convergent-functions.cpp +++ b/clang/test/CodeGen/convergent-functions.cpp @@ -3,8 +3,17 @@ // Test that the -fconvergent-functions flag works -// CHECK: attributes #0 = { +// CHECK: define {{.*}} @func() #[[ATTR:[0-9]+]] +void func(void) { } + +// CONVFUNC: define {{.*}} @nofunc() #[[NOATTR:[0-9]+]] +__attribute__((noconvergent)) void nofunc(void) { } + +// CHECK: attributes #[[ATTR]] = { //
NOCONVFUNC-NOT: convergent // CONVFUNC-SAME: convergent // CHECK-SAME: } -void func(void) { } + +// CONVFUNC: attributes #[[NOATTR]] = { +// CONVFUNC-NOT: convergent +// CONVFUNC-SAME: } From 6c1d02dd5a49b28ceb0e3057ade7028ec09776b5 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 23 Jun 2026 08:12:31 -0500 Subject: [PATCH 178/511] [llvm-offload-binary] Add `member` key to single out archive members (#205170) Summary: Currently, archives offer three approaches. 1. `--archive` which takes an archive and puts all the output in a new archive 2. No filename, which outputs based on the member names 3. Filename, which just matches everything. This has a gap for when people want a single file without relying on implicit naming that dumps all the contents to the CWD. This PR adds `member` which lets you specify the member names as you would get from `ar t libfoo.a` for this. --- .../docs/CommandGuide/llvm-offload-binary.rst | 5 +++++ .../llvm-offload-binary/member-extract.test | 22 +++++++++++++++++++ .../llvm-offload-binary.cpp | 6 ++++- 3 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 llvm/test/tools/llvm-offload-binary/member-extract.test diff --git a/llvm/docs/CommandGuide/llvm-offload-binary.rst b/llvm/docs/CommandGuide/llvm-offload-binary.rst index 87005967775db..59ee472d8c3ee 100644 --- a/llvm/docs/CommandGuide/llvm-offload-binary.rst +++ b/llvm/docs/CommandGuide/llvm-offload-binary.rst @@ -78,6 +78,11 @@ OPTIONS during extraction, all images are automatically extracted with descriptive filenames. + When extracting from a static archive, the ``member`` key restricts the output + to a single archive member for cases when ``--archive`` is insufficient, + e.g. + ``--image=member=foo.o,triple=nvptx64,arch=sm_70,file=out.o``. + .. option:: -o Write output to . When bundling, this specifies the fat binary filename.
diff --git a/llvm/test/tools/llvm-offload-binary/member-extract.test b/llvm/test/tools/llvm-offload-binary/member-extract.test new file mode 100644 index 0000000000000..d005517a533e3 --- /dev/null +++ b/llvm/test/tools/llvm-offload-binary/member-extract.test @@ -0,0 +1,22 @@ +# RUN: split-file %s %t + +# Two members with the same triple, arch, and kind but distinct contents. +# RUN: llvm-offload-binary -o %t/a.o --image=file=%t/contentA,triple=x-y-z,arch=gfx90a,kind=openmp +# RUN: llvm-offload-binary -o %t/b.o --image=file=%t/contentB,triple=x-y-z,arch=gfx90a,kind=openmp +# RUN: llvm-ar rcs %t/lib.a %t/a.o %t/b.o + +# Each member can be extracted to a known file by its archive member name. +# RUN: llvm-offload-binary %t/lib.a --image=member=a.o,triple=x-y-z,arch=gfx90a,kind=openmp,file=%t/outA +# RUN: llvm-offload-binary %t/lib.a --image=member=b.o,triple=x-y-z,arch=gfx90a,kind=openmp,file=%t/outB +# RUN: diff %t/contentA %t/outA +# RUN: diff %t/contentB %t/outB + +# Without the 'member' selector the same filter matches both members. +# RUN: llvm-offload-binary %t/lib.a --image=triple=x-y-z,arch=gfx90a,kind=openmp,file=%t/both 2>&1 \ +# RUN: | FileCheck %s --check-prefix=WARN +# WARN: Multiple inputs match to a single file + +#--- contentA +aaaa +#--- contentB +bbbb diff --git a/llvm/tools/llvm-offload-binary/llvm-offload-binary.cpp b/llvm/tools/llvm-offload-binary/llvm-offload-binary.cpp index 1c429f2f85046..f3099f43147af 100644 --- a/llvm/tools/llvm-offload-binary/llvm-offload-binary.cpp +++ b/llvm/tools/llvm-offload-binary/llvm-offload-binary.cpp @@ -212,13 +212,17 @@ static Error unbundleImages() { SmallVector Extracted; for (const OffloadFile &File : Binaries) { const auto *Binary = File.getBinary(); - // We handle the 'file' and 'kind' identifiers differently. + // We handle the 'file', 'kind', and 'member' identifiers differently. 
bool Match = llvm::all_of(Args, [&](auto &Arg) { const auto [Key, Value] = Arg; if (Key == "file") return true; if (Key == "kind") return Binary->getOffloadKind() == getOffloadKind(Value); + if (Key == "member") + return sys::path::filename( + Binary->getMemoryBufferRef().getBufferIdentifier()) == + Value; return Binary->getString(Key) == Value; }); if (Match) From 52f39359a3d657c8505975f724de10788af193e6 Mon Sep 17 00:00:00 2001 From: CarolineConcatto Date: Tue, 23 Jun 2026 14:13:03 +0100 Subject: [PATCH 179/511] [AArch64] Add flag to conditionally write FPMR (#203911) Add an AArch64 codegen flag to make llvm.aarch64.set.fpmr avoid writing FPMR when it already contains the requested value. By default, llvm.aarch64.set.fpmr continues to lower directly to an MSR FPMR instruction. With -aarch64-use-conditional-fpmr-write, the backend lowers the intrinsic to an MRS/MSR conditional branch sequence. This is based on the initial implementation from: https://github.com/llvm/llvm-project/pull/114248 However this PR keeps the conditional FPMR write sequence behind a codegen flag. One reason to change the codegen lowering is because GCC emits the conditional branch sequence unconditionally. LLVM preserves the existing direct MSR lowering by default. --- .../Target/AArch64/AArch64ISelLowering.cpp | 57 +++++++++++++++++++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 3 + llvm/lib/Target/AArch64/AArch64InstrInfo.td | 3 +- llvm/test/CodeGen/AArch64/arm64-fpenv.ll | 54 +++++++++++++++--- 4 files changed, 108 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 12a8c829dc380..e56a9be69dd7c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -163,6 +163,15 @@ static cl::opt UseFEATCPACodegen( "SelectionDAG for FEAT_CPA"), cl::init(false)); +// FPMR writes might be a synchronization barrier and thus carry a significant +// cost.
Give users the option to skip writes when the requested value is +// already set. +static cl::opt UseConditionalFPMRWrite( + "aarch64-use-conditional-fpmr-write", cl::Hidden, + cl::desc("Only write FPMR when the requested value differs from the " + "current value"), + cl::init(false)); + /// Value type used for condition codes. constexpr MVT CondCodeVT = MVT::i32; @@ -3196,6 +3205,52 @@ MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet( return BB; } +MachineBasicBlock * +AArch64TargetLowering::EmitLoweredSetFpmr(MachineInstr &MI, + MachineBasicBlock *MBB) const { + MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + DebugLoc DL = MI.getDebugLoc(); + + if (!UseConditionalFPMRWrite) { + BuildMI(*MBB, MI, DL, TII->get(AArch64::MSR)) + .addImm(0xda22) + .add(MI.getOperand(0)) + .addDef(AArch64::FPMR, RegState::Implicit); + MI.eraseFromParent(); + return MBB; + } + + Register NewFpmrVal = MI.getOperand(0).getReg(); + const BasicBlock *LLVM_BB = MBB->getBasicBlock(); + MachineBasicBlock *MsrBB = MF->CreateMachineBasicBlock(LLVM_BB); + MachineBasicBlock *EndBB = MBB->splitAt(MI); + MF->insert(++MBB->getIterator(), MsrBB); + + Register CurrentFpmrVal = + MF->getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); + BuildMI(*MBB, MI, DL, TII->get(AArch64::MRS), CurrentFpmrVal) + .addImm(0xda22) + .addUse(AArch64::FPMR, RegState::Implicit); + BuildMI(*MBB, MI, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR) + .addReg(CurrentFpmrVal, RegState::Kill) + .addReg(NewFpmrVal) + .addImm(0); + BuildMI(*MBB, MI, DL, TII->get(AArch64::Bcc)) + .addImm(AArch64CC::EQ) + .addMBB(EndBB); + BuildMI(*MsrBB, MsrBB->begin(), DL, TII->get(AArch64::MSR)) + .addImm(0xda22) + .addReg(NewFpmrVal, getKillRegState(MI.getOperand(0).isDead())) + .addDef(AArch64::FPMR, RegState::Implicit); + + MBB->addSuccessor(MsrBB); + MsrBB->addSuccessor(EndBB); + + MI.eraseFromParent(); + return EndBB; +} + MachineBasicBlock * 
AArch64TargetLowering::EmitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const { @@ -3568,6 +3623,8 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( return EmitZTInstr(MI, BB, AArch64::ZERO_T, /*Op0IsDef=*/true); case AArch64::MOVT_TIZ_PSEUDO: return EmitZTInstr(MI, BB, AArch64::MOVT_TIZ, /*Op0IsDef=*/true); + case AArch64::MSR_FPMR: + return EmitLoweredSetFpmr(MI, BB); case AArch64::PAC: fixupPtrauthDiscriminator(MI, BB, MI.getOperand(3), MI.getOperand(4), diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 704eed7877bdc..4c3994e4e3d1d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -175,6 +175,9 @@ class AArch64TargetLowering : public TargetLowering { MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI, MachineBasicBlock *BB) const; + MachineBasicBlock *EmitLoweredSetFpmr(MachineInstr &MI, + MachineBasicBlock *MBB) const; + MachineBasicBlock *EmitDynamicProbedAlloc(MachineInstr &MI, MachineBasicBlock *MBB) const; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 96c77c2f75196..fc7aea5a4d5f5 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -2529,10 +2529,9 @@ def MSR_FPSR : Pseudo<(outs), (ins GPR64:$val), PseudoInstExpansion<(MSR 0xda21, GPR64:$val)>, Sched<[WriteSys]>; -let Defs = [FPMR] in +let Uses = [FPMR], Defs = [FPMR, NZCV], usesCustomInserter = 1 in def MSR_FPMR : Pseudo<(outs), (ins GPR64:$val), [(int_aarch64_set_fpmr i64:$val)]>, - PseudoInstExpansion<(MSR 0xda22, GPR64:$val)>, Sched<[WriteSys]>; // Generic system instructions diff --git a/llvm/test/CodeGen/AArch64/arm64-fpenv.ll b/llvm/test/CodeGen/AArch64/arm64-fpenv.ll index 412f89ae67439..c67cbd8cdb406 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fpenv.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fpenv.ll @@ -1,5 +1,6 
@@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 -; RUN: llc -mtriple=aarch64 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64 -mattr=+neon,+fp8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,DIRECT +; RUN: llc -mtriple=aarch64 -mattr=+neon,+fp8 -verify-machineinstrs -aarch64-use-conditional-fpmr-write < %s | FileCheck %s --check-prefixes=CHECK,COND define i64 @get_fpcr() #0 { ; CHECK-LABEL: get_fpcr: @@ -38,12 +39,51 @@ define void @set_fpsr(i64 %sr) { } define void @set_fpmr(i64 %sr) { -; CHECK-LABEL: set_fpmr: -; CHECK: // %bb.0: -; CHECK-NEXT: msr FPMR, x0 -; CHECK-NEXT: ret - call void @llvm.aarch64.set.fpmr(i64 %sr) - ret void +; DIRECT-LABEL: set_fpmr: +; DIRECT: // %bb.0: +; DIRECT-NEXT: msr FPMR, x0 +; DIRECT-NEXT: ret +; COND-LABEL: set_fpmr: +; COND: // %bb.0: +; COND-NEXT: mrs x8, FPMR +; COND-NEXT: cmp x8, x0 +; COND-NEXT: b.eq .LBB4_2 +; COND-NEXT: // %bb.1: +; COND-NEXT: msr FPMR, x0 +; COND-NEXT: .LBB4_2: +; COND-NEXT: ret + call void @llvm.aarch64.set.fpmr(i64 %sr) + ret void +} + +define void @set_fpmr_then_fp8_cvt_and_store(i64 %fpmr, ptr %a, ptr %b, ptr %out) { +; DIRECT-LABEL: set_fpmr_then_fp8_cvt_and_store: +; DIRECT: // %bb.0: +; DIRECT-NEXT: msr FPMR, x0 +; DIRECT-NEXT: ldr d0, [x1] +; DIRECT-NEXT: ldr d1, [x2] +; DIRECT-NEXT: fcvtn v0.8b, v0.4h, v1.4h +; DIRECT-NEXT: str d0, [x3] +; DIRECT-NEXT: ret +; COND-LABEL: set_fpmr_then_fp8_cvt_and_store: +; COND: // %bb.0: +; COND-NEXT: mrs x8, FPMR +; COND-NEXT: cmp x8, x0 +; COND-NEXT: b.eq .LBB5_2 +; COND-NEXT: // %bb.1: +; COND-NEXT: msr FPMR, x0 +; COND-NEXT: .LBB5_2: +; COND-NEXT: ldr d0, [x1] +; COND-NEXT: ldr d1, [x2] +; COND-NEXT: fcvtn v0.8b, v0.4h, v1.4h +; COND-NEXT: str d0, [x3] +; COND-NEXT: ret + call void @llvm.aarch64.set.fpmr(i64 %fpmr) + %va = load <4 x half>, ptr %a, align 8 + %vb = load <4 x half>, ptr %b, align 8 + %res = call <8 x i8> @llvm.aarch64.neon.fp8.fcvtn.v8i8.v4f16(<4 x 
half> %va, <4 x half> %vb) + store <8 x i8> %res, ptr %out, align 8 + ret void } declare i64 @llvm.aarch64.get.fpcr() From 87ca9c9baafcde2387d6a8a89a04d2307ab72bb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Benics?= Date: Tue, 23 Jun 2026 15:13:04 +0200 Subject: [PATCH 180/511] Reland "[clang][ssaf][NFC] Make SSAFOptions available in Builders and Extractors" (#205334) The original version of this was reverted as part of #205279 because I didn't know if this or the other patch caused the Windows build failures. It turns out this patch is fine. I'm relanding this now. --- Now that we have SSAFOptions, it would be a lot more ergonomic if it were accessible from builders and extractors. This PR does exactly that. Part of rdar://179151023 Co-authored-by: Jan Korous Co-authored-by: Claude Opus 4.7 --- .../Core/TUSummary/TUSummaryBuilder.h | 8 +++++++- .../Core/TUSummary/TUSummaryExtractor.h | 4 ++++ .../Analyses/PointerFlow/PointerFlowExtractor.cpp | 3 +-- .../Core/TUSummary/TUSummaryExtractor.cpp | 4 ++++ .../Frontend/TUSummaryExtractorFrontendAction.cpp | 9 ++++++--- .../Analyses/CallGraph/CallGraphExtractorTest.cpp | 4 +++- .../Analyses/PointerFlow/PointerFlowTest.cpp | 4 +++- .../UnsafeBufferUsage/UnsafeBufferUsageTest.cpp | 4 +++- .../Registries/SummaryExtractorRegistryTest.cpp | 10 +++++++--- .../TUSummaryBuilderTest.cpp | 4 +++- 10 files changed, 41 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h b/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h index f9ebe5358b585..38bd60718ed9c 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h +++ b/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h @@ -18,11 +18,13 @@ namespace clang::ssaf { class EntityName; +class SSAFOptions; class TUSummary; class TUSummaryBuilder { public: - explicit TUSummaryBuilder(TUSummary 
&Summary) : Summary(Summary) {} + TUSummaryBuilder(TUSummary &Summary, const SSAFOptions &Options) + : Summary(Summary), Options(Options) {} EntityId addEntity(const EntityName &EN, EntityLinkageType Linkage); @@ -35,8 +37,12 @@ class TUSummaryBuilder { std::pair addSummary(EntityId Entity, std::unique_ptr &&Data); + /// \returns the \c SSAFOptions of this builder. + const SSAFOptions &getOptions() const { return Options; } + private: TUSummary &Summary; + const SSAFOptions &Options; std::pair addSummaryImpl(EntityId Entity, std::unique_ptr &&Data); diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h b/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h index 46b0ae835d729..b943748873821 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h +++ b/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h @@ -15,6 +15,7 @@ #include namespace clang::ssaf { +class SSAFOptions; class TUSummaryBuilder; class TUSummaryExtractor : public ASTConsumer { @@ -32,6 +33,9 @@ class TUSummaryExtractor : public ASTConsumer { /// \returns the EntityId, or std::nullopt if EntityName creation fails. std::optional addEntityForReturn(const FunctionDecl *FD); + /// \returns the \c SSAFOptions of the builder. 
+ const SSAFOptions &getOptions() const; + protected: TUSummaryBuilder &SummaryBuilder; }; diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp b/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp index 38e3e3ec3ab9e..ef5932c52a6c3 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp @@ -311,8 +311,7 @@ PointerFlowMatcher::matchesInitializerList(const ValueDecl *Base, class PointerFlowTUSummaryExtractor : public TUSummaryExtractor { public: - PointerFlowTUSummaryExtractor(TUSummaryBuilder &Builder) - : TUSummaryExtractor(Builder) {} + using TUSummaryExtractor::TUSummaryExtractor; /// \return a non-null unique pointer to a PointerFlowEntitySummary std::unique_ptr diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp b/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp index f0602e0d5550f..ccd5eef377d2d 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp @@ -74,3 +74,7 @@ TUSummaryExtractor::addEntityForReturn(const FunctionDecl *FD) { return std::nullopt; return SummaryBuilder.addEntity(*Name, getLinkageForDecl(FD)); } + +const SSAFOptions &TUSummaryExtractor::getOptions() const { + return SummaryBuilder.getOptions(); +} diff --git a/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp b/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp index b4b3e85386428..4f290ccac3d16 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp @@ -106,9 +106,12 
@@ class TUSummaryRunner final : public MultiplexConsumer { void HandleTranslationUnit(ASTContext &Ctx) override; TUSummary Summary; - TUSummaryBuilder Builder = TUSummaryBuilder(Summary); - std::unique_ptr Format; + + /// Owned by the \c CompilerInstance. const SSAFOptions &Opts; + + TUSummaryBuilder Builder = TUSummaryBuilder(Summary, Opts); + std::unique_ptr Format; }; } // namespace @@ -141,7 +144,7 @@ TUSummaryRunner::TUSummaryRunner(llvm::Triple TargetTriple, Summary(std::move(TargetTriple), BuildNamespace(BuildNamespaceKind::CompilationUnit, Opts.CompilationUnitId)), - Format(std::move(Format)), Opts(Opts) { + Opts(Opts), Format(std::move(Format)) { assert(this->Format); assert(!Opts.CompilationUnitId.empty()); diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp index 69db546fbf93d..0a3685c4f1057 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp @@ -11,6 +11,7 @@ #include "clang/AST/Decl.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphSummary.h" #include "clang/ScalableStaticAnalysisFramework/Core/ASTEntityMapping.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" @@ -123,10 +124,11 @@ template auto hasSummaryThat(const Matchers &...Ms) { static const SummaryName CallGraphName{CallGraphSummary::Name.str()}; struct CallGraphExtractorTest : ssaf::TestFixture { + SSAFOptions Opts; TUSummary Summary{ llvm::Triple("arm64-apple-macosx"), BuildNamespace(BuildNamespaceKind::CompilationUnit, "Mock.cpp")}; - TUSummaryBuilder Builder = TUSummaryBuilder(Summary); + 
TUSummaryBuilder Builder = TUSummaryBuilder(Summary, Opts); /// Creates the AST and extractor, then extracts the summaries from the AST. /// This will update the \c AST \c Builder and \c Summary data members. diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp index 44465a59d4cfd..49e7bdc21738b 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp @@ -13,6 +13,7 @@ #include "clang/AST/DynamicRecursiveASTVisitor.h" #include "clang/AST/ExprCXX.h" #include "clang/Frontend/ASTUnit.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" @@ -146,6 +147,7 @@ struct EPLPair { class PointerFlowTest : public TestFixture { protected: + SSAFOptions Opts; TUSummary TUSum; TUSummaryBuilder Builder; std::unique_ptr Extractor; @@ -154,7 +156,7 @@ class PointerFlowTest : public TestFixture { PointerFlowTest() : TUSum(llvm::Triple("arm64-apple-macosx"), BuildNamespace(BuildNamespaceKind::CompilationUnit, "Mock.cpp")), - Builder(TUSum), Extractor(nullptr) {} + Builder(TUSum, Opts), Extractor(nullptr) {} template Extractor; @@ -43,7 +45,7 @@ class UnsafeBufferUsageTest : public TestFixture { UnsafeBufferUsageTest() : TUSum(llvm::Triple("arm64-apple-macosx"), BuildNamespace(BuildNamespaceKind::CompilationUnit, "Mock.cpp")), - Builder(TUSum) {} + Builder(TUSum, Opts) {} bool setUpTest(StringRef Code) { AST = tooling::buildASTFromCodeWithArgs( diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp 
b/clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp index fd6ad83225e10..2294c045d554b 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp @@ -8,6 +8,7 @@ #include "MockTUSummaryBuilder.h" #include "clang/Frontend/MultiplexConsumer.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" #include "clang/Tooling/Tooling.h" @@ -46,8 +47,9 @@ TEST(SummaryExtractorRegistryTest, EnumeratingRegistryEntries) { } TEST(SummaryExtractorRegistryTest, InstantiatingExtractor1) { + SSAFOptions Opts; TUSummary Summary = makeFakeSummary(); - MockTUSummaryBuilder FakeBuilder(Summary); + MockTUSummaryBuilder FakeBuilder(Summary, Opts); { auto Consumer = makeTUSummaryExtractor("MockSummaryExtractor1", FakeBuilder); @@ -60,8 +62,9 @@ TEST(SummaryExtractorRegistryTest, InstantiatingExtractor1) { } TEST(SummaryExtractorRegistryTest, InstantiatingExtractor2) { + SSAFOptions Opts; TUSummary Summary = makeFakeSummary(); - MockTUSummaryBuilder FakeBuilder(Summary); + MockTUSummaryBuilder FakeBuilder(Summary, Opts); { auto Consumer = makeTUSummaryExtractor("MockSummaryExtractor2", FakeBuilder); @@ -74,8 +77,9 @@ TEST(SummaryExtractorRegistryTest, InstantiatingExtractor2) { } TEST(SummaryExtractorRegistryTest, InvokingExtractors) { + SSAFOptions Opts; TUSummary Summary = makeFakeSummary(); - MockTUSummaryBuilder FakeBuilder(Summary); + MockTUSummaryBuilder FakeBuilder(Summary, Opts); std::vector> Consumers; for (std::string Name : {"MockSummaryExtractor1", "MockSummaryExtractor2"}) { auto Consumer = makeTUSummaryExtractor(Name, FakeBuilder); diff --git a/clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp 
b/clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp index ffcf068ce6956..55c5c781e42d9 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp +++ b/clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp @@ -9,6 +9,7 @@ #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" #include "FindDecl.h" #include "TestFixture.h" +#include "clang/Frontend/SSAFOptions.h" #include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" #include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" #include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" @@ -89,10 +90,11 @@ void PrintTo(const MockSummaryData3 &S, std::ostream *OS) { } struct TUSummaryBuilderTest : ssaf::TestFixture { + SSAFOptions Opts; TUSummary Summary{ llvm::Triple("arm64-apple-macosx"), BuildNamespace(BuildNamespaceKind::CompilationUnit, "Mock.cpp")}; - TUSummaryBuilder Builder{Summary}; + TUSummaryBuilder Builder{Summary, Opts}; TUSummaryExtractor Extractor{Builder}; [[nodiscard]] EntityId addTestEntity(llvm::StringRef USR) { From adbb3a02ae9485af0da4eba045272ad56626e6e1 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 23 Jun 2026 23:15:31 +1000 Subject: [PATCH 181/511] [orc-rt] Add MacroUtils.h header for general purpose macros. (#205337) For now just contains ORC_RT_DEPAREN, a macro for stripping parentheses from its argument. This will be used in an upcoming commit. 
--- orc-rt/include/CMakeLists.txt | 1 + orc-rt/include/orc-rt/MacroUtils.h | 31 +++++++++++++++++++++ orc-rt/unittests/CMakeLists.txt | 1 + orc-rt/unittests/MacroUtilsTest.cpp | 42 +++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+) create mode 100644 orc-rt/include/orc-rt/MacroUtils.h create mode 100644 orc-rt/unittests/MacroUtilsTest.cpp diff --git a/orc-rt/include/CMakeLists.txt b/orc-rt/include/CMakeLists.txt index 52db75c4998ab..8ee4dfb075270 100644 --- a/orc-rt/include/CMakeLists.txt +++ b/orc-rt/include/CMakeLists.txt @@ -15,6 +15,7 @@ set(ORC_RT_HEADERS orc-rt/IntervalMap.h orc-rt/IntervalSet.h orc-rt/LockedAccess.h + orc-rt/MacroUtils.h orc-rt/Math.h orc-rt/MemoryFlags.h orc-rt/NativeDylibManager.h diff --git a/orc-rt/include/orc-rt/MacroUtils.h b/orc-rt/include/orc-rt/MacroUtils.h new file mode 100644 index 0000000000000..3ecba2e652692 --- /dev/null +++ b/orc-rt/include/orc-rt/MacroUtils.h @@ -0,0 +1,31 @@ +//===- MacroUtils.h - General-purpose preprocessor helpers ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Small preprocessor-utility macros not specific to any particular orc-rt +// subsystem. +// +//===----------------------------------------------------------------------===// + +#ifndef ORC_RT_MACROUTILS_H +#define ORC_RT_MACROUTILS_H + +#define ORC_RT_DETAIL_DEPAREN_HELPER(...) __VA_ARGS__ + +/// Strip a single layer of outer parentheses from a token sequence. +/// +/// Useful for passing a parenthesized comma-separated list (e.g. 
a list of +/// types) as a single argument to a function-like macro, then unwrapping it +/// at the use site: +/// +/// #define MY_MACRO(Types) some_template<ORC_RT_DEPAREN(Types)> +/// MY_MACRO((int, double, char)) +/// -> some_template<int, double, char> +/// +#define ORC_RT_DEPAREN(X) ORC_RT_DETAIL_DEPAREN_HELPER X + +#endif // ORC_RT_MACROUTILS_H diff --git a/orc-rt/unittests/CMakeLists.txt b/orc-rt/unittests/CMakeLists.txt index 9de90a7db94f7..f7b67e283aeb0 100644 --- a/orc-rt/unittests/CMakeLists.txt +++ b/orc-rt/unittests/CMakeLists.txt @@ -28,6 +28,7 @@ add_orc_rt_unittest(CoreTests IntervalMapTest.cpp IntervalSetTest.cpp LockedAccessTest.cpp + MacroUtilsTest.cpp MathTest.cpp MemoryAccessSPSCITest.cpp MemoryFlagsTest.cpp diff --git a/orc-rt/unittests/MacroUtilsTest.cpp b/orc-rt/unittests/MacroUtilsTest.cpp new file mode 100644 index 0000000000000..55b6d1de0116b --- /dev/null +++ b/orc-rt/unittests/MacroUtilsTest.cpp @@ -0,0 +1,42 @@ +//===- MacroUtilsTest.cpp -------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Tests for orc-rt's MacroUtils.h APIs. +// +// These are compile-time invariants — the static_asserts pin the contract +// of ORC_RT_DEPAREN. The trivial TEST exists so the file produces a gtest +// case to run. +// +//===----------------------------------------------------------------------===// + +#include "orc-rt/MacroUtils.h" + +#include "gtest/gtest.h" + +#include +#include + +// Multi-element list. +static_assert( + std::is_same_v, + std::tuple>, + "ORC_RT_DEPAREN should strip outer parens from a multi-element list"); + +// Single-element list. +static_assert( + std::is_same_v, std::tuple>, + "ORC_RT_DEPAREN should strip outer parens from a single-element list"); + +// Empty list. 
+static_assert(std::is_same_v, std::tuple<>>, + "ORC_RT_DEPAREN should produce nothing from an empty list"); + +TEST(MacroUtilsTest, DeParenCompiles) { + // The real coverage is in the static_asserts above; this case exists so + // the file contributes a runnable gtest entry. +} From 1d3f9ac8f1d6730b709002e723ed44ed4cc54592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mirko=20Brku=C5=A1anin?= Date: Tue, 23 Jun 2026 15:17:04 +0200 Subject: [PATCH 182/511] [AMDGPU] Define new target gfx1154 (#204816) --- clang/include/clang/Basic/OffloadArch.h | 1 + clang/lib/Basic/OffloadArch.cpp | 1 + clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp | 1 + .../test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl | 1 + clang/test/Driver/amdgpu-macros.cl | 1 + clang/test/Driver/amdgpu-mcpu.cl | 2 ++ .../test/Misc/target-invalid-cpu-note/amdgcn.c | 1 + clang/test/Misc/target-invalid-cpu-note/nvptx.c | 1 + llvm/docs/AMDGPUUsage.rst | 17 +++++++++++------ llvm/include/llvm/BinaryFormat/ELF.h | 1 + .../llvm/TargetParser/AMDGPUTargetParser.def | 1 + llvm/lib/Target/AMDGPU/AMDGPU.td | 4 ---- llvm/lib/Target/AMDGPU/GCNProcessors.td | 9 +++++++-- .../MCTargetDesc/AMDGPUTargetStreamer.cpp | 2 ++ llvm/lib/TargetParser/AMDGPUTargetParser.cpp | 1 + .../CodeGen/AMDGPU/directive-amdgcn-target.ll | 2 ++ .../CodeGen/AMDGPU/elf-header-flags-mach.ll | 2 ++ .../Object/AMDGPU/elf-header-flags-mach.yaml | 7 +++++++ .../tools/llvm-objdump/ELF/AMDGPU/subtarget.ll | 5 +++++ .../llvm-readobj/ELF/AMDGPU/elf-headers.test | 9 +++++++++ 20 files changed, 57 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Basic/OffloadArch.h b/clang/include/clang/Basic/OffloadArch.h index dc7840cd78a18..5ad2129b9548c 100644 --- a/clang/include/clang/Basic/OffloadArch.h +++ b/clang/include/clang/Basic/OffloadArch.h @@ -111,6 +111,7 @@ enum class OffloadArch { GFX1151, GFX1152, GFX1153, + GFX1154, GFX1170, GFX1171, GFX1172, diff --git a/clang/lib/Basic/OffloadArch.cpp b/clang/lib/Basic/OffloadArch.cpp index 709580fbe5be2..8cf97d1f7597e 
100644 --- a/clang/lib/Basic/OffloadArch.cpp +++ b/clang/lib/Basic/OffloadArch.cpp @@ -104,6 +104,7 @@ static const OffloadArchToStringMap ArchNames[] = { GFX(1151), // gfx1151 GFX(1152), // gfx1152 GFX(1153), // gfx1153 + GFX(1154), // gfx1154 GFX(1170), // gfx1170 GFX(1171), // gfx1171 GFX(1172), // gfx1172 diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index 19e8c73884dfc..dec3bca0ef3eb 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -2366,6 +2366,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) { case OffloadArch::GFX1151: case OffloadArch::GFX1152: case OffloadArch::GFX1153: + case OffloadArch::GFX1154: case OffloadArch::GFX1170: case OffloadArch::GFX1171: case OffloadArch::GFX1172: diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl index b6f1c441e6cf0..a436090dc4557 100644 --- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl +++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl @@ -7,6 +7,7 @@ // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1151 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1152 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1153 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s +// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1154 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1170 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1171 -emit-llvm -o - %s | FileCheck --check-prefixes=CHECK,GCN %s // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1172 -emit-llvm -o - %s | 
FileCheck --check-prefixes=CHECK,GCN %s diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl index 6f41720a44c96..21b6db8445a8f 100644 --- a/clang/test/Driver/amdgpu-macros.cl +++ b/clang/test/Driver/amdgpu-macros.cl @@ -129,6 +129,7 @@ // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1151 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1151 -DFAMILY=GFX11 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1152 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1152 -DFAMILY=GFX11 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1153 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1153 -DFAMILY=GFX11 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1154 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1154 -DFAMILY=GFX11 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1170 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1170 -DFAMILY=GFX11 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1171 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1171 -DFAMILY=GFX11 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1172 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1172 -DFAMILY=GFX11 diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl index ecacb42121485..97f4153525fe2 100644 --- a/clang/test/Driver/amdgpu-mcpu.cl +++ b/clang/test/Driver/amdgpu-mcpu.cl @@ -113,6 +113,7 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx1151 %s 2>&1 | FileCheck --check-prefix=GFX1151 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1152 %s 2>&1 | FileCheck --check-prefix=GFX1152 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1153 %s 2>&1 | FileCheck --check-prefix=GFX1153 %s +// RUN: %clang -### -target amdgcn -mcpu=gfx1154 %s 2>&1 | FileCheck --check-prefix=GFX1154 %s // RUN: %clang 
-### -target amdgcn -mcpu=gfx1170 %s 2>&1 | FileCheck --check-prefix=GFX1170 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1171 %s 2>&1 | FileCheck --check-prefix=GFX1171 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1172 %s 2>&1 | FileCheck --check-prefix=GFX1172 %s @@ -174,6 +175,7 @@ // GFX1151: "-target-cpu" "gfx1151" // GFX1152: "-target-cpu" "gfx1152" // GFX1153: "-target-cpu" "gfx1153" +// GFX1154: "-target-cpu" "gfx1154" // GFX1170: "-target-cpu" "gfx1170" // GFX1171: "-target-cpu" "gfx1171" // GFX1172: "-target-cpu" "gfx1172" diff --git a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c index 87e156a53caf5..44c6a61b184af 100644 --- a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c +++ b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c @@ -66,6 +66,7 @@ // CHECK-SAME: {{^}}, gfx1151 // CHECK-SAME: {{^}}, gfx1152 // CHECK-SAME: {{^}}, gfx1153 +// CHECK-SAME: {{^}}, gfx1154 // CHECK-SAME: {{^}}, gfx1170 // CHECK-SAME: {{^}}, gfx1171 // CHECK-SAME: {{^}}, gfx1172 diff --git a/clang/test/Misc/target-invalid-cpu-note/nvptx.c b/clang/test/Misc/target-invalid-cpu-note/nvptx.c index 799058feb43f9..6c234cc3a5c5c 100644 --- a/clang/test/Misc/target-invalid-cpu-note/nvptx.c +++ b/clang/test/Misc/target-invalid-cpu-note/nvptx.c @@ -93,6 +93,7 @@ // CHECK-SAME: {{^}}, gfx1151 // CHECK-SAME: {{^}}, gfx1152 // CHECK-SAME: {{^}}, gfx1153 +// CHECK-SAME: {{^}}, gfx1154 // CHECK-SAME: {{^}}, gfx1170 // CHECK-SAME: {{^}}, gfx1171 // CHECK-SAME: {{^}}, gfx1172 diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst index dba0997e4f099..8aad903f98561 100644 --- a/llvm/docs/AMDGPUUsage.rst +++ b/llvm/docs/AMDGPUUsage.rst @@ -520,6 +520,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following work-item Add product IDs names. + ``gfx1154`` ``amdgcn`` APU - cumode - Architected *TBA* + - wavefrontsize64 flat + scratch .. TODO:: + - Packed + work-item Add product + IDs names. 
+ **GCN GFX11.7 (RDNA 4m)** ----------------------------------------------------------------------------------------------------------------------- ``gfx1170`` ``amdgcn`` APU - cumode - Architected *TBA* @@ -671,12 +678,10 @@ Generic processor code objects are versioned. See :ref:`amdgpu-generic-processor - ``gfx1102`` - Packed hazards specific to some targets - ``gfx1103`` work-item within this family. - ``gfx1150`` IDs - - ``gfx1151`` + - ``gfx1151`` Not all VGPRs can be used on: - ``gfx1152`` - - ``gfx1153`` Not all VGPRs can be used on: - - - ``gfx1100`` - - ``gfx1101`` + - ``gfx1153`` - ``gfx1100`` + - ``gfx1154`` - ``gfx1101`` - ``gfx1151`` SALU floating point instructions @@ -3048,7 +3053,7 @@ The AMDGPU backend uses the following ELF header: ``EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC`` 0x054 ``gfx11-generic`` ``EF_AMDGPU_MACH_AMDGCN_GFX1152`` 0x055 ``gfx1152``. *reserved* 0x056 Reserved. - *reserved* 0x057 Reserved. + ``EF_AMDGPU_MACH_AMDGCN_GFX1154`` 0x057 ``gfx1154``. ``EF_AMDGPU_MACH_AMDGCN_GFX1153`` 0x058 ``gfx1153``. 
``EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC`` 0x059 ``gfx12-generic`` ``EF_AMDGPU_MACH_AMDGCN_GFX1251`` 0x05a ``gfx1251`` diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 72cded68463a6..5b9ed6100f20f 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -832,6 +832,7 @@ enum { X(0x53, EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, "gfx10-3-generic") \ X(0x54, EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, "gfx11-generic") \ X(0x55, EF_AMDGPU_MACH_AMDGCN_GFX1152, "gfx1152") \ + X(0x57, EF_AMDGPU_MACH_AMDGCN_GFX1154, "gfx1154") \ X(0x58, EF_AMDGPU_MACH_AMDGCN_GFX1153, "gfx1153") \ X(0x59, EF_AMDGPU_MACH_AMDGCN_GFX12_GENERIC, "gfx12-generic") \ X(0x5a, EF_AMDGPU_MACH_AMDGCN_GFX1251, "gfx1251") \ diff --git a/llvm/include/llvm/TargetParser/AMDGPUTargetParser.def b/llvm/include/llvm/TargetParser/AMDGPUTargetParser.def index dcc0c28b1ee74..3c8b756ae2705 100644 --- a/llvm/include/llvm/TargetParser/AMDGPUTargetParser.def +++ b/llvm/include/llvm/TargetParser/AMDGPUTargetParser.def @@ -118,6 +118,7 @@ AMDGCN_GPU ("gfx1150", GK_GFX1150, (11, 5, 0), FEATURE_FAST_FMA_F32|FEAT AMDGCN_GPU ("gfx1151", GK_GFX1151, (11, 5, 1), FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP) AMDGCN_GPU ("gfx1152", GK_GFX1152, (11, 5, 2), FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP) AMDGCN_GPU ("gfx1153", GK_GFX1153, (11, 5, 3), FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP) +AMDGCN_GPU ("gfx1154", GK_GFX1154, (11, 5, 4), FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP) AMDGCN_GPU ("gfx1170", GK_GFX1170, (11, 7, 0), FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP) AMDGCN_GPU ("gfx1171", GK_GFX1171, (11, 7, 1), FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP) AMDGCN_GPU ("gfx1172", GK_GFX1172, (11, 7, 2), FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP) 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 2abb9c0154947..705e76ebeb7e5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -2057,10 +2057,6 @@ def FeatureISAVersion11_5_2 : FeatureSet< !listconcat(FeatureISAVersion11_5_Common.Features, [FeaturePointSampleAccel])>; -def FeatureISAVersion11_5_3 : FeatureSet< - !listconcat(FeatureISAVersion11_5_Common.Features, - [])>; - def FeatureISAVersion11_7_Common : FeatureSet< !listconcat(FeatureISAVersion11_Common.Features, [FeatureGFX11_7Insts, diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td index c2aac75a73ad0..20c9655739890 100644 --- a/llvm/lib/Target/AMDGPU/GCNProcessors.td +++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td @@ -301,7 +301,11 @@ def : ProcessorModel<"gfx1152", GFX11SpeedModel, >; def : ProcessorModel<"gfx1153", GFX11SpeedModel, - FeatureISAVersion11_5_3.Features + FeatureISAVersion11_5_Common.Features +>; + +def : ProcessorModel<"gfx1154", GFX11SpeedModel, + FeatureISAVersion11_5_Common.Features >; def : ProcessorModel<"gfx1170", GFX11SpeedModel, @@ -316,7 +320,8 @@ def : ProcessorModel<"gfx1172", GFX11SpeedModel, FeatureISAVersion11_7_Common.Features >; -// [gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1152, gfx1153] +// [gfx1100, gfx1101, gfx1102, gfx1103, +// gfx1150, gfx1151, gfx1152, gfx1153, gfx1154] def : ProcessorModel<"gfx11-generic", GFX11SpeedModel, FeatureISAVersion11_Generic.Features >; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp index d999e99fb7d12..5db46edf18e67 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp @@ -117,6 +117,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) { case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break; case 
ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152: AK = GK_GFX1152; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153: AK = GK_GFX1153; break; + case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1154: AK = GK_GFX1154; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1170: AK = GK_GFX1170; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1171: AK = GK_GFX1171; break; case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1172: AK = GK_GFX1172; break; @@ -209,6 +210,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) { case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151; case GK_GFX1152: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1152; case GK_GFX1153: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1153; + case GK_GFX1154: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1154; case GK_GFX1170: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1170; case GK_GFX1171: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1171; case GK_GFX1172: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1172; diff --git a/llvm/lib/TargetParser/AMDGPUTargetParser.cpp b/llvm/lib/TargetParser/AMDGPUTargetParser.cpp index 5820d0c148ce6..bc9a94c4bbade 100644 --- a/llvm/lib/TargetParser/AMDGPUTargetParser.cpp +++ b/llvm/lib/TargetParser/AMDGPUTargetParser.cpp @@ -343,6 +343,7 @@ static void fillAMDGCNFeatureMap(StringRef GPU, const Triple &T, Features["swmmac-gfx1200-insts"] = true; Features["atomic-fmin-fmax-global-f32"] = true; break; + case GK_GFX1154: case GK_GFX1153: case GK_GFX1152: case GK_GFX1151: diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll index 562f2672f3d2a..7f1923edeb204 100644 --- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll +++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll @@ -104,6 +104,7 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 < %s | FileCheck --check-prefixes=GFX1151 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1152 < %s | FileCheck --check-prefixes=GFX1152 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1153 < %s | FileCheck --check-prefixes=GFX1153 %s +; RUN: llc 
-mtriple=amdgcn-amd-amdhsa -mcpu=gfx1154 < %s | FileCheck --check-prefixes=GFX1154 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1170 < %s | FileCheck --check-prefixes=GFX1170 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1171 < %s | FileCheck --check-prefixes=GFX1171 %s ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1172 < %s | FileCheck --check-prefixes=GFX1172 %s @@ -208,6 +209,7 @@ ; GFX1151: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1151" ; GFX1152: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1152" ; GFX1153: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1153" +; GFX1154: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1154" ; GFX1170: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1170" ; GFX1171: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1171" ; GFX1172: .amdgcn_target "amdgcn-amd-amdhsa-unknown-gfx1172" diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll index 830a4134170a8..04b7415b8a4da 100644 --- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll +++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll @@ -75,6 +75,7 @@ ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1151 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1151 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1152 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1152 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1153 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1153 %s +; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1154 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1154 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1170 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1170 %s ; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1171 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1171 %s ; RUN: llc 
-filetype=obj -mtriple=amdgcn -mcpu=gfx1172 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1172 %s @@ -163,6 +164,7 @@ ; GFX1151: EF_AMDGPU_MACH_AMDGCN_GFX1151 (0x4A) ; GFX1152: EF_AMDGPU_MACH_AMDGCN_GFX1152 (0x55) ; GFX1153: EF_AMDGPU_MACH_AMDGCN_GFX1153 (0x58) +; GFX1154: EF_AMDGPU_MACH_AMDGCN_GFX1154 (0x57) ; GFX1170: EF_AMDGPU_MACH_AMDGCN_GFX1170 (0x5D) ; GFX1171: EF_AMDGPU_MACH_AMDGCN_GFX1171 (0x5E) ; GFX1172: EF_AMDGPU_MACH_AMDGCN_GFX1172 (0x5C) diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml index 6de089e9b5272..94d5c19b3faa8 100644 --- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml +++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml @@ -234,6 +234,10 @@ # RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1153 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1153 %s # RUN: obj2yaml %t.o.AMDGCN_GFX1153 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1153 %s +# RUN: sed -e 's//64/' -e 's//AMDGCN_GFX1154/' %s | yaml2obj -o %t.o.AMDGCN_GFX1154 +# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1154 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1154 %s +# RUN: obj2yaml %t.o.AMDGCN_GFX1154 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1154 %s + # RUN: sed -e 's//64/' -e 's//AMDGCN_GFX1170/' %s | yaml2obj -o %t.o.AMDGCN_GFX1170 # RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1170 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1170 %s # RUN: obj2yaml %t.o.AMDGCN_GFX1170 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1170 %s @@ -489,6 +493,9 @@ # ELF-AMDGCN-GFX1153: EF_AMDGPU_MACH_AMDGCN_GFX1153 (0x58) # YAML-AMDGCN-GFX1153: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1153 ] +# ELF-AMDGCN-GFX1154: EF_AMDGPU_MACH_AMDGCN_GFX1154 (0x57) +# YAML-AMDGCN-GFX1154: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1154 ] + # ELF-AMDGCN-GFX1170: EF_AMDGPU_MACH_AMDGCN_GFX1170 (0x5D) # YAML-AMDGCN-GFX1170: Flags: 
[ EF_AMDGPU_MACH_AMDGCN_GFX1170 ] diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll index cfacaff1f2431..ce5d4032cd420 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll @@ -65,6 +65,11 @@ define amdgpu_kernel void @test_kernel() { ; RUN: llvm-objdump -D %t.o > %t-detect.txt ; RUN: diff %t-specify.txt %t-detect.txt +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1154 -filetype=obj -O0 -o %t.o %s +; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1154 %t.o > %t-specify.txt +; RUN: llvm-objdump -D %t.o > %t-detect.txt +; RUN: diff %t-specify.txt %t-detect.txt + ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1153 -filetype=obj -O0 -o %t.o %s ; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1153 %t.o > %t-specify.txt ; RUN: llvm-objdump -D %t.o > %t-detect.txt diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test index c76684f09e1ef..2dc41ad792c3f 100644 --- a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test +++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test @@ -433,6 +433,15 @@ # RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1153 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1153 -DFLAG_VALUE=0x58 +# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1154 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1154 -DFLAG_VALUE=0x57 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1154 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines 
-DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1154 -DFLAG_VALUE=0x57 + +# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1154 +# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1154 -DFLAG_VALUE=0x57 + # RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1170 # RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1170 -DFLAG_VALUE=0x5D From 8c780f06509d57f59c61e1b817b6dff7cb6e09db Mon Sep 17 00:00:00 2001 From: Longsheng Mou Date: Tue, 23 Jun 2026 21:20:51 +0800 Subject: [PATCH 183/511] [mlir][memref] Add SCFDialect dependency to RuntimeOpVerification (#205241) Explicitly load SCFDialect as a dependent dialect in RuntimeOpVerification to avoid unregistered dialect errors when generating `scf.if`/`scf.yield` ops. Fixes #204295. --- .../MemRef/Transforms/RuntimeOpVerification.cpp | 2 +- .../test/Dialect/MemRef/runtime-verification.mlir | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp b/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp index e5cc41e2c43ba..d7fdedce415d9 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/RuntimeOpVerification.cpp @@ -418,6 +418,6 @@ void mlir::memref::registerRuntimeVerifiableOpInterfaceExternalModels( // Load additional dialects of which ops may get created. 
ctx->loadDialect(); + cf::ControlFlowDialect, scf::SCFDialect>(); }); } diff --git a/mlir/test/Dialect/MemRef/runtime-verification.mlir b/mlir/test/Dialect/MemRef/runtime-verification.mlir index 28777a3e88672..2198f0032aff5 100644 --- a/mlir/test/Dialect/MemRef/runtime-verification.mlir +++ b/mlir/test/Dialect/MemRef/runtime-verification.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -generate-runtime-verification -cse | FileCheck %s +// RUN: mlir-opt %s -generate-runtime-verification -cse -split-input-file | FileCheck %s // CHECK-LABEL: func @expand_shape( // CHECK-SAME: %[[m:.*]]: memref @@ -13,3 +13,16 @@ func.func @expand_shape(%m: memref, %sz0: index) -> memref { %0 = memref.expand_shape %m [[0, 1]] output_shape [%sz0, 5] : memref into memref return %0 : memref } + +// ----- + +// Ensure the SCF dialect is loaded. + +// CHECK-LABEL: func @subview( +// CHECK: scf.if +func.func @subview(%memref: memref<1xf32>, %offset: index) { + memref.subview %memref[%offset] [1] [1] : + memref<1xf32> to + memref<1xf32, strided<[1], offset: ?>> + return +} From 4b71420f751d113d64ad64cac24f6f0e06bc9261 Mon Sep 17 00:00:00 2001 From: Wooseok Lee Date: Tue, 23 Jun 2026 08:22:18 -0500 Subject: [PATCH 184/511] [AMDGPU][DAGCombiner] Fix UADDO/USUBO_CARRY carry-out miscompile and remove redundant AMDGPU combine (#204362) performAddCarrySubCarryCombine in SIISelLowering folded: uaddo_carry((x+y), 0, cc) -> uaddo_carry(x, y, cc) usubo_carry((x-y), 0, cc) -> usubo_carry(x, y, cc) Both produce the same value but differ in carry-out when x+y (or x-y) wraps. The fold was missing a !N->hasAnyUseOfValue(1) guard, giving wrong carry values to consumers. E.g. x=0xFFFFFFFF, y=1, cc=0: original: ((x+y) mod 2^32 + cc) >= 2^32 = 0 (correct) folded: (x+y+cc) >= 2^32 = 1 (wrong) The generic visitUADDO_CARRY (DAGCombiner.cpp) already handles the UADDO_CARRY/ADD fold with the correct guard. Since target combines fire before generic ones, the AMDGPU ADD arm was a buggy duplicate. 
The USUBO_CARRY/SUB arm is produced by AMDGPU's performAddCombine which converts add(sub(v,a), sext(cmp)) -> usubo_carry(sub(v,a), 0, cmp). There was no generic equivalent for this pattern. Fix by adding the symmetric fold to visitUSUBO_CARRY: (usubo_carry (sub X, Y), 0, Carry) -> (usubo_carry X, Y, Carry) with the same guard, then removing performAddCarrySubCarryCombine from SIISelLowering entirely as it is now fully subsumed. Tests: Add AMDGPU tests in uaddo.ll and usubo.ll covering the carry-out-used (fold suppressed) and carry-out-unused (fold applied) cases, autogenerated with update_llc_test_checks.py. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 30 --- llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 - llvm/test/CodeGen/AMDGPU/uaddo.ll | 164 +++++++++++++++++ llvm/test/CodeGen/AMDGPU/usubo.ll | 174 ++++++++++++++++++ 5 files changed, 345 insertions(+), 31 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6c583c87b5b0f..b261d6322df66 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4774,6 +4774,13 @@ SDValue DAGCombiner::visitUSUBO_CARRY(SDNode *N) { return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1); } + // Iff the flag result is dead: + // (usubo_carry (sub X, Y), 0, Carry) -> (usubo_carry X, Y, Carry) + if (N0.getOpcode() == ISD::SUB && isNullConstant(N1) && + !N->hasAnyUseOfValue(1)) + return DAG.getNode(ISD::USUBO_CARRY, SDLoc(N), N->getVTList(), + N0.getOperand(0), N0.getOperand(1), CarryIn); + return SDValue(); } diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 145c24c6dc7c7..03d7b936d4109 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1037,9 +1037,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, 
setTargetDAGCombine({ISD::ADD, ISD::PTRADD, - ISD::UADDO_CARRY, ISD::SUB, - ISD::USUBO_CARRY, ISD::MUL, ISD::FADD, ISD::FSUB, @@ -17682,31 +17680,6 @@ SDValue SITargetLowering::performSubCombine(SDNode *N, return SDValue(); } -SDValue -SITargetLowering::performAddCarrySubCarryCombine(SDNode *N, - DAGCombinerInfo &DCI) const { - - if (N->getValueType(0) != MVT::i32) - return SDValue(); - - if (!isNullConstant(N->getOperand(1))) - return SDValue(); - - SelectionDAG &DAG = DCI.DAG; - SDValue LHS = N->getOperand(0); - - // uaddo_carry (add x, y), 0, cc => uaddo_carry x, y, cc - // usubo_carry (sub x, y), 0, cc => usubo_carry x, y, cc - unsigned LHSOpc = LHS.getOpcode(); - unsigned Opc = N->getOpcode(); - if ((LHSOpc == ISD::ADD && Opc == ISD::UADDO_CARRY) || - (LHSOpc == ISD::SUB && Opc == ISD::USUBO_CARRY)) { - SDValue Args[] = {LHS.getOperand(0), LHS.getOperand(1), N->getOperand(2)}; - return DAG.getNode(Opc, SDLoc(N), N->getVTList(), Args); - } - return SDValue(); -} - SDValue SITargetLowering::performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) @@ -18602,9 +18575,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, return performPtrAddCombine(N, DCI); case ISD::SUB: return performSubCombine(N, DCI); - case ISD::UADDO_CARRY: - case ISD::USUBO_CARRY: - return performAddCarrySubCarryCombine(N, DCI); case ISD::FADD: return performFAddCombine(N, DCI); case ISD::FSUB: diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 95ff5bba7cfff..3d72723c9ca8f 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -241,7 +241,6 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performPtrAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue 
performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const; SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const; diff --git a/llvm/test/CodeGen/AMDGPU/uaddo.ll b/llvm/test/CodeGen/AMDGPU/uaddo.ll index bf8cf831e4bbf..d9a88b2ba2867 100644 --- a/llvm/test/CodeGen/AMDGPU/uaddo.ll +++ b/llvm/test/CodeGen/AMDGPU/uaddo.ll @@ -1206,6 +1206,170 @@ define amdgpu_cs void @sv_uaddo_i128(ptr addrspace(1) %out, i128 inreg %a, i128 ret void } +; Carry-out is USED: fold must be suppressed to preserve correct carry semantics. +define amdgpu_kernel void @uaddo_carry_of_add_carryout_used(ptr addrspace(1) %out, i32 %x, i32 %y, i1 %cc) #0 { +; SI-LABEL: uaddo_carry_of_add_carryout_used: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb +; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_add_i32 s0, s0, s1 +; SI-NEXT: s_and_b32 s1, s2, 1 +; SI-NEXT: v_mov_b32_e32 v0, s1 +; SI-NEXT: v_add_i32_e32 v0, vcc, s0, v0 +; SI-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: uaddo_carry_of_add_carryout_used: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_i32 s0, s0, s1 +; VI-NEXT: s_and_b32 s1, s2, 1 +; VI-NEXT: v_mov_b32_e32 v0, s1 +; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0 +; VI-NEXT: v_mov_b32_e32 v2, s4 +; VI-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-NEXT: v_mov_b32_e32 v3, s5 +; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: uaddo_carry_of_add_carryout_used: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_add_i32 s0, 
s0, s1 +; GFX9-NEXT: s_and_b32 s1, s2, 1 +; GFX9-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: uaddo_carry_of_add_carryout_used: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX10-NEXT: v_mov_b32_e32 v2, 0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_i32 s0, s0, s1 +; GFX10-NEXT: s_and_b32 s1, s2, 1 +; GFX10-NEXT: v_add_co_u32 v0, s0, s0, s1 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[6:7] +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: uaddo_carry_of_add_carryout_used: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c +; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v2, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_add_i32 s0, s0, s1 +; GFX11-NEXT: s_and_b32 s1, s2, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_add_co_u32 v0, s0, s0, s1 +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0 +; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5] +; GFX11-NEXT: s_endpgm + %xy = add i32 %x, %y + %cc_i32 = zext i1 %cc to i32 + %r = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %xy, i32 %cc_i32) + %val = extractvalue {i32, i1} %r, 0 + %co = extractvalue {i32, i1} %r, 1 + %co_i32 = zext i1 %co to i32 + store i32 %val, ptr addrspace(1) %out + %p1 = getelementptr i32, ptr addrspace(1) %out, i64 1 + store i32 %co_i32, ptr addrspace(1) %p1 + ret void +} + +; Carry-out is NOT USED: fold is safe since the carry-out has no consumers. 
+define amdgpu_kernel void @uaddo_carry_of_add_carryout_unused(ptr addrspace(1) %out, i32 %x, i32 %y, i1 %cc) #0 { +; SI-LABEL: uaddo_carry_of_add_carryout_unused: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb +; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_add_i32 s0, s0, s1 +; SI-NEXT: s_and_b32 s1, s2, 1 +; SI-NEXT: s_add_i32 s0, s0, s1 +; SI-NEXT: v_mov_b32_e32 v0, s0 +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: uaddo_carry_of_add_carryout_unused: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_add_i32 s0, s0, s1 +; VI-NEXT: s_and_b32 s1, s2, 1 +; VI-NEXT: s_add_i32 s0, s0, s1 +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: flat_store_dword v[0:1], v2 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: uaddo_carry_of_add_carryout_unused: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_add_i32 s0, s0, s1 +; GFX9-NEXT: s_and_b32 s1, s2, 1 +; GFX9-NEXT: s_add_i32 s0, s0, s1 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: global_store_dword v0, v1, s[6:7] +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: uaddo_carry_of_add_carryout_unused: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_add_i32 s0, s0, s1 +; GFX10-NEXT: s_and_b32 s1, s2, 1 +; GFX10-NEXT: s_add_i32 s0, s0, s1 +; GFX10-NEXT: v_mov_b32_e32 v1, s0 +; GFX10-NEXT: global_store_dword v0, v1, s[6:7] +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: 
uaddo_carry_of_add_carryout_unused: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c +; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_add_i32 s0, s0, s1 +; GFX11-NEXT: s_and_b32 s1, s2, 1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_add_i32 s0, s0, s1 +; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 +; GFX11-NEXT: global_store_b32 v0, v1, s[4:5] +; GFX11-NEXT: s_endpgm + %xy = add i32 %x, %y + %cc_i32 = zext i1 %cc to i32 + %r = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %xy, i32 %cc_i32) + %val = extractvalue {i32, i1} %r, 0 + store i32 %val, ptr addrspace(1) %out + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 declare { i16, i1 } @llvm.uadd.with.overflow.i16(i16, i16) #1 declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) #1 diff --git a/llvm/test/CodeGen/AMDGPU/usubo.ll b/llvm/test/CodeGen/AMDGPU/usubo.ll index 8a5914b939db1..9286095352fe2 100644 --- a/llvm/test/CodeGen/AMDGPU/usubo.ll +++ b/llvm/test/CodeGen/AMDGPU/usubo.ll @@ -1136,6 +1136,180 @@ exit: ret void } +; Borrow-out is USED: fold must be suppressed to preserve correct borrow semantics. 
+define amdgpu_kernel void @usubo_carry_of_sub_borrowout_used(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z, i32 %w) #0 { +; SI-LABEL: usubo_carry_of_sub_borrowout_used: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb +; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_mov_b32_e32 v0, s3 +; SI-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 +; SI-NEXT: s_sub_i32 s0, s0, s1 +; SI-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-NEXT: v_sub_i32_e32 v1, vcc, s0, v1 +; SI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; SI-NEXT: buffer_store_dword v2, off, s[4:7], 0 offset:8 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: usubo_carry_of_sub_borrowout_used: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: v_mov_b32_e32 v0, s3 +; VI-NEXT: v_sub_u32_e32 v0, vcc, s2, v0 +; VI-NEXT: s_sub_i32 s0, s0, s1 +; VI-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-NEXT: v_sub_u32_e32 v1, vcc, s0, v1 +; VI-NEXT: v_mov_b32_e32 v3, s4 +; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; VI-NEXT: v_mov_b32_e32 v4, s5 +; VI-NEXT: flat_store_dwordx3 v[3:4], v[0:2] +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: usubo_carry_of_sub_borrowout_used: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v3, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, s2, v0 +; GFX9-NEXT: s_sub_i32 s0, s0, s1 +; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-NEXT: v_sub_co_u32_e32 v1, vcc, s0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: usubo_carry_of_sub_borrowout_used: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_clause 0x1 +; 
GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX10-NEXT: v_mov_b32_e32 v3, 0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: v_sub_co_u32 v0, s2, s2, s3 +; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2 +; GFX10-NEXT: s_sub_i32 s0, s0, s1 +; GFX10-NEXT: v_sub_co_u32 v1, s0, s0, v1 +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX10-NEXT: global_store_dwordx3 v3, v[0:2], s[6:7] +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: usubo_carry_of_sub_borrowout_used: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x2c +; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 +; GFX11-NEXT: v_mov_b32_e32 v3, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_sub_co_u32 v0, s2, s2, s3 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2 +; GFX11-NEXT: s_sub_i32 s0, s0, s1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: v_sub_co_u32 v1, s0, s0, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0 +; GFX11-NEXT: global_store_b96 v3, v[0:2], s[4:5] +; GFX11-NEXT: s_endpgm + %zwo = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %z, i32 %w) + %cc = extractvalue {i32, i1} %zwo, 1 + %ccv = extractvalue {i32, i1} %zwo, 0 + store i32 %ccv, ptr addrspace(1) %out + %xy = sub i32 %x, %y + %cc_i32 = zext i1 %cc to i32 + %r = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %xy, i32 %cc_i32) + %val = extractvalue {i32, i1} %r, 0 + %bo = extractvalue {i32, i1} %r, 1 + %bo_i32 = zext i1 %bo to i32 + %p1 = getelementptr i32, ptr addrspace(1) %out, i64 1 + store i32 %val, ptr addrspace(1) %p1 + %p2 = getelementptr i32, ptr addrspace(1) %out, i64 2 + store i32 %bo_i32, ptr addrspace(1) %p2 + ret void +} + +; Borrow-out is NOT USED: fold is safe since no consumer observes the borrow. 
+define amdgpu_kernel void @usubo_carry_of_sub_borrowout_unused(ptr addrspace(1) %out, i32 %x, i32 %y, i32 %z, i32 %w) #0 { +; SI-LABEL: usubo_carry_of_sub_borrowout_unused: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb +; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_sub_u32 s2, s2, s3 +; SI-NEXT: s_subb_u32 s0, s0, s1 +; SI-NEXT: v_mov_b32_e32 v0, s0 +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-LABEL: usubo_carry_of_sub_borrowout_unused: +; VI: ; %bb.0: +; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; VI-NEXT: s_waitcnt lgkmcnt(0) +; VI-NEXT: s_sub_u32 s2, s2, s3 +; VI-NEXT: s_subb_u32 s0, s0, s1 +; VI-NEXT: v_mov_b32_e32 v0, s4 +; VI-NEXT: v_mov_b32_e32 v1, s5 +; VI-NEXT: v_mov_b32_e32 v2, s0 +; VI-NEXT: flat_store_dword v[0:1], v2 +; VI-NEXT: s_endpgm +; +; GFX9-LABEL: usubo_carry_of_sub_borrowout_unused: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX9-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_sub_u32 s2, s2, s3 +; GFX9-NEXT: s_subb_u32 s0, s0, s1 +; GFX9-NEXT: v_mov_b32_e32 v1, s0 +; GFX9-NEXT: global_store_dword v0, v1, s[6:7] +; GFX9-NEXT: s_endpgm +; +; GFX10-LABEL: usubo_carry_of_sub_borrowout_unused: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_clause 0x1 +; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c +; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24 +; GFX10-NEXT: v_mov_b32_e32 v0, 0 +; GFX10-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-NEXT: s_sub_u32 s2, s2, s3 +; GFX10-NEXT: s_subb_u32 s0, s0, s1 +; GFX10-NEXT: v_mov_b32_e32 v1, s0 +; GFX10-NEXT: global_store_dword v0, v1, s[6:7] +; GFX10-NEXT: s_endpgm +; +; GFX11-LABEL: usubo_carry_of_sub_borrowout_unused: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_clause 0x1 +; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 
0x2c +; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x24 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: s_sub_u32 s2, s2, s3 +; GFX11-NEXT: s_subb_u32 s0, s0, s1 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0 +; GFX11-NEXT: global_store_b32 v0, v1, s[4:5] +; GFX11-NEXT: s_endpgm + %zwo = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %z, i32 %w) + %cc = extractvalue {i32, i1} %zwo, 1 + %xy = sub i32 %x, %y + %cc_i32 = zext i1 %cc to i32 + %r = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %xy, i32 %cc_i32) + %val = extractvalue {i32, i1} %r, 0 + store i32 %val, ptr addrspace(1) %out + ret void +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 declare { i16, i1 } @llvm.usub.with.overflow.i16(i16, i16) #1 declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) #1 From 2aa2916e08e81a0fee5a968d9eba81d1306d38e0 Mon Sep 17 00:00:00 2001 From: Alex Kremer Date: Tue, 23 Jun 2026 14:24:58 +0100 Subject: [PATCH 185/511] [libc++] P3798R1: The unexpected in std::expected (#204826) Closes #204394 Implements P3798 and related tests. Applies the paper as a Defect Report per https://wg21.link/P3798/github. 
--- libcxx/docs/FeatureTestMacroTable.rst | 2 +- libcxx/docs/ReleaseNotes/23.rst | 1 + libcxx/docs/Status/Cxx29Papers.csv | 2 +- libcxx/include/__expected/expected.h | 4 ++ libcxx/include/version | 4 +- .../utilities/expected/nodiscard.verify.cpp | 2 + .../expected.version.compile.pass.cpp | 8 ++-- .../version.version.compile.pass.cpp | 8 ++-- .../observers/has_error.pass.cpp | 42 +++++++++++++++++++ .../observers/has_error.pass.cpp | 42 +++++++++++++++++++ .../generate_feature_test_macro_components.py | 2 +- 11 files changed, 104 insertions(+), 13 deletions(-) create mode 100644 libcxx/test/std/utilities/expected/expected.expected/observers/has_error.pass.cpp create mode 100644 libcxx/test/std/utilities/expected/expected.void/observers/has_error.pass.cpp diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 429cf2a7250fb..47f10324e2523 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -332,7 +332,7 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_containers_ranges`` ``202202L`` ---------------------------------------------------------- ----------------- - ``__cpp_lib_expected`` ``202211L`` + ``__cpp_lib_expected`` ``202606L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_flat_map`` ``202511L`` ---------------------------------------------------------- ----------------- diff --git a/libcxx/docs/ReleaseNotes/23.rst b/libcxx/docs/ReleaseNotes/23.rst index 8055792f697ea..a491306a3cffb 100644 --- a/libcxx/docs/ReleaseNotes/23.rst +++ b/libcxx/docs/ReleaseNotes/23.rst @@ -52,6 +52,7 @@ Implemented Papers - P3383R3: ``mdspan.at()`` (`Github `__) - P3369R0: constexpr for ``uninitialized_default_construct`` (`Github `__) - P3508R0: Wording for "constexpr for specialized memory algorithms" (`Github `__) +- P3798R1: The unexpected in ``std::expected`` (`Github `__) - P4206R0: Revert string 
support in ``std::constant_wrapper`` (`Github `__) Improvements and New Features diff --git a/libcxx/docs/Status/Cxx29Papers.csv b/libcxx/docs/Status/Cxx29Papers.csv index 3c48fe89be33d..790e8b290cc80 100644 --- a/libcxx/docs/Status/Cxx29Papers.csv +++ b/libcxx/docs/Status/Cxx29Papers.csv @@ -3,7 +3,7 @@ "`P4101R1 `__","Consteval-only Values for C++26","2026-06 (Brno)","","","`#204391 `__","Voted as a Defect Report." "`P2414R12 `__","Pointer lifetime-end zap proposed solutions","2026-06 (Brno)","","","`#204392 `__","Voted as a Defect Report." "`P3319R6 `__","Add an ``iota`` object for ``simd`` (and more)","2026-06 (Brno)","","","`#204393 `__","" -"`P3798R1 `__","The unexpected in ``std::expected``","2026-06 (Brno)","","","`#204394 `__","" +"`P3798R1 `__","The unexpected in ``std::expected``","2026-06 (Brno)","|Complete|","23","`#204394 `__","Applied as a Defect Report." "`P3052R2 `__","``view_interface::at()``","2026-06 (Brno)","","","`#204395 `__","" "`P4206R0 `__","Revert string support in ``std::constant_wrapper``","2026-06 (Brno)","|Complete|","23","`#203336 `__","Applied as a Defect Report." 
"`P3395R6 `__","Fix encoding issues and add a formatter for ``std::error_code``","2026-06 (Brno)","","","`#204396 `__","" diff --git a/libcxx/include/__expected/expected.h b/libcxx/include/__expected/expected.h index 32ed81a392702..10b03f061f6e0 100644 --- a/libcxx/include/__expected/expected.h +++ b/libcxx/include/__expected/expected.h @@ -825,6 +825,8 @@ class expected : private __expected_base<_Tp, _Err> { [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool has_value() const noexcept { return this->__has_val(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool has_error() const noexcept { return !this->has_value(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const _Tp& value() const& { static_assert(is_copy_constructible_v<_Err>, "error_type has to be copy constructible"); if (!this->__has_val()) { @@ -1599,6 +1601,8 @@ class expected<_Tp, _Err> : private __expected_void_base<_Err> { [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool has_value() const noexcept { return this->__has_val(); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr bool has_error() const noexcept { return !this->has_value(); } + _LIBCPP_HIDE_FROM_ABI constexpr void operator*() const noexcept { _LIBCPP_ASSERT_VALID_ELEMENT_ACCESS( this->__has_val(), "expected::operator* requires the expected to contain a value"); diff --git a/libcxx/include/version b/libcxx/include/version index ae02006d9d1c9..53e879959ec17 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -111,7 +111,7 @@ __cpp_lib_erase_if 202002L __cpp_lib_execution 201902L 201603L // C++17 -__cpp_lib_expected 202211L +__cpp_lib_expected 202606L __cpp_lib_filesystem 201703L __cpp_lib_flat_map 202511L __cpp_lib_flat_set 202511L @@ -502,7 +502,7 @@ __cpp_lib_void_t 201411L # define __cpp_lib_constexpr_memory 202202L # define __cpp_lib_constexpr_typeinfo 202106L # define __cpp_lib_containers_ranges 202202L -# define __cpp_lib_expected 202211L +# define __cpp_lib_expected 202606L # define __cpp_lib_flat_map 202511L 
# define __cpp_lib_flat_set 202511L # define __cpp_lib_format_ranges 202207L diff --git a/libcxx/test/libcxx/utilities/expected/nodiscard.verify.cpp b/libcxx/test/libcxx/utilities/expected/nodiscard.verify.cpp index c9af7a91c67f2..8dd54bd3e6c15 100644 --- a/libcxx/test/libcxx/utilities/expected/nodiscard.verify.cpp +++ b/libcxx/test/libcxx/utilities/expected/nodiscard.verify.cpp @@ -47,6 +47,7 @@ void test() { *std::move(exp); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} exp.has_value(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + exp.has_error(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} cExp.value(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} exp.value(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} @@ -110,6 +111,7 @@ void test() { const std::expected cVExp{}; vExp.has_value(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + vExp.has_error(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} cVExp.error(); diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/expected.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/expected.version.compile.pass.cpp index 4ec6c469dce4c..70217f98a8570 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/expected.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/expected.version.compile.pass.cpp @@ -83,8 +83,8 @@ # ifndef __cpp_lib_expected # error "__cpp_lib_expected should be defined in c++23" # endif -# if __cpp_lib_expected != 202211L 
-# error "__cpp_lib_expected should have the value 202211L in c++23" +# if __cpp_lib_expected != 202606L +# error "__cpp_lib_expected should have the value 202606L in c++23" # endif # ifdef __cpp_lib_freestanding_expected @@ -103,8 +103,8 @@ # ifndef __cpp_lib_expected # error "__cpp_lib_expected should be defined in c++26" # endif -# if __cpp_lib_expected != 202211L -# error "__cpp_lib_expected should have the value 202211L in c++26" +# if __cpp_lib_expected != 202606L +# error "__cpp_lib_expected should have the value 202606L in c++26" # endif # if !defined(_LIBCPP_VERSION) diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index 282936a0b3f63..d4808fde45444 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -5133,8 +5133,8 @@ # ifndef __cpp_lib_expected # error "__cpp_lib_expected should be defined in c++23" # endif -# if __cpp_lib_expected != 202211L -# error "__cpp_lib_expected should have the value 202211L in c++23" +# if __cpp_lib_expected != 202606L +# error "__cpp_lib_expected should have the value 202606L in c++23" # endif # if !defined(_LIBCPP_VERSION) || _LIBCPP_HAS_FILESYSTEM @@ -6903,8 +6903,8 @@ # ifndef __cpp_lib_expected # error "__cpp_lib_expected should be defined in c++26" # endif -# if __cpp_lib_expected != 202211L -# error "__cpp_lib_expected should have the value 202211L in c++26" +# if __cpp_lib_expected != 202606L +# error "__cpp_lib_expected should have the value 202606L in c++26" # endif # if !defined(_LIBCPP_VERSION) || _LIBCPP_HAS_FILESYSTEM diff --git a/libcxx/test/std/utilities/expected/expected.expected/observers/has_error.pass.cpp 
b/libcxx/test/std/utilities/expected/expected.expected/observers/has_error.pass.cpp new file mode 100644 index 0000000000000..93ee0ea2dfeea --- /dev/null +++ b/libcxx/test/std/utilities/expected/expected.expected/observers/has_error.pass.cpp @@ -0,0 +1,42 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++23 + +// constexpr bool has_error() const noexcept; + +#include +#include +#include +#include +#include + +#include "../../types.h" + +constexpr bool test() { + { + const std::expected e(std::unexpect, 5); + static_assert(noexcept(e.has_error())); + std::same_as decltype(auto) has_err = e.has_error(); + assert(has_err); + } + + { + const std::expected e(5); + assert(!e.has_error()); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/utilities/expected/expected.void/observers/has_error.pass.cpp b/libcxx/test/std/utilities/expected/expected.void/observers/has_error.pass.cpp new file mode 100644 index 0000000000000..3072530066361 --- /dev/null +++ b/libcxx/test/std/utilities/expected/expected.void/observers/has_error.pass.cpp @@ -0,0 +1,42 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++23 + +// constexpr bool has_error() const noexcept; + +#include +#include +#include +#include +#include + +#include "../../types.h" + +constexpr bool test() { + { + const std::expected e(std::unexpect, 5); + static_assert(noexcept(e.has_error())); + std::same_as decltype(auto) has_err = e.has_error(); + assert(has_err); + } + + { + const std::expected e; + assert(!e.has_error()); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 776689b3d3935..56b5d620d73a4 100644 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -557,7 +557,7 @@ def add_version_header(tc): }, { "name": "__cpp_lib_expected", - "values": {"c++23": 202211}, + "values": {"c++23": 202606}, "headers": ["expected"], }, { From 4998771d8fdc17a9effae65b0f916acb7fae49e4 Mon Sep 17 00:00:00 2001 From: John Paul Jepko Date: Tue, 23 Jun 2026 08:27:35 -0500 Subject: [PATCH 186/511] [Clang][Test] Fix warn-memset-bad-sizeof.cpp after #183004 (#205201) The new `-Wstringop-overread` warning (added in #183004) fires on the SemaCXX test warn-memset-bad-sizeof.cpp. This happens on targets where unsigned matches size_t, such as 32-bit ARM, because clang will match the declaration with the builtin prototype (specifically, argument `unsigned n`). Suppress the warning since this test is exercising `-Wsizeof-pointer-memaccess`, not source buffer overreads. 
Fixes: - https://lab.llvm.org/buildbot/#/builders/154/builds/32985 - https://lab.llvm.org/buildbot/#/builders/135/builds/3888 - https://lab.llvm.org/buildbot/#/builders/38/builds/9363 --- clang/test/SemaCXX/warn-memset-bad-sizeof.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/SemaCXX/warn-memset-bad-sizeof.cpp b/clang/test/SemaCXX/warn-memset-bad-sizeof.cpp index 6f1cd4dd639ec..405652e3daf5e 100644 --- a/clang/test/SemaCXX/warn-memset-bad-sizeof.cpp +++ b/clang/test/SemaCXX/warn-memset-bad-sizeof.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -Wno-sizeof-array-argument %s +// RUN: %clang_cc1 -fsyntax-only -verify -Wno-sizeof-array-argument -Wno-stringop-overread %s // extern "C" void *bzero(void *, unsigned); extern "C" void *memset(void *, int, unsigned); From 8b88c7c3b8de87d5affe68e31d2e457d093f0117 Mon Sep 17 00:00:00 2001 From: Jiahao Guo Date: Tue, 23 Jun 2026 21:32:00 +0800 Subject: [PATCH 187/511] [CIR][NFC] Rename zero result flag variable (#205242) ###summary This is a follow up of https://github.com/llvm/llvm-project/pull/202273 Just a light patch for renaming the zero result flag variable. 
--- clang/utils/TableGen/CIRLoweringEmitter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/utils/TableGen/CIRLoweringEmitter.cpp b/clang/utils/TableGen/CIRLoweringEmitter.cpp index 4c4d0c370e4cd..ef1d606580abb 100644 --- a/clang/utils/TableGen/CIRLoweringEmitter.cpp +++ b/clang/utils/TableGen/CIRLoweringEmitter.cpp @@ -138,7 +138,7 @@ void GenerateLLVMLoweringPattern(llvm::StringRef OpName, llvm::StringRef PatternName, bool IsRecursive, llvm::StringRef ExtraDecl, const Record *CustomCtorRec, - llvm::StringRef LLVMOp, bool IsZeroResult) { + llvm::StringRef LLVMOp, bool HasZeroResult) { std::optional CustomCtor = parseCustomLoweringCtor(CustomCtorRec); std::string CodeBuffer; @@ -188,7 +188,7 @@ void GenerateLLVMLoweringPattern(llvm::StringRef OpName, << " mlir::LogicalResult matchAndRewrite(cir::" << OpName << " op, OpAdaptor adaptor, mlir::ConversionPatternRewriter &rewriter) " "const override {\n"; - if (IsZeroResult) { + if (HasZeroResult) { Code << " rewriter.replaceOpWithNewOp(op, mlir::TypeRange{}, adaptor.getOperands());\n"; } else { From 238c304746e9e82041cc9a5d6c8fc1e49890282b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Benics?= Date: Tue, 23 Jun 2026 15:35:32 +0200 Subject: [PATCH 188/511] [analyzer] Allow SVals as llvm::Immutable{Map,Set} keys (#205319) This will allow maps and sets being declared: ``` REGISTER_MAP_WITH_PROGRAMSTATE(MyMap, SVal, SVal) REGISTER_SET_WITH_PROGRAMSTATE(MySet, SVal) ``` --- .../StaticAnalyzer/Core/PathSensitive/SVals.h | 31 ++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h index aeb57b28077c6..0561a2b8d1d77 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/SVals.h @@ -22,6 +22,7 @@ #include "llvm/ADT/APSInt.h" #include "llvm/ADT/FoldingSet.h" 
#include "llvm/ADT/ImmutableList.h" +#include "llvm/ADT/ImmutableSet.h" #include "llvm/ADT/PointerUnion.h" #include "llvm/ADT/STLForwardCompat.h" #include "llvm/ADT/iterator_range.h" @@ -99,7 +100,12 @@ class SVal { ID.AddInteger(llvm::to_underlying(getKind())); } - bool operator==(SVal R) const { return Kind == R.Kind && Data == R.Data; } + bool operator<(SVal R) const { + return std::tie(Data, Kind) < std::tie(R.Data, R.Kind); + } + bool operator==(SVal R) const { + return std::tie(Data, Kind) == std::tie(R.Data, R.Kind); + } bool operator!=(SVal R) const { return !(*this == R); } bool isUnknown() const { return getKind() == UnknownValKind; } @@ -529,6 +535,29 @@ class ConcreteInt : public Loc { } // namespace clang namespace llvm { +// Allow SVal to be used as a key in ImmutableSet / ImmutableMap. +template <> +struct ImutContainerInfo + : public ImutProfileInfo { + using value_type = clang::ento::SVal; + using value_type_ref = clang::ento::SVal; + using key_type = value_type; + using key_type_ref = value_type_ref; + using data_type = bool; + using data_type_ref = bool; + + static key_type_ref KeyOfValue(value_type_ref D) { return D; } + static data_type_ref DataOfValue(value_type_ref) { return true; } + + static bool isEqual(clang::ento::SVal L, clang::ento::SVal R) { + return L == R; + } + + static bool isLess(clang::ento::SVal L, clang::ento::SVal R) { return L < R; } + + static bool isDataEqual(data_type_ref, data_type_ref) { return true; } +}; + template struct CastInfo< To, From, From b7f4ed304b0e02343fcf7d22950bbba001d6e482 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 23 Jun 2026 15:41:55 +0200 Subject: [PATCH 189/511] [libc] Add CPU_CLR(_S) macros (#204590) This patch implements CPU_CLR and CPU_CLR_S macros following the existing CPU_SET pattern. The macro just forwards to an internal entry point. Assisted by Gemini. 
--- libc/config/linux/aarch64/entrypoints.txt | 1 + libc/config/linux/riscv/entrypoints.txt | 1 + libc/config/linux/x86_64/entrypoints.txt | 1 + .../llvm-libc-macros/linux/sched-macros.h | 2 ++ libc/include/sched.yaml | 8 +++++ libc/src/sched/CMakeLists.txt | 7 ++++ libc/src/sched/linux/CMakeLists.txt | 15 ++++++++ libc/src/sched/linux/sched_clrcpuset.cpp | 36 +++++++++++++++++++ libc/src/sched/sched_clrcpuset.h | 28 +++++++++++++++ libc/test/src/sched/CMakeLists.txt | 1 + libc/test/src/sched/cpu_count_test.cpp | 9 +++++ 11 files changed, 109 insertions(+) create mode 100644 libc/src/sched/linux/sched_clrcpuset.cpp create mode 100644 libc/src/sched/sched_clrcpuset.h diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 5cddf3dc89799..77bf17b666a14 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -1101,6 +1101,7 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.pthread.pthread_setspecific # sched.h entrypoints + libc.src.sched.__sched_clrcpuset libc.src.sched.__sched_cpualloc libc.src.sched.__sched_cpufree libc.src.sched.__sched_getcpucount diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index a57efbb8e464d..5e73718c4fc63 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -1234,6 +1234,7 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.pthread.pthread_setspecific # sched.h entrypoints + libc.src.sched.__sched_clrcpuset libc.src.sched.__sched_cpualloc libc.src.sched.__sched_cpufree libc.src.sched.__sched_getcpucount diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index ce88a6749d9dc..eb7d4781936ee 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -1296,6 +1296,7 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.pthread.pthread_setspecific # sched.h entrypoints + libc.src.sched.__sched_clrcpuset 
libc.src.sched.__sched_cpualloc libc.src.sched.__sched_cpufree libc.src.sched.__sched_getcpucount diff --git a/libc/include/llvm-libc-macros/linux/sched-macros.h b/libc/include/llvm-libc-macros/linux/sched-macros.h index 539b3530f6e14..42db677b38305 100644 --- a/libc/include/llvm-libc-macros/linux/sched-macros.h +++ b/libc/include/llvm-libc-macros/linux/sched-macros.h @@ -25,6 +25,8 @@ #define CPU_SETSIZE __CPU_SETSIZE #define NCPUBITS __NCPUBITS +#define CPU_CLR_S(cpu, setsize, set) __sched_clrcpuset(cpu, setsize, set) +#define CPU_CLR(cpu, set) CPU_CLR_S(cpu, sizeof(cpu_set_t), set) #define CPU_COUNT_S(setsize, set) __sched_getcpucount(setsize, set) #define CPU_COUNT(set) CPU_COUNT_S(sizeof(cpu_set_t), set) #define CPU_ZERO_S(setsize, set) __sched_setcpuzero(setsize, set) diff --git a/libc/include/sched.yaml b/libc/include/sched.yaml index afe357e38bf1c..4c657d15fb4f0 100644 --- a/libc/include/sched.yaml +++ b/libc/include/sched.yaml @@ -18,6 +18,14 @@ types: enums: [] objects: [] functions: + - name: __sched_clrcpuset + standards: + - llvm_libc_ext + return_type: void + arguments: + - type: int + - type: size_t + - type: cpu_set_t * - name: __sched_cpualloc standards: - llvm_libc_ext diff --git a/libc/src/sched/CMakeLists.txt b/libc/src/sched/CMakeLists.txt index 3aef41f996962..56ed5c141f1a4 100644 --- a/libc/src/sched/CMakeLists.txt +++ b/libc/src/sched/CMakeLists.txt @@ -86,6 +86,13 @@ add_entrypoint_object( .${LIBC_TARGET_OS}.sched_rr_get_interval ) +add_entrypoint_object( + __sched_clrcpuset + ALIAS + DEPENDS + .${LIBC_TARGET_OS}.__sched_clrcpuset +) + add_entrypoint_object( __sched_cpualloc ALIAS diff --git a/libc/src/sched/linux/CMakeLists.txt b/libc/src/sched/linux/CMakeLists.txt index b8662c6383e74..f0bb7fcc93b6f 100644 --- a/libc/src/sched/linux/CMakeLists.txt +++ b/libc/src/sched/linux/CMakeLists.txt @@ -168,6 +168,21 @@ add_entrypoint_object( libc.src.errno.errno ) +add_entrypoint_object( + __sched_clrcpuset + SRCS + sched_clrcpuset.cpp + HDRS + 
../sched_clrcpuset.h + DEPENDS + libc.src.__support.common + libc.src.__support.macros.config + libc.src.__support.macros.null_check + libc.hdr.sched_macros + libc.hdr.types.cpu_set_t + libc.hdr.types.size_t +) + add_entrypoint_object( __sched_cpualloc SRCS diff --git a/libc/src/sched/linux/sched_clrcpuset.cpp b/libc/src/sched/linux/sched_clrcpuset.cpp new file mode 100644 index 0000000000000..7d149b6c88ca2 --- /dev/null +++ b/libc/src/sched/linux/sched_clrcpuset.cpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Linux implementation of __sched_clrcpuset. +/// +//===----------------------------------------------------------------------===// + +#include "src/sched/sched_clrcpuset.h" +#include "hdr/sched_macros.h" // NCPUBITS +#include "hdr/types/cpu_set_t.h" +#include "hdr/types/size_t.h" +#include "src/__support/common.h" // LLVM_LIBC_FUNCTION +#include "src/__support/macros/config.h" // LIBC_NAMESPACE_DECL +#include "src/__support/macros/null_check.h" // LIBC_CRASH_ON_NULLPTR + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(void, __sched_clrcpuset, + (int cpu, const size_t cpuset_size, cpu_set_t *set)) { + LIBC_CRASH_ON_NULLPTR(set); + if (static_cast(cpu) / 8 < cpuset_size) { + const size_t element_index = static_cast(cpu) / NCPUBITS; + const size_t bit_position = static_cast(cpu) % NCPUBITS; + + const unsigned long mask = 1UL << bit_position; + set->__mask[element_index] &= ~mask; + } +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/sched/sched_clrcpuset.h b/libc/src/sched/sched_clrcpuset.h new file mode 100644 index 0000000000000..6e6d9e3b77b51 --- /dev/null +++ 
b/libc/src/sched/sched_clrcpuset.h @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implementation header for __sched_clrcpuset. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_SCHED_SCHED_CLRCPUSET_H +#define LLVM_LIBC_SRC_SCHED_SCHED_CLRCPUSET_H + +#include "hdr/types/cpu_set_t.h" +#include "hdr/types/size_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +// for internal use in the CPU_CLR macro +void __sched_clrcpuset(int cpu, const size_t cpuset_size, cpu_set_t *set); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_SCHED_SCHED_CLRCPUSET_H diff --git a/libc/test/src/sched/CMakeLists.txt b/libc/test/src/sched/CMakeLists.txt index b8dbf609ecd0f..f014091f9ace4 100644 --- a/libc/test/src/sched/CMakeLists.txt +++ b/libc/test/src/sched/CMakeLists.txt @@ -137,6 +137,7 @@ add_libc_unittest( libc.src.__support.OSUtil.osutil libc.src.errno.errno libc.src.sched.sched_getaffinity + libc.src.sched.__sched_clrcpuset libc.src.sched.__sched_getcpucount libc.src.sched.__sched_getcpuisset libc.src.sched.__sched_setcpuset diff --git a/libc/test/src/sched/cpu_count_test.cpp b/libc/test/src/sched/cpu_count_test.cpp index 55daa2e421810..e3399b0de7602 100644 --- a/libc/test/src/sched/cpu_count_test.cpp +++ b/libc/test/src/sched/cpu_count_test.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "src/__support/OSUtil/syscall.h" +#include "src/sched/sched_clrcpuset.h" #include "src/sched/sched_getaffinity.h" #include "src/sched/sched_getcpucount.h" #include 
"src/sched/sched_getcpuisset.h" @@ -46,4 +47,12 @@ TEST_F(LlvmLibcSchedCpuCountTest, CpuSetMacros) { LIBC_NAMESPACE::CPU_SET(1, &mask); ASSERT_EQ(LIBC_NAMESPACE::CPU_ISSET(1, &mask), 1); ASSERT_EQ(LIBC_NAMESPACE::CPU_COUNT(&mask), 1); + + LIBC_NAMESPACE::CPU_CLR(1, &mask); + ASSERT_EQ(LIBC_NAMESPACE::CPU_ISSET(1, &mask), 0); + + LIBC_NAMESPACE::CPU_SET_S(1, sizeof(cpu_set_t), &mask); + ASSERT_EQ(LIBC_NAMESPACE::CPU_ISSET_S(1, sizeof(cpu_set_t), &mask), 1); + LIBC_NAMESPACE::CPU_CLR_S(1, sizeof(cpu_set_t), &mask); + ASSERT_EQ(LIBC_NAMESPACE::CPU_ISSET_S(1, sizeof(cpu_set_t), &mask), 0); } From 04b026a80a54ae25e434216375ffc7b3692ace4d Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 23 Jun 2026 23:51:11 +1000 Subject: [PATCH 190/511] [orc-rt] Add ORC_RT_SPS_ALLOC_ACTION helper macro. (#205339) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ORC_RT_SPS_ALLOC_ACTION(Name, SPSArgs, Handle) is shorthand for defining an allocation-action wrapper function whose arguments are SPS-encoded. It expands to: static orc_rt_WrapperFunctionBuffer Name(const char *ArgData, size_t ArgSize); with a body that deserializes ArgData via SPSAllocActionFunction and forwards the decoded arguments to Handle. SPSArgs is given as a parenthesized comma-separated list of SPS argument types — e.g. (int32_t, int32_t) — stripped at expansion time via ORC_RT_DEPAREN. 
--- orc-rt/include/orc-rt/SPSAllocAction.h | 19 +++++++++++++++++++ orc-rt/unittests/SPSAllocActionTest.cpp | 24 ++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/orc-rt/include/orc-rt/SPSAllocAction.h b/orc-rt/include/orc-rt/SPSAllocAction.h index 9737b1e37ab98..460a26ee11e77 100644 --- a/orc-rt/include/orc-rt/SPSAllocAction.h +++ b/orc-rt/include/orc-rt/SPSAllocAction.h @@ -15,9 +15,28 @@ #define ORC_RT_SPSALLOCACTION_H #include "orc-rt/AllocAction.h" +#include "orc-rt/MacroUtils.h" #include "orc-rt/SPSWrapperFunctionBuffer.h" #include "orc-rt/SimplePackedSerialization.h" +/// Define an allocation-action wrapper function with the given Name that +/// uses SPS to deserialize its arguments and dispatches to Handle. +/// +/// SPSArgs is a parenthesized comma-separated list of SPS argument types +/// (the parens are stripped by ORC_RT_DEPAREN before being expanded into +/// the SPSAllocActionFunction template instantiation): +/// +/// static Error checkEq(int32_t X, int32_t Y); +/// ORC_RT_SPS_ALLOC_ACTION(check_eq_action, (int32_t, int32_t), checkEq) +/// +#define ORC_RT_SPS_ALLOC_ACTION(Name, SPSArgs, Handle) \ + static orc_rt_WrapperFunctionBuffer Name(const char *ArgData, \ + size_t ArgSize) { \ + return orc_rt::SPSAllocActionFunction::handle( \ + ArgData, ArgSize, Handle) \ + .release(); \ + } + namespace orc_rt { struct SPSAllocAction; diff --git a/orc-rt/unittests/SPSAllocActionTest.cpp b/orc-rt/unittests/SPSAllocActionTest.cpp index 2145ff251c0f5..879aac254dd19 100644 --- a/orc-rt/unittests/SPSAllocActionTest.cpp +++ b/orc-rt/unittests/SPSAllocActionTest.cpp @@ -126,3 +126,27 @@ TEST(SPSAllocActionTest, RunActionWithUndecodableArgs) { EXPECT_STREQ(B.getOutOfBandError(), "Could not deserialize allocation action argument buffer"); } + +// Test the ORC_RT_SPS_ALLOC_ACTION macro. 
+static Error check_values_equal(int32_t X, int32_t Y) { + if (X == Y) + return Error::success(); + return make_error("X and Y differ"); +} +ORC_RT_SPS_ALLOC_ACTION(macro_defined_allocaction, (int32_t, int32_t), + check_values_equal) + +TEST(SPSAllocActionTest, RunMacroDefinedAllocActionWithErrorSuccessReturn) { + AllocAction AA(macro_defined_allocaction, + *spsSerialize>(42, 42)); + auto B = AA(); + EXPECT_EQ(B.getOutOfBandError(), nullptr); +} + +TEST(SPSAllocActionTest, RunMacroDefinedAllocActionWithErrorFailureReturn) { + AllocAction AA(macro_defined_allocaction, + *spsSerialize>(42, 7)); + auto B = AA(); + ASSERT_NE(B.getOutOfBandError(), nullptr); + EXPECT_STREQ(B.getOutOfBandError(), "X and Y differ"); +} From 2aaeca88df89f0e9e0b01b8b469857337bf5293d Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Tue, 23 Jun 2026 17:58:02 +0400 Subject: [PATCH 191/511] [clang] Update C++ DR status page (#205342) The post-Brno draft will not be out for three more weeks, but Brno updates for Core issues statuses are already trickling in. Notably, [CWG507](https://cplusplus.github.io/CWG/issues/507.html) "Ambiguity with built-in binary operator candidates for class object convertible to built-in type", which used to be marked as a duplicate of [CWG260](https://cplusplus.github.io/CWG/issues/260.html) "User-defined conversions and built-in `operator=`", is open again, presumably because it contains an example that goes beyond the scope of CWG260. Relatively recent CWG thread that might be related (WG21 access required): [link](https://lists.isocpp.org/core/2025/08/18478.php). 
--- clang/test/CXX/drs/cwg5xx.cpp | 3 - clang/www/cxx_dr_status.html | 108 +++++++++++++++++++++++----------- 2 files changed, 75 insertions(+), 36 deletions(-) diff --git a/clang/test/CXX/drs/cwg5xx.cpp b/clang/test/CXX/drs/cwg5xx.cpp index ea05e714601fc..935f3c7e41dc3 100644 --- a/clang/test/CXX/drs/cwg5xx.cpp +++ b/clang/test/CXX/drs/cwg5xx.cpp @@ -84,9 +84,6 @@ namespace cwg506 { // cwg506: 2.7 // since-cxx11-error@-2 {{cannot pass object of non-trivial type 'NonPod' through variadic function; call will abort at runtime}} } // namespace cwg506 -// FIXME: Add tests here once CWG260 is resolved. -// cwg507: dup 260 - // cwg508: na // cwg509: na // cwg510: na diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index c9f136ba26681..ef30b5eefea13 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -58,7 +58,7 @@

C++ defect report implementation status

2 [temp.dep.res] - drafting + open How can dependent names be used in member declarations that appear outside of the class template definition? Not resolved @@ -1864,7 +1864,7 @@

C++ defect report implementation status

260 [over.built] - open + review User-defined conversions and built-in operator= Not resolved @@ -3594,12 +3594,12 @@

C++ defect report implementation status

Conditionally-supported behavior for non-POD objects passed to ellipsis Clang 2.7 - + 507 [over.built] - dup - Ambiguity assigning class object to built-in type - Duplicate of 260 + open + Ambiguity with built-in binary operator candidates for class object convertible to built-in type + Not resolved 508 @@ -15383,7 +15383,7 @@

C++ defect report implementation status

2228 [dcl.ambig.res] - tentatively ready + ready Ambiguity resolution for cast to function type Not resolved @@ -18591,7 +18591,7 @@

C++ defect report implementation status

2684 [basic.start.dynamic] - open + ready thread_local dynamic initialization Not resolved @@ -19095,7 +19095,7 @@

C++ defect report implementation status

2756 [class.init] - tentatively ready + ready Completion of initialization by delegating constructor Not resolved @@ -20626,14 +20626,14 @@

C++ defect report implementation status

2973 [dcl.typedef] - open + review Does an alias-declaration introduce a name for linkage purposes? Not resolved 2974 [temp.deduct.type] - open + ready Non-deduced context for qualified-id naming a template Not resolved @@ -20976,7 +20976,7 @@

C++ defect report implementation status

3023 [dcl.init.list] - tentatively ready + ready Default arguments in list-initialization Not resolved @@ -21004,7 +21004,7 @@

C++ defect report implementation status

3027 [temp.type] - tentatively ready + ready Equivalence of pack-index-specifiers Not resolved @@ -21137,7 +21137,7 @@

C++ defect report implementation status

3046 [dcl.enum] - open + ready Enumerations as part of the common initial sequence Not resolved @@ -21837,7 +21837,7 @@

C++ defect report implementation status

3146 [diff.expr] - open + ready Usual arithmetic conversions for enumerations are different in C Not resolved @@ -21928,7 +21928,7 @@

C++ defect report implementation status

3159 [temp.inst] - tentatively ready + ready Instantiation of variables with incomplete array types Not resolved @@ -21963,35 +21963,35 @@

C++ defect report implementation status

3164 [cpp.predefined] - tentatively ready + ready Location of specification of __STDCPP_FLOAT16_T__ macro Not resolved - + 3165 [basic.link] - tentatively ready + dup Use "equivalent type" to support templated entities - Not resolved + Unknown 3166 [expr.reflect] - tentatively ready + ready Protected access rule for a pointer-to-member from a reflect-expression Not resolved 3167 [dcl.init] - tentatively ready + ready Initializing typedefs Not resolved 3168 [conv.rank] - tentatively ready + ready Issues with integer conversion ranks Not resolved @@ -22033,7 +22033,7 @@

C++ defect report implementation status

3174 [basic.lookup.argdep] - tentatively ready + review Handling of friends in argument-dependent lookup Not resolved @@ -22047,14 +22047,14 @@

C++ defect report implementation status

3176 [intro.execution] - tentatively ready + ready Sequencing default arguments during constant evaluation Not resolved 3177 [lex.pptoken] - tentatively ready + ready Formation of a header-name preprocessing token Not resolved @@ -22068,7 +22068,7 @@

C++ defect report implementation status

3179 [dcl.fct] - tentatively ready + ready More edge cases for a void function parameter Not resolved @@ -22089,21 +22089,21 @@

C++ defect report implementation status

3182 [temp.deduct.call] - tentatively ready + ready Notional template parameters introduced for an array parameter Not resolved 3183 [expr.call] - tentatively ready + ready No expressions in an expression-list Not resolved 3184 [expr.add] - tentatively ready + ready Pointer arithmetic with similar types Not resolved @@ -22117,7 +22117,7 @@

C++ defect report implementation status

3186 [basic.lookup.argdep] - tentatively ready + ready Argument-dependent lookup is for a name Not resolved @@ -22131,7 +22131,7 @@

C++ defect report implementation status

3188 [diff.cpp20] - tentatively ready + ready Behavior change for class template argument deduction Not resolved @@ -22145,7 +22145,7 @@

C++ defect report implementation status

3190 [expr.reflect] - review + ready Ambiguous lookup for type aliases in reflection Not resolved @@ -22169,6 +22169,48 @@

C++ defect report implementation status

open Missing restrictions and effects for friend declarations Not resolved + + + 3194 + [lex.pptoken] + open + Lexing of <=> + Not resolved + + + 3195 + [expr.const.core] + open + Starting the lifetime of a runtime object during constant evaluation + Not resolved + + + 3196 + [basic.start.term] + open + Move restriction on standard library functions in signal handlers + Not resolved + + + 3197 + [diff.basic] + open + Relaxed requirements for integer representations + Not resolved + + + 3198 + [except.handle] + open + Stack unwinding when no matching handler is found + Not resolved + + + 3199 + [cpp.predefined] + open + Missing predefined macros for chart16_t and char32_t + Not resolved From 59aa54f60319f89cd5ea2416f5665317be5dd8c9 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Tue, 23 Jun 2026 15:10:09 +0100 Subject: [PATCH 192/511] [LV][NFC] Add SVE tests for masked interleaved accesses with gaps (#205330) --- .../AArch64/interleave-with-gaps.ll | 1493 +++++++++++++++-- 1 file changed, 1325 insertions(+), 168 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll index cba9cdaa66770..59b0629069894 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleave-with-gaps.ll @@ -1,5 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 -; RUN: opt -passes=loop-vectorize -S %s | FileCheck %s +; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S < %s | FileCheck --check-prefix=CHECK-NOTF %s +; RUN: opt -passes=loop-vectorize -tail-folding-policy=prefer-fold-tail \ +; RUN: -mtriple=aarch64 -mattr=+sve -S < %s | FileCheck --check-prefix=CHECK-TF %s target triple = "aarch64-linux-gnu" @@ -7,56 +9,81 @@ target triple = "aarch64-linux-gnu" ; the last iteration must execute in the scalar loop. 
Thus the vector loop can ; only execute up to 16 iterations. define i64 @vector_loop_with_remaining_iterations(ptr %src, ptr noalias %dst, i32 %x) #0 { -; CHECK-LABEL: define i64 @vector_loop_with_remaining_iterations( -; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: br label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[BROADCAST_SPLAT]], i1 false) -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[INDEX]], i32 0, i64 3 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <64 x i8>, ptr [[TMP4]], align 1 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <64 x i8> [[WIDE_VEC]], <64 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[STRIDED_VEC]] to <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP5]]) -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP6]]) -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP8]], align 1 -; CHECK-NEXT: [[TMP10:%.*]] = zext <16 x i32> [[TMP7]] to <16 x i64> -; CHECK-NEXT: [[TMP11]] = or <16 x i64> [[VEC_PHI]], [[TMP10]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 
[[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP11]]) -; CHECK-NEXT: br label %[[SCALAR_PH:.*]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[TMP13]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC_I_I:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[IV]], i32 0, i64 3 -; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I_I]], align 1 -; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32 -; CHECK-NEXT: [[ABS_0:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) -; CHECK-NEXT: [[MIN_0:%.*]] = call i32 @llvm.umin.i32(i32 [[ABS_0]], i32 [[L_EXT]]) -; CHECK-NEXT: [[ABS_1:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) -; CHECK-NEXT: [[MIN_1:%.*]] = call i32 @llvm.umin.i32(i32 [[ABS_1]], i32 [[MIN_0]]) -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i8 0, ptr [[GEP_DST]], align 1 -; CHECK-NEXT: [[MIN_EXT:%.*]] = zext i32 [[MIN_1]] to i64 -; CHECK-NEXT: [[RED_NEXT]] = or i64 [[RED]], [[MIN_EXT]] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 17 -; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ] -; CHECK-NEXT: ret i64 [[RED_NEXT_LCSSA]] +; CHECK-NOTF-LABEL: define i64 @vector_loop_with_remaining_iterations( +; CHECK-NOTF-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NOTF-NEXT: [[ENTRY:.*:]] +; CHECK-NOTF-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK-NOTF: 
[[VECTOR_PH]]: +; CHECK-NOTF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0 +; CHECK-NOTF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NOTF-NEXT: [[TMP0:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[BROADCAST_SPLAT]], i1 false) +; CHECK-NOTF-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NOTF: [[VECTOR_BODY]]: +; CHECK-NOTF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[VEC_PHI:%.*]] = phi <16 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[TMP1:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[INDEX]], i32 0, i64 3 +; CHECK-NOTF-NEXT: [[WIDE_VEC:%.*]] = load <64 x i8>, ptr [[TMP1]], align 1 +; CHECK-NOTF-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <64 x i8> [[WIDE_VEC]], <64 x i8> poison, <16 x i32> +; CHECK-NOTF-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[STRIDED_VEC]] to <16 x i32> +; CHECK-NOTF-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP0]], <16 x i32> [[TMP2]]) +; CHECK-NOTF-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP0]], <16 x i32> [[TMP3]]) +; CHECK-NOTF-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] +; CHECK-NOTF-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1 +; CHECK-NOTF-NEXT: [[TMP6:%.*]] = zext <16 x i32> [[TMP4]] to <16 x i64> +; CHECK-NOTF-NEXT: [[TMP7]] = or <16 x i64> [[VEC_PHI]], [[TMP6]] +; CHECK-NOTF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NOTF-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NOTF-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-NOTF: [[MIDDLE_BLOCK]]: +; CHECK-NOTF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP7]]) +; CHECK-NOTF-NEXT: br label 
%[[SCALAR_PH:.*]] +; CHECK-NOTF: [[SCALAR_PH]]: +; CHECK-NOTF-NEXT: br label %[[LOOP:.*]] +; CHECK-NOTF: [[LOOP]]: +; CHECK-NOTF-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NOTF-NEXT: [[RED:%.*]] = phi i64 [ [[TMP9]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NOTF-NEXT: [[GEP_SRC_I_I:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[IV]], i32 0, i64 3 +; CHECK-NOTF-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I_I]], align 1 +; CHECK-NOTF-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32 +; CHECK-NOTF-NEXT: [[ABS_0:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-NOTF-NEXT: [[MIN_0:%.*]] = call i32 @llvm.umin.i32(i32 [[ABS_0]], i32 [[L_EXT]]) +; CHECK-NOTF-NEXT: [[ABS_1:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-NOTF-NEXT: [[MIN_1:%.*]] = call i32 @llvm.umin.i32(i32 [[ABS_1]], i32 [[MIN_0]]) +; CHECK-NOTF-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]] +; CHECK-NOTF-NEXT: store i8 0, ptr [[GEP_DST]], align 1 +; CHECK-NOTF-NEXT: [[MIN_EXT:%.*]] = zext i32 [[MIN_1]] to i64 +; CHECK-NOTF-NEXT: [[RED_NEXT]] = or i64 [[RED]], [[MIN_EXT]] +; CHECK-NOTF-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NOTF-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 17 +; CHECK-NOTF-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK-NOTF: [[EXIT]]: +; CHECK-NOTF-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ] +; CHECK-NOTF-NEXT: ret i64 [[RED_NEXT_LCSSA]] +; +; CHECK-TF-LABEL: define i64 @vector_loop_with_remaining_iterations( +; CHECK-TF-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-TF-NEXT: [[ENTRY:.*]]: +; CHECK-TF-NEXT: br label %[[LOOP:.*]] +; CHECK-TF: [[LOOP]]: +; CHECK-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-TF-NEXT: [[RED:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] 
+; CHECK-TF-NEXT: [[GEP_SRC_I_I:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[IV]], i32 0, i64 3 +; CHECK-TF-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I_I]], align 1 +; CHECK-TF-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32 +; CHECK-TF-NEXT: [[ABS_0:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-TF-NEXT: [[MIN_0:%.*]] = call i32 @llvm.umin.i32(i32 [[ABS_0]], i32 [[L_EXT]]) +; CHECK-TF-NEXT: [[ABS_1:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-TF-NEXT: [[MIN_1:%.*]] = call i32 @llvm.umin.i32(i32 [[ABS_1]], i32 [[MIN_0]]) +; CHECK-TF-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]] +; CHECK-TF-NEXT: store i8 0, ptr [[GEP_DST]], align 1 +; CHECK-TF-NEXT: [[MIN_EXT:%.*]] = zext i32 [[MIN_1]] to i64 +; CHECK-TF-NEXT: [[RED_NEXT]] = or i64 [[RED]], [[MIN_EXT]] +; CHECK-TF-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-TF-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 17 +; CHECK-TF-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-TF: [[EXIT]]: +; CHECK-TF-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ] +; CHECK-TF-NEXT: ret i64 [[RED_NEXT_LCSSA]] ; entry: br label %loop @@ -87,56 +114,81 @@ exit: ; the last iteration must execute in the scalar loop. Thus the vector loop can ; only execute up to 16 iterations. 
define i64 @main_vector_loop_fixed_with_no_remaining_iterations(ptr %src, ptr noalias %dst, i32 %x) #0 { -; CHECK-LABEL: define i64 @main_vector_loop_fixed_with_no_remaining_iterations( -; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: br label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[BROADCAST_SPLAT]], i1 false) -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP11:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[INDEX]], i32 0, i64 3 -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <64 x i8>, ptr [[TMP4]], align 1 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <64 x i8> [[WIDE_VEC]], <64 x i8> poison, <16 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = zext <16 x i8> [[STRIDED_VEC]] to <16 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP5]]) -; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP3]], <16 x i32> [[TMP6]]) -; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP26]], align 1 -; CHECK-NEXT: [[TMP10:%.*]] = zext <16 x i32> [[TMP7]] to <16 x i64> -; CHECK-NEXT: [[TMP11]] = or <16 x i64> [[VEC_PHI]], [[TMP10]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 
[[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP11]]) -; CHECK-NEXT: br label %[[SCALAR_PH:.*]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[TMP13]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_SRC_I_I:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[IV]], i32 0, i64 3 -; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I_I]], align 1 -; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32 -; CHECK-NEXT: [[ABS_0:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) -; CHECK-NEXT: [[MIN_0:%.*]] = call i32 @llvm.umin.i32(i32 [[ABS_0]], i32 [[L_EXT]]) -; CHECK-NEXT: [[ABS_1:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) -; CHECK-NEXT: [[MIN_1:%.*]] = call i32 @llvm.umin.i32(i32 [[ABS_1]], i32 [[MIN_0]]) -; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store i8 0, ptr [[GEP_DST]], align 1 -; CHECK-NEXT: [[MIN_EXT:%.*]] = zext i32 [[MIN_1]] to i64 -; CHECK-NEXT: [[RED_NEXT]] = or i64 [[RED]], [[MIN_EXT]] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 17 -; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ] -; CHECK-NEXT: ret i64 [[RED_NEXT_LCSSA]] +; CHECK-NOTF-LABEL: define i64 @main_vector_loop_fixed_with_no_remaining_iterations( +; CHECK-NOTF-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0]] { +; CHECK-NOTF-NEXT: [[ENTRY:.*:]] +; CHECK-NOTF-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK-NOTF: [[VECTOR_PH]]: +; CHECK-NOTF-NEXT: 
[[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[X]], i64 0 +; CHECK-NOTF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NOTF-NEXT: [[TMP0:%.*]] = call <16 x i32> @llvm.abs.v16i32(<16 x i32> [[BROADCAST_SPLAT]], i1 false) +; CHECK-NOTF-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NOTF: [[VECTOR_BODY]]: +; CHECK-NOTF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[VEC_PHI:%.*]] = phi <16 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[TMP1:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[INDEX]], i32 0, i64 3 +; CHECK-NOTF-NEXT: [[WIDE_VEC:%.*]] = load <64 x i8>, ptr [[TMP1]], align 1 +; CHECK-NOTF-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <64 x i8> [[WIDE_VEC]], <64 x i8> poison, <16 x i32> +; CHECK-NOTF-NEXT: [[TMP2:%.*]] = zext <16 x i8> [[STRIDED_VEC]] to <16 x i32> +; CHECK-NOTF-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP0]], <16 x i32> [[TMP2]]) +; CHECK-NOTF-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.umin.v16i32(<16 x i32> [[TMP0]], <16 x i32> [[TMP3]]) +; CHECK-NOTF-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[INDEX]] +; CHECK-NOTF-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1 +; CHECK-NOTF-NEXT: [[TMP6:%.*]] = zext <16 x i32> [[TMP4]] to <16 x i64> +; CHECK-NOTF-NEXT: [[TMP7]] = or <16 x i64> [[VEC_PHI]], [[TMP6]] +; CHECK-NOTF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NOTF-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 +; CHECK-NOTF-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-NOTF: [[MIDDLE_BLOCK]]: +; CHECK-NOTF-NEXT: [[TMP9:%.*]] = call i64 @llvm.vector.reduce.or.v16i64(<16 x i64> [[TMP7]]) +; CHECK-NOTF-NEXT: br label %[[SCALAR_PH:.*]] +; CHECK-NOTF: [[SCALAR_PH]]: 
+; CHECK-NOTF-NEXT: br label %[[LOOP:.*]] +; CHECK-NOTF: [[LOOP]]: +; CHECK-NOTF-NEXT: [[IV:%.*]] = phi i64 [ 16, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NOTF-NEXT: [[RED:%.*]] = phi i64 [ [[TMP9]], %[[SCALAR_PH]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NOTF-NEXT: [[GEP_SRC_I_I:%.*]] = getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[IV]], i32 0, i64 3 +; CHECK-NOTF-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I_I]], align 1 +; CHECK-NOTF-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32 +; CHECK-NOTF-NEXT: [[ABS_0:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-NOTF-NEXT: [[MIN_0:%.*]] = call i32 @llvm.umin.i32(i32 [[ABS_0]], i32 [[L_EXT]]) +; CHECK-NOTF-NEXT: [[ABS_1:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-NOTF-NEXT: [[MIN_1:%.*]] = call i32 @llvm.umin.i32(i32 [[ABS_1]], i32 [[MIN_0]]) +; CHECK-NOTF-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]] +; CHECK-NOTF-NEXT: store i8 0, ptr [[GEP_DST]], align 1 +; CHECK-NOTF-NEXT: [[MIN_EXT:%.*]] = zext i32 [[MIN_1]] to i64 +; CHECK-NOTF-NEXT: [[RED_NEXT]] = or i64 [[RED]], [[MIN_EXT]] +; CHECK-NOTF-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NOTF-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 17 +; CHECK-NOTF-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK-NOTF: [[EXIT]]: +; CHECK-NOTF-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ] +; CHECK-NOTF-NEXT: ret i64 [[RED_NEXT_LCSSA]] +; +; CHECK-TF-LABEL: define i64 @main_vector_loop_fixed_with_no_remaining_iterations( +; CHECK-TF-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]], i32 [[X:%.*]]) #[[ATTR0]] { +; CHECK-TF-NEXT: [[ENTRY:.*]]: +; CHECK-TF-NEXT: br label %[[LOOP:.*]] +; CHECK-TF: [[LOOP]]: +; CHECK-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-TF-NEXT: [[RED:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ] +; CHECK-TF-NEXT: [[GEP_SRC_I_I:%.*]] = 
getelementptr { [4 x i8] }, ptr [[SRC]], i64 [[IV]], i32 0, i64 3 +; CHECK-TF-NEXT: [[L:%.*]] = load i8, ptr [[GEP_SRC_I_I]], align 1 +; CHECK-TF-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i32 +; CHECK-TF-NEXT: [[ABS_0:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-TF-NEXT: [[MIN_0:%.*]] = call i32 @llvm.umin.i32(i32 [[ABS_0]], i32 [[L_EXT]]) +; CHECK-TF-NEXT: [[ABS_1:%.*]] = call i32 @llvm.abs.i32(i32 [[X]], i1 false) +; CHECK-TF-NEXT: [[MIN_1:%.*]] = call i32 @llvm.umin.i32(i32 [[ABS_1]], i32 [[MIN_0]]) +; CHECK-TF-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[IV]] +; CHECK-TF-NEXT: store i8 0, ptr [[GEP_DST]], align 1 +; CHECK-TF-NEXT: [[MIN_EXT:%.*]] = zext i32 [[MIN_1]] to i64 +; CHECK-TF-NEXT: [[RED_NEXT]] = or i64 [[RED]], [[MIN_EXT]] +; CHECK-TF-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-TF-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i64 [[IV_NEXT]], 17 +; CHECK-TF-NEXT: br i1 [[EXITCOND_NOT_I_I]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-TF: [[EXIT]]: +; CHECK-TF-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ] +; CHECK-TF-NEXT: ret i64 [[RED_NEXT_LCSSA]] ; entry: br label %loop @@ -165,73 +217,117 @@ exit: ; Test case for https://github.com/llvm/llvm-project/issues/149726. 
define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks(ptr noalias %A, ptr noalias %B, ptr noalias %C, ptr noalias %D, ptr noalias %E, ptr noalias %F, ptr noalias %G, ptr noalias %H, ptr noalias %I, ptr noalias %J, ptr noalias %K, ptr %L) #1 { -; CHECK-LABEL: define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks( -; CHECK-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], ptr noalias [[D:%.*]], ptr noalias [[E:%.*]], ptr noalias [[F:%.*]], ptr noalias [[G:%.*]], ptr noalias [[H:%.*]], ptr noalias [[I:%.*]], ptr noalias [[J:%.*]], ptr noalias [[K:%.*]], ptr [[L:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: br label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] -; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 -; CHECK-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[GEP_J]], align 8 -; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = trunc <4 x i64> [[STRIDED_VEC]] to <4 x i16> -; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i16> [[TMP5]], i64 0 -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP5]], i64 1 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i16> [[TMP5]], i64 2 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i16> [[TMP5]], i64 3 -; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i16, ptr [[K]], i64 [[OFFSET_IDX]] -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i16, ptr [[K]], i64 
[[TMP2]] -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP3]] -; CHECK-NEXT: store i16 [[TMP10]], ptr [[TMP6]], align 2 -; CHECK-NEXT: store i16 [[TMP11]], ptr [[TMP7]], align 2 -; CHECK-NEXT: store i16 [[TMP12]], ptr [[TMP8]], align 2 -; CHECK-NEXT: store i16 [[TMP13]], ptr [[TMP9]], align 2 -; CHECK-NEXT: store i64 0, ptr [[A]], align 8 -; CHECK-NEXT: store i64 0, ptr [[B]], align 8 -; CHECK-NEXT: store i64 0, ptr [[C]], align 8 -; CHECK-NEXT: store i64 0, ptr [[D]], align 8 -; CHECK-NEXT: store i64 0, ptr [[E]], align 8 -; CHECK-NEXT: store i64 0, ptr [[F]], align 8 -; CHECK-NEXT: store i64 0, ptr [[G]], align 8 -; CHECK-NEXT: store i64 0, ptr [[H]], align 8 -; CHECK-NEXT: store i64 0, ptr [[I]], align 8 -; CHECK-NEXT: store i64 0, ptr [[L]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4 -; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[SCALAR_PH:.*]] -; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: br label %[[LOOP:.*]] -; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ 8, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP_J1:%.*]] = getelementptr i64, ptr [[J]], i64 [[IV1]] -; CHECK-NEXT: [[L_J:%.*]] = load i64, ptr [[GEP_J1]], align 8 -; CHECK-NEXT: [[L_TRUNC:%.*]] = trunc i64 [[L_J]] to i16 -; CHECK-NEXT: [[GEP_K:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV1]] -; CHECK-NEXT: store i16 [[L_TRUNC]], ptr [[GEP_K]], align 2 -; CHECK-NEXT: store i64 0, ptr [[A]], align 8 -; CHECK-NEXT: store i64 0, ptr [[B]], align 8 -; CHECK-NEXT: store i64 0, ptr [[C]], align 8 -; CHECK-NEXT: store i64 0, ptr [[D]], align 8 -; CHECK-NEXT: store i64 0, ptr [[E]], align 8 -; CHECK-NEXT: store i64 0, ptr [[F]], align 8 -; CHECK-NEXT: store i64 0, ptr [[G]], align 8 -; CHECK-NEXT: store i64 0, ptr [[H]], align 8 -; CHECK-NEXT: store i64 0, ptr 
[[I]], align 8 -; CHECK-NEXT: store i64 0, ptr [[L]], align 8 -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV1]], 2 -; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV1]], 14 -; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP8:![0-9]+]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void +; CHECK-NOTF-LABEL: define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks( +; CHECK-NOTF-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], ptr noalias [[D:%.*]], ptr noalias [[E:%.*]], ptr noalias [[F:%.*]], ptr noalias [[G:%.*]], ptr noalias [[H:%.*]], ptr noalias [[I:%.*]], ptr noalias [[J:%.*]], ptr noalias [[K:%.*]], ptr [[L:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NOTF-NEXT: [[ENTRY:.*:]] +; CHECK-NOTF-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK-NOTF: [[VECTOR_PH]]: +; CHECK-NOTF-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NOTF: [[VECTOR_BODY]]: +; CHECK-NOTF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[TMP0:%.*]] = shl i64 [[INDEX]], 1 +; CHECK-NOTF-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 2 +; CHECK-NOTF-NEXT: [[TMP2:%.*]] = add i64 [[TMP0]], 4 +; CHECK-NOTF-NEXT: [[TMP3:%.*]] = add i64 [[TMP0]], 6 +; CHECK-NOTF-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[J]], i64 [[TMP0]] +; CHECK-NOTF-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP4]], align 8 +; CHECK-NOTF-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> +; CHECK-NOTF-NEXT: [[TMP5:%.*]] = trunc <4 x i64> [[STRIDED_VEC]] to <4 x i16> +; CHECK-NOTF-NEXT: [[TMP6:%.*]] = extractelement <4 x i16> [[TMP5]], i64 0 +; CHECK-NOTF-NEXT: [[TMP7:%.*]] = extractelement <4 x i16> [[TMP5]], i64 1 +; CHECK-NOTF-NEXT: [[TMP8:%.*]] = extractelement <4 x i16> [[TMP5]], i64 2 +; CHECK-NOTF-NEXT: [[TMP9:%.*]] = extractelement <4 x i16> [[TMP5]], i64 3 +; CHECK-NOTF-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP0]] +; CHECK-NOTF-NEXT: [[TMP11:%.*]] = 
getelementptr i16, ptr [[K]], i64 [[TMP1]] +; CHECK-NOTF-NEXT: [[TMP12:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP2]] +; CHECK-NOTF-NEXT: [[TMP13:%.*]] = getelementptr i16, ptr [[K]], i64 [[TMP3]] +; CHECK-NOTF-NEXT: store i16 [[TMP6]], ptr [[TMP10]], align 2 +; CHECK-NOTF-NEXT: store i16 [[TMP7]], ptr [[TMP11]], align 2 +; CHECK-NOTF-NEXT: store i16 [[TMP8]], ptr [[TMP12]], align 2 +; CHECK-NOTF-NEXT: store i16 [[TMP9]], ptr [[TMP13]], align 2 +; CHECK-NOTF-NEXT: store i64 0, ptr [[A]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[B]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[C]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[D]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[E]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[F]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[G]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[H]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[I]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[L]], align 8 +; CHECK-NOTF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NOTF-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4 +; CHECK-NOTF-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK-NOTF: [[MIDDLE_BLOCK]]: +; CHECK-NOTF-NEXT: br label %[[SCALAR_PH:.*]] +; CHECK-NOTF: [[SCALAR_PH]]: +; CHECK-NOTF-NEXT: br label %[[LOOP:.*]] +; CHECK-NOTF: [[LOOP]]: +; CHECK-NOTF-NEXT: [[IV:%.*]] = phi i64 [ 8, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NOTF-NEXT: [[GEP_J:%.*]] = getelementptr i64, ptr [[J]], i64 [[IV]] +; CHECK-NOTF-NEXT: [[L_J:%.*]] = load i64, ptr [[GEP_J]], align 8 +; CHECK-NOTF-NEXT: [[L_TRUNC:%.*]] = trunc i64 [[L_J]] to i16 +; CHECK-NOTF-NEXT: [[GEP_K:%.*]] = getelementptr i16, ptr [[K]], i64 [[IV]] +; CHECK-NOTF-NEXT: store i16 [[L_TRUNC]], ptr [[GEP_K]], align 2 +; CHECK-NOTF-NEXT: store i64 0, ptr [[A]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[B]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[C]], align 8 +; 
CHECK-NOTF-NEXT: store i64 0, ptr [[D]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[E]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[F]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[G]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[H]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[I]], align 8 +; CHECK-NOTF-NEXT: store i64 0, ptr [[L]], align 8 +; CHECK-NOTF-NEXT: [[IV_NEXT]] = add i64 [[IV]], 2 +; CHECK-NOTF-NEXT: [[EC:%.*]] = icmp ult i64 [[IV]], 14 +; CHECK-NOTF-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NOTF: [[EXIT]]: +; CHECK-NOTF-NEXT: ret void +; +; CHECK-TF-LABEL: define void @main_vector_loop_fixed_single_vector_iteration_with_runtime_checks( +; CHECK-TF-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr noalias [[C:%.*]], ptr noalias [[D:%.*]], ptr noalias [[E:%.*]], ptr noalias [[F:%.*]], ptr noalias [[G:%.*]], ptr noalias [[H:%.*]], ptr noalias [[I:%.*]], ptr noalias [[J:%.*]], ptr noalias [[K:%.*]], ptr [[L:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-TF-NEXT: [[ENTRY:.*:]] +; CHECK-TF-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK-TF: [[VECTOR_PH]]: +; CHECK-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 0, i64 8) +; CHECK-TF-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv8i64() +; CHECK-TF-NEXT: [[TMP3:%.*]] = mul [[TMP2]], splat (i64 2) +; CHECK-TF-NEXT: [[TMP4:%.*]] = shl i64 [[TMP1]], 1 +; CHECK-TF-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP4]], i64 0 +; CHECK-TF-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-TF-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-TF: [[VECTOR_BODY]]: +; CHECK-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ 
[[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-TF-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP3]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-TF-NEXT: [[WIDE_GEP:%.*]] = getelementptr i64, ptr [[J]], [[VEC_IND]] +; CHECK-TF-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv8i64.nxv8p0( align 8 [[WIDE_GEP]], [[ACTIVE_LANE_MASK]], poison) +; CHECK-TF-NEXT: [[TMP5:%.*]] = trunc [[WIDE_MASKED_GATHER]] to +; CHECK-TF-NEXT: [[WIDE_GEP1:%.*]] = getelementptr i16, ptr [[K]], [[VEC_IND]] +; CHECK-TF-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0( [[TMP5]], align 2 [[WIDE_GEP1]], [[ACTIVE_LANE_MASK]]) +; CHECK-TF-NEXT: store i64 0, ptr [[A]], align 8 +; CHECK-TF-NEXT: store i64 0, ptr [[B]], align 8 +; CHECK-TF-NEXT: store i64 0, ptr [[C]], align 8 +; CHECK-TF-NEXT: store i64 0, ptr [[D]], align 8 +; CHECK-TF-NEXT: store i64 0, ptr [[E]], align 8 +; CHECK-TF-NEXT: store i64 0, ptr [[F]], align 8 +; CHECK-TF-NEXT: store i64 0, ptr [[G]], align 8 +; CHECK-TF-NEXT: store i64 0, ptr [[H]], align 8 +; CHECK-TF-NEXT: store i64 0, ptr [[I]], align 8 +; CHECK-TF-NEXT: store i64 0, ptr [[L]], align 8 +; CHECK-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; CHECK-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8) +; CHECK-TF-NEXT: [[TMP6:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; CHECK-TF-NEXT: [[TMP7:%.*]] = xor i1 [[TMP6]], true +; CHECK-TF-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-TF-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK-TF: [[MIDDLE_BLOCK]]: +; CHECK-TF-NEXT: br label %[[EXIT:.*]] +; CHECK-TF: [[EXIT]]: +; CHECK-TF-NEXT: ret void ; entry: br label %loop @@ -261,6 +357,1067 @@ exit: ret void } +; Interleaved group with gap but without tail gap +; E.g. 
+; int (*a)[4]; +; int rdx = 0; +; for (int i = 0; i < n; i++) { +; rdx += a[i][0]; +; rdx += a[i][1]; +; // No access a[i][2] +; rdx += a[i][3]; +; } +; +define i32 @load_factor_4_with_gap(i64 %n, ptr noalias %a) { +; CHECK-NOTF-LABEL: define i32 @load_factor_4_with_gap( +; CHECK-NOTF-SAME: i64 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NOTF-NEXT: [[ENTRY:.*]]: +; CHECK-NOTF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NOTF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-NOTF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP1]] +; CHECK-NOTF-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NOTF: [[VECTOR_PH]]: +; CHECK-NOTF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-NOTF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 +; CHECK-NOTF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NOTF-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NOTF-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NOTF: [[VECTOR_BODY]]: +; CHECK-NOTF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP19:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], 0 +; CHECK-NOTF-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 1 +; CHECK-NOTF-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], [[TMP5]] +; CHECK-NOTF-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[INDEX]], i32 0 +; CHECK-NOTF-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP6]], i32 0 +; CHECK-NOTF-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP7]], align 4 +; CHECK-NOTF-NEXT: [[STRIDED_VEC:%.*]] = call { , , , } @llvm.vector.deinterleave4.nxv16i32( [[WIDE_VEC]]) +; CHECK-NOTF-NEXT: [[TMP9:%.*]] = extractvalue { , , , } [[STRIDED_VEC]], 
0 +; CHECK-NOTF-NEXT: [[TMP10:%.*]] = extractvalue { , , , } [[STRIDED_VEC]], 1 +; CHECK-NOTF-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[STRIDED_VEC]], 3 +; CHECK-NOTF-NEXT: [[WIDE_VEC2:%.*]] = load , ptr [[TMP8]], align 4 +; CHECK-NOTF-NEXT: [[STRIDED_VEC3:%.*]] = call { , , , } @llvm.vector.deinterleave4.nxv16i32( [[WIDE_VEC2]]) +; CHECK-NOTF-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[STRIDED_VEC3]], 0 +; CHECK-NOTF-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[STRIDED_VEC3]], 1 +; CHECK-NOTF-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[STRIDED_VEC3]], 3 +; CHECK-NOTF-NEXT: [[TMP15:%.*]] = add [[VEC_PHI]], [[TMP9]] +; CHECK-NOTF-NEXT: [[TMP16:%.*]] = add [[VEC_PHI1]], [[TMP12]] +; CHECK-NOTF-NEXT: [[TMP17:%.*]] = add [[TMP15]], [[TMP10]] +; CHECK-NOTF-NEXT: [[TMP18:%.*]] = add [[TMP16]], [[TMP13]] +; CHECK-NOTF-NEXT: [[TMP19]] = add [[TMP17]], [[TMP11]] +; CHECK-NOTF-NEXT: [[TMP20]] = add [[TMP18]], [[TMP14]] +; CHECK-NOTF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NOTF-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NOTF-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; CHECK-NOTF: [[MIDDLE_BLOCK]]: +; CHECK-NOTF-NEXT: [[BIN_RDX:%.*]] = add [[TMP20]], [[TMP19]] +; CHECK-NOTF-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[BIN_RDX]]) +; CHECK-NOTF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NOTF-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NOTF: [[SCALAR_PH]]: +; CHECK-NOTF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NOTF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP22]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NOTF-NEXT: br label %[[LOOP:.*]] +; CHECK-NOTF: [[LOOP]]: +; CHECK-NOTF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NOTF-NEXT: [[RDX:%.*]] = phi i32 [ 
[[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NOTF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 0 +; CHECK-NOTF-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NOTF-NEXT: [[ADD:%.*]] = add nsw i32 [[RDX]], [[LD1]] +; CHECK-NOTF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 1 +; CHECK-NOTF-NEXT: [[LD2:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NOTF-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[LD2]] +; CHECK-NOTF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 3 +; CHECK-NOTF-NEXT: [[LD3:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NOTF-NEXT: [[RDX_NEXT]] = add nsw i32 [[ADD1]], [[LD3]] +; CHECK-NOTF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NOTF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NOTF-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NOTF: [[EXIT]]: +; CHECK-NOTF-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ], [ [[TMP22]], %[[MIDDLE_BLOCK]] ] +; CHECK-NOTF-NEXT: ret i32 [[RDX_NEXT_LCSSA]] +; +; CHECK-TF-LABEL: define i32 @load_factor_4_with_gap( +; CHECK-TF-SAME: i64 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-TF-NEXT: [[ENTRY:.*:]] +; CHECK-TF-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK-TF: [[VECTOR_PH]]: +; CHECK-TF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-TF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-TF-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) +; CHECK-TF-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-TF: [[VECTOR_BODY]]: +; CHECK-TF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-TF-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], 
%[[VECTOR_BODY]] ] +; CHECK-TF-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP9:%.*]], %[[VECTOR_BODY]] ] +; CHECK-TF-NEXT: [[TMP2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[INDEX]], i32 0 +; CHECK-TF-NEXT: [[INTERLEAVED_MASK:%.*]] = call @llvm.vector.interleave4.nxv16i1( [[ACTIVE_LANE_MASK]], [[ACTIVE_LANE_MASK]], [[ACTIVE_LANE_MASK]], [[ACTIVE_LANE_MASK]]) +; CHECK-TF-NEXT: [[WIDE_MASKED_VEC:%.*]] = call @llvm.masked.load.nxv16i32.p0(ptr align 4 [[TMP2]], [[INTERLEAVED_MASK]], poison) +; CHECK-TF-NEXT: [[STRIDED_VEC:%.*]] = call { , , , } @llvm.vector.deinterleave4.nxv16i32( [[WIDE_MASKED_VEC]]) +; CHECK-TF-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[STRIDED_VEC]], 0 +; CHECK-TF-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[STRIDED_VEC]], 1 +; CHECK-TF-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[STRIDED_VEC]], 3 +; CHECK-TF-NEXT: [[TMP6:%.*]] = add [[VEC_PHI]], [[TMP3]] +; CHECK-TF-NEXT: [[TMP7:%.*]] = add [[TMP6]], [[TMP4]] +; CHECK-TF-NEXT: [[TMP8:%.*]] = add [[TMP7]], [[TMP5]] +; CHECK-TF-NEXT: [[TMP9]] = select [[ACTIVE_LANE_MASK]], [[TMP8]], [[VEC_PHI]] +; CHECK-TF-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] +; CHECK-TF-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 [[N]]) +; CHECK-TF-NEXT: [[TMP10:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; CHECK-TF-NEXT: [[TMP11:%.*]] = xor i1 [[TMP10]], true +; CHECK-TF-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK-TF: [[MIDDLE_BLOCK]]: +; CHECK-TF-NEXT: [[TMP12:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP9]]) +; CHECK-TF-NEXT: br label %[[EXIT:.*]] +; CHECK-TF: [[EXIT]]: +; CHECK-TF-NEXT: ret i32 [[TMP12]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ] + %arrayidx = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, i32 0 + %ld1 = 
load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %rdx, %ld1 + %arrayidx1 = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, i32 1 + %ld2 = load i32, ptr %arrayidx1, align 4 + %add1 = add nsw i32 %add, %ld2 + %arrayidx2 = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, i32 3 + %ld3 = load i32, ptr %arrayidx2, align 4 + %rdx.next = add nsw i32 %add1, %ld3 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret i32 %rdx.next +} + +; Interleaved group with gap but without tail gap +; E.g. +; int (*a)[4]; +; for (int i = 0; i < n; i++) { +; a[i][0] = i; +; a[i][1] = i; +; // No access a[i][2] +; a[i][3] = i; +; } +; +define void @store_factor_4_with_gap(i32 %n, ptr noalias %a) { +; CHECK-NOTF-LABEL: define void @store_factor_4_with_gap( +; CHECK-NOTF-SAME: i32 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2]] { +; CHECK-NOTF-NEXT: [[ENTRY:.*]]: +; CHECK-NOTF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NOTF-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NOTF: [[VECTOR_PH]]: +; CHECK-NOTF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NOTF-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NOTF-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NOTF: [[VECTOR_BODY]]: +; CHECK-NOTF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NOTF-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NOTF-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 3 +; CHECK-NOTF-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[INDEX]], i32 0 +; CHECK-NOTF-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP0]], i32 0 +; CHECK-NOTF-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP1]], i32 0 +; CHECK-NOTF-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 
[[TMP2]], i32 0 +; CHECK-NOTF-NEXT: store i32 [[INDEX]], ptr [[TMP3]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP1]], ptr [[TMP5]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP2]], ptr [[TMP6]], align 4 +; CHECK-NOTF-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[INDEX]], i32 1 +; CHECK-NOTF-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP0]], i32 1 +; CHECK-NOTF-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP1]], i32 1 +; CHECK-NOTF-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP2]], i32 1 +; CHECK-NOTF-NEXT: store i32 [[INDEX]], ptr [[TMP7]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP0]], ptr [[TMP8]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP1]], ptr [[TMP9]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP2]], ptr [[TMP10]], align 4 +; CHECK-NOTF-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[INDEX]], i32 3 +; CHECK-NOTF-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP0]], i32 3 +; CHECK-NOTF-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP1]], i32 3 +; CHECK-NOTF-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP2]], i32 3 +; CHECK-NOTF-NEXT: store i32 [[INDEX]], ptr [[TMP11]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP0]], ptr [[TMP12]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP1]], ptr [[TMP13]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP2]], ptr [[TMP14]], align 4 +; CHECK-NOTF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NOTF-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NOTF-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NOTF: [[MIDDLE_BLOCK]]: +; CHECK-NOTF-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NOTF-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label 
%[[SCALAR_PH]] +; CHECK-NOTF: [[SCALAR_PH]]: +; CHECK-NOTF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NOTF-NEXT: br label %[[LOOP:.*]] +; CHECK-NOTF: [[LOOP]]: +; CHECK-NOTF-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NOTF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[IV]], i32 0 +; CHECK-NOTF-NEXT: store i32 [[IV]], ptr [[ARRAYIDX]], align 4 +; CHECK-NOTF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[IV]], i32 1 +; CHECK-NOTF-NEXT: store i32 [[IV]], ptr [[ARRAYIDX1]], align 4 +; CHECK-NOTF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[IV]], i32 3 +; CHECK-NOTF-NEXT: store i32 [[IV]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NOTF-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NOTF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NOTF-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NOTF: [[EXIT]]: +; CHECK-NOTF-NEXT: ret void +; +; CHECK-TF-LABEL: define void @store_factor_4_with_gap( +; CHECK-TF-SAME: i32 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2]] { +; CHECK-TF-NEXT: [[ENTRY:.*]]: +; CHECK-TF-NEXT: br label %[[LOOP:.*]] +; CHECK-TF: [[LOOP]]: +; CHECK-TF-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[IV]], i32 0 +; CHECK-TF-NEXT: store i32 [[IV]], ptr [[ARRAYIDX]], align 4 +; CHECK-TF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[IV]], i32 1 +; CHECK-TF-NEXT: store i32 [[IV]], ptr [[ARRAYIDX1]], align 4 +; CHECK-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[IV]], i32 3 +; CHECK-TF-NEXT: store i32 [[IV]], ptr [[ARRAYIDX2]], align 4 +; CHECK-TF-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; 
CHECK-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-TF-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-TF: [[EXIT]]: +; CHECK-TF-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %arrayidx = getelementptr inbounds [4 x i32], ptr %a, i32 %iv, i32 0 + store i32 %iv, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds [4 x i32], ptr %a, i32 %iv, i32 1 + store i32 %iv, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds [4 x i32], ptr %a, i32 %iv, i32 3 + store i32 %iv, ptr %arrayidx2, align 4 + %iv.next = add nuw nsw i32 %iv, 1 + %exitcond.not = icmp eq i32 %iv.next, %n + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +; Interleaved group with tail gap +; E.g. +; int (*a)[4]; +; int rdx = 0; +; for (int i = 0; i < n; i++) { +; rdx += a[i][0]; +; rdx += a[i][1]; +; rdx += a[i][2]; +; // No access a[i][3] +; } +; +define i32 @load_factor_4_with_tail_gap(i64 %n, ptr noalias %a) { +; CHECK-NOTF-LABEL: define i32 @load_factor_4_with_tail_gap( +; CHECK-NOTF-SAME: i64 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2]] { +; CHECK-NOTF-NEXT: [[ENTRY:.*]]: +; CHECK-NOTF-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NOTF-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 3 +; CHECK-NOTF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[N]], [[TMP1]] +; CHECK-NOTF-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NOTF: [[VECTOR_PH]]: +; CHECK-NOTF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-NOTF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 1 +; CHECK-NOTF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; CHECK-NOTF-NEXT: [[TMP4:%.*]] = icmp eq i64 [[N_MOD_VF]], 0 +; CHECK-NOTF-NEXT: [[TMP5:%.*]] = select i1 [[TMP4]], i64 [[TMP3]], i64 [[N_MOD_VF]] +; CHECK-NOTF-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[TMP5]] +; CHECK-NOTF-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NOTF: [[VECTOR_BODY]]: 
+; CHECK-NOTF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[TMP6:%.*]] = add i64 [[TMP2]], 0 +; CHECK-NOTF-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 1 +; CHECK-NOTF-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[TMP7]] +; CHECK-NOTF-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[INDEX]], i32 0 +; CHECK-NOTF-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP8]], i32 0 +; CHECK-NOTF-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP9]], align 4 +; CHECK-NOTF-NEXT: [[STRIDED_VEC:%.*]] = call { , , , } @llvm.vector.deinterleave4.nxv16i32( [[WIDE_VEC]]) +; CHECK-NOTF-NEXT: [[TMP11:%.*]] = extractvalue { , , , } [[STRIDED_VEC]], 0 +; CHECK-NOTF-NEXT: [[TMP12:%.*]] = extractvalue { , , , } [[STRIDED_VEC]], 1 +; CHECK-NOTF-NEXT: [[TMP13:%.*]] = extractvalue { , , , } [[STRIDED_VEC]], 2 +; CHECK-NOTF-NEXT: [[WIDE_VEC2:%.*]] = load , ptr [[TMP10]], align 4 +; CHECK-NOTF-NEXT: [[STRIDED_VEC3:%.*]] = call { , , , } @llvm.vector.deinterleave4.nxv16i32( [[WIDE_VEC2]]) +; CHECK-NOTF-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[STRIDED_VEC3]], 0 +; CHECK-NOTF-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[STRIDED_VEC3]], 1 +; CHECK-NOTF-NEXT: [[TMP16:%.*]] = extractvalue { , , , } [[STRIDED_VEC3]], 2 +; CHECK-NOTF-NEXT: [[TMP17:%.*]] = add [[VEC_PHI]], [[TMP11]] +; CHECK-NOTF-NEXT: [[TMP18:%.*]] = add [[VEC_PHI1]], [[TMP14]] +; CHECK-NOTF-NEXT: [[TMP19:%.*]] = add [[TMP17]], [[TMP12]] +; CHECK-NOTF-NEXT: [[TMP20:%.*]] = add [[TMP18]], [[TMP15]] +; CHECK-NOTF-NEXT: [[TMP21]] = add [[TMP19]], [[TMP13]] +; CHECK-NOTF-NEXT: [[TMP22]] = add [[TMP20]], [[TMP16]] +; CHECK-NOTF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]] +; CHECK-NOTF-NEXT: 
[[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NOTF-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK-NOTF: [[MIDDLE_BLOCK]]: +; CHECK-NOTF-NEXT: [[BIN_RDX:%.*]] = add [[TMP22]], [[TMP21]] +; CHECK-NOTF-NEXT: [[TMP24:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[BIN_RDX]]) +; CHECK-NOTF-NEXT: br label %[[SCALAR_PH]] +; CHECK-NOTF: [[SCALAR_PH]]: +; CHECK-NOTF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NOTF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP24]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NOTF-NEXT: br label %[[LOOP:.*]] +; CHECK-NOTF: [[LOOP]]: +; CHECK-NOTF-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NOTF-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NOTF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 0 +; CHECK-NOTF-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NOTF-NEXT: [[ADD:%.*]] = add nsw i32 [[RDX]], [[LD1]] +; CHECK-NOTF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 1 +; CHECK-NOTF-NEXT: [[LD2:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NOTF-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[LD2]] +; CHECK-NOTF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 2 +; CHECK-NOTF-NEXT: [[LD3:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NOTF-NEXT: [[RDX_NEXT]] = add nsw i32 [[ADD1]], [[LD3]] +; CHECK-NOTF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NOTF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-NOTF-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NOTF: [[EXIT]]: +; CHECK-NOTF-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ] +; 
CHECK-NOTF-NEXT: ret i32 [[RDX_NEXT_LCSSA]] +; +; CHECK-TF-LABEL: define i32 @load_factor_4_with_tail_gap( +; CHECK-TF-SAME: i64 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2]] { +; CHECK-TF-NEXT: [[ENTRY:.*]]: +; CHECK-TF-NEXT: br label %[[LOOP:.*]] +; CHECK-TF: [[LOOP]]: +; CHECK-TF-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-TF-NEXT: [[RDX:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ] +; CHECK-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 0 +; CHECK-TF-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-TF-NEXT: [[ADD:%.*]] = add nsw i32 [[RDX]], [[LD1]] +; CHECK-TF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 1 +; CHECK-TF-NEXT: [[LD2:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-TF-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[LD2]] +; CHECK-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 2 +; CHECK-TF-NEXT: [[LD3:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-TF-NEXT: [[RDX_NEXT]] = add nsw i32 [[ADD1]], [[LD3]] +; CHECK-TF-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; CHECK-TF-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-TF: [[EXIT]]: +; CHECK-TF-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ] +; CHECK-TF-NEXT: ret i32 [[RDX_NEXT_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ] + %arrayidx = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, i32 0 + %ld1 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %rdx, %ld1 + %arrayidx1 = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, i32 1 + %ld2 = load i32, ptr %arrayidx1, align 4 + %add1 = add nsw i32 %add, %ld2 + %arrayidx2 = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, 
i32 2 + %ld3 = load i32, ptr %arrayidx2, align 4 + %rdx.next = add nsw i32 %add1, %ld3 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %n + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret i32 %rdx.next +} + +; Interleaved group with tail gap +; E.g. +; int (*a)[4]; +; int rdx = 0; +; for (int i = 0; i < n; i++) { +; a[i][0] = i; +; a[i][1] = i; +; a[i][2] = i; +; // No access a[i][3] +; } +; +define void @store_factor_4_with_tail_gap(i32 %n, ptr noalias %a) { +; CHECK-NOTF-LABEL: define void @store_factor_4_with_tail_gap( +; CHECK-NOTF-SAME: i32 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2]] { +; CHECK-NOTF-NEXT: [[ENTRY:.*]]: +; CHECK-NOTF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NOTF-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NOTF: [[VECTOR_PH]]: +; CHECK-NOTF-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NOTF-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NOTF-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NOTF: [[VECTOR_BODY]]: +; CHECK-NOTF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 1 +; CHECK-NOTF-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 2 +; CHECK-NOTF-NEXT: [[TMP2:%.*]] = add i32 [[INDEX]], 3 +; CHECK-NOTF-NEXT: [[TMP3:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[INDEX]], i32 0 +; CHECK-NOTF-NEXT: [[TMP4:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP0]], i32 0 +; CHECK-NOTF-NEXT: [[TMP5:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP1]], i32 0 +; CHECK-NOTF-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP2]], i32 0 +; CHECK-NOTF-NEXT: store i32 [[INDEX]], ptr [[TMP3]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP0]], ptr [[TMP4]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP1]], ptr [[TMP5]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP2]], ptr [[TMP6]], align 4 +; 
CHECK-NOTF-NEXT: [[TMP7:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[INDEX]], i32 1 +; CHECK-NOTF-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP0]], i32 1 +; CHECK-NOTF-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP1]], i32 1 +; CHECK-NOTF-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP2]], i32 1 +; CHECK-NOTF-NEXT: store i32 [[INDEX]], ptr [[TMP7]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP0]], ptr [[TMP8]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP1]], ptr [[TMP9]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP2]], ptr [[TMP10]], align 4 +; CHECK-NOTF-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[INDEX]], i32 2 +; CHECK-NOTF-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP0]], i32 2 +; CHECK-NOTF-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP1]], i32 2 +; CHECK-NOTF-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[TMP2]], i32 2 +; CHECK-NOTF-NEXT: store i32 [[INDEX]], ptr [[TMP11]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP0]], ptr [[TMP12]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP1]], ptr [[TMP13]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP2]], ptr [[TMP14]], align 4 +; CHECK-NOTF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NOTF-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NOTF-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NOTF: [[MIDDLE_BLOCK]]: +; CHECK-NOTF-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NOTF-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NOTF: [[SCALAR_PH]]: +; CHECK-NOTF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NOTF-NEXT: br label %[[LOOP:.*]] +; CHECK-NOTF: [[LOOP]]: +; CHECK-NOTF-NEXT: [[IV:%.*]] = phi i32 [ 
[[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NOTF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[IV]], i32 0 +; CHECK-NOTF-NEXT: store i32 [[IV]], ptr [[ARRAYIDX]], align 4 +; CHECK-NOTF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[IV]], i32 1 +; CHECK-NOTF-NEXT: store i32 [[IV]], ptr [[ARRAYIDX1]], align 4 +; CHECK-NOTF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[IV]], i32 2 +; CHECK-NOTF-NEXT: store i32 [[IV]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NOTF-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NOTF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-NOTF-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP16:![0-9]+]] +; CHECK-NOTF: [[EXIT]]: +; CHECK-NOTF-NEXT: ret void +; +; CHECK-TF-LABEL: define void @store_factor_4_with_tail_gap( +; CHECK-TF-SAME: i32 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2]] { +; CHECK-TF-NEXT: [[ENTRY:.*]]: +; CHECK-TF-NEXT: br label %[[LOOP:.*]] +; CHECK-TF: [[LOOP]]: +; CHECK-TF-NEXT: [[IV:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[IV]], i32 0 +; CHECK-TF-NEXT: store i32 [[IV]], ptr [[ARRAYIDX]], align 4 +; CHECK-TF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[IV]], i32 1 +; CHECK-TF-NEXT: store i32 [[IV]], ptr [[ARRAYIDX1]], align 4 +; CHECK-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i32 [[IV]], i32 2 +; CHECK-TF-NEXT: store i32 [[IV]], ptr [[ARRAYIDX2]], align 4 +; CHECK-TF-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; CHECK-TF-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-TF: [[EXIT]]: +; CHECK-TF-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], 
[ %iv.next, %loop ] + %arrayidx = getelementptr inbounds [4 x i32], ptr %a, i32 %iv, i32 0 + store i32 %iv, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds [4 x i32], ptr %a, i32 %iv, i32 1 + store i32 %iv, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds [4 x i32], ptr %a, i32 %iv, i32 2 + store i32 %iv, ptr %arrayidx2, align 4 + %iv.next = add nuw nsw i32 %iv, 1 + %exitcond.not = icmp eq i32 %iv.next, %n + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + + +; Interleaved group with gap but without tail gap, reversed +; E.g. +; int (*a)[4]; +; int rdx = 0; +; for (int i = n; i >= 0; i--) { +; rdx += a[i][0]; +; rdx += a[i][1]; +; // No access a[i][2] +; rdx += a[i][3]; +; } +; +define i32 @load_factor_4_with_gap_reverse(i64 %n, ptr noalias %a) { +; CHECK-NOTF-LABEL: define i32 @load_factor_4_with_gap_reverse( +; CHECK-NOTF-SAME: i64 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2]] { +; CHECK-NOTF-NEXT: [[ENTRY:.*]]: +; CHECK-NOTF-NEXT: [[CMP:%.*]] = icmp sgt i64 [[N]], -1 +; CHECK-NOTF-NEXT: br i1 [[CMP]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK-NOTF: [[LOOP_PREHEADER]]: +; CHECK-NOTF-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NOTF-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NOTF-NEXT: [[TMP2:%.*]] = shl nuw i64 [[TMP1]], 3 +; CHECK-NOTF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] +; CHECK-NOTF-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NOTF: [[VECTOR_PH]]: +; CHECK-NOTF-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP1]], 2 +; CHECK-NOTF-NEXT: [[TMP4:%.*]] = shl nuw i64 [[TMP3]], 1 +; CHECK-NOTF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] +; CHECK-NOTF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NOTF-NEXT: [[TMP5:%.*]] = sub i64 [[N]], [[N_VEC]] +; CHECK-NOTF-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NOTF: [[VECTOR_BODY]]: +; CHECK-NOTF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [
[[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[VEC_PHI1:%.*]] = phi [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[TMP6:%.*]] = sub i64 [[N]], [[INDEX]] +; CHECK-NOTF-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw [16 x i8], ptr [[A]], i64 [[TMP6]] +; CHECK-NOTF-NEXT: [[TMP8:%.*]] = sub nuw nsw i64 [[TMP3]], 1 +; CHECK-NOTF-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], -4 +; CHECK-NOTF-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 [[TMP9]] +; CHECK-NOTF-NEXT: [[TMP11:%.*]] = mul i64 -4, [[TMP3]] +; CHECK-NOTF-NEXT: [[TMP12:%.*]] = add i64 [[TMP9]], [[TMP11]] +; CHECK-NOTF-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i64 [[TMP12]] +; CHECK-NOTF-NEXT: [[WIDE_VEC:%.*]] = load , ptr [[TMP10]], align 4 +; CHECK-NOTF-NEXT: [[STRIDED_VEC:%.*]] = call { , , , } @llvm.vector.deinterleave4.nxv16i32( [[WIDE_VEC]]) +; CHECK-NOTF-NEXT: [[TMP14:%.*]] = extractvalue { , , , } [[STRIDED_VEC]], 0 +; CHECK-NOTF-NEXT: [[REVERSE:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP14]]) +; CHECK-NOTF-NEXT: [[TMP15:%.*]] = extractvalue { , , , } [[STRIDED_VEC]], 1 +; CHECK-NOTF-NEXT: [[REVERSE2:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP15]]) +; CHECK-NOTF-NEXT: [[TMP16:%.*]] = extractvalue { , , , } [[STRIDED_VEC]], 3 +; CHECK-NOTF-NEXT: [[REVERSE3:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP16]]) +; CHECK-NOTF-NEXT: [[WIDE_VEC4:%.*]] = load , ptr [[TMP13]], align 4 +; CHECK-NOTF-NEXT: [[STRIDED_VEC5:%.*]] = call { , , , } @llvm.vector.deinterleave4.nxv16i32( [[WIDE_VEC4]]) +; CHECK-NOTF-NEXT: [[TMP17:%.*]] = extractvalue { , , , } [[STRIDED_VEC5]], 0 +; CHECK-NOTF-NEXT: [[REVERSE6:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP17]]) +; CHECK-NOTF-NEXT: [[TMP18:%.*]] = extractvalue { , , , } [[STRIDED_VEC5]], 1 +; CHECK-NOTF-NEXT: [[REVERSE7:%.*]] = call 
@llvm.vector.reverse.nxv4i32( [[TMP18]]) +; CHECK-NOTF-NEXT: [[TMP19:%.*]] = extractvalue { , , , } [[STRIDED_VEC5]], 3 +; CHECK-NOTF-NEXT: [[REVERSE8:%.*]] = call @llvm.vector.reverse.nxv4i32( [[TMP19]]) +; CHECK-NOTF-NEXT: [[TMP20:%.*]] = add [[REVERSE]], [[VEC_PHI]] +; CHECK-NOTF-NEXT: [[TMP21:%.*]] = add [[REVERSE6]], [[VEC_PHI1]] +; CHECK-NOTF-NEXT: [[TMP22:%.*]] = add [[TMP20]], [[REVERSE2]] +; CHECK-NOTF-NEXT: [[TMP23:%.*]] = add [[TMP21]], [[REVERSE7]] +; CHECK-NOTF-NEXT: [[TMP24]] = add [[TMP22]], [[REVERSE3]] +; CHECK-NOTF-NEXT: [[TMP25]] = add [[TMP23]], [[REVERSE8]] +; CHECK-NOTF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]] +; CHECK-NOTF-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NOTF-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]] +; CHECK-NOTF: [[MIDDLE_BLOCK]]: +; CHECK-NOTF-NEXT: [[BIN_RDX:%.*]] = add [[TMP25]], [[TMP24]] +; CHECK-NOTF-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[BIN_RDX]]) +; CHECK-NOTF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NOTF-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NOTF: [[SCALAR_PH]]: +; CHECK-NOTF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP5]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[LOOP_PREHEADER]] ] +; CHECK-NOTF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP27]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NOTF-NEXT: br label %[[LOOP:.*]] +; CHECK-NOTF: [[LOOP]]: +; CHECK-NOTF-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NOTF-NEXT: [[RDX:%.*]] = phi i32 [ [[RDX_NEXT:%.*]], %[[LOOP]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ] +; CHECK-NOTF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [16 x i8], ptr [[A]], i64 [[IV]] +; CHECK-NOTF-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NOTF-NEXT: [[ADD:%.*]] = add nsw i32 [[LD1]], [[RDX]] +; CHECK-NOTF-NEXT: 
[[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4 +; CHECK-NOTF-NEXT: [[LD2:%.*]] = load i32, ptr [[ARRAYIDX4]], align 4 +; CHECK-NOTF-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[LD2]] +; CHECK-NOTF-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12 +; CHECK-NOTF-NEXT: [[LD3:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +; CHECK-NOTF-NEXT: [[RDX_NEXT]] = add nsw i32 [[ADD2]], [[LD3]] +; CHECK-NOTF-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; CHECK-NOTF-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NOTF-NEXT: br i1 [[CMP_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP18:![0-9]+]] +; CHECK-NOTF: [[EXIT_LOOPEXIT]]: +; CHECK-NOTF-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ], [ [[TMP27]], %[[MIDDLE_BLOCK]] ] +; CHECK-NOTF-NEXT: br label %[[EXIT]] +; CHECK-NOTF: [[EXIT]]: +; CHECK-NOTF-NEXT: [[RDX_FINAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NOTF-NEXT: ret i32 [[RDX_FINAL]] +; +; CHECK-TF-LABEL: define i32 @load_factor_4_with_gap_reverse( +; CHECK-TF-SAME: i64 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2]] { +; CHECK-TF-NEXT: [[ENTRY:.*]]: +; CHECK-TF-NEXT: [[CMP:%.*]] = icmp sgt i64 [[N]], -1 +; CHECK-TF-NEXT: br i1 [[CMP]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK-TF: [[LOOP_PREHEADER]]: +; CHECK-TF-NEXT: br label %[[LOOP:.*]] +; CHECK-TF: [[LOOP]]: +; CHECK-TF-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[LOOP_PREHEADER]] ] +; CHECK-TF-NEXT: [[RDX:%.*]] = phi i32 [ [[RDX_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [16 x i8], ptr [[A]], i64 [[IV]] +; CHECK-TF-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-TF-NEXT: [[ADD:%.*]] = add nsw i32 [[LD1]], [[RDX]] +; CHECK-TF-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 4 +; CHECK-TF-NEXT: [[LD2:%.*]] = load i32, 
ptr [[ARRAYIDX4]], align 4 +; CHECK-TF-NEXT: [[ADD2:%.*]] = add nsw i32 [[ADD]], [[LD2]] +; CHECK-TF-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds nuw i8, ptr [[ARRAYIDX]], i64 12 +; CHECK-TF-NEXT: [[LD3:%.*]] = load i32, ptr [[ARRAYIDX8]], align 4 +; CHECK-TF-NEXT: [[RDX_NEXT]] = add nsw i32 [[ADD2]], [[LD3]] +; CHECK-TF-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; CHECK-TF-NEXT: [[CMP_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-TF-NEXT: br i1 [[CMP_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] +; CHECK-TF: [[EXIT_LOOPEXIT]]: +; CHECK-TF-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ] +; CHECK-TF-NEXT: br label %[[EXIT]] +; CHECK-TF: [[EXIT]]: +; CHECK-TF-NEXT: [[RDX_FINAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-TF-NEXT: ret i32 [[RDX_FINAL]] +; +entry: + %cmp = icmp sgt i64 %n, -1 + br i1 %cmp, label %loop, label %exit + +loop: + %iv = phi i64 [ %n, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ] + %arrayidx = getelementptr inbounds nuw [16 x i8], ptr %a, i64 %iv + %ld1 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %ld1, %rdx + %arrayidx4 = getelementptr inbounds nuw i8, ptr %arrayidx, i64 4 + %ld2 = load i32, ptr %arrayidx4, align 4 + %add2 = add nsw i32 %add, %ld2 + %arrayidx8 = getelementptr inbounds nuw i8, ptr %arrayidx, i64 12 + %ld3 = load i32, ptr %arrayidx8, align 4 + %rdx.next = add nsw i32 %add2, %ld3 + %iv.next = add nsw i64 %iv, -1 + %cmp.not = icmp eq i64 %iv, 0 + br i1 %cmp.not, label %exit, label %loop + +exit: + %rdx.final = phi i32 [ 0, %entry ], [ %rdx.next, %loop ] + ret i32 %rdx.final +} + +; Interleaved group with gap but without tail gap, reversed +; E.g. 
+; int (*a)[4]; +; for (int i = n; i >= 0; i--) { +; a[i][0] = i; +; a[i][1] = i; +; // No access a[i][2] +; a[i][3] = i; +; } +; +define void @store_factor_4_with_gap_reverse(i64 %n, ptr noalias %a) { +; CHECK-NOTF-LABEL: define void @store_factor_4_with_gap_reverse( +; CHECK-NOTF-SAME: i64 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2]] { +; CHECK-NOTF-NEXT: [[ENTRY:.*:]] +; CHECK-NOTF-NEXT: [[CMP:%.*]] = icmp sgt i64 [[N]], -1 +; CHECK-NOTF-NEXT: br i1 [[CMP]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK-NOTF: [[LOOP_PREHEADER]]: +; CHECK-NOTF-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NOTF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NOTF-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NOTF: [[VECTOR_PH]]: +; CHECK-NOTF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NOTF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NOTF-NEXT: [[TMP1:%.*]] = sub i64 [[N]], [[N_VEC]] +; CHECK-NOTF-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NOTF: [[VECTOR_BODY]]: +; CHECK-NOTF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[TMP2:%.*]] = sub i64 [[N]], [[INDEX]] +; CHECK-NOTF-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1 +; CHECK-NOTF-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], -2 +; CHECK-NOTF-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], -3 +; CHECK-NOTF-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NOTF-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NOTF-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP4]] to i32 +; CHECK-NOTF-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP5]] to i32 +; CHECK-NOTF-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP2]], i32 0 +; CHECK-NOTF-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP3]], i32 0 +; CHECK-NOTF-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP4]], i32 0 +; CHECK-NOTF-NEXT: [[TMP13:%.*]] = getelementptr 
inbounds [4 x i32], ptr [[A]], i64 [[TMP5]], i32 0 +; CHECK-NOTF-NEXT: store i32 [[TMP6]], ptr [[TMP10]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP7]], ptr [[TMP11]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP8]], ptr [[TMP12]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP9]], ptr [[TMP13]], align 4 +; CHECK-NOTF-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP2]], i32 1 +; CHECK-NOTF-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP3]], i32 1 +; CHECK-NOTF-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP4]], i32 1 +; CHECK-NOTF-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP5]], i32 1 +; CHECK-NOTF-NEXT: store i32 [[TMP6]], ptr [[TMP14]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP8]], ptr [[TMP16]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP9]], ptr [[TMP17]], align 4 +; CHECK-NOTF-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP2]], i32 3 +; CHECK-NOTF-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP3]], i32 3 +; CHECK-NOTF-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP4]], i32 3 +; CHECK-NOTF-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP5]], i32 3 +; CHECK-NOTF-NEXT: store i32 [[TMP6]], ptr [[TMP18]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP8]], ptr [[TMP20]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP9]], ptr [[TMP21]], align 4 +; CHECK-NOTF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NOTF-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NOTF-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]] +; CHECK-NOTF: [[MIDDLE_BLOCK]]: +; CHECK-NOTF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NOTF-NEXT: br i1 
[[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NOTF: [[SCALAR_PH]]: +; CHECK-NOTF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[LOOP_PREHEADER]] ] +; CHECK-NOTF-NEXT: br label %[[LOOP:.*]] +; CHECK-NOTF: [[LOOP]]: +; CHECK-NOTF-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NOTF-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NOTF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 0 +; CHECK-NOTF-NEXT: store i32 [[IV_TRUNC]], ptr [[ARRAYIDX]], align 4 +; CHECK-NOTF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 1 +; CHECK-NOTF-NEXT: store i32 [[IV_TRUNC]], ptr [[ARRAYIDX1]], align 4 +; CHECK-NOTF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 3 +; CHECK-NOTF-NEXT: store i32 [[IV_TRUNC]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NOTF-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; CHECK-NOTF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NOTF-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP20:![0-9]+]] +; CHECK-NOTF: [[EXIT_LOOPEXIT]]: +; CHECK-NOTF-NEXT: br label %[[EXIT]] +; CHECK-NOTF: [[EXIT]]: +; CHECK-NOTF-NEXT: ret void +; +; CHECK-TF-LABEL: define void @store_factor_4_with_gap_reverse( +; CHECK-TF-SAME: i64 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2]] { +; CHECK-TF-NEXT: [[ENTRY:.*:]] +; CHECK-TF-NEXT: [[CMP:%.*]] = icmp sgt i64 [[N]], -1 +; CHECK-TF-NEXT: br i1 [[CMP]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK-TF: [[LOOP_PREHEADER]]: +; CHECK-TF-NEXT: br label %[[LOOP:.*]] +; CHECK-TF: [[LOOP]]: +; CHECK-TF-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[LOOP_PREHEADER]] ] +; CHECK-TF-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 0 
+; CHECK-TF-NEXT: store i32 [[IV_TRUNC]], ptr [[ARRAYIDX]], align 4 +; CHECK-TF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 1 +; CHECK-TF-NEXT: store i32 [[IV_TRUNC]], ptr [[ARRAYIDX1]], align 4 +; CHECK-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 3 +; CHECK-TF-NEXT: store i32 [[IV_TRUNC]], ptr [[ARRAYIDX2]], align 4 +; CHECK-TF-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; CHECK-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-TF-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] +; CHECK-TF: [[EXIT_LOOPEXIT]]: +; CHECK-TF-NEXT: br label %[[EXIT]] +; CHECK-TF: [[EXIT]]: +; CHECK-TF-NEXT: ret void +; +entry: + %cmp = icmp sgt i64 %n, -1 + br i1 %cmp, label %loop, label %exit + +loop: + %iv = phi i64 [ %n, %entry ], [ %iv.next, %loop ] + %iv.trunc = trunc i64 %iv to i32 + %arrayidx = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, i32 0 + store i32 %iv.trunc, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, i32 1 + store i32 %iv.trunc, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, i32 3 + store i32 %iv.trunc, ptr %arrayidx2, align 4 + %iv.next = add nsw i64 %iv, -1 + %exitcond.not = icmp eq i64 %iv, 0 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + +; Interleaved group with tail gap, reversed +; E.g. 
+; int (*a)[4]; +; int rdx = 0; +; for (int i = n; i >= 0; i--) { +; rdx += a[i][0]; +; rdx += a[i][1]; +; rdx += a[i][2]; +; // No access a[i][3] +; } +; +define i32 @load_factor_4_with_tail_gap_reverse(i64 %n, ptr noalias %a) { +; CHECK-NOTF-LABEL: define i32 @load_factor_4_with_tail_gap_reverse( +; CHECK-NOTF-SAME: i64 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2]] { +; CHECK-NOTF-NEXT: [[ENTRY:.*]]: +; CHECK-NOTF-NEXT: [[CMP:%.*]] = icmp sgt i64 [[N]], -1 +; CHECK-NOTF-NEXT: br i1 [[CMP]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK-NOTF: [[LOOP_PREHEADER]]: +; CHECK-NOTF-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NOTF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NOTF-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NOTF: [[VECTOR_PH]]: +; CHECK-NOTF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NOTF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NOTF-NEXT: [[TMP1:%.*]] = sub i64 [[N]], [[N_VEC]] +; CHECK-NOTF-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NOTF: [[VECTOR_BODY]]: +; CHECK-NOTF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP38:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP39:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP40:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP41:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[TMP2:%.*]] = sub i64 [[N]], [[INDEX]] +; CHECK-NOTF-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1 +; CHECK-NOTF-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], -2 +; CHECK-NOTF-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], -3 +; CHECK-NOTF-NEXT: [[TMP6:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP2]], i32 0 +; CHECK-NOTF-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP3]], i32 0 +; CHECK-NOTF-NEXT: [[TMP8:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP4]], i32 0 +; CHECK-NOTF-NEXT: [[TMP9:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP5]], i32 0 +; CHECK-NOTF-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP6]], align 4 +; CHECK-NOTF-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP7]], align 4 +; CHECK-NOTF-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP8]], align 4 +; CHECK-NOTF-NEXT: [[TMP13:%.*]] = load i32, ptr [[TMP9]], align 4 +; CHECK-NOTF-NEXT: [[TMP14:%.*]] = add i32 [[VEC_PHI]], [[TMP10]] +; CHECK-NOTF-NEXT: [[TMP15:%.*]] = add i32 [[VEC_PHI1]], [[TMP11]] +; CHECK-NOTF-NEXT: [[TMP16:%.*]] = add i32 [[VEC_PHI2]], [[TMP12]] +; CHECK-NOTF-NEXT: [[TMP17:%.*]] = add i32 [[VEC_PHI3]], [[TMP13]] +; CHECK-NOTF-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP2]], i32 1 +; CHECK-NOTF-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP3]], i32 1 +; CHECK-NOTF-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP4]], i32 1 +; CHECK-NOTF-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP5]], i32 1 +; CHECK-NOTF-NEXT: [[TMP22:%.*]] = load i32, ptr [[TMP18]], align 4 +; CHECK-NOTF-NEXT: [[TMP23:%.*]] = load i32, ptr [[TMP19]], align 4 +; CHECK-NOTF-NEXT: [[TMP24:%.*]] = load i32, ptr [[TMP20]], align 4 +; CHECK-NOTF-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP21]], align 4 +; CHECK-NOTF-NEXT: [[TMP26:%.*]] = add i32 [[TMP14]], [[TMP22]] +; CHECK-NOTF-NEXT: [[TMP27:%.*]] = add i32 [[TMP15]], [[TMP23]] +; CHECK-NOTF-NEXT: [[TMP28:%.*]] = add i32 [[TMP16]], [[TMP24]] +; CHECK-NOTF-NEXT: [[TMP29:%.*]] = add i32 [[TMP17]], [[TMP25]] +; CHECK-NOTF-NEXT: [[TMP30:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP2]], i32 2 +; CHECK-NOTF-NEXT: [[TMP31:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP3]], i32 2 +; CHECK-NOTF-NEXT: [[TMP32:%.*]] = getelementptr 
inbounds [4 x i32], ptr [[A]], i64 [[TMP4]], i32 2 +; CHECK-NOTF-NEXT: [[TMP33:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP5]], i32 2 +; CHECK-NOTF-NEXT: [[TMP34:%.*]] = load i32, ptr [[TMP30]], align 4 +; CHECK-NOTF-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP31]], align 4 +; CHECK-NOTF-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP32]], align 4 +; CHECK-NOTF-NEXT: [[TMP37:%.*]] = load i32, ptr [[TMP33]], align 4 +; CHECK-NOTF-NEXT: [[TMP38]] = add i32 [[TMP26]], [[TMP34]] +; CHECK-NOTF-NEXT: [[TMP39]] = add i32 [[TMP27]], [[TMP35]] +; CHECK-NOTF-NEXT: [[TMP40]] = add i32 [[TMP28]], [[TMP36]] +; CHECK-NOTF-NEXT: [[TMP41]] = add i32 [[TMP29]], [[TMP37]] +; CHECK-NOTF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NOTF-NEXT: [[TMP42:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NOTF-NEXT: br i1 [[TMP42]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] +; CHECK-NOTF: [[MIDDLE_BLOCK]]: +; CHECK-NOTF-NEXT: [[BIN_RDX:%.*]] = add i32 [[TMP39]], [[TMP38]] +; CHECK-NOTF-NEXT: [[BIN_RDX4:%.*]] = add i32 [[TMP40]], [[BIN_RDX]] +; CHECK-NOTF-NEXT: [[BIN_RDX5:%.*]] = add i32 [[TMP41]], [[BIN_RDX4]] +; CHECK-NOTF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NOTF-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NOTF: [[SCALAR_PH]]: +; CHECK-NOTF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[LOOP_PREHEADER]] ] +; CHECK-NOTF-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[BIN_RDX5]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-NOTF-NEXT: br label %[[LOOP:.*]] +; CHECK-NOTF: [[LOOP]]: +; CHECK-NOTF-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NOTF-NEXT: [[RDX:%.*]] = phi i32 [ [[RDX_NEXT:%.*]], %[[LOOP]] ], [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ] +; CHECK-NOTF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 0 +; CHECK-NOTF-NEXT: 
[[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NOTF-NEXT: [[ADD:%.*]] = add nsw i32 [[RDX]], [[LD1]] +; CHECK-NOTF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 1 +; CHECK-NOTF-NEXT: [[LD2:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NOTF-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[LD2]] +; CHECK-NOTF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 2 +; CHECK-NOTF-NEXT: [[LD3:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NOTF-NEXT: [[RDX_NEXT]] = add nsw i32 [[ADD1]], [[LD3]] +; CHECK-NOTF-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; CHECK-NOTF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NOTF-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK-NOTF: [[EXIT_LOOPEXIT]]: +; CHECK-NOTF-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ], [ [[BIN_RDX5]], %[[MIDDLE_BLOCK]] ] +; CHECK-NOTF-NEXT: br label %[[EXIT]] +; CHECK-NOTF: [[EXIT]]: +; CHECK-NOTF-NEXT: [[RDX_FINAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NOTF-NEXT: ret i32 [[RDX_FINAL]] +; +; CHECK-TF-LABEL: define i32 @load_factor_4_with_tail_gap_reverse( +; CHECK-TF-SAME: i64 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2]] { +; CHECK-TF-NEXT: [[ENTRY:.*]]: +; CHECK-TF-NEXT: [[CMP:%.*]] = icmp sgt i64 [[N]], -1 +; CHECK-TF-NEXT: br i1 [[CMP]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK-TF: [[LOOP_PREHEADER]]: +; CHECK-TF-NEXT: br label %[[LOOP:.*]] +; CHECK-TF: [[LOOP]]: +; CHECK-TF-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[LOOP_PREHEADER]] ] +; CHECK-TF-NEXT: [[RDX:%.*]] = phi i32 [ [[RDX_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ] +; CHECK-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 0 +; CHECK-TF-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-TF-NEXT: 
[[ADD:%.*]] = add nsw i32 [[RDX]], [[LD1]] +; CHECK-TF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 1 +; CHECK-TF-NEXT: [[LD2:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-TF-NEXT: [[ADD1:%.*]] = add nsw i32 [[ADD]], [[LD2]] +; CHECK-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 2 +; CHECK-TF-NEXT: [[LD3:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-TF-NEXT: [[RDX_NEXT]] = add nsw i32 [[ADD1]], [[LD3]] +; CHECK-TF-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; CHECK-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-TF-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] +; CHECK-TF: [[EXIT_LOOPEXIT]]: +; CHECK-TF-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi i32 [ [[RDX_NEXT]], %[[LOOP]] ] +; CHECK-TF-NEXT: br label %[[EXIT]] +; CHECK-TF: [[EXIT]]: +; CHECK-TF-NEXT: [[RDX_FINAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[RDX_NEXT_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-TF-NEXT: ret i32 [[RDX_FINAL]] +; +entry: + %cmp = icmp sgt i64 %n, -1 + br i1 %cmp, label %loop, label %exit + +loop: + %iv = phi i64 [ %n, %entry ], [ %iv.next, %loop ] + %rdx = phi i32 [ 0, %entry ], [ %rdx.next, %loop ] + %arrayidx = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, i32 0 + %ld1 = load i32, ptr %arrayidx, align 4 + %add = add nsw i32 %rdx, %ld1 + %arrayidx1 = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, i32 1 + %ld2 = load i32, ptr %arrayidx1, align 4 + %add1 = add nsw i32 %add, %ld2 + %arrayidx2 = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, i32 2 + %ld3 = load i32, ptr %arrayidx2, align 4 + %rdx.next = add nsw i32 %add1, %ld3 + %iv.next = add nsw i64 %iv, -1 + %exitcond.not = icmp eq i64 %iv, 0 + br i1 %exitcond.not, label %exit, label %loop + +exit: + %rdx.final = phi i32 [ 0, %entry ], [ %rdx.next, %loop ] + ret i32 %rdx.final +} + +; Interleaved group with tail gap, reversed +; E.g. 
+; int (*a)[4]; +; int rdx = 0; +; for (int i = n; i >= 0; i--) { +; a[i][0] = i; +; a[i][1] = i; +; a[i][2] = i; +; // No access a[i][3] +; } +; +define void @store_factor_4_with_tail_gap_reverse(i64 %n, ptr noalias %a) { +; CHECK-NOTF-LABEL: define void @store_factor_4_with_tail_gap_reverse( +; CHECK-NOTF-SAME: i64 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2]] { +; CHECK-NOTF-NEXT: [[ENTRY:.*:]] +; CHECK-NOTF-NEXT: [[CMP:%.*]] = icmp sgt i64 [[N]], -1 +; CHECK-NOTF-NEXT: br i1 [[CMP]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK-NOTF: [[LOOP_PREHEADER]]: +; CHECK-NOTF-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NOTF-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NOTF-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK-NOTF: [[VECTOR_PH]]: +; CHECK-NOTF-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NOTF-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NOTF-NEXT: [[TMP1:%.*]] = sub i64 [[N]], [[N_VEC]] +; CHECK-NOTF-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK-NOTF: [[VECTOR_BODY]]: +; CHECK-NOTF-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NOTF-NEXT: [[TMP2:%.*]] = sub i64 [[N]], [[INDEX]] +; CHECK-NOTF-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -1 +; CHECK-NOTF-NEXT: [[TMP4:%.*]] = add i64 [[TMP2]], -2 +; CHECK-NOTF-NEXT: [[TMP5:%.*]] = add i64 [[TMP2]], -3 +; CHECK-NOTF-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP2]] to i32 +; CHECK-NOTF-NEXT: [[TMP7:%.*]] = trunc i64 [[TMP3]] to i32 +; CHECK-NOTF-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP4]] to i32 +; CHECK-NOTF-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP5]] to i32 +; CHECK-NOTF-NEXT: [[TMP10:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP2]], i32 0 +; CHECK-NOTF-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP3]], i32 0 +; CHECK-NOTF-NEXT: [[TMP12:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP4]], i32 0 +; CHECK-NOTF-NEXT: 
[[TMP13:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP5]], i32 0 +; CHECK-NOTF-NEXT: store i32 [[TMP6]], ptr [[TMP10]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP7]], ptr [[TMP11]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP8]], ptr [[TMP12]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP9]], ptr [[TMP13]], align 4 +; CHECK-NOTF-NEXT: [[TMP14:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP2]], i32 1 +; CHECK-NOTF-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP3]], i32 1 +; CHECK-NOTF-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP4]], i32 1 +; CHECK-NOTF-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP5]], i32 1 +; CHECK-NOTF-NEXT: store i32 [[TMP6]], ptr [[TMP14]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP7]], ptr [[TMP15]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP8]], ptr [[TMP16]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP9]], ptr [[TMP17]], align 4 +; CHECK-NOTF-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP2]], i32 2 +; CHECK-NOTF-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP3]], i32 2 +; CHECK-NOTF-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP4]], i32 2 +; CHECK-NOTF-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[TMP5]], i32 2 +; CHECK-NOTF-NEXT: store i32 [[TMP6]], ptr [[TMP18]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP7]], ptr [[TMP19]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP8]], ptr [[TMP20]], align 4 +; CHECK-NOTF-NEXT: store i32 [[TMP9]], ptr [[TMP21]], align 4 +; CHECK-NOTF-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NOTF-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NOTF-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] +; CHECK-NOTF: [[MIDDLE_BLOCK]]: +; CHECK-NOTF-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], 
[[N_VEC]] +; CHECK-NOTF-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] +; CHECK-NOTF: [[SCALAR_PH]]: +; CHECK-NOTF-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP1]], %[[MIDDLE_BLOCK]] ], [ [[N]], %[[LOOP_PREHEADER]] ] +; CHECK-NOTF-NEXT: br label %[[LOOP:.*]] +; CHECK-NOTF: [[LOOP]]: +; CHECK-NOTF-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] +; CHECK-NOTF-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-NOTF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 0 +; CHECK-NOTF-NEXT: store i32 [[IV_TRUNC]], ptr [[ARRAYIDX]], align 4 +; CHECK-NOTF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 1 +; CHECK-NOTF-NEXT: store i32 [[IV_TRUNC]], ptr [[ARRAYIDX1]], align 4 +; CHECK-NOTF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 2 +; CHECK-NOTF-NEXT: store i32 [[IV_TRUNC]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NOTF-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; CHECK-NOTF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-NOTF-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP24:![0-9]+]] +; CHECK-NOTF: [[EXIT_LOOPEXIT]]: +; CHECK-NOTF-NEXT: br label %[[EXIT]] +; CHECK-NOTF: [[EXIT]]: +; CHECK-NOTF-NEXT: ret void +; +; CHECK-TF-LABEL: define void @store_factor_4_with_tail_gap_reverse( +; CHECK-TF-SAME: i64 [[N:%.*]], ptr noalias [[A:%.*]]) #[[ATTR2]] { +; CHECK-TF-NEXT: [[ENTRY:.*:]] +; CHECK-TF-NEXT: [[CMP:%.*]] = icmp sgt i64 [[N]], -1 +; CHECK-TF-NEXT: br i1 [[CMP]], label %[[LOOP_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK-TF: [[LOOP_PREHEADER]]: +; CHECK-TF-NEXT: br label %[[LOOP:.*]] +; CHECK-TF: [[LOOP]]: +; CHECK-TF-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[N]], %[[LOOP_PREHEADER]] ] +; CHECK-TF-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i32 +; CHECK-TF-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds 
[4 x i32], ptr [[A]], i64 [[IV]], i32 0 +; CHECK-TF-NEXT: store i32 [[IV_TRUNC]], ptr [[ARRAYIDX]], align 4 +; CHECK-TF-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 1 +; CHECK-TF-NEXT: store i32 [[IV_TRUNC]], ptr [[ARRAYIDX1]], align 4 +; CHECK-TF-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x i32], ptr [[A]], i64 [[IV]], i32 2 +; CHECK-TF-NEXT: store i32 [[IV_TRUNC]], ptr [[ARRAYIDX2]], align 4 +; CHECK-TF-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1 +; CHECK-TF-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 0 +; CHECK-TF-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT:.*]], label %[[LOOP]] +; CHECK-TF: [[EXIT_LOOPEXIT]]: +; CHECK-TF-NEXT: br label %[[EXIT]] +; CHECK-TF: [[EXIT]]: +; CHECK-TF-NEXT: ret void +; +entry: + %cmp = icmp sgt i64 %n, -1 + br i1 %cmp, label %loop, label %exit + +loop: + %iv = phi i64 [ %n, %entry ], [ %iv.next, %loop ] + %iv.trunc = trunc i64 %iv to i32 + %arrayidx = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, i32 0 + store i32 %iv.trunc, ptr %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, i32 1 + store i32 %iv.trunc, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds [4 x i32], ptr %a, i64 %iv, i32 2 + store i32 %iv.trunc, ptr %arrayidx2, align 4 + %iv.next = add nsw i64 %iv, -1 + %exitcond.not = icmp eq i64 %iv, 0 + br i1 %exitcond.not, label %exit, label %loop + +exit: + ret void +} + attributes #0 = { "target-cpu"="neoverse-512tvb" } From 0c5761dc749630c82c701a30197b3f8870ccf5bc Mon Sep 17 00:00:00 2001 From: Cyndy Ishida Date: Tue, 23 Jun 2026 07:13:46 -0700 Subject: [PATCH 193/511] [clang] Avoid per-builtin std::string allocation in initializeBuiltins (#205162) `initializeBuiltins()` previously registered every builtin through an allocated std::string every time a `CompilerInstance` initialized. This was hot for module-heavy builds, where each built module re-registered the full set. 
Add `getBuiltinNameInto()`, which writes the name into a caller-provided buffer and returns a `StringRef`, with no allocation when the shard has no prefix (the common case). `Info::getName` now delegates to it so the two cannot diverge. This change is output behavior-preserving. Resolves: rdar://178672190 --- clang/lib/Basic/Builtins.cpp | 23 ++++++++++++++++++---- clang/test/AST/builtin-name-registration.c | 12 +++++++++++ 2 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 clang/test/AST/builtin-name-registration.c diff --git a/clang/lib/Basic/Builtins.cpp b/clang/lib/Basic/Builtins.cpp index 49517fc748112..06fba04eb2df3 100644 --- a/clang/lib/Basic/Builtins.cpp +++ b/clang/lib/Basic/Builtins.cpp @@ -15,6 +15,7 @@ #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/TargetInfo.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" using namespace clang; @@ -71,8 +72,21 @@ Builtin::Context::getShardAndInfo(unsigned ID) const { llvm_unreachable("Invalid target builtin shard structure!"); } +/// Return a non-owning StringRef of the builtin's name, reconstructed into Buf. 
+static StringRef getBuiltinNameInto(const Builtin::InfosShard &Shard, + const Builtin::Info &BuiltinInfo, + SmallVectorImpl &Buf) { + StringRef Name = (*Shard.Strings)[BuiltinInfo.Offsets.Name]; + if (Shard.NamePrefix.empty()) + return Name; + Buf.assign(Shard.NamePrefix.begin(), Shard.NamePrefix.end()); + Buf.append(Name.begin(), Name.end()); + return StringRef(Buf.data(), Buf.size()); +} + std::string Builtin::Info::getName(const Builtin::InfosShard &Shard) const { - return (Twine(Shard.NamePrefix) + (*Shard.Strings)[Offsets.Name]).str(); + SmallString<256> Buf; + return getBuiltinNameInto(Shard, *this, Buf).str(); } /// Return the identifier name for the specified builtin, @@ -297,12 +311,13 @@ void Builtin::Context::initializeBuiltins(IdentifierTable &Table, const LangOptions &LangOpts) { { unsigned ID = 0; + llvm::SmallString<256> NameBuf; // Step #1: mark all target-independent builtins with their ID's. for (const auto &Shard : BuiltinShards) for (const auto &I : Shard.Infos) { // If this is a real builtin (ID != 0) and is supported, add it. if (ID != 0 && builtinIsSupported(*Shard.Strings, I, LangOpts)) - Table.get(I.getName(Shard)).setBuiltinID(ID); + Table.get(getBuiltinNameInto(Shard, I, NameBuf)).setBuiltinID(ID); ++ID; } assert(ID == FirstTSBuiltin && "Should have added all non-target IDs!"); @@ -311,14 +326,14 @@ void Builtin::Context::initializeBuiltins(IdentifierTable &Table, for (const auto &Shard : TargetShards) for (const auto &I : Shard.Infos) { if (builtinIsSupported(*Shard.Strings, I, LangOpts)) - Table.get(I.getName(Shard)).setBuiltinID(ID); + Table.get(getBuiltinNameInto(Shard, I, NameBuf)).setBuiltinID(ID); ++ID; } // Step #3: Register target-specific builtins for AuxTarget. 
for (const auto &Shard : AuxTargetShards) for (const auto &I : Shard.Infos) { - Table.get(I.getName(Shard)).setBuiltinID(ID); + Table.get(getBuiltinNameInto(Shard, I, NameBuf)).setBuiltinID(ID); ++ID; } } diff --git a/clang/test/AST/builtin-name-registration.c b/clang/test/AST/builtin-name-registration.c new file mode 100644 index 0000000000000..c5e81c665a280 --- /dev/null +++ b/clang/test/AST/builtin-name-registration.c @@ -0,0 +1,12 @@ +// Verify builtins register under their full name when formed with a prefix. +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -ast-dump %s | FileCheck %s + +void use(void) { + (void)__builtin_abs(-1); + __builtin_ia32_pause(); +} + +// CHECK: FunctionDecl {{.*}} implicit used __builtin_abs 'int (int)' +// CHECK: BuiltinAttr +// CHECK: FunctionDecl {{.*}} implicit used __builtin_ia32_pause 'void (void)' +// CHECK: BuiltinAttr From 989e35fcf7a0af3de6c93b417afda8f8e5325889 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Tue, 23 Jun 2026 23:21:28 +0900 Subject: [PATCH 194/511] [SYCL][E2E] Disable memory_fill.cpp on DG2 (#22386) Failing on DG2, see [here](https://github.com/intel/llvm/issues/21556#issuecomment-4768918545). Signed-off-by: Nick Sarnie --- sycl/test-e2e/Adapters/level_zero/memory_fill.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/Adapters/level_zero/memory_fill.cpp b/sycl/test-e2e/Adapters/level_zero/memory_fill.cpp index d8c69005fca66..f866cfe10f3ab 100644 --- a/sycl/test-e2e/Adapters/level_zero/memory_fill.cpp +++ b/sycl/test-e2e/Adapters/level_zero/memory_fill.cpp @@ -2,7 +2,7 @@ // UNSUPPORTED: level_zero_v2_adapter // UNSUPPORTED-INTENDED: v2 adapter does not allow specifying command queue. 
-// UNSUPPORTED: windows && gpu-intel-gen12 +// UNSUPPORTED: windows && (gpu-intel-gen12 || gpu-intel-dg2) // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/21556 // RUN: %{build} %level_zero_options -o %t.out From c258dce071d39961b36954725bc3022fe13548eb Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Tue, 23 Jun 2026 09:32:00 -0500 Subject: [PATCH 195/511] [Hexagon] Add KCFI support for forward-edge control flow integrity (#191746) Add KCFI support for Hexagon. KCFI provides lightweight forward-edge CFI for indirect calls by embedding a type hash before each function and checking it before indirect calls, without requiring LTO. --- llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp | 109 ++++++++++ llvm/lib/Target/Hexagon/HexagonAsmPrinter.h | 3 + .../Target/Hexagon/HexagonISelLowering.cpp | 27 +++ llvm/lib/Target/Hexagon/HexagonISelLowering.h | 6 + .../lib/Target/Hexagon/HexagonMCInstLower.cpp | 4 + llvm/lib/Target/Hexagon/HexagonPseudo.td | 10 + .../Target/Hexagon/HexagonTargetMachine.cpp | 4 + llvm/test/CodeGen/Hexagon/kcfi.ll | 191 ++++++++++++++++++ 8 files changed, 354 insertions(+) create mode 100644 llvm/test/CodeGen/Hexagon/kcfi.ll diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 2a1427b38142c..61ca9020bb689 100644 --- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -976,6 +976,115 @@ void HexagonAsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, Typed ? 
SledKind::TYPED_EVENT : SledKind::CUSTOM_EVENT, 2); } +void HexagonAsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) { + Register AddrReg = MI.getOperand(0).getReg(); + const int64_t Type = MI.getOperand(1).getImm(); + MachineBasicBlock::const_instr_iterator NextI = std::next(MI.getIterator()); + assert(NextI != MI.getParent()->instr_end() && NextI->isCall() && + "KCFI_CHECK not followed by a call instruction"); + assert(NextI->getOperand(0).getReg() == AddrReg && + "KCFI_CHECK call target doesn't match call operand"); + + // Scratch registers for the compare. Default to R6/R7 (caller-saved, + // in GeneralSubRegs for potential compounding). If AddrReg conflicts, + // fall back through other caller-saved registers. + unsigned ScratchRegs[] = {Hexagon::R6, Hexagon::R7}; + unsigned NextReg = Hexagon::R8; + for (auto &Reg : ScratchRegs) { + if (Reg != AddrReg) + continue; + if (NextReg == AddrReg) + ++NextReg; + Reg = NextReg++; + } + unsigned LoadReg = ScratchRegs[0]; + unsigned TypeReg = ScratchRegs[1]; + unsigned PredReg = Hexagon::P0; + + // Adjust for patchable-function-prefix (nop padding before the function). + int64_t PrefixNops = MI.getMF()->getFunction().getFnAttributeAsParsedInteger( + "patchable-function-prefix"); + int64_t Offset = -(PrefixNops * 4 + 4); + + // Emit the KCFI check sequence. + // + // Packet 1: Load the type hash and materialize the expected hash together. + // The load offset fits in the native instruction field for any + // patchable-function-prefix count, so it never requires a constant + // extender. This lets the extender for ##hash share the same packet, + // saving one packet compared to emitting them separately. 
+ // { r_load = memw(r_addr + #offset); r_type = ##expected_hash } + MCInst *LoadInst = OutContext.createMCInst(); + LoadInst->setOpcode(Hexagon::L2_loadri_io); + LoadInst->addOperand(MCOperand::createReg(LoadReg)); + LoadInst->addOperand(MCOperand::createReg(AddrReg)); + LoadInst->addOperand(MCOperand::createExpr(HexagonMCExpr::create( + MCConstantExpr::create(Offset, OutContext), OutContext))); + + MCInst *TypeInst = OutContext.createMCInst(); + TypeInst->setOpcode(Hexagon::A2_tfrsi); + TypeInst->addOperand(MCOperand::createReg(TypeReg)); + auto *TypeExpr = HexagonMCExpr::create( + MCConstantExpr::create(Type, OutContext), OutContext); + HexagonMCInstrInfo::setMustExtend(*TypeExpr, true); + TypeInst->addOperand(MCOperand::createExpr(TypeExpr)); + + MCInst LoadTypePacket; + LoadTypePacket.setOpcode(Hexagon::BUNDLE); + LoadTypePacket.addOperand(MCOperand::createImm(0)); + LoadTypePacket.addOperand(MCOperand::createInst(LoadInst)); + LoadTypePacket.addOperand(MCOperand::createInst(TypeInst)); + EmitToStreamer(*OutStreamer, LoadTypePacket); + + // Packet 3: Compare and branch if equal. 
+ // { p0 = cmp.eq(r_load, r_type); if (p0.new) jump:t .Lpass } + MCSymbol *Pass = OutContext.createTempSymbol(); + + MCInst *CmpInst = OutContext.createMCInst(); + CmpInst->setOpcode(Hexagon::C2_cmpeq); + CmpInst->addOperand(MCOperand::createReg(PredReg)); + CmpInst->addOperand(MCOperand::createReg(LoadReg)); + CmpInst->addOperand(MCOperand::createReg(TypeReg)); + + MCInst *JumpInst = OutContext.createMCInst(); + JumpInst->setOpcode(Hexagon::J2_jumptnewpt); + JumpInst->addOperand(MCOperand::createReg(PredReg)); + JumpInst->addOperand(MCOperand::createExpr(HexagonMCExpr::create( + MCSymbolRefExpr::create(Pass, OutContext), OutContext))); + + MCInst CmpJmpPacket; + CmpJmpPacket.setOpcode(Hexagon::BUNDLE); + CmpJmpPacket.addOperand(MCOperand::createImm(0)); + CmpJmpPacket.addOperand(MCOperand::createInst(CmpInst)); + CmpJmpPacket.addOperand(MCOperand::createInst(JumpInst)); + EmitToStreamer(*OutStreamer, CmpJmpPacket); + + // Packet 4: Crash on mismatch via misaligned load. + // Use the same mechanism as llvm.trap (PS_crash): a doubleword load from + // a misaligned address is guaranteed to fault in all execution modes, + // including kernel/monitor mode where trap0 may not generate a useful + // exception. 
+ MCSymbol *TrapLabel = OutContext.createTempSymbol(); + OutStreamer->emitLabel(TrapLabel); + + MCInst *CrashInst = OutContext.createMCInst(); + CrashInst->setOpcode(Hexagon::PS_loadrdabs); + CrashInst->addOperand(MCOperand::createReg(Hexagon::D13)); + auto *CrashExpr = HexagonMCExpr::create( + MCConstantExpr::create(0xBADC0FEE, OutContext), OutContext); + HexagonMCInstrInfo::setMustExtend(*CrashExpr, true); + CrashInst->addOperand(MCOperand::createExpr(CrashExpr)); + + MCInst CrashPacket; + CrashPacket.setOpcode(Hexagon::BUNDLE); + CrashPacket.addOperand(MCOperand::createImm(0)); + CrashPacket.addOperand(MCOperand::createInst(CrashInst)); + EmitToStreamer(*OutStreamer, CrashPacket); + + emitKCFITrapEntry(*MI.getMF(), TrapLabel); + OutStreamer->emitLabel(Pass); +} + void HexagonAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind) { static const int8_t NoopsInSledCount = 6; // We want to emit the following pattern: diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h index 83621142ca5b5..1c71a2739bd49 100644 --- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h +++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h @@ -68,6 +68,9 @@ class TargetMachine; void LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, bool Typed); void EmitSled(const MachineInstr &MI, SledKind Kind); + // KCFI check lowering. + void LowerKCFI_CHECK(const MachineInstr &MI); + void HexagonProcessInstruction(MCInst &Inst, const MachineInstr &MBB); void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O); diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp index b369bdff477d6..acafd754969d7 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -644,6 +644,8 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, unsigned OpCode = DoesNotReturn ? 
HexagonISD::CALLnr : HexagonISD::CALL; Chain = DAG.getNode(OpCode, dl, {MVT::Other, MVT::Glue}, Ops); + if (CLI.CFIType) + Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue()); Glue = Chain.getValue(1); // Create the CALLSEQ_END node. @@ -3971,6 +3973,31 @@ MachineBasicBlock *HexagonTargetLowering::EmitInstrWithCustomInserter( } } +MachineInstr * +HexagonTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator &MBBI, + const TargetInstrInfo *TII) const { + assert(MBBI->isCall() && MBBI->getCFIType() && + "Invalid call instruction for a KCFI check"); + + switch (MBBI->getOpcode()) { + case Hexagon::J2_callr: + case Hexagon::PS_callr_nr: + break; + default: + llvm_unreachable("Unexpected CFI call opcode"); + } + + MachineOperand &Target = MBBI->getOperand(0); + assert(Target.isReg() && "Invalid target operand for an indirect call"); + Target.setIsRenamable(false); + + return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Hexagon::KCFI_CHECK)) + .addReg(Target.getReg()) + .addImm(MBBI->getCFIType()) + .getInstr(); +} + bool HexagonTargetLowering::isMaskAndCmp0FoldingBeneficial( const Instruction &AndI) const { // Only sink 'and' mask to cmp use block if it is masking a single bit since diff --git a/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/llvm/lib/Target/Hexagon/HexagonISelLowering.h index 00152d7615769..87041e696f0f4 100644 --- a/llvm/lib/Target/Hexagon/HexagonISelLowering.h +++ b/llvm/lib/Target/Hexagon/HexagonISelLowering.h @@ -291,6 +291,12 @@ class HexagonTargetLowering : public TargetLowering { EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override; + bool supportKCFIBundles() const override { return true; } + + MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB, + MachineBasicBlock::instr_iterator &MBBI, + const TargetInstrInfo *TII) const override; + private: void initializeHVXLowering(); unsigned getPreferredHvxVectorAction(MVT VecTy) const; diff --git 
a/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp b/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp index c592a55615859..668c5e436e5ff 100644 --- a/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp +++ b/llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp @@ -123,6 +123,10 @@ void llvm::HexagonLowerToMC(const MCInstrInfo &MCII, const MachineInstr *MI, AP.LowerPATCHABLE_EVENT_CALL(*MI, true); return; } + if (MI->getOpcode() == Hexagon::KCFI_CHECK) { + AP.LowerKCFI_CHECK(*MI); + return; + } MCInst *MCI = AP.OutContext.createMCInst(); MCI->setOpcode(MI->getOpcode()); diff --git a/llvm/lib/Target/Hexagon/HexagonPseudo.td b/llvm/lib/Target/Hexagon/HexagonPseudo.td index 4e5e8c3d26f00..d75a5c0e51ce8 100644 --- a/llvm/lib/Target/Hexagon/HexagonPseudo.td +++ b/llvm/lib/Target/Hexagon/HexagonPseudo.td @@ -621,6 +621,16 @@ defm PS_storerd : NewCircularStore; let hasSideEffects = 1, isPseudo = 1, isCodeGenOnly = 1, isSolo = 1 in def PS_crash: InstHexagon<(outs), (ins), "", [], "", PSEUDO, TypePSEUDO>; +// KCFI type check pseudo -- lowered in the AsmPrinter to a +// load-compare-trap sequence before indirect calls. On mismatch, a +// misaligned load faults. +// Defs: R6/R7 default scratch (R8 fallback if AddrReg conflicts), P0 for +// compare, D13 for the crash load. +let hasSideEffects = 1, mayLoad = 1, isPseudo = 1, isCodeGenOnly = 1, + Defs = [R6, R7, R8, P0, D13], Size = 28 in +def KCFI_CHECK : InstHexagon<(outs), (ins IntRegs:$ptr, i32imm:$type), + "", [], "", PSEUDO, TypePSEUDO>; + // This is actual trap1 instruction from before v65. It's here since it is // no longer included in DepInstrInfo.td. 
def PS_trap1 : HInst<(outs), (ins u8_0Imm:$Ii), "trap1(#$Ii)", tc_53c851ab, diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 5c72b6cb20883..5d835c9c6e71c 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -508,6 +508,10 @@ void HexagonPassConfig::addPreEmitPass() { addPass(&HexagonLiveVariablesID); } + // Emit KCFI checks for indirect calls. Must run before packetization so + // the check and call can be bundled together into a VLIW packet. + addPass(createKCFIPass()); + // Packetization is mandatory: it handles gather/scatter at all opt levels. addPass(createHexagonPacketizer(NoOpt)); diff --git a/llvm/test/CodeGen/Hexagon/kcfi.ll b/llvm/test/CodeGen/Hexagon/kcfi.ll new file mode 100644 index 0000000000000..7fd2ee4a9e16d --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/kcfi.ll @@ -0,0 +1,191 @@ +; RUN: llc -mtriple=hexagon -verify-machineinstrs < %s | FileCheck %s --check-prefix=ASM +; RUN: llc -mtriple=hexagon -verify-machineinstrs -stop-after=finalize-isel < %s \ +; RUN: | FileCheck %s --check-prefix=ISEL +; RUN: llc -mtriple=hexagon -verify-machineinstrs -stop-after=kcfi < %s \ +; RUN: | FileCheck %s --check-prefix=KCFI + +; Verify KCFI type hash is emitted before the function. +; ASM: .word 12345678 +; ASM-LABEL: f1: + +define void @f1(ptr noundef %x) !kcfi_type !1 { +; Load and type-hash materialization are combined in one packet. +; ASM: r{{[0-9]+}} = memw(r0+#-4) +; ASM-NEXT: r{{[0-9]+}} = ##12345678 +; ASM-NEXT: } +; ASM-NEXT: { +; ASM-NEXT: p0 = cmp.eq(r{{[0-9]+}},r{{[0-9]+}}) +; ASM-NEXT: if (p0.new) jump:t +; ASM-NEXT: } +; ASM: r{{[0-9]+}}:{{[0-9]+}} = memd(##3134984174) + +; After ISel, the call should carry a cfi-type. +; ISEL-LABEL: name: f1 +; ISEL: J2_callr %0,{{.*}} cfi-type 12345678 + +; After the KCFI pass, the check and call are bundled. 
+; KCFI-LABEL: name: f1 +; KCFI: BUNDLE{{.*}} { +; KCFI-NEXT: KCFI_CHECK $r0, 12345678 +; KCFI-NEXT: J2_callr killed $r0 +; KCFI-NEXT: } + + call void %x() [ "kcfi"(i32 12345678) ] + ret void +} + +; Test with a second call using a different type hash. +define void @f2(ptr noundef %x) !kcfi_type !2 { +; ASM-LABEL: f2: +; ASM: r{{[0-9]+}} = memw(r0+#-4) +; ASM-NEXT: r{{[0-9]+}} = ##1234 +; ASM-NEXT: } +; ASM-NEXT: { +; ASM-NEXT: p0 = cmp.eq(r{{[0-9]+}},r{{[0-9]+}}) +; ASM-NEXT: if (p0.new) jump:t +; ASM-NEXT: } +; ASM: r{{[0-9]+}}:{{[0-9]+}} = memd(##3134984174) + + call void %x() [ "kcfi"(i32 1234) ] + ret void +} + +; Test with patchable-function-entry (nops placed after the label, +; so the KCFI offset is still -4). +define void @f3(ptr noundef %x) #0 { +; ASM-LABEL: f3: +; ASM: nop +; ASM: nop +; ASM: r{{[0-9]+}} = memw(r0+#-4) +; ASM-NEXT: r{{[0-9]+}} = ##12345678 +; ASM-NEXT: } +; ASM-NEXT: { +; ASM-NEXT: p0 = cmp.eq(r{{[0-9]+}},r{{[0-9]+}}) +; ASM-NEXT: if (p0.new) jump:t +; ASM-NEXT: } +; ASM: r{{[0-9]+}}:{{[0-9]+}} = memd(##3134984174) + + call void %x() [ "kcfi"(i32 12345678) ] + ret void +} + +;; Test patchable-function-prefix: nops are placed before the function entry +;; (after the type hash), so the KCFI load offset is adjusted from -4 to +;; -(PrefixNops*4 + 4). +define void @f4_prefix(ptr noundef %x) #1 !kcfi_type !1 { +; ASM-LABEL: f4_prefix: +; ASM: r{{[0-9]+}} = memw(r0+#-12) +; ASM-NEXT: r{{[0-9]+}} = ##12345678 +; ASM-NEXT: } +; ASM-NEXT: { +; ASM-NEXT: p0 = cmp.eq(r{{[0-9]+}},r{{[0-9]+}}) +; ASM-NEXT: if (p0.new) jump:t +; ASM-NEXT: } +; ASM: r{{[0-9]+}}:{{[0-9]+}} = memd(##3134984174) + + call void %x() [ "kcfi"(i32 12345678) ] + ret void +} + +;; Test patchable-function-prefix with 3 nops: offset = -(3*4+4) = -16. 
+define void @f5_prefix3(ptr noundef %x) #2 !kcfi_type !1 { +; ASM-LABEL: f5_prefix3: +; ASM: r{{[0-9]+}} = memw(r0+#-16) +; ASM-NEXT: r{{[0-9]+}} = ##12345678 +; ASM-NEXT: } +; ASM-NEXT: { +; ASM-NEXT: p0 = cmp.eq(r{{[0-9]+}},r{{[0-9]+}}) +; ASM-NEXT: if (p0.new) jump:t +; ASM-NEXT: } +; ASM: r{{[0-9]+}}:{{[0-9]+}} = memd(##3134984174) + + call void %x() [ "kcfi"(i32 12345678) ] + ret void +} + +;; Test scratch register conflict: call target is R6. The default scratch +;; registers are R6/R7, so when the target occupies R6, the load scratch +;; must use R8 instead. +define void @f6_target_r6() { +; ASM-LABEL: f6_target_r6: +; ASM: r8 = memw(r6+#-4) +; ASM-NEXT: r7 = ##12345678 +; ASM-NEXT: } +; ASM-NEXT: { +; ASM-NEXT: p0 = cmp.eq(r8,r7) +; ASM-NEXT: if (p0.new) jump:t +; ASM-NEXT: } +; ASM: r{{[0-9]+}}:{{[0-9]+}} = memd(##3134984174) + +; KCFI-LABEL: name: f6_target_r6 +; KCFI: BUNDLE{{.*}} { +; KCFI-NEXT: KCFI_CHECK $r6, 12345678 +; KCFI-NEXT: J2_callr{{.*}}killed $r6 +; KCFI-NEXT: } + + %target = call ptr asm sideeffect "", "={r6}"() + call void %target() [ "kcfi"(i32 12345678) ] + ret void +} + +;; Test scratch register conflict: call target is R7. The type-hash scratch +;; must use R8 instead. +define void @f7_target_r7() { +; ASM-LABEL: f7_target_r7: +; ASM: r6 = memw(r7+#-4) +; ASM-NEXT: r8 = ##12345678 +; ASM-NEXT: } +; ASM-NEXT: { +; ASM-NEXT: p0 = cmp.eq(r6,r8) +; ASM-NEXT: if (p0.new) jump:t +; ASM-NEXT: } +; ASM: r{{[0-9]+}}:{{[0-9]+}} = memd(##3134984174) + +; KCFI-LABEL: name: f7_target_r7 +; KCFI: BUNDLE{{.*}} { +; KCFI-NEXT: KCFI_CHECK $r7, 12345678 +; KCFI-NEXT: J2_callr{{.*}}killed $r7 +; KCFI-NEXT: } + + %target = call ptr asm sideeffect "", "={r7}"() + call void %target() [ "kcfi"(i32 12345678) ] + ret void +} + +;; Test noreturn indirect call with KCFI (uses PS_callr_nr opcode). 
+define void @f8_noreturn(ptr noundef %x) { +; ASM-LABEL: f8_noreturn: +; ASM: r{{[0-9]+}} = memw(r0+#-4) +; ASM-NEXT: r{{[0-9]+}} = ##12345678 +; ASM-NEXT: } +; ASM-NEXT: { +; ASM-NEXT: p0 = cmp.eq(r{{[0-9]+}},r{{[0-9]+}}) +; ASM-NEXT: if (p0.new) jump:t +; ASM-NEXT: } +; ASM: r{{[0-9]+}}:{{[0-9]+}} = memd(##3134984174) + +; ISEL-LABEL: name: f8_noreturn +; ISEL: PS_callr_nr %0,{{.*}} cfi-type 12345678 + +; KCFI-LABEL: name: f8_noreturn +; KCFI: BUNDLE{{.*}} { +; KCFI-NEXT: KCFI_CHECK $r0, 12345678 +; KCFI-NEXT: PS_callr_nr killed $r0 +; KCFI-NEXT: } + + call void %x() #3 [ "kcfi"(i32 12345678) ] + unreachable +} + +; Verify the .kcfi_traps section is emitted. +; ASM: .section .kcfi_traps + +attributes #0 = { "patchable-function-entry"="2" } +attributes #1 = { "patchable-function-prefix"="2" } +attributes #2 = { "patchable-function-prefix"="3" } +attributes #3 = { noreturn } + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"kcfi", i32 1} +!1 = !{i32 12345678} +!2 = !{i32 1234} From bf47896175ca9dafc705e9c69fbd3aa4cb177196 Mon Sep 17 00:00:00 2001 From: Chirag Wattamwar <119049661+ChiragSW@users.noreply.github.com> Date: Tue, 23 Jun 2026 20:08:19 +0530 Subject: [PATCH 196/511] [mlir][arith] Reject signful integer element types in `arith.constant` (#204937) Update arith.constant verification to reject integer constants with signed and unsigned element types including shaped constants like tensors and vectors, as the arith dialect does not support signed/unsigned types. This incidentally addresses cases where further lowering would crash (e.g. 
SPIR-V constant lowering used IntegerAttr::getInt() on an unsigned integer attribute from tensor<2xui8>) Fixes #204911 --- mlir/include/mlir/Dialect/Arith/IR/ArithOps.td | 8 ++------ mlir/lib/Dialect/Arith/IR/ArithOps.cpp | 4 ++-- mlir/test/Dialect/Arith/invalid.mlir | 16 ++++++++++++++++ .../Tosa/tosa-arith-const-to-tosa-const.mlir | 8 -------- mlir/test/Dialect/common_folders.mlir | 12 ++++++------ mlir/test/lib/Dialect/Test/TestOps.td | 4 ++-- 6 files changed, 28 insertions(+), 24 deletions(-) diff --git a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td index 1f8b07aed3f0d..423948c8734af 100644 --- a/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td +++ b/mlir/include/mlir/Dialect/Arith/IR/ArithOps.td @@ -244,12 +244,8 @@ def Arith_ConstantOp : Op(type) && - !llvm::cast(type).isSignless()) + if (auto intType = dyn_cast(getElementTypeOrSelf(type)); + intType && !intType.isSignless()) return emitOpError("integer return type must be signless"); // Any float or elements attribute are acceptable. 
if (!llvm::isa(getValue())) { diff --git a/mlir/test/Dialect/Arith/invalid.mlir b/mlir/test/Dialect/Arith/invalid.mlir index 421dac9cfee15..49f55e855663d 100644 --- a/mlir/test/Dialect/Arith/invalid.mlir +++ b/mlir/test/Dialect/Arith/invalid.mlir @@ -32,6 +32,14 @@ func.func @non_signless_constant() { // ----- +func.func @non_signless_tensor_constant() { + // expected-error @+1 {{'arith.constant' op integer return type must be signless}} + %0 = arith.constant dense<[10, 20]> : tensor<2xui8> + return +} + +// ----- + func.func @complex_constant_wrong_attribute_type() { // expected-error @+1 {{'arith.constant' op failed to verify that all of {value, result} have same type}} %0 = "arith.constant" () {value = 1.0 : f32} : () -> complex @@ -48,6 +56,14 @@ func.func @non_signless_constant() { // ----- +func.func @non_signless_vector_constant() { + // expected-error @+1 {{'arith.constant' op integer return type must be signless}} + %0 = arith.constant dense<[0, 1]> : vector<2xsi32> + return +} + +// ----- + func.func @bitcast_different_bit_widths(%arg : f16) -> f32 { // expected-error@+1 {{are cast incompatible}} %res = arith.bitcast %arg : f16 to f32 diff --git a/mlir/test/Dialect/Tosa/tosa-arith-const-to-tosa-const.mlir b/mlir/test/Dialect/Tosa/tosa-arith-const-to-tosa-const.mlir index fc2d77ef375ec..79219510a239f 100644 --- a/mlir/test/Dialect/Tosa/tosa-arith-const-to-tosa-const.mlir +++ b/mlir/test/Dialect/Tosa/tosa-arith-const-to-tosa-const.mlir @@ -65,14 +65,6 @@ func.func @rewrite_resource_tensor() -> tensor<4xf32> { // ----- -// CHECK-LABEL: func.func @rewrite_quant_tensor -// CHECK: %[[CST:.*]] = "tosa.const"() <{values = dense<[10, 20]> : tensor<2xui8>}> : () -> tensor<2xui8> -func.func @rewrite_quant_tensor() -> tensor<2xui8> { - %c = arith.constant dense<[10, 20]> : tensor<2xui8> - return %c : tensor<2xui8> -} - -// ----- // CHECK-LABEL: func.func @rewrite_quant_uniform_tensor // CHECK: %[[CST:.*]] = "tosa.const"() <{values = dense<["10", "20"]> : 
tensor<2x!quant.uniform>}> : () -> tensor<2x!quant.uniform> diff --git a/mlir/test/Dialect/common_folders.mlir b/mlir/test/Dialect/common_folders.mlir index 92598b4937552..9e38c1479e2e7 100644 --- a/mlir/test/Dialect/common_folders.mlir +++ b/mlir/test/Dialect/common_folders.mlir @@ -1,12 +1,12 @@ // RUN: mlir-opt %s --test-fold-type-converting-op --split-input-file | FileCheck %s -// CHECK-LABEL: @test_fold_unary_op_f32_to_si32( -func.func @test_fold_unary_op_f32_to_si32() -> tensor<4x2xsi32> { - // CHECK-NEXT: %[[POSITIVE_ONE:.*]] = arith.constant dense<1> : tensor<4x2xsi32> - // CHECK-NEXT: return %[[POSITIVE_ONE]] : tensor<4x2xsi32> +// CHECK-LABEL: @test_fold_unary_op_f32_to_i32( +func.func @test_fold_unary_op_f32_to_i32() -> tensor<4x2xi32> { + // CHECK-NEXT: %[[POSITIVE_ONE:.*]] = arith.constant dense<1> : tensor<4x2xi32> + // CHECK-NEXT: return %[[POSITIVE_ONE]] : tensor<4x2xi32> %operand = arith.constant dense<5.1> : tensor<4x2xf32> - %sign = test.sign %operand : (tensor<4x2xf32>) -> tensor<4x2xsi32> - return %sign : tensor<4x2xsi32> + %sign = test.sign %operand : (tensor<4x2xf32>) -> tensor<4x2xi32> + return %sign : tensor<4x2xi32> } // ----- diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index df321bd5feabc..31c19487075d8 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -1358,10 +1358,10 @@ def OpQ : TEST_Op<"op_q"> { let results = (outs AnyType); } -// Test constant-folding a pattern that maps `(F32) -> SI32`. +// Test constant-folding a pattern that maps `(F32) -> I32`. 
def SignOp : TEST_Op<"sign", [SameOperandsAndResultShape]> { let arguments = (ins RankedTensorOf<[F32]>:$operand); - let results = (outs RankedTensorOf<[SI32]>:$result); + let results = (outs RankedTensorOf<[I32]>:$result); let assemblyFormat = [{ $operand attr-dict `:` functional-type(operands, results) From b0b1e0b831ffaeffcbc6c8ec151f2a1fec3e7e31 Mon Sep 17 00:00:00 2001 From: Jeff Bailey Date: Tue, 23 Jun 2026 15:59:59 +0100 Subject: [PATCH 197/511] Reland "[libc] Implement basename and dirname in libgen.h #204554" (#205352) Added the POSIX standard functions basename and dirname under a new libgen.h header. The implementations modify the input path in-place using cpp::string_view to determine boundaries safely. Added find_last_not_of to cpp::string_view to support trailing slash removal. Implemented: libc/include/libgen.yaml, libgen.h.def: Public API definitions. libc/src/libgen/basename.cpp, dirname.cpp: Generic implementations. libc/test/src/libgen/: Unit and hermetic tests. Registered the new entrypoints for all active Linux targets (x86_64, aarch64, arm, riscv) and added docgen configuration. The tests are skipped when using ASan because death tests do not currently work with it. Assisted-by: Automated tooling, human reviewed. 
--- libc/config/linux/aarch64/entrypoints.txt | 4 ++ libc/config/linux/arm/entrypoints.txt | 4 ++ libc/config/linux/riscv/entrypoints.txt | 4 ++ libc/config/linux/x86_64/entrypoints.txt | 4 ++ libc/docs/CMakeLists.txt | 1 + libc/docs/headers/index.rst | 1 + libc/include/CMakeLists.txt | 8 +++ libc/include/libgen.yaml | 20 ++++++ libc/src/CMakeLists.txt | 1 + libc/src/__support/CPP/string_view.h | 9 +++ libc/src/libgen/CMakeLists.txt | 23 ++++++ libc/src/libgen/basename.cpp | 42 +++++++++++ libc/src/libgen/basename.h | 30 ++++++++ libc/src/libgen/dirname.cpp | 48 +++++++++++++ libc/src/libgen/dirname.h | 30 ++++++++ libc/test/src/CMakeLists.txt | 1 + libc/test/src/libgen/CMakeLists.txt | 45 ++++++++++++ libc/test/src/libgen/basename_death_test.cpp | 22 ++++++ libc/test/src/libgen/basename_test.cpp | 62 ++++++++++++++++ libc/test/src/libgen/dirname_death_test.cpp | 22 ++++++ libc/test/src/libgen/dirname_test.cpp | 74 ++++++++++++++++++++ libc/utils/docgen/libgen.yaml | 5 ++ 22 files changed, 460 insertions(+) create mode 100644 libc/include/libgen.yaml create mode 100644 libc/src/libgen/CMakeLists.txt create mode 100644 libc/src/libgen/basename.cpp create mode 100644 libc/src/libgen/basename.h create mode 100644 libc/src/libgen/dirname.cpp create mode 100644 libc/src/libgen/dirname.h create mode 100644 libc/test/src/libgen/CMakeLists.txt create mode 100644 libc/test/src/libgen/basename_death_test.cpp create mode 100644 libc/test/src/libgen/basename_test.cpp create mode 100644 libc/test/src/libgen/dirname_death_test.cpp create mode 100644 libc/test/src/libgen/dirname_test.cpp create mode 100644 libc/utils/docgen/libgen.yaml diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 77bf17b666a14..251913fd57b99 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -109,6 +109,10 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.inttypes.strtoimax libc.src.inttypes.strtoumax + # 
libgen.h entrypoints + libc.src.libgen.basename + libc.src.libgen.dirname + # stdbit.h entrypoints libc.src.stdbit.stdc_bit_ceil_uc libc.src.stdbit.stdc_bit_ceil_ui diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index c4ac53c4925a3..805738a3a5756 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -73,6 +73,10 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.inttypes.strtoimax libc.src.inttypes.strtoumax + # libgen.h entrypoints + libc.src.libgen.basename + libc.src.libgen.dirname + # stdbit.h entrypoints libc.src.stdbit.stdc_bit_ceil_uc libc.src.stdbit.stdc_bit_ceil_ui diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index 5e73718c4fc63..a9839b44a0042 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -109,6 +109,10 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.inttypes.strtoimax libc.src.inttypes.strtoumax + # libgen.h entrypoints + libc.src.libgen.basename + libc.src.libgen.dirname + # stdbit.h entrypoints libc.src.stdbit.stdc_bit_ceil_uc libc.src.stdbit.stdc_bit_ceil_ui diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index eb7d4781936ee..037c8b50c352e 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -125,6 +125,10 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.inttypes.wcstoimax libc.src.inttypes.wcstoumax + # libgen.h entrypoints + libc.src.libgen.basename + libc.src.libgen.dirname + # stdbit.h entrypoints libc.src.stdbit.stdc_bit_ceil_uc libc.src.stdbit.stdc_bit_ceil_ui diff --git a/libc/docs/CMakeLists.txt b/libc/docs/CMakeLists.txt index cf54edeae66de..ded99393f9390 100644 --- a/libc/docs/CMakeLists.txt +++ b/libc/docs/CMakeLists.txt @@ -54,6 +54,7 @@ if (SPHINX_FOUND) float glob inttypes + libgen locale nl_types net/if diff --git a/libc/docs/headers/index.rst b/libc/docs/headers/index.rst 
index e818e1549c0d1..893ddf839cccd 100644 --- a/libc/docs/headers/index.rst +++ b/libc/docs/headers/index.rst @@ -20,6 +20,7 @@ Implementation Status float glob inttypes + libgen locale math/index.rst net/if diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 549dbd9e4c3f8..e8168687109b0 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -130,6 +130,14 @@ add_header_macro( .llvm-libc-macros.float_macros ) +add_header_macro( + libgen + ../libc/include/libgen.yaml + libgen.h + DEPENDS + .llvm_libc_common_h +) + add_header_macro( limits ../libc/include/limits.yaml diff --git a/libc/include/libgen.yaml b/libc/include/libgen.yaml new file mode 100644 index 0000000000000..c79ab79259be6 --- /dev/null +++ b/libc/include/libgen.yaml @@ -0,0 +1,20 @@ +header: libgen.h +standards: + - posix +macros: [] +types: [] +enums: [] +objects: [] +functions: + - name: basename + standards: + - posix + return_type: char * + arguments: + - type: char * + - name: dirname + standards: + - posix + return_type: char * + arguments: + - type: char * diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt index 9db314f54723b..56085c9632f59 100644 --- a/libc/src/CMakeLists.txt +++ b/libc/src/CMakeLists.txt @@ -7,6 +7,7 @@ add_subdirectory(dlfcn) add_subdirectory(errno) add_subdirectory(fenv) add_subdirectory(inttypes) +add_subdirectory(libgen) add_subdirectory(link) add_subdirectory(math) add_subdirectory(netinet) diff --git a/libc/src/__support/CPP/string_view.h b/libc/src/__support/CPP/string_view.h index 6991fd46a4ace..7b98b7e5fb6c8 100644 --- a/libc/src/__support/CPP/string_view.h +++ b/libc/src/__support/CPP/string_view.h @@ -205,6 +205,15 @@ class string_view { return npos; } + LIBC_INLINE constexpr size_t find_last_not_of(const char c, + size_t end = npos) const { + end = end >= size() ? 
size() : end + 1; + for (; end > 0; --end) + if ((*this)[end - 1] != c) + return end - 1; + return npos; + } + // Finds the first character not equal to c in this view, starting at // position From. LIBC_INLINE constexpr size_t find_first_not_of(const char c, diff --git a/libc/src/libgen/CMakeLists.txt b/libc/src/libgen/CMakeLists.txt new file mode 100644 index 0000000000000..9c315a4e7b41d --- /dev/null +++ b/libc/src/libgen/CMakeLists.txt @@ -0,0 +1,23 @@ +add_entrypoint_object( + basename + SRCS + basename.cpp + HDRS + basename.h + DEPENDS + libc.src.__support.CPP.string_view + libc.src.__support.common + libc.src.__support.macros.config +) + +add_entrypoint_object( + dirname + SRCS + dirname.cpp + HDRS + dirname.h + DEPENDS + libc.src.__support.CPP.string_view + libc.src.__support.common + libc.src.__support.macros.config +) diff --git a/libc/src/libgen/basename.cpp b/libc/src/libgen/basename.cpp new file mode 100644 index 0000000000000..acf53f2d446e9 --- /dev/null +++ b/libc/src/libgen/basename.cpp @@ -0,0 +1,42 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implementation of basename. 
+/// +//===----------------------------------------------------------------------===// + +#include "src/libgen/basename.h" +#include "src/__support/CPP/string_view.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(char *, basename, (char *path)) { + if (path == nullptr || path[0] == '\0') + return const_cast<char *>("."); + + cpp::string_view sv(path); + size_t last_non_slash = sv.find_last_not_of('/'); + + if (last_non_slash == cpp::string_view::npos) + return const_cast<char *>("/"); + + size_t last_slash = sv.substr(0, last_non_slash).find_last_of('/'); + + size_t start = (last_slash == cpp::string_view::npos) ? 0 : last_slash + 1; + size_t end = last_non_slash + 1; + + if (end < sv.size()) + path[end] = '\0'; + + return path + start; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/libgen/basename.h b/libc/src/libgen/basename.h new file mode 100644 index 0000000000000..15239888aa97a --- /dev/null +++ b/libc/src/libgen/basename.h @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Header for basename. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_LIBGEN_BASENAME_H +#define LLVM_LIBC_SRC_LIBGEN_BASENAME_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +/// Return the last component of a pathname. +/// +/// \param path Pointer to the null-terminated pathname string. +/// \return Pointer to the last component of path, or "." if path is null or +/// empty, or "/" if path is all slashes.
+char *basename(char *path); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_LIBGEN_BASENAME_H diff --git a/libc/src/libgen/dirname.cpp b/libc/src/libgen/dirname.cpp new file mode 100644 index 0000000000000..9dd958b63ce9f --- /dev/null +++ b/libc/src/libgen/dirname.cpp @@ -0,0 +1,48 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Implementation of dirname. +/// +//===----------------------------------------------------------------------===// + +#include "src/libgen/dirname.h" +#include "src/__support/CPP/string_view.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(char *, dirname, (char *path)) { + if (path == nullptr || path[0] == '\0') + return const_cast<char *>("."); + + cpp::string_view sv(path); + size_t last_non_slash = sv.find_last_not_of('/'); + + if (last_non_slash == cpp::string_view::npos) + return const_cast<char *>("/"); + + size_t last_slash = sv.substr(0, last_non_slash).find_last_of('/'); + + if (last_slash == cpp::string_view::npos) + return const_cast<char *>("."); + + cpp::string_view dir_sv = sv.substr(0, last_slash); + size_t dir_last_non_slash = dir_sv.find_last_not_of('/'); + + if (dir_last_non_slash == cpp::string_view::npos) { + path[1] = '\0'; + return path; + } + + path[dir_last_non_slash + 1] = '\0'; + return path; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/libgen/dirname.h b/libc/src/libgen/dirname.h new file mode 100644 index 0000000000000..4909b2eb222ad --- /dev/null +++ b/libc/src/libgen/dirname.h @@ -0,0 +1,30 @@
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Header for dirname. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_LIBGEN_DIRNAME_H +#define LLVM_LIBC_SRC_LIBGEN_DIRNAME_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +/// Return the directory component of a pathname. +/// +/// \param path Pointer to the null-terminated pathname string. +/// \return Pointer to the directory component of path, or "." if path is null +/// or empty, or "/" if path is all slashes. +char *dirname(char *path); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_LIBGEN_DIRNAME_H diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt index dd232b29a7a7b..45815c9bba8ca 100644 --- a/libc/test/src/CMakeLists.txt +++ b/libc/test/src/CMakeLists.txt @@ -63,6 +63,7 @@ add_subdirectory(complex) add_subdirectory(ctype) add_subdirectory(errno) add_subdirectory(fenv) +add_subdirectory(libgen) add_subdirectory(link) add_subdirectory(math) add_subdirectory(netinet) diff --git a/libc/test/src/libgen/CMakeLists.txt b/libc/test/src/libgen/CMakeLists.txt new file mode 100644 index 0000000000000..85c28b0764f5c --- /dev/null +++ b/libc/test/src/libgen/CMakeLists.txt @@ -0,0 +1,45 @@ +add_custom_target(libc-libgen-tests) + +add_libc_test( + basename_test + SUITE + libc-libgen-tests + SRCS + basename_test.cpp + DEPENDS + libc.src.libgen.basename +) + +add_libc_test( + dirname_test + SUITE + libc-libgen-tests + SRCS + dirname_test.cpp + DEPENDS + libc.src.libgen.dirname +) + +if (NOT LLVM_USE_SANITIZER) + add_libc_test( + 
basename_death_test + UNIT_TEST_ONLY + SUITE + libc-libgen-tests + SRCS + basename_death_test.cpp + DEPENDS + libc.src.libgen.basename + ) + + add_libc_test( + dirname_death_test + UNIT_TEST_ONLY + SUITE + libc-libgen-tests + SRCS + dirname_death_test.cpp + DEPENDS + libc.src.libgen.dirname + ) +endif() diff --git a/libc/test/src/libgen/basename_death_test.cpp b/libc/test/src/libgen/basename_death_test.cpp new file mode 100644 index 0000000000000..24100dc48aa1b --- /dev/null +++ b/libc/test/src/libgen/basename_death_test.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Death tests for basename. +/// +//===----------------------------------------------------------------------===// + +#include "src/libgen/basename.h" +#include "test/UnitTest/Test.h" + +#ifdef ENABLE_SUBPROCESS_TESTS +TEST(LlvmLibcBasenameTest, ModifyReturnValue) { + char *r = LIBC_NAMESPACE::basename(nullptr); + ASSERT_DEATH([r]() { r[0] = 'a'; }, WITH_SIGNAL(-1)); +} +#endif diff --git a/libc/test/src/libgen/basename_test.cpp b/libc/test/src/libgen/basename_test.cpp new file mode 100644 index 0000000000000..2e8feef715137 --- /dev/null +++ b/libc/test/src/libgen/basename_test.cpp @@ -0,0 +1,62 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Unittests for basename. 
+/// +//===----------------------------------------------------------------------===// + +#include "src/libgen/basename.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcBasenameTest, NullPointer) { + ASSERT_STREQ(LIBC_NAMESPACE::basename(nullptr), "."); +} + +TEST(LlvmLibcBasenameTest, EmptyString) { + char path[] = ""; + ASSERT_STREQ(LIBC_NAMESPACE::basename(path), "."); +} + +TEST(LlvmLibcBasenameTest, RegularPath) { + char path[] = "/usr/lib"; + ASSERT_STREQ(LIBC_NAMESPACE::basename(path), "lib"); +} + +TEST(LlvmLibcBasenameTest, TrailingSlash) { + char path[] = "/usr/"; + ASSERT_STREQ(LIBC_NAMESPACE::basename(path), "usr"); + ASSERT_STREQ(path, "/usr"); +} + +TEST(LlvmLibcBasenameTest, SingleSlash) { + char path[] = "/"; + ASSERT_STREQ(LIBC_NAMESPACE::basename(path), "/"); +} + +TEST(LlvmLibcBasenameTest, MultipleSlashes) { + char path[] = "///"; + ASSERT_STREQ(LIBC_NAMESPACE::basename(path), "/"); +} + +TEST(LlvmLibcBasenameTest, SimpleName) { + char path[] = "a"; + ASSERT_STREQ(LIBC_NAMESPACE::basename(path), "a"); +} + +TEST(LlvmLibcBasenameTest, SimpleNameTrailingSlash) { + char path[] = "a/"; + ASSERT_STREQ(LIBC_NAMESPACE::basename(path), "a"); + ASSERT_STREQ(path, "a"); +} + +TEST(LlvmLibcBasenameTest, ComplexPath) { + char path[] = "///a///"; + ASSERT_STREQ(LIBC_NAMESPACE::basename(path), "a"); + ASSERT_STREQ(path, "///a"); +} diff --git a/libc/test/src/libgen/dirname_death_test.cpp b/libc/test/src/libgen/dirname_death_test.cpp new file mode 100644 index 0000000000000..e135a4ce60cd6 --- /dev/null +++ b/libc/test/src/libgen/dirname_death_test.cpp @@ -0,0 +1,22 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Death tests for dirname. +/// +//===----------------------------------------------------------------------===// + +#include "src/libgen/dirname.h" +#include "test/UnitTest/Test.h" + +#ifdef ENABLE_SUBPROCESS_TESTS +TEST(LlvmLibcDirnameTest, ModifyReturnValue) { + char *r = LIBC_NAMESPACE::dirname(nullptr); + ASSERT_DEATH([r]() { r[0] = 'a'; }, WITH_SIGNAL(-1)); +} +#endif diff --git a/libc/test/src/libgen/dirname_test.cpp b/libc/test/src/libgen/dirname_test.cpp new file mode 100644 index 0000000000000..afd718fb6e559 --- /dev/null +++ b/libc/test/src/libgen/dirname_test.cpp @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Unittests for dirname. 
+/// +//===----------------------------------------------------------------------===// + +#include "src/libgen/dirname.h" +#include "test/UnitTest/Test.h" + +TEST(LlvmLibcDirnameTest, NullPointer) { + ASSERT_STREQ(LIBC_NAMESPACE::dirname(nullptr), "."); +} + +TEST(LlvmLibcDirnameTest, EmptyString) { + char path[] = ""; + ASSERT_STREQ(LIBC_NAMESPACE::dirname(path), "."); +} + +TEST(LlvmLibcDirnameTest, RegularPath) { + char path[] = "/usr/lib"; + ASSERT_STREQ(LIBC_NAMESPACE::dirname(path), "/usr"); + ASSERT_STREQ(path, "/usr"); +} + +TEST(LlvmLibcDirnameTest, TrailingSlash) { + char path[] = "/usr/"; + ASSERT_STREQ(LIBC_NAMESPACE::dirname(path), "/"); + ASSERT_STREQ(path, "/"); +} + +TEST(LlvmLibcDirnameTest, SingleSlash) { + char path[] = "/"; + ASSERT_STREQ(LIBC_NAMESPACE::dirname(path), "/"); +} + +TEST(LlvmLibcDirnameTest, MultipleSlashes) { + char path[] = "///"; + ASSERT_STREQ(LIBC_NAMESPACE::dirname(path), "/"); +} + +TEST(LlvmLibcDirnameTest, SimpleName) { + char path[] = "a"; + ASSERT_STREQ(LIBC_NAMESPACE::dirname(path), "."); +} + +TEST(LlvmLibcDirnameTest, SimpleNameTrailingSlash) { + char path[] = "a/"; + ASSERT_STREQ(LIBC_NAMESPACE::dirname(path), "."); +} + +TEST(LlvmLibcDirnameTest, ComplexPath) { + char path[] = "///a///b///"; + ASSERT_STREQ(LIBC_NAMESPACE::dirname(path), "///a"); + ASSERT_STREQ(path, "///a"); +} + +TEST(LlvmLibcDirnameTest, SlashA) { + char path[] = "/a"; + ASSERT_STREQ(LIBC_NAMESPACE::dirname(path), "/"); + ASSERT_STREQ(path, "/"); +} + +TEST(LlvmLibcDirnameTest, MultipleSlashesA) { + char path[] = "///a"; + ASSERT_STREQ(LIBC_NAMESPACE::dirname(path), "/"); + ASSERT_STREQ(path, "/"); +} diff --git a/libc/utils/docgen/libgen.yaml b/libc/utils/docgen/libgen.yaml new file mode 100644 index 0000000000000..07aad5f1be55c --- /dev/null +++ b/libc/utils/docgen/libgen.yaml @@ -0,0 +1,5 @@ +functions: + basename: + in-latest-posix: '' + dirname: + in-latest-posix: '' From 873dcc3ed54ce7686e22857b930d1283e4d74bab Mon Sep 17 00:00:00 2001 
From: Aaron Ballman Date: Tue, 23 Jun 2026 11:05:29 -0400 Subject: [PATCH 198/511] Silence conversion warning; NFC (#205357) This was triggering a conversion warning in MSVC. --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5772ef37ec762..cc3deaa83f63b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -9126,7 +9126,7 @@ SDValue TargetLowering::expandPDEP(SDNode *Node, SelectionDAG &DAG) const { // Each pass handles half the shift amount of the previous pass. SDValue X = Val; for (int S = (int)LogBW - 1; S >= 0; --S) { - SDValue ShiftSv = DAG.getShiftAmountConstant(1u << S, VT, DL); + SDValue ShiftSv = DAG.getShiftAmountConstant(1ull << S, VT, DL); SDValue T = DAG.getNode(ISD::SHL, DL, VT, X, ShiftSv); SDValue UnshiftedBits = DAG.getNode(ISD::AND, DL, VT, X, DAG.getNOT(DL, MvArray[S], VT)); From c63aa02212c9cefebf305d9c929092f5fc00c1b4 Mon Sep 17 00:00:00 2001 From: Qiongsi Wu Date: Tue, 23 Jun 2026 08:16:55 -0700 Subject: [PATCH 199/511] [clang][Dependency Scanning] Fix the Input File for By-Name Lookup's Input CC1 Command Line (#205214) When the command line is a CC1 command, the scanner does not append the fake input file to the command line when initializing the compiler instance. This PR fixes that by passing the compiler instance initialization the correct modified command line. Without specifying the fake input file, clang picks up `-` as its input. An observable behavior is that the diagnostics are pointing to incorrect files for cc1 commands, hence a test is added to check the diagnostics messages contain the correct file name. 
--- clang/lib/Tooling/DependencyScanningTool.cpp | 5 +++-- .../modules-full-by-mult-mod-names-diagnostics.c | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/clang/lib/Tooling/DependencyScanningTool.cpp b/clang/lib/Tooling/DependencyScanningTool.cpp index b4b45798c514d..d55367107862d 100644 --- a/clang/lib/Tooling/DependencyScanningTool.cpp +++ b/clang/lib/Tooling/DependencyScanningTool.cpp @@ -378,10 +378,11 @@ CompilerInstanceWithContext::initializeFromCommandline( std::make_unique(ModifiedCommandLine, FS, DC); - if (CommandLine.size() >= 2 && CommandLine[1] == "-cc1") { + if (ModifiedCommandLine.size() >= 2 && ModifiedCommandLine[1] == "-cc1") { // The input command line is already a -cc1 invocation; initialize the // compiler instance directly from it. - CompilerInstanceWithContext CIWithContext(Tool.Worker, CWD, CommandLine); + CompilerInstanceWithContext CIWithContext(Tool.Worker, CWD, + ModifiedCommandLine); if (!CIWithContext.initialize(Controller, std::move(DiagEngineWithCmdAndOpts), std::move(OverlayFS))) diff --git a/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c b/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c index cf34e19be48a9..0eafb119c6bfe 100644 --- a/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c +++ b/clang/test/ClangScanDeps/modules-full-by-mult-mod-names-diagnostics.c @@ -26,6 +26,20 @@ module root2 { header "root2.h" } // RUN: cat %t/error.txt | FileCheck %s --check-prefixes=ERROR // RUN: cat %t/result.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s +//--- cdb.cc1.json.template +[{ + "file": "", + "directory": "DIR", + "command": "clang -cc1 -nostdsysteminc -nobuiltininc -fmodules -fimplicit-module-maps -fmodules-cache-path=DIR/cache -I DIR -x c" +}] + +// RUN: sed "s|DIR|%/t|g" %t/cdb.cc1.json.template > %t/cdb.cc1.json +// RUN: not clang-scan-deps -compilation-database %t/cdb.cc1.json -format \ +// RUN: experimental-full 
-module-names=modA,root,modB,modC,root2 2> \ +// RUN: %t/error.cc1.txt > %t/result.cc1.json +// RUN: cat %t/error.cc1.txt | FileCheck %s --check-prefixes=ERROR +// RUN: cat %t/result.cc1.json | sed 's:\\\\\?:/:g' | FileCheck -DPREFIX=%/t %s + // ERROR: Error while scanning dependencies for modA: // ERROR-NEXT: module-include.input:1:1: fatal error: module 'modA' not found // ERROR-NEXT: Error while scanning dependencies for modB: From ac79a36766a6ca6523d4eae5ba3b06cdb311a257 Mon Sep 17 00:00:00 2001 From: owenca Date: Tue, 23 Jun 2026 08:17:12 -0700 Subject: [PATCH 200/511] [clang-format] Don't check the format if diff is unavailable (#205036) --- clang/lib/Format/CMakeLists.txt | 36 +++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/clang/lib/Format/CMakeLists.txt b/clang/lib/Format/CMakeLists.txt index 3e19151790440..df73212983ab1 100644 --- a/clang/lib/Format/CMakeLists.txt +++ b/clang/lib/Format/CMakeLists.txt @@ -41,21 +41,27 @@ file(GLOB_RECURSE files ${CLANG_SOURCE_DIR}/unittests/Format/*.h ) -set(check_format_depends) -set(i 0) -foreach(file IN LISTS files) - add_custom_command(OUTPUT check_format_depend_${i} - COMMAND clang-format ${file} | diff -u ${file} - && - touch check_format_depend_${i} - VERBATIM - COMMENT "Checking format of ${file}" - DEPENDS clang-format - ${file} - ) - list(APPEND check_format_depends check_format_depend_${i}) - math(EXPR i ${i}+1) -endforeach() -add_custom_target(clang-format-check-format DEPENDS ${check_format_depends}) +find_program(DIFF_EXE diff) +if(DIFF_EXE) + set(check_format_depends) + set(i 0) + foreach(file IN LISTS files) + add_custom_command(OUTPUT check_format_depend_${i} + COMMAND clang-format ${file} | diff -u ${file} - && + touch check_format_depend_${i} + VERBATIM + COMMENT "Checking format of ${file}" + DEPENDS clang-format + ${file} + ) + list(APPEND check_format_depends check_format_depend_${i}) + math(EXPR i ${i}+1) + endforeach() + 
add_custom_target(clang-format-check-format DEPENDS ${check_format_depends}) +else() + add_custom_target(clang-format-check-format DEPENDS clang-format) +endif() + set(docs_tools_dir ${CLANG_SOURCE_DIR}/docs/tools) From 3e98a15cc45ffbf27f258b4e4b2dad76cad1b595 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 23 Jun 2026 08:21:48 -0700 Subject: [PATCH 201/511] [RISCV][P-ext] Add mvd alias for padd.dw rd, zero, rs. Use for copy idiom. (#205223) See https://github.com/riscv/riscv-p-spec/pull/304 I've refactored the MoveMerge code to use copyPhysReg to avoid duplication. --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 4 +-- llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 2 ++ llvm/lib/Target/RISCV/RISCVMoveMerger.cpp | 28 ++++--------------- .../RISCV/calling-conv-p-ext-vector.ll | 2 +- .../CodeGen/RISCV/make-compressible-zilsd.mir | 2 +- .../CodeGen/RISCV/rv32-merge-non-arg-reg.mir | 4 +-- .../CodeGen/RISCV/rv32-move-merge-crash.ll | 2 +- llvm/test/CodeGen/RISCV/rv32-move-merge.ll | 2 +- llvm/test/CodeGen/RISCV/rv32p.ll | 14 +++++----- llvm/test/CodeGen/RISCV/rvp-simd-64.ll | 2 +- llvm/test/MC/RISCV/rv32p-aliases-valid.s | 4 +++ 11 files changed, 28 insertions(+), 38 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 3636325cead24..62091cd734f54 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -548,8 +548,8 @@ void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (STI.hasStdExtP()) { // On RV32P, `padd.dw` is a GPR Pair Add BuildMI(MBB, MBBI, DL, get(RISCV::PADD_DW), DstReg) - .addReg(SrcReg, KillFlag | getRenamableRegState(RenamableSrc)) - .addReg(RISCV::X0_Pair); + .addReg(RISCV::X0_Pair) + .addReg(SrcReg, KillFlag | getRenamableRegState(RenamableSrc)); return; } } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index 122f52c7e3dfe..60cdc9a3d2539 100644 --- 
a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -1729,6 +1729,8 @@ let append Predicates = [IsRV32] in { def : InstAlias<"pncvt.h $rd, $rs", (PNSRLI_H GPR:$rd, GPRPairRV32:$rs, 0)>; def : InstAlias<"pncvth.b $rd, $rs", (PNSRLI_B GPR:$rd, GPRPairRV32:$rs, 8)>; def : InstAlias<"pncvth.h $rd, $rs", (PNSRLI_H GPR:$rd, GPRPairRV32:$rs, 16)>; + + def : InstAlias<"mvd $rd, $rs", (PADD_DW GPRPairRV32:$rd, X0_Pair, GPRPairRV32:$rs)>; } // append Predicates = [IsRV32] } // Predicates = [HasStdExtP] diff --git a/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp b/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp index a242ec3594bd8..ecd991f9ced0f 100644 --- a/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp +++ b/llvm/lib/Target/RISCV/RISCVMoveMerger.cpp @@ -75,16 +75,6 @@ char RISCVMoveMerge::ID = 0; INITIALIZE_PASS(RISCVMoveMerge, "riscv-move-merge", RISCV_MOVE_MERGE_NAME, false, false) -static unsigned getGPRPairCopyOpcode(const RISCVSubtarget &ST) { - if (ST.hasStdExtZdinx()) - return RISCV::FSGNJ_D_IN32X; - - if (ST.hasStdExtP()) - return RISCV::PADD_DW; - - llvm_unreachable("Unhandled subtarget with paired move."); -} - static unsigned getCM_MVOpcode(const RISCVSubtarget &ST, bool MoveFromSToA) { if (ST.hasStdExtZcmp()) return MoveFromSToA ? RISCV::CM_MVA01S : RISCV::CM_MVSA01; @@ -186,25 +176,19 @@ RISCVMoveMerge::mergeGPRPairInsns(MachineBasicBlock::iterator I, // flag. MachineOperand PairedSource = *SecondPair.Source; - unsigned Opcode = getGPRPairCopyOpcode(*ST); for (auto It = std::next(I); It != Paired && PairedSource.isKill(); ++It) if (It->readsRegister(PairedSource.getReg(), TRI)) PairedSource.setIsKill(false); - Register SrcReg1, SrcReg2, DestReg; unsigned GPRPairIdx = RegPairIsEven ? RISCV::sub_gpr_even : RISCV::sub_gpr_odd; - SrcReg1 = TRI->getMatchingSuperReg(FirstPair.Source->getReg(), GPRPairIdx, - &RISCV::GPRPairRegClass); - SrcReg2 = ST->hasStdExtZdinx() ? 
SrcReg1 : Register(RISCV::X0_Pair); - DestReg = TRI->getMatchingSuperReg(FirstPair.Destination->getReg(), - GPRPairIdx, &RISCV::GPRPairRegClass); + MCRegister SrcReg = TRI->getMatchingSuperReg( + FirstPair.Source->getReg(), GPRPairIdx, &RISCV::GPRPairRegClass); + MCRegister DestReg = TRI->getMatchingSuperReg( + FirstPair.Destination->getReg(), GPRPairIdx, &RISCV::GPRPairRegClass); + bool KillSrc = PairedSource.isKill() && FirstPair.Source->isKill(); - BuildMI(*I->getParent(), I, DL, TII->get(Opcode), DestReg) - .addReg(SrcReg1, getKillRegState(PairedSource.isKill() && - FirstPair.Source->isKill())) - .addReg(SrcReg2, getKillRegState(PairedSource.isKill() && - FirstPair.Source->isKill())); + TII->copyPhysReg(*I->getParent(), I, DL, DestReg, SrcReg, KillSrc); I->eraseFromParent(); Paired->eraseFromParent(); diff --git a/llvm/test/CodeGen/RISCV/calling-conv-p-ext-vector.ll b/llvm/test/CodeGen/RISCV/calling-conv-p-ext-vector.ll index 30aca1db3ed9c..be096e0b95ce4 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-p-ext-vector.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-p-ext-vector.ll @@ -206,7 +206,7 @@ define <8 x i8> @test_call_v8i8(<8 x i8> %a, <8 x i8> %b) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: mv a4, a1 ; RV32-NEXT: mv a5, a0 -; RV32-NEXT: padd.dw a0, a2, zero +; RV32-NEXT: mvd a0, a2 ; RV32-NEXT: mv a2, a5 ; RV32-NEXT: mv a3, a4 ; RV32-NEXT: call external_v8i8 diff --git a/llvm/test/CodeGen/RISCV/make-compressible-zilsd.mir b/llvm/test/CodeGen/RISCV/make-compressible-zilsd.mir index 0c22db20d4a68..9123093ca0128 100644 --- a/llvm/test/CodeGen/RISCV/make-compressible-zilsd.mir +++ b/llvm/test/CodeGen/RISCV/make-compressible-zilsd.mir @@ -130,7 +130,7 @@ body: | ; RV32_P-LABEL: name: store_common_value_double ; RV32_P: liveins: $x10, $x11, $x12, $x16, $x17 ; RV32_P-NEXT: {{ $}} - ; RV32_P-NEXT: $x14_x15 = PADD_DW $x16_x17, $x0_pair + ; RV32_P-NEXT: $x14_x15 = PADD_DW $x0_pair, $x16_x17 ; RV32_P-NEXT: SD_RV32 $x14_x15, killed renamable $x10, 0 :: (store 
(s64) into %ir.a) ; RV32_P-NEXT: SD_RV32 $x14_x15, killed renamable $x11, 0 :: (store (s64) into %ir.b) ; RV32_P-NEXT: SD_RV32 killed $x14_x15, killed renamable $x12, 0 :: (store (s64) into %ir.c) diff --git a/llvm/test/CodeGen/RISCV/rv32-merge-non-arg-reg.mir b/llvm/test/CodeGen/RISCV/rv32-merge-non-arg-reg.mir index ab3de8c8918fa..094c77461a920 100644 --- a/llvm/test/CodeGen/RISCV/rv32-merge-non-arg-reg.mir +++ b/llvm/test/CodeGen/RISCV/rv32-merge-non-arg-reg.mir @@ -18,7 +18,7 @@ body: | ; P-EXT-LABEL: name: merge_copy_non_arg_reg ; P-EXT: liveins: $x28, $x29 ; P-EXT-NEXT: {{ $}} - ; P-EXT-NEXT: $x6_x7 = PADD_DW $x28_x29, $x0_pair + ; P-EXT-NEXT: $x6_x7 = PADD_DW $x0_pair, $x28_x29 ; P-EXT-NEXT: PseudoRET implicit $x6 $x6 = COPY $x28 $x7 = COPY $x29 @@ -40,7 +40,7 @@ body: | ; P-EXT-LABEL: name: merge_copy_non_arg_reg_with_intervening_unrelated_copy ; P-EXT: liveins: $x12, $x28, $x29 ; P-EXT-NEXT: {{ $}} - ; P-EXT-NEXT: $x6_x7 = PADD_DW $x28_x29, $x0_pair + ; P-EXT-NEXT: $x6_x7 = PADD_DW $x0_pair, $x28_x29 ; P-EXT-NEXT: $x18 = ADDI $x12, 0 ; P-EXT-NEXT: PseudoRET implicit $x6 $x6 = COPY $x28 diff --git a/llvm/test/CodeGen/RISCV/rv32-move-merge-crash.ll b/llvm/test/CodeGen/RISCV/rv32-move-merge-crash.ll index 801de62ce1f54..e25ac02cfad51 100644 --- a/llvm/test/CodeGen/RISCV/rv32-move-merge-crash.ll +++ b/llvm/test/CodeGen/RISCV/rv32-move-merge-crash.ll @@ -48,7 +48,7 @@ define void @test(i32 %arg0, i32 %arg1) nounwind { ; ZCMP-P-NEXT: #NO_APP ; ZCMP-P-NEXT: #APP ; ZCMP-P-NEXT: #NO_APP -; ZCMP-P-NEXT: padd.dw a0, a4, zero +; ZCMP-P-NEXT: mvd a0, a4 ; ZCMP-P-NEXT: tail foo ; ; ZCMP-P64-LABEL: test: diff --git a/llvm/test/CodeGen/RISCV/rv32-move-merge.ll b/llvm/test/CodeGen/RISCV/rv32-move-merge.ll index 646cee53d4bcf..2dcda981d8641 100644 --- a/llvm/test/CodeGen/RISCV/rv32-move-merge.ll +++ b/llvm/test/CodeGen/RISCV/rv32-move-merge.ll @@ -34,7 +34,7 @@ define i64 @mv_to_fmv(i64 %a, i64 %b) nounwind { ; CHECK32P-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; 
CHECK32P-NEXT: addd s0, a0, a2 ; CHECK32P-NEXT: call foo -; CHECK32P-NEXT: padd.dw a0, s0, zero +; CHECK32P-NEXT: mvd a0, s0 ; CHECK32P-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; CHECK32P-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; CHECK32P-NEXT: lw s1, 4(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rv32p.ll b/llvm/test/CodeGen/RISCV/rv32p.ll index 0cd07672532dd..aca224c397ad2 100644 --- a/llvm/test/CodeGen/RISCV/rv32p.ll +++ b/llvm/test/CodeGen/RISCV/rv32p.ll @@ -1122,7 +1122,7 @@ define i64 @wmaccu(i32 %a, i32 %b, i64 %c) nounwind { ; CHECK-LABEL: wmaccu: ; CHECK: # %bb.0: ; CHECK-NEXT: wmaccu a2, a0, a1 -; CHECK-NEXT: padd.dw a0, a2, zero +; CHECK-NEXT: mvd a0, a2 ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -1135,7 +1135,7 @@ define i64 @wmaccu_commute(i32 %a, i32 %b, i64 %c) nounwind { ; CHECK-LABEL: wmaccu_commute: ; CHECK: # %bb.0: ; CHECK-NEXT: wmaccu a2, a0, a1 -; CHECK-NEXT: padd.dw a0, a2, zero +; CHECK-NEXT: mvd a0, a2 ; CHECK-NEXT: ret %aext = zext i32 %a to i64 %bext = zext i32 %b to i64 @@ -1148,7 +1148,7 @@ define i64 @wmacc(i32 %a, i32 %b, i64 %c) nounwind { ; CHECK-LABEL: wmacc: ; CHECK: # %bb.0: ; CHECK-NEXT: wmacc a2, a0, a1 -; CHECK-NEXT: padd.dw a0, a2, zero +; CHECK-NEXT: mvd a0, a2 ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -1161,7 +1161,7 @@ define i64 @wmacc_commute(i32 %a, i32 %b, i64 %c) nounwind { ; CHECK-LABEL: wmacc_commute: ; CHECK: # %bb.0: ; CHECK-NEXT: wmacc a2, a0, a1 -; CHECK-NEXT: padd.dw a0, a2, zero +; CHECK-NEXT: mvd a0, a2 ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = sext i32 %b to i64 @@ -1174,7 +1174,7 @@ define i64 @wmaccsu(i32 %a, i32 %b, i64 %c) nounwind { ; CHECK-LABEL: wmaccsu: ; CHECK: # %bb.0: ; CHECK-NEXT: wmaccsu a2, a0, a1 -; CHECK-NEXT: padd.dw a0, a2, zero +; CHECK-NEXT: mvd a0, a2 ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = zext i32 %b to i64 @@ -1187,7 +1187,7 @@ define i64 @wmaccsu_commute(i32 %a, i32 %b, i64 
%c) nounwind { ; CHECK-LABEL: wmaccsu_commute: ; CHECK: # %bb.0: ; CHECK-NEXT: wmaccsu a2, a0, a1 -; CHECK-NEXT: padd.dw a0, a2, zero +; CHECK-NEXT: mvd a0, a2 ; CHECK-NEXT: ret %aext = sext i32 %a to i64 %bext = zext i32 %b to i64 @@ -1467,7 +1467,7 @@ define i64 @wmacc_first_mul_multiple_uses(i32 %a, i32 %b, i32 %c, i32 %d, ptr %o ; CHECK-NEXT: mv a5, a3 ; CHECK-NEXT: mv a6, a2 ; CHECK-NEXT: wmacc a2, a0, a1 -; CHECK-NEXT: padd.dw a0, a2, zero +; CHECK-NEXT: mvd a0, a2 ; CHECK-NEXT: sw a6, 0(a4) ; CHECK-NEXT: sw a5, 4(a4) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll index 957b92a3fd607..808ff81102479 100644 --- a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll +++ b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll @@ -1863,7 +1863,7 @@ define <4 x i16> @test_pssla_h(<4 x i16> %a, <4 x i16> %b) { ; RV32-NEXT: pack a1, a7, t1 ; RV32-NEXT: merge a0, a4, t0 ; RV32-NEXT: merge a2, a0, a1 -; RV32-NEXT: padd.dw a0, a2, zero +; RV32-NEXT: mvd a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: test_pssla_h: diff --git a/llvm/test/MC/RISCV/rv32p-aliases-valid.s b/llvm/test/MC/RISCV/rv32p-aliases-valid.s index c8e517caacc54..a15cb37935df9 100644 --- a/llvm/test/MC/RISCV/rv32p-aliases-valid.s +++ b/llvm/test/MC/RISCV/rv32p-aliases-valid.s @@ -286,3 +286,7 @@ li t3, 0x81008100 # CHECK-S-OBJ-NOALIAS: lui t4, 524296 # CHECK-S-OBJ: lui t4, 524296 li t4, 0x80008000 + +# CHECK-S-OBJ-NOALIAS: padd.dw a0, zero, s0 +# CHECK-S-OBJ: mvd a0, s0 +mvd a0, s0 From 80826bf12bebdef6fd334a91cecaecf2d12ad1e8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 23 Jun 2026 08:23:17 -0700 Subject: [PATCH 202/511] [LegalizeTypes] Fix incorrect EVL1 clip in SplitVecRes_VP_SPLICE. (#205021) We were incorrectly clipping EVL1 to be a valid index for the VT, in the range [0, VT.getNumVectorElements() - 1]. It is legal for EVL1 to be equal to VT.getNumVectorElements() here so that was incorrect. 
In case it isn't clear, the clip is necessary to prevent turning poison into UB by accessing outside the temporary stack object. --- .../SelectionDAG/LegalizeVectorTypes.cpp | 9 +- .../RISCV/rvv/vp-splice-mask-vectors.ll | 352 +++++++++--------- llvm/test/CodeGen/RISCV/rvv/vp-splice.ll | 146 ++++---- 3 files changed, 249 insertions(+), 258 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 9e24ae1807ca1..97aa765642ea7 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -3650,7 +3650,14 @@ void DAGTypeLegalizer::SplitVecRes_VP_SPLICE(SDNode *N, SDValue &Lo, PtrInfo, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), Alignment); - SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, EVL1); + SDValue EltByteSize = + DAG.getTypeSize(DL, PtrVT, VT.getVectorElementType().getStoreSize()); + SDValue EVL1Ptr = DAG.getZExtOrTrunc(EVL1, DL, PtrVT); + SDValue EVL1Bytes = DAG.getNode(ISD::MUL, DL, PtrVT, EVL1Ptr, EltByteSize); + // Clip EVL1Bytes to make sure we stay within the stack object. 
+ SDValue VTBytes = DAG.getTypeSize(DL, PtrVT, VT.getStoreSize()); + EVL1Bytes = DAG.getNode(ISD::UMIN, DL, PtrVT, EVL1Bytes, VTBytes); + SDValue StackPtr2 = DAG.getMemBasePlusOffset(StackPtr, EVL1Bytes, DL); SDValue PoisonPtr = DAG.getPOISON(PtrVT); SDValue TrueMask = DAG.getBoolConstant(true, DL, Mask.getValueType(), VT); diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll index b91ab8c99f1e0..288cbc561431b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-splice-mask-vectors.ll @@ -924,17 +924,18 @@ define @test_vp_splice_nxv64i1_masked( %va, define @test_vp_splice_nxv128i1( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { ; NOVLDEP-LABEL: test_vp_splice_nxv128i1: ; NOVLDEP: # %bb.0: -; NOVLDEP-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; NOVLDEP-NEXT: vsetvli a2, zero, e8, m8, ta, ma ; NOVLDEP-NEXT: vmv1r.v v6, v10 ; NOVLDEP-NEXT: vmv1r.v v7, v9 -; NOVLDEP-NEXT: csrr a2, vlenb -; NOVLDEP-NEXT: slli a5, a2, 4 +; NOVLDEP-NEXT: vmv.v.i v16, 0 +; NOVLDEP-NEXT: vmerge.vim v24, v16, 1, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: csrr a3, vlenb +; NOVLDEP-NEXT: slli a2, a3, 3 ; NOVLDEP-NEXT: mv a4, a0 -; NOVLDEP-NEXT: addi a5, a5, -1 -; NOVLDEP-NEXT: mv a3, a0 -; NOVLDEP-NEXT: bltu a0, a5, .LBB21_2 +; NOVLDEP-NEXT: bltu a0, a2, .LBB21_2 ; NOVLDEP-NEXT: # %bb.1: -; NOVLDEP-NEXT: mv a3, a5 +; NOVLDEP-NEXT: mv a4, a2 ; NOVLDEP-NEXT: .LBB21_2: ; NOVLDEP-NEXT: addi sp, sp, -80 ; NOVLDEP-NEXT: .cfi_def_cfa_offset 80 @@ -948,56 +949,52 @@ define @test_vp_splice_nxv128i1( %va, @test_vp_splice_nxv128i1( %va, @test_vp_splice_nxv128i1( %va, @test_vp_splice_nxv128i1( %va, @test_vp_splice_nxv128i1_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { ; NOVLDEP-LABEL: test_vp_splice_nxv128i1_negative_offset: ; NOVLDEP: # %bb.0: -; NOVLDEP-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; NOVLDEP-NEXT: vsetvli a2, zero, e8, m8, ta, ma ; 
NOVLDEP-NEXT: vmv1r.v v6, v10 ; NOVLDEP-NEXT: vmv1r.v v7, v9 -; NOVLDEP-NEXT: csrr a3, vlenb -; NOVLDEP-NEXT: slli a4, a3, 4 -; NOVLDEP-NEXT: mv a5, a0 -; NOVLDEP-NEXT: addi a4, a4, -1 -; NOVLDEP-NEXT: mv a2, a0 -; NOVLDEP-NEXT: bltu a0, a4, .LBB22_2 +; NOVLDEP-NEXT: vmv.v.i v16, 0 +; NOVLDEP-NEXT: vmerge.vim v24, v16, 1, v0 +; NOVLDEP-NEXT: vmv1r.v v0, v8 +; NOVLDEP-NEXT: csrr a4, vlenb +; NOVLDEP-NEXT: slli a2, a4, 3 +; NOVLDEP-NEXT: mv a3, a0 +; NOVLDEP-NEXT: bltu a0, a2, .LBB22_2 ; NOVLDEP-NEXT: # %bb.1: -; NOVLDEP-NEXT: mv a2, a4 +; NOVLDEP-NEXT: mv a3, a2 ; NOVLDEP-NEXT: .LBB22_2: ; NOVLDEP-NEXT: addi sp, sp, -80 ; NOVLDEP-NEXT: .cfi_def_cfa_offset 80 @@ -1126,65 +1121,61 @@ define @test_vp_splice_nxv128i1_negative_offset( @test_vp_splice_nxv128i1_negative_offset( @test_vp_splice_nxv128i1_negative_offset( @test_vp_splice_nxv2f32_masked( define @test_vp_splice_nxv16i64( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { ; CHECK-LABEL: test_vp_splice_nxv16i64: ; CHECK: # %bb.0: -; CHECK-NEXT: mv a6, a2 +; CHECK-NEXT: mv a5, a2 ; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli t0, a4, 1 ; CHECK-NEXT: slli a1, a4, 3 -; CHECK-NEXT: addi t0, t0, -1 -; CHECK-NEXT: add a5, a0, a1 -; CHECK-NEXT: mv a7, a2 -; CHECK-NEXT: bltu a2, t0, .LBB22_2 +; CHECK-NEXT: add a6, a0, a1 +; CHECK-NEXT: bltu a2, a4, .LBB22_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a7, t0 +; CHECK-NEXT: mv a5, a4 ; CHECK-NEXT: .LBB22_2: ; CHECK-NEXT: addi sp, sp, -80 ; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 80 -; CHECK-NEXT: csrr t0, vlenb -; CHECK-NEXT: slli t0, t0, 5 -; CHECK-NEXT: sub sp, sp, t0 +; CHECK-NEXT: csrr a7, vlenb +; CHECK-NEXT: slli a7, a7, 5 +; CHECK-NEXT: sub sp, sp, a7 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: vl8re64.v v24, (a5) -; CHECK-NEXT: slli a5, a7, 3 -; CHECK-NEXT: addi a7, sp, 64 -; CHECK-NEXT: add a5, a7, a5 -; CHECK-NEXT: bltu a6, a4, .LBB22_4 +; CHECK-NEXT: vl8re64.v v0, (a6) +; 
CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 64 +; CHECK-NEXT: vsetvli zero, a5, e64, m8, ta, ma +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: sub a5, a2, a4 +; CHECK-NEXT: sltu a6, a2, a5 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: sub a7, a3, a4 +; CHECK-NEXT: and t0, a6, a5 +; CHECK-NEXT: sltu a5, a3, a7 +; CHECK-NEXT: add t1, a0, a1 +; CHECK-NEXT: addi t2, a5, -1 +; CHECK-NEXT: slli a6, a4, 4 +; CHECK-NEXT: slli a5, a2, 3 +; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma +; CHECK-NEXT: vse64.v v16, (t1) +; CHECK-NEXT: and a2, t2, a7 +; CHECK-NEXT: bltu a5, a6, .LBB22_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a6, a4 +; CHECK-NEXT: mv a5, a6 ; CHECK-NEXT: .LBB22_4: -; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma -; CHECK-NEXT: vse64.v v8, (a7) -; CHECK-NEXT: sub a0, a2, a4 -; CHECK-NEXT: sltu a2, a2, a0 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: add a7, a7, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vse64.v v16, (a7) -; CHECK-NEXT: sub a0, a3, a4 -; CHECK-NEXT: sltu a2, a3, a0 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: add a2, a5, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vse64.v v24, (a2) +; CHECK-NEXT: add a0, a0, a5 +; CHECK-NEXT: add a5, a0, a1 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vse64.v v0, (a5) ; CHECK-NEXT: bltu a3, a4, .LBB22_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a3, a4 ; CHECK-NEXT: .LBB22_6: ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vse64.v v0, (a5) -; CHECK-NEXT: addi a2, sp, 104 -; CHECK-NEXT: add a1, a2, a1 -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vse64.v v24, (a0) +; CHECK-NEXT: addi a0, sp, 104 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a2) +; CHECK-NEXT: vle64.v v8, 
(a0) ; CHECK-NEXT: addi sp, s0, -80 ; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -537,67 +535,65 @@ define @test_vp_splice_nxv16i64( %va, @test_vp_splice_nxv16i64_negative_offset( %va, %vb, i32 zeroext %evla, i32 zeroext %evlb) #0 { ; CHECK-LABEL: test_vp_splice_nxv16i64_negative_offset: ; CHECK: # %bb.0: -; CHECK-NEXT: mv a7, a2 +; CHECK-NEXT: mv a6, a2 ; CHECK-NEXT: csrr a4, vlenb -; CHECK-NEXT: slli t0, a4, 1 ; CHECK-NEXT: slli a1, a4, 3 -; CHECK-NEXT: addi t0, t0, -1 ; CHECK-NEXT: add a5, a0, a1 -; CHECK-NEXT: mv a6, a2 -; CHECK-NEXT: bltu a2, t0, .LBB23_2 +; CHECK-NEXT: bltu a2, a4, .LBB23_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a6, t0 +; CHECK-NEXT: mv a6, a4 ; CHECK-NEXT: .LBB23_2: ; CHECK-NEXT: addi sp, sp, -80 ; CHECK-NEXT: sd ra, 72(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 64(sp) # 8-byte Folded Spill ; CHECK-NEXT: addi s0, sp, 80 -; CHECK-NEXT: csrr t0, vlenb -; CHECK-NEXT: slli t0, t0, 5 -; CHECK-NEXT: sub sp, sp, t0 +; CHECK-NEXT: csrr a7, vlenb +; CHECK-NEXT: slli a7, a7, 5 +; CHECK-NEXT: sub sp, sp, a7 ; CHECK-NEXT: andi sp, sp, -64 -; CHECK-NEXT: vl8re64.v v24, (a5) -; CHECK-NEXT: slli a5, a6, 3 -; CHECK-NEXT: addi t0, sp, 64 -; CHECK-NEXT: add a6, t0, a5 -; CHECK-NEXT: bltu a7, a4, .LBB23_4 -; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a7, a4 -; CHECK-NEXT: .LBB23_4: -; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: vsetvli zero, a7, e64, m8, ta, ma -; CHECK-NEXT: vse64.v v8, (t0) +; CHECK-NEXT: vl8re64.v v0, (a5) +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: addi a5, sp, 64 +; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma +; CHECK-NEXT: vse64.v v8, (a5) ; CHECK-NEXT: sub a0, a2, a4 -; CHECK-NEXT: sltu a2, a2, a0 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: add t0, t0, a1 +; CHECK-NEXT: sltu a6, a2, a0 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: sub a7, a3, a4 +; CHECK-NEXT: and a0, a6, a0 +; CHECK-NEXT: sltu a6, a3, a7 +; CHECK-NEXT: add t0, a5, a1 
+; CHECK-NEXT: addi t1, a6, -1 +; CHECK-NEXT: slli a6, a4, 4 +; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (t0) -; CHECK-NEXT: sub a0, a3, a4 -; CHECK-NEXT: sltu a2, a3, a0 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: add a2, a6, a1 +; CHECK-NEXT: and a0, t1, a7 +; CHECK-NEXT: bltu a2, a6, .LBB23_4 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: mv a2, a6 +; CHECK-NEXT: .LBB23_4: +; CHECK-NEXT: add a5, a5, a2 +; CHECK-NEXT: add a6, a5, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vse64.v v24, (a2) +; CHECK-NEXT: vse64.v v0, (a6) ; CHECK-NEXT: bltu a3, a4, .LBB23_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a3, a4 ; CHECK-NEXT: .LBB23_6: -; CHECK-NEXT: li a2, 8 +; CHECK-NEXT: li a4, 8 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vse64.v v0, (a6) -; CHECK-NEXT: bltu a5, a2, .LBB23_8 +; CHECK-NEXT: vse64.v v24, (a5) +; CHECK-NEXT: bltu a2, a4, .LBB23_8 ; CHECK-NEXT: # %bb.7: -; CHECK-NEXT: li a5, 8 +; CHECK-NEXT: li a2, 8 ; CHECK-NEXT: .LBB23_8: -; CHECK-NEXT: sub a2, a6, a5 -; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: sub a5, a5, a2 +; CHECK-NEXT: add a1, a5, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a2) +; CHECK-NEXT: vle64.v v8, (a5) ; CHECK-NEXT: addi sp, s0, -80 ; CHECK-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload From 384748764eece610aa419c3638f21c4ed2b550a7 Mon Sep 17 00:00:00 2001 From: Yihan Wang Date: Tue, 23 Jun 2026 23:25:02 +0800 Subject: [PATCH 203/511] [clangd] fix preprocessor caching-lexer state tracking (#203716) Fix `recomputeCurLexerKind` to avoid default fallback to `CurLexerCallback = CLK_CachingLexer;`. 
This prevents code-completion EOF handling from accidentally restoring CLK_CachingLexer while a tentative parse is still active, which could trigger a caching lexer re-entry assertion in clangd signature help. Fixes https://github.com/llvm/llvm-project/issues/200677 --------- Signed-off-by: yronglin --- .../clangd/unittests/CodeCompleteTests.cpp | 17 +++++++++++++++++ clang/docs/ReleaseNotes.rst | 3 +++ clang/include/clang/Lex/Preprocessor.h | 6 +----- clang/lib/Lex/PPCaching.cpp | 6 ++---- clang/lib/Lex/PPLexerChange.cpp | 2 +- clang/lib/Lex/Preprocessor.cpp | 6 ++++-- 6 files changed, 28 insertions(+), 12 deletions(-) diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index 5808b2145965f..dc94af4dddf8c 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -1918,6 +1918,23 @@ TEST(SignatureHelpTest, StalePreamble) { EXPECT_EQ(0, Results.activeParameter); } +TEST(SignatureHelpTest, EOFInSkippedFunctionBody) { + Annotations Test(R"cpp( +#ifdef IS_HEADER +void frameSizeBlocksWarning() { + auto fn = []() { + }; + fn(); +} +#else +#define IS_HEADER +#include __FILE__ +#^endif +)cpp"); + auto Results = signatures(Test.code(), Test.point()); + EXPECT_THAT(Results.signatures, IsEmpty()); +} + class IndexRequestCollector : public SymbolIndex { public: IndexRequestCollector(std::vector Syms = {}) : Symbols(Syms) {} diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e86f1d9602bed..4aa13ce1384ac 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -725,6 +725,9 @@ Bug Fixes in This Version - Fixed a potential stack-use-after-return issue in Clang when copy-initializing an array via an element-at-a-time copy loop (#GH192026) - Fixed an issue where certain designated initializers would be rejected for constexpr variables. 
(#GH193373) +- Fixed ``clang::Preprocessor::recomputeCurLexerKind`` to avoid default fallback to ``CurLexerCallback = CLK_CachingLexer;``. This prevents code-completion + EOF handling from accidentally restoring CLK_CachingLexer while a tentative parse is still active, which could trigger a caching lexer re-entry assertion + in clangd signature help. (#GH200677) - Fixed a crash when ``#embed`` is used with C++ modules (#GH195350) - Fixed a bug where ``-x cuda`` caused clang to immediately resolve templates that should not be. (#GH200545) - Fixed an issue where ``__typeof_unqual`` and ``__typeof_unqual__`` were rejected as a declaration specifier in block scope in C++. diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 8b684e85eb1c1..fc66bd745a618 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2819,11 +2819,7 @@ class Preprocessor { // Caching stuff. void CachingLex(Token &Result); - bool InCachingLexMode() const { - // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means - // that we are past EOF, not that we are in CachingLex mode. 
- return !CurPPLexer && !CurTokenLexer && !IncludeMacroStack.empty(); - } + bool InCachingLexMode() const { return CurLexerCallback == CLK_CachingLexer; } void EnterCachingLexMode(); void EnterCachingLexModeUnchecked(); diff --git a/clang/lib/Lex/PPCaching.cpp b/clang/lib/Lex/PPCaching.cpp index cbacda9d31ae2..3f0ebd8455685 100644 --- a/clang/lib/Lex/PPCaching.cpp +++ b/clang/lib/Lex/PPCaching.cpp @@ -117,16 +117,14 @@ void Preprocessor::EnterCachingLexMode() { assert(LexLevel == 0 && "entered caching lex mode while lexing something else"); - if (InCachingLexMode()) { - assert(CurLexerCallback == CLK_CachingLexer && "Unexpected lexer kind"); + if (InCachingLexMode()) return; - } EnterCachingLexModeUnchecked(); } void Preprocessor::EnterCachingLexModeUnchecked() { - assert(CurLexerCallback != CLK_CachingLexer && "already in caching lex mode"); + assert(!InCachingLexMode() && "already in caching lex mode"); PushIncludeMacroStack(); CurLexerCallback = CLK_CachingLexer; } diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp index 98ff9a9a04e7c..b44bf2cd3c253 100644 --- a/clang/lib/Lex/PPLexerChange.cpp +++ b/clang/lib/Lex/PPLexerChange.cpp @@ -172,7 +172,7 @@ void Preprocessor::EnterMacro(Token &Tok, SourceLocation ILEnd, void Preprocessor::EnterTokenStream(const Token *Toks, unsigned NumToks, bool DisableMacroExpansion, bool OwnsTokens, bool IsReinject) { - if (CurLexerCallback == CLK_CachingLexer) { + if (InCachingLexMode()) { if (CachedLexPos < CachedTokens.size()) { assert(IsReinject && "new tokens in the middle of cached stream"); // We're entering tokens into the middle of our cached token stream. 
We diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index 1e21b4a94cea3..c69d084d6514f 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -417,14 +417,16 @@ StringRef Preprocessor::getLastMacroWithSpelling( } void Preprocessor::recomputeCurLexerKind() { - if (CurLexer) + if (InCachingLexMode()) + CurLexerCallback = CLK_CachingLexer; + else if (CurLexer) CurLexerCallback = CurLexer->isDependencyDirectivesLexer() ? CLK_DependencyDirectivesLexer : CLK_Lexer; else if (CurTokenLexer) CurLexerCallback = CLK_TokenLexer; else - CurLexerCallback = CLK_CachingLexer; + CurLexerCallback = CLK_Lexer; } bool Preprocessor::SetCodeCompletionPoint(FileEntryRef File, From 98f2520c234fc6c11cf28e6897420262392d5ff7 Mon Sep 17 00:00:00 2001 From: Felipe Novais Date: Tue, 23 Jun 2026 17:31:35 +0200 Subject: [PATCH 204/511] [docs] Fix typo in Docker.rst (#205346) Small typo fix in the Docker documentation: Debian8 -> Debian12. Signed-off-by: Felipe Novais --- llvm/docs/Docker.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/docs/Docker.rst b/llvm/docs/Docker.rst index 29078d1f79fdb..b4d7ad3c7c72e 100644 --- a/llvm/docs/Docker.rst +++ b/llvm/docs/Docker.rst @@ -126,7 +126,7 @@ Which image should I choose? ============================ We currently provide two images: Debian12-based and nvidia-cuda-based. They differ in the base image that they use, i.e. they have a different set of -preinstalled binaries. Debian8 is very minimal, nvidia-cuda is larger, but has +preinstalled binaries. Debian12 is very minimal, nvidia-cuda is larger, but has preinstalled CUDA libraries and allows access to a GPU, installed on your machine. 
From 73b4a79fe3d5ff133f487386710b4c04bee7af8a Mon Sep 17 00:00:00 2001 From: Artur Gainullin Date: Tue, 23 Jun 2026 08:37:45 -0700 Subject: [PATCH 205/511] [SYCL][Doc] Add sycl_ext_oneapi_register_host_memory extension spec (#22324) Add an experimental extension specification for registering existing host/system memory with the SYCL runtime so that it behaves like a USM host allocation: usable from device code, queryable via get_pointer_type, and faster for explicit copies. The memory is released via unregister_host_memory rather than sycl::free. Co-Authored-By: Greg Lueck Assisted-By: Claude --- ...l_ext_oneapi_register_host_memory.asciidoc | 306 ++++++++++++++++++ 1 file changed, 306 insertions(+) create mode 100644 sycl/doc/extensions/proposed/sycl_ext_oneapi_register_host_memory.asciidoc diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_register_host_memory.asciidoc b/sycl/doc/extensions/proposed/sycl_ext_oneapi_register_host_memory.asciidoc new file mode 100644 index 0000000000000..c1894c84cbb62 --- /dev/null +++ b/sycl/doc/extensions/proposed/sycl_ext_oneapi_register_host_memory.asciidoc @@ -0,0 +1,306 @@ += sycl_ext_oneapi_register_host_memory + +:source-highlighter: coderay +:coderay-linenums-mode: table + +// This section needs to be after the document title. +:doctype: book +:toc2: +:toc: left +:encoding: utf-8 +:lang: en +:dpcpp: pass:[DPC++] +:endnote: —{nbsp}end{nbsp}note + +// Set the default source code type in this document to C++, +// for syntax highlighting purposes. This is needed because +// docbook uses c++ and html5 uses cpp. +:language: {basebackend@docbook:c++:cpp} + + +== Notice + +[%hardbreaks] +Copyright (C) 2026 Intel Corporation. All rights reserved. + +Khronos(R) is a registered trademark and SYCL(TM) and SPIR(TM) are trademarks +of The Khronos Group Inc. OpenCL(TM) is a trademark of Apple Inc. used by +permission by Khronos. 
+ + +== Contact + +To report problems with this extension, please open a new issue at: + +https://github.com/intel/llvm/issues + + +== Dependencies + +This extension is written against the SYCL 2020 revision 11 specification. All +references below to the "core SYCL specification" or to section numbers in the +SYCL specification refer to that revision. + +This extension also depends on the following other SYCL extensions: + +* link:../experimental/sycl_ext_oneapi_properties.asciidoc[ + sycl_ext_oneapi_properties] + + +== Status + +This is a proposed extension specification, intended to gather community +feedback. +Interfaces defined in this specification may not be implemented yet or may be in +a preliminary state. +The specification itself may also change in incompatible ways before it is +finalized. +*Shipping software products should not rely on APIs defined in this +specification.* + + +== Overview + +This extension provides a way to register regular host memory with the SYCL +runtime, such that it can be promoted to USM host memory. Once registered, the +memory can be accessed from within kernels, and it can be used with most APIs +that expect a USM host memory pointer. Registering host memory can also +optimize explicit copy operations between that host memory and USM device +memory. + +== Specification + +=== Feature test macro + +This extension provides a feature-test macro as described in the core SYCL +specification. An implementation supporting this extension must predefine the +macro `SYCL_EXT_ONEAPI_REGISTER_HOST_MEMORY` to one of the values defined in the +table below. Applications can test for the existence of this macro to determine +whether the implementation supports this extension. + +[%header,cols="1,5"] +|=== +|Value +|Description + +|1 +|The APIs of this experimental extension are not versioned, so the + feature-test macro always has this value. 
+|=== + +=== New device aspect + +This extension adds a new device aspect: + +[source,c++] +---- +namespace sycl { +enum class aspect { + // ... + ext_oneapi_register_host_memory +}; +} +---- + +A device has the `ext_oneapi_register_host_memory` aspect if the implementation +supports this extension on that device. Applications can query support using +`device::has`, and `platform::has` can be used as a conservative check for all +devices in a platform. + +The functions in this extension may be used only with a `context` whose devices +all have this aspect. Otherwise, they throw an `exception` with the +`errc::feature_not_supported` error code. + +=== Functions to register and unregister memory + +This extension adds the following free functions. + +''' + +[source,c++] +---- +namespace sycl::ext::oneapi::experimental { + +template <typename Properties = empty_properties_t> +void register_host_memory(void *ptr, size_t numBytes, const context &ctxt, + Properties props = {}); + +} // namespace sycl::ext::oneapi::experimental +---- + +_Preconditions:_ + +* `ptr` points to valid host memory that is mapped into the host address space, + and the range `[ptr, ptr + numBytes)` remains valid and mapped at the same + host virtual address from this call until the matching call to + `unregister_host_memory` returns. +* `ptr` is aligned to the host page size, and `numBytes` is a non-zero multiple + of the host page size. +* The range `[ptr, ptr + numBytes)` does not overlap any range that is + currently registered through this extension with any context. +* If any page in the range is mapped read-only on the host, then `props` must + contain the `read_only` property. + +_Constraints:_ + +* `Properties` is one of the properties listed below in section "Properties for + registering memory"; or +* `is_property_list_v<Properties>` is `true` and contains no properties other + than those listed below in section "Properties for registering memory".
+ +_Effects:_ Registers the host memory range `[ptr, ptr + numBytes)` with the +context `ctxt`. While the registration is in effect, the range is treated as a +USM host allocation associated with `ctxt`. The memory may be accessed from +device code in kernels submitted to a queue whose context is `ctxt`, and +pointers into the range may be passed to SYCL APIs that accept a USM host +pointer associated with `ctxt`, except for `sycl::free`. + +_Throws:_ + +* An `exception` with the `errc::feature_not_supported` error code if any device + in `ctxt` does not have `aspect::ext_oneapi_register_host_memory`. +* An `exception` with the `errc::invalid` error code if `ptr` is null, if + `numBytes` is zero, if `ptr` or `numBytes` is not aligned to the host page + size, or if the range `[ptr, ptr + numBytes)` is not representable in the host + address space. +* An `exception` with the `errc::runtime` error code if the memory range + cannot be registered by the implementation. + +[_Note:_ This extension does not provide a query for the host page size. It can +be obtained using operating system APIs such as `+sysconf(_SC_PAGESIZE)+` on +POSIX systems or `GetSystemInfo` on Windows. Page alignment of `ptr` and `numBytes` +is required even when the native backend would accept a less restrictive +alignment, and the implementation does not implicitly round the base address or +size. _{endnote}_] + +''' + +[source,c++] +---- +namespace sycl::ext::oneapi::experimental { + +void unregister_host_memory(void *ptr, const context &ctxt); + +} // namespace sycl::ext::oneapi::experimental +---- + +_Preconditions:_ + +* `ptr` is exactly the base pointer passed to a previous successful call to + `register_host_memory` with the same context `ctxt`, and that registration is + still in effect. +* All commands that reference the registered range have completed. + +_Effects:_ Ends the registration of the host memory range that starts at `ptr` +in the context `ctxt`. 
After this function returns, the range is no longer +treated as a USM host allocation, and the USM pointer queries behave as they +would have if the range had never been registered. This function does not free +or unmap the underlying host memory; the application remains responsible for +that. + +_Throws:_ + +* An `exception` with the `errc::feature_not_supported` error code if any device + in `ctxt` does not have `aspect::ext_oneapi_register_host_memory`. +* An `exception` with the `errc::runtime` error code if the memory range + cannot be unregistered by the implementation. + +=== Properties for registering memory + +This extension defines the following property, which can be passed to +`register_host_memory`. + +[source,c++] +---- +namespace sycl::ext::oneapi::experimental { + +struct read_only_key { + using value_t = property_value<read_only_key>; +}; + +inline constexpr read_only_key::value_t read_only; + +} // namespace sycl::ext::oneapi::experimental +---- + +When this property is passed to `register_host_memory`, the application +guarantees that device code will not write to the registered range. This allows +the range to be registered even when it is not writable by the application, such +as memory backed by a read-only mapping. The behavior is undefined if device +code writes to a range that was registered with this property. + +=== Interaction with USM queries + +While a host memory range is registered, the pointer queries from the core SYCL +specification behave as follows for any pointer within the registered range: + +* `get_pointer_type(ptr, ctxt)` returns `usm::alloc::host`. +* `get_pointer_device(ptr, ctxt)` returns the first device in `ctxt`, following + the rules for a USM host allocation. + +A pointer to registered host memory must not be passed to `sycl::free`. The +application uses `unregister_host_memory` to end the registration instead. + + +== Implementation notes + +This section is informational only and is not part of the portable SYCL API +contract.
+ +=== Mapping on Level Zero + +On Level Zero, registration can be implemented using the external system memory +mapping mechanism, which maps an existing host memory range without taking +ownership of it. Unregistration removes that mapping but does not free the +underlying host memory. + +== Example + +This example registers a page-aligned host allocation, uses it directly in a +kernel, queries it like a USM host allocation, and then unregisters it. + +[source,c++] +---- +#include <sycl/sycl.hpp> +#include <unistd.h> // sysconf + +#include <cassert> +#include <cstdlib> + +namespace syclexp = sycl::ext::oneapi::experimental; + +int main() { + sycl::queue q; + sycl::context ctxt = q.get_context(); + + if (!ctxt.get_platform().has(sycl::aspect::ext_oneapi_register_host_memory)) + return 0; // Not supported on this platform. + + const size_t pageSize = static_cast<size_t>(sysconf(_SC_PAGESIZE)); + const size_t numElems = 1024; + size_t numBytes = numElems * sizeof(int); + // Round the size up to a multiple of the page size. + numBytes = ((numBytes + pageSize - 1) / pageSize) * pageSize; + + // Page-aligned host allocation that the application already owns. + int *data = static_cast<int *>(std::aligned_alloc(pageSize, numBytes)); + if (!data) + return 0; + + syclexp::register_host_memory(data, numBytes, ctxt); + + // The registered pointer can be used directly in device code. + q.parallel_for(sycl::range<1>{numElems}, + [=](sycl::id<1> i) { data[i[0]] = i[0]; }) + .wait_and_throw(); + + // ... and queried like a USM host allocation. + assert(sycl::get_pointer_type(data, ctxt) == sycl::usm::alloc::host); + + syclexp::unregister_host_memory(data, ctxt); + + // The application still owns the memory and must free it itself.
+ std::free(data); + return 0; +} +---- From cff2f610a9d307c0e4791d8c1884230acd52196b Mon Sep 17 00:00:00 2001 From: Hristo Hristov Date: Tue, 23 Jun 2026 18:39:28 +0300 Subject: [PATCH 206/511] [libc++][ranges] Backport P2711R1: Making multi-param constructors of views explicit (#190513) As discussed in https://gcc.gnu.org/PR114298 - GCC and MSVC STL implemented P2711R1 as a DR. This PR does the same for libc++. Co-authored-by: A. Jiang --- libcxx/docs/Status/Cxx23Papers.csv | 2 +- libcxx/include/__config | 6 ----- libcxx/include/__ranges/drop_view.h | 3 +-- libcxx/include/__ranges/drop_while_view.h | 2 +- libcxx/include/__ranges/filter_view.h | 2 +- libcxx/include/__ranges/iota_view.h | 10 ++++---- libcxx/include/__ranges/lazy_split_view.h | 4 ++-- libcxx/include/__ranges/split_view.h | 5 ++-- libcxx/include/__ranges/take_view.h | 3 +-- libcxx/include/__ranges/take_while_view.h | 2 +- libcxx/include/__ranges/transform_view.h | 2 +- .../range.drop.while/ctor.view.pass.cpp | 11 +-------- .../range.drop/ctor.view.pass.cpp | 11 +-------- .../range.filter/ctor.view_pred.pass.cpp | 11 +-------- .../range.lazy.split/ctor.range.pass.cpp | 12 +--------- .../range.lazy.split/ctor.view.pass.cpp | 11 +-------- .../range.split/ctor.range.pass.cpp | 12 +--------- .../range.split/ctor.view.pass.cpp | 11 +-------- .../range.take.while/ctor.view.pass.cpp | 11 +-------- .../range.take/ctor.view_count.pass.cpp | 11 +-------- .../ctor.view_function.pass.cpp | 11 +-------- .../range.iota.view/ctor.first.last.pass.cpp | 24 +------------------ .../range.iota.view/ctor.value.bound.pass.cpp | 23 +----------------- 23 files changed, 28 insertions(+), 172 deletions(-) diff --git a/libcxx/docs/Status/Cxx23Papers.csv b/libcxx/docs/Status/Cxx23Papers.csv index eb580ea891f5b..739f0ecab37e7 100644 --- a/libcxx/docs/Status/Cxx23Papers.csv +++ b/libcxx/docs/Status/Cxx23Papers.csv @@ -105,7 +105,7 @@ "`P0290R4 `__","``apply()`` for ``synchronized_value``","2023-02 (Issaquah)","","","`#105249 
`__","" "`P2770R0 `__","Stashing stashing ``iterators`` for proper flattening","2023-02 (Issaquah)","|Complete|","21","`#105250 `__","" "`P2164R9 `__","``views::enumerate``","2023-02 (Issaquah)","|Complete|","23","`#105251 `__","" -"`P2711R1 `__","Making multi-param constructors of ``views`` ``explicit``","2023-02 (Issaquah)","|Complete|","21","`#105252 `__","" +"`P2711R1 `__","Making multi-param constructors of ``views`` ``explicit``","2023-02 (Issaquah)","|Complete|","21","`#105252 `__","Implemented as a DR in C++20 since LLVM 23." "`P2609R3 `__","Relaxing Ranges Just A Smidge","2023-02 (Issaquah)","|Complete|","20","`#105253 `__","Implemented as a DR in C++20. Other implementations will do the same." "`P2713R1 `__","Escaping improvements in ``std::format``","2023-02 (Issaquah)","|Complete|","19","`#105254 `__","" "`P2675R1 `__","``format``'s width estimation is too approximate and not forward compatible","2023-02 (Issaquah)","|Complete|","17","`#105255 `__","" diff --git a/libcxx/include/__config b/libcxx/include/__config index 9172166b16b87..fdd0558fbec6f 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -244,12 +244,6 @@ typedef __char32_t char32_t; # define _LIBCPP_EXPLICIT_SINCE_CXX14 explicit # endif -# if _LIBCPP_STD_VER >= 23 -# define _LIBCPP_EXPLICIT_SINCE_CXX23 explicit -# else -# define _LIBCPP_EXPLICIT_SINCE_CXX23 -# endif - # if _LIBCPP_STD_VER >= 14 # define _LIBCPP_CONSTEXPR_SINCE_CXX14 constexpr # else diff --git a/libcxx/include/__ranges/drop_view.h b/libcxx/include/__ranges/drop_view.h index feb3705d2df6c..e3754e64d536f 100644 --- a/libcxx/include/__ranges/drop_view.h +++ b/libcxx/include/__ranges/drop_view.h @@ -74,8 +74,7 @@ class drop_view : public view_interface> { requires default_initializable<_View> = default; - _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 - drop_view(_View __base, range_difference_t<_View> __count) + _LIBCPP_HIDE_FROM_ABI constexpr explicit drop_view(_View __base, 
range_difference_t<_View> __count) : __count_(__count), __base_(std::move(__base)) { _LIBCPP_ASSERT_UNCATEGORIZED(__count_ >= 0, "count must be greater than or equal to zero."); } diff --git a/libcxx/include/__ranges/drop_while_view.h b/libcxx/include/__ranges/drop_while_view.h index 1fe4e17f8048b..ed5b91d99425f 100644 --- a/libcxx/include/__ranges/drop_while_view.h +++ b/libcxx/include/__ranges/drop_while_view.h @@ -54,7 +54,7 @@ class _LIBCPP_ABI_LLVM18_NO_UNIQUE_ADDRESS drop_while_view : public view_interfa requires default_initializable<_View> && default_initializable<_Pred> = default; - _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 drop_while_view(_View __base, _Pred __pred) + _LIBCPP_HIDE_FROM_ABI constexpr explicit drop_while_view(_View __base, _Pred __pred) : __base_(std::move(__base)), __pred_(std::in_place, std::move(__pred)) {} [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& diff --git a/libcxx/include/__ranges/filter_view.h b/libcxx/include/__ranges/filter_view.h index 3ad69ea100931..4612dc4a9ba1d 100644 --- a/libcxx/include/__ranges/filter_view.h +++ b/libcxx/include/__ranges/filter_view.h @@ -72,7 +72,7 @@ class _LIBCPP_ABI_LLVM18_NO_UNIQUE_ADDRESS filter_view : public view_interface && default_initializable<_Pred> = default; - _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 filter_view(_View __base, _Pred __pred) + _LIBCPP_HIDE_FROM_ABI constexpr explicit filter_view(_View __base, _Pred __pred) : __base_(std::move(__base)), __pred_(in_place, std::move(__pred)) {} template diff --git a/libcxx/include/__ranges/iota_view.h b/libcxx/include/__ranges/iota_view.h index 29f96545ab34f..38e0ccaf2849e 100644 --- a/libcxx/include/__ranges/iota_view.h +++ b/libcxx/include/__ranges/iota_view.h @@ -316,8 +316,8 @@ class iota_view : public view_interface> { _LIBCPP_HIDE_FROM_ABI constexpr explicit iota_view(_Start __value) : __value_(std::move(__value)) {} - _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 - 
iota_view(type_identity_t<_Start> __value, type_identity_t<_BoundSentinel> __bound_sentinel) + _LIBCPP_HIDE_FROM_ABI constexpr explicit iota_view(type_identity_t<_Start> __value, + type_identity_t<_BoundSentinel> __bound_sentinel) : __value_(std::move(__value)), __bound_sentinel_(std::move(__bound_sentinel)) { // Validate the precondition if possible. if constexpr (totally_ordered_with<_Start, _BoundSentinel>) { @@ -326,15 +326,15 @@ class iota_view : public view_interface> { } } - _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 iota_view(__iterator __first, __iterator __last) + _LIBCPP_HIDE_FROM_ABI constexpr explicit iota_view(__iterator __first, __iterator __last) requires same_as<_Start, _BoundSentinel> : iota_view(std::move(__first.__value_), std::move(__last.__value_)) {} - _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 iota_view(__iterator __first, _BoundSentinel __last) + _LIBCPP_HIDE_FROM_ABI constexpr explicit iota_view(__iterator __first, _BoundSentinel __last) requires same_as<_BoundSentinel, unreachable_sentinel_t> : iota_view(std::move(__first.__value_), std::move(__last)) {} - _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 iota_view(__iterator __first, __sentinel __last) + _LIBCPP_HIDE_FROM_ABI constexpr explicit iota_view(__iterator __first, __sentinel __last) requires(!same_as<_Start, _BoundSentinel> && !same_as<_BoundSentinel, unreachable_sentinel_t>) : iota_view(std::move(__first.__value_), std::move(__last.__bound_sentinel_)) {} diff --git a/libcxx/include/__ranges/lazy_split_view.h b/libcxx/include/__ranges/lazy_split_view.h index cca9191d26818..938dca24cc4fc 100644 --- a/libcxx/include/__ranges/lazy_split_view.h +++ b/libcxx/include/__ranges/lazy_split_view.h @@ -86,13 +86,13 @@ class lazy_split_view : public view_interface> requires default_initializable<_View> && default_initializable<_Pattern> = default; - _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 lazy_split_view(_View __base, 
_Pattern __pattern) + _LIBCPP_HIDE_FROM_ABI constexpr explicit lazy_split_view(_View __base, _Pattern __pattern) : __base_(std::move(__base)), __pattern_(std::move(__pattern)) {} template requires constructible_from<_View, views::all_t<_Range>> && constructible_from<_Pattern, single_view>> - _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 lazy_split_view(_Range&& __r, range_value_t<_Range> __e) + _LIBCPP_HIDE_FROM_ABI constexpr explicit lazy_split_view(_Range&& __r, range_value_t<_Range> __e) : __base_(views::all(std::forward<_Range>(__r))), __pattern_(views::single(std::move(__e))) {} _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& diff --git a/libcxx/include/__ranges/split_view.h b/libcxx/include/__ranges/split_view.h index 2ec908ba4070e..17a2ceeca6686 100644 --- a/libcxx/include/__ranges/split_view.h +++ b/libcxx/include/__ranges/split_view.h @@ -78,14 +78,13 @@ class split_view : public view_interface> { requires default_initializable<_View> && default_initializable<_Pattern> = default; - _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 split_view(_View __base, _Pattern __pattern) + _LIBCPP_HIDE_FROM_ABI constexpr explicit split_view(_View __base, _Pattern __pattern) : __base_(std::move(__base)), __pattern_(std::move((__pattern))) {} template requires constructible_from<_View, views::all_t<_Range>> && constructible_from<_Pattern, single_view>> - _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 - split_view(_Range&& __range, range_value_t<_Range> __elem) + _LIBCPP_HIDE_FROM_ABI constexpr explicit split_view(_Range&& __range, range_value_t<_Range> __elem) : __base_(views::all(std::forward<_Range>(__range))), __pattern_(views::single(std::move(__elem))) {} _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& diff --git a/libcxx/include/__ranges/take_view.h b/libcxx/include/__ranges/take_view.h index 13cb4a285d9df..999f686537f2c 100644 --- a/libcxx/include/__ranges/take_view.h +++ b/libcxx/include/__ranges/take_view.h @@ 
-69,8 +69,7 @@ class take_view : public view_interface> { requires default_initializable<_View> = default; - _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 - take_view(_View __base, range_difference_t<_View> __count) + _LIBCPP_HIDE_FROM_ABI constexpr explicit take_view(_View __base, range_difference_t<_View> __count) : __base_(std::move(__base)), __count_(__count) { _LIBCPP_ASSERT_UNCATEGORIZED(__count >= 0, "count has to be greater than or equal to zero"); } diff --git a/libcxx/include/__ranges/take_while_view.h b/libcxx/include/__ranges/take_while_view.h index 4977f139fc555..955f681ef76ed 100644 --- a/libcxx/include/__ranges/take_while_view.h +++ b/libcxx/include/__ranges/take_while_view.h @@ -58,7 +58,7 @@ class _LIBCPP_ABI_LLVM18_NO_UNIQUE_ADDRESS take_while_view : public view_interfa requires default_initializable<_View> && default_initializable<_Pred> = default; - _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 take_while_view(_View __base, _Pred __pred) + _LIBCPP_HIDE_FROM_ABI constexpr explicit take_while_view(_View __base, _Pred __pred) : __base_(std::move(__base)), __pred_(std::in_place, std::move(__pred)) {} _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& diff --git a/libcxx/include/__ranges/transform_view.h b/libcxx/include/__ranges/transform_view.h index 98b38dd9b2aa1..5e817a3ca34d4 100644 --- a/libcxx/include/__ranges/transform_view.h +++ b/libcxx/include/__ranges/transform_view.h @@ -85,7 +85,7 @@ class _LIBCPP_ABI_LLVM18_NO_UNIQUE_ADDRESS transform_view : public view_interfac requires default_initializable<_View> && default_initializable<_Fn> = default; - _LIBCPP_HIDE_FROM_ABI constexpr _LIBCPP_EXPLICIT_SINCE_CXX23 transform_view(_View __base, _Fn __func) + _LIBCPP_HIDE_FROM_ABI constexpr explicit transform_view(_View __base, _Fn __func) : __func_(std::in_place, std::move(__func)), __base_(std::move(__base)) {} _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& diff --git 
a/libcxx/test/std/ranges/range.adaptors/range.drop.while/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.drop.while/ctor.view.pass.cpp index 326cabd637089..1f38f113545cf 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.drop.while/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.drop.while/ctor.view.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr drop_while_view(V base, Pred pred); // explicit since C++23 +// constexpr explicit drop_while_view(V base, Pred pred); // explicit since C++23 #include #include @@ -36,18 +36,9 @@ struct Pred { // SFINAE tests. -#if TEST_STD_VER >= 23 - static_assert(!test_convertible, View, Pred>(), "This constructor must be explicit"); -#else - -static_assert( test_convertible, View, Pred>(), - "This constructor must not be explicit"); - -#endif // TEST_STD_VER >= 23 - constexpr bool test() { { std::ranges::drop_while_view dwv{View{{}, MoveOnly{5}}, Pred{}}; diff --git a/libcxx/test/std/ranges/range.adaptors/range.drop/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.drop/ctor.view.pass.cpp index aeb31ae2fec4b..b036b34c25a72 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.drop/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.drop/ctor.view.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr drop_view(V base, range_difference_t count); // explicit since C++23 +// constexpr explicit drop_view(V base, range_difference_t count); // explicit since C++23 #include #include @@ -19,18 +19,9 @@ // SFINAE tests. 
-#if TEST_STD_VER >= 23 - static_assert(!test_convertible, View, std::ranges::range_difference_t>(), "This constructor must be explicit"); -#else - -static_assert(test_convertible, View, std::ranges::range_difference_t>(), - "This constructor must not be explicit"); - -#endif // TEST_STD_VER >= 23 - constexpr bool test() { std::ranges::drop_view dropView1(MoveOnlyView(), 4); assert(dropView1.size() == 4); diff --git a/libcxx/test/std/ranges/range.adaptors/range.filter/ctor.view_pred.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.filter/ctor.view_pred.pass.cpp index 8eaf6eba35709..dafddfe3d8789 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.filter/ctor.view_pred.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.filter/ctor.view_pred.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr filter_view(View, Pred); // explicit since C++23 +// constexpr explicit filter_view(View, Pred); // explicit since C++23 #include #include @@ -45,18 +45,9 @@ struct TrackingRange : TrackInitialization, std::ranges::view_base { // SFINAE tests. 
-#if TEST_STD_VER >= 23 - static_assert(!test_convertible, Range, Pred>(), "This constructor must be explicit"); -#else - -static_assert(test_convertible, Range, Pred>(), - "This constructor must not be explicit"); - -#endif // TEST_STD_VER >= 23 - constexpr bool test() { int buff[] = {1, 2, 3, 4, 5, 6, 7, 8}; diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.range.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.range.pass.cpp index 8eeaa3dae36db..cc06e260c221a 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.range.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.range.pass.cpp @@ -11,7 +11,7 @@ // template // requires constructible_from> && // constructible_from>> -// constexpr lazy_split_view(Range&& r, range_value_t e); // explicit since C++23 +// constexpr explicit lazy_split_view(Range&& r, range_value_t e); // explicit since C++23 #include @@ -88,20 +88,10 @@ static_assert(std::is_copy_constructible_v); // SFINAE tests. 
-#if TEST_STD_VER >= 23 - static_assert( !test_convertible, StrView, std::ranges::range_value_t>(), "This constructor must be explicit"); -#else - -static_assert( - test_convertible, StrView, std::ranges::range_value_t>(), - "This constructor must not be explicit"); - -#endif // TEST_STD_VER >= 23 - constexpr bool test() { { using V = std::ranges::lazy_split_view; diff --git a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.view.pass.cpp index 46af5498d59a0..efb5965ff386d 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.lazy.split/ctor.view.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr lazy_split_view(View base, Pattern pattern); // explicit since C++23 +// constexpr explicit lazy_split_view(View base, Pattern pattern); // explicit since C++23 #include #include @@ -48,18 +48,9 @@ using Pattern = ViewWithCounting; // SFINAE tests. -#if TEST_STD_VER >= 23 - static_assert(!test_convertible, View, Pattern>(), "This constructor must be explicit"); -#else - -static_assert(test_convertible, View, Pattern>(), - "This constructor must not be explicit"); - -#endif // TEST_STD_VER >= 23 - constexpr bool test() { // Calling the constructor with `(ForwardView, ForwardView)`. 
{ diff --git a/libcxx/test/std/ranges/range.adaptors/range.split/ctor.range.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.split/ctor.range.pass.cpp index bbe08befdb419..4e2f7c8890e82 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.split/ctor.range.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.split/ctor.range.pass.cpp @@ -11,7 +11,7 @@ // template // requires constructible_from> && // constructible_from>> -// constexpr split_view(Range&& r, range_value_t e); // explicit since C++23 +// constexpr explicit split_view(Range&& r, range_value_t e); // explicit since C++23 #include #include @@ -73,20 +73,10 @@ static_assert(std::is_copy_constructible_v); // SFINAE tests. -#if TEST_STD_VER >= 23 - static_assert( !test_convertible, StrView, std::ranges::range_value_t>(), "This constructor must be explicit"); -# else - -static_assert( - test_convertible, StrView, std::ranges::range_value_t>(), - "This constructor must not be explicit"); - -#endif // TEST_STD_VER >= 23 - constexpr bool test() { { using V = std::ranges::split_view; diff --git a/libcxx/test/std/ranges/range.adaptors/range.split/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.split/ctor.view.pass.cpp index 27fc4b84c5206..f68dda1e8ff48 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.split/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.split/ctor.view.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr split_view(View base, Pattern pattern); // explicit since C++23 +// constexpr explicit split_view(View base, Pattern pattern); // explicit since C++23 #include #include @@ -46,18 +46,9 @@ using Pattern = ViewWithCounting; // SFINAE tests. 
-#if TEST_STD_VER >= 23 - static_assert(!test_convertible, View, Pattern>(), "This constructor must be explicit"); -#else - -static_assert( test_convertible, View, Pattern>(), - "This constructor must not be explicit"); - -#endif // TEST_STD_VER >= 23 - constexpr bool test() { { std::string_view input = "abc def"; diff --git a/libcxx/test/std/ranges/range.adaptors/range.take.while/ctor.view.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.take.while/ctor.view.pass.cpp index 469b2698c8844..4d68bc9580dfe 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.take.while/ctor.view.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.take.while/ctor.view.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr take_while_view(V base, Pred pred); // explicit since C++23 +// constexpr explicit take_while_view(V base, Pred pred); // explicit since C++23 #include #include @@ -36,18 +36,9 @@ struct Pred { // SFINAE tests. -#if TEST_STD_VER >= 23 - static_assert(!test_convertible, View, Pred>(), "This constructor must be explicit"); -#else - -static_assert(test_convertible, View, Pred>(), - "This constructor must not be explicit"); - -#endif // TEST_STD_VER >= 23 - constexpr bool test() { { std::ranges::take_while_view twv{View{{}, MoveOnly{5}}, Pred{}}; diff --git a/libcxx/test/std/ranges/range.adaptors/range.take/ctor.view_count.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.take/ctor.view_count.pass.cpp index f37ffb0825ac1..e1ddf40febb2a 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.take/ctor.view_count.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.take/ctor.view_count.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr take_view(V base, range_difference_t count); // explicit since C++23 +// constexpr explicit take_view(V base, range_difference_t count); // explicit since C++23 #include #include @@ -21,18 +21,9 @@ // SFINAE tests. 
-#if TEST_STD_VER >= 23 - static_assert(!test_convertible, View, std::ranges::range_difference_t>(), "This constructor must be explicit"); -#else - -static_assert(test_convertible, View, std::ranges::range_difference_t>(), - "This constructor must be explicit"); - -#endif // TEST_STD_VER >= 23 - constexpr bool test() { int buffer[8] = {1, 2, 3, 4, 5, 6, 7, 8}; diff --git a/libcxx/test/std/ranges/range.adaptors/range.transform/ctor.view_function.pass.cpp b/libcxx/test/std/ranges/range.adaptors/range.transform/ctor.view_function.pass.cpp index 63a43d189256f..afe1faf7f3cdb 100644 --- a/libcxx/test/std/ranges/range.adaptors/range.transform/ctor.view_function.pass.cpp +++ b/libcxx/test/std/ranges/range.adaptors/range.transform/ctor.view_function.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr transform_view(View, F); // explicit since C++23 +// constexpr explicit transform_view(View, F); // explicit since C++23 #include #include @@ -32,18 +32,9 @@ struct F { // SFINAE tests. -#if TEST_STD_VER >= 23 - static_assert(!test_convertible, Range, F>(), "This constructor must be explicit"); -#else - -static_assert( test_convertible, Range, F>(), - "This constructor must not be explicit"); - -#endif // TEST_STD_VER >= 23 - constexpr bool test() { int buff[] = {1, 2, 3, 4, 5, 6, 7, 8}; diff --git a/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.first.last.pass.cpp b/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.first.last.pass.cpp index 67b7dc428a14f..25a78d0475f7e 100644 --- a/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.first.last.pass.cpp +++ b/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.first.last.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++03, c++11, c++14, c++17 -// constexpr iota_view(iterator first, see below last); // explicit since C++23 +// constexpr explicit iota_view(iterator first, see below last); // explicit since C++23 #include #include @@ -19,8 +19,6 @@ // SFINAE tests. 
-#if TEST_STD_VER >= 23 - std::ranges::iota_view view; static_assert(!test_convertible, @@ -38,25 +36,6 @@ static_assert(!test_convertible(), "This constructor must be explicit"); -#else - -static_assert(test_convertible, - decltype(std::ranges::iota_view{}.begin()), - decltype(std::ranges::iota_view{}.end())>(), - "This constructor must not be explicit"); - -static_assert(test_convertible, - decltype(std::ranges::iota_view{SomeInt{0}}.begin()), - decltype(std::unreachable_sentinel)>(), - "This constructor must not be explicit"); - -static_assert(test_convertible>, - decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.begin()), - decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.end())>(), - "This constructor must not be explicit"); - -#endif // TEST_STD_VER >= 23 - constexpr bool test() { { std::ranges::iota_view commonView(SomeInt(0), SomeInt(10)); @@ -91,4 +70,3 @@ int main(int, char**) { return 0; } - diff --git a/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.value.bound.pass.cpp b/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.value.bound.pass.cpp index 7528e1ccf3ee0..b6f4295498644 100644 --- a/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.value.bound.pass.cpp +++ b/libcxx/test/std/ranges/range.factories/range.iota.view/ctor.value.bound.pass.cpp @@ -14,7 +14,7 @@ TEST_CLANG_DIAGNOSTIC_IGNORED("-Wsign-compare") TEST_GCC_DIAGNOSTIC_IGNORED("-Wsign-compare") TEST_MSVC_DIAGNOSTIC_IGNORED(4018 4389) // various "signed/unsigned mismatch" -// constexpr iota_view(type_identity_t value, type_identity_t bound); // explicit since C++23 +// constexpr explicit iota_view(type_identity_t value, type_identity_t bound); // explicit since C++23 #include #include @@ -24,8 +24,6 @@ TEST_MSVC_DIAGNOSTIC_IGNORED(4018 4389) // various "signed/unsigned mismatch" // SFINAE tests. 
-#if TEST_STD_VER >= 23 - static_assert(!test_convertible, decltype(std::ranges::iota_view{}.begin()), decltype(std::ranges::iota_view{}.end())>(), @@ -41,25 +39,6 @@ static_assert(!test_convertible(), "This constructor must be explicit"); -#else - -static_assert( test_convertible, - decltype(std::ranges::iota_view{}.begin()), - decltype(std::ranges::iota_view{}.end())>(), - "This constructor must not be explicit"); - -static_assert( test_convertible, - decltype(std::ranges::iota_view{}.begin()), - decltype(std::unreachable_sentinel)>(), - "This constructor must not be explicit"); - -static_assert( test_convertible>, - decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.begin()), - decltype(std::ranges::iota_view{SomeInt(0), IntComparableWith(SomeInt(10))}.end())>(), - "This constructor must not be explicit"); - -#endif // TEST_STD_VER >= 23 - constexpr bool test() { { std::ranges::iota_view io(SomeInt(0), SomeInt(10)); From 420f85fed72e85efeaeca0c027a89b4e1d311d13 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 23 Jun 2026 16:41:08 +0100 Subject: [PATCH 207/511] [X86] madd.ll - add additional tests for matchPMADDWD folds that fail with larger source types (#205362) matchPMADDWD handles sext/shl cases as well which don't fold either on SSE/AVX512 targets --- llvm/test/CodeGen/X86/madd.ll | 156 ++++++++++++++++++++++++++++------ 1 file changed, 130 insertions(+), 26 deletions(-) diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll index 39fbe706ff369..056712cc5a66c 100644 --- a/llvm/test/CodeGen/X86/madd.ll +++ b/llvm/test/CodeGen/X86/madd.ll @@ -2142,6 +2142,110 @@ define <4 x i32> @larger_mul(<16 x i16> %A, <16 x i16> %B) { ret <4 x i32> %ret } +; FIXME: SSE fails to match PMADDWD +define <4 x i32> @larger_sext(<16 x i16> %A) { +; SSE2-LABEL: larger_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: psrad $16, %xmm1 +; 
SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] +; SSE2-NEXT: paddd %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSE42-LABEL: larger_sext: +; SSE42: # %bb.0: +; SSE42-NEXT: pmovsxwd %xmm0, %xmm1 +; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] +; SSE42-NEXT: pmovsxwd %xmm0, %xmm0 +; SSE42-NEXT: phaddd %xmm0, %xmm1 +; SSE42-NEXT: movdqa %xmm1, %xmm0 +; SSE42-NEXT: retq +; +; AVX1-LABEL: larger_sext: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,1,1,1,1,1,1] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: larger_sext: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,1,1,1,1,1,1] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: larger_sext: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %a = sext <16 x i16> %A to <16 x i32> + %odd = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> + %even = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> + %ret = add <4 x i32> %odd, %even + ret <4 x i32> %ret +} + +; FIXME: SSE fails to match PMADDWD +define <4 x i32> @larger_shl(<16 x i16> %A) { +; SSE2-LABEL: larger_shl: +; SSE2: # %bb.0: +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: pslld $7, %xmm0 +; SSE2-NEXT: pslld $7, %xmm1 +; SSE2-NEXT: movdqa %xmm1, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm0[1,3] +; SSE2-NEXT: paddd %xmm2, %xmm1 +; SSE2-NEXT: 
movdqa %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSE42-LABEL: larger_shl: +; SSE42: # %bb.0: +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; SSE42-NEXT: pmovsxwd %xmm1, %xmm1 +; SSE42-NEXT: pmovsxwd %xmm0, %xmm0 +; SSE42-NEXT: pslld $7, %xmm0 +; SSE42-NEXT: pslld $7, %xmm1 +; SSE42-NEXT: phaddd %xmm1, %xmm0 +; SSE42-NEXT: retq +; +; AVX1-LABEL: larger_shl: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,128,128,128,128,128,128,128] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: larger_shl: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,128,128,128,128,128,128,128] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: larger_shl: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 +; AVX512-NEXT: vpslld $7, %ymm0, %ymm0 +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %a = sext <16 x i16> %A to <16 x i32> + %shl = shl <16 x i32> %a, splat (i32 7) + %odd = shufflevector <16 x i32> %shl, <16 x i32> undef, <4 x i32> + %even = shufflevector <16 x i32> %shl, <16 x i32> undef, <4 x i32> + %ret = add <4 x i32> %odd, %even + ret <4 x i32> %ret +} + define <8 x i32> @pmaddwd_16(<16 x i16> %A, <16 x i16> %B) { ; SSE-LABEL: pmaddwd_16: ; SSE: # %bb.0: @@ -2991,7 +3095,7 @@ define i64 @sum_and_sum_of_squares(ptr %a, i32 %n) { ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm3, %xmm3 ; SSE2-NEXT: .p2align 4 -; SSE2-NEXT: .LBB33_1: # %vector.body +; SSE2-NEXT: .LBB35_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 ; SSE2-NEXT: movq {{.*#+}} xmm4 = mem[0],zero ; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] @@ -3005,7 +3109,7 @@ define i64 @sum_and_sum_of_squares(ptr %a, i32 %n) { ; SSE2-NEXT: paddd %xmm4, %xmm1 ; SSE2-NEXT: 
addq $8, %rdi ; SSE2-NEXT: addq $-8, %rax -; SSE2-NEXT: jne .LBB33_1 +; SSE2-NEXT: jne .LBB35_1 ; SSE2-NEXT: # %bb.2: # %middle.block ; SSE2-NEXT: paddd %xmm3, %xmm2 ; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] @@ -3031,7 +3135,7 @@ define i64 @sum_and_sum_of_squares(ptr %a, i32 %n) { ; SSE42-NEXT: pxor %xmm2, %xmm2 ; SSE42-NEXT: pxor %xmm3, %xmm3 ; SSE42-NEXT: .p2align 4 -; SSE42-NEXT: .LBB33_1: # %vector.body +; SSE42-NEXT: .LBB35_1: # %vector.body ; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 ; SSE42-NEXT: pmovzxbd {{.*#+}} xmm4 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; SSE42-NEXT: pmovzxbd {{.*#+}} xmm5 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero @@ -3043,7 +3147,7 @@ define i64 @sum_and_sum_of_squares(ptr %a, i32 %n) { ; SSE42-NEXT: paddd %xmm5, %xmm1 ; SSE42-NEXT: addq $8, %rdi ; SSE42-NEXT: addq $-8, %rax -; SSE42-NEXT: jne .LBB33_1 +; SSE42-NEXT: jne .LBB35_1 ; SSE42-NEXT: # %bb.2: # %middle.block ; SSE42-NEXT: paddd %xmm3, %xmm2 ; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] @@ -3067,7 +3171,7 @@ define i64 @sum_and_sum_of_squares(ptr %a, i32 %n) { ; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: .p2align 4 -; AVX1-NEXT: .LBB33_1: # %vector.body +; AVX1-NEXT: .LBB35_1: # %vector.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero ; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm3 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero @@ -3083,7 +3187,7 @@ define i64 @sum_and_sum_of_squares(ptr %a, i32 %n) { ; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0 ; AVX1-NEXT: addq $8, %rdi ; AVX1-NEXT: addq $-8, %rax -; AVX1-NEXT: jne .LBB33_1 +; AVX1-NEXT: jne .LBB35_1 ; AVX1-NEXT: # %bb.2: # %middle.block ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vpaddd 
%xmm2, %xmm1, %xmm1 @@ -3110,7 +3214,7 @@ define i64 @sum_and_sum_of_squares(ptr %a, i32 %n) { ; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX256-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX256-NEXT: .p2align 4 -; AVX256-NEXT: .LBB33_1: # %vector.body +; AVX256-NEXT: .LBB35_1: # %vector.body ; AVX256-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX256-NEXT: vpmovzxbd {{.*#+}} ymm2 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero ; AVX256-NEXT: vpaddd %ymm1, %ymm2, %ymm1 @@ -3118,7 +3222,7 @@ define i64 @sum_and_sum_of_squares(ptr %a, i32 %n) { ; AVX256-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ; AVX256-NEXT: addq $8, %rdi ; AVX256-NEXT: addq $-8, %rax -; AVX256-NEXT: jne .LBB33_1 +; AVX256-NEXT: jne .LBB35_1 ; AVX256-NEXT: # %bb.2: # %middle.block ; AVX256-NEXT: vextracti128 $1, %ymm1, %xmm2 ; AVX256-NEXT: vpaddd %xmm2, %xmm1, %xmm1 @@ -3175,7 +3279,7 @@ define i32 @sum_of_square_differences(ptr %a, ptr %b, i32 %n) { ; SSE2-NEXT: xorl %ecx, %ecx ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: .p2align 4 -; SSE2-NEXT: .LBB34_1: # %vector.body +; SSE2-NEXT: .LBB36_1: # %vector.body ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 ; SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero ; SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero @@ -3186,7 +3290,7 @@ define i32 @sum_of_square_differences(ptr %a, ptr %b, i32 %n) { ; SSE2-NEXT: paddd %xmm3, %xmm1 ; SSE2-NEXT: addq $8, %rcx ; SSE2-NEXT: cmpq %rcx, %rax -; SSE2-NEXT: jne .LBB34_1 +; SSE2-NEXT: jne .LBB36_1 ; SSE2-NEXT: # %bb.2: # %middle.block ; SSE2-NEXT: paddd %xmm0, %xmm1 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] @@ -3203,7 +3307,7 @@ define i32 @sum_of_square_differences(ptr %a, ptr %b, i32 %n) { ; SSE42-NEXT: xorl %ecx, %ecx ; SSE42-NEXT: pxor %xmm1, %xmm1 ; SSE42-NEXT: .p2align 4 -; SSE42-NEXT: .LBB34_1: # %vector.body +; SSE42-NEXT: .LBB36_1: # %vector.body ; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 ; 
SSE42-NEXT: pmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; SSE42-NEXT: pmovzxbw {{.*#+}} xmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero @@ -3212,7 +3316,7 @@ define i32 @sum_of_square_differences(ptr %a, ptr %b, i32 %n) { ; SSE42-NEXT: paddd %xmm3, %xmm1 ; SSE42-NEXT: addq $8, %rcx ; SSE42-NEXT: cmpq %rcx, %rax -; SSE42-NEXT: jne .LBB34_1 +; SSE42-NEXT: jne .LBB36_1 ; SSE42-NEXT: # %bb.2: # %middle.block ; SSE42-NEXT: paddd %xmm0, %xmm1 ; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] @@ -3228,7 +3332,7 @@ define i32 @sum_of_square_differences(ptr %a, ptr %b, i32 %n) { ; AVX1-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: xorl %ecx, %ecx ; AVX1-NEXT: .p2align 4 -; AVX1-NEXT: .LBB34_1: # %vector.body +; AVX1-NEXT: .LBB36_1: # %vector.body ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero @@ -3238,7 +3342,7 @@ define i32 @sum_of_square_differences(ptr %a, ptr %b, i32 %n) { ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7] ; AVX1-NEXT: addq $8, %rcx ; AVX1-NEXT: cmpq %rcx, %rax -; AVX1-NEXT: jne .LBB34_1 +; AVX1-NEXT: jne .LBB36_1 ; AVX1-NEXT: # %bb.2: # %middle.block ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 ; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 @@ -3256,7 +3360,7 @@ define i32 @sum_of_square_differences(ptr %a, ptr %b, i32 %n) { ; AVX256-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX256-NEXT: xorl %ecx, %ecx ; AVX256-NEXT: .p2align 4 -; AVX256-NEXT: .LBB34_1: # %vector.body +; AVX256-NEXT: .LBB36_1: # %vector.body ; AVX256-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX256-NEXT: vpmovzxbw {{.*#+}} xmm1 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero ; AVX256-NEXT: vpmovzxbw {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero @@ -3265,7 +3369,7 @@ define i32 @sum_of_square_differences(ptr %a, ptr %b, i32 %n) { ; AVX256-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; AVX256-NEXT: addq $8, %rcx ; AVX256-NEXT: cmpq %rcx, %rax -; AVX256-NEXT: jne .LBB34_1 +; AVX256-NEXT: jne .LBB36_1 ; AVX256-NEXT: # %bb.2: # %middle.block ; AVX256-NEXT: vextracti128 $1, %ymm0, %xmm1 ; AVX256-NEXT: vpaddd %xmm1, %xmm0, %xmm0 @@ -3400,7 +3504,7 @@ define i32 @add_used_by_loop_phi(ptr %a, ptr %b, i64 %offset_a, i64 %offset_b, i ; SSE2-NEXT: pxor %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm1, %xmm1 ; SSE2-NEXT: .p2align 4 -; SSE2-NEXT: .LBB38_1: # %loop +; SSE2-NEXT: .LBB40_1: # %loop ; SSE2-NEXT: # =>This Inner Loop Header: Depth=1 ; SSE2-NEXT: movdqu (%rdi,%rax), %xmm3 ; SSE2-NEXT: movdqu (%rsi,%rax), %xmm4 @@ -3418,7 +3522,7 @@ define i32 @add_used_by_loop_phi(ptr %a, ptr %b, i64 %offset_a, i64 %offset_b, i ; SSE2-NEXT: paddd %xmm3, %xmm1 ; SSE2-NEXT: addq $16, %rax ; SSE2-NEXT: cmpq %r8, %rax -; SSE2-NEXT: jb .LBB38_1 +; SSE2-NEXT: jb .LBB40_1 ; SSE2-NEXT: # %bb.2: # %afterloop ; SSE2-NEXT: paddd %xmm0, %xmm2 ; SSE2-NEXT: paddd %xmm0, %xmm1 @@ -3439,7 +3543,7 @@ define i32 @add_used_by_loop_phi(ptr %a, ptr %b, i64 %offset_a, i64 %offset_b, i ; SSE42-NEXT: pxor %xmm2, %xmm2 ; SSE42-NEXT: pxor %xmm1, %xmm1 ; SSE42-NEXT: .p2align 4 -; SSE42-NEXT: .LBB38_1: # %loop +; SSE42-NEXT: .LBB40_1: # %loop ; SSE42-NEXT: # =>This Inner Loop Header: Depth=1 ; SSE42-NEXT: pmovsxbw 8(%rdi,%rax), %xmm3 ; SSE42-NEXT: pmovsxbw (%rdi,%rax), %xmm4 @@ -3451,7 +3555,7 @@ define i32 @add_used_by_loop_phi(ptr %a, ptr %b, i64 %offset_a, i64 %offset_b, i ; SSE42-NEXT: paddd %xmm3, %xmm2 ; SSE42-NEXT: addq $16, %rax ; SSE42-NEXT: cmpq %r8, %rax -; SSE42-NEXT: jb .LBB38_1 +; SSE42-NEXT: jb .LBB40_1 ; SSE42-NEXT: # %bb.2: # 
%afterloop ; SSE42-NEXT: paddd %xmm0, %xmm2 ; SSE42-NEXT: paddd %xmm0, %xmm1 @@ -3471,7 +3575,7 @@ define i32 @add_used_by_loop_phi(ptr %a, ptr %b, i64 %offset_a, i64 %offset_b, i ; AVX1-NEXT: xorl %eax, %eax ; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX1-NEXT: .p2align 4 -; AVX1-NEXT: .LBB38_1: # %loop +; AVX1-NEXT: .LBB40_1: # %loop ; AVX1-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX1-NEXT: vpmovsxbw 8(%rdi,%rax), %xmm2 ; AVX1-NEXT: vpmovsxbw (%rdi,%rax), %xmm3 @@ -3485,7 +3589,7 @@ define i32 @add_used_by_loop_phi(ptr %a, ptr %b, i64 %offset_a, i64 %offset_b, i ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: addq $16, %rax ; AVX1-NEXT: cmpq %r8, %rax -; AVX1-NEXT: jb .LBB38_1 +; AVX1-NEXT: jb .LBB40_1 ; AVX1-NEXT: # %bb.2: # %afterloop ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 @@ -3508,7 +3612,7 @@ define i32 @add_used_by_loop_phi(ptr %a, ptr %b, i64 %offset_a, i64 %offset_b, i ; AVX2-NEXT: xorl %eax, %eax ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX2-NEXT: .p2align 4 -; AVX2-NEXT: .LBB38_1: # %loop +; AVX2-NEXT: .LBB40_1: # %loop ; AVX2-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX2-NEXT: vpmovsxbw (%rdi,%rax), %ymm2 ; AVX2-NEXT: vpmovsxbw (%rsi,%rax), %ymm3 @@ -3516,7 +3620,7 @@ define i32 @add_used_by_loop_phi(ptr %a, ptr %b, i64 %offset_a, i64 %offset_b, i ; AVX2-NEXT: vpaddd %ymm1, %ymm2, %ymm1 ; AVX2-NEXT: addq $16, %rax ; AVX2-NEXT: cmpq %r8, %rax -; AVX2-NEXT: jb .LBB38_1 +; AVX2-NEXT: jb .LBB40_1 ; AVX2-NEXT: # %bb.2: # %afterloop ; AVX2-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 @@ -3536,7 +3640,7 @@ define i32 @add_used_by_loop_phi(ptr %a, ptr %b, i64 %offset_a, i64 %offset_b, i ; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: xorl %eax, %eax ; AVX512-NEXT: .p2align 4 -; AVX512-NEXT: .LBB38_1: # %loop +; AVX512-NEXT: .LBB40_1: # %loop ; AVX512-NEXT: # =>This Inner Loop Header: Depth=1 ; AVX512-NEXT: vpmovsxbw (%rdi,%rax), %ymm1 ; AVX512-NEXT: 
vpmovsxbw (%rsi,%rax), %ymm2 @@ -3544,7 +3648,7 @@ define i32 @add_used_by_loop_phi(ptr %a, ptr %b, i64 %offset_a, i64 %offset_b, i ; AVX512-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ; AVX512-NEXT: addq $16, %rax ; AVX512-NEXT: cmpq %r8, %rax -; AVX512-NEXT: jb .LBB38_1 +; AVX512-NEXT: jb .LBB40_1 ; AVX512-NEXT: # %bb.2: # %afterloop ; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 From 1a5819284df72def12b7a678a41aa8588ca39e83 Mon Sep 17 00:00:00 2001 From: adams381 Date: Tue, 23 Jun 2026 10:56:45 -0500 Subject: [PATCH 208/511] [CIR] Lower elementwise saturating add/sub builtins (#203112) `__builtin_elementwise_add_sat` and `__builtin_elementwise_sub_sat` were still in the `errorBuiltinNYI` batch in `emitBuiltinExpr`, so any use hit "unimplemented builtin call". That blocks C++26 `std::add_sat`/`std::sub_sat` (libc++ `<__numeric/saturation_arithmetic.h>`), which lower directly onto these builtins. This lowers them the way classic CodeGen does in `CGBuiltin.cpp`: select the signed or unsigned saturating intrinsic from the operand's element type (`sadd.sat`/`uadd.sat` for add, `ssub.sat`/`usub.sat` for sub). Test coverage in `builtins-elementwise.c` exercises signed and unsigned, scalar and vector, at i32 and i16 widths, checking the CIR `cir.add sat`/`cir.sub sat` operations and the lowered `@llvm.{s,u}{add,sub}.sat` calls on both the CIR and classic `-emit-llvm` paths.
--- clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 25 ++++++- .../builtins-elementwise-bool-nyi.c | 14 ++++ .../CodeGenBuiltins/builtins-elementwise.c | 65 +++++++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) create mode 100644 clang/test/CIR/CodeGenBuiltins/builtins-elementwise-bool-nyi.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index 4fb7ffc13a2ce..e206353aac2c9 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -1695,7 +1695,30 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, mlir::ValueRange{a, b, c})); } case Builtin::BI__builtin_elementwise_add_sat: - case Builtin::BI__builtin_elementwise_sub_sat: + case Builtin::BI__builtin_elementwise_sub_sat: { + // cir.add/cir.sub do not model i1 arithmetic, so a bool-element + // saturating add/sub is not representable through the saturated op. + // Bail before emitScalarExpr: an ext-vector-of-bool operand would + // otherwise hit the NYI bool-vector load, which returns a null value + // and would crash op0.getType(). + QualType argTy = e->getArg(0)->getType(); + if (argTy->isBooleanType() || argTy->isExtVectorBoolType()) { + cgm.errorNYI(e->getSourceRange(), + "saturating add/sub on a boolean operand"); + return RValue::get(nullptr); + } + mlir::Location loc = getLoc(e->getExprLoc()); + mlir::Value op0 = emitScalarExpr(e->getArg(0)); + mlir::Value op1 = emitScalarExpr(e->getArg(1)); + assert(cir::isIntOrVectorOfIntType(op0.getType()) && + "elementwise saturating add/sub requires integer operands"); + mlir::Value val = + builtinIDIfNoAsmLabel == Builtin::BI__builtin_elementwise_add_sat + ? 
builder.createAdd(loc, op0, op1, cir::OverflowBehavior::Saturated) + : builder.createSub(loc, op0, op1, + cir::OverflowBehavior::Saturated); + return RValue::get(val); + } case Builtin::BI__builtin_elementwise_max: case Builtin::BI__builtin_elementwise_min: case Builtin::BI__builtin_elementwise_maxnum: diff --git a/clang/test/CIR/CodeGenBuiltins/builtins-elementwise-bool-nyi.c b/clang/test/CIR/CodeGenBuiltins/builtins-elementwise-bool-nyi.c new file mode 100644 index 0000000000000..933a1298ae5b9 --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/builtins-elementwise-bool-nyi.c @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir %s -verify -emit-cir -o - + +typedef _Bool vbool4 __attribute__((ext_vector_type(4))); + +void test_bool_sat(_Bool a, _Bool b, vbool4 va, vbool4 vb) { + // expected-error@+1 {{ClangIR code gen Not Yet Implemented: saturating add/sub on a boolean operand}} + (void)__builtin_elementwise_add_sat(a, b); + // expected-error@+1 {{ClangIR code gen Not Yet Implemented: saturating add/sub on a boolean operand}} + (void)__builtin_elementwise_sub_sat(a, b); + // expected-error@+1 {{ClangIR code gen Not Yet Implemented: saturating add/sub on a boolean operand}} + (void)__builtin_elementwise_add_sat(va, vb); + // expected-error@+1 {{ClangIR code gen Not Yet Implemented: saturating add/sub on a boolean operand}} + (void)__builtin_elementwise_sub_sat(va, vb); +} diff --git a/clang/test/CIR/CodeGenBuiltins/builtins-elementwise.c b/clang/test/CIR/CodeGenBuiltins/builtins-elementwise.c index c04739d737632..42525e6744190 100644 --- a/clang/test/CIR/CodeGenBuiltins/builtins-elementwise.c +++ b/clang/test/CIR/CodeGenBuiltins/builtins-elementwise.c @@ -7,6 +7,7 @@ // RUN: FileCheck --check-prefix=LLVM --input-file=%t-ogcg.ll %s typedef int vint4 __attribute__((ext_vector_type(4))); +typedef unsigned int vuint4 __attribute__((ext_vector_type(4))); typedef short vshort8 __attribute__((ext_vector_type(8))); typedef float vfloat4 
__attribute__((ext_vector_type(4))); typedef double vdouble4 __attribute__((ext_vector_type(4))); @@ -509,3 +510,67 @@ void test_builtin_elementwise_fshr(long long int i1, long long int i2, // LLVM: call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) vu1 = __builtin_elementwise_fshr(vu1, vu2, vu3); } + +void test_builtin_elementwise_add_sat(int i1, int i2, unsigned u1, unsigned u2, + short s1, short s2, vint4 vi1, vint4 vi2, + vuint4 vu1, vuint4 vu2, vshort8 vs1, + vshort8 vs2) { + // CIR-LABEL: test_builtin_elementwise_add_sat + // LLVM-LABEL: test_builtin_elementwise_add_sat + + // CIR: cir.add sat %{{.*}}, %{{.*}} : !s32i + // LLVM: call i32 @llvm.sadd.sat.i32(i32 %{{.*}}, i32 %{{.*}}) + i1 = __builtin_elementwise_add_sat(i1, i2); + + // CIR: cir.add sat %{{.*}}, %{{.*}} : !u32i + // LLVM: call i32 @llvm.uadd.sat.i32(i32 %{{.*}}, i32 %{{.*}}) + u1 = __builtin_elementwise_add_sat(u1, u2); + + // CIR: cir.add sat %{{.*}}, %{{.*}} : !s16i + // LLVM: call i16 @llvm.sadd.sat.i16(i16 %{{.*}}, i16 %{{.*}}) + s1 = __builtin_elementwise_add_sat(s1, s2); + + // CIR: cir.add sat %{{.*}}, %{{.*}} : !cir.vector<4 x !s32i> + // LLVM: call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + vi1 = __builtin_elementwise_add_sat(vi1, vi2); + + // CIR: cir.add sat %{{.*}}, %{{.*}} : !cir.vector<4 x !u32i> + // LLVM: call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + vu1 = __builtin_elementwise_add_sat(vu1, vu2); + + // CIR: cir.add sat %{{.*}}, %{{.*}} : !cir.vector<8 x !s16i> + // LLVM: call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + vs1 = __builtin_elementwise_add_sat(vs1, vs2); +} + +void test_builtin_elementwise_sub_sat(int i1, int i2, unsigned u1, unsigned u2, + short s1, short s2, vint4 vi1, vint4 vi2, + vuint4 vu1, vuint4 vu2, vshort8 vs1, + vshort8 vs2) { + // CIR-LABEL: test_builtin_elementwise_sub_sat + // LLVM-LABEL: test_builtin_elementwise_sub_sat + + // CIR: 
cir.sub sat %{{.*}}, %{{.*}} : !s32i + // LLVM: call i32 @llvm.ssub.sat.i32(i32 %{{.*}}, i32 %{{.*}}) + i1 = __builtin_elementwise_sub_sat(i1, i2); + + // CIR: cir.sub sat %{{.*}}, %{{.*}} : !u32i + // LLVM: call i32 @llvm.usub.sat.i32(i32 %{{.*}}, i32 %{{.*}}) + u1 = __builtin_elementwise_sub_sat(u1, u2); + + // CIR: cir.sub sat %{{.*}}, %{{.*}} : !s16i + // LLVM: call i16 @llvm.ssub.sat.i16(i16 %{{.*}}, i16 %{{.*}}) + s1 = __builtin_elementwise_sub_sat(s1, s2); + + // CIR: cir.sub sat %{{.*}}, %{{.*}} : !cir.vector<4 x !s32i> + // LLVM: call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + vi1 = __builtin_elementwise_sub_sat(vi1, vi2); + + // CIR: cir.sub sat %{{.*}}, %{{.*}} : !cir.vector<4 x !u32i> + // LLVM: call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + vu1 = __builtin_elementwise_sub_sat(vu1, vu2); + + // CIR: cir.sub sat %{{.*}}, %{{.*}} : !cir.vector<8 x !s16i> + // LLVM: call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) + vs1 = __builtin_elementwise_sub_sat(vs1, vs2); +} From 5cb869a9d7c81414d761391c12c05e6929202782 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Tue, 23 Jun 2026 12:15:29 -0400 Subject: [PATCH 209/511] Additional test coverage for WG14 N3037 (#202674) There was a request for additional test coverage in: https://github.com/llvm/llvm-project/pull/201650#discussion_r3376475306 --- clang/test/C/C23/n3037.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/clang/test/C/C23/n3037.c b/clang/test/C/C23/n3037.c index 198f0487bead1..8dd46ef69b414 100644 --- a/clang/test/C/C23/n3037.c +++ b/clang/test/C/C23/n3037.c @@ -930,3 +930,21 @@ struct GH199417_2 { // c17-note {{previous definition is here}} struct GH199417_2 { enum GH199417_E2 { eGH199417 } u; // c23-note {{field 'u' has type 'enum GH199417_E2' here}} }; + +struct GH199417_3 { // c17-note {{previous definition is here}} + struct GH199417_4 { // c17-note {{previous definition is here}} + union { 
int i; } u; // c23-note-re {{field 'u' has type 'union (unnamed at {{.*}})' here}} + } a; // c23-note {{field 'a' has type 'struct GH199417_4' here}} +}; + +// c23-error@+2 {{type 'struct GH199417_3' has incompatible definitions}} +// c17-error@+1 {{redefinition of 'GH199417_3'}} +struct GH199417_3 { + // c23-error@+2 {{type 'struct GH199417_4' has incompatible definitions}} + // c17-error@+1 {{redefinition of 'GH199417_4'}} + struct GH199417_4 { + enum GH199417_E3 { eeGH199417 } u; // c23-note {{field 'u' has type 'enum GH199417_E3' here}} + // FIXME: the below diagnostic uses type 'int' because of error recovery, + // it would be better to print the original type. + } a; // c23-note {{field 'a' has type 'int' here}} +}; From 00d611c8232b1c8bb42882f4e1b81bb8492cce98 Mon Sep 17 00:00:00 2001 From: Jan Leyonberg Date: Tue, 23 Jun 2026 12:20:04 -0400 Subject: [PATCH 210/511] [CIR] Add cir.builtin_int_cast operation to replace uses of builtin.unrealized_conversion_cast (#201592) This patch adds a new operation builtin_int_cast to handle casting between CIR integer types and builtin integer types. This will replace the current use of the builtin.unrealized_conversion_cast since that operation is only intended to be used temporarily when doing transformations.
Assisted-by: Cursor/Claude Opus 4.8 High --- .../CIR/Dialect/Builder/CIRBaseBuilder.h | 9 + clang/include/clang/CIR/Dialect/IR/CIROps.td | 38 +++ clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp | 4 +- clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp | 22 +- clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp | 4 +- clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 46 ++++ .../Dialect/Transforms/CIRCanonicalize.cpp | 14 +- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 34 +++ clang/test/CIR/CodeGenOpenACC/cache.c | 24 +- clang/test/CIR/CodeGenOpenACC/combined-copy.c | 158 ++++++------- .../test/CIR/CodeGenOpenACC/combined-copy.cpp | 32 +-- .../combined-firstprivate-clause.cpp | 100 ++++---- .../combined-private-clause.cpp | 80 +++---- .../combined-reduction-clause-default-ops.cpp | 72 +++--- .../combined-reduction-clause-float.cpp | 48 ++-- .../combined-reduction-clause-inline-ops.cpp | 108 ++++----- .../combined-reduction-clause-int.cpp | 72 +++--- .../combined-reduction-clause-outline-ops.cpp | 108 ++++----- clang/test/CIR/CodeGenOpenACC/combined.cpp | 182 +++++++-------- clang/test/CIR/CodeGenOpenACC/compute-copy.c | 158 ++++++------- .../test/CIR/CodeGenOpenACC/compute-copy.cpp | 32 +-- .../compute-firstprivate-clause.c | 48 ++-- .../compute-firstprivate-clause.cpp | 100 ++++---- .../CodeGenOpenACC/compute-private-clause.c | 36 +-- .../CodeGenOpenACC/compute-private-clause.cpp | 80 +++---- .../compute-reduction-clause-default-ops.c | 72 +++--- .../compute-reduction-clause-default-ops.cpp | 72 +++--- .../compute-reduction-clause-float.c | 48 ++-- .../compute-reduction-clause-float.cpp | 48 ++-- .../compute-reduction-clause-inline-ops.cpp | 108 ++++----- .../compute-reduction-clause-int.c | 72 +++--- .../compute-reduction-clause-int.cpp | 72 +++--- .../compute-reduction-clause-outline-ops.cpp | 108 ++++----- .../compute-reduction-clause-unsigned-int.c | 72 +++--- clang/test/CIR/CodeGenOpenACC/data.c | 56 ++--- .../test/CIR/CodeGenOpenACC/declare-copy.cpp | 24 +- 
.../CIR/CodeGenOpenACC/declare-copyin.cpp | 56 ++--- .../CIR/CodeGenOpenACC/declare-copyout.cpp | 24 +- .../CIR/CodeGenOpenACC/declare-create.cpp | 56 ++--- .../CodeGenOpenACC/declare-deviceresident.cpp | 56 ++--- .../test/CIR/CodeGenOpenACC/declare-link.cpp | 24 +- .../CIR/CodeGenOpenACC/declare-present.cpp | 24 +- clang/test/CIR/CodeGenOpenACC/enter-data.c | 24 +- clang/test/CIR/CodeGenOpenACC/exit-data.c | 24 +- .../firstprivate-clause-recipes.cpp | 84 +++---- clang/test/CIR/CodeGenOpenACC/host_data.c | 4 +- clang/test/CIR/CodeGenOpenACC/init.c | 6 +- clang/test/CIR/CodeGenOpenACC/kernels.c | 116 +++++----- .../CodeGenOpenACC/loop-private-clause.cpp | 80 +++---- .../loop-reduction-clause-default-ops.cpp | 72 +++--- .../loop-reduction-clause-float.cpp | 48 ++-- .../loop-reduction-clause-inline-ops.cpp | 108 ++++----- .../loop-reduction-clause-int.cpp | 72 +++--- .../loop-reduction-clause-outline-ops.cpp | 108 ++++----- clang/test/CIR/CodeGenOpenACC/loop.cpp | 44 ++-- clang/test/CIR/CodeGenOpenACC/parallel.c | 138 +++++------ .../private-clause-array-recipes-CtorDtor.cpp | 64 +++--- .../private-clause-array-recipes-NoOps.cpp | 32 +-- ...-clause-pointer-array-recipes-CtorDtor.cpp | 216 +++++++++--------- ...ate-clause-pointer-array-recipes-NoOps.cpp | 132 +++++------ ...ivate-clause-pointer-array-recipes-int.cpp | 48 ++-- ...rivate-clause-pointer-recipes-CtorDtor.cpp | 68 +++--- .../private-clause-pointer-recipes-NoOps.cpp | 44 ++-- .../private-clause-pointer-recipes-int.cpp | 20 +- .../reduction-clause-recipes.cpp | 120 +++++----- clang/test/CIR/CodeGenOpenACC/serial.c | 56 ++--- clang/test/CIR/CodeGenOpenACC/set.c | 10 +- clang/test/CIR/CodeGenOpenACC/shutdown.c | 6 +- clang/test/CIR/CodeGenOpenACC/update.c | 24 +- clang/test/CIR/CodeGenOpenACC/wait.c | 28 +-- clang/test/CIR/IR/builtin-int-cast.cir | 50 ++++ .../test/CIR/IR/invalid-builtin-int-cast.cir | 37 +++ clang/test/CIR/Lowering/builtin-int-cast.cir | 56 +++++ .../CIR/Transforms/builtin-int-cast-fold.cir | 
29 +++ 74 files changed, 2430 insertions(+), 2139 deletions(-) create mode 100644 clang/test/CIR/IR/builtin-int-cast.cir create mode 100644 clang/test/CIR/IR/invalid-builtin-int-cast.cir create mode 100644 clang/test/CIR/Lowering/builtin-int-cast.cir create mode 100644 clang/test/CIR/Transforms/builtin-int-cast-fold.cir diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index 0db205f8d5b79..777f239cc63ba 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -532,6 +532,15 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { return createCast(cir::CastKind::integral, src, newTy); } + mlir::Value createBuiltinIntCast(mlir::Location loc, mlir::Value src, + mlir::Type newTy) { + return cir::BuiltinIntCastOp::create(*this, loc, newTy, src); + } + + mlir::Value createBuiltinIntCast(mlir::Value src, mlir::Type newTy) { + return createBuiltinIntCast(src.getLoc(), src, newTy); + } + mlir::Value createIntToPtr(mlir::Value src, mlir::Type newTy) { return createCast(cir::CastKind::int_to_ptr, src, newTy); } diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index f4f22cd297ea6..f48c5c7ee5209 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -288,6 +288,44 @@ def CIR_CastOp : CIR_Op<"cast", [ } +//===----------------------------------------------------------------------===// +// BuiltinIntCastOp +//===----------------------------------------------------------------------===// + +def CIR_IntOrBuiltinIntType : AnyTypeOf<[CIR_IntType, AnyInteger, Index]>; + +def CIR_BuiltinIntCastOp : CIR_Op<"builtin_int_cast", [Pure]> { + let summary = "Cast between a CIR integer and a builtin integer"; + let description = [{ + Convert between a CIR integer type (`!cir.int`) and a builtin MLIR integer + type (`AnyInteger`, 
e.g. `i32`, `si32`, `ui32`) or `index`, and vice versa. + + This allows using operations from e.g. OpenMP or OpenACC dialects + that expect the builtin types with CIR operations. Casting can be done + in either direction. + + Example: + + ```mlir + // CIR integer cast to a builtin integer. + %0 = cir.builtin_int_cast %ciri : !cir.int -> i32 + + // Builtin induction variable / bound cast to CIR type. + %1 = cir.builtin_int_cast %iv : index -> !cir.int + ``` + }]; + + let arguments = (ins CIR_IntOrBuiltinIntType:$src); + let results = (outs CIR_IntOrBuiltinIntType:$result); + + let assemblyFormat = [{ + $src `:` type($src) `->` type($result) attr-dict + }]; + + let hasVerifier = 1; + let hasFolder = 1; +} + //===----------------------------------------------------------------------===// // DynamicCastOp //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp index e7bf3bcc85c0b..e9725940dd501 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACC.cpp @@ -48,9 +48,7 @@ mlir::Value CIRGenFunction::emitOpenACCIntExpr(const Expr *intExpr) { ? 
mlir::IntegerType::SignednessSemantics::Signed : mlir::IntegerType::SignednessSemantics::Unsigned); - auto conversionOp = mlir::UnrealizedConversionCastOp::create( - builder, exprLoc, targetType, expr); - return conversionOp.getResult(0); + return builder.createBuiltinIntCast(exprLoc, expr, targetType); } mlir::Value CIRGenFunction::createOpenACCConstantInt(mlir::Location loc, diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp index ea1fb2ce96edd..b82c854d2c03e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp @@ -125,9 +125,7 @@ mlir::Value OpenACCRecipeBuilderBase::makeBoundsAlloca( auto getUpperBound = [&](mlir::Value bound) { auto upperBoundVal = mlir::acc::GetUpperboundOp::create(builder, loc, idxType, bound); - return mlir::UnrealizedConversionCastOp::create(builder, loc, itrTy, - upperBoundVal.getResult()) - .getResult(0); + return builder.createBuiltinIntCast(loc, upperBoundVal.getResult(), itrTy); }; auto isArrayTy = [&](QualType ty) { @@ -252,12 +250,12 @@ std::pair OpenACCRecipeBuilderBase::createBoundsLoop( // get the lower and upper bound for iterating over. auto lowerBoundVal = mlir::acc::GetLowerboundOp::create(builder, loc, idxType, bound); - auto lbConversion = mlir::UnrealizedConversionCastOp::create( - builder, loc, itrTy, lowerBoundVal.getResult()); + mlir::Value lbConversion = + builder.createBuiltinIntCast(loc, lowerBoundVal.getResult(), itrTy); auto upperBoundVal = mlir::acc::GetUpperboundOp::create(builder, loc, idxType, bound); - auto ubConversion = mlir::UnrealizedConversionCastOp::create( - builder, loc, itrTy, upperBoundVal.getResult()); + mlir::Value ubConversion = + builder.createBuiltinIntCast(loc, upperBoundVal.getResult(), itrTy); // Create a memory location for the iterator. 
auto itr = cir::AllocaOp::create(builder, loc, itrPtrTy, "iter", itrAlign); @@ -266,20 +264,18 @@ std::pair OpenACCRecipeBuilderBase::createBoundsLoop( if (inverse) { cir::ConstantOp constOne = builder.getConstInt(loc, itrTy, 1); - auto sub = - cir::SubOp::create(builder, loc, ubConversion.getResult(0), constOne); + auto sub = cir::SubOp::create(builder, loc, ubConversion, constOne); // Upperbound is exclusive, so subtract 1. builder.CIRBaseBuilderTy::createStore(loc, sub, itr); } else { // Lowerbound is inclusive, so we can include it. - builder.CIRBaseBuilderTy::createStore(loc, lbConversion.getResult(0), - itr); + builder.CIRBaseBuilderTy::createStore(loc, lbConversion, itr); } // Save the 'end' iterator based on whether we are inverted or not. This // end iterator never changes, so we can just get it and convert it, so no // need to store/load/etc. - auto endItr = inverse ? lbConversion : ubConversion; + mlir::Value endItr = inverse ? lbConversion : ubConversion; builder.createFor( loc, @@ -289,7 +285,7 @@ std::pair OpenACCRecipeBuilderBase::createBoundsLoop( // Use 'not equal' since we are just doing an increment/decrement. auto cmp = builder.createCompare( loc, inverse ? cir::CmpOpKind::ge : cir::CmpOpKind::lt, loadCur, - endItr.getResult(0)); + endItr); builder.createCondition(cmp); }, /*bodyBuilder=*/ diff --git a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp index 71cbed1be6471..897fa419cd331 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp @@ -187,9 +187,7 @@ CIRGenFunction::emitOpenACCWaitConstruct(const OpenACCWaitConstruct &s) { ? 
mlir::IntegerType::SignednessSemantics::Signed : mlir::IntegerType::SignednessSemantics::Unsigned); - auto conversionOp = mlir::UnrealizedConversionCastOp::create( - builder, exprLoc, targetType, expr); - return conversionOp.getResult(0); + return builder.createBuiltinIntCast(exprLoc, expr, targetType); }; // Emit the correct 'wait' clauses. diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index f374e9beb2068..bf7efc2172fe9 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -17,6 +17,7 @@ #include "clang/CIR/Dialect/IR/CIRTypes.h" #include "mlir/IR/Attributes.h" +#include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/PatternMatch.h" #include "mlir/IR/Value.h" @@ -969,6 +970,51 @@ OpFoldResult cir::CastOp::fold(FoldAdaptor adaptor) { return {}; } +//===----------------------------------------------------------------------===// +// BuiltinIntCastOp +//===----------------------------------------------------------------------===// + +LogicalResult cir::BuiltinIntCastOp::verify() { + mlir::Type srcType = getSrc().getType(); + mlir::Type resType = getType(); + + auto srcCirInt = mlir::dyn_cast(srcType); + auto resCirInt = mlir::dyn_cast(resType); + + // One side must be a CIR integer; the other must be a builtin + // integer or index type. + if (static_cast(srcCirInt) == static_cast(resCirInt)) + return emitOpError() + << "requires exactly one '!cir.int' operand or result; the other " + "must be a builtin integer or 'index' type"; + + mlir::Type builtinType = srcCirInt ? resType : srcType; + if (!mlir::isa(builtinType)) + return emitOpError() << "requires a builtin integer or 'index' type on the " + "non-CIR side"; + + // The cast preserves bit width. 'index' has no fixed width, so only check + // when the builtin side is a fixed-width integer. + if (auto builtinInt = mlir::dyn_cast(builtinType)) { + cir::IntType cirInt = srcCirInt ?
srcCirInt : resCirInt; + if (cirInt.getWidth() != builtinInt.getWidth()) + return emitOpError() + << "requires the CIR and builtin integer types to have the same " + "width; use 'cir.cast' for width conversions"; + } + + return success(); +} + +OpFoldResult cir::BuiltinIntCastOp::fold(FoldAdaptor adaptor) { + // Fold: builtin_int_cast(builtin_int_cast(x)) -> x + // Inner source type must match the cast's result type. + if (auto inner = getSrc().getDefiningOp()) + if (inner.getSrc().getType() == getType()) + return inner.getSrc(); + return {}; +} + //===----------------------------------------------------------------------===// // CallOp //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp index a88b0accef57c..b4890a1b5bf37 100644 --- a/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp +++ b/clang/lib/CIR/Dialect/Transforms/CIRCanonicalize.cpp @@ -70,13 +70,13 @@ void CIRCanonicalizePass::runOnOperation() { // Many operations are here to perform a manual `fold` in // applyOpPatternsGreedily. - if (isa(op)) + if (isa(op)) ops.push_back(op); }); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 27eba4ee326a5..af729e95c7709 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -1436,6 +1436,40 @@ mlir::LogicalResult CIRToLLVMCastOpLowering::matchAndRewrite( return mlir::success(); } +mlir::LogicalResult CIRToLLVMBuiltinIntCastOpLowering::matchAndRewrite( + cir::BuiltinIntCastOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const { + // Both the CIR integer and the builtin integer/index lower to LLVM integer + // types, so this cast becomes an integer resize. 
Signedness is taken from + // the CIR integer side (the builtin/index side is treated as signless). + bool isUnsigned = true; + if (auto cirSrc = mlir::dyn_cast(op.getSrc().getType())) + isUnsigned = cirSrc.isUnsigned(); + else if (auto cirDst = mlir::dyn_cast(op.getType())) + isUnsigned = cirDst.isUnsigned(); + + mlir::Value llvmSrc = adaptor.getSrc(); + mlir::Type llvmDstTy = getTypeConverter()->convertType(op.getType()); + auto srcIntTy = mlir::cast(llvmSrc.getType()); + auto dstIntTy = mlir::cast(llvmDstTy); + unsigned srcWidth = srcIntTy.getWidth(); + unsigned dstWidth = dstIntTy.getWidth(); + + // Fixed-width builtin integers must match the CIR integer width. + // If the converted LLVM widths differ, the non-CIR side must have been + // 'index' type (target dependent width). + assert((srcWidth == dstWidth || + mlir::isa(op.getSrc().getType()) || + mlir::isa(op.getType())) && + "only index casts may change width during lowering"); + + // For equal widths getLLVMIntCast returns the source unchanged, so casts + // between CIR integers and fixed-width builtin integers lower to a no-op. 
+ rewriter.replaceOp(op, getLLVMIntCast(rewriter, llvmSrc, dstIntTy, isUnsigned, + srcWidth, dstWidth)); + return mlir::success(); +} + static mlir::Value convertToIndexTy(mlir::ConversionPatternRewriter &rewriter, mlir::ModuleOp mod, mlir::Value index, mlir::Type baseTy, cir::IntType strideTy) { diff --git a/clang/test/CIR/CodeGenOpenACC/cache.c b/clang/test/CIR/CodeGenOpenACC/cache.c index 8920d0037a125..c67f0dd4d2f3a 100644 --- a/clang/test/CIR/CodeGenOpenACC/cache.c +++ b/clang/test/CIR/CodeGenOpenACC/cache.c @@ -27,7 +27,7 @@ void acc_cache() { } } // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -35,9 +35,9 @@ void acc_cache() { // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) -> !cir.ptr> {name = "iArr[1]", structured = false} // // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5> - // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32 + // CHECK-NEXT: %[[FIVE_CAST:.*]] = cir.builtin_int_cast %[[FIVE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -52,7 +52,7 @@ void acc_cache() { #pragma acc cache(iArr[1], fArr[1:5]) } // CHECK-NEXT: %[[ONE:.*]] 
= cir.const #cir.int<1> - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -60,9 +60,9 @@ void acc_cache() { // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) -> !cir.ptr> {name = "iArr[1]", structured = false} // // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5> - // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32 + // CHECK-NEXT: %[[FIVE_CAST:.*]] = cir.builtin_int_cast %[[FIVE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -78,7 +78,7 @@ void acc_cache() { } // CHECK-NEXT: acc.parallel combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -86,9 +86,9 @@ void acc_cache() { // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) -> !cir.ptr> {name = "iArr[1]", structured = false} // // CHECK-NEXT: 
%[[ONE:.*]] = cir.const #cir.int<1> - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5> - // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32 + // CHECK-NEXT: %[[FIVE_CAST:.*]] = cir.builtin_int_cast %[[FIVE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -108,7 +108,7 @@ void acc_cache() { } // CHECK-NEXT: acc.parallel combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -116,9 +116,9 @@ void acc_cache() { // CHECK-NEXT: %[[CACHE1:.*]] = acc.cache varPtr(%[[IARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) -> !cir.ptr> {name = "iArr[1]", structured = false} // // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[FIVE:.*]] = cir.const #cir.int<5> - // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE]] : !s32i to si32 + // CHECK-NEXT: %[[FIVE_CAST:.*]] = cir.builtin_int_cast %[[FIVE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // 
CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[FIVE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/combined-copy.c b/clang/test/CIR/CodeGenOpenACC/combined-copy.c index 933579e3054b4..a9277667af2b4 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-copy.c +++ b/clang/test/CIR/CodeGenOpenACC/combined-copy.c @@ -145,7 +145,7 @@ void acc_compute(int parmVar) { #pragma acc kernels loop copy(localVar1, localVar2) async(1) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ONE_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[LOCAL2]] : !cir.ptr) async(%[[ONE_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "localVar2"} loc // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]], %[[COPYIN2]] : !cir.ptr, !cir.ptr) async(%[[ONE_CAST]] : si32) { @@ -160,7 +160,7 @@ void acc_compute(int parmVar) { #pragma acc parallel loop async(1) copy(localVar1) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ONE_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr) async(%[[ONE_CAST]] : si32) { // CHECK-NEXT: acc.loop combined(parallel) { @@ -184,7 +184,7 @@ void acc_compute(int parmVar) { #pragma acc 
kernels loop copy(localVar1) device_type(nvidia, radeon) async(1) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr) async(%[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(kernels) { @@ -208,9 +208,9 @@ void acc_compute(int parmVar) { #pragma acc serial loop copy(localVar1) async(0) device_type(nvidia, radeon) async(1) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i - // CHECK-NEXT: %[[ZERO_CAST:.*]] = builtin.unrealized_conversion_cast %[[ZERO]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CAST:.*]] = cir.builtin_int_cast %[[ZERO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(serial) { @@ -223,7 +223,7 @@ void acc_compute(int parmVar) { #pragma acc kernels loop copy(localVar1) async 
device_type(nvidia, radeon) async(1) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async([#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc // CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr) async([#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(kernels) { @@ -236,7 +236,7 @@ void acc_compute(int parmVar) { #pragma acc parallel loop copy(localVar1) async(1) device_type(nvidia, radeon) async for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async([#acc.device_type, #acc.device_type], %[[ONE_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc // CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr) async([#acc.device_type, #acc.device_type], %[[ONE_CAST]] : si32) { @@ -250,9 +250,9 @@ void acc_compute(int parmVar) { #pragma acc serial loop copy(localVar1) async(0) device_type(nvidia, radeon) async(1) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i - // CHECK-NEXT: %[[ZERO_CAST:.*]] = builtin.unrealized_conversion_cast %[[ZERO]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CAST:.*]] = cir.builtin_int_cast %[[ZERO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // 
CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc // CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]] : !cir.ptr) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(serial) { @@ -265,7 +265,7 @@ void acc_compute(int parmVar) { #pragma acc parallel loop copy(localArray[3]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -282,9 +282,9 @@ void acc_compute(int parmVar) { #pragma acc serial loop copy(localArray[1:3]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : 
si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -301,7 +301,7 @@ void acc_compute(int parmVar) { for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc @@ -317,7 +317,7 @@ void acc_compute(int parmVar) { #pragma acc parallel loop copy(localArray[1:]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 @@ -334,9 +334,9 @@ void acc_compute(int parmVar) { #pragma acc serial loop copy(localArray[localVar1:localVar2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: 
%[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -353,7 +353,7 @@ void acc_compute(int parmVar) { for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc @@ -369,7 +369,7 @@ void acc_compute(int parmVar) { #pragma acc parallel loop copy(localArray[localVar1:]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 @@ -386,7 +386,7 @@ void acc_compute(int parmVar) { #pragma acc serial loop copy(localPointer[3]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 
: i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -403,9 +403,9 @@ void acc_compute(int parmVar) { #pragma acc kernels loop copy(localPointer[1:3]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -422,7 +422,7 @@ void acc_compute(int parmVar) { for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc @@ -438,9 +438,9 @@ void acc_compute(int parmVar) { #pragma acc serial loop copy(localPointer[localVar1:localVar2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = 
cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -457,7 +457,7 @@ void acc_compute(int parmVar) { for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc @@ -474,7 +474,7 @@ void acc_compute(int parmVar) { #pragma acc parallel loop copy(localArrayOfPtrs[3]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -491,13 +491,13 @@ void acc_compute(int parmVar) { #pragma acc serial loop copy(localArrayOfPtrs[3][2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[TWO:.*]] = cir.const 
#cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -514,9 +514,9 @@ void acc_compute(int parmVar) { #pragma acc kernels loop copy(localArrayOfPtrs[localVar1:localVar2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -532,7 +532,7 @@ void acc_compute(int parmVar) { #pragma acc parallel loop 
copy(localArrayOfPtrs[localVar1:]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 @@ -550,7 +550,7 @@ void acc_compute(int parmVar) { for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc @@ -566,7 +566,7 @@ void acc_compute(int parmVar) { #pragma acc kernels loop copy(localArrayOfPtrs[localVar1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -583,13 +583,13 @@ void acc_compute(int parmVar) { #pragma acc parallel loop copy(localArrayOfPtrs[localVar1][localVar2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] 
= builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV2_CAST]] : si16) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -606,14 +606,14 @@ void acc_compute(int parmVar) { #pragma acc serial loop copy(localArrayOfPtrs[localVar1][localVar2:parmVar]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[PV:.*]] = cir.load{{.*}} %[[PARM]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[PV_CAST:.*]] = builtin.unrealized_conversion_cast %[[PV]] : !s32i to si32 + // CHECK-NEXT: %[[PV_CAST:.*]] = cir.builtin_int_cast %[[PV]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV2_CAST]] : si16) extent(%[[PV_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -631,12 +631,12 @@ void acc_compute(int parmVar) { for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[PV:.*]] = cir.load{{.*}} %[[PARM]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[PV_CAST:.*]] = builtin.unrealized_conversion_cast %[[PV]] : !s32i to si32 + // CHECK-NEXT: %[[PV_CAST:.*]] = cir.builtin_int_cast %[[PV]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[PV_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -654,14 +654,14 @@ void acc_compute(int parmVar) { for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) 
extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -677,16 +677,16 @@ void acc_compute(int parmVar) { #pragma acc serial loop copy(localArrayOfPtrs[localVar1:localVar2][1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: 
%[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -703,19 +703,19 @@ void acc_compute(int parmVar) { #pragma acc kernels loop copy(threeDArray[1][2][3]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: 
%[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -732,23 +732,23 @@ void acc_compute(int parmVar) { #pragma acc parallel loop copy(threeDArray[1:1][2:1][3:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) 
startIdx(%[[ZERO_CONST]] : i64) loc // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS3:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -813,7 +813,7 @@ void acc_compute_members() { #pragma acc parallel loop copy(localStruct.arrayMember[2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[TWO_CONST:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_CONST]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_CONST]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -831,9 +831,9 @@ void acc_compute_members() { #pragma acc serial loop copy(localStruct.arrayMember[1:2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: 
%[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[TWO_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -850,7 +850,7 @@ void acc_compute_members() { #pragma acc kernels loop copy(localStruct.arrayMember[1:]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 @@ -869,7 +869,7 @@ void acc_compute_members() { for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[TWO_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) @@ -898,13 +898,13 @@ void acc_compute_members() { #pragma acc kernels loop copy(localStruct.twoDArrayMember[3][2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: 
%[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -922,16 +922,16 @@ void acc_compute_members() { #pragma acc parallel loop copy(localStruct.twoDArrayMember[1:3][1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = 
cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -960,13 +960,13 @@ void acc_compute_members() { #pragma acc kernels loop copy(localStruct.ptrArrayMember[3][2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -984,16 +984,16 @@ void acc_compute_members() { #pragma acc parallel loop copy(localStruct.ptrArrayMember[1:3][1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -1022,13 +1022,13 @@ void acc_compute_members() { #pragma acc kernels loop copy(localStruct.ptrPtrMember[3][2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -1046,16 +1046,16 @@ void acc_compute_members() { #pragma acc parallel loop copy(localStruct.ptrPtrMember[1:3][1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] 
: i64) diff --git a/clang/test/CIR/CodeGenOpenACC/combined-copy.cpp b/clang/test/CIR/CodeGenOpenACC/combined-copy.cpp index ac1ba566e1a7f..46727dea1a4d6 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-copy.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-copy.cpp @@ -30,7 +30,7 @@ void InlineFunc() { #pragma acc kernels loop copy(arrayMember[2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -48,13 +48,13 @@ void InlineFunc() { #pragma acc kernels loop copy(twoDArrayMember[1][2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -97,7 +97,7 @@ void InlineFunc() { #pragma acc serial loop copy(iSTy.Member[1]) for(int 
i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -128,7 +128,7 @@ void InlineFunc() { #pragma acc kernels loop copy(this->arrayMember[2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -145,13 +145,13 @@ void InlineFunc() { #pragma acc kernels loop copy(this->twoDArrayMember[1][2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: 
%[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -194,7 +194,7 @@ void InlineFunc() { #pragma acc serial loop copy(this->iSTy.Member[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -238,7 +238,7 @@ void StructTy::OutlineFunc() { #pragma acc kernels loop copy(arrayMember[2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -255,13 +255,13 @@ void StructTy::OutlineFunc() { #pragma acc kernels loop copy(twoDArrayMember[1][2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -303,7 +303,7 @@ void StructTy::OutlineFunc() { #pragma acc serial loop copy(iSTy.Member[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -333,7 +333,7 @@ void StructTy::OutlineFunc() { #pragma acc kernels loop copy(this->arrayMember[2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -351,13 +351,13 @@ void StructTy::OutlineFunc() { #pragma acc kernels loop copy(this->twoDArrayMember[1][2]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds 
lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -400,7 +400,7 @@ void StructTy::OutlineFunc() { #pragma acc serial loop copy(this->iSTy.Member[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 diff --git a/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp b/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp index 31f60aa6a1d05..7a3932a3fc2cb 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-firstprivate-clause.cpp @@ -93,9 +93,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -129,9 +129,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -165,9 +165,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // 
CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -200,9 +200,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -235,9 +235,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -270,9 +270,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: 
!cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -299,9 +299,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -435,7 +435,7 @@ extern "C" void acc_combined() { #pragma acc serial loop firstprivate(someIntArr[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 
+ // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -450,7 +450,7 @@ extern "C" void acc_combined() { #pragma acc parallel loop firstprivate(someFloatArr[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -465,7 +465,7 @@ extern "C" void acc_combined() { #pragma acc serial loop firstprivate(noCopyArr[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -480,7 +480,7 @@ extern "C" void acc_combined() { #pragma acc parallel loop firstprivate(hasCopyArr[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -495,7 +495,7 @@ extern "C" void acc_combined() { #pragma acc parallel loop firstprivate(notDefCtorArr[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = 
cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -510,7 +510,7 @@ extern "C" void acc_combined() { #pragma acc parallel loop firstprivate(dtorArr[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -525,42 +525,42 @@ extern "C" void acc_combined() { #pragma acc serial loop firstprivate(someIntArr[1], someFloatArr[1], noCopyArr[1], hasCopyArr[1], notDefCtorArr[1], dtorArr[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_i) -> !cir.ptr> {name = "someIntArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i 
to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_f) -> !cir.ptr> {name = "someFloatArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct) -> !cir.ptr> {name = "noCopyArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.firstprivate 
varPtr(%[[HASCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_13CopyConstruct) -> !cir.ptr> {name = "hasCopyArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.firstprivate varPtr(%[[NOTDEFCTORARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor) -> !cir.ptr> {name = "notDefCtorArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -581,9 +581,9 @@ extern "C" void acc_combined() { #pragma acc parallel loop firstprivate(someIntArr[1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: 
%[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -597,9 +597,9 @@ extern "C" void acc_combined() { #pragma acc serial loop firstprivate(someFloatArr[1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -613,9 +613,9 @@ extern "C" void acc_combined() { #pragma acc parallel loop firstprivate(noCopyArr[1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -629,9 +629,9 @@ 
extern "C" void acc_combined() { #pragma acc serial loop firstprivate(hasCopyArr[1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -645,9 +645,9 @@ extern "C" void acc_combined() { #pragma acc parallel loop firstprivate(notDefCtorArr[1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -661,9 +661,9 @@ extern "C" void acc_combined() { #pragma acc parallel loop firstprivate(dtorArr[1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -677,49 +677,49 @@ extern "C" void acc_combined() { #pragma acc parallel loop firstprivate(someIntArr[1:1], someFloatArr[1:1], noCopyArr[1:1], hasCopyArr[1:1], notDefCtorArr[1:1], dtorArr[1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_i) -> !cir.ptr> {name = "someIntArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: 
%[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_f) -> !cir.ptr> {name = "someFloatArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct) -> !cir.ptr> {name = "noCopyArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const 
#cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.firstprivate varPtr(%[[HASCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_13CopyConstruct) -> !cir.ptr> {name = "hasCopyArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.firstprivate varPtr(%[[NOTDEFCTORARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor) -> !cir.ptr> {name = "notDefCtorArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = 
builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp b/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp index c7bd7b86b1b61..6a8416d5d7e45 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-private-clause.cpp @@ -96,9 +96,9 @@ struct HasDtor { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.private.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -130,9 +130,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast 
%[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -266,7 +266,7 @@ extern "C" void acc_combined() { for(int i = 0; i < 5; ++i); // CHECK-NEXT: acc.serial combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -281,7 +281,7 @@ extern "C" void acc_combined() { for(int i = 0; i < 5; ++i); // CHECK-NEXT: acc.parallel combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -296,7 +296,7 @@ extern "C" void acc_combined() { for(int i = 0; i < 5; ++i); // CHECK-NEXT: acc.serial combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] 
= arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -311,7 +311,7 @@ extern "C" void acc_combined() { for(int i = 0; i < 5; ++i); // CHECK-NEXT: acc.parallel combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -326,7 +326,7 @@ extern "C" void acc_combined() { for(int i = 0; i < 5; ++i); // CHECK-NEXT: acc.parallel combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -341,7 +341,7 @@ extern "C" void acc_combined() { for(int i = 0; i < 5; ++i); // CHECK-NEXT: acc.parallel combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -356,42 +356,42 @@ extern "C" void acc_combined() { for(int i = 0; i < 5; ++i); // CHECK-NEXT: acc.serial combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 
// CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_i) -> !cir.ptr> {name = "someIntArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_f) -> !cir.ptr> {name = "someFloatArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_15NoCopyConstruct) -> !cir.ptr> {name = "noCopyArr[1]"} // 
CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_13CopyConstruct) -> !cir.ptr> {name = "hasCopyArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_14NonDefaultCtor) -> !cir.ptr> {name = "notDefCtorArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -412,9 +412,9 @@ extern "C" void acc_combined() { for(int i = 0; i < 5; ++i); // 
CHECK-NEXT: acc.parallel combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -428,9 +428,9 @@ extern "C" void acc_combined() { for(int i = 0; i < 5; ++i); // CHECK-NEXT: acc.serial combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -444,9 +444,9 @@ extern "C" void acc_combined() { for(int i = 0; i < 5; ++i); // CHECK-NEXT: acc.parallel combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = 
cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -460,9 +460,9 @@ extern "C" void acc_combined() { for(int i = 0; i < 5; ++i); // CHECK-NEXT: acc.serial combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -476,9 +476,9 @@ extern "C" void acc_combined() { for(int i = 0; i < 5; ++i); // CHECK-NEXT: acc.parallel combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = 
cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -492,9 +492,9 @@ extern "C" void acc_combined() { for(int i = 0; i < 5; ++i); // CHECK-NEXT: acc.parallel combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -508,49 +508,49 @@ extern "C" void acc_combined() { for(int i = 0; i < 5; ++i); // CHECK-NEXT: acc.parallel combined(loop) { // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) 
extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_i) -> !cir.ptr> {name = "someIntArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_f) -> !cir.ptr> {name = "someFloatArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private 
varPtr(%[[NOCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_15NoCopyConstruct) -> !cir.ptr> {name = "noCopyArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_13CopyConstruct) -> !cir.ptr> {name = "hasCopyArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) 
recipe(@privatization__Bcnt1__ZTSA5_14NonDefaultCtor) -> !cir.ptr> {name = "notDefCtorArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp index 1993646e61b08..5966c57d8a79e 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp @@ -1301,9 +1301,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], 
%[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1342,9 +1342,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1407,9 +1407,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1448,9 +1448,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // 
CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1513,9 +1513,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1554,9 +1554,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = 
cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1661,9 +1661,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1702,9 +1702,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// 
CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1809,9 +1809,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1844,9 +1844,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ 
-1897,9 +1897,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1932,9 +1932,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1985,9 +1985,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> 
index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2020,9 +2020,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2073,9 +2073,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2114,9 +2114,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2219,9 +2219,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca 
"iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2260,9 +2260,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp index 921c75ddf404a..23cae4113dac9 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-float.cpp @@ -404,9 +404,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = 
cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -432,9 +432,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -468,9 +468,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -496,9 +496,9 @@ void 
acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -532,9 +532,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -560,9 +560,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -603,9 +603,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -631,9 +631,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = 
acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -674,9 +674,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -702,9 +702,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // 
CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -748,9 +748,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -776,9 +776,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp 
b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp index 018807f626e45..079c3db68f5d3 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-inline-ops.cpp @@ -1399,9 +1399,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1440,9 +1440,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // 
CHECK-NEXT: cir.for : cond { @@ -1469,9 +1469,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1502,9 +1502,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1543,9 +1543,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // 
CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1572,9 +1572,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1605,9 +1605,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i 
+// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1646,9 +1646,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1681,9 +1681,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] 
= builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1714,9 +1714,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1755,9 +1755,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : 
!cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1790,9 +1790,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1823,9 +1823,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1864,9 +1864,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, 
%[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1893,9 +1893,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1926,9 +1926,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// 
CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1967,9 +1967,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1996,9 +1996,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = 
acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2029,9 +2029,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2070,9 +2070,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = 
cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2099,9 +2099,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2132,9 +2132,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { 
@@ -2173,9 +2173,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2205,9 +2205,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2238,9 +2238,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // 
CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2279,9 +2279,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2311,9 +2311,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: 
%[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp index d347616452bae..4d72e5a28d4f3 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-int.cpp @@ -570,9 +570,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -598,9 +598,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> 
index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -634,9 +634,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -662,9 +662,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -698,9 +698,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -726,9 +726,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca 
"iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -769,9 +769,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -797,9 +797,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -840,9 +840,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : 
!cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -868,9 +868,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -904,9 +904,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: 
%[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -932,9 +932,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -968,9 +968,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to 
!u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -996,9 +996,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1032,9 +1032,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : 
cond { @@ -1060,9 +1060,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1106,9 +1106,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1134,9 +1134,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = 
acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp index a1e50feaa1e78..0bf5e790cebed 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-outline-ops.cpp @@ -1398,9 +1398,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1439,9 +1439,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: 
!cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1468,9 +1468,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1501,9 +1501,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: 
%[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1542,9 +1542,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1571,9 +1571,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound 
%[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1604,9 +1604,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1645,9 +1645,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : 
index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1680,9 +1680,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1713,9 +1713,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1754,9 +1754,9 @@ void 
acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1789,9 +1789,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1822,9 +1822,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = 
acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1863,9 +1863,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1892,9 +1892,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = 
cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1925,9 +1925,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1966,9 +1966,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1995,9 +1995,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2028,9 +2028,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], 
%[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2069,9 +2069,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2098,9 +2098,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2131,9 +2131,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca 
"openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2172,9 +2172,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2204,9 +2204,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2237,9 +2237,9 @@ void acc_combined() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2278,9 +2278,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2310,9 +2310,9 @@ void acc_combined() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i diff --git a/clang/test/CIR/CodeGenOpenACC/combined.cpp b/clang/test/CIR/CodeGenOpenACC/combined.cpp index d0c78d0cc34aa..cd3496ebc77b8 100644 --- a/clang/test/CIR/CodeGenOpenACC/combined.cpp +++ b/clang/test/CIR/CodeGenOpenACC/combined.cpp @@ -351,7 +351,7 @@ extern "C" void acc_combined(int N, int cond) { for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: acc.parallel combined(loop) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 
1 : i64 // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 @@ -365,9 +365,9 @@ extern "C" void acc_combined(int N, int cond) { for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: acc.kernels combined(loop) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV2:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD2]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV2:.*]] = cir.builtin_int_cast %[[N_LOAD2]] : !s32i -> si32 // CHECK-NEXT: acc.loop combined(kernels) gang({num=%[[N_CONV]] : si32}, {num=%[[N_CONV2]] : si32} [#acc.device_type], {num=%[[N_CONV2]] : si32} [#acc.device_type]) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -377,7 +377,7 @@ extern "C" void acc_combined(int N, int cond) { for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: acc.kernels combined(loop) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 // CHECK-NEXT: acc.loop combined(kernels) gang({static=%[[N_CONV]] : si32}, {static=%[[STAR_CONST]] : i64} [#acc.device_type]) { // CHECK: acc.yield @@ -388,16 +388,16 @@ extern "C" void acc_combined(int N, int cond) { for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: acc.kernels combined(loop) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast 
%[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i // CHECK-NEXT: %[[CIR_ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.add nsw %[[N_LOAD2]], %[[CIR_ONE_CONST]] : !s32i - // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32 + // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = cir.builtin_int_cast %[[N_PLUS_ONE]] : !s32i -> si32 // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 // CHECK-NEXT: %[[N_LOAD3:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i // CHECK-NEXT: %[[CIR_TWO_CONST:.*]] = cir.const #cir.int<2> : !s32i // CHECK-NEXT: %[[N_PLUS_TWO:.*]] = cir.add nsw %[[N_LOAD3]], %[[CIR_TWO_CONST]] : !s32i - // CHECK-NEXT: %[[N_PLUS_TWO_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_TWO]] : !s32i to si32 + // CHECK-NEXT: %[[N_PLUS_TWO_CONV:.*]] = cir.builtin_int_cast %[[N_PLUS_TWO]] : !s32i -> si32 // CHECK-NEXT: acc.loop combined(kernels) gang({static=%[[N_CONV]] : si32, num=%[[N_PLUS_ONE_CONV]] : si32}, {static=%[[STAR_CONST]] : i64, num=%[[N_PLUS_TWO_CONV]] : si32} [#acc.device_type]) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -417,7 +417,7 @@ extern "C" void acc_combined(int N, int cond) { for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: acc.kernels combined(loop) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.loop combined(kernels) worker(%[[N_CONV]] : si32) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -437,7 +437,7 @@ extern "C" void acc_combined(int N, int cond) { for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: acc.kernels combined(loop) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = 
builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.loop combined(kernels) worker([#acc.device_type, #acc.device_type], %[[N_CONV]] : si32) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -448,7 +448,7 @@ extern "C" void acc_combined(int N, int cond) { for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: acc.kernels combined(loop) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.loop combined(kernels) worker([#acc.device_type], %[[N_CONV]] : si32 [#acc.device_type], %[[N_CONV]] : si32 [#acc.device_type]) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -459,11 +459,11 @@ extern "C" void acc_combined(int N, int cond) { for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: acc.kernels combined(loop) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.add nsw %[[N_LOAD2]], %[[ONE_CONST]] : !s32i - // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32 + // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = cir.builtin_int_cast %[[N_PLUS_ONE]] : !s32i -> si32 // CHECK-NEXT: acc.loop combined(kernels) worker(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type]) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -476,7 +476,7 @@ extern "C" void acc_combined(int N, int 
cond) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.add nsw %[[N_LOAD]], %[[ONE_CONST]] : !s32i - // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32 + // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = cir.builtin_int_cast %[[N_PLUS_ONE]] : !s32i -> si32 // CHECK-NEXT: acc.loop combined(kernels) worker(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type]) { // CHECK: acc.terminator // CHECK-NEXT: } loc @@ -504,7 +504,7 @@ extern "C" void acc_combined(int N, int cond) { for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: acc.kernels combined(loop) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.loop combined(kernels) vector(%[[N_CONV]] : si32) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -524,7 +524,7 @@ extern "C" void acc_combined(int N, int cond) { for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: acc.kernels combined(loop) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.loop combined(kernels) vector([#acc.device_type, #acc.device_type], %[[N_CONV]] : si32) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -535,11 +535,11 @@ extern "C" void acc_combined(int N, int cond) { for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: acc.kernels combined(loop) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast 
%[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.add nsw %[[N_LOAD2]], %[[ONE_CONST]] : !s32i - // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32 + // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = cir.builtin_int_cast %[[N_PLUS_ONE]] : !s32i -> si32 // CHECK-NEXT: acc.loop combined(kernels) vector(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type]) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -552,7 +552,7 @@ extern "C" void acc_combined(int N, int cond) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.add nsw %[[N_LOAD]], %[[ONE_CONST]] : !s32i - // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32 + // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = cir.builtin_int_cast %[[N_PLUS_ONE]] : !s32i -> si32 // CHECK-NEXT: acc.loop combined(kernels) vector(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type]) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -563,13 +563,13 @@ extern "C" void acc_combined(int N, int cond) { for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: acc.kernels combined(loop) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV2:.*]] = 
builtin.unrealized_conversion_cast %[[N_LOAD2]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV2:.*]] = cir.builtin_int_cast %[[N_LOAD2]] : !s32i -> si32 // CHECK-NEXT: %[[N_LOAD3:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV3:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD3]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV3:.*]] = cir.builtin_int_cast %[[N_LOAD3]] : !s32i -> si32 // CHECK-NEXT: %[[N_LOAD4:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV4:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD4]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV4:.*]] = cir.builtin_int_cast %[[N_LOAD4]] : !s32i -> si32 // CHECK-NEXT: acc.loop combined(kernels) worker(%[[N_CONV]] : si32, %[[N_CONV3]] : si32 [#acc.device_type]) vector(%[[N_CONV2]] : si32, %[[N_CONV4]] : si32 [#acc.device_type]) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -597,7 +597,7 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc kernels loop wait(1) device_type(nvidia) wait for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels combined(loop) wait([#acc.device_type], {%[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.loop combined(kernels) { // CHECK: acc.yield @@ -608,7 +608,7 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc parallel loop wait device_type(nvidia) wait(1) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel combined(loop) wait([#acc.device_type], {%[[ONE_CAST]] : si32} 
[#acc.device_type]) { // CHECK-NEXT: acc.loop combined(parallel) { // CHECK: acc.yield @@ -619,9 +619,9 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc serial loop wait(1) device_type(nvidia) wait(1) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL2]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE_LITERAL2]] : !s32i -> si32 // CHECK-NEXT: acc.serial combined(loop) wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(serial) { // CHECK: acc.yield @@ -632,9 +632,9 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc kernels loop wait(devnum: cond : 1) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels combined(loop) wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.loop combined(kernels) { // CHECK: acc.yield @@ -645,13 +645,13 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc parallel loop wait(devnum: cond : 1) device_type(nvidia) wait(devnum: cond : 1) for(unsigned I = 0; I < N; ++I); // 
CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST2:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel combined(loop) wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(parallel) { // CHECK: acc.yield @@ -662,11 +662,11 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc serial loop wait(devnum: cond : 1, 2) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // 
CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.serial combined(loop) wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) { // CHECK-NEXT: acc.loop combined(serial) { // CHECK: acc.yield @@ -677,17 +677,17 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc kernels loop wait(devnum: cond : 1, 2) device_type(nvidia, radeon) wait(devnum: cond : 1, 2) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST2:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: 
%[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST2:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels combined(loop) wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type], {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(kernels) { // CHECK: acc.yield @@ -698,9 +698,9 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc parallel loop wait(cond, 1) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel combined(loop) wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.loop combined(parallel) { // CHECK: acc.yield @@ -711,9 +711,9 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc serial loop wait(queues: cond, 1) device_type(radeon) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: 
%[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.serial combined(loop) wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.loop combined(serial) { // CHECK: acc.yield @@ -724,7 +724,7 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc parallel loop num_gangs(1) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel combined(loop) num_gangs({%[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.loop combined(parallel) { // CHECK: acc.yield @@ -735,7 +735,7 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc kernels loop num_gangs(cond) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.kernels combined(loop) num_gangs({%[[CONV_CAST]] : si32}) { // CHECK-NEXT: acc.loop combined(kernels) { // CHECK: acc.yield @@ -746,11 +746,11 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc parallel loop num_gangs(1, cond, 2) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel combined(loop) num_gangs({%[[ONE_CAST]] : si32, %[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32}) { // CHECK-NEXT: acc.loop combined(parallel) { // CHECK: acc.yield @@ -761,9 +761,9 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc kernels loop num_gangs(1) device_type(radeon) num_gangs(cond) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.kernels combined(loop) num_gangs({%[[ONE_CAST]] : si32}, {%[[CONV_CAST]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(kernels) { // CHECK: acc.yield @@ -774,17 +774,17 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc parallel loop num_gangs(1, cond, 2) device_type(radeon) num_gangs(4, 5, 6) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // 
CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i - // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CAST:.*]] = cir.builtin_int_cast %[[FOUR_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[FIVE_LITERAL:.*]] = cir.const #cir.int<5> : !s32i - // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FIVE_CAST:.*]] = cir.builtin_int_cast %[[FIVE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[SIX_LITERAL:.*]] = cir.const #cir.int<6> : !s32i - // CHECK-NEXT: %[[SIX_CAST:.*]] = builtin.unrealized_conversion_cast %[[SIX_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[SIX_CAST:.*]] = cir.builtin_int_cast %[[SIX_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel combined(loop) num_gangs({%[[ONE_CAST]] : si32, %[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32}, {%[[FOUR_CAST]] : si32, %[[FIVE_CAST]] : si32, %[[SIX_CAST]] : si32} [#acc.device_type]) // CHECK-NEXT: acc.loop combined(parallel) { // CHECK: acc.yield @@ -795,17 +795,17 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc parallel loop num_gangs(1, cond, 2) device_type(radeon, nvidia) num_gangs(4, 5, 6) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: 
%[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i - // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CAST:.*]] = cir.builtin_int_cast %[[FOUR_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[FIVE_LITERAL:.*]] = cir.const #cir.int<5> : !s32i - // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FIVE_CAST:.*]] = cir.builtin_int_cast %[[FIVE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[SIX_LITERAL:.*]] = cir.const #cir.int<6> : !s32i - // CHECK-NEXT: %[[SIX_CAST:.*]] = builtin.unrealized_conversion_cast %[[SIX_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[SIX_CAST:.*]] = cir.builtin_int_cast %[[SIX_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel combined(loop) num_gangs({%[[ONE_CAST]] : si32, %[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32}, {%[[FOUR_CAST]] : si32, %[[FIVE_CAST]] : si32, %[[SIX_CAST]] : si32} [#acc.device_type], {%[[FOUR_CAST]] : si32, %[[FIVE_CAST]] : si32, %[[SIX_CAST]] : si32} [#acc.device_type]) // CHECK-NEXT: acc.loop combined(parallel) { // CHECK: acc.yield @@ -816,7 +816,7 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc parallel loop num_workers(cond) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.parallel combined(loop) num_workers(%[[CONV_CAST]] : si32) { // CHECK-NEXT: acc.loop combined(parallel) { // CHECK: acc.yield @@ -827,9 +827,9 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc kernels loop num_workers(cond) device_type(nvidia) num_workers(2u) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !u32i -> ui32 // CHECK-NEXT: acc.kernels combined(loop) num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(kernels) { // CHECK: acc.yield @@ -840,11 +840,11 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc parallel loop num_workers(cond) device_type(nvidia, host) num_workers(2) device_type(radeon) num_workers(3) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel combined(loop) num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type], %[[TWO_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(parallel) { // CHECK: acc.yield @@ -855,11 +855,11 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc kernels loop num_workers(cond) device_type(nvidia) num_workers(2) device_type(radeon, multicore) num_workers(4) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i - // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CAST:.*]] = cir.builtin_int_cast %[[FOUR_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels combined(loop) num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type], %[[FOUR_CAST]] : si32 [#acc.device_type], %[[FOUR_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(kernels) { // CHECK: acc.yield @@ -870,9 +870,9 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc parallel loop device_type(nvidia) num_workers(2) device_type(radeon) num_workers(3) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // 
CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel combined(loop) num_workers(%[[TWO_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(parallel) { // CHECK: acc.yield @@ -883,7 +883,7 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc parallel loop vector_length(cond) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.parallel combined(loop) vector_length(%[[CONV_CAST]] : si32) { // CHECK-NEXT: acc.loop combined(parallel) { // CHECK: acc.yield @@ -894,9 +894,9 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc kernels loop vector_length(cond) device_type(nvidia) vector_length(2u) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !u32i -> ui32 // CHECK-NEXT: acc.kernels combined(loop) 
vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(kernels) { // CHECK: acc.yield @@ -907,11 +907,11 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc parallel loop vector_length(cond) device_type(nvidia, host) vector_length(2) device_type(radeon) vector_length(3) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel combined(loop) vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type], %[[TWO_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(parallel) { // CHECK: acc.yield @@ -922,11 +922,11 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc kernels loop vector_length(cond) device_type(nvidia) vector_length(2) device_type(radeon, multicore) vector_length(4) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const 
#cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i - // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CAST:.*]] = cir.builtin_int_cast %[[FOUR_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels combined(loop) vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type], %[[FOUR_CAST]] : si32 [#acc.device_type], %[[FOUR_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(kernels) { // CHECK: acc.yield @@ -937,9 +937,9 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc parallel loop device_type(nvidia) vector_length(2) device_type(radeon) vector_length(3) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel combined(loop) vector_length(%[[TWO_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(parallel) { // CHECK: acc.yield @@ -959,7 +959,7 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc serial loop async(cond) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : 
!s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.serial combined(loop) async(%[[CONV_CAST]] : si32) { // CHECK-NEXT: acc.loop combined(serial) { // CHECK: acc.yield @@ -979,9 +979,9 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc parallel loop async(3) device_type(nvidia, radeon) async(cond) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.parallel combined(loop) async(%[[THREE_CAST]] : si32, %[[CONV_CAST]] : si32 [#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(parallel) { // CHECK: acc.yield @@ -992,7 +992,7 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc serial loop async device_type(nvidia, radeon) async(cond) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.serial combined(loop) async([#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.loop combined(serial) { // CHECK: acc.yield @@ -1003,7 +1003,7 @@ extern "C" void acc_combined(int N, int cond) { #pragma acc kernels loop async(3) device_type(nvidia, radeon) async for(unsigned I = 0; I < 
N; ++I); // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels combined(loop) async([#acc.device_type, #acc.device_type], %[[THREE_CAST]] : si32) { // CHECK-NEXT: acc.loop combined(kernels) { // CHECK: acc.yield diff --git a/clang/test/CIR/CodeGenOpenACC/compute-copy.c b/clang/test/CIR/CodeGenOpenACC/compute-copy.c index f35bfe6f996e0..ab1721754ec56 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-copy.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-copy.c @@ -122,7 +122,7 @@ void acc_compute(int parmVar) { #pragma acc kernels copy(localVar1, localVar2) async(1) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ONE_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc // CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[LOCAL2]] : !cir.ptr) async(%[[ONE_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "localVar2"} loc // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]], %[[COPYIN2]] : !cir.ptr, !cir.ptr) async(%[[ONE_CAST]] : si32) { @@ -134,7 +134,7 @@ void acc_compute(int parmVar) { #pragma acc parallel async(1) copy(localVar1) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ONE_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc // CHECK-NEXT: 
acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr) async(%[[ONE_CAST]] : si32) { // CHECK-NEXT: acc.yield @@ -152,7 +152,7 @@ void acc_compute(int parmVar) { #pragma acc kernels copy(localVar1) device_type(nvidia, radeon) async(1) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr) async(%[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.terminator @@ -170,9 +170,9 @@ void acc_compute(int parmVar) { #pragma acc serial copy(localVar1) async(0) device_type(nvidia, radeon) async(1) ; // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i - // CHECK-NEXT: %[[ZERO_CAST:.*]] = builtin.unrealized_conversion_cast %[[ZERO]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CAST:.*]] = cir.builtin_int_cast %[[ZERO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.yield @@ -182,7 +182,7 @@ void acc_compute(int parmVar) { #pragma acc 
kernels copy(localVar1) async device_type(nvidia, radeon) async(1) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async([#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc // CHECK-NEXT: acc.kernels dataOperands(%[[COPYIN1]] : !cir.ptr) async([#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.terminator @@ -192,7 +192,7 @@ void acc_compute(int parmVar) { #pragma acc parallel copy(localVar1) async(1) device_type(nvidia, radeon) async ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async([#acc.device_type, #acc.device_type], %[[ONE_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc // CHECK-NEXT: acc.parallel dataOperands(%[[COPYIN1]] : !cir.ptr) async([#acc.device_type, #acc.device_type], %[[ONE_CAST]] : si32) { @@ -203,9 +203,9 @@ void acc_compute(int parmVar) { #pragma acc serial copy(localVar1) async(0) device_type(nvidia, radeon) async(1) ; // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i - // CHECK-NEXT: %[[ZERO_CAST:.*]] = builtin.unrealized_conversion_cast %[[ZERO]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CAST:.*]] = cir.builtin_int_cast %[[ZERO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // 
CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[LOCAL1]] : !cir.ptr) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "localVar1"} loc // CHECK-NEXT: acc.serial dataOperands(%[[COPYIN1]] : !cir.ptr) async(%[[ZERO_CAST]] : si32, %[[ONE_CAST]] : si32 [#acc.device_type], %[[ONE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.yield @@ -215,7 +215,7 @@ void acc_compute(int parmVar) { #pragma acc parallel copy(localArray[3]) ; // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -229,9 +229,9 @@ void acc_compute(int parmVar) { #pragma acc serial copy(localArray[1:3]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -245,7 +245,7 @@ void acc_compute(int parmVar) { ; // CHECK-NEXT: %[[ZERO_CONST:.*]] = 
arith.constant 0 : i64 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc @@ -258,7 +258,7 @@ void acc_compute(int parmVar) { #pragma acc parallel copy(localArray[1:]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 @@ -271,9 +271,9 @@ void acc_compute(int parmVar) { #pragma acc serial copy(localArray[localVar1:localVar2]) ; // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -287,7 +287,7 @@ void 
acc_compute(int parmVar) { ; // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc @@ -300,7 +300,7 @@ void acc_compute(int parmVar) { #pragma acc parallel copy(localArray[localVar1:]) ; // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 @@ -314,7 +314,7 @@ void acc_compute(int parmVar) { #pragma acc serial copy(localPointer[3]) ; // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -328,9 +328,9 @@ void acc_compute(int parmVar) { #pragma acc kernels copy(localPointer[1:3]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] 
: !s32i -> si32 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -344,7 +344,7 @@ void acc_compute(int parmVar) { ; // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc @@ -357,9 +357,9 @@ void acc_compute(int parmVar) { #pragma acc serial copy(localPointer[localVar1:localVar2]) ; // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds 
lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -373,7 +373,7 @@ void acc_compute(int parmVar) { ; // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc @@ -387,7 +387,7 @@ void acc_compute(int parmVar) { #pragma acc parallel copy(localArrayOfPtrs[3]) ; // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -400,13 +400,13 @@ void acc_compute(int parmVar) { #pragma acc serial copy(localArrayOfPtrs[3][2]) ; // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc // 
CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -420,9 +420,9 @@ void acc_compute(int parmVar) { #pragma acc kernels copy(localArrayOfPtrs[localVar1:localVar2]) ; // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -435,7 +435,7 @@ void acc_compute(int parmVar) { #pragma acc parallel copy(localArrayOfPtrs[localVar1:]) ; // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 @@ -450,7 +450,7 @@ void acc_compute(int parmVar) { ; // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: 
%[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc @@ -463,7 +463,7 @@ void acc_compute(int parmVar) { #pragma acc kernels copy(localArrayOfPtrs[localVar1]) ; // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -477,13 +477,13 @@ void acc_compute(int parmVar) { #pragma acc parallel copy(localArrayOfPtrs[localVar1][localVar2]) ; // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV2_CAST]] : si16) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = 
cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -497,14 +497,14 @@ void acc_compute(int parmVar) { #pragma acc serial copy(localArrayOfPtrs[localVar1][localVar2:parmVar]) ; // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[PV:.*]] = cir.load{{.*}} %[[PARM]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[PV_CAST:.*]] = builtin.unrealized_conversion_cast %[[PV]] : !s32i to si32 + // CHECK-NEXT: %[[PV_CAST:.*]] = cir.builtin_int_cast %[[PV]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LV2_CAST]] : si16) extent(%[[PV_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -519,12 +519,12 @@ void acc_compute(int parmVar) { ; // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[PV:.*]] = cir.load{{.*}} %[[PARM]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[PV_CAST:.*]] = builtin.unrealized_conversion_cast %[[PV]] : !s32i to si32 + // CHECK-NEXT: %[[PV_CAST:.*]] = cir.builtin_int_cast %[[PV]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: 
%[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[PV_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -539,14 +539,14 @@ void acc_compute(int parmVar) { ; // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) 
extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -559,16 +559,16 @@ void acc_compute(int parmVar) { #pragma acc serial copy(localArrayOfPtrs[localVar1:localVar2][1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) loc // CHECK-NEXT: %[[LV1:.*]] = cir.load{{.*}} %[[LOCAL1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[LV1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV1]] : !s32i to si32 + // CHECK-NEXT: %[[LV1_CAST:.*]] = cir.builtin_int_cast %[[LV1]] : !s32i -> si32 // CHECK-NEXT: %[[LV2:.*]] = cir.load{{.*}} %[[LOCAL2]] : !cir.ptr, !s16i - // CHECK-NEXT: %[[LV2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LV2]] : !s16i to si16 + // CHECK-NEXT: %[[LV2_CAST:.*]] = cir.builtin_int_cast %[[LV2]] : !s16i -> si16 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[LV1_CAST]] : si32) extent(%[[LV2_CAST]] : si16) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -582,19 +582,19 @@ void acc_compute(int parmVar) { #pragma acc kernels copy(threeDArray[1][2][3]) ; // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -608,23 +608,23 @@ void acc_compute(int parmVar) { #pragma acc parallel copy(threeDArray[1:1][2:1][3:1]) ; // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i 
to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[THREE_CAST]] : si32) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS3:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) loc @@ -677,7 +677,7 @@ void acc_compute_members() { #pragma acc parallel 
copy(localStruct.arrayMember[2]) ; // CHECK-NEXT: %[[TWO_CONST:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_CONST]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_CONST]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -692,9 +692,9 @@ void acc_compute_members() { #pragma acc serial copy(localStruct.arrayMember[1:2]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[TWO_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -708,7 +708,7 @@ void acc_compute_members() { #pragma acc kernels copy(localStruct.arrayMember[1:]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_CONST:.*]] = arith.constant 4 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 @@ -724,7 +724,7 @@ void acc_compute_members() { ; // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[TWO:.*]] = cir.const 
#cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST2:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CONST]] : i64) extent(%[[TWO_CAST]] : si32) stride(%[[ONE_CONST]] : i64) startIdx(%[[ZERO_CONST2]] : i64) @@ -747,13 +747,13 @@ void acc_compute_members() { #pragma acc kernels copy(localStruct.twoDArrayMember[3][2]) ; // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -768,16 +768,16 @@ void acc_compute_members() { #pragma acc parallel copy(localStruct.twoDArrayMember[1:3][1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const 
#cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -800,13 +800,13 @@ void acc_compute_members() { #pragma acc kernels copy(localStruct.ptrArrayMember[3][2]) ; // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[THREE:.*]] = cir.const 
#cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -821,16 +821,16 @@ void acc_compute_members() { #pragma acc parallel copy(localStruct.ptrArrayMember[1:3][1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : 
i64) startIdx(%[[ZERO_CONST]] : i64) @@ -853,13 +853,13 @@ void acc_compute_members() { #pragma acc kernels copy(localStruct.ptrPtrMember[3][2]) ; // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -874,16 +874,16 @@ void acc_compute_members() { #pragma acc parallel copy(localStruct.ptrPtrMember[1:3][1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) 
startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[THREE:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS2:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[THREE_CAST]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/compute-copy.cpp b/clang/test/CIR/CodeGenOpenACC/compute-copy.cpp index 746ce9d3f16eb..4dd9013c28c97 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-copy.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-copy.cpp @@ -27,7 +27,7 @@ void InlineFunc() { #pragma acc kernels copy(arrayMember[2]) ; // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -42,13 +42,13 @@ void InlineFunc() { #pragma acc kernels copy(twoDArrayMember[1][2]) ; // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -82,7 +82,7 @@ void InlineFunc() { #pragma acc serial copy(iSTy.Member[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -107,7 +107,7 @@ void InlineFunc() { #pragma acc kernels copy(this->arrayMember[2]) ; // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -121,13 +121,13 @@ void InlineFunc() { #pragma acc kernels copy(this->twoDArrayMember[1][2]) ; // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: 
%[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -161,7 +161,7 @@ void InlineFunc() { #pragma acc serial copy(this->iSTy.Member[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -199,7 +199,7 @@ void StructTy::OutlineFunc() { #pragma acc kernels copy(arrayMember[2]) ; // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -213,13 +213,13 @@ void StructTy::OutlineFunc() { #pragma acc kernels copy(twoDArrayMember[1][2]) ; // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i 
to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -252,7 +252,7 @@ void StructTy::OutlineFunc() { #pragma acc serial copy(iSTy.Member[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -276,7 +276,7 @@ void StructTy::OutlineFunc() { #pragma acc kernels copy(this->arrayMember[2]) ; // CHECK-NEXT: %[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -291,13 +291,13 @@ void StructTy::OutlineFunc() { #pragma acc kernels copy(this->twoDArrayMember[1][2]) ; // CHECK-NEXT: 
%[[TWO:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS1:.*]] = acc.bounds lowerbound(%[[TWO_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 @@ -331,7 +331,7 @@ void StructTy::OutlineFunc() { #pragma acc serial copy(this->iSTy.Member[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 : i64 diff --git a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c index 82f8626c3428a..8eee0d2253ccb 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.c @@ -42,9 +42,9 @@ struct NoCopyConstruct {}; // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: 
cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -78,9 +78,9 @@ struct NoCopyConstruct {}; // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -114,9 +114,9 @@ struct NoCopyConstruct {}; // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to 
!u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -191,7 +191,7 @@ void acc_compute() { #pragma acc serial firstprivate(someIntArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -203,7 +203,7 @@ void acc_compute() { #pragma acc parallel firstprivate(someFloatArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -215,7 +215,7 @@ void acc_compute() { #pragma acc serial firstprivate(noCopyArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -227,21 +227,21 @@ 
void acc_compute() { #pragma acc serial firstprivate(someIntArr[1], someFloatArr[1], noCopyArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_i) -> !cir.ptr> {name = "someIntArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_f) -> !cir.ptr> {name = "someFloatArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ 
-256,9 +256,9 @@ void acc_compute() { #pragma acc parallel firstprivate(someIntArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -269,9 +269,9 @@ void acc_compute() { #pragma acc serial firstprivate(someFloatArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -282,9 +282,9 @@ void acc_compute() { #pragma acc parallel firstprivate(noCopyArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] 
: !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -295,25 +295,25 @@ void acc_compute() { #pragma acc parallel firstprivate(someIntArr[1:1], someFloatArr[1:1], noCopyArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_i) -> !cir.ptr> {name = "someIntArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // 
CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_f) -> !cir.ptr> {name = "someFloatArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp index 9d9d7806c09d3..bce93ce65e61f 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-firstprivate-clause.cpp @@ -94,9 +94,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// 
CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -130,9 +130,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -166,9 +166,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -201,9 +201,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -236,9 +236,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // 
CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -271,9 +271,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -300,9 +300,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -415,7 +415,7 @@ extern "C" void acc_compute() { #pragma acc serial 
firstprivate(someIntArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -427,7 +427,7 @@ extern "C" void acc_compute() { #pragma acc parallel firstprivate(someFloatArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -439,7 +439,7 @@ extern "C" void acc_compute() { #pragma acc serial firstprivate(noCopyArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -451,7 +451,7 @@ extern "C" void acc_compute() { #pragma acc parallel firstprivate(hasCopyArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -463,7 +463,7 @@ extern "C" void acc_compute() { #pragma acc parallel 
firstprivate(notDefCtorArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -475,7 +475,7 @@ extern "C" void acc_compute() { #pragma acc parallel firstprivate(dtorArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -487,42 +487,42 @@ extern "C" void acc_compute() { #pragma acc serial firstprivate(someIntArr[1], someFloatArr[1], noCopyArr[1], hasCopyArr[1], notDefCtorArr[1], dtorArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_i) -> !cir.ptr> {name = "someIntArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to 
si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_f) -> !cir.ptr> {name = "someFloatArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct) -> !cir.ptr> {name = "noCopyArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.firstprivate 
varPtr(%[[HASCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_13CopyConstruct) -> !cir.ptr> {name = "hasCopyArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.firstprivate varPtr(%[[NOTDEFCTORARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor) -> !cir.ptr> {name = "notDefCtorArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -540,9 +540,9 @@ extern "C" void acc_compute() { #pragma acc parallel firstprivate(someIntArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = 
arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -553,9 +553,9 @@ extern "C" void acc_compute() { #pragma acc serial firstprivate(someFloatArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -566,9 +566,9 @@ extern "C" void acc_compute() { #pragma acc parallel firstprivate(noCopyArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -579,9 +579,9 @@ extern "C" void acc_compute() { #pragma acc serial firstprivate(hasCopyArr[1:1]) ; // 
CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -592,9 +592,9 @@ extern "C" void acc_compute() { #pragma acc parallel firstprivate(notDefCtorArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -605,9 +605,9 @@ extern "C" void acc_compute() { #pragma acc parallel firstprivate(dtorArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : 
!s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -618,49 +618,49 @@ extern "C" void acc_compute() { #pragma acc parallel firstprivate(someIntArr[1:1], someFloatArr[1:1], noCopyArr[1:1], hasCopyArr[1:1], notDefCtorArr[1:1], dtorArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.firstprivate varPtr(%[[INTARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_i) -> !cir.ptr> {name = "someIntArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: 
%[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.firstprivate varPtr(%[[FLOATARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_f) -> !cir.ptr> {name = "someFloatArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.firstprivate varPtr(%[[NOCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_15NoCopyConstruct) -> !cir.ptr> {name = "noCopyArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = 
arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.firstprivate varPtr(%[[HASCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_13CopyConstruct) -> !cir.ptr> {name = "hasCopyArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.firstprivate varPtr(%[[NOTDEFCTORARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@firstprivatization__Bcnt1__ZTSA5_14NonDefaultCtor) -> !cir.ptr> {name = "notDefCtorArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // 
CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/compute-private-clause.c b/clang/test/CIR/CodeGenOpenACC/compute-private-clause.c index 54e27d905c12d..0ce0c3932584b 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-private-clause.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-private-clause.c @@ -94,7 +94,7 @@ void acc_compute() { #pragma acc serial private(someIntArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -106,7 +106,7 @@ void acc_compute() { #pragma acc parallel private(someFloatArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -118,7 +118,7 @@ void acc_compute() { #pragma acc serial private(noCopyArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -130,21 +130,21 @@ void acc_compute() { #pragma acc serial private(someIntArr[1], someFloatArr[1], 
noCopyArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_i) -> !cir.ptr> {name = "someIntArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_f) -> !cir.ptr> {name = "someFloatArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -159,9 +159,9 @@ void acc_compute() { #pragma acc parallel private(someIntArr[1:1]) ; // CHECK-NEXT: 
%[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -172,9 +172,9 @@ void acc_compute() { #pragma acc serial private(someFloatArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -185,9 +185,9 @@ void acc_compute() { #pragma acc parallel private(noCopyArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = 
builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -198,25 +198,25 @@ void acc_compute() { #pragma acc parallel private(someIntArr[1:1], someFloatArr[1:1], noCopyArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_i) -> !cir.ptr> {name = "someIntArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 
// CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_f) -> !cir.ptr> {name = "someFloatArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp b/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp index 1f533fa65b96b..bb5cf4a795764 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-private-clause.cpp @@ -85,9 +85,9 @@ struct HasDtor { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.private.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -118,9 +118,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -232,7 +232,7 @@ extern "C" void acc_compute() { #pragma acc serial private(someIntArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -244,7 +244,7 @@ extern "C" void acc_compute() { #pragma acc parallel private(someFloatArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to 
si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -256,7 +256,7 @@ extern "C" void acc_compute() { #pragma acc serial private(noCopyArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -268,7 +268,7 @@ extern "C" void acc_compute() { #pragma acc parallel private(hasCopyArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -280,7 +280,7 @@ extern "C" void acc_compute() { #pragma acc parallel private(notDefCtorArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -292,7 +292,7 @@ extern "C" void acc_compute() { #pragma acc parallel private(dtorArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: 
%[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -304,42 +304,42 @@ extern "C" void acc_compute() { #pragma acc serial private(someIntArr[1], someFloatArr[1], noCopyArr[1], hasCopyArr[1], notDefCtorArr[1], dtorArr[1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_i) -> !cir.ptr> {name = "someIntArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_f) -> !cir.ptr> {name = "someFloatArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_15NoCopyConstruct) -> !cir.ptr> {name = "noCopyArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_13CopyConstruct) -> !cir.ptr> {name = "hasCopyArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: 
%[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_14NonDefaultCtor) -> !cir.ptr> {name = "notDefCtorArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -357,9 +357,9 @@ extern "C" void acc_compute() { #pragma acc parallel private(someIntArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -370,9 +370,9 @@ extern "C" void acc_compute() { #pragma acc serial private(someFloatArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i 
-> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -383,9 +383,9 @@ extern "C" void acc_compute() { #pragma acc parallel private(noCopyArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -396,9 +396,9 @@ extern "C" void acc_compute() { #pragma acc serial private(hasCopyArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -409,9 +409,9 @@ 
extern "C" void acc_compute() { #pragma acc parallel private(notDefCtorArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -422,9 +422,9 @@ extern "C" void acc_compute() { #pragma acc parallel private(dtorArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -435,49 +435,49 @@ extern "C" void acc_compute() { #pragma acc parallel private(someIntArr[1:1], someFloatArr[1:1], noCopyArr[1:1], hasCopyArr[1:1], notDefCtorArr[1:1], dtorArr[1:1]) ; // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_i) -> !cir.ptr> {name = "someIntArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_f) -> !cir.ptr> {name = "someFloatArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = 
cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_15NoCopyConstruct) -> !cir.ptr> {name = "noCopyArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_13CopyConstruct) -> !cir.ptr> {name = "hasCopyArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast 
%[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_14NonDefaultCtor) -> !cir.ptr> {name = "notDefCtorArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c index 1c240630971ed..cee253c1a0e2e 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.c @@ -1334,9 +1334,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1375,9 +1375,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1440,9 +1440,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1481,9 +1481,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1546,9 +1546,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], 
%[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1587,9 +1587,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1698,9 +1698,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1739,9 +1739,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: 
cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1850,9 +1850,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1885,9 +1885,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = 
cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1938,9 +1938,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1973,9 +1973,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// 
CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2026,9 +2026,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2061,9 +2061,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ 
-2114,9 +2114,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2155,9 +2155,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2263,9 +2263,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index 
-// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2304,9 +2304,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp index b3cf9260eb395..2fc31640949b9 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-default-ops.cpp @@ -1301,9 +1301,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // 
CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1342,9 +1342,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1407,9 +1407,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = 
cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1448,9 +1448,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1513,9 +1513,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// 
CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1554,9 +1554,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1661,9 +1661,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ 
-1702,9 +1702,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1809,9 +1809,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1844,9 +1844,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound 
%[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1897,9 +1897,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1932,9 +1932,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = 
acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1985,9 +1985,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2020,9 +2020,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // 
CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2073,9 +2073,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2114,9 +2114,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2219,9 +2219,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = 
cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2260,9 +2260,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c index 90bd6d1fef956..480e9ac9d8348 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.c 
@@ -407,9 +407,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -435,9 +435,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -471,9 +471,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// 
CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -499,9 +499,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -535,9 +535,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -563,9 +563,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -606,9 +606,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" 
align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -634,9 +634,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -677,9 +677,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -705,9 +705,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, 
%[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -752,9 +752,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -780,9 +780,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to 
!u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp index 2360ea8de5669..2c6c997bac294 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-float.cpp @@ -404,9 +404,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -432,9 +432,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// 
CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -468,9 +468,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -496,9 +496,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -532,9 +532,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -560,9 +560,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca 
"iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -603,9 +603,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -631,9 +631,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -674,9 +674,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> 
// CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -702,9 +702,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -748,9 +748,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = 
cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -776,9 +776,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp index ecffce6cbc639..560f158726a84 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-inline-ops.cpp @@ -1399,9 +1399,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: 
%[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1440,9 +1440,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1469,9 +1469,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound 
%[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1502,9 +1502,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1543,9 +1543,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : 
index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1572,9 +1572,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1605,9 +1605,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1646,9 +1646,9 @@ void 
acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1681,9 +1681,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1714,9 +1714,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = 
acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1755,9 +1755,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1790,9 +1790,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = 
cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1823,9 +1823,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1864,9 +1864,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1893,9 +1893,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1926,9 +1926,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], 
%[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1967,9 +1967,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1996,9 +1996,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2029,9 +2029,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca 
"openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2070,9 +2070,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2099,9 +2099,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2132,9 +2132,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2173,9 +2173,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2205,9 +2205,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2238,9 +2238,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // 
CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2279,9 +2279,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2311,9 +2311,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i diff 
--git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c index 2e63e8ee80922..408925699d3c9 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.c @@ -569,9 +569,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -597,9 +597,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], 
%[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -633,9 +633,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -661,9 +661,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -697,9 +697,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = 
acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -725,9 +725,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -768,9 +768,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: 
%[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -796,9 +796,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -839,9 +839,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i 
// CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -867,9 +867,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -903,9 +903,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -931,9 +931,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: 
!cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -967,9 +967,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -995,9 +995,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1031,9 +1031,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1059,9 +1059,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// 
CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1105,9 +1105,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1133,9 +1133,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // 
CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp index b472f604c50c4..d80b7265ecadb 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-int.cpp @@ -570,9 +570,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -598,9 +598,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i 
// CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -634,9 +634,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -662,9 +662,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -698,9 +698,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca 
"openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -726,9 +726,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -769,9 +769,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to 
!u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -797,9 +797,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -840,9 +840,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -868,9 +868,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -904,9 +904,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // 
CHECK-NEXT: cir.for : cond { @@ -932,9 +932,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -968,9 +968,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -996,9 +996,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: 
%[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1032,9 +1032,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1060,9 +1060,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> 
!u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1106,9 +1106,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1134,9 +1134,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast 
%[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp index 95864f6173809..a16a2304090c8 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-outline-ops.cpp @@ -1398,9 +1398,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1439,9 +1439,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1468,9 +1468,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1501,9 +1501,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // 
CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1542,9 +1542,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1571,9 +1571,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1604,9 +1604,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] 
= cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1645,9 +1645,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1680,9 +1680,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: 
%[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1713,9 +1713,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1754,9 +1754,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound 
%[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1789,9 +1789,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1822,9 +1822,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> 
!u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1863,9 +1863,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1892,9 +1892,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : 
!u64i @@ -1925,9 +1925,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1966,9 +1966,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1995,9 +1995,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = 
acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2028,9 +2028,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2069,9 +2069,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = 
cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2098,9 +2098,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2131,9 +2131,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2172,9 +2172,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2204,9 +2204,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = 
cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2237,9 +2237,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2278,9 +2278,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2310,9 +2310,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, 
%[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i diff --git a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c index 44f248c30a1e8..2cae7275c42c5 100644 --- a/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c +++ b/clang/test/CIR/CodeGenOpenACC/compute-reduction-clause-unsigned-int.c @@ -572,9 +572,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : 
cond { @@ -600,9 +600,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -636,9 +636,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -664,9 +664,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = 
acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -700,9 +700,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -728,9 +728,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // 
CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -771,9 +771,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -799,9 +799,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : 
index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -842,9 +842,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -870,9 +870,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -906,9 +906,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = 
cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -934,9 +934,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -970,9 +970,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] 
: index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -998,9 +998,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1034,9 +1034,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1062,9 +1062,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1109,9 +1109,9 @@ void acc_compute() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], 
%[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1137,9 +1137,9 @@ void acc_compute() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/CodeGenOpenACC/data.c b/clang/test/CIR/CodeGenOpenACC/data.c index a11a1b837130e..e2000b84dbc05 100644 --- a/clang/test/CIR/CodeGenOpenACC/data.c +++ b/clang/test/CIR/CodeGenOpenACC/data.c @@ -47,7 +47,7 @@ void acc_data(int cond) { #pragma acc data default(none) async(cond) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.data async(%[[CONV_CAST]] : si32) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } attributes {defaultAttr = #acc} @@ -61,9 +61,9 @@ void acc_data(int cond) { #pragma acc data default(none) async(3) device_type(nvidia, radeon) async(cond) {} // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: 
%[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.data async(%[[THREE_CAST]] : si32, %[[CONV_CAST]] : si32 [#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } attributes {defaultAttr = #acc} @@ -71,7 +71,7 @@ void acc_data(int cond) { #pragma acc data default(none) async device_type(nvidia, radeon) async(cond) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.data async([#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } attributes {defaultAttr = #acc} @@ -79,7 +79,7 @@ void acc_data(int cond) { #pragma acc data default(none) async(3) device_type(nvidia, radeon) async {} // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.data async([#acc.device_type, #acc.device_type], %[[THREE_CAST]] : si32) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } attributes {defaultAttr = #acc} @@ -127,7 +127,7 @@ void acc_data(int cond) { #pragma acc data default(none) wait(1) device_type(nvidia) wait {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.data wait([#acc.device_type], {%[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: attributes {defaultAttr = #acc} @@ -135,7 +135,7 @@ void acc_data(int cond) { #pragma acc data default(none) wait device_type(nvidia) wait(1) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.data wait([#acc.device_type], {%[[ONE_CAST]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: attributes {defaultAttr = #acc} @@ -143,9 +143,9 @@ void acc_data(int cond) { #pragma acc data default(none) wait(1) device_type(nvidia) wait(1) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL2]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE_LITERAL2]] : !s32i -> si32 // CHECK-NEXT: acc.data wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: attributes {defaultAttr = #acc} @@ -153,9 +153,9 @@ void acc_data(int cond) { #pragma acc data default(none) wait(devnum: cond : 1) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : 
!s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: attributes {defaultAttr = #acc} @@ -163,13 +163,13 @@ void acc_data(int cond) { #pragma acc data default(none) wait(devnum: cond : 1) device_type(nvidia) wait(devnum: cond : 1) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST2:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: attributes {defaultAttr = #acc} @@ -177,11 +177,11 @@ void acc_data(int cond) { #pragma acc data default(none) wait(devnum: cond : 1, 2) {} 
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: attributes {defaultAttr = #acc} @@ -189,17 +189,17 @@ void acc_data(int cond) { #pragma acc data default(none) wait(devnum: cond : 1, 2) device_type(nvidia, radeon) wait(devnum: cond : 1, 2) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : 
!cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST2:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST2:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.data wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type], {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: attributes {defaultAttr = #acc} @@ -207,9 +207,9 @@ void acc_data(int cond) { #pragma acc data default(none) wait(cond, 1) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.data wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: attributes {defaultAttr = #acc} @@ -217,9 +217,9 @@ void acc_data(int cond) { #pragma acc data default(none) wait(queues: cond, 1) 
device_type(radeon) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.data wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: attributes {defaultAttr = #acc} diff --git a/clang/test/CIR/CodeGenOpenACC/declare-copy.cpp b/clang/test/CIR/CodeGenOpenACC/declare-copy.cpp index 84a1c0c320c36..2683a56f29720 100644 --- a/clang/test/CIR/CodeGenOpenACC/declare-copy.cpp +++ b/clang/test/CIR/CodeGenOpenACC/declare-copy.cpp @@ -36,17 +36,17 @@ struct Struct { // CHECK-NEXT: %[[LOC_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) 
startIdx(%[[IDX]] : i64) // CHECK-NEXT: %[[ARG_HSE_PTR_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -99,9 +99,9 @@ void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEP // CHECK: %[[ARG_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} // CHECK-NEXT: %[[ARG_INT_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : 
i64) startIdx(%[[IDX]] : i64) @@ -113,9 +113,9 @@ void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEP // CHECK-NEXT: %[[LOC_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -167,9 +167,9 @@ extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *Ar // CHECK: %[[ARG_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} // CHECK-NEXT: %[[ARG_INT_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : 
!s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -183,9 +183,9 @@ extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *Ar // CHECK-NEXT: %[[LOC_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/declare-copyin.cpp b/clang/test/CIR/CodeGenOpenACC/declare-copyin.cpp index b772cd454c29b..2d044ca86d975 100644 --- a/clang/test/CIR/CodeGenOpenACC/declare-copyin.cpp +++ b/clang/test/CIR/CodeGenOpenACC/declare-copyin.cpp @@ -40,9 +40,9 @@ int GlobalInt1; // // CHECK: acc.global_ctor @GlobalHSEArr_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -53,9 +53,9 @@ int GlobalInt1; // CHECK-NEXT: } // CHECK: acc.global_dtor @GlobalHSEArr_acc_dtor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -103,9 +103,9 @@ int NSInt1; // // CHECK: acc.global_ctor @{{.*}}NSHSEArr{{.*}}_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -116,9 +116,9 @@ int NSInt1; // CHECK-NEXT: } // CHECK: acc.global_dtor @{{.*}}NSHSEArr{{.*}}_acc_dtor { // CHECK-NEXT: 
%[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -168,9 +168,9 @@ int AnonNSInt1; // // CHECK: acc.global_ctor @{{.*}}AnonNSHSEArr{{.*}}_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -181,9 +181,9 @@ int AnonNSInt1; // CHECK-NEXT: } // CHECK: acc.global_dtor @{{.*}}AnonNSHSEArr{{.*}}_acc_dtor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = 
arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -232,9 +232,9 @@ struct Struct { // // CHECK: acc.global_ctor @{{.*}}Struct{{.*}}StaticMemHSEArr{{.*}}_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -245,9 +245,9 @@ struct Struct { // CHECK-NEXT: } // CHECK: acc.global_dtor @{{.*}}Struct{{.*}}StaticMemHSEArr{{.*}}_acc_dtor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -284,17 +284,17 @@ struct Struct { // CHECK-NEXT: %[[LOC_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_INT_ALLOCA]] : 
!cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) // CHECK-NEXT: %[[ARG_HSE_PTR_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {modifiers = #acc, name = "ArgHSEPtr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -347,9 +347,9 @@ void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEP // CHECK: %[[ARG_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgHSE"} // CHECK-NEXT: %[[ARG_INT_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = 
"ArgInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -361,9 +361,9 @@ void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEP // CHECK-NEXT: %[[LOC_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -415,9 +415,9 @@ extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *Ar // CHECK: %[[ARG_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr 
{modifiers = #acc, name = "ArgHSE"} // CHECK-NEXT: %[[ARG_INT_COPYIN:.*]] = acc.copyin varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -430,9 +430,9 @@ extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *Ar // CHECK-NEXT: %[[LOC_HSE_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_COPYIN:.*]] = acc.copyin varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/declare-copyout.cpp 
b/clang/test/CIR/CodeGenOpenACC/declare-copyout.cpp index c0a00f134e81f..a9a090218cd32 100644 --- a/clang/test/CIR/CodeGenOpenACC/declare-copyout.cpp +++ b/clang/test/CIR/CodeGenOpenACC/declare-copyout.cpp @@ -36,17 +36,17 @@ struct Struct { // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) // CHECK-NEXT: %[[ARG_HSE_PTR_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {dataClause = #acc, modifiers = #acc, name = "ArgHSEPtr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = 
arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -99,9 +99,9 @@ void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEP // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -113,9 +113,9 @@ void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEP // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: 
%[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -167,9 +167,9 @@ extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *Ar // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgHSE"} // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "ArgInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -183,9 +183,9 @@ extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *Ar // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = 
builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/declare-create.cpp b/clang/test/CIR/CodeGenOpenACC/declare-create.cpp index 71986f76ea3b2..d44db9289733a 100644 --- a/clang/test/CIR/CodeGenOpenACC/declare-create.cpp +++ b/clang/test/CIR/CodeGenOpenACC/declare-create.cpp @@ -40,9 +40,9 @@ int GlobalInt1; // // CHECK: acc.global_ctor @GlobalHSEArr_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -53,9 +53,9 @@ int GlobalInt1; // CHECK-NEXT: } // CHECK: acc.global_dtor @GlobalHSEArr_acc_dtor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: 
%[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -103,9 +103,9 @@ int NSInt1; // // CHECK: acc.global_ctor @{{.*}}NSHSEArr{{.*}}_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -116,9 +116,9 @@ int NSInt1; // CHECK-NEXT: } // CHECK: acc.global_dtor @{{.*}}NSHSEArr{{.*}}_acc_dtor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -169,9 +169,9 @@ int AnonNSInt1; // // CHECK: acc.global_ctor 
@{{.*}}AnonNSHSEArr{{.*}}_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -182,9 +182,9 @@ int AnonNSInt1; // CHECK-NEXT: } // CHECK: acc.global_dtor @{{.*}}AnonNSHSEArr{{.*}}_acc_dtor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -233,9 +233,9 @@ struct Struct { // // CHECK: acc.global_ctor @{{.*}}Struct{{.*}}StaticMemHSEArr{{.*}}_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: 
%[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -246,9 +246,9 @@ struct Struct { // CHECK-NEXT: } // CHECK: acc.global_dtor @{{.*}}Struct{{.*}}StaticMemHSEArr{{.*}}_acc_dtor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -285,17 +285,17 @@ struct Struct { // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : 
si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) // CHECK-NEXT: %[[ARG_HSE_PTR_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {modifiers = #acc, name = "ArgHSEPtr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -348,9 +348,9 @@ void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEP // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgHSE"} // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] 
: i64) @@ -362,9 +362,9 @@ void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEP // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -416,9 +416,9 @@ extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *Ar // CHECK: %[[ARG_HSE_CREATE:.*]] = acc.create varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgHSE"} // CHECK-NEXT: %[[ARG_INT_CREATE:.*]] = acc.create varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {modifiers = #acc, name = "ArgInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = 
arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -431,9 +431,9 @@ extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *Ar // CHECK-NEXT: %[[LOC_HSE_CREATE:.*]] = acc.create varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_CREATE:.*]] = acc.create varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp b/clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp index 0a1759cb20c39..29f7ea66d7ac0 100644 --- a/clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp +++ b/clang/test/CIR/CodeGenOpenACC/declare-deviceresident.cpp @@ -40,9 +40,9 @@ int GlobalInt1; // // CHECK: acc.global_ctor @GlobalHSEArr_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // 
CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -53,9 +53,9 @@ int GlobalInt1; // CHECK-NEXT: } // CHECK: acc.global_dtor @GlobalHSEArr_acc_dtor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -103,9 +103,9 @@ int NSInt1; // // CHECK: acc.global_ctor @{{.*}}NSHSEArr{{.*}}_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -116,9 +116,9 @@ int NSInt1; // CHECK-NEXT: } // CHECK: acc.global_dtor @{{.*}}NSHSEArr{{.*}}_acc_dtor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -168,9 +168,9 @@ int AnonNSInt1; // // CHECK: acc.global_ctor @{{.*}}AnonNSHSEArr{{.*}}_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -181,9 +181,9 @@ int AnonNSInt1; // CHECK-NEXT: } // CHECK: acc.global_dtor @{{.*}}AnonNSHSEArr{{.*}}_acc_dtor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds 
lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -232,9 +232,9 @@ struct Struct { // // CHECK: acc.global_ctor @{{.*}}Struct{{.*}}StaticMemHSEArr{{.*}}_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -245,9 +245,9 @@ struct Struct { // CHECK-NEXT: } // CHECK: acc.global_dtor @{{.*}}Struct{{.*}}StaticMemHSEArr{{.*}}_acc_dtor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -284,17 +284,17 @@ struct Struct { // CHECK-NEXT: %[[LOC_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} // 
CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) // CHECK-NEXT: %[[ARG_HSE_PTR_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {name = "ArgHSEPtr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -347,9 +347,9 @@ void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEP // CHECK: %[[ARG_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} // CHECK-NEXT: %[[ARG_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : 
!s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -361,9 +361,9 @@ void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEP // CHECK-NEXT: %[[LOC_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -415,9 +415,9 @@ extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *Ar // CHECK: %[[ARG_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} // CHECK-NEXT: 
%[[ARG_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -430,9 +430,9 @@ extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *Ar // CHECK-NEXT: %[[LOC_HSE_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_DEV_RES:.*]] = acc.declare_device_resident varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/declare-link.cpp 
b/clang/test/CIR/CodeGenOpenACC/declare-link.cpp index 44a7f0811184e..33d03c59bd947 100644 --- a/clang/test/CIR/CodeGenOpenACC/declare-link.cpp +++ b/clang/test/CIR/CodeGenOpenACC/declare-link.cpp @@ -27,9 +27,9 @@ int GlobalInt1; // // CHECK: acc.global_ctor @GlobalHSEArr_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -62,9 +62,9 @@ int NSInt1; // // CHECK: acc.global_ctor @{{.*}}NSHSEArr{{.*}}_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -99,9 +99,9 @@ int AnonNSInt1; // // CHECK: acc.global_ctor @{{.*}}AnonNSHSEArr{{.*}}_acc_ctor { // CHECK-NEXT: %[[LB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] // 
CHECK-NEXT: %[[UB:.*]] = cir.const #cir.int<1> : !s32i -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB_CAST]] : si32) extent(%[[UB_CAST]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -133,9 +133,9 @@ struct Struct { // CHECK-NEXT: %[[INT_LINK:.*]] = acc.declare_link varPtr(%[[GET_LOCAL_INT]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} // // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[ONE]] : i64) startIdx(%[[ZERO]] : i64) @@ -176,9 +176,9 @@ void Struct::MemFunc2() { // CHECK-NEXT: %[[INT_LINK:.*]] = acc.declare_link varPtr(%[[GET_LOCAL_INT]] : !cir.ptr) -> !cir.ptr {name = "LocalInt2"} // // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO:.*]] = arith.constant 0 : i64 // 
CHECK-NEXT: %[[ONE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[ONE]] : i64) startIdx(%[[ZERO]] : i64) @@ -213,9 +213,9 @@ void NormalFunc() { // CHECK-NEXT: %[[INT_LINK:.*]] = acc.declare_link varPtr(%[[GET_LOCAL_INT]] : !cir.ptr) -> !cir.ptr {name = "LocalInt3"} // // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[ONE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[ONE]] : i64) startIdx(%[[ZERO]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/declare-present.cpp b/clang/test/CIR/CodeGenOpenACC/declare-present.cpp index 75ddd1400a6d4..b7d85db4b246a 100644 --- a/clang/test/CIR/CodeGenOpenACC/declare-present.cpp +++ b/clang/test/CIR/CodeGenOpenACC/declare-present.cpp @@ -37,17 +37,17 @@ struct Struct { // CHECK-NEXT: %[[LOC_HSE_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_PRESENT:.*]] = acc.present varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: 
%[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) // CHECK-NEXT: %[[ARG_HSE_PTR_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_PTR_ALLOCA]] : !cir.ptr>) bounds(%[[BOUND1]]) -> !cir.ptr> {name = "ArgHSEPtr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -100,9 +100,9 @@ void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEP // CHECK: %[[ARG_HSE_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} // CHECK-NEXT: %[[ARG_INT_PRESENT:.*]] = acc.present varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = 
arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -114,9 +114,9 @@ void Struct::MemFunc2(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *ArgHSEP // CHECK-NEXT: %[[LOC_HSE_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_PRESENT:.*]] = acc.present varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -168,9 +168,9 @@ extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *Ar // CHECK: %[[ARG_HSE_PRESENT:.*]] = acc.present varPtr(%[[ARG_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgHSE"} // CHECK-NEXT: %[[ARG_INT_PRESENT:.*]] = acc.present varPtr(%[[ARG_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "ArgInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // 
CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND1:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) @@ -183,9 +183,9 @@ extern "C" void NormalFunc(HasSideEffects ArgHSE, int ArgInt, HasSideEffects *Ar // CHECK-NEXT: %[[LOC_HSE_PRESENT:.*]] = acc.present varPtr(%[[LOC_HSE_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalHSE"} // CHECK-NEXT: %[[LOC_INT_PRESENT:.*]] = acc.present varPtr(%[[LOC_INT_ALLOCA]] : !cir.ptr) -> !cir.ptr {name = "LocalInt"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[LB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[LB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[UB:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[UB:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[IDX:.*]] = arith.constant 0 : i64 // CHECK-NEXT: %[[STRIDE:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[BOUND2:.*]] = acc.bounds lowerbound(%[[LB]] : si32) extent(%[[UB]] : si32) stride(%[[STRIDE]] : i64) startIdx(%[[IDX]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/enter-data.c b/clang/test/CIR/CodeGenOpenACC/enter-data.c index d7a47bae35da3..174ea9aca67dd 100644 --- a/clang/test/CIR/CodeGenOpenACC/enter-data.c +++ b/clang/test/CIR/CodeGenOpenACC/enter-data.c @@ -24,13 +24,13 @@ void acc_data(int parmVar, int *ptrParmVar) { #pragma acc enter data copyin(readonly, alwaysin: parmVar) async(parmVar) // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: 
%[[COPYIN1:.*]] = acc.copyin varPtr(%[[PARM]] : !cir.ptr) async(%[[PARM_CAST]] : si32) -> !cir.ptr {modifiers = #acc, name = "parmVar", structured = false} // CHECK-NEXT: acc.enter_data async(%[[PARM_CAST]] : si32) dataOperands(%[[COPYIN1]] : !cir.ptr) #pragma acc enter data async(parmVar) copyin(readonly, alwaysin: parmVar) // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[PARM]] : !cir.ptr) async(%[[PARM_CAST]] : si32) -> !cir.ptr {modifiers = #acc, name = "parmVar", structured = false} // CHECK-NEXT: acc.enter_data async(%[[PARM_CAST]] : si32) dataOperands(%[[COPYIN1]] : !cir.ptr) @@ -48,7 +48,7 @@ void acc_data(int parmVar, int *ptrParmVar) { #pragma acc enter data create(zero: parmVar) async(parmVar) // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[CREATE1:.*]] = acc.create varPtr(%[[PARM]] : !cir.ptr) async(%[[PARM_CAST]] : si32) -> !cir.ptr {modifiers = #acc, name = "parmVar", structured = false} // CHECK-NEXT: acc.enter_data async(%[[PARM_CAST]] : si32) dataOperands(%[[CREATE1]] : !cir.ptr) @@ -62,7 +62,7 @@ void acc_data(int parmVar, int *ptrParmVar) { #pragma acc enter data attach(ptrParmVar) async(parmVar) // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[ATTACH1:.*]] = acc.attach varPtr(%[[PTRPARM]] : !cir.ptr>) async(%[[PARM_CAST]] : si32) -> !cir.ptr> {name = "ptrParmVar", structured = false} // CHECK-NEXT: acc.enter_data async(%[[PARM_CAST]] : si32) 
dataOperands(%[[ATTACH1]] : !cir.ptr>) @@ -88,7 +88,7 @@ void acc_data(int parmVar, int *ptrParmVar) { // CHECK-NEXT: %[[CMP:.*]] = cir.cmp eq %[[PARM_LOAD]], %[[ONE_CONST]] // CHECK-NEXT: %[[CMP_CAST:.*]] = builtin.unrealized_conversion_cast %[[CMP]] // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[PARM]] : !cir.ptr) async(%[[PARM_CAST]] : si32) -> !cir.ptr {name = "parmVar", structured = false} // CHECK-NEXT: acc.enter_data if(%[[CMP_CAST]]) async(%[[PARM_CAST]] : si32) dataOperands(%[[COPYIN1]] : !cir.ptr) @@ -98,27 +98,27 @@ void acc_data(int parmVar, int *ptrParmVar) { #pragma acc enter data wait(1) create(parmVar) // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_CONST]] + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_CONST]] // CHECK-NEXT: %[[CREATE1:.*]] = acc.create varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {name = "parmVar", structured = false} // CHECK-NEXT: acc.enter_data wait(%[[ONE_CAST]] : si32) dataOperands(%[[CREATE1]] : !cir.ptr) #pragma acc enter data wait(parmVar, 1, 2) create(parmVar) // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_CONST]] + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_CONST]] // CHECK-NEXT: %[[TWO_CONST:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_CONST]] + // CHECK-NEXT: %[[TWO_CAST:.*]] = 
cir.builtin_int_cast %[[TWO_CONST]] // CHECK-NEXT: %[[CREATE1:.*]] = acc.create varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {name = "parmVar", structured = false} // CHECK-NEXT: acc.enter_data wait(%[[PARM_CAST]], %[[ONE_CAST]], %[[TWO_CAST]] : si32, si32, si32) dataOperands(%[[CREATE1]] : !cir.ptr) #pragma acc enter data wait(devnum: parmVar: 1, 2) create(parmVar) // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_CONST]] + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_CONST]] // CHECK-NEXT: %[[TWO_CONST:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_CONST]] + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_CONST]] // CHECK-NEXT: %[[CREATE1:.*]] = acc.create varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {name = "parmVar", structured = false} // CHECK-NEXT: acc.enter_data wait_devnum(%[[PARM_CAST]] : si32) wait(%[[ONE_CAST]], %[[TWO_CAST]] : si32, si32) dataOperands(%[[CREATE1]] : !cir.ptr) diff --git a/clang/test/CIR/CodeGenOpenACC/exit-data.c b/clang/test/CIR/CodeGenOpenACC/exit-data.c index 74bdf4c959481..79fcf598aeeec 100644 --- a/clang/test/CIR/CodeGenOpenACC/exit-data.c +++ b/clang/test/CIR/CodeGenOpenACC/exit-data.c @@ -28,14 +28,14 @@ void acc_data(int parmVar, int *ptrParmVar) { #pragma acc exit data finalize copyout(zero, alwaysout: parmVar) async(parmVar) // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[GDP:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) 
async(%[[PARM_CAST]] : si32) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "parmVar", structured = false} // CHECK-NEXT: acc.exit_data async(%[[PARM_CAST]] : si32) dataOperands(%[[GDP]] : !cir.ptr) attributes {finalize} // CHECK-NEXT: acc.copyout accPtr(%[[GDP]] : !cir.ptr) async(%[[PARM_CAST]] : si32) to varPtr(%[[PARM]] : !cir.ptr) {modifiers = #acc, name = "parmVar", structured = false} #pragma acc exit data async(parmVar) copyout(zero, alwaysout: parmVar) // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[GDP:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) async(%[[PARM_CAST]] : si32) -> !cir.ptr {dataClause = #acc, modifiers = #acc, name = "parmVar", structured = false} // CHECK-NEXT: acc.exit_data async(%[[PARM_CAST]] : si32) dataOperands(%[[GDP]] : !cir.ptr) // CHECK-NEXT: acc.copyout accPtr(%[[GDP]] : !cir.ptr) async(%[[PARM_CAST]] : si32) to varPtr(%[[PARM]] : !cir.ptr) {modifiers = #acc, name = "parmVar", structured = false} @@ -47,7 +47,7 @@ void acc_data(int parmVar, int *ptrParmVar) { #pragma acc exit data delete(parmVar) async(parmVar) // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[GDP:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) async(%[[PARM_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "parmVar", structured = false} // CHECK-NEXT: acc.exit_data async(%[[PARM_CAST]] : si32) dataOperands(%[[GDP]] : !cir.ptr) // CHECK-NEXT: acc.delete accPtr(%[[GDP]] : !cir.ptr) async(%[[PARM_CAST]] : si32) {name = "parmVar", structured = false} @@ -64,7 +64,7 @@ void acc_data(int parmVar, int *ptrParmVar) { #pragma acc exit data detach(ptrParmVar) async(parmVar) 
finalize // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[GDP:.*]] = acc.getdeviceptr varPtr(%[[PTRPARM]] : !cir.ptr>) async(%[[PARM_CAST]] : si32) -> !cir.ptr> {dataClause = #acc, name = "ptrParmVar", structured = false} // CHECK-NEXT: acc.exit_data async(%[[PARM_CAST]] : si32) dataOperands(%[[GDP]] : !cir.ptr>) attributes {finalize} // CHECK-NEXT: acc.detach accPtr(%[[GDP]] : !cir.ptr>) async(%[[PARM_CAST]] : si32) {name = "ptrParmVar", structured = false} @@ -93,7 +93,7 @@ void acc_data(int parmVar, int *ptrParmVar) { // CHECK-NEXT: %[[CMP:.*]] = cir.cmp eq %[[PARM_LOAD]], %[[ONE_CONST]] // CHECK-NEXT: %[[CMP_CAST:.*]] = builtin.unrealized_conversion_cast %[[CMP]] // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[GDP:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) async(%[[PARM_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "parmVar", structured = false} // CHECK-NEXT: acc.exit_data if(%[[CMP_CAST]]) async(%[[PARM_CAST]] : si32) dataOperands(%[[GDP]] : !cir.ptr) // CHECK-NEXT: acc.copyout accPtr(%[[GDP]] : !cir.ptr) async(%[[PARM_CAST]] : si32) to varPtr(%[[PARM]] : !cir.ptr) {name = "parmVar", structured = false} @@ -105,29 +105,29 @@ void acc_data(int parmVar, int *ptrParmVar) { #pragma acc exit data wait(1) delete(parmVar) // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_CONST]] + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_CONST]] // CHECK-NEXT: %[[GDP:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "parmVar", structured = 
false} // CHECK-NEXT: acc.exit_data wait(%[[ONE_CAST]] : si32) dataOperands(%[[GDP]] : !cir.ptr) // CHECK-NEXT: acc.delete accPtr(%[[GDP]] : !cir.ptr) {name = "parmVar", structured = false} #pragma acc exit data wait(parmVar, 1, 2) delete(parmVar) finalize // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_CONST]] + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_CONST]] // CHECK-NEXT: %[[TWO_CONST:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_CONST]] + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_CONST]] // CHECK-NEXT: %[[GDP:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "parmVar", structured = false} // CHECK-NEXT: acc.exit_data wait(%[[PARM_CAST]], %[[ONE_CAST]], %[[TWO_CAST]] : si32, si32, si32) dataOperands(%[[GDP]] : !cir.ptr) attributes {finalize} // CHECK-NEXT: acc.delete accPtr(%[[GDP]] : !cir.ptr) {name = "parmVar", structured = false} #pragma acc exit data wait(devnum: parmVar: 1, 2) delete(parmVar) // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_CONST]] + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_CONST]] // CHECK-NEXT: %[[TWO_CONST:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_CONST]] + // CHECK-NEXT: 
%[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_CONST]] // CHECK-NEXT: %[[GDP:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "parmVar", structured = false} // CHECK-NEXT: acc.exit_data wait_devnum(%[[PARM_CAST]] : si32) wait(%[[ONE_CAST]], %[[TWO_CAST]] : si32, si32) dataOperands(%[[GDP]] : !cir.ptr) // CHECK-NEXT: acc.delete accPtr(%[[GDP]] : !cir.ptr) {name = "parmVar", structured = false} diff --git a/clang/test/CIR/CodeGenOpenACC/firstprivate-clause-recipes.cpp b/clang/test/CIR/CodeGenOpenACC/firstprivate-clause-recipes.cpp index 8a9163182d21d..a48f2c1b58983 100644 --- a/clang/test/CIR/CodeGenOpenACC/firstprivate-clause-recipes.cpp +++ b/clang/test/CIR/CodeGenOpenACC/firstprivate-clause-recipes.cpp @@ -26,9 +26,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr x 5> x 5>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -43,9 +43,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE_TO:.*]] = cir.ptr_stride %[[TLA_DECAY_TO]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // 
CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -60,9 +60,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE_TO:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY_TO]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -105,9 +105,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // 
CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB3_CAST]], %[[ONE]] : !u64i @@ -122,9 +122,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_DECAY]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB2_CAST]], %[[ONE]] : !u64i @@ -139,9 +139,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB1_CAST]], %[[ONE]] : !u64i @@ -189,7 +189,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.firstprivate.init" align(8) : !cir.ptr>>> // CHECK-NEXT: %[[INT_PTR_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr>> @@ -221,7 +221,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[INT_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index 
to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UPPER_BOUND_CAST_2]], %[[UPPER_BOUND_CAST]] : !u64i // CHECK-NEXT: %[[SIZEOF_PTR_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[SIZEOF_PTR_PTR]] : !u64i @@ -255,7 +255,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_3:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_3:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS_2:.*]] = cir.mul %[[UPPER_BOUND_CAST_3]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[SIZEOF_INT:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS_2]], %[[SIZEOF_INT]] : !u64i @@ -292,9 +292,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr>>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store 
%[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr @@ -310,9 +310,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDETO:.*]] = cir.ptr_stride %[[TLA_LOAD_TO]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -327,9 +327,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE_TO:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_LOAD_TO]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -373,9 +373,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: 
!cir.ptr>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB3:.*]] = cir.sub %[[UB3_CAST]], %[[CONST_ONE]] : !u64i @@ -391,9 +391,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.sub %[[UB2_CAST]], %[[CONST_ONE]] : !u64i @@ -408,9 +408,9 @@ void do_things(unsigned A, 
unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -460,7 +460,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr x 5>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.firstprivate.init" align(8) : !cir.ptr x 5>>> // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<40> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB3_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr x 5>> @@ -490,7 +490,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// 
CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB2_CAST]], %[[UB3_CAST]] : !u64i // // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i @@ -498,7 +498,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ZERO]] : (!cir.ptr>, !u64i) -> !cir.ptr> // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS2:.*]] = cir.mul %[[UB1_CAST]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS2]], %[[ELT_SIZE]] : !u64i @@ -531,9 +531,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[ARG_FROM:.*]]: !cir.ptr x 5>>> {{.*}}, %[[ARG_TO:.*]]: !cir.ptr x 5>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: 
cir.for : cond { @@ -548,9 +548,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE_TO:.*]] = cir.ptr_stride %[[TLA_LOAD_TO]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -565,9 +565,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE_TO:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY_TO]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -610,9 +610,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr x 5>>> {{.*}}, 
%[[PRIVATE:.*]]: !cir.ptr x 5>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB3:.*]] = cir.sub %[[UB3_CAST]], %[[CONST_ONE]] : !u64i @@ -628,9 +628,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.sub %[[UB2_CAST]], %[[CONST_ONE]] : !u64i @@ -645,9 +645,9 @@ void do_things(unsigned A, unsigned B) { 
// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i diff --git a/clang/test/CIR/CodeGenOpenACC/host_data.c b/clang/test/CIR/CodeGenOpenACC/host_data.c index 4e2dcf4a52db4..bcab22f451406 100644 --- a/clang/test/CIR/CodeGenOpenACC/host_data.c +++ b/clang/test/CIR/CodeGenOpenACC/host_data.c @@ -58,9 +58,9 @@ void acc_host_data(int cond, int var1, int var2, int *arr) { #pragma acc host_data use_device(arr[0:var1]) {} // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> - // CHECK-NEXT: %[[ZERO_CAST:.*]] = builtin.unrealized_conversion_cast %[[ZERO]] : !s32i to si32 + // CHECK-NEXT: %[[ZERO_CAST:.*]] = cir.builtin_int_cast %[[ZERO]] : !s32i -> si32 // CHECK-NEXT: %[[VAR1_LOAD:.*]] = cir.load{{.*}} %[[V1]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[VAR1_CAST:.*]] = builtin.unrealized_conversion_cast %[[VAR1_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[VAR1_CAST:.*]] = cir.builtin_int_cast %[[VAR1_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[CONST_ZERO:.*]] = arith.constant 0 // CHECK-NEXT: %[[CONST_ONE:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ZERO_CAST]] : si32) extent(%[[VAR1_CAST]] : si32) 
stride(%[[CONST_ONE]] : i64) startIdx(%[[CONST_ZERO]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/init.c b/clang/test/CIR/CodeGenOpenACC/init.c index 4c1d74abfbdfe..e9401b5af333e 100644 --- a/clang/test/CIR/CodeGenOpenACC/init.c +++ b/clang/test/CIR/CodeGenOpenACC/init.c @@ -30,12 +30,12 @@ void acc_init(int cond) { #pragma acc init device_num(cond) // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[COND_CONV:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.init device_num(%[[COND_CONV]] : si32) #pragma acc init device_num(1) // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONV:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.init device_num(%[[ONE_CONV]] : si32) #pragma acc init if(cond) device_num(cond) device_type(*) @@ -43,6 +43,6 @@ void acc_init(int cond) { // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool // CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[COND_CONV:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.init device_num(%[[COND_CONV]] : si32) if(%[[BOOL_CONV]]) attributes {device_types = [#acc.device_type]} } diff --git a/clang/test/CIR/CodeGenOpenACC/kernels.c b/clang/test/CIR/CodeGenOpenACC/kernels.c index 1602dbb65b153..308206493e48c 100644 --- a/clang/test/CIR/CodeGenOpenACC/kernels.c +++ b/clang/test/CIR/CodeGenOpenACC/kernels.c @@ -109,7 +109,7 @@ void acc_kernels(int 
cond) { #pragma acc kernels num_workers(cond) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.kernels num_workers(%[[CONV_CAST]] : si32) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -117,9 +117,9 @@ void acc_kernels(int cond) { #pragma acc kernels num_workers(cond) device_type(nvidia) num_workers(2u) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !u32i -> ui32 // CHECK-NEXT: acc.kernels num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -127,11 +127,11 @@ void acc_kernels(int cond) { #pragma acc kernels num_workers(cond) device_type(nvidia, host) num_workers(2) device_type(radeon) num_workers(3) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const 
#cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type], %[[TWO_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -139,11 +139,11 @@ void acc_kernels(int cond) { #pragma acc kernels num_workers(cond) device_type(nvidia) num_workers(2) device_type(radeon, multicore) num_workers(3) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -151,9 +151,9 @@ void acc_kernels(int cond) { #pragma acc kernels device_type(nvidia) num_workers(2) device_type(radeon) num_workers(3) {} // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // 
CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels num_workers(%[[TWO_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -161,7 +161,7 @@ void acc_kernels(int cond) { #pragma acc kernels vector_length(cond) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -169,9 +169,9 @@ void acc_kernels(int cond) { #pragma acc kernels vector_length(cond) device_type(nvidia) vector_length(2u) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !u32i -> ui32 // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -179,11 +179,11 @@ void acc_kernels(int cond) { #pragma acc kernels vector_length(cond) device_type(nvidia, host) vector_length(2) device_type(radeon) 
vector_length(3) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type], %[[TWO_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -191,11 +191,11 @@ void acc_kernels(int cond) { #pragma acc kernels vector_length(cond) device_type(nvidia) vector_length(2) device_type(radeon, multicore) vector_length(3) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = 
cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -203,9 +203,9 @@ void acc_kernels(int cond) { #pragma acc kernels device_type(nvidia) vector_length(2) device_type(radeon) vector_length(3) {} // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels vector_length(%[[TWO_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -219,7 +219,7 @@ void acc_kernels(int cond) { #pragma acc kernels async(cond) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.kernels async(%[[CONV_CAST]] : si32) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -233,9 +233,9 @@ void acc_kernels(int cond) { #pragma acc kernels async(3) device_type(nvidia, radeon) async(cond) {} // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : 
!s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.kernels async(%[[THREE_CAST]] : si32, %[[CONV_CAST]] : si32 [#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -243,7 +243,7 @@ void acc_kernels(int cond) { #pragma acc kernels async device_type(nvidia, radeon) async(cond) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.kernels async([#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -251,7 +251,7 @@ void acc_kernels(int cond) { #pragma acc kernels async(3) device_type(nvidia, radeon) async {} // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels async([#acc.device_type, #acc.device_type], %[[THREE_CAST]] : si32) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -259,7 +259,7 @@ void acc_kernels(int cond) { #pragma acc kernels num_gangs(1) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels num_gangs({%[[ONE_CAST]] : si32}) { 
// CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -267,7 +267,7 @@ void acc_kernels(int cond) { #pragma acc kernels num_gangs(cond) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.kernels num_gangs({%[[CONV_CAST]] : si32}) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -275,9 +275,9 @@ void acc_kernels(int cond) { #pragma acc kernels num_gangs(1) device_type(radeon) num_gangs(cond) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.kernels num_gangs({%[[ONE_CAST]] : si32}, {%[[CONV_CAST]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -285,9 +285,9 @@ void acc_kernels(int cond) { #pragma acc kernels num_gangs(1) device_type(radeon) num_gangs(6) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[SIX_LITERAL:.*]] = cir.const #cir.int<6> : !s32i - // CHECK-NEXT: %[[SIX_CAST:.*]] = builtin.unrealized_conversion_cast %[[SIX_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[SIX_CAST:.*]] = cir.builtin_int_cast %[[SIX_LITERAL]] : !s32i -> si32 // CHECK-NEXT: 
acc.kernels num_gangs({%[[ONE_CAST]] : si32}, {%[[SIX_CAST]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -295,9 +295,9 @@ void acc_kernels(int cond) { #pragma acc kernels num_gangs(cond) device_type(radeon, nvidia) num_gangs(4) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i - // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CAST:.*]] = cir.builtin_int_cast %[[FOUR_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels num_gangs({%[[CONV_CAST]] : si32}, {%[[FOUR_CAST]] : si32} [#acc.device_type], {%[[FOUR_CAST]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -317,7 +317,7 @@ void acc_kernels(int cond) { #pragma acc kernels wait(1) device_type(nvidia) wait {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels wait([#acc.device_type], {%[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -325,7 +325,7 @@ void acc_kernels(int cond) { #pragma acc kernels wait device_type(nvidia) wait(1) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels wait([#acc.device_type], {%[[ONE_CAST]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.terminator 
// CHECK-NEXT: } loc @@ -333,9 +333,9 @@ void acc_kernels(int cond) { #pragma acc kernels wait(1) device_type(nvidia) wait(1) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL2]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE_LITERAL2]] : !s32i -> si32 // CHECK-NEXT: acc.kernels wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -343,9 +343,9 @@ void acc_kernels(int cond) { #pragma acc kernels wait(devnum: cond : 1) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -353,13 +353,13 @@ void acc_kernels(int cond) { #pragma acc kernels wait(devnum: cond : 1) device_type(nvidia) wait(devnum: cond : 1) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: 
%[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST2:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -367,11 +367,11 @@ void acc_kernels(int cond) { #pragma acc kernels wait(devnum: cond : 1, 2) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) { 
// CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -379,17 +379,17 @@ void acc_kernels(int cond) { #pragma acc kernels wait(devnum: cond : 1, 2) device_type(nvidia, radeon) wait(devnum: cond : 1, 2) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST2:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST2:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} 
[#acc.device_type], {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -397,9 +397,9 @@ void acc_kernels(int cond) { #pragma acc kernels wait(cond, 1) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc @@ -407,9 +407,9 @@ void acc_kernels(int cond) { #pragma acc kernels wait(queues: cond, 1) device_type(radeon) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.kernels wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.terminator // CHECK-NEXT: } loc diff --git a/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp b/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp index d695382ef05bd..84e9ccd74dea2 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-private-clause.cpp @@ -96,9 +96,9 
@@ struct HasDtor { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.private.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -130,9 +130,9 @@ struct HasDtor { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -244,7 +244,7 @@ extern "C" void acc_loop() { #pragma acc loop private(someIntArr[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -256,7 +256,7 @@ extern "C" void acc_loop() { #pragma acc loop private(someFloatArr[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -268,7 +268,7 @@ extern "C" void acc_loop() { #pragma acc loop private(noCopyArr[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -280,7 +280,7 @@ extern "C" void acc_loop() { #pragma acc loop private(hasCopyArr[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -292,7 +292,7 @@ extern "C" void acc_loop() { #pragma acc loop private(notDefCtorArr[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const 
#cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -304,7 +304,7 @@ extern "C" void acc_loop() { #pragma acc loop private(dtorArr[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -316,42 +316,42 @@ extern "C" void acc_loop() { #pragma acc loop private(someIntArr[1], someFloatArr[1], noCopyArr[1], hasCopyArr[1], notDefCtorArr[1], dtorArr[1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_i) -> !cir.ptr> {name = "someIntArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = 
cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_f) -> !cir.ptr> {name = "someFloatArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_15NoCopyConstruct) -> !cir.ptr> {name = "noCopyArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) 
recipe(@privatization__Bcnt1__ZTSA5_13CopyConstruct) -> !cir.ptr> {name = "hasCopyArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CONST]] : i64) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_14NonDefaultCtor) -> !cir.ptr> {name = "notDefCtorArr[1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 @@ -369,9 +369,9 @@ extern "C" void acc_loop() { #pragma acc loop private(someIntArr[1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds 
lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -382,9 +382,9 @@ extern "C" void acc_loop() { #pragma acc loop private(someFloatArr[1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -395,9 +395,9 @@ extern "C" void acc_loop() { #pragma acc loop private(noCopyArr[1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -408,9 +408,9 @@ extern "C" void acc_loop() { #pragma acc loop private(hasCopyArr[1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: 
%[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -421,9 +421,9 @@ extern "C" void acc_loop() { #pragma acc loop private(notDefCtorArr[1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -434,9 +434,9 @@ extern "C" void acc_loop() { #pragma acc loop private(dtorArr[1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const 
#cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) @@ -447,49 +447,49 @@ extern "C" void acc_loop() { #pragma acc loop private(someIntArr[1:1], someFloatArr[1:1], noCopyArr[1:1], hasCopyArr[1:1], notDefCtorArr[1:1], dtorArr[1:1]) for(int i = 0; i < 5; ++i); // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE1:.*]] = acc.private varPtr(%[[INTARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_i) -> !cir.ptr> {name = "someIntArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to 
si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE2:.*]] = acc.private varPtr(%[[FLOATARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_f) -> !cir.ptr> {name = "someFloatArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE3:.*]] = acc.private varPtr(%[[NOCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_15NoCopyConstruct) -> !cir.ptr> {name = "noCopyArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = 
arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE4:.*]] = acc.private varPtr(%[[HASCOPYARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_13CopyConstruct) -> !cir.ptr> {name = "hasCopyArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) // CHECK-NEXT: %[[PRIVATE5:.*]] = acc.private varPtr(%[[NOTDEFCTORARR]] : !cir.ptr>) bounds(%[[BOUNDS]]) recipe(@privatization__Bcnt1__ZTSA5_14NonDefaultCtor) -> !cir.ptr> {name = "notDefCtorArr[1:1]"} // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE]] : !s32i -> si32 // CHECK-NEXT: %[[ZERO_CONST:.*]] = arith.constant 0 // CHECK-NEXT: %[[ONE_CONST2:.*]] = arith.constant 1 // CHECK-NEXT: %[[BOUNDS:.*]] = 
acc.bounds lowerbound(%[[ONE_CAST]] : si32) extent(%[[ONE_CAST2]] : si32) stride(%[[ONE_CONST2]] : i64) startIdx(%[[ZERO_CONST]] : i64) diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp index a954834782230..947b93c4cfdb7 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-default-ops.cpp @@ -1301,9 +1301,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1342,9 +1342,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: 
%[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1407,9 +1407,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1448,9 +1448,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1513,9 +1513,9 @@ 
void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1554,9 +1554,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1661,9 +1661,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: 
%[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1702,9 +1702,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1809,9 +1809,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// 
CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1844,9 +1844,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1897,9 +1897,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: 
cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1932,9 +1932,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1985,9 +1985,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2020,9 +2020,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: 
!acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2073,9 +2073,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2114,9 +2114,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// 
CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2219,9 +2219,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2260,9 +2260,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index 
to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp index 53b9f735a3034..bc26fe39d2664 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-float.cpp @@ -404,9 +404,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -432,9 +432,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: 
%[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -468,9 +468,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -496,9 +496,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store 
%[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -532,9 +532,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -560,9 +560,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -603,9 +603,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = 
acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -631,9 +631,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -674,9 +674,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: 
%[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -702,9 +702,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -748,9 +748,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // 
CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -776,9 +776,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp index e7cdb7693c0dd..53dc10b04fcf2 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-inline-ops.cpp @@ -1399,9 +1399,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i 
+// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1440,9 +1440,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1469,9 +1469,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // 
CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1502,9 +1502,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1543,9 +1543,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1572,9 +1572,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // 
CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1605,9 +1605,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1646,9 +1646,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// 
CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1681,9 +1681,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1714,9 +1714,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1755,9 +1755,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1790,9 +1790,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // 
CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1823,9 +1823,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1864,9 +1864,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1893,9 +1893,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> 
{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1926,9 +1926,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1967,9 +1967,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1996,9 +1996,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2029,9 +2029,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2070,9 +2070,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2099,9 +2099,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // 
CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2132,9 +2132,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2173,9 +2173,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2205,9 +2205,9 @@ void acc_loop() { // 
CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2238,9 +2238,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2279,9 +2279,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2311,9 +2311,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp index beb667faaf769..ca714f4eec693 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-int.cpp @@ -570,9 +570,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> 
// CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -598,9 +598,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -634,9 +634,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = 
cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -662,9 +662,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -698,9 +698,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: 
%[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -726,9 +726,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -769,9 +769,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -797,9 +797,9 @@ void 
acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" {{.*}} : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -840,9 +840,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -868,9 +868,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> 
index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -904,9 +904,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -932,9 +932,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -968,9 +968,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -996,9 +996,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" 
align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1032,9 +1032,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1060,9 +1060,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1106,9 +1106,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // 
CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1134,9 +1134,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp index 41bb299bf6267..91592951ed440 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop-reduction-clause-outline-ops.cpp @@ -1398,9 +1398,9 @@ void acc_loop() { 
// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1439,9 +1439,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1468,9 +1468,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// 
CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1501,9 +1501,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1542,9 +1542,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = 
acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1571,9 +1571,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1604,9 +1604,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast 
%[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1645,9 +1645,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1680,9 +1680,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], 
%[[ONE]] : !u64i @@ -1713,9 +1713,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1754,9 +1754,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1789,9 +1789,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = 
acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1822,9 +1822,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1863,9 +1863,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast 
%[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1892,9 +1892,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -1925,9 +1925,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// 
CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1966,9 +1966,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1995,9 +1995,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // 
CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2028,9 +2028,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2069,9 +2069,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2098,9 +2098,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // 
CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2131,9 +2131,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2172,9 +2172,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// 
CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2204,9 +2204,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i @@ -2237,9 +2237,9 @@ void acc_loop() { // CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca "openacc.reduction.init" {{.*}} : !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2278,9 +2278,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr> {{.*}}, %[[RHSARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}})) // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -2310,9 +2310,9 @@ void acc_loop() { // CHECK-NEXT: ^bb0(%[[ORIG:.*]]: !cir.ptr> {{.*}}, %[[ARG:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i +// CHECK-NEXT: %[[LB_CAST:.*]] = cir.builtin_int_cast %[[LB]] : index -> !u64i // CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i +// CHECK-NEXT: %[[UB_CAST:.*]] = cir.builtin_int_cast %[[UB]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // 
CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB_CAST]], %[[ONE]] : !u64i diff --git a/clang/test/CIR/CodeGenOpenACC/loop.cpp b/clang/test/CIR/CodeGenOpenACC/loop.cpp index 8c2589d6d00d2..c187045eca61f 100644 --- a/clang/test/CIR/CodeGenOpenACC/loop.cpp +++ b/clang/test/CIR/CodeGenOpenACC/loop.cpp @@ -206,7 +206,7 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { #pragma acc loop worker(N) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -220,7 +220,7 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { #pragma acc loop worker(N) device_type(nvidia, radeon) worker for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.loop worker([#acc.device_type, #acc.device_type], %[[N_CONV]] : si32) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -228,7 +228,7 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { #pragma acc loop worker device_type(nvidia, radeon) worker(N) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.loop worker([#acc.device_type], %[[N_CONV]] : si32 [#acc.device_type], %[[N_CONV]] : si32 [#acc.device_type]) { // CHECK: acc.yield // 
CHECK-NEXT: } loc @@ -236,11 +236,11 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { #pragma acc loop worker(N) device_type(nvidia, radeon) worker(N + 1) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.add nsw %[[N_LOAD2]], %[[ONE_CONST]] : !s32i - // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32 + // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = cir.builtin_int_cast %[[N_PLUS_ONE]] : !s32i -> si32 // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type]) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -250,7 +250,7 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.add nsw %[[N_LOAD]], %[[ONE_CONST]] : !s32i - // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32 + // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = cir.builtin_int_cast %[[N_PLUS_ONE]] : !s32i -> si32 // CHECK-NEXT: acc.loop worker(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type]) { #pragma acc loop vector @@ -262,7 +262,7 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { #pragma acc loop vector(N) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] 
= builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.loop vector(%[[N_CONV]] : si32) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -276,7 +276,7 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { #pragma acc loop vector(N) device_type(nvidia, radeon) vector for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.loop vector([#acc.device_type, #acc.device_type], %[[N_CONV]] : si32) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -284,11 +284,11 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { #pragma acc loop vector(N) device_type(nvidia, radeon) vector(N + 1) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.add nsw %[[N_LOAD2]], %[[ONE_CONST]] : !s32i - // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32 + // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = cir.builtin_int_cast %[[N_PLUS_ONE]] : !s32i -> si32 // CHECK-NEXT: acc.loop vector(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type]) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -298,7 +298,7 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} 
%[[ALLOCA_N]] : !cir.ptr, !s32i // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.add nsw %[[N_LOAD]], %[[ONE_CONST]] : !s32i - // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32 + // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = cir.builtin_int_cast %[[N_PLUS_ONE]] : !s32i -> si32 // CHECK-NEXT: acc.loop vector(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type]) { // CHECK: acc.yield // CHECK-NEXT: } loc @@ -312,13 +312,13 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { #pragma acc loop worker(N) vector(N) device_type(nvidia) worker(N) vector(N) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV2:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD2]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV2:.*]] = cir.builtin_int_cast %[[N_LOAD2]] : !s32i -> si32 // CHECK-NEXT: %[[N_LOAD3:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV3:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD3]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV3:.*]] = cir.builtin_int_cast %[[N_LOAD3]] : !s32i -> si32 // CHECK-NEXT: %[[N_LOAD4:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV4:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD4]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV4:.*]] = cir.builtin_int_cast %[[N_LOAD4]] : !s32i -> si32 // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32, %[[N_CONV3]] : si32 [#acc.device_type]) vector(%[[N_CONV2]] : si32, %[[N_CONV4]] : si32 [#acc.device_type]) { // CHECK: acc.yield // CHECK-NEXT: } loc 
@@ -347,7 +347,7 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { #pragma acc loop gang(static:N, dim: 1) device_type(nvidia, radeon) gang(static:*, dim : 2) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64 // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64 @@ -361,16 +361,16 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { #pragma acc loop gang(num:N) device_type(nvidia, radeon) gang(num:N) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV2:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD2]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV2:.*]] = cir.builtin_int_cast %[[N_LOAD2]] : !s32i -> si32 // CHECK-NEXT: acc.loop gang({num=%[[N_CONV]] : si32}, {num=%[[N_CONV2]] : si32} [#acc.device_type], {num=%[[N_CONV2]] : si32} [#acc.device_type]) { // CHECK: acc.yield // CHECK-NEXT: } loc #pragma acc loop gang(static:N) device_type(nvidia) gang(static:*) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 // CHECK-NEXT: acc.loop 
gang({static=%[[N_CONV]] : si32}, {static=%[[STAR_CONST]] : i64} [#acc.device_type]) { // CHECK: acc.yield @@ -378,16 +378,16 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) { #pragma acc loop gang(static:N, num: N + 1) device_type(nvidia) gang(static:*, num : N + 2) for(unsigned I = 0; I < N; ++I); // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[N_CONV:.*]] = cir.builtin_int_cast %[[N_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i // CHECK-NEXT: %[[CIR_ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.add nsw %[[N_LOAD2]], %[[CIR_ONE_CONST]] : !s32i - // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32 + // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = cir.builtin_int_cast %[[N_PLUS_ONE]] : !s32i -> si32 // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64 // CHECK-NEXT: %[[N_LOAD3:.*]] = cir.load{{.*}} %[[ALLOCA_N]] : !cir.ptr, !s32i // CHECK-NEXT: %[[CIR_TWO_CONST:.*]] = cir.const #cir.int<2> : !s32i // CHECK-NEXT: %[[N_PLUS_TWO:.*]] = cir.add nsw %[[N_LOAD3]], %[[CIR_TWO_CONST]] : !s32i - // CHECK-NEXT: %[[N_PLUS_TWO_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_TWO]] : !s32i to si32 + // CHECK-NEXT: %[[N_PLUS_TWO_CONV:.*]] = cir.builtin_int_cast %[[N_PLUS_TWO]] : !s32i -> si32 // CHECK-NEXT: acc.loop gang({static=%[[N_CONV]] : si32, num=%[[N_PLUS_ONE_CONV]] : si32}, {static=%[[STAR_CONST]] : i64, num=%[[N_PLUS_TWO_CONV]] : si32} [#acc.device_type]) { // CHECK: acc.yield // CHECK-NEXT: } loc diff --git a/clang/test/CIR/CodeGenOpenACC/parallel.c b/clang/test/CIR/CodeGenOpenACC/parallel.c index 663d16fbba8f1..1d1ff9c6edbbf 100644 --- a/clang/test/CIR/CodeGenOpenACC/parallel.c +++ b/clang/test/CIR/CodeGenOpenACC/parallel.c @@ -108,7 +108,7 @@ void 
acc_parallel(int cond) { #pragma acc parallel num_workers(cond) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.parallel num_workers(%[[CONV_CAST]] : si32) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -116,9 +116,9 @@ void acc_parallel(int cond) { #pragma acc parallel num_workers(cond) device_type(nvidia) num_workers(2u) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !u32i -> ui32 // CHECK-NEXT: acc.parallel num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -126,11 +126,11 @@ void acc_parallel(int cond) { #pragma acc parallel num_workers(cond) device_type(nvidia, host) num_workers(2) device_type(radeon) num_workers(3) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = 
cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type], %[[TWO_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -138,11 +138,11 @@ void acc_parallel(int cond) { #pragma acc parallel num_workers(cond) device_type(nvidia) num_workers(2) device_type(radeon, multicore) num_workers(4) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i - // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CAST:.*]] = cir.builtin_int_cast %[[FOUR_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type], %[[FOUR_CAST]] : si32 [#acc.device_type], %[[FOUR_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -150,9 +150,9 @@ void acc_parallel(int cond) { #pragma acc parallel device_type(nvidia) num_workers(2) device_type(radeon) num_workers(3) {} // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // 
CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel num_workers(%[[TWO_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -160,7 +160,7 @@ void acc_parallel(int cond) { #pragma acc parallel vector_length(cond) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -168,9 +168,9 @@ void acc_parallel(int cond) { #pragma acc parallel vector_length(cond) device_type(nvidia) vector_length(2u) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !u32i -> ui32 // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -178,11 +178,11 @@ void acc_parallel(int cond) { #pragma acc parallel vector_length(cond) device_type(nvidia, host) vector_length(2) device_type(radeon) vector_length(3) {} 
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type], %[[TWO_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -190,11 +190,11 @@ void acc_parallel(int cond) { #pragma acc parallel vector_length(cond) device_type(nvidia) vector_length(2) device_type(radeon, multicore) vector_length(4) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i - // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CAST:.*]] = cir.builtin_int_cast 
%[[FOUR_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type], %[[FOUR_CAST]] : si32 [#acc.device_type], %[[FOUR_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -202,9 +202,9 @@ void acc_parallel(int cond) { #pragma acc parallel device_type(nvidia) vector_length(2) device_type(radeon) vector_length(3) {} // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel vector_length(%[[TWO_CAST]] : si32 [#acc.device_type], %[[THREE_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -218,7 +218,7 @@ void acc_parallel(int cond) { #pragma acc parallel async(cond) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.parallel async(%[[CONV_CAST]] : si32) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -232,9 +232,9 @@ void acc_parallel(int cond) { #pragma acc parallel async(3) device_type(nvidia, radeon) async(cond) {} // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: 
%[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.parallel async(%[[THREE_CAST]] : si32, %[[CONV_CAST]] : si32 [#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -242,7 +242,7 @@ void acc_parallel(int cond) { #pragma acc parallel async device_type(nvidia, radeon) async(cond) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.parallel async([#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -250,7 +250,7 @@ void acc_parallel(int cond) { #pragma acc parallel async(3) device_type(nvidia, radeon) async {} // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel async([#acc.device_type, #acc.device_type], %[[THREE_CAST]] : si32) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -258,7 +258,7 @@ void acc_parallel(int cond) { #pragma acc parallel num_gangs(1) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel num_gangs({%[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.yield // 
CHECK-NEXT: } loc @@ -266,7 +266,7 @@ void acc_parallel(int cond) { #pragma acc parallel num_gangs(cond) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.parallel num_gangs({%[[CONV_CAST]] : si32}) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -274,11 +274,11 @@ void acc_parallel(int cond) { #pragma acc parallel num_gangs(1, cond, 2) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel num_gangs({%[[ONE_CAST]] : si32, %[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32}) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -286,9 +286,9 @@ void acc_parallel(int cond) { #pragma acc parallel num_gangs(1) device_type(radeon) num_gangs(cond) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - 
// CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.parallel num_gangs({%[[ONE_CAST]] : si32}, {%[[CONV_CAST]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -296,17 +296,17 @@ void acc_parallel(int cond) { #pragma acc parallel num_gangs(1, cond, 2) device_type(radeon) num_gangs(4, 5, 6) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i - // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CAST:.*]] = cir.builtin_int_cast %[[FOUR_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[FIVE_LITERAL:.*]] = cir.const #cir.int<5> : !s32i - // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FIVE_CAST:.*]] = cir.builtin_int_cast %[[FIVE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[SIX_LITERAL:.*]] = cir.const #cir.int<6> : !s32i - // CHECK-NEXT: %[[SIX_CAST:.*]] = builtin.unrealized_conversion_cast %[[SIX_LITERAL]] : !s32i to si32 + // CHECK-NEXT: 
%[[SIX_CAST:.*]] = cir.builtin_int_cast %[[SIX_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel num_gangs({%[[ONE_CAST]] : si32, %[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32}, {%[[FOUR_CAST]] : si32, %[[FIVE_CAST]] : si32, %[[SIX_CAST]] : si32} [#acc.device_type]) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -314,17 +314,17 @@ void acc_parallel(int cond) { #pragma acc parallel num_gangs(1, cond, 2) device_type(radeon, nvidia) num_gangs(4, 5, 6) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i - // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FOUR_CAST:.*]] = cir.builtin_int_cast %[[FOUR_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[FIVE_LITERAL:.*]] = cir.const #cir.int<5> : !s32i - // CHECK-NEXT: %[[FIVE_CAST:.*]] = builtin.unrealized_conversion_cast %[[FIVE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[FIVE_CAST:.*]] = cir.builtin_int_cast %[[FIVE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[SIX_LITERAL:.*]] = cir.const #cir.int<6> : !s32i - // CHECK-NEXT: %[[SIX_CAST:.*]] = builtin.unrealized_conversion_cast %[[SIX_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[SIX_CAST:.*]] = 
cir.builtin_int_cast %[[SIX_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel num_gangs({%[[ONE_CAST]] : si32, %[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32}, {%[[FOUR_CAST]] : si32, %[[FIVE_CAST]] : si32, %[[SIX_CAST]] : si32} [#acc.device_type], {%[[FOUR_CAST]] : si32, %[[FIVE_CAST]] : si32, %[[SIX_CAST]] : si32} [#acc.device_type]) // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -344,7 +344,7 @@ void acc_parallel(int cond) { #pragma acc parallel wait(1) device_type(nvidia) wait {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel wait([#acc.device_type], {%[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -352,7 +352,7 @@ void acc_parallel(int cond) { #pragma acc parallel wait device_type(nvidia) wait(1) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel wait([#acc.device_type], {%[[ONE_CAST]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -360,9 +360,9 @@ void acc_parallel(int cond) { #pragma acc parallel wait(1) device_type(nvidia) wait(1) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL2]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast 
%[[ONE_LITERAL2]] : !s32i -> si32 // CHECK-NEXT: acc.parallel wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -370,9 +370,9 @@ void acc_parallel(int cond) { #pragma acc parallel wait(devnum: cond : 1) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -380,13 +380,13 @@ void acc_parallel(int cond) { #pragma acc parallel wait(devnum: cond : 1) device_type(nvidia) wait(devnum: cond : 1) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST2:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = 
builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -394,11 +394,11 @@ void acc_parallel(int cond) { #pragma acc parallel wait(devnum: cond : 1, 2) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -406,17 +406,17 @@ void acc_parallel(int cond) { #pragma acc parallel wait(devnum: cond : 1, 2) device_type(nvidia, radeon) wait(devnum: cond : 1, 2) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST2:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST2:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type], {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -424,9 +424,9 @@ void acc_parallel(int cond) { #pragma acc parallel wait(cond, 1) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -434,9 +434,9 @@ void acc_parallel(int cond) { #pragma acc parallel wait(queues: cond, 1) device_type(radeon) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.parallel wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-CtorDtor.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-CtorDtor.cpp index a1682e80a2fd1..7dc3f7beb63cf 100644 --- a/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-CtorDtor.cpp +++ b/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-CtorDtor.cpp @@ -18,9 +18,9 @@ void do_things(unsigned A, unsigned B) { // Init Section. 
// CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // @@ -46,9 +46,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB1_CAST]], %[[ONE]] : !u64i @@ -129,9 +129,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] 
= cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -146,9 +146,9 @@ void do_things(unsigned A, unsigned B) { // // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // @@ -182,9 +182,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// 
CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB2_CAST]], %[[ONE]] : !u64i @@ -201,9 +201,9 @@ void do_things(unsigned A, unsigned B) { // // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB1_CAST]], %[[ONE]] : !u64i @@ -297,9 +297,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -312,9 +312,9 @@ void do_things(unsigned A, unsigned B) { // 
CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_DECAY]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -327,9 +327,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -370,9 +370,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty 
{{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB3_CAST]], %[[ONE]] : !u64i @@ -387,9 +387,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_DECAY]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB2_CAST]], %[[ONE]] : !u64i @@ -404,9 +404,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : 
(!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB1_CAST]], %[[ONE]] : !u64i @@ -460,9 +460,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -477,9 +477,9 @@ void do_things(unsigned A, unsigned B) { // // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: 
%[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // @@ -529,9 +529,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB2_CAST]], %[[ONE]] : !u64i @@ -548,9 +548,9 @@ void do_things(unsigned A, unsigned B) { // // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: 
%[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB1_CAST]], %[[ONE]] : !u64i diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-NoOps.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-NoOps.cpp index ed66265318c9c..92022c4ab45ae 100644 --- a/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-NoOps.cpp +++ b/clang/test/CIR/CodeGenOpenACC/private-clause-array-recipes-NoOps.cpp @@ -13,9 +13,9 @@ void do_things(unsigned A, unsigned B) { // Init Section. // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // @@ -75,9 +75,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = 
cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -92,9 +92,9 @@ void do_things(unsigned A, unsigned B) { // // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // @@ -165,9 +165,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -180,9 +180,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_DECAY]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // 
CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -195,9 +195,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -249,9 +249,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = 
cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -266,9 +266,9 @@ void do_things(unsigned A, unsigned B) { // // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-CtorDtor.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-CtorDtor.cpp index 6f2452c84282b..679aea8bb0abe 100644 --- a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-CtorDtor.cpp +++ b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-CtorDtor.cpp @@ -16,7 +16,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr> // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// 
CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB1_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(4) size(%[[ALLOCA_SIZE]]) : !cir.ptr @@ -48,9 +48,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -76,9 +76,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: 
%[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -122,7 +122,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr> @@ -152,7 +152,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -184,9 +184,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = 
cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -199,9 +199,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -234,9 +234,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : 
!cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.sub %[[UB2_CAST]], %[[CONST_ONE]] : !u64i @@ -251,9 +251,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -305,7 +305,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB3_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr>> @@ -335,7 +335,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// 
CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB2_CAST]], %[[UB3_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -365,7 +365,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS2:.*]] = cir.mul %[[UB1_CAST]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS2]], %[[ARR_SIZE]] : !u64i @@ -398,9 +398,9 @@ void do_things(unsigned A, unsigned B) { // Init: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -413,9 +413,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // 
CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -428,9 +428,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -471,9 +471,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB3:.*]] = cir.sub %[[UB3_CAST]], %[[CONST_ONE]] : !u64i @@ -489,9 +489,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.sub %[[UB2_CAST]], %[[CONST_ONE]] : !u64i @@ -506,9 +506,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: 
%[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -561,7 +561,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr>> @@ -591,7 +591,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -642,14 +642,14 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: 
%[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr x 5>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i // CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr x 5>> -> !cir.ptr> // CHECK-NEXT: %[[TL_DEREF:.*]] = cir.ptr_stride %[[DECAY]], %[[ZERO]] : (!cir.ptr>, !u64i) -> !cir.ptr> // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ELT_SIZE]] : !u64i @@ -682,9 +682,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -697,9 +697,9 @@ void do_things(unsigned A, unsigned B) { 
// CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -733,9 +733,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.sub %[[UB2_CAST]], %[[CONST_ONE]] : !u64i @@ -750,9 +750,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = 
cir.ptr_stride %[[TLA_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -805,7 +805,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<20> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(4) size(%[[ALLOCA_SIZE]]) : !cir.ptr> @@ -837,9 +837,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -852,9 +852,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -887,9 +887,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.sub %[[UB2_CAST]], %[[CONST_ONE]] : !u64i @@ -904,9 +904,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -958,14 +958,14 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr> x 5>> // // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i // CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr> x 5>> -> !cir.ptr>> // CHECK-NEXT: %[[TL_DEREF:.*]] = cir.ptr_stride %[[DECAY]], %[[ZERO]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> 
// // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB2_CAST]], %[[UB3_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -995,7 +995,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS2:.*]] = cir.mul %[[UB1_CAST]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS2]], %[[ELT_SIZE]] : !u64i @@ -1027,9 +1027,9 @@ void do_things(unsigned A, unsigned B) { // Init Section. 
// CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1042,9 +1042,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_DECAY]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1057,9 +1057,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: 
%[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1100,9 +1100,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB3:.*]] = cir.sub %[[UB3_CAST]], %[[CONST_ONE]] : !u64i @@ -1118,9 +1118,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_DECAY]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// 
CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.sub %[[UB2_CAST]], %[[CONST_ONE]] : !u64i @@ -1135,9 +1135,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -1190,14 +1190,14 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr> x 5>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// 
CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i // CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr> x 5>> -> !cir.ptr>> // CHECK-NEXT: %[[TL_DEREF:.*]] = cir.ptr_stride %[[DECAY]], %[[ZERO]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -1247,7 +1247,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB3_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr>> @@ -1277,7 +1277,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB2_CAST]], %[[UB3_CAST]] : !u64i // CHECK-NEXT: 
%[[ELT_SIZE:.*]] = cir.const #cir.int<20> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ELT_SIZE]] : !u64i @@ -1309,9 +1309,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1324,9 +1324,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1339,9 +1339,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride 
%[[BOUND3_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1382,9 +1382,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB3:.*]] = cir.sub %[[UB3_CAST]], %[[CONST_ONE]] : !u64i @@ -1400,9 +1400,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride 
%[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.sub %[[UB2_CAST]], %[[CONST_ONE]] : !u64i @@ -1417,9 +1417,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -1472,7 +1472,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // 
CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr>> @@ -1501,7 +1501,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<20> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -1533,9 +1533,9 @@ void do_things(unsigned A, unsigned B) { // Initialization. 
// CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1548,9 +1548,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1599,9 +1599,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.sub %[[UB2_CAST]], %[[CONST_ONE]] : !u64i @@ -1618,9 +1618,9 @@ void do_things(unsigned A, unsigned B) { // // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -1691,7 +1691,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr x 5>>> // // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: 
%[[ARR_SIZE:.*]] = cir.const #cir.int<40> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB3_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr x 5>> @@ -1721,7 +1721,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB2_CAST]], %[[UB3_CAST]] : !u64i // // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i @@ -1729,7 +1729,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ZERO]] : (!cir.ptr>, !u64i) -> !cir.ptr> // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS2:.*]] = cir.mul %[[UB1_CAST]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS2]], %[[ELT_SIZE]] : !u64i @@ -1760,9 +1760,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: 
%[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1775,9 +1775,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1790,9 +1790,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // 
CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1833,9 +1833,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr x 5>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr x 5>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB3:.*]] = cir.sub %[[UB3_CAST]], %[[CONST_ONE]] : !u64i @@ -1851,9 +1851,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : 
!cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.sub %[[UB2_CAST]], %[[CONST_ONE]] : !u64i @@ -1868,9 +1868,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -1923,7 +1923,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr x 5>>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<40> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr x 5>> diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-NoOps.cpp 
b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-NoOps.cpp index ccc2ebeda3ade..ffe146787189c 100644 --- a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-NoOps.cpp +++ b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-NoOps.cpp @@ -11,7 +11,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr> // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB1_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(4) size(%[[ALLOCA_SIZE]]) : !cir.ptr @@ -43,9 +43,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -86,7 +86,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr> @@ -116,7 +116,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -147,9 +147,9 @@ void do_things(unsigned A, unsigned B) { // Init Section. 
// CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -162,9 +162,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -214,7 +214,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i 
+// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB3_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr>> @@ -244,7 +244,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB2_CAST]], %[[UB3_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -274,7 +274,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS2:.*]] = cir.mul %[[UB1_CAST]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS2]], %[[ARR_SIZE]] : !u64i @@ -306,9 +306,9 @@ void do_things(unsigned A, unsigned B) { // Init: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -321,9 +321,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -336,9 +336,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: 
%[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -389,7 +389,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr>> @@ -419,7 +419,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -470,14 +470,14 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr x 5>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i // CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr x 
5>> -> !cir.ptr> // CHECK-NEXT: %[[TL_DEREF:.*]] = cir.ptr_stride %[[DECAY]], %[[ZERO]] : (!cir.ptr>, !u64i) -> !cir.ptr> // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ELT_SIZE]] : !u64i @@ -509,9 +509,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -524,9 +524,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> 
index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -579,7 +579,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<20> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(4) size(%[[ALLOCA_SIZE]]) : !cir.ptr> @@ -610,9 +610,9 @@ void do_things(unsigned A, unsigned B) { // Init Section // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -625,9 +625,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR2_LOAD]] : 
(!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -676,14 +676,14 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr> x 5>> // // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i // CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr> x 5>> -> !cir.ptr>> // CHECK-NEXT: %[[TL_DEREF:.*]] = cir.ptr_stride %[[DECAY]], %[[ZERO]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB2_CAST]], %[[UB3_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -713,7 +713,7 @@ 
void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS2:.*]] = cir.mul %[[UB1_CAST]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS2]], %[[ELT_SIZE]] : !u64i @@ -744,9 +744,9 @@ void do_things(unsigned A, unsigned B) { // Init Section. // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -759,9 +759,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_DECAY]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -774,9 +774,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -827,14 +827,14 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr> x 5>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i // CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr> x 5>> -> !cir.ptr>> // CHECK-NEXT: %[[TL_DEREF:.*]] = cir.ptr_stride %[[DECAY]], %[[ZERO]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound 
%[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -884,7 +884,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB3_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr>> @@ -914,7 +914,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB2_CAST]], %[[UB3_CAST]] : !u64i // CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<20> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ELT_SIZE]] : !u64i @@ -945,9 +945,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index 
to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -960,9 +960,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -975,9 +975,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : 
(!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1028,7 +1028,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr>> @@ -1058,7 +1058,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<20> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -1089,9 +1089,9 @@ void do_things(unsigned A, unsigned B) { // Init Section. 
// CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1104,9 +1104,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1174,7 +1174,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr x 5>>> // // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index 
to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<40> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB3_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr x 5>> @@ -1204,7 +1204,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB2_CAST]], %[[UB3_CAST]] : !u64i // // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i @@ -1212,7 +1212,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ZERO]] : (!cir.ptr>, !u64i) -> !cir.ptr> // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS2:.*]] = cir.mul %[[UB1_CAST]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS2]], %[[ELT_SIZE]] : !u64i @@ -1243,9 +1243,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// 
CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1258,9 +1258,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1273,9 +1273,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = 
cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -1326,7 +1326,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr x 5>>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<40> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr x 5>> diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-int.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-int.cpp index 5211e79e17f51..9ba1880f32b07 100644 --- a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-int.cpp +++ b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-array-recipes-int.cpp @@ -9,7 +9,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr> // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB1_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(4) size(%[[ALLOCA_SIZE]]) : !cir.ptr @@ -58,7 +58,7 @@ void 
do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr> @@ -88,7 +88,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -138,7 +138,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB3_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr>> @@ -168,7 +168,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } 
// // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB2_CAST]], %[[UB3_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -197,7 +197,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS2:.*]] = cir.mul %[[UB1_CAST]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS2]], %[[ARR_SIZE]] : !u64i @@ -240,7 +240,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr>> @@ -270,7 +270,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -320,14 +320,14 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr x 5>> // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i // CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr x 5>> -> !cir.ptr> // CHECK-NEXT: %[[TL_DEREF:.*]] = cir.ptr_stride %[[DECAY]], %[[ZERO]] : (!cir.ptr>, !u64i) -> !cir.ptr> // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ELT_SIZE]] : !u64i @@ -378,7 +378,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<20> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(4) size(%[[ALLOCA_SIZE]]) : !cir.ptr> @@ -428,14 +428,14 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr> x 5>> // // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i // CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr> x 5>> -> !cir.ptr>> // CHECK-NEXT: %[[TL_DEREF:.*]] = cir.ptr_stride %[[DECAY]], %[[ZERO]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB2_CAST]], %[[UB3_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -465,7 +465,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS2:.*]] = 
cir.mul %[[UB1_CAST]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS2]], %[[ELT_SIZE]] : !u64i @@ -508,14 +508,14 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr> x 5>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i // CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[TL_ALLOCA]] : !cir.ptr> x 5>> -> !cir.ptr>> // CHECK-NEXT: %[[TL_DEREF:.*]] = cir.ptr_stride %[[DECAY]], %[[ZERO]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -565,7 +565,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB3_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: 
%[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr>> @@ -596,7 +596,7 @@ void do_things(unsigned A, unsigned B) { // // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB2_CAST]], %[[UB3_CAST]] : !u64i // CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<20> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ELT_SIZE]] : !u64i @@ -639,7 +639,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr>> @@ -669,7 +669,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB1_CAST]], %[[UB2_CAST]] : !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<20> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[ARR_SIZE]] : !u64i @@ -721,7 +721,7 @@ void do_things(unsigned A, unsigned B) { // 
CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr x 5>>> // // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<40> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr x 5>> @@ -751,7 +751,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB2_CAST]], %[[UB3_CAST]] : !u64i // // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i @@ -759,7 +759,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ZERO]] : (!cir.ptr>, !u64i) -> !cir.ptr> // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS2:.*]] = cir.mul %[[UB1_CAST]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS2]], %[[ELT_SIZE]] : !u64i @@ -803,7 +803,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr x 5>>> // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound 
%[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<40> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB2_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr x 5>> diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-CtorDtor.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-CtorDtor.cpp index 6543349f0ae9d..69981784d52f4 100644 --- a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-CtorDtor.cpp +++ b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-CtorDtor.cpp @@ -25,7 +25,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" {{.*}} : !cir.ptr>>> // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_INT_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_INT_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr>> @@ -66,7 +66,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[INT_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUNDS2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[INT_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr>> @@ -98,7 +98,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUNDS1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UPPER_BOUND_CAST_2]], %[[UPPER_BOUND_CAST]] : !u64i // CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[SIZEOF_PTR]] : !u64i @@ -144,7 +144,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[INT_PTR_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUNDS3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr>> @@ -176,7 +176,7 @@ void do_things(unsigned A, unsigned B) { // 
CHECK-NEXT: } // // CHECK-NEXT: %[[INT_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUNDS2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UPPER_BOUND_CAST_2]], %[[UPPER_BOUND_CAST]] : !u64i // CHECK-NEXT: %[[SIZEOF_PTR_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[SIZEOF_PTR_PTR]] : !u64i @@ -210,7 +210,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_3:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_3:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS_2:.*]] = cir.mul %[[UPPER_BOUND_CAST_3]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[SIZEOF_INT:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS_2]], %[[SIZEOF_INT]] : !u64i @@ -244,9 +244,9 @@ void do_things(unsigned A, unsigned B) { // Initialization Section // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUNDS3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUNDS3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca 
"iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr @@ -260,9 +260,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUNDS2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUNDS2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -275,9 +275,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUNDS1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUNDS1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -319,9 +319,9 @@ void do_things(unsigned A, unsigned 
B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr>>> {{.*}}, %[[BOUNDS1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUNDS2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUNDS3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUNDS3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUNDS3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB3:.*]] = cir.sub %[[UB3_CAST]], %[[CONST_ONE]] : !u64i @@ -337,9 +337,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUNDS2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUNDS2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.sub %[[UB2_CAST]], %[[CONST_ONE]] : !u64i @@ -354,9 
+354,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUNDS1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUNDS1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -418,7 +418,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" {{.*}} : !cir.ptr>> // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_INT_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_INT_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr> @@ -458,7 +458,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>> // // CHECK-NEXT: %[[INT_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound 
%[[BOUNDS2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr> @@ -490,7 +490,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUNDS1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UPPER_BOUND_CAST_2]], %[[UPPER_BOUND_CAST]] : !u64i // CHECK-NEXT: %[[SIZEOF_INT:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[SIZEOF_INT]] : !u64i @@ -525,9 +525,9 @@ void do_things(unsigned A, unsigned B) { // Initialization Section // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUNDS2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUNDS2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store 
%[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -542,9 +542,9 @@ void do_things(unsigned A, unsigned B) { // // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUNDS1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUNDS1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // @@ -578,9 +578,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr>> {{.*}}, %[[BOUNDS1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUNDS2:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUNDS2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUNDS2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.sub %[[UB2_CAST]], %[[CONST_ONE]] : !u64i @@ -597,9 +597,9 @@ void do_things(unsigned A, unsigned B) { // // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = 
acc.get_lowerbound %[[BOUNDS1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUNDS1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -653,7 +653,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" {{.*}} : !cir.ptr> // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_CTORDTOR:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_CTORDTOR]] : !u64i // CHECK-NEXT: %[[INT_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(4) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr @@ -688,9 +688,9 @@ void do_things(unsigned A, unsigned B) { // Initialization Section // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index 
-// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -716,9 +716,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-NoOps.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-NoOps.cpp index 4506c957c4422..56d63aa15dc39 100644 --- a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-NoOps.cpp +++ b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-NoOps.cpp @@ -19,7 +19,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" {{.*}} : !cir.ptr>>> // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_INT_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_INT_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr>> @@ -59,7 +59,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[INT_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr>> @@ -92,7 +92,7 @@ void do_things(unsigned A, unsigned B) { // // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UPPER_BOUND_CAST_2]], %[[UPPER_BOUND_CAST]] : !u64i // CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[SIZEOF_PTR]] : !u64i @@ -136,7 +136,7 @@ void do_things(unsigned A, unsigned B) { // 
CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[INT_PTR_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr>> @@ -169,7 +169,7 @@ void do_things(unsigned A, unsigned B) { // // // CHECK-NEXT: %[[INT_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UPPER_BOUND_CAST_2]], %[[UPPER_BOUND_CAST]] : !u64i // CHECK-NEXT: %[[SIZEOF_PTR_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[SIZEOF_PTR_PTR]] : !u64i @@ -203,7 +203,7 @@ void do_things(unsigned A, unsigned B) { // // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_3:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_3:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS_2:.*]] = cir.mul %[[UPPER_BOUND_CAST_3]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[SIZEOF_INT:.*]] = cir.const #cir.int<4> : !u64i // 
CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS_2]], %[[SIZEOF_INT]] : !u64i @@ -238,9 +238,9 @@ void do_things(unsigned A, unsigned B) { // Init Section. // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr @@ -254,9 +254,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -269,9 +269,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: 
cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -333,7 +333,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" {{.*}} : !cir.ptr>> // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_INT_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_INT_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr> @@ -373,7 +373,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>> // // CHECK-NEXT: %[[INT_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast 
%[[INT_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr> @@ -405,7 +405,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UPPER_BOUND_CAST_2]], %[[UPPER_BOUND_CAST]] : !u64i // CHECK-NEXT: %[[SIZEOF_INT:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[SIZEOF_INT]] : !u64i @@ -440,9 +440,9 @@ void do_things(unsigned A, unsigned B) { // Initialization Section. 
// CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -457,9 +457,9 @@ void do_things(unsigned A, unsigned B) { // // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // @@ -511,7 +511,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" {{.*}} : !cir.ptr> // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> 
!u64i // CHECK-NEXT: %[[SIZEOF_NOOPS:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_NOOPS]] : !u64i // CHECK-NEXT: %[[INT_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(4) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr @@ -546,9 +546,9 @@ void do_things(unsigned A, unsigned B) { // Init Section. // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { diff --git a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-int.cpp b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-int.cpp index eeb52ded8b4fb..2c5b52856b3da 100644 --- a/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-int.cpp +++ b/clang/test/CIR/CodeGenOpenACC/private-clause-pointer-recipes-int.cpp @@ -17,7 +17,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" {{.*}} : !cir.ptr>>> // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_INT_PTR:.*]] = cir.const 
#cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_INT_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr>> @@ -57,7 +57,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[INT_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr>> @@ -90,7 +90,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UPPER_BOUND_CAST_2]], %[[UPPER_BOUND_CAST]] : !u64i // CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[SIZEOF_PTR]] : !u64i @@ -134,7 +134,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>>> // // CHECK-NEXT: %[[INT_PTR_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// 
CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr>> @@ -167,7 +167,7 @@ void do_things(unsigned A, unsigned B) { // // // CHECK-NEXT: %[[INT_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UPPER_BOUND_CAST_2]], %[[UPPER_BOUND_CAST]] : !u64i // CHECK-NEXT: %[[SIZEOF_PTR_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[SIZEOF_PTR_PTR]] : !u64i @@ -200,7 +200,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_3:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_3:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS_2:.*]] = cir.mul %[[UPPER_BOUND_CAST_3]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[SIZEOF_INT:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS_2]], %[[SIZEOF_INT]] : !u64i @@ -256,7 +256,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca 
"openacc.private.init" {{.*}} : !cir.ptr>> // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_INT_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_INT_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr> @@ -297,7 +297,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" align(8) : !cir.ptr>> // // CHECK-NEXT: %[[INT_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr> @@ -329,7 +329,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UPPER_BOUND_CAST_2]], %[[UPPER_BOUND_CAST]] : !u64i // 
CHECK-NEXT: %[[SIZEOF_INT:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[SIZEOF_INT]] : !u64i @@ -384,7 +384,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.private.init" {{.*}} : !cir.ptr> // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_INT:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_INT]] : !u64i // CHECK-NEXT: %[[INT_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(4) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr diff --git a/clang/test/CIR/CodeGenOpenACC/reduction-clause-recipes.cpp b/clang/test/CIR/CodeGenOpenACC/reduction-clause-recipes.cpp index 1a2e377a997d0..ea5d9c9507099 100644 --- a/clang/test/CIR/CodeGenOpenACC/reduction-clause-recipes.cpp +++ b/clang/test/CIR/CodeGenOpenACC/reduction-clause-recipes.cpp @@ -18,9 +18,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : 
!u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -33,9 +33,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_DECAY]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -48,9 +48,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -93,9 +93,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: 
!cir.ptr x 5> x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -110,9 +110,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[RHS_BOUND3_STRIDE:.*]] = cir.ptr_stride %[[RHS_TLA_DECAY]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -127,9 +127,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[RHS_BOUND2_STRIDE:.*]] = cir.ptr_stride 
%[[RHS_BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -177,9 +177,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr x 5> x 5>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr x 5> x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB3_CAST]], %[[ONE]] : !u64i @@ -194,9 +194,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = 
cir.ptr_stride %[[TLA_DECAY]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB2_CAST]], %[[ONE]] : !u64i @@ -211,9 +211,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[LAST_SUB_ONE:.*]] = cir.sub %[[UB1_CAST]], %[[ONE]] : !u64i @@ -262,7 +262,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TOP_LEVEL_ALLOCA:.*]] = cir.alloca "openacc.reduction.init" align(8) : !cir.ptr>>> // // 
CHECK-NEXT: %[[INT_PTR_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[SIZEOF_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[UPPER_BOUND_CAST]], %[[SIZEOF_PTR]] : !u64i // CHECK-NEXT: %[[INT_PTR_PTR_VLA_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[CALC_ALLOCA_SIZE]]) : !cir.ptr>> @@ -294,7 +294,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[INT_PTR_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_2:.*]] = cir.builtin_int_cast %[[INT_PTR_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UPPER_BOUND_CAST_2]], %[[UPPER_BOUND_CAST]] : !u64i // CHECK-NEXT: %[[SIZEOF_PTR_PTR:.*]] = cir.const #cir.int<8> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS]], %[[SIZEOF_PTR_PTR]] : !u64i @@ -328,7 +328,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[INT_PTR_UPPER_BOUND:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UPPER_BOUND_CAST_3:.*]] = builtin.unrealized_conversion_cast %[[INT_PTR_UPPER_BOUND]] : index to !u64i +// CHECK-NEXT: %[[UPPER_BOUND_CAST_3:.*]] = cir.builtin_int_cast %[[INT_PTR_UPPER_BOUND]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS_2:.*]] = cir.mul %[[UPPER_BOUND_CAST_3]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[SIZEOF_INT:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[CALC_ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS_2]], 
%[[SIZEOF_INT]] : !u64i @@ -362,9 +362,9 @@ void do_things(unsigned A, unsigned B) { // Initialization Section // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr @@ -378,9 +378,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -393,9 +393,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound 
%[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -439,9 +439,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr>>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr @@ -457,9 +457,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[RHS_BOUND3_STRIDE:.*]] = cir.ptr_stride %[[RHS_TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = 
builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -474,9 +474,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[RHS_BOUND2_STRIDE:.*]] = cir.ptr_stride %[[RHS_BOUND3_STRIDE_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -524,9 +524,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] 
: index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB3:.*]] = cir.sub %[[UB3_CAST]], %[[CONST_ONE]] : !u64i @@ -542,9 +542,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr>>, !u64i) -> !cir.ptr>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.sub %[[UB2_CAST]], %[[CONST_ONE]] : !u64i @@ -559,9 +559,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_LOAD]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = 
cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i @@ -612,7 +612,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[TL_ALLOCA:.*]] = cir.alloca "openacc.reduction.init" align(8) : !cir.ptr x 5>>> // // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ARR_SIZE:.*]] = cir.const #cir.int<40> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[UB3_CAST]], %[[ARR_SIZE]] : !u64i // CHECK-NEXT: %[[ARR_ALLOCA:.*]] = cir.alloca "openacc.init.bounds" align(8) size(%[[ALLOCA_SIZE]]) : !cir.ptr x 5>> @@ -642,7 +642,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: } // // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS:.*]] = cir.mul %[[UB2_CAST]], %[[UB3_CAST]] : !u64i // // CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u64i @@ -650,7 +650,7 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride %[[DECAY]], %[[ZERO]] : (!cir.ptr>, !u64i) -> !cir.ptr> // // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// 
CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[NUM_ELTS2:.*]] = cir.mul %[[UB1_CAST]], %[[NUM_ELTS]] : !u64i // CHECK-NEXT: %[[ELT_SIZE:.*]] = cir.const #cir.int<4> : !u64i // CHECK-NEXT: %[[ALLOCA_SIZE:.*]] = cir.mul %[[NUM_ELTS2]], %[[ELT_SIZE]] : !u64i @@ -681,9 +681,9 @@ void do_things(unsigned A, unsigned B) { // Init Section: // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -696,9 +696,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: 
%[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -711,9 +711,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -756,9 +756,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr x 5>>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr x 5>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) 
: !cir.ptr // CHECK-NEXT: cir.store %[[LB3_CAST]], %[[ITR3]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -773,9 +773,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[RHS_BOUND3_STRIDE:.*]] = cir.ptr_stride %[[RHS_TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB2_CAST]], %[[ITR2]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -790,9 +790,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[RHS_BOUND2_STRIDE:.*]] = cir.ptr_stride %[[RHS_BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: cir.store %[[LB1_CAST]], %[[ITR1]] : !u64i, !cir.ptr // CHECK-NEXT: cir.for : cond { @@ -848,9 +848,9 @@ 
void do_things(unsigned A, unsigned B) { // CHECK-NEXT: ^bb0(%[[REF:.*]]: !cir.ptr x 5>>> {{.*}}, %[[PRIVATE:.*]]: !cir.ptr x 5>>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND2:.*]]: !acc.data_bounds_ty {{.*}}, %[[BOUND3:.*]]: !acc.data_bounds_ty {{.*}}): // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB3:.*]] = acc.get_lowerbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB3]] : index to !u64i +// CHECK-NEXT: %[[LB3_CAST:.*]] = cir.builtin_int_cast %[[LB3]] : index -> !u64i // CHECK-NEXT: %[[UB3:.*]] = acc.get_upperbound %[[BOUND3]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB3_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB3]] : index to !u64i +// CHECK-NEXT: %[[UB3_CAST:.*]] = cir.builtin_int_cast %[[UB3]] : index -> !u64i // CHECK-NEXT: %[[ITR3:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB3:.*]] = cir.sub %[[UB3_CAST]], %[[CONST_ONE]] : !u64i @@ -866,9 +866,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND3_STRIDE:.*]] = cir.ptr_stride %[[TLA_LOAD]], %[[ITR3_LOAD]] : (!cir.ptr x 5>>, !u64i) -> !cir.ptr x 5>> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB2:.*]] = acc.get_lowerbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB2]] : index to !u64i +// CHECK-NEXT: %[[LB2_CAST:.*]] = cir.builtin_int_cast %[[LB2]] : index -> !u64i // CHECK-NEXT: %[[UB2:.*]] = acc.get_upperbound %[[BOUND2]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB2_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB2]] : index to !u64i +// CHECK-NEXT: %[[UB2_CAST:.*]] = cir.builtin_int_cast %[[UB2]] : index -> !u64i // CHECK-NEXT: %[[ITR2:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB2:.*]] = cir.sub 
%[[UB2_CAST]], %[[CONST_ONE]] : !u64i @@ -883,9 +883,9 @@ void do_things(unsigned A, unsigned B) { // CHECK-NEXT: %[[BOUND2_STRIDE:.*]] = cir.ptr_stride %[[BOUND3_STRIDE_DECAY]], %[[ITR2_LOAD]] : (!cir.ptr>, !u64i) -> !cir.ptr> // CHECK-NEXT: cir.scope { // CHECK-NEXT: %[[LB1:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[LB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB1]] : index to !u64i +// CHECK-NEXT: %[[LB1_CAST:.*]] = cir.builtin_int_cast %[[LB1]] : index -> !u64i // CHECK-NEXT: %[[UB1:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index -// CHECK-NEXT: %[[UB1_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB1]] : index to !u64i +// CHECK-NEXT: %[[UB1_CAST:.*]] = cir.builtin_int_cast %[[UB1]] : index -> !u64i // CHECK-NEXT: %[[ITR1:.*]] = cir.alloca "iter" align(8) : !cir.ptr // CHECK-NEXT: %[[CONST_ONE:.*]] = cir.const #cir.int<1> : !u64i // CHECK-NEXT: %[[ONE_BELOW_UB1:.*]] = cir.sub %[[UB1_CAST]], %[[CONST_ONE]] : !u64i diff --git a/clang/test/CIR/CodeGenOpenACC/serial.c b/clang/test/CIR/CodeGenOpenACC/serial.c index fb55c5a0a3685..d7e0ad78cb71b 100644 --- a/clang/test/CIR/CodeGenOpenACC/serial.c +++ b/clang/test/CIR/CodeGenOpenACC/serial.c @@ -115,7 +115,7 @@ void acc_serial(int cond) { #pragma acc serial async(cond) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.serial async(%[[CONV_CAST]] : si32) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -129,9 +129,9 @@ void acc_serial(int cond) { #pragma acc serial async(3) device_type(nvidia, radeon) async(cond) {} // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // 
CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.serial async(%[[THREE_CAST]] : si32, %[[CONV_CAST]] : si32 [#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -139,7 +139,7 @@ void acc_serial(int cond) { #pragma acc serial async device_type(nvidia, radeon) async(cond) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.serial async([#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type], %[[CONV_CAST]] : si32 [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -147,7 +147,7 @@ void acc_serial(int cond) { #pragma acc serial async(3) device_type(nvidia, radeon) async {} // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.serial async([#acc.device_type, #acc.device_type], %[[THREE_CAST]] : si32) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -167,7 +167,7 @@ void acc_serial(int cond) { #pragma acc serial wait(1) device_type(nvidia) wait {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i 
-> si32 // CHECK-NEXT: acc.serial wait([#acc.device_type], {%[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -175,7 +175,7 @@ void acc_serial(int cond) { #pragma acc serial wait device_type(nvidia) wait(1) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.serial wait([#acc.device_type], {%[[ONE_CAST]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -183,9 +183,9 @@ void acc_serial(int cond) { #pragma acc serial wait(1) device_type(nvidia) wait(1) {} // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL2:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL2]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE_LITERAL2]] : !s32i -> si32 // CHECK-NEXT: acc.serial wait({%[[ONE_CAST]] : si32}, {%[[ONE_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -193,9 +193,9 @@ void acc_serial(int cond) { #pragma acc serial wait(devnum: cond : 1) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = 
cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.serial wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -203,13 +203,13 @@ void acc_serial(int cond) { #pragma acc serial wait(devnum: cond : 1) device_type(nvidia) wait(devnum: cond : 1) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST2:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.serial wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -217,11 +217,11 @@ void acc_serial(int cond) { #pragma acc serial wait(devnum: cond : 1, 2) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: 
%[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.serial wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -229,17 +229,17 @@ void acc_serial(int cond) { #pragma acc serial wait(devnum: cond : 1, 2) device_type(nvidia, radeon) wait(devnum: cond : 1, 2) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST2:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST2:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST2:.*]] = 
builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST2:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST2:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST2:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.serial wait({devnum: %[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32}, {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type], {devnum: %[[CONV_CAST2]] : si32, %[[ONE_CAST2]] : si32, %[[TWO_CAST2]] : si32} [#acc.device_type]) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -247,9 +247,9 @@ void acc_serial(int cond) { #pragma acc serial wait(cond, 1) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.serial wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc @@ -257,9 +257,9 @@ void acc_serial(int cond) { #pragma acc serial wait(queues: cond, 1) device_type(radeon) {} // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] 
= builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.serial wait({%[[CONV_CAST]] : si32, %[[ONE_CAST]] : si32}) { // CHECK-NEXT: acc.yield // CHECK-NEXT: } loc diff --git a/clang/test/CIR/CodeGenOpenACC/set.c b/clang/test/CIR/CodeGenOpenACC/set.c index 2950f142aea1b..4f7a083c3f8df 100644 --- a/clang/test/CIR/CodeGenOpenACC/set.c +++ b/clang/test/CIR/CodeGenOpenACC/set.c @@ -14,17 +14,17 @@ void acc_set(int cond) { #pragma acc set default_async(cond) // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[COND_CONV:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.set default_async(%[[COND_CONV]] : si32) #pragma acc set default_async(1) // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONV:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.set default_async(%[[ONE_CONV]] : si32) #pragma acc set device_num(cond) if (cond) // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[COND_CONV:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool // CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1 @@ -32,9 +32,9 @@ void acc_set(int cond) { #pragma acc set device_type(radeon) default_async(1) device_num(cond) if (cond) // CHECK-NEXT: 
%[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONV:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[COND_CONV:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool // CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1 diff --git a/clang/test/CIR/CodeGenOpenACC/shutdown.c b/clang/test/CIR/CodeGenOpenACC/shutdown.c index 07b8fb8df1043..7c3627e732d07 100644 --- a/clang/test/CIR/CodeGenOpenACC/shutdown.c +++ b/clang/test/CIR/CodeGenOpenACC/shutdown.c @@ -30,12 +30,12 @@ void acc_shutdown(int cond) { #pragma acc shutdown device_num(cond) // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[COND_CONV:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.shutdown device_num(%[[COND_CONV]] : si32) #pragma acc shutdown device_num(1) // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CONV:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.shutdown device_num(%[[ONE_CONV]] : si32) #pragma acc shutdown if(cond) device_num(cond) device_type(*) @@ -43,6 +43,6 @@ void acc_shutdown(int cond) { // CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> 
!cir.bool // CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1 // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[COND_CONV:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.shutdown device_num(%[[COND_CONV]] : si32) if(%[[BOOL_CONV]]) attributes {device_types = [#acc.device_type]} } diff --git a/clang/test/CIR/CodeGenOpenACC/update.c b/clang/test/CIR/CodeGenOpenACC/update.c index 9282bc971af60..31aed57084ac2 100644 --- a/clang/test/CIR/CodeGenOpenACC/update.c +++ b/clang/test/CIR/CodeGenOpenACC/update.c @@ -100,43 +100,43 @@ void acc_update(int parmVar, int *ptrParmVar) { #pragma acc update self(parmVar) wait(parmVar) // CHECK-NEXT: %[[GDP1:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "parmVar", structured = false} // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: acc.update wait({%[[PARM_CAST]] : si32}) dataOperands(%[[GDP1]] : !cir.ptr) // CHECK-NEXT: acc.update_host accPtr(%[[GDP1]] : !cir.ptr) to varPtr(%[[PARM]] : !cir.ptr) {dataClause = #acc, name = "parmVar", structured = false} #pragma acc update self(parmVar) wait(parmVar) device_type(nvidia) // CHECK-NEXT: %[[GDP1:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "parmVar", structured = false} // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: acc.update wait({%[[PARM_CAST]] : si32}) dataOperands(%[[GDP1]] : !cir.ptr) // 
CHECK-NEXT: acc.update_host accPtr(%[[GDP1]] : !cir.ptr) to varPtr(%[[PARM]] : !cir.ptr) {dataClause = #acc, name = "parmVar", structured = false} #pragma acc update self(parmVar) device_type(radeon) wait(parmVar) // CHECK-NEXT: %[[GDP1:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "parmVar", structured = false} // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: acc.update wait({%[[PARM_CAST]] : si32} [#acc.device_type]) dataOperands(%[[GDP1]] : !cir.ptr) // CHECK-NEXT: acc.update_host accPtr(%[[GDP1]] : !cir.ptr) to varPtr(%[[PARM]] : !cir.ptr) {dataClause = #acc, name = "parmVar", structured = false} #pragma acc update self(parmVar) device_type(radeon) wait(parmVar, 1, 2) // CHECK-NEXT: %[[GDP1:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "parmVar", structured = false} // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_CONST]] + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_CONST]] // CHECK-NEXT: %[[TWO_CONST:.*]] = cir.const #cir.int<2> - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_CONST]] + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_CONST]] // CHECK-NEXT: acc.update wait({%[[PARM_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32} [#acc.device_type]) dataOperands(%[[GDP1]] : !cir.ptr) // CHECK-NEXT: acc.update_host accPtr(%[[GDP1]] : !cir.ptr) to varPtr(%[[PARM]] : !cir.ptr) {dataClause = #acc, name = "parmVar", 
structured = false} #pragma acc update self(parmVar) device_type(radeon) wait(devnum:parmVar: 1, 2) // CHECK-NEXT: %[[GDP1:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) -> !cir.ptr {dataClause = #acc, name = "parmVar", structured = false} // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_CONST]] + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_CONST]] // CHECK-NEXT: %[[TWO_CONST:.*]] = cir.const #cir.int<2> - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_CONST]] + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_CONST]] // CHECK-NEXT: acc.update wait({devnum: %[[PARM_CAST]] : si32, %[[ONE_CAST]] : si32, %[[TWO_CAST]] : si32} [#acc.device_type]) dataOperands(%[[GDP1]] : !cir.ptr) // CHECK-NEXT: acc.update_host accPtr(%[[GDP1]] : !cir.ptr) to varPtr(%[[PARM]] : !cir.ptr) {dataClause = #acc, name = "parmVar", structured = false} @@ -157,21 +157,21 @@ void acc_update(int parmVar, int *ptrParmVar) { #pragma acc update self(parmVar) async(parmVar) // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[GDP1:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) async(%[[PARM_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "parmVar", structured = false} // CHECK-NEXT: acc.update async(%[[PARM_CAST]] : si32) dataOperands(%[[GDP1]] : !cir.ptr) // CHECK-NEXT: acc.update_host accPtr(%[[GDP1]] : !cir.ptr) async(%[[PARM_CAST]] : si32) to varPtr(%[[PARM]] : !cir.ptr) {dataClause = #acc, name = "parmVar", structured = false} #pragma 
acc update self(parmVar) async(parmVar) device_type(nvidia) // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[GDP1:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) async(%[[PARM_CAST]] : si32) -> !cir.ptr {dataClause = #acc, name = "parmVar", structured = false} // CHECK-NEXT: acc.update async(%[[PARM_CAST]] : si32) dataOperands(%[[GDP1]] : !cir.ptr) // CHECK-NEXT: acc.update_host accPtr(%[[GDP1]] : !cir.ptr) async(%[[PARM_CAST]] : si32) to varPtr(%[[PARM]] : !cir.ptr) {dataClause = #acc, name = "parmVar", structured = false} #pragma acc update self(parmVar) device_type(radeon) async(parmVar) // CHECK-NEXT: %[[PARM_LOAD:.*]] = cir.load{{.*}} %[[PARM]] - // CHECK-NEXT: %[[PARM_CAST:.*]] = builtin.unrealized_conversion_cast %[[PARM_LOAD]] + // CHECK-NEXT: %[[PARM_CAST:.*]] = cir.builtin_int_cast %[[PARM_LOAD]] // CHECK-NEXT: %[[GDP1:.*]] = acc.getdeviceptr varPtr(%[[PARM]] : !cir.ptr) async(%[[PARM_CAST]] : si32 [#acc.device_type]) -> !cir.ptr {dataClause = #acc, name = "parmVar", structured = false} // CHECK-NEXT: acc.update async(%[[PARM_CAST]] : si32 [#acc.device_type]) dataOperands(%[[GDP1]] : !cir.ptr) // CHECK-NEXT: acc.update_host accPtr(%[[GDP1]] : !cir.ptr) async(%[[PARM_CAST]] : si32 [#acc.device_type]) to varPtr(%[[PARM]] : !cir.ptr) {dataClause = #acc, name = "parmVar", structured = false} diff --git a/clang/test/CIR/CodeGenOpenACC/wait.c b/clang/test/CIR/CodeGenOpenACC/wait.c index 91ed0626d9950..1cc796d3d6207 100644 --- a/clang/test/CIR/CodeGenOpenACC/wait.c +++ b/clang/test/CIR/CodeGenOpenACC/wait.c @@ -19,19 +19,19 @@ void acc_wait(int cond) { #pragma acc wait async(cond) // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // 
CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: acc.wait async(%[[CONV_CAST]] : si32) loc #pragma acc wait(1) // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.wait(%[[ONE_CAST]] : si32) loc #pragma acc wait(1, 2) async // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.wait(%[[ONE_CAST]], %[[TWO_CAST]] : si32, si32) async loc @@ -40,16 +40,16 @@ void acc_wait(int cond) { // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.wait(%[[ONE_CAST]] : si32) if(%[[CONV_CAST]]) #pragma acc wait(queues:1, 2) async(cond) // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load{{.*}} %[[COND]] : !cir.ptr, !s32i - // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32 + // CHECK-NEXT: %[[CONV_CAST:.*]] = cir.builtin_int_cast %[[COND_LOAD]] : !s32i -> si32 // CHECK-NEXT: 
%[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.wait(%[[ONE_CAST]], %[[TWO_CAST]] : si32, si32) async(%[[CONV_CAST]] : si32) loc #pragma acc wait(devnum:1: 2, 3) if (cond) @@ -57,20 +57,20 @@ void acc_wait(int cond) { // CHECK-NEXT: %[[BOOL_CAST:.*]] = cir.cast int_to_bool %[[COND_LOAD]] : !s32i -> !cir.bool // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[BOOL_CAST]] : !cir.bool to i1 // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.wait(%[[TWO_CAST]], %[[THREE_CAST]] : si32, si32) wait_devnum(%[[ONE_CAST]] : si32) if(%[[CONV_CAST]]) loc #pragma acc wait(devnum:1: queues: 2, 3) async // CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i - // CHECK-NEXT: %[[ONE_CAST:.*]] = builtin.unrealized_conversion_cast 
%[[ONE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[ONE_CAST:.*]] = cir.builtin_int_cast %[[ONE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i - // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[TWO_CAST:.*]] = cir.builtin_int_cast %[[TWO_LITERAL]] : !s32i -> si32 // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i - // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32 + // CHECK-NEXT: %[[THREE_CAST:.*]] = cir.builtin_int_cast %[[THREE_LITERAL]] : !s32i -> si32 // CHECK-NEXT: acc.wait(%[[TWO_CAST]], %[[THREE_CAST]] : si32, si32) async wait_devnum(%[[ONE_CAST]] : si32) loc // CHECK-NEXT: cir.return diff --git a/clang/test/CIR/IR/builtin-int-cast.cir b/clang/test/CIR/IR/builtin-int-cast.cir new file mode 100644 index 0000000000000..213298ac48a9e --- /dev/null +++ b/clang/test/CIR/IR/builtin-int-cast.cir @@ -0,0 +1,50 @@ +// RUN: cir-opt %s --verify-roundtrip | FileCheck %s + +!s32i = !cir.int<s, 32> +!u64i = !cir.int<u, 64> + +module { + // CIR integer -> builtin signless integer. + cir.func @to_builtin(%arg0: !s32i) { + %0 = cir.builtin_int_cast %arg0 : !s32i -> i32 + cir.return + } + + // CIR integer -> builtin signed integer. + cir.func @to_builtin_signed(%arg0: !s32i) { + %0 = cir.builtin_int_cast %arg0 : !s32i -> si32 + cir.return + } + + // Builtin integer -> CIR integer. + cir.func @from_builtin(%arg0: i64) { + %0 = cir.builtin_int_cast %arg0 : i64 -> !u64i + cir.return + } + + // CIR integer <-> index (no width constraint on the index side).
+ cir.func @to_index(%arg0: !u64i) { + %0 = cir.builtin_int_cast %arg0 : !u64i -> index + cir.return + } + + cir.func @from_index(%arg0: index) { + %0 = cir.builtin_int_cast %arg0 : index -> !u64i + cir.return + } +} + +// CHECK: cir.func{{.*}} @to_builtin(%arg0: !s32i) +// CHECK: %0 = cir.builtin_int_cast %arg0 : !s32i -> i32 + +// CHECK: cir.func{{.*}} @to_builtin_signed(%arg0: !s32i) +// CHECK: %0 = cir.builtin_int_cast %arg0 : !s32i -> si32 + +// CHECK: cir.func{{.*}} @from_builtin(%arg0: i64) +// CHECK: %0 = cir.builtin_int_cast %arg0 : i64 -> !u64i + +// CHECK: cir.func{{.*}} @to_index(%arg0: !u64i) +// CHECK: %0 = cir.builtin_int_cast %arg0 : !u64i -> index + +// CHECK: cir.func{{.*}} @from_index(%arg0: index) +// CHECK: %0 = cir.builtin_int_cast %arg0 : index -> !u64i diff --git a/clang/test/CIR/IR/invalid-builtin-int-cast.cir b/clang/test/CIR/IR/invalid-builtin-int-cast.cir new file mode 100644 index 0000000000000..56cde4d75e2e3 --- /dev/null +++ b/clang/test/CIR/IR/invalid-builtin-int-cast.cir @@ -0,0 +1,37 @@ +// RUN: cir-opt %s -verify-diagnostics -split-input-file + +!s32i = !cir.int<s, 32> +!s64i = !cir.int<s, 64> + +module { + cir.func @both_cir(%arg0: !s32i) { + // expected-error@+1 {{requires exactly one '!cir.int' operand or result; the other must be a builtin integer or 'index' type}} + %0 = cir.builtin_int_cast %arg0 : !s32i -> !s64i + cir.return + } +} + +// ----- + +!s32i = !cir.int<s, 32> + +module { + cir.func @both_builtin(%arg0: !s32i) { + %0 = cir.builtin_int_cast %arg0 : !s32i -> i32 + // expected-error@+1 {{requires exactly one '!cir.int' operand or result; the other must be a builtin integer or 'index' type}} + %1 = cir.builtin_int_cast %0 : i32 -> i64 + cir.return + } +} + +// ----- + +!s32i = !cir.int<s, 32> + +module { + cir.func @width_mismatch(%arg0: !s32i) { + // expected-error@+1 {{requires the CIR and builtin integer types to have the same width; use 'cir.cast' for width conversions}} + %0 = cir.builtin_int_cast %arg0 : !s32i -> i64 + cir.return + } +}
diff --git a/clang/test/CIR/Lowering/builtin-int-cast.cir b/clang/test/CIR/Lowering/builtin-int-cast.cir new file mode 100644 index 0000000000000..f855923453318 --- /dev/null +++ b/clang/test/CIR/Lowering/builtin-int-cast.cir @@ -0,0 +1,56 @@ +// RUN: cir-opt %s -cir-to-llvm | FileCheck %s + +// Check the CIR-to-LLVM lowering of cir.builtin_int_cast, the cast between +// CIR integer types and the builtin integer/index types. + +!s16i = !cir.int<s, 16> +!s32i = !cir.int<s, 32> +!s64i = !cir.int<s, 64> +!u32i = !cir.int<u, 32> +!u64i = !cir.int<u, 64> + +module { + // Same width: the cast carries no resize and is dropped entirely, so the + // argument flows straight to the return. + // CHECK-LABEL: llvm.func @cast_noop + // CHECK-SAME: (%[[ARG:.*]]: i32) + // CHECK-NEXT: llvm.return %[[ARG]] : i32 + cir.func @cast_noop(%arg0: i32) -> !s32i { + %0 = cir.builtin_int_cast %arg0 : i32 -> !s32i + cir.return %0 : !s32i + } + + // index (i64) down to a narrower CIR integer: truncation. + // CHECK-LABEL: llvm.func @cast_trunc + // CHECK-SAME: (%[[ARG:.*]]: i64) + // CHECK-NEXT: %[[T:.*]] = llvm.trunc %[[ARG]] : i64 to i16 + // CHECK-NEXT: llvm.return %[[T]] : i16 + cir.func @cast_trunc(%arg0: index) -> !s16i { + %0 = cir.builtin_int_cast %arg0 : index -> !s16i + cir.return %0 : !s16i + } + + // Signed CIR integer widened through index: sign extension. The trailing + // index -> !s64i cast is a same-width no-op, so only the sext remains. + // CHECK-LABEL: llvm.func @cast_sext + // CHECK-SAME: (%[[ARG:.*]]: i32) + // CHECK-NEXT: %[[E:.*]] = llvm.sext %[[ARG]] : i32 to i64 + // CHECK-NEXT: llvm.return %[[E]] : i64 + cir.func @cast_sext(%arg0: !s32i) -> !s64i { + %0 = cir.builtin_int_cast %arg0 : !s32i -> index + %1 = cir.builtin_int_cast %0 : index -> !s64i + cir.return %1 : !s64i + } + + // Unsigned CIR integer widened through index: zero extension. Signedness is + // taken from the CIR integer side.
+ // CHECK-LABEL: llvm.func @cast_zext + // CHECK-SAME: (%[[ARG:.*]]: i32) + // CHECK-NEXT: %[[E:.*]] = llvm.zext %[[ARG]] : i32 to i64 + // CHECK-NEXT: llvm.return %[[E]] : i64 + cir.func @cast_zext(%arg0: !u32i) -> !u64i { + %0 = cir.builtin_int_cast %arg0 : !u32i -> index + %1 = cir.builtin_int_cast %0 : index -> !u64i + cir.return %1 : !u64i + } +} diff --git a/clang/test/CIR/Transforms/builtin-int-cast-fold.cir b/clang/test/CIR/Transforms/builtin-int-cast-fold.cir new file mode 100644 index 0000000000000..24fe27965d2cc --- /dev/null +++ b/clang/test/CIR/Transforms/builtin-int-cast-fold.cir @@ -0,0 +1,29 @@ +// RUN: cir-opt -cir-canonicalize -o %t.cir %s +// RUN: FileCheck --input-file=%t.cir %s + +!s32i = !cir.int<s, 32> + +module { + // A round-trip cast (CIR -> builtin -> CIR back to the original type) + // folds away, returning the original value. + cir.func @roundtrip(%arg0: !s32i) -> !s32i { + %0 = cir.builtin_int_cast %arg0 : !s32i -> i32 + %1 = cir.builtin_int_cast %0 : i32 -> !s32i + cir.return %1 : !s32i + } + + // CHECK: cir.func{{.*}} @roundtrip(%[[ARG:.+]]: !s32i) -> !s32i { + // CHECK-NEXT: cir.return %[[ARG]] : !s32i + // CHECK-NEXT: } + + // A round-trip that returns to a *different* CIR type must not fold. + cir.func @no_fold_diff_type(%arg0: i32) -> !s32i { + %0 = cir.builtin_int_cast %arg0 : i32 -> !s32i + cir.return %0 : !s32i + } + + // CHECK: cir.func{{.*}} @no_fold_diff_type(%[[ARG2:.+]]: i32) -> !s32i { + // CHECK-NEXT: %[[CAST:.+]] = cir.builtin_int_cast %[[ARG2]] : i32 -> !s32i + // CHECK-NEXT: cir.return %[[CAST]] : !s32i + // CHECK-NEXT: } +} From 8a7169c919c824e8a23d0421795ce0e32926652b Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Tue, 23 Jun 2026 12:25:28 -0400 Subject: [PATCH 211/511] [SLP]Fix dominance crash for scheduled copyable PHI-operand bundles Extend the copyable/non-copyable PHI conflict bail-out in tryScheduleBundle to the scheduled path, not just the non-schedulable one.
Fixes #205327 Reviewers: Pull Request: https://github.com/llvm/llvm-project/pull/205372 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 33 ++++++++----- .../copyable-phi-scheduled-non-copyable.ll | 47 +++++++++++++++++++ 2 files changed, 68 insertions(+), 12 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/copyable-phi-scheduled-non-copyable.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 4dcf3243a20ad..da2fe9d9f9f61 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -25549,6 +25549,27 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, return std::nullopt; } } + // A scalar that is a non-copyable (i.e. really vectorized) element in this + // PHI-operand node, but a copyable element in another PHI-operand node, is + // vectorized here and only gathered/reused there. Both nodes feed different + // incoming values of the same vectorized PHI and may be emitted in different + // predecessor blocks, so the gathered reuse can end up not dominated by the + // vectorized value, producing broken IR. Bail out of scheduling to avoid it. 
+ if (EI && EI.UserTE->State == TreeEntry::Vectorize && + EI.UserTE->getOpcode() == Instruction::PHI && any_of(VL, [&](Value *V) { + auto *I = dyn_cast(V); + if (!I || (HasCopyables && S.isCopyableElement(V))) + return false; + return any_of( + SLP->VectorizableTree, [&](const std::unique_ptr &TE) { + return TE->UserTreeIndex && + TE->UserTreeIndex.UserTE->State == TreeEntry::Vectorize && + TE->UserTreeIndex.UserTE->getOpcode() == + Instruction::PHI && + TE->hasCopyableElements() && TE->isCopyableElement(V); + }); + })) + return std::nullopt; if (DoesNotRequireScheduling) { // If all operands were replaced by copyables, the operands of this node // might be not, so need to recalculate dependencies for schedule data, @@ -25579,18 +25600,6 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef VL, BoUpSLP *SLP, return SD && SD->hasValidDependencies(); })) return std::nullopt; - if (EI && EI.UserTE->State == TreeEntry::Vectorize && - EI.UserTE->getOpcode() == Instruction::PHI && - any_of(SLP->VectorizableTree, - [&](const std::unique_ptr &TE) { - return TE->UserTreeIndex && - TE->UserTreeIndex.UserTE->State == - TreeEntry::Vectorize && - TE->UserTreeIndex.UserTE->getOpcode() == - Instruction::PHI && - TE->hasCopyableElements() && TE->isCopyableElement(V); - })) - return std::nullopt; SmallDenseMap, unsigned> UserOpToNumOps; for (const Use &U : I->operands()) { unsigned &NumOps = diff --git a/llvm/test/Transforms/SLPVectorizer/X86/copyable-phi-scheduled-non-copyable.ll b/llvm/test/Transforms/SLPVectorizer/X86/copyable-phi-scheduled-non-copyable.ll new file mode 100644 index 0000000000000..762366cb3b623 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/copyable-phi-scheduled-non-copyable.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -slp-threshold=-99999 < %s | FileCheck %s + +define void @test(i32 %arg) { +; 
CHECK-LABEL: define void @test( +; CHECK-SAME: i32 [[ARG:%.*]]) { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: br label %[[BB1:.*]] +; CHECK: [[BB1]]: +; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> , i32 [[ARG]], i32 0 +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> , <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> , <4 x i32> [[TMP3]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> zeroinitializer, [[TMP4]] +; CHECK-NEXT: [[ICMP:%.*]] = icmp sgt i32 0, 0 +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x i32> [[TMP6]], i32 0, i32 1 +; CHECK-NEXT: br i1 false, label %[[BB5:.*]], label %[[BB4:.*]] +; CHECK: [[BB4]]: +; CHECK-NEXT: br label %[[BB5]] +; CHECK: [[BB5]]: +; CHECK-NEXT: [[TMP8:%.*]] = phi <4 x i32> [ [[TMP5]], %[[BB4]] ], [ [[TMP7]], %[[BB1]] ] +; CHECK-NEXT: ret void +; +bb: + br label %bb1 + +bb1: + %sub = sub i32 0, %arg + %or = or i32 %arg, 0 + %add = add i32 %or, 0 + %add2 = add i32 0, 0 + %icmp = icmp sgt i32 %add2, 0 + %add3 = add i32 0, 1 + br i1 false, label %bb5, label %bb4 + +bb4: + br label %bb5 + +bb5: + %phi = phi i32 [ 0, %bb4 ], [ 1, %bb1 ] + %phi6 = phi i32 [ %sub, %bb4 ], [ %sub, %bb1 ] + %phi7 = phi i32 [ %add, %bb4 ], [ %add, %bb1 ] + %phi8 = phi i32 [ %add3, %bb4 ], [ %add2, %bb1 ] + ret void +} From 40b73337f31d02c0d4197c37c94528bc867fb8e5 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Tue, 23 Jun 2026 18:33:28 +0200 Subject: [PATCH 212/511] Recommit "[libc] Introduce the ioctl syscall wrapper and port all callers (#204640)" (#205317) This patch reapplies #204640 (reverted in #205277), due to (-Werror) build failure with gcc. Gcc warned about passing an uninitialized structure through a `const void *` argument. 
This isn't a problem because the ioctls in question write to that argument. The fix/workaround is to provide a `void *` overload. The original commit message was: This patch adds an ioctl syscall wrapper in linux_syscalls namespace and migrates all direct SYS_ioctl calls to use it. To handle the polymorphic nature of ioctl arguments (where some commands expect pointers, some expect scalar integers like queue_selector, and some expect no argument at all), I use a helper struct IoctlArg with implicit constructors. This avoids template bloat and overload ambiguities (particularly around literal 0) while keeping call sites clean. Assisted by Gemini. --- .../linux/syscall_wrappers/CMakeLists.txt | 13 +++++ .../OSUtil/linux/syscall_wrappers/ioctl.h | 52 +++++++++++++++++++ libc/src/sys/ioctl/linux/CMakeLists.txt | 3 +- libc/src/sys/ioctl/linux/ioctl.cpp | 15 +++--- libc/src/termios/linux/CMakeLists.txt | 21 +++----- libc/src/termios/linux/tcdrain.cpp | 9 ++-- libc/src/termios/linux/tcflow.cpp | 9 ++-- libc/src/termios/linux/tcflush.cpp | 10 ++-- libc/src/termios/linux/tcgetattr.cpp | 12 ++--- libc/src/termios/linux/tcgetsid.cpp | 10 ++-- libc/src/termios/linux/tcsendbreak.cpp | 9 ++-- libc/src/termios/linux/tcsetattr.cpp | 12 ++--- libc/src/unistd/linux/CMakeLists.txt | 5 +- libc/src/unistd/linux/isatty.cpp | 13 ++--- 14 files changed, 117 insertions(+), 76 deletions(-) create mode 100644 libc/src/__support/OSUtil/linux/syscall_wrappers/ioctl.h diff --git a/libc/src/__support/OSUtil/linux/syscall_wrappers/CMakeLists.txt b/libc/src/__support/OSUtil/linux/syscall_wrappers/CMakeLists.txt index a52be9676a3ca..f3282c315d9a9 100644 --- a/libc/src/__support/OSUtil/linux/syscall_wrappers/CMakeLists.txt +++ b/libc/src/__support/OSUtil/linux/syscall_wrappers/CMakeLists.txt @@ -706,6 +706,19 @@ add_header_library( libc.include.sys_syscall ) +add_header_library( + ioctl + HDRS + ioctl.h + DEPENDS + libc.src.__support.CPP.type_traits + libc.src.__support.OSUtil.osutil + 
libc.src.__support.error_or + libc.src.__support.macros.attributes + libc.src.__support.macros.config + libc.include.sys_syscall +) + add_header_library( readlink HDRS diff --git a/libc/src/__support/OSUtil/linux/syscall_wrappers/ioctl.h b/libc/src/__support/OSUtil/linux/syscall_wrappers/ioctl.h new file mode 100644 index 0000000000000..3972690283a36 --- /dev/null +++ b/libc/src/__support/OSUtil/linux/syscall_wrappers/ioctl.h @@ -0,0 +1,52 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Syscall wrapper for ioctl. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_SYSCALL_WRAPPERS_IOCTL_H +#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_SYSCALL_WRAPPERS_IOCTL_H + +#include "src/__support/CPP/type_traits/enable_if.h" +#include "src/__support/CPP/type_traits/is_integral.h" +#include "src/__support/CPP/type_traits/is_null_pointer.h" +#include "src/__support/CPP/type_traits/is_pointer.h" +#include "src/__support/OSUtil/linux/syscall.h" // syscall_checked +#include "src/__support/error_or.h" +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/__support/macros/config.h" +#include // For syscall numbers + +namespace LIBC_NAMESPACE_DECL { +namespace linux_syscalls { + +struct IoctlArg { + unsigned long val; + + // Some ioctls read the argument, some write to it. The caller is responsible + // for passing the correct pointer type. 
+ LIBC_INLINE IoctlArg(const void *ptr) + : val(reinterpret_cast(ptr)) {} + LIBC_INLINE IoctlArg(void *ptr) : val(reinterpret_cast(ptr)) {} + + template , int> = 0> + LIBC_INLINE constexpr IoctlArg(T num = 0) + : val(static_cast(num)) {} +}; + +LIBC_INLINE ErrorOr ioctl(int fd, unsigned long request, + IoctlArg arg = 0) { + return syscall_checked(SYS_ioctl, fd, request, arg.val); +} + +} // namespace linux_syscalls +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_SYSCALL_WRAPPERS_IOCTL_H diff --git a/libc/src/sys/ioctl/linux/CMakeLists.txt b/libc/src/sys/ioctl/linux/CMakeLists.txt index 876f35aaee66c..33ff2d4dce214 100644 --- a/libc/src/sys/ioctl/linux/CMakeLists.txt +++ b/libc/src/sys/ioctl/linux/CMakeLists.txt @@ -6,7 +6,6 @@ add_entrypoint_object( ../ioctl.h DEPENDS libc.include.sys_ioctl - libc.include.sys_syscall - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) diff --git a/libc/src/sys/ioctl/linux/ioctl.cpp b/libc/src/sys/ioctl/linux/ioctl.cpp index 9bb669c6a6f66..ec861ffc03ec6 100644 --- a/libc/src/sys/ioctl/linux/ioctl.cpp +++ b/libc/src/sys/ioctl/linux/ioctl.cpp @@ -8,11 +8,10 @@ #include "src/sys/ioctl/ioctl.h" -#include "src/__support/OSUtil/syscall.h" // For internal syscall function. +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include -#include // For syscall numbers. 
namespace LIBC_NAMESPACE_DECL { @@ -20,16 +19,14 @@ LLVM_LIBC_FUNCTION(int, ioctl, (int fd, unsigned long request, ...)) { va_list vargs; va_start(vargs, request); void *data_pointer = va_arg(vargs, void *); - int ret = - LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, request, data_pointer); va_end(vargs); - // Some ioctls can be expected to return positive values - if (ret >= 0) - return ret; + auto ret = linux_syscalls::ioctl(fd, request, data_pointer); - // If there is an error, errno is set and -1 is returned. - libc_errno = -ret; + if (ret.has_value()) + return ret.value(); + + libc_errno = ret.error(); return -1; } diff --git a/libc/src/termios/linux/CMakeLists.txt b/libc/src/termios/linux/CMakeLists.txt index e990fba25eabe..5d5440ae69266 100644 --- a/libc/src/termios/linux/CMakeLists.txt +++ b/libc/src/termios/linux/CMakeLists.txt @@ -51,9 +51,8 @@ add_entrypoint_object( HDRS ../tcgetsid.h DEPENDS - libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) @@ -64,9 +63,8 @@ add_entrypoint_object( HDRS ../tcdrain.h DEPENDS - libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) @@ -77,9 +75,8 @@ add_entrypoint_object( HDRS ../tcflush.h DEPENDS - libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) @@ -90,9 +87,8 @@ add_entrypoint_object( HDRS ../tcflow.h DEPENDS - libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) @@ -103,9 +99,8 @@ add_entrypoint_object( HDRS ../tcsendbreak.h DEPENDS - libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno 
) @@ -123,9 +118,8 @@ add_entrypoint_object( ../tcgetattr.h DEPENDS .kernel_termios - libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) @@ -137,8 +131,7 @@ add_entrypoint_object( ../tcsetattr.h DEPENDS .kernel_termios - libc.include.sys_syscall libc.include.termios - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) diff --git a/libc/src/termios/linux/tcdrain.cpp b/libc/src/termios/linux/tcdrain.cpp index 570b15c24fe7f..4fce89d65a76f 100644 --- a/libc/src/termios/linux/tcdrain.cpp +++ b/libc/src/termios/linux/tcdrain.cpp @@ -8,21 +8,20 @@ #include "src/termios/tcdrain.h" -#include "src/__support/OSUtil/syscall.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. -#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, tcdrain, (int fd)) { - int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCSBRK, 1); - if (ret < 0) { - libc_errno = -ret; + auto ret = linux_syscalls::ioctl(fd, TCSBRK, 1); + if (!ret.has_value()) { + libc_errno = ret.error(); return -1; } return 0; diff --git a/libc/src/termios/linux/tcflow.cpp b/libc/src/termios/linux/tcflow.cpp index 714ef6aa71298..4ffd294997ad4 100644 --- a/libc/src/termios/linux/tcflow.cpp +++ b/libc/src/termios/linux/tcflow.cpp @@ -8,21 +8,20 @@ #include "src/termios/tcflow.h" -#include "src/__support/OSUtil/syscall.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. 
-#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, tcflow, (int fd, int action)) { - int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCXONC, action); - if (ret < 0) { - libc_errno = -ret; + auto ret = linux_syscalls::ioctl(fd, TCXONC, action); + if (!ret.has_value()) { + libc_errno = ret.error(); return -1; } return 0; diff --git a/libc/src/termios/linux/tcflush.cpp b/libc/src/termios/linux/tcflush.cpp index 4c7b9fadc446d..8a4676d97454a 100644 --- a/libc/src/termios/linux/tcflush.cpp +++ b/libc/src/termios/linux/tcflush.cpp @@ -8,22 +8,20 @@ #include "src/termios/tcflush.h" -#include "src/__support/OSUtil/syscall.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. -#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, tcflush, (int fd, int queue_selector)) { - int ret = - LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCFLSH, queue_selector); - if (ret < 0) { - libc_errno = -ret; + auto ret = linux_syscalls::ioctl(fd, TCFLSH, queue_selector); + if (!ret.has_value()) { + libc_errno = ret.error(); return -1; } return 0; diff --git a/libc/src/termios/linux/tcgetattr.cpp b/libc/src/termios/linux/tcgetattr.cpp index 2e768269c874d..0569be4ae588f 100644 --- a/libc/src/termios/linux/tcgetattr.cpp +++ b/libc/src/termios/linux/tcgetattr.cpp @@ -7,24 +7,22 @@ //===----------------------------------------------------------------------===// #include "src/termios/tcgetattr.h" -#include "kernel_termios.h" - -#include "src/__support/OSUtil/syscall.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/termios/linux/kernel_termios.h" #include // Safe to include 
without the risk of name pollution. -#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, tcgetattr, (int fd, struct termios *t)) { LIBC_NAMESPACE::kernel_termios kt; - int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCGETS, &kt); - if (ret < 0) { - libc_errno = -ret; + auto ret = linux_syscalls::ioctl(fd, TCGETS, &kt); + if (!ret.has_value()) { + libc_errno = ret.error(); return -1; } t->c_iflag = kt.c_iflag; diff --git a/libc/src/termios/linux/tcgetsid.cpp b/libc/src/termios/linux/tcgetsid.cpp index 7487816cf2741..f80fd31b65865 100644 --- a/libc/src/termios/linux/tcgetsid.cpp +++ b/libc/src/termios/linux/tcgetsid.cpp @@ -8,22 +8,22 @@ #include "src/termios/tcgetsid.h" -#include "src/__support/OSUtil/syscall.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. -#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(pid_t, tcgetsid, (int fd)) { pid_t sid; - int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TIOCGSID, &sid); - if (ret < 0) { - libc_errno = -ret; + + auto ret = linux_syscalls::ioctl(fd, TIOCGSID, &sid); + if (!ret.has_value()) { + libc_errno = ret.error(); return -1; } return sid; diff --git a/libc/src/termios/linux/tcsendbreak.cpp b/libc/src/termios/linux/tcsendbreak.cpp index 1d546c1d5953e..e91ec7b748582 100644 --- a/libc/src/termios/linux/tcsendbreak.cpp +++ b/libc/src/termios/linux/tcsendbreak.cpp @@ -8,13 +8,12 @@ #include "src/termios/tcsendbreak.h" -#include "src/__support/OSUtil/syscall.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include // Safe to include without the risk of name pollution. 
-#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { @@ -23,9 +22,9 @@ LLVM_LIBC_FUNCTION(pid_t, tcsendbreak, (int fd, int /* unused duration */)) { // POSIX leaves the behavior for non-zero duration implementation dependent. // Which means that the behavior can be the same as it is when duration is // zero. So, we just pass zero to the syscall. - int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TCSBRK, 0); - if (ret < 0) { - libc_errno = -ret; + auto ret = linux_syscalls::ioctl(fd, TCSBRK, 0); + if (!ret.has_value()) { + libc_errno = ret.error(); return -1; } return 0; diff --git a/libc/src/termios/linux/tcsetattr.cpp b/libc/src/termios/linux/tcsetattr.cpp index 8a2c7290217ba..b2f08d078ee83 100644 --- a/libc/src/termios/linux/tcsetattr.cpp +++ b/libc/src/termios/linux/tcsetattr.cpp @@ -7,15 +7,13 @@ //===----------------------------------------------------------------------===// #include "src/termios/tcsetattr.h" -#include "kernel_termios.h" - -#include "src/__support/OSUtil/syscall.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/termios/linux/kernel_termios.h" #include // Safe to include without the risk of name pollution. 
-#include // For syscall numbers #include namespace LIBC_NAMESPACE_DECL { @@ -52,9 +50,9 @@ LLVM_LIBC_FUNCTION(int, tcsetattr, kt.c_cc[i] = 0; } - int ret = LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, cmd, &kt); - if (ret < 0) { - libc_errno = -ret; + auto ret = linux_syscalls::ioctl(fd, cmd, &kt); + if (!ret.has_value()) { + libc_errno = ret.error(); return -1; } return 0; diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt index 04ccde414cd2f..af385e9bbed72 100644 --- a/libc/src/unistd/linux/CMakeLists.txt +++ b/libc/src/unistd/linux/CMakeLists.txt @@ -342,11 +342,10 @@ add_entrypoint_object( HDRS ../isatty.h DEPENDS - libc.hdr.fcntl_macros + libc.hdr.unistd_macros libc.include.unistd libc.include.sys_ioctl - libc.include.sys_syscall - libc.src.__support.OSUtil.osutil + libc.src.__support.OSUtil.linux.syscall_wrappers.ioctl libc.src.errno.errno ) diff --git a/libc/src/unistd/linux/isatty.cpp b/libc/src/unistd/linux/isatty.cpp index a4d17912b57b0..4418feb2229a3 100644 --- a/libc/src/unistd/linux/isatty.cpp +++ b/libc/src/unistd/linux/isatty.cpp @@ -8,13 +8,11 @@ #include "src/unistd/isatty.h" -#include "src/__support/OSUtil/syscall.h" // For internal syscall function. +#include "src/__support/OSUtil/linux/syscall_wrappers/ioctl.h" #include "src/__support/common.h" - #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include // For ioctl numbers. -#include // For syscall numbers. +#include // For ioctl numbers. namespace LIBC_NAMESPACE_DECL { @@ -23,12 +21,11 @@ LLVM_LIBC_FUNCTION(int, isatty, (int fd)) { int line_d_val = INIT_VAL; // This gets the line dicipline of the terminal. When called on something that // isn't a terminal it doesn't change line_d_val and returns -1. 
- int result = - LIBC_NAMESPACE::syscall_impl(SYS_ioctl, fd, TIOCGETD, &line_d_val); - if (result == 0) + auto result = linux_syscalls::ioctl(fd, TIOCGETD, &line_d_val); + if (result.has_value()) return 1; - libc_errno = -result; + libc_errno = result.error(); return 0; } From 194100babad8fda8c6fbd47e2558a8957e071316 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Horv=C3=A1th?= Date: Tue, 23 Jun 2026 17:42:10 +0100 Subject: [PATCH 213/511] [LifetimeSafety] Model pointer-to-data-member access in the fact generator (#204612) VisitBinaryOperator had no case for `obj.*pm` (BO_PtrMemD) / `objptr->*pm` (BO_PtrMemI), so a borrow of the accessed member (`&(obj.*pm)`) dropped the object's loan to an empty origin and a use-after-scope was missed. Flow the object operand's origin into the result, mirroring a member access: for `.*` the object is the LHS, for `->*` it is the LHS pointer's pointee. Assisted-by: Claude Opus 4.8 Co-authored-by: Gabor Horvath --- .../LifetimeSafety/FactsGenerator.cpp | 21 +++++++ clang/test/Sema/LifetimeSafety/safety.cpp | 60 +++++++++++++++++++ 2 files changed, 81 insertions(+) diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index 3861117005752..50bf79d4c1a38 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -497,6 +497,27 @@ void FactsGenerator::handlePointerArithmetic(const BinaryOperator *BO) { } void FactsGenerator::VisitBinaryOperator(const BinaryOperator *BO) { + if (BO->getOpcode() == BO_PtrMemD || BO->getOpcode() == BO_PtrMemI) { + // `obj.*pm` / `objptr->*pm` names a member of the object, so a borrow of it + // borrows the object; flow the object's origin into the result. For `.*` + // the object is the LHS; for `->*` it is the LHS pointer's pointee. + // + // Only the result's outer (storage) origin relates to the object: borrowing + // the member borrows the object's storage. 
Deeper levels of the result (a + // pointer/view member's own pointee) are the member's value, with no + // counterpart in the object's origin -- so the lists may differ in length + // and we flow just the top level, leaving the member's value untouched. + OriginList *Dst = getOriginsList(*BO); + OriginList *ObjSrc = + BO->getOpcode() == BO_PtrMemD + ? getOriginsList(*BO->getLHS()) + : getRValueOrigins(BO->getLHS(), getOriginsList(*BO->getLHS())); + if (Dst && ObjSrc) + CurrentBlockFacts.push_back(FactMgr.createFact( + Dst->getOuterOriginID(), ObjSrc->getOuterOriginID(), /*Kill=*/true)); + handleUse(BO->getLHS()); + return; + } if (BO->getOpcode() == BO_Comma) { killAndFlowOrigin(*BO, *BO->getRHS()); return; diff --git a/clang/test/Sema/LifetimeSafety/safety.cpp b/clang/test/Sema/LifetimeSafety/safety.cpp index abd3d9c61b784..b59fac191dcfb 100644 --- a/clang/test/Sema/LifetimeSafety/safety.cpp +++ b/clang/test/Sema/LifetimeSafety/safety.cpp @@ -2073,6 +2073,66 @@ std::string_view refViewMemberReturnRefView1(RefMember a) { return a.view_ref; } std::string_view& refViewMemberReturnRefView2(RefMember a) { return a.view_ref; } } // namespace field_access +namespace pointer_to_member { +struct S { int x; void f() const; }; + +// `&(obj.*pm)` borrows the object, like `&obj.field`. 
+void via_dot_star() { + const int *p; + { + S s{5}; + int S::*pm = &S::x; + p = &(s.*pm); // expected-warning {{local variable 's' does not live long enough}} + } // expected-note {{local variable 's' is destroyed here}} + (void)*p; // expected-note {{later used here}} +} + +void via_arrow_star() { + const int *p; + { + S s{5}; + int S::*pm = &S::x; + S *sp = &s; // expected-warning {{local variable 's' does not live long enough}} + p = &(sp->*pm); // expected-note {{local variable 'sp' aliases the storage of local variable 's'}} + } // expected-note {{local variable 's' is destroyed here}} + (void)*p; // expected-note {{later used here}} +} + +// Negative: a long-lived object borrowed through `.*` stays silent. +void via_dot_star_ok() { + static S s{5}; + int S::*pm = &S::x; + const int *p = &(s.*pm); + (void)*p; // no-warning +} + +// A pointer/view member makes `obj.*pm` an origin one level deeper than the +// object; flowing only the outer (storage) level still ties the borrow to the +// object, so a dangle is caught. +struct V { std::string_view view; }; +void via_dot_star_view_member() { + std::string_view *p; + { + V v; + std::string_view V::*pm = &V::view; + p = &(v.*pm); // expected-warning {{local variable 'v' does not live long enough}} + } // expected-note {{local variable 'v' is destroyed here}} + (void)*p; // expected-note {{later used here}} +} + +// A pointer-to-member-function result is only callable (not storable), so it +// carries no borrow -- but calling one on a dangling object is still caught. 
+void via_member_function_ptr() { + void (S::*pmf)() const = &S::f; + S *sp; + { + S s; + sp = &s; // expected-warning {{local variable 's' does not live long enough}} + } // expected-note {{local variable 's' is destroyed here}} + (sp->*pmf)(); // expected-note {{later used here}} +} +} // namespace pointer_to_member + namespace attr_on_template_params { struct MyObj { ~MyObj(); From f6037477362ff0abd78bd8b21b38918e75acfadc Mon Sep 17 00:00:00 2001 From: Jeff Bailey Date: Tue, 23 Jun 2026 17:46:14 +0100 Subject: [PATCH 214/511] [libc] Add loff_t type (#204641) Added loff_t type definition. This is a Linux extension required for large file offsets. * llvm-libc-types/loff_t.h: Include linux/loff_t.h on Linux. * llvm-libc-types/linux/loff_t.h: Added Linux definition using __kernel_loff_t. * llvm-libc-types/CMakeLists.txt: Include linux subdirectory and add dependency for loff_t. * llvm-libc-types/linux/CMakeLists.txt: Registered loff_t. * sys/types.yaml: Added loff_t to sys/types. * libc/include/CMakeLists.txt: Added dependency to sys_types target. Assisted-by: Automated tooling, human reviewed. 
--- libc/include/CMakeLists.txt | 1 + libc/include/llvm-libc-types/CMakeLists.txt | 8 +++++++ .../llvm-libc-types/linux/CMakeLists.txt | 5 +++++ libc/include/llvm-libc-types/linux/loff_t.h | 21 +++++++++++++++++++ libc/include/llvm-libc-types/loff_t.h | 21 +++++++++++++++++++ libc/include/sys/types.yaml | 1 + 6 files changed, 57 insertions(+) create mode 100644 libc/include/llvm-libc-types/linux/CMakeLists.txt create mode 100644 libc/include/llvm-libc-types/linux/loff_t.h create mode 100644 libc/include/llvm-libc-types/loff_t.h diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index e8168687109b0..ab24c854692d0 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -885,6 +885,7 @@ add_header_macro( .llvm-libc-types.gid_t .llvm-libc-types.ino_t .llvm-libc-types.key_t + .llvm-libc-types.loff_t .llvm-libc-types.mode_t .llvm-libc-types.nlink_t .llvm-libc-types.off_t diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt index 46b0eb7d71183..c0c600983ba12 100644 --- a/libc/include/llvm-libc-types/CMakeLists.txt +++ b/libc/include/llvm-libc-types/CMakeLists.txt @@ -1,3 +1,6 @@ +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) +endif() add_header(off64_t HDR off64_t.h) add_header(size_t HDR size_t.h) add_header( @@ -48,6 +51,11 @@ add_header( ) add_header(ldiv_t HDR ldiv_t.h) add_header(lldiv_t HDR lldiv_t.h) +if(TARGET libc.include.llvm-libc-types.${LIBC_TARGET_OS}.loff_t) + add_header(loff_t HDR loff_t.h DEPENDS .${LIBC_TARGET_OS}.loff_t) +else() + add_header(loff_t HDR loff_t.h) +endif() add_header(FILE HDR FILE.h) add_header(fd_set HDR fd_set.h DEPENDS libc.include.llvm-libc-macros.sys_select_macros) add_header(fenv_t HDR fenv_t.h) diff --git a/libc/include/llvm-libc-types/linux/CMakeLists.txt b/libc/include/llvm-libc-types/linux/CMakeLists.txt new file mode 100644 index 0000000000000..ccb986a7b54c8 --- 
/dev/null +++ b/libc/include/llvm-libc-types/linux/CMakeLists.txt @@ -0,0 +1,5 @@ +add_header( + loff_t + HDR + loff_t.h +) diff --git a/libc/include/llvm-libc-types/linux/loff_t.h b/libc/include/llvm-libc-types/linux/loff_t.h new file mode 100644 index 0000000000000..408f9bdb9754e --- /dev/null +++ b/libc/include/llvm-libc-types/linux/loff_t.h @@ -0,0 +1,21 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Definition of loff_t type for Linux. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_LINUX_LOFF_T_H +#define LLVM_LIBC_TYPES_LINUX_LOFF_T_H + +#include + +typedef __kernel_loff_t loff_t; + +#endif // LLVM_LIBC_TYPES_LINUX_LOFF_T_H diff --git a/libc/include/llvm-libc-types/loff_t.h b/libc/include/llvm-libc-types/loff_t.h new file mode 100644 index 0000000000000..4e95135fdb81a --- /dev/null +++ b/libc/include/llvm-libc-types/loff_t.h @@ -0,0 +1,21 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Definition of loff_t type. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_LOFF_T_H +#define LLVM_LIBC_TYPES_LOFF_T_H + +#if defined(__linux__) +#include "linux/loff_t.h" +#endif + +#endif // LLVM_LIBC_TYPES_LOFF_T_H diff --git a/libc/include/sys/types.yaml b/libc/include/sys/types.yaml index 605b0c9258841..37c58e708af34 100644 --- a/libc/include/sys/types.yaml +++ b/libc/include/sys/types.yaml @@ -9,6 +9,7 @@ types: - type_name: gid_t - type_name: ino_t - type_name: key_t + - type_name: loff_t - type_name: mode_t - type_name: nlink_t - type_name: off_t From 4c04966b31be49fd33b58e42e87f415b220e0228 Mon Sep 17 00:00:00 2001 From: Sirui Mu Date: Wed, 24 Jun 2026 00:49:21 +0800 Subject: [PATCH 215/511] [CIR] Atomic load and store via library call (#202671) This patch adds support for atomic load/store operations that go through calls to the `__atomic_load` and `__atomic_store` library functions. This could happen when the size of the atomic type is too large or is not a power of 2. 
Assisted-by: Codex / gpt-5.5 xhigh --- clang/lib/CIR/CodeGen/CIRGenAtomic.cpp | 225 +++++++++++++++++++++++- clang/lib/CIR/CodeGen/CIRGenCall.cpp | 12 ++ clang/lib/CIR/CodeGen/CIRGenTypes.h | 5 + clang/test/CIR/CodeGen/atomic-libcall.c | 139 +++++++++++++++ 4 files changed, 376 insertions(+), 5 deletions(-) create mode 100644 clang/test/CIR/CodeGen/atomic-libcall.c diff --git a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp index 4ac6f4506b2cd..4d774761b975a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenAtomic.cpp @@ -1115,6 +1115,224 @@ void CIRGenFunction::emitAtomicExprWithMemOrder( emitAtomicOpFn); } +static RValue emitAtomicLibCall(CIRGenFunction &cgf, llvm::StringRef funcName, + QualType resultType, CallArgList &args) { + const CIRGenFunctionInfo &fnInfo = + cgf.cgm.getTypes().arrangeBuiltinFunctionCall(resultType, args); + cir::FuncType fnTy = cgf.cgm.getTypes().getFunctionType(fnInfo); + + mlir::NamedAttrList fnAttrs; + assert(!cir::MissingFeatures::opFuncExtraAttrs()); + + cir::FuncOp fn = cgf.cgm.createRuntimeFunction(fnTy, funcName, fnAttrs); + auto callee = CIRGenCallee::forDirect(fn); + return cgf.emitCall(fnInfo, callee, ReturnValueSlot(), args); +} + +static RValue emitLibCallForAtomicExpr(CIRGenFunction &cgf, AtomicExpr *e, + Address atomicPtr, Address dest, + Address val1, uint64_t atomicTySize, + QualType resultTy) { + mlir::Location loc = cgf.getLoc(e->getSourceRange()); + + CallArgList args; + // For non-optimized library calls, the size is the first parameter. + args.add( + RValue::get(cgf.getBuilder().getConstInt(loc, cgf.sizeTy, atomicTySize)), + cgf.getContext().getSizeType()); + + // The atomic address is the second parameter. + // The OpenCL atomic library functions only accept pointer arguments to + // generic address space. 
+ auto castToGenericAddrSpace = [&](mlir::Value v, QualType pt) { + if (!e->isOpenCL()) + return cgf.getBuilder().createPtrBitcast(v, cgf.voidTy); + + assert(!cir::MissingFeatures::openCL()); + cgf.cgm.errorNYI(loc, "emitLibCallForAtomicExpr: openCL"); + return cgf.getBuilder().createPtrBitcast(v, cgf.voidTy); + }; + args.add(RValue::get(castToGenericAddrSpace(atomicPtr.emitRawPointer(), + e->getPtr()->getType())), + cgf.getContext().VoidPtrTy); + + // The next 1-3 parameters are op-dependent. + llvm::StringRef calleeName; + QualType retTy; + bool hasRetTy = false; + switch (e->getOp()) { + case AtomicExpr::AO__c11_atomic_init: + case AtomicExpr::AO__opencl_atomic_init: + llvm_unreachable("Already handled!"); + + // There is only one libcall for compare and exchange, because there is no + // optimisation benefit possible from a libcall version of a weak compare + // and exchange. + // bool __atomic_compare_exchange(size_t size, void *mem, void *expected, + // void *desired, int success, int failure) + case AtomicExpr::AO__atomic_compare_exchange: + case AtomicExpr::AO__atomic_compare_exchange_n: + case AtomicExpr::AO__c11_atomic_compare_exchange_weak: + case AtomicExpr::AO__c11_atomic_compare_exchange_strong: + case AtomicExpr::AO__hip_atomic_compare_exchange_weak: + case AtomicExpr::AO__hip_atomic_compare_exchange_strong: + case AtomicExpr::AO__opencl_atomic_compare_exchange_weak: + case AtomicExpr::AO__opencl_atomic_compare_exchange_strong: + case AtomicExpr::AO__scoped_atomic_compare_exchange: + case AtomicExpr::AO__scoped_atomic_compare_exchange_n: + cgf.cgm.errorNYI( + loc, "emitLibCallForAtomicExpr: atomic compare-and-exchange NYI"); + return RValue::get(nullptr); + + // void __atomic_exchange(size_t size, void *mem, void *val, void *return, + // int order) + case AtomicExpr::AO__atomic_exchange: + case AtomicExpr::AO__atomic_exchange_n: + case AtomicExpr::AO__c11_atomic_exchange: + case AtomicExpr::AO__hip_atomic_exchange: + case
AtomicExpr::AO__opencl_atomic_exchange: + case AtomicExpr::AO__scoped_atomic_exchange: + case AtomicExpr::AO__scoped_atomic_exchange_n: + cgf.cgm.errorNYI(loc, "emitLibCallForAtomicExpr: atomic exchange NYI"); + return RValue::get(nullptr); + + // void __atomic_store(size_t size, void *mem, void *val, int order) + case AtomicExpr::AO__atomic_store: + case AtomicExpr::AO__atomic_store_n: + case AtomicExpr::AO__c11_atomic_store: + case AtomicExpr::AO__scoped_atomic_store: + case AtomicExpr::AO__scoped_atomic_store_n: { + calleeName = "__atomic_store"; + retTy = cgf.getContext().VoidTy; + hasRetTy = true; + args.add(RValue::get(castToGenericAddrSpace(val1.emitRawPointer(), + e->getVal1()->getType())), + cgf.getContext().VoidPtrTy); + break; + } + + case AtomicExpr::AO__hip_atomic_store: + case AtomicExpr::AO__opencl_atomic_store: + cgf.cgm.errorNYI(loc, + "emitLibCallForAtomicExpr: atomic store for hip/opencl"); + return RValue::get(nullptr); + + // void __atomic_load(size_t size, void *mem, void *return, int order) + case AtomicExpr::AO__atomic_load: + case AtomicExpr::AO__atomic_load_n: + case AtomicExpr::AO__c11_atomic_load: + case AtomicExpr::AO__scoped_atomic_load: + case AtomicExpr::AO__scoped_atomic_load_n: { + calleeName = "__atomic_load"; + break; + } + + case AtomicExpr::AO__hip_atomic_load: + case AtomicExpr::AO__opencl_atomic_load: + cgf.cgm.errorNYI(loc, + "emitLibCallForAtomicExpr: atomic load for hip/opencl"); + return RValue::get(nullptr); + + case AtomicExpr::AO__atomic_add_fetch: + case AtomicExpr::AO__scoped_atomic_add_fetch: + case AtomicExpr::AO__atomic_fetch_add: + case AtomicExpr::AO__c11_atomic_fetch_add: + case AtomicExpr::AO__hip_atomic_fetch_add: + case AtomicExpr::AO__opencl_atomic_fetch_add: + case AtomicExpr::AO__scoped_atomic_fetch_add: + case AtomicExpr::AO__atomic_and_fetch: + case AtomicExpr::AO__scoped_atomic_and_fetch: + case AtomicExpr::AO__atomic_fetch_and: + case AtomicExpr::AO__c11_atomic_fetch_and: + case 
AtomicExpr::AO__hip_atomic_fetch_and: + case AtomicExpr::AO__opencl_atomic_fetch_and: + case AtomicExpr::AO__scoped_atomic_fetch_and: + case AtomicExpr::AO__atomic_or_fetch: + case AtomicExpr::AO__scoped_atomic_or_fetch: + case AtomicExpr::AO__atomic_fetch_or: + case AtomicExpr::AO__c11_atomic_fetch_or: + case AtomicExpr::AO__hip_atomic_fetch_or: + case AtomicExpr::AO__opencl_atomic_fetch_or: + case AtomicExpr::AO__scoped_atomic_fetch_or: + case AtomicExpr::AO__atomic_sub_fetch: + case AtomicExpr::AO__scoped_atomic_sub_fetch: + case AtomicExpr::AO__atomic_fetch_sub: + case AtomicExpr::AO__c11_atomic_fetch_sub: + case AtomicExpr::AO__hip_atomic_fetch_sub: + case AtomicExpr::AO__opencl_atomic_fetch_sub: + case AtomicExpr::AO__scoped_atomic_fetch_sub: + case AtomicExpr::AO__atomic_xor_fetch: + case AtomicExpr::AO__scoped_atomic_xor_fetch: + case AtomicExpr::AO__atomic_fetch_xor: + case AtomicExpr::AO__c11_atomic_fetch_xor: + case AtomicExpr::AO__hip_atomic_fetch_xor: + case AtomicExpr::AO__opencl_atomic_fetch_xor: + case AtomicExpr::AO__scoped_atomic_fetch_xor: + case AtomicExpr::AO__atomic_nand_fetch: + case AtomicExpr::AO__atomic_fetch_nand: + case AtomicExpr::AO__c11_atomic_fetch_nand: + case AtomicExpr::AO__scoped_atomic_fetch_nand: + case AtomicExpr::AO__scoped_atomic_nand_fetch: + case AtomicExpr::AO__atomic_min_fetch: + case AtomicExpr::AO__atomic_fetch_min: + case AtomicExpr::AO__c11_atomic_fetch_min: + case AtomicExpr::AO__hip_atomic_fetch_min: + case AtomicExpr::AO__opencl_atomic_fetch_min: + case AtomicExpr::AO__scoped_atomic_fetch_min: + case AtomicExpr::AO__scoped_atomic_min_fetch: + case AtomicExpr::AO__atomic_max_fetch: + case AtomicExpr::AO__atomic_fetch_max: + case AtomicExpr::AO__c11_atomic_fetch_max: + case AtomicExpr::AO__hip_atomic_fetch_max: + case AtomicExpr::AO__opencl_atomic_fetch_max: + case AtomicExpr::AO__scoped_atomic_fetch_max: + case AtomicExpr::AO__scoped_atomic_max_fetch: + case AtomicExpr::AO__scoped_atomic_fetch_uinc: + case 
AtomicExpr::AO__scoped_atomic_fetch_udec: + case AtomicExpr::AO__atomic_test_and_set: + case AtomicExpr::AO__atomic_clear: + case AtomicExpr::AO__atomic_fetch_uinc: + case AtomicExpr::AO__atomic_fetch_udec: + llvm_unreachable("Integral atomic operations always become atomicrmw!"); + } + + if (e->isOpenCL()) { + assert(!cir::MissingFeatures::openCL()); + cgf.cgm.errorNYI(loc, "emitLibCallForAtomicExpr: openCL"); + return RValue::get(nullptr); + } + + // By default, assume we return a value of the atomic type. + if (!hasRetTy) { + // Value is returned through parameter before the order. + retTy = cgf.getContext().VoidTy; + args.add(RValue::get(castToGenericAddrSpace(dest.emitRawPointer(), retTy)), + cgf.getContext().VoidPtrTy); + } + + // Order is always the last parameter. + args.add(RValue::get(cgf.emitScalarExpr(e->getOrder())), + cgf.getContext().IntTy); + if (e->isOpenCL()) { + assert(!cir::MissingFeatures::openCL()); + cgf.cgm.errorNYI(loc, "emitLibCallForAtomicExpr: openCL"); + return RValue::get(nullptr); + } + + RValue res = emitAtomicLibCall(cgf, calleeName, retTy, args); + + // The value is returned directly from the libcall. + if (e->isCmpXChg()) + return res; + + if (resultTy->isVoidType()) + return RValue::get(nullptr); + + return cgf.convertTempToRValue( + dest.withElementType(cgf.getBuilder(), cgf.convertTypeForMem(resultTy)), + resultTy, e->getExprLoc()); +} + RValue CIRGenFunction::emitAtomicExpr(AtomicExpr *e) { QualType atomicTy = e->getPtr()->getType()->getPointeeType(); QualType memTy = atomicTy; @@ -1325,11 +1543,8 @@ RValue CIRGenFunction::emitAtomicExpr(AtomicExpr *e) { // the size-optimized libcall variants, which are only valid up to 16 bytes.) 
// // See: https://llvm.org/docs/Atomics.html#libcalls-atomic - if (useLibCall) { - assert(!cir::MissingFeatures::atomicUseLibCall()); - cgm.errorNYI(e->getSourceRange(), "emitAtomicExpr: emit atomic lib call"); - return RValue::get(nullptr); - } + if (useLibCall) + return emitLibCallForAtomicExpr(*this, e, ptr, dest, val1, size, resultTy); bool isStore = e->getOp() == AtomicExpr::AO__c11_atomic_store || e->getOp() == AtomicExpr::AO__opencl_atomic_store || diff --git a/clang/lib/CIR/CodeGen/CIRGenCall.cpp b/clang/lib/CIR/CodeGen/CIRGenCall.cpp index f648eff375a77..c3065c8917924 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCall.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCall.cpp @@ -1016,6 +1016,18 @@ CIRGenTypes::arrangeFreeFunctionCall(const CallArgList &args, return arrangeFreeFunctionLikeCall(*this, cgm, args, fnType); } +const CIRGenFunctionInfo & +CIRGenTypes::arrangeBuiltinFunctionCall(QualType resultType, + const CallArgList &args) { + llvm::SmallVector argTypes; + for (const CallArg &arg : args) + argTypes.push_back(astContext.getCanonicalParamType(arg.ty)); + + CanQualType retType = resultType->getCanonicalTypeUnqualified(); + return arrangeCIRFunctionInfo(retType, /*isInstanceMethod=*/false, argTypes, + FunctionType::ExtInfo(), RequiredArgs::All); +} + /// Arrange the argument and result information for a declaration or definition /// of the given C++ non-static member function. The member function must be an /// ordinary function, i.e. not a constructor or destructor. diff --git a/clang/lib/CIR/CodeGen/CIRGenTypes.h b/clang/lib/CIR/CodeGen/CIRGenTypes.h index 15955c517f1f3..a7827f76bd5f2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenTypes.h +++ b/clang/lib/CIR/CodeGen/CIRGenTypes.h @@ -186,6 +186,11 @@ class CIRGenTypes { const CIRGenFunctionInfo & arrangeFunctionDeclaration(const clang::FunctionDecl *fd); + /// A builtin function is a freestanding function using the default + /// C conventions. 
+ const CIRGenFunctionInfo &arrangeBuiltinFunctionCall(QualType resultType, + const CallArgList &args); + /// Return whether a type can be zero-initialized (in the C++ sense) with an /// LLVM zeroinitializer. bool isZeroInitializable(clang::QualType ty); diff --git a/clang/test/CIR/CodeGen/atomic-libcall.c b/clang/test/CIR/CodeGen/atomic-libcall.c new file mode 100644 index 0000000000000..53685dd1b543d --- /dev/null +++ b/clang/test/CIR/CodeGen/atomic-libcall.c @@ -0,0 +1,139 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s -check-prefix=CIR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --input-file=%t-cir.ll %s -check-prefix=LLVM +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -Wno-unused-value -emit-llvm %s -o %t.ll +// RUN: FileCheck --input-file=%t.ll %s -check-prefix=LLVM + +struct Big { + int x[6]; +}; + +void load(struct Big *ptr) { + // CIR-LABEL: @load + // LLVM-LABEL: @load + + struct Big b; + __atomic_load(ptr, &b, __ATOMIC_RELAXED); + // CIR: %[[DEST_SLOT:.+]] = cir.alloca "b" align(4) : !cir.ptr + // CIR: %[[PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr>, !cir.ptr + // CIR-NEXT: %[[PTR_INTPTR:.+]] = cir.cast bitcast %[[PTR]] : !cir.ptr -> !cir.ptr> + // CIR-NEXT: %[[DEST_INTPTR:.+]] = cir.cast bitcast %[[DEST_SLOT]] : !cir.ptr -> !cir.ptr> + // CIR-NEXT: %[[SIZE:.+]] = cir.const #cir.int<24> : !u64i + // CIR-NEXT: %[[PTR_VOIDPTR:.+]] = cir.cast bitcast %[[PTR_INTPTR]] : !cir.ptr> -> !cir.ptr + // CIR-NEXT: %[[DEST_VOIDPTR:.+]] = cir.cast bitcast %[[DEST_INTPTR]] : !cir.ptr> -> !cir.ptr + // CIR-NEXT: %[[ORDER:.+]] = cir.const #cir.int<0> : !s32i + // CIR-NEXT: cir.call @__atomic_load(%[[SIZE]], %[[PTR_VOIDPTR]], %[[DEST_VOIDPTR]], %[[ORDER]]) : (!u64i {llvm.noundef}, !cir.ptr {llvm.noundef}, !cir.ptr {llvm.noundef}, !s32i {llvm.noundef}) -> () + + // LLVM: 
%[[DEST:.+]] = alloca %struct.Big + // LLVM: %[[PTR:.+]] = load ptr, ptr %{{.+}}, align 8 + // LLVM-NEXT: call void @__atomic_load(i64 noundef 24, ptr noundef %[[PTR]], ptr noundef %[[DEST]], i32 noundef 0) +} + +void scoped_load(struct Big *ptr) { + // CIR-LABEL: @scoped_load + // LLVM-LABEL: @scoped_load + + struct Big b; + __scoped_atomic_load(ptr, &b, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); + // CIR: %[[DEST_SLOT:.+]] = cir.alloca "b" align(4) : !cir.ptr + // CIR: %[[PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr>, !cir.ptr + // CIR-NEXT: %[[PTR_INTPTR:.+]] = cir.cast bitcast %[[PTR]] : !cir.ptr -> !cir.ptr> + // CIR-NEXT: %[[DEST_INTPTR:.+]] = cir.cast bitcast %[[DEST_SLOT]] : !cir.ptr -> !cir.ptr> + // CIR-NEXT: %[[SIZE:.+]] = cir.const #cir.int<24> : !u64i + // CIR-NEXT: %[[PTR_VOIDPTR:.+]] = cir.cast bitcast %[[PTR_INTPTR]] : !cir.ptr> -> !cir.ptr + // CIR-NEXT: %[[DEST_VOIDPTR:.+]] = cir.cast bitcast %[[DEST_INTPTR]] : !cir.ptr> -> !cir.ptr + // CIR-NEXT: %[[ORDER:.+]] = cir.const #cir.int<0> : !s32i + // CIR-NEXT: cir.call @__atomic_load(%[[SIZE]], %[[PTR_VOIDPTR]], %[[DEST_VOIDPTR]], %[[ORDER]]) : (!u64i {llvm.noundef}, !cir.ptr {llvm.noundef}, !cir.ptr {llvm.noundef}, !s32i {llvm.noundef}) -> () + + // LLVM: %[[DEST:.+]] = alloca %struct.Big + // LLVM: %[[PTR:.+]] = load ptr, ptr %{{.+}}, align 8 + // LLVM-NEXT: call void @__atomic_load(i64 noundef 24, ptr noundef %[[PTR]], ptr noundef %[[DEST]], i32 noundef 0) +} + +void c11_load(_Atomic(struct Big) *ptr) { + // CIR-LABEL: @c11_load + // LLVM-LABEL: @c11_load + + struct Big b = __c11_atomic_load(ptr, __ATOMIC_RELAXED); + // CIR: %[[DEST_SLOT:.+]] = cir.alloca "b" align(4) init : !cir.ptr + // CIR-NEXT: %[[TEMP_SLOT:.+]] = cir.alloca "atomic-temp" align(4) : !cir.ptr + // CIR: %[[PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr>, !cir.ptr + // CIR-NEXT: %[[PTR_INTPTR:.+]] = cir.cast bitcast %[[PTR]] : !cir.ptr -> !cir.ptr> + // CIR-NEXT: %[[TEMP_INTPTR:.+]] = cir.cast bitcast %[[TEMP_SLOT]] : !cir.ptr 
-> !cir.ptr> + // CIR-NEXT: %[[SIZE:.+]] = cir.const #cir.int<24> : !u64i + // CIR-NEXT: %[[PTR_VOIDPTR:.+]] = cir.cast bitcast %[[PTR_INTPTR]] : !cir.ptr> -> !cir.ptr + // CIR-NEXT: %[[TEMP_VOIDPTR:.+]] = cir.cast bitcast %[[TEMP_INTPTR]] : !cir.ptr> -> !cir.ptr + // CIR-NEXT: %[[ORDER:.+]] = cir.const #cir.int<0> : !s32i + // CIR-NEXT: cir.call @__atomic_load(%[[SIZE]], %[[PTR_VOIDPTR]], %[[TEMP_VOIDPTR]], %[[ORDER]]) : (!u64i {llvm.noundef}, !cir.ptr {llvm.noundef}, !cir.ptr {llvm.noundef}, !s32i {llvm.noundef}) -> () + // CIR-NEXT: %[[TEMP_CAST:.+]] = cir.cast bitcast %[[TEMP_INTPTR]] : !cir.ptr> -> !cir.ptr + // CIR-NEXT: cir.copy %[[TEMP_CAST]] to %[[DEST_SLOT]] : !cir.ptr + + // LLVM: %[[DEST_SLOT:.+]] = alloca %struct.Big + // LLVM-NEXT: %[[TEMP_SLOT:.+]] = alloca %struct.Big + // LLVM: %[[PTR:.+]] = load ptr, ptr %{{.+}}, align 8 + // LLVM-NEXT: call void @__atomic_load(i64 noundef 24, ptr noundef %[[PTR]], ptr noundef %[[TEMP_SLOT]], i32 noundef 0) + // LLVM-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}%[[DEST_SLOT]], ptr {{.*}}%[[TEMP_SLOT]], i64 24, i1 false) +} + +void store(struct Big *dest, struct Big *val) { + // CIR-LABEL: @store + // LLVM-LABEL: @store + + __atomic_store(dest, val, __ATOMIC_RELAXED); + // CIR: %[[DEST_PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr>, !cir.ptr + // CIR-NEXT: %[[VALUE_PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr>, !cir.ptr + // CIR-NEXT: %[[DEST_INTPTR:.+]] = cir.cast bitcast %[[DEST_PTR]] : !cir.ptr -> !cir.ptr> + // CIR-NEXT: %[[VALUE_INTPTR:.+]] = cir.cast bitcast %[[VALUE_PTR]] : !cir.ptr -> !cir.ptr> + // CIR-NEXT: %[[SIZE:.+]] = cir.const #cir.int<24> : !u64i + // CIR-NEXT: %[[DEST_VOIDPTR:.+]] = cir.cast bitcast %[[DEST_INTPTR]] : !cir.ptr> -> !cir.ptr + // CIR-NEXT: %[[VALUE_VOIDPTR:.+]] = cir.cast bitcast %[[VALUE_INTPTR]] : !cir.ptr> -> !cir.ptr + // CIR-NEXT: %[[ORDER:.+]] = cir.const #cir.int<0> : !s32i + // CIR-NEXT: cir.call @__atomic_store(%[[SIZE]], %[[DEST_VOIDPTR]], %[[VALUE_VOIDPTR]], 
%[[ORDER]]) : (!u64i {llvm.noundef}, !cir.ptr {llvm.noundef}, !cir.ptr {llvm.noundef}, !s32i {llvm.noundef}) -> () + + // LLVM: %[[DEST:.+]] = load ptr, ptr %{{.+}}, align 8 + // LLVM-NEXT: %[[VALUE:.+]] = load ptr, ptr %{{.+}}, align 8 + // LLVM-NEXT: call void @__atomic_store(i64 noundef 24, ptr noundef %[[DEST]], ptr noundef %[[VALUE]], i32 noundef 0) +} + +void scoped_store(struct Big *dest, struct Big *val) { + // CIR-LABEL: @scoped_store + // LLVM-LABEL: @scoped_store + + __scoped_atomic_store(dest, val, __ATOMIC_RELAXED, __MEMORY_SCOPE_SYSTEM); + // CIR: %[[DEST_PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr>, !cir.ptr + // CIR-NEXT: %[[VALUE_PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr>, !cir.ptr + // CIR-NEXT: %[[DEST_INTPTR:.+]] = cir.cast bitcast %[[DEST_PTR]] : !cir.ptr -> !cir.ptr> + // CIR-NEXT: %[[VALUE_INTPTR:.+]] = cir.cast bitcast %[[VALUE_PTR]] : !cir.ptr -> !cir.ptr> + // CIR-NEXT: %[[SIZE:.+]] = cir.const #cir.int<24> : !u64i + // CIR-NEXT: %[[DEST_VOIDPTR:.+]] = cir.cast bitcast %[[DEST_INTPTR]] : !cir.ptr> -> !cir.ptr + // CIR-NEXT: %[[VALUE_VOIDPTR:.+]] = cir.cast bitcast %[[VALUE_INTPTR]] : !cir.ptr> -> !cir.ptr + // CIR-NEXT: %[[ORDER:.+]] = cir.const #cir.int<0> : !s32i + // CIR-NEXT: cir.call @__atomic_store(%[[SIZE]], %[[DEST_VOIDPTR]], %[[VALUE_VOIDPTR]], %[[ORDER]]) : (!u64i {llvm.noundef}, !cir.ptr {llvm.noundef}, !cir.ptr {llvm.noundef}, !s32i {llvm.noundef}) -> () + + // LLVM: %[[DEST:.+]] = load ptr, ptr %{{.+}}, align 8 + // LLVM-NEXT: %[[VALUE:.+]] = load ptr, ptr %{{.+}}, align 8 + // LLVM-NEXT: call void @__atomic_store(i64 noundef 24, ptr noundef %[[DEST]], ptr noundef %[[VALUE]], i32 noundef 0) +} + +void c11_store(_Atomic(struct Big) *dest, struct Big *val) { + // CIR-LABEL: @c11_store + // LLVM-LABEL: @c11_store + + __c11_atomic_store(dest, *val, __ATOMIC_RELAXED); + // CIR: %[[DEST_PTR:.+]] = cir.load align(8) %{{.+}} : !cir.ptr>, !cir.ptr + // CIR-NEXT: %[[VALUE_PTR:.+]] = cir.load deref align(8) %{{.+}} : !cir.ptr>, 
!cir.ptr + // CIR-NEXT: cir.copy %[[VALUE_PTR]] to %[[TEMP_SLOT:.+]] : !cir.ptr + // CIR-NEXT: %[[DEST_INTPTR:.+]] = cir.cast bitcast %[[DEST_PTR]] : !cir.ptr -> !cir.ptr> + // CIR-NEXT: %[[VALUE_INTPTR:.+]] = cir.cast bitcast %[[TEMP_SLOT]] : !cir.ptr -> !cir.ptr> + // CIR-NEXT: %[[SIZE:.+]] = cir.const #cir.int<24> : !u64i + // CIR-NEXT: %[[DEST_VOIDPTR:.+]] = cir.cast bitcast %[[DEST_INTPTR]] : !cir.ptr> -> !cir.ptr + // CIR-NEXT: %[[VALUE_VOIDPTR:.+]] = cir.cast bitcast %[[VALUE_INTPTR]] : !cir.ptr> -> !cir.ptr + // CIR-NEXT: %[[ORDER:.+]] = cir.const #cir.int<0> : !s32i + // CIR-NEXT: cir.call @__atomic_store(%[[SIZE]], %[[DEST_VOIDPTR]], %[[VALUE_VOIDPTR]], %[[ORDER]]) : (!u64i {llvm.noundef}, !cir.ptr {llvm.noundef}, !cir.ptr {llvm.noundef}, !s32i {llvm.noundef}) -> () + + // LLVM: %[[DEST:.+]] = load ptr, ptr %{{.+}}, align 8 + // LLVM-NEXT: %[[VALUE:.+]] = load ptr, ptr %{{.+}}, align 8 + // LLVM-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr {{.*}}%[[TEMP_SLOT:.+]], ptr {{.*}}%[[VALUE]], i64 24, i1 false) + // LLVM-NEXT: call void @__atomic_store(i64 noundef 24, ptr noundef %[[DEST]], ptr noundef %[[TEMP_SLOT]], i32 noundef 0) +} From 822baad58a54a0723c5ef24d418e1a05848fb617 Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Tue, 23 Jun 2026 19:09:45 +0200 Subject: [PATCH 216/511] [LifetimeSafety] Fix liveness propagation for all origin flows (#205323) Previously, the `transfer` function for `OriginFlowFact` only handled killing the destination origin. It did not propagate liveness backwards from destination to source, meaning that if an origin flowed into another, the source was not marked live even when the destination was. The `transfer` function now propagates liveness from destination to source: if the destination origin is live, the source origin is also marked live with the same `LivenessInfo`, before optionally killing the destination. 
Additionally, `handleMovedArgsInCall` now skips rvalue reference parameters annotated with `[[clang::lifetimebound]]`, since such parameters should not be treated as moved-from. This introduces some false positives for invalidations as container modifications are not considered self-invalidating. The updated test expectations reflect this, along with `FIXME` comments explaining that they are false positives stemming from the lack of distinction between owner-borrows and content-borrows, which will require more precise `AccessPath` reasoning to resolve. --- .../LifetimeSafety/FactsGenerator.cpp | 6 ++++ .../Analysis/LifetimeSafety/LiveOrigins.cpp | 17 +++++++++-- .../Sema/LifetimeSafety/invalidations.cpp | 29 +++++++++++++++---- 3 files changed, 43 insertions(+), 9 deletions(-) diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index 50bf79d4c1a38..d8c5679a80b38 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -908,6 +908,12 @@ void FactsGenerator::handleMovedArgsInCall(const FunctionDecl *FD, const ParmVarDecl *PVD = FD->getParamDecl(I - IsInstance); if (!PVD->getType()->isRValueReferenceType()) continue; + // Skip lifetime annotated r-value reference parameters. Lifetime annotation + // indicates that the parameter is borrowed (not consumed), so it should not + // be marked as moved even though it's an r-value reference. 
+ if (PVD->hasAttr() || + PVD->hasAttr()) + continue; const Expr *Arg = Args[I]; OriginList *MovedOrigins = getOriginsList(*Arg); assert(MovedOrigins->getLength() >= 1 && diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp index cfbcacf04b1b0..69b903c813555 100644 --- a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp +++ b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp @@ -161,9 +161,20 @@ class AnalysisImpl /// An OriginFlow kills the liveness of the destination origin if `KillDest` /// is true. Otherwise, it propagates liveness from destination to source. Lattice transfer(Lattice In, const OriginFlowFact &OF) { - if (!OF.getKillDest()) - return In; - return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); + Lattice Out = In; + OriginID Dest = OF.getDestOriginID(); + OriginID Src = OF.getSrcOriginID(); + // If the destination of the flow is live, the source of the flow must also + // be marked live before this point as its value will flow into the + // destination. + if (In.LiveOrigins.contains(Dest)) { + const LivenessInfo *DestInfo = In.LiveOrigins.lookup(Dest); + assert(DestInfo); + Out = Lattice(Factory.add(Out.LiveOrigins, Src, *DestInfo)); + } + if (OF.getKillDest()) + Out = Lattice(Factory.remove(Out.LiveOrigins, Dest)); + return Out; } Lattice transfer(Lattice In, const KillOriginFact &F) { diff --git a/clang/test/Sema/LifetimeSafety/invalidations.cpp b/clang/test/Sema/LifetimeSafety/invalidations.cpp index c2ac105855d07..be1acc6bc7fbc 100644 --- a/clang/test/Sema/LifetimeSafety/invalidations.cpp +++ b/clang/test/Sema/LifetimeSafety/invalidations.cpp @@ -402,10 +402,20 @@ void SelfInvalidatingMap() { // Therefore the following is safe in practice. // On the other hand, std::flat_map (since C++23) does not provide pointer stability on // insertion and following is unsafe for this container. 
- mp[1] = "42"; - mp[2] // expected-note {{local variable 'mp' is invalidated here}} - = - mp[1]; // expected-warning {{local variable 'mp' is later invalidated}} expected-note {{later used here}} + // FIXME: The warnings below are false positives (self-invalidation of the Owner). + // Modifying a container should not invalidate the container object itself. + // To resolve this, we need to: + // 1. Distinguish owner-borrow (borrowing the container object) from content-borrow (borrowing elements inside the container). + // 2. Make AccessPaths more precise to reason at element/field granularity rather than treating the whole container as a single storage location. + mp[1] = "42"; // expected-warning {{local variable 'mp' is later invalidated}} \ + // expected-note {{local variable 'mp' is invalidated here}} \ + // expected-note {{later used here}} + mp[2] = mp[1]; // expected-warning {{local variable 'mp' is later invalidated}} \ + // expected-warning {{local variable 'mp' is later invalidated}} \ + // expected-note {{local variable 'mp' is invalidated here}} \ + // expected-note {{later used here}} \ + // expected-note {{local variable 'mp' is invalidated here}} \ + // expected-note {{later used here}} } void InvalidateErase() { @@ -740,9 +750,16 @@ void MapSubscriptMultipleCallsDoesNotInvalidate(std::map mp, int a, in } void FlatMapSubscriptMultipleCallsInvalidate(std::flat_map mp, int a, int b) { + // FIXME: The duplicate warning below is a false positive caused by self-invalidation of the Owner 'mp'. + // While the warning on the temporary reference returned by mp[a] is a true positive (it dangles), + // the second warning on 'mp' itself is redundant and incorrect. + // Resolving this requires distinguishing owner-borrow from content-borrow. 
PrintMax(mp[a], mp[b]); // expected-warning {{parameter 'mp' is later invalidated}} \ - // expected-note {{parameter 'mp' is invalidated here}} \ - // expected-note {{later used here}} + // expected-warning {{parameter 'mp' is later invalidated}} \ + // expected-note {{parameter 'mp' is invalidated here}} \ + // expected-note {{later used here}} \ + // expected-note {{parameter 'mp' is invalidated here}} \ + // expected-note {{later used here}} } } // namespace AssociativeContainers From 171b9a93c386660050897a131175767a4db7b30c Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Wed, 24 Jun 2026 03:16:59 +1000 Subject: [PATCH 217/511] Add limited vtable address querying to `clang::CodeGenerator` (#202807) This is being used in Carbon ( https://github.com/carbon-language/carbon-lang/pull/7323 ) to implement cross-language overriding. I realize this isn't the fully general feature needed for virtual bases, etc - but this limited functionality is already wrapped up for, if I understand it correctly, constexpr use cases and some others. So hopefully it's still something folks feel is general enough to be worthwhile exporting. For now Carbon doesn't support deriving from a type with virtual bases, so the extra complexity isn't needed. 
--- clang/include/clang/CodeGen/ModuleBuilder.h | 26 ++++++---- clang/lib/CodeGen/ModuleBuilder.cpp | 11 +++++ .../unittests/CodeGen/CodeGenExternalTest.cpp | 47 +++++++++++++++---- 3 files changed, 65 insertions(+), 19 deletions(-) diff --git a/clang/include/clang/CodeGen/ModuleBuilder.h b/clang/include/clang/CodeGen/ModuleBuilder.h index 5173fd05d75d6..cd93ef7cc654a 100644 --- a/clang/include/clang/CodeGen/ModuleBuilder.h +++ b/clang/include/clang/CodeGen/ModuleBuilder.h @@ -32,15 +32,16 @@ namespace llvm { inline constexpr llvm::StringRef ClangTrapPrefix = "__clang_trap_msg"; namespace clang { - class CodeGenOptions; - class CoverageSourceInfo; - class Decl; - class DiagnosticsEngine; - class GlobalDecl; - class HeaderSearchOptions; - class LangOptions; - class PreprocessorOptions; - class CompilerInstance; +class BaseSubobject; +class CodeGenOptions; +class CoverageSourceInfo; +class Decl; +class DiagnosticsEngine; +class GlobalDecl; +class HeaderSearchOptions; +class LangOptions; +class PreprocessorOptions; +class CompilerInstance; namespace CodeGen { class CodeGenModule; @@ -105,6 +106,13 @@ class CodeGenerator : public ASTConsumer { /// definition has been registered with this code generator. llvm::Constant *GetAddrOfGlobal(GlobalDecl decl, bool isForDefinition); + /// Return the LLVM address of the vtable for the given base subobject. + /// + /// \param base The base subobject that owns the vptr to be initialized. + /// \param decl The derived type being initialized, that contains `base`. + llvm::Constant *GetAddrOfVTable(BaseSubobject base, + const CXXRecordDecl *decl); + /// Create a new \c llvm::Module after calling HandleTranslationUnit. This /// enable codegen in interactive processing environments. 
llvm::Module* StartModule(llvm::StringRef ModuleName, llvm::LLVMContext &C); diff --git a/clang/lib/CodeGen/ModuleBuilder.cpp b/clang/lib/CodeGen/ModuleBuilder.cpp index 1888de4521abd..0b00362487d2a 100644 --- a/clang/lib/CodeGen/ModuleBuilder.cpp +++ b/clang/lib/CodeGen/ModuleBuilder.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "clang/CodeGen/ModuleBuilder.h" +#include "CGCXXABI.h" #include "CGDebugInfo.h" #include "CodeGenModule.h" #include "clang/AST/ASTContext.h" @@ -131,6 +132,11 @@ namespace { return Builder->GetAddrOfGlobal(global, ForDefinition_t(isForDefinition)); } + llvm::Constant *GetAddrOfVTable(BaseSubobject subobject, + const CXXRecordDecl *decl) { + return Builder->getCXXABI().getVTableAddressPoint(subobject, decl); + } + llvm::Module *StartModule(llvm::StringRef ModuleName, llvm::LLVMContext &C) { assert(!M && "Replacing existing Module?"); @@ -378,6 +384,11 @@ llvm::Constant *CodeGenerator::GetAddrOfGlobal(GlobalDecl global, ->GetAddrOfGlobal(global, isForDefinition); } +llvm::Constant *CodeGenerator::GetAddrOfVTable(BaseSubobject base, + const CXXRecordDecl *decl) { + return static_cast(this)->GetAddrOfVTable(base, decl); +} + llvm::Module *CodeGenerator::StartModule(llvm::StringRef ModuleName, llvm::LLVMContext &C) { return static_cast(this)->StartModule(ModuleName, C); diff --git a/clang/unittests/CodeGen/CodeGenExternalTest.cpp b/clang/unittests/CodeGen/CodeGenExternalTest.cpp index be3be147460f3..8824451ccc2f4 100644 --- a/clang/unittests/CodeGen/CodeGenExternalTest.cpp +++ b/clang/unittests/CodeGen/CodeGenExternalTest.cpp @@ -10,6 +10,7 @@ #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" +#include "clang/AST/BaseSubobject.h" #include "clang/AST/GlobalDecl.h" #include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/TargetInfo.h" @@ -21,6 +22,7 @@ #include "clang/Sema/Sema.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" 
+#include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/TargetParser/Host.h" @@ -163,7 +165,12 @@ bool MyASTConsumer::shouldSkipFunctionBody(Decl *D) { const char TestProgram[] = "struct mytest_struct { char x; short y; char p; long z; };\n" - "int mytest_fn(int x) { return x; }\n"; + "int mytest_fn(int x) { return x; }\n" + "struct mytest_dynamic_struct {\n" + " mytest_dynamic_struct();\n" + " virtual void f1();\n" + "};\n" + "mytest_dynamic_struct::mytest_dynamic_struct() { }\n"; // This function has the real test code here static void test_codegen_fns(MyASTConsumer *my) { @@ -176,17 +183,19 @@ static void test_codegen_fns(MyASTConsumer *my) { for (auto decl : my->toplevel_decls ) { if (FunctionDecl *fd = dyn_cast(decl)) { - if (fd->getName() == "mytest_fn") { - Constant *c = my->Builder->GetAddrOfGlobal(GlobalDecl(fd), false); - // Verify that we got a function. - ASSERT_TRUE(c != NULL); - if (DebugThisTest) { - c->print(dbgs(), true); - dbgs() << "\n"; + if (fd->getDeclName().isIdentifier()) { + if (fd->getName() == "mytest_fn") { + Constant *c = my->Builder->GetAddrOfGlobal(GlobalDecl(fd), false); + // Verify that we got a function. 
+ ASSERT_TRUE(c != NULL); + if (DebugThisTest) { + c->print(dbgs(), true); + dbgs() << "\n"; + } + mytest_fn_ok = true; } - mytest_fn_ok = true; } - } else if(clang::RecordDecl *rd = dyn_cast(decl)) { + } else if (CXXRecordDecl *rd = dyn_cast(decl)) { if (rd->getName() == "mytest_struct") { RecordDecl *def = rd->getDefinition(); ASSERT_TRUE(def != NULL); @@ -247,6 +256,24 @@ static void test_codegen_fns(MyASTConsumer *my) { ASSERT_GE(zTy->getPrimitiveSizeInBits(), 32u); // long is at least 32b mytest_struct_ok = true; + } else if (rd->getName() == "mytest_dynamic_struct") { + Constant *c = my->Builder->GetAddrOfVTable( + BaseSubobject(rd, CharUnits::fromQuantity(0)), rd); + ASSERT_NE(c, nullptr); + Value *vtableGlobal = c->getOperand(0); + ASSERT_NE(vtableGlobal, nullptr); + ASSERT_EQ(vtableGlobal->getName(), "_ZTV21mytest_dynamic_struct"); + const DataLayout &dataLayout = + my->Builder->GetModule()->getDataLayout(); + unsigned pointerSizeInBits = + dataLayout.getPointerTypeSizeInBits(c->getType()); + APInt offset(pointerSizeInBits, 0); + GEPOperator *gepOperator = dyn_cast(c); + ASSERT_NE(gepOperator, nullptr); + gepOperator->accumulateConstantOffset(dataLayout, offset); + // Itanium ABI has a couple of pointers (offset to top, type info) + // before the array of function pointers. + ASSERT_EQ(offset, (pointerSizeInBits / 8) * 2); } } } From a3f4798506f4c8ac2cb7fdf93ec3a99868ddc0ea Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Tue, 23 Jun 2026 19:25:02 +0200 Subject: [PATCH 218/511] Revert "Reapply "[InstCombine] Merge consecutive assumes" (#205177)" (#205380) This surfaces a bug likely fixed by https://github.com/llvm/llvm-project/pull/205275. Once that lands this patch can be landed again. 
Reverts llvm/llvm-project#205324 --- llvm/include/llvm/IR/InstrTypes.h | 6 ---- .../InstCombine/InstCombineCalls.cpp | 22 ++---------- .../InstCombine/InstructionCombining.cpp | 2 +- .../InstCombine/assume-loop-align.ll | 3 +- llvm/test/Transforms/InstCombine/assume.ll | 36 ++++++++----------- .../PhaseOrdering/AArch64/std-find.ll | 3 +- 6 files changed, 22 insertions(+), 50 deletions(-) diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 681c4b18375f4..5f7df6a4eb6f8 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -2359,12 +2359,6 @@ class CallBase : public Instruction { }); } - /// Return whether there exists an operand bundle of type ID - bool hasOperandBundle(uint32_t ID) const { - return any_of(operand_bundles(), - [&](OperandBundleUse OBU) { return OBU.getTagID() == ID; }); - } - /// Populate the BundleOpInfo instances and the Use& vector from \p /// Bundles. Return the op_iterator pointing to the Use& one past the last /// last bundle operand use. 
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 880d896e12d6e..ce9e4b836a56e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -3844,26 +3844,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { } } - if (II->hasOperandBundles()) { - // Merge consecutive assumes to save some resources - if (auto *PrevAI = dyn_cast_or_null(II->getPrevNode()); - PrevAI && PrevAI->hasOperandBundles()) { - SmallVector Bundles; - Bundles.reserve(II->getNumOperandBundles() + - PrevAI->getNumOperandBundles()); - for (auto Bundle : PrevAI->operand_bundles()) - Bundles.emplace_back(Bundle); - for (auto Bundle : II->operand_bundles()) - Bundles.emplace_back(Bundle); - Builder.CreateAssumption(Bundles); - eraseInstFromFunction(*PrevAI); - return eraseInstFromFunction(*II); - } - - // If the assume has operand bundles, the folds below will never work, so - // don't bother trying. + // If the assume has operand bundles, the folds below will never work, so + // don't bother trying. + if (II->hasOperandBundles()) break; - } Value *IIOperand = II->getArgOperand(0); diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index ece0b36a15b72..1e24ff8d51057 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -5849,7 +5849,7 @@ bool InstCombinerImpl::run() { // removed. 
auto II = dyn_cast(User); if (II->getIntrinsicID() != Intrinsic::assume || - !II->hasOperandBundle(LLVMContext::OB_Dereferenceable)) + !II->getOperandBundle("dereferenceable")) continue; } diff --git a/llvm/test/Transforms/InstCombine/assume-loop-align.ll b/llvm/test/Transforms/InstCombine/assume-loop-align.ll index 2701775f011e8..0c5e403ca54a9 100644 --- a/llvm/test/Transforms/InstCombine/assume-loop-align.ll +++ b/llvm/test/Transforms/InstCombine/assume-loop-align.ll @@ -10,7 +10,8 @@ target triple = "x86_64-unknown-linux-gnu" define void @foo(ptr %a, ptr %b) #0 { ; CHECK-LABEL: @foo( ; CHECK-NEXT: entry: -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A:%.*]], i64 64), "align"(ptr [[B:%.*]], i64 64) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[A:%.*]], i64 64) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[B:%.*]], i64 64) ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index 8c819ad17a1f3..69220811ac206 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -134,7 +134,8 @@ define i1 @align_with_offset_on_gep(ptr %base) { define void @align_with_constant_offset_0(ptr %ptr) { ; CHECK-LABEL: @align_with_constant_offset_0( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 0) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 0) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -144,7 +145,8 @@ define void @align_with_constant_offset_0(ptr %ptr) { define void @align_with_constant_offset_1(ptr %ptr) { ; CHECK-LABEL: 
@align_with_constant_offset_1( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 -8) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 -8) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -155,7 +157,8 @@ define void @align_with_constant_offset_1(ptr %ptr) { define void @align_with_constant_offset_4(ptr %ptr) { ; CHECK-LABEL: @align_with_constant_offset_4( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 0) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 0) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -166,7 +169,8 @@ define void @align_with_constant_offset_4(ptr %ptr) { define void @align_with_constant_offset_8(ptr %ptr) { ; CHECK-LABEL: @align_with_constant_offset_8( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 8) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 8) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -176,7 +180,8 @@ define void @align_with_constant_offset_8(ptr %ptr) { define void @align_with_variable_offset(ptr %ptr, i64 %offset) { ; CHECK-LABEL: @align_with_variable_offset( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16), "align"(ptr [[PTR]], i64 8, i64 [[OFFSET:%.*]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR:%.*]], i64 16) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[PTR]], i64 8, i64 
[[OFFSET:%.*]]) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "align"(ptr %ptr, i64 16) ] @@ -622,7 +627,10 @@ define void @redundant_nonnull3(ptr %ptr) { define void @partially_redundant(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr %ptr4, ptr %ptr5) { ; CHECK-LABEL: @partially_redundant( -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR5:%.*]]), "nonnull"(ptr [[PTR4:%.*]]), "nonnull"(ptr [[PTR3:%.*]]), "nonnull"(ptr [[PTR:%.*]]), "nonnull"(ptr [[PTR2:%.*]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR2:%.*]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR:%.*]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR4:%.*]]), "nonnull"(ptr [[PTR3:%.*]]) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "nonnull"(ptr [[PTR5:%.*]]) ] ; CHECK-NEXT: ret void ; call void @llvm.assume(i1 true) [ "nonnull"(ptr %ptr), "nonnull"(ptr %ptr2) ] @@ -1384,22 +1392,6 @@ define i32 @assume_noundef_on_load_after_call(ptr %ptr) { ret i32 %val } -define ptr @avoid_get_operand_bundle() { -; CHECK-LABEL: @avoid_get_operand_bundle( -; CHECK-NEXT: bb: -; CHECK-NEXT: [[LOAD:%.*]] = load volatile ptr, ptr null, align 8 -; CHECK-NEXT: [[PTRTOINT_I:%.*]] = ptrtoint ptr [[LOAD]] to i64 -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr null, i64 [[PTRTOINT_I]]), "dereferenceable"(ptr null, i64 [[PTRTOINT_I]]) ] -; CHECK-NEXT: ret ptr null -; -bb: - %load = load volatile ptr, ptr null, align 8 - %ptrtoint.i = ptrtoint ptr %load to i64 - call void @llvm.assume(i1 true) [ "dereferenceable"(ptr null, i64 %ptrtoint.i) ] - call void @llvm.assume(i1 true) [ "dereferenceable"(ptr null, i64 %ptrtoint.i) ] - ret ptr null -} - declare void @use(i1) declare void @block() declare void @llvm.dbg.value(metadata, metadata, metadata) diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll index 4ca7f780cdc5e..69b23200b239b 100644 
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/std-find.ll @@ -244,7 +244,8 @@ define ptr @std_find_caller(ptr noundef %first, ptr noundef %last) { ; CHECK-LABEL: define noundef ptr @std_find_caller( ; CHECK-SAME: ptr noundef [[FIRST:%.*]], ptr noundef [[LAST:%.*]]) local_unnamed_addr #[[ATTR0]] { ; CHECK-NEXT: [[ENTRY:.*]]: -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2), "align"(ptr [[LAST]], i64 2) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[FIRST]], i64 2) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[LAST]], i64 2) ] ; CHECK-NEXT: [[PRE_I:%.*]] = icmp eq ptr [[FIRST]], [[LAST]] ; CHECK-NEXT: br i1 [[PRE_I]], label %[[STD_FIND_GENERIC_IMPL_EXIT:.*]], label %[[LOOP_HEADER_I_PREHEADER:.*]] ; CHECK: [[LOOP_HEADER_I_PREHEADER]]: From ce70d5c491dcabe38a42d03582c0a34b3aefe697 Mon Sep 17 00:00:00 2001 From: Ehsan Amiri Date: Tue, 23 Jun 2026 13:27:46 -0400 Subject: [PATCH 219/511] [SimpleLoopUnswitch] Reland "Generalize the notion of trivial unswitching" (#204934) In the original PR for this feature, the third parameter of `MemorySSAUpdater::applyUpdates` was not set to `true`. This caused `DT` to be invalid, resulting in the failures. This PR fixes the issue and provides an additional testcase. Also, the original commit had three asserts to confirm `!ModifiedBranch` when we bail out. One of these is redundant because it is enclosed in an `if` statement checking the same condition. This PR removes the redundant `assert` too.
--- .../Transforms/Scalar/SimpleLoopUnswitch.cpp | 50 +- .../PhaseOrdering/AArch64/loopflatten.ll | 6 +- .../AArch64/matrix-extract-insert.ll | 58 +- .../X86/hoist-load-of-baseptr.ll | 62 +- .../PhaseOrdering/unswitch-cold-func.ll | 15 +- .../unswitch-nontrivial-cold-func.ll | 114 ++ .../AMDGPU/uniform-unswitch.ll | 19 +- .../PGO-nontrivial-unswitch2.ll | 47 +- .../PGO-nontrivial-unswitch3.ll | 41 +- .../trivial-unswitch-loop-guard.ll | 1031 +++++++++++++++++ .../SimpleLoopUnswitch/update-dt.ll | 52 + polly/test/Support/pipelineposition.ll | 2 +- 12 files changed, 1365 insertions(+), 132 deletions(-) create mode 100644 llvm/test/Transforms/PhaseOrdering/unswitch-nontrivial-cold-func.ll create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-loop-guard.ll create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/update-dt.ll diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index e410f0644dc6b..abd5d30ab9c35 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -553,8 +553,9 @@ static Loop *getTopMostExitingLoop(const BasicBlock *ExitBB, /// /// This routine should only be called when loop code leading to the branch has /// been validated as trivial (no side effects). This routine checks if the -/// condition is invariant and one of the successors is a loop exit. This -/// allows us to unswitch without duplicating the loop, making it trivial. +/// condition is invariant and one of the successors is a loop exit or a loop +/// latch with no side-effects. This allows us to unswitch without duplicating +/// the loop, making it trivial. /// /// If this routine fails to unswitch the branch it returns false. 
/// @@ -591,6 +592,46 @@ static bool unswitchTrivialBranch(Loop &L, CondBrInst &BI, DominatorTree &DT, } } + std::optional LatchIdx = std::nullopt; + auto *LoopLatch = L.getLoopLatch(); + auto *ULExit = L.getUniqueLatchExitBlock(); + if (SE && FullUnswitch && ULExit) { + if (BI.getSuccessor(0) == LoopLatch && L.contains(BI.getSuccessor(1))) + LatchIdx = 0; + else if (BI.getSuccessor(1) == LoopLatch && L.contains(BI.getSuccessor(0))) + LatchIdx = 1; + } + + bool ModifiedBranch = false; + if (LatchIdx && areLoopExitPHIsLoopInvariant(L, *LoopLatch, *ULExit) && + !llvm::any_of(*LoopLatch, + [](Instruction &I) { return I.mayHaveSideEffects(); })) { + + // We need to prove the loop is finite, otherwise this change will convert + // it to a finite loop. This conservative check is good enough as we are + // mostly interested in perfect countable loop nests that perform + // calculations on arrays. + const SCEV *MaxBECount = SE->getConstantMaxBackedgeTakenCount(&L); + if (!isa(MaxBECount)) { + SmallVector, 2> Updates; + Updates.push_back({cfg::UpdateKind::Delete, BI.getParent(), + BI.getSuccessor(*LatchIdx)}); + Updates.push_back({cfg::UpdateKind::Insert, BI.getParent(), ULExit}); + LoopLatch->removePredecessor(BI.getParent()); + BI.setSuccessor(*LatchIdx, ULExit); + for (PHINode &PN : ULExit->phis()) { + Value *V = PN.getIncomingValueForBlock(LoopLatch); + PN.addIncoming(V, BI.getParent()); + } + if (MSSAU) + MSSAU->applyUpdates(Updates, DT, /*UpdateDTFirst=*/true); + else + DT.applyUpdates(Updates); + + ModifiedBranch = true; + } + } + // Check that one of the branch's successors exits, and which one. 
bool ExitDirection = true; int LoopExitSuccIdx = 0; @@ -601,12 +642,14 @@ static bool unswitchTrivialBranch(Loop &L, CondBrInst &BI, DominatorTree &DT, LoopExitBB = BI.getSuccessor(1); if (L.contains(LoopExitBB)) { LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n"); + assert(!ModifiedBranch && "Modified the branch but didn't unswitch"); return false; } } auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx); auto *ParentBB = BI.getParent(); - if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) { + if (!ModifiedBranch && + !areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) { LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n"); return false; } @@ -621,6 +664,7 @@ static bool unswitchTrivialBranch(Loop &L, CondBrInst &BI, DominatorTree &DT, : !match(Cond, m_LogicalAnd())) { LLVM_DEBUG(dbgs() << " Branch condition is in improper form for " "non-full unswitch!\n"); + assert(!ModifiedBranch && "Modified the branch but didn't unswitch"); return false; } } diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll index 52d49ac9cd661..8f106d720f758 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/loopflatten.ll @@ -11,13 +11,13 @@ define dso_local void @_Z3fooPiii(ptr %A, i32 %N, i32 %M) #0 { ; CHECK-NEXT: [[CMP21:%.*]] = icmp sgt i32 [[M:%.*]], 0 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP3]], i1 [[CMP21]], i1 false ; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US:%.*]], label [[FOR_COND_CLEANUP:%.*]] -; CHECK: for.cond1.preheader.lr.ph.split.us: +; CHECK: for.cond1.preheader.lr.ph.split: ; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[M]] to i64 ; CHECK-NEXT: [[TMP1:%.*]] = zext nneg i32 [[N]] to i64 ; CHECK-NEXT: [[FLATTEN_TRIPCOUNT:%.*]] = mul nuw nsw i64 [[TMP0]], [[TMP1]] ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] -; CHECK: for.cond1.preheader.us: -; 
CHECK-NEXT: [[INDVAR6:%.*]] = phi i64 [ [[INDVAR_NEXT7:%.*]], [[FOR_COND1_PREHEADER_US]] ], [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US]] ] +; CHECK: for.cond1.preheader: +; CHECK-NEXT: [[INDVAR6:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US]] ], [ [[INDVAR_NEXT7:%.*]], [[FOR_COND1_PREHEADER_US]] ] ; CHECK-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds nuw [4 x i8], ptr [[A:%.*]], i64 [[INDVAR6]] ; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX_US]], align 4 ; CHECK-NEXT: tail call void @_Z1fi(i32 [[TMP2]]) diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll index d94bf5e221b93..b43d12ecf8b3b 100644 --- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll +++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll @@ -86,7 +86,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: [[CMP210_NOT:%.*]] = icmp eq i32 [[I:%.*]], 0 ; CHECK-NEXT: [[CONV6:%.*]] = zext i32 [[I]] to i64 ; CHECK-NEXT: br i1 [[CMP210_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; CHECK: for.cond1.preheader.us.preheader: +; CHECK: for.cond1.preheader.preheader: ; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[CONV6]], 3 ; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 360 ; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[TMP1]] @@ -129,25 +129,12 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[CONV6]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[VECTOR_MEMCHECK_1:%.*]], label [[FOR_BODY4_US_PREHEADER]] -; CHECK: for.body4.us.preheader: +; CHECK: for.body4.preheader: ; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: br label 
[[FOR_BODY4_US:%.*]] -; CHECK: for.body4.us: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY4_US_PREHEADER]] ] -; CHECK-NEXT: [[TMP27:%.*]] = icmp samesign ult i64 [[INDVARS_IV]], 225 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP27]]) -; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw [8 x i8], ptr [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[MATRIXEXT_US:%.*]] = load double, ptr [[TMP28]], align 8 -; CHECK-NEXT: [[MATRIXEXT8_US:%.*]] = load double, ptr [[TMP3]], align 8 -; CHECK-NEXT: [[MUL_US:%.*]] = fmul double [[MATRIXEXT_US]], [[MATRIXEXT8_US]] -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw [8 x i8], ptr [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[MATRIXEXT11_US:%.*]] = load double, ptr [[TMP29]], align 8 -; CHECK-NEXT: [[SUB_US:%.*]] = fsub double [[MATRIXEXT11_US]], [[MUL_US]] -; CHECK-NEXT: store double [[SUB_US]], ptr [[TMP29]], align 8 -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[CONV6]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[VECTOR_MEMCHECK_1]], label [[FOR_BODY4_US]], !llvm.loop [[LOOP10:![0-9]+]] -; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us: +; CHECK: for.cond.cleanup.split: +; CHECK-NEXT: ret void +; CHECK: for.cond1.for.cond.cleanup3_crit_edge: ; CHECK-NEXT: [[TMP31:%.*]] = icmp samesign ult i32 [[I]], 210 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP31]]) ; CHECK-NEXT: [[TMP61:%.*]] = getelementptr inbounds nuw [8 x i8], ptr [[B]], i64 [[CONV6]] @@ -188,10 +175,10 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: middle.block.1: ; CHECK-NEXT: [[CMP_N_1:%.*]] = icmp eq i64 [[N_VEC_1]], [[CONV6]] ; CHECK-NEXT: br i1 [[CMP_N_1]], label [[VECTOR_MEMCHECK_2:%.*]], label [[FOR_BODY4_US_PREHEADER_1]] -; CHECK: for.body4.us.preheader.1: +; CHECK: for.body4.preheader.1: ; CHECK-NEXT: 
[[INDVARS_IV_PH_1:%.*]] = phi i64 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]] ], [ 0, [[VECTOR_MEMCHECK_1]] ], [ [[N_VEC_1]], [[MIDDLE_BLOCK_1]] ] ; CHECK-NEXT: br label [[FOR_BODY4_US_1:%.*]] -; CHECK: for.body4.us.1: +; CHECK: for.body4.1: ; CHECK-NEXT: [[INDVARS_IV_1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_1:%.*]], [[FOR_BODY4_US_1]] ], [ [[INDVARS_IV_PH_1]], [[FOR_BODY4_US_PREHEADER_1]] ] ; CHECK-NEXT: [[TMP57:%.*]] = add nuw nsw i64 [[INDVARS_IV_1]], 15 ; CHECK-NEXT: [[TMP58:%.*]] = icmp samesign ult i64 [[INDVARS_IV_1]], 210 @@ -206,8 +193,8 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: store double [[SUB_US_1]], ptr [[TMP60]], align 8 ; CHECK-NEXT: [[INDVARS_IV_NEXT_1]] = add nuw nsw i64 [[INDVARS_IV_1]], 1 ; CHECK-NEXT: [[EXITCOND_NOT_1:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_1]], [[CONV6]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT_1]], label [[VECTOR_MEMCHECK_2]], label [[FOR_BODY4_US_1]], !llvm.loop [[LOOP10]] -; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.1: +; CHECK-NEXT: br i1 [[EXITCOND_NOT_1]], label [[VECTOR_MEMCHECK_2]], label [[FOR_BODY4_US_1]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: for.cond1.for.cond.cleanup3_crit_edge.1: ; CHECK-NEXT: [[TMP62:%.*]] = icmp samesign ult i32 [[I]], 195 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP62]]) ; CHECK-NEXT: [[TMP92:%.*]] = getelementptr inbounds nuw [8 x i8], ptr [[B]], i64 [[CONV6]] @@ -248,10 +235,10 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: middle.block.2: ; CHECK-NEXT: [[CMP_N_2:%.*]] = icmp eq i64 [[N_VEC_2]], [[CONV6]] ; CHECK-NEXT: br i1 [[CMP_N_2]], label [[VECTOR_MEMCHECK_3:%.*]], label [[FOR_BODY4_US_PREHEADER_2]] -; CHECK: for.body4.us.preheader.2: +; CHECK: for.body4.preheader.2: ; CHECK-NEXT: [[INDVARS_IV_PH_2:%.*]] = phi i64 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ 0, [[VECTOR_MEMCHECK_2]] ], [ [[N_VEC_2]], [[MIDDLE_BLOCK_2]] ] ; CHECK-NEXT: br label 
[[FOR_BODY4_US_2:%.*]] -; CHECK: for.body4.us.2: +; CHECK: for.body4.2: ; CHECK-NEXT: [[INDVARS_IV_2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_2:%.*]], [[FOR_BODY4_US_2]] ], [ [[INDVARS_IV_PH_2]], [[FOR_BODY4_US_PREHEADER_2]] ] ; CHECK-NEXT: [[TMP88:%.*]] = add nuw nsw i64 [[INDVARS_IV_2]], 30 ; CHECK-NEXT: [[TMP89:%.*]] = icmp samesign ult i64 [[INDVARS_IV_2]], 195 @@ -267,7 +254,7 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: [[INDVARS_IV_NEXT_2]] = add nuw nsw i64 [[INDVARS_IV_2]], 1 ; CHECK-NEXT: [[EXITCOND_NOT_2:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_2]], [[CONV6]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT_2]], label [[VECTOR_MEMCHECK_3]], label [[FOR_BODY4_US_2]], !llvm.loop [[LOOP10]] -; CHECK: for.cond1.for.cond.cleanup3_crit_edge.us.2: +; CHECK: for.cond1.for.cond.cleanup3_crit_edge.2: ; CHECK-NEXT: [[TMP93:%.*]] = icmp samesign ult i32 [[I]], 180 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP93]]) ; CHECK-NEXT: [[TMP123:%.*]] = getelementptr inbounds nuw [8 x i8], ptr [[B]], i64 [[CONV6]] @@ -308,10 +295,10 @@ define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK: middle.block.3: ; CHECK-NEXT: [[CMP_N_3:%.*]] = icmp eq i64 [[N_VEC_3]], [[CONV6]] ; CHECK-NEXT: br i1 [[CMP_N_3]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY4_US_PREHEADER_3]] -; CHECK: for.body4.us.preheader.3: +; CHECK: for.body4.preheader.3: ; CHECK-NEXT: [[INDVARS_IV_PH_3:%.*]] = phi i64 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ 0, [[VECTOR_MEMCHECK_3]] ], [ [[N_VEC_3]], [[MIDDLE_BLOCK_3]] ] ; CHECK-NEXT: br label [[FOR_BODY4_US_3:%.*]] -; CHECK: for.body4.us.3: +; CHECK: for.body4.3: ; CHECK-NEXT: [[INDVARS_IV_3:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_3:%.*]], [[FOR_BODY4_US_3]] ], [ [[INDVARS_IV_PH_3]], [[FOR_BODY4_US_PREHEADER_3]] ] ; CHECK-NEXT: [[TMP119:%.*]] = add nuw nsw i64 [[INDVARS_IV_3]], 45 ; CHECK-NEXT: [[TMP120:%.*]] = icmp samesign ult i64 [[INDVARS_IV_3]], 180 @@ -327,8 +314,21 @@ 
define void @matrix_extract_insert_loop(i32 %i, ptr nonnull align 8 dereferencea ; CHECK-NEXT: [[INDVARS_IV_NEXT_3]] = add nuw nsw i64 [[INDVARS_IV_3]], 1 ; CHECK-NEXT: [[EXITCOND_NOT_3:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_3]], [[CONV6]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT_3]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY4_US_3]], !llvm.loop [[LOOP10]] -; CHECK: for.cond.cleanup: -; CHECK-NEXT: ret void +; CHECK: for.body4: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY4_US]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY4_US_PREHEADER]] ] +; CHECK-NEXT: [[TMP68:%.*]] = icmp samesign ult i64 [[INDVARS_IV]], 225 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP68]]) +; CHECK-NEXT: [[TMP69:%.*]] = getelementptr inbounds nuw [8 x i8], ptr [[A]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[MATRIXEXT:%.*]] = load double, ptr [[TMP69]], align 8 +; CHECK-NEXT: [[MATRIXEXT8:%.*]] = load double, ptr [[TMP3]], align 8 +; CHECK-NEXT: [[MUL:%.*]] = fmul double [[MATRIXEXT]], [[MATRIXEXT8]] +; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds nuw [8 x i8], ptr [[B]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[MATRIXEXT11:%.*]] = load double, ptr [[TMP70]], align 8 +; CHECK-NEXT: [[SUB:%.*]] = fsub double [[MATRIXEXT11]], [[MUL]] +; CHECK-NEXT: store double [[SUB]], ptr [[TMP70]], align 8 +; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[CONV6]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[VECTOR_MEMCHECK_1]], label [[FOR_BODY4_US]], !llvm.loop [[LOOP10]] ; entry: %i.addr = alloca i32, align 4 diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll index 57ed5092bef5c..46a1b44bb468a 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll @@ -19,42 +19,42 @@ define dso_local void 
@_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8 ; O1-NEXT: [[ENTRY:.*]]: ; O1-NEXT: [[CMP24_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 ; O1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DATA]], align 8 -; O1-NEXT: br label %[[FOR_COND1_PREHEADER:.*]] -; O1: [[FOR_COND1_PREHEADER]]: -; O1-NEXT: [[I_06:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC7:%.*]], %[[FOR_COND_CLEANUP3:.*]] ] -; O1-NEXT: br i1 [[CMP24_NOT]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4:.*]] -; O1: [[FOR_COND_CLEANUP:.*]]: -; O1-NEXT: ret void +; O1-NEXT: br i1 [[CMP24_NOT]], label %[[FOR_COND_CLEANUP3:.*]], label %[[FOR_BODY4:.*]] +; O1: [[FOR_BODY4]]: +; O1-NEXT: [[I_06:%.*]] = phi i64 [ [[INC7:%.*]], %[[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE:.*]] ], [ 0, %[[ENTRY]] ] +; O1-NEXT: br label %[[FOR_BODY5:.*]] ; O1: [[FOR_COND_CLEANUP3]]: +; O1-NEXT: ret void +; O1: [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE]]: ; O1-NEXT: [[INC7]] = add nuw nsw i64 [[I_06]], 1 ; O1-NEXT: [[EXITCOND7_NOT:%.*]] = icmp eq i64 [[INC7]], 100 -; O1-NEXT: br i1 [[EXITCOND7_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_COND1_PREHEADER]], !llvm.loop [[LOOP0:![0-9]+]] -; O1: [[FOR_BODY4]]: -; O1-NEXT: [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], %[[FOR_BODY4]] ], [ 0, %[[FOR_COND1_PREHEADER]] ] +; O1-NEXT: br i1 [[EXITCOND7_NOT]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4]], !llvm.loop [[LOOP0:![0-9]+]] +; O1: [[FOR_BODY5]]: +; O1-NEXT: [[J_05:%.*]] = phi i64 [ 0, %[[FOR_BODY4]] ], [ [[INC5:%.*]], %[[FOR_BODY5]] ] ; O1-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP0]], i64 [[J_05]] ; O1-NEXT: [[TMP1:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4, !tbaa [[INT_TBAA2:![0-9]+]] ; O1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 ; O1-NEXT: store i32 [[INC]], ptr [[ADD_PTR_I]], align 4, !tbaa [[INT_TBAA2]] ; O1-NEXT: [[INC5]] = add nuw i64 [[J_05]], 1 ; O1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC5]], [[NUMELEMS]] -; O1-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4]], 
!llvm.loop [[LOOP6:![0-9]+]] +; O1-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE]], label %[[FOR_BODY5]], !llvm.loop [[LOOP6:![0-9]+]] ; ; O2-LABEL: define dso_local void @_Z7computeRSt6vectorIiSaIiEEy( ; O2-SAME: ptr nofree noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; O2-NEXT: [[ENTRY:.*]]: ; O2-NEXT: [[CMP24_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0 ; O2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[DATA]], align 8 +; O2-NEXT: br i1 [[CMP24_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_COND1_PREHEADER_PREHEADER:.*]] +; O2: [[FOR_COND1_PREHEADER_PREHEADER]]: ; O2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8 ; O2-NEXT: [[N_VEC:%.*]] = and i64 [[NUMELEMS]], -8 ; O2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[NUMELEMS]], [[N_VEC]] ; O2-NEXT: br label %[[FOR_COND1_PREHEADER:.*]] ; O2: [[FOR_COND1_PREHEADER]]: -; O2-NEXT: [[I_06:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[INC7:%.*]], %[[FOR_COND_CLEANUP3:.*]] ] -; O2-NEXT: br i1 [[CMP24_NOT]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4_PREHEADER:.*]] -; O2: [[FOR_BODY4_PREHEADER]]: +; O2-NEXT: [[I_06:%.*]] = phi i64 [ [[INC7:%.*]], %[[FOR_COND_CLEANUP3:.*]] ], [ 0, %[[FOR_COND1_PREHEADER_PREHEADER]] ] ; O2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY4_PREHEADER9:.*]], label %[[VECTOR_BODY:.*]] ; O2: [[VECTOR_BODY]]: -; O2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], [ 0, %[[FOR_BODY4_PREHEADER]] ] +; O2-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ], [ 0, %[[FOR_COND1_PREHEADER]] ] ; O2-NEXT: [[TMP1:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP0]], i64 [[INDEX]] ; O2-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 ; O2-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[INT_TBAA0:![0-9]+]] @@ -69,9 +69,9 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef 
nonnull align 8 ; O2: [[MIDDLE_BLOCK]]: ; O2-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP3]], label %[[FOR_BODY4_PREHEADER9]] ; O2: [[FOR_BODY4_PREHEADER9]]: -; O2-NEXT: [[J_05_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY4_PREHEADER]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ] +; O2-NEXT: [[J_05_PH:%.*]] = phi i64 [ 0, %[[FOR_COND1_PREHEADER]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ] ; O2-NEXT: br label %[[FOR_BODY4:.*]] -; O2: [[FOR_COND_CLEANUP:.*]]: +; O2: [[FOR_COND_CLEANUP]]: ; O2-NEXT: ret void ; O2: [[FOR_COND_CLEANUP3]]: ; O2-NEXT: [[INC7]] = add nuw nsw i64 [[I_06]], 1 @@ -117,23 +117,23 @@ define dso_local void @_Z7computeRSt6vectorIiSaIiEEy(ptr noundef nonnull align 8 ; O3: [[MIDDLE_BLOCK]]: ; O3-NEXT: br i1 [[CMP_N]], label %[[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label %[[FOR_BODY4_US_PREHEADER]] ; O3: [[FOR_BODY4_US_PREHEADER]]: -; O3-NEXT: [[J_05_US_PH:%.*]] = phi i64 [ 0, %[[FOR_COND1_PREHEADER_US]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ] +; O3-NEXT: [[J_05_PH:%.*]] = phi i64 [ 0, %[[FOR_COND1_PREHEADER_US]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ] ; O3-NEXT: br label %[[FOR_BODY4_US:.*]] -; O3: [[FOR_BODY4_US]]: -; O3-NEXT: [[J_05_US:%.*]] = phi i64 [ [[INC5_US:%.*]], %[[FOR_BODY4_US]] ], [ [[J_05_US_PH]], %[[FOR_BODY4_US_PREHEADER]] ] -; O3-NEXT: [[ADD_PTR_I_US:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP0]], i64 [[J_05_US]] -; O3-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_I_US]], align 4, !tbaa [[INT_TBAA0]] -; O3-NEXT: [[INC_US:%.*]] = add nsw i32 [[TMP6]], 1 -; O3-NEXT: store i32 [[INC_US]], ptr [[ADD_PTR_I_US]], align 4, !tbaa [[INT_TBAA0]] -; O3-NEXT: [[INC5_US]] = add nuw i64 [[J_05_US]], 1 -; O3-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC5_US]], [[NUMELEMS]] -; O3-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label %[[FOR_BODY4_US]], !llvm.loop [[LOOP8:![0-9]+]] +; O3: [[FOR_COND_CLEANUP]]: +; O3-NEXT: ret void ; O3: [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]]: ; O3-NEXT: [[INC7_US]] = add nuw nsw i64 
[[I_06_US]], 1 ; O3-NEXT: [[EXITCOND8_NOT:%.*]] = icmp eq i64 [[INC7_US]], 100 -; O3-NEXT: br i1 [[EXITCOND8_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_COND1_PREHEADER_US]], !llvm.loop [[LOOP9:![0-9]+]] -; O3: [[FOR_COND_CLEANUP]]: -; O3-NEXT: ret void +; O3-NEXT: br i1 [[EXITCOND8_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_COND1_PREHEADER_US]], !llvm.loop [[LOOP8:![0-9]+]] +; O3: [[FOR_BODY4_US]]: +; O3-NEXT: [[J_05:%.*]] = phi i64 [ [[INC5:%.*]], %[[FOR_BODY4_US]] ], [ [[J_05_PH]], %[[FOR_BODY4_US_PREHEADER]] ] +; O3-NEXT: [[ADD_PTR_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP0]], i64 [[J_05]] +; O3-NEXT: [[TMP6:%.*]] = load i32, ptr [[ADD_PTR_I]], align 4, !tbaa [[INT_TBAA0]] +; O3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP6]], 1 +; O3-NEXT: store i32 [[INC]], ptr [[ADD_PTR_I]], align 4, !tbaa [[INT_TBAA0]] +; O3-NEXT: [[INC5]] = add nuw i64 [[J_05]], 1 +; O3-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC5]], [[NUMELEMS]] +; O3-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US]], label %[[FOR_BODY4_US]], !llvm.loop [[LOOP9:![0-9]+]] ; entry: %data.addr = alloca ptr, align 8 @@ -265,6 +265,6 @@ declare void @llvm.lifetime.end.p0(ptr nocapture) ; O3: [[META5]] = !{!"llvm.loop.mustprogress"} ; O3: [[META6]] = !{!"llvm.loop.isvectorized", i32 1} ; O3: [[META7]] = !{!"llvm.loop.unroll.runtime.disable"} -; O3: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]], [[META7]], [[META6]]} -; O3: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]]} +; O3: [[LOOP8]] = distinct !{[[LOOP8]], [[META5]]} +; O3: [[LOOP9]] = distinct !{[[LOOP9]], [[META5]], [[META7]], [[META6]]} ;. 
diff --git a/llvm/test/Transforms/PhaseOrdering/unswitch-cold-func.ll b/llvm/test/Transforms/PhaseOrdering/unswitch-cold-func.ll index a6ebdf052411d..531869c1c113b 100644 --- a/llvm/test/Transforms/PhaseOrdering/unswitch-cold-func.ll +++ b/llvm/test/Transforms/PhaseOrdering/unswitch-cold-func.ll @@ -3,8 +3,9 @@ ; RUN: opt < %s -passes='pgo-force-function-attrs,function(loop-mssa(simple-loop-unswitch))' -pgo-kind=pgo-instr-use-pipeline -pgo-cold-func-opt=optsize -S | FileCheck %s ; RUN: opt < %s -passes='pgo-force-function-attrs,function(loop-mssa(simple-loop-unswitch))' -pgo-kind=pgo-instr-use-pipeline -pgo-cold-func-opt=minsize -S | FileCheck %s -;; Check that non-trivial loop unswitching is not applied to a cold loop in a -;; cold loop nest. +;; Check that trivial loop unswitching is applied to a cold loop in a +;; cold loop nest. Another testcase, unswitch-nontrivial-cold-func.ll, ensures that +;; non-trivial unswitching is not applied to a cold loop. ;; IR was generated from the following loop nest, profiled when called ;; with M=0 and N=0. 
@@ -18,19 +19,23 @@ define void @_Z11functionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ptr %C) !prof !36 { ; CHECK-LABEL: define void @_Z11functionbiiPiS_S_ -; CHECK-SAME: (i1 [[COND:%.*]], i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) {{.*}}{ +; CHECK-SAME: (i1 [[COND:%.*]], i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) #[[ATTR0:[0-9]+]] {{.*}}{ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP19_NOT:%.*]] = icmp eq i32 [[M]], 0 ; CHECK-NEXT: br i1 [[CMP19_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], !prof [[PROF17:![0-9]+]] ; CHECK: for.cond1.preheader.lr.ph: ; CHECK-NEXT: [[CMP217_NOT:%.*]] = icmp eq i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP217_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT:%.*]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT:%.*]] +; CHECK: for.cond1.preheader.lr.ph.split: ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; CHECK: for.cond1.preheader: -; CHECK-NEXT: [[J_020:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH]] ], [ [[INC10:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] -; CHECK-NEXT: br i1 [[CMP217_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER:%.*]] +; CHECK-NEXT: [[J_020:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT]] ], [ [[INC10:%.*]], [[FOR_COND_CLEANUP3:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER:%.*]] ; CHECK: for.body4.preheader: ; CHECK-NEXT: br label [[FOR_BODY4:%.*]] ; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT]] +; CHECK: for.cond.cleanup.loopexit.split: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/PhaseOrdering/unswitch-nontrivial-cold-func.ll b/llvm/test/Transforms/PhaseOrdering/unswitch-nontrivial-cold-func.ll new file mode 100644 index 0000000000000..6b84b45ef4772 --- /dev/null +++ b/llvm/test/Transforms/PhaseOrdering/unswitch-nontrivial-cold-func.ll @@ -0,0 +1,114 @@ +; NOTE: 
Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --scrub-attributes --version 2 + +; RUN: opt < %s -passes='pgo-force-function-attrs,function(loop-mssa(simple-loop-unswitch))' -pgo-kind=pgo-instr-use-pipeline -pgo-cold-func-opt=optsize -S | FileCheck %s +; RUN: opt < %s -passes='pgo-force-function-attrs,function(loop-mssa(simple-loop-unswitch))' -pgo-kind=pgo-instr-use-pipeline -pgo-cold-func-opt=minsize -S | FileCheck %s + +;; Check that genuinely non-trivial loop unswitching - where the optimizer +;; would have to duplicate the entire loop body - is suppressed for cold +;; (optsize/minsize) functions. +;; +;; The branch on %cond in the loop header is loop-invariant, but both of its +;; successors (if.then, if.else) stay inside the loop, so unswitching it +;; requires producing two full copies of the loop. That code-size increase +;; must be blocked when the function is cold. +;; +;; Contrast with unswitch-cold-func.ll, where the invariant condition can be +;; hoisted via a latch-bypass rewrite that introduces no code duplication and +;; is therefore allowed even for cold functions. +;; +;; Source: +;; void function(bool cond, int N, int *A, int *B) { +;; for (int i = 0; i < N; i++) { +;; if (cond) A[i]++; +;; else B[i]++; +;; } +;; } +;; profiled when called with N=0 (cold). 
+ +define void @function(i1 %cond, i32 %N, ptr %A, ptr %B) !prof !16 { +; CHECK-LABEL: define void @function +; CHECK-SAME: (i1 [[COND:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {{.*}}{ +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp sle i32 [[N]], 0 +; CHECK-NEXT: br i1 [[CMP_NOT]], label [[EXIT:%.*]], label [[LOOP_PREHEADER:%.*]], !prof [[PROF17:![0-9]+]] +; CHECK: loop.preheader: +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[I_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: [[VAL_A:%.*]] = load i32, ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[INC_A:%.*]] = add i32 [[VAL_A]], 1 +; CHECK-NEXT: store i32 [[INC_A]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: if.else: +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[I]] +; CHECK-NEXT: [[VAL_B:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[INC_B:%.*]] = add i32 [[VAL_B]], 1 +; CHECK-NEXT: store i32 [[INC_B]], ptr [[GEP_B]], align 4 +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT_LOOPEXIT:%.*]], label [[LOOP_HEADER]], !prof [[PROF17]] +; CHECK: exit.loopexit: +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + %cmp.not = icmp sle i32 %N, 0 + br i1 %cmp.not, label %exit, label %loop.preheader, !prof !17 + +loop.preheader: + br label %loop.header + +loop.header: + %i = phi i32 [ 0, %loop.preheader ], [ %i.next, %loop.latch ] + br i1 %cond, label %if.then, label %if.else + +if.then: + %gep.a = getelementptr inbounds i32, ptr %A, i32 %i + %val.a = load 
i32, ptr %gep.a, align 4 + %inc.a = add i32 %val.a, 1 + store i32 %inc.a, ptr %gep.a, align 4 + br label %loop.latch + +if.else: + %gep.b = getelementptr inbounds i32, ptr %B, i32 %i + %val.b = load i32, ptr %gep.b, align 4 + %inc.b = add i32 %val.b, 1 + store i32 %inc.b, ptr %gep.b, align 4 + br label %loop.latch + +loop.latch: + %i.next = add nuw i32 %i, 1 + %exitcond.not = icmp eq i32 %i.next, %N + br i1 %exitcond.not, label %exit, label %loop.header, !prof !17 + +exit: + ret void +} + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9, !10, !11} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 1002} +!4 = !{!"MaxCount", i64 1000} +!5 = !{!"MaxInternalCount", i64 1000} +!6 = !{!"MaxFunctionCount", i64 1} +!7 = !{!"NumCounts", i64 6} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"IsPartialProfile", i64 0} +!10 = !{!"PartialProfileRatio", double 0.000000e+00} +!11 = !{!"DetailedSummary", !12} +!12 = !{!13, !14, !15} +!13 = !{i32 10000, i64 1000, i32 1} +!14 = !{i32 999000, i64 1000, i32 1} +!15 = !{i32 999999, i64 1, i32 3} +!16 = !{!"function_entry_count", i64 1} +!17 = !{!"branch_weights", i32 1, i32 0} + diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll index 331dc1ef4e567..b1d86a67aa1bd 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/uniform-unswitch.ll @@ -20,7 +20,7 @@ define amdgpu_kernel void @uniform_unswitch(ptr nocapture %out, i32 %n, i32 %x) { ; CHECK-LABEL: define amdgpu_kernel void @uniform_unswitch( -; CHECK-SAME: ptr nofree writeonly captures(none) [[OUT:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ptr nofree captures(none) [[OUT:%.*]], i32 [[N:%.*]], i32 [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: 
[[OUT_GLOBAL:%.*]] = addrspacecast ptr [[OUT]] to ptr addrspace(1) ; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N]], 0 @@ -32,12 +32,16 @@ define amdgpu_kernel void @uniform_unswitch(ptr nocapture %out, i32 %n, i32 %x) ; CHECK-NEXT: ret void ; CHECK: for.body: ; CHECK-NEXT: [[I_07:%.*]] = phi i32 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] -; CHECK: if.then: ; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[I_07]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(1) [[OUT_GLOBAL]], i64 [[TMP0]] +; CHECK-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: ; CHECK-NEXT: store i32 [[I_07]], ptr addrspace(1) [[ARRAYIDX]], align 4 ; CHECK-NEXT: br label [[FOR_INC]] +; CHECK: if.else: +; CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(1) [[ARRAYIDX]] to ptr +; CHECK-NEXT: store volatile i32 0, ptr [[TMP2]], align 4 +; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_07]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] @@ -59,14 +63,19 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo for.body: ; preds = %for.inc, %for.body.lr.ph %i.07 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ] - br i1 %cmp1, label %if.then, label %for.inc + br i1 %cmp1, label %if.then, label %if.else if.then: ; preds = %for.body %arrayidx = getelementptr inbounds i32, ptr %out, i32 %i.07 store i32 %i.07, ptr %arrayidx, align 4 br label %for.inc -for.inc: ; preds = %for.body, %if.then +if.else: ; preds = %for.body + %arrayidx2 = getelementptr inbounds i32, ptr %out, i32 %i.07 + store volatile i32 0, ptr %arrayidx2, align 4 + br label %for.inc + +for.inc: ; preds = %if.else, %if.then %inc = add nuw nsw i32 %i.07, 1 %exitcond = icmp eq i32 %inc, %n br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !0 diff --git 
a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch2.ll b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch2.ll index ad674ed11d3d8..3b4478f2dc900 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch2.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch2.ll @@ -17,39 +17,28 @@ define void @_Z11hotFunctionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ptr %C) !prof !36 { ; CHECK-LABEL: define void @_Z11hotFunctionbiiPiS_S_ -; CHECK-SAME: (i1 [[COND:%.*]], i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) !prof [[PROF33:![0-9]+]] { +; CHECK-SAME: (i1 [[COND:%.*]], i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) {{.*}}{ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP19_NOT:%.*]] = icmp eq i32 [[M]], 0 -; CHECK-NEXT: br i1 [[CMP19_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], !prof [[PROF34:![0-9]+]] +; CHECK-NEXT: br i1 [[CMP19_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], !prof [[PROF17:![0-9]+]] ; CHECK: for.cond1.preheader.lr.ph: ; CHECK-NEXT: [[CMP217_NOT:%.*]] = icmp eq i32 [[N]], 0 -; CHECK-NEXT: br i1 [[CMP217_NOT]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT:%.*]], !prof [[PROF35:![0-9]+]] -; CHECK: for.cond1.preheader.lr.ph.split.us: +; CHECK-NEXT: br i1 [[CMP217_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT:%.*]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT:%.*]], !prof [[PROF18:![0-9]+]] +; CHECK: for.cond1.preheader.lr.ph.split: +; CHECK-NEXT: br i1 [[COND]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT:%.*]] +; CHECK: for.cond1.preheader.lr.ph.split.split.us: ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.us: -; CHECK-NEXT: [[J_020_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US]] ], [ [[INC10_US:%.*]], 
[[FOR_COND_CLEANUP3_US:%.*]] ] -; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_US]] +; CHECK-NEXT: [[J_020_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT_US]] ], [ [[INC10_US:%.*]], [[FOR_COND_CLEANUP3_US:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER_US:%.*]] ; CHECK: for.cond.cleanup3.us: ; CHECK-NEXT: [[INC10_US]] = add nuw i32 [[J_020_US]], 1 ; CHECK-NEXT: [[EXITCOND22_NOT_US:%.*]] = icmp eq i32 [[INC10_US]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND22_NOT_US]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_US]], !prof [[PROF34]] -; CHECK: for.cond.cleanup.loopexit.split.us: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: for.cond1.preheader.lr.ph.split: -; CHECK-NEXT: br i1 [[COND]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT:%.*]] -; CHECK: for.cond1.preheader.lr.ph.split.split.us: -; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US1:%.*]] -; CHECK: for.cond1.preheader.us1: -; CHECK-NEXT: [[J_020_US2:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT_US]] ], [ [[INC10_US4:%.*]], [[FOR_COND_CLEANUP3_US3:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER_US:%.*]] -; CHECK: for.cond.cleanup3.us3: -; CHECK-NEXT: [[INC10_US4]] = add nuw i32 [[J_020_US2]], 1 -; CHECK-NEXT: [[EXITCOND22_NOT_US5:%.*]] = icmp eq i32 [[INC10_US4]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND22_NOT_US5]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_US1]], !prof [[PROF34]] +; CHECK-NEXT: br i1 [[EXITCOND22_NOT_US]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT2_US:%.*]], label [[FOR_COND1_PREHEADER_US]], !prof [[PROF17]] ; CHECK: for.body4.preheader.us: ; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER_SPLIT_US_US:%.*]] ; CHECK: for.cond.cleanup3.loopexit.us: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_US3]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_US]] ; CHECK: for.body4.preheader.split.us.us: ; CHECK-NEXT: br label 
[[FOR_BODY4_US_US:%.*]] ; CHECK: for.body4.us.us: @@ -69,11 +58,11 @@ define void @_Z11hotFunctionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ; CHECK-NEXT: [[WIDE_TRIP_COUNT_US_US:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: [[INDVARS_IV_NEXT_US_US]] = add nuw nsw i64 [[INDVARS_IV_US_US]], 1 ; CHECK-NEXT: [[EXITCOND_NOT_US_US:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_US_US]], [[WIDE_TRIP_COUNT_US_US]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT_US_US]], label [[FOR_COND_CLEANUP3_LOOPEXIT_SPLIT_US_US:%.*]], label [[FOR_BODY4_US_US]], !prof [[PROF35]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT_US_US]], label [[FOR_COND_CLEANUP3_LOOPEXIT_SPLIT_US_US:%.*]], label [[FOR_BODY4_US_US]], !prof [[PROF18]] ; CHECK: for.cond.cleanup3.loopexit.split.us.us: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_LOOPEXIT_US:%.*]] -; CHECK: for.cond.cleanup.loopexit.split.split.us: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT:%.*]] +; CHECK: for.cond.cleanup.loopexit.split2.us: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; CHECK: for.cond1.preheader.lr.ph.split.split: ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; CHECK: for.cond1.preheader: @@ -83,11 +72,11 @@ define void @_Z11hotFunctionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER_SPLIT:%.*]] ; CHECK: for.body4.preheader.split: ; CHECK-NEXT: br label [[FOR_BODY4:%.*]] -; CHECK: for.cond.cleanup.loopexit.split.split: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT]] -; CHECK: for.cond.cleanup.loopexit.split: +; CHECK: for.cond.cleanup.loopexit.split2: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]] ; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT]] +; CHECK: for.cond.cleanup.loopexit.split: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void @@ -98,7 +87,7 @@ define void @_Z11hotFunctionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ; CHECK: for.cond.cleanup3: ; 
CHECK-NEXT: [[INC10]] = add nuw i32 [[J_020]], 1 ; CHECK-NEXT: [[EXITCOND22_NOT:%.*]] = icmp eq i32 [[INC10]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND22_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_SPLIT:%.*]], label [[FOR_COND1_PREHEADER]], !prof [[PROF34]] +; CHECK-NEXT: br i1 [[EXITCOND22_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT2:%.*]], label [[FOR_COND1_PREHEADER]], !prof [[PROF17]] ; CHECK: for.body4: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY4_PREHEADER_SPLIT]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] @@ -113,7 +102,7 @@ define void @_Z11hotFunctionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3_LOOPEXIT_SPLIT:%.*]], label [[FOR_BODY4]], !prof [[PROF35]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3_LOOPEXIT_SPLIT:%.*]], label [[FOR_BODY4]], !prof [[PROF18]] ; entry: %cmp19.not = icmp eq i32 %M, 0 diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch3.ll b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch3.ll index 59b8404b3e9ef..f07b1e71239cb 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch3.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/PGO-nontrivial-unswitch3.ll @@ -17,39 +17,28 @@ define void @_Z11hotFunctionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ptr %C) !prof !36 { ; CHECK-LABEL: define void @_Z11hotFunctionbiiPiS_S_ -; CHECK-SAME: (i1 [[COND:%.*]], i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]]) !prof [[PROF18:![0-9]+]] { +; CHECK-SAME: (i1 [[COND:%.*]], i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]], ptr 
[[C:%.*]]) {{.*}}{ ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP19_NOT:%.*]] = icmp eq i32 [[M]], 0 ; CHECK-NEXT: br i1 [[CMP19_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_LR_PH:%.*]], !prof [[PROF19:![0-9]+]] ; CHECK: for.cond1.preheader.lr.ph: ; CHECK-NEXT: [[CMP217_NOT:%.*]] = icmp eq i32 [[N]], 0 -; CHECK-NEXT: br i1 [[CMP217_NOT]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT:%.*]], !prof [[PROF20:![0-9]+]] -; CHECK: for.cond1.preheader.lr.ph.split.us: +; CHECK-NEXT: br i1 [[CMP217_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT:%.*]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT:%.*]], !prof [[PROF20:![0-9]+]] +; CHECK: for.cond1.preheader.lr.ph.split: +; CHECK-NEXT: br i1 [[COND]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT:%.*]] +; CHECK: for.cond1.preheader.lr.ph.split.split.us: ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US:%.*]] ; CHECK: for.cond1.preheader.us: -; CHECK-NEXT: [[J_020_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_US]] ], [ [[INC10_US:%.*]], [[FOR_COND_CLEANUP3_US:%.*]] ] -; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_US]] +; CHECK-NEXT: [[J_020_US:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT_US]] ], [ [[INC10_US:%.*]], [[FOR_COND_CLEANUP3_US:%.*]] ] +; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER_US:%.*]] ; CHECK: for.cond.cleanup3.us: ; CHECK-NEXT: [[INC10_US]] = add nuw i32 [[J_020_US]], 1 ; CHECK-NEXT: [[EXITCOND22_NOT_US:%.*]] = icmp eq i32 [[INC10_US]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND22_NOT_US]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_US]], !prof [[PROF19]] -; CHECK: for.cond.cleanup.loopexit.split.us: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] -; CHECK: for.cond1.preheader.lr.ph.split: -; CHECK-NEXT: br i1 [[COND]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT:%.*]] -; CHECK: 
for.cond1.preheader.lr.ph.split.split.us: -; CHECK-NEXT: br label [[FOR_COND1_PREHEADER_US1:%.*]] -; CHECK: for.cond1.preheader.us1: -; CHECK-NEXT: [[J_020_US2:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER_LR_PH_SPLIT_SPLIT_US]] ], [ [[INC10_US4:%.*]], [[FOR_COND_CLEANUP3_US3:%.*]] ] -; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER_US:%.*]] -; CHECK: for.cond.cleanup3.us3: -; CHECK-NEXT: [[INC10_US4]] = add nuw i32 [[J_020_US2]], 1 -; CHECK-NEXT: [[EXITCOND22_NOT_US5:%.*]] = icmp eq i32 [[INC10_US4]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND22_NOT_US5]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_SPLIT_US:%.*]], label [[FOR_COND1_PREHEADER_US1]], !prof [[PROF19]] +; CHECK-NEXT: br i1 [[EXITCOND22_NOT_US]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT2_US:%.*]], label [[FOR_COND1_PREHEADER_US]], !prof [[PROF19]] ; CHECK: for.body4.preheader.us: ; CHECK-NEXT: br label [[FOR_BODY4_PREHEADER_SPLIT_US_US:%.*]] ; CHECK: for.cond.cleanup3.loopexit.us: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_US3]] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_US]] ; CHECK: for.body4.preheader.split.us.us: ; CHECK-NEXT: br label [[FOR_BODY4_US_US:%.*]] ; CHECK: for.body4.us.us: @@ -72,8 +61,8 @@ define void @_Z11hotFunctionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ; CHECK-NEXT: br i1 [[EXITCOND_NOT_US_US]], label [[FOR_COND_CLEANUP3_LOOPEXIT_SPLIT_US_US:%.*]], label [[FOR_BODY4_US_US]], !prof [[PROF20]] ; CHECK: for.cond.cleanup3.loopexit.split.us.us: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP3_LOOPEXIT_US:%.*]] -; CHECK: for.cond.cleanup.loopexit.split.split.us: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT:%.*]] +; CHECK: for.cond.cleanup.loopexit.split2.us: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ; CHECK: for.cond1.preheader.lr.ph.split.split: ; CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] ; CHECK: for.cond1.preheader: @@ -83,11 +72,11 @@ define void @_Z11hotFunctionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ; CHECK-NEXT: br label 
[[FOR_BODY4_PREHEADER_SPLIT:%.*]] ; CHECK: for.body4.preheader.split: ; CHECK-NEXT: br label [[FOR_BODY4:%.*]] -; CHECK: for.cond.cleanup.loopexit.split.split: -; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT]] -; CHECK: for.cond.cleanup.loopexit.split: +; CHECK: for.cond.cleanup.loopexit.split2: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT]] ; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: br label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT]] +; CHECK: for.cond.cleanup.loopexit.split: ; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] ; CHECK: for.cond.cleanup: ; CHECK-NEXT: ret void @@ -98,7 +87,7 @@ define void @_Z11hotFunctionbiiPiS_S_(i1 %cond, i32 %M, i32 %N, ptr %A, ptr %B, ; CHECK: for.cond.cleanup3: ; CHECK-NEXT: [[INC10]] = add nuw i32 [[J_020]], 1 ; CHECK-NEXT: [[EXITCOND22_NOT:%.*]] = icmp eq i32 [[INC10]], [[M]] -; CHECK-NEXT: br i1 [[EXITCOND22_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT_SPLIT:%.*]], label [[FOR_COND1_PREHEADER]], !prof [[PROF19]] +; CHECK-NEXT: br i1 [[EXITCOND22_NOT]], label [[FOR_COND_CLEANUP_LOOPEXIT_SPLIT2:%.*]], label [[FOR_COND1_PREHEADER]], !prof [[PROF19]] ; CHECK: for.body4: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[FOR_BODY4_PREHEADER_SPLIT]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDVARS_IV]] diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-loop-guard.ll b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-loop-guard.ll new file mode 100644 index 0000000000000..d08c03e7a31f8 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/trivial-unswitch-loop-guard.ll @@ -0,0 +1,1031 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -passes='loop-mssa(simple-loop-unswitch),verify' -S < %s | FileCheck %s + +;; Check that a loop-invariant guard branch in a perfect nest is trivially +;; unswitched. 
The outer loop's header branches to the latch (skipping the +;; inner loop entirely) when N == 0, or falls through into the inner loop. +;; Because N is loop-invariant, the new code in unswitchTrivialBranch rewires +;; the latch arm to point at the outer-loop exit, making the branch look like +;; an ordinary exit branch and allowing the standard trivial-unswitch logic to +;; hoist it out of the outer loop. +;; +;; Source: +;; void f(int M, int N, int *A, int *B) { +;; for (int j = 0; j < M; j++) { +;; if (N <= 0) continue; // invariant guard branches to latch +;; for (int i = 0; i < N; i++) +;; A[i] = B[i] + 1; +;; } +;; } +;; The key CFG edge is: outer.header --[N==0]--> outer.latch (the latch), +;; outer.header --[N!=0]--> inner.preheader (inside the outer loop). + +define void @perfect_nest_guard(i32 %M, i32 %N, ptr %A, ptr %B) { +; CHECK-LABEL: define void @perfect_nest_guard( +; CHECK-SAME: i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP_M:%.*]] = icmp sle i32 [[M]], 0 +; CHECK-NEXT: br i1 [[CMP_M]], label %[[EXIT:.*]], label %[[OUTER_PREHEADER:.*]] +; CHECK: [[OUTER_PREHEADER]]: +; CHECK-NEXT: [[GUARD:%.*]] = icmp sle i32 [[N]], 0 +; CHECK-NEXT: br i1 [[GUARD]], label %[[EXIT_LOOPEXIT_SPLIT:.*]], label %[[OUTER_PREHEADER_SPLIT:.*]] +; CHECK: [[OUTER_PREHEADER_SPLIT]]: +; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] +; CHECK: [[OUTER_HEADER]]: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[OUTER_PREHEADER_SPLIT]] ], [ [[J_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] +; CHECK-NEXT: br label %[[INNER_PREHEADER:.*]] +; CHECK: [[INNER_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER_HEADER:.*]] +; CHECK: [[INNER_HEADER]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[INNER_PREHEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[I]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add i32 [[VAL]], 1 +; 
CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[INNER_LATCH]] +; CHECK: [[INNER_LATCH]]: +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_INNER:%.*]] = icmp eq i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_INNER]], label %[[OUTER_LATCH_LOOPEXIT:.*]], label %[[INNER_HEADER]] +; CHECK: [[OUTER_LATCH_LOOPEXIT]]: +; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: [[J_NEXT]] = add nuw i32 [[J]], 1 +; CHECK-NEXT: [[EXITCOND_OUTER:%.*]] = icmp eq i32 [[J_NEXT]], [[M]] +; CHECK-NEXT: br i1 [[EXITCOND_OUTER]], label %[[EXIT_LOOPEXIT:.*]], label %[[OUTER_HEADER]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT_LOOPEXIT_SPLIT]] +; CHECK: [[EXIT_LOOPEXIT_SPLIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %cmp.M = icmp sle i32 %M, 0 + br i1 %cmp.M, label %exit, label %outer.preheader + +outer.preheader: + %guard = icmp sle i32 %N, 0 + br label %outer.header + +outer.header: + %j = phi i32 [ 0, %outer.preheader ], [ %j.next, %outer.latch ] + br i1 %guard, label %outer.latch, label %inner.preheader + +inner.preheader: + br label %inner.header + +inner.header: + %i = phi i32 [ 0, %inner.preheader ], [ %i.next, %inner.latch ] + %gep.B = getelementptr inbounds i32, ptr %B, i32 %i + %val = load i32, ptr %gep.B, align 4 + %inc = add i32 %val, 1 + %gep.A = getelementptr inbounds i32, ptr %A, i32 %i + store i32 %inc, ptr %gep.A, align 4 + br label %inner.latch + +inner.latch: + %i.next = add nuw i32 %i, 1 + %exitcond.inner = icmp eq i32 %i.next, %N + br i1 %exitcond.inner, label %outer.latch, label %inner.header + +outer.latch: + %j.next = add nuw i32 %j, 1 + %exitcond.outer = icmp eq i32 %j.next, %M + br i1 %exitcond.outer, label %exit, label %outer.header + +exit: + ret void +} + +;; This loopnest is similar to @perfect_nest_guard, except that the 
outer loop +;; is infinite. So the trivial unswitching of the inner loop guard is not +;; legal. +;; +;; Source: +;; void f(int N, int *A, int *B) { +;; while (true) { +;; if (N <= 0) continue; // invariant guard branches to latch +;; for (int i = 0; i < N; i++) +;; A[i] = B[i] + 1; +;; } +;; } +define void @perfect_nest_guard2(i32 %M, i32 %N, ptr %A, ptr %B) { +; CHECK-LABEL: define void @perfect_nest_guard2( +; CHECK-SAME: i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GUARD:%.*]] = icmp sle i32 [[N]], 0 +; CHECK-NEXT: br label %[[INNER_PREHEADER:.*]] +; CHECK: [[INNER_PREHEADER]]: +; CHECK-NEXT: br i1 [[GUARD]], label %[[OUTER_LATCH:.*]], label %[[INNER_PREHEADER1:.*]] +; CHECK: [[INNER_PREHEADER1]]: +; CHECK-NEXT: br label %[[INNER_HEADER:.*]] +; CHECK: [[INNER_HEADER]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[INNER_PREHEADER1]] ], [ [[I_NEXT:%.*]], %[[INNER_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[I]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add i32 [[VAL]], 1 +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[INNER_LATCH]] +; CHECK: [[INNER_LATCH]]: +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_INNER:%.*]] = icmp eq i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_INNER]], label %[[OUTER_LATCH_LOOPEXIT:.*]], label %[[INNER_HEADER]] +; CHECK: [[OUTER_LATCH_LOOPEXIT]]: +; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: br label %[[INNER_PREHEADER]] +; CHECK: [[EXIT:.*:]] +; CHECK-NEXT: ret void +; +entry: + %guard = icmp sle i32 %N, 0 + br label %outer.header + +outer.header: + br i1 %guard, label %outer.latch, label %inner.preheader + +inner.preheader: + br label %inner.header + +inner.header: + %i = phi i32 [ 0, 
%inner.preheader ], [ %i.next, %inner.latch ] + %gep.B = getelementptr inbounds i32, ptr %B, i32 %i + %val = load i32, ptr %gep.B, align 4 + %inc = add i32 %val, 1 + %gep.A = getelementptr inbounds i32, ptr %A, i32 %i + store i32 %inc, ptr %gep.A, align 4 + br label %inner.latch + +inner.latch: + %i.next = add nuw i32 %i, 1 + %exitcond.inner = icmp eq i32 %i.next, %N + br i1 %exitcond.inner, label %outer.latch, label %inner.header + +outer.latch: + br label %outer.header + +exit: + ret void +} + +;; A negative test in which trivial unswitching cannot be done because there is a +;; side effect before the branch +;; +;; Source: +;; void f(int N, int *A, int *B) { +;; while (true) { +;; B[0] = 0; +;; if (N <= 0) continue; // invariant guard branches to latch +;; for (int i = 0; i < N; i++) +;; A[i] = B[i] + 1; +;; } +;; } +define void @not_perfect_nest_guard(i32 %M, i32 %N, ptr %A, ptr %B) { +; CHECK-LABEL: define void @not_perfect_nest_guard( +; CHECK-SAME: i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GUARD:%.*]] = icmp sle i32 [[N]], 0 +; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] +; CHECK: [[OUTER_HEADER]]: +; CHECK-NEXT: store i32 0, ptr [[B]], align 4 +; CHECK-NEXT: br i1 [[GUARD]], label %[[OUTER_LATCH:.*]], label %[[INNER_PREHEADER:.*]] +; CHECK: [[INNER_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER_HEADER:.*]] +; CHECK: [[INNER_HEADER]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[INNER_PREHEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[I]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add i32 [[VAL]], 1 +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[INNER_LATCH]] +; CHECK: [[INNER_LATCH]]: +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: 
[[EXITCOND_INNER:%.*]] = icmp eq i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_INNER]], label %[[OUTER_LATCH_LOOPEXIT:.*]], label %[[INNER_HEADER]] +; CHECK: [[OUTER_LATCH_LOOPEXIT]]: +; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: br label %[[OUTER_HEADER]] +; CHECK: [[EXIT:.*:]] +; CHECK-NEXT: ret void +; +entry: + %guard = icmp sle i32 %N, 0 + br label %outer.header + +outer.header: + store i32 0, ptr %B, align 4 + br i1 %guard, label %outer.latch, label %inner.preheader + +inner.preheader: + br label %inner.header + +inner.header: + %i = phi i32 [ 0, %inner.preheader ], [ %i.next, %inner.latch ] + %gep.B = getelementptr inbounds i32, ptr %B, i32 %i + %val = load i32, ptr %gep.B, align 4 + %inc = add i32 %val, 1 + %gep.A = getelementptr inbounds i32, ptr %A, i32 %i + store i32 %inc, ptr %gep.A, align 4 + br label %inner.latch + +inner.latch: + %i.next = add nuw i32 %i, 1 + %exitcond.inner = icmp eq i32 %i.next, %N + br i1 %exitcond.inner, label %outer.latch, label %inner.header + +outer.latch: + br label %outer.header + +exit: + ret void +} + +;; A negative test in which trivial unswitching cannot be done because there is a +;; side effect in the latch of the outer loop +;; +;; Source: +;; void f(int N, int *A, int *B) { +;; while (true) { +;; if (N > 0) { // invariant guard branches to latch +;; for (int i = 0; i < N; i++) +;; A[i] = B[i] + 1; +;; } +;; B[0] = 0; +;; } +;; } +;; +define void @not_perfect_nest_guard2(i32 %M, i32 %N, ptr %A, ptr %B) { +; CHECK-LABEL: define void @not_perfect_nest_guard2( +; CHECK-SAME: i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GUARD:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] +; CHECK: [[OUTER_HEADER]]: +; CHECK-NEXT: br i1 [[GUARD]], label %[[INNER_PREHEADER:.*]], label %[[OUTER_LATCH:.*]] +; CHECK: [[INNER_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER_HEADER:.*]] +; CHECK: [[INNER_HEADER]]: 
+; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[INNER_PREHEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[I]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add i32 [[VAL]], 1 +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[INNER_LATCH]] +; CHECK: [[INNER_LATCH]]: +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_INNER:%.*]] = icmp eq i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_INNER]], label %[[OUTER_LATCH_LOOPEXIT:.*]], label %[[INNER_HEADER]] +; CHECK: [[OUTER_LATCH_LOOPEXIT]]: +; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: store i32 0, ptr [[B]], align 4 +; CHECK-NEXT: br label %[[OUTER_HEADER]] +; CHECK: [[EXIT:.*:]] +; CHECK-NEXT: ret void +; +entry: + %guard = icmp sgt i32 %N, 0 + br label %outer.header + +outer.header: + br i1 %guard, label %inner.preheader, label %outer.latch + +inner.preheader: + br label %inner.header + +inner.header: + %i = phi i32 [ 0, %inner.preheader ], [ %i.next, %inner.latch ] + %gep.B = getelementptr inbounds i32, ptr %B, i32 %i + %val = load i32, ptr %gep.B, align 4 + %inc = add i32 %val, 1 + %gep.A = getelementptr inbounds i32, ptr %A, i32 %i + store i32 %inc, ptr %gep.A, align 4 + br label %inner.latch + +inner.latch: + %i.next = add nuw i32 %i, 1 + %exitcond.inner = icmp eq i32 %i.next, %N + br i1 %exitcond.inner, label %outer.latch, label %inner.header + +outer.latch: + store i32 0, ptr %B, align 4 + br label %outer.header + +exit: + ret void +} + +;; A negative test in which trivial unswitching cannot be done because the +;; latch of the outer loop has multiple exit blocks. 
+;; +;; void bad_outer_latch(int M, int N, int *A, int *B) { +;; while (true) { +;; if (N > 0) { // invariant guard branches to latch +;; for (int i = 0; i < N; i++) +;; A[i] = B[i] + 1; +;; } +;; +;; // The latch now has multiple exit edges based on B[0] +;; switch (B[0]) { +;; case 0: +;; A[0] = 1; +;; return; // branches to %exit +;; case 1: +;; return; // branches to %exit2 +;; default: +;; break; // loops back to %outer.header +;; } +;; } +;; } +;; +define void @bad_outer_latch(i32 %N, ptr %A, ptr %B) { +; CHECK-LABEL: define void @bad_outer_latch( +; CHECK-SAME: i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[GUARD:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] +; CHECK: [[OUTER_HEADER]]: +; CHECK-NEXT: br i1 [[GUARD]], label %[[INNER_PREHEADER:.*]], label %[[OUTER_LATCH:.*]] +; CHECK: [[INNER_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER_HEADER:.*]] +; CHECK: [[INNER_HEADER]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[INNER_PREHEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[I]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add i32 [[VAL]], 1 +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[INNER_LATCH]] +; CHECK: [[INNER_LATCH]]: +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_INNER:%.*]] = icmp eq i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_INNER]], label %[[OUTER_LATCH_LOOPEXIT:.*]], label %[[INNER_HEADER]] +; CHECK: [[OUTER_LATCH_LOOPEXIT]]: +; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: [[M:%.*]] = load i32, ptr [[B]], align 4 +; CHECK-NEXT: switch i32 [[M]], label %[[OUTER_HEADER]] [ +; CHECK-NEXT: i32 0, label %[[EXIT:.*]] +; CHECK-NEXT: i32 1, label %[[EXIT2:.*]] +; 
CHECK-NEXT: ] +; CHECK: [[EXIT]]: +; CHECK-NEXT: store i32 1, ptr [[A]], align 4 +; CHECK-NEXT: br label %[[EXIT3:.*]] +; CHECK: [[EXIT2]]: +; CHECK-NEXT: br label %[[EXIT3]] +; CHECK: [[EXIT3]]: +; CHECK-NEXT: ret void +; +entry: + %guard = icmp sgt i32 %N, 0 + br label %outer.header + +outer.header: + br i1 %guard, label %inner.preheader, label %outer.latch + +inner.preheader: + br label %inner.header + +inner.header: + %i = phi i32 [ 0, %inner.preheader ], [ %i.next, %inner.latch ] + %gep.B = getelementptr inbounds i32, ptr %B, i32 %i + %val = load i32, ptr %gep.B, align 4 + %inc = add i32 %val, 1 + %gep.A = getelementptr inbounds i32, ptr %A, i32 %i + store i32 %inc, ptr %gep.A, align 4 + br label %inner.latch + +inner.latch: + %i.next = add nuw i32 %i, 1 + %exitcond.inner = icmp eq i32 %i.next, %N + br i1 %exitcond.inner, label %outer.latch, label %inner.header + +outer.latch: + %sw = load i32, ptr %B, align 4 + switch i32 %sw, label %outer.header [ + i32 0, label %exit + i32 1, label %exit2 + ] + +exit: + store i32 1, ptr %A, align 4 + br label %exit2 + +exit2: + ret void +} + +;; A negative test in which trivial unswitching cannot be done because a value +;; calculated in the loop is used in a phi in the exit block of the loop. +;; +;; Source: +;; int f(int M, int N, int *A, int *B) { +;; int sum = 42; // 1. Initialized before the outer loop +;; while (M > 0) { +;; sum = sum + 1; // 2. Updated in the outer header to a new initial value +;; if (N > 0) { // invariant guard branches to latch +;; for (int i = 0; i < N; i++) { +;; A[i] = B[i] + 1; +;; sum += A[i]; // 3. Calculated/Accumulated in the inner loop +;; } +;; } +;; B[0] = 0; +;; M--; +;; } +;; return sum; // 4. 
Used in outer loop exit block +;; } +;; +define i32 @exit_phi(i32 %M, i32 %N, ptr %A, ptr %B) { +; CHECK-LABEL: define i32 @exit_phi( +; CHECK-SAME: i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[OUTER_COND:%.*]] = icmp sgt i32 [[M]], 0 +; CHECK-NEXT: [[GUARD:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[OUTER_COND]], label %[[OUTER_HEADER_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[OUTER_HEADER_PREHEADER]]: +; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] +; CHECK: [[OUTER_HEADER]]: +; CHECK-NEXT: [[IV_M:%.*]] = phi i32 [ [[IV_M_NEXT:%.*]], %[[OUTER_LATCH:.*]] ], [ [[M]], %[[OUTER_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[SUM_OUTER:%.*]] = phi i32 [ [[SUM_LATCH:%.*]], %[[OUTER_LATCH]] ], [ 42, %[[OUTER_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[SUM_NEW_INIT:%.*]] = add i32 [[SUM_OUTER]], 1 +; CHECK-NEXT: br i1 [[GUARD]], label %[[INNER_PREHEADER:.*]], label %[[OUTER_LATCH]] +; CHECK: [[INNER_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER_HEADER:.*]] +; CHECK: [[INNER_HEADER]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[INNER_PREHEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_LATCH:.*]] ] +; CHECK-NEXT: [[SUM_INNER:%.*]] = phi i32 [ [[SUM_NEW_INIT]], %[[INNER_PREHEADER]] ], [ [[SUM_NEXT:%.*]], %[[INNER_LATCH]] ] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[I]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add i32 [[VAL]], 1 +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: [[SUM_NEXT]] = add i32 [[SUM_INNER]], [[INC]] +; CHECK-NEXT: br label %[[INNER_LATCH]] +; CHECK: [[INNER_LATCH]]: +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_INNER:%.*]] = icmp eq i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_INNER]], label %[[OUTER_LATCH_LOOPEXIT:.*]], label %[[INNER_HEADER]] +; CHECK: [[OUTER_LATCH_LOOPEXIT]]: +; 
CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i32 [ [[SUM_NEXT]], %[[INNER_LATCH]] ] +; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: [[SUM_LATCH]] = phi i32 [ [[SUM_NEW_INIT]], %[[OUTER_HEADER]] ], [ [[SUM_NEXT_LCSSA]], %[[OUTER_LATCH_LOOPEXIT]] ] +; CHECK-NEXT: [[IV_M_NEXT]] = sub nsw i32 [[IV_M]], 1 +; CHECK-NEXT: [[OUTER_COND2:%.*]] = icmp sgt i32 [[IV_M_NEXT]], 0 +; CHECK-NEXT: br i1 [[OUTER_COND2]], label %[[OUTER_HEADER]], label %[[EXIT_LOOPEXIT:.*]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: [[SUM_OUTER_LCSSA:%.*]] = phi i32 [ [[SUM_OUTER]], %[[OUTER_LATCH]] ] +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[SUM_EXIT:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[SUM_OUTER_LCSSA]], %[[EXIT_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[SUM_EXIT]] +; +entry: + %outer.cond = icmp sgt i32 %M, 0 + %guard = icmp sgt i32 %N, 0 + br i1 %outer.cond, label %outer.header, label %exit + +outer.header: + %iv.M = phi i32 [ %M, %entry ], [ %iv.M.next, %outer.latch ] + ; 1. Initialized before the outer loop (starts at 42 from entry) + %sum.outer = phi i32 [ 42, %entry ], [ %sum.latch, %outer.latch ] + + ; 2. Updated in the outer loop header to a new initial value + %sum.new_init = add i32 %sum.outer, 1 + br i1 %guard, label %inner.preheader, label %outer.latch + +inner.preheader: + br label %inner.header + +inner.header: + %i = phi i32 [ 0, %inner.preheader ], [ %i.next, %inner.latch ] + %sum.inner = phi i32 [ %sum.new_init, %inner.preheader ], [ %sum.next, %inner.latch ] + + %gep.B = getelementptr inbounds i32, ptr %B, i32 %i + %val = load i32, ptr %gep.B, align 4 + %inc = add i32 %val, 1 + %gep.A = getelementptr inbounds i32, ptr %A, i32 %i + store i32 %inc, ptr %gep.A, align 4 + + ; 3. 
Calculated in the inner loop + %sum.next = add i32 %sum.inner, %inc + br label %inner.latch + +inner.latch: + %i.next = add nuw i32 %i, 1 + %exitcond.inner = icmp eq i32 %i.next, %N + br i1 %exitcond.inner, label %outer.latch, label %inner.header + +outer.latch: + ; Merging the bypassed inner loop value with the executed inner loop reduction + %sum.latch = phi i32 [ %sum.new_init, %outer.header], [ %sum.next, %inner.latch ] + %iv.M.next = sub nsw i32 %iv.M, 1 + %outer.cond2 = icmp sgt i32 %iv.M.next, 0 + br i1 %outer.cond2, label %outer.header, label %exit + +exit: + ; 4. Used in a phi node in the outer loop exit block (LCSSA form) + %sum.exit = phi i32 [%sum.outer, %outer.latch], [0, %entry] + ret i32 %sum.exit +} + +;; A positive test that includes a phi in the exit block of the outer loop +;; +;; Source: +;; int f(int M, int N, int *A, int *B) { +;; int sum = 42; +;; while (M > 0) { +;; sum = 10; +;; if (N > 0) { +;; for (int i = 0; i < N; i++) { +;; A[i] = B[i] + 1; +;; } +;; } +;; B[0] = 0; +;; M--; +;; } +;; return sum; +;; } +;; +define i32 @exit_phi2(i32 %M, i32 %N, ptr %A, ptr %B) { +; CHECK-LABEL: define i32 @exit_phi2( +; CHECK-SAME: i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[OUTER_COND:%.*]] = icmp sgt i32 [[M]], 0 +; CHECK-NEXT: [[GUARD:%.*]] = icmp sgt i32 [[N]], 0 +; CHECK-NEXT: br i1 [[OUTER_COND]], label %[[OUTER_HEADER_PREHEADER:.*]], label %[[EXIT:.*]] +; CHECK: [[OUTER_HEADER_PREHEADER]]: +; CHECK-NEXT: br i1 [[GUARD]], label %[[OUTER_HEADER_PREHEADER_SPLIT:.*]], label %[[EXIT_LOOPEXIT_SPLIT:.*]] +; CHECK: [[OUTER_HEADER_PREHEADER_SPLIT]]: +; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] +; CHECK: [[OUTER_HEADER]]: +; CHECK-NEXT: [[IV_M:%.*]] = phi i32 [ [[IV_M_NEXT:%.*]], %[[OUTER_LATCH:.*]] ], [ [[M]], %[[OUTER_HEADER_PREHEADER_SPLIT]] ] +; CHECK-NEXT: br label %[[INNER_PREHEADER:.*]] +; CHECK: [[INNER_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER_HEADER:.*]] +; CHECK: 
[[INNER_HEADER]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[INNER_PREHEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[I]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add i32 [[VAL]], 1 +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[INNER_LATCH]] +; CHECK: [[INNER_LATCH]]: +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_INNER:%.*]] = icmp eq i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_INNER]], label %[[OUTER_LATCH_LOOPEXIT:.*]], label %[[INNER_HEADER]] +; CHECK: [[OUTER_LATCH_LOOPEXIT]]: +; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: [[IV_M_NEXT]] = sub nsw i32 [[IV_M]], 1 +; CHECK-NEXT: [[OUTER_COND2:%.*]] = icmp sgt i32 [[IV_M_NEXT]], 0 +; CHECK-NEXT: br i1 [[OUTER_COND2]], label %[[OUTER_HEADER]], label %[[EXIT_LOOPEXIT:.*]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT_LOOPEXIT_SPLIT]] +; CHECK: [[EXIT_LOOPEXIT_SPLIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: [[SUM_EXIT:%.*]] = phi i32 [ 42, %[[ENTRY]] ], [ 10, %[[EXIT_LOOPEXIT_SPLIT]] ] +; CHECK-NEXT: ret i32 [[SUM_EXIT]] +; +entry: + %outer.cond = icmp sgt i32 %M, 0 + %guard = icmp sgt i32 %N, 0 + br i1 %outer.cond, label %outer.header, label %exit + +outer.header: + %iv.M = phi i32 [ %M, %entry ], [ %iv.M.next, %outer.latch ] + br i1 %guard, label %inner.preheader, label %outer.latch + +inner.preheader: + br label %inner.header + +inner.header: + %i = phi i32 [ 0, %inner.preheader ], [ %i.next, %inner.latch ] + %gep.B = getelementptr inbounds i32, ptr %B, i32 %i + %val = load i32, ptr %gep.B, align 4 + %inc = add i32 %val, 1 + %gep.A = getelementptr inbounds i32, ptr %A, i32 %i + store i32 %inc, ptr %gep.A, align 4 + br label %inner.latch + +inner.latch: 
+ %i.next = add nuw i32 %i, 1 + %exitcond.inner = icmp eq i32 %i.next, %N + br i1 %exitcond.inner, label %outer.latch, label %inner.header + +outer.latch: + %iv.M.next = sub nsw i32 %iv.M, 1 + %outer.cond2 = icmp sgt i32 %iv.M.next, 0 + br i1 %outer.cond2, label %outer.header, label %exit + +exit: + %sum.exit = phi i32 [10, %outer.latch], [42, %entry] + ret i32 %sum.exit +} + + +;; A negative test in which we have two inner loops both guarded with different +;; guard conditions. The first guard doesn't branch to loop latch so this cannot +;; be unswitched. Unswitching either of the branches will be non-trivial +;; and requires loop versioning +;; +;; Source: +;; void f(int M, int N, int N2, int *A, int *B) { +;; for (int j = 0; j < M; j++) { +;; if (N > 0) { // invariant guard +;; for (int i = 0; i < N; i++) +;; A[i] = B[i] + 1; +;; } +;; +;; if (N2 > 0) { // invariant guard branches to latch +;; for (int i = 0; i < N2; i++) +;; A[i] = B[i] + 1; +;; } +;; } +;; } + +define void @multiple_inner_loops(i32 %M, i32 %N, i32 %N2, ptr %A, ptr %B) { +; CHECK-LABEL: define void @multiple_inner_loops( +; CHECK-SAME: i32 [[M:%.*]], i32 [[N:%.*]], i32 [[N2:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP_M:%.*]] = icmp sle i32 [[M]], 0 +; CHECK-NEXT: br i1 [[CMP_M]], label %[[EXIT:.*]], label %[[OUTER_PREHEADER:.*]] +; CHECK: [[OUTER_PREHEADER]]: +; CHECK-NEXT: [[GUARD:%.*]] = icmp sle i32 [[N]], 0 +; CHECK-NEXT: [[GUARD2:%.*]] = icmp sle i32 [[N2]], 0 +; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] +; CHECK: [[OUTER_HEADER]]: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[OUTER_PREHEADER]] ], [ [[J_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] +; CHECK-NEXT: br i1 [[GUARD]], label %[[INNER2_GUARD:.*]], label %[[INNER_PREHEADER:.*]] +; CHECK: [[INNER_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER_HEADER:.*]] +; CHECK: [[INNER_HEADER]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[INNER_PREHEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_LATCH:.*]] ] +; CHECK-NEXT: 
[[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[I]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add i32 [[VAL]], 1 +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[INNER_LATCH]] +; CHECK: [[INNER_LATCH]]: +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_INNER:%.*]] = icmp eq i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_INNER]], label %[[INNER2_GUARD_LOOPEXIT:.*]], label %[[INNER_HEADER]] +; CHECK: [[INNER2_GUARD_LOOPEXIT]]: +; CHECK-NEXT: br label %[[INNER2_GUARD]] +; CHECK: [[INNER2_GUARD]]: +; CHECK-NEXT: br i1 [[GUARD2]], label %[[OUTER_LATCH]], label %[[INNER2_PREHEADER:.*]] +; CHECK: [[INNER2_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER2_HEADER:.*]] +; CHECK: [[INNER2_HEADER]]: +; CHECK-NEXT: [[I2:%.*]] = phi i32 [ 0, %[[INNER2_PREHEADER]] ], [ [[I_NEXT2:%.*]], %[[INNER2_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_B2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[I2]] +; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr [[GEP_B2]], align 4 +; CHECK-NEXT: [[INC2:%.*]] = add i32 [[VAL2]], 1 +; CHECK-NEXT: [[GEP_A2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I2]] +; CHECK-NEXT: store i32 [[INC2]], ptr [[GEP_A2]], align 4 +; CHECK-NEXT: br label %[[INNER2_LATCH]] +; CHECK: [[INNER2_LATCH]]: +; CHECK-NEXT: [[I_NEXT2]] = add nuw i32 [[I2]], 1 +; CHECK-NEXT: [[EXITCOND_INNER2:%.*]] = icmp eq i32 [[I_NEXT2]], [[N2]] +; CHECK-NEXT: br i1 [[EXITCOND_INNER2]], label %[[OUTER_LATCH_LOOPEXIT:.*]], label %[[INNER2_HEADER]] +; CHECK: [[OUTER_LATCH_LOOPEXIT]]: +; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: [[J_NEXT]] = add nuw i32 [[J]], 1 +; CHECK-NEXT: [[EXITCOND_OUTER:%.*]] = icmp eq i32 [[J_NEXT]], [[M]] +; CHECK-NEXT: br i1 [[EXITCOND_OUTER]], label %[[EXIT_LOOPEXIT:.*]], label %[[OUTER_HEADER]] +; CHECK: [[EXIT_LOOPEXIT]]: +; 
CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %cmp.M = icmp sle i32 %M, 0 + br i1 %cmp.M, label %exit, label %outer.preheader + +outer.preheader: + %guard = icmp sle i32 %N, 0 + %guard2 = icmp sle i32 %N2, 0 + br label %outer.header + +outer.header: + %j = phi i32 [ 0, %outer.preheader ], [ %j.next, %outer.latch ] + br i1 %guard, label %inner2.guard, label %inner.preheader + +inner.preheader: + br label %inner.header + +inner.header: + %i = phi i32 [ 0, %inner.preheader ], [ %i.next, %inner.latch ] + %gep.B = getelementptr inbounds i32, ptr %B, i32 %i + %val = load i32, ptr %gep.B, align 4 + %inc = add i32 %val, 1 + %gep.A = getelementptr inbounds i32, ptr %A, i32 %i + store i32 %inc, ptr %gep.A, align 4 + br label %inner.latch + +inner.latch: + %i.next = add nuw i32 %i, 1 + %exitcond.inner = icmp eq i32 %i.next, %N + br i1 %exitcond.inner, label %inner2.guard, label %inner.header + +inner2.guard: + br i1 %guard2, label %outer.latch, label %inner2.preheader + +inner2.preheader: + br label %inner2.header + +inner2.header: + %i2 = phi i32 [ 0, %inner2.preheader ], [ %i.next2, %inner2.latch ] + %gep.B2 = getelementptr inbounds i32, ptr %B, i32 %i2 + %val2 = load i32, ptr %gep.B2, align 4 + %inc2 = add i32 %val2, 1 + %gep.A2 = getelementptr inbounds i32, ptr %A, i32 %i2 + store i32 %inc2, ptr %gep.A2, align 4 + br label %inner2.latch + +inner2.latch: + %i.next2 = add nuw i32 %i2, 1 + %exitcond.inner2 = icmp eq i32 %i.next2, %N2 + br i1 %exitcond.inner2, label %outer.latch, label %inner2.header + +outer.latch: + %j.next = add nuw i32 %j, 1 + %exitcond.outer = icmp eq i32 %j.next, %M + br i1 %exitcond.outer, label %exit, label %outer.header + +exit: + ret void +} + +;; A negative test in which we have two inner loops both guarded but the guards +;; have the same conditions. The first guard doesn't branch to loop latch so +;; this cannot be unswitched. 
If the control flow is optimized before the loop +;; unswitching, and the second branch is eliminated then this will be a case of +;; trivial unswitching. +;; +;; Source: +;; void f(int M, int N, int *A, int *B) { +;; for (int j = 0; j < M; j++) { +;; if (N > 0) { // invariant guard +;; for (int i = 0; i < N; i++) +;; A[i] = B[i] + 1; +;; } +;; +;; if (N > 0) { // invariant guard branches to latch +;; for (int i = 0; i < N; i++) +;; A[i] = B[i] + 1; +;; } +;; } +;; } + +define void @multiple_inner_loops2(i32 %M, i32 %N, ptr %A, ptr %B) { +; CHECK-LABEL: define void @multiple_inner_loops2( +; CHECK-SAME: i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP_M:%.*]] = icmp sle i32 [[M]], 0 +; CHECK-NEXT: br i1 [[CMP_M]], label %[[EXIT:.*]], label %[[OUTER_PREHEADER:.*]] +; CHECK: [[OUTER_PREHEADER]]: +; CHECK-NEXT: [[GUARD:%.*]] = icmp sle i32 [[N]], 0 +; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] +; CHECK: [[OUTER_HEADER]]: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[OUTER_PREHEADER]] ], [ [[J_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] +; CHECK-NEXT: br i1 [[GUARD]], label %[[INNER2_GUARD:.*]], label %[[INNER_PREHEADER:.*]] +; CHECK: [[INNER_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER_HEADER:.*]] +; CHECK: [[INNER_HEADER]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[INNER_PREHEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[I]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add i32 [[VAL]], 1 +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[INNER_LATCH]] +; CHECK: [[INNER_LATCH]]: +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_INNER:%.*]] = icmp eq i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_INNER]], label %[[INNER2_GUARD_LOOPEXIT:.*]], label 
%[[INNER_HEADER]] +; CHECK: [[INNER2_GUARD_LOOPEXIT]]: +; CHECK-NEXT: br label %[[INNER2_GUARD]] +; CHECK: [[INNER2_GUARD]]: +; CHECK-NEXT: br i1 [[GUARD]], label %[[OUTER_LATCH]], label %[[INNER2_PREHEADER:.*]] +; CHECK: [[INNER2_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER2_HEADER:.*]] +; CHECK: [[INNER2_HEADER]]: +; CHECK-NEXT: [[I2:%.*]] = phi i32 [ 0, %[[INNER2_PREHEADER]] ], [ [[I_NEXT2:%.*]], %[[INNER2_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_B2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[I2]] +; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr [[GEP_B2]], align 4 +; CHECK-NEXT: [[INC2:%.*]] = add i32 [[VAL2]], 1 +; CHECK-NEXT: [[GEP_A2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I2]] +; CHECK-NEXT: store i32 [[INC2]], ptr [[GEP_A2]], align 4 +; CHECK-NEXT: br label %[[INNER2_LATCH]] +; CHECK: [[INNER2_LATCH]]: +; CHECK-NEXT: [[I_NEXT2]] = add nuw i32 [[I2]], 1 +; CHECK-NEXT: [[EXITCOND_INNER2:%.*]] = icmp eq i32 [[I_NEXT2]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_INNER2]], label %[[OUTER_LATCH_LOOPEXIT:.*]], label %[[INNER2_HEADER]] +; CHECK: [[OUTER_LATCH_LOOPEXIT]]: +; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: [[J_NEXT]] = add nuw i32 [[J]], 1 +; CHECK-NEXT: [[EXITCOND_OUTER:%.*]] = icmp eq i32 [[J_NEXT]], [[M]] +; CHECK-NEXT: br i1 [[EXITCOND_OUTER]], label %[[EXIT_LOOPEXIT:.*]], label %[[OUTER_HEADER]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %cmp.M = icmp sle i32 %M, 0 + br i1 %cmp.M, label %exit, label %outer.preheader + +outer.preheader: + %guard = icmp sle i32 %N, 0 + br label %outer.header + +outer.header: + %j = phi i32 [ 0, %outer.preheader ], [ %j.next, %outer.latch ] + br i1 %guard, label %inner2.guard, label %inner.preheader + +inner.preheader: + br label %inner.header + +inner.header: + %i = phi i32 [ 0, %inner.preheader ], [ %i.next, %inner.latch ] + %gep.B = getelementptr inbounds i32, ptr %B, i32 %i + %val = load i32, ptr 
%gep.B, align 4 + %inc = add i32 %val, 1 + %gep.A = getelementptr inbounds i32, ptr %A, i32 %i + store i32 %inc, ptr %gep.A, align 4 + br label %inner.latch + +inner.latch: + %i.next = add nuw i32 %i, 1 + %exitcond.inner = icmp eq i32 %i.next, %N + br i1 %exitcond.inner, label %inner2.guard, label %inner.header + +inner2.guard: + br i1 %guard, label %outer.latch, label %inner2.preheader + +inner2.preheader: + br label %inner2.header + +inner2.header: + %i2 = phi i32 [ 0, %inner2.preheader ], [ %i.next2, %inner2.latch ] + %gep.B2 = getelementptr inbounds i32, ptr %B, i32 %i2 + %val2 = load i32, ptr %gep.B2, align 4 + %inc2 = add i32 %val2, 1 + %gep.A2 = getelementptr inbounds i32, ptr %A, i32 %i2 + store i32 %inc2, ptr %gep.A2, align 4 + br label %inner2.latch + +inner2.latch: + %i.next2 = add nuw i32 %i2, 1 + %exitcond.inner2 = icmp eq i32 %i.next2, %N + br i1 %exitcond.inner2, label %outer.latch, label %inner2.header + +outer.latch: + %j.next = add nuw i32 %j, 1 + %exitcond.outer = icmp eq i32 %j.next, %M + br i1 %exitcond.outer, label %exit, label %outer.header + +exit: + ret void +} + +;; This is modified from the previous test, @multiple_inner_loops2. Here +;; the second branch is optimized away. The first branch is technically not a +;; loop guard anymore, but still this is an invariant branch and both loops +;; are control flow dependent on it. This is a case of trivial unswitching again. 
+;; +;; Source: +;; void f(int M, int N, int *A, int *B) { +;; for (int j = 0; j < M; j++) { +;; if (N > 0) { // invariant branch +;; for (int i = 0; i < N; i++) +;; A[i] = B[i] + 1; +;; +;; for (int i = 0; i < N; i++) +;; A[i] = B[i] + 1; +;; } +;; } +;; } + +define void @multiple_inner_loops3(i32 %M, i32 %N, ptr %A, ptr %B) { +; CHECK-LABEL: define void @multiple_inner_loops3( +; CHECK-SAME: i32 [[M:%.*]], i32 [[N:%.*]], ptr [[A:%.*]], ptr [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP_M:%.*]] = icmp sle i32 [[M]], 0 +; CHECK-NEXT: br i1 [[CMP_M]], label %[[EXIT:.*]], label %[[OUTER_PREHEADER:.*]] +; CHECK: [[OUTER_PREHEADER]]: +; CHECK-NEXT: [[GUARD:%.*]] = icmp sle i32 [[N]], 0 +; CHECK-NEXT: br i1 [[GUARD]], label %[[EXIT_LOOPEXIT_SPLIT:.*]], label %[[OUTER_PREHEADER_SPLIT:.*]] +; CHECK: [[OUTER_PREHEADER_SPLIT]]: +; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] +; CHECK: [[OUTER_HEADER]]: +; CHECK-NEXT: [[J:%.*]] = phi i32 [ 0, %[[OUTER_PREHEADER_SPLIT]] ], [ [[J_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] +; CHECK-NEXT: br label %[[INNER_PREHEADER:.*]] +; CHECK: [[INNER_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER_HEADER:.*]] +; CHECK: [[INNER_HEADER]]: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 0, %[[INNER_PREHEADER]] ], [ [[I_NEXT:%.*]], %[[INNER_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_B:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[I]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, ptr [[GEP_B]], align 4 +; CHECK-NEXT: [[INC:%.*]] = add i32 [[VAL]], 1 +; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I]] +; CHECK-NEXT: store i32 [[INC]], ptr [[GEP_A]], align 4 +; CHECK-NEXT: br label %[[INNER_LATCH]] +; CHECK: [[INNER_LATCH]]: +; CHECK-NEXT: [[I_NEXT]] = add nuw i32 [[I]], 1 +; CHECK-NEXT: [[EXITCOND_INNER:%.*]] = icmp eq i32 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_INNER]], label %[[INNER2_PREHEADER:.*]], label %[[INNER_HEADER]] +; CHECK: [[INNER2_PREHEADER]]: +; CHECK-NEXT: br label %[[INNER2_HEADER:.*]] +; CHECK: 
[[INNER2_HEADER]]: +; CHECK-NEXT: [[I2:%.*]] = phi i32 [ 0, %[[INNER2_PREHEADER]] ], [ [[I_NEXT2:%.*]], %[[INNER2_LATCH:.*]] ] +; CHECK-NEXT: [[GEP_B2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[I2]] +; CHECK-NEXT: [[VAL2:%.*]] = load i32, ptr [[GEP_B2]], align 4 +; CHECK-NEXT: [[INC2:%.*]] = add i32 [[VAL2]], 1 +; CHECK-NEXT: [[GEP_A2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[I2]] +; CHECK-NEXT: store i32 [[INC2]], ptr [[GEP_A2]], align 4 +; CHECK-NEXT: br label %[[INNER2_LATCH]] +; CHECK: [[INNER2_LATCH]]: +; CHECK-NEXT: [[I_NEXT2]] = add nuw i32 [[I2]], 1 +; CHECK-NEXT: [[EXITCOND_INNER2:%.*]] = icmp eq i32 [[I_NEXT2]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND_INNER2]], label %[[OUTER_LATCH_LOOPEXIT:.*]], label %[[INNER2_HEADER]] +; CHECK: [[OUTER_LATCH_LOOPEXIT]]: +; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: [[J_NEXT]] = add nuw i32 [[J]], 1 +; CHECK-NEXT: [[EXITCOND_OUTER:%.*]] = icmp eq i32 [[J_NEXT]], [[M]] +; CHECK-NEXT: br i1 [[EXITCOND_OUTER]], label %[[EXIT_LOOPEXIT:.*]], label %[[OUTER_HEADER]] +; CHECK: [[EXIT_LOOPEXIT]]: +; CHECK-NEXT: br label %[[EXIT_LOOPEXIT_SPLIT]] +; CHECK: [[EXIT_LOOPEXIT_SPLIT]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + %cmp.M = icmp sle i32 %M, 0 + br i1 %cmp.M, label %exit, label %outer.preheader + +outer.preheader: + %guard = icmp sle i32 %N, 0 + br label %outer.header + +outer.header: + %j = phi i32 [ 0, %outer.preheader ], [ %j.next, %outer.latch ] + br i1 %guard, label %outer.latch, label %inner.preheader + +inner.preheader: + br label %inner.header + +inner.header: + %i = phi i32 [ 0, %inner.preheader ], [ %i.next, %inner.latch ] + %gep.B = getelementptr inbounds i32, ptr %B, i32 %i + %val = load i32, ptr %gep.B, align 4 + %inc = add i32 %val, 1 + %gep.A = getelementptr inbounds i32, ptr %A, i32 %i + store i32 %inc, ptr %gep.A, align 4 + br label %inner.latch + +inner.latch: + %i.next = add nuw i32 %i, 1 + 
%exitcond.inner = icmp eq i32 %i.next, %N + br i1 %exitcond.inner, label %inner2.preheader, label %inner.header + +inner2.preheader: + br label %inner2.header + +inner2.header: + %i2 = phi i32 [ 0, %inner2.preheader ], [ %i.next2, %inner2.latch ] + %gep.B2 = getelementptr inbounds i32, ptr %B, i32 %i2 + %val2 = load i32, ptr %gep.B2, align 4 + %inc2 = add i32 %val2, 1 + %gep.A2 = getelementptr inbounds i32, ptr %A, i32 %i2 + store i32 %inc2, ptr %gep.A2, align 4 + br label %inner2.latch + +inner2.latch: + %i.next2 = add nuw i32 %i2, 1 + %exitcond.inner2 = icmp eq i32 %i.next2, %N + br i1 %exitcond.inner2, label %outer.latch, label %inner2.header + +outer.latch: + %j.next = add nuw i32 %j, 1 + %exitcond.outer = icmp eq i32 %j.next, %M + br i1 %exitcond.outer, label %exit, label %outer.header + +exit: + ret void +} + diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/update-dt.ll b/llvm/test/Transforms/SimpleLoopUnswitch/update-dt.ll new file mode 100644 index 0000000000000..a9e1a77835eed --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/update-dt.ll @@ -0,0 +1,52 @@ +; RUN: opt < %s -passes='loop-mssa(simple-loop-unswitch)' -verify-memoryssa -disable-output + +; Ensure that MemorySSA and DomTree are correctly updated after a +; continue-to-break transformation during loop unswitching. +; This addresses the bug that caused PR #193989 to be reverted. 
+ +define dso_local void @a() { +entry: + %c = alloca [4 x i32], align 4 + br label %for.cond + +for.cond: ; preds = %for.end5, %entry + call void @llvm.lifetime.start.p0(ptr nonnull %c) + %cmp3 = icmp ult ptr inttoptr (i64 2 to ptr), @a + br label %for.cond2.preheader + +for.cond2.preheader: ; preds = %for.cond, %for.inc + %b.08 = phi i32 [ 0, %for.cond ], [ %inc, %for.inc ] + br i1 %cmp3, label %for.body4.lr.ph, label %for.inc + +for.body4.lr.ph: ; preds = %for.cond2.preheader + %idxprom = zext nneg i32 %b.08 to i64 + %arrayidx = getelementptr inbounds nuw [4 x i8], ptr %c, i64 %idxprom + %arrayidx.promoted = load i32, ptr %arrayidx, align 4 + br i1 %cmp3, label %for.body4.lr.ph.split.us, label %for.body4.lr.ph.split + +for.body4.lr.ph.split.us: ; preds = %for.body4.lr.ph + %arrayidx.promoted.lcssa = phi i32 [ %arrayidx.promoted, %for.body4.lr.ph ] + br label %for.body4.us + +for.body4.us: ; preds = %for.body4.us, %for.body4.lr.ph.split.us + %0 = phi i32 [ %arrayidx.promoted.lcssa, %for.body4.lr.ph.split.us ], [ %add.us, %for.body4.us ] + %add.us = add nsw i32 %0, 1 + br label %for.body4.us + +for.body4.lr.ph.split: ; preds = %for.body4.lr.ph + %add = add nsw i32 %arrayidx.promoted, 1 + store i32 %add, ptr %arrayidx, align 4 + br label %for.inc + +for.inc: ; preds = %for.body4.lr.ph.split, %for.cond2.preheader + %inc = add nuw nsw i32 %b.08, 1 + %cmp = icmp samesign ult i32 %inc, 4 + br i1 %cmp, label %for.cond2.preheader, label %for.end5 + +for.end5: ; preds = %for.inc + call void @llvm.lifetime.end.p0(ptr nonnull %c) + br label %for.cond +} + + +declare void @llvm.lifetime.start.p0(ptr captures(none)) diff --git a/polly/test/Support/pipelineposition.ll b/polly/test/Support/pipelineposition.ll index 1ddfb5879ce16..8673657a7b0a8 100644 --- a/polly/test/Support/pipelineposition.ll +++ b/polly/test/Support/pipelineposition.ll @@ -79,4 +79,4 @@ return: ; INLINED3-LABEL: Function: caller ; INLINED3: Schedule := -; INLINED3-NEXT: [n] -> { Stmt_body_i_us[i0, i1] -> 
[i0, i1] }; +; INLINED3-NEXT: [n] -> { Stmt_body_i[i0, i1] -> [i0, i1] }; From 7e5efe80cf5635db4dc31b8c4fa0a9800639e38b Mon Sep 17 00:00:00 2001 From: Nicholas Bishop Date: Tue, 23 Jun 2026 13:38:37 -0400 Subject: [PATCH 220/511] Add no-arg constructor and EraseIf method to MultiplexExternalSemaSource (#204458) The no-arg constructor makes MultiplexExternalSemaSource usable in situations where two child sources are not immediately available, or where conditional logic makes it easier to call AddSource rather than immediately pass child sources to the constructor. The EraseIf method allows sources to be removed later if they are no longer needed. Co-authored-by: Nicholas Bishop --- .../clang/Sema/MultiplexExternalSemaSource.h | 13 +++++++++++++ clang/lib/Sema/MultiplexExternalSemaSource.cpp | 3 +++ 2 files changed, 16 insertions(+) diff --git a/clang/include/clang/Sema/MultiplexExternalSemaSource.h b/clang/include/clang/Sema/MultiplexExternalSemaSource.h index 12015724b39f4..1395470401b5d 100644 --- a/clang/include/clang/Sema/MultiplexExternalSemaSource.h +++ b/clang/include/clang/Sema/MultiplexExternalSemaSource.h @@ -14,6 +14,7 @@ #include "clang/Sema/ExternalSemaSource.h" #include "clang/Sema/Weak.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include @@ -43,6 +44,9 @@ class MultiplexExternalSemaSource : public ExternalSemaSource { SmallVector, 2> Sources; public: + /// Constructs an empty multiplexing external sema source. + MultiplexExternalSemaSource(); + /// Constructs a new multiplexing external sema source and appends the /// given element to it. /// @@ -58,6 +62,15 @@ class MultiplexExternalSemaSource : public ExternalSemaSource { /// void AddSource(llvm::IntrusiveRefCntPtr Source); + /// Remove all sources for which the predicate returns true. + /// + /// \param P - A predicate that takes an + /// IntrusiveRefCntPtr param and returns true if + /// the source should be removed, false otherwise. 
+ template void EraseIf(UnaryPredicate P) { + llvm::erase_if(Sources, P); + } + //===--------------------------------------------------------------------===// // ExternalASTSource. //===--------------------------------------------------------------------===// diff --git a/clang/lib/Sema/MultiplexExternalSemaSource.cpp b/clang/lib/Sema/MultiplexExternalSemaSource.cpp index be9582ce501fe..4306143025c80 100644 --- a/clang/lib/Sema/MultiplexExternalSemaSource.cpp +++ b/clang/lib/Sema/MultiplexExternalSemaSource.cpp @@ -16,6 +16,9 @@ using namespace clang; char MultiplexExternalSemaSource::ID; +/// Constructs an empty multiplexing external sema source. +MultiplexExternalSemaSource::MultiplexExternalSemaSource() {} + /// Constructs a new multiplexing external sema source and appends the /// given element to it. /// From 98aa0ee77578fe80a3f99a2b5008067bb18a9bb3 Mon Sep 17 00:00:00 2001 From: AZero13 Date: Tue, 23 Jun 2026 13:39:29 -0400 Subject: [PATCH 221/511] [InstCombine] Fold (bw-1) - ctlz(x&-x) to cttz (#205383) Works for non-powers of 2. 
Alive2: https://alive2.llvm.org/ce/z/-cbxFz --- .../InstCombine/InstCombineSelect.cpp | 12 +++++--- .../InstCombine/select-ctlz-to-cttz.ll | 28 +++++++++++++++++++ 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 78be7c6ea2900..af10ac712886a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1504,17 +1504,21 @@ static Instruction *foldSelectCtlzToCttz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal, InstCombiner::BuilderTy &Builder) { unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits(); - if (!isPowerOf2_32(BitWidth) || !ICI->isEquality() || - !match(ICI->getOperand(1), m_Zero())) + if (!ICI->isEquality() || !match(ICI->getOperand(1), m_Zero())) return nullptr; if (ICI->getPredicate() == ICmpInst::ICMP_NE) std::swap(TrueVal, FalseVal); Value *Ctlz; - if (!match(FalseVal, - m_Xor(m_Value(Ctlz), m_SpecificInt(BitWidth - 1)))) + if (match(FalseVal, + m_Xor(m_Value(Ctlz), m_SpecificIntAllowPoison(BitWidth - 1)))) { + if (!isPowerOf2_32(BitWidth)) + return nullptr; + } else if (!match(FalseVal, m_Sub(m_SpecificIntAllowPoison(BitWidth - 1), + m_Value(Ctlz)))) { return nullptr; + } if (!match(Ctlz, m_Ctlz(m_Value(), m_Value()))) return nullptr; diff --git a/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll b/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll index d1f25b19d35a0..9b85008bbcac6 100644 --- a/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll +++ b/llvm/test/Transforms/InstCombine/select-ctlz-to-cttz.ll @@ -340,3 +340,31 @@ define i4 @PR45762_logical(i3 %x4) { %sel_71 = select i1 %t12, i4 %one_hot_16, i4 %umul_23 ret i4 %sel_71 } + +define i32 @select_clz_to_ctz_sub_constant_for_zero(i32 %a) { +; CHECK-LABEL: @select_clz_to_ctz_sub_constant_for_zero( +; CHECK-NEXT: [[COND:%.*]] = call range(i32 0, 33) i32 @llvm.cttz.i32(i32 
[[A:%.*]], i1 false) +; CHECK-NEXT: ret i32 [[COND]] +; + %sub = sub i32 0, %a + %and = and i32 %a, %sub + %lz = tail call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 %and, i1 false) + %tobool = icmp eq i32 %a, 0 + %sub1 = sub nsw i32 31, %lz + %cond = select i1 %tobool, i32 32, i32 %sub1 + ret i32 %cond +} + +define i33 @select_clz_to_ctz_sub_constant_for_zero_33(i33 %a) { +; CHECK-LABEL: @select_clz_to_ctz_sub_constant_for_zero_33( +; CHECK-NEXT: [[COND:%.*]] = call range(i33 0, 34) i33 @llvm.cttz.i33(i33 [[A:%.*]], i1 false) +; CHECK-NEXT: ret i33 [[COND]] +; + %sub = sub i33 0, %a + %and = and i33 %a, %sub + %lz = tail call range(i33 0, 34) i33 @llvm.ctlz.i33(i33 %and, i1 false) + %tobool = icmp eq i33 %a, 0 + %sub1 = sub nsw i33 32, %lz + %cond = select i1 %tobool, i33 33, i33 %sub1 + ret i33 %cond +} From 90bfc339e6cbf5466ed705a6c25278613268bc6b Mon Sep 17 00:00:00 2001 From: adams381 Date: Tue, 23 Jun 2026 12:47:22 -0500 Subject: [PATCH 222/511] [CIR] Handle non-zero-initializable types in emitNullInitialization (#201654) Value-initializing an aggregate containing a pointer-to-data-member (e.g. `new Inner()` where `Inner` has an `int Inner::*` field) crashed with "type is not zero initializable" because `emitNullInitialization` unconditionally called `errorNYI` for types where `isZeroInitializable` returns false. Member pointers use -1 as the null sentinel, so a plain zero store is incorrect. Replace the `errorNYI` with `emitNullConstant`, which already builds the correct per-field pattern (-1 for member-pointer fields, zero elsewhere), and store the result. Types with virtual bases are still guarded with `errorNYI` since `emitNullConstant` does not yet handle them. 
--- clang/lib/CIR/CodeGen/CIRGenFunction.cpp | 10 +++- .../CIR/CodeGen/member-pointer-null-init.cpp | 47 +++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 clang/test/CIR/CodeGen/member-pointer-null-init.cpp diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index 4b020c96964a7..6606cf74c7dea 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -1385,7 +1385,15 @@ void CIRGenFunction::emitNullInitialization(mlir::Location loc, Address destPtr, // TODO: there are other patterns besides zero that we can usefully memset, // like -1, which happens to be the pattern used by member-pointers. if (!cgm.getTypes().isZeroInitializable(ty)) { - cgm.errorNYI(loc, "type is not zero initializable"); + // Only the pointer-to-data-member case is tested here; emitNullConstant + // owns the NYIs for shapes it cannot build (virtual bases, non-zero-init + // arrays). + assert((ty->isMemberDataPointerType() || ty->isRecordType()) && + "emitNullInitialization: only pointer-to-data-member (directly or " + "within a record) null initialization is implemented"); + mlir::Value nullVal = cgm.emitNullConstant(ty, loc); + builder.createStore(loc, nullVal, destPtr); + return; } // In LLVM Codegen: otherwise, just memset the whole thing to zero using diff --git a/clang/test/CIR/CodeGen/member-pointer-null-init.cpp b/clang/test/CIR/CodeGen/member-pointer-null-init.cpp new file mode 100644 index 0000000000000..74ee344c6f407 --- /dev/null +++ b/clang/test/CIR/CodeGen/member-pointer-null-init.cpp @@ -0,0 +1,47 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -std=c++17 -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM,LLVMCIR --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple 
x86_64-unknown-linux-gnu -std=c++17 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM,OGCG --input-file=%t.ll %s + +struct Inner { + int Inner::*p; +}; + +struct Outer { + Inner a; + int b; +}; + +// Value-init of a heap-allocated struct containing a pointer-to-data-member. +// The member pointer is null (-1), so the stored constant must carry -1. + +// CIR-LABEL: cir.func {{.*}}@_Z8make_newv +// CIR: [[NULL:%.*]] = cir.const #cir.const_record<{#cir.int<-1> : !s64i}> : !rec_Inner +// CIR: cir.store align(8) [[NULL]], {{%.*}} : !rec_Inner, !cir.ptr + +// LLVMCIR-LABEL: define {{.*}} ptr @_Z8make_newv +// LLVMCIR: call {{.*}} @_Znwm +// LLVMCIR: store %struct.Inner { i64 -1 }, ptr %{{.*}}, align 8 + +// OGCG: @{{.*}} = private constant %struct.Inner { i64 -1 } +// OGCG-LABEL: define {{.*}} ptr @_Z8make_newv +// OGCG: call {{.*}} @llvm.memcpy{{.*}}i64 8 + +Inner *make_new() { return new Inner(); } + +// Partial aggregate init: Inner subobject 'a' is value-initialized because +// it has no designated initializer. + +// CIR-LABEL: cir.func {{.*}}@_Z11runtime_aggi +// CIR: cir.const #cir.int<-1> : !s64i +// CIR: cir.store align(8) {{%.*}}, {{%.*}} : !s64i + +// LLVM-LABEL: define {{.*}} void @_Z11runtime_aggi +// LLVM: store i64 -1, ptr %{{.*}}, align 8 + +void runtime_agg(int x) { + Outer o = {.b = x}; + (void)o; +} From 57088c537e8e7e6d769d8e456639462d22e1fcbb Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 23 Jun 2026 10:47:53 -0700 Subject: [PATCH 223/511] [RISCV] Remove isCodeGenOnly=0 from PseudoLA_TLSDESC. (#205366) The default for a Pseudo is isCodeGenOnly=1. With isCodeGenOnly=0 the 'la.tlsdesc' is a valid mnemonic for the assembly parser, but crashes in the encoder. I don't think this was meant to be a valid assembly mnemonic so remove it. 
--- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 6c776c91ae73a..90a3fd7bf3de9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1955,7 +1955,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 8, def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.tls.gd", "$dst, $src">; -let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 32, isCodeGenOnly = 0 in +let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 32 in def PseudoLA_TLSDESC : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.tlsdesc", "$dst, $src">; From e0a1a6d5747075adc0866d5b61062e581f9ded2c Mon Sep 17 00:00:00 2001 From: Yao Qi Date: Tue, 23 Jun 2026 18:51:28 +0100 Subject: [PATCH 224/511] [lldb][test] Fix TestHiddenIvars rebuild failures in hidden-ivars Makefile (#205114) Two rebuild-state bugs surfaced when running `./bin/lldb-dotest -p TestHiddenIvars.py`: ``` FAILED (errors=6, skipped=7, expected failures=2) ``` 1. `mkdir: stripped: File exists`, mkdir failed on the second build because the directory already existed. Switch to `mkdir -p`. 2. `cp -r a.out.dSYM stripped/a.out.dSYM` recurses *into* the existing destination on rebuild, producing a nested `stripped/a.out.dSYM/ a.out.dSYM/`. The outer dSYM keeps its stale UUID, so lldb cannot match it to the freshly relinked stripped binary (which gets a new UUID per link via -Wl,-random_uuid), causing source breakpoints to fail to resolve. Remove the destination before copying. 
--- lldb/test/API/lang/objc/hidden-ivars/Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lldb/test/API/lang/objc/hidden-ivars/Makefile b/lldb/test/API/lang/objc/hidden-ivars/Makefile index c94c0dee1b9ce..49b88651b115f 100644 --- a/lldb/test/API/lang/objc/hidden-ivars/Makefile +++ b/lldb/test/API/lang/objc/hidden-ivars/Makefile @@ -13,7 +13,7 @@ stripped: a.out.dSYM endif stripped: a.out libInternalDefiner.dylib - mkdir stripped + mkdir -p stripped $(STRIP) -Sx a.out -o stripped/a.out $(STRIP) -Sx libInternalDefiner.dylib -o stripped/libInternalDefiner.dylib ifneq "$(CODESIGN)" "" @@ -23,5 +23,6 @@ ifneq "$(CODESIGN)" "" $(CODESIGN) -fs - stripped/libInternalDefiner.dylib endif ifeq "$(MAKE_DSYM)" "YES" + rm -rf stripped/a.out.dSYM cp -r a.out.dSYM stripped/a.out.dSYM endif From b53b78019ad81d8bf15fb68a56a338f53068891c Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Wed, 24 Jun 2026 02:54:52 +0900 Subject: [PATCH 225/511] [clang][SYCL][lit] Disable two LIT tests failing in post-commit (#22401) Both failing, see https://github.com/intel/llvm/issues/22400 and https://github.com/intel/llvm/issues/22392 Signed-off-by: Nick Sarnie --- clang/test/CodeGenSYCL/simple-sycl-virtual-function.cpp | 3 +++ sycl/test/esimd/ctor_codegen.cpp | 3 +++ 2 files changed, 6 insertions(+) diff --git a/clang/test/CodeGenSYCL/simple-sycl-virtual-function.cpp b/clang/test/CodeGenSYCL/simple-sycl-virtual-function.cpp index 857d181c22e0b..8d3b3da220f9e 100644 --- a/clang/test/CodeGenSYCL/simple-sycl-virtual-function.cpp +++ b/clang/test/CodeGenSYCL/simple-sycl-virtual-function.cpp @@ -6,6 +6,9 @@ // RUN: %clang_cc1 -triple spir64 -fsycl-is-device -emit-llvm %s -o - | FileCheck %s --implicit-check-not _ZTI4Base --implicit-check-not _ZTI8Derived1 -check-prefix VTABLE // RUNx: %clang_cc1 -triple spir64 -fsycl-is-device -fexperimental-relative-c++-abi-vtables -emit-llvm %s -o - | FileCheck %s --implicit-check-not _ZTI4Base --implicit-check-not _ZTI8Derived1 +// 
UNSUPPORTED: true +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22400 + // Since experimental-relative-c++-abi-vtables is some experimental option, temporary disabling the check for now // until we emit proper address spaces (and casts) everywhere. diff --git a/sycl/test/esimd/ctor_codegen.cpp b/sycl/test/esimd/ctor_codegen.cpp index 959ca0cb5ec06..0e51f7ce6f058 100644 --- a/sycl/test/esimd/ctor_codegen.cpp +++ b/sycl/test/esimd/ctor_codegen.cpp @@ -1,5 +1,8 @@ // RUN: %clangxx -fsycl -fsycl-device-only -S %s -o - | FileCheck %s +// UNSUPPORTED: true +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22392 + // Check efficiency of LLVM IR generated for various simd constructors. #include From d7f441b5ff4f527415b57b180b443114f78339ed Mon Sep 17 00:00:00 2001 From: Ethan Luis McDonough Date: Tue, 23 Jun 2026 12:56:20 -0500 Subject: [PATCH 226/511] [Instrumentor] Add comparison instrumentation opportunity (#204069) This patch adds an instrumentation opportunity for comparison instructions (`icmp` and `fcmp`). 
--- .../llvm/Transforms/IPO/Instrumentor.h | 73 +++++++- .../IPO/InstrumentorRuntimeHelper.h | 9 + llvm/lib/Transforms/IPO/Instrumentor.cpp | 176 ++++++++++++++---- .../Instrumentation/Instrumentor/compare.ll | 112 +++++++++++ .../Instrumentor/compare_config.json | 33 ++++ .../Instrumentor/default_config.json | 49 +++++ .../Instrumentation/Instrumentor/default_rt.h | 9 + 7 files changed, 420 insertions(+), 41 deletions(-) create mode 100644 llvm/test/Instrumentation/Instrumentor/compare.ll create mode 100644 llvm/test/Instrumentation/Instrumentor/compare_config.json diff --git a/llvm/include/llvm/Transforms/IPO/Instrumentor.h b/llvm/include/llvm/Transforms/IPO/Instrumentor.h index b08edfde66fbc..cf2432bd6eebf 100644 --- a/llvm/include/llvm/Transforms/IPO/Instrumentor.h +++ b/llvm/include/llvm/Transforms/IPO/Instrumentor.h @@ -622,6 +622,20 @@ struct InstructionIO : public InstrumentationOpportunity { } }; +/// Common getters used across different instrumentation opportunities. +///{ +LLVM_ABI Value *getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); +LLVM_ABI Value *getTypeSize(Value &V, Type &Ty, InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); +LLVM_ABI Value *getLeft(Value &V, Type &Ty, InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); +LLVM_ABI Value *getRight(Value &V, Type &Ty, InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); +LLVM_ABI Value *getTypeId(Value &V, Type &Ty, InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); +///} + /// The instrumentation opportunity for functions. 
struct FunctionIO final : public InstrumentationOpportunity { FunctionIO(InstrumentationLocation::KindTy Kind) @@ -1171,12 +1185,6 @@ struct NumericIO final InstrumentorIRBuilderTy &IIRB, ConfigTy *UserConfig = nullptr); - LLVM_ABI static Value *getLeft(Value &V, Type &Ty, - InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB); - LLVM_ABI static Value *getRight(Value &V, Type &Ty, - InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB); LLVM_ABI static Value *getFlags(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB); @@ -1192,6 +1200,59 @@ struct NumericIO final } }; +struct CompareIO final + : public InstructionIO { + CompareIO(InstrumentationLocation::KindTy Kind) : InstructionIO(Kind) {} + + enum ConfigKind { + PassOpTypeId, + PassOpSize, + PassOpcode, + PassPredicate, + PassLeft, + PassRight, + PassResultTypeId, + PassResultSize, + PassResult, + ReplaceResult, + PassFlags, + PassId, + NumConfig, + }; + + using ConfigTy = BaseConfigTy; + ConfigTy Config; + + StringRef getName() const override { return "compare"; } + + LLVM_ABI void init(InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB, + ConfigTy *UserConfig = nullptr); + + LLVM_ABI static Value *getOperandTypeId(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); + LLVM_ABI static Value *getOperandSize(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); + LLVM_ABI static Value *getPredicate(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); + LLVM_ABI static Value *getFlags(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); + + static void populate(InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + auto *PreIO = + IConf.allocate(InstrumentationLocation::INSTRUCTION_PRE); + PreIO->init(IConf, IIRB); + auto *PostIO = + IConf.allocate(InstrumentationLocation::INSTRUCTION_POST); + PostIO->init(IConf, 
IIRB); + } +}; + } // namespace instrumentor /// The Instrumentor pass. diff --git a/llvm/include/llvm/Transforms/IPO/InstrumentorRuntimeHelper.h b/llvm/include/llvm/Transforms/IPO/InstrumentorRuntimeHelper.h index 7f98b6df47421..af58eed0bd301 100644 --- a/llvm/include/llvm/Transforms/IPO/InstrumentorRuntimeHelper.h +++ b/llvm/include/llvm/Transforms/IPO/InstrumentorRuntimeHelper.h @@ -304,4 +304,13 @@ typedef enum NumericFlags { NUMERIC_FLAG_IS_EXACT = 1 << 6, } NumericFlags; +/// CompareIO flag bitmask values. +typedef enum CompareFlags { + COMPARE_FLAG_NONE = 0, + COMPARE_FLAG_SAMESIGN = 1 << 0, + COMPARE_FLAG_HAS_NO_NANS = 1 << 1, + COMPARE_FLAG_HAS_NO_INFS = 1 << 2, + COMPARE_FLAG_HAS_NO_SIGNED_ZEROS = 1 << 3, +} CompareFlags; + #endif // INSTRUMENTOR_RUNTIME_H diff --git a/llvm/lib/Transforms/IPO/Instrumentor.cpp b/llvm/lib/Transforms/IPO/Instrumentor.cpp index 36d44500f40ca..352d223751d07 100644 --- a/llvm/lib/Transforms/IPO/Instrumentor.cpp +++ b/llvm/lib/Transforms/IPO/Instrumentor.cpp @@ -109,7 +109,7 @@ Value *tryToCast(IRBTy &IRB, Value *V, Type *Ty, const DataLayout &DL, Type *VTy = V->getType(); if (VTy == Ty) return V; - if (VTy->isAggregateType()) + if (VTy->isAggregateType() || VTy->isVectorTy()) return V; TypeSize RequestedSize = DL.getTypeSizeInBits(Ty); TypeSize ValueSize = DL.getTypeSizeInBits(VTy); @@ -557,6 +557,7 @@ void InstrumentationConfig::populate(InstrumentorIRBuilderTy &IIRB) { StoreIO::populate(*this, IIRB); CastIO::populate(*this, IIRB); NumericIO::populate(*this, IIRB); + CompareIO::populate(*this, IIRB); } void InstrumentationConfig::addChoice(InstrumentationOpportunity &IO, @@ -737,6 +738,7 @@ CallInst *IRTCallDescription::createLLVMCall(Value *&V, if (Param->getType()->isVoidTy()) { Param = Constant::getNullValue(It.Ty); } else if (Param->getType()->isAggregateType() || + Param->getType()->isVectorTy() || DL.getTypeSizeInBits(Param->getType()) > DL.getTypeSizeInBits(It.Ty)) { if (!isPotentiallyIndirect(It)) { @@ -903,6 +905,44 
@@ static void readValuePack(const Range &R, Value &Pack, } } +Value *llvm::instrumentor::getOpcode(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + auto &I = cast(V); + return getCI(&Ty, I.getOpcode()); +} + +Value *llvm::instrumentor::getTypeSize(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + auto &I = cast(V); + auto &DL = I.getDataLayout(); + return getCI(&Ty, DL.getTypeStoreSize(V.getType())); +} + +Value *llvm::instrumentor::getLeft(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + auto &I = cast(V); + return I.getOperand(0); +} + +Value *llvm::instrumentor::getRight(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + auto &I = cast(V); + if (I.getNumOperands() > 1) + return I.getOperand(1); + else + return PoisonValue::get(&Ty); +} + +Value *llvm::instrumentor::getTypeId(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + return getCI(&Ty, V.getType()->getTypeID()); +} + /// FunctionIO /// { void FunctionIO::init(InstrumentationConfig &IConf, @@ -1001,24 +1041,6 @@ Value *FunctionIO::isMainFunction(Value &V, Type &Ty, return getCI(&Ty, Fn.getName() == "main"); } -static Value *getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB) { - auto &I = cast(V); - return getCI(&Ty, I.getOpcode()); -} - -static Value *getTypeId(Value &V, Type &Ty, InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB) { - return getCI(&Ty, V.getType()->getTypeID()); -} - -static Value *getSize(Value &V, Type &Ty, InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB) { - auto &I = cast(V); - auto &DL = I.getDataLayout(); - return getCI(&Ty, DL.getTypeStoreSize(V.getType())); -} - /// UnreachableIO ///{ void UnreachableIO::init(InstrumentationConfig &IConf, @@ -1691,21 +1713,6 @@ Value *CastIO::getResultSize(Value &V, Type &Ty, 
InstrumentationConfig &IConf, } ///} -Value *NumericIO::getLeft(Value &V, Type &Ty, InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB) { - auto &I = cast(V); - return I.getOperand(0); -} - -Value *NumericIO::getRight(Value &V, Type &Ty, InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB) { - auto &I = cast(V); - if (I.getNumOperands() > 1) - return I.getOperand(1); - else - return PoisonValue::get(&Ty); -} - Value *NumericIO::getFlags(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB) { auto &I = cast(V); @@ -1763,7 +1770,7 @@ void NumericIO::init(InstrumentationConfig &IConf, getTypeId)); if (Config.has(PassSize)) IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "size", "The operation's type size.", - IRTArg::NONE, getSize)); + IRTArg::NONE, getTypeSize)); if (Config.has(PassOpcode)) IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "opcode", "The instruction opcode.", IRTArg::NONE, getOpcode)); @@ -1789,3 +1796,102 @@ void NumericIO::init(InstrumentationConfig &IConf, addCommonArgs(IConf, IIRB.Ctx, Config.has(PassId)); IConf.addChoice(*this, IIRB.Ctx); } + +Value *CompareIO::getOperandTypeId(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + auto &I = cast(V); + return getCI(&Ty, I.getOperand(0)->getType()->getTypeID()); +} + +Value *CompareIO::getOperandSize(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + auto &I = cast(V); + auto &DL = I.getDataLayout(); + return getCI(&Ty, DL.getTypeStoreSize(I.getOperand(0)->getType())); +} + +Value *CompareIO::getPredicate(Value &V, Type &Ty, InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + auto *CI = dyn_cast(&V); + return getCI(&Ty, CI->getPredicate()); +} + +Value *CompareIO::getFlags(Value &V, Type &Ty, InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + auto &I = cast(V); + uint64_t Flag = NUMERIC_FLAG_NONE; + + switch (I.getOpcode()) { + case Instruction::ICmp: + if 
(dyn_cast(&V)->hasSameSign()) + Flag |= COMPARE_FLAG_SAMESIGN; + break; + case Instruction::FCmp: + if (I.hasNoNaNs()) + Flag |= COMPARE_FLAG_HAS_NO_NANS; + if (I.hasNoInfs()) + Flag |= COMPARE_FLAG_HAS_NO_INFS; + if (I.hasNoSignedZeros()) + Flag |= COMPARE_FLAG_HAS_NO_SIGNED_ZEROS; + break; + } + + return getCI(&Ty, Flag); +} + +void CompareIO::init(InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB, ConfigTy *UserConfig) { + if (UserConfig) + Config = UserConfig; + bool IsPRE = getLocationKind() == InstrumentationLocation::INSTRUCTION_PRE; + const auto OperandArgOpts = + IRTArg::POTENTIALLY_INDIRECT | + (Config.has(PassOpSize) ? IRTArg::INDIRECT_HAS_SIZE : IRTArg::NONE); + if (Config.has(PassOpTypeId)) + IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "operand_type_id", + "The operand type id.", IRTArg::NONE, + getOperandTypeId)); + if (Config.has(PassOpSize)) + IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "operand_size", + "The operand type size.", IRTArg::NONE, + getOperandSize)); + if (Config.has(PassOpcode)) + IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "opcode", "The instruction opcode.", + IRTArg::NONE, getOpcode)); + if (Config.has(PassPredicate)) + IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "predicate", + "The comparison predicate ID.", IRTArg::NONE, + getPredicate)); + if (Config.has(PassLeft)) + IRTArgs.push_back(IRTArg(IIRB.Int64Ty, "left", + "The comparison's left operand.", OperandArgOpts, + getLeft)); + if (Config.has(PassRight)) + IRTArgs.push_back(IRTArg(IIRB.Int64Ty, "right", + "The comparison's right operand.", OperandArgOpts, + getRight)); + if (!IsPRE && Config.has(PassResultSize)) + IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "result_type_id", + "The result value's type ID.", IRTArg::NONE, + getTypeId)); + if (!IsPRE && Config.has(PassResultSize)) + IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "result_size", + "Size of the result value.", IRTArg::NONE, + getTypeSize)); + if (!IsPRE && Config.has(PassResult)) + IRTArgs.push_back( + IRTArg(IIRB.Int64Ty, "result", 
"Result of the operation.", + IRTArg::REPLACABLE | IRTArg::POTENTIALLY_INDIRECT | + (Config.has(PassResultSize) ? IRTArg::INDIRECT_HAS_SIZE + : IRTArg::NONE), + getValue, Config.has(ReplaceResult) ? replaceValue : nullptr)); + if (Config.has(PassFlags)) + IRTArgs.push_back( + IRTArg(IIRB.Int64Ty, "flags", + "A bitmask value signaling which instruction flags are present.", + IRTArg::NONE, getFlags)); + addCommonArgs(IConf, IIRB.Ctx, Config.has(PassId)); + IConf.addChoice(*this, IIRB.Ctx); +} diff --git a/llvm/test/Instrumentation/Instrumentor/compare.ll b/llvm/test/Instrumentation/Instrumentor/compare.ll new file mode 100644 index 0000000000000..2eba6da3c6b68 --- /dev/null +++ b/llvm/test/Instrumentation/Instrumentor/compare.ll @@ -0,0 +1,112 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=instrumentor -instrumentor-read-config-files=%S/compare_config.json -S | FileCheck %s + +define <128 x i1> @test_ivec_128(<128 x i32> %0, <128 x i32> %1) { +; CHECK-LABEL: define <128 x i1> @test_ivec_128( +; CHECK-SAME: <128 x i32> [[TMP0:%.*]], <128 x i32> [[TMP1:%.*]]) { +; CHECK-NEXT: [[TMP3:%.*]] = alloca <128 x i1>, align 16 +; CHECK-NEXT: [[TMP4:%.*]] = alloca <128 x i32>, align 512 +; CHECK-NEXT: [[TMP5:%.*]] = alloca <128 x i32>, align 512 +; CHECK-NEXT: store <128 x i32> [[TMP0]], ptr [[TMP5]], align 512 +; CHECK-NEXT: store <128 x i32> [[TMP1]], ptr [[TMP4]], align 512 +; CHECK-NEXT: call void @__instrumentor_pre_compare_ind(i32 18, i32 512, i32 55, ptr [[TMP5]], ptr [[TMP4]], i64 0, i32 1) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <128 x i32> [[TMP0]], [[TMP1]] +; CHECK-NEXT: store <128 x i1> [[TMP6]], ptr [[TMP3]], align 16 +; CHECK-NEXT: call void @__instrumentor_post_compare_ind(i32 18, i32 512, i32 55, ptr [[TMP5]], ptr [[TMP4]], i32 18, i32 16, ptr [[TMP3]], i64 0, i32 -1) #[[ATTR0]] +; CHECK-NEXT: [[TMP7:%.*]] = load <128 x i1>, ptr [[TMP3]], align 16 +; CHECK-NEXT: ret 
<128 x i1> [[TMP7]] +; + %3 = icmp eq <128 x i32> %0, %1 + ret <128 x i1> %3 +} + +define <32 x i1> @test_ivec_32(<32 x i32> %0, <32 x i32> %1) { +; CHECK-LABEL: define <32 x i1> @test_ivec_32( +; CHECK-SAME: <32 x i32> [[TMP0:%.*]], <32 x i32> [[TMP1:%.*]]) { +; CHECK-NEXT: [[TMP3:%.*]] = alloca <32 x i1>, align 4 +; CHECK-NEXT: [[TMP4:%.*]] = alloca <32 x i32>, align 128 +; CHECK-NEXT: [[TMP5:%.*]] = alloca <32 x i32>, align 128 +; CHECK-NEXT: store <32 x i32> [[TMP0]], ptr [[TMP5]], align 128 +; CHECK-NEXT: store <32 x i32> [[TMP1]], ptr [[TMP4]], align 128 +; CHECK-NEXT: call void @__instrumentor_pre_compare_ind(i32 18, i32 128, i32 55, ptr [[TMP5]], ptr [[TMP4]], i64 0, i32 2) #[[ATTR0]] +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <32 x i32> [[TMP0]], [[TMP1]] +; CHECK-NEXT: store <32 x i1> [[TMP6]], ptr [[TMP3]], align 4 +; CHECK-NEXT: call void @__instrumentor_post_compare_ind(i32 18, i32 128, i32 55, ptr [[TMP5]], ptr [[TMP4]], i32 18, i32 4, ptr [[TMP3]], i64 0, i32 -2) #[[ATTR0]] +; CHECK-NEXT: [[TMP7:%.*]] = load <32 x i1>, ptr [[TMP3]], align 4 +; CHECK-NEXT: ret <32 x i1> [[TMP7]] +; + %3 = icmp eq <32 x i32> %0, %1 + ret <32 x i1> %3 +} + +define i1 @test_int_i1(i32 %0, i32 %1) { +; CHECK-LABEL: define i1 @test_int_i1( +; CHECK-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) { +; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: call void @__instrumentor_pre_compare(i32 12, i32 4, i32 55, i64 [[TMP3]], i64 [[TMP4]], i64 0, i32 3) #[[ATTR0]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP6:%.*]] = zext i1 [[TMP5]] to i64 +; CHECK-NEXT: [[TMP7:%.*]] = call i64 @__instrumentor_post_compare(i32 12, i32 4, i32 55, i64 [[TMP3]], i64 [[TMP4]], i32 12, i32 1, i64 [[TMP6]], i64 0, i32 -3) #[[ATTR0]] +; CHECK-NEXT: [[TMP8:%.*]] = trunc i64 [[TMP7]] to i1 +; CHECK-NEXT: ret i1 [[TMP8]] +; + %3 = icmp ult i32 %0, %1 + ret i1 %3 +} + +define <128 x i1> 
@test_fvec_128(<128 x float> %0, <128 x float> %1) { +; CHECK-LABEL: define <128 x i1> @test_fvec_128( +; CHECK-SAME: <128 x float> [[TMP0:%.*]], <128 x float> [[TMP1:%.*]]) { +; CHECK-NEXT: [[TMP3:%.*]] = alloca <128 x i1>, align 16 +; CHECK-NEXT: [[TMP4:%.*]] = alloca <128 x float>, align 512 +; CHECK-NEXT: [[TMP5:%.*]] = alloca <128 x float>, align 512 +; CHECK-NEXT: store <128 x float> [[TMP0]], ptr [[TMP5]], align 512 +; CHECK-NEXT: store <128 x float> [[TMP1]], ptr [[TMP4]], align 512 +; CHECK-NEXT: call void @__instrumentor_pre_compare_ind(i32 18, i32 512, i32 56, ptr [[TMP5]], ptr [[TMP4]], i64 0, i32 4) #[[ATTR0]] +; CHECK-NEXT: [[TMP6:%.*]] = fcmp olt <128 x float> [[TMP0]], [[TMP1]] +; CHECK-NEXT: store <128 x i1> [[TMP6]], ptr [[TMP3]], align 16 +; CHECK-NEXT: call void @__instrumentor_post_compare_ind(i32 18, i32 512, i32 56, ptr [[TMP5]], ptr [[TMP4]], i32 18, i32 16, ptr [[TMP3]], i64 0, i32 -4) #[[ATTR0]] +; CHECK-NEXT: [[TMP7:%.*]] = load <128 x i1>, ptr [[TMP3]], align 16 +; CHECK-NEXT: ret <128 x i1> [[TMP7]] +; + %3 = fcmp olt <128 x float> %0, %1 + ret <128 x i1> %3 +} + +define <32 x i1> @test_fvec_32(<32 x float> %0, <32 x float> %1) { +; CHECK-LABEL: define <32 x i1> @test_fvec_32( +; CHECK-SAME: <32 x float> [[TMP0:%.*]], <32 x float> [[TMP1:%.*]]) { +; CHECK-NEXT: [[TMP3:%.*]] = alloca <32 x i1>, align 4 +; CHECK-NEXT: [[TMP4:%.*]] = alloca <32 x float>, align 128 +; CHECK-NEXT: [[TMP5:%.*]] = alloca <32 x float>, align 128 +; CHECK-NEXT: store <32 x float> [[TMP0]], ptr [[TMP5]], align 128 +; CHECK-NEXT: store <32 x float> [[TMP1]], ptr [[TMP4]], align 128 +; CHECK-NEXT: call void @__instrumentor_pre_compare_ind(i32 18, i32 128, i32 56, ptr [[TMP5]], ptr [[TMP4]], i64 2, i32 5) #[[ATTR0]] +; CHECK-NEXT: [[TMP6:%.*]] = fcmp nnan oeq <32 x float> [[TMP0]], [[TMP1]] +; CHECK-NEXT: store <32 x i1> [[TMP6]], ptr [[TMP3]], align 4 +; CHECK-NEXT: call void @__instrumentor_post_compare_ind(i32 18, i32 128, i32 56, ptr [[TMP5]], ptr [[TMP4]], i32 
18, i32 4, ptr [[TMP3]], i64 2, i32 -5) #[[ATTR0]] +; CHECK-NEXT: [[TMP7:%.*]] = load <32 x i1>, ptr [[TMP3]], align 4 +; CHECK-NEXT: ret <32 x i1> [[TMP7]] +; + %3 = fcmp nnan oeq <32 x float> %0, %1 + ret <32 x i1> %3 +} + +define i1 @test_float_i1(float %0, float %1) { +; CHECK-LABEL: define i1 @test_float_i1( +; CHECK-SAME: float [[TMP0:%.*]], float [[TMP1:%.*]]) { +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float [[TMP0]] to i32 +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast float [[TMP1]] to i32 +; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP5]] to i64 +; CHECK-NEXT: call void @__instrumentor_pre_compare(i32 2, i32 4, i32 56, i64 [[TMP4]], i64 [[TMP6]], i64 14, i32 6) #[[ATTR0]] +; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ueq float [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP8:%.*]] = zext i1 [[TMP7]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @__instrumentor_post_compare(i32 2, i32 4, i32 56, i64 [[TMP4]], i64 [[TMP6]], i32 12, i32 1, i64 [[TMP8]], i64 14, i32 -6) #[[ATTR0]] +; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i1 +; CHECK-NEXT: ret i1 [[TMP10]] +; + %3 = fcmp fast ueq float %0, %1 + ret i1 %3 +} diff --git a/llvm/test/Instrumentation/Instrumentor/compare_config.json b/llvm/test/Instrumentation/Instrumentor/compare_config.json new file mode 100644 index 0000000000000..f42e686cf90c4 --- /dev/null +++ b/llvm/test/Instrumentation/Instrumentor/compare_config.json @@ -0,0 +1,33 @@ +{ + "configuration": { + "runtime_prefix": "__instrumentor_" + }, + "instruction_pre": { + "compare": { + "enabled": true, + "operand_type_id": true, + "operand_size": true, + "opcode": true, + "left": true, + "right": true, + "flags": true, + "id": true + } + }, + "instruction_post": { + "compare": { + "enabled": true, + "operand_type_id": true, + "operand_size": true, + "opcode": true, + "left": true, + "right": true, + "result_type_id": true, + "result_size": true, + "result": true, + "result.replace": true, + "flags": true, + "id": 
true + } + } +} diff --git a/llvm/test/Instrumentation/Instrumentor/default_config.json b/llvm/test/Instrumentation/Instrumentor/default_config.json index 08bde4a265450..4d9c2834cbc14 100644 --- a/llvm/test/Instrumentation/Instrumentor/default_config.json +++ b/llvm/test/Instrumentation/Instrumentor/default_config.json @@ -242,6 +242,27 @@ "flags.description": "A bitmask value signaling which instruction flags are present.", "id": true, "id.description": "A unique ID associated with the given instrumentor call" + }, + "compare": { + "enabled": true, + "filter": "", + "filter.description": "Static property filter to exclude instrumentation.", + "operand_type_id": true, + "operand_type_id.description": "The operand type id.", + "operand_size": true, + "operand_size.description": "The operand type size.", + "opcode": true, + "opcode.description": "The instruction opcode.", + "predicate": true, + "predicate.description": "The comparison predicate ID.", + "left": true, + "left.description": "The comparison's left operand.", + "right": true, + "right.description": "The comparison's right operand.", + "flags": true, + "flags.description": "A bitmask value signaling which instruction flags are present.", + "id": true, + "id.description": "A unique ID associated with the given instrumentor call" } }, "instruction_post": { @@ -357,6 +378,34 @@ "flags.description": "A bitmask value signaling which instruction flags are present.", "id": true, "id.description": "A unique ID associated with the given instrumentor call" + }, + "compare": { + "enabled": true, + "filter": "", + "filter.description": "Static property filter to exclude instrumentation.", + "operand_type_id": true, + "operand_type_id.description": "The operand type id.", + "operand_size": true, + "operand_size.description": "The operand type size.", + "opcode": true, + "opcode.description": "The instruction opcode.", + "predicate": true, + "predicate.description": "The comparison predicate ID.", + "left": true, + 
"left.description": "The comparison's left operand.", + "right": true, + "right.description": "The comparison's right operand.", + "result_type_id": true, + "result_type_id.description": "The result value's type ID.", + "result_size": true, + "result_size.description": "Size of the result value.", + "result": true, + "result.replace": true, + "result.description": "Result of the operation.", + "flags": true, + "flags.description": "A bitmask value signaling which instruction flags are present.", + "id": true, + "id.description": "A unique ID associated with the given instrumentor call" } }, "special_value": { diff --git a/llvm/test/Instrumentation/Instrumentor/default_rt.h b/llvm/test/Instrumentation/Instrumentor/default_rt.h index e21e185e0389c..e09686f8ce4cd 100644 --- a/llvm/test/Instrumentation/Instrumentor/default_rt.h +++ b/llvm/test/Instrumentation/Instrumentor/default_rt.h @@ -304,6 +304,15 @@ typedef enum NumericFlags { NUMERIC_FLAG_IS_EXACT = 1 << 6, } NumericFlags; +/// CompareIO flag bitmask values. +typedef enum CompareFlags { + COMPARE_FLAG_NONE = 0, + COMPARE_FLAG_SAMESIGN = 1 << 0, + COMPARE_FLAG_HAS_NO_NANS = 1 << 1, + COMPARE_FLAG_HAS_NO_INFS = 1 << 2, + COMPARE_FLAG_HAS_NO_SIGNED_ZEROS = 1 << 3, +} CompareFlags; + #endif // INSTRUMENTOR_RUNTIME_H // Generated with runtime prefix: __instrumentor_ From 8e10c8f75929fb5243ee521d64bc6f8c02f5f551 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Tue, 23 Jun 2026 13:14:03 -0500 Subject: [PATCH 227/511] [TableGen] Use llvm_unreachable in switch guard for all except gcc 8- (#205385) Follow-up to https://github.com/llvm/llvm-project/pull/194728. For gccs older than v9 use abort. That seems to make everybody happy. 
--- llvm/test/TableGen/directive1.td | 24 +++++++++------ llvm/test/TableGen/directive2.td | 24 +++++++++------ .../utils/TableGen/Basic/DirectiveEmitter.cpp | 30 +++++++++---------- 3 files changed, 45 insertions(+), 33 deletions(-) diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td index fc6a1fcce47e5..7dca15e908665 100644 --- a/llvm/test/TableGen/directive1.td +++ b/llvm/test/TableGen/directive1.td @@ -143,9 +143,11 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // CHECK-NEXT: case TDLD_dira: // CHECK-NEXT: return Association::None; // CHECK-NEXT: } // switch (Dir) -// CHECK-NEXT: assert(llvm::to_underlying(Dir) >= llvm::to_underlying(Directive::First_) && -// CHECK-NEXT: llvm::to_underlying(Dir) <= llvm::to_underlying(Directive::Last_) && -// CHECK-NEXT: "Unexpected directive"); +// CHECK-NEXT: #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9 +// CHECK-NEXT: abort(); +// CHECK-NEXT: #else +// CHECK-NEXT: llvm_unreachable("Unexpected directive"); +// CHECK-NEXT: #endif // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: constexpr Category getDirectiveCategory(Directive Dir) { @@ -153,9 +155,11 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // CHECK-NEXT: case TDLD_dira: // CHECK-NEXT: return Category::Executable; // CHECK-NEXT: } // switch (Dir) -// CHECK-NEXT: assert(llvm::to_underlying(Dir) >= llvm::to_underlying(Directive::First_) && -// CHECK-NEXT: llvm::to_underlying(Dir) <= llvm::to_underlying(Directive::Last_) && -// CHECK-NEXT: "Unexpected directive"); +// CHECK-NEXT: #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9 +// CHECK-NEXT: abort(); +// CHECK-NEXT: #else +// CHECK-NEXT: llvm_unreachable("Unexpected directive"); +// CHECK-NEXT: #endif // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: constexpr SourceLanguage getDirectiveLanguages(Directive D) { @@ -163,9 +167,11 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // CHECK-NEXT: case TDLD_dira: // CHECK-NEXT: return SourceLanguage::C | 
SourceLanguage::Fortran; // CHECK-NEXT: } // switch(D) -// CHECK-NEXT: assert(llvm::to_underlying(D) >= llvm::to_underlying(Directive::First_) && -// CHECK-NEXT: llvm::to_underlying(D) <= llvm::to_underlying(Directive::Last_) && -// CHECK-NEXT: "Unexpected directive"); +// CHECK-NEXT: #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9 +// CHECK-NEXT: abort(); +// CHECK-NEXT: #else +// CHECK-NEXT: llvm_unreachable("Unexpected directive"); +// CHECK-NEXT: #endif // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: // Enumeration helper functions diff --git a/llvm/test/TableGen/directive2.td b/llvm/test/TableGen/directive2.td index edde2d19f5117..31a3bc907988c 100644 --- a/llvm/test/TableGen/directive2.td +++ b/llvm/test/TableGen/directive2.td @@ -119,9 +119,11 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // CHECK-NEXT: case TDLD_dira: // CHECK-NEXT: return Association::Block; // CHECK-NEXT: } // switch (Dir) -// CHECK-NEXT: assert(llvm::to_underlying(Dir) >= llvm::to_underlying(Directive::First_) && -// CHECK-NEXT: llvm::to_underlying(Dir) <= llvm::to_underlying(Directive::Last_) && -// CHECK-NEXT: "Unexpected directive"); +// CHECK-NEXT: #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9 +// CHECK-NEXT: abort(); +// CHECK-NEXT: #else +// CHECK-NEXT: llvm_unreachable("Unexpected directive"); +// CHECK-NEXT: #endif // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: constexpr Category getDirectiveCategory(Directive Dir) { @@ -129,9 +131,11 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // CHECK-NEXT: case TDLD_dira: // CHECK-NEXT: return Category::Declarative; // CHECK-NEXT: } // switch (Dir) -// CHECK-NEXT: assert(llvm::to_underlying(Dir) >= llvm::to_underlying(Directive::First_) && -// CHECK-NEXT: llvm::to_underlying(Dir) <= llvm::to_underlying(Directive::Last_) && -// CHECK-NEXT: "Unexpected directive"); +// CHECK-NEXT: #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9 +// CHECK-NEXT: abort(); +// CHECK-NEXT: #else +// CHECK-NEXT: 
llvm_unreachable("Unexpected directive"); +// CHECK-NEXT: #endif // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: constexpr SourceLanguage getDirectiveLanguages(Directive D) { @@ -139,9 +143,11 @@ def TDL_DirA : Directive<[Spelling<"dira">]> { // CHECK-NEXT: case TDLD_dira: // CHECK-NEXT: return SourceLanguage::C | SourceLanguage::Fortran; // CHECK-NEXT: } // switch(D) -// CHECK-NEXT: assert(llvm::to_underlying(D) >= llvm::to_underlying(Directive::First_) && -// CHECK-NEXT: llvm::to_underlying(D) <= llvm::to_underlying(Directive::Last_) && -// CHECK-NEXT: "Unexpected directive"); +// CHECK-NEXT: #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9 +// CHECK-NEXT: abort(); +// CHECK-NEXT: #else +// CHECK-NEXT: llvm_unreachable("Unexpected directive"); +// CHECK-NEXT: #endif // CHECK-NEXT: } // CHECK-EMPTY: // CHECK-NEXT: // Enumeration helper functions diff --git a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp index e091cda694f99..55675fdec8448 100644 --- a/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/Basic/DirectiveEmitter.cpp @@ -850,11 +850,11 @@ static void generateGetDirectiveAssociation(const DirectiveLanguage &DirLang, } } OS << " } // switch (Dir)\n"; - OS << " assert(llvm::to_underlying(Dir) >= " - "llvm::to_underlying(Directive::First_) &&\n"; - OS << " llvm::to_underlying(Dir) <= " - "llvm::to_underlying(Directive::Last_) &&\n"; - OS << " \"Unexpected directive\");\n"; + OS << "#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9\n"; + OS << " abort();\n"; + OS << "#else\n"; + OS << " llvm_unreachable(\"Unexpected directive\");\n"; + OS << "#endif\n"; OS << "}\n"; } @@ -872,11 +872,11 @@ static void generateGetDirectiveCategory(const DirectiveLanguage &DirLang, << ";\n"; } OS << " } // switch (Dir)\n"; - OS << " assert(llvm::to_underlying(Dir) >= " - "llvm::to_underlying(Directive::First_) &&\n"; - OS << " llvm::to_underlying(Dir) <= " - 
"llvm::to_underlying(Directive::Last_) &&\n"; - OS << " \"Unexpected directive\");\n"; + OS << "#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9\n"; + OS << " abort();\n"; + OS << "#else\n"; + OS << " llvm_unreachable(\"Unexpected directive\");\n"; + OS << "#endif\n"; OS << "}\n"; } @@ -901,11 +901,11 @@ static void generateGetDirectiveLanguages(const DirectiveLanguage &DirLang, OS << ";\n"; } OS << " } // switch(D)\n"; - OS << " assert(llvm::to_underlying(D) >= " - "llvm::to_underlying(Directive::First_) &&\n"; - OS << " llvm::to_underlying(D) <= " - "llvm::to_underlying(Directive::Last_) &&\n"; - OS << " \"Unexpected directive\");\n"; + OS << "#if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 9\n"; + OS << " abort();\n"; + OS << "#else\n"; + OS << " llvm_unreachable(\"Unexpected directive\");\n"; + OS << "#endif\n"; OS << "}\n"; } From 494ee3e129d88370ed5ca53019f105b4b37f222d Mon Sep 17 00:00:00 2001 From: adams381 Date: Tue, 23 Jun 2026 13:16:46 -0500 Subject: [PATCH 228/511] [CIR] Lower bitwise vector reduce builtins `__builtin_reduce_or`, `__builtin_reduce_and`, and `__builtin_reduce_xor` were caught by the batch `errorBuiltinNYI` switch in CIRGen and rejected with an NYI diagnostic. They now lower to the matching `llvm.vector.reduce.{or,and,xor}` intrinsics on the vector element type, mirroring classic CodeGen in `CGBuiltin.cpp`. The emission routes through `emitBuiltinWithOneOverloadedType`, extended with an optional result type: a vector reduction returns the element type, and CIR has no intrinsic registry to derive it the way classic's `getIntrinsic(ID, argTy)` does, so the element type is supplied explicitly (the default keeps the existing callers unchanged). The intrinsic name is passed without the `llvm.` prefix, which `LowerToLLVM` prepends, producing LLVM output byte-identical to OGCG. 
The `builtin-undef-rvalue` regression test previously pinned `reduce_or` as NYI; it is retargeted to `reduce_add`, which is still NYI, so the undef-rvalue path stays covered. --- clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 13 +++++ clang/lib/CIR/CodeGen/CIRGenFunction.h | 17 ++++--- .../CodeGenBuiltins/builtin-reduce-bitwise.c | 49 +++++++++++++++++++ .../CodeGenBuiltins/builtin-undef-rvalue.cpp | 10 ++-- 4 files changed, 78 insertions(+), 11 deletions(-) create mode 100644 clang/test/CIR/CodeGenBuiltins/builtin-reduce-bitwise.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index e206353aac2c9..50529a61068d5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -1731,9 +1731,22 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, case Builtin::BI__builtin_reduce_min: case Builtin::BI__builtin_reduce_add: case Builtin::BI__builtin_reduce_mul: + return errorBuiltinNYI(*this, e, builtinID); case Builtin::BI__builtin_reduce_xor: + return emitBuiltinWithOneOverloadedType<1>( + e, "vector.reduce.xor", + cast(convertType(e->getArg(0)->getType())) + .getElementType()); case Builtin::BI__builtin_reduce_or: + return emitBuiltinWithOneOverloadedType<1>( + e, "vector.reduce.or", + cast(convertType(e->getArg(0)->getType())) + .getElementType()); case Builtin::BI__builtin_reduce_and: + return emitBuiltinWithOneOverloadedType<1>( + e, "vector.reduce.and", + cast(convertType(e->getArg(0)->getType())) + .getElementType()); case Builtin::BI__builtin_reduce_assoc_fadd: case Builtin::BI__builtin_reduce_in_order_fadd: case Builtin::BI__builtin_reduce_maximum: diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index d0b936f45378d..b6a4a277fab92 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1705,16 +1705,21 @@ class CIRGenFunction : public CIRGenTypeCache { void 
instantiateIndirectGotoBlock(); - /// Emit a simple LLVM intrinsic that takes N scalar arguments and whose - /// return type matches the type of the first argument. The intrinsic name is - /// used verbatim; any overload mangling (e.g. `.f32`, `.p1`) must be baked - /// into \p intrinName by the caller. + /// Emit a simple LLVM intrinsic that takes N scalar arguments. The intrinsic + /// name is used verbatim; any overload mangling (e.g. `.f32`, `.p1`) must be + /// baked into \p intrinName by the caller. The result type defaults to the + /// type of the first argument; pass \p resultType for intrinsics whose result + /// differs from the operand, such as a vector reduction that returns the + /// element type. Unlike classic CodeGen, CIR has no intrinsic registry to + /// derive the result type from the operand, so it must be supplied here. template [[maybe_unused]] RValue emitBuiltinWithOneOverloadedType(const CallExpr *e, - llvm::StringRef intrinName) { + llvm::StringRef intrinName, + mlir::Type resultType = {}) { static_assert(N, "expect non-empty argument"); - mlir::Type cirTy = convertType(e->getArg(0)->getType()); + mlir::Type cirTy = + resultType ? 
resultType : convertType(e->getArg(0)->getType()); SmallVector args; for (unsigned i = 0; i < N; ++i) args.push_back(emitScalarExpr(e->getArg(i))); diff --git a/clang/test/CIR/CodeGenBuiltins/builtin-reduce-bitwise.c b/clang/test/CIR/CodeGenBuiltins/builtin-reduce-bitwise.c new file mode 100644 index 0000000000000..0036c62201ee0 --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/builtin-reduce-bitwise.c @@ -0,0 +1,49 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +typedef int v4si __attribute__((vector_size(16))); +typedef unsigned int v4su __attribute__((vector_size(16))); + +int test_reduce_or(v4si x) { + // CIR-LABEL: @test_reduce_or + // CIR: cir.call_llvm_intrinsic "vector.reduce.or" + // CIR: cir.return + // LLVM-LABEL: @test_reduce_or + // LLVM: call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> + // LLVM: ret i32 + return __builtin_reduce_or(x); +} + +int test_reduce_and(v4si x) { + // CIR-LABEL: @test_reduce_and + // CIR: cir.call_llvm_intrinsic "vector.reduce.and" + // CIR: cir.return + // LLVM-LABEL: @test_reduce_and + // LLVM: call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> + // LLVM: ret i32 + return __builtin_reduce_and(x); +} + +int test_reduce_xor(v4si x) { + // CIR-LABEL: @test_reduce_xor + // CIR: cir.call_llvm_intrinsic "vector.reduce.xor" + // CIR: cir.return + // LLVM-LABEL: @test_reduce_xor + // LLVM: call i32 @llvm.vector.reduce.xor.v4i32(<4 x i32> + // LLVM: ret i32 + return __builtin_reduce_xor(x); +} + +unsigned test_reduce_or_unsigned(v4su x) { + // CIR-LABEL: @test_reduce_or_unsigned + // CIR: cir.call_llvm_intrinsic "vector.reduce.or" + // CIR: 
cir.return + // LLVM-LABEL: @test_reduce_or_unsigned + // LLVM: call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> + // LLVM: ret i32 + return __builtin_reduce_or(x); +} diff --git a/clang/test/CIR/CodeGenBuiltins/builtin-undef-rvalue.cpp b/clang/test/CIR/CodeGenBuiltins/builtin-undef-rvalue.cpp index 3ba0266b0f83a..335383c075dac 100644 --- a/clang/test/CIR/CodeGenBuiltins/builtin-undef-rvalue.cpp +++ b/clang/test/CIR/CodeGenBuiltins/builtin-undef-rvalue.cpp @@ -5,15 +5,15 @@ typedef int v4si __attribute__((vector_size(16))); -int test_builtin_reduce_or_undef_rvalue(v4si x) { - // expected-error@+1 {{unimplemented X86 builtin call: __builtin_reduce_or}} - return __builtin_reduce_or(x); +int test_builtin_reduce_add_undef_rvalue(v4si x) { + // expected-error@+1 {{unimplemented X86 builtin call: __builtin_reduce_add}} + return __builtin_reduce_add(x); } -// CIR-LABEL: @_Z35test_builtin_reduce_or_undef_rvalueDv4_i +// CIR-LABEL: @_Z36test_builtin_reduce_add_undef_rvalueDv4_i // CIR: cir.const #cir.undef : !s32i // CIR: cir.return -// LLVM-LABEL: @_Z35test_builtin_reduce_or_undef_rvalueDv4_i +// LLVM-LABEL: @_Z36test_builtin_reduce_add_undef_rvalueDv4_i // LLVM: store i32 undef, ptr %{{.+}}, align 4 // LLVM: ret i32 %{{.+}} From b3d0fbe3c403d83c3ec84e76d370e198a872d560 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Tue, 23 Jun 2026 11:18:00 -0700 Subject: [PATCH 229/511] [llvm][option] Remove bitfield marshalling (#203051) Marshaling of bitfield options is adding some extra complexity in the form of extractors and mergers, but is now unused. This PR removes that feature. 
--- clang/docs/InternalsManual.rst | 12 ++--- clang/lib/Frontend/CompilerInvocation.cpp | 46 ++++--------------- llvm/include/llvm/Option/OptParser.td | 12 ----- .../Option/OptionMarshallingTest.cpp | 3 +- llvm/utils/TableGen/OptionParserEmitter.cpp | 9 ---- 5 files changed, 15 insertions(+), 67 deletions(-) diff --git a/clang/docs/InternalsManual.rst b/clang/docs/InternalsManual.rst index 2dab979dac3e4..a09e23063858c 100644 --- a/clang/docs/InternalsManual.rst +++ b/clang/docs/InternalsManual.rst @@ -901,11 +901,11 @@ command line: PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ - MERGER, EXTRACTOR, TABLE_INDEX) \ + TABLE_INDEX) \ PARSE_OPTION_WITH_MARSHALLING(Args, Diags, Success, ID, FLAGS, PARAM, \ SHOULD_PARSE, KEYPATH, DEFAULT_VALUE, \ IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, \ - MERGER, TABLE_INDEX) + TABLE_INDEX) #include "clang/Options/Options.inc" #undef LANG_OPTION_WITH_MARSHALLING @@ -921,10 +921,10 @@ command line: PREFIX_TYPE, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELPTEXT, METAVAR, VALUES, SPELLING, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ - MERGER, EXTRACTOR, TABLE_INDEX) \ + TABLE_INDEX) \ GENERATE_OPTION_WITH_MARSHALLING( \ Args, SA, KIND, FLAGS, SPELLING, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ - IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, EXTRACTOR, TABLE_INDEX) + IMPLIED_CHECK, IMPLIED_VALUE, DENORMALIZER, TABLE_INDEX) #include "clang/Options/Options.inc" #undef LANG_OPTION_WITH_MARSHALLING @@ -1057,10 +1057,6 @@ comma-separated string values and elements of the array within NormalizedValuesScope<"LangOptions::ThreadModelKind">, MarshallingInfoEnum, "POSIX">; -.. - Intentionally omitting MarshallingInfoBitfieldFlag. 
It's adding some - complexity to the marshalling infrastructure and might be removed. - It is also possible to define relationships between options. **Implication** diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 55b344fc2da26..dfde7b756dbff 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -518,58 +518,32 @@ static std::optional normalizeTriple(OptSpecifier Opt, return llvm::Triple::normalize(Arg->getValue()); } -template -static T mergeForwardValue(T KeyPath, U Value) { - return static_cast(Value); -} - -template -[[maybe_unused]] static T mergeMaskValue(T KeyPath, U Value) { - return KeyPath | Value; -} - -template static T extractForwardValue(T KeyPath) { - return KeyPath; -} - -template -[[maybe_unused]] static T extractMaskValue(T KeyPath) { - return ((KeyPath & Value) == Value) ? static_cast(Value) : T(); -} - #define PARSE_OPTION_WITH_MARSHALLING( \ ARGS, DIAGS, PREFIX_TYPE, SPELLING_OFFSET, ID, KIND, GROUP, ALIAS, \ ALIASARGS, FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, \ METAVAR, VALUES, SUBCOMMANDIDS_OFFSET, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, \ DEFAULT_VALUE, IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, \ - MERGER, EXTRACTOR, TABLE_INDEX) \ + TABLE_INDEX) \ if ((VISIBILITY) & options::CC1Option) { \ - KEYPATH = MERGER(KEYPATH, DEFAULT_VALUE); \ + KEYPATH = static_cast(DEFAULT_VALUE); \ if (IMPLIED_CHECK) \ - KEYPATH = MERGER(KEYPATH, IMPLIED_VALUE); \ + KEYPATH = static_cast(IMPLIED_VALUE); \ if (SHOULD_PARSE) \ if (auto MaybeValue = NORMALIZER(OPT_##ID, TABLE_INDEX, ARGS, DIAGS)) \ - KEYPATH = \ - MERGER(KEYPATH, static_cast(*MaybeValue)); \ + KEYPATH = static_cast(*MaybeValue); \ } -// Capture the extracted value as a lambda argument to avoid potential issues -// with lifetime extension of the reference. 
#define GENERATE_OPTION_WITH_MARSHALLING( \ CONSUMER, PREFIX_TYPE, SPELLING_OFFSET, ID, KIND, GROUP, ALIAS, ALIASARGS, \ FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ SUBCOMMANDIDS_OFFSET, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ - IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, \ - TABLE_INDEX) \ + IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, TABLE_INDEX) \ if ((VISIBILITY) & options::CC1Option) { \ - [&](const auto &Extracted) { \ - if (ALWAYS_EMIT || \ - (Extracted != \ - static_cast((IMPLIED_CHECK) ? (IMPLIED_VALUE) \ - : (DEFAULT_VALUE)))) \ - DENORMALIZER(CONSUMER, SPELLING_OFFSET, Option::KIND##Class, \ - TABLE_INDEX, Extracted); \ - }(EXTRACTOR(KEYPATH)); \ + if (ALWAYS_EMIT || (KEYPATH != static_cast( \ + ((IMPLIED_CHECK) ? (IMPLIED_VALUE) \ + : (DEFAULT_VALUE))))) \ + DENORMALIZER(CONSUMER, SPELLING_OFFSET, Option::KIND##Class, \ + TABLE_INDEX, KEYPATH); \ } static StringRef GetInputKindName(InputKind IK); diff --git a/llvm/include/llvm/Option/OptParser.td b/llvm/include/llvm/Option/OptParser.td index 8f32fb4493511..7adb2cf6fa5de 100644 --- a/llvm/include/llvm/Option/OptParser.td +++ b/llvm/include/llvm/Option/OptParser.td @@ -134,8 +134,6 @@ class Option prefixes, string name, OptionKind kind, code NormalizedValuesScope = ""; code Normalizer = ""; code Denormalizer = ""; - code ValueMerger = "mergeForwardValue"; - code ValueExtractor = "extractForwardValue"; list NormalizedValues = ?; list SubCommands = subcommands; } @@ -248,14 +246,6 @@ class MarshallingInfoNegativeFlag - : MarshallingInfoFlag { - code Normalizer = "makeFlagToValueNormalizer("#value#")"; - code ValueMerger = "mergeMaskValue"; - code ValueExtractor = "(extractMaskValue)"; -} - // Implementation detail of BoolOption. 
class MarshallingInfoBooleanFlag @@ -280,8 +270,6 @@ class Normalizer { code Normalizer = normalizer; } class Denormalizer { code Denormalizer = denormalizer; } class NormalizedValuesScope { code NormalizedValuesScope = scope; } class NormalizedValues definitions> { list NormalizedValues = definitions; } -class ValueMerger { code ValueMerger = merger; } -class ValueExtractor { code ValueExtractor = extractor; } // Predefined options. diff --git a/llvm/unittests/Option/OptionMarshallingTest.cpp b/llvm/unittests/Option/OptionMarshallingTest.cpp index 15917cc05c51e..900b4326d33c3 100644 --- a/llvm/unittests/Option/OptionMarshallingTest.cpp +++ b/llvm/unittests/Option/OptionMarshallingTest.cpp @@ -30,8 +30,7 @@ static const OptionWithMarshallingInfo MarshallingTable[] = { PREFIX_TYPE, PREFIXED_NAME_OFFSET, ID, KIND, GROUP, ALIAS, ALIASARGS, \ FLAGS, VISIBILITY, PARAM, HELPTEXT, HELPTEXTSFORVARIANTS, METAVAR, VALUES, \ SUBCOMMANDIDS_OFFSET, SHOULD_PARSE, ALWAYS_EMIT, KEYPATH, DEFAULT_VALUE, \ - IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, MERGER, EXTRACTOR, \ - TABLE_INDEX) \ + IMPLIED_CHECK, IMPLIED_VALUE, NORMALIZER, DENORMALIZER, TABLE_INDEX) \ {PREFIXED_NAME_OFFSET, #KEYPATH, #IMPLIED_CHECK, #IMPLIED_VALUE}, #include "Opts.inc" #undef OPTION_WITH_MARSHALLING diff --git a/llvm/utils/TableGen/OptionParserEmitter.cpp b/llvm/utils/TableGen/OptionParserEmitter.cpp index 45edde3546f6f..829c202b495e4 100644 --- a/llvm/utils/TableGen/OptionParserEmitter.cpp +++ b/llvm/utils/TableGen/OptionParserEmitter.cpp @@ -71,8 +71,6 @@ class MarshallingInfo { StringRef ShouldParse; StringRef Normalizer; StringRef Denormalizer; - StringRef ValueMerger; - StringRef ValueExtractor; int TableIndex = -1; std::vector Values; std::vector NormalizedValues; @@ -118,10 +116,6 @@ struct SimpleEnumValueTable { OS << ", "; OS << Denormalizer; OS << ", "; - OS << ValueMerger; - OS << ", "; - OS << ValueExtractor; - OS << ", "; OS << TableIndex; } @@ -155,7 +149,6 @@ size_t 
MarshallingInfo::NextTableIndex = 0; static MarshallingInfo createMarshallingInfo(const Record &R) { assert(!isa(R.getValueInit("KeyPath")) && !isa(R.getValueInit("DefaultValue")) && - !isa(R.getValueInit("ValueMerger")) && "MarshallingInfo must have a provide a keypath, default value and a " "value merger"); @@ -173,8 +166,6 @@ static MarshallingInfo createMarshallingInfo(const Record &R) { Ret.ShouldParse = R.getValueAsString("ShouldParse"); Ret.Normalizer = R.getValueAsString("Normalizer"); Ret.Denormalizer = R.getValueAsString("Denormalizer"); - Ret.ValueMerger = R.getValueAsString("ValueMerger"); - Ret.ValueExtractor = R.getValueAsString("ValueExtractor"); if (!isa(R.getValueInit("NormalizedValues"))) { assert(!isa(R.getValueInit("Values")) && From c0e9aacec7f97bab965127cc2539a21594b04d1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 23 Jun 2026 11:20:02 -0700 Subject: [PATCH 230/511] Revert "[flang][cuda] Do not emit data transfer for constant read on the rhs" (#205394) Reverts llvm/llvm-project#205185 because it is causing a couple of downstream tests to fail.
Another approach is needed --- flang/include/flang/Evaluate/tools.h | 27 -------------------- flang/lib/Evaluate/tools.cpp | 3 ++- flang/test/Lower/CUDA/cuda-data-transfer.cuf | 14 +--------- 3 files changed, 3 insertions(+), 41 deletions(-) diff --git a/flang/include/flang/Evaluate/tools.h b/flang/include/flang/Evaluate/tools.h index 08468f304914b..d2d0b69e6337d 100644 --- a/flang/include/flang/Evaluate/tools.h +++ b/flang/include/flang/Evaluate/tools.h @@ -1321,15 +1321,6 @@ inline bool IsCUDAManagedOrUnifiedSymbol(const Symbol &sym) { return false; } -inline bool IsCUDAConstantSymbol(const Symbol &sym) { - if (const auto *details = - sym.GetUltimate().detailsIf()) { - return details->cudaDataAttr() && - (*details->cudaDataAttr() == common::CUDADataAttr::Constant); - } - return false; -} - // Non-allocatable module-level managed/unified variables use pointer // indirection through a companion global in __nv_managed_data__. // Explicit data transfers (cudaMemcpy) must be avoided for these @@ -1380,16 +1371,6 @@ inline int GetNbOfCUDAManagedOrUnifiedSymbols(const A &expr) { return symbols.size(); } -template inline int GetNbOfCUDAConstantSymbols(const A &expr) { - semantics::UnorderedSymbolSet symbols; - for (const Symbol &sym : CollectCudaSymbols(expr)) { - if (IsCUDAConstantSymbol(sym)) { - symbols.insert(sym); - } - } - return symbols.size(); -} - // Check if any of the symbols part of the expression has a CUDA device // attribute. 
template inline bool HasCUDADeviceAttrs(const A &expr) { @@ -1402,19 +1383,11 @@ template inline bool IsCUDADataTransfer(const A &lhs, const B &rhs) { int lhsNbManagedSymbols{GetNbOfCUDAManagedOrUnifiedSymbols(lhs)}; int rhsNbManagedSymbols{GetNbOfCUDAManagedOrUnifiedSymbols(rhs)}; - int rhsNbConstantSymbols{GetNbOfCUDAConstantSymbols(rhs)}; int rhsNbSymbols{GetNbOfCUDADeviceSymbols(rhs)}; if (HasNonAllocatableModuleCUDAManagedSymbols(lhs)) return false; - // If only constant symbols are present on the rhs, and no device symbols on - // the lhs, then no data transfer is needed because the constant have a host - // value. - if (rhsNbConstantSymbols == rhsNbSymbols && !HasCUDADeviceAttrs(lhs)) { - return false; - } - if (lhsNbManagedSymbols >= 1 && lhs.Rank() > 0 && rhsNbSymbols == 0 && rhsNbManagedSymbols == 0 && (IsVariable(rhs) || IsConstantExpr(rhs))) { return true; // Managed arrays initialization is performed on the device. diff --git a/flang/lib/Evaluate/tools.cpp b/flang/lib/Evaluate/tools.cpp index a60e36654ca34..82dcd1e795f49 100644 --- a/flang/lib/Evaluate/tools.cpp +++ b/flang/lib/Evaluate/tools.cpp @@ -1210,7 +1210,8 @@ bool IsCUDADeviceOnlySymbol(const Symbol &sym) { if (const auto *details = sym.GetUltimate().detailsIf()) { return details->cudaDataAttr() && - (*details->cudaDataAttr() == common::CUDADataAttr::Device); + (*details->cudaDataAttr() == common::CUDADataAttr::Device || + *details->cudaDataAttr() == common::CUDADataAttr::Constant); } return false; } diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf index f236e829072ee..a1006437485ca 100644 --- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -3,8 +3,6 @@ ! Test CUDA Fortran data transfer using assignment statements. module mod1 - real, constant :: c1 = 1.0 - type :: t1 integer :: i end type @@ -497,7 +495,7 @@ subroutine sub25() end ! CHECK-LABEL: func.func @_QPsub25() -! 
CHECK: fir.allocmem !fir.array, %{{.*}} {bindc_name = ".tmp", uniq_name = ""} +! CHECK: fir.allocmem !fir.array, %15#1 {bindc_name = ".tmp", uniq_name = ""} ! CHECK: cuf.data_transfer %{{.*}} to %{{.*}} {transfer_kind = #cuf.cuda_transfer} : !fir.ref>>>, !fir.box> ! CHECK: hlfir.assign %{{.*}} to %{{.*}} : f64, !fir.ref ! CHECK: fir.freemem %{{.*}} : !fir.heap> @@ -726,13 +724,3 @@ subroutine sub41() lm(1:5) = a%m(1:5) end subroutine - -subroutine sub42() - use mod1 - real :: a - a = c1 * c1 -end subroutine - -! CHECK-LABEL: func.func @_QPsub42() -! CHECK-NOT: cuf.data_transfer -! CHECK: hlfir.assign From f3ae5fa67a25b04fd3363e107b147fe8a8ea9c80 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 23 Jun 2026 11:25:11 -0700 Subject: [PATCH 231/511] [lldb] Fix data race in Module::GetSectionList (#205226) Module::GetSectionList built the section list (m_sections_up) without a lock, so parallel module loading (e.g. crashlog.py's thread pool) could race two builders on the unique_ptr and the SectionList vector, crashing in AppleObjCRuntime::GetObjCVersion. Build through ObjectFile::GetSectionList instead of locking the module mutex and calling CreateSections directly: that path locks the object file's section mutex before the module mutex, and the build can re-enter it, so holding the module mutex across the build would invert the order and risk a deadlock. The Module-level counterpart to a0176fd9dfc5. 
rdar://180308581 --- lldb/include/lldb/Core/Module.h | 3 + lldb/source/Core/Module.cpp | 8 ++- lldb/unittests/Core/ModuleTest.cpp | 93 ++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+), 3 deletions(-) diff --git a/lldb/include/lldb/Core/Module.h b/lldb/include/lldb/Core/Module.h index eb09e8b602da7..33904ef7be5d8 100644 --- a/lldb/include/lldb/Core/Module.h +++ b/lldb/include/lldb/Core/Module.h @@ -1116,6 +1116,9 @@ class Module : public std::enable_shared_from_this, /// is used by the ObjectFile and /// ObjectFile instances for the debug info + /// Guards the lazy construction of m_sections_up. + mutable std::recursive_mutex m_sections_mutex; + std::atomic m_did_load_objfile{false}; std::atomic m_did_load_symfile{false}; std::atomic m_did_set_uuid{false}; diff --git a/lldb/source/Core/Module.cpp b/lldb/source/Core/Module.cpp index e232d322d762c..2bc8fd138427d 100644 --- a/lldb/source/Core/Module.cpp +++ b/lldb/source/Core/Module.cpp @@ -1221,10 +1221,12 @@ ObjectFile *Module::GetObjectFile() { } SectionList *Module::GetSectionList() { - // Populate m_sections_up with sections from objfile. + // Guard the lazy build with m_sections_mutex rather than m_mutex: + // Module::PreloadSymbols holds m_mutex across the parallel DWARF index, whose + // worker threads re-enter GetSectionList, so taking m_mutex here deadlocks. 
+ std::lock_guard guard(m_sections_mutex); if (!m_sections_up) { - ObjectFile *obj_file = GetObjectFile(); - if (obj_file != nullptr) + if (ObjectFile *obj_file = GetObjectFile()) obj_file->CreateSections(*GetUnifiedSectionList()); } return m_sections_up.get(); diff --git a/lldb/unittests/Core/ModuleTest.cpp b/lldb/unittests/Core/ModuleTest.cpp index bcaeede367bdd..62cc2c025c863 100644 --- a/lldb/unittests/Core/ModuleTest.cpp +++ b/lldb/unittests/Core/ModuleTest.cpp @@ -13,11 +13,17 @@ #include "TestingSupport/SubsystemRAII.h" #include "TestingSupport/TestUtilities.h" #include "lldb/Core/PluginManager.h" +#include "lldb/Core/Section.h" #include "lldb/Host/FileSystem.h" #include "lldb/Host/HostInfo.h" #include "lldb/Target/Language.h" +#include "lldb/Utility/ConstString.h" #include "gtest/gtest.h" +#include +#include #include +#include +#include using namespace lldb; using namespace lldb_private; @@ -170,3 +176,90 @@ TEST(ModuleTest, ResolveSymbolContextForAddressExactMatch) { ASSERT_NE(sc.symbol, nullptr); EXPECT_STREQ(sc.symbol->GetName().GetCString(), "inner_function"); } + +// Module::GetSectionList builds the module's section list lazily. Concurrent +// first-time callers (e.g. AppleObjCRuntime::GetObjCVersion during parallel +// SBTarget module loading) must not race on m_sections_up. This hammers +// GetSectionList from several threads on a fresh module so a sanitizer flags an +// unsynchronized build. +TEST(ModuleTest, GetSectionListConcurrent) { + SubsystemRAII + subsystems; + + // Several sections widen the window during which CreateSections is appending + // to the SectionList vector while another thread iterates it. 
+ const char *yaml = R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_DYN + Machine: EM_X86_64 +Sections: + - Name: .text + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_EXECINSTR ] + Address: 0x1000 + AddressAlign: 0x10 + Size: 0x100 + - Name: .data + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC, SHF_WRITE ] + Address: 0x2000 + AddressAlign: 0x10 + Size: 0x100 + - Name: .rodata + Type: SHT_PROGBITS + Flags: [ SHF_ALLOC ] + Address: 0x3000 + AddressAlign: 0x10 + Size: 0x100 + - Name: .bss + Type: SHT_NOBITS + Flags: [ SHF_ALLOC, SHF_WRITE ] + Address: 0x4000 + AddressAlign: 0x10 + Size: 0x100 +... +)"; + + const ConstString text_name(".text"); + constexpr int kThreads = 8; + // Each iteration uses a fresh module so the lazy build (and its race) is + // re-triggered every time. + for (int iter = 0; iter < 100; ++iter) { + auto ExpectedFile = TestFile::fromYaml(yaml); + ASSERT_THAT_EXPECTED(ExpectedFile, llvm::Succeeded()); + auto module_sp = std::make_shared(ExpectedFile->moduleSpec()); + + // Release the threads together with a blocking gate. A busy-wait would peg + // every core and starve the workers when many test binaries run at once. + std::mutex mutex; + std::condition_variable cv; + bool go = false; + std::vector threads; + threads.reserve(kThreads); + for (int t = 0; t < kThreads; ++t) { + threads.emplace_back([&] { + { + std::unique_lock lock(mutex); + cv.wait(lock, [&] { return go; }); + } + if (SectionList *sections = module_sp->GetSectionList()) + sections->FindSectionByName(text_name); + }); + } + { + std::lock_guard lock(mutex); + go = true; + } + cv.notify_all(); + for (auto &th : threads) + th.join(); + + // The concurrently-built list must be intact and complete. 
+ SectionList *sections = module_sp->GetSectionList(); + ASSERT_NE(sections, nullptr); + EXPECT_TRUE(sections->FindSectionByName(text_name)); + } +} From 1320b0f9a19b797205278882dbe685f21ed8e471 Mon Sep 17 00:00:00 2001 From: "forking-google-bazel-bot[bot]" <265904573+forking-google-bazel-bot[bot]@users.noreply.github.com> Date: Tue, 23 Jun 2026 13:32:06 -0500 Subject: [PATCH 232/511] [Bazel] Fixes 7591910 (#205377) This fixes 759191045115d966dffc99901e9086289767ff5c. Co-authored-by: Google Bazel Bot --- .../llvm-project-overlay/libc/BUILD.bazel | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index fd71a06b382d8..4a6df7f43f39c 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -14427,6 +14427,23 @@ libc_function( # ], # ) +libc_support_library( + name = "__support_osutil_linux_syscall_wrappers_ioctl", + hdrs = ["src/__support/OSUtil/linux/syscall_wrappers/ioctl.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [ + ":__support_common", + ":__support_cpp_type_traits", + ":__support_error_or", + ":__support_macros_attributes", + ":__support_macros_config", + ":__support_osutil_syscall", + ], +) + libc_function( name = "isatty", srcs = ["src/unistd/linux/isatty.cpp"], @@ -14435,6 +14452,7 @@ libc_function( ":__support_common", ":__support_libc_errno", ":__support_macros_config", + ":__support_osutil_linux_syscall_wrappers_ioctl", ":__support_osutil_syscall", ":errno", ":hdr_unistd_macros", From 6367dfa9800423f87a906b4837b91dc87a45cf8b Mon Sep 17 00:00:00 2001 From: Ryan Buchner Date: Tue, 23 Jun 2026 11:33:27 -0700 Subject: [PATCH 233/511] [SLP] Don't recognize rotated widened strided stores in analyzeRtStrideCandidate() (#204013) These cases which are nearly strided stores are being 
incorrectly recognized as strided stores. Fixes #204011 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 12 +++ .../RISCV/rotated-strided-loads.ll | 77 ++++++++++++++++--- 2 files changed, 80 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index da2fe9d9f9f61..fbf2394593e17 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7619,6 +7619,11 @@ bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef PointerOps, // `PointerOps` and their indicies in `PointerOps`. SmallDenseMap, SmallVector>> OffsetToPointerOpIdxMap; + // Track to make sure that only VecSz different base pointers are consumed + // Prevents cases such as: + // 1, x + 0, x + 1, 2x + 0 from being recognized as legal RT strided as there + // are 2 "0" and 2 "1" offsets and a stride of "x" between both offsets + SmallDenseSet StrideMultiples; for (auto [Idx, Ptr] : enumerate(PointerOps)) { const SCEV *PtrSCEV = SE->getSCEV(Ptr); if (!PtrSCEV) @@ -7626,6 +7631,7 @@ bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef PointerOps, const auto *Add = dyn_cast(PtrSCEV); int64_t Offset = 0; + const SCEV *StrideMultiple = PtrSCEV; if (Add) { // `Offset` is non-zero. 
for (int I : seq(Add->getNumOperands())) { @@ -7637,11 +7643,13 @@ bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef PointerOps, Offset = 0; continue; } + StrideMultiple = SE->getMinusSCEV(StrideMultiple, SC); break; } } OffsetToPointerOpIdxMap[Offset].first.push_back(Ptr); OffsetToPointerOpIdxMap[Offset].second.push_back(Idx); + StrideMultiples.insert(StrideMultiple); } unsigned NumOffsets = OffsetToPointerOpIdxMap.size(); @@ -7655,6 +7663,10 @@ bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef PointerOps, return false; VecSz = Sz / NumOffsets; } + + if (StrideMultiples.size() != VecSz) + return false; + if (NumOffsets > 1 || BaseTy->isVectorTy()) NewScalarTy = Type::getIntNTy( SE->getContext(), diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/rotated-strided-loads.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/rotated-strided-loads.ll index e2446f75cb722..c2daaffdc499f 100644 --- a/llvm/test/Transforms/SLPVectorizer/RISCV/rotated-strided-loads.ll +++ b/llvm/test/Transforms/SLPVectorizer/RISCV/rotated-strided-loads.ll @@ -195,14 +195,54 @@ define void @constant_stride_widen_rotatedn_1(ptr %pl, i64 %stride, ptr %ps) { define void @rt_stride_widen_rotate1(ptr %pl, i64 %stride, ptr %ps) { ; CHECK-LABEL: define void @rt_stride_widen_rotate1( ; CHECK-SAME: ptr [[PL:%.*]], i64 [[STRIDE:%.*]], ptr [[PS:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[OFFSET0:%.*]] = mul nsw i64 [[STRIDE]], 0 +; CHECK-NEXT: [[OFFSET1:%.*]] = add nsw i64 [[OFFSET0]], 1 +; CHECK-NEXT: [[OFFSET2:%.*]] = add nsw i64 [[OFFSET0]], 2 ; CHECK-NEXT: [[OFFSET4:%.*]] = mul nsw i64 [[STRIDE]], 1 +; CHECK-NEXT: [[OFFSET6:%.*]] = add nsw i64 [[OFFSET4]], 2 +; CHECK-NEXT: [[OFFSET8:%.*]] = mul nsw i64 [[STRIDE]], 2 +; CHECK-NEXT: [[OFFSET10:%.*]] = add nsw i64 [[OFFSET8]], 2 +; CHECK-NEXT: [[OFFSET12:%.*]] = mul nsw i64 [[STRIDE]], 3 +; CHECK-NEXT: [[OFFSET14:%.*]] = add nsw i64 [[OFFSET12]], 2 +; CHECK-NEXT: [[OFFSET16:%.*]] = mul nsw i64 [[STRIDE]], 4 +; CHECK-NEXT: [[GEP_L1:%.*]] = getelementptr inbounds i8, ptr 
[[PL]], i64 [[OFFSET1]] +; CHECK-NEXT: [[GEP_L2:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET2]] ; CHECK-NEXT: [[GEP_L4:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET4]] +; CHECK-NEXT: [[GEP_L6:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET6]] +; CHECK-NEXT: [[GEP_L8:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET8]] +; CHECK-NEXT: [[GEP_L10:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET10]] +; CHECK-NEXT: [[GEP_L12:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET12]] +; CHECK-NEXT: [[GEP_L14:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET14]] +; CHECK-NEXT: [[GEP_L16:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET16]] +; CHECK-NEXT: [[LOAD1:%.*]] = load i8, ptr [[GEP_L1]], align 1 +; CHECK-NEXT: [[LOAD16:%.*]] = load i8, ptr [[GEP_L16]], align 1 ; CHECK-NEXT: [[GEP_S1:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STRIDE]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 [[GEP_L4]], i64 [[TMP1]], <4 x i1> splat (i1 true), i32 4) -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> -; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[GEP_S1]], align 1 +; CHECK-NEXT: [[GEP_S2:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 2 +; CHECK-NEXT: [[GEP_S10:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 10 +; CHECK-NEXT: [[GEP_S14:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 14 +; CHECK-NEXT: [[GEP_S16:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 16 +; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[GEP_L2]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i8>, ptr [[GEP_L4]], align 1 +; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i8>, ptr [[GEP_L6]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i8>, ptr [[GEP_L8]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = 
shufflevector <2 x i8> [[TMP1]], <2 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x i8> [[TMP2]], <2 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i8> [[TMP1]], <2 x i8> [[TMP2]], <8 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x i8> [[TMP3]], <2 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> [[TMP8]], <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i8> [[TMP4]], <2 x i8> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <8 x i8> [[TMP9]], <8 x i8> [[TMP10]], <8 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = load <2 x i8>, ptr [[GEP_L10]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i8>, ptr [[GEP_L12]], align 1 +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <2 x i8> [[TMP12]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x i8> [[TMP13]], <2 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i8> [[TMP12]], <2 x i8> [[TMP13]], <4 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = load <2 x i8>, ptr [[GEP_L14]], align 1 +; CHECK-NEXT: store i8 [[LOAD1]], ptr [[GEP_S1]], align 1 +; CHECK-NEXT: store <8 x i8> [[TMP11]], ptr [[GEP_S2]], align 1 +; CHECK-NEXT: store <4 x i8> [[TMP16]], ptr [[GEP_S10]], align 1 +; CHECK-NEXT: store <2 x i8> [[TMP17]], ptr [[GEP_S14]], align 1 +; CHECK-NEXT: store i8 [[LOAD16]], ptr [[GEP_S16]], align 1 ; CHECK-NEXT: ret void ; %offset0 = mul nsw i64 %stride, 0 @@ -304,14 +344,33 @@ define void @rt_stride_widen_rotaten_1(ptr %pl, i64 %stride, ptr %ps) { ; CHECK-NEXT: [[OFFSET0:%.*]] = mul nsw i64 [[STRIDE]], 0 ; CHECK-NEXT: [[OFFSET1:%.*]] = add nsw i64 [[OFFSET0]], 1 ; CHECK-NEXT: [[OFFSET2:%.*]] = add nsw i64 [[OFFSET0]], 2 +; CHECK-NEXT: [[OFFSET3:%.*]] = add nsw i64 [[OFFSET0]], 3 ; CHECK-NEXT: [[OFFSET4:%.*]] = mul nsw i64 [[STRIDE]], 1 +; CHECK-NEXT: [[OFFSET12:%.*]] = mul nsw i64 [[STRIDE]], 3 +; CHECK-NEXT: [[OFFSET16:%.*]] = mul nsw i64 [[STRIDE]], 4 +; CHECK-NEXT: 
[[OFFSET18:%.*]] = add nsw i64 [[OFFSET16]], 2 +; CHECK-NEXT: [[GEP_L3:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET3]] ; CHECK-NEXT: [[GEP_L4:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET4]] +; CHECK-NEXT: [[GEP_L12:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET12]] +; CHECK-NEXT: [[GEP_L16:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET16]] +; CHECK-NEXT: [[GEP_L18:%.*]] = getelementptr inbounds i8, ptr [[PL]], i64 [[OFFSET18]] +; CHECK-NEXT: [[LOAD3:%.*]] = load i8, ptr [[GEP_L3]], align 1 +; CHECK-NEXT: [[LOAD18:%.*]] = load i8, ptr [[GEP_L18]], align 1 ; CHECK-NEXT: [[GEP_S3:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 3 +; CHECK-NEXT: [[GEP_S4:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 4 +; CHECK-NEXT: [[GEP_S12:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 12 +; CHECK-NEXT: [[GEP_S16:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 16 +; CHECK-NEXT: [[GEP_S18:%.*]] = getelementptr inbounds i8, ptr [[PS]], i64 18 ; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STRIDE]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.experimental.vp.strided.load.v4i32.p0.i64(ptr align 1 [[GEP_L4]], i64 [[TMP1]], <4 x i1> splat (i1 true), i32 4) -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to <16 x i8> -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> poison, <16 x i32> -; CHECK-NEXT: store <16 x i8> [[TMP4]], ptr [[GEP_S3]], align 1 +; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i32> @llvm.experimental.vp.strided.load.v2i32.p0.i64(ptr align 1 [[GEP_L4]], i64 [[TMP1]], <2 x i1> splat (i1 true), i32 2) +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i8>, ptr [[GEP_L12]], align 1 +; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i8>, ptr [[GEP_L16]], align 1 +; CHECK-NEXT: store i8 [[LOAD3]], ptr [[GEP_S3]], align 1 +; CHECK-NEXT: store <8 x i8> [[TMP3]], ptr [[GEP_S4]], align 1 +; CHECK-NEXT: store <4 x i8> [[TMP4]], ptr 
[[GEP_S12]], align 1 +; CHECK-NEXT: store <2 x i8> [[TMP5]], ptr [[GEP_S16]], align 1 +; CHECK-NEXT: store i8 [[LOAD18]], ptr [[GEP_S18]], align 1 ; CHECK-NEXT: ret void ; %offset0 = mul nsw i64 %stride, 0 From 2c257f9b308099a8dd0a5902b5b48610053c0898 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=A1bor=20Horv=C3=A1th?= Date: Tue, 23 Jun 2026 19:33:54 +0100 Subject: [PATCH 234/511] [LifetimeSafety] Model GNU statement expressions (#204841) --- .../Analyses/LifetimeSafety/FactsGenerator.h | 1 + .../LifetimeSafety/FactsGenerator.cpp | 15 ++++ clang/test/Sema/LifetimeSafety/safety.cpp | 70 +++++++++++++++++++ 3 files changed, 86 insertions(+) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h index 5ac67263681ac..8dc5213dd8de2 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h @@ -57,6 +57,7 @@ class FactsGenerator : public ConstStmtVisitor { void VisitArraySubscriptExpr(const ArraySubscriptExpr *ASE); void VisitCXXNewExpr(const CXXNewExpr *NE); void VisitCXXDeleteExpr(const CXXDeleteExpr *DE); + void VisitStmtExpr(const StmtExpr *SE); private: OriginList *getOriginsList(const ValueDecl &D); diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index d8c5679a80b38..a2341ebc8f2ed 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -801,6 +801,21 @@ void FactsGenerator::VisitCXXDeleteExpr(const CXXDeleteExpr *DE) { FactMgr.createFact(List->getOuterOriginID(), DE)); } +void FactsGenerator::VisitStmtExpr(const StmtExpr *SE) { + // A statement expression (`({ ...; e; })`) yields the value of its final + // expression `e`. 
Flow `e`'s origins into the statement expression's origin + // so a borrow `e` carries reaches the value's users. + const auto *CS = SE->getSubStmt(); + if (!CS || CS->body_empty()) + return; + const auto *Last = dyn_cast(CS->body_back()); + if (!Last) + return; + if (OriginList *Dst = getOriginsList(*SE)) + if (OriginList *Src = getRValueOrigins(Last, getOriginsList(*Last))) + flow(Dst, Src, /*Kill=*/true); +} + bool FactsGenerator::escapesViaReturn(OriginID OID) const { return llvm::any_of(EscapesInCurrentBlock, [OID](const Fact *F) { if (const auto *EF = F->getAs()) diff --git a/clang/test/Sema/LifetimeSafety/safety.cpp b/clang/test/Sema/LifetimeSafety/safety.cpp index b59fac191dcfb..bbb8fbe6fc6a9 100644 --- a/clang/test/Sema/LifetimeSafety/safety.cpp +++ b/clang/test/Sema/LifetimeSafety/safety.cpp @@ -3955,3 +3955,73 @@ struct [[gsl::Pointer()]] PtrWithInt { int x; }; PtrWithInt f() { return PtrWithInt{10}; } + +// A GNU statement expression (`({ ...; e; })`) yields the value of its final +// expression `e`. `e`'s origins flow into the statement expression's value, so +// a borrow `e` carries is tracked: a borrow of a body-local dangles, and a +// borrow forwarded from an outer object propagates to the value's users. +namespace statement_expression { +void use(int *p); + +// A borrow of a statement-expression-local escaping via the value. +void borrow_of_local() { + int *p = ({ int x = 7; &x; }); // expected-warning {{local variable 'x' does not live long enough}} expected-note {{local variable 'x' is destroyed here}} + use(p); // expected-note {{later used here}} +} + +// An outer borrow forwarded through a statement expression and returned: +// use-after-return. +int *return_borrow_of_local() { + int local = 0; + return ({ (void)0; &local; }); // expected-warning {{stack memory associated with local variable 'local' is returned}} expected-note {{returned here}} +} + +// A view bound to a temporary produced by the statement expression dangles. 
+void borrow_temporary() { + std::string_view view = ({ std::string x = "long enough heap string!!!!!!"; x; }); // expected-warning {{temporary object does not live long enough}} expected-note {{temporary object is destroyed here}} + (void)view; // expected-note {{later used here}} +} + +// Forwarding an outer borrow that dangles. +void forward_outer_borrow() { + int *p; + { + int local = 0; + p = ({ (void)0; &local; }); // expected-warning {{local variable 'local' does not live long enough}} + } // expected-note {{local variable 'local' is destroyed here}} + use(p); // expected-note {{later used here}} +} + +// The statement-expression result carries the borrow, so a `?:` sibling +// supplying a valid loan no longer hides it via the merge. +void masked(bool c) { + static int valid; + int *keep = &valid; + int *r; + { + int local = 0; + r = c ? keep : ({ &local; }); // expected-warning {{local variable 'local' does not live long enough}} + } // expected-note {{local variable 'local' is destroyed here}} + use(r); // expected-note {{later used here}} +} + +// Both conditional arms are statement expressions returning a borrow of a +// body-local; each is caught as a returned stack address. +int *conditional_arms(bool c) { + return c ? ({ int x = 7; &x; }) // expected-warning {{stack memory associated with local variable 'x' is returned}} expected-note 2 {{returned here}} + : ({ int y = 7; &y; }); // expected-warning {{stack memory associated with local variable 'y' is returned}} +} + +// Negative: a statement expression yielding a long-lived borrow stays silent. +void ok() { + static int s; + int *p = ({ int unused = 0; (void)unused; &s; }); + use(p); // no-warning +} + +// A discarded statement expression's value is not consumed, so a borrow of a +// body-local in it does not reach any user and is correctly not flagged. 
+void discarded_body_local() { + (void)({ int x = 7; &x; }); // no-warning +} +} // namespace statement_expression From 1529053db428531109c31f8a76b3aa6fb4e11d00 Mon Sep 17 00:00:00 2001 From: Kelvin Li Date: Tue, 23 Jun 2026 14:45:39 -0400 Subject: [PATCH 235/511] [dsymutil] Use more portable way to compare timestamp (NFC) (#204680) `find` on AIX does not support the `-maxdepth` option. This patch is to use python to compare the `mtime` of the file/directory. --- llvm/test/tools/dsymutil/X86/bundle-mtime.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/tools/dsymutil/X86/bundle-mtime.test b/llvm/test/tools/dsymutil/X86/bundle-mtime.test index cca0e355b7997..152753331cbd9 100644 --- a/llvm/test/tools/dsymutil/X86/bundle-mtime.test +++ b/llvm/test/tools/dsymutil/X86/bundle-mtime.test @@ -17,8 +17,8 @@ RUN: env TZ=GMT touch -t 200001010000 %t.dir/marker RUN: dsymutil -oso-prepend-path=%p/.. %t.dir/basic -## `find -maxdepth 0 -newer` prints the bundle path iff its mtime is newer -## than the marker. Without the mtime bump, the directory keeps its 1970 -## stamp and find prints nothing. -RUN: find %t.dir/basic.dSYM -maxdepth 0 -newer %t.dir/marker | FileCheck %s +## It prints the bundle path iff its mtime is newer than the marker. +## Without the mtime bump, the directory keeps its 1970 stamp and +## nothing is printed. +RUN: %python -c "import sys ; from pathlib import Path ; sys.exit(0 if Path(r'%t.dir/basic.dSYM').stat().st_mtime > Path(r'%t.dir/marker').stat().st_mtime else 1)" && echo "basic.dSYM" | FileCheck %s CHECK: basic.dSYM From bdf0caab704a7fd5e50bc519f34b7c09119af145 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 23 Jun 2026 20:57:28 +0200 Subject: [PATCH 236/511] [BasicAA] Allow some more recursion across GEPs/phis. (#205010) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allow recursive base-object analysis for some GEPs.
The new version still retains some bail-outs to avoid some redundant work. This has a notable impact across a large IR corpus (32k modules from a large set of C/C++ workloads). Some of the highlights include: aa.NumNoAlias +1.52% aa.NumMayAlias −0.10% licm.NumMovedLoads +20.47% licm.NumHoisted +2.03% early-cse.NumCSELoad +1.59% SLP.NumVectorInstructions +0.86% loop-vectorize.LoopsVectorized +0.21% instcount.TotalInsts −0.05% instcount.NumLoadInst −0.10% basicaa.SearchTimes +0.98% basicaa.SearchLimitReached +14.39% LV runtime alias check −1.3% Compile-time has 2 notable changes stage1-ReleaseThinLTO: +0.04% stage1-ReleaseLTO-g: +0.18% https://llvm-compile-time-tracker.com/compare.php?from=26ed0c17c0de84add0513a0e9699479f8c88b8fe&to=476e7466c5f543ac74c86bad728d79892f668f73&stat=instructions:u The stage1-ReleaseLTO-g regression in particular is almost exclusively due to tramp3d-v4, for which we also generate +1.18% more code, so the majority of the increase is coming from additional transforms/codegen, not the analysis itself.
PR: https://github.com/llvm/llvm-project/pull/205010 --- llvm/lib/Analysis/BasicAliasAnalysis.cpp | 18 ++-- llvm/test/Analysis/BasicAA/phi-aa.ll | 3 +- llvm/test/Analysis/BasicAA/phi-and-select.ll | 3 +- llvm/test/Analysis/BasicAA/recphi.ll | 17 ++-- ...-interleave-to-widen-memory-derived-ivs.ll | 68 +++------------ .../LoopVectorize/ARM/pointer_iv.ll | 57 +++---------- .../LoopVectorize/RISCV/pointer-induction.ll | 47 ++--------- .../LoopVectorize/X86/gather_scatter.ll | 82 ++++++------------- .../exit-block-dominates-rt-check-block.ll | 8 +- 9 files changed, 82 insertions(+), 221 deletions(-) diff --git a/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 43b1c412e81b9..7df62577e04db 100644 --- a/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -1124,14 +1124,20 @@ AliasResult BasicAAResult::aliasGEP( }; if (!V1Size.hasValue() && !V2Size.hasValue()) { - // TODO: This limitation exists for compile-time reasons. Relax it if we - // can avoid exponential pathological cases. - if (!isa(V2)) + // Skip if V2 is itself a phi or select, leave the recursive walk to + // aliasPHI/aliasSelect. + if (isa(V2)) return AliasResult::MayAlias; - // If both accesses have unknown size, we can only check whether the base - // objects don't alias. - return BaseObjectsAlias(); + // Otherwise check whether the base objects don't alias. Only do so if V2 + // is a GEP or an underlying object is a GEP/phi/select, which can be + // analyzed further. 
+ if (isa(V2) || + isa(UnderlyingV1) || + isa(UnderlyingV2)) + return BaseObjectsAlias(); + + return AliasResult::MayAlias; } DominatorTree *DT = getDT(AAQI); diff --git a/llvm/test/Analysis/BasicAA/phi-aa.ll b/llvm/test/Analysis/BasicAA/phi-aa.ll index 3db63c083c268..d5d2142b2093d 100644 --- a/llvm/test/Analysis/BasicAA/phi-aa.ll +++ b/llvm/test/Analysis/BasicAA/phi-aa.ll @@ -155,8 +155,7 @@ loop: } ; CHECK-LABEL: phi_and_gep_unknown_size -; CHECK: Just Mod: call void @llvm.memset.p0.i32(ptr %g, i8 0, i32 %size, i1 false) <-> call void @llvm.memset.p0.i32(ptr %z, i8 0, i32 %size, i1 false) -; TODO: This should be NoModRef. +; CHECK: NoModRef: call void @llvm.memset.p0.i32(ptr %g, i8 0, i32 %size, i1 false) <-> call void @llvm.memset.p0.i32(ptr %z, i8 0, i32 %size, i1 false) define void @phi_and_gep_unknown_size(i1 %c, ptr %x, ptr %y, ptr noalias %z, i32 %size) { entry: br i1 %c, label %true, label %false diff --git a/llvm/test/Analysis/BasicAA/phi-and-select.ll b/llvm/test/Analysis/BasicAA/phi-and-select.ll index 0ab404b5f98eb..8588b4a71fc95 100644 --- a/llvm/test/Analysis/BasicAA/phi-and-select.ll +++ b/llvm/test/Analysis/BasicAA/phi-and-select.ll @@ -80,9 +80,8 @@ entry: } ; A gep off a select of two argument pointers does not alias a noalias argument. -; TODO: This should be NoModRef. 
; CHECK-LABEL: Function: select_and_gep_unknown_size -; CHECK: Just Mod: call void @llvm.memset.p0.i32(ptr %g, i8 0, i32 %size, i1 false) <-> call void @llvm.memset.p0.i32(ptr %z, i8 0, i32 %size, i1 false) +; CHECK: NoModRef: call void @llvm.memset.p0.i32(ptr %g, i8 0, i32 %size, i1 false) <-> call void @llvm.memset.p0.i32(ptr %z, i8 0, i32 %size, i1 false) define void @select_and_gep_unknown_size(i1 %c, ptr %x, ptr %y, ptr noalias %z, i32 %size) { entry: %p = select i1 %c, ptr %x, ptr %y diff --git a/llvm/test/Analysis/BasicAA/recphi.ll b/llvm/test/Analysis/BasicAA/recphi.ll index 2c1e96a7c5603..dc55ca4e6f66d 100644 --- a/llvm/test/Analysis/BasicAA/recphi.ll +++ b/llvm/test/Analysis/BasicAA/recphi.ll @@ -242,11 +242,10 @@ exit: ret ptr %result } -; FIXME: %a and %p.inner do not alias. ; CHECK-LABEL: Function: nested_loop ; CHECK: NoAlias: i8* %a, i8* %p.base ; CHECK: NoAlias: i8* %a, i8* %p.outer -; CHECK: MayAlias: i8* %a, i8* %p.inner +; CHECK: NoAlias: i8* %a, i8* %p.inner ; CHECK: NoAlias: i8* %a, i8* %p.inner.next ; CHECK: NoAlias: i8* %a, i8* %p.outer.next define void @nested_loop(i1 %c, i1 %c2, ptr noalias %p.base) { @@ -282,9 +281,8 @@ exit: ; CHECK: NoAlias: i8* %a, i8* %p.base ; CHECK: NoAlias: i8* %a, i8* %p.outer ; CHECK: NoAlias: i8* %a, i8* %p.outer.next -; CHECK: MayAlias: i8* %a, i8* %p.inner +; CHECK: NoAlias: i8* %a, i8* %p.inner ; CHECK: NoAlias: i8* %a, i8* %p.inner.next -; TODO: (a, p.inner) could be NoAlias define void @nested_loop2(i1 %c, i1 %c2, ptr noalias %p.base) { entry: %a = alloca i8 @@ -314,11 +312,10 @@ exit: } ; Same as nested_loop, but with a plain pointer argument (no noalias). -; TODO: %a and %p.inner do not alias. 
; CHECK-LABEL: Function: nested_loop_plain_arg ; CHECK: NoAlias: i8* %a, i8* %p.base ; CHECK: NoAlias: i8* %a, i8* %p.outer -; CHECK: MayAlias: i8* %a, i8* %p.inner +; CHECK: NoAlias: i8* %a, i8* %p.inner ; CHECK: NoAlias: i8* %a, i8* %p.inner.next ; CHECK: NoAlias: i8* %a, i8* %p.outer.next define void @nested_loop_plain_arg(i1 %c, i1 %c2, ptr %p.base) { @@ -387,9 +384,8 @@ exit: ; CHECK: NoAlias: i8* %a, i8* %p.base ; CHECK: NoAlias: i8* %a, i8* %p1 ; CHECK: NoAlias: i8* %a, i8* %p1.next -; CHECK: MayAlias: i8* %a, i8* %p2 +; CHECK: NoAlias: i8* %a, i8* %p2 ; CHECK: NoAlias: i8* %a, i8* %p2.next -; TODO: %p2 does not alias %a define void @sibling_loop(i1 %c, i1 %c2, ptr noalias %p.base) { entry: %a = alloca i8 @@ -473,10 +469,11 @@ exit: ret void } -; TODO: %other and %p.inner do not alias. +; aliasGEP must leave the underlying-object check for a phi step GEP to +; aliasPHI, to avoid an overlapping walk that risks compile-time blow-up. ; CHECK-LABEL: Function: rec_phi_gep_guard ; CHECK: NoAlias: i8* %other, i8* %p.outer -; CHECK: MayAlias: i8* %other, i8* %p.inner +; CHECK: NoAlias: i8* %other, i8* %p.inner define void @rec_phi_gep_guard(i1 %c, i1 %c2, ptr noalias %base, ptr noalias %other) { entry: load i8, ptr %other diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-derived-ivs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-derived-ivs.ll index 18e536178189e..d39a7b0c23ef8 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-derived-ivs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-derived-ivs.ll @@ -131,28 +131,14 @@ define void @derived_pointer_ivs(ptr noalias %a, ptr noalias %b, ptr %end) { ; VF2-LABEL: define void @derived_pointer_ivs( ; VF2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr [[END:%.*]]) { ; VF2-NEXT: [[ENTRY:.*:]] -; VF2-NEXT: [[A5:%.*]] = 
ptrtoint ptr [[A]] to i64 -; VF2-NEXT: [[END4:%.*]] = ptrtoint ptr [[END]] to i64 ; VF2-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64 ; VF2-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 -; VF2-NEXT: [[TMP0:%.*]] = add i64 [[END4]], -16 -; VF2-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[A5]] +; VF2-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -16 +; VF2-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[A2]] ; VF2-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 4 ; VF2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; VF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2 -; VF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] -; VF2: [[VECTOR_MEMCHECK]]: -; VF2-NEXT: [[TMP4:%.*]] = add i64 [[END1]], -16 -; VF2-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], [[A2]] -; VF2-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 4 -; VF2-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 4 -; VF2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 16 -; VF2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] -; VF2-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]] -; VF2-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP3]] -; VF2-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]] -; VF2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; VF2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; VF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF2: [[VECTOR_PH]]: ; VF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 1 ; VF2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] @@ -178,28 +164,14 @@ define void @derived_pointer_ivs(ptr noalias %a, ptr noalias %b, ptr %end) { ; VF2IC2-LABEL: define void @derived_pointer_ivs( ; VF2IC2-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr [[END:%.*]]) { ; VF2IC2-NEXT: [[ENTRY:.*:]] -; VF2IC2-NEXT: [[A5:%.*]] = ptrtoint ptr [[A]] to i64 -; VF2IC2-NEXT: [[END4:%.*]] = ptrtoint ptr [[END]] to i64 ; VF2IC2-NEXT: [[A2:%.*]] = ptrtoint ptr 
[[A]] to i64 ; VF2IC2-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 -; VF2IC2-NEXT: [[TMP0:%.*]] = add i64 [[END4]], -16 -; VF2IC2-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[A5]] +; VF2IC2-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -16 +; VF2IC2-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[A2]] ; VF2IC2-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 4 ; VF2IC2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; VF2IC2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4 -; VF2IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] -; VF2IC2: [[VECTOR_MEMCHECK]]: -; VF2IC2-NEXT: [[TMP4:%.*]] = add i64 [[END1]], -16 -; VF2IC2-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], [[A2]] -; VF2IC2-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 4 -; VF2IC2-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 4 -; VF2IC2-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 16 -; VF2IC2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] -; VF2IC2-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]] -; VF2IC2-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP3]] -; VF2IC2-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]] -; VF2IC2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; VF2IC2-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; VF2IC2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF2IC2: [[VECTOR_PH]]: ; VF2IC2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2 ; VF2IC2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] @@ -230,28 +202,14 @@ define void @derived_pointer_ivs(ptr noalias %a, ptr noalias %b, ptr %end) { ; VF4-LABEL: define void @derived_pointer_ivs( ; VF4-SAME: ptr noalias [[A:%.*]], ptr noalias [[B:%.*]], ptr [[END:%.*]]) { ; VF4-NEXT: [[ENTRY:.*:]] -; VF4-NEXT: [[A5:%.*]] = ptrtoint ptr [[A]] to i64 -; VF4-NEXT: [[END4:%.*]] = ptrtoint ptr [[END]] to i64 ; VF4-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64 ; VF4-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to 
i64 -; VF4-NEXT: [[TMP0:%.*]] = add i64 [[END4]], -16 -; VF4-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[A5]] +; VF4-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -16 +; VF4-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[A2]] ; VF4-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 4 ; VF4-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; VF4-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4 -; VF4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]] -; VF4: [[VECTOR_MEMCHECK]]: -; VF4-NEXT: [[TMP4:%.*]] = add i64 [[END1]], -16 -; VF4-NEXT: [[TMP5:%.*]] = sub i64 [[TMP4]], [[A2]] -; VF4-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 4 -; VF4-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 4 -; VF4-NEXT: [[TMP8:%.*]] = add i64 [[TMP7]], 16 -; VF4-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP8]] -; VF4-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[A]], i64 [[TMP8]] -; VF4-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[B]], [[SCEVGEP3]] -; VF4-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP]] -; VF4-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; VF4-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; VF4-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VF4: [[VECTOR_PH]]: ; VF4-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4 ; VF4-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] @@ -264,15 +222,15 @@ define void @derived_pointer_ivs(ptr noalias %a, ptr noalias %b, ptr %end) { ; VF4-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 4 ; VF4-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[OFFSET_IDX]] ; VF4-NEXT: [[NEXT_GEP6:%.*]] = getelementptr i8, ptr [[B]], i64 [[OFFSET_IDX]] -; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x double>, ptr [[NEXT_GEP]], align 8, !alias.scope [[META4:![0-9]+]] +; VF4-NEXT: [[WIDE_VEC:%.*]] = load <8 x double>, ptr [[NEXT_GEP]], align 8 ; VF4-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x 
i32> ; VF4-NEXT: [[STRIDED_VEC8:%.*]] = shufflevector <8 x double> [[WIDE_VEC]], <8 x double> poison, <4 x i32> ; VF4-NEXT: [[TMP13:%.*]] = shufflevector <4 x double> [[STRIDED_VEC]], <4 x double> [[STRIDED_VEC8]], <8 x i32> ; VF4-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> poison, <8 x i32> -; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP6]], align 8, !alias.scope [[META7:![0-9]+]], !noalias [[META4]] +; VF4-NEXT: store <8 x double> [[INTERLEAVED_VEC]], ptr [[NEXT_GEP6]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; VF4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] +; VF4-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; VF4: [[MIDDLE_BLOCK]]: ; VF4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; VF4-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] @@ -367,7 +325,7 @@ define void @narrow_with_uniform_add_and_gep(ptr noalias %p) { ; VF4-NEXT: store <8 x i64> [[INTERLEAVED_VEC]], ptr [[TMP1]], align 8 ; VF4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; VF4-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512 -; VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]] +; VF4-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; VF4: [[MIDDLE_BLOCK]]: ; VF4-NEXT: br label %[[EXIT:.*]] ; VF4: [[EXIT]]: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll index c6ad024df5c22..e5ed1f0d2b6a2 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/pointer_iv.ll @@ -970,71 +970,34 @@ define hidden void @mult_ptr_iv(ptr noalias nocapture readonly %x, ptr noalias n ; 
CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[Z]], i32 3000 -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[X]], i32 3000 -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[Z]], [[SCEVGEP1]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[X]], [[SCEVGEP]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[X]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[POINTER_PHI2:%.*]] = phi ptr [ [[Z]], %[[VECTOR_PH]] ], [ [[PTR_IND6:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_MEMCHECK]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[X]], %[[VECTOR_MEMCHECK]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI2:%.*]] = phi ptr [ [[Z]], %[[VECTOR_MEMCHECK]] ], [ [[PTR_IND6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI2]], <4 x i32> ; CHECK-NEXT: [[VECTOR_GEP3:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, <4 x ptr> [[VECTOR_GEP3]], i32 1 -; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> align 1 [[VECTOR_GEP3]], <4 x i1> splat (i1 true), <4 x i8> poison), !alias.scope [[META28:![0-9]+]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> align 1 [[VECTOR_GEP3]], <4 x i1> splat (i1 true), <4 x i8> poison) ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, <4 x ptr> 
[[VECTOR_GEP3]], i32 2 -; CHECK-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> align 1 [[TMP0]], <4 x i1> splat (i1 true), <4 x i8> poison), !alias.scope [[META28]] -; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> align 1 [[TMP1]], <4 x i1> splat (i1 true), <4 x i8> poison), !alias.scope [[META28]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> align 1 [[TMP0]], <4 x i1> splat (i1 true), <4 x i8> poison) +; CHECK-NEXT: [[WIDE_MASKED_GATHER5:%.*]] = call <4 x i8> @llvm.masked.gather.v4i8.v4p0(<4 x ptr> align 1 [[TMP1]], <4 x i1> splat (i1 true), <4 x i8> poison) ; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], splat (i8 10) ; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER4]] ; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER5]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, <4 x ptr> [[VECTOR_GEP]], i32 1 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP2]], <4 x ptr> align 1 [[VECTOR_GEP]], <4 x i1> splat (i1 true)), !alias.scope [[META31:![0-9]+]], !noalias [[META28]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP2]], <4 x ptr> align 1 [[VECTOR_GEP]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, <4 x ptr> [[VECTOR_GEP]], i32 2 -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP3]], <4 x ptr> align 1 [[TMP5]], <4 x i1> splat (i1 true)), !alias.scope [[META31]], !noalias [[META28]] -; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP4]], <4 x ptr> align 1 [[TMP6]], <4 x i1> splat (i1 true)), !alias.scope [[META31]], !noalias [[META28]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP3]], <4 x ptr> align 1 [[TMP5]], <4 x i1> splat (i1 true)) +; CHECK-NEXT: call void 
@llvm.masked.scatter.v4i8.v4p0(<4 x i8> [[TMP4]], <4 x ptr> align 1 [[TMP6]], <4 x i1> splat (i1 true)) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 12 ; CHECK-NEXT: [[PTR_IND6]] = getelementptr i8, ptr [[POINTER_PHI2]], i32 12 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000 -; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP33:![0-9]+]] -; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br label %[[END:.*]] +; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: -; CHECK-NEXT: [[X_ADDR_050:%.*]] = phi ptr [ [[INCDEC_PTR2:%.*]], %[[FOR_BODY]] ], [ [[X]], %[[SCALAR_PH]] ] -; CHECK-NEXT: [[Z_ADDR_049:%.*]] = phi ptr [ [[INCDEC_PTR34:%.*]], %[[FOR_BODY]] ], [ [[Z]], %[[SCALAR_PH]] ] -; CHECK-NEXT: [[I_048:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[SCALAR_PH]] ] -; CHECK-NEXT: [[INCDEC_PTR:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR_050]], i32 1 -; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[X_ADDR_050]], align 1 -; CHECK-NEXT: [[INCDEC_PTR1:%.*]] = getelementptr inbounds i8, ptr [[X_ADDR_050]], i32 2 -; CHECK-NEXT: [[TMP9:%.*]] = load i8, ptr [[INCDEC_PTR]], align 1 -; CHECK-NEXT: [[INCDEC_PTR2]] = getelementptr inbounds i8, ptr [[X_ADDR_050]], i32 3 -; CHECK-NEXT: [[TMP10:%.*]] = load i8, ptr [[INCDEC_PTR1]], align 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP8]] to i32 -; CHECK-NEXT: [[MUL3:%.*]] = mul nuw nsw i32 [[CONV]], 10 -; CHECK-NEXT: [[CONV1:%.*]] = zext i8 [[TMP9]] to i32 -; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TMP10]] to i32 -; CHECK-NEXT: [[MUL4:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV1]] -; CHECK-NEXT: [[MUL5:%.*]] = mul nuw nsw i32 [[CONV]], [[CONV2]] -; CHECK-NEXT: [[MUL:%.*]] = trunc i32 [[MUL3]] to i8 -; CHECK-NEXT: [[MUL1:%.*]] = trunc i32 [[MUL4]] to i8 -; 
CHECK-NEXT: [[MUL2:%.*]] = trunc i32 [[MUL5]] to i8 -; CHECK-NEXT: [[INCDEC_PTR32:%.*]] = getelementptr inbounds i8, ptr [[Z_ADDR_049]], i32 1 -; CHECK-NEXT: store i8 [[MUL]], ptr [[Z_ADDR_049]], align 1 -; CHECK-NEXT: [[INCDEC_PTR33:%.*]] = getelementptr inbounds i8, ptr [[Z_ADDR_049]], i32 2 -; CHECK-NEXT: store i8 [[MUL1]], ptr [[INCDEC_PTR32]], align 1 -; CHECK-NEXT: [[INCDEC_PTR34]] = getelementptr inbounds i8, ptr [[Z_ADDR_049]], i32 3 -; CHECK-NEXT: store i8 [[MUL2]], ptr [[INCDEC_PTR33]], align 1 -; CHECK-NEXT: [[INC]] = add nuw i32 [[I_048]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 -; CHECK-NEXT: br i1 [[EXITCOND]], label %[[END]], label %[[FOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]] -; CHECK: [[END]]: ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll index 5047f0f669efa..7462d58d632ca 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pointer-induction.ll @@ -63,38 +63,24 @@ define i1 @scalarize_ptr_induction(ptr %start, ptr %end, ptr noalias %dst, i1 %c ; CHECK-LABEL: define i1 @scalarize_ptr_induction( ; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]], ptr noalias [[DST:%.*]], i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[START5:%.*]] = ptrtoint ptr [[START]] to i64 -; CHECK-NEXT: [[END4:%.*]] = ptrtoint ptr [[END]] to i64 ; CHECK-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64 ; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64 ; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, ptr [[START]], i64 4 -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END4]], -12 -; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START5]] +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[END1]], -12 +; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[START2]] ; CHECK-NEXT: [[TMP2:%.*]] = udiv i64 [[TMP1]], 12 ; CHECK-NEXT: [[TMP3:%.*]] = add nuw 
nsw i64 [[TMP2]], 1 ; CHECK-NEXT: br label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DST]], i64 8 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[END1]], -12 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], [[START2]] -; CHECK-NEXT: [[TMP8:%.*]] = udiv i64 [[TMP7]], 12 -; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 12 -; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 8 -; CHECK-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP10]] -; CHECK-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DST]], [[SCEVGEP3]] -; CHECK-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[START]], [[SCEVGEP]] -; CHECK-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; CHECK-NEXT: br i1 [[FOUND_CONFLICT]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] -; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, ptr [[DST]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement poison, ptr [[END]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector [[BROADCAST_SPLATINSERT6]], poison, zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[CURRENT_ITERATION_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP3]], %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_MEMCHECK]] ], [ [[CURRENT_ITERATION_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], %[[VECTOR_MEMCHECK]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ [[TMP3]], %[[VECTOR_MEMCHECK]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: 
[[TMP13:%.*]] = call @llvm.stepvector.nxv2i64() ; CHECK-NEXT: [[TMP14:%.*]] = mul [[TMP13]], splat (i64 12) ; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP14]] @@ -102,40 +88,25 @@ define i1 @scalarize_ptr_induction(ptr %start, ptr %end, ptr noalias %dst, i1 %c ; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP11]] to i64 ; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[INDEX]], 12 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[SCEVGEP6]], i64 [[TMP12]] -; CHECK-NEXT: [[TMP18:%.*]] = call @llvm.experimental.vp.strided.load.nxv2i32.p0.i64(ptr align 4 [[TMP15]], i64 12, splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]] +; CHECK-NEXT: [[TMP18:%.*]] = call @llvm.experimental.vp.strided.load.nxv2i32.p0.i64(ptr align 4 [[TMP15]], i64 12, splat (i1 true), i32 [[TMP11]]) ; CHECK-NEXT: [[TMP19:%.*]] = zext [[TMP18]] to ; CHECK-NEXT: [[TMP20:%.*]] = mul [[TMP19]], splat (i64 -7070675565921424023) ; CHECK-NEXT: [[TMP21:%.*]] = add [[TMP20]], splat (i64 -4) -; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[TMP21]], align 1 [[BROADCAST_SPLAT]], splat (i1 true), i32 [[TMP11]]), !alias.scope [[META6:![0-9]+]], !noalias [[META3]] +; CHECK-NEXT: call void @llvm.vp.scatter.nxv2i64.nxv2p0( [[TMP21]], align 1 [[BROADCAST_SPLAT]], splat (i1 true), i32 [[TMP11]]) ; CHECK-NEXT: [[CURRENT_ITERATION_NEXT]] = add i64 [[TMP26]], [[INDEX]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP26]] ; CHECK-NEXT: [[TMP27:%.*]] = mul i64 12, [[TMP26]] ; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP27]] ; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP28]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: [[TMP30:%.*]] = getelementptr nusw i8, [[VECTOR_GEP]], i64 12 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq 
[[TMP30]], [[BROADCAST_SPLAT7]] ; CHECK-NEXT: [[TMP29:%.*]] = sub i64 [[TMP26]], 1 ; CHECK-NEXT: [[TMP25:%.*]] = extractelement [[TMP17]], i64 [[TMP29]] -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[START]], %[[SCALAR_PH]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ] -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR_IV]], i64 4 -; CHECK-NEXT: [[L:%.*]] = load i32, ptr [[GEP]], align 4 -; CHECK-NEXT: [[EXT:%.*]] = zext i32 [[L]] to i64 -; CHECK-NEXT: [[UNUSED:%.*]] = load i32, ptr [[PTR_IV]], align 4 -; CHECK-NEXT: [[MUL1:%.*]] = mul i64 [[EXT]], -7070675565921424023 -; CHECK-NEXT: [[MUL2:%.*]] = add i64 [[MUL1]], -4 -; CHECK-NEXT: store i64 [[MUL2]], ptr [[DST]], align 1 -; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr nusw i8, ptr [[PTR_IV]], i64 12 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[PTR_IV_NEXT]], [[END]] -; CHECK-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: [[CMP_LCSSA:%.*]] = phi i1 [ [[CMP]], %[[LOOP]] ], [ [[TMP25]], %[[MIDDLE_BLOCK]] ] -; CHECK-NEXT: ret i1 [[CMP_LCSSA]] +; CHECK-NEXT: ret i1 [[TMP25]] ; entry: br label %loop diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll index 8561961802d92..0e46d4de9f915 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll @@ -632,7 +632,7 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-LABEL: @test_gather_not_profitable_pr48429( ; AVX512-NEXT: entry: ; AVX512-NEXT: [[IDX_EXT:%.*]] = sext i32 [[D:%.*]] to i64 -; AVX512-NEXT: [[SCEVGEP1:%.*]] = getelementptr float, ptr [[PTR:%.*]], i64 [[IDX_EXT]] +; AVX512-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[PTR:%.*]], i64 [[IDX_EXT]] ; AVX512-NEXT: [[CMP_NOT10:%.*]] = 
icmp eq i32 [[D]], 0 ; AVX512-NEXT: br i1 [[CMP_NOT10]], label [[FOR_END:%.*]], label [[ITER_CHECK:%.*]] ; AVX512: iter.check: @@ -643,24 +643,7 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2 ; AVX512-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; AVX512-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8 -; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; AVX512: vector.memcheck: -; AVX512-NEXT: [[TMP4:%.*]] = shl nsw i64 [[IDX_EXT]], 2 -; AVX512-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], -4 -; AVX512-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 2 -; AVX512-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 6 -; AVX512-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 8 -; AVX512-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DEST:%.*]], i64 [[TMP8]] -; AVX512-NEXT: [[TMP11:%.*]] = mul nsw i64 [[IDX_EXT]], -4 -; AVX512-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP11]] -; AVX512-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DEST]], [[SCEVGEP1]] -; AVX512-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR]], [[SCEVGEP]] -; AVX512-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; AVX512-NEXT: [[BOUND04:%.*]] = icmp ult ptr [[DEST]], [[PTR]] -; AVX512-NEXT: [[BOUND15:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP]] -; AVX512-NEXT: [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]] -; AVX512-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]] -; AVX512-NEXT: br i1 [[CONFLICT_RDX]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] ; AVX512: vector.main.loop.iter.check: ; AVX512-NEXT: [[MIN_ITERS_CHECK7:%.*]] = icmp ult i64 [[TMP3]], 16 ; AVX512-NEXT: br i1 [[MIN_ITERS_CHECK7]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] @@ -670,7 +653,7 
@@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[TMP23:%.*]] = shl i64 [[N_VEC]], 2 ; AVX512-NEXT: [[IND_END12:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP23]] ; AVX512-NEXT: [[TMP13:%.*]] = shl i64 [[N_VEC]], 6 -; AVX512-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP13]] +; AVX512-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DEST:%.*]], i64 [[TMP13]] ; AVX512-NEXT: br label [[VECTOR_BODY:%.*]] ; AVX512: vector.body: ; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -679,21 +662,21 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2 ; AVX512-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[OFFSET_IDX]] ; AVX512-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM]] -; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[TMP17]], align 4, !alias.scope [[META8:![0-9]+]] -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD]], <16 x ptr> align 4 [[TMP14]], <16 x i1> splat (i1 true)), !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]] -; AVX512-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x float>, ptr [[TMP16]], align 4, !alias.scope [[META15:![0-9]+]] +; AVX512-NEXT: [[WIDE_LOAD:%.*]] = load <16 x float>, ptr [[TMP17]], align 4 +; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD]], <16 x ptr> align 4 [[TMP14]], <16 x i1> splat (i1 true)) +; AVX512-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x float>, ptr [[TMP16]], align 4 ; AVX512-NEXT: [[TMP20:%.*]] = getelementptr inbounds float, <16 x ptr> [[TMP14]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD6]], <16 x ptr> align 4 [[TMP20]], <16 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META13]] +; AVX512-NEXT: call void 
@llvm.masked.scatter.v16f32.v16p0(<16 x float> [[WIDE_LOAD2]], <16 x ptr> align 4 [[TMP20]], <16 x i1> splat (i1 true)) ; AVX512-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; AVX512-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 1024 ; AVX512-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; AVX512-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; AVX512-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; AVX512: middle.block: ; AVX512-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; AVX512-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; AVX512: vec.epilog.iter.check: ; AVX512-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 8 -; AVX512-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF17:![0-9]+]] +; AVX512-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF9:![0-9]+]] ; AVX512: vec.epilog.ph: ; AVX512-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; AVX512-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[DEST]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -711,15 +694,15 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; AVX512-NEXT: [[OFFSET_IDX21:%.*]] = shl i64 [[INDEX18]], 2 ; AVX512-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[OFFSET_IDX21]] ; AVX512-NEXT: [[TMP29:%.*]] = getelementptr inbounds float, ptr [[TMP28]], i64 [[IDXPROM]] -; AVX512-NEXT: [[WIDE_LOAD14:%.*]] = load <8 x float>, ptr [[TMP29]], align 4, !alias.scope [[META8]] -; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD14]], <8 x ptr> align 4 [[TMP26]], <8 x i1> splat (i1 true)), 
!alias.scope [[META11]], !noalias [[META13]] -; AVX512-NEXT: [[WIDE_LOAD15:%.*]] = load <8 x float>, ptr [[TMP28]], align 4, !alias.scope [[META15]] +; AVX512-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x float>, ptr [[TMP29]], align 4 +; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD10]], <8 x ptr> align 4 [[TMP26]], <8 x i1> splat (i1 true)) +; AVX512-NEXT: [[WIDE_LOAD11:%.*]] = load <8 x float>, ptr [[TMP28]], align 4 ; AVX512-NEXT: [[TMP32:%.*]] = getelementptr inbounds float, <8 x ptr> [[TMP26]], i64 1 -; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD15]], <8 x ptr> align 4 [[TMP32]], <8 x i1> splat (i1 true)), !alias.scope [[META11]], !noalias [[META13]] +; AVX512-NEXT: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> [[WIDE_LOAD11]], <8 x ptr> align 4 [[TMP32]], <8 x i1> splat (i1 true)) ; AVX512-NEXT: [[INDEX_NEXT24]] = add nuw i64 [[INDEX18]], 8 ; AVX512-NEXT: [[PTR_IND20]] = getelementptr i8, ptr [[POINTER_PHI19]], i64 512 ; AVX512-NEXT: [[TMP33:%.*]] = icmp eq i64 [[INDEX_NEXT24]], [[N_VEC10]] -; AVX512-NEXT: br i1 [[TMP33]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] +; AVX512-NEXT: br i1 [[TMP33]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; AVX512: vec.epilog.middle.block: ; AVX512-NEXT: [[CMP_N17:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC10]] ; AVX512-NEXT: br i1 [[CMP_N17]], label [[FOR_END_LOOPEXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -728,7 +711,7 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; FVW2-LABEL: @test_gather_not_profitable_pr48429( ; FVW2-NEXT: entry: ; FVW2-NEXT: [[IDX_EXT:%.*]] = sext i32 [[D:%.*]] to i64 -; FVW2-NEXT: [[SCEVGEP1:%.*]] = getelementptr float, ptr [[PTR:%.*]], i64 [[IDX_EXT]] +; FVW2-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds float, ptr [[PTR:%.*]], i64 [[IDX_EXT]] ; FVW2-NEXT: [[CMP_NOT10:%.*]] = icmp eq 
i32 [[D]], 0 ; FVW2-NEXT: br i1 [[CMP_NOT10]], label [[FOR_END:%.*]], label [[FOR_BODY_LR_PH:%.*]] ; FVW2: for.body.lr.ph: @@ -739,31 +722,14 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; FVW2-NEXT: [[TMP2:%.*]] = lshr i64 [[TMP1]], 2 ; FVW2-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1 ; FVW2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2 -; FVW2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]] -; FVW2: vector.memcheck: -; FVW2-NEXT: [[TMP4:%.*]] = shl nsw i64 [[IDX_EXT]], 2 -; FVW2-NEXT: [[TMP5:%.*]] = add nsw i64 [[TMP4]], -4 -; FVW2-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 2 -; FVW2-NEXT: [[TMP7:%.*]] = shl i64 [[TMP6]], 6 -; FVW2-NEXT: [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 8 -; FVW2-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[DEST:%.*]], i64 [[TMP8]] -; FVW2-NEXT: [[TMP11:%.*]] = mul nsw i64 [[IDX_EXT]], -4 -; FVW2-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP11]] -; FVW2-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[DEST]], [[SCEVGEP1]] -; FVW2-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[PTR]], [[SCEVGEP]] -; FVW2-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] -; FVW2-NEXT: [[BOUND04:%.*]] = icmp ult ptr [[DEST]], [[PTR]] -; FVW2-NEXT: [[BOUND15:%.*]] = icmp ult ptr [[SCEVGEP2]], [[SCEVGEP]] -; FVW2-NEXT: [[FOUND_CONFLICT6:%.*]] = and i1 [[BOUND04]], [[BOUND15]] -; FVW2-NEXT: [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT6]] -; FVW2-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; FVW2-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; FVW2: vector.ph: ; FVW2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2 ; FVW2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]] ; FVW2-NEXT: [[TMP13:%.*]] = shl i64 [[N_VEC]], 2 ; FVW2-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[TMP13]] ; FVW2-NEXT: [[TMP14:%.*]] = shl i64 [[N_VEC]], 6 -; FVW2-NEXT: 
[[IND_END7:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP14]] +; FVW2-NEXT: [[IND_END7:%.*]] = getelementptr i8, ptr [[DEST:%.*]], i64 [[TMP14]] ; FVW2-NEXT: br label [[VECTOR_BODY:%.*]] ; FVW2: vector.body: ; FVW2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -774,21 +740,21 @@ define void @test_gather_not_profitable_pr48429(i32 %d, ptr readonly noalias %pt ; FVW2-NEXT: [[TMP19:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[OFFSET_IDX9]] ; FVW2-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DEST]], i64 [[TMP18]] ; FVW2-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[TMP16]], i64 [[IDXPROM]] -; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP21]], align 4, !alias.scope [[META8:![0-9]+]] +; FVW2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x float>, ptr [[TMP21]], align 4 ; FVW2-NEXT: [[TMP23:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i64 0 ; FVW2-NEXT: [[TMP17:%.*]] = extractelement <2 x float> [[WIDE_LOAD]], i64 1 -; FVW2-NEXT: store float [[TMP23]], ptr [[TMP19]], align 4, !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]] -; FVW2-NEXT: store float [[TMP17]], ptr [[TMP20]], align 4, !alias.scope [[META11]], !noalias [[META13]] -; FVW2-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, ptr [[TMP16]], align 4, !alias.scope [[META15:![0-9]+]] +; FVW2-NEXT: store float [[TMP23]], ptr [[TMP19]], align 4 +; FVW2-NEXT: store float [[TMP17]], ptr [[TMP20]], align 4 +; FVW2-NEXT: [[WIDE_LOAD8:%.*]] = load <2 x float>, ptr [[TMP16]], align 4 ; FVW2-NEXT: [[TMP24:%.*]] = extractelement <2 x float> [[WIDE_LOAD8]], i64 0 ; FVW2-NEXT: [[TMP26:%.*]] = extractelement <2 x float> [[WIDE_LOAD8]], i64 1 ; FVW2-NEXT: [[TMP25:%.*]] = getelementptr inbounds float, ptr [[TMP19]], i64 1 ; FVW2-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[TMP20]], i64 1 -; FVW2-NEXT: store float [[TMP24]], ptr [[TMP25]], align 4, !alias.scope [[META11]], !noalias [[META13]] -; FVW2-NEXT: store float [[TMP26]], ptr 
[[TMP22]], align 4, !alias.scope [[META11]], !noalias [[META13]] +; FVW2-NEXT: store float [[TMP24]], ptr [[TMP25]], align 4 +; FVW2-NEXT: store float [[TMP26]], ptr [[TMP22]], align 4 ; FVW2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; FVW2-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; FVW2-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] +; FVW2-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; FVW2: middle.block: ; FVW2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]] ; FVW2-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] diff --git a/llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll b/llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll index d8c05f4b05c7c..908d90080e323 100644 --- a/llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll +++ b/llvm/test/Transforms/LoopVersioning/exit-block-dominates-rt-check-block.ll @@ -12,7 +12,7 @@ @c1 = external global i16 -define void @f(i16 %a) { +define void @f(i16 %a, ptr %p) { br label %bb0 bb0: @@ -24,12 +24,14 @@ bb1: bb2: %tmp2 = phi i16 [ %tmp1, %bb1 ], [ %tmp3, %bb2 ] - %tmp4 = getelementptr inbounds [1 x i32], ptr undef, i32 0, i32 4 + %iv = phi i32 [ 0, %bb1 ], [ %iv.next, %bb2 ] + %tmp4 = getelementptr inbounds i32, ptr %p, i32 %iv store i32 1, ptr %tmp4 - %tmp5 = getelementptr inbounds [1 x i32], ptr undef, i32 0, i32 9 + %tmp5 = getelementptr inbounds i32, ptr %p, i32 4 store i32 0, ptr %tmp5 %tmp3 = add i16 %tmp2, 1 store i16 %tmp2, ptr @c1 + %iv.next = add i32 %iv, 1 %tmp6 = icmp sle i16 %tmp3, 0 br i1 %tmp6, label %bb2, label %bb0 } From 579249af7253a833bd1f68f20030d1dccb82f6ec Mon Sep 17 00:00:00 2001 From: yueshe Date: Tue, 23 Jun 2026 15:04:31 -0400 Subject: [PATCH 237/511] [SystemZ] Add missing asserts requirement for pre-RA sched mir tests (#205403) This is based off 
https://github.com/llvm/llvm-project/pull/188823 and is needed because tests are failing in release (non-asserts) builds --- llvm/test/CodeGen/SystemZ/misched-prera-loads.mir | 1 + llvm/test/CodeGen/SystemZ/misched-prera-pdiffs.mir | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/test/CodeGen/SystemZ/misched-prera-loads.mir b/llvm/test/CodeGen/SystemZ/misched-prera-loads.mir index ef4b3a19cb781..af38362fd199a 100644 --- a/llvm/test/CodeGen/SystemZ/misched-prera-loads.mir +++ b/llvm/test/CodeGen/SystemZ/misched-prera-loads.mir @@ -1,6 +1,7 @@ # RUN: llc -o - %s -mtriple=s390x-linux-gnu -mcpu=z17 -verify-machineinstrs \ # RUN: -run-pass=machine-scheduler -debug-only=machine-scheduler \ # RUN: 2>&1 | FileCheck %s +# REQUIRES: asserts --- | diff --git a/llvm/test/CodeGen/SystemZ/misched-prera-pdiffs.mir b/llvm/test/CodeGen/SystemZ/misched-prera-pdiffs.mir index 048c7fa203e07..49dd546929daf 100644 --- a/llvm/test/CodeGen/SystemZ/misched-prera-pdiffs.mir +++ b/llvm/test/CodeGen/SystemZ/misched-prera-pdiffs.mir @@ -1,6 +1,7 @@ # RUN: llc -o - %s -mtriple=s390x-linux-gnu -mcpu=z16 -verify-machineinstrs \ # RUN: -run-pass=machine-scheduler -debug-only=machine-scheduler 2>&1\ # RUN: | FileCheck %s +# REQUIRES: asserts # Some tests for Pressure Diffs of scheduling units. Each interesting register # class is used in a def-use sequence and the initial Pressure Diff of each SU From 4c96d29f0132ae5f2a6fee3cb52205acb0d91a5d Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Tue, 23 Jun 2026 12:08:43 -0700 Subject: [PATCH 238/511] [clang] Fix incorrect filenames for hardlinks (#189475) Previously hardlinked files would be discovered based on their inode, leading to incorrect filepaths printed in some cases. 
Fixes https://github.com/llvm/llvm-project/issues/26953 Fixes https://github.com/llvm/llvm-project/issues/58726 Related https://reviews.llvm.org/D137304 Assisted-by: codex --- clang/include/clang/Basic/SourceManager.h | 6 + clang/lib/Basic/SourceManager.cpp | 60 ++++++--- clang/test/DebugInfo/Generic/macro.c | 4 +- .../Preprocessor/hardlink-include-names.c | 42 ++++++ clang/unittests/Basic/SourceManagerTest.cpp | 127 +++++++++++++++++- 5 files changed, 220 insertions(+), 19 deletions(-) create mode 100644 clang/test/Preprocessor/hardlink-include-names.c diff --git a/clang/include/clang/Basic/SourceManager.h b/clang/include/clang/Basic/SourceManager.h index 4217b8683da1e..1939d1aa4915e 100644 --- a/clang/include/clang/Basic/SourceManager.h +++ b/clang/include/clang/Basic/SourceManager.h @@ -714,6 +714,12 @@ class SourceManager : public RefCountedBase { /// as they do not refer to a file. std::vector MemBufferInfos; + /// Per-FileID content caches for aliased file references. + /// + /// These caches preserve the spelling used for a particular include while + /// sharing file contents with the canonical cache in \c FileInfos. + std::vector FileIDContentCaches; + /// The table of SLocEntries that are local to this module. /// /// Positive FileIDs are indexes into this table. 
Entry 0 indicates an invalid diff --git a/clang/lib/Basic/SourceManager.cpp b/clang/lib/Basic/SourceManager.cpp index b6cc6ec9365f5..5540aade05ef5 100644 --- a/clang/lib/Basic/SourceManager.cpp +++ b/clang/lib/Basic/SourceManager.cpp @@ -30,6 +30,7 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -37,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +49,20 @@ using llvm::MemoryBuffer; #define DEBUG_TYPE "source-manager" +static SrcMgr::ContentCache *cloneContentCache(llvm::BumpPtrAllocator &Alloc, + const ContentCache &Other) { + auto *Clone = new (Alloc.Allocate()) ContentCache; + Clone->OrigEntry = Other.OrigEntry; + Clone->ContentsEntry = Other.ContentsEntry; + Clone->Filename = Other.Filename; + Clone->BufferOverridden = Other.BufferOverridden; + Clone->IsFileVolatile = Other.IsFileVolatile; + Clone->IsTransient = Other.IsTransient; + Clone->IsBufferInvalid = Other.IsBufferInvalid; + Clone->setUnownedBuffer(Other.getBufferIfLoaded()); + return Clone; +} + // Reaching a limit of 2^31 results in a hard error. This metric allows to track // if particular invocation of the compiler is close to it. 
STATISTIC(MaxUsedSLocBytes, "Maximum number of bytes used by source locations " @@ -324,9 +340,23 @@ SourceManager::~SourceManager() { ContentCacheAlloc.Deallocate(I->second); } } + for (unsigned i = 0, e = FileIDContentCaches.size(); i != e; ++i) { + if (FileIDContentCaches[i]) { + FileIDContentCaches[i]->~ContentCache(); + ContentCacheAlloc.Deallocate(FileIDContentCaches[i]); + } + } } void SourceManager::clearIDTables() { + for (unsigned i = 0, e = FileIDContentCaches.size(); i != e; ++i) { + if (FileIDContentCaches[i]) { + FileIDContentCaches[i]->~ContentCache(); + ContentCacheAlloc.Deallocate(FileIDContentCaches[i]); + } + } + FileIDContentCaches.clear(); + MainFileID = FileID(); LocalSLocEntryTable.clear(); LocalLocOffsetTable.clear(); @@ -361,17 +391,6 @@ bool SourceManager::isMainFile(const FileEntry &SourceFile) { void SourceManager::initializeForReplay(const SourceManager &Old) { assert(MainFileID.isInvalid() && "expected uninitialized SourceManager"); - auto CloneContentCache = [&](const ContentCache *Cache) -> ContentCache * { - auto *Clone = new (ContentCacheAlloc.Allocate()) ContentCache; - Clone->OrigEntry = Cache->OrigEntry; - Clone->ContentsEntry = Cache->ContentsEntry; - Clone->BufferOverridden = Cache->BufferOverridden; - Clone->IsFileVolatile = Cache->IsFileVolatile; - Clone->IsTransient = Cache->IsTransient; - Clone->setUnownedBuffer(Cache->getBufferIfLoaded()); - return Clone; - }; - // Ensure all SLocEntries are loaded from the external source. 
for (unsigned I = 0, N = Old.LoadedSLocEntryTable.size(); I != N; ++I) if (!Old.SLocEntryLoaded[I]) @@ -382,7 +401,7 @@ void SourceManager::initializeForReplay(const SourceManager &Old) { SrcMgr::ContentCache *&Slot = FileInfos[FileInfo.first]; if (Slot) continue; - Slot = CloneContentCache(FileInfo.second); + Slot = cloneContentCache(ContentCacheAlloc, *FileInfo.second); } } @@ -542,14 +561,22 @@ FileID SourceManager::createFileID(FileEntryRef SourceFile, SourceLocation::UIntTy LoadedOffset) { SrcMgr::ContentCache &IR = getOrCreateContentCache(SourceFile, isSystem(FileCharacter)); + SrcMgr::ContentCache *Cache = &IR; + StringRef Filename = SourceFile.getName(); + + if (IR.OrigEntry && !IR.OrigEntry->isSameRef(SourceFile)) { + Cache = cloneContentCache(ContentCacheAlloc, IR); + Cache->OrigEntry = SourceFile; + FileIDContentCaches.push_back(Cache); + } // If this is a named pipe, immediately load the buffer to ensure subsequent // calls to ContentCache::getSize() are accurate. - if (IR.ContentsEntry->isNamedPipe()) - (void)IR.getBufferOrNone(Diag, getFileManager(), SourceLocation()); + if (Cache->ContentsEntry->isNamedPipe()) + (void)Cache->getBufferOrNone(Diag, getFileManager(), SourceLocation()); - return createFileIDImpl(IR, SourceFile.getName(), IncludePos, FileCharacter, - LoadedID, LoadedOffset); + return createFileIDImpl(*Cache, Filename, IncludePos, FileCharacter, LoadedID, + LoadedOffset); } /// Create a new FileID that represents the specified memory buffer. 
@@ -2310,6 +2337,7 @@ SourceManager::MemoryBufferSizes SourceManager::getMemoryBufferSizes() const { size_t SourceManager::getDataStructureSizes() const { size_t size = llvm::capacity_in_bytes(MemBufferInfos) + + llvm::capacity_in_bytes(FileIDContentCaches) + llvm::capacity_in_bytes(LocalSLocEntryTable) + llvm::capacity_in_bytes(LoadedSLocEntryTable) + llvm::capacity_in_bytes(SLocEntryLoaded) + diff --git a/clang/test/DebugInfo/Generic/macro.c b/clang/test/DebugInfo/Generic/macro.c index de8d1ab915d7e..72d8dc4f9fdd5 100644 --- a/clang/test/DebugInfo/Generic/macro.c +++ b/clang/test/DebugInfo/Generic/macro.c @@ -39,13 +39,13 @@ // CHECK: [[UndefA]] = !DIMacro(type: DW_MACINFO_undef, line: 11, name: "A") // CHECK: [[DefineD1]] = !DIMacro(type: DW_MACINFO_define, line: 15, name: "D1", value: "1") -// CHECK: [[FileInclude1]] = !DIMacroFile(line: 16, file: [[HeaderFile]], nodes: [[N3:![0-9]+]]) +// CHECK: [[FileInclude1]] = !DIMacroFile(line: 16, file: [[SourceIncludeHeaderFile:![0-9]+]], nodes: [[N3:![0-9]+]]) // CHECK: [[N3]] = !{[[DefineAx:![0-9]+]], [[UndefA]]} // CHECK: [[DefineAx]] = !DIMacro(type: DW_MACINFO_define, line: 3, name: "A(x,y,z)", value: "(x)") // CHECK: [[UndefD1]] = !DIMacro(type: DW_MACINFO_undef, line: 17, name: "D1") // CHECK: [[DefineD2]] = !DIMacro(type: DW_MACINFO_define, line: 18, name: "D2", value: "2") -// CHECK: [[FileInclude2]] = !DIMacroFile(line: 19, file: [[HeaderFile]], nodes: [[N4:![0-9]+]]) +// CHECK: [[FileInclude2]] = !DIMacroFile(line: 19, file: [[SourceIncludeHeaderFile]], nodes: [[N4:![0-9]+]]) // CHECK: [[N4]] = !{[[DefineAy:![0-9]+]], [[UndefA]]} // CHECK: [[DefineAy]] = !DIMacro(type: DW_MACINFO_define, line: 7, name: "A(x,y,z)", value: "(y)") // CHECK: [[UndefD2]] = !DIMacro(type: DW_MACINFO_undef, line: 20, name: "D2") diff --git a/clang/test/Preprocessor/hardlink-include-names.c b/clang/test/Preprocessor/hardlink-include-names.c new file mode 100644 index 0000000000000..675ba30438ce9 --- /dev/null +++ 
b/clang/test/Preprocessor/hardlink-include-names.c @@ -0,0 +1,42 @@ +// Test that symlinked and hardlinked files are reported with their correct +// filenames +// +// REQUIRES: symlinks +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: ln %t/foo.inc %t/bar.inc +// RUN: ln -s %t/foo.inc %t/baz.inc + +// Test 1: -E should show both filenames. +// RUN: %clang_cc1 -E %t/main.c -o - | FileCheck --check-prefix=PP %s +// PP: # 1 "{{.*(/|\\\\)}}foo.inc" 1 +// PP: # 1 "{{.*(/|\\\\)}}bar.inc" 1 +// PP: # 1 "{{.*(/|\\\\)}}baz.inc" 1 + +// Test 2: .d should list both filenames. +// RUN: %clang_cc1 -dependency-file %t/deps.d -MT main.o %t/main.c -fsyntax-only +// RUN: FileCheck --check-prefix=DEPS -input-file=%t/deps.d %s +// DEPS: foo.inc +// DEPS: bar.inc +// DEPS: baz.inc + +// Test 3: --show-includes should list both filenames. +// RUN: %clang_cc1 --show-includes -o /dev/null %t/main.c | \ +// RUN: FileCheck --check-prefix=SHOW %s +// SHOW: Note: including file: {{.*}}foo.inc +// SHOW: Note: including file: {{.*}}bar.inc +// SHOW: Note: including file: {{.*}}baz.inc + +//--- main.c +const char *a = +#include "foo.inc" +; +const char *b = +#include "bar.inc" +; +const char *c = +#include "baz.inc" +; + +//--- foo.inc +"contents" diff --git a/clang/unittests/Basic/SourceManagerTest.cpp b/clang/unittests/Basic/SourceManagerTest.cpp index 7c8aae5c5834f..e45d23265ea34 100644 --- a/clang/unittests/Basic/SourceManagerTest.cpp +++ b/clang/unittests/Basic/SourceManagerTest.cpp @@ -22,6 +22,7 @@ #include "llvm/Config/llvm-config.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Process.h" +#include "llvm/Support/VirtualFileSystem.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include @@ -42,14 +43,26 @@ namespace { class SourceManagerTest : public ::testing::Test { protected: SourceManagerTest() - : FileMgr(FileMgrOpts), + : FileMgr(FileMgrOpts, FS), Diags(DiagnosticIDs::create(), DiagOpts, new IgnoringDiagConsumer()), SourceMgr(Diags, FileMgr), 
TargetOpts(new TargetOptions) { TargetOpts->Triple = "x86_64-apple-darwin11.1.0"; Target = TargetInfo::CreateTargetInfo(Diags, *TargetOpts); } + void AddFile(StringRef Path) { + ASSERT_TRUE(FS->addFile(Path, /*ModificationTime=*/0, + llvm::MemoryBuffer::getMemBuffer("x\n"))); + } + + void AddHardLink(StringRef NewLink, StringRef Target) { + ASSERT_TRUE(FS->addHardLink(NewLink, Target)); + } + FileSystemOptions FileMgrOpts; + IntrusiveRefCntPtr FS = + llvm::makeIntrusiveRefCnt( + /*UseNormalizedPaths=*/false); FileManager FileMgr; DiagnosticOptions DiagOpts; DiagnosticsEngine Diags; @@ -417,6 +430,118 @@ TEST_F(SourceManagerTest, getLineNumber) { ASSERT_NO_FATAL_FAILURE(SourceMgr.getLineNumber(mainFileID, 1, nullptr)); } +TEST_F(SourceManagerTest, aliasedFilesKeepRequestedNamesPerFileID) { +#ifdef _WIN32 + constexpr StringRef FooPath = "C:\\dir\\foo.h"; + constexpr StringRef BarPath = "C:\\dir\\bar.h"; +#else + constexpr StringRef FooPath = "/dir/foo.h"; + constexpr StringRef BarPath = "/dir/bar.h"; +#endif + + AddFile(FooPath); + AddHardLink(BarPath, FooPath); + + auto FooOrErr = FileMgr.getFileRef(FooPath); + auto BarOrErr = FileMgr.getFileRef(BarPath); + ASSERT_TRUE(static_cast(FooOrErr)); + ASSERT_TRUE(static_cast(BarOrErr)); + + FileEntryRef Foo = *FooOrErr; + FileEntryRef Bar = *BarOrErr; + EXPECT_FALSE(Foo.isSameRef(Bar)); + EXPECT_EQ(Foo, Bar); + + SourceMgr.overrideFileContents(Foo, llvm::MemoryBuffer::getMemBuffer("x\n")); + + FileID FooID = SourceMgr.createFileID(Foo, SourceLocation(), SrcMgr::C_User); + FileID BarID = SourceMgr.createFileID(Bar, SourceLocation(), SrcMgr::C_User); + + SourceLocation FooLoc = SourceMgr.getLocForStartOfFile(FooID); + SourceLocation BarLoc = SourceMgr.getLocForStartOfFile(BarID); + + EXPECT_EQ(FooPath, SourceMgr.getFilename(FooLoc)); + EXPECT_EQ(BarPath, SourceMgr.getFilename(BarLoc)); + EXPECT_STREQ(FooPath.data(), SourceMgr.getPresumedLoc(FooLoc).getFilename()); + EXPECT_STREQ(BarPath.data(), 
SourceMgr.getPresumedLoc(BarLoc).getFilename()); +} + +TEST_F(SourceManagerTest, dotPathSpellingsKeepRequestedNamesPerFileID) { +#ifdef _WIN32 + constexpr StringRef FooPath = "C:\\dir\\foo.h"; + constexpr StringRef DotFooPath = "C:\\.\\dir\\foo.h"; +#else + constexpr StringRef FooPath = "/dir/foo.h"; + constexpr StringRef DotFooPath = "/./dir/foo.h"; +#endif + + AddFile(FooPath); + AddHardLink(DotFooPath, FooPath); + + auto FooOrErr = FileMgr.getFileRef(FooPath); + auto DotFooOrErr = FileMgr.getFileRef(DotFooPath); + ASSERT_TRUE(static_cast(FooOrErr)); + ASSERT_TRUE(static_cast(DotFooOrErr)); + + FileEntryRef Foo = *FooOrErr; + FileEntryRef DotFoo = *DotFooOrErr; + EXPECT_FALSE(Foo.isSameRef(DotFoo)); + EXPECT_EQ(Foo, DotFoo); + + SourceMgr.overrideFileContents(Foo, llvm::MemoryBuffer::getMemBuffer("x\n")); + + FileID FooID = SourceMgr.createFileID(Foo, SourceLocation(), SrcMgr::C_User); + FileID DotFooID = + SourceMgr.createFileID(DotFoo, SourceLocation(), SrcMgr::C_User); + + SourceLocation FooLoc = SourceMgr.getLocForStartOfFile(FooID); + SourceLocation DotFooLoc = SourceMgr.getLocForStartOfFile(DotFooID); + + EXPECT_EQ(FooPath, SourceMgr.getFilename(FooLoc)); + EXPECT_EQ(DotFooPath, SourceMgr.getFilename(DotFooLoc)); + EXPECT_STREQ(FooPath.data(), SourceMgr.getPresumedLoc(FooLoc).getFilename()); + EXPECT_STREQ(DotFooPath.data(), + SourceMgr.getPresumedLoc(DotFooLoc).getFilename()); + EXPECT_EQ(DotFooPath, *SourceMgr.getNonBuiltinFilenameForID(DotFooID)); +} + +TEST_F(SourceManagerTest, dotDotPathSpellingsKeepRequestedNamesPerFileID) { +#ifdef _WIN32 + constexpr StringRef BPath = "C:\\a\\b\\..\\c.h"; + constexpr StringRef XPath = "C:\\a\\x\\..\\c.h"; +#else + constexpr StringRef BPath = "/a/b/../c.h"; + constexpr StringRef XPath = "/a/x/../c.h"; +#endif + + AddFile(BPath); + AddHardLink(XPath, BPath); + + auto BOrErr = FileMgr.getFileRef(BPath); + auto XOrErr = FileMgr.getFileRef(XPath); + ASSERT_TRUE(static_cast(BOrErr)); + ASSERT_TRUE(static_cast(XOrErr)); + + 
FileEntryRef B = *BOrErr; + FileEntryRef X = *XOrErr; + EXPECT_FALSE(B.isSameRef(X)); + EXPECT_EQ(B, X); + + SourceMgr.overrideFileContents(B, llvm::MemoryBuffer::getMemBuffer("x\n")); + + FileID BID = SourceMgr.createFileID(B, SourceLocation(), SrcMgr::C_User); + FileID XID = SourceMgr.createFileID(X, SourceLocation(), SrcMgr::C_User); + + SourceLocation BLoc = SourceMgr.getLocForStartOfFile(BID); + SourceLocation XLoc = SourceMgr.getLocForStartOfFile(XID); + + EXPECT_EQ(BPath, SourceMgr.getFilename(BLoc)); + EXPECT_EQ(XPath, SourceMgr.getFilename(XLoc)); + EXPECT_STREQ(BPath.data(), SourceMgr.getPresumedLoc(BLoc).getFilename()); + EXPECT_STREQ(XPath.data(), SourceMgr.getPresumedLoc(XLoc).getFilename()); + EXPECT_EQ(XPath, *SourceMgr.getNonBuiltinFilenameForID(XID)); +} + struct FakeExternalSLocEntrySource : ExternalSLocEntrySource { bool ReadSLocEntry(int ID) override { return {}; } int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override { return 0; } From 7028678fdd6c1f8795121c247e8872ff1aee2ac0 Mon Sep 17 00:00:00 2001 From: Andy Kaylor Date: Tue, 23 Jun 2026 12:15:46 -0700 Subject: [PATCH 239/511] [CIR] Implement support for emitting label address constants (#203644) The evalloop.c test in the llvm-test-suite single source tests contains a static array that is initialized with the address of labels within the enclosing function. This wasn't implemented in CIR. This change adds an implementation. The constant emitter change was trivial. We just needed to create a #cir.block_addr_info attribute. However, using that attribute as an initializer for a global requires some additional handling and special lowering for the initializer. The goto solver also needed to be updated to consider uses of labels in global initializers. The test case here was copied over directly from classic codegen. The original test has an additional test case for the difference between two label addresses. Support for that case will be added in a future change. 
Assisted-by: Cursor / claude-opus-4.8 --- .../include/clang/CIR/Dialect/IR/CIRAttrs.td | 21 +++-- clang/include/clang/CIR/Dialect/IR/CIROps.td | 3 + clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp | 8 +- clang/lib/CIR/CodeGen/CIRGenStmt.cpp | 13 +++- clang/lib/CIR/Dialect/IR/CIRDialect.cpp | 11 ++- .../CIR/Dialect/Transforms/CXXABILowering.cpp | 6 ++ .../lib/CIR/Dialect/Transforms/GotoSolver.cpp | 31 +++++++- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp | 59 +++++++++++--- .../CIR/Lowering/DirectToLLVM/LowerToLLVM.h | 8 +- clang/test/CIR/CodeGen/const-label-addr.c | 78 +++++++++++++++++++ 10 files changed, 202 insertions(+), 36 deletions(-) create mode 100644 clang/test/CIR/CodeGen/const-label-addr.c diff --git a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td index 356fac33a5733..f5f4f28f8993c 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td +++ b/clang/include/clang/CIR/Dialect/IR/CIRAttrs.td @@ -1469,25 +1469,32 @@ def CIR_UnwindAttr : CIR_UnitAttr<"Unwind", "unwind"> { // CIR_BlockAddrInfoAttr //===----------------------------------------------------------------------===// -def CIR_BlockAddrInfoAttr : CIR_Attr<"BlockAddrInfo", "block_addr_info"> { - let summary = "Block Addres attribute"; +def CIR_BlockAddrInfoAttr + : CIR_ValueLikeAttr<"BlockAddrInfo", "block_addr_info"> { + let summary = "Block address attribute"; let description = [{ This attribute is used to represent the address of a basic block within a function. It combines the symbol reference to a function with the name of a label inside that function. 
}]; - let parameters = (ins "mlir::FlatSymbolRefAttr":$func, - "mlir::StringAttr":$label); + let parameters = (ins + AttributeSelfTypeParameter< + "", "cir::PointerType", + "cir::PointerType::get(cir::VoidType::get($_ctxt))">:$type, + "mlir::FlatSymbolRefAttr":$func, + "mlir::StringAttr":$label); let assemblyFormat = "`<` $func `,` $label `>`"; let builders = [ AttrBuilder<(ins "llvm::StringRef":$func_name, - "llvm::StringRef":$label_name - ), [{ - return $_get($_ctxt, mlir::FlatSymbolRefAttr::get($_ctxt, func_name), + "llvm::StringRef":$label_name), [{ + return $_get($_ctxt, + cir::PointerType::get(cir::VoidType::get($_ctxt)), + mlir::FlatSymbolRefAttr::get($_ctxt, func_name), mlir::StringAttr::get($_ctxt, label_name)); }]> ]; + let canHaveIllegalCXXABIType = 0; } diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td index f48c5c7ee5209..355d4cb047a04 100644 --- a/clang/include/clang/CIR/Dialect/IR/CIROps.td +++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td @@ -3177,6 +3177,9 @@ def CIR_GlobalOp : CIR_Op<"global", [ mlir::SymbolRefAttr getComdatAttr(cir::GlobalOp &op, mlir::OpBuilder &builder) const; }]; + + let customLLVMLoweringConstructorDecl = + LoweringBuilders<(ins "LLVMBlockAddressInfo &":$blockInfoAddr)>; } //===----------------------------------------------------------------------===// diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp index 8ee29484ce64b..6c64d7571795a 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp @@ -1239,6 +1239,8 @@ struct ConstantLValue { : value(nullptr), hasOffsetApplied(false) {} /*implicit*/ ConstantLValue(cir::GlobalViewAttr address) : value(address), hasOffsetApplied(false) {} + /*implicit*/ ConstantLValue(cir::BlockAddrInfoAttr address) + : value(address), hasOffsetApplied(true) {} ConstantLValue() : value(nullptr), hasOffsetApplied(false) {} }; @@ -1519,8 
+1521,10 @@ ConstantLValueEmitter::VisitPredefinedExpr(const PredefinedExpr *e) { ConstantLValue ConstantLValueEmitter::VisitAddrLabelExpr(const AddrLabelExpr *e) { - cgm.errorNYI(e->getSourceRange(), "ConstantLValueEmitter: addr label expr"); - return {}; + auto func = cast(emitter.cgf->curFn); + return cir::BlockAddrInfoAttr::get(cgm.getBuilder().getContext(), + func.getSymName(), + e->getLabel()->getName()); } ConstantLValue ConstantLValueEmitter::VisitCallExpr(const CallExpr *e) { diff --git a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp index 922140a93aa5a..47c94cb4ec535 100644 --- a/clang/lib/CIR/CodeGen/CIRGenStmt.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenStmt.cpp @@ -706,8 +706,13 @@ mlir::LogicalResult CIRGenFunction::emitGotoStmt(const clang::GotoStmt &s) { mlir::LogicalResult CIRGenFunction::emitIndirectGotoStmt(const IndirectGotoStmt &s) { mlir::Value val = emitScalarExpr(s.getTarget()); - assert(indirectGotoBlock && - "If you jumping to a indirect branch should be alareadye emitted"); + if (!indirectGotoBlock) { + // If the target labels were emitted as constants, we have more work to do. + // This diagnostic is here to flag the condition, but the changes may end + // up being implemented elsewhere. 
+ cgm.errorNYI(s.getSourceRange(), "Indirect goto without a goto block"); + return mlir::failure(); + } cir::BrOp::create(builder, getLoc(s.getSourceRange()), indirectGotoBlock, val); builder.createBlock(builder.getBlock()->getParent()); @@ -745,8 +750,8 @@ mlir::LogicalResult CIRGenFunction::emitLabel(const clang::LabelDecl &d) { builder.setInsertionPointToEnd(labelBlock); auto func = cast(curFn); cgm.mapBlockAddress(cir::BlockAddrInfoAttr::get(builder.getContext(), - func.getSymNameAttr(), - label.getLabelAttr()), + func.getSymName(), + label.getLabel()), label); // FIXME: emit debug info for labels, incrementProfileCounter assert(!cir::MissingFeatures::incrementProfileCounter()); diff --git a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp index bf7efc2172fe9..b26fb9cec6d80 100644 --- a/clang/lib/CIR/Dialect/IR/CIRDialect.cpp +++ b/clang/lib/CIR/Dialect/IR/CIRDialect.cpp @@ -583,10 +583,10 @@ static LogicalResult checkConstantTypes(mlir::Operation *op, mlir::Type opType, return success(); } - if (mlir::isa(attrType)) + if (mlir::isa(attrType)) return success(); assert(isa(attrType) && "What else could we be looking at here?"); @@ -2190,8 +2190,7 @@ static ParseResult parseGlobalOpTypeAndInitialValue(OpAsmParser &parser, assert(mlir::isa(initialValueAttr) && "Non-typed attrs shouldn't appear here."); - auto typedAttr = mlir::cast(initialValueAttr); - opTy = typedAttr.getType(); + opTy = mlir::cast(initialValueAttr).getType(); } // Parse destructor, example: diff --git a/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp b/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp index 0bcfe124723e6..704ebbeb1ecd9 100644 --- a/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp +++ b/clang/lib/CIR/Dialect/Transforms/CXXABILowering.cpp @@ -485,6 +485,12 @@ static mlir::TypedAttr lowerInitialValue(const LowerModule *lowerModule, return cir::GlobalViewAttr::get(convertedTy, gva.getSymbol(), gva.getIndices()); + if (auto blockAddr = + 
mlir::dyn_cast_if_present(initVal)) { + assert(convertedTy == ptrTy && "BlockAddrInfo type should not change"); + return blockAddr; + } + auto constPtr = mlir::cast_if_present(initVal); if (!constPtr) return {}; diff --git a/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp b/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp index d590ccce1f540..e2a561cb3a003 100644 --- a/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp +++ b/clang/lib/CIR/Dialect/Transforms/GotoSolver.cpp @@ -9,6 +9,8 @@ #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/Passes.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Support/TimeProfiler.h" #include @@ -27,7 +29,8 @@ struct GotoSolverPass : public impl::GotoSolverBase { void runOnOperation() override; }; -static void process(cir::FuncOp func) { +static void process(cir::FuncOp func, + const llvm::StringSet<> &globalBlockAddrLabel) { mlir::OpBuilder rewriter(func.getContext()); llvm::StringMap labels; llvm::SmallVector gotos; @@ -46,7 +49,11 @@ static void process(cir::FuncOp func) { for (auto &lab : labels) { StringRef labelName = lab.getKey(); Block *block = lab.getValue(); - if (!blockAddrLabel.contains(labelName)) { + // Keep labels whose address is taken either by a cir.block_address op in + // this function or by a block-address attribute used elsewhere (e.g. in a + // global initializer). + if (!blockAddrLabel.contains(labelName) && + !globalBlockAddrLabel.contains(labelName)) { // erase the LabelOp inside the block if safe if (auto lab = dyn_cast(&block->front())) { lab.erase(); @@ -65,7 +72,25 @@ static void process(cir::FuncOp func) { void GotoSolverPass::runOnOperation() { llvm::TimeTraceScope scope("Goto Solver"); - getOperation()->walk(&process); + + // Block addresses can also appear in attributes outside of any function body, + // such as global variable initializers. 
Collect, per target function, the + // labels referenced this way so their LabelOps are not erased below. + llvm::StringMap> globalBlockAddrLabels; + getOperation()->walk([&](mlir::Operation *op) { + for (const mlir::NamedAttribute &namedAttr : op->getAttrs()) { + namedAttr.getValue().walk([&](cir::BlockAddrInfoAttr info) { + globalBlockAddrLabels[info.getFunc().getValue()].insert( + info.getLabel()); + }); + } + }); + + static const llvm::StringSet<> emptySet; + getOperation()->walk([&](cir::FuncOp func) { + auto it = globalBlockAddrLabels.find(func.getSymName()); + process(func, it == globalBlockAddrLabels.end() ? emptySet : it->second); + }); } } // namespace diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index af729e95c7709..7cb15f8c5e8a3 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -285,8 +285,10 @@ class CIRAttrToValue { public: CIRAttrToValue(mlir::Operation *parentOp, mlir::ConversionPatternRewriter &rewriter, - const mlir::TypeConverter *converter) - : parentOp(parentOp), rewriter(rewriter), converter(converter) {} + const mlir::TypeConverter *converter, + LLVMBlockAddressInfo *blockInfoAddr = nullptr) + : parentOp(parentOp), rewriter(rewriter), converter(converter), + blockInfoAddr(blockInfoAddr) {} #define GET_CIR_ATTR_TO_VALUE_VISITOR_DECLS #include "clang/CIR/Dialect/IR/CIRLowering.inc" @@ -296,14 +298,18 @@ class CIRAttrToValue { mlir::Operation *parentOp; mlir::ConversionPatternRewriter &rewriter; const mlir::TypeConverter *converter; + // Only available when lowering global initializers that may contain block + // address attributes. Used to resolve a BlockAddrInfoAttr to its block tag. + LLVMBlockAddressInfo *blockInfoAddr; }; /// Switches on the type of attribute and calls the appropriate conversion. 
mlir::Value lowerCirAttrAsValue(mlir::Operation *parentOp, const mlir::Attribute attr, mlir::ConversionPatternRewriter &rewriter, - const mlir::TypeConverter *converter) { - CIRAttrToValue valueConverter(parentOp, rewriter, converter); + const mlir::TypeConverter *converter, + LLVMBlockAddressInfo *blockInfoAddr) { + CIRAttrToValue valueConverter(parentOp, rewriter, converter, blockInfoAddr); mlir::Value value = valueConverter.visit(attr); if (!value) llvm_unreachable("unhandled attribute type"); @@ -475,6 +481,29 @@ mlir::Value CIRAttrToValue::visitCirAttr(cir::ConstPtrAttr ptrAttr) { rewriter, loc, converter->convertType(ptrAttr.getType()), ptrVal); } +/// BlockAddrInfoAttr visitor. +mlir::Value CIRAttrToValue::visitCirAttr(cir::BlockAddrInfoAttr blockAddrInfo) { + assert(blockInfoAddr && + "block address lowering requires LLVMBlockAddressInfo"); + // A block address is lowered to an llvm.blockaddress op that references a + // block tag inside the target function. The matching block tag may not have + // been emitted yet, in which case the address is recorded as unresolved and + // patched up later in resolveBlockAddressOp. + mlir::Location loc = parentOp->getLoc(); + mlir::LLVM::BlockTagOp matchLabel = + blockInfoAddr->lookupBlockTag(blockAddrInfo); + mlir::LLVM::BlockTagAttr tagAttr = + matchLabel ? 
matchLabel.getTag() : mlir::LLVM::BlockTagAttr{}; + auto blkAddr = mlir::LLVM::BlockAddressAttr::get( + rewriter.getContext(), blockAddrInfo.getFunc(), tagAttr); + auto blockAddressOp = mlir::LLVM::BlockAddressOp::create( + rewriter, loc, mlir::LLVM::LLVMPointerType::get(rewriter.getContext()), + blkAddr); + if (!matchLabel) + blockInfoAddr->addUnresolvedBlockAddress(blockAddressOp, blockAddrInfo); + return blockAddressOp; +} + // ConstArrayAttr visitor mlir::Value CIRAttrToValue::visitCirAttr(cir::ConstArrayAttr attr) { mlir::Type llvmTy = converter->convertType(attr.getType()); @@ -2439,17 +2468,21 @@ CIRToLLVMGlobalOpLowering::matchAndRewriteRegionInitializedGlobal( cir::GlobalOp op, mlir::Attribute init, mlir::ConversionPatternRewriter &rewriter) const { // TODO: Generalize this handling when more types are needed here. - assert((isa(init))); + assert((isa(init))); // TODO(cir): once LLVM's dialect has proper equivalent attributes this // should be updated. For now, we use a custom op to initialize globals // to the appropriate value. const mlir::Location loc = op.getLoc(); setupRegionInitializedLLVMGlobalOp(op, rewriter); - CIRAttrToValue valueConverter(op, rewriter, typeConverter); + + // Pass blockInfoAddr so that block address initializers (either as the whole + // initializer or nested inside an aggregate) can be resolved by the + // BlockAddrInfoAttr visitor. + CIRAttrToValue valueConverter(op, rewriter, typeConverter, &blockInfoAddr); mlir::Value value = valueConverter.visit(init); mlir::LLVM::ReturnOp::create(rewriter, loc, value); return mlir::success(); @@ -2555,7 +2588,8 @@ mlir::LogicalResult CIRToLLVMGlobalOpLowering::matchAndRewrite( return mlir::success(); } return matchAndRewriteRegionInitializedGlobal(op, init.value(), rewriter); - } else if (mlir::isa(init.value())) { @@ -3721,8 +3755,9 @@ void ConvertCIRToLLVMPass::runOnOperation() { /// repeated O(M) module-wide symbol scans for every call site. 
mlir::SymbolTableCollection symbolTables; mlir::RewritePatternSet patterns(&getContext()); - patterns.add( - converter, patterns.getContext(), dl, blockInfoAddr); + patterns.add(converter, patterns.getContext(), dl, + blockInfoAddr); patterns.add( converter, patterns.getContext(), dl, symbolTables); diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h index c0abb40b7304e..059d6a9778be3 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h @@ -22,11 +22,15 @@ namespace cir { namespace direct { +struct LLVMBlockAddressInfo; + /// Convert a CIR attribute to an LLVM attribute. May use the datalayout for -/// lowering attributes to-be-stored in memory. +/// lowering attributes to-be-stored in memory. When the attribute may contain +/// block address attributes, `blockInfoAddr` is used to resolve them. mlir::Value lowerCirAttrAsValue(mlir::Operation *parentOp, mlir::Attribute attr, mlir::ConversionPatternRewriter &rewriter, - const mlir::TypeConverter *converter); + const mlir::TypeConverter *converter, + LLVMBlockAddressInfo *blockInfoAddr = nullptr); mlir::LLVM::Linkage convertLinkage(cir::GlobalLinkageKind linkage); diff --git a/clang/test/CIR/CodeGen/const-label-addr.c b/clang/test/CIR/CodeGen/const-label-addr.c new file mode 100644 index 0000000000000..d820db4221b66 --- /dev/null +++ b/clang/test/CIR/CodeGen/const-label-addr.c @@ -0,0 +1,78 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +// CIR: cir.global "private" internal dso_local @d.s = 
#cir.const_record<{#cir.block_addr_info<@d, "A"> : !cir.ptr, #cir.block_addr_info<@d, "B"> : !cir.ptr}> : !rec_S +// CIR: cir.global "private" internal dso_local @c.tbl = #cir.const_array<[#cir.block_addr_info<@c, "A"> : !cir.ptr, #cir.block_addr_info<@c, "A"> : !cir.ptr, #cir.block_addr_info<@c, "B"> : !cir.ptr]> : !cir.array x 3> +// CIR: cir.global "private" internal dso_local @a.a = #cir.block_addr_info<@a, "A"> : !cir.ptr + +// LLVM-DAG: @a.a = internal global ptr blockaddress(@a, %[[A_BLOCK:.*]]), align 8 +// LLVM-DAG: @c.tbl = internal global [3 x ptr] [ptr blockaddress(@c, %[[C_A:.*]]), ptr blockaddress(@c, %[[C_A]]), ptr blockaddress(@c, %[[C_B:.*]])], align 16 +// LLVM-DAG: @d.s = internal global %struct.S { ptr blockaddress(@d, %[[D_A:.*]]), ptr blockaddress(@d, %[[D_B:.*]]) }, align 8 + +void a(void) { +A:; + static void *a = &&A; +} + +// CIR: cir.func{{.*}} @a() +// CIR: cir.br ^[[A_BLOCK:bb[0-9]+]] +// CIR: ^[[A_BLOCK]]: +// CIR: cir.label "A" +// CIR: %[[STATIC_A:.*]] = cir.get_global @a.a : !cir.ptr> +// CIR: cir.return + +// LLVM: define dso_local void @a() +// LLVM: br label %[[A_BLOCK]] +// LLVM: [[A_BLOCK]]: +// LLVM: ret void + +void c(int x) { + static void *tbl[3] = {&&A, &&A, &&B}; + int idx = x > 2 ? 
2 : x; +A: + void *p = tbl[idx]; +B: +} + +// CIR: cir.func{{.*}} @c +// CIR: %[[C_TBL:.*]] = cir.get_global @c.tbl +// CIR: [[LABEL_A:.*]]: +// CIR: cir.label "A" +// CIR: %[[P:.*]] = cir.get_element %[[C_TBL]][%{{.*}}] +// CIR: [[LABEL_B:.*]]: +// CIR: cir.label "B" + +// LLVM: define dso_local void @c(i32 noundef %{{.*}}) +// LLVM: br label %[[C_A]] +// LLVM: [[C_A]]: +// LLVM: %[[TARGET:.*]] = getelementptr{{.*}} [3 x ptr], ptr @c.tbl +// LLVM: %[[P:.*]] = load ptr, ptr %[[TARGET]], align 8 +// LLVM: br label %[[C_B]] +// LLVM: [[C_B]]: +// LLVM: ret void + +struct S { void *a, *b; }; +void d(void) { +A:; +B:; + static struct S s = {&&A, &&B}; +} + +// CIR: cir.func{{.*}} @d +// CIR: [[LABEL_A:.*]]: +// CIR: cir.label "A" +// CIR: [[LABEL_B:.*]]: +// CIR: cir.label "B" +// CIR: %[[S:.*]] = cir.get_global @d.s +// CIR: cir.return + +// LLVM: define dso_local void @d() +// LLVM: br label %[[D_A]] +// LLVM: [[D_A]]: +// LLVM: br label %[[D_B]] +// LLVM: [[D_B]]: +// LLVM: ret void From 8a2cd8b984bc36ce4e1dd6dd650e7e35bc42e6dd Mon Sep 17 00:00:00 2001 From: vitbur Date: Tue, 23 Jun 2026 21:15:59 +0200 Subject: [PATCH 240/511] [RISCV][XCV] Add missing IsRV32 predicate to the XCVmac block (#205095) The XCVmac instruction block was missing the `IsRV32` predicate that every other XCV block already carries. `HasVendorXCVmac` on its own does not require RV32, so `-mtriple=riscv64 -mattr=+xcvmac` could select these RV32-only vendor instructions on RV64. Add `IsRV32` to the XCVmac block to match the other XCV extensions and prevent selecting invalid instructions on RV64. Split out of #204879 at review request (one fix per PR). 
Part of a CORE-V (XCV) series; see RFC: https://discourse.llvm.org/t/rfc-core-v-xcv-support-for-cv32e40p-clang-builtins-xcvsimd-intrinsics-and-generic-auto-selection/91111 --- llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td index ec93f60366e1e..af1919f21362b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td @@ -828,7 +828,7 @@ class PatCoreVMacGprGprUimm5 : Pat<(!cast("int_riscv_cv_mac_" # intr) GPR:$rs1, GPR:$rs2, cv_tuimm5:$imm5), (!cast("CV_" # asm) GPR:$rs1, GPR:$rs2, cv_tuimm5:$imm5)>; -let Predicates = [HasVendorXCVmac] in { +let Predicates = [HasVendorXCVmac, IsRV32] in { def : PatCoreVMacGprGprGpr<"mac", "MAC">; def : PatCoreVMacGprGprGpr<"msu", "MSU">; From 60870d97be7ff7f18a164dae39abde5f00f0fe8f Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 23 Jun 2026 12:21:18 -0700 Subject: [PATCH 241/511] Revert "[NFC][Support] Add test for inverted slash-agnostic matching" (#205397) Reverts llvm/llvm-project#203290 as part of the process of reverting https://github.com/llvm/llvm-project/pull/202854 due to downstream breakage (see discussion in https://github.com/llvm/llvm-project/pull/202854#issuecomment-4746579478) --- llvm/unittests/Support/GlobPatternTest.cpp | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp index aac02bc1d7e7d..35423e37a3ae0 100644 --- a/llvm/unittests/Support/GlobPatternTest.cpp +++ b/llvm/unittests/Support/GlobPatternTest.cpp @@ -471,18 +471,4 @@ TEST_F(GlobPatternTest, SlashAgnosticMatchInverted) { EXPECT_FALSE(Pat->match("foo/barb")); EXPECT_TRUE(Pat->match("foo/bar1")); } - -TEST_F(GlobPatternTest, SlashAgnosticInvertedSlash) { - auto Pat1 = GlobPattern::create("foo[^/]", 1024, /*SlashAgnostic=*/true); - ASSERT_TRUE((bool)Pat1); - 
EXPECT_FALSE(Pat1->match("foo/")); - EXPECT_FALSE(Pat1->match("foo\\")); - EXPECT_TRUE(Pat1->match("fooa")); - - auto Pat2 = GlobPattern::create("foo[^\\]", 1024, /*SlashAgnostic=*/true); - ASSERT_TRUE((bool)Pat2); - EXPECT_FALSE(Pat2->match("foo/")); - EXPECT_FALSE(Pat2->match("foo\\")); - EXPECT_TRUE(Pat2->match("fooa")); -} } From 2ae91ce5ad47275db650193cef368c85ab4d46bd Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 23 Jun 2026 12:21:25 -0700 Subject: [PATCH 242/511] Revert "Make sanitizer special case list slash-agnostic" (#205399) Reverts llvm/llvm-project#149886 as part of the process of reverting https://github.com/llvm/llvm-project/pull/202854 due to downstream breakage (see discussion in https://github.com/llvm/llvm-project/pull/202854#issuecomment-4746579478) --- clang/docs/ReleaseNotes.rst | 5 --- clang/docs/SanitizerSpecialCaseList.rst | 12 ------- clang/unittests/Basic/DiagnosticTest.cpp | 35 ------------------- llvm/lib/Support/SpecialCaseList.cpp | 31 ++++------------ .../unittests/Support/SpecialCaseListTest.cpp | 20 ----------- 5 files changed, 6 insertions(+), 97 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 4aa13ce1384ac..0ff8e8f5afd3c 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1078,11 +1078,6 @@ Sanitizers warning for deprecated matches. Version 5 drops backward compatibility and requires rules to match canonicalized paths (without leading ``./``). -- Sanitizer Special Case Lists (``-fsanitize-ignorelist``) and warning - suppression mappings (``--warning-suppression-mappings``) now recognize version - 4 of the Special Case List format (indicated by ``#!special-case-list-v4``). - On Windows hosts, path matching is slash-agnostic (both forward slashes (``/``) - and backslashes (``\``) match either path separator in both patterns and paths). 
Python Binding Changes ---------------------- diff --git a/clang/docs/SanitizerSpecialCaseList.rst b/clang/docs/SanitizerSpecialCaseList.rst index 918abf19f8f10..1de3555c5a8ce 100644 --- a/clang/docs/SanitizerSpecialCaseList.rst +++ b/clang/docs/SanitizerSpecialCaseList.rst @@ -230,18 +230,6 @@ tool-specific docs. [{cfi-vcall,cfi-icall}] fun:*BadCfiCall - -.. note:: - - By default, ``src`` and ``mainfile`` are matched against the filename as seen - by LLVM. On Windows, this might involve a mix of forward and backslashes as - file separators, and writing patterns to match both variants can be - inconvenient. - - Starting with version 4 (indicated by ``#!special-case-list-v4``), path matching - on Windows hosts is slash-agnostic: both forward slashes (``/``) and backslashes - (``\``) match either path separator in both patterns and paths. - .. note:: By default, path matching (for ``src`` and ``mainfile``) matches the query diff --git a/clang/unittests/Basic/DiagnosticTest.cpp b/clang/unittests/Basic/DiagnosticTest.cpp index 4ced52c8f715f..4d310d3ece23f 100644 --- a/clang/unittests/Basic/DiagnosticTest.cpp +++ b/clang/unittests/Basic/DiagnosticTest.cpp @@ -414,39 +414,4 @@ TEST_F(SuppressionMappingTest, ParsingRespectsOtherWarningOpts) { clang::ProcessWarningOptions(Diags, Diags.getDiagnosticOptions(), *FS); EXPECT_THAT(diags(), IsEmpty()); } - -#ifdef _WIN32 -TEST_F(SuppressionMappingTest, CanonicalizesSlashesOnWindows) { - llvm::StringLiteral SuppressionMappingFile = R"(#!special-case-list-v4 - [unused] - src:*clang/* - src:*clang/lib/Sema/*=emit - src:*clang/lib\\Sema/foo* - fun:suppress/me)"; - Diags.getDiagnosticOptions().DiagnosticSuppressionMappingsFile = "foo.txt"; - FS->addFile("foo.txt", /*ModificationTime=*/{}, - llvm::MemoryBuffer::getMemBuffer(SuppressionMappingFile)); - clang::ProcessWarningOptions(Diags, Diags.getDiagnosticOptions(), *FS); - EXPECT_THAT(diags(), IsEmpty()); - - EXPECT_TRUE(Diags.isSuppressedViaMapping( - diag::warn_unused_function, 
locForFile(R"(clang/lib/Basic/bar.h)"))); - EXPECT_TRUE(Diags.isSuppressedViaMapping( - diag::warn_unused_function, locForFile(R"(clang/lib/Basic\bar.h)"))); - EXPECT_TRUE(Diags.isSuppressedViaMapping( - diag::warn_unused_function, locForFile(R"(clang\lib/Basic/bar.h)"))); - EXPECT_FALSE(Diags.isSuppressedViaMapping( - diag::warn_unused_function, locForFile(R"(clang/lib/Sema/baz.h)"))); - EXPECT_FALSE(Diags.isSuppressedViaMapping( - diag::warn_unused_function, locForFile(R"(clang/lib/Sema\baz.h)"))); - - // Under slash-agnostic matching, backslashes and forward slashes match each - // other, so we match the third pattern. - EXPECT_TRUE(Diags.isSuppressedViaMapping( - diag::warn_unused_function, locForFile(R"(clang\lib\Sema/foo.h)"))); - EXPECT_TRUE(Diags.isSuppressedViaMapping( - diag::warn_unused_function, locForFile(R"(clang/lib/Sema/foo.h)"))); -} -#endif - } // namespace diff --git a/llvm/lib/Support/SpecialCaseList.cpp b/llvm/lib/Support/SpecialCaseList.cpp index fe12039719059..d72f7e7fd1d81 100644 --- a/llvm/lib/Support/SpecialCaseList.cpp +++ b/llvm/lib/Support/SpecialCaseList.cpp @@ -24,7 +24,6 @@ #include "llvm/Support/GlobPattern.h" #include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" #include "llvm/Support/Regex.h" #include "llvm/Support/VirtualFileSystem.h" #include "llvm/Support/WithColor.h" @@ -45,7 +44,7 @@ namespace { // Lagacy v1 matcher. 
class RegexMatcher { public: - Error insert(StringRef Pattern, unsigned LineNumber, bool SlashAgnostic); + Error insert(StringRef Pattern, unsigned LineNumber); unsigned match(StringRef Query) const; StringRef findRule(unsigned LineNo) const; @@ -63,7 +62,7 @@ class RegexMatcher { class GlobMatcher { public: - Error insert(StringRef Pattern, unsigned LineNumber, bool SlashAgnostic); + Error insert(StringRef Pattern, unsigned LineNumber); unsigned match(StringRef Query) const; StringRef findRule(unsigned LineNo) const; @@ -96,7 +95,6 @@ struct QueryOptions { bool UseGlobs = true; bool RemoveDotSlash = false; bool WarnDotSlashMatch = false; - bool SlashAgnostic = false; }; /// Represents a set of patterns and their line numbers @@ -118,8 +116,7 @@ class Matcher { mutable std::once_flag Warned; }; -Error RegexMatcher::insert(StringRef Pattern, unsigned LineNumber, - bool SlashAgnostic) { +Error RegexMatcher::insert(StringRef Pattern, unsigned LineNumber) { if (Pattern.empty()) return createStringError(errc::invalid_argument, "Supplied regex was blank"); @@ -158,13 +155,11 @@ StringRef RegexMatcher::findRule(unsigned LineNo) const { return {}; } -Error GlobMatcher::insert(StringRef Pattern, unsigned LineNumber, - bool SlashAgnostic) { +Error GlobMatcher::insert(StringRef Pattern, unsigned LineNumber) { if (Pattern.empty()) return createStringError(errc::invalid_argument, "Supplied glob was blank"); - auto Res = - GlobPattern::create(Pattern, /*MaxSubPatterns=*/1024, SlashAgnostic); + auto Res = GlobPattern::create(Pattern, /*MaxSubPatterns=*/1024); if (auto Err = Res.takeError()) return Err; Globs.emplace_back(Pattern, LineNumber, std::move(Res.get())); @@ -262,11 +257,7 @@ Matcher::Matcher(QueryOptions QOpts) : Options(QOpts) { } Error Matcher::insert(StringRef Pattern, unsigned LineNumber) { - return std::visit( - [&](auto &V) { - return V.insert(Pattern, LineNumber, Options.SlashAgnostic); - }, - M); + return std::visit([&](auto &V) { return V.insert(Pattern, 
LineNumber); }, M); } /// Matches Query against the patterns. The behavior is controlled by @@ -415,15 +406,6 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, bool UseGlobs = MinVersion(2); bool RemoveDotSlash = MinVersion(3); bool WarnDotSlash = MinVersion(4) && !MinVersion(5); - // TODO: Improve efficiency on Windows. - // `SlashAgnostic` makes `GlobMatcher` lookup inefficient by reducing the part - // of the pattern handled by the RadixTree. This was already the case even - // before `SlashAgnostic` because `GlobMatcher` pessimizes on escape sequences - // needed to represent Windows backslashes. A possible, but not unique, - // solution is to assume (or convert Windows query) backslashes, and - // preprocess the Glob pattern to use different escape sequences. - bool SlashAgnostic = MinVersion(4) && llvm::sys::path::is_style_windows( - llvm::sys::path::Style::native); auto ErrOrSection = addSection("*", FileIdx, 1, true); if (auto Err = ErrOrSection.takeError()) { @@ -475,7 +457,6 @@ bool SpecialCaseList::parse(unsigned FileIdx, const MemoryBuffer *MB, if (llvm::is_contained(PathPrefixes, Prefix)) { QOpts.RemoveDotSlash = RemoveDotSlash; QOpts.WarnDotSlashMatch = WarnDotSlash; - QOpts.SlashAgnostic = SlashAgnostic; } auto [Pattern, Category] = Postfix.split("="); diff --git a/llvm/unittests/Support/SpecialCaseListTest.cpp b/llvm/unittests/Support/SpecialCaseListTest.cpp index d4d47c37993f2..5bcd111f53059 100644 --- a/llvm/unittests/Support/SpecialCaseListTest.cpp +++ b/llvm/unittests/Support/SpecialCaseListTest.cpp @@ -464,24 +464,4 @@ TEST_F(SpecialCaseListTest, FileIdx) { sys::fs::remove(Path); } -#ifdef _WIN32 -TEST_F(SpecialCaseListTest, SlashAgnosticPathsOnWindows) { - std::unique_ptr SCL = - makeSpecialCaseList("#!special-case-list-v4\n" - "\n" - "src:*foo/bar*\n" - "src:*foo\\\\baz\n" - "fun:hi\\\\bye=category\n"); - EXPECT_TRUE(SCL->inSection("", "src", "foo/bar")); - EXPECT_TRUE(SCL->inSection("", "src", "foo\\bar")); - // The 
baz pattern matches because paths are matched slash-agnostically - EXPECT_TRUE(SCL->inSection("", "src", "foo/baz")); - EXPECT_TRUE(SCL->inSection("", "src", "foo\\baz")); - // Slash-agnostic matching only applies to files - EXPECT_TRUE(SCL->inSection("", "fun", "hi\\bye", "category")); - EXPECT_FALSE(SCL->inSection("", "fun", "hi/bye", "category")); -} - -#endif - } // namespace From 7fcb67da050d2ac7c6356ba783652d950c8e9063 Mon Sep 17 00:00:00 2001 From: Nerixyz Date: Tue, 23 Jun 2026 21:23:53 +0200 Subject: [PATCH 243/511] [DebugInfo][CodeView] Resolve forward references to types without unique name (#203781) In the following code: ```cpp // header.h typedef struct lua_State lua_State; lua_State *getState(); // source.c #include "header.h" struct lua_State { int field; }; lua_State *getState() { static lua_State state = {.field=42}; // make sure the type is emitted return &state; } // main.cpp extern "C" { #include "header.h" } int main() { return getState() != 0; } ``` We'll get two forward references for `lua_State` when compiling with clang-cl. One with a unique name (from C++) and one without (from C). There's one complete definition for the type (from `source.c`). Since this is from C, it doesn't have a unique name. Before, we could only resolve the forward reference from C, not from C++. With this PR, we can resolve both references. I also tested that the VS debugger resolves the reference correctly. I'm not sure if this is a clang bug, because when compiling with MSVC, the C type also has a unique name. Clang omits the unique name, because C doesn't have ODR like C++. 
--- llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp | 23 ++-- .../llvm-pdbutil/forward-cxx-to-c-odr.test | 116 ++++++++++++++++++ .../tools/llvm-pdbutil/forward-cxx-to-c.test | 89 ++++++++++++++ 3 files changed, 220 insertions(+), 8 deletions(-) create mode 100644 llvm/test/tools/llvm-pdbutil/forward-cxx-to-c-odr.test create mode 100644 llvm/test/tools/llvm-pdbutil/forward-cxx-to-c.test diff --git a/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp b/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp index 24d2f9e3360ea..83795e37f930f 100644 --- a/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/TpiStream.cpp @@ -187,9 +187,11 @@ TpiStream::findFullDeclForForwardRef(TypeIndex ForwardRefTI) const { Expected ForwardTRH = hashTagRecord(F); if (!ForwardTRH) return ForwardTRH.takeError(); + TagRecord &ForwardTR = ForwardTRH->getRecord(); uint32_t BucketIdx = ForwardTRH->FullRecordHash % Header->NumHashBuckets; + TypeIndex BestMatch = ForwardRefTI; for (TypeIndex TI : HashMap[BucketIdx]) { CVType CVT = Types->getType(TI); if (CVT.kind() != F.kind()) @@ -200,21 +202,26 @@ TpiStream::findFullDeclForForwardRef(TypeIndex ForwardRefTI) const { return FullTRH.takeError(); if (ForwardTRH->FullRecordHash != FullTRH->FullRecordHash) continue; - TagRecord &ForwardTR = ForwardTRH->getRecord(); TagRecord &FullTR = FullTRH->getRecord(); - if (!ForwardTR.hasUniqueName()) { - if (ForwardTR.getName() == FullTR.getName()) + if (ForwardTR.hasUniqueName() && FullTR.hasUniqueName()) { + if (ForwardTR.getUniqueName() == FullTR.getUniqueName()) return TI; continue; } - if (!FullTR.hasUniqueName()) - continue; - if (ForwardTR.getUniqueName() == FullTR.getUniqueName()) - return TI; + if (ForwardTR.getName() == FullTR.getName()) { + // If the type we search for doesn't have a unique name, we found the + // target. + if (!ForwardTR.hasUniqueName()) + return TI; + + // `ForwardTR` does have a unique name, but the type we found doesn't. 
+ // Remember this type, but look for better candidates in the bucket first. + BestMatch = TI; + } } - return ForwardRefTI; + return BestMatch; } codeview::CVType TpiStream::getType(codeview::TypeIndex Index) { diff --git a/llvm/test/tools/llvm-pdbutil/forward-cxx-to-c-odr.test b/llvm/test/tools/llvm-pdbutil/forward-cxx-to-c-odr.test new file mode 100644 index 0000000000000..9a13c80cca6bc --- /dev/null +++ b/llvm/test/tools/llvm-pdbutil/forward-cxx-to-c-odr.test @@ -0,0 +1,116 @@ +# Test that we prefer types with unique names when resolving forward references +# from records with unique names. + +# RUN: llvm-pdbutil yaml2pdb %s --pdb=%t.pdb +# RUN: llvm-pdbutil dump --types %t.pdb | FileCheck %s + +# The forward reference from 0x1002 -> 0x100A is important here. +# In contrast to `forward-cxx-to-c.test`, we should resolve the reference to +# 0x100A here, because both have a unique name (even if 0x1008 is lower). +# For 0x1004, we resolve 0x1008, because of the missing unique name. + +# CHECK: 0x1002 | LF_STRUCTURE [size = 48] `lua_State` +# CHECK-NEXT: unique name: `.?AUlua_State@@` +# CHECK-NEXT: vtable: , base list: , field list: +# CHECK-NEXT: options: forward ref (-> 0x100A) | has unique name, sizeof 0 +# CHECK: 0x1004 | LF_STRUCTURE [size = 32] `lua_State` +# CHECK-NEXT: vtable: , base list: , field list: +# CHECK-NEXT: options: forward ref (-> 0x1008), sizeof 0 +# CHECK: 0x1007 | LF_FIELDLIST [size = 20] +# CHECK-NEXT: - LF_MEMBER [name = `field`, Type = 0x0074 (int), offset = 0, attrs = public] +# CHECK-NEXT: 0x1008 | LF_STRUCTURE [size = 32] `lua_State` +# CHECK-NEXT: vtable: , base list: , field list: 0x1007 +# CHECK-NEXT: options: , sizeof 4 +# CHECK-NEXT: 0x1009 | LF_FIELDLIST [size = 32] +# CHECK-NEXT: - LF_MEMBER [name = `differentField`, Type = 0x0074 (int), offset = 0, attrs = public] +# CHECK-NEXT: 0x100A | LF_STRUCTURE [size = 48] `lua_State` +# CHECK-NEXT: unique name: `.?AUlua_State@@` +# CHECK-NEXT: vtable: , base list: , field list: 0x1007 +# 
CHECK-NEXT: options: has unique name, sizeof 4 + +--- +TpiStream: + Version: VC80 + Records: + - Kind: LF_ARGLIST + ArgList: + ArgIndices: [ ] + - Kind: LF_PROCEDURE + Procedure: + ReturnType: 116 + CallConv: NearC + Options: [ None ] + ParameterCount: 0 + ArgumentList: 4096 + - Kind: LF_STRUCTURE + Class: + MemberCount: 0 + Options: [ None, ForwardReference, HasUniqueName ] + FieldList: 0 + Name: lua_State + UniqueName: '.?AUlua_State@@' + DerivationList: 0 + VTableShape: 0 + Size: 0 + - Kind: LF_POINTER + Pointer: + ReferentType: 4098 + Attrs: 65548 + - Kind: LF_STRUCTURE + Class: + MemberCount: 0 + Options: [ None, ForwardReference ] + FieldList: 0 + Name: lua_State + UniqueName: '' + DerivationList: 0 + VTableShape: 0 + Size: 0 + - Kind: LF_POINTER + Pointer: + ReferentType: 4100 + Attrs: 65548 + - Kind: LF_PROCEDURE + Procedure: + ReturnType: 4101 + CallConv: NearC + Options: [ None ] + ParameterCount: 0 + ArgumentList: 4096 + - Kind: LF_FIELDLIST + FieldList: + - Kind: LF_MEMBER + DataMember: + Attrs: 3 + Type: 116 + FieldOffset: 0 + Name: field + - Kind: LF_STRUCTURE + Class: + MemberCount: 1 + Options: [ None ] + FieldList: 4103 + Name: lua_State + UniqueName: '' + DerivationList: 0 + VTableShape: 0 + Size: 4 + - Kind: LF_FIELDLIST + FieldList: + - Kind: LF_MEMBER + DataMember: + Attrs: 3 + Type: 116 + FieldOffset: 0 + Name: differentField + - Kind: LF_STRUCTURE + Class: + MemberCount: 1 + Options: [ None, HasUniqueName ] + FieldList: 4103 + Name: lua_State + UniqueName: '.?AUlua_State@@' + DerivationList: 0 + VTableShape: 0 + Size: 4 +... diff --git a/llvm/test/tools/llvm-pdbutil/forward-cxx-to-c.test b/llvm/test/tools/llvm-pdbutil/forward-cxx-to-c.test new file mode 100644 index 0000000000000..7a2810cc79ed4 --- /dev/null +++ b/llvm/test/tools/llvm-pdbutil/forward-cxx-to-c.test @@ -0,0 +1,89 @@ +# Test that we can resolve forward references from types with a unique name (from C++) +# to types without a unique name (from C). 
+ +# RUN: llvm-pdbutil yaml2pdb %s --pdb=%t.pdb +# RUN: llvm-pdbutil dump --types %t.pdb | FileCheck %s + +# The forward reference from 0x1002 -> 0x1008 is important here. + +# CHECK: 0x1002 | LF_STRUCTURE [size = 48] `lua_State` +# CHECK-NEXT: unique name: `.?AUlua_State@@` +# CHECK-NEXT: vtable: , base list: , field list: +# CHECK-NEXT: options: forward ref (-> 0x1008) | has unique name, sizeof 0 +# CHECK: 0x1004 | LF_STRUCTURE [size = 32] `lua_State` +# CHECK-NEXT: vtable: , base list: , field list: +# CHECK-NEXT: options: forward ref (-> 0x1008), sizeof 0 +# CHECK: 0x1007 | LF_FIELDLIST [size = 20] +# CHECK-NEXT: - LF_MEMBER [name = `field`, Type = 0x0074 (int), offset = 0, attrs = public] +# CHECK-NEXT: 0x1008 | LF_STRUCTURE [size = 32] `lua_State` +# CHECK-NEXT: vtable: , base list: , field list: 0x1007 +# CHECK-NEXT: options: , sizeof 4 + +--- +TpiStream: + Version: VC80 + Records: + - Kind: LF_ARGLIST + ArgList: + ArgIndices: [ ] + - Kind: LF_PROCEDURE + Procedure: + ReturnType: 116 + CallConv: NearC + Options: [ None ] + ParameterCount: 0 + ArgumentList: 4096 + - Kind: LF_STRUCTURE + Class: + MemberCount: 0 + Options: [ None, ForwardReference, HasUniqueName ] + FieldList: 0 + Name: lua_State + UniqueName: '.?AUlua_State@@' + DerivationList: 0 + VTableShape: 0 + Size: 0 + - Kind: LF_POINTER + Pointer: + ReferentType: 4098 + Attrs: 65548 + - Kind: LF_STRUCTURE + Class: + MemberCount: 0 + Options: [ None, ForwardReference ] + FieldList: 0 + Name: lua_State + UniqueName: '' + DerivationList: 0 + VTableShape: 0 + Size: 0 + - Kind: LF_POINTER + Pointer: + ReferentType: 4100 + Attrs: 65548 + - Kind: LF_PROCEDURE + Procedure: + ReturnType: 4101 + CallConv: NearC + Options: [ None ] + ParameterCount: 0 + ArgumentList: 4096 + - Kind: LF_FIELDLIST + FieldList: + - Kind: LF_MEMBER + DataMember: + Attrs: 3 + Type: 116 + FieldOffset: 0 + Name: field + - Kind: LF_STRUCTURE + Class: + MemberCount: 1 + Options: [ None ] + FieldList: 4103 + Name: lua_State + UniqueName: '' 
+ DerivationList: 0 + VTableShape: 0 + Size: 4 +... From 45bd11205f77defd39437a060527b668085375c3 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 23 Jun 2026 12:24:57 -0700 Subject: [PATCH 244/511] [X86] Hoist getMOVriOpcode to X86InstrInfo.h and share it, NFC (#205187) The x86 backend often needs to materialize potentially 64-bit immediates into registers, and the logic to pick between the available opcodes exists in 3 places at least. Move this to X86InstrInfo.h so we can share it over the x86 backend without copying it. An LLM did the refactoring. --- .../X86/GISel/X86InstructionSelector.cpp | 7 +---- llvm/lib/Target/X86/X86FastISel.cpp | 10 ++----- llvm/lib/Target/X86/X86FrameLowering.cpp | 27 +++++++------------ llvm/lib/Target/X86/X86InstrInfo.cpp | 12 +++++++++ llvm/lib/Target/X86/X86InstrInfo.h | 3 +++ 5 files changed, 27 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp index 3c94c33cdaaa6..113fd8867f2cd 100644 --- a/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp +++ b/llvm/lib/Target/X86/GISel/X86InstructionSelector.cpp @@ -821,12 +821,7 @@ bool X86InstructionSelector::selectConstant(MachineInstr &I, NewOpc = X86::MOV32ri; break; case 64: - if (isUInt<32>(Val)) - NewOpc = X86::MOV32ri64; - else if (isInt<32>(Val)) - NewOpc = X86::MOV64ri32; - else - NewOpc = X86::MOV64ri; + NewOpc = X86::getMOVriOpcode(/*Use64BitReg=*/true, Val); break; default: llvm_unreachable("Can't select G_CONSTANT, unsupported type."); diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp index e8e6005172e8f..95d5cbe06da63 100644 --- a/llvm/lib/Target/X86/X86FastISel.cpp +++ b/llvm/lib/Target/X86/X86FastISel.cpp @@ -3750,16 +3750,10 @@ Register X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) { case MVT::i8: Opc = X86::MOV8ri; break; case MVT::i16: Opc = X86::MOV16ri; break; case MVT::i32: Opc = X86::MOV32ri; break; - case 
MVT::i64: { - if (isUInt<32>(Imm)) - Opc = X86::MOV32ri64; - else if (isInt<32>(Imm)) - Opc = X86::MOV64ri32; - else - Opc = X86::MOV64ri; + case MVT::i64: + Opc = X86::getMOVriOpcode(/*Use64BitReg=*/true, Imm); break; } - } return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); } diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index 8fe09bea456c8..a3f50cde617ac 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -139,17 +139,6 @@ static unsigned getLEArOpcode(bool IsLP64) { return IsLP64 ? X86::LEA64r : X86::LEA32r; } -static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) { - if (Use64BitReg) { - if (isUInt<32>(Imm)) - return X86::MOV32ri64; - if (isInt<32>(Imm)) - return X86::MOV64ri32; - return X86::MOV64ri; - } - return X86::MOV32ri; -} - // Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the // value written by the PUSH from the stack. The processor tracks these marked // instructions internally and fast-forwards register data between matching PUSH @@ -282,8 +271,8 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, unsigned AddSubRROpc = isSub ? 
getSUBrrOpcode(Uses64BitFramePtr) : getADDrrOpcode(Uses64BitFramePtr); if (Reg) { - BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Uses64BitFramePtr, Offset)), - Reg) + BuildMI(MBB, MBBI, DL, + TII.get(X86::getMOVriOpcode(Uses64BitFramePtr, Offset)), Reg) .addImm(Offset) .setMIFlag(Flag); MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr) @@ -309,8 +298,8 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, Offset = -(Offset - SlotSize); else Offset = Offset + SlotSize; - BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Uses64BitFramePtr, Offset)), - Rax) + BuildMI(MBB, MBBI, DL, + TII.get(X86::getMOVriOpcode(Uses64BitFramePtr, Offset)), Rax) .addImm(Offset) .setMIFlag(Flag); MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax) @@ -2101,7 +2090,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // Handle the 64-bit Windows ABI case where we need to call __chkstk. // Function prologue is responsible for adjusting the stack pointer. int64_t Alloc = isEAXAlive ? 
NumBytes - 8 : NumBytes; - BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX) + BuildMI(MBB, MBBI, DL, TII.get(X86::getMOVriOpcode(Is64Bit, Alloc)), + X86::RAX) .addImm(Alloc) .setMIFlag(MachineInstr::FrameSetup); } else { @@ -3623,10 +3613,11 @@ void X86FrameLowering::adjustForSegmentedStacks( if (IsNested) BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10); - BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10) + BuildMI(allocMBB, DL, TII.get(X86::getMOVriOpcode(IsLP64, StackSize)), + Reg10) .addImm(StackSize); BuildMI(allocMBB, DL, - TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())), + TII.get(X86::getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())), Reg11) .addImm(X86FI->getArgumentStackSize()); } else { diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index d4162414b3b40..86a5a631ce737 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include @@ -3481,6 +3482,17 @@ unsigned X86::getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand, } } +unsigned X86::getMOVriOpcode(bool Use64BitReg, int64_t Imm) { + if (!Use64BitReg) + return X86::MOV32ri; + + if (isUInt<32>(Imm)) + return X86::MOV32ri64; + if (isInt<32>(Imm)) + return X86::MOV64ri32; + return X86::MOV64ri; +} + /// Get the VPCMP immediate for the given condition. 
unsigned X86::getVPCMPImmForCond(ISD::CondCode CC) { switch (CC) { diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h index cab63623613a0..2db0731da3c56 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.h +++ b/llvm/lib/Target/X86/X86InstrInfo.h @@ -51,6 +51,9 @@ std::pair getX86ConditionCode(CmpInst::Predicate Predicate); unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand = false, bool HasNDD = false); +/// Return a MOVri opcode for materializing \p Imm into a 32- or 64-bit GPR. +unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm); + /// Return the source operand # for condition code by \p MCID. If the /// instruction doesn't have a condition code, return -1. int getCondSrcNoFromDesc(const MCInstrDesc &MCID); From b4f549d881668cc8e84f5bf993350692b8b57f58 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Tue, 23 Jun 2026 22:26:45 +0300 Subject: [PATCH 245/511] [lldb] Add missing dependency on lldbPluginObjectFilePlaceholder (#204831) Fixes shared library build after 882d0251. --- lldb/source/Plugins/Process/elf-core/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/lldb/source/Plugins/Process/elf-core/CMakeLists.txt b/lldb/source/Plugins/Process/elf-core/CMakeLists.txt index 6bf295f0ad2e2..d1addf2f36be3 100644 --- a/lldb/source/Plugins/Process/elf-core/CMakeLists.txt +++ b/lldb/source/Plugins/Process/elf-core/CMakeLists.txt @@ -20,6 +20,7 @@ add_lldb_library(lldbPluginProcessElfCore PLUGIN lldbCore lldbTarget lldbPluginDynamicLoaderPosixDYLD + lldbPluginObjectFilePlaceholder lldbPluginObjectFileELF lldbPluginProcessUtility ) From 2418762f46f740b3380175fe3d2a8bb20f0311ce Mon Sep 17 00:00:00 2001 From: adams381 Date: Tue, 23 Jun 2026 14:28:40 -0500 Subject: [PATCH 246/511] [CIR] Lower byval/byref args in CallConvLowering (#201717) [CIR] Lower byval/byref args in CallConvLowering ArgKind::Indirect arguments were hitting an errorNYI in CIRABIRewriteContext. 
Add the lowering: in the callee the block argument type changes to !cir.ptr, a load is inserted at entry so the body sees the original value type, and llvm.byval or llvm.byref is attached based on ownership. At call sites, both byval and byref are lowered by allocating a stack slot, copying the value in, and passing the pointer. For byval, llvm.noalias and llvm.noundef are also added -- llvm.noalias because the call-site rewrite always produces a fresh alloca+store (equivalent to -fpass-by-value-is-noalias), and llvm.noundef because the copy is always fully defined. byref carries only llvm.byref and llvm.align since it does not assert exclusive ownership. --- .../TargetLowering/CIRABIRewriteContext.cpp | 215 +++++++++++----- .../abi-lowering/indirect-byval.cir | 242 ++++++++++++++++++ 2 files changed, 390 insertions(+), 67 deletions(-) create mode 100644 clang/test/CIR/Transforms/abi-lowering/indirect-byval.cir diff --git a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRABIRewriteContext.cpp b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRABIRewriteContext.cpp index a0d3b5d39df89..82b33ac38de30 100644 --- a/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRABIRewriteContext.cpp +++ b/clang/lib/CIR/Dialect/Transforms/TargetLowering/CIRABIRewriteContext.cpp @@ -17,14 +17,13 @@ using namespace mlir; using namespace mlir::abi; // This rewrite context supports the Direct (with or without coercion), -// Extend, Ignore, and Indirect-return (sret) classifications. Indirect -// arguments (byval) and Expand still emit an errorNYI here rather than -// silently passing through, because the IR they would produce is wrong -// (e.g. Expand should flatten an aggregate into multiple primitives, not -// pass it through as a single value). byval and struct coercion are not -// yet handled here; they need the signature-shaping that goes with them -// (byval inserts an extra pointer argument, struct coercion replaces one -// argument with several). 
+// Extend, Ignore, Indirect-return (sret), and Indirect-argument (byval and +// byref) classifications. For byval (ArgClassification::byVal == true) the +// callee gets llvm.byval + llvm.noalias + llvm.noundef; for byref (byVal == +// false) the callee gets llvm.byref without the ownership attrs. Both pass +// through an alloca+store at the call site; the attribute distinction +// communicates ownership semantics to the optimizer. Expand still emits an +// errorNYI rather than silently passing through. namespace { @@ -42,10 +41,10 @@ bool needsRewrite(const FunctionClassification &fc) { } /// Build the new argument-type list for a function whose ABI classification -/// is \p fc. Handles Direct (with or without coercion), Extend, and Ignore. -/// Indirect (byval) arguments and Expand emit an error. The sret return -/// pointer, when present, is prepended by rewriteFunctionDefinition rather -/// than here. +/// is \p fc. Handles Direct (with or without coercion), Extend, Ignore, and +/// Indirect (byval and byref) arguments. Expand emits an error. The sret +/// return pointer, when present, is prepended by rewriteFunctionDefinition +/// rather than here. mlir::LogicalResult buildNewArgTypes(ArrayRef oldArgTypes, const FunctionClassification &fc, @@ -79,9 +78,13 @@ buildNewArgTypes(ArrayRef oldArgTypes, newArgTypes.push_back(origTy); break; case ArgKind::Indirect: - emitError() << "Indirect at arg " << idx - << " not yet implemented in CallConvLowering"; - return mlir::failure(); + // byval and byref both use a pointer wire type. The attribute + // distinction (llvm.byval vs llvm.byref) is applied in updateArgAttrs; + // the call-site rewrite guards against byref separately because passing + // a byref pointer from a CIR value requires the original alloca address, + // which the rewriter does not yet track. 
+ newArgTypes.push_back(cir::PointerType::get(origTy)); + break; } } return mlir::success(); @@ -131,37 +134,65 @@ mlir::Value createIgnoredValue(mlir::OpBuilder &builder, mlir::Location loc, return cir::ConstantOp::create(builder, loc, ty, cir::PoisonAttr::get(ty)); } -/// Build an updated arg_attrs ArrayAttr that drops Ignore'd args and adds -/// llvm.signext / llvm.zeroext on Extend args. Preserves any existing arg -/// attributes on retained arg slots. +/// Build an updated arg_attrs ArrayAttr that drops Ignore'd args, adds +/// llvm.signext / llvm.zeroext on Extend args, and adds llvm.byval / +/// llvm.align on Indirect args. Preserves any existing arg attributes on +/// retained arg slots. \p origArgTypes provides the pre-rewrite type for +/// each arg slot (needed to compute the llvm.byval pointee type). mlir::ArrayAttr updateArgAttrs(mlir::MLIRContext *ctx, + ArrayRef origArgTypes, mlir::ArrayAttr existingArgAttrs, const FunctionClassification &fc) { + mlir::Builder builder(ctx); SmallVector newArgAttrs; newArgAttrs.reserve(fc.argInfos.size()); for (auto [oldIdx, ac] : llvm::enumerate(fc.argInfos)) { if (ac.kind == ArgKind::Ignore) continue; - mlir::DictionaryAttr existing = mlir::DictionaryAttr::get(ctx); + mlir::DictionaryAttr existing = builder.getDictionaryAttr({}); if (existingArgAttrs && oldIdx < existingArgAttrs.size()) existing = mlir::cast(existingArgAttrs[oldIdx]); if (ac.kind == ArgKind::Extend) { StringRef attrName = ac.signExtend ? 
"llvm.signext" : "llvm.zeroext"; - mlir::NamedAttribute extAttr(mlir::StringAttr::get(ctx, attrName), - mlir::UnitAttr::get(ctx)); - if (existing.empty()) { - newArgAttrs.push_back(mlir::DictionaryAttr::get(ctx, {extAttr})); - } else { - SmallVector attrs(existing.begin(), - existing.end()); - attrs.push_back(extAttr); - newArgAttrs.push_back(mlir::DictionaryAttr::get(ctx, attrs)); + SmallVector attrs(existing.begin(), existing.end()); + attrs.push_back(builder.getNamedAttr(attrName, builder.getUnitAttr())); + newArgAttrs.push_back(builder.getDictionaryAttr(attrs)); + } else if (ac.kind == ArgKind::Indirect) { + // byval: caller-allocated copy; callee receives pointer to copy. + // byref: callee receives pointer to the caller's original storage. + // Both use llvm.align(A). The ownership flag differs: llvm.byval(T) + // vs llvm.byref(T). Both are typed attributes carrying the pointee + // type T (the pre-rewrite arg type); T is recorded explicitly because + // it cannot be recovered from the opaque LLVM pointer after lowering. + // + // For byval, two additional attributes match classic CodeGen: + // llvm.noundef -- the copy is always fully defined (the caller's + // original must be defined or UB has already occurred, and the + // copy inherits that property). + // llvm.noalias -- the copy is a fresh caller-allocated alloca that + // no other pointer in the function can alias. Classic CodeGen + // emits this when -fpass-by-value-is-noalias is set; here we + // emit it unconditionally because our call-site rewrite always + // produces a fresh alloca+store. + mlir::Type pointeeTy = origArgTypes[oldIdx]; + StringRef ownershipAttr = ac.byVal ? 
"llvm.byval" : "llvm.byref"; + SmallVector attrs(existing.begin(), existing.end()); + attrs.push_back(builder.getNamedAttr( + "llvm.align", builder.getI64IntegerAttr(ac.indirectAlign.value()))); + attrs.push_back( + builder.getNamedAttr(ownershipAttr, mlir::TypeAttr::get(pointeeTy))); + if (ac.byVal) { + attrs.push_back( + builder.getNamedAttr("llvm.noalias", builder.getUnitAttr())); + attrs.push_back( + builder.getNamedAttr("llvm.noundef", builder.getUnitAttr())); } + newArgAttrs.push_back(builder.getDictionaryAttr(attrs)); } else { newArgAttrs.push_back(existing); } } - return mlir::ArrayAttr::get(ctx, newArgAttrs); + return builder.getArrayAttr(newArgAttrs); } /// Build an updated res_attrs ArrayAttr (single entry, since CIR funcs have @@ -308,30 +339,53 @@ void insertArgCoercion(mlir::FunctionOpInterface funcOp, mlir::Block &entry = body.front(); for (auto [idx, ac] : llvm::enumerate(fc.argInfos)) { - if (ac.kind != ArgKind::Direct || !ac.coercedType) + // Only two classifications need an entry-block fixup: a Direct arg with a + // coerced wire type, and an Indirect (byval/byref) arg. Extend, Ignore, + // and Direct-without-coercion keep the original block-argument type, so + // the body already sees the right value and there is nothing to do. 
+ bool needsCoercion = ac.kind == ArgKind::Direct && ac.coercedType; + if (!needsCoercion && ac.kind != ArgKind::Indirect) continue; + unsigned blockIdx = idx + hasSRetArg; if (blockIdx >= entry.getNumArguments()) continue; mlir::BlockArgument blockArg = entry.getArgument(blockIdx); - mlir::Type oldArgTy = blockArg.getType(); - mlir::Type newArgTy = ac.coercedType; - if (oldArgTy == newArgTy) - continue; - blockArg.setType(newArgTy); + if (needsCoercion) { + mlir::Type oldArgTy = blockArg.getType(); + mlir::Type newArgTy = ac.coercedType; + if (oldArgTy == newArgTy) + continue; - builder.setInsertionPointToStart(&entry); - SmallPtrSet coercionOps; - mlir::Value adapted = emitCoercion(builder, funcOp.getLoc(), oldArgTy, - blockArg, funcOp, dl, coercionOps); - - // Replace blockArg uses with the adapted value, except inside the helper - // ops we just created. This is critical: the StoreOp's value operand is - // blockArg, and if we naively replaceAllUses it gets swapped to adapted - // (now of the original type != the alloca's pointee type). - blockArg.replaceAllUsesExcept(adapted, coercionOps); + blockArg.setType(newArgTy); + + builder.setInsertionPointToStart(&entry); + SmallPtrSet coercionOps; + mlir::Value adapted = emitCoercion(builder, funcOp.getLoc(), oldArgTy, + blockArg, funcOp, dl, coercionOps); + + // Replace blockArg uses with the adapted value, except inside the + // helper ops we just created. This is critical: the StoreOp's value + // operand is blockArg, and if we naively replaceAllUses it gets swapped + // to adapted (now of the original type != the alloca's pointee type). + blockArg.replaceAllUsesExcept(adapted, coercionOps); + } else { + // ArgKind::Indirect. byval and byref: the wire type is !cir.ptr. + // Change the block arg to the pointer type and insert a load so the + // body sees the original T. 
The body transformation is the same for + // both; the distinction between byval (llvm.byval) and byref + // (llvm.byref) is in the arg attributes applied by updateArgAttrs. + mlir::Type origTy = blockArg.getType(); + auto ptrTy = cir::PointerType::get(origTy); + blockArg.setType(ptrTy); + + builder.setInsertionPointToStart(&entry); + auto loadOp = cir::LoadOp::create(builder, funcOp.getLoc(), blockArg); + SmallPtrSet loadOps = {loadOp}; + blockArg.replaceAllUsesExcept(loadOp.getResult(), loadOps); + } } } @@ -462,7 +516,9 @@ void applySretSlotAttrs(cir::CallOp newCall, mlir::ArrayAttr argAttrs, void rewriteIndirectReturnCall(cir::CallOp call, const FunctionClassification &fc, ArrayRef newArgs, - mlir::Type origRetTy, mlir::OpBuilder &builder) { + mlir::Type origRetTy, + ArrayRef origCallArgTypes, + mlir::OpBuilder &builder) { mlir::MLIRContext *ctx = call->getContext(); auto ptrTy = cir::PointerType::get(origRetTy); builder.setInsertionPoint(call); @@ -511,15 +567,17 @@ void rewriteIndirectReturnCall(cir::CallOp call, newCall->setAttr(attr.getName(), attr.getValue()); // Shape the per-argument attrs exactly as the non-sret path does - // (signext / zeroext for Extend, drop Ignore slots) before prepending - // the sret slot, so sret composes correctly with Extend / Ignore args. + // (signext / zeroext for Extend, drop Ignore slots, byval / align for + // Indirect) before prepending the sret slot, so sret composes correctly + // with Extend / Ignore / Indirect args. 
mlir::ArrayAttr argAttrs = call->getAttrOfType("arg_attrs"); bool needsArgAttrUpdate = llvm::any_of(fc.argInfos, [](const ArgClassification &ac) { - return ac.kind == ArgKind::Ignore || ac.kind == ArgKind::Extend; + return ac.kind == ArgKind::Ignore || ac.kind == ArgKind::Extend || + ac.kind == ArgKind::Indirect; }); if (needsArgAttrUpdate) - argAttrs = updateArgAttrs(ctx, argAttrs, fc); + argAttrs = updateArgAttrs(ctx, origCallArgTypes, argAttrs, fc); applySretSlotAttrs(newCall, argAttrs, origRetTy, sretAlign, builder); if (reuseStore) { @@ -670,15 +728,17 @@ mlir::LogicalResult CIRABIRewriteContext::rewriteFunctionDefinition( funcOp.setFunctionTypeAttr(mlir::TypeAttr::get(newFnTy)); // Rebuild arg_attrs when the function has an sret slot (slot 0 needs the - // sret attribute set) or any arg is Ignore (dropped from the output array) - // or Extend (needs llvm.signext / llvm.zeroext layered on). + // sret attribute set) or any arg is Ignore (dropped from the output array), + // Extend (needs llvm.signext / llvm.zeroext), or Indirect (needs + // llvm.byval / llvm.align). bool needsArgAttrUpdate = hasSRet || llvm::any_of(fc.argInfos, [](const ArgClassification &ac) { - return ac.kind == ArgKind::Ignore || ac.kind == ArgKind::Extend; + return ac.kind == ArgKind::Ignore || ac.kind == ArgKind::Extend || + ac.kind == ArgKind::Indirect; }); if (needsArgAttrUpdate) { auto existing = funcOp->getAttrOfType("arg_attrs"); - mlir::ArrayAttr updated = updateArgAttrs(ctx, existing, fc); + mlir::ArrayAttr updated = updateArgAttrs(ctx, oldArgTypes, existing, fc); if (hasSRet) { // Prepend the sret slot's attribute dict (slot 0); the per-argument // dicts shift to slots 1..N. noalias is valid only on the callee's @@ -728,17 +788,13 @@ CIRABIRewriteContext::rewriteCallSite(mlir::Operation *callOp, switch (ac.kind) { case ArgKind::Direct: case ArgKind::Ignore: + case ArgKind::Extend: + case ArgKind::Indirect: + // All handled in the arg-building loop below. 
break; case ArgKind::Expand: return call.emitOpError() << "Expand at call-site arg " << idx << " not yet implemented in CallConvLowering"; - case ArgKind::Extend: - // Direct (with or without coercion), Ignore, Expand, and Extend are - // all handled below. Extend is attribute-only at the IR level. - break; - case ArgKind::Indirect: - return call.emitOpError() << "Indirect at call-site arg " << idx - << " not yet implemented in CallConvLowering"; } } @@ -747,6 +803,12 @@ CIRABIRewriteContext::rewriteCallSite(mlir::Operation *callOp, SmallVector newArgs; mlir::ValueRange argOperands = call.getArgOperands(); newArgs.reserve(argOperands.size()); + + // Capture original arg types before building newArgs (byval slots change + // the wire argument from T to !cir.ptr, so we save the pre-rewrite + // types here for use in updateArgAttrs). + SmallVector origCallArgTypes; + llvm::append_range(origCallArgTypes, argOperands.getTypes()); if (argOperands.size() > fc.argInfos.size()) return call.emitOpError() << "variadic arguments not yet implemented in CallConvLowering"; @@ -757,9 +819,25 @@ CIRABIRewriteContext::rewriteCallSite(mlir::Operation *callOp, continue; mlir::Value arg = argOperands[idx]; if (ac.kind == ArgKind::Direct && ac.coercedType && - arg.getType() != ac.coercedType) + arg.getType() != ac.coercedType) { arg = emitCoercion(builder, call.getLoc(), ac.coercedType, arg, enclosingFunc, dl); + } else if (ac.kind == ArgKind::Indirect) { + // byval and byref: allocate a stack slot, copy the value in, and pass + // the pointer. The alloca+store pattern is identical for both; the + // attribute distinction (llvm.byval vs llvm.byref) is applied by + // updateArgAttrs. byref does not receive llvm.noalias or llvm.noundef + // because it does not assert exclusive ownership of the storage. + mlir::Type argTy = arg.getType(); + auto ptrTy = cir::PointerType::get(argTy); + uint64_t align = ac.indirectAlign.value(); + StringRef slotName = ac.byVal ? 
"byval" : "byref"; + auto slot = cir::AllocaOp::create(builder, call.getLoc(), ptrTy, + builder.getStringAttr(slotName), + builder.getI64IntegerAttr(align)); + cir::StoreOp::create(builder, call.getLoc(), arg, slot); + arg = slot; + } newArgs.push_back(arg); } @@ -772,7 +850,8 @@ CIRABIRewriteContext::rewriteCallSite(mlir::Operation *callOp, // through a prepended pointer slot, not as a result), so dispatch to a // dedicated helper for it; everything below handles the by-value returns. if (fc.returnInfo.kind == ArgKind::Indirect && hasResult) { - rewriteIndirectReturnCall(call, fc, newArgs, origRetTy, builder); + rewriteIndirectReturnCall(call, fc, newArgs, origRetTy, origCallArgTypes, + builder); return mlir::success(); } @@ -803,15 +882,17 @@ CIRABIRewriteContext::rewriteCallSite(mlir::Operation *callOp, } // Layer llvm.signext / llvm.zeroext onto the new call's arg_attrs and - // res_attrs for Extend args/return. Ignore args also require a rebuild - // because their slots are dropped from the output array. + // res_attrs for Extend args/return. Ignore args require a rebuild because + // their slots are dropped; Indirect args need llvm.byval / llvm.align. 
bool needsArgAttrUpdate = llvm::any_of(fc.argInfos, [](const ArgClassification &ac) { - return ac.kind == ArgKind::Ignore || ac.kind == ArgKind::Extend; + return ac.kind == ArgKind::Ignore || ac.kind == ArgKind::Extend || + ac.kind == ArgKind::Indirect; }); if (needsArgAttrUpdate) { auto existing = call->getAttrOfType("arg_attrs"); - newCall->setAttr("arg_attrs", updateArgAttrs(ctx, existing, fc)); + newCall->setAttr("arg_attrs", + updateArgAttrs(ctx, origCallArgTypes, existing, fc)); } if (fc.returnInfo.kind == ArgKind::Extend) { auto existing = call->getAttrOfType("res_attrs"); diff --git a/clang/test/CIR/Transforms/abi-lowering/indirect-byval.cir b/clang/test/CIR/Transforms/abi-lowering/indirect-byval.cir new file mode 100644 index 0000000000000..f0a8572403fe9 --- /dev/null +++ b/clang/test/CIR/Transforms/abi-lowering/indirect-byval.cir @@ -0,0 +1,242 @@ +// RUN: cir-opt %s -cir-call-conv-lowering="classification-attr=test_classify" \ +// RUN: | FileCheck %s + +!s32i = !cir.int +!s64i = !cir.int +!rec_Big = !cir.struct<"Big" {!s64i, !s64i, !s64i, !s64i}> + +#byval_arg = { + return = { kind = "direct" }, + args = [ { kind = "indirect", indirect_align = 8 } ] +} + +#byval_arg_ext_return = { + return = { kind = "extend", coerced_type = !cir.int, + sign_extend = true }, + args = [ { kind = "indirect", indirect_align = 8 } ] +} + +#byval_ignore_arg = { + return = { kind = "direct" }, + args = [ { kind = "indirect", indirect_align = 8 }, + { kind = "ignore" } ] +} + +#two_byval = { + return = { kind = "direct" }, + args = [ { kind = "indirect", indirect_align = 8 }, + { kind = "indirect", indirect_align = 8 } ] +} + +#sret_byval = { + return = { kind = "indirect", indirect_align = 8 }, + args = [ { kind = "indirect", indirect_align = 8 } ] +} + +#byref_arg = { + return = { kind = "direct" }, + args = [ { kind = "indirect", indirect_align = 8, byval = false } ] +} + +#passthrough = { + return = { kind = "direct" }, + args = [ ] +} + +module attributes { + dlti.dl_spec 
= #dlti.dl_spec< + #dlti.dl_entry: vector<2xi64>>, + #dlti.dl_entry: vector<2xi64>>, + #dlti.dl_entry: vector<2xi64>>> +} { + + // The byval arg becomes a pointer with llvm.byval, llvm.align, llvm.noalias, + // and llvm.noundef. A load is inserted at entry so the body sees the + // original value type. + cir.func @takes_big(%arg0: !rec_Big) -> !s32i + attributes { test_classify = #byval_arg } { + %r = cir.const #cir.int<0> : !s32i + cir.return %r : !s32i + } + + // CHECK: cir.func{{.*}} @takes_big(%[[ARG:.*]]: !cir.ptr + // CHECK-SAME: llvm.align = 8 : i64 + // CHECK-SAME: llvm.byval = !rec_Big + // CHECK-SAME: llvm.noalias + // CHECK-SAME: llvm.noundef + // CHECK: %{{.*}} = cir.load %[[ARG]] : !cir.ptr, !rec_Big + // CHECK-NEXT: %{{.*}} = cir.const #cir.int<0> : !s32i + + // byval composes with an Extend return: the pointer parameter carries all + // four byval attrs and the return value carries llvm.signext. + cir.func @takes_big_ext(%arg0: !rec_Big) -> !s32i + attributes { test_classify = #byval_arg_ext_return } { + %r = cir.const #cir.int<0> : !s32i + cir.return %r : !s32i + } + + // CHECK: cir.func{{.*}} @takes_big_ext(%{{.*}}: !cir.ptr + // CHECK-SAME: llvm.byval = !rec_Big + // CHECK-SAME: llvm.noalias + // CHECK-SAME: llvm.noundef + // CHECK-SAME: ) -> (!s32i {llvm.signext}) + + // byval composes with an Ignored second argument: the ignored arg is dropped + // from the signature and the byval slot is the only argument. + cir.func @takes_big_ignore(%arg0: !rec_Big, %arg1: !s32i) -> !s32i + attributes { test_classify = #byval_ignore_arg } { + %r = cir.const #cir.int<0> : !s32i + cir.return %r : !s32i + } + + // CHECK: cir.func{{.*}} @takes_big_ignore(%{{.*}}: !cir.ptr + // CHECK-SAME: llvm.byval = !rec_Big + // CHECK-SAME: llvm.noalias + // CHECK-SAME: llvm.noundef + // CHECK-SAME: ) -> !s32i + // CHECK-NOT: !s32i {{.*}}!s32i + + // A forward declaration: signature is rewritten but no body to touch. 
+ cir.func private @takes_big_decl(%arg0: !rec_Big) -> !s32i + attributes { test_classify = #byval_arg } + + // CHECK: cir.func{{.*}} @takes_big_decl(!cir.ptr + // CHECK-SAME: llvm.byval = !rec_Big + // CHECK-SAME: llvm.noalias + // CHECK-SAME: llvm.noundef + + // Caller: byval argument is copied into a fresh alloca; the pointer is + // passed with llvm.byval, llvm.noalias, and llvm.noundef on the call + // operand, matching classic CodeGen's -fpass-by-value-is-noalias output. + cir.func @caller(%s: !rec_Big) -> !s32i + attributes { test_classify = #passthrough } { + %r = cir.call @takes_big(%s) : (!rec_Big) -> !s32i + cir.return %r : !s32i + } + + // CHECK: cir.func{{.*}} @caller(%[[S:.*]]: !rec_Big) -> !s32i + // CHECK: %[[SLOT:.*]] = cir.alloca "byval" align(8) : !cir.ptr + // CHECK-NEXT: cir.store %[[S]], %[[SLOT]] : !rec_Big, !cir.ptr + // CHECK-NEXT: %{{.*}} = cir.call @takes_big(%[[SLOT]]) : + // CHECK-SAME: llvm.byval = !rec_Big + // CHECK-SAME: llvm.noalias + // CHECK-SAME: llvm.noundef + + // A body that actually uses the byval arg value: the existing use in + // cir.return is rerouted through the load inserted at entry. + cir.func @takes_big_uses_arg(%arg0: !rec_Big) -> !rec_Big + attributes { test_classify = #byval_arg } { + cir.return %arg0 : !rec_Big + } + + // CHECK: cir.func{{.*}} @takes_big_uses_arg(%[[PTR:.*]]: !cir.ptr + // CHECK-SAME: llvm.byval = !rec_Big + // CHECK-SAME: llvm.noalias + // CHECK-SAME: llvm.noundef + // CHECK: %[[LOADED:.*]] = cir.load %[[PTR]] : !cir.ptr, !rec_Big + // CHECK-NEXT: cir.return %[[LOADED]] : !rec_Big + + // Two byval arguments: each gets its own alloca at the call site, and both + // block args are changed to pointers with loads at the callee entry. + cir.func @two_byval(%a: !rec_Big, %b: !rec_Big) -> !rec_Big + attributes { test_classify = #two_byval } { + cir.return %b : !rec_Big + } + + // Both byval slots carry the full attribute set. Match in appearance order. 
+ // CHECK: cir.func{{.*}} @two_byval( + // CHECK-SAME: !cir.ptr + // CHECK-SAME: llvm.byval = !rec_Big + // CHECK-SAME: llvm.noalias + // CHECK-SAME: llvm.noundef + // CHECK-SAME: !cir.ptr + // CHECK-SAME: llvm.byval = !rec_Big + // CHECK-SAME: llvm.noalias + // CHECK-SAME: llvm.noundef + + cir.func @caller_two(%s: !rec_Big, %t: !rec_Big) -> !rec_Big + attributes { test_classify = #passthrough } { + %r = cir.call @two_byval(%s, %t) : (!rec_Big, !rec_Big) -> !rec_Big + cir.return %r : !rec_Big + } + + // CHECK: cir.func{{.*}} @caller_two + // CHECK: %[[A:.*]] = cir.alloca "byval" align(8) : !cir.ptr + // CHECK-NEXT: cir.store %{{.*}}, %[[A]] : !rec_Big, !cir.ptr + // CHECK-NEXT: %[[B:.*]] = cir.alloca "byval" align(8) : !cir.ptr + // CHECK-NEXT: cir.store %{{.*}}, %[[B]] : !rec_Big, !cir.ptr + // CHECK-NEXT: %{{.*}} = cir.call @two_byval(%[[A]], %[[B]]) : + // CHECK-SAME: llvm.byval = !rec_Big + // CHECK-SAME: llvm.noalias + // CHECK-SAME: llvm.noundef + // CHECK-SAME: !cir.ptr + // CHECK-SAME: llvm.byval = !rec_Big + // CHECK-SAME: llvm.noalias + // CHECK-SAME: llvm.noundef + + // sret return + byval arg: the sret pointer is prepended at slot 0 and the + // byval arg shifts to slot 1 (sretOffset = 1). The byval slot carries all + // four byval attrs; the sret slot carries its own standard set. 
+ cir.func @byval_and_sret(%arg0: !rec_Big) -> !rec_Big + attributes { test_classify = #sret_byval } { + %0 = cir.alloca "__retval" align(8) : !cir.ptr + %z = cir.const #cir.zero : !rec_Big + cir.store %z, %0 : !rec_Big, !cir.ptr + %1 = cir.load %0 : !cir.ptr, !rec_Big + cir.return %1 : !rec_Big + } + + // CHECK: cir.func{{.*}} @byval_and_sret( + // CHECK-SAME: llvm.sret = !rec_Big + // CHECK-SAME: !cir.ptr + // CHECK-SAME: llvm.byval = !rec_Big + // CHECK-SAME: llvm.noalias + // CHECK-SAME: llvm.noundef + // CHECK-NOT: -> !rec_Big + // CHECK: cir.store %{{.*}}, %{{.*}} : !rec_Big, !cir.ptr + // CHECK: cir.return + + // byref callee definition: the parameter gets llvm.byref instead of + // llvm.byval. noalias and noundef are NOT added (byref passes the + // original, which may alias and may carry padding). The body + // transformation (ptr + load at entry) is identical to byval. + cir.func @takes_big_byref(%arg0: !rec_Big) -> !rec_Big + attributes { test_classify = #byref_arg } { + cir.return %arg0 : !rec_Big + } + + // CHECK: cir.func{{.*}} @takes_big_byref(%[[PTR:.*]]: !cir.ptr + // CHECK-SAME: llvm.align = 8 : i64 + // CHECK-SAME: llvm.byref = !rec_Big + // CHECK-NOT: llvm.noalias + // CHECK-NOT: llvm.noundef + // CHECK: %[[L:.*]] = cir.load %[[PTR]] : !cir.ptr, !rec_Big + // CHECK-NEXT: cir.return %[[L]] : !rec_Big + + // byref forward declaration: signature gets llvm.byref, no body. + cir.func private @takes_big_byref_decl(%arg0: !rec_Big) -> !rec_Big + attributes { test_classify = #byref_arg } + + // CHECK: cir.func{{.*}} @takes_big_byref_decl(!cir.ptr + // CHECK-SAME: llvm.byref = !rec_Big + // CHECK-NOT: llvm.noalias + // CHECK-NOT: llvm.noundef + + // byref call site: same alloca+store pattern as byval, but the pointer + // carries llvm.byref (and no llvm.noalias / llvm.noundef since byref + // does not assert exclusive ownership of the storage). 
+ cir.func @caller_byref(%s: !rec_Big) -> !rec_Big + attributes { test_classify = #passthrough } { + %r = cir.call @takes_big_byref(%s) : (!rec_Big) -> !rec_Big + cir.return %r : !rec_Big + } + + // CHECK: cir.func{{.*}} @caller_byref(%[[S:.*]]: !rec_Big) -> !rec_Big + // CHECK: %[[SLOT:.*]] = cir.alloca "byref" align(8) : !cir.ptr + // CHECK-NEXT: cir.store %[[S]], %[[SLOT]] : !rec_Big, !cir.ptr + // CHECK-NEXT: %{{.*}} = cir.call @takes_big_byref(%[[SLOT]]) : + // CHECK-SAME: llvm.byref = !rec_Big + // CHECK-NOT: llvm.noalias + // CHECK-NOT: llvm.noundef + +} From fcc16371eafed826a2b76e20c4e899bf4f0faf82 Mon Sep 17 00:00:00 2001 From: Domenic Nutile Date: Tue, 23 Jun 2026 15:48:06 -0400 Subject: [PATCH 247/511] [AMDGPU] Change static NOP last terminator SI_DEMOTE_I1 to be replaced by S_BRANCH instead of assert (#204649) This issue was first discovered in some testing downstream. A specific chain of transformations on a ballot instruction with a constant argument followed by an llvm.amdgcn.wqm.demote call leads to an instruction of `SI_DEMOTE_I1 -1, 0` being the last terminator of a block with a single successor. This instruction is a NOP and can safely be replaced with an S_BRANCH to the block's successor instead of asserting failure. 
The test added in this change is a very simplified recreation of the pattern seen in the shader compilation downstream that led to the assertion failure --- llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp | 2 +- .../AMDGPU/uniform-intrin-combine-wqm-demote.ll | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/uniform-intrin-combine-wqm-demote.ll diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp index bf5dc2c529be6..008f7f70456df 100644 --- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp +++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp @@ -979,7 +979,7 @@ MachineInstr *SIWholeQuadMode::lowerKillI1(MachineInstr &MI, bool IsWQM) { if (!IsLastTerminator) { LIS->RemoveMachineInstrFromMaps(MI); } else { - assert(MBB.succ_size() == 1 && MI.getOpcode() != AMDGPU::SI_DEMOTE_I1); + assert(MBB.succ_size() == 1); MachineInstr *NewTerm = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_BRANCH)) .addMBB(*MBB.succ_begin()); LIS->ReplaceMachineInstrInMaps(MI, *NewTerm); diff --git a/llvm/test/CodeGen/AMDGPU/uniform-intrin-combine-wqm-demote.ll b/llvm/test/CodeGen/AMDGPU/uniform-intrin-combine-wqm-demote.ll new file mode 100644 index 0000000000000..6e82e70b56c57 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/uniform-intrin-combine-wqm-demote.ll @@ -0,0 +1,17 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 < %s | FileCheck %s + +define amdgpu_ps void @_amdgpu_ps_main() "amdgpu-conditional-discard-transformations" "amdgpu-transform-discard-to-demote" { +; CHECK-LABEL: _amdgpu_ps_main: +; CHECK: ; %bb.0: ; %.entry +; CHECK-NEXT: s_endpgm +.entry: + %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 true) + %not_ballot = icmp eq i64 %ballot, 0 + %inverse = xor i1 %not_ballot, true + call void @llvm.amdgcn.wqm.demote(i1 %inverse) + br label %.exit + +.exit: + ret void +} From 
8f21d03dea59b2fa6fee298ed0232079b136dbc8 Mon Sep 17 00:00:00 2001 From: seantalts Date: Tue, 23 Jun 2026 15:55:42 -0400 Subject: [PATCH 248/511] [LoopVectorize] Don't assert in getVectorCallCost for vector library variants (#202085) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During loop vectorization, `computePredInstDiscount` queries the cost of instructions at vector VF using `getInstructionCost`. A `CallInst` with a vector library variant delegates to `getVectorCallCost`, which asserted that such variants should not reach it. A predicated call can however reach `getVectorCallCost` via `computePredInstDiscount` — before its widening decision is made — when a predicated user (e.g. a scatter store) is being considered for scalarization. Remove the assert and fall through to the existing scalarization cost, which is the cost relevant to that analysis. Adds a regression test exercising that path. --- .../Transforms/Vectorize/LoopVectorize.cpp | 50 ++-- .../pred-inst-discount-vector-library-call.ll | 221 ++++++++++++++++++ 2 files changed, 251 insertions(+), 20 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/pred-inst-discount-vector-library-call.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 3dbee08e7d7d8..545fcba65d228 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2093,30 +2093,32 @@ static unsigned estimateElementCount(ElementCount VF, return EstimatedVF; } -/// Returns true iff \p CI has a library vector variant usable at \p VF: a -/// mapping with matching VF, masked if required, whose vector function is -/// declared in the module. Such variants are priced by -/// VPWidenCallRecipe::computeCost rather than by scalarization. 
+/// Returns the vector library variant function of \p CI usable at \p VF, +/// respecting \p MaskRequired, or nullptr if none is found: a mapping with +/// matching VF, masked if required, whose vector function is declared in the +/// module. +static Function *getVectorLibraryVariantFor(const CallInst &CI, ElementCount VF, + bool MaskRequired, + const TargetLibraryInfo *TLI) { + if (!TLI || CI.isNoBuiltin()) + return nullptr; + for (const VFInfo &Info : VFDatabase::getMappings(CI)) + if (Info.Shape.VF == VF && (!MaskRequired || Info.isMasked())) + if (Function *F = CI.getModule()->getFunction(Info.VectorName)) + return F; + return nullptr; +} + +/// Returns true iff \p CI has a library vector variant usable at \p VF. static bool hasVectorLibraryVariantFor(const CallInst &CI, ElementCount VF, bool MaskRequired, const TargetLibraryInfo *TLI) { - if (!TLI || CI.isNoBuiltin()) - return false; - return any_of(VFDatabase::getMappings(CI), [&](const VFInfo &Info) { - return Info.Shape.VF == VF && (!MaskRequired || Info.isMasked()) && - CI.getModule()->getFunction(Info.VectorName); - }); + return getVectorLibraryVariantFor(CI, VF, MaskRequired, TLI) != nullptr; } InstructionCost LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, ElementCount VF) const { - // Vector library variants are priced by VPWidenCallRecipe::computeCost and - // should not reach this function. 
- assert((VF.isScalar() || - !hasVectorLibraryVariantFor(*CI, VF, isMaskRequired(CI), TLI)) && - "getVectorCallCost does not price vector library variants"); - Type *RetTy = CI->getType(); SmallVector Tys; for (auto &ArgOp : CI->args()) @@ -2132,10 +2134,18 @@ LoopVectorizationCostModel::getVectorCallCost(CallInst *CI, : ScalarCallCost * VF.getKnownMinValue() + getScalarizationOverhead(CI, VF); - if (getVectorIntrinsicIDForCall(CI, TLI)) { - InstructionCost IntrinsicCost = getVectorIntrinsicCost(CI, VF); - return std::min(Cost, IntrinsicCost); - } + // The call may be vectorized at this VF, via a vector intrinsic or a vector + // library variant. + if (getVectorIntrinsicIDForCall(CI, TLI)) + Cost = std::min(Cost, getVectorIntrinsicCost(CI, VF)); + + if (Function *Variant = + getVectorLibraryVariantFor(*CI, VF, isMaskRequired(CI), TLI)) + Cost = std::min(Cost, + TTI.getCallInstrCost( + /*F=*/nullptr, Variant->getReturnType(), + Variant->getFunctionType()->params(), Config.CostKind)); + return Cost; } diff --git a/llvm/test/Transforms/LoopVectorize/pred-inst-discount-vector-library-call.ll b/llvm/test/Transforms/LoopVectorize/pred-inst-discount-vector-library-call.ll new file mode 100644 index 0000000000000..1d7f96c35949e --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/pred-inst-discount-vector-library-call.ll @@ -0,0 +1,221 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s --check-prefixes=VF2 +; RUN: opt < %s -passes=loop-vectorize -force-vector-interleave=1 -force-vector-width=8 -S | FileCheck %s --check-prefixes=VF8 + +; Check that a conditionally-executed call with a vector library variant is +; costed correctly when deciding whether scalarizing a predicated tree of +; operations is profitable. 
Its result feeds a scatter store that must be +; scalarized; the cost model uses the wide-call cost to decide whether to also +; scalarize the call. At VF=2 scalarizing the call is cheapest, so it is +; scalarized; at VF=8 the wide variant is cheaper, so the call stays wide and +; only the store is scalarized. Querying the wide-call cost on this path +; previously crashed. + +define void @pred_call_with_variant(ptr readonly %src, ptr noalias %dest, i64 %N) { +; VF2-LABEL: define void @pred_call_with_variant( +; VF2-SAME: ptr readonly [[SRC:%.*]], ptr noalias [[DEST:%.*]], i64 [[N:%.*]]) { +; VF2-NEXT: [[ENTRY:.*]]: +; VF2-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2 +; VF2-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; VF2: [[VECTOR_PH]]: +; VF2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2 +; VF2-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; VF2-NEXT: br label %[[VECTOR_BODY:.*]] +; VF2: [[VECTOR_BODY]]: +; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE2:.*]] ] +; VF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] +; VF2-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP0]], align 8 +; VF2-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> [[WIDE_LOAD]], splat (i64 5) +; VF2-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0 +; VF2-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; VF2: [[PRED_STORE_IF]]: +; VF2-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i64 0 +; VF2-NEXT: [[TMP4:%.*]] = call i64 @foo(i64 [[TMP3]]) #[[ATTR0:[0-9]+]] +; VF2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DEST]], i64 [[TMP3]] +; VF2-NEXT: store i64 [[TMP4]], ptr [[TMP5]], align 8 +; VF2-NEXT: br label %[[PRED_STORE_CONTINUE]] +; VF2: [[PRED_STORE_CONTINUE]]: +; VF2-NEXT: [[TMP6:%.*]] = extractelement <2 x i1> [[TMP1]], i64 1 +; VF2-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF1:.*]], label 
%[[PRED_STORE_CONTINUE2]] +; VF2: [[PRED_STORE_IF1]]: +; VF2-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[WIDE_LOAD]], i64 1 +; VF2-NEXT: [[TMP8:%.*]] = call i64 @foo(i64 [[TMP7]]) #[[ATTR0]] +; VF2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DEST]], i64 [[TMP7]] +; VF2-NEXT: store i64 [[TMP8]], ptr [[TMP9]], align 8 +; VF2-NEXT: br label %[[PRED_STORE_CONTINUE2]] +; VF2: [[PRED_STORE_CONTINUE2]]: +; VF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; VF2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VF2-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF2: [[MIDDLE_BLOCK]]: +; VF2-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; VF2-NEXT: br i1 [[CMP_N]], label %[[END:.*]], label %[[SCALAR_PH]] +; VF2: [[SCALAR_PH]]: +; VF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; VF2-NEXT: br label %[[FOR_BODY:.*]] +; VF2: [[FOR_BODY]]: +; VF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_LOOP:.*]] ] +; VF2-NEXT: [[LD_ADDR:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]] +; VF2-NEXT: [[IDX:%.*]] = load i64, ptr [[LD_ADDR]], align 8 +; VF2-NEXT: [[IFCOND:%.*]] = icmp ult i64 [[IDX]], 5 +; VF2-NEXT: br i1 [[IFCOND]], label %[[IF_THEN:.*]], label %[[FOR_LOOP]] +; VF2: [[IF_THEN]]: +; VF2-NEXT: [[FOO_RET:%.*]] = call i64 @foo(i64 [[IDX]]) #[[ATTR0]] +; VF2-NEXT: [[ST_ADDR:%.*]] = getelementptr inbounds i64, ptr [[DEST]], i64 [[IDX]] +; VF2-NEXT: store i64 [[FOO_RET]], ptr [[ST_ADDR]], align 8 +; VF2-NEXT: br label %[[FOR_LOOP]] +; VF2: [[FOR_LOOP]]: +; VF2-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; VF2-NEXT: [[LOOPCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; VF2-NEXT: br i1 [[LOOPCOND]], label %[[END]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF2: [[END]]: +; VF2-NEXT: ret void +; +; VF8-LABEL: define void @pred_call_with_variant( +; VF8-SAME: ptr readonly 
[[SRC:%.*]], ptr noalias [[DEST:%.*]], i64 [[N:%.*]]) { +; VF8-NEXT: [[ENTRY:.*]]: +; VF8-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 8 +; VF8-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; VF8: [[VECTOR_PH]]: +; VF8-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 8 +; VF8-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; VF8-NEXT: br label %[[VECTOR_BODY:.*]] +; VF8: [[VECTOR_BODY]]: +; VF8-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ] +; VF8-NEXT: [[TMP0:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]] +; VF8-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i64>, ptr [[TMP0]], align 8 +; VF8-NEXT: [[TMP1:%.*]] = icmp ult <8 x i64> [[WIDE_LOAD]], splat (i64 5) +; VF8-NEXT: [[TMP2:%.*]] = call <8 x i64> @vector_foo_8(<8 x i64> [[WIDE_LOAD]], <8 x i1> [[TMP1]]) +; VF8-NEXT: [[TMP3:%.*]] = extractelement <8 x i1> [[TMP1]], i64 0 +; VF8-NEXT: br i1 [[TMP3]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; VF8: [[PRED_STORE_IF]]: +; VF8-NEXT: [[TMP4:%.*]] = extractelement <8 x i64> [[WIDE_LOAD]], i64 0 +; VF8-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[DEST]], i64 [[TMP4]] +; VF8-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0 +; VF8-NEXT: store i64 [[TMP6]], ptr [[TMP5]], align 8 +; VF8-NEXT: br label %[[PRED_STORE_CONTINUE]] +; VF8: [[PRED_STORE_CONTINUE]]: +; VF8-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP1]], i64 1 +; VF8-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]] +; VF8: [[PRED_STORE_IF1]]: +; VF8-NEXT: [[TMP8:%.*]] = extractelement <8 x i64> [[WIDE_LOAD]], i64 1 +; VF8-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[DEST]], i64 [[TMP8]] +; VF8-NEXT: [[TMP10:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1 +; VF8-NEXT: store i64 [[TMP10]], ptr [[TMP9]], align 8 +; VF8-NEXT: br label %[[PRED_STORE_CONTINUE2]] +; VF8: [[PRED_STORE_CONTINUE2]]: +; VF8-NEXT: 
[[TMP11:%.*]] = extractelement <8 x i1> [[TMP1]], i64 2 +; VF8-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]] +; VF8: [[PRED_STORE_IF3]]: +; VF8-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[WIDE_LOAD]], i64 2 +; VF8-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[DEST]], i64 [[TMP12]] +; VF8-NEXT: [[TMP14:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2 +; VF8-NEXT: store i64 [[TMP14]], ptr [[TMP13]], align 8 +; VF8-NEXT: br label %[[PRED_STORE_CONTINUE4]] +; VF8: [[PRED_STORE_CONTINUE4]]: +; VF8-NEXT: [[TMP15:%.*]] = extractelement <8 x i1> [[TMP1]], i64 3 +; VF8-NEXT: br i1 [[TMP15]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]] +; VF8: [[PRED_STORE_IF5]]: +; VF8-NEXT: [[TMP16:%.*]] = extractelement <8 x i64> [[WIDE_LOAD]], i64 3 +; VF8-NEXT: [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[DEST]], i64 [[TMP16]] +; VF8-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3 +; VF8-NEXT: store i64 [[TMP18]], ptr [[TMP17]], align 8 +; VF8-NEXT: br label %[[PRED_STORE_CONTINUE6]] +; VF8: [[PRED_STORE_CONTINUE6]]: +; VF8-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP1]], i64 4 +; VF8-NEXT: br i1 [[TMP19]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]] +; VF8: [[PRED_STORE_IF7]]: +; VF8-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[WIDE_LOAD]], i64 4 +; VF8-NEXT: [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[DEST]], i64 [[TMP20]] +; VF8-NEXT: [[TMP22:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4 +; VF8-NEXT: store i64 [[TMP22]], ptr [[TMP21]], align 8 +; VF8-NEXT: br label %[[PRED_STORE_CONTINUE8]] +; VF8: [[PRED_STORE_CONTINUE8]]: +; VF8-NEXT: [[TMP23:%.*]] = extractelement <8 x i1> [[TMP1]], i64 5 +; VF8-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]] +; VF8: [[PRED_STORE_IF9]]: +; VF8-NEXT: [[TMP24:%.*]] = extractelement <8 x i64> [[WIDE_LOAD]], i64 5 +; VF8-NEXT: [[TMP25:%.*]] = getelementptr inbounds i64, ptr 
[[DEST]], i64 [[TMP24]] +; VF8-NEXT: [[TMP26:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5 +; VF8-NEXT: store i64 [[TMP26]], ptr [[TMP25]], align 8 +; VF8-NEXT: br label %[[PRED_STORE_CONTINUE10]] +; VF8: [[PRED_STORE_CONTINUE10]]: +; VF8-NEXT: [[TMP27:%.*]] = extractelement <8 x i1> [[TMP1]], i64 6 +; VF8-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]] +; VF8: [[PRED_STORE_IF11]]: +; VF8-NEXT: [[TMP28:%.*]] = extractelement <8 x i64> [[WIDE_LOAD]], i64 6 +; VF8-NEXT: [[TMP29:%.*]] = getelementptr inbounds i64, ptr [[DEST]], i64 [[TMP28]] +; VF8-NEXT: [[TMP30:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6 +; VF8-NEXT: store i64 [[TMP30]], ptr [[TMP29]], align 8 +; VF8-NEXT: br label %[[PRED_STORE_CONTINUE12]] +; VF8: [[PRED_STORE_CONTINUE12]]: +; VF8-NEXT: [[TMP31:%.*]] = extractelement <8 x i1> [[TMP1]], i64 7 +; VF8-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]] +; VF8: [[PRED_STORE_IF13]]: +; VF8-NEXT: [[TMP32:%.*]] = extractelement <8 x i64> [[WIDE_LOAD]], i64 7 +; VF8-NEXT: [[TMP33:%.*]] = getelementptr inbounds i64, ptr [[DEST]], i64 [[TMP32]] +; VF8-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7 +; VF8-NEXT: store i64 [[TMP34]], ptr [[TMP33]], align 8 +; VF8-NEXT: br label %[[PRED_STORE_CONTINUE14]] +; VF8: [[PRED_STORE_CONTINUE14]]: +; VF8-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 +; VF8-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; VF8-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; VF8: [[MIDDLE_BLOCK]]: +; VF8-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; VF8-NEXT: br i1 [[CMP_N]], label %[[END:.*]], label %[[SCALAR_PH]] +; VF8: [[SCALAR_PH]]: +; VF8-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; VF8-NEXT: br label %[[FOR_BODY:.*]] +; VF8: [[FOR_BODY]]: +; VF8-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], 
%[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_LOOP:.*]] ] +; VF8-NEXT: [[LD_ADDR:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]] +; VF8-NEXT: [[IDX:%.*]] = load i64, ptr [[LD_ADDR]], align 8 +; VF8-NEXT: [[IFCOND:%.*]] = icmp ult i64 [[IDX]], 5 +; VF8-NEXT: br i1 [[IFCOND]], label %[[IF_THEN:.*]], label %[[FOR_LOOP]] +; VF8: [[IF_THEN]]: +; VF8-NEXT: [[FOO_RET:%.*]] = call i64 @foo(i64 [[IDX]]) #[[ATTR0:[0-9]+]] +; VF8-NEXT: [[ST_ADDR:%.*]] = getelementptr inbounds i64, ptr [[DEST]], i64 [[IDX]] +; VF8-NEXT: store i64 [[FOO_RET]], ptr [[ST_ADDR]], align 8 +; VF8-NEXT: br label %[[FOR_LOOP]] +; VF8: [[FOR_LOOP]]: +; VF8-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; VF8-NEXT: [[LOOPCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; VF8-NEXT: br i1 [[LOOPCOND]], label %[[END]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; VF8: [[END]]: +; VF8-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.loop ] + %ld.addr = getelementptr inbounds i64, ptr %src, i64 %iv + %idx = load i64, ptr %ld.addr, align 8 + %ifcond = icmp ult i64 %idx, 5 + br i1 %ifcond, label %if.then, label %for.loop + +if.then: + %foo.ret = call i64 @foo(i64 %idx) #0 + ; A scatter to a loaded index: not consecutive, so the store is + ; scalar-with-predication and triggers the predication-discount analysis. + %st.addr = getelementptr inbounds i64, ptr %dest, i64 %idx + store i64 %foo.ret, ptr %st.addr, align 8 + br label %for.loop + +for.loop: + %iv.next = add nsw nuw i64 %iv, 1 + %loopcond = icmp eq i64 %iv.next, %N + br i1 %loopcond, label %end, label %for.body + +end: + ret void +} + +declare i64 @foo(i64) #0 +declare <2 x i64> @vector_foo_2(<2 x i64>, <2 x i1>) +declare <8 x i64> @vector_foo_8(<8 x i64>, <8 x i1>) + +; Masked vector variants for VF=2 and VF=8, so the call is a widen-with-mask +; candidate at both VFs while feeding the scalar-with-predication scatter store. 
+attributes #0 = { readonly nounwind "vector-function-abi-variant"="_ZGV_LLVM_M2v_foo(vector_foo_2),_ZGV_LLVM_M8v_foo(vector_foo_8)" } From bbf57fd86a55e5d27d4de30de9f7c47150aba79a Mon Sep 17 00:00:00 2001 From: Jianhui Li Date: Tue, 23 Jun 2026 12:58:27 -0700 Subject: [PATCH 249/511] [MLIR][XeGPU] Refactor XeGPU layout propagation: passing lane_layout/lane_data with inst_data (#203156) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Motivation** Enhance setup* rules in layout propagation to pass lane_layout, and lane_data information during inst_data propagation, so that the propagation can have lane level information when choosing an optimal inst_data. This branch makes that relationship explicit and uniform across all setup rules. **Invariant** All setup rules now produce layouts that satisfy: Nd ops + dpas/dpas_mx: inst_data = k * (lane_layout * lane_data), k ≥ 1 Scatter/matrix ops + non-anchor ops: inst_data = lane_layout * lane_data **Key changes in XeGPULayoutImpl** - New per-op anchor setup rules: setupStoreNdAnchorLayout, setupPrefetchNdAnchorLayout, setupLoadNdAnchorLayout (replacing a generic block-IO path) — Nd ops have rigid lane info and fit inst_data to the lane factorization. - New complete*LayoutFromInstData helpers: fill in lane info on user-provided anchors that specify only inst_data, by re-running the op's Lane-kind setup with inst_data as the destination shape. Covers scatter load/store, block store/load, DPAS, and DPAS_MX. - BlockIOInstructionInterface (new in uArchBase.h): shared abstraction over the load/store/prefetch 2D-block instructions; the three Xe2 instructions now implement it. - setupMultiReductionResultLayout reorganized so the InstData and Lane branches share the same lane-layout logic (computeReductionLaneLayoutAndData). - createScaleLayout (dpas_mx): caps the scale lane_layout by inst_data so the scale operand's load_nd satisfies the multiple-of invariant. 
- getValidLayouts → getSgLayoutCandidates: renamed and generalized from 2D to N-D factorization. - inferShapeCastSourceLayout preserves lane info through 1D↔ND collapse-style casts. - chunkSize → contiguousChunkSize parameter rename for the scatter inst_data decision. - ResolveLayoutConflicts: retargets an existing convert_layout instead of chaining a second one; UnrollConvertLayoutOp short-circuits when input == target. - Sub-byte (4-bit) element support added to the block-load uArch table. - Tests updated: propagate-layout-inst-data.mlir, propagate-layout.mlir, resolve-layout-conflicts.mlir. Assisted-by-claude --------- Co-authored-by: Claude Opus 4.7 (1M context) --- .../XeGPU/Transforms/XeGPULayoutImpl.h | 126 +- .../mlir/Dialect/XeGPU/uArch/IntelGpuXe2.h | 67 +- .../mlir/Dialect/XeGPU/uArch/uArchBase.h | 19 + mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 4 +- .../XeGPU/Transforms/XeGPULayoutImpl.cpp | 2440 +++++++++++------ .../XeGPU/Transforms/XeGPUPropagateLayout.cpp | 745 ++--- .../Dialect/XeGPU/Transforms/XeGPUUnroll.cpp | 4 +- .../XeGPU/propagate-layout-inst-data.mlir | 526 ++-- mlir/test/Dialect/XeGPU/propagate-layout.mlir | 206 +- .../XeGPU/resolve-layout-conflicts.mlir | 8 +- 10 files changed, 2543 insertions(+), 1602 deletions(-) diff --git a/mlir/include/mlir/Dialect/XeGPU/Transforms/XeGPULayoutImpl.h b/mlir/include/mlir/Dialect/XeGPU/Transforms/XeGPULayoutImpl.h index 94d1d5aecbe60..bf61f0ced6048 100644 --- a/mlir/include/mlir/Dialect/XeGPU/Transforms/XeGPULayoutImpl.h +++ b/mlir/include/mlir/Dialect/XeGPU/Transforms/XeGPULayoutImpl.h @@ -159,6 +159,22 @@ DistributeLayoutAttr inferSourceLayoutFromResultForNonAnchorOp(OpOperand &operand, DistributeLayoutAttr resLayout); +/// Note on the `consumerLayout` argument used by the consumer-driven setup* / +/// complete* helpers below: +/// +/// Layout propagation is a backward dataflow analysis, so a producer learns its +/// consumers' demands one at a time. 
The `consumerLayout` passed to these +/// helpers is the *single* layout that the first consumer to reach the producer +/// has requested (see `getConsumerLayoutAt`); these helpers do not pick among, +/// or merge, multiple consumers, and they do not reason about cost (e.g. a +/// consumer inside a loop vs. one outside). If a producer has several consumers +/// with conflicting layout demands, only the first-arriving one shapes the +/// producer's anchor layout here; any later, inconsistent consumer is left +/// as-is and reconciled afterwards by the layout conflict resolution process +/// (`ResolveLayoutConflicts`), which inserts a `convert_layout` op on that +/// edge. So these helpers can always assume exactly one (possibly null) +/// consumer layout to honor. + /// Sets up layout for Multi-Reduction operations by creating a SliceAttr for /// the result. /// @@ -208,28 +224,95 @@ DistributeLayoutAttr setupInsertStridedSliceResultLayout( DistributeLayoutAttr consumerLayout, const uArch::uArch *uArch); /// Sets up the anchor layout for a load gather operation. -DistributeLayoutAttr -setupLoadGatherAnchorLayout(LayoutKind layoutKind, VectorType vectorTy, - int chunkSize, DistributeLayoutAttr consumerLayout, - const uArch::uArch *uArch); +DistributeLayoutAttr setupLoadGatherAnchorLayout( + LayoutKind layoutKind, VectorType vectorTy, int contigChunkSize, + DistributeLayoutAttr consumerLayout, const uArch::uArch *uArch); /// Sets up the anchor layout for load matrix operation. -DistributeLayoutAttr -setupLoadMatrixAnchorLayout(LayoutKind layoutKind, VectorType vectorTy, - DistributeLayoutAttr consumerLayout, - const uArch::uArch *uArch); +DistributeLayoutAttr setupLoadMatrixAnchorLayout( + LayoutKind layoutKind, VectorType vectorTy, int contigChunkSize, + DistributeLayoutAttr consumerLayout, const uArch::uArch *uArch); /// Sets up the anchor layout for a store scatter operation. 
DistributeLayoutAttr setupStoreScatterAnchorLayout(LayoutKind layoutKind, VectorType vectorTy, - int chunkSize, + int contigChunkSize, const uArch::uArch *uArch); /// Sets up the anchor layout for a store matrix operation. DistributeLayoutAttr setupStoreMatrixAnchorLayout(LayoutKind layoutKind, VectorType vectorTy, + int contigChunkSize, const uArch::uArch *uArch); +/// If the consumer layout has only inst_data (no lane_layout/lane_data), +/// completes it by running the corresponding scatter-style Lane-kind setup +/// rule with inst_data as the destination shape. The resulting lane info is +/// merged with the consumer's inst_data so downstream setup* paths see a +/// fully-populated layout. +/// Returns the layout unchanged when it is null, has no inst_data, or already +/// carries lane info; returns nullopt when the derived lane factorization does +/// not divide the user's inst_data (an invalid inst_data). +std::optional completeScatterLoadLaneLayoutFromInstData( + DistributeLayoutAttr userSpecifiedLayout, + DistributeLayoutAttr consumerLayout, Type elemTy, + const xegpu::uArch::LoadGatherInstructionInterface *uArchInstruction, + const int subgroupSize); + +/// Like completeScatterLoadLaneLayoutFromInstData, but for scatter stores +/// (store_scatter / store_matrix). A store is a data sink: lane info is derived +/// purely from inst_data using the uArch's StoreScatter per-lane store width, +/// with no consumer layout to reuse. +std::optional completeScatterStoreLaneLayoutFromInstData( + DistributeLayoutAttr specifiedLayout, Type elemTy, + const xegpu::uArch::StoreScatterInstructionInterface *uArchInstruction, + const int subgroupSize); + +/// Completes a user-provided 2D-block store_nd / prefetch_nd anchor that has +/// only inst_data. These ops are data sinks, so lane info is derived purely +/// from inst_data using the shared BlockIOInstructionInterface; one helper +/// serves both store_nd and prefetch_nd. 
+std::optional completeBlockStoreLaneLayoutFromInstData( + DistributeLayoutAttr specifiedLayout, Type elemTy, + const xegpu::uArch::BlockIOInstructionInterface *uArchInstruction, + const int subgroupSize); + +/// Like completeBlockStoreLaneLayoutFromInstData, but for load_nd. The consumer +/// layout supplies the transform / transpose / packing properties; the lane +/// factorization is recomputed from inst_data (load-side lane counts differ +/// from the consumer's). +std::optional completeBlockLoadLaneLayoutFromInstData( + DistributeLayoutAttr specifiedLayout, DistributeLayoutAttr consumerLayout, + Type elemTy, + const xegpu::uArch::BlockIOInstructionInterface *uArchInstruction, + const int subgroupSize); + +/// Sets up the anchor layout for a store_nd operation. StoreNd does not +/// consider a consumer layout (it is a data sink), and picks its layout from +/// uArch block parameters. `numSg` is only used for Subgroup-kind layouts. +DistributeLayoutAttr setupStoreNdAnchorLayout(LayoutKind layoutKind, + VectorType vectorTy, int numSg, + const uArch::uArch *uArch); + +/// Sets up the anchor layout for a prefetch_nd operation. PrefetchNd has no +/// value result and thus no consumer; it picks its layout from uArch block +/// parameters. `numSg` is only used for Subgroup-kind layouts. +DistributeLayoutAttr setupPrefetchNdAnchorLayout(LayoutKind layoutKind, + TensorDescType tdescTy, + int numSg, + const uArch::uArch *uArch); + +/// Sets up the anchor layout for a load_nd operation. LoadNd takes a +/// (downstream) consumer layout and validates it against uArch constraints; +/// when valid, the consumer's `inst_data` / `sg_layout` are honored. +/// Otherwise defaults derived from uArch block parameters are used. +/// `consumerLayout` must be provided. `numSg` is only used for Subgroup-kind +/// layouts when the consumer does not already provide an sg_layout. 
+DistributeLayoutAttr +setupLoadNdAnchorLayout(LayoutKind layoutKind, VectorType vectorTy, + DistributeLayoutAttr consumerLayout, int numSg, + const uArch::uArch *uArch); + /// Sets up the anchor layouts for a dpas operands (A, B, and C/D). /// The numSg and consumerLayout (optional) are only used by sg layout creation. std::optional> +completeDpasLaneLayoutFromInstData(DistributeLayoutAttr aLayout, + DistributeLayoutAttr bLayout, + DistributeLayoutAttr cdLayout, + VectorType aTy, VectorType bTy, + VectorType cdTy, const uArch::uArch *uArch); + +/// Like completeDpasLaneLayoutFromInstData, but for dpas_mx: additionally +/// re-derives the A_scale / B_scale layouts from the completed A / B layouts. +std::optional< + std::tuple> +completeDpasMxLaneLayoutFromInstData(DistributeLayoutAttr aLayout, + DistributeLayoutAttr bLayout, + DistributeLayoutAttr cdLayout, + VectorType aTy, VectorType bTy, + VectorType cdTy, VectorType aScaleTy, + VectorType bScaleTy, + const uArch::uArch *uArch); + /// Gets the expected layout for a given consumer operand. This will check if /// the owning operation of the consumer operand is one of the special layout /// users and determine the expected layout accordingly. 
diff --git a/mlir/include/mlir/Dialect/XeGPU/uArch/IntelGpuXe2.h b/mlir/include/mlir/Dialect/XeGPU/uArch/IntelGpuXe2.h index eeb1100cc8eab..ff80a77b28d37 100644 --- a/mlir/include/mlir/Dialect/XeGPU/uArch/IntelGpuXe2.h +++ b/mlir/include/mlir/Dialect/XeGPU/uArch/IntelGpuXe2.h @@ -45,7 +45,8 @@ struct Xe2Plus : public uArch { //===----------------------------------------------------------------------===// // uArch instructions //===----------------------------------------------------------------------===// -struct Subgroup2DBlockStoreInstruction : public Instruction { +struct Subgroup2DBlockStoreInstruction : public Instruction, + public BlockIOInstructionInterface { Subgroup2DBlockStoreInstruction() : Instruction(InstructionKind::Subgroup2DBlockStore, InstructionScope::Subgroup) {} @@ -54,9 +55,12 @@ struct Subgroup2DBlockStoreInstruction : public Instruction { } // Source : // https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_2d_block_io.html#_add_a_new_section_5_2_x_cl_intel_subgroup_2d_block_io + // Stores ignore the transform / transpose / upConv flags. 
std::optional< std::tuple, llvm::ArrayRef, llvm::ArrayRef>> - getBlockWidthHeightCount(Type elemTy) const { + getBlockWidthHeightCount(Type elemTy, bool /*hasTransform*/ = false, + bool /*hasTranspose*/ = false, + bool /*upConv*/ = false) const override { const static int kHeight[] = {1, 2, 4, 8}; const static int kWidth16[] = {16}; const static int kWidth32[] = {16}; @@ -73,10 +77,11 @@ struct Subgroup2DBlockStoreInstruction : public Instruction { return std::nullopt; } - int32_t getPackedFormatBitSize() const { return 16; } + int32_t getPackedFormatBitSize() const override { return 16; } }; -struct Subgroup2DBlockLoadInstruction : public Instruction { +struct Subgroup2DBlockLoadInstruction : public Instruction, + public BlockIOInstructionInterface { Subgroup2DBlockLoadInstruction() : Instruction(InstructionKind::Subgroup2DBlockLoad, InstructionScope::Subgroup) {} @@ -88,48 +93,63 @@ struct Subgroup2DBlockLoadInstruction : public Instruction { // https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_2d_block_io.html#_add_a_new_section_5_2_x_cl_intel_subgroup_2d_block_io std::optional< std::tuple, llvm::ArrayRef, llvm::ArrayRef>> - getBlockWidthHeightCount(Type elemTy, bool hasTransform, bool hasTranspose, - bool upConv = false) const { + getBlockWidthHeightCount(Type elemTy, bool hasTransform = false, + bool hasTranspose = false, + bool upConv = false) const override { static const int kHeightAtLeast1[] = {1, 2, 4, 8, 16, 32}; static const int kHeightAtLeast8[] = {8, 16, 32}; static const int kHeightAtLeast16[] = {16, 32}; - static const int kHeightAtLeast32[] = {32}; + static const int kHeight32[] = {32}; + static const int kHeight64[] = {64}; + static const int kWidth64[] = {64}; static const int kWidth32[] = {32}; static const int kWidth16[] = {16}; + static const int kWidthAtLeast16[] = {16, 32}; + static const int kWidthAtLeast32[] = {32, 64}; static const int kWidth8[] = {8}; static const int32_t kCount1[] = {1}; static const int32_t 
kCount2[] = {1, 2}; static const int32_t kCount4[] = {1, 2, 4}; static const int32_t kCount4Only[] = {4}; - // (elemBytes, transform, transpose, upConvert) + // (elemBits, transform, transpose, upConvert) using Key = std::tuple; // (widths, heights, counts) using Value = std::tuple, llvm::ArrayRef, llvm::ArrayRef>; + // The table is keyed on element bit width so sub-byte elements can be + // expressed directly. 4-bit elements are packed two-per-byte, so their + // widths (or heights, when transformed) are double the 8-bit rows. static const llvm::DenseMap kMap = { - {{1, false, false, false}, {kWidth32, kHeightAtLeast1, kCount2}}, - {{1, false, false, true}, {kWidth16, kHeightAtLeast8, kCount4Only}}, - {{2, false, false, false}, {kWidth16, kHeightAtLeast1, kCount2}}, - {{4, false, false, false}, {kWidth16, kHeightAtLeast1, kCount1}}, + {{8, false, false, false}, {kWidthAtLeast16, kHeightAtLeast1, kCount2}}, + {{8, false, false, true}, {kWidth16, kHeightAtLeast8, kCount4Only}}, + {{16, false, false, false}, {kWidth16, kHeightAtLeast1, kCount2}}, + {{32, false, false, false}, {kWidth16, kHeightAtLeast1, kCount1}}, // Block Loads with Transform: - {{1, true, false, false}, {kWidth16, kHeightAtLeast32, kCount4}}, - {{2, true, false, false}, {kWidth16, kHeightAtLeast16, kCount2}}, + {{8, true, false, false}, {kWidth16, kHeight32, kCount4}}, + {{16, true, false, false}, {kWidth16, kHeightAtLeast16, kCount2}}, // Block Loads with Transpose: - {{4, false, true, false}, {kWidth8, kHeightAtLeast16, kCount1}}, - }; - const int elemByteSize = elemTy.getIntOrFloatBitWidth() / 8; - auto it = kMap.find({elemByteSize, hasTransform, hasTranspose, upConv}); + {{8, false, true, false}, {kWidth32, kHeightAtLeast16, kCount1}}, + {{16, false, true, false}, {kWidth16, kHeightAtLeast16, kCount1}}, + {{32, false, true, false}, {kWidth8, kHeightAtLeast16, kCount1}}, + // 4-bit elements (sub-byte): + {{4, false, false, false}, {kWidthAtLeast32, kHeightAtLeast1, kCount2}}, + {{4, false, 
false, true}, {kWidth32, kHeightAtLeast8, kCount4Only}}, + {{4, true, false, false}, {kWidth16, kHeight64, kCount4}}, + {{4, false, true, false}, {kWidth64, kHeightAtLeast16, kCount1}}}; + int elemBitSize = elemTy.getIntOrFloatBitWidth(); + auto it = kMap.find({elemBitSize, hasTransform, hasTranspose, upConv}); if (it != kMap.end()) return it->second; return std::nullopt; } - int32_t getPackedFormatBitSize() const { return 16; } + int32_t getPackedFormatBitSize() const override { return 16; } }; -struct Subgroup2DBlockPrefetchInstruction : public Instruction { +struct Subgroup2DBlockPrefetchInstruction : public Instruction, + public BlockIOInstructionInterface { Subgroup2DBlockPrefetchInstruction() : Instruction(InstructionKind::Subgroup2DBlockPrefetch, InstructionScope::Subgroup) {} @@ -138,9 +158,12 @@ struct Subgroup2DBlockPrefetchInstruction : public Instruction { } // Source : // https://registry.khronos.org/OpenCL/extensions/intel/cl_intel_subgroup_buffer_prefetch.html#_add_a_new_section_6_15_x_sub_group_prefetch_functions + // Prefetches ignore the transform / transpose / upConv flags. 
std::optional< std::tuple, llvm::ArrayRef, llvm::ArrayRef>> - getBlockWidthHeightCount(Type elemTy) const { + getBlockWidthHeightCount(Type elemTy, bool /*hasTransform*/ = false, + bool /*hasTranspose*/ = false, + bool /*upConv*/ = false) const override { static const int kHeightAtLeast1[] = {1, 2, 4, 8, 16, 32}; static const int kWidth32[] = {32}; @@ -164,7 +187,7 @@ struct Subgroup2DBlockPrefetchInstruction : public Instruction { return it->second; return std::nullopt; } - int32_t getPackedFormatBitSize() const { return 16; } + int32_t getPackedFormatBitSize() const override { return 16; } }; struct SubgroupMatrixMultiplyAcc : public Instruction, diff --git a/mlir/include/mlir/Dialect/XeGPU/uArch/uArchBase.h b/mlir/include/mlir/Dialect/XeGPU/uArch/uArchBase.h index 147a56a52c188..61db4605e85fa 100644 --- a/mlir/include/mlir/Dialect/XeGPU/uArch/uArchBase.h +++ b/mlir/include/mlir/Dialect/XeGPU/uArch/uArchBase.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -255,6 +256,24 @@ struct MMAInstructionInterface { virtual ~MMAInstructionInterface() = default; }; +// Interface for subgroup-level 2D block instructions (load / store / prefetch). +// All three describe the set of hardware-supported block shapes via +// (width, height, count) tuples and share a packed-format bit size. The +// transform / transpose / upConv flags are only meaningful for loads; store +// and prefetch implementations ignore them. +struct BlockIOInstructionInterface { + // Returns the supported (widths, heights, counts) for the given element + // type, or std::nullopt if the element type is unsupported. + virtual std::optional< + std::tuple, llvm::ArrayRef, llvm::ArrayRef>> + getBlockWidthHeightCount(Type elemTy, bool hasTransform = false, + bool hasTranspose = false, + bool upConv = false) const = 0; + // Bit size of the packed format used by this block instruction. 
+ virtual int32_t getPackedFormatBitSize() const = 0; + virtual ~BlockIOInstructionInterface() = default; +}; + //===----------------------------------------------------------------------===// // Common instructions (shared across architectures) //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 4aa1c0d666a94..311cf9a64c0c4 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -822,8 +822,8 @@ DistributeLayoutAttr LayoutAttr::expandDim(int64_t dim, } // Per-sg view used as the base for lane_layout / lane_data / inst_data: - // targetShape[i] / sg_layout[i] when sg_layout is present, else - // targetShape itself. + // targetShape[i] / sg_layout[i] when sg_layout is present (and not + // replicated), else targetShape itself. SmallVector perSgShape(targetShape.begin(), targetShape.end()); if (hasSgLayout && !sgDataReplicated) for (int64_t i = 0; i < expCount; ++i) diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp index ca766f67c2583..2a13997aa181f 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp @@ -370,6 +370,86 @@ void xegpu::removeTemporaryLayoutAttrs(Operation *op) { }); } +/// Returns true if every dimension of `shape` except the innermost +/// `numInnerDims` is a unit (size-1) dimension. 
+static bool leadingDimsAreUnit(ArrayRef shape, int numInnerDims) { + int numLeading = static_cast(shape.size()) - numInnerDims; + if (numLeading <= 0) + return true; + return llvm::all_of(shape.take_front(numLeading), + [](int64_t dim) { return dim == 1; }); +} + +static xegpu::LayoutAttr buildInstDataLayoutWithLane( + mlir::MLIRContext *context, ArrayRef instData, + ArrayRef laneLayout, ArrayRef laneData, + DenseI32ArrayAttr orderAttr = nullptr) { + auto toI32Attr = [&](auto range) { + SmallVector v(range.begin(), range.end()); + return DenseI32ArrayAttr::get(context, v); + }; + return xegpu::LayoutAttr::get(context, /*sg_layout=*/nullptr, + /*sg_data=*/nullptr, toI32Attr(instData), + toI32Attr(laneLayout), toI32Attr(laneData), + orderAttr); +} + +static bool isValidLaneLayout(ArrayRef dataShape, + ArrayRef laneLayout, + ArrayRef laneData) { + return !llvm::any_of(llvm::seq(0, dataShape.size()), [&](int dim) { + return dataShape[dim] % (laneLayout[dim] * laneData[dim]) != 0; + }); +} + +static xegpu::LayoutAttr +buildLaneLayout(mlir::MLIRContext *context, ArrayRef laneLayout, + ArrayRef laneData, + DenseI32ArrayAttr orderAttr = nullptr) { + auto toI32Attr = [&](auto range) { + SmallVector v(range.begin(), range.end()); + return DenseI32ArrayAttr::get(context, v); + }; + return xegpu::LayoutAttr::get(context, /*sg_layout=*/nullptr, + /*sg_data=*/nullptr, + /*inst_data=*/nullptr, toI32Attr(laneLayout), + toI32Attr(laneData), orderAttr); +} + +static xegpu::LayoutAttr +buildLayout(mlir::MLIRContext *context, ArrayRef sgLayout, + ArrayRef sgData, ArrayRef instData, + ArrayRef laneLayout, ArrayRef laneData, + DenseI32ArrayAttr orderAttr = nullptr) { + auto toI32Attr = [&](auto range) { + SmallVector v(range.begin(), range.end()); + return DenseI32ArrayAttr::get(context, v); + }; + return xegpu::LayoutAttr::get( + context, sgLayout.empty() ? nullptr : toI32Attr(sgLayout), + sgData.empty() ? nullptr : toI32Attr(sgData), + instData.empty() ? 
nullptr : toI32Attr(instData), + laneLayout.empty() ? nullptr : toI32Attr(laneLayout), + laneData.empty() ? nullptr : toI32Attr(laneData), orderAttr); +} + +static xegpu::LayoutAttr buildSgLayout(mlir::MLIRContext *context, + ArrayRef wgTileShape, + ArrayRef sgLayout, + int dimK = -1, + DenseI32ArrayAttr orderAttr = nullptr) { + SmallVector sgData(sgLayout.size()); + for (int dim = 0; dim < (int)sgLayout.size(); ++dim) { + if (dim == dimK) + sgData[dim] = wgTileShape[dim]; + else + sgData[dim] = wgTileShape[dim] / sgLayout[dim]; + } + return buildLayout(context, sgLayout, sgData, + /*inst_data=*/{}, /*lane_layout=*/{}, + /*lane_data=*/{}, /*order=*/orderAttr); +} + /// Infers the source layout attribute for a broadcast operation given the /// result layout attribute, result shape, source shape. xegpu::DistributeLayoutAttr @@ -666,21 +746,13 @@ xegpu::inferExtractSourceLayout(xegpu::DistributeLayoutAttr resLayout, order.push_back(dimDiff - 1 - i); } - DenseI32ArrayAttr orderAttr = resLayout ? resLayout.getOrder() : nullptr; - auto toAttr = [&](ArrayRef v) -> DenseI32ArrayAttr { - if (v.empty()) - return DenseI32ArrayAttr(); - SmallVector v32(v.begin(), v.end()); - return DenseI32ArrayAttr::get(context, v32); - }; - auto srcLayout = xegpu::LayoutAttr::get( - context, sgLayout.empty() ? nullptr : toAttr(sgLayout), - sgData.empty() ? nullptr : toAttr(sgData), - instData.empty() ? nullptr : toAttr(instData), - laneLayout.empty() ? nullptr : toAttr(laneLayout), - laneData.empty() ? nullptr : toAttr(laneData), - (!orderAttr || orderAttr.empty()) ? 
nullptr : toAttr(order)); - return srcLayout; + DenseI32ArrayAttr orderAttr = DenseI32ArrayAttr::get( + context, SmallVector(order.begin(), order.end())); + if (!resLayout.getOrder()) + orderAttr = nullptr; + + return buildLayout(context, sgLayout, sgData, instData, laneLayout, + laneData, orderAttr); } return resLayout; } @@ -727,18 +799,6 @@ xegpu::inferShapeCastSourceLayout(xegpu::DistributeLayoutAttr resLayout, // Use case 3: General dim collapse, for cross-sg reduction to SLM and other // shape casts where consecutive src dims fold into a single dst dim. - // - // Mirrors use case 2's elegant shape: walk the dst-side groups and call - // a single layout-attribute primitive per group. Here the primitive is - // `expandDim(dim, targetShape)`, the inverse of `collapseDims`. It applies - // the per-field distribution policy required for a no-data-movement collapse - // (sg_layout/lane_layout spread outer-to-inner; sg_data/lane_data/inst_data - // fill innermost-first; inst_data is seeded from lane_layout * lane_data). - // See LayoutAttr::expandDim for the full policy. - // - // Iteration goes innermost-first (reverse dst order) so that each - // expandDim/dropDims call only mutates dst positions whose indices are - // unaffected by earlier calls. SmallVector> collapseDims; if (xegpu::matchDimCollapse(srcShape, resShape, collapseDims)) { auto srcLayout = resLayout; @@ -746,12 +806,10 @@ xegpu::inferShapeCastSourceLayout(xegpu::DistributeLayoutAttr resLayout, dstIdx >= 0; --dstIdx) { ArrayRef srcDims = collapseDims[dstIdx]; if (srcDims.empty()) { - // Unit dst dim with no backing src dim: drop it. srcLayout = srcLayout.dropDims({dstIdx}); continue; } if (srcDims.size() == 1) - // 1:1 mapping, nothing to do for this dim. 
continue; SmallVector targetShape; targetShape.reserve(srcDims.size()); @@ -761,7 +819,6 @@ xegpu::inferShapeCastSourceLayout(xegpu::DistributeLayoutAttr resLayout, } return srcLayout; } - llvm_unreachable("running into unsupported shape cast scenarios"); return nullptr; } @@ -776,460 +833,877 @@ xegpu::DistributeLayoutAttr xegpu::inferMaskOffsetLayoutForScatterIO( return payloadLayout; } -/// Sets up layout for reduction operations by creating a SliceAttr for the -/// result. -/// -/// Algorithm Overview: -/// This function attempts to construct a source layout that, when sliced along -/// reduction dimensions, produces a result layout compatible with the -/// consumer layout. -/// -/// For subgroup layouts, it first tries to align the source layout's subgroup -/// layout and data with the consumer's layout on non-reduction dimensions. -/// Then, it distributes remaining subgroups across reduction dimensions. This -/// avoids subgroup data redistribution overhead between the reduced result and -/// its consumer. When the consumer layout is a slice layout, it attempts to -/// reuse the slice layout's parent layout for the source to further minimize -/// potential data redistribution. -/// -/// InstData requries {1, ..., min(maxReduceVectorSize, srcShape),subgroupSize} -/// Lane Layout requires {1, ..., 1, subgroupSize} -/// Lane data requires {1, ..., min(maxReduceVectorSize, srcShape), 1} -/// -/// Examples: -/// 1. Subgroup layout - Row reduction on 2D tensor: -/// srcShape=[32, 128], reductionDims=[1], resShape=[32], subgroupSize=16, -/// NumSg=32 -/// * Consumer Layout: -/// #xegpu.slice<#xegpu.layout, dims = -/// [1]>} -//// * Result Layout: -/// #xegpu.slice<#xegpu.layout, dims = -/// [1]>} -/// Note that the sg_layout is reused but sg_data needs to be adjusted to -/// evenly distribute the source tensor tile among the reduction dim. -/// -/// 2. Subgroup layout - Same example above but consumer doesn't have a -/// reusable slice layout. 
-/// * Consumer Layout: -/// #xegpu.layout -/// * Result Layout: -/// #xegpu.slice<#xegpu.layout, dims = -/// [1]>} -/// * Consumer Layout: -/// #xegpu.slice<#xegpu.layout, -/// dims = [1, 2]>} -/// * Result Layout: -/// #xegpu.slice<#xegpu.layout, dims = -/// [1]>} -/// Note that the consumer's layout can't be directly reused as is. -/// So the algorithm distributes all subgroups on non reduction dimensions -/// first and then distribute remaining subgroups on the reduction -/// dimension. -/// -/// 2. InstData layout - Column reduction: -/// srcShape=[32, 64], reductionDims=[0], subgroupSize=16 -/// Result: instData=[1, 16] (maxReduceVectorSize=1, subgroupSize on -/// innermost) -/// -/// 3. Lane layout - Multi-dimensional reduction: -/// srcShape=[16, 32, 64], reductionDims=[1], subgroupSize=16 -/// Result: laneLayout=[1, 1, 16], laneData=[1, 1, 1] -/// (subgroupSize on innermost dim, max vector size on reduction dim) - -xegpu::SliceAttr xegpu::setupMultiReductionResultLayout( - xegpu::LayoutKind layoutKind, VectorType srcVecTy, - DistributeLayoutAttr consumerLayout, SmallVector reductionDims, - int numSg, const xegpu::uArch::uArch *uArch) { +//===----------------------------------------------------------------------===// +// Layout derivation helpers: factorize sgCount into +// sg_layout candidates, then +// compute per-subgroup (sgData) and per-lane +// (lane_layout/lane_data/inst_data). +//===----------------------------------------------------------------------===// - auto srcShape = srcVecTy.getShape(); - int srcRank = srcShape.size(); - auto context = srcVecTy.getContext(); +using LayoutRepresentation = SmallVector; + +/// Enumerates all ways to split `total` into `rank` factors whose product +/// equals `total`. Returns the list of all such factorizations. +static SmallVector enumerateFactorizations(int64_t total, + int64_t rank) { + SmallVector results; + SmallVector current(rank, 0); + + // Returns all divisors of `n` in ascending order. 
+ auto getDivisors = [](int64_t n) { + SmallVector divs; + for (int64_t i = 1; i * i <= n; ++i) { + if (n % i == 0) { + divs.push_back(i); + if (i != n / i) + divs.push_back(n / i); + } + } + llvm::sort(divs); + return divs; + }; - // Helper lambda to convert int64 vectors to int32 DenseArrayAttr - auto toInt32Attr = [&](ArrayRef vec) { - SmallVector vec32(vec.begin(), vec.end()); - return DenseI32ArrayAttr::get(context, vec32); + std::function generate = [&](int64_t dim, + int64_t remaining) { + if (dim == rank - 1) { + current[dim] = remaining; + results.push_back(LayoutRepresentation(current)); + return; + } + for (int64_t factor : getDivisors(remaining)) { + current[dim] = factor; + generate(dim + 1, remaining / factor); + } }; - const int subgroupSize = uArch->getSubgroupSize(); - int64_t maxReduceVectorSize = 1; // could extend to spirv vector Size - xegpu::DistributeLayoutAttr srcLayout; - if (layoutKind == xegpu::LayoutKind::Subgroup) { - xegpu::SliceAttr consumerSliceLayout = - dyn_cast_if_present(consumerLayout); - if (consumerSliceLayout && - consumerSliceLayout.getDims().asArrayRef().equals(reductionDims)) { - srcLayout = consumerSliceLayout.getParent(); - SmallVector sgLayoutFromConsumer = - srcLayout.getEffectiveSgLayoutAsInt(); - auto srcSgData = computeShapeRatio(srcShape, sgLayoutFromConsumer); - if (srcSgData) - for (int dim = 0; dim < srcRank; dim++) { - if (llvm::is_contained(reductionDims, dim)) - srcLayout = - srcLayout.setDimData(dim, srcSgData.value()[dim], -1, -1); - } - } else { - SmallVector consumerSgLayout = - consumerLayout ? consumerLayout.getEffectiveSgLayoutAsInt() - : SmallVector(); - SmallVector consumerSgData = - consumerLayout ? consumerLayout.getEffectiveSgDataAsInt() - : SmallVector(); - SmallVector consumerOrder = - consumerLayout ? consumerLayout.getEffectiveOrderAsInt() - : SmallVector(); - DenseI32ArrayAttr orderAttr = - consumerLayout ? 
consumerLayout.getOrder() : nullptr; - SmallVector sgLayout(srcRank), sgData(srcRank), order(srcRank); - int remainingSgCount = - consumerLayout ? consumerLayout.getNumSubgroups() : numSg; - int consumerIdx = 0; + generate(0, total); + return results; +} - // First pass: Match consumer's layout on non-reduction dimensions - for (int i = 0; i < srcRank; i++) { - if (!llvm::is_contained(reductionDims, i) && - consumerIdx < static_cast(consumerSgLayout.size())) { - sgLayout[i] = consumerSgLayout[consumerIdx]; - sgData[i] = consumerSgData[consumerIdx]; - remainingSgCount /= sgLayout[i]; - order[i] = consumerOrder[consumerIdx]; - consumerIdx++; - } - } +// Computes all valid N-dimensional sg_layout candidates for the given +// sgCount, whose sgData (= wgShape / sgLayout): +// 1. Evenly divides wgShape (i.e., wgShape[d] % sgLayout[d] == 0). +// 2. Is a multiple of instData (i.e., sgData[d] % instData[d] == 0). +// Results are sorted by balance (smallest max-min spread first), with +// lexicographic order as a tiebreaker. +// +// Example (2D): +// wgShape = [128, 64], instData = [8, 16], sgCount = 32 +// Returns: [[8,4], [16,2]], corresponding to sgData [16,16] and [8,32]. 
+static SmallVector +getSgLayoutCandidates(ArrayRef wgShape, ArrayRef instData, + int64_t sgCount) { + int64_t rank = wgShape.size(); + assert(rank > 0 && "wgShape must be non-empty"); + assert(static_cast(instData.size()) == rank && + "instData rank must match wgShape rank"); - // Second pass: Distribute remaining subgroups across reduction dimensions - // the reduction to scalar case is handled only by this loop - int64_t remainOrder = consumerSgLayout.size(); - for (int i = 0; i < srcRank; i++) { - if (llvm::is_contained(reductionDims, i)) { - sgLayout[i] = - std::min(srcShape[i], static_cast(remainingSgCount)); - assert((srcShape[i] % sgLayout[i] == 0) && - "source shape not divisible by sg_layout"); - sgData[i] = srcShape[i] / sgLayout[i]; - remainingSgCount /= sgLayout[i]; - order[i] = remainOrder++; - } - } + // Step 1: Get all N-D factorizations of sgCount. + auto allFactorizations = enumerateFactorizations(sgCount, rank); - assert(remainingSgCount == 1 && "not all subgroups distributed"); - srcLayout = xegpu::LayoutAttr::get( - context, toInt32Attr(sgLayout), toInt32Attr(sgData), - /*inst_data =*/nullptr, /*lane_layout =*/nullptr, - /*lane_data =*/nullptr, /*order =*/ - (!orderAttr || orderAttr.empty()) ? nullptr : toInt32Attr(order)); + // Step 2: Filter to keep only valid candidates. 
+ SmallVector candidates; + for (const auto &sgLayout : allFactorizations) { + bool valid = true; + for (int64_t dim = 0; dim < rank; ++dim) { + if (wgShape[dim] % sgLayout[dim] != 0) { + valid = false; + break; + } + int64_t sgData = wgShape[dim] / sgLayout[dim]; + if (sgData % instData[dim] != 0) { + valid = false; + break; + } } - } else if (layoutKind == xegpu::LayoutKind::InstData) { - - SmallVector instData(srcRank, 1); - if (srcRank >= 2) - instData[srcRank - 2] = - std::min(maxReduceVectorSize, srcShape[srcRank - 2]); - instData[srcRank - 1] = - std::min(static_cast(subgroupSize), srcShape[srcRank - 1]); - srcLayout = xegpu::LayoutAttr::get(context, toInt32Attr(instData)); - } else if (layoutKind == xegpu::LayoutKind::Lane) { - - SmallVector laneLayout(srcRank, 1), laneData(srcRank, 1); - laneLayout[srcRank - 1] = - std::min(static_cast(subgroupSize), srcShape[srcRank - 1]); - if (srcRank >= 2) - laneData[srcRank - 2] = - std::min(maxReduceVectorSize, srcShape[srcRank - 2]); - srcLayout = xegpu::LayoutAttr::get(context, toInt32Attr(laneLayout), - toInt32Attr(laneData)); + if (valid) + candidates.push_back(sgLayout); } - return xegpu::SliceAttr::get(context, srcLayout, - DenseI64ArrayAttr::get(context, reductionDims)); + // Step 3: Sort by balance (smallest max-min spread), then lexicographic. + llvm::sort(candidates, [](const LayoutRepresentation &lhs, + const LayoutRepresentation &rhs) { + int64_t spreadLhs = *llvm::max_element(lhs) - *llvm::min_element(lhs); + int64_t spreadRhs = *llvm::max_element(rhs) - *llvm::min_element(rhs); + if (spreadLhs != spreadRhs) + return spreadLhs < spreadRhs; + return lhs < rhs; + }); + return candidates; } -/// Sets up layout for Reduction operations by creating a SliceAttr for the -/// result. -xegpu::SliceAttr -xegpu::setupReductionResultLayout(xegpu::LayoutKind layoutKind, - VectorType srcVecTy, - const xegpu::uArch::uArch *uArch) { +/// Helper function to compute the inst_data vector for a 2D block IO op from +/// the block widths/heights supported by the uArch instruction.
+static std::optional> get2DBlockIOInstDataLayout( + ArrayRef dataShape, Type elemTy, + const xegpu::uArch::BlockIOInstructionInterface *uArchInstruction, + bool transform = false, bool transpose = false) { + int rank = dataShape.size(); + auto blockWHC = + uArchInstruction->getBlockWidthHeightCount(elemTy, transform, transpose); + if (!blockWHC) + return std::nullopt; + auto [bWidths, bHeights, bCounts] = blockWHC.value(); + // Compute inst_data from hardware block params. For Nd ops, the lane + // factorization above (laneLayout / laneData) is rigid; inst_data must be + // a multiple of lane_layout * lane_data on each dim (Category A + // invariant). + SmallVector instData(rank, 1); + assert(rank >= 2 && "dataShape must be at least 2D for 2D-block IO"); + int instWidth = + xegpu::getLargestDivisor(static_cast(dataShape.back()), bWidths); + int instHeight = + xegpu::getLargestDivisor(static_cast(dataShape[rank - 2]), bHeights); + instData.back() = instWidth; + instData[rank - 2] = instHeight; + + return instData; +} - auto srcShape = srcVecTy.getShape(); - auto context = srcVecTy.getContext(); - auto subgroupSize = uArch->getSubgroupSize(); - xegpu::LayoutAttr srcLayout; +/// Helper function to compute inst_data vectors for DPAS operands A, B, and +/// C/D. 
Look up the uArch table and search for the largest supported block size +/// that divides the data shape +static std::optional, SmallVector, + SmallVector>> +getDpasInstDataLayouts( + VectorType aTy, VectorType bTy, VectorType cdTy, + const xegpu::uArch::MMAInstructionInterface *uArchInstruction) { - if (layoutKind == xegpu::LayoutKind::Subgroup) { - assert(true && "subgroup layout assignment not supported for reduction (op " - "is not expected at this level)."); - } else if (layoutKind == xegpu::LayoutKind::InstData) { - assert(true && "instData layout assignment not supported for reduction (op " - "is not expected at this level)."); - } else if (layoutKind == xegpu::LayoutKind::Lane) { - SmallVector laneLayout(1), laneData(1); - laneLayout[0] = std::min(subgroupSize, static_cast(srcShape[0])); - laneData[0] = 1; - srcLayout = xegpu::LayoutAttr::get( - context, DenseI32ArrayAttr::get(context, laneLayout), - DenseI32ArrayAttr::get(context, laneData)); - } + // M dimension is the second-to-last dim of A (handles batch dims). + const unsigned dataALen = aTy.getShape()[aTy.getRank() - 2]; + auto supportedALen = uArchInstruction->getSupportedM(aTy.getElementType()); + const int maxALen = + xegpu::getLargestDivisor(dataALen, ArrayRef(supportedALen)); - auto result = xegpu::SliceAttr::get(context, srcLayout, - DenseI64ArrayAttr::get(context, 0)); - return result; -} + // N dimension is the last dim of B. + const unsigned dataBLen = bTy.getShape().back(); + auto supportedBLen = uArchInstruction->getSupportedN(bTy.getElementType()); + const int maxBLen = + xegpu::getLargestDivisor(dataBLen, ArrayRef(supportedBLen)); -/// Sets up the result layout for a bitcast operation. -/// When casting to a smaller bitwidth, adjusts the layout dimensions (sgData, -/// instData, or laneData) by multiplying by the bitwidth ratio to ensure the -/// result layout can be correctly divided back to the source layout during -/// inference. -/// -/// Examples: -/// 1. 
Casting f32 -> f16 (32-bit to 16-bit, bitWidthRatio = 2): -/// Consumer layout: instData=[1, 16], subgroupSize=16 -/// Source shape: [8, 32] -/// Result layout: instData=[1, 32] (16 * 2) -/// The innermost dimension is multiplied by 2 to maintain consistency. -/// -/// 2. Casting f32 -> i8 (32-bit to 8-bit, bitWidthRatio = 4): -/// Consumer instData=[1, 16], subgroupSize=16 -/// Source shape: [4, 128] -/// adjust the instData from [1, 16] to [1, 16 * 4 = 64] -/// -/// 3. Casting i8 -> i32 (8-bit to 32-bit, bitWidthRatio = 1/4): -/// Consumer layout: laneLayout=[1, 16], laneData=[1, 4] -/// No adjustment needed - returns consumer layout directly. -/// -xegpu::DistributeLayoutAttr xegpu::setupBitCastResultLayout( - xegpu::LayoutKind layoutKind, VectorType srcVecTy, VectorType resVecTy, - DistributeLayoutAttr consumerLayout, const xegpu::uArch::uArch *uArch) { + auto supportedCLen = uArchInstruction->getSupportedN(cdTy.getElementType()); + const int maxCLen = + xegpu::getLargestDivisor(dataBLen, ArrayRef(supportedCLen)); + if (maxALen == -1 || maxBLen == -1 || maxCLen == -1) + return std::nullopt; - int srcElemTyBitWidth = srcVecTy.getElementType().getIntOrFloatBitWidth(); - int resElemTyBitWidth = resVecTy.getElementType().getIntOrFloatBitWidth(); + auto supportedKLen = uArchInstruction->getSupportedK(aTy.getElementType()); + if (supportedKLen.empty()) + return std::nullopt; + auto kDimSize = supportedKLen[0]; - ArrayRef srcShape = srcVecTy.getShape(); - ArrayRef resShape = resVecTy.getShape(); - SmallVector sgData = consumerLayout.getEffectiveSgDataAsInt(); - SmallVector instData = consumerLayout.getEffectiveInstDataAsInt(); - SmallVector laneData = consumerLayout.getEffectiveLaneDataAsInt(); - SmallVector laneLayout = - consumerLayout.getEffectiveLaneLayoutAsInt(); + SmallVector instDataA(aTy.getRank(), 1); + instDataA[aTy.getRank() - 2] = maxALen; + instDataA[aTy.getRank() - 1] = kDimSize; + SmallVector instDataB(bTy.getRank(), 1); + instDataB[bTy.getRank() - 2] = 
kDimSize; + instDataB[bTy.getRank() - 1] = maxBLen; + SmallVector instDataCD(cdTy.getRank(), 1); + instDataCD[cdTy.getRank() - 2] = maxALen; + instDataCD[cdTy.getRank() - 1] = maxCLen; + return std::make_tuple(instDataA, instDataB, instDataCD); +} - assert(consumerLayout.getRank() == static_cast(srcShape.size()) && - "laneData must be available for all dimensions"); - size_t innerMostDim = srcShape.size() - 1; - int64_t sgDataValue = -1; - int64_t instDataValue = -1; - int64_t laneDataValue = -1; - if (srcElemTyBitWidth > resElemTyBitWidth) { - // When casting to a smaller bitwidth, multiply the result layout - // accordingly to ensure it can be divided by the ratio back to the - // source layout. - int bitWidthRatio = srcElemTyBitWidth / resElemTyBitWidth; - if (layoutKind == xegpu::LayoutKind::Subgroup) { - sgDataValue = sgData[innerMostDim]; - while ((sgDataValue <= resShape[innerMostDim]) && - (sgDataValue % bitWidthRatio) != 0) - sgDataValue *= 2; - } else if (layoutKind == xegpu::LayoutKind::InstData) { - instDataValue = instData[innerMostDim]; - const int innermostDimLaneLayout = laneLayout.empty() - ? 
uArch->getSubgroupSize() - : laneLayout[innerMostDim]; - // Adjust instDataValue so it still fits within an instruction after - // dividing by bitWidthRatio - while ((instDataValue <= resShape[innerMostDim]) && - (instDataValue % (innermostDimLaneLayout * bitWidthRatio) != 0)) - instDataValue *= 2; - assert((resShape[innerMostDim] % instDataValue) == 0 && - "resShape, instData, and lanelayout for innermost must be 2^n !"); - } else if (layoutKind == xegpu::LayoutKind::Lane) { - laneDataValue = laneData[innerMostDim]; - while ((laneDataValue <= resShape[innerMostDim]) && - (laneDataValue % bitWidthRatio != 0)) - laneDataValue *= 2; - } - // Now set only instData and laneData, preserving sgData - xegpu::DistributeLayoutAttr resLayout; - resLayout = consumerLayout.setDimData(innerMostDim, sgDataValue, - instDataValue, laneDataValue); - return resLayout; +/// Computes lane_layout and lane_data for scatter-style store anchor layouts +/// (store scatter, store matrix). Lanes and the per-lane vector both live on +/// the innermost dim: +/// - laneLayout[innermost] = min(subgroupSize, srcShape[innermost]) +/// - laneData[innermost] = min(srcShape[innermost] / laneLayout[innermost], +/// maxChunkSize) +/// All other entries are 1. +static std::pair, SmallVector> +computeScatterIOLaneLayoutAndData(ArrayRef instShape, + int64_t subgroupSize, int64_t maxChunkSize) { + int64_t rank = instShape.size(); + SmallVector laneLayout(rank, 1), laneData(rank, 1); + int64_t innermost = rank - 1; + laneLayout[innermost] = std::min(subgroupSize, instShape[innermost]); + laneData[innermost] = + std::min(instShape[innermost] / laneLayout[innermost], maxChunkSize); + return {laneLayout, laneData}; +} + +// Computes the per-lane layout and data for a 2D block load/store/prefetch: +// lanes are spread across the subgroup along the last dim (or rank-2 if +// transposed), and laneData packs sub-bitwidth elements along the packing dim. 
+static std::pair, SmallVector> +compute2DBlockIOLaneLayoutAndData(ArrayRef instShape, + int64_t subgroupSize, int64_t bitwidth, + int64_t packingSize, bool transform = false) { + int64_t rank = instShape.size(); + SmallVector laneLayout(rank, 1), laneData(rank, 1); + int kDim = transform ? rank - 2 : rank - 1; + unsigned vnniFactor = packingSize / bitwidth; + laneData[kDim] = bitwidth < packingSize ? vnniFactor : 1; + laneLayout.back() = + std::min(subgroupSize, instShape.back() / laneData.back()); + + // assert that the lane layout and data fit in the inst shape + for (int64_t i = 0; i < rank; ++i) { + int64_t laneProduct = laneLayout[i] * laneData[i]; + assert(instShape[i] % laneProduct == 0 && + "lane_layout * lane_data must evenly divide the inst shape"); + (void)laneProduct; } - return consumerLayout; + return {laneLayout, laneData}; } -/// Sets up the result layout for an interleave operation to ensure the source -/// layout can be safely derived. Interleave doubles the innermost dimension, -/// so the result layout must ensure that laneData is a multiple -/// of 2, and instData must be divisible by innermostDimLaneLayout * 2. +/// Computes the (lane_layout, lane_data) for a multi-reduction's source layout. +/// Only the innermost two dims are distributed; leading dims are assumed unit. +/// `subgroupSize` lanes go on one dim; up to `maxReduceVectorSize` elements are +/// packed into lane_data on the other. To minimize cross-lane reduction, lanes +/// are spread across a non-reduction dim when possible so the reduction happens +/// within a lane. inst_data is the element-wise product lane_layout * +/// lane_data. 
/// -/// Example: -/// Interleave: vector<128x256xf4> -> vector<128x512xf4> -/// Consumer layout: laneLayout=[1, 16], laneData=[1, 4], instData=[1, 64] -/// Result layout adjustment to ensure source can be safely inferred: -/// - laneData must be >= 2 and multiple of 2 (so source = laneData/2 is -/// valid) -/// - instData must be divisible by (16 * 2 = 32) (so source = instData/2 is -/// valid) -/// - Adjusted instData: ensure (instData % 32 == 0) -/// -xegpu::DistributeLayoutAttr xegpu::setupInterleaveResultLayout( - xegpu::LayoutKind layoutKind, VectorType srcVecTy, VectorType resVecTy, - DistributeLayoutAttr consumerLayout, const xegpu::uArch::uArch *uArch) { +/// e.g. with srcShape=[32, 128], subgroupSize=16, maxReduceVectorSize=2: +/// - Switch: reductionDims=[1] and consumerReductionDims=[] -> lanes move +/// to the non-reduction dim 0: lane_layout=[16, 1], lane_data=[1, 2]. +/// - Default: reductionDims=[0, 1] (both reduced) -> lanes stay on the +/// innermost dim: lane_layout=[1, 16], lane_data=[2, 1]. 
+static std::pair, SmallVector> +computeReductionLaneLayoutAndData(ArrayRef srcShape, + ArrayRef reductionDims, + int subgroupSize, int64_t maxReduceVectorSize, + bool verticalLaneLayout = false) { + int srcRank = srcShape.size(); + SmallVector laneLayout(srcRank, 1), laneData(srcRank, 1); - ArrayRef srcShape = srcVecTy.getShape(); - SmallVector sgData = consumerLayout.getEffectiveSgDataAsInt(); - SmallVector instData = consumerLayout.getEffectiveInstDataAsInt(); - SmallVector laneData = consumerLayout.getEffectiveLaneDataAsInt(); - SmallVector laneLayout = - consumerLayout.getEffectiveLaneLayoutAsInt(); + int innermost = srcRank - 1; + int secondInnermost = srcRank - 2; - assert(consumerLayout.getRank() == static_cast(srcShape.size()) && - "consumer layout rank must match source shape rank"); - const size_t innerMostDim = srcShape.size() - 1; - int64_t sgDataValue = -1; - int64_t instDataValue = -1; - int64_t laneDataValue = -1; + if (verticalLaneLayout && secondInnermost >= 0) { + std::swap(innermost, secondInnermost); + } + int laneDim = innermost; + int vectorDim = secondInnermost; // negative for rank 1 - // Interleave doubles the innermost dimension (ratio = 2) - constexpr int ratio = 2; + laneLayout[laneDim] = + std::min(static_cast(subgroupSize), srcShape[laneDim]); + if (vectorDim >= 0) + laneData[vectorDim] = std::min(maxReduceVectorSize, srcShape[vectorDim]); + + return {laneLayout, laneData}; +} + +//===----------------------------------------------------------------------===// +// Result/anchor-layout setup. Each op category derives lane_layout/lane_data +// (and inst_data / sgData) differently. Two things vary across ops: +// +// * Consumer dependence: consumer-driven ops prefer the layout requested by +// their downstream uses and fall back to uArch defaults only when it is +// absent/invalid; sinks (StoreNd, PrefetchNd) have no consumer and always +// pick their own layout from uArch. 
+// +// * Derivation direction between inst_data and lane_layout/lane_data. Both +// obey the invariant inst_data = k * lane_layout * lane_data, where `k` is +// a per-dim integer >= 1 giving how many times each lane repeats its +// access to cover one instruction's data tile (k == 1 means one lane +// position per element; k > 1 means the instruction loads/stores several +// elements per lane along that dim). Ops solve this invariant from +// opposite ends: +// - Rigid-lane ops (Nd block IO, DPAS): hardware fixes lane_layout / +// lane_data first, then inst_data is built as a multiple of their +// product (using get2DBlockIOInstDataLayout / getDpasInstDataLayouts). +// - inst_data-first ops (scatter load): take inst_data from the consumer +// and derive lane_layout/lane_data underneath it. +// +// - DPAS (+DPAS_MX) : rigid lanes — inst_data from HW block dims; A/B/C/D +// lanes/data follow each operand's matmul role; DPAS_MX +// additionally lays out the scale operand. +// - LoadNd : consumer-driven, rigid lanes — honors the consumer's +// inst_data / lane / sg_layout (incl. transpose & VNNI +// packing) when it satisfies uArch block constraints, +// else falls back to the default 2D-block scheme (lanes +// on the last dim, rank-2 if transposed). The fallback +// picks the LARGEST uArch block that divides the data +// shape, so the resulting inst_data block can be bigger +// than what the consumer asked for (fewer, wider +// loads). +// - StoreNd/PrefetchNd: data sinks, no consumer, rigid lanes — pick the +// 2D-block layout directly from uArch (no VNNI +// packing). +// - Load (scatter) : load_gather / load_matrix, consumer-driven, +// inst_data-first — reuse the consumer's inst_data and +// derive lane_layout/lane_data, else default to lanes + +// per-lane chunk on the innermost dim (chunk capped by +// maxChunkSize). +// - Store (scatter) : store_scatter / store_matrix — same scatter scheme, +// but always self-derived from the scatter default. 
+// - Reduction : (multi_)reduction, consumer-driven — distribute the +// inner two dims, with lanes on the innermost dim by +// default (reducing across lanes) and switched to a +// non-reduction dim only when that keeps the reduction +// within a lane. Reuses the consumer's slice layout +// when it slices exactly the reduction dims, otherwise +// re-derives. See setupMultiReductionResultLayout for +// the exact switch condition and worked examples. +// - BitCast/Interleave: scale the innermost data field by the bitwidth / +// interleave ratio so the source layout divides back +// out. +// - InsertStridedSlice: clamp lane_data per dim to fit the inserted slice +// (Lane kind only; sg/inst layouts unsupported). +//===----------------------------------------------------------------------===// + +/// Helper function to set up subgroup layouts for DPAS operands A, B, and +/// C/D. Compute subgroup layout candidates based on wgtile and instData, and +/// then pick the best one that satisfies all operands and the consumer (if +/// specified). 
+static std::optional< + std::tuple> +getDpasSubgroupLayouts( + mlir::MLIRContext *context, VectorType aTy, VectorType bTy, VectorType cdTy, + xegpu::DistributeLayoutAttr consumerLayout, int numSg, + std::tuple, SmallVector, SmallVector> + instDataVecs) { + auto [instDataA, instDataB, instDataCD] = instDataVecs; + + std::optional consumerSgLayout = std::nullopt; + if (consumerLayout && consumerLayout.isForWorkgroup()) { + consumerSgLayout = consumerLayout.getEffectiveSgLayoutAsInt(); + } + + // Get all valid layouts for A, B and C/D operands + auto layoutsA = getSgLayoutCandidates(aTy.getShape(), instDataA, numSg); + auto layoutsB = getSgLayoutCandidates(bTy.getShape(), instDataB, numSg); + auto layoutsCD = getSgLayoutCandidates(cdTy.getShape(), instDataCD, numSg); + if (layoutsA.empty() || layoutsB.empty() || layoutsCD.empty()) + return std::nullopt; + + // Pick the best subgroup layout + std::optional bestPick; + auto checkAlignedSgDataAB = [&](const LayoutRepresentation &sgLayout) { + return aTy.getShape().back() / sgLayout[1] == + bTy.getShape().front() / sgLayout[0]; + }; + for (auto &sgLayout : layoutsB) { + if (llvm::is_contained(layoutsA, sgLayout) && + llvm::is_contained(layoutsCD, sgLayout)) { + if (!checkAlignedSgDataAB(sgLayout)) + continue; + // Is in (A and B and CD) and matches consumer -> best pick + if (consumerSgLayout.has_value() && sgLayout == *consumerSgLayout) { + bestPick = sgLayout; + break; + } + // Is in (A and B and CD) layoutsB is ordered from most + // balanced to least. So the first one we see is the most balanced one, + // remember it and later only update if there is one that matches the + // consumer. 
+ if (!bestPick) + bestPick = sgLayout; + } + } + if (!bestPick) + return std::nullopt; + + const auto &picked = *bestPick; + + auto dpasALayout = buildSgLayout(context, aTy.getShape(), picked, + /*dimK=*/aTy.getRank() - 1); + auto dpasBLayout = buildSgLayout(context, bTy.getShape(), picked, + /*dimK=*/bTy.getRank() - 2); + auto dpasCDLayout = buildSgLayout(context, cdTy.getShape(), picked); + return std::make_tuple(dpasALayout, dpasBLayout, dpasCDLayout); +} + +/// Sets up the anchor layouts for dpas operands (A, B, and C/D). +/// The numSg and consumerLayout (optional) are only used by sg layout +/// creation. +std::optional< + std::tuple> +xegpu::setupDpasLayout(xegpu::LayoutKind layoutKind, VectorType aTy, + VectorType bTy, VectorType cdTy, + xegpu::DistributeLayoutAttr consumerLayout, int numSg, + const xegpu::uArch::uArch *uArch) { + auto context = aTy.getContext(); + const auto *uArchInstruction = + dyn_cast(uArch->getInstruction( + xegpu::uArch::InstructionKind::SubgroupMatrixMultiplyAcc)); + if (!uArchInstruction) + return std::nullopt; + auto subgroupSize = uArch->getSubgroupSize(); + + auto [laneLayoutA, laneDataA] = compute2DBlockIOLaneLayoutAndData( + aTy.getShape(), subgroupSize, + aTy.getElementType().getIntOrFloatBitWidth(), + uArchInstruction->getPackedFormatBitSizeA()); + auto [laneLayoutB, laneDataB] = compute2DBlockIOLaneLayoutAndData( + bTy.getShape(), subgroupSize, + bTy.getElementType().getIntOrFloatBitWidth(), + uArchInstruction->getPackedFormatBitSizeB(), /*transform=*/true); + auto [laneLayoutCD, laneDataCD] = compute2DBlockIOLaneLayoutAndData( + cdTy.getShape(), subgroupSize, + cdTy.getElementType().getIntOrFloatBitWidth(), + cdTy.getElementType().getIntOrFloatBitWidth()); + + auto instDataVecs = getDpasInstDataLayouts(aTy, bTy, cdTy, uArchInstruction); + if (!instDataVecs) + return std::nullopt; if (layoutKind == xegpu::LayoutKind::Subgroup) { - sgDataValue = sgData[innerMostDim]; - // Ensure sgDataValue is divisible by ratio so source sgData
can be inferred - while ((sgDataValue <= srcShape[innerMostDim]) && - (sgDataValue % ratio != 0)) - sgDataValue *= ratio; + assert(numSg > 0 && + "Number of subgroups must be provided for sg layout creation."); + return getDpasSubgroupLayouts(context, aTy, bTy, cdTy, consumerLayout, + numSg, *instDataVecs); } else if (layoutKind == xegpu::LayoutKind::InstData) { - instDataValue = instData[innerMostDim]; - const int innermostDimLaneLayout = laneLayout.empty() - ? uArch->getSubgroupSize() - : laneLayout[innerMostDim]; - // Adjust instDataValue so it can be divided by (innermostDimLaneLayout * - // ratio) when inferring the source layout - while ((instDataValue <= srcShape[innerMostDim]) && - (instDataValue % (innermostDimLaneLayout * ratio) != 0)) - instDataValue *= ratio; - assert((srcShape[innerMostDim] % instDataValue) == 0 && - "srcShape, instData, and laneLayout for innermost must be 2^n!"); + auto [instDataA, instDataB, instDataCD] = *instDataVecs; + return std::make_tuple( + buildInstDataLayoutWithLane(context, instDataA, laneLayoutA, laneDataA), + buildInstDataLayoutWithLane(context, instDataB, laneLayoutB, laneDataB), + buildInstDataLayoutWithLane(context, instDataCD, laneLayoutCD, + laneDataCD)); } else if (layoutKind == xegpu::LayoutKind::Lane) { - laneDataValue = laneData[innerMostDim]; - // Ensure laneDataValue is at least 2 and divisible by ratio - // so that source laneData = laneDataValue/2 is valid - while ((laneDataValue <= srcShape[innerMostDim]) && - (laneDataValue % ratio != 0)) - laneDataValue *= ratio; + auto aLayout = buildLaneLayout(context, laneLayoutA, laneDataA); + auto bLayout = buildLaneLayout(context, laneLayoutB, laneDataB); + auto cdLayout = buildLaneLayout(context, laneLayoutCD, laneDataCD); + return std::make_tuple(aLayout, bLayout, cdLayout); } + return std::nullopt; +} - return consumerLayout.setDimData(innerMostDim, sgDataValue, instDataValue, - laneDataValue); +/// Helper to create a scale layout derived from a matrix operand 
layout. +/// The scale layout is computed by mapping each dimension of the matrix +/// layout to the corresponding scale tensor dimension using the ratio +/// between the matrix and scale shapes. +static xegpu::DistributeLayoutAttr +createScaleLayout(mlir::MLIRContext *context, VectorType matrixTy, + VectorType scaleTy, xegpu::DistributeLayoutAttr matrixLayout, + bool isBScale, const xegpu::uArch::uArch *uArch) { + if (!scaleTy || !matrixLayout) + return nullptr; + + // Calculate scaling factor by dividing matrix shape by scale shape + ArrayRef matrixShape = matrixTy.getShape(); + ArrayRef scaleShape = scaleTy.getShape(); + + // Scale shapes can be 1D or 2D, handle both cases + if (scaleShape.empty()) + return nullptr; + + auto uArchInstruction = + dyn_cast( + uArch->getInstruction( + xegpu::uArch::InstructionKind::SubgroupScaledMatrixMultiplyAcc)); + + int64_t rank = matrixLayout.getRank(); + assert(rank >= 2 && "dpas layouts must be at least two dimensions"); + + SmallVector sgLayout = matrixLayout.getEffectiveSgLayoutAsInt(); + SmallVector sgData = matrixLayout.getEffectiveSgDataAsInt(); + SmallVector instData = matrixLayout.getEffectiveInstDataAsInt(); + SmallVector laneLayout = matrixLayout.getEffectiveLaneLayoutAsInt(); + SmallVector laneData = matrixLayout.getEffectiveLaneDataAsInt(); + auto order = matrixLayout.getOrder(); + + SmallVector scaleSgLayout; + SmallVector scaleSgData; + if (!sgLayout.empty() && !sgData.empty()) { + scaleSgLayout.assign(sgLayout.begin(), sgLayout.end()); + scaleSgData.assign(sgData.begin(), sgData.end()); + scaleSgData[rank - 2] = std::max( + scaleShape[rank - 2] / (matrixShape[rank - 2] / sgData[rank - 2]), 1); + scaleSgData[rank - 1] = std::max( + scaleShape[rank - 1] / (matrixShape[rank - 1] / sgData[rank - 1]), 1); + } + + // For DPAS_MX scales: if matrix has inst_data, scale needs adjusted + // inst_data. Scale inst_data is derived from matrix inst_data divided by + // scale factor. 
+ SmallVector scaleInstData; + if (!instData.empty()) { + scaleInstData.assign(instData.begin(), instData.end()); + if (isBScale) + scaleInstData[rank - 2] = std::max( + scaleShape[rank - 2] / (matrixShape[rank - 2] / instData[rank - 2]), + 1); + else + scaleInstData[rank - 1] = std::max( + scaleShape[rank - 1] / (matrixShape[rank - 1] / instData[rank - 1]), + 1); + } + + SmallVector scaleLaneLayout; + SmallVector scaleLaneData; + if (!laneLayout.empty() && !laneData.empty()) { + scaleLaneLayout.assign(laneLayout.begin(), laneLayout.end()); + scaleLaneData.assign(laneData.size(), 1); + + bool isRowMajor = uArchInstruction->isLaneLayoutRowMajorOrder(); + if (isBScale ^ isRowMajor) + std::swap(scaleLaneLayout[rank - 2], scaleLaneLayout[rank - 1]); + // Cap lane_layout by the per-instruction tile (inst_data) on each dim. + // Then derive lane_data = inst_data / lane_layout so the Category A + // invariant inst_data = lane_layout * lane_data * k (with k = 1) holds + // for the scale operand's load_nd consumer. + auto layoutCap = scaleInstData.empty() ? scaleShape : scaleInstData; + for (int64_t d = rank - 2; d < rank; ++d) + scaleLaneLayout[d] = std::min(layoutCap[d], scaleLaneLayout[d]); + } + return buildLayout(context, scaleSgLayout, scaleSgData, scaleInstData, + scaleLaneLayout, scaleLaneData, order); } -/// Sets up the result layout for an insert strided slice operation. -/// Creates a result layout based on the specified layout kind (InstData or -/// Lane). -xegpu::DistributeLayoutAttr xegpu::setupInsertStridedSliceResultLayout( - xegpu::LayoutKind layoutKind, VectorType srcVectorTy, - VectorType resVectorTy, xegpu::DistributeLayoutAttr consumerLayout, - const xegpu::uArch::uArch *uArch) { +/// Sets up the anchor layouts for dpas_mx operands (A, B, C/D, A_scale, and +/// B_scale). The numSg and consumerLayout (optional) are only used by sg +/// layout creation. 
+std::optional< + std::tuple> +xegpu::setupDpasMxLayout(xegpu::LayoutKind layoutKind, VectorType aTy, + VectorType bTy, VectorType cdTy, VectorType aScaleTy, + VectorType bScaleTy, + xegpu::DistributeLayoutAttr consumerLayout, int numSg, + const xegpu::uArch::uArch *uArch) { + auto context = aTy.getContext(); + const auto *uArchInstruction = + dyn_cast(uArch->getInstruction( + xegpu::uArch::InstructionKind::SubgroupMatrixMultiplyAcc)); + if (!uArchInstruction) + return std::nullopt; + auto subgroupSize = uArch->getSubgroupSize(); - xegpu::DistributeLayoutAttr requiredResLayout; - SmallVector consumerInstData = - consumerLayout.getEffectiveInstDataAsInt(); - SmallVector consumerLaneData = - consumerLayout.getEffectiveLaneDataAsInt(); - SmallVector consumerLaneLayout = - consumerLayout.getEffectiveLaneLayoutAsInt(); - ArrayRef srcShape = srcVectorTy.getShape(); - int64_t instDataValue = -1; - int64_t laneDataValue = -1; + auto [laneLayoutA, laneDataA] = compute2DBlockIOLaneLayoutAndData( + aTy.getShape(), subgroupSize, + aTy.getElementType().getIntOrFloatBitWidth(), + uArchInstruction->getPackedFormatBitSizeA()); + auto [laneLayoutB, laneDataB] = compute2DBlockIOLaneLayoutAndData( + bTy.getShape(), subgroupSize, + bTy.getElementType().getIntOrFloatBitWidth(), + uArchInstruction->getPackedFormatBitSizeB(), /*transform=*/true); + auto [laneLayoutCD, laneDataCD] = compute2DBlockIOLaneLayoutAndData( + cdTy.getShape(), subgroupSize, + cdTy.getElementType().getIntOrFloatBitWidth(), + cdTy.getElementType().getIntOrFloatBitWidth()); + auto instDataVecs = getDpasInstDataLayouts(aTy, bTy, cdTy, uArchInstruction); + if (!instDataVecs) + return std::nullopt; - requiredResLayout = consumerLayout; - int srcRank = srcShape.size(); + if (layoutKind == xegpu::LayoutKind::Subgroup) { + assert(numSg > 0 && + "Number of subgroups must be provided for sg layout creation."); + auto dpasLayouts = getDpasSubgroupLayouts( + context, aTy, bTy, cdTy, consumerLayout, numSg, *instDataVecs); + if
(!dpasLayouts) + return std::nullopt; + + auto [dpasALayout, dpasBLayout, dpasCDLayout] = *dpasLayouts; + + // Create scale layouts + auto aScaleLayout = + createScaleLayout(context, aTy, aScaleTy, dpasALayout, false, uArch); + + auto bScaleLayout = + createScaleLayout(context, bTy, bScaleTy, dpasBLayout, true, uArch); + + return std::make_tuple(dpasALayout, dpasBLayout, dpasCDLayout, aScaleLayout, + bScaleLayout); + } else if (layoutKind == xegpu::LayoutKind::InstData) { + + auto [instDataA, instDataB, instDataCD] = *instDataVecs; + + auto dpasALayout = + buildInstDataLayoutWithLane(context, instDataA, laneLayoutA, laneDataA); + auto dpasBLayout = + buildInstDataLayoutWithLane(context, instDataB, laneLayoutB, laneDataB); + auto dpasCDLayout = buildInstDataLayoutWithLane(context, instDataCD, + laneLayoutCD, laneDataCD); + + auto aScaleLayout = + createScaleLayout(context, aTy, aScaleTy, dpasALayout, false, uArch); + auto bScaleLayout = + createScaleLayout(context, bTy, bScaleTy, dpasBLayout, true, uArch); + + return std::make_tuple(dpasALayout, dpasBLayout, dpasCDLayout, aScaleLayout, + bScaleLayout); + } else if (layoutKind == xegpu::LayoutKind::Lane) { + auto dpasALayout = buildLaneLayout(context, laneLayoutA, laneDataA); + auto dpasBLayout = buildLaneLayout(context, laneLayoutB, laneDataB); + auto dpasCDLayout = buildLaneLayout(context, laneLayoutCD, laneDataCD); + + auto aScaleLayout = + createScaleLayout(context, aTy, aScaleTy, dpasALayout, false, uArch); + auto bScaleLayout = + createScaleLayout(context, bTy, bScaleTy, dpasBLayout, true, uArch); + + return std::make_tuple(dpasALayout, dpasBLayout, dpasCDLayout, aScaleLayout, + bScaleLayout); + } + return std::nullopt; +} + +/// Sets up the anchor layout for a store_nd operation. StoreNd picks its +/// own layout based on uArch block parameters (it does not take a consumer +/// layout, since it is a data sink). 
+xegpu::DistributeLayoutAttr +xegpu::setupStoreNdAnchorLayout(xegpu::LayoutKind layoutKind, + VectorType srcVecTy, int numSg, + const xegpu::uArch::uArch *uArch) { + const auto *uArchInstruction = + dyn_cast( + uArch->getInstruction( + xegpu::uArch::InstructionKind::Subgroup2DBlockStore)); + if (!uArchInstruction) + return nullptr; + + auto context = srcVecTy.getContext(); + Type elemTy = srcVecTy.getElementType(); + auto subgroupSize = uArch->getSubgroupSize(); + auto dataShape = srcVecTy.getShape(); + int rank = srcVecTy.getRank(); + assert(rank >= 2 && "Expected at least 2D shape for ND op"); + + // Compute the default 2D block IO lane layout / lane data. + unsigned bitwidth = elemTy.getIntOrFloatBitWidth(); + auto [laneLayout, laneData] = compute2DBlockIOLaneLayoutAndData( + dataShape, subgroupSize, bitwidth, + uArchInstruction->getPackedFormatBitSize()); + + if (layoutKind == xegpu::LayoutKind::Lane) + return buildLaneLayout(context, laneLayout, laneData); + + auto instData = + get2DBlockIOInstDataLayout(dataShape, elemTy, uArchInstruction); + + if (layoutKind == xegpu::LayoutKind::InstData) { + assert(instData && isValidLaneLayout(*instData, laneLayout, laneData) && + "Expected the store layout to satisfy uArch block constraints"); + return buildInstDataLayoutWithLane(context, *instData, laneLayout, + laneData); + } + + if (layoutKind == xegpu::LayoutKind::Subgroup) { + assert(numSg > 0 && + "Number of subgroups must be provided for sg layout creation."); + auto sgLayouts = getSgLayoutCandidates(dataShape, *instData, numSg); + if (sgLayouts.empty()) + return nullptr; + return buildSgLayout(context, dataShape, sgLayouts.front(), /*dimK=*/-1); + } + + return nullptr; +} + +/// Sets up the anchor layout for a prefetch_nd operation. PrefetchNd has no +/// consumer (it produces no value), so it picks its own layout from uArch +/// block parameters. 
+xegpu::DistributeLayoutAttr +xegpu::setupPrefetchNdAnchorLayout(xegpu::LayoutKind layoutKind, + xegpu::TensorDescType tdescTy, int numSg, + const xegpu::uArch::uArch *uArch) { + + const auto *uArchInstruction = + dyn_cast( + uArch->getInstruction( + xegpu::uArch::InstructionKind::Subgroup2DBlockPrefetch)); + if (!uArchInstruction) + return nullptr; + + auto context = tdescTy.getContext(); + Type elemTy = tdescTy.getElementType(); + auto subgroupSize = uArch->getSubgroupSize(); + auto dataShape = tdescTy.getShape(); + int rank = tdescTy.getRank(); + assert(rank >= 2 && "Expected at least 2D shape for ND op"); + + // Compute the default 2D block IO lane layout / lane data. + unsigned bitwidth = elemTy.getIntOrFloatBitWidth(); + auto [laneLayout, laneData] = compute2DBlockIOLaneLayoutAndData( + dataShape, subgroupSize, bitwidth, + uArchInstruction->getPackedFormatBitSize()); + + if (layoutKind == xegpu::LayoutKind::Lane) + return buildLaneLayout(context, laneLayout, laneData); + + auto instData = + get2DBlockIOInstDataLayout(dataShape, elemTy, uArchInstruction); + + if (layoutKind == xegpu::LayoutKind::InstData) { + assert(instData && isValidLaneLayout(*instData, laneLayout, laneData) && + "Expected the prefetch layout to satisfy uArch block constraints"); + return buildInstDataLayoutWithLane(context, *instData, laneLayout, + laneData); + } + + if (layoutKind == xegpu::LayoutKind::Subgroup) { + assert(numSg > 0 && + "Number of subgroups must be provided for sg layout creation."); + auto sgLayouts = getSgLayoutCandidates(dataShape, *instData, numSg); + if (sgLayouts.empty()) + return nullptr; + return buildSgLayout(context, dataShape, sgLayouts.front(), /*dimK=*/-1); + } + + return nullptr; +} + +/// Sets up the anchor layout for a load_nd operation. LoadNd takes a +/// consumer layout (from its result's downstream uses) and validates it +/// against uArch constraints; if valid, the consumer's `inst_data` / +/// `sg_layout` are honored. 
Otherwise the helper falls back to defaults +/// derived from uArch block parameters. +xegpu::DistributeLayoutAttr +xegpu::setupLoadNdAnchorLayout(xegpu::LayoutKind layoutKind, + VectorType resVecTy, + xegpu::DistributeLayoutAttr consumerLayout, + int numSg, const xegpu::uArch::uArch *uArch) { + + assert(consumerLayout && "Expected a valid consumer layout"); + if (layoutKind == xegpu::LayoutKind::Subgroup) { + assert(consumerLayout.isForWorkgroup() && + "Expected consumer layout to be a complete workgroup-level layout"); + return consumerLayout; + } + + auto context = resVecTy.getContext(); + Type elemTy = resVecTy.getElementType(); + auto subgroupSize = uArch->getSubgroupSize(); + auto dataShape = resVecTy.getShape(); + const auto *uArchInstruction = + dyn_cast( + uArch->getInstruction( + xegpu::uArch::InstructionKind::Subgroup2DBlockLoad)); + if (!uArchInstruction) + return nullptr; - if (layoutKind == xegpu::LayoutKind::Subgroup) { - assert(true && - "subgroup layout assignment not supported for insertStridedSlice."); - } else if (layoutKind == xegpu::LayoutKind::InstData) { - for (int dim = 0; dim < srcRank; dim++) { - instDataValue = std::min(srcShape[dim], consumerInstData[dim]); - requiredResLayout = - requiredResLayout.setDimData(dim, -1, instDataValue, -1); + int rank = resVecTy.getRank(); + SmallVector consumerInstData = + consumerLayout.getEffectiveInstDataAsInt(); + SmallVector consumerLaneLayout = + consumerLayout.getEffectiveLaneLayoutAsInt(); + SmallVector consumerLaneData = + consumerLayout.getEffectiveLaneDataAsInt(); + auto consumerOrderAttr = consumerLayout.getOrder(); + + assert(!consumerLaneLayout.empty() && !consumerLaneData.empty() && + "Expected consumer layout to have lane_layout and lane_data"); + + // vertical lane layout means that the blockload must be transposed + // note scaleA on PVC has vertical lane layout even without transposed order + // attr + bool hasTranspose = + consumerLaneLayout[rank - 2] > 1 && consumerLaneLayout[rank - 
1] == 1; + bool hasTransform = !hasTranspose && consumerLaneData[rank - 2] > 1 && + consumerLaneData[rank - 1] == 1; + assert((consumerLaneData[rank - 2] == 1 || consumerLaneData[rank - 1] == 1) && + "Expected consumer lane data to have at most one non-unit dim"); + + if (layoutKind == xegpu::LayoutKind::InstData) { + auto blockWHC = uArchInstruction->getBlockWidthHeightCount( + elemTy, hasTransform, hasTranspose, + /*upConv=*/false); + if (!blockWHC) + return nullptr; + auto [bWidths, bHeights, bCounts] = blockWHC.value(); + + SmallVector laneLayout; + // set the laneLayout to use consumer's LaneLayout as base, but adjust its + // size to match the subgroupsize in case its original value is larger than + // 1 + for (int i = 0; i < rank; i++) { + if (consumerLaneLayout[i] > 1) + laneLayout.push_back(std::max(static_cast(subgroupSize), + consumerLaneLayout[i])); + else + laneLayout.push_back(1); } - } else if (layoutKind == xegpu::LayoutKind::Lane) { - for (int dim = 0; dim < srcRank; dim++) { - assert(srcShape[dim] % consumerLaneLayout[dim] == 0 && - "srcShape must be divisible by laneLayout for all dimensions"); - laneDataValue = std::min(srcShape[dim] / consumerLaneLayout[dim], - consumerLaneData[dim]); - requiredResLayout = - requiredResLayout.setDimData(dim, -1, -1, laneDataValue); + + // See whether the consumer's inst_data satisfies the block constraints. + int64_t height = consumerInstData[rank - 2]; + int64_t width = consumerInstData[rank - 1]; + auto maxBlockCount = *llvm::max_element(bCounts); + auto maxWidth = *llvm::max_element(bWidths); + if (llvm::is_contained(bWidths, static_cast(width)) || + (width % maxWidth == 0 && width / maxWidth < maxBlockCount)) { + if (llvm::is_contained(bHeights, static_cast(height))) { + return buildInstDataLayoutWithLane(context, consumerInstData, + laneLayout, consumerLaneData, + consumerOrderAttr); + } } + + // if consumer instData size too small, try the larger one. 
like DPAS_MX's + // scale is smaller than block load + auto instData = get2DBlockIOInstDataLayout( + dataShape, elemTy, uArchInstruction, hasTransform, hasTranspose); + // assert instData is valid against consumer layout since + // transform/transpose attribute are derived from consumer layout + assert(instData && + isValidLaneLayout(*instData, laneLayout, consumerLaneData) && + "Expected the load layout to satisfy uArch block constraints"); + return buildInstDataLayoutWithLane(context, *instData, laneLayout, + consumerLaneData, consumerOrderAttr); } - return requiredResLayout; + if (layoutKind == xegpu::LayoutKind::Lane) { + assert(isValidLaneLayout(dataShape, consumerLaneLayout, consumerLaneData) && + "Expected the lane layout to satisfy uArch block constraints"); + return consumerLayout; + } + return nullptr; } /// Sets up the anchor layout for load gather and load matrix operation. /// load matrix lowers to load gather and 1d block load. All of them share the /// same layout setup logic. +/// /// For Subgroup layout, uses the consumer layout directly. -/// non-chunked loads (1D or 2D): -/// InstData = {1, ..., min(consumer, maxLaneLoadSize * subgroupSize)} -/// LaneLayout = {1, ..., subgroupSize} -/// lane_data = {1, ..., min(consumer, maxLaneLoadSize)} -/// chunked loads (2D only): -/// InstData = {subgroupSize, min(consumer, maxLaneLoadSize)} -/// LaneLayout = {subgroupSize, 1} -/// lane_data={1,min(consumer, maxLaneLoadSize)} +/// +/// For InstData layout, takes consumer's inst_data as-is. lane_layout and +/// lane_data are taken from the consumer when present; otherwise the helper +/// derives the standard scatter-style default (subgroupSize lanes on the +/// innermost dim, per-lane vector capped by maxChunkSize). +/// +/// For Lane layout, lane_layout/lane_data are taken from the consumer when +/// present; otherwise derived from the same default. 
static xegpu::DistributeLayoutAttr setupGenericLoadAnchorLayout( xegpu::LayoutKind layoutKind, mlir::MLIRContext *context, - xegpu::DistributeLayoutAttr consumerLayout, bool isChunkedLoad, - int maxChunkSize, ArrayRef resShape, int subgroupSize) { + xegpu::DistributeLayoutAttr consumerLayout, int maxChunkSize, + ArrayRef resShape, int subgroupSize) { if (layoutKind == xegpu::LayoutKind::Subgroup) return consumerLayout; SmallVector consumerInstData = consumerLayout.getEffectiveInstDataAsInt(); + SmallVector consumerLaneLayout = + consumerLayout.getEffectiveLaneLayoutAsInt(); SmallVector consumerLaneData = consumerLayout.getEffectiveLaneDataAsInt(); - SmallVector instData(resShape.size(), 1); - SmallVector laneLayout(resShape.size(), 1); - SmallVector laneData(resShape.size(), 1); - - if (!isChunkedLoad) { - if (layoutKind == xegpu::LayoutKind::InstData) { - instData.back() = std::min(static_cast(consumerInstData.back()), - maxChunkSize * subgroupSize); - return xegpu::LayoutAttr::get(context, instData); - } else if (layoutKind == xegpu::LayoutKind::Lane) { - laneData.back() = - std::min(static_cast(consumerLaneData.back()), maxChunkSize); - laneLayout.back() = std::min(static_cast(subgroupSize), - resShape.back() / laneData.back()); - return xegpu::LayoutAttr::get(context, laneLayout, laneData); - } - } else { - assert(resShape.size() == 2 && "Chunked Store must access 2D tensor tile."); - if (layoutKind == xegpu::LayoutKind::InstData) { - instData[0] = subgroupSize; - instData[1] = - std::min(static_cast(consumerInstData[1]), maxChunkSize); - return xegpu::LayoutAttr::get(context, instData); - } else if (layoutKind == xegpu::LayoutKind::Lane) { - laneLayout[0] = subgroupSize; - laneData[1] = - std::min(static_cast(consumerLaneData[1]), maxChunkSize); - return xegpu::LayoutAttr::get(context, laneLayout, laneData); + // Pick lane_layout / lane_data: prefer consumer's, fall back to the + // scatter-store default (subgroupSize lanes on innermost dim, per-lane + // 
vector capped by maxChunkSize). + SmallVector laneLayout; + SmallVector laneData; + assert(!consumerLaneLayout.empty() && !consumerLaneData.empty() && + "Expected consumer layout to have lane_layout and lane_data"); + laneLayout.assign(consumerLaneLayout.begin(), consumerLaneLayout.end()); + laneData.assign(consumerLaneData.begin(), consumerLaneData.end()); + + if (layoutKind == xegpu::LayoutKind::InstData) { + // Take consumer's inst_data as-is. If the consumer doesn't have one, + // fall back to lane_layout * lane_data per dim. + SmallVector instData; + if (!consumerInstData.empty()) { + instData.assign(consumerInstData.begin(), consumerInstData.end()); + } else { + instData.resize(resShape.size()); + for (size_t i = 0; i < resShape.size(); ++i) + instData[i] = laneLayout[i] * laneData[i]; } + return buildInstDataLayoutWithLane(context, instData, laneLayout, laneData); } + if (layoutKind == xegpu::LayoutKind::Lane) + return buildLaneLayout(context, laneLayout, laneData); return nullptr; } /// Sets up the anchor layout for a load gather operation. xegpu::DistributeLayoutAttr xegpu::setupLoadGatherAnchorLayout( - xegpu::LayoutKind layoutKind, VectorType resVecTy, int chunkSize, + xegpu::LayoutKind layoutKind, VectorType resVecTy, int contigChunkSize, xegpu::DistributeLayoutAttr consumerLayout, const uArch::uArch *uArch) { const int subgroupSize = uArch->getSubgroupSize(); @@ -1240,18 +1714,18 @@ xegpu::DistributeLayoutAttr xegpu::setupLoadGatherAnchorLayout( const auto *uArchInstruction = dyn_cast( uArch->getInstruction(xegpu::uArch::InstructionKind::LoadGather)); - int maxChunkSize = uArchInstruction->getMaxLaneLoadSize(elemBitWidth); + int maxChunkSize = std::min( + uArchInstruction->getMaxLaneLoadSize(elemBitWidth), contigChunkSize); return setupGenericLoadAnchorLayout(layoutKind, context, consumerLayout, - (chunkSize > 1), maxChunkSize, resShape, - subgroupSize); + maxChunkSize, resShape, subgroupSize); } /// Sets up the anchor layout for load matrix operation. 
/// TODO: enhance load matrix to indicate lowering to chunked load or not. xegpu::DistributeLayoutAttr xegpu::setupLoadMatrixAnchorLayout(xegpu::LayoutKind layoutKind, - VectorType resVecTy, + VectorType resVecTy, int contigChunkSize, xegpu::DistributeLayoutAttr consumerLayout, const xegpu::uArch::uArch *uArch) { @@ -1263,61 +1737,41 @@ xegpu::setupLoadMatrixAnchorLayout(xegpu::LayoutKind layoutKind, const auto *uArchInstruction = dyn_cast( uArch->getInstruction(xegpu::uArch::InstructionKind::LoadGather)); - int maxChunkSize = uArchInstruction->getMaxLaneLoadSize(elemBitWidth); + int maxChunkSize = std::min( + uArchInstruction->getMaxLaneLoadSize(elemBitWidth), contigChunkSize); return setupGenericLoadAnchorLayout(layoutKind, context, consumerLayout, - false, maxChunkSize, resShape, - subgroupSize); + maxChunkSize, resShape, subgroupSize); } /// Sets up the anchor layout for store scatter and store matrix operation. -/// store matrix lowers to store scatter and 1d block store. All of them share -/// the same layout setup logic. For Subgroup layout, not supported yet. -/// non-chunked stores (1D or 2D): -/// InstData = {1, ..., subgroupSize} -/// LaneLayout = {1, ..., subgroupSize} -/// lane_data = {1, ..., 1} -/// chunked stores (2D only): -/// InstData = {subgroupSize, min(srcVec, maxLaneStoreSize)} -/// LaneLayout = {subgroupSize, 1} -/// lane_data={1,min(srcVec, maxLaneStoreSize)} +/// store matrix lowers to store scatter and 1d block store. All of them +/// share the same layout setup logic. For Subgroup layout, not supported +/// yet. +/// +/// Lane layout is derived first via `computeScatterIOLaneLayoutAndData`; +/// inst_data is then the element-wise product lane_layout * lane_data. 
static xegpu::DistributeLayoutAttr setupGenericStoreAnchorLayout(xegpu::LayoutKind layoutKind, - mlir::MLIRContext *context, bool isChunkedStore, - int maxChunkSize, ArrayRef srcShape, - int subgroupSize) { - - int srcShapeSize = srcShape.size(); - SmallVector instData(srcShapeSize, 1); - SmallVector laneLayout(srcShapeSize, 1); - SmallVector laneData(srcShapeSize, 1); + mlir::MLIRContext *context, int maxChunkSize, + ArrayRef srcShape, int subgroupSize) { if (layoutKind == xegpu::LayoutKind::Subgroup) { - assert(true && + assert(false && "subgroup layout assignment not supported for storeScatter."); return nullptr; } - if (!isChunkedStore) { - if (layoutKind == xegpu::LayoutKind::InstData) { - instData[srcShapeSize - 1] = - std::min(subgroupSize, static_cast(srcShape.back())); - return xegpu::LayoutAttr::get(context, instData); - } else if (layoutKind == xegpu::LayoutKind::Lane) { - laneLayout[srcShapeSize - 1] = - std::min(subgroupSize, static_cast(srcShape.back())); - return xegpu::LayoutAttr::get(context, laneLayout, laneData); - } - } else { - assert(srcShapeSize == 2 && "Chunked Store must access 2D tensor tile."); - if (layoutKind == xegpu::LayoutKind::InstData) { - instData[0] = subgroupSize; - instData[1] = std::min(static_cast(srcShape[1]), maxChunkSize); - return xegpu::LayoutAttr::get(context, instData); - } else if (layoutKind == xegpu::LayoutKind::Lane) { - laneLayout[0] = subgroupSize; - laneData[1] = std::min(static_cast(srcShape[1]), maxChunkSize); - return xegpu::LayoutAttr::get(context, laneLayout, laneData); - } + auto [laneLayout, laneData] = + computeScatterIOLaneLayoutAndData(srcShape, subgroupSize, maxChunkSize); + + if (layoutKind == xegpu::LayoutKind::InstData) { + SmallVector instData(srcShape.size()); + for (size_t i = 0; i < srcShape.size(); ++i) + instData[i] = laneLayout[i] * laneData[i]; + return buildInstDataLayoutWithLane(context, instData, laneLayout, laneData); + } + if (layoutKind == xegpu::LayoutKind::Lane) { + return 
buildLaneLayout(context, laneLayout, laneData); } return nullptr; } @@ -1325,7 +1779,7 @@ setupGenericStoreAnchorLayout(xegpu::LayoutKind layoutKind, /// Sets up the anchor layout for a store scatter operation. xegpu::DistributeLayoutAttr xegpu::setupStoreScatterAnchorLayout(xegpu::LayoutKind layoutKind, - VectorType srcVecTy, int chunkSize, + VectorType srcVecTy, int contigChunkSize, const uArch::uArch *uArch) { const int subgroupSize = uArch->getSubgroupSize(); @@ -1336,15 +1790,16 @@ xegpu::setupStoreScatterAnchorLayout(xegpu::LayoutKind layoutKind, const auto *uArchInstruction = dyn_cast( uArch->getInstruction(xegpu::uArch::InstructionKind::StoreScatter)); - int maxChunkSize = uArchInstruction->getMaxLaneStoreSize(elemBitWidth); - return setupGenericStoreAnchorLayout(layoutKind, context, (chunkSize > 1), - maxChunkSize, srcShape, subgroupSize); + int maxChunkSize = std::min( + uArchInstruction->getMaxLaneStoreSize(elemBitWidth), contigChunkSize); + return setupGenericStoreAnchorLayout(layoutKind, context, maxChunkSize, + srcShape, subgroupSize); } /// Sets up the anchor layout for a store matrix operation. 
xegpu::DistributeLayoutAttr xegpu::setupStoreMatrixAnchorLayout(xegpu::LayoutKind layoutKind, - VectorType srcVecTy, + VectorType srcVecTy, int contigChunkSize, const xegpu::uArch::uArch *uArch) { const int subgroupSize = uArch->getSubgroupSize(); @@ -1355,437 +1810,717 @@ xegpu::setupStoreMatrixAnchorLayout(xegpu::LayoutKind layoutKind, const auto *uArchInstruction = dyn_cast( uArch->getInstruction(xegpu::uArch::InstructionKind::StoreScatter)); - int maxChunkSize = uArchInstruction->getMaxLaneStoreSize(elemBitWidth); + int maxChunkSize = std::min( + uArchInstruction->getMaxLaneStoreSize(elemBitWidth), contigChunkSize); - return setupGenericStoreAnchorLayout(layoutKind, context, false, maxChunkSize, + return setupGenericStoreAnchorLayout(layoutKind, context, maxChunkSize, srcShape, subgroupSize); } -// This function returns the default lane layout for a given vector type. -// - `packingSize` means multiple consecutive elements can be accessed -// together as a single unit. -// - `vnni` means data packing is column-wise (i.e., 2x1xf16 with vnni vs. -// 1x2xf16 w/o vnni). -template -static xegpu::LayoutAttr getDefaultLaneLayout2DBlockIo( - RankedTy ty, const xegpu::uArch::uArch *uArch, - std::optional packingSize = std::nullopt, bool vnni = false) { - // Expecting at least 1D vector. For rank > 2, leading dims are batch dims. - assert(((ty.getRank() >= 1 && !vnni) || ty.getRank() >= 2) && - "Expected at least 1D non-vnni or 2D vector."); - // Expecting int or float element type. - assert(ty.getElementType().isIntOrFloat() && - "Expected int or float element type."); - - auto context = ty.getContext(); - auto rank = ty.getRank(); - SmallVector laneLayout(rank, 1); - SmallVector laneData(rank, 1); - if (packingSize.has_value()) { - unsigned bitwidth = ty.getElementType().getIntOrFloatBitWidth(); - int &laneDataPos = vnni ? laneData[rank - 2] : laneData.back(); - laneDataPos = bitwidth < *packingSize ? 
*packingSize / bitwidth : 1; +/// Completes a scatter IO layout by deriving lane_layout and lane_data from +/// `specifiedLayout`'s inst_data when they are missing. The layout is returned +/// unchanged if `specifiedLayout` is null, carries no inst_data, or already has +/// both lane_layout and lane_data. +/// +/// When lane info is absent, inst_data is treated as the effective shape and +/// the lane factorization is filled in as follows: +/// - If `consumerLayout` is present and its lane_layout / lane_data are a +/// valid factorization of inst_data, that consumer lane info is reused so +/// the completed layout matches the consumer (avoiding a relayout). +/// - Otherwise a standard scatter-style factorization is computed via +/// `computeScatterIOLaneLayoutAndData`, bounded by `maxChunkSize` — the +/// per-lane load width reported by the uArch's LoadGather instruction +/// (`getMaxLaneLoadSize`). +/// +std::optional +xegpu::completeScatterLoadLaneLayoutFromInstData( + xegpu::DistributeLayoutAttr specifiedLayout, + xegpu::DistributeLayoutAttr consumerLayout, Type elemTy, + const xegpu::uArch::LoadGatherInstructionInterface *uArchInstruction, + const int subgroupSize) { + if (!specifiedLayout) + return specifiedLayout; + SmallVector specifiedInstData = + specifiedLayout.getEffectiveInstDataAsInt(); + if (specifiedInstData.empty()) + return specifiedLayout; + if (!specifiedLayout.getEffectiveLaneLayoutAsInt().empty() && + !specifiedLayout.getEffectiveLaneDataAsInt().empty()) + return specifiedLayout; + + // Reuse the load-side setup with inst_data as the destination shape. 
+ auto *context = specifiedLayout.getContext(); + auto elemBitWidth = elemTy.getIntOrFloatBitWidth(); + int maxChunkSize = uArchInstruction->getMaxLaneLoadSize(elemBitWidth); + if (consumerLayout) { + auto consumerLaneLayout = consumerLayout.getEffectiveLaneLayoutAsInt(); + auto consumerLaneData = consumerLayout.getEffectiveLaneDataAsInt(); + if (!consumerLaneLayout.empty() && !consumerLaneData.empty() && + isValidLaneLayout(specifiedInstData, consumerLaneLayout, + consumerLaneData)) + return buildInstDataLayoutWithLane(context, specifiedInstData, + consumerLaneLayout, consumerLaneData); } - laneLayout.back() = uArch->getSubgroupSize(); - return xegpu::LayoutAttr::get(context, laneLayout, laneData); + auto [defLaneLayout, defLaneData] = computeScatterIOLaneLayoutAndData( + specifiedInstData, subgroupSize, maxChunkSize); + if (!isValidLaneLayout(specifiedInstData, defLaneLayout, defLaneData)) + return std::nullopt; + return buildInstDataLayoutWithLane(context, specifiedInstData, defLaneLayout, + defLaneData); } -// This function returns all layouts for the given sgCount, whose sgData: -// 1. Evenly divides the wgShape. -// 2. Is a multiple of instData. -// Example: -// wgShape = [128, 64], instData = [8, 16], sgCount = 32 -// Returns layouts: -// [(8,4), (16,2)], which correspond to sgData [16,16] and [8,32]. 
-using LayoutRepresentation = std::pair; -static SmallVector -getValidLayouts(ArrayRef wgShape, ArrayRef instData, - int64_t sgCount) { - SmallVector candidates; - for (int sgLayout0 = 1; sgLayout0 <= sgCount; ++sgLayout0) { - if (sgCount % sgLayout0) - continue; - int64_t sgLayout1 = sgCount / sgLayout0; - int64_t sgData0 = wgShape[0] / sgLayout0; - int64_t sgData1 = wgShape[1] / sgLayout1; - if ((wgShape[0] % sgLayout0 || wgShape[1] % sgLayout1) || - (sgData0 % instData[0] || sgData1 % instData[1])) - continue; - candidates.emplace_back(sgLayout0, sgLayout1); - } - // Sort primarily by how balanced they are - // (i.e., minimize the absolute difference between the two dimensions), and - // secondarily by the first dimension in ascending order. - llvm::sort(candidates, [](const LayoutRepresentation &lhs, - const LayoutRepresentation &rhs) { - int diffLhs = std::abs(lhs.first - lhs.second); - int diffRhs = std::abs(rhs.first - rhs.second); - if (diffLhs != diffRhs) - return diffLhs < diffRhs; - return lhs.first < rhs.first; - }); - return candidates; +/// Like completeScatterLoadLaneLayoutFromInstData, but for scatter stores. A +/// store is a data sink, so lane info is derived purely from inst_data (bounded +/// by the uArch's per-lane store width); there is no consumer layout to reuse. +std::optional +xegpu::completeScatterStoreLaneLayoutFromInstData( + xegpu::DistributeLayoutAttr specifiedLayout, Type elemTy, + const xegpu::uArch::StoreScatterInstructionInterface *uArchInstruction, + const int subgroupSize) { + if (!specifiedLayout) + return specifiedLayout; + SmallVector specifiedInstData = + specifiedLayout.getEffectiveInstDataAsInt(); + if (specifiedInstData.empty()) + return specifiedLayout; + if (!specifiedLayout.getEffectiveLaneLayoutAsInt().empty() && + !specifiedLayout.getEffectiveLaneDataAsInt().empty()) + return specifiedLayout; + + // Reuse the store-side setup with inst_data as the source shape. 
+ auto *context = specifiedLayout.getContext(); + auto elemBitWidth = elemTy.getIntOrFloatBitWidth(); + int maxChunkSize = uArchInstruction->getMaxLaneStoreSize(elemBitWidth); + auto [defLaneLayout, defLaneData] = computeScatterIOLaneLayoutAndData( + specifiedInstData, subgroupSize, maxChunkSize); + if (!isValidLaneLayout(specifiedInstData, defLaneLayout, defLaneData)) + return std::nullopt; + return buildInstDataLayoutWithLane(context, specifiedInstData, defLaneLayout, + defLaneData); } -/// Helper function to compute inst_data vectors for DPAS operands A, B, and -/// C/D. -static std::optional, SmallVector, - SmallVector>> -getDpasInstDataVectors(VectorType aTy, VectorType bTy, VectorType cdTy, - const xegpu::uArch::uArch *uArch, - bool isDpasMx = false) { - const int subgroupSize = uArch->getSubgroupSize(); - - const xegpu::uArch::MMAInstructionInterface *uArchInstruction; - if (isDpasMx) - uArchInstruction = dyn_cast( - uArch->getInstruction( - xegpu::uArch::InstructionKind::SubgroupScaledMatrixMultiplyAcc)); - else - uArchInstruction = - dyn_cast(uArch->getInstruction( - xegpu::uArch::InstructionKind::SubgroupMatrixMultiplyAcc)); - - // M dimension is the second-to-last dim of A (handles batch dims). - const unsigned dataALen = aTy.getShape()[aTy.getRank() - 2]; - auto supportedALen = uArchInstruction->getSupportedM(aTy.getElementType()); - const int maxALen = - xegpu::getLargestDivisor(dataALen, ArrayRef(supportedALen)); - - // N dimension is the last dim of B. - const unsigned dataBLen = bTy.getShape().back(); - auto supportedBLen = uArchInstruction->getSupportedN(bTy.getElementType()); - const int maxBLen = - xegpu::getLargestDivisor(dataBLen, ArrayRef(supportedBLen)); - - auto supportedCLen = uArchInstruction->getSupportedN(cdTy.getElementType()); - const int maxCLen = - xegpu::getLargestDivisor(dataBLen, ArrayRef(supportedCLen)); - if (maxALen == -1 || maxBLen == -1 || maxCLen == -1) +/// Completes a 2D-block store/prefetch layout from its inst_data. 
store_nd and +/// prefetch_nd are data sinks, so lane info is derived purely from inst_data +/// (no consumer to reuse). One helper serves both via +/// BlockIOInstructionInterface. +std::optional +xegpu::completeBlockStoreLaneLayoutFromInstData( + xegpu::DistributeLayoutAttr specifiedLayout, Type elemTy, + const xegpu::uArch::BlockIOInstructionInterface *uArchInstruction, + const int subgroupSize) { + if (!specifiedLayout) + return specifiedLayout; + SmallVector specifiedInstData = + specifiedLayout.getEffectiveInstDataAsInt(); + if (specifiedInstData.empty()) + return specifiedLayout; + if (!specifiedLayout.getEffectiveLaneLayoutAsInt().empty() && + !specifiedLayout.getEffectiveLaneDataAsInt().empty()) + return specifiedLayout; + + auto *context = specifiedLayout.getContext(); + auto [laneLayout, laneData] = compute2DBlockIOLaneLayoutAndData( + specifiedInstData, subgroupSize, elemTy.getIntOrFloatBitWidth(), + uArchInstruction->getPackedFormatBitSize()); + if (!isValidLaneLayout(specifiedInstData, laneLayout, laneData)) return std::nullopt; + return buildInstDataLayoutWithLane(context, specifiedInstData, laneLayout, + laneData); +} - // For DPAS_MX, use getSupportedK to get the scaled K dimension. - // assume single element in the returned vector. - int kDimSize = subgroupSize; - if (isDpasMx) { - auto supportedKLen = uArchInstruction->getSupportedK(aTy.getElementType()); - if (supportedKLen.empty()) - return std::nullopt; - kDimSize = supportedKLen[0]; +/// Like completeBlockStoreLaneLayoutFromInstData, but for load_nd. The +/// consumer's lane_data and order are reused as-is; lane_layout is rebuilt from +/// the consumer's lane_layout, bumping every non-unit dim up to the subgroup +/// size. The user-provided inst_data is preserved. 
+std::optional +xegpu::completeBlockLoadLaneLayoutFromInstData( + xegpu::DistributeLayoutAttr specifiedLayout, + xegpu::DistributeLayoutAttr consumerLayout, Type elemTy, + const xegpu::uArch::BlockIOInstructionInterface *uArchInstruction, + const int subgroupSize) { + if (!specifiedLayout) + return specifiedLayout; + SmallVector specifiedInstData = + specifiedLayout.getEffectiveInstDataAsInt(); + if (specifiedInstData.empty()) + return specifiedLayout; + if (!specifiedLayout.getEffectiveLaneLayoutAsInt().empty() && + !specifiedLayout.getEffectiveLaneDataAsInt().empty()) + return specifiedLayout; + if (!consumerLayout) + return specifiedLayout; + SmallVector consumerLaneLayout = + consumerLayout.getEffectiveLaneLayoutAsInt(); + SmallVector consumerLaneData = + consumerLayout.getEffectiveLaneDataAsInt(); + if (consumerLaneLayout.empty() || consumerLaneData.empty()) + return specifiedLayout; + + auto *context = specifiedLayout.getContext(); + int rank = specifiedInstData.size(); + + SmallVector laneLayout; + // set the laneLayout to use consumer's LaneLayout as base, but adjust its + // size to match the subgroupsize in case its original value is larger than 1 + for (int i = 0; i < rank; i++) { + if (consumerLaneLayout[i] > 1) { + laneLayout.push_back( + std::max(static_cast(subgroupSize), consumerLaneLayout[i])); + } else { + laneLayout.push_back(1); + } } - SmallVector instDataA(aTy.getRank(), 1); - instDataA[aTy.getRank() - 2] = maxALen; - instDataA[aTy.getRank() - 1] = kDimSize; - SmallVector instDataB(bTy.getRank(), 1); - instDataB[bTy.getRank() - 2] = kDimSize; - instDataB[bTy.getRank() - 1] = maxBLen; - SmallVector instDataCD(cdTy.getRank(), 1); - instDataCD[cdTy.getRank() - 2] = maxALen; - instDataCD[cdTy.getRank() - 1] = maxCLen; - return std::make_tuple(instDataA, instDataB, instDataCD); + if (!isValidLaneLayout(specifiedInstData, laneLayout, consumerLaneData)) + return std::nullopt; + return buildInstDataLayoutWithLane(context, specifiedInstData, 
laneLayout, + consumerLaneData, + consumerLayout.getOrder()); } -/// Helper function to set up subgroup layouts for DPAS operands A, B, and C/D. -/// Returns the three layouts if successful, nullopt otherwise. -static std::optional< +/// Completes user-provided DPAS A/B/C-D anchors that carry only inst_data by +/// filling in lane_layout / lane_data. The lane factorization mirrors the +/// InstData branch of `setupDpasLayout` (derived from each operand's shape and +/// matmul role, B using VNNI packing); the user's inst_data is preserved. +std::optional< std::tuple> -getupDpasSubgroupLayouts(mlir::MLIRContext *context, VectorType aTy, - VectorType bTy, VectorType cdTy, - xegpu::DistributeLayoutAttr consumerLayout, int numSg, - const xegpu::uArch::uArch *uArch) { - auto instDataVecs = getDpasInstDataVectors(aTy, bTy, cdTy, uArch); - if (!instDataVecs) +xegpu::completeDpasLaneLayoutFromInstData(xegpu::DistributeLayoutAttr aLayout, + xegpu::DistributeLayoutAttr bLayout, + xegpu::DistributeLayoutAttr cdLayout, + VectorType aTy, VectorType bTy, + VectorType cdTy, + const xegpu::uArch::uArch *uArch) { + auto context = aTy.getContext(); + const auto *uArchInstruction = + dyn_cast(uArch->getInstruction( + xegpu::uArch::InstructionKind::SubgroupMatrixMultiplyAcc)); + if (!uArchInstruction) return std::nullopt; - auto [instDataA, instDataB, instDataCD] = *instDataVecs; - assert(instDataA.size() == 2 && instDataB.size() == 2 && - instDataCD.size() == 2 && - "Sg layout creation expects valid 2D inst data"); + auto subgroupSize = uArch->getSubgroupSize(); - std::optional consumerSgLayout = std::nullopt; - if (consumerLayout && consumerLayout.isForWorkgroup()) { - SmallVector sgLayoutD = consumerLayout.getEffectiveSgLayoutAsInt(); - consumerSgLayout = std::make_pair(sgLayoutD[0], sgLayoutD[1]); - } + auto [laneLayoutA, laneDataA] = compute2DBlockIOLaneLayoutAndData( + aTy.getShape(), subgroupSize, + aTy.getElementType().getIntOrFloatBitWidth(), + 
uArchInstruction->getPackedFormatBitSizeA()); + auto [laneLayoutB, laneDataB] = compute2DBlockIOLaneLayoutAndData( + bTy.getShape(), subgroupSize, + bTy.getElementType().getIntOrFloatBitWidth(), + uArchInstruction->getPackedFormatBitSizeB(), /*vnni=*/true); + auto [laneLayoutCD, laneDataCD] = compute2DBlockIOLaneLayoutAndData( + cdTy.getShape(), subgroupSize, + cdTy.getElementType().getIntOrFloatBitWidth(), + cdTy.getElementType().getIntOrFloatBitWidth()); + SmallVector instDataA = aLayout.getEffectiveInstDataAsInt(); + SmallVector instDataB = bLayout.getEffectiveInstDataAsInt(); + SmallVector instDataCD = cdLayout.getEffectiveInstDataAsInt(); + if (!isValidLaneLayout(instDataA, laneLayoutA, laneDataA) || + !isValidLaneLayout(instDataB, laneLayoutB, laneDataB) || + !isValidLaneLayout(instDataCD, laneLayoutCD, laneDataCD)) + return std::nullopt; + return std::make_tuple( + buildInstDataLayoutWithLane(context, instDataA, laneLayoutA, laneDataA), + buildInstDataLayoutWithLane(context, instDataB, laneLayoutB, laneDataB), + buildInstDataLayoutWithLane(context, instDataCD, laneLayoutCD, + laneDataCD)); +} - // Get all valid layouts for A, B and C/D operands - auto layoutsA = getValidLayouts(aTy.getShape(), instDataA, numSg); - auto layoutsB = getValidLayouts(bTy.getShape(), instDataB, numSg); - auto layoutsCD = getValidLayouts(cdTy.getShape(), instDataCD, numSg); - if (layoutsA.empty() || layoutsB.empty() || layoutsCD.empty()) +/// Like completeDpasLaneLayoutFromInstData, but for dpas_mx: also re-derives +/// the A_scale / B_scale layouts from the completed A / B layouts via +/// `createScaleLayout`, matching the default path of `setupDpasMxLayout`. 
+std::optional< + std::tuple> +xegpu::completeDpasMxLaneLayoutFromInstData( + xegpu::DistributeLayoutAttr aLayout, xegpu::DistributeLayoutAttr bLayout, + xegpu::DistributeLayoutAttr cdLayout, VectorType aTy, VectorType bTy, + VectorType cdTy, VectorType aScaleTy, VectorType bScaleTy, + const xegpu::uArch::uArch *uArch) { + auto completed = completeDpasLaneLayoutFromInstData( + aLayout, bLayout, cdLayout, aTy, bTy, cdTy, uArch); + if (!completed) return std::nullopt; + auto context = aTy.getContext(); + auto [completedA, completedB, completedCD] = *completed; - // Pick the best subgroup layout - llvm::DenseSet setA(layoutsA.begin(), layoutsA.end()); - llvm::DenseSet setCD(layoutsCD.begin(), - layoutsCD.end()); - std::optional bestPick; - auto checkAlignedSgDataAB = [&](LayoutRepresentation sgLayout) { - return aTy.getShape().back() / sgLayout.second == - bTy.getShape().front() / sgLayout.first; - }; - for (auto &sgLayout : layoutsB) { - if (setA.contains(sgLayout) && setCD.contains(sgLayout)) { - if (!checkAlignedSgDataAB(sgLayout)) - continue; - // Is in (A and B and CD) and matches consumer -> best pick - if (consumerSgLayout.has_value() && sgLayout == *consumerSgLayout) { - bestPick = sgLayout; - break; + auto aScaleLayout = + createScaleLayout(context, aTy, aScaleTy, completedA, false, uArch); + auto bScaleLayout = + createScaleLayout(context, bTy, bScaleTy, completedB, true, uArch); + + return std::make_tuple(completedA, completedB, completedCD, aScaleLayout, + bScaleLayout); +} + +/// Sets up layout for reduction operations by creating a SliceAttr for the +/// result. +/// +/// Algorithm Overview: +/// This function attempts to construct a source layout that, when sliced along +/// reduction dimensions, produces a result layout compatible with the +/// consumer layout. +/// +/// For subgroup layouts, it first tries to align the source layout's subgroup +/// layout and data with the consumer's layout on non-reduction dimensions. 
+/// Then, it distributes remaining subgroups across reduction dimensions. This +/// avoids subgroup data redistribution overhead between the reduced result and +/// its consumer. When the consumer layout is a slice layout, it attempts to +/// reuse the slice layout's parent layout for the source to further minimize +/// potential data redistribution. +/// +/// This is a best-effort alignment, not a hard constraint: the goal is only to +/// pick a *legal* source layout that minimizes redistribution against the +/// (single, first-arriving) consumer layout. There is no failure path - when +/// the consumer's slice layout cannot be reused as-is (example 2 below), the +/// function falls back to distributing all subgroups on the non-reduction +/// dimensions first and the remainder on the reduction dimensions, which always +/// yields a valid source layout. If the resulting source layout still differs +/// from what some consumer expects (e.g. a second, inconsistent consumer), that +/// mismatch is reconciled later by the layout conflict resolution process +/// (`ResolveLayoutConflicts`), which inserts a `convert_layout` op - this +/// function never has to give up. +/// +/// For the InstData and Lane layout kinds only the innermost two dimensions +/// are distributed; all leading dimensions are assumed to be unit dimensions. +/// This assumption is checked via `leadingDimsAreUnit`. The lane_layout and +/// lane_data are computed by `computeReductionLaneLayoutAndData`, which picks +/// a layout that minimizes cross-lane reduction (reducing within a lane when +/// only one of the innermost two dims is a reduction dim). The inst_data is +/// simply the element-wise product lane_layout * lane_data. +/// +/// The function returns the *result* layout (the SliceAttr). The *source* +/// layout it decides on is the parent of that slice; both are listed below so +/// the relationship is explicit. +/// +/// Examples: +/// 1. 
Subgroup layout - Row reduction on 2D tensor: +/// srcShape=[32, 128], reductionDims=[1], resShape=[32], subgroupSize=16, +/// NumSg=32 +/// * Consumer Layout: +/// #xegpu.slice<#xegpu.layout, dims = +/// [1]>} +/// * Source Layout (decided by this function): +/// #xegpu.layout +/// * Result Layout (returned): +/// #xegpu.slice<#xegpu.layout, dims = +/// [1]>} +/// The consumer slices exactly the reduction dim, so its parent layout is +/// reused for the source: sg_layout is kept, but the source's sg_data on +/// the reduction dim is grown from 8 to 16 (= srcShape[1] / sg_layout[1] = +/// 128 / 8) so the source tile is evenly distributed over the reduction +/// dim. Slicing that source over dim 1 reproduces the consumer. +/// +/// 2. Subgroup layout - Same shapes as above but consumer doesn't have a +/// reusable slice layout, so the algorithm distributes all subgroups on the +/// non-reduction dims first and the remainder on the reduction dims. +/// 2a. * Consumer Layout: +/// #xegpu.layout +/// * Source Layout (decided by this function): +/// #xegpu.layout +/// * Result Layout (returned): +/// #xegpu.slice<#xegpu.layout, +/// dims = [1]>} +/// All 32 subgroups land on the non-reduction dim 0; the reduction dim +/// 1 gets the leftover (sg_layout=1, so the whole length 128 lives in +/// one subgroup's sg_data). +/// 2b. * Consumer Layout: +/// #xegpu.slice<#xegpu.layout, dims = [1, 2]>} +/// * Source Layout (decided by this function): +/// #xegpu.layout +/// * Result Layout (returned): +/// #xegpu.slice<#xegpu.layout, +/// dims = [1]>} +/// The consumer slices dims [1, 2] which do not match this op's +/// reductionDims, so it can't be reused as-is; subgroups are +/// re-distributed (non-reduction dim first, then reduction dim). +/// +/// 3. Lane layout - Default (lanes on innermost dim): +/// srcShape=[32, 64], reductionDims=[0], subgroupSize=16 +/// * Source Layout (decided by this function): +/// laneLayout=[1, 16], laneData=[1, 1] (returned sliced over dim 0). 
+/// The innermost dim is not reduced, so lanes stay on it. +/// +/// 4. Lane layout - Switch (lanes moved off the reduction dim): +/// srcShape=[32, 64], reductionDims=[1], subgroupSize=16 +/// * Source Layout (decided by this function): +/// laneLayout=[16, 1], laneData=[1, 1] (returned sliced over dim 1). +/// The innermost dim is the sole reduction dim, so lanes move to the +/// non-reduction dim to reduce within a lane. This switch only happens +/// when the consumer has no reduction dims to broadcast the result back +/// along (i.e. the consumer layout is not a slice over this reduction); +/// otherwise the default (example 3) is used. +/// +/// 5. Lane layout - No switch when both inner dims are reduced (reduction to +/// scalar): +/// srcShape=[32, 64], reductionDims=[0, 1], subgroupSize=16 +/// * Source Layout (decided by this function): +/// laneLayout=[1, 16], laneData=[1, 1] (returned sliced over dims +/// [0,1]). +/// Both dims are reduced, so this is not a *sole* innermost reduction; the +/// switch condition (example 4) does not apply and lanes stay on the +/// innermost dim. The cross-lane reduction here is unavoidable. +/// +/// 6. Lane layout - No switch when the consumer slices the reduction dim: +/// srcShape=[32, 64], reductionDims=[1], subgroupSize=16 +/// * Consumer Layout: +/// #xegpu.slice<#xegpu.layout, +/// dims = [1]>} +/// * Source Layout (decided by this function): +/// #xegpu.layout (the consumer +/// slice's parent, reused directly; returned sliced over dim 1). +/// Same shape/reductionDims as example 4, but here the consumer is a slice +/// over the reduction dim, so it can broadcast the result back along that +/// dim. The slice's parent layout is reused as the source (no switch, no +/// re-derivation); the inst_data propagation step has already inserted a +/// convert_layout if needed, so the lane-level layout can be reused as-is. 
+ +xegpu::SliceAttr xegpu::setupMultiReductionResultLayout( + xegpu::LayoutKind layoutKind, VectorType srcVecTy, + DistributeLayoutAttr consumerLayout, SmallVector reductionDims, + int numSg, const xegpu::uArch::uArch *uArch) { + + auto srcShape = srcVecTy.getShape(); + int srcRank = srcShape.size(); + auto context = srcVecTy.getContext(); + + const int subgroupSize = uArch->getSubgroupSize(); + int64_t maxReduceVectorSize = 1; // could extend to spirv vector Size + xegpu::DistributeLayoutAttr srcLayout; + if (layoutKind == xegpu::LayoutKind::Subgroup) { + xegpu::SliceAttr consumerSliceLayout = + dyn_cast_if_present(consumerLayout); + if (consumerSliceLayout && + consumerSliceLayout.getDims().asArrayRef().equals(reductionDims)) { + srcLayout = consumerSliceLayout.getParent(); + SmallVector sgLayoutFromConsumer = + srcLayout.getEffectiveSgLayoutAsInt(); + auto srcSgData = computeShapeRatio(srcShape, sgLayoutFromConsumer); + if (srcSgData) + for (int dim = 0; dim < srcRank; dim++) { + if (llvm::is_contained(reductionDims, dim)) + srcLayout = + srcLayout.setDimData(dim, srcSgData.value()[dim], -1, -1); + } + } else { + SmallVector consumerSgLayout = + consumerLayout ? consumerLayout.getEffectiveSgLayoutAsInt() + : SmallVector(); + SmallVector consumerSgData = + consumerLayout ? consumerLayout.getEffectiveSgDataAsInt() + : SmallVector(); + SmallVector consumerOrder = + consumerLayout ? consumerLayout.getEffectiveOrderAsInt() + : SmallVector(); + DenseI32ArrayAttr orderAttr = + consumerLayout ? consumerLayout.getOrder() : nullptr; + SmallVector sgLayout(srcRank), sgData(srcRank), order(srcRank); + int remainingSgCount = + consumerLayout ? 
consumerLayout.getNumSubgroups() : numSg; + int consumerIdx = 0; + + // First pass: Match consumer's layout on non-reduction dimensions + for (int i = 0; i < srcRank; i++) { + if (!llvm::is_contained(reductionDims, i) && + consumerIdx < static_cast(consumerSgLayout.size())) { + sgLayout[i] = consumerSgLayout[consumerIdx]; + sgData[i] = consumerSgData[consumerIdx]; + remainingSgCount /= sgLayout[i]; + order[i] = consumerOrder[consumerIdx]; + consumerIdx++; + } } - // Is in (A and B and CD) layoutsB is ordered from most - // balanced to least. So the first one we see is the most balanced one, - // remember it and later only update if there is one that matches the - // consumer. - if (!bestPick) - bestPick = sgLayout; + + // Second pass: Distribute remaining subgroups across reduction dimensions + // the reduction to scalar case is handled only by this loop + int64_t remainOrder = consumerSgLayout.size(); + for (int i = 0; i < srcRank; i++) { + if (llvm::is_contained(reductionDims, i)) { + sgLayout[i] = + std::min(srcShape[i], static_cast(remainingSgCount)); + assert((srcShape[i] % sgLayout[i] == 0) && + "source shape not divisible by sg_layout"); + sgData[i] = srcShape[i] / sgLayout[i]; + remainingSgCount /= sgLayout[i]; + order[i] = remainOrder++; + } + } + DenseI32ArrayAttr resOrderAttr = DenseI32ArrayAttr::get( + context, SmallVector(order.begin(), order.end())); + if (!orderAttr || orderAttr.empty()) + resOrderAttr = nullptr; + assert(remainingSgCount == 1 && "not all subgroups distributed"); + srcLayout = buildLayout(context, sgLayout, sgData, + /*instData=*/{}, /*laneLayout=*/{}, + /*laneData=*/{}, resOrderAttr); + } + } else if (layoutKind == xegpu::LayoutKind::InstData) { + xegpu::SliceAttr consumerSliceLayout = + dyn_cast_if_present(consumerLayout); + auto consumerReductionDims = + consumerSliceLayout + ? 
SmallVector(consumerSliceLayout.getDims().asArrayRef()) + : SmallVector({}); + // A[i] reduced from A[i, j] is stored out directly, use vertical Lane + // layout like [16, 1] + bool verticalLaneLayout = consumerReductionDims.empty() && + reductionDims.size() == 1 && + reductionDims[0] == (srcRank - 1); + auto [laneLayout, laneData] = computeReductionLaneLayoutAndData( + srcShape, reductionDims, subgroupSize, maxReduceVectorSize, + verticalLaneLayout); + // inst_data is the per-instruction data, i.e. the element-wise product of + // lane_layout and lane_data. + SmallVector instData(srcRank); + for (int i = 0; i < srcRank; i++) + instData[i] = laneLayout[i] * laneData[i]; + srcLayout = + buildInstDataLayoutWithLane(context, instData, laneLayout, laneData); + } else if (layoutKind == xegpu::LayoutKind::Lane) { + // Only the innermost two dimensions are distributed; all leading dimensions + // are assumed to be unit dimensions. + assert(leadingDimsAreUnit(srcShape, /*numInnerDims=*/2) && + "Lane reduction layout assumes all leading (non-innermost-two) " + "dimensions are unit dimensions"); + xegpu::SliceAttr consumerSliceLayout = + dyn_cast_if_present(consumerLayout); + auto consumerReductionDims = + consumerSliceLayout + ? 
SmallVector(consumerSliceLayout.getDims().asArrayRef()) + : SmallVector({}); + if (consumerSliceLayout && + consumerSliceLayout.getDims().asArrayRef().equals(reductionDims)) { + // at the lane level, the consumerSliceLayout can be directly reused + // since the inst_data propagation already insert convert_layout if + // the layout is not consistent + srcLayout = consumerSliceLayout.getParent(); + } else { + bool verticalLaneLayout = consumerReductionDims.empty() && + reductionDims.size() == 1 && + reductionDims[0] == (srcRank - 1); + auto [laneLayout, laneData] = computeReductionLaneLayoutAndData( + srcShape, reductionDims, subgroupSize, maxReduceVectorSize, + verticalLaneLayout); + srcLayout = buildLaneLayout(context, laneLayout, laneData); } } - if (!bestPick) - return std::nullopt; - - SmallVector sgLayout = {static_cast(bestPick->first), - static_cast(bestPick->second)}; - SmallVector sgDataA = {static_cast(aTy.getShape()[0] / sgLayout[0]), - static_cast(aTy.getShape()[1])}; - SmallVector sgDataB = { - static_cast(bTy.getShape()[0]), - static_cast(bTy.getShape()[1] / sgLayout[1])}; - SmallVector sgDataCD = { - static_cast(cdTy.getShape()[0] / sgLayout[0]), - static_cast(cdTy.getShape()[1] / sgLayout[1])}; - - auto dpasALayout = - xegpu::LayoutAttr::get(context, DenseI32ArrayAttr::get(context, sgLayout), - DenseI32ArrayAttr::get(context, sgDataA), nullptr, - nullptr, nullptr, nullptr); - auto dpasBLayout = - xegpu::LayoutAttr::get(context, DenseI32ArrayAttr::get(context, sgLayout), - DenseI32ArrayAttr::get(context, sgDataB), nullptr, - nullptr, nullptr, nullptr); - auto dpasCDLayout = - xegpu::LayoutAttr::get(context, DenseI32ArrayAttr::get(context, sgLayout), - DenseI32ArrayAttr::get(context, sgDataCD), nullptr, - nullptr, nullptr, nullptr); - return std::make_tuple(dpasALayout, dpasBLayout, dpasCDLayout); + return xegpu::SliceAttr::get(context, srcLayout, + DenseI64ArrayAttr::get(context, reductionDims)); } -/// Sets up the anchor layouts for dpas operands (A, 
B, and C/D). -/// The numSg and consumerLayout (optional) are only used by sg layout -/// creation. -std::optional< - std::tuple> -xegpu::setupDpasLayout(xegpu::LayoutKind layoutKind, VectorType aTy, - VectorType bTy, VectorType cdTy, - xegpu::DistributeLayoutAttr consumerLayout, int numSg, - const xegpu::uArch::uArch *uArch) { - auto context = aTy.getContext(); - const auto *uArchInstruction = - dyn_cast(uArch->getInstruction( - xegpu::uArch::InstructionKind::SubgroupMatrixMultiplyAcc)); +/// Sets up layout for Reduction operations by creating a SliceAttr for the +/// result. +xegpu::SliceAttr +xegpu::setupReductionResultLayout(xegpu::LayoutKind layoutKind, + VectorType srcVecTy, + const xegpu::uArch::uArch *uArch) { + + auto srcShape = srcVecTy.getShape(); + auto context = srcVecTy.getContext(); + auto subgroupSize = uArch->getSubgroupSize(); + xegpu::LayoutAttr srcLayout; if (layoutKind == xegpu::LayoutKind::Subgroup) { - assert(numSg > 0 && - "Number of subgroups must be provided for sg layout creation."); - return getupDpasSubgroupLayouts(context, aTy, bTy, cdTy, consumerLayout, - numSg, uArch); + assert(false && + "subgroup layout assignment not supported for reduction (op " + "is not expected at this level)."); } else if (layoutKind == xegpu::LayoutKind::InstData) { - auto instDataVecs = getDpasInstDataVectors(aTy, bTy, cdTy, uArch); - if (!instDataVecs) - return std::nullopt; - auto [instDataA, instDataB, instDataCD] = *instDataVecs; - return std::make_tuple( - xegpu::LayoutAttr::get( - context, SmallVector(instDataA.begin(), instDataA.end())), - xegpu::LayoutAttr::get( - context, SmallVector(instDataB.begin(), instDataB.end())), - xegpu::LayoutAttr::get( - context, SmallVector(instDataCD.begin(), instDataCD.end()))); + assert(false && + "instData layout assignment not supported for reduction (op " + "is not expected at this level)."); } else if (layoutKind == xegpu::LayoutKind::Lane) { - auto aLayout = getDefaultLaneLayout2DBlockIo( - aTy, uArch, 
uArchInstruction->getPackedFormatBitSizeA()); - auto bLayout = getDefaultLaneLayout2DBlockIo( - bTy, uArch, uArchInstruction->getPackedFormatBitSizeB(), true); - auto cdLayout = getDefaultLaneLayout2DBlockIo( - cdTy, uArch /*, packingSize = std::nullopt */); - return std::make_tuple(aLayout, bLayout, cdLayout); + SmallVector laneLayout(1), laneData(1); + laneLayout[0] = std::min(static_cast(subgroupSize), srcShape[0]); + laneData[0] = 1; + srcLayout = buildLaneLayout(context, laneLayout, laneData); } - return std::nullopt; + + auto result = xegpu::SliceAttr::get(context, srcLayout, + DenseI64ArrayAttr::get(context, 0)); + return result; } -/// Helper to create a scale layout derived from a matrix operand layout. -/// The scale layout is computed by mapping each dimension of the matrix layout -/// to the corresponding scale tensor dimension using the ratio between the -/// matrix and scale shapes. +/// Adjusts `consumerLayout`'s innermost-dim data field selected by +/// `layoutKind` so that the source layout can be safely inferred by dividing +/// that value by `ratio`. Doubles the value until the divisibility constraint +/// is met, bounded above by `bound` like result-shape. +/// +/// Used by ops whose source relates to the result by a fixed factor along the +/// innermost dim (e.g., bitcast: bitwidth ratio; interleave: 2x). 
+/// +/// Divisibility constraints per LayoutKind: +/// - Subgroup: sgData[innermost] % ratio == 0 +/// - InstData: instData[innermost] % (laneLayout[innermost] * ratio) == 0 +/// (laneLayout falls back to subgroupSize if absent) +/// - Lane: laneData[innermost] % ratio == 0 static xegpu::DistributeLayoutAttr -createScaleLayout(mlir::MLIRContext *context, VectorType matrixTy, - VectorType scaleTy, xegpu::DistributeLayoutAttr matrixLayout, - bool isBScale, const xegpu::uArch::uArch *uArch) { - if (!scaleTy || !matrixLayout) - return nullptr; - - // Calculate scaling factor by dividing matrix shape by scale shape - ArrayRef matrixShape = matrixTy.getShape(); - ArrayRef scaleShape = scaleTy.getShape(); +adjustInnermostDimForDivisibility(xegpu::DistributeLayoutAttr consumerLayout, + xegpu::LayoutKind layoutKind, + size_t innerMostDim, int ratio, int64_t bound, + const xegpu::uArch::uArch *uArch) { + SmallVector sgData = consumerLayout.getEffectiveSgDataAsInt(); + SmallVector instData = consumerLayout.getEffectiveInstDataAsInt(); + SmallVector laneData = consumerLayout.getEffectiveLaneDataAsInt(); + SmallVector laneLayout = + consumerLayout.getEffectiveLaneLayoutAsInt(); - // Scale shapes can be 1D or 2D, handle both cases - if (scaleShape.empty()) - return nullptr; + int64_t sgDataValue = -1; + int64_t instDataValue = -1; + int64_t laneDataValue = -1; - auto uArchInstruction = - dyn_cast( - uArch->getInstruction( - xegpu::uArch::InstructionKind::SubgroupScaledMatrixMultiplyAcc)); + if (layoutKind == xegpu::LayoutKind::Subgroup) { + sgDataValue = sgData[innerMostDim]; + while ((sgDataValue <= bound) && (sgDataValue % ratio) != 0) + sgDataValue *= 2; + } else if (layoutKind == xegpu::LayoutKind::InstData) { + instDataValue = instData[innerMostDim]; + const int innermostDimLaneLayout = laneLayout.empty() + ? 
uArch->getSubgroupSize() + : laneLayout[innerMostDim]; + while ((instDataValue <= bound) && + (instDataValue % (innermostDimLaneLayout * ratio) != 0)) + instDataValue *= 2; + assert((bound % instDataValue) == 0 && + "bound, instData, and laneLayout for innermost must be 2^n!"); + } else if (layoutKind == xegpu::LayoutKind::Lane) { + laneDataValue = laneData[innerMostDim]; + while ((laneDataValue <= bound) && (laneDataValue % ratio) != 0) + laneDataValue *= 2; + } - int64_t rank = matrixLayout.getRank(); - assert(rank >= 2 && "dpas layouts must be at least two dimensions"); + return consumerLayout.setDimData(innerMostDim, sgDataValue, instDataValue, + laneDataValue); +} - SmallVector sgLayout = matrixLayout.getEffectiveSgLayoutAsInt(); - SmallVector sgData = matrixLayout.getEffectiveSgDataAsInt(); - SmallVector instData = matrixLayout.getEffectiveInstDataAsInt(); - SmallVector laneLayout = matrixLayout.getEffectiveLaneLayoutAsInt(); - SmallVector laneData = matrixLayout.getEffectiveLaneDataAsInt(); - auto order = matrixLayout.getOrder(); +/// Sets up the result layout for a bitcast operation. +/// When casting to a smaller bitwidth, adjusts the layout dimensions (sgData, +/// instData, or laneData) by multiplying by the bitwidth ratio to ensure the +/// result layout can be correctly divided back to the source layout during +/// inference. +/// +/// Examples: +/// 1. Casting f32 -> f16 (32-bit to 16-bit, bitWidthRatio = 2): +/// Consumer layout: instData=[1, 16], subgroupSize=16 +/// Source shape: [8, 32] +/// Result layout: instData=[1, 32] (16 * 2) +/// The innermost dimension is multiplied by 2 to maintain consistency. +/// +/// 2. Casting f32 -> i8 (32-bit to 8-bit, bitWidthRatio = 4): +/// Consumer instData=[1, 16], subgroupSize=16 +/// Source shape: [4, 128] +/// adjust the instData from [1, 16] to [1, 16 * 4 = 64] +/// +/// 3. 
Casting i8 -> i32 (8-bit to 32-bit, bitWidthRatio = 1/4): +/// Consumer layout: laneLayout=[1, 16], laneData=[1, 4] +/// No adjustment needed - returns consumer layout directly. +/// +xegpu::DistributeLayoutAttr xegpu::setupBitCastResultLayout( + xegpu::LayoutKind layoutKind, VectorType srcVecTy, VectorType resVecTy, + DistributeLayoutAttr consumerLayout, const xegpu::uArch::uArch *uArch) { - SmallVector scaleSgLayout; - SmallVector scaleSgData; - if (!sgLayout.empty() && !sgData.empty()) { - scaleSgLayout.assign(sgLayout.begin(), sgLayout.end()); - scaleSgData.assign(sgData.begin(), sgData.end()); - scaleSgData[rank - 2] = std::max( - scaleShape[rank - 2] / (matrixShape[rank - 2] / sgData[rank - 2]), 1); - scaleSgData[rank - 1] = std::max( - scaleShape[rank - 1] / (matrixShape[rank - 1] / sgData[rank - 1]), 1); - } + int srcElemTyBitWidth = srcVecTy.getElementType().getIntOrFloatBitWidth(); + int resElemTyBitWidth = resVecTy.getElementType().getIntOrFloatBitWidth(); - // For DPAS_MX scales: if matrix has inst_data, scale needs adjusted - // inst_data. Scale inst_data is derived from matrix inst_data divided by - // scale factor. 
- SmallVector scaleInstData; - if (!instData.empty()) { - scaleInstData.assign(instData.begin(), instData.end()); - if (isBScale) - scaleInstData[rank - 2] = std::max( - scaleShape[rank - 2] / (matrixShape[rank - 2] / instData[rank - 2]), - 1); - else - scaleInstData[rank - 1] = std::max( - scaleShape[rank - 1] / (matrixShape[rank - 1] / instData[rank - 1]), - 1); - } + ArrayRef srcShape = srcVecTy.getShape(); + ArrayRef resShape = resVecTy.getShape(); - SmallVector scaleLaneLayout; - SmallVector scaleLaneData; - if (!laneLayout.empty() && !laneData.empty()) { - scaleLaneLayout.assign(laneLayout.begin(), laneLayout.end()); - scaleLaneData.assign(laneData.begin(), laneData.end()); - bool isRowMajor = uArchInstruction->isLaneLayoutRowMajorOrder(); - if (isBScale ^ isRowMajor) { - std::swap(scaleLaneLayout[rank - 2], scaleLaneLayout[rank - 1]); - scaleLaneLayout[rank - 2] = - std::min(scaleShape[rank - 2], scaleLaneLayout[rank - 2]); - } - scaleLaneData[rank - 2] = - std::max(scaleShape[rank - 2] / scaleLaneLayout[rank - 2], 1); - scaleLaneData[rank - 1] = - std::max(scaleShape[rank - 1] / scaleLaneLayout[rank - 1], 1); - } - return xegpu::LayoutAttr::get( - context, - scaleSgLayout.empty() ? nullptr - : DenseI32ArrayAttr::get(context, scaleSgLayout), - scaleSgData.empty() ? nullptr - : DenseI32ArrayAttr::get(context, scaleSgData), - scaleInstData.empty() ? nullptr - : DenseI32ArrayAttr::get(context, scaleInstData), - scaleLaneLayout.empty() - ? nullptr - : DenseI32ArrayAttr::get(context, scaleLaneLayout), - scaleLaneData.empty() ? nullptr - : DenseI32ArrayAttr::get(context, scaleLaneData), - order); -} + assert(consumerLayout.getRank() == static_cast(srcShape.size()) && + "laneData must be available for all dimensions"); -/// Sets up the anchor layouts for dpas_mx operands (A, B, C/D, A_scale, and -/// B_scale). The numSg and consumerLayout (optional) are only used by sg layout -/// creation. 
-std::optional< - std::tuple> -xegpu::setupDpasMxLayout(xegpu::LayoutKind layoutKind, VectorType aTy, - VectorType bTy, VectorType cdTy, VectorType aScaleTy, - VectorType bScaleTy, - xegpu::DistributeLayoutAttr consumerLayout, int numSg, - const xegpu::uArch::uArch *uArch) { - auto context = aTy.getContext(); + // Casting to same/larger element type: result has fewer (or equal) elements + // along the innermost dim, no adjustment needed. + if (srcElemTyBitWidth <= resElemTyBitWidth) + return consumerLayout; - if (layoutKind == xegpu::LayoutKind::Subgroup) { - assert(numSg > 0 && - "Number of subgroups must be provided for sg layout creation."); - auto dpasLayouts = getupDpasSubgroupLayouts(context, aTy, bTy, cdTy, - consumerLayout, numSg, uArch); - if (!dpasLayouts) - return std::nullopt; + // Casting to smaller element type: result has more elements along innermost + // dim. Adjust the innermost data field upward so the source layout can be + // recovered by dividing by bitWidthRatio. + size_t innerMostDim = srcShape.size() - 1; + int bitWidthRatio = srcElemTyBitWidth / resElemTyBitWidth; + return adjustInnermostDimForDivisibility(consumerLayout, layoutKind, + innerMostDim, bitWidthRatio, + resShape[innerMostDim], uArch); +} - auto [dpasALayout, dpasBLayout, dpasCDLayout] = *dpasLayouts; +/// Sets up the result layout for an interleave operation to ensure the source +/// layout can be safely derived. Interleave doubles the innermost dimension, +/// so the result layout must ensure that laneData is a multiple +/// of 2, and instData must be divisible by innermostDimLaneLayout * 2. 
+/// +/// Example: +/// Interleave: vector<128x256xf4> -> vector<128x512xf4> +/// Consumer layout: laneLayout=[1, 16], laneData=[1, 4], instData=[1, 64] +/// Result layout adjustment to ensure source can be safely inferred: +/// - laneData must be >= 2 and multiple of 2 (so source = laneData/2 is +/// valid) +/// - instData must be divisible by (16 * 2 = 32) (so source = instData/2 is +/// valid) +/// - Adjusted instData: ensure (instData % 32 == 0) +/// +xegpu::DistributeLayoutAttr xegpu::setupInterleaveResultLayout( + xegpu::LayoutKind layoutKind, VectorType srcVecTy, VectorType resVecTy, + DistributeLayoutAttr consumerLayout, const xegpu::uArch::uArch *uArch) { - // Create scale layouts - auto aScaleLayout = - createScaleLayout(context, aTy, aScaleTy, dpasALayout, false, uArch); + ArrayRef resShape = resVecTy.getShape(); + assert(consumerLayout.getRank() == static_cast(resShape.size()) && + "consumer layout rank must match source shape rank"); - auto bScaleLayout = - createScaleLayout(context, bTy, bScaleTy, dpasBLayout, true, uArch); + // Interleave doubles the innermost dimension (ratio = 2). Adjust the + // innermost data field so the source layout can be recovered by dividing + // by 2. + const size_t innerMostDim = resShape.size() - 1; + constexpr int ratio = 2; + return adjustInnermostDimForDivisibility(consumerLayout, layoutKind, + innerMostDim, ratio, + resShape[innerMostDim], uArch); +} - return std::make_tuple(dpasALayout, dpasBLayout, dpasCDLayout, aScaleLayout, - bScaleLayout); - } else if (layoutKind == xegpu::LayoutKind::InstData) { - auto instDataVecs = - getDpasInstDataVectors(aTy, bTy, cdTy, uArch, /*isDpasMx=*/true); - if (!instDataVecs) - return std::nullopt; - auto [instDataA, instDataB, instDataCD] = *instDataVecs; +/// Sets up the result layout for an insert strided slice operation. +/// Creates a result layout based on the specified layout kind (InstData or +/// Lane). 
+xegpu::DistributeLayoutAttr xegpu::setupInsertStridedSliceResultLayout( + xegpu::LayoutKind layoutKind, VectorType srcVectorTy, + VectorType resVectorTy, xegpu::DistributeLayoutAttr consumerLayout, + const xegpu::uArch::uArch *uArch) { - auto dpasALayout = xegpu::LayoutAttr::get( - context, SmallVector(instDataA.begin(), instDataA.end())); - auto dpasBLayout = xegpu::LayoutAttr::get( - context, SmallVector(instDataB.begin(), instDataB.end())); - auto dpasCDLayout = xegpu::LayoutAttr::get( - context, SmallVector(instDataCD.begin(), instDataCD.end())); + xegpu::DistributeLayoutAttr requiredResLayout; + SmallVector consumerInstData = + consumerLayout.getEffectiveInstDataAsInt(); + SmallVector consumerLaneData = + consumerLayout.getEffectiveLaneDataAsInt(); + SmallVector consumerLaneLayout = + consumerLayout.getEffectiveLaneLayoutAsInt(); + ArrayRef srcShape = srcVectorTy.getShape(); + int64_t laneDataValue = -1; - // Create scale layouts - auto aScaleLayout = - createScaleLayout(context, aTy, aScaleTy, dpasALayout, false, uArch); - auto bScaleLayout = - createScaleLayout(context, bTy, bScaleTy, dpasBLayout, true, uArch); + requiredResLayout = consumerLayout; + int srcRank = srcShape.size(); - return std::make_tuple(dpasALayout, dpasBLayout, dpasCDLayout, aScaleLayout, - bScaleLayout); + if (layoutKind == xegpu::LayoutKind::Subgroup || + layoutKind == xegpu::LayoutKind::InstData) { + assert(false && "subgroup/instData layout assignment not supported for " + "insertStridedSlice."); } else if (layoutKind == xegpu::LayoutKind::Lane) { - const auto *uArchInstruction = - dyn_cast(uArch->getInstruction( - xegpu::uArch::InstructionKind::SubgroupMatrixMultiplyAcc)); - auto aLayout = getDefaultLaneLayout2DBlockIo( - aTy, uArch, uArchInstruction->getPackedFormatBitSizeA()); - auto bLayout = getDefaultLaneLayout2DBlockIo( - bTy, uArch, uArchInstruction->getPackedFormatBitSizeB(), true); - auto cdLayout = getDefaultLaneLayout2DBlockIo(cdTy, uArch); - - // Create scale layouts - 
auto aScaleLayout = - createScaleLayout(context, aTy, aScaleTy, aLayout, false, uArch); - auto bScaleLayout = - createScaleLayout(context, bTy, bScaleTy, bLayout, true, uArch); - - return std::make_tuple(aLayout, bLayout, cdLayout, aScaleLayout, - bScaleLayout); + for (int dim = 0; dim < srcRank; dim++) { + assert(srcShape[dim] % consumerLaneLayout[dim] == 0 && + "srcShape must be divisible by laneLayout for all dimensions"); + laneDataValue = std::min(srcShape[dim] / consumerLaneLayout[dim], + consumerLaneData[dim]); + requiredResLayout = + requiredResLayout.setDimData(dim, -1, -1, laneDataValue); + } } - return std::nullopt; + return requiredResLayout; } +/// Back-propagates a known result layout to the layout required on `operand` +/// for a non-anchor (layout-propagating) vector op. Dispatches on the op kind — +/// broadcast, (multi)reduction, bitcast, shape/transpose, insert/extract, +/// interleave, etc. — applying the shape/permutation/bitwidth transform to +/// derive the source layout; elementwise and pass-through ops reuse resLayout +/// as-is. Returns nullptr for unknown ops or an absent result layout. xegpu::DistributeLayoutAttr xegpu::inferSourceLayoutFromResultForNonAnchorOp( OpOperand &operand, xegpu::DistributeLayoutAttr resLayout) { if (!resLayout) @@ -1837,8 +2572,8 @@ xegpu::DistributeLayoutAttr xegpu::inferSourceLayoutFromResultForNonAnchorOp( shapeCast.getSourceVectorType().getShape()); } - // For vector::InsertStridedSliceOp, infer source layout from result layout. - // Dest vector must have the same layout as the result. + // For vector::InsertStridedSliceOp, infer source layout from result + // layout. Dest vector must have the same layout as the result. 
if (auto insertSlice = dyn_cast(op)) { if (idx == 0) { return xegpu::inferInsertStridedSliceSourceLayout( @@ -1907,14 +2642,17 @@ xegpu::DistributeLayoutAttr xegpu::inferSourceLayoutFromResultForNonAnchorOp( if (dyn_cast(op)) return resLayout; - // For elementwise operations, all operands must have the same layout as the - // result. + // For elementwise operations, all operands must have the same layout as + // the result. if (OpTrait::hasElementwiseMappableTraits(op) && op->getNumResults() == 1) return resLayout; return nullptr; } +/// Returns the layout required on `operand`: anchor ops report their declared +/// per-operand layout directly; non-anchor ops back-derive it from their result +/// layout via inferSourceLayoutFromResultForNonAnchorOp. xegpu::DistributeLayoutAttr xegpu::getConsumerLayoutAt(OpOperand &operand) { Operation *op = operand.getOwner(); // Anchor ops declare the layout they diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp index 6a37ae6502b2d..8600492e4bf41 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPropagateLayout.cpp @@ -130,48 +130,6 @@ struct LayoutInfo { void set(const xegpu::DistributeLayoutAttr &layout) { storage = layout; } }; -SmallVector LayoutInfo::getLaneLayout() const { - if (!isAssigned()) - return {}; - return llvm::map_to_vector(storage.getEffectiveLaneLayoutAsInt(), - [](int64_t val) { return static_cast(val); }); -} - -SmallVector LayoutInfo::getLaneData() const { - if (!isAssigned()) - return {}; - return llvm::map_to_vector(storage.getEffectiveLaneDataAsInt(), - [](int64_t val) { return static_cast(val); }); -} - -SmallVector LayoutInfo::getInstData() const { - if (!isAssigned()) - return {}; - return llvm::map_to_vector(storage.getEffectiveInstDataAsInt(), - [](int64_t val) { return static_cast(val); }); -} - -SmallVector LayoutInfo::getSgLayout() const { - if 
(!isAssigned()) - return {}; - return llvm::map_to_vector(storage.getEffectiveSgLayoutAsInt(), - [](int64_t val) { return static_cast(val); }); -} - -SmallVector LayoutInfo::getSgData() const { - if (!isAssigned()) - return {}; - return llvm::map_to_vector(storage.getEffectiveSgDataAsInt(), - [](int64_t val) { return static_cast(val); }); -} - -SmallVector LayoutInfo::getOrder() const { - if (!isAssigned() || !storage.getOrder()) - return {}; - return llvm::map_to_vector(storage.getOrder().asArrayRef(), - [](int64_t val) { return static_cast(val); }); -} - void LayoutInfo::print(raw_ostream &os) const { if (isAssigned()) { os << storage; @@ -191,64 +149,6 @@ LayoutInfo LayoutInfo::join(const LayoutInfo &lhs, const LayoutInfo &rhs) { llvm_unreachable("Join should not be triggered by layout propagation."); } -/// Construct a new layout with the transposed inst_data or lane_layout, -/// lane_data. -LayoutInfo LayoutInfo::transpose(ArrayRef permutation) const { - if (!isAssigned()) - return {}; - // Check if the permutation is valid. 
- llvm::SmallSet seen(permutation.begin(), permutation.end()); - bool hasDuplicates = seen.size() != permutation.size(); - bool withinRange = llvm::all_of(permutation, [&](int64_t idx) { - return idx >= 0 && idx < static_cast(permutation.size()); - }); - - if (!withinRange || hasDuplicates) { - assert(false && "Invalid permutation for transpose."); - return {}; - } - - SmallVector laneLayout; - SmallVector laneData; - SmallVector instData; - SmallVector sgLayout; - SmallVector sgData; - SmallVector order; - - for (int64_t idx : permutation) { - if (getLaneLayout().size()) { - laneLayout.push_back(static_cast(getLaneLayout()[idx])); - laneData.push_back(static_cast(getLaneData()[idx])); - } - if (getInstData().size()) - instData.push_back(static_cast(getInstData()[idx])); - if (getSgData().size()) { - sgLayout.push_back(static_cast(getSgLayout()[idx])); - sgData.push_back(static_cast(getSgData()[idx])); - } - if (getOrder().size()) { - order.push_back(static_cast(getOrder()[idx])); - } - } - auto orderAttr = order.size() - ? 
DenseI32ArrayAttr::get(storage.getContext(), order) - : nullptr; - xegpu::LayoutAttr layoutAttr; - if (getLaneLayout().size()) - layoutAttr = - xegpu::LayoutAttr::get(storage.getContext(), laneLayout, laneData); - if (getInstData().size()) - layoutAttr = xegpu::LayoutAttr::get(storage.getContext(), instData); - if (getSgData().size()) - layoutAttr = xegpu::LayoutAttr::get( - storage.getContext(), - DenseI32ArrayAttr::get(storage.getContext(), sgLayout), - DenseI32ArrayAttr::get(storage.getContext(), sgData), - /*inst_data =*/nullptr, /*lane_layout =*/nullptr, - /*lane_data =*/nullptr, orderAttr); - return LayoutInfo(layoutAttr); -} - //===----------------------------------------------------------------------===// // LayoutInfoLattice //===----------------------------------------------------------------------===// @@ -259,57 +159,6 @@ struct LayoutInfoLattice : public Lattice { using Lattice::Lattice; }; -/// Helper Functions to get default layouts. A `default layout` is a layout that -/// is assigned to a value when the layout is not fixed by some anchor operation -/// (like DPAS). - -/// Helper Function to get the default layout for uniform values like constants. -/// For 1D vector, lane_layout is [subgroupSize] and lane_data is [1]. -/// For 2D vector, lane_layout is [1, subgroupSize] and lane_data is [1, 1]. -/// For ND vector (N>2), leading dims get unit lane_layout and lane_data. -static LayoutInfo getDefaultSIMTLayoutInfo(mlir::MLIRContext *ctx, - unsigned rank, - const xegpu::uArch::uArch *uArch) { - assert(rank >= 1 && "Expected at least 1D vector."); - if (rank == 1) { - return LayoutInfo( - xegpu::LayoutAttr::get(ctx, {uArch->getSubgroupSize()}, {1})); - } - // For rank >= 2, lane_layout is [1, ..., 1, subgroupSize] and - // lane_data is [1, ..., 1, 1]. 
- SmallVector laneLayout(rank, 1); - SmallVector laneData(rank, 1); - laneLayout[rank - 1] = uArch->getSubgroupSize(); - return LayoutInfo(xegpu::LayoutAttr::get(ctx, laneLayout, laneData)); -} - -/// Helper to get the default layout for 2D block operations. -/// For ND (N>2) types, leading dimensions get unit layout/data values. -template -static LayoutInfo getSIMTLayoutInfoBlockIO(Ty ty, - const xegpu::uArch::uArch *uArch, - unsigned packingSize) { - // Expecting at least 1D. - assert(ty.getRank() >= 1 && "Expected at least 1D vector."); - // Expecting int or float element type. - assert(ty.getElementType().isIntOrFloat() && - "Expected int or float element type."); - // If the rank is 1, then return default layout for 1D vector. - if (ty.getRank() == 1) - return getDefaultSIMTLayoutInfo(ty.getContext(), 1, uArch); - // Packing factor is determined by the element type bitwidth. - unsigned bitwidth = ty.getElementType().getIntOrFloatBitWidth(); - int packingFactor = bitwidth < packingSize ? packingSize / bitwidth : 1; - // For rank >= 2, distribute along the last dimension with leading units. - unsigned rank = ty.getRank(); - SmallVector laneLayout(rank, 1); - SmallVector laneData(rank, 1); - laneLayout[rank - 1] = uArch->getSubgroupSize(); - laneData[rank - 1] = packingFactor; - return LayoutInfo( - xegpu::LayoutAttr::get(ty.getContext(), laneLayout, laneData)); -} - //===----------------------------------------------------------------------===// // LayoutInfoPropagation //===----------------------------------------------------------------------===// @@ -549,9 +398,9 @@ bool LayoutInfoPropagation::hasParamsOfLayoutKind( // wgShape = [128, 64], instData = [8, 16], sgCount = 32 // Returns layouts: // [(8,4), (16,2)], which correspond to sgData [16,16] and [8,32]. 
-SmallVector> getValidLayouts(ArrayRef wgShape, - ArrayRef instData, - int64_t sgCount) { +SmallVector> +getSgLayoutCandidates(ArrayRef wgShape, ArrayRef instData, + int64_t sgCount) { SmallVector> candidates; for (int sgLayout0 = 1; sgLayout0 <= sgCount; ++sgLayout0) { if (sgCount % sgLayout0) @@ -578,7 +427,15 @@ SmallVector> getValidLayouts(ArrayRef wgShape, return candidates; } -FailureOr getNumSg(Operation *op, const int sgSize) { +FailureOr +getNumSg(Operation *op, const int sgSize, + xegpu::DistributeLayoutAttr consumerLayout = nullptr) { + // first look for the number of subgroups required by the consumer layout + if (consumerLayout) { + auto sgLayout = consumerLayout.getEffectiveSgLayoutAsInt(); + if (!sgLayout.empty()) + return llvm::product_of(sgLayout); + } // Oblivious to workitem layout, the total count matters. auto gpuFunc = op->getParentOfType(); if (!gpuFunc) @@ -595,53 +452,48 @@ void LayoutInfoPropagation::visitPrefetchNdOp( ArrayRef results) { LayoutInfo prefetchLayout; + const uArch *uArch = getUArch(getChipStr(prefetch).value_or("")); + if (!uArch) + return; xegpu::DistributeLayoutAttr anchorLayout = prefetch.getLayoutAttr(); if (hasParamsOfLayoutKind(anchorLayout)) { prefetchLayout = LayoutInfo(anchorLayout); + if (layoutKind == xegpu::LayoutKind::InstData) { + const auto *uArchInstruction = + dyn_cast( + uArch->getInstruction( + xegpu::uArch::InstructionKind::Subgroup2DBlockPrefetch)); + if (!uArchInstruction) + return; + auto completed = xegpu::completeBlockStoreLaneLayoutFromInstData( + anchorLayout, prefetch.getTensorDescType().getElementType(), + uArchInstruction, uArch->getSubgroupSize()); + if (!completed) { + prefetch.emitWarning( + "Failed to identify lane layouts for the specified inst_data."); + return; + } + prefetch.setLayoutAttr(*completed); + prefetchLayout = LayoutInfo(*completed); + } } else { - // Here we assign the default layout to the tensor descriptor operand of - // prefetch. 
auto tdescTy = prefetch.getTensorDescType(); - - const uArch *uArch = getUArch(getChipStr(prefetch).value_or("")); - if (!uArch) - return; - const auto *uArchInstruction = - dyn_cast( - uArch->getInstruction( - xegpu::uArch::InstructionKind::Subgroup2DBlockPrefetch)); - - auto blockWHC = - uArchInstruction->getBlockWidthHeightCount(tdescTy.getElementType()); - if (!blockWHC) - prefetch.emitWarning("No known block params found for the element type."); - auto [bWidth, bHeight, bCount] = blockWHC.value(); - SmallVector instData; - int instWidth = xegpu::getLargestDivisor( - static_cast(tdescTy.getDimSize(tdescTy.getRank() - 1)), bWidth); - if (instWidth == -1) + auto numSgOrErr = getNumSg(prefetch, uArch->getSubgroupSize()); + if (layoutKind == xegpu::LayoutKind::Subgroup && failed(numSgOrErr)) { prefetch.emitWarning( - "No suitable instruction multiple found for the given shape."); - if (tdescTy.getRank() == 1) - instData = {instWidth}; - else { - int instHeight = xegpu::getLargestDivisor( - static_cast(tdescTy.getDimSize(tdescTy.getRank() - 2)), bHeight); - if (instHeight == -1) - prefetch.emitWarning( - "No suitable instruction multiple found for the given shape."); - instData = {instHeight, instWidth}; + "Unable to determine the number of subgroups for the operation."); + return; } - if (layoutKind == xegpu::LayoutKind::InstData) - prefetchLayout = - LayoutInfo(xegpu::LayoutAttr::get(tdescTy.getContext(), instData)); - else - prefetchLayout = getSIMTLayoutInfoBlockIO( - tdescTy, uArch, uArchInstruction->getPackedFormatBitSize()); - - prefetch.setLayoutAttr( - dyn_cast(prefetchLayout.get())); + auto layoutAttr = xegpu::setupPrefetchNdAnchorLayout( + layoutKind, tdescTy, numSgOrErr.value_or(0), uArch); + if (!layoutAttr) { + prefetch.emitWarning( + "Failed to determine required layout for prefetch_nd."); + return; + } + prefetchLayout = LayoutInfo(layoutAttr); + prefetch.setLayoutAttr(layoutAttr); } // Propagate the layout to the source tensor descriptor. 
propagateIfChanged(operands[0], operands[0]->meet(prefetchLayout)); @@ -669,11 +521,13 @@ void LayoutInfoPropagation::visitVectorMultiReductionOp( const uArch *uArch = getUArch(xegpu::getChipStr(reduction).value_or("")); if (!uArch) return; - int numSg = 0; - if (layoutKind == xegpu::LayoutKind::Subgroup) { - auto numSgOrErr = getNumSg(reduction, uArch->getSubgroupSize()); - if (succeeded(numSgOrErr)) - numSg = numSgOrErr.value(); + + auto numSgOrErr = + getNumSg(reduction, uArch->getSubgroupSize(), consumerLayoutAttr); + if (layoutKind == xegpu::LayoutKind::Subgroup && failed(numSgOrErr)) { + reduction.emitWarning( + "Unable to determine the number of subgroups for the operation."); + return; } // The result layout represents the layout requirements of the operation. @@ -682,7 +536,8 @@ void LayoutInfoPropagation::visitVectorMultiReductionOp( // propagated from consumer op, the conflict is resolved in later phase by // converting the required result layout to the consumer layout auto requiredResLayoutAttr = xegpu::setupMultiReductionResultLayout( - layoutKind, sourceTy, consumerLayoutAttr, reductionDims, numSg, uArch); + layoutKind, sourceTy, consumerLayoutAttr, reductionDims, + numSgOrErr.value_or(0), uArch); xegpu::setTemporaryLayout(reduction->getResult(0), requiredResLayoutAttr); @@ -757,6 +612,13 @@ void LayoutInfoPropagation::visitShapeCastOp( xegpu::DistributeLayoutAttr srcLayoutAttr = xegpu::inferShapeCastSourceLayout(resultLayoutAttr, resShape, srcShape); + // TODO: turn this into a real pass failure once propagation failures are + // wired to signalPassFailure(). 
+ if (!srcLayoutAttr) { + shapeCast.emitWarning("Failed to infer source layout for shape_cast; " + "unsupported shape-cast pattern."); + return; + } propagateIfChanged(operands[0], operands[0]->meet(LayoutInfo(srcLayoutAttr))); } @@ -769,6 +631,13 @@ void LayoutInfoPropagation::visitDpasOp( LayoutInfo dpasBLayout; LayoutInfo dpasCDLayout; + const uArch *uArch = getUArch(getChipStr(dpas).value_or("")); + if (!uArch) + return; + VectorType aTy = dpas.getLhsType(); + VectorType bTy = dpas.getRhsType(); + VectorType cdTy = dpas.getResultType(); + xegpu::DistributeLayoutAttr anchorLayoutCD = dpas.getLayoutCdAttr(); if (hasParamsOfLayoutKind(anchorLayoutCD)) { xegpu::DistributeLayoutAttr anchorLayoutA = dpas.getLayoutAAttr(); @@ -780,35 +649,45 @@ void LayoutInfoPropagation::visitDpasOp( dpasALayout = LayoutInfo(anchorLayoutA); dpasBLayout = LayoutInfo(anchorLayoutB); dpasCDLayout = LayoutInfo(anchorLayoutCD); + if (layoutKind == xegpu::LayoutKind::InstData) { + auto completed = xegpu::completeDpasLaneLayoutFromInstData( + anchorLayoutA, anchorLayoutB, anchorLayoutCD, aTy, bTy, cdTy, uArch); + if (!completed) { + dpas.emitWarning( + "Failed to identify lane layouts for the specified inst_data."); + return; + } + auto [completedA, completedB, completedCD] = *completed; + dpas.setLayoutAAttr(completedA); + dpas.setLayoutBAttr(completedB); + dpas.setLayoutCdAttr(completedCD); + dpasALayout = LayoutInfo(completedA); + dpasBLayout = LayoutInfo(completedB); + dpasCDLayout = LayoutInfo(completedCD); + } } else { - const uArch *uArch = getUArch(getChipStr(dpas).value_or("")); - if (!uArch) - return; - VectorType aTy = dpas.getLhsType(); - VectorType bTy = dpas.getRhsType(); - VectorType cdTy = dpas.getResultType(); xegpu::DistributeLayoutAttr consumerLayoutAttr = nullptr; xegpu::DistributeLayoutAttr requiredCDLayoutAttr, requiredALayout, requiredBLayout; - int numSg = 0; - if (layoutKind == xegpu::LayoutKind::Subgroup) { - LayoutInfo consumerLayout = results[0]->getValue(); - if 
(!consumerLayout.isAssigned()) - return; - consumerLayoutAttr = - dyn_cast(consumerLayout.get()); - auto numSgOrErr = getNumSg(dpas, uArch->getSubgroupSize()); - if (failed(numSgOrErr)) { - dpas.emitWarning( - "Unable to determine the number of subgroups for the operation."); - return; - } - numSg = numSgOrErr.value(); + LayoutInfo consumerLayout = results[0]->getValue(); + if (!consumerLayout.isAssigned()) + return; + consumerLayoutAttr = + dyn_cast(consumerLayout.get()); + + auto numSgOrErr = + getNumSg(dpas, uArch->getSubgroupSize(), consumerLayoutAttr); + if (layoutKind == xegpu::LayoutKind::Subgroup && failed(numSgOrErr)) { + dpas.emitWarning( + "Unable to determine the number of subgroups for the operation."); + return; } - auto layouts = xegpu::setupDpasLayout(layoutKind, aTy, bTy, cdTy, - consumerLayoutAttr, numSg, uArch); + + auto layouts = + xegpu::setupDpasLayout(layoutKind, aTy, bTy, cdTy, consumerLayoutAttr, + numSgOrErr.value_or(0), uArch); if (!layouts.has_value()) { dpas.emitWarning( "Failed to determine required layouts for DPAS operands."); @@ -846,6 +725,24 @@ void LayoutInfoPropagation::visitDpasMxOp( xegpu::DistributeLayoutAttr anchorLayoutB = dpasMx.getLayoutBAttr(); xegpu::DistributeLayoutAttr anchorLayoutCD = dpasMx.getLayoutCdAttr(); + const uArch *uArch = getUArch(getChipStr(dpasMx).value_or("")); + if (!uArch) + return; + + VectorType aTy = dpasMx.getAType(); + VectorType bTy = dpasMx.getBType(); + VectorType cdTy = dpasMx.getResultType(); + + // Get scale types if present + VectorType aScaleTy; + VectorType bScaleTy; + Value scaleA = dpasMx.getScaleA(); + Value scaleB = dpasMx.getScaleB(); + if (scaleA) + aScaleTy = dyn_cast(scaleA.getType()); + if (scaleB) + bScaleTy = dyn_cast(scaleB.getType()); + // Check if all layouts are already set if (anchorLayoutA && anchorLayoutB && anchorLayoutCD && hasParamsOfLayoutKind(anchorLayoutA) && @@ -864,49 +761,55 @@ void LayoutInfoPropagation::visitDpasMxOp( dpasMxAScaleLayout = 
LayoutInfo(anchorLayoutAScale); if (anchorLayoutBScale) dpasMxBScaleLayout = LayoutInfo(anchorLayoutBScale); - } else { - // Need to compute layouts - const uArch *uArch = getUArch(getChipStr(dpasMx).value_or("")); - if (!uArch) - return; - - VectorType aTy = dpasMx.getAType(); - VectorType bTy = dpasMx.getBType(); - VectorType cdTy = dpasMx.getResultType(); - - // Get scale types if present - VectorType aScaleTy; - VectorType bScaleTy; - Value scaleA = dpasMx.getScaleA(); - Value scaleB = dpasMx.getScaleB(); - if (scaleA) - aScaleTy = dyn_cast(scaleA.getType()); - if (scaleB) - bScaleTy = dyn_cast(scaleB.getType()); + if (layoutKind == xegpu::LayoutKind::InstData) { + auto completed = xegpu::completeDpasMxLaneLayoutFromInstData( + anchorLayoutA, anchorLayoutB, anchorLayoutCD, aTy, bTy, cdTy, + aScaleTy, bScaleTy, uArch); + if (!completed) { + dpasMx.emitWarning( + "Failed to identify lane layouts for the specified inst_data."); + return; + } + auto [completedA, completedB, completedCD, completedAScale, + completedBScale] = *completed; + dpasMx.setLayoutAAttr(completedA); + dpasMx.setLayoutBAttr(completedB); + dpasMx.setLayoutCdAttr(completedCD); + dpasMxALayout = LayoutInfo(completedA); + dpasMxBLayout = LayoutInfo(completedB); + dpasMxCDLayout = LayoutInfo(completedCD); + if (completedAScale) { + dpasMx.setLayoutAScaleAttr(completedAScale); + dpasMxAScaleLayout = LayoutInfo(completedAScale); + } + if (completedBScale) { + dpasMx.setLayoutBScaleAttr(completedBScale); + dpasMxBScaleLayout = LayoutInfo(completedBScale); + } + } + } else { xegpu::DistributeLayoutAttr consumerLayoutAttr = nullptr; xegpu::DistributeLayoutAttr requiredCDLayoutAttr, requiredALayout, requiredBLayout, requiredAScaleLayout, requiredBScaleLayout; - int numSg = 0; - if (layoutKind == xegpu::LayoutKind::Subgroup) { - LayoutInfo consumerLayout = results[0]->getValue(); - if (!consumerLayout.isAssigned()) - return; - consumerLayoutAttr = - dyn_cast(consumerLayout.get()); - auto numSgOrErr = 
getNumSg(dpasMx, uArch->getSubgroupSize()); - if (failed(numSgOrErr)) { - dpasMx.emitWarning( - "Unable to determine the number of subgroups for the operation."); - return; - } - numSg = numSgOrErr.value(); + LayoutInfo consumerLayout = results[0]->getValue(); + if (!consumerLayout.isAssigned()) + return; + consumerLayoutAttr = + dyn_cast(consumerLayout.get()); + + auto numSgOrErr = + getNumSg(dpasMx, uArch->getSubgroupSize(), consumerLayoutAttr); + if (layoutKind == xegpu::LayoutKind::Subgroup && failed(numSgOrErr)) { + dpasMx.emitWarning( + "Unable to determine the number of subgroups for the operation."); + return; } - auto layouts = - xegpu::setupDpasMxLayout(layoutKind, aTy, bTy, cdTy, aScaleTy, bScaleTy, - consumerLayoutAttr, numSg, uArch); + auto layouts = xegpu::setupDpasMxLayout( + layoutKind, aTy, bTy, cdTy, aScaleTy, bScaleTy, consumerLayoutAttr, + numSgOrErr.value_or(0), uArch); if (!layouts.has_value()) { dpasMx.emitWarning( "Failed to determine required layouts for DPAS_MX operands."); @@ -963,78 +866,47 @@ void LayoutInfoPropagation::visitStoreNdOp( xegpu::StoreNdOp store, ArrayRef operands, ArrayRef results) { LayoutInfo storeLayout; + const uArch *uArch = getUArch(getChipStr(store).value_or("")); + if (!uArch) + return; xegpu::DistributeLayoutAttr anchorLayout = store.getLayoutAttr(); if (hasParamsOfLayoutKind(anchorLayout)) { storeLayout = LayoutInfo(anchorLayout); - } else { - const uArch *uArch = getUArch(getChipStr(store).value_or("")); - if (!uArch) - return; - const auto *uArchInstruction = - dyn_cast( - uArch->getInstruction( - xegpu::uArch::InstructionKind::Subgroup2DBlockStore)); - VectorType dataTy = store.getValueType(); - auto blockWHC = uArchInstruction->getBlockWidthHeightCount( - store.getValueType().getElementType()); - if (!blockWHC) - store.emitWarning("No known block params found for the element type."); - auto [bWidth, bHeight, bCount] = blockWHC.value(); - // Default to 1 for any leading batch dims; rank-1 and rank>=2 cases - 
// overwrite the trailing entries below. - SmallVector instData(dataTy.getRank(), 1); - int instWidth = xegpu::getLargestDivisor( - static_cast(dataTy.getDimSize(dataTy.getRank() - 1)), bWidth); - if (instWidth == -1) - store.emitWarning( - "No suitable instruction multiple found for the given shape."); - if (dataTy.getRank() == 1) { - instData = {instWidth}; - } else { - int instHeight = xegpu::getLargestDivisor( - static_cast(dataTy.getDimSize(dataTy.getRank() - 2)), bHeight); - if (instHeight == -1) - store.emitWarning( - "No suitable instruction multiple found for the given shape."); - instData[dataTy.getRank() - 2] = instHeight; - instData[dataTy.getRank() - 1] = instWidth; - } + if (layoutKind == xegpu::LayoutKind::InstData) { - if (layoutKind == xegpu::LayoutKind::InstData) - storeLayout = - LayoutInfo(xegpu::LayoutAttr::get(dataTy.getContext(), instData)); - else if (layoutKind == xegpu::LayoutKind::Lane) - storeLayout = - getSIMTLayoutInfoBlockIO(store.getValueType(), uArch, - uArchInstruction->getPackedFormatBitSize()); - else { // xegpu::LayoutKind::Subgroup - auto sgSize = uArch->getSubgroupSize(); - auto numSgOrErr = getNumSg(store, sgSize); - if (failed(numSgOrErr)) { - store.emitWarning( - "Unable to determine the number of subgroups for the operation."); + const auto *uArchInstruction = + dyn_cast( + uArch->getInstruction( + xegpu::uArch::InstructionKind::Subgroup2DBlockStore)); + if (!uArchInstruction) return; - } - auto sgLayouts = getValidLayouts(store.getValueType().getShape(), - instData, numSgOrErr.value()); - if (sgLayouts.empty()) { + auto completed = xegpu::completeBlockStoreLaneLayoutFromInstData( + anchorLayout, store.getValueType().getElementType(), uArchInstruction, + uArch->getSubgroupSize()); + if (!completed) { store.emitWarning( - "Unable to determine suitable subgroup layout for store value."); + "Failed to identify lane layouts for the specified inst_data."); return; } - SmallVector sgLayout = {sgLayouts[0].first, 
sgLayouts[0].second}; - SmallVector sgData = { - static_cast(dataTy.getShape()[0]) / sgLayout[0], - static_cast(dataTy.getShape()[1]) / sgLayout[1]}; - storeLayout = LayoutInfo(xegpu::LayoutAttr::get( - dataTy.getContext(), - DenseI32ArrayAttr::get(dataTy.getContext(), sgLayout), - DenseI32ArrayAttr::get(dataTy.getContext(), sgData), - /*inst_data =*/nullptr, /*lane_layout =*/nullptr, - /*lane_data =*/nullptr, /*order =*/nullptr)); + store.setLayoutAttr(*completed); + storeLayout = LayoutInfo(*completed); } - store.setLayoutAttr( - dyn_cast(storeLayout.get())); + } else { + auto numSgOrErr = getNumSg(store, uArch->getSubgroupSize()); + if (layoutKind == xegpu::LayoutKind::Subgroup && failed(numSgOrErr)) { + store.emitWarning( + "Unable to determine the number of subgroups for the operation."); + return; + } + + auto layoutAttr = xegpu::setupStoreNdAnchorLayout( + layoutKind, store.getValueType(), numSgOrErr.value_or(0), uArch); + if (!layoutAttr) { + store.emitWarning("Failed to determine required layout for store_nd."); + return; + } + storeLayout = LayoutInfo(layoutAttr); + store.setLayoutAttr(layoutAttr); } // Propagate the layout to the value operand. 
// Both operands should have the same layout @@ -1048,25 +920,54 @@ void LayoutInfoPropagation::visitLoadNdOp( xegpu::LoadNdOp load, ArrayRef operands, ArrayRef results) { LayoutInfo loadLayout; + + const uArch *uArch = getUArch(getChipStr(load).value_or("")); + if (!uArch) + return; + LayoutInfo valueLayout = results[0]->getValue(); + if (!valueLayout.isAssigned()) + return; + auto consumerLayoutAttr = + dyn_cast(valueLayout.get()); xegpu::DistributeLayoutAttr anchorLayout = load.getLayoutAttr(); if (hasParamsOfLayoutKind(anchorLayout)) { loadLayout = LayoutInfo(anchorLayout); + if (layoutKind == xegpu::LayoutKind::InstData && + !consumerLayoutAttr.getEffectiveLaneLayoutAsInt().empty()) { + const auto *uArchInstruction = + dyn_cast( + uArch->getInstruction( + xegpu::uArch::InstructionKind::Subgroup2DBlockLoad)); + if (!uArchInstruction) + return; + auto completed = xegpu::completeBlockLoadLaneLayoutFromInstData( + anchorLayout, consumerLayoutAttr, load.getType().getElementType(), + uArchInstruction, uArch->getSubgroupSize()); + if (!completed) { + load.emitWarning( + "Failed to identify lane layouts for the specified inst_data."); + return; + } + load.setLayoutAttr(*completed); + loadLayout = LayoutInfo(*completed); + } } else { - - LayoutInfo valueLayout = results[0]->getValue(); - // Need the layout of the value to propagate to the tensor descriptor. - if (!valueLayout.isAssigned()) + auto numSgOrErr = + getNumSg(load, uArch->getSubgroupSize(), consumerLayoutAttr); + if (layoutKind == xegpu::LayoutKind::Subgroup && failed(numSgOrErr)) { + load.emitWarning( + "Unable to determine the number of subgroups for the operation."); return; - loadLayout = valueLayout; - // LoadNdOp has the transpose effect. However, at the stage of this analysis - // this effect is not expected and should be abstracted away. Emit a - // warning. 
- if (auto transpose = load.getTranspose()) { - load.emitWarning("Transpose effect is not expected for LoadNdOp at " - "LayoutInfoPropagation stage."); - loadLayout = valueLayout.transpose(transpose.value()); } - load.setLayoutAttr(dyn_cast(loadLayout.get())); + auto layoutAttr = xegpu::setupLoadNdAnchorLayout( + layoutKind, load.getType(), consumerLayoutAttr, numSgOrErr.value_or(0), + uArch); + if (!layoutAttr) { + load.emitWarning("Failed to determine required layout for load_nd."); + return; + } + loadLayout = LayoutInfo(layoutAttr); + load.setLayoutAttr(layoutAttr); } // Propagate the new layout to the tensor descriptor operand. propagateIfChanged(operands[0], operands[0]->meet(loadLayout)); @@ -1077,6 +978,49 @@ void LayoutInfoPropagation::visitLoadNdOp( void LayoutInfoPropagation::visitConvertLayoutOp( xegpu::ConvertLayoutOp convert, ArrayRef operands, ArrayRef results) { + + LayoutInfo resultLayout = results[0]->getValue(); + + // TODO: fix if one of the layouts is a slice layout + auto targetLayoutAttr = + dyn_cast(convert.getTargetLayoutAttr()); + auto inputLayoutAttr = + dyn_cast(convert.getInputLayoutAttr()); + + // The result's propagated layout is authoritative for the converted value. + // Fill the lane_layout / lane_data / order parameters the target_layout is + // missing from it (sg_layout / sg_data / inst_data are left as-is), so the + // target stays consistent with what is actually propagated downstream. + auto resultLayoutAttr = resultLayout.isAssigned() + ? 
dyn_cast(resultLayout.get()) + : nullptr; + if (resultLayoutAttr && targetLayoutAttr) { + if (layoutKind == xegpu::LayoutKind::InstData && + !targetLayoutAttr.getLaneLayout()) { + targetLayoutAttr = xegpu::LayoutAttr::get( + convert.getContext(), targetLayoutAttr.getSgLayout(), + targetLayoutAttr.getSgData(), targetLayoutAttr.getInstData(), + resultLayoutAttr.getLaneLayout(), resultLayoutAttr.getLaneData(), + resultLayoutAttr.getOrder()); + convert.setTargetLayoutAttr(targetLayoutAttr); + } + } + + // Fill only the lane_layout / lane_data / order parameters the input_layout + // is missing from the target_layout (sg_layout / sg_data / inst_data are left + // as-is), so the producer side receives a fully-populated lane layout. + if (inputLayoutAttr && targetLayoutAttr) { + if (layoutKind == xegpu::LayoutKind::InstData && + !inputLayoutAttr.getLaneLayout()) { + auto merged = xegpu::LayoutAttr::get( + convert.getContext(), inputLayoutAttr.getSgLayout(), + inputLayoutAttr.getSgData(), inputLayoutAttr.getInstData(), + targetLayoutAttr.getLaneLayout(), targetLayoutAttr.getLaneData(), + targetLayoutAttr.getOrder()); + convert.setInputLayoutAttr(merged); + } + } + xegpu::DistributeLayoutAttr anchorLayout = convert.getInputLayoutAttr(); LayoutInfo convertLayout(anchorLayout); // Propagate the new layout to the tensor descriptor operand. @@ -1135,9 +1079,9 @@ void LayoutInfoPropagation::visitVectorBitcastOp( propagateIfChanged(operands[0], operands[0]->meet(LayoutInfo(srcLayoutAttr))); } -/// For vector::InterleaveOp, the result has double the innermost dimension size -/// compared to each source operand. The layout is propagated from result to -/// sources, adjusting for the 2x size increase. +/// For vector::InterleaveOp, the result has double the innermost dimension +/// size compared to each source operand. The layout is propagated from result +/// to sources, adjusting for the 2x size increase. 
void LayoutInfoPropagation::visitVectorInterleaveOp( vector::InterleaveOp interleave, ArrayRef operands, ArrayRef results) { @@ -1185,8 +1129,8 @@ void LayoutInfoPropagation::visitVectorDeinterleaveOp( auto consumerLayoutAttr = dyn_cast(resLayoutInfo.get()); - // Derive the source layout from the result layout (double the innermost dim) - // No setup function needed - just infer directly + // Derive the source layout from the result layout (double the innermost + // dim) No setup function needed - just infer directly auto srcLayoutAttr = xegpu::inferDeinterleaveSourceLayout(consumerLayoutAttr); propagateIfChanged(operands[0], operands[0]->meet(LayoutInfo(srcLayoutAttr))); @@ -1223,8 +1167,8 @@ void LayoutInfoPropagation::visitInsertStridedSliceOp( operands[1]->meet(LayoutInfo(requiredResLayoutAttr))); } -/// Propagate the layout of the result to the tensor descriptor, mask and offset -/// operands in LoadGatherOp. +/// Propagate the layout of the result to the tensor descriptor, mask and +/// offset operands in LoadGatherOp. 
void LayoutInfoPropagation::visitLoadGatherOp( xegpu::LoadGatherOp load, ArrayRef operands, ArrayRef results) { @@ -1244,6 +1188,24 @@ void LayoutInfoPropagation::visitLoadGatherOp( if (hasParamsOfLayoutKind(anchorLayoutAttr)) { requiredAnchorLayoutAttr = anchorLayoutAttr; + if (layoutKind == xegpu::LayoutKind::InstData && + !consumerLayoutAttr.getEffectiveLaneLayoutAsInt().empty()) { + const auto uArchInstruction = + dyn_cast( + uArch->getInstruction(xegpu::uArch::InstructionKind::LoadGather)); + if (!uArchInstruction) + return; + auto completed = xegpu::completeScatterLoadLaneLayoutFromInstData( + anchorLayoutAttr, consumerLayoutAttr, resVecTy.getElementType(), + uArchInstruction, uArch->getSubgroupSize()); + if (!completed) { + load.emitWarning( + "Failed to identify lane layouts for the specified inst_data."); + return; + } + requiredAnchorLayoutAttr = *completed; + load.setLayoutAttr(requiredAnchorLayoutAttr); + } } else { if (!resVecTy) { load.emitWarning("Not propagating, non-vector payload supplied."); @@ -1268,8 +1230,8 @@ void LayoutInfoPropagation::visitLoadGatherOp( propagateIfChanged(operands[2], operands[2]->meet(maskLayoutInfo)); } -/// Set the layout for the value, tensor descriptor, offset and mask operands in -/// the StoreScatterOp. +/// Set the layout for the value, tensor descriptor, offset and mask operands +/// in the StoreScatterOp. 
void LayoutInfoPropagation::visitStoreScatterOp( xegpu::StoreScatterOp storeScatter, ArrayRef operands, ArrayRef results) { @@ -1284,6 +1246,24 @@ void LayoutInfoPropagation::visitStoreScatterOp( if (hasParamsOfLayoutKind(anchorLayoutAttr)) { requiredAnchorLayoutAttr = anchorLayoutAttr; + if (layoutKind == xegpu::LayoutKind::InstData) { + const auto uArchInstruction = + dyn_cast( + uArch->getInstruction( + xegpu::uArch::InstructionKind::StoreScatter)); + if (!uArchInstruction) + return; + auto completed = xegpu::completeScatterStoreLaneLayoutFromInstData( + anchorLayoutAttr, srcVecTy.getElementType(), uArchInstruction, + uArch->getSubgroupSize()); + if (!completed) { + storeScatter.emitWarning( + "Failed to identify lane layouts for the specified inst_data."); + return; + } + requiredAnchorLayoutAttr = *completed; + storeScatter.setLayoutAttr(requiredAnchorLayoutAttr); + } } else { if (!srcVecTy) { storeScatter.emitWarning("Not propagating, non-vector payload supplied."); @@ -1331,8 +1311,10 @@ void LayoutInfoPropagation::visitLoadMatrixOp( const uArch *uArch = getUArch(getChipStr(loadMatrixOp).value_or("")); if (!uArch) return; + int chunkSize = + 1; // placeHolder for future use when LoadMatrix supports coalescing auto requiredAnchorLayoutAttr = xegpu::setupLoadMatrixAnchorLayout( - layoutKind, resVecTy, consumerLayoutAttr, uArch); + layoutKind, resVecTy, chunkSize, consumerLayoutAttr, uArch); loadMatrixOp.setLayoutAttr(requiredAnchorLayoutAttr); } } @@ -1340,22 +1322,41 @@ void LayoutInfoPropagation::visitLoadMatrixOp( void LayoutInfoPropagation::visitStoreMatrixOp( xegpu::StoreMatrixOp storeMatrix, ArrayRef operands, ArrayRef results) { - xegpu::DistributeLayoutAttr anchorLayout = storeMatrix.getLayoutAttr(); + xegpu::DistributeLayoutAttr requiredAnchorLayoutAttr; + xegpu::DistributeLayoutAttr anchorLayoutAttr = storeMatrix.getLayoutAttr(); LayoutInfo layout; - if (hasParamsOfLayoutKind(anchorLayout)) { - layout = LayoutInfo(anchorLayout); + VectorType srcVecTy 
= llvm::cast(storeMatrix.getData().getType()); + const uArch *uArch = getUArch(getChipStr(storeMatrix).value_or("")); + if (!uArch) + return; + if (hasParamsOfLayoutKind(anchorLayoutAttr)) { + requiredAnchorLayoutAttr = anchorLayoutAttr; + if (layoutKind == xegpu::LayoutKind::InstData) { + const auto uArchInstruction = + dyn_cast( + uArch->getInstruction( + xegpu::uArch::InstructionKind::StoreScatter)); + if (!uArchInstruction) + return; + auto completed = xegpu::completeScatterStoreLaneLayoutFromInstData( + anchorLayoutAttr, srcVecTy.getElementType(), uArchInstruction, + uArch->getSubgroupSize()); + if (!completed) { + storeMatrix.emitWarning( + "Failed to identify lane layouts for the specified inst_data."); + return; + } + requiredAnchorLayoutAttr = *completed; + storeMatrix.setLayoutAttr(requiredAnchorLayoutAttr); + } } else { - VectorType srcVecTy = - llvm::cast(storeMatrix.getData().getType()); - const uArch *uArch = getUArch(getChipStr(storeMatrix).value_or("")); - if (!uArch) - return; - auto requiredAnchorLayoutAttr = - xegpu::setupStoreMatrixAnchorLayout(layoutKind, srcVecTy, uArch); + int chunkSize = + 1; // placeHolder for future use when StoreMatrix supports coalescing + requiredAnchorLayoutAttr = xegpu::setupStoreMatrixAnchorLayout( + layoutKind, srcVecTy, chunkSize, uArch); storeMatrix.setLayoutAttr(requiredAnchorLayoutAttr); - layout = LayoutInfo(requiredAnchorLayoutAttr); } - + layout = LayoutInfo(requiredAnchorLayoutAttr); propagateIfChanged(operands[0], operands[0]->meet(layout)); } @@ -1451,9 +1452,9 @@ namespace { // ResolveLayoutConflicts //===----------------------------------------------------------------------===// -/// Helper to get the defining CreateNdDescOp of a tensor descriptor value. This -/// function tries to find the defining CreateNdDescOp recursively accross -/// control-flow boundaries. +/// Helper to get the defining CreateNdDescOp of a tensor descriptor value. 
+/// This function tries to find the defining CreateNdDescOp recursively +/// across control-flow boundaries. static xegpu::CreateNdDescOp getDefiningCreateNdDescOp(Value tdescValue) { // Try to get the defining CreateNdDescOp of the tensor descriptor. auto definingOp = tdescValue.getDefiningOp(); @@ -1492,9 +1493,9 @@ LogicalResult ResolveLayoutConflicts::run() { // Scan all operations in the parent op and resolve layout conflicts at // tensor descriptor and vector use points. auto r = parentOp->walk([&](Operation *op) -> WalkResult { - // if the operation inputs vector and output scalar, like multi-reduction we - // need to check if the result has layout and add a convert_layout to serve - // as anchor op for the reduction op's layout. + // if the operation inputs vector and output scalar, like multi-reduction + // we need to check if the result has layout and add a convert_layout to + // serve as anchor op for the reduction op's layout. if (isa(op) || isa(op)) { for (OpResult result : op->getResults()) { if (result.getType().isIntOrFloat()) { @@ -1561,7 +1562,8 @@ ResolveLayoutConflicts::resolveVectorConsumer(OpOperand &operand) { if (auto vectorTy = dyn_cast(vectorValue.getType()); vectorTy && vectorTy.getRank() > 1) consumerOp->emitWarning("Expected layout for non-1D vectors."); - return success(); // uniform non-tensor-data vector does not require layout + return success(); // uniform non-tensor-data vector does not require + // layout } // Region branch ops (e.g. scf.for) and their terminators (e.g. scf.yield) // forward their operands to successor region inputs / parent op results; @@ -1580,6 +1582,23 @@ ResolveLayoutConflicts::resolveVectorConsumer(OpOperand &operand) { if (consumerLayout.isEqualTo(producerLayout)) return success(); + // Consumer is a convert_layout: retarget its input_layout to the producer + // instead of chaining a second convert. Always safe (single source + // operand). 
+ if (auto consumerConvert = dyn_cast(consumerOp)) { + consumerConvert.setInputLayoutAttr(producerLayout); + return success(); + } + + // Producer is a convert_layout feeding only this use: retarget its + // target_layout to the consumer instead of appending another convert. + if (auto producerConvert = + vectorValue.getDefiningOp(); + producerConvert && vectorValue.hasOneUse()) { + producerConvert.setTargetLayoutAttr(consumerLayout); + return success(); + } + // If the producer is trivially rematerializable (e.g. `vector.step`, splat // `arith.constant`), clone it and stamp the consumer's expected layout on // the clone instead of inserting a `xegpu.convert_layout`. The convert diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp index fc7c3b170dd3b..a9c73b3b84025 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUUnroll.cpp @@ -969,8 +969,6 @@ struct UnrollConvertLayoutOp : public UnrollPattern { if (valType.isIntOrFloat()) { rewriter.replaceOp(op, op.getSource()); - assert(!inputLayout.dropInstData() && !targetLayout.dropInstData() && - "unexpected layout attributes for scalar type"); return success(); } @@ -990,7 +988,7 @@ struct UnrollConvertLayoutOp : public UnrollPattern { Value newSource = op.getSource(); SmallVector newOps; - if (inputLayout && targetLayout) { + if (inputLayout && targetLayout && !inputLayout.isEqualTo(targetLayout)) { SmallVector convertedValTypes = getUnrolledTypes(valueTy, *targetShape); SmallVector convertedValues = diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir index 5f493c8ca0df6..515c59db72819 100644 --- a/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir +++ b/mlir/test/Dialect/XeGPU/propagate-layout-inst-data.mlir @@ -4,13 +4,13 @@ // CHECK-LABEL: func.func @load_store_no_array_len( // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: 
memref<8x32xf32>, %[[ARG1:[0-9a-zA-Z]+]]: memref<8x32xf32>) { // CHECK: %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<8x16xf32> -// CHECK: %[[TDESC_SRC:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32, #xegpu.layout> -// CHECK: %[[TDESC_DST:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32, #xegpu.layout> -// CHECK: xegpu.prefetch_nd %[[TDESC_SRC]][0, 0] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<8x32xf32, #xegpu.layout> -// CHECK: %[[LOADED:.*]] = xegpu.load_nd %0[0, 0] <{layout = #xegpu.layout}> -// CHECK-SAME: !xegpu.tensor_desc<8x32xf32, #xegpu.layout> -> vector<8x32xf32> -// CHECK: xegpu.store_nd %[[LOADED]], %[[TDESC_DST]][0, 0] <{layout = #xegpu.layout}> : vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32, #xegpu.layout> +// CHECK: %[[TDESC_SRC:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32, #xegpu.layout> +// CHECK: %[[TDESC_DST:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32, #xegpu.layout> +// CHECK: xegpu.prefetch_nd %[[TDESC_SRC]][0, 0] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<8x32xf32, #xegpu.layout> +// CHECK: %[[LOADED:.*]] = xegpu.load_nd %0[0, 0] <{layout = #xegpu.layout}> +// CHECK-SAME: !xegpu.tensor_desc<8x32xf32, #xegpu.layout> -> vector<8x32xf32> +// CHECK: xegpu.store_nd %[[LOADED]], %[[TDESC_DST]][0, 0] <{layout = #xegpu.layout}> : vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32, #xegpu.layout> gpu.module @test { // Although the uArch allows 8x32 inst data using block count (or array_len), // it is up to optimization passes to decide on the block count usage. 
@@ -29,17 +29,17 @@ func.func @load_store_no_array_len(%arg0: memref<8x32xf32>, %arg1: memref<8x32xf // CHECK-LABEL: func.func @dpas_f16( // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<8x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<16x16xf16>, %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xf32>) { -// CHECK: %[[CST:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<0.000000e+00> : vector<8x16xf32> -// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.layout -// CHECK: %[[T1:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout> -// CHECK: %[[T2:.*]] = xegpu.load_nd %[[T0]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<8x16xf16, #xegpu.layout> -> vector<8x16xf16> -// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T1]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<16x16xf16, #xegpu.layout> -> vector<16x16xf16> -// CHECK: %[[T4:.*]] = xegpu.dpas %[[T2]], %[[T3]], %[[CST]] {layout_a = #xegpu.layout, layout_b = #xegpu.layout, layout_cd = #xegpu.layout} : +// CHECK: %[[CST:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<0.000000e+00> : vector<8x16xf32> +// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.layout +// CHECK: %[[T1:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout> +// CHECK: %[[T2:.*]] = xegpu.load_nd %[[T0]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<8x16xf16, #xegpu.layout> -> vector<8x16xf16> +// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T1]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<16x16xf16, #xegpu.layout> -> vector<16x16xf16> +// CHECK: %[[T4:.*]] = xegpu.dpas %[[T2]], %[[T3]], %[[CST]] {layout_a = #xegpu.layout, layout_b = #xegpu.layout, layout_cd = #xegpu.layout} : // CHECK-SAME: vector<8x16xf16>, vector<16x16xf16>, 
vector<8x16xf32> -> vector<8x16xf32> -// CHECK: %[[T5:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.layout -// CHECK: xegpu.store_nd %[[T4]], %[[T5]][0, 0] <{layout = #xegpu.layout}> : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.layout> +// CHECK: %[[T5:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.layout +// CHECK: xegpu.store_nd %[[T4]], %[[T5]][0, 0] <{layout = #xegpu.layout}> : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.layout> gpu.module @test { func.func @dpas_f16(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<8x16xf32>) { %c0 = arith.constant 0 : index @@ -70,15 +70,15 @@ gpu.module @test_kernel { %c_tdesc = xegpu.create_nd_tdesc %C : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x32xf16> scf.for %k = %c0 to %c1024 step %c32 { - //CHECK: xegpu.load_nd {{.*}} <{layout = #xegpu.layout}> : - //CHECK-SAME: !xegpu.tensor_desc<16x32xf16, #xegpu.layout> -> vector<16x32xf16> + //CHECK: xegpu.load_nd {{.*}} <{layout = #xegpu.layout}> : + //CHECK-SAME: !xegpu.tensor_desc<16x32xf16, #xegpu.layout> -> vector<16x32xf16> %a = xegpu.load_nd %a_tdesc[0, %k] : !xegpu.tensor_desc<16x32xf16> -> vector<16x32xf16> %b = xegpu.load_nd %b_tdesc[0, %k] : !xegpu.tensor_desc<16x32xf16> -> vector<16x32xf16> - //CHECK-COUNT: arith.addf {{.*}} {layout_result_0 = #xegpu.layout} : vector<16x32xf16> + //CHECK-COUNT: arith.addf {{.*}} {layout_result_0 = #xegpu.layout} : vector<16x32xf16> %c = arith.addf %a, %b : vector<16x32xf16> - //CHECK-COUNT: xegpu.store_nd {{.*}} : vector<16x32xf16>, !xegpu.tensor_desc<16x32xf16, #xegpu.layout> + //CHECK-COUNT: xegpu.store_nd {{.*}} : vector<16x32xf16>, !xegpu.tensor_desc<16x32xf16, #xegpu.layout> xegpu.store_nd %c, %c_tdesc[0, %k] : vector<16x32xf16>, !xegpu.tensor_desc<16x32xf16> } gpu.return @@ -100,46 +100,26 @@ gpu.module @test_kernel { %c_tdesc = xegpu.create_nd_tdesc %C : memref<1024x1024xf16> -> 
!xegpu.tensor_desc<12x32xf16> scf.for %k = %c0 to %c1024 step %c32 { - //CHECK: xegpu.load_nd {{.*}} <{layout = #xegpu.layout}> : - //CHECK-SAME: !xegpu.tensor_desc<12x32xf16, #xegpu.layout> -> vector<12x32xf16> + //CHECK: xegpu.load_nd {{.*}} <{layout = #xegpu.layout}> : + //CHECK-SAME: !xegpu.tensor_desc<12x32xf16, #xegpu.layout> -> vector<12x32xf16> %a = xegpu.load_nd %a_tdesc[0, %k] : !xegpu.tensor_desc<12x32xf16> -> vector<12x32xf16> %b = xegpu.load_nd %b_tdesc[0, %k] : !xegpu.tensor_desc<12x32xf16> -> vector<12x32xf16> - //CHECK-COUNT: arith.addf {{.*}} {layout_result_0 = #xegpu.layout} : vector<12x32xf16> + //CHECK-COUNT: arith.addf {{.*}} {layout_result_0 = #xegpu.layout} : vector<12x32xf16> %c = arith.addf %a, %b : vector<12x32xf16> - //CHECK-COUNT: xegpu.store_nd {{.*}} : vector<12x32xf16>, !xegpu.tensor_desc<12x32xf16, #xegpu.layout> + //CHECK-COUNT: xegpu.store_nd {{.*}} : vector<12x32xf16>, !xegpu.tensor_desc<12x32xf16, #xegpu.layout> xegpu.store_nd %c, %c_tdesc[0, %k] : vector<12x32xf16>, !xegpu.tensor_desc<12x32xf16> } gpu.return } } -// ----- -gpu.module @test { -// CHECK-LABEL: func.func @scatter_ops_chunksize( -// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<256xf16>) { -// CHECK: %{{.*}} = arith.constant {layout_result_0 = #xegpu.layout} dense : vector<16xi1> -// CHECK: %{{.*}} = arith.constant {layout_result_0 = #xegpu.layout} dense<12> : vector<16xindex> -// CHECK: %{{.*}} = xegpu.load %[[ARG0]][%{{.*}}], %{{.*}} <{chunk_size = 8 : i64, layout = #xegpu.layout}> -// CHECK-SAME: memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16x8xf16> -// CHECK: xegpu.store %0, %[[ARG0]][%{{.*}}], %{{.*}} <{chunk_size = 8 : i64, layout = #xegpu.layout}> : vector<16x8xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1> -func.func @scatter_ops_chunksize(%src: memref<256xf16>) { - %1 = arith.constant dense<1>: vector<16xi1> - %offset = arith.constant dense<12> : vector<16xindex> - %3 = xegpu.load %src[%offset], %1 <{chunk_size=8}> - : memref<256xf16>, 
vector<16xindex>, vector<16xi1> -> vector<16x8xf16> - xegpu.store %3, %src[%offset], %1 <{chunk_size=8}> - : vector<16x8xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1> - return -} -} - // ----- gpu.module @test { // CHECK-LABEL: func.func @store_matrix( // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: !xegpu.mem_desc<16x64xf16>) { -// CHECK: %{{.*}} = arith.constant {layout_result_0 = #xegpu.layout} dense<0.000000e+00> : vector<16x16xf16> +// CHECK: %{{.*}} = arith.constant {layout_result_0 = #xegpu.layout} dense<0.000000e+00> : vector<16x16xf16> func.func @store_matrix(%arg0: !xegpu.mem_desc<16x64xf16>) { %cst = arith.constant dense<0.0000> : vector<16x16xf16> xegpu.store_matrix %cst, %arg0[8, 8]: vector<16x16xf16>, !xegpu.mem_desc<16x64xf16> @@ -148,108 +128,49 @@ func.func @store_matrix(%arg0: !xegpu.mem_desc<16x64xf16>) { } } -// ----- -gpu.module @test { -// CHECK-LABEL: func.func @scatter_ops_chunksize_excessive( -// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<1024xf32>) { -// CHECK: %{{.*}} = arith.constant {layout_result_0 = #xegpu.layout} dense : vector<16xi1> -// CHECK: %{{.*}} = arith.constant {layout_result_0 = #xegpu.layout} dense<12> : vector<16xindex> -// CHECK: %{{.*}} = xegpu.load %[[ARG0]][%{{.*}}], %{{.*}} <{chunk_size = 32 : i64, layout = #xegpu.layout}> : -// CHECK-SAME: memref<1024xf32>, vector<16xindex>, vector<16xi1> -> vector<16x32xf32> -// CHECK: xegpu.store %0, %[[ARG0]][%{{.*}}], %{{.*}} <{chunk_size = 32 : i64, layout = #xegpu.layout}> : -// CHECK-SAME: vector<16x32xf32>, memref<1024xf32>, vector<16xindex>, vector<16xi1> -func.func @scatter_ops_chunksize_excessive(%src: memref<1024xf32>) { - %1 = arith.constant dense<1>: vector<16xi1> - %offset = arith.constant dense<12> : vector<16xindex> - %3 = xegpu.load %src[%offset], %1 <{chunk_size=32}> - : memref<1024xf32>, vector<16xindex>, vector<16xi1> -> vector<16x32xf32> - xegpu.store %3, %src[%offset], %1 <{chunk_size=32}> - : vector<16x32xf32>, memref<1024xf32>, vector<16xindex>, 
vector<16xi1> - return -} -} - -// ----- - -gpu.module @test { -// CHECK-LABEL: func.func @scatter_ops_chunksize_excessive_anchor( -// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<1024xf32>) { -// CHECK: %{{.*}} = arith.constant {layout_result_0 = #xegpu.layout} dense : vector<16xi1> -// CHECK: %{{.*}} = arith.constant {layout_result_0 = #xegpu.layout} dense<12> : vector<16xindex> -// CHECK: %{{.*}} = xegpu.load %[[ARG0]][%{{.*}}], %{{.*}} <{chunk_size = 32 : i64, layout = #xegpu.layout}> : -// CHECK-SAME: memref<1024xf32>, vector<16xindex>, vector<16xi1> -> vector<16x32xf32> -// CHECK: xegpu.store %0, %[[ARG0]][%{{.*}}], %{{.*}} <{chunk_size = 32 : i64, layout = #xegpu.layout}> : -// CHECK-SAME: vector<16x32xf32>, memref<1024xf32>, vector<16xindex>, vector<16xi1> -func.func @scatter_ops_chunksize_excessive_anchor(%src: memref<1024xf32>) { - %1 = arith.constant dense<1>: vector<16xi1> - %offset = arith.constant dense<12> : vector<16xindex> - %3 = xegpu.load %src[%offset], %1 <{chunk_size=32}> - : memref<1024xf32>, vector<16xindex>, vector<16xi1> -> vector<16x32xf32> - xegpu.store %3, %src[%offset], %1 <{chunk_size=32, layout = #xegpu.layout}> - : vector<16x32xf32>, memref<1024xf32>, vector<16xindex>, vector<16xi1> - return -} -} - // ----- gpu.module @test { -// CHECK-LABEL: func.func @scatter_ops_chunksize_slice( -// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<1024xf32>) { -// CHECK: %{{.*}} = arith.constant {layout_result_0 = #xegpu.layout} dense : vector<16xi1> -// CHECK: %{{.*}} = arith.constant {layout_result_0 = #xegpu.layout} dense<12> : vector<16xindex> -// CHECK: %[[LOADED:.*]] = xegpu.load %[[ARG0]][%{{.*}}], %{{.*}} <{layout = #xegpu.layout}> : -// CHECK-SAME: memref<1024xf32>, vector<16xindex>, vector<16xi1> -> vector<16xf32> -// CHECK: %[[BCAST:.*]] = vector.broadcast %[[LOADED]] {layout_result_0 = #xegpu.layout} : vector<16xf32> to vector<16x16xf32> -// CHECK: xegpu.store %[[BCAST]], %[[ARG0]][%{{.*}}], %{{.*}} <{chunk_size = 16 : i64, layout = 
#xegpu.layout}> : -// CHECK-SAME: vector<16x16xf32>, memref<1024xf32>, vector<16xindex>, vector<16xi1> -func.func @scatter_ops_chunksize_slice(%src: memref<1024xf32>) { - %1 = arith.constant dense<1>: vector<16xi1> - %offset = arith.constant dense<12> : vector<16xindex> +// CHECK-LABEL: func.func @scatter_ops_coalesce_chunksize( +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<512xf32>) { +// CHECK: %{{.*}} = arith.constant {layout_result_0 = #xegpu.layout} dense : vector<16x32xi1> +// CHECK: %{{.*}} = arith.constant {layout_result_0 = #xegpu.layout} dense<12> : vector<16x32xindex> +// CHECK: %{{.*}} = xegpu.load %[[ARG0]][%{{.*}}], %{{.*}} <{layout = #xegpu.layout}> : +// CHECK-SAME: memref<512xf32>, vector<16x32xindex>, vector<16x32xi1> -> vector<16x32xf32> +// CHECK: xegpu.store %0, %[[ARG0]][%{{.*}}], %{{.*}} <{layout = #xegpu.layout}> : +// CHECK-SAME: vector<16x32xf32>, memref<512xf32>, vector<16x32xindex>, vector<16x32xi1> +func.func @scatter_ops_coalesce_chunksize(%src: memref<512xf32>) { + %1 = arith.constant dense<1>: vector<16x32xi1> + %offset = arith.constant dense<12> : vector<16x32xindex> %3 = xegpu.load %src[%offset], %1 - : memref<1024xf32>, vector<16xindex>, vector<16xi1> -> vector<16xf32> - - %4 = vector.broadcast %3 : vector<16xf32> to vector<16x16xf32> - xegpu.store %4, %src[%offset], %1 <{chunk_size=16, layout = #xegpu.layout}> - : vector<16x16xf32>, memref<1024xf32>, vector<16xindex>, vector<16xi1> + : memref<512xf32>, vector<16x32xindex>, vector<16x32xi1> -> vector<16x32xf32> + xegpu.store %3, %src[%offset], %1 <{layout = #xegpu.layout}> + : vector<16x32xf32>, memref<512xf32>, vector<16x32xindex>, vector<16x32xi1> return } } // ----- -gpu.module @test { -// CHECK-LABEL: func.func @insert_strided_slice_inst_data_no_packing( -// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<8x32xf32>) { -// CHECK: %[[CST_SMALL:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<1.000000e+00> : vector<4x16xf32> -// CHECK: %[[CST_LARGE:.*]] = arith.constant 
{layout_result_0 = #xegpu.layout} dense<0.000000e+00> : vector<8x32xf32> -// CHECK: %[[INSERT:.*]] = vector.insert_strided_slice %[[CST_SMALL]], %[[CST_LARGE]] {layout_result_0 = #xegpu.layout, offsets = [0, 0], strides = [1, 1]} : vector<4x16xf32> into vector<8x32xf32> -// CHECK: %[[TDESC:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32, #xegpu.layout> -// CHECK: xegpu.store_nd %[[INSERT]], %[[TDESC]][0, 0] <{layout = #xegpu.layout}> : vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32, #xegpu.layout> -func.func @insert_strided_slice_inst_data_no_packing(%arg0: memref<8x32xf32>) { - %c0 = arith.constant 0 : index - %cst_small = arith.constant dense<1.0> : vector<4x16xf32> - %cst_large = arith.constant dense<0.0> : vector<8x32xf32> - %insert = vector.insert_strided_slice %cst_small, %cst_large {offsets = [0, 0], strides = [1, 1]} : vector<4x16xf32> into vector<8x32xf32> - %tdesc = xegpu.create_nd_tdesc %arg0 : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32> - xegpu.store_nd %insert, %tdesc[0, 0] : vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32> - return -} -} -// ----- gpu.module @test { -// CHECK-LABEL: func.func @insert_strided_slice_inst_data_with_packing( -// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<8x64xi8>) { -// CHECK: %[[CST_SMALL:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<1> : vector<4x64xi8> -// CHECK: %[[CST_LARGE:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<0> : vector<8x64xi8> -// CHECK: %[[INSERT:.*]] = vector.insert_strided_slice %[[CST_SMALL]], %[[CST_LARGE]] {layout_result_0 = #xegpu.layout, offsets = [0, 0], strides = [1, 1]} : vector<4x64xi8> into vector<8x64xi8> -func.func @insert_strided_slice_inst_data_with_packing(%arg0: memref<8x64xi8>) { +// CHECK-LABEL: func.func @load_gather_with_coalesce_chunksize( +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<8x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<256xf16>, %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xf32>) { +// CHECK: %[[OFFSET:.*]] = 
arith.constant {layout_result_0 = #xegpu.layout} +// CHECK-SAME: dense<0> : vector<16x16xindex> +// CHECK-NEXT: %[[MASK:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense : vector<16x16xi1> +// CHECK-NEXT: %{{.*}} = xegpu.load %arg1[%[[OFFSET]]], %[[MASK]] <{layout = #xegpu.layout}> : memref<256xf16>, vector<16x16xindex>, vector<16x16xi1> -> vector<16x16xf16> +func.func @load_gather_with_coalesce_chunksize(%arg0: memref<8x16xf16>, %arg1: memref<256xf16>, %arg2: memref<8x16xf32>) { %c0 = arith.constant 0 : index - %cst_small = arith.constant dense<1> : vector<4x64xi8> - %cst_large = arith.constant dense<0> : vector<8x64xi8> - %insert = vector.insert_strided_slice %cst_small, %cst_large {offsets = [0, 0], strides = [1, 1]} : vector<4x64xi8> into vector<8x64xi8> - %tdesc = xegpu.create_nd_tdesc %arg0 : memref<8x64xi8> -> !xegpu.tensor_desc<8x64xi8> - xegpu.store_nd %insert, %tdesc[0, 0] <{layout = #xegpu.layout}>: vector<8x64xi8>, !xegpu.tensor_desc<8x64xi8> + %0 = xegpu.create_nd_tdesc %arg0 : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16> + %1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<8x16xf16> -> vector<8x16xf16> + %offset = arith.constant dense<0> : vector<16x16xindex> + %mask = arith.constant dense : vector<16x16xi1> + %3 = xegpu.load %arg1[%offset], %mask + : memref<256xf16>, vector<16x16xindex>, vector<16x16xi1> -> vector<16x16xf16> + %4 = vector.transpose %3, [1, 0] : vector<16x16xf16> to vector<16x16xf16> + %5 = xegpu.dpas %1, %4 : vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32> + %6 = xegpu.create_nd_tdesc %arg2 : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32> + xegpu.store_nd %5, %6[0, 0] : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32> return } } @@ -257,12 +178,12 @@ func.func @insert_strided_slice_inst_data_with_packing(%arg0: memref<8x64xi8>) { // ----- gpu.module @test { // CHECK-LABEL: func.func @vector_shape_cast_expand_non_unit_dims( -// CHECK: %[[LOAD:.*]] = xegpu.load %arg0[%[[STEP:.*]]], %[[CST:.*]] <{layout = 
#xegpu.layout}> : memref<1024xf16>, vector<1024xindex>, vector<1024xi1> -> vector<1024xf16> -// CHECK: %[[CAST:.*]] = vector.shape_cast %[[LOAD]] {layout_result_0 = #xegpu.layout} : vector<1024xf16> to vector<8x8x16xf16> -// CHECK: %[[CST_0:.*]] = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} dense<0.000000e+00> : vector<8x16xf16> -// CHECK: %[[CST_1:.*]] = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} dense<0.000000e+00> : vector<16xf16> -// CHECK: %[[REDUCE_0:.*]] = vector.multi_reduction , %[[CAST]], %[[CST_0]] {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} [0] : vector<8x8x16xf16> to vector<8x16xf16> -// CHECK: %[[REDUCE_1:.*]] = vector.multi_reduction , %[[REDUCE_0]], %[[CST_1]] {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} [0] : vector<8x16xf16> to vector<16xf16> +// CHECK: %[[LOAD:.*]] = xegpu.load %arg0[%[[STEP:.*]]], %[[CST:.*]] <{layout = #xegpu.layout}> : memref<1024xf16>, vector<1024xindex>, vector<1024xi1> -> vector<1024xf16> +// CHECK: %[[CAST:.*]] = vector.shape_cast %[[LOAD]] {layout_result_0 = #xegpu.layout} : vector<1024xf16> to vector<8x8x16xf16> +// CHECK: %[[CST_0:.*]] = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} dense<0.000000e+00> : vector<8x16xf16> +// CHECK: %[[CST_1:.*]] = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} dense<0.000000e+00> : vector<16xf16> +// CHECK: %[[REDUCE_0:.*]] = vector.multi_reduction , %[[CAST]], %[[CST_0]] {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} [0] : vector<8x8x16xf16> to vector<8x16xf16> +// CHECK: %[[REDUCE_1:.*]] = vector.multi_reduction , %[[REDUCE_0]], %[[CST_1]] {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} [0] : vector<8x16xf16> to vector<16xf16> func.func @vector_shape_cast_expand_non_unit_dims(%arg0: memref<1024xf16>, %arg1: memref<16xf16>) { %cst = arith.constant dense : vector<1024xi1> %0 = vector.step : vector<1024xindex> @@ 
-282,7 +203,7 @@ func.func @vector_shape_cast_expand_non_unit_dims(%arg0: memref<1024xf16>, %arg1 // ----- gpu.module @test { // CHECK-LABEL: func.func @vector_2d_reduction_with_fractional_subgroup_size( -// CHECK: %[[ReduceVal:.*]] = vector.multi_reduction , %[[Val:.*]], %[[CST:.*]] {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [1, 2]>} [1, 2] : vector<1x16x1xf16> to vector<1xf16> +// CHECK: %[[ReduceVal:.*]] = vector.multi_reduction , %[[Val:.*]], %[[CST:.*]] {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [1, 2]>} [1, 2] : vector<1x16x1xf16> to vector<1xf16> func.func @vector_2d_reduction_with_fractional_subgroup_size(%arg0: memref<1024xf16>, %arg1: memref<16xf16>) { %cst = arith.constant dense : vector<16xi1> %0 = vector.step : vector<16xindex> @@ -300,7 +221,7 @@ func.func @vector_2d_reduction_with_fractional_subgroup_size(%arg0: memref<1024x // ----- gpu.module @test { // CHECK-LABEL: func.func @vector_2d_reduction_with_fractional_subgroup_size_1x4x1( -// CHECK: %[[ReduceVal:.*]] = vector.multi_reduction , %[[Val:.*]], %[[CST:.*]] {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [1, 2]>} [1, 2] : vector<1x16x4xf16> to vector<1xf16> +// CHECK: %[[ReduceVal:.*]] = vector.multi_reduction , %[[Val:.*]], %[[CST:.*]] {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [1, 2]>} [1, 2] : vector<1x16x4xf16> to vector<1xf16> func.func @vector_2d_reduction_with_fractional_subgroup_size_1x4x1(%arg0: memref<1024xf16>, %arg1: memref<16xf16>) { %cst = arith.constant dense : vector<64xi1> %0 = vector.step : vector<64xindex> @@ -318,13 +239,13 @@ func.func @vector_2d_reduction_with_fractional_subgroup_size_1x4x1(%arg0: memref // ----- gpu.module @test { // CHECK-LABEL: func.func @vector_shape_cast_expand_and_merge( -// CHECK: %[[CST:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense : vector<256xi1> -// CHECK: %[[STEP:.*]] = vector.step {layout_result_0 = #xegpu.layout} : vector<256xindex> -// CHECK: %[[LOAD:.*]] = xegpu.load %arg0[%[[STEP]]], 
%[[CST]] <{layout = #xegpu.layout}> : memref<256xf16>, vector<256xindex>, vector<256xi1> -> vector<256xf16> -// CHECK: %[[CAST_0:.*]] = vector.shape_cast %[[LOAD]] {layout_result_0 = #xegpu.layout} : vector<256xf16> to vector<2x4x32xf16> -// CHECK: %[[CAST_1:.*]] = vector.shape_cast %[[CAST_0]] {layout_result_0 = #xegpu.layout} : vector<2x4x32xf16> to vector<1x256xf16> -// CHECK: %[[CAST_2:.*]] = vector.shape_cast %[[CAST_1]] {layout_result_0 = #xegpu.layout} : vector<1x256xf16> to vector<256xf16> -// CHECK: xegpu.store %[[CAST_2]], %arg1[%[[STEP]]], %[[CST]] <{layout = #xegpu.layout}> : vector<256xf16>, memref<256xf16>, vector<256xindex>, vector<256xi1> +// CHECK: %[[CST:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense : vector<256xi1> +// CHECK: %[[STEP:.*]] = vector.step {layout_result_0 = #xegpu.layout} : vector<256xindex> +// CHECK: %[[LOAD:.*]] = xegpu.load %arg0[%[[STEP]]], %[[CST]] <{layout = #xegpu.layout}> : memref<256xf16>, vector<256xindex>, vector<256xi1> -> vector<256xf16> +// CHECK: %[[CAST_0:.*]] = vector.shape_cast %[[LOAD]] {layout_result_0 = #xegpu.layout} : vector<256xf16> to vector<2x4x32xf16> +// CHECK: %[[CAST_1:.*]] = vector.shape_cast %[[CAST_0]] {layout_result_0 = #xegpu.layout} : vector<2x4x32xf16> to vector<1x256xf16> +// CHECK: %[[CAST_2:.*]] = vector.shape_cast %[[CAST_1]] {layout_result_0 = #xegpu.layout} : vector<1x256xf16> to vector<256xf16> +// CHECK: xegpu.store %[[CAST_2]], %arg1[%[[STEP]]], %[[CST]] <{layout = #xegpu.layout}> : vector<256xf16>, memref<256xf16>, vector<256xindex>, vector<256xi1> func.func @vector_shape_cast_expand_and_merge(%arg0: memref<256xf16>, %arg1: memref<256xf16>) { %cst = arith.constant dense : vector<256xi1> %0 = vector.step : vector<256xindex> @@ -341,8 +262,8 @@ func.func @vector_shape_cast_expand_and_merge(%arg0: memref<256xf16>, %arg1: mem // ----- gpu.module @test{ // CHECK-LABEL: load_store_matrix - // CHECK: xegpu.load_matrix %{{.*}} <{layout = #xegpu.layout}> - // CHECK: 
xegpu.store_matrix %{{.*}} <{layout = #xegpu.layout}> + // CHECK: xegpu.load_matrix %{{.*}} <{layout = #xegpu.layout}> + // CHECK: xegpu.store_matrix %{{.*}} <{layout = #xegpu.layout}> func.func @load_store_matrix(%arg0: !xegpu.mem_desc<64x128xf32>, %arg1: i1) { %c0 = arith.constant 0 : index scf.if %arg1 { @@ -356,11 +277,11 @@ gpu.module @test{ // ----- gpu.module @test{ // CHECK-LABEL: broadcast_both_leadingdims_innerdims - // CHECK: arith.constant {layout_result_0 = #xegpu.layout} dense : vector<2x2x6x32xi1> - // CHECK: arith.constant {layout_result_0 = #xegpu.layout} dense<1.000000e+00> : vector<2x2x6x32xf32> - // CHECK: vector.step {layout_result_0 = #xegpu.slice<#xegpu.slice<#xegpu.layout, dims = [0, 1]>, dims = [1]>} : vector<6xindex> - // CHECK: vector.shape_cast {{.*}} {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0, 1]>} : vector<6xindex> to vector<6x1xindex> - // CHECK: vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout} : vector<6x1xindex> to vector<2x2x6x32xindex> + // CHECK: arith.constant {layout_result_0 = #xegpu.layout} dense : vector<2x2x6x32xi1> + // CHECK: arith.constant {layout_result_0 = #xegpu.layout} dense<1.000000e+00> : vector<2x2x6x32xf32> + // CHECK: vector.step {layout_result_0 = #xegpu.slice<#xegpu.slice<#xegpu.layout, dims = [0, 1]>, dims = [1]>} : vector<6xindex> + // CHECK: vector.shape_cast {{.*}} {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0, 1]>} : vector<6xindex> to vector<6x1xindex> + // CHECK: vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout} : vector<6x1xindex> to vector<2x2x6x32xindex> gpu.func @broadcast_both_leadingdims_innerdims(%arg0: memref<32x2x192xf32>, %arg1: memref<32x2x192xf32>, %arg2: memref<32x2x192xf32>) kernel attributes {known_block_size = array, known_grid_size = array} { %cst = arith.constant dense : vector<2x2x6x32xi1> %cst_0 = arith.constant dense<1.000000e+00> : vector<2x2x6x32xf32> @@ -391,7 +312,7 @@ gpu.module @test_collapse_dims [#xevm.target] { %mask = 
arith.constant dense : vector<32x32xi1> %data = arith.constant dense<0.0> : vector<32x32xf32> - // CHECK: xegpu.store {{.*}} <{{{.*}}layout = #xegpu.layout{{.*}}}> : + // CHECK: xegpu.store {{.*}} <{{{.*}}layout = #xegpu.layout{{.*}}}> : xegpu.store %data, %ptr_i64[%1], %mask { layout = #xegpu.layout } : vector<32x32xf32>, i64, vector<32x32xindex>, vector<32x32xi1> @@ -403,22 +324,22 @@ gpu.module @test_collapse_dims [#xevm.target] { // ----- gpu.module @test { // CHECK-LABEL: func.func @bitcast_ui8_to_f4( -// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<256x16xui8>) { -// CHECK: %[[TDESC:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<256x16xui8> -> !xegpu.tensor_desc<256x16xui8, #xegpu.layout> -// CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[TDESC]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<256x16xui8, #xegpu.layout> -> vector<256x16xui8> -// CHECK: %[[BC:.*]] = vector.bitcast %[[LOAD]] {layout_result_0 = #xegpu.layout} : vector<256x16xui8> to vector<256x32xf4E2M1FN> +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<256x32xui8>) { +// CHECK: %[[TDESC:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<256x32xui8> -> !xegpu.tensor_desc<256x32xui8, #xegpu.layout> +// CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[TDESC]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<256x32xui8, #xegpu.layout> -> vector<256x32xui8> +// CHECK: %[[BC:.*]] = vector.bitcast %[[LOAD]] {layout_result_0 = #xegpu.layout} : vector<256x32xui8> to vector<256x64xf4E2M1FN> // CHECK: xegpu.convert_layout %[[BC]] -// CHECK-SAME: <{input_layout = #xegpu.layout, target_layout = #xegpu.layout}> -// CHECK-SAME: : vector<256x32xf4E2M1FN> -func.func @bitcast_ui8_to_f4(%arg0: memref<256x16xui8>) { - %0 = xegpu.create_nd_tdesc %arg0 : memref<256x16xui8> -> !xegpu.tensor_desc<256x16xui8> - %1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x16xui8> -> vector<256x16xui8> - %2 = vector.bitcast %1 : vector<256x16xui8> to vector<256x32xf4E2M1FN> +// CHECK-SAME: <{input_layout = 
#xegpu.layout, target_layout = #xegpu.layout}> +// CHECK-SAME: : vector<256x64xf4E2M1FN> +func.func @bitcast_ui8_to_f4(%arg0: memref<256x32xui8>) { + %0 = xegpu.create_nd_tdesc %arg0 : memref<256x32xui8> -> !xegpu.tensor_desc<256x32xui8> + %1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x32xui8> -> vector<256x32xui8> + %2 = vector.bitcast %1 : vector<256x32xui8> to vector<256x64xf4E2M1FN> %3 = xegpu.convert_layout %2 - <{input_layout = #xegpu.layout, - target_layout = #xegpu.layout}> - : vector<256x32xf4E2M1FN> + <{input_layout = #xegpu.layout, + target_layout = #xegpu.layout}> + : vector<256x64xf4E2M1FN> return } } @@ -427,20 +348,20 @@ func.func @bitcast_ui8_to_f4(%arg0: memref<256x16xui8>) { gpu.module @test { // CHECK-LABEL: func.func @bitcast_ui16_to_f4( // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<256x16xui16>) { -// CHECK: %[[TDESC:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<256x16xui16> -> !xegpu.tensor_desc<256x16xui16, #xegpu.layout> -// CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[TDESC]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<256x16xui16, #xegpu.layout> -> vector<256x16xui16> -// CHECK: %[[BC:.*]] = vector.bitcast %[[LOAD]] {layout_result_0 = #xegpu.layout} : vector<256x16xui16> to vector<256x64xf4E2M1FN> +// CHECK: %[[TDESC:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<256x16xui16> -> !xegpu.tensor_desc<256x16xui16, #xegpu.layout> +// CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[TDESC]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<256x16xui16, #xegpu.layout> -> vector<256x16xui16> +// CHECK: %[[BC:.*]] = vector.bitcast %[[LOAD]] {layout_result_0 = #xegpu.layout} : vector<256x16xui16> to vector<256x64xf4E2M1FN> // CHECK: xegpu.convert_layout %[[BC]] -// CHECK-SAME: <{input_layout = #xegpu.layout, target_layout = #xegpu.layout}> +// CHECK-SAME: <{input_layout = #xegpu.layout, target_layout = #xegpu.layout}> // CHECK-SAME: : vector<256x64xf4E2M1FN> func.func @bitcast_ui16_to_f4(%arg0: 
memref<256x16xui16>) { %0 = xegpu.create_nd_tdesc %arg0 : memref<256x16xui16> -> !xegpu.tensor_desc<256x16xui16> %1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<256x16xui16> -> vector<256x16xui16> %2 = vector.bitcast %1 : vector<256x16xui16> to vector<256x64xf4E2M1FN> %3 = xegpu.convert_layout %2 - <{input_layout = #xegpu.layout, - target_layout = #xegpu.layout}> + <{input_layout = #xegpu.layout, + target_layout = #xegpu.layout}> : vector<256x64xf4E2M1FN> return } @@ -449,40 +370,40 @@ func.func @bitcast_ui16_to_f4(%arg0: memref<256x16xui16>) { // ----- // CHECK-LABEL: func.func @dpas_mx_f8e5m2 -// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<16x64xf8E5M2>, %[[ARG1:[0-9a-zA-Z]+]]: memref<64x32xf8E5M2>, %[[ARG2:[0-9a-zA-Z]+]]: memref<16x32xbf16> -// CHECK-SAME: %[[ARG3:[0-9a-zA-Z]+]]: memref<16x2xf8E8M0FNU>, %[[ARG4:[0-9a-zA-Z]+]]: memref<2x32xf8E8M0FNU> -// CHECK: %[[CST:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<0.000000e+00> : vector<16x32xbf16> -// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<16x64xf8E5M2> -> !xegpu.tensor_desc<16x64xf8E5M2, #xegpu.layout> -// CHECK: %[[T1:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<64x32xf8E5M2> -> !xegpu.tensor_desc<64x32xf8E5M2, #xegpu.layout> -// CHECK: %[[T2:.*]] = xegpu.load_nd %[[T0]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<16x64xf8E5M2, #xegpu.layout> -> vector<16x64xf8E5M2> -// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T1]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<64x32xf8E5M2, #xegpu.layout> -> vector<64x32xf8E5M2> -// CHECK: %[[T4:.*]] = xegpu.create_nd_tdesc %[[ARG3]] : memref<16x2xf8E8M0FNU> -> !xegpu.tensor_desc<16x2xf8E8M0FNU, #xegpu.layout> -// CHECK: %[[T5:.*]] = xegpu.load_nd %[[T4]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<16x2xf8E8M0FNU, #xegpu.layout> -> vector<16x2xf8E8M0FNU> -// CHECK: %[[T6:.*]] = xegpu.create_nd_tdesc %[[ARG4]] : memref<2x32xf8E8M0FNU> -> 
!xegpu.tensor_desc<2x32xf8E8M0FNU, #xegpu.layout> -// CHECK: %[[T7:.*]] = xegpu.load_nd %[[T6]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<2x32xf8E8M0FNU, #xegpu.layout> -> vector<2x32xf8E8M0FNU> +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<16x1024xf8E5M2>, %[[ARG1:[0-9a-zA-Z]+]]: memref<1024x32xf8E5M2>, %[[ARG2:[0-9a-zA-Z]+]]: memref<16x32xbf16> +// CHECK-SAME: %[[ARG3:[0-9a-zA-Z]+]]: memref<16x32xf8E8M0FNU>, %[[ARG4:[0-9a-zA-Z]+]]: memref<32x32xf8E8M0FNU> +// CHECK: %[[CST:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<0.000000e+00> : vector<16x32xbf16> +// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<16x1024xf8E5M2> -> !xegpu.tensor_desc<16x1024xf8E5M2, #xegpu.layout> +// CHECK: %[[T1:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<1024x32xf8E5M2> -> !xegpu.tensor_desc<1024x32xf8E5M2, #xegpu.layout> +// CHECK: %[[T2:.*]] = xegpu.load_nd %[[T0]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<16x1024xf8E5M2, #xegpu.layout> -> vector<16x1024xf8E5M2> +// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T1]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<1024x32xf8E5M2, #xegpu.layout> -> vector<1024x32xf8E5M2> +// CHECK: %[[T4:.*]] = xegpu.create_nd_tdesc %[[ARG3]] : memref<16x32xf8E8M0FNU> -> !xegpu.tensor_desc<16x32xf8E8M0FNU, #xegpu.layout> +// CHECK: %[[T5:.*]] = xegpu.load_nd %[[T4]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<16x32xf8E8M0FNU, #xegpu.layout> -> vector<16x32xf8E8M0FNU> +// CHECK: %[[T6:.*]] = xegpu.create_nd_tdesc %[[ARG4]] : memref<32x32xf8E8M0FNU> -> !xegpu.tensor_desc<32x32xf8E8M0FNU, #xegpu.layout> +// CHECK: %[[T7:.*]] = xegpu.load_nd %[[T6]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<32x32xf8E8M0FNU, #xegpu.layout> -> vector<32x32xf8E8M0FNU> // CHECK: %[[T8:.*]] = xegpu.dpas_mx %[[T2]], %[[T3]], %[[CST]] scale_a = %[[T5]] scale_b = %[[T7]] -// CHECK-SAME: {layout_a = #xegpu.layout, layout_a_scale 
= #xegpu.layout, layout_b = #xegpu.layout, layout_b_scale = #xegpu.layout, layout_cd = #xegpu.layout} : -// CHECK-SAME: (vector<16x64xf8E5M2>, vector<64x32xf8E5M2>, vector<16x32xbf16>, vector<16x2xf8E8M0FNU>, vector<2x32xf8E8M0FNU>) -> vector<16x32xbf16> -// CHECK: %[[T9:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<16x32xbf16> -> !xegpu.tensor_desc<16x32xbf16, #xegpu.layout> -// CHECK: xegpu.store_nd %[[T8]], %[[T9]][0, 0] <{layout = #xegpu.layout}> : vector<16x32xbf16>, !xegpu.tensor_desc<16x32xbf16, #xegpu.layout> +// CHECK-SAME: {layout_a = #xegpu.layout, layout_a_scale = #xegpu.layout, layout_b = #xegpu.layout, layout_b_scale = #xegpu.layout, layout_cd = #xegpu.layout} : +// CHECK-SAME: (vector<16x1024xf8E5M2>, vector<1024x32xf8E5M2>, vector<16x32xbf16>, vector<16x32xf8E8M0FNU>, vector<32x32xf8E8M0FNU>) -> vector<16x32xbf16> +// CHECK: %[[T9:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<16x32xbf16> -> !xegpu.tensor_desc<16x32xbf16, #xegpu.layout> +// CHECK: xegpu.store_nd %[[T8]], %[[T9]][0, 0] <{layout = #xegpu.layout}> : vector<16x32xbf16>, !xegpu.tensor_desc<16x32xbf16, #xegpu.layout> gpu.module @test { -func.func @dpas_mx_f8e5m2(%arg0: memref<16x64xf8E5M2>, %arg1: memref<64x32xf8E5M2>, %arg2: memref<16x32xbf16>, - %arg3: memref<16x2xf8E8M0FNU>, %arg4: memref<2x32xf8E8M0FNU>) { +func.func @dpas_mx_f8e5m2(%arg0: memref<16x1024xf8E5M2>, %arg1: memref<1024x32xf8E5M2>, %arg2: memref<16x32xbf16>, + %arg3: memref<16x32xf8E8M0FNU>, %arg4: memref<32x32xf8E8M0FNU>) { %c0 = arith.constant 0 : index %cst = arith.constant dense<0.000000e+00> : vector<16x32xbf16> - %0 = xegpu.create_nd_tdesc %arg0 : memref<16x64xf8E5M2> -> !xegpu.tensor_desc<16x64xf8E5M2> - %1 = xegpu.create_nd_tdesc %arg1 : memref<64x32xf8E5M2> -> !xegpu.tensor_desc<64x32xf8E5M2> - %2 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<16x64xf8E5M2> -> vector<16x64xf8E5M2> - %3 = xegpu.load_nd %1[0, 0] : !xegpu.tensor_desc<64x32xf8E5M2> -> vector<64x32xf8E5M2> - %4 = xegpu.create_nd_tdesc %arg3 : 
memref<16x2xf8E8M0FNU> -> !xegpu.tensor_desc<16x2xf8E8M0FNU> - %5 = xegpu.load_nd %4[0, 0] : !xegpu.tensor_desc<16x2xf8E8M0FNU> -> vector<16x2xf8E8M0FNU> - %6 = xegpu.create_nd_tdesc %arg4 : memref<2x32xf8E8M0FNU> -> !xegpu.tensor_desc<2x32xf8E8M0FNU> - %7 = xegpu.load_nd %6[0, 0] : !xegpu.tensor_desc<2x32xf8E8M0FNU> -> vector<2x32xf8E8M0FNU> - %8 = xegpu.dpas_mx %2, %3, %cst scale_a = %5 scale_b = %7 : (vector<16x64xf8E5M2>, vector<64x32xf8E5M2>, vector<16x32xbf16>, vector<16x2xf8E8M0FNU>, vector<2x32xf8E8M0FNU>) -> vector<16x32xbf16> + %0 = xegpu.create_nd_tdesc %arg0 : memref<16x1024xf8E5M2> -> !xegpu.tensor_desc<16x1024xf8E5M2> + %1 = xegpu.create_nd_tdesc %arg1 : memref<1024x32xf8E5M2> -> !xegpu.tensor_desc<1024x32xf8E5M2> + %2 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<16x1024xf8E5M2> -> vector<16x1024xf8E5M2> + %3 = xegpu.load_nd %1[0, 0] : !xegpu.tensor_desc<1024x32xf8E5M2> -> vector<1024x32xf8E5M2> + %4 = xegpu.create_nd_tdesc %arg3 : memref<16x32xf8E8M0FNU> -> !xegpu.tensor_desc<16x32xf8E8M0FNU> + %5 = xegpu.load_nd %4[0, 0] : !xegpu.tensor_desc<16x32xf8E8M0FNU> -> vector<16x32xf8E8M0FNU> + %6 = xegpu.create_nd_tdesc %arg4 : memref<32x32xf8E8M0FNU> -> !xegpu.tensor_desc<32x32xf8E8M0FNU> + %7 = xegpu.load_nd %6[0, 0] : !xegpu.tensor_desc<32x32xf8E8M0FNU> -> vector<32x32xf8E8M0FNU> + %8 = xegpu.dpas_mx %2, %3, %cst scale_a = %5 scale_b = %7 : (vector<16x1024xf8E5M2>, vector<1024x32xf8E5M2>, vector<16x32xbf16>, vector<16x32xf8E8M0FNU>, vector<32x32xf8E8M0FNU>) -> vector<16x32xbf16> %9 = xegpu.create_nd_tdesc %arg2 : memref<16x32xbf16> -> !xegpu.tensor_desc<16x32xbf16> xegpu.store_nd %8, %9[0, 0] : vector<16x32xbf16>, !xegpu.tensor_desc<16x32xbf16> return @@ -491,40 +412,40 @@ func.func @dpas_mx_f8e5m2(%arg0: memref<16x64xf8E5M2>, %arg1: memref<64x32xf8E5M // ----- // CHECK-LABEL: func.func @dpas_mx_f4e2m1 -// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<16x128xf4E2M1FN>, %[[ARG1:[0-9a-zA-Z]+]]: memref<128x32xf4E2M1FN>, %[[ARG2:[0-9a-zA-Z]+]]: 
memref<16x32xbf16> -// CHECK-SAME: %[[ARG3:[0-9a-zA-Z]+]]: memref<16x4xf8E8M0FNU>, %[[ARG4:[0-9a-zA-Z]+]]: memref<4x32xf8E8M0FNU> -// CHECK: %[[CST:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<0.000000e+00> : vector<16x32xbf16> -// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<16x128xf4E2M1FN> -> !xegpu.tensor_desc<16x128xf4E2M1FN, #xegpu.layout> -// CHECK: %[[T1:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<128x32xf4E2M1FN> -> !xegpu.tensor_desc<128x32xf4E2M1FN, #xegpu.layout> -// CHECK: %[[T2:.*]] = xegpu.load_nd %[[T0]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<16x128xf4E2M1FN, #xegpu.layout> -> vector<16x128xf4E2M1FN> -// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T1]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<128x32xf4E2M1FN, #xegpu.layout> -> vector<128x32xf4E2M1FN> -// CHECK: %[[T4:.*]] = xegpu.create_nd_tdesc %[[ARG3]] : memref<16x4xf8E8M0FNU> -> !xegpu.tensor_desc<16x4xf8E8M0FNU, #xegpu.layout> -// CHECK: %[[T5:.*]] = xegpu.load_nd %[[T4]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<16x4xf8E8M0FNU, #xegpu.layout> -> vector<16x4xf8E8M0FNU> -// CHECK: %[[T6:.*]] = xegpu.create_nd_tdesc %[[ARG4]] : memref<4x32xf8E8M0FNU> -> !xegpu.tensor_desc<4x32xf8E8M0FNU, #xegpu.layout> -// CHECK: %[[T7:.*]] = xegpu.load_nd %[[T6]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<4x32xf8E8M0FNU, #xegpu.layout> -> vector<4x32xf8E8M0FNU> +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<16x1024xf4E2M1FN>, %[[ARG1:[0-9a-zA-Z]+]]: memref<1024x32xf4E2M1FN>, %[[ARG2:[0-9a-zA-Z]+]]: memref<16x32xbf16> +// CHECK-SAME: %[[ARG3:[0-9a-zA-Z]+]]: memref<16x32xf8E8M0FNU>, %[[ARG4:[0-9a-zA-Z]+]]: memref<32x32xf8E8M0FNU> +// CHECK: %[[CST:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<0.000000e+00> : vector<16x32xbf16> +// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<16x1024xf4E2M1FN> -> !xegpu.tensor_desc<16x1024xf4E2M1FN, 
#xegpu.layout> +// CHECK: %[[T1:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<1024x32xf4E2M1FN> -> !xegpu.tensor_desc<1024x32xf4E2M1FN, #xegpu.layout> +// CHECK: %[[T2:.*]] = xegpu.load_nd %[[T0]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<16x1024xf4E2M1FN, #xegpu.layout> -> vector<16x1024xf4E2M1FN> +// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T1]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<1024x32xf4E2M1FN, #xegpu.layout> -> vector<1024x32xf4E2M1FN> +// CHECK: %[[T4:.*]] = xegpu.create_nd_tdesc %[[ARG3]] : memref<16x32xf8E8M0FNU> -> !xegpu.tensor_desc<16x32xf8E8M0FNU, #xegpu.layout> +// CHECK: %[[T5:.*]] = xegpu.load_nd %[[T4]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<16x32xf8E8M0FNU, #xegpu.layout> -> vector<16x32xf8E8M0FNU> +// CHECK: %[[T6:.*]] = xegpu.create_nd_tdesc %[[ARG4]] : memref<32x32xf8E8M0FNU> -> !xegpu.tensor_desc<32x32xf8E8M0FNU, #xegpu.layout> +// CHECK: %[[T7:.*]] = xegpu.load_nd %[[T6]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<32x32xf8E8M0FNU, #xegpu.layout> -> vector<32x32xf8E8M0FNU> // CHECK: %[[T8:.*]] = xegpu.dpas_mx %[[T2]], %[[T3]], %[[CST]] scale_a = %[[T5]] scale_b = %[[T7]] -// CHECK-SAME: {layout_a = #xegpu.layout, layout_a_scale = #xegpu.layout, layout_b = #xegpu.layout, layout_b_scale = #xegpu.layout, layout_cd = #xegpu.layout} : -// CHECK-SAME: (vector<16x128xf4E2M1FN>, vector<128x32xf4E2M1FN>, vector<16x32xbf16>, vector<16x4xf8E8M0FNU>, vector<4x32xf8E8M0FNU>) -> vector<16x32xbf16> -// CHECK: %[[T9:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<16x32xbf16> -> !xegpu.tensor_desc<16x32xbf16, #xegpu.layout> -// CHECK: xegpu.store_nd %[[T8]], %[[T9]][0, 0] <{layout = #xegpu.layout}> : vector<16x32xbf16>, !xegpu.tensor_desc<16x32xbf16, #xegpu.layout> +// CHECK-SAME: {layout_a = #xegpu.layout, layout_a_scale = #xegpu.layout, layout_b = #xegpu.layout, layout_b_scale = #xegpu.layout, layout_cd = #xegpu.layout} : +// CHECK-SAME: 
(vector<16x1024xf4E2M1FN>, vector<1024x32xf4E2M1FN>, vector<16x32xbf16>, vector<16x32xf8E8M0FNU>, vector<32x32xf8E8M0FNU>) -> vector<16x32xbf16> +// CHECK: %[[T9:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<16x32xbf16> -> !xegpu.tensor_desc<16x32xbf16, #xegpu.layout> +// CHECK: xegpu.store_nd %[[T8]], %[[T9]][0, 0] <{layout = #xegpu.layout}> : vector<16x32xbf16>, !xegpu.tensor_desc<16x32xbf16, #xegpu.layout> gpu.module @test { -func.func @dpas_mx_f4e2m1(%arg0: memref<16x128xf4E2M1FN>, %arg1: memref<128x32xf4E2M1FN>, %arg2: memref<16x32xbf16>, - %arg3: memref<16x4xf8E8M0FNU>, %arg4: memref<4x32xf8E8M0FNU>) { +func.func @dpas_mx_f4e2m1(%arg0: memref<16x1024xf4E2M1FN>, %arg1: memref<1024x32xf4E2M1FN>, %arg2: memref<16x32xbf16>, + %arg3: memref<16x32xf8E8M0FNU>, %arg4: memref<32x32xf8E8M0FNU>) { %c0 = arith.constant 0 : index %cst = arith.constant dense<0.000000e+00> : vector<16x32xbf16> - %0 = xegpu.create_nd_tdesc %arg0 : memref<16x128xf4E2M1FN> -> !xegpu.tensor_desc<16x128xf4E2M1FN> - %1 = xegpu.create_nd_tdesc %arg1 : memref<128x32xf4E2M1FN> -> !xegpu.tensor_desc<128x32xf4E2M1FN> - %2 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<16x128xf4E2M1FN> -> vector<16x128xf4E2M1FN> - %3 = xegpu.load_nd %1[0, 0] : !xegpu.tensor_desc<128x32xf4E2M1FN> -> vector<128x32xf4E2M1FN> - %4 = xegpu.create_nd_tdesc %arg3 : memref<16x4xf8E8M0FNU> -> !xegpu.tensor_desc<16x4xf8E8M0FNU> - %5 = xegpu.load_nd %4[0, 0] : !xegpu.tensor_desc<16x4xf8E8M0FNU> -> vector<16x4xf8E8M0FNU> - %6 = xegpu.create_nd_tdesc %arg4 : memref<4x32xf8E8M0FNU> -> !xegpu.tensor_desc<4x32xf8E8M0FNU> - %7 = xegpu.load_nd %6[0, 0] : !xegpu.tensor_desc<4x32xf8E8M0FNU> -> vector<4x32xf8E8M0FNU> - %8 = xegpu.dpas_mx %2, %3, %cst scale_a = %5 scale_b = %7 : (vector<16x128xf4E2M1FN>, vector<128x32xf4E2M1FN>, vector<16x32xbf16>, vector<16x4xf8E8M0FNU>, vector<4x32xf8E8M0FNU>) -> vector<16x32xbf16> + %0 = xegpu.create_nd_tdesc %arg0 : memref<16x1024xf4E2M1FN> -> !xegpu.tensor_desc<16x1024xf4E2M1FN> + %1 = 
xegpu.create_nd_tdesc %arg1 : memref<1024x32xf4E2M1FN> -> !xegpu.tensor_desc<1024x32xf4E2M1FN> + %2 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<16x1024xf4E2M1FN> -> vector<16x1024xf4E2M1FN> + %3 = xegpu.load_nd %1[0, 0] : !xegpu.tensor_desc<1024x32xf4E2M1FN> -> vector<1024x32xf4E2M1FN> + %4 = xegpu.create_nd_tdesc %arg3 : memref<16x32xf8E8M0FNU> -> !xegpu.tensor_desc<16x32xf8E8M0FNU> + %5 = xegpu.load_nd %4[0, 0] : !xegpu.tensor_desc<16x32xf8E8M0FNU> -> vector<16x32xf8E8M0FNU> + %6 = xegpu.create_nd_tdesc %arg4 : memref<32x32xf8E8M0FNU> -> !xegpu.tensor_desc<32x32xf8E8M0FNU> + %7 = xegpu.load_nd %6[0, 0] : !xegpu.tensor_desc<32x32xf8E8M0FNU> -> vector<32x32xf8E8M0FNU> + %8 = xegpu.dpas_mx %2, %3, %cst scale_a = %5 scale_b = %7 : (vector<16x1024xf4E2M1FN>, vector<1024x32xf4E2M1FN>, vector<16x32xbf16>, vector<16x32xf8E8M0FNU>, vector<32x32xf8E8M0FNU>) -> vector<16x32xbf16> %9 = xegpu.create_nd_tdesc %arg2 : memref<16x32xbf16> -> !xegpu.tensor_desc<16x32xbf16> xegpu.store_nd %8, %9[0, 0] : vector<16x32xbf16>, !xegpu.tensor_desc<16x32xbf16> return @@ -607,3 +528,128 @@ func.func @vector_shape_cast_collapse_multi_groups(%arg0: memref<8x128xf16>) { return } } + +// ----- +// completeBlockStoreLaneLayoutFromInstData: user supplies only inst_data on a +// store_nd; lane_layout / lane_data are completed from it (data sink, no +// consumer). inst_data=[8,16] -> lane_layout=[1,16], lane_data=[1,1]. 
+gpu.module @test { +// CHECK-LABEL: func.func @complete_store_nd_inst_data( +// CHECK: xegpu.store_nd %{{.*}} <{layout = #xegpu.layout}> : vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32, #xegpu.layout> +func.func @complete_store_nd_inst_data(%arg0: memref<8x32xf32>) { + %cst = arith.constant dense<0.000000e+00> : vector<8x32xf32> + %0 = xegpu.create_nd_tdesc %arg0 : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32> + xegpu.store_nd %cst, %0[0, 0] <{layout = #xegpu.layout}> : vector<8x32xf32>, !xegpu.tensor_desc<8x32xf32> + return +} +} + +// ----- +// completeBlockStoreLaneLayoutFromInstData (prefetch path): prefetch_nd is also +// a data sink served by the same helper. inst_data=[8,16] -> [1,16]/[1,1]. +gpu.module @test { +// CHECK-LABEL: func.func @complete_prefetch_nd_inst_data( +// CHECK: xegpu.prefetch_nd %{{.*}} <{{{.*}}layout = #xegpu.layout}> : !xegpu.tensor_desc<8x32xf32, #xegpu.layout> +func.func @complete_prefetch_nd_inst_data(%arg0: memref<8x32xf32>) { + %0 = xegpu.create_nd_tdesc %arg0 : memref<8x32xf32> -> !xegpu.tensor_desc<8x32xf32> + xegpu.prefetch_nd %0[0, 0] <{l1_hint = #xegpu.cache_hint, layout = #xegpu.layout}> : !xegpu.tensor_desc<8x32xf32> + return +} +} + +// ----- +// completeBlockLoadLaneLayoutFromInstData: load_nd feeds a DPAS, so the consumer +// supplies the transform / transpose / packing properties while lane_layout / +// lane_data are recomputed from inst_data. A (inst=[8,16]) -> [1,16]/[1,1]; +// B (inst=[16,16], VNNI packing from the DPAS B consumer) -> [1,16]/[2,1]. 
+gpu.module @test { +// CHECK-LABEL: func.func @complete_load_nd_inst_data( +// CHECK: xegpu.load_nd %{{.*}} <{layout = #xegpu.layout}> : !xegpu.tensor_desc<8x16xf16, #xegpu.layout> -> vector<8x16xf16> +// CHECK: xegpu.load_nd %{{.*}} <{layout = #xegpu.layout}> : !xegpu.tensor_desc<16x16xf16, #xegpu.layout> -> vector<16x16xf16> +func.func @complete_load_nd_inst_data(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<8x16xf32>) { + %cst = arith.constant dense<0.000000e+00> : vector<8x16xf32> + %0 = xegpu.create_nd_tdesc %arg0 : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16> + %1 = xegpu.create_nd_tdesc %arg1 : memref<16x16xf16> -> !xegpu.tensor_desc<16x16xf16> + %2 = xegpu.load_nd %0[0, 0] <{layout = #xegpu.layout}> : !xegpu.tensor_desc<8x16xf16> -> vector<8x16xf16> + %3 = xegpu.load_nd %1[0, 0] <{layout = #xegpu.layout}> : !xegpu.tensor_desc<16x16xf16> -> vector<16x16xf16> + %4 = xegpu.dpas %2, %3, %cst : vector<8x16xf16>, vector<16x16xf16>, vector<8x16xf32> -> vector<8x16xf32> + %5 = xegpu.create_nd_tdesc %arg2 : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32> + xegpu.store_nd %4, %5[0, 0] : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32> + return +} +} + +// ----- +// completeScatterStoreLaneLayoutFromInstData: user supplies only inst_data on a +// scatter store; lane info derived purely from inst_data (data sink). +// inst_data=[1,16] -> lane_layout=[1,16], lane_data=[1,1]. 
+gpu.module @test { +// CHECK-LABEL: func.func @complete_scatter_store_inst_data( +// CHECK: xegpu.store %{{.*}} <{layout = #xegpu.layout}> : vector<16x32xf32>, memref<512xf32>, vector<16x32xindex>, vector<16x32xi1> +func.func @complete_scatter_store_inst_data(%src: memref<512xf32>) { + %mask = arith.constant dense<1> : vector<16x32xi1> + %offset = arith.constant dense<12> : vector<16x32xindex> + %data = arith.constant dense<0.000000e+00> : vector<16x32xf32> + xegpu.store %data, %src[%offset], %mask <{layout = #xegpu.layout}> + : vector<16x32xf32>, memref<512xf32>, vector<16x32xindex>, vector<16x32xi1> + return +} +} + +// ----- +// completeScatterLoadLaneLayoutFromInstData: user supplies only inst_data on a +// scatter load; with no usable consumer lane info, the scatter default is used. +// inst_data=[1,16] -> lane_layout=[1,16], lane_data=[1,1]. +gpu.module @test { +// CHECK-LABEL: func.func @complete_scatter_load_inst_data( +// CHECK: xegpu.load %{{.*}} <{layout = #xegpu.layout}> : memref<512xf32>, vector<16x32xindex>, vector<16x32xi1> -> vector<16x32xf32> +func.func @complete_scatter_load_inst_data(%src: memref<512xf32>) { + %mask = arith.constant dense<1> : vector<16x32xi1> + %offset = arith.constant dense<12> : vector<16x32xindex> + %0 = xegpu.load %src[%offset], %mask <{layout = #xegpu.layout}> + : memref<512xf32>, vector<16x32xindex>, vector<16x32xi1> -> vector<16x32xf32> + xegpu.store %0, %src[%offset], %mask <{layout = #xegpu.layout}> + : vector<16x32xf32>, memref<512xf32>, vector<16x32xindex>, vector<16x32xi1> + return +} +} + +// ----- +// completeDpasLaneLayoutFromInstData: user supplies only inst_data on all three +// DPAS operands; lane info is completed from each operand's shape / matmul role. +// A=[8,16]->[1,16]/[1,1]; B=[16,16]->[1,16]/[2,1] (VNNI); CD=[8,16]->[1,16]/[1,1]. 
+gpu.module @test { +// CHECK-LABEL: func.func @complete_dpas_inst_data( +// CHECK: xegpu.dpas %{{.*}}, %{{.*}}, %{{.*}} {layout_a = #xegpu.layout, layout_b = #xegpu.layout, layout_cd = #xegpu.layout} : vector<8x16xf16>, vector<16x16xf16>, vector<8x16xf32> -> vector<8x16xf32> +func.func @complete_dpas_inst_data(%arg0: vector<8x16xf16>, %arg1: vector<16x16xf16>) { + %cst = arith.constant dense<0.000000e+00> : vector<8x16xf32> + %0 = xegpu.dpas %arg0, %arg1, %cst { + layout_a = #xegpu.layout, + layout_b = #xegpu.layout, + layout_cd = #xegpu.layout} + : vector<8x16xf16>, vector<16x16xf16>, vector<8x16xf32> -> vector<8x16xf32> + return +} +} + +// ----- +// completeDpasMxLaneLayoutFromInstData: user supplies only inst_data on A/B/C-D; +// lane info completed from shapes and scale layouts re-derived via +// createScaleLayout. Matches the dpas_mx_f8e5m2 default-path result. +gpu.module @test { +// CHECK-LABEL: func.func @complete_dpas_mx_inst_data( +// CHECK: xegpu.dpas_mx %{{.*}}, %{{.*}}, %{{.*}} scale_a = %{{[0-9a-zA-Z]+}} scale_b = %{{[0-9a-zA-Z]+}} +// CHECK-SAME: {layout_a = #xegpu.layout, layout_a_scale = #xegpu.layout, layout_b = #xegpu.layout, layout_b_scale = #xegpu.layout, layout_cd = #xegpu.layout} : +func.func @complete_dpas_mx_inst_data(%arg0: vector<16x1024xf8E5M2>, %arg1: vector<1024x32xf8E5M2>, + %arg2: vector<16x32xf8E8M0FNU>, %arg3: vector<32x32xf8E8M0FNU>) { + %cst = arith.constant dense<0.000000e+00> : vector<16x32xbf16> + %0 = xegpu.dpas_mx %arg0, %arg1, %cst scale_a = %arg2 scale_b = %arg3 { + layout_a = #xegpu.layout, + layout_a_scale = #xegpu.layout, + layout_b = #xegpu.layout, + layout_b_scale = #xegpu.layout, + layout_cd = #xegpu.layout} + : (vector<16x1024xf8E5M2>, vector<1024x32xf8E5M2>, vector<16x32xbf16>, vector<16x32xf8E8M0FNU>, vector<32x32xf8E8M0FNU>) -> vector<16x32xbf16> + return +} +} diff --git a/mlir/test/Dialect/XeGPU/propagate-layout.mlir b/mlir/test/Dialect/XeGPU/propagate-layout.mlir index bad956d45d186..25d713ccc8a0f 100644 --- 
a/mlir/test/Dialect/XeGPU/propagate-layout.mlir +++ b/mlir/test/Dialect/XeGPU/propagate-layout.mlir @@ -127,26 +127,29 @@ func.func @extf_truncf(%arg0: !xegpu.tensor_desc<8x16xf16>, %arg1: !xegpu.tensor %2 = arith.extf %1 : vector<16x16xf16> to vector<16x16xf32> %3 = arith.truncf %2 : vector<16x16xf32> to vector<16x16xf16> %4 = xegpu.dpas %0, %3 : vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32> + %5 = xegpu.convert_layout %4 + <{input_layout = #xegpu.layout, + target_layout = #xegpu.layout}> + : vector<8x16xf32> return %4 : vector<8x16xf32> } } // ----- gpu.module @test { -// CHECK-LABEL: func.func @load_gather_with_chunksize( +// CHECK-LABEL: func.func @load_gather_with_coalesce_chunksize( // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<8x16xf16>, %[[ARG1:[0-9a-zA-Z]+]]: memref<256xf16>, %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xf32>) { -// CHECK: %[[OFFSET:.*]] = arith.constant {layout_result_0 = #xegpu.layout} -// CHECK-SAME: dense<[0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240]> : vector<16xindex> -// CHECK-NEXT: %[[MASK:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense : vector<16xi1> -// CHECK-NEXT: %{{.*}} = xegpu.load %arg1[%[[OFFSET]]], %[[MASK]] <{chunk_size = 16 : i64, layout = #xegpu.layout}> : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16x16xf16> -func.func @load_gather_with_chunksize(%arg0: memref<8x16xf16>, %arg1: memref<256xf16>, %arg2: memref<8x16xf32>) { +// CHECK: %[[OFFSET:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<0> : vector<16x16xindex> +// CHECK: %[[MASK:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense : vector<16x16xi1> +// CHECK: %{{.*}} = xegpu.load %arg1[%[[OFFSET]]], %[[MASK]] <{layout = #xegpu.layout}> : memref<256xf16>, vector<16x16xindex>, vector<16x16xi1> -> vector<16x16xf16> +func.func @load_gather_with_coalesce_chunksize(%arg0: memref<8x16xf16>, %arg1: memref<256xf16>, %arg2: memref<8x16xf32>) { %c0 = arith.constant 0 : index %0 = 
xegpu.create_nd_tdesc %arg0 : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16> %1 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<8x16xf16> -> vector<8x16xf16> - %offset = arith.constant dense<[0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240]> : vector<16xindex> - %mask = arith.constant dense : vector<16xi1> - %3 = xegpu.load %arg1[%offset], %mask <{chunk_size=16}> - : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16x16xf16> + %offset = arith.constant dense<0> : vector<16x16xindex> + %mask = arith.constant dense : vector<16x16xi1> + %3 = xegpu.load %arg1[%offset], %mask + : memref<256xf16>, vector<16x16xindex>, vector<16x16xi1> -> vector<16x16xf16> %4 = vector.transpose %3, [1, 0] : vector<16x16xf16> to vector<16x16xf16> %5 = xegpu.dpas %1, %4 : vector<8x16xf16>, vector<16x16xf16> -> vector<8x16xf32> %6 = xegpu.create_nd_tdesc %arg2 : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32> @@ -157,37 +160,20 @@ func.func @load_gather_with_chunksize(%arg0: memref<8x16xf16>, %arg1: memref<256 // ----- gpu.module @test { -// CHECK-LABEL: func.func @store_scatter_with_chunksize( -// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<128xf32>) { -// CHECK-NEXT: %[[CST:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<1.000000e+00> : vector<16x8xf32> -// CHECK-NEXT: %[[CST_0:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense : vector<16xi1> -// CHECK-NEXT: %[[CST_1:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<[0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240]> : vector<16xindex> -// CHECK-NEXT: xegpu.store %[[CST]], %[[ARG0]][%[[CST_1]]], %[[CST_0]] <{chunk_size = 8 : i64, layout = #xegpu.layout}> : vector<16x8xf32>, memref<128xf32>, vector<16xindex>, vector<16xi1> -func.func @store_scatter_with_chunksize(%arg0: memref<128xf32>) { - %val = arith.constant dense<1.000000e+00> : vector<16x8xf32> - %mask = arith.constant dense : vector<16xi1> - %offset = arith.constant dense<[0, 16, 32, 48, 64, 
80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240]> : vector<16xindex> - xegpu.store %val, %arg0[%offset], %mask <{chunk_size = 8}>: vector<16x8xf32>, memref<128xf32>, vector<16xindex>, vector<16xi1> - return -} -} - -// ----- -gpu.module @test { -// CHECK-LABEL: func.func @scatter_ops_chunksize( -// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<256xf16>) { -// CHECK: %[[MASK:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense : vector<16xi1> -// CHECK: %[[OFFSETS:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<12> : vector<16xindex> -// CHECK: %[[LOAD_VEC:.*]] = xegpu.load %[[ARG0]][%[[OFFSETS]]], %[[MASK]] <{chunk_size = 8 : i64, layout = #xegpu.layout}> -// CHECK-SAME: memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16x8xf16> -// CHECK: xegpu.store %[[LOAD_VEC]], %[[ARG0]][%[[OFFSETS]]], %[[MASK]] <{chunk_size = 8 : i64, layout = #xegpu.layout}> : vector<16x8xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1> -func.func @scatter_ops_chunksize(%src: memref<256xf16>) { - %1 = arith.constant dense<1>: vector<16xi1> - %offset = arith.constant dense<12> : vector<16xindex> - %3 = xegpu.load %src[%offset], %1 <{chunk_size=8}> - : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16x8xf16> - xegpu.store %3, %src[%offset], %1 <{chunk_size=8}> - : vector<16x8xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1> +// CHECK-LABEL: func.func @scatter_ops_coalesce_chunksize( +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<128xf16>) { +// CHECK: %[[MASK:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense : vector<16x8xi1> +// CHECK: %[[OFFSETS:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<12> : vector<16x8xindex> +// CHECK: %[[LOAD_VEC:.*]] = xegpu.load %[[ARG0]][%[[OFFSETS]]], %[[MASK]] <{layout = #xegpu.layout}> +// CHECK-SAME: memref<128xf16>, vector<16x8xindex>, vector<16x8xi1> -> vector<16x8xf16> +// CHECK: xegpu.store %[[LOAD_VEC]], %[[ARG0]][%[[OFFSETS]]], %[[MASK]] <{layout = #xegpu.layout}> : 
vector<16x8xf16>, memref<128xf16>, vector<16x8xindex>, vector<16x8xi1> +func.func @scatter_ops_coalesce_chunksize(%src: memref<128xf16>) { + %1 = arith.constant dense<1>: vector<16x8xi1> + %offset = arith.constant dense<12> : vector<16x8xindex> + %3 = xegpu.load %src[%offset], %1 + : memref<128xf16>, vector<16x8xindex>, vector<16x8xi1> -> vector<16x8xf16> + xegpu.store %3, %src[%offset], %1 + : vector<16x8xf16>, memref<128xf16>, vector<16x8xindex>, vector<16x8xi1> return } } @@ -473,37 +459,45 @@ func.func @if_multiple_uses(%arg0: !xegpu.tensor_desc<8x16xf16>, %arg1: !xegpu.t // ----- gpu.module @test { // CHECK-LABEL: func.func @vector_outer_reduction( -// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: vector<16x16xf32>, %[[ARG1:[0-9a-zA-Z]+]]: !xegpu.tensor_desc<16xf32, #xegpu.layout>) { +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: vector<16x16xf32>, %[[ARG1:[0-9a-zA-Z]+]]: memref<256xf32>) { // CHECK: %{{.*}} = vector.multi_reduction , %[[ARG0]], %{{.*}} {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} [0] : vector<16x16xf32> to vector<16xf32> -func.func @vector_outer_reduction(%arg0: vector<16x16xf32>, %arg1: !xegpu.tensor_desc<16xf32>) { +func.func @vector_outer_reduction(%arg0: vector<16x16xf32>, %arg1: memref<256xf32>) { %cst = arith.constant dense<0.000000e+00> : vector<16xf32> + %mask = arith.constant dense : vector<16xi1> + %offset = vector.step : vector<16xindex> %0 = vector.multi_reduction , %arg0, %cst [0] : vector<16x16xf32> to vector<16xf32> - xegpu.store_nd %0, %arg1[0] : vector<16xf32>, !xegpu.tensor_desc<16xf32> + xegpu.store %0, %arg1[%offset], %mask : vector<16xf32>, memref<256xf32>, vector<16xindex>, vector<16xi1> return } } // ----- gpu.module @test { // CHECK-LABEL: func.func @vector_inner_reduction( -// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: vector<16x16xf32>, %[[ARG1:[0-9a-zA-Z]+]]: !xegpu.tensor_desc<16xf32, #xegpu.layout>) { -// CHECK: %{{.*}} = vector.multi_reduction , %[[ARG0]], %{{.*}} {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = 
[1]>} [1] : vector<16x16xf32> to vector<16xf32> -func.func @vector_inner_reduction(%arg0: vector<16x16xf32>, %arg1: !xegpu.tensor_desc<16xf32>) { +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: vector<16x16xf32>, %[[ARG1:[0-9a-zA-Z]+]]: memref<256xf32>) { +// CHECK: %{{.*}} = vector.multi_reduction , %[[ARG0]], %{{.*}} {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [1]>} [1] : vector<16x16xf32> to vector<16xf32> +func.func @vector_inner_reduction(%arg0: vector<16x16xf32>, %arg1: memref<256xf32>) { %cst = arith.constant dense<0.000000e+00> : vector<16xf32> + %mask = arith.constant dense : vector<16xi1> + %offset = vector.step : vector<16xindex> %0 = vector.multi_reduction , %arg0, %cst [1] : vector<16x16xf32> to vector<16xf32> - xegpu.store_nd %0, %arg1[0] : vector<16xf32>, !xegpu.tensor_desc<16xf32> + xegpu.store %0, %arg1[%offset], %mask : vector<16xf32>, memref<256xf32>, vector<16xindex>, vector<16xi1> return } } // ----- gpu.module @test { -// CHECK-LABEL: func.func @store_nd_with_offset( +// CHECK-LABEL: func.func @store_with_offset( // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<256xf32>) { -// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<256xf32> -> !xegpu.tensor_desc<16xf32, #xegpu.layout> -func.func @store_nd_with_offset(%arg0: memref<256xf32>){ +// CHECK: %[[MASK:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense : vector<16xi1> +// CHECK: %[[OFFSET:.*]] = vector.step {layout_result_0 = #xegpu.layout} : vector<16xindex> +// CHECK: %[[VAL:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<1.000000e+00> : vector<16xf32> +// CHECK: xegpu.store %[[VAL]], %[[ARG0]][%[[OFFSET]]], %[[MASK]] <{layout = #xegpu.layout}> : vector<16xf32>, memref<256xf32>, vector<16xindex>, vector<16xi1> +func.func @store_with_offset(%arg0: memref<256xf32>){ %c32 = arith.constant 32 : index + %mask = arith.constant dense : vector<16xi1> + %offset = vector.step : vector<16xindex> %1 = arith.constant dense<1.000000e+00> : vector<16xf32> - %0 = 
xegpu.create_nd_tdesc %arg0 : memref<256xf32> -> !xegpu.tensor_desc<16xf32> - xegpu.store_nd %1, %0[%c32] : vector<16xf32>, !xegpu.tensor_desc<16xf32> + xegpu.store %1, %arg0[%offset], %mask : vector<16xf32>, memref<256xf32>, vector<16xindex>, vector<16xi1> return } } @@ -537,12 +531,10 @@ func.func @prefetch_2d(%arg0: memref<256x256xf16>){ gpu.module @test { // CHECK-LABEL: func.func @prefetch_1d( // CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<256xf16>) { -// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<256xf16> -> !xegpu.tensor_desc<16xf16, #xegpu.layout> -// CHECK-NEXT: xegpu.prefetch_nd %[[T0]][0] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, layout = #xegpu.layout}> : !xegpu.tensor_desc<16xf16, #xegpu.layout> +// CHECK: xegpu.prefetch %[[ARG0]][%{{.*}}] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, layout = #xegpu.layout}> : memref<256xf16>, vector<16xindex> func.func @prefetch_1d(%arg0: memref<256xf16>){ - %c0 = arith.constant 0 : index - %0 = xegpu.create_nd_tdesc %arg0 : memref<256xf16> -> !xegpu.tensor_desc<16xf16> - xegpu.prefetch_nd %0[0] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint}>: !xegpu.tensor_desc<16xf16> + %offset = vector.step : vector<16xindex> + xegpu.prefetch %arg0[%offset] <{l1_hint = #xegpu.cache_hint, l2_hint = #xegpu.cache_hint, layout = #xegpu.layout}> : memref<256xf16>, vector<16xindex> return } } @@ -562,9 +554,9 @@ func.func @scf_while_and_condition(%arg0: memref<256xf32>, %arg1: memref<256xf32 %c16 = arith.constant 16 : i32 %c16_idx = arith.constant 16 : index %c256 = arith.constant 256 : i32 - %0 = xegpu.create_nd_tdesc %arg0 : memref<256xf32> -> !xegpu.tensor_desc<16xf32> - %1 = xegpu.load_nd %0[0] : !xegpu.tensor_desc<16xf32> -> vector<16xf32> - %2 = xegpu.create_nd_tdesc %arg1 : memref<256xf32> -> !xegpu.tensor_desc<16xf32> + %mask = arith.constant dense : vector<16xi1> + %offset = vector.step : vector<16xindex> + %1 = xegpu.load %arg0[%offset], %mask : memref<256xf32>, 
vector<16xindex>, vector<16xi1> -> vector<16xf32> %3:2 = scf.while (%arg2 = %1, %arg3 = %c0) : (vector<16xf32>, i32) -> (vector<16xf32>, i32) { @@ -572,12 +564,14 @@ func.func @scf_while_and_condition(%arg0: memref<256xf32>, %arg1: memref<256xf32 scf.condition(%4) %arg2, %arg3 : vector<16xf32>, i32 } do { ^bb0(%arg2: vector<16xf32>, %arg3: i32): - xegpu.store_nd %arg2, %2[0] : vector<16xf32>, !xegpu.tensor_desc<16xf32> + xegpu.store %arg2, %arg1[%offset], %mask : vector<16xf32>, memref<256xf32>, vector<16xindex>, vector<16xi1> %4 = arith.addi %arg3, %c16 : i32 - %offset = arith.index_cast %4 : i32 to index - %6 = xegpu.load_nd %0[%offset] : !xegpu.tensor_desc<16xf32> -> vector<16xf32> + %offset2 = arith.index_cast %4 : i32 to index + %offset2_v = vector.broadcast %offset2 : index to vector<16xindex> + %6 = xegpu.load %arg0[%offset], %mask : memref<256xf32>, vector<16xindex>, vector<16xi1> -> vector<16xf32> scf.yield %6, %4 : vector<16xf32>, i32 } + return } } @@ -656,20 +650,20 @@ gpu.module @test{ // ----- gpu.module @test { // CHECK-LABEL: func.func @vector_shape_cast_expand_non_unit_dims( -// CHECK: %[[LOAD:.*]] = xegpu.load %arg0[%[[STEP:.*]]], %[[CST:.*]] <{layout = #xegpu.layout}> : memref<1024xf16>, vector<1024xindex>, vector<1024xi1> -> vector<1024xf16> -// CHECK: %[[CAST:.*]] = vector.shape_cast %[[LOAD]] {layout_result_0 = #xegpu.layout} : vector<1024xf16> to vector<8x8x16xf16> +// CHECK: %[[LOAD:.*]] = xegpu.load %arg0[%[[STEP:.*]]], %[[CST:.*]] <{layout = #xegpu.layout}> : memref<128xf16>, vector<128xindex>, vector<128xi1> -> vector<128xf16> +// CHECK: %[[CAST:.*]] = vector.shape_cast %[[LOAD]] {layout_result_0 = #xegpu.layout} : vector<128xf16> to vector<1x8x16xf16> // CHECK: %[[CST_0:.*]] = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} dense<0.000000e+00> : vector<8x16xf16> // CHECK: %[[CST_1:.*]] = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} dense<0.000000e+00> : vector<16xf16> -// CHECK: 
%[[REDUCE_0:.*]] = vector.multi_reduction , %[[CAST]], %[[CST_0]] {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} [0] : vector<8x8x16xf16> to vector<8x16xf16> +// CHECK: %[[REDUCE_0:.*]] = vector.multi_reduction , %[[CAST]], %[[CST_0]] {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} [0] : vector<1x8x16xf16> to vector<8x16xf16> // CHECK: %[[REDUCE_1:.*]] = vector.multi_reduction , %[[REDUCE_0]], %[[CST_1]] {layout_result_0 = #xegpu.slice<#xegpu.layout, dims = [0]>} [0] : vector<8x16xf16> to vector<16xf16> -func.func @vector_shape_cast_expand_non_unit_dims(%arg0: memref<1024xf16>, %arg1: memref<16xf16>) { - %cst = arith.constant dense : vector<1024xi1> - %0 = vector.step : vector<1024xindex> - %1 = xegpu.load %arg0[%0], %cst : memref<1024xf16>, vector<1024xindex>, vector<1024xi1> -> vector<1024xf16> - %2 = vector.shape_cast %1 : vector<1024xf16> to vector<8x8x16xf16> +func.func @vector_shape_cast_expand_non_unit_dims(%arg0: memref<128xf16>, %arg1: memref<16xf16>) { + %cst = arith.constant dense : vector<128xi1> + %0 = vector.step : vector<128xindex> + %1 = xegpu.load %arg0[%0], %cst : memref<128xf16>, vector<128xindex>, vector<128xi1> -> vector<128xf16> + %2 = vector.shape_cast %1 : vector<128xf16> to vector<1x8x16xf16> %cst_0 = arith.constant dense<0.000000e+00> : vector<8x16xf16> %cst_1 = arith.constant dense<0.000000e+00> : vector<16xf16> - %3 = vector.multi_reduction , %2, %cst_0 [0] : vector<8x8x16xf16> to vector<8x16xf16> + %3 = vector.multi_reduction , %2, %cst_0 [0] : vector<1x8x16xf16> to vector<8x16xf16> %4 = vector.multi_reduction , %3, %cst_1 [0] : vector<8x16xf16> to vector<16xf16> %cst_2 = arith.constant dense : vector<16xi1> %cst_3 = arith.constant dense<1> : vector<16xindex> @@ -983,40 +977,40 @@ gpu.module @test{ // ----- gpu.module @test { -// CHECK-LABEL: func.func @dpas_mx_f8e5m2 -// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<8x32xf8E5M2>, %[[ARG1:[0-9a-zA-Z]+]]: memref<32x16xf8E5M2>, %[[ARG2:[0-9a-zA-Z]+]]: 
memref<8x16xbf16> -// CHECK-SAME: %[[ARG3:[0-9a-zA-Z]+]]: memref<8x1xf8E8M0FNU>, %[[ARG4:[0-9a-zA-Z]+]]: memref<1x16xf8E8M0FNU> +// CHECK-LABEL: func.func @dpas_mx_fp4 +// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<8x64xf4E2M1FN>, %[[ARG1:[0-9a-zA-Z]+]]: memref<64x16xf4E2M1FN>, %[[ARG2:[0-9a-zA-Z]+]]: memref<8x16xbf16> +// CHECK-SAME: %[[ARG3:[0-9a-zA-Z]+]]: memref<8x2xf8E8M0FNU>, %[[ARG4:[0-9a-zA-Z]+]]: memref<2x16xf8E8M0FNU> // CHECK: %[[CST:.*]] = arith.constant {layout_result_0 = #xegpu.layout} dense<0.000000e+00> : vector<8x16xbf16> -// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x32xf8E5M2> -> !xegpu.tensor_desc<8x32xf8E5M2, #xegpu.layout> -// CHECK: %[[T1:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<32x16xf8E5M2> -> !xegpu.tensor_desc<32x16xf8E5M2, #xegpu.layout> -// CHECK: %[[T2:.*]] = xegpu.load_nd %[[T0]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<8x32xf8E5M2, #xegpu.layout> -> vector<8x32xf8E5M2> -// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T1]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<32x16xf8E5M2, #xegpu.layout> -> vector<32x16xf8E5M2> -// CHECK: %[[T4:.*]] = xegpu.create_nd_tdesc %[[ARG3]] : memref<8x1xf8E8M0FNU> -> !xegpu.tensor_desc<8x1xf8E8M0FNU, #xegpu.layout> +// CHECK: %[[T0:.*]] = xegpu.create_nd_tdesc %[[ARG0]] : memref<8x64xf4E2M1FN> -> !xegpu.tensor_desc<8x64xf4E2M1FN, #xegpu.layout> +// CHECK: %[[T1:.*]] = xegpu.create_nd_tdesc %[[ARG1]] : memref<64x16xf4E2M1FN> -> !xegpu.tensor_desc<64x16xf4E2M1FN, #xegpu.layout> +// CHECK: %[[T2:.*]] = xegpu.load_nd %[[T0]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<8x64xf4E2M1FN, #xegpu.layout> -> vector<8x64xf4E2M1FN> +// CHECK: %[[T3:.*]] = xegpu.load_nd %[[T1]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<64x16xf4E2M1FN, #xegpu.layout> -> vector<64x16xf4E2M1FN> +// CHECK: %[[T4:.*]] = xegpu.create_nd_tdesc %[[ARG3]] : memref<8x2xf8E8M0FNU> -> !xegpu.tensor_desc<8x2xf8E8M0FNU, 
#xegpu.layout> // CHECK: %[[T5:.*]] = xegpu.load_nd %[[T4]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<8x1xf8E8M0FNU, #xegpu.layout> -> vector<8x1xf8E8M0FNU> -// CHECK: %[[T6:.*]] = xegpu.create_nd_tdesc %[[ARG4]] : memref<1x16xf8E8M0FNU> -> !xegpu.tensor_desc<1x16xf8E8M0FNU, #xegpu.layout> +// CHECK-SAME: !xegpu.tensor_desc<8x2xf8E8M0FNU, #xegpu.layout> -> vector<8x2xf8E8M0FNU> +// CHECK: %[[T6:.*]] = xegpu.create_nd_tdesc %[[ARG4]] : memref<2x16xf8E8M0FNU> -> !xegpu.tensor_desc<2x16xf8E8M0FNU, #xegpu.layout> // CHECK: %[[T7:.*]] = xegpu.load_nd %[[T6]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<1x16xf8E8M0FNU, #xegpu.layout> -> vector<1x16xf8E8M0FNU> +// CHECK-SAME: !xegpu.tensor_desc<2x16xf8E8M0FNU, #xegpu.layout> -> vector<2x16xf8E8M0FNU> // CHECK: %[[T8:.*]] = xegpu.dpas_mx %[[T2]], %[[T3]], %[[CST]] scale_a = %[[T5]] scale_b = %[[T7]] -// CHECK-SAME: {layout_a = #xegpu.layout, layout_a_scale = #xegpu.layout, layout_b = #xegpu.layout, layout_b_scale = #xegpu.layout, layout_cd = #xegpu.layout} : -// CHECK-SAME: (vector<8x32xf8E5M2>, vector<32x16xf8E5M2>, vector<8x16xbf16>, vector<8x1xf8E8M0FNU>, vector<1x16xf8E8M0FNU>) -> vector<8x16xbf16> +// CHECK-SAME: {layout_a = #xegpu.layout, layout_a_scale = #xegpu.layout, layout_b = #xegpu.layout, layout_b_scale = #xegpu.layout, layout_cd = #xegpu.layout} : +// CHECK-SAME: (vector<8x64xf4E2M1FN>, vector<64x16xf4E2M1FN>, vector<8x16xbf16>, vector<8x2xf8E8M0FNU>, vector<2x16xf8E8M0FNU>) -> vector<8x16xbf16> // CHECK: %[[T9:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<8x16xbf16> -> !xegpu.tensor_desc<8x16xbf16, #xegpu.layout> // CHECK: xegpu.store_nd %[[T8]], %[[T9]][0, 0] <{layout = #xegpu.layout}> : vector<8x16xbf16>, !xegpu.tensor_desc<8x16xbf16, #xegpu.layout> -func.func @dpas_mx_f8e5m2(%arg0: memref<8x32xf8E5M2>, %arg1: memref<32x16xf8E5M2>, %arg2: memref<8x16xbf16>, - %arg3: memref<8x1xf8E8M0FNU>, %arg4: memref<1x16xf8E8M0FNU>) { +func.func @dpas_mx_fp4(%arg0: 
memref<8x64xf4E2M1FN>, %arg1: memref<64x16xf4E2M1FN>, %arg2: memref<8x16xbf16>, + %arg3: memref<8x2xf8E8M0FNU>, %arg4: memref<2x16xf8E8M0FNU>) { %c0 = arith.constant 0 : index %cst = arith.constant dense<0.000000e+00> : vector<8x16xbf16> - %0 = xegpu.create_nd_tdesc %arg0 : memref<8x32xf8E5M2> -> !xegpu.tensor_desc<8x32xf8E5M2> - %1 = xegpu.create_nd_tdesc %arg1 : memref<32x16xf8E5M2> -> !xegpu.tensor_desc<32x16xf8E5M2> - %2 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<8x32xf8E5M2> -> vector<8x32xf8E5M2> - %3 = xegpu.load_nd %1[0, 0] : !xegpu.tensor_desc<32x16xf8E5M2> -> vector<32x16xf8E5M2> - %4 = xegpu.create_nd_tdesc %arg3 : memref<8x1xf8E8M0FNU> -> !xegpu.tensor_desc<8x1xf8E8M0FNU> - %5 = xegpu.load_nd %4[0, 0] : !xegpu.tensor_desc<8x1xf8E8M0FNU> -> vector<8x1xf8E8M0FNU> - %6 = xegpu.create_nd_tdesc %arg4 : memref<1x16xf8E8M0FNU> -> !xegpu.tensor_desc<1x16xf8E8M0FNU> - %7 = xegpu.load_nd %6[0, 0] : !xegpu.tensor_desc<1x16xf8E8M0FNU> -> vector<1x16xf8E8M0FNU> - %8 = xegpu.dpas_mx %2, %3, %cst scale_a = %5 scale_b = %7 : (vector<8x32xf8E5M2>, vector<32x16xf8E5M2>, vector<8x16xbf16>, vector<8x1xf8E8M0FNU>, vector<1x16xf8E8M0FNU>) -> vector<8x16xbf16> + %0 = xegpu.create_nd_tdesc %arg0 : memref<8x64xf4E2M1FN> -> !xegpu.tensor_desc<8x64xf4E2M1FN> + %1 = xegpu.create_nd_tdesc %arg1 : memref<64x16xf4E2M1FN> -> !xegpu.tensor_desc<64x16xf4E2M1FN> + %2 = xegpu.load_nd %0[0, 0] : !xegpu.tensor_desc<8x64xf4E2M1FN> -> vector<8x64xf4E2M1FN> + %3 = xegpu.load_nd %1[0, 0] : !xegpu.tensor_desc<64x16xf4E2M1FN> -> vector<64x16xf4E2M1FN> + %4 = xegpu.create_nd_tdesc %arg3 : memref<8x2xf8E8M0FNU> -> !xegpu.tensor_desc<8x2xf8E8M0FNU> + %5 = xegpu.load_nd %4[0, 0] : !xegpu.tensor_desc<8x2xf8E8M0FNU> -> vector<8x2xf8E8M0FNU> + %6 = xegpu.create_nd_tdesc %arg4 : memref<2x16xf8E8M0FNU> -> !xegpu.tensor_desc<2x16xf8E8M0FNU> + %7 = xegpu.load_nd %6[0, 0] : !xegpu.tensor_desc<2x16xf8E8M0FNU> -> vector<2x16xf8E8M0FNU> + %8 = xegpu.dpas_mx %2, %3, %cst scale_a = %5 scale_b = %7 : 
(vector<8x64xf4E2M1FN>, vector<64x16xf4E2M1FN>, vector<8x16xbf16>, vector<8x2xf8E8M0FNU>, vector<2x16xf8E8M0FNU>) -> vector<8x16xbf16> %9 = xegpu.create_nd_tdesc %arg2 : memref<8x16xbf16> -> !xegpu.tensor_desc<8x16xbf16> xegpu.store_nd %8, %9[0, 0] : vector<8x16xbf16>, !xegpu.tensor_desc<8x16xbf16> return @@ -1035,14 +1029,14 @@ gpu.module @test { // CHECK-SAME: !xegpu.tensor_desc<8x64xf4E2M1FN, #xegpu.layout> -> vector<8x64xf4E2M1FN> // CHECK: %[[T3:.*]] = xegpu.load_nd %[[T1]][0, 0] <{layout = #xegpu.layout}> : // CHECK-SAME: !xegpu.tensor_desc<64x16xf4E2M1FN, #xegpu.layout> -> vector<64x16xf4E2M1FN> -// CHECK: %[[T4:.*]] = xegpu.create_nd_tdesc %[[ARG3]] : memref<8x2xf8E8M0FNU> -> !xegpu.tensor_desc<8x2xf8E8M0FNU, #xegpu.layout> -// CHECK: %[[T5:.*]] = xegpu.load_nd %[[T4]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<8x2xf8E8M0FNU, #xegpu.layout> -> vector<8x2xf8E8M0FNU> -// CHECK: %[[T6:.*]] = xegpu.create_nd_tdesc %[[ARG4]] : memref<2x16xf8E8M0FNU> -> !xegpu.tensor_desc<2x16xf8E8M0FNU, #xegpu.layout> -// CHECK: %[[T7:.*]] = xegpu.load_nd %[[T6]][0, 0] <{layout = #xegpu.layout}> : -// CHECK-SAME: !xegpu.tensor_desc<2x16xf8E8M0FNU, #xegpu.layout> -> vector<2x16xf8E8M0FNU> +// CHECK: %[[T4:.*]] = xegpu.create_nd_tdesc %[[ARG3]] : memref<8x2xf8E8M0FNU> -> !xegpu.tensor_desc<8x2xf8E8M0FNU, #xegpu.layout> +// CHECK: %[[T5:.*]] = xegpu.load_nd %[[T4]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<8x2xf8E8M0FNU, #xegpu.layout> -> vector<8x2xf8E8M0FNU> +// CHECK: %[[T6:.*]] = xegpu.create_nd_tdesc %[[ARG4]] : memref<2x16xf8E8M0FNU> -> !xegpu.tensor_desc<2x16xf8E8M0FNU, #xegpu.layout> +// CHECK: %[[T7:.*]] = xegpu.load_nd %[[T6]][0, 0] <{layout = #xegpu.layout}> : +// CHECK-SAME: !xegpu.tensor_desc<2x16xf8E8M0FNU, #xegpu.layout> -> vector<2x16xf8E8M0FNU> // CHECK: %[[T8:.*]] = xegpu.dpas_mx %[[T2]], %[[T3]], %[[CST]] scale_a = %[[T5]] scale_b = %[[T7]] -// CHECK-SAME: {layout_a = #xegpu.layout, layout_a_scale = 
#xegpu.layout, layout_b = #xegpu.layout, layout_b_scale = #xegpu.layout, layout_cd = #xegpu.layout} : +// CHECK-SAME: {layout_a = #xegpu.layout, layout_a_scale = #xegpu.layout, layout_b = #xegpu.layout, layout_b_scale = #xegpu.layout, layout_cd = #xegpu.layout} : // CHECK-SAME: (vector<8x64xf4E2M1FN>, vector<64x16xf4E2M1FN>, vector<8x16xbf16>, vector<8x2xf8E8M0FNU>, vector<2x16xf8E8M0FNU>) -> vector<8x16xbf16> // CHECK: %[[T9:.*]] = xegpu.create_nd_tdesc %[[ARG2]] : memref<8x16xbf16> -> !xegpu.tensor_desc<8x16xbf16, #xegpu.layout> // CHECK: xegpu.store_nd %[[T8]], %[[T9]][0, 0] <{layout = #xegpu.layout}> : vector<8x16xbf16>, !xegpu.tensor_desc<8x16xbf16, #xegpu.layout> diff --git a/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir b/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir index 40e907be6e4a1..55d0e64bb2c65 100644 --- a/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir +++ b/mlir/test/Dialect/XeGPU/resolve-layout-conflicts.mlir @@ -254,8 +254,7 @@ func.func @conflict_postop() { } // CHECK-LABEL: func.func @convert_layout -// CHECK: %[[V0:.*]] = xegpu.convert_layout %[[CST:.*]] <{input_layout = #xegpu.layout, target_layout = #xegpu.layout}> : vector<32x128xf32> -// CHECK: %[[V1:.*]] = xegpu.convert_layout %[[V0]] <{input_layout = #xegpu.layout, target_layout = #xegpu.layout}> : vector<32x128xf32> +// CHECK: %[[V1:.*]] = xegpu.convert_layout %[[V0:.*]] <{input_layout = #xegpu.layout, target_layout = #xegpu.layout}> : vector<32x128xf32> func.func @convert_layout() { %src0 = arith.constant {layout_result_0 = #xegpu.layout} @@ -354,10 +353,7 @@ func.func @extract_source_conflict_with_order() -> vector<16x32xf16> { // CHECK-LABEL: func.func @convert_layout_bridge_input_mismatch // CHECK: %[[V0:.*]] = "some_op"() {layout_result_0 = #xegpu.layout} : () -> vector<32x32xf16> // CHECK-NEXT: %[[BRIDGE:.*]] = xegpu.convert_layout %[[V0]] -// CHECK-SAME: <{input_layout = #xegpu.layout, target_layout = #xegpu.layout}> -// CHECK-SAME: : vector<32x32xf16> -// 
CHECK-NEXT: %[[CVT:.*]] = xegpu.convert_layout %[[BRIDGE]] -// CHECK-SAME: <{input_layout = #xegpu.layout, target_layout = #xegpu.layout}> +// CHECK-SAME: <{input_layout = #xegpu.layout, target_layout = #xegpu.layout}> // CHECK-SAME: : vector<32x32xf16> gpu.module @test_convert_layout_bridge { func.func @convert_layout_bridge_input_mismatch() { From 25df6129da64f38fc2ff62579402d29ffc270556 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 23 Jun 2026 13:05:56 -0700 Subject: [PATCH 250/511] [MemoryBuiltins][NFC] Clang format and fixed coding style (#205205) --- llvm/include/llvm/Analysis/MemoryBuiltins.h | 2 +- llvm/lib/Analysis/MemoryBuiltins.cpp | 150 ++++++++++---------- 2 files changed, 76 insertions(+), 76 deletions(-) diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h index 10a31973be7fa..a42d662146563 100644 --- a/llvm/include/llvm/Analysis/MemoryBuiltins.h +++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h @@ -309,7 +309,7 @@ class ObjectSizeOffsetVisitor OffsetSpan combineOffsetRange(OffsetSpan LHS, OffsetSpan RHS); OffsetSpan computeImpl(Value *V); OffsetSpan computeValue(Value *V); - bool CheckedZextOrTrunc(APInt &I); + bool checkedZextOrTrunc(APInt &I); }; /// SizeOffsetValue - Used by \p ObjectSizeOffsetEvaluator, which works with diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp index a587f72fa9a18..0364a8bff9e53 100644 --- a/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -56,6 +56,7 @@ static cl::opt ObjectSizeOffsetVisitorMaxVisitInstructions( "look at"), cl::init(100)); +// clang-format off enum AllocType : uint8_t { OpNewLike = 1<<0, // allocates; never returns null MallocLike = 1<<1, // allocates; may return null @@ -75,8 +76,9 @@ enum class MallocFamily { MSVCArrayNew, // new[](unsigned int) VecMalloc, }; +// clang-format on -StringRef mangledNameForMallocFamily(const MallocFamily &Family) { +static StringRef 
mangledNameForMallocFamily(const MallocFamily &Family) { switch (Family) { case MallocFamily::Malloc: return "malloc"; @@ -113,42 +115,42 @@ struct AllocFnsTy { // FIXME: certain users need more information. E.g., SimplifyLibCalls needs to // know which functions are nounwind, noalias, nocapture parameters, etc. static const std::pair AllocationFnData[] = { - {LibFunc_Znwj, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned int) - {LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned int, nothrow) - {LibFunc_ZnwjSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned int, align_val_t) - {LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned int, align_val_t, nothrow) - {LibFunc_Znwm, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long) - {LibFunc_Znwm12__hot_cold_t, {OpNewLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long, __hot_cold_t) - {LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long, nothrow) - {LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, {MallocLike, 3, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long, nothrow, __hot_cold_t) - {LibFunc_ZnwmSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t) - {LibFunc_ZnwmSt11align_val_t12__hot_cold_t, {OpNewLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t, __hot_cold_t) - {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t, nothrow) - {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t, {MallocLike, 4, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t, nothrow, __hot_cold_t) - {LibFunc_Znaj, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned int) - 
{LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned int, nothrow) - {LibFunc_ZnajSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned int, align_val_t) - {LibFunc_ZnajSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned int, align_val_t, nothrow) - {LibFunc_Znam, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned long) - {LibFunc_Znam12__hot_cold_t, {OpNewLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new[](unsigned long, __hot_cold_t) - {LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned long, nothrow) - {LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, {MallocLike, 3, 0, -1, -1, MallocFamily::CPPNew}}, // new[](unsigned long, nothrow, __hot_cold_t) - {LibFunc_ZnamSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned long, align_val_t) - {LibFunc_ZnamSt11align_val_t12__hot_cold_t, {OpNewLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new[](unsigned long, align_val_t, __hot_cold_t) - {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned long, align_val_t, nothrow) - {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, {MallocLike, 4, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new[](unsigned long, align_val_t, nothrow, __hot_cold_t) - {LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned int) - {LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned int, nothrow) - {LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned long long) - {LibFunc_msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned long long, nothrow) - {LibFunc_msvc_new_array_int, {OpNewLike, 1, 0, -1, -1, 
MallocFamily::MSVCArrayNew}}, // new[](unsigned int) - {LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned int, nothrow) - {LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned long long) - {LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned long long, nothrow) - {LibFunc_strdup, {StrDupLike, 1, -1, -1, -1, MallocFamily::Malloc}}, - {LibFunc_dunder_strdup, {StrDupLike, 1, -1, -1, -1, MallocFamily::Malloc}}, - {LibFunc_strndup, {StrDupLike, 2, 1, -1, -1, MallocFamily::Malloc}}, - {LibFunc_dunder_strndup, {StrDupLike, 2, 1, -1, -1, MallocFamily::Malloc}}, + {LibFunc_Znwj, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned int) + {LibFunc_ZnwjRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned int, nothrow) + {LibFunc_ZnwjSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned int, align_val_t) + {LibFunc_ZnwjSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned int, align_val_t, nothrow) + {LibFunc_Znwm, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long) + {LibFunc_Znwm12__hot_cold_t, {OpNewLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long, __hot_cold_t) + {LibFunc_ZnwmRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long, nothrow) + {LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t, {MallocLike, 3, 0, -1, -1, MallocFamily::CPPNew}}, // new(unsigned long, nothrow, __hot_cold_t) + {LibFunc_ZnwmSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t) + {LibFunc_ZnwmSt11align_val_t12__hot_cold_t, {OpNewLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t, __hot_cold_t) + {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t, {MallocLike, 
3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t, nothrow) + {LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t, {MallocLike, 4, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new(unsigned long, align_val_t, nothrow, __hot_cold_t) + {LibFunc_Znaj, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned int) + {LibFunc_ZnajRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned int, nothrow) + {LibFunc_ZnajSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned int, align_val_t) + {LibFunc_ZnajSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned int, align_val_t, nothrow) + {LibFunc_Znam, {OpNewLike, 1, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned long) + {LibFunc_Znam12__hot_cold_t, {OpNewLike, 2, 0, -1, -1, MallocFamily::CPPNew}}, // new[](unsigned long, __hot_cold_t) + {LibFunc_ZnamRKSt9nothrow_t, {MallocLike, 2, 0, -1, -1, MallocFamily::CPPNewArray}}, // new[](unsigned long, nothrow) + {LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t, {MallocLike, 3, 0, -1, -1, MallocFamily::CPPNew}}, // new[](unsigned long, nothrow, __hot_cold_t) + {LibFunc_ZnamSt11align_val_t, {OpNewLike, 2, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned long, align_val_t) + {LibFunc_ZnamSt11align_val_t12__hot_cold_t, {OpNewLike, 3, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new[](unsigned long, align_val_t, __hot_cold_t) + {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t, {MallocLike, 3, 0, -1, 1, MallocFamily::CPPNewArrayAligned}}, // new[](unsigned long, align_val_t, nothrow) + {LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t, {MallocLike, 4, 0, -1, 1, MallocFamily::CPPNewAligned}}, // new[](unsigned long, align_val_t, nothrow, __hot_cold_t) + {LibFunc_msvc_new_int, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned int) + {LibFunc_msvc_new_int_nothrow, {MallocLike, 2, 0, 
-1, -1, MallocFamily::MSVCNew}}, // new(unsigned int, nothrow) + {LibFunc_msvc_new_longlong, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned long long) + {LibFunc_msvc_new_longlong_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCNew}}, // new(unsigned long long, nothrow) + {LibFunc_msvc_new_array_int, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned int) + {LibFunc_msvc_new_array_int_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned int, nothrow) + {LibFunc_msvc_new_array_longlong, {OpNewLike, 1, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned long long) + {LibFunc_msvc_new_array_longlong_nothrow, {MallocLike, 2, 0, -1, -1, MallocFamily::MSVCArrayNew}}, // new[](unsigned long long, nothrow) + {LibFunc_strdup, {StrDupLike, 1, -1, -1, -1, MallocFamily::Malloc}}, + {LibFunc_dunder_strdup, {StrDupLike, 1, -1, -1, -1, MallocFamily::Malloc}}, + {LibFunc_strndup, {StrDupLike, 2, 1, -1, -1, MallocFamily::Malloc}}, + {LibFunc_dunder_strndup, {StrDupLike, 2, 1, -1, -1, MallocFamily::Malloc}}, }; // clang-format on @@ -182,10 +184,10 @@ getAllocationDataForFunction(const Function *Callee, AllocType AllocTy, if (!TLI || !TLI->getLibFunc(*Callee, TLIFn) || !TLI->has(TLIFn)) return std::nullopt; - const auto *Iter = find_if( - AllocationFnData, [TLIFn](const std::pair &P) { - return P.first == TLIFn; - }); + const auto *Iter = find_if(AllocationFnData, + [TLIFn](const std::pair &P) { + return P.first == TLIFn; + }); if (Iter == std::end(AllocationFnData)) return std::nullopt; @@ -201,11 +203,9 @@ getAllocationDataForFunction(const Function *Callee, AllocType AllocTy, if (FTy->getReturnType()->isPointerTy() && FTy->getNumParams() == FnData->NumParams && - (FstParam < 0 || - (FTy->getParamType(FstParam)->isIntegerTy(32) || - FTy->getParamType(FstParam)->isIntegerTy(64))) && - (SndParam < 0 || - FTy->getParamType(SndParam)->isIntegerTy(32) || + (FstParam < 0 || 
(FTy->getParamType(FstParam)->isIntegerTy(32) || + FTy->getParamType(FstParam)->isIntegerTy(64))) && + (SndParam < 0 || FTy->getParamType(SndParam)->isIntegerTy(32) || FTy->getParamType(SndParam)->isIntegerTy(64))) return *FnData; return std::nullopt; @@ -293,7 +293,8 @@ bool llvm::isAllocationFn( /// Tests if a value is a call or invoke to a library function that /// allocates memory similar to malloc or calloc. -bool llvm::isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) { +bool llvm::isMallocOrCallocLikeFn(const Value *V, + const TargetLibraryInfo *TLI) { // TODO: Function behavior does not match name. return getAllocationData(V, MallocOrOpNewLike, TLI).has_value(); } @@ -342,7 +343,7 @@ Value *llvm::getAllocAlignment(const CallBase *V, /// trouble with APInt size issues. This function handles resizing + overflow /// checks for us. Check and zext or trunc \p I depending on IntTyBits and /// I's value. -static bool CheckedZextOrTrunc(APInt &I, unsigned IntTyBits) { +static bool checkedZextOrTrunc(APInt &I, unsigned IntTyBits) { // More bits than we can handle. Checking the bit width isn't necessary, but // it's faster than checking active bits, and should give `false` in the // vast majority of cases. @@ -376,7 +377,7 @@ llvm::getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, // Strndup limits strlen. 
if (FnData->FstParam > 0) { const ConstantInt *Arg = - dyn_cast(Mapper(CB->getArgOperand(FnData->FstParam))); + dyn_cast(Mapper(CB->getArgOperand(FnData->FstParam))); if (!Arg) return std::nullopt; @@ -388,12 +389,12 @@ llvm::getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, } const ConstantInt *Arg = - dyn_cast(Mapper(CB->getArgOperand(FnData->FstParam))); + dyn_cast(Mapper(CB->getArgOperand(FnData->FstParam))); if (!Arg) return std::nullopt; APInt Size = Arg->getValue(); - if (!CheckedZextOrTrunc(Size, IntTyBits)) + if (!checkedZextOrTrunc(Size, IntTyBits)) return std::nullopt; // Size is determined by just 1 parameter. @@ -405,7 +406,7 @@ llvm::getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, return std::nullopt; APInt NumElems = Arg->getValue(); - if (!CheckedZextOrTrunc(NumElems, IntTyBits)) + if (!checkedZextOrTrunc(NumElems, IntTyBits)) return std::nullopt; bool Overflow; @@ -477,8 +478,8 @@ static const std::pair FreeFnData[] = { }; // clang-format on -std::optional getFreeFunctionDataForFunction(const Function *Callee, - const LibFunc TLIFn) { +static std::optional +getFreeFunctionDataForFunction(const Function *Callee, const LibFunc TLIFn) { const auto *Iter = find_if(FreeFnData, [TLIFn](const std::pair &P) { return P.first == TLIFn; @@ -664,10 +665,11 @@ Value *llvm::lowerObjectSizeCall( auto *ResultType = cast(ObjectSize->getType()); bool StaticOnly = cast(ObjectSize->getArgOperand(3))->isZero(); if (StaticOnly) { - // FIXME: Does it make sense to just return a failure value if the size won't - // fit in the output and `!MustSucceed`? + // FIXME: Does it make sense to just return a failure value if the size + // won't fit in the output and `!MustSucceed`? 
uint64_t Size; - if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, EvalOptions) && + if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, + EvalOptions) && isUIntN(ResultType->getBitWidth(), Size)) return ConstantInt::get(ResultType, Size); } else { @@ -723,15 +725,14 @@ combinePossibleConstantValues(std::optional LHS, return std::nullopt; if (EvalMode == ObjectSizeOpts::Mode::Max) return LHS->sge(*RHS) ? *LHS : *RHS; - else - return LHS->sle(*RHS) ? *LHS : *RHS; + return LHS->sle(*RHS) ? *LHS : *RHS; } static std::optional aggregatePossibleConstantValuesImpl( const Value *V, ObjectSizeOpts::Mode EvalMode, unsigned BitWidth, - unsigned recursionDepth) { - constexpr unsigned maxRecursionDepth = 4; - if (recursionDepth == maxRecursionDepth) + unsigned RecursionDepth) { + constexpr unsigned MaxRecursionDepth = 4; + if (RecursionDepth == MaxRecursionDepth) return std::nullopt; if (const auto *CI = dyn_cast(V)) { @@ -739,19 +740,19 @@ static std::optional aggregatePossibleConstantValuesImpl( } else if (const auto *SI = dyn_cast(V)) { return combinePossibleConstantValues( aggregatePossibleConstantValuesImpl(SI->getTrueValue(), EvalMode, - BitWidth, recursionDepth + 1), + BitWidth, RecursionDepth + 1), aggregatePossibleConstantValuesImpl(SI->getFalseValue(), EvalMode, - BitWidth, recursionDepth + 1), + BitWidth, RecursionDepth + 1), EvalMode); } else if (const auto *PN = dyn_cast(V)) { unsigned Count = PN->getNumIncomingValues(); if (Count == 0) return std::nullopt; auto Acc = aggregatePossibleConstantValuesImpl( - PN->getIncomingValue(0), EvalMode, BitWidth, recursionDepth + 1); + PN->getIncomingValue(0), EvalMode, BitWidth, RecursionDepth + 1); for (unsigned I = 1; Acc && I < Count; ++I) { auto Tmp = aggregatePossibleConstantValuesImpl( - PN->getIncomingValue(I), EvalMode, BitWidth, recursionDepth + 1); + PN->getIncomingValue(I), EvalMode, BitWidth, RecursionDepth + 1); Acc = combinePossibleConstantValues(Acc, Tmp, EvalMode); } return Acc; @@ 
-869,9 +870,9 @@ OffsetSpan ObjectSizeOffsetVisitor::computeImpl(Value *V) { // the argument index type size and apply the offset, as required. if (IndexTypeSizeChanged) { if (ORT.knownBefore() && - !::CheckedZextOrTrunc(ORT.Before, InitialIntTyBits)) + !::checkedZextOrTrunc(ORT.Before, InitialIntTyBits)) ORT.Before = APInt(); - if (ORT.knownAfter() && !::CheckedZextOrTrunc(ORT.After, InitialIntTyBits)) + if (ORT.knownAfter() && !::checkedZextOrTrunc(ORT.After, InitialIntTyBits)) ORT.After = APInt(); } // If the computed bound is "unknown" we cannot add the stripped offset. @@ -936,8 +937,8 @@ OffsetSpan ObjectSizeOffsetVisitor::computeValue(Value *V) { return ObjectSizeOffsetVisitor::unknown(); } -bool ObjectSizeOffsetVisitor::CheckedZextOrTrunc(APInt &I) { - return ::CheckedZextOrTrunc(I, IntTyBits); +bool ObjectSizeOffsetVisitor::checkedZextOrTrunc(APInt &I) { + return ::checkedZextOrTrunc(I, IntTyBits); } OffsetSpan ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { @@ -956,7 +957,7 @@ OffsetSpan ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { ArraySize, Options.EvalMode, ArraySize->getType()->getScalarSizeInBits())) { APInt NumElems = *PossibleSize; - if (!CheckedZextOrTrunc(NumElems)) + if (!checkedZextOrTrunc(NumElems)) return ObjectSizeOffsetVisitor::unknown(); bool Overflow; @@ -971,7 +972,7 @@ OffsetSpan ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { OffsetSpan ObjectSizeOffsetVisitor::visitArgument(Argument &A) { Type *MemoryTy = A.getPointeeInMemoryValueType(); // No interprocedural analysis is done at the moment. 
- if (!MemoryTy|| !MemoryTy->isSized()) { + if (!MemoryTy || !MemoryTy->isSized()) { ++ObjectVisitorArgument; return ObjectSizeOffsetVisitor::unknown(); } @@ -1310,8 +1311,7 @@ SizeOffsetValue ObjectSizeOffsetEvaluator::compute_(Value *V) { } else if (isa(V) || (isa(V) && cast(V)->getOpcode() == Instruction::IntToPtr) || - isa(V) || - isa(V)) { + isa(V) || isa(V)) { // Ignore values where we cannot do more than ObjectSizeVisitor. Result = ObjectSizeOffsetEvaluator::unknown(); } else { @@ -1396,7 +1396,7 @@ SizeOffsetValue ObjectSizeOffsetEvaluator::visitLoadInst(LoadInst &LI) { SizeOffsetValue ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) { // Create 2 PHIs: one for size and another for offset. - PHINode *SizePHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues()); + PHINode *SizePHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues()); PHINode *OffsetPHI = Builder.CreatePHI(IntTy, PHI.getNumIncomingValues()); // Insert right away in the cache to handle recursive PHIs. From 77275b6381d5fef8bded729fc646c2d1739ef59f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 23 Jun 2026 13:06:08 -0700 Subject: [PATCH 251/511] [RISCV] Remove assembly string from PseudoLA_TLSDESC. 
NFC (#205406) --- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 90a3fd7bf3de9..2abd63202a4bf 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1956,8 +1956,7 @@ def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.tls.gd", "$dst, $src">; let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 32 in -def PseudoLA_TLSDESC : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], - "la.tlsdesc", "$dst, $src">; +def PseudoLA_TLSDESC : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), []>; def TLSDESCCallSymbol : AsmOperandClass { let Name = "TLSDESCCallSymbol"; From cedeac3649026d8deb2b7fccfc10039d435edd3d Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 23 Jun 2026 22:09:32 +0200 Subject: [PATCH 252/511] [LV] Remove unused getInt/Fp/PointerInductionDescriptor accessors (NFC) (#205414) getIntOrFpInductionDescriptor and getPointerInductionDescriptor are unused, remove them. --- .../Vectorize/LoopVectorizationLegality.h | 10 --------- .../Vectorize/LoopVectorizationLegality.cpp | 21 ------------------- 2 files changed, 31 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 0e33540d8baa7..3e8db73fd79d2 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -353,16 +353,6 @@ class LoopVectorizationLegality { /// Returns True if V is a Phi node of an induction variable in this loop. LLVM_ABI bool isInductionPhi(const Value *V) const; - /// Returns a pointer to the induction descriptor, if \p Phi is an integer or - /// floating point induction. 
- LLVM_ABI const InductionDescriptor * - getIntOrFpInductionDescriptor(PHINode *Phi) const; - - /// Returns a pointer to the induction descriptor, if \p Phi is pointer - /// induction. - LLVM_ABI const InductionDescriptor * - getPointerInductionDescriptor(PHINode *Phi) const; - /// Returns True if V is a cast that is part of an induction def-use chain, /// and had been proven to be redundant under a runtime guard (in other /// words, the cast has the same SCEV expression as the induction phi). diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 6813930c76a6d..5a4a419fbc80c 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1325,27 +1325,6 @@ bool LoopVectorizationLegality::isInductionPhi(const Value *V) const { return Inductions.count(PN); } -const InductionDescriptor * -LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const { - if (!isInductionPhi(Phi)) - return nullptr; - auto &ID = getInductionVars().find(Phi)->second; - if (ID.getKind() == InductionDescriptor::IK_IntInduction || - ID.getKind() == InductionDescriptor::IK_FpInduction) - return &ID; - return nullptr; -} - -const InductionDescriptor * -LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const { - if (!isInductionPhi(Phi)) - return nullptr; - auto &ID = getInductionVars().find(Phi)->second; - if (ID.getKind() == InductionDescriptor::IK_PtrInduction) - return &ID; - return nullptr; -} - bool LoopVectorizationLegality::isCastedInductionVariable( const Value *V) const { auto *Inst = dyn_cast(V); From 282f6b0b1075e36153e4901a48c9e9111c36f68a Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 23 Jun 2026 20:12:32 +0000 Subject: [PATCH 253/511] [PSI] Drop AllowSynthetic parameter to getProfileCount This was not set anywhere and synthetic profile counts are not emitted/used 
anywhere, so remove it. Reviewers: david-xl, mtrofin, teresajohnson Pull Request: https://github.com/llvm/llvm-project/pull/204765 --- llvm/include/llvm/Analysis/ProfileSummaryInfo.h | 3 +-- llvm/lib/Analysis/ProfileSummaryInfo.cpp | 7 ++++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index 96af93dfc9cf4..7038b4efebf2e 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -102,8 +102,7 @@ class ProfileSummaryInfo { /// Returns the profile count for \p CallInst. LLVM_ABI std::optional - getProfileCount(const CallBase &CallInst, BlockFrequencyInfo *BFI, - bool AllowSynthetic = false) const; + getProfileCount(const CallBase &CallInst, BlockFrequencyInfo *BFI) const; /// Returns true if module \c M has partial-profile sample profile. LLVM_ABI bool hasPartialSampleProfile() const; /// Returns true if the working set size of the code is considered huge. 
diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp index 44d7a175cc7fe..59726e8ecdab1 100644 --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -75,8 +75,9 @@ void ProfileSummaryInfo::refresh(std::unique_ptr &&Other) { computeThresholds(); } -std::optional ProfileSummaryInfo::getProfileCount( - const CallBase &Call, BlockFrequencyInfo *BFI, bool AllowSynthetic) const { +std::optional +ProfileSummaryInfo::getProfileCount(const CallBase &Call, + BlockFrequencyInfo *BFI) const { assert((isa(Call) || isa(Call)) && "We can only get profile count for call/invoke instruction."); if (hasSampleProfile()) { @@ -90,7 +91,7 @@ std::optional ProfileSummaryInfo::getProfileCount( return std::nullopt; } if (BFI) - return BFI->getBlockProfileCount(Call.getParent(), AllowSynthetic); + return BFI->getBlockProfileCount(Call.getParent()); return std::nullopt; } From d40066b931578e17d08d8a38d3a4ac0bcd1f3eb9 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 23 Jun 2026 20:13:58 +0000 Subject: [PATCH 254/511] [BFI] Drop AllowSynthetic Parameter This was never set anywhere to something other than the default outside of the implementation and synthetic profile counts are slated for removal. 
Reviewers: teresajohnson, mtrofin, david-xl Pull Request: https://github.com/llvm/llvm-project/pull/204767 --- .../llvm/Analysis/BlockFrequencyInfoImpl.h | 26 +++++++------------ llvm/lib/Analysis/BlockFrequencyInfo.cpp | 2 +- llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp | 12 ++++----- 3 files changed, 17 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h index c21eeb56c632e..5db6efe9136ef 100644 --- a/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h +++ b/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h @@ -521,12 +521,10 @@ class LLVM_ABI BlockFrequencyInfoImplBase { Scaled64 getFloatingBlockFreq(const BlockNode &Node) const; BlockFrequency getBlockFreq(const BlockNode &Node) const; - std::optional - getBlockProfileCount(const Function &F, const BlockNode &Node, - bool AllowSynthetic = false) const; - std::optional - getProfileCountFromFreq(const Function &F, BlockFrequency Freq, - bool AllowSynthetic = false) const; + std::optional getBlockProfileCount(const Function &F, + const BlockNode &Node) const; + std::optional getProfileCountFromFreq(const Function &F, + BlockFrequency Freq) const; bool isIrrLoopHeader(const BlockNode &Node); void setBlockFreq(const BlockNode &Node, BlockFrequency Freq); @@ -1000,18 +998,14 @@ template class BlockFrequencyInfoImpl : BlockFrequencyInfoImplBase { return BlockFrequencyInfoImplBase::getBlockFreq(getNode(BB)); } - std::optional - getBlockProfileCount(const Function &F, const BlockT *BB, - bool AllowSynthetic = false) const { - return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB), - AllowSynthetic); + std::optional getBlockProfileCount(const Function &F, + const BlockT *BB) const { + return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB)); } - std::optional - getProfileCountFromFreq(const Function &F, BlockFrequency Freq, - bool AllowSynthetic = false) const { - return 
BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq, - AllowSynthetic); + std::optional getProfileCountFromFreq(const Function &F, + BlockFrequency Freq) const { + return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq); } bool isIrrLoopHeader(const BlockT *BB) { diff --git a/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/llvm/lib/Analysis/BlockFrequencyInfo.cpp index df99ec299a409..9a2bd2c91d11a 100644 --- a/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -207,7 +207,7 @@ BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB, if (!BFI) return std::nullopt; - return BFI->getBlockProfileCount(*getFunction(), BB, AllowSynthetic); + return BFI->getBlockProfileCount(*getFunction(), BB); } std::optional diff --git a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp index 9f6e53ba15b6a..36af4f146ca42 100644 --- a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -582,14 +582,14 @@ BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const { std::optional BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F, - const BlockNode &Node, - bool AllowSynthetic) const { - return getProfileCountFromFreq(F, getBlockFreq(Node), AllowSynthetic); + const BlockNode &Node) const { + return getProfileCountFromFreq(F, getBlockFreq(Node)); } -std::optional BlockFrequencyInfoImplBase::getProfileCountFromFreq( - const Function &F, BlockFrequency Freq, bool AllowSynthetic) const { - auto EntryCount = F.getEntryCount(AllowSynthetic); +std::optional +BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F, + BlockFrequency Freq) const { + auto EntryCount = F.getEntryCount(); if (!EntryCount) return std::nullopt; // Use 128 bit APInt to do the arithmetic to avoid overflow. 
From a782f8b4b8a81e7f9e89eb01ca13eb5b9be669a3 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 23 Jun 2026 20:15:20 +0000 Subject: [PATCH 255/511] [IR] Remove Synthetic Profile Support from Function Synthetic profiles are not generated anywhere and support is very sporadic across the code base. They are slated to be removed, so remove support for them from Function member functions. A future PR will clean up the ProfileCount abstraction that is now no longer necessary. Reviewers: teresajohnson, david-xl, mtrofin Pull Request: https://github.com/llvm/llvm-project/pull/204768 --- llvm/include/llvm/IR/Function.h | 10 +++---- llvm/lib/IR/Function.cpp | 26 +++++++------------ .../lib/Transforms/IPO/WholeProgramDevirt.cpp | 4 +-- llvm/lib/Transforms/Utils/ProfileVerify.cpp | 6 ++--- llvm/unittests/IR/MetadataTest.cpp | 10 ------- 5 files changed, 18 insertions(+), 38 deletions(-) diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index f39fe509a49a4..0b48e088c3db2 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -325,17 +325,13 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node { /// Get the entry count for this function. /// /// Entry count is the number of times the function was executed. - /// When AllowSynthetic is false, only pgo_data will be returned. - std::optional getEntryCount(bool AllowSynthetic = false) const; + std::optional getEntryCount() const; /// Return true if the function is annotated with profile data. /// /// Presence of entry counts from a profile run implies the function has - /// profile annotations. If IncludeSynthetic is false, only return true - /// when the profile data is real. - bool hasProfileData(bool IncludeSynthetic = false) const { - return getEntryCount(IncludeSynthetic).has_value(); - } + /// profile annotations. 
+ bool hasProfileData() const { return getEntryCount().has_value(); } /// Returns the set of GUIDs that needs to be imported to the function for /// sample PGO, to enable the same inlines as the profiled optimized binary. diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 0cf23f0ddda7c..ff5a11fbb08cd 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -1101,25 +1101,19 @@ void Function::setEntryCount(uint64_t Count, Function::ProfileCountType Type, setEntryCount(ProfileCount(Count, Type), Imports); } -std::optional Function::getEntryCount(bool AllowSynthetic) const { +std::optional Function::getEntryCount() const { MDNode *MD = getMetadata(LLVMContext::MD_prof); if (MD && MD->getOperand(0)) if (MDString *MDS = dyn_cast(MD->getOperand(0))) { - if (MDS->getString() == MDProfLabels::FunctionEntryCount) { - ConstantInt *CI = mdconst::extract(MD->getOperand(1)); - uint64_t Count = CI->getValue().getZExtValue(); - // A value of -1 is used for SamplePGO when there were no samples. - // Treat this the same as unknown. - if (Count == (uint64_t)-1) - return std::nullopt; - return ProfileCount(Count, PCT_Real); - } else if (AllowSynthetic && - MDS->getString() == - MDProfLabels::SyntheticFunctionEntryCount) { - ConstantInt *CI = mdconst::extract(MD->getOperand(1)); - uint64_t Count = CI->getValue().getZExtValue(); - return ProfileCount(Count, PCT_Synthetic); - } + if (MDS->getString() != MDProfLabels::FunctionEntryCount) + return std::nullopt; + ConstantInt *CI = mdconst::extract(MD->getOperand(1)); + uint64_t Count = CI->getValue().getZExtValue(); + // A value of -1 is used for SamplePGO when there were no samples. + // Treat this the same as unknown. 
+ if (Count == static_cast(-1)) + return std::nullopt; + return ProfileCount(Count, PCT_Real); } return std::nullopt; } diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index e1da7b7cd4e92..c48771506b73f 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -1579,7 +1579,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo, auto &F = *CB.getCaller(); auto &BFI = FAM.getResult(F); auto EC = BFI.getBlockFreq(&F.getEntryBlock()); - auto CC = F.getEntryCount(/*AllowSynthetic=*/true); + auto CC = F.getEntryCount(); double CallCount = 0.0; if (EC.getFrequency() != 0 && CC && CC->getCount() != 0) { double CallFreq = @@ -1629,7 +1629,7 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo, for (auto &P : SlotInfo.ConstCSInfo) Apply(P.second); for (auto &[F, C] : FunctionEntryCounts) { - assert(!F->getEntryCount(/*AllowSynthetic=*/true) && + assert(!F->getEntryCount() && "Unexpected entry count for funnel that was freshly synthesized"); F->setEntryCount(static_cast(std::round(C))); } diff --git a/llvm/lib/Transforms/Utils/ProfileVerify.cpp b/llvm/lib/Transforms/Utils/ProfileVerify.cpp index 84c5787205db5..70376edd0d32e 100644 --- a/llvm/lib/Transforms/Utils/ProfileVerify.cpp +++ b/llvm/lib/Transforms/Utils/ProfileVerify.cpp @@ -116,11 +116,11 @@ bool ProfileInjector::inject() { // as cold (we do want some explicit information in the spirit of what this // verifier wants to achieve - make dropping / corrupting MD_prof // unit-testable) - if (!F.getEntryCount(/*AllowSynthetic=*/true)) + if (!F.getEntryCount()) F.setEntryCount(DefaultFunctionEntryCount); // If there is an entry count that's 0, then don't bother injecting. We won't // verify these either. 
- if (F.getEntryCount(/*AllowSynthetic=*/true)->getCount() == 0) + if (F.getEntryCount()->getCount() == 0) return false; bool Changed = false; // Cycle through the weights list. If we didn't, tests with more than (say) @@ -241,7 +241,7 @@ PreservedAnalyses ProfileVerifierPass::run(Function &F, if (IgnoreList.contains(&F)) return PreservedAnalyses::all(); - const auto EntryCount = F.getEntryCount(/*AllowSynthetic=*/true); + const auto EntryCount = F.getEntryCount(); if (!EntryCount) { auto *MD = F.getMetadata(LLVMContext::MD_prof); if (!MD || !isExplicitlyUnknownProfileMetadata(*MD)) { diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp index 7222c885548e0..3b5978792c849 100644 --- a/llvm/unittests/IR/MetadataTest.cpp +++ b/llvm/unittests/IR/MetadataTest.cpp @@ -5225,16 +5225,6 @@ TEST_F(FunctionAttachmentTest, RealEntryCount) { EXPECT_EQ(Function::PCT_Real, Count->getType()); } -TEST_F(FunctionAttachmentTest, SyntheticEntryCount) { - Function *F = getFunction("bar"); - EXPECT_FALSE(F->getEntryCount().has_value()); - F->setEntryCount(123, Function::PCT_Synthetic); - auto Count = F->getEntryCount(true /*allow synthetic*/); - EXPECT_TRUE(Count.has_value()); - EXPECT_EQ(123u, Count->getCount()); - EXPECT_EQ(Function::PCT_Synthetic, Count->getType()); -} - TEST_F(FunctionAttachmentTest, SubprogramAttachment) { Function *F = getFunction("foo"); DISubprogram *SP = getSubprogram(); From c7a9e158afe5f72e48217e6a15929ed3e80fcf0d Mon Sep 17 00:00:00 2001 From: Sy Brand Date: Tue, 23 Jun 2026 21:19:55 +0100 Subject: [PATCH 256/511] [WebAssembly] Cooperative threading for WASIP3 (#200855) This PR builds on the changes to allow libcall thread context from https://github.com/llvm/llvm-project/pull/175800/changes and adds the necessary changes to support cooperative multithreading in the WASIP3 target: - Not marking memory as shared - Allowing thread local accesses without atomics - Only using passive segments for TLS segments The linker changes 
are supported by a new flag called `--cooperative-threading`. We talked about having two flags, one for the `--libcall-thread-context` part and one for the cooperative multithreading part. For now, I've simply replaced the `--libcall-thread-context` flag with the `--cooperative-threading` one and kept the internal configuration intact for simplicity. --- clang/lib/Basic/Targets/WebAssembly.cpp | 2 +- clang/lib/Basic/Targets/WebAssembly.h | 5 +- clang/lib/Driver/ToolChains/WebAssembly.cpp | 32 +++++-- clang/test/Driver/wasm-toolchain.c | 6 ++ lld/test/wasm/cooperative-threading.s | 85 +++++++++++++++++++ lld/test/wasm/stack-pointer-abi.s | 2 +- lld/test/wasm/thread-context-abi-mismatch.s | 5 +- lld/test/wasm/tls-libcall.s | 2 +- lld/wasm/Config.h | 3 + lld/wasm/Driver.cpp | 27 +++--- lld/wasm/Options.td | 4 +- lld/wasm/Relocations.cpp | 2 +- lld/wasm/SyntheticSections.cpp | 13 +-- lld/wasm/Writer.cpp | 58 ++++++++----- .../WebAssembly/WebAssemblySubtarget.cpp | 7 +- .../Target/WebAssembly/WebAssemblySubtarget.h | 4 + .../WebAssembly/WebAssemblyTargetMachine.cpp | 13 ++- .../WebAssembly/cooperative-strip-tls.ll | 25 ++++++ 18 files changed, 231 insertions(+), 64 deletions(-) create mode 100644 lld/test/wasm/cooperative-threading.s create mode 100644 llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll diff --git a/clang/lib/Basic/Targets/WebAssembly.cpp b/clang/lib/Basic/Targets/WebAssembly.cpp index 46f9bd10f01ec..a483e3d6f9b10 100644 --- a/clang/lib/Basic/Targets/WebAssembly.cpp +++ b/clang/lib/Basic/Targets/WebAssembly.cpp @@ -424,7 +424,7 @@ void WebAssemblyTargetInfo::adjust(DiagnosticsEngine &Diags, LangOptions &Opts, // Turn off POSIXThreads and ThreadModel so that we don't predefine _REENTRANT // or __STDCPP_THREADS__ if we will eventually end up stripping atomics // because they are unsupported.
- if (!HasAtomics || !HasBulkMemory) { + if ((!HasCooperativeThreading && !HasAtomics) || !HasBulkMemory) { Opts.POSIXThreads = false; Opts.setThreadModel(LangOptions::ThreadModelKind::Single); Opts.ThreadsafeStatics = false; diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h index 2732cafb3906d..88192a756cc4f 100644 --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -63,6 +63,7 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo { bool HasBulkMemory = false; bool HasBulkMemoryOpt = false; bool HasCallIndirectOverlong = false; + bool HasCooperativeThreading = false; bool HasCompactImports = false; bool HasExceptionHandling = false; bool HasExtendedConst = false; @@ -111,8 +112,10 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyTargetInfo : public TargetInfo { PtrDiffType = SignedLong; IntPtrType = SignedLong; } - if (T.getOS() == llvm::Triple::WASIp3) + if (T.getOS() == llvm::Triple::WASIp3) { HasLibcallThreadContext = true; + HasCooperativeThreading = true; + } } StringRef getABI() const override; diff --git a/clang/lib/Driver/ToolChains/WebAssembly.cpp b/clang/lib/Driver/ToolChains/WebAssembly.cpp index 8ca09b11836e5..7dabefbee7f79 100644 --- a/clang/lib/Driver/ToolChains/WebAssembly.cpp +++ b/clang/lib/Driver/ToolChains/WebAssembly.cpp @@ -85,14 +85,23 @@ static bool WantsPthread(const llvm::Triple &Triple, const ArgList &Args) { if (Triple.isOSWASI() && Triple.getEnvironmentName() == "threads") WantsPthread = true; + // WASIp3 also implies pthreads support + if (Triple.getOS() == llvm::Triple::WASIp3) + WantsPthread = true; + return WantsPthread; } -static bool WantsLibcallThreadContext(const llvm::Triple &Triple, - const ArgList &Args) { +static bool WantsCooperativeMultithreading(const llvm::Triple &Triple, + const ArgList &Args) { return Triple.getOS() == llvm::Triple::WASIp3; } +static bool WantsSharedMemory(const llvm::Triple &Triple, const ArgList &Args) { + 
return WantsPthread(Triple, Args) && + !WantsCooperativeMultithreading(Triple, Args); +} + void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, const InputInfo &Output, const InputInfoList &Inputs, @@ -174,10 +183,10 @@ void wasm::Linker::ConstructJob(Compilation &C, const JobAction &JA, AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); - if (WantsLibcallThreadContext(ToolChain.getTriple(), Args)) - CmdArgs.push_back("--libcall-thread-context"); + if (WantsCooperativeMultithreading(ToolChain.getTriple(), Args)) + CmdArgs.push_back("--cooperative-threading"); - if (WantsPthread(ToolChain.getTriple(), Args)) + if (WantsSharedMemory(ToolChain.getTriple(), Args)) CmdArgs.push_back("--shared-memory"); if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { @@ -331,9 +340,12 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs, options::OPT_fno_use_init_array, true)) CC1Args.push_back("-fno-use-init-array"); - // '-pthread' implies atomics, bulk-memory, mutable-globals, and sign-ext + // '-pthread' implies bulk-memory, mutable-globals, and sign-ext. + // It also implies atomics, so long as we're not targeting a cooperative + // threading environment. 
if (WantsPthread(getTriple(), DriverArgs)) { - if (DriverArgs.hasFlag(options::OPT_mno_atomics, options::OPT_matomics, + if (!WantsCooperativeMultithreading(getTriple(), DriverArgs) && + DriverArgs.hasFlag(options::OPT_mno_atomics, options::OPT_matomics, false)) getDriver().Diag(diag::err_drv_argument_not_allowed_with) << "-pthread" @@ -353,8 +365,10 @@ void WebAssembly::addClangTargetOptions(const ArgList &DriverArgs, getDriver().Diag(diag::err_drv_argument_not_allowed_with) << "-pthread" << "-mno-sign-ext"; - CC1Args.push_back("-target-feature"); - CC1Args.push_back("+atomics"); + if (!WantsCooperativeMultithreading(getTriple(), DriverArgs)) { + CC1Args.push_back("-target-feature"); + CC1Args.push_back("+atomics"); + } CC1Args.push_back("-target-feature"); CC1Args.push_back("+bulk-memory"); CC1Args.push_back("-target-feature"); diff --git a/clang/test/Driver/wasm-toolchain.c b/clang/test/Driver/wasm-toolchain.c index 29a94aeec77a9..c02a102fab081 100644 --- a/clang/test/Driver/wasm-toolchain.c +++ b/clang/test/Driver/wasm-toolchain.c @@ -303,3 +303,9 @@ // RUN: | FileCheck -check-prefix=LINK_WALI_BASIC %s // LINK_WALI_BASIC: "-cc1" {{.*}} "-o" "[[temp:[^"]*]]" // LINK_WALI_BASIC: wasm-ld{{.*}}" "-L/foo/lib/wasm32-linux-muslwali" "crt1.o" "[[temp]]" "-lc" "{{.*[/\\]}}libclang_rt.builtins.a" "-o" "a.out" + +// Test that `wasm32-wasip3` passes `--cooperative-threading` to the linker. + +// RUN: %clang -### --target=wasm32-wasip3 -fuse-ld=lld %s --sysroot /foo 2>&1 \ +// RUN: | FileCheck -check-prefix=LINK_WASIP3_COOP %s +// LINK_WASIP3_COOP: wasm-ld{{.*}}" {{.*}} "--cooperative-threading" diff --git a/lld/test/wasm/cooperative-threading.s b/lld/test/wasm/cooperative-threading.s new file mode 100644 index 0000000000000..64e392fbc45dd --- /dev/null +++ b/lld/test/wasm/cooperative-threading.s @@ -0,0 +1,85 @@ +# Test that --cooperative-threading uses the libcall ABI naming for +# thread-context globals (__init_stack_pointer, __init_tls_base, etc.) 
and +# works without --shared-memory and atomics. + +# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s +# RUN: wasm-ld --cooperative-threading -no-gc-sections -o %t.wasm %t.o +# RUN: obj2yaml %t.wasm | FileCheck %s +# RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS + +# Test that --cooperative-threading and --shared-memory are mutually exclusive. +# RUN: not wasm-ld --cooperative-threading --shared-memory %t.o -o %t2.wasm 2>&1 | FileCheck %s --check-prefix=INCOMPAT +# INCOMPAT: --cooperative-threading is incompatible with --shared-memory + +.globl __wasm_get_tls_base +__wasm_get_tls_base: + .functype __wasm_get_tls_base () -> (i32) + i32.const 0 + end_function + +.globl _start +_start: + .functype _start () -> (i32) + call __wasm_get_tls_base + i32.const tls1@TLSREL + i32.add + i32.load 0 + call __wasm_get_tls_base + i32.const tls2@TLSREL + i32.add + i32.load 0 + i32.add + end_function + +.section .tdata.tls1,"",@ +.globl tls1 +tls1: + .int32 1 + .size tls1, 4 + +.section .tdata.tls2,"",@ +.globl tls2 +tls2: + .int32 2 + .size tls2, 4 + +.section .custom_section.target_features,"",@ + .int8 2 + .int8 43 + .int8 11 + .ascii "bulk-memory" + .int8 43 + .int8 7 + .ascii "atomics" + +# Memory must NOT be marked as shared. +# CHECK: - Type: MEMORY +# CHECK-NEXT: Memories: +# CHECK-NEXT: - Minimum: 0x2 +# CHECK-NOT: Shared + +# Globals should use the libcall ABI naming, not the global ABI. 
+# CHECK: GlobalNames: +# CHECK-NEXT: - Index: 0 +# CHECK-NEXT: Name: __init_stack_pointer +# CHECK-NEXT: - Index: 1 +# CHECK-NEXT: Name: __init_tls_base +# CHECK-NEXT: - Index: 2 +# CHECK-NEXT: Name: __tls_size +# CHECK-NEXT: - Index: 3 +# CHECK-NEXT: Name: __tls_align + +# DIS-LABEL: <__wasm_init_memory>: + +# DIS-LABEL: <_start>: +# DIS-EMPTY: +# DIS-NEXT: call {{[0-9]+}} +# DIS-NEXT: i32.const 0 +# DIS-NEXT: i32.add +# DIS-NEXT: i32.load 0 +# DIS-NEXT: call {{[0-9]+}} +# DIS-NEXT: i32.const 4 +# DIS-NEXT: i32.add +# DIS-NEXT: i32.load 0 +# DIS-NEXT: i32.add +# DIS-NEXT: end diff --git a/lld/test/wasm/stack-pointer-abi.s b/lld/test/wasm/stack-pointer-abi.s index 869f972710991..fbae0475bcba2 100644 --- a/lld/test/wasm/stack-pointer-abi.s +++ b/lld/test/wasm/stack-pointer-abi.s @@ -1,5 +1,5 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s -# RUN: wasm-ld --libcall-thread-context --no-gc-sections -o %t.libcall.wasm %t.o +# RUN: wasm-ld --cooperative-threading --no-gc-sections -o %t.libcall.wasm %t.o # RUN: obj2yaml %t.libcall.wasm | FileCheck %s --check-prefix=LIBCALL # RUN: wasm-ld --no-gc-sections -o %t.global.wasm %t.o # RUN: obj2yaml %t.global.wasm | FileCheck %s --check-prefix=GLOBAL diff --git a/lld/test/wasm/thread-context-abi-mismatch.s b/lld/test/wasm/thread-context-abi-mismatch.s index 069534cbe5762..3debc1de662a1 100644 --- a/lld/test/wasm/thread-context-abi-mismatch.s +++ b/lld/test/wasm/thread-context-abi-mismatch.s @@ -3,10 +3,9 @@ # as an indication that the global thread context ABI is being used. 
# RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s -# RUN: not wasm-ld --libcall-thread-context %t.o -o %t.wasm 2>&1 | FileCheck %s - -# CHECK: object file uses globals for thread context, but --libcall-thread-context was specified +# RUN: not wasm-ld --cooperative-threading %t.o -o %t.wasm 2>&1 | FileCheck %s +# CHECK: object file uses globals for thread context, but --cooperative-threading was specified .globl _start _start: .functype _start () -> () diff --git a/lld/test/wasm/tls-libcall.s b/lld/test/wasm/tls-libcall.s index df8b8f8be0207..d8fb1c5e8a9ca 100644 --- a/lld/test/wasm/tls-libcall.s +++ b/lld/test/wasm/tls-libcall.s @@ -1,5 +1,5 @@ # RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown -o %t.o %s -# RUN: wasm-ld --libcall-thread-context --shared-memory -no-gc-sections -o %t.wasm %t.o +# RUN: wasm-ld --cooperative-threading -no-gc-sections -o %t.wasm %t.o # RUN: obj2yaml %t.wasm | FileCheck %s # RUN: llvm-objdump -d --no-print-imm-hex --no-show-raw-insn %t.wasm | FileCheck %s --check-prefix=DIS diff --git a/lld/wasm/Config.h b/lld/wasm/Config.h index fb1c3f9f2e739..f2f1b895f5b69 100644 --- a/lld/wasm/Config.h +++ b/lld/wasm/Config.h @@ -65,6 +65,7 @@ struct Config { bool growableTable; bool gcSections; llvm::StringSet<> keepSections; + bool cooperativeThreading; bool libcallThreadContext; std::optional> memoryImport; std::optional memoryExport; @@ -134,6 +135,8 @@ struct Config { std::optional> features; std::optional> extraFeatures; llvm::SmallVector buildIdVector; + + bool isMultithreaded() const { return sharedMemory || cooperativeThreading; } }; // The Ctx object hold all other (non-configuration) global state. 
diff --git a/lld/wasm/Driver.cpp b/lld/wasm/Driver.cpp index fe1e2eec95037..9a2e3a82a9279 100644 --- a/lld/wasm/Driver.cpp +++ b/lld/wasm/Driver.cpp @@ -561,7 +561,7 @@ static void readConfigs(opt::InputArgList &args) { ctx.arg.soName = args.getLastArgValue(OPT_soname); ctx.arg.importTable = args.hasArg(OPT_import_table); ctx.arg.importUndefined = args.hasArg(OPT_import_undefined); - ctx.arg.libcallThreadContext = args.hasArg(OPT_libcall_thread_context); + ctx.arg.cooperativeThreading = args.hasArg(OPT_cooperative_threading); ctx.arg.ltoo = args::getInteger(args, OPT_lto_O, 2); if (ctx.arg.ltoo > 3) error("invalid optimization level for LTO: " + Twine(ctx.arg.ltoo)); @@ -755,6 +755,11 @@ static void setConfigs() { if (!ctx.arg.memoryExport.has_value() && !ctx.arg.memoryImport.has_value()) { ctx.arg.memoryExport = memoryName; } + if (ctx.arg.cooperativeThreading) { + if (ctx.arg.sharedMemory) + error("--cooperative-threading is incompatible with --shared-memory"); + ctx.arg.libcallThreadContext = true; + } } // Some command line options or some combinations of them are not allowed. @@ -964,7 +969,7 @@ static void createSyntheticSymbols() { createGlobalVariable(stack_pointer_name, !ctx.arg.libcallThreadContext); } - if (ctx.arg.sharedMemory) { + if (ctx.arg.isMultithreaded()) { // TLS symbols are all hidden/dso-local auto tls_base_name = ctx.arg.libcallThreadContext ? 
"__init_tls_base" : "__tls_base"; @@ -986,9 +991,11 @@ static void createSyntheticSymbols() { static WasmSignature setTLSBaseSignature{{}, {ValType::I32}}; ctx.sym.setTLSBase = createUndefinedFunction("__wasm_set_tls_base", &setTLSBaseSignature); + ctx.sym.setTLSBase->markLive(); static WasmSignature getTLSBaseSignature{{ValType::I32}, {}}; ctx.sym.getTLSBase = createUndefinedFunction("__wasm_get_tls_base", &getTLSBaseSignature); + ctx.sym.getTLSBase->markLive(); } } } @@ -1019,16 +1026,12 @@ static void createOptionalSymbols() { if (ctx.sym.firstPageEnd) ctx.sym.firstPageEnd->setVA(ctx.arg.pageSize); - // For non-shared memory programs we still need to define __tls_base since we - // allow object files built with TLS to be linked into single threaded - // programs, and such object files can contain references to this symbol. - // - // However, in this case __tls_base is immutable and points directly to the - // start of the `.tdata` static segment. - // - // __tls_size and __tls_align are not needed in this case since they are only - // needed for __wasm_init_tls (which we do not create in this case). - if (!ctx.arg.sharedMemory) + // TLS object files may be linked into single-threaded programs, so + // __tls_base must always be defined. In this case it is immutable and points + // directly to the start of the `.tdata` segment. __tls_size and __tls_align + // are omitted since they are only used by __wasm_init_tls, which is not + // created in this case. 
+ if (!ctx.sym.tlsBase) ctx.sym.tlsBase = createOptionalGlobal("__tls_base", false); } diff --git a/lld/wasm/Options.td b/lld/wasm/Options.td index 144eee33061e1..bd46794e067b3 100644 --- a/lld/wasm/Options.td +++ b/lld/wasm/Options.td @@ -238,8 +238,8 @@ def page_size: JJ<"page-size=">, def initial_memory: JJ<"initial-memory=">, HelpText<"Initial size of the linear memory">; -def libcall_thread_context: FF<"libcall-thread-context">, - HelpText<"Use library calls for thread context access instead of globals.">; +def cooperative_threading: FF<"cooperative-threading">, + HelpText<"Enable cooperative multithreading.">; def max_memory: JJ<"max-memory=">, HelpText<"Maximum size of the linear memory">; diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp index a1840abe88b3a..cb597fdeffcf3 100644 --- a/lld/wasm/Relocations.cpp +++ b/lld/wasm/Relocations.cpp @@ -125,7 +125,7 @@ void scanRelocations(InputChunk *chunk) { // In single-threaded builds TLS is lowered away and TLS data can be // merged with normal data and allowing TLS relocation in non-TLS // segments. 
- if (ctx.arg.sharedMemory) { + if (ctx.arg.isMultithreaded()) { if (!sym->isTLS()) { error(toString(file) + ": relocation " + relocTypeToString(reloc.Type) + diff --git a/lld/wasm/SyntheticSections.cpp b/lld/wasm/SyntheticSections.cpp index d1a01c7ec3f9d..050f61c7f5c56 100644 --- a/lld/wasm/SyntheticSections.cpp +++ b/lld/wasm/SyntheticSections.cpp @@ -57,7 +57,7 @@ void writeGetTLSBase(const Ctx &ctx, raw_ostream &os) { writeU8(os, WASM_OPCODE_CALL, "call"); writeUleb128(os, ctx.sym.getTLSBase->getFunctionIndex(), "function index"); } else { - writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_SET"); + writeU8(os, WASM_OPCODE_GLOBAL_GET, "GLOBAL_GET"); writeUleb128(os, ctx.sym.tlsBase->getGlobalIndex(), "__tls_base"); } } @@ -532,7 +532,7 @@ void GlobalSection::writeBody() { mutable_ = true; // With multi-threading any TLS globals must be mutable since they get // set during `__wasm_apply_global_tls_relocs` - if (ctx.arg.sharedMemory && sym->isTLS()) + if (ctx.arg.isMultithreaded() && sym->isTLS()) mutable_ = true; } WasmGlobalType type{itype, mutable_}; @@ -569,10 +569,11 @@ void GlobalSection::writeBody() { } else { WasmInitExpr initExpr; if (auto *d = dyn_cast(sym)) - // In the sharedMemory case TLS globals are set during - // `__wasm_apply_global_tls_relocs`, but in the non-shared case + // In the multithreaded case, TLS globals are set during + // `__wasm_apply_global_tls_relocs`, but in the single-threaded case // we know the absolute value at link time. - initExpr = intConst(d->getVA(/*absolute=*/!ctx.arg.sharedMemory), is64); + initExpr = + intConst(d->getVA(/*absolute=*/!ctx.arg.isMultithreaded()), is64); else if (auto *f = dyn_cast(sym)) initExpr = intConst(f->isStub ? 0 : f->getTableIndex(), is64); else { @@ -680,7 +681,7 @@ bool DataCountSection::isNeeded() const { // instructions are not yet supported in input files. However, in the case // of shared memory, lld itself will generate these instructions as part of // `__wasm_init_memory`. 
See Writer::createInitMemoryFunction. - return numSegments && ctx.arg.sharedMemory; + return numSegments && ctx.arg.isMultithreaded(); } void LinkingSection::writeBody() { diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp index b617ea2912b91..6ff5fbe6d9c0d 100644 --- a/lld/wasm/Writer.cpp +++ b/lld/wasm/Writer.cpp @@ -423,7 +423,7 @@ void Writer::layoutMemory() { // Even in the absense of any actual TLS data, this symbol can still be // referenced (for example by __builtin_thread_pointer, which should not // return NULL). - if (!ctx.arg.sharedMemory && ctx.sym.tlsBase) { + if (!ctx.arg.isMultithreaded() && ctx.sym.tlsBase) { setGlobalPtr(ctx.sym.tlsBase, fixedTLSBase); } @@ -650,7 +650,7 @@ void Writer::populateTargetFeatures() { sym->importModule && sym->importModule == "env"; })) error(fileName + ": object file uses globals for thread context, " - "but --libcall-thread-context was specified"); + "but --cooperative-threading was specified"); } if (inferFeatures) @@ -673,10 +673,12 @@ void Writer::populateTargetFeatures() { } if (tlsUsed) { - for (auto feature : {"atomics", "bulk-memory"}) - if (!allowed.contains(feature)) - error(StringRef("'") + feature + - "' feature must be used in order to use thread-local storage"); + if (!allowed.contains("bulk-memory")) + error("'bulk-memory' feature must be used in order to use thread-local " + "storage"); + if (!allowed.contains("atomics") && !ctx.arg.cooperativeThreading) + error("'atomics' feature must be used in order to use thread-local " + "storage"); } // Validate that used features are allowed in output @@ -1054,7 +1056,17 @@ static StringRef getOutputDataSegmentName(const InputChunk &seg) { OutputSegment *Writer::createOutputSegment(StringRef name) { LLVM_DEBUG(dbgs() << "new segment: " << name << "\n"); OutputSegment *s = make(name); - if (ctx.arg.sharedMemory) + // In the shared memory case, all data segments must be passive since they + // will be initialized once by the main thread and then shared with 
other + // threads. In the cooperative threading case, TLS segments must be passive + // so they can be re-initialized per-thread via memory.init, and .bss + // segments are passive to avoid serializing their zero bytes into the binary; + // they are still present as passive segment entries and zero-filled via + // memory.fill in __wasm_init_memory. + bool needsPassiveInit = + ctx.arg.sharedMemory || (ctx.arg.cooperativeThreading && + (s->isTLS() || s->name.starts_with(".bss"))); + if (needsPassiveInit) s->initFlags = WASM_DATA_SEGMENT_IS_PASSIVE; if (!ctx.arg.relocatable && name.starts_with(".bss")) s->isBss = true; @@ -1113,7 +1125,7 @@ void Writer::combineOutputSegments() { // This restriction does not apply when the extended const extension is // available: https://github.com/WebAssembly/extended-const assert(!ctx.arg.extendedConst); - assert(ctx.isPic && !ctx.arg.sharedMemory); + assert(ctx.isPic && !ctx.arg.isMultithreaded()); if (segments.size() <= 1) return; OutputSegment *combined = make(".data"); @@ -1188,22 +1200,25 @@ void Writer::createSyntheticInitFunctions() { "__wasm_init_memory", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_init_memory")); ctx.sym.initMemory->markLive(); - if (ctx.arg.sharedMemory) { - // This global is assigned during __wasm_init_memory in the shared memory - // case. 
+ // __wasm_init_memory uses __tls_base/__wasm_set_tls_base + if (ctx.sym.setTLSBase) + ctx.sym.setTLSBase->markLive(); + else if (ctx.arg.sharedMemory) ctx.sym.tlsBase->markLive(); - } } - if (ctx.arg.sharedMemory) { + if (ctx.arg.isMultithreaded()) { if (out.globalSec->needsTLSRelocations()) { ctx.sym.applyGlobalTLSRelocs = symtab->addSyntheticFunction( "__wasm_apply_global_tls_relocs", WASM_SYMBOL_VISIBILITY_HIDDEN, make(nullSignature, "__wasm_apply_global_tls_relocs")); ctx.sym.applyGlobalTLSRelocs->markLive(); - // TLS relocations depend on the __tls_base symbols - ctx.sym.tlsBase->markLive(); + // TLS relocations depend on the __tls_base/__wasm_get_tls_base symbols + if (ctx.sym.getTLSBase) + ctx.sym.getTLSBase->markLive(); + else if (ctx.arg.sharedMemory) + ctx.sym.tlsBase->markLive(); } auto hasTLSRelocs = [](const OutputSegment *segment) { @@ -1375,7 +1390,7 @@ void Writer::createInitMemoryFunction() { // When we initialize the TLS segment we also set the TLS base. // This allows the runtime to use this static copy of the TLS data // for the first/main thread. - if (ctx.arg.sharedMemory && s->isTLS()) { + if (ctx.arg.isMultithreaded() && s->isTLS()) { if (ctx.isPic) { // Cache the result of the addionion in local 0 writeU8(os, WASM_OPCODE_LOCAL_TEE, "local.tee"); @@ -1446,7 +1461,7 @@ void Writer::createInitMemoryFunction() { if (needsPassiveInitialization(s) && !s->isBss) { // The TLS region should not be dropped since its is needed // during the initialization of each thread (__wasm_init_tls). 
- if (ctx.arg.sharedMemory && s->isTLS()) + if (ctx.arg.isMultithreaded() && s->isTLS()) continue; // data.drop instruction writeU8(os, WASM_OPCODE_MISC_PREFIX, "bulk-memory prefix"); @@ -1499,7 +1514,7 @@ void Writer::createApplyDataRelocationsFunction() { writeUleb128(os, 0, "num locals"); bool generated = false; for (const OutputSegment *seg : segments) - if (!ctx.arg.sharedMemory || !seg->isTLS()) + if (!ctx.arg.isMultithreaded() || !seg->isTLS()) for (const InputChunk *inSeg : seg->inputSegments) generated |= inSeg->generateRelocationCode(os); @@ -1655,7 +1670,6 @@ void Writer::createInitTLSFunction() { if (tlsSeg) { writeU8(os, WASM_OPCODE_LOCAL_GET, "local.get"); writeUleb128(os, 0, "local index"); - writeSetTLSBase(ctx, os); // FIXME(wvo): this local needs to be I64 in wasm64, or we need an extend @@ -1788,9 +1802,9 @@ void Writer::run() { // `__memory_base` import. Unless we support the extended const expression we // can't do addition inside the constant expression, so we much combine the // segments into a single one that can live at `__memory_base`. - if (ctx.isPic && !ctx.arg.extendedConst && !ctx.arg.sharedMemory) { - // In shared memory mode all data segments are passive and initialized - // via __wasm_init_memory. + if (ctx.isPic && !ctx.arg.extendedConst && !ctx.arg.isMultithreaded()) { + // In multithreaded modes (shared or cooperative), data segments may be + // passive and must not be combined into a single active segment. log("-- combineOutputSegments"); combineOutputSegments(); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index 6326b7d76db82..9dea29fb0205d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -40,9 +40,12 @@ WebAssemblySubtarget::initializeSubtargetDependencies(StringRef CPU, ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS); - // WASIP3 implies using the libcall thread context. 
- if (TargetTriple.getOS() == Triple::WASIp3) + // WASIP3 uses cooperative multithreading, which implies using libcall + // thread context. + if (TargetTriple.getOS() == Triple::WASIp3) { + HasCooperativeMultithreading = true; HasLibcallThreadContext = true; + } FeatureBitset Bits = getFeatureBits(); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h index 5c6f4cb5b36ff..f637ce59ebfce 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -52,6 +52,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool HasExtendedConst = false; bool HasFP16 = false; bool HasGC = false; + bool HasCooperativeMultithreading = false; bool HasLibcallThreadContext = false; bool HasMultiMemory = false; bool HasMultivalue = false; @@ -117,6 +118,9 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { bool hasExtendedConst() const { return HasExtendedConst; } bool hasFP16() const { return HasFP16; } bool hasGC() const { return HasGC; } + bool hasCooperativeMultithreading() const { + return HasCooperativeMultithreading; + } bool hasLibcallThreadContext() const { return HasLibcallThreadContext; } bool hasMultiMemory() const { return HasMultiMemory; } bool hasMultivalue() const { return HasMultivalue; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 886ea0a8ab574..110d6820bb76e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -278,14 +278,21 @@ class CoalesceFeaturesAndStripAtomics final : public ModulePass { bool StrippedAtomics = false; bool StrippedTLS = false; + // In cooperative threading mode, thread locals are meaningful even without + // atomics. 
+ bool CooperativeThreading = + WasmTM->getSubtargetImpl()->hasCooperativeMultithreading(); + if (!Features[WebAssembly::FeatureAtomics]) { StrippedAtomics = stripAtomics(M); + if (!CooperativeThreading) + StrippedTLS = stripThreadLocals(M); + } + if (!Features[WebAssembly::FeatureBulkMemory] && !StrippedTLS) { StrippedTLS = stripThreadLocals(M); - } else if (!Features[WebAssembly::FeatureBulkMemory]) { - StrippedTLS |= stripThreadLocals(M); } - if (StrippedAtomics && !StrippedTLS) + if (StrippedAtomics && !StrippedTLS && !CooperativeThreading) stripThreadLocals(M); else if (StrippedTLS && !StrippedAtomics) stripAtomics(M); diff --git a/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll b/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll new file mode 100644 index 0000000000000..0cefa1b6b1f21 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/cooperative-strip-tls.ll @@ -0,0 +1,25 @@ +; Test that in cooperative threading mode (wasm32-wasip3), thread-local variables +; are NOT stripped even when atomics are absent. In non-cooperative mode +; (wasm32-unknown-unknown) TLS is treated as normal data when atomics are absent. + +; RUN: llc < %s -mtriple=wasm32-wasip3 -mcpu=mvp -mattr=-atomics,+bulk-memory \ +; RUN: | FileCheck %s --check-prefixes=COOP +; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mcpu=mvp -mattr=-atomics,+bulk-memory \ +; RUN: | FileCheck %s --check-prefixes=PLAIN + +target triple = "wasm32-unknown-unknown" + +@foo = internal thread_local global i32 0 +@bar = internal thread_local global i32 1 + +; Cooperative threading: TLS is preserved — the section stays .tbss. 
+; COOP: .tbss.foo +; COOP: .tdata.bar +; COOP-NOT: .bss.foo +; COOP-NOT: .data.bar + +; Non-cooperative: TLS stripped +; PLAIN: .bss.foo +; PLAIN: .data.bar +; PLAIN-NOT: .tbss.foo +; PLAIN-NOT: .tdata.bar From 11f1d0cca76604bbc38a5f798cbccde42fd18fe2 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 23 Jun 2026 20:23:40 +0000 Subject: [PATCH 257/511] [PSI] Return raw entry count values Now that synthetic entry counts are being removed, stop using the ProfileCount wrapper around entrycounts given it only exists to distinguish between synthetic and real profile counts. Reviewers: teresajohnson, david-xl, mtrofin Pull Request: https://github.com/llvm/llvm-project/pull/204769 --- .../llvm/Analysis/ProfileSummaryInfo.h | 23 +++++++++---------- llvm/lib/CodeGen/MachineFunction.cpp | 6 +++-- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index 7038b4efebf2e..547bc344d314b 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -115,11 +115,11 @@ class ProfileSummaryInfo { template bool isFunctionEntryHot(const FuncT *F) const { if (!F || !hasProfileSummary()) return false; - std::optional FunctionCount = getEntryCount(F); + std::optional FunctionCount = getEntryCount(F); // FIXME: The heuristic used below for determining hotness is based on // preliminary SPEC tuning for inliner. This will eventually be a // convenience method that calls isHotCount. - return FunctionCount && isHotCount(FunctionCount->getCount()); + return FunctionCount && isHotCount(*FunctionCount); } /// Returns true if \p F contains hot code. 
@@ -128,7 +128,7 @@ class ProfileSummaryInfo { if (!F || !hasProfileSummary()) return false; if (auto FunctionCount = getEntryCount(F)) - if (isHotCount(FunctionCount->getCount())) + if (isHotCount(*FunctionCount)) return true; if (auto TotalCallCount = getTotalCallCount(F)) @@ -148,7 +148,7 @@ class ProfileSummaryInfo { if (!F || !hasProfileSummary()) return false; if (auto FunctionCount = getEntryCount(F)) - if (!isColdCount(FunctionCount->getCount())) + if (!isColdCount(*FunctionCount)) return false; if (auto TotalCallCount = getTotalCallCount(F)) @@ -278,11 +278,9 @@ class ProfileSummaryInfo { if (!F || !hasProfileSummary()) return false; if (auto FunctionCount = getEntryCount(F)) { - if (isHot && - isHotCountNthPercentile(PercentileCutoff, FunctionCount->getCount())) + if (isHot && isHotCountNthPercentile(PercentileCutoff, *FunctionCount)) return true; - if (!isHot && !isColdCountNthPercentile(PercentileCutoff, - FunctionCount->getCount())) + if (!isHot && !isColdCountNthPercentile(PercentileCutoff, *FunctionCount)) return false; } if (auto TotalCallCount = getTotalCallCount(F)) { @@ -326,8 +324,10 @@ class ProfileSummaryInfo { } template - std::optional getEntryCount(const FuncT *F) const { - return F->getEntryCount(); + std::optional getEntryCount(const FuncT *F) const { + if (!F->getEntryCount().has_value()) + return std::nullopt; + return F->getEntryCount()->getCount(); } }; @@ -349,8 +349,7 @@ ProfileSummaryInfo::getTotalCallCount(const Function *F) const { // here, because we cannot include MachineFunction header here, that would break // dependency rules. template <> -std::optional -ProfileSummaryInfo::getEntryCount( +std::optional ProfileSummaryInfo::getEntryCount( const MachineFunction *F) const; /// An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo. 
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 2922922535333..8246b77ac9dce 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -1666,10 +1666,12 @@ void MachineConstantPool::print(raw_ostream &OS) const { // ProfileSummaryInfo::getEntryCount(). //===----------------------------------------------------------------------===// template <> -std::optional +std::optional ProfileSummaryInfo::getEntryCount( const llvm::MachineFunction *F) const { - return F->getFunction().getEntryCount(); + if (!F->getFunction().getEntryCount().has_value()) + return std::nullopt; + return F->getFunction().getEntryCount()->getCount(); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) From a69f9a95f9c339bb1555f155b150b3dda658795a Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 23 Jun 2026 13:24:47 -0700 Subject: [PATCH 258/511] [lldb] Disable dynamic script interpreters by default under Xcode (#205423) When LLDB_ENABLE_DYNAMIC_SCRIPTINTERPRETERS is set, liblldb's export list is built by merging the undefined LLDB symbols extracted from each script interpreter plugin's objects (119e57630281). Because the plugins link liblldb, the generated file is wired into liblldb's link via LINK_DEPENDS, a file-level dependency with no target-level edge. The Xcode generator only has coarse target-level dependencies, so that generated liblldb-script-interpreter.exports ends up attached to two targets with no common dependency, which its "new build system" rejects at generation time: ``` CMake Error in source/API/CMakeLists.txt: .../source/API/liblldb-script-interpreter.exports is attached to multiple targets ... but none of these is a common dependency of the other(s). This is not allowed by the Xcode "new build system". ``` Default the option to OFF under the Xcode generator. The script interpreter plugins are then linked statically into liblldb and the per-plugin export path is never taken. 
rdar://180422686 --- lldb/cmake/modules/LLDBConfig.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/cmake/modules/LLDBConfig.cmake b/lldb/cmake/modules/LLDBConfig.cmake index 660cd00a64414..e086aaf5d3632 100644 --- a/lldb/cmake/modules/LLDBConfig.cmake +++ b/lldb/cmake/modules/LLDBConfig.cmake @@ -190,7 +190,7 @@ else() set(LLDB_ENABLE_MTE OFF) endif() -if (CMAKE_SYSTEM_NAME MATCHES "Darwin|FreeBSD") +if (CMAKE_SYSTEM_NAME MATCHES "Darwin|FreeBSD" AND NOT CMAKE_GENERATOR MATCHES "Xcode") set(default_enable_dynamic_scriptinterpreters ON) else() set(default_enable_dynamic_scriptinterpreters OFF) From f671d581e6409f3726ab652e5b862edd8f83e005 Mon Sep 17 00:00:00 2001 From: Delaram Talaashrafi Date: Tue, 23 Jun 2026 16:26:12 -0400 Subject: [PATCH 259/511] [MLIR][OpenACC] Add acc-emit-remarks-loop pass (#205203) Add a function-level pass that emits optimization remarks for loops in `acc.compute_region`, describing their mapping to OpenACC parallel levels (gang, worker, vector, sequential) and GPU dimensions (blockIdx, threadIdx). 
--- .../mlir/Dialect/OpenACC/Transforms/Passes.td | 17 ++ .../OpenACC/Transforms/ACCEmitRemarksLoop.cpp | 163 ++++++++++++++++++ .../Dialect/OpenACC/Transforms/CMakeLists.txt | 1 + .../acc-emit-remarks-loop-pipeline.mlir | 40 +++++ .../OpenACC/acc-emit-remarks-loop.mlir | 155 +++++++++++++++++ 5 files changed, 376 insertions(+) create mode 100644 mlir/lib/Dialect/OpenACC/Transforms/ACCEmitRemarksLoop.cpp create mode 100644 mlir/test/Dialect/OpenACC/acc-emit-remarks-loop-pipeline.mlir create mode 100644 mlir/test/Dialect/OpenACC/acc-emit-remarks-loop.mlir diff --git a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td index 485f68dfc9338..e35e38adceb02 100644 --- a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td @@ -176,6 +176,23 @@ def ACCLegalizeSerial : Pass<"acc-legalize-serial", "mlir::func::FuncOp"> { } +def ACCEmitRemarksLoop : Pass<"acc-emit-remarks-loop", "mlir::func::FuncOp"> { + let summary = "Emit OpenACC loop parallelism mapping remarks"; + let description = [{ + This pass emits optimization remarks describing how loops inside OpenACC + compute regions are mapped to parallelism levels (gang, worker, vector, + sequential) and the corresponding GPU parallel dimensions. + + The pass walks `acc.compute_region` operations that originated from OpenACC + compute constructs and reports remarks for each loop carrying an + `acc.par_dims` attribute. 
+ }]; + let options = [ + Option<"gpuDimSeparator", "gpu-dim-separator", "std::string", "\".\"", + "Separator between GPU index prefix and axis name in remarks "> + ]; +} + def ACCLoopTiling : Pass<"acc-loop-tiling", "mlir::func::FuncOp"> { let summary = "Tile OpenACC loops with tile clauses"; let description = [{ diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCEmitRemarksLoop.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCEmitRemarksLoop.cpp new file mode 100644 index 0000000000000..29e71fe1e9097 --- /dev/null +++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCEmitRemarksLoop.cpp @@ -0,0 +1,163 @@ +//===- ACCEmitRemarksLoop.cpp - Emit OpenACC loop mapping remarks --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass emits optimization remarks describing how loops inside OpenACC +// compute regions are mapped to parallelism levels and GPU dimensions. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h" +#include "mlir/Dialect/OpenACC/OpenACC.h" +#include "mlir/Dialect/OpenACC/OpenACCParMapping.h" +#include "mlir/Dialect/OpenACC/OpenACCUtilsLoop.h" +#include "mlir/Dialect/OpenACC/Transforms/Passes.h" +#include "mlir/Dialect/SCF/IR/SCF.h" +#include "mlir/Interfaces/FunctionInterfaces.h" +#include "mlir/Interfaces/LoopLikeInterface.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Debug.h" + +namespace mlir { +namespace acc { +#define GEN_PASS_DEF_ACCEMITREMARKSLOOP +#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc" +} // namespace acc +} // namespace mlir + +#define DEBUG_TYPE "acc-emit-remarks-loop" + +using namespace mlir; + +namespace { + +static bool shouldEmitLoopRemarks(acc::ComputeRegionOp computeRegion) { + StringRef origin = computeRegion.getOrigin(); + if (origin == acc::KernelsOp::getOperationName() || + origin == acc::ParallelOp::getOperationName() || + origin == acc::SerialOp::getOperationName()) + return true; + + if (auto func = computeRegion->getParentOfType()) + return acc::isSpecializedAccRoutine(func); + return false; +} + +static std::string getACCParLevelName(acc::GPUParallelDimAttr parDim, + const acc::ACCToGPUMappingPolicy &policy, + acc::ComputeRegionOp computeRegion) { + std::string accName; + if (policy.isSeq(parDim)) + accName = "sequential"; + else if (policy.isVector(parDim)) + accName = "vector"; + else if (policy.isWorker(parDim)) + accName = "worker"; + else if (policy.isGang(parDim)) + accName = "gang"; + + if (!policy.isSeq(parDim)) { + if (std::optional constant = + computeRegion.getKnownConstantLaunchArg(parDim)) + accName += "(" + std::to_string(*constant) + ")"; + } + return accName; +} + +static std::string getGPUParDimName(acc::GPUParallelDimAttr parDim, + llvm::StringRef separator) { + auto formatDim = 
[&](llvm::StringRef prefix, char axis) { + return (prefix + separator).str() + axis; + }; + + if (parDim.isThreadX()) + return formatDim("threadidx", 'x'); + if (parDim.isThreadY()) + return formatDim("threadidx", 'y'); + if (parDim.isThreadZ()) + return formatDim("threadidx", 'z'); + if (parDim.isBlockX()) + return formatDim("blockidx", 'x'); + if (parDim.isBlockY()) + return formatDim("blockidx", 'y'); + if (parDim.isBlockZ()) + return formatDim("blockidx", 'z'); + return {}; +} + +static void emitLoopMappingRemark(acc::ComputeRegionOp computeRegion, + LoopLikeOpInterface loopOp, + acc::OpenACCSupport &accSupport, + const acc::ACCToGPUMappingPolicy &policy, + llvm::StringRef gpuDimSeparator) { + acc::GPUParallelDimsAttr parDimsAttr = + loopOp->getAttrOfType( + acc::GPUParallelDimsAttr::name); + + SmallVector seqParDims; + ArrayRef parDims; + if (parDimsAttr) { + parDims = parDimsAttr.getArray(); + } else if (isa(loopOp.getOperation())) { + seqParDims.push_back(acc::GPUParallelDimAttr::seqDim(loopOp->getContext())); + parDims = seqParDims; + } else { + return; + } + + accSupport.emitRemark( + loopOp, + [&]() { + SmallVector accMsgs; + SmallVector gpuMsgs; + + for (acc::GPUParallelDimAttr parDim : parDims) { + accMsgs.push_back(getACCParLevelName(parDim, policy, computeRegion)); + if (std::string gpuName = getGPUParDimName(parDim, gpuDimSeparator); + !gpuName.empty()) + gpuMsgs.push_back(std::move(gpuName)); + } + + std::string msg = "!$acc loop " + llvm::join(accMsgs, ", "); + + if (uint64_t collapseCount = acc::getCollapseCount(loopOp); + collapseCount > 1) + msg += " collapse(" + std::to_string(collapseCount) + ")"; + + if (!gpuMsgs.empty()) + msg += " ! 
" + llvm::join(gpuMsgs, " "); + return msg; + }, + DEBUG_TYPE); +} + +class ACCEmitRemarksLoop + : public acc::impl::ACCEmitRemarksLoopBase { +public: + using ACCEmitRemarksLoopBase::ACCEmitRemarksLoopBase; + + void runOnOperation() override { + func::FuncOp func = getOperation(); + acc::OpenACCSupport &accSupport = getAnalysis(); + acc::DefaultACCToGPUMappingPolicy policy; + if (gpuDimSeparator.empty()) + gpuDimSeparator = "."; + + func.walk([&](acc::ComputeRegionOp computeRegion) { + if (!shouldEmitLoopRemarks(computeRegion)) + return; + + computeRegion.getRegion().walk([&](LoopLikeOpInterface loopOp) { + emitLoopMappingRemark(computeRegion, loopOp, accSupport, policy, + gpuDimSeparator); + }); + }); + } +}; + +} // namespace diff --git a/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt index 5bb92592a6512..2ae3571673469 100644 --- a/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt @@ -4,6 +4,7 @@ add_mlir_dialect_library(MLIROpenACCTransforms ACCRoutineLowering.cpp ACCRoutineToGPUFunc.cpp ACCDeclareGPUModuleInsertion.cpp + ACCEmitRemarksLoop.cpp ACCIfClauseLowering.cpp ACCImplicitData.cpp ACCRecipeMaterialization.cpp diff --git a/mlir/test/Dialect/OpenACC/acc-emit-remarks-loop-pipeline.mlir b/mlir/test/Dialect/OpenACC/acc-emit-remarks-loop-pipeline.mlir new file mode 100644 index 0000000000000..c559ef68090e2 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/acc-emit-remarks-loop-pipeline.mlir @@ -0,0 +1,40 @@ +// RUN: mlir-opt %s -split-input-file -acc-compute-lowering -acc-emit-remarks-loop --remarks-filter="(open)?acc.*" 2>&1 | FileCheck %s + +// CHECK: remark: [Passed] openacc | Category:acc-emit-remarks-loop | Function=parallel_gang_loop | Remark="!$acc loop gang(10) ! 
blockidx.x" +func.func @parallel_gang_loop(%buf: memref<1xi32>) { + %c0 = arith.constant 0 : index + %c1_i32 = arith.constant 1 : i32 + %c10_i32 = arith.constant 10 : i32 + %c100_i32 = arith.constant 100 : i32 + + %dev = acc.copyin varPtr(%buf : memref<1xi32>) -> memref<1xi32> + acc.parallel num_gangs({%c10_i32 : i32}) dataOperands(%dev : memref<1xi32>) { + acc.loop gang control(%arg0 : i32) = (%c1_i32 : i32) to (%c100_i32 : i32) step (%c1_i32 : i32) { + memref.store %arg0, %dev[%c0] : memref<1xi32> + acc.yield + } attributes {independent = [#acc.device_type]} + acc.yield + } + acc.copyout accPtr(%dev : memref<1xi32>) to varPtr(%buf : memref<1xi32>) + return +} + +// ----- + +// CHECK: remark: [Passed] openacc | Category:acc-emit-remarks-loop | Function=parallel_loop_auto_collapse | Remark="!$acc loop sequential collapse(2)" +func.func @parallel_loop_auto_collapse(%buf: memref<1xi32>, %lb0 : index, %ub0 : index, %lb1 : index, %ub1 : index) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + + %dev = acc.copyin varPtr(%buf : memref<1xi32>) -> memref<1xi32> + acc.parallel dataOperands(%dev : memref<1xi32>) { + acc.loop control(%i : index, %j : index) = (%lb0, %lb1 : index, index) to (%ub0, %ub1 : index, index) step (%c1, %c1 : index, index) { + %vi = arith.index_cast %i : index to i32 + memref.store %vi, %dev[%c0] : memref<1xi32> + acc.yield + } attributes {auto_ = [#acc.device_type]} + acc.yield + } + acc.copyout accPtr(%dev : memref<1xi32>) to varPtr(%buf : memref<1xi32>) + return +} diff --git a/mlir/test/Dialect/OpenACC/acc-emit-remarks-loop.mlir b/mlir/test/Dialect/OpenACC/acc-emit-remarks-loop.mlir new file mode 100644 index 0000000000000..0c310510652e7 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/acc-emit-remarks-loop.mlir @@ -0,0 +1,155 @@ +// RUN: mlir-opt %s -split-input-file -acc-emit-remarks-loop --remarks-filter="(open)?acc.*" 2>&1 | FileCheck %s +// RUN: mlir-opt %s -split-input-file -acc-emit-remarks-loop='gpu-dim-separator=%' 
--remarks-filter="(open)?acc.*" 2>&1 | FileCheck %s --check-prefix=PERCENT + +// CHECK: remark: [Passed] openacc | Category:acc-emit-remarks-loop | Function=vector_loop | Remark="!$acc loop vector(128) ! threadidx.x" +func.func @vector_loop() { + %c128 = arith.constant 128 : index + acc.kernel_environment { + %w0 = acc.par_width %c128 {par_dim = #acc.par_dim} + acc.compute_region launch(%arg0 = %w0) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c128_inner = arith.constant 128 : index + scf.parallel (%iv) = (%c0) to (%c128_inner) step (%c1) { + scf.reduce + } {acc.par_dims = #acc} + acc.yield + } {origin = "acc.parallel"} + } + return +} + +// ----- + +// CHECK: remark: [Passed] openacc | Category:acc-emit-remarks-loop | Function=gang_loop | Remark="!$acc loop gang(8) ! blockidx.x" +func.func @gang_loop() { + %c8 = arith.constant 8 : index + acc.kernel_environment { + %w0 = acc.par_width %c8 {par_dim = #acc.par_dim} + acc.compute_region launch(%arg0 = %w0) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c8_inner = arith.constant 8 : index + scf.parallel (%iv) = (%c0) to (%c8_inner) step (%c1) { + scf.reduce + } {acc.par_dims = #acc} + acc.yield + } {origin = "acc.parallel"} + } + return +} + +// ----- + +// CHECK: remark: [Passed] openacc | Category:acc-emit-remarks-loop | Function=worker_loop | Remark="!$acc loop worker(4) ! 
threadidx.y" +func.func @worker_loop() { + %c4 = arith.constant 4 : index + acc.kernel_environment { + %w0 = acc.par_width %c4 {par_dim = #acc.par_dim} + acc.compute_region launch(%arg0 = %w0) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c8 = arith.constant 8 : index + scf.parallel (%iv) = (%c0) to (%c8) step (%c1) { + scf.reduce + } {acc.par_dims = #acc} + acc.yield + } {origin = "acc.parallel"} + } + return +} + +// ----- + +// CHECK: remark: [Passed] openacc | Category:acc-emit-remarks-loop | Function=sequential_loop | Remark="!$acc loop sequential" +func.func @sequential_loop() { + acc.kernel_environment { + acc.compute_region { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c4 = arith.constant 4 : index + scf.parallel (%iv) = (%c0) to (%c4) step (%c1) { + scf.reduce + } {acc.par_dims = #acc} + acc.yield + } {origin = "acc.kernels"} + } + return +} + +// ----- + +// CHECK: remark: [Passed] openacc | Category:acc-emit-remarks-loop | Function=block_and_vector | Remark="!$acc loop gang(8), vector(128) ! 
blockidx.x threadidx.x" +func.func @block_and_vector() { + %c8 = arith.constant 8 : index + %c128 = arith.constant 128 : index + acc.kernel_environment { + %w0 = acc.par_width %c8 {par_dim = #acc.par_dim} + %w1 = acc.par_width %c128 {par_dim = #acc.par_dim} + acc.compute_region launch(%arg0 = %w0, %arg1 = %w1) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c8_inner = arith.constant 8 : index + %c128_inner = arith.constant 128 : index + scf.parallel (%i, %j) = (%c0, %c0) to (%c8_inner, %c128_inner) step (%c1, %c1) { + scf.reduce + } {acc.par_dims = #acc} + acc.yield + } {origin = "acc.parallel"} + } + return +} + +// ----- + +// CHECK: remark: [Passed] openacc | Category:acc-emit-remarks-loop | Function=scf_for_sequential | Remark="!$acc loop sequential" +func.func @scf_for_sequential() { + acc.kernel_environment { + acc.compute_region { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c4 = arith.constant 4 : index + scf.for %iv = %c0 to %c4 step %c1 { + } + acc.yield + } {origin = "acc.parallel"} + } + return +} + +// ----- + +// CHECK: remark: [Passed] openacc | Category:acc-emit-remarks-loop | Function=collapse_loop | Remark="!$acc loop sequential collapse(2)" +func.func @collapse_loop() { + acc.kernel_environment { + acc.compute_region { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c4 = arith.constant 4 : index + scf.for %iv = %c0 to %c4 step %c1 { + } {acc.par_dims = #acc, acc.collapse_count = 2 : i64} + acc.yield + } {origin = "acc.parallel"} + } + return +} + +// ----- + +// PERCENT: remark: [Passed] openacc | Category:acc-emit-remarks-loop | Function=percent_separator | Remark="!$acc loop vector(128) ! 
threadidx%x" +func.func @percent_separator() { + %c128 = arith.constant 128 : index + acc.kernel_environment { + %w0 = acc.par_width %c128 {par_dim = #acc.par_dim} + acc.compute_region launch(%arg0 = %w0) { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c128_inner = arith.constant 128 : index + scf.parallel (%iv) = (%c0) to (%c128_inner) step (%c1) { + scf.reduce + } {acc.par_dims = #acc} + acc.yield + } {origin = "acc.parallel"} + } + return +} From b7234fca115d79a079f6241c9768287bc78e33a7 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 23 Jun 2026 20:26:14 +0000 Subject: [PATCH 260/511] [IR] Remove ProfileCount Abstraction This only exists to differentiate between real and synthetic profiles. Remove the abstraction now that we plan to fully remove synthetic profiles. Reviewers: mtrofin, david-xl Reviewed By: mtrofin Pull Request: https://github.com/llvm/llvm-project/pull/204770 --- .../llvm/Analysis/ProfileSummaryInfo.h | 4 +-- llvm/include/llvm/IR/Function.h | 26 ++------------- .../Utils/SampleProfileLoaderBaseImpl.h | 9 ++---- llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp | 2 +- llvm/lib/Analysis/InlineCost.cpp | 9 +++--- llvm/lib/Analysis/ProfileSummaryInfo.cpp | 2 +- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 3 +- llvm/lib/CodeGen/MIRSampleProfile.cpp | 1 - llvm/lib/CodeGen/MachineFunction.cpp | 4 +-- llvm/lib/IR/Function.cpp | 22 +++---------- llvm/lib/IR/ProfDataUtils.cpp | 6 ++-- .../AggressiveInstCombine.cpp | 4 +-- .../Transforms/IPO/FunctionSpecialization.cpp | 9 +++--- llvm/lib/Transforms/IPO/PartialInlining.cpp | 6 ++-- llvm/lib/Transforms/IPO/SampleProfile.cpp | 3 +- .../lib/Transforms/IPO/WholeProgramDevirt.cpp | 4 +-- .../Instrumentation/PGOInstrumentation.cpp | 12 +++---- .../Scalar/TailRecursionElimination.cpp | 8 ++--- llvm/lib/Transforms/Utils/CodeExtractor.cpp | 4 +-- llvm/lib/Transforms/Utils/InlineFunction.cpp | 32 ++++++++----------- .../Transforms/Utils/LowerMemIntrinsics.cpp | 5 ++- 
llvm/lib/Transforms/Utils/ProfileVerify.cpp | 4 +-- llvm/lib/Transforms/Vectorize/VPlan.cpp | 2 +- llvm/unittests/IR/MetadataTest.cpp | 7 ++-- 24 files changed, 63 insertions(+), 125 deletions(-) diff --git a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h index 547bc344d314b..55b195b93f6da 100644 --- a/llvm/include/llvm/Analysis/ProfileSummaryInfo.h +++ b/llvm/include/llvm/Analysis/ProfileSummaryInfo.h @@ -325,9 +325,7 @@ class ProfileSummaryInfo { template std::optional getEntryCount(const FuncT *F) const { - if (!F->getEntryCount().has_value()) - return std::nullopt; - return F->getEntryCount()->getCount(); + return F->getEntryCount(); } }; diff --git a/llvm/include/llvm/IR/Function.h b/llvm/include/llvm/IR/Function.h index 0b48e088c3db2..bd28f0d9902da 100644 --- a/llvm/include/llvm/IR/Function.h +++ b/llvm/include/llvm/IR/Function.h @@ -291,41 +291,19 @@ class LLVM_ABI Function : public GlobalObject, public ilist_node { } } - enum ProfileCountType { PCT_Real, PCT_Synthetic }; - - /// Class to represent profile counts. - /// - /// This class represents both real and synthetic profile counts. - class ProfileCount { - private: - uint64_t Count = 0; - ProfileCountType PCT = PCT_Real; - - public: - ProfileCount(uint64_t Count, ProfileCountType PCT) - : Count(Count), PCT(PCT) {} - uint64_t getCount() const { return Count; } - ProfileCountType getType() const { return PCT; } - bool isSynthetic() const { return PCT == PCT_Synthetic; } - }; - /// Set the entry count for this function. /// /// Entry count is the number of times this function was executed based on /// pgo data. \p Imports points to a set of GUIDs that needs to /// be imported by the function for sample PGO, to enable the same inlines as /// the profiled optimized binary. 
- void setEntryCount(ProfileCount Count, - const DenseSet *Imports = nullptr); - - /// A convenience wrapper for setting entry count - void setEntryCount(uint64_t Count, ProfileCountType Type = PCT_Real, + void setEntryCount(uint64_t Count, const DenseSet *Imports = nullptr); /// Get the entry count for this function. /// /// Entry count is the number of times the function was executed. - std::optional getEntryCount() const; + std::optional getEntryCount() const; /// Return true if the function is annotated with profile data. /// diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h index 2e47ba63abcf0..5346871249cf5 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -47,7 +47,6 @@ namespace llvm { using namespace sampleprof; using namespace sampleprofutil; -using ProfileCount = Function::ProfileCount; namespace vfs { class FileSystem; @@ -1069,9 +1068,7 @@ void SampleProfileLoaderBaseImpl::initWeightPropagation( // Sets the GUIDs that are inlined in the profiled binary. This is used // for ThinLink to make correct liveness analysis, and also make the IR // match the profiled binary before annotation. - getFunction(F).setEntryCount( - ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real), - &InlinedGUIDs); + getFunction(F).setEntryCount(Samples->getHeadSamples() + 1, &InlinedGUIDs); if (!SampleProfileUseProfi) { // Compute dominance and loop info needed for propagation. 
@@ -1101,9 +1098,7 @@ void SampleProfileLoaderBaseImpl::finalizeWeightPropagation( if (SampleProfileUseProfi) { const BasicBlockT *EntryBB = getEntryBB(&F); if (BlockWeights[EntryBB] > 0) { - getFunction(F).setEntryCount( - ProfileCount(BlockWeights[EntryBB], Function::PCT_Real), - &InlinedGUIDs); + getFunction(F).setEntryCount(BlockWeights[EntryBB], &InlinedGUIDs); } } } diff --git a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp index 36af4f146ca42..b1e7b09306ac5 100644 --- a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -593,7 +593,7 @@ BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F, if (!EntryCount) return std::nullopt; // Use 128 bit APInt to do the arithmetic to avoid overflow. - APInt BlockCount(128, EntryCount->getCount()); + APInt BlockCount(128, *EntryCount); APInt BlockFreq(128, Freq.getFrequency()); APInt EntryFreq(128, getEntryFreq().getFrequency()); BlockCount *= BlockFreq; diff --git a/llvm/lib/Analysis/InlineCost.cpp b/llvm/lib/Analysis/InlineCost.cpp index d975a93e9b1fd..55d5569dc6ba0 100644 --- a/llvm/lib/Analysis/InlineCost.cpp +++ b/llvm/lib/Analysis/InlineCost.cpp @@ -928,7 +928,7 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { // Make sure we have a nonzero entry count. auto EntryCount = F.getEntryCount(); - if (!EntryCount || !EntryCount->getCount()) + if (!EntryCount || *EntryCount == 0) return false; BlockFrequencyInfo *CalleeBFI = &(GetBFI(F)); @@ -1017,10 +1017,9 @@ class InlineCostCallAnalyzer final : public CallAnalyzer { // Compute the cycle savings per call. 
auto EntryProfileCount = F.getEntryCount(); - assert(EntryProfileCount && EntryProfileCount->getCount()); - auto EntryCount = EntryProfileCount->getCount(); - CycleSavings += EntryCount / 2; - CycleSavings = CycleSavings.udiv(EntryCount); + assert(EntryProfileCount && *EntryProfileCount); + CycleSavings += *EntryProfileCount / 2; + CycleSavings = CycleSavings.udiv(*EntryProfileCount); // Compute the total savings for the call site. auto *CallerBB = CandidateCall.getParent(); diff --git a/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/llvm/lib/Analysis/ProfileSummaryInfo.cpp index 59726e8ecdab1..a50c34d1b1007 100644 --- a/llvm/lib/Analysis/ProfileSummaryInfo.cpp +++ b/llvm/lib/Analysis/ProfileSummaryInfo.cpp @@ -114,7 +114,7 @@ bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) const { // FIXME: The heuristic used below for determining coldness is based on // preliminary SPEC tuning for inliner. This will eventually be a // convenience method that calls isHotCount. - return FunctionCount && isColdCount(FunctionCount->getCount()); + return FunctionCount && isColdCount(*FunctionCount); } /// Compute the hot and cold thresholds. diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d93e06f0300ed..aedc4956f1bd8 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1608,8 +1608,7 @@ void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { if (Features.FuncEntryCount) { OutStreamer->AddComment("function entry count"); auto MaybeEntryCount = MF.getFunction().getEntryCount(); - OutStreamer->emitULEB128IntValue( - MaybeEntryCount ? MaybeEntryCount->getCount() : 0); + OutStreamer->emitULEB128IntValue(MaybeEntryCount ? 
*MaybeEntryCount : 0); } const MachineBlockFrequencyInfo *MBFI = Features.BBFreq diff --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp index 913ad7a65d323..56ef30899879f 100644 --- a/llvm/lib/CodeGen/MIRSampleProfile.cpp +++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp @@ -38,7 +38,6 @@ using namespace llvm; using namespace sampleprof; using namespace llvm::sampleprofutil; -using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "fs-profile-loader" diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index 8246b77ac9dce..b0ffdb36f21be 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -1669,9 +1669,7 @@ template <> std::optional ProfileSummaryInfo::getEntryCount( const llvm::MachineFunction *F) const { - if (!F->getFunction().getEntryCount().has_value()) - return std::nullopt; - return F->getFunction().getEntryCount()->getCount(); + return F->getFunction().getEntryCount(); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index ff5a11fbb08cd..03e91bc8e2aa4 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -56,7 +56,6 @@ #include using namespace llvm; -using ProfileCount = Function::ProfileCount; // Explicit instantiations of SymbolTableListTraits since some of the methods // are not in the public header file... 
@@ -1079,29 +1078,18 @@ void Function::setValueSubclassDataBit(unsigned Bit, bool On) { setValueSubclassData(getSubclassDataFromValue() & ~(1 << Bit)); } -void Function::setEntryCount(ProfileCount Count, +void Function::setEntryCount(uint64_t Count, const DenseSet *S) { -#if !defined(NDEBUG) - auto PrevCount = getEntryCount(); - assert(!PrevCount || PrevCount->getType() == Count.getType()); -#endif - auto ImportGUIDs = getImportGUIDs(); if (S == nullptr && ImportGUIDs.size()) S = &ImportGUIDs; MDBuilder MDB(getContext()); - setMetadata( - LLVMContext::MD_prof, - MDB.createFunctionEntryCount(Count.getCount(), Count.isSynthetic(), S)); -} - -void Function::setEntryCount(uint64_t Count, Function::ProfileCountType Type, - const DenseSet *Imports) { - setEntryCount(ProfileCount(Count, Type), Imports); + setMetadata(LLVMContext::MD_prof, + MDB.createFunctionEntryCount(Count, false, S)); } -std::optional Function::getEntryCount() const { +std::optional Function::getEntryCount() const { MDNode *MD = getMetadata(LLVMContext::MD_prof); if (MD && MD->getOperand(0)) if (MDString *MDS = dyn_cast(MD->getOperand(0))) { @@ -1113,7 +1101,7 @@ std::optional Function::getEntryCount() const { // Treat this the same as unknown. if (Count == static_cast(-1)) return std::nullopt; - return ProfileCount(Count, PCT_Real); + return Count; } return std::nullopt; } diff --git a/llvm/lib/IR/ProfDataUtils.cpp b/llvm/lib/IR/ProfDataUtils.cpp index 09794478103ea..34d46cb062bc3 100644 --- a/llvm/lib/IR/ProfDataUtils.cpp +++ b/llvm/lib/IR/ProfDataUtils.cpp @@ -282,15 +282,13 @@ void llvm::setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, F = F ? 
F : I.getFunction(); assert(F && "Either pass a instruction attached to a Function, or explicitly " "pass the Function that it will be attached to"); - if (std::optional EC = F->getEntryCount(); - EC && EC->getCount() > 0) + if (std::optional EC = F->getEntryCount(); EC && *EC > 0) setExplicitlyUnknownBranchWeights(I, PassName); } MDNode *llvm::getExplicitlyUnknownBranchWeightsIfProfiled(Function &F, StringRef PassName) { - if (std::optional EC = F.getEntryCount(); - !EC || EC->getCount() == 0) + if (std::optional EC = F.getEntryCount(); !EC || *EC == 0) return nullptr; MDBuilder MDB(F.getContext()); return MDNode::get( diff --git a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp index 18af4448087d7..b3a89f5e16258 100644 --- a/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp +++ b/llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombine.cpp @@ -1953,8 +1953,8 @@ void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N, Function *F = CI->getFunction(); assert(F && "Instruction does not belong to a function!"); - std::optional EC = F->getEntryCount(); - if (EC && EC->getCount() > 0) + std::optional EC = F->getEntryCount(); + if (EC && *EC > 0) setExplicitlyUnknownBranchWeights(*CondBrInst, DEBUG_TYPE); } else { B.CreateBr(BBNE); diff --git a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp index a4844006319c9..e36090ac0e387 100644 --- a/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp +++ b/llvm/lib/Transforms/IPO/FunctionSpecialization.cpp @@ -795,14 +795,13 @@ bool FunctionSpecializer::run() { std::optional Count = BFI.getBlockProfileCount(Call->getParent()); if (Count && !ProfcheckDisableMetadataFixes) { - std::optional MaybeCloneCount = - Clone->getEntryCount(); + std::optional MaybeCloneCount = Clone->getEntryCount(); if (MaybeCloneCount) { - uint64_t CallCount = *Count + 
MaybeCloneCount->getCount(); + uint64_t CallCount = *Count + *MaybeCloneCount; Clone->setEntryCount(CallCount); - if (std::optional MaybeOriginalCount = + if (std::optional MaybeOriginalCount = S.F->getEntryCount()) { - uint64_t OriginalCount = MaybeOriginalCount->getCount(); + uint64_t OriginalCount = *MaybeOriginalCount; if (OriginalCount >= *Count) { S.F->setEntryCount(OriginalCount - *Count); } else { diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index 31eac4064aaa2..b95ab41993d19 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -1355,8 +1355,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { if (CalleeEntryCount) computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap); - uint64_t CalleeEntryCountV = - (CalleeEntryCount ? CalleeEntryCount->getCount() : 0); + uint64_t CalleeEntryCountV = (CalleeEntryCount ? *CalleeEntryCount : 0); bool AnyInline = false; for (User *User : Users) { @@ -1408,8 +1407,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { if (AnyInline) { Cloner.IsFunctionInlined = true; if (CalleeEntryCount) - Cloner.OrigFunc->setEntryCount(Function::ProfileCount( - CalleeEntryCountV, CalleeEntryCount->getType())); + Cloner.OrigFunc->setEntryCount(CalleeEntryCountV); OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc); OrigFuncORE.emit([&]() { return OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", Cloner.OrigFunc) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index ab324ec76baa5..4234e05430dbf 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -93,7 +93,6 @@ using namespace llvm; using namespace sampleprof; using namespace llvm::sampleprofutil; -using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "sample-profile" #define CSINLINE_DEBUG DEBUG_TYPE 
"-inline" @@ -2291,7 +2290,7 @@ bool SampleProfileLoader::runOnFunction(Function &F, // Initialize entry count when the function has no existing entry // count value. if (!F.getEntryCount()) - F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real)); + F.setEntryCount(initialEntryCount); auto &FAM = AM.getResult(*F.getParent()) .getManager(); ORE = &FAM.getResult(F); diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index c48771506b73f..d9598ff7b982f 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -1581,12 +1581,12 @@ void DevirtModule::applyICallBranchFunnel(VTableSlotInfo &SlotInfo, auto EC = BFI.getBlockFreq(&F.getEntryBlock()); auto CC = F.getEntryCount(); double CallCount = 0.0; - if (EC.getFrequency() != 0 && CC && CC->getCount() != 0) { + if (EC.getFrequency() != 0 && CC && *CC != 0) { double CallFreq = static_cast( BFI.getBlockFreq(CB.getParent()).getFrequency()) / EC.getFrequency(); - CallCount = CallFreq * CC->getCount(); + CallCount = CallFreq * *CC; } FunctionEntryCounts[&JT] += CallCount; } diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index b6d07aa821e7f..4a094460d6073 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -128,7 +128,6 @@ #include using namespace llvm; -using ProfileCount = Function::ProfileCount; using VPCandidateInfo = ValueProfileCollector::CandidateInfo; #define DEBUG_TYPE "pgo-instrumentation" @@ -1675,7 +1674,7 @@ void PGOUseFunc::populateCounters() { // Fix the obviously inconsistent entry count. 
if (FuncMaxCount > 0 && FuncEntryCount == 0) FuncEntryCount = 1; - F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real)); + F.setEntryCount(FuncEntryCount); markFunctionAttributes(FuncEntryCount, FuncMaxCount); LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile.")); @@ -1943,7 +1942,7 @@ static bool skipPGOGen(const Function &F) { return true; if (PGOInstrumentColdFunctionOnly) { if (auto EntryCount = F.getEntryCount()) - return EntryCount->getCount() > PGOColdInstrumentEntryThreshold; + return *EntryCount > PGOColdInstrumentEntryThreshold; return !PGOTreatUnknownAsCold; } return false; @@ -2031,8 +2030,7 @@ static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, BlockFrequencyInfo NBFI(F, NBPI, LI); #ifndef NDEBUG auto BFIEntryCount = F.getEntryCount(); - assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) && - "Invalid BFI Entrycount"); + assert(BFIEntryCount && (*BFIEntryCount > 0) && "Invalid BFI Entrycount"); #endif auto SumCount = APFloat::getZero(APFloat::IEEEdouble()); auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble()); @@ -2063,7 +2061,7 @@ static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI, if (NewEntryCount == 0) NewEntryCount = 1; if (NewEntryCount != FuncEntryCount) { - F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real)); + F.setEntryCount(NewEntryCount); LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName() << ", entry_count " << FuncEntryCount << " --> " << NewEntryCount << "\n"); @@ -2255,7 +2253,7 @@ static bool annotateAllFunctions( if (!Func.readCounters(AllZeros, PseudoKind)) continue; if (AllZeros) { - F.setEntryCount(ProfileCount(0, Function::PCT_Real)); + F.setEntryCount(0); if (Func.getProgramMaxCount() != 0) ColdFunctions.push_back(&F); continue; diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 98ba75d1d45ae..3c38171292718 100644 --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ 
b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -442,7 +442,7 @@ class TailRecursionEliminator { : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU), BFI(BFI), OrigEntryBBFreq( BFI ? BFI->getBlockFreq(&F.getEntryBlock()).getFrequency() : 0U), - OrigEntryCount(F.getEntryCount() ? F.getEntryCount()->getCount() : 0) { + OrigEntryCount(F.getEntryCount() ? *F.getEntryCount() : 0) { if (BFI) { // The assert is meant as API documentation for the caller. assert((OrigEntryCount != 0 && OrigEntryBBFreq != 0) && @@ -771,7 +771,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) { static_cast(OrigEntryBBFreq); auto ToSubtract = static_cast(std::round(RelativeBBFreq * OrigEntryCount)); - auto OldEntryCount = F.getEntryCount()->getCount(); + auto OldEntryCount = *F.getEntryCount(); if (OldEntryCount <= ToSubtract) { LLVM_DEBUG( errs() << "[TRE] The entrycount attributable to the recursive call, " @@ -779,7 +779,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) { << ", should be strictly lower than the function entry count, " << OldEntryCount << "\n"); } else { - F.setEntryCount(OldEntryCount - ToSubtract, F.getEntryCount()->getType()); + F.setEntryCount(OldEntryCount - ToSubtract); } } return true; @@ -992,7 +992,7 @@ PreservedAnalyses TailCallElimPass::run(Function &F, // the lines asking for the cached result, should they be nullptr (which, in // the case of the PDT, is likely), updates to the trees would be missed. auto *BFI = (!ForceDisableBFI && UpdateFunctionEntryCount && - F.getEntryCount().has_value() && F.getEntryCount()->getCount()) + F.getEntryCount().has_value() && *F.getEntryCount()) ? 
&AM.getResult(F) : nullptr; auto &ORE = AM.getResult(F); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp index 7ffa99878cf74..a462d181b2235 100644 --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -66,7 +66,6 @@ using namespace llvm; using namespace llvm::PatternMatch; -using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "code-extractor" @@ -1092,8 +1091,7 @@ Function *CodeExtractor::constructFunctionDeclaration( if (BFI) { auto Count = BFI->getProfileCountFromFreq(EntryFreq); if (Count.has_value()) - newFunction->setEntryCount( - ProfileCount(*Count, Function::PCT_Real)); // FIXME + newFunction->setEntryCount(*Count); } return newFunction; diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index be186ffbf7e42..c8fe3da956c76 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -86,7 +86,6 @@ using namespace llvm; using namespace llvm::memprof; -using ProfileCount = Function::ProfileCount; static cl::opt EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), @@ -2162,15 +2161,14 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock, /// Update the branch metadata for cloned call instructions. static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, - const ProfileCount &CalleeEntryCount, + const uint64_t &CalleeEntryCount, const CallBase &TheCall, ProfileSummaryInfo *PSI, BlockFrequencyInfo *CallerBFI) { - if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1) + if (CalleeEntryCount < 1) return; auto CallSiteCount = PSI ? 
PSI->getProfileCount(TheCall, CallerBFI) : std::nullopt; - int64_t CallCount = - std::min(CallSiteCount.value_or(0), CalleeEntryCount.getCount()); + int64_t CallCount = std::min(CallSiteCount.value_or(0), CalleeEntryCount); updateProfileCallee(Callee, -CallCount, &VMap); } @@ -2181,14 +2179,12 @@ void llvm::updateProfileCallee( if (!CalleeCount) return; - const uint64_t PriorEntryCount = CalleeCount->getCount(); - // Since CallSiteCount is an estimate, it could exceed the original callee // count and has to be set to 0 so guard against underflow. const uint64_t NewEntryCount = - (EntryDelta < 0 && static_cast(-EntryDelta) > PriorEntryCount) + (EntryDelta < 0 && static_cast(-EntryDelta) > *CalleeCount) ? 0 - : PriorEntryCount + EntryDelta; + : *CalleeCount + EntryDelta; auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount, const uint64_t PriorEntryCount) { @@ -2199,18 +2195,18 @@ void llvm::updateProfileCallee( // During inlining ? if (VMap) { - uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount; + uint64_t CloneEntryCount = *CalleeCount - NewEntryCount; for (auto Entry : *VMap) { if (isa(Entry.first)) if (auto *CI = dyn_cast_or_null(Entry.second)) { - CI->updateProfWeight(CloneEntryCount, PriorEntryCount); - updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount); + CI->updateProfWeight(CloneEntryCount, *CalleeCount); + updateVTableProfWeight(CI, CloneEntryCount, *CalleeCount); } if (isa(Entry.first)) if (auto *II = dyn_cast_or_null(Entry.second)) { - II->updateProfWeight(CloneEntryCount, PriorEntryCount); - updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount); + II->updateProfWeight(CloneEntryCount, *CalleeCount); + updateVTableProfWeight(II, CloneEntryCount, *CalleeCount); } } } @@ -2223,12 +2219,12 @@ void llvm::updateProfileCallee( if (!VMap || VMap->count(&BB)) for (Instruction &I : BB) { if (CallInst *CI = dyn_cast(&I)) { - CI->updateProfWeight(NewEntryCount, PriorEntryCount); - updateVTableProfWeight(CI, 
NewEntryCount, PriorEntryCount); + CI->updateProfWeight(NewEntryCount, *CalleeCount); + updateVTableProfWeight(CI, NewEntryCount, *CalleeCount); } if (InvokeInst *II = dyn_cast(&I)) { - II->updateProfWeight(NewEntryCount, PriorEntryCount); - updateVTableProfWeight(II, NewEntryCount, PriorEntryCount); + II->updateProfWeight(NewEntryCount, *CalleeCount); + updateVTableProfWeight(II, NewEntryCount, *CalleeCount); } } } diff --git a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp index c934940264c36..198a950c7c961 100644 --- a/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/llvm/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -72,9 +72,8 @@ struct LoopExpansionInfo { std::optional getAverageMemOpLoopTripCount(const MemIntrinsic &I) { if (ProfcheckDisableMetadataFixes) return std::nullopt; - if (std::optional EC = - I.getFunction()->getEntryCount(); - !EC || !EC->getCount()) + if (std::optional EC = I.getFunction()->getEntryCount(); + !EC || *EC == 0) return std::nullopt; if (const auto Len = I.getLengthInBytes()) return Len->getZExtValue(); diff --git a/llvm/lib/Transforms/Utils/ProfileVerify.cpp b/llvm/lib/Transforms/Utils/ProfileVerify.cpp index 70376edd0d32e..099ad965219a5 100644 --- a/llvm/lib/Transforms/Utils/ProfileVerify.cpp +++ b/llvm/lib/Transforms/Utils/ProfileVerify.cpp @@ -120,7 +120,7 @@ bool ProfileInjector::inject() { F.setEntryCount(DefaultFunctionEntryCount); // If there is an entry count that's 0, then don't bother injecting. We won't // verify these either. - if (F.getEntryCount()->getCount() == 0) + if (*F.getEntryCount() == 0) return false; bool Changed = false; // Cycle through the weights list. 
If we didn't, tests with more than (say) @@ -248,7 +248,7 @@ PreservedAnalyses ProfileVerifierPass::run(Function &F, emitProfileError("function entry count missing (set to 0 if cold)", F); return PreservedAnalyses::all(); } - } else if (EntryCount->getCount() == 0) { + } else if (*EntryCount == 0) { return PreservedAnalyses::all(); } for (const auto &BB : F) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 8565bb85c4314..83333aae39cf9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -1796,7 +1796,7 @@ void LoopVectorizationPlanner::updateLoopMetadataAndProfileInfo( unsigned AverageVectorTripCount = 0; unsigned RemainderAverageTripCount = 0; auto EC = VectorLoop->getLoopPreheader()->getParent()->getEntryCount(); - auto IsProfiled = EC && EC->getCount(); + auto IsProfiled = EC && *EC != 0; if (!OrigAverageTripCount) { if (!IsProfiled) return; diff --git a/llvm/unittests/IR/MetadataTest.cpp b/llvm/unittests/IR/MetadataTest.cpp index 3b5978792c849..469c012984ff2 100644 --- a/llvm/unittests/IR/MetadataTest.cpp +++ b/llvm/unittests/IR/MetadataTest.cpp @@ -5215,14 +5215,13 @@ TEST_F(FunctionAttachmentTest, Verifier) { EXPECT_FALSE(verifyFunction(*F)); } -TEST_F(FunctionAttachmentTest, RealEntryCount) { +TEST_F(FunctionAttachmentTest, EntryCount) { Function *F = getFunction("foo"); EXPECT_FALSE(F->getEntryCount().has_value()); - F->setEntryCount(12304, Function::PCT_Real); + F->setEntryCount(12304); auto Count = F->getEntryCount(); EXPECT_TRUE(Count.has_value()); - EXPECT_EQ(12304u, Count->getCount()); - EXPECT_EQ(Function::PCT_Real, Count->getType()); + EXPECT_EQ(12304u, *Count); } TEST_F(FunctionAttachmentTest, SubprogramAttachment) { From 2ae4849f91986c2d2d8015a95b6647f59bc2af89 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 23 Jun 2026 13:27:04 -0700 Subject: [PATCH 261/511] [dsymutil] Reuse a single thread pool across architectures (#204691) dsymutil 
links the architectures of a universal binary on a thread pool, and the parallel linker's DWARFLinkerImpl::link() then created a second pool to link each architecture's object files. With one such inner pool per architecture, dsymutil spun up more worker threads than the machine has cores. Add DWARFLinkerBase::setThreadPool() so the caller provides the pool. The parallel linker schedules the object files on it as a ThreadPoolTaskGroup. dsymutil hands over the pool it already uses to schedule the architectures, llvm-dwarfutil passes one sized by --num-threads, and the classic linker ignores it and manages its own threads (always 2 for the lockstep algorithm). The per-compile-unit cloning still runs on the global llvm::parallel executor, whose per-thread allocators are indexed by getThreadIndex(), so it can't move onto this pool. --- .../llvm/DWARFLinker/Classic/DWARFLinker.h | 3 ++ .../llvm/DWARFLinker/DWARFLinkerBase.h | 3 ++ .../DWARFLinker/Parallel/DWARFLinkerImpl.cpp | 7 ++-- .../DWARFLinker/Parallel/DWARFLinkerImpl.h | 6 +++ llvm/tools/dsymutil/DwarfLinkerForBinary.cpp | 1 + llvm/tools/dsymutil/DwarfLinkerForBinary.h | 7 +++- llvm/tools/dsymutil/dsymutil.cpp | 37 +++++++------------ llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp | 11 +++++- 8 files changed, 44 insertions(+), 31 deletions(-) diff --git a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h index 81f4545dfbf4c..4ba1f007b0d00 100644 --- a/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h +++ b/llvm/include/llvm/DWARFLinker/Classic/DWARFLinker.h @@ -277,6 +277,9 @@ class LLVM_ABI DWARFLinker : public DWARFLinkerBase { Options.Threads = NumThreads; } + /// The classic linker does not use a shared thread pool. + void setThreadPool(ThreadPoolInterface *Pool) override {} + /// Add kind of accelerator tables to be generated. 
void addAccelTableKind(AccelTableKind Kind) override { assert(!llvm::is_contained(Options.AccelTables, Kind)); diff --git a/llvm/include/llvm/DWARFLinker/DWARFLinkerBase.h b/llvm/include/llvm/DWARFLinker/DWARFLinkerBase.h index 30290cc77332b..e883e21e60539 100644 --- a/llvm/include/llvm/DWARFLinker/DWARFLinkerBase.h +++ b/llvm/include/llvm/DWARFLinker/DWARFLinkerBase.h @@ -21,6 +21,7 @@ #include namespace llvm { class DWARFUnit; +class ThreadPoolInterface; namespace dwarf_linker { @@ -138,6 +139,8 @@ class DWARFLinkerBase { virtual void setObjectPrefixMap(ObjectPrefixMapTy *Map) = 0; /// Set target DWARF version. virtual Error setTargetDWARFVersion(uint16_t TargetDWARFVersion) = 0; + /// Set the thread pool used to link the object files. + virtual void setThreadPool(ThreadPoolInterface *Pool) = 0; }; } // end namespace dwarf_linker } // end namespace llvm diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp index 27fefc3231993..fc608c2cb7a20 100644 --- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.cpp @@ -195,17 +195,16 @@ Error DWARFLinkerImpl::link() { GlobalData.error(std::move(Err), Context->InputDWARFFile.FileName); } } else { - DefaultThreadPool Pool(llvm::parallel::strategy); + assert(ThreadPool && "setThreadPool() must be called before link()"); + ThreadPoolTaskGroup Group(*ThreadPool); for (std::unique_ptr &Context : ObjectContexts) - Pool.async([&]() { + Group.async([&]() { // Link object file. if (Error Err = Context->link(ArtificialTypeUnit.get())) GlobalData.error(std::move(Err), Context->InputDWARFFile.FileName); if (Error Err = Context->unloadInput()) GlobalData.error(std::move(Err), Context->InputDWARFFile.FileName); }); - - Pool.wait(); } // Merge staged parseable Swift interface entries into the shared map. 
Done diff --git a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.h b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.h index 09a59222ae354..95104c6830c2c 100644 --- a/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.h +++ b/llvm/lib/DWARFLinker/Parallel/DWARFLinkerImpl.h @@ -89,6 +89,9 @@ class DWARFLinkerImpl : public DWARFLinker { GlobalData.Options.Threads = NumThreads; } + /// Use the specified thread pool to link the object files. + void setThreadPool(ThreadPoolInterface *Pool) override { ThreadPool = Pool; } + /// Add kind of accelerator tables to be generated. void addAccelTableKind(AccelTableKind Kind) override { assert(!llvm::is_contained(GlobalData.getOptions().AccelTables, Kind)); @@ -431,6 +434,9 @@ class DWARFLinkerImpl : public DWARFLinker { /// Hanler for output sections. SectionHandlerTy SectionHandler = nullptr; + + /// Thread pool that links the object files; must be set before link(). + ThreadPoolInterface *ThreadPool = nullptr; /// @} }; diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp index a6a566e4470a3..1e51cb85c9ca3 100644 --- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp +++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp @@ -743,6 +743,7 @@ bool DwarfLinkerForBinary::linkImpl( GeneralLinker->setNumThreads(Options.Threads); GeneralLinker->setPrependPath(Options.PrependPath); GeneralLinker->setKeepFunctionForStatic(Options.KeepFunctionForStatic); + GeneralLinker->setThreadPool(ThreadPool); GeneralLinker->setInputVerificationHandler( [&](const DWARFFile &File, llvm::StringRef Output) { std::lock_guard Guard(ErrorHandlerMutex); diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.h b/llvm/tools/dsymutil/DwarfLinkerForBinary.h index 507a523ba4530..62e33a33d960e 100644 --- a/llvm/tools/dsymutil/DwarfLinkerForBinary.h +++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.h @@ -21,6 +21,7 @@ #include namespace llvm { +class ThreadPoolInterface; using namespace dwarf_linker; namespace dsymutil { @@
-73,9 +74,10 @@ struct ObjectWithRelocMap { class DwarfLinkerForBinary { public: DwarfLinkerForBinary(raw_fd_ostream &OutFile, BinaryHolder &BinHolder, - LinkOptions Options, std::mutex &ErrorHandlerMutex) + LinkOptions Options, std::mutex &ErrorHandlerMutex, + ThreadPoolInterface *ThreadPool = nullptr) : OutFile(OutFile), BinHolder(BinHolder), Options(std::move(Options)), - ErrorHandlerMutex(ErrorHandlerMutex) {} + ErrorHandlerMutex(ErrorHandlerMutex), ThreadPool(ThreadPool) {} /// Link the contents of the DebugMap. bool link(const DebugMap &); @@ -295,6 +297,7 @@ class DwarfLinkerForBinary { BinaryHolder &BinHolder; LinkOptions Options; std::mutex &ErrorHandlerMutex; + ThreadPoolInterface *ThreadPool; std::vector EmptyWarnings; diff --git a/llvm/tools/dsymutil/dsymutil.cpp b/llvm/tools/dsymutil/dsymutil.cpp index 45b9d68a03f16..76c4d4140488c 100644 --- a/llvm/tools/dsymutil/dsymutil.cpp +++ b/llvm/tools/dsymutil/dsymutil.cpp @@ -869,18 +869,10 @@ int dsymutil_main(int argc, char **argv, const llvm::ToolContext &) { } Options.LinkOpts.ResourceDir = OutputLocationOrErr->getResourceDir(); - // Statistics only require different architectures to be processed - // sequentially, the link itself can still happen in parallel. Change the - // thread pool strategy here instead of modifying LinkOpts.Threads. - ThreadPoolStrategy S = hardware_concurrency( - Options.LinkOpts.Statistics ? 1 : Options.LinkOpts.Threads); - if (Options.LinkOpts.Threads == 0) { - // If NumThreads is not specified, create one thread for each input, up to - // the number of hardware threads. - S.ThreadsRequested = DebugMapPtrsOrErr->size(); - S.Limit = true; - } - DefaultThreadPool Threads(S); + // Use a single thread for --statistics and --verbose (which forces one + // thread) so the per-architecture link output is emitted in order. + DefaultThreadPool ThreadPool(hardware_concurrency( + Options.LinkOpts.Statistics ? 
1 : Options.LinkOpts.Threads)); // If there is more than one link to execute, we need to generate // temporary files. @@ -900,11 +892,10 @@ int dsymutil_main(int argc, char **argv, const llvm::ToolContext &) { const bool Crashed = !CRC.RunSafely([&]() { for (auto &Map : *DebugMapPtrsOrErr) { - if (Options.LinkOpts.Verbose || Options.DumpDebugMap) + if (Options.DumpDebugMap) { Map->print(outs()); - - if (Options.DumpDebugMap) continue; + } if (Map->begin() == Map->end()) { if (!Options.LinkOpts.Quiet) { @@ -950,8 +941,12 @@ int dsymutil_main(int argc, char **argv, const llvm::ToolContext &) { auto LinkLambda = [&, OutputFile](std::shared_ptr Stream) { + // Print the debug map here, on the thread that links it, so in verbose + // mode each map is printed right before its architecture's link output. + if (Options.LinkOpts.Verbose) + Map->print(outs()); DwarfLinkerForBinary Linker(*Stream, BinHolder, Options.LinkOpts, - ErrorHandlerMutex); + ErrorHandlerMutex, &ThreadPool); AllOK.fetch_and(Linker.link(*Map)); Stream->flush(); if (flagIsSet(Options.Verify, DWARFVerify::Output) || @@ -963,16 +958,10 @@ int dsymutil_main(int argc, char **argv, const llvm::ToolContext &) { } }; - // FIXME: The DwarfLinker can have some very deep recursion that can max - // out the (significantly smaller) stack when using threads. We don't - // want this limitation when we only have a single thread.
- if (S.ThreadsRequested == 1) - LinkLambda(OS); - else - Threads.async(LinkLambda, OS); + ThreadPool.async(LinkLambda, OS); } - Threads.wait(); + ThreadPool.wait(); }); if (Crashed) diff --git a/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp index ccc1324bc66d7..70f4b05ba2943 100644 --- a/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp +++ b/llvm/tools/llvm-dwarfutil/DebugInfoLinker.cpp @@ -15,7 +15,10 @@ #include "llvm/DebugInfo/DWARF/DWARFContext.h" #include "llvm/DebugInfo/DWARF/LowLevel/DWARFExpression.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Support/ThreadPool.h" +#include "llvm/Support/Threading.h" #include +#include #include namespace llvm { @@ -345,6 +348,11 @@ Error linkDebugInfoImpl(object::ObjectFile &File, const Options &Options, else return StreamerOrErr.takeError(); + // The parallel linker links the object files on this pool; it must outlive + // link() below. The classic linker ignores it. + DefaultThreadPool ThreadPool(hardware_concurrency(Options.NumThreads)); + DebugInfoLinker->setThreadPool(&ThreadPool); + if constexpr (std::is_same::value) { DebugInfoLinker->setOutputDWARFHandler( @@ -354,8 +362,9 @@ Error linkDebugInfoImpl(object::ObjectFile &File, const Options &Options, Streamer->emitSectionContents(Section->getContents(), Section->getKind()); }); - } else + } else { DebugInfoLinker->setOutputDWARFEmitter(Streamer.get()); + } DebugInfoLinker->setEstimatedObjfilesAmount(1); DebugInfoLinker->setNumThreads(Options.NumThreads); From cb2c81576776fd243e3d11cccc531f754b6be03d Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Tue, 23 Jun 2026 13:56:35 -0700 Subject: [PATCH 262/511] [gn build] Port commits (#205428) 55d7f777d958 582a20269e2c --- llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn 
b/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn index 2091b0abe7b4b..107109cdfd47f 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn @@ -44,6 +44,7 @@ static_library("profile") { "InstrProfilingPlatformGPU.c", "InstrProfilingPlatformLinux.c", "InstrProfilingPlatformOther.c", + "InstrProfilingPlatformROCm.cpp", "InstrProfilingPlatformWindows.c", "InstrProfilingPort.h", "InstrProfilingRuntime.cpp", diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn index 4cca154f94a71..d743074210ce4 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/RISCV/BUILD.gn @@ -155,6 +155,7 @@ static_library("LLVMRISCVCodeGen") { "RISCVPostRAExpandPseudoInsts.cpp", "RISCVPromoteConstant.cpp", "RISCVPushPopOptimizer.cpp", + "RISCVQCRelaxMarking.cpp", "RISCVRedundantCopyElimination.cpp", "RISCVRegisterInfo.cpp", "RISCVSelectionDAGInfo.cpp", From 417939f4ea18ab63908beffa5f689fd0cee1976d Mon Sep 17 00:00:00 2001 From: Luke Hutton Date: Tue, 23 Jun 2026 22:02:23 +0100 Subject: [PATCH 263/511] [mlir][tosa] Add support for matmul_t (#203894) Adds support for matmul_t added to the specification in https://github.com/arm/tosa-specification/pull/50. This includes: - Operator definition - Verification logic for the operator - Output shape inference for the operator - Validation checks to ensure compliance with the TOSA specification including profile compliance and level checks. - Support for matmul_t to matmul in the downgrade pass. Note that MXFP support will be added in a follow-up commit. 
--- .../Dialect/Tosa/IR/TosaComplianceData.h.inc | 55 +++++ .../mlir/Dialect/Tosa/IR/TosaOpBase.td | 6 + mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h | 6 + mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td | 38 ++++ mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 188 ++++++++++++++---- .../Tosa/Transforms/TosaDowngrade1p1To1p0.cpp | 83 +++++++- .../Tosa/Transforms/TosaProfileCompliance.cpp | 9 + .../Tosa/Transforms/TosaValidation.cpp | 4 +- mlir/test/Dialect/Tosa/dynamic_extension.mlir | 16 ++ mlir/test/Dialect/Tosa/invalid_extension.mlir | 17 ++ mlir/test/Dialect/Tosa/level_check.mlir | 9 + mlir/test/Dialect/Tosa/ops.mlir | 81 ++++++++ .../Dialect/Tosa/profile_all_unsupported.mlir | 7 + .../Tosa/tosa-downgrade-1-1-to-1-0.mlir | 34 ++++ mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir | 66 ++++++ .../tosa-validation-version-1p0-invalid.mlir | 10 + .../tosa-validation-version-1p1-valid.mlir | 10 + mlir/test/Dialect/Tosa/verifier.mlir | 76 ++++++- 18 files changed, 669 insertions(+), 46 deletions(-) diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc b/mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc index 7db696cc935ea..45d56416fdba0 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaComplianceData.h.inc @@ -60,6 +60,13 @@ profileComplianceMap = { {{{fp16T, fp16T, fp16T, fp16T, fp16T}, SpecificationVersion::V_1_0}, {{fp16T, fp16T, fp16T, fp16T, fp32T}, SpecificationVersion::V_1_0}, {{fp32T, fp32T, fp32T, fp32T, fp32T}, SpecificationVersion::V_1_0}}}}}, + {"tosa.matmul_t", + {{{Profile::pro_int}, + {{{i8T, i8T, i32T}, SpecificationVersion::V_1_1_DRAFT}}}, + {{Profile::pro_fp}, + {{{fp16T, fp16T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp16T, fp16T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp32T, fp32T, fp32T}, SpecificationVersion::V_1_1_DRAFT}}}}}, {"tosa.max_pool2d", {{{Profile::pro_int}, {{{i8T, i8T}, SpecificationVersion::V_1_0}}}, {{Profile::pro_fp}, @@ -687,6 +694,54 @@ 
extensionComplianceMap = { allOf}, {{Extension::bf16}, {{{bf16T, bf16T, bf16T, bf16T, fp32T}, SpecificationVersion::V_1_0}}}}}, + {"tosa.matmul_t", + {{{Extension::int16}, + {{{i16T, i16T, i48T}, SpecificationVersion::V_1_1_DRAFT}}}, + {{Extension::fp8e4m3}, + {{{fp8e4m3T, fp8e4m3T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp8e4m3T, fp16T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp16T, fp8e4m3T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp8e4m3T, fp8e4m3T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp8e4m3T, fp16T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp8e4m3T, fp32T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp16T, fp8e4m3T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp32T, fp8e4m3T, fp32T}, SpecificationVersion::V_1_1_DRAFT}}}, + {{Extension::fp8e4m3, Extension::fp8e5m2}, + {{{fp8e4m3T, fp8e5m2T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp8e5m2T, fp8e4m3T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp8e4m3T, fp8e5m2T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp8e5m2T, fp8e4m3T, fp32T}, SpecificationVersion::V_1_1_DRAFT}}, + allOf}, + {{Extension::bf16, Extension::fp8e4m3}, + {{{fp8e4m3T, bf16T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{bf16T, fp8e4m3T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp8e4m3T, bf16T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{bf16T, fp8e4m3T, fp32T}, SpecificationVersion::V_1_1_DRAFT}}, + allOf}, + {{Extension::fp8e5m2}, + {{{fp8e5m2T, fp8e5m2T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp8e5m2T, fp16T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp16T, fp8e5m2T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp8e5m2T, fp8e5m2T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp8e5m2T, fp16T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp8e5m2T, fp32T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp16T, fp8e5m2T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp32T, fp8e5m2T, fp32T}, SpecificationVersion::V_1_1_DRAFT}}}, + 
{{Extension::bf16, Extension::fp8e5m2}, + {{{fp8e5m2T, bf16T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{bf16T, fp8e5m2T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp8e5m2T, bf16T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{bf16T, fp8e5m2T, fp32T}, SpecificationVersion::V_1_1_DRAFT}}, + allOf}, + {{Extension::bf16}, + {{{fp16T, bf16T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{bf16T, fp16T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{bf16T, bf16T, fp16T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp16T, bf16T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{bf16T, fp16T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{bf16T, fp32T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{fp32T, bf16T, fp32T}, SpecificationVersion::V_1_1_DRAFT}, + {{bf16T, bf16T, fp32T}, SpecificationVersion::V_1_1_DRAFT}}}}}, {"tosa.matmul_t_block_scaled", {{{Extension::mxfp}, {{{fp4e2m1T, fp8ue8m0T, fp4e2m1T, fp8ue8m0T, fp32T}, diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td index 591073e9985ae..3c7d870a44879 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOpBase.td @@ -166,6 +166,12 @@ def Tosa_MatMulOpQuantInfoBuilder : OpBuilder< a, b); }]>; +def Tosa_MatMulTOpQuantInfoBuilder + : OpBuilder<(ins "Type":$outputType, "Value":$a, "Value":$b), [{ + buildMatMulTOpWithQuantInfo($_builder, $_state, outputType, + a, b); + }]>; + // Both the tosa.avg_pool2d and unary ops use the same // UnaryOpQuantizationAttr but the avg_pool operator has its own builder as it // has additional parameters not part of the unary ops. 
diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h index e0626368175ee..2d96fb6891139 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h @@ -135,6 +135,12 @@ RankedTensorType getVariableType(VariableOp variableOp); // Returns the bitwidth of a TOSA tensor element type unsigned getBitWidth(Type type); +// Returns the storage element type for a given type +Type getStorageElementTypeOrSelf(Type type); + +// Returns the storage element type for a given value +Type getStorageElementTypeOrSelf(Value value); + } // namespace tosa } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td index a333505082b7e..c01f32239a59f 100644 --- a/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td +++ b/mlir/include/mlir/Dialect/Tosa/IR/TosaOps.td @@ -454,6 +454,44 @@ def Tosa_MatMulOp : Tosa_InferShapedTypeOp<"matmul", [NoMemoryEffect]> { "operands attr-dict `:` functional-type(operands, results)"; } +//===----------------------------------------------------------------------===// +// Operator: matmul_t +//===----------------------------------------------------------------------===// +def Tosa_MatMulTOp : Tosa_InferShapedTypeOp<"matmul_t", [NoMemoryEffect]> { + let summary = "Matrix multiplication with transposed B operator."; + + let description = [{ + Performs two dimensional matrix multiplications. `A` matrix is of shape + `N x H x C`. `B` matrix is of shape `D x W x C`. This is effectively a + matrix multiply of `A` by the transposed `B` matrix. If the batched + dimension of input `B` is of size 1, the `B` matrix is broadcast. 
+ }]; + + let arguments = (ins Tosa_Tensor3D:$a, Tosa_Tensor3D:$b, + Tosa_ScalarIntOrFloatTensor:$a_zp, Tosa_ScalarIntOrFloatTensor:$b_zp); + + let results = (outs Tosa_Tensor3D:$output); + + list availability = + [Profile<[Tosa_PRO_INT, Tosa_PRO_FP]>, + Extension<[Tosa_EXT_INT16, Tosa_EXT_FP8E4M3, Tosa_EXT_FP8E5M2, + Tosa_EXT_BF16]>, + ]; + + let extraClassDeclaration = [{ + FailureOr getAZeroPoint(); + FailureOr getBZeroPoint(); + LogicalResult verifyAZeroPoint(int64_t zp); + LogicalResult verifyBZeroPoint(int64_t zp); + }]; + + let builders = [Tosa_MatMulTOpQuantInfoBuilder]; + let hasVerifier = 1; + + let assemblyFormat = + "operands attr-dict `:` functional-type(operands, results)"; +} + //===----------------------------------------------------------------------===// // Operator: matmul_t_block_scaled //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 3c54d7448d021..c4705c8080069 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -581,14 +581,14 @@ static std::optional idivCheck(const int64_t lhs, const int64_t rhs) { return lhs / rhs; } -static Type getStorageElementTypeOrSelf(Type type) { +Type mlir::tosa::getStorageElementTypeOrSelf(Type type) { auto srcType = getElementTypeOrSelf(type); if (auto quantType = llvm::dyn_cast(srcType)) srcType = getStorageElementTypeFromQuantized(quantType); return srcType; } -static Type getStorageElementTypeOrSelf(Value value) { +Type mlir::tosa::getStorageElementTypeOrSelf(Value value) { return getStorageElementTypeOrSelf(value.getType()); } @@ -1447,18 +1447,14 @@ buildTransConvOpWithQuantInfo(OpBuilder &builder, OperationState &result, result.addTypes(finalOutputType); } -/// The tosa.matmul op is also intended to be generated where a fully_connected -/// op must be constructed where the weight is not a constant. 
In this case, -/// the fully_connected op must be expressed using matmul. -/// TODO: Add link to the leglization document explaining this. -static void buildMatMulOpWithQuantInfo(OpBuilder &builder, - OperationState &result, Type outputType, - Value a, Value b) { - auto zps = createZPsAsConst(builder, a, b); +static void buildMatMulLikeOpWithQuantInfo(OpBuilder &builder, + OperationState &result, + Type outputType, Value a, Value b) { + const std::pair zps = createZPsAsConst(builder, a, b); result.addOperands({a, b, zps.first, zps.second}); Type finalOutputType{outputType}; - if (auto quantAttr = buildMatMulOpQuantizationAttr(builder, a, b)) { + if (buildMatMulOpQuantizationAttr(builder, a, b)) { auto eType = getStorageElementTypeOrSelf(a.getType()); auto inputBits = eType.getIntOrFloatBitWidth(); @@ -1476,6 +1472,18 @@ static void buildMatMulOpWithQuantInfo(OpBuilder &builder, result.addTypes(finalOutputType); } +static void buildMatMulOpWithQuantInfo(OpBuilder &builder, + OperationState &result, Type outputType, + Value a, Value b) { + buildMatMulLikeOpWithQuantInfo(builder, result, outputType, a, b); +} + +static void buildMatMulTOpWithQuantInfo(OpBuilder &builder, + OperationState &result, Type outputType, + Value a, Value b) { + buildMatMulLikeOpWithQuantInfo(builder, result, outputType, a, b); +} + /// Both the tosa.avg_pool2d and unary ops use the same /// UnaryOpQuantizationAttr but avg_pool operator has its own builder as it /// has additional parameters not part of the unary ops. 
@@ -2052,12 +2060,9 @@ LogicalResult tosa::MatMulOp::inferReturnTypeComponents( return success(); } -LogicalResult MatMulOp::verify() { - const ShapeAdaptor aShape(getA().getType()); - const ShapeAdaptor bShape(getB().getType()); - const Type aElementType = aShape.getElementType(); - const Type bElementType = bShape.getElementType(); - +template +static LogicalResult verifyMatMulQuantizedOperandsType(T op, Type aElementType, + Type bElementType) { const auto aQuantizedEType = llvm::dyn_cast(aElementType); const auto bQuantizedEType = @@ -2065,33 +2070,52 @@ LogicalResult MatMulOp::verify() { if (aQuantizedEType || bQuantizedEType) { if (!aQuantizedEType || !bQuantizedEType) { - return emitOpError("expect operands to be both quantized or both not " - "quantized, got ") + return op.emitOpError("expect operands to be both quantized or both not " + "quantized, got ") << aElementType << " and " << bElementType; } // both a and b have quantized element types auto aQuantWidth = aQuantizedEType.getStorageTypeIntegralWidth(); auto bQuantWidth = bQuantizedEType.getStorageTypeIntegralWidth(); if (aQuantWidth != bQuantWidth) { - return emitOpError("expect quantized operands to have same widths, got ") + return op.emitOpError("expect quantized operands to have same widths, " + "got ") << aQuantWidth << " and " << bQuantWidth; } } - // check a_zp and b_zp - auto aEType = getStorageElementTypeOrSelf(aElementType); - auto aZpEType = getStorageElementTypeOrSelf(getAZp().getType()); - if (aEType != aZpEType) - return emitOpError("expect input a and a_zp have the same " - "element type, got ") - << aEType << " and " << aZpEType; + return success(); +} - const Type bEType = getStorageElementTypeOrSelf(bElementType); - const Type bZpEType = getStorageElementTypeOrSelf(getBZp().getType()); - if (bEType != bZpEType) - return emitOpError("expect input b and b_zp have the same " - "element type, got ") - << bEType << " and " << bZpEType; +template +static LogicalResult 
verifyMatMulZeroPointType(T op, Value input, Value zp, + StringRef inputName, + StringRef zpName) { + const Type inputStorageElementType = getStorageElementTypeOrSelf(input); + const Type zpElementType = getStorageElementTypeOrSelf(zp); + + if (inputStorageElementType != zpElementType) + return op.emitOpError("expect input ") + << inputName << " and " << zpName + << " have the same element type, got " << inputStorageElementType + << " and " << zpElementType; + + return success(); +} + +LogicalResult MatMulOp::verify() { + const ShapeAdaptor aShape(getA().getType()); + const ShapeAdaptor bShape(getB().getType()); + const Type aElementType = aShape.getElementType(); + const Type bElementType = bShape.getElementType(); + + if (failed( + verifyMatMulQuantizedOperandsType(*this, aElementType, bElementType))) + return failure(); + + if (failed(verifyMatMulZeroPointType(*this, getA(), getAZp(), "a", "a_zp")) || + failed(verifyMatMulZeroPointType(*this, getB(), getBZp(), "b", "b_zp"))) + return failure(); FailureOr maybeAZp = getAZeroPoint(); if (succeeded(maybeAZp) && verifyAZeroPoint(*maybeAZp).failed()) @@ -2125,15 +2149,97 @@ LogicalResult MatMulOp::verify() { const SmallVector expectedOutputShape = {N, H, W}; const auto outputType = cast(getResult().getType()); if (outputType.hasRank() && - failed( - verifyCompatibleShape(outputType.getShape(), expectedOutputShape))) { - InFlightDiagnostic opError = emitOpError("expected output shape "); - printShapeToDiagnostic(opError, outputType.getShape()); - opError << " to be compatible with expected output shape "; - printShapeToDiagnostic(opError, expectedOutputShape); - return opError; + failed(verifyOutputShapeCompatibleWithExpected(getOperation(), outputType, + expectedOutputShape))) + return failure(); + + return success(); +} + +LogicalResult tosa::MatMulTOp::inferReturnTypeComponents( + MLIRContext *context, ::std::optional location, + MatMulTOp::Adaptor adaptor, + SmallVectorImpl &inferredReturnShapes) { + const 
ShapeAdaptor lhsShape(adaptor.getA().getType()); + const ShapeAdaptor rhsShape(adaptor.getB().getType()); + + SmallVector outShape(3, ShapedType::kDynamic); + + if (lhsShape.hasRank()) { + outShape[0] = lhsShape.getDimSize(0); + outShape[1] = lhsShape.getDimSize(1); + } + + if (rhsShape.hasRank()) { + const int64_t bBatchSize = rhsShape.getDimSize(0); + if (bBatchSize != 1 && ShapedType::isDynamic(outShape[0])) + outShape[0] = bBatchSize; + outShape[2] = rhsShape.getDimSize(1); } + inferredReturnShapes.push_back(ShapedTypeComponents(outShape)); + return success(); +} + +LogicalResult MatMulTOp::verify() { + const ShapeAdaptor aShape(getA().getType()); + const ShapeAdaptor bShape(getB().getType()); + const Type aElementType = aShape.getElementType(); + const Type bElementType = bShape.getElementType(); + + if (failed( + verifyMatMulQuantizedOperandsType(*this, aElementType, bElementType))) + return failure(); + + if (failed(verifyMatMulZeroPointType(*this, getA(), getAZp(), "a", "a_zp")) || + failed(verifyMatMulZeroPointType(*this, getB(), getBZp(), "b", "b_zp"))) + return failure(); + + FailureOr maybeAZp = getAZeroPoint(); + if (succeeded(maybeAZp) && verifyAZeroPoint(*maybeAZp).failed()) + return failure(); + + FailureOr maybeBZp = getBZeroPoint(); + if (succeeded(maybeBZp) && verifyBZeroPoint(*maybeBZp).failed()) + return failure(); + + // Verify input/output shapes + int64_t N = ShapedType::kDynamic; + int64_t D = ShapedType::kDynamic; + int64_t H = ShapedType::kDynamic; + int64_t W = ShapedType::kDynamic; + int64_t C = ShapedType::kDynamic; + + if (aShape.hasRank()) { + N = aShape.getDimSize(0); + H = aShape.getDimSize(1); + C = aShape.getDimSize(2); + } + + if (bShape.hasRank()) { + D = bShape.getDimSize(0); + W = bShape.getDimSize(1); + if (failed(tryUpdateDimOrFailure(*this, C, bShape.getDimSize(2), "b", + "channels"))) + return failure(); + } + + // Verify B batch size is broadcast compatible with A. 
+ if (ShapedType::isStatic(N) && ShapedType::isStatic(D) && N != D && D != 1) + return emitOpError("expect B matrix batch size to be broadcast compatible " + "with A, got D=") + << D << " vs N=" << N; + + if (ShapedType::isDynamic(N) && ShapedType::isStatic(D) && D != 1) + N = D; + + const SmallVector expectedOutputShape = {N, H, W}; + const auto outputType = cast(getResult().getType()); + if (outputType.hasRank() && + failed(verifyOutputShapeCompatibleWithExpected(getOperation(), outputType, + expectedOutputShape))) + return failure(); + return success(); } @@ -3195,6 +3301,8 @@ ZERO_POINT_HELPER(AvgPool2dAdaptiveOp, Input, true) ZERO_POINT_HELPER(AvgPool2dAdaptiveOp, Output, true) ZERO_POINT_HELPER(MatMulOp, A, true) ZERO_POINT_HELPER(MatMulOp, B, true) +ZERO_POINT_HELPER(MatMulTOp, A, true) +ZERO_POINT_HELPER(MatMulTOp, B, true) ZERO_POINT_HELPER(NegateOp, Input1, true) ZERO_POINT_HELPER(NegateOp, Output, true) ZERO_POINT_HELPER(RescaleOp, Input, !getInputUnsigned()) diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaDowngrade1p1To1p0.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaDowngrade1p1To1p0.cpp index cfd2dd9d29650..163850f741166 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaDowngrade1p1To1p0.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaDowngrade1p1To1p0.cpp @@ -16,6 +16,8 @@ #include "mlir/Dialect/Tosa/Transforms/Passes.h" #include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Dialect/Tosa/IR/TosaOps.h" +#include "mlir/Dialect/Tosa/Utils/ConversionUtils.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -140,6 +142,83 @@ class BoolScatterRewrite : public OpRewritePattern { } }; +static LogicalResult isMatMulTTypeCompatibleForDowngrade(tosa::MatMulTOp op) { + const Type aElementType = getStorageElementTypeOrSelf(op.getA().getType()); + const Type bElementType = getStorageElementTypeOrSelf(op.getB().getType()); + const Type outputElementType = + getStorageElementTypeOrSelf(op.getOutput().getType()); + + if 
(aElementType != bElementType) + return failure(); + + if ((aElementType.isF16() && outputElementType.isF16()) || + (aElementType.isF16() && outputElementType.isF32()) || + (aElementType.isF32() && outputElementType.isF32()) || + (aElementType.isBF16() && outputElementType.isF32()) || + (aElementType.isInteger(8) && outputElementType.isInteger(32)) || + (aElementType.isInteger(16) && outputElementType.isInteger(48)) || + (isa(aElementType) && outputElementType.isF16()) || + (isa(aElementType) && outputElementType.isF16())) + return success(); + + return failure(); +} + +class MatMulTRewrite : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(tosa::MatMulTOp op, + PatternRewriter &rewriter) const override { + if (failed(isMatMulTTypeCompatibleForDowngrade(op))) + return rewriter.notifyMatchFailure( + op, "expected 1.0-compatible matmul_t element types"); + + const Type aType = op.getA().getType(); + const Type bType = op.getB().getType(); + const ShapeAdaptor aShape(aType); + const ShapeAdaptor bShape(bType); + if (!aShape.hasRank() || !bShape.hasRank()) + return rewriter.notifyMatchFailure(op, "expected ranked A and B tensors"); + + const int64_t dSize = bShape.getDimSize(0); + const int64_t nSize = aShape.getDimSize(0); + + // To convert broadcasting behaviour to TOSA 1.0, we're required to tile the + // input. TOSA 1.0 does not support shape expressions, so the batch size + // must be known at compile time. 
+ if (ShapedType::isDynamic(dSize) || + (dSize == 1 && ShapedType::isDynamic(nSize))) + return rewriter.notifyMatchFailure( + op, "expected known batch size for broadcast"); + + const int64_t wSize = bShape.getDimSize(1); + const int64_t cSize = bShape.getDimSize(2); + const Location loc = op.getLoc(); + const RankedTensorType transposedBType = + cast(bType).clone({dSize, cSize, wSize}); + auto transpose = + tosa::TransposeOp::create(rewriter, loc, transposedBType, op.getB(), + rewriter.getDenseI32ArrayAttr({0, 2, 1})); + Value matMulB = transpose.getOutput(); + + // Matmul does not support broadcasting, so tile b if required + if (dSize == 1 && nSize != 1) { + const RankedTensorType tiledBType = + cast(bType).clone({nSize, cSize, wSize}); + const Value multiples = getTosaConstShape(rewriter, loc, {nSize, 1, 1}); + auto tile = + tosa::TileOp::create(rewriter, loc, tiledBType, matMulB, multiples); + matMulB = tile.getOutput(); + } + + auto matmul = tosa::MatMulOp::create(rewriter, loc, op.getType(), op.getA(), + matMulB, op.getAZp(), op.getBZp()); + rewriter.replaceOp(op, matmul.getOutput()); + return success(); + } +}; + struct TosaDowngrade1p1To1p0Pass : public tosa::impl::TosaDowngrade1p1To1p0PassBase< TosaDowngrade1p1To1p0Pass> { @@ -150,8 +229,8 @@ struct TosaDowngrade1p1To1p0Pass func::FuncOp func = getOperation(); RewritePatternSet patterns(&context); - patterns.add( - &context); + patterns.add(&context); FrozenRewritePatternSet frozenPatterns(std::move(patterns)); if (failed(applyPatternsGreedily(func, frozenPatterns))) diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp index 1bf92ff562a84..0b4983bea4bc6 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaProfileCompliance.cpp @@ -256,6 +256,14 @@ LogicalResult ProfileInfoDepot::populateProfileInfo(tosa::MatMulOp op) { return success(); } +template <> +LogicalResult 
ProfileInfoDepot::populateProfileInfo(tosa::MatMulTOp op) { + addValue(op.getA()); + addValue(op.getB()); + addValue(op.getOutput()); + return success(); +} + template <> LogicalResult ProfileInfoDepot::populateProfileInfo(tosa::VariableOp op) { addType(op.getType()); @@ -315,6 +323,7 @@ LogicalResult ProfileInfoDepot::populatationDispatch(Operation *op) { POPULATE_PROFILE_INFO_CUSTOM(Select) POPULATE_PROFILE_INFO_CUSTOM(Rescale) POPULATE_PROFILE_INFO_CUSTOM(MatMul) + POPULATE_PROFILE_INFO_CUSTOM(MatMulT) POPULATE_PROFILE_INFO_CUSTOM(Variable) POPULATE_PROFILE_INFO_CUSTOM(VariableWrite) POPULATE_PROFILE_INFO_CUSTOM(Dim) diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp index 4a29e5327cc55..5d4a2affbd7cd 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaValidation.cpp @@ -107,7 +107,8 @@ static LogicalResult checkConstantOperandConvOps(Operation *op, static LogicalResult checkConstantOperandMatMul(Operation *op, const TargetEnv &env) { - if (!env.allows(Extension::dynamic) && isa(op)) { + if (!env.allows(Extension::dynamic) && + isa(op)) { // Check 'A_zp' and 'B_zp' return checkConstantOperands(op, {2, 3}); } @@ -838,6 +839,7 @@ LogicalResult TosaValidation::levelCheckRanksAndSizes(Operation *op) { CHECK_SIZES(TransposeConv2D); CHECK_SIZES(FFT2d); CHECK_SIZES(MatMul); + CHECK_SIZES(MatMulT); CHECK_SIZES(MatmulTBlockScaled); CHECK_SIZES(MaxPool2d); CHECK_SIZES(MaxPool2dAdaptive); diff --git a/mlir/test/Dialect/Tosa/dynamic_extension.mlir b/mlir/test/Dialect/Tosa/dynamic_extension.mlir index 5f5ab795459f7..bb27f4bbbd8af 100644 --- a/mlir/test/Dialect/Tosa/dynamic_extension.mlir +++ b/mlir/test/Dialect/Tosa/dynamic_extension.mlir @@ -73,6 +73,22 @@ func.func @test_matmul_non_const_zps(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1 // ----- +func.func @test_matmul_t_non_const_a_zp(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1x28x19xf32>, %a_zp: 
tensor<1xf32>) -> tensor<1x14x28xf32> { + %b_zp = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> + %0 = tosa.matmul_t %arg0, %arg1, %a_zp, %b_zp : (tensor<1x14x19xf32>, tensor<1x28x19xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x14x28xf32> + return %0 : tensor<1x14x28xf32> +} + +// ----- + +func.func @test_matmul_t_non_const_b_zp(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1x28x19xf32>, %b_zp: tensor<1xf32>) -> tensor<1x14x28xf32> { + %a_zp = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> + %0 = tosa.matmul_t %arg0, %arg1, %a_zp, %b_zp : (tensor<1x14x19xf32>, tensor<1x28x19xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x14x28xf32> + return %0 : tensor<1x14x28xf32> +} + +// ----- + func.func @test_negate_non_const_zps(%arg0: tensor<1xf32>, %input1_zp: tensor<1xf32>, %output_zp: tensor<1xf32>) -> tensor<1xf32> { %0 = tosa.negate %arg0, %input1_zp, %output_zp {} : (tensor<1xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1xf32> return %0 : tensor<1xf32> diff --git a/mlir/test/Dialect/Tosa/invalid_extension.mlir b/mlir/test/Dialect/Tosa/invalid_extension.mlir index cd82e314e3b6e..c69ea83f5ef36 100644 --- a/mlir/test/Dialect/Tosa/invalid_extension.mlir +++ b/mlir/test/Dialect/Tosa/invalid_extension.mlir @@ -518,6 +518,23 @@ func.func @test_matmul_non_const_b_zp(%arg0: tensor<1x14x19xf32>, %arg1: tensor< // ----- +func.func @test_matmul_t_non_const_a_zp(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1x28x19xf32>, %a_zp: tensor<1xf32>, %b_zp: tensor<1xf32>) -> tensor<1x14x28xf32> { + // expected-error@+1 {{'tosa.matmul_t' op expected compile time resolvable constant, but got variable value for operand #2}} + %0 = tosa.matmul_t %arg0, %arg1, %a_zp, %b_zp : (tensor<1x14x19xf32>, tensor<1x28x19xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x14x28xf32> + return %0 : tensor<1x14x28xf32> +} + +// ----- + +func.func @test_matmul_t_non_const_b_zp(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1x28x19xf32>, %b_zp: tensor<1xf32>) 
-> tensor<1x14x28xf32> { + %a_zp = "tosa.const"() {values = dense<0.0> : tensor<1xf32> } : () -> tensor<1xf32> + // expected-error@+1 {{'tosa.matmul_t' op expected compile time resolvable constant, but got variable value for operand #3}} + %0 = tosa.matmul_t %arg0, %arg1, %a_zp, %b_zp : (tensor<1x14x19xf32>, tensor<1x28x19xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x14x28xf32> + return %0 : tensor<1x14x28xf32> +} + +// ----- + func.func @test_mul_non_const(%arg0: tensor<13x21x3xi8>, %arg1: tensor<13x1x3xi8>, %shift: tensor<1xi8>) -> tensor<13x21x3xi32> { // expected-error@+1 {{'tosa.mul' op expected compile time resolvable constant, but got variable value for operand #2}} %0 = tosa.mul %arg0, %arg1, %shift : (tensor<13x21x3xi8>, tensor<13x1x3xi8>, tensor<1xi8>) -> tensor<13x21x3xi32> diff --git a/mlir/test/Dialect/Tosa/level_check.mlir b/mlir/test/Dialect/Tosa/level_check.mlir index 85b5d002434bc..8f91d2f40d37d 100644 --- a/mlir/test/Dialect/Tosa/level_check.mlir +++ b/mlir/test/Dialect/Tosa/level_check.mlir @@ -1278,6 +1278,15 @@ func.func @test_matmul_tensor_size_invalid(%arg0: tensor<23178x20000x19xf32>, %a // ----- +func.func @test_matmul_t_tensor_size_invalid(%arg0: tensor<23178x20000x19xf32>, %arg1: tensor<23178x28x19xf32>) -> tensor<23178x20000x28xf32> { + %zero = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32> + // expected-error@+1 {{'tosa.matmul_t' op failed level check: operand tensor size (in bytes) <= (1 << MAX_LOG2_SIZE - 1)}} + %0 = tosa.matmul_t %arg0, %arg1, %zero, %zero : (tensor<23178x20000x19xf32>, tensor<23178x28x19xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<23178x20000x28xf32> + return %0 : tensor<23178x20000x28xf32> +} + +// ----- + func.func @test_gather_tensor_size_invalid(%arg0: tensor<536870912x21x3xf32>, %arg1: tensor<536870912x26xi32>) -> tensor<536870912x26x3xf32> { // expected-error@+1 {{'tosa.gather' op failed level check: operand tensor size (in bytes) <= (1 << MAX_LOG2_SIZE - 1)}} %0 = tosa.gather 
%arg0, %arg1 : (tensor<536870912x21x3xf32>, tensor<536870912x26xi32>) -> tensor<536870912x26x3xf32> diff --git a/mlir/test/Dialect/Tosa/ops.mlir b/mlir/test/Dialect/Tosa/ops.mlir index 50f28df37a169..262b8aa548506 100644 --- a/mlir/test/Dialect/Tosa/ops.mlir +++ b/mlir/test/Dialect/Tosa/ops.mlir @@ -232,6 +232,87 @@ func.func @test_matmul(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1x19x28xf32>) - return %0 : tensor<1x14x28xf32> } +// ----- +// CHECK-LABEL: test_matmul_t +func.func @test_matmul_t(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1x28x19xf32>) -> tensor<1x14x28xf32> { +%azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +%bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +%0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<1x14x19xf32>, tensor<1x28x19xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x14x28xf32> + return %0 : tensor<1x14x28xf32> +} + +// ----- +// CHECK-LABEL: test_matmul_t_i8 +func.func @test_matmul_t_i8(%arg0: tensor<2x14x19xi8>, %arg1: tensor<2x28x19xi8>) -> tensor<2x14x28xi32> { +%azp0 = "tosa.const"() <{values = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8> +%bzp0 = "tosa.const"() <{values = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8> +%0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<2x14x19xi8>, tensor<2x28x19xi8>, tensor<1xi8>, tensor<1xi8>) -> tensor<2x14x28xi32> + return %0 : tensor<2x14x28xi32> +} + +// ----- +// CHECK-LABEL: test_matmul_t_quantized_i8 +func.func @test_matmul_t_quantized_i8(%arg0: tensor<2x14x19x!quant.uniform>, %arg1: tensor<2x28x19x!quant.uniform>) -> tensor<2x14x28xi32> { +%azp0 = "tosa.const"() <{values = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8> +%bzp0 = "tosa.const"() <{values = dense<0> : tensor<1xi8>}> : () -> tensor<1xi8> +%0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<2x14x19x!quant.uniform>, tensor<2x28x19x!quant.uniform>, tensor<1xi8>, tensor<1xi8>) -> tensor<2x14x28xi32> + return %0 : tensor<2x14x28xi32> +} + 
+// ----- +// CHECK-LABEL: test_matmul_t_f16_acc_f32 +func.func @test_matmul_t_f16_acc_f32(%arg0: tensor<2x14x19xf16>, %arg1: tensor<2x28x19xf16>) -> tensor<2x14x28xf32> { +%azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf16>}> : () -> tensor<1xf16> +%bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf16>}> : () -> tensor<1xf16> +%0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<2x14x19xf16>, tensor<2x28x19xf16>, tensor<1xf16>, tensor<1xf16>) -> tensor<2x14x28xf32> + return %0 : tensor<2x14x28xf32> +} + +// ----- +// CHECK-LABEL: test_matmul_t_bf16 +func.func @test_matmul_t_bf16(%arg0: tensor<2x14x19xbf16>, %arg1: tensor<2x28x19xbf16>) -> tensor<2x14x28xf32> { +%azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xbf16>}> : () -> tensor<1xbf16> +%bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xbf16>}> : () -> tensor<1xbf16> +%0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<2x14x19xbf16>, tensor<2x28x19xbf16>, tensor<1xbf16>, tensor<1xbf16>) -> tensor<2x14x28xf32> + return %0 : tensor<2x14x28xf32> +} + +// ----- +// CHECK-LABEL: test_matmul_t_fp8_mixed +func.func @test_matmul_t_fp8_mixed(%arg0: tensor<2x14x19xf8E4M3FN>, %arg1: tensor<2x28x19xf8E5M2>) -> tensor<2x14x28xf16> { +%azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E4M3FN>}> : () -> tensor<1xf8E4M3FN> +%bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E5M2>}> : () -> tensor<1xf8E5M2> +%0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<2x14x19xf8E4M3FN>, tensor<2x28x19xf8E5M2>, tensor<1xf8E4M3FN>, tensor<1xf8E5M2>) -> tensor<2x14x28xf16> + return %0 : tensor<2x14x28xf16> +} + +// ----- +// CHECK-LABEL: test_matmul_t_dynamic_dims +func.func @test_matmul_t_dynamic_dims(%arg0: tensor, %arg1: tensor) -> tensor { +%azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +%bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +%0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor, tensor, 
tensor<1xf32>, tensor<1xf32>) -> tensor + return %0 : tensor +} + +// ----- +// CHECK-LABEL: test_matmul_t_static_broadcast +func.func @test_matmul_t_static_broadcast(%arg0: tensor<4x14x19xf32>, %arg1: tensor<1x28x19xf32>) -> tensor<4x14x28xf32> { +%azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +%bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +%0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<4x14x19xf32>, tensor<1x28x19xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<4x14x28xf32> + return %0 : tensor<4x14x28xf32> +} + +// ----- +// CHECK-LABEL: test_matmul_t_unranked +func.func @test_matmul_t_unranked(%arg0: tensor<*xf32>, %arg1: tensor<*xf32>) -> tensor<*xf32> { +%azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +%bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +%0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<*xf32>, tensor<*xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<*xf32> + return %0 : tensor<*xf32> +} + // ----- // CHECK-LABEL: max_pool2d_f32 func.func @test_max_pool2d_f32(%arg0: tensor<1x32x32x8xf32>) -> tensor<1x32x32x8xf32> { diff --git a/mlir/test/Dialect/Tosa/profile_all_unsupported.mlir b/mlir/test/Dialect/Tosa/profile_all_unsupported.mlir index f9011766c885d..caa547a0830e7 100644 --- a/mlir/test/Dialect/Tosa/profile_all_unsupported.mlir +++ b/mlir/test/Dialect/Tosa/profile_all_unsupported.mlir @@ -88,6 +88,13 @@ func.func @test_matmul(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1x19x28xf32>, % return %0 : tensor<1x14x28xf32> } +// ----- +func.func @test_matmul_t(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1x28x19xf32>, %arg2: tensor<1xf32>) -> tensor<1x14x28xf32> { + // expected-error@+1 {{'tosa.matmul_t' op illegal: requires specification version compatible with 1.1.draft (got 1.0) and requires any of [pro_fp] profiles/extensions to be specified in the target environment}} + %0 = tosa.matmul_t 
%arg0, %arg1, %arg2, %arg2: (tensor<1x14x19xf32>, tensor<1x28x19xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x14x28xf32> + return %0 : tensor<1x14x28xf32> +} + // ----- func.func @test_sigmoid(%arg0: tensor<13x21x3xf32>) -> tensor<13x21x3xf32> { // expected-error@+1 {{'tosa.sigmoid' op illegal: requires any of [pro_fp] profiles/extensions to be specified in the target environment}} diff --git a/mlir/test/Dialect/Tosa/tosa-downgrade-1-1-to-1-0.mlir b/mlir/test/Dialect/Tosa/tosa-downgrade-1-1-to-1-0.mlir index 5427d9119b7af..378e5eb7b0147 100644 --- a/mlir/test/Dialect/Tosa/tosa-downgrade-1-1-to-1-0.mlir +++ b/mlir/test/Dialect/Tosa/tosa-downgrade-1-1-to-1-0.mlir @@ -116,3 +116,37 @@ func.func @test_preserve_scatter_i8_i32(%arg0: tensor<13x52x3xi8>, %arg1: tensor %0 = tosa.scatter %arg0, %arg1, %arg2 : (tensor<13x52x3xi8>, tensor<13x26xi32>, tensor<13x26x3xi8>) -> tensor<13x52x3xi8> return %0 : tensor<13x52x3xi8> } + +// ----- + +// CHECK-LABEL: @test_matmul_t_static_batch +// CHECK: %[[TRANSPOSE:.+]] = tosa.transpose %arg1 {perms = array} : (tensor<4x28x19xf32>) -> tensor<4x19x28xf32> +// CHECK: %[[MATMUL:.+]] = tosa.matmul %arg0, %[[TRANSPOSE]], %arg2, %arg3 : (tensor<4x14x19xf32>, tensor<4x19x28xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<4x14x28xf32> +// CHECK: return %[[MATMUL]] +func.func @test_matmul_t_static_batch(%arg0: tensor<4x14x19xf32>, %arg1: tensor<4x28x19xf32>, %arg2: tensor<1xf32>, %arg3: tensor<1xf32>) -> tensor<4x14x28xf32> { + %0 = tosa.matmul_t %arg0, %arg1, %arg2, %arg3 : (tensor<4x14x19xf32>, tensor<4x28x19xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<4x14x28xf32> + return %0 : tensor<4x14x28xf32> +} + +// ----- + +// CHECK-LABEL: @test_matmul_t_static_broadcast +// CHECK: %[[MULTIPLES:.+]] = tosa.const_shape {values = dense<[4, 1, 1]> : tensor<3xindex>} : () -> !tosa.shape<3> +// CHECK: %[[TRANSPOSE:.+]] = tosa.transpose %arg1 {perms = array} : (tensor<1x28x19xf32>) -> tensor<1x19x28xf32> +// CHECK: %[[TILE:.+]] = tosa.tile 
%[[TRANSPOSE]], %[[MULTIPLES]] : (tensor<1x19x28xf32>, !tosa.shape<3>) -> tensor<4x19x28xf32> +// CHECK: %[[MATMUL:.+]] = tosa.matmul %arg0, %[[TILE]], %arg2, %arg3 : (tensor<4x14x19xf32>, tensor<4x19x28xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<4x14x28xf32> +// CHECK: return %[[MATMUL]] +func.func @test_matmul_t_static_broadcast(%arg0: tensor<4x14x19xf32>, %arg1: tensor<1x28x19xf32>, %arg2: tensor<1xf32>, %arg3: tensor<1xf32>) -> tensor<4x14x28xf32> { + %0 = tosa.matmul_t %arg0, %arg1, %arg2, %arg3 : (tensor<4x14x19xf32>, tensor<1x28x19xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<4x14x28xf32> + return %0 : tensor<4x14x28xf32> +} + +// ----- + +// CHECK-LABEL: @test_preserve_matmul_t_dynamic_broadcast +// CHECK: %[[MATMUL_T:.+]] = tosa.matmul_t %arg0, %arg1, %arg2, %arg3 : (tensor<4x14x19xf32>, tensor, tensor<1xf32>, tensor<1xf32>) -> tensor<4x14x28xf32> +// CHECK: return %[[MATMUL_T]] +func.func @test_preserve_matmul_t_dynamic_broadcast(%arg0: tensor<4x14x19xf32>, %arg1: tensor, %arg2: tensor<1xf32>, %arg3: tensor<1xf32>) -> tensor<4x14x28xf32> { + %0 = tosa.matmul_t %arg0, %arg1, %arg2, %arg3 : (tensor<4x14x19xf32>, tensor, tensor<1xf32>, tensor<1xf32>) -> tensor<4x14x28xf32> + return %0 : tensor<4x14x28xf32> +} diff --git a/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir b/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir index cfe4ea4782037..d0d1906524f83 100644 --- a/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir +++ b/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir @@ -365,6 +365,72 @@ func.func @test_unranked_zero_points_matmul(%arg0: tensor<1x2x3xf32>, %arg1: ten // ----- +// CHECK-LABEL: @test_static_matmul_t +func.func @test_static_matmul_t(%arg0 : tensor<2x3x4xi32>, %arg1 : tensor<2x5x4xi32>) -> () { + // CHECK: tosa.matmul_t %arg0, %arg1, %0, %1 : (tensor<2x3x4xi32>, tensor<2x5x4xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2x3x5xi32> + %0 = "tosa.const"() <{values = dense<0> : tensor<1xi32>}> : () -> tensor<1xi32> + %1 = "tosa.const"() <{values = 
dense<0> : tensor<1xi32>}> : () -> tensor<1xi32> + %2 = tosa.matmul_t %arg0, %arg1, %0, %1 : (tensor<2x3x4xi32>, tensor<2x5x4xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<*xi32> + return +} + +// ----- + +// CHECK-LABEL: @test_dynamic_lhs_matmul_t +func.func @test_dynamic_lhs_matmul_t(%arg0 : tensor, %arg1 : tensor<2x5x4xi32>) -> () { + // CHECK: tosa.matmul_t %arg0, %arg1, %0, %1 : (tensor, tensor<2x5x4xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<2x?x5xi32> + %0 = "tosa.const"() <{values = dense<0> : tensor<1xi32>}> : () -> tensor<1xi32> + %1 = "tosa.const"() <{values = dense<0> : tensor<1xi32>}> : () -> tensor<1xi32> + %2 = tosa.matmul_t %arg0, %arg1, %0, %1 : (tensor, tensor<2x5x4xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + return +} + +// ----- + +// CHECK-LABEL: @test_dynamic_rhs_matmul_t +func.func @test_dynamic_rhs_matmul_t(%arg0 : tensor<2x3x4xi32>, %arg1 : tensor) -> () { + // CHECK: tosa.matmul_t %arg0, %arg1, %0, %1 : (tensor<2x3x4xi32>, tensor, tensor<1xi32>, tensor<1xi32>) -> tensor<2x3x?xi32> + %0 = "tosa.const"() <{values = dense<0> : tensor<1xi32>}> : () -> tensor<1xi32> + %1 = "tosa.const"() <{values = dense<0> : tensor<1xi32>}> : () -> tensor<1xi32> + %2 = tosa.matmul_t %arg0, %arg1, %0, %1 : (tensor<2x3x4xi32>, tensor, tensor<1xi32>, tensor<1xi32>) -> tensor + return +} + +// ----- + +// CHECK-LABEL: @test_broadcast_matmul_t +func.func @test_broadcast_matmul_t(%arg0 : tensor<4x3x4xi32>, %arg1 : tensor<1x5x4xi32>) -> () { + // CHECK: tosa.matmul_t %arg0, %arg1, %0, %1 : (tensor<4x3x4xi32>, tensor<1x5x4xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<4x3x5xi32> + %0 = "tosa.const"() <{values = dense<0> : tensor<1xi32>}> : () -> tensor<1xi32> + %1 = "tosa.const"() <{values = dense<0> : tensor<1xi32>}> : () -> tensor<1xi32> + %2 = tosa.matmul_t %arg0, %arg1, %0, %1 : (tensor<4x3x4xi32>, tensor<1x5x4xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + return +} + +// ----- + +// CHECK-LABEL: @test_dynamic_broadcast_matmul_t +func.func 
@test_dynamic_broadcast_matmul_t(%arg0 : tensor, %arg1 : tensor<1x?x?xi32>) -> () { + // CHECK: tosa.matmul_t %arg0, %arg1, %0, %1 : (tensor, tensor<1x?x?xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %0 = "tosa.const"() <{values = dense<0> : tensor<1xi32>}> : () -> tensor<1xi32> + %1 = "tosa.const"() <{values = dense<0> : tensor<1xi32>}> : () -> tensor<1xi32> + %2 = tosa.matmul_t %arg0, %arg1, %0, %1 : (tensor, tensor<1x?x?xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor<*xi32> + return +} + +// ----- + +// CHECK-LABEL: @test_unranked_matmul_t +func.func @test_unranked_matmul_t(%arg0 : tensor<*xi32>, %arg1 : tensor<*xi32>) -> () { + // CHECK: tosa.matmul_t %arg0, %arg1, %0, %1 : (tensor<*xi32>, tensor<*xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + %0 = "tosa.const"() <{values = dense<0> : tensor<1xi32>}> : () -> tensor<1xi32> + %1 = "tosa.const"() <{values = dense<0> : tensor<1xi32>}> : () -> tensor<1xi32> + %2 = tosa.matmul_t %arg0, %arg1, %0, %1 : (tensor<*xi32>, tensor<*xi32>, tensor<1xi32>, tensor<1xi32>) -> tensor + return +} + +// ----- + // CHECK-LABEL: @test_accepts_unranked_scalar_tensor func.func @test_accepts_unranked_scalar_tensor(%arg0: tensor<1x2x2xf32>, %arg1: tensor<1xf32>) -> tensor<*xf32> { // CHECK-DAG: %[[SHAPE:.*]] = tosa.const_shape {values = dense<[0, 0, 0, 1, 0, 1]> : tensor<6xindex>} : () -> !tosa.shape<6> diff --git a/mlir/test/Dialect/Tosa/tosa-validation-version-1p0-invalid.mlir b/mlir/test/Dialect/Tosa/tosa-validation-version-1p0-invalid.mlir index 3dc10eff15a62..cbff768ec1ecb 100644 --- a/mlir/test/Dialect/Tosa/tosa-validation-version-1p0-invalid.mlir +++ b/mlir/test/Dialect/Tosa/tosa-validation-version-1p0-invalid.mlir @@ -22,6 +22,16 @@ func.func @test_matmul_fp8_input_fp32_acc_type(%arg0: tensor<1x14x19xf8E4M3FN>, // ----- +func.func @test_matmul_t_fp32(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1x28x19xf32>) -> tensor<1x14x28xf32> { + %azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> + %bzp0 = 
"tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> + // expected-error@+1 {{'tosa.matmul_t' op illegal: requires specification version compatible with 1.1.draft (got 1.0) to be specified in the target environment}} + %0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<1x14x19xf32>, tensor<1x28x19xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x14x28xf32> + return %0 : tensor<1x14x28xf32> +} + +// ----- + func.func @test_conv2d_fp8_acc32(%arg0: tensor<1x4x4x4xf8E5M2>, %arg1: tensor<8x1x1x4xf8E5M2>, %arg2: tensor<8xf32>) -> tensor<1x4x4x8xf32> { %input_zp = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E5M2>}> : () -> tensor<1xf8E5M2> %weight_zp = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E5M2>}> : () -> tensor<1xf8E5M2> diff --git a/mlir/test/Dialect/Tosa/tosa-validation-version-1p1-valid.mlir b/mlir/test/Dialect/Tosa/tosa-validation-version-1p1-valid.mlir index 584dc4e9aa9ed..f4b5ab5dd11fd 100644 --- a/mlir/test/Dialect/Tosa/tosa-validation-version-1p1-valid.mlir +++ b/mlir/test/Dialect/Tosa/tosa-validation-version-1p1-valid.mlir @@ -22,6 +22,16 @@ func.func @test_matmul_fp8_input_fp32_acc_type(%arg0: tensor<1x14x19xf8E4M3FN>, // ----- +// CHECK-LABEL: test_matmul_t_fp8_input_fp32_acc_type +func.func @test_matmul_t_fp8_input_fp32_acc_type(%arg0: tensor<1x14x19xf8E4M3FN>, %arg1: tensor<1x28x19xf8E4M3FN>) -> tensor<1x14x28xf32> { + %azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E4M3FN>}> : () -> tensor<1xf8E4M3FN> + %bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf8E4M3FN>}> : () -> tensor<1xf8E4M3FN> + %0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<1x14x19xf8E4M3FN>, tensor<1x28x19xf8E4M3FN>, tensor<1xf8E4M3FN>, tensor<1xf8E4M3FN>) -> tensor<1x14x28xf32> + return %0 : tensor<1x14x28xf32> +} + +// ----- + // CHECK-LABEL: test_conv2d_fp8_acc32 func.func @test_conv2d_fp8_acc32(%arg0: tensor<1x4x4x4xf8E5M2>, %arg1: tensor<8x1x1x4xf8E5M2>, %arg2: tensor<8xf32>) -> tensor<1x4x4x8xf32> { %input_zp = 
"tosa.const"() <{values = dense<0.0> : tensor<1xf8E5M2>}> : () -> tensor<1xf8E5M2> diff --git a/mlir/test/Dialect/Tosa/verifier.mlir b/mlir/test/Dialect/Tosa/verifier.mlir index 9d97df2f43c6d..0eb679ca7ced3 100644 --- a/mlir/test/Dialect/Tosa/verifier.mlir +++ b/mlir/test/Dialect/Tosa/verifier.mlir @@ -1314,7 +1314,7 @@ func.func @test_matmul_output_channel_mismatch(%arg0: tensor<2x3x4xf32>, %arg1: func.func @test_matmul_output_shape_mismatch(%arg0: tensor<2x3x4xf32>, %arg1: tensor<2x4x6xf32>) -> tensor<2x5x6xf32> { %azp0 = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32> %bzp0 = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32> - // expected-error@+1 {{'tosa.matmul' op expected output shape 2, 5, 6 to be compatible with expected output shape 2, 3, 6}} + // expected-error@+1 {{'tosa.matmul' op expected output shape 2, 5, 6 to be compatible with inferred shape 2, 3, 6}} %0 = tosa.matmul %arg0, %arg1, %azp0, %bzp0 : (tensor<2x3x4xf32>, tensor<2x4x6xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x5x6xf32> return %0 : tensor<2x5x6xf32> } @@ -1345,7 +1345,7 @@ func.func @test_matmul_dynamic_channel_mismatch(%arg0: tensor, %arg1: func.func @test_matmul_dynamic_output_shape_mismatch(%arg0: tensor, %arg1: tensor<2x4x6xf32>) -> tensor<5x3x6xf32> { %azp0 = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32> %bzp0 = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32> - // expected-error@+1 {{'tosa.matmul' op expected output shape 5, 3, 6 to be compatible with expected output shape 2, 3, 6}} + // expected-error@+1 {{'tosa.matmul' op expected output shape 5, 3, 6 to be compatible with inferred shape 2, 3, 6}} %0 = tosa.matmul %arg0, %arg1, %azp0, %bzp0 : (tensor, tensor<2x4x6xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<5x3x6xf32> return %0 : tensor<5x3x6xf32> } @@ -1356,7 +1356,7 @@ func.func @test_matmul_dynamic_output_shape_mismatch(%arg0: tensor, % func.func 
@test_matmul_unranked_b_output_shape_mismatch(%arg0: tensor<2x3x4xf32>, %arg1: tensor<*xf32>) -> tensor<2x5x?xf32> { %azp0 = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32> %bzp0 = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32> - // expected-error@+1 {{'tosa.matmul' op expected output shape 2, 5, ? to be compatible with expected output shape 2, 3, ?}} + // expected-error@+1 {{'tosa.matmul' op expected output shape 2, 5, ? to be compatible with inferred shape 2, 3, ?}} %0 = tosa.matmul %arg0, %arg1, %azp0, %bzp0 : (tensor<2x3x4xf32>, tensor<*xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<2x5x?xf32> return %0 : tensor<2x5x?xf32> } @@ -1384,6 +1384,26 @@ func.func @test_matmul_quantized_width_mismatch(%arg0: tensor<2x3x4x!quant.unifo // ----- +func.func @test_matmul_t_quantized_mixed_operands(%arg0: tensor<2x3x4x!quant.uniform>, %arg1: tensor<2x6x4xf32>) -> tensor<2x3x6xi32> { + %azp0 = "tosa.const"() {values = dense<0> : tensor<1xi8>} : () -> tensor<1xi8> + %bzp0 = "tosa.const"() {values = dense<0.0> : tensor<1xf32>} : () -> tensor<1xf32> + // expected-error@+1 {{'tosa.matmul_t' op expect operands to be both quantized or both not quantized, got '!quant.uniform' and 'f32'}} + %0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<2x3x4x!quant.uniform>, tensor<2x6x4xf32>, tensor<1xi8>, tensor<1xf32>) -> tensor<2x3x6xi32> + return %0 : tensor<2x3x6xi32> +} + +// ----- + +func.func @test_matmul_t_quantized_width_mismatch(%arg0: tensor<2x3x4x!quant.uniform>, %arg1: tensor<2x6x4x!quant.uniform>) -> tensor<2x3x6xi32> { + %azp0 = "tosa.const"() {values = dense<0> : tensor<1xi8>} : () -> tensor<1xi8> + %bzp0 = "tosa.const"() {values = dense<0> : tensor<1xi16>} : () -> tensor<1xi16> + // expected-error@+1 {{'tosa.matmul_t' op expect quantized operands to have same widths, got 8 and 16}} + %0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<2x3x4x!quant.uniform>, tensor<2x6x4x!quant.uniform>, tensor<1xi8>, 
tensor<1xi16>) -> tensor<2x3x6xi32> + return %0 : tensor<2x3x6xi32> +} + +// ----- + // CHECK-LABEL: test_matmul_a_zp_same_element_type func.func @test_matmul_a_zp_same_element_type(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1x19x28xf32>) -> tensor<1x14x28xf32> { %azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf16>}> : () -> tensor<1xf16> @@ -1428,6 +1448,56 @@ func.func @test_matmul_b_zp_non_zero(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1 // ----- +func.func @test_matmul_t_channel_mismatch(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1x28x20xf32>) -> tensor<1x14x28xf32> { +%azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +%bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +// expected-error@+1 {{'tosa.matmul_t' op expected channels of b to match size 19, got 20}} +%0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<1x14x19xf32>, tensor<1x28x20xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x14x28xf32> + return %0 : tensor<1x14x28xf32> +} + +// ----- + +func.func @test_matmul_t_batch_mismatch(%arg0: tensor<4x14x19xf32>, %arg1: tensor<2x28x19xf32>) -> tensor<4x14x28xf32> { +%azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +%bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +// expected-error@+1 {{'tosa.matmul_t' op expect B matrix batch size to be broadcast compatible with A, got D=2 vs N=4}} +%0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<4x14x19xf32>, tensor<2x28x19xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<4x14x28xf32> + return %0 : tensor<4x14x28xf32> +} + +// ----- + +func.func @test_matmul_t_output_shape_mismatch(%arg0: tensor<4x14x19xf32>, %arg1: tensor<1x28x19xf32>) -> tensor<4x14x29xf32> { +%azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +%bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +// expected-error@+1 {{'tosa.matmul_t' 
op expected output shape 4, 14, 29 to be compatible with inferred shape 4, 14, 28}} +%0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<4x14x19xf32>, tensor<1x28x19xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<4x14x29xf32> + return %0 : tensor<4x14x29xf32> +} + +// ----- + +func.func @test_matmul_t_a_zp_same_element_type(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1x28x19xf32>) -> tensor<1x14x28xf32> { +%azp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf16>}> : () -> tensor<1xf16> +%bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +// expected-error@+1 {{'tosa.matmul_t' op expect input a and a_zp have the same element type, got 'f32' and 'f16'}} +%0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<1x14x19xf32>, tensor<1x28x19xf32>, tensor<1xf16>, tensor<1xf32>) -> tensor<1x14x28xf32> + return %0 : tensor<1x14x28xf32> +} + +// ----- + +func.func @test_matmul_t_a_zp_non_zero(%arg0: tensor<1x14x19xf32>, %arg1: tensor<1x28x19xf32>) -> tensor<1x14x28xf32> { +%azp0 = "tosa.const"() <{values = dense<1.0> : tensor<1xf32>}> : () -> tensor<1xf32> +%bzp0 = "tosa.const"() <{values = dense<0.0> : tensor<1xf32>}> : () -> tensor<1xf32> +// expected-error@+1 {{'tosa.matmul_t' op a zero point must be zero for non-int8 integer types}} +%0 = tosa.matmul_t %arg0, %arg1, %azp0, %bzp0 : (tensor<1x14x19xf32>, tensor<1x28x19xf32>, tensor<1xf32>, tensor<1xf32>) -> tensor<1x14x28xf32> + return %0 : tensor<1x14x28xf32> +} + +// ----- + func.func @test_matmul_t_block_scaled_data_mismatch(%arg0: tensor<4x8x32xf8E4M3FN>, %arg1: tensor<4x8x1xf8E8M0FNU>, %arg2: tensor<4x16x32xf8E5M2>, %arg3: tensor<4x16x1xf8E8M0FNU>) -> tensor<4x8x16xf32> { // expected-error@+1 {{'tosa.matmul_t_block_scaled' op expect A_data and B_data to have same element type, got 'f8E4M3FN' and 'f8E5M2'}} %0 = tosa.matmul_t_block_scaled %arg0, %arg1, %arg2, %arg3 {block_size = #tosa.block_size : i32} : (tensor<4x8x32xf8E4M3FN>, tensor<4x8x1xf8E8M0FNU>, tensor<4x16x32xf8E5M2>, 
tensor<4x16x1xf8E8M0FNU>) -> tensor<4x8x16xf32> From 126eaba93b3b7cdf77632b09f7336022d756ed8b Mon Sep 17 00:00:00 2001 From: Zhen Wang Date: Tue, 23 Jun 2026 14:16:50 -0700 Subject: [PATCH 264/511] [flang][cuda] Prefer valid intrinsics over host-only generics in device code (#205376) In CUDA device code, a host-only generic can shadow an intrinsic with the same name and cause a valid call such as maxloc(a, 1) to be rejected as non-device-callable. Retry intrinsic resolution for host-only generic function calls in device context, preserving normal diagnostics when no valid intrinsic matches. --- flang/lib/Semantics/expression.cpp | 39 ++++++++++++++++ flang/test/Semantics/cuf29.cuf | 72 ++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 flang/test/Semantics/cuf29.cuf diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index 6c0a21cc769c1..aea685e575754 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -3067,6 +3067,33 @@ const Symbol *ExpressionAnalyzer::ResolveForward(const Symbol &symbol) { // Resolve a call to a generic procedure with given actual arguments. // adjustActuals is called on procedure bindings to handle pass arg. 
+static bool IsCUDADeviceCallable(const Symbol &symbol) { + const auto *subprogram{ + symbol.GetUltimate().detailsIf()}; + if (!subprogram) { + return false; + } + auto attrs{subprogram->cudaSubprogramAttrs()}; + return attrs && + (*attrs == common::CUDASubprogramAttrs::Device || + *attrs == common::CUDASubprogramAttrs::HostDevice); +} + +static bool IsCudaDeviceIntrinsicShadowedByHostProcedure( + const parser::CharBlock &callSite, semantics::SemanticsContext &context, + const Symbol *resolution, bool isSubroutine) { + if (isSubroutine || + !context.languageFeatures().IsEnabled(common::LanguageFeature::CUDA) || + !resolution || !IsProcedure(*resolution) || + resolution->attrs().test(semantics::Attr::INTRINSIC) || + !semantics::FindCUDADeviceContext(&context.FindScope(callSite))) { + return false; + } + // Keep use-associated names visible in device code, but do not let a + // host-only procedure hide a valid intrinsic with the same generic name. + return !IsCUDADeviceCallable(*resolution); +} + auto ExpressionAnalyzer::ResolveGeneric(const Symbol &symbol, const ActualArguments &actuals, const AdjustActuals &adjustActuals, bool isSubroutine, SymbolVector &&tried, bool mightBeStructureConstructor) @@ -3320,6 +3347,18 @@ auto ExpressionAnalyzer::GetCalleeAndArguments(const parser::Name &name, resolution = result.specific; dueToAmbiguity = result.failedDueToAmbiguity; tried = std::move(result.tried); + if (IsCudaDeviceIntrinsicShadowedByHostProcedure( + name.source, context_, resolution, isSubroutine)) { + ActualArguments localArguments{arguments}; + if (std::optional specificCall{context_.intrinsics().Probe( + CallCharacteristics{name.source.ToString(), isSubroutine}, + localArguments, GetFoldingContext())}) { + CheckBadExplicitType(*specificCall, *symbol); + return CalleeAndArguments{ + ProcedureDesignator{std::move(specificCall->specificIntrinsic)}, + std::move(specificCall->arguments)}; + } + } if (resolution) { if (context_.GetPPCBuiltinsScope() && 
resolution->name().ToString().rfind("__ppc_", 0) == 0) { diff --git a/flang/test/Semantics/cuf29.cuf b/flang/test/Semantics/cuf29.cuf new file mode 100644 index 0000000000000..1a89ebada6e90 --- /dev/null +++ b/flang/test/Semantics/cuf29.cuf @@ -0,0 +1,72 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 + +! Host-side generic wrappers can share names with intrinsics. In device code, +! intrinsic resolution must still win when the host wrapper is not +! device-callable. +module host_reduction_wrappers + interface sum + module procedure fake_sum + end interface + interface maxval + module procedure fake_maxval + end interface + interface minval + module procedure fake_minval + end interface + interface maxloc + module procedure fake_maxloc + end interface + interface minloc + module procedure fake_minloc + end interface + interface host_wrapper + module procedure fake_host_wrapper + end interface +contains + function fake_sum(array) result(res) + real(8) :: array(:) + real(8) :: res + end function + function fake_maxval(array) result(res) + real(8) :: array(:) + real(8) :: res + end function + function fake_minval(array) result(res) + real(8) :: array(:) + real(8) :: res + end function + function fake_maxloc(array, dim) result(res) + real(8) :: array(:) + integer :: dim + integer :: res + end function + function fake_minloc(array, dim) result(res) + real(8) :: array(:) + integer :: dim + integer :: res + end function + function fake_host_wrapper(array) result(res) + real(8) :: array(:) + real(8) :: res + end function +end module + +module test + use host_reduction_wrappers +contains + attributes(global) subroutine reduction_intrinsics(a, locs, vals) + real(8), intent(in) :: a(3) + integer, intent(out) :: locs(2) + real(8), intent(out) :: vals(3) + real(8) :: local(3) + + local = a + locs(1) = maxloc(local, 1) + locs(2) = minloc(local, 1) + vals(1) = sum(local) + vals(2) = maxval(local) + vals(3) = minval(local) + !ERROR: No specific function of generic 
'host_wrapper' matches the actual arguments + vals(1) = host_wrapper(local) + end subroutine +end module From 01162b855af7a5ba8e44a5f41651c97aa540d01b Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Tue, 23 Jun 2026 14:31:49 -0700 Subject: [PATCH 265/511] Revert "[NFC][Support] Implement slash-agnostic path matching in GlobPattern" (#205409) Reverts llvm/llvm-project#202854 due to downstream breakage (see discussion in https://github.com/llvm/llvm-project/pull/202854#issuecomment-4746579478) --- llvm/include/llvm/Support/GlobPattern.h | 9 ++-- llvm/lib/Support/GlobPattern.cpp | 51 +++++------------- llvm/unittests/Support/GlobPatternTest.cpp | 62 ---------------------- 3 files changed, 17 insertions(+), 105 deletions(-) diff --git a/llvm/include/llvm/Support/GlobPattern.h b/llvm/include/llvm/Support/GlobPattern.h index 8c84c93834c6b..bb4084603f547 100644 --- a/llvm/include/llvm/Support/GlobPattern.h +++ b/llvm/include/llvm/Support/GlobPattern.h @@ -56,8 +56,7 @@ class GlobPattern { /// created from expanding braces otherwise disable /// brace expansion LLVM_ABI static Expected - create(StringRef Pat, std::optional MaxSubPatterns = {}, - bool SlashAgnostic = false); + create(StringRef Pat, std::optional MaxSubPatterns = {}); /// \returns \p true if \p S matches this glob pattern LLVM_ABI bool match(StringRef S) const; @@ -88,14 +87,12 @@ class GlobPattern { StringRef Pattern; size_t PrefixSize = 0; size_t SuffixSize = 0; - bool SlashAgnostic = false; struct SubGlobPattern { /// \param Pat the pattern to match against - LLVM_ABI static Expected create(StringRef Pat, - bool SlashAgnostic); + LLVM_ABI static Expected create(StringRef Pat); /// \returns \p true if \p S matches this glob pattern - LLVM_ABI bool match(StringRef S, bool SlashAgnostic) const; + LLVM_ABI bool match(StringRef S) const; StringRef getPat() const { return StringRef(Pat.data(), Pat.size()); } // Brackets with their end position and matched bytes. 
diff --git a/llvm/lib/Support/GlobPattern.cpp b/llvm/lib/Support/GlobPattern.cpp index ebaa08cba96aa..1aaddbb8408a3 100644 --- a/llvm/lib/Support/GlobPattern.cpp +++ b/llvm/lib/Support/GlobPattern.cpp @@ -18,8 +18,6 @@ using namespace llvm; static constexpr char PrefixMetacharacters[] = "?*[{\\"; static constexpr char SuffixMetacharacters[] = "?*[]{}\\"; -static constexpr char PrefixMetacharactersWithSlash[] = "?*[{\\/"; -static constexpr char SuffixMetacharactersWithSlash[] = "?*[]{}\\/"; // Expands character ranges and returns a bitmap. // For example, "a-cf-hz" is expanded to "abcfghz". @@ -137,12 +135,10 @@ parseBraceExpansions(StringRef S, std::optional MaxSubPatterns) { return std::move(SubPatterns); } -static StringRef maxPlainSubstring(StringRef S, bool SlashAgnostic) { - const char *Metas = - SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters; +static StringRef maxPlainSubstring(StringRef S) { StringRef Best; while (!S.empty()) { - size_t PrefixSize = S.find_first_of(Metas); + size_t PrefixSize = S.find_first_of(PrefixMetacharacters); if (PrefixSize == std::string::npos) PrefixSize = S.size(); @@ -182,20 +178,13 @@ static StringRef maxPlainSubstring(StringRef S, bool SlashAgnostic) { return Best; } -Expected GlobPattern::create(StringRef S, - std::optional MaxSubPatterns, - bool SlashAgnostic) { +Expected +GlobPattern::create(StringRef S, std::optional MaxSubPatterns) { GlobPattern Pat; - Pat.SlashAgnostic = SlashAgnostic; Pat.Pattern = S; - const char *PrefixMetas = - SlashAgnostic ? PrefixMetacharactersWithSlash : PrefixMetacharacters; - const char *SuffixMetas = - SlashAgnostic ? SuffixMetacharactersWithSlash : SuffixMetacharacters; - // Store the prefix that does not contain any metacharacter. 
- Pat.PrefixSize = S.find_first_of(PrefixMetas); + Pat.PrefixSize = S.find_first_of(PrefixMetacharacters); if (Pat.PrefixSize == std::string::npos) { Pat.PrefixSize = S.size(); return Pat; @@ -203,7 +192,7 @@ Expected GlobPattern::create(StringRef S, S = S.substr(Pat.PrefixSize); // Just in case we stop on unmatched opening brackets. - size_t SuffixStart = S.find_last_of(SuffixMetas); + size_t SuffixStart = S.find_last_of(SuffixMetacharacters); assert(SuffixStart != std::string::npos); if (S[SuffixStart] == '\\') ++SuffixStart; @@ -216,7 +205,7 @@ Expected GlobPattern::create(StringRef S, if (auto Err = parseBraceExpansions(S, MaxSubPatterns).moveInto(SubPats)) return std::move(Err); for (StringRef SubPat : SubPats) { - auto SubGlobOrErr = SubGlobPattern::create(SubPat, SlashAgnostic); + auto SubGlobOrErr = SubGlobPattern::create(SubPat); if (!SubGlobOrErr) return SubGlobOrErr.takeError(); Pat.SubGlobs.push_back(*SubGlobOrErr); @@ -226,7 +215,7 @@ Expected GlobPattern::create(StringRef S, } Expected -GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) { +GlobPattern::SubGlobPattern::create(StringRef S) { SubGlobPattern Pat; // Parse brackets. 
@@ -248,10 +237,6 @@ GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) { if (!BVOrErr) return BVOrErr.takeError(); BitVector &BV = *BVOrErr; - if (SlashAgnostic && (BV['\\'] || BV['/'])) { - BV.set('\\'); - BV.set('/'); - } if (Invert) BV.flip(); Pat.Brackets.push_back(Bracket{J + 1, std::move(BV)}); @@ -266,8 +251,8 @@ GlobPattern::SubGlobPattern::create(StringRef S, bool SlashAgnostic) { } StringRef GlobPattern::longest_substr() const { - return maxPlainSubstring(Pattern.drop_front(PrefixSize).drop_back(SuffixSize), - SlashAgnostic); + return maxPlainSubstring( + Pattern.drop_front(PrefixSize).drop_back(SuffixSize)); } bool GlobPattern::match(StringRef S) const { @@ -278,23 +263,15 @@ bool GlobPattern::match(StringRef S) const { if (SubGlobs.empty() && S.empty()) return true; for (auto &Glob : SubGlobs) - if (Glob.match(S, SlashAgnostic)) + if (Glob.match(S)) return true; return false; } -static bool matchChar(char PatC, char QueryC, bool SlashAgnostic) { - if (PatC == QueryC) - return true; - return SlashAgnostic && (PatC == '\\' || PatC == '/') && - (QueryC == '\\' || QueryC == '/'); -} - // Factor the pattern into segments split by '*'. The segment is matched // sequentianlly by finding the first occurrence past the end of the previous // match. 
-bool GlobPattern::SubGlobPattern::match(StringRef Str, - bool SlashAgnostic) const { +bool GlobPattern::SubGlobPattern::match(StringRef Str) const { const char *P = Pat.data(), *SegmentBegin = nullptr, *S = Str.data(), *SavedS = S; const char *const PEnd = P + Pat.size(), *const End = S + Str.size(); @@ -316,12 +293,12 @@ bool GlobPattern::SubGlobPattern::match(StringRef Str, continue; } } else if (*P == '\\') { - if (matchChar(*++P, *S, SlashAgnostic)) { + if (*++P == *S) { ++P; ++S; continue; } - } else if (matchChar(*P, *S, SlashAgnostic) || *P == '?') { + } else if (*P == *S || *P == '?') { ++P; ++S; continue; diff --git a/llvm/unittests/Support/GlobPatternTest.cpp b/llvm/unittests/Support/GlobPatternTest.cpp index 35423e37a3ae0..872a21e948d7a 100644 --- a/llvm/unittests/Support/GlobPatternTest.cpp +++ b/llvm/unittests/Support/GlobPatternTest.cpp @@ -327,30 +327,6 @@ TEST_F(GlobPatternTest, PrefixSuffix) { ASSERT_TRUE((bool)Pat); EXPECT_EQ("", Pat->prefix()); EXPECT_EQ("cd", Pat->suffix()); - - Pat = GlobPattern::create("ab/cd", /*MaxSubPatterns=*/{}, - /*SlashAgnostic=*/true); - ASSERT_TRUE((bool)Pat); - EXPECT_EQ("ab", Pat->prefix()); - EXPECT_EQ("cd", Pat->suffix()); - - Pat = GlobPattern::create("ab\\cd", /*MaxSubPatterns=*/{}, - /*SlashAgnostic=*/true); - ASSERT_TRUE((bool)Pat); - EXPECT_EQ("ab", Pat->prefix()); - EXPECT_EQ("d", Pat->suffix()); - - Pat = GlobPattern::create("ab/cd", /*MaxSubPatterns=*/{}, - /*SlashAgnostic=*/false); - ASSERT_TRUE((bool)Pat); - EXPECT_EQ("ab/cd", Pat->prefix()); - EXPECT_EQ("", Pat->suffix()); - - Pat = GlobPattern::create("ab\\cd", /*MaxSubPatterns=*/{}, - /*SlashAgnostic=*/false); - ASSERT_TRUE((bool)Pat); - EXPECT_EQ("ab", Pat->prefix()); - EXPECT_EQ("d", Pat->suffix()); } TEST_F(GlobPatternTest, Substr) { @@ -417,26 +393,6 @@ TEST_F(GlobPatternTest, Substr) { Pat = GlobPattern::create("a*bcdef{g}*h"); ASSERT_TRUE((bool)Pat); EXPECT_EQ("bcdef", Pat->longest_substr()); - - Pat = GlobPattern::create("a*bc/de*f", 
/*MaxSubPatterns=*/{}, - /*SlashAgnostic=*/true); - ASSERT_TRUE((bool)Pat); - EXPECT_EQ("bc", Pat->longest_substr()); - - Pat = GlobPattern::create("a*bc\\de*f", /*MaxSubPatterns=*/{}, - /*SlashAgnostic=*/true); - ASSERT_TRUE((bool)Pat); - EXPECT_EQ("bc", Pat->longest_substr()); - - Pat = GlobPattern::create("a*bc/de*f", /*MaxSubPatterns=*/{}, - /*SlashAgnostic=*/false); - ASSERT_TRUE((bool)Pat); - EXPECT_EQ("bc/de", Pat->longest_substr()); - - Pat = GlobPattern::create("a*bc\\de*f", /*MaxSubPatterns=*/{}, - /*SlashAgnostic=*/false); - ASSERT_TRUE((bool)Pat); - EXPECT_EQ("bc", Pat->longest_substr()); } TEST_F(GlobPatternTest, Pathological) { @@ -453,22 +409,4 @@ TEST_F(GlobPatternTest, Pathological) { EXPECT_FALSE(Pat->match(S)); EXPECT_TRUE(Pat->match(S + 'b')); } - -TEST_F(GlobPatternTest, SlashAgnosticMatch) { - auto Pat1 = GlobPattern::create("foo\\\\bar[a\\\\-z]", 1024, - /*SlashAgnostic=*/true); - ASSERT_TRUE((bool)Pat1); - EXPECT_TRUE(Pat1->match("foo/bar\\")); - EXPECT_TRUE(Pat1->match("foo/barb")); - EXPECT_TRUE(Pat1->match("foo/bar/")); -} - -TEST_F(GlobPatternTest, SlashAgnosticMatchInverted) { - auto Pat = GlobPattern::create("foo\\\\bar[^a\\\\-z]", 1024, - /*SlashAgnostic=*/true); - ASSERT_TRUE((bool)Pat); - EXPECT_FALSE(Pat->match("foo/bar/")); - EXPECT_FALSE(Pat->match("foo/barb")); - EXPECT_TRUE(Pat->match("foo/bar1")); -} } From b3ed7343c7469f6e495f2ece5f6fef2bac60b814 Mon Sep 17 00:00:00 2001 From: Maosu Zhao Date: Wed, 24 Jun 2026 05:40:19 +0800 Subject: [PATCH 266/511] [NSan] Fix null-pointer crash on unsupported vector element sizes (#202270) In propagateNonFTStore, BitcastTy is left null when a ConstantDataVector has an element width other than 32/64/80 bits (e.g., f16, bf16, fp128). Passing a null element type into VectorType::get crashes. Guard the VectorType::get call so the unsupported case falls through to the "reset shadow to unknown" path. 
--------- Co-authored-by: Alexander Shaposhnikov --- .../NumericalStabilitySanitizer.cpp | 5 ++-- .../non_float_store.ll | 28 +++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp index f40816d07d566..de2854d79277e 100644 --- a/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/NumericalStabilitySanitizer.cpp @@ -1897,8 +1897,9 @@ void NumericalStabilitySanitizer::propagateNonFTStore( break; } - if (auto *VectorTy = dyn_cast(C->getType())) - BitcastTy = VectorType::get(BitcastTy, VectorTy->getElementCount()); + if (BitcastTy) + if (auto *VectorTy = dyn_cast(C->getType())) + BitcastTy = VectorType::get(BitcastTy, VectorTy->getElementCount()); } if (BitcastTy) { const MemoryExtents Extents = getMemoryExtentsOrDie(BitcastTy); diff --git a/llvm/test/Instrumentation/NumericalStabilitySanitizer/non_float_store.ll b/llvm/test/Instrumentation/NumericalStabilitySanitizer/non_float_store.ll index 432c811c11524..2e6d554ee13e5 100644 --- a/llvm/test/Instrumentation/NumericalStabilitySanitizer/non_float_store.ll +++ b/llvm/test/Instrumentation/NumericalStabilitySanitizer/non_float_store.ll @@ -30,4 +30,32 @@ entry: ret void } +; An element width other than 32/64/80 bits (here i16) has no FT type to +; bitcast to, so the shadow is reset to unknown instead of crashing. +define void @store_non_float_unsupported_size(ptr %dst) sanitize_numerical_stability { +; CHECK-LABEL: @store_non_float_unsupported_size( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i16 42, ptr [[DST:%.*]], align 1 +; CHECK-NEXT: call void @__nsan_set_value_unknown(ptr [[DST]], i64 2) +; CHECK-NEXT: ret void +; +entry: + store i16 42, ptr %dst, align 1 + ret void +} + +; Same for a vector with an unsupported element width (i16): no crash, +; the shadow is reset to unknown. 
+define void @store_non_float_vector_unsupported_size(ptr %dst) sanitize_numerical_stability { +; CHECK-LABEL: @store_non_float_vector_unsupported_size( +; CHECK-NEXT: entry: +; CHECK-NEXT: store <4 x i16> splat (i16 42), ptr [[DST:%.*]], align 1 +; CHECK-NEXT: call void @__nsan_set_value_unknown(ptr [[DST]], i64 8) +; CHECK-NEXT: ret void +; +entry: + store <4 x i16> splat (i16 42), ptr %dst, align 1 + ret void +} + attributes #0 = { nounwind readonly uwtable sanitize_numerical_stability "correctly-rounded-divide-sqrt-fp-math"="false" denormal_fpenv(preservesign, float: ieee) "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="true" "use-soft-float"="false" } From b786c41e66ce248030a370ebbe99d3dd2c61d1db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Tue, 23 Jun 2026 15:38:17 -0700 Subject: [PATCH 267/511] [flang][cuda] Materialize trivial computation to avoid data transfer error (#205422) Avoid this error: `error: 'cuf.data_transfer' op expect src and dst to be references or descriptors or src to be a constant: 'f32' - '!fir.ref'` --- flang/lib/Lower/Bridge.cpp | 9 +++-- flang/test/Lower/CUDA/cuda-data-transfer.cuf | 37 +++++++++++++++++++- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index b19890a5367b7..ab7c141f11905 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -5443,8 +5443,13 @@ class FirConverter : public Fortran::lower::AbstractConverter { } auto transferKindAttr = 
cuf::DataTransferKindAttr::get( builder.getContext(), cuf::DataTransferKind::DeviceHost); - cuf::DataTransferOp::create(builder, loc, rhsVal, lhsVal, shape, - transferKindAttr, hasManagedOrUnifedSymbols); + if (fir::isa_trivial(rhsVal.getType())) { + fir::StoreOp::create(builder, loc, rhsVal, lhsVal); + } else { + cuf::DataTransferOp::create(builder, loc, rhsVal, lhsVal, shape, + transferKindAttr, + hasManagedOrUnifedSymbols); + } return; } diff --git a/flang/test/Lower/CUDA/cuda-data-transfer.cuf b/flang/test/Lower/CUDA/cuda-data-transfer.cuf index a1006437485ca..8d75696f98d3c 100644 --- a/flang/test/Lower/CUDA/cuda-data-transfer.cuf +++ b/flang/test/Lower/CUDA/cuda-data-transfer.cuf @@ -23,6 +23,9 @@ module mod1 real(kind=8), device, allocatable, dimension(:) :: p + real, constant :: c1 = 1.0 + real, device :: d1 = 1.0 + interface function __sum(a_d) result(res_h) integer(4), managed, intent(in) :: a_d(:,:,:,:) @@ -495,7 +498,7 @@ subroutine sub25() end ! CHECK-LABEL: func.func @_QPsub25() -! CHECK: fir.allocmem !fir.array, %15#1 {bindc_name = ".tmp", uniq_name = ""} +! CHECK: fir.allocmem !fir.array, %{{.*}} {bindc_name = ".tmp", uniq_name = ""} ! CHECK: cuf.data_transfer %{{.*}} to %{{.*}} {transfer_kind = #cuf.cuda_transfer} : !fir.ref>>>, !fir.box> ! CHECK: hlfir.assign %{{.*}} to %{{.*}} : f64, !fir.ref ! CHECK: fir.freemem %{{.*}} : !fir.heap> @@ -724,3 +727,35 @@ subroutine sub41() lm(1:5) = a%m(1:5) end subroutine + +subroutine sub42() + use mod1 + real :: a + a = c1 * c1 +end subroutine + +! CHECK-LABEL: func.func @_QPsub42() +! CHECK: %[[RES:.*]] = arith.mulf +! CHECK: fir.store %[[RES]] to %{{.*}} : !fir.ref +! CHECK-NOT: cuf.data_transfer + +subroutine sub43() + use mod1 + real :: a + a = d1 * d1 +end subroutine + +! CHECK-LABEL: func.func @_QPsub43() +! CHECK: %[[RES:.*]] = arith.mulf +! CHECK: fir.store %[[RES]] to %{{.*}} : !fir.ref +! CHECK-NOT: cuf.data_transfer + +subroutine sub44() + use mod1 + real :: a + a = -d1 +end subroutine + +! 
CHECK-LABEL: func.func @_QPsub44() +! CHECK: arith.negf +! CHECK-NOT: cuf.data_transfer From 1c21a2d8703f7c837c6a2efd32653ba3604de465 Mon Sep 17 00:00:00 2001 From: vporpo Date: Tue, 23 Jun 2026 15:53:52 -0700 Subject: [PATCH 268/511] [SandboxVec][Scheduler] Implement direction (#205193) DGNode::UnscheduledPreds was added in a previous patch, so this patch makes use of it in the scheduler. Depending on Dir we can now schedule BottomUp or TopDown. --- .../Vectorize/SandboxVectorizer/Scheduler.h | 24 +++- .../Vectorize/SandboxVectorizer/VecUtils.h | 10 ++ .../SandboxVectorizer/DependencyGraph.cpp | 5 +- .../Vectorize/SandboxVectorizer/Scheduler.cpp | 117 ++++++++++++++---- .../SandboxVectorizer/SchedulerTest.cpp | 93 ++++++++++++++ .../SandboxVectorizer/VecUtilsTest.cpp | 14 +++ 6 files changed, 234 insertions(+), 29 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h index f80260c18ef33..86ab5fb8b464b 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/Scheduler.h @@ -103,6 +103,14 @@ class ReadyListContainer { #endif // NDEBUG }; +enum class SchedDirection { + BottomUp, + TopDown, +}; +#ifndef NDEBUG +StringLiteral schedDirectionToStr(SchedDirection Dir); +#endif + /// The nodes that need to be scheduled back-to-back in a single scheduling /// cycle form a SchedBundle. class SchedBundle { @@ -150,8 +158,11 @@ class SchedBundle { /// Move all bundle instructions to \p Where back-to-back. LLVM_ABI void cluster(BasicBlock::iterator Where); /// \Returns true if all nodes in the bundle are ready. - bool ready() const { - return all_of(Nodes, [](const auto *N) { return N->readyBottomUp(); }); + bool ready(SchedDirection Dir) const { + return all_of(Nodes, [Dir](const auto *N) { + return Dir == SchedDirection::BottomUp ? 
N->readyBottomUp() + : N->readyTopDown(); + }); } #ifndef NDEBUG void dump(raw_ostream &OS) const; @@ -215,6 +226,9 @@ class Scheduler { Scheduler(const Scheduler &) = delete; Scheduler &operator=(const Scheduler &) = delete; +private: + SchedDirection Dir = SchedDirection::BottomUp; + public: Scheduler(AAResults &AA, Context &Ctx) : DAG(AA, Ctx), Ctx(Ctx) { // NOTE: The scheduler's callback depends on the DAG's callback running @@ -226,6 +240,12 @@ class Scheduler { if (CreateInstrCB) Ctx.unregisterCreateInstrCallback(*CreateInstrCB); } + void setDirection(SchedDirection NewDir) { + assert(Bndls.empty() && DAG.empty() && ReadyList.empty() && + !ScheduleTopItOpt && ScheduledBB == nullptr && + "We can't change the direction during scheduling!"); + Dir = NewDir; + } /// Tries to build a schedule that includes all of \p Instrs scheduled at the /// same scheduling cycle. This essentially checks that there are no /// dependencies among \p Instrs. This function may involve scheduling diff --git a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h index 99e411a1723b0..803718fc33792 100644 --- a/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h +++ b/llvm/include/llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h @@ -146,6 +146,16 @@ class VecUtils { } return LowestI; } + /// \Returns the instruction in \p Instrs that is highest in the BB. Expects + /// that all instructions are in the same BB. + static Instruction *getHighest(ArrayRef Instrs) { + Instruction *HighestI = Instrs.front(); + for (auto *I : drop_begin(Instrs)) { + if (I->comesBefore(HighestI)) + HighestI = I; + } + return HighestI; + } /// \Returns the lowest instruction in \p Vals, or nullptr if no instructions /// are found. Skips instructions not in \p BB. 
static Instruction *getLowest(ArrayRef Vals, BasicBlock *BB) { diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp index 5927d374883c0..11149a16b044f 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/DependencyGraph.cpp @@ -355,9 +355,10 @@ void DependencyGraph::setDefUseUnscheduledSuccs( continue; if (!TopInterval.contains(OpI)) continue; - OpN->incrUnscheduledSuccs(); - if (!OpN->scheduled()) + if (!OpN->scheduled()) { + OpN->incrUnscheduledSuccs(); ++CntUnscheduledPreds; + } } *BotN->UnscheduledPreds += CntUnscheduledPreds; } diff --git a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp index 4b36d61cf0dcd..82a327719f29f 100644 --- a/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp +++ b/llvm/lib/Transforms/Vectorize/SandboxVectorizer/Scheduler.cpp @@ -11,6 +11,18 @@ namespace llvm::sandboxir { +#ifndef NDEBUG +StringLiteral schedDirectionToStr(SchedDirection Dir) { + switch (Dir) { + case SchedDirection::BottomUp: + return "BottomUp"; + case SchedDirection::TopDown: + return "TopDown"; + } + llvm_unreachable("Unhandled Dir!"); +} +#endif // NDEBUG + // TODO: Check if we can cache top/bottom to reduce compile-time. DGNode *SchedBundle::getTop() const { DGNode *TopN = Nodes.front(); @@ -69,18 +81,34 @@ void ReadyListContainer::dump() const { void Scheduler::scheduleAndUpdateReadyList(SchedBundle &Bndl) { // Find where we should schedule the instructions. assert(ScheduleTopItOpt && "Should have been set by now!"); - auto Where = *ScheduleTopItOpt; + auto Where = Dir == SchedDirection::BottomUp ? *ScheduleTopItOpt + : std::next(*ScheduleTopItOpt); // Move all instructions in `Bndl` to `Where`. Bndl.cluster(Where); // Update the last scheduled bundle. 
- ScheduleTopItOpt = Bndl.getTop()->getInstruction()->getIterator(); - // Set nodes as "scheduled" and decrement the UnsceduledSuccs counter of all - // dependency predecessors. + ScheduleTopItOpt = Dir == SchedDirection::BottomUp + ? Bndl.getTop()->getInstruction()->getIterator() + : Bndl.getBot()->getInstruction()->getIterator(); + // Set nodes as "scheduled" and decrement the UnscheduledSuccs/Preds counter + // of all dependency predecessors/successors. for (DGNode *N : Bndl) { - for (auto *DepN : N->preds(DAG)) { - DepN->decrUnscheduledSuccs(); - if (DepN->readyBottomUp() && !DepN->scheduled()) - ReadyList.insert(DepN); + switch (Dir) { + case SchedDirection::BottomUp: { + for (auto *DepN : N->preds(DAG)) { + DepN->decrUnscheduledSuccs(); + if (DepN->readyBottomUp() && !DepN->scheduled()) + ReadyList.insert(DepN); + } + break; + } + case SchedDirection::TopDown: { + for (auto *DepN : N->succs(DAG)) { + DepN->decrUnscheduledPreds(); + if (DepN->readyTopDown() && !DepN->scheduled()) + ReadyList.insert(DepN); + } + break; + } } N->setScheduled(); } @@ -97,16 +125,26 @@ void Scheduler::notifyCreateInstr(Instruction *I) { // "scheduled". bool IsScheduled = ScheduleTopItOpt && *ScheduleTopItOpt != I->getParent()->end() && - (*ScheduleTopItOpt.value()).comesBefore(I); + ((Dir == SchedDirection::BottomUp && + (*ScheduleTopItOpt.value()).comesBefore(I)) || + (Dir == SchedDirection::TopDown && + I->comesBefore(&*ScheduleTopItOpt.value()))); if (IsScheduled) N->setScheduled(); // If the new instruction is above the top of schedule we need to remove its // dependency predecessors from the ready list and increment their // `UnscheduledSuccs` counters. 
if (!IsScheduled) { - for (auto *PredN : N->preds(DAG)) { - ReadyList.remove(PredN); - PredN->incrUnscheduledSuccs(); + if (Dir == SchedDirection::BottomUp) { + for (auto *PredN : N->preds(DAG)) { + ReadyList.remove(PredN); + PredN->incrUnscheduledSuccs(); + } + } else { + for (auto *SuccN : N->succs(DAG)) { + ReadyList.remove(SuccN); + SuccN->incrUnscheduledPreds(); + } } } } @@ -152,7 +190,7 @@ bool Scheduler::tryScheduleUntil(ArrayRef Instrs) { scheduleAndUpdateReadyList(*SingletonSB); return TryScheduleRes::Success; } - if (SB->ready()) { + if (SB->ready(Dir)) { // Remove the rest of the bundle from the ready list. // TODO: Perhaps change the Scheduler + ReadyList to operate on // SchedBundles instead of DGNodes. @@ -251,8 +289,15 @@ void Scheduler::trimSchedule(ArrayRef Instrs) { // N // N <- DAGInterval.bottom() // - Instruction *TopI = &*ScheduleTopItOpt.value(); - Instruction *LowestI = VecUtils::getLowest(Instrs); + // Note: this figure assumes bottom-up scheduling. In top-down we have the + // top-down mirror image. + Instruction *TopI = Dir == SchedDirection::BottomUp + ? &*ScheduleTopItOpt.value() + : VecUtils::getHighest(Instrs); + Instruction *LowestI = Dir == SchedDirection::BottomUp + ? VecUtils::getLowest(Instrs) + : &*ScheduleTopItOpt.value(); + // Destroy the singleton schedule bundles from LowestI all the way to the top. for (auto *I = LowestI, *E = TopI->getPrevNode(); I != E; I = I->getPrevNode()) { @@ -272,11 +317,20 @@ void Scheduler::trimSchedule(ArrayRef Instrs) { for (Instruction &I : ResetIntvl) { auto *N = DAG.getNode(&I); N->resetScheduleState(); - // Recompute UnscheduledSuccs for nodes not only in ResetIntvl but even for - // nodes above the top of schedule. - for (auto *PredN : N->preds(DAG)) - PredN->incrUnscheduledSuccs(); + if (Dir == SchedDirection::BottomUp) { + // Recompute UnscheduledSuccs for nodes not only in ResetIntvl but even + // for nodes above the top of schedule. 
+ for (auto *PredN : N->preds(DAG)) + PredN->incrUnscheduledSuccs(); + } else { + assert(Dir == SchedDirection::TopDown); + // Recompute UnscheduledPreds for nodes not only in ResetIntvl but even + // for nodes below the bottom of schedule. + for (auto *SuccN : N->succs(DAG)) + SuccN->incrUnscheduledPreds(); + } } + // Refill the ready list by visiting all nodes from the top of DAG to LowestI. ReadyList.clear(); Interval RefillIntvl(DAG.getInterval().top(), LowestI); @@ -305,6 +359,16 @@ bool Scheduler::trySchedule(ArrayRef Instrs) { if (any_of(Instrs, [this](Instruction *I) { return I->getParent() != ScheduledBB; })) return false; + + auto GetSchedPoint = [](SchedDirection Dir, const auto &Instrs) { + switch (Dir) { + case SchedDirection::BottomUp: + return std::next(VecUtils::getLowest(Instrs)->getIterator()); + case SchedDirection::TopDown: + return VecUtils::getHighest(Instrs)->getIterator(); + } + llvm_unreachable("Unhandled Dir!"); + }; auto SchedState = getBndlSchedState(Instrs); switch (SchedState) { case BndlSchedState::FullyScheduled: @@ -321,19 +385,20 @@ bool Scheduler::trySchedule(ArrayRef Instrs) { // re-schedule. DAG.extend(Instrs); trimSchedule(Instrs); - ScheduleTopItOpt = std::next(VecUtils::getLowest(Instrs)->getIterator()); + ScheduleTopItOpt = GetSchedPoint(Dir, Instrs); return tryScheduleUntil(Instrs); case BndlSchedState::NoneScheduled: { // TODO: Set the window of the DAG that we are interested in. if (!ScheduleTopItOpt) - // We start scheduling at the bottom instr of Instrs. - ScheduleTopItOpt = std::next(VecUtils::getLowest(Instrs)->getIterator()); + // We start scheduling at the bottom instr of Instrs (top in TopDown). + ScheduleTopItOpt = GetSchedPoint(Dir, Instrs); // Extend the DAG to include Instrs. Interval Extension = DAG.extend(Instrs); - // Add nodes to ready list. + // Add nodes from the new interval to ready list if they are ready. 
for (auto &I : Extension) { auto *N = DAG.getNode(&I); - if (N->readyBottomUp()) + if (Dir == SchedDirection::BottomUp ? N->readyBottomUp() + : N->readyTopDown()) ReadyList.insert(N); } // Try schedule all nodes until we can schedule Instrs back-to-back. @@ -347,7 +412,9 @@ bool Scheduler::trySchedule(ArrayRef Instrs) { void Scheduler::dump(raw_ostream &OS) const { OS << "ReadyList:\n"; ReadyList.dump(OS); - OS << "Top of schedule: "; + OS << "Dir=" << schedDirectionToStr(Dir) << " " + << (Dir == SchedDirection::BottomUp ? "Top" : "Bottom") + << " of schedule: "; if (ScheduleTopItOpt) OS << **ScheduleTopItOpt; else diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp index fe2dbf6d1e790..0c756e6b705a0 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/SchedulerTest.cpp @@ -260,6 +260,99 @@ define void @foo(ptr %ptr, i8 %v0, i8 %v1) { } } +TEST_F(SchedulerTest, Basic_TopDown) { + parseIR(C, R"IR( +define void @foo(ptr noalias %ptr0, ptr noalias %ptr1) { + %ld0 = load i8, ptr %ptr0 + %ld1 = load i8, ptr %ptr1 + store i8 %ld0, ptr %ptr0 + store i8 %ld1, ptr %ptr1 + store i8 %ld0, ptr %ptr0 + ret void +} +)IR"); + llvm::Function *LLVMF = &*M->getFunction("foo"); + sandboxir::Context Ctx(C); + auto *F = Ctx.createFunction(LLVMF); + auto *BB = &*F->begin(); + auto It = BB->begin(); + auto *L0 = cast(&*It++); + auto *L1 = cast(&*It++); + auto *S0 = cast(&*It++); + auto *S1 = cast(&*It++); + auto *S2 = cast(&*It++); + auto *Ret = cast(&*It++); + + { + Ctx.save(); + sandboxir::Scheduler Sched(getAA(*LLVMF), Ctx); + Sched.setDirection(sandboxir::SchedDirection::TopDown); + EXPECT_TRUE(Sched.trySchedule({L0, L1})); + EXPECT_TRUE(Sched.trySchedule({S0, S1})); + EXPECT_TRUE(Sched.trySchedule({Ret})); + Ctx.revert(); + } + { + Ctx.save(); + sandboxir::Scheduler Sched(getAA(*LLVMF), 
Ctx); + Sched.setDirection(sandboxir::SchedDirection::TopDown); + EXPECT_TRUE(Sched.trySchedule({L0, L1})); + EXPECT_TRUE(Sched.trySchedule({S1})); + EXPECT_TRUE(Sched.trySchedule({S0})); + EXPECT_TRUE(Sched.trySchedule({Ret})); + Ctx.revert(); + } + { + Ctx.save(); + sandboxir::Scheduler Sched(getAA(*LLVMF), Ctx); + Sched.setDirection(sandboxir::SchedDirection::TopDown); + EXPECT_TRUE(Sched.trySchedule({L0, L1})); + EXPECT_TRUE(Sched.trySchedule({S0})); + EXPECT_TRUE(Sched.trySchedule({S1})); + EXPECT_TRUE(Sched.trySchedule({S2})); + EXPECT_TRUE(Sched.trySchedule({Ret})); + Ctx.revert(); + } + { + Ctx.save(); + sandboxir::Scheduler Sched(getAA(*LLVMF), Ctx); + Sched.setDirection(sandboxir::SchedDirection::TopDown); + EXPECT_TRUE(Sched.trySchedule({L0})); + EXPECT_TRUE(Sched.trySchedule({L1})); + EXPECT_FALSE(Sched.trySchedule({S0, S2})); + EXPECT_TRUE(Sched.trySchedule({S0, S1})); + EXPECT_TRUE(Sched.trySchedule({Ret})); + Ctx.revert(); + } + { + Ctx.save(); + sandboxir::Scheduler Sched(getAA(*LLVMF), Ctx); + Sched.setDirection(sandboxir::SchedDirection::TopDown); + EXPECT_TRUE(Sched.trySchedule({L1})); + EXPECT_TRUE(Sched.trySchedule({L0})); + EXPECT_TRUE(Sched.trySchedule({S0, S1})); + EXPECT_TRUE(Sched.trySchedule({Ret})); + Ctx.revert(); + } + { + Ctx.save(); + sandboxir::Scheduler Sched(getAA(*LLVMF), Ctx); + Sched.setDirection(sandboxir::SchedDirection::TopDown); + // Dependent instrs. + EXPECT_FALSE(Sched.trySchedule({L0, L1, S0, S1})); + EXPECT_FALSE(Sched.trySchedule({L0, L1, S0})); + EXPECT_FALSE(Sched.trySchedule({L0, S0})); + EXPECT_FALSE(Sched.trySchedule({L1, S1})); + EXPECT_FALSE(Sched.trySchedule({L1, S1, Ret})); + Sched.clear(); // TODO: Remove + // This should succeed. + EXPECT_TRUE(Sched.trySchedule({L0, L1})); + // Dependent instrs. 
+ EXPECT_FALSE(Sched.trySchedule({S0, S2})); + Ctx.revert(); + } +} + TEST_F(SchedulerTest, Bundles) { parseIR(C, R"IR( define void @foo(ptr noalias %ptr0, ptr noalias %ptr1) { diff --git a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp index 9ade4b0638846..ea6cf7d45f525 100644 --- a/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/SandboxVectorizer/VecUtilsTest.cpp @@ -545,6 +545,20 @@ define void @foo(i8 %v) { SmallVector CBA({IC, IB, IA}); EXPECT_EQ(sandboxir::VecUtils::getLowest(CBA), IC); + { + // Check getHighest(ArrayRef) + SmallVector A({IA}); + EXPECT_EQ(sandboxir::VecUtils::getHighest(A), IA); + SmallVector ABC({IA, IB, IC}); + EXPECT_EQ(sandboxir::VecUtils::getHighest(ABC), IA); + SmallVector ACB({IA, IC, IB}); + EXPECT_EQ(sandboxir::VecUtils::getHighest(ACB), IA); + SmallVector CAB({IC, IA, IB}); + EXPECT_EQ(sandboxir::VecUtils::getHighest(CAB), IA); + SmallVector CBA({IC, IB, IA}); + EXPECT_EQ(sandboxir::VecUtils::getHighest(CBA), IA); + } + // Check getLowest(ArrayRef) SmallVector C1Only({C1}); EXPECT_EQ(sandboxir::VecUtils::getLowest(C1Only, &BB), nullptr); From 88337d262f27eb49b6aae4a7e582e9a4160103c6 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Tue, 23 Jun 2026 16:03:45 -0700 Subject: [PATCH 269/511] [dsymutil] Fix help message after #200971 (#203337) The default DWARF linker is parallel after #200971. Fix help message which still suggests classic DWARF linker. 
--- llvm/tools/dsymutil/Options.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/dsymutil/Options.td b/llvm/tools/dsymutil/Options.td index 74c8d3834ae71..5a1f878196be3 100644 --- a/llvm/tools/dsymutil/Options.td +++ b/llvm/tools/dsymutil/Options.td @@ -219,7 +219,7 @@ def include_swiftmodules_from_interface: Flag<["--", "-"], "include-swiftmodules def linker: Separate<["--", "-"], "linker">, MetaVarName<"">, - HelpText<"Specify the desired type of DWARF linker. Defaults to 'classic'">, + HelpText<"Specify the desired type of DWARF linker. Defaults to 'parallel'">, Group; def: Joined<["--", "-"], "linker=">, Alias; From e83d5fc7828f8f73eee6cd4ef764dd831adc973c Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 23 Jun 2026 16:16:26 -0700 Subject: [PATCH 270/511] [lldb] Resolve dyld introspection SPIs with dlsym (NFC) (#205434) HostInfoMacOSX's SharedCacheInfo used the dyld process-snapshot introspection SPIs only when the corresponding SDK header was present, gating the calling code behind a compile-time macro. To avoid bifurcating the behavior based on the SDK, rather than the presence of the symbols, use dlsym to resolve them at runtime. While here, fold the duplicate dlsym of dyld_image_segment_data_4HWTrace into the new, once-initialized, shared table.
Assisted-by: Claude --- .../Host/macosx/objcxx/HostInfoMacOSX.mm | 177 +++++++++--------- 1 file changed, 87 insertions(+), 90 deletions(-) diff --git a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm index ad62757a37298..75a584d90121a 100644 --- a/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm +++ b/lldb/source/Host/macosx/objcxx/HostInfoMacOSX.mm @@ -47,13 +47,6 @@ #include #include #include -#if defined(MAC_OS_X_VERSION_MIN_REQUIRED) && \ - MAC_OS_X_VERSION_MIN_REQUIRED >= MAC_OS_VERSION_12_0 -#if __has_include() -#include -#define SDK_HAS_NEW_DYLD_INTROSPECTION_SPIS -#endif -#endif #include // These are needed when compiling on systems @@ -687,6 +680,71 @@ void dyld_shared_cache_for_each_image(dyld_shared_cache_t cache, } namespace { +/// Function pointers for dyld SPIs that may be absent from the SDK LLDB is +/// built against but present in libdyld at runtime. Resolving them with dlsym +/// rather than linking against them keeps a single binary working across SDKs +/// and OS versions; a null pointer means the running dyld doesn't vend that +/// SPI and the caller must fall back. +struct LazyDyldSPIs { + // Process snapshot introspection SPIs, available on macOS 12 and newer. + dyld_process_t (*process_create_for_current_task)(void) = nullptr; + void (*process_dispose)(dyld_process_t process) = nullptr; + dyld_process_snapshot_t (*process_snapshot_create_for_process)( + dyld_process_t process, kern_return_t *error) = nullptr; + void (*process_snapshot_dispose)(dyld_process_snapshot_t snapshot) = nullptr; + dyld_shared_cache_t (*process_snapshot_get_shared_cache)( + dyld_process_snapshot_t snapshot) = nullptr; + + // Shared cache image segment SPIs, available on macOS 26.4 and newer. 
+ void (*image_retain_4HWTrace)(void *image) = nullptr; + void (*image_release_4HWTrace)(void *image) = nullptr; + dispatch_data_t (*image_segment_data_4HWTrace)( + void *image, const char *segment_name) = nullptr; + + bool HasProcessSnapshotSPIs() const { + return process_create_for_current_task && process_dispose && + process_snapshot_create_for_process && process_snapshot_dispose && + process_snapshot_get_shared_cache; + } + + bool Has4HWTraceSPIs() const { + return image_retain_4HWTrace && image_release_4HWTrace && + image_segment_data_4HWTrace; + } +}; + +const LazyDyldSPIs &GetLazyDyldSPIs() { + static LazyDyldSPIs g_spis; + static std::once_flag g_once_flag; + std::call_once(g_once_flag, []() { + auto lookup = [](const char *name) { return dlsym(RTLD_DEFAULT, name); }; + g_spis.process_create_for_current_task = + (decltype(g_spis.process_create_for_current_task))lookup( + "dyld_process_create_for_current_task"); + g_spis.process_dispose = + (decltype(g_spis.process_dispose))lookup("dyld_process_dispose"); + g_spis.process_snapshot_create_for_process = + (decltype(g_spis.process_snapshot_create_for_process))lookup( + "dyld_process_snapshot_create_for_process"); + g_spis.process_snapshot_dispose = + (decltype(g_spis.process_snapshot_dispose))lookup( + "dyld_process_snapshot_dispose"); + g_spis.process_snapshot_get_shared_cache = + (decltype(g_spis.process_snapshot_get_shared_cache))lookup( + "dyld_process_snapshot_get_shared_cache"); + g_spis.image_retain_4HWTrace = + (decltype(g_spis.image_retain_4HWTrace))lookup( + "dyld_image_retain_4HWTrace"); + g_spis.image_release_4HWTrace = + (decltype(g_spis.image_release_4HWTrace))lookup( + "dyld_image_release_4HWTrace"); + g_spis.image_segment_data_4HWTrace = + (decltype(g_spis.image_segment_data_4HWTrace))lookup( + "dyld_image_segment_data_4HWTrace"); + }); + return g_spis; +} + class SharedCacheInfo { public: SharedCacheImageInfo GetByFilename(UUID sc_uuid, ConstString filename) { @@ -721,7 +779,6 @@ 
SharedCacheImageInfo GetByUUID(UUID sc_uuid, UUID file_uuid) { private: bool CreateSharedCacheInfoWithInstrospectionSPIs(); - void CreateSharedCacheInfoLLDBsVirtualMemory(); bool CreateHostSharedCacheImageList(); // These three ivars have an initial key of a shared cache UUID. @@ -734,26 +791,11 @@ SharedCacheImageInfo GetByUUID(UUID sc_uuid, UUID file_uuid) { UUID m_host_uuid; llvm::sys::RWMutex m_mutex; - - // macOS 26.4 and newer - void (*m_dyld_image_retain_4HWTrace)(void *image); - void (*m_dyld_image_release_4HWTrace)(void *image); - dispatch_data_t (*m_dyld_image_segment_data_4HWTrace)( - void *image, const char *segmentName); }; } // namespace SharedCacheInfo::SharedCacheInfo(SymbolSharedCacheUse sc_mode) { - // macOS 26.4 and newer - m_dyld_image_retain_4HWTrace = - (void (*)(void *))dlsym(RTLD_DEFAULT, "dyld_image_retain_4HWTrace"); - m_dyld_image_release_4HWTrace = - (void (*)(void *))dlsym(RTLD_DEFAULT, "dyld_image_release_4HWTrace"); - m_dyld_image_segment_data_4HWTrace = - (dispatch_data_t(*)(void *image, const char *segmentName))dlsym( - RTLD_DEFAULT, "dyld_image_segment_data_4HWTrace"); - uuid_t dsc_uuid; _dyld_get_shared_cache_uuid(dsc_uuid); m_host_uuid = UUID(dsc_uuid); @@ -770,14 +812,9 @@ SharedCacheImageInfo GetByUUID(UUID sc_uuid, UUID file_uuid) { if (use_libdyld_spi && CreateHostSharedCacheImageList()) return; - // Scan lldb's shared cache memory if we're built against the - // internal SDK and have those headers. - if (CreateSharedCacheInfoWithInstrospectionSPIs()) - return; - - // Scan lldb's shared cache memory if we're built against the public - // SDK. - CreateSharedCacheInfoLLDBsVirtualMemory(); + // Otherwise scan lldb's own shared cache, preferring the process snapshot + // SPIs when the running dyld vends them. 
+ CreateSharedCacheInfoWithInstrospectionSPIs(); } struct segment { @@ -796,15 +833,8 @@ static DataExtractorSP map_shared_cache_binary_segments(void *image) { static std::mutex g_mutex; std::lock_guard guard(g_mutex); - static dispatch_data_t (*g_dyld_image_segment_data_4HWTrace)( - void *image, const char *segmentName); - static std::once_flag g_once_flag; - std::call_once(g_once_flag, [&]() { - g_dyld_image_segment_data_4HWTrace = - (dispatch_data_t(*)(void *, const char *))dlsym( - RTLD_DEFAULT, "dyld_image_segment_data_4HWTrace"); - }); - if (!g_dyld_image_segment_data_4HWTrace) + const LazyDyldSPIs &dyld = GetLazyDyldSPIs(); + if (!dyld.image_segment_data_4HWTrace) return {}; __block std::vector segments; @@ -818,7 +848,7 @@ static dispatch_data_t (*g_dyld_image_segment_data_4HWTrace)( seg.vmsize = vmSize; dispatch_data_t data_from_libdyld = - g_dyld_image_segment_data_4HWTrace(image_copy, segmentName); + dyld.image_segment_data_4HWTrace(image_copy, segmentName); (void)dispatch_data_create_map(data_from_libdyld, &seg.data, &seg.size); if (seg.size > 0 && seg.data != 0) @@ -872,8 +902,8 @@ static dispatch_data_t (*g_dyld_image_segment_data_4HWTrace)( bool SharedCacheInfo::CreateSharedCacheImageList(UUID sc_uuid, std::string filepath) { llvm::sys::ScopedWriter guard(m_mutex); - if (!m_dyld_image_retain_4HWTrace || !m_dyld_image_release_4HWTrace || - !m_dyld_image_segment_data_4HWTrace) + const LazyDyldSPIs &dyld = GetLazyDyldSPIs(); + if (!dyld.Has4HWTraceSPIs()) return false; if (filepath.empty()) @@ -917,7 +947,7 @@ static dispatch_data_t (*g_dyld_image_segment_data_4HWTrace)( LLDB_LOGF_VERBOSE(log, "sc file %s image %p", installname.GetCString(), (void *)image); - m_dyld_image_retain_4HWTrace(image); + dyld.image_retain_4HWTrace(image); m_file_infos[sc_uuid].push_back(SharedCacheImageInfo( installname, image_uuid, map_shared_cache_binary_segments, image)); }); @@ -955,29 +985,33 @@ static dispatch_data_t (*g_dyld_image_segment_data_4HWTrace)( return false; } 
-// Index the binaries in lldb's own shared cache memory, using -// libdyld SPI present on macOS 12 and newer, when building against -// the internal SDK, and add an entry to the m_caches map. +// Index the binaries in lldb's own shared cache memory, using the process +// snapshot libdyld SPIs present on macOS 12 and newer. The SPIs are resolved +// at runtime, so this works regardless of which SDK lldb was built against; +// it returns false when the running dyld doesn't vend them. bool SharedCacheInfo::CreateSharedCacheInfoWithInstrospectionSPIs() { llvm::sys::ScopedWriter guard(m_mutex); -#if defined(SDK_HAS_NEW_DYLD_INTROSPECTION_SPIS) - dyld_process_t dyld_process = dyld_process_create_for_current_task(); + const LazyDyldSPIs &dyld = GetLazyDyldSPIs(); + if (!dyld.HasProcessSnapshotSPIs()) + return false; + + dyld_process_t dyld_process = dyld.process_create_for_current_task(); if (!dyld_process) return false; llvm::scope_exit cleanup_process_on_exit( - [&]() { dyld_process_dispose(dyld_process); }); + [&]() { dyld.process_dispose(dyld_process); }); dyld_process_snapshot_t snapshot = - dyld_process_snapshot_create_for_process(dyld_process, nullptr); + dyld.process_snapshot_create_for_process(dyld_process, nullptr); if (!snapshot) return false; llvm::scope_exit cleanup_snapshot_on_exit( - [&]() { dyld_process_snapshot_dispose(snapshot); }); + [&]() { dyld.process_snapshot_dispose(snapshot); }); dyld_shared_cache_t shared_cache = - dyld_process_snapshot_get_shared_cache(snapshot); + dyld.process_snapshot_get_shared_cache(snapshot); if (!shared_cache) return false; @@ -1018,43 +1052,6 @@ static dispatch_data_t (*g_dyld_image_segment_data_4HWTrace)( m_uuid_map[m_host_uuid][entry->GetUUID()] = i; } return true; -#endif - return false; -} - -// Index the binaries in lldb's own shared cache memory using -// libdyld SPI available on macOS 10.13 or newer, add an entry to -// m_caches. 
-void SharedCacheInfo::CreateSharedCacheInfoLLDBsVirtualMemory() { - llvm::sys::ScopedWriter guard(m_mutex); - size_t shared_cache_size; - uint8_t *shared_cache_start = - _dyld_get_shared_cache_range(&shared_cache_size); - - // In macOS 26, a shared cache has around 3500 files. - m_file_infos[m_host_uuid].reserve(4000); - - dyld_shared_cache_iterate_text( - m_host_uuid.GetBytes().data(), - ^(const dyld_shared_cache_dylib_text_info *info) { - lldb::DataBufferSP buffer_sp = std::make_shared( - shared_cache_start + info->textSegmentOffset, - shared_cache_size - info->textSegmentOffset); - lldb::DataExtractorSP extractor_sp = - std::make_shared(buffer_sp); - ConstString filepath(info->path); - m_file_infos[m_host_uuid].push_back(SharedCacheImageInfo( - filepath, UUID(info->dylibUuid, 16), extractor_sp)); - }); - - // std::vector of SharedCacheImageInfos has been fully populated, we can - // take pointers to the objects now. - size_t file_info_size = m_file_infos[m_host_uuid].size(); - for (size_t i = 0; i < file_info_size; i++) { - SharedCacheImageInfo *entry = &m_file_infos[m_host_uuid][i]; - m_filename_map[m_host_uuid][entry->GetFilename()] = i; - m_uuid_map[m_host_uuid][entry->GetUUID()] = i; - } } SharedCacheInfo &GetSharedCacheSingleton(SymbolSharedCacheUse sc_mode) { From bf2234b03c13ca01db53dea83a1535743b0c7d2d Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Tue, 23 Jun 2026 18:29:30 -0500 Subject: [PATCH 271/511] [Hexagon] Add HVX caller-save remark pass for call-site diagnostics (#189188) Add a new MachineFunctionPass (HexagonHVXSaveRemark) that emits optimization analysis remarks when HVX vector registers must be saved and restored around function calls. All HVX registers are caller-saved (Section 5.3 of the Hexagon ABI), so any HVX value live across a call requires a save/restore pair on the stack. Each HVX vector is 64 or 128 bytes, making this overhead expensive. 
The pass exits when remarks are not requested (-Rpass-analysis=hexagon-hvx-save) or when HVX is not enabled. A byte threshold (default 1024, tunable via -hexagon-hvx-save-threshold) filters out functions with only a small number of saves. The remarks help programmers identify call sites where inlining, hoisting, or sinking could reduce the save/restore cost. --- llvm/lib/Target/Hexagon/CMakeLists.txt | 1 + llvm/lib/Target/Hexagon/Hexagon.h | 2 + .../Target/Hexagon/HexagonHVXSaveRemark.cpp | 228 ++++++++++++++++++ .../Target/Hexagon/HexagonTargetMachine.cpp | 2 + llvm/test/CodeGen/Hexagon/hvx-save-remarks.ll | 141 +++++++++++ 5 files changed, 374 insertions(+) create mode 100644 llvm/lib/Target/Hexagon/HexagonHVXSaveRemark.cpp create mode 100644 llvm/test/CodeGen/Hexagon/hvx-save-remarks.ll diff --git a/llvm/lib/Target/Hexagon/CMakeLists.txt b/llvm/lib/Target/Hexagon/CMakeLists.txt index 38dcc09282330..a2fe7c9e5f95f 100644 --- a/llvm/lib/Target/Hexagon/CMakeLists.txt +++ b/llvm/lib/Target/Hexagon/CMakeLists.txt @@ -42,6 +42,7 @@ add_llvm_target(HexagonCodeGen HexagonGlobalScheduler.cpp HexagonLiveVariables.cpp HexagonHardwareLoops.cpp + HexagonHVXSaveRemark.cpp HexagonHazardRecognizer.cpp HexagonInstrInfo.cpp HexagonISelDAGToDAG.cpp diff --git a/llvm/lib/Target/Hexagon/Hexagon.h b/llvm/lib/Target/Hexagon/Hexagon.h index 1db2326b274dc..8ecd3520fc660 100644 --- a/llvm/lib/Target/Hexagon/Hexagon.h +++ b/llvm/lib/Target/Hexagon/Hexagon.h @@ -43,6 +43,7 @@ void initializeHexagonGenMemAbsolutePass(PassRegistry &); void initializeHexagonGenMuxPass(PassRegistry &); void initializeHexagonGlobalSchedulerPass(PassRegistry &); void initializeHexagonHardwareLoopsPass(PassRegistry &); +void initializeHexagonHVXSaveRemarkPass(PassRegistry &); void initializeHexagonLiveVariablesPass(PassRegistry &); void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &); void initializeHexagonLoopAlignPass(PassRegistry &); @@ -97,6 +98,7 @@ FunctionPass *createHexagonGenMux(); 
FunctionPass *createHexagonGenPredicate(); FunctionPass *createHexagonGlobalScheduler(); FunctionPass *createHexagonHardwareLoops(); +FunctionPass *createHexagonHVXSaveRemark(); FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOptLevel OptLevel); FunctionPass *createHexagonLoopAlign(); diff --git a/llvm/lib/Target/Hexagon/HexagonHVXSaveRemark.cpp b/llvm/lib/Target/Hexagon/HexagonHVXSaveRemark.cpp new file mode 100644 index 0000000000000..5364f6e024786 --- /dev/null +++ b/llvm/lib/Target/Hexagon/HexagonHVXSaveRemark.cpp @@ -0,0 +1,228 @@ +//===- HexagonHVXSaveRemark.cpp - Remark on HVX saves around calls --------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Diagnostic pass that emits optimization remarks when HVX vector registers +// are live across function calls. All HVX registers are caller-saved +// (Section 5.3 of the Hexagon ABI), so every HVX value that is live across a +// call requires a save/restore pair on the stack. Each HVX vector is 64 or +// 128 bytes (depending on the mode), making this overhead expensive. The +// remarks help programmers identify call sites where inlining, hoisting, or +// sinking the call could reduce the save/restore cost. +// +// The pass runs before register allocation while values are still in virtual +// registers. A backward liveness scan over each basic block counts the HVX +// virtual registers (and their corresponding byte cost) live at each call +// instruction. 
+// +//===----------------------------------------------------------------------===// + +#include "HexagonSubtarget.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +#define DEBUG_TYPE "hexagon-hvx-save" + +static cl::opt HVXSaveThreshold( + "hexagon-hvx-save-threshold", cl::Hidden, cl::init(128 * 8), + cl::desc("Minimum number of bytes of HVX caller-saved register data live " + "across a call to trigger a remark (default: 8 x 128-byte " + "vectors)")); + +namespace { + +struct HexagonHVXSaveRemark : public MachineFunctionPass { + static char ID; + + HexagonHVXSaveRemark() : MachineFunctionPass(ID) {} + + // Returns the number of HVX vectors represented by VReg: 2 for HvxWR + // (vector pair), 1 for HvxVR (single vector), 0 for non-HVX registers. + static unsigned hvxVecCount(Register VReg, const MachineRegisterInfo &MRI) { + const TargetRegisterClass *RC = MRI.getRegClass(VReg); + if (RC == &Hexagon::HvxWRRegClass) + return 2; + if (RC == &Hexagon::HvxVRRegClass) + return 1; + return 0; + } + + bool runOnMachineFunction(MachineFunction &MF) override { + auto &MORE = getAnalysis().getORE(); + if (!MORE.allowExtraAnalysis(DEBUG_TYPE)) + return false; + + const HexagonSubtarget &HST = MF.getSubtarget(); + if (!HST.useHVXOps()) + return false; + + const MachineRegisterInfo &MRI = MF.getRegInfo(); + unsigned HVXLen = HST.getVectorLength(); + + // Compute LiveOut[B] for each block: the set of HVX virtual registers + // that are live on exit from B. 
We use a standard backward dataflow + // fixed-point: + // + // LiveIn[B] = UEVar[B] union (LiveOut[B] - Def[B]) + // LiveOut[B] = union over successors S of LiveIn[S] + // + // where UEVar[B] is the set of HVX vregs that are used in B before any + // definition of that vreg in B (upward-exposed uses), and Def[B] is the + // set of HVX vregs defined in B. + // + // Because MachineBasicBlock::liveins() only contains physical registers, + // we cannot seed cross-block virtual register liveness from successor + // liveins -- we must compute it ourselves. + + unsigned NumBlocks = MF.getNumBlockIDs(); + using VRegSet = SmallSet; + + // Per-block UEVar and Def sets (HVX vregs only). + SmallVector UEVar(NumBlocks), BlockDef(NumBlocks); + + for (const MachineBasicBlock &MBB : MF) { + unsigned BN = MBB.getNumber(); + VRegSet Defs; + for (const MachineInstr &MI : MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + Register R = MO.getReg(); + if (!R.isVirtual() || !hvxVecCount(R, MRI)) + continue; + if (MO.isDef()) { + Defs.insert(R); + } else if (MO.isUse() && !Defs.count(R)) { + UEVar[BN].insert(R); // upward-exposed use + } + } + } + BlockDef[BN] = Defs; + } + + // LiveOut[B] and LiveIn[B] maps. + SmallVector LiveOut(NumBlocks), LiveIn(NumBlocks); + + // Seed LiveIn from UEVar and iterate until stable. + for (unsigned I = 0; I < NumBlocks; ++I) + LiveIn[I] = UEVar[I]; + + bool Changed = true; + while (Changed) { + Changed = false; + for (const MachineBasicBlock &MBB : MF) { + unsigned BN = MBB.getNumber(); + + // LiveOut[B] = union of LiveIn[S] for each successor S. + VRegSet NewLiveOut; + for (const MachineBasicBlock *Succ : MBB.successors()) + for (Register R : LiveIn[Succ->getNumber()]) + NewLiveOut.insert(R); + + if (NewLiveOut != LiveOut[BN]) { + LiveOut[BN] = NewLiveOut; + Changed = true; + } + + // LiveIn[B] = UEVar[B] union (LiveOut[B] - Def[B]). 
+ VRegSet NewLiveIn = UEVar[BN]; + for (Register R : LiveOut[BN]) + if (!BlockDef[BN].count(R)) + NewLiveIn.insert(R); + + if (NewLiveIn != LiveIn[BN]) { + LiveIn[BN] = NewLiveIn; + Changed = true; + } + } + } + + // Now do the backward scan over each block, seeded from LiveOut[B]. + for (const MachineBasicBlock &MBB : MF) { + // Backward liveness scan over virtual registers. We track which + // virtual registers are live at each point, then at call instructions + // count those with HVX register classes. + // + // When walking backwards: + // - a def removes a vreg from the live set + // - a use adds a vreg to the live set + // At each call, the live set holds vregs live after the call (i.e., the + // values that must survive across it and therefore need save/restore). + VRegSet LiveVRegs = LiveOut[MBB.getNumber()]; + + for (const MachineInstr &MI : llvm::reverse(MBB)) { + if (MI.isCall()) { + // Count HVX virtual registers live after (and thus across) this + // call. HvxVR holds one vector (HVXLen bytes); HvxWR holds two + // (2 * HVXLen bytes). + unsigned NumVecs = 0; + for (Register VReg : LiveVRegs) + NumVecs += hvxVecCount(VReg, MRI); + unsigned TotalBytes = NumVecs * HVXLen; + + LLVM_DEBUG(dbgs() << "HVXSaveRemark: call in " << MF.getName() + << " has " << NumVecs << " HVX vector(s) live (" + << TotalBytes << " bytes)\n"); + + if (TotalBytes >= HVXSaveThreshold) { + MORE.emit([&]() { + MachineOptimizationRemarkAnalysis R( + DEBUG_TYPE, "HVXSaveAroundCall", MI.getDebugLoc(), &MBB); + R << ore::NV("NumVecs", NumVecs) + << " HVX caller-saved register(s) (" + << ore::NV("TotalBytes", TotalBytes) + << " bytes) live across call"; + return R; + }); + } + } + + // Update liveness: defs kill vregs, uses add them. 
+ for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.getReg().isVirtual()) + continue; + if (MO.isDef()) + LiveVRegs.erase(MO.getReg()); + else if (MO.isUse()) + LiveVRegs.insert(MO.getReg()); + } + } + } + + return false; + } + + StringRef getPassName() const override { return "Hexagon HVX Save Remarks"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +char HexagonHVXSaveRemark::ID = 0; + +} // end anonymous namespace + +INITIALIZE_PASS(HexagonHVXSaveRemark, DEBUG_TYPE, "Hexagon HVX Save Remarks", + false, false) + +FunctionPass *llvm::createHexagonHVXSaveRemark() { + return new HexagonHVXSaveRemark(); +} diff --git a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp index 5d835c9c6e71c..733234ab94689 100644 --- a/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp @@ -199,6 +199,7 @@ LLVMInitializeHexagonTarget() { initializeHexagonGlobalSchedulerPass(PR); initializeHexagonLiveVariablesPass(PR); initializeHexagonHardwareLoopsPass(PR); + initializeHexagonHVXSaveRemarkPass(PR); initializeHexagonLoopIdiomRecognizeLegacyPassPass(PR); initializeHexagonNewValueJumpPass(PR); initializeHexagonOptAddrModePass(PR); @@ -463,6 +464,7 @@ void HexagonPassConfig::addPreRegAlloc() { } if (TM->getOptLevel() >= CodeGenOptLevel::Default) addPass(&MachinePipelinerID); + addPass(createHexagonHVXSaveRemark()); } void HexagonPassConfig::addPostRegAlloc() { diff --git a/llvm/test/CodeGen/Hexagon/hvx-save-remarks.ll b/llvm/test/CodeGen/Hexagon/hvx-save-remarks.ll new file mode 100644 index 0000000000000..14053cbc55a0f --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/hvx-save-remarks.ll @@ -0,0 +1,141 @@ +; RUN: llc -mtriple=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \ +; RUN: -pass-remarks-analysis=hexagon-hvx-save %s -o /dev/null 2>&1 \ +; 
RUN: | FileCheck %s +; +; RUN: llc -mtriple=hexagon -mcpu=hexagonv68 -mattr=+hvxv68,+hvx-length128b \ +; RUN: -pass-remarks-analysis=hexagon-hvx-save \ +; RUN: -hexagon-hvx-save-threshold=256 %s -o /dev/null 2>&1 \ +; RUN: | FileCheck %s --check-prefix=LOW + +;; Test that the HVX save remark pass reports HVX registers live across calls. +;; All HVX registers are caller-saved, so any HVX value live across a call +;; requires a save/restore pair on the stack. The default threshold is 1024 +;; bytes (8 x 128-byte vectors). + +; CHECK: remark: {{.*}} 8 HVX caller-saved register(s) (1024 bytes) live across call +; CHECK: remark: {{.*}} 8 HVX caller-saved register(s) (1024 bytes) live across call +; CHECK: remark: {{.*}} 8 HVX caller-saved register(s) (1024 bytes) live across call +; CHECK-NOT: remark: + +; LOW: remark: {{.*}} 4 HVX caller-saved register(s) (512 bytes) live across call +; LOW: remark: {{.*}} 8 HVX caller-saved register(s) (1024 bytes) live across call +; LOW: remark: {{.*}} 8 HVX caller-saved register(s) (1024 bytes) live across call +; LOW: remark: {{.*}} 8 HVX caller-saved register(s) (1024 bytes) live across call +; LOW-NOT: remark: + +declare void @bar() + +;; 4 HVX vectors live across a call (4 x 128 = 512 bytes) -- above LOW threshold, +;; but below the default 1024-byte threshold. +define void @test_four_vecs(ptr %p0, ptr %p1, ptr %p2, ptr %p3) { +entry: + %v0 = load <32 x i32>, ptr %p0, align 128 + %v1 = load <32 x i32>, ptr %p1, align 128 + %v2 = load <32 x i32>, ptr %p2, align 128 + %v3 = load <32 x i32>, ptr %p3, align 128 + call void @bar() + store <32 x i32> %v0, ptr %p0, align 128 + store <32 x i32> %v1, ptr %p1, align 128 + store <32 x i32> %v2, ptr %p2, align 128 + store <32 x i32> %v3, ptr %p3, align 128 + ret void +} + +;; 8 HVX vectors live across a call (8 x 128 = 1024 bytes) -- meets default +;; threshold. 
+define void @test_hvx_save_around_call(ptr %p0, ptr %p1, ptr %p2, ptr %p3, + ptr %p4, ptr %p5, ptr %p6, ptr %p7) { +entry: + %v0 = load <32 x i32>, ptr %p0, align 128 + %v1 = load <32 x i32>, ptr %p1, align 128 + %v2 = load <32 x i32>, ptr %p2, align 128 + %v3 = load <32 x i32>, ptr %p3, align 128 + %v4 = load <32 x i32>, ptr %p4, align 128 + %v5 = load <32 x i32>, ptr %p5, align 128 + %v6 = load <32 x i32>, ptr %p6, align 128 + %v7 = load <32 x i32>, ptr %p7, align 128 + call void @bar() + store <32 x i32> %v0, ptr %p0, align 128 + store <32 x i32> %v1, ptr %p1, align 128 + store <32 x i32> %v2, ptr %p2, align 128 + store <32 x i32> %v3, ptr %p3, align 128 + store <32 x i32> %v4, ptr %p4, align 128 + store <32 x i32> %v5, ptr %p5, align 128 + store <32 x i32> %v6, ptr %p6, align 128 + store <32 x i32> %v7, ptr %p7, align 128 + ret void +} + +;; Single HVX vector live across call (128 bytes) -- below threshold. +define void @test_below_threshold(ptr %p) { +entry: + %v = load <32 x i32>, ptr %p, align 128 + call void @bar() + store <32 x i32> %v, ptr %p, align 128 + ret void +} + +;; 8 HVX vectors loaded in entry, call in a separate call_block, stores in +;; exit. The vectors are live out of entry and must be counted as live at +;; the call even though the call is in a different basic block. 
+define void @test_cross_block(ptr %p0, ptr %p1, ptr %p2, ptr %p3, + ptr %p4, ptr %p5, ptr %p6, ptr %p7, + i1 %cond) { +entry: + %v0 = load <32 x i32>, ptr %p0, align 128 + %v1 = load <32 x i32>, ptr %p1, align 128 + %v2 = load <32 x i32>, ptr %p2, align 128 + %v3 = load <32 x i32>, ptr %p3, align 128 + %v4 = load <32 x i32>, ptr %p4, align 128 + %v5 = load <32 x i32>, ptr %p5, align 128 + %v6 = load <32 x i32>, ptr %p6, align 128 + %v7 = load <32 x i32>, ptr %p7, align 128 + br label %call_block +call_block: + call void @bar() + br label %exit +exit: + store <32 x i32> %v0, ptr %p0, align 128 + store <32 x i32> %v1, ptr %p1, align 128 + store <32 x i32> %v2, ptr %p2, align 128 + store <32 x i32> %v3, ptr %p3, align 128 + store <32 x i32> %v4, ptr %p4, align 128 + store <32 x i32> %v5, ptr %p5, align 128 + store <32 x i32> %v6, ptr %p6, align 128 + store <32 x i32> %v7, ptr %p7, align 128 + ret void +} + +;; 8 HVX vectors live across a call inside a loop body. This is the +;; canonical high-cost pattern: the vectors are loaded before the loop, +;; the loop body calls bar(), and the vectors are used after the loop. 
+define void @test_loop_call(ptr %p0, ptr %p1, ptr %p2, ptr %p3, + ptr %p4, ptr %p5, ptr %p6, ptr %p7, + i32 %n) { +entry: + %v0 = load <32 x i32>, ptr %p0, align 128 + %v1 = load <32 x i32>, ptr %p1, align 128 + %v2 = load <32 x i32>, ptr %p2, align 128 + %v3 = load <32 x i32>, ptr %p3, align 128 + %v4 = load <32 x i32>, ptr %p4, align 128 + %v5 = load <32 x i32>, ptr %p5, align 128 + %v6 = load <32 x i32>, ptr %p6, align 128 + %v7 = load <32 x i32>, ptr %p7, align 128 + br label %loop +loop: + %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] + call void @bar() + %i.next = add i32 %i, 1 + %done = icmp eq i32 %i.next, %n + br i1 %done, label %exit, label %loop +exit: + store <32 x i32> %v0, ptr %p0, align 128 + store <32 x i32> %v1, ptr %p1, align 128 + store <32 x i32> %v2, ptr %p2, align 128 + store <32 x i32> %v3, ptr %p3, align 128 + store <32 x i32> %v4, ptr %p4, align 128 + store <32 x i32> %v5, ptr %p5, align 128 + store <32 x i32> %v6, ptr %p6, align 128 + store <32 x i32> %v7, ptr %p7, align 128 + ret void +} From faa9720445b5a4cd888dfb76aa0e603546e7c690 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Tue, 23 Jun 2026 23:50:41 +0000 Subject: [PATCH 272/511] [Github] Make prune-unused-branches only delete branches after 7 days To hopefully prevent the last failure mode that led to the job being disabled where the GitHub API failed to return results for >24 hours. 
Reviewers: cmtice Pull Request: https://github.com/llvm/llvm-project/pull/205438 --- .github/workflows/prune-unused-branches.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/prune-unused-branches.py b/.github/workflows/prune-unused-branches.py index 3115b4424fb57..25b3146936b46 100644 --- a/.github/workflows/prune-unused-branches.py +++ b/.github/workflows/prune-unused-branches.py @@ -1,3 +1,4 @@ +import datetime import subprocess import sys import os @@ -165,8 +166,14 @@ def get_branches_found_in_previous_run(github_token: str) -> list[str]: for workflow_run in iter( repo.get_workflow("prune-branches.yml").get_runs(branch="main") ): - if workflow_run.status == "completed": - break + if not workflow_run.status == "completed": + continue + cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta( + days=7 + ) + if workflow_run.run_started_at > cutoff: + continue + break assert workflow_run workflow_artifact = None for workflow_artifact in iter(workflow_run.get_artifacts()): From b71d6e0f023949acaa77d2f3deb34f2ee4da41bb Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 24 Jun 2026 00:00:19 +0000 Subject: [PATCH 273/511] [Github] Reenable prune-unused-branches workflow The Github API has recovered and the previous failure mode has been rectified by ensuring that branches are ready for deletion for seven days rather than 24 hours. Reviewers: cmtice Reviewed By: cmtice Pull Request: https://github.com/llvm/llvm-project/pull/205439 --- .github/workflows/prune-branches.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/prune-branches.yml b/.github/workflows/prune-branches.yml index 753fc7e42bc16..d86451a52d0a0 100644 --- a/.github/workflows/prune-branches.yml +++ b/.github/workflows/prune-branches.yml @@ -8,6 +8,8 @@ on: paths: - .github/workflows/prune-branches.yml - .github/workflows/prune-unused-branches.py + schedule: + - cron: '0 8 * * *' # Runs daily at 08:00 UTC. 
jobs: prune-branches: From 3e1b6b12e969899b35720cce70914da91b9ae177 Mon Sep 17 00:00:00 2001 From: Matt Date: Tue, 23 Jun 2026 19:07:16 -0500 Subject: [PATCH 274/511] [flang][OpenMP] Fix ICE lowering user-defined operator declare reduction A REDUCTION clause naming a user-defined operator (e.g., reduction(.myop.:x)) crashed in lowering: ReductionProcessor assumed the DefinedOperator clause variant always held an intrinsic operator and called std::get unconditionally, which aborts for the DefinedOpName alternative. Handle DefinedOpName in the reduction clause processor, adding the clause-side counterpart to the directive handling from #190288. For a locally declared user-defined operator reduction, resolve the operator to its reduction symbol and reference the omp.declare_reduction op materialized for the declare reduction directive. The op name is now module-scoped via AbstractConverter::mangleName, on the directive and clause sides in lockstep, so reductions with the same operator spelling in different modules no longer collide. Cases that are not yet supported (reductions imported by USE association, renamed or merged operators, and declarations with multiple types) now emit a clean "not yet implemented" diagnostic instead of crashing or silently binding the wrong combiner. Support for the USE-associated and cross-module cases is a follow-up that builds on the semantic fix in #200329. Tests cover the issue's integer case and a derived-type case (both lower, with a module-scoped op name), plus the USE-associated and multiple-type cases (clean TODO). Fixes #204299 Assisted-by: Claude Opus 4.8, GPT-5.5. 
--- .../flang/Lower/Support/ReductionProcessor.h | 10 +++ flang/lib/Lower/OpenMP/OpenMP.cpp | 20 +++++- .../lib/Lower/Support/ReductionProcessor.cpp | 64 +++++++++++++++++++ ...lare-reduction-operator-multiple-types.f90 | 38 +++++++++++ .../declare-reduction-operator-use-assoc.f90 | 36 +++++++++++ .../declare-reduction-operator-derived.f90 | 36 +++++++++++ .../OpenMP/declare-reduction-operator.f90 | 31 +++++++++ 7 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 flang/test/Lower/OpenMP/Todo/declare-reduction-operator-multiple-types.f90 create mode 100644 flang/test/Lower/OpenMP/Todo/declare-reduction-operator-use-assoc.f90 create mode 100644 flang/test/Lower/OpenMP/declare-reduction-operator-derived.f90 create mode 100644 flang/test/Lower/OpenMP/declare-reduction-operator.f90 diff --git a/flang/include/flang/Lower/Support/ReductionProcessor.h b/flang/include/flang/Lower/Support/ReductionProcessor.h index 0b4a692827a79..a949da875b3a2 100644 --- a/flang/include/flang/Lower/Support/ReductionProcessor.h +++ b/flang/include/flang/Lower/Support/ReductionProcessor.h @@ -96,6 +96,16 @@ class ReductionProcessor { const fir::KindMapping &kindMap, mlir::Type ty, bool isByRef); + /// Returns the module-unique name of the omp.declare_reduction op that + /// materializes a user-defined reduction (named or operator). The name is + /// derived from the reduction symbol's ultimate name, qualified with its + /// owning scope via AbstractConverter::mangleName, so that reductions with + /// the same spelling in different modules do not collide. The directive and + /// clause lowering must both use this to agree on the op's symbol name. + static std::string + getScopedUserReductionName(AbstractConverter &converter, + const semantics::Symbol &reductionSymbol); + /// This function returns the identity value of the operator \p /// reductionOpName. 
For example: /// 0 + x = x, diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 99ce48206c33b..094cec737d481 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -4514,7 +4514,25 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, }, [&](const clause::DefinedOperator::DefinedOpName &opName) -> std::string { - return opName.v.sym()->name().ToString(); + // Directive side of the user-defined operator reduction + // naming contract (the clause side is in + // ReductionProcessor::processReductionArguments). + // opName.v.sym() is the reduction symbol + // "op". Only single-declaration, single-type + // reductions are supported; otherwise emit a clean + // TODO. + const semantics::Symbol &redSym = + opName.v.sym()->GetUltimate(); + const auto *userDetails = + redSym.detailsIf(); + if (!userDetails || typeNameList.v.size() != 1 || + userDetails->GetDeclList().size() != 1 || + userDetails->GetTypeList().size() != 1) + TODO(converter.getCurrentLocation(), + "OpenMP user-defined operator declare reduction " + "with multiple declarations or multiple types"); + return ReductionProcessor::getScopedUserReductionName( + converter, redSym); }, }, defOp.u); diff --git a/flang/lib/Lower/Support/ReductionProcessor.cpp b/flang/lib/Lower/Support/ReductionProcessor.cpp index 7db48601d5aba..d57aa48fd82b8 100644 --- a/flang/lib/Lower/Support/ReductionProcessor.cpp +++ b/flang/lib/Lower/Support/ReductionProcessor.cpp @@ -216,6 +216,17 @@ ReductionProcessor::getReductionName(ReductionIdentifier redId, return getReductionName(reductionName, kindMap, ty, isByRef); } +std::string ReductionProcessor::getScopedUserReductionName( + AbstractConverter &converter, const semantics::Symbol &reductionSymbol) { + // Qualify the reduction symbol's ultimate name with its owning scope so that + // user-defined reductions with the same spelling in different modules get + // distinct op names. 
Use the (name, scope) mangleName overload: the + // (symbol) overload does not handle UserReductionDetails. + const semantics::Symbol &ultimate = reductionSymbol.GetUltimate(); + std::string name = ultimate.name().ToString(); + return converter.mangleName(name, ultimate.owner()); +} + mlir::Value ReductionProcessor::getReductionInitValue(mlir::Location loc, mlir::Type type, ReductionIdentifier redId, @@ -810,6 +821,59 @@ bool ReductionProcessor::processReductionArguments( redOperatorList.front(); if (const auto &redDefinedOp = std::get_if(&redOperator.u)) { + if (const auto *definedOpName = + std::get_if( + &redDefinedOp->u)) { + // User-defined operator reduction (e.g. reduction(.myop.:x)). Resolve + // the use-site operator to its reduction symbol, which semantics + // names "op" (MangleDefinedOperator in resolve-names), in + // the current scope, then reference the omp.declare_reduction op the + // directive materialized for it. Only a locally-declared, + // single-declaration, single-type reduction whose type the variable + // supports is handled here; anything else (imported, renamed, merged, + // or multiple declarations/types) is a clean TODO rather than a crash + // or a wrong binding. + const semantics::Symbol *opSym = definedOpName->v.sym(); + std::string mangledName = "op" + opSym->name().ToString(); + const semantics::Symbol *redSym = + converter.getCurrentScope().FindSymbol( + parser::CharBlock{mangledName}); + const semantics::Symbol *ultimate = + redSym ? &redSym->GetUltimate() : nullptr; + const semantics::UserReductionDetails *userDetails = + ultimate ? 
ultimate->detailsIf() + : nullptr; + const semantics::DeclTypeSpec *varType = + reductionSymbols[idx]->GetUltimate().GetType(); + if (!redSym || ultimate != redSym || !userDetails || + userDetails->GetDeclList().size() != 1 || + userDetails->GetTypeList().size() != 1 || !varType || + !userDetails->SupportsType(*varType)) { + TODO(currentLocation, + "OpenMP user-defined operator reduction is not yet supported " + "for imported, renamed, or multiple-declaration/type " + "reductions"); + } + std::string opName = ReductionProcessor::getScopedUserReductionName( + converter, *redSym); + mlir::ModuleOp module = builder.getModule(); + auto existingDecl = module.lookupSymbol(opName); + // The MLIR verifier does not type-check these ops (they have no + // atomic region), so this is the only guard against binding a + // mismatched declaration. Compare unwrapped types: the clause redType + // is always a reference type, while the op stores the unwrapped type + // for by-value reductions. + if (!existingDecl || fir::unwrapRefType(existingDecl.getType()) != + fir::unwrapRefType(redType)) { + TODO(currentLocation, + "OpenMP user-defined operator reduction declaration was not " + "materialized for this type"); + } + reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get( + builder.getContext(), existingDecl.getSymName())); + ++idx; + continue; + } const auto &intrinsicOp{ std::get( redDefinedOp->u)}; diff --git a/flang/test/Lower/OpenMP/Todo/declare-reduction-operator-multiple-types.f90 b/flang/test/Lower/OpenMP/Todo/declare-reduction-operator-multiple-types.f90 new file mode 100644 index 0000000000000..626316221af1b --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/declare-reduction-operator-multiple-types.f90 @@ -0,0 +1,38 @@ +! A user-defined operator declare reduction listing multiple types reaches +! lowering but is not yet supported (the op name is not type-specific, so the +! per-type ops would collide). It must emit a clean TODO rather than ICE or +! miscompile (#204299). 
The guard is on the directive side, which is lowered +! before any reduction clause. + +! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s + +! CHECK: not yet implemented: OpenMP user-defined operator declare reduction with multiple declarations or multiple types + +program p + type :: t1 + integer :: v = 0 + end type + type :: t2 + integer :: w = 0 + end type + interface operator(.mt.) + function f1(a, b) + import :: t1 + type(t1), intent(in) :: a, b + type(t1) :: f1 + end function f1 + function f2(a, b) + import :: t2 + type(t2), intent(in) :: a, b + type(t2) :: f2 + end function f2 + end interface + !$omp declare reduction(.mt.:t1,t2:omp_out=omp_in) + type(t1) :: x1 + integer :: i + !$omp parallel do reduction(.mt.:x1) + do i = 1, 5 + x1 = x1 .mt. t1(1) + end do + !$omp end parallel do +end program p diff --git a/flang/test/Lower/OpenMP/Todo/declare-reduction-operator-use-assoc.f90 b/flang/test/Lower/OpenMP/Todo/declare-reduction-operator-use-assoc.f90 new file mode 100644 index 0000000000000..68be5781ce5cf --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/declare-reduction-operator-use-assoc.f90 @@ -0,0 +1,36 @@ +! A user-defined operator reduction whose declaration is USE-associated from a +! module (plain USE, so the "op" reduction symbol is imported) reaches +! lowering but is not yet supported: lowering does not materialize imported +! declare reductions. It must emit a clean TODO rather than ICE (#204299). + +! RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s + +! CHECK: not yet implemented: OpenMP user-defined operator reduction is not yet supported for imported, renamed, or multiple-declaration/type reductions + +module m_use_op + type :: t + integer :: val = 0 + end type + interface operator(.plus.) 
+ module procedure add_t + end interface + !$omp declare reduction(.plus.:t:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t(0)) +contains + type(t) function add_t(a, b) + type(t), intent(in) :: a, b + add_t%val = a%val + b%val + end function add_t +end module m_use_op + +program p + use m_use_op + type(t) :: x + integer :: i + x = t(0) + !$omp parallel do reduction(.plus.:x) + do i = 1, 100 + x = x .plus. t(1) + end do + !$omp end parallel do +end program p diff --git a/flang/test/Lower/OpenMP/declare-reduction-operator-derived.f90 b/flang/test/Lower/OpenMP/declare-reduction-operator-derived.f90 new file mode 100644 index 0000000000000..62b30cb5d4542 --- /dev/null +++ b/flang/test/Lower/OpenMP/declare-reduction-operator-derived.f90 @@ -0,0 +1,36 @@ +! Test lowering of an OpenMP REDUCTION clause that uses a locally-declared +! user-defined operator on a derived type (#204299). This exercises the by-ref +! reduction path and the unwrapped-type guard: the clause variable type is a +! reference type while the op stores the unwrapped reduction type. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s + +subroutine red_derived + type :: t + integer :: val = 0 + end type + interface operator(.myop.) + function add_t(a, b) + import :: t + type(t), intent(in) :: a, b + type(t) :: add_t + end function add_t + end interface + !$omp declare reduction(.myop.:t:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t(0)) + type(t) :: x + integer :: i + x = t(0) + !$omp parallel do reduction(.myop.:x) + do i = 1, 100 + x = x .myop. t(1) + end do + !$omp end parallel do +end subroutine red_derived + +! The op name must be module-scoped (a mangled "_QQ..." generated name), NOT the +! bare operator spelling. The directive and clause must reference the same name. +! CHECK: omp.declare_reduction @[[RED:_QQ[A-Za-z0-9_.]*op\.myop\.]] : !fir.ref +! CHECK-NOT: omp.declare_reduction @op.myop. +! CHECK: omp.wsloop +! 
CHECK-SAME: reduction(byref @[[RED]] diff --git a/flang/test/Lower/OpenMP/declare-reduction-operator.f90 b/flang/test/Lower/OpenMP/declare-reduction-operator.f90 new file mode 100644 index 0000000000000..11897212078eb --- /dev/null +++ b/flang/test/Lower/OpenMP/declare-reduction-operator.f90 @@ -0,0 +1,31 @@ +! Test lowering of an OpenMP REDUCTION clause that uses a locally-declared +! user-defined operator. See https://github.com/llvm/llvm-project/issues/204299: +! this used to ICE in ReductionProcessor because the defined-operator variant +! was assumed to be an intrinsic operator. This is the trivial (by-value) +! integer case from the issue reproducer. + +! RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s + +subroutine red_integer + interface operator(.zzzz.) + function zzzz_op(a, b) + integer, intent(in) :: a, b + integer :: zzzz_op + end function zzzz_op + end interface + !$omp declare reduction(.zzzz.:integer:omp_out=omp_out+omp_in) & + !$omp initializer(omp_priv=0) + integer :: x + x = 0 + !$omp parallel reduction(.zzzz.:x) + x = x .zzzz. 1 + !$omp end parallel +end subroutine red_integer + +! The op name must be module-scoped (a mangled "_QQ..." generated name), NOT the +! bare operator spelling, so reductions with the same spelling in different +! modules do not collide. The directive and clause must reference the same name. +! CHECK: omp.declare_reduction @[[RED:_QQ[A-Za-z0-9_.]*op\.zzzz\.]] : i32 +! CHECK-NOT: omp.declare_reduction @op.zzzz. +! CHECK: omp.parallel +! CHECK-SAME: reduction(@[[RED]] From 722024dc7b9417a7efddd2f37b3ae26d02761b55 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Wed, 24 Jun 2026 09:23:34 +0900 Subject: [PATCH 275/511] [SYCL][CMake] Honor LLVM_LIBDIR_SUFFIX (#22364) LLVM CMake has the `LLVM_LIBDIR_SUFFIX` option to optionally add a suffix to the install library directory, so setting `-DLLVM_LIBDIR_SUFFIX=64` would result in a `lib64` library directory. 
We didn't honor the variable correctly in `libdevice`, `xpti`, `xptifw`, the driver, `sycl-jit` or E2E testing. This option will be set by Linux distros packaging the repo, so we need it to work. After this, further work is required to get Driver `lit` tests to pass with `LLVM_LIBDIR_SUFFIX` set, as the example device libraries we have are always in `lib`. Closes: https://github.com/intel/llvm/issues/22355 Signed-off-by: Nick Sarnie --- clang/lib/Driver/ToolChains/SYCL.cpp | 33 +++++++++++-------- libdevice/cmake/modules/SYCLLibdevice.cmake | 2 +- .../lib/rtc/DeviceCompilation.cpp | 6 ++-- sycl/CMakeLists.txt | 4 ++- sycl/test-e2e/CMakeLists.txt | 21 ++++++++++++ sycl/test-e2e/lit.site.cfg.py.in | 4 +-- xpti/CMakeLists.txt | 2 +- xptifw/CMakeLists.txt | 2 +- 8 files changed, 52 insertions(+), 22 deletions(-) diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index a1388979a674d..31e16ba241e45 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "SYCL.h" #include "clang/Basic/Version.h" +#include "clang/Config/config.h" #include "clang/Driver/CommonArgs.h" #include "clang/Driver/Compilation.h" #include "clang/Driver/Driver.h" @@ -43,7 +44,7 @@ SYCLInstallationDetector::SYCLInstallationDetector( if (DriverDir.starts_with(SysRoot) && Args.hasFlag(options::OPT_fsycl, options::OPT_fno_sycl, false)) { SmallString<128> LibDir(DriverDir); - llvm::sys::path::append(LibDir, "..", "lib"); + llvm::sys::path::append(LibDir, "..", CLANG_INSTALL_LIBDIR_BASENAME); // Verify SYCL runtime library exists SmallString<128> SYCLLibPath(LibDir); @@ -54,21 +55,21 @@ SYCLInstallationDetector::SYCLInstallationDetector( } } else { SmallString<128> LibPath(DriverDir); - llvm::sys::path::append(LibPath, "..", "lib", HostTriple.str(), + llvm::sys::path::append(LibPath, "..", CLANG_INSTALL_LIBDIR_BASENAME, 
HostTriple.str(), "libsycl.so"); // Flat lib path for LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=OFF builds, // where the library is installed directly in lib/ with no triple subdir. SmallString<128> FlatLibPath(DriverDir); - llvm::sys::path::append(FlatLibPath, "..", "lib", "libsycl.so"); + llvm::sys::path::append(FlatLibPath, "..", CLANG_INSTALL_LIBDIR_BASENAME, "libsycl.so"); if (DriverDir.starts_with(SysRoot) && Args.hasFlag(options::OPT_fsycl, options::OPT_fno_sycl, false)) { // LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=ON: library is in lib/<triple>/ if (D.getVFS().exists(LibPath)) - llvm::sys::path::append(DriverDir, "..", "lib", HostTriple.str()); + llvm::sys::path::append(DriverDir, "..", CLANG_INSTALL_LIBDIR_BASENAME, HostTriple.str()); // LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=OFF: library is in lib/ else if (D.getVFS().exists(FlatLibPath)) - llvm::sys::path::append(DriverDir, "..", "lib"); + llvm::sys::path::append(DriverDir, "..", CLANG_INSTALL_LIBDIR_BASENAME); else return; // Neither path exists : broken install, leave SYCLRTLibPath // unset @@ -79,19 +80,22 @@ SYCLInstallationDetector::SYCLInstallationDetector( #else // !INTEL_CUSTOMIZATION // Intel: SYCL RT is libsycl.so; Windows lib path is handled at link stage. SmallString<128> LibPath(DriverDir); - llvm::sys::path::append(LibPath, "..", "lib", HostTriple.str(), "libsycl.so"); + llvm::sys::path::append(LibPath, "..", CLANG_INSTALL_LIBDIR_BASENAME, + HostTriple.str(), "libsycl.so"); // Flat lib path for LLVM_ENABLE_PER_TARGET_RUNTIME_DIR=OFF builds, // where the library is installed directly in lib/ with no triple subdir. SmallString<128> FlatLibPath(DriverDir); - llvm::sys::path::append(FlatLibPath, "..", "lib", "libsycl.so"); + llvm::sys::path::append(FlatLibPath, "..", CLANG_INSTALL_LIBDIR_BASENAME, + "libsycl.so"); if (DriverDir.starts_with(SysRoot) && Args.hasFlag(options::OPT_fsycl, options::OPT_fno_sycl, false)) { // We put driver in bin/compiler, so one more ../ than llorg. 
if (D.getVFS().exists(DriverDir + "/../../lib/libsycl.so")) - llvm::sys::path::append(DriverDir, "..", "..", "lib"); + llvm::sys::path::append(DriverDir, "..", "..", + CLANG_INSTALL_LIBDIR_BASENAME); else - llvm::sys::path::append(DriverDir, "..", "lib"); + llvm::sys::path::append(DriverDir, "..", CLANG_INSTALL_LIBDIR_BASENAME); SYCLRTLibPath = DriverDir; } @@ -126,8 +130,8 @@ const char *SYCLInstallationDetector::findLibspirvPath( const SmallString<64> Basename = getLibSpirvBasename(HostTriple); SmallString<256> LibclcPath(D.ResourceDir); - llvm::sys::path::append(LibclcPath, "lib", DeviceTriple.getTriple(), - Basename); + llvm::sys::path::append(LibclcPath, CLANG_INSTALL_LIBDIR_BASENAME, + DeviceTriple.getTriple(), Basename); if (D.getVFS().exists(LibclcPath)) return Args.MakeArgString(LibclcPath); @@ -158,7 +162,8 @@ void SYCLInstallationDetector::addLibspirvLinkArgs( void SYCLInstallationDetector::getSYCLDeviceLibPath( llvm::SmallVector, 4> &DeviceLibPaths) const { std::string LinuxDirSuffix = - llvm::formatv("/lib/dpcpp-{0}/sycl", DPCPP_VERSION_MAJOR); + llvm::formatv("/{0}/dpcpp-{1}/sycl", CLANG_INSTALL_LIBDIR_BASENAME, + DPCPP_VERSION_MAJOR); for (const auto &IC : InstallationCandidates) { if (!HostTriple.isWindowsMSVCEnvironment() && !HostTriple.isWindowsItaniumEnvironment()) { @@ -167,7 +172,7 @@ void SYCLInstallationDetector::getSYCLDeviceLibPath( DeviceLibPaths.emplace_back(InstallPath); } SmallString<128> InstallPath(IC); - llvm::sys::path::append(InstallPath, "lib"); + llvm::sys::path::append(InstallPath, CLANG_INSTALL_LIBDIR_BASENAME); DeviceLibPaths.emplace_back(InstallPath); } if (!HostTriple.isWindowsMSVCEnvironment() && @@ -177,7 +182,7 @@ void SYCLInstallationDetector::getSYCLDeviceLibPath( DeviceLibPaths.emplace_back(Path.str()); } SmallString<128> Path(D.SysRoot); - llvm::sys::path::append(Path, "lib"); + llvm::sys::path::append(Path, CLANG_INSTALL_LIBDIR_BASENAME); DeviceLibPaths.emplace_back(Path.str()); } diff --git 
a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index 8c8395c8dcbf3..648e4da85ca7a 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -16,7 +16,7 @@ if(WIN32) set(bc_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") else() # On other platforms, install to lib/dpcpp-/sycl - set(bc_dir "lib/dpcpp-${DPCPP_VERSION_MAJOR}/sycl") + set(bc_dir "lib${LLVM_LIBDIR_SUFFIX}/dpcpp-${DPCPP_VERSION_MAJOR}/sycl") set(install_dest_bc ${bc_dir}) set(bc_binary_dir "${CMAKE_BINARY_DIR}/${bc_dir}") endif() diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index b3f8701d71313..c932d1e80c392 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -170,9 +171,10 @@ template <> struct std::hash { namespace { std::string getLibPathSuffix() { #ifdef _WIN32 - return "/lib/"; + return llvm::formatv("/{0}/", CLANG_INSTALL_LIBDIR_BASENAME); #else - return llvm::formatv("/lib/dpcpp-{0}/sycl/", DPCPP_VERSION_MAJOR); + return llvm::formatv("/{0}/dpcpp-{1}/sycl/", CLANG_INSTALL_LIBDIR_BASENAME, + DPCPP_VERSION_MAJOR); #endif } class SYCLToolchain { diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 1d6ec795cf9e2..37591e5f8765d 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -367,7 +367,7 @@ if(SYCL_HEADERS_ONLY) foreach(sycl_compiler_tool append-file clang clang-offload-bundler clang-offload-deps clang-offload-extract clang-offload-wrapper clang-linker-wrapper - file-table-tform llc llvm-ar llvm-foreach llvm-link + file-table-tform llc llvm-ar llvm-config llvm-foreach llvm-link llvm-offload-binary llvm-objcopy llvm-spirv spirv-to-ir-wrapper sycl-post-link) if(TARGET ${sycl_compiler_tool}) @@ -434,6 +434,7 @@ add_custom_target(sycl-compiler file-table-tform llc 
llvm-ar + llvm-config llvm-foreach llvm-spirv llvm-link @@ -517,6 +518,7 @@ set( SYCL_TOOLCHAIN_DEPLOY_COMPONENTS file-table-tform llc llvm-ar + llvm-config llvm-foreach llvm-spirv llvm-link diff --git a/sycl/test-e2e/CMakeLists.txt b/sycl/test-e2e/CMakeLists.txt index d363b91a4a405..2ee613f148a87 100644 --- a/sycl/test-e2e/CMakeLists.txt +++ b/sycl/test-e2e/CMakeLists.txt @@ -102,6 +102,27 @@ if(CMPLR_RESULT EQUAL 0 AND CMPLR_VER) set(DPCPP_VERSION_MAJOR "${CMAKE_MATCH_1}") endif() +get_filename_component(DPCPP_ROOT_DIR ${SYCL_CXX_COMPILER} DIRECTORY) +if(SYCL_TEST_E2E_STANDALONE) + execute_process( + COMMAND ${DPCPP_ROOT_DIR}/llvm-config${CMAKE_EXECUTABLE_SUFFIX} --libdir + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + RESULT_VARIABLE CMPLR_LIBS_RESULT + OUTPUT_VARIABLE CMPLR_LIBS_DIR + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + if(NOT CMPLR_LIBS_RESULT EQUAL 0 OR NOT CMPLR_LIBS_DIR) + message(FATAL_ERROR "Failed to determine SYCL library directory") + endif() + set(SYCL_LIBDIR_BASENAME_FULL "${CMPLR_LIBS_DIR}") +else() + set(SYCL_LIBDIR_BASENAME_FULL "${DPCPP_ROOT_DIR}/${CLANG_INSTALL_LIBDIR_BASENAME}") +endif() + +# Extract out only the libdir folder name, don't use the full path so we can +# be more portable. 
+get_filename_component(SYCL_LIBDIR_BASENAME "${SYCL_LIBDIR_BASENAME_FULL}" NAME) + if("${DPCPP_VERSION_MAJOR}" STREQUAL "") message(FATAL_ERROR "Could not detect SYCL compiler version") endif() diff --git a/sycl/test-e2e/lit.site.cfg.py.in b/sycl/test-e2e/lit.site.cfg.py.in index 04736673a3177..5ae02752917b1 100644 --- a/sycl/test-e2e/lit.site.cfg.py.in +++ b/sycl/test-e2e/lit.site.cfg.py.in @@ -25,8 +25,8 @@ config.dump_ir_supported = lit_config.params.get("dump_ir", ("@DUMP_IR_SUPPORTED config.sycl_tools_dir = config.llvm_tools_dir config.sycl_include = os.path.join(config.dpcpp_root_dir, 'include') config.sycl_obj_root = "@CMAKE_CURRENT_BINARY_DIR@" -config.sycl_libs_dir = os.path.join(config.dpcpp_root_dir, ('bin' if platform.system() == "Windows" else 'lib')) -config.sycl_device_libs_dir = os.path.join(config.dpcpp_root_dir, ('lib' if platform.system() == "Windows" else "lib/dpcpp-@DPCPP_VERSION_MAJOR@/sycl")) +config.sycl_libs_dir = os.path.join(config.dpcpp_root_dir, ('bin' if platform.system() == "Windows" else '@SYCL_LIBDIR_BASENAME@')) +config.sycl_device_libs_dir = os.path.join(config.dpcpp_root_dir, ('@SYCL_LIBDIR_BASENAME@' if platform.system() == "Windows" else "@SYCL_LIBDIR_BASENAME@/dpcpp-@DPCPP_VERSION_MAJOR@/sycl")) config.opencl_libs_dir = (os.path.dirname("@OpenCL_LIBRARY@") if "@OpenCL_LIBRARY@" else "") config.level_zero_libs_dir = "@LEVEL_ZERO_LIBS_DIR@" diff --git a/xpti/CMakeLists.txt b/xpti/CMakeLists.txt index bf9f2201e92e3..cc56bc669658b 100644 --- a/xpti/CMakeLists.txt +++ b/xpti/CMakeLists.txt @@ -93,7 +93,7 @@ install(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/include/xpti # Install CMake config files. 
include(CMakePackageConfigHelpers) set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR}) -set(LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}) +set(LIB_INSTALL_DIR lib${LLVM_LIBDIR_SUFFIX}) set(CONFIG_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/xpti) configure_package_config_file( diff --git a/xptifw/CMakeLists.txt b/xptifw/CMakeLists.txt index 106d6c615ebae..78549309cafec 100644 --- a/xptifw/CMakeLists.txt +++ b/xptifw/CMakeLists.txt @@ -79,7 +79,7 @@ endif() # Install CMake config files include(CMakePackageConfigHelpers) set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR}) -set(LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR}${LLVM_LIBDIR_SUFFIX}) +set(LIB_INSTALL_DIR lib${LLVM_LIBDIR_SUFFIX}) set(CONFIG_INSTALL_DIR ${LIB_INSTALL_DIR}/cmake/xptifw) configure_package_config_file( From 36d5e81783c4900813cafb1312546f9b37969fd5 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 24 Jun 2026 08:24:18 +0800 Subject: [PATCH 276/511] [SYCL][NFC] Move target triple name out of loop in KernelCompiler DeviceCompilation.cpp (#22344) Signed-off-by: jinge90 --- sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp index c932d1e80c392..5c44068413c5a 100644 --- a/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp +++ b/sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp @@ -844,13 +844,12 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, LibNames.push_back(Libclc); } + std::string TripleName = (Format == BinaryFormat::PTX) ? "nvptx64-nvidia-cuda" + : "amdgcn-amd-amdhsa"; + LLVMContext &Context = Module.getContext(); SYCLToolchain &TC = SYCLToolchain::instance(); for (const std::string &LibName : LibNames) { - std::string TripleName = (Format == BinaryFormat::PTX) - ? "nvptx64-nvidia-cuda" - : "amdgcn-amd-amdhsa"; - std::string LibPath = (LibName.find("libspirv") != std::string::npos) ? 
(TC.getLibclcDir() + TripleName + "/" + LibName).str() From 4f051ae7397c08da32881063ec738ed953363eb0 Mon Sep 17 00:00:00 2001 From: Longsheng Mou Date: Wed, 24 Jun 2026 09:24:43 +0800 Subject: [PATCH 277/511] [mlir][gpu] Fix memref.dim folding with negative index (#205338) Fixes #205073. --- mlir/lib/Dialect/GPU/IR/GPUDialect.cpp | 7 ++++--- mlir/test/Dialect/GPU/canonicalize.mlir | 11 +++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp index a178bb453d86b..eaea0142e0438 100644 --- a/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp +++ b/mlir/lib/Dialect/GPU/IR/GPUDialect.cpp @@ -2323,9 +2323,10 @@ struct SimplifyDimOfAllocOp : public OpRewritePattern { if (!index) return failure(); + int64_t indexVal = index.value(); auto memrefType = llvm::dyn_cast(dimOp.getSource().getType()); - if (!memrefType || index.value() >= memrefType.getRank() || - !memrefType.isDynamicDim(index.value())) + if (!memrefType || indexVal < 0 || indexVal >= memrefType.getRank() || + !memrefType.isDynamicDim(indexVal)) return failure(); auto alloc = dimOp.getSource().getDefiningOp(); @@ -2333,7 +2334,7 @@ struct SimplifyDimOfAllocOp : public OpRewritePattern { return failure(); Value substituteOp = *(alloc.getDynamicSizes().begin() + - memrefType.getDynamicDimIndex(index.value())); + memrefType.getDynamicDimIndex(indexVal)); rewriter.replaceOp(dimOp, substituteOp); return success(); } diff --git a/mlir/test/Dialect/GPU/canonicalize.mlir b/mlir/test/Dialect/GPU/canonicalize.mlir index 7627af11c636c..9943c8b3a572e 100644 --- a/mlir/test/Dialect/GPU/canonicalize.mlir +++ b/mlir/test/Dialect/GPU/canonicalize.mlir @@ -252,6 +252,17 @@ func.func @out_of_bound_memref.dim(%arg : memref, %size: index) -> index { // ----- +// CHECK-LABEL: func @negative_memref_dim +// CHECK: %[[MEMREF:.*]] = memref.dim +// CHECK: return %[[MEMREF]] : index +func.func @negative_memref_dim(%arg: memref) -> index { + %c-2 = 
arith.constant -2 : index + %1 = memref.dim %arg, %c-2 : memref + return %1 : index +} + +// ----- + // CHECK-LABEL: func @simplify_gpu_launch func.func @simplify_gpu_launch() attributes {llvm.emit_c_interface} { %cst = arith.constant 0.000000e+00 : f32 From dba3717a7083d5ab58353735d7535d132a2f9504 Mon Sep 17 00:00:00 2001 From: Longsheng Mou Date: Wed, 24 Jun 2026 09:25:13 +0800 Subject: [PATCH 278/511] [mlir] Simplify DimOp::fold by using `getConstantIndex`(NFC) (#205343) Refactor `DimOp::fold` in both memref and tensor dialects to use the existing `getConstantIndex()` helper instead of manually extracting the index via `IntegerAttr`. --- mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp | 18 ++++++++---------- mlir/lib/Dialect/Tensor/IR/TensorOps.cpp | 18 ++++++++---------- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp index 91c5015a19c4d..0ef57172e380c 100644 --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -1112,7 +1112,7 @@ llvm::SmallBitVector SubViewOp::getDroppedDims() { OpFoldResult DimOp::fold(FoldAdaptor adaptor) { // All forms of folding require a known index. - auto index = llvm::dyn_cast_if_present(adaptor.getIndex()); + std::optional index = getConstantIndex(); if (!index) return {}; @@ -1123,40 +1123,38 @@ OpFoldResult DimOp::fold(FoldAdaptor adaptor) { // Out of bound indices produce undefined behavior but are still valid IR. // Don't choke on them. - int64_t indexVal = index.getInt(); + int64_t indexVal = index.value(); if (indexVal < 0 || indexVal >= memrefType.getRank()) return {}; // Fold if the shape extent along the given index is known. 
- if (!memrefType.isDynamicDim(index.getInt())) { + if (!memrefType.isDynamicDim(indexVal)) { Builder builder(getContext()); - return builder.getIndexAttr(memrefType.getShape()[index.getInt()]); + return builder.getIndexAttr(memrefType.getShape()[indexVal]); } // The size at the given index is now known to be a dynamic size. - unsigned unsignedIndex = index.getValue().getZExtValue(); - // Fold dim to the size argument for an `AllocOp`, `ViewOp`, or `SubViewOp`. Operation *definingOp = getSource().getDefiningOp(); if (auto alloc = dyn_cast_or_null(definingOp)) return *(alloc.getDynamicSizes().begin() + - memrefType.getDynamicDimIndex(unsignedIndex)); + memrefType.getDynamicDimIndex(indexVal)); if (auto alloca = dyn_cast_or_null(definingOp)) return *(alloca.getDynamicSizes().begin() + - memrefType.getDynamicDimIndex(unsignedIndex)); + memrefType.getDynamicDimIndex(indexVal)); if (auto view = dyn_cast_or_null(definingOp)) return *(view.getDynamicSizes().begin() + - memrefType.getDynamicDimIndex(unsignedIndex)); + memrefType.getDynamicDimIndex(indexVal)); if (auto subview = dyn_cast_or_null(definingOp)) { // The result dim is dynamic (the static case was handled above). Dropped // dims always have static size 1, so dynamic source sizes are never // dropped and map in order to the dynamic result dims. Find the k-th // dynamic source size, where k is the dynamic dim index of the result dim. 
- unsigned dynamicResultDimIdx = memrefType.getDynamicDimIndex(unsignedIndex); + unsigned dynamicResultDimIdx = memrefType.getDynamicDimIndex(indexVal); unsigned dynamicIdx = 0; for (OpFoldResult size : subview.getMixedSizes()) { if (llvm::isa(size)) diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp index 091f8b8d528f8..637366a289ac9 100644 --- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp @@ -924,7 +924,7 @@ void DimOp::inferResultRangesFromOptional(ArrayRef argRanges, OpFoldResult DimOp::fold(FoldAdaptor adaptor) { // All forms of folding require a known index. - auto index = llvm::dyn_cast_if_present(adaptor.getIndex()); + std::optional index = getConstantIndex(); if (!index) return {}; @@ -935,14 +935,14 @@ OpFoldResult DimOp::fold(FoldAdaptor adaptor) { // Out of bound indices produce undefined behavior but are still valid IR. // Don't choke on them. - int64_t indexVal = index.getInt(); + int64_t indexVal = index.value(); if (indexVal < 0 || indexVal >= tensorType.getRank()) return {}; // Fold if the shape extent along the given index is known. - if (!tensorType.isDynamicDim(index.getInt())) { + if (!tensorType.isDynamicDim(indexVal)) { Builder builder(getContext()); - return builder.getIndexAttr(tensorType.getShape()[index.getInt()]); + return builder.getIndexAttr(tensorType.getShape()[indexVal]); } Operation *definingOp = getSource().getDefiningOp(); @@ -953,11 +953,11 @@ OpFoldResult DimOp::fold(FoldAdaptor adaptor) { llvm::cast(fromElements.getResult().getType()); // The case where the type encodes the size of the dimension is handled // above. - assert(ShapedType::isDynamic(resultType.getShape()[index.getInt()])); + assert(ShapedType::isDynamic(resultType.getShape()[indexVal])); // Find the operand of the fromElements that corresponds to this index. 
auto dynExtents = fromElements.getDynamicExtents().begin(); - for (auto dim : resultType.getShape().take_front(index.getInt())) + for (auto dim : resultType.getShape().take_front(indexVal)) if (ShapedType::isDynamic(dim)) dynExtents++; @@ -965,14 +965,12 @@ OpFoldResult DimOp::fold(FoldAdaptor adaptor) { } // The size at the given index is now known to be a dynamic size. - unsigned unsignedIndex = index.getValue().getZExtValue(); - if (auto sliceOp = dyn_cast_or_null(definingOp)) { // Fold only for non-rank reduced ops. For the rank-reduced version, rely on // `resolve-shaped-type-result-dims` pass. if (sliceOp.getType().getRank() == sliceOp.getSourceType().getRank() && - sliceOp.isDynamicSize(unsignedIndex)) { - return {sliceOp.getDynamicSize(unsignedIndex)}; + sliceOp.isDynamicSize(indexVal)) { + return {sliceOp.getDynamicSize(indexVal)}; } } From c1208859fa4d33bcc02b1d7cbe82017c3dcfe919 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 24 Jun 2026 09:50:57 +0800 Subject: [PATCH 279/511] [llubi] Add basic support for pointer comparisons (#205410) This patch was a part of https://github.com/llvm/llvm-project/pull/201170. I split the `icmp ptr` support from the original PR since I am worried it might not catch up for the LLVM 23 release (#201170 is blocked by #200672 for curating mixed provenance tests). I hope we can pick most of the low-hanging fruit exposed by fuzzers before the release. The released version should be able to run csmith-generated tests without obvious false positives or crashes. BTW, this patch doesn't respect the exact semantics of `icmp ptr` (i.e., truncating the address to the address width. The naming is a bit confusing...). Currently, we don't model external state in non-address bits of a pointer in llubi. So I think it is fine. 
--- llvm/test/tools/llubi/icmp_ptr.ll | 31 ++++++++++++++++++++++++++++ llvm/tools/llubi/lib/Interpreter.cpp | 7 ++++--- 2 files changed, 35 insertions(+), 3 deletions(-) create mode 100644 llvm/test/tools/llubi/icmp_ptr.ll diff --git a/llvm/test/tools/llubi/icmp_ptr.ll b/llvm/test/tools/llubi/icmp_ptr.ll new file mode 100644 index 0000000000000..7770d8fcf1833 --- /dev/null +++ b/llvm/test/tools/llubi/icmp_ptr.ll @@ -0,0 +1,31 @@ +; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6 +; RUN: llubi --verbose < %s 2>&1 | FileCheck %s + +target datalayout = "e-p:64:64:64" + +@global = global i32 0 + +define void @main() { + %alloc = alloca i32 + %icmp1 = icmp eq ptr @global, %alloc + %icmp2 = icmp ne ptr @global, %alloc + %icmp3 = icmp eq ptr %alloc, poison + %icmp4 = icmp sle ptr @global, %alloc + %neg = getelementptr i8, ptr @global, i64 -100 + %icmp5 = icmp samesign ule ptr %neg, @global + %icmp6 = icmp samesign ule ptr %alloc, @global + %icmp7 = icmp eq <4 x ptr> , + ret void +} +; CHECK: Entering function: main +; CHECK-NEXT: %alloc = alloca i32, align 4 => ptr 0x10 [alloc] +; CHECK-NEXT: %icmp1 = icmp eq ptr @global, %alloc => F +; CHECK-NEXT: %icmp2 = icmp ne ptr @global, %alloc => T +; CHECK-NEXT: %icmp3 = icmp eq ptr %alloc, poison => poison +; CHECK-NEXT: %icmp4 = icmp sle ptr @global, %alloc => T +; CHECK-NEXT: %neg = getelementptr i8, ptr @global, i64 -100 => ptr 0xFFFFFFFFFFFFFFA4 [@global + -100] +; CHECK-NEXT: %icmp5 = icmp samesign ule ptr %neg, @global => poison +; CHECK-NEXT: %icmp6 = icmp samesign ule ptr %alloc, @global => F +; CHECK-NEXT: %icmp7 = icmp eq <4 x ptr> , => { T, F, T, poison } +; CHECK-NEXT: ret void +; CHECK-NEXT: Exiting function: main diff --git a/llvm/tools/llubi/lib/Interpreter.cpp b/llvm/tools/llubi/lib/Interpreter.cpp index f833b660f4e3a..61059371dcb58 100644 --- a/llvm/tools/llubi/lib/Interpreter.cpp +++ b/llvm/tools/llubi/lib/Interpreter.cpp @@ -2106,9 +2106,10 @@ class 
InstExecutor : public InstVisitor, visitBinOp(I, [&](const AnyValue &LHS, const AnyValue &RHS) -> AnyValue { if (LHS.isPoison() || RHS.isPoison()) return AnyValue::poison(); - // TODO: handle pointer comparison. - const APInt &LHSVal = LHS.asInteger(); - const APInt &RHSVal = RHS.asInteger(); + const APInt &LHSVal = + LHS.isPointer() ? LHS.asPointer().address() : LHS.asInteger(); + const APInt &RHSVal = + RHS.isPointer() ? RHS.asPointer().address() : RHS.asInteger(); if (I.hasSameSign() && LHSVal.isNonNegative() != RHSVal.isNonNegative()) return AnyValue::poison(); return AnyValue::boolean( From f63c340b8e266c1dfc4af9a59db054b97166c0fe Mon Sep 17 00:00:00 2001 From: Wenju He Date: Wed, 24 Jun 2026 03:59:17 +0200 Subject: [PATCH 280/511] [Clang][SYCL] Fix driver build after BoundArch struct migration (#22408) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes build breakage caused by 448b725bb78bc288c5f6e3ad27856c92902f6537 ("clang/Driver: Use struct type for BoundArch instead of StringRef"), which changed virtual signatures in `ToolChain.h` and related APIs. 
Update SYCL/CUDA driver code to use the new `BoundArch` struct type instead of `StringRef`/`const char*`: - SYCL.h/Cuda.h: update override signatures to match base class (`getDeviceLibs`, `getSupportedSanitizers`) - Driver.cpp: migrate `DeviceTargetInfo::BoundArch` field from `const char*` to `BoundArch`; fix all downstream uses including `appendSYCLDeviceLink`, `addSYCLDeviceLibs`, `CollectForEachInputs`, and `BuildJobsForActionNoCache` (fix stale `BoundArch` type-as-value references to use `BA` parameter); fix `nullptr`/`StringRef()` in `DeviceDependences::add`, `HostDependence`, and unbundling action `registerDependentActionInfo` calls - Clang.cpp: fix `getOffloadingArch()` → `.ArchName` for `StringRef` contexts; fix `doOnEachDependence` lambda param type; fix `getArgsForToolChain` calls with empty arch Co-authored-by: Claude Sonnet 4.6 --- clang/lib/Driver/Driver.cpp | 173 ++++++++++++++------------ clang/lib/Driver/ToolChains/Clang.cpp | 35 +++--- clang/lib/Driver/ToolChains/Cuda.cpp | 2 +- clang/lib/Driver/ToolChains/Cuda.h | 2 +- clang/lib/Driver/ToolChains/SYCL.cpp | 11 +- clang/lib/Driver/ToolChains/SYCL.h | 4 +- 6 files changed, 119 insertions(+), 108 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index c245482a0204c..582a5099538b3 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4913,7 +4913,7 @@ class OffloadingActionBuilder final { for (unsigned I = 0; I < ToolChains.size(); ++I) { OpenMPDeviceActions.push_back(UA); UA->registerDependentActionInfo( - ToolChains[I], /*BoundArch=*/StringRef(), Action::OFK_OpenMP); + ToolChains[I], /*BoundArch=*/BoundArch{}, Action::OFK_OpenMP); } return ABRT_Success; } @@ -4928,12 +4928,12 @@ class OffloadingActionBuilder final { "Toolchains and device action sizes do not match."); OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), - /*BoundArch=*/nullptr, Action::OFK_OpenMP); + /*BoundArch=*/BoundArch{}, Action::OFK_OpenMP); 
auto TC = ToolChains.begin(); for (Action *&A : OpenMPDeviceActions) { assert(isa(A)); OffloadAction::DeviceDependences DDep; - DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP); + DDep.add(*A, **TC, /*BoundArch=*/BoundArch{}, Action::OFK_OpenMP); A = C.MakeAction(HDep, DDep); ++TC; } @@ -4953,7 +4953,7 @@ class OffloadingActionBuilder final { auto TI = ToolChains.begin(); for (auto *A : OpenMPDeviceActions) { OffloadAction::DeviceDependences Dep; - Dep.add(*A, **TI, /*BoundArch=*/nullptr, Action::OFK_OpenMP); + Dep.add(*A, **TI, /*BoundArch=*/BoundArch{}, Action::OFK_OpenMP); AL.push_back(C.MakeAction(Dep, A->getType())); ++TI; } @@ -4971,8 +4971,8 @@ class OffloadingActionBuilder final { auto *DeviceLinkAction = C.MakeAction(LI, types::TY_Image); OffloadAction::DeviceDependences DeviceLinkDeps; - DeviceLinkDeps.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr, - Action::OFK_OpenMP); + DeviceLinkDeps.add(*DeviceLinkAction, **TC, /*BoundArch=*/BoundArch{}, + Action::OFK_OpenMP); AL.push_back(C.MakeAction(DeviceLinkDeps, DeviceLinkAction->getType())); ++TC; @@ -5049,10 +5049,10 @@ class OffloadingActionBuilder final { /// we keep them together under a struct for clarity. struct DeviceTargetInfo { DeviceTargetInfo(const ToolChain *TC, const char *BA) - : TC(TC), BoundArch(BA) {} + : TC(TC), BoundArch(BA ? 
StringRef(BA) : StringRef()) {} const ToolChain *TC; - const char *BoundArch; + ::clang::BoundArch BoundArch; }; SmallVector SYCLTargetInfoList; @@ -5410,7 +5410,8 @@ class OffloadingActionBuilder final { auto &LI = LinkInputEnum.value(); int Index = LinkInputEnum.index(); const ToolChain *TC = SYCLTargetInfoList[Index].TC; - const char *BoundArch = SYCLTargetInfoList[Index].BoundArch; + const ::clang::BoundArch &BoundArch = + SYCLTargetInfoList[Index].BoundArch; auto TripleIt = llvm::find_if(SYCLTripleList, [&](auto &SYCLTriple) { return SYCLTriple == TC->getTriple(); @@ -5458,7 +5459,10 @@ class OffloadingActionBuilder final { for (auto &TripleAndArchPair : GpuArchList) { if (ToolChains.front()->getTriple() == TripleAndArchPair.first) { Dep.add(*SYCLLinkBinary, *ToolChains.front(), - TripleAndArchPair.second, Action::OFK_SYCL); + BoundArch(TripleAndArchPair.second + ? StringRef(TripleAndArchPair.second) + : StringRef()), + Action::OFK_SYCL); } } @@ -5527,13 +5531,13 @@ class OffloadingActionBuilder final { // wrapping step to be performed solely on the host side of the toolchain. 
void appendSYCLDeviceLink(const ActionList &ListIndex, const ToolChain *TC, ActionList &DeviceLinkActions, - const char *BoundArch, bool SkipWrapper, + ::clang::BoundArch BA, bool SkipWrapper, bool AddOffloadAction = false) { auto addDeps = [&](Action *A, const ToolChain *TC, - const char *BoundArch) { + ::clang::BoundArch BA) { if (AddOffloadAction) { OffloadAction::DeviceDependences Deps; - Deps.add(*A, *TC, BoundArch, Action::OFK_SYCL); + Deps.add(*A, *TC, BA, Action::OFK_SYCL); DeviceLinkActions.push_back( C.MakeAction(Deps, A->getType())); } else @@ -5582,12 +5586,12 @@ class OffloadingActionBuilder final { if (InputType == types::TY_Archive) InputType = types::TY_Tempfilelist; auto *UA = C.MakeAction(Input, InputType); - UA->registerDependentActionInfo(TC, /*BoundArch=*/"", + UA->registerDependentActionInfo(TC, /*BoundArch=*/BoundArch{}, Action::OFK_SYCL); UA->setTargetString(TargetString.str()); // Add lists to the final link. - addDeps(UA, TC, ""); + addDeps(UA, TC, ::clang::BoundArch{}); } } if (!LinkObjects.empty()) { @@ -5692,7 +5696,7 @@ class OffloadingActionBuilder final { Action *NativeCPULib = nullptr; if (IsSPIR || IsNVPTX || IsAMDGCN || IsNativeCPU) SYCLDeviceLibLinked = addSYCLDeviceLibs( - TC, SYCLDeviceLibs, IsNativeCPU, NativeCPULib, BoundArch); + TC, SYCLDeviceLibs, IsNativeCPU, NativeCPULib, BA); JobAction *LinkSYCLLibs = C.MakeAction(SYCLDeviceLibs, types::TY_LLVM_BC); for (Action *FullLinkObject : FullLinkObjects) { @@ -5790,10 +5794,10 @@ class OffloadingActionBuilder final { FullDeviceLinkAction, types::TY_PP_Asm); auto *AsmAct = C.MakeAction(BackendAct, types::TY_Object); - addDeps(AsmAct, TC, BoundArch); + addDeps(AsmAct, TC, BA); auto *DeviceWrappingAction = C.MakeAction( PostLinkAction, types::TY_Object); - addDeps(DeviceWrappingAction, TC, BoundArch); + addDeps(DeviceWrappingAction, TC, BA); continue; } if ((IsNVPTX || IsAMDGCN) && @@ -5805,7 +5809,7 @@ class OffloadingActionBuilder final { // application. 
auto *WrapBitcodeAction = C.MakeAction( PostLinkAction, types::TY_Object, true); - addDeps(WrapBitcodeAction, TC, BoundArch); + addDeps(WrapBitcodeAction, TC, BA); } bool NoRDCFatStaticArchive = !IsRDC && @@ -5896,7 +5900,7 @@ class OffloadingActionBuilder final { } if (SkipWrapper) { // Wrapper step not requested. - addDeps(WrapperInputs.front(), TC, BoundArch); + addDeps(WrapperInputs.front(), TC, BA); continue; } @@ -5907,10 +5911,11 @@ class OffloadingActionBuilder final { if (IsSpirvAOT) { bool AddBA = (TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_gen && - BoundArch != nullptr); - addDeps(DeviceWrappingAction, TC, AddBA ? BoundArch : nullptr); + !BA.empty()); + addDeps(DeviceWrappingAction, TC, + AddBA ? BA : ::clang::BoundArch{}); } else { - addDeps(DeviceWrappingAction, TC, BoundArch); + addDeps(DeviceWrappingAction, TC, BA); } } } @@ -5918,7 +5923,7 @@ class OffloadingActionBuilder final { bool addSYCLDeviceLibs(const ToolChain *TC, ActionList &DeviceLinkObjects, bool isNativeCPU, Action *&NativeCPULib, - const char *BoundArch) { + ::clang::BoundArch BA) { int NumOfDeviceLibLinked = 0; SmallVector, 4> LibLocCandidates; SYCLInstallation.getSYCLDeviceLibPath(LibLocCandidates); @@ -5988,7 +5993,7 @@ class OffloadingActionBuilder final { const toolchains::CudaToolChain *CudaTC = static_cast(TC); std::string LibDeviceFile = - CudaTC->CudaInstallation.getLibDeviceFile(BoundArch); + CudaTC->CudaInstallation.getLibDeviceFile(BA.ArchName); if (!LibDeviceFile.empty()) { Arg *CudaDeviceLibInputArg = makeInputArg( Args, C.getDriver().getOpts(), Args.MakeArgString(LibDeviceFile)); @@ -6023,15 +6028,15 @@ class OffloadingActionBuilder final { "No toolchain found for this AOT input"); DA.add(*DeviceWrappingAction, **SYCLDeviceTC, - /*BoundArch=*/nullptr, Action::OFK_SYCL); + /*BoundArch=*/BoundArch{}, Action::OFK_SYCL); } } void addDeviceLinkDependencies(OffloadDepsJobAction *DA) override { unsigned I = 0; for (auto &TargetInfo : SYCLTargetInfoList) { - 
DA->registerDependentActionInfo(TargetInfo.TC, TargetInfo.BoundArch, - Action::OFK_SYCL); + DA->registerDependentActionInfo( + TargetInfo.TC, TargetInfo.BoundArch.ArchName, Action::OFK_SYCL); DeviceLinkerInputs[I++].push_back(DA); } } @@ -6165,9 +6170,9 @@ class OffloadingActionBuilder final { for (auto &SyclTarget : Targets) { std::string SectionTriple = SyclTarget.TC->getTriple().str(); - if (SyclTarget.BoundArch) { + if (!SyclTarget.BoundArch.empty()) { SectionTriple += "-"; - SectionTriple += SyclTarget.BoundArch; + SectionTriple += SyclTarget.BoundArch.ArchName; } // If any matching section is found, we are good. if (std::find(UniqueSections.begin(), UniqueSections.end(), @@ -6381,10 +6386,11 @@ class OffloadingActionBuilder final { const char *OffloadArch = TargetTripleArchPair.second; // Add an arch to the SYCLTargetInfoList only if it is not // already present in the list. - if (llvm::none_of( - SYCLTargetInfoList, [&](auto &DeviceTargetInfo) { - return OffloadArch == DeviceTargetInfo.BoundArch; - })) + if (llvm::none_of(SYCLTargetInfoList, + [&](auto &DeviceTargetInfo) { + return StringRef(OffloadArch) == + DeviceTargetInfo.BoundArch.ArchName; + })) SYCLTargetInfoList.emplace_back(*TCIt, OffloadArch); } } @@ -6480,7 +6486,7 @@ class OffloadingActionBuilder final { SmallVector> TCAndArchs; for (auto &TargetInfo : SYCLTargetInfoList) { const ToolChain *TC = TargetInfo.TC; - StringRef Arch(TargetInfo.BoundArch); + StringRef Arch(TargetInfo.BoundArch.ArchName); std::pair TCAndArch(TC, Arch); TCAndArchs.push_back(TCAndArch); } @@ -6637,7 +6643,7 @@ class OffloadingActionBuilder final { if (OffloadKind == (Action::OFK_Cuda | Action::OFK_SYCL)) { OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain(), - /*BoundArch=*/nullptr, Action::OFK_SYCL | Action::OFK_Cuda); + /*BoundArch=*/BoundArch{}, Action::OFK_SYCL | Action::OFK_Cuda); return C.MakeAction(HDep, DDeps); } @@ -6800,7 +6806,7 @@ class OffloadingActionBuilder final { 
OffloadAction::HostDependence HDep( *LA, *C.getSingleOffloadToolChain(), - /*BoundArch*/ nullptr, ActiveOffloadKinds); + /*BoundArch=*/BoundArch{}, ActiveOffloadKinds); auto *DA = C.MakeAction(HDep, types::TY_LLVM_BC); @@ -6837,7 +6843,7 @@ class OffloadingActionBuilder final { // This created host action has no originating input argument, therefore // needs to set its offloading kind directly. HA->propagateHostOffloadInfo(SB->getAssociatedOffloadKind(), - /*BoundArch=*/nullptr); + /*BoundArch=*/BoundArch{}); Inputs.push_back(HA); } } @@ -6856,7 +6862,7 @@ class OffloadingActionBuilder final { for (size_t i = 0; i < LinkerInputs.size(); ++i) { OffloadAction::HostDependence HDep( *LinkerInputs[i], *C.getSingleOffloadToolChain(), - nullptr, + BoundArch{}, InputArgToOffloadKindMap[HostActionToInputArgMap[LinkerInputs[i]]]); LinkerInputs[i] = C.MakeAction(HDep); } @@ -7211,7 +7217,7 @@ static void extractPCH(bool UseNewOffloadingDriver, C.MakeAction(A, A->getType()); UnbundlingHostAction->registerDependentActionInfo( C.getSingleOffloadToolChain(), - /*BoundArch=*/StringRef(), Action::OFK_Host); + /*BoundArch=*/BoundArch{}, Action::OFK_Host); OAB->addHostDependenceToDeviceActions(A, MainArg, Args); auto PL = types::getCompilationPhases(types::TY_PCH); OAB->addDeviceDependencesToHostAction(A, MainArg, phases::Compile, @@ -7219,7 +7225,7 @@ static void extractPCH(bool UseNewOffloadingDriver, AL.push_back(A); OffloadAction::DeviceDependences DDep; DDep.add(*UnbundlingHostAction, - *C.getSingleOffloadToolChain(), nullptr, + *C.getSingleOffloadToolChain(), BoundArch{}, C.getActiveOffloadKinds()); } else { AL.push_back(A); @@ -7895,8 +7901,9 @@ Driver::getOffloadArchs(Compilation &C, const llvm::opt::DerivedArgList &Args, // For SYCL offloading, we need to check the triple for NVPTX or AMDGPU. // The default arch is set for NVPTX if not provided. For AMDGPU, emit // an error as the user is responsible to set the arch. 
- if (auto *Arg = C.getArgsForToolChain(&TC, /*BoundArch=*/"", Kind) - .getLastArg(options::OPT_march_EQ)) { + if (auto *Arg = + C.getArgsForToolChain(&TC, /*BoundArch=*/BoundArch{}, Kind) + .getLastArg(options::OPT_march_EQ)) { Archs.insert(Arg->getValue()); } else { if (TC.getTriple().isNVPTX()) @@ -8031,8 +8038,8 @@ Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, C.MakeAction(PPActions, types::TY_PP_CXX); DDep.add(*PackagerAction, - *C.getSingleOffloadToolChain(), nullptr, - C.getActiveOffloadKinds()); + *C.getSingleOffloadToolChain(), + BoundArch{}, C.getActiveOffloadKinds()); DeviceActions.push_back(PackagerAction); continue; } @@ -8153,8 +8160,12 @@ Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, } // For SYCL based offloading, populate the device traits macros that are // used during compilation. - if (Kind == Action::OFK_SYCL) - tools::SYCL::populateSYCLDeviceTraitsMacrosArgs(C, Args, TCAndArchs); + if (Kind == Action::OFK_SYCL) { + SmallVector> TCAndArchsStr; + for (auto &P : TCAndArchs) + TCAndArchsStr.push_back({P.first, P.second.ArchName}); + tools::SYCL::populateSYCLDeviceTraitsMacrosArgs(C, Args, TCAndArchsStr); + } } // Now that we have all of the offload actions populated, we special case @@ -9125,7 +9136,7 @@ static std::string GetTriplePlusArchString(const ToolChain *TC, BoundArch BA, static void CollectForEachInputs( InputInfoList &InputInfos, const Action *SourceAction, const ToolChain *TC, - StringRef BoundArch, Action::OffloadKind TargetDeviceOffloadKind, + BoundArch BA, Action::OffloadKind TargetDeviceOffloadKind, const std::map, InputInfoList> &CachedResults, const ForEachWrappingAction *FEA) { @@ -9133,15 +9144,14 @@ static void CollectForEachInputs( // Search for the Input, if not in the cache assume actions were collapsed // so recurse. 
auto Lookup = CachedResults.find( - {Input, - GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}); + {Input, GetTriplePlusArchString(TC, BA, TargetDeviceOffloadKind)}); if (Lookup != CachedResults.end()) { if (!FEA->getSerialActions().count(Input)) { InputInfos.append(Lookup->second); } } else { - CollectForEachInputs(InputInfos, Input, TC, BoundArch, - TargetDeviceOffloadKind, CachedResults, FEA); + CollectForEachInputs(InputInfos, Input, TC, BA, TargetDeviceOffloadKind, + CachedResults, FEA); } } } @@ -9473,23 +9483,23 @@ InputInfoList Driver::BuildJobsForActionNoCache( // Check that the main action wasn't already processed. auto MainActionOutput = CachedResults.find( {FEA->getJobAction(), - GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}); + GetTriplePlusArchString(TC, BA, TargetDeviceOffloadKind)}); if (MainActionOutput != CachedResults.end()) { // The input was processed on behalf of another foreach. // Add entry in cache and return. - CachedResults[{FEA, GetTriplePlusArchString(TC, BoundArch, - TargetDeviceOffloadKind)}] = + CachedResults[{ + FEA, GetTriplePlusArchString(TC, BA, TargetDeviceOffloadKind)}] = MainActionOutput->second; return MainActionOutput->second; } // Build commands for the TFormInput then take any command added after as // needing a llvm-foreach wrapping. 
- BuildJobsForAction(C, FEA->getTFormInput(), TC, BoundArch, + BuildJobsForAction(C, FEA->getTFormInput(), TC, BA, /*AtTopLevel=*/false, MultipleArchs, LinkingOutput, CachedResults, TargetDeviceOffloadKind); unsigned OffsetIdx = C.getJobs().size(); - BuildJobsForAction(C, FEA->getJobAction(), TC, BoundArch, + BuildJobsForAction(C, FEA->getJobAction(), TC, BA, /*AtTopLevel=*/false, MultipleArchs, LinkingOutput, CachedResults, TargetDeviceOffloadKind); @@ -9510,11 +9520,13 @@ InputInfoList Driver::BuildJobsForActionNoCache( C.addCommand(std::move(Cmd)); continue; } - ActionResult = CachedResults.at( - {SourceAction, - GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}).front(); + ActionResult = + CachedResults + .at({SourceAction, + GetTriplePlusArchString(TC, BA, TargetDeviceOffloadKind)}) + .front(); InputInfoList InputInfos; - CollectForEachInputs(InputInfos, SourceAction, TC, BoundArch, + CollectForEachInputs(InputInfos, SourceAction, TC, BA, TargetDeviceOffloadKind, CachedResults, FEA); const Tool *Creator = &Cmd->getCreator(); StringRef ParallelJobs; @@ -9565,10 +9577,10 @@ InputInfoList Driver::BuildJobsForActionNoCache( // FIXME: Clean this up. bool SubJobAtTopLevel = AtTopLevel && (isa(A) || isa(A)); - InputInfos.append(BuildJobsForAction( - C, Input, JATC, DA ? DA->getOffloadingArch() : BA, - SubJobAtTopLevel, MultipleArchs, LinkingOutput, CachedResults, - A->getOffloadingDeviceKind())); + InputInfos.append( + BuildJobsForAction(C, Input, JATC, DA ? DA->getOffloadingArch() : BA, + SubJobAtTopLevel, MultipleArchs, LinkingOutput, + CachedResults, A->getOffloadingDeviceKind())); } // Always use the first file input as the base input. @@ -9680,7 +9692,7 @@ InputInfoList Driver::BuildJobsForActionNoCache( // Now that we have all the results generated, select the one that should // be returned for the current depending action. 
std::pair ActionTC = { - A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; + A, GetTriplePlusArchString(TC, BA, TargetDeviceOffloadKind)}; assert(CachedResults.find(ActionTC) != CachedResults.end() && "Result does not exist??"); Result = CachedResults[ActionTC].front(); @@ -9695,22 +9707,22 @@ InputInfoList Driver::BuildJobsForActionNoCache( /*CreatePrefixForHost=*/true); auto CurI = InputInfo( DA, - GetNamedOutputPath(C, *DA, BaseInput, DI.DependentBoundArch, - /*AtTopLevel=*/false, - MultipleArchs || - DI.DependentOffloadKind == Action::OFK_HIP, - OffloadingPrefix), + GetNamedOutputPath( + C, *DA, BaseInput, BoundArch(DI.DependentBoundArch), + /*AtTopLevel=*/false, + MultipleArchs || DI.DependentOffloadKind == Action::OFK_HIP, + OffloadingPrefix), BaseInput); // Save the result. UnbundlingResults.push_back(CurI); // Get the unique string identifier for this dependence and cache the // result. - StringRef Arch = TargetDeviceOffloadKind == Action::OFK_HIP - ? DI.DependentOffloadKind == Action::OFK_Host - ? StringRef() - : DI.DependentBoundArch - : BoundArch; + BoundArch Arch = TargetDeviceOffloadKind == Action::OFK_HIP + ? (DI.DependentOffloadKind == Action::OFK_Host + ? BoundArch{} + : BoundArch(DI.DependentBoundArch)) + : BA; CachedResults[{A, GetTriplePlusArchString(DI.DependentToolChain, Arch, DI.DependentOffloadKind)}] = { @@ -9762,9 +9774,9 @@ InputInfoList Driver::BuildJobsForActionNoCache( BaseInput = C.getArgs().MakeArgString(std::string(BaseInput) + "-wrapper"); } - Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BA, - AtTopLevel, MultipleArchs, - OffloadingPrefix), + Result = InputInfo(A, + GetNamedOutputPath(C, *JA, BaseInput, BA, AtTopLevel, + MultipleArchs, OffloadingPrefix), BaseInput); // Enable time tracing capability for SYCL host and device compilations // as well. 
@@ -9854,8 +9866,7 @@ static bool HasPreprocessOutput(const Action &JA) { const char *Driver::CreateTempFile(Compilation &C, StringRef Prefix, StringRef Suffix, bool MultipleArchs, - StringRef BoundArchStr, - types::ID Type, + StringRef BoundArchStr, types::ID Type, bool NeedUniqueDirectory) const { SmallString<128> TmpName; Arg *A = C.getArgs().getLastArg(options::OPT_fcrash_diagnostics_dir); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 6e9ac4a03b3b5..4f5d9991f3b8e 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5651,7 +5651,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (IsSYCL) { if (IsSYCLDevice) { if (Triple.isNVPTX()) { - StringRef GPUArchName = JA.getOffloadingArch(); + StringRef GPUArchName = JA.getOffloadingArch().ArchName; // TODO: Once default arch is moved to at least SM_53, empty arch should // also result in the flag added. if (!GPUArchName.empty() && @@ -5780,7 +5780,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if ((Triple.isSPIR() && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen) || Triple.isNVPTX() || Triple.isAMDGCN()) { - StringRef Device = JA.getOffloadingArch(); + StringRef Device = JA.getOffloadingArch().ArchName; if (!Device.empty() && !SYCL::gen::getGenDeviceMacro(Device).empty()) { Macro = "-D"; @@ -10307,8 +10307,7 @@ void OffloadBundler::ConstructJob(Compilation &C, const JobAction &JA, TCArgs, CurDep->getOffloadingArch())) .normalize(llvm::Triple::CanonicalForm::FOUR_IDENT); } - if (CurKind != Action::OFK_Host && - !CurDep->getOffloadingArch().empty() && + if (CurKind != Action::OFK_Host && !CurDep->getOffloadingArch().empty() && !TCArgs.hasArg(options::OPT_fno_bundle_offload_arch)) { Triples += '-'; Triples += CurDep->getOffloadingArch().ArchName; @@ -10540,8 +10539,8 @@ static void addRunTimeWrapperOpts(Compilation &C, if (TT.getSubArch() == llvm::Triple::NoSubArch) { // Only store 
compile/link opts in the image descriptor for the SPIR-V // target; AOT compilation has already been performed otherwise. - const ArgList &Args = C.getArgsForToolChain(nullptr, StringRef(), - DeviceOffloadKind); + const ArgList &Args = + C.getArgsForToolChain(nullptr, BoundArch{}, DeviceOffloadKind); const ToolChain *HostTC = C.getSingleOffloadToolChain(); SYCLTC.AddSPIRVImpliedTargetArgs(TT, Args, BuildArgs, JA, *HostTC); SYCLTC.TranslateBackendTargetArgs(TT, Args, BuildArgs); @@ -10720,7 +10719,7 @@ void OffloadWrapper::ConstructJob(Compilation &C, const JobAction &JA, "Expected one device dependence!"); Action::OffloadKind DeviceKind = Action::OFK_None; const ToolChain *DeviceTC = nullptr; - OA->doOnEachDependence([&](Action *A, const ToolChain *TC, const char *) { + OA->doOnEachDependence([&](Action *A, const ToolChain *TC, BoundArch) { DeviceKind = A->getOffloadingDeviceKind(); DeviceTC = TC; }); @@ -10816,7 +10815,7 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA, // Instead, we pass it like arch=pvc,arch=bdw, then // llvm-offload-binary joins them back to arch=pvc,bdw. 
SmallVector Archs; - Arch.split(Archs, ','); + Arch.ArchName.split(Archs, ','); if (Archs.size() > 1) { Parts[2] = "arch=" + llvm::join(Archs, ",arch="); } @@ -10853,12 +10852,12 @@ void OffloadPackager::ConstructJob(Compilation &C, const JobAction &JA, } }; const ArgList &Args = - C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_SYCL); + C.getArgsForToolChain(nullptr, BoundArch{}, Action::OFK_SYCL); const ToolChain *HostTC = C.getSingleOffloadToolChain(); const toolchains::SYCLToolChain &SYCLTC = static_cast(*TC); SYCLTC.AddSPIRVImpliedTargetArgs(TC->getTriple(), Args, BuildArgs, JA, - *HostTC, Arch); + *HostTC, Arch.ArchName); SYCLTC.TranslateBackendTargetArgs(TC->getTriple(), Args, BuildArgs); createArgString("compile-opts="); BuildArgs.clear(); @@ -10904,8 +10903,8 @@ void OffloadPackagerExtract::ConstructJob(Compilation &C, const JobAction &JA, // and associated offload kind. assert(Output.isFilename() && "Invalid output."); StringRef File = Output.getFilename(); - StringRef Arch = OffloadAction->getOffloadingArch() - ? OffloadAction->getOffloadingArch() + StringRef Arch = !OffloadAction->getOffloadingArch().empty() + ? 
OffloadAction->getOffloadingArch().ArchName : TCArgs.getLastArgValue(options::OPT_march_EQ); StringRef Kind = Action::GetOffloadKindName(OffloadAction->getOffloadingDeviceKind()); @@ -11181,7 +11180,7 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA, // Handle -Xspirv-translator TC.TranslateTargetOpt( Triple, TCArgs, TranslatorArgs, options::OPT_Xspirv_translator, - options::OPT_Xspirv_translator_EQ, JA.getOffloadingArch()); + options::OPT_Xspirv_translator_EQ, JA.getOffloadingArch().ArchName); } } for (auto I : Inputs) { @@ -11494,9 +11493,9 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, // Add output file table file option assert(Output.isFilename() && "output must be a filename"); - StringRef Device = JA.getOffloadingArch(); + StringRef Device = JA.getOffloadingArch().ArchName; std::string OutputArg = Output.getFilename(); - if (T.getSubArch() == llvm::Triple::SPIRSubArch_gen && Device.data()) + if (T.getSubArch() == llvm::Triple::SPIRSubArch_gen && !Device.empty()) OutputArg = ("intel_gpu_" + Device + "," + OutputArg).str(); else if (T.getSubArch() == llvm::Triple::SPIRSubArch_x86_64) OutputArg = "spir64_x86_64," + OutputArg; @@ -11507,7 +11506,7 @@ void SYCLPostLink::ConstructJob(Compilation &C, const JobAction &JA, // Handle -Xdevice-post-link TC.TranslateTargetOpt(T, TCArgs, CmdArgs, options::OPT_Xdevice_post_link, options::OPT_Xdevice_post_link_EQ, - JA.getOffloadingArch()); + JA.getOffloadingArch().ArchName); addArgs(CmdArgs, TCArgs, {"-o", OutputArg}); @@ -11695,7 +11694,7 @@ void SpirvToIrWrapper::ConstructJob(Compilation &C, const JobAction &JA, TC.TranslateTargetOpt(getToolChain().getTriple(), TCArgs, CmdArgs, options::OPT_Xspirv_to_ir_wrapper, options::OPT_Xspirv_to_ir_wrapper_EQ, - JA.getOffloadingArch()); + JA.getOffloadingArch().ArchName); auto Cmd = std::make_unique( JA, *this, ResponseFileSupport::None(), @@ -12212,7 +12211,7 @@ void LinkerWrapper::ConstructJob(Compilation &C, const JobAction &JA, 
// Add option to enable creating of the .syclbin file. const ArgList &Args = - C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_SYCL); + C.getArgsForToolChain(nullptr, BoundArch{}, Action::OFK_SYCL); if (Arg *A = Args.getLastArg(options::OPT_fsyclbin_EQ)) CmdArgs.push_back( Args.MakeArgString("--syclbin=" + StringRef{A->getValue()})); diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp index 36bce6e9d3611..ebc7622560bda 100644 --- a/clang/lib/Driver/ToolChains/Cuda.cpp +++ b/clang/lib/Driver/ToolChains/Cuda.cpp @@ -1209,7 +1209,7 @@ void CudaToolChain::AddIAMCUIncludeArgs(const ArgList &Args, llvm::SmallVector CudaToolChain::getDeviceLibs( - const llvm::opt::ArgList &DriverArgs, llvm::StringRef BoundArch, + const llvm::opt::ArgList &DriverArgs, BoundArch BA, const Action::OffloadKind DeviceOffloadingKind) const { StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ); std::string LibDeviceFile = CudaInstallation.getLibDeviceFile(GpuArch); diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h index d57fed2afd3c1..7ed6af4dd7109 100644 --- a/clang/lib/Driver/ToolChains/Cuda.h +++ b/clang/lib/Driver/ToolChains/Cuda.h @@ -198,7 +198,7 @@ class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain { llvm::opt::ArgStringList &CC1Args) const override; llvm::SmallVector - getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch, + getDeviceLibs(const llvm::opt::ArgList &Args, BoundArch BA, const Action::OffloadKind DeviceOffloadingKind) const override; SanitizerMask diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 2f24fc3828a7f..ab313f12dc2d5 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -1102,7 +1102,7 @@ void SYCL::gen::BackendCompiler::ConstructJob(Compilation &C, // The next line prevents ocloc from modifying the image name CmdArgs.push_back("-output_no_suffix"); 
CmdArgs.push_back("-spirv_input"); - StringRef Device = JA.getOffloadingArch(); + StringRef Device = JA.getOffloadingArch().ArchName; // Add -Xsycl-target* options. const toolchains::SYCLToolChain &TC = @@ -1517,7 +1517,8 @@ void SYCLToolChain::addClangTargetOptions( return; llvm::SmallVector BCLibs; - BCLibs.append(SYCLToolChain::getDeviceLibs(DriverArgs, BA, DeviceOffloadingKind)); + BCLibs.append( + SYCLToolChain::getDeviceLibs(DriverArgs, BA, DeviceOffloadingKind)); for (const auto &BCFile : BCLibs) { CC1Args.push_back(BCFile.ShouldInternalize ? "-mlink-builtin-bitcode" : "-mlink-bitcode-file"); @@ -1788,7 +1789,7 @@ void SYCLToolChain::AddSPIRVImpliedTargetArgs(const llvm::Triple &Triple, // For GEN (spir64_gen) we have implied -device settings given usage // of intel_gpu_ as a target. Handle those here, and also check that no // other -device was passed, as that is a conflict. - StringRef DepInfo = JA.getOffloadingArch(); + StringRef DepInfo = JA.getOffloadingArch().ArchName; if (!DepInfo.empty()) { ArgStringList TargArgs; Args.AddAllArgValues(TargArgs, options::OPT_Xs, options::OPT_Xs_separate); @@ -1985,7 +1986,7 @@ void SYCLToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args, llvm::SmallVector SYCLToolChain::getDeviceLibs( - const llvm::opt::ArgList &DriverArgs, llvm::StringRef BoundArch, + const llvm::opt::ArgList &DriverArgs, BoundArch BA, const Action::OffloadKind DeviceOffloadingKind) const { llvm::SmallVector BCLibs; @@ -2020,7 +2021,7 @@ SYCLToolChain::getDeviceLibs( } SanitizerMask SYCLToolChain::getSupportedSanitizers( - StringRef /*BoundArch*/, Action::OffloadKind /*DeviceOffloadKind*/) const { + BoundArch /*BA*/, Action::OffloadKind /*DeviceOffloadKind*/) const { return SanitizerKind::Address | SanitizerKind::Memory | SanitizerKind::Thread; } diff --git a/clang/lib/Driver/ToolChains/SYCL.h b/clang/lib/Driver/ToolChains/SYCL.h index 1d3af0308d6b5..777c1193c260f 100644 --- a/clang/lib/Driver/ToolChains/SYCL.h +++ 
b/clang/lib/Driver/ToolChains/SYCL.h @@ -191,11 +191,11 @@ class LLVM_LIBRARY_VISIBILITY SYCLToolChain : public ToolChain { // Provides a vector of device library names including the full path that are // associated with the offloading kind. llvm::SmallVector - getDeviceLibs(const llvm::opt::ArgList &Args, llvm::StringRef BoundArch, + getDeviceLibs(const llvm::opt::ArgList &Args, BoundArch BA, const Action::OffloadKind DeviceOffloadingKind) const override; SanitizerMask - getSupportedSanitizers(StringRef BoundArch, + getSupportedSanitizers(BoundArch BA, Action::OffloadKind DeviceOffloadKind) const override; protected: From 9fa8669d89d37c2200b3a5d1dd21d0b0355b7196 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Tue, 23 Jun 2026 19:17:54 -0700 Subject: [PATCH 281/511] [clang] Exclude EmptyRecord when calculating larger CXX records (#205040) To match with GCC: https://godbolt.org/z/KPKGhhenK Fixes: #203760 Assisted-by: Claude Sonnet 4.6 --- clang/docs/ReleaseNotes.rst | 2 ++ clang/lib/CodeGen/Targets/X86.cpp | 1 + clang/test/CodeGen/X86/avx-cxx-record.cpp | 18 ++++++++++++++++++ 3 files changed, 21 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 0ff8e8f5afd3c..8bb17755b28f5 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -113,6 +113,8 @@ ABI Changes in This Version compilers. On most targets this is not a breaking change because ``fastcc`` and the platform C calling convention agree for ``void(ptr)``. It is an ABI break on i686, MIPS O32, PowerPC64 ELFv1, and Lanai. +- Fixed incorrect struct return when single large vector (256/512-bit) used on + x86-64 targets. (#GH203760) The bug was introduced since Clang 21. 
(#GH120670) AST Dumping Potentially Breaking Changes ---------------------------------------- diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index dbe4d656aabc5..abf903647336d 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -2085,6 +2085,7 @@ void X86_64ABIInfo::classify(QualType Ty, uint64_t OffsetBase, Class &Lo, Lo = merge(Lo, FieldLo); Hi = merge(Hi, FieldHi); if (returnCXXRecordGreaterThan128InMem() && + !isEmptyRecord(getContext(), I.getType(), true) && (Size > 128 && (Size != getContext().getTypeSize(I.getType()) || Size > getNativeVectorSizeForAVXABI(AVXLevel)))) { // The only case a 256(or 512)-bit wide vector could be used to return diff --git a/clang/test/CodeGen/X86/avx-cxx-record.cpp b/clang/test/CodeGen/X86/avx-cxx-record.cpp index b20bcdd616a43..e64cef32e78ec 100644 --- a/clang/test/CodeGen/X86/avx-cxx-record.cpp +++ b/clang/test/CodeGen/X86/avx-cxx-record.cpp @@ -46,3 +46,21 @@ YMM2 bar() { ((YMM1<1>*)&result)->x = UInt64x4{5, 6, 7, 8}; return result; } + +// Test that empty base classes do not prevent structs with a single wide +// vector member from being passed/returned in registers (issue #203760). +struct EmptyBase {}; + +struct YMMWithEmptyBase : EmptyBase { + UInt64x4 x; +}; + +// A struct with a single 256-bit vector and an empty base should use registers, +// matching the behavior with no base class. 
+// CHECK: define{{.*}} <4 x i64> @_Z18ymm_empty_base_retv() +// CLANG-20: define{{.*}} <4 x i64> @_Z18ymm_empty_base_retv() +YMMWithEmptyBase ymm_empty_base_ret() { return {}; } + +// CHECK: define{{.*}} i64 @_Z19ymm_empty_base_pass16YMMWithEmptyBase(<4 x i64> +// CLANG-20: define{{.*}} i64 @_Z19ymm_empty_base_pass16YMMWithEmptyBase(<4 x i64> +unsigned long long ymm_empty_base_pass(YMMWithEmptyBase x) { return x.x[0]; } From 1f0799cc766c2f1f00500d43d82d14f06153ae7e Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Wed, 24 Jun 2026 02:27:38 +0000 Subject: [PATCH 282/511] [Github] Bump release-binaries python version (#179287) This makes it more consistent with the rest of the repository. --- .github/workflows/release-binaries.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml index f900d59859ae8..6fdf5e8c14cb8 100644 --- a/.github/workflows/release-binaries.yml +++ b/.github/workflows/release-binaries.yml @@ -230,13 +230,13 @@ jobs: # get changed unexpectedly. - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: - python-version: '3.11.9' + python-version: '3.14.6' # For some reason this is needed on Windows or else the build system can't find python3.lib. - name: Setup Python library path if: runner.os == 'Windows' run: | - echo "LIB=$env:LIB;C:\hostedtoolcache\windows\Python\3.11.9\$($env:RUNNER_ARCH.ToLower())\libs" >> $env:GITHUB_ENV + echo "LIB=$env:LIB;C:\hostedtoolcache\windows\Python\3.14.6\$($env:RUNNER_ARCH.ToLower())\libs" >> $env:GITHUB_ENV - name: Setup crlf if: runner.os == 'Windows' From 20cf0468128bc370bb680347dc55d0c0569cbc1c Mon Sep 17 00:00:00 2001 From: AZero13 Date: Tue, 23 Jun 2026 22:28:12 -0400 Subject: [PATCH 283/511] [AArch64] Add final missing instructions to sForm (#167518) Fix missing opcodes in table of flag-setting instructions. 
--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 38 ++- .../AArch64/peephole-substitute-cmp-adcs.mir | 286 ++++++++++++++++++ 2 files changed, 312 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/peephole-substitute-cmp-adcs.mir diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 57ea2d2f2f992..d5d1e17216e63 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2078,12 +2078,16 @@ static unsigned sForm(MachineInstr &Instr) { case AArch64::ADDSXri: case AArch64::ADDSWrx: case AArch64::ADDSXrx: + case AArch64::ADDSWrs: + case AArch64::ADDSXrs: case AArch64::SUBSWrr: case AArch64::SUBSWri: case AArch64::SUBSWrx: + case AArch64::SUBSWrs: case AArch64::SUBSXrr: case AArch64::SUBSXri: case AArch64::SUBSXrx: + case AArch64::SUBSXrs: case AArch64::ANDSWri: case AArch64::ANDSWrr: case AArch64::ANDSWrs: @@ -2094,6 +2098,10 @@ static unsigned sForm(MachineInstr &Instr) { case AArch64::BICSXrr: case AArch64::BICSWrs: case AArch64::BICSXrs: + case AArch64::ADCSWr: + case AArch64::ADCSXr: + case AArch64::SBCSWr: + case AArch64::SBCSXr: return Instr.getOpcode(); case AArch64::ADDWrr: @@ -2108,6 +2116,10 @@ static unsigned sForm(MachineInstr &Instr) { return AArch64::ADDSWrx; case AArch64::ADDXrx: return AArch64::ADDSXrx; + case AArch64::ADDWrs: + return AArch64::ADDSWrs; + case AArch64::ADDXrs: + return AArch64::ADDSXrs; case AArch64::ADCWr: return AArch64::ADCSWr; case AArch64::ADCXr: @@ -2124,6 +2136,10 @@ static unsigned sForm(MachineInstr &Instr) { return AArch64::SUBSWrx; case AArch64::SUBXrx: return AArch64::SUBSXrx; + case AArch64::SUBWrs: + return AArch64::SUBSWrs; + case AArch64::SUBXrs: + return AArch64::SUBSXrs; case AArch64::SBCWr: return AArch64::SBCSWr; case AArch64::SBCXr: @@ -2315,13 +2331,13 @@ static bool isANDOpcode(MachineInstr &MI) { /// MI and CmpInstr /// or if MI opcode is not the S form there must be neither 
defs of flags /// nor uses of flags between MI and CmpInstr. -/// - and, if C/V flags are not used after CmpInstr -/// or if N flag is used but MI produces poison value if signed overflow -/// occurs. +/// - and, C is not used after CmpInstr; CmpInstr's C is from adds/subs #0 on +/// SrcReg and can differ from MI (e.g. carry out of ADCS/SBCS). +/// - and, V is not used after CmpInstr unless MI is AND/BIC (V cleared) or MI +/// has NoSWrap (overflow is poison and the fold is still safe). static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI) { - // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction - // that may or may not set flags. + // MI is an opcode sForm maps (add/sub/adc/sbc/and/bic and their S forms). assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END); const unsigned CmpOpcode = CmpInstr.getOpcode(); @@ -2336,13 +2352,11 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, if (!NZVCUsed || NZVCUsed->C) return false; - // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either - // '%vreg = add ...' or '%vreg = sub ...'. - // Condition flag V is used to indicate signed overflow. - // 1) MI and CmpInstr set N and V to the same value. - // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when - // signed overflow occurs, so CmpInstr could still be simplified away. - // Note that Ands and Bics instructions always clear the V flag. + // CmpInstr is ADDS/SUBS with immediate 0 on SrcReg (compare SrcReg to zero). + // After the fold, users see NZCV from MI (or its S form), not from CmpInstr. + // N/Z match CmpInstr for the value in SrcReg; C/V need not match in general + // (e.g. ADCS vs adds #0), so we require C unused after CmpInstr and gate V + // as below. NoSWrap makes signed overflow poison; AND/BIC clear V. 
if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI)) return false; diff --git a/llvm/test/CodeGen/AArch64/peephole-substitute-cmp-adcs.mir b/llvm/test/CodeGen/AArch64/peephole-substitute-cmp-adcs.mir new file mode 100644 index 0000000000000..2a73446b201fb --- /dev/null +++ b/llvm/test/CodeGen/AArch64/peephole-substitute-cmp-adcs.mir @@ -0,0 +1,286 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=aarch64-unknown-linux -run-pass=peephole-opt -verify-machineinstrs %s -o - | FileCheck %s +# RUN: llc -mtriple=aarch64-unknown-linux -passes=peephole-opt %s -o - | FileCheck %s +# +# Exercise sForm-driven substituteCmpToZero for: +# - ADCSXr / ADCSWr (carry chain + redundant ADDS*ri #0) +# - SBCSXr / SBCSWr (borrow chain + redundant ADDS*ri #0) +# - ADDWrs / ADDXrs / SUBWrs / SUBXrs (shifted reg without flags -> *S*rs; +# redundant ADDS*ri #0 or SUBS*ri #0) + +--- +name: peephole_substitute_cmp_adcs +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: gpr64 } + - { id: 3, class: gpr64 } + - { id: 4, class: gpr64 } + - { id: 5, class: gpr64common } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr64 } +body: | + bb.0: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: peephole_substitute_cmp_adcs + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3 + ; CHECK-NEXT: [[ADDSXrr:%[0-9]+]]:gpr64 = ADDSXrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[ADCSXr:%[0-9]+]]:gpr64common = ADCSXr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit 
$w0 + %0:gpr64 = COPY $x0 + %1:gpr64 = COPY $x1 + %2:gpr64 = COPY $x2 + %3:gpr64 = COPY $x3 + %4:gpr64 = ADDSXrr %0, %1, implicit-def $nzcv + %5:gpr64common = ADCSXr %2, %3, implicit-def $nzcv, implicit $nzcv + dead %7:gpr64 = ADDSXri killed %5:gpr64common, 0, 0, implicit-def $nzcv + %6:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + $w0 = COPY %6 + RET_ReallyLR implicit $w0 +... +--- +name: peephole_substitute_cmp_adcswr +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32 } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32common } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +body: | + bb.0: + liveins: $w0, $w1, $w2, $w3 + + ; CHECK-LABEL: name: peephole_substitute_cmp_adcswr + ; CHECK: liveins: $w0, $w1, $w2, $w3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $w3 + ; CHECK-NEXT: [[ADDSWrr:%[0-9]+]]:gpr32 = ADDSWrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[ADCSWr:%[0-9]+]]:gpr32common = ADCSWr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + %2:gpr32 = COPY $w2 + %3:gpr32 = COPY $w3 + %4:gpr32 = ADDSWrr %0, %1, implicit-def $nzcv + %5:gpr32common = ADCSWr %2, %3, implicit-def $nzcv, implicit $nzcv + dead %7:gpr32 = ADDSWri killed %5:gpr32common, 0, 0, implicit-def $nzcv + %6:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + $w0 = COPY %6 + RET_ReallyLR implicit $w0 +... 
+--- +name: peephole_substitute_cmp_sbcsxr +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: gpr64 } + - { id: 3, class: gpr64 } + - { id: 4, class: gpr64 } + - { id: 5, class: gpr64common } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr64 } +body: | + bb.0: + liveins: $x0, $x1, $x2, $x3 + + ; CHECK-LABEL: name: peephole_substitute_cmp_sbcsxr + ; CHECK: liveins: $x0, $x1, $x2, $x3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x3 + ; CHECK-NEXT: [[SUBSXrr:%[0-9]+]]:gpr64 = SUBSXrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[SBCSXr:%[0-9]+]]:gpr64common = SBCSXr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr64 = COPY $x0 + %1:gpr64 = COPY $x1 + %2:gpr64 = COPY $x2 + %3:gpr64 = COPY $x3 + %4:gpr64 = SUBSXrr %0, %1, implicit-def $nzcv + %5:gpr64common = SBCSXr %2, %3, implicit-def $nzcv, implicit $nzcv + dead %7:gpr64 = ADDSXri killed %5:gpr64common, 0, 0, implicit-def $nzcv + %6:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + $w0 = COPY %6 + RET_ReallyLR implicit $w0 +... 
+--- +name: peephole_substitute_cmp_addwrs +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32common } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr32 } +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: peephole_substitute_cmp_addwrs + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[ADDSWrs:%[0-9]+]]:gpr32common = ADDSWrs [[COPY]], [[COPY1]], 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + %2:gpr32common = ADDWrs %0, %1, 0 + dead %4:gpr32 = ADDSWri killed %2:gpr32common, 0, 0, implicit-def $nzcv + %3:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + $w0 = COPY %3 + RET_ReallyLR implicit $w0 +... +--- +name: peephole_substitute_cmp_addxrs +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: gpr64common } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr64 } +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: peephole_substitute_cmp_addxrs + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[ADDSXrs:%[0-9]+]]:gpr64common = ADDSXrs [[COPY]], [[COPY1]], 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr64 = COPY $x0 + %1:gpr64 = COPY $x1 + %2:gpr64common = ADDXrs %0, %1, 0 + dead %4:gpr64 = ADDSXri killed %2:gpr64common, 0, 0, implicit-def $nzcv + %3:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + $w0 = COPY %3 + RET_ReallyLR implicit $w0 +... 
+--- +name: peephole_substitute_cmp_subwrs +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32common } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr32 } +body: | + bb.0: + liveins: $w0, $w1 + + ; CHECK-LABEL: name: peephole_substitute_cmp_subwrs + ; CHECK: liveins: $w0, $w1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[SUBSWrs:%[0-9]+]]:gpr32common = SUBSWrs [[COPY]], [[COPY1]], 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + %2:gpr32common = SUBWrs %0, %1, 0 + dead %4:gpr32 = SUBSWri killed %2:gpr32common, 0, 0, implicit-def $nzcv + %3:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + $w0 = COPY %3 + RET_ReallyLR implicit $w0 +... +--- +name: peephole_substitute_cmp_subxrs +tracksRegLiveness: true +registers: + - { id: 0, class: gpr64 } + - { id: 1, class: gpr64 } + - { id: 2, class: gpr64common } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr64 } +body: | + bb.0: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: peephole_substitute_cmp_subxrs + ; CHECK: liveins: $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 + ; CHECK-NEXT: [[SUBSXrs:%[0-9]+]]:gpr64common = SUBSXrs [[COPY]], [[COPY1]], 0, implicit-def $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr64 = COPY $x0 + %1:gpr64 = COPY $x1 + %2:gpr64common = SUBXrs %0, %1, 0 + dead %4:gpr64 = SUBSXri killed %2:gpr64common, 0, 0, implicit-def $nzcv + %3:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + $w0 = COPY %3 + RET_ReallyLR implicit $w0 +... 
+--- +name: peephole_substitute_cmp_sbcswr +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32 } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr32 } + - { id: 5, class: gpr32common } + - { id: 6, class: gpr32 } + - { id: 7, class: gpr32 } +body: | + bb.0: + liveins: $w0, $w1, $w2, $w3 + + ; CHECK-LABEL: name: peephole_substitute_cmp_sbcswr + ; CHECK: liveins: $w0, $w1, $w2, $w3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY $w2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr32 = COPY $w3 + ; CHECK-NEXT: [[SUBSWrr:%[0-9]+]]:gpr32 = SUBSWrr [[COPY]], [[COPY1]], implicit-def $nzcv + ; CHECK-NEXT: [[SBCSWr:%[0-9]+]]:gpr32common = SBCSWr [[COPY2]], [[COPY3]], implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + ; CHECK-NEXT: $w0 = COPY [[CSINCWr]] + ; CHECK-NEXT: RET_ReallyLR implicit $w0 + %0:gpr32 = COPY $w0 + %1:gpr32 = COPY $w1 + %2:gpr32 = COPY $w2 + %3:gpr32 = COPY $w3 + %4:gpr32 = SUBSWrr %0, %1, implicit-def $nzcv + %5:gpr32common = SBCSWr %2, %3, implicit-def $nzcv, implicit $nzcv + dead %7:gpr32 = ADDSWri killed %5:gpr32common, 0, 0, implicit-def $nzcv + %6:gpr32 = CSINCWr $wzr, $wzr, 0, implicit $nzcv + $w0 = COPY %6 + RET_ReallyLR implicit $w0 +... From 554c610f51ceee9127a6fa228d282275a66655f1 Mon Sep 17 00:00:00 2001 From: Jin Huang Date: Tue, 23 Jun 2026 20:28:28 -0700 Subject: [PATCH 284/511] =?UTF-8?q?Reapply=20[AA]=20Improve=20precision=20?= =?UTF-8?q?for=20monotonic=20atomic=20load/stor=E2=80=A6=20(#195015)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reverts https://github.com/llvm/llvm-project/pull/173135 and adds two new IR tests to demonstrate the impact of different atomic orderings on Dead Store Elimination (DSE). 
This reverts commit c8941df6a1e4ed5e1ba3287985a60e1d7512c250. Co-authored-by: Aiden Grossman --- llvm/lib/Analysis/AliasAnalysis.cpp | 6 +- .../DeadStoreElimination/atomic-todo.ll | 23 ----- .../Transforms/DeadStoreElimination/atomic.ll | 93 +++++++++++++------ 3 files changed, 69 insertions(+), 53 deletions(-) delete mode 100644 llvm/test/Transforms/DeadStoreElimination/atomic-todo.ll diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp index a1e4a18718b52..c7984c35e87ae 100644 --- a/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/llvm/lib/Analysis/AliasAnalysis.cpp @@ -493,8 +493,7 @@ ModRefInfo AAResults::getModRefInfo(const LoadInst *L, AliasResult AR = alias(MemoryLocation::get(L), Loc, AAQI, L); if (AR == AliasResult::NoAlias) { // Synchronization effects may affect locations that do not alias. - // FIXME: Should be isStrongerThanMonotonic(). - if (isStrongerThanUnordered(L->getOrdering())) + if (isStrongerThanMonotonic(L->getOrdering())) return getSyncEffects(this, Loc, AAQI); return ModRefInfo::NoModRef; } @@ -517,8 +516,7 @@ ModRefInfo AAResults::getModRefInfo(const StoreInst *S, // specified memory cannot be modified by the store. if (AR == AliasResult::NoAlias) { // Synchronization effects may affect locations that do not alias. - // FIXME: Should be isStrongerThanMonotonic(). 
- if (isStrongerThanUnordered(S->getOrdering())) + if (isStrongerThanMonotonic(S->getOrdering())) return getSyncEffects(this, Loc, AAQI); return ModRefInfo::NoModRef; } diff --git a/llvm/test/Transforms/DeadStoreElimination/atomic-todo.ll b/llvm/test/Transforms/DeadStoreElimination/atomic-todo.ll deleted file mode 100644 index 1c160442f8579..0000000000000 --- a/llvm/test/Transforms/DeadStoreElimination/atomic-todo.ll +++ /dev/null @@ -1,23 +0,0 @@ -; XFAIL: * -; RUN: opt -passes=dse -S < %s | FileCheck %s - -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" -target triple = "x86_64-apple-macosx10.7.0" - -; Basic correctness tests for atomic stores. -; Note that it turns out essentially every transformation DSE does is legal on -; atomic ops, just some transformations are not allowed across release-acquire pairs. - -@x = common global i32 0, align 4 -@y = common global i32 0, align 4 - -; DSE across monotonic load (allowed as long as the eliminated store isUnordered) -define i32 @test9() { -; CHECK-LABEL: test9 -; CHECK-NOT: store i32 0 -; CHECK: store i32 1 - store i32 0, ptr @x - %x = load atomic i32, ptr @y monotonic, align 4 - store i32 1, ptr @x - ret i32 %x -} diff --git a/llvm/test/Transforms/DeadStoreElimination/atomic.ll b/llvm/test/Transforms/DeadStoreElimination/atomic.ll index 55b9384e88d93..41c444595fa8a 100644 --- a/llvm/test/Transforms/DeadStoreElimination/atomic.ll +++ b/llvm/test/Transforms/DeadStoreElimination/atomic.ll @@ -37,9 +37,21 @@ define void @test4() { ret void } -; DSE unordered store overwriting non-atomic store (allowed) +; DSE doesn't remove monotonic store. 
define void @test5() { ; CHECK-LABEL: @test5( +; CHECK-NEXT: store atomic i32 2, ptr @x monotonic, align 4 +; CHECK-NEXT: store i32 1, ptr @x, align 4 +; CHECK-NEXT: ret void +; + store atomic i32 2, ptr @x monotonic, align 4 + store i32 1, ptr @x + ret void +} + +; DSE unordered store overwriting non-atomic store (allowed) +define void @test6() { +; CHECK-LABEL: @test6( ; CHECK-NEXT: store atomic i32 1, ptr @x unordered, align 4 ; CHECK-NEXT: ret void ; @@ -49,8 +61,8 @@ define void @test5() { } ; DSE no-op unordered atomic store (allowed) -define void @test6() { -; CHECK-LABEL: @test6( +define void @test7() { +; CHECK-LABEL: @test7( ; CHECK-NEXT: ret void ; %x = load atomic i32, ptr @x unordered, align 4 @@ -60,8 +72,8 @@ define void @test6() { ; DSE seq_cst store (be conservative; DSE doesn't have infrastructure ; to reason about atomic operations). -define void @test7() { -; CHECK-LABEL: @test7( +define void @test8() { +; CHECK-LABEL: @test8( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: store atomic i32 0, ptr [[A]] seq_cst, align 4 ; CHECK-NEXT: ret void @@ -73,8 +85,8 @@ define void @test7() { ; DSE and seq_cst load (be conservative; DSE doesn't have infrastructure ; to reason about atomic operations). 
-define i32 @test8() { -; CHECK-LABEL: @test8( +define i32 @test9() { +; CHECK-LABEL: @test9( ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4 ; CHECK-NEXT: call void @randomop(ptr [[A]]) ; CHECK-NEXT: store i32 0, ptr [[A]], align 4 @@ -88,11 +100,40 @@ define i32 @test8() { ret i32 %x } +; DSE across monotonic load (allowed if the monotonic load's address is NoAlias) +define i32 @test10() { +; CHECK-LABEL: @test10( +; CHECK-NEXT: [[X:%.*]] = load atomic i32, ptr @y monotonic, align 4 +; CHECK-NEXT: store i32 1, ptr @x, align 4 +; CHECK-NEXT: ret i32 [[X]] +; + store i32 0, ptr @x + %x = load atomic i32, ptr @y monotonic, align 4 + store i32 1, ptr @x + ret i32 %x +} + +; DSE across monotonic load (blocked if the atomic load's address isn't NoAlias) +define i32 @test11(ptr %ptr) { +; CHECK-LABEL: @test11( +; CHECK-NEXT: store i32 0, ptr @x, align 4 +; CHECK-NEXT: [[X:%.*]] = load atomic i32, ptr [[PTR:%.*]] monotonic, align 4 +; CHECK-NEXT: store i32 1, ptr @x, align 4 +; CHECK-NEXT: ret i32 [[X]] +; + store i32 0, ptr @x + %x = load atomic i32, ptr %ptr monotonic, align 4 + store i32 1, ptr @x + ret i32 %x +} + ; DSE across monotonic store (allowed as long as the eliminated store isUnordered) -define void @test10() { -; CHECK-LABEL: test10 -; CHECK-NOT: store i32 0 -; CHECK: store i32 1 +define void @test12() { +; CHECK-LABEL: @test12( +; CHECK-NEXT: store atomic i32 42, ptr @y monotonic, align 4 +; CHECK-NEXT: store i32 1, ptr @x, align 4 +; CHECK-NEXT: ret void +; store i32 0, ptr @x store atomic i32 42, ptr @y monotonic, align 4 store i32 1, ptr @x @@ -100,8 +141,8 @@ define void @test10() { } ; DSE across monotonic load (forbidden since the eliminated store is atomic) -define i32 @test11() { -; CHECK-LABEL: @test11( +define i32 @test13() { +; CHECK-LABEL: @test13( ; CHECK-NEXT: store atomic i32 0, ptr @x monotonic, align 4 ; CHECK-NEXT: [[X:%.*]] = load atomic i32, ptr @y monotonic, align 4 ; CHECK-NEXT: store atomic i32 1, ptr @x monotonic, align 4 @@ -114,8 
+155,8 @@ define i32 @test11() { } ; DSE across monotonic store (forbidden since the eliminated store is atomic) -define void @test12() { -; CHECK-LABEL: @test12( +define void @test14() { +; CHECK-LABEL: @test14( ; CHECK-NEXT: store atomic i32 0, ptr @x monotonic, align 4 ; CHECK-NEXT: store atomic i32 42, ptr @y monotonic, align 4 ; CHECK-NEXT: store atomic i32 1, ptr @x monotonic, align 4 @@ -150,7 +191,7 @@ define i32 @test15() { define i64 @test_atomicrmw_0() { ; CHECK-LABEL: @test_atomicrmw_0( ; CHECK-NEXT: store i64 1, ptr @z, align 8 -; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr @z, i64 -1 monotonic +; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr @z, i64 -1 monotonic, align 8 ; CHECK-NEXT: ret i64 [[RES]] ; store i64 1, ptr @z @@ -162,7 +203,7 @@ define i64 @test_atomicrmw_0() { define i64 @test_atomicrmw_1() { ; CHECK-LABEL: @test_atomicrmw_1( ; CHECK-NEXT: store i64 1, ptr @z, align 8 -; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr @z, i64 -1 acq_rel +; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr @z, i64 -1 acq_rel, align 8 ; CHECK-NEXT: ret i64 [[RES]] ; store i64 1, ptr @z @@ -173,7 +214,7 @@ define i64 @test_atomicrmw_1() { ; Monotonic atomicrmw should not block eliminating no-aliasing stores. 
define i64 @test_atomicrmw_2() { ; CHECK-LABEL: @test_atomicrmw_2( -; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr @a, i64 -1 monotonic +; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr @a, i64 -1 monotonic, align 8 ; CHECK-NEXT: store i64 2, ptr @z, align 8 ; CHECK-NEXT: ret i64 [[RES]] ; @@ -187,7 +228,7 @@ define i64 @test_atomicrmw_2() { define i64 @test_atomicrmw_3() { ; CHECK-LABEL: @test_atomicrmw_3( ; CHECK-NEXT: store i64 1, ptr @z, align 8 -; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr @a, i64 -1 release +; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr @a, i64 -1 release, align 8 ; CHECK-NEXT: store i64 2, ptr @z, align 8 ; CHECK-NEXT: ret i64 [[RES]] ; @@ -201,7 +242,7 @@ define i64 @test_atomicrmw_3() { define i64 @test_atomicrmw_4(ptr %ptr) { ; CHECK-LABEL: @test_atomicrmw_4( ; CHECK-NEXT: store i64 1, ptr @z, align 8 -; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr [[PTR:%.*]], i64 -1 monotonic +; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr [[PTR:%.*]], i64 -1 monotonic, align 8 ; CHECK-NEXT: store i64 2, ptr @z, align 8 ; CHECK-NEXT: ret i64 [[RES]] ; @@ -215,7 +256,7 @@ define i64 @test_atomicrmw_4(ptr %ptr) { define i64 @test_atomicrmw_5() { ; CHECK-LABEL: @test_atomicrmw_5( ; CHECK-NEXT: store i64 1, ptr @z, align 8 -; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr @z, i64 -1 monotonic +; CHECK-NEXT: [[RES:%.*]] = atomicrmw add ptr @z, i64 -1 monotonic, align 8 ; CHECK-NEXT: store i64 2, ptr @z, align 8 ; CHECK-NEXT: ret i64 [[RES]] ; @@ -229,7 +270,7 @@ define i64 @test_atomicrmw_5() { define { i32, i1} @test_cmpxchg_1() { ; CHECK-LABEL: @test_cmpxchg_1( ; CHECK-NEXT: store i32 1, ptr @x, align 4 -; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr @x, i32 10, i32 20 seq_cst monotonic +; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr @x, i32 10, i32 20 seq_cst monotonic, align 4 ; CHECK-NEXT: store i32 2, ptr @x, align 4 ; CHECK-NEXT: ret { i32, i1 } [[RET]] ; @@ -242,7 +283,7 @@ define { i32, i1} @test_cmpxchg_1() { ; Monotonic cmpxchg should not block DSE 
for non-aliasing stores. define { i32, i1} @test_cmpxchg_2() { ; CHECK-LABEL: @test_cmpxchg_2( -; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr @y, i32 10, i32 20 monotonic monotonic +; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr @y, i32 10, i32 20 monotonic monotonic, align 4 ; CHECK-NEXT: store i32 2, ptr @x, align 4 ; CHECK-NEXT: ret { i32, i1 } [[RET]] ; @@ -256,7 +297,7 @@ define { i32, i1} @test_cmpxchg_2() { define { i32, i1} @test_cmpxchg_3() { ; CHECK-LABEL: @test_cmpxchg_3( ; CHECK-NEXT: store i32 1, ptr @x, align 4 -; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr @y, i32 10, i32 20 seq_cst seq_cst +; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr @y, i32 10, i32 20 seq_cst seq_cst, align 4 ; CHECK-NEXT: store i32 2, ptr @x, align 4 ; CHECK-NEXT: ret { i32, i1 } [[RET]] ; @@ -270,7 +311,7 @@ define { i32, i1} @test_cmpxchg_3() { define { i32, i1} @test_cmpxchg_4(ptr %ptr) { ; CHECK-LABEL: @test_cmpxchg_4( ; CHECK-NEXT: store i32 1, ptr @x, align 4 -; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr [[PTR:%.*]], i32 10, i32 20 monotonic monotonic +; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr [[PTR:%.*]], i32 10, i32 20 monotonic monotonic, align 4 ; CHECK-NEXT: store i32 2, ptr @x, align 4 ; CHECK-NEXT: ret { i32, i1 } [[RET]] ; @@ -284,7 +325,7 @@ define { i32, i1} @test_cmpxchg_4(ptr %ptr) { define { i32, i1} @test_cmpxchg_5(ptr %ptr) { ; CHECK-LABEL: @test_cmpxchg_5( ; CHECK-NEXT: store i32 1, ptr @x, align 4 -; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr @x, i32 10, i32 20 monotonic monotonic +; CHECK-NEXT: [[RET:%.*]] = cmpxchg volatile ptr @x, i32 10, i32 20 monotonic monotonic, align 4 ; CHECK-NEXT: store i32 2, ptr @x, align 4 ; CHECK-NEXT: ret { i32, i1 } [[RET]] ; From 2c0c6eba7c2ff63133332ea1557e6f124e546309 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Wed, 24 Jun 2026 05:29:28 +0200 Subject: [PATCH 285/511] [clang][AST] Refactor `EvaluatedStmt` accessors in `VarDecl` (#205033) 1) Return the evaluated APValue as a const pointer 
since it may not be modified by callers. 2) Only return a non-nullptr from `getEvaluatedValue()` if the APValue is not absent. --- .../clang-tidy/altera/UnrollLoopsCheck.cpp | 2 +- clang/include/clang/AST/Decl.h | 15 ++++++++------- clang/lib/AST/Decl.cpp | 15 ++++++++------- clang/lib/AST/ExprConstant.cpp | 4 ++-- clang/lib/CodeGen/CGExprConstant.cpp | 2 +- clang/lib/Serialization/ASTWriter.cpp | 2 +- 6 files changed, 21 insertions(+), 19 deletions(-) diff --git a/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp b/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp index eb41acaf4650a..62fc3b159241d 100644 --- a/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp +++ b/clang-tools-extra/clang-tidy/altera/UnrollLoopsCheck.cpp @@ -178,7 +178,7 @@ bool UnrollLoopsCheck::hasLargeNumIterations(const Stmt *Statement, InitDeclStatement && InitDeclStatement->isSingleDecl()) { if (const auto *VariableDecl = dyn_cast(InitDeclStatement->getSingleDecl())) { - APValue *Evaluation = VariableDecl->evaluateValue(); + const APValue *Evaluation = VariableDecl->evaluateValue(); if (!Evaluation || !Evaluation->isInt()) return true; InitValue = Evaluation->getInt().getExtValue(); diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index e200b8f06ec4b..2ea16d0ba6b03 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -1430,18 +1430,19 @@ class VarDecl : public DeclaratorDecl, public Redeclarable { /// Attempt to evaluate the value of the initializer attached to this /// declaration, and produce notes explaining why it cannot be evaluated. - /// Returns a pointer to the value if evaluation succeeded, 0 otherwise. - APValue *evaluateValue() const; + /// Returns a pointer to the value if evaluation succeeded, \c nullptr + /// otherwise. 
+ const APValue *evaluateValue() const; private: - APValue *evaluateValueImpl(SmallVectorImpl *Notes, - bool IsConstantInitialization) const; + const APValue *evaluateValueImpl(SmallVectorImpl *Notes, + bool IsConstantInitialization) const; public: /// Return the already-evaluated value of this variable's - /// initializer, or NULL if the value is not yet known. Returns pointer - /// to untyped APValue if the value could not be evaluated. - APValue *getEvaluatedValue() const; + /// initializer, or \c nullptr if the value is not yet known or couldn't be + /// evaluated. + const APValue *getEvaluatedValue() const; /// Evaluate the destruction of this variable to determine if it constitutes /// constant destruction. diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 7ab4235717dde..b23bf73ae803c 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -2551,12 +2551,13 @@ EvaluatedStmt *VarDecl::getEvaluatedStmt() const { return dyn_cast_if_present(Init); } -APValue *VarDecl::evaluateValue() const { +const APValue *VarDecl::evaluateValue() const { return evaluateValueImpl(/*Notes=*/nullptr, hasConstantInitialization()); } -APValue *VarDecl::evaluateValueImpl(SmallVectorImpl *Notes, - bool IsConstantInitialization) const { +const APValue * +VarDecl::evaluateValueImpl(SmallVectorImpl *Notes, + bool IsConstantInitialization) const { EvaluatedStmt *Eval = ensureEvaluatedStmt(); const auto *Init = getInit(); @@ -2606,10 +2607,10 @@ APValue *VarDecl::evaluateValueImpl(SmallVectorImpl *Notes, return Result ? 
&Eval->Evaluated : nullptr; } -APValue *VarDecl::getEvaluatedValue() const { - if (EvaluatedStmt *Eval = getEvaluatedStmt()) - if (Eval->WasEvaluated) - return &Eval->Evaluated; +const APValue *VarDecl::getEvaluatedValue() const { + if (EvaluatedStmt *Eval = getEvaluatedStmt(); + Eval && Eval->WasEvaluated && !Eval->Evaluated.isAbsent()) + return &Eval->Evaluated; return nullptr; } diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 8efceff7e8c31..dde3b8bab43ec 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -3502,7 +3502,7 @@ static bool evaluateVarDeclInit(EvalInfo &Info, const Expr *E, return false; } - Result = VD->getEvaluatedValue(); + Result = const_cast(VD->getEvaluatedValue()); if (!Result && !AllowConstexprUnknown) return false; @@ -21719,7 +21719,7 @@ bool VarDecl::evaluateDestruction( // Otherwise, treat the value as default-initialized; if the destructor works // anyway, then the destruction is constant (and must be essentially empty). APValue DestroyedValue; - if (getEvaluatedValue() && !getEvaluatedValue()->isAbsent()) + if (getEvaluatedValue()) DestroyedValue = *getEvaluatedValue(); else if (!handleDefaultInitValue(getType(), DestroyedValue)) return false; diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index ffcd3fef9cd52..a8c5985cde705 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -1923,7 +1923,7 @@ llvm::Constant *ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &D) { // Try to emit the initializer. Note that this can allow some things that // are not allowed by tryEmitPrivateForMemory alone. 
- if (APValue *value = D.evaluateValue()) { + if (const APValue *value = D.evaluateValue()) { assert(!value->allowConstexprUnknown() && "Constexpr unknown values are not allowed in CodeGen"); return tryEmitPrivateForMemory(*value, destType); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 21dda6f3733e4..357a7f7e95fa0 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -7526,7 +7526,7 @@ void ASTRecordWriter::AddVarDeclInit(const VarDecl *VD) { assert(ES->CheckedForSideEffects); Val |= (ES->HasConstantInitialization ? 2 : 0); Val |= (ES->HasConstantDestruction ? 4 : 0); - APValue *Evaluated = VD->getEvaluatedValue(); + const APValue *Evaluated = VD->getEvaluatedValue(); // If the evaluated result is constant, emit it. if (Evaluated && (Evaluated->isInt() || Evaluated->isFloat())) Val |= 8; From 1fc1de8d66b996c97ec1c2aa2a9ae3e0db782fc0 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Wed, 24 Jun 2026 11:40:34 +0800 Subject: [PATCH 286/511] [libc][NFC] Remove a compiler warning for MPCommon (#205264) Local build on Linux platform reports a compiler warning: llvm-project/libc/utils/MPFRWrapper/MPCommon.cpp:546:15: warning: implicit conversion loses integer precision: 'long' to 'int' [-Wshorten-64-to-32] 546 | int mod = mpfr_get_si(value_ret_exact.value, MPFR_RNDN); | ~~~ ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 warning generated. 
Signed-off-by: jinge90 --- libc/utils/MPFRWrapper/MPCommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/utils/MPFRWrapper/MPCommon.cpp b/libc/utils/MPFRWrapper/MPCommon.cpp index bdd8286148641..452a1391058d4 100644 --- a/libc/utils/MPFRWrapper/MPCommon.cpp +++ b/libc/utils/MPFRWrapper/MPCommon.cpp @@ -543,7 +543,7 @@ MPFRNumber MPFRNumber::tanpi() const { mpfr_mul_si(value_ret_exact.value, value_ret_exact.value, 4, MPFR_RNDN); if (mpfr_integer_p(value_ret_exact.value)) { - int mod = mpfr_get_si(value_ret_exact.value, MPFR_RNDN); + long mod = mpfr_get_si(value_ret_exact.value, MPFR_RNDN); mod = (mod < 0 ? -1 * mod : mod); switch (mod) { From 6c86c95a079c26262090a693ecf80d37c785f750 Mon Sep 17 00:00:00 2001 From: Udit Kumar Agarwal Date: Tue, 23 Jun 2026 20:44:58 -0700 Subject: [PATCH 287/511] [SYCL] Use `ext_vector_type` for optimizing marray arithmetic (#22342) Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> --- .../detail/type_traits/vec_marray_traits.hpp | 25 +- sycl/include/sycl/detail/vector_traits.hpp | 3 - sycl/include/sycl/marray.hpp | 203 +++- .../marray/marray_vs_vec.cpp | 880 ++++++++---------- 4 files changed, 559 insertions(+), 552 deletions(-) diff --git a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp index 90779a0463d86..cf5a45c614b9b 100644 --- a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp +++ b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp @@ -24,6 +24,11 @@ template class __SYCL_EBO vec; template class marray; namespace detail { + +template +inline constexpr bool is_allowed_vec_size_v = + N == 1 || N == 2 || N == 3 || N == 4 || N == 8 || N == 16; + #if __SYCL_USE_LIBSYCL8_VEC_IMPL template class OperationCurrentT, int... 
Indexes> @@ -66,16 +71,18 @@ template constexpr bool is_vec_v = is_vec::value; template struct is_ext_vector : std::false_type {}; -template -struct is_valid_type_for_ext_vector : std::false_type {}; +template +struct is_valid_type_length_for_ext_vector : std::false_type {}; #if defined(__has_extension) #if __has_extension(attribute_ext_vector_type) template using ext_vector = T __attribute__((ext_vector_type(N))); template struct is_ext_vector> : std::true_type {}; -template -struct is_valid_type_for_ext_vector>> +template +struct is_valid_type_length_for_ext_vector< + T, N, + std::enable_if_t, std::void_t>>> : std::true_type {}; #endif #endif @@ -83,7 +90,10 @@ template inline constexpr bool is_ext_vector_v = is_ext_vector::value; template inline constexpr bool is_valid_type_for_ext_vector_v = - is_valid_type_for_ext_vector::value; + is_valid_type_length_for_ext_vector::value; +template +inline constexpr bool is_valid_type_length_for_ext_vector_v = + is_valid_type_length_for_ext_vector::value; template struct is_swizzle : std::false_type {}; #if __SYCL_USE_LIBSYCL8_VEC_IMPL @@ -157,11 +167,6 @@ struct element_type { #endif #endif template using element_type_t = typename element_type::type; - -template -inline constexpr bool is_allowed_vec_size_v = - N == 1 || N == 2 || N == 3 || N == 4 || N == 8 || N == 16; - } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/include/sycl/detail/vector_traits.hpp b/sycl/include/sycl/detail/vector_traits.hpp index 76396cb3933f1..f0e045bd6ab4c 100644 --- a/sycl/include/sycl/detail/vector_traits.hpp +++ b/sycl/include/sycl/detail/vector_traits.hpp @@ -15,9 +15,6 @@ #ifndef __has_extension #define __has_extension(x) 0 #endif -#ifdef __HAS_EXT_VECTOR_TYPE__ -#error "Undefine __HAS_EXT_VECTOR_TYPE__ macro" -#endif #if __has_extension(attribute_ext_vector_type) #define __HAS_EXT_VECTOR_TYPE__ #endif diff --git a/sycl/include/sycl/marray.hpp b/sycl/include/sycl/marray.hpp index 304718100143e..37fba95b1e538 
100644 --- a/sycl/include/sycl/marray.hpp +++ b/sycl/include/sycl/marray.hpp @@ -9,8 +9,26 @@ #pragma once #include +#include #include #include +#include +#include + +// Check if Clang's ext_vector_type attribute is available. Host compiler +// may not be Clang, and Clang may not be built with the extension. +#ifdef __clang__ +#ifndef __has_extension +#define __has_extension(x) 0 +#endif +#if __has_extension(attribute_ext_vector_type) +#define __HAS_EXT_VECTOR_TYPE__ +#endif +#endif // __clang__ + +#if !defined(__HAS_EXT_VECTOR_TYPE__) && defined(__SYCL_DEVICE_ONLY__) +#error "SYCL device compiler is built without ext_vector_type support" +#endif namespace sycl { inline namespace _V1 { @@ -35,6 +53,18 @@ template struct GetMArrayArgsSize { static constexpr std::size_t value = 1 + GetMArrayArgsSize::value; }; +// Helper to define ext_vector_type only when it will actually be used. +// This avoids the warning about attributes on dependent types. +template +struct ExtVectorTypeHelper { + using type = void; +}; + +#ifdef __SYCL_DEVICE_ONLY__ +template struct ExtVectorTypeHelper { + using type = T __attribute__((ext_vector_type(N))); +}; +#endif } // namespace detail /// Provides a cross-platform math array class template that works on @@ -165,6 +195,50 @@ template class marray { const_iterator end() const { return MData + NumElements; } + // Use ext_vector_type opportunistically for doing marray operations. +private: + using vec_elem_ty = + typename detail::map_type*/ std::uint8_t, // +#endif + bool, /*->*/ std::uint8_t, +#ifdef __SYCL_DEVICE_ONLY__ + half, /*->*/ _Float16, +#endif + DataT, /*->*/ DataT>::type; + + // Don't use ext_vector_type for: + // - Host code (might not have ext_vector_type support) + // - When the number of elements is 3, marray doesn't use padding + // while ext_vector_type does, so the results of operations will differ. 
+ constexpr static bool use_ext_vector_type = +#ifdef __SYCL_DEVICE_ONLY__ + detail::is_valid_type_length_for_ext_vector_v && + NumElements != 3; +#else + false; +#endif + + using ext_vector_t = + typename detail::ExtVectorTypeHelper::type; + + // Writes the result of an ext_vector_type operation back into MData. + // When the vector element type matches DataT, a raw byte copy is safe. + // Otherwise each lane is converted explicitly so the destination always + // holds a valid DataT representation (important for bool, half, byte, ...). + template + static void storeVecResult(DataT (&Dst)[NumElements], const VecT &Src) { + if constexpr (std::is_same_v) { + sycl::detail::memcpy_no_adl(Dst, &Src, sizeof(Src)); + } else { + for (size_t I = 0; I < NumElements; ++I) + Dst[I] = static_cast(Src[I]); + } + } + +public: #ifdef __SYCL_BINOP #error "Undefine __SYCL_BINOP macro" #endif @@ -176,8 +250,14 @@ template class marray { #define __SYCL_BINOP(BINOP, OPASSIGN) \ friend marray operator BINOP(const marray &Lhs, const marray &Rhs) { \ marray Ret; \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = Lhs[I] BINOP Rhs[I]; \ + if constexpr (use_ext_vector_type) { \ + ext_vector_t LhsVec = sycl::bit_cast(Lhs.MData); \ + ext_vector_t RhsVec = sycl::bit_cast(Rhs.MData); \ + ext_vector_t ResVec = LhsVec BINOP RhsVec; \ + storeVecResult(Ret.MData, ResVec); \ + } else { \ + for (size_t I = 0; I < NumElements; ++I) \ + Ret[I] = Lhs[I] BINOP Rhs[I]; \ } \ return Ret; \ } \ @@ -186,8 +266,7 @@ template class marray { std::is_convertible_v && \ (std::is_fundamental_v || \ std::is_same_v::type, half>), \ - marray> \ - operator BINOP(const marray &Lhs, const T &Rhs) { \ + marray> operator BINOP(const marray & Lhs, const T & Rhs) { \ return Lhs BINOP marray(static_cast(Rhs)); \ } \ template \ @@ -195,17 +274,16 @@ template class marray { std::is_convertible_v && \ (std::is_fundamental_v || \ std::is_same_v::type, half>), \ - marray> \ - operator BINOP(const T &Lhs, const marray 
&Rhs) { \ + marray> operator BINOP(const T & Lhs, const marray & Rhs) { \ return marray(static_cast(Lhs)) BINOP Rhs; \ } \ - friend marray &operator OPASSIGN(marray &Lhs, const marray &Rhs) { \ + friend marray &operator OPASSIGN(marray & Lhs, const marray & Rhs) { \ Lhs = Lhs BINOP Rhs; \ return Lhs; \ } \ template \ friend typename std::enable_if_t operator OPASSIGN( \ - marray &Lhs, const DataT &Rhs) { \ + marray & Lhs, const DataT & Rhs) { \ Lhs = Lhs BINOP marray(Rhs); \ return Lhs; \ } @@ -215,8 +293,15 @@ template class marray { typename = std::enable_if_t, marray>> \ friend marray operator BINOP(const marray &Lhs, const marray &Rhs) { \ marray Ret; \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = Lhs[I] BINOP Rhs[I]; \ + if constexpr (use_ext_vector_type) { \ + ext_vector_t LhsVec = sycl::bit_cast(Lhs.MData); \ + ext_vector_t RhsVec = sycl::bit_cast(Rhs.MData); \ + ext_vector_t ResVec = LhsVec BINOP RhsVec; \ + storeVecResult(Ret.MData, ResVec); \ + } else { \ + for (size_t I = 0; I < NumElements; ++I) { \ + Ret[I] = Lhs[I] BINOP Rhs[I]; \ + } \ } \ return Ret; \ } \ @@ -224,28 +309,29 @@ template class marray { friend typename std::enable_if_t && \ std::is_integral_v && \ std::is_integral_v, \ - marray> \ - operator BINOP(const marray &Lhs, const T &Rhs) { \ + marray> operator BINOP(const marray & Lhs, \ + const T & Rhs) { \ return Lhs BINOP marray(static_cast(Rhs)); \ } \ template \ - friend typename std::enable_if_t && \ - std::is_integral_v && \ - std::is_integral_v, \ - marray> \ - operator BINOP(const T &Lhs, const marray &Rhs) { \ + friend \ + typename std::enable_if_t && \ + std::is_integral_v && \ + std::is_integral_v, \ + marray> operator BINOP(const T & Lhs, \ + const marray & Rhs) { \ return marray(static_cast(Lhs)) BINOP Rhs; \ } \ template , marray>> \ - friend marray &operator OPASSIGN(marray &Lhs, const marray &Rhs) { \ + friend marray &operator OPASSIGN(marray & Lhs, const marray & Rhs) { \ Lhs = Lhs BINOP Rhs; \ return Lhs; \ } \ 
template \ friend \ typename std::enable_if_t, marray &> \ - operator OPASSIGN(marray &Lhs, const DataT &Rhs) { \ + operator OPASSIGN(marray & Lhs, const DataT & Rhs) { \ Lhs = Lhs BINOP marray(Rhs); \ return Lhs; \ } @@ -273,28 +359,37 @@ template class marray { #endif #define __SYCL_RELLOGOP(RELLOGOP) \ - friend marray operator RELLOGOP(const marray &Lhs, \ - const marray &Rhs) { \ + friend marray operator RELLOGOP(const marray & Lhs, \ + const marray & Rhs) { \ marray Ret; \ - for (size_t I = 0; I < NumElements; ++I) { \ - Ret[I] = Lhs[I] RELLOGOP Rhs[I]; \ + if constexpr (use_ext_vector_type) { \ + ext_vector_t LhsVec = sycl::bit_cast(Lhs.MData); \ + ext_vector_t RhsVec = sycl::bit_cast(Rhs.MData); \ + auto ResVec = LhsVec RELLOGOP RhsVec; /* returns vec of -1/0 */ \ + for (size_t I = 0; I < NumElements; ++I) { \ + Ret[I] = ResVec[I] != 0; \ + } \ + } else { \ + for (size_t I = 0; I < NumElements; ++I) { \ + Ret[I] = Lhs[I] RELLOGOP Rhs[I]; \ + } \ } \ return Ret; \ } \ template \ - friend typename std::enable_if_t && \ - (std::is_fundamental_v || \ - std::is_same_v), \ - marray> \ - operator RELLOGOP(const marray &Lhs, const T &Rhs) { \ + friend typename std::enable_if_t< \ + std::is_convertible_v && \ + (std::is_fundamental_v || std::is_same_v), \ + marray> operator RELLOGOP(const marray & Lhs, \ + const T & Rhs) { \ return Lhs RELLOGOP marray(static_cast(Rhs)); \ } \ template \ - friend typename std::enable_if_t && \ - (std::is_fundamental_v || \ - std::is_same_v), \ - marray> \ - operator RELLOGOP(const T &Lhs, const marray &Rhs) { \ + friend typename std::enable_if_t< \ + std::is_convertible_v && \ + (std::is_fundamental_v || std::is_same_v), \ + marray> operator RELLOGOP(const T & Lhs, \ + const marray & Rhs) { \ return marray(static_cast(Lhs)) RELLOGOP Rhs; \ } @@ -359,32 +454,58 @@ template class marray { friend std::enable_if_t, marray> operator~(const marray &Lhs) { marray Ret; - for (size_t I = 0; I < NumElements; ++I) { - Ret[I] = ~Lhs[I]; + if 
constexpr (use_ext_vector_type) { + ext_vector_t LhsVec = sycl::bit_cast(Lhs.MData); + ext_vector_t ResVec = ~LhsVec; + sycl::detail::memcpy_no_adl(Ret.MData, &ResVec, sizeof(ResVec)); + } else { + for (size_t I = 0; I < NumElements; ++I) { + Ret[I] = ~Lhs[I]; + } } return Ret; } friend marray operator!(const marray &Lhs) { marray Ret; - for (size_t I = 0; I < NumElements; ++I) { - Ret[I] = !Lhs[I]; + if constexpr (use_ext_vector_type) { + ext_vector_t LhsVec = sycl::bit_cast(Lhs.MData); + auto ResVec = !LhsVec; /* returns vec of -1/0 */ + for (size_t I = 0; I < NumElements; ++I) { + Ret[I] = ResVec[I] != 0; + } + } else { + for (size_t I = 0; I < NumElements; ++I) { + Ret[I] = !Lhs[I]; + } } return Ret; } friend marray operator+(const marray &Lhs) { marray Ret; - for (size_t I = 0; I < NumElements; ++I) { - Ret[I] = +Lhs[I]; + if constexpr (use_ext_vector_type) { + ext_vector_t LhsVec = sycl::bit_cast(Lhs.MData); + ext_vector_t ResVec = +LhsVec; + storeVecResult(Ret.MData, ResVec); + } else { + for (size_t I = 0; I < NumElements; ++I) { + Ret[I] = +Lhs[I]; + } } return Ret; } friend marray operator-(const marray &Lhs) { marray Ret; - for (size_t I = 0; I < NumElements; ++I) { - Ret[I] = -Lhs[I]; + if constexpr (use_ext_vector_type) { + ext_vector_t LhsVec = sycl::bit_cast(Lhs.MData); + ext_vector_t ResVec = -LhsVec; + storeVecResult(Ret.MData, ResVec); + } else { + for (size_t I = 0; I < NumElements; ++I) { + Ret[I] = -Lhs[I]; + } } return Ret; } diff --git a/sycl/test/check_device_code/marray/marray_vs_vec.cpp b/sycl/test/check_device_code/marray/marray_vs_vec.cpp index 947c32eec14c8..95dac0a350b99 100644 --- a/sycl/test/check_device_code/marray/marray_vs_vec.cpp +++ b/sycl/test/check_device_code/marray/marray_vs_vec.cpp @@ -21,43 +21,31 @@ using namespace sycl; SYCL_EXTERNAL vec - // CHECK-LABEL: @_Z19arithmetic_i8x4_vecN4sycl3_V13vecIaLi4EEES2_( - // CHECK-NEXT: entry: - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata 
[[META10:![0-9]+]]) - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) - // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A:%.*]], align 4, !tbaa [[TBAA16:![0-9]+]], !noalias [[META17:![0-9]+]] - // CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[B:%.*]], align 4, !tbaa [[TBAA16]], !noalias [[META17]] - // CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]] - // CHECK-NEXT: store <4 x i8> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 4, !alias.scope [[META18:![0-9]+]] - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z19arithmetic_i8x4_vecN4sycl3_V13vecIaLi4EEES2_( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A:%.*]], align 4, !tbaa [[TBAA16:![0-9]+]], !noalias [[META17:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[B:%.*]], align 4, !tbaa [[TBAA16]], !noalias [[META17]] +// CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <4 x i8> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 4, !alias.scope [[META18:![0-9]+]] +// CHECK-NEXT: ret void +// arithmetic_i8x4_vec(vec a, vec b) { return a + b; } SYCL_EXTERNAL marray - // CHECK-LABEL: @_Z20arithmetic_i8x4_marrN4sycl3_V16marrayIaLm4EEES2_( - // CHECK-NEXT: entry: - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) - // CHECK-NEXT: br label [[FOR_COND_I:%.*]] - // CHECK: for.cond.i: - // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] - // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 - // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PLERKNS0_6MARRAYIALM4EEES4__EXIT:%.*]] - // CHECK: for.body.i: - // CHECK-NEXT: 
[[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA16]], !noalias [[META21]] - // CHECK-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA16]], !noalias [[META21]] - // CHECK-NEXT: [[ADD_I:%.*]] = add i8 [[TMP0]], [[TMP1]] - // CHECK-NEXT: [[ARRAYIDX_I10_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: store i8 [[ADD_I]], ptr addrspace(4) [[ARRAYIDX_I10_I]], align 1, !tbaa [[TBAA16]], !alias.scope [[META21]] - // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 - // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP24:![0-9]+]] - // CHECK: _ZN4sycl3_V1plERKNS0_6marrayIaLm4EEES4_.exit: - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z20arithmetic_i8x4_marrN4sycl3_V16marrayIaLm4EEES2_( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A:%.*]], align 1, !tbaa [[TBAA16]], !noalias [[META21]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[B:%.*]], align 1, !tbaa [[TBAA16]], !noalias [[META21]] +// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <4 x i8> [[ADD_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 1, !alias.scope [[META21]] +// CHECK-NEXT: ret void +// arithmetic_i8x4_marr(marray a, marray b) { return a + b; } @@ -65,27 +53,27 @@ marray // Non-standard size - should use loop SYCL_EXTERNAL marray - // CHECK-LABEL: @_Z20arithmetic_i8x5_marrN4sycl3_V16marrayIaLm5EEES2_( - // CHECK-NEXT: entry: - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) - // CHECK-NEXT: br label [[FOR_COND_I:%.*]] - // CHECK: for.cond.i: - // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, 
[[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] - // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 5 - // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PLERKNS0_6MARRAYIALM5EEES4__EXIT:%.*]] - // CHECK: for.body.i: - // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA16]], !noalias [[META26]] - // CHECK-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA16]], !noalias [[META26]] - // CHECK-NEXT: [[ADD_I:%.*]] = add i8 [[TMP0]], [[TMP1]] - // CHECK-NEXT: [[ARRAYIDX_I10_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: store i8 [[ADD_I]], ptr addrspace(4) [[ARRAYIDX_I10_I]], align 1, !tbaa [[TBAA16]], !alias.scope [[META26]] - // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 - // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP29:![0-9]+]] - // CHECK: _ZN4sycl3_V1plERKNS0_6marrayIaLm5EEES4_.exit: - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z20arithmetic_i8x5_marrN4sycl3_V16marrayIaLm5EEES2_( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK: for.cond.i: +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 5 +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PLERKNS0_6MARRAYIALM5EEES4__EXIT:%.*]] +// CHECK: for.body.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA16]], !noalias [[META24]] +// CHECK-NEXT: 
[[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA16]], !noalias [[META24]] +// CHECK-NEXT: [[ADD_I:%.*]] = add i8 [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[ARRAYIDX_I10_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: store i8 [[ADD_I]], ptr addrspace(4) [[ARRAYIDX_I10_I]], align 1, !tbaa [[TBAA16]], !alias.scope [[META24]] +// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK: _ZN4sycl3_V1plERKNS0_6marrayIaLm5EEES4_.exit: +// CHECK-NEXT: ret void +// arithmetic_i8x5_marr(marray a, marray b) { return a + b; } @@ -96,43 +84,31 @@ marray SYCL_EXTERNAL vec - // CHECK-LABEL: @_Z20arithmetic_i16x8_vecN4sycl3_V13vecIsLi8EEES2_( - // CHECK-NEXT: entry: - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) - // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META36:![0-9]+]] - // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META36]] - // CHECK-NEXT: [[SUB_I_I_I_I_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]] - // CHECK-NEXT: store <8 x i16> [[SUB_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 16, !alias.scope [[META36]] - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z20arithmetic_i16x8_vecN4sycl3_V13vecIsLi8EEES2_( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META35:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = 
load <8 x i16>, ptr [[B:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META35]] +// CHECK-NEXT: [[SUB_I_I_I_I_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <8 x i16> [[SUB_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 16, !alias.scope [[META35]] +// CHECK-NEXT: ret void +// arithmetic_i16x8_vec(vec a, vec b) { return a - b; } SYCL_EXTERNAL marray - // CHECK-LABEL: @_Z21arithmetic_i16x8_marrN4sycl3_V16marrayIsLm8EEES2_( - // CHECK-NEXT: entry: - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) - // CHECK-NEXT: br label [[FOR_COND_I:%.*]] - // CHECK: for.cond.i: - // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] - // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 8 - // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1MIERKNS0_6MARRAYISLM8EEES4__EXIT:%.*]] - // CHECK: for.body.i: - // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[A:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA40:![0-9]+]], !noalias [[META37]] - // CHECK-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[B:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_I9_I]], align 2, !tbaa [[TBAA40]], !noalias [[META37]] - // CHECK-NEXT: [[SUB_I:%.*]] = sub i16 [[TMP0]], [[TMP1]] - // CHECK-NEXT: [[ARRAYIDX_I10_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: store i16 [[SUB_I]], ptr addrspace(4) [[ARRAYIDX_I10_I]], align 2, !tbaa [[TBAA40]], !alias.scope [[META37]] - // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 - // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP42:![0-9]+]] - // CHECK: _ZN4sycl3_V1miERKNS0_6marrayIsLm8EEES4_.exit: - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z21arithmetic_i16x8_marrN4sycl3_V16marrayIsLm8EEES2_( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META36:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i16>, ptr [[A:%.*]], align 2, !tbaa [[TBAA16]], !noalias [[META36]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[B:%.*]], align 2, !tbaa [[TBAA16]], !noalias [[META36]] +// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <8 x i16> [[SUB_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 2, !alias.scope [[META36]] +// CHECK-NEXT: ret void +// arithmetic_i16x8_marr(marray a, marray b) { return a - b; @@ -141,27 +117,27 @@ marray // Non-standard size - should use loop SYCL_EXTERNAL marray - // CHECK-LABEL: @_Z21arithmetic_i16x7_marrN4sycl3_V16marrayIsLm7EEES2_( - // CHECK-NEXT: entry: - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) - // CHECK-NEXT: br label [[FOR_COND_I:%.*]] - // CHECK: for.cond.i: - // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] - // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 7 - // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1MIERKNS0_6MARRAYISLM7EEES4__EXIT:%.*]] - // CHECK: for.body.i: - // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[A:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA40]], !noalias [[META43]] - // CHECK-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[B:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_I9_I]], align 2, !tbaa [[TBAA40]], !noalias [[META43]] - // CHECK-NEXT: [[SUB_I:%.*]] = sub i16 [[TMP0]], [[TMP1]] - // CHECK-NEXT: [[ARRAYIDX_I10_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: store i16 [[SUB_I]], ptr addrspace(4) [[ARRAYIDX_I10_I]], align 2, !tbaa [[TBAA40]], !alias.scope 
[[META43]] - // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 - // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP46:![0-9]+]] - // CHECK: _ZN4sycl3_V1miERKNS0_6marrayIsLm7EEES4_.exit: - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z21arithmetic_i16x7_marrN4sycl3_V16marrayIsLm7EEES2_( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK: for.cond.i: +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 7 +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1MIERKNS0_6MARRAYISLM7EEES4__EXIT:%.*]] +// CHECK: for.body.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[A:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA42:![0-9]+]], !noalias [[META39]] +// CHECK-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[B:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX_I9_I]], align 2, !tbaa [[TBAA42]], !noalias [[META39]] +// CHECK-NEXT: [[SUB_I:%.*]] = sub i16 [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[ARRAYIDX_I10_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: store i16 [[SUB_I]], ptr addrspace(4) [[ARRAYIDX_I10_I]], align 2, !tbaa [[TBAA42]], !alias.scope [[META39]] +// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP44:![0-9]+]] +// CHECK: _ZN4sycl3_V1miERKNS0_6marrayIsLm7EEES4_.exit: +// CHECK-NEXT: ret void +// arithmetic_i16x7_marr(marray a, marray b) { return a - b; @@ -173,43 +149,31 @@ marray SYCL_EXTERNAL vec - // CHECK-LABEL: @_Z20arithmetic_i32x4_vecN4sycl3_V13vecIiLi4EEES2_( - // CHECK-NEXT: entry: - // CHECK-NEXT: tail call void 
@llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]]) - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) - // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META53:![0-9]+]] - // CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META53]] - // CHECK-NEXT: [[MUL_I_I_I_I_I:%.*]] = mul <4 x i32> [[TMP0]], [[TMP1]] - // CHECK-NEXT: store <4 x i32> [[MUL_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 16, !alias.scope [[META53]] - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z20arithmetic_i32x4_vecN4sycl3_V13vecIiLi4EEES2_( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META48:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META51:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META51]] +// CHECK-NEXT: [[MUL_I_I_I_I_I:%.*]] = mul <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <4 x i32> [[MUL_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 16, !alias.scope [[META51]] +// CHECK-NEXT: ret void +// arithmetic_i32x4_vec(vec a, vec b) { return a * b; } SYCL_EXTERNAL marray - // CHECK-LABEL: @_Z21arithmetic_i32x4_marrN4sycl3_V16marrayIiLm4EEES2_( - // CHECK-NEXT: entry: - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META54:![0-9]+]]) - // CHECK-NEXT: br label [[FOR_COND_I:%.*]] - // CHECK: for.cond.i: - // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] - // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 - // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1MLERKNS0_6MARRAYIILM4EEES4__EXIT:%.*]] - // CHECK: 
for.body.i: - // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5:![0-9]+]], !noalias [[META54]] - // CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[B:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_I7_I]], align 4, !tbaa [[TBAA5]], !noalias [[META54]] - // CHECK-NEXT: [[MUL_I:%.*]] = mul nsw i32 [[TMP0]], [[TMP1]] - // CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: store i32 [[MUL_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 4, !tbaa [[TBAA5]], !alias.scope [[META54]] - // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 - // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP57:![0-9]+]] - // CHECK: _ZN4sycl3_V1mlERKNS0_6marrayIiLm4EEES4_.exit: - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z21arithmetic_i32x4_marrN4sycl3_V16marrayIiLm4EEES2_( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4, !tbaa [[TBAA16]], !noalias [[META52]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4, !tbaa [[TBAA16]], !noalias [[META52]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <4 x i32> [[MUL_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 4, !alias.scope [[META52]] +// CHECK-NEXT: ret void +// arithmetic_i32x4_marr(marray a, marray b) { return a * b; @@ -218,27 +182,27 @@ marray // Non-standard size - should use loop SYCL_EXTERNAL marray - // CHECK-LABEL: @_Z21arithmetic_i32x6_marrN4sycl3_V16marrayIiLm6EEES2_( - // CHECK-NEXT: entry: - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) - // CHECK-NEXT: br label [[FOR_COND_I:%.*]] - // 
CHECK: for.cond.i: - // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] - // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 6 - // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1MLERKNS0_6MARRAYIILM6EEES4__EXIT:%.*]] - // CHECK: for.body.i: - // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5]], !noalias [[META58]] - // CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[B:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_I7_I]], align 4, !tbaa [[TBAA5]], !noalias [[META58]] - // CHECK-NEXT: [[MUL_I:%.*]] = mul nsw i32 [[TMP0]], [[TMP1]] - // CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: store i32 [[MUL_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 4, !tbaa [[TBAA5]], !alias.scope [[META58]] - // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 - // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP61:![0-9]+]] - // CHECK: _ZN4sycl3_V1mlERKNS0_6marrayIiLm6EEES4_.exit: - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z21arithmetic_i32x6_marrN4sycl3_V16marrayIiLm6EEES2_( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK: for.cond.i: +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 6 +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1MLERKNS0_6MARRAYIILM6EEES4__EXIT:%.*]] +// CHECK: for.body.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr 
[[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5:![0-9]+]], !noalias [[META55]] +// CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[B:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_I7_I]], align 4, !tbaa [[TBAA5]], !noalias [[META55]] +// CHECK-NEXT: [[MUL_I:%.*]] = mul nsw i32 [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: store i32 [[MUL_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 4, !tbaa [[TBAA5]], !alias.scope [[META55]] +// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP58:![0-9]+]] +// CHECK: _ZN4sycl3_V1mlERKNS0_6marrayIiLm6EEES4_.exit: +// CHECK-NEXT: ret void +// arithmetic_i32x6_marr(marray a, marray b) { return a * b; @@ -250,43 +214,31 @@ marray SYCL_EXTERNAL vec - // CHECK-LABEL: @_Z21arithmetic_i64x16_vecN4sycl3_V13vecIlLi16EEES2_( - // CHECK-NEXT: entry: - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) - // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i64>, ptr [[A:%.*]], align 64, !tbaa [[TBAA16]], !noalias [[META68:![0-9]+]] - // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i64>, ptr [[B:%.*]], align 64, !tbaa [[TBAA16]], !noalias [[META68]] - // CHECK-NEXT: [[OR_I_I_I_I_I:%.*]] = or <16 x i64> [[TMP0]], [[TMP1]] - // CHECK-NEXT: store <16 x i64> [[OR_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 64, !alias.scope [[META68]] - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z21arithmetic_i64x16_vecN4sycl3_V13vecIlLi16EEES2_( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 
x i64>, ptr [[A:%.*]], align 64, !tbaa [[TBAA16]], !noalias [[META65:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i64>, ptr [[B:%.*]], align 64, !tbaa [[TBAA16]], !noalias [[META65]] +// CHECK-NEXT: [[OR_I_I_I_I_I:%.*]] = or <16 x i64> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <16 x i64> [[OR_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 64, !alias.scope [[META65]] +// CHECK-NEXT: ret void +// arithmetic_i64x16_vec(vec a, vec b) { return a | b; } SYCL_EXTERNAL marray - // CHECK-LABEL: @_Z22arithmetic_i64x16_marrN4sycl3_V16marrayIlLm16EEES2_( - // CHECK-NEXT: entry: - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META69:![0-9]+]]) - // CHECK-NEXT: br label [[FOR_COND_I:%.*]] - // CHECK: for.cond.i: - // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] - // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 16 - // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1ORILNS0_6MARRAYILLM16EEEEES3_RKS3_S5__EXIT:%.*]] - // CHECK: for.body.i: - // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA72:![0-9]+]], !noalias [[META69]] - // CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[B:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX_I7_I]], align 8, !tbaa [[TBAA72]], !noalias [[META69]] - // CHECK-NEXT: [[OR_I:%.*]] = or i64 [[TMP0]], [[TMP1]] - // CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [8 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: store i64 [[OR_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 8, !tbaa [[TBAA72]], !alias.scope [[META69]] - // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 - // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP74:![0-9]+]] - // CHECK: 
_ZN4sycl3_V1orIlNS0_6marrayIlLm16EEEEES3_RKS3_S5_.exit: - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z22arithmetic_i64x16_marrN4sycl3_V16marrayIlLm16EEES2_( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META66:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i64>, ptr [[A:%.*]], align 8, !tbaa [[TBAA16]], !noalias [[META66]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i64>, ptr [[B:%.*]], align 8, !tbaa [[TBAA16]], !noalias [[META66]] +// CHECK-NEXT: [[OR_I:%.*]] = or <16 x i64> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <16 x i64> [[OR_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 8, !alias.scope [[META66]] +// CHECK-NEXT: ret void +// arithmetic_i64x16_marr(marray a, marray b) { return a | b; @@ -295,27 +247,27 @@ marray // Non-standard size - should use loop SYCL_EXTERNAL marray - // CHECK-LABEL: @_Z22arithmetic_i64x32_marrN4sycl3_V16marrayIlLm32EEES2_( - // CHECK-NEXT: entry: - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]]) - // CHECK-NEXT: br label [[FOR_COND_I:%.*]] - // CHECK: for.cond.i: - // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] - // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 32 - // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1ORILNS0_6MARRAYILLM32EEEEES3_RKS3_S5__EXIT:%.*]] - // CHECK: for.body.i: - // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA72]], !noalias [[META75]] - // CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[B:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX_I7_I]], align 8, !tbaa [[TBAA72]], !noalias [[META75]] - // CHECK-NEXT: [[OR_I:%.*]] = or i64 [[TMP0]], [[TMP1]] - // CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw 
[8 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: store i64 [[OR_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 8, !tbaa [[TBAA72]], !alias.scope [[META75]] - // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 - // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP78:![0-9]+]] - // CHECK: _ZN4sycl3_V1orIlNS0_6marrayIlLm32EEEEES3_RKS3_S5_.exit: - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z22arithmetic_i64x32_marrN4sycl3_V16marrayIlLm32EEES2_( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META69:![0-9]+]]) +// CHECK-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK: for.cond.i: +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 32 +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1ORILNS0_6MARRAYILLM32EEEEES3_RKS3_S5__EXIT:%.*]] +// CHECK: for.body.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA72:![0-9]+]], !noalias [[META69]] +// CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[B:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX_I7_I]], align 8, !tbaa [[TBAA72]], !noalias [[META69]] +// CHECK-NEXT: [[OR_I:%.*]] = or i64 [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [8 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: store i64 [[OR_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 8, !tbaa [[TBAA72]], !alias.scope [[META69]] +// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP74:![0-9]+]] +// CHECK: _ZN4sycl3_V1orIlNS0_6marrayIlLm32EEEEES3_RKS3_S5_.exit: +// CHECK-NEXT: ret void +// arithmetic_i64x32_marr(marray a, marray b) 
{ return a | b; @@ -327,12 +279,12 @@ marray // CHECK-LABEL: @_Z20arithmetic_f32x4_vecN4sycl3_V13vecIfLi4EEES2_( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META79:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META82:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META85:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META85]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META78:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META81:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META81]] // CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <4 x float> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 16, !alias.scope [[META85]] +// CHECK-NEXT: store <4 x float> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 16, !alias.scope [[META81]] // CHECK-NEXT: ret void // SYCL_EXTERNAL vec arithmetic_f32x4_vec(vec a, @@ -342,23 +294,11 @@ SYCL_EXTERNAL vec arithmetic_f32x4_vec(vec a, // CHECK-LABEL: @_Z21arithmetic_f32x4_marrN4sycl3_V16marrayIfLm4EEES2_( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META86:![0-9]+]]) -// CHECK-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK: for.cond.i: -// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PLERKNS0_6MARRAYIFLM4EEES4__EXIT:%.*]] -// 
CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA89:![0-9]+]], !noalias [[META86]] -// CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[B:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_I7_I]], align 4, !tbaa [[TBAA89]], !noalias [[META86]] -// CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: store float [[ADD_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 4, !tbaa [[TBAA89]], !alias.scope [[META86]] -// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP91:![0-9]+]] -// CHECK: _ZN4sycl3_V1plERKNS0_6marrayIfLm4EEES4_.exit: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META82:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4, !tbaa [[TBAA16]], !noalias [[META82]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, ptr [[B:%.*]], align 4, !tbaa [[TBAA16]], !noalias [[META82]] +// CHECK-NEXT: [[ADD_I:%.*]] = fadd <4 x float> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <4 x float> [[ADD_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 4, !alias.scope [[META82]] // CHECK-NEXT: ret void // SYCL_EXTERNAL marray arithmetic_f32x4_marr(marray a, @@ -369,7 +309,7 @@ SYCL_EXTERNAL marray arithmetic_f32x4_marr(marray a, // Non-standard size - should use loop // CHECK-LABEL: @_Z21arithmetic_f32x5_marrN4sycl3_V16marrayIfLm5EEES2_( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META92:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META85:![0-9]+]]) // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: 
for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] @@ -377,14 +317,14 @@ SYCL_EXTERNAL marray arithmetic_f32x4_marr(marray a, // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PLERKNS0_6MARRAYIFLM5EEES4__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA89]], !noalias [[META92]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA88:![0-9]+]], !noalias [[META85]] // CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[B:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_I7_I]], align 4, !tbaa [[TBAA89]], !noalias [[META92]] +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX_I7_I]], align 4, !tbaa [[TBAA88]], !noalias [[META85]] // CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[TMP0]], [[TMP1]] // CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: store float [[ADD_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 4, !tbaa [[TBAA89]], !alias.scope [[META92]] +// CHECK-NEXT: store float [[ADD_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 4, !tbaa [[TBAA88]], !alias.scope [[META85]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP95:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK: _ZN4sycl3_V1plERKNS0_6marrayIfLm5EEES4_.exit: // CHECK-NEXT: ret void // @@ -399,12 +339,12 @@ SYCL_EXTERNAL marray arithmetic_f32x5_marr(marray a, // CHECK-LABEL: @_Z20arithmetic_f64x4_vecN4sycl3_V13vecIdLi4EEES2_( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META98:![0-9]+]]) -// CHECK-NEXT: tail call void 
@llvm.experimental.noalias.scope.decl(metadata [[META101:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[A:%.*]], align 32, !tbaa [[TBAA16]], !noalias [[META104:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr [[B:%.*]], align 32, !tbaa [[TBAA16]], !noalias [[META104]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[A:%.*]], align 32, !tbaa [[TBAA16]], !noalias [[META99:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr [[B:%.*]], align 32, !tbaa [[TBAA16]], !noalias [[META99]] // CHECK-NEXT: [[SUB_I_I_I_I_I:%.*]] = fsub <4 x double> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <4 x double> [[SUB_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 32, !alias.scope [[META104]] +// CHECK-NEXT: store <4 x double> [[SUB_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 32, !alias.scope [[META99]] // CHECK-NEXT: ret void // SYCL_EXTERNAL vec arithmetic_f64x4_vec(vec a, @@ -414,23 +354,11 @@ SYCL_EXTERNAL vec arithmetic_f64x4_vec(vec a, // CHECK-LABEL: @_Z21arithmetic_f64x4_marrN4sycl3_V16marrayIdLm4EEES2_( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) -// CHECK-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK: for.cond.i: -// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1MIERKNS0_6MARRAYIDLM4EEES4__EXIT:%.*]] -// CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA108:![0-9]+]], !noalias [[META105]] -// 
CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[B:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX_I7_I]], align 8, !tbaa [[TBAA108]], !noalias [[META105]] -// CHECK-NEXT: [[SUB_I:%.*]] = fsub double [[TMP0]], [[TMP1]] -// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [8 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: store double [[SUB_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 8, !tbaa [[TBAA108]], !alias.scope [[META105]] -// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP110:![0-9]+]] -// CHECK: _ZN4sycl3_V1miERKNS0_6marrayIdLm4EEES4_.exit: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META100:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[A:%.*]], align 8, !tbaa [[TBAA16]], !noalias [[META100]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x double>, ptr [[B:%.*]], align 8, !tbaa [[TBAA16]], !noalias [[META100]] +// CHECK-NEXT: [[SUB_I:%.*]] = fsub <4 x double> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <4 x double> [[SUB_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 8, !alias.scope [[META100]] // CHECK-NEXT: ret void // SYCL_EXTERNAL marray arithmetic_f64x4_marr(marray a, @@ -441,7 +369,7 @@ SYCL_EXTERNAL marray arithmetic_f64x4_marr(marray a, // Non-standard size - should use loop // CHECK-LABEL: @_Z21arithmetic_f64x5_marrN4sycl3_V16marrayIdLm5EEES2_( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META111:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META103:![0-9]+]]) // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] @@ -449,14 +377,14 @@ SYCL_EXTERNAL marray arithmetic_f64x4_marr(marray a, // CHECK-NEXT: br i1 [[CMP_I]], label 
[[FOR_BODY_I]], label [[_ZN4SYCL3_V1MIERKNS0_6MARRAYIDLM5EEES4__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA108]], !noalias [[META111]] +// CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA106:![0-9]+]], !noalias [[META103]] // CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[B:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX_I7_I]], align 8, !tbaa [[TBAA108]], !noalias [[META111]] +// CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX_I7_I]], align 8, !tbaa [[TBAA106]], !noalias [[META103]] // CHECK-NEXT: [[SUB_I:%.*]] = fsub double [[TMP0]], [[TMP1]] // CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [8 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: store double [[SUB_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 8, !tbaa [[TBAA108]], !alias.scope [[META111]] +// CHECK-NEXT: store double [[SUB_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 8, !tbaa [[TBAA106]], !alias.scope [[META103]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP114:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP108:![0-9]+]] // CHECK: _ZN4sycl3_V1miERKNS0_6marrayIdLm5EEES4_.exit: // CHECK-NEXT: ret void // @@ -471,12 +399,12 @@ SYCL_EXTERNAL marray arithmetic_f64x5_marr(marray a, // CHECK-LABEL: @_Z20arithmetic_f16x8_vecN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEES5_( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META117:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META120:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A:%.*]], align 16, !tbaa [[TBAA16]], !noalias 
[[META123:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META123]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META111:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META114:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META117:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B:%.*]], align 16, !tbaa [[TBAA16]], !noalias [[META117]] // CHECK-NEXT: [[MUL_I_I_I_I_I:%.*]] = fmul <8 x half> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <8 x half> [[MUL_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 16, !alias.scope [[META123]] +// CHECK-NEXT: store <8 x half> [[MUL_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 16, !alias.scope [[META117]] // CHECK-NEXT: ret void // SYCL_EXTERNAL vec arithmetic_f16x8_vec(vec a, @@ -486,22 +414,22 @@ SYCL_EXTERNAL vec arithmetic_f16x8_vec(vec a, // CHECK-LABEL: @_Z21arithmetic_f16x8_marrN4sycl3_V16marrayINS0_6detail9half_impl4halfELm8EEES5_( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META124:![0-9]+]]) -// CHECK-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK: for.cond.i: -// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 8 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1MLERKNS0_6MARRAYINS0_6DETAIL9HALF_IMPL4HALFELM8EEES7__EXIT:%.*]] -// CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA127:![0-9]+]], !noalias [[META124]] -// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[B:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP1:%.*]] = 
load half, ptr [[ARRAYIDX_I8_I]], align 2, !tbaa [[TBAA127]], !noalias [[META124]] -// CHECK-NEXT: [[MUL_I_I_I:%.*]] = fmul half [[TMP1]], [[TMP0]] -// CHECK-NEXT: [[ARRAYIDX_I11_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: store half [[MUL_I_I_I]], ptr addrspace(4) [[ARRAYIDX_I11_I]], align 2, !tbaa [[TBAA127]], !alias.scope [[META124]] -// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP129:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META118:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A:%.*]], align 2, !tbaa [[TBAA16]], !noalias [[META118]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B:%.*]], align 2, !tbaa [[TBAA16]], !noalias [[META118]] +// CHECK-NEXT: [[MUL_I:%.*]] = fmul <8 x half> [[TMP0]], [[TMP1]] +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[MUL_I]] to <8 x i16> +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 8 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V1MLERKNS0_6MARRAYINS0_6DETAIL9HALF_IMPL4HALFELM8EEES7__EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[TMP3]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA121:![0-9]+]], !alias.scope [[META118]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP123:![0-9]+]] // CHECK: _ZN4sycl3_V1mlERKNS0_6marrayINS0_6detail9half_impl4halfELm8EEES7_.exit: // CHECK-NEXT: ret void 
// @@ -513,7 +441,7 @@ SYCL_EXTERNAL marray arithmetic_f16x8_marr(marray a, // Non-standard size - should use loop // CHECK-LABEL: @_Z21arithmetic_f16x7_marrN4sycl3_V16marrayINS0_6detail9half_impl4halfELm7EEES5_( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META130:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META124:![0-9]+]]) // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] @@ -521,14 +449,14 @@ SYCL_EXTERNAL marray arithmetic_f16x8_marr(marray a, // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1MLERKNS0_6MARRAYINS0_6DETAIL9HALF_IMPL4HALFELM7EEES7__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA127]], !noalias [[META130]] +// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[ARRAYIDX_I_I]], align 2, !tbaa [[TBAA121]], !noalias [[META124]] // CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[B:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[ARRAYIDX_I8_I]], align 2, !tbaa [[TBAA127]], !noalias [[META130]] +// CHECK-NEXT: [[TMP1:%.*]] = load half, ptr [[ARRAYIDX_I8_I]], align 2, !tbaa [[TBAA121]], !noalias [[META124]] // CHECK-NEXT: [[MUL_I_I_I:%.*]] = fmul half [[TMP1]], [[TMP0]] // CHECK-NEXT: [[ARRAYIDX_I11_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: store half [[MUL_I_I_I]], ptr addrspace(4) [[ARRAYIDX_I11_I]], align 2, !tbaa [[TBAA127]], !alias.scope [[META130]] +// CHECK-NEXT: store half [[MUL_I_I_I]], ptr addrspace(4) [[ARRAYIDX_I11_I]], align 2, !tbaa [[TBAA121]], !alias.scope [[META124]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 
-// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP133:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP127:![0-9]+]] // CHECK: _ZN4sycl3_V1mlERKNS0_6marrayINS0_6detail9half_impl4halfELm7EEES7_.exit: // CHECK-NEXT: ret void // @@ -543,39 +471,39 @@ SYCL_EXTERNAL marray arithmetic_f16x7_marr(marray a, SYCL_EXTERNAL vec - // CHECK-LABEL: @_Z21arithmetic_bf16x8_vecN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEES5_( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 - // CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.280", align 16 - // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A:%.*]] to ptr addrspace(4) - // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B:%.*]] to ptr addrspace(4) - // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[RES_I_I]]), !noalias [[META134:![0-9]+]] - // CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[RES_I_I]], i8 0, i64 16, i1 false), !noalias [[META137:![0-9]+]] - // CHECK-NEXT: [[REF_TMP_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I_I_I]] to ptr addrspace(4) - // CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] - // CHECK: for.cond.i.i: - // CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] - // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 8 - // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILMLERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI8EEES8__EXIT:%.*]] - // CHECK: for.body.i.i: - // CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[A_ASCAST]], i64 [[I_0_I_I]] - // CHECK-NEXT: [[ARRAYIDX_I12_I_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[B_ASCAST]], i64 [[I_0_I_I]] - // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META140:![0-9]+]] - // CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef float 
@__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR9:[0-9]+]], !noalias [[META143:![0-9]+]] - // CHECK-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I12_I_I]]) #[[ATTR9]], !noalias [[META143]] - // CHECK-NEXT: [[MUL_I_I_I_I:%.*]] = fmul float [[CALL_I_I_I_I_I_I]], [[CALL_I_I2_I_I_I_I]] - // CHECK-NEXT: store float [[MUL_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA89]], !noalias [[META143]] - // CHECK-NEXT: [[CALL_I_I3_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR9]], !noalias [[META143]] - // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META140]] - // CHECK-NEXT: [[ARRAYIDX_I14_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[RES_I_I]], i64 [[I_0_I_I]] - // CHECK-NEXT: store i16 [[CALL_I_I3_I_I_I_I]], ptr [[ARRAYIDX_I14_I_I]], align 2, !tbaa [[TBAA40]], !noalias [[META137]] - // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 - // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP146:![0-9]+]] - // CHECK: _ZN4sycl3_V16detailmlERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi8EEES8_.exit: - // CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 16 [[AGG_RESULT:%.*]], ptr align 16 [[RES_I_I]], i64 16, i1 false) - // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[RES_I_I]]), !noalias [[META134]] - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z21arithmetic_bf16x8_vecN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEES5_( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.280", align 16 +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast 
ptr [[A:%.*]] to ptr addrspace(4) +// CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B:%.*]] to ptr addrspace(4) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[RES_I_I]]), !noalias [[META128:![0-9]+]] +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[RES_I_I]], i8 0, i64 16, i1 false), !noalias [[META131:![0-9]+]] +// CHECK-NEXT: [[REF_TMP_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I_I_I]] to ptr addrspace(4) +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 8 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILMLERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI8EEES8__EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[ARRAYIDX_I_I_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[A_ASCAST]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[ARRAYIDX_I12_I_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[B_ASCAST]], i64 [[I_0_I_I]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META134:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I]]) #[[ATTR9:[0-9]+]], !noalias [[META137:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I12_I_I]]) #[[ATTR9]], !noalias [[META137]] +// CHECK-NEXT: [[MUL_I_I_I_I:%.*]] = fmul float [[CALL_I_I_I_I_I_I]], [[CALL_I_I2_I_I_I_I]] +// CHECK-NEXT: store float [[MUL_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA88]], !noalias [[META137]] +// CHECK-NEXT: [[CALL_I_I3_I_I_I_I:%.*]] = call 
spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR9]], !noalias [[META137]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META134]] +// CHECK-NEXT: [[ARRAYIDX_I14_I_I:%.*]] = getelementptr inbounds [2 x i8], ptr [[RES_I_I]], i64 [[I_0_I_I]] +// CHECK-NEXT: store i16 [[CALL_I_I3_I_I_I_I]], ptr [[ARRAYIDX_I14_I_I]], align 2, !tbaa [[TBAA42]], !noalias [[META131]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP140:![0-9]+]] +// CHECK: _ZN4sycl3_V16detailmlERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi8EEES8_.exit: +// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 16 [[AGG_RESULT:%.*]], ptr align 16 [[RES_I_I]], i64 16, i1 false) +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[RES_I_I]]), !noalias [[META128]] +// CHECK-NEXT: ret void +// arithmetic_bf16x8_vec(vec a, vec b) { return a * b; @@ -583,35 +511,35 @@ vec SYCL_EXTERNAL marray - // CHECK-LABEL: @_Z22arithmetic_bf16x8_marrN4sycl3_V16marrayINS0_3ext6oneapi8bfloat16ELm8EEES5_( - // CHECK-NEXT: entry: - // CHECK-NEXT: [[REF_TMP_I_I:%.*]] = alloca float, align 4 - // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A:%.*]] to ptr addrspace(4) - // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B:%.*]] to ptr addrspace(4) - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META147:![0-9]+]]) - // CHECK-NEXT: [[REF_TMP_ASCAST_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I]] to ptr addrspace(4) - // CHECK-NEXT: br label [[FOR_COND_I:%.*]] - // CHECK: for.cond.i: - // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] - // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 8 - // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label 
[[_ZN4SYCL3_V1MLERKNS0_6MARRAYINS0_3EXT6ONEAPI8BFLOAT16ELM8EEES7__EXIT:%.*]] - // CHECK: for.body.i: - // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[A_ASCAST]], i64 [[I_0_I]] - // CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[B_ASCAST]], i64 [[I_0_I]] - // CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I_I]]), !noalias [[META147]] - // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I]]) #[[ATTR9]], !noalias [[META150:![0-9]+]] - // CHECK-NEXT: [[CALL_I_I2_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I7_I]]) #[[ATTR9]], !noalias [[META150]] - // CHECK-NEXT: [[MUL_I_I:%.*]] = fmul float [[CALL_I_I_I_I]], [[CALL_I_I2_I_I]] - // CHECK-NEXT: store float [[MUL_I_I]], ptr [[REF_TMP_I_I]], align 4, !tbaa [[TBAA89]], !noalias [[META150]] - // CHECK-NEXT: [[CALL_I_I3_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I]]) #[[ATTR9]], !noalias [[META150]] - // CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I_I]]), !noalias [[META147]] - // CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: store i16 [[CALL_I_I3_I_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 2, !tbaa [[TBAA40]], !alias.scope [[META147]] - // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 - // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP153:![0-9]+]] - // CHECK: _ZN4sycl3_V1mlERKNS0_6marrayINS0_3ext6oneapi8bfloat16ELm8EEES7_.exit: - // CHECK-NEXT: ret void - // +// CHECK-LABEL: 
@_Z22arithmetic_bf16x8_marrN4sycl3_V16marrayINS0_3ext6oneapi8bfloat16ELm8EEES5_( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[REF_TMP_I_I:%.*]] = alloca float, align 4 +// CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A:%.*]] to ptr addrspace(4) +// CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B:%.*]] to ptr addrspace(4) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META141:![0-9]+]]) +// CHECK-NEXT: [[REF_TMP_ASCAST_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I]] to ptr addrspace(4) +// CHECK-NEXT: br label [[FOR_COND_I:%.*]] +// CHECK: for.cond.i: +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 8 +// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1MLERKNS0_6MARRAYINS0_3EXT6ONEAPI8BFLOAT16ELM8EEES7__EXIT:%.*]] +// CHECK: for.body.i: +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[A_ASCAST]], i64 [[I_0_I]] +// CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[B_ASCAST]], i64 [[I_0_I]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[REF_TMP_I_I]]), !noalias [[META141]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I]]) #[[ATTR9]], !noalias [[META144:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I2_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I7_I]]) #[[ATTR9]], !noalias [[META144]] +// CHECK-NEXT: [[MUL_I_I:%.*]] = fmul float [[CALL_I_I_I_I]], [[CALL_I_I2_I_I]] +// CHECK-NEXT: store float [[MUL_I_I]], ptr [[REF_TMP_I_I]], align 4, !tbaa [[TBAA88]], !noalias [[META144]] +// CHECK-NEXT: [[CALL_I_I3_I_I:%.*]] = call spir_func noundef 
zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I]]) #[[ATTR9]], !noalias [[META144]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[REF_TMP_I_I]]), !noalias [[META141]] +// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [2 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: store i16 [[CALL_I_I3_I_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 2, !tbaa [[TBAA42]], !alias.scope [[META141]] +// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP147:![0-9]+]] +// CHECK: _ZN4sycl3_V1mlERKNS0_6marrayINS0_3ext6oneapi8bfloat16ELm8EEES7_.exit: +// CHECK-NEXT: ret void +// arithmetic_bf16x8_marr(marray a, marray b) { return a * b; @@ -623,29 +551,26 @@ marray // CHECK-LABEL: @_Z18logical_i32x4_marrN4sycl3_V16marrayIiLm4EEES2_( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META154:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META148:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4, !tbaa [[TBAA16]], !noalias [[META148]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4, !tbaa [[TBAA16]], !noalias [[META148]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <4 x i32> [[TMP0]], zeroinitializer +// CHECK-NEXT: [[CMP3_I:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer +// CHECK-NEXT: [[TMP2:%.*]] = and <4 x i1> [[CMP_I]], [[CMP3_I]] +// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32> // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: -// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[LAND_END_I:%.*]] ] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I:%.*]], label [[_ZN4SYCL3_V1AAERKNS0_6MARRAYIILM4EEES4__EXIT:%.*]] +// 
CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[CMP4_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP4_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1AAERKNS0_6MARRAYIILM4EEES4__EXIT:%.*]] // CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5]], !noalias [[META154]] -// CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0 -// CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label [[LAND_END_I]], label [[LAND_RHS_I:%.*]] -// CHECK: land.rhs.i: -// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[B:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_I8_I]], align 4, !tbaa [[TBAA5]], !noalias [[META154]] -// CHECK-NEXT: [[TOBOOL2_I:%.*]] = icmp ne i32 [[TMP1]], 0 -// CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TOBOOL2_I]] to i8 -// CHECK-NEXT: br label [[LAND_END_I]] -// CHECK: land.end.i: -// CHECK-NEXT: [[STOREDV_I:%.*]] = phi i8 [ 0, [[FOR_BODY_I]] ], [ [[TMP2]], [[LAND_RHS_I]] ] -// CHECK-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA157:![0-9]+]], !alias.scope [[META154]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[SEXT_I]], i64 [[I_0_I]] +// CHECK-NEXT: [[CMP5_I:%.*]] = icmp ne i32 [[VECEXT_I]], 0 +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[CMP5_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA151:![0-9]+]], !alias.scope [[META148]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], 
!llvm.loop [[LOOP159:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP153:![0-9]+]] // CHECK: _ZN4sycl3_V1aaERKNS0_6marrayIiLm4EEES4_.exit: // CHECK-NEXT: ret void // @@ -657,7 +582,7 @@ SYCL_EXTERNAL marray logical_i32x4_marr(marray a, // Non-standard size - should use loop // CHECK-LABEL: @_Z18logical_i32x5_marrN4sycl3_V16marrayIiLm5EEES2_( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META160:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META154:![0-9]+]]) // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[LAND_END_I:%.*]] ] @@ -665,21 +590,21 @@ SYCL_EXTERNAL marray logical_i32x4_marr(marray a, // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I:%.*]], label [[_ZN4SYCL3_V1AAERKNS0_6MARRAYIILM5EEES4__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5]], !noalias [[META160]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5]], !noalias [[META154]] // CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0 // CHECK-NEXT: br i1 [[TOBOOL_NOT_I]], label [[LAND_END_I]], label [[LAND_RHS_I:%.*]] // CHECK: land.rhs.i: // CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[B:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_I8_I]], align 4, !tbaa [[TBAA5]], !noalias [[META160]] +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_I8_I]], align 4, !tbaa [[TBAA5]], !noalias [[META154]] // CHECK-NEXT: [[TOBOOL2_I:%.*]] = icmp ne i32 [[TMP1]], 0 // CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TOBOOL2_I]] to i8 // CHECK-NEXT: br label [[LAND_END_I]] // CHECK: land.end.i: // CHECK-NEXT: [[STOREDV_I:%.*]] = phi i8 [ 0, 
[[FOR_BODY_I]] ], [ [[TMP2]], [[LAND_RHS_I]] ] // CHECK-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA157]], !alias.scope [[META160]] +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA151]], !alias.scope [[META154]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP163:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP157:![0-9]+]] // CHECK: _ZN4sycl3_V1aaERKNS0_6marrayIiLm5EEES4_.exit: // CHECK-NEXT: ret void // @@ -698,45 +623,39 @@ SYCL_EXTERNAL marray logical_i32x5_marr(marray a, // CHECK-LABEL: @_Z15bitnot_i8x8_vecN4sycl3_V13vecIaLi8EEE( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META164:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META167:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A:%.*]], align 8, !tbaa [[TBAA16]], !noalias [[META170:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META158:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META161:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A:%.*]], align 8, !tbaa [[TBAA16]], !noalias [[META164:![0-9]+]] // CHECK-NEXT: [[NOT_I_I_I_I:%.*]] = xor <8 x i8> [[TMP0]], splat (i8 -1) -// CHECK-NEXT: store <8 x i8> [[NOT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 8, !alias.scope [[META171:![0-9]+]] +// CHECK-NEXT: store <8 x i8> [[NOT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 8, !alias.scope [[META165:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL vec bitnot_i8x8_vec(vec a) { return ~a; } +// +SYCL_EXTERNAL +marray // CHECK-LABEL: @_Z16bitnot_i8x8_marrN4sycl3_V16marrayIaLm8EEE( // 
CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META174:![0-9]+]]) -// CHECK-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK: for.cond.i: -// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 8 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1COIAEENST9ENABLE_IFIXSR3STDE13IS_INTEGRAL_VIT_EENS0_6MARRAYIALM8EEEE4TYPEERKS5__EXIT:%.*]] -// CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA16]], !noalias [[META174]] -// CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[TMP0]], -1 -// CHECK-NEXT: [[ARRAYIDX_I6_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: store i8 [[TMP1]], ptr addrspace(4) [[ARRAYIDX_I6_I]], align 1, !tbaa [[TBAA16]], !alias.scope [[META174]] -// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP177:![0-9]+]] -// CHECK: _ZN4sycl3_V1coIaEENSt9enable_ifIXsr3stdE13is_integral_vIT_EENS0_6marrayIaLm8EEEE4typeERKS5_.exit: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A:%.*]], align 1, !tbaa [[TBAA16]], !noalias [[META168]] +// CHECK-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[TMP0]], splat (i8 -1) +// CHECK-NEXT: store <8 x i8> [[NOT_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 1, !alias.scope [[META168]] // CHECK-NEXT: ret void // -SYCL_EXTERNAL marray -bitnot_i8x8_marr(marray a) { + bitnot_i8x8_marr(marray a) { return ~a; } // Non-standard size - should use loop +// +SYCL_EXTERNAL +marray // CHECK-LABEL: @_Z16bitnot_i8x7_marrN4sycl3_V16marrayIaLm7EEE( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void 
@llvm.experimental.noalias.scope.decl(metadata [[META178:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META171:![0-9]+]]) // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] @@ -744,17 +663,16 @@ bitnot_i8x8_marr(marray a) { // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1COIAEENST9ENABLE_IFIXSR3STDE13IS_INTEGRAL_VIT_EENS0_6MARRAYIALM7EEEE4TYPEERKS5__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA16]], !noalias [[META178]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA16]], !noalias [[META171]] // CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[TMP0]], -1 // CHECK-NEXT: [[ARRAYIDX_I6_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: store i8 [[TMP1]], ptr addrspace(4) [[ARRAYIDX_I6_I]], align 1, !tbaa [[TBAA16]], !alias.scope [[META178]] +// CHECK-NEXT: store i8 [[TMP1]], ptr addrspace(4) [[ARRAYIDX_I6_I]], align 1, !tbaa [[TBAA16]], !alias.scope [[META171]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP181:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP174:![0-9]+]] // CHECK: _ZN4sycl3_V1coIaEENSt9enable_ifIXsr3stdE13is_integral_vIT_EENS0_6marrayIaLm7EEEE4typeERKS5_.exit: // CHECK-NEXT: ret void // -SYCL_EXTERNAL marray -bitnot_i8x7_marr(marray a) { + bitnot_i8x7_marr(marray a) { return ~a; } @@ -764,21 +682,23 @@ bitnot_i8x7_marr(marray a) { // CHECK-LABEL: @_Z17lognot_i32x4_marrN4sycl3_V16marrayIiLm4EEE( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META182:![0-9]+]]) +// CHECK-NEXT: tail call void 
@llvm.experimental.noalias.scope.decl(metadata [[META175:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4, !tbaa [[TBAA16]], !noalias [[META175]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp eq <4 x i32> [[TMP0]], zeroinitializer +// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1NTERKNS0_6MARRAYIILM4EEE_EXIT:%.*]] +// CHECK-NEXT: [[CMP1_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP1_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1NTERKNS0_6MARRAYIILM4EEE_EXIT:%.*]] // CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5]], !noalias [[META182]] -// CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0 -// CHECK-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_NOT_I]] to i8 -// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I5_I]], align 1, !tbaa [[TBAA157]], !alias.scope [[META182]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i32> [[SEXT_I]], i64 [[I_0_I]] +// CHECK-NEXT: [[CMP2_I:%.*]] = icmp ne i32 [[VECEXT_I]], 0 +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[CMP2_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA151]], !alias.scope [[META175]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label 
[[FOR_COND_I]], !llvm.loop [[LOOP185:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP178:![0-9]+]] // CHECK: _ZN4sycl3_V1ntERKNS0_6marrayIiLm4EEE.exit: // CHECK-NEXT: ret void // @@ -789,7 +709,7 @@ SYCL_EXTERNAL marray lognot_i32x4_marr(marray a) { // Non-standard size - should use loop // CHECK-LABEL: @_Z17lognot_i32x5_marrN4sycl3_V16marrayIiLm5EEE( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META186:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META179:![0-9]+]]) // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] @@ -797,13 +717,13 @@ SYCL_EXTERNAL marray lognot_i32x4_marr(marray a) { // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1NTERKNS0_6MARRAYIILM5EEE_EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5]], !noalias [[META186]] +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5]], !noalias [[META179]] // CHECK-NEXT: [[TOBOOL_NOT_I:%.*]] = icmp eq i32 [[TMP0]], 0 // CHECK-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] // CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[TOBOOL_NOT_I]] to i8 -// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I5_I]], align 1, !tbaa [[TBAA157]], !alias.scope [[META186]] +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I5_I]], align 1, !tbaa [[TBAA151]], !alias.scope [[META179]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP189:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP182:![0-9]+]] // CHECK: 
_ZN4sycl3_V1ntERKNS0_6marrayIiLm5EEE.exit: // CHECK-NEXT: ret void // @@ -817,20 +737,9 @@ SYCL_EXTERNAL marray lognot_i32x5_marr(marray a) { // CHECK-LABEL: @_Z16uplus_f32x4_marrN4sycl3_V16marrayIfLm4EEE( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META190:![0-9]+]]) -// CHECK-NEXT: br label [[FOR_COND_I:%.*]] -// CHECK: for.cond.i: -// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PSERKNS0_6MARRAYIFLM4EEE_EXIT:%.*]] -// CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA89]], !noalias [[META190]] -// CHECK-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: store float [[TMP0]], ptr addrspace(4) [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA89]], !alias.scope [[META190]] -// CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP193:![0-9]+]] -// CHECK: _ZN4sycl3_V1psERKNS0_6marrayIfLm4EEE.exit: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META183:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A:%.*]], align 4, !tbaa [[TBAA16]], !noalias [[META183]] +// CHECK-NEXT: store <4 x float> [[TMP0]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 4, !alias.scope [[META183]] // CHECK-NEXT: ret void // SYCL_EXTERNAL marray uplus_f32x4_marr(marray a) { @@ -840,7 +749,7 @@ SYCL_EXTERNAL marray uplus_f32x4_marr(marray a) { // Non-standard size - should use loop // CHECK-LABEL: @_Z16uplus_f32x5_marrN4sycl3_V16marrayIfLm5EEE( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void 
@llvm.experimental.noalias.scope.decl(metadata [[META194:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META186:![0-9]+]]) // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] @@ -848,11 +757,11 @@ SYCL_EXTERNAL marray uplus_f32x4_marr(marray a) { // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PSERKNS0_6MARRAYIFLM5EEE_EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA89]], !noalias [[META194]] +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA88]], !noalias [[META186]] // CHECK-NEXT: [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: store float [[TMP0]], ptr addrspace(4) [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA89]], !alias.scope [[META194]] +// CHECK-NEXT: store float [[TMP0]], ptr addrspace(4) [[ARRAYIDX_I5_I]], align 4, !tbaa [[TBAA88]], !alias.scope [[META186]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP197:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP189:![0-9]+]] // CHECK: _ZN4sycl3_V1psERKNS0_6marrayIfLm5EEE.exit: // CHECK-NEXT: ret void // @@ -898,7 +807,7 @@ SYCL_EXTERNAL void stream_i64x16_vec(vec *in, // CHECK-LABEL: @_Z18stream_i64x16_marrPN4sycl3_V16marrayIlLm16EEES3_( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) noundef align 8 dereferenceable(128) [[OUT:%.*]], ptr addrspace(4) noundef align 8 dereferenceable(128) [[IN:%.*]], i64 128, i1 false), !tbaa.struct [[TBAA_STRUCT198:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) 
noundef align 8 dereferenceable(128) [[OUT:%.*]], ptr addrspace(4) noundef align 8 dereferenceable(128) [[IN:%.*]], i64 128, i1 false), !tbaa.struct [[TBAA_STRUCT190:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL void stream_i64x16_marr(marray *in, @@ -912,27 +821,15 @@ SYCL_EXTERNAL void stream_i64x16_marr(marray *in, SYCL_EXTERNAL marray - // CHECK-LABEL: @_Z21arithmetic_u32x4_marrN4sycl3_V16marrayIjLm4EEES2_( - // CHECK-NEXT: entry: - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META199:![0-9]+]]) - // CHECK-NEXT: br label [[FOR_COND_I:%.*]] - // CHECK: for.cond.i: - // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] - // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 - // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1PLERKNS0_6MARRAYIJLM4EEES4__EXIT:%.*]] - // CHECK: for.body.i: - // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[A:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX_I_I]], align 4, !tbaa [[TBAA5]], !noalias [[META199]] - // CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [4 x i8], ptr [[B:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX_I7_I]], align 4, !tbaa [[TBAA5]], !noalias [[META199]] - // CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[TMP0]], [[TMP1]] - // CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [4 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: store i32 [[ADD_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 4, !tbaa [[TBAA5]], !alias.scope [[META199]] - // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 - // CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP202:![0-9]+]] - // CHECK: _ZN4sycl3_V1plERKNS0_6marrayIjLm4EEES4_.exit: - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z21arithmetic_u32x4_marrN4sycl3_V16marrayIjLm4EEES2_( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail 
call void @llvm.experimental.noalias.scope.decl(metadata [[META191:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A:%.*]], align 4, !tbaa [[TBAA16]], !noalias [[META191]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[B:%.*]], align 4, !tbaa [[TBAA16]], !noalias [[META191]] +// CHECK-NEXT: [[ADD_I:%.*]] = add <4 x i32> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <4 x i32> [[ADD_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 4, !alias.scope [[META191]] +// CHECK-NEXT: ret void +// arithmetic_u32x4_marr(marray a, marray b) { return a + b; @@ -940,27 +837,15 @@ marray SYCL_EXTERNAL marray - // CHECK-LABEL: @_Z18bitwise_u64x8_marrN4sycl3_V16marrayImLm8EEES2_( - // CHECK-NEXT: entry: - // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META203:![0-9]+]]) - // CHECK-NEXT: br label [[FOR_COND_I:%.*]] - // CHECK: for.cond.i: - // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] - // CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 8 - // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1ANIMNS0_6MARRAYIMLM8EEEEES3_RKS3_S5__EXIT:%.*]] - // CHECK: for.body.i: - // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[A:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX_I_I]], align 8, !tbaa [[TBAA72]], !noalias [[META203]] - // CHECK-NEXT: [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds [8 x i8], ptr [[B:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[ARRAYIDX_I7_I]], align 8, !tbaa [[TBAA72]], !noalias [[META203]] - // CHECK-NEXT: [[AND_I:%.*]] = and i64 [[TMP0]], [[TMP1]] - // CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds nuw [8 x i8], ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] - // CHECK-NEXT: store i64 [[AND_I]], ptr addrspace(4) [[ARRAYIDX_I8_I]], align 8, !tbaa [[TBAA72]], !alias.scope [[META203]] - // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 - 
// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP206:![0-9]+]] - // CHECK: _ZN4sycl3_V1anImNS0_6marrayImLm8EEEEES3_RKS3_S5_.exit: - // CHECK-NEXT: ret void - // +// CHECK-LABEL: @_Z18bitwise_u64x8_marrN4sycl3_V16marrayImLm8EEES2_( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META194:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[A:%.*]], align 8, !tbaa [[TBAA16]], !noalias [[META194]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr [[B:%.*]], align 8, !tbaa [[TBAA16]], !noalias [[META194]] +// CHECK-NEXT: [[AND_I:%.*]] = and <8 x i64> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <8 x i64> [[AND_I]], ptr addrspace(4) [[AGG_RESULT:%.*]], align 8, !alias.scope [[META194]] +// CHECK-NEXT: ret void +// bitwise_u64x8_marr(marray a, marray b) { return (a & b); } @@ -972,27 +857,26 @@ marray // Note: bool marray uses int8_t internally for ext_vector_type // CHECK-LABEL: @_Z19logical_boolx4_marrN4sycl3_V16marrayIbLm4EEES2_( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META207:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META197:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A:%.*]], align 1, !tbaa [[TBAA16]], !noalias [[META197]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[B:%.*]], align 1, !tbaa [[TBAA16]], !noalias [[META197]] +// CHECK-NEXT: [[CMP_I:%.*]] = icmp ne <4 x i8> [[TMP0]], zeroinitializer +// CHECK-NEXT: [[CMP3_I:%.*]] = icmp ne <4 x i8> [[TMP1]], zeroinitializer +// CHECK-NEXT: [[TMP2:%.*]] = and <4 x i1> [[CMP_I]], [[CMP3_I]] +// CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i8> // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: -// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[LAND_END_I:%.*]] ] -// CHECK-NEXT: [[CMP_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 -// CHECK-NEXT: br i1 [[CMP_I]], 
label [[FOR_BODY_I:%.*]], label [[_ZN4SYCL3_V1AAERKNS0_6MARRAYIBLM4EEES4__EXIT:%.*]] +// CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ] +// CHECK-NEXT: [[CMP4_I:%.*]] = icmp samesign ult i64 [[I_0_I]], 4 +// CHECK-NEXT: br i1 [[CMP4_I]], label [[FOR_BODY_I]], label [[_ZN4SYCL3_V1AAERKNS0_6MARRAYIBLM4EEES4__EXIT:%.*]] // CHECK: for.body.i: -// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA157]], !range [[RNG210:![0-9]+]], !noalias [[META207]], !noundef [[META9:![0-9]+]] -// CHECK-NEXT: [[LOADEDV_I:%.*]] = trunc nuw i8 [[TMP0]] to i1 -// CHECK-NEXT: br i1 [[LOADEDV_I]], label [[LAND_RHS_I:%.*]], label [[LAND_END_I]] -// CHECK: land.rhs.i: -// CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_I8_I]], align 1, !tbaa [[TBAA157]], !range [[RNG210]], !noalias [[META207]], !noundef [[META9]] -// CHECK-NEXT: br label [[LAND_END_I]] -// CHECK: land.end.i: -// CHECK-NEXT: [[STOREDV_I:%.*]] = phi i8 [ 0, [[FOR_BODY_I]] ], [ [[TMP1]], [[LAND_RHS_I]] ] -// CHECK-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA157]], !alias.scope [[META207]] +// CHECK-NEXT: [[VECEXT_I:%.*]] = extractelement <4 x i8> [[SEXT_I]], i64 [[I_0_I]] +// CHECK-NEXT: [[CMP5_I:%.*]] = icmp ne i8 [[VECEXT_I]], 0 +// CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] +// CHECK-NEXT: [[STOREDV_I:%.*]] = zext i1 [[CMP5_I]] to i8 +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA151]], !alias.scope [[META197]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// 
CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP211:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP200:![0-9]+]] // CHECK: _ZN4sycl3_V1aaERKNS0_6marrayIbLm4EEES4_.exit: // CHECK-NEXT: ret void // @@ -1003,7 +887,7 @@ SYCL_EXTERNAL marray logical_boolx4_marr(marray a, // CHECK-LABEL: @_Z19logical_boolx5_marrN4sycl3_V16marrayIbLm5EEES2_( // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META212:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META201:![0-9]+]]) // CHECK-NEXT: br label [[FOR_COND_I:%.*]] // CHECK: for.cond.i: // CHECK-NEXT: [[I_0_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[LAND_END_I:%.*]] ] @@ -1011,19 +895,19 @@ SYCL_EXTERNAL marray logical_boolx4_marr(marray a, // CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I:%.*]], label [[_ZN4SYCL3_V1AAERKNS0_6MARRAYIBLM5EEES4__EXIT:%.*]] // CHECK: for.body.i: // CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA157]], !range [[RNG210]], !noalias [[META212]], !noundef [[META9]] +// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX_I_I]], align 1, !tbaa [[TBAA151]], !range [[RNG204:![0-9]+]], !noalias [[META201]], !noundef [[META9:![0-9]+]] // CHECK-NEXT: [[LOADEDV_I:%.*]] = trunc nuw i8 [[TMP0]] to i1 // CHECK-NEXT: br i1 [[LOADEDV_I]], label [[LAND_RHS_I:%.*]], label [[LAND_END_I]] // CHECK: land.rhs.i: // CHECK-NEXT: [[ARRAYIDX_I8_I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_I8_I]], align 1, !tbaa [[TBAA157]], !range [[RNG210]], !noalias [[META212]], !noundef [[META9]] +// CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[ARRAYIDX_I8_I]], align 1, !tbaa [[TBAA151]], !range [[RNG204]], !noalias [[META201]], !noundef [[META9]] // CHECK-NEXT: br label [[LAND_END_I]] // CHECK: land.end.i: 
// CHECK-NEXT: [[STOREDV_I:%.*]] = phi i8 [ 0, [[FOR_BODY_I]] ], [ [[TMP1]], [[LAND_RHS_I]] ] // CHECK-NEXT: [[ARRAYIDX_I9_I:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(4) [[AGG_RESULT:%.*]], i64 [[I_0_I]] -// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA157]], !alias.scope [[META212]] +// CHECK-NEXT: store i8 [[STOREDV_I]], ptr addrspace(4) [[ARRAYIDX_I9_I]], align 1, !tbaa [[TBAA151]], !alias.scope [[META201]] // CHECK-NEXT: [[INC_I]] = add nuw nsw i64 [[I_0_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP215:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP205:![0-9]+]] // CHECK: _ZN4sycl3_V1aaERKNS0_6marrayIbLm5EEES4_.exit: // CHECK-NEXT: ret void // From d9b207621fc3da4b169720ea0456b80adca4da70 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Wed, 24 Jun 2026 05:50:24 +0200 Subject: [PATCH 288/511] [Clang][SYCL] Update undocumented-attrs test for SYCL attributes (#22409) Test fails after 96eb0cb194. Add NativeCPULibclcCall, SYCLGlobalVar, SYCLIntelESimdVectorize, SYCLUsesAspects to undocumented list; remove ReqdWorkGroupSize and WorkGroupSizeHint (now documented); update total 84->86. 
Co-authored-by: Claude Sonnet 4.6 --- clang/test/AST/undocumented-attrs.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/clang/test/AST/undocumented-attrs.cpp b/clang/test/AST/undocumented-attrs.cpp index eeebd7f938644..19b0c44d1d8d6 100644 --- a/clang/test/AST/undocumented-attrs.cpp +++ b/clang/test/AST/undocumented-attrs.cpp @@ -46,6 +46,7 @@ CHECK-NEXT: MayAlias CHECK-NEXT: Mips16 CHECK-NEXT: Mode CHECK-NEXT: Naked +CHECK-NEXT: NativeCPULibclcCall CHECK-NEXT: NeonPolyVectorType CHECK-NEXT: NeonVectorType CHECK-NEXT: NoCommon @@ -76,9 +77,11 @@ CHECK-NEXT: PtGuardedBy CHECK-NEXT: PtGuardedVar CHECK-NEXT: Pure CHECK-NEXT: ReentrantCapability -CHECK-NEXT: ReqdWorkGroupSize CHECK-NEXT: RequiresCapability CHECK-NEXT: ReturnsTwice +CHECK-NEXT: SYCLGlobalVar +CHECK-NEXT: SYCLIntelESimdVectorize +CHECK-NEXT: SYCLUsesAspects CHECK-NEXT: ScopedLockable CHECK-NEXT: Unavailable CHECK-NEXT: Uuid @@ -89,5 +92,4 @@ CHECK-NEXT: VectorSize CHECK-NEXT: Visibility CHECK-NEXT: WeakImport CHECK-NEXT: WeakRef -CHECK-NEXT: WorkGroupSizeHint -CHECK-NEXT: Total: 84 +CHECK-NEXT: Total: 86 From e72e9581b9afc8f914fd2c60688c7c840e675715 Mon Sep 17 00:00:00 2001 From: Frederick Vu <100011202+FrederickVu@users.noreply.github.com> Date: Tue, 23 Jun 2026 21:13:28 -0700 Subject: [PATCH 289/511] [AMDGPU] Reject src1 immediates with dpp when unsupported (#201494) This fixes an oversight in #164241. 
--- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 10 ++++++++++ .../test/CodeGen/AMDGPU/dpp_combine_gfx11.mir | 6 ++++-- .../MachineVerifier/AMDGPU/dpp-imm-src1.mir | 19 +++++++++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 llvm/test/MachineVerifier/AMDGPU/dpp-imm-src1.mir diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 43cdaa34cf3e3..48953cee43956 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -4782,6 +4782,12 @@ bool SIInstrInfo::isLiteralOperandLegal(const MCInstrDesc &InstDesc, bool SIInstrInfo::isImmOperandLegal(const MCInstrDesc &InstDesc, unsigned OpNo, int64_t ImmVal) const { + const unsigned Opc = InstDesc.getOpcode(); + int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1); + if (Src1Idx != -1 && isDPP(Opc) && !ST.hasDPPSrc1SGPR() && + OpNo == static_cast(Src1Idx)) + return false; + const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo]; if (isInlineConstant(ImmVal, OpInfo.OperandType)) { if (isMAI(InstDesc) && ST.hasMFMAInlineLiteralBug() && @@ -5396,6 +5402,10 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI, ErrInfo = "DPP src1 cannot be SGPR on this subtarget"; return false; } + if (Src1MO.isImm()) { + ErrInfo = "DPP src1 cannot be an immediate on this subtarget"; + return false; + } } // Verify MIMG / VIMAGE / VSAMPLE diff --git a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir index ab44ea970025d..fd5a662815e21 100644 --- a/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir +++ b/llvm/test/CodeGen/AMDGPU/dpp_combine_gfx11.mir @@ -8,7 +8,8 @@ # GCN: %6:vgpr_32, %7:sreg_32_xm0_xexec = V_SUBBREV_U32_e64_dpp %3, %0, %1, %5, 1, 1, 15, 15, 1, implicit $exec # GCN: %8:vgpr_32 = V_CVT_PK_U8_F32_e64_dpp %3, 4, %0, 2, %2, 2, %1, 1, 1, 15, 15, 1, implicit $mode, implicit $exec # GCN: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %0, 0, 12345678, 0, 0, implicit 
$mode, implicit $exec -# GCN: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec +# GFX_NO_SRC1_SGPR: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 2, 0, %7, 0, 0, implicit $mode, implicit $exec +# GFX_SRC1_SGPR: %12:vgpr_32 = V_MED3_F32_e64_dpp %3, 0, %1, 0, 2, 0, %7, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec name: vop3 tracksRegLiveness: true body: | @@ -42,7 +43,8 @@ body: | # GFX_SRC1_SGPR: %8:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %1, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec # GFX_NO_SRC1_SGPR: %10:vgpr_32 = V_MED3_F32_e64 0, %9, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec # GFX_SRC1_SGPR: %10:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, %2, 0, %3, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec -# GCN: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec +# GFX_NO_SRC1_SGPR: %12:vgpr_32 = V_MED3_F32_e64 0, %11, 0, 42, 0, %2, 0, 0, implicit $mode, implicit $exec +# GFX_SRC1_SGPR: %12:vgpr_32 = V_MED3_F32_e64_dpp %4, 0, %0, 0, 42, 0, %2, 0, 0, 1, 15, 15, 1, implicit $mode, implicit $exec # GCN: %14:vgpr_32 = V_MED3_F32_e64 0, %13, 0, 4242, 0, %2, 0, 0, implicit $mode, implicit $exec name: vop3_sgpr_src1 tracksRegLiveness: true diff --git a/llvm/test/MachineVerifier/AMDGPU/dpp-imm-src1.mir b/llvm/test/MachineVerifier/AMDGPU/dpp-imm-src1.mir new file mode 100644 index 0000000000000..a15ca1c394044 --- /dev/null +++ b/llvm/test/MachineVerifier/AMDGPU/dpp-imm-src1.mir @@ -0,0 +1,19 @@ +# RUN: not --crash llc -mtriple=amdgcn -mcpu=gfx1100 -run-pass=none -filetype=null %s 2>&1 | FileCheck %s --check-prefix=GFX_NO_SRC1_SGPR +# RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -run-pass=none -filetype=null %s 2>&1 | FileCheck %s --check-prefix=GFX_SRC1_SGPR +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=none -filetype=null %s 2>&1 | FileCheck %s --check-prefix=GFX_SRC1_SGPR + +# GFX_SRC1_SGPR-NOT: *** Bad machine code: DPP src1 
cannot be an immediate on this subtarget *** + +--- +name: dpp_imm_src1 +tracksRegLiveness: true +body: | + bb.0: + liveins: $vgpr0 + + %0:vgpr_32 = COPY $vgpr0 + + ; GFX_NO_SRC1_SGPR: *** Bad machine code: DPP src1 cannot be an immediate on this subtarget *** + ; GFX_NO_SRC1_SGPR: V_ADD_CO_U32_e64_dpp + %1:vgpr_32, %2:sreg_32_xexec = V_ADD_CO_U32_e64_dpp %0:vgpr_32, %0:vgpr_32, 42, 0, 228, 12, 15, 0, implicit $exec +... From bf1f61b26bcfff22e07e53afed8372092a8565ff Mon Sep 17 00:00:00 2001 From: flovent Date: Wed, 24 Jun 2026 12:47:37 +0800 Subject: [PATCH 290/511] [clang-tidy][NFC] Remove a wrong comment in ProTypeMemberInitCheck (#205477) `getAsCXXRecordDecl` will return nullptr for any dependent types. It's introduced by #192786, see https://github.com/llvm/llvm-project/pull/192786#issuecomment-4785223372 in original PR. --- .../clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp index 74cd62cd869f8..7dd5b0b6ea108 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProTypeMemberInitCheck.cpp @@ -572,8 +572,6 @@ void ProTypeMemberInitCheck::checkMissingBaseClassInitializer( for (const CXXCtorInitializer *Init : Ctor->inits()) if (Init->isBaseInitializer() && Init->isWritten()) { - // In template AST BaseInitializer could be generated too even if it's - // not target to base class. 
if (const CXXRecordDecl *CRD = Init->getBaseClass()->getAsCXXRecordDecl()) BasesToInit.erase(CRD->getCanonicalDecl()); From 10c0e8e99f67972960e9a4e4734870d737f168f1 Mon Sep 17 00:00:00 2001 From: Madhur Amilkanthwar Date: Wed, 24 Jun 2026 12:00:08 +0530 Subject: [PATCH 291/511] [LV] Accept swapped operands in early-exit condition compare (#199989) Use m_c_ICmp so the load can be on either side of the icmp. --- .../Vectorize/LoopVectorizationLegality.cpp | 6 +- .../early_exit_store_legality.ll | 25 +++++++ .../LoopVectorize/early_exit_with_stores.ll | 72 +++++++++++++++++++ 3 files changed, 100 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 5a4a419fbc80c..8d875b2b6e492 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1783,16 +1783,16 @@ bool LoopVectorizationLegality::canUncountableExitConditionLoadBeMoved( Instruction *L = nullptr; Value *Ptr = nullptr; Value *R = nullptr; + // The exit-condition load can appear on either side of the icmp. if (!match(Br->getCondition(), - m_OneUse(m_ICmp(m_OneUse(m_Instruction(L, m_Load(m_Value(Ptr)))), - m_Value(R))))) { + m_OneUse(m_c_ICmp(m_OneUse(m_Instruction(L, m_Load(m_Value(Ptr)))), + m_Value(R))))) { reportVectorizationFailure( "Early exit loop with store but no supported condition load", "NoConditionLoadForEarlyExitLoop", ORE, TheLoop); return false; } - // FIXME: Don't rely on operand ordering for the comparison. 
if (!TheLoop->isLoopInvariant(R)) { reportVectorizationFailure( "Early exit loop with store but no supported condition load", diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll index 12065c01d47a4..dfa772887d73e 100644 --- a/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll +++ b/llvm/test/Transforms/LoopVectorize/early_exit_store_legality.ll @@ -54,6 +54,31 @@ exit: ret void } +;; Exit-condition load on the RHS of the icmp must still be accepted. +define void @swapped_cmp_operands(ptr noalias %array, ptr %pred) { +; CHECK-LABEL: LV: Checking a loop in 'swapped_cmp_operands' +; CHECK: LV: We can vectorize this loop! +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ] + %st.addr = getelementptr i16, ptr %array, i64 %iv + store i16 0, ptr %st.addr, align 2 + %ee.addr = getelementptr i16, ptr %pred, i64 %iv + %ee.val = load i16, ptr %ee.addr, align 2 + %ee.cond = icmp slt i16 500, %ee.val + br i1 %ee.cond, label %exit, label %latch + +latch: + %iv.next = add i64 %iv, 1 + %latch.cond = icmp eq i64 %iv.next, 20 + br i1 %latch.cond, label %exit, label %loop + +exit: + ret void +} + ;; Avoid vectorization because we will either exit on the first iteration, or ;; never exit early. ;; We shouldn't see IR like this if LV-LICM has done its job. diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_with_stores.ll b/llvm/test/Transforms/LoopVectorize/early_exit_with_stores.ll index c2fd759504759..7e9a73d197a10 100644 --- a/llvm/test/Transforms/LoopVectorize/early_exit_with_stores.ll +++ b/llvm/test/Transforms/LoopVectorize/early_exit_with_stores.ll @@ -1316,4 +1316,76 @@ exit.uncountable: ret i32 1 } +;; Exit-condition icmp with the invariant on LHS and the load on RHS; must +;; vectorize the same as the canonical operand order. 
+define void @swapped_cmp_operands(ptr dereferenceable(40) noalias %array, ptr align 2 dereferenceable(40) readonly %pred) { +; CHECK-LABEL: define void @swapped_cmp_operands( +; CHECK-SAME: ptr noalias dereferenceable(40) [[ARRAY:%.*]], ptr readonly align 2 dereferenceable(40) [[PRED:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i16, ptr [[PRED]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP1]], align 2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i16> splat (i16 500), [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP2]], i1 false) +; CHECK-NEXT: [[UNCOUNTABLE_EXIT_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 0, i64 [[TMP3]]) +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i16, ptr [[ARRAY]], i64 [[INDEX]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr align 2 [[TMP0]], <4 x i1> [[UNCOUNTABLE_EXIT_MASK]], <4 x i16> poison) +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[WIDE_MASKED_LOAD]], splat (i16 1) +; CHECK-NEXT: call void @llvm.masked.store.v4i16.p0(<4 x i16> [[TMP4]], ptr align 2 [[TMP0]], <4 x i1> [[UNCOUNTABLE_EXIT_MASK]]) +; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP2]] +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 20 +; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]] +; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP3]] +; 
CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[TMP9]], 20 +; CHECK-NEXT: br i1 [[TMP10]], label %[[EXIT:.*]], label %[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: br label %[[FOR_BODY:.*]] +; CHECK: [[FOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[TMP9]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_INC:.*]] ] +; CHECK-NEXT: [[ST_ADDR:%.*]] = getelementptr inbounds nuw i16, ptr [[ARRAY]], i64 [[IV]] +; CHECK-NEXT: [[DATA:%.*]] = load i16, ptr [[ST_ADDR]], align 2 +; CHECK-NEXT: [[INC:%.*]] = add nsw i16 [[DATA]], 1 +; CHECK-NEXT: store i16 [[INC]], ptr [[ST_ADDR]], align 2 +; CHECK-NEXT: [[EE_ADDR:%.*]] = getelementptr inbounds nuw i16, ptr [[PRED]], i64 [[IV]] +; CHECK-NEXT: [[EE_VAL:%.*]] = load i16, ptr [[EE_ADDR]], align 2 +; CHECK-NEXT: [[EE_COND:%.*]] = icmp slt i16 500, [[EE_VAL]] +; CHECK-NEXT: br i1 [[EE_COND]], label %[[EXIT]], label %[[FOR_INC]] +; CHECK: [[FOR_INC]]: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[COUNTED_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 20 +; CHECK-NEXT: br i1 [[COUNTED_COND]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] + %st.addr = getelementptr inbounds nuw i16, ptr %array, i64 %iv + %data = load i16, ptr %st.addr, align 2 + %inc = add nsw i16 %data, 1 + store i16 %inc, ptr %st.addr, align 2 + %ee.addr = getelementptr inbounds nuw i16, ptr %pred, i64 %iv + %ee.val = load i16, ptr %ee.addr, align 2 + %ee.cond = icmp slt i16 500, %ee.val + br i1 %ee.cond, label %exit, label %for.inc + +for.inc: + %iv.next = add nuw nsw i64 %iv, 1 + %counted.cond = icmp eq i64 %iv.next, 20 + br i1 %counted.cond, label %exit, label %for.body + +exit: + ret void +} + declare i64 @get_an_unknown_offset(); From 2a782a8b2b2ca1aa3180a6c756bfa2a8dc647acf Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Wed, 24 Jun 2026 02:03:41 -0500 Subject: [PATCH 292/511] 
[Hexagon] Fix unused variable in non-assert builds (KCFI) (#205491) Without asserts, we see failures like so: /repo/llvm/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp:982:43: error: unused variable 'NextI' [-Werror,-Wunused-variable] 982 | MachineBasicBlock::const_instr_iterator NextI = std::next(MI.getIterator()); | ^~~~~ 1 error generated. Mark NextI `maybe_unused` to address the issue. Fixes a regression introduced by f8aa5f66209d. --- llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 61ca9020bb689..3925f9fea668a 100644 --- a/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -979,7 +979,8 @@ void HexagonAsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, void HexagonAsmPrinter::LowerKCFI_CHECK(const MachineInstr &MI) { Register AddrReg = MI.getOperand(0).getReg(); const int64_t Type = MI.getOperand(1).getImm(); - MachineBasicBlock::const_instr_iterator NextI = std::next(MI.getIterator()); + [[maybe_unused]] MachineBasicBlock::const_instr_iterator NextI = + std::next(MI.getIterator()); assert(NextI != MI.getParent()->instr_end() && NextI->isCall() && "KCFI_CHECK not followed by a call instruction"); assert(NextI->getOperand(0).getReg() == AddrReg && From 13c45a6ca03a8104be40c59adf65eeb2365f2e16 Mon Sep 17 00:00:00 2001 From: Zhige Chen Date: Wed, 24 Jun 2026 15:08:56 +0800 Subject: [PATCH 293/511] [llubi] Implement memory manipulation intrinsics (#204932) Implement `memset`, `memcpy`, `memmove` intrinsics and their corresponding inline version. Note that the `isvolatile` argument is ignored and left for future PRs. 
--- llvm/test/tools/llubi/intr_memory.ll | 106 +++++++++++++++ llvm/test/tools/llubi/intr_memory_align_ub.ll | 46 +++++++ .../tools/llubi/intr_memory_constant_ub.ll | 15 +++ llvm/test/tools/llubi/intr_memory_len_ub.ll | 30 +++++ llvm/test/tools/llubi/intr_memory_ub.ll | 53 ++++++++ llvm/tools/llubi/lib/ExecutorBase.cpp | 14 +- llvm/tools/llubi/lib/Interpreter.cpp | 121 ++++++++++++++++++ 7 files changed, 378 insertions(+), 7 deletions(-) create mode 100644 llvm/test/tools/llubi/intr_memory.ll create mode 100644 llvm/test/tools/llubi/intr_memory_align_ub.ll create mode 100644 llvm/test/tools/llubi/intr_memory_constant_ub.ll create mode 100644 llvm/test/tools/llubi/intr_memory_len_ub.ll create mode 100644 llvm/test/tools/llubi/intr_memory_ub.ll diff --git a/llvm/test/tools/llubi/intr_memory.ll b/llvm/test/tools/llubi/intr_memory.ll new file mode 100644 index 0000000000000..5ab3a1d101fa1 --- /dev/null +++ b/llvm/test/tools/llubi/intr_memory.ll @@ -0,0 +1,106 @@ +; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6 +; RUN: llubi --verbose < %s 2>&1 | FileCheck %s + +define void @main() { + %src = alloca [8 x i8], align 1 + %dst = alloca [8 x i8], align 1 + call void @llvm.memset.p0.i16(ptr %src, i8 17, i16 8, i1 false) + call void @llvm.memcpy.p0.p0.i8(ptr %dst, ptr %src, i8 8, i1 false) + + %dst2 = getelementptr i8, ptr %dst, i64 2 + call void @llvm.memset.inline.p0.i8(ptr %dst2, i8 34, i8 3, i1 false) + + %src4 = getelementptr i8, ptr %src, i64 4 + %dst5 = getelementptr i8, ptr %dst, i64 5 + call void @llvm.memcpy.inline.p0.p0.i8(ptr %dst5, ptr %src4, i8 2, i1 false) + + %v0 = load i8, ptr %dst, align 1 + %p1 = getelementptr i8, ptr %dst, i64 1 + %v1 = load i8, ptr %p1, align 1 + %p2 = getelementptr i8, ptr %dst, i64 2 + %v2 = load i8, ptr %p2, align 1 + %p4 = getelementptr i8, ptr %dst, i64 4 + %v4 = load i8, ptr %p4, align 1 + %p5 = getelementptr i8, ptr %dst, i64 5 + %v5 = load i8, ptr %p5, align 1 + + %move = 
alloca [6 x i8], align 1 + store i8 1, ptr %move, align 1 + %m1 = getelementptr i8, ptr %move, i64 1 + store i8 2, ptr %m1, align 1 + %m2 = getelementptr i8, ptr %move, i64 2 + store i8 3, ptr %m2, align 1 + %m3 = getelementptr i8, ptr %move, i64 3 + store i8 4, ptr %m3, align 1 + %m4 = getelementptr i8, ptr %move, i64 4 + store i8 5, ptr %m4, align 1 + %m5 = getelementptr i8, ptr %move, i64 5 + store i8 6, ptr %m5, align 1 + call void @llvm.memmove.p0.p0.i8(ptr %m1, ptr %move, i8 5, i1 false) + + %mv0 = load i8, ptr %move, align 1 + %mv1 = load i8, ptr %m1, align 1 + %mv2 = load i8, ptr %m2, align 1 + %mv5 = load i8, ptr %m5, align 1 + + %prov_src = alloca ptr, align 8 + %prov_dst = alloca ptr, align 8 + %prov_ptr = alloca i8, align 1 + store ptr %prov_ptr, ptr %prov_src, align 8 + call void @llvm.memcpy.p0.p0.i8(ptr %prov_dst, ptr %prov_src, i8 8, i1 false) + %prov_copy = load ptr, ptr %prov_dst, align 8 + store i8 0, ptr %prov_copy, align 1 + + call void @llvm.memset.p0.i16(ptr poison, i8 0, i16 0, i1 false) + call void @llvm.memcpy.p0.p0.i8(ptr poison, ptr poison, i8 0, i1 false) + + ret void +} + +; CHECK: Entering function: main +; CHECK-NEXT: %src = alloca [8 x i8], align 1 => ptr 0x8 [src] +; CHECK-NEXT: %dst = alloca [8 x i8], align 1 => ptr 0x11 [dst] +; CHECK-NEXT: call void @llvm.memset.p0.i16(ptr %src, i8 17, i16 8, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i8(ptr %dst, ptr %src, i8 8, i1 false) +; CHECK-NEXT: %dst2 = getelementptr i8, ptr %dst, i64 2 => ptr 0x13 [dst + 2] +; CHECK-NEXT: call void @llvm.memset.inline.p0.i8(ptr %dst2, i8 34, i8 3, i1 false) +; CHECK-NEXT: %src4 = getelementptr i8, ptr %src, i64 4 => ptr 0xC [src + 4] +; CHECK-NEXT: %dst5 = getelementptr i8, ptr %dst, i64 5 => ptr 0x16 [dst + 5] +; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i8(ptr %dst5, ptr %src4, i8 2, i1 false) +; CHECK-NEXT: %v0 = load i8, ptr %dst, align 1 => i8 17 +; CHECK-NEXT: %p1 = getelementptr i8, ptr %dst, i64 1 => ptr 0x12 [dst + 1] +; 
CHECK-NEXT: %v1 = load i8, ptr %p1, align 1 => i8 17 +; CHECK-NEXT: %p2 = getelementptr i8, ptr %dst, i64 2 => ptr 0x13 [dst + 2] +; CHECK-NEXT: %v2 = load i8, ptr %p2, align 1 => i8 34 +; CHECK-NEXT: %p4 = getelementptr i8, ptr %dst, i64 4 => ptr 0x15 [dst + 4] +; CHECK-NEXT: %v4 = load i8, ptr %p4, align 1 => i8 34 +; CHECK-NEXT: %p5 = getelementptr i8, ptr %dst, i64 5 => ptr 0x16 [dst + 5] +; CHECK-NEXT: %v5 = load i8, ptr %p5, align 1 => i8 17 +; CHECK-NEXT: %move = alloca [6 x i8], align 1 => ptr 0x1A [move] +; CHECK-NEXT: store i8 1, ptr %move, align 1 +; CHECK-NEXT: %m1 = getelementptr i8, ptr %move, i64 1 => ptr 0x1B [move + 1] +; CHECK-NEXT: store i8 2, ptr %m1, align 1 +; CHECK-NEXT: %m2 = getelementptr i8, ptr %move, i64 2 => ptr 0x1C [move + 2] +; CHECK-NEXT: store i8 3, ptr %m2, align 1 +; CHECK-NEXT: %m3 = getelementptr i8, ptr %move, i64 3 => ptr 0x1D [move + 3] +; CHECK-NEXT: store i8 4, ptr %m3, align 1 +; CHECK-NEXT: %m4 = getelementptr i8, ptr %move, i64 4 => ptr 0x1E [move + 4] +; CHECK-NEXT: store i8 5, ptr %m4, align 1 +; CHECK-NEXT: %m5 = getelementptr i8, ptr %move, i64 5 => ptr 0x1F [move + 5] +; CHECK-NEXT: store i8 6, ptr %m5, align 1 +; CHECK-NEXT: call void @llvm.memmove.p0.p0.i8(ptr %m1, ptr %move, i8 5, i1 false) +; CHECK-NEXT: %mv0 = load i8, ptr %move, align 1 => i8 1 +; CHECK-NEXT: %mv1 = load i8, ptr %m1, align 1 => i8 1 +; CHECK-NEXT: %mv2 = load i8, ptr %m2, align 1 => i8 2 +; CHECK-NEXT: %mv5 = load i8, ptr %m5, align 1 => i8 5 +; CHECK-NEXT: %prov_src = alloca ptr, align 8 => ptr 0x28 [prov_src] +; CHECK-NEXT: %prov_dst = alloca ptr, align 8 => ptr 0x38 [prov_dst] +; CHECK-NEXT: %prov_ptr = alloca i8, align 1 => ptr 0x41 [prov_ptr] +; CHECK-NEXT: store ptr %prov_ptr, ptr %prov_src, align 8 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i8(ptr %prov_dst, ptr %prov_src, i8 8, i1 false) +; CHECK-NEXT: %prov_copy = load ptr, ptr %prov_dst, align 8 => ptr 0x41 [prov_ptr] +; CHECK-NEXT: store i8 0, ptr %prov_copy, align 1 +; 
CHECK-NEXT: call void @llvm.memset.p0.i16(ptr poison, i8 0, i16 0, i1 false) +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i8(ptr poison, ptr poison, i8 0, i1 false) +; CHECK-NEXT: ret void +; CHECK-NEXT: Exiting function: main diff --git a/llvm/test/tools/llubi/intr_memory_align_ub.ll b/llvm/test/tools/llubi/intr_memory_align_ub.ll new file mode 100644 index 0000000000000..d6dc7aa49b2a8 --- /dev/null +++ b/llvm/test/tools/llubi/intr_memory_align_ub.ll @@ -0,0 +1,46 @@ +; RUN: sed 's/OP/memcpy_misaligned_dst/g' %s | not llubi --verbose 2>&1 | FileCheck %s --check-prefix=COPY-DST +; RUN: sed 's/OP/memcpy_misaligned_src/g' %s | not llubi --verbose 2>&1 | FileCheck %s --check-prefix=COPY-SRC +; RUN: sed 's/OP/memset_misaligned_dst/g' %s | not llubi --verbose 2>&1 | FileCheck %s --check-prefix=SET-DST + +define void @main() { + call void @OP() + ret void +} + +define void @memcpy_misaligned_dst() { + %src = alloca [4 x i8], align 2 + %dst = alloca [4 x i8], align 2 + %dst1 = getelementptr i8, ptr %dst, i64 1 + call void @llvm.memcpy.p0.p0.i8(ptr align 2 %dst1, ptr align 1 %src, i8 1, i1 false) + ret void +} + +define void @memcpy_misaligned_src() { + %src = alloca [4 x i8], align 1 + %dst = alloca [4 x i8], align 2 + %src1 = getelementptr i8, ptr %src, i64 1 + call void @llvm.memcpy.p0.p0.i8(ptr align 1 %dst, ptr align 2 %src1, i8 1, i1 false) + ret void +} + +define void @memset_misaligned_dst() { + %dst = alloca [4 x i8], align 1 + %dst1 = getelementptr i8, ptr %dst, i64 1 + call void @llvm.memset.p0.i8(ptr align 2 %dst1, i8 0, i8 1, i1 false) + ret void +} + +; COPY-DST: Entering function: main +; COPY-DST: Entering function: memcpy_misaligned_dst +; COPY-DST: Immediate UB detected: Memory transfer intrinsic with poison destination pointer. +; COPY-DST: error: Execution of function 'main' failed. 
+ +; COPY-SRC: Entering function: main +; COPY-SRC: Entering function: memcpy_misaligned_src +; COPY-SRC: Immediate UB detected: Memory transfer intrinsic with poison source pointer. +; COPY-SRC: error: Execution of function 'main' failed. + +; SET-DST: Entering function: main +; SET-DST: Entering function: memset_misaligned_dst +; SET-DST: Immediate UB detected: memset called with poison destination pointer. +; SET-DST: error: Execution of function 'main' failed. diff --git a/llvm/test/tools/llubi/intr_memory_constant_ub.ll b/llvm/test/tools/llubi/intr_memory_constant_ub.ll new file mode 100644 index 0000000000000..4f377dd93bfa1 --- /dev/null +++ b/llvm/test/tools/llubi/intr_memory_constant_ub.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6 +; RUN: not llubi --verbose < %s 2>&1 | FileCheck %s + +@constant_dst = constant [4 x i8] zeroinitializer + +define void @main() { + call void @llvm.memset.p0.i8(ptr @constant_dst, i8 0, i8 1, i1 false) + ret void +} + +; CHECK: Entering function: main +; CHECK-NEXT: Stacktrace: +; CHECK-NEXT: #0 call void @llvm.memset.p0.i8(ptr @constant_dst, i8 0, i8 1, i1 false) at @main +; CHECK-NEXT: Immediate UB detected: Try to write to a constant memory object: ptr 0x8 [@constant_dst]. +; CHECK-NEXT: error: Execution of function 'main' failed. 
diff --git a/llvm/test/tools/llubi/intr_memory_len_ub.ll b/llvm/test/tools/llubi/intr_memory_len_ub.ll new file mode 100644 index 0000000000000..3153bc44864db --- /dev/null +++ b/llvm/test/tools/llubi/intr_memory_len_ub.ll @@ -0,0 +1,30 @@ +; RUN: sed 's/OP/memcpy_len_overflow/g' %s | not llubi --verbose 2>&1 | FileCheck %s --check-prefix=COPY +; RUN: sed 's/OP/memset_len_overflow/g' %s | not llubi --verbose 2>&1 | FileCheck %s --check-prefix=SET + +define void @main() { + call void @OP() + ret void +} + +define void @memcpy_len_overflow() { + %src = alloca [4 x i8], align 1 + %dst = alloca [4 x i8], align 1 + call void @llvm.memcpy.p0.p0.i128(ptr %dst, ptr %src, i128 18446744073709551616, i1 false) + ret void +} + +define void @memset_len_overflow() { + %dst = alloca [4 x i8], align 1 + call void @llvm.memset.p0.i128(ptr %dst, i8 0, i128 18446744073709551616, i1 false) + ret void +} + +; COPY: Entering function: main +; COPY: Entering function: memcpy_len_overflow +; COPY: Immediate UB detected: Memory transfer intrinsic length overflows uint64_t. +; COPY: error: Execution of function 'main' failed. + +; SET: Entering function: main +; SET: Entering function: memset_len_overflow +; SET: Immediate UB detected: memset called with length overflows uint64_t. +; SET: error: Execution of function 'main' failed. 
diff --git a/llvm/test/tools/llubi/intr_memory_ub.ll b/llvm/test/tools/llubi/intr_memory_ub.ll new file mode 100644 index 0000000000000..945870300a84d --- /dev/null +++ b/llvm/test/tools/llubi/intr_memory_ub.ll @@ -0,0 +1,53 @@ +; RUN: sed 's/OP/memcpy_overlap/g' %s | not llubi --verbose 2>&1 | FileCheck %s --check-prefix=OVERLAP +; RUN: sed 's/OP/memcpy_inline_poison_src/g' %s | not llubi --verbose 2>&1 | FileCheck %s --check-prefix=COPY-SRC +; RUN: sed 's/OP/memset_poison_dst/g' %s | not llubi --verbose 2>&1 | FileCheck %s --check-prefix=SET-DST +; RUN: sed 's/OP/memset_poison_len/g' %s | not llubi --verbose 2>&1 | FileCheck %s --check-prefix=SET-LEN + +define void @main() { + call void @OP() + ret void +} + +define void @memcpy_overlap() { + %buf = alloca [4 x i8], align 1 + %dst = getelementptr i8, ptr %buf, i64 1 + call void @llvm.memcpy.p0.p0.i8(ptr %dst, ptr %buf, i8 2, i1 false) + ret void +} + +define void @memcpy_inline_poison_src() { + %dst = alloca [4 x i8], align 1 + call void @llvm.memcpy.inline.p0.p0.i8(ptr %dst, ptr poison, i8 1, i1 false) + ret void +} + +define void @memset_poison_dst() { + call void @llvm.memset.p0.i8(ptr poison, i8 0, i8 1, i1 false) + ret void +} + +define void @memset_poison_len() { + %dst = alloca [4 x i8], align 1 + call void @llvm.memset.p0.i8(ptr %dst, i8 0, i8 poison, i1 false) + ret void +} + +; OVERLAP: Entering function: main +; OVERLAP: Entering function: memcpy_overlap +; OVERLAP: Immediate UB detected: memcpy with overlapping source and destination. +; OVERLAP: error: Execution of function 'main' failed. + +; COPY-SRC: Entering function: main +; COPY-SRC: Entering function: memcpy_inline_poison_src +; COPY-SRC: Immediate UB detected: Memory transfer intrinsic with poison source pointer. +; COPY-SRC: error: Execution of function 'main' failed. + +; SET-DST: Entering function: main +; SET-DST: Entering function: memset_poison_dst +; SET-DST: Immediate UB detected: memset called with poison destination pointer. 
+; SET-DST: error: Execution of function 'main' failed. + +; SET-LEN: Entering function: main +; SET-LEN: Entering function: memset_poison_len +; SET-LEN: Immediate UB detected: memset called with poison length. +; SET-LEN: error: Execution of function 'main' failed. diff --git a/llvm/tools/llubi/lib/ExecutorBase.cpp b/llvm/tools/llubi/lib/ExecutorBase.cpp index 482930af81b5b..da49a934123c9 100644 --- a/llvm/tools/llubi/lib/ExecutorBase.cpp +++ b/llvm/tools/llubi/lib/ExecutorBase.cpp @@ -77,6 +77,12 @@ ExecutorBase::verifyMemAccess(const Pointer &Ptr, uint64_t AccessSize, return {}; } + if (IsStore && MO->isConstant()) { + reportImmediateUB() << "Try to write to a constant memory object: " << Ptr + << "."; + return {}; + } + if (Address.countr_zero() < Log2(Alignment)) { reportImmediateUB() << "Misaligned memory access. Address: 0x" << Twine::utohexstr(Address.getZExtValue()) @@ -144,14 +150,8 @@ void ExecutorBase::store(const AnyValue &Ptr, Align Alignment, if (auto [MO, Offset] = verifyMemAccess( PtrVal, Ctx.getEffectiveTypeStoreSize(ValTy), Alignment, /*IsStore=*/true); - MO) { - if (MO->isConstant()) { - reportImmediateUB() << "Try to write to a constant memory object: " - << PtrVal << "."; - return; - } + MO) Ctx.store(*MO, Offset, Val, ValTy); - } } void ExecutorBase::requestProgramExit(ProgramExitInfo::ProgramExitKind Kind, diff --git a/llvm/tools/llubi/lib/Interpreter.cpp b/llvm/tools/llubi/lib/Interpreter.cpp index 61059371dcb58..cc3df95916db7 100644 --- a/llvm/tools/llubi/lib/Interpreter.cpp +++ b/llvm/tools/llubi/lib/Interpreter.cpp @@ -19,12 +19,14 @@ #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Support/Allocator.h" #include "llvm/TargetParser/Triple.h" #include +#include #include namespace llvm::ubi { @@ -698,6 +700,118 @@ class InstExecutor : public InstVisitor, return V.asInteger(); 
} + AnyValue callMemTransferIntrinsic(CallBase &CB, ArrayRef Args, + Intrinsic::ID IID) { + const AnyValue &Dest = Args[0]; + const AnyValue &Src = Args[1]; + const AnyValue &Length = Args[2]; + // TODO: Handle isvolatile argument. + if (Length.isPoison()) { + reportImmediateUB() << "Memory transfer intrinsic with poison length."; + return AnyValue(); + } + + const APInt &LengthInt = Args[2].asInteger(); + if (LengthInt.getActiveBits() > 64) { + reportImmediateUB() + << "Memory transfer intrinsic length overflows uint64_t."; + return AnyValue(); + } + + const uint64_t Len = LengthInt.getZExtValue(); + if (Len == 0) + return AnyValue(); + + if (Dest.isPoison()) { + reportImmediateUB() + << "Memory transfer intrinsic with poison destination pointer."; + return AnyValue(); + } + + if (Src.isPoison()) { + reportImmediateUB() + << "Memory transfer intrinsic with poison source pointer."; + return AnyValue(); + } + + const Pointer &DstPtr = Dest.asPointer(); + const Pointer &SrcPtr = Src.asPointer(); + + Align DstAlign = CB.getParamAlign(0).valueOrOne(); + Align SrcAlign = CB.getParamAlign(1).valueOrOne(); + + auto [SrcMO, SrcOffset] = + verifyMemAccess(SrcPtr, Len, SrcAlign, /*IsStore=*/false); + if (!SrcMO) + return AnyValue(); + + auto [DstMO, DstOffset] = + verifyMemAccess(DstPtr, Len, DstAlign, /*IsStore=*/true); + if (!DstMO) + return AnyValue(); + + if (IID == Intrinsic::memcpy || IID == Intrinsic::memcpy_inline) { + if (SrcMO == DstMO && SrcOffset != DstOffset) { + const uint64_t SrcEnd = SrcOffset + Len; + const uint64_t DstEnd = DstOffset + Len; + if (SrcOffset < DstEnd && DstOffset < SrcEnd) { + reportImmediateUB() + << "memcpy with overlapping source and destination."; + return AnyValue(); + } + } + } + + MutableArrayRef DstBytes = DstMO->getBytes().slice(DstOffset, Len); + if (SrcMO->getState() == MemoryObjectState::Dead) { + fill(DstBytes, Byte::poison()); + } else { + ArrayRef SrcBytes = SrcMO->getBytes().slice(SrcOffset, Len); + 
std::memmove(DstBytes.data(), SrcBytes.data(), Len * sizeof(Byte)); + } + return AnyValue(); + } + + AnyValue callMemSetIntrinsic(CallBase &CB, ArrayRef Args) { + const AnyValue &Dest = Args[0]; + const AnyValue &Val = Args[1]; + const AnyValue &Length = Args[2]; + + if (Length.isPoison()) { + reportImmediateUB() << "memset called with poison length."; + return AnyValue(); + } + + const APInt &LengthInt = Length.asInteger(); + if (LengthInt.getActiveBits() > 64) { + reportImmediateUB() << "memset called with length overflows uint64_t."; + return AnyValue(); + } + + const uint64_t Len = LengthInt.getZExtValue(); + if (Len == 0) + return AnyValue(); + + if (Dest.isPoison()) { + reportImmediateUB() << "memset called with poison destination pointer."; + return AnyValue(); + } + + const Pointer &DstPtr = Dest.asPointer(); + + Align DstAlign = CB.getParamAlign(0).valueOrOne(); + auto [DstMO, DstOffset] = + verifyMemAccess(DstPtr, Len, DstAlign, /*IsStore=*/true); + if (!DstMO) + return AnyValue(); + + Byte FillByte = Val.isPoison() + ? Byte::poison() + : Byte::concrete(Val.asInteger().getZExtValue()); + fill(DstMO->getBytes().slice(DstOffset, Len), FillByte); + return AnyValue(); + } + public: InstExecutor(Context &C, EventHandler &H, Function &F, ArrayRef Args, AnyValue &RetVal) @@ -1502,6 +1616,13 @@ class InstExecutor : public InstVisitor, return V; }); } + case Intrinsic::memcpy: + case Intrinsic::memcpy_inline: + case Intrinsic::memmove: + return callMemTransferIntrinsic(CB, Args, IID); + case Intrinsic::memset: + case Intrinsic::memset_inline: + return callMemSetIntrinsic(CB, Args); default: Handler.onUnrecognizedInstruction(CB); setFailed(); From 0ae1673a317fb43a58085a7713295717f7def90c Mon Sep 17 00:00:00 2001 From: firmiana402 Date: Wed, 24 Jun 2026 15:13:41 +0800 Subject: [PATCH 294/511] [DebugInfo][LLDB] Fix generic DW_OP_const handling (#204353) This PR fixes two related DWARF constant-handling bugs that were blocking each other. 
First, LLDB's DWARF expression evaluator in [`DWARFExpression.cpp`](https://github.com/llvm/llvm-project/blob/main/lldb/source/Expression/DWARFExpression.cpp) handled `DW_OP_constu` and `DW_OP_consts` without going through `to_generic`. Under DWARF, these operators push a generic value: an address-sized integral value with unspecified signedness. That means the result should be truncated to the target address size (via `to_generic`). Second, LLVM already had a producer-side issue tracked as [#47431](https://github.com/llvm/llvm-project/issues/47431): on 32-bit targets, LLVM could emit `DW_OP_consts` / `DW_OP_constu` for source integer constants wider than the target generic type. If LLDB were fixed alone, those producer-emitted constants would become truncated as DWARF requires, exposing incorrect debug info for wide source values. This patch fixes both sides together. ## What Changed On the LLDB consumer side: - `DW_OP_constu` now uses `to_generic`. - `DW_OP_consts` now uses `to_generic`. - The corresponding LLDB DWARF expression tests were updated to expect address-sized generic values. On the LLVM producer side: - Wide integer debug-location constants that cannot be represented by the target generic type are emitted as `DW_OP_implicit_value` instead of `DW_OP_const*`. - This preserves the source value bytes instead of relying on an address-sized DWARF generic constant. - The producer-side change is limited to complete constant values, where there are no remaining `DIExpression` operations. ## Validation Locally verified with: ```text build/tools/lldb/unittests/Expression/ExpressionTests --gtest_filter='DWARFExpression.*' 74 tests passed build/bin/llvm-lit -sv llvm/test/DebugInfo/X86/constant-loclist.ll 1 test passed ninja -C build check-lldb -j12 No unexpected failures ninja -C build check-all -j12 Completed with one unrelated local failure in Clang Tools :: clang-doc/DR-141990.cpp, caused by host warning-option output. 
No DebugInfo, DWARF, LLDB expression, or AsmPrinter-related failures were observed. ``` --- lldb/source/Expression/DWARFExpression.cpp | 6 ++-- .../Expression/DWARFExpressionTest.cpp | 6 ++-- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 36 +++++++++++++++++-- .../CodeGen/AsmPrinter/DwarfExpression.cpp | 22 ++++++++++++ llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 3 ++ llvm/test/DebugInfo/X86/constant-loclist.ll | 32 +++++++++++++++++ 6 files changed, 95 insertions(+), 10 deletions(-) diff --git a/lldb/source/Expression/DWARFExpression.cpp b/lldb/source/Expression/DWARFExpression.cpp index dd436e0c8afd9..7966673bb65ed 100644 --- a/lldb/source/Expression/DWARFExpression.cpp +++ b/lldb/source/Expression/DWARFExpression.cpp @@ -1411,13 +1411,11 @@ llvm::Expected DWARFExpression::Evaluate( case DW_OP_const8s: stack.push_back(to_generic(static_cast(op->getRawOperand(0)))); break; - // These should also use to_generic, but we can't do that due to a - // producer-side bug in llvm. See llvm.org/pr48087. case DW_OP_constu: - stack.push_back(Scalar(op->getRawOperand(0))); + stack.push_back(to_generic(op->getRawOperand(0))); break; case DW_OP_consts: - stack.push_back(Scalar(static_cast(op->getRawOperand(0)))); + stack.push_back(to_generic(static_cast(op->getRawOperand(0)))); break; case DW_OP_dup: diff --git a/lldb/unittests/Expression/DWARFExpressionTest.cpp b/lldb/unittests/Expression/DWARFExpressionTest.cpp index 6d4a624505390..7feacb2b9da24 100644 --- a/lldb/unittests/Expression/DWARFExpressionTest.cpp +++ b/lldb/unittests/Expression/DWARFExpressionTest.cpp @@ -388,13 +388,13 @@ TEST(DWARFExpression, DW_OP_const) { Evaluate({DW_OP_const8s, 0x00, 0x11, 0x22, 0x33, 0x44, 0x42, 0x47, 0x88}), ExpectScalar(0x33221100)); - // Don't truncate to address size for compatibility with clang (pr48087). + // LEB constants also push generic values, so truncate to address size. 
EXPECT_THAT_EXPECTED( Evaluate({DW_OP_constu, 0x81, 0x82, 0x84, 0x88, 0x90, 0xa0, 0x40}), - ExpectScalar(0x01010101010101)); + ExpectScalar(32, 0x01010101, false)); EXPECT_THAT_EXPECTED( Evaluate({DW_OP_consts, 0x81, 0x82, 0x84, 0x88, 0x90, 0xa0, 0x40}), - ExpectScalar(0xffff010101010101)); + ExpectScalar(32, 0x01010101, true)); } TEST(DWARFExpression, DW_OP_skip) { diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 3479485cf866c..64b6b27346575 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -50,6 +50,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MD5.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" @@ -3251,10 +3252,39 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, &AP](const DbgValueLocEntry &Entry, DIExpressionCursor &Cursor) -> bool { if (Entry.isInt()) { - if (BT && (BT->getEncoding() == dwarf::DW_ATE_boolean)) + if (BT && (BT->getEncoding() == dwarf::DW_ATE_boolean)) { DwarfExpr.addBooleanConstant(Entry.getInt()); - else if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed || - BT->getEncoding() == dwarf::DW_ATE_signed_char)) + return true; + } + + bool IsSigned = BT && (BT->getEncoding() == dwarf::DW_ATE_signed || + BT->getEncoding() == dwarf::DW_ATE_signed_char); + if (BT && AP.getDwarfVersion() >= 4 && + !AP.getDwarfDebug()->tuneForSCE() && !Cursor) { + // DW_OP_const* pushes a generic, address-sized value. For a wider + // source integer value that cannot fit in the generic type, use + // DW_OP_implicit_value to preserve the source bytes instead. 
Keep this + // limited to complete constant values: SCE tuning already avoids + // DW_OP_implicit_value for compatibility, and expressions with + // remaining operations may need a scalar stack value rather than an + // implicit value block. + unsigned GenericBitSize = AP.MAI.getCodePointerSize() * 8; + uint64_t TypeBitSize = BT->getSizeInBits(); + bool IsByteSized = TypeBitSize % 8 == 0; + bool IsOutOfRange = + IsSigned ? !isIntN(GenericBitSize, Entry.getInt()) + : !isUIntN(GenericBitSize, + static_cast(Entry.getInt())); + if (TypeBitSize > GenericBitSize && IsByteSized && IsOutOfRange) { + DwarfExpr.addImplicitValue( + APInt(static_cast(TypeBitSize), + static_cast(Entry.getInt()), IsSigned), + AP); + return true; + } + } + + if (IsSigned) DwarfExpr.addSignedConstant(Entry.getInt()); else DwarfExpr.addUnsignedConstant(Entry.getInt()); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index d9b2deb6ccf3d..ef83a877ad37c 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -236,6 +236,28 @@ void DwarfExpression::addUnsignedConstant(const APInt &Value) { } } +void DwarfExpression::addImplicitValue(const APInt &Value, + const AsmPrinter &AP) { + assert(isImplicitLocation() || isUnknownLocation()); + assert(DwarfVersion >= 4); + + APInt API = Value; + unsigned NumBytes = API.getBitWidth() / 8; + assert(API.getBitWidth() == NumBytes * 8 && + "implicit value must be byte-sized"); + + emitOp(dwarf::DW_OP_implicit_value); + emitUnsigned(NumBytes); + + // The loop below is emitting the value starting at the least significant + // byte, so byte-swap first for big-endian targets. 
+ if (AP.getDataLayout().isBigEndian()) + API = API.byteSwap(); + + for (unsigned I = 0; I < NumBytes; ++I) + emitData1(API.extractBits(8, I * 8).getZExtValue()); +} + void DwarfExpression::addConstantFP(const APFloat &APF, const AsmPrinter &AP) { assert(isImplicitLocation() || isUnknownLocation()); APInt API = APF.bitcastToAPInt(); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index c4929aed1c197..2525938c78d86 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -240,6 +240,9 @@ class DwarfExpression { /// Emit an unsigned constant. void addUnsignedConstant(const APInt &Value); + /// Emit an implicit value. + void addImplicitValue(const APInt &Value, const AsmPrinter &AP); + /// Emit an floating point constant. void addConstantFP(const APFloat &Value, const AsmPrinter &AP); diff --git a/llvm/test/DebugInfo/X86/constant-loclist.ll b/llvm/test/DebugInfo/X86/constant-loclist.ll index 630c652ba0ccf..702f25ab2b71d 100644 --- a/llvm/test/DebugInfo/X86/constant-loclist.ll +++ b/llvm/test/DebugInfo/X86/constant-loclist.ll @@ -1,4 +1,6 @@ ; RUN: llc -filetype=obj %s -o - -experimental-debug-variable-locations=true | llvm-dwarfdump -v -debug-info - | FileCheck %s +; RUN: llc -filetype=obj %s -o - -experimental-debug-variable-locations=true -mtriple=i386-unknown-linux-gnu -dwarf-version=4 | llvm-dwarfdump -v -debug-info - | FileCheck %s --check-prefix=I386 +; RUN: llc -filetype=obj %s -o - -experimental-debug-variable-locations=true -mtriple=i386-unknown-linux-gnu -dwarf-version=2 -debugger-tune=sce | llvm-dwarfdump -v -debug-info - | FileCheck %s --check-prefix=I386-COMPAT ; A hand-written testcase to check 64-bit constant handling in location lists. 
@@ -18,6 +20,36 @@ ; CHECK-NEXT: {{.*}}: DW_OP_constu 0x4000000000000000) ; CHECK-NEXT: DW_AT_name {{.*}}"d" +; On 32-bit targets, source integer constants that do not fit in the +; address-sized DWARF generic type must use DW_OP_implicit_value to avoid +; truncating the source value. +; I386: .debug_info contents: +; I386: DW_TAG_variable +; I386-NEXT: DW_AT_location +; I386-NEXT: {{.*}}: DW_OP_lit0 +; I386-NEXT: {{.*}}: DW_OP_implicit_value 0x8 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x40) +; I386-NEXT: DW_AT_name {{.*}}"u" +; I386: DW_TAG_variable +; I386-NEXT: DW_AT_location +; I386-NEXT: {{.*}}: DW_OP_consts +0 +; I386-NEXT: {{.*}}: DW_OP_implicit_value 0x8 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x40) +; I386-NEXT: DW_AT_name {{.*}}"i" + +; DWARF v2 and SCE tuning keep using DW_OP_const* for compatibility. +; I386-COMPAT: .debug_info contents: +; I386-COMPAT: DW_TAG_variable +; I386-COMPAT-NEXT: DW_AT_location +; I386-COMPAT-NEXT: {{.*}}: DW_OP_lit0 +; I386-COMPAT-NOT: DW_OP_implicit_value +; I386-COMPAT-NEXT: {{.*}}: DW_OP_constu 0x4000000000000000) +; I386-COMPAT-NEXT: DW_AT_name {{.*}}"u" +; I386-COMPAT: DW_TAG_variable +; I386-COMPAT-NEXT: DW_AT_location +; I386-COMPAT-NEXT: {{.*}}: DW_OP_consts +0 +; I386-COMPAT-NOT: DW_OP_implicit_value +; I386-COMPAT-NEXT: {{.*}}: DW_OP_consts +4611686018427387904) +; I386-COMPAT-NEXT: DW_AT_name {{.*}}"i" + source_filename = "test.c" target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx" From 337e747e467e292a5e006715b99f018395a5354a Mon Sep 17 00:00:00 2001 From: Madhur Amilkanthwar Date: Wed, 24 Jun 2026 13:12:54 +0530 Subject: [PATCH 295/511] [LoopFusion][NFC] Share fusion tail between guarded and unguarded paths (#205492) `performFusion()` and `fuseGuardedLoops()` carried two character-for-character identical tails: header-PHI migration plus latch rewiring, and the SCEV-forget / block-merge / latch-merge finalization. 
Extract them into `rewireFusedHeaderPHIsAndLatches()` and `finalizeFusedLoop()` and call both from each path. --- llvm/lib/Transforms/Scalar/LoopFuse.cpp | 317 +++++++++--------------- 1 file changed, 119 insertions(+), 198 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopFuse.cpp b/llvm/lib/Transforms/Scalar/LoopFuse.cpp index 1b83c971c01bf..012a54de173e7 100644 --- a/llvm/lib/Transforms/Scalar/LoopFuse.cpp +++ b/llvm/lib/Transforms/Scalar/LoopFuse.cpp @@ -1354,6 +1354,121 @@ struct LoopFuser { } } + /// Move FC1's header PHIs into FC0's header, insert the loop-carried PHIs + /// needed to keep SSA valid when FC0 exits without taking its back-edge, and + /// rewire both latches to form the fused loop. Latch dominator-tree updates + /// are appended to \p TreeUpdates for the caller to apply. + void rewireFusedHeaderPHIsAndLatches( + const FusionCandidate &FC0, const FusionCandidate &FC1, + const SmallVectorImpl &OriginalFC0PHIs, + SmallVectorImpl &TreeUpdates) { + // Moves the phi nodes from the second to the first loops header block. + while (PHINode *PHI = dyn_cast(&FC1.Header->front())) { + if (SE.isSCEVable(PHI->getType())) + SE.forgetValue(PHI); + if (PHI->hasNUsesOrMore(1)) + PHI->moveBefore(FC0.Header->getFirstInsertionPt()); + else + PHI->eraseFromParent(); + } + + // Introduce new phi nodes in the second loop header to ensure + // exiting the first and jumping to the header of the second does not break + // the SSA property of the phis originally in the first loop. See also the + // comment above. 
+ BasicBlock::iterator L1HeaderIP = FC1.Header->begin(); + for (PHINode *LCPHI : OriginalFC0PHIs) { + int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch); + assert(L1LatchBBIdx >= 0 && + "Expected loop carried value to be rewired at this point!"); + + Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx); + + PHINode *L1HeaderPHI = + PHINode::Create(LCV->getType(), 2, LCPHI->getName() + ".afterFC0"); + L1HeaderPHI->insertBefore(L1HeaderIP); + L1HeaderPHI->addIncoming(LCV, FC0.Latch); + L1HeaderPHI->addIncoming(PoisonValue::get(LCV->getType()), + FC0.ExitingBlock); + + LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI); + } + + // Replace latch terminator destinations. + FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header); + FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header); + + // Modify the latch branch of FC0 to be unconditional as both successors of + // the branch are the same. + simplifyLatchBranch(FC0); + + // If FC0.Latch and FC0.ExitingBlock are the same then we have already + // performed the updates above. + if (FC0.Latch != FC0.ExitingBlock) + TreeUpdates.emplace_back(DominatorTree::UpdateType( + DominatorTree::Insert, FC0.Latch, FC1.Header)); + + TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete, + FC0.Latch, FC0.Header)); + TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert, + FC1.Latch, FC0.Header)); + TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete, + FC1.Latch, FC1.Header)); + } + + /// Forget cached SCEV state for both loops, move all of FC1's blocks and + /// child loops into FC0, erase the now-empty FC1, and merge the latches. + /// Returns the fused loop (FC0.L). + Loop *finalizeFusedLoop(const FusionCandidate &FC0, + const FusionCandidate &FC1) { + // Is there a way to keep SE up-to-date so we don't need to forget the loops + // and rebuild the information in subsequent passes of fusion? 
+ // Note: Need to forget the loops before merging the loop latches, as + // mergeLatch may remove the only block in FC1. + SE.forgetLoop(FC1.L); + SE.forgetLoop(FC0.L); + + // Merge the loops. + SmallVector Blocks(FC1.L->blocks()); + for (BasicBlock *BB : Blocks) { + FC0.L->addBlockEntry(BB); + FC1.L->removeBlockFromLoop(BB); + if (LI.getLoopFor(BB) != FC1.L) + continue; + LI.changeLoopFor(BB, FC0.L); + } + while (!FC1.L->isInnermost()) { + const auto &ChildLoopIt = FC1.L->begin(); + Loop *ChildLoop = *ChildLoopIt; + FC1.L->removeChildLoop(ChildLoopIt); + FC0.L->addChildLoop(ChildLoop); + } + + // Delete the now empty loop L1. + LI.erase(FC1.L); + + // Forget block dispositions as well, so that there are no dangling + // pointers to erased/free'ed blocks. It should be done after mergeLatch() + // since merging the latches may affect the dispositions. + SE.forgetBlockAndLoopDispositions(); + + // Move instructions from FC0.Latch to FC1.Latch. + // Note: mergeLatch requires an updated DT. + mergeLatch(FC0, FC1); + +#ifndef NDEBUG + assert(!verifyFunction(*FC0.Header->getParent(), &errs())); + assert(DT.verify(DominatorTree::VerificationLevel::Fast)); + assert(PDT.verify()); + LI.verify(DT); + SE.verify(); +#endif + + LLVM_DEBUG(dbgs() << "Fusion done:\n"); + + return FC0.L; + } + /// Fuse two fusion candidates, creating a new fused loop. /// /// This method contains the mechanics of fusing two loops, represented by \p @@ -1472,58 +1587,7 @@ struct LoopFuser { TreeUpdates.emplace_back(DominatorTree::UpdateType( DominatorTree::Delete, FC1.Preheader, FC1.Header)); - // Moves the phi nodes from the second to the first loops header block. 
- while (PHINode *PHI = dyn_cast(&FC1.Header->front())) { - if (SE.isSCEVable(PHI->getType())) - SE.forgetValue(PHI); - if (PHI->hasNUsesOrMore(1)) - PHI->moveBefore(FC0.Header->getFirstInsertionPt()); - else - PHI->eraseFromParent(); - } - - // Introduce new phi nodes in the second loop header to ensure - // exiting the first and jumping to the header of the second does not break - // the SSA property of the phis originally in the first loop. See also the - // comment above. - BasicBlock::iterator L1HeaderIP = FC1.Header->begin(); - for (PHINode *LCPHI : OriginalFC0PHIs) { - int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch); - assert(L1LatchBBIdx >= 0 && - "Expected loop carried value to be rewired at this point!"); - - Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx); - - PHINode *L1HeaderPHI = - PHINode::Create(LCV->getType(), 2, LCPHI->getName() + ".afterFC0"); - L1HeaderPHI->insertBefore(L1HeaderIP); - L1HeaderPHI->addIncoming(LCV, FC0.Latch); - L1HeaderPHI->addIncoming(PoisonValue::get(LCV->getType()), - FC0.ExitingBlock); - - LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI); - } - - // Replace latch terminator destinations. - FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header); - FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header); - - // Modify the latch branch of FC0 to be unconditional as both successors of - // the branch are the same. - simplifyLatchBranch(FC0); - - // If FC0.Latch and FC0.ExitingBlock are the same then we have already - // performed the updates above. 
- if (FC0.Latch != FC0.ExitingBlock) - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Insert, FC0.Latch, FC1.Header)); - - TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete, - FC0.Latch, FC0.Header)); - TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert, - FC1.Latch, FC0.Header)); - TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete, - FC1.Latch, FC1.Header)); + rewireFusedHeaderPHIsAndLatches(FC0, FC1, OriginalFC0PHIs, TreeUpdates); // Update DT/PDT DTU.applyUpdates(TreeUpdates); @@ -1537,52 +1601,7 @@ struct LoopFuser { DTU.flush(); - // Is there a way to keep SE up-to-date so we don't need to forget the loops - // and rebuild the information in subsequent passes of fusion? - // Note: Need to forget the loops before merging the loop latches, as - // mergeLatch may remove the only block in FC1. - SE.forgetLoop(FC1.L); - SE.forgetLoop(FC0.L); - - // Merge the loops. - SmallVector Blocks(FC1.L->blocks()); - for (BasicBlock *BB : Blocks) { - FC0.L->addBlockEntry(BB); - FC1.L->removeBlockFromLoop(BB); - if (LI.getLoopFor(BB) != FC1.L) - continue; - LI.changeLoopFor(BB, FC0.L); - } - while (!FC1.L->isInnermost()) { - const auto &ChildLoopIt = FC1.L->begin(); - Loop *ChildLoop = *ChildLoopIt; - FC1.L->removeChildLoop(ChildLoopIt); - FC0.L->addChildLoop(ChildLoop); - } - - // Delete the now empty loop L1. - LI.erase(FC1.L); - - // Forget block dispositions as well, so that there are no dangling - // pointers to erased/free'ed blocks. It should be done after mergeLatch() - // since merging the latches may affect the dispositions. - SE.forgetBlockAndLoopDispositions(); - - // Move instructions from FC0.Latch to FC1.Latch. - // Note: mergeLatch requires an updated DT. 
- mergeLatch(FC0, FC1); - -#ifndef NDEBUG - assert(!verifyFunction(*FC0.Header->getParent(), &errs())); - assert(DT.verify(DominatorTree::VerificationLevel::Fast)); - assert(PDT.verify()); - LI.verify(DT); - SE.verify(); -#endif - - LLVM_DEBUG(dbgs() << "Fusion done:\n"); - - return FC0.L; + return finalizeFusedLoop(FC0, FC1); } /// Report details on loop fusion opportunities. @@ -1758,60 +1777,7 @@ struct LoopFuser { TreeUpdates.emplace_back(DominatorTree::UpdateType( DominatorTree::Delete, FC1.Preheader, FC1.Header)); - // Moves the phi nodes from the second to the first loops header block. - while (PHINode *PHI = dyn_cast(&FC1.Header->front())) { - if (SE.isSCEVable(PHI->getType())) - SE.forgetValue(PHI); - if (PHI->hasNUsesOrMore(1)) - PHI->moveBefore(FC0.Header->getFirstInsertionPt()); - else - PHI->eraseFromParent(); - } - - // Introduce new phi nodes in the second loop header to ensure - // exiting the first and jumping to the header of the second does not break - // the SSA property of the phis originally in the first loop. See also the - // comment above. - BasicBlock::iterator L1HeaderIP = FC1.Header->begin(); - for (PHINode *LCPHI : OriginalFC0PHIs) { - int L1LatchBBIdx = LCPHI->getBasicBlockIndex(FC1.Latch); - assert(L1LatchBBIdx >= 0 && - "Expected loop carried value to be rewired at this point!"); - - Value *LCV = LCPHI->getIncomingValue(L1LatchBBIdx); - - PHINode *L1HeaderPHI = - PHINode::Create(LCV->getType(), 2, LCPHI->getName() + ".afterFC0"); - L1HeaderPHI->insertBefore(L1HeaderIP); - L1HeaderPHI->addIncoming(LCV, FC0.Latch); - L1HeaderPHI->addIncoming(PoisonValue::get(LCV->getType()), - FC0.ExitingBlock); - - LCPHI->setIncomingValue(L1LatchBBIdx, L1HeaderPHI); - } - - // Update the latches - - // Replace latch terminator destinations. 
- FC0.Latch->getTerminator()->replaceUsesOfWith(FC0.Header, FC1.Header); - FC1.Latch->getTerminator()->replaceUsesOfWith(FC1.Header, FC0.Header); - - // Modify the latch branch of FC0 to be unconditional as both successors of - // the branch are the same. - simplifyLatchBranch(FC0); - - // If FC0.Latch and FC0.ExitingBlock are the same then we have already - // performed the updates above. - if (FC0.Latch != FC0.ExitingBlock) - TreeUpdates.emplace_back(DominatorTree::UpdateType( - DominatorTree::Insert, FC0.Latch, FC1.Header)); - - TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete, - FC0.Latch, FC0.Header)); - TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Insert, - FC1.Latch, FC0.Header)); - TreeUpdates.emplace_back(DominatorTree::UpdateType(DominatorTree::Delete, - FC1.Latch, FC1.Header)); + rewireFusedHeaderPHIsAndLatches(FC0, FC1, OriginalFC0PHIs, TreeUpdates); // All done // Apply the updates to the Dominator Tree and cleanup. @@ -1834,52 +1800,7 @@ struct LoopFuser { DTU.deleteBB(FC0.ExitBlock); DTU.flush(); - // Is there a way to keep SE up-to-date so we don't need to forget the loops - // and rebuild the information in subsequent passes of fusion? - // Note: Need to forget the loops before merging the loop latches, as - // mergeLatch may remove the only block in FC1. - SE.forgetLoop(FC1.L); - SE.forgetLoop(FC0.L); - - // Merge the loops. - SmallVector Blocks(FC1.L->blocks()); - for (BasicBlock *BB : Blocks) { - FC0.L->addBlockEntry(BB); - FC1.L->removeBlockFromLoop(BB); - if (LI.getLoopFor(BB) != FC1.L) - continue; - LI.changeLoopFor(BB, FC0.L); - } - while (!FC1.L->isInnermost()) { - const auto &ChildLoopIt = FC1.L->begin(); - Loop *ChildLoop = *ChildLoopIt; - FC1.L->removeChildLoop(ChildLoopIt); - FC0.L->addChildLoop(ChildLoop); - } - - // Delete the now empty loop L1. - LI.erase(FC1.L); - - // Forget block dispositions as well, so that there are no dangling - // pointers to erased/free'ed blocks. 
It should be done after mergeLatch() - // since merging the latches may affect the dispositions. - SE.forgetBlockAndLoopDispositions(); - - // Move instructions from FC0.Latch to FC1.Latch. - // Note: mergeLatch requires an updated DT. - mergeLatch(FC0, FC1); - -#ifndef NDEBUG - assert(!verifyFunction(*FC0.Header->getParent(), &errs())); - assert(DT.verify(DominatorTree::VerificationLevel::Fast)); - assert(PDT.verify()); - LI.verify(DT); - SE.verify(); -#endif - - LLVM_DEBUG(dbgs() << "Fusion done:\n"); - - return FC0.L; + return finalizeFusedLoop(FC0, FC1); } }; } // namespace From da77f74edfa601cbc91f164ed6caad5f019d3536 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Wed, 24 Jun 2026 16:07:05 +0800 Subject: [PATCH 296/511] [RISCV] Convert opaque pointers in vp-combine-reverse-load.ll. NFC (#205498) --- .../RISCV/rvv/vp-combine-reverse-load.ll | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll b/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll index 831012ed0fef3..9bf38753e5054 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-combine-reverse-load.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple=riscv32 -mattr=+f,+v -verify-machineinstrs < %s | FileCheck %s ; RUN: llc -mtriple=riscv64 -mattr=+f,+v -verify-machineinstrs < %s | FileCheck %s -define @test_reverse_load_combiner(* %ptr, i32 zeroext %evl) { +define @test_reverse_load_combiner(ptr %ptr, i32 zeroext %evl) { ; CHECK-LABEL: test_reverse_load_combiner: ; CHECK: # %bb.0: ; CHECK-NEXT: slli a2, a1, 2 @@ -12,12 +12,12 @@ define @test_reverse_load_combiner(* %p ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlse32.v v8, (a0), a2 ; CHECK-NEXT: ret - %load = call @llvm.vp.load.nxv2f32.p0nxv2f32(* %ptr, splat (i1 true), i32 %evl) + %load = call @llvm.vp.load.nxv2f32.p0nxv2f32(ptr %ptr, splat (i1 true), i32 %evl) %rev = call 
@llvm.experimental.vp.reverse.nxv2f32( %load, splat (i1 true), i32 %evl) ret %rev } -define @test_load_mask_is_vp_reverse(* %ptr, %mask, i32 zeroext %evl) { +define @test_load_mask_is_vp_reverse(ptr %ptr, %mask, i32 zeroext %evl) { ; CHECK-LABEL: test_load_mask_is_vp_reverse: ; CHECK: # %bb.0: ; CHECK-NEXT: slli a2, a1, 2 @@ -28,12 +28,12 @@ define @test_load_mask_is_vp_reverse(* ; CHECK-NEXT: vlse32.v v8, (a0), a2, v0.t ; CHECK-NEXT: ret %loadmask = call @llvm.experimental.vp.reverse.nxv2i1( %mask, splat (i1 true), i32 %evl) - %load = call @llvm.vp.load.nxv2f32.p0nxv2f32(* %ptr, %loadmask, i32 %evl) + %load = call @llvm.vp.load.nxv2f32.p0nxv2f32(ptr %ptr, %loadmask, i32 %evl) %rev = call @llvm.experimental.vp.reverse.nxv2f32( %load, splat (i1 true), i32 %evl) ret %rev } -define @test_load_mask_is_vp_reverse_with_mask(* %ptr, %mask, %revmask, i32 zeroext %evl) { +define @test_load_mask_is_vp_reverse_with_mask(ptr %ptr, %mask, %revmask, i32 zeroext %evl) { ; CHECK-LABEL: test_load_mask_is_vp_reverse_with_mask: ; CHECK: # %bb.0: ; CHECK-NEXT: slli a2, a1, 2 @@ -44,12 +44,12 @@ define @test_load_mask_is_vp_reverse_with_mask( @llvm.experimental.vp.reverse.nxv2i1( %mask, %revmask, i32 %evl) - %load = call @llvm.vp.load.nxv2f32.p0nxv2f32(* %ptr, %loadmask, i32 %evl) + %load = call @llvm.vp.load.nxv2f32.p0nxv2f32(ptr %ptr, %loadmask, i32 %evl) %rev = call @llvm.experimental.vp.reverse.nxv2f32( %load, splat (i1 true), i32 %evl) ret %rev } -define @test_load_mask_not_all_one(* %ptr, %notallones, i32 zeroext %evl) { +define @test_load_mask_not_all_one(ptr %ptr, %notallones, i32 zeroext %evl) { ; CHECK-LABEL: test_load_mask_not_all_one: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma @@ -59,7 +59,7 @@ define @test_load_mask_not_all_one(* %p ; CHECK-NEXT: vrsub.vx v10, v8, a1, v0.t ; CHECK-NEXT: vrgather.vv v8, v9, v10, v0.t ; CHECK-NEXT: ret - %load = call @llvm.vp.load.nxv2f32.p0nxv2f32(* %ptr, %notallones, i32 %evl) + %load = call 
@llvm.vp.load.nxv2f32.p0nxv2f32(ptr %ptr, %notallones, i32 %evl) %rev = call @llvm.experimental.vp.reverse.nxv2f32( %load, %notallones, i32 %evl) ret %rev } @@ -79,7 +79,7 @@ define @test_load_reverse_mask_not_all_one(ptr %ptr, %rev } -define @test_different_evl(* %ptr, %mask, i32 zeroext %evl1, i32 zeroext %evl2) { +define @test_different_evl(ptr %ptr, %mask, i32 zeroext %evl1, i32 zeroext %evl2) { ; CHECK-LABEL: test_different_evl: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma @@ -101,7 +101,7 @@ define @test_different_evl(* %ptr, @llvm.experimental.vp.reverse.nxv2i1( %mask, splat (i1 true), i32 %evl1) - %load = call @llvm.vp.load.nxv2f32.p0nxv2f32(* %ptr, %loadmask, i32 %evl2) + %load = call @llvm.vp.load.nxv2f32.p0nxv2f32(ptr %ptr, %loadmask, i32 %evl2) %rev = call @llvm.experimental.vp.reverse.nxv2f32( %load, splat (i1 true), i32 %evl2) ret %rev } From 3dce6e626ae715477cb95480e25643d5ffe4c3aa Mon Sep 17 00:00:00 2001 From: AZero13 Date: Wed, 24 Jun 2026 04:20:50 -0400 Subject: [PATCH 297/511] [AArch64] Run cleanup one final time after peephole (#199711) It's a lightweight pass. Should always be the last SSA pass since peephole can end up making some instructions dead. --- .../Target/AArch64/AArch64TargetMachine.cpp | 4 +- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 + .../aarch64-neon-vector-insert-uaddlv.ll | 44 +++++++++---------- llvm/test/CodeGen/AArch64/fabs-fp128.ll | 5 +-- 4 files changed, 28 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 568563cf53220..c20fb31ab8854 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -815,8 +815,10 @@ void AArch64PassConfig::addMachineSSAOptimization() { // Run default MachineSSAOptimization first. 
TargetPassConfig::addMachineSSAOptimization(); - if (TM->getOptLevel() != CodeGenOptLevel::None) + if (TM->getOptLevel() != CodeGenOptLevel::None) { addPass(createAArch64MIPeepholeOptLegacyPass()); + addPass(&DeadMachineInstructionElimID); + } } bool AArch64PassConfig::addILPOpts() { diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index 08d3b94530d14..ed2453941866a 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -172,6 +172,7 @@ ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions ; CHECK-NEXT: AArch64 MI Peephole Optimization pass +; CHECK-NEXT: Remove dead machine instructions ; CHECK-NEXT: AArch64 Dead register definitions ; CHECK-NEXT: Detect Dead Lanes ; CHECK-NEXT: Init Undef Pass diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll index 91eda8d552397..72270e3be443f 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll @@ -210,9 +210,9 @@ define void @insert_vec_v8i16_uaddlv_from_v8i16(ptr %0) { ; CHECK-NEXT: stp xzr, xzr, [x0, #16] ; CHECK-NEXT: uaddlv.8h s0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: ushll.4s v0, v1, #0 +; CHECK-NEXT: ucvtf.4s v0, v0 +; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret entry: @@ -232,10 +232,10 @@ define void @insert_vec_v3i16_uaddlv_from_v8i16(ptr %0) { ; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: uaddlv.8h s0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: st1.s { v1 }[2], [x8] -; CHECK-NEXT: str d1, [x0] +; CHECK-NEXT: ushll.4s v0, v1, #0 +; CHECK-NEXT: ucvtf.4s v0, v0 +; CHECK-NEXT: st1.s { v0 }[2], [x8] +; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret 
entry: @@ -283,9 +283,9 @@ define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) { ; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: mov.h v2[0], v1[0] ; CHECK-NEXT: bic.4h v2, #255, lsl #8 -; CHECK-NEXT: ushll.4s v2, v2, #0 -; CHECK-NEXT: ucvtf.4s v2, v2 -; CHECK-NEXT: stp q2, q0, [x0] +; CHECK-NEXT: ushll.4s v1, v2, #0 +; CHECK-NEXT: ucvtf.4s v1, v1 +; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret entry: @@ -389,9 +389,9 @@ define void @insert_vec_v4i16_uaddlv_from_v4i32(ptr %0) { ; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: ushll.4s v0, v1, #0 +; CHECK-NEXT: ucvtf.4s v0, v0 +; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret entry: @@ -408,13 +408,13 @@ define void @insert_vec_v16i16_uaddlv_from_v4i32(ptr %0) { ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: movi.2d v1, #0000000000000000 -; CHECK-NEXT: movi.2d v2, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d0, v0 -; CHECK-NEXT: stp q2, q2, [x0, #32] ; CHECK-NEXT: mov.h v1[0], v0[0] +; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: ushll.4s v1, v1, #0 +; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: stp q1, q2, [x0] +; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret entry: @@ -435,9 +435,9 @@ define void @insert_vec_v8i8_uaddlv_from_v4i32(ptr %0) { ; CHECK-NEXT: uaddlv.4s d0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] ; CHECK-NEXT: bic.4h v1, #255, lsl #8 -; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: ushll.4s v0, v1, #0 +; CHECK-NEXT: ucvtf.4s v0, v0 +; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret entry: @@ -454,14 +454,14 @@ define void @insert_vec_v16i8_uaddlv_from_v4i32(ptr %0) { ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: movi.2d v1, #0000000000000000 -; CHECK-NEXT: movi.2d 
v2, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d0, v0 -; CHECK-NEXT: stp q2, q2, [x0, #32] ; CHECK-NEXT: mov.h v1[0], v0[0] +; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: bic.4h v1, #255, lsl #8 +; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: ushll.4s v1, v1, #0 ; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: stp q1, q2, [x0] +; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fabs-fp128.ll b/llvm/test/CodeGen/AArch64/fabs-fp128.ll index 903aa8adf7085..17b75f89b32da 100644 --- a/llvm/test/CodeGen/AArch64/fabs-fp128.ll +++ b/llvm/test/CodeGen/AArch64/fabs-fp128.ll @@ -144,7 +144,7 @@ define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) { ; CHECK-GI-LABEL: fabs_v4f128: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov x8, v0.d[1] -; CHECK-GI-NEXT: mov v7.d[0], v0.d[0] +; CHECK-GI-NEXT: mov v0.d[0], v0.d[0] ; CHECK-GI-NEXT: mov x9, v1.d[1] ; CHECK-GI-NEXT: mov x10, v2.d[1] ; CHECK-GI-NEXT: mov x11, v3.d[1] @@ -152,14 +152,13 @@ define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) { ; CHECK-GI-NEXT: mov v2.d[0], v2.d[0] ; CHECK-GI-NEXT: mov v3.d[0], v3.d[0] ; CHECK-GI-NEXT: and x8, x8, #0x7fffffffffffffff -; CHECK-GI-NEXT: mov v7.d[1], x8 +; CHECK-GI-NEXT: mov v0.d[1], x8 ; CHECK-GI-NEXT: and x8, x9, #0x7fffffffffffffff ; CHECK-GI-NEXT: and x9, x10, #0x7fffffffffffffff ; CHECK-GI-NEXT: and x10, x11, #0x7fffffffffffffff ; CHECK-GI-NEXT: mov v1.d[1], x8 ; CHECK-GI-NEXT: mov v2.d[1], x9 ; CHECK-GI-NEXT: mov v3.d[1], x10 -; CHECK-GI-NEXT: mov v0.16b, v7.16b ; CHECK-GI-NEXT: ret entry: %c = call <4 x fp128> @llvm.fabs.v4f128(<4 x fp128> %a) From 7fe0b4ce1f9b959e3fcb4fd79d3cd5cef6494ca0 Mon Sep 17 00:00:00 2001 From: Haohai Wen Date: Wed, 24 Jun 2026 16:23:52 +0800 Subject: [PATCH 298/511] [ObjectYAML][NFC] Derive BBAddrMap section size from the CBA offset (#204056) Add the CBA offset delta to sh_size once at the end instead of after each write. 
--- llvm/lib/ObjectYAML/ELFEmitter.cpp | 41 ++++++++++++++---------------- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 4d423e71e959e..6dc162356ab40 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1462,6 +1462,7 @@ void ELFState::writeSectionContent( PGOAnalyses = &Section.PGOAnalyses.value(); } + uint64_t CurrentOffset = CBA.getOffset(); for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) { // Write version and feature values. if (E.Version > 5) @@ -1469,14 +1470,10 @@ void ELFState::writeSectionContent( << static_cast(E.Version) << "; encoding using the most recent version"; CBA.write(E.Version); - SHeader.sh_size += 1; - if (E.Version < 5) { + if (E.Version < 5) CBA.write(static_cast(E.Feature)); - SHeader.sh_size += 1; - } else { + else CBA.write(E.Feature, ELFT::Endianness); - SHeader.sh_size += 2; - } auto FeatureOrErr = llvm::object::BBAddrMap::Features::decode(E.Feature); if (!FeatureOrErr) { // Invalid feature: warn and skip the entry. @@ -1496,7 +1493,7 @@ void ELFState::writeSectionContent( // 'NumBBRanges' field when specified. uint64_t NumBBRanges = E.NumBBRanges.value_or(E.BBRanges ? E.BBRanges->size() : 0); - SHeader.sh_size += CBA.writeULEB128(NumBBRanges); + CBA.writeULEB128(NumBBRanges); } if (!E.BBRanges) continue; @@ -1511,31 +1508,30 @@ void ELFState::writeSectionContent( // specified. uint64_t NumBlocks = BBR.NumBlocks.value_or(BBR.BBEntries ? BBR.BBEntries->size() : 0); - SHeader.sh_size += sizeof(uintX_t) + CBA.writeULEB128(NumBlocks); + CBA.writeULEB128(NumBlocks); // Write all BBEntries in this BBRange. 
if (!BBR.BBEntries || FeatureOrErr->OmitBBEntries) continue; for (const BBAddrMapYAML::BBAddrMapEntry::BBEntry &BBE : *BBR.BBEntries) { ++TotalNumBlocks; if (E.Version > 1) - SHeader.sh_size += CBA.writeULEB128(BBE.ID); - SHeader.sh_size += CBA.writeULEB128(BBE.AddressOffset); + CBA.writeULEB128(BBE.ID); + CBA.writeULEB128(BBE.AddressOffset); if (EmitCallsiteEndOffsets) { size_t NumCallsiteEndOffsets = BBE.CallsiteEndOffsets ? BBE.CallsiteEndOffsets->size() : 0; - SHeader.sh_size += CBA.writeULEB128(NumCallsiteEndOffsets); + CBA.writeULEB128(NumCallsiteEndOffsets); if (BBE.CallsiteEndOffsets) { for (uint32_t Offset : *BBE.CallsiteEndOffsets) - SHeader.sh_size += CBA.writeULEB128(Offset); + CBA.writeULEB128(Offset); } } - SHeader.sh_size += CBA.writeULEB128(BBE.Size); - SHeader.sh_size += CBA.writeULEB128(BBE.Metadata); + CBA.writeULEB128(BBE.Size); + CBA.writeULEB128(BBE.Metadata); if (FeatureOrErr->BBHash || BBE.Hash.has_value()) { uint64_t Hash = BBE.Hash.has_value() ? BBE.Hash.value() : llvm::yaml::Hex64(0); CBA.write(Hash, ELFT::Endianness); - SHeader.sh_size += 8; } } } @@ -1544,7 +1540,7 @@ void ELFState::writeSectionContent( const BBAddrMapYAML::PGOAnalysisMapEntry &PGOEntry = PGOAnalyses->at(Idx); if (PGOEntry.FuncEntryCount) - SHeader.sh_size += CBA.writeULEB128(*PGOEntry.FuncEntryCount); + CBA.writeULEB128(*PGOEntry.FuncEntryCount); if (!PGOEntry.PGOBBEntries) continue; @@ -1560,20 +1556,21 @@ void ELFState::writeSectionContent( for (const auto &PGOBBE : PGOBBEntries) { if (PGOBBE.BBFreq) - SHeader.sh_size += CBA.writeULEB128(*PGOBBE.BBFreq); + CBA.writeULEB128(*PGOBBE.BBFreq); if (FeatureOrErr->PostLinkCfg || PGOBBE.PostLinkBBFreq.has_value()) - SHeader.sh_size += CBA.writeULEB128(PGOBBE.PostLinkBBFreq.value_or(0)); + CBA.writeULEB128(PGOBBE.PostLinkBBFreq.value_or(0)); if (PGOBBE.Successors) { - SHeader.sh_size += CBA.writeULEB128(PGOBBE.Successors->size()); + CBA.writeULEB128(PGOBBE.Successors->size()); for (const auto &[ID, BrProb, PostLinkBrFreq] : 
*PGOBBE.Successors) { - SHeader.sh_size += CBA.writeULEB128(ID); - SHeader.sh_size += CBA.writeULEB128(BrProb); + CBA.writeULEB128(ID); + CBA.writeULEB128(BrProb); if (FeatureOrErr->PostLinkCfg || PostLinkBrFreq.has_value()) - SHeader.sh_size += CBA.writeULEB128(PostLinkBrFreq.value_or(0)); + CBA.writeULEB128(PostLinkBrFreq.value_or(0)); } } } } + SHeader.sh_size += CBA.getOffset() - CurrentOffset; } template From 2eb20486f1f3dc5551e674a8a019421e1c2b39fd Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Wed, 24 Jun 2026 09:33:58 +0100 Subject: [PATCH 299/511] [libc] Refactor qsort code (#198781) This patch makes the following changes: - Refactor the internal sorting functions to reduce code duplication. - Move the testing machinery done for the testing of `qsort_r` to a shared place. These changes are done in anticipation of the introduction of Annex K's `qsort_s`. This function shares most of its semantics with `qsort_r`, therefore most of the testing logic can be shared between the two. Besides, `qsort`, `qsort_r` and `qsort_s` are all very similar, hence we can attempt to reduce duplication a bit more.
--- libc/src/stdlib/qsort.cpp | 6 +- libc/src/stdlib/qsort_r.cpp | 7 +- libc/src/stdlib/qsort_util.h | 25 +++- libc/test/src/stdlib/CMakeLists.txt | 3 +- libc/test/src/stdlib/QsortReentrantTest.h | 156 ++++++++++++++++++++++ libc/test/src/stdlib/qsort_r_test.cpp | 136 +------------------ 6 files changed, 184 insertions(+), 149 deletions(-) create mode 100644 libc/test/src/stdlib/QsortReentrantTest.h diff --git a/libc/src/stdlib/qsort.cpp b/libc/src/stdlib/qsort.cpp index f66b686d4e54b..46a74fb9118a3 100644 --- a/libc/src/stdlib/qsort.cpp +++ b/libc/src/stdlib/qsort.cpp @@ -18,11 +18,7 @@ LLVM_LIBC_FUNCTION(void, qsort, (void *array, size_t array_size, size_t elem_size, int (*compare)(const void *, const void *))) { - const auto is_less = [compare](const void *a, const void *b) -> bool { - return compare(a, b) < 0; - }; - - internal::unstable_sort(array, array_size, elem_size, is_less); + internal::unstable_sort(array, array_size, elem_size, compare); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/qsort_r.cpp b/libc/src/stdlib/qsort_r.cpp index 47448201eddbd..65afcee77885d 100644 --- a/libc/src/stdlib/qsort_r.cpp +++ b/libc/src/stdlib/qsort_r.cpp @@ -18,12 +18,7 @@ LLVM_LIBC_FUNCTION(void, qsort_r, (void *array, size_t array_size, size_t elem_size, int (*compare)(const void *, const void *, void *), void *arg)) { - - const auto is_less = [compare, arg](const void *a, const void *b) -> bool { - return compare(a, b, arg) < 0; - }; - - internal::unstable_sort(array, array_size, elem_size, is_less); + internal::unstable_sort(array, array_size, elem_size, compare, arg); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/stdlib/qsort_util.h b/libc/src/stdlib/qsort_util.h index 7882b829d3274..2f0438d624d14 100644 --- a/libc/src/stdlib/qsort_util.h +++ b/libc/src/stdlib/qsort_util.h @@ -64,10 +64,29 @@ LIBC_INLINE void unstable_sort_impl(void *array, size_t array_len, } template +LIBC_INLINE void unstable_sort_dispatch(void *array, size_t 
array_len, + size_t elem_size, F is_less) { + constexpr bool USE_QUICK_SORT = (LIBC_QSORT_IMPL == LIBC_QSORT_QUICK_SORT); + unstable_sort_impl(array, array_len, elem_size, is_less); +} + +template +LIBC_INLINE void unstable_sort(void *array, size_t array_len, size_t elem_size, + CmpFn compare) { + const auto is_less = [compare](const void *a, const void *b) -> bool { + return compare(a, b) < 0; + }; + unstable_sort_dispatch(array, array_len, elem_size, is_less); +} + +template LIBC_INLINE void unstable_sort(void *array, size_t array_len, size_t elem_size, - const F &is_less) { -#define USE_QUICK_SORT ((LIBC_QSORT_IMPL) == (LIBC_QSORT_QUICK_SORT)) - unstable_sort_impl(array, array_len, elem_size, is_less); + CmpFn compare, void *context) { + const auto is_less = [compare, context](const void *a, + const void *b) -> bool { + return compare(a, b, context) < 0; + }; + unstable_sort_dispatch(array, array_len, elem_size, is_less); } } // namespace internal diff --git a/libc/test/src/stdlib/CMakeLists.txt b/libc/test/src/stdlib/CMakeLists.txt index 95e5a3ccc2bb5..2a7cd51d21e12 100644 --- a/libc/test/src/stdlib/CMakeLists.txt +++ b/libc/test/src/stdlib/CMakeLists.txt @@ -360,8 +360,9 @@ add_libc_test( libc-stdlib-tests SRCS qsort_r_test.cpp + HDRS + QsortReentrantTest.h DEPENDS - libc.hdr.types.size_t libc.src.stdlib.qsort_r ) diff --git a/libc/test/src/stdlib/QsortReentrantTest.h b/libc/test/src/stdlib/QsortReentrantTest.h new file mode 100644 index 0000000000000..85d549098c740 --- /dev/null +++ b/libc/test/src/stdlib/QsortReentrantTest.h @@ -0,0 +1,156 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains a template class for testing reentrant qsort functions. +/// +//===----------------------------------------------------------------------===// + +#include "test/UnitTest/Test.h" + +/// Provides shared tests for reentrant qsort variants. +/// +/// The fixture verifies that a qsort-like implementation correctly handles +/// sorted data, reverse-sorted data, and comparators that dispatch through a +/// type-erased context pointer. +template +class QsortReentrantTest : public LIBC_NAMESPACE::testing::Test { +private: + static int int_compare_count(const void *l, const void *r, void *count_arg) { + int li = *static_cast(l); + int ri = *static_cast(r); + SizeTy *count = static_cast(count_arg); + *count = *count + 1; + if (li == ri) + return 0; + if (li > ri) + return 1; + return -1; + } + + struct PriorityVal { + int priority; + int size; + }; + + static int compare_priority_val(const PriorityVal *l, const PriorityVal *r) { + // Subtracting the priorities is unsafe, but it's fine for this test. + int priority_diff = l->priority - r->priority; + if (priority_diff != 0) { + return priority_diff; + } + if (l->size == r->size) + return 0; + if (l->size > r->size) + return 1; + return -1; + } + + // The following test is intended to mimic the CPP library pattern of having a + // comparison function that takes a specific type, which is passed to a + // library that then needs to sort an array of that type. The library can't + // safely pass the comparison function to qsort because a function that takes + // const T* being cast to a function that takes const void* is undefined + // behavior. The safer pattern is to pass a type erased comparator that calls + // into the typed comparator to qsort_r. 
+ template + static int type_erased_comp(const void *l, const void *r, + void *erased_func_ptr) { + using TypedComp = int (*)(const T *, const T *); + TypedComp typed_func_ptr = reinterpret_cast(erased_func_ptr); + const T *lt = static_cast(l); + const T *rt = static_cast(r); + return typed_func_ptr(lt, rt); + } + +public: + void sorted_array(QsortFnTy func) { + int array[25] = {10, 23, 33, 35, 55, 70, 71, 100, 110, + 123, 133, 135, 155, 170, 171, 1100, 1110, 1123, + 1133, 1135, 1155, 1170, 1171, 11100, 12310}; + constexpr SizeTy ARRAY_SIZE = sizeof(array) / sizeof(int); + + SizeTy count = 0; + + func(array, ARRAY_SIZE, sizeof(int), int_compare_count, &count); + + ASSERT_LE(array[0], 10); + ASSERT_LE(array[1], 23); + ASSERT_LE(array[2], 33); + ASSERT_LE(array[3], 35); + ASSERT_LE(array[4], 55); + ASSERT_LE(array[5], 70); + ASSERT_LE(array[6], 71); + ASSERT_LE(array[7], 100); + ASSERT_LE(array[8], 110); + ASSERT_LE(array[9], 123); + ASSERT_LE(array[10], 133); + ASSERT_LE(array[11], 135); + ASSERT_LE(array[12], 155); + ASSERT_LE(array[13], 170); + ASSERT_LE(array[14], 171); + ASSERT_LE(array[15], 1100); + ASSERT_LE(array[16], 1110); + ASSERT_LE(array[17], 1123); + ASSERT_LE(array[18], 1133); + ASSERT_LE(array[19], 1135); + ASSERT_LE(array[20], 1155); + ASSERT_LE(array[21], 1170); + ASSERT_LE(array[22], 1171); + ASSERT_LE(array[23], 11100); + ASSERT_LE(array[24], 12310); + + // This is a sorted list, but there still have to have been at least N - 1 + // comparisons made. 
+ ASSERT_GE(count, ARRAY_SIZE - 1); + } + + void reverse_sorted_array(QsortFnTy func) { + int array[25] = {25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, + 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; + constexpr SizeTy ARRAY_SIZE = sizeof(array) / sizeof(int); + + SizeTy count = 0; + + func(array, ARRAY_SIZE, sizeof(int), int_compare_count, &count); + + for (int i = 0; i < int(ARRAY_SIZE - 1); ++i) + ASSERT_LE(array[i], i + 1); + + ASSERT_GE(count, ARRAY_SIZE); + } + + void safe_type_erasure(QsortFnTy func) { + PriorityVal array[5] = { + {10, 3}, {1, 10}, {-1, 100}, {10, 0}, {3, 3}, + }; + constexpr SizeTy ARRAY_SIZE = sizeof(array) / sizeof(PriorityVal); + + func(array, ARRAY_SIZE, sizeof(PriorityVal), type_erased_comp, + reinterpret_cast(compare_priority_val)); + + EXPECT_EQ(array[0].priority, -1); + EXPECT_EQ(array[0].size, 100); + EXPECT_EQ(array[1].priority, 1); + EXPECT_EQ(array[1].size, 10); + EXPECT_EQ(array[2].priority, 3); + EXPECT_EQ(array[2].size, 3); + EXPECT_EQ(array[3].priority, 10); + EXPECT_EQ(array[3].size, 0); + EXPECT_EQ(array[4].priority, 10); + EXPECT_EQ(array[4].size, 3); + } +}; + +#define QSORTREENTRANT_TEST(name, func, sizetype) \ + using LlvmLibc##name##Test = QsortReentrantTest; \ + TEST_F(LlvmLibc##name##Test, SortedArray) { sorted_array(func); } \ + TEST_F(LlvmLibc##name##Test, ReverseSortedArray) { \ + reverse_sorted_array(func); \ + } \ + TEST_F(LlvmLibc##name##Test, SafeTypeErasure) { safe_type_erasure(func); } diff --git a/libc/test/src/stdlib/qsort_r_test.cpp b/libc/test/src/stdlib/qsort_r_test.cpp index f18923618ed5e..b1fff5a1bc3dd 100644 --- a/libc/test/src/stdlib/qsort_r_test.cpp +++ b/libc/test/src/stdlib/qsort_r_test.cpp @@ -6,139 +6,7 @@ // //===----------------------------------------------------------------------===// +#include "QsortReentrantTest.h" #include "src/stdlib/qsort_r.h" -#include "test/UnitTest/Test.h" - -#include "hdr/types/size_t.h" - -static int int_compare_count(const void *l, const void *r, void 
*count_arg) { - int li = *reinterpret_cast(l); - int ri = *reinterpret_cast(r); - size_t *count = reinterpret_cast(count_arg); - *count = *count + 1; - if (li == ri) - return 0; - else if (li > ri) - return 1; - else - return -1; -} - -TEST(LlvmLibcQsortRTest, SortedArray) { - int array[25] = {10, 23, 33, 35, 55, 70, 71, 100, 110, - 123, 133, 135, 155, 170, 171, 1100, 1110, 1123, - 1133, 1135, 1155, 1170, 1171, 11100, 12310}; - constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - - size_t count = 0; - - LIBC_NAMESPACE::qsort_r(array, ARRAY_SIZE, sizeof(int), int_compare_count, - &count); - - ASSERT_LE(array[0], 10); - ASSERT_LE(array[1], 23); - ASSERT_LE(array[2], 33); - ASSERT_LE(array[3], 35); - ASSERT_LE(array[4], 55); - ASSERT_LE(array[5], 70); - ASSERT_LE(array[6], 71); - ASSERT_LE(array[7], 100); - ASSERT_LE(array[8], 110); - ASSERT_LE(array[9], 123); - ASSERT_LE(array[10], 133); - ASSERT_LE(array[11], 135); - ASSERT_LE(array[12], 155); - ASSERT_LE(array[13], 170); - ASSERT_LE(array[14], 171); - ASSERT_LE(array[15], 1100); - ASSERT_LE(array[16], 1110); - ASSERT_LE(array[17], 1123); - ASSERT_LE(array[18], 1133); - ASSERT_LE(array[19], 1135); - ASSERT_LE(array[20], 1155); - ASSERT_LE(array[21], 1170); - ASSERT_LE(array[22], 1171); - ASSERT_LE(array[23], 11100); - ASSERT_LE(array[24], 12310); - - // This is a sorted list, but there still have to have been at least N - 1 - // comparisons made. 
- ASSERT_GE(count, ARRAY_SIZE - 1); -} - -TEST(LlvmLibcQsortRTest, ReverseSortedArray) { - int array[25] = {25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, - 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1}; - constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(int); - - size_t count = 0; - - LIBC_NAMESPACE::qsort_r(array, ARRAY_SIZE, sizeof(int), int_compare_count, - &count); - - for (int i = 0; i < int(ARRAY_SIZE - 1); ++i) - ASSERT_LE(array[i], i + 1); - - ASSERT_GE(count, ARRAY_SIZE); -} - -// The following test is intended to mimic the CPP library pattern of having a -// comparison function that takes a specific type, which is passed to a library -// that then needs to sort an array of that type. The library can't safely pass -// the comparison function to qsort because a function that takes const T* -// being cast to a function that takes const void* is undefined behavior. The -// safer pattern is to pass a type erased comparator that calls into the typed -// comparator to qsort_r. - -struct PriorityVal { - int priority; - int size; -}; - -static int compare_priority_val(const PriorityVal *l, const PriorityVal *r) { - // Subtracting the priorities is unsafe, but it's fine for this test. 
- int priority_diff = l->priority - r->priority; - if (priority_diff != 0) { - return priority_diff; - } - if (l->size == r->size) { - return 0; - } else if (l->size > r->size) { - return 1; - } else { - return -1; - } -} - -template -static int type_erased_comp(const void *l, const void *r, - void *erased_func_ptr) { - typedef int (*TypedComp)(const T *, const T *); - TypedComp typed_func_ptr = reinterpret_cast(erased_func_ptr); - const T *lt = reinterpret_cast(l); - const T *rt = reinterpret_cast(r); - return typed_func_ptr(lt, rt); -} - -TEST(LlvmLibcQsortRTest, SafeTypeErasure) { - PriorityVal array[5] = { - {10, 3}, {1, 10}, {-1, 100}, {10, 0}, {3, 3}, - }; - constexpr size_t ARRAY_SIZE = sizeof(array) / sizeof(PriorityVal); - - LIBC_NAMESPACE::qsort_r(array, ARRAY_SIZE, sizeof(PriorityVal), - type_erased_comp, - reinterpret_cast(compare_priority_val)); - - EXPECT_EQ(array[0].priority, -1); - EXPECT_EQ(array[0].size, 100); - EXPECT_EQ(array[1].priority, 1); - EXPECT_EQ(array[1].size, 10); - EXPECT_EQ(array[2].priority, 3); - EXPECT_EQ(array[2].size, 3); - EXPECT_EQ(array[3].priority, 10); - EXPECT_EQ(array[3].size, 0); - EXPECT_EQ(array[4].priority, 10); - EXPECT_EQ(array[4].size, 3); -} +QSORTREENTRANT_TEST(QsortR, LIBC_NAMESPACE::qsort_r, size_t) From 3e9479e7e5d929e6c53af31cdf4d6944e8b2f6db Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Wed, 24 Jun 2026 09:34:01 +0100 Subject: [PATCH 300/511] [compiler-rt][ARM] Fix underflow handling in new divdf3.S (#204784) The code which calculates the 'errsign' parameter to pass to `__compiler_rt_dunder` was wrong in two ways. It calculated the value with the wrong sign, and also in the wrong register, r12 rather than r2! In this code's original context, both of those things made sense (the 'dunder' function had a nonstandard ABI). Somehow none of the existing test cases detected the problem. 
We found this bug in a test case downstream that only failed big-endian (because that changes which half of the denominator mantissa is left in r2 to be accidentally used as errsign). However, the new test cases here are designed to detect the failure in both endiannesses. --- compiler-rt/lib/builtins/arm/divdf3.S | 8 ++++---- .../test/builtins/Unit/divdf3new_test.c | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/builtins/arm/divdf3.S b/compiler-rt/lib/builtins/arm/divdf3.S index ebbd868a04b17..213483e38ea13 100644 --- a/compiler-rt/lib/builtins/arm/divdf3.S +++ b/compiler-rt/lib/builtins/arm/divdf3.S @@ -497,11 +497,11 @@ LOCAL_LABEL(ddiv_underflow): // was rounded up), or negative if the quotient was rounded down. But we must // also distinguish the third case of the residual being exactly zero. add xh, xh, #0x60000000 // apply IEEE 754 exponent bias for __dunder - orrs r12, r6, r8 // set r12=0 and Z=1 if quotient was exact - movne r12, #1 // otherwise, set r12 = +1 - orrne r12, r12, r6, asr #31 // and change to -1 if residual is negative + orrs r2, r6, r8 // set r2=0 and Z=1 if quotient was exact + mvnne r2, r6, asr #31 // otherwise, r2 = {-1,0} for {+,-} residual + orrne r2, r2, #1 // and then turn that into {-1,+1} pop {r4,r5,r6,r7,r8,lr} // pop all locally saved registers - b SYMBOL_NAME(__compiler_rt_dunder) // and tailcall __dunder to finish + b SYMBOL_NAME(__compiler_rt_dunder) // and tailcall dunder to finish LOCAL_LABEL(ddiv_zerodenorm): // We come here if either input had exponent 0, so there's at least one zero diff --git a/compiler-rt/test/builtins/Unit/divdf3new_test.c b/compiler-rt/test/builtins/Unit/divdf3new_test.c index 866c7cb08e519..8fd22951d62ae 100644 --- a/compiler-rt/test/builtins/Unit/divdf3new_test.c +++ b/compiler-rt/test/builtins/Unit/divdf3new_test.c @@ -107,6 +107,24 @@ int main(void) { test__divdf3(0x0000000000000009, 0x4022000000000000, 0x0000000000000001); status |= 
test__divdf3(0x0000000000000009, 0xc022000000000000, 0x8000000000000001); + status |= + test__divdf3(0x0008000000000092, 0x4010000000000000, 0x0002000000000024); + status |= + test__divdf3(0x0010000000000008, 0x4030000040000000, 0x0000fffffc000010); + status |= + test__divdf3(0x0010000000000008, 0x4030000080000000, 0x0000fffff8000040); + status |= + test__divdf3(0x0010000000000018, 0x4030000040000000, 0x0000fffffc000011); + status |= + test__divdf3(0x0010000000000018, 0x4030000080000000, 0x0000fffff8000041); + status |= + test__divdf3(0x0010000001000008, 0x401fffff80000000, 0x0002000008200022); + status |= + test__divdf3(0x0010000001000010, 0x401fffff80000000, 0x0002000008200023); + status |= + test__divdf3(0x001000000f00000a, 0x401fffff40000000, 0x000200000de00055); + status |= + test__divdf3(0x001000000f000012, 0x401fffff40000000, 0x000200000de00056); status |= test__divdf3(0x000ffffffffffff7, 0x3feffffffffffffe, 0x000ffffffffffff8); status |= From 8f97de5918ae05fcd2ab9027c4ca8ef2d5949b9a Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Wed, 24 Jun 2026 11:17:03 +0200 Subject: [PATCH 301/511] [libc] Add IPv4 socket options and related structs (#204787) This patch adds struct ip_mreq, ip_mreq_source, ip_mreqn, ip_opts, and ip_msfilter to <netinet/in.h>, along with IP level socket option macros (IP_TOS, IP_TTL, IP_ADD_MEMBERSHIP, etc.). I add basic unit tests verifying the size and member offsets of the new structures against standard layout expectations, mainly to make sure that the files are used /somewhere/. Assisted by Gemini.
--- libc/hdr/types/CMakeLists.txt | 45 +++++++++++ libc/hdr/types/struct_ip_mreq.h | 26 ++++++ libc/hdr/types/struct_ip_mreq_source.h | 26 ++++++ libc/hdr/types/struct_ip_mreqn.h | 26 ++++++ libc/hdr/types/struct_ip_msfilter.h | 26 ++++++ libc/hdr/types/struct_ip_opts.h | 26 ++++++ libc/include/CMakeLists.txt | 5 ++ libc/include/llvm-libc-types/CMakeLists.txt | 5 ++ libc/include/llvm-libc-types/struct_ip_mreq.h | 24 ++++++ .../llvm-libc-types/struct_ip_mreq_source.h | 25 ++++++ .../include/llvm-libc-types/struct_ip_mreqn.h | 25 ++++++ .../llvm-libc-types/struct_ip_msfilter.h | 28 +++++++ libc/include/llvm-libc-types/struct_ip_opts.h | 24 ++++++ libc/include/netinet/in.yaml | 81 +++++++++++++++++++ libc/test/src/netinet/CMakeLists.txt | 5 ++ libc/test/src/netinet/in_test.cpp | 38 +++++++++ 16 files changed, 435 insertions(+) create mode 100644 libc/hdr/types/struct_ip_mreq.h create mode 100644 libc/hdr/types/struct_ip_mreq_source.h create mode 100644 libc/hdr/types/struct_ip_mreqn.h create mode 100644 libc/hdr/types/struct_ip_msfilter.h create mode 100644 libc/hdr/types/struct_ip_opts.h create mode 100644 libc/include/llvm-libc-types/struct_ip_mreq.h create mode 100644 libc/include/llvm-libc-types/struct_ip_mreq_source.h create mode 100644 libc/include/llvm-libc-types/struct_ip_mreqn.h create mode 100644 libc/include/llvm-libc-types/struct_ip_msfilter.h create mode 100644 libc/include/llvm-libc-types/struct_ip_opts.h diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt index 1742e423b1d00..a130f7ee0000a 100644 --- a/libc/hdr/types/CMakeLists.txt +++ b/libc/hdr/types/CMakeLists.txt @@ -455,6 +455,51 @@ add_proxy_header_library( libc.include.netinet_in ) +add_proxy_header_library( + struct_ip_mreq + HDRS + struct_ip_mreq.h + FULL_BUILD_DEPENDS + libc.include.llvm-libc-types.struct_ip_mreq + libc.include.netinet_in +) + +add_proxy_header_library( + struct_ip_mreq_source + HDRS + struct_ip_mreq_source.h + FULL_BUILD_DEPENDS + 
libc.include.llvm-libc-types.struct_ip_mreq_source + libc.include.netinet_in +) + +add_proxy_header_library( + struct_ip_mreqn + HDRS + struct_ip_mreqn.h + FULL_BUILD_DEPENDS + libc.include.llvm-libc-types.struct_ip_mreqn + libc.include.netinet_in +) + +add_proxy_header_library( + struct_ip_msfilter + HDRS + struct_ip_msfilter.h + FULL_BUILD_DEPENDS + libc.include.llvm-libc-types.struct_ip_msfilter + libc.include.netinet_in +) + +add_proxy_header_library( + struct_ip_opts + HDRS + struct_ip_opts.h + FULL_BUILD_DEPENDS + libc.include.llvm-libc-types.struct_ip_opts + libc.include.netinet_in +) + add_proxy_header_library( in_addr_t HDRS diff --git a/libc/hdr/types/struct_ip_mreq.h b/libc/hdr/types/struct_ip_mreq.h new file mode 100644 index 0000000000000..9799f892bf7ef --- /dev/null +++ b/libc/hdr/types/struct_ip_mreq.h @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Proxy header for struct ip_mreq. 
+/// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQ_H +#define LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQ_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-types/struct_ip_mreq.h" + +#else + +#include + +#endif // LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQ_H diff --git a/libc/hdr/types/struct_ip_mreq_source.h b/libc/hdr/types/struct_ip_mreq_source.h new file mode 100644 index 0000000000000..d19fd6cd19132 --- /dev/null +++ b/libc/hdr/types/struct_ip_mreq_source.h @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Proxy header for struct ip_mreq_source. +/// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQ_SOURCE_H +#define LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQ_SOURCE_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-types/struct_ip_mreq_source.h" + +#else + +#include + +#endif // LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQ_SOURCE_H diff --git a/libc/hdr/types/struct_ip_mreqn.h b/libc/hdr/types/struct_ip_mreqn.h new file mode 100644 index 0000000000000..9672e2408f705 --- /dev/null +++ b/libc/hdr/types/struct_ip_mreqn.h @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Proxy header for struct ip_mreqn. +/// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQN_H +#define LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQN_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-types/struct_ip_mreqn.h" + +#else + +#include + +#endif // LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_TYPES_STRUCT_IP_MREQN_H diff --git a/libc/hdr/types/struct_ip_msfilter.h b/libc/hdr/types/struct_ip_msfilter.h new file mode 100644 index 0000000000000..7434815d95fd3 --- /dev/null +++ b/libc/hdr/types/struct_ip_msfilter.h @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Proxy header for struct ip_msfilter. +/// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIBC_HDR_TYPES_STRUCT_IP_MSFILTER_H +#define LLVM_LIBC_HDR_TYPES_STRUCT_IP_MSFILTER_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-types/struct_ip_msfilter.h" + +#else + +#include + +#endif // LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_TYPES_STRUCT_IP_MSFILTER_H diff --git a/libc/hdr/types/struct_ip_opts.h b/libc/hdr/types/struct_ip_opts.h new file mode 100644 index 0000000000000..44f922f941da0 --- /dev/null +++ b/libc/hdr/types/struct_ip_opts.h @@ -0,0 +1,26 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Proxy header for struct ip_opts. +/// +//===----------------------------------------------------------------------===// +#ifndef LLVM_LIBC_HDR_TYPES_STRUCT_IP_OPTS_H +#define LLVM_LIBC_HDR_TYPES_STRUCT_IP_OPTS_H + +#ifdef LIBC_FULL_BUILD + +#include "include/llvm-libc-types/struct_ip_opts.h" + +#else + +#include + +#endif // LIBC_FULL_BUILD + +#endif // LLVM_LIBC_HDR_TYPES_STRUCT_IP_OPTS_H diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index ab24c854692d0..bb670b614742a 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -235,6 +235,11 @@ add_header_macro( .llvm-libc-types.struct_sockaddr_in6 .llvm-libc-types.struct_in_addr .llvm-libc-types.struct_in6_addr + .llvm-libc-types.struct_ip_mreq + .llvm-libc-types.struct_ip_mreq_source + .llvm-libc-types.struct_ip_mreqn + .llvm-libc-types.struct_ip_msfilter + .llvm-libc-types.struct_ip_opts .llvm_libc_common_h ) diff --git a/libc/include/llvm-libc-types/CMakeLists.txt b/libc/include/llvm-libc-types/CMakeLists.txt index c0c600983ba12..0512d3f0e642a 100644 --- a/libc/include/llvm-libc-types/CMakeLists.txt +++ b/libc/include/llvm-libc-types/CMakeLists.txt @@ -68,6 +68,11 @@ add_header(in_addr_t HDR in_addr_t.h) add_header(in_port_t HDR in_port_t.h DEPENDS libc.include.llvm-libc-macros.stdint_macros) add_header(struct_in_addr HDR struct_in_addr.h DEPENDS .in_addr_t) add_header(struct_in6_addr HDR struct_in6_addr.h DEPENDS libc.include.llvm-libc-macros.stdint_macros) +add_header(struct_ip_mreq HDR struct_ip_mreq.h DEPENDS .struct_in_addr) +add_header(struct_ip_mreq_source HDR struct_ip_mreq_source.h DEPENDS .struct_in_addr) +add_header(struct_ip_mreqn HDR struct_ip_mreqn.h DEPENDS .struct_in_addr) +add_header(struct_ip_msfilter HDR struct_ip_msfilter.h 
DEPENDS libc.include.llvm-libc-macros.stdint_macros .struct_in_addr) +add_header(struct_ip_opts HDR struct_ip_opts.h DEPENDS .struct_in_addr) add_header(ino_t HDR ino_t.h) add_header(key_t HDR key_t.h) add_header(mbstate_t HDR mbstate_t.h) diff --git a/libc/include/llvm-libc-types/struct_ip_mreq.h b/libc/include/llvm-libc-types/struct_ip_mreq.h new file mode 100644 index 0000000000000..614a38d8d95c6 --- /dev/null +++ b/libc/include/llvm-libc-types/struct_ip_mreq.h @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Definition of struct ip_mreq. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_STRUCT_IP_MREQ_H +#define LLVM_LIBC_TYPES_STRUCT_IP_MREQ_H + +#include "struct_in_addr.h" + +struct ip_mreq { + struct in_addr imr_multiaddr; + struct in_addr imr_interface; +}; + +#endif // LLVM_LIBC_TYPES_STRUCT_IP_MREQ_H diff --git a/libc/include/llvm-libc-types/struct_ip_mreq_source.h b/libc/include/llvm-libc-types/struct_ip_mreq_source.h new file mode 100644 index 0000000000000..565c92ba43c7d --- /dev/null +++ b/libc/include/llvm-libc-types/struct_ip_mreq_source.h @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Definition of struct ip_mreq_source. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_STRUCT_IP_MREQ_SOURCE_H +#define LLVM_LIBC_TYPES_STRUCT_IP_MREQ_SOURCE_H + +#include "struct_in_addr.h" + +struct ip_mreq_source { + struct in_addr imr_multiaddr; + struct in_addr imr_interface; + struct in_addr imr_sourceaddr; +}; + +#endif // LLVM_LIBC_TYPES_STRUCT_IP_MREQ_SOURCE_H diff --git a/libc/include/llvm-libc-types/struct_ip_mreqn.h b/libc/include/llvm-libc-types/struct_ip_mreqn.h new file mode 100644 index 0000000000000..7fa7aaa2dd09b --- /dev/null +++ b/libc/include/llvm-libc-types/struct_ip_mreqn.h @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Definition of struct ip_mreqn. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_STRUCT_IP_MREQN_H +#define LLVM_LIBC_TYPES_STRUCT_IP_MREQN_H + +#include "struct_in_addr.h" + +struct ip_mreqn { + struct in_addr imr_multiaddr; + struct in_addr imr_address; + int imr_ifindex; +}; + +#endif // LLVM_LIBC_TYPES_STRUCT_IP_MREQN_H diff --git a/libc/include/llvm-libc-types/struct_ip_msfilter.h b/libc/include/llvm-libc-types/struct_ip_msfilter.h new file mode 100644 index 0000000000000..6a5c4097cafcd --- /dev/null +++ b/libc/include/llvm-libc-types/struct_ip_msfilter.h @@ -0,0 +1,28 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Definition of struct ip_msfilter. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_STRUCT_IP_MSFILTER_H +#define LLVM_LIBC_TYPES_STRUCT_IP_MSFILTER_H + +#include "../llvm-libc-macros/stdint-macros.h" +#include "struct_in_addr.h" + +struct ip_msfilter { + struct in_addr imsf_multiaddr; + struct in_addr imsf_interface; + uint32_t imsf_fmode; + uint32_t imsf_numsrc; + struct in_addr imsf_slist[1]; // Variable size. +}; + +#endif // LLVM_LIBC_TYPES_STRUCT_IP_MSFILTER_H diff --git a/libc/include/llvm-libc-types/struct_ip_opts.h b/libc/include/llvm-libc-types/struct_ip_opts.h new file mode 100644 index 0000000000000..18c6d638cbcd4 --- /dev/null +++ b/libc/include/llvm-libc-types/struct_ip_opts.h @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Definition of struct ip_opts. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TYPES_STRUCT_IP_OPTS_H +#define LLVM_LIBC_TYPES_STRUCT_IP_OPTS_H + +#include "struct_in_addr.h" + +struct ip_opts { + struct in_addr ip_dst; + char ip_opts[40]; // Variable size. 
+}; + +#endif // LLVM_LIBC_TYPES_STRUCT_IP_OPTS_H diff --git a/libc/include/netinet/in.yaml b/libc/include/netinet/in.yaml index e4eb7d65bb42d..0244b160cb601 100644 --- a/libc/include/netinet/in.yaml +++ b/libc/include/netinet/in.yaml @@ -24,6 +24,82 @@ macros: macro_header: inet-address-macros.h - macro_name: INET6_ADDRSTRLEN macro_header: inet-address-macros.h + + - macro_name: SOL_IP + macro_value: 0 + + # IP level socket options + - macro_name: IP_TOS + macro_value: 1 # int + - macro_name: IP_TTL + macro_value: 2 # int + - macro_name: IP_HDRINCL + macro_value: 3 # int + - macro_name: IP_OPTIONS + macro_value: 4 # struct ip_opts + - macro_name: IP_ROUTER_ALERT + macro_value: 5 # int + - macro_name: IP_RECVOPTS + macro_value: 6 # int + - macro_name: IP_RETOPTS + macro_value: 7 # int + - macro_name: IP_PKTINFO + macro_value: 8 # int + - macro_name: IP_MTU_DISCOVER + macro_value: 10 # int + - macro_name: IP_PMTUDISC_DONT + macro_value: 0 + - macro_name: IP_PMTUDISC_WANT + macro_value: 1 + - macro_name: IP_PMTUDISC_DO + macro_value: 2 + - macro_name: IP_PMTUDISC_PROBE + macro_value: 3 + - macro_name: IP_RECVERR + macro_value: 11 # int + - macro_name: IP_RECVTTL + macro_value: 12 # int + - macro_name: IP_RECVTOS + macro_value: 13 # int + - macro_name: IP_MTU + macro_value: 14 # int + - macro_name: IP_FREEBIND + macro_value: 15 # int + - macro_name: IP_PASSSEC + macro_value: 18 # int + - macro_name: IP_TRANSPARENT + macro_value: 19 # int + - macro_name: IP_RECVORIGDSTADDR + macro_value: 20 # int + - macro_name: IP_NODEFRAG + macro_value: 22 # int + - macro_name: IP_BIND_ADDRESS_NO_PORT + macro_value: 24 # int + - macro_name: IP_MULTICAST_IF + macro_value: 32 # struct in_addr or ip_mreq or ip_mreqn + - macro_name: IP_MULTICAST_TTL + macro_value: 33 # int + - macro_name: IP_MULTICAST_LOOP + macro_value: 34 # int + - macro_name: IP_ADD_MEMBERSHIP + macro_value: 35 # struct ip_mreq or ip_mreqn + - macro_name: IP_DROP_MEMBERSHIP + macro_value: 36 # struct ip_mreq or 
ip_mreqn + - macro_name: IP_UNBLOCK_SOURCE + macro_value: 37 # struct ip_mreq_source + - macro_name: IP_BLOCK_SOURCE + macro_value: 38 # struct ip_mreq_source + - macro_name: IP_ADD_SOURCE_MEMBERSHIP + macro_value: 39 # struct ip_mreq_source + - macro_name: IP_DROP_SOURCE_MEMBERSHIP + macro_value: 40 # struct ip_mreq_source + - macro_name: IP_MSFILTER + macro_value: 41 # struct ip_msfilter + - macro_name: IP_MULTICAST_ALL + macro_value: 49 # int + - macro_name: IP_LOCAL_PORT_RANGE + macro_value: 51 # uint32_t + types: - type_name: in_port_t - type_name: in_addr_t @@ -32,6 +108,11 @@ types: - type_name: struct_sockaddr_in6 - type_name: struct_in_addr - type_name: struct_in6_addr + - type_name: struct_ip_mreq + - type_name: struct_ip_mreq_source + - type_name: struct_ip_mreqn + - type_name: struct_ip_msfilter + - type_name: struct_ip_opts enums: [] objects: - object_name: in6addr_any diff --git a/libc/test/src/netinet/CMakeLists.txt b/libc/test/src/netinet/CMakeLists.txt index 605f70ff193cd..f1adcf0f56f17 100644 --- a/libc/test/src/netinet/CMakeLists.txt +++ b/libc/test/src/netinet/CMakeLists.txt @@ -9,6 +9,11 @@ add_libc_unittest( DEPENDS libc.hdr.netinet_in_macros libc.hdr.types.struct_in6_addr + libc.hdr.types.struct_ip_mreq + libc.hdr.types.struct_ip_mreq_source + libc.hdr.types.struct_ip_mreqn + libc.hdr.types.struct_ip_msfilter + libc.hdr.types.struct_ip_opts libc.hdr.types.struct_sockaddr_in6 libc.src.arpa.inet.htons libc.src.arpa.inet.htonl diff --git a/libc/test/src/netinet/in_test.cpp b/libc/test/src/netinet/in_test.cpp index fb4c3be335605..eccfdf37d5514 100644 --- a/libc/test/src/netinet/in_test.cpp +++ b/libc/test/src/netinet/in_test.cpp @@ -18,6 +18,11 @@ #include "hdr/netinet_in_macros.h" #include "hdr/types/struct_in6_addr.h" +#include "hdr/types/struct_ip_mreq.h" +#include "hdr/types/struct_ip_mreq_source.h" +#include "hdr/types/struct_ip_mreqn.h" +#include "hdr/types/struct_ip_msfilter.h" +#include "hdr/types/struct_ip_opts.h" #include 
"hdr/types/struct_sockaddr_in6.h" #include "src/netinet/in6addr_any.h" #include "src/netinet/in6addr_loopback.h" @@ -92,3 +97,36 @@ TEST(LlvmLibcNetinetInTest, SockaddrIn6Layout) { static_cast(24)); EXPECT_EQ(sizeof(struct sockaddr_in6), static_cast(28)); } + +TEST(LlvmLibcNetinetInTest, IpOptionLayout) { + EXPECT_EQ(sizeof(struct ip_mreq), static_cast(8)); + EXPECT_EQ(sizeof(struct ip_mreq_source), static_cast(12)); + EXPECT_EQ(sizeof(struct ip_mreqn), static_cast(12)); + EXPECT_EQ(sizeof(struct ip_msfilter), static_cast(20)); + EXPECT_EQ(sizeof(struct ip_opts), static_cast(44)); + + EXPECT_EQ(offsetof(struct ip_mreq, imr_multiaddr), static_cast(0)); + EXPECT_EQ(offsetof(struct ip_mreq, imr_interface), static_cast(4)); + + EXPECT_EQ(offsetof(struct ip_mreq_source, imr_multiaddr), + static_cast(0)); + EXPECT_EQ(offsetof(struct ip_mreq_source, imr_interface), + static_cast(4)); + EXPECT_EQ(offsetof(struct ip_mreq_source, imr_sourceaddr), + static_cast(8)); + + EXPECT_EQ(offsetof(struct ip_mreqn, imr_multiaddr), static_cast(0)); + EXPECT_EQ(offsetof(struct ip_mreqn, imr_address), static_cast(4)); + EXPECT_EQ(offsetof(struct ip_mreqn, imr_ifindex), static_cast(8)); + + EXPECT_EQ(offsetof(struct ip_msfilter, imsf_multiaddr), + static_cast(0)); + EXPECT_EQ(offsetof(struct ip_msfilter, imsf_interface), + static_cast(4)); + EXPECT_EQ(offsetof(struct ip_msfilter, imsf_fmode), static_cast(8)); + EXPECT_EQ(offsetof(struct ip_msfilter, imsf_numsrc), static_cast(12)); + EXPECT_EQ(offsetof(struct ip_msfilter, imsf_slist), static_cast(16)); + + EXPECT_EQ(offsetof(struct ip_opts, ip_dst), static_cast(0)); + EXPECT_EQ(offsetof(struct ip_opts, ip_opts), static_cast(4)); +} From 0df1522bd3c48f6bacdeb665ce7673cd74980e0e Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Wed, 24 Jun 2026 11:35:28 +0200 Subject: [PATCH 302/511] [flang][FIR] add canonicalization pattern for fir.if returning OPTIONAL (#205353) Lowering is generating patterns when forwarding OPTIONAL in calls that looks 
like: ``` %present = fir.is_present %var : (T) -> i1 %if_result = fir.if %present -> (T) { fir.result %var : T } else { %absent = fir.absent T fir.result %absent : T } ``` This specific pattern is a no-op and `%var` can be used directly. The lowering logic that generates such patterns is inside non trivial compiler code that has to deal with more complex scenarios where the code inside the fir.if is more complex. Add a FIR pattern to canonicalize such code to help with later analysis (like aliasing). --- .../include/flang/Optimizer/Dialect/FIROps.td | 2 + flang/lib/Optimizer/Dialect/FIROps.cpp | 59 +++++++++++++++++ flang/test/Fir/present-absent-if-fold.fir | 66 +++++++++++++++++++ 3 files changed, 127 insertions(+) create mode 100644 flang/test/Fir/present-absent-if-fold.fir diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index 35a87c29d0cb6..c6e4d1b3b4d11 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -2560,6 +2560,8 @@ def fir_IfOp "bool":$withElseRegion)> ]; + let hasCanonicalizer = 1; + let extraClassDeclaration = [{ mlir::OpBuilder getThenBodyBuilder() { assert(!getThenRegion().empty() && "Unexpected empty 'where' region."); diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 1d61989eba6cf..ba047e71d6aa3 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -5690,6 +5690,65 @@ void fir::IfOp::resultToSourceOps(llvm::SmallVectorImpl &results, results.push_back(term->getOperand(resultNum)); } +// Fold away a fir.if that only forwards an optional argument or returns +// fir.absent when it is not present: +// +// %present = fir.is_present %var : (T) -> i1 +// %r = fir.if %present -> (T) { +// fir.result %var : T +// } else { +// %absent = fir.absent T +// fir.result %absent : T +// } +// +// The result is always %var: optional arguments 
already encode presence. +struct FoldPresentAbsentIfOp : public mlir::OpRewritePattern { + using mlir::OpRewritePattern::OpRewritePattern; + + mlir::LogicalResult + matchAndRewrite(fir::IfOp ifOp, + mlir::PatternRewriter &rewriter) const override { + if (ifOp.getNumResults() != 1) + return mlir::failure(); + + auto isPresentOp = ifOp.getCondition().getDefiningOp(); + if (!isPresentOp) + return mlir::failure(); + + mlir::Value optionalVal = isPresentOp.getVal(); + mlir::Type resultType = ifOp.getResult(0).getType(); + if (optionalVal.getType() != resultType) + return mlir::failure(); + + mlir::Block &thenBlock = ifOp.getThenRegion().front(); + if (thenBlock.getOperations().size() != 1) + return mlir::failure(); + auto thenResult = mlir::dyn_cast(thenBlock.getTerminator()); + if (!thenResult || thenResult.getNumOperands() != 1 || + thenResult.getOperand(0) != optionalVal) + return mlir::failure(); + + if (ifOp.getElseRegion().empty()) + return mlir::failure(); + mlir::Block &elseBlock = ifOp.getElseRegion().front(); + if (elseBlock.getOperations().size() > 2) + return mlir::failure(); + auto elseResult = mlir::dyn_cast(elseBlock.getTerminator()); + if (!elseResult || elseResult.getNumOperands() != 1) + return mlir::failure(); + if (!elseResult.getOperand(0).getDefiningOp()) + return mlir::failure(); + + rewriter.replaceOp(ifOp, optionalVal); + return mlir::success(); + } +}; + +void fir::IfOp::getCanonicalizationPatterns(mlir::RewritePatternSet &patterns, + mlir::MLIRContext *context) { + patterns.add(context); +} + //===----------------------------------------------------------------------===// // BoxOffsetOp //===----------------------------------------------------------------------===// diff --git a/flang/test/Fir/present-absent-if-fold.fir b/flang/test/Fir/present-absent-if-fold.fir new file mode 100644 index 0000000000000..19fd7859f4a23 --- /dev/null +++ b/flang/test/Fir/present-absent-if-fold.fir @@ -0,0 +1,66 @@ +// RUN: fir-opt --canonicalize %s | FileCheck 
%s + +// CHECK-LABEL: func.func @fold_present_absent_if_box( +// CHECK-SAME: %[[VAR:.*]]: !fir.box>) -> !fir.box> { +// CHECK-NOT: fir.if +// CHECK-NOT: fir.is_present +// CHECK-NOT: fir.absent +// CHECK: return %[[VAR]] : !fir.box> +func.func @fold_present_absent_if_box( + %var: !fir.box>) -> !fir.box> { + %present = fir.is_present %var : (!fir.box>) -> i1 + %if_result = fir.if %present -> (!fir.box>) { + fir.result %var : !fir.box> + } else { + %absent = fir.absent !fir.box> + fir.result %absent : !fir.box> + } + return %if_result : !fir.box> +} + +// CHECK-LABEL: func.func @fold_present_absent_if_ref( +// CHECK-SAME: %[[VAR:.*]]: !fir.ref) -> !fir.ref { +// CHECK-NOT: fir.if +// CHECK: return %[[VAR]] : !fir.ref +func.func @fold_present_absent_if_ref(%var: !fir.ref) -> !fir.ref { + %present = fir.is_present %var : (!fir.ref) -> i1 + %if_result = fir.if %present -> (!fir.ref) { + fir.result %var : !fir.ref + } else { + %absent = fir.absent !fir.ref + fir.result %absent : !fir.ref + } + return %if_result : !fir.ref +} + +func.func private @side_effect() -> () + +// CHECK-LABEL: func.func @no_fold_call_in_then( +// CHECK: fir.is_present +// CHECK: fir.if +func.func @no_fold_call_in_then(%var: !fir.ref) -> !fir.ref { + %present = fir.is_present %var : (!fir.ref) -> i1 + %if_result = fir.if %present -> (!fir.ref) { + fir.call @side_effect() : () -> () + fir.result %var : !fir.ref + } else { + %absent = fir.absent !fir.ref + fir.result %absent : !fir.ref + } + return %if_result : !fir.ref +} + +// CHECK-LABEL: func.func @no_fold_call_in_else( +// CHECK: fir.is_present +// CHECK: fir.if +func.func @no_fold_call_in_else(%var: !fir.ref) -> !fir.ref { + %present = fir.is_present %var : (!fir.ref) -> i1 + %if_result = fir.if %present -> (!fir.ref) { + fir.result %var : !fir.ref + } else { + %absent = fir.absent !fir.ref + fir.call @side_effect() : () -> () + fir.result %absent : !fir.ref + } + return %if_result : !fir.ref +} From a1f50d6cb4882ae2656f20f767af540508e7b057 
Mon Sep 17 00:00:00 2001 From: Hristo Hristov Date: Wed, 24 Jun 2026 12:48:55 +0300 Subject: [PATCH 303/511] [libc++][ranges] Applied `[[nodiscard]]` to `reverse_view` (#205186) Towards #172124 #References: - https://wg21.link/range.reverse - https://libcxx.llvm.org/CodingGuidelines.html#apply-nodiscard-where-relevant --- libcxx/include/__ranges/reverse_view.h | 16 +-- .../adaptor.nodiscard.verify.cpp | 21 ---- .../range.reverse/nodiscard.verify.cpp | 99 +++++++++++++++++++ 3 files changed, 107 insertions(+), 29 deletions(-) delete mode 100644 libcxx/test/libcxx/ranges/range.adaptors/range.reverse/adaptor.nodiscard.verify.cpp create mode 100644 libcxx/test/libcxx/ranges/range.adaptors/range.reverse/nodiscard.verify.cpp diff --git a/libcxx/include/__ranges/reverse_view.h b/libcxx/include/__ranges/reverse_view.h index c36ba77dd8f6d..b016cc231f2b5 100644 --- a/libcxx/include/__ranges/reverse_view.h +++ b/libcxx/include/__ranges/reverse_view.h @@ -59,13 +59,13 @@ class reverse_view : public view_interface> { _LIBCPP_HIDE_FROM_ABI constexpr explicit reverse_view(_View __view) : __base_(std::move(__view)) {} - _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& requires copy_constructible<_View> { return __base_; } - _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator> begin() { if constexpr (_UseCache) @@ -78,35 +78,35 @@ class reverse_view : public view_interface> { return __tmp; } - _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator> begin() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator> begin() requires common_range<_View> { return std::make_reverse_iterator(ranges::end(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto begin() const requires 
common_range { return std::make_reverse_iterator(ranges::end(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator> end() { + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr reverse_iterator> end() { return std::make_reverse_iterator(ranges::begin(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr auto end() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto end() const requires common_range { return std::make_reverse_iterator(ranges::begin(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() requires sized_range<_View> { return ranges::size(__base_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const requires sized_range { return ranges::size(__base_); diff --git a/libcxx/test/libcxx/ranges/range.adaptors/range.reverse/adaptor.nodiscard.verify.cpp b/libcxx/test/libcxx/ranges/range.adaptors/range.reverse/adaptor.nodiscard.verify.cpp deleted file mode 100644 index 2f7eb94611928..0000000000000 --- a/libcxx/test/libcxx/ranges/range.adaptors/range.reverse/adaptor.nodiscard.verify.cpp +++ /dev/null @@ -1,21 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -// Test the libc++ extension that std::views::reverse is marked as [[nodiscard]]. 
- -#include - -void test() { - int range[] = {1, 2, 3}; - - std::views::reverse(range); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} - range | std::views::reverse; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} - std::views::all | std::views::reverse; // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} -} diff --git a/libcxx/test/libcxx/ranges/range.adaptors/range.reverse/nodiscard.verify.cpp b/libcxx/test/libcxx/ranges/range.adaptors/range.reverse/nodiscard.verify.cpp new file mode 100644 index 0000000000000..6dfa2f5d17c60 --- /dev/null +++ b/libcxx/test/libcxx/ranges/range.adaptors/range.reverse/nodiscard.verify.cpp @@ -0,0 +1,99 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++20 + +// Check that functions are marked [[nodiscard]] + +#include +#include + +#include "test_iterators.h" + +void test() { + int range[] = {19, 28, 29, 49, 82, 94}; + auto v = std::views::reverse(range); + + // [range.reverse.view] + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + v.base(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::move(v).base(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + v.begin(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::as_const(v).begin(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + v.end(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::as_const(v).end(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + v.size(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::as_const(v).size(); + + // [range.reverse.overview] + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::views::reverse(std::views::reverse(range)); + + struct BidirRange : std::ranges::view_base { + int* begin_; + int* end_; + + constexpr BidirRange(int* b, int* e) : begin_(b), end_(e) {} + + constexpr bidirectional_iterator begin() { return bidirectional_iterator{begin_}; } + constexpr bidirectional_iterator begin() const { return bidirectional_iterator{begin_}; } + constexpr bidirectional_iterator end() { return bidirectional_iterator{end_}; } + constexpr bidirectional_iterator end() const 
{ return bidirectional_iterator{end_}; } + }; + static_assert(std::ranges::bidirectional_range); + static_assert(std::ranges::common_range); + static_assert(std::ranges::view); + static_assert(std::copyable); + + { // views::reverse(x) is equivalent to subrange{end, begin, size} if x is a + // sized subrange over reverse iterators + using It = bidirectional_iterator; + using Subrange = std::ranges::subrange; + + using ReverseIt = std::reverse_iterator; + using ReverseSubrange = std::ranges::subrange; + + BidirRange view(range, range + 6); + ReverseSubrange subrange(ReverseIt(std::ranges::end(view)), ReverseIt(std::ranges::begin(view)), /* size */ 6); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::views::reverse(subrange); + } + { // views::reverse(x) is equivalent to subrange{end, begin} if x is an + // unsized subrange over reverse iterators + using It = bidirectional_iterator; + using Subrange = std::ranges::subrange; + + using ReverseIt = std::reverse_iterator; + using ReverseSubrange = std::ranges::subrange; + + BidirRange view(range, range + 6); + ReverseSubrange subrange(ReverseIt(std::ranges::end(view)), ReverseIt(std::ranges::begin(view))); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::views::reverse(subrange); + } + { // Otherwise, views::reverse(x) is equivalent to ranges::reverse_view{x} + BidirRange view(range, range + 6); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::views::reverse(view); + } +} From 11c61926d6270159780d1114d7c6e9c8f423e28a Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 24 Jun 2026 11:50:30 +0200 Subject: [PATCH 304/511] Remove unused variables in the monorepo (#204994) https://github.com/llvm/llvm-project/pull/203084 adds diagnostics about unused variables to the libc++ containers. 
This patch is the fallout from the projects I tried to build with it. --- clang-tools-extra/clangd/IncludeFixer.cpp | 7 ------- clang-tools-extra/modularize/PreprocessorTracker.cpp | 3 --- clang/lib/Driver/ToolChains/Hexagon.cpp | 1 - .../Checkers/StdLibraryFunctionsChecker.cpp | 4 ---- clang/unittests/AST/ASTImporterFixtures.cpp | 2 -- clang/unittests/Interpreter/CodeCompletionTest.cpp | 2 +- clang/unittests/Tooling/HeaderIncludesTest.cpp | 1 - lldb/source/Commands/CommandObjectCommands.cpp | 3 --- lldb/source/Commands/CommandObjectHelp.cpp | 1 - lldb/source/Interpreter/CommandInterpreter.cpp | 3 --- .../Hexagon-DYLD/HexagonDYLDRendezvous.cpp | 2 -- .../DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp | 2 -- .../GDBRemoteCommunicationServerPlatform.cpp | 5 +---- lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp | 2 -- lldb/source/Target/StackFrameList.cpp | 1 - lldb/tools/yaml2macho-core/yaml2macho.cpp | 2 -- .../Orc/TargetProcess/SimpleRemoteEPCServer.h | 1 - llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h | 8 ++++---- llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp | 2 +- llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp | 4 ---- llvm/tools/llvm-rc/ResourceScriptParser.cpp | 3 +-- llvm/tools/llvm-rc/ResourceScriptParser.h | 1 - llvm/tools/sancov/sancov.cpp | 1 - llvm/unittests/ADT/STLExtrasTest.cpp | 4 ++-- llvm/unittests/CodeGen/PassManagerTest.cpp | 2 +- llvm/unittests/TextAPI/TextStubV3Tests.cpp | 10 +--------- llvm/unittests/TextAPI/TextStubV4Tests.cpp | 4 ---- .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 1 - mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 1 - 29 files changed, 12 insertions(+), 71 deletions(-) diff --git a/clang-tools-extra/clangd/IncludeFixer.cpp b/clang-tools-extra/clangd/IncludeFixer.cpp index 3f3d7fbefd58e..5ecf853524a3f 100644 --- a/clang-tools-extra/clangd/IncludeFixer.cpp +++ b/clang-tools-extra/clangd/IncludeFixer.cpp @@ -613,13 +613,6 @@ IncludeFixer::lookupCached(const SymbolID &ID) const { Index.lookup(Req, [&](const 
Symbol &Sym) { Matches.insert(Sym); }); auto Syms = std::move(Matches).build(); - std::vector Fixes; - if (!Syms.empty()) { - auto &Matched = *Syms.begin(); - if (!Matched.IncludeHeaders.empty() && Matched.Definition && - Matched.CanonicalDeclaration.FileURI == Matched.Definition.FileURI) - Fixes = fixesForSymbols(Syms); - } auto E = LookupCache.try_emplace(ID, std::move(Syms)); return &E.first->second; } diff --git a/clang-tools-extra/modularize/PreprocessorTracker.cpp b/clang-tools-extra/modularize/PreprocessorTracker.cpp index 04abb2733f5a7..6205b97a17176 100644 --- a/clang-tools-extra/modularize/PreprocessorTracker.cpp +++ b/clang-tools-extra/modularize/PreprocessorTracker.cpp @@ -1078,9 +1078,6 @@ class PreprocessorTrackerImpl : public PreprocessorTracker { auto I = ConditionalExpansions.find(InstanceKey); // If existing instance of condition not found, add one. if (I == ConditionalExpansions.end()) { - std::string InstanceSourceLine = - getSourceLocationString(PP, InstanceLoc) + ":\n" + - getSourceLine(PP, InstanceLoc) + "\n"; ConditionalExpansions[InstanceKey] = ConditionalTracker(DirectiveKind, ConditionValue, ConditionUnexpandedHandle, InclusionPathHandle); diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index b671db98a7798..3643c0d4e526c 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -534,7 +534,6 @@ void hexagon::Linker::ConstructJob(Compilation &C, const JobAction &JA, std::string HexagonToolChain::getHexagonTargetDir( const std::string &InstalledDir, const SmallVectorImpl &PrefixDirs) const { - std::string InstallRelDir; const Driver &D = getDriver(); // Locate the rest of the toolchain ... 
diff --git a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp index 50b34cb181ca5..4fe3e1f7623f6 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp @@ -2149,8 +2149,6 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( ErrnoIrrelevant) .ArgConstraint(NotNull(ArgNo(0)))); } else { - const auto ReturnsZeroOrMinusOne = - ConstraintSet{ReturnValueCondition(WithinRange, Range(-1, 0))}; const auto ReturnsZero = ConstraintSet{ReturnValueCondition(WithinRange, SingleValue(0))}; const auto ReturnsMinusOne = @@ -2161,8 +2159,6 @@ void StdLibraryFunctionsChecker::initFunctionSummaries( ConstraintSet{ReturnValueCondition(WithinRange, Range(0, IntMax))}; const auto ReturnsNonZero = ConstraintSet{ReturnValueCondition(OutOfRange, SingleValue(0))}; - const auto ReturnsFileDescriptor = - ConstraintSet{ReturnValueCondition(WithinRange, Range(-1, IntMax))}; const auto &ReturnsValidFileDescriptor = ReturnsNonnegative; auto ValidFileDescriptorOrAtFdcwd = [&](ArgNo ArgN) { diff --git a/clang/unittests/AST/ASTImporterFixtures.cpp b/clang/unittests/AST/ASTImporterFixtures.cpp index e4e5a078262c3..48421fe90a431 100644 --- a/clang/unittests/AST/ASTImporterFixtures.cpp +++ b/clang/unittests/AST/ASTImporterFixtures.cpp @@ -142,7 +142,6 @@ std::tuple ASTImporterTestBase::getImportedDecl( StringRef FromSrcCode, TestLanguage FromLang, StringRef ToSrcCode, TestLanguage ToLang, StringRef Identifier) { std::vector FromArgs = getCommandLineArgsForLanguage(FromLang); - std::vector ToArgs = getCommandLineArgsForLanguage(ToLang); FromTUs.emplace_back(FromSrcCode, InputFileName, FromArgs, Creator, ODRHandling); @@ -185,7 +184,6 @@ TranslationUnitDecl *ASTImporterTestBase::getTuDecl(StringRef SrcCode, TranslationUnitDecl *ASTImporterTestBase::getToTuDecl(StringRef ToSrcCode, TestLanguage ToLang) { - std::vector ToArgs = 
getCommandLineArgsForLanguage(ToLang); assert(!ToAST); lazyInitToAST(ToLang, ToSrcCode, OutputFileName); return ToAST->getASTContext().getTranslationUnitDecl(); diff --git a/clang/unittests/Interpreter/CodeCompletionTest.cpp b/clang/unittests/Interpreter/CodeCompletionTest.cpp index ceb683497ac74..fcabb28092811 100644 --- a/clang/unittests/Interpreter/CodeCompletionTest.cpp +++ b/clang/unittests/Interpreter/CodeCompletionTest.cpp @@ -98,7 +98,7 @@ TEST_F(CodeCompletionTest, TwoDecls) { TEST_F(CodeCompletionTest, CompFunDeclsNoError) { auto Err = llvm::Error::success(); - auto comps = runComp("void app(", Err); + runComp("void app(", Err); EXPECT_EQ((bool)Err, false); } diff --git a/clang/unittests/Tooling/HeaderIncludesTest.cpp b/clang/unittests/Tooling/HeaderIncludesTest.cpp index 8259394cb30bc..95fb05885a0b9 100644 --- a/clang/unittests/Tooling/HeaderIncludesTest.cpp +++ b/clang/unittests/Tooling/HeaderIncludesTest.cpp @@ -678,7 +678,6 @@ int main() { std::vector ints {}; })cpp"; - auto InsertedCode = insert(Code, ""); EXPECT_EQ(Expected, insert(Code, "")); } diff --git a/lldb/source/Commands/CommandObjectCommands.cpp b/lldb/source/Commands/CommandObjectCommands.cpp index 84e661ec01f53..8f006768ecc9a 100644 --- a/lldb/source/Commands/CommandObjectCommands.cpp +++ b/lldb/source/Commands/CommandObjectCommands.cpp @@ -627,7 +627,6 @@ class CommandObjectCommandsUnalias : public CommandObjectParsed { protected: void DoExecute(Args &args, CommandReturnObject &result) override { - CommandObject::CommandMap::iterator pos; CommandObject *cmd_obj; if (args.empty()) { @@ -702,8 +701,6 @@ class CommandObjectCommandsDelete : public CommandObjectParsed { protected: void DoExecute(Args &args, CommandReturnObject &result) override { - CommandObject::CommandMap::iterator pos; - if (args.empty()) { result.AppendErrorWithFormat("must call '%s' with one or more valid user " "defined regular expression command names", diff --git a/lldb/source/Commands/CommandObjectHelp.cpp 
b/lldb/source/Commands/CommandObjectHelp.cpp index a29ded846b100..c96c5158c06f6 100644 --- a/lldb/source/Commands/CommandObjectHelp.cpp +++ b/lldb/source/Commands/CommandObjectHelp.cpp @@ -64,7 +64,6 @@ CommandObjectHelp::CommandOptions::GetDefinitions() { } void CommandObjectHelp::DoExecute(Args &command, CommandReturnObject &result) { - CommandObject::CommandMap::iterator pos; CommandObject *cmd_obj; const size_t argc = command.GetArgumentCount(); diff --git a/lldb/source/Interpreter/CommandInterpreter.cpp b/lldb/source/Interpreter/CommandInterpreter.cpp index 0c5456c2c3b57..a35baa7fd6d27 100644 --- a/lldb/source/Interpreter/CommandInterpreter.cpp +++ b/lldb/source/Interpreter/CommandInterpreter.cpp @@ -2962,7 +2962,6 @@ void CommandInterpreter::HandleCommandsFromFile( auto input_file_up = FileSystem::Instance().Open(cmd_file, File::eOpenOptionReadOnly); if (!input_file_up) { - std::string error = llvm::toString(input_file_up.takeError()); result.AppendErrorWithFormatv( "error: an error occurred read file '{0}': {1}\n", cmd_file_path, llvm::fmt_consume(input_file_up.takeError())); @@ -3263,8 +3262,6 @@ void CommandInterpreter::FindCommandsForApropos(llvm::StringRef search_word, bool search_user_commands, bool search_alias_commands, bool search_user_mw_commands) { - CommandObject::CommandMap::const_iterator pos; - if (search_builtin_commands) FindCommandsForApropos(search_word, commands_found, commands_help, m_command_dict); diff --git a/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/HexagonDYLDRendezvous.cpp b/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/HexagonDYLDRendezvous.cpp index 62c0fb0ff4eb8..7b5bcc2567dda 100644 --- a/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/HexagonDYLDRendezvous.cpp +++ b/lldb/source/Plugins/DynamicLoader/Hexagon-DYLD/HexagonDYLDRendezvous.cpp @@ -148,7 +148,6 @@ bool HexagonDYLDRendezvous::UpdateSOEntries() { bool HexagonDYLDRendezvous::UpdateSOEntriesForAddition() { SOEntry entry; - iterator pos; assert(m_previous.state == 
eAdd); @@ -176,7 +175,6 @@ bool HexagonDYLDRendezvous::UpdateSOEntriesForAddition() { bool HexagonDYLDRendezvous::UpdateSOEntriesForDeletion() { SOEntryList entry_list; - iterator pos; assert(m_previous.state == eDelete); diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp index c2084e0322c1f..2d0eef666f688 100644 --- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp +++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DYLDRendezvous.cpp @@ -497,7 +497,6 @@ bool DYLDRendezvous::RemoveSOEntriesFromRemote( bool DYLDRendezvous::AddSOEntries() { SOEntry entry; - iterator pos; assert(m_previous.state == eAdd); @@ -525,7 +524,6 @@ bool DYLDRendezvous::AddSOEntries() { bool DYLDRendezvous::RemoveSOEntries() { SOEntryList entry_list; - iterator pos; assert(m_previous.state == eDelete); diff --git a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp index 5876c3a9434a1..eaed4e6742824 100644 --- a/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp @@ -156,15 +156,12 @@ GDBRemoteCommunicationServerPlatform::Handle_qLaunchGDBServer( __FUNCTION__); ConnectionFileDescriptor file_conn; - std::string hostname; packet.SetFilePos(::strlen("qLaunchGDBServer;")); llvm::StringRef name; llvm::StringRef value; std::optional port; while (packet.GetNameColonValue(name, value)) { - if (name == "host") - hostname = std::string(value); - else if (name == "port") { + if (name == "port") { // Make the Optional valid so we can use its value port = 0; value.getAsInteger(0, *port); diff --git a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp index c92f80bc166b7..96d6910ce5ce5 100644 --- 
a/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp +++ b/lldb/source/Plugins/Protocol/MCP/ProtocolServerMCP.cpp @@ -98,8 +98,6 @@ llvm::Error ProtocolServerMCP::Start(ProtocolServer::Connection connection) { auto listening_uris = m_listener->GetListeningConnectionURI(); if (listening_uris.empty()) return createStringError("failed to get listening connections"); - std::string address = - llvm::join(m_listener->GetListeningConnectionURI(), ", "); ServerInfo info{listening_uris[0]}; llvm::Expected server_info_handle = ServerInfo::Write(info); diff --git a/lldb/source/Target/StackFrameList.cpp b/lldb/source/Target/StackFrameList.cpp index f23261d50ed4b..01ce5870a6edb 100644 --- a/lldb/source/Target/StackFrameList.cpp +++ b/lldb/source/Target/StackFrameList.cpp @@ -1021,7 +1021,6 @@ size_t StackFrameList::GetStatus(Stream &strm, uint32_t first_frame, StackFrameSP selected_frame_sp = m_thread.GetSelectedFrame(DoNoSelectMostRelevantFrame); - std::string buffer; std::string marker; for (frame_idx = first_frame; frame_idx < last_frame; ++frame_idx) { frame_sp = GetFrameAtIndex(frame_idx); diff --git a/lldb/tools/yaml2macho-core/yaml2macho.cpp b/lldb/tools/yaml2macho-core/yaml2macho.cpp index c29ae282d8571..cd51d228ff062 100644 --- a/lldb/tools/yaml2macho-core/yaml2macho.cpp +++ b/lldb/tools/yaml2macho-core/yaml2macho.cpp @@ -176,7 +176,6 @@ int main(int argc, char **argv) { if (spec.binaries.size() > 0) for (const Binary &binary : spec.binaries) { std::vector segment_command_bytes; - std::vector payload_bytes; create_lc_note_binary_load_cmd(spec, segment_command_bytes, binary, lc_note_payload_bytes, payload_fileoff); payload_fileoff = @@ -185,7 +184,6 @@ int main(int argc, char **argv) { } if (spec.addressable_bits) { std::vector segment_command_bytes; - std::vector payload_bytes; create_lc_note_addressable_bits(spec, segment_command_bytes, *spec.addressable_bits, lc_note_payload_bytes, payload_fileoff); diff --git 
a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h index e5d28345bb84e..0aa040bc00138 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TargetProcess/SimpleRemoteEPCServer.h @@ -188,7 +188,6 @@ class LLVM_ABI SimpleRemoteEPCServer : public SimpleRemoteEPCTransportClient { uint64_t NextSeqNo = 0; PendingJITDispatchResultsMap PendingJITDispatchResults; - std::vector Dylibs; }; } // end namespace orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h index b73da194cd187..643562616dd22 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/TaskDispatch.h @@ -59,9 +59,9 @@ class GenericNamedTask : public RTTIExtends { /// Generic task implementation. template class GenericNamedTaskImpl : public GenericNamedTask { public: - GenericNamedTaskImpl(FnT &&Fn, std::string DescBuffer) - : Fn(std::forward(Fn)), Desc(DescBuffer.c_str()), - DescBuffer(std::move(DescBuffer)) {} + GenericNamedTaskImpl(FnT &&Fn, std::string InDescBuffer) + : Fn(std::forward(Fn)), DescBuffer(std::move(InDescBuffer)), + Desc(DescBuffer.c_str()) {} GenericNamedTaskImpl(FnT &&Fn, const char *Desc) : Fn(std::forward(Fn)), Desc(Desc) { assert(Desc && "Description cannot be null"); @@ -71,8 +71,8 @@ template class GenericNamedTaskImpl : public GenericNamedTask { private: FnT Fn; - const char *Desc; std::string DescBuffer; + const char *Desc; }; /// Create a generic named task from a std::string description. 
diff --git a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp index 2c0eeac0089ae..606f5a520fd7d 100644 --- a/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp +++ b/llvm/lib/DWARFCFIChecker/DWARFCFIState.cpp @@ -36,7 +36,7 @@ void DWARFCFIState::update(const MCCFIInstruction &Directive) { // updated row and following the previous rows. These middle rows are stored // in `PrecedingRows`. For now, there is no need to store these rows in the // state, so they are ignored in the end. - dwarf::UnwindTable::RowContainer PrecedingRows; + // dwarf::UnwindTable::RowContainer PrecedingRows; // TODO: `.cfi_remember_state` and `.cfi_restore_state` directives are not // supported yet. The reason is that `parseRows` expects the stack of states diff --git a/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp b/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp index 4165274a6f8bd..d1c0022f71dcc 100644 --- a/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp +++ b/llvm/tools/llvm-libtool-darwin/llvm-libtool-darwin.cpp @@ -88,7 +88,6 @@ static std::string DependencyInfoPath; static bool VersionOption; static bool NoWarningForNoSymbols; static bool WarningsAsErrors; -static std::string IgnoredSyslibRoot; static const std::array StandardSearchDirs{ "/lib", @@ -659,9 +658,6 @@ static void parseRawArgs(int Argc, char **Argv) { if (const opt::Arg *A = Args.getLastArg(OPT_dependencyInfoPath)) DependencyInfoPath = A->getValue(); - if (const opt::Arg *A = Args.getLastArg(OPT_ignoredSyslibRoot)) - IgnoredSyslibRoot = A->getValue(); - LibraryOperation = Args.hasArg(OPT_static) ? 
Operation::Static : Operation::None; DeterministicOption = Args.hasArg(OPT_deterministicOption); diff --git a/llvm/tools/llvm-rc/ResourceScriptParser.cpp b/llvm/tools/llvm-rc/ResourceScriptParser.cpp index e4efc83c933b4..f28efb6de611e 100644 --- a/llvm/tools/llvm-rc/ResourceScriptParser.cpp +++ b/llvm/tools/llvm-rc/ResourceScriptParser.cpp @@ -33,8 +33,7 @@ namespace llvm { namespace rc { RCParser::ParserError::ParserError(const Twine &Expected, const LocIter CurLoc, - const LocIter End) - : ErrorLoc(CurLoc), FileEnd(End) { + const LocIter End) { CurMessage = "Error parsing file: expected " + Expected.str() + ", got " + (CurLoc == End ? "" : CurLoc->value()).str(); } diff --git a/llvm/tools/llvm-rc/ResourceScriptParser.h b/llvm/tools/llvm-rc/ResourceScriptParser.h index 1e7618c84142e..ed7a09b16834e 100644 --- a/llvm/tools/llvm-rc/ResourceScriptParser.h +++ b/llvm/tools/llvm-rc/ResourceScriptParser.h @@ -48,7 +48,6 @@ class RCParser { private: std::string CurMessage; - LocIter ErrorLoc, FileEnd; }; explicit RCParser(std::vector TokenList); diff --git a/llvm/tools/sancov/sancov.cpp b/llvm/tools/sancov/sancov.cpp index 483e6d7d58d68..d6fdac2583d5d 100644 --- a/llvm/tools/sancov/sancov.cpp +++ b/llvm/tools/sancov/sancov.cpp @@ -987,7 +987,6 @@ computeNotCoveredFunctions(const SymbolizedCoverage &Coverage) { static std::set computeCoveredFunctions(const SymbolizedCoverage &Coverage) { - auto AllFns = computeFunctions(Coverage.Points); std::set Result; for (const auto &Point : Coverage.Points) { diff --git a/llvm/unittests/ADT/STLExtrasTest.cpp b/llvm/unittests/ADT/STLExtrasTest.cpp index 2f1e01d16e3c5..00c062ae1ca29 100644 --- a/llvm/unittests/ADT/STLExtrasTest.cpp +++ b/llvm/unittests/ADT/STLExtrasTest.cpp @@ -1170,7 +1170,7 @@ TEST(STLExtrasTest, getSingleElement) { } TEST(STLExtrasTest, hasNItems) { - const std::list V0 = {}, V1 = {1}, V2 = {1, 2}; + const std::list V0 = {}, V1 = {1}; const std::list V3 = {1, 3, 5}; EXPECT_TRUE(hasNItems(V0, 0)); @@ -1191,7 +1191,7 
@@ TEST(STLExtrasTest, hasNItems) { } TEST(STLExtras, hasNItemsOrMore) { - const std::list V0 = {}, V1 = {1}, V2 = {1, 2}; + const std::list V1 = {1}, V2 = {1, 2}; const std::list V3 = {1, 3, 5}; EXPECT_TRUE(hasNItemsOrMore(V1, 1)); diff --git a/llvm/unittests/CodeGen/PassManagerTest.cpp b/llvm/unittests/CodeGen/PassManagerTest.cpp index dc83effd3c1e1..f004d14b4aee4 100644 --- a/llvm/unittests/CodeGen/PassManagerTest.cpp +++ b/llvm/unittests/CodeGen/PassManagerTest.cpp @@ -218,7 +218,7 @@ TEST_F(PassManagerTest, Basic) { testing::internal::CaptureStderr(); MPM.run(*M, MAM); - std::string Output = testing::internal::GetCapturedStderr(); + testing::internal::GetCapturedStderr(); EXPECT_EQ((std::vector{10, 16, 18, 20, 30, 36, 38, 40}), Counts); EXPECT_EQ(40, Count); diff --git a/llvm/unittests/TextAPI/TextStubV3Tests.cpp b/llvm/unittests/TextAPI/TextStubV3Tests.cpp index 3c822b2188a41..19624c9dc2abb 100644 --- a/llvm/unittests/TextAPI/TextStubV3Tests.cpp +++ b/llvm/unittests/TextAPI/TextStubV3Tests.cpp @@ -81,10 +81,6 @@ TEST(TBDv3, ReadFile) { for (auto &&arch : Archs) Targets.emplace_back(Target(arch, Platform)); EXPECT_EQ(Archs, File->getArchitectures()); - TargetToAttr Uuids = {{Target(AK_armv7, PLATFORM_UNKNOWN), - "00000000-0000-0000-0000-000000000000"}, - {Target(AK_arm64, PLATFORM_UNKNOWN), - "11111111-1111-1111-1111-111111111111"}}; EXPECT_EQ(File->getPlatforms().size(), 1U); EXPECT_EQ(Platform, *File->getPlatforms().begin()); EXPECT_EQ(std::string("Test.dylib"), File->getInstallName()); @@ -165,10 +161,6 @@ TEST(TBDv3, ReadMultipleDocuments) { for (auto &&arch : Archs) Targets.emplace_back(Target(arch, Platform)); EXPECT_EQ(Archs, File->getArchitectures()); - TargetToAttr Uuids = {{Target(AK_armv7, PLATFORM_UNKNOWN), - "00000000-0000-0000-0000-000000000000"}, - {Target(AK_arm64, PLATFORM_UNKNOWN), - "11111111-1111-1111-1111-111111111111"}}; EXPECT_EQ(File->getPlatforms().size(), 1U); EXPECT_EQ(Platform, *File->getPlatforms().begin()); 
EXPECT_EQ(std::string("Test.dylib"), File->getInstallName()); @@ -358,7 +350,7 @@ TEST(TBDv3, WriteMultipleDocuments) { Document.addSymbol(EncodeKind::GlobalSymbol, "_sym3", Targets); Document.addSymbol(EncodeKind::GlobalSymbol, "_sym4", Targets); File.addDocument(std::make_shared(std::move(Document))); - + SmallString<4096> Buffer; raw_svector_ostream OS(Buffer); Error Result = TextAPIWriter::writeToStream(OS, File); diff --git a/llvm/unittests/TextAPI/TextStubV4Tests.cpp b/llvm/unittests/TextAPI/TextStubV4Tests.cpp index 3f9d8d9b7deaa..11c485198b704 100644 --- a/llvm/unittests/TextAPI/TextStubV4Tests.cpp +++ b/llvm/unittests/TextAPI/TextStubV4Tests.cpp @@ -394,8 +394,6 @@ TEST(TBDv4, WriteMultipleDocuments) { Target(AK_i386, Platform), Target(AK_x86_64, Platform), }; - TargetToAttr Uuids = {{Targets[0], "00000000-0000-0000-0000-000000000002"}, - {Targets[1], "11111111-1111-1111-1111-111111111112"}}; File.setInstallName("/System/Library/Frameworks/Umbrella.framework/Umbrella"); File.setFileType(FileType::TBD_V4); File.addTargets(Targets); @@ -409,8 +407,6 @@ TEST(TBDv4, WriteMultipleDocuments) { Targets[1]); // Write Second Document - Uuids = {{Targets[0], "00000000-0000-0000-0000-000000000000"}, - {Targets[1], "11111111-1111-1111-1111-111111111111"}}; InterfaceFile Document; Document.setInstallName("/System/Library/Frameworks/A.framework/A"); Document.setFileType(FileType::TBD_V4); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 77cc7a388a984..0f954e384929a 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -6546,7 +6546,6 @@ calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, // to use or standardizing/canonicalizing the order of the bounds to compute // the offset may be useful in the future when there's other frontends with // 
different formats. - std::vector dimensionIndexSizeOffset; for (int i = bounds.size() - 1; i >= 0; --i) { if (auto boundOp = dyn_cast_if_present( bounds[i].getDefiningOp())) { diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 90ada40302296..ee73c9841e240 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -3429,7 +3429,6 @@ void OpEmitter::genCodeForAddingArgAndRegionForBuilder( }); }; if (op.getTrait("::mlir::OpTrait::AttrSizedOperandSegments")) { - std::string sizes = op.getGetterName(operandSegmentAttrName); body << " ::llvm::copy(::llvm::ArrayRef({"; emitSegment(); body << "}), " << builderOpStateProperties From 7a6c8fb2f1b0e663fe1a29ca5b218fe15ccab959 Mon Sep 17 00:00:00 2001 From: Akimasa Watanuki Date: Wed, 24 Jun 2026 19:02:31 +0900 Subject: [PATCH 305/511] [CIR] Handle const evaluated variable values (#205512) Match the `VarDecl::evaluateValue()` contract updated by #205033 in CIR constant emission. --- clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp index 6c64d7571795a..c29b66ac2f8bc 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprConstant.cpp @@ -1725,7 +1725,7 @@ mlir::Attribute ConstantEmitter::tryEmitPrivateForVarInit(const VarDecl &d) { // Try to emit the initializer. Note that this can allow some things that // are not allowed by tryEmitPrivateForMemory alone. 
- if (APValue *value = d.evaluateValue()) + if (const APValue *value = d.evaluateValue()) return tryEmitPrivateForMemory(*value, destType); return {}; From a4ec22dd22e300cdb96c77c4f5feb6abb1a71137 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 24 Jun 2026 11:08:01 +0100 Subject: [PATCH 306/511] [X86] madd.ll - add additional tests for matchPMADDWD folds that fail with irregular source types (#205514) Ensure #205391 doesn't crash with non-pow2/illegal types --- llvm/test/CodeGen/X86/madd.ll | 235 ++++++++++++++++++++++++++++++++++ 1 file changed, 235 insertions(+) diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll index 056712cc5a66c..63b390f4b9bdf 100644 --- a/llvm/test/CodeGen/X86/madd.ll +++ b/llvm/test/CodeGen/X86/madd.ll @@ -3739,3 +3739,238 @@ define <16 x i32> @extract_concat_pmaddwd(<32 x i16> %a, <32 x i16> %b) { %ret = add <16 x i32> %odd, %even ret <16 x i32> %ret } + +define <5 x i32> @oddvector_mul(<16 x i16> %A, <16 x i16> %B) { +; SSE2-LABEL: oddvector_mul: +; SSE2: # %bb.0: +; SSE2-NEXT: movq %rdi, %rax +; SSE2-NEXT: movdqa %xmm1, %xmm4 +; SSE2-NEXT: pmulhw %xmm3, %xmm4 +; SSE2-NEXT: pmullw %xmm3, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pmulhw %xmm2, %xmm3 +; SSE2-NEXT: pmullw %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7] +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2],xmm2[0,2] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] +; SSE2-NEXT: paddd %xmm3, %xmm0 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1] +; SSE2-NEXT: movd %xmm2, %ecx +; SSE2-NEXT: movdqa %xmm0, (%rdi) +; SSE2-NEXT: movd %xmm1, %edx +; SSE2-NEXT: addl %ecx, %edx +; SSE2-NEXT: movl 
%edx, 16(%rdi) +; SSE2-NEXT: retq +; +; SSE42-LABEL: oddvector_mul: +; SSE42: # %bb.0: +; SSE42-NEXT: movq %rdi, %rax +; SSE42-NEXT: pxor %xmm4, %xmm4 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm5 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero +; SSE42-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; SSE42-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] +; SSE42-NEXT: pmaddwd %xmm2, %xmm0 +; SSE42-NEXT: pmaddwd %xmm5, %xmm4 +; SSE42-NEXT: phaddd %xmm0, %xmm4 +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero +; SSE42-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; SSE42-NEXT: pmaddwd %xmm0, %xmm1 +; SSE42-NEXT: movd %xmm1, %ecx +; SSE42-NEXT: movdqa %xmm4, (%rdi) +; SSE42-NEXT: pextrd $1, %xmm1, %edx +; SSE42-NEXT: addl %ecx, %edx +; SSE42-NEXT: movl %edx, 16(%rdi) +; SSE42-NEXT: retq +; +; AVX1-LABEL: oddvector_mul: +; AVX1: # %bb.0: +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] +; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; AVX1-NEXT: vpmaddwd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vphaddd %xmm0, %xmm4, %xmm1 +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero +; AVX1-NEXT: vpmaddwd %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vphaddd %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 +; 
AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3] +; AVX1-NEXT: retq +; +; AVX2-LABEL: oddvector_mul: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovsxwd %xmm0, %ymm2 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; AVX2-NEXT: vpmovsxwd %xmm1, %ymm3 +; AVX2-NEXT: vpmulld %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 +; AVX2-NEXT: vpmovsxwd %xmm1, %ymm1 +; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vphaddd %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: retq +; +; AVX512-LABEL: oddvector_mul: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512-NEXT: vpmovsxwd %ymm1, %zmm1 +; AVX512-NEXT: vpmulld %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512-NEXT: vphaddd %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX512-NEXT: retq + %a = sext <16 x i16> %A to <16 x i32> + %b = sext <16 x i16> %B to <16 x i32> + %m = mul nsw <16 x i32> %a, %b + %odd = shufflevector <16 x i32> %m, <16 x i32> undef, <5 x i32> + %even = shufflevector <16 x i32> %m, <16 x i32> undef, <5 x i32> + %ret = add <5 x i32> %odd, %even + ret <5 x i32> %ret +} + +define <4 x i32> @oddvector_sext(<13 x i16> %A) { +; SSE2-LABEL: oddvector_sext: +; SSE2: # %bb.0: +; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: pinsrw $1, %r8d, %xmm1 +; SSE2-NEXT: pinsrw $3, %r9d, %xmm1 +; SSE2-NEXT: pinsrw $5, {{[0-9]+}}(%rsp), %xmm1 +; SSE2-NEXT: pinsrw $7, {{[0-9]+}}(%rsp), %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pinsrw $1, %edi, %xmm0 +; SSE2-NEXT: pinsrw $3, %esi, %xmm0 +; SSE2-NEXT: pinsrw $5, %edx, %xmm0 +; SSE2-NEXT: pinsrw $7, %ecx, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] +; SSE2-NEXT: paddd %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSE42-LABEL: oddvector_sext: 
+; SSE42: # %bb.0: +; SSE42-NEXT: movd %edi, %xmm0 +; SSE42-NEXT: pinsrw $1, %esi, %xmm0 +; SSE42-NEXT: pinsrw $2, %edx, %xmm0 +; SSE42-NEXT: pinsrw $3, %ecx, %xmm0 +; SSE42-NEXT: movd %r8d, %xmm1 +; SSE42-NEXT: pinsrw $1, %r9d, %xmm1 +; SSE42-NEXT: pinsrw $2, {{[0-9]+}}(%rsp), %xmm1 +; SSE42-NEXT: pinsrw $3, {{[0-9]+}}(%rsp), %xmm1 +; SSE42-NEXT: pmovsxwd %xmm1, %xmm1 +; SSE42-NEXT: pmovsxwd %xmm0, %xmm0 +; SSE42-NEXT: phaddd %xmm1, %xmm0 +; SSE42-NEXT: retq +; +; AVX1-LABEL: oddvector_sext: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,1,1,1,1,1,1] +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: oddvector_sext: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,1,1,1,1,1,1] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: oddvector_sext: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %a = sext <13 x i16> %A to <13 x i32> + %odd = shufflevector <13 x i32> %a, <13 x i32> undef, <4 x i32> + %even = shufflevector <13 x i32> %a, <13 x i32> undef, <4 x i32> + %ret = add <4 x i32> %odd, %even + ret <4 x i32> %ret +} + +define <3 x i32> @oddvector_shl(<12 x i16> %A) { +; SSE2-LABEL: oddvector_shl: +; SSE2: # %bb.0: +; SSE2-NEXT: pxor %xmm1, %xmm1 +; SSE2-NEXT: pxor %xmm0, %xmm0 +; SSE2-NEXT: pinsrw $1, %edi, %xmm0 +; SSE2-NEXT: pinsrw $3, %esi, %xmm0 +; SSE2-NEXT: pinsrw $5, %edx, %xmm0 +; SSE2-NEXT: pinsrw $7, %ecx, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: pinsrw $1, %r8d, %xmm1 +; SSE2-NEXT: pinsrw $3, %r9d, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $7, %xmm1 +; SSE2-NEXT: pslld $7, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,3] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] +; SSE2-NEXT: paddd %xmm2, %xmm0 +; SSE2-NEXT: 
retq +; +; SSE42-LABEL: oddvector_shl: +; SSE42: # %bb.0: +; SSE42-NEXT: movd %edi, %xmm0 +; SSE42-NEXT: pinsrw $1, %esi, %xmm0 +; SSE42-NEXT: pinsrw $2, %edx, %xmm0 +; SSE42-NEXT: pinsrw $3, %ecx, %xmm0 +; SSE42-NEXT: pmovsxwd %xmm0, %xmm0 +; SSE42-NEXT: movd %r8d, %xmm1 +; SSE42-NEXT: pinsrw $1, %r9d, %xmm1 +; SSE42-NEXT: pmovsxwd %xmm1, %xmm1 +; SSE42-NEXT: pslld $7, %xmm1 +; SSE42-NEXT: pslld $7, %xmm0 +; SSE42-NEXT: phaddd %xmm1, %xmm0 +; SSE42-NEXT: retq +; +; AVX1-LABEL: oddvector_shl: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] +; AVX1-NEXT: vpmovsxwd %xmm1, %xmm1 +; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 +; AVX1-NEXT: vpslld $7, %xmm0, %xmm0 +; AVX1-NEXT: vpslld $7, %xmm1, %xmm1 +; AVX1-NEXT: vphaddd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: oddvector_shl: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 +; AVX2-NEXT: vpslld $7, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: oddvector_shl: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovsxbd {{.*#+}} xmm1 = [1,3,5,0] +; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 +; AVX512-NEXT: vpslld $7, %ymm0, %ymm0 +; AVX512-NEXT: vpermd %ymm0, %ymm1, %ymm1 +; AVX512-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %a = sext <12 x i16> %A to <12 x i32> + %shl = shl <12 x i32> %a, splat (i32 7) + %odd = shufflevector <12 x i32> %shl, <12 x i32> undef, <3 x i32> + %even = shufflevector <12 x i32> %shl, <12 x i32> undef, <3 x i32> + %ret = add <3 x i32> %odd, %even + ret <3 x i32> %ret +} From c08aff55d1de6db0ce9e5c1082b098a99b89c457 Mon Sep 17 00:00:00 2001 From: Anonmiraj Date: Wed, 24 Jun 2026 13:18:58 +0300 Subject: [PATCH 307/511] [APINotes] Skip per-decl ProcessAPINotes work when no API notes are active (#203710) Cache "any API notes active" and skip per-decl lookups. 
| function | before #202727 | trunk (parent) | this PR | | ------------------------ | ----------------- | ----------------- | ------------------- | | `Sema::ProcessAPINotes` | 2,385,852 (0.21%) | 1,028,280 (0.09%) | **365,310 (0.03%)** | | `UnwindNamespaceContext` | 349,244 | gone | gone | | `findAPINotes` | 448,140 | 473,550 | **gone** | cc @egorzhdan @Xazax-hun @compnerd --- clang/include/clang/APINotes/APINotesManager.h | 7 +++++++ clang/lib/APINotes/APINotesManager.cpp | 5 +++++ clang/lib/Sema/SemaAPINotes.cpp | 2 ++ 3 files changed, 14 insertions(+) diff --git a/clang/include/clang/APINotes/APINotesManager.h b/clang/include/clang/APINotes/APINotesManager.h index 772fa5faa0f87..aaf48706fb26b 100644 --- a/clang/include/clang/APINotes/APINotesManager.h +++ b/clang/include/clang/APINotes/APINotesManager.h @@ -50,6 +50,11 @@ class APINotesManager { /// source file from which an entity was declared. bool ImplicitAPINotes; + /// Cached value of hasAPINotes() true once any current-module reader has + /// been loaded, or if implicit API notes lookup is enabled. Monotonic within + /// a compilation, so it can be tested per-declaration without recomputing. + bool HasAPINotes; + /// Whether to apply all APINotes as optionally-applied versioned /// entities. This means that when building a Clang module, /// we capture every note on a given decl wrapped in a SwiftVersionedAttr @@ -172,6 +177,8 @@ class APINotesManager { return ArrayRef(CurrentModuleReaders).slice(0, HasPrivate ? 2 : 1); } + bool hasAPINotes() const { return HasAPINotes; } + /// Find the API notes readers that correspond to the given source location. 
llvm::SmallVector findAPINotes(SourceLocation Loc); diff --git a/clang/lib/APINotes/APINotesManager.cpp b/clang/lib/APINotes/APINotesManager.cpp index acb84c3949cb1..2cc801d5415b8 100644 --- a/clang/lib/APINotes/APINotesManager.cpp +++ b/clang/lib/APINotes/APINotesManager.cpp @@ -50,6 +50,7 @@ class PrettyStackTraceDoubleString : public llvm::PrettyStackTraceEntry { APINotesManager::APINotesManager(SourceManager &SM, const LangOptions &LangOpts) : SM(SM), ImplicitAPINotes(LangOpts.APINotes), + HasAPINotes(LangOpts.APINotes), VersionIndependentSwift(LangOpts.SwiftVersionIndependentAPINotes) {} APINotesManager::~APINotesManager() { @@ -319,6 +320,8 @@ bool APINotesManager::loadCurrentModuleAPINotes( M->APINotesFile = File.getName().str(); } + if (NumReaders > 0) + HasAPINotes = true; return NumReaders > 0; } @@ -331,6 +334,8 @@ bool APINotesManager::loadCurrentModuleAPINotesFromBuffer( CurrentModuleReaders[NumReader++] = Reader.release(); } + if (NumReader > 0) + HasAPINotes = true; return NumReader; } diff --git a/clang/lib/Sema/SemaAPINotes.cpp b/clang/lib/Sema/SemaAPINotes.cpp index a99408a4c8a7b..67c08d239e758 100644 --- a/clang/lib/Sema/SemaAPINotes.cpp +++ b/clang/lib/Sema/SemaAPINotes.cpp @@ -998,6 +998,8 @@ UnwindTagContext(TagDecl *DC, api_notes::APINotesManager &APINotes) { void Sema::ProcessAPINotes(Decl *D) { if (!D) return; + if (!APINotes.hasAPINotes()) + return; auto Readers = APINotes.findAPINotes(D->getLocation()); if (Readers.empty()) return; From bd177f964244c93af3ddcb03cc6d1e748cb1c6ac Mon Sep 17 00:00:00 2001 From: CarolineConcatto Date: Wed, 24 Jun 2026 11:30:13 +0100 Subject: [PATCH 308/511] [AArch64][TableGen] Define ZA, ZT0 and FPMR memory defvars (#154144) Introduce TableGen defvars for the inaccessible memory effects used to model accesses to ZA, ZT0 and FPMR in IntrinsicsAArch64.td. This is a preparatory cleanup for a follow-up patch that will replace these uses of InaccessibleMem with target-specific memory locations. 
Other uses of inaccessible memory in the file are left unchanged because they are unrelated to ZA, ZT0 or FPMR. This preserves the existing memory effects. In particular, intrinsics that currently access both argument memory and inaccessible memory keep the same ArgMem/InaccessibleMem read/write modelling. --------- Co-authored-by: Paul Walker --- llvm/include/llvm/IR/Intrinsics.td | 5 + llvm/include/llvm/IR/IntrinsicsAArch64.td | 218 +++++++++++++--------- 2 files changed, 134 insertions(+), 89 deletions(-) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index d2517fbc4c8b2..c4eedb62d46fc 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -74,6 +74,11 @@ class IntrWrite idx> : IntrinsicProperty { list MemLoc=idx; } +// Constrain intrinsic to not write any memory location. +defvar IntrReadOnly = IntrWrite<[]>; +// Constrain intrinsic to not read any memory location. +defvar IntrWriteOnly = IntrRead<[]>; + // Commutative - This intrinsic is commutative: X op Y == Y op X. def Commutative : IntrinsicProperty; diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 2923595486712..5ba1f4ba861d2 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -732,6 +732,11 @@ def int_aarch64_neon_tbx2 : AdvSIMD_Tbx2_Intrinsic; def int_aarch64_neon_tbx3 : AdvSIMD_Tbx3_Intrinsic; def int_aarch64_neon_tbx4 : AdvSIMD_Tbx4_Intrinsic; +// Maps Memory locations to registers. 
+defvar FPMR = InaccessibleMem; +defvar ZT0 = InaccessibleMem; +defvar ZA = InaccessibleMem; + let TargetPrefix = "aarch64" in { class FPENV_Get_Intrinsic : DefaultAttrsIntrinsic<[llvm_i64_ty], [], [IntrNoMem, IntrHasSideEffects]>; @@ -740,7 +745,7 @@ let TargetPrefix = "aarch64" in { class RNDR_Intrinsic : DefaultAttrsIntrinsic<[llvm_i64_ty, llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects]>; class FPMR_Set_Intrinsic - : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>; + : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrWrite<[FPMR]>, IntrWriteOnly]>; } // FP environment registers. @@ -965,7 +970,8 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat // Conversions class AdvSIMD_FP8_1VectorArg_Long_Intrinsic - : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], [IntrReadMem, IntrInaccessibleMemOnly]>; + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty], + [IntrRead<[FPMR]>, IntrReadOnly]>; def int_aarch64_neon_fp8_cvtl1 : AdvSIMD_FP8_1VectorArg_Long_Intrinsic; def int_aarch64_neon_fp8_cvtl2 : AdvSIMD_FP8_1VectorArg_Long_Intrinsic; @@ -974,13 +980,13 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, LLVMMatchType<1>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR]>, IntrReadOnly]>; def int_aarch64_neon_fp8_fcvtn2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR]>, IntrReadOnly]>; // Dot-product class AdvSIMD_FP8_DOT_Intrinsic @@ -988,14 +994,14 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<1>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR]>, IntrReadOnly]>; class AdvSIMD_FP8_DOT_LANE_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], 
[LLVMMatchType<0>, llvm_anyvector_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly, ImmArg>]>; + [IntrRead<[FPMR]>, IntrReadOnly, ImmArg>]>; def int_aarch64_neon_fp8_fdot2 : AdvSIMD_FP8_DOT_Intrinsic; def int_aarch64_neon_fp8_fdot2_lane : AdvSIMD_FP8_DOT_LANE_Intrinsic; @@ -1010,7 +1016,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat [LLVMMatchType<0>, llvm_v16i8_ty, llvm_v16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR]>, IntrReadOnly]>; class AdvSIMD_FP8_FMLA_LANE_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], @@ -1018,7 +1024,7 @@ def int_aarch64_st64bv0: Intrinsic<[llvm_i64_ty], !listconcat([llvm_ptr_ty], dat llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly, ImmArg>]>; + [IntrRead<[FPMR]>, IntrReadOnly, ImmArg>]>; def int_aarch64_neon_fp8_fmlalb : AdvSIMD_FP8_FMLA_Intrinsic; def int_aarch64_neon_fp8_fmlalt : AdvSIMD_FP8_FMLA_Intrinsic; @@ -2860,7 +2866,7 @@ def int_aarch64_sve_fmmla def int_aarch64_sve_fp8_fmmla : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR]>, IntrReadOnly]>; // // SVE ACLE: 7.2. 
BFloat16 extensions @@ -2963,7 +2969,9 @@ def int_aarch64_sve_whilewr_d : SVE2_CONFLICT_DETECT_Intrinsic<[IntrSpeculatable let TargetPrefix = "aarch64" in { class SME_Load_Store_Intrinsic : DefaultAttrsIntrinsic<[], - [pred_ty, llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrInaccessibleMemOrArgMemOnly, ImmArg>]>; + [pred_ty, llvm_anyptr_ty, llvm_i32_ty, llvm_i32_ty], + [IntrRead<[ArgMem, ZA]>, IntrWrite<[ArgMem, ZA]>, + ImmArg>]>; // Loads def int_aarch64_sme_ld1b_horiz : SME_Load_Store_Intrinsic; @@ -2991,18 +2999,21 @@ let TargetPrefix = "aarch64" in { // Spill + fill class SME_LDR_STR_ZA_Intrinsic - : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty, llvm_i32_ty], [IntrInaccessibleMemOrArgMemOnly]>; + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty, llvm_i32_ty], [IntrRead<[ArgMem, ZA]>, IntrWrite<[ArgMem, ZA]>]>; def int_aarch64_sme_ldr : SME_LDR_STR_ZA_Intrinsic; def int_aarch64_sme_str : SME_LDR_STR_ZA_Intrinsic; + class SME_TileToVector_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_i32_ty, llvm_i32_ty], [IntrReadMem, IntrInaccessibleMemOnly, ImmArg>]>; + llvm_i32_ty, llvm_i32_ty], [IntrRead<[ZA]>, IntrReadOnly, + ImmArg>]>; class SME_VectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg>]>; + llvm_anyvector_ty], [IntrRead<[ZA]>, IntrWrite<[ZA]>, + ImmArg>]>; def int_aarch64_sme_read_horiz : SME_TileToVector_Intrinsic; def int_aarch64_sme_read_vert : SME_TileToVector_Intrinsic; @@ -3017,13 +3028,13 @@ let TargetPrefix = "aarch64" in { class SME_MOVAZ_TileToVector_X2_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg>]>; + [IntrWrite<[ZA]>, IntrRead<[ZA]>, ImmArg>]>; class SME_MOVAZ_TileToVector_X4_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, 
LLVMMatchType<0>, LLVMMatchType<0>,LLVMMatchType<0>], [llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg>]>; + [IntrWrite<[ZA]>, IntrRead<[ZA]>, ImmArg>]>; def int_aarch64_sme_readz_horiz_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic; def int_aarch64_sme_readz_vert_x2 : SME_MOVAZ_TileToVector_X2_Intrinsic; @@ -3034,7 +3045,7 @@ let TargetPrefix = "aarch64" in { class SME_MOVAZ_TileToVector_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg>]>; + [IntrWrite<[ZA]>, IntrRead<[ZA]>, ImmArg>]>; def int_aarch64_sme_readz_horiz : SME_MOVAZ_TileToVector_Intrinsic; def int_aarch64_sme_readz_vert : SME_MOVAZ_TileToVector_Intrinsic; @@ -3045,23 +3056,24 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_readz_x2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty], - [IntrInaccessibleMemOnly]>; + [IntrWrite<[ZA]>, IntrRead<[ZA]>]>; def int_aarch64_sme_readz_x4 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty], - [IntrInaccessibleMemOnly]>; + [IntrWrite<[ZA]>, IntrRead<[ZA]>]>; def int_aarch64_sme_write_lane_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, llvm_i32_ty], - [ImmArg>, ImmArg>, IntrInaccessibleMemOnly]>; + [ImmArg>, ImmArg>, IntrRead<[ZT0]>, + IntrWrite<[ZT0]>]>; def int_aarch64_sme_write_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty], - [ImmArg>, IntrInaccessibleMemOnly, IntrWriteMem]>; + [ImmArg>, IntrWrite<[ZT0]>, IntrWriteOnly]>; - def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrWriteMem, IntrInaccessibleMemOnly, ImmArg>]>; + def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrWrite<[ZA]>, IntrWriteOnly, ImmArg>]>; def int_aarch64_sme_in_streaming_mode : DefaultAttrsIntrinsic<[llvm_i1_ty], [], [IntrNoMem]>, ClangBuiltin<"__builtin_arm_in_streaming_mode">; class SME_OuterProduct_Intrinsic @@ -3070,7 +3082,8 @@ let 
TargetPrefix = "aarch64" in { LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, - llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg>]>; + llvm_anyvector_ty], [IntrRead<[ZA]>, IntrWrite<[ZA]>, + ImmArg>]>; def int_aarch64_sme_mopa : SME_OuterProduct_Intrinsic; def int_aarch64_sme_mops : SME_OuterProduct_Intrinsic; @@ -3088,17 +3101,32 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_usmops_wide : SME_OuterProduct_Intrinsic; class SME_OuterProduct_QuarterTile_Single_Single + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, + LLVMMatchType<0>], [ImmArg>, IntrRead<[ZA]>, + IntrWrite<[ZA]>, IntrHasSideEffects]>; + + class SME_FP8_OuterProduct_QuarterTile_Single_Single : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, - LLVMMatchType<0>], [ImmArg>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + LLVMMatchType<0>], [ImmArg>, IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, IntrHasSideEffects]>; class SME_OuterProduct_QuarterTile_Single_Multi : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, - LLVMMatchType<0>], [ImmArg>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + LLVMMatchType<0>], [ImmArg>, IntrRead<[ZA]>, + IntrWrite<[ZA]>, IntrHasSideEffects]>; + + class SME_FP8_OuterProduct_QuarterTile_Single_Multi + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, + LLVMMatchType<0>, + LLVMMatchType<0>], [ImmArg>, IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, IntrHasSideEffects]>; class SME_OuterProduct_QuarterTile_Multi_Multi : DefaultAttrsIntrinsic<[], @@ -3106,7 +3134,16 @@ let TargetPrefix = "aarch64" in { llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, - LLVMMatchType<0>], [ImmArg>, IntrInaccessibleMemOnly, IntrHasSideEffects]>; + LLVMMatchType<0>], [ImmArg>, IntrRead<[ZA]>, + IntrWrite<[ZA]>, IntrHasSideEffects]>; + + class SME_FP8_OuterProduct_QuarterTile_Multi_Multi + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, + 
LLVMMatchType<0>, + LLVMMatchType<0>, + LLVMMatchType<0>], [ImmArg>, IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, IntrHasSideEffects]>; // 2-way and 4-way multi-vector signed/unsigned Quarter Tile Quarter Product A/S foreach mode = ["s", "a"] in { @@ -3138,8 +3175,8 @@ let TargetPrefix = "aarch64" in { LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_i32_ty], - [ImmArg>, ImmArg>, - IntrInaccessibleMemOnly]>; + [ImmArg>, ImmArg>, IntrRead<[ZA]>, + IntrWrite<[ZA]>]>; def int_aarch64_sme_ftmopa_za16 : SME_OuterProduct_TMOP_Intrinsic; def int_aarch64_sme_ftmopa_za32 : SME_OuterProduct_TMOP_Intrinsic; @@ -3157,17 +3194,17 @@ let TargetPrefix = "aarch64" in { llvm_nxv16i8_ty, llvm_i32_ty], [ImmArg>, ImmArg>, - IntrInaccessibleMemOnly]>; + IntrRead<[ZA, FPMR]>, IntrWrite<[ZA]>]>; def int_aarch64_sme_fp8_ftmopa_za16 : SME_FP8_OuterProduct_TMOP_Intrinsic; def int_aarch64_sme_fp8_ftmopa_za32 : SME_FP8_OuterProduct_TMOP_Intrinsic; // 16 and 32 bit multi-vector floating point 8 Quarter Tile Quarter Product foreach za = ["za16", "za32"] in { - def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_OuterProduct_QuarterTile_Single_Single; - def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_OuterProduct_QuarterTile_Single_Multi; - def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_OuterProduct_QuarterTile_Single_Multi; - def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_OuterProduct_QuarterTile_Multi_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_1x1" : SME_FP8_OuterProduct_QuarterTile_Single_Single; + def int_aarch64_sme_fp8_fmop4a_ # za # "_1x2" : SME_FP8_OuterProduct_QuarterTile_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_2x1" : SME_FP8_OuterProduct_QuarterTile_Single_Multi; + def int_aarch64_sme_fp8_fmop4a_ # za # "_2x2" : SME_FP8_OuterProduct_QuarterTile_Multi_Multi; } class SME_AddVectorToTile_Intrinsic @@ -3175,7 +3212,8 @@ let TargetPrefix = "aarch64" in { [llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - 
llvm_anyvector_ty], [IntrInaccessibleMemOnly, ImmArg>]>; + llvm_anyvector_ty], [IntrRead<[ZA]>, IntrWrite<[ZA]>, + ImmArg>]>; def int_aarch64_sme_addha : SME_AddVectorToTile_Intrinsic; def int_aarch64_sme_addva : SME_AddVectorToTile_Intrinsic; @@ -3199,9 +3237,9 @@ let TargetPrefix = "aarch64" in { [IntrNoMem, IntrHasSideEffects]>; def int_aarch64_sme_za_enable - : DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrInaccessibleMemOnly]>; + : DefaultAttrsIntrinsic<[], [], [IntrWrite<[ZA, ZT0]>, IntrWriteOnly]>; def int_aarch64_sme_za_disable - : DefaultAttrsIntrinsic<[], [], [IntrWriteMem, IntrInaccessibleMemOnly]>; + : DefaultAttrsIntrinsic<[], [], [IntrWrite<[ZA, ZT0]>, IntrWriteOnly]>; // Clamp // @@ -3290,56 +3328,59 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], - [IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>]>; class SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>]>; class SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>]>; class SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>]>; class SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>]>; class SME2_Matrix_ArrayVector_Single_Index_Intrinsic 
: DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg>]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>, + ImmArg>]>; class SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg>]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>, + ImmArg>]>; class SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg>]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>, + ImmArg>]>; class SVE2_VG2_Multi_Imm_Intrinsic : DefaultAttrsIntrinsic<[LLVMSubdivide2VectorType<0>], @@ -3358,14 +3399,14 @@ let TargetPrefix = "aarch64" in { : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], - [IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>]>; class SME2_ZA_Write_VG4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrWrite<[ZA]>]>; class SVE2_VG2_Multi_Single_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], @@ -3483,50 +3524,50 @@ let TargetPrefix = "aarch64" in { class SME2_ZA_ArrayVector_Read_VG2_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrReadOnly]>; class SME2_ZA_ArrayVector_Read_VG4_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrReadOnly]>; class SME2_Matrix_TileVector_Read_VG2_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_i32_ty], - [IntrReadMem, 
IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrReadOnly]>; class SME2_Matrix_TileVector_Read_VG4_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrRead<[ZA]>, IntrReadOnly]>; class SME2_ZA_ArrayVector_Write_VG2_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], - [IntrWriteMem, IntrInaccessibleMemOnly]>; + [IntrWrite<[ZA]>, IntrWriteOnly]>; class SME2_ZA_ArrayVector_Write_VG4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrWriteMem, IntrInaccessibleMemOnly]>; + [IntrWrite<[ZA]>, IntrWriteOnly]>; class SME2_Matrix_TileVector_Write_VG2_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], - [IntrWriteMem, IntrInaccessibleMemOnly, ImmArg>]>; + [IntrWrite<[ZA]>, IntrWriteOnly, ImmArg>]>; class SME2_Matrix_TileVector_Write_VG4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrWriteMem, IntrInaccessibleMemOnly, ImmArg>]>; + [IntrWrite<[ZA]>, IntrWriteOnly, ImmArg>]>; class SVE2_VG2_Multi_Single_Single_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], @@ -3692,7 +3733,7 @@ let TargetPrefix = "aarch64" in { // Multi-vector zeroing foreach vg = ["vg1x2", "vg1x4", "vg2x1", "vg2x2", "vg2x4", "vg4x1", "vg4x2", "vg4x4"] in { - def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrWriteMem, IntrInaccessibleMemOnly]>; + def int_aarch64_sme_zero_za64_ # vg : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [IntrWrite<[ZA]>, IntrWriteOnly]>; } // Multi-vector signed saturating doubling multiply high def int_aarch64_sve_sqdmulh_single_vgx2 : SVE2_VG2_Multi_Single_Intrinsic; @@ -3830,14 +3871,14 @@ let TargetPrefix = "aarch64" in { 
[llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly, IntrWriteMem]>; + [IntrWrite<[ZA]>, IntrWriteOnly]>; class SME2_Add_Sub_Write_VG4_Multi_Single_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly, IntrWriteMem]>; + [IntrWrite<[ZA]>, IntrWriteOnly]>; def int_aarch64_sme_add_write_single_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Single_Intrinsic; def int_aarch64_sme_sub_write_single_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Single_Intrinsic; @@ -3852,7 +3893,7 @@ let TargetPrefix = "aarch64" in { [llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly, IntrWriteMem]>; + [IntrWrite<[ZA]>, IntrWriteOnly]>; class SME2_Add_Sub_Write_VG4_Multi_Multi_Intrinsic : DefaultAttrsIntrinsic<[], @@ -3860,7 +3901,7 @@ let TargetPrefix = "aarch64" in { llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [IntrInaccessibleMemOnly, IntrWriteMem]>; + [IntrWrite<[ZA]>, IntrWriteOnly]>; def int_aarch64_sme_add_write_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Multi_Intrinsic; def int_aarch64_sme_sub_write_za_vg1x2 : SME2_Add_Sub_Write_VG2_Multi_Multi_Intrinsic; @@ -3981,37 +4022,38 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_sel_x2 : SVE2_VG2_Sel_Intrinsic; def int_aarch64_sve_sel_x4 : SVE2_VG4_Sel_Intrinsic; + class SME_LDR_STR_ZT_Intrinsic - : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty], [IntrInaccessibleMemOrArgMemOnly]>; + : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_anyptr_ty], [IntrRead<[ArgMem, ZT0]>, IntrWrite<[ArgMem, ZT0]>]>; def int_aarch64_sme_ldr_zt : SME_LDR_STR_ZT_Intrinsic; def int_aarch64_sme_str_zt : SME_LDR_STR_ZT_Intrinsic; // // Zero ZT0 // - def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg>, 
IntrInaccessibleMemOnly, IntrWriteMem]>; + def int_aarch64_sme_zero_zt : DefaultAttrsIntrinsic<[], [llvm_i32_ty], [ImmArg>, IntrWrite<[ZT0]>, IntrWriteOnly]>; // // Lookup table expand one register // def int_aarch64_sme_luti2_lane_zt : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [ImmArg>, ImmArg>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg>, ImmArg>, IntrRead<[ZT0]>, IntrReadOnly]>; def int_aarch64_sme_luti4_lane_zt : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [ImmArg>, ImmArg>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg>, ImmArg>, IntrRead<[ZT0]>, IntrReadOnly]>; def int_aarch64_sme_luti6_zt : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_i32_ty, llvm_nxv16i8_ty], - [ImmArg>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg>, IntrRead<[ZT0]>, IntrReadOnly]>; // Lookup table expand two registers // def int_aarch64_sme_luti2_lane_zt_x2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [ImmArg>, ImmArg>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg>, ImmArg>, IntrRead<[ZT0]>, IntrReadOnly]>; def int_aarch64_sme_luti4_lane_zt_x2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [ImmArg>, ImmArg>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg>, ImmArg>, IntrRead<[ZT0]>, IntrReadOnly]>; // // Lookup table expand four registers @@ -4019,11 +4061,11 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_luti2_lane_zt_x4 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [ImmArg>, ImmArg>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg>, ImmArg>, IntrRead<[ZT0]>, IntrReadOnly]>; def int_aarch64_sme_luti4_lane_zt_x4 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, 
llvm_nxv16i8_ty, llvm_i32_ty], - [ImmArg>, ImmArg>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg>, ImmArg>, IntrRead<[ZT0]>, IntrReadOnly]>; def int_aarch64_sme_luti6_lane_x4_x2 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [LLVMMatchType<0>, LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], @@ -4036,12 +4078,12 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sme_luti4_zt_x4 : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [ImmArg>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg>, IntrRead<[ZT0]>, IntrReadOnly]>; def int_aarch64_sme_luti6_zt_x4 : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [ImmArg>, IntrInaccessibleMemOnly, IntrReadMem]>; + [ImmArg>, IntrRead<[ZT0]>, IntrReadOnly]>; // @@ -4127,7 +4169,7 @@ let TargetPrefix = "aarch64" in { class SVE2_FP8_Cvt : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_nxv16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR]>, IntrReadOnly]>; def int_aarch64_sve_fp8_cvt1 : SVE2_FP8_Cvt; def int_aarch64_sve_fp8_cvt2 : SVE2_FP8_Cvt; @@ -4138,29 +4180,26 @@ let TargetPrefix = "aarch64" in { class SVE2_FP8_Narrow_Cvt : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_anyvector_ty, LLVMMatchType<0>], - [IntrReadMem, IntrInaccessibleMemOnly]>; - + [IntrRead<[FPMR]>, IntrReadOnly]>; def int_aarch64_sve_fp8_cvtn : SVE2_FP8_Narrow_Cvt; def int_aarch64_sve_fp8_cvtnb : SVE2_FP8_Narrow_Cvt; def int_aarch64_sve_fp8_cvtnt : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv16i8_ty, llvm_anyvector_ty, LLVMMatchType<0>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR]>, IntrReadOnly]>; // Dot product class SVE2_FP8_FMLA_FDOT : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_nxv16i8_ty, 
llvm_nxv16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; - + [IntrRead<[FPMR]>, IntrReadOnly]>; class SVE2_FP8_FMLA_FDOT_Lane : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [IntrReadMem, IntrInaccessibleMemOnly, ImmArg>]>; - + [IntrRead<[FPMR]>, IntrReadOnly, ImmArg>]>; def int_aarch64_sve_fp8_fdot : SVE2_FP8_FMLA_FDOT; def int_aarch64_sve_fp8_fdot_lane : SVE2_FP8_FMLA_FDOT_Lane; @@ -4186,69 +4225,70 @@ let TargetPrefix = "aarch64" in { class SVE2_FP8_CVT_X2_Single_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], [llvm_nxv16i8_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR]>, IntrReadOnly]>; class SVE2_FP8_CVT_Single_X4_Intrinsic : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty, llvm_nxv4f32_ty], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR]>, IntrReadOnly]>; class SME_FP8_OuterProduct_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i1_ty, llvm_nxv16i1_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [ImmArg>, IntrInaccessibleMemOnly]>; + [ImmArg>, IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>; class SME_FP8_ZA_LANE_VGx1_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg>]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, ImmArg>]>; class SME_FP8_ZA_LANE_VGx2_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg>]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, ImmArg>]>; class SME_FP8_ZA_LANE_VGx4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_i32_ty], - [IntrInaccessibleMemOnly, ImmArg>]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>, ImmArg>]>; + class SME_FP8_ZA_VGx1_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, 
llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>; class SME_FP8_ZA_SINGLE_VGx2_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>; class SME_FP8_ZA_SINGLE_VGx4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>; class SME_FP8_ZA_MULTI_VGx2_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>; class SME_FP8_ZA_MULTI_VGx4_Intrinsic : DefaultAttrsIntrinsic<[], [llvm_i32_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty, llvm_nxv16i8_ty], - [IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR, ZA]>, IntrWrite<[ZA]>]>; // // CVT from FP8 to half-precision/BFloat16 multi-vector // @@ -4267,7 +4307,7 @@ let TargetPrefix = "aarch64" in { def int_aarch64_sve_fp8_cvt_x2 : DefaultAttrsIntrinsic<[llvm_nxv16i8_ty], [llvm_anyvector_ty, LLVMMatchType<0>], - [IntrReadMem, IntrInaccessibleMemOnly]>; + [IntrRead<[FPMR]>, IntrReadOnly]>; def int_aarch64_sve_fp8_cvt_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic; def int_aarch64_sve_fp8_cvtn_x4 : SVE2_FP8_CVT_Single_X4_Intrinsic; From 17198f8bbae09e7843ef17727ba8f514a5903aba Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Wed, 24 Jun 2026 11:36:29 +0100 Subject: [PATCH 309/511] [NFC][Clang][Tests] Remove +bf16 from AArch64 tests that don't strictly need the feature flag. 
(#205336) --- .../fp8-intrinsics/acle_neon_fp8_reinterpret.c | 6 +++--- clang/test/CodeGen/AArch64/neon/bf16-getset.c | 6 +++--- .../sme2p3-intrinsics/acle_sme2p3_luti6.c | 10 +++++----- .../sve2p3-intrinsics/acle_sve2p3_luti6.c | 10 +++++----- .../acle_sve2p3_luti6_lane_x2.c | 18 +++++++++--------- .../CodeGen/attr-arm-sve-vector-bits-bitcast.c | 6 +++--- .../CodeGen/attr-arm-sve-vector-bits-codegen.c | 2 +- .../CodeGen/attr-arm-sve-vector-bits-globals.c | 4 ++-- .../CodeGen/attr-arm-sve-vector-bits-types.c | 12 ++++++------ clang/test/CodeGen/svboolx2_t.cpp | 2 +- clang/test/CodeGen/svboolx4_t.cpp | 2 +- .../aarch64-mangle-sve-fixed-vectors.cpp | 10 +++++----- .../CodeGenCXX/aarch64-mangle-sve-vectors.cpp | 4 ++-- .../CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp | 10 +++++----- clang/test/CodeGenCXX/mangle-neon-vectors.cpp | 12 +----------- clang/test/Index/index-builtin-sve.cpp | 4 ++-- clang/test/Sema/aarch64-bf16-ldst-intrinsics.c | 3 +-- .../Sema/aarch64-incompat-sm-builtin-calls.cpp | 2 +- clang/test/Sema/aarch64-sme-func-attrs.c | 4 ++-- ...ch64-sme-streaming-nonstreaming-vl-checks.c | 4 ++-- clang/test/Sema/aarch64-sme2p1-diagnostics.c | 2 +- .../acle_sme2p1_imm.cpp | 3 +-- .../acle_sme2p3_imm.c | 2 +- clang/test/Sema/attr-arm-sve-vector-bits.c | 10 +++++----- .../test/SemaCXX/attr-arm-sve-vector-bits.cpp | 2 +- 25 files changed, 69 insertions(+), 81 deletions(-) diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_reinterpret.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_reinterpret.c index adf5fceb9ceb9..a0b26fc1bb6e3 100644 --- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_reinterpret.c +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_reinterpret.c @@ -1,9 +1,9 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 #include -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature 
+fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s -// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s -check-prefix CHECK-CXX -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +bf16 -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -S -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +fp8 -target-feature +fp8fma -disable-O0-optnone -Werror -Wall -S -o /dev/null %s // REQUIRES: aarch64-registered-target diff --git a/clang/test/CodeGen/AArch64/neon/bf16-getset.c b/clang/test/CodeGen/AArch64/neon/bf16-getset.c index a00e2fcc01e81..a03a9d9cc5f40 100644 --- a/clang/test/CodeGen/AArch64/neon/bf16-getset.c +++ b/clang/test/CodeGen/AArch64/neon/bf16-getset.c @@ -1,8 +1,8 @@ // REQUIRES: aarch64-registered-target || arm-registered-target -// RUN: %clang_cc1_cg_arm64_neon -target-feature +bf16 -emit-llvm %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=ALL,LLVM -// RUN: %if cir-enabled %{%clang_cc1_cg_arm64_neon -target-feature +bf16 -fclangir -emit-llvm %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=ALL,LLVM %} -// RUN: %if cir-enabled %{%clang_cc1_cg_arm64_neon -target-feature +bf16 -fclangir 
-emit-cir %s -disable-O0-optnone | FileCheck %s --check-prefixes=ALL,CIR %} +// RUN: %clang_cc1_cg_arm64_neon -emit-llvm %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=ALL,LLVM +// RUN: %if cir-enabled %{%clang_cc1_cg_arm64_neon -fclangir -emit-llvm %s -disable-O0-optnone | opt -S -passes=mem2reg,sroa | FileCheck %s --check-prefixes=ALL,LLVM %} +// RUN: %if cir-enabled %{%clang_cc1_cg_arm64_neon -fclangir -emit-cir %s -disable-O0-optnone | FileCheck %s --check-prefixes=ALL,CIR %} #include diff --git a/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c b/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c index 656b0ce565833..d61b2b2254893 100644 --- a/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c +++ b/clang/test/CodeGen/AArch64/sme2p3-intrinsics/acle_sme2p3_luti6.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 
-target-feature +bf16 -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -S -O1 -Werror -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c index 11f0848af1c07..216482cfcea9b 100644 --- a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6.c @@ -1,11 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | 
FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -S -O1 -Werror -o /dev/null %s #include diff --git a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c index b6d8fe5cff531..e68152545d676 100644 --- a/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c +++ b/clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_luti6_lane_x2.c @@ -1,15 +1,15 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 
-Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSTREAMING_MODE -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s --check-prefix=STREAM-CHECK -// RUN: %clang_cc1 -DSTREAMING_MODE -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=STREAM-CPP-CHECK -// RUN: %clang_cc1 -DSTREAMING_MODE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - %s | FileCheck %s --check-prefix=STREAM-CHECK -// RUN: %clang_cc1 -DSTREAMING_MODE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -target-feature +bf16 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=STREAM-CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +bf16 -S -O1 -Werror -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | 
FileCheck %s --check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSTREAMING_MODE -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s --check-prefix=STREAM-CHECK +// RUN: %clang_cc1 -DSTREAMING_MODE -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=STREAM-CPP-CHECK +// RUN: %clang_cc1 -DSTREAMING_MODE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - %s | FileCheck %s --check-prefix=STREAM-CHECK +// RUN: %clang_cc1 -DSTREAMING_MODE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s --check-prefix=STREAM-CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3 -S -O1 -Werror -o /dev/null %s #include diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c index 63e87c7dd37d3..0d035880c3f42 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c @@ -1,7 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve 
-target-feature +bf16 -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128 -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=2 -mvscale-max=2 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256 -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512 +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128 +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -mvscale-min=2 -mvscale-max=2 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256 +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512 // REQUIRES: aarch64-registered-target diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c index 06fbb0027d7c1..6e70834623e17 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c @@ -1,5 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -mvscale-min=4 -mvscale-max=4 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s // REQUIRES: aarch64-registered-target diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c index ae011db633b6a..0d6540bc10d26 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c @@ -1,6 +1,6 @@ // NOTE: Assertions have 
been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefix=CHECK-128 -// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefix=CHECK-512 +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -mvscale-min=1 -mvscale-max=1 -O1 -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefix=CHECK-128 +// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -mvscale-min=4 -mvscale-max=4 -O1 -emit-llvm -o - %s -fhalf-no-semantic-interposition | FileCheck %s --check-prefix=CHECK-512 // REQUIRES: aarch64-registered-target diff --git a/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c b/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c index c6d5d2d2cffdc..b9fc43335584c 100644 --- a/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c +++ b/clang/test/CodeGen/attr-arm-sve-vector-bits-types.c @@ -1,9 +1,9 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128 -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=2 -mvscale-max=2 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256 -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512 -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -mvscale-min=8 -mvscale-max=8 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-1024 -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +bf16 -mvscale-min=16 -mvscale-max=16 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-2048 -// RUN: %clang_cc1 -triple aarch64_32-unknown-darwin -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ILP32 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=1 -mvscale-max=1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=2 -mvscale-max=2 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=8 -mvscale-max=8 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-1024 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -mvscale-min=16 -mvscale-max=16 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-2048 +// RUN: %clang_cc1 -triple aarch64_32-unknown-darwin -target-feature +sve -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-ILP32 // REQUIRES: aarch64-registered-target diff --git a/clang/test/CodeGen/svboolx2_t.cpp b/clang/test/CodeGen/svboolx2_t.cpp index 0b798afb8c642..70572d9563c2e 100644 --- a/clang/test/CodeGen/svboolx2_t.cpp +++ b/clang/test/CodeGen/svboolx2_t.cpp @@ -1,5 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -emit-llvm -o - %s | FileCheck %s // CHECK-LABEL: @_Z3foo10svboolx2_t( // CHECK-NEXT: entry: diff --git a/clang/test/CodeGen/svboolx4_t.cpp b/clang/test/CodeGen/svboolx4_t.cpp index 
d849896bad85f..dc4736856abae 100644 --- a/clang/test/CodeGen/svboolx4_t.cpp +++ b/clang/test/CodeGen/svboolx4_t.cpp @@ -1,5 +1,5 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -emit-llvm -o - %s | FileCheck %s // CHECK-LABEL: @_Z3foo10svboolx4_t( // CHECK-NEXT: entry: diff --git a/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp b/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp index d0bde80a34057..835abc175b6ef 100644 --- a/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp +++ b/clang/test/CodeGenCXX/aarch64-mangle-sve-fixed-vectors.cpp @@ -1,17 +1,17 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \ -// RUN: -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 \ +// RUN: -target-feature +sve -mvscale-min=1 -mvscale-max=1 \ // RUN: | FileCheck %s --check-prefix=CHECK-128 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \ -// RUN: -target-feature +sve -target-feature +bf16 -mvscale-min=2 -mvscale-max=2 \ +// RUN: -target-feature +sve -mvscale-min=2 -mvscale-max=2 \ // RUN: | FileCheck %s --check-prefix=CHECK-256 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \ -// RUN: -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 \ +// RUN: -target-feature +sve -mvscale-min=4 -mvscale-max=4 \ // RUN: | FileCheck %s --check-prefix=CHECK-512 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \ -// RUN: -target-feature +sve -target-feature +bf16 -mvscale-min=8 -mvscale-max=8 \ +// RUN: -target-feature +sve -mvscale-min=8 -mvscale-max=8 \ // RUN: | FileCheck %s --check-prefix=CHECK-1024 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \ -// RUN: -target-feature +sve 
-target-feature +bf16 -mvscale-min=16 -mvscale-max=16 \ +// RUN: -target-feature +sve -mvscale-min=16 -mvscale-max=16 \ // RUN: | FileCheck %s --check-prefix=CHECK-2048 #define N __ARM_FEATURE_SVE_BITS diff --git a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp index 713d0a2b1cb51..292c89d6a0371 100644 --- a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp +++ b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp @@ -1,8 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 3 // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu %s -emit-llvm -o - \ -// RUN: -target-feature +sve -target-feature +bf16 | FileCheck %s +// RUN: -target-feature +sve | FileCheck %s // RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu %s -emit-llvm -o - \ -// RUN: -target-feature +sve -target-feature +bf16 -fclang-abi-compat=17 | FileCheck %s --check-prefix=COMPAT_17 +// RUN: -target-feature +sve -fclang-abi-compat=17 | FileCheck %s --check-prefix=COMPAT_17 void f(__SVInt8_t, __SVInt8_t); void f(__SVInt16_t, __SVInt16_t); diff --git a/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp b/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp index dcc2c2e0af85d..66fc29a4a1696 100644 --- a/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp +++ b/clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp @@ -1,17 +1,17 @@ // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \ -// RUN: -target-feature +sve -target-feature +bf16 -mvscale-min=1 -mvscale-max=1 \ +// RUN: -target-feature +sve -mvscale-min=1 -mvscale-max=1 \ // RUN: | FileCheck %s --check-prefix=CHECK-128 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \ -// RUN: -target-feature +sve -target-feature +bf16 -mvscale-min=2 -mvscale-max=2 \ +// RUN: -target-feature +sve -mvscale-min=2 -mvscale-max=2 \ // RUN: | FileCheck %s 
--check-prefix=CHECK-256 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \ -// RUN: -target-feature +sve -target-feature +bf16 -mvscale-min=4 -mvscale-max=4 \ +// RUN: -target-feature +sve -mvscale-min=4 -mvscale-max=4 \ // RUN: | FileCheck %s --check-prefix=CHECK-512 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \ -// RUN: -target-feature +sve -target-feature +bf16 -mvscale-min=8 -mvscale-max=8 \ +// RUN: -target-feature +sve -mvscale-min=8 -mvscale-max=8 \ // RUN: | FileCheck %s --check-prefix=CHECK-1024 // RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \ -// RUN: -target-feature +sve -target-feature +bf16 -mvscale-min=16 -mvscale-max=16 \ +// RUN: -target-feature +sve -mvscale-min=16 -mvscale-max=16 \ // RUN: | FileCheck %s --check-prefix=CHECK-2048 #define N __ARM_FEATURE_SVE_BITS diff --git a/clang/test/CodeGenCXX/mangle-neon-vectors.cpp b/clang/test/CodeGenCXX/mangle-neon-vectors.cpp index 2139a8ae98caf..a4ca0581c195b 100644 --- a/clang/test/CodeGenCXX/mangle-neon-vectors.cpp +++ b/clang/test/CodeGenCXX/mangle-neon-vectors.cpp @@ -1,7 +1,6 @@ -// RUN: %clang_cc1 -triple armv7-apple-ios -target-feature +neon %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -triple armv7-apple-ios -target-feature +neon %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -triple arm64-apple-ios -target-feature +neon %s -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-AARCH64 -// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -target-feature +bf16 %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-AARCH64-BF16 typedef float float32_t; typedef double float64_t; @@ -15,10 +14,7 @@ typedef signed char poly8_t; typedef short poly16_t; #endif typedef unsigned __INT64_TYPE__ uint64_t; - -#if defined(__ARM_FEATURE_BF16) typedef __bf16 bfloat16_t; -#endif typedef __attribute__((neon_vector_type(2))) 
int int32x2_t; typedef __attribute__((neon_vector_type(4))) int int32x4_t; @@ -35,10 +31,7 @@ typedef __attribute__((neon_vector_type(16))) mfloat8_t mfloat8x16_t; #endif typedef __attribute__((neon_polyvector_type(16))) poly8_t poly8x16_t; typedef __attribute__((neon_polyvector_type(8))) poly16_t poly16x8_t; - -#if defined(__ARM_FEATURE_BF16) typedef __attribute__((neon_vector_type(4))) __bf16 bfloat16x4_t; -#endif // CHECK: 16__simd64_int32_t // CHECK-AARCH64: 11__Int32x2_t @@ -85,11 +78,8 @@ void f10(poly16x8_t v) {} void f11(float64x2_t v) { } #endif -#if defined(__ARM_FEATURE_BF16) // CHECK-AARCH64-BF16: 14__Bfloat16x4_t void f12(bfloat16x4_t v) {} -#endif - #ifdef __aarch64__ // CHECK-AARCH64: 13__Mfloat8x8_t diff --git a/clang/test/Index/index-builtin-sve.cpp b/clang/test/Index/index-builtin-sve.cpp index cc148a21578fa..95fca71004b74 100644 --- a/clang/test/Index/index-builtin-sve.cpp +++ b/clang/test/Index/index-builtin-sve.cpp @@ -1,7 +1,7 @@ +// RUN: c-index-test -index-file %s --target=aarch64 -target-feature +sve -std=c++11 | FileCheck %s +// void testSve(__SVInt8_t sve); // CHECK: USR: c:@F@testSve#@BT@__SVInt8_t# void testBf16(__bf16); // CHECK: USR: c:@F@testBf16#@BT@__bf16# - -// RUN: c-index-test -index-file %s --target=aarch64 -target-feature +bf16 -target-feature +sve -std=c++11 | FileCheck %s diff --git a/clang/test/Sema/aarch64-bf16-ldst-intrinsics.c b/clang/test/Sema/aarch64-bf16-ldst-intrinsics.c index 55cd8b772831a..a7d0d31274a1a 100644 --- a/clang/test/Sema/aarch64-bf16-ldst-intrinsics.c +++ b/clang/test/Sema/aarch64-bf16-ldst-intrinsics.c @@ -1,5 +1,4 @@ -// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -target-feature +bf16 \ -// RUN: -O2 -verify -fsyntax-only %s +// RUN: %clang_cc1 -triple aarch64 -target-feature +neon -O2 -verify -fsyntax-only %s // REQUIRES: aarch64-registered-target || arm-registered-target diff --git a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.cpp 
b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.cpp index 3fbcaf4a13d67..15be24c1e477c 100644 --- a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.cpp +++ b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.cpp @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -std=c++23 -triple aarch64-none-linux-gnu -target-feature +sve \ -// RUN: -target-feature +bf16 -target-feature +sve -target-feature +sme -target-feature +sme2 -target-feature +sve2 -target-feature +neon -Waarch64-sme-attributes -fsyntax-only -verify %s +// RUN: -target-feature +sve -target-feature +sme -target-feature +sme2 -target-feature +sve2 -target-feature +neon -Waarch64-sme-attributes -fsyntax-only -verify %s // REQUIRES: aarch64-registered-target diff --git a/clang/test/Sema/aarch64-sme-func-attrs.c b/clang/test/Sema/aarch64-sme-func-attrs.c index 1543e990dd042..77b8520f41dc1 100644 --- a/clang/test/Sema/aarch64-sme-func-attrs.c +++ b/clang/test/Sema/aarch64-sme-func-attrs.c @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -verify %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -verify=expected-cpp -x c++ %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -verify=expected-cpp -x c++ %s // Valid attributes diff --git a/clang/test/Sema/aarch64-sme-streaming-nonstreaming-vl-checks.c b/clang/test/Sema/aarch64-sme-streaming-nonstreaming-vl-checks.c index 41d89869062ff..949b976f9cae8 100644 --- 
a/clang/test/Sema/aarch64-sme-streaming-nonstreaming-vl-checks.c +++ b/clang/test/Sema/aarch64-sme-streaming-nonstreaming-vl-checks.c @@ -1,9 +1,9 @@ // Case 1: No vscale flags — should only produce warnings -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -verify=expected-noflags %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -verify=expected-noflags %s // Case 2: Explicit mismatch in vscale flags — should produce errors for // streaming and non-streaming callers -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -mvscale-min=1 -mvscale-max=1 -mvscale-streaming-min=2 -mvscale-streaming-max=2 -verify=expected-flags %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sve -Waarch64-sme-attributes -fsyntax-only -mvscale-min=1 -mvscale-max=1 -mvscale-streaming-min=2 -mvscale-streaming-max=2 -verify=expected-flags %s void sme_streaming_with_vl_arg(__SVInt8_t a) __arm_streaming; diff --git a/clang/test/Sema/aarch64-sme2p1-diagnostics.c b/clang/test/Sema/aarch64-sme2p1-diagnostics.c index 2a25039e120b1..8a4a1e2f48155 100644 --- a/clang/test/Sema/aarch64-sme2p1-diagnostics.c +++ b/clang/test/Sema/aarch64-sme2p1-diagnostics.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p1 -fsyntax-only -verify %s // REQUIRES: aarch64-registered-target #include "arm_sme.h" diff --git 
a/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp b/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp index 06b1e8301ce49..d299114c9a3b2 100644 --- a/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp +++ b/clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp @@ -1,5 +1,4 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu \ -// RUN: -target-feature +sme -target-feature +sme2p1 -target-feature +bf16 -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p1 -fsyntax-only -verify %s // REQUIRES: aarch64-registered-target diff --git a/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c b/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c index 25c35fbcbcc7b..b9c8380949562 100644 --- a/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c +++ b/clang/test/Sema/aarch64-sme2p3-intrinsics/acle_sme2p3_imm.c @@ -1,6 +1,6 @@ // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -target-feature +bf16 -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +sme2p3 -fsyntax-only -verify %s #include diff --git a/clang/test/Sema/attr-arm-sve-vector-bits.c b/clang/test/Sema/attr-arm-sve-vector-bits.c index 9e9e72ef4254d..1f6b01aa87669 100644 --- a/clang/test/Sema/attr-arm-sve-vector-bits.c +++ b/clang/test/Sema/attr-arm-sve-vector-bits.c @@ -1,8 +1,8 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=1 -mvscale-max=1 %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=2 -mvscale-max=2 
-mvscale-streaming-min=2 %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -target-feature +sme -ffreestanding -fsyntax-only -verify=expected -mvscale-min=4 -mvscale-max=4 -mvscale-streaming-min=4 -mvscale-streaming-max=4 %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=8 -mvscale-max=8 -mvscale-streaming-min=4 -mvscale-streaming-max=8 %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=16 -mvscale-max=16 %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=1 -mvscale-max=1 %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=2 -mvscale-max=2 -mvscale-streaming-min=2 %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -ffreestanding -fsyntax-only -verify=expected -mvscale-min=4 -mvscale-max=4 -mvscale-streaming-min=4 -mvscale-streaming-max=4 %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=8 -mvscale-max=8 -mvscale-streaming-min=4 -mvscale-streaming-max=8 %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -ffreestanding -fsyntax-only -verify=expected,streamingdifferent -mvscale-min=16 -mvscale-max=16 %s #include diff --git a/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp b/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp index 1127f31619e77..93326a0afa279 100644 --- 
a/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp +++ b/clang/test/SemaCXX/attr-arm-sve-vector-bits.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -ffreestanding -fsyntax-only -verify -std=c++11 -mvscale-min=4 -mvscale-max=4 -Wconversion %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -ffreestanding -fsyntax-only -verify -std=c++11 -mvscale-min=4 -mvscale-max=4 -Wconversion %s // expected-no-diagnostics #include From e1eeeffe619651ec6a602e97b893913ce64523f9 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 24 Jun 2026 05:41:38 -0500 Subject: [PATCH 310/511] [flang][OpenMP] Check that IF clause applies to at most one leaf (#205164) This also allows placing the IF clause in the "allowedClauses" set for all directives, instead of having it in "allowedOnceClauses" for some directives and in "allowedClauses" for others. The emitted diagnostic will show which constituent has multiple IF clauses applying to it: ``` if.f90:4:35: error: At most one IF clause can apply to each directive constituent !$omp & if(target teams: x > 0) if(teams distribute: y > 0) ^^^^^^^^^^^^^^^^^^^^^^^^^^^ if.f90:4:11: Previous IF clause applying to the TEAMS constituent !$omp & if(target teams: x > 0) if(teams distribute: y > 0) ^^^^^^^^^^^^^^^^^^^^^^^ ``` --- flang/lib/Semantics/check-omp-structure.cpp | 115 ++++++++++++++---- flang/lib/Semantics/check-omp-structure.h | 5 + .../Semantics/OpenMP/device-constructs.f90 | 4 +- .../OpenMP/if-clause-45-suggestion.f90 | 18 +++ flang/test/Semantics/OpenMP/if-clause-45.f90 | 29 ++--- .../OpenMP/if-clause-50-suggestion.f90 | 14 +++ flang/test/Semantics/OpenMP/if-clause-50.f90 | 32 +++-- flang/test/Semantics/OpenMP/if-clause-60.f90 | 12 ++ flang/test/Semantics/OpenMP/if-clause.f90 | 34 +++--- llvm/include/llvm/Frontend/OpenMP/OMP.td | 71 ++++++----- 10 files changed, 223 insertions(+), 111 deletions(-) create mode 100644 
flang/test/Semantics/OpenMP/if-clause-45-suggestion.f90 create mode 100644 flang/test/Semantics/OpenMP/if-clause-50-suggestion.f90 create mode 100644 flang/test/Semantics/OpenMP/if-clause-60.f90 diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index d87b2f1983de6..816b8fd2f149d 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -3359,6 +3359,10 @@ void OmpStructureChecker::Leave(const parser::OmpEndDirective &x) { } } +void OmpStructureChecker::Enter(const parser::OmpClauseList &) { + ifLeafs_.clear(); +} + // Clauses // Mainly categorized as // 1. Checks on 'OmpClauseList' from 'parse-tree.h'. @@ -4317,6 +4321,13 @@ void OmpStructureChecker::Enter(const parser::OmpClause::If &x) { return false; }}; + // The directive-name to which the clause applies. [Note: The directive- + // name-modifier is not necessarily a valid directive name, but that's how + // it's currently modeled.] + // This will be set only after other checks pass to avoid emitting irrelevant + // diagnostics. 
+ llvm::omp::Directive appliesTo{llvm::omp::Directive::OMPD_unknown}; + if (!OmpVerifyModifiers( x.v, llvm::omp::OMPC_if, GetContext().clauseSource, context_)) { return; @@ -4334,39 +4345,91 @@ void OmpStructureChecker::Enter(const parser::OmpClause::If &x) { std::string modName{desc.name.str()}; if (!isConstituent(dir, sub)) { - context_ - .Say(modifierSource, - "%s is not a constituent of the %s directive"_err_en_US, subName, - dirName) - .Attach( - GetContext().directiveSource, "Cannot apply to directive"_en_US); + context_.Say(modifierSource, + "%s is not a constituent of the %s directive"_err_en_US, subName, + dirName); } else { - static llvm::omp::Directive valid45[]{ - llvm::omp::OMPD_cancel, // - llvm::omp::OMPD_parallel, // - /* OMP 5.0+ also allows OMPD_simd */ - llvm::omp::OMPD_target, // - llvm::omp::OMPD_target_data, // - llvm::omp::OMPD_target_enter_data, // - llvm::omp::OMPD_target_exit_data, // - llvm::omp::OMPD_target_update, // - llvm::omp::OMPD_task, // - llvm::omp::OMPD_taskloop, // - /* OMP 5.2+ also allows OMPD_teams */ + static OmpDirectiveSet valid45{ + llvm::omp::Directive::OMPD_cancel, // + llvm::omp::Directive::OMPD_parallel, // + llvm::omp::Directive::OMPD_target, // + llvm::omp::Directive::OMPD_target_data, // + llvm::omp::Directive::OMPD_target_enter_data, // + llvm::omp::Directive::OMPD_target_exit_data, // + llvm::omp::Directive::OMPD_target_update, // + llvm::omp::Directive::OMPD_task, // + llvm::omp::Directive::OMPD_taskloop, // }; - if (version < 50 && sub == llvm::omp::OMPD_simd) { + static OmpDirectiveSet valid50{ + valid45 | OmpDirectiveSet{llvm::omp::Directive::OMPD_simd}}; + // 5.1 is the same as 5.0. 
+ static OmpDirectiveSet valid52{ + valid50 | OmpDirectiveSet{llvm::omp::Directive::OMPD_teams}}; + static OmpDirectiveSet valid60{valid52 | + OmpDirectiveSet{llvm::omp::Directive::OMPD_taskgraph, + /*TODO llvm::omp::Directive::OMPD_task_iteration*/}}; + + static auto minVersion{[&](llvm::omp::Directive d) { + if (valid45.test(d)) { + return 45; + } + if (valid50.test(d)) { + return 50; + } + if (valid52.test(d)) { + return 52; + } + if (valid60.test(d)) { + return 60; + } + return 0; + }}; + static auto suggest{[&](unsigned v) -> std::string { + if (v != 0) { + return ", " + TryVersion(v); + } else { + return ""; + } + }}; + + if (version <= 45 && !valid45.test(sub)) { + context_.Say(modifierSource, + "%s is not allowed as '%s' in %s%s"_err_en_US, subName, modName, + ThisVersion(version), suggest(minVersion(sub))); + } else if (version <= 51 && !valid50.test(sub)) { context_.Say(modifierSource, - "%s is not allowed as '%s' in %s, %s"_warn_en_US, subName, modName, - ThisVersion(version), TryVersion(50)); - } else if (version < 52 && sub == llvm::omp::OMPD_teams) { + "%s is not allowed as '%s' in %s%s"_err_en_US, subName, modName, + ThisVersion(version), suggest(minVersion(sub))); + } else if (version <= 52 && !valid52.test(sub)) { context_.Say(modifierSource, - "%s is not allowed as '%s' in %s, %s"_warn_en_US, subName, modName, - ThisVersion(version), TryVersion(52)); - } else if (!llvm::is_contained(valid45, sub) && - sub != llvm::omp::OMPD_simd && sub != llvm::omp::OMPD_teams) { + "%s is not allowed as '%s' in %s%s"_err_en_US, subName, modName, + ThisVersion(version), suggest(minVersion(sub))); + } else if (!valid60.test(sub)) { context_.Say(modifierSource, "%s is not allowed as '%s' in %s"_err_en_US, subName, modName, ThisVersion(version)); + } else { + appliesTo = sub; + } + } + } else { + appliesTo = GetContext().directive; + } + + if (appliesTo != llvm::omp::Directive::OMPD_unknown) { + parser::CharBlock source{GetContext().clauseSource}; + for (auto leaf : 
llvm::omp::getLeafConstructsOrSelf(appliesTo)) { + auto pair{ifLeafs_.try_emplace(leaf, source)}; + if (!pair.second) { + std::string ifName{GetUpperName(llvm::omp::Clause::OMPC_if, version)}; + context_ + .Say(source, + "At most one %s clause can apply to each directive constituent"_err_en_US, + ifName) + .Attach(pair.first->second, + "Previous %s clause applying to the %s constituent"_en_US, + ifName, GetUpperName(leaf, version)); + break; } } } diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 1283feb32ef5f..8d9b25fb2a11d 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -131,6 +131,7 @@ class OmpStructureChecker : public OmpStructureCheckerBase { void Enter(const parser::OpenMPCriticalConstruct &); void Enter(const parser::OpenMPAtomicConstruct &); + void Enter(const parser::OmpClauseList &); void Leave(const parser::OmpClauseList &); void Enter(const parser::OmpClause &); @@ -388,6 +389,10 @@ class OmpStructureChecker : public OmpStructureCheckerBase { int allocateDirectiveLevel_{0}; parser::CharBlock visitedAtomicSource_; + // Mapping of directive-name-modifier constituents to the sources of the + // IF clauses that referenced them. If there was no modifier, the entire + // directive is assumed to be listed. + std::map ifLeafs_; // Stack of nested DO loops and OpenMP constructs. // This is used to verify DO loop nest for DOACROSS, and branches into // and out of OpenMP constructs. 
diff --git a/flang/test/Semantics/OpenMP/device-constructs.f90 b/flang/test/Semantics/OpenMP/device-constructs.f90 index db04e7db155ad..d74d720b2d35d 100644 --- a/flang/test/Semantics/OpenMP/device-constructs.f90 +++ b/flang/test/Semantics/OpenMP/device-constructs.f90 @@ -169,7 +169,7 @@ program main !ERROR: The device expression of the DEVICE clause must be a non-negative integer expression, 'omp_initial_device' (-1), or 'omp_invalid_device' (-2) !$omp target exit data map(delete:A) device(-3) - !ERROR: At most one IF clause can appear on the TARGET ENTER DATA directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target enter data map(to:a) if(.true.) if(.false.) !ERROR: Only the ALLOC, TO, TOFROM map types are permitted for MAP clauses on the TARGET ENTER DATA directive @@ -185,7 +185,7 @@ program main !$omp target update if(.true.) device(1) to(a) from(b) depend(inout:c) nowait - !ERROR: At most one IF clause can appear on the TARGET UPDATE directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target update to(a) if(.true.) if(.false.) !ERROR: At most one DEVICE clause can appear on the TARGET UPDATE directive diff --git a/flang/test/Semantics/OpenMP/if-clause-45-suggestion.f90 b/flang/test/Semantics/OpenMP/if-clause-45-suggestion.f90 new file mode 100644 index 0000000000000..98ac83d7f4999 --- /dev/null +++ b/flang/test/Semantics/OpenMP/if-clause-45-suggestion.f90 @@ -0,0 +1,18 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=45 + +subroutine f + !OK + !$omp parallel if(.false.) + !$omp end parallel + + !ERROR: IF clause is not allowed on directive SIMD in OpenMP v4.5, try -fopenmp-version=50 + !$omp simd if(.true.) + do i = 1, 10 + end do + + !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v4.5, try -fopenmp-version=52 + !$omp teams if(.true.) 
+ !$omp end teams + + !No test for 6.0 because it requires a directive that is not in 4.5 +end diff --git a/flang/test/Semantics/OpenMP/if-clause-45.f90 b/flang/test/Semantics/OpenMP/if-clause-45.f90 index b013a33094727..4f444907b1617 100644 --- a/flang/test/Semantics/OpenMP/if-clause-45.f90 +++ b/flang/test/Semantics/OpenMP/if-clause-45.f90 @@ -24,7 +24,7 @@ program main end do !$omp end distribute parallel do - !ERROR: At most one IF clause can appear on the DISTRIBUTE PARALLEL DO directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp distribute parallel do if(.true.) if(parallel: .false.) do i = 1, 10 end do @@ -79,7 +79,6 @@ program main !ERROR: IF clause is not allowed on directive DISTRIBUTE SIMD in OpenMP v4.5, try -fopenmp-version=50 !ERROR: IF clause is not allowed on directive DISTRIBUTE SIMD in OpenMP v4.5, try -fopenmp-version=50 - !ERROR: At most one IF clause can appear on the DISTRIBUTE SIMD directive !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 !$omp distribute simd if(.true.) if(simd: .false.) do i = 1, 10 @@ -112,7 +111,6 @@ program main !ERROR: IF clause is not allowed on directive DO SIMD in OpenMP v4.5, try -fopenmp-version=50 !ERROR: IF clause is not allowed on directive DO SIMD in OpenMP v4.5, try -fopenmp-version=50 - !ERROR: At most one IF clause can appear on the DO SIMD directive !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 !$omp do simd if(.true.) if(simd: .false.) do i = 1, 10 @@ -132,7 +130,7 @@ program main !$omp parallel if(target: .true.) !$omp end parallel - !ERROR: At most one IF clause can appear on the PARALLEL directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp parallel if(.true.) if(parallel: .false.) 
!$omp end parallel @@ -155,7 +153,7 @@ program main end do !$omp end parallel do - !ERROR: At most one IF clause can appear on the PARALLEL DO directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp parallel do if(.true.) if(parallel: .false.) do i = 1, 10 end do @@ -194,7 +192,7 @@ program main !$omp parallel sections if(target: .true.) !$omp end parallel sections - !ERROR: At most one IF clause can appear on the PARALLEL SECTIONS directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp parallel sections if(.true.) if(parallel: .false.) !$omp end parallel sections @@ -211,7 +209,7 @@ program main !$omp parallel workshare if(target: .true.) !$omp end parallel workshare - !ERROR: At most one IF clause can appear on the PARALLEL WORKSHARE directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp parallel workshare if(.true.) if(parallel: .false.) !$omp end parallel workshare @@ -240,7 +238,6 @@ program main !ERROR: IF clause is not allowed on directive SIMD in OpenMP v4.5, try -fopenmp-version=50 !ERROR: IF clause is not allowed on directive SIMD in OpenMP v4.5, try -fopenmp-version=50 - !ERROR: At most one IF clause can appear on the SIMD directive !ERROR: SIMD is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=50 !$omp simd if(.true.) if(simd: .false.) do i = 1, 10 @@ -260,7 +257,7 @@ program main !$omp target if(parallel: .true.) !$omp end target - !ERROR: At most one IF clause can appear on the TARGET directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target if(.true.) if(target: .false.) !$omp end target @@ -277,7 +274,7 @@ program main !$omp target data map(tofrom: i) if(target: .true.) !$omp end target data - !ERROR: At most one IF clause can appear on the TARGET DATA directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target data map(tofrom: i) if(.true.) 
if(target data: .false.) !$omp end target data @@ -291,7 +288,7 @@ program main !ERROR: TARGET is not a constituent of the TARGET ENTER DATA directive !$omp target enter data map(to: i) if(target: .true.) - !ERROR: At most one IF clause can appear on the TARGET ENTER DATA directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target enter data map(to: i) if(.true.) if(target enter data: .false.) ! ---------------------------------------------------------------------------- @@ -304,7 +301,7 @@ program main !ERROR: TARGET is not a constituent of the TARGET EXIT DATA directive !$omp target exit data map(from: i) if(target: .true.) - !ERROR: At most one IF clause can appear on the TARGET EXIT DATA directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target exit data map(from: i) if(.true.) if(target exit data: .false.) ! ---------------------------------------------------------------------------- @@ -490,7 +487,7 @@ program main !ERROR: TARGET is not a constituent of the TARGET UPDATE directive !$omp target update to(i) if(target: .true.) - !ERROR: At most one IF clause can appear on the TARGET UPDATE directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target update to(i) if(.true.) if(target update: .false.) ! ---------------------------------------------------------------------------- @@ -506,7 +503,7 @@ program main !$omp task if(target: .true.) !$omp end task - !ERROR: At most one IF clause can appear on the TASK directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp task if(.true.) if(task: .false.) !$omp end task @@ -529,7 +526,7 @@ program main end do !$omp end taskloop - !ERROR: At most one IF clause can appear on the TASKLOOP directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp taskloop if(.true.) if(taskloop: .false.) 
do i = 1, 10 end do @@ -574,7 +571,6 @@ program main !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v4.5, try -fopenmp-version=52 !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v4.5, try -fopenmp-version=52 - !ERROR: At most one IF clause can appear on the TEAMS directive !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52 !$omp teams if(.true.) if(teams: .false.) !$omp end teams @@ -599,7 +595,6 @@ program main end do !$omp end teams distribute - !ERROR: At most one IF clause can appear on the TEAMS DISTRIBUTE directive !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v4.5, try -fopenmp-version=52 !$omp teams distribute if(.true.) if(teams: .true.) do i = 1, 10 diff --git a/flang/test/Semantics/OpenMP/if-clause-50-suggestion.f90 b/flang/test/Semantics/OpenMP/if-clause-50-suggestion.f90 new file mode 100644 index 0000000000000..f8a4452d01951 --- /dev/null +++ b/flang/test/Semantics/OpenMP/if-clause-50-suggestion.f90 @@ -0,0 +1,14 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=50 + +subroutine f + !OK + !$omp simd if(.true.) + do i = 1, 10 + end do + + !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v5.0, try -fopenmp-version=52 + !$omp teams if(.true.) + !$omp end teams + + !No test for 6.0 because it requires a directive that is not in 5.0 +end diff --git a/flang/test/Semantics/OpenMP/if-clause-50.f90 b/flang/test/Semantics/OpenMP/if-clause-50.f90 index 3c385cdad9e65..23d7738949404 100644 --- a/flang/test/Semantics/OpenMP/if-clause-50.f90 +++ b/flang/test/Semantics/OpenMP/if-clause-50.f90 @@ -24,7 +24,7 @@ program main end do !$omp end distribute parallel do - !ERROR: At most one IF clause can appear on the DISTRIBUTE PARALLEL DO directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp distribute parallel do if(.true.) if(parallel: .false.) 
do i = 1, 10 end do @@ -72,7 +72,7 @@ program main end do !$omp end distribute simd - !ERROR: At most one IF clause can appear on the DISTRIBUTE SIMD directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp distribute simd if(.true.) if(simd: .false.) do i = 1, 10 end do @@ -98,7 +98,7 @@ program main end do !$omp end do simd - !ERROR: At most one IF clause can appear on the DO SIMD directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp do simd if(.true.) if(simd: .false.) do i = 1, 10 end do @@ -117,7 +117,7 @@ program main !$omp parallel if(target: .true.) !$omp end parallel - !ERROR: At most one IF clause can appear on the PARALLEL directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp parallel if(.true.) if(parallel: .false.) !$omp end parallel @@ -140,7 +140,7 @@ program main end do !$omp end parallel do - !ERROR: At most one IF clause can appear on the PARALLEL DO directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp parallel do if(.true.) if(parallel: .false.) do i = 1, 10 end do @@ -178,7 +178,7 @@ program main !$omp parallel sections if(target: .true.) !$omp end parallel sections - !ERROR: At most one IF clause can appear on the PARALLEL SECTIONS directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp parallel sections if(.true.) if(parallel: .false.) !$omp end parallel sections @@ -195,7 +195,7 @@ program main !$omp parallel workshare if(target: .true.) !$omp end parallel workshare - !ERROR: At most one IF clause can appear on the PARALLEL WORKSHARE directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp parallel workshare if(.true.) if(parallel: .false.) 
!$omp end parallel workshare @@ -218,7 +218,7 @@ program main end do !$omp end simd - !ERROR: At most one IF clause can appear on the SIMD directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp simd if(.true.) if(simd: .false.) do i = 1, 10 end do @@ -237,7 +237,7 @@ program main !$omp target if(parallel: .true.) !$omp end target - !ERROR: At most one IF clause can appear on the TARGET directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target if(.true.) if(target: .false.) !$omp end target @@ -254,7 +254,7 @@ program main !$omp target data map(tofrom: i) if(target: .true.) !$omp end target data - !ERROR: At most one IF clause can appear on the TARGET DATA directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target data map(tofrom: i) if(.true.) if(target data: .false.) !$omp end target data @@ -268,7 +268,7 @@ program main !ERROR: TARGET is not a constituent of the TARGET ENTER DATA directive !$omp target enter data map(to: i) if(target: .true.) - !ERROR: At most one IF clause can appear on the TARGET ENTER DATA directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target enter data map(to: i) if(.true.) if(target enter data: .false.) ! ---------------------------------------------------------------------------- @@ -281,7 +281,7 @@ program main !ERROR: TARGET is not a constituent of the TARGET EXIT DATA directive !$omp target exit data map(from: i) if(target: .true.) - !ERROR: At most one IF clause can appear on the TARGET EXIT DATA directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target exit data map(from: i) if(.true.) if(target exit data: .false.) ! ---------------------------------------------------------------------------- @@ -463,7 +463,7 @@ program main !ERROR: TARGET is not a constituent of the TARGET UPDATE directive !$omp target update to(i) if(target: .true.) 
- !ERROR: At most one IF clause can appear on the TARGET UPDATE directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target update to(i) if(.true.) if(target update: .false.) ! ---------------------------------------------------------------------------- @@ -479,7 +479,7 @@ program main !$omp task if(target: .true.) !$omp end task - !ERROR: At most one IF clause can appear on the TASK directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp task if(.true.) if(task: .false.) !$omp end task @@ -502,7 +502,7 @@ program main end do !$omp end taskloop - !ERROR: At most one IF clause can appear on the TASKLOOP directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp taskloop if(.true.) if(taskloop: .false.) do i = 1, 10 end do @@ -546,7 +546,6 @@ program main !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v5.0, try -fopenmp-version=52 !ERROR: IF clause is not allowed on directive TEAMS in OpenMP v5.0, try -fopenmp-version=52 - !ERROR: At most one IF clause can appear on the TEAMS directive !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52 !$omp teams if(.true.) if(teams: .false.) !$omp end teams @@ -571,7 +570,6 @@ program main end do !$omp end teams distribute - !ERROR: At most one IF clause can appear on the TEAMS DISTRIBUTE directive !ERROR: TEAMS is not allowed as 'directive-name-modifier' in OpenMP v5.0, try -fopenmp-version=52 !$omp teams distribute if(.true.) if(teams: .true.) 
do i = 1, 10 diff --git a/flang/test/Semantics/OpenMP/if-clause-60.f90 b/flang/test/Semantics/OpenMP/if-clause-60.f90 new file mode 100644 index 0000000000000..3105bb3251e61 --- /dev/null +++ b/flang/test/Semantics/OpenMP/if-clause-60.f90 @@ -0,0 +1,12 @@ +!RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=60 + +subroutine f +!ERROR: At most one IF clause can apply to each directive constituent + !$omp taskgraph if(.true.) if(.false.) + !$omp end taskgraph + +!ERROR: PARALLEL LOOP is not a constituent of the TEAMS LOOP directive + !$omp teams loop if(parallel loop: .false.) + do i = 1, 10 + end do +end diff --git a/flang/test/Semantics/OpenMP/if-clause.f90 b/flang/test/Semantics/OpenMP/if-clause.f90 index 5e19c78a1ce76..ce11020e50f59 100644 --- a/flang/test/Semantics/OpenMP/if-clause.f90 +++ b/flang/test/Semantics/OpenMP/if-clause.f90 @@ -24,7 +24,7 @@ program main end do !$omp end distribute parallel do - !ERROR: At most one IF clause can appear on the DISTRIBUTE PARALLEL DO directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp distribute parallel do if(.true.) if(parallel: .false.) do i = 1, 10 end do @@ -72,7 +72,7 @@ program main end do !$omp end distribute simd - !ERROR: At most one IF clause can appear on the DISTRIBUTE SIMD directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp distribute simd if(.true.) if(simd: .false.) do i = 1, 10 end do @@ -98,7 +98,7 @@ program main end do !$omp end do simd - !ERROR: At most one IF clause can appear on the DO SIMD directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp do simd if(.true.) if(simd: .false.) do i = 1, 10 end do @@ -117,7 +117,7 @@ program main !$omp parallel if(target: .true.) !$omp end parallel - !ERROR: At most one IF clause can appear on the PARALLEL directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp parallel if(.true.) if(parallel: .false.) 
!$omp end parallel @@ -140,7 +140,7 @@ program main end do !$omp end parallel do - !ERROR: At most one IF clause can appear on the PARALLEL DO directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp parallel do if(.true.) if(parallel: .false.) do i = 1, 10 end do @@ -178,7 +178,7 @@ program main !$omp parallel sections if(target: .true.) !$omp end parallel sections - !ERROR: At most one IF clause can appear on the PARALLEL SECTIONS directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp parallel sections if(.true.) if(parallel: .false.) !$omp end parallel sections @@ -195,7 +195,7 @@ program main !$omp parallel workshare if(target: .true.) !$omp end parallel workshare - !ERROR: At most one IF clause can appear on the PARALLEL WORKSHARE directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp parallel workshare if(.true.) if(parallel: .false.) !$omp end parallel workshare @@ -218,7 +218,7 @@ program main end do !$omp end simd - !ERROR: At most one IF clause can appear on the SIMD directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp simd if(.true.) if(simd: .false.) do i = 1, 10 end do @@ -237,7 +237,7 @@ program main !$omp target if(parallel: .true.) !$omp end target - !ERROR: At most one IF clause can appear on the TARGET directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target if(.true.) if(target: .false.) !$omp end target @@ -254,7 +254,7 @@ program main !$omp target data map(tofrom: i) if(target: .true.) !$omp end target data - !ERROR: At most one IF clause can appear on the TARGET DATA directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target data map(tofrom: i) if(.true.) if(target data: .false.) 
!$omp end target data @@ -268,7 +268,7 @@ program main !ERROR: TARGET is not a constituent of the TARGET ENTER DATA directive !$omp target enter data map(to: i) if(target: .true.) - !ERROR: At most one IF clause can appear on the TARGET ENTER DATA directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target enter data map(to: i) if(.true.) if(target enter data: .false.) ! ---------------------------------------------------------------------------- @@ -281,7 +281,7 @@ program main !ERROR: TARGET is not a constituent of the TARGET EXIT DATA directive !$omp target exit data map(from: i) if(target: .true.) - !ERROR: At most one IF clause can appear on the TARGET EXIT DATA directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target exit data map(from: i) if(.true.) if(target exit data: .false.) ! ---------------------------------------------------------------------------- @@ -458,7 +458,7 @@ program main !ERROR: TARGET is not a constituent of the TARGET UPDATE directive !$omp target update to(i) if(target: .true.) - !ERROR: At most one IF clause can appear on the TARGET UPDATE directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp target update to(i) if(.true.) if(target update: .false.) ! ---------------------------------------------------------------------------- @@ -474,7 +474,7 @@ program main !$omp task if(target: .true.) !$omp end task - !ERROR: At most one IF clause can appear on the TASK directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp task if(.true.) if(task: .false.) !$omp end task @@ -497,7 +497,7 @@ program main end do !$omp end taskloop - !ERROR: At most one IF clause can appear on the TASKLOOP directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp taskloop if(.true.) if(taskloop: .false.) do i = 1, 10 end do @@ -535,7 +535,7 @@ program main !$omp teams if(target: .true.) 
!$omp end teams - !ERROR: At most one IF clause can appear on the TEAMS directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp teams if(.true.) if(teams: .false.) !$omp end teams @@ -558,7 +558,7 @@ program main end do !$omp end teams distribute - !ERROR: At most one IF clause can appear on the TEAMS DISTRIBUTE directive + !ERROR: At most one IF clause can apply to each directive constituent !$omp teams distribute if(.true.) if(teams: .true.) do i = 1, 10 end do diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index 65118c96bc2e8..679a944fc4358 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -761,9 +761,11 @@ def OMP_EndDeclareVariant : Directive<[Spelling<"end declare variant", 1, 52>, let languages = OMP_BeginDeclareVariant.languages; } def OMP_Cancel : Directive<[Spelling<"cancel">]> { + let allowedClauses = [ + VersionedClause, + ]; let allowedOnceClauses = [ VersionedClause, - VersionedClause, ]; let association = AS_None; let category = CA_Executable; @@ -1074,6 +1076,7 @@ def OMP_Parallel : Directive<[Spelling<"parallel">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1081,7 +1084,6 @@ def OMP_Parallel : Directive<[Spelling<"parallel">]> { ]; let allowedOnceClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1168,6 +1170,7 @@ def OMP_Simd : Directive<[Spelling<"simd">]> { let allowedClauses = [ VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1176,7 +1179,6 @@ def OMP_Simd : Directive<[Spelling<"simd">]> { ]; let allowedOnceClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1215,6 +1217,7 @@ def OMP_Target : Directive<[Spelling<"target">]> { VersionedClause, VersionedClause, 
VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1227,7 +1230,6 @@ def OMP_Target : Directive<[Spelling<"target">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1239,10 +1241,12 @@ def OMP_Target : Directive<[Spelling<"target">]> { } def OMP_TargetData : Directive<[Spelling<"target data", 1, 52>, Spelling<"target_data", 60>]> { + let allowedClauses = [ + VersionedClause, + ]; let allowedOnceClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, ]; let requiredClauses = [ @@ -1257,10 +1261,10 @@ def OMP_TargetEnterData : Directive<[Spelling<"target enter data", 1, 52>, Spelling<"target_enter_data", 60>]> { let allowedClauses = [ VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, ]; @@ -1274,10 +1278,10 @@ def OMP_TargetExitData : Directive<[Spelling<"target exit data", 1, 52>, Spelling<"target_exit_data", 60>]> { let allowedClauses = [ VersionedClause, + VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, ]; @@ -1292,11 +1296,11 @@ def OMP_TargetUpdate : Directive<[Spelling<"target update", 1, 52>, let allowedClauses = [ VersionedClause, VersionedClause, + VersionedClause, VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, ]; @@ -1309,6 +1313,7 @@ def OMP_Task : Directive<[Spelling<"task">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1317,7 +1322,6 @@ def OMP_Task : Directive<[Spelling<"task">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1329,10 +1333,12 @@ def OMP_Task : Directive<[Spelling<"task">]> { let category = CA_Executable; } def 
OMP_Taskgraph : Directive<[Spelling<"taskgraph">]> { + let allowedClauses = [ + VersionedClause, + ]; let allowedOnceClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, ]; let association = AS_Block; @@ -1351,6 +1357,7 @@ def OMP_TaskLoop : Directive<[Spelling<"taskloop">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1360,7 +1367,6 @@ def OMP_TaskLoop : Directive<[Spelling<"taskloop">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1396,6 +1402,7 @@ def OMP_Teams : Directive<[Spelling<"teams">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1403,7 +1410,6 @@ def OMP_Teams : Directive<[Spelling<"teams">]> { ]; let allowedOnceClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, ]; @@ -1494,6 +1500,7 @@ def OMP_DistributeParallelDo : Directive<[Spelling<"distribute parallel do">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1504,7 +1511,6 @@ def OMP_DistributeParallelDo : Directive<[Spelling<"distribute parallel do">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1554,6 +1560,7 @@ def OMP_DistributeParallelFor VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1564,7 +1571,6 @@ def OMP_DistributeParallelFor VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1615,6 +1621,7 @@ def OMP_DistributeSimd : Directive<[Spelling<"distribute simd">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ 
-1625,7 +1632,6 @@ def OMP_DistributeSimd : Directive<[Spelling<"distribute simd">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1642,6 +1648,7 @@ def OMP_DoSimd : Directive<[Spelling<"do simd">]> { let allowedClauses = [ VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1649,7 +1656,6 @@ def OMP_DoSimd : Directive<[Spelling<"do simd">]> { ]; let allowedOnceClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1700,6 +1706,7 @@ def OMP_target_loop : Directive<[Spelling<"target loop">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1716,7 +1723,6 @@ def OMP_target_loop : Directive<[Spelling<"target loop">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1729,6 +1735,7 @@ def OMP_MaskedTaskloop : Directive<[Spelling<"masked taskloop">]> { let allowedClauses = [ VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1741,7 +1748,6 @@ def OMP_MaskedTaskloop : Directive<[Spelling<"masked taskloop">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1756,6 +1762,7 @@ def OMP_MaskedTaskloopSimd : Directive<[Spelling<"masked taskloop simd">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1770,7 +1777,6 @@ def OMP_MaskedTaskloopSimd : Directive<[Spelling<"masked taskloop simd">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1787,6 +1793,7 @@ def OMP_MasterTaskloop : Directive<[Spelling<"master taskloop">]> { let allowedClauses = [ 
VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1798,7 +1805,6 @@ def OMP_MasterTaskloop : Directive<[Spelling<"master taskloop">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1813,6 +1819,7 @@ def OMP_MasterTaskloopSimd : Directive<[Spelling<"master taskloop simd">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1826,7 +1833,6 @@ def OMP_MasterTaskloopSimd : Directive<[Spelling<"master taskloop simd">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1844,6 +1850,7 @@ def OMP_ParallelDo : Directive<[Spelling<"parallel do">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1853,7 +1860,6 @@ def OMP_ParallelDo : Directive<[Spelling<"parallel do">]> { let allowedOnceClauses = [ VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1962,6 +1968,7 @@ def OMP_parallel_loop : Directive<[Spelling<"parallel loop">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -1972,7 +1979,6 @@ def OMP_parallel_loop : Directive<[Spelling<"parallel loop">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2010,6 +2016,7 @@ def OMP_ParallelMaskedTaskloop VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2022,7 +2029,6 @@ def OMP_ParallelMaskedTaskloop VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2043,6 +2049,7 @@ def OMP_ParallelMaskedTaskloopSimd 
VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2057,7 +2064,6 @@ def OMP_ParallelMaskedTaskloopSimd VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2079,6 +2085,7 @@ def OMP_ParallelMaster : Directive<[Spelling<"parallel master">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2086,7 +2093,6 @@ def OMP_ParallelMaster : Directive<[Spelling<"parallel master">]> { ]; let allowedOnceClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2101,6 +2107,7 @@ def OMP_ParallelMasterTaskloop VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2112,7 +2119,6 @@ def OMP_ParallelMasterTaskloop VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2133,6 +2139,7 @@ def OMP_ParallelMasterTaskloopSimd VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2146,7 +2153,6 @@ def OMP_ParallelMasterTaskloopSimd VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2168,6 +2174,7 @@ def OMP_ParallelSections : Directive<[Spelling<"parallel sections">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2176,7 +2183,6 @@ def OMP_ParallelSections : Directive<[Spelling<"parallel sections">]> { ]; let allowedOnceClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2190,13 +2196,13 @@ def OMP_ParallelWorkshare : Directive<[Spelling<"parallel workshare">]> { VersionedClause, VersionedClause, VersionedClause, + 
 VersionedClause, VersionedClause, VersionedClause, VersionedClause, ]; let allowedOnceClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2833,6 +2839,7 @@ def OMP_TeamsDistribute : Directive<[Spelling<"teams distribute">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -2843,7 +2850,6 @@ def OMP_TeamsDistribute : Directive<[Spelling<"teams distribute">]> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -3016,6 +3022,7 @@ def OMP_TeamsWorkdistribute : Directive<[Spelling<"teams workdistribute">]> { let allowedClauses = [ VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -3023,7 +3030,6 @@ def OMP_TeamsWorkdistribute : Directive<[Spelling<"teams workdistribute">]> { ]; let allowedOnceClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, ]; @@ -3036,6 +3042,7 @@ def OMP_teams_loop : Directive<[Spelling<"teams loop">]> { VersionedClause, VersionedClause, VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, From a102df5d0208b6ad95647ac22e3ccd06f40626bc Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 24 Jun 2026 12:42:58 +0200 Subject: [PATCH 311/511] [SCEV] Infer addrec nowrap flags during range analysis (#202964) When we're computing the range of the addrec, we already have to reason about whether it wraps, so we may as well determine the nowrap flags at the same time. This is more precise than the previous logic that took the addrec range and checked whether adding a step to it does not wrap. For example, an `{0,+,1}` addrec with a full range can still be non-wrapping. Note that I removed some assertions in the SCEV printer that predicated exit counts actually have predicates.
Due to SCEV's query order dependence, this can happen, also prior to this change, see for example https://llvm.godbolt.org/z/cWK1MMEqv. While this indicates suboptimal results, it's not a bug, and we should not assert. Fixes https://github.com/llvm/llvm-project/issues/200788. --- llvm/include/llvm/Analysis/ScalarEvolution.h | 13 +- llvm/lib/Analysis/ScalarEvolution.cpp | 130 +++++---- .../Delinearization/global_array_bounds.ll | 2 +- .../Analysis/DependenceAnalysis/Banerjee.ll | 22 +- .../Analysis/DependenceAnalysis/BasePtrBug.ll | 2 +- .../DependenceAnalysis/Constraints.ll | 36 +-- .../DependenceAnalysis/DifferentOffsets.ll | 2 +- .../NonCanonicalizedSubscript.ll | 2 +- .../DependenceAnalysis/Propagating.ll | 20 +- .../clamped-access-pattern.ll | 6 +- .../inbounds-gep-in-predicated-blocks.ll | 10 +- .../nssw-predicate-implied.ll | 52 ++-- .../LoopAccessAnalysis/symbolic-stride.ll | 12 +- ...drec-computed-during-addrec-calculation.ll | 2 +- .../backedge-taken-count-guard-info.ll | 2 +- .../ScalarEvolution/becount-invalidation.ll | 4 +- .../ScalarEvolution/different-loops-recs.ll | 32 +-- .../ScalarEvolution/exit-count-non-strict.ll | 30 +- .../ScalarEvolution/exit-count-select-safe.ll | 108 ++++---- .../ScalarEvolution/incorrect-exit-count.ll | 14 +- .../increasing-or-decreasing-iv.ll | 6 +- .../Analysis/ScalarEvolution/limit-depth.ll | 2 +- ...ge-taken-count-guard-info-operand-order.ll | 2 +- ...en-count-guard-info-rewrite-expressions.ll | 4 +- .../max-backedge-taken-count-guard-info.ll | 2 +- .../ScalarEvolution/mul-udiv-folds.ll | 4 +- .../test/Analysis/ScalarEvolution/pr123550.ll | 8 +- llvm/test/Analysis/ScalarEvolution/pr22641.ll | 2 +- .../test/Analysis/ScalarEvolution/ptrtoint.ll | 4 +- .../Analysis/ScalarEvolution/sext-iv-2.ll | 4 +- .../test/Analysis/ScalarEvolution/sext-mul.ll | 8 +- .../trip-count-negative-stride.ll | 12 +- .../ScalarEvolution/umin-umax-folds.ll | 12 +- llvm/test/CodeGen/PowerPC/mma-intrinsics.ll | 212 +++++++-------- 
.../IndVarSimplify/eliminate-exit-no-dl.ll | 3 +- .../fixed-size-no-signed-wrap.ll | 32 ++- .../RISCV/masked_gather_scatter.ll | 39 +-- .../LoopVectorize/X86/cast-costs.ll | 29 +- .../LoopVectorize/iv-select-cmp-no-wrap.ll | 22 +- .../LoopVectorize/iv-select-cmp-trunc.ll | 256 +++++++++--------- ...conditional_surrounding_non_affine_loop.ll | 2 +- 41 files changed, 555 insertions(+), 611 deletions(-) diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h index 37da037ffcae8..1e09dbc3db5f1 100644 --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1983,10 +1983,11 @@ class ScalarEvolution { /// operands iteratively first. const ConstantRange &getRangeRefIter(const SCEV *S, RangeSignHint Hint); - /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Step}. - /// Helper for \c getRange. - ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Step, - const APInt &MaxBECount); + /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Step}, + /// and whether it may wrap. Helper for \c getRange. + std::pair + getRangeForAffineAR(const SCEV *Start, const SCEV *Step, + const APInt &MaxBECount); /// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p /// Start,+,\p Step}. @@ -2386,8 +2387,8 @@ class ScalarEvolution { bool proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step, const Loop *L); - /// Try to prove NSW or NUW on \p AR relying on ConstantRange manipulation. - SCEV::NoWrapFlags proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR); + /// Try to infer NSW or NUW on \p AR relying on ConstantRange manipulation. + void inferNoWrapViaConstantRanges(const SCEVAddRecExpr *AR); /// Try to prove NSW on \p AR by proving facts about conditions known on /// entry and backedge. 
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 6c419a9895ef5..2129c8667cc6c 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -5244,14 +5244,17 @@ class SCEVShiftRewriter : public SCEVRewriteVisitor { } // end anonymous namespace -SCEV::NoWrapFlags -ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) { +void ScalarEvolution::inferNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) { if (!AR->isAffine()) - return SCEV::FlagAnyWrap; + return; - using OBO = OverflowingBinaryOperator; + // Force computation of ranges, which will also perform range-based flag + // inference. + if (!AR->hasNoSignedWrap()) + (void)getSignedRange(AR); - SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap; + if (!AR->hasNoUnsignedWrap()) + (void)getUnsignedRange(AR); if (!AR->hasNoSelfWrap()) { const SCEV *BECount = getConstantMaxBackedgeTakenCount(AR->getLoop()); @@ -5261,31 +5264,9 @@ ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) { unsigned NoOverflowBitWidth = BECountAP.getActiveBits() + StepCR.getMinSignedBits(); if (NoOverflowBitWidth <= getTypeSizeInBits(AR->getType())) - Result = ScalarEvolution::setFlags(Result, SCEV::FlagNW); + const_cast(AR)->setNoWrapFlags(SCEV::FlagNW); } } - - if (!AR->hasNoSignedWrap()) { - ConstantRange AddRecRange = getSignedRange(AR); - ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this)); - - auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion( - Instruction::Add, IncRange, OBO::NoSignedWrap); - if (NSWRegion.contains(AddRecRange)) - Result = ScalarEvolution::setFlags(Result, SCEV::FlagNSW); - } - - if (!AR->hasNoUnsignedWrap()) { - ConstantRange AddRecRange = getUnsignedRange(AR); - ConstantRange IncRange = getUnsignedRange(AR->getStepRecurrence(*this)); - - auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion( - Instruction::Add, IncRange, OBO::NoUnsignedWrap); - if 
(NUWRegion.contains(AddRecRange)) - Result = ScalarEvolution::setFlags(Result, SCEV::FlagNUW); - } - - return Result; } SCEV::NoWrapFlags @@ -5947,10 +5928,8 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN, const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); insertValueToMap(PN, PHISCEV); - if (auto *AR = dyn_cast(PHISCEV)) { - setNoWrapFlags(const_cast(AR), - (AR->getNoWrapFlags() | proveNoWrapViaConstantRanges(AR))); - } + if (auto *AR = dyn_cast(PHISCEV)) + inferNoWrapViaConstantRanges(AR); // We can add Flags to the post-inc expression only if we // know that it is *undefined behavior* for BEValueV to @@ -6077,11 +6056,8 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { forgetMemoizedResults({SymbolicName}); insertValueToMap(PN, PHISCEV); - if (auto *AR = dyn_cast(PHISCEV)) { - setNoWrapFlags( - const_cast(AR), - (AR->getNoWrapFlags() | proveNoWrapViaConstantRanges(AR))); - } + if (auto *AR = dyn_cast(PHISCEV)) + inferNoWrapViaConstantRanges(AR); // We can add Flags to the post-inc expression only if we // know that it is *undefined behavior* for BEValueV to @@ -7042,10 +7018,11 @@ const ConstantRange &ScalarEvolution::getRangeRef( MaxBECount = MaxBECount.zext(BitWidth); if (MaxBECount.getBitWidth() == BitWidth) { - auto RangeFromAffine = getRangeForAffineAR( + auto [RangeFromAffine, Flags] = getRangeForAffineAR( AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount); ConservativeResult = ConservativeResult.intersectWith(RangeFromAffine, RangeType); + const_cast(AddRec)->setNoWrapFlags(Flags); auto RangeFromFactoring = getRangeViaFactoring( AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount); @@ -7223,24 +7200,24 @@ const ConstantRange &ScalarEvolution::getRangeRef( // Given a StartRange, Step and MaxBECount for an expression compute a range of // values that the expression can take. 
Initially, the expression has a value // from StartRange and then is changed by Step up to MaxBECount times. Signed -// argument defines if we treat Step as signed or unsigned. -static ConstantRange getRangeForAffineARHelper(APInt Step, - const ConstantRange &StartRange, - const APInt &MaxBECount, - bool Signed) { +// argument defines if we treat Step as signed or unsigned. The second return +// value indicates that no wrapping occurred. +static std::pair +getRangeForAffineARHelper(APInt Step, const ConstantRange &StartRange, + const APInt &MaxBECount, bool Signed) { unsigned BitWidth = Step.getBitWidth(); assert(BitWidth == StartRange.getBitWidth() && BitWidth == MaxBECount.getBitWidth() && "mismatched bit widths"); // If either Step or MaxBECount is 0, then the expression won't change, and we // just need to return the initial range. if (Step == 0 || MaxBECount == 0) - return StartRange; + return {StartRange, true}; // If we don't know anything about the initial value (i.e. StartRange is // FullRange), then we don't know anything about the final range either. // Return FullRange. if (StartRange.isFullSet()) - return ConstantRange::getFull(BitWidth); + return {ConstantRange::getFull(BitWidth), false}; // If Step is signed and negative, then we use its absolute value, but we also // note that we're moving in the opposite direction. @@ -7256,7 +7233,7 @@ static ConstantRange getRangeForAffineARHelper(APInt Step, // Check if Offset is more than full span of BitWidth. If it is, the // expression is guaranteed to overflow. if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount)) - return ConstantRange::getFull(BitWidth); + return {ConstantRange::getFull(BitWidth), false}; // Offset is by how much the expression can change. Checks above guarantee no // overflow here. @@ -7268,14 +7245,28 @@ static ConstantRange getRangeForAffineARHelper(APInt Step, // if the expression is decreasing and will be increased by Offset otherwise. 
APInt StartLower = StartRange.getLower(); APInt StartUpper = StartRange.getUpper() - 1; - APInt MovedBoundary = Descending ? (StartLower - std::move(Offset)) - : (StartUpper + std::move(Offset)); + bool Overflow; + APInt MovedBoundary; + if (Signed) { + // This does not use sadd_ov, as we want to check overflow for a signed + // start with an unsigned offset. + if (Descending) { + MovedBoundary = StartLower - std::move(Offset); + Overflow = MovedBoundary.sgt(StartLower) || StartRange.isSignWrappedSet(); + } else { + MovedBoundary = StartUpper + std::move(Offset); + Overflow = MovedBoundary.slt(StartUpper) || StartRange.isSignWrappedSet(); + } + } else { + MovedBoundary = StartUpper.uadd_ov(std::move(Offset), Overflow); + Overflow |= StartRange.isWrappedSet(); + } // It's possible that the new minimum/maximum value will fall into the initial // range (due to wrap around). This means that the expression can take any // value in this bitwidth, and we have to return full range. if (StartRange.contains(MovedBoundary)) - return ConstantRange::getFull(BitWidth); + return {ConstantRange::getFull(BitWidth), false}; APInt NewLower = Descending ? std::move(MovedBoundary) : std::move(StartLower); @@ -7284,12 +7275,13 @@ static ConstantRange getRangeForAffineARHelper(APInt Step, NewUpper += 1; // No overflow detected, return [StartLower, StartUpper + Offset + 1) range. 
- return ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper)); + return {ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper)), + !Overflow}; } -ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, - const SCEV *Step, - const APInt &MaxBECount) { +std::pair +ScalarEvolution::getRangeForAffineAR(const SCEV *Start, const SCEV *Step, + const APInt &MaxBECount) { assert(getTypeSizeInBits(Start->getType()) == getTypeSizeInBits(Step->getType()) && getTypeSizeInBits(Start->getType()) == MaxBECount.getBitWidth() && @@ -7301,19 +7293,26 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, // If Step can be both positive and negative, we need to find ranges for the // maximum absolute step values in both directions and union them. - ConstantRange SR = getRangeForAffineARHelper( - StepSRange.getSignedMin(), StartSRange, MaxBECount, /* Signed = */ true); - SR = SR.unionWith(getRangeForAffineARHelper(StepSRange.getSignedMax(), - StartSRange, MaxBECount, - /* Signed = */ true)); + auto [SR1, NSW1] = getRangeForAffineARHelper( + StepSRange.getSignedMin(), StartSRange, MaxBECount, /*Signed=*/true); + auto [SR2, NSW2] = getRangeForAffineARHelper(StepSRange.getSignedMax(), + StartSRange, MaxBECount, + /*Signed=*/true); + ConstantRange SR = SR1.unionWith(SR2); // Next, consider step unsigned. - ConstantRange UR = getRangeForAffineARHelper( + auto [UR, NUW] = getRangeForAffineARHelper( getUnsignedRangeMax(Step), getUnsignedRange(Start), MaxBECount, - /* Signed = */ false); + /*Signed=*/false); + + SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + if (NUW) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); + if (NSW1 && NSW2) + Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); // Finally, intersect signed and unsigned ranges. 
- return SR.intersectWith(UR, ConstantRange::Smallest); + return {SR.intersectWith(UR, ConstantRange::Smallest), Flags}; } ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR( @@ -7491,9 +7490,9 @@ ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start, const SCEV *FalseStep = this->getConstant(StepPattern.FalseValue); ConstantRange TrueRange = - this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount); + this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount).first; ConstantRange FalseRange = - this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount); + this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount).first; return TrueRange.unionWith(FalseRange); } @@ -14196,7 +14195,6 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, SmallVector Preds; auto *PBT = SE->getPredicatedBackedgeTakenCount(L, Preds); if (PBT != BTC) { - assert(!Preds.empty() && "Different predicated BTC, but no predicates"); OS << "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; @@ -14215,8 +14213,6 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, auto *PredConstantMax = SE->getPredicatedConstantMaxBackedgeTakenCount(L, Preds); if (PredConstantMax != ConstantBTC) { - assert(!Preds.empty() && - "different predicated constant max BTC but no predicates"); OS << "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; @@ -14235,8 +14231,6 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE, auto *PredSymbolicMax = SE->getPredicatedSymbolicMaxBackedgeTakenCount(L, Preds); if (SymbolicBTC != PredSymbolicMax) { - assert(!Preds.empty() && - "Different predicated symbolic max BTC, but no predicates"); OS << "Loop "; L->getHeader()->printAsOperand(OS, /*PrintType=*/false); OS << ": "; diff --git a/llvm/test/Analysis/Delinearization/global_array_bounds.ll b/llvm/test/Analysis/Delinearization/global_array_bounds.ll index 2e13fd66eb930..218038f9a9ac4 100644 --- 
a/llvm/test/Analysis/Delinearization/global_array_bounds.ll +++ b/llvm/test/Analysis/Delinearization/global_array_bounds.ll @@ -19,7 +19,7 @@ define void @test_2d_array(i64 %i, i64 %j, i64 %N, i64 %M) { ; CHECK-NEXT: AccessFunction: {{\{\{}}0,+,80}<%for.i>,+,4}<%for.j> ; CHECK-NEXT: Base offset: @test_array_10x20 ; CHECK-NEXT: ArrayDecl[UnknownSize][20] with elements of 4 bytes. -; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>] +; CHECK-NEXT: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>] ; CHECK-NEXT: Delinearization validation: Failed ; entry: diff --git a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll index 6572a8bc0cadb..bb89ad55554ff 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Banerjee.ll @@ -52,11 +52,11 @@ define void @banerjee0(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8 ; DELIN-NEXT: da analyze - confused! ; DELIN-NEXT: Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: %0 = load i64, ptr %arrayidx6, align 8 -; DELIN-NEXT: da analyze - input [0 *]! +; DELIN-NEXT: da analyze - none! ; DELIN-NEXT: Src: %0 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8 ; DELIN-NEXT: da analyze - confused! ; DELIN-NEXT: Src: store i64 %0, ptr %B.addr.11, align 8 --> Dst: store i64 %0, ptr %B.addr.11, align 8 -; DELIN-NEXT: da analyze - output [0 *]! +; DELIN-NEXT: da analyze - none! ; entry: br label %for.cond1.preheader @@ -802,11 +802,11 @@ define void @banerjee9(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 ; CHECK-NEXT: da analyze - output [* *]! ; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8 -; CHECK-NEXT: da analyze - flow [* *|<]! 
+; CHECK-NEXT: da analyze - flow [<= 0|<]! ; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %1 = load i64, ptr %arrayidx7, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8 -; CHECK-NEXT: da analyze - input [* *]! +; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %1 = load i64, ptr %arrayidx7, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i64 %1, ptr %B.addr.11, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 @@ -816,11 +816,11 @@ define void @banerjee9(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 ; NORMALIZE-NEXT: da analyze - output [* *]! ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8 -; NORMALIZE-NEXT: da analyze - flow [* *|<]! +; NORMALIZE-NEXT: da analyze - flow [<= 0|<]! ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; NORMALIZE-NEXT: da analyze - confused! ; NORMALIZE-NEXT: Src: %1 = load i64, ptr %arrayidx7, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8 -; NORMALIZE-NEXT: da analyze - input [* *]! +; NORMALIZE-NEXT: da analyze - none! ; NORMALIZE-NEXT: Src: %1 = load i64, ptr %arrayidx7, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; NORMALIZE-NEXT: da analyze - confused! ; NORMALIZE-NEXT: Src: store i64 %1, ptr %B.addr.11, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 @@ -830,7 +830,7 @@ define void @banerjee9(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 ; DELIN-NEXT: da analyze - output [* *]! 
; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8 -; DELIN-NEXT: da analyze - flow [* *|<]! +; DELIN-NEXT: da analyze - flow [<= 0|<]! ; DELIN-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; DELIN-NEXT: da analyze - confused! ; DELIN-NEXT: Src: %1 = load i64, ptr %arrayidx7, align 8 --> Dst: %1 = load i64, ptr %arrayidx7, align 8 @@ -888,11 +888,11 @@ define void @banerjee10(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx6, align 8 -; CHECK-NEXT: da analyze - flow [* *|<]! +; CHECK-NEXT: da analyze - flow [<> 0]! ; CHECK-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %1 = load i64, ptr %arrayidx6, align 8 --> Dst: %1 = load i64, ptr %arrayidx6, align 8 -; CHECK-NEXT: da analyze - input [* *]! +; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %1 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i64 %1, ptr %B.addr.11, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 @@ -902,11 +902,11 @@ define void @banerjee10(ptr %A, ptr %B, i64 %m, i64 %n) nounwind uwtable ssp { ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 0, ptr %arrayidx, align 8 ; NORMALIZE-NEXT: da analyze - none! ; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: %1 = load i64, ptr %arrayidx6, align 8 -; NORMALIZE-NEXT: da analyze - flow [* *|<]! +; NORMALIZE-NEXT: da analyze - flow [<> 0]! 
; NORMALIZE-NEXT: Src: store i64 0, ptr %arrayidx, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; NORMALIZE-NEXT: da analyze - confused! ; NORMALIZE-NEXT: Src: %1 = load i64, ptr %arrayidx6, align 8 --> Dst: %1 = load i64, ptr %arrayidx6, align 8 -; NORMALIZE-NEXT: da analyze - input [* *]! +; NORMALIZE-NEXT: da analyze - none! ; NORMALIZE-NEXT: Src: %1 = load i64, ptr %arrayidx6, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 ; NORMALIZE-NEXT: da analyze - confused! ; NORMALIZE-NEXT: Src: store i64 %1, ptr %B.addr.11, align 8 --> Dst: store i64 %1, ptr %B.addr.11, align 8 diff --git a/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll b/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll index b4eba4d35b2f4..dd35508b93ddd 100644 --- a/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll +++ b/llvm/test/Analysis/DependenceAnalysis/BasePtrBug.ll @@ -22,7 +22,7 @@ define void @test1(ptr nocapture %A, ptr nocapture %B, i32 %N) #0 { ; CHECK-NEXT: Src: %0 = load i32, ptr %gep.0, align 4 --> Dst: store i32 %add, ptr %gep.B, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %1 = load i32, ptr %gep.1, align 4 --> Dst: %1 = load i32, ptr %gep.1, align 4 -; CHECK-NEXT: da analyze - input [*]! +; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %1 = load i32, ptr %gep.1, align 4 --> Dst: store i32 %add, ptr %gep.B, align 4 ; CHECK-NEXT: da analyze - confused! 
; CHECK-NEXT: Src: store i32 %add, ptr %gep.B, align 4 --> Dst: store i32 %add, ptr %gep.B, align 4 diff --git a/llvm/test/Analysis/DependenceAnalysis/Constraints.ll b/llvm/test/Analysis/DependenceAnalysis/Constraints.ll index e4063186ac73e..f8f8c5d7f5501 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Constraints.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Constraints.ll @@ -37,15 +37,15 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) { ; CHECK-NEXT: Src: %out_l.promoted = load i32, ptr @out_l, align 4 --> Dst: store i32 %33, ptr @out_l, align 4 ; CHECK-NEXT: da analyze - anti [|<]! ; CHECK-NEXT: Src: store i32 0, ptr %13, align 4 --> Dst: store i32 0, ptr %13, align 4 -; CHECK-NEXT: da analyze - output [* * *]! +; CHECK-NEXT: da analyze - output [S * *]! ; CHECK-NEXT: Src: store i32 0, ptr %13, align 4 --> Dst: %18 = load i32, ptr %17, align 4 -; CHECK-NEXT: da analyze - flow [* * *|<]! +; CHECK-NEXT: da analyze - flow [S * *|<]! ; CHECK-NEXT: Src: store i32 0, ptr %13, align 4 --> Dst: %20 = load i32, ptr %19, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 0, ptr %13, align 4 --> Dst: %23 = load i32, ptr %22, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 0, ptr %13, align 4 --> Dst: store i32 %24, ptr %25, align 4 -; CHECK-NEXT: da analyze - output [* * *|<]! +; CHECK-NEXT: da analyze - output [S * *|<]! ; CHECK-NEXT: Src: store i32 0, ptr %13, align 4 --> Dst: %27 = load i32, ptr %26, align 4 ; CHECK-NEXT: da analyze - flow [* * *|<]! ; CHECK-NEXT: Src: store i32 0, ptr %13, align 4 --> Dst: %29 = load i32, ptr %28, align 4 @@ -57,15 +57,15 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) { ; CHECK-NEXT: Src: store i32 0, ptr %13, align 4 --> Dst: store i32 %33, ptr @out_l, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %18 = load i32, ptr %17, align 4 --> Dst: %18 = load i32, ptr %17, align 4 -; CHECK-NEXT: da analyze - input [* * * *]! 
+; CHECK-NEXT: da analyze - input [S * * *]! ; CHECK-NEXT: Src: %18 = load i32, ptr %17, align 4 --> Dst: %20 = load i32, ptr %19, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %18 = load i32, ptr %17, align 4 --> Dst: %23 = load i32, ptr %22, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %18 = load i32, ptr %17, align 4 --> Dst: store i32 %24, ptr %25, align 4 -; CHECK-NEXT: da analyze - anti [* * * *|<]! +; CHECK-NEXT: da analyze - anti [S * * *|<]! ; CHECK-NEXT: Src: %18 = load i32, ptr %17, align 4 --> Dst: %27 = load i32, ptr %26, align 4 -; CHECK-NEXT: da analyze - input [* * *|<]! +; CHECK-NEXT: da analyze - input [S * *|<]! ; CHECK-NEXT: Src: %18 = load i32, ptr %17, align 4 --> Dst: %29 = load i32, ptr %28, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %18 = load i32, ptr %17, align 4 --> Dst: store i32 %30, ptr %31, align 4 @@ -75,9 +75,9 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) { ; CHECK-NEXT: Src: %18 = load i32, ptr %17, align 4 --> Dst: store i32 %33, ptr @out_l, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %20 = load i32, ptr %19, align 4 --> Dst: %20 = load i32, ptr %19, align 4 -; CHECK-NEXT: da analyze - input [* * * *]! +; CHECK-NEXT: da analyze - input [S * S *]! ; CHECK-NEXT: Src: %20 = load i32, ptr %19, align 4 --> Dst: %23 = load i32, ptr %22, align 4 -; CHECK-NEXT: da analyze - input [* * * *|<]! +; CHECK-NEXT: da analyze - input [S * * *|<]! ; CHECK-NEXT: Src: %20 = load i32, ptr %19, align 4 --> Dst: store i32 %24, ptr %25, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %20 = load i32, ptr %19, align 4 --> Dst: %27 = load i32, ptr %26, align 4 @@ -87,11 +87,11 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) { ; CHECK-NEXT: Src: %20 = load i32, ptr %19, align 4 --> Dst: store i32 %30, ptr %31, align 4 ; CHECK-NEXT: da analyze - anti [* * *|<]! 
; CHECK-NEXT: Src: %20 = load i32, ptr %19, align 4 --> Dst: %32 = load i32, ptr %6, align 4 -; CHECK-NEXT: da analyze - input [*|<]! +; CHECK-NEXT: da analyze - input [S|<]! ; CHECK-NEXT: Src: %20 = load i32, ptr %19, align 4 --> Dst: store i32 %33, ptr @out_l, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %23 = load i32, ptr %22, align 4 --> Dst: %23 = load i32, ptr %22, align 4 -; CHECK-NEXT: da analyze - input [* * * *]! +; CHECK-NEXT: da analyze - input [S * * *]! ; CHECK-NEXT: Src: %23 = load i32, ptr %22, align 4 --> Dst: store i32 %24, ptr %25, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %23 = load i32, ptr %22, align 4 --> Dst: %27 = load i32, ptr %26, align 4 @@ -99,15 +99,15 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) { ; CHECK-NEXT: Src: %23 = load i32, ptr %22, align 4 --> Dst: %29 = load i32, ptr %28, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %23 = load i32, ptr %22, align 4 --> Dst: store i32 %30, ptr %31, align 4 -; CHECK-NEXT: da analyze - anti [* * *|<]! +; CHECK-NEXT: da analyze - anti [S * *|<]! ; CHECK-NEXT: Src: %23 = load i32, ptr %22, align 4 --> Dst: %32 = load i32, ptr %6, align 4 -; CHECK-NEXT: da analyze - input [*|<]! +; CHECK-NEXT: da analyze - input [S|<]! ; CHECK-NEXT: Src: %23 = load i32, ptr %22, align 4 --> Dst: store i32 %33, ptr @out_l, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 %24, ptr %25, align 4 --> Dst: store i32 %24, ptr %25, align 4 -; CHECK-NEXT: da analyze - output [* * * *]! +; CHECK-NEXT: da analyze - output [S * * *]! ; CHECK-NEXT: Src: store i32 %24, ptr %25, align 4 --> Dst: %27 = load i32, ptr %26, align 4 -; CHECK-NEXT: da analyze - flow [* * *|<]! +; CHECK-NEXT: da analyze - flow [S * *|<]! ; CHECK-NEXT: Src: store i32 %24, ptr %25, align 4 --> Dst: %29 = load i32, ptr %28, align 4 ; CHECK-NEXT: da analyze - none! 
; CHECK-NEXT: Src: store i32 %24, ptr %25, align 4 --> Dst: store i32 %30, ptr %31, align 4 @@ -117,7 +117,7 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) { ; CHECK-NEXT: Src: store i32 %24, ptr %25, align 4 --> Dst: store i32 %33, ptr @out_l, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %27 = load i32, ptr %26, align 4 --> Dst: %27 = load i32, ptr %26, align 4 -; CHECK-NEXT: da analyze - input [* * *]! +; CHECK-NEXT: da analyze - input [S * *]! ; CHECK-NEXT: Src: %27 = load i32, ptr %26, align 4 --> Dst: %29 = load i32, ptr %28, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %27 = load i32, ptr %26, align 4 --> Dst: store i32 %30, ptr %31, align 4 @@ -127,7 +127,7 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) { ; CHECK-NEXT: Src: %27 = load i32, ptr %26, align 4 --> Dst: store i32 %33, ptr @out_l, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %29 = load i32, ptr %28, align 4 --> Dst: %29 = load i32, ptr %28, align 4 -; CHECK-NEXT: da analyze - input [* * *]! +; CHECK-NEXT: da analyze - input [S * *]! ; CHECK-NEXT: Src: %29 = load i32, ptr %28, align 4 --> Dst: store i32 %30, ptr %31, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %29 = load i32, ptr %28, align 4 --> Dst: %32 = load i32, ptr %6, align 4 @@ -135,9 +135,9 @@ define void @dep_constraint_crash_test(i32 %M, i32 %N) { ; CHECK-NEXT: Src: %29 = load i32, ptr %28, align 4 --> Dst: store i32 %33, ptr @out_l, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 %30, ptr %31, align 4 --> Dst: store i32 %30, ptr %31, align 4 -; CHECK-NEXT: da analyze - output [* * *]! +; CHECK-NEXT: da analyze - output [S * *]! ; CHECK-NEXT: Src: store i32 %30, ptr %31, align 4 --> Dst: %32 = load i32, ptr %6, align 4 -; CHECK-NEXT: da analyze - flow [*|<]! +; CHECK-NEXT: da analyze - flow [S|<]! ; CHECK-NEXT: Src: store i32 %30, ptr %31, align 4 --> Dst: store i32 %33, ptr @out_l, align 4 ; CHECK-NEXT: da analyze - none! 
; CHECK-NEXT: Src: %32 = load i32, ptr %6, align 4 --> Dst: %32 = load i32, ptr %6, align 4 diff --git a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll index 3df7e35b4f16c..077d5bde44eac 100644 --- a/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll +++ b/llvm/test/Analysis/DependenceAnalysis/DifferentOffsets.ll @@ -148,7 +148,7 @@ end: define void @multidim_accesses(ptr %A) { ; CHECK-LABEL: 'multidim_accesses' ; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx0, align 4 -; CHECK-NEXT: da analyze - output [0 0 *]! +; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 1, ptr %idx0, align 4 --> Dst: store i32 1, ptr %idx1, align 4 ; CHECK-NEXT: da analyze - output [* * *|<]! ; CHECK-NEXT: Src: store i32 1, ptr %idx1, align 4 --> Dst: store i32 1, ptr %idx1, align 4 diff --git a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll index 491a309193258..a3c69270cf4f6 100644 --- a/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll +++ b/llvm/test/Analysis/DependenceAnalysis/NonCanonicalizedSubscript.ll @@ -61,7 +61,7 @@ define void @coupled_miv_type_mismatch(i32 %n) { ; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx5, align 4 --> Dst: store i32 %add6, ptr %arrayidx10, align 4 ; CHECK-NEXT: da analyze - anti [* *|<]! ; CHECK-NEXT: Src: store i32 %add6, ptr %arrayidx10, align 4 --> Dst: store i32 %add6, ptr %arrayidx10, align 4 -; CHECK-NEXT: da analyze - output [0 *]! +; CHECK-NEXT: da analyze - none! 
; entry: br label %for.cond diff --git a/llvm/test/Analysis/DependenceAnalysis/Propagating.ll b/llvm/test/Analysis/DependenceAnalysis/Propagating.ll index 1d8475f43ec6c..75d13aa82faac 100644 --- a/llvm/test/Analysis/DependenceAnalysis/Propagating.ll +++ b/llvm/test/Analysis/DependenceAnalysis/Propagating.ll @@ -16,7 +16,7 @@ define void @prop0(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %conv, ptr %arrayidx5, align 4 ; CHECK-NEXT: da analyze - output [* *]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: %0 = load i32, ptr %arrayidx8, align 4 -; CHECK-NEXT: da analyze - flow [* *|<]! +; CHECK-NEXT: da analyze - flow [* <>]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx8, align 4 --> Dst: %0 = load i32, ptr %arrayidx8, align 4 @@ -200,7 +200,7 @@ define void @prop3(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: store i32 %conv, ptr %arrayidx4, align 4 ; CHECK-NEXT: da analyze - output [* *]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx8, align 4 -; CHECK-NEXT: da analyze - flow [* *|<]! +; CHECK-NEXT: da analyze - flow [<> *]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx8, align 4 --> Dst: %0 = load i32, ptr %arrayidx8, align 4 @@ -258,7 +258,7 @@ define void @prop4(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx6, align 4 --> Dst: store i32 %conv, ptr %arrayidx6, align 4 ; CHECK-NEXT: da analyze - output [* *]! 
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx6, align 4 --> Dst: %0 = load i32, ptr %arrayidx10, align 4 -; CHECK-NEXT: da analyze - flow [* *|<]! +; CHECK-NEXT: da analyze - flow [* <>]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx6, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx10, align 4 --> Dst: %0 = load i32, ptr %arrayidx10, align 4 @@ -377,7 +377,7 @@ define void @prop6(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx6, align 4 --> Dst: store i32 %conv, ptr %arrayidx6, align 4 ; CHECK-NEXT: da analyze - output [* *]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx6, align 4 --> Dst: %0 = load i32, ptr %arrayidx11, align 4 -; CHECK-NEXT: da analyze - flow [* *|<]! +; CHECK-NEXT: da analyze - flow [* <>]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx6, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx11, align 4 --> Dst: %0 = load i32, ptr %arrayidx11, align 4 @@ -435,13 +435,13 @@ for.end14: ; preds = %for.inc12 define void @prop7(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { ; CHECK-LABEL: 'prop7' ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: store i32 %conv, ptr %arrayidx7, align 4 -; CHECK-NEXT: da analyze - output [0 *]! +; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx13, align 4 -; CHECK-NEXT: da analyze - flow [* *|<]! +; CHECK-NEXT: da analyze - flow [* -38]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx13, align 4 --> Dst: %0 = load i32, ptr %arrayidx13, align 4 -; CHECK-NEXT: da analyze - input [0 *]! +; CHECK-NEXT: da analyze - none! 
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx13, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.11, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 @@ -499,11 +499,11 @@ define void @prop8(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: store i32 %conv, ptr %arrayidx4, align 4 ; CHECK-NEXT: da analyze - output [S 0]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: %0 = load i32, ptr %arrayidx9, align 4 -; CHECK-NEXT: da analyze - flow [* *|<]! +; CHECK-NEXT: da analyze - flow [* <>]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx4, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx9, align 4 --> Dst: %0 = load i32, ptr %arrayidx9, align 4 -; CHECK-NEXT: da analyze - input [0 *]! +; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx9, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: store i32 %0, ptr %B.addr.11, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 @@ -557,7 +557,7 @@ define void @prop9(ptr %A, ptr %B, i32 %n) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: store i32 %conv, ptr %arrayidx7, align 4 ; CHECK-NEXT: da analyze - output [* *]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx9, align 4 -; CHECK-NEXT: da analyze - flow [* *|<]! +; CHECK-NEXT: da analyze - flow [* <>]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx7, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 ; CHECK-NEXT: da analyze - confused! 
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx9, align 4 --> Dst: %0 = load i32, ptr %arrayidx9, align 4 diff --git a/llvm/test/Analysis/LoopAccessAnalysis/clamped-access-pattern.ll b/llvm/test/Analysis/LoopAccessAnalysis/clamped-access-pattern.ll index 55ee04388515b..3ce2fc37c3b48 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/clamped-access-pattern.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/clamped-access-pattern.ll @@ -308,7 +308,7 @@ define void @clamped_small_bound(ptr %a) { ; CHECK-NEXT: Expressions re-written: ; CHECK-NEXT: [PSE] %gep = getelementptr inbounds i32, ptr %a, i64 %idx: ; CHECK-NEXT: ((4 * (zext i1 {false,+,true}<%loop> to i64)) + %a) -; CHECK-NEXT: --> {%a,+,-4}<%loop> +; CHECK-NEXT: --> {%a,+,-4}<%loop> ; entry: br label %loop @@ -1040,7 +1040,7 @@ define void @clamped_mul_huge_scale_as1(ptr addrspace(1) %a) { ; CHECK-LABEL: 'clamped_mul_huge_scale_as1' ; CHECK-NEXT: loop: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop -; CHECK-NEXT: Unsafe indirect dependence. +; CHECK-NEXT: Unsafe indirect dependence. 
; CHECK-NEXT: Dependences: ; CHECK-NEXT: IndirectUnsafe: ; CHECK-NEXT: %ld = load i64, ptr addrspace(1) %gep, align 8 -> @@ -1054,7 +1054,7 @@ define void @clamped_mul_huge_scale_as1(ptr addrspace(1) %a) { ; CHECK-NEXT: {0,+,1}<%loop> Added Flags: ; CHECK-EMPTY: ; CHECK-NEXT: Expressions re-written: -; CHECK-NEXT: [PSE] %gep = getelementptr inbounds i8, ptr addrspace(1) %a, i128 %off: +; CHECK-NEXT: [PSE] %gep = getelementptr inbounds i8, ptr addrspace(1) %a, i128 %off: ; CHECK-NEXT: ((36893488147419103232 * (zext i2 {0,+,1}<%loop> to i128)) + %a) ; CHECK-NEXT: --> {%a,+,36893488147419103232}<%loop> ; diff --git a/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll b/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll index fb4e91f38afbb..aeaf59c7aea44 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/inbounds-gep-in-predicated-blocks.ll @@ -168,8 +168,8 @@ exit: define i32 @test_nusw_gep_with_load_user_outside_loop(ptr %A) { ; CHECK-LABEL: 'test_nusw_gep_with_load_user_outside_loop' ; CHECK-NEXT: loop.header: -; CHECK-NEXT: Report: unsafe dependent memory operations in loop. -; CHECK-NEXT: Unknown data dependence. +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma clang loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. 
; CHECK-NEXT: Dependences: ; CHECK-NEXT: Unknown: ; CHECK-NEXT: store i32 0, ptr %A, align 4 -> @@ -179,7 +179,7 @@ define i32 @test_nusw_gep_with_load_user_outside_loop(ptr %A) { ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: ; CHECK-NEXT: (Low: (-392 + %A) High: (8 + %A)) -; CHECK-NEXT: Member: {(4 + %A),+,-4}<%loop.header> +; CHECK-NEXT: Member: {(4 + %A),+,-4}<%loop.header> ; CHECK-NEXT: Member: %A ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. @@ -187,9 +187,9 @@ define i32 @test_nusw_gep_with_load_user_outside_loop(ptr %A) { ; CHECK-NEXT: {true,+,true}<%loop.header> Added Flags: ; CHECK-EMPTY: ; CHECK-NEXT: Expressions re-written: -; CHECK-NEXT: [PSE] %gep = getelementptr nusw i32, ptr %A, i64 %and: +; CHECK-NEXT: [PSE] %gep = getelementptr nusw i32, ptr %A, i64 %and: ; CHECK-NEXT: ((4 * (zext i1 {true,+,true}<%loop.header> to i64)) + %A) -; CHECK-NEXT: --> {(4 + %A),+,-4}<%loop.header> +; CHECK-NEXT: --> {(4 + %A),+,-4}<%loop.header> ; entry: br label %loop.header diff --git a/llvm/test/Analysis/LoopAccessAnalysis/nssw-predicate-implied.ll b/llvm/test/Analysis/LoopAccessAnalysis/nssw-predicate-implied.ll index 168e9a867d217..2cd487f71f068 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/nssw-predicate-implied.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/nssw-predicate-implied.ll @@ -18,10 +18,10 @@ define void @wrap_check_iv.3_implies_iv.2(i32 noundef %N, ptr %dst, ptr %src) { ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: ; CHECK-NEXT: (Low: %dst High: (4 + (12 * (zext i32 (-1 + %N) to i64)) + %dst)) -; CHECK-NEXT: Member: {%dst,+,12}<%loop> +; CHECK-NEXT: Member: {%dst,+,12}<%loop> ; CHECK-NEXT: Group GRP1: ; CHECK-NEXT: (Low: %src High: (4 + (8 * (zext i32 (-1 + %N) to i64)) + %src)) -; CHECK-NEXT: Member: {%src,+,8}<%loop> +; CHECK-NEXT: Member: {%src,+,8}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -30,10 +30,10 @@ define void @wrap_check_iv.3_implies_iv.2(i32 noundef %N, ptr %dst, ptr %src) { ; CHECK-NEXT: Expressions re-written: ; CHECK-NEXT: [PSE] %gep.iv.2 = getelementptr inbounds i32, ptr %src, i64 %ext.iv.2: ; CHECK-NEXT: ((4 * (sext i32 {0,+,2}<%loop> to i64)) + %src) -; CHECK-NEXT: --> {%src,+,8}<%loop> +; CHECK-NEXT: --> {%src,+,8}<%loop> ; CHECK-NEXT: [PSE] %gep.iv.3 = getelementptr inbounds i32, ptr %dst, i64 %ext.iv.3: ; CHECK-NEXT: ((4 * (sext i32 {0,+,3}<%loop> to i64)) + %dst) -; CHECK-NEXT: --> {%dst,+,12}<%loop> +; CHECK-NEXT: --> {%dst,+,12}<%loop> ; entry: br label %loop @@ -73,10 +73,10 @@ define void @wrap_check_iv.3_implies_iv.2_different_start(i32 noundef %N, ptr %d ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: ; CHECK-NEXT: (Low: (12 + %dst) High: (16 + (8 * (zext i32 (-1 + %N) to i64)) + %dst)) -; CHECK-NEXT: Member: {(12 + %dst),+,8}<%loop> +; CHECK-NEXT: Member: {(12 + %dst),+,8}<%loop> ; CHECK-NEXT: Group GRP1: ; CHECK-NEXT: (Low: %src High: (4 + (8 * (zext i32 (-1 + %N) to i64)) + %src)) -; CHECK-NEXT: Member: {%src,+,8}<%loop> +; CHECK-NEXT: Member: {%src,+,8}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -85,10 +85,10 @@ define void @wrap_check_iv.3_implies_iv.2_different_start(i32 noundef %N, ptr %d ; CHECK-NEXT: Expressions re-written: ; CHECK-NEXT: [PSE] %gep.iv.2 = getelementptr inbounds i32, ptr %src, i64 %ext.iv.2: ; CHECK-NEXT: ((4 * (sext i32 {0,+,2}<%loop> to i64)) + %src) -; CHECK-NEXT: --> {%src,+,8}<%loop> +; CHECK-NEXT: --> {%src,+,8}<%loop> ; CHECK-NEXT: [PSE] %gep.iv.3 = getelementptr inbounds i32, ptr %dst, i64 %ext.iv.3: ; CHECK-NEXT: (4 + (4 * (sext i32 {2,+,2}<%loop> to i64)) + %dst) -; CHECK-NEXT: --> {(12 + %dst),+,8}<%loop> +; CHECK-NEXT: --> {(12 + %dst),+,8}<%loop> ; entry: br label %loop @@ -128,10 +128,10 @@ define void @wrap_check_iv.3_implies_iv.2_predicates_added_in_different_order(i3 ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: ; CHECK-NEXT: (Low: %dst High: (4 + (8 * (zext i32 (-1 + %N) to i64)) + %dst)) -; CHECK-NEXT: Member: {%dst,+,8}<%loop> +; CHECK-NEXT: Member: {%dst,+,8}<%loop> ; CHECK-NEXT: Group GRP1: ; CHECK-NEXT: (Low: %src High: (4 + (12 * (zext i32 (-1 + %N) to i64)) + %src)) -; CHECK-NEXT: Member: {%src,+,12}<%loop> +; CHECK-NEXT: Member: {%src,+,12}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -140,10 +140,10 @@ define void @wrap_check_iv.3_implies_iv.2_predicates_added_in_different_order(i3 ; CHECK-NEXT: Expressions re-written: ; CHECK-NEXT: [PSE] %gep.iv.3 = getelementptr inbounds i32, ptr %src, i64 %ext.iv.3: ; CHECK-NEXT: ((4 * (sext i32 {0,+,3}<%loop> to i64)) + %src) -; CHECK-NEXT: --> {%src,+,12}<%loop> +; CHECK-NEXT: --> {%src,+,12}<%loop> ; CHECK-NEXT: [PSE] %gep.iv.2 = getelementptr inbounds i32, ptr %dst, i64 %ext.iv.2: ; CHECK-NEXT: ((4 * (sext i32 {0,+,2}<%loop> to i64)) + %dst) -; CHECK-NEXT: --> {%dst,+,8}<%loop> +; CHECK-NEXT: --> {%dst,+,8}<%loop> ; entry: br label %loop @@ -182,10 +182,10 @@ define void @wrap_check_iv.3_does_not_implies_iv.2_due_to_start(i32 noundef %N, ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: ; CHECK-NEXT: (Low: %dst High: (4 + (12 * (zext i32 (-1 + %N) to i64)) + %dst)) -; CHECK-NEXT: Member: {%dst,+,12}<%loop> +; CHECK-NEXT: Member: {%dst,+,12}<%loop> ; CHECK-NEXT: Group GRP1: ; CHECK-NEXT: (Low: (40 + %src) High: (44 + (8 * (zext i32 (-1 + %N) to i64)) + %src)) -; CHECK-NEXT: Member: {(40 + %src),+,8}<%loop> +; CHECK-NEXT: Member: {(40 + %src),+,8}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -195,10 +195,10 @@ define void @wrap_check_iv.3_does_not_implies_iv.2_due_to_start(i32 noundef %N, ; CHECK-NEXT: Expressions re-written: ; CHECK-NEXT: [PSE] %gep.iv.2 = getelementptr inbounds i32, ptr %src, i64 %ext.iv.2: ; CHECK-NEXT: ((4 * (sext i32 {10,+,2}<%loop> to i64)) + %src) -; CHECK-NEXT: --> {(40 + %src),+,8}<%loop> +; CHECK-NEXT: --> {(40 + %src),+,8}<%loop> ; CHECK-NEXT: [PSE] %gep.iv.3 = getelementptr inbounds i32, ptr %dst, i64 %ext.iv.3: ; CHECK-NEXT: ((4 * (sext i32 {0,+,3}<%loop> to i64)) + %dst) -; CHECK-NEXT: --> {%dst,+,12}<%loop> +; CHECK-NEXT: --> {%dst,+,12}<%loop> ; entry: br label %loop @@ -237,10 +237,10 @@ define void @wrap_check_iv.3_does_not_imply_iv.2_due_to_start_negative(i32 nound ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: ; CHECK-NEXT: (Low: (-4 + %dst) High: ((12 * (zext i32 (-1 + %N) to i64)) + %dst)) -; CHECK-NEXT: Member: {(-4 + %dst),+,12}<%loop> +; CHECK-NEXT: Member: {(-4 + %dst),+,12}<%loop> ; CHECK-NEXT: Group GRP1: ; CHECK-NEXT: (Low: %src High: (4 + (8 * (zext i32 (-1 + %N) to i64)) + %src)) -; CHECK-NEXT: Member: {%src,+,8}<%loop> +; CHECK-NEXT: Member: {%src,+,8}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -250,10 +250,10 @@ define void @wrap_check_iv.3_does_not_imply_iv.2_due_to_start_negative(i32 nound ; CHECK-NEXT: Expressions re-written: ; CHECK-NEXT: [PSE] %gep.iv.2 = getelementptr inbounds i32, ptr %src, i64 %ext.iv.2: ; CHECK-NEXT: ((4 * (sext i32 {0,+,2}<%loop> to i64)) + %src) -; CHECK-NEXT: --> {%src,+,8}<%loop> +; CHECK-NEXT: --> {%src,+,8}<%loop> ; CHECK-NEXT: [PSE] %gep.iv.3 = getelementptr inbounds i32, ptr %dst, i64 %ext.iv.3: ; CHECK-NEXT: ((4 * (sext i32 {-1,+,3}<%loop> to i64)) + %dst) -; CHECK-NEXT: --> {(-4 + %dst),+,12}<%loop> +; CHECK-NEXT: --> {(-4 + %dst),+,12}<%loop> ; entry: br label %loop @@ -292,10 +292,10 @@ define void @wrap_check_iv.3_does_not_imply_iv.2_due_to_negative_step(i32 nounde ; CHECK-NEXT: Grouped accesses: ; CHECK-NEXT: Group GRP0: ; CHECK-NEXT: (Low: ((-4 * (zext i32 (-1 + %N) to i64)) + %dst) High: (4 + %dst)) -; CHECK-NEXT: Member: {%dst,+,-4}<%loop> +; CHECK-NEXT: Member: {%dst,+,-4}<%loop> ; CHECK-NEXT: Group GRP1: ; CHECK-NEXT: (Low: %src High: (4 + (8 * (zext i32 (-1 + %N) to i64)) + %src)) -; CHECK-NEXT: Member: {%src,+,8}<%loop> +; CHECK-NEXT: Member: {%src,+,8}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -305,10 +305,10 @@ define void @wrap_check_iv.3_does_not_imply_iv.2_due_to_negative_step(i32 nounde ; CHECK-NEXT: Expressions re-written: ; CHECK-NEXT: [PSE] %gep.iv.2 = getelementptr inbounds i32, ptr %src, i64 %ext.iv.2: ; CHECK-NEXT: ((4 * (sext i32 {0,+,2}<%loop> to i64)) + %src) -; CHECK-NEXT: --> {%src,+,8}<%loop> +; CHECK-NEXT: --> {%src,+,8}<%loop> ; CHECK-NEXT: [PSE] %gep.iv.3 = getelementptr inbounds i32, ptr %dst, i64 %ext.iv.3: ; CHECK-NEXT: ((4 * (sext i32 {0,+,-1}<%loop> to i64)) + %dst) -; CHECK-NEXT: --> {%dst,+,-4}<%loop> +; CHECK-NEXT: --> {%dst,+,-4}<%loop> ; entry: br label %loop @@ -407,7 +407,7 @@ define void @narrower_i8_nssw_implies_wider_i32_nssw(ptr %dst, ptr %src, i32 %N) ; CHECK-NEXT: Member: {%dst,+,4}<%loop> ; CHECK-NEXT: Group GRP1: ; CHECK-NEXT: (Low: %src High: (4 + (8 * (zext i32 (-1 + %N) to i64)) + %src)) -; CHECK-NEXT: Member: {%src,+,8}<%loop> +; CHECK-NEXT: Member: {%src,+,8}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -419,7 +419,7 @@ define void @narrower_i8_nssw_implies_wider_i32_nssw(ptr %dst, ptr %src, i32 %N) ; CHECK-NEXT: --> {%dst,+,4}<%loop> ; CHECK-NEXT: [PSE] %gep.src = getelementptr inbounds i32, ptr %src, i64 %ext.iv.2: ; CHECK-NEXT: ((4 * (sext i8 {0,+,2}<%loop> to i64)) + %src) -; CHECK-NEXT: --> {%src,+,8}<%loop> +; CHECK-NEXT: --> {%src,+,8}<%loop> ; entry: br label %loop diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll index 1c48b0ed0f967..88cf23e412183 100644 --- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll @@ -329,8 +329,8 @@ define double @single_iteration_unknown_stride(i32 %x, ptr %y, i1 %cond) { ; CHECK-EMPTY: ; CHECK-NEXT: Expressions re-written: ; CHECK-NEXT: [PSE] %gep10 = getelementptr double, ptr %gep8, i64 %mul: -; CHECK-NEXT: {(8 + %y),+,(8 * (sext i32 %x to i64))}<%loop.body> -; CHECK-NEXT: --> {(8 + %y),+,8}<%loop.body> +; CHECK-NEXT: {(8 + %y),+,(8 * (sext i32 %x to i64))}<%loop.body> +; CHECK-NEXT: --> {(8 + %y),+,8}<%loop.body> ; entry: br i1 %cond, label %noloop.exit, label %loop.ph @@ -522,7 +522,7 @@ define void @unknown_stride_equalto_zext_tc(i16 zeroext %N, ptr %A, ptr %B, i32 ; CHECK-NEXT: Member: %A ; CHECK-NEXT: Group GRP1: ; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64)) + %B) umin ((2 * (sext i32 %j to i64)) + (2 * (zext i32 (-1 + (zext i16 %N to i32)) to i64) * (zext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64)) + %B) umax ((2 * (sext i32 %j to i64)) + (2 * (zext i32 (-1 + (zext i16 %N to i32)) to i64) * (zext i16 %N to i64)) + %B)))) -; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64)) + %B),+,(2 * (zext i16 %N to i64))}<%loop> +; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64)) + %B),+,(2 * (zext i16 %N to i64))}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -531,7 +531,7 @@ define void @unknown_stride_equalto_zext_tc(i16 zeroext %N, ptr %A, ptr %B, i32 ; CHECK-NEXT: Expressions re-written: ; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add: ; CHECK-NEXT: ((2 * (sext i32 {%j,+,(zext i16 %N to i32)}<%loop> to i64)) + %B) -; CHECK-NEXT: --> {((2 * (sext i32 %j to i64)) + %B),+,(2 * (zext i16 %N to i64))}<%loop> +; CHECK-NEXT: --> {((2 * (sext i32 %j to i64)) + %B),+,(2 * (zext i16 %N to i64))}<%loop> ; entry: %N.ext = zext i16 %N to i32 @@ -573,7 +573,7 @@ define void @unknown_stride_equalto_sext_tc(i16 %N, ptr %A, ptr %B, i32 %j) { ; CHECK-NEXT: Member: %A ; CHECK-NEXT: Group GRP1: ; CHECK-NEXT: (Low: (((2 * (sext i32 %j to i64)) + %B) umin ((2 * (sext i32 %j to i64)) + (2 * (zext i32 (-1 + (sext i16 %N to i32)) to i64) * (sext i16 %N to i64)) + %B)) High: (2 + (((2 * (sext i32 %j to i64)) + %B) umax ((2 * (sext i32 %j to i64)) + (2 * (zext i32 (-1 + (sext i16 %N to i32)) to i64) * (sext i16 %N to i64)) + %B)))) -; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64)) + %B),+,(2 * (sext i16 %N to i64))}<%loop> +; CHECK-NEXT: Member: {((2 * (sext i32 %j to i64)) + %B),+,(2 * (sext i16 %N to i64))}<%loop> ; CHECK-EMPTY: ; CHECK-NEXT: Non vectorizable stores to invariant address were not found in loop. 
; CHECK-NEXT: SCEV assumptions: @@ -582,7 +582,7 @@ define void @unknown_stride_equalto_sext_tc(i16 %N, ptr %A, ptr %B, i32 %j) { ; CHECK-NEXT: Expressions re-written: ; CHECK-NEXT: [PSE] %arrayidx = getelementptr inbounds i16, ptr %B, i32 %add: ; CHECK-NEXT: ((2 * (sext i32 {%j,+,(sext i16 %N to i32)}<%loop> to i64)) + %B) -; CHECK-NEXT: --> {((2 * (sext i32 %j to i64)) + %B),+,(2 * (sext i16 %N to i64))}<%loop> +; CHECK-NEXT: --> {((2 * (sext i32 %j to i64)) + %B),+,(2 * (sext i16 %N to i64))}<%loop> ; entry: %N.ext = sext i16 %N to i32 diff --git a/llvm/test/Analysis/ScalarEvolution/addrec-computed-during-addrec-calculation.ll b/llvm/test/Analysis/ScalarEvolution/addrec-computed-during-addrec-calculation.ll index fad5c3a144e17..81d7fbe2d0c9e 100644 --- a/llvm/test/Analysis/ScalarEvolution/addrec-computed-during-addrec-calculation.ll +++ b/llvm/test/Analysis/ScalarEvolution/addrec-computed-during-addrec-calculation.ll @@ -20,7 +20,7 @@ define void @test(ptr %p) { ; CHECK-NEXT: %iv2.ext = sext i32 %iv2 to i64 ; CHECK-NEXT: --> {(sext i32 %iv to i64),+,1}<%loop2> U: [-2147483648,6442450943) S: [-2147483648,6442450943) Exits: <> LoopDispositions: { %loop.header: Variant, %loop2: Computable, %loop3: Invariant } ; CHECK-NEXT: %iv3 = phi i64 [ %iv2.ext, %loop2.end ], [ %iv3.next, %loop3 ] -; CHECK-NEXT: --> {{\{\{}}(sext i32 %iv to i64),+,1}<%loop2>,+,1}<%loop3> U: [-2147483648,6442450943) S: [-2147483648,6442450943) Exits: {(sext i32 %iv to i64),+,1}<%loop2> LoopDispositions: { %loop3: Computable, %loop.header: Variant } +; CHECK-NEXT: --> {{\{\{}}(sext i32 %iv to i64),+,1}<%loop2>,+,1}<%loop3> U: [-2147483648,6442450943) S: [-2147483648,6442450943) Exits: {(sext i32 %iv to i64),+,1}<%loop2> LoopDispositions: { %loop3: Computable, %loop.header: Variant } ; CHECK-NEXT: %iv3.next = add nsw i64 %iv3, 1 ; CHECK-NEXT: --> {{\{\{}}(1 + (sext i32 %iv to i64)),+,1}<%loop2>,+,1}<%loop3> U: [-2147483647,6442450944) S: [-2147483647,6442450944) Exits: {(1 + (sext i32 %iv to 
i64)),+,1}<%loop2> LoopDispositions: { %loop3: Computable, %loop.header: Variant } ; CHECK-NEXT: %iv.next = trunc i64 %iv3 to i32 diff --git a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll index 1964fca603e23..a5b7b166aaa97 100644 --- a/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/backedge-taken-count-guard-info.ll @@ -77,7 +77,7 @@ define void @rewrite_preserve_add_nsw(i32 %a) { ; CHECK-NEXT: %add = add nsw i32 %a, 4 ; CHECK-NEXT: --> (4 + %a) U: [-2147483644,-2147483648) S: [-2147483644,-2147483648) ; CHECK-NEXT: %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,-2147483648) S: [0,-2147483648) Exits: (4 + %a) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,-2147483648) S: [0,-2147483648) Exits: (4 + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i32 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,-2147483647) S: [1,-2147483647) Exits: (5 + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_preserve_add_nsw diff --git a/llvm/test/Analysis/ScalarEvolution/becount-invalidation.ll b/llvm/test/Analysis/ScalarEvolution/becount-invalidation.ll index a0ae9b63ac02a..d3e51bce99faa 100644 --- a/llvm/test/Analysis/ScalarEvolution/becount-invalidation.ll +++ b/llvm/test/Analysis/ScalarEvolution/becount-invalidation.ll @@ -13,11 +13,11 @@ define void @test(ptr %arg) { ; CHECK-NEXT: %ptr2 = phi ptr [ %ptr2.next, %loop.latch ], [ null, %entry ] ; CHECK-NEXT: --> %ptr2 U: full-set S: full-set Exits: <> LoopDispositions: { %loop.header: Variant, %loop2.header: Invariant } ; CHECK-NEXT: %ptr1.next = phi ptr [ %ptr2, %loop.header ], [ %ptr1.next.next, %loop2.latch ] -; CHECK-NEXT: --> {%ptr2,+,8}<%loop2.header> U: full-set S: full-set Exits: <> LoopDispositions: { 
%loop2.header: Computable, %loop.header: Variant } +; CHECK-NEXT: --> {%ptr2,+,8}<%loop2.header> U: full-set S: full-set Exits: <> LoopDispositions: { %loop2.header: Computable, %loop.header: Variant } ; CHECK-NEXT: %iv = phi i64 [ 0, %loop.header ], [ %iv.next, %loop2.latch ] ; CHECK-NEXT: --> {0,+,1}<%loop2.header> U: [0,1) S: [0,1) Exits: <> LoopDispositions: { %loop2.header: Computable, %loop.header: Uniform } ; CHECK-NEXT: %ptr1.dummy = getelementptr inbounds i64, ptr %ptr1.next, i64 0 -; CHECK-NEXT: --> {%ptr2,+,8}<%loop2.header> U: full-set S: full-set Exits: <> LoopDispositions: { %loop2.header: Computable, %loop.header: Variant } +; CHECK-NEXT: --> {%ptr2,+,8}<%loop2.header> U: full-set S: full-set Exits: <> LoopDispositions: { %loop2.header: Computable, %loop.header: Variant } ; CHECK-NEXT: %val = load i64, ptr %ptr1.dummy, align 8 ; CHECK-NEXT: --> %val U: full-set S: full-set Exits: <> LoopDispositions: { %loop2.header: Variant, %loop.header: Variant } ; CHECK-NEXT: %ptr1.next.next = getelementptr inbounds i64, ptr %ptr1.next, i64 1 diff --git a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll index 9f9dd6f3c11af..a13da89b4d6e9 100644 --- a/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll +++ b/llvm/test/Analysis/ScalarEvolution/different-loops-recs.ll @@ -47,13 +47,13 @@ define void @test_00(i1 %arg) { ; CHECK-NEXT: %s2 = add i32 %phi5, %phi2 ; CHECK-NEXT: --> {{\{\{}}57,+,2}<%loop1>,+,2}<%loop2> U: [57,668) S: [57,668) --> 667 U: [667,668) S: [667,668) ; CHECK-NEXT: %s3 = add i32 %sum1, %sum3 -; CHECK-NEXT: --> {{\{\{}}130,+,3}<%loop1>,+,3}<%loop2> U: [130,1046) S: [130,1046) --> 1045 U: [1045,1046) S: [1045,1046) +; CHECK-NEXT: --> {{\{\{}}130,+,3}<%loop1>,+,3}<%loop2> U: [130,1046) S: [130,1046) --> 1045 U: [1045,1046) S: [1045,1046) ; CHECK-NEXT: %s4 = add i32 %sum4, %sum2 -; CHECK-NEXT: --> {{\{\{}}179,+,6}<%loop1>,+,6}<%loop2> U: [179,2010) S: [179,2010) --> 2009 U: 
[2009,2010) S: [2009,2010) +; CHECK-NEXT: --> {{\{\{}}179,+,6}<%loop1>,+,6}<%loop2> U: [179,2010) S: [179,2010) --> 2009 U: [2009,2010) S: [2009,2010) ; CHECK-NEXT: %s5 = add i32 %phi3, %sum3 -; CHECK-NEXT: --> {{\{\{}}122,+,3}<%loop1>,+,3}<%loop2> U: [122,1038) S: [122,1038) --> 1037 U: [1037,1038) S: [1037,1038) +; CHECK-NEXT: --> {{\{\{}}122,+,3}<%loop1>,+,3}<%loop2> U: [122,1038) S: [122,1038) --> 1037 U: [1037,1038) S: [1037,1038) ; CHECK-NEXT: %s6 = add i32 %sum2, %phi6 -; CHECK-NEXT: --> {{\{\{}}63,+,6}<%loop1>,+,3}<%loop2> U: [63,1471) S: [63,1471) --> 1470 U: [1470,1471) S: [1470,1471) +; CHECK-NEXT: --> {{\{\{}}63,+,6}<%loop1>,+,3}<%loop2> U: [63,1471) S: [63,1471) --> 1470 U: [1470,1471) S: [1470,1471) ; CHECK-NEXT: Determining loop execution counts for: @test_00 ; CHECK-NEXT: Loop %loop2: backedge-taken count is i32 141 ; CHECK-NEXT: Loop %loop2: constant max backedge-taken count is i32 141 @@ -143,9 +143,9 @@ define void @test_01(i32 %a, i32 %b) { ; CHECK-NEXT: %sum4 = add i32 %sum3, %phi6 ; CHECK-NEXT: --> {159,+,6}<%loop2> U: [159,1162) S: [159,1162) Exits: (159 + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + 
(-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable } ; CHECK-NEXT: %is2 = add i32 %sum4, %b -; CHECK-NEXT: --> {(159 + %b),+,6}<%loop2> U: full-set S: full-set Exits: (159 + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + 
(-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))) + %b) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: --> {(159 + %b),+,6}<%loop2> U: full-set S: full-set Exits: (159 + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + 
(1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))) + %b) LoopDispositions: { %loop2: Computable } ; CHECK-NEXT: %ec2 = add i32 %is1, %is2 -; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> {(165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))),+,6}<%loop2> U: full-set S: full-set Exits: (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + 
(2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable } +; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> {(165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))),+,6}<%loop2> U: full-set S: full-set Exits: (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * 
%b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) LoopDispositions: { %loop2: Computable } ; CHECK-NEXT: %s1 = add i32 %phi1, %is1 ; CHECK-NEXT: --> {(6 + (3 * %a) + %b),+,7}<%loop1> U: full-set S: full-set --> (6 + (3 * %a) + (7 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + %b) U: full-set S: full-set ; CHECK-NEXT: %s2 = add i32 %is2, %phi4 @@ -153,11 +153,11 @@ define void @test_01(i32 %a, i32 %b) { ; CHECK-NEXT: %s3 = add i32 %is1, %phi5 ; CHECK-NEXT: --> {{\{\{}}(59 + (2 * %a) + %b),+,6}<%loop1>,+,2}<%loop2> U: full-set S: full-set --> (59 + (2 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 
* (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))))))))) + (2 * %a) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + %b) U: full-set S: full-set ; CHECK-NEXT: %s4 = add i32 %phi2, %is2 -; CHECK-NEXT: --> {{\{\{}}(159 + (2 * 
%b)),+,2}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> (159 + (2 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (2 * %b) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: 
full-set S: full-set +; CHECK-NEXT: --> {{\{\{}}(159 + (2 * %b)),+,2}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> (159 + (2 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (2 * %b) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * 
%b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set ; CHECK-NEXT: %s5 = add i32 %is1, %is2 -; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 
umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set +; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + 
(6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set ; CHECK-NEXT: %s6 = add i32 %is2, %is1 -; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + 
(-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set +; CHECK-NEXT: --> {{\{\{}}(165 + (2 * %a) + (2 * %b)),+,6}<%loop1>,+,6}<%loop2> U: full-set S: full-set --> (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (6 * (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 
* %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))))) U: full-set S: full-set ; CHECK-NEXT: Determining loop execution counts for: @test_01 ; CHECK-NEXT: Loop %loop2: backedge-taken count is (((-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (-1 * (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) + (1000 umax (165 + (2 * %a) + (2 * %b) + (6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))) /u 6) + (1 umin (-165 + (-6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b)))))) + (-2 * %a) + (-2 * %b) + (1000 umax (165 + (2 * %a) + (2 * %b) + 
(6 * (((-6 + (-2 * %a) + (-1 * (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))) /u 6) + (1 umin (-6 + (-2 * %a) + (-1 * %b) + (1000 umax (6 + (2 * %a) + %b))))))))))) ; CHECK-NEXT: Loop %loop2: constant max backedge-taken count is i32 167 @@ -520,15 +520,15 @@ define void @test_06() { ; CHECK-NEXT: %s1 = add i32 %phi1, %phi2 ; CHECK-NEXT: --> {{\{\{}}30,+,1}<%loop1>,+,2}<%loop2> U: [30,1998) S: [30,1998) --> 1997 U: [1997,1998) S: [1997,1998) ; CHECK-NEXT: %s2 = add i32 %phi2, %phi1 -; CHECK-NEXT: --> {{\{\{}}30,+,1}<%loop1>,+,2}<%loop2> U: [30,1998) S: [30,1998) --> 1997 U: [1997,1998) S: [1997,1998) +; CHECK-NEXT: --> {{\{\{}}30,+,1}<%loop1>,+,2}<%loop2> U: [30,1998) S: [30,1998) --> 1997 U: [1997,1998) S: [1997,1998) ; CHECK-NEXT: %s3 = add i32 %phi1, %phi3 ; CHECK-NEXT: --> {{\{\{}}40,+,1}<%loop1>,+,3}<%loop3> U: [40,1999) S: [40,1999) --> 1998 U: [1998,1999) S: [1998,1999) ; CHECK-NEXT: %s4 = add i32 %phi3, %phi1 -; CHECK-NEXT: --> {{\{\{}}40,+,1}<%loop1>,+,3}<%loop3> U: [40,1999) S: [40,1999) --> 1998 U: [1998,1999) S: [1998,1999) +; CHECK-NEXT: --> {{\{\{}}40,+,1}<%loop1>,+,3}<%loop3> U: [40,1999) S: [40,1999) --> 1998 U: [1998,1999) S: [1998,1999) ; CHECK-NEXT: %s5 = add i32 %phi2, %phi3 ; CHECK-NEXT: --> {{\{\{}}50,+,2}<%loop2>,+,3}<%loop3> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998) ; CHECK-NEXT: %s6 = add i32 %phi3, %phi2 -; CHECK-NEXT: --> {{\{\{}}50,+,2}<%loop2>,+,3}<%loop3> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998) +; CHECK-NEXT: --> {{\{\{}}50,+,2}<%loop2>,+,3}<%loop3> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998) ; CHECK-NEXT: Determining loop execution counts for: @test_06 ; CHECK-NEXT: Loop %loop3: backedge-taken count is i32 323 ; CHECK-NEXT: Loop %loop3: constant max backedge-taken count is i32 323 @@ -598,15 +598,15 @@ define void @test_07() { ; CHECK-NEXT: %s1 = add i32 %phi1, %phi2 ; CHECK-NEXT: --> 
{{\{\{}}30,+,1}<%loop1>,+,2}<%loop2> U: [30,1009) S: [30,1009) --> 1008 U: [1008,1009) S: [1008,1009) ; CHECK-NEXT: %s2 = add i32 %phi2, %phi1 -; CHECK-NEXT: --> {{\{\{}}30,+,1}<%loop1>,+,2}<%loop2> U: [30,1009) S: [30,1009) --> 1008 U: [1008,1009) S: [1008,1009) +; CHECK-NEXT: --> {{\{\{}}30,+,1}<%loop1>,+,2}<%loop2> U: [30,1009) S: [30,1009) --> 1008 U: [1008,1009) S: [1008,1009) ; CHECK-NEXT: %s3 = add i32 %phi1, %phi3 ; CHECK-NEXT: --> {{\{\{}}40,+,3}<%loop3>,+,1}<%loop1> U: [40,1010) S: [40,1010) --> 1009 U: [1009,1010) S: [1009,1010) ; CHECK-NEXT: %s4 = add i32 %phi3, %phi1 -; CHECK-NEXT: --> {{\{\{}}40,+,3}<%loop3>,+,1}<%loop1> U: [40,1010) S: [40,1010) --> 1009 U: [1009,1010) S: [1009,1010) +; CHECK-NEXT: --> {{\{\{}}40,+,3}<%loop3>,+,1}<%loop1> U: [40,1010) S: [40,1010) --> 1009 U: [1009,1010) S: [1009,1010) ; CHECK-NEXT: %s5 = add i32 %phi2, %phi3 ; CHECK-NEXT: --> {{\{\{}}50,+,3}<%loop3>,+,2}<%loop2> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998) ; CHECK-NEXT: %s6 = add i32 %phi3, %phi2 -; CHECK-NEXT: --> {{\{\{}}50,+,3}<%loop3>,+,2}<%loop2> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998) +; CHECK-NEXT: --> {{\{\{}}50,+,3}<%loop3>,+,2}<%loop2> U: [50,1998) S: [50,1998) --> 1997 U: [1997,1998) S: [1997,1998) ; CHECK-NEXT: Determining loop execution counts for: @test_07 ; CHECK-NEXT: Loop %loop2: backedge-taken count is i32 489 ; CHECK-NEXT: Loop %loop2: constant max backedge-taken count is i32 489 @@ -684,7 +684,7 @@ define void @test_08() { ; CHECK-NEXT: %tmp12 = trunc i64 %tmp11 to i32 ; CHECK-NEXT: --> ((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>) U: full-set S: full-set Exits: 0 LoopDispositions: { %loop_2: Variant } ; CHECK-NEXT: %tmp14 = mul i32 %tmp12, %tmp7 -; CHECK-NEXT: --> (((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>) * {-1,+,-1}<%loop_1>) U: full-set S: full-set --> (-2 * ((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>)) U: [0,-1) S: [-2147483648,2147483647) Exits: 0 LoopDispositions: { %loop_2: 
Variant } +; CHECK-NEXT: --> (((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>) * {-1,+,-1}<%loop_1>) U: full-set S: full-set --> (-2 * ((trunc i64 %iv.2.1 to i32) + {0,+,-1}<%loop_2>)) U: [0,-1) S: [-2147483648,2147483647) Exits: 0 LoopDispositions: { %loop_2: Variant } ; CHECK-NEXT: %tmp16 = mul i64 %iv.2.1, %iv.1.1 ; CHECK-NEXT: --> ({2,+,1}<%loop_1> * %iv.2.1) U: full-set S: full-set --> (3 * %iv.2.1) U: full-set S: full-set Exits: 0 LoopDispositions: { %loop_2: Variant } ; CHECK-NEXT: %iv.2.3.next = add nuw nsw i64 %iv.2.3, 1 diff --git a/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll b/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll index 1e15d2d0d6461..3fde1dfb963c4 100644 --- a/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll +++ b/llvm/test/Analysis/ScalarEvolution/exit-count-non-strict.ll @@ -153,7 +153,6 @@ define void @ule_from_zero_no_nuw(i32 %M, i32 %N) { ; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: predicated exit count for loop: (1 + (zext i32 %M to i64)) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {0,+,1}<%loop> Added Flags: ; CHECK-EMPTY: ; CHECK-NEXT: exit count for latch: %N ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 -1 @@ -161,18 +160,14 @@ define void @ule_from_zero_no_nuw(i32 %M, i32 %N) { ; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: predicated symbolic max exit count for loop: (1 + (zext i32 %M to i64)) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {0,+,1}<%loop> Added Flags: ; CHECK-EMPTY: ; CHECK-NEXT: symbolic max exit count for latch: %N ; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {0,+,1}<%loop> Added Flags: ; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4294967295 ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {0,+,1}<%loop> Added Flags: ; CHECK-NEXT: Loop %loop: Predicated symbolic max 
backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {0,+,1}<%loop> Added Flags: ; entry: br label %loop @@ -198,7 +193,6 @@ define void @le_from_zero_no_nuw(i32 %M, i32 %N) { ; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: predicated exit count for loop: (1 + (zext i32 %M to i64)) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {0,+,1}<%loop> Added Flags: ; CHECK-EMPTY: ; CHECK-NEXT: exit count for latch: %N ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 -1 @@ -206,18 +200,14 @@ define void @le_from_zero_no_nuw(i32 %M, i32 %N) { ; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: predicated symbolic max exit count for loop: (1 + (zext i32 %M to i64)) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {0,+,1}<%loop> Added Flags: ; CHECK-EMPTY: ; CHECK-NEXT: symbolic max exit count for latch: %N ; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {0,+,1}<%loop> Added Flags: ; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4294967295 ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {0,+,1}<%loop> Added Flags: ; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((zext i32 %N to i64) umin (1 + (zext i32 %M to i64))) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {0,+,1}<%loop> Added Flags: ; entry: br label %loop @@ -417,7 +407,7 @@ define void @sle_from_int_min_no_nsw(i32 %M, i32 %N) { ; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: predicated exit count for loop: (2147483649 + (sext i32 %M to i64)) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: +; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: ; CHECK-EMPTY: ; CHECK-NEXT: exit count for latch: (-2147483648 + %N) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 -1 @@ -425,18 
+415,18 @@ define void @sle_from_int_min_no_nsw(i32 %M, i32 %N) { ; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: predicated symbolic max exit count for loop: (2147483649 + (sext i32 %M to i64)) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: +; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: ; CHECK-EMPTY: ; CHECK-NEXT: symbolic max exit count for latch: (-2147483648 + %N) ; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: +; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: ; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 4294967295 ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: +; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: ; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((zext i32 (-2147483648 + %N) to i64) umin (2147483649 + (sext i32 %M to i64))) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: +; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: ; entry: br label %loop @@ -462,7 +452,7 @@ define void @le_from_int_min_no_nuw_nsw(i32 %M, i32 %N) { ; CHECK-NEXT: exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: predicated exit count for loop: (-2147483648 + (2147483648 umax (1 + (zext i32 %M to i64)))) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: +; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: ; CHECK-EMPTY: ; CHECK-NEXT: exit count for latch: (-2147483648 + %N) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i32 -1 @@ -470,18 +460,18 @@ define void @le_from_int_min_no_nuw_nsw(i32 %M, i32 %N) { ; CHECK-NEXT: symbolic max exit count for loop: ***COULDNOTCOMPUTE*** ; CHECK-NEXT: predicated symbolic max exit count for loop: (-2147483648 + (2147483648 
umax (1 + (zext i32 %M to i64)))) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: +; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: ; CHECK-EMPTY: ; CHECK-NEXT: symbolic max exit count for latch: (-2147483648 + %N) ; CHECK-NEXT: Loop %loop: Predicated backedge-taken count is ((-2147483648 + (2147483648 umax (1 + (zext i32 %M to i64)))) umin_seq (zext i32 (-2147483648 + %N) to i64)) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: +; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: ; CHECK-NEXT: Loop %loop: Predicated constant max backedge-taken count is i64 2147483648 ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: +; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: ; CHECK-NEXT: Loop %loop: Predicated symbolic max backedge-taken count is ((-2147483648 + (2147483648 umax (1 + (zext i32 %M to i64)))) umin_seq (zext i32 (-2147483648 + %N) to i64)) ; CHECK-NEXT: Predicates: -; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: +; CHECK-NEXT: {-2147483648,+,1}<%loop> Added Flags: ; entry: br label %loop diff --git a/llvm/test/Analysis/ScalarEvolution/exit-count-select-safe.ll b/llvm/test/Analysis/ScalarEvolution/exit-count-select-safe.ll index c63650aef8fe4..77642f0c3f1ee 100644 --- a/llvm/test/Analysis/ScalarEvolution/exit-count-select-safe.ll +++ b/llvm/test/Analysis/ScalarEvolution/exit-count-select-safe.ll @@ -5,9 +5,9 @@ define i32 @logical_and_2ops(i32 %n, i32 %m) { ; CHECK-LABEL: 'logical_and_2ops' ; CHECK-NEXT: Classifying expressions for: @logical_and_2ops ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n 
umin_seq %m)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @logical_and_2ops @@ -33,9 +33,9 @@ define i32 @logical_or_2ops(i32 %n, i32 %m) { ; CHECK-LABEL: 'logical_or_2ops' ; CHECK-NEXT: Classifying expressions for: @logical_or_2ops ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 true, i1 %cond_p1 ; CHECK-NEXT: --> (true + ((true + %cond_p0) umin_seq (true + %cond_p1))) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @logical_or_2ops @@ -61,9 +61,9 @@ define i32 @logical_and_3ops(i32 %n, i32 %m, i32 %k) { ; CHECK-LABEL: 'logical_and_3ops' ; CHECK-NEXT: Classifying expressions for: @logical_and_3ops ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m umin_seq %k) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m umin_seq %k) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 
%i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m umin_seq %k)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m umin_seq %k)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond_p3 = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %cond = select i1 %cond_p3, i1 %cond_p2, i1 false @@ -93,9 +93,9 @@ define i32 @logical_or_3ops(i32 %n, i32 %m, i32 %k) { ; CHECK-LABEL: 'logical_or_3ops' ; CHECK-NEXT: Classifying expressions for: @logical_or_3ops ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m umin_seq %k) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m umin_seq %k) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m umin_seq %k)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m umin_seq %k)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond_p3 = select i1 %cond_p0, i1 true, i1 %cond_p1 ; CHECK-NEXT: --> (true + ((true + %cond_p0) umin_seq (true + %cond_p1))) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %cond = select i1 %cond_p3, i1 true, i1 %cond_p2 @@ -125,9 +125,9 @@ define i32 @logical_or_3ops_duplicate(i32 %n, i32 %m, i32 %k) { ; CHECK-LABEL: 'logical_or_3ops_duplicate' ; CHECK-NEXT: Classifying expressions for: @logical_or_3ops_duplicate ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m umin_seq %k) LoopDispositions: { %loop: 
Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m umin_seq %k) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m umin_seq %k)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m umin_seq %k)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond_p4 = select i1 %cond_p0, i1 true, i1 %cond_p1 ; CHECK-NEXT: --> (true + ((true + %cond_p0) umin_seq (true + %cond_p1))) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %cond_p5 = select i1 %cond_p4, i1 true, i1 %cond_p2 @@ -161,9 +161,9 @@ define i32 @logical_or_3ops_redundant_uminseq_operand(i32 %n, i32 %m, i32 %k) { ; CHECK-LABEL: 'logical_or_3ops_redundant_uminseq_operand' ; CHECK-NEXT: Classifying expressions for: @logical_or_3ops_redundant_uminseq_operand ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n umin %m) umin_seq %k) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n umin %m) umin_seq %k) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n umin %m) umin_seq %k)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n umin %m) umin_seq %k)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %umin = call i32 @llvm.umin.i32(i32 %n, i32 %m) ; CHECK-NEXT: --> (%n umin %m) U: full-set S: full-set Exits: (%n umin %m) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: %cond_p3 = select i1 %cond_p0, i1 true, i1 %cond_p1 @@ -196,9 +196,9 @@ define i32 @logical_or_3ops_redundant_umin_operand(i32 %n, i32 %m, i32 %k) { ; CHECK-LABEL: 
'logical_or_3ops_redundant_umin_operand' ; CHECK-NEXT: Classifying expressions for: @logical_or_3ops_redundant_umin_operand ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %k umin_seq %m) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %k umin_seq %m) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %k umin_seq %m)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %k umin_seq %m)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %umin = call i32 @llvm.umin.i32(i32 %n, i32 %m) ; CHECK-NEXT: --> (%n umin %m) U: full-set S: full-set Exits: (%n umin %m) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: %cond_p3 = select i1 %cond_p0, i1 true, i1 %cond_p1 @@ -231,9 +231,9 @@ define i32 @logical_or_4ops_redundant_operand_across_umins(i32 %n, i32 %m, i32 % ; CHECK-LABEL: 'logical_or_4ops_redundant_operand_across_umins' ; CHECK-NEXT: Classifying expressions for: @logical_or_4ops_redundant_operand_across_umins ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n umin %m) umin_seq %k umin_seq %q) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n umin %m) umin_seq %k umin_seq %q) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n umin %m) umin_seq %k umin_seq %q)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n umin %m) umin_seq %k umin_seq %q)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %umin = call i32 
@llvm.umin.i32(i32 %n, i32 %m) ; CHECK-NEXT: --> (%n umin %m) U: full-set S: full-set Exits: (%n umin %m) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: %umin2 = call i32 @llvm.umin.i32(i32 %n, i32 %q) @@ -269,9 +269,9 @@ define i32 @logical_or_3ops_operand_wise_redundant_umin(i32 %n, i32 %m, i32 %k) ; CHECK-LABEL: 'logical_or_3ops_operand_wise_redundant_umin' ; CHECK-NEXT: Classifying expressions for: @logical_or_3ops_operand_wise_redundant_umin ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n umin %m) umin_seq %k) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n umin %m) umin_seq %k) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n umin %m) umin_seq %k)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n umin %m) umin_seq %k)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %umin = call i32 @llvm.umin.i32(i32 %n, i32 %m) ; CHECK-NEXT: --> (%n umin %m) U: full-set S: full-set Exits: (%n umin %m) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: %umin2 = call i32 @llvm.umin.i32(i32 %n, i32 %k) @@ -307,9 +307,9 @@ define i32 @logical_or_3ops_partially_redundant_umin(i32 %n, i32 %m, i32 %k) { ; CHECK-LABEL: 'logical_or_3ops_partially_redundant_umin' ; CHECK-NEXT: Classifying expressions for: @logical_or_3ops_partially_redundant_umin ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq (%m umin %k)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq (%m umin %k)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set 
Exits: (1 + (%n umin_seq (%m umin %k))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq (%m umin %k))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %umin = call i32 @llvm.umin.i32(i32 %n, i32 %m) ; CHECK-NEXT: --> (%n umin %m) U: full-set S: full-set Exits: (%n umin %m) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: %umin2 = call i32 @llvm.umin.i32(i32 %umin, i32 %k) @@ -341,21 +341,21 @@ define i32 @logical_or_5ops_redundant_opearand_of_inner_uminseq(i32 %a, i32 %b, ; CHECK-LABEL: 'logical_or_5ops_redundant_opearand_of_inner_uminseq' ; CHECK-NEXT: Classifying expressions for: @logical_or_5ops_redundant_opearand_of_inner_uminseq ; CHECK-NEXT: %first.i = phi i32 [ 0, %entry ], [ %first.i.next, %first.loop ] -; CHECK-NEXT: --> {0,+,1}<%first.loop> U: full-set S: full-set Exits: (%e umin_seq %d umin_seq %a) LoopDispositions: { %first.loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%first.loop> U: full-set S: full-set Exits: (%e umin_seq %d umin_seq %a) LoopDispositions: { %first.loop: Computable } ; CHECK-NEXT: %first.i.next = add i32 %first.i, 1 -; CHECK-NEXT: --> {1,+,1}<%first.loop> U: full-set S: full-set Exits: (1 + (%e umin_seq %d umin_seq %a)) LoopDispositions: { %first.loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%first.loop> U: full-set S: full-set Exits: (1 + (%e umin_seq %d umin_seq %a)) LoopDispositions: { %first.loop: Computable } ; CHECK-NEXT: %cond_p3 = select i1 %cond_p0, i1 true, i1 %cond_p1 ; CHECK-NEXT: --> (true + ((true + %cond_p0) umin_seq (true + %cond_p1))) U: full-set S: full-set Exits: <> LoopDispositions: { %first.loop: Variant } ; CHECK-NEXT: %cond_p4 = select i1 %cond_p3, i1 true, i1 %cond_p2 ; CHECK-NEXT: --> (true + ((true + %cond_p0) umin_seq (true + %cond_p1) umin_seq (true + %cond_p2))) U: full-set S: full-set Exits: <> LoopDispositions: { %first.loop: Variant } ; CHECK-NEXT: %i = phi i32 [ 0, %first.loop.exit ], [ %i.next, %loop ] -; CHECK-NEXT: 
--> {0,+,1}<%loop> U: full-set S: full-set Exits: (%a umin_seq %b umin_seq ((%e umin_seq %d) umin %c)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%a umin_seq %b umin_seq ((%e umin_seq %d) umin %c)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%a umin_seq %b umin_seq ((%e umin_seq %d) umin %c))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%a umin_seq %b umin_seq ((%e umin_seq %d) umin %c))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %umin = call i32 @llvm.umin.i32(i32 %c, i32 %d) ; CHECK-NEXT: --> (%c umin %d) U: full-set S: full-set Exits: (%c umin %d) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: %umin2 = call i32 @llvm.umin.i32(i32 %umin, i32 %first.i) -; CHECK-NEXT: --> ({0,+,1}<%first.loop> umin %c umin %d) U: full-set S: full-set --> ((%e umin_seq %d umin_seq %a) umin %c umin %d) U: full-set S: full-set Exits: ((%e umin_seq %d umin_seq %a) umin %c umin %d) LoopDispositions: { %loop: Invariant } +; CHECK-NEXT: --> ({0,+,1}<%first.loop> umin %c umin %d) U: full-set S: full-set --> ((%e umin_seq %d umin_seq %a) umin %c umin %d) U: full-set S: full-set Exits: ((%e umin_seq %d umin_seq %a) umin %c umin %d) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: %cond_p8 = select i1 %cond_p5, i1 true, i1 %cond_p6 ; CHECK-NEXT: --> (true + ((true + %cond_p5) umin_seq (true + %cond_p6))) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %cond = select i1 %cond_p8, i1 true, i1 %cond_p7 @@ -501,15 +501,15 @@ define i64 @uminseq_vs_ptrtoint_complexity(i64 %n, i64 %m, ptr %ptr) { ; CHECK-LABEL: 'uminseq_vs_ptrtoint_complexity' ; CHECK-NEXT: Classifying expressions for: @uminseq_vs_ptrtoint_complexity ; CHECK-NEXT: %i = phi i64 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> 
U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i64 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %ptr.int = ptrtoint ptr %ptr to i64 ; CHECK-NEXT: --> (ptrtoint ptr %ptr to i64) U: full-set S: full-set ; CHECK-NEXT: %r = add i64 %i, %ptr.int -; CHECK-NEXT: --> {(ptrtoint ptr %ptr to i64),+,1}<%loop> U: full-set S: full-set --> ((%n umin_seq %m) + (ptrtoint ptr %ptr to i64)) U: full-set S: full-set +; CHECK-NEXT: --> {(ptrtoint ptr %ptr to i64),+,1}<%loop> U: full-set S: full-set --> ((%n umin_seq %m) + (ptrtoint ptr %ptr to i64)) U: full-set S: full-set ; CHECK-NEXT: Determining loop execution counts for: @uminseq_vs_ptrtoint_complexity ; CHECK-NEXT: Loop %loop: backedge-taken count is (%n umin_seq %m) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 -1 @@ -537,9 +537,9 @@ define i32 @logical_and_implies_poison1(i32 %n) { ; CHECK-NEXT: %add = add i32 %n, 1 ; CHECK-NEXT: --> (1 + %n) U: full-set S: full-set ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((1 + %n) umin %n) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((1 + %n) umin %n) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((1 + %n) umin %n)) LoopDispositions: { 
%loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((1 + %n) umin %n)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p0 umin %cond_p1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @logical_and_implies_poison1 @@ -568,9 +568,9 @@ define i32 @logical_and_implies_poison2(i32 %n) { ; CHECK-NEXT: %add = add i32 %n, 1 ; CHECK-NEXT: --> (1 + %n) U: full-set S: full-set ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((1 + %n) umin %n) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((1 + %n) umin %n) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((1 + %n) umin %n)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((1 + %n) umin %n)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p1 umin %cond_p0) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @logical_and_implies_poison2 @@ -599,9 +599,9 @@ define i32 @logical_and_implies_poison3(i32 %n, i32 %m) { ; CHECK-NEXT: %add = add i32 %n, %m ; CHECK-NEXT: --> (%n + %m) U: full-set S: full-set ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n + %m) umin %n) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n + %m) umin %n) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: 
full-set Exits: (1 + ((%n + %m) umin %n)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin %n)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p1 umin %cond_p0) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @logical_and_implies_poison3 @@ -630,9 +630,9 @@ define i32 @logical_and_implies_poison_wrong_direction(i32 %n, i32 %m) { ; CHECK-NEXT: %add = add i32 %n, %m ; CHECK-NEXT: --> (%n + %m) U: full-set S: full-set ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq (%n + %m)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq (%n + %m)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq (%n + %m))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq (%n + %m))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @logical_and_implies_poison_wrong_direction @@ -659,9 +659,9 @@ define i32 @logical_and_implies_poison_noundef(i32 %n, i32 noundef %m) { ; CHECK-LABEL: 'logical_and_implies_poison_noundef' ; CHECK-NEXT: Classifying expressions for: @logical_and_implies_poison_noundef ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin %m) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: 
full-set S: full-set Exits: (%n umin %m) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin %m)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin %m)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p0 umin %cond_p1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @logical_and_implies_poison_noundef @@ -687,9 +687,9 @@ define i32 @logical_and_implies_poison_noundef_wrong_direction(i32 %n, i32 nound ; CHECK-LABEL: 'logical_and_implies_poison_noundef_wrong_direction' ; CHECK-NEXT: Classifying expressions for: @logical_and_implies_poison_noundef_wrong_direction ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%m umin_seq %n) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%m umin_seq %n) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%m umin_seq %n)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%m umin_seq %n)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @logical_and_implies_poison_noundef_wrong_direction @@ -719,9 +719,9 @@ define i32 @logical_and_implies_poison_complex1(i32 %n, i32 %m) { ; CHECK-NEXT: %add1 = add i32 %add, 1 ; CHECK-NEXT: --> (1 + %n + %m) U: full-set S: full-set ; CHECK-NEXT: %i = phi i32 [ 0, 
%entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n + %m) umin (1 + %n + %m)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n + %m) umin (1 + %n + %m)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin (1 + %n + %m))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin (1 + %n + %m))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p0 umin %cond_p1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @logical_and_implies_poison_complex1 @@ -753,9 +753,9 @@ define i32 @logical_and_implies_poison_complex2(i32 %n, i32 %m, i32 %l) { ; CHECK-NEXT: %add1 = add i32 %add, %l ; CHECK-NEXT: --> (%n + %m + %l) U: full-set S: full-set ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n + %m) umin (%n + %m + %l)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n + %m) umin (%n + %m + %l)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin (%n + %m + %l))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin (%n + %m + %l))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p0 umin %cond_p1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: 
@logical_and_implies_poison_complex2 @@ -787,9 +787,9 @@ define i32 @logical_and_implies_poison_complex_wrong_direction(i32 %n, i32 %m, i ; CHECK-NEXT: %add1 = add i32 %add, %l ; CHECK-NEXT: --> (%n + %m + %l) U: full-set S: full-set ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n + %m) umin_seq (%n + %m + %l)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: ((%n + %m) umin_seq (%n + %m + %l)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin_seq (%n + %m + %l))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + ((%n + %m) umin_seq (%n + %m + %l))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @logical_and_implies_poison_complex_wrong_direction @@ -819,9 +819,9 @@ define i32 @logical_and_implies_multiple_ops(i32 %n, i32 %m) { ; CHECK-NEXT: %add = add i32 %n, 1 ; CHECK-NEXT: --> (1 + %n) U: full-set S: full-set ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (((1 + %n) umin %n) umin_seq %m) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (((1 + %n) umin %n) umin_seq %m) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (((1 + %n) umin %n) umin_seq %m)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (((1 + %n) umin %n) umin_seq %m)) 
LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p0 umin %cond_p1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %cond2 = select i1 %cond, i1 %cond_p2, i1 false @@ -854,9 +854,9 @@ define i32 @logical_and_implies_multiple_ops2(i32 %n, i32 %m) { ; CHECK-NEXT: %add = add i32 %n, 1 ; CHECK-NEXT: --> (1 + %n) U: full-set S: full-set ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq ((1 + %n) umin %m)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq ((1 + %n) umin %m)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq ((1 + %n) umin %m))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq ((1 + %n) umin %m))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %cond2 = select i1 %cond, i1 %cond_p2, i1 false @@ -889,9 +889,9 @@ define i32 @logical_and_implies_multiple_ops3(i32 %n, i32 %m) { ; CHECK-NEXT: %add = add i32 %n, 1 ; CHECK-NEXT: --> (1 + %n) U: full-set S: full-set ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%m umin_seq ((1 + %n) umin %n)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%m umin_seq ((1 + %n) umin %n)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%m umin_seq ((1 + %n) umin 
%n))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%m umin_seq ((1 + %n) umin %n))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %cond2 = select i1 %cond, i1 %cond_p2, i1 false @@ -990,9 +990,9 @@ define i32 @logical_and_not_zero_needs_context(i32 %n, i32 %m) { ; CHECK-LABEL: 'logical_and_not_zero_needs_context' ; CHECK-NEXT: Classifying expressions for: @logical_and_not_zero_needs_context ; CHECK-NEXT: %i = phi i32 [ 0, %entry ], [ %i.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (%n umin_seq %m) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %i.next = add i32 %i, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (%n umin_seq %m)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %cond = select i1 %cond_p0, i1 %cond_p1, i1 false ; CHECK-NEXT: --> (%cond_p0 umin_seq %cond_p1) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @logical_and_not_zero_needs_context diff --git a/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll b/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll index f2de5e32f082e..b00c26d472eaf 100644 --- a/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll @@ -17,11 +17,11 @@ define dso_local i32 @f() { ; CHECK-NEXT: %storemerge23 = phi i32 [ 3, %entry ], [ %dec16, %for.inc13.3 ] ; CHECK-NEXT: --> {3,+,-1}<%outer.loop> U: [1,4) 
S: [1,4) Exits: <> LoopDispositions: { %outer.loop: Computable, %for.cond6: Invariant, %inner.loop: Invariant } ; CHECK-NEXT: %storemerge1921 = phi i32 [ 3, %outer.loop ], [ %dec, %for.end ] -; CHECK-NEXT: --> {3,+,-1}<%for.cond6> U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Uniform } +; CHECK-NEXT: --> {3,+,-1}<%for.cond6> U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Uniform } ; CHECK-NEXT: %idxprom20 = zext i32 %storemerge1921 to i64 -; CHECK-NEXT: --> (zext i32 {3,+,-1}<%for.cond6> to i64) U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Uniform } +; CHECK-NEXT: --> (zext i32 {3,+,-1}<%for.cond6> to i64) U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Uniform } ; CHECK-NEXT: %arrayidx7 = getelementptr inbounds [1 x [4 x i16]], ptr @__const.f.g, i64 0, i64 0, i64 %idxprom20 -; CHECK-NEXT: --> ((2 * (zext i32 {3,+,-1}<%for.cond6> to i64)) + @__const.f.g) U: [8,-3) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Uniform } +; CHECK-NEXT: --> ((2 * (zext i32 {3,+,-1}<%for.cond6> to i64)) + @__const.f.g) U: [8,-3) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Uniform } ; CHECK-NEXT: %i = load i16, ptr %arrayidx7, align 2 ; CHECK-NEXT: --> %i U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond6: Variant, %outer.loop: Variant } ; CHECK-NEXT: %storemerge1822.lcssa.ph = phi i32 [ 0, %for.cond6 ] @@ -41,11 +41,11 @@ define dso_local i32 @f() { ; CHECK-NEXT: %retval.0 = phi i32 [ %i1, %if.end ], [ 0, %cleanup.loopexit ] ; CHECK-NEXT: --> %retval.0 U: full-set S: full-set ; CHECK-NEXT: %storemerge1921.3 = phi i32 [ 3, %for.end ], [ %dec.3, %for.end.3 ] -; CHECK-NEXT: --> {3,+,-1}<%inner.loop> U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: 
Uniform } +; CHECK-NEXT: --> {3,+,-1}<%inner.loop> U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Uniform } ; CHECK-NEXT: %idxprom20.3 = zext i32 %storemerge1921.3 to i64 -; CHECK-NEXT: --> (zext i32 {3,+,-1}<%inner.loop> to i64) U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Uniform } +; CHECK-NEXT: --> (zext i32 {3,+,-1}<%inner.loop> to i64) U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Uniform } ; CHECK-NEXT: %arrayidx7.3 = getelementptr inbounds [1 x [4 x i16]], ptr @__const.f.g, i64 0, i64 0, i64 %idxprom20.3 -; CHECK-NEXT: --> ((2 * (zext i32 {3,+,-1}<%inner.loop> to i64)) + @__const.f.g) U: [8,-3) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Uniform } +; CHECK-NEXT: --> ((2 * (zext i32 {3,+,-1}<%inner.loop> to i64)) + @__const.f.g) U: [8,-3) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Uniform } ; CHECK-NEXT: %i7 = load i16, ptr %arrayidx7.3, align 2 ; CHECK-NEXT: --> %i7 U: full-set S: full-set Exits: <> LoopDispositions: { %inner.loop: Variant, %outer.loop: Variant } ; CHECK-NEXT: %i8 = load volatile i32, ptr @b, align 4 @@ -53,7 +53,7 @@ define dso_local i32 @f() { ; CHECK-NEXT: %dec.3 = add nsw i32 %storemerge1921.3, -1 ; CHECK-NEXT: --> {2,+,-1}<%inner.loop> U: [2,3) S: [2,3) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Uniform } ; CHECK-NEXT: %storemerge1921.lcssa25.3 = phi i32 [ %storemerge1921.3, %for.end.3 ] -; CHECK-NEXT: --> {3,+,-1}<%inner.loop> U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %outer.loop: Uniform, %for.cond6: Variant, %inner.loop: Computable } +; CHECK-NEXT: --> {3,+,-1}<%inner.loop> U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %outer.loop: Uniform, %for.cond6: Variant, %inner.loop: Computable } ; CHECK-NEXT: %dec16 = add nsw i32 %storemerge23, -1 ; 
CHECK-NEXT: --> {2,+,-1}<%outer.loop> U: [0,3) S: [0,3) Exits: <> LoopDispositions: { %outer.loop: Computable, %for.cond6: Invariant, %inner.loop: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @f diff --git a/llvm/test/Analysis/ScalarEvolution/increasing-or-decreasing-iv.ll b/llvm/test/Analysis/ScalarEvolution/increasing-or-decreasing-iv.ll index 2264bfe4fce6c..0caa0a3f0f15c 100644 --- a/llvm/test/Analysis/ScalarEvolution/increasing-or-decreasing-iv.ll +++ b/llvm/test/Analysis/ScalarEvolution/increasing-or-decreasing-iv.ll @@ -175,7 +175,7 @@ define void @f3(i1 %c) { ; CHECK-NEXT: %loop.iv = phi i16 [ 0, %entry ], [ %loop.iv.inc, %loop ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,128) S: [0,128) Exits: 127 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv = phi i16 [ %start, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {%start,+,%step}<%loop> U: [0,-892) S: [0,-892) Exits: ((127 * %step) + %start) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%start,+,%step}<%loop> U: [0,-892) S: [0,-892) Exits: ((127 * %step) + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.zext = zext i16 %iv to i64 ; CHECK-NEXT: --> {(zext i16 %start to i64),+,(zext i16 %step to i64)}<%loop> U: [0,64644) S: [0,64644) Exits: ((zext i16 %start to i64) + (127 * (zext i16 %step to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i16 %iv, %step @@ -358,9 +358,9 @@ define void @f7(i1 %c) { ; CHECK-NEXT: %iv.next = add i32 %iv, %step ; CHECK-NEXT: --> {(%step + %start),+,%step}<%loop> U: [-256,256) S: [-256,256) Exits: ((128 * %step) + %start) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.trunc.plus.one = add i16 %iv.trunc, 1 -; CHECK-NEXT: --> {(1 + (trunc i32 %start to i16)),+,(trunc i32 %step to i16)}<%loop> U: [1,129) S: [1,129) Exits: (1 + (trunc i32 %start to i16) + (127 * (trunc i32 %step to i16))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(1 + (trunc i32 %start to i16)),+,(trunc 
i32 %step to i16)}<%loop> U: [1,129) S: [1,129) Exits: (1 + (trunc i32 %start to i16) + (127 * (trunc i32 %step to i16))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.trunc.plus.two = add i16 %iv.trunc, 2 -; CHECK-NEXT: --> {(2 + (trunc i32 %start to i16)),+,(trunc i32 %step to i16)}<%loop> U: [2,130) S: [2,130) Exits: (2 + (trunc i32 %start to i16) + (127 * (trunc i32 %step to i16))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(2 + (trunc i32 %start to i16)),+,(trunc i32 %step to i16)}<%loop> U: [2,130) S: [2,130) Exits: (2 + (trunc i32 %start to i16) + (127 * (trunc i32 %step to i16))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %loop.iv.inc = add i16 %loop.iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,129) S: [1,129) Exits: 128 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @f7 diff --git a/llvm/test/Analysis/ScalarEvolution/limit-depth.ll b/llvm/test/Analysis/ScalarEvolution/limit-depth.ll index 87697da20a30b..e0016f3055484 100644 --- a/llvm/test/Analysis/ScalarEvolution/limit-depth.ll +++ b/llvm/test/Analysis/ScalarEvolution/limit-depth.ll @@ -115,7 +115,7 @@ exit: define void @test_trunc(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f) { ; CHECK-LABEL: @test_trunc ; CHECK: %trunc2 = trunc i64 %iv2.inc to i32 -; CHECK-NEXT: --> {(trunc i64 (1 + {7,+,1}<%loop>) to i32),+,1}<%loop2> U: [8,53) S: [8,53) --> 52 U: [52,53) S: [52,53) +; CHECK-NEXT: --> {(trunc i64 (1 + {7,+,1}<%loop>) to i32),+,1}<%loop2> U: [8,53) S: [8,53) --> 52 U: [52,53) S: [52,53) entry: br label %loop diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-operand-order.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-operand-order.ll index 17a6b706685c4..512c2a9e1f132 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-operand-order.ll +++ 
b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-operand-order.ll @@ -218,7 +218,7 @@ define void @const_max_btc_32_or_order_1(i64 %n) { ; CHECK-NEXT: %and.pre = and i1 %pre.1, %pre.0 ; CHECK-NEXT: --> (%pre.1 umin %pre.0) U: full-set S: full-set ; CHECK-NEXT: %iv = phi i64 [ %iv.next, %loop ], [ 0, %ph ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: %n LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,-9223372036854775808) S: [0,-9223372036854775808) Exits: %n LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,-9223372036854775807) S: [1,-9223372036854775807) Exits: (1 + %n) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @const_max_btc_32_or_order_1 diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll index f0e732968c88a..fac1e2a9e6986 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info-rewrite-expressions.ll @@ -808,9 +808,9 @@ define void @rewrite_add_rec() { ; CHECK-NEXT: %n.vec = and i64 %sub, -2 ; CHECK-NEXT: --> (2 * ({9,+,-1}<%outer.header> /u 2)) U: [0,9) S: [0,9) Exits: 0 LoopDispositions: { %outer.header: Computable, %inner: Invariant } ; CHECK-NEXT: %inner.iv = phi i64 [ 0, %inner.ph ], [ %inner.iv.next, %inner ] -; CHECK-NEXT: --> {0,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (-2 + (2 * ({9,+,-1}<%outer.header> /u 2))) LoopDispositions: { %inner: Computable, %outer.header: Uniform } +; CHECK-NEXT: --> {0,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (-2 + (2 * ({9,+,-1}<%outer.header> /u 2))) 
LoopDispositions: { %inner: Computable, %outer.header: Uniform } ; CHECK-NEXT: %inner.iv.next = add i64 %inner.iv, 2 -; CHECK-NEXT: --> {2,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ({9,+,-1}<%outer.header> /u 2)) LoopDispositions: { %inner: Computable, %outer.header: Uniform } +; CHECK-NEXT: --> {2,+,2}<%inner> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ({9,+,-1}<%outer.header> /u 2)) LoopDispositions: { %inner: Computable, %outer.header: Uniform } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%outer.header> U: [1,11) S: [1,11) Exits: 10 LoopDispositions: { %outer.header: Computable, %inner: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @rewrite_add_rec diff --git a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll index 436f0e55840d9..bab3a28d1e8a4 100644 --- a/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll +++ b/llvm/test/Analysis/ScalarEvolution/max-backedge-taken-count-guard-info.ll @@ -1334,7 +1334,7 @@ define i32 @ptr_induction_ult_1(ptr %a, ptr %b) { ; CHECK-LABEL: 'ptr_induction_ult_1' ; CHECK-NEXT: Classifying expressions for: @ptr_induction_ult_1 ; CHECK-NEXT: %ptr.iv = phi ptr [ %ptr.iv.next, %loop ], [ %a, %entry ] -; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: %a LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {%a,+,4}<%loop> U: full-set S: full-set Exits: %a LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %ptr.iv.next = getelementptr i32, ptr %ptr.iv, i64 1 ; CHECK-NEXT: --> {(4 + %a),+,4}<%loop> U: full-set S: full-set Exits: (4 + %a) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @ptr_induction_ult_1 diff --git a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll index 
9e7142adb1ae8..e5972e021aeef 100644 --- a/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll +++ b/llvm/test/Analysis/ScalarEvolution/mul-udiv-folds.ll @@ -163,9 +163,9 @@ define void @btc_depends_on_div_mul(i64 %x) { ; CHECK-NEXT: %masked = and i64 %div.16, 1152921504606846974 ; CHECK-NEXT: --> (2 * ((2 * %x) /u 32)) U: [0,1152921504606846975) S: [0,1152921504606846975) ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (-2 + (2 * ((2 * %x) /u 32))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (-2 + (2 * ((2 * %x) /u 32))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 2 -; CHECK-NEXT: --> {2,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ((2 * %x) /u 32)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {2,+,2}<%loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: (2 * ((2 * %x) /u 32)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @btc_depends_on_div_mul ; CHECK-NEXT: Loop %loop: backedge-taken count is ((-2 + (2 * ((2 * %x) /u 32))) /u 2) ; CHECK-NEXT: Loop %loop: constant max backedge-taken count is i64 9223372036854775807 diff --git a/llvm/test/Analysis/ScalarEvolution/pr123550.ll b/llvm/test/Analysis/ScalarEvolution/pr123550.ll index 196f03cad51cd..81bd58bf2ae36 100644 --- a/llvm/test/Analysis/ScalarEvolution/pr123550.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr123550.ll @@ -12,13 +12,13 @@ define i32 @test() { ; CHECK-NEXT: %srem = srem i32 729259140, %phi ; CHECK-NEXT: --> %srem U: [0,1073741824) S: [0,1073741824) Exits: 130 LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %trunc = trunc i32 %iv2 to i8 -; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,2) S: [1,2) Exits: 1 LoopDispositions: { %loop: Computable } +; CHECK-NEXT: 
--> {1,+,1}<%loop> U: [1,2) S: [1,2) Exits: 1 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %urem = urem i8 -83, %trunc -; CHECK-NEXT: --> (-83 + ((-83 /u {1,+,1}<%loop>) * {-1,+,-1}<%loop>)) U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> (-83 + ((-83 /u {1,+,1}<%loop>) * {-1,+,-1}<%loop>)) U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %zext = zext i8 %urem to i32 -; CHECK-NEXT: --> (zext i8 (-83 + ((-83 /u {1,+,1}<%loop>) * {-1,+,-1}<%loop>)) to i32) U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> (zext i8 (-83 + ((-83 /u {1,+,1}<%loop>) * {-1,+,-1}<%loop>)) to i32) U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %sub = sub i32 0, %zext -; CHECK-NEXT: --> (-1 * (zext i8 (-83 + ((-83 /u {1,+,1}<%loop>) * {-1,+,-1}<%loop>)) to i32)) U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> (-1 * (zext i8 (-83 + ((-83 /u {1,+,1}<%loop>) * {-1,+,-1}<%loop>)) to i32)) U: [0,1) S: [0,1) Exits: 0 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv2.inc = add i32 %iv2, 1 ; CHECK-NEXT: --> {2,+,1}<%loop> U: [2,3) S: [2,3) Exits: 2 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %srem.lcssa = phi i32 [ %srem, %loop ] diff --git a/llvm/test/Analysis/ScalarEvolution/pr22641.ll b/llvm/test/Analysis/ScalarEvolution/pr22641.ll index fe06973747d90..be79f2971e28c 100644 --- a/llvm/test/Analysis/ScalarEvolution/pr22641.ll +++ b/llvm/test/Analysis/ScalarEvolution/pr22641.ll @@ -11,7 +11,7 @@ body: %conv2 = zext i16 %dec2 to i32 %conv = zext i16 %dec to i32 ; CHECK: %conv = zext i16 %dec to i32 -; CHECK-NEXT: --> {(zext i16 (-1 + %a) to i32),+,65535}<%body> +; CHECK-NEXT: --> {(zext i16 (-1 + %a) to i32),+,65535}<%body> ; CHECK-NOT: --> {(65535 + (zext i16 %a to i32)),+,65535}<%body> br label %cond diff --git a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll 
b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll index 0c1f37bf58601..2b07c944eb1d1 100644 --- a/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll +++ b/llvm/test/Analysis/ScalarEvolution/ptrtoint.ll @@ -410,7 +410,7 @@ define void @pr46786_c26_char(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: %i10 = sub i64 %i9, %i4 ; X32-NEXT: --> {0,+,1}<%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 -; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i12 = load i8, ptr %i11, align 1 ; X32-NEXT: --> %i12 U: full-set S: full-set Exits: <> LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i13 = add i8 %i12, %i8 @@ -487,7 +487,7 @@ define void @pr46786_c26_char_cmp_ops_swapped(ptr %arg, ptr %arg1, ptr %arg2) { ; X32-NEXT: %i10 = sub i64 %i9, %i4 ; X32-NEXT: --> {0,+,1}<%bb6> U: [0,4294967295) S: [0,4294967295) Exits: (zext i32 (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32)) to i64) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i11 = getelementptr inbounds i8, ptr %arg2, i64 %i10 -; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable } +; X32-NEXT: --> {%arg2,+,1}<%bb6> U: full-set S: full-set Exits: (-1 + (-1 * (ptrtoint ptr %arg to i32)) + (ptrtoint ptr %arg1 to i32) + %arg2) LoopDispositions: { %bb6: Computable } ; X32-NEXT: %i12 = load i8, ptr %i11, align 1 ; X32-NEXT: --> %i12 U: full-set S: full-set Exits: <> 
LoopDispositions: { %bb6: Variant } ; X32-NEXT: %i13 = add i8 %i12, %i8 diff --git a/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll b/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll index f372847d2e398..572b3f7271b88 100644 --- a/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll +++ b/llvm/test/Analysis/ScalarEvolution/sext-iv-2.ll @@ -1,9 +1,9 @@ ; RUN: opt < %s -disable-output "-passes=print" 2>&1 | FileCheck %s ; CHECK: %tmp3 = sext i8 %tmp2 to i32 -; CHECK: --> (sext i8 {0,+,1}<%bb1> to i32) U: [-128,128) S: [-128,128) Exits: -1 +; CHECK: --> (sext i8 {0,+,1}<%bb1> to i32) U: [-128,128) S: [-128,128) Exits: -1 ; CHECK: %tmp4 = mul i32 %tmp3, %i.02 -; CHECK: --> ((sext i8 {0,+,1}<%bb1> to i32) * {0,+,1}<%bb>) U: [-3968,3938) S: [-3968,3938) Exits: {0,+,-1}<%bb> +; CHECK: --> ((sext i8 {0,+,1}<%bb1> to i32) * {0,+,1}<%bb>) U: [-3968,3938) S: [-3968,3938) Exits: {0,+,-1}<%bb> ; These sexts are not foldable. diff --git a/llvm/test/Analysis/ScalarEvolution/sext-mul.ll b/llvm/test/Analysis/ScalarEvolution/sext-mul.ll index f369bfe0b6312..5d6a1dec72e13 100644 --- a/llvm/test/Analysis/ScalarEvolution/sext-mul.ll +++ b/llvm/test/Analysis/ScalarEvolution/sext-mul.ll @@ -11,17 +11,17 @@ define void @foo(ptr nocapture %arg, i32 %arg1, i32 %arg2) { ; CHECK-NEXT: %tmp9 = shl i64 %tmp8, 33 ; CHECK-NEXT: --> {0,+,8589934592}<%bb7> U: [0,-17179869183) S: [-9223372036854775808,9223372028264841217) Exits: (-8589934592 + (8589934592 * (zext i32 %arg2 to i64))) LoopDispositions: { %bb7: Computable } ; CHECK-NEXT: %tmp10 = ashr exact i64 %tmp9, 32 -; CHECK-NEXT: --> (sext i32 {0,+,2}<%bb7> to i64) U: [0,-1) S: [-2147483648,2147483647) Exits: (sext i32 (-2 + (2 * %arg2)) to i64) LoopDispositions: { %bb7: Computable } +; CHECK-NEXT: --> (sext i32 {0,+,2}<%bb7> to i64) U: [0,-1) S: [-2147483648,2147483647) Exits: (sext i32 (-2 + (2 * %arg2)) to i64) LoopDispositions: { %bb7: Computable } ; CHECK-NEXT: %tmp11 = getelementptr inbounds i32, ptr %arg, i64 %tmp10 -; CHECK-NEXT: --> ((4 * 
(sext i32 {0,+,2}<%bb7> to i64)) + %arg) U: full-set S: full-set Exits: ((4 * (sext i32 (-2 + (2 * %arg2)) to i64)) + %arg) LoopDispositions: { %bb7: Computable } +; CHECK-NEXT: --> ((4 * (sext i32 {0,+,2}<%bb7> to i64)) + %arg) U: full-set S: full-set Exits: ((4 * (sext i32 (-2 + (2 * %arg2)) to i64)) + %arg) LoopDispositions: { %bb7: Computable } ; CHECK-NEXT: %tmp12 = load i32, ptr %tmp11, align 4 ; CHECK-NEXT: --> %tmp12 U: full-set S: full-set Exits: <> LoopDispositions: { %bb7: Variant } ; CHECK-NEXT: %tmp13 = sub nsw i32 %tmp12, %arg1 ; CHECK-NEXT: --> ((-1 * %arg1) + %tmp12) U: full-set S: full-set Exits: <> LoopDispositions: { %bb7: Variant } ; CHECK-NEXT: %tmp14 = or disjoint i64 %tmp10, 1 -; CHECK-NEXT: --> (1 + (sext i32 {0,+,2}<%bb7> to i64)) U: [1,0) S: [-2147483647,2147483648) Exits: (1 + (sext i32 (-2 + (2 * %arg2)) to i64)) LoopDispositions: { %bb7: Computable } +; CHECK-NEXT: --> (1 + (sext i32 {0,+,2}<%bb7> to i64)) U: [1,0) S: [-2147483647,2147483648) Exits: (1 + (sext i32 (-2 + (2 * %arg2)) to i64)) LoopDispositions: { %bb7: Computable } ; CHECK-NEXT: %tmp15 = getelementptr inbounds i32, ptr %arg, i64 %tmp14 -; CHECK-NEXT: --> (4 + (4 * (sext i32 {0,+,2}<%bb7> to i64)) + %arg) U: full-set S: full-set Exits: (4 + (4 * (sext i32 (-2 + (2 * %arg2)) to i64)) + %arg) LoopDispositions: { %bb7: Computable } +; CHECK-NEXT: --> (4 + (4 * (sext i32 {0,+,2}<%bb7> to i64)) + %arg) U: full-set S: full-set Exits: (4 + (4 * (sext i32 (-2 + (2 * %arg2)) to i64)) + %arg) LoopDispositions: { %bb7: Computable } ; CHECK-NEXT: %tmp16 = load i32, ptr %tmp15, align 4 ; CHECK-NEXT: --> %tmp16 U: full-set S: full-set Exits: <> LoopDispositions: { %bb7: Variant } ; CHECK-NEXT: %tmp17 = mul nsw i32 %tmp16, %arg1 diff --git a/llvm/test/Analysis/ScalarEvolution/trip-count-negative-stride.ll b/llvm/test/Analysis/ScalarEvolution/trip-count-negative-stride.ll index 5a3517961e1ac..cc8fbf961462e 100644 --- a/llvm/test/Analysis/ScalarEvolution/trip-count-negative-stride.ll +++ 
b/llvm/test/Analysis/ScalarEvolution/trip-count-negative-stride.ll @@ -562,9 +562,9 @@ declare void @llvm.assume(i1) define void @step_is_neg_addrec_slt_8(i64 %n) { ; CHECK-LABEL: 'step_is_neg_addrec_slt_8' ; CHECK-NEXT: Determining loop execution counts for: @step_is_neg_addrec_slt_8 -; CHECK-NEXT: Loop %inner: backedge-taken count is (7 /u {0,+,-1}<%outer.header>) +; CHECK-NEXT: Loop %inner: backedge-taken count is (7 /u {0,+,-1}<%outer.header>) ; CHECK-NEXT: Loop %inner: constant max backedge-taken count is i32 8 -; CHECK-NEXT: Loop %inner: symbolic max backedge-taken count is (7 /u {0,+,-1}<%outer.header>) +; CHECK-NEXT: Loop %inner: symbolic max backedge-taken count is (7 /u {0,+,-1}<%outer.header>) ; CHECK-NEXT: Loop %inner: Trip multiple is 1 ; CHECK-NEXT: Loop %outer.header: backedge-taken count is i64 0 ; CHECK-NEXT: Loop %outer.header: constant max backedge-taken count is i64 0 @@ -600,9 +600,9 @@ exit: define void @step_is_neg_addrec_slt_var(i32 %n) { ; CHECK-LABEL: 'step_is_neg_addrec_slt_var' ; CHECK-NEXT: Determining loop execution counts for: @step_is_neg_addrec_slt_var -; CHECK-NEXT: Loop %inner: backedge-taken count is ({0,+,1}<%outer.header> + ({0,+,-1}<%outer.header> smax %n)) +; CHECK-NEXT: Loop %inner: backedge-taken count is ({0,+,1}<%outer.header> + ({0,+,-1}<%outer.header> smax %n)) ; CHECK-NEXT: Loop %inner: constant max backedge-taken count is i32 2147483647 -; CHECK-NEXT: Loop %inner: symbolic max backedge-taken count is ({0,+,1}<%outer.header> + ({0,+,-1}<%outer.header> smax %n)) +; CHECK-NEXT: Loop %inner: symbolic max backedge-taken count is ({0,+,1}<%outer.header> + ({0,+,-1}<%outer.header> smax %n)) ; CHECK-NEXT: Loop %inner: Trip multiple is 1 ; CHECK-NEXT: Loop %outer.header: backedge-taken count is i64 0 ; CHECK-NEXT: Loop %outer.header: constant max backedge-taken count is i64 0 @@ -638,9 +638,9 @@ exit: define void @step_is_neg_addrec_unknown_start(i32 %n) { ; CHECK-LABEL: 'step_is_neg_addrec_unknown_start' ; CHECK-NEXT: 
Determining loop execution counts for: @step_is_neg_addrec_unknown_start -; CHECK-NEXT: Loop %inner: backedge-taken count is ({(-1 * %n),+,1}<%outer.header> + (8 smax {%n,+,-1}<%outer.header>)) +; CHECK-NEXT: Loop %inner: backedge-taken count is ({(-1 * %n),+,1}<%outer.header> + (8 smax {%n,+,-1}<%outer.header>)) ; CHECK-NEXT: Loop %inner: constant max backedge-taken count is i32 -2147483640 -; CHECK-NEXT: Loop %inner: symbolic max backedge-taken count is ({(-1 * %n),+,1}<%outer.header> + (8 smax {%n,+,-1}<%outer.header>)) +; CHECK-NEXT: Loop %inner: symbolic max backedge-taken count is ({(-1 * %n),+,1}<%outer.header> + (8 smax {%n,+,-1}<%outer.header>)) ; CHECK-NEXT: Loop %inner: Trip multiple is 1 ; CHECK-NEXT: Loop %outer.header: backedge-taken count is i64 0 ; CHECK-NEXT: Loop %outer.header: constant max backedge-taken count is i64 0 diff --git a/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll b/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll index fbdbefb875fba..b97afae894621 100644 --- a/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll +++ b/llvm/test/Analysis/ScalarEvolution/umin-umax-folds.ll @@ -77,9 +77,9 @@ define void @uge_sext_x_zext_x(i32 %len) { ; CHECK-NEXT: %len.sext = sext i32 %len to i64 ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %sel = select i1 
%cmp1, i64 %len.zext, i64 %len.sext ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @uge_sext_x_zext_x @@ -145,9 +145,9 @@ define void @ugt_sext_x_zext_x(i32 %len) { ; CHECK-NEXT: %len.sext = sext i32 %len to i64 ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 -; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %umax = select i1 %cmp1, i64 %len.zext, i64 %len.sext ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @ugt_sext_x_zext_x @@ -247,9 +247,9 @@ define void @slt_sext_x_zext_x(i32 %len) { ; CHECK-NEXT: %len.sext = sext i32 %len to i64 ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] -; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {0,+,1}<%loop> U: full-set S: full-set Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 -; CHECK-NEXT: --> 
{1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {1,+,1}<%loop> U: full-set S: full-set Exits: (1 + (sext i32 %len to i64)) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %umin = select i1 %cmp1, i64 %len.zext, i64 %len.sext ; CHECK-NEXT: --> (sext i32 %len to i64) U: [-2147483648,2147483648) S: [-2147483648,2147483648) Exits: (sext i32 %len to i64) LoopDispositions: { %loop: Invariant } ; CHECK-NEXT: Determining loop execution counts for: @slt_sext_x_zext_x diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll index 935a8c6fb6c39..7aa7dde4296a7 100644 --- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -641,46 +641,44 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) { ; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: bltlr cr0 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: clrldi r4, r4, 32 -; CHECK-NEXT: li r6, 0 -; CHECK-NEXT: mtctr r4 -; CHECK-NEXT: li r4, 0 +; CHECK-NEXT: clrldi r6, r4, 32 +; CHECK-NEXT: addi r4, r5, 48 +; CHECK-NEXT: li r5, 0 +; CHECK-NEXT: mtctr r6 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB9_2: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: rldic r7, r6, 4, 28 +; CHECK-NEXT: lxv vs0, -48(r4) +; CHECK-NEXT: lxv vs1, -32(r4) ; CHECK-NEXT: xxsetaccz acc2 ; CHECK-NEXT: xxsetaccz acc1 -; CHECK-NEXT: addi r6, r6, 6 -; CHECK-NEXT: add r8, r5, r7 -; CHECK-NEXT: lxvx vs0, r5, r7 -; CHECK-NEXT: rldic r7, r4, 6, 26 -; CHECK-NEXT: addi r4, r4, 3 -; CHECK-NEXT: lxv vs1, 16(r8) +; CHECK-NEXT: rldic r6, r5, 6, 26 +; CHECK-NEXT: addi r5, r5, 3 +; CHECK-NEXT: add r7, r3, r6 ; CHECK-NEXT: xvf32gerpp acc2, vs0, vs1 -; CHECK-NEXT: lxv vs0, 32(r8) -; CHECK-NEXT: lxv vs1, 48(r8) +; CHECK-NEXT: lxv vs0, -16(r4) +; CHECK-NEXT: lxv vs1, 0(r4) ; CHECK-NEXT: xvf32gerpn acc1, vs0, vs1 -; CHECK-NEXT: lxv vs12, 64(r8) -; CHECK-NEXT: lxv vs13, 80(r8) +; CHECK-NEXT: lxv vs12, 
16(r4) +; CHECK-NEXT: lxv vs13, 32(r4) ; CHECK-NEXT: xxsetaccz acc0 -; CHECK-NEXT: add r8, r3, r7 -; CHECK-NEXT: xxmfacc acc2 +; CHECK-NEXT: addi r4, r4, 96 ; CHECK-NEXT: xvf32gernp acc0, vs12, vs13 -; CHECK-NEXT: stxvx vs11, r3, r7 -; CHECK-NEXT: stxv vs8, 48(r8) +; CHECK-NEXT: xxmfacc acc2 ; CHECK-NEXT: xxmfacc acc1 -; CHECK-NEXT: stxv vs9, 32(r8) -; CHECK-NEXT: stxv vs10, 16(r8) -; CHECK-NEXT: stxv vs4, 112(r8) -; CHECK-NEXT: stxv vs5, 96(r8) +; CHECK-NEXT: stxvx vs11, r3, r6 +; CHECK-NEXT: stxv vs8, 48(r7) +; CHECK-NEXT: stxv vs9, 32(r7) +; CHECK-NEXT: stxv vs10, 16(r7) ; CHECK-NEXT: xxmfacc acc0 -; CHECK-NEXT: stxv vs6, 80(r8) -; CHECK-NEXT: stxv vs7, 64(r8) -; CHECK-NEXT: stxv vs0, 176(r8) -; CHECK-NEXT: stxv vs1, 160(r8) -; CHECK-NEXT: stxv vs2, 144(r8) -; CHECK-NEXT: stxv vs3, 128(r8) +; CHECK-NEXT: stxv vs4, 112(r7) +; CHECK-NEXT: stxv vs5, 96(r7) +; CHECK-NEXT: stxv vs6, 80(r7) +; CHECK-NEXT: stxv vs7, 64(r7) +; CHECK-NEXT: stxv vs0, 176(r7) +; CHECK-NEXT: stxv vs1, 160(r7) +; CHECK-NEXT: stxv vs2, 144(r7) +; CHECK-NEXT: stxv vs3, 128(r7) ; CHECK-NEXT: bdnz .LBB9_2 ; CHECK-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-NEXT: blr @@ -690,46 +688,44 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) { ; CHECK-BE-NEXT: cmpwi r4, 1 ; CHECK-BE-NEXT: bltlr cr0 ; CHECK-BE-NEXT: # %bb.1: # %for.body.preheader -; CHECK-BE-NEXT: clrldi r4, r4, 32 -; CHECK-BE-NEXT: li r6, 0 -; CHECK-BE-NEXT: mtctr r4 -; CHECK-BE-NEXT: li r4, 0 +; CHECK-BE-NEXT: clrldi r6, r4, 32 +; CHECK-BE-NEXT: addi r4, r5, 48 +; CHECK-BE-NEXT: li r5, 0 +; CHECK-BE-NEXT: mtctr r6 ; CHECK-BE-NEXT: .p2align 4 ; CHECK-BE-NEXT: .LBB9_2: # %for.body ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: rldic r7, r6, 4, 28 +; CHECK-BE-NEXT: lxv vs0, -48(r4) +; CHECK-BE-NEXT: lxv vs1, -32(r4) ; CHECK-BE-NEXT: xxsetaccz acc2 ; CHECK-BE-NEXT: xxsetaccz acc1 -; CHECK-BE-NEXT: addi r6, r6, 6 -; CHECK-BE-NEXT: add r8, r5, r7 -; CHECK-BE-NEXT: lxvx vs0, r5, r7 -; CHECK-BE-NEXT: rldic r7, r4, 6, 26 -; CHECK-BE-NEXT: addi r4, r4, 
3 -; CHECK-BE-NEXT: lxv vs1, 16(r8) +; CHECK-BE-NEXT: rldic r6, r5, 6, 26 +; CHECK-BE-NEXT: addi r5, r5, 3 +; CHECK-BE-NEXT: add r7, r3, r6 ; CHECK-BE-NEXT: xvf32gerpp acc2, vs0, vs1 -; CHECK-BE-NEXT: lxv vs0, 32(r8) -; CHECK-BE-NEXT: lxv vs1, 48(r8) +; CHECK-BE-NEXT: lxv vs0, -16(r4) +; CHECK-BE-NEXT: lxv vs1, 0(r4) ; CHECK-BE-NEXT: xvf32gerpn acc1, vs0, vs1 -; CHECK-BE-NEXT: lxv vs12, 64(r8) -; CHECK-BE-NEXT: lxv vs13, 80(r8) +; CHECK-BE-NEXT: lxv vs12, 16(r4) +; CHECK-BE-NEXT: lxv vs13, 32(r4) ; CHECK-BE-NEXT: xxsetaccz acc0 -; CHECK-BE-NEXT: add r8, r3, r7 -; CHECK-BE-NEXT: xxmfacc acc2 +; CHECK-BE-NEXT: addi r4, r4, 96 ; CHECK-BE-NEXT: xvf32gernp acc0, vs12, vs13 -; CHECK-BE-NEXT: stxvx vs8, r3, r7 -; CHECK-BE-NEXT: stxv vs9, 16(r8) +; CHECK-BE-NEXT: xxmfacc acc2 ; CHECK-BE-NEXT: xxmfacc acc1 -; CHECK-BE-NEXT: stxv vs11, 48(r8) -; CHECK-BE-NEXT: stxv vs10, 32(r8) -; CHECK-BE-NEXT: stxv vs5, 80(r8) -; CHECK-BE-NEXT: stxv vs4, 64(r8) +; CHECK-BE-NEXT: stxvx vs8, r3, r6 +; CHECK-BE-NEXT: stxv vs9, 16(r7) +; CHECK-BE-NEXT: stxv vs11, 48(r7) +; CHECK-BE-NEXT: stxv vs10, 32(r7) ; CHECK-BE-NEXT: xxmfacc acc0 -; CHECK-BE-NEXT: stxv vs7, 112(r8) -; CHECK-BE-NEXT: stxv vs6, 96(r8) -; CHECK-BE-NEXT: stxv vs1, 144(r8) -; CHECK-BE-NEXT: stxv vs0, 128(r8) -; CHECK-BE-NEXT: stxv vs3, 176(r8) -; CHECK-BE-NEXT: stxv vs2, 160(r8) +; CHECK-BE-NEXT: stxv vs5, 80(r7) +; CHECK-BE-NEXT: stxv vs4, 64(r7) +; CHECK-BE-NEXT: stxv vs7, 112(r7) +; CHECK-BE-NEXT: stxv vs6, 96(r7) +; CHECK-BE-NEXT: stxv vs1, 144(r7) +; CHECK-BE-NEXT: stxv vs0, 128(r7) +; CHECK-BE-NEXT: stxv vs3, 176(r7) +; CHECK-BE-NEXT: stxv vs2, 160(r7) ; CHECK-BE-NEXT: bdnz .LBB9_2 ; CHECK-BE-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-BE-NEXT: blr @@ -741,44 +737,42 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) { ; CHECK-WACC-NEXT: # %bb.1: # %for.body.preheader ; CHECK-WACC-NEXT: clrldi r4, r4, 32 ; CHECK-WACC-NEXT: mtctr r4 -; CHECK-WACC-NEXT: li r4, 0 -; CHECK-WACC-NEXT: li r6, 0 +; CHECK-WACC-NEXT: addi r4, 
r5, 48 +; CHECK-WACC-NEXT: li r5, 0 ; CHECK-WACC-NEXT: .p2align 4 ; CHECK-WACC-NEXT: .LBB9_2: # %for.body ; CHECK-WACC-NEXT: # -; CHECK-WACC-NEXT: rldic r7, r6, 4, 28 -; CHECK-WACC-NEXT: add r8, r5, r7 -; CHECK-WACC-NEXT: lxvx vs0, r5, r7 -; CHECK-WACC-NEXT: lxv vs1, 16(r8) +; CHECK-WACC-NEXT: lxv vs0, -48(r4) +; CHECK-WACC-NEXT: lxv vs1, -32(r4) ; CHECK-WACC-NEXT: dmxxsetaccz wacc2 ; CHECK-WACC-NEXT: dmxxsetaccz wacc1 ; CHECK-WACC-NEXT: dmxxsetaccz wacc0 ; CHECK-WACC-NEXT: xvf32gerpp wacc2, vs0, vs1 -; CHECK-WACC-NEXT: lxv vs0, 32(r8) -; CHECK-WACC-NEXT: lxv vs1, 48(r8) -; CHECK-WACC-NEXT: rldic r7, r4, 6, 26 -; CHECK-WACC-NEXT: addi r4, r4, 3 -; CHECK-WACC-NEXT: addi r6, r6, 6 +; CHECK-WACC-NEXT: lxv vs0, -16(r4) +; CHECK-WACC-NEXT: lxv vs1, 0(r4) +; CHECK-WACC-NEXT: rldic r6, r5, 6, 26 +; CHECK-WACC-NEXT: add r7, r3, r6 +; CHECK-WACC-NEXT: addi r5, r5, 3 ; CHECK-WACC-NEXT: xvf32gerpn wacc1, vs0, vs1 -; CHECK-WACC-NEXT: lxv vs0, 64(r8) -; CHECK-WACC-NEXT: lxv vs1, 80(r8) -; CHECK-WACC-NEXT: add r8, r3, r7 +; CHECK-WACC-NEXT: lxv vs0, 16(r4) +; CHECK-WACC-NEXT: lxv vs1, 32(r4) +; CHECK-WACC-NEXT: addi r4, r4, 96 ; CHECK-WACC-NEXT: xvf32gernp wacc0, vs0, vs1 ; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc2, 0 -; CHECK-WACC-NEXT: stxvx v3, r3, r7 -; CHECK-WACC-NEXT: stxv v4, 48(r8) -; CHECK-WACC-NEXT: stxv v5, 32(r8) -; CHECK-WACC-NEXT: stxv v2, 16(r8) +; CHECK-WACC-NEXT: stxvx v3, r3, r6 +; CHECK-WACC-NEXT: stxv v4, 48(r7) +; CHECK-WACC-NEXT: stxv v5, 32(r7) +; CHECK-WACC-NEXT: stxv v2, 16(r7) ; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 -; CHECK-WACC-NEXT: stxv v4, 112(r8) -; CHECK-WACC-NEXT: stxv v5, 96(r8) -; CHECK-WACC-NEXT: stxv v2, 80(r8) -; CHECK-WACC-NEXT: stxv v3, 64(r8) +; CHECK-WACC-NEXT: stxv v4, 112(r7) +; CHECK-WACC-NEXT: stxv v5, 96(r7) +; CHECK-WACC-NEXT: stxv v2, 80(r7) +; CHECK-WACC-NEXT: stxv v3, 64(r7) ; CHECK-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-WACC-NEXT: stxv v4, 176(r8) -; CHECK-WACC-NEXT: stxv v5, 
160(r8) -; CHECK-WACC-NEXT: stxv v2, 144(r8) -; CHECK-WACC-NEXT: stxv v3, 128(r8) +; CHECK-WACC-NEXT: stxv v4, 176(r7) +; CHECK-WACC-NEXT: stxv v5, 160(r7) +; CHECK-WACC-NEXT: stxv v2, 144(r7) +; CHECK-WACC-NEXT: stxv v3, 128(r7) ; CHECK-WACC-NEXT: bdnz .LBB9_2 ; CHECK-WACC-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-WACC-NEXT: blr @@ -790,44 +784,42 @@ define void @testcse4(ptr %res, i32 %lim, ptr %vc) { ; CHECK-BE-WACC-NEXT: # %bb.1: # %for.body.preheader ; CHECK-BE-WACC-NEXT: clrldi r4, r4, 32 ; CHECK-BE-WACC-NEXT: mtctr r4 -; CHECK-BE-WACC-NEXT: li r4, 0 -; CHECK-BE-WACC-NEXT: li r6, 0 +; CHECK-BE-WACC-NEXT: addi r4, r5, 48 +; CHECK-BE-WACC-NEXT: li r5, 0 ; CHECK-BE-WACC-NEXT: .p2align 4 ; CHECK-BE-WACC-NEXT: .LBB9_2: # %for.body ; CHECK-BE-WACC-NEXT: # -; CHECK-BE-WACC-NEXT: rldic r7, r6, 4, 28 -; CHECK-BE-WACC-NEXT: add r8, r5, r7 -; CHECK-BE-WACC-NEXT: lxvx vs0, r5, r7 -; CHECK-BE-WACC-NEXT: lxv vs1, 16(r8) +; CHECK-BE-WACC-NEXT: lxv vs0, -48(r4) +; CHECK-BE-WACC-NEXT: lxv vs1, -32(r4) ; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc2 ; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc1 ; CHECK-BE-WACC-NEXT: dmxxsetaccz wacc0 ; CHECK-BE-WACC-NEXT: xvf32gerpp wacc2, vs0, vs1 -; CHECK-BE-WACC-NEXT: lxv vs0, 32(r8) -; CHECK-BE-WACC-NEXT: lxv vs1, 48(r8) -; CHECK-BE-WACC-NEXT: rldic r7, r4, 6, 26 -; CHECK-BE-WACC-NEXT: addi r4, r4, 3 -; CHECK-BE-WACC-NEXT: addi r6, r6, 6 +; CHECK-BE-WACC-NEXT: lxv vs0, -16(r4) +; CHECK-BE-WACC-NEXT: lxv vs1, 0(r4) +; CHECK-BE-WACC-NEXT: rldic r6, r5, 6, 26 +; CHECK-BE-WACC-NEXT: add r7, r3, r6 +; CHECK-BE-WACC-NEXT: addi r5, r5, 3 ; CHECK-BE-WACC-NEXT: xvf32gerpn wacc1, vs0, vs1 -; CHECK-BE-WACC-NEXT: lxv vs0, 64(r8) -; CHECK-BE-WACC-NEXT: lxv vs1, 80(r8) -; CHECK-BE-WACC-NEXT: add r8, r3, r7 +; CHECK-BE-WACC-NEXT: lxv vs0, 16(r4) +; CHECK-BE-WACC-NEXT: lxv vs1, 32(r4) +; CHECK-BE-WACC-NEXT: addi r4, r4, 96 ; CHECK-BE-WACC-NEXT: xvf32gernp wacc0, vs0, vs1 ; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc2, 0 -; CHECK-BE-WACC-NEXT: stxvx v2, r3, 
r7 -; CHECK-BE-WACC-NEXT: stxv v5, 48(r8) -; CHECK-BE-WACC-NEXT: stxv v4, 32(r8) -; CHECK-BE-WACC-NEXT: stxv v3, 16(r8) +; CHECK-BE-WACC-NEXT: stxvx v2, r3, r6 +; CHECK-BE-WACC-NEXT: stxv v5, 48(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 32(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 16(r7) ; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc1, 0 -; CHECK-BE-WACC-NEXT: stxv v5, 112(r8) -; CHECK-BE-WACC-NEXT: stxv v4, 96(r8) -; CHECK-BE-WACC-NEXT: stxv v3, 80(r8) -; CHECK-BE-WACC-NEXT: stxv v2, 64(r8) +; CHECK-BE-WACC-NEXT: stxv v5, 112(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 96(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 80(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 64(r7) ; CHECK-BE-WACC-NEXT: dmxxextfdmr512 vsp34, vsp36, wacc0, 0 -; CHECK-BE-WACC-NEXT: stxv v5, 176(r8) -; CHECK-BE-WACC-NEXT: stxv v4, 160(r8) -; CHECK-BE-WACC-NEXT: stxv v3, 144(r8) -; CHECK-BE-WACC-NEXT: stxv v2, 128(r8) +; CHECK-BE-WACC-NEXT: stxv v5, 176(r7) +; CHECK-BE-WACC-NEXT: stxv v4, 160(r7) +; CHECK-BE-WACC-NEXT: stxv v3, 144(r7) +; CHECK-BE-WACC-NEXT: stxv v2, 128(r7) ; CHECK-BE-WACC-NEXT: bdnz .LBB9_2 ; CHECK-BE-WACC-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-BE-WACC-NEXT: blr diff --git a/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll b/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll index 3f75d0c9880f8..5eebd75411056 100644 --- a/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll +++ b/llvm/test/Transforms/IndVarSimplify/eliminate-exit-no-dl.ll @@ -15,7 +15,8 @@ define void @foo() { ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: ; CHECK-NEXT: [[TMP6:%.*]] = load i8, ptr getelementptr inbounds nuw (i8, ptr @global, i64 1), align 1 -; CHECK-NEXT: br i1 false, label [[BB7:%.*]], label [[BB11:%.*]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt ptr getelementptr (i8, ptr @global, i64 1), getelementptr (i8, ptr @global, i64 500) +; CHECK-NEXT: br i1 [[TMP5]], label [[BB7:%.*]], label [[BB11:%.*]] ; CHECK: bb7: ; CHECK-NEXT: [[TMP8:%.*]] = zext i8 [[TMP6]] to i64 ; CHECK-NEXT: br i1 true, 
label [[BB11]], label [[BB3]] diff --git a/llvm/test/Transforms/LoopInterchange/fixed-size-no-signed-wrap.ll b/llvm/test/Transforms/LoopInterchange/fixed-size-no-signed-wrap.ll index 91f3b850e0a20..c5c492ae7956a 100644 --- a/llvm/test/Transforms/LoopInterchange/fixed-size-no-signed-wrap.ll +++ b/llvm/test/Transforms/LoopInterchange/fixed-size-no-signed-wrap.ll @@ -12,36 +12,46 @@ ; end do ; end do -; FIXME: We currently fail to interchange this. define void @fixed_size_5x5(ptr noalias %A) { ; CHECK-LABEL: define void @fixed_size_5x5( ; CHECK-SAME: ptr noalias [[A:%.*]]) { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[OUTER_HEADER:.*]] -; CHECK: [[OUTER_HEADER]]: -; CHECK-NEXT: [[I_COUNT:%.*]] = phi i64 [ 5, %[[ENTRY]] ], [ [[I_COUNT_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i32 [ 1, %[[ENTRY]] ], [ [[I_NEXT:%.*]], %[[OUTER_LATCH]] ] +; CHECK: [[OUTER_HEADER_PREHEADER:.*]]: +; CHECK-NEXT: br label %[[OUTER_HEADER1:.*]] +; CHECK: [[OUTER_HEADER1]]: +; CHECK-NEXT: [[I_COUNT:%.*]] = phi i64 [ [[I_COUNT_NEXT:%.*]], %[[OUTER_LATCH:.*]] ], [ 5, %[[OUTER_HEADER_PREHEADER]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_NEXT:%.*]], %[[OUTER_LATCH]] ], [ 1, %[[OUTER_HEADER_PREHEADER]] ] ; CHECK-NEXT: [[I_EXT:%.*]] = zext nneg i32 [[I]] to i64 ; CHECK-NEXT: [[ROW_GEP:%.*]] = getelementptr [4 x i8], ptr [[A]], i64 [[I_EXT]] ; CHECK-NEXT: br label %[[INNER:.*]] +; CHECK: [[OUTER_HEADER]]: +; CHECK-NEXT: br label %[[INNER1:.*]] +; CHECK: [[INNER1]]: +; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[TMP0:%.*]], %[[INNER_SPLIT:.*]] ], [ 1, %[[OUTER_HEADER]] ] +; CHECK-NEXT: [[J_COUNT:%.*]] = phi i64 [ [[TMP1:%.*]], %[[INNER_SPLIT]] ], [ 5, %[[OUTER_HEADER]] ] +; CHECK-NEXT: br label %[[OUTER_HEADER_PREHEADER]] ; CHECK: [[INNER]]: -; CHECK-NEXT: [[J:%.*]] = phi i64 [ 1, %[[OUTER_HEADER]] ], [ [[J_NEXT:%.*]], %[[INNER]] ] -; CHECK-NEXT: [[J_COUNT:%.*]] = phi i64 [ 5, %[[OUTER_HEADER]] ], [ [[J_COUNT_NEXT:%.*]], %[[INNER]] ] ; CHECK-NEXT: 
[[COL_OFF:%.*]] = mul nuw nsw i64 [[J]], 20 ; CHECK-NEXT: [[ELT_GEP:%.*]] = getelementptr i8, ptr [[ROW_GEP]], i64 [[COL_OFF]] ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i8, ptr [[ELT_GEP]], i64 -24 ; CHECK-NEXT: [[V:%.*]] = load float, ptr [[ADDR]], align 4 ; CHECK-NEXT: [[INC:%.*]] = fadd contract float [[V]], 1.000000e+00 ; CHECK-NEXT: store float [[INC]], ptr [[ADDR]], align 4 -; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 1 -; CHECK-NEXT: [[J_COUNT_NEXT]] = add nsw i64 [[J_COUNT]], -1 +; CHECK-NEXT: [[J_NEXT:%.*]] = add nuw nsw i64 [[J]], 1 +; CHECK-NEXT: [[J_COUNT_NEXT:%.*]] = add nsw i64 [[J_COUNT]], -1 ; CHECK-NEXT: [[J_DONE:%.*]] = icmp eq i64 [[J_COUNT_NEXT]], 0 -; CHECK-NEXT: br i1 [[J_DONE]], label %[[OUTER_LATCH]], label %[[INNER]] +; CHECK-NEXT: br label %[[OUTER_LATCH]] +; CHECK: [[INNER_SPLIT]]: +; CHECK-NEXT: [[TMP0]] = add nuw nsw i64 [[J]], 1 +; CHECK-NEXT: [[TMP1]] = add nsw i64 [[J_COUNT]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[EXIT:.*]], label %[[INNER1]] ; CHECK: [[OUTER_LATCH]]: ; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i32 [[I]], 1 ; CHECK-NEXT: [[I_COUNT_NEXT]] = add nsw i64 [[I_COUNT]], -1 ; CHECK-NEXT: [[I_CMP:%.*]] = icmp sgt i64 [[I_COUNT]], 1 -; CHECK-NEXT: br i1 [[I_CMP]], label %[[OUTER_HEADER]], label %[[EXIT:.*]] +; CHECK-NEXT: br i1 [[I_CMP]], label %[[OUTER_HEADER1]], label %[[INNER_SPLIT]] ; CHECK: [[EXIT]]: ; CHECK-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll index 1c67b243429b5..4dcf948d4a23e 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/masked_gather_scatter.ll @@ -17,33 +17,18 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-LABEL: @foo4( ; RV32-NEXT: entry: ; RV32-NEXT: br label [[VECTOR_MEMCHECK:%.*]] -; RV32: 
vector.scevcheck: -; RV32-NEXT: [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 128, i32 624) -; RV32-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL]], 0 -; RV32-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL]], 1 -; RV32-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 [[MUL_RESULT]] -; RV32-NEXT: [[TMP1:%.*]] = icmp ult ptr [[TMP0]], [[A]] -; RV32-NEXT: [[TMP2:%.*]] = or i1 [[TMP1]], [[MUL_OVERFLOW]] -; RV32-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 256, i32 624) -; RV32-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 -; RV32-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 -; RV32-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 [[MUL_RESULT2]] -; RV32-NEXT: [[TMP4:%.*]] = icmp ult ptr [[TMP3]], [[B]] -; RV32-NEXT: [[TMP5:%.*]] = or i1 [[TMP4]], [[MUL_OVERFLOW3]] -; RV32-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[TMP5]] -; RV32-NEXT: br i1 [[TMP6]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK1:%.*]] ; RV32: vector.memcheck: +; RV32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A:%.*]], i32 79880 ; RV32-NEXT: [[SCEVGEP1:%.*]] = getelementptr i8, ptr [[TRIGGER:%.*]], i32 39940 -; RV32-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[A]], i32 79880 -; RV32-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B]], i32 159752 -; RV32-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]] -; RV32-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]] +; RV32-NEXT: [[SCEVGEP2:%.*]] = getelementptr i8, ptr [[B:%.*]], i32 159752 +; RV32-NEXT: [[BOUND0:%.*]] = icmp ult ptr [[A]], [[SCEVGEP1]] +; RV32-NEXT: [[BOUND1:%.*]] = icmp ult ptr [[TRIGGER]], [[SCEVGEP]] ; RV32-NEXT: [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]] ; RV32-NEXT: [[BOUND03:%.*]] = icmp ult ptr [[A]], [[SCEVGEP2]] ; RV32-NEXT: [[BOUND14:%.*]] = icmp ult ptr [[B]], [[SCEVGEP]] ; RV32-NEXT: [[FOUND_CONFLICT5:%.*]] = and i1 [[BOUND03]], [[BOUND14]] ; RV32-NEXT: 
[[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT5]] -; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] +; RV32-NEXT: br i1 [[CONFLICT_RDX]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; RV32: vector.ph: ; RV32-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i64() ; RV32-NEXT: [[TMP9:%.*]] = mul nuw nsw [[TMP7]], splat (i64 16) @@ -58,22 +43,22 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP11]], i64 0 ; RV32-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; RV32-NEXT: [[TMP12:%.*]] = trunc i64 [[INDEX]] to i32 -; RV32-NEXT: [[TMP13:%.*]] = shl i32 [[TMP12]], 6 -; RV32-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TRIGGER]], i32 [[TMP13]] -; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.experimental.vp.strided.load.nxv2i32.p0.i32(ptr align 4 [[TMP15]], i32 64, splat (i1 true), i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] +; RV32-NEXT: [[TMP6:%.*]] = shl nuw i32 [[TMP12]], 6 +; RV32-NEXT: [[TMP13:%.*]] = getelementptr nuw i8, ptr [[TRIGGER]], i32 [[TMP6]] +; RV32-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.experimental.vp.strided.load.nxv2i32.p0.i32(ptr align 4 [[TMP13]], i32 64, splat (i1 true), i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]] ; RV32-NEXT: [[TMP14:%.*]] = icmp slt [[WIDE_MASKED_GATHER]], splat (i32 100) ; RV32-NEXT: [[TMP20:%.*]] = shl i32 [[TMP12]], 8 ; RV32-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[B]], i32 [[TMP20]] -; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.experimental.vp.strided.load.nxv2f64.p0.i32(ptr align 8 [[TMP25]], i32 256, [[TMP14]], i32 [[TMP10]]), !alias.scope [[META5:![0-9]+]] +; RV32-NEXT: [[WIDE_MASKED_GATHER6:%.*]] = call @llvm.experimental.vp.strided.load.nxv2f64.p0.i32(ptr align 8 [[TMP25]], i32 256, [[TMP14]], i32 [[TMP10]]), !alias.scope [[META3:![0-9]+]] ; 
RV32-NEXT: [[TMP17:%.*]] = sitofp [[WIDE_MASKED_GATHER]] to ; RV32-NEXT: [[TMP18:%.*]] = fadd [[WIDE_MASKED_GATHER6]], [[TMP17]] ; RV32-NEXT: [[TMP19:%.*]] = getelementptr inbounds double, ptr [[A]], [[VEC_IND]] -; RV32-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0( [[TMP18]], align 8 [[TMP19]], [[TMP14]], i32 [[TMP10]]), !alias.scope [[META3]], !noalias [[META5]] +; RV32-NEXT: call void @llvm.vp.scatter.nxv2f64.nxv2p0( [[TMP18]], align 8 [[TMP19]], [[TMP14]], i32 [[TMP10]]), !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]] ; RV32-NEXT: [[CURRENT_ITERATION_NEXT]] = add nuw i64 [[TMP8]], [[INDEX]] ; RV32-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP8]] ; RV32-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] ; RV32-NEXT: [[TMP24:%.*]] = icmp eq i64 [[AVL_NEXT]], 0 -; RV32-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] +; RV32-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; RV32: middle.block: ; RV32-NEXT: br label [[FOR_END:%.*]] ; RV32: scalar.ph: @@ -96,7 +81,7 @@ define void @foo4(ptr nocapture %A, ptr nocapture readonly %B, ptr nocapture rea ; RV32: for.inc: ; RV32-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 16 ; RV32-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], 10000 -; RV32-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP10:![0-9]+]] +; RV32-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP11:![0-9]+]] ; RV32: for.end: ; RV32-NEXT: ret void ; diff --git a/llvm/test/Transforms/LoopVectorize/X86/cast-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/cast-costs.ll index fb40384873a19..a63cced82cde2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cast-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cast-costs.ll @@ -101,31 +101,14 @@ define void @replicate_sext(i32 %N, ptr %dst, ptr %src) #0 { ; CHECK-SAME: i32 [[N:%.*]], ptr 
[[DST:%.*]], ptr [[SRC:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*]]: ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 40 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 16 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] ; CHECK: [[VECTOR_SCEVCHECK]]: -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 12, i64 [[TMP1]]) -; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[MUL_RESULT]] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[TMP2]], [[DST]] -; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP3]], [[MUL_OVERFLOW]] ; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 4, i32 [[N]]) ; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0 ; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1 ; CHECK-NEXT: [[TMP5:%.*]] = icmp slt i32 [[MUL_RESULT2]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP5]], [[MUL_OVERFLOW3]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[SRC]], i64 4 -; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[N]] to i64 -; CHECK-NEXT: [[MUL4:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP7]]) -; CHECK-NEXT: [[MUL_RESULT5:%.*]] = extractvalue { i64, i1 } [[MUL4]], 0 -; CHECK-NEXT: [[MUL_OVERFLOW6:%.*]] = extractvalue { i64, i1 } [[MUL4]], 1 -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SCEVGEP]], i64 [[MUL_RESULT5]] -; CHECK-NEXT: [[TMP9:%.*]] = icmp ult ptr [[TMP8]], [[SCEVGEP]] -; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP9]], [[MUL_OVERFLOW6]] -; CHECK-NEXT: [[TMP11:%.*]] = or i1 [[TMP4]], [[TMP6]] -; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP11]], [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = or i1 [[TMP5]], 
[[MUL_OVERFLOW3]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[SCALAR_PH]], label %[[VECTOR_MEMCHECK:.*]] ; CHECK: [[VECTOR_MEMCHECK]]: ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[N]] to i64 @@ -168,13 +151,13 @@ define void @replicate_sext(i32 %N, ptr %dst, ptr %src) #0 { ; CHECK-NEXT: br label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL10:%.*]] = phi i32 [ [[TMP20]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] -; CHECK-NEXT: [[BC_RESUME_VAL11:%.*]] = phi i32 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[TMP20]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[TMP21]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: ; CHECK-NEXT: [[IV_1:%.*]] = phi i32 [ [[IV_1_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] -; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[IV_2_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL10]], %[[SCALAR_PH]] ] -; CHECK-NEXT: [[IV_3:%.*]] = phi i32 [ [[IV_3_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL11]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[IV_2_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL3]], %[[SCALAR_PH]] ] +; CHECK-NEXT: [[IV_3:%.*]] = phi i32 [ [[IV_3_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL4]], %[[SCALAR_PH]] ] ; CHECK-NEXT: [[IV_2_EXT:%.*]] = sext i32 [[IV_2]] to i64 ; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr nusw i32, ptr [[SRC]], i64 [[IV_2_EXT]] ; CHECK-NEXT: [[L_0:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4 diff --git 
a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll index 9253d72a10a45..319dc7e595923 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll @@ -223,35 +223,33 @@ define i64 @select_icmp_noflag(ptr %a, ptr %b, i64 %ii, i64 %n) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[II]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ [[BROADCAST_SPLAT]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[LAST_ACTIVE_MASK:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP10]], align 8 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP11]], align 8 ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i64> [[WIDE_LOAD]], [[WIDE_LOAD1]] -; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = 
call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) -; CHECK-NEXT: [[TMP5]] = select i1 [[TMP4]], <4 x i1> [[TMP2]], <4 x i1> [[LAST_ACTIVE_MASK]] -; CHECK-NEXT: [[TMP6]] = select i1 [[TMP4]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] +; CHECK-NEXT: [[TMP3]] = or <4 x i1> [[VEC_PHI1]], [[TMP2]] +; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP2]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[BROADCAST_SPLAT]], i64 0 -; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> [[TMP6]], <4 x i1> [[TMP5]], i64 [[TMP8]]) +; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP4]]) +; CHECK-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]]) +; CHECK-NEXT: [[TMP8:%.*]] = freeze i1 [[TMP9]] +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP8]], i64 [[TMP6]], i64 [[II]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ [[II]], %[[ENTRY]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ [[II]], %[[ENTRY]] ] ; CHECK-NEXT: br label %[[FOR_BODY:.*]] ; CHECK: [[FOR_BODY]]: ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ] @@ -266,7 +264,7 @@ define i64 @select_icmp_noflag(ptr %a, ptr %b, i64 %ii, i64 %n) { ; CHECK-NEXT: 
[[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] ; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK: [[EXIT]]: -; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[TMP9]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[COND_LCSSA:%.*]] = phi i64 [ [[COND]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[COND_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll index 9840d60d46200..7a2f26c43c333 100644 --- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll +++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll @@ -959,27 +959,28 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) { ; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK-VF4IC1: [[VECTOR_BODY]]: ; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 331), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[LAST_ACTIVE_MASK:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] ; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[ARRAYIDX]], align 4 ; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD]], splat (i32 3) -; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP8]] -; CHECK-VF4IC1-NEXT: 
[[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) -; CHECK-VF4IC1-NEXT: [[TMP4]] = select i1 [[TMP3]], <4 x i1> [[TMP8]], <4 x i1> [[LAST_ACTIVE_MASK]] -; CHECK-VF4IC1-NEXT: [[TMP5]] = select i1 [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] +; CHECK-VF4IC1-NEXT: [[TMP2]] = or <4 x i1> [[VEC_PHI1]], [[TMP8]] +; CHECK-VF4IC1-NEXT: [[TMP3]] = select <4 x i1> [[TMP8]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 ; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: -; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP5]], <4 x i1> [[TMP4]], i32 331) +; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]]) +; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP9]] +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 331 ; CHECK-VF4IC1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-VF4IC1-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK-VF4IC1: [[SCALAR_PH]]: ; CHECK-VF4IC1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] -; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ] +; CHECK-VF4IC1-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ] ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF4IC1: [[FOR_BODY]]: ; CHECK-VF4IC1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], 
%[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] @@ -993,7 +994,7 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) { ; CHECK-VF4IC1-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] ; CHECK-VF4IC1-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-VF4IC1: [[EXIT_LOOPEXIT]]: -; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC1-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-VF4IC1-NEXT: br label %[[EXIT]] ; CHECK-VF4IC1: [[EXIT]]: ; CHECK-VF4IC1-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] @@ -1014,14 +1015,14 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) { ; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK-VF4IC4: [[VECTOR_BODY]]: ; CHECK-VF4IC4-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 331), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 331), %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ splat (i32 331), %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 331), %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[LAST_ACTIVE_MASK:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ 
[[TMP22:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) @@ -1038,36 +1039,34 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) { ; CHECK-VF4IC4-NEXT: [[TMP33:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD4]], splat (i32 3) ; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD5]], splat (i32 3) ; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = icmp sgt <4 x i32> [[WIDE_LOAD6]], splat (i32 3) -; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP1]] -; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = freeze <4 x i1> [[TMP33]] -; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = or <4 x i1> 
[[TMP2]], [[TMP13]] -; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP10]] -; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP14]], [[TMP15]] -; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = freeze <4 x i1> [[TMP11]] -; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP17]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]]) -; CHECK-VF4IC4-NEXT: [[TMP4]] = select i1 [[TMP3]], <4 x i1> [[TMP1]], <4 x i1> [[LAST_ACTIVE_MASK]] -; CHECK-VF4IC4-NEXT: [[TMP21]] = select i1 [[TMP3]], <4 x i1> [[TMP33]], <4 x i1> [[TMP12]] -; CHECK-VF4IC4-NEXT: [[TMP22]] = select i1 [[TMP3]], <4 x i1> [[TMP10]], <4 x i1> [[TMP19]] -; CHECK-VF4IC4-NEXT: [[TMP23]] = select i1 [[TMP3]], <4 x i1> [[TMP11]], <4 x i1> [[TMP20]] -; CHECK-VF4IC4-NEXT: [[TMP5]] = select i1 [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] -; CHECK-VF4IC4-NEXT: [[TMP25]] = select i1 [[TMP3]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]] -; CHECK-VF4IC4-NEXT: [[TMP26]] = select i1 [[TMP3]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] -; CHECK-VF4IC4-NEXT: [[TMP27]] = select i1 [[TMP3]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] +; CHECK-VF4IC4-NEXT: [[TMP16]] = or <4 x i1> [[VEC_PHI4]], [[TMP1]] +; CHECK-VF4IC4-NEXT: [[TMP20]] = or <4 x i1> [[VEC_PHI5]], [[TMP33]] +; CHECK-VF4IC4-NEXT: [[TMP21]] = or <4 x i1> [[VEC_PHI6]], [[TMP10]] +; CHECK-VF4IC4-NEXT: [[TMP22]] = or <4 x i1> [[VEC_PHI7]], [[TMP11]] +; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] +; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP33]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]] +; CHECK-VF4IC4-NEXT: [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] +; CHECK-VF4IC4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> 
[[STEP_ADD_3]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: -; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP5]], <4 x i1> [[TMP4]], i32 331) -; CHECK-VF4IC4-NEXT: [[TMP30:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP25]], <4 x i1> [[TMP21]], i32 [[TMP7]]) -; CHECK-VF4IC4-NEXT: [[TMP31:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP26]], <4 x i1> [[TMP22]], i32 [[TMP30]]) -; CHECK-VF4IC4-NEXT: [[TMP32:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP27]], <4 x i1> [[TMP23]], i32 [[TMP31]]) +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[TMP12]], <4 x i32> [[TMP13]]) +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX11:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP14]]) +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX12:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[RDX_MINMAX11]], <4 x i32> [[TMP15]]) +; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[RDX_MINMAX12]]) +; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP20]], [[TMP16]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX13:%.*]] = or <4 x i1> [[TMP21]], [[BIN_RDX]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX14:%.*]] = or <4 x i1> [[TMP22]], [[BIN_RDX13]] +; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX14]]) +; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 331 ; CHECK-VF4IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-VF4IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK-VF4IC4: 
[[SCALAR_PH]]: ; CHECK-VF4IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] -; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP32]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ] +; CHECK-VF4IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ] ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF4IC4: [[FOR_BODY]]: ; CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] @@ -1081,7 +1080,7 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) { ; CHECK-VF4IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] ; CHECK-VF4IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-VF4IC4: [[EXIT_LOOPEXIT]]: -; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[TMP32]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF4IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY]] ], [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ] ; CHECK-VF4IC4-NEXT: br label %[[EXIT]] ; CHECK-VF4IC4: [[EXIT]]: ; CHECK-VF4IC4-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] @@ -1102,19 +1101,18 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) { ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF1IC4: [[FOR_BODY]]: ; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 331, %[[VECTOR_PH]] ], [ [[TMP35:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 331, %[[VECTOR_PH]] ], [ [[TMP36:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 331, %[[VECTOR_PH]] ], [ [[TMP37:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = 
phi i32 [ 331, %[[VECTOR_PH]] ], [ [[TMP38:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP31:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP32:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP33:%.*]], %[[FOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP34:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP24:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI5:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI6:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP29:%.*]], %[[FOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI7:%.*]] = phi i1 [ false, %[[VECTOR_PH]] ], [ [[TMP30:%.*]], %[[FOR_BODY]] ] ; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = add i64 [[IV]], 1 ; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i64 [[IV]], 2 ; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = add i64 [[IV]], 3 ; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 0 ; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = add i32 [[TMP7]], 1 ; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = add i32 [[TMP7]], 2 ; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = add i32 [[TMP7]], 3 @@ -1130,34 +1128,31 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) { ; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = icmp sgt i32 [[TMP17]], 3 ; CHECK-VF1IC4-NEXT: [[TMP22:%.*]] = icmp sgt 
i32 [[TMP18]], 3 ; CHECK-VF1IC4-NEXT: [[TMP23:%.*]] = icmp sgt i32 [[TMP19]], 3 -; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = freeze i1 [[CMP1]] -; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[TMP21]] -; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = or i1 [[TMP24]], [[TMP25]] -; CHECK-VF1IC4-NEXT: [[TMP27:%.*]] = freeze i1 [[TMP22]] -; CHECK-VF1IC4-NEXT: [[TMP28:%.*]] = or i1 [[TMP26]], [[TMP27]] -; CHECK-VF1IC4-NEXT: [[TMP29:%.*]] = freeze i1 [[TMP23]] -; CHECK-VF1IC4-NEXT: [[TMP30:%.*]] = or i1 [[TMP28]], [[TMP29]] -; CHECK-VF1IC4-NEXT: [[TMP31]] = select i1 [[TMP30]], i1 [[CMP1]], i1 [[TMP12]] -; CHECK-VF1IC4-NEXT: [[TMP32]] = select i1 [[TMP30]], i1 [[TMP21]], i1 [[TMP1]] -; CHECK-VF1IC4-NEXT: [[TMP33]] = select i1 [[TMP30]], i1 [[TMP22]], i1 [[TMP2]] -; CHECK-VF1IC4-NEXT: [[TMP34]] = select i1 [[TMP30]], i1 [[TMP23]], i1 [[TMP3]] -; CHECK-VF1IC4-NEXT: [[TMP35]] = select i1 [[TMP30]], i32 [[TMP8]], i32 [[VEC_PHI]] -; CHECK-VF1IC4-NEXT: [[TMP36]] = select i1 [[TMP30]], i32 [[TMP9]], i32 [[VEC_PHI1]] -; CHECK-VF1IC4-NEXT: [[TMP37]] = select i1 [[TMP30]], i32 [[TMP10]], i32 [[VEC_PHI2]] -; CHECK-VF1IC4-NEXT: [[TMP38]] = select i1 [[TMP30]], i32 [[TMP11]], i32 [[VEC_PHI3]] +; CHECK-VF1IC4-NEXT: [[TMP28]] = or i1 [[VEC_PHI4]], [[CMP1]] +; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI5]], [[TMP21]] +; CHECK-VF1IC4-NEXT: [[TMP29]] = or i1 [[VEC_PHI6]], [[TMP22]] +; CHECK-VF1IC4-NEXT: [[TMP30]] = or i1 [[VEC_PHI7]], [[TMP23]] +; CHECK-VF1IC4-NEXT: [[TMP27]] = select i1 [[CMP1]], i32 [[TMP7]], i32 [[VEC_PHI]] +; CHECK-VF1IC4-NEXT: [[TMP24]] = select i1 [[TMP21]], i32 [[TMP9]], i32 [[VEC_PHI1]] +; CHECK-VF1IC4-NEXT: [[TMP25]] = select i1 [[TMP22]], i32 [[TMP10]], i32 [[VEC_PHI2]] +; CHECK-VF1IC4-NEXT: [[TMP26]] = select i1 [[TMP23]], i32 [[TMP11]], i32 [[VEC_PHI3]] ; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 ; CHECK-VF1IC4-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-VF1IC4-NEXT: br i1 [[TMP39]], label %[[MIDDLE_BLOCK:.*]], label %[[FOR_BODY]], 
!llvm.loop [[LOOP9:![0-9]+]] ; CHECK-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP27]], i32 [[TMP24]]) +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX8:%.*]] = call i32 @llvm.umax.i32(i32 [[RDX_MINMAX]], i32 [[TMP25]]) +; CHECK-VF1IC4-NEXT: [[TMP35:%.*]] = call i32 @llvm.umax.i32(i32 [[RDX_MINMAX8]], i32 [[TMP26]]) +; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP20]], [[TMP28]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX10:%.*]] = or i1 [[TMP29]], [[BIN_RDX]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX11:%.*]] = or i1 [[TMP30]], [[BIN_RDX10]] +; CHECK-VF1IC4-NEXT: [[TMP31:%.*]] = freeze i1 [[BIN_RDX11]] ; CHECK-VF1IC4-NEXT: [[TMP40:%.*]] = select i1 [[TMP31]], i32 [[TMP35]], i32 331 -; CHECK-VF1IC4-NEXT: [[TMP41:%.*]] = select i1 [[TMP32]], i32 [[TMP36]], i32 [[TMP40]] -; CHECK-VF1IC4-NEXT: [[TMP42:%.*]] = select i1 [[TMP33]], i32 [[TMP37]], i32 [[TMP41]] -; CHECK-VF1IC4-NEXT: [[TMP43:%.*]] = select i1 [[TMP34]], i32 [[TMP38]], i32 [[TMP42]] ; CHECK-VF1IC4-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]] ; CHECK-VF1IC4-NEXT: br i1 [[CMP_N]], label %[[EXIT_LOOPEXIT:.*]], label %[[SCALAR_PH]] ; CHECK-VF1IC4: [[SCALAR_PH]]: ; CHECK-VF1IC4-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_BODY_PREHEADER]] ] -; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP43]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ] +; CHECK-VF1IC4-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP40]], %[[MIDDLE_BLOCK]] ], [ 331, %[[FOR_BODY_PREHEADER]] ] ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY1:.*]] ; CHECK-VF1IC4: [[FOR_BODY1]]: ; CHECK-VF1IC4-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY1]] ] @@ -1171,7 +1166,7 @@ define i32 @select_icmp_const_truncated_iv_unsigned_loop_guard(ptr %a, i32 %n) { ; CHECK-VF1IC4-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[WIDE_TRIP_COUNT]] ; CHECK-VF1IC4-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT_LOOPEXIT]], label 
%[[FOR_BODY1]], !llvm.loop [[LOOP10:![0-9]+]] ; CHECK-VF1IC4: [[EXIT_LOOPEXIT]]: -; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY1]] ], [ [[TMP43]], %[[MIDDLE_BLOCK]] ] +; CHECK-VF1IC4-NEXT: [[SPEC_SELECT_LCSSA:%.*]] = phi i32 [ [[SPEC_SELECT]], %[[FOR_BODY1]] ], [ [[TMP40]], %[[MIDDLE_BLOCK]] ] ; CHECK-VF1IC4-NEXT: br label %[[EXIT]] ; CHECK-VF1IC4: [[EXIT]]: ; CHECK-VF1IC4-NEXT: [[RDX_LCSSA:%.*]] = phi i32 [ 331, %[[ENTRY]] ], [ [[SPEC_SELECT_LCSSA]], %[[EXIT_LOOPEXIT]] ] @@ -1667,28 +1662,29 @@ define i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) { ; CHECK-VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK-VF4IC1: [[VECTOR_BODY]]: ; CHECK-VF4IC1-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[LAST_ACTIVE_MASK:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP3:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP2:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC1-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]] ; CHECK-VF4IC1-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[ARRAYIDX]], align 4 ; CHECK-VF4IC1-NEXT: [[TMP8:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD]], zeroinitializer -; CHECK-VF4IC1-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP8]] -; CHECK-VF4IC1-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) -; CHECK-VF4IC1-NEXT: [[TMP4]] = select i1 [[TMP3]], <4 x i1> [[TMP8]], <4 x i1> [[LAST_ACTIVE_MASK]] -; CHECK-VF4IC1-NEXT: [[TMP5]] = select i1 
[[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] +; CHECK-VF4IC1-NEXT: [[TMP2]] = or <4 x i1> [[VEC_PHI1]], [[TMP8]] +; CHECK-VF4IC1-NEXT: [[TMP3]] = select <4 x i1> [[TMP8]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] ; CHECK-VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 ; CHECK-VF4IC1-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-VF4IC1-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 ; CHECK-VF4IC1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-VF4IC1: [[MIDDLE_BLOCK]]: -; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP5]], <4 x i1> [[TMP4]], i32 -1) +; CHECK-VF4IC1-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]]) +; CHECK-VF4IC1-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]]) +; CHECK-VF4IC1-NEXT: [[TMP7:%.*]] = freeze i1 [[TMP9]] +; CHECK-VF4IC1-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 [[TMP5]], i32 -1 ; CHECK-VF4IC1-NEXT: br label %[[SCALAR_PH:.*]] ; CHECK-VF4IC1: [[SCALAR_PH]]: ; CHECK-VF4IC1-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF4IC1: [[FOR_BODY]]: ; CHECK-VF4IC1-NEXT: [[IV1:%.*]] = phi i64 [ 2147483648, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[TMP7]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC1-NEXT: [[RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] ; CHECK-VF4IC1-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV1]] ; CHECK-VF4IC1-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4 ; CHECK-VF4IC1-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00 @@ -1709,14 +1705,14 @@ define i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) { ; CHECK-VF4IC4-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK-VF4IC4: [[VECTOR_BODY]]: ; 
CHECK-VF4IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP25:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ splat (i32 -1), %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[LAST_ACTIVE_MASK:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP4:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[TMP12:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[TMP20:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP12:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP13:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP14:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP15:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP16:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI5:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP21:%.*]], 
%[[VECTOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[VEC_PHI7:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP22:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) @@ -1733,37 +1729,35 @@ define i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) { ; CHECK-VF4IC4-NEXT: [[TMP9:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD4]], zeroinitializer ; CHECK-VF4IC4-NEXT: [[TMP10:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD5]], zeroinitializer ; CHECK-VF4IC4-NEXT: [[TMP11:%.*]] = fcmp fast olt <4 x float> [[WIDE_LOAD6]], zeroinitializer -; CHECK-VF4IC4-NEXT: [[TMP2:%.*]] = freeze <4 x i1> [[TMP8]] -; CHECK-VF4IC4-NEXT: [[TMP13:%.*]] = freeze <4 x i1> [[TMP9]] -; CHECK-VF4IC4-NEXT: [[TMP14:%.*]] = or <4 x i1> [[TMP2]], [[TMP13]] -; CHECK-VF4IC4-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP10]] -; CHECK-VF4IC4-NEXT: [[TMP16:%.*]] = or <4 x i1> [[TMP14]], [[TMP15]] -; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = freeze <4 x i1> [[TMP11]] -; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = or <4 x i1> [[TMP16]], [[TMP17]] -; CHECK-VF4IC4-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP18]]) -; CHECK-VF4IC4-NEXT: [[TMP4]] = select i1 [[TMP3]], <4 x i1> [[TMP8]], <4 x i1> [[LAST_ACTIVE_MASK]] -; CHECK-VF4IC4-NEXT: [[TMP21]] = select i1 [[TMP3]], <4 x i1> [[TMP9]], <4 x i1> [[TMP12]] -; CHECK-VF4IC4-NEXT: [[TMP22]] = select i1 [[TMP3]], <4 x i1> [[TMP10]], <4 x i1> [[TMP19]] -; CHECK-VF4IC4-NEXT: [[TMP23]] = select i1 [[TMP3]], <4 x i1> [[TMP11]], <4 x i1> [[TMP20]] -; CHECK-VF4IC4-NEXT: [[TMP5]] = select i1 [[TMP3]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] -; CHECK-VF4IC4-NEXT: [[TMP25]] = select i1 [[TMP3]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]] -; CHECK-VF4IC4-NEXT: [[TMP26]] = select i1 [[TMP3]], <4 x 
i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] -; CHECK-VF4IC4-NEXT: [[TMP27]] = select i1 [[TMP3]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] +; CHECK-VF4IC4-NEXT: [[TMP16]] = or <4 x i1> [[VEC_PHI4]], [[TMP8]] +; CHECK-VF4IC4-NEXT: [[TMP20]] = or <4 x i1> [[VEC_PHI5]], [[TMP9]] +; CHECK-VF4IC4-NEXT: [[TMP21]] = or <4 x i1> [[VEC_PHI6]], [[TMP10]] +; CHECK-VF4IC4-NEXT: [[TMP22]] = or <4 x i1> [[VEC_PHI7]], [[TMP11]] +; CHECK-VF4IC4-NEXT: [[TMP12]] = select <4 x i1> [[TMP8]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] +; CHECK-VF4IC4-NEXT: [[TMP13]] = select <4 x i1> [[TMP9]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI1]] +; CHECK-VF4IC4-NEXT: [[TMP14]] = select <4 x i1> [[TMP10]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI2]] +; CHECK-VF4IC4-NEXT: [[TMP15]] = select <4 x i1> [[TMP11]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI3]] ; CHECK-VF4IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 16 ; CHECK-VF4IC4-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-VF4IC4-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 ; CHECK-VF4IC4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-VF4IC4: [[MIDDLE_BLOCK]]: -; CHECK-VF4IC4-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP5]], <4 x i1> [[TMP4]], i32 -1) -; CHECK-VF4IC4-NEXT: [[TMP30:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP25]], <4 x i1> [[TMP21]], i32 [[TMP7]]) -; CHECK-VF4IC4-NEXT: [[TMP31:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP26]], <4 x i1> [[TMP22]], i32 [[TMP30]]) -; CHECK-VF4IC4-NEXT: [[TMP32:%.*]] = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> [[TMP27]], <4 x i1> [[TMP23]], i32 [[TMP31]]) +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[TMP12]], <4 x i32> [[TMP13]]) +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX11:%.*]] 
= call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[RDX_MINMAX]], <4 x i32> [[TMP14]]) +; CHECK-VF4IC4-NEXT: [[RDX_MINMAX12:%.*]] = call <4 x i32> @llvm.umax.v4i32(<4 x i32> [[RDX_MINMAX11]], <4 x i32> [[TMP15]]) +; CHECK-VF4IC4-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> [[RDX_MINMAX12]]) +; CHECK-VF4IC4-NEXT: [[BIN_RDX:%.*]] = or <4 x i1> [[TMP20]], [[TMP16]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX13:%.*]] = or <4 x i1> [[TMP21]], [[BIN_RDX]] +; CHECK-VF4IC4-NEXT: [[BIN_RDX14:%.*]] = or <4 x i1> [[TMP22]], [[BIN_RDX13]] +; CHECK-VF4IC4-NEXT: [[TMP18:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[BIN_RDX14]]) +; CHECK-VF4IC4-NEXT: [[TMP19:%.*]] = freeze i1 [[TMP18]] +; CHECK-VF4IC4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP19]], i32 [[TMP17]], i32 -1 ; CHECK-VF4IC4-NEXT: br label %[[SCALAR_PH:.*]] ; CHECK-VF4IC4: [[SCALAR_PH]]: ; CHECK-VF4IC4-NEXT: br label %[[FOR_BODY:.*]] ; CHECK-VF4IC4: [[FOR_BODY]]: ; CHECK-VF4IC4-NEXT: [[IV1:%.*]] = phi i64 [ 2147483648, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY]] ] -; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[TMP32]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] +; CHECK-VF4IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[RDX_SELECT]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY]] ] ; CHECK-VF4IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV1]] ; CHECK-VF4IC4-NEXT: [[TMP0:%.*]] = load float, ptr [[ARRAYIDX1]], align 4 ; CHECK-VF4IC4-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP0]], 0.000000e+00 @@ -1784,19 +1778,18 @@ define i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) { ; CHECK-VF1IC4-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK-VF1IC4: [[VECTOR_BODY]]: ; CHECK-VF1IC4-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[FOR_BODY]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i32 [ -1, %[[FOR_BODY]] ], [ [[TMP35:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ -1, %[[FOR_BODY]] ], [ [[TMP36:%.*]], 
%[[VECTOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ -1, %[[FOR_BODY]] ], [ [[TMP37:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ -1, %[[FOR_BODY]] ], [ [[TMP38:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[TMP12:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP31:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP32:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[TMP2:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP33:%.*]], %[[VECTOR_BODY]] ] -; CHECK-VF1IC4-NEXT: [[TMP3:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP34:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 0, %[[FOR_BODY]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI1:%.*]] = phi i32 [ 0, %[[FOR_BODY]] ], [ [[TMP24:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI2:%.*]] = phi i32 [ 0, %[[FOR_BODY]] ], [ [[TMP25:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI3:%.*]] = phi i32 [ 0, %[[FOR_BODY]] ], [ [[TMP26:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI4:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI5:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP20:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI6:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP29:%.*]], %[[VECTOR_BODY]] ] +; CHECK-VF1IC4-NEXT: [[VEC_PHI7:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[TMP30:%.*]], %[[VECTOR_BODY]] ] ; CHECK-VF1IC4-NEXT: [[TMP4:%.*]] = add i64 [[IV]], 1 ; CHECK-VF1IC4-NEXT: [[TMP5:%.*]] = add i64 [[IV]], 2 ; CHECK-VF1IC4-NEXT: [[TMP6:%.*]] = add i64 [[IV]], 3 ; CHECK-VF1IC4-NEXT: [[TMP7:%.*]] = trunc i64 [[IV]] to i32 -; CHECK-VF1IC4-NEXT: [[TMP8:%.*]] = add i32 [[TMP7]], 0 ; CHECK-VF1IC4-NEXT: [[TMP9:%.*]] = add i32 [[TMP7]], 1 ; CHECK-VF1IC4-NEXT: [[TMP10:%.*]] = add i32 [[TMP7]], 2 ; CHECK-VF1IC4-NEXT: [[TMP11:%.*]] = add i32 [[TMP7]], 3 @@ -1812,35 +1805,32 @@ 
define i32 @not_vectorized_select_fcmp_invalid_const_ub(ptr %a) { ; CHECK-VF1IC4-NEXT: [[TMP21:%.*]] = fcmp fast olt float [[TMP17]], 0.000000e+00 ; CHECK-VF1IC4-NEXT: [[TMP22:%.*]] = fcmp fast olt float [[TMP18]], 0.000000e+00 ; CHECK-VF1IC4-NEXT: [[TMP23:%.*]] = fcmp fast olt float [[TMP19]], 0.000000e+00 -; CHECK-VF1IC4-NEXT: [[TMP24:%.*]] = freeze i1 [[CMP]] -; CHECK-VF1IC4-NEXT: [[TMP25:%.*]] = freeze i1 [[TMP21]] -; CHECK-VF1IC4-NEXT: [[TMP26:%.*]] = or i1 [[TMP24]], [[TMP25]] -; CHECK-VF1IC4-NEXT: [[TMP27:%.*]] = freeze i1 [[TMP22]] -; CHECK-VF1IC4-NEXT: [[TMP28:%.*]] = or i1 [[TMP26]], [[TMP27]] -; CHECK-VF1IC4-NEXT: [[TMP29:%.*]] = freeze i1 [[TMP23]] -; CHECK-VF1IC4-NEXT: [[TMP30:%.*]] = or i1 [[TMP28]], [[TMP29]] -; CHECK-VF1IC4-NEXT: [[TMP31]] = select i1 [[TMP30]], i1 [[CMP]], i1 [[TMP12]] -; CHECK-VF1IC4-NEXT: [[TMP32]] = select i1 [[TMP30]], i1 [[TMP21]], i1 [[TMP1]] -; CHECK-VF1IC4-NEXT: [[TMP33]] = select i1 [[TMP30]], i1 [[TMP22]], i1 [[TMP2]] -; CHECK-VF1IC4-NEXT: [[TMP34]] = select i1 [[TMP30]], i1 [[TMP23]], i1 [[TMP3]] -; CHECK-VF1IC4-NEXT: [[TMP35]] = select i1 [[TMP30]], i32 [[TMP8]], i32 [[VEC_PHI]] -; CHECK-VF1IC4-NEXT: [[TMP36]] = select i1 [[TMP30]], i32 [[TMP9]], i32 [[VEC_PHI1]] -; CHECK-VF1IC4-NEXT: [[TMP37]] = select i1 [[TMP30]], i32 [[TMP10]], i32 [[VEC_PHI2]] -; CHECK-VF1IC4-NEXT: [[TMP38]] = select i1 [[TMP30]], i32 [[TMP11]], i32 [[VEC_PHI3]] +; CHECK-VF1IC4-NEXT: [[TMP28]] = or i1 [[VEC_PHI4]], [[CMP]] +; CHECK-VF1IC4-NEXT: [[TMP20]] = or i1 [[VEC_PHI5]], [[TMP21]] +; CHECK-VF1IC4-NEXT: [[TMP29]] = or i1 [[VEC_PHI6]], [[TMP22]] +; CHECK-VF1IC4-NEXT: [[TMP30]] = or i1 [[VEC_PHI7]], [[TMP23]] +; CHECK-VF1IC4-NEXT: [[TMP27]] = select i1 [[CMP]], i32 [[TMP7]], i32 [[VEC_PHI]] +; CHECK-VF1IC4-NEXT: [[TMP24]] = select i1 [[TMP21]], i32 [[TMP9]], i32 [[VEC_PHI1]] +; CHECK-VF1IC4-NEXT: [[TMP25]] = select i1 [[TMP22]], i32 [[TMP10]], i32 [[VEC_PHI2]] +; CHECK-VF1IC4-NEXT: [[TMP26]] = select i1 [[TMP23]], i32 [[TMP11]], i32 [[VEC_PHI3]] 
; CHECK-VF1IC4-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4 ; CHECK-VF1IC4-NEXT: [[TMP39:%.*]] = icmp eq i64 [[INDEX_NEXT]], 2147483648 ; CHECK-VF1IC4-NEXT: br i1 [[TMP39]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK-VF1IC4: [[MIDDLE_BLOCK]]: +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP27]], i32 [[TMP24]]) +; CHECK-VF1IC4-NEXT: [[RDX_MINMAX8:%.*]] = call i32 @llvm.umax.i32(i32 [[RDX_MINMAX]], i32 [[TMP25]]) +; CHECK-VF1IC4-NEXT: [[TMP35:%.*]] = call i32 @llvm.umax.i32(i32 [[RDX_MINMAX8]], i32 [[TMP26]]) +; CHECK-VF1IC4-NEXT: [[BIN_RDX:%.*]] = or i1 [[TMP20]], [[TMP28]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX10:%.*]] = or i1 [[TMP29]], [[BIN_RDX]] +; CHECK-VF1IC4-NEXT: [[BIN_RDX11:%.*]] = or i1 [[TMP30]], [[BIN_RDX10]] +; CHECK-VF1IC4-NEXT: [[TMP31:%.*]] = freeze i1 [[BIN_RDX11]] ; CHECK-VF1IC4-NEXT: [[TMP40:%.*]] = select i1 [[TMP31]], i32 [[TMP35]], i32 -1 -; CHECK-VF1IC4-NEXT: [[TMP41:%.*]] = select i1 [[TMP32]], i32 [[TMP36]], i32 [[TMP40]] -; CHECK-VF1IC4-NEXT: [[TMP42:%.*]] = select i1 [[TMP33]], i32 [[TMP37]], i32 [[TMP41]] -; CHECK-VF1IC4-NEXT: [[TMP43:%.*]] = select i1 [[TMP34]], i32 [[TMP38]], i32 [[TMP42]] ; CHECK-VF1IC4-NEXT: br label %[[SCALAR_PH:.*]] ; CHECK-VF1IC4: [[SCALAR_PH]]: ; CHECK-VF1IC4-NEXT: br label %[[FOR_BODY1:.*]] ; CHECK-VF1IC4: [[FOR_BODY1]]: ; CHECK-VF1IC4-NEXT: [[IV1:%.*]] = phi i64 [ 2147483648, %[[SCALAR_PH]] ], [ [[INC:%.*]], %[[FOR_BODY1]] ] -; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[TMP43]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY1]] ] +; CHECK-VF1IC4-NEXT: [[RDX:%.*]] = phi i32 [ [[TMP40]], %[[SCALAR_PH]] ], [ [[SPEC_SELECT:%.*]], %[[FOR_BODY1]] ] ; CHECK-VF1IC4-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV1]] ; CHECK-VF1IC4-NEXT: [[TMP44:%.*]] = load float, ptr [[ARRAYIDX1]], align 4 ; CHECK-VF1IC4-NEXT: [[CMP1:%.*]] = fcmp fast olt float [[TMP44]], 0.000000e+00 diff --git 
a/polly/test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll b/polly/test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll index b1f7e65e9dd25..dbedc28a927e6 100644 --- a/polly/test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll +++ b/polly/test/ScopInfo/NonAffine/non_affine_conditional_surrounding_non_affine_loop.ll @@ -28,7 +28,7 @@ ; INNERMOST-NEXT: Invalid Context: ; INNERMOST-NEXT: [tmp6, p_1, p_2] -> { : p_2 < p_1 and (tmp6 < 0 or tmp6 > 0) } ; INNERMOST: p0: %tmp6 -; INNERMOST-NEXT: p1: {0,+,(sext i32 %N to i64)}<%bb3> +; INNERMOST-NEXT: p1: {0,+,(sext i32 %N to i64)}<%bb3> ; INNERMOST-NEXT: p2: {0,+,1}<%bb3> ; INNERMOST-NEXT: Arrays { ; INNERMOST-NEXT: i32 MemRef_A[*]; // Element size 4 From 2048e75cc29b6cbbfa39ab28c962081e92486f2a Mon Sep 17 00:00:00 2001 From: Tomas Matheson Date: Wed, 24 Jun 2026 11:43:29 +0100 Subject: [PATCH 312/511] [AArch64][SVE] Use ADD/ADR instead of MUL/MLA for x*N (#198566) Avoid `MUL`/`MLA` for all-active multiplies by small constants when cheaper `ADD`/`ADR` sequences are available. Vector multiplication (int32_t/uint32_t base types) by 2, 3, 5, 9 can be done with ADD (for 2) ADR (for 3,5,9). Similarly, operations of the form a + x * {1,2,4,8} can use ADR. 
--- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 51 +++ .../AArch64/sve-intrinsics-int-arith-undef.ll | 2 +- .../CodeGen/AArch64/sve-mul-imm-add-adr.ll | 372 ++++++++++++++++++ llvm/test/CodeGen/AArch64/sve2-histcnt.ll | 4 +- 4 files changed, 425 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-mul-imm-add-adr.ll diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 4712406e37e6b..64dab3296dddd 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1948,6 +1948,57 @@ let Predicates = [HasSVE] in { defm : adrShiftPat; defm : adrShiftPat; + // Avoid MUL/MLA for small constants where ADD/ADR forms are available. + multiclass sveMulMlaImmAddAdrPat { + // MUL + def : Pat<(Ty (AArch64mul_p (PredTy (SVEAnyPredicate)), Ty:$Op, + (Ty (splat_vector (ScalarTy 2))))), + (Add $Op, $Op)>; + def : Pat<(Ty (AArch64mul_p (PredTy (SVEAnyPredicate)), Ty:$Op, + (Ty (splat_vector (ScalarTy 3))))), + (Adr1 $Op, $Op)>; + def : Pat<(Ty (AArch64mul_p (PredTy (SVEAnyPredicate)), Ty:$Op, + (Ty (splat_vector (ScalarTy 5))))), + (Adr2 $Op, $Op)>; + def : Pat<(Ty (AArch64mul_p (PredTy (SVEAnyPredicate)), Ty:$Op, + (Ty (splat_vector (ScalarTy 9))))), + (Adr3 $Op, $Op)>; + + // MLA + def : Pat<(Ty (AArch64mla_p (PredTy (SVEAnyPredicate)), Ty:$Acc, Ty:$Op, + (Ty (splat_vector (ScalarTy 2))))), + (Adr1 $Acc, $Op)>; + def : Pat<(Ty (AArch64mla_p (PredTy (SVEAnyPredicate)), Ty:$Acc, Ty:$Op, + (Ty (splat_vector (ScalarTy 4))))), + (Adr2 $Acc, $Op)>; + def : Pat<(Ty (AArch64mla_p (PredTy (SVEAnyPredicate)), Ty:$Acc, Ty:$Op, + (Ty (splat_vector (ScalarTy 8))))), + (Adr3 $Acc, $Op)>; + + // MLA commuted. These can be removed if the commuted forms are canonicalized. 
+ def : Pat<(Ty (AArch64mla_p (PredTy (SVEAnyPredicate)), Ty:$Acc, + (Ty (splat_vector (ScalarTy 2))), Ty:$Op)), + (Adr1 $Acc, $Op)>; + def : Pat<(Ty (AArch64mla_p (PredTy (SVEAnyPredicate)), Ty:$Acc, + (Ty (splat_vector (ScalarTy 4))), Ty:$Op)), + (Adr2 $Acc, $Op)>; + def : Pat<(Ty (AArch64mla_p (PredTy (SVEAnyPredicate)), Ty:$Acc, + (Ty (splat_vector (ScalarTy 8))), Ty:$Op)), + (Adr3 $Acc, $Op)>; + } + + let AddedComplexity = 10 in { + defm : sveMulMlaImmAddAdrPat; + defm : sveMulMlaImmAddAdrPat; + } + // adr z0.d, [z0.d, z0.d, uxtw #] // adr z0.d, [z0.d, z0.d, sxtw #] multiclass adrXtwShiftPat { diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll index 82b39785a07b5..1446f6956cf04 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-undef.ll @@ -301,7 +301,7 @@ define @mul_imm_i16( %pg, define @mul_imm_i32( %pg, %a) { ; CHECK-LABEL: mul_imm_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mul z0.s, z0.s, #5 +; CHECK-NEXT: adr z0.s, [z0.s, z0.s, lsl #2] ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.mul.u.nxv4i32( %pg, %a, diff --git a/llvm/test/CodeGen/AArch64/sve-mul-imm-add-adr.ll b/llvm/test/CodeGen/AArch64/sve-mul-imm-add-adr.ll new file mode 100644 index 0000000000000..91423fc1d068d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-mul-imm-add-adr.ll @@ -0,0 +1,372 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + + +define @mul_i32_by_2( %x) { +; CHECK-LABEL: mul_i32_by_2: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #1 +; CHECK-NEXT: ret + %out = mul %x, splat(i32 2) + ret %out +} + +define @mul_i32_by_3( %x) { +; CHECK-LABEL: mul_i32_by_3: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z0.s, lsl #1] +; CHECK-NEXT: ret + %out = mul %x, splat(i32 3) + ret %out +} + +define 
@mul_i32_by_5( %x) { +; CHECK-LABEL: mul_i32_by_5: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z0.s, lsl #2] +; CHECK-NEXT: ret + %out = mul %x, splat(i32 5) + ret %out +} + +define @mul_i32_by_9( %x) { +; CHECK-LABEL: mul_i32_by_9: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z0.s, lsl #3] +; CHECK-NEXT: ret + %out = mul %x, splat(i32 9) + ret %out +} + + +define @mul_i64_by_2_commuted( %x) { +; CHECK-LABEL: mul_i64_by_2_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.d, z0.d, #1 +; CHECK-NEXT: ret + %out = mul splat(i64 2), %x + ret %out +} + +define @mul_i64_by_3_commuted( %x) { +; CHECK-LABEL: mul_i64_by_3_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.d, [z0.d, z0.d, lsl #1] +; CHECK-NEXT: ret + %out = mul splat(i64 3), %x + ret %out +} + +define @mul_i64_by_5_commuted( %x) { +; CHECK-LABEL: mul_i64_by_5_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.d, [z0.d, z0.d, lsl #2] +; CHECK-NEXT: ret + %out = mul splat(i64 5), %x + ret %out +} + +define @mul_i64_by_9_commuted( %x) { +; CHECK-LABEL: mul_i64_by_9_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.d, [z0.d, z0.d, lsl #3] +; CHECK-NEXT: ret + %out = mul splat(i64 9), %x + ret %out +} + + +define @mla_i32_by_2( %a, %x) { +; CHECK-LABEL: mla_i32_by_2: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z1.s, lsl #1] +; CHECK-NEXT: ret + %mul = mul %x, splat(i32 2) + %out = add %a, %mul + ret %out +} + +define @mla_i32_by_4( %a, %x) { +; CHECK-LABEL: mla_i32_by_4: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z1.s, lsl #2] +; CHECK-NEXT: ret + %mul = mul %x, splat(i32 4) + %out = add %a, %mul + ret %out +} + +define @mla_i32_by_8( %a, %x) { +; CHECK-LABEL: mla_i32_by_8: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z1.s, lsl #3] +; CHECK-NEXT: ret + %mul = mul %x, splat(i32 8) + %out = add %a, %mul + ret %out +} + + +define @mla_i64_by_2_commuted( %a, %x) { +; CHECK-LABEL: mla_i64_by_2_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.d, [z0.d, z1.d, lsl #1] +; 
CHECK-NEXT: ret + %mul = mul splat(i64 2), %x + %out = add %a, %mul + ret %out +} + +define @mla_i64_by_4_commuted( %a, %x) { +; CHECK-LABEL: mla_i64_by_4_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.d, [z0.d, z1.d, lsl #2] +; CHECK-NEXT: ret + %mul = mul splat(i64 4), %x + %out = add %a, %mul + ret %out +} + +define @mla_i64_by_8_commuted( %a, %x) { +; CHECK-LABEL: mla_i64_by_8_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.d, [z0.d, z1.d, lsl #3] +; CHECK-NEXT: ret + %mul = mul splat(i64 8), %x + %out = add %a, %mul + ret %out +} + + +define @svmul_u_i32_by_2( %pg, %x) { +; CHECK-LABEL: svmul_u_i32_by_2: +; CHECK: // %bb.0: +; CHECK-NEXT: add z0.s, z0.s, z0.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.u.nxv4i32( %pg, %x, splat(i32 2)) + ret %out +} + +define @svmul_u_i32_by_3( %pg, %x) { +; CHECK-LABEL: svmul_u_i32_by_3: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z0.s, lsl #1] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.u.nxv4i32( %pg, %x, splat(i32 3)) + ret %out +} + +define @svmul_u_i32_by_5( %pg, %x) { +; CHECK-LABEL: svmul_u_i32_by_5: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z0.s, lsl #2] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.u.nxv4i32( %pg, %x, splat(i32 5)) + ret %out +} + +define @svmul_u_i32_by_9( %pg, %x) { +; CHECK-LABEL: svmul_u_i32_by_9: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z0.s, lsl #3] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.u.nxv4i32( %pg, %x, splat(i32 9)) + ret %out +} + + +define @svmul_m_partial_i32_by_2( %pg, %x) { +; CHECK-LABEL: svmul_m_partial_i32_by_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #2 // =0x2 +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.nxv4i32( %pg, %x, splat(i32 2)) + ret %out +} + +define @svmul_m_partial_i32_by_3( %pg, %x) { +; CHECK-LABEL: svmul_m_partial_i32_by_3: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #3 // =0x3 +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; 
CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.nxv4i32( %pg, %x, splat(i32 3)) + ret %out +} + +define @svmul_m_partial_i32_by_5( %pg, %x) { +; CHECK-LABEL: svmul_m_partial_i32_by_5: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #5 // =0x5 +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.nxv4i32( %pg, %x, splat(i32 5)) + ret %out +} + +define @svmul_m_partial_i32_by_9( %pg, %x) { +; CHECK-LABEL: svmul_m_partial_i32_by_9: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #9 // =0x9 +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.nxv4i32( %pg, %x, splat(i32 9)) + ret %out +} + + +define @svmul_u_i32_by_2_commuted( %pg, %x) { +; CHECK-LABEL: svmul_u_i32_by_2_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #2 // =0x2 +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.u.nxv4i32( %pg, splat(i32 2), %x) + ret %out +} + +define @svmul_u_i32_by_3_commuted( %pg, %x) { +; CHECK-LABEL: svmul_u_i32_by_3_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #3 // =0x3 +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.u.nxv4i32( %pg, splat(i32 3), %x) + ret %out +} + +define @svmul_u_i32_by_5_commuted( %pg, %x) { +; CHECK-LABEL: svmul_u_i32_by_5_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #5 // =0x5 +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.u.nxv4i32( %pg, splat(i32 5), %x) + ret %out +} + +define @svmul_u_i32_by_9_commuted( %pg, %x) { +; CHECK-LABEL: svmul_u_i32_by_9_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #9 // =0x9 +; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mul.u.nxv4i32( %pg, splat(i32 9), %x) + ret %out +} + + +define @svmla_u_i32_by_2( %pg, %a, %x) { +; CHECK-LABEL: svmla_u_i32_by_2: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z1.s, lsl #1] +; 
CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mla.u.nxv4i32( %pg, %a, %x, splat(i32 2)) + ret %out +} + +define @svmla_u_i32_by_4( %pg, %a, %x) { +; CHECK-LABEL: svmla_u_i32_by_4: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z1.s, lsl #2] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mla.u.nxv4i32( %pg, %a, %x, splat(i32 4)) + ret %out +} + +define @svmla_u_i32_by_8( %pg, %a, %x) { +; CHECK-LABEL: svmla_u_i32_by_8: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z1.s, lsl #3] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mla.u.nxv4i32( %pg, %a, %x, splat(i32 8)) + ret %out +} + + +define @svmla_u_i32_by_2_commuted( %pg, %a, %x) { +; CHECK-LABEL: svmla_u_i32_by_2_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z1.s, lsl #1] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mla.u.nxv4i32( %pg, %a, splat(i32 2), %x) + ret %out +} + +define @svmla_u_i32_by_4_commuted( %pg, %a, %x) { +; CHECK-LABEL: svmla_u_i32_by_4_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z1.s, lsl #2] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mla.u.nxv4i32( %pg, %a, splat(i32 4), %x) + ret %out +} + +define @svmla_u_i32_by_8_commuted( %pg, %a, %x) { +; CHECK-LABEL: svmla_u_i32_by_8_commuted: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.s, [z0.s, z1.s, lsl #3] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mla.u.nxv4i32( %pg, %a, splat(i32 8), %x) + ret %out +} + + +define @svmla_m_partial_i32_by_2( %pg, %a, %x) { +; CHECK-LABEL: svmla_m_partial_i32_by_2: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.s, #2 // =0x2 +; CHECK-NEXT: mla z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mla.nxv4i32( %pg, %a, %x, splat(i32 2)) + ret %out +} + +define @svmla_m_partial_i32_by_4( %pg, %a, %x) { +; CHECK-LABEL: svmla_m_partial_i32_by_4: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.s, #4 // =0x4 +; CHECK-NEXT: mla z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mla.nxv4i32( %pg, %a, %x, splat(i32 4)) + 
ret %out +} + +define @svmla_m_partial_i32_by_8( %pg, %a, %x) { +; CHECK-LABEL: svmla_m_partial_i32_by_8: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z2.s, #8 // =0x8 +; CHECK-NEXT: mla z0.s, p0/m, z1.s, z2.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mla.nxv4i32( %pg, %a, %x, splat(i32 8)) + ret %out +} + + +define @svmla_u_i64_by_2( %pg, %a, %x) { +; CHECK-LABEL: svmla_u_i64_by_2: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.d, [z0.d, z1.d, lsl #1] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mla.u.nxv2i64( %pg, %a, %x, splat(i64 2)) + ret %out +} + +define @svmla_u_i64_by_4( %pg, %a, %x) { +; CHECK-LABEL: svmla_u_i64_by_4: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.d, [z0.d, z1.d, lsl #2] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mla.u.nxv2i64( %pg, %a, %x, splat(i64 4)) + ret %out +} + +define @svmla_u_i64_by_8( %pg, %a, %x) { +; CHECK-LABEL: svmla_u_i64_by_8: +; CHECK: // %bb.0: +; CHECK-NEXT: adr z0.d, [z0.d, z1.d, lsl #3] +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.mla.u.nxv2i64( %pg, %a, %x, splat(i64 8)) + ret %out +} diff --git a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll index 6596abe2f105a..a49bdafb8a46a 100644 --- a/llvm/test/CodeGen/AArch64/sve2-histcnt.ll +++ b/llvm/test/CodeGen/AArch64/sve2-histcnt.ll @@ -147,10 +147,8 @@ define void @histogram_i16_literal_2(ptr %base, %indices, %indices From c5f23fa90ce4f1477894ff66727b8f79feaf0c09 Mon Sep 17 00:00:00 2001 From: Kareem Ergawy Date: Wed, 24 Jun 2026 12:46:15 +0200 Subject: [PATCH 313/511] [FIR] Route embox + projected complex slice through shapeVec (#205042) When the array_coor base is a fir.embox with a projected complex %re/%im slice, take the shapeVec path instead of the descriptor (fir.box_dims) path. The descriptor path iterates source-rank dims while querying the rank-reduced embox result box, which miscompiles slices that collapse dims (e.g. complex(:,k)%re). 
For embox-derived boxes the underlying storage is contiguous, so the shape-derived layout is both correct and the natural place to encode that static shape is available. Non-embox boxes (rebox, assumed-shape) still go through fir.box_dims. Co-Authored-By: Claude Sonnet 4.6 Co-authored-by: Claude Sonnet 4.6 --- .../lib/Optimizer/Transforms/FIRToMemRef.cpp | 15 +--- .../FIRToMemRef/slice-projected.mlir | 70 +++++-------------- 2 files changed, 21 insertions(+), 64 deletions(-) diff --git a/flang/lib/Optimizer/Transforms/FIRToMemRef.cpp b/flang/lib/Optimizer/Transforms/FIRToMemRef.cpp index 3f738a25ec98b..1dcc056387c48 100644 --- a/flang/lib/Optimizer/Transforms/FIRToMemRef.cpp +++ b/flang/lib/Optimizer/Transforms/FIRToMemRef.cpp @@ -773,20 +773,9 @@ FIRToMemRef::convertArrayCoorOp(Operation *memOp, fir::ArrayCoorOp arrayCoorOp, // // box_dims path: query the descriptor at runtime. Required when: // (a) we have no shape information at all; or - // (b) the array_coor base is a fir.box that is NOT a fir.embox result; - // or a fir.box with a projected slice (layout in the descriptor); or - // (c) embox cannot supply layout for this coor (non-embox box above). - // getFIRConvert materializes fir.box_addr(box) -- an opaque pointer - // with no layout in its type -- so strides must come from the - // descriptor. This matches CodeGen XArrayCoorOp's boxed branch - // (getStrideFromBox); shape/shape_shift on array_coor is - // informational only (lower bounds for index translation). - // Projected complex %re/%im on a bare ref uses the shapeVec path with - // strides scaled by two scalar slots per complex. - const bool boxNeedsDescriptorStrides = - firMemrefIsBox && (!firMemrefIsEmbox || sliceInfo.hasProjectedSlice); + // (b) the array_coor base is a fir.box that is NOT a fir.embox result. 
const bool descriptorOwnsLayout = - shapeVec.empty() || boxNeedsDescriptorStrides; + shapeVec.empty() || (firMemrefIsBox && !firMemrefIsEmbox); if (descriptorOwnsLayout) { // Plain `!fir.ref` without recoverable shape extents cannot use fir.box_*. if (shapeVec.empty() && !sliceInfo.hasProjectedSlice && !isDescriptor && diff --git a/flang/test/Transforms/FIRToMemRef/slice-projected.mlir b/flang/test/Transforms/FIRToMemRef/slice-projected.mlir index 7d29fca000fad..0a5cb672333ed 100644 --- a/flang/test/Transforms/FIRToMemRef/slice-projected.mlir +++ b/flang/test/Transforms/FIRToMemRef/slice-projected.mlir @@ -29,22 +29,12 @@ // CHECK: [[MEMREF:%.*]] = fir.convert %arg0 : (!fir.ref>>) -> memref<4xcomplex> // CHECK: [[IDX:%.*]] = arith.addi // CHECK: [[COMP:%[0-9]+]] = fir.convert [[MEMREF]] : (memref<4xcomplex>) -> memref<4x2xf32> -// CHECK: %[[FWD_C_RE:.*]] = arith.constant 0 : index -// CHECK: %[[FWD_C_SZF32:.*]] = arith.constant 4 : index -// CHECK: %[[FWD_C_DIM0:.*]] = arith.constant 0 : index -// CHECK: [[BD:%[0-9]+]]:3 = fir.box_dims %2, %[[FWD_C_DIM0]] : (!fir.box>, index) -> (index, index, index) -// CHECK: [[STRIDE:%[0-9]+]] = arith.divsi [[BD]]#2, %[[FWD_C_SZF32]] : index -// Reinterpret applies the embox descriptor layout onto the scalar view: -// sizes[0] = box extent (section length in f32 slots) -// sizes[1] = 2 for the (re, im) pair exposed by memref<4x2xf32> -// strides[0] = box_dims byte_stride / sizeof(f32) (not box_elesize) -// strides[1] = 1 between adjacent real/imag scalars -// Without this, memref.load would use dense strides from fir.convert only. 
-// CHECK: %[[FWD_C_PAIR:.*]] = arith.constant 2 : index -// CHECK: %[[FWD_C_COMP_STRIDE:.*]] = arith.constant 1 : index -// CHECK: %[[FWD_C_OFF:.*]] = arith.constant 0 : index -// CHECK: [[VIEW:%.*]] = memref.reinterpret_cast [[COMP]] to offset: [%[[FWD_C_OFF]]], sizes: [[[BD]]#1, %[[FWD_C_PAIR]]], strides: [[[STRIDE]], %[[FWD_C_COMP_STRIDE]]] : memref<4x2xf32> to memref to memref>>) { %c1 = arith.constant 1 : index %c4 = arith.constant 4 : index @@ -68,17 +58,10 @@ func.func @projected_slice_fwd(%arg0: !fir.ref>>) { // CHECK: [[MEMREF:%.*]] = fir.convert %arg0 : (!fir.ref>>) -> memref<4xcomplex> // CHECK: [[IDX:%.*]] = arith.addi // CHECK: [[COMP:%[0-9]+]] = fir.convert [[MEMREF]] : (memref<4xcomplex>) -> memref<4x2xf32> -// CHECK: %[[BWD_C_RE:.*]] = arith.constant 0 : index -// CHECK: %[[BWD_C_SZF32:.*]] = arith.constant 4 : index -// CHECK: %[[BWD_C_DIM0:.*]] = arith.constant 0 : index -// CHECK: [[BD:%[0-9]+]]:3 = fir.box_dims %2, %[[BWD_C_DIM0]] : (!fir.box>, index) -> (index, index, index) -// CHECK: [[STRIDE:%[0-9]+]] = arith.divsi [[BD]]#2, %[[BWD_C_SZF32]] : index // Same reinterpret as forward; slice triple only changes [[IDX]], not strides. 
-// CHECK: %[[BWD_C_PAIR:.*]] = arith.constant 2 : index -// CHECK: %[[BWD_C_COMP_STRIDE:.*]] = arith.constant 1 : index -// CHECK: %[[BWD_C_OFF:.*]] = arith.constant 0 : index -// CHECK: [[VIEW:%.*]] = memref.reinterpret_cast [[COMP]] to offset: [%[[BWD_C_OFF]]], sizes: [[[BD]]#1, %[[BWD_C_PAIR]]], strides: [[[STRIDE]], %[[BWD_C_COMP_STRIDE]]] : memref<4x2xf32> to memref to memref>>) { %c1 = arith.constant 1 : index %c4 = arith.constant 4 : index @@ -103,17 +86,10 @@ func.func @projected_slice_bwd(%arg0: !fir.ref>>) { // CHECK: [[MEMREF:%.*]] = fir.convert %arg0 : (!fir.ref>>) -> memref<4xcomplex> // CHECK: [[IDX:%.*]] = arith.addi // CHECK: [[COMP:%[0-9]+]] = fir.convert [[MEMREF]] : (memref<4xcomplex>) -> memref<4x2xf32> -// CHECK: %[[IM_C_IM:.*]] = arith.constant 1 : index -// CHECK: %[[IM_C_SZF32:.*]] = arith.constant 4 : index -// CHECK: %[[IM_C_DIM0:.*]] = arith.constant 0 : index -// CHECK: [[BD:%[0-9]+]]:3 = fir.box_dims %2, %[[IM_C_DIM0]] : (!fir.box>, index) -> (index, index, index) -// CHECK: [[STRIDE:%[0-9]+]] = arith.divsi [[BD]]#2, %[[IM_C_SZF32]] : index // Same layout as %re; store uses component index 1 for imaginary. 
-// CHECK: %[[IM_C_PAIR:.*]] = arith.constant 2 : index -// CHECK: %[[IM_C_COMP_STRIDE:.*]] = arith.constant 1 : index -// CHECK: %[[IM_C_OFF:.*]] = arith.constant 0 : index -// CHECK: [[VIEW:%.*]] = memref.reinterpret_cast [[COMP]] to offset: [%[[IM_C_OFF]]], sizes: [[[BD]]#1, %[[IM_C_PAIR]]], strides: [[[STRIDE]], %[[IM_C_COMP_STRIDE]]] : memref<4x2xf32> to memref to memref>>, %arg1: f32) { %c1 = arith.constant 1 : index @@ -152,21 +128,13 @@ func.func @projected_slice_store_im(%arg0: !fir.ref>>, // CHECK: [[IDX_I:%.*]] = arith.addi // CHECK: [[IDX_J:%.*]] = arith.addi // CHECK: [[COMP:%[0-9]+]] = fir.convert [[MEMREF]] : (memref<3x2xcomplex>) -> memref<3x2x2xf32> -// CHECK: %[[D2_C_RE:.*]] = arith.constant 0 : index -// CHECK: %[[D2_C_SZF32:.*]] = arith.constant 4 : index -// CHECK: %[[D2_C_DIM1:.*]] = arith.constant 1 : index -// CHECK: [[BD0:%[0-9]+]]:3 = fir.box_dims %2, %[[D2_C_DIM1]] : (!fir.box>, index) -> (index, index, index) -// CHECK: [[STR0:%[0-9]+]] = arith.divsi [[BD0]]#2, %[[D2_C_SZF32]] : index -// CHECK: %[[D2_C_DIM0:.*]] = arith.constant 0 : index -// CHECK: [[BD1:%[0-9]+]]:3 = fir.box_dims %2, %[[D2_C_DIM0]] : (!fir.box>, index) -> (index, index, index) -// CHECK: [[STR1:%[0-9]+]] = arith.divsi [[BD1]]#2, %[[D2_C_SZF32]] : index -// 2-D embox: two box_dims strides (both / sizeof(f32)), plus pair dim (2, 1). -// Row-major memref indices are [j, i, 0] after Fortran dim reversal. 
-// CHECK: %[[D2_C_PAIR:.*]] = arith.constant 2 : index -// CHECK: %[[D2_C_COMP_STRIDE:.*]] = arith.constant 1 : index -// CHECK: %[[D2_C_OFF:.*]] = arith.constant 0 : index -// CHECK: [[VIEW:%.*]] = memref.reinterpret_cast [[COMP]] to offset: [%[[D2_C_OFF]]], sizes: [[[BD0]]#1, [[BD1]]#1, %[[D2_C_PAIR]]], strides: [[[STR0]], [[STR1]], %[[D2_C_COMP_STRIDE]]] : memref<3x2x2xf32> to memref to memref>>) { %c1 = arith.constant 1 : index %c2 = arith.constant 2 : index From 80a8b4076c2a0a35ce21f756de2e1cc7f2edad2c Mon Sep 17 00:00:00 2001 From: Wenju He Date: Wed, 24 Jun 2026 12:50:34 +0200 Subject: [PATCH 314/511] [Clang][SYCL] Fix new-driver Backend phase producing .ll instead of .bc when -S is passed (#22380) 85863edfc94a (Merge from 'main' to 'sycl-web' (2 commits)) dropped the getUseNewOffloadingDriver() guard from the OFK_SYCL branch in ConstructPhaseAction when resolving a merge conflict. This caused SYCL new-driver Backend phases to emit TY_LLVM_IR when -S is present. Upstream has a single SYCL compilation path, so bare OFK_SYCL is correct there. sycl-web has both old-driver and new-driver paths and must distinguish them: in the new-driver path, device backend output feeds llvm-offload-binary for packaging, which requires .bc. The -S flag is meaningful for host compilation only; the new-driver device Backend phase must always produce TY_LLVM_BC regardless of -S. Restore the getUseNewOffloadingDriver() guard and restrict -S -> TY_LLVM_IR to TargetDeviceOffloadKind == OFK_None or offloadDeviceOnly(). 
Fix by separating the SYCL new-driver and emit_llvm branches with SYCL new-driver checked first, restoring upstream behavior for OpenMP offload: - SYCL new-driver + -emit-llvm -S: SYCL branch fires first -> BC - SYCL new-driver, no -emit-llvm: SYCL branch -> BC - OpenMP NVPTX + -emit-llvm -S: emit_llvm branch -> IR (upstream behavior restored) - SYCL old-driver + -emit-llvm -S: emit_llvm branch -> IR (same as before) Fixes - clang/test/Driver/sycl-offload-nvptx.cpp - clang/test/Driver/sycl-offload-static-lib-2-old-model.cpp - clang/test/Driver/sycl-oneapi-gpu-nvidia.cpp - clang/test/Driver/sycl-save-ptx-files.cpp - clang/test/Driver/sycl-offload-Xarch.cpp - clang/test/Driver/openmp-offload-gpu.c - clang/test/Driver/sycl-offload.cpp - clang/test/Driver/sycl-offload-new-driver.cpp Also update sycl-offload-Xarch.cpp AMD-ARCH/MULTI-ARCH checks from .bc to .s for upstream 859ee9d83ef2 (AMDGCN now defaults to full LTO mode). CMPLRLLVM-76332 --------- Co-authored-by: Claude Sonnet 4.6 --- clang/lib/Driver/Driver.cpp | 11 +++++++++-- clang/test/Driver/sycl-offload-Xarch.cpp | 6 +++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 582a5099538b3..73b3573464d6d 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -8502,6 +8502,14 @@ Action *Driver::ConstructPhaseAction( return C.MakeAction(Input, types::TY_LLVM_BC); } case phases::Backend: { + // SYCL new-driver backend outputs BC for llvm-offload-binary packaging. + // ThinLTO excluded (has its own sycl-post-link pipeline). + // LTO + -S excluded: AMD needs TY_LTO_IR so the image is .s assembly; + // LTOK_None targets (NVPTX, SPIR64) have no LTO branch so always hit this. 
+ if (TargetDeviceOffloadKind == Action::OFK_SYCL && + getUseNewOffloadingDriver() && TargetLTOMode != LTOK_Thin && + (TargetLTOMode == LTOK_None || !Args.hasArg(options::OPT_S))) + return C.MakeAction(Input, types::TY_LLVM_BC); if (TargetLTOMode != LTOK_None) { bool IsDeviceOffload = TargetDeviceOffloadKind != Action::OFK_None; if (!IsDeviceOffload) { @@ -8562,8 +8570,7 @@ Action *Driver::ConstructPhaseAction( } return C.MakeAction(Input, Output); } - if (Args.hasArg(options::OPT_emit_llvm) || - TargetDeviceOffloadKind == Action::OFK_SYCL) { + if (Args.hasArg(options::OPT_emit_llvm)) { types::ID Output = Args.hasArg(options::OPT_S) ? types::TY_LLVM_IR : types::TY_LLVM_BC; return C.MakeAction(Input, Output); diff --git a/clang/test/Driver/sycl-offload-Xarch.cpp b/clang/test/Driver/sycl-offload-Xarch.cpp index 017486cbaf941..dce51f593966a 100644 --- a/clang/test/Driver/sycl-offload-Xarch.cpp +++ b/clang/test/Driver/sycl-offload-Xarch.cpp @@ -29,10 +29,10 @@ // O3ONCE-NVPTX: "-triple" "nvptx64-nvidia-cuda" // O3ONCE-NVPTX-SAME: "-O3" // INVALID-ARCH-FOR-TARGET: clang: error: invalid target ID 'sm_75'; format is a processor name followed by an optional colon-delimited list of features followed by an enable/disable sign (e.g., 'gfx908:sramecc+:xnack-') -// AMD-ARCH: {{"[^"]*llvm-offload-binary[^"]*" "-o".* "--image=file=.*.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=sycl"}} +// AMD-ARCH: {{"[^"]*llvm-offload-binary[^"]*" "-o".* "--image=file=.*.s,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=sycl"}} // NVPTX-ARCH: {{"[^"]*llvm-offload-binary[^"]*" "-o".* "--image=file=.*.bc,triple=nvptx64-nvidia-cuda,arch=sm_52,kind=sycl"}} -// MULTI-ARCH: {{"[^"]*llvm-offload-binary[^"]*" "-o".* "--image=file=.*.bc,triple=amdgcn-amd-amdhsa,arch=gfx906,kind=sycl"}} -// MULTI-ARCH-SAME: {{"--image=file=.*.bc,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=sycl"}} +// MULTI-ARCH: {{"[^"]*llvm-offload-binary[^"]*" "-o".* "--image=file=.*.s,triple=amdgcn-amd-amdhsa,arch=gfx906,kind=sycl"}} +// 
MULTI-ARCH-SAME: {{"--image=file=.*.s,triple=amdgcn-amd-amdhsa,arch=gfx90a,kind=sycl"}} // MULTI-ARCH-SAME: {{"--image=file=.*.bc,triple=nvptx64-nvidia-cuda,arch=sm_52,kind=sycl"}} // MULTI-ARCH-SAME: {{"--image=file=.*.bc,triple=nvptx64-nvidia-cuda,arch=sm_89,kind=sycl"}} From 61c2de88a341a558a88616d6992fe7d7b4ec8a7d Mon Sep 17 00:00:00 2001 From: Nikita Kornev Date: Wed, 24 Jun 2026 12:54:54 +0200 Subject: [PATCH 315/511] [SYCL] Align the kernel class with SYCL2020 (#22359) Add the constructor & replace undocumented methods in the next ABI-break window. --- sycl/include/sycl/kernel.hpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/sycl/include/sycl/kernel.hpp b/sycl/include/sycl/kernel.hpp index 8e375c4f18542..9551425d46cfa 100644 --- a/sycl/include/sycl/kernel.hpp +++ b/sycl/include/sycl/kernel.hpp @@ -81,6 +81,8 @@ class __SYCL_EXPORT kernel : public detail::OwnerLessBase { kernel(cl_kernel ClKernel, const context &SyclContext); #endif + kernel() = delete; + kernel(const kernel &RHS) = default; kernel(kernel &&RHS) = default; @@ -89,9 +91,21 @@ class __SYCL_EXPORT kernel : public detail::OwnerLessBase { kernel &operator=(kernel &&RHS) = default; +#ifndef __INTEL_PREVIEW_BREAKING_CHANGES + // SYCL 2020 declares hidden friend operators, see 4.5.2. Common reference + // semantics.
bool operator==(const kernel &RHS) const { return impl == RHS.impl; } bool operator!=(const kernel &RHS) const { return !operator==(RHS); } +#else + friend bool operator==(const kernel &lhs, const kernel &rhs) { + return lhs.impl == rhs.impl; + } + + friend bool operator!=(const kernel &lhs, const kernel &rhs) { + return !(lhs == rhs); + } +#endif // __INTEL_PREVIEW_BREAKING_CHANGES /// Get a valid OpenCL kernel handle /// From 3aaa0c9688ea3b0ac0cc4407b22ae91289aa2bfb Mon Sep 17 00:00:00 2001 From: dklochkov-intel Date: Wed, 24 Jun 2026 13:00:36 +0200 Subject: [PATCH 316/511] [SYCL] optimize fp8 load store (#22321) When an FP8 data type has size == 2, memcpy was used to store and copy its data. However, if the sizeof of the FP8 data type is guaranteed to be 2 bytes, the copy can be optimized into a single IR operation. This PR adds alignment for the data types that are supported by hardware. --- .../oneapi/experimental/float_8bit/types.hpp | 23 +++++----- .../fp8/e4m3_x2_cri_conversion.cpp | 3 ++ .../fp8/e5m2_x2_cri_conversion.cpp | 3 ++ .../Experimental/fp8/x2_alignment_codegen.cpp | 46 +++++++++++++++++++ 4 files changed, 64 insertions(+), 11 deletions(-) create mode 100644 sycl/test-e2e/Experimental/fp8/x2_alignment_codegen.cpp diff --git a/sycl/include/sycl/ext/oneapi/experimental/float_8bit/types.hpp b/sycl/include/sycl/ext/oneapi/experimental/float_8bit/types.hpp index 8b97c30f7f836..048a3f1475066 100644 --- a/sycl/include/sycl/ext/oneapi/experimental/float_8bit/types.hpp +++ b/sycl/include/sycl/ext/oneapi/experimental/float_8bit/types.hpp @@ -945,7 +945,8 @@ template class fp8_e4m3_x { void ConvertFromFP8_Vec2(sycl::marray &ret, rounding r = rounding::to_even) const { #ifdef __SYCL_DEVICE_ONLY__ - const ::sycl::detail::uint8_vec2 packed{vals[0], vals[1]}; + const ::sycl::detail::uint8_vec2 packed = + *reinterpret_cast(vals); ::sycl::detail::float16_vec2 hi = __builtin_spirv_ConvertE4M3ToFP16EXT(packed); ret[0] = sycl::bit_cast(hi[0]); @@ -970,7 +971,8 @@ template class
fp8_e4m3_x { void ConvertBF16FromFP8_Vec2(sycl::marray &ret, rounding r = rounding::to_even) const { #ifdef __SYCL_DEVICE_ONLY__ - const ::sycl::detail::uint8_vec2 packed{vals[0], vals[1]}; + const ::sycl::detail::uint8_vec2 packed = + *reinterpret_cast(vals); ::sycl::detail::bfloat16_vec2 hi = __builtin_spirv_ConvertE4M3ToBF16EXT(packed); ret[0] = sycl::bit_cast(hi[0]); @@ -994,9 +996,8 @@ template class fp8_e4m3_x { } else { \ const VecType vec{sycl::bit_cast(in[0]), \ sycl::bit_cast(in[1])}; \ - const ::sycl::detail::uint8_vec2 result = \ + *reinterpret_cast<::sycl::detail::uint8_vec2 *>(vals) = \ Convert##Prefix##ToFP8_Vec2(vec); \ - std::memcpy(vals, &result, sizeof(vals)); \ } #else #define CONVERT_TO_FP8(VecType, CastType, in, Prefix) \ @@ -1295,7 +1296,7 @@ template class fp8_e4m3_x { } // Intentionally public to allow access to the raw values. - uint8_t vals[N]; + alignas(N == 2 ? 2 : alignof(uint8_t)) uint8_t vals[N]; #undef CONVERT_TO_FP8 }; @@ -1390,7 +1391,8 @@ template class fp8_e5m2_x { void ConvertFromFP8_Vec2(sycl::marray &ret, rounding r = rounding::to_even) const { #ifdef __SYCL_DEVICE_ONLY__ - const ::sycl::detail::uint8_vec2 packed{vals[0], vals[1]}; + const ::sycl::detail::uint8_vec2 packed = + *reinterpret_cast(vals); ::sycl::detail::float16_vec2 hi = __builtin_spirv_ConvertE5M2ToFP16EXT(packed); ret[0] = sycl::bit_cast(hi[0]); @@ -1415,7 +1417,8 @@ template class fp8_e5m2_x { void ConvertBF16FromFP8_Vec2(sycl::marray &ret, rounding r = rounding::to_even) const { #ifdef __SYCL_DEVICE_ONLY__ - const ::sycl::detail::uint8_vec2 packed{vals[0], vals[1]}; + const ::sycl::detail::uint8_vec2 packed = + *reinterpret_cast(vals); ::sycl::detail::bfloat16_vec2 hi = __builtin_spirv_ConvertE5M2ToBF16EXT(packed); ret[0] = sycl::bit_cast(hi[0]); @@ -1439,9 +1442,8 @@ template class fp8_e5m2_x { } else { \ const VecType vec{sycl::bit_cast(in[0]), \ sycl::bit_cast(in[1])}; \ - const ::sycl::detail::uint8_vec2 result = \ + 
*reinterpret_cast<::sycl::detail::uint8_vec2 *>(vals) = \ Convert##Prefix##ToFP8_Vec2(vec, s); \ - std::memcpy(vals, &result, sizeof(vals)); \ } #else #define CONVERT_TO_FP8(VecType, CastType, in, s, Prefix) \ @@ -1875,8 +1877,7 @@ template class fp8_e5m2_x { } // Intentionally public to allow access to the raw values. - - uint8_t vals[N]; + alignas(N == 2 ? 2 : alignof(uint8_t)) uint8_t vals[N]; #undef CONVERT_TO_FP8 }; diff --git a/sycl/test-e2e/Experimental/fp8/e4m3_x2_cri_conversion.cpp b/sycl/test-e2e/Experimental/fp8/e4m3_x2_cri_conversion.cpp index 70711c38c9ed0..8cdf307d03666 100644 --- a/sycl/test-e2e/Experimental/fp8/e4m3_x2_cri_conversion.cpp +++ b/sycl/test-e2e/Experimental/fp8/e4m3_x2_cri_conversion.cpp @@ -337,6 +337,9 @@ template int test_carray_conversion(sycl::queue &queue) { } int main() { + static_assert(alignof(fp8_e4m3_x2) == 2); + static_assert(sizeof(fp8_e4m3_x2) == 2); + auto async_handler = [](sycl::exception_list exceptions) { for (const std::exception_ptr &e : exceptions) { try { diff --git a/sycl/test-e2e/Experimental/fp8/e5m2_x2_cri_conversion.cpp b/sycl/test-e2e/Experimental/fp8/e5m2_x2_cri_conversion.cpp index 5b64ea8bb268a..1bbe45ae30357 100644 --- a/sycl/test-e2e/Experimental/fp8/e5m2_x2_cri_conversion.cpp +++ b/sycl/test-e2e/Experimental/fp8/e5m2_x2_cri_conversion.cpp @@ -465,6 +465,9 @@ template int test_carray_conversion(sycl::queue &queue) { } int main() { + static_assert(alignof(fp8_e5m2_x2) == 2); + static_assert(sizeof(fp8_e5m2_x2) == 2); + auto async_handler = [](sycl::exception_list exceptions) { for (const std::exception_ptr &e : exceptions) { try { diff --git a/sycl/test-e2e/Experimental/fp8/x2_alignment_codegen.cpp b/sycl/test-e2e/Experimental/fp8/x2_alignment_codegen.cpp new file mode 100644 index 0000000000000..7fea9512a4951 --- /dev/null +++ b/sycl/test-e2e/Experimental/fp8/x2_alignment_codegen.cpp @@ -0,0 +1,46 @@ +// RUN: %clangxx -fsycl -fsycl-device-only -DNDEBUG -S -emit-llvm %s -o - | FileCheck %s + +// 
UNSUPPORTED: target-nvidia, target-amd +// UNSUPPORTED-INTENDED: relies on SPIR-V FP8 conversion builtins + +#include + +using namespace sycl::ext::oneapi::experimental; + +// Encode: two halfs are converted and stored into the `vals` array through a +// single aligned `<2 x i8>` store. +// +// CHECK-LABEL: define {{.*}}encode_e4m3 +// CHECK: store <2 x i8> {{%[a-zA-Z0-9._]+}}, ptr {{.*}}, align 2 +SYCL_EXTERNAL void encode_e4m3(sycl::half a, sycl::half b, fp8_e4m3_x2 *out) { + sycl::half in[2] = {a, b}; + *out = fp8_e4m3_x2(in); +} + +// Decode: the two packed FP8 values are read through a single aligned +// `<2 x i8>` load. +// +// CHECK-LABEL: define {{.*}}decode_e4m3 +// CHECK: load <2 x i8>, ptr {{.*}}, align 2 +SYCL_EXTERNAL void decode_e4m3(const fp8_e4m3_x2 *in, sycl::half *out) { + sycl::marray m = static_cast>(*in); + out[0] = m[0]; + out[1] = m[1]; +} + +// Same aligned vector store/load for the e5m2 variant. +// +// CHECK-LABEL: define {{.*}}encode_e5m2 +// CHECK: store <2 x i8> {{%[a-zA-Z0-9._]+}}, ptr {{.*}}, align 2 +SYCL_EXTERNAL void encode_e5m2(sycl::half a, sycl::half b, fp8_e5m2_x2 *out) { + sycl::half in[2] = {a, b}; + *out = fp8_e5m2_x2(in); +} + +// CHECK-LABEL: define {{.*}}decode_e5m2 +// CHECK: load <2 x i8>, ptr {{.*}}, align 2 +SYCL_EXTERNAL void decode_e5m2(const fp8_e5m2_x2 *in, sycl::half *out) { + sycl::marray m = static_cast>(*in); + out[0] = m[0]; + out[1] = m[1]; +} From a66f0335aaac58ee427fb0275258d4679f6cc8ea Mon Sep 17 00:00:00 2001 From: Wenju He Date: Wed, 24 Jun 2026 13:11:26 +0200 Subject: [PATCH 317/511] [Clang][SYCL] Remove redundant driver-level C input check (#22382) The driver-level err_drv_fsycl_with_c_type check in Driver.cpp shadowed the frontend check in CompilerInvocation.cpp (from upstream commit 1723b7a30145), causing a different error message than expected. Remove the driver check and its diagnostic definition so the frontend check is the single source of truth, matching upstream behavior. 
Fixes clang/test/Driver/sycl.cpp CMPLRLLVM-76332 --------- Co-authored-by: Claude Sonnet 4.6 --- clang/include/clang/Basic/DiagnosticDriverKinds.td | 2 -- clang/lib/Driver/Driver.cpp | 4 ---- clang/test/Driver/sycl-offload-old-model.cpp | 6 ------ clang/test/Driver/sycl-offload.cpp | 6 ------ 4 files changed, 18 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 5b9c6e8e2f754..a18e0396ce005 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -450,8 +450,6 @@ def err_drv_expecting_fopenmp_with_fopenmp_targets : Error< "compatible with offloading; e.g., '-fopenmp=libomp' or '-fopenmp=libiomp5'">; def err_drv_expecting_fsycl_with_sycl_opt : Error< "'%0' must be used in conjunction with '-fsycl' to enable offloading">; -def err_drv_fsycl_with_c_type : Error< - "'%0' must not be used in conjunction with '-fsycl', which expects C++ source">; def warn_drv_opt_requires_opt : Warning<"'%0' should be used only in conjunction with '%1'">, InGroup; def err_drv_sycl_missing_amdgpu_arch : Error< diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 73b3573464d6d..50f5408c9086c 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3736,10 +3736,6 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, Diag(clang::diag::err_drv_unknown_language) << A->getValue(); InputType = types::TY_Object; } - // Emit an error if c-compilation is forced in -fsycl mode - if (IsSYCL && (InputType == types::TY_C || InputType == types::TY_PP_C || - InputType == types::TY_CHeader)) - Diag(clang::diag::err_drv_fsycl_with_c_type) << A->getAsString(Args); // If the user has put -fmodule-header{,=} then we treat C++ headers as // header unit inputs. So we 'promote' -xc++-header appropriately. 
diff --git a/clang/test/Driver/sycl-offload-old-model.cpp b/clang/test/Driver/sycl-offload-old-model.cpp index edd69cc9aae40..10fd17149bbb4 100644 --- a/clang/test/Driver/sycl-offload-old-model.cpp +++ b/clang/test/Driver/sycl-offload-old-model.cpp @@ -803,12 +803,6 @@ // LIB-NODEVICE: 1: linker, {0}, image, (host-sycl) // LIB-NODEVICE-NOT: linker, {{.*}}, spirv, (device-sycl) -// Checking for an error if c-compilation is forced -// RUN: not %clangxx -### -c -fsycl --no-offload-new-driver -xc %s 2>&1 | FileCheck -check-prefixes=CHECK_XC_FSYCL %s -// RUN: not %clangxx -### -c -fsycl --no-offload-new-driver -xc-header %s 2>&1 | FileCheck -check-prefixes=CHECK_XC_FSYCL %s -// RUN: not %clangxx -### -c -fsycl --no-offload-new-driver -xcpp-output %s 2>&1 | FileCheck -check-prefixes=CHECK_XC_FSYCL %s -// CHECK_XC_FSYCL: '-x c{{.*}}' must not be used in conjunction with '-fsycl' - // -std=c++17 check (check all 3 compilations) // RUN: %clangxx -### -c -fsycl --no-offload-new-driver -xc++ %s 2>&1 | FileCheck -check-prefix=CHECK-STD %s // RUN: %clang_cl -### -c -fsycl --no-offload-new-driver -TP %s 2>&1 | FileCheck -check-prefix=CHECK-STD %s diff --git a/clang/test/Driver/sycl-offload.cpp b/clang/test/Driver/sycl-offload.cpp index 44cf684999acc..ddf623ddbf37a 100644 --- a/clang/test/Driver/sycl-offload.cpp +++ b/clang/test/Driver/sycl-offload.cpp @@ -491,12 +491,6 @@ // LIB-NODEVICE: 0: input, "somelib", object, (host-sycl) // LIB-NODEVICE: 1: clang-linker-wrapper, {0}, image, (host-sycl) -// Checking for an error if c-compilation is forced -// RUN: not %clangxx -### -c -fsycl --offload-new-driver --sysroot=%S/Inputs/SYCL -xc %s 2>&1 | FileCheck -check-prefixes=CHECK_XC_FSYCL %s -// RUN: not %clangxx -### -c -fsycl --offload-new-driver --sysroot=%S/Inputs/SYCL -xc-header %s 2>&1 | FileCheck -check-prefixes=CHECK_XC_FSYCL %s -// RUN: not %clangxx -### -c -fsycl --offload-new-driver --sysroot=%S/Inputs/SYCL -xcpp-output %s 2>&1 | FileCheck -check-prefixes=CHECK_XC_FSYCL %s 
-// CHECK_XC_FSYCL: '-x c{{.*}}' must not be used in conjunction with '-fsycl' - // -std=c++17 check (check all 3 compilations) // RUN: %clangxx -### -c -fsycl --offload-new-driver --sysroot=%S/Inputs/SYCL -xc++ %s 2>&1 | FileCheck -check-prefix=CHECK-STD %s // RUN: %clang_cl -### -c -fsycl --offload-new-driver /clang:--sysroot=%S/Inputs/SYCL -TP %s 2>&1 | FileCheck -check-prefix=CHECK-STD %s From e452fe23f2a906abe7e0c495b7e08ffcad852abe Mon Sep 17 00:00:00 2001 From: Wenju He Date: Wed, 24 Jun 2026 13:21:41 +0200 Subject: [PATCH 318/511] [libclc][NFC] Fix libspirv nvptx build warnings (#22378) - warning: unused parameter 'sampl' - warning: unused parameter 'img' - warning: 'long long' is a reserved data type in OpenCL C --- libclc/libspirv/lib/nvptx/images/image_helpers.cl | 6 +++++- libclc/libspirv/lib/nvptx/synchronization/barrier.cl | 6 +++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/libclc/libspirv/lib/nvptx/images/image_helpers.cl b/libclc/libspirv/lib/nvptx/images/image_helpers.cl index ae6afbffe3495..ef469a77a5484 100644 --- a/libclc/libspirv/lib/nvptx/images/image_helpers.cl +++ b/libclc/libspirv/lib/nvptx/images/image_helpers.cl @@ -20,9 +20,13 @@ typedef unsigned int uint; typedef unsigned long ulong; // Sampled image pack/unpack helpers -ulong __clc__sampled_image_unpack_image(ulong img, uint sampl) { return img; } +ulong __clc__sampled_image_unpack_image(ulong img, uint sampl) { + (void)sampl; + return img; +} uint __clc__sampled_image_unpack_sampler(ulong img, uint sampl) { + (void)img; return sampl; } diff --git a/libclc/libspirv/lib/nvptx/synchronization/barrier.cl b/libclc/libspirv/lib/nvptx/synchronization/barrier.cl index d5c98ac1cbb47..c56b9b741047c 100644 --- a/libclc/libspirv/lib/nvptx/synchronization/barrier.cl +++ b/libclc/libspirv/lib/nvptx/synchronization/barrier.cl @@ -56,12 +56,12 @@ __spirv_ControlBarrier(int scope, int memory, unsigned int env1, env2; __asm__ __volatile__("mov.u32 %0, %%envreg1;" : "=r"(env1)); 
__asm__ __volatile__("mov.u32 %0, %%envreg2;" : "=r"(env2)); - long long envreg1 = env1; - long long envreg2 = env2; + long envreg1 = env1; + long envreg2 = env2; // Bit field insert operation. Place 32 bits of envreg2 next to 32 bits of // envreg1: s64[envreg2][envreg1]. The resulting value is the address in // device global memory region, where atomic operations can be performed. - long long atomicAddr; + long atomicAddr; __asm__ __volatile__("bfi.b64 %0, %1, %2, 32, 32;" : "=l"(atomicAddr) : "l"(envreg1), "l"(envreg2)); From d6ec6c8e605d40371846e9157b80bc66d86949fa Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Wed, 24 Jun 2026 12:22:37 +0100 Subject: [PATCH 319/511] [VPlan] Introduce VPValue::user_empty (NFC) (#203518) --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 20 +++++----- llvm/lib/Transforms/Vectorize/VPlan.h | 2 +- .../Transforms/Vectorize/VPlanAnalysis.cpp | 4 +- .../Vectorize/VPlanConstruction.cpp | 2 +- .../Transforms/Vectorize/VPlanTransforms.cpp | 23 +++++------ llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp | 2 +- llvm/lib/Transforms/Vectorize/VPlanValue.h | 9 +++-- .../Transforms/Vectorize/VPlanTest.cpp | 40 +++++++++---------- 8 files changed, 51 insertions(+), 51 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 83333aae39cf9..5f29f329baf8c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -832,7 +832,7 @@ void VPRegionBlock::dissolveToCFGLoop() { auto *Header = cast(getEntry()); auto *ExitingLatch = cast(getExiting()); auto *CanIV = getCanonicalIV(); - if (CanIV->getNumUsers() > 0) { + if (!CanIV->user_empty()) { VPlan &Plan = *getPlan(); auto *Zero = Plan.getZero(CanIV->getType()); DebugLoc DL = CanIV->getDebugLoc(); @@ -1085,38 +1085,38 @@ bool VPlan::isOuterLoop() const { void VPlan::printLiveIns(raw_ostream &O) const { VPSlotTracker SlotTracker(this); - if (VF.getNumUsers() > 0) { + if (!VF.user_empty()) { O << 
"\nLive-in "; VF.printAsOperand(O, SlotTracker); O << " = VF"; } - if (UF.getNumUsers() > 0) { + if (!UF.user_empty()) { O << "\nLive-in "; UF.printAsOperand(O, SlotTracker); O << " = UF"; } - if (VFxUF.getNumUsers() > 0) { + if (!VFxUF.user_empty()) { O << "\nLive-in "; VFxUF.printAsOperand(O, SlotTracker); O << " = VF * UF"; } - if (VectorTripCount.getNumUsers() > 0) { + if (!VectorTripCount.user_empty()) { O << "\nLive-in "; VectorTripCount.printAsOperand(O, SlotTracker); O << " = vector-trip-count"; } - if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) { + if (BackedgeTakenCount && !BackedgeTakenCount->user_empty()) { O << "\nLive-in "; BackedgeTakenCount->printAsOperand(O, SlotTracker); O << " = backedge-taken count"; } O << "\n"; - if (TripCount && TripCount->getNumUsers() > 0) { + if (TripCount && !TripCount->user_empty()) { if (isa(TripCount)) O << "Live-in "; TripCount->printAsOperand(O, SlotTracker); @@ -1563,11 +1563,11 @@ void VPSlotTracker::assignName(const VPValue *V) { } void VPSlotTracker::assignNames(const VPlan &Plan) { - if (Plan.VF.getNumUsers() > 0) + if (!Plan.VF.user_empty()) assignName(&Plan.VF); - if (Plan.UF.getNumUsers() > 0) + if (!Plan.UF.user_empty()) assignName(&Plan.UF); - if (Plan.VFxUF.getNumUsers() > 0) + if (!Plan.VFxUF.user_empty()) assignName(&Plan.VFxUF); assignName(&Plan.VectorTripCount); if (Plan.BackedgeTakenCount) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 26c528a323969..f73118ac31797 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -4936,7 +4936,7 @@ class VPlan { /// Resets the trip count for the VPlan. The caller must make sure all uses of /// the original trip count have been replaced. 
void resetTripCount(VPValue *NewTripCount) { - assert(TripCount && NewTripCount && TripCount->getNumUsers() == 0 && + assert(TripCount && NewTripCount && TripCount->user_empty() && "TripCount must be set when resetting"); TripCount = NewTripCount; } diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 54f6c602f77d2..77a33339eb5f9 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -124,7 +124,7 @@ SmallVector llvm::calculateRegisterUsageForPlan( // the loop (not including non-recipe values such as arguments and // constants). SmallSetVector LoopInvariants; - if (Plan.getVectorTripCount().getNumUsers() > 0) + if (!Plan.getVectorTripCount().user_empty()) LoopInvariants.insert(&Plan.getVectorTripCount()); // We scan the loop in a topological order in order and assign a number to @@ -198,7 +198,7 @@ SmallVector llvm::calculateRegisterUsageForPlan( VPValue *CanIV = LoopRegion->getCanonicalIV(); // Note: canonical IVs are retained even if they have no users. 
- if (CanIV->getNumUsers() != 0) + if (!CanIV->user_empty()) OpenIntervals.insert(CanIV); // We scan the instructions linearly and record each time that a new interval diff --git a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp index bb967d3f3daf0..619fea8c10b4d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp @@ -1767,7 +1767,7 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) { continue; if (auto *DerivedIV = dyn_cast(VecV)) { VPValue *DIVTC = DerivedIV->getOperand(1); - if (DerivedIV->getNumUsers() == 1 && IsTC(DIVTC)) { + if (DerivedIV->hasOneUse() && IsTC(DIVTC)) { auto *NewSel = MiddleBuilder.createSelect( AnyNaNLane, LoopRegion->getCanonicalIV(), DIVTC); DerivedIV->moveAfter(&*MiddleBuilder.getInsertPoint()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index adcfe30ff9561..0c7ede5f81593 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -492,7 +492,7 @@ static bool mergeReplicateRegionsIntoSuccessors(VPlan &Plan) { }); // Remove phi recipes that are unused after merging the regions. - if (Phi1ToMove.getVPSingleValue()->getNumUsers() == 0) { + if (Phi1ToMove.getVPSingleValue()->user_empty()) { Phi1ToMove.eraseFromParent(); continue; } @@ -548,7 +548,7 @@ static VPRegionBlock *createReplicateRegion(VPReplicateRecipe *PredRecipe, VPBlockUtils::insertTwoBlocksAfter(Pred, Exiting, Entry); VPBlockUtils::connectBlocks(Pred, Exiting); - if (PredRecipe->getNumUsers() != 0) { + if (!PredRecipe->user_empty()) { auto *PHIRecipe = new VPPredInstPHIRecipe(RecipeWithoutMask, RecipeWithoutMask->getDebugLoc()); Exiting->appendRecipe(PHIRecipe); @@ -802,8 +802,7 @@ static bool isDeadRecipe(VPRecipeBase &R) { return false; // Recipe is dead if no user keeps the recipe alive. 
- return all_of(R.definedValues(), - [](VPValue *V) { return V->getNumUsers() == 0; }); + return all_of(R.definedValues(), [](VPValue *V) { return V->user_empty(); }); } void VPlanTransforms::removeDeadRecipes(VPlan &Plan) { @@ -894,7 +893,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) { auto *RepR = dyn_cast(U); // Skip recipes that shouldn't be narrowed. if (!Def || !isa(Def) || - Def->getNumUsers() == 0 || !Def->getUnderlyingValue() || + Def->user_empty() || !Def->getUnderlyingValue() || (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))) continue; @@ -2063,7 +2062,7 @@ static void simplifyBlends(VPlan &Plan) { // TODO: Find the most expensive mask that can be deadcoded, or a mask // that's used by multiple blends where it can be removed from them all. VPValue *Mask = Blend->getMask(I); - if (Mask->getNumUsers() == 1 && !match(Mask, m_False())) { + if (Mask->hasOneUse() && !match(Mask, m_False())) { StartIndex = I; break; } @@ -2099,7 +2098,7 @@ static void simplifyBlends(VPlan &Plan) { NewBlend->setOperand(0, Inc1); NewBlend->setOperand(1, Inc0); NewBlend->setOperand(2, NewMask); - if (OldMask->getNumUsers() == 0) + if (OldMask->user_empty()) cast(OldMask)->eraseFromParent(); } } @@ -5322,7 +5321,7 @@ void VPlanTransforms::materializeConstantVectorTripCount( assert(Plan.hasUF(BestUF) && "BestUF is not available in Plan"); VPValue *TC = Plan.getTripCount(); - if (TC->getNumUsers() == 0) + if (TC->user_empty()) return; // Skip cases for which the trip count may be non-trivial to materialize. 
@@ -5351,7 +5350,7 @@ void VPlanTransforms::materializeConstantVectorTripCount( void VPlanTransforms::materializeBackedgeTakenCount(VPlan &Plan, VPBasicBlock *VectorPH) { VPValue *BTC = Plan.getOrCreateBackedgeTakenCount(); - if (BTC->getNumUsers() == 0) + if (BTC->user_empty()) return; VPBuilder Builder(VectorPH, VectorPH->begin()); @@ -5460,7 +5459,7 @@ void VPlanTransforms::materializeVectorTripCount( VPSymbolicValue &VectorTC = Plan.getVectorTripCount(); // There's nothing to do if there are no users of the vector trip count or its // IR value has already been set. - if (VectorTC.getNumUsers() == 0 || VectorTC.getUnderlyingValue()) + if (VectorTC.user_empty() || VectorTC.getUnderlyingValue()) return; VPValue *TC = Plan.getTripCount(); @@ -5540,7 +5539,7 @@ void VPlanTransforms::materializeFactors(VPlan &Plan, VPBasicBlock *VectorPH, VPValue &VFxUF = Plan.getVFxUF(); // If there are no users of the runtime VF, compute VFxUF by constant folding // the multiplication of VF and UF. - if (VF.getNumUsers() == 0) { + if (VF.user_empty()) { VPValue *RuntimeVFxUF = Builder.createElementCount(TCTy, VFEC * Plan.getConcreteUF()); VFxUF.replaceAllUsesWith(RuntimeVFxUF); @@ -5695,7 +5694,7 @@ void VPlanTransforms::expandSCEVsToVPInstructions(VPlan &Plan, // late expansion. 
for (VPRecipeBase &R : make_early_inc_range(*Entry)) { auto *ExpSCEV = dyn_cast(&R); - if (!ExpSCEV || ExpSCEV->getNumUsers() == 0) + if (!ExpSCEV || ExpSCEV->user_empty()) continue; Builder.setInsertPoint(ExpSCEV); VPValue *Expanded = Expander.tryToExpand(ExpSCEV->getSCEV()); diff --git a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp index ae4beb5b71874..bcd17a54a3e31 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp @@ -946,7 +946,7 @@ void VPlanTransforms::replicateByVF(VPlan &Plan, ElementCount VF) { auto *DefR = cast(&R); VPBuilder Builder(DefR); - if (DefR->getNumUsers() == 0) { + if (DefR->user_empty()) { // Create single-scalar version of DefR for all lanes. for (unsigned I = 0; I != VF.getKnownMinValue(); ++I) cloneForLane(Plan, Builder, IdxTy, DefR, VPLane(I), Def2LaneDefs); diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 8356bcb08634f..a7aa0523ad5d0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -92,7 +92,7 @@ class LLVM_ABI_FOR_TEST VPValue { VPValue &operator=(const VPValue &) = delete; virtual ~VPValue() { - assert(Users.empty() && "trying to delete a VPValue with remaining users"); + assert(user_empty() && "trying to delete a VPValue with remaining users"); } /// \return an ID for the concrete type of this object. @@ -113,7 +113,7 @@ class LLVM_ABI_FOR_TEST VPValue { void assertNotMaterialized() const; unsigned getNumUsers() const { - if (Users.empty()) + if (user_empty()) return 0; assertNotMaterialized(); return Users.size(); @@ -158,10 +158,11 @@ class LLVM_ABI_FOR_TEST VPValue { const_user_range users() const { return const_user_range(user_begin(), user_end()); } + bool user_empty() const { return Users.empty(); } // NOLINT /// Returns true if the value has more than one unique user. 
bool hasMoreThanOneUniqueUser() const { - if (getNumUsers() == 0) + if (user_empty()) return false; // Check if all users match the first user. @@ -523,7 +524,7 @@ class VPDef { for (VPRecipeValue *D : to_vector(DefinedValues)) { assert(D->isDefinedBy(this) && "all defined VPValues should point to the containing VPDef"); - assert(D->getNumUsers() == 0 && + assert(D->user_empty() && "all defined VPValues should have no more users"); delete D; } diff --git a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp index 723977595938f..2deb7c6c864b0 100644 --- a/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp +++ b/llvm/unittests/Transforms/Vectorize/VPlanTest.cpp @@ -197,24 +197,24 @@ TEST_F(VPInstructionTest, setOperand) { VPInstruction *I1 = new VPInstruction(Instruction::Add, {VPV1, VPV2}, VPIRFlags::getDefaultFlags(Instruction::Add)); - EXPECT_EQ(1u, VPV1->getNumUsers()); + EXPECT_TRUE(VPV1->hasOneUse()); EXPECT_EQ(I1, *VPV1->user_begin()); - EXPECT_EQ(1u, VPV2->getNumUsers()); + EXPECT_TRUE(VPV2->hasOneUse()); EXPECT_EQ(I1, *VPV2->user_begin()); // Replace operand 0 (VPV1) with VPV3. VPValue *VPV3 = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 3)); I1->setOperand(0, VPV3); - EXPECT_EQ(0u, VPV1->getNumUsers()); - EXPECT_EQ(1u, VPV2->getNumUsers()); + EXPECT_TRUE(VPV1->user_empty()); + EXPECT_TRUE(VPV2->hasOneUse()); EXPECT_EQ(I1, *VPV2->user_begin()); - EXPECT_EQ(1u, VPV3->getNumUsers()); + EXPECT_TRUE(VPV3->hasOneUse()); EXPECT_EQ(I1, *VPV3->user_begin()); // Replace operand 1 (VPV2) with VPV3. I1->setOperand(1, VPV3); - EXPECT_EQ(0u, VPV1->getNumUsers()); - EXPECT_EQ(0u, VPV2->getNumUsers()); + EXPECT_TRUE(VPV1->user_empty()); + EXPECT_TRUE(VPV2->user_empty()); EXPECT_EQ(2u, VPV3->getNumUsers()); EXPECT_EQ(I1, *VPV3->user_begin()); EXPECT_EQ(I1, *std::next(VPV3->user_begin())); @@ -222,13 +222,13 @@ TEST_F(VPInstructionTest, setOperand) { // Replace operand 0 (VPV3) with VPV4. 
VPValue *VPV4 = getPlan().getOrAddLiveIn(ConstantInt::get(Int32, 4)); I1->setOperand(0, VPV4); - EXPECT_EQ(1u, VPV3->getNumUsers()); + EXPECT_TRUE(VPV3->hasOneUse()); EXPECT_EQ(I1, *VPV3->user_begin()); EXPECT_EQ(I1, *VPV4->user_begin()); // Replace operand 1 (VPV3) with VPV4. I1->setOperand(1, VPV4); - EXPECT_EQ(0u, VPV3->getNumUsers()); + EXPECT_TRUE(VPV3->user_empty()); EXPECT_EQ(I1, *VPV4->user_begin()); EXPECT_EQ(I1, *std::next(VPV4->user_begin())); @@ -248,18 +248,18 @@ TEST_F(VPInstructionTest, replaceAllUsesWith) { VPV1->replaceAllUsesWith(VPV3); EXPECT_EQ(VPV3, I1->getOperand(0)); EXPECT_EQ(VPV2, I1->getOperand(1)); - EXPECT_EQ(0u, VPV1->getNumUsers()); - EXPECT_EQ(1u, VPV2->getNumUsers()); + EXPECT_TRUE(VPV1->user_empty()); + EXPECT_TRUE(VPV2->hasOneUse()); EXPECT_EQ(I1, *VPV2->user_begin()); - EXPECT_EQ(1u, VPV3->getNumUsers()); + EXPECT_TRUE(VPV3->hasOneUse()); EXPECT_EQ(I1, *VPV3->user_begin()); // Replace all uses of VPV2 with VPV3. VPV2->replaceAllUsesWith(VPV3); EXPECT_EQ(VPV3, I1->getOperand(0)); EXPECT_EQ(VPV3, I1->getOperand(1)); - EXPECT_EQ(0u, VPV1->getNumUsers()); - EXPECT_EQ(0u, VPV2->getNumUsers()); + EXPECT_TRUE(VPV1->user_empty()); + EXPECT_TRUE(VPV2->user_empty()); EXPECT_EQ(2u, VPV3->getNumUsers()); EXPECT_EQ(I1, *VPV3->user_begin()); @@ -269,8 +269,8 @@ TEST_F(VPInstructionTest, replaceAllUsesWith) { EXPECT_EQ(VPV1, I1->getOperand(1)); EXPECT_EQ(2u, VPV1->getNumUsers()); EXPECT_EQ(I1, *VPV1->user_begin()); - EXPECT_EQ(0u, VPV2->getNumUsers()); - EXPECT_EQ(0u, VPV3->getNumUsers()); + EXPECT_TRUE(VPV2->user_empty()); + EXPECT_TRUE(VPV3->user_empty()); VPInstruction *I2 = new VPInstruction(Instruction::Add, {VPV1, VPV2}, @@ -291,15 +291,15 @@ TEST_F(VPInstructionTest, releaseOperandsAtDeletion) { new VPInstruction(Instruction::Add, {VPV1, VPV2}, VPIRFlags::getDefaultFlags(Instruction::Add)); - EXPECT_EQ(1u, VPV1->getNumUsers()); + EXPECT_TRUE(VPV1->hasOneUse()); EXPECT_EQ(I1, *VPV1->user_begin()); - EXPECT_EQ(1u, VPV2->getNumUsers()); + 
EXPECT_TRUE(VPV2->hasOneUse()); EXPECT_EQ(I1, *VPV2->user_begin()); delete I1; - EXPECT_EQ(0u, VPV1->getNumUsers()); - EXPECT_EQ(0u, VPV2->getNumUsers()); + EXPECT_TRUE(VPV1->user_empty()); + EXPECT_TRUE(VPV2->user_empty()); } using VPBasicBlockTest = VPlanTestBase; From 58180b820125748552421c45d308a75a5e97e806 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Wed, 24 Jun 2026 04:25:36 -0700 Subject: [PATCH 320/511] [Allocator] Drop the fast-path null check via a sentinel End (#205485) Follow-up to #203718. Store `End` as the slab end plus 1 (and 0 for an empty or moved-from allocator). This removes one condition from the fast path. For lld/ELF SymbolTable.cpp (clang++ -O3), the inlined `make()` fast path loses its `test rax, rax; je` pair; the whole TU's .text shrinks from 14037 to 13800 bytes. Aided by Claude Opus 4.8 --- llvm/include/llvm/Support/Allocator.h | 30 ++++++++++++++------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/llvm/include/llvm/Support/Allocator.h b/llvm/include/llvm/Support/Allocator.h index f58f73227a2a9..92027cceea3bf 100644 --- a/llvm/include/llvm/Support/Allocator.h +++ b/llvm/include/llvm/Support/Allocator.h @@ -95,10 +95,11 @@ class BumpPtrAllocatorImpl // slabs as a matter of correctness. 
BumpPtrAllocatorImpl(BumpPtrAllocatorImpl &&Old) : AllocTy(std::move(Old.getAllocator())), CurPtr(Old.CurPtr), - End(Old.End), Slabs(std::move(Old.Slabs)), + EndSentinel(Old.EndSentinel), Slabs(std::move(Old.Slabs)), CustomSizedSlabs(std::move(Old.CustomSizedSlabs)), BytesAllocated(Old.BytesAllocated), RedZoneSize(Old.RedZoneSize) { - Old.CurPtr = Old.End = nullptr; + Old.CurPtr = nullptr; + Old.EndSentinel = 0; Old.BytesAllocated = 0; Old.Slabs.clear(); Old.CustomSizedSlabs.clear(); @@ -114,14 +115,15 @@ class BumpPtrAllocatorImpl DeallocateCustomSizedSlabs(); CurPtr = RHS.CurPtr; - End = RHS.End; + EndSentinel = RHS.EndSentinel; BytesAllocated = RHS.BytesAllocated; RedZoneSize = RHS.RedZoneSize; Slabs = std::move(RHS.Slabs); CustomSizedSlabs = std::move(RHS.CustomSizedSlabs); AllocTy::operator=(std::move(RHS.getAllocator())); - RHS.CurPtr = RHS.End = nullptr; + RHS.CurPtr = nullptr; + RHS.EndSentinel = 0; RHS.BytesAllocated = 0; RHS.Slabs.clear(); RHS.CustomSizedSlabs.clear(); @@ -141,7 +143,7 @@ class BumpPtrAllocatorImpl // Reset the state. BytesAllocated = 0; CurPtr = (char *)Slabs.front(); - End = CurPtr + SlabSize; + EndSentinel = uintptr_t(CurPtr) + SlabSize + 1; __asan_poison_memory_region(*Slabs.begin(), computeSlabSize(0)); DeallocateSlabs(std::next(Slabs.begin()), Slabs.end()); @@ -174,10 +176,9 @@ class BumpPtrAllocatorImpl assert(AllocEndPtr >= uintptr_t(CurPtr) && "Alignment + Size must not overflow"); - // Check if we have enough space. - if (LLVM_LIKELY(AllocEndPtr <= uintptr_t(End) - // We can't return nullptr even for a zero-sized allocation! - && CurPtr != nullptr)) { + // Check if we have enough space. `EndSentinel` is 0 for an empty allocator, + // so this also rejects a null CurPtr when `SizeToAllocate` is 0. + if (LLVM_LIKELY(AllocEndPtr < EndSentinel)) { CurPtr = reinterpret_cast(AllocEndPtr); // Update the allocation point of this memory block in MemorySanitizer. 
// Without this, MemorySanitizer messages for values originated from here @@ -214,7 +215,7 @@ class BumpPtrAllocatorImpl // Otherwise, start a new slab and try again. StartNewSlab(); uintptr_t AlignedAddr = alignAddr(CurPtr, Alignment); - assert(AlignedAddr + SizeToAllocate <= (uintptr_t)End && + assert(AlignedAddr + SizeToAllocate < EndSentinel && "Unable to allocate memory!"); char *AlignedPtr = (char*)AlignedAddr; CurPtr = AlignedPtr + SizeToAllocate; @@ -324,8 +325,9 @@ class BumpPtrAllocatorImpl /// This points to the next free byte in the slab. char *CurPtr = nullptr; - /// The end of the current slab. - char *End = nullptr; + /// One past the slab end (0 when there is no slab). +1 is so that the fast + /// path condition also rejects an empty allocator with a 0-size allocation. + uintptr_t EndSentinel = 0; /// The slabs allocated so far. SmallVector Slabs; @@ -352,7 +354,7 @@ class BumpPtrAllocatorImpl } /// Allocate a new slab and move the bump pointers over into the new - /// slab, modifying CurPtr and End. + /// slab, modifying CurPtr and EndSentinel. void StartNewSlab() { size_t AllocatedSlabSize = computeSlabSize(Slabs.size()); @@ -364,7 +366,7 @@ class BumpPtrAllocatorImpl Slabs.push_back(NewSlab); CurPtr = (char *)(NewSlab); - End = ((char *)NewSlab) + AllocatedSlabSize; + EndSentinel = uintptr_t(NewSlab) + AllocatedSlabSize + 1; } /// Deallocate a sequence of slabs. From b35195b93e67574f2e461007397563db7519bbed Mon Sep 17 00:00:00 2001 From: Abid Qadeer Date: Wed, 24 Jun 2026 12:37:29 +0100 Subject: [PATCH 321/511] [flang][OpenMP][NFC] Hoist variant match-info construction into Semantics (#204387) Replace the lowering-only `makeVariantMatchInfo` helper with a single shared `semantics::omp::MakeVariantMatchInfo`. It builds the VariantMatchInfo from a parsed context selector and returns the optional non-constant user condition (as before). Update metadirective lowering to use it and drop the duplicated Lower/OpenMP copy. 
Selector features that variant selection cannot yet honour (target_device selectors, and clause/extension trait properties) are not match-info concerns, so they are kept out of `MakeVariantMatchInfo`. Detection lives in a separate, pure helper `FindUnsupportedSelectorFeature`; the caller diagnoses the feature in its own terms (metadirective lowering emits a TODO) before building the match info. `MakeVariantMatchInfo` checks the precondition. NFC for metadirective. Co-authored-by: Cursor --------- Co-authored-by: Cursor --- flang/include/flang/Semantics/openmp-utils.h | 40 +++++++ flang/lib/Lower/OpenMP/OpenMP.cpp | 32 ++++-- flang/lib/Lower/OpenMP/Utils.cpp | 111 ------------------- flang/lib/Lower/OpenMP/Utils.h | 19 +--- flang/lib/Semantics/openmp-utils.cpp | 107 ++++++++++++++++++ 5 files changed, 173 insertions(+), 136 deletions(-) diff --git a/flang/include/flang/Semantics/openmp-utils.h b/flang/include/flang/Semantics/openmp-utils.h index e0358eafe487c..d2bfeca68bf84 100644 --- a/flang/include/flang/Semantics/openmp-utils.h +++ b/flang/include/flang/Semantics/openmp-utils.h @@ -163,6 +163,46 @@ std::optional GetLogicalArgument( std::optional IsContiguous( SemanticsContext &semaCtx, const parser::OmpObject &object); +/// Non-constant user condition expression and source for runtime lowering. +struct DynamicUserCondition { + const parser::ScalarExpr *expr; + parser::CharBlock source; +}; + +/// A context-selector feature that variant matching accepts syntactically but +/// cannot yet honour during selection. Callers are expected to diagnose these +/// (a lowering \c TODO or a semantic error) before calling +/// \c MakeVariantMatchInfo, which asserts none are present. +enum class UnsupportedSelectorFeature { + None, + /// A `target_device={...}` selector set. + TargetDevice, + /// A clause property (e.g. \c simdlen(8) in \c construct={simd(simdlen(8))}) + /// or an extension property (e.g. \c foo(bar) in + /// \c implementation={my_trait(foo(bar))}). 
+ ClauseOrExtensionProperty, +}; + +/// Scan a parsed context selector for the first feature that variant matching +/// cannot yet honour (see \c UnsupportedSelectorFeature). Pure detection: emits +/// no diagnostics and has no side effects on any match info. +UnsupportedSelectorFeature FindUnsupportedSelectorFeature( + const parser::traits::OmpContextSelectorSpecification &ctxSel, + SemanticsContext &semaCtx); + +/// Populate \p vmi from a parsed context selector. Score modifiers are +/// honoured (including on `condition(...)` selectors). Constant user +/// conditions are folded into user_condition_true/false traits; a non-constant +/// user condition is recorded as user_condition_unknown and the first such +/// expression is returned for the caller to lower as a runtime condition. +/// +/// The caller must first reject unsupported selector features (see +/// \c FindUnsupportedSelectorFeature); this function asserts none are present. +std::optional MakeVariantMatchInfo( + llvm::omp::VariantMatchInfo &vmi, + const parser::traits::OmpContextSelectorSpecification &ctxSel, + SemanticsContext &semaCtx); + std::vector GetTopLevelDesignators(const SomeExpr &expr); const SomeExpr *HasStorageOverlap( const SomeExpr &base, llvm::ArrayRef exprs); diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 094cec737d481..4b83ac68ebf44 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -4786,18 +4786,18 @@ static void genOMP(lower::AbstractConverter &converter, lower::SymMap &symTable, namespace { struct MetadirectiveCandidate { - MetadirectiveCandidate( - const parser::OmpDirectiveSpecification *spec, - llvm::omp::VariantMatchInfo vmi, bool isExplicit, - std::optional dynamicCond = std::nullopt, - bool conditionShouldBeTrue = true) + MetadirectiveCandidate(const parser::OmpDirectiveSpecification *spec, + llvm::omp::VariantMatchInfo vmi, bool isExplicit, + std::optional + dynamicCond = std::nullopt, + bool 
conditionShouldBeTrue = true) : spec(spec), vmi(vmi), isExplicit(isExplicit), dynamicCond(dynamicCond), conditionShouldBeTrue(conditionShouldBeTrue) {} const parser::OmpDirectiveSpecification *spec = nullptr; llvm::omp::VariantMatchInfo vmi; bool isExplicit = false; - std::optional dynamicCond; + std::optional dynamicCond; bool conditionShouldBeTrue = true; }; } // namespace @@ -4855,9 +4855,25 @@ static void genMetadirective(lower::AbstractConverter &converter, const auto &ctxSel = getContextSelector(*whenClause); auto [spec, isExplicit] = getDirectiveVariant(*whenClause); + // METADIRECTIVE cannot yet honour some selector features that are + // otherwise accepted; reject them before building the match info. + switch (semantics::omp::FindUnsupportedSelectorFeature(ctxSel, semaCtx)) { + case semantics::omp::UnsupportedSelectorFeature::TargetDevice: + TODO(converter.genLocation(clause.source), + "target_device selector in METADIRECTIVE"); + break; + case semantics::omp::UnsupportedSelectorFeature:: + ClauseOrExtensionProperty: + TODO(converter.genLocation(clause.source), + "clause or extension trait matching in METADIRECTIVE"); + break; + case semantics::omp::UnsupportedSelectorFeature::None: + break; + } + llvm::omp::VariantMatchInfo rawVMI; - std::optional dynamicCond = makeVariantMatchInfo( - rawVMI, ctxSel, semaCtx, converter.genLocation(clause.source)); + std::optional dynamicCond = + semantics::omp::MakeVariantMatchInfo(rawVMI, ctxSel, semaCtx); if (dynamicCond) { constexpr llvm::omp::TraitProperty dynamicConditionTrait = diff --git a/flang/lib/Lower/OpenMP/Utils.cpp b/flang/lib/Lower/OpenMP/Utils.cpp index 7a532e10f1a1e..382292b6c6c13 100644 --- a/flang/lib/Lower/OpenMP/Utils.cpp +++ b/flang/lib/Lower/OpenMP/Utils.cpp @@ -1286,117 +1286,6 @@ mlir::Value genIteratorCoordinate(Fortran::lower::AbstractConverter &converter, /*typeparams=*/mlir::ValueRange{}); } -/// Collect trait property names (vendor, kind, arch, isa, etc.) into a VMI. 
-static void processTraitProperties( - llvm::omp::VariantMatchInfo &vmi, llvm::omp::TraitSet set, - llvm::omp::TraitSelector selector, - const std::optional &props, - llvm::APInt *scorePtr, mlir::Location loc) { - if (!props) - return; - - for (const auto &prop : - std::get>(props->t)) { - const auto *name = std::get_if(&prop.u); - // Clause properties and extension properties (e.g. `simdlen(8)` in - // `construct={simd(simdlen(8))}`) and `foo(bar)` in - // `implementation={my_trait(foo(bar))}` are not matched yet. - if (!name) - TODO(loc, "clause or extension trait matching in METADIRECTIVE"); - } - semantics::omp::ProcessTraitProperties(vmi, set, selector, props, scorePtr); -} - -/// Process user={condition(...)} trait properties. Constant conditions are -/// resolved to user_condition_true/false. Non-constant conditions are marked -/// as user_condition_unknown and returned for later use in fir.if lowering. -static std::optional processUserConditionTrait( - llvm::omp::VariantMatchInfo &vmi, - const std::optional &props, - semantics::SemanticsContext &semaCtx, llvm::APInt *scorePtr) { - std::optional dynamicCond; - if (!props) - return dynamicCond; - - for (const auto &prop : - std::get>(props->t)) { - const auto *scalarExpr = std::get_if(&prop.u); - if (!scalarExpr) - continue; - - if (auto constValue = - semantics::omp::EvaluateUserCondition(semaCtx, *scalarExpr)) { - vmi.addTrait(*constValue ? llvm::omp::TraitProperty::user_condition_true - : llvm::omp::TraitProperty::user_condition_false, - "", scorePtr); - continue; - } - - dynamicCond = DynamicUserCondition{scalarExpr, prop.source}; - vmi.addTrait(llvm::omp::TraitProperty::user_condition_unknown, - "", scorePtr); - } - - return dynamicCond; -} - -/// Populate a VariantMatchInfo from context selector. -/// For user conditions, attempts constant folding. Non-constant conditions -/// are recorded as user_condition_unknown and returned for later use in -/// fir.if lowering. 
-std::optional -makeVariantMatchInfo(llvm::omp::VariantMatchInfo &vmi, - const parser::modifier::OmpContextSelector &ctxSel, - semantics::SemanticsContext &semaCtx, mlir::Location loc) { - std::optional dynamicCond; - - for (const auto &traitSet : ctxSel.v) { - using TSSName = parser::OmpTraitSetSelectorName; - auto setName = std::get(traitSet.t).v; - llvm::omp::TraitSet set = semantics::omp::MapTraitSet(setName); - - for (const auto &trait : - std::get>(traitSet.t)) { - const auto &selectorName = - std::get(trait.t); - llvm::omp::TraitSelector selector = - semantics::omp::MapTraitSelector(selectorName, set); - const auto &props = - std::get>( - trait.t); - - // target_device selectors require runtime target device queries not yet - // supported. - if (set == llvm::omp::TraitSet::target_device) - TODO(loc, "target_device selector in METADIRECTIVE"); - - std::optional score; - llvm::APInt *scorePtr = - semantics::omp::GetTraitScore(props, semaCtx, score); - - if (selector == llvm::omp::TraitSelector::user_condition) { - if (std::optional userCond = - processUserConditionTrait(vmi, props, semaCtx, scorePtr)) - dynamicCond = userCond; - continue; - } - - processTraitProperties(vmi, set, selector, props, scorePtr, loc); - - if (props || set != llvm::omp::TraitSet::construct) - continue; - - // Construct traits with no properties: the selector is the property. 
- llvm::omp::TraitProperty propKind = - llvm::omp::getOpenMPContextTraitPropertyForSelector(selector); - if (propKind != llvm::omp::TraitProperty::invalid) - vmi.addTrait(set, propKind, selectorName.ToString(), scorePtr); - } - } - - return dynamicCond; -} - // --------------------------------------------------------------------------- // FlangOMPContext — shared OMPContext for metadirective variant-matching // --------------------------------------------------------------------------- diff --git a/flang/lib/Lower/OpenMP/Utils.h b/flang/lib/Lower/OpenMP/Utils.h index e7f477ff44739..efe6c963a3778 100644 --- a/flang/lib/Lower/OpenMP/Utils.h +++ b/flang/lib/Lower/OpenMP/Utils.h @@ -258,27 +258,12 @@ std::optional> getIteratorElementIndices( /// Walk the already-emitted MLIR parent operations starting from \p op and /// collect the implied OpenMP construct traits in outermost-to-innermost -/// order. Used by metadirective lowering to build the `ConstructTraits` of an -/// `OMPContext`. +/// order. Used by metadirective lowering and declare-variant call resolution +/// to build the `ConstructTraits` of an `OMPContext`. void collectEnclosingConstructTraits( mlir::Operation *op, llvm::SmallVectorImpl &constructTraits); -/// Non-constant user condition expression and source for runtime lowering. -struct DynamicUserCondition { - const parser::ScalarExpr *expr; - parser::CharBlock source; -}; - -/// Populate \p vmi from a parsed OpenMP context selector. Constant user -/// conditions are folded into user_condition_true/false traits. A non-constant -/// user condition is recorded as user_condition_unknown and returned for later -/// lowering as a runtime condition. -std::optional -makeVariantMatchInfo(llvm::omp::VariantMatchInfo &vmi, - const parser::modifier::OmpContextSelector &ctxSel, - semantics::SemanticsContext &semaCtx, mlir::Location loc); - /// `OMPContext` flavour used by Flang's OpenMP variant matching. 
Adds an /// ISA-trait override based on the module's target-features attribute. class FlangOMPContext final : public llvm::omp::OMPContext { diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp index b02ef81176a63..1aa27a5fe6074 100644 --- a/flang/lib/Semantics/openmp-utils.cpp +++ b/flang/lib/Semantics/openmp-utils.cpp @@ -2230,4 +2230,111 @@ void ProcessTraitProperties(llvm::omp::VariantMatchInfo &vmi, } } +UnsupportedSelectorFeature FindUnsupportedSelectorFeature( + const parser::traits::OmpContextSelectorSpecification &ctxSel, + SemanticsContext &semaCtx) { + for (const parser::OmpTraitSetSelector &traitSet : ctxSel.v) { + using TSSName = parser::OmpTraitSetSelectorName; + auto setName{std::get(traitSet.t).v}; + if (MapTraitSet(setName) == llvm::omp::TraitSet::target_device) { + return UnsupportedSelectorFeature::TargetDevice; + } + + for (const parser::OmpTraitSelector &selector : + std::get>(traitSet.t)) { + const auto &props{ + std::get>( + selector.t)}; + if (!props) { + continue; + } + for (const auto &prop : + std::get>(props->t)) { + if (std::holds_alternative>( + prop.u) || + std::holds_alternative(prop.u)) { + return UnsupportedSelectorFeature::ClauseOrExtensionProperty; + } + } + } + } + return UnsupportedSelectorFeature::None; +} + +static void AddTraitPropertiesFromSelector(llvm::omp::TraitSet set, + const parser::OmpTraitSelector &selector, llvm::omp::VariantMatchInfo &vmi, + SemanticsContext &semaCtx, + std::optional &dynamicCond) { + const auto &traitName{std::get(selector.t)}; + const auto &props{ + std::get>( + selector.t)}; + + std::optional scoreStorage; + llvm::APInt *scorePtr{GetTraitScore(props, semaCtx, scoreStorage)}; + + // user={condition(...)}: constant-fold to user_condition_true/false. A + // non-constant expression is recorded as user_condition_unknown and the + // first such expression is captured for later runtime lowering. 
+ llvm::omp::TraitSelector selectorKind{MapTraitSelector(traitName, set)}; + if (selectorKind == llvm::omp::TraitSelector::user_condition) { + if (!props) { + return; + } + for (const auto &prop : + std::get>(props->t)) { + const auto *scalarExpr{std::get_if(&prop.u)}; + if (!scalarExpr) { + continue; + } + if (auto constValue{EvaluateUserCondition(semaCtx, *scalarExpr)}) { + vmi.addTrait(set, + *constValue ? llvm::omp::TraitProperty::user_condition_true + : llvm::omp::TraitProperty::user_condition_false, + "", scorePtr); + continue; + } + if (!dynamicCond) { + dynamicCond = DynamicUserCondition{scalarExpr, prop.source}; + } + vmi.addTrait(set, llvm::omp::TraitProperty::user_condition_unknown, + "", scorePtr); + } + return; + } + + ProcessTraitProperties(vmi, set, selectorKind, props, scorePtr); + + if (props || set != llvm::omp::TraitSet::construct) { + return; + } + + // Construct trait selector with no properties (e.g. `construct={simd}`): + // the selector itself implies the property. 
+ llvm::omp::TraitProperty propKind{ + llvm::omp::getOpenMPContextTraitPropertyForSelector(selectorKind)}; + if (propKind != llvm::omp::TraitProperty::invalid) { + vmi.addTrait(set, propKind, traitName.ToString(), scorePtr); + } +} + +std::optional MakeVariantMatchInfo( + llvm::omp::VariantMatchInfo &vmi, + const parser::traits::OmpContextSelectorSpecification &ctxSel, + SemanticsContext &semaCtx) { + CHECK(FindUnsupportedSelectorFeature(ctxSel, semaCtx) == + UnsupportedSelectorFeature::None); + std::optional dynamicCond; + for (const parser::OmpTraitSetSelector &traitSet : ctxSel.v) { + using TSSName = parser::OmpTraitSetSelectorName; + auto setName{std::get(traitSet.t).v}; + llvm::omp::TraitSet set{MapTraitSet(setName)}; + + for (const parser::OmpTraitSelector &selector : + std::get>(traitSet.t)) { + AddTraitPropertiesFromSelector(set, selector, vmi, semaCtx, dynamicCond); + } + } + return dynamicCond; +} } // namespace Fortran::semantics::omp From 9e4042b4a725b77e256686489fa614bcbaabbccb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Benics?= Date: Wed, 24 Jun 2026 13:42:42 +0200 Subject: [PATCH 322/511] [analyzer][NFC] Take BugReport descriptions as Twine instead of StringRef (#205527) The constructors of `BugReport`, `BasicBugReport`, and `PathSensitiveBugReport` previously took the description (and short description) as `StringRef`. The base class always copies into a `std::string` member regardless, so taking `const llvm::Twine &` is strictly more flexible at no storage cost: callers can keep passing string literals, `StringRef`, `std::string`, `SmallString::str()`, or `formatv(...).str()` exactly as before, and now they can also pass a `Twine` concatenation directly without first materializing a temporary through `SmallString` + `raw_svector_ostream` or `+`/`formatv`. 
Assisted-By: claude --- .../Core/BugReporter/BugReporter.h | 30 ++++++++------- .../Checkers/CXXDeleteChecker.cpp | 16 ++++---- .../Checkers/MacOSKeychainAPIChecker.cpp | 37 ++++++++----------- .../Checkers/NonNullParamChecker.cpp | 14 +++---- .../Checkers/UndefCapturedBlockVarChecker.cpp | 17 +++------ .../Checkers/UndefinedNewArraySizeChecker.cpp | 9 +---- .../Checkers/UnixAPIChecker.cpp | 10 ++--- .../Checkers/VLASizeChecker.cpp | 12 +++--- clang/lib/StaticAnalyzer/Core/BugReporter.cpp | 2 +- 9 files changed, 61 insertions(+), 86 deletions(-) diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h index 6d2de7a27608c..51c54151ac07b 100644 --- a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h +++ b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporter.h @@ -33,6 +33,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/ADT/ilist.h" #include "llvm/ADT/ilist_node.h" #include "llvm/ADT/iterator_range.h" @@ -132,13 +133,13 @@ class BugReport { SmallVector, 4> Notes; SmallVector Fixits; - BugReport(Kind kind, const BugType &bt, StringRef desc) + BugReport(Kind kind, const BugType &bt, const llvm::Twine &desc) : BugReport(kind, bt, "", desc) {} - BugReport(Kind K, const BugType &BT, StringRef ShortDescription, - StringRef Description) - : K(K), BT(BT), ShortDescription(ShortDescription), - Description(Description) {} + BugReport(Kind K, const BugType &BT, const llvm::Twine &ShortDescription, + const llvm::Twine &Description) + : K(K), BT(BT), ShortDescription(ShortDescription.str()), + Description(Description.str()) {} public: virtual ~BugReport() = default; @@ -252,11 +253,12 @@ class BasicBugReport : public BugReport { const Decl *DeclWithIssue = nullptr; public: - BasicBugReport(const BugType &bt, StringRef desc, PathDiagnosticLocation l) + 
BasicBugReport(const BugType &bt, const llvm::Twine &desc, + PathDiagnosticLocation l) : BugReport(Kind::Basic, bt, desc), Location(l) {} - BasicBugReport(const BugType &BT, StringRef ShortDesc, StringRef Desc, - PathDiagnosticLocation L) + BasicBugReport(const BugType &BT, const llvm::Twine &ShortDesc, + const llvm::Twine &Desc, PathDiagnosticLocation L) : BugReport(Kind::Basic, BT, ShortDesc, Desc), Location(L) {} static bool classof(const BugReport *R) { @@ -369,12 +371,12 @@ class PathSensitiveBugReport : public BugReport { StackHints; public: - PathSensitiveBugReport(const BugType &bt, StringRef desc, + PathSensitiveBugReport(const BugType &bt, const llvm::Twine &desc, const ExplodedNode *errorNode) : PathSensitiveBugReport(bt, desc, desc, errorNode) {} - PathSensitiveBugReport(const BugType &bt, StringRef shortDesc, StringRef desc, - const ExplodedNode *errorNode) + PathSensitiveBugReport(const BugType &bt, const llvm::Twine &shortDesc, + const llvm::Twine &desc, const ExplodedNode *errorNode) : PathSensitiveBugReport(bt, shortDesc, desc, errorNode, /*LocationToUnique*/ {}, /*DeclToUnique*/ nullptr) {} @@ -386,15 +388,15 @@ class PathSensitiveBugReport : public BugReport { /// to the user. This method allows to rest the location which should be used /// for uniquing reports. For example, memory leaks checker, could set this to /// the allocation site, rather then the location where the bug is reported. 
- PathSensitiveBugReport(const BugType &bt, StringRef desc, + PathSensitiveBugReport(const BugType &bt, const llvm::Twine &desc, const ExplodedNode *errorNode, PathDiagnosticLocation LocationToUnique, const Decl *DeclToUnique) : PathSensitiveBugReport(bt, desc, desc, errorNode, LocationToUnique, DeclToUnique) {} - PathSensitiveBugReport(const BugType &bt, StringRef shortDesc, StringRef desc, - const ExplodedNode *errorNode, + PathSensitiveBugReport(const BugType &bt, const llvm::Twine &shortDesc, + const llvm::Twine &desc, const ExplodedNode *errorNode, PathDiagnosticLocation LocationToUnique, const Decl *DeclToUnique); diff --git a/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp index bfab91dd67919..0bc628eaabe8c 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CXXDeleteChecker.cpp @@ -156,19 +156,17 @@ void CXXArrayDeleteChecker::checkTypedDeleteExpr( if (!N) return; - SmallString<256> Buf; - llvm::raw_svector_ostream OS(Buf); - QualType SourceType = BaseClassRegion->getValueType(); QualType TargetType = DerivedClassRegion->getSymbol()->getType()->getPointeeType(); - OS << "Deleting an array of '" << TargetType.getAsString() - << "' objects as their base class '" - << SourceType.getAsString(C.getASTContext().getPrintingPolicy()) - << "' is undefined"; - - auto R = std::make_unique(BT, OS.str(), N); + auto R = std::make_unique( + BT, + "Deleting an array of '" + Twine(TargetType.getAsString()) + + "' objects as their base class '" + + SourceType.getAsString(C.getASTContext().getPrintingPolicy()) + + "' is undefined", + N); // Mark region of problematic base class for later use in the BugVisitor. 
R->markInteresting(BaseClassRegion); diff --git a/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp index 5d4a8b6b24766..02794da032f2f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MacOSKeychainAPIChecker.cpp @@ -19,8 +19,6 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/raw_ostream.h" #include using namespace clang; @@ -226,14 +224,14 @@ void MacOSKeychainAPIChecker:: if (!N) return; - SmallString<80> sbuf; - llvm::raw_svector_ostream os(sbuf); unsigned int PDeallocIdx = - FunctionsToTrack[AP.second->AllocatorIdx].DeallocatorIdx; + FunctionsToTrack[AP.second->AllocatorIdx].DeallocatorIdx; - os << "Deallocator doesn't match the allocator: '" - << FunctionsToTrack[PDeallocIdx].Name << "' should be used."; - auto Report = std::make_unique(BT, os.str(), N); + auto Report = std::make_unique( + BT, + "Deallocator doesn't match the allocator: '" + + Twine(FunctionsToTrack[PDeallocIdx].Name) + "' should be used.", + N); Report->addVisitor(std::make_unique(AP.first)); Report->addRange(ArgExpr->getSourceRange()); markInteresting(Report.get(), AP); @@ -269,14 +267,13 @@ void MacOSKeychainAPIChecker::checkPreStmt(const CallExpr *CE, ExplodedNode *N = C.generateNonFatalErrorNode(State); if (!N) return; - SmallString<128> sbuf; - llvm::raw_svector_ostream os(sbuf); unsigned int DIdx = FunctionsToTrack[AS->AllocatorIdx].DeallocatorIdx; - os << "Allocated data should be released before another call to " - << "the allocator: missing a call to '" - << FunctionsToTrack[DIdx].Name - << "'."; - auto Report = std::make_unique(BT, os.str(), N); + auto Report = std::make_unique( + BT, + "Allocated data should be released before 
another call to " + "the allocator: missing a call to '" + + Twine(FunctionsToTrack[DIdx].Name) + "'.", + N); Report->addVisitor(std::make_unique(V)); Report->addRange(ArgExpr->getSourceRange()); Report->markInteresting(AS->Region); @@ -463,10 +460,6 @@ std::unique_ptr MacOSKeychainAPIChecker::generateAllocatedDataNotReleasedReport( const AllocationPair &AP, ExplodedNode *N, CheckerContext &C) const { const ADFunctionInfo &FI = FunctionsToTrack[AP.second->AllocatorIdx]; - SmallString<70> sbuf; - llvm::raw_svector_ostream os(sbuf); - os << "Allocated data is not released: missing a call to '" - << FunctionsToTrack[FI.DeallocatorIdx].Name << "'."; // Most bug reports are cached at the location where they occurred. // With leaks, we want to unique them by the location where they were @@ -480,8 +473,10 @@ MacOSKeychainAPIChecker::generateAllocatedDataNotReleasedReport( AllocStmt, C.getSourceManager(), AllocNode->getStackFrame()); auto Report = std::make_unique( - BT, os.str(), N, LocUsedForUniqueing, - AllocNode->getStackFrame()->getDecl()); + BT, + "Allocated data is not released: missing a call to '" + + Twine(FunctionsToTrack[FI.DeallocatorIdx].Name) + "'.", + N, LocUsedForUniqueing, AllocNode->getStackFrame()->getDecl()); Report->addVisitor(std::make_unique(AP.first)); markInteresting(Report.get(), AP); diff --git a/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp index 4b55c7c49caa8..2cc633fa5649f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/NonNullParamChecker.cpp @@ -22,7 +22,6 @@ #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" -#include "llvm/ADT/StringExtras.h" using namespace clang; using namespace ento; @@ -280,14 +279,11 @@ std::unique_ptr NonNullParamChecker::genReportNullAttrNonNull(const 
ExplodedNode *ErrorNode, const Expr *ArgE, unsigned IdxOfArg) const { - llvm::SmallString<256> SBuf; - llvm::raw_svector_ostream OS(SBuf); - OS << "Null pointer passed to " - << IdxOfArg << llvm::getOrdinalSuffix(IdxOfArg) - << " parameter expecting 'nonnull'"; - - auto R = - std::make_unique(BTAttrNonNull, SBuf, ErrorNode); + auto R = std::make_unique( + BTAttrNonNull, + "Null pointer passed to " + Twine(IdxOfArg) + + llvm::getOrdinalSuffix(IdxOfArg) + " parameter expecting 'nonnull'", + ErrorNode); if (ArgE) bugreporter::trackExpressionValue(ErrorNode, ArgE, *R); diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp index 2839ef0b6d2e6..5de7daae1b10f 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UndefCapturedBlockVarChecker.cpp @@ -10,15 +10,13 @@ // //===----------------------------------------------------------------------===// -#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/AST/Attr.h" +#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" #include "clang/StaticAnalyzer/Core/Checker.h" #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/Support/raw_ostream.h" #include using namespace clang; @@ -70,14 +68,11 @@ UndefCapturedBlockVarChecker::checkPostStmt(const BlockExpr *BE, if (std::optional V = state->getSVal(Var.getOriginalRegion()).getAs()) { if (ExplodedNode *N = C.generateErrorNode()) { - // Generate a bug report. 
- SmallString<128> buf; - llvm::raw_svector_ostream os(buf); - - os << "Variable '" << VD->getName() - << "' is uninitialized when captured by block"; - - auto R = std::make_unique(BT, os.str(), N); + auto R = std::make_unique( + BT, + "Variable '" + Twine(VD->getName()) + + "' is uninitialized when captured by block", + N); if (const Expr *Ex = FindBlockDeclRefExpr(BE->getBody(), VD)) R->addRange(Ex->getSourceRange()); bugreporter::trackStoredValue(*V, VR, *R, diff --git a/clang/lib/StaticAnalyzer/Checkers/UndefinedNewArraySizeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UndefinedNewArraySizeChecker.cpp index f053ee887a1aa..dcab55a7e370d 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UndefinedNewArraySizeChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UndefinedNewArraySizeChecker.cpp @@ -55,13 +55,8 @@ void UndefinedNewArraySizeChecker::HandleUndefinedArrayElementCount( CheckerContext &C, SVal ArgVal, const Expr *Init, SourceRange Range) const { if (ExplodedNode *N = C.generateErrorNode()) { - - SmallString<100> buf; - llvm::raw_svector_ostream os(buf); - - os << "Element count in new[] is a garbage value"; - - auto R = std::make_unique(BT, os.str(), N); + auto R = std::make_unique( + BT, "Element count in new[] is a garbage value", N); R->markInteresting(ArgVal); R->addRange(Range); bugreporter::trackExpressionValue(N, Init, *R); diff --git a/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp index 4df751d203973..e51a74f725975 100644 --- a/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/UnixAPIChecker.cpp @@ -22,8 +22,6 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerHelpers.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/Support/raw_ostream.h" #include using namespace clang; @@ -480,11 +478,9 @@ bool 
UnixAPIPortabilityChecker::ReportZeroByteAllocation( if (!N) return false; - SmallString<256> S; - llvm::raw_svector_ostream os(S); - os << "Call to '" << fn_name << "' has an allocation size of 0 bytes"; - auto report = - std::make_unique(BT_mallocZero, os.str(), N); + auto report = std::make_unique( + BT_mallocZero, + "Call to '" + Twine(fn_name) + "' has an allocation size of 0 bytes", N); report->addRange(arg->getSourceRange()); bugreporter::trackExpressionValue(N, arg, *report); diff --git a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp index d5c91a20d60b2..1d82ac0f7225a 100644 --- a/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/VLASizeChecker.cpp @@ -21,7 +21,6 @@ #include "clang/StaticAnalyzer/Core/CheckerManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" -#include "llvm/Support/raw_ostream.h" #include using namespace clang; @@ -213,12 +212,11 @@ void VLASizeChecker::reportTaintBug(const Expr *SizeE, ProgramStateRef State, if (!N) return; - SmallString<256> buf; - llvm::raw_svector_ostream os(buf); - os << "Declared variable-length array (VLA) "; - os << "has tainted (attacker controlled) size that can be 0 or negative"; - - auto report = std::make_unique(TaintBT, os.str(), N); + auto report = std::make_unique( + TaintBT, + "Declared variable-length array (VLA) has tainted (attacker controlled) " + "size that can be 0 or negative", + N); report->addRange(SizeE->getSourceRange()); bugreporter::trackExpressionValue(N, SizeE, *report); // The vla size may be a complex expression where multiple memory locations diff --git a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp index b6c709963501f..b609a98b0aed2 100644 --- a/clang/lib/StaticAnalyzer/Core/BugReporter.cpp +++ b/clang/lib/StaticAnalyzer/Core/BugReporter.cpp @@ 
-2154,7 +2154,7 @@ LLVM_ATTRIBUTE_USED static bool isHidden(const CheckerRegistryData &Registry, } PathSensitiveBugReport::PathSensitiveBugReport( - const BugType &bt, StringRef shortDesc, StringRef desc, + const BugType &bt, const llvm::Twine &shortDesc, const llvm::Twine &desc, const ExplodedNode *errorNode, PathDiagnosticLocation LocationToUnique, const Decl *DeclToUnique) : BugReport(Kind::PathSensitive, bt, shortDesc, desc), ErrorNode(errorNode), From 666bb76ece09e604d40366bbdf1e75b642394d3f Mon Sep 17 00:00:00 2001 From: "forking-google-bazel-bot[bot]" <265904573+forking-google-bazel-bot[bot]@users.noreply.github.com> Date: Wed, 24 Jun 2026 07:48:29 -0400 Subject: [PATCH 323/511] [Bazel] Fixes 25ae6ce (#205501) This fixes 25ae6ce4801f6f6addae5079323870d4191e7531. Co-authored-by: Google Bazel Bot --- .../libc/test/src/stdlib/BUILD.bazel | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel index 8d30869056ec7..8e7d6b40e2fdf 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/stdlib/BUILD.bazel @@ -150,10 +150,19 @@ libc_test( ], ) +libc_test_library( + name = "qsort_r_test_helper", + hdrs = ["QsortReentrantTest.h"], + deps = [ + "//libc/test/UnitTest:LibcUnitTest", + ], +) + libc_test( name = "qsort_r_test", srcs = ["qsort_r_test.cpp"], deps = [ + ":qsort_r_test_helper", "//libc:qsort_r", "//libc:types_size_t", ], From b3f5e7bb021ee92f5615a7545ed49f09126c090b Mon Sep 17 00:00:00 2001 From: dklochkov-intel Date: Wed, 24 Jun 2026 13:54:05 +0200 Subject: [PATCH 324/511] [SYCL][Doc] Move FP8 docs from proposed to experimental (#22403) CRI FP8 already implemented, PR is merged into sycl branch --- .../sycl_ext_oneapi_fp8.asciidoc | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) rename sycl/doc/extensions/{proposed => 
experimental}/sycl_ext_oneapi_fp8.asciidoc (99%) diff --git a/sycl/doc/extensions/proposed/sycl_ext_oneapi_fp8.asciidoc b/sycl/doc/extensions/experimental/sycl_ext_oneapi_fp8.asciidoc similarity index 99% rename from sycl/doc/extensions/proposed/sycl_ext_oneapi_fp8.asciidoc rename to sycl/doc/extensions/experimental/sycl_ext_oneapi_fp8.asciidoc index 4dc16b87f7acb..7bfffae4cfd13 100644 --- a/sycl/doc/extensions/proposed/sycl_ext_oneapi_fp8.asciidoc +++ b/sycl/doc/extensions/experimental/sycl_ext_oneapi_fp8.asciidoc @@ -53,14 +53,11 @@ This extension also depends on the following other SYCL extensions: == Status -This is a proposed extension specification, intended to gather community -feedback. -Interfaces defined in this specification may not be implemented yet or may be in -a preliminary state. -The specification itself may also change in incompatible ways before it is -finalized. -*Shipping software products should not rely on APIs defined in this -specification.* +This is an experimental extension specification, intended to provide early +access to features and gather community feedback. Interfaces defined in this +specification are implemented in DPC++, but they are not finalized and may +change incompatibly in future versions of DPC++ without prior notice. +*Shipping software products should not rely on APIs defined in this specification.* == Overview From e92bf51810d18defaf572d017e02ae77b5b9fc23 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 24 Jun 2026 14:14:15 +0200 Subject: [PATCH 325/511] [libc++] Move constexpr/explicit macros to <__configuration/language.h> (#205535) These macros are essentially a property of the language mode we're in, so move them to `<__configuration/language.h>`. 
--- libcxx/include/__config | 36 ----------------------- libcxx/include/__configuration/language.h | 36 +++++++++++++++++++++++ 2 files changed, 36 insertions(+), 36 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index fdd0558fbec6f..714cd0fd26b36 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -238,42 +238,6 @@ typedef __char32_t char32_t; # define _LIBCPP_DIAGNOSE_DEPRECATED_HEADERS 0 # endif -# if _LIBCPP_STD_VER <= 11 -# define _LIBCPP_EXPLICIT_SINCE_CXX14 -# else -# define _LIBCPP_EXPLICIT_SINCE_CXX14 explicit -# endif - -# if _LIBCPP_STD_VER >= 14 -# define _LIBCPP_CONSTEXPR_SINCE_CXX14 constexpr -# else -# define _LIBCPP_CONSTEXPR_SINCE_CXX14 -# endif - -# if _LIBCPP_STD_VER >= 17 -# define _LIBCPP_CONSTEXPR_SINCE_CXX17 constexpr -# else -# define _LIBCPP_CONSTEXPR_SINCE_CXX17 -# endif - -# if _LIBCPP_STD_VER >= 20 -# define _LIBCPP_CONSTEXPR_SINCE_CXX20 constexpr -# else -# define _LIBCPP_CONSTEXPR_SINCE_CXX20 -# endif - -# if _LIBCPP_STD_VER >= 23 -# define _LIBCPP_CONSTEXPR_SINCE_CXX23 constexpr -# else -# define _LIBCPP_CONSTEXPR_SINCE_CXX23 -# endif - -# if _LIBCPP_STD_VER >= 26 -# define _LIBCPP_CONSTEXPR_SINCE_CXX26 constexpr -# else -# define _LIBCPP_CONSTEXPR_SINCE_CXX26 -# endif - // Thread API // clang-format off # if _LIBCPP_HAS_THREADS && \ diff --git a/libcxx/include/__configuration/language.h b/libcxx/include/__configuration/language.h index 3137ba2ea27ef..1205934334852 100644 --- a/libcxx/include/__configuration/language.h +++ b/libcxx/include/__configuration/language.h @@ -56,4 +56,40 @@ # define _LIBCPP_HAS_CHAR8_T 1 #endif +#if _LIBCPP_STD_VER <= 11 +# define _LIBCPP_EXPLICIT_SINCE_CXX14 +#else +# define _LIBCPP_EXPLICIT_SINCE_CXX14 explicit +#endif + +#if _LIBCPP_STD_VER >= 14 +# define _LIBCPP_CONSTEXPR_SINCE_CXX14 constexpr +#else +# define _LIBCPP_CONSTEXPR_SINCE_CXX14 +#endif + +#if _LIBCPP_STD_VER >= 17 +# define _LIBCPP_CONSTEXPR_SINCE_CXX17 constexpr +#else +# define 
_LIBCPP_CONSTEXPR_SINCE_CXX17 +#endif + +#if _LIBCPP_STD_VER >= 20 +# define _LIBCPP_CONSTEXPR_SINCE_CXX20 constexpr +#else +# define _LIBCPP_CONSTEXPR_SINCE_CXX20 +#endif + +#if _LIBCPP_STD_VER >= 23 +# define _LIBCPP_CONSTEXPR_SINCE_CXX23 constexpr +#else +# define _LIBCPP_CONSTEXPR_SINCE_CXX23 +#endif + +#if _LIBCPP_STD_VER >= 26 +# define _LIBCPP_CONSTEXPR_SINCE_CXX26 constexpr +#else +# define _LIBCPP_CONSTEXPR_SINCE_CXX26 +#endif + #endif // _LIBCPP___CONFIGURATION_LANGUAGE_H From a87ec86436370632d63fced08dcbd2d70af24034 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 24 Jun 2026 13:16:03 +0100 Subject: [PATCH 326/511] [X86] combineAddOfPMADDWD - use MaskedVectorIsZero directly instead of MaskedValueIsZero. NFC. (#205534) We're setting all demanded bits and just want to know that the high elements in each pair are zero. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 291124bb485cb..d3729b4102c55 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -59814,15 +59814,12 @@ static SDValue combineAddOfPMADDWD(SelectionDAG &DAG, SDValue N0, SDValue N1, unsigned NumElts = VT.getVectorNumElements(); MVT OpVT = N0.getOperand(0).getSimpleValueType(); - APInt DemandedBits = APInt::getAllOnes(OpVT.getScalarSizeInBits()); APInt DemandedHiElts = APInt::getSplat(2 * NumElts, APInt(2, 2)); - bool Op0HiZero = - DAG.MaskedValueIsZero(N0.getOperand(0), DemandedBits, DemandedHiElts) || - DAG.MaskedValueIsZero(N0.getOperand(1), DemandedBits, DemandedHiElts); - bool Op1HiZero = - DAG.MaskedValueIsZero(N1.getOperand(0), DemandedBits, DemandedHiElts) || - DAG.MaskedValueIsZero(N1.getOperand(1), DemandedBits, DemandedHiElts); + bool Op0HiZero = DAG.MaskedVectorIsZero(N0.getOperand(0), DemandedHiElts) || + DAG.MaskedVectorIsZero(N0.getOperand(1), DemandedHiElts); + bool 
Op1HiZero = DAG.MaskedVectorIsZero(N1.getOperand(0), DemandedHiElts) || + DAG.MaskedVectorIsZero(N1.getOperand(1), DemandedHiElts); // TODO: Check for zero lower elements once we have actual codegen that // creates them. From c12105f24a80ca2a16af6ef9795b3332321406a0 Mon Sep 17 00:00:00 2001 From: David CARLIER Date: Wed, 24 Jun 2026 13:25:09 +0100 Subject: [PATCH 327/511] =?UTF-8?q?[NFC]=20use=20DenseMap/SmallPtrSet=20in?= =?UTF-8?q?=20CacheMetrics=20and=20TailDupli=E2=80=A6=20(#205480)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …cation Swap pointer-keyed std::unordered_map/std::set for their ADT equivalents on hot paths. --- bolt/lib/Passes/CacheMetrics.cpp | 34 ++++++++++++++--------------- bolt/lib/Passes/TailDuplication.cpp | 2 +- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/bolt/lib/Passes/CacheMetrics.cpp b/bolt/lib/Passes/CacheMetrics.cpp index ccc25fc0c9f4f..8c6f3ff7c4308 100644 --- a/bolt/lib/Passes/CacheMetrics.cpp +++ b/bolt/lib/Passes/CacheMetrics.cpp @@ -14,7 +14,6 @@ #include "bolt/Passes/CacheMetrics.h" #include "bolt/Core/BinaryBasicBlock.h" #include "bolt/Core/BinaryFunction.h" -#include using namespace llvm; using namespace bolt; @@ -29,10 +28,9 @@ constexpr unsigned ITLBPageSize = 4096; constexpr unsigned ITLBEntries = 16; /// Initialize and return a position map for binary basic blocks -void extractBasicBlockInfo( - const BinaryFunctionListType &BinaryFunctions, - std::unordered_map &BBAddr, - std::unordered_map &BBSize) { +void extractBasicBlockInfo(const BinaryFunctionListType &BinaryFunctions, + DenseMap &BBAddr, + DenseMap &BBSize) { for (BinaryFunction *BF : BinaryFunctions) { const BinaryContext &BC = BF->getBinaryContext(); @@ -55,8 +53,8 @@ void extractBasicBlockInfo( /// (the number of fallthrough branches, the total number of branches) std::pair calcTSPScore(const BinaryFunctionListType &BinaryFunctions, - const std::unordered_map &BBAddr, - const std::unordered_map 
&BBSize) { + const DenseMap &BBAddr, + const DenseMap &BBSize) { uint64_t Score = 0; uint64_t JumpCount = 0; for (BinaryFunction *BF : BinaryFunctions) { @@ -94,9 +92,9 @@ using Predecessors = std::vector>; /// Build a simplified version of the call graph: For every function, keep /// its callers and the frequencies of the calls -std::unordered_map +DenseMap extractFunctionCalls(const BinaryFunctionListType &BinaryFunctions) { - std::unordered_map Calls; + DenseMap Calls; for (BinaryFunction *SrcFunction : BinaryFunctions) { const BinaryContext &BC = SrcFunction->getBinaryContext(); @@ -139,15 +137,15 @@ extractFunctionCalls(const BinaryFunctionListType &BinaryFunctions) { /// is proportional to the number of samples corresponding to the functions on /// the page. The following procedure detects short and long calls, and /// estimates the expected number of cache misses for the long ones. -double expectedCacheHitRatio( - const BinaryFunctionListType &BinaryFunctions, - const std::unordered_map &BBAddr, - const std::unordered_map &BBSize) { - std::unordered_map Calls = +double +expectedCacheHitRatio(const BinaryFunctionListType &BinaryFunctions, + const DenseMap &BBAddr, + const DenseMap &BBSize) { + DenseMap Calls = extractFunctionCalls(BinaryFunctions); // Compute 'hotness' of the functions double TotalSamples = 0; - std::unordered_map FunctionSamples; + DenseMap FunctionSamples; for (BinaryFunction *BF : BinaryFunctions) { double Samples = 0; for (std::pair Pair : Calls[BF]) @@ -158,7 +156,7 @@ double expectedCacheHitRatio( } // Compute 'hotness' of the pages - std::unordered_map PageSamples; + DenseMap PageSamples; for (BinaryFunction *BF : BinaryFunctions) { if (BF->getLayout().block_empty()) continue; @@ -266,8 +264,8 @@ void CacheMetrics::printAll(raw_ostream &OS, double(HotCodeSize) / HugePage2MB); // Stats related to expected cache performance - std::unordered_map BBAddr; - std::unordered_map BBSize; + DenseMap BBAddr; + DenseMap BBSize; 
extractBasicBlockInfo(BFs, BBAddr, BBSize); OS << " Expected i-TLB cache hit ratio: " diff --git a/bolt/lib/Passes/TailDuplication.cpp b/bolt/lib/Passes/TailDuplication.cpp index c5565fdf4a7a7..a4a5876943ec4 100644 --- a/bolt/lib/Passes/TailDuplication.cpp +++ b/bolt/lib/Passes/TailDuplication.cpp @@ -130,7 +130,7 @@ bool TailDuplication::isOverwrittenBeforeUsed(BinaryBasicBlock &StartBB, BinaryBasicBlock *NextBB = *Itr; Q.push(NextBB); } - std::set Visited; + SmallPtrSet Visited; // Breadth first search through successive blocks and see if Reg is ever used // before its overwritten while (Q.size() > 0) { From 5794d5256a79ef61f47272115aaede0963685372 Mon Sep 17 00:00:00 2001 From: Sairudra More Date: Wed, 24 Jun 2026 17:59:23 +0530 Subject: [PATCH 328/511] [flang][OpenMP] Lower task reduction modifier (#205124) Adds Flang lowering and MLIR-to-LLVM IR translation support for the OpenMP `task` modifier on reduction clauses for `parallel`, `do`/`wsloop`, and `sections`. Unsupported forms remain diagnosed/TODO-gated rather than silently lowered. Fixes #205123. 
--- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 9 +- .../test/Lower/OpenMP/Todo/reduction-task.f90 | 12 - .../Lower/OpenMP/parallel-reduction-task.f90 | 37 +++ .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 158 +++++++++++-- .../openmp-reduction-task-modifier.mlir | 216 ++++++++++++++++++ mlir/test/Target/LLVMIR/openmp-todo.mlir | 28 +++ 6 files changed, 427 insertions(+), 33 deletions(-) delete mode 100644 flang/test/Lower/OpenMP/Todo/reduction-task.f90 create mode 100644 flang/test/Lower/OpenMP/parallel-reduction-task.f90 create mode 100644 mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index eb416d103fbe0..4f19dfb98024d 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -2052,12 +2052,9 @@ bool ClauseProcessor::processReduction( auto mod = std::get>(clause.t); if (mod.has_value()) { - if (mod.value() == ReductionModifier::Task) - TODO(currentLocation, "Reduction modifier `task` is not supported"); - else - result.reductionMod = mlir::omp::ReductionModifierAttr::get( - converter.getFirOpBuilder().getContext(), - translateReductionModifier(mod.value())); + result.reductionMod = mlir::omp::ReductionModifierAttr::get( + converter.getFirOpBuilder().getContext(), + translateReductionModifier(mod.value())); } ReductionProcessor rp; diff --git a/flang/test/Lower/OpenMP/Todo/reduction-task.f90 b/flang/test/Lower/OpenMP/Todo/reduction-task.f90 deleted file mode 100644 index adc8de00a9b7a..0000000000000 --- a/flang/test/Lower/OpenMP/Todo/reduction-task.f90 +++ /dev/null @@ -1,12 +0,0 @@ -! RUN: %not_todo_cmd bbc -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s -! RUN: %not_todo_cmd %flang_fc1 -emit-fir -fopenmp -o - %s 2>&1 | FileCheck %s - -! 
CHECK: not yet implemented: Reduction modifier `task` is not supported -subroutine reduction_task() - integer :: i - i = 0 - - !$omp parallel reduction(task, +:i) - i = i + 1 - !$omp end parallel -end subroutine reduction_task diff --git a/flang/test/Lower/OpenMP/parallel-reduction-task.f90 b/flang/test/Lower/OpenMP/parallel-reduction-task.f90 new file mode 100644 index 0000000000000..ee46b0044249f --- /dev/null +++ b/flang/test/Lower/OpenMP/parallel-reduction-task.f90 @@ -0,0 +1,37 @@ +! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s + +! Check that the `task` reduction modifier is lowered to the `task` +! reduction modifier attribute on the parallel and worksharing constructs. + +! CHECK-LABEL: func.func @_QPreduction_task_parallel +subroutine reduction_task_parallel() + integer :: i + i = 0 + ! CHECK: omp.parallel reduction(mod: task, @{{.*}} %{{.*}} -> %{{.*}} : !fir.ref) { + !$omp parallel reduction(task, +:i) + i = i + 1 + !$omp end parallel +end subroutine reduction_task_parallel + +! CHECK-LABEL: func.func @_QPreduction_task_do +subroutine reduction_task_do() + integer :: i, j + i = 0 + ! CHECK: omp.wsloop {{.*}}reduction(mod: task, @{{.*}} %{{.*}} -> %{{.*}} : !fir.ref) { + !$omp do reduction(task, +:i) + do j = 1, 10 + i = i + 1 + end do + !$omp end do +end subroutine reduction_task_do + +! CHECK-LABEL: func.func @_QPreduction_task_sections +subroutine reduction_task_sections() + integer :: i + i = 0 + ! 
CHECK: omp.sections {{.*}}reduction(mod: task, @{{.*}} %{{.*}} -> %{{.*}} : !fir.ref) { + !$omp sections reduction(task, +:i) + i = i + 1 + !$omp end sections +end subroutine reduction_task_sections diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 0f954e384929a..edfa407234fa0 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -392,8 +392,26 @@ static LogicalResult checkImplementationStatus(Operation &op) { op.getReductionSyms()) result = todo("reduction"); if (op.getReductionMod() && - op.getReductionMod().value() != omp::ReductionModifier::defaultmod) - result = todo("reduction with modifier"); + op.getReductionMod().value() != omp::ReductionModifier::defaultmod) { + omp::ReductionModifier mod = op.getReductionMod().value(); + // The `task` reduction modifier is supported on the parallel and + // worksharing (do/for and sections) constructs. Other modifiers, and the + // `task` modifier on other constructs, are not yet implemented. + bool taskModifierSupported = + mod == omp::ReductionModifier::task && + isa(op); + if (!taskModifierSupported) { + result = todo("reduction with modifier"); + } else if (auto byref = op.getReductionByref()) { + // The task reduction modifier lowering only handles non-byref + // reductions for now. + for (bool isByRef : *byref) + if (isByRef) { + result = todo("task reduction modifier with by-ref reduction"); + break; + } + } + } }; auto checkTaskReductionByref = [&todo](auto op, LogicalResult &result) { if (auto byrefAttr = op.getTaskReductionByref()) @@ -2024,6 +2042,23 @@ static bool constructIsCancellable(Operation *op) { .wasInterrupted(); } +// Forward declarations for the task-reduction helpers defined alongside the +// omp.taskgroup lowering further down in this file. 
These are shared by the +// `reduction(task, ...)` modifier lowering on the parallel/worksharing +// constructs and by the omp.taskgroup / omp.taskloop.context task_reduction +// lowering. When \p isModifier is set, `__kmpc_taskred_modifier_init` is +// emitted (opening a task-reduction scope) instead of `__kmpc_taskred_init`, +// with \p isWorksharing selecting the runtime `is_ws` argument. +static llvm::Value *emitTaskReductionInitCall( + ArrayRef redDecls, + ArrayRef origPtrs, StringRef helperNamePrefix, + llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP, + LLVM::ModuleTranslation &moduleTranslation, bool isModifier = false, + bool isWorksharing = false); +static void +emitTaskReductionModifierFini(bool isWorksharing, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation); + static LogicalResult convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation) { @@ -2057,6 +2092,10 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, isByRef))) return failure(); + bool isTaskReductionMod = + sectionsOp.getReductionMod() == omp::ReductionModifier::task && + sectionsOp.getNumReductionVars() > 0; + SmallVector sectionCBs; for (Operation &op : *sectionsOp.getRegion().begin()) { @@ -2096,6 +2135,19 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, if (sectionCBs.empty()) return success(); + // For `reduction(task, ...)` open a task-reduction scope for the worksharing + // region. Participating explicit tasks accumulate into the per-thread private + // copies, which the worksharing reduction then combines across threads. This + // is emitted only after the empty-sections early return above, so it stays + // balanced with the matching fini emitted after the sections region. 
+ if (isTaskReductionMod && + !emitTaskReductionInitCall(reductionDecls, privateReductionVariables, + "__omp_taskred_mod_", builder, allocaIP, + moduleTranslation, /*isModifier=*/true, + /*isWorksharing=*/true)) + return sectionsOp.emitError( + "failed to emit task reduction modifier initialization"); + assert(isa(*sectionsOp.getRegion().op_begin())); // TODO: Perform appropriate actions according to the data-sharing @@ -2125,6 +2177,11 @@ convertOmpSections(Operation &opInst, llvm::IRBuilderBase &builder, builder.restoreIP(*afterIP); + // Close the task-reduction scope before combining the worksharing copies. + if (isTaskReductionMod) + emitTaskReductionModifierFini(/*isWorksharing=*/true, builder, + moduleTranslation); + // Process the reductions if required. return createReductionsAndCleanup( sectionsOp, builder, moduleTranslation, allocaIP, reductionDecls, @@ -3484,15 +3541,6 @@ computeTaskloopBounds(omp::LoopNestOp loopOp, llvm::IRBuilderBase &builder, return llvm::Error::success(); } -// Forward declaration: defined alongside the taskgroup task_reduction -// lowering further down in this file. Shared between omp.taskgroup and -// omp.taskloop.context translation. -static llvm::Value *emitTaskReductionInitCall( - ArrayRef redDecls, - ArrayRef origPtrs, StringRef helperNamePrefix, - llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP, - LLVM::ModuleTranslation &moduleTranslation); - // Converts an OpenMP taskloop construct into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpTaskloopContextOp(omp::TaskloopContextOp contextOp, @@ -4060,8 +4108,11 @@ emitTaskReductionCombFn(omp::DeclareReductionOp decl, StringRef baseName, /// \p allocaIP. \p helperNamePrefix is used to disambiguate the generated /// init/combiner helper symbol names between taskgroup and taskloop callers. /// -/// Returns the `ptr` value produced by `__kmpc_taskred_init` (the taskgroup -/// reduction handle), or null on failure. 
+/// When \p isModifier is false, emits `__kmpc_taskred_init` and returns the +/// `ptr` value it produces (the taskgroup reduction handle). When \p isModifier +/// is true, emits `__kmpc_taskred_modifier_init` instead to open a +/// task-reduction scope for a parallel or worksharing construct, passing +/// \p isWorksharing as the runtime `is_ws` argument. Returns null on failure. /// /// Only the non-byref form is handled here. Byref reductions have already /// been rejected by `checkImplementationStatus`. @@ -4069,7 +4120,8 @@ static llvm::Value *emitTaskReductionInitCall( ArrayRef redDecls, ArrayRef origPtrs, StringRef helperNamePrefix, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP, - LLVM::ModuleTranslation &moduleTranslation) { + LLVM::ModuleTranslation &moduleTranslation, bool isModifier, + bool isWorksharing) { assert(redDecls.size() == origPtrs.size() && "expected one orig pointer per reduction decl"); llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); @@ -4138,7 +4190,7 @@ static llvm::Value *emitTaskReductionInitCall( storeField(6, llvm::ConstantInt::get(i32Ty, 0)); // flags } - // Emit call: __kmpc_taskred_init(gtid, num, &arr). + // Emit the runtime call that registers the task reduction data. llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); uint32_t srcLocSize; llvm::Constant *srcLocStr = @@ -4146,12 +4198,45 @@ static llvm::Value *emitTaskReductionInitCall( llvm::Value *ident = ompBuilder->getOrCreateIdent(srcLocStr, srcLocSize); ompBuilder->updateToLocation(ompLoc); llvm::Value *gtid = ompBuilder->getOrCreateThreadID(ident); + if (isModifier) { + // __kmpc_taskred_modifier_init(loc, gtid, is_ws, num, &arr) opens a + // task-reduction scope for the enclosing parallel/worksharing region. 
+ llvm::FunctionCallee modInit = ompBuilder->getOrCreateRuntimeFunction( + *llvmModule, llvm::omp::OMPRTL___kmpc_taskred_modifier_init); + return builder.CreateCall(modInit, + {ident, gtid, + builder.getInt32(isWorksharing ? 1 : 0), + builder.getInt32(n), arrAlloca}, + ".taskred.desc"); + } + // __kmpc_taskred_init(gtid, num, &arr). llvm::FunctionCallee taskredInit = ompBuilder->getOrCreateRuntimeFunction( *llvmModule, llvm::omp::OMPRTL___kmpc_taskred_init); return builder.CreateCall(taskredInit, {gtid, builder.getInt32(n), arrAlloca}, ".taskred.desc"); } +/// Emits `__kmpc_task_reduction_modifier_fini(loc, gtid, is_ws)` at the current +/// builder insertion point, closing the task-reduction scope opened by the +/// `task` reduction modifier on a parallel or worksharing construct. +static void +emitTaskReductionModifierFini(bool isWorksharing, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation) { + llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); + llvm::Module *llvmModule = moduleTranslation.getLLVMModule(); + llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder); + uint32_t srcLocSize; + llvm::Constant *srcLocStr = + ompBuilder->getOrCreateSrcLocStr(ompLoc, srcLocSize); + llvm::Value *ident = ompBuilder->getOrCreateIdent(srcLocStr, srcLocSize); + ompBuilder->updateToLocation(ompLoc); + llvm::Value *gtid = ompBuilder->getOrCreateThreadID(ident); + llvm::FunctionCallee fini = ompBuilder->getOrCreateRuntimeFunction( + *llvmModule, llvm::omp::OMPRTL___kmpc_task_reduction_modifier_fini); + builder.CreateCall(fini, + {ident, gtid, builder.getInt32(isWorksharing ? 1 : 0)}); +} + /// Converts an OpenMP taskgroup construct into LLVM IR using OpenMPIRBuilder. 
static LogicalResult convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, @@ -4334,6 +4419,20 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, reductionVariableMap, isByRef, deferredStores))) return failure(); + // For `reduction(task, ...)` open a task-reduction scope for the worksharing + // loop. Participating explicit tasks accumulate into the per-thread private + // copies, which the worksharing reduction then combines across threads. + bool isTaskReductionMod = + wsloopOp.getReductionMod() == omp::ReductionModifier::task && + wsloopOp.getNumReductionVars() > 0; + if (isTaskReductionMod && + !emitTaskReductionInitCall(reductionDecls, privateReductionVariables, + "__omp_taskred_mod_", builder, allocaIP, + moduleTranslation, /*isModifier=*/true, + /*isWorksharing=*/true)) + return wsloopOp.emitError( + "failed to emit task reduction modifier initialization"); + // TODO: Handle doacross loops when the ordered clause has a parameter. bool isOrdered = wsloopOp.getOrdered().has_value(); std::optional scheduleMod = wsloopOp.getScheduleMod(); @@ -4443,6 +4542,11 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, // Set the correct branch target for task cancellation popCancelFinalizationCB(cancelTerminators, *ompBuilder, wsloopIP.get()); + // Close the task-reduction scope before the worksharing reduction combine. + if (isTaskReductionMod) + emitTaskReductionModifierFini(/*isWorksharing=*/true, builder, + moduleTranslation); + // Process the reductions if required. 
if (failed(createReductionsAndCleanup( wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls, @@ -4475,6 +4579,13 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, SmallVector privateReductionVariables( opInst.getNumReductionVars()); SmallVector deferredStores; + // Only open a task-reduction scope when the `task` modifier is present and + // there are reduction variables to combine; otherwise the matching fini in + // the reduction-combine path (guarded by getNumReductionVars() > 0) would be + // skipped, leaving the modifier init unbalanced. + bool isTaskReductionMod = + opInst.getReductionMod() == omp::ReductionModifier::task && + opInst.getNumReductionVars() > 0; auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, @@ -4522,6 +4633,17 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, reductionVariableMap, isByRef, deferredStores))) return llvm::make_error(); + // For `reduction(task, ...)` open a task-reduction scope so participating + // explicit tasks accumulate into the per-thread private copies; the + // parallel reduction then combines those copies across the team. + if (isTaskReductionMod && + !emitTaskReductionInitCall(reductionDecls, privateReductionVariables, + "__omp_taskred_mod_", builder, allocaIP, + moduleTranslation, /*isModifier=*/true, + /*isWorksharing=*/false)) + return llvm::createStringError( + "failed to emit task reduction modifier initialization"); + // Save the alloca insertion point on ModuleTranslation stack for use in // nested regions. LLVM::ModuleTranslation::SaveStack frame( @@ -4549,6 +4671,12 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // Move to region cont block builder.SetInsertPoint((*regionBlock)->getTerminator()); + // Close the task-reduction scope before the per-thread reduction + // contributions are combined across the team. 
+ if (isTaskReductionMod) + emitTaskReductionModifierFini(/*isWorksharing=*/false, builder, + moduleTranslation); + // Generate reductions from info llvm::UnreachableInst *tempTerminator = builder.CreateUnreachable(); builder.SetInsertPoint(tempTerminator); diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir new file mode 100644 index 0000000000000..a15b1ee701a4e --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-reduction-task-modifier.mlir @@ -0,0 +1,216 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s + +// The `task` reduction modifier opens a task-reduction scope around the +// parallel / worksharing region. Verify that +// __kmpc_taskred_modifier_init is emitted (with the correct `is_ws` argument) +// after the reduction privates are set up, and that +// __kmpc_task_reduction_modifier_fini is emitted before the reduction combine. + +omp.declare_reduction @add_i32 : i32 +init { +^bb0(%arg0: i32): + %c0 = llvm.mlir.constant(0 : i32) : i32 + omp.yield(%c0 : i32) +} +combiner { +^bb0(%arg0: i32, %arg1: i32): + %s = llvm.add %arg0, %arg1 : i32 + omp.yield(%s : i32) +} + +llvm.func @parallel_task_reduction(%x: !llvm.ptr) { + omp.parallel reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) { + omp.terminator + } + llvm.return +} + +// CHECK: %kmp_taskred_input_t = type { ptr, ptr, i64, ptr, ptr, ptr, i32 } + +// On a parallel construct the modifier init uses is_ws = 0. 
+// CHECK-LABEL: define internal void @parallel_task_reduction..omp_par +// CHECK: %[[ARR:.+]] = alloca [1 x %kmp_taskred_input_t] +// CHECK: call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 0, i32 1, ptr %[[ARR]]) +// CHECK: call void @__kmpc_task_reduction_modifier_fini(ptr @{{.+}}, i32 %{{.+}}, i32 0) + +// ----- + +omp.declare_reduction @add_i32 : i32 +init { +^bb0(%arg0: i32): + %c0 = llvm.mlir.constant(0 : i32) : i32 + omp.yield(%c0 : i32) +} +combiner { +^bb0(%arg0: i32, %arg1: i32): + %s = llvm.add %arg0, %arg1 : i32 + omp.yield(%s : i32) +} + +llvm.func @wsloop_task_reduction(%x: !llvm.ptr) { + %lb = llvm.mlir.constant(1 : i32) : i32 + %ub = llvm.mlir.constant(10 : i32) : i32 + %step = llvm.mlir.constant(1 : i32) : i32 + omp.wsloop reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) { + omp.loop_nest (%iv) : i32 = (%lb) to (%ub) inclusive step (%step) { + omp.yield + } + } + llvm.return +} + +// On a worksharing construct the modifier init uses is_ws = 1. +// CHECK-LABEL: define void @wsloop_task_reduction( +// CHECK: %[[ARR:.+]] = alloca [1 x %kmp_taskred_input_t] +// CHECK: call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 1, i32 1, ptr %[[ARR]]) +// CHECK: call void @__kmpc_task_reduction_modifier_fini(ptr @{{.+}}, i32 %{{.+}}, i32 1) + +// ----- + +omp.declare_reduction @add_i32 : i32 +init { +^bb0(%arg0: i32): + %c0 = llvm.mlir.constant(0 : i32) : i32 + omp.yield(%c0 : i32) +} +combiner { +^bb0(%arg0: i32, %arg1: i32): + %s = llvm.add %arg0, %arg1 : i32 + omp.yield(%s : i32) +} + +llvm.func @sections_task_reduction(%x: !llvm.ptr) { + omp.sections reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) { + omp.section { + ^bb0(%arg: !llvm.ptr): + omp.terminator + } + omp.terminator + } + llvm.return +} + +// On a worksharing (sections) construct the modifier init uses is_ws = 1. 
+// CHECK-LABEL: define void @sections_task_reduction( +// CHECK: %[[ARR:.+]] = alloca [1 x %kmp_taskred_input_t] +// CHECK: call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 1, i32 1, ptr %[[ARR]]) +// CHECK: call void @__kmpc_task_reduction_modifier_fini(ptr @{{.+}}, i32 %{{.+}}, i32 1) + +// ----- + +omp.declare_reduction @add_i32 : i32 +init { +^bb0(%arg0: i32): + %c0 = llvm.mlir.constant(0 : i32) : i32 + omp.yield(%c0 : i32) +} +combiner { +^bb0(%arg0: i32, %arg1: i32): + %s = llvm.add %arg0, %arg1 : i32 + omp.yield(%s : i32) +} + +llvm.func @parallel_two_task_reductions(%x: !llvm.ptr, %y: !llvm.ptr) { + omp.parallel reduction(mod: task, @add_i32 %x -> %p0, @add_i32 %y -> %p1 : !llvm.ptr, !llvm.ptr) { + omp.terminator + } + llvm.return +} + +// With two task-modifier reductions the descriptor array holds two entries and +// the modifier init receives num = 2 (is_ws = 0 on the parallel construct). +// CHECK-LABEL: define internal void @parallel_two_task_reductions..omp_par +// CHECK: %[[ARR:.+]] = alloca [2 x %kmp_taskred_input_t] +// CHECK: call ptr @__kmpc_taskred_modifier_init(ptr @{{.+}}, i32 %{{.+}}, i32 0, i32 2, ptr %[[ARR]]) + +// ----- + +// An empty omp.sections (only a terminator, no omp.section) hits the +// empty-sections early return, so no task-reduction scope is opened: neither +// the modifier init nor the matching fini may be emitted. 
+ +omp.declare_reduction @add_i32 : i32 +init { +^bb0(%arg0: i32): + %c0 = llvm.mlir.constant(0 : i32) : i32 + omp.yield(%c0 : i32) +} +combiner { +^bb0(%arg0: i32, %arg1: i32): + %s = llvm.add %arg0, %arg1 : i32 + omp.yield(%s : i32) +} + +llvm.func @empty_sections_task_reduction(%x: !llvm.ptr) { + omp.sections reduction(mod: task, @add_i32 %x -> %prv : !llvm.ptr) { + omp.terminator + } + llvm.return +} + +// CHECK-LABEL: define void @empty_sections_task_reduction( +// CHECK-NOT: @__kmpc_taskred_modifier_init +// CHECK-NOT: @__kmpc_task_reduction_modifier_fini +// CHECK: ret void + +// ----- + +// A verifier-valid omp.parallel that carries reduction_mod = task but has no +// reduction variables must not open a task-reduction scope. + +llvm.func @parallel_task_mod_no_reductions() { + "omp.parallel"() <{operandSegmentSizes = array, reduction_mod = #omp}> ({ + omp.terminator + }) : () -> () + llvm.return +} + +// CHECK-LABEL: define internal void @parallel_task_mod_no_reductions..omp_par +// CHECK-NOT: @__kmpc_taskred_modifier_init +// CHECK-NOT: @__kmpc_task_reduction_modifier_fini +// CHECK: ret void + +// ----- + +// A verifier-valid omp.wsloop that carries reduction_mod = task but has no +// reduction variables must not open a task-reduction scope. 
+ +llvm.func @wsloop_task_mod_no_reductions() { + %lb = llvm.mlir.constant(1 : i32) : i32 + %ub = llvm.mlir.constant(10 : i32) : i32 + %step = llvm.mlir.constant(1 : i32) : i32 + "omp.wsloop"() <{operandSegmentSizes = array, reduction_mod = #omp}> ({ + "omp.loop_nest"(%lb, %ub, %step) <{loop_inclusive}> ({ + ^bb0(%iv: i32): + "omp.yield"() : () -> () + }) : (i32, i32, i32) -> () + }) : () -> () + llvm.return +} + +// CHECK-LABEL: define void @wsloop_task_mod_no_reductions( +// CHECK-NOT: @__kmpc_taskred_modifier_init +// CHECK-NOT: @__kmpc_task_reduction_modifier_fini +// CHECK: ret void + +// ----- + +// A verifier-valid omp.sections that carries reduction_mod = task but has no +// reduction variables must not open a task-reduction scope. A section body is +// present, so this exercises the reduction-count guard rather than the +// empty-sections early return tested above. + +llvm.func @sections_task_mod_no_reductions() { + "omp.sections"() <{operandSegmentSizes = array, reduction_mod = #omp}> ({ + "omp.section"() ({ + "omp.terminator"() : () -> () + }) : () -> () + "omp.terminator"() : () -> () + }) : () -> () + llvm.return +} + +// CHECK-LABEL: define void @sections_task_mod_no_reductions( +// CHECK-NOT: @__kmpc_taskred_modifier_init +// CHECK-NOT: @__kmpc_task_reduction_modifier_fini +// CHECK: ret void diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir index 377a5bb799be4..4d23fcafc80bd 100644 --- a/mlir/test/Target/LLVMIR/openmp-todo.mlir +++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir @@ -134,6 +134,34 @@ llvm.func @scan_reduction(%lb : i32, %ub : i32, %step : i32, %x : !llvm.ptr) { // ----- +omp.declare_reduction @add_f32 : f32 +init { +^bb0(%arg: f32): + %0 = llvm.mlir.constant(0.0 : f32) : f32 + omp.yield (%0 : f32) +} +combiner { +^bb1(%arg0: f32, %arg1: f32): + %1 = llvm.fadd %arg0, %arg1 : f32 + omp.yield (%1 : f32) +} +atomic { +^bb2(%arg2: !llvm.ptr, %arg3: !llvm.ptr): + %2 = llvm.load %arg3 : !llvm.ptr -> 
f32 + llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32 + omp.yield +} +llvm.func @parallel_task_reduction_modifier_byref(%x : !llvm.ptr) { + // expected-error@below {{not yet implemented: Unhandled clause task reduction modifier with by-ref reduction in omp.parallel operation}} + // expected-error@below {{LLVM Translation failed for operation: omp.parallel}} + omp.parallel reduction(mod: task, byref @add_f32 %x -> %prv : !llvm.ptr) { + omp.terminator + } + llvm.return +} + +// ----- + llvm.func @single_allocate(%x : !llvm.ptr) { // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.single operation}} // expected-error@below {{LLVM Translation failed for operation: omp.single}} From 29fae5ec2445137e157b9ffabefb96447e9c37c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= Date: Wed, 24 Jun 2026 14:31:14 +0200 Subject: [PATCH 329/511] [NFC][analyzer] Remove the NodeBuilder from VisitArrayInitLoopExpr (#204354) Part of my commit series to eliminate the class `NodeBuilder`. 
--- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 77faa675b90b8..4357f0fae4144 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -3190,20 +3190,20 @@ void ExprEngine::VisitCommonDeclRefExpr(const Expr *Ex, const NamedDecl *D, void ExprEngine::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *Ex, ExplodedNode *Pred, ExplodedNodeSet &Dst) { + const Expr *Arr = Ex->getCommonExpr()->getSourceExpr(); + ExplodedNodeSet CheckerPreStmt; getCheckerManager().runCheckersForPreStmt(CheckerPreStmt, Pred, Ex, *this); ExplodedNodeSet EvalSet; - NodeBuilder Bldr(CheckerPreStmt, EvalSet, *currBldrCtx); - - const Expr *Arr = Ex->getCommonExpr()->getSourceExpr(); + if (isa(Ex->getSubExpr())) { + // The constructor visitor has already handled everything, so let's skip + // forward to PostStmt handling by clearing the range of the 'for' loop. + EvalSet.insert(CheckerPreStmt); + CheckerPreStmt.clear(); + } for (auto *Node : CheckerPreStmt) { - - // The constructor visitior has already taken care of everything. - if (isa(Ex->getSubExpr())) - break; - const StackFrame *SF = Node->getStackFrame(); ProgramStateRef state = Node->getState(); @@ -3278,7 +3278,7 @@ void ExprEngine::VisitArrayInitLoopExpr(const ArrayInitLoopExpr *Ex, else Base = UnknownVal(); - Bldr.generateNode(Ex, Node, state->BindExpr(Ex, SF, Base)); + EvalSet.insert(Engine.makeNodeWithBinding(Node, Ex, Base)); } getCheckerManager().runCheckersForPostStmt(Dst, EvalSet, Ex, *this); From f653d3432b079d396c9c1dd700ec52e9e7750e7c Mon Sep 17 00:00:00 2001 From: Arda Serdar Pektezol Date: Wed, 24 Jun 2026 15:36:32 +0300 Subject: [PATCH 330/511] [UnifyFunctionExitNodes] Remove the pass (#205519) The mergereturn pass is not used by anything, so we can go ahead and delete it. 
Related discussion: https://github.com/llvm/llvm-project/pull/204651#issuecomment-4787636904 --- llvm/docs/Passes.md | 5 - .../Transforms/Utils/UnifyFunctionExitNodes.h | 30 ------ llvm/lib/Passes/PassBuilder.cpp | 1 - llvm/lib/Passes/PassRegistry.def | 1 - llvm/lib/Transforms/Utils/CMakeLists.txt | 1 - .../Utils/UnifyFunctionExitNodes.cpp | 94 ------------------- .../unreachable-blocks-status.ll | 67 ------------- 7 files changed, 199 deletions(-) delete mode 100644 llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h delete mode 100644 llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp delete mode 100644 llvm/test/Transforms/UnifyFunctionExitNodes/unreachable-blocks-status.ll diff --git a/llvm/docs/Passes.md b/llvm/docs/Passes.md index 28ea11eb994d9..36159ff0c5206 100644 --- a/llvm/docs/Passes.md +++ b/llvm/docs/Passes.md @@ -745,11 +745,6 @@ Read {doc}`this ` article for more details. -### `mergereturn`: Unify function exit nodes - -Ensure that functions have at most one `ret` instruction in them. -Additionally, it keeps track of which node is the new exit node of the CFG. - ### `partial-inliner`: Partial Inliner This pass performs partial inlining, typically by inlining an `if` statement diff --git a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h deleted file mode 100644 index 92df7b480ff5d..0000000000000 --- a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h +++ /dev/null @@ -1,30 +0,0 @@ -//===-- UnifyFunctionExitNodes.h - Ensure fn's have one return --*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass is used to ensure that functions have at most one return and one -// unreachable instruction in them. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TRANSFORMS_UTILS_UNIFYFUNCTIONEXITNODES_H -#define LLVM_TRANSFORMS_UTILS_UNIFYFUNCTIONEXITNODES_H - -#include "llvm/IR/PassManager.h" -#include "llvm/Pass.h" - -namespace llvm { - -class UnifyFunctionExitNodesPass - : public OptionalPassInfoMixin { -public: - LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); -}; - -} // end namespace llvm - -#endif // LLVM_TRANSFORMS_UTILS_UNIFYFUNCTIONEXITNODES_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index de32ed34a3df4..68ea19332ec33 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -391,7 +391,6 @@ #include "llvm/Transforms/Utils/StripNonLineTableDebugInfo.h" #include "llvm/Transforms/Utils/SymbolRewriter.h" #include "llvm/Transforms/Utils/TriggerCrashPass.h" -#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" #include "llvm/Transforms/Utils/UnifyLoopExits.h" #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" #include "llvm/Transforms/Vectorize/LoopIdiomVectorize.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 370106e225a9b..84804c3e9d2a8 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -493,7 +493,6 @@ FUNCTION_PASS("mem2reg", PromotePass()) FUNCTION_PASS("memcpyopt", MemCpyOptPass()) FUNCTION_PASS("memprof", MemProfilerPass()) FUNCTION_PASS("mergeicmps", MergeICmpsPass()) -FUNCTION_PASS("mergereturn", UnifyFunctionExitNodesPass()) FUNCTION_PASS("move-auto-init", MoveAutoInitPass()) FUNCTION_PASS("nary-reassociate", NaryReassociatePass()) FUNCTION_PASS("newgvn", 
NewGVNPass()) diff --git a/llvm/lib/Transforms/Utils/CMakeLists.txt b/llvm/lib/Transforms/Utils/CMakeLists.txt index 0088d439e6895..ebff5af0dd7f3 100644 --- a/llvm/lib/Transforms/Utils/CMakeLists.txt +++ b/llvm/lib/Transforms/Utils/CMakeLists.txt @@ -92,7 +92,6 @@ add_llvm_component_library(LLVMTransformUtils StripNonLineTableDebugInfo.cpp SymbolRewriter.cpp TriggerCrashPass.cpp - UnifyFunctionExitNodes.cpp UnifyLoopExits.cpp Utils.cpp ValueMapper.cpp diff --git a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp deleted file mode 100644 index 17fa30e436c2f..0000000000000 --- a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ /dev/null @@ -1,94 +0,0 @@ -//===- UnifyFunctionExitNodes.cpp - Make all functions have a single exit -===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass is used to ensure that functions have at most one return and one -// unreachable instruction in them. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/UnifyFunctionExitNodes.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Type.h" -using namespace llvm; - -namespace { - -bool unifyUnreachableBlocks(Function &F) { - std::vector UnreachableBlocks; - - for (BasicBlock &I : F) - if (isa(I.getTerminator())) - UnreachableBlocks.push_back(&I); - - if (UnreachableBlocks.size() <= 1) - return false; - - BasicBlock *UnreachableBlock = - BasicBlock::Create(F.getContext(), "UnifiedUnreachableBlock", &F); - new UnreachableInst(F.getContext(), UnreachableBlock); - - for (BasicBlock *BB : UnreachableBlocks) { - BB->back().eraseFromParent(); // Remove the unreachable inst. - UncondBrInst::Create(UnreachableBlock, BB); - } - - return true; -} - -bool unifyReturnBlocks(Function &F) { - std::vector ReturningBlocks; - - for (BasicBlock &I : F) - if (isa(I.getTerminator())) - ReturningBlocks.push_back(&I); - - if (ReturningBlocks.size() <= 1) - return false; - - // Insert a new basic block into the function, add PHI nodes (if the function - // returns values), and convert all of the return instructions into - // unconditional branches. - BasicBlock *NewRetBlock = BasicBlock::Create(F.getContext(), - "UnifiedReturnBlock", &F); - - PHINode *PN = nullptr; - if (F.getReturnType()->isVoidTy()) { - ReturnInst::Create(F.getContext(), nullptr, NewRetBlock); - } else { - // If the function doesn't return void... add a PHI node to the block... - PN = PHINode::Create(F.getReturnType(), ReturningBlocks.size(), - "UnifiedRetVal"); - PN->insertInto(NewRetBlock, NewRetBlock->end()); - ReturnInst::Create(F.getContext(), PN, NewRetBlock); - } - - // Loop over all of the blocks, replacing the return instruction with an - // unconditional branch. 
- for (BasicBlock *BB : ReturningBlocks) { - // Add an incoming element to the PHI node for every return instruction that - // is merging into this new block... - if (PN) - PN->addIncoming(BB->getTerminator()->getOperand(0), BB); - - BB->back().eraseFromParent(); // Remove the return insn - UncondBrInst::Create(NewRetBlock, BB); - } - - return true; -} -} // namespace - -PreservedAnalyses UnifyFunctionExitNodesPass::run(Function &F, - FunctionAnalysisManager &AM) { - bool Changed = false; - Changed |= unifyUnreachableBlocks(F); - Changed |= unifyReturnBlocks(F); - return Changed ? PreservedAnalyses() : PreservedAnalyses::all(); -} diff --git a/llvm/test/Transforms/UnifyFunctionExitNodes/unreachable-blocks-status.ll b/llvm/test/Transforms/UnifyFunctionExitNodes/unreachable-blocks-status.ll deleted file mode 100644 index a6832c79dad43..0000000000000 --- a/llvm/test/Transforms/UnifyFunctionExitNodes/unreachable-blocks-status.ll +++ /dev/null @@ -1,67 +0,0 @@ -; RUN: opt -passes='break-crit-edges,lower-switch,mergereturn' -S < %s | FileCheck %s - -; The pass did previously not report the correct Modified status in the case -; where a function had at most one return block, and an unified unreachable -; block was created. This was caught by the pass return status check that is -; hidden under EXPENSIVE_CHECKS. 
- -; CHECK: for.foo.body2: -; CHECK-NEXT: br label %UnifiedUnreachableBlock - -; CHECK: for.foo.end: -; CHECK-NEXT: br label %UnifiedUnreachableBlock - -; CHECK: UnifiedUnreachableBlock: -; CHECK-NEXT: unreachable - -define i32 @foo() { -entry: - br label %for.foo.cond - -for.foo.cond: ; preds = %entry - br i1 false, label %for.foo.body, label %for.foo.end3 - -for.foo.body: ; preds = %for.foo.cond - br label %for.foo.cond1 - -for.foo.cond1: ; preds = %for.foo.body - br i1 false, label %for.foo.body2, label %for.foo.end - -for.foo.body2: ; preds = %for.foo.cond1 - unreachable - -for.foo.end: ; preds = %for.foo.cond1 - unreachable - -for.foo.end3: ; preds = %for.foo.cond - ret i32 undef -} - -; CHECK: for.bar.body2: -; CHECK-NEXT: br label %UnifiedUnreachableBlock - -; CHECK: for.bar.end: -; CHECK-NEXT: br label %UnifiedUnreachableBlock - -; CHECK: UnifiedUnreachableBlock: -; CHECK-NEXT: unreachable - -define void @bar() { -entry: - br label %for.bar.cond - -for.bar.cond: ; preds = %entry - br i1 false, label %for.bar.body, label %for.bar.end - -for.bar.body: ; preds = %for.bar.cond - br label %for.bar.cond1 - -for.bar.cond1: ; preds = %for.bar.body - br i1 false, label %for.bar.body2, label %for.bar.end - -for.bar.body2: ; preds = %for.bar.cond1 - unreachable - -for.bar.end: ; preds = %for.bar.cond1 - unreachable -} From 6e48656e7c6de8a83320958f8aa7dab3e3b74e58 Mon Sep 17 00:00:00 2001 From: Fady Farag Date: Wed, 24 Jun 2026 07:41:36 -0500 Subject: [PATCH 331/511] [clang][test] Use `FileCheck` in `Rewriter/objc-modern-getclass-proto.mm` (#204272) The test had `CHECK` directives that were never executed because no `RUN` line invoked `FileCheck` on the output. The test also used a fragile runtime, which invoked the fragile rewriter instead of the modern one the test was written for. Switch to a non-fragile runtime so the modern rewriter runs as the test intended. 
--- clang/test/Rewriter/objc-modern-getclass-proto.mm | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/clang/test/Rewriter/objc-modern-getclass-proto.mm b/clang/test/Rewriter/objc-modern-getclass-proto.mm index da417477aa12b..81947460b4cfb 100644 --- a/clang/test/Rewriter/objc-modern-getclass-proto.mm +++ b/clang/test/Rewriter/objc-modern-getclass-proto.mm @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -E %s -o %t.mm -// RUN: %clang_cc1 -x objective-c++ -fblocks -fms-extensions -rewrite-objc -fobjc-runtime=macosx-fragile-10.5 %t.mm -o %t-rw.cpp +// RUN: %clang_cc1 -x objective-c++ -fblocks -fms-extensions -rewrite-objc -fobjc-runtime=macosx-10.7 %t.mm -o %t-rw.cpp +// RUN: FileCheck --input-file=%t-rw.cpp %s @interface I @end @implementation I @end From 75c7431baaf31312ec56a631b8e1333722d535eb Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 24 Jun 2026 14:48:16 +0200 Subject: [PATCH 332/511] [libc++] Move _LIBCPP_CONCAT{,3} to <__configuration/utility.h> (#205533) The macro is already used in `<__configuration/attributes.h>` and just happens to work because we include both headers. `<__configuration/utility.h>` seems like the obvious place to put the macros. --- libcxx/include/__config | 5 +---- libcxx/include/__configuration/utility.h | 4 ++++ 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 714cd0fd26b36..802440bae5ef6 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -28,6 +28,7 @@ # include <__configuration/language.h> # include <__configuration/namespace.h> # include <__configuration/platform.h> +# include <__configuration/utility.h> // The attributes supported by clang are documented at https://clang.llvm.org/docs/AttributeReference.html @@ -36,10 +37,6 @@ // defined to XXYYZZ. 
# define _LIBCPP_VERSION 230000 -# define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y -# define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) -# define _LIBCPP_CONCAT3(X, Y, Z) _LIBCPP_CONCAT(X, _LIBCPP_CONCAT(Y, Z)) - # ifndef __has_constexpr_builtin # define __has_constexpr_builtin(x) 0 # endif diff --git a/libcxx/include/__configuration/utility.h b/libcxx/include/__configuration/utility.h index 81e91887614d3..2f06af151dd05 100644 --- a/libcxx/include/__configuration/utility.h +++ b/libcxx/include/__configuration/utility.h @@ -19,4 +19,8 @@ #define _LIBCPP_TOSTRING2(x) #x #define _LIBCPP_TOSTRING(x) _LIBCPP_TOSTRING2(x) +#define _LIBCPP_CONCAT_IMPL(_X, _Y) _X##_Y +#define _LIBCPP_CONCAT(_X, _Y) _LIBCPP_CONCAT_IMPL(_X, _Y) +#define _LIBCPP_CONCAT3(X, Y, Z) _LIBCPP_CONCAT(X, _LIBCPP_CONCAT(Y, Z)) + #endif // _LIBCPP___CONFIGURATION_UTILITY_H From 26e0226be3e414575c22ab78eb18594a9377b527 Mon Sep 17 00:00:00 2001 From: "Ivan R. Ivanov" Date: Wed, 24 Jun 2026 14:50:35 +0200 Subject: [PATCH 333/511] [offload][unittest] Set rpath to make sure tests use the correct lib (#205542) Depending on system setup, the unit tests could not find the just built libLLVMOffload.so. Set the BUILD_RPATH to make sure they correctly use the library from the current build. 
--- offload/unittests/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt index f02571c8d0bc8..b08f7213d5344 100644 --- a/offload/unittests/CMakeLists.txt +++ b/offload/unittests/CMakeLists.txt @@ -146,6 +146,7 @@ function(add_offload_unittest test_dirname) target_compile_definitions(${target_name} PRIVATE DEVICE_CODE_PATH="${OFFLOAD_TEST_DEVICE_CODE_PATH}") target_link_libraries(${target_name} PRIVATE ${PLUGINS_TEST_COMMON}) target_include_directories(${target_name} PRIVATE ${PLUGINS_TEST_INCLUDE}) + set_target_properties(${target_name} PROPERTIES BUILD_RPATH "${LIBOMPTARGET_LIBRARY_DIR}") endfunction() function(add_conformance_test test_name) From c15b7694ebf2efca6d1e7e2df1b9c8e3bc067f9e Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 24 Jun 2026 14:52:28 +0200 Subject: [PATCH 334/511] [libc++] Move <features.h> include to <__configuration/platform.h> (#205548) Including `<features.h>` is platform-specific configuration and should therefore be in `<__configuration/platform.h>`. --- libcxx/include/__configuration/platform.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libcxx/include/__configuration/platform.h b/libcxx/include/__configuration/platform.h index 644fe1724e42e..ebd638e663215 100644 --- a/libcxx/include/__configuration/platform.h +++ b/libcxx/include/__configuration/platform.h @@ -30,6 +30,10 @@ // ... add new file formats here ... #endif +#if defined(__MVS__) +# include // for __NATIVE_ASCII_F +#endif + // Need to detect which libc we're using if we're on Linux. #if (defined(__linux__) || defined(__AMDGPU__) || defined(__NVPTX__)) && __has_include() # include From 02af39ef77bb913ba4ea8dca323a586e1f6d0915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Don=C3=A1t=20Nagy?= Date: Wed, 24 Jun 2026 14:53:15 +0200 Subject: [PATCH 335/511] [NFC][analyzer] Remove the NodeBuilder from eagerly assume (#204371) Part of my commit series to gradually eliminate the class `NodeBuilder`.
Admittedly this is one of the few places where the implementation with the `NodeBuilder` is more concise than the new code. This is caused by two factors: 1. This is an optional step in the analysis, so the "put source nodes in destination unless we generate a child node from them" behavior of `NodeBuilder` -- which is often completely useless -- was helpful on two branches. 2. Making nodes with tags is very rare, so I intentionally did not include support for tagging in `makeNodeWithBinding` -- but this is one of the few places where tags are applied. --- clang/lib/StaticAnalyzer/Core/ExprEngine.cpp | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp index 4357f0fae4144..cfb294736ee02 100644 --- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp +++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp @@ -3711,20 +3711,20 @@ REGISTER_TRAIT_WITH_PROGRAMSTATE(LastEagerlyAssumeExprIfSuccessful, void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst, ExplodedNodeSet &Src, const Expr *Ex) { - NodeBuilder Bldr(Src, Dst, *currBldrCtx); - for (ExplodedNode *Pred : Src) { + const StackFrame *SF = Pred->getStackFrame(); // Test if the previous node was as the same expression. This can happen // when the expression fails to evaluate to anything meaningful and // (as an optimization) we don't generate a node. ProgramPoint P = Pred->getLocation(); if (!P.getAs() || P.castAs().getStmt() != Ex) { + Dst.insert(Pred); continue; } ProgramStateRef State = Pred->getState(); State = State->set(nullptr); - SVal V = State->getSVal(Ex, Pred->getStackFrame()); + SVal V = State->getSVal(Ex, SF); std::optional SEV = V.getAs(); if (SEV && SEV->isExpression()) { const auto &[TrueTag, FalseTag] = getEagerlyAssumeBifurcationTags(); @@ -3739,16 +3739,20 @@ void ExprEngine::evalEagerlyAssumeBifurcation(ExplodedNodeSet &Dst, // First assume that the condition is true. 
if (StateTrue) { SVal Val = svalBuilder.makeIntVal(1U, Ex->getType()); - StateTrue = StateTrue->BindExpr(Ex, Pred->getStackFrame(), Val); - Bldr.generateNode(Ex, Pred, StateTrue, TrueTag); + StateTrue = StateTrue->BindExpr(Ex, SF, Val); + PostStmt PostStmtTrue(Ex, SF, TrueTag); + Dst.insert(Engine.makeNode(PostStmtTrue, StateTrue, Pred)); } // Next, assume that the condition is false. if (StateFalse) { SVal Val = svalBuilder.makeIntVal(0U, Ex->getType()); - StateFalse = StateFalse->BindExpr(Ex, Pred->getStackFrame(), Val); - Bldr.generateNode(Ex, Pred, StateFalse, FalseTag); + StateFalse = StateFalse->BindExpr(Ex, SF, Val); + PostStmt PostStmtFalse(Ex, SF, FalseTag); + Dst.insert(Engine.makeNode(PostStmtFalse, StateFalse, Pred)); } + } else { + Dst.insert(Pred); } } } From d6d26f917a8d5185142feb0745606c08dd7ca374 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 24 Jun 2026 14:58:42 +0200 Subject: [PATCH 336/511] [libc++] Remove <features.h> include from <__config> (#205549) The `<features.h>` include was moved to `<__configuration/platform.h>` in #205548, which was also supposed to remove the include in `<__config>`. --- libcxx/include/__config | 4 ---- 1 file changed, 4 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 802440bae5ef6..fc7a121d52783 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -66,10 +66,6 @@ # define _LIBCPP_ABI_VCRUNTIME # endif -# if defined(__MVS__) -# include // for __NATIVE_ASCII_F -# endif - # if defined(_WIN32) # define _LIBCPP_WIN32API # define _LIBCPP_SHORT_WCHAR 1 From 332c1ca3f642fc06cac7afa7bd5a3f4d49d6ed0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Wed, 24 Jun 2026 14:08:28 +0100 Subject: [PATCH 337/511] [clang][ARM] Delete dead-code (nfc) (#205404) Removes dead code that I accidentally introduced in #195825. Thank you @shafik for pointing this out!
--- clang/lib/CodeGen/TargetBuiltins/ARM.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp index aa32bc2a1d5a7..9c41a807c62cb 100644 --- a/clang/lib/CodeGen/TargetBuiltins/ARM.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/ARM.cpp @@ -6548,7 +6548,6 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, VTy = llvm::FixedVectorType::get(HalfTy, 4); llvm::Type *Tys[2] = {Ty, VTy}; return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv"); - return Builder.CreateTrunc(Ops[0], HalfTy); } case NEON::BI__builtin_neon_vminnmvq_f16: { Int = Intrinsic::aarch64_neon_fminnmv; From e84a5a4bdaecd3bd7fcad3f98ccc7b34d9995f46 Mon Sep 17 00:00:00 2001 From: Tony Guillot Date: Wed, 24 Jun 2026 15:31:56 +0200 Subject: [PATCH 338/511] [Clang][Docs] Fixed typos of sentinel attribute (#205539) I have previously documented the sentinel attribute but some typos have been missed during the review process. --- clang/include/clang/Basic/AttrDocs.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 0f1a66ec34197..7c1c88241aaa8 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -10200,7 +10200,7 @@ call. The attribute accepts two optional arguments: the first argument is the position of the expected sentinel value, starting from the last parameter. The second argument describes whether the last fixed parameter is treated as a valid sentinel value when set to '1'. -All arguments described above defaults to '0' when elided. +All arguments described above default to '0' when elided. The attribute is also supported with blocks and in Objective-C. .. code-block:: c @@ -10214,7 +10214,7 @@ The attribute is also supported with blocks and in Objective-C. 
foo("Another", "example", NULL); foo("Missing", "sentinel"); // Not OK - bar(1, 2, NULL, 3); // OK: sentinel value at the 2nd to last positon + bar(1, 2, NULL, 3); // OK: sentinel value at the 2nd to last position bar(1, 2, 3, nullptr, 4); // OK: `nullptr` is valid in C23 bar(1, 2, 3, 4, NULL); // Not OK From 2474100129824af88e2c92117db7394a9b6044df Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 24 Jun 2026 14:44:59 +0100 Subject: [PATCH 339/511] [X86] madd.ll - add additional load test for matchPMADDWD folds that fail with irregular source types (#205554) Ensure #205391 doesn't crash with non-pow2/illegal types --- llvm/test/CodeGen/X86/madd.ll | 53 +++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll index 63b390f4b9bdf..5912e6aa3a3c4 100644 --- a/llvm/test/CodeGen/X86/madd.ll +++ b/llvm/test/CodeGen/X86/madd.ll @@ -3901,6 +3901,59 @@ define <4 x i32> @oddvector_sext(<13 x i16> %A) { ret <4 x i32> %ret } +define <4 x i32> @oddvector_sext_load(ptr %p) { +; SSE2-LABEL: oddvector_sext_load: +; SSE2: # %bb.0: +; SSE2-NEXT: movdqa (%rdi), %xmm0 +; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2] +; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] +; SSE2-NEXT: paddd %xmm2, %xmm0 +; SSE2-NEXT: retq +; +; SSE42-LABEL: oddvector_sext_load: +; SSE42: # %bb.0: +; SSE42-NEXT: movdqa (%rdi), %xmm1 +; SSE42-NEXT: pmovsxwd %xmm1, %xmm0 +; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; SSE42-NEXT: pmovsxwd %xmm1, %xmm1 +; SSE42-NEXT: phaddd %xmm1, %xmm0 +; SSE42-NEXT: retq +; +; AVX1-LABEL: oddvector_sext_load: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm0 +; AVX1-NEXT: vpmovsxwd (%rdi), %xmm1 +; 
AVX1-NEXT: vphaddd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: oddvector_sext_load: +; AVX2: # %bb.0: +; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: oddvector_sext_load: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmovsxwd (%rdi), %zmm0 +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq + %A = load <13 x i16>, ptr %p + %a = sext <13 x i16> %A to <13 x i32> + %odd = shufflevector <13 x i32> %a, <13 x i32> undef, <4 x i32> + %even = shufflevector <13 x i32> %a, <13 x i32> undef, <4 x i32> + %ret = add <4 x i32> %odd, %even + ret <4 x i32> %ret +} + define <3 x i32> @oddvector_shl(<12 x i16> %A) { ; SSE2-LABEL: oddvector_shl: ; SSE2: # %bb.0: From 86b214c28c82327eb172c1389ec9ad52e02be1ef Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Wed, 24 Jun 2026 08:51:36 -0500 Subject: [PATCH 340/511] [flang][OpenMP] Parsing and semantics of locators as part of OmpObject (#203910) Allow function call references and reserved locator names as parts of OmpObject. Function calls and array element accesses have the same syntax, and the OmpObject parser will parse them as function calls. This is then corrected (if needed) immediately after the name resolution is complete. There are no clause-specific semantic checks of proper locators. Existing code will check if a proper locator is specified on a clause that allows it. Lowering of proper locators to MLIR is not implemented, and a TODO message is emitted. 
--- flang/include/flang/Parser/dump-parse-tree.h | 2 +- flang/include/flang/Parser/parse-tree.h | 23 +++++---- flang/include/flang/Semantics/expression.h | 4 +- flang/include/flang/Semantics/symbol.h | 4 +- flang/lib/Lower/OpenMP/ClauseProcessor.cpp | 22 ++++++++ flang/lib/Lower/OpenMP/Clauses.cpp | 51 +++++++++++++++---- flang/lib/Parser/openmp-parsers.cpp | 37 +++++++++----- flang/lib/Parser/openmp-utils.cpp | 12 +++-- flang/lib/Parser/unparse.cpp | 2 +- flang/lib/Semantics/check-omp-loop.cpp | 2 +- flang/lib/Semantics/check-omp-structure.cpp | 28 ++++++++-- flang/lib/Semantics/check-omp-structure.h | 1 + flang/lib/Semantics/check-omp-variant.cpp | 2 +- flang/lib/Semantics/openmp-utils.cpp | 48 ++++++++++++++--- flang/lib/Semantics/resolve-directives.cpp | 3 ++ flang/lib/Semantics/resolve-names.cpp | 16 +++++- flang/lib/Semantics/rewrite-parse-tree.cpp | 17 +++++++ .../OpenMP/Todo/locator-call-affinity.f90 | 13 +++++ .../Lower/OpenMP/Todo/locator-call-from.f90 | 12 +++++ .../Lower/OpenMP/Todo/locator-call-map.f90 | 13 +++++ .../Lower/OpenMP/Todo/locator-call-to.f90 | 12 +++++ .../Lower/OpenMP/Todo/locator-reserved.f90 | 11 ++++ .../Parser/OpenMP/allocate-align-tree.f90 | 4 +- .../Parser/OpenMP/allocate-tree-spec-part.f90 | 8 +-- flang/test/Parser/OpenMP/allocate-tree.f90 | 6 +-- flang/test/Parser/OpenMP/declare-variant.f90 | 8 +-- flang/test/Parser/OpenMP/depobj-construct.f90 | 8 +-- flang/test/Parser/OpenMP/groupprivate.f90 | 6 +-- .../Parser/OpenMP/metadirective-dirspec.f90 | 14 ++--- .../Parser/OpenMP/metadirective-flush.f90 | 4 +- .../OpenMP/openmp6-directive-spellings.f90 | 2 +- flang/test/Parser/OpenMP/threadprivate.f90 | 4 +- .../Semantics/OpenMP/affinity-invalid.f90 | 8 --- .../Semantics/OpenMP/depend-substring.f90 | 9 ---- .../Semantics/OpenMP/reserved-locator.f90 | 6 +++ llvm/include/llvm/Frontend/OpenMP/OMP.h | 2 + llvm/lib/Frontend/OpenMP/OMP.cpp | 6 +++ 37 files changed, 320 insertions(+), 110 deletions(-) create mode 100644 
flang/test/Lower/OpenMP/Todo/locator-call-affinity.f90 create mode 100644 flang/test/Lower/OpenMP/Todo/locator-call-from.f90 create mode 100644 flang/test/Lower/OpenMP/Todo/locator-call-map.f90 create mode 100644 flang/test/Lower/OpenMP/Todo/locator-call-to.f90 create mode 100644 flang/test/Lower/OpenMP/Todo/locator-reserved.f90 create mode 100644 flang/test/Semantics/OpenMP/reserved-locator.f90 diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 1205101c21fcf..ceba23d7d4706 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -667,7 +667,6 @@ class ParseTreeDumper { NODE(parser, OmpLinearModifier) NODE_ENUM(OmpLinearModifier, Value) NODE(parser, OmpLocator) - NODE(parser, OmpLocatorList) NODE(parser, OmpLooprangeClause) NODE(parser, OmpLowerBound) NODE(parser, OmpMapClause) @@ -722,6 +721,7 @@ class ParseTreeDumper { NODE_ENUM(OmpRefModifier, Value) NODE(parser, OmpReplayableClause) NODE(parser, OmpRequiresDirective) + NODE(parser, OmpReservedIdentifier) NODE(parser, OmpReverseOffloadClause) NODE(parser, OmpScheduleClause) NODE(OmpScheduleClause, Modifier) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index ee6288539395c..ea4ce1882eb1b 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3599,6 +3599,16 @@ struct OmpTypeNameList { WRAPPER_CLASS_BOILERPLATE(OmpTypeNameList, std::list); }; +struct OmpReservedIdentifier { + WRAPPER_CLASS_BOILERPLATE(OmpReservedIdentifier, Name); +}; + +// "Proper" locator, i.e. a function reference or a reserved locator. +struct OmpLocator { + UNION_CLASS_BOILERPLATE(OmpLocator); + std::variant u; +}; + // 2.1 Directives or clauses may accept a list or extended-list. // A list item is a variable, array section or common block name (enclosed // in slashes). An extended list item is a list item or a procedure Name. 
@@ -3612,7 +3622,7 @@ struct OmpObject { CharBlock source; }; UNION_CLASS_BOILERPLATE(OmpObject); - std::variant u; + std::variant u; }; struct OmpObjectList { @@ -3688,15 +3698,6 @@ struct OmpInitializerExpression : public OmpStylizedExpression { }; inline namespace arguments { -struct OmpLocator { - UNION_CLASS_BOILERPLATE(OmpLocator); - std::variant u; -}; - -struct OmpLocatorList { - WRAPPER_CLASS_BOILERPLATE(OmpLocatorList, std::list); -}; - // Ref: [4.5:58-60], [5.0:58-60], [5.1:63-68], [5.2:197-198], [6.0:334-336] // // Argument to DECLARE VARIANT with the base-name present. (When only @@ -3737,7 +3738,7 @@ struct OmpReductionSpecifier { struct OmpArgument { CharBlock source; UNION_CLASS_BOILERPLATE(OmpArgument); - std::variant u; diff --git a/flang/include/flang/Semantics/expression.h b/flang/include/flang/Semantics/expression.h index 598cb31e851f8..75468d683af48 100644 --- a/flang/include/flang/Semantics/expression.h +++ b/flang/include/flang/Semantics/expression.h @@ -252,6 +252,8 @@ class ExpressionAnalyzer { MaybeExpr Analyze(const parser::InitialDataTarget &); MaybeExpr Analyze(const parser::NullInit &); MaybeExpr Analyze(const parser::StmtFunctionStmt &); + MaybeExpr Analyze(const parser::FunctionReference &, + std::optional * = nullptr); void Analyze(const parser::CallStmt &); const Assignment *Analyze(const parser::AssignmentStmt &); @@ -293,8 +295,6 @@ class ExpressionAnalyzer { MaybeExpr Analyze(const parser::CharLiteralConstantSubstring &); MaybeExpr Analyze(const parser::SubstringInquiry &); MaybeExpr Analyze(const parser::ArrayConstructor &); - MaybeExpr Analyze(const parser::FunctionReference &, - std::optional * = nullptr); MaybeExpr Analyze(const parser::Expr::Parentheses &); MaybeExpr Analyze(const parser::Expr::UnaryPlus &); MaybeExpr Analyze(const parser::Expr::Negate &); diff --git a/flang/include/flang/Semantics/symbol.h b/flang/include/flang/Semantics/symbol.h index 23c26ba733e86..e0511b5b4803d 100644 --- 
a/flang/include/flang/Semantics/symbol.h +++ b/flang/include/flang/Semantics/symbol.h @@ -925,8 +925,8 @@ class Symbol { // OpenMP special variables OmpInVar, OmpOrigVar, OmpOutVar, OmpPrivVar, // OpenMP miscellaneous flags - OmpCommonBlock, OmpReduction, OmpInReduction, OmpAligned, OmpNontemporal, - OmpAllocate, OmpDeclarativeAllocateDirective, + OmpReserved, OmpCommonBlock, OmpReduction, OmpInReduction, OmpAligned, + OmpNontemporal, OmpAllocate, OmpDeclarativeAllocateDirective, OmpExecutableAllocateDirective, OmpDeclareSimd, OmpDeclareTarget, OmpThreadprivate, OmpDeclareReduction, OmpFlushed, OmpCriticalLock, OmpIfSpecified, OmpNone, OmpPreDetermined, OmpExplicit, OmpImplicit, diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index 4f19dfb98024d..f1ccb64e3dfb3 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -29,6 +29,24 @@ namespace Fortran { namespace lower { namespace omp { +static void TodoLocators(mlir::Location loc, const omp::ObjectList &objects) { + for (const omp::Object &object : objects) { + if (auto &ref = object.ref()) { + auto op = GetTopLevelOperation(*ref).first; + if (op == evaluate::operation::Operator::Call) + TODO(loc, "Function call locators are not supported yet"); + } + semantics::Symbol *symbol = object.sym(); + if (symbol->test(semantics::Symbol::Flag::OmpReserved)) { + std::string name = + parser::ToLowerCaseLetters(object.sym()->name().ToString()); + if (llvm::is_contained(llvm::omp::getReservedLocatorNames(), name)) { + TODO(loc, "Reserved locators are not supported yet"); + } + } + } +} + using ReductionModifier = Fortran::lower::omp::clause::Reduction::ReductionModifier; @@ -971,6 +989,8 @@ bool ClauseProcessor::processAffinity( std::get>(clause.t); collectIteratorIVs(clause, converter, stmtCtx, iteratorRanges, ivSyms); + TodoLocators(clauseLocation, objects); + for (const omp::Object &object : objects) { llvm::SmallVector 
bounds; std::stringstream asFortran; @@ -1972,6 +1992,7 @@ bool ClauseProcessor::processMap( if (iterator) TODO(currentLocation, "Support for iterator modifiers is not implemented yet"); + TodoLocators(currentLocation, objects); processMapObjects(stmtCtx, clauseLocation, std::get(clause.t), mapTypeBits, @@ -2007,6 +2028,7 @@ bool ClauseProcessor::processMotionClauses(lower::StatementContext &stmtCtx, if (iterator) TODO(clauseLocation, "Iterator modifier is not supported yet"); + TodoLocators(clauseLocation, objects); processMapObjects(stmtCtx, clauseLocation, objects, mapTypeBits, parentMemberIndices, result.mapVars, mapObjects, diff --git a/flang/lib/Lower/OpenMP/Clauses.cpp b/flang/lib/Lower/OpenMP/Clauses.cpp index 03852dc9e7d74..34dc6ed56b435 100644 --- a/flang/lib/Lower/OpenMP/Clauses.cpp +++ b/flang/lib/Lower/OpenMP/Clauses.cpp @@ -57,6 +57,12 @@ struct SymbolAndDesignatorExtractor { evaluate::AsGenericExpr(AsRvalueRef(e))); } + template + static SymbolWithDesignator visit(const evaluate::FunctionRef &e) { + return std::make_tuple(symbol_addr(*e.proc().GetSymbol()), + evaluate::AsGenericExpr(AsRvalueRef(e))); + } + static SymbolWithDesignator visit(const evaluate::ProcedureDesignator &e) { return std::make_tuple(symbol_addr(*e.GetSymbol()), std::nullopt); } @@ -78,12 +84,16 @@ struct SymbolAndDesignatorExtractor { if (maybeRef) { if (&maybeRef->GetLastSymbol() == symbol) return; // Symbol with a designator for it -> OK - llvm_unreachable("Expecting designator for given symbol"); + llvm_unreachable("Symbol mismatch"); + } else if (auto *ref = evaluate::UnwrapProcedureRef(*maybeDsg)) { + if (ref->proc().GetSymbol() == symbol) + return; + llvm_unreachable("Symbol mismatch"); } else { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) maybeDsg->dump(); #endif - llvm_unreachable("Expecting DataRef designator"); + llvm_unreachable("Unexpected expression"); } } }; @@ -110,6 +120,14 @@ Object makeObject(const parser::Designator &dsg, return Object{std::get<0>(sd), 
std::move(std::get<1>(sd))}; } +Object makeObject(const parser::FunctionReference &ref, + semantics::SemanticsContext &semaCtx) { + evaluate::ExpressionAnalyzer ea{semaCtx}; + SymbolWithDesignator sd = getSymbolAndDesignator(ea.Analyze(ref)); + SymbolAndDesignatorExtractor::verify(sd); + return Object{std::get<0>(sd), std::move(std::get<1>(sd))}; +} + Object makeObject(const parser::StructureComponent &comp, semantics::SemanticsContext &semaCtx) { evaluate::ExpressionAnalyzer ea{semaCtx}; @@ -127,8 +145,24 @@ Object makeObject(const parser::OmpObject &object, assert(name->symbol && "Expecting Symbol"); return Object{name->symbol, std::nullopt}; } - // OmpObject is std::variant; - return makeObject(std::get(object.u), semaCtx); + assert(!std::holds_alternative(object.u) && + "Invalid object should have been caught in semantics"); + // OmpObject is std::variant; + if (auto *desg = std::get_if(&object.u)) + return makeObject(*desg, semaCtx); + if (auto *locator = std::get_if(&object.u)) { + return common::visit( // + common::visitors{ + [&](const parser::OmpReservedIdentifier &x) { + return makeObject(x.v, semaCtx); + }, + [&](const parser::FunctionReference &x) { + return makeObject(x, semaCtx); + }, + }, + locator->u); + } + llvm_unreachable("Unexpected OmpObject"); } Object makeObject(const parser::EntityDecl &decl, @@ -139,13 +173,10 @@ Object makeObject(const parser::EntityDecl &decl, ObjectList makeObjects(const parser::OmpArgumentList &objects, semantics::SemanticsContext &semaCtx) { return makeList(objects.v, [&](const parser::OmpArgument &arg) { - return common::visit( + return common::visit( // common::visitors{ - [&](const parser::OmpLocator &locator) -> Object { - if (auto *object = std::get_if(&locator.u)) { - return makeObject(*object, semaCtx); - } - llvm_unreachable("Expecting object"); + [&](const parser::OmpObject &object) -> Object { + return makeObject(object, semaCtx); }, [](auto &&s) -> Object { // llvm_unreachable("Expecting object"); diff --git 
a/flang/lib/Parser/openmp-parsers.cpp b/flang/lib/Parser/openmp-parsers.cpp index 7016c688a572d..8a18bcc9e4485 100644 --- a/flang/lib/Parser/openmp-parsers.cpp +++ b/flang/lib/Parser/openmp-parsers.cpp @@ -281,6 +281,27 @@ TYPE_PARSER(construct( "RELEASE" >> pure(common::OmpMemoryOrderType::Release) || "SEQ_CST" >> pure(common::OmpMemoryOrderType::Seq_Cst))) +static bool IsReservedName(const Name &name) { + llvm::StringRef s(name.source.begin(), name.source.size()); + return s.starts_with_insensitive("OMP_") || + s.starts_with_insensitive("OMPX_"); +} + +TYPE_PARSER( // + construct(predicated(name, IsReservedName))) + +// Parse x(...)(...) as a substring instead of a function reference. +TYPE_PARSER( // + construct(functionReference / !lookAhead("("_tok)) || + construct(Parser{})) + +TYPE_PARSER( // + construct(Parser{}) || + construct(designator) || + "/" >> construct(name) / "/" || + construct(sourced(construct( + "//"_tok >> pure(OmpObject::Invalid::Kind::BlankCommonBlock))))) + // --- Modifier helpers ----------------------------------------------- template struct ModifierList { @@ -588,10 +609,6 @@ TYPE_PARSER( // // At the moment these are only directive arguments. This is needed for // parsing directive-specification. -TYPE_PARSER( // - construct(Parser{}) || - construct(Parser{})) - TYPE_PARSER(construct( Parser{} / ":", Parser{})) @@ -610,7 +627,7 @@ struct OmpArgumentParser { construct(Parser{}), // By default, prefer OmpReductionSpecifier over OmpBaseVariantNames. construct(Parser{}), - construct(Parser{})))}; + construct(Parser{})))}; return parser.Parse(state); } }; @@ -625,13 +642,11 @@ struct OmpArgumentParser { // In DECLARE_VARIANT parse OmpBaseVariantNames instead of // OmpReductionSpecifier. 
construct(Parser{}), - construct(Parser{})))}; + construct(Parser{})))}; return parser.Parse(state); } }; -TYPE_PARSER(construct(nonemptyList(Parser{}))) - template struct OmpArgumentListParser { using resultType = OmpArgumentList; @@ -1405,12 +1420,6 @@ TYPE_PARSER(construct( maybe(nonemptyList(Parser{}) / ":"), nonemptyList(scalarIntExpr))) -TYPE_PARSER( // - construct(designator) || - "/" >> construct(name) / "/" || - construct(sourced(construct( - "//"_tok >> pure(OmpObject::Invalid::Kind::BlankCommonBlock))))) - // OMP 5.0 2.19.4.5 LASTPRIVATE ([lastprivate-modifier :] list) TYPE_PARSER(construct( maybe(nonemptyList(Parser{}) / ":"), diff --git a/flang/lib/Parser/openmp-utils.cpp b/flang/lib/Parser/openmp-utils.cpp index ec312bcf3ebfd..ca563f2df892d 100644 --- a/flang/lib/Parser/openmp-utils.cpp +++ b/flang/lib/Parser/openmp-utils.cpp @@ -54,16 +54,20 @@ std::optional GetObjectSource( return name->source; } else if (auto *desg{std::get_if(&object.u)}) { return GetLastName(*desg).source; + } else if (auto *locator{std::get_if(&object.u)}) { + return common::visit( // + common::visitors{ + [](const parser::OmpReservedIdentifier &x) { return x.v.source; }, + [](const parser::FunctionReference &x) { return x.source; }, + }, + locator->u); } return std::nullopt; } const parser::OmpObject *GetArgumentObject( const parser::OmpArgument &argument) { - if (auto *locator{std::get_if(&argument.u)}) { - return std::get_if(&locator->u); - } - return nullptr; + return std::get_if(&argument.u); } namespace detail { diff --git a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 42f042e470e81..7d0038767a9c4 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -2515,7 +2515,7 @@ class UnparseVisitor { void Unparse(const OmpObject &x) { common::visit( // common::visitors{ - [&](const Designator &y) { Walk(y); }, + [&](const auto &y) { Walk(y); }, [&](const Name &y) { Put("/"); Walk(y); diff --git a/flang/lib/Semantics/check-omp-loop.cpp 
b/flang/lib/Semantics/check-omp-loop.cpp index 3e1f2e6cdf5d0..c77c1c53f4813 100644 --- a/flang/lib/Semantics/check-omp-loop.cpp +++ b/flang/lib/Semantics/check-omp-loop.cpp @@ -665,7 +665,7 @@ void OmpStructureChecker::CheckScanModifier( [&](const parser::Name &name) { checkReductionSymbolInScan(name); }, - [&](const parser::OmpObject::Invalid &invalid) {}, + [&](const auto &) {}, }, ompObj.u); } diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 816b8fd2f149d..81600fa1ddbb9 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -414,12 +414,18 @@ void OmpStructureChecker::AnalyzeObject(const parser::OmpObject &object) { } } } + evaluate::ExpressionAnalyzer ea{context_}; auto restore{ea.AllowWholeAssumedSizeArray(true)}; common::visit( // common::visitors{ [&](auto &&s) { ea.Analyze(s); }, - [&](const parser::OmpObject::Invalid &invalid) {}, + [&](const parser::OmpLocator &x) { + if (auto *ref{std::get_if(&x.u)}) { + ea.Analyze(*ref); + } + }, + [&](const parser::OmpObject::Invalid &) {}, }, object.u); } @@ -616,6 +622,16 @@ bool OmpStructureChecker::HasRequires(llvm::omp::Clause req) { DEREF(unit.symbol()).details()); } +void OmpStructureChecker::Enter(const parser::OmpLocator &x) { + if (auto *reserved{parser::Unwrap(x.u)}) { + std::string name{parser::ToLowerCaseLetters(reserved->v.source.ToString())}; + if (!llvm::is_contained(llvm::omp::getReservedLocatorNames(), name)) { + context_.Say(reserved->v.source, "'%s' is not a valid locator"_err_en_US, + parser::ToUpperCaseLetters(name)); + } + } +} + void OmpStructureChecker::CheckArgumentObjectKind(const parser::OmpClause &x) { unsigned version{context_.langOptions().OpenMPVersion}; llvm::omp::Directive dirId{GetContext().directive}; @@ -1671,7 +1687,8 @@ void OmpStructureChecker::CheckThreadprivateOrDeclareTargetVar( common::visit( // common::visitors{ [&](auto &&s) { 
CheckThreadprivateOrDeclareTargetVar(s); }, - [&](const parser::OmpObject::Invalid &invalid) {}, + [&](const parser::OmpLocator &) {}, + [&](const parser::OmpObject::Invalid &) {}, }, object.u); } @@ -1685,10 +1702,10 @@ void OmpStructureChecker::CheckThreadprivateOrDeclareTargetVar( void OmpStructureChecker::Enter(const parser::OmpGroupprivateDirective &x) { for (const parser::OmpArgument &arg : x.v.Arguments().v) { - auto *locator{std::get_if(&arg.u)}; + auto *object{std::get_if(&arg.u)}; const Symbol *sym{GetArgumentSymbol(arg, /*ultimate=*/true)}; - if (!locator || !sym || + if (!object || !sym || (!IsVariableListItem(*sym) && !IsCommonBlock(*sym))) { context_.Say(arg.source, "GROUPPRIVATE argument should be a variable or a named common block"_err_en_US); @@ -3557,7 +3574,8 @@ void OmpStructureChecker::Leave(const parser::OmpClauseList &x) { } } }, - [&](const parser::OmpObject::Invalid &invalid) {}, + [&](const parser::OmpLocator &) {}, + [&](const parser::OmpObject::Invalid &) {}, }, ompObject.u); } diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 8d9b25fb2a11d..4499e2a213384 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -131,6 +131,7 @@ class OmpStructureChecker : public OmpStructureCheckerBase { void Enter(const parser::OpenMPCriticalConstruct &); void Enter(const parser::OpenMPAtomicConstruct &); + void Enter(const parser::OmpLocator &x); void Enter(const parser::OmpClauseList &); void Leave(const parser::OmpClauseList &); void Enter(const parser::OmpClause &); diff --git a/flang/lib/Semantics/check-omp-variant.cpp b/flang/lib/Semantics/check-omp-variant.cpp index c681cd601b856..8b782030c37e3 100644 --- a/flang/lib/Semantics/check-omp-variant.cpp +++ b/flang/lib/Semantics/check-omp-variant.cpp @@ -688,7 +688,7 @@ void OmpStructureChecker::CheckOmpDeclareVariantDirective( CheckProcedureSymbol(base, arg.source); CheckProcedureSymbol(variant, 
arg.source); }, - [&](const parser::OmpLocator &y) { + [&](const parser::OmpObject &y) { variant = GetArgumentSymbol(arg); CheckProcedureSymbol(variant, arg.source); const Scope &containingScope{context_.FindScope(x.source)}; diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp index 1aa27a5fe6074..0556920877f45 100644 --- a/flang/lib/Semantics/openmp-utils.cpp +++ b/flang/lib/Semantics/openmp-utils.cpp @@ -121,6 +121,21 @@ std::string TryVersion(unsigned version) { return "try -fopenmp-version=" + std::to_string(version); } +static const Symbol *GetFunctionReferenceSymbol( + const parser::FunctionReference &ref) { + auto &proc{std::get(ref.v.t)}; + return common::visit( + common::visitors{ + [](const parser::Name &x) { return x.symbol; }, + [](const parser::ProcComponentRef &x) { + return parser::UnwrapRef(x.v) + .Component() + .symbol; + }, + }, + proc.u); +} + const Symbol *GetObjectSymbol(const parser::OmpObject &object, bool ultimate) { // Some symbols may be missing if the resolution failed, e.g. when an // undeclared name is used with implicit none. 
@@ -137,16 +152,28 @@ const Symbol *GetObjectSymbol(const parser::OmpObject &object, bool ultimate) { } else { return last.symbol; } + } else if (auto *locator{std::get_if(&object.u)}) { + const Symbol *sym = common::visit( // + common::visitors{ + [](const parser::OmpReservedIdentifier &x) { return x.v.symbol; }, + [](const parser::FunctionReference &x) { + return GetFunctionReferenceSymbol(x); + }, + }, + locator->u); + if (sym && ultimate) { + return &sym->GetUltimate(); + } else { + return sym; + } } return nullptr; } const Symbol *GetArgumentSymbol( const parser::OmpArgument &argument, bool ultimate) { - if (auto *locator{std::get_if(&argument.u)}) { - if (auto *object{std::get_if(&locator->u)}) { - return GetObjectSymbol(*object, ultimate); - } + if (auto *object{GetArgumentObject(argument)}) { + return GetObjectSymbol(*object, ultimate); } return nullptr; } @@ -233,17 +260,21 @@ bool IsExtendedListItem( if (IsVariableListItem(object, semaCtx)) { return true; } - if (auto *sym{GetObjectSymbol(object, /*ultimate=*/true)}) { - return IsProcedure(*sym); + if (!std::holds_alternative(object.u)) { + if (auto *sym{GetObjectSymbol(object, /*ultimate=*/true)}) { + return IsProcedure(*sym); + } } return false; } bool IsLocatorListItem( const parser::OmpObject &object, SemanticsContext *semaCtx) { - if (IsVariableListItem(object, semaCtx)) { + if (IsVariableListItem(object, semaCtx) || + std::holds_alternative(object.u)) { return true; } + // A statement function call may look like an array element access. 
if (auto *desg{parser::Unwrap(object)}) { evaluate::ExpressionAnalyzer ea(*semaCtx); auto restorer{ea.GetContextualMessages().DiscardMessages()}; @@ -447,6 +478,9 @@ std::optional IsContiguous( } return std::optional{}; }, + [&](const parser::OmpLocator &) { // + return std::optional{}; + }, [&](const parser::OmpObject::Invalid &) { return std::optional{}; }}, diff --git a/flang/lib/Semantics/resolve-directives.cpp b/flang/lib/Semantics/resolve-directives.cpp index 2e1a1cbe01aef..f3865cfb877dc 100644 --- a/flang/lib/Semantics/resolve-directives.cpp +++ b/flang/lib/Semantics/resolve-directives.cpp @@ -3176,6 +3176,9 @@ void OmpAttributeVisitor::ResolveOmpObject( [&](const parser::Name &name) { // common block ResolveOmpCommonBlock(name, ompFlag); }, + [&](const parser::OmpLocator &ref) { + // Do nothing here. + }, [&](const parser::OmpObject::Invalid &invalid) { switch (invalid.v) { SWITCH_COVERS_ALL_CASES diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index c6800e15be9dc..a6f3fe12eb9b7 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -1738,6 +1738,7 @@ class OmpVisitor : public virtual DeclarationVisitor { return Pre(static_cast(x)); } + bool Pre(const parser::OmpReservedIdentifier &); void Post(const parser::OmpTypeName &); bool Pre(const parser::OmpStylizedDeclaration &); void Post(const parser::OmpStylizedDeclaration &); @@ -1938,6 +1939,17 @@ void OmpVisitor::PushScopeWithSource( currScope().AddSourceRange(source); } +bool OmpVisitor::Pre(const parser::OmpReservedIdentifier &x) { + // Create a unique symbol in the global scope. 
+ if (auto *symbol{context().globalScope().FindSymbol(x.v.source)}) { + x.v.symbol = symbol; + } else { + MakePlaceholder(x.v, MiscDetails::Kind::None); + } + x.v.symbol->set(Symbol::Flag::OmpReserved); + return false; +} + void OmpVisitor::Post(const parser::OmpTypeName &x) { x.declTypeSpec = GetDeclTypeSpec(); } @@ -2205,7 +2217,7 @@ bool OmpVisitor::Pre(const parser::OmpDirectiveSpecification &x) { Walk(std::get<0>(names.t)); Walk(std::get<1>(names.t)); }, - [&](const parser::OmpLocator &locator) { + [&](const parser::OmpObject &object) { // Manually resolve names in CRITICAL directives. This is because // these names do not denote Fortran objects, and the CRITICAL // directive causes them to be "auto-declared", i.e. inserted into @@ -2215,7 +2227,7 @@ bool OmpVisitor::Pre(const parser::OmpDirectiveSpecification &x) { if (x.DirId() == llvm::omp::Directive::OMPD_critical) { ResolveCriticalName(arg); } else { - Walk(locator); + Walk(object); } }, }, diff --git a/flang/lib/Semantics/rewrite-parse-tree.cpp b/flang/lib/Semantics/rewrite-parse-tree.cpp index 4e1c9bae9c153..bdccde8af3b94 100644 --- a/flang/lib/Semantics/rewrite-parse-tree.cpp +++ b/flang/lib/Semantics/rewrite-parse-tree.cpp @@ -82,6 +82,7 @@ class RewriteMutator { bool Pre(parser::EndSubroutineStmt &) { return false; } bool Pre(parser::EndTypeStmt &) { return false; } + bool Pre(parser::OmpObject &); bool Pre(parser::OmpBlockConstruct &); bool Pre(parser::OpenMPLoopConstruct &); void Post(parser::OmpBlockConstruct &); @@ -371,6 +372,22 @@ bool RewriteMutator::Pre(parser::Block &block) { void RewriteMutator::Post(parser::Block &block) { this->Pre(block); } +bool RewriteMutator::Pre(parser::OmpObject &object) { + // When parsing A(i) there is no way to tell whether it's a function call + // or an array element access. 
In OmpObject it will be preferentially + // parsed as FunctionReference, but once the name "A" is resolved, and it + // turns out to be an array, the function call in the OmpObject will need + // to be converted to an array element. + // This has to happen early, before the ExprChecker runs, or otherwise it + // will emit undesirable diagnostics. + if (auto *ref{parser::Unwrap(object)}) { + if (CheckMisparsedArrayElement(context_, *ref)) { + object.u = ref->ConvertToArrayElementRef(); + } + } + return true; +} + bool RewriteMutator::Pre(parser::OmpBlockConstruct &block) { if (context_.langOptions().OpenMPSimd) { auto &innerBlock = std::get(block.t); diff --git a/flang/test/Lower/OpenMP/Todo/locator-call-affinity.f90 b/flang/test/Lower/OpenMP/Todo/locator-call-affinity.f90 new file mode 100644 index 0000000000000..2f17e6c22c932 --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/locator-call-affinity.f90 @@ -0,0 +1,13 @@ +!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s 2>&1 | FileCheck %s + +!CHECK: not yet implemented: Function call locators are not supported yet + +subroutine f + interface + function p + integer, pointer :: p + end + end interface + !$omp task affinity(p()) + !$omp end task +end diff --git a/flang/test/Lower/OpenMP/Todo/locator-call-from.f90 b/flang/test/Lower/OpenMP/Todo/locator-call-from.f90 new file mode 100644 index 0000000000000..66e6a5eb1a147 --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/locator-call-from.f90 @@ -0,0 +1,12 @@ +!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s 2>&1 | FileCheck %s + +!CHECK: not yet implemented: Function call locators are not supported yet + +subroutine f + interface + function p + integer, pointer :: p + end + end interface + !$omp target update from(p()) +end diff --git a/flang/test/Lower/OpenMP/Todo/locator-call-map.f90 b/flang/test/Lower/OpenMP/Todo/locator-call-map.f90 new file mode 100644 index 0000000000000..a0c17c7584e80 --- /dev/null +++ 
b/flang/test/Lower/OpenMP/Todo/locator-call-map.f90 @@ -0,0 +1,13 @@ +!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s 2>&1 | FileCheck %s + +!CHECK: not yet implemented: Function call locators are not supported yet + +subroutine f + interface + function p + integer, pointer :: p + end + end interface + !$omp target map(p()) + !$omp end target +end diff --git a/flang/test/Lower/OpenMP/Todo/locator-call-to.f90 b/flang/test/Lower/OpenMP/Todo/locator-call-to.f90 new file mode 100644 index 0000000000000..e5747bdda82b1 --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/locator-call-to.f90 @@ -0,0 +1,12 @@ +!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s 2>&1 | FileCheck %s + +!CHECK: not yet implemented: Function call locators are not supported yet + +subroutine f + interface + function p + integer, pointer :: p + end + end interface + !$omp target update to(p()) +end diff --git a/flang/test/Lower/OpenMP/Todo/locator-reserved.f90 b/flang/test/Lower/OpenMP/Todo/locator-reserved.f90 new file mode 100644 index 0000000000000..e742ce843501b --- /dev/null +++ b/flang/test/Lower/OpenMP/Todo/locator-reserved.f90 @@ -0,0 +1,11 @@ +!RUN: %not_todo_cmd %flang_fc1 -emit-hlfir -fopenmp -fopenmp-version=60 -o - %s 2>&1 | FileCheck %s + +!CHECK: not yet implemented: Reserved locators are not supported yet + +subroutine f + ! This is a wrong use of OMP_ALL_MEMORY, but at the moment the clauses that + ! legally allow this locator aren't accepting it yet in flang. 
+ !$omp target map(omp_all_memory) + !$omp end target +end + diff --git a/flang/test/Parser/OpenMP/allocate-align-tree.f90 b/flang/test/Parser/OpenMP/allocate-align-tree.f90 index e440d23904693..35f7d00b88a29 100644 --- a/flang/test/Parser/OpenMP/allocate-align-tree.f90 +++ b/flang/test/Parser/OpenMP/allocate-align-tree.f90 @@ -25,7 +25,7 @@ end program allocate_align_tree !CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpAllocateDirective !CHECK-NEXT: | OmpBeginDirective !CHECK-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = allocate -!CHECK-NEXT: | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'j' +!CHECK-NEXT: | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'j' !CHECK-NEXT: | | OmpClauseList -> OmpClause -> Align -> OmpAlignClause -> Scalar -> Integer -> Constant -> Expr = '16_4' !CHECK-NEXT: | | | LiteralConstant -> IntLiteralConstant = '16' !CHECK-NEXT: | | Flags = {} @@ -33,7 +33,7 @@ end program allocate_align_tree !CHECK-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpAllocateDirective !CHECK-NEXT: | | | OmpBeginDirective !CHECK-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = allocate -!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'xarray' +!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'xarray' !CHECK-NEXT: | | | | OmpClauseList -> OmpClause -> Align -> OmpAlignClause -> Scalar -> Integer -> Constant -> Expr = '32_4' !CHECK-NEXT: | | | | | LiteralConstant -> IntLiteralConstant = '32' !CHECK-NEXT: | | | | OmpClause -> Allocator -> Scalar -> Integer -> Expr = '2_8' diff --git a/flang/test/Parser/OpenMP/allocate-tree-spec-part.f90 b/flang/test/Parser/OpenMP/allocate-tree-spec-part.f90 index 92ddbbdce05c5..6624273659200 100644 --- a/flang/test/Parser/OpenMP/allocate-tree-spec-part.f90 
+++ b/flang/test/Parser/OpenMP/allocate-tree-spec-part.f90 @@ -20,7 +20,7 @@ end program allocate_tree !CHECK: | | DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpAllocateDirective !CHECK-NEXT: | | | OmpBeginDirective !CHECK-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = allocate -!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'f' +!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'f' !CHECK-NEXT: | | | | OmpClauseList -> OmpClause -> Allocator -> Scalar -> Integer -> Expr = '1_8' !CHECK-NEXT: | | | | | Designator -> DataRef -> Name = 'omp_default_mem_alloc' !CHECK-NEXT: | | | | Flags = {} @@ -34,7 +34,7 @@ end program allocate_tree !CHECK-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpAllocateDirective !CHECK-NEXT: | | | OmpBeginDirective !CHECK-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = allocate -!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'w' +!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'w' !CHECK-NEXT: | | | | OmpClauseList -> OmpClause -> Allocator -> Scalar -> Integer -> Expr = '3_8' !CHECK-NEXT: | | | | | Designator -> DataRef -> Name = 'omp_const_mem_alloc' !CHECK-NEXT: | | | | Flags = {} @@ -42,7 +42,7 @@ end program allocate_tree !CHECK-NEXT: | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpAllocateDirective !CHECK-NEXT: | | | | | OmpBeginDirective !CHECK-NEXT: | | | | | | OmpDirectiveName -> llvm::omp::Directive = allocate -!CHECK-NEXT: | | | | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'xarray' +!CHECK-NEXT: | | | | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'xarray' !CHECK-NEXT: | | | | | | 
OmpClauseList -> OmpClause -> Allocator -> Scalar -> Integer -> Expr = '2_8' !CHECK-NEXT: | | | | | | | Designator -> DataRef -> Name = 'omp_large_cap_mem_alloc' !CHECK-NEXT: | | | | | | Flags = {} @@ -50,7 +50,7 @@ end program allocate_tree !CHECK-NEXT: | | | | | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpAllocateDirective !CHECK-NEXT: | | | | | | | OmpBeginDirective !CHECK-NEXT: | | | | | | | | OmpDirectiveName -> llvm::omp::Directive = allocate -!CHECK-NEXT: | | | | | | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'zarray' +!CHECK-NEXT: | | | | | | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'zarray' !CHECK-NEXT: | | | | | | | | OmpClauseList -> OmpClause -> Allocator -> Scalar -> Integer -> Expr = '1_8' !CHECK-NEXT: | | | | | | | | | Designator -> DataRef -> Name = 'omp_default_mem_alloc' !CHECK-NEXT: | | | | | | | | Flags = {} diff --git a/flang/test/Parser/OpenMP/allocate-tree.f90 b/flang/test/Parser/OpenMP/allocate-tree.f90 index 17ffb76aeed96..d2d309a646f01 100644 --- a/flang/test/Parser/OpenMP/allocate-tree.f90 +++ b/flang/test/Parser/OpenMP/allocate-tree.f90 @@ -21,7 +21,7 @@ end program allocate_tree !CHECK: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpAllocateDirective !CHECK-NEXT: | OmpBeginDirective !CHECK-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = allocate -!CHECK-NEXT: | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'w' +!CHECK-NEXT: | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'w' !CHECK-NEXT: | | OmpClauseList -> OmpClause -> Allocator -> Scalar -> Integer -> Expr = '3_8' !CHECK-NEXT: | | | Designator -> DataRef -> Name = 'omp_const_mem_alloc' !CHECK-NEXT: | | Flags = {} @@ -30,7 +30,7 @@ end program allocate_tree !CHECK: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> 
OmpAllocateDirective !CHECK-NEXT: | OmpBeginDirective !CHECK-NEXT: | | OmpDirectiveName -> llvm::omp::Directive = allocate -!CHECK-NEXT: | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'xarray' +!CHECK-NEXT: | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'xarray' !CHECK-NEXT: | | OmpClauseList -> OmpClause -> Allocator -> Scalar -> Integer -> Expr = '2_8' !CHECK-NEXT: | | | Designator -> DataRef -> Name = 'omp_large_cap_mem_alloc' !CHECK-NEXT: | | Flags = {} @@ -38,7 +38,7 @@ end program allocate_tree !CHECK-NEXT: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OmpAllocateDirective !CHECK-NEXT: | | | OmpBeginDirective !CHECK-NEXT: | | | | OmpDirectiveName -> llvm::omp::Directive = allocate -!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'zarray' +!CHECK-NEXT: | | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'zarray' !CHECK-NEXT: | | | | OmpClauseList -> OmpClause -> Allocator -> Scalar -> Integer -> Expr = '1_8' !CHECK-NEXT: | | | | | Designator -> DataRef -> Name = 'omp_default_mem_alloc' !CHECK-NEXT: | | | | Flags = {} diff --git a/flang/test/Parser/OpenMP/declare-variant.f90 b/flang/test/Parser/OpenMP/declare-variant.f90 index 07f65beaaf12a..cc967cf6642ee 100644 --- a/flang/test/Parser/OpenMP/declare-variant.f90 +++ b/flang/test/Parser/OpenMP/declare-variant.f90 @@ -38,7 +38,7 @@ subroutine sub (v1) !PARSE-TREE: OpenMPDeclarativeConstruct -> OmpDeclareVariantDirective -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare variant -!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'vsub' +!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'vsub' !PARSE-TREE: | OmpClauseList -> OmpClause -> Match -> OmpMatchClause 
-> OmpContextSelectorSpecification -> OmpTraitSetSelector !PARSE-TREE: | | OmpTraitSetSelectorName -> Value = Construct !PARSE-TREE: | | OmpTraitSelector @@ -68,7 +68,7 @@ subroutine sub (v1) !PARSE-TREE: OpenMPDeclarativeConstruct -> OmpDeclareVariantDirective -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare variant -!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'vsub' +!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'vsub' !PARSE-TREE: | OmpClauseList -> OmpClause -> Match -> OmpMatchClause -> OmpContextSelectorSpecification -> OmpTraitSetSelector !PARSE-TREE: | | OmpTraitSetSelectorName -> Value = Construct !PARSE-TREE: | | OmpTraitSelector @@ -96,7 +96,7 @@ subroutine sub (v1, v2) !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpDeclareVariantDirective -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare variant -!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'vsub' +!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'vsub' !PARSE-TREE: | OmpClauseList -> OmpClause -> Match -> OmpMatchClause -> OmpContextSelectorSpecification -> OmpTraitSetSelector !PARSE-TREE: | | OmpTraitSetSelectorName -> Value = Construct !PARSE-TREE: | | OmpTraitSelector @@ -136,7 +136,7 @@ subroutine f2 (x, y) !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpDeclareVariantDirective -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare variant -!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'f1' +!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'f1' 
!PARSE-TREE: | OmpClauseList -> OmpClause -> Match -> OmpMatchClause -> OmpContextSelectorSpecification -> OmpTraitSetSelector !PARSE-TREE: | | OmpTraitSetSelectorName -> Value = Construct !PARSE-TREE: | | OmpTraitSelector diff --git a/flang/test/Parser/OpenMP/depobj-construct.f90 b/flang/test/Parser/OpenMP/depobj-construct.f90 index 2d4831fe62bbb..9a49976cfd4b0 100644 --- a/flang/test/Parser/OpenMP/depobj-construct.f90 +++ b/flang/test/Parser/OpenMP/depobj-construct.f90 @@ -13,7 +13,7 @@ subroutine f00 !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPDepobjConstruct -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = depobj -!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | OmpClauseList -> OmpClause -> Depend -> OmpDependClause -> TaskDep !PARSE-TREE: | | Modifier -> OmpTaskDependenceType -> OmpDependenceKind = In !PARSE-TREE: | | OmpObjectList -> OmpObject -> Designator -> DataRef -> Name = 'y' @@ -30,7 +30,7 @@ subroutine f01 !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPDepobjConstruct -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = depobj -!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | OmpClauseList -> OmpClause -> Update -> OmpUpdateClause -> OmpTaskDependenceType -> OmpDependenceKind = Out subroutine f02 @@ -45,7 +45,7 @@ subroutine f02 !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPDepobjConstruct -> OmpDirectiveSpecification 
!PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = depobj -!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | OmpClauseList -> OmpClause -> Destroy -> OmpDestroyClause -> OmpObject -> Designator -> DataRef -> Name = 'x' subroutine f03 @@ -60,7 +60,7 @@ subroutine f03 !PARSE-TREE: ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPStandaloneConstruct -> OpenMPDepobjConstruct -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = depobj -!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | OmpClauseList -> OmpClause -> Destroy -> subroutine f04 diff --git a/flang/test/Parser/OpenMP/groupprivate.f90 b/flang/test/Parser/OpenMP/groupprivate.f90 index 120af619d3b9b..82e16ba08d90f 100644 --- a/flang/test/Parser/OpenMP/groupprivate.f90 +++ b/flang/test/Parser/OpenMP/groupprivate.f90 @@ -19,12 +19,12 @@ module m !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpGroupprivateDirective -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = groupprivate -!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x' -!PARSE-TREE: | OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'y' +!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'y' !PARSE-TREE: | OmpClauseList -> OmpClause -> DeviceType -> OmpDeviceTypeClause -> OmpDeviceType = Nohost !PARSE-TREE: | Flags = {} !PARSE-TREE: DeclarationConstruct -> 
SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpGroupprivateDirective -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = groupprivate -!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'z' +!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'z' !PARSE-TREE: | OmpClauseList -> !PARSE-TREE: | Flags = {} diff --git a/flang/test/Parser/OpenMP/metadirective-dirspec.f90 b/flang/test/Parser/OpenMP/metadirective-dirspec.f90 index a24027161ef09..d63db6c93cf40 100644 --- a/flang/test/Parser/OpenMP/metadirective-dirspec.f90 +++ b/flang/test/Parser/OpenMP/metadirective-dirspec.f90 @@ -26,7 +26,7 @@ subroutine f00(x) !PARSE-TREE: | | | | | | | bool = 'true' !PARSE-TREE: | | OmpDirectiveSpecification !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = allocate -!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | OmpClauseList -> subroutine f01(x) @@ -52,7 +52,7 @@ subroutine f01(x) !PARSE-TREE: | | | | | | | bool = 'true' !PARSE-TREE: | | OmpDirectiveSpecification !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = critical -!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | OmpClauseList -> subroutine f02 @@ -187,7 +187,7 @@ subroutine f04 !PARSE-TREE: | | | | | | | bool = 'true' !PARSE-TREE: | | OmpDirectiveSpecification !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = declare simd -!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'f04' +!PARSE-TREE: | | | OmpArgumentList 
-> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'f04' !PARSE-TREE: | | | OmpClauseList -> !PARSE-TREE: ImplicitPart -> @@ -212,7 +212,7 @@ subroutine f05 !PARSE-TREE: | | | | | | | bool = 'true' !PARSE-TREE: | | OmpDirectiveSpecification !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = declare target -!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'f05' +!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'f05' !PARSE-TREE: | | | OmpClauseList -> !PARSE-TREE: ImplicitPart -> @@ -239,8 +239,8 @@ subroutine f06(x, y) !PARSE-TREE: | | | | | | | bool = 'true' !PARSE-TREE: | | OmpDirectiveSpecification !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = flush -!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x' -!PARSE-TREE: | | | | OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'y' +!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | | | | OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'y' !PARSE-TREE: | | | OmpClauseList -> subroutine f07 @@ -266,5 +266,5 @@ subroutine f07 !PARSE-TREE: | | | | | | | bool = 'true' !PARSE-TREE: | | OmpDirectiveSpecification !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = threadprivate -!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 't' +!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 't' !PARSE-TREE: | | | OmpClauseList -> diff --git a/flang/test/Parser/OpenMP/metadirective-flush.f90 b/flang/test/Parser/OpenMP/metadirective-flush.f90 index e4e521ed07073..6b74bfdf50bdb 100644 --- a/flang/test/Parser/OpenMP/metadirective-flush.f90 +++ b/flang/test/Parser/OpenMP/metadirective-flush.f90 @@ -23,7 +23,7 
@@ subroutine f00() !PARSE-TREE: | | | | | | | bool = 'true' !PARSE-TREE: | | OmpDirectiveSpecification !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = flush -!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | OmpClauseList -> OmpClause -> SeqCst !PARSE-TREE: | | | Flags = {DeprecatedSyntax} @@ -49,6 +49,6 @@ subroutine f01() !PARSE-TREE: | | | | | | | bool = 'true' !PARSE-TREE: | | OmpDirectiveSpecification !PARSE-TREE: | | | OmpDirectiveName -> llvm::omp::Directive = flush -!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'x' +!PARSE-TREE: | | | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'x' !PARSE-TREE: | | | OmpClauseList -> OmpClause -> SeqCst !PARSE-TREE: | | | Flags = {} diff --git a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 index dbe51d854da04..5a14d99795752 100644 --- a/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 +++ b/flang/test/Parser/OpenMP/openmp6-directive-spellings.f90 @@ -155,7 +155,7 @@ subroutine g05 !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpDeclareVariantDirective -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = declare variant -!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'g05' +!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'g05' !PARSE-TREE: | OmpClauseList -> OmpClause -> Match -> OmpMatchClause -> OmpContextSelectorSpecification -> OmpTraitSetSelector !PARSE-TREE: | | OmpTraitSetSelectorName -> Value = User !PARSE-TREE: | | OmpTraitSelector diff --git 
a/flang/test/Parser/OpenMP/threadprivate.f90 b/flang/test/Parser/OpenMP/threadprivate.f90 index e03bf6f7f94b9..5295acf6fa0a7 100644 --- a/flang/test/Parser/OpenMP/threadprivate.f90 +++ b/flang/test/Parser/OpenMP/threadprivate.f90 @@ -19,7 +19,7 @@ module m !PARSE-TREE: DeclarationConstruct -> SpecificationConstruct -> OpenMPDeclarativeConstruct -> OmpThreadprivateDirective -> OmpDirectiveSpecification !PARSE-TREE: | OmpDirectiveName -> llvm::omp::Directive = threadprivate -!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpLocator -> OmpObject -> Name = 'blk' -!PARSE-TREE: | OmpArgument -> OmpLocator -> OmpObject -> Designator -> DataRef -> Name = 'b' +!PARSE-TREE: | OmpArgumentList -> OmpArgument -> OmpObject -> Name = 'blk' +!PARSE-TREE: | OmpArgument -> OmpObject -> Designator -> DataRef -> Name = 'b' !PARSE-TREE: | OmpClauseList -> !PARSE-TREE: | Flags = {} diff --git a/flang/test/Semantics/OpenMP/affinity-invalid.f90 b/flang/test/Semantics/OpenMP/affinity-invalid.f90 index 32e726bada937..34bf9bb3c0798 100644 --- a/flang/test/Semantics/OpenMP/affinity-invalid.f90 +++ b/flang/test/Semantics/OpenMP/affinity-invalid.f90 @@ -81,14 +81,6 @@ subroutine affinity_iterator_section_bad_stride(n) !$omp end task end subroutine -subroutine affinity_substring_like_single_index() - character(len=7) :: s - !PORTABILITY: The use of substrings in OpenMP argument lists has been disallowed since OpenMP 5.2. - !ERROR: Substrings must be in the form parent-string(lb:ub) - !$omp task affinity(s(2)) - !$omp end task -end subroutine - subroutine affinity_substring_like_step() character(len=7) :: s !PORTABILITY: The use of substrings in OpenMP argument lists has been disallowed since OpenMP 5.2. 
diff --git a/flang/test/Semantics/OpenMP/depend-substring.f90 b/flang/test/Semantics/OpenMP/depend-substring.f90 index 23d6bb4c0b7b3..558172fa218d2 100644 --- a/flang/test/Semantics/OpenMP/depend-substring.f90 +++ b/flang/test/Semantics/OpenMP/depend-substring.f90 @@ -25,15 +25,6 @@ subroutine substring_2(c) !$omp end task end -! Error -subroutine substring_3(c) - character(:), pointer :: c - !PORTABILITY: The use of substrings in OpenMP argument lists has been disallowed since OpenMP 5.2. - !ERROR: Substrings must be in the form parent-string(lb:ub) - !$omp task depend(out:c(2)) - !$omp end task -end - ! This is okay: interpreted as indexing into the array not as a substring subroutine substring_3b(c) character(:), pointer :: c(:) diff --git a/flang/test/Semantics/OpenMP/reserved-locator.f90 b/flang/test/Semantics/OpenMP/reserved-locator.f90 new file mode 100644 index 0000000000000..3fc45ffa0f54c --- /dev/null +++ b/flang/test/Semantics/OpenMP/reserved-locator.f90 @@ -0,0 +1,6 @@ +!RUN: %python %S/../test_errors.py %s %flang_fc1 -fopenmp -fopenmp-version=60 + +subroutine f +!ERROR: 'OMP_SOME_MEMORY' is not a valid locator + !$omp target update from(omp_some_memory) +end diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.h b/llvm/include/llvm/Frontend/OpenMP/OMP.h index 8ba5171caab25..1faec3812412c 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.h @@ -103,6 +103,8 @@ LLVM_ABI ArrayRef getOpenMPVersions(); /// of a variable in given OpenMP version? LLVM_ABI bool isPrivatizingConstruct(Directive D, unsigned Version); +LLVM_ABI ArrayRef getReservedLocatorNames(); + /// Create a nicer version of a function name for humans to look at. 
LLVM_ABI std::string prettifyFunctionName(StringRef FunctionName); diff --git a/llvm/lib/Frontend/OpenMP/OMP.cpp b/llvm/lib/Frontend/OpenMP/OMP.cpp index 871b6211fc2a5..86e144624211c 100644 --- a/llvm/lib/Frontend/OpenMP/OMP.cpp +++ b/llvm/lib/Frontend/OpenMP/OMP.cpp @@ -228,6 +228,12 @@ bool isPrivatizingConstruct(Directive D, unsigned Version) { return llvm::is_contained(Privatizing, D); } +ArrayRef getReservedLocatorNames() { + // All names must be lowercase. + static StringRef names[]{"omp_all_memory"}; + return names; +} + std::string prettifyFunctionName(StringRef FunctionName) { // Internalized functions have the right name, but simply a suffix. if (FunctionName.ends_with(".internalized")) From bec6da4b11599d227568d527a9ccacd8b924f41d Mon Sep 17 00:00:00 2001 From: Nathan Corbyn Date: Wed, 24 Jun 2026 15:02:00 +0100 Subject: [PATCH 341/511] [AArch64] Optimise materialisation of large stack offset calculations (#201856) --- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 21 +++++++++++++++++-- ...ramelayout-scavengingslot-stack-hazard.mir | 4 ++-- .../AArch64/framelayout-scavengingslot.mir | 4 ++-- llvm/test/CodeGen/AArch64/irg_sp_tagp.ll | 4 ++-- .../AArch64/large-stack-offset-calcs.mir | 15 +++++++++++++ llvm/test/CodeGen/AArch64/stack-guard-sve.ll | 10 ++++----- .../CodeGen/AArch64/swiftself-scavenger.ll | 6 +++--- 7 files changed, 47 insertions(+), 17 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/large-stack-offset-calcs.mir diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index d5d1e17216e63..4dbe292a29843 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -7296,8 +7296,25 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, if (MinOff <= NewOffset && NewOffset <= MaxOff) Offset = Remainder; else { - NewOffset = NewOffset < 0 ? 
MinOff : MaxOff; - Offset = Offset - (NewOffset * Scale); + // Try to minimise the number of instructions required to materialise the + // offset calculation. Specifically, for fixed offsets, if masking out the + // low 12 bits leaves a legal add immediate, we can realise the offset + // calculation with a single add instruction. Whenever this is possible, + // prefer this split. + int64_t HighPart = Offset & ~0xFFF; + int64_t LowPart = Offset & 0xFFF; + int64_t LowScaled = LowPart / Scale; + if (!IsMulVL && NewOffset >= 0 && LowPart % Scale == 0 && + MinOff <= LowScaled && LowScaled <= MaxOff && + AArch64_AM::isLegalArithImmed(HighPart)) { + NewOffset = LowScaled; + Offset = HighPart; + } else { + // Default to a greedy split: take the memop immediate to be maximum / + // minimum expressible offset and materialise the remainder. + NewOffset = NewOffset < 0 ? MinOff : MaxOff; + Offset = Offset - (NewOffset * Scale); + } } if (EmittableOffset) diff --git a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir index 52ac36f801854..5a75f540c60f6 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot-stack-hazard.mir @@ -77,8 +77,8 @@ name: stack_hazard_streaming_compat_emergency_spill_slot # CHECK-LABEL: name: stack_hazard_streaming_compat_emergency_spill_slot # CHECK: bb.0: # CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $x19, 0 -# CHECK-NEXT: $[[SCRATCH]] = ADDXri $x19, 1056, 0 -# CHECK-NEXT: STRDui $d0, killed $[[SCRATCH]], 4095 +# CHECK-NEXT: $[[SCRATCH]] = ADDXri $x19, 8, 12 +# CHECK-NEXT: STRDui $d0, killed $[[SCRATCH]], 131 # CHECK-NEXT: $[[SCRATCH]] = LDRXui $x19, 0 # CHECK: bb.1: tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir index 390582969d026..17ae97ba58077 100644 --- 
a/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-scavengingslot.mir @@ -6,8 +6,8 @@ name: LateScavengingSlotRealignment # CHECK-LABEL: name: LateScavengingSlotRealignment # CHECK: bb.0: # CHECK: STRXui killed $[[SCRATCH:x[0-9]+]], $sp, 0 -# CHECK-NEXT: $[[SCRATCH]] = ADDXri $sp, 40, 0 -# CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 4095 +# CHECK-NEXT: $[[SCRATCH]] = ADDXri $sp, 8, 12 +# CHECK-NEXT: STRXui $x0, killed $[[SCRATCH]], 4 # CHECK-NEXT: $[[SCRATCH]] = LDRXui $sp, 0 # CHECK: bb.1: tracksRegLiveness: true diff --git a/llvm/test/CodeGen/AArch64/irg_sp_tagp.ll b/llvm/test/CodeGen/AArch64/irg_sp_tagp.ll index 4fa96c771a330..b6d32c59a1f19 100644 --- a/llvm/test/CodeGen/AArch64/irg_sp_tagp.ll +++ b/llvm/test/CodeGen/AArch64/irg_sp_tagp.ll @@ -37,8 +37,8 @@ define dso_local void @huge_allocas() { entry: ; CHECK-LABEL: huge_allocas: ; CHECK: irg x1, sp{{$}} -; CHECK: add [[TMP:x[0-9]+]], x1, #3088 -; CHECK: addg x0, [[TMP]], #1008, #1 +; CHECK: add [[TMP:x[0-9]+]], x1, #1, lsl #12 +; CHECK: addg x0, [[TMP]], #0, #1 ; CHECK: bl use2 %a = alloca i8, i64 4096, align 16 %b = alloca i8, i64 4096, align 16 diff --git a/llvm/test/CodeGen/AArch64/large-stack-offset-calcs.mir b/llvm/test/CodeGen/AArch64/large-stack-offset-calcs.mir new file mode 100644 index 0000000000000..bae954a7ad3d8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/large-stack-offset-calcs.mir @@ -0,0 +1,15 @@ +# RUN: llc -mtriple=arm64-apple-ios -run-pass=prologepilog %s -o - | FileCheck %s +--- +name: large_stack_offset_calc +tracksRegLiveness: true +stack: + - { id: 0, size: 32768, alignment: 8 } +body: | + ; CHECK-LABEL: name: large_stack_offset_calc + ; CHECK: $[[BASE:x[0-9]+]] = ADDXri $sp, 10, 12 + ; CHECK-NEXT: STRXui $x0, killed $[[BASE]], 317 + bb.0: + liveins: $x0 + STRXui $x0, %stack.0, 5437 :: (store (s64)) + RET_ReallyLR +... 
diff --git a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll index e719e6d9d25b2..644ddc9ee4bc2 100644 --- a/llvm/test/CodeGen/AArch64/stack-guard-sve.ll +++ b/llvm/test/CodeGen/AArch64/stack-guard-sve.ll @@ -151,10 +151,9 @@ entry: ; CHECK: addvl sp, sp, #-2 ; Stack guard is placed below the SVE stack area (and above all fixed-width objects) -; CHECK-DAG: add [[STACK_GUARD_SPILL_PART_LOC:x[0-9]+]], sp, #8, lsl #12 -; CHECK-DAG: add [[STACK_GUARD_SPILL_PART_LOC]], [[STACK_GUARD_SPILL_PART_LOC]], #16 +; CHECK-DAG: add [[STACK_GUARD_SPILL_PART_LOC:x[0-9]+]], sp, #16, lsl #12 ; CHECK-DAG: ldr [[STACK_GUARD:x[0-9]+]], [{{x[0-9]+}}, :lo12:__stack_chk_guard] -; CHECK-DAG: str [[STACK_GUARD]], [[[STACK_GUARD_SPILL_PART_LOC]], #32760] +; CHECK-DAG: str [[STACK_GUARD]], [[[STACK_GUARD_SPILL_PART_LOC]], #8] ; char_arr is below the stack guard ; CHECK-DAG: add [[CHAR_ARR_LOC:x[0-9]+]], sp, #16, lsl #12 @@ -206,9 +205,8 @@ entry: ; CHECK-DAG: str [[STACK_GUARD]], [[[STACK_GUARD_POS]]] ; char_arr is below the SVE stack area -; CHECK-DAG: add [[CHAR_ARR:x[0-9]+]], sp, #15, lsl #12 // =61440 -; CHECK-DAG: add [[CHAR_ARR]], [[CHAR_ARR]], #9 -; CHECK-DAG: strb wzr, [[[CHAR_ARR]], #4095] +; CHECK-DAG: add [[CHAR_ARR:x[0-9]+]], sp, #16, lsl #12 // =65536 +; CHECK-DAG: strb wzr, [[[CHAR_ARR]], #8] ; large1 is accessed via a virtual base register ; CHECK-DAG: add [[LARGE1:x[0-9]+]], sp, #8, lsl #12 diff --git a/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll b/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll index 030593986be4a..a19cc12150924 100644 --- a/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll +++ b/llvm/test/CodeGen/AArch64/swiftself-scavenger.ll @@ -3,8 +3,8 @@ ; CSR spill for the values used by the swiftself parameter. 
; CHECK-LABEL: func: ; CHECK: str [[REG:x[0-9]+]], [sp] -; CHECK: add [[REG]], sp, #248 -; CHECK: str xzr, [{{\s*}}[[REG]], #32760] +; CHECK: add [[REG]], sp, #8, lsl #12 +; CHECK: str xzr, [{{\s*}}[[REG]], #240] ; CHECK: ldr [[REG]], [sp] target triple = "arm64-apple-ios" @@ -75,7 +75,7 @@ bb: store volatile i64 %v23, ptr @ptr64, align 8 store volatile i64 %v24, ptr @ptr64, align 8 store volatile i64 %v25, ptr @ptr64, align 8 - + ; use swiftself parameter late so it stays alive throughout the function. store volatile ptr %arg, ptr @ptr8 ret void From 88bafc7f80db017341ee2974ac854a8b708d5a22 Mon Sep 17 00:00:00 2001 From: Buildbot for SYCL Date: Wed, 24 Jun 2026 22:02:14 +0800 Subject: [PATCH 342/511] [GHA] Uplift Linux GPU RT version to 26.22.38646.4 (#22394) Scheduled drivers uplift --------- Signed-off-by: Nick Sarnie Co-authored-by: GitHub Actions Co-authored-by: Nick Sarnie --- devops/dependencies.json | 18 +++++++++--------- .../level_zero/barrier_optimization.cpp | 3 +++ .../Adapters/level_zero/interop-buffer.cpp | 2 ++ .../Basic/alloc_pinned_host_memory.cpp | 3 +++ sycl/test-e2e/Basic/buffer/buffer_create.cpp | 3 +++ .../level-zero-static-link-flow.cpp | 3 +++ sycl/test-e2e/Properties/cache_config.cpp | 3 +++ .../SpecConstants/2020/non_native/gpu.cpp | 3 +++ sycl/test-e2e/XPTI/mem_alloc_events_linux.cpp | 2 ++ .../vulkan_sycl_buffer_binary_semaphore.cpp | 3 +++ 10 files changed, 34 insertions(+), 9 deletions(-) diff --git a/devops/dependencies.json b/devops/dependencies.json index f982ba2f304c7..5f426382b16ff 100644 --- a/devops/dependencies.json +++ b/devops/dependencies.json @@ -1,15 +1,15 @@ { "linux": { "compute_runtime": { - "github_tag": "26.18.38308.1", - "version": "26.18.38308.1", - "url": "https://github.com/intel/compute-runtime/releases/tag/26.18.38308.1", + "github_tag": "26.22.38646.4", + "version": "26.22.38646.4", + "url": "https://github.com/intel/compute-runtime/releases/tag/26.22.38646.4", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" 
}, "igc": { - "github_tag": "v2.34.4", - "version": "v2.34.4", - "url": "https://github.com/intel/intel-graphics-compiler/releases/tag/v2.34.4", + "github_tag": "v2.36.3", + "version": "v2.36.3", + "url": "https://github.com/intel/intel-graphics-compiler/releases/tag/v2.36.3", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" }, "cm": { @@ -19,9 +19,9 @@ "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" }, "level_zero": { - "github_tag": "v1.29.0", - "version": "v1.29.0", - "url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.29.0", + "github_tag": "v1.30.0", + "version": "v1.30.0", + "url": "https://github.com/oneapi-src/level-zero/releases/tag/v1.30.0", "root": "{DEPS_ROOT}/opencl/runtime/linux/oclgpu" }, "tbb": { diff --git a/sycl/test-e2e/Adapters/level_zero/barrier_optimization.cpp b/sycl/test-e2e/Adapters/level_zero/barrier_optimization.cpp index 8bfe806f110ad..d6b83b6de01e3 100644 --- a/sycl/test-e2e/Adapters/level_zero/barrier_optimization.cpp +++ b/sycl/test-e2e/Adapters/level_zero/barrier_optimization.cpp @@ -5,6 +5,9 @@ // UNSUPPORTED: windows && (gpu-intel-gen12 || arch-intel_gpu_bmg_g21) // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22033 +// UNSUPPORTED: linux && (arch-intel_gpu_pvc || arch-intel_gpu_bmg_g21 || arch-intel_gpu_mtl_u) +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22405 + // Test to check that we don't insert unnecessary L0 commands for // queue::ext_oneapi_submit_barrier() when we have in-order queue. 
#include diff --git a/sycl/test-e2e/Adapters/level_zero/interop-buffer.cpp b/sycl/test-e2e/Adapters/level_zero/interop-buffer.cpp index dd25811cb7815..c69e8902224ab 100644 --- a/sycl/test-e2e/Adapters/level_zero/interop-buffer.cpp +++ b/sycl/test-e2e/Adapters/level_zero/interop-buffer.cpp @@ -6,6 +6,8 @@ // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/21556 // UNSUPPORTED: windows && arch-intel_gpu_bmg_g21 // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22284 +// UNSUPPORTED: linux && run-mode +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22405 // RUN: %{build} -Wno-error=deprecated-declarations %level_zero_options -o %t.out // RUN: env UR_L0_DEBUG=1 %{run} %t.out diff --git a/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp b/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp index 203ee4a9af366..c5496a1c62b85 100644 --- a/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp +++ b/sycl/test-e2e/Basic/alloc_pinned_host_memory.cpp @@ -6,6 +6,9 @@ // UNSUPPORTED: windows && arch-intel_gpu_bmg_g21 // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22287 +// UNSUPPORTED: linux && (arch-intel_gpu_pvc || arch-intel_gpu_bmg_g21) +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22405 + // RUN: %{build} -o %t2.out // RUN: env SYCL_UR_TRACE=2 UR_L0_DEBUG=1 %{run} %t2.out %if level_zero %{ 2>&1 | FileCheck %s %} // RUN: %{run} %t2.out diff --git a/sycl/test-e2e/Basic/buffer/buffer_create.cpp b/sycl/test-e2e/Basic/buffer/buffer_create.cpp index 7af5b5abf3e2a..92233e723dc30 100644 --- a/sycl/test-e2e/Basic/buffer/buffer_create.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer_create.cpp @@ -11,6 +11,9 @@ // UNSUPPORTED: windows && (gpu-intel-gen12 || gpu-intel-dg2 || arch-intel_gpu_bmg_g21) // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/21954 +// UNSUPPORTED: linux && arch-intel_gpu_mtl_u +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22405 + #include #include #include diff --git 
a/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp b/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp index 2ca302bf98229..cd0850e13d647 100644 --- a/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp +++ b/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp @@ -4,6 +4,9 @@ // UNSUPPORTED: windows && (gpu-intel-gen12 || arch-intel_gpu_bmg_g21) // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/21556 +// UNSUPPORTED: linux && run-mode && !gpu-intel-dg2 +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22405 + // RUN: %{build} -o %t.out // RUN: env SYCL_UR_TRACE=2 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s // diff --git a/sycl/test-e2e/Properties/cache_config.cpp b/sycl/test-e2e/Properties/cache_config.cpp index ede21a864b878..ad331d3f83c79 100644 --- a/sycl/test-e2e/Properties/cache_config.cpp +++ b/sycl/test-e2e/Properties/cache_config.cpp @@ -6,6 +6,9 @@ // UNSUPPORTED: windows && arch-intel_gpu_bmg_g21 // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22099 +// UNSUPPORTED: linux && (arch-intel_gpu_pvc || arch-intel_gpu_bmg_g21) +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22405 + // RUN: %{build} -Wno-deprecated-declarations -o %t.out // RUN: env SYCL_UR_TRACE=-1 UR_L0_DEBUG=1 %{run} %t.out 2>&1 | FileCheck %s diff --git a/sycl/test-e2e/SpecConstants/2020/non_native/gpu.cpp b/sycl/test-e2e/SpecConstants/2020/non_native/gpu.cpp index c7c8cfd8a9aa0..ef872a4aa7cb6 100644 --- a/sycl/test-e2e/SpecConstants/2020/non_native/gpu.cpp +++ b/sycl/test-e2e/SpecConstants/2020/non_native/gpu.cpp @@ -1,5 +1,8 @@ // REQUIRES: ocloc, gpu, target-spir +// UNSUPPORTED: linux && arch-intel_gpu_pvc +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22405 + // RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %S/Inputs/common.cpp -o %t.out -fsycl-dead-args-optimization // RUN: %{run} %t.out diff --git 
a/sycl/test-e2e/XPTI/mem_alloc_events_linux.cpp b/sycl/test-e2e/XPTI/mem_alloc_events_linux.cpp index 02a32cfb74ff1..a20929dbe1f28 100644 --- a/sycl/test-e2e/XPTI/mem_alloc_events_linux.cpp +++ b/sycl/test-e2e/XPTI/mem_alloc_events_linux.cpp @@ -1,4 +1,6 @@ // REQUIRES: xptifw, level_zero, gpu, linux +// UNSUPPORTED: linux && arch-intel_gpu_pvc +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22405 // RUN: %build_collector // RUN: %{build} -o %t.out // RUN: env XPTI_TRACE_ENABLE=1 env XPTI_FRAMEWORK_DISPATCHER=%xptifw_dispatcher env XPTI_SUBSCRIBERS=%t_collector.dll %{run} %t.out | FileCheck %s diff --git a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer_binary_semaphore.cpp b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer_binary_semaphore.cpp index e167ed8b01b6c..f664053769cc7 100644 --- a/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer_binary_semaphore.cpp +++ b/sycl/test-e2e/bindless_images/vulkan_interop/vulkan_sycl_buffer_binary_semaphore.cpp @@ -5,6 +5,9 @@ // Linux fix tracked by GSD-12371, landed in driver 38362. // REQUIRES-INTEL-DRIVER: lin: 38362 win: 101.9999 +// UNSUPPORTED: linux && run-mode +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22405 + // RUN: %{build} %link-vulkan -o %t.out %if target-spir %{ -Wno-ignored-attributes %} // RUN: %{run} %t.out --no-sem // RUN: %{run} %t.out --dual-sem From 90a20b815f3dd6c7e9dbf64a71f62c520fce32e9 Mon Sep 17 00:00:00 2001 From: Lukas Sommer Date: Wed, 24 Jun 2026 16:07:47 +0200 Subject: [PATCH 343/511] [AMDGPU] Promote uniform i16 ABS to i32 (#204526) GlobalISel already expands uniform `i16` `G_ABS` to sign-extend to i32 and the native `s_abs` instruction. This adds a similar expansion as DAGCombiner pattern, promoting uniform `i16` `ABS` to `i32` that can use `s_abs`. 
--------- Signed-off-by: Lukas Sommer --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 26 +++++++++++++++++++ llvm/lib/Target/AMDGPU/SIISelLowering.h | 1 + llvm/test/CodeGen/AMDGPU/absdiff.ll | 4 +-- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index be6fd4d243252..795e487219d8f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1041,6 +1041,7 @@ bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const { bool AMDGPUTargetLowering::isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const { switch (N->getOpcode()) { + case ISD::ABS: case ISD::ADD: case ISD::SUB: case ISD::SHL: diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 03d7b936d4109..502cc438c836c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1052,6 +1052,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM, ISD::FMA, + ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, @@ -8795,6 +8796,7 @@ SDValue SITargetLowering::lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const { static unsigned getExtOpcodeForPromotedOp(SDValue Op) { switch (Op->getOpcode()) { + case ISD::ABS: case ISD::SRA: case ISD::SMIN: case ISD::SMAX: @@ -8823,6 +8825,26 @@ static unsigned getExtOpcodeForPromotedOp(SDValue Op) { } } +SDValue +SITargetLowering::promoteUniformUnaryOpToI32(SDValue Op, + DAGCombinerInfo &DCI) const { + EVT OpTy = Op.getValueType(); + SelectionDAG &DAG = DCI.DAG; + EVT ExtTy = OpTy.changeElementType(*DAG.getContext(), MVT::i32); + + if (isNarrowingProfitable(Op.getNode(), ExtTy, OpTy)) + return SDValue(); + + SDLoc DL(Op); + SDValue Input = Op.getOperand(0); + const unsigned ExtOp = getExtOpcodeForPromotedOp(Op); + Input = 
DAG.getNode(ExtOp, DL, ExtTy, Input); + + SDValue NewVal = DAG.getNode(Op.getOpcode(), DL, ExtTy, Input); + + return DAG.getNode(ISD::TRUNCATE, DL, OpTy, NewVal); +} + SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const { const unsigned Opc = Op.getOpcode(); @@ -18543,6 +18565,10 @@ SDValue SITargetLowering::performSelectCombine(SDNode *N, SDValue SITargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { + case ISD::ABS: + if (SDValue Res = promoteUniformUnaryOpToI32(SDValue(N, 0), DCI)) + return Res; + break; case ISD::ADD: case ISD::SUB: case ISD::SHL: diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h index 3d72723c9ca8f..c98426cdac0b1 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.h +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -168,6 +168,7 @@ class SITargetLowering final : public AMDGPUTargetLowering { SDValue lowerFMINIMUM_FMAXIMUM(SDValue Op, SelectionDAG &DAG) const; SDValue lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const; SDValue promoteUniformOpToI32(SDValue Op, DAGCombinerInfo &DCI) const; + SDValue promoteUniformUnaryOpToI32(SDValue Op, DAGCombinerInfo &DCI) const; SDValue lowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerMUL(SDValue Op, SelectionDAG &DAG) const; SDValue lowerXMULO(SDValue Op, SelectionDAG &DAG) const; diff --git a/llvm/test/CodeGen/AMDGPU/absdiff.ll b/llvm/test/CodeGen/AMDGPU/absdiff.ll index 5e4947ea5e0b5..94385621f6b15 100644 --- a/llvm/test/CodeGen/AMDGPU/absdiff.ll +++ b/llvm/test/CodeGen/AMDGPU/absdiff.ll @@ -43,10 +43,8 @@ define amdgpu_ps i16 @absdiff_i16_false(i16 inreg %arg0, i16 inreg %arg1) { ; CHECK-LABEL: absdiff_i16_false: ; CHECK: ; %bb.0: ; CHECK-NEXT: s_sub_i32 s0, s0, s1 -; CHECK-NEXT: s_sext_i32_i16 s1, s0 -; CHECK-NEXT: s_sub_i32 s0, 0, s0 ; CHECK-NEXT: s_sext_i32_i16 s0, s0 -; CHECK-NEXT: s_max_i32 s0, s1, s0 +; CHECK-NEXT: s_abs_i32 s0, s0 ; CHECK-NEXT: ; return to 
shader part epilog %diff = sub i16 %arg0, %arg1 %res = call i16 @llvm.abs.i16(i16 %diff, i1 false) ; INT_MIN input returns INT_MIN From 83abbef1602404b16519c6c78b0f4b7b32407b9c Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 24 Jun 2026 15:11:22 +0100 Subject: [PATCH 344/511] [X86] isHorizontalBinOp - constify hadd/sub pattern matcher to make it easier to reuse. (#205538) Avoid performing vector splits / bitcasts inside the isHorizontalBinOp call. This is to make it easier to reuse the pattern matcher for other uses (e.g. X86ISD::VPMADDWD/VPMADDUBSW detection). --- llvm/lib/Target/X86/X86ISelLowering.cpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index d3729b4102c55..8d87cf8a6665e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -55266,8 +55266,8 @@ static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG, /// A horizontal-op B, for some already available A and B, and if so then LHS is /// set to A, RHS to B, and the routine returns 'true'. static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS, - SelectionDAG &DAG, const X86Subtarget &Subtarget, - bool IsCommutative, + const SelectionDAG &DAG, + const X86Subtarget &Subtarget, bool IsCommutative, SmallVectorImpl &PostShuffleMask, bool ForceHorizOp) { // If either operand is undef, bail out. The binop should be simplified. 
@@ -55312,8 +55312,11 @@ static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS, ShuffleMask.assign(ScaledMask.begin(), ScaledMask.end()); } if (UseSubVector && SrcOps.size() == 1 && - scaleShuffleElements(SrcMask, 2 * NumElts, ScaledMask)) { - std::tie(N0, N1) = DAG.SplitVector(SrcOps[0], SDLoc(Op)); + scaleShuffleElements(SrcMask, 2 * NumElts, ScaledMask) && + SrcOps[0].getOpcode() == ISD::CONCAT_VECTORS && + SrcOps[0].getNumOperands() == 2) { + N0 = SrcOps[0].getOperand(0); + N1 = SrcOps[0].getOperand(1); ArrayRef Mask = ArrayRef(ScaledMask).slice(0, NumElts); ShuffleMask.assign(Mask.begin(), Mask.end()); } @@ -55449,8 +55452,8 @@ static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS, DAG, Subtarget)) return false; - LHS = DAG.getBitcast(VT, NewLHS); - RHS = DAG.getBitcast(VT, NewRHS); + LHS = NewLHS; + RHS = NewRHS; return true; } @@ -55481,7 +55484,9 @@ static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG, auto HorizOpcode = IsAdd ? X86ISD::FHADD : X86ISD::FHSUB; if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd, PostShuffleMask, MergableHorizOp(HorizOpcode))) { - SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS); + SDValue HorizBinOp = + DAG.getNode(HorizOpcode, SDLoc(N), VT, DAG.getBitcast(VT, LHS), + DAG.getBitcast(VT, RHS)); if (!PostShuffleMask.empty()) HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp, DAG.getUNDEF(VT), PostShuffleMask); @@ -55497,7 +55502,6 @@ static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG, break; if (VT == MVT::v8i16 || VT == MVT::v16i16 || (!IsSat && (VT == MVT::v4i32 || VT == MVT::v8i32))) { - SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); auto HorizOpcode = IsSat ? (IsAdd ? 
X86ISD::HADDS : X86ISD::HSUBS) @@ -55508,8 +55512,9 @@ static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG, ArrayRef Ops) { return DAG.getNode(HorizOpcode, DL, Ops[0].getValueType(), Ops); }; - SDValue HorizBinOp = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, - {LHS, RHS}, HOpBuilder); + SDValue HorizBinOp = SplitOpsAndApply( + DAG, Subtarget, SDLoc(N), VT, + {DAG.getBitcast(VT, LHS), DAG.getBitcast(VT, RHS)}, HOpBuilder); if (!PostShuffleMask.empty()) HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp, DAG.getUNDEF(VT), PostShuffleMask); From 441c6255baeffdfd501df7a8c4fa51a29a50cc79 Mon Sep 17 00:00:00 2001 From: KavinSai-synthara Date: Wed, 24 Jun 2026 20:08:41 +0530 Subject: [PATCH 345/511] [ARM] Select ssat for saturating clamps with a non-zero offset (#203854) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Summary On ARM/Thumb2, a saturating clamp to a signed range that is not centered on zero is *not* matched to `ssat`, even though it could be matched to `ssat`. The backend instead emits a long explicit compare/select clamp. This pattern is extremely common in quantized ML code: an int8 requantize is `clamp(accumulator + output_zero_point, -128, 127)`, and whenever the output zero_point != 0, the instruction count regresses from one ssat to ~10 instructions. The root cause is that InstCombine canonically **sinks the constant out of the min/max** — `clamp(X + C, -128, 127)` becomes `clamp(X, -128-C, 127-C) + C` — and the backend's SSAT matcher (`PerformMinMaxToSatCombine`) only handles a clamp whose bounds already form a saturation range. The offset of the form (`[-128-C, 127-C]`, width 256 = 2⁸) falls through to a generic clamp. 
#### Reproducer ```c #include <stdint.h> int8_t requant_offset(int32_t acc) { // output zero-point = 96 int32_t v = (acc >> 8) + 96; if (v < -128) v = -128; if (v > 127) v = 127; return (int8_t)v; } int8_t requant_zero(int32_t acc) { // output zero-point = 0 (for contrast) int32_t v = (acc >> 8); if (v < -128) v = -128; if (v > 127) v = 127; return (int8_t)v; } ``` ``` clang -O2 --target=thumbv7em-none-eabihf -mcpu=cortex-m4 -mfloat-abi=hard -mfpu=fpv4-sp-d16 -S issue.c ``` **Actual** `requant_offset` (zero-point 96): ```asm asrs r1, r0, #8 cmn.w r1, #224 mvn r1, #223 it gt asrgt r1, r0, #8 cmp r1, #31 it ge movge r1, #31 add.w r0, r1, #96 sxtb r0, r0 ``` `requant_zero` (zero-point 0): ```asm ssat r0, #8, r0, asr #8 ``` #### Expected `requant_offset` should select `ssat`, e.g.: ```asm movs r1, #96 add.w r0, r1, r0, asr #8 ssat r0, #8, r0 subs r0, #96 ``` #### Why this is sound A clamp whose width (hi − lo + 1) is a power of two is just an `ssat` shifted off zero by constant C. ``` clamp(X, lo, hi) == ssat_k(X − C) + C, C = lo + 2^(k-1) ``` e.g. 
`clamp(X, −224, 31)` == `clamp(X+96, (-224+96), (31+96)) - 96` == `ssat8(X + 96) − 96` Assisted-by: Claude --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 53 +++++++++++--- llvm/test/CodeGen/ARM/ssat-offset.ll | 96 +++++++++++++++++++++++++ 2 files changed, 139 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/ssat-offset.ll diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 248f5b3f9f083..042ae5cc6b74c 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -18096,22 +18096,55 @@ static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, if (Min.getOpcode() == ISD::SMAX) std::swap(Min, Max); + if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX) + return SDValue(); + APInt MinC = Min.getConstantOperandAPInt(1); APInt MaxC = Max.getConstantOperandAPInt(1); - - if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX || - !(MinC + 1).isPowerOf2()) + if (MaxC.sgt(MinC)) return SDValue(); SDLoc DL(Op); - if (MinC == ~MaxC) - return DAG.getNode(ARMISD::SSAT, DL, VT, Input, - DAG.getConstant(MinC.countr_one(), DL, VT)); - if (MaxC == 0) - return DAG.getNode(ARMISD::USAT, DL, VT, Input, - DAG.getConstant(MinC.countr_one(), DL, VT)); - return SDValue(); + // A clamp whose bounds are already a saturation range maps to a single + // SSAT / USAT. + if ((MinC + 1).isPowerOf2()) { + if (MinC == ~MaxC) + return DAG.getNode(ARMISD::SSAT, DL, VT, Input, + DAG.getConstant(MinC.countr_one(), DL, VT)); + if (MaxC == 0) + return DAG.getNode(ARMISD::USAT, DL, VT, Input, + DAG.getConstant(MinC.countr_one(), DL, VT)); + } + + // For power-of-two clamp widths, convert the range to be zero-centered, + // apply SSAT, and convert the result back. 
+ // + // Width = Hi - Lo + 1 + // Center = Lo + Width / 2 + // Result = ssat(X - Center) + Center + // + // The idea is to shift the input so that the clamp range is centered + // around zero, apply ssat, and then shift the result back. + // + // For example clamp(X, -118, 137) -> Width = 256, Center = 10, so it becomes + // ssat(X - 10, 8) + 10 + + APInt Width = MinC - MaxC + 1; + if (!Width.isPowerOf2() || Width.isOne()) + return SDValue(); + unsigned SatBit = Width.logBase2() - 1; // ssat to SatBit + 1 signed bits + APInt Center = MaxC + Width.lshr(1); + + // The rewrite is only valid when X - Center does not overflow; + SDValue NegC = DAG.getConstant(-Center, DL, VT); + if (DAG.computeOverflowForSignedAdd(Input, NegC) != SelectionDAG::OFK_Never) + return SDValue(); + + SDValue Shifted = DAG.getNode(ISD::ADD, DL, VT, Input, NegC); + SDValue Sat = DAG.getNode(ARMISD::SSAT, DL, VT, Shifted, + DAG.getConstant(SatBit, DL, VT)); + return DAG.getNode(ISD::ADD, DL, VT, Sat, DAG.getConstant(Center, DL, VT)); } /// PerformMinMaxCombine - Target-specific DAG combining for creating truncating diff --git a/llvm/test/CodeGen/ARM/ssat-offset.ll b/llvm/test/CodeGen/ARM/ssat-offset.ll new file mode 100644 index 0000000000000..be636bd77decf --- /dev/null +++ b/llvm/test/CodeGen/ARM/ssat-offset.ll @@ -0,0 +1,96 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=thumbv7em-none-eabi %s -o - | FileCheck %s + +; Tests for recovering SSAT from an "offset" clamp: a clamp(X, Lo, Hi) whose +; width Width = Hi - Lo + 1 is a power of two but whose bounds are not centered +; on zero. PerformMinMaxToSatCombine rewrites clamp(X, Lo, Hi) into +; ssat(X - Center) + Center, with Center = Lo + Width/2, guarded so that +; X - Center cannot signed-overflow. + +; A bare clamp to [-224, 31] (width 256). X is range-limited by the ashr, so +; X + 96 cannot overflow: clamp[-224,31] -> ssat8(X + 96) - 96. 
+define i32 @offset_clamp_bare(i32 %x) { +; CHECK-LABEL: offset_clamp_bare: +; CHECK: @ %bb.0: +; CHECK-NEXT: movs r1, #96 +; CHECK-NEXT: add.w r0, r1, r0, asr #8 +; CHECK-NEXT: ssat r0, #8, r0 +; CHECK-NEXT: subs r0, #96 +; CHECK-NEXT: bx lr + %s = ashr i32 %x, 8 + %lo = call i32 @llvm.smax.i32(i32 %s, i32 -224) + %hi = call i32 @llvm.smin.i32(i32 %lo, i32 31) + ret i32 %hi +} + +; An already-symmetric clamp [-128, 127] is still a bare ssat. +define i32 @symmetric_clamp(i32 %x) { +; CHECK-LABEL: symmetric_clamp: +; CHECK: @ %bb.0: +; CHECK-NEXT: ssat r0, #8, r0 +; CHECK-NEXT: bx lr + %lo = call i32 @llvm.smax.i32(i32 %x, i32 -128) + %hi = call i32 @llvm.smin.i32(i32 %lo, i32 127) + ret i32 %hi +} + +; An unsigned clamp [0, 255] is still a usat. +define i32 @unsigned_clamp(i32 %x) { +; CHECK-LABEL: unsigned_clamp: +; CHECK: @ %bb.0: +; CHECK-NEXT: usat r0, #8, r0 +; CHECK-NEXT: bx lr + %lo = call i32 @llvm.smax.i32(i32 %x, i32 0) + %hi = call i32 @llvm.smin.i32(i32 %lo, i32 255) + ret i32 %hi +} + +; X is unconstrained, so X + 96 may signed-overflow and the rewrite is unsound. +; Must stay an explicit clamp (no ssat). +define i32 @offset_clamp_may_overflow(i32 %x) { +; CHECK-LABEL: offset_clamp_may_overflow: +; CHECK: @ %bb.0: +; CHECK-NEXT: cmn.w r0, #224 +; CHECK-NEXT: it le +; CHECK-NEXT: mvnle r0, #223 +; CHECK-NEXT: cmp r0, #31 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge r0, #31 +; CHECK-NEXT: bx lr + %lo = call i32 @llvm.smax.i32(i32 %x, i32 -224) + %hi = call i32 @llvm.smin.i32(i32 %lo, i32 31) + ret i32 %hi +} + +; Width 151 ([-100, 50]) is not a power of two. No fold. 
+define i32 @offset_clamp_non_pow2_width(i32 %x) { +; CHECK-LABEL: offset_clamp_non_pow2_width: +; CHECK: @ %bb.0: +; CHECK-NEXT: mvn r1, #99 +; CHECK-NEXT: asrs r2, r0, #8 +; CHECK-NEXT: cmn.w r2, #100 +; CHECK-NEXT: it gt +; CHECK-NEXT: asrgt r1, r0, #8 +; CHECK-NEXT: cmp r1, #50 +; CHECK-NEXT: it ge +; CHECK-NEXT: movge r1, #50 +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bx lr + %s = ashr i32 %x, 8 + %lo = call i32 @llvm.smax.i32(i32 %s, i32 -100) + %hi = call i32 @llvm.smin.i32(i32 %lo, i32 50) + ret i32 %hi +} + +; Reversed bounds are a degenerate clamp. The modular APInt width would look +; like a power of two, but this must not become an offset ssat. +define i32 @offset_clamp_degenerate_bounds(i32 %x) { +; CHECK-LABEL: offset_clamp_degenerate_bounds: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov.w r0, #-2147483648 +; CHECK-NEXT: bx lr + %s = ashr i32 %x, 24 + %lo = call i32 @llvm.smax.i32(i32 %s, i32 1) + %hi = call i32 @llvm.smin.i32(i32 %lo, i32 -2147483648) + ret i32 %hi +} From fa3789ca96206e842af5e94ed4d9aadf1f1b3d15 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Wed, 24 Jun 2026 16:40:20 +0200 Subject: [PATCH 346/511] [clang][bytecode][NFC] Use SourceInfo for opcode emitter helpers (#205550) Instead of only using an `Expr*`. This makes more sense and we can later use a decl as a source as well. 
--- clang/lib/AST/ByteCode/Compiler.cpp | 118 ++++++++++++++-------------- clang/lib/AST/ByteCode/Compiler.h | 12 +-- 2 files changed, 66 insertions(+), 64 deletions(-) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 9444eeb0c2ad3..960b2c5cfca23 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -4963,26 +4963,26 @@ bool Compiler::visitAssignment(const Expr *LHS, const Expr *RHS, template template -bool Compiler::emitConst(T Value, PrimType Ty, const Expr *E) { +bool Compiler::emitConst(T Value, PrimType Ty, SourceInfo Info) { switch (Ty) { case PT_Sint8: - return this->emitConstSint8(Value, E); + return this->emitConstSint8(Value, Info); case PT_Uint8: - return this->emitConstUint8(Value, E); + return this->emitConstUint8(Value, Info); case PT_Sint16: - return this->emitConstSint16(Value, E); + return this->emitConstSint16(Value, Info); case PT_Uint16: - return this->emitConstUint16(Value, E); + return this->emitConstUint16(Value, Info); case PT_Sint32: - return this->emitConstSint32(Value, E); + return this->emitConstSint32(Value, Info); case PT_Uint32: - return this->emitConstUint32(Value, E); + return this->emitConstUint32(Value, Info); case PT_Sint64: - return this->emitConstSint64(Value, E); + return this->emitConstSint64(Value, Info); case PT_Uint64: - return this->emitConstUint64(Value, E); + return this->emitConstUint64(Value, Info); case PT_Bool: - return this->emitConstBool(Value, E); + return this->emitConstBool(Value, Info); case PT_Ptr: case PT_MemberPtr: case PT_Float: @@ -5003,28 +5003,28 @@ bool Compiler::emitConst(T Value, const Expr *E) { template bool Compiler::emitConst(const APSInt &Value, PrimType Ty, - const Expr *E) { + SourceInfo Info) { if (Ty == PT_IntAPS) - return this->emitConstIntAPS(Value, E); + return this->emitConstIntAPS(Value, Info); if (Ty == PT_IntAP) - return this->emitConstIntAP(Value, E); + return this->emitConstIntAP(Value, Info); if 
(Value.isSigned()) - return this->emitConst(Value.getSExtValue(), Ty, E); - return this->emitConst(Value.getZExtValue(), Ty, E); + return this->emitConst(Value.getSExtValue(), Ty, Info); + return this->emitConst(Value.getZExtValue(), Ty, Info); } template bool Compiler::emitConst(const APInt &Value, PrimType Ty, - const Expr *E) { + SourceInfo Info) { if (Ty == PT_IntAPS) - return this->emitConstIntAPS(Value, E); + return this->emitConstIntAPS(Value, Info); if (Ty == PT_IntAP) - return this->emitConstIntAP(Value, E); + return this->emitConstIntAP(Value, Info); if (isSignedType(Ty)) - return this->emitConst(Value.getSExtValue(), Ty, E); - return this->emitConst(Value.getZExtValue(), Ty, E); + return this->emitConst(Value.getSExtValue(), Ty, Info); + return this->emitConst(Value.getZExtValue(), Ty, Info); } template @@ -5411,42 +5411,44 @@ bool Compiler::visitDtorCall(const VarDecl *VD, const APValue &Value) { if (!this->emitGetPtrLocal(Local.Offset, VD)) return false; - if (!this->visitAPValueInitializer(Value, nullptr, Ty)) + + if (!this->visitAPValueInitializer(Value, VD, Ty)) return false; + return this->emitDestructionPop(D, VD); } template bool Compiler::visitAPValue(const APValue &Val, PrimType ValType, - const Expr *E) { + SourceInfo Info) { assert(!DiscardResult); if (Val.isInt()) - return this->emitConst(Val.getInt(), ValType, E); + return this->emitConst(Val.getInt(), ValType, Info); if (Val.isFloat()) { APFloat F = Val.getFloat(); - return this->emitFloat(F, E); + return this->emitFloat(F, Info); } if (Val.isMemberPointer()) { if (const ValueDecl *MemberDecl = Val.getMemberPointerDecl()) { - if (!this->emitGetMemberPtr(MemberDecl, E)) + if (!this->emitGetMemberPtr(MemberDecl, Info)) return false; bool IsDerived = Val.isMemberPointerToDerivedMember(); // Apply the member pointer path. 
for (const CXXRecordDecl *PathEntry : Val.getMemberPointerPath()) { - if (!this->emitCopyMemberPtrPath(PathEntry, IsDerived, E)) + if (!this->emitCopyMemberPtrPath(PathEntry, IsDerived, Info)) return false; } return true; } - return this->emitNullMemberPtr(0, nullptr, E); + return this->emitNullMemberPtr(0, nullptr, Info); } if (Val.isLValue()) { if (Val.isNullPointer()) - return this->emitNull(ValType, 0, nullptr, E); + return this->emitNull(ValType, 0, nullptr, Info); APValue::LValueBase Base = Val.getLValueBase(); ArrayRef Path = Val.getLValuePath(); @@ -5454,7 +5456,7 @@ bool Compiler::visitAPValue(const APValue &Val, PrimType ValType, if (const Expr *BaseExpr = Base.dyn_cast()) return this->visit(BaseExpr); if (const auto *VD = Base.dyn_cast()) { - if (!this->visitDeclRef(VD, E)) + if (!this->visitDeclRef(VD, Info.asExpr())) return false; QualType EntryType = VD->getType(); @@ -5463,9 +5465,9 @@ bool Compiler::visitAPValue(const APValue &Val, PrimType ValType, uint64_t Index = Entry.getAsArrayIndex(); QualType ElemType = EntryType->getAsArrayTypeUnsafe()->getElementType(); - if (!this->emitConst(Index, PT_Uint64, E)) + if (!this->emitConst(Index, PT_Uint64, Info)) return false; - if (!this->emitArrayElemPtrPop(PT_Uint64, E)) + if (!this->emitArrayElemPtrPop(PT_Uint64, Info)) return false; EntryType = ElemType; } else { @@ -5480,13 +5482,13 @@ bool Compiler::visitAPValue(const APValue &Val, PrimType ValType, const Decl *BaseOrMember = Entry.getAsBaseOrMember().getPointer(); if (const auto *FD = dyn_cast(BaseOrMember)) { unsigned EntryOffset = EntryRecord->getField(FD)->Offset; - if (!this->emitGetPtrFieldPop(EntryOffset, E)) + if (!this->emitGetPtrFieldPop(EntryOffset, Info)) return false; EntryType = FD->getType(); } else { const auto *Base = cast(BaseOrMember); unsigned BaseOffset = EntryRecord->getBase(Base)->Offset; - if (!this->emitGetPtrBasePop(BaseOffset, /*NullOK=*/false, E)) + if (!this->emitGetPtrBasePop(BaseOffset, /*NullOK=*/false, Info)) return 
false; EntryType = Ctx.getASTContext().getCanonicalTagType(Base); } @@ -5502,7 +5504,7 @@ bool Compiler::visitAPValue(const APValue &Val, PrimType ValType, template bool Compiler::visitAPValueInitializer(const APValue &Val, - const Expr *E, QualType T) { + SourceInfo Info, QualType T) { if (Val.isStruct()) { const Record *R = this->getRecord(T); assert(R); @@ -5513,16 +5515,16 @@ bool Compiler::visitAPValueInitializer(const APValue &Val, // Fields. if (OptPrimType PT = classify(FieldType)) { - if (!this->visitAPValue(F, *PT, E)) + if (!this->visitAPValue(F, *PT, Info)) return false; - if (!this->emitInitField(*PT, RF->Offset, E)) + if (!this->emitInitField(*PT, RF->Offset, Info)) return false; } else { - if (!this->emitGetPtrField(RF->Offset, E)) + if (!this->emitGetPtrField(RF->Offset, Info)) return false; - if (!this->visitAPValueInitializer(F, E, FieldType)) + if (!this->visitAPValueInitializer(F, Info, FieldType)) return false; - if (!this->emitFinishInitPop(E)) + if (!this->emitFinishInitPop(Info)) return false; } } @@ -5533,11 +5535,11 @@ bool Compiler::visitAPValueInitializer(const APValue &Val, const Record::Base *RB = R->getBase(I); QualType BaseType = Ctx.getASTContext().getCanonicalTagType(RB->Decl); - if (!this->emitGetPtrBase(RB->Offset, E)) + if (!this->emitGetPtrBase(RB->Offset, Info)) return false; - if (!this->visitAPValueInitializer(B, E, BaseType)) + if (!this->visitAPValueInitializer(B, Info, BaseType)) return false; - if (!this->emitFinishInitPop(E)) + if (!this->emitFinishInitPop(Info)) return false; } @@ -5554,21 +5556,21 @@ bool Compiler::visitAPValueInitializer(const APValue &Val, QualType FieldType = RF->Decl->getType(); if (OptPrimType PT = classify(FieldType)) { - if (!this->visitAPValue(F, *PT, E)) + if (!this->visitAPValue(F, *PT, Info)) return false; if (RF->isBitField()) return this->emitInitBitFieldActivate(*PT, RF->Offset, RF->bitWidth(), - E); - return this->emitInitFieldActivate(*PT, RF->Offset, E); + Info); + return 
this->emitInitFieldActivate(*PT, RF->Offset, Info); } - if (!this->emitGetPtrField(RF->Offset, E)) + if (!this->emitGetPtrField(RF->Offset, Info)) return false; - if (!this->emitActivate(E)) + if (!this->emitActivate(Info)) return false; - if (!this->visitAPValueInitializer(F, E, FieldType)) + if (!this->visitAPValueInitializer(F, Info, FieldType)) return false; - return this->emitPopPtr(E); + return this->emitPopPtr(Info); } if (Val.isArray()) { unsigned InitializedElems = Val.getArrayInitializedElts(); @@ -5582,18 +5584,18 @@ bool Compiler::visitAPValueInitializer(const APValue &Val, : Val.getArrayInitializedElt(A); if (ElemT) { - if (!this->visitAPValue(Elem, *ElemT, E)) + if (!this->visitAPValue(Elem, *ElemT, Info)) return false; - if (!this->emitInitElem(*ElemT, A, E)) + if (!this->emitInitElem(*ElemT, A, Info)) return false; } else { - if (!this->emitConstUint32(A, E)) + if (!this->emitConstUint32(A, Info)) return false; - if (!this->emitArrayElemPtrUint32(E)) + if (!this->emitArrayElemPtrUint32(Info)) return false; - if (!this->visitAPValueInitializer(Elem, E, ElemType)) + if (!this->visitAPValueInitializer(Elem, Info, ElemType)) return false; - if (!this->emitPopPtr(E)) + if (!this->emitPopPtr(Info)) return false; } } @@ -8163,15 +8165,15 @@ bool Compiler::emitDummyPtr(const DeclTy &D, const Expr *E, bool CU) { } template -bool Compiler::emitFloat(const APFloat &F, const Expr *E) { +bool Compiler::emitFloat(const APFloat &F, SourceInfo Info) { if (Floating::singleWord(F.getSemantics())) - return this->emitConstFloat(Floating(F), E); + return this->emitConstFloat(Floating(F), Info); APInt I = F.bitcastToAPInt(); return this->emitConstFloat( Floating(const_cast(I.getRawData()), llvm::APFloatBase::SemanticsToEnum(F.getSemantics())), - E); + Info); } // This function is constexpr if and only if To, From, and the types of diff --git a/clang/lib/AST/ByteCode/Compiler.h b/clang/lib/AST/ByteCode/Compiler.h index e0008e4eeebc4..85105d9d42520 100644 --- 
a/clang/lib/AST/ByteCode/Compiler.h +++ b/clang/lib/AST/ByteCode/Compiler.h @@ -317,8 +317,8 @@ class Compiler : public ConstStmtVisitor, bool>, bool Toplevel = false); VarCreationState visitDecl(const VarDecl *VD); /// Visit an APValue. - bool visitAPValue(const APValue &Val, PrimType ValType, const Expr *E); - bool visitAPValueInitializer(const APValue &Val, const Expr *E, QualType T); + bool visitAPValue(const APValue &Val, PrimType ValType, SourceInfo Info); + bool visitAPValueInitializer(const APValue &Val, SourceInfo Info, QualType T); /// Visit the given decl as if we have a reference to it. bool visitDeclRef(const ValueDecl *D, const Expr *E); @@ -366,15 +366,15 @@ class Compiler : public ConstStmtVisitor, bool>, bool visitAssignment(const Expr *LHS, const Expr *RHS, const Expr *E); /// Emits an APSInt constant. - bool emitConst(const llvm::APSInt &Value, PrimType Ty, const Expr *E); - bool emitConst(const llvm::APInt &Value, PrimType Ty, const Expr *E); + bool emitConst(const llvm::APSInt &Value, PrimType Ty, SourceInfo Info); + bool emitConst(const llvm::APInt &Value, PrimType Ty, SourceInfo Info); bool emitConst(const llvm::APSInt &Value, const Expr *E); bool emitConst(const llvm::APInt &Value, const Expr *E) { return emitConst(Value, classifyPrim(E), E); } /// Emits an integer constant. 
- template bool emitConst(T Value, PrimType Ty, const Expr *E); + template bool emitConst(T Value, PrimType Ty, SourceInfo Info); template bool emitConst(T Value, const Expr *E); bool emitBool(bool V, const Expr *E) override { return this->emitConst(V, E); @@ -421,7 +421,7 @@ class Compiler : public ConstStmtVisitor, bool>, bool emitRecordDestructionPop(const Record *R, SourceInfo Loc); bool emitDestructionPop(const Descriptor *Desc, SourceInfo Loc); bool emitDummyPtr(const DeclTy &D, const Expr *E, bool CU = false); - bool emitFloat(const APFloat &F, const Expr *E); + bool emitFloat(const APFloat &F, SourceInfo Info); unsigned collectBaseOffset(const QualType BaseType, const QualType DerivedType); bool emitLambdaStaticInvokerBody(const CXXMethodDecl *MD); From 86b8ca5e2e4e5fd7a6cc3ca72a1fd45355199a44 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 24 Jun 2026 15:54:16 +0100 Subject: [PATCH 347/511] [X86] matchPMADDWD - add support for larger source types (#205391) Handle cases where the source vector type came from a vXi32 type wider than 2 x the original vXi16 type The matcher only bothers with the lower elements - it doesn't matter if we're extracting from a wider vector Fixes a number of SSE/AVX512 targets that failed to legalize to recoverable vector widths --- llvm/lib/Target/X86/X86ISelLowering.cpp | 24 +- llvm/test/CodeGen/X86/madd.ll | 281 ++++++------------------ 2 files changed, 77 insertions(+), 228 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 8d87cf8a6665e..2ae8a433297b2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -59622,12 +59622,12 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDNode *N, return SDValue(); if (!Mul) { // First time an extract_elt's source vector is visited. Must be a MUL - // with 2X number of vector elements than the BUILD_VECTOR. 
+ // with at least 2X number of vector elements than the BUILD_VECTOR. // Both extracts must be from same MUL. Mul = Vec0L; if ((Mul.getOpcode() != ISD::MUL && Mul.getOpcode() != ISD::SHL && Mul.getOpcode() != ISD::SIGN_EXTEND) || - Mul.getValueType().getVectorNumElements() != 2 * e) + Mul.getValueType().getVectorNumElements() < (2 * e)) return SDValue(); } // Check that the extract is from the same MUL previously seen. @@ -59637,6 +59637,7 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDNode *N, EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, VT.getVectorNumElements() * 2); + EVT MulVT = TruncVT.changeVectorElementType(*DAG.getContext(), MVT::i32); SDValue N0, N1; if (Mul.getOpcode() == ISD::MUL) { @@ -59646,15 +59647,17 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDNode *N, Mode == ShrinkMode::MULU16) return SDValue(); - N0 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mul.getOperand(0)); - N1 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mul.getOperand(1)); + N0 = DAG.getExtractSubvector(DL, MulVT, Mul.getOperand(0), 0); + N1 = DAG.getExtractSubvector(DL, MulVT, Mul.getOperand(1), 0); + N0 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N0); + N1 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N1); } else if (Mul.getOpcode() == ISD::SHL) { SDValue ShVal = Mul.getOperand(0); if (ShVal.getOpcode() != ISD::SIGN_EXTEND) return SDValue(); N0 = ShVal.getOperand(0); - if (N0.getValueType() != TruncVT) + if (N0.getValueType().getScalarType() != MVT::i16) return SDValue(); // A shift by more than 15 would overflow an i16. 
@@ -59663,17 +59666,18 @@ static SDValue matchPMADDWD(SelectionDAG &DAG, SDNode *N, })) return SDValue(); + N0 = DAG.getExtractSubvector(DL, TruncVT, N0, 0); + N1 = DAG.getExtractSubvector(DL, MulVT, Mul.getOperand(1), 0); N1 = DAG.getNode(ISD::SHL, DL, TruncVT, DAG.getConstant(1, DL, TruncVT), - DAG.getZExtOrTrunc(Mul.getOperand(1), DL, TruncVT)); + DAG.getZExtOrTrunc(N1, DL, TruncVT)); } else { assert(Mul.getOpcode() == ISD::SIGN_EXTEND); - // Add a trivial multiplication with 1 so that we can make use of VPMADDWD. - N0 = Mul.getOperand(0); - - if (N0.getValueType() != TruncVT) + if (Mul.getOperand(0).getValueType().getScalarType() != MVT::i16) return SDValue(); + // Add a trivial multiplication with 1 so that we can make use of VPMADDWD. + N0 = DAG.getExtractSubvector(DL, TruncVT, Mul.getOperand(0), 0); N1 = DAG.getConstant(1, DL, TruncVT); } diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll index 5912e6aa3a3c4..87bec9597d35f 100644 --- a/llvm/test/CodeGen/X86/madd.ll +++ b/llvm/test/CodeGen/X86/madd.ll @@ -2083,56 +2083,17 @@ define <4 x i32> @pmaddwd_8_swapped(<8 x i16> %A, <8 x i16> %B) { ret <4 x i32> %ret } -; FIXME: SSE2 fails to match PMADDWD define <4 x i32> @larger_mul(<16 x i16> %A, <16 x i16> %B) { -; SSE2-LABEL: larger_mul: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: pmulhw %xmm2, %xmm1 -; SSE2-NEXT: pmullw %xmm2, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm2[0,2] -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3] -; SSE2-NEXT: paddd %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE42-LABEL: larger_mul: -; SSE42: # %bb.0: -; SSE42-NEXT: pxor %xmm1, %xmm1 -; SSE42-NEXT: pmovzxwd {{.*#+}} xmm3 = 
xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero -; SSE42-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] -; SSE42-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; SSE42-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] -; SSE42-NEXT: pmaddwd %xmm2, %xmm0 -; SSE42-NEXT: pmaddwd %xmm3, %xmm1 -; SSE42-NEXT: phaddd %xmm0, %xmm1 -; SSE42-NEXT: movdqa %xmm1, %xmm0 -; SSE42-NEXT: retq -; -; AVX1-LABEL: larger_mul: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: larger_mul: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; SSE-LABEL: larger_mul: +; SSE: # %bb.0: +; SSE-NEXT: pmaddwd %xmm2, %xmm0 +; SSE-NEXT: retq ; -; AVX512-LABEL: larger_mul: -; AVX512: # %bb.0: -; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512-NEXT: vpmulld %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX-LABEL: larger_mul: +; AVX: # %bb.0: +; AVX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq %a = sext <16 x i16> %A to <16 x i32> %b = sext <16 x i16> %B to <16 x i32> %m = mul nsw <16 x i32> %a, %b @@ -2142,48 +2103,17 @@ define <4 x i32> @larger_mul(<16 x i16> %A, <16 x i16> %B) { ret <4 x i32> %ret } -; FIXME: SSE fails to match PMADDWD define <4 x i32> @larger_sext(<16 x i16> %A) { -; SSE2-LABEL: larger_sext: -; SSE2: # %bb.0: -; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2] -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] -; 
SSE2-NEXT: paddd %xmm2, %xmm0 -; SSE2-NEXT: retq -; -; SSE42-LABEL: larger_sext: -; SSE42: # %bb.0: -; SSE42-NEXT: pmovsxwd %xmm0, %xmm1 -; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; SSE42-NEXT: pmovsxwd %xmm0, %xmm0 -; SSE42-NEXT: phaddd %xmm0, %xmm1 -; SSE42-NEXT: movdqa %xmm1, %xmm0 -; SSE42-NEXT: retq -; -; AVX1-LABEL: larger_sext: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,1,1,1,1,1,1] -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: larger_sext: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,1,1,1,1,1,1] -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; SSE-LABEL: larger_sext: +; SSE: # %bb.0: +; SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,1,1,1,1,1,1] +; SSE-NEXT: retq ; -; AVX512-LABEL: larger_sext: -; AVX512: # %bb.0: -; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX-LABEL: larger_sext: +; AVX: # %bb.0: +; AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,1,1,1,1,1,1] +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq %a = sext <16 x i16> %A to <16 x i32> %odd = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> %even = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> @@ -2191,53 +2121,17 @@ define <4 x i32> @larger_sext(<16 x i16> %A) { ret <4 x i32> %ret } -; FIXME: SSE fails to match PMADDWD define <4 x i32> @larger_shl(<16 x i16> %A) { -; SSE2-LABEL: larger_shl: -; SSE2: # %bb.0: -; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7] -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: pslld $7, %xmm0 -; SSE2-NEXT: pslld $7, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm0[0,2] -; 
SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm0[1,3] -; SSE2-NEXT: paddd %xmm2, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 -; SSE2-NEXT: retq -; -; SSE42-LABEL: larger_shl: -; SSE42: # %bb.0: -; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] -; SSE42-NEXT: pmovsxwd %xmm1, %xmm1 -; SSE42-NEXT: pmovsxwd %xmm0, %xmm0 -; SSE42-NEXT: pslld $7, %xmm0 -; SSE42-NEXT: pslld $7, %xmm1 -; SSE42-NEXT: phaddd %xmm1, %xmm0 -; SSE42-NEXT: retq -; -; AVX1-LABEL: larger_shl: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,128,128,128,128,128,128,128] -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: larger_shl: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,128,128,128,128,128,128,128] -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; SSE-LABEL: larger_shl: +; SSE: # %bb.0: +; SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [128,128,128,128,128,128,128,128] +; SSE-NEXT: retq ; -; AVX512-LABEL: larger_shl: -; AVX512: # %bb.0: -; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 -; AVX512-NEXT: vpslld $7, %ymm0, %ymm0 -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX-LABEL: larger_shl: +; AVX: # %bb.0: +; AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [128,128,128,128,128,128,128,128] +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq %a = sext <16 x i16> %A to <16 x i32> %shl = shl <16 x i32> %a, splat (i32 7) %odd = shufflevector <16 x i32> %shl, <16 x i32> undef, <4 x i32> @@ -3842,22 +3736,22 @@ define <5 x i32> @oddvector_mul(<16 x i16> %A, <16 x i16> %B) { define <4 x i32> @oddvector_sext(<13 x i16> %A) { ; SSE2-LABEL: oddvector_sext: ; SSE2: # %bb.0: -; SSE2-NEXT: pxor %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pinsrw $1, %r8d, %xmm1 -; SSE2-NEXT: pinsrw $3, %r9d, %xmm1 -; SSE2-NEXT: pinsrw $5, {{[0-9]+}}(%rsp), %xmm1 -; SSE2-NEXT: pinsrw $7, {{[0-9]+}}(%rsp), 
%xmm1 -; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: pinsrw $1, %edi, %xmm0 -; SSE2-NEXT: pinsrw $3, %esi, %xmm0 -; SSE2-NEXT: pinsrw $5, %edx, %xmm0 -; SSE2-NEXT: pinsrw $7, %ecx, %xmm0 -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2] -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] -; SSE2-NEXT: paddd %xmm2, %xmm0 +; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: movd %r9d, %xmm0 +; SSE2-NEXT: movd %r8d, %xmm2 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; SSE2-NEXT: movd %ecx, %xmm0 +; SSE2-NEXT: movd %edx, %xmm1 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; SSE2-NEXT: movd %esi, %xmm3 +; SSE2-NEXT: movd %edi, %xmm0 +; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] +; SSE2-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,1,1,1,1,1,1] ; SSE2-NEXT: retq ; ; SSE42-LABEL: oddvector_sext: @@ -3866,34 +3760,18 @@ define <4 x i32> @oddvector_sext(<13 x i16> %A) { ; SSE42-NEXT: pinsrw $1, %esi, %xmm0 ; SSE42-NEXT: pinsrw $2, %edx, %xmm0 ; SSE42-NEXT: pinsrw $3, %ecx, %xmm0 -; SSE42-NEXT: movd %r8d, %xmm1 -; SSE42-NEXT: pinsrw $1, %r9d, %xmm1 -; SSE42-NEXT: pinsrw $2, {{[0-9]+}}(%rsp), %xmm1 -; SSE42-NEXT: pinsrw $3, {{[0-9]+}}(%rsp), %xmm1 -; SSE42-NEXT: pmovsxwd %xmm1, %xmm1 -; SSE42-NEXT: pmovsxwd %xmm0, %xmm0 -; SSE42-NEXT: phaddd %xmm1, %xmm0 +; SSE42-NEXT: pinsrw $4, %r8d, %xmm0 +; SSE42-NEXT: pinsrw $5, %r9d, %xmm0 +; SSE42-NEXT: 
pinsrw $6, {{[0-9]+}}(%rsp), %xmm0 +; SSE42-NEXT: pinsrw $7, {{[0-9]+}}(%rsp), %xmm0 +; SSE42-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,1,1,1,1,1,1] ; SSE42-NEXT: retq ; -; AVX1-LABEL: oddvector_sext: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,1,1,1,1,1,1] -; AVX1-NEXT: vzeroupper -; AVX1-NEXT: retq -; -; AVX2-LABEL: oddvector_sext: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,1,1,1,1,1,1] -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq -; -; AVX512-LABEL: oddvector_sext: -; AVX512: # %bb.0: -; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX-LABEL: oddvector_sext: +; AVX: # %bb.0: +; AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,1,1,1,1,1,1] +; AVX-NEXT: vzeroupper +; AVX-NEXT: retq %a = sext <13 x i16> %A to <13 x i32> %odd = shufflevector <13 x i32> %a, <13 x i32> undef, <4 x i32> %even = shufflevector <13 x i32> %a, <13 x i32> undef, <4 x i32> @@ -3902,50 +3780,17 @@ define <4 x i32> @oddvector_sext(<13 x i16> %A) { } define <4 x i32> @oddvector_sext_load(ptr %p) { -; SSE2-LABEL: oddvector_sext_load: -; SSE2: # %bb.0: -; SSE2-NEXT: movdqa (%rdi), %xmm0 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; SSE2-NEXT: psrad $16, %xmm1 -; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] -; SSE2-NEXT: psrad $16, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2] -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] -; SSE2-NEXT: paddd %xmm2, %xmm0 -; SSE2-NEXT: retq -; -; SSE42-LABEL: oddvector_sext_load: -; SSE42: # %bb.0: -; SSE42-NEXT: movdqa (%rdi), %xmm1 -; SSE42-NEXT: pmovsxwd %xmm1, %xmm0 -; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] -; SSE42-NEXT: pmovsxwd %xmm1, %xmm1 -; 
SSE42-NEXT: phaddd %xmm1, %xmm0 -; SSE42-NEXT: retq -; -; AVX1-LABEL: oddvector_sext_load: -; AVX1: # %bb.0: -; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm0 -; AVX1-NEXT: vpmovsxwd (%rdi), %xmm1 -; AVX1-NEXT: vphaddd %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: retq -; -; AVX2-LABEL: oddvector_sext_load: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vphaddd %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vzeroupper -; AVX2-NEXT: retq +; SSE-LABEL: oddvector_sext_load: +; SSE: # %bb.0: +; SSE-NEXT: movdqa (%rdi), %xmm0 +; SSE-NEXT: pmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1,1,1,1,1,1,1,1] +; SSE-NEXT: retq ; -; AVX512-LABEL: oddvector_sext_load: -; AVX512: # %bb.0: -; AVX512-NEXT: vpmovsxwd (%rdi), %zmm0 -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vphaddd %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX-LABEL: oddvector_sext_load: +; AVX: # %bb.0: +; AVX-NEXT: vmovdqa (%rdi), %xmm0 +; AVX-NEXT: vpmaddwd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 # [1,1,1,1,1,1,1,1] +; AVX-NEXT: retq %A = load <13 x i16>, ptr %p %a = sext <13 x i16> %A to <13 x i32> %odd = shufflevector <13 x i32> %a, <13 x i32> undef, <4 x i32> From 636c2923b70155e12d5dd009387d5539b72c798d Mon Sep 17 00:00:00 2001 From: Jiaqi He Date: Wed, 24 Jun 2026 22:57:31 +0800 Subject: [PATCH 348/511] [mlir][tosa] Handle function declarations in tosa input shape pass (#205359) Fixes https://github.com/llvm/llvm-project/issues/205063. The `tosa-experimental-input-shape` pass currently does not handle function declarations correctly. The pass may run on declarations, but the current implementation assumes that every function has a body and unconditionally accesses the entry block and the last block when updating argument and result types. This patch checks whether the function has a body before accessing body blocks. 
For declarations, the pass updates the function signature input types and preserves the original result types, since there is no return operation from which result types can be inferred. A regression test is added for the declaration case. --- .../Tosa/Transforms/TosaInputShape.cpp | 41 ++++++++++--------- mlir/test/Dialect/Tosa/tosa-input-shape.mlir | 13 ++++++ 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaInputShape.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaInputShape.cpp index e733fb8d378f5..801542116358f 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaInputShape.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaInputShape.cpp @@ -132,16 +132,18 @@ struct TosaInputShape : public tosa::impl::TosaInputShapeBase { return tensorType.cloneWith(requestedShape, tensorType.getElementType()); }; - // Update argument shapes in the entry block - Block &entryBlock = func.getBody().front(); - const SmallVector argTypes(entryBlock.getArgumentTypes()); - for (const auto &[argIdx, shape] : argsParsed) { - FailureOr newTensorType = - getUpdatedTensorType(argIdx, argTypes, shape); - if (failed(newTensorType)) - return signalPassFailure(); - - entryBlock.getArgument(argIdx).setType(newTensorType.value()); + // Update argument shapes in the entry block if the function has body. 
+ if (!func.getBody().empty()) { + Block &entryBlock = func.getBody().front(); + const SmallVector argTypes(entryBlock.getArgumentTypes()); + for (const auto &[argIdx, shape] : argsParsed) { + FailureOr newTensorType = + getUpdatedTensorType(argIdx, argTypes, shape); + if (failed(newTensorType)) + return signalPassFailure(); + + entryBlock.getArgument(argIdx).setType(newTensorType.value()); + } } // Get new func argument types @@ -158,16 +160,17 @@ struct TosaInputShape : public tosa::impl::TosaInputShapeBase { } // Update function signature - Block &lastBlock = func.getBody().back(); - const Operation *terminator = lastBlock.getTerminator(); - SmallVector newResults; - if (auto returnOp = dyn_cast_or_null(terminator)) { - const auto types = returnOp.getOperandTypes(); - newResults.assign(types.begin(), types.end()); - } else { - const auto types = oldFunctionType.getResults(); - newResults.assign(types.begin(), types.end()); + const auto oldResultTypes = oldFunctionType.getResults(); + SmallVector newResults(oldResultTypes.begin(), oldResultTypes.end()); + if (!func.getBody().empty()) { + Block &lastBlock = func.getBody().back(); + const Operation *terminator = lastBlock.getTerminator(); + if (auto returnOp = dyn_cast_or_null(terminator)) { + const auto returnTypes = returnOp.getOperandTypes(); + newResults.assign(returnTypes.begin(), returnTypes.end()); + } } + const FunctionType newFunctionType = oldFunctionType.clone(newInputs, newResults); func.setFunctionType(newFunctionType); diff --git a/mlir/test/Dialect/Tosa/tosa-input-shape.mlir b/mlir/test/Dialect/Tosa/tosa-input-shape.mlir index ee8e7aac609d5..8b390a1826240 100644 --- a/mlir/test/Dialect/Tosa/tosa-input-shape.mlir +++ b/mlir/test/Dialect/Tosa/tosa-input-shape.mlir @@ -70,3 +70,16 @@ func.func @test_incompatible_input_shape(%arg0: tensor<1x?xf32>, %arg1: tensor<1 // expected-error@-1 {{arg0 has incompatible shape with requested input shape (2, 16), got 'tensor<1x?xf32>'}} return %arg0 : tensor<1x?xf32> 
} + +// ----- + +// CHECK-LABEL: test_func_decl +func.func private @test_func_decl( + // CHECK: tensor<2x16xi32> + %arg0: tensor<2x?xi32>, + // CHECK: f32 + %arg1: f32, + // CHECK: tensor<64x9xi32> + %arg2: tensor) -> + // CHECK: tensor + tensor From ee2025fc8617e19a7ac9bb3330e934885a4f4b7c Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Wed, 24 Jun 2026 17:00:28 +0200 Subject: [PATCH 349/511] [clang][bytecode] Work around virtual bases being present in APValues (#205553) This happens in code called via `evaluateDestruction()`, where we consume an `APValue` created by the current interpreter. APValues don't have a notion of virtual bases right now, so the virtual bases simply appear as regular ones. --- clang/lib/AST/ByteCode/Compiler.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index 960b2c5cfca23..a74bea26f5c28 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -5531,6 +5531,11 @@ bool Compiler::visitAPValueInitializer(const APValue &Val, // Bases. for (unsigned I = 0, N = Val.getStructNumBases(); I != N; ++I) { + // FIXME: APValue doesn't know about virtual bases. + // We simply assume that if the APValue has more bases than the Record, + // those additional bases must be virtual. + if (I >= R->getNumBases()) + break; const APValue &B = Val.getStructBase(I); const Record::Base *RB = R->getBase(I); QualType BaseType = Ctx.getASTContext().getCanonicalTagType(RB->Decl); From 668c4da06cdc3602dcda8cd7c8d30dab0e6fd488 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 24 Jun 2026 08:22:51 -0700 Subject: [PATCH 350/511] [AArch64][ISel] Enable profile-aware branch condition merging (#201486) AArch64 previously inherited the default {-1, -1, -1} for `getJumpConditionMergingParams`, causing `shouldKeepJumpConditionsTogether` in SelectionDAGBuilder to always return false. 
This meant compound branch conditions (br (and/or cond1, cond2)) were always split into separate basic blocks at the DAG level, and profile data from BranchProbabilityInfo was never consulted for the merge/split decision. Override `getJumpConditionMergingParams` in AArch64TargetLowering with tunable cl::opt parameters matching the X86 structure. Since `CCMP` is part of the base AArch64 ISA, the `CCMP` bias is applied unconditionally. Default values: `BaseCost=2, CcmpBias=6 (effective threshold 8), LikelyBias=0, UnlikelyBias=-1`. This enables three improvements: 1. Profile-guided merge/split decisions using BranchProbabilityInfo 2. Smarter compare ordering at the DAG level (e.g., placing large immediates in CMP and small ones in CCMP to respect the 0-31 immediate range) 3. Branch elimination in cases like (a == 5 || b == 32) where the post-ISel CCMP pass previously could not merge due to immediate range constraints Godbolt: https://godbolt.org/z/99d8cnKx8 (note: I am not sure why godbolt is not showing the compiler I saved, but essentially the example shows the comparison of aarch64 clang vs. 
gcc with `-O3` flags) --------- Co-authored-by: Kunal Pathak --- .../Target/AArch64/AArch64ISelLowering.cpp | 110 +++++++ llvm/lib/Target/AArch64/AArch64ISelLowering.h | 4 + llvm/test/CodeGen/AArch64/andorbrcompare.ll | 112 ++++---- llvm/test/CodeGen/AArch64/arm64-ccmp.ll | 161 +++++++---- llvm/test/CodeGen/AArch64/arm64_32.ll | 2 +- .../AArch64/br-cond-merging-cbz-tbnz.ll | 268 ++++++++++++++++++ .../CodeGen/AArch64/br-cond-merging-fccmp.ll | 100 +++++++ .../test/CodeGen/AArch64/br-cond-not-merge.ll | 4 +- llvm/test/CodeGen/AArch64/fcvt-i256.ll | 112 ++++---- .../AArch64/lr-reserved-for-ra-live-in.ll | 16 +- llvm/test/CodeGen/AArch64/machine_cse.ll | 2 +- llvm/test/CodeGen/AArch64/peephole-and-tst.ll | 28 +- llvm/test/CodeGen/AArch64/pr166870.ll | 20 +- llvm/test/CodeGen/AArch64/ragreedy-csr.ll | 180 ++++++------ llvm/test/CodeGen/AArch64/tbl-loops.ll | 176 ++++++------ llvm/test/CodeGen/AArch64/tbz-tbnz.ll | 20 +- .../umulo-128-legalisation-lowering.ll | 10 +- .../AArch64/instr-ref-target-hooks.ll | 2 +- 18 files changed, 901 insertions(+), 426 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/br-cond-merging-cbz-tbnz.ll create mode 100644 llvm/test/CodeGen/AArch64/br-cond-merging-fccmp.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index e56a9be69dd7c..366237249a2df 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -157,6 +157,44 @@ cl::opt EnableSVEGISel( // TODO: This option should be removed once we switch to always using PTRADD in // the SelectionDAG. +static cl::opt BrMergingBaseCostThresh( + "aarch64-br-merging-base-cost", cl::init(2), + cl::desc( + "Cost threshold for merging multiple conditionals into one branch " + "versus splitting into multiple branches: conditionals are merged when " + "their instruction cost is below this limit and split above it. 
Set to " + "-1 to never merge branches."), + cl::Hidden); + +static cl::opt BrMergingCcmpBias( + "aarch64-br-merging-ccmp-bias", cl::init(6), + cl::desc("Increases 'aarch64-br-merging-base-cost' to account for the " + "CCMP instruction, which is always available on AArch64 and " + "makes merging branch conditions cheaper."), + cl::Hidden); + +static cl::opt BrMergingCbzTbnzBias( + "aarch64-br-merging-cbz-tbnz-bias", cl::init(6), + cl::desc("Decreases 'aarch64-br-merging-base-cost' when a condition can " + "lower to a single CBZ/CBNZ or TBZ/TBNZ compare-and-branch, to " + "bias toward splitting. Set to 0 to disable."), + cl::Hidden); + +static cl::opt BrMergingLikelyBias( + "aarch64-br-merging-likely-bias", cl::init(0), + cl::desc("Increases 'aarch64-br-merging-base-cost' when all conditionals " + "are likely to be executed, biasing toward merging. Set to -1 to " + "never merge likely branches."), + cl::Hidden); + +static cl::opt BrMergingUnlikelyBias( + "aarch64-br-merging-unlikely-bias", cl::init(-1), + cl::desc( + "Decreases 'aarch64-br-merging-base-cost' when all conditionals are " + "unlikely to be executed, biasing toward splitting. 
Set to -1 to never " + "merge unlikely branches."), + cl::Hidden); + static cl::opt UseFEATCPACodegen( "aarch64-use-featcpa-codegen", cl::Hidden, cl::desc("Generate ISD::PTRADD nodes for pointer arithmetic in " @@ -31864,6 +31902,78 @@ bool AArch64TargetLowering:: return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL; } +TargetLoweringBase::CondMergingParams +AArch64TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc, + const Value *Lhs, + const Value *Rhs) const { + using namespace llvm::PatternMatch; + + // Keep floating-point conditions split rather than folding them into an + // FCMP/FCCMP chain: FCCMP is never cheaper than the FCMP it replaces, and + // unordered predicates expand one merge into several FCCMPs that the cost + // heuristic below cannot see (regressed branch-cond-split-fcmp.ll). + if (isa(Lhs) || isa(Rhs)) + return {-1, -1, -1}; + + // Returns true if \p V is a branch condition that AArch64 can lower to a + // single compare-and-branch (CBZ/CBNZ) or test-bit-and-branch (TBZ/TBNZ), + // i.e. without materializing a separate compare. Merging such a condition + // into a CMP/CCMP chain removes that fused form and tends to add + // instructions, so it is less likely to be profitable. + auto IsCbzTbnzCandidate = [](const Value *V) { + // A truncation to i1 feeding a branch tests bit #0 -> TBZ/TBNZ. + if (match(V, m_Trunc(m_Value()))) + return true; + const auto *Cmp = dyn_cast(V); + if (!Cmp) + return false; + ICmpInst::Predicate P = Cmp->getPredicate(); + // icmp eq/ne X, 0 -> CBZ/CBNZ. This also subsumes the bit-test form + // icmp eq/ne (and X, Pow2), 0, which AArch64 folds to TBZ/TBNZ. + if (ICmpInst::isEquality(P) && match(Cmp->getOperand(1), m_Zero())) + return true; + // Sign-bit tests lower to TBZ/TBNZ on the MSB: (X s< 0) and (X s> -1). 
+ if (P == ICmpInst::ICMP_SLT && match(Cmp->getOperand(1), m_Zero())) + return true; + if (P == ICmpInst::ICMP_SGT && match(Cmp->getOperand(1), m_AllOnes())) + return true; + return false; + }; + + int BaseCost = BrMergingBaseCostThresh.getValue(); + // CCMP folds the second compare and the branch into a single cheap op, so + // merging is worth tolerating extra speculated work on the RHS dependency + // chain. The bias budgets that tolerance in TTI latency units, standing in + // for the amortized cost of the eliminated branch (~MispredictPenalty + // weighted by misprediction probability; ~50/50 with no profile, hence the + // default 6 ~= MispredictPenalty/2). The likely/unlikely biases below refine + // that. + if (BaseCost >= 0) + BaseCost += BrMergingCcmpBias; + + if (BaseCost >= 0 && BrMergingCbzTbnzBias > 0) { + bool LhsIsFusedBranch = IsCbzTbnzCandidate(Lhs); + bool RhsIsFusedBranch = IsCbzTbnzCandidate(Rhs); + // If both conditions would each lower to a single CBZ/CBNZ or TBZ/TBNZ, the + // split form is two fused compare-and-branches with no separate compares -- + // at worst code-size-neutral versus a CMP/CCMP chain, and it keeps the + // short-circuit. The dependency-chain cost below would price each compare + // as a real instruction (cost ~1) and merge them anyway, so force the + // split. + if (LhsIsFusedBranch && RhsIsFusedBranch) + return {-1, -1, -1}; + // If only one side is a fused branch, merging still costs it its fused + // form, so withdraw the CCMP discount to bias toward splitting; the other + // side may still be worth a CMP/CCMP, so leave that to the dependency-chain + // cost. 
+ if (LhsIsFusedBranch || RhsIsFusedBranch) + BaseCost -= BrMergingCbzTbnzBias; + } + + return {BaseCost, BrMergingLikelyBias.getValue(), + BrMergingUnlikelyBias.getValue()}; +} + TargetLowering::ShiftLegalizationStrategy AArch64TargetLowering::preferredShiftLegalizationStrategy( SelectionDAG &DAG, SDNode *N, unsigned int ExpansionFactor) const { diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 4c3994e4e3d1d..ad67705377661 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -444,6 +444,10 @@ class AArch64TargetLowering : public TargetLowering { preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override; + CondMergingParams + getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, + const Value *Rhs) const override; + bool shouldTransformSignedTruncationCheck(EVT XVT, unsigned KeptBits) const override { // For vectors, we don't have a preference.. 
diff --git a/llvm/test/CodeGen/AArch64/andorbrcompare.ll b/llvm/test/CodeGen/AArch64/andorbrcompare.ll index 5bc06ec5b65b2..61331eb52a274 100644 --- a/llvm/test/CodeGen/AArch64/andorbrcompare.ll +++ b/llvm/test/CodeGen/AArch64/andorbrcompare.ll @@ -9,17 +9,15 @@ define i32 @and_eq_ne_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: cmp w2, w3 ; CHECK-SD-NEXT: ccmp w0, w1, #0, ne -; CHECK-SD-NEXT: b.eq .LBB0_3 -; CHECK-SD-NEXT: // %bb.1: // %entry -; CHECK-SD-NEXT: cmp w4, w5 -; CHECK-SD-NEXT: b.lo .LBB0_3 -; CHECK-SD-NEXT: // %bb.2: -; CHECK-SD-NEXT: mov w0, wzr -; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB0_3: // %if +; CHECK-SD-NEXT: ccmp w4, w5, #0, ne +; CHECK-SD-NEXT: b.hs .LBB0_2 +; CHECK-SD-NEXT: // %bb.1: // %if ; CHECK-SD-NEXT: mov w0, #1 // =0x1 ; CHECK-SD-NEXT: str w0, [x6] ; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB0_2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: and_eq_ne_ult: ; CHECK-GI: // %bb.0: // %entry @@ -60,17 +58,15 @@ define i32 @and_ne_ult_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: cmp w2, w3 ; CHECK-SD-NEXT: ccmp w0, w1, #4, lo -; CHECK-SD-NEXT: b.ne .LBB1_3 -; CHECK-SD-NEXT: // %bb.1: // %entry -; CHECK-SD-NEXT: cmp w4, w5 -; CHECK-SD-NEXT: b.ls .LBB1_3 -; CHECK-SD-NEXT: // %bb.2: -; CHECK-SD-NEXT: mov w0, wzr -; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB1_3: // %if +; CHECK-SD-NEXT: ccmp w4, w5, #0, eq +; CHECK-SD-NEXT: b.hi .LBB1_2 +; CHECK-SD-NEXT: // %bb.1: // %if ; CHECK-SD-NEXT: mov w0, #1 // =0x1 ; CHECK-SD-NEXT: str w0, [x6] ; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB1_2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: and_ne_ult_ule: ; CHECK-GI: // %bb.0: // %entry @@ -111,17 +107,15 @@ define i32 @and_ult_ule_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5 ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: cmp w2, w3 ; CHECK-SD-NEXT: ccmp w0, w1, #2, ls 
-; CHECK-SD-NEXT: b.lo .LBB2_3 -; CHECK-SD-NEXT: // %bb.1: // %entry -; CHECK-SD-NEXT: cmp w4, w5 -; CHECK-SD-NEXT: b.hi .LBB2_3 -; CHECK-SD-NEXT: // %bb.2: -; CHECK-SD-NEXT: mov w0, wzr -; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB2_3: // %if +; CHECK-SD-NEXT: ccmp w4, w5, #2, hs +; CHECK-SD-NEXT: b.ls .LBB2_2 +; CHECK-SD-NEXT: // %bb.1: // %if ; CHECK-SD-NEXT: mov w0, #1 // =0x1 ; CHECK-SD-NEXT: str w0, [x6] ; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB2_2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: and_ult_ule_ugt: ; CHECK-GI: // %bb.0: // %entry @@ -162,17 +156,15 @@ define i32 @and_ule_ugt_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5 ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: cmp w2, w3 ; CHECK-SD-NEXT: ccmp w0, w1, #2, hi -; CHECK-SD-NEXT: b.ls .LBB3_3 -; CHECK-SD-NEXT: // %bb.1: // %entry -; CHECK-SD-NEXT: cmp w4, w5 -; CHECK-SD-NEXT: b.hs .LBB3_3 -; CHECK-SD-NEXT: // %bb.2: -; CHECK-SD-NEXT: mov w0, wzr -; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB3_3: // %if +; CHECK-SD-NEXT: ccmp w4, w5, #2, hi +; CHECK-SD-NEXT: b.lo .LBB3_2 +; CHECK-SD-NEXT: // %bb.1: // %if ; CHECK-SD-NEXT: mov w0, #1 // =0x1 ; CHECK-SD-NEXT: str w0, [x6] ; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB3_2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: and_ule_ugt_uge: ; CHECK-GI: // %bb.0: // %entry @@ -213,17 +205,15 @@ define i32 @and_ugt_uge_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5 ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: cmp w2, w3 ; CHECK-SD-NEXT: ccmp w0, w1, #0, hs -; CHECK-SD-NEXT: b.hi .LBB4_3 -; CHECK-SD-NEXT: // %bb.1: // %entry -; CHECK-SD-NEXT: cmp w4, w5 -; CHECK-SD-NEXT: b.lt .LBB4_3 -; CHECK-SD-NEXT: // %bb.2: -; CHECK-SD-NEXT: mov w0, wzr -; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB4_3: // %if +; CHECK-SD-NEXT: ccmp w4, w5, #8, ls +; CHECK-SD-NEXT: b.ge .LBB4_2 +; CHECK-SD-NEXT: // %bb.1: // %if ; CHECK-SD-NEXT: mov w0, #1 // =0x1 ; CHECK-SD-NEXT: str w0, [x6] ; CHECK-SD-NEXT: ret 
+; CHECK-SD-NEXT: .LBB4_2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: and_ugt_uge_slt: ; CHECK-GI: // %bb.0: // %entry @@ -264,17 +254,15 @@ define i32 @and_uge_slt_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5 ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: cmp w2, w3 ; CHECK-SD-NEXT: ccmp w0, w1, #0, lt -; CHECK-SD-NEXT: b.hs .LBB5_3 -; CHECK-SD-NEXT: // %bb.1: // %entry -; CHECK-SD-NEXT: cmp w4, w5 -; CHECK-SD-NEXT: b.le .LBB5_3 -; CHECK-SD-NEXT: // %bb.2: -; CHECK-SD-NEXT: mov w0, wzr -; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB5_3: // %if +; CHECK-SD-NEXT: ccmp w4, w5, #4, lo +; CHECK-SD-NEXT: b.gt .LBB5_2 +; CHECK-SD-NEXT: // %bb.1: // %if ; CHECK-SD-NEXT: mov w0, #1 // =0x1 ; CHECK-SD-NEXT: str w0, [x6] ; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB5_2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: and_uge_slt_sle: ; CHECK-GI: // %bb.0: // %entry @@ -315,17 +303,15 @@ define i32 @and_slt_sle_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5 ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: cmp w2, w3 ; CHECK-SD-NEXT: ccmp w0, w1, #0, le -; CHECK-SD-NEXT: b.lt .LBB6_3 -; CHECK-SD-NEXT: // %bb.1: // %entry -; CHECK-SD-NEXT: cmp w4, w5 -; CHECK-SD-NEXT: b.gt .LBB6_3 -; CHECK-SD-NEXT: // %bb.2: -; CHECK-SD-NEXT: mov w0, wzr -; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB6_3: // %if +; CHECK-SD-NEXT: ccmp w4, w5, #0, ge +; CHECK-SD-NEXT: b.le .LBB6_2 +; CHECK-SD-NEXT: // %bb.1: // %if ; CHECK-SD-NEXT: mov w0, #1 // =0x1 ; CHECK-SD-NEXT: str w0, [x6] ; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB6_2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: and_slt_sle_sgt: ; CHECK-GI: // %bb.0: // %entry @@ -366,17 +352,15 @@ define i32 @and_sle_sgt_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5 ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: cmp w2, w3 ; CHECK-SD-NEXT: ccmp w0, w1, #0, gt -; CHECK-SD-NEXT: b.le .LBB7_3 -; CHECK-SD-NEXT: // %bb.1: // %entry -; CHECK-SD-NEXT: 
cmp w4, w5 -; CHECK-SD-NEXT: b.ge .LBB7_3 -; CHECK-SD-NEXT: // %bb.2: -; CHECK-SD-NEXT: mov w0, wzr -; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB7_3: // %if +; CHECK-SD-NEXT: ccmp w4, w5, #0, gt +; CHECK-SD-NEXT: b.lt .LBB7_2 +; CHECK-SD-NEXT: // %bb.1: // %if ; CHECK-SD-NEXT: mov w0, #1 // =0x1 ; CHECK-SD-NEXT: str w0, [x6] ; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB7_2: +; CHECK-SD-NEXT: mov w0, wzr +; CHECK-SD-NEXT: ret ; ; CHECK-GI-LABEL: and_sle_sgt_sge: ; CHECK-GI: // %bb.0: // %entry diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll index 71d26d25c0515..54d05c581bf2c 100644 --- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll @@ -4,18 +4,31 @@ target triple = "arm64-apple-ios" define i32 @single_same(i32 %a, i32 %b) nounwind ssp { -; CHECK-LABEL: single_same: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: cmp w0, #5 -; CHECK-NEXT: ccmp w1, #17, #4, ne -; CHECK-NEXT: b.ne LBB0_2 -; CHECK-NEXT: ; %bb.1: ; %if.then -; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill -; CHECK-NEXT: bl _foo -; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; CHECK-NEXT: LBB0_2: ; %if.end -; CHECK-NEXT: mov w0, #7 ; =0x7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: single_same: +; CHECK-SD: ; %bb.0: ; %entry +; CHECK-SD-NEXT: cmp w1, #17 +; CHECK-SD-NEXT: ccmp w0, #5, #4, ne +; CHECK-SD-NEXT: b.ne LBB0_2 +; CHECK-SD-NEXT: ; %bb.1: ; %if.then +; CHECK-SD-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-SD-NEXT: bl _foo +; CHECK-SD-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-SD-NEXT: LBB0_2: ; %if.end +; CHECK-SD-NEXT: mov w0, #7 ; =0x7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: single_same: +; CHECK-GI: ; %bb.0: ; %entry +; CHECK-GI-NEXT: cmp w0, #5 +; CHECK-GI-NEXT: ccmp w1, #17, #4, ne +; CHECK-GI-NEXT: b.ne LBB0_2 +; CHECK-GI-NEXT: ; %bb.1: ; %if.then +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; CHECK-GI-NEXT: bl _foo +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-GI-NEXT: LBB0_2: ; %if.end +; CHECK-GI-NEXT: mov w0, #7 ; =0x7 +; CHECK-GI-NEXT: ret entry: %cmp = icmp eq i32 %a, 5 %cmp1 = icmp eq i32 %b, 17 @@ -34,9 +47,9 @@ if.end: define i32 @single_different(i32 %a, i32 %b) nounwind ssp { ; CHECK-SD-LABEL: single_different: ; CHECK-SD: ; %bb.0: ; %entry -; CHECK-SD-NEXT: cmp w0, #6 -; CHECK-SD-NEXT: ccmp w1, #17, #0, ge -; CHECK-SD-NEXT: b.eq LBB1_2 +; CHECK-SD-NEXT: cmp w1, #17 +; CHECK-SD-NEXT: ccmp w0, #5, #4, eq +; CHECK-SD-NEXT: b.gt LBB1_2 ; CHECK-SD-NEXT: ; %bb.1: ; %if.then ; CHECK-SD-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill ; CHECK-SD-NEXT: bl _foo @@ -276,17 +289,31 @@ if.end: ; Chain multiple compares. define void @multi_different(i32 %a, i32 %b, i32 %c) nounwind ssp { -; CHECK-LABEL: multi_different: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: cmp w0, w1 -; CHECK-NEXT: sdiv w8, w1, w0 -; CHECK-NEXT: ccmp w8, #5, #0, gt -; CHECK-NEXT: ccmp w8, w2, #4, eq -; CHECK-NEXT: b.gt LBB6_2 -; CHECK-NEXT: ; %bb.1: ; %if.end -; CHECK-NEXT: ret -; CHECK-NEXT: LBB6_2: ; %if.then -; CHECK-NEXT: b _foo +; CHECK-SD-LABEL: multi_different: +; CHECK-SD: ; %bb.0: ; %entry +; CHECK-SD-NEXT: cmp w0, w1 +; CHECK-SD-NEXT: b.le LBB6_3 +; CHECK-SD-NEXT: ; %bb.1: ; %land.lhs.true +; CHECK-SD-NEXT: sdiv w8, w1, w0 +; CHECK-SD-NEXT: cmp w8, w2 +; CHECK-SD-NEXT: ccmp w8, #5, #0, gt +; CHECK-SD-NEXT: b.ne LBB6_3 +; CHECK-SD-NEXT: ; %bb.2: ; %if.then +; CHECK-SD-NEXT: b _foo +; CHECK-SD-NEXT: LBB6_3: ; %if.end +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: multi_different: +; CHECK-GI: ; %bb.0: ; %entry +; CHECK-GI-NEXT: cmp w0, w1 +; CHECK-GI-NEXT: sdiv w8, w1, w0 +; CHECK-GI-NEXT: ccmp w8, #5, #0, gt +; CHECK-GI-NEXT: ccmp w8, w2, #4, eq +; CHECK-GI-NEXT: b.gt LBB6_2 +; CHECK-GI-NEXT: ; %bb.1: ; %if.end +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB6_2: ; %if.then +; CHECK-GI-NEXT: b _foo entry: %cmp = icmp sgt 
i32 %a, %b br i1 %cmp, label %land.lhs.true, label %if.end @@ -308,18 +335,31 @@ if.end: ; Convert a cbz in the head block. define i32 @cbz_head(i32 %a, i32 %b) nounwind ssp { -; CHECK-LABEL: cbz_head: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: ccmp w1, #17, #0, ne -; CHECK-NEXT: b.eq LBB7_2 -; CHECK-NEXT: ; %bb.1: ; %if.then -; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill -; CHECK-NEXT: bl _foo -; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; CHECK-NEXT: LBB7_2: ; %if.end -; CHECK-NEXT: mov w0, #7 ; =0x7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: cbz_head: +; CHECK-SD: ; %bb.0: ; %entry +; CHECK-SD-NEXT: cmp w1, #17 +; CHECK-SD-NEXT: ccmp w0, #0, #4, eq +; CHECK-SD-NEXT: b.ne LBB7_2 +; CHECK-SD-NEXT: ; %bb.1: ; %if.then +; CHECK-SD-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-SD-NEXT: bl _foo +; CHECK-SD-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-SD-NEXT: LBB7_2: ; %if.end +; CHECK-SD-NEXT: mov w0, #7 ; =0x7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: cbz_head: +; CHECK-GI: ; %bb.0: ; %entry +; CHECK-GI-NEXT: cmp w0, #0 +; CHECK-GI-NEXT: ccmp w1, #17, #0, ne +; CHECK-GI-NEXT: b.eq LBB7_2 +; CHECK-GI-NEXT: ; %bb.1: ; %if.then +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-GI-NEXT: bl _foo +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-GI-NEXT: LBB7_2: ; %if.end +; CHECK-GI-NEXT: mov w0, #7 ; =0x7 +; CHECK-GI-NEXT: ret entry: %cmp = icmp eq i32 %a, 0 %cmp1 = icmp ne i32 %b, 17 @@ -338,22 +378,35 @@ if.end: ; smaller range of immediates than subs/adds. ; The ccmp immediates must be in the range 0-31. 
define i32 @immediate_range(i32 %a, i32 %b) nounwind ssp { -; CHECK-LABEL: immediate_range: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: cmp w0, #5 -; CHECK-NEXT: b.eq LBB8_3 -; CHECK-NEXT: ; %bb.1: ; %entry -; CHECK-NEXT: cmp w1, #32 -; CHECK-NEXT: b.eq LBB8_3 -; CHECK-NEXT: ; %bb.2: ; %if.end -; CHECK-NEXT: mov w0, #7 ; =0x7 -; CHECK-NEXT: ret -; CHECK-NEXT: LBB8_3: ; %if.then -; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill -; CHECK-NEXT: bl _foo -; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload -; CHECK-NEXT: mov w0, #7 ; =0x7 -; CHECK-NEXT: ret +; CHECK-SD-LABEL: immediate_range: +; CHECK-SD: ; %bb.0: ; %entry +; CHECK-SD-NEXT: cmp w1, #32 +; CHECK-SD-NEXT: ccmp w0, #5, #4, ne +; CHECK-SD-NEXT: b.ne LBB8_2 +; CHECK-SD-NEXT: ; %bb.1: ; %if.then +; CHECK-SD-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-SD-NEXT: bl _foo +; CHECK-SD-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-SD-NEXT: LBB8_2: ; %if.end +; CHECK-SD-NEXT: mov w0, #7 ; =0x7 +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: immediate_range: +; CHECK-GI: ; %bb.0: ; %entry +; CHECK-GI-NEXT: cmp w0, #5 +; CHECK-GI-NEXT: b.eq LBB8_3 +; CHECK-GI-NEXT: ; %bb.1: ; %entry +; CHECK-GI-NEXT: cmp w1, #32 +; CHECK-GI-NEXT: b.eq LBB8_3 +; CHECK-GI-NEXT: ; %bb.2: ; %if.end +; CHECK-GI-NEXT: mov w0, #7 ; =0x7 +; CHECK-GI-NEXT: ret +; CHECK-GI-NEXT: LBB8_3: ; %if.then +; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; CHECK-GI-NEXT: bl _foo +; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-GI-NEXT: mov w0, #7 ; =0x7 +; CHECK-GI-NEXT: ret entry: %cmp = icmp eq i32 %a, 5 %cmp1 = icmp eq i32 %b, 32 diff --git a/llvm/test/CodeGen/AArch64/arm64_32.ll b/llvm/test/CodeGen/AArch64/arm64_32.ll index cddadcab9cde1..e08dfcd5def25 100644 --- a/llvm/test/CodeGen/AArch64/arm64_32.ll +++ b/llvm/test/CodeGen/AArch64/arm64_32.ll @@ -1,7 +1,7 @@ ; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -filetype=obj -o - -disable-post-ra -frame-pointer=non-leaf | \ ; RUN: llvm-objdump --private-headers - | \ ; RUN: FileCheck %s --check-prefix=CHECK-MACHO -; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -o - -aarch64-enable-atomic-cfg-tidy=0 -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-OPT +; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -o - -aarch64-enable-atomic-cfg-tidy=0 -disable-post-ra -frame-pointer=non-leaf -aarch64-br-merging-base-cost=-1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-OPT ; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -o - -fast-isel -aarch64-enable-atomic-cfg-tidy=0 -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FAST ; CHECK-MACHO: Mach header diff --git a/llvm/test/CodeGen/AArch64/br-cond-merging-cbz-tbnz.ll b/llvm/test/CodeGen/AArch64/br-cond-merging-cbz-tbnz.ll new file mode 100644 index 0000000000000..b225881ce1609 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/br-cond-merging-cbz-tbnz.ll @@ -0,0 +1,268 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=SPLIT +; RUN: llc < %s -mtriple=aarch64-linux-gnu -aarch64-br-merging-cbz-tbnz-bias=0 | FileCheck %s --check-prefixes=MERGE + +; When one side of a merged branch condition would lower to a single CBZ/CBNZ or +; TBZ/TBNZ compare-and-branch, getJumpConditionMergingParams 
withdraws the CCMP +; merging discount so the fused branch is kept (and the expensive RHS is +; short-circuited) instead of folded into a CMP/CCMP chain. The second RUN line +; disables the carve-out (bias=0) and recovers the merged CCMP form. + +declare void @sink_a() +declare void @sink_b() + +; icmp eq ptr, null -> CBZ. +define void @cbz_eq_zero(ptr %p, i32 %x, i32 %y, i32 %z, i32 %k) { +; SPLIT-LABEL: cbz_eq_zero: +; SPLIT: // %bb.0: // %entry +; SPLIT-NEXT: cbz x0, .LBB0_3 +; SPLIT-NEXT: // %bb.1: // %entry +; SPLIT-NEXT: add w8, w1, w2 +; SPLIT-NEXT: madd w8, w8, w3, w1 +; SPLIT-NEXT: cmp w8, w4 +; SPLIT-NEXT: b.gt .LBB0_3 +; SPLIT-NEXT: // %bb.2: // %fall +; SPLIT-NEXT: b sink_b +; SPLIT-NEXT: .LBB0_3: // %taken +; SPLIT-NEXT: b sink_a +; +; MERGE-LABEL: cbz_eq_zero: +; MERGE: // %bb.0: // %entry +; MERGE-NEXT: add w8, w1, w2 +; MERGE-NEXT: madd w8, w8, w3, w1 +; MERGE-NEXT: cmp w8, w4 +; MERGE-NEXT: ccmp x0, #0, #4, le +; MERGE-NEXT: b.ne .LBB0_2 +; MERGE-NEXT: // %bb.1: // %taken +; MERGE-NEXT: b sink_a +; MERGE-NEXT: .LBB0_2: // %fall +; MERGE-NEXT: b sink_b +entry: + %c0 = icmp eq ptr %p, null + %a1 = add i32 %x, %y + %a2 = mul i32 %a1, %z + %a3 = add i32 %a2, %x + %c1 = icmp sgt i32 %a3, %k + %or = or i1 %c0, %c1 + br i1 %or, label %taken, label %fall +taken: + tail call void @sink_a() + ret void +fall: + tail call void @sink_b() + ret void +} + +; icmp slt i32, 0 -> TBNZ on the sign bit. 
+define void @tbnz_slt_zero(i32 %s, i32 %x, i32 %y, i32 %z, i32 %k) { +; SPLIT-LABEL: tbnz_slt_zero: +; SPLIT: // %bb.0: // %entry +; SPLIT-NEXT: tbz w0, #31, .LBB1_3 +; SPLIT-NEXT: // %bb.1: // %entry +; SPLIT-NEXT: add w8, w1, w2 +; SPLIT-NEXT: madd w8, w8, w3, w1 +; SPLIT-NEXT: cmp w8, w4 +; SPLIT-NEXT: b.le .LBB1_3 +; SPLIT-NEXT: // %bb.2: // %taken +; SPLIT-NEXT: b sink_a +; SPLIT-NEXT: .LBB1_3: // %fall +; SPLIT-NEXT: b sink_b +; +; MERGE-LABEL: tbnz_slt_zero: +; MERGE: // %bb.0: // %entry +; MERGE-NEXT: add w8, w1, w2 +; MERGE-NEXT: madd w8, w8, w3, w1 +; MERGE-NEXT: cmp w8, w4 +; MERGE-NEXT: ccmn w0, #1, #0, gt +; MERGE-NEXT: b.gt .LBB1_2 +; MERGE-NEXT: // %bb.1: // %taken +; MERGE-NEXT: b sink_a +; MERGE-NEXT: .LBB1_2: // %fall +; MERGE-NEXT: b sink_b +entry: + %c0 = icmp slt i32 %s, 0 + %a1 = add i32 %x, %y + %a2 = mul i32 %a1, %z + %a3 = add i32 %a2, %x + %c1 = icmp sgt i32 %a3, %k + %and = and i1 %c0, %c1 + br i1 %and, label %taken, label %fall +taken: + tail call void @sink_a() + ret void +fall: + tail call void @sink_b() + ret void +} + +; icmp sgt i32, -1 (i.e. s >= 0) -> TBZ on the sign bit. 
+define void @tbz_sgt_allones(i32 %s, i32 %x, i32 %y, i32 %z, i32 %k) { +; SPLIT-LABEL: tbz_sgt_allones: +; SPLIT: // %bb.0: // %entry +; SPLIT-NEXT: tbnz w0, #31, .LBB2_3 +; SPLIT-NEXT: // %bb.1: // %entry +; SPLIT-NEXT: add w8, w1, w2 +; SPLIT-NEXT: madd w8, w8, w3, w1 +; SPLIT-NEXT: cmp w8, w4 +; SPLIT-NEXT: b.le .LBB2_3 +; SPLIT-NEXT: // %bb.2: // %taken +; SPLIT-NEXT: b sink_a +; SPLIT-NEXT: .LBB2_3: // %fall +; SPLIT-NEXT: b sink_b +; +; MERGE-LABEL: tbz_sgt_allones: +; MERGE: // %bb.0: // %entry +; MERGE-NEXT: add w8, w1, w2 +; MERGE-NEXT: madd w8, w8, w3, w1 +; MERGE-NEXT: cmp w8, w4 +; MERGE-NEXT: ccmp w0, #0, #8, gt +; MERGE-NEXT: b.mi .LBB2_2 +; MERGE-NEXT: // %bb.1: // %taken +; MERGE-NEXT: b sink_a +; MERGE-NEXT: .LBB2_2: // %fall +; MERGE-NEXT: b sink_b +entry: + %c0 = icmp sgt i32 %s, -1 + %a1 = add i32 %x, %y + %a2 = mul i32 %a1, %z + %a3 = add i32 %a2, %x + %c1 = icmp sgt i32 %a3, %k + %and = and i1 %c0, %c1 + br i1 %and, label %taken, label %fall +taken: + tail call void @sink_a() + ret void +fall: + tail call void @sink_b() + ret void +} + +; icmp eq (and X, pow2), 0 -> TBZ on a single bit. 
+define void @tbz_and_pow2(i32 %s, i32 %x, i32 %y, i32 %z, i32 %k) { +; SPLIT-LABEL: tbz_and_pow2: +; SPLIT: // %bb.0: // %entry +; SPLIT-NEXT: tbz w0, #3, .LBB3_3 +; SPLIT-NEXT: // %bb.1: // %entry +; SPLIT-NEXT: add w8, w1, w2 +; SPLIT-NEXT: madd w8, w8, w3, w1 +; SPLIT-NEXT: cmp w8, w4 +; SPLIT-NEXT: b.gt .LBB3_3 +; SPLIT-NEXT: // %bb.2: // %fall +; SPLIT-NEXT: b sink_b +; SPLIT-NEXT: .LBB3_3: // %taken +; SPLIT-NEXT: b sink_a +; +; MERGE-LABEL: tbz_and_pow2: +; MERGE: // %bb.0: // %entry +; MERGE-NEXT: add w8, w1, w2 +; MERGE-NEXT: madd w8, w8, w3, w1 +; MERGE-NEXT: cmp w8, w4 +; MERGE-NEXT: cset w8, le +; MERGE-NEXT: and w8, w8, w0, lsr #3 +; MERGE-NEXT: tbnz w8, #0, .LBB3_2 +; MERGE-NEXT: // %bb.1: // %taken +; MERGE-NEXT: b sink_a +; MERGE-NEXT: .LBB3_2: // %fall +; MERGE-NEXT: b sink_b +entry: + %m = and i32 %s, 8 + %c0 = icmp eq i32 %m, 0 + %a1 = add i32 %x, %y + %a2 = mul i32 %a1, %z + %a3 = add i32 %a2, %x + %c1 = icmp sgt i32 %a3, %k + %or = or i1 %c0, %c1 + br i1 %or, label %taken, label %fall +taken: + tail call void @sink_a() + ret void +fall: + tail call void @sink_b() + ret void +} + +; trunc i32 to i1 tests bit #0 -> TBNZ. 
+define void @tbnz_trunc(i32 %s, i32 %x, i32 %y, i32 %z, i32 %k) { +; SPLIT-LABEL: tbnz_trunc: +; SPLIT: // %bb.0: // %entry +; SPLIT-NEXT: tbz w0, #0, .LBB4_3 +; SPLIT-NEXT: // %bb.1: // %entry +; SPLIT-NEXT: add w8, w1, w2 +; SPLIT-NEXT: madd w8, w8, w3, w1 +; SPLIT-NEXT: cmp w8, w4 +; SPLIT-NEXT: b.le .LBB4_3 +; SPLIT-NEXT: // %bb.2: // %taken +; SPLIT-NEXT: b sink_a +; SPLIT-NEXT: .LBB4_3: // %fall +; SPLIT-NEXT: b sink_b +; +; MERGE-LABEL: tbnz_trunc: +; MERGE: // %bb.0: // %entry +; MERGE-NEXT: add w8, w1, w2 +; MERGE-NEXT: madd w8, w8, w3, w1 +; MERGE-NEXT: cmp w8, w4 +; MERGE-NEXT: cset w8, le +; MERGE-NEXT: orn w8, w8, w0 +; MERGE-NEXT: tbnz w8, #0, .LBB4_2 +; MERGE-NEXT: // %bb.1: // %taken +; MERGE-NEXT: b sink_a +; MERGE-NEXT: .LBB4_2: // %fall +; MERGE-NEXT: b sink_b +entry: + %c0 = trunc i32 %s to i1 + %a1 = add i32 %x, %y + %a2 = mul i32 %a1, %z + %a3 = add i32 %a2, %x + %c1 = icmp sgt i32 %a3, %k + %and = and i1 %c0, %c1 + br i1 %and, label %taken, label %fall +taken: + tail call void @sink_a() + ret void +fall: + tail call void @sink_b() + ret void +} + +; Negative control: neither side is a CBZ/TBNZ candidate, so the carve-out does +; not apply and the conditions still merge into a CCMP chain under both RUN +; lines. 
+define void @no_carveout_merges(i32 %s, i32 %x, i32 %y, i32 %z, i32 %k) { +; SPLIT-LABEL: no_carveout_merges: +; SPLIT: // %bb.0: // %entry +; SPLIT-NEXT: add w8, w1, w2 +; SPLIT-NEXT: madd w8, w8, w3, w1 +; SPLIT-NEXT: cmp w8, w4 +; SPLIT-NEXT: ccmp w0, #8, #8, gt +; SPLIT-NEXT: b.lt .LBB5_2 +; SPLIT-NEXT: // %bb.1: // %taken +; SPLIT-NEXT: b sink_a +; SPLIT-NEXT: .LBB5_2: // %fall +; SPLIT-NEXT: b sink_b +; +; MERGE-LABEL: no_carveout_merges: +; MERGE: // %bb.0: // %entry +; MERGE-NEXT: add w8, w1, w2 +; MERGE-NEXT: madd w8, w8, w3, w1 +; MERGE-NEXT: cmp w8, w4 +; MERGE-NEXT: ccmp w0, #8, #8, gt +; MERGE-NEXT: b.lt .LBB5_2 +; MERGE-NEXT: // %bb.1: // %taken +; MERGE-NEXT: b sink_a +; MERGE-NEXT: .LBB5_2: // %fall +; MERGE-NEXT: b sink_b +entry: + %c0 = icmp sgt i32 %s, 7 + %a1 = add i32 %x, %y + %a2 = mul i32 %a1, %z + %a3 = add i32 %a2, %x + %c1 = icmp sgt i32 %a3, %k + %and = and i1 %c0, %c1 + br i1 %and, label %taken, label %fall +taken: + tail call void @sink_a() + ret void +fall: + tail call void @sink_b() + ret void +} diff --git a/llvm/test/CodeGen/AArch64/br-cond-merging-fccmp.ll b/llvm/test/CodeGen/AArch64/br-cond-merging-fccmp.ll new file mode 100644 index 0000000000000..edd7aca564047 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/br-cond-merging-fccmp.ll @@ -0,0 +1,100 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s + +; getJumpConditionMergingParams never folds a floating-point condition into an +; FCMP/FCCMP chain: FCCMP is never cheaper than the FCMP it replaces (and is far +; costlier on some cores), so floating-point conditions are kept split into +; separate compares-and-branches. Integer conditions are unaffected and still +; merge into a CMP/CCMP chain. + +declare void @sink_a() +declare void @sink_b() + +; fp && fp -> kept split, no FCCMP. 
+define void @fp_and_split(double %s, double %t, double %x, double %y, double %z, double %k) { +; CHECK-LABEL: fp_and_split: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: b.pl .LBB0_3 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: fadd d0, d2, d3 +; CHECK-NEXT: fmul d0, d0, d4 +; CHECK-NEXT: fcmp d0, d5 +; CHECK-NEXT: b.le .LBB0_3 +; CHECK-NEXT: // %bb.2: // %taken +; CHECK-NEXT: b sink_a +; CHECK-NEXT: .LBB0_3: // %fall +; CHECK-NEXT: b sink_b +entry: + %c0 = fcmp olt double %s, %t + %a1 = fadd double %x, %y + %a2 = fmul double %a1, %z + %c1 = fcmp ogt double %a2, %k + %and = and i1 %c0, %c1 + br i1 %and, label %taken, label %fall +taken: + tail call void @sink_a() + ret void +fall: + tail call void @sink_b() + ret void +} + +; fp || fp -> kept split, no FCCMP. +define void @fp_or_split(double %s, double %t, double %x, double %y, double %z, double %k) { +; CHECK-LABEL: fp_or_split: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcmp d0, d1 +; CHECK-NEXT: b.mi .LBB1_3 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: fadd d0, d2, d3 +; CHECK-NEXT: fmul d0, d0, d4 +; CHECK-NEXT: fcmp d0, d5 +; CHECK-NEXT: b.gt .LBB1_3 +; CHECK-NEXT: // %bb.2: // %fall +; CHECK-NEXT: b sink_b +; CHECK-NEXT: .LBB1_3: // %taken +; CHECK-NEXT: b sink_a +entry: + %c0 = fcmp olt double %s, %t + %a1 = fadd double %x, %y + %a2 = fmul double %a1, %z + %c1 = fcmp ogt double %a2, %k + %or = or i1 %c0, %c1 + br i1 %or, label %taken, label %fall +taken: + tail call void @sink_a() + ret void +fall: + tail call void @sink_b() + ret void +} + +; Integer control: the same shape with integer compares still folds to a +; CMP/CCMP chain, confirming the carve-out is floating-point specific. 
+define void @int_and_merges(i32 %s, i32 %t, i32 %x, i32 %y, i32 %z, i32 %k) { +; CHECK-LABEL: int_and_merges: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: add w8, w2, w3 +; CHECK-NEXT: mul w8, w8, w4 +; CHECK-NEXT: cmp w8, w5 +; CHECK-NEXT: ccmp w0, w1, #0, gt +; CHECK-NEXT: b.ge .LBB2_2 +; CHECK-NEXT: // %bb.1: // %taken +; CHECK-NEXT: b sink_a +; CHECK-NEXT: .LBB2_2: // %fall +; CHECK-NEXT: b sink_b +entry: + %c0 = icmp slt i32 %s, %t + %a1 = add i32 %x, %y + %a2 = mul i32 %a1, %z + %c1 = icmp sgt i32 %a2, %k + %and = and i1 %c0, %c1 + br i1 %and, label %taken, label %fall +taken: + tail call void @sink_a() + ret void +fall: + tail call void @sink_b() + ret void +} diff --git a/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll b/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll index e0ffd81717402..6f5de41287458 100644 --- a/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll +++ b/llvm/test/CodeGen/AArch64/br-cond-not-merge.ll @@ -1,5 +1,5 @@ -; RUN: llc -mtriple=aarch64 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s -; RUN: llc -mtriple=aarch64 -verify-machineinstrs -O0 -fast-isel=0 -global-isel=false < %s | FileCheck --check-prefix=CHECK --check-prefix=NOOPT %s +; RUN: llc -mtriple=aarch64 -verify-machineinstrs -aarch64-br-merging-base-cost=-1 < %s | FileCheck --check-prefix=CHECK --check-prefix=OPT %s +; RUN: llc -mtriple=aarch64 -verify-machineinstrs -aarch64-br-merging-base-cost=-1 -O0 -fast-isel=0 -global-isel=false < %s | FileCheck --check-prefix=CHECK --check-prefix=NOOPT %s declare void @foo() diff --git a/llvm/test/CodeGen/AArch64/fcvt-i256.ll b/llvm/test/CodeGen/AArch64/fcvt-i256.ll index cbee17bf4f8c8..d57e9c2038795 100644 --- a/llvm/test/CodeGen/AArch64/fcvt-i256.ll +++ b/llvm/test/CodeGen/AArch64/fcvt-i256.ll @@ -1572,46 +1572,41 @@ define i256 @f32_to_s256_sat(float %val) { define i256 @f32_to_u256_sat(float %val) { ; CHECK-SD-LABEL: f32_to_u256_sat: ; CHECK-SD: // %bb.0: // %fp-to-i-entry -; CHECK-SD-NEXT: fmov w9, s0 +; 
CHECK-SD-NEXT: fmov w10, s0 +; CHECK-SD-NEXT: mov w9, #127 // =0x7f +; CHECK-SD-NEXT: cmp w10, #0 +; CHECK-SD-NEXT: ubfx w8, w10, #23, #8 +; CHECK-SD-NEXT: fccmp s0, s0, #1, pl +; CHECK-SD-NEXT: ccmp w8, w9, #0, vc +; CHECK-SD-NEXT: b.hs .LBB9_2 +; CHECK-SD-NEXT: // %bb.1: ; CHECK-SD-NEXT: mov x0, xzr -; CHECK-SD-NEXT: ubfx w8, w9, #23, #8 -; CHECK-SD-NEXT: cmp w8, #127 -; CHECK-SD-NEXT: b.lo .LBB9_6 -; CHECK-SD-NEXT: // %bb.1: // %fp-to-i-entry -; CHECK-SD-NEXT: fcmp s0, s0 -; CHECK-SD-NEXT: b.vs .LBB9_6 -; CHECK-SD-NEXT: // %bb.2: // %fp-to-i-entry -; CHECK-SD-NEXT: mov x1, x0 -; CHECK-SD-NEXT: mov x2, x0 -; CHECK-SD-NEXT: mov x3, x0 -; CHECK-SD-NEXT: tbnz w9, #31, .LBB9_8 -; CHECK-SD-NEXT: // %bb.3: // %fp-to-i-if-check.saturate +; CHECK-SD-NEXT: mov x1, xzr +; CHECK-SD-NEXT: mov x2, xzr +; CHECK-SD-NEXT: mov x3, xzr +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB9_2: // %fp-to-i-if-check.saturate ; CHECK-SD-NEXT: cmp w8, #254 -; CHECK-SD-NEXT: b.hi .LBB9_9 -; CHECK-SD-NEXT: // %bb.4: // %fp-to-i-if-check.exp.size -; CHECK-SD-NEXT: mov w10, #8388608 // =0x800000 +; CHECK-SD-NEXT: b.hi .LBB9_6 +; CHECK-SD-NEXT: // %bb.3: // %fp-to-i-if-check.exp.size +; CHECK-SD-NEXT: mov w9, #8388608 // =0x800000 ; CHECK-SD-NEXT: cmp w8, #149 -; CHECK-SD-NEXT: bfxil w10, w9, #0, #23 -; CHECK-SD-NEXT: b.hi .LBB9_7 -; CHECK-SD-NEXT: // %bb.5: // %fp-to-i-if-exp.small -; CHECK-SD-NEXT: mov w9, #150 // =0x96 +; CHECK-SD-NEXT: bfxil w9, w10, #0, #23 +; CHECK-SD-NEXT: b.hi .LBB9_5 +; CHECK-SD-NEXT: // %bb.4: // %fp-to-i-if-exp.small +; CHECK-SD-NEXT: mov w10, #150 // =0x96 ; CHECK-SD-NEXT: mov x1, xzr ; CHECK-SD-NEXT: mov x2, xzr -; CHECK-SD-NEXT: sub w8, w9, w8 +; CHECK-SD-NEXT: sub w8, w10, w8 ; CHECK-SD-NEXT: mov x3, xzr -; CHECK-SD-NEXT: lsr w0, w10, w8 -; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB9_6: -; CHECK-SD-NEXT: mov x1, x0 -; CHECK-SD-NEXT: mov x2, x0 -; CHECK-SD-NEXT: mov x3, x0 +; CHECK-SD-NEXT: lsr w0, w9, w8 ; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB9_7: // 
%fp-to-i-if-exp.large +; CHECK-SD-NEXT: .LBB9_5: // %fp-to-i-if-exp.large ; CHECK-SD-NEXT: sub sp, sp, #64 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: movi v0.2d, #0000000000000000 ; CHECK-SD-NEXT: sub w8, w8, #150 -; CHECK-SD-NEXT: str x10, [sp, #32] +; CHECK-SD-NEXT: str x9, [sp, #32] ; CHECK-SD-NEXT: lsr x9, x8, #3 ; CHECK-SD-NEXT: mov x10, sp ; CHECK-SD-NEXT: str xzr, [sp, #56] @@ -1640,9 +1635,8 @@ define i256 @f32_to_u256_sat(float %val) { ; CHECK-SD-NEXT: orr x2, x10, x15 ; CHECK-SD-NEXT: orr x1, x11, x13 ; CHECK-SD-NEXT: add sp, sp, #64 -; CHECK-SD-NEXT: .LBB9_8: // %fp-to-i-cleanup ; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB9_9: +; CHECK-SD-NEXT: .LBB9_6: ; CHECK-SD-NEXT: mov x0, #-1 // =0xffffffffffffffff ; CHECK-SD-NEXT: mov x1, #-1 // =0xffffffffffffffff ; CHECK-SD-NEXT: mov x2, #-1 // =0xffffffffffffffff @@ -2013,46 +2007,41 @@ define i256 @f64_to_s256_sat(double %val) { define i256 @f64_to_u256_sat(double %val) { ; CHECK-SD-LABEL: f64_to_u256_sat: ; CHECK-SD: // %bb.0: // %fp-to-i-entry -; CHECK-SD-NEXT: fmov x9, d0 +; CHECK-SD-NEXT: fmov x10, d0 +; CHECK-SD-NEXT: mov w9, #1023 // =0x3ff +; CHECK-SD-NEXT: cmp x10, #0 +; CHECK-SD-NEXT: ubfx x8, x10, #52, #11 +; CHECK-SD-NEXT: fccmp d0, d0, #1, pl +; CHECK-SD-NEXT: ccmp x8, x9, #0, vc +; CHECK-SD-NEXT: b.hs .LBB11_2 +; CHECK-SD-NEXT: // %bb.1: ; CHECK-SD-NEXT: mov x0, xzr -; CHECK-SD-NEXT: ubfx x8, x9, #52, #11 -; CHECK-SD-NEXT: cmp x8, #1023 -; CHECK-SD-NEXT: b.lo .LBB11_6 -; CHECK-SD-NEXT: // %bb.1: // %fp-to-i-entry -; CHECK-SD-NEXT: fcmp d0, d0 -; CHECK-SD-NEXT: b.vs .LBB11_6 -; CHECK-SD-NEXT: // %bb.2: // %fp-to-i-entry -; CHECK-SD-NEXT: mov x1, x0 -; CHECK-SD-NEXT: mov x2, x0 -; CHECK-SD-NEXT: mov x3, x0 -; CHECK-SD-NEXT: tbnz x9, #63, .LBB11_8 -; CHECK-SD-NEXT: // %bb.3: // %fp-to-i-if-check.saturate +; CHECK-SD-NEXT: mov x1, xzr +; CHECK-SD-NEXT: mov x2, xzr +; CHECK-SD-NEXT: mov x3, xzr +; CHECK-SD-NEXT: ret +; CHECK-SD-NEXT: .LBB11_2: // %fp-to-i-if-check.saturate ; CHECK-SD-NEXT: 
cmp x8, #1278 -; CHECK-SD-NEXT: b.hi .LBB11_9 -; CHECK-SD-NEXT: // %bb.4: // %fp-to-i-if-check.exp.size -; CHECK-SD-NEXT: mov x10, #4503599627370496 // =0x10000000000000 +; CHECK-SD-NEXT: b.hi .LBB11_6 +; CHECK-SD-NEXT: // %bb.3: // %fp-to-i-if-check.exp.size +; CHECK-SD-NEXT: mov x9, #4503599627370496 // =0x10000000000000 ; CHECK-SD-NEXT: cmp x8, #1074 -; CHECK-SD-NEXT: bfxil x10, x9, #0, #52 -; CHECK-SD-NEXT: b.hi .LBB11_7 -; CHECK-SD-NEXT: // %bb.5: // %fp-to-i-if-exp.small -; CHECK-SD-NEXT: mov w9, #1075 // =0x433 +; CHECK-SD-NEXT: bfxil x9, x10, #0, #52 +; CHECK-SD-NEXT: b.hi .LBB11_5 +; CHECK-SD-NEXT: // %bb.4: // %fp-to-i-if-exp.small +; CHECK-SD-NEXT: mov w10, #1075 // =0x433 ; CHECK-SD-NEXT: mov x1, xzr ; CHECK-SD-NEXT: mov x2, xzr -; CHECK-SD-NEXT: sub x8, x9, x8 +; CHECK-SD-NEXT: sub x8, x10, x8 ; CHECK-SD-NEXT: mov x3, xzr -; CHECK-SD-NEXT: lsr x0, x10, x8 -; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB11_6: -; CHECK-SD-NEXT: mov x1, x0 -; CHECK-SD-NEXT: mov x2, x0 -; CHECK-SD-NEXT: mov x3, x0 +; CHECK-SD-NEXT: lsr x0, x9, x8 ; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB11_7: // %fp-to-i-if-exp.large +; CHECK-SD-NEXT: .LBB11_5: // %fp-to-i-if-exp.large ; CHECK-SD-NEXT: sub sp, sp, #64 ; CHECK-SD-NEXT: .cfi_def_cfa_offset 64 ; CHECK-SD-NEXT: movi v0.2d, #0000000000000000 ; CHECK-SD-NEXT: sub x8, x8, #1075 -; CHECK-SD-NEXT: str x10, [sp, #32] +; CHECK-SD-NEXT: str x9, [sp, #32] ; CHECK-SD-NEXT: lsr x9, x8, #3 ; CHECK-SD-NEXT: mov x10, sp ; CHECK-SD-NEXT: str xzr, [sp, #56] @@ -2081,9 +2070,8 @@ define i256 @f64_to_u256_sat(double %val) { ; CHECK-SD-NEXT: orr x2, x10, x15 ; CHECK-SD-NEXT: orr x1, x11, x13 ; CHECK-SD-NEXT: add sp, sp, #64 -; CHECK-SD-NEXT: .LBB11_8: // %fp-to-i-cleanup ; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB11_9: +; CHECK-SD-NEXT: .LBB11_6: ; CHECK-SD-NEXT: mov x0, #-1 // =0xffffffffffffffff ; CHECK-SD-NEXT: mov x1, #-1 // =0xffffffffffffffff ; CHECK-SD-NEXT: mov x2, #-1 // =0xffffffffffffffff diff --git 
a/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll b/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll index 708ba621c26d8..09107860c802b 100644 --- a/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll +++ b/llvm/test/CodeGen/AArch64/lr-reserved-for-ra-live-in.ll @@ -8,23 +8,17 @@ declare void @spam() define i32 @check_lr_liveness(ptr %arg) #1 { ; CHECK-LABEL: name: check_lr_liveness ; CHECK: bb.0.bb: - ; CHECK-NEXT: successors: %bb.4(0x20000000), %bb.1(0x60000000) + ; CHECK-NEXT: successors: %bb.1, %bb.2 ; CHECK-NEXT: liveins: $x0, $lr ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $x8 = COPY $x0 - ; CHECK-NEXT: renamable $w0 = MOVi32imm -536870206 - ; CHECK-NEXT: CBNZX killed renamable $x8, %bb.1 + ; CHECK-NEXT: CBNZX killed renamable $x0, %bb.2 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: liveins: $w0, $lr + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $lr ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w0 = MOVi32imm -536870206 ; CHECK-NEXT: B %bb.3 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1.bb: - ; CHECK-NEXT: liveins: $w0, $lr - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: B %bb.2 - ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.bb1: ; CHECK-NEXT: liveins: $lr ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AArch64/machine_cse.ll b/llvm/test/CodeGen/AArch64/machine_cse.ll index 6478f5a37f782..e8672bc47d2b2 100644 --- a/llvm/test/CodeGen/AArch64/machine_cse.ll +++ b/llvm/test/CodeGen/AArch64/machine_cse.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -tail-dup-placement=0 | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-linux-gnuabi -O2 -tail-dup-placement=0 -aarch64-br-merging-base-cost=-1 | FileCheck %s ; -tail-dup-placement causes tail duplication during layout. 
This breaks the ; assumptions of the test case as written (specifically, it creates an ; additional cmp instruction, creating a false positive), so we pass diff --git a/llvm/test/CodeGen/AArch64/peephole-and-tst.ll b/llvm/test/CodeGen/AArch64/peephole-and-tst.ll index 74b0e69d1b05b..6449f5d5f07d3 100644 --- a/llvm/test/CodeGen/AArch64/peephole-and-tst.ll +++ b/llvm/test/CodeGen/AArch64/peephole-and-tst.ll @@ -13,27 +13,23 @@ define i32 @test_func_i32_two_uses(i32 %in, i32 %bit, i32 %mask) { ; CHECK-SD-NEXT: ldr x8, [x8, :got_lo12:ptr_wrapper] ; CHECK-SD-NEXT: ldr x9, [x8] ; CHECK-SD-NEXT: mov w8, wzr -; CHECK-SD-NEXT: b .LBB0_3 -; CHECK-SD-NEXT: .LBB0_1: // in Loop: Header=BB0_3 Depth=1 -; CHECK-SD-NEXT: str xzr, [x9, #8] -; CHECK-SD-NEXT: .LBB0_2: // in Loop: Header=BB0_3 Depth=1 +; CHECK-SD-NEXT: b .LBB0_2 +; CHECK-SD-NEXT: .LBB0_1: // in Loop: Header=BB0_2 Depth=1 ; CHECK-SD-NEXT: lsl w1, w1, #1 -; CHECK-SD-NEXT: cbz w1, .LBB0_6 -; CHECK-SD-NEXT: .LBB0_3: // %do.body +; CHECK-SD-NEXT: cbz w1, .LBB0_4 +; CHECK-SD-NEXT: .LBB0_2: // %do.body ; CHECK-SD-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-SD-NEXT: ands w10, w1, w0 -; CHECK-SD-NEXT: and w11, w2, w0 +; CHECK-SD-NEXT: orr w11, w2, w10 ; CHECK-SD-NEXT: cinc w8, w8, ne -; CHECK-SD-NEXT: cmp w10, w11 -; CHECK-SD-NEXT: b.eq .LBB0_1 -; CHECK-SD-NEXT: // %bb.4: // %do.body -; CHECK-SD-NEXT: // in Loop: Header=BB0_3 Depth=1 -; CHECK-SD-NEXT: cbnz w2, .LBB0_1 -; CHECK-SD-NEXT: // %bb.5: // %do.body -; CHECK-SD-NEXT: // in Loop: Header=BB0_3 Depth=1 -; CHECK-SD-NEXT: cbz w10, .LBB0_2 +; CHECK-SD-NEXT: cmp w11, #0 +; CHECK-SD-NEXT: and w11, w2, w0 +; CHECK-SD-NEXT: ccmp w10, w11, #4, eq +; CHECK-SD-NEXT: b.ne .LBB0_1 +; CHECK-SD-NEXT: // %bb.3: // in Loop: Header=BB0_2 Depth=1 +; CHECK-SD-NEXT: str xzr, [x9, #8] ; CHECK-SD-NEXT: b .LBB0_1 -; CHECK-SD-NEXT: .LBB0_6: // %do.end +; CHECK-SD-NEXT: .LBB0_4: // %do.end ; CHECK-SD-NEXT: mov w0, w8 ; CHECK-SD-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/pr166870.ll 
b/llvm/test/CodeGen/AArch64/pr166870.ll index 6f54b0465fbfd..4b3daef3b233d 100644 --- a/llvm/test/CodeGen/AArch64/pr166870.ll +++ b/llvm/test/CodeGen/AArch64/pr166870.ll @@ -22,21 +22,18 @@ define i32 @widget(i32 %arg, i32 %arg1, i1 %arg2, ptr %arg3, i1 %arg4) #0 nounwi ; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: mov x19, x3 -; CHECK-NEXT: mov x20, x0 -; CHECK-NEXT: mov x21, x1 +; CHECK-NEXT: mov x21, x0 +; CHECK-NEXT: mov x22, x1 ; CHECK-NEXT: bl baz -; CHECK-NEXT: mov w0, #0 // =0x0 -; CHECK-NEXT: mov w10, #1 // =0x1 -; CHECK-NEXT: cbnz w10, .LBB0_10 +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: cbnz w8, .LBB0_9 ; CHECK-NEXT: // %bb.5: // %bb7 -; CHECK-NEXT: cbnz w10, .LBB0_9 +; CHECK-NEXT: cbnz w8, .LBB0_9 ; CHECK-NEXT: // %bb.6: // %bb8 -; CHECK-NEXT: mov x8, x21 -; CHECK-NEXT: mov x9, x20 ; CHECK-NEXT: mov w20, #0 // =0x0 -; CHECK-NEXT: mov w9, w9 -; CHECK-NEXT: mov x21, x9 -; CHECK-NEXT: mov w8, w8 +; CHECK-NEXT: mov w8, w21 +; CHECK-NEXT: mov x21, x8 +; CHECK-NEXT: mov w8, w22 ; CHECK-NEXT: mov x22, x8 ; CHECK-NEXT: .LBB0_7: // %bb10 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 @@ -48,7 +45,6 @@ define i32 @widget(i32 %arg, i32 %arg1, i1 %arg2, ptr %arg3, i1 %arg4) #0 nounwi ; CHECK-NEXT: cbnz x22, .LBB0_7 ; CHECK-NEXT: .LBB0_9: ; CHECK-NEXT: mov w0, #0 // =0x0 -; CHECK-NEXT: .LBB0_10: ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll index 31f004e8d72b7..4ac7d0f1c34e6 100644 --- a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll @@ -24,7 +24,7 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly ; CHECK-NEXT: ldrh w8, [x0] ; CHECK-NEXT: ldrh w9, [x1] ; 
CHECK-NEXT: cmp w8, w9 -; CHECK-NEXT: b.ne LBB0_47 +; CHECK-NEXT: b.ne LBB0_42 ; CHECK-NEXT: ; %bb.1: ; %if.end ; CHECK-NEXT: sub sp, sp, #64 ; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill @@ -80,7 +80,7 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly ; CHECK-NEXT: ldrb w8, [x9, x11] ; CHECK-NEXT: ldrb w15, [x10, x11] ; CHECK-NEXT: cmp w8, w15 -; CHECK-NEXT: b.ne LBB0_42 +; CHECK-NEXT: b.ne LBB0_25 ; CHECK-NEXT: ; %bb.7: ; %if.end17 ; CHECK-NEXT: add x11, x11, #1 ; CHECK-NEXT: ldrsb x8, [x9, x11] @@ -108,158 +108,144 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly ; CHECK-NEXT: cbnz w8, LBB0_6 ; CHECK-NEXT: LBB0_9: ; %while.end ; CHECK-NEXT: orr w8, w13, w12 -; CHECK-NEXT: cbnz w8, LBB0_24 +; CHECK-NEXT: cbnz w8, LBB0_23 ; CHECK-NEXT: ; %bb.10: ; %if.then23 ; CHECK-NEXT: ldr x12, [x0, #16] ; CHECK-NEXT: ldrb w8, [x9, x11] ; CHECK-NEXT: ldrb w13, [x12] ; CHECK-NEXT: cmp w13, #83 -; CHECK-NEXT: b.eq LBB0_19 +; CHECK-NEXT: b.eq LBB0_18 ; CHECK-NEXT: LBB0_11: ; %while.cond59.preheader -; CHECK-NEXT: cbz w8, LBB0_23 +; CHECK-NEXT: cbz w8, LBB0_22 ; CHECK-NEXT: LBB0_12: ; %land.rhs.preheader ; CHECK-NEXT: add x12, x9, x11 ; CHECK-NEXT: add x9, x10, x11 +; CHECK-NEXT: mov w0, #1 ; =0x1 ; CHECK-NEXT: add x10, x12, #1 ; CHECK-NEXT: LBB0_13: ; %land.rhs ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrb w11, [x9], #1 -; CHECK-NEXT: cbz w11, LBB0_23 +; CHECK-NEXT: cbz w11, LBB0_22 ; CHECK-NEXT: ; %bb.14: ; %while.body66 ; CHECK-NEXT: ; in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: cmp w8, #42 -; CHECK-NEXT: b.eq LBB0_18 +; CHECK-NEXT: b.eq LBB0_17 ; CHECK-NEXT: ; %bb.15: ; %while.body66 ; CHECK-NEXT: ; in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: cmp w11, #42 -; CHECK-NEXT: b.eq LBB0_18 +; CHECK-NEXT: b.eq LBB0_17 ; CHECK-NEXT: ; %bb.16: ; %lor.lhs.false74 ; CHECK-NEXT: ; in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: cmp w8, w11 -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: b.ne 
LBB0_43 -; CHECK-NEXT: ; %bb.17: ; %lor.lhs.false74 -; CHECK-NEXT: ; in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: cmp w8, #94 -; CHECK-NEXT: b.eq LBB0_43 -; CHECK-NEXT: LBB0_18: ; %if.then83 +; CHECK-NEXT: ccmp w8, w11, #0, ne +; CHECK-NEXT: b.ne LBB0_25 +; CHECK-NEXT: LBB0_17: ; %if.then83 ; CHECK-NEXT: ; in Loop: Header=BB0_13 Depth=1 ; CHECK-NEXT: ldrb w8, [x10], #1 -; CHECK-NEXT: mov w0, #1 ; =0x1 ; CHECK-NEXT: cbnz w8, LBB0_13 -; CHECK-NEXT: b LBB0_43 -; CHECK-NEXT: LBB0_19: ; %land.lhs.true28 -; CHECK-NEXT: cbz w8, LBB0_23 -; CHECK-NEXT: ; %bb.20: ; %land.lhs.true28 +; CHECK-NEXT: b LBB0_26 +; CHECK-NEXT: LBB0_18: ; %land.lhs.true28 +; CHECK-NEXT: cbz w8, LBB0_22 +; CHECK-NEXT: ; %bb.19: ; %land.lhs.true28 ; CHECK-NEXT: cmp w8, #112 ; CHECK-NEXT: b.ne LBB0_12 -; CHECK-NEXT: ; %bb.21: ; %land.lhs.true35 +; CHECK-NEXT: ; %bb.20: ; %land.lhs.true35 ; CHECK-NEXT: ldrb w13, [x10, x11] ; CHECK-NEXT: cmp w13, #112 ; CHECK-NEXT: b.ne LBB0_12 -; CHECK-NEXT: ; %bb.22: ; %land.lhs.true43 +; CHECK-NEXT: ; %bb.21: ; %land.lhs.true43 ; CHECK-NEXT: sub x12, x9, x12 ; CHECK-NEXT: add x12, x12, x11 ; CHECK-NEXT: cmp x12, #1 -; CHECK-NEXT: b.ne LBB0_44 -; CHECK-NEXT: LBB0_23: +; CHECK-NEXT: b.ne LBB0_39 +; CHECK-NEXT: LBB0_22: ; CHECK-NEXT: mov w0, #1 ; =0x1 -; CHECK-NEXT: b LBB0_43 -; CHECK-NEXT: LBB0_24: ; %if.else88 -; CHECK-NEXT: cmp w12, #1 -; CHECK-NEXT: b.ne LBB0_33 -; CHECK-NEXT: ; %bb.25: ; %if.else88 +; CHECK-NEXT: b LBB0_26 +; CHECK-NEXT: LBB0_23: ; %if.else88 ; CHECK-NEXT: cmp w13, #2 -; CHECK-NEXT: b.ne LBB0_33 -; CHECK-NEXT: ; %bb.26: ; %while.cond95.preheader +; CHECK-NEXT: ccmp w12, #1, #0, eq +; CHECK-NEXT: b.eq LBB0_27 +; CHECK-NEXT: ; %bb.24: ; %if.else123 +; CHECK-NEXT: cmp w12, #2 +; CHECK-NEXT: ccmp w13, #1, #0, eq +; CHECK-NEXT: b.eq LBB0_34 +; CHECK-NEXT: LBB0_25: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: LBB0_26: +; CHECK-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #64 +; CHECK-NEXT: ret +; CHECK-NEXT: LBB0_27: ; 
%while.cond95.preheader ; CHECK-NEXT: ldrb w12, [x9, x11] -; CHECK-NEXT: cbz w12, LBB0_23 -; CHECK-NEXT: ; %bb.27: ; %land.rhs99.preheader +; CHECK-NEXT: cbz w12, LBB0_22 +; CHECK-NEXT: ; %bb.28: ; %land.rhs99.preheader ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: mov w0, #1 ; =0x1 -; CHECK-NEXT: b LBB0_29 -; CHECK-NEXT: LBB0_28: ; %if.then117 -; CHECK-NEXT: ; in Loop: Header=BB0_29 Depth=1 +; CHECK-NEXT: b LBB0_30 +; CHECK-NEXT: LBB0_29: ; %if.then117 +; CHECK-NEXT: ; in Loop: Header=BB0_30 Depth=1 ; CHECK-NEXT: add x12, x9, x8 ; CHECK-NEXT: add x8, x8, #1 ; CHECK-NEXT: add x12, x12, x11 ; CHECK-NEXT: ldrb w12, [x12, #1] -; CHECK-NEXT: cbz w12, LBB0_43 -; CHECK-NEXT: LBB0_29: ; %land.rhs99 +; CHECK-NEXT: cbz w12, LBB0_26 +; CHECK-NEXT: LBB0_30: ; %land.rhs99 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x13, x10, x8 ; CHECK-NEXT: ldrb w13, [x13, x11] -; CHECK-NEXT: cbz w13, LBB0_23 -; CHECK-NEXT: ; %bb.30: ; %while.body104 -; CHECK-NEXT: ; in Loop: Header=BB0_29 Depth=1 -; CHECK-NEXT: cmp w12, w13 -; CHECK-NEXT: b.eq LBB0_28 +; CHECK-NEXT: cbz w13, LBB0_22 ; CHECK-NEXT: ; %bb.31: ; %while.body104 -; CHECK-NEXT: ; in Loop: Header=BB0_29 Depth=1 -; CHECK-NEXT: cmp w12, #42 -; CHECK-NEXT: b.eq LBB0_28 +; CHECK-NEXT: ; in Loop: Header=BB0_30 Depth=1 +; CHECK-NEXT: cmp w12, w13 +; CHECK-NEXT: b.eq LBB0_29 ; CHECK-NEXT: ; %bb.32: ; %while.body104 -; CHECK-NEXT: ; in Loop: Header=BB0_29 Depth=1 +; CHECK-NEXT: ; in Loop: Header=BB0_30 Depth=1 +; CHECK-NEXT: cmp w12, #42 +; CHECK-NEXT: b.eq LBB0_29 +; CHECK-NEXT: ; %bb.33: ; %while.body104 +; CHECK-NEXT: ; in Loop: Header=BB0_30 Depth=1 ; CHECK-NEXT: cmp w13, #94 -; CHECK-NEXT: b.eq LBB0_28 -; CHECK-NEXT: b LBB0_42 -; CHECK-NEXT: LBB0_33: ; %if.else123 -; CHECK-NEXT: cmp w13, #1 -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: b.ne LBB0_43 -; CHECK-NEXT: ; %bb.34: ; %if.else123 -; CHECK-NEXT: cmp w12, #2 -; CHECK-NEXT: b.ne LBB0_43 -; CHECK-NEXT: ; %bb.35: ; %while.cond130.preheader -; CHECK-NEXT: ldrb w8, [x9, 
x11] -; CHECK-NEXT: cbz w8, LBB0_23 -; CHECK-NEXT: ; %bb.36: ; %land.rhs134.preheader -; CHECK-NEXT: mov x12, xzr +; CHECK-NEXT: b.eq LBB0_29 +; CHECK-NEXT: b LBB0_25 +; CHECK-NEXT: LBB0_34: ; %while.cond130.preheader +; CHECK-NEXT: ldrb w12, [x9, x11] +; CHECK-NEXT: cbz w12, LBB0_22 +; CHECK-NEXT: ; %bb.35: ; %land.rhs134.preheader +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: mov w13, #42 ; =0x2a ; CHECK-NEXT: mov w0, #1 ; =0x1 -; CHECK-NEXT: b LBB0_38 -; CHECK-NEXT: LBB0_37: ; %if.then152 -; CHECK-NEXT: ; in Loop: Header=BB0_38 Depth=1 -; CHECK-NEXT: add x8, x9, x12 -; CHECK-NEXT: add x12, x12, #1 -; CHECK-NEXT: add x8, x8, x11 -; CHECK-NEXT: ldrb w8, [x8, #1] -; CHECK-NEXT: cbz w8, LBB0_43 -; CHECK-NEXT: LBB0_38: ; %land.rhs134 +; CHECK-NEXT: LBB0_36: ; %land.rhs134 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add x13, x10, x12 -; CHECK-NEXT: ldrb w13, [x13, x11] -; CHECK-NEXT: cbz w13, LBB0_23 -; CHECK-NEXT: ; %bb.39: ; %while.body139 -; CHECK-NEXT: ; in Loop: Header=BB0_38 Depth=1 -; CHECK-NEXT: cmp w8, w13 -; CHECK-NEXT: b.eq LBB0_37 -; CHECK-NEXT: ; %bb.40: ; %while.body139 -; CHECK-NEXT: ; in Loop: Header=BB0_38 Depth=1 -; CHECK-NEXT: cmp w13, #42 -; CHECK-NEXT: b.eq LBB0_37 -; CHECK-NEXT: ; %bb.41: ; %while.body139 -; CHECK-NEXT: ; in Loop: Header=BB0_38 Depth=1 -; CHECK-NEXT: cmp w8, #94 -; CHECK-NEXT: b.eq LBB0_37 -; CHECK-NEXT: LBB0_42: -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: LBB0_43: -; CHECK-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #64 -; CHECK-NEXT: ret -; CHECK-NEXT: LBB0_44: ; %lor.lhs.false47 +; CHECK-NEXT: add x14, x10, x8 +; CHECK-NEXT: ldrb w14, [x14, x11] +; CHECK-NEXT: cbz w14, LBB0_22 +; CHECK-NEXT: ; %bb.37: ; %while.body139 +; CHECK-NEXT: ; in Loop: Header=BB0_36 Depth=1 +; CHECK-NEXT: cmp w12, #94 +; CHECK-NEXT: ccmp w14, w13, #4, ne +; CHECK-NEXT: ccmp w12, w14, #4, ne +; CHECK-NEXT: b.ne LBB0_25 +; CHECK-NEXT: ; %bb.38: ; %if.then152 +; CHECK-NEXT: ; in Loop: Header=BB0_36 
Depth=1 +; CHECK-NEXT: add x12, x9, x8 +; CHECK-NEXT: add x8, x8, #1 +; CHECK-NEXT: add x12, x12, x11 +; CHECK-NEXT: ldrb w12, [x12, #1] +; CHECK-NEXT: cbnz w12, LBB0_36 +; CHECK-NEXT: b LBB0_26 +; CHECK-NEXT: LBB0_39: ; %lor.lhs.false47 ; CHECK-NEXT: cmp x12, #2 ; CHECK-NEXT: b.ne LBB0_11 -; CHECK-NEXT: ; %bb.45: ; %land.lhs.true52 +; CHECK-NEXT: ; %bb.40: ; %land.lhs.true52 ; CHECK-NEXT: add x12, x9, x11 ; CHECK-NEXT: mov w0, #1 ; =0x1 ; CHECK-NEXT: ldurb w12, [x12, #-1] ; CHECK-NEXT: cmp w12, #73 -; CHECK-NEXT: b.eq LBB0_43 -; CHECK-NEXT: ; %bb.46: ; %land.lhs.true52 -; CHECK-NEXT: cbz w8, LBB0_43 +; CHECK-NEXT: b.eq LBB0_26 +; CHECK-NEXT: ; %bb.41: ; %land.lhs.true52 +; CHECK-NEXT: cbz w8, LBB0_26 ; CHECK-NEXT: b LBB0_12 -; CHECK-NEXT: LBB0_47: +; CHECK-NEXT: LBB0_42: ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ret ; CHECK-NEXT: .loh AdrpLdrGot Lloh0, Lloh1 diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll index 84af8596a0e99..25bceea3fe2b8 100644 --- a/llvm/test/CodeGen/AArch64/tbl-loops.ll +++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll @@ -145,30 +145,28 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-LABEL: loop2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: subs w8, w2, #1 -; CHECK-NEXT: b.lt .LBB1_7 +; CHECK-NEXT: b.lt .LBB1_6 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: cmp w8, #2 -; CHECK-NEXT: b.ls .LBB1_4 +; CHECK-NEXT: b.ls .LBB1_3 ; CHECK-NEXT: // %bb.2: // %vector.memcheck ; CHECK-NEXT: ubfiz x9, x8, #1, #32 ; CHECK-NEXT: add x9, x9, #2 -; CHECK-NEXT: add x10, x1, x9, lsl #2 -; CHECK-NEXT: cmp x10, x0 -; CHECK-NEXT: b.ls .LBB1_8 -; CHECK-NEXT: // %bb.3: // %vector.memcheck -; CHECK-NEXT: add x9, x0, x9 -; CHECK-NEXT: cmp x9, x1 -; CHECK-NEXT: b.ls .LBB1_8 -; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: add x10, x0, x9 +; CHECK-NEXT: add x9, x1, x9, lsl #2 +; CHECK-NEXT: cmp x10, x1 +; CHECK-NEXT: ccmp x9, x0, #0, hi +; CHECK-NEXT: b.ls .LBB1_7 +; CHECK-NEXT: 
.LBB1_3: ; CHECK-NEXT: mov w10, wzr ; CHECK-NEXT: mov x8, x1 ; CHECK-NEXT: mov x9, x0 -; CHECK-NEXT: .LBB1_5: // %for.body.preheader1 +; CHECK-NEXT: .LBB1_4: // %for.body.preheader1 ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000 ; CHECK-NEXT: sub w10, w2, w10 ; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: .LBB1_6: // %for.body +; CHECK-NEXT: .LBB1_5: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldp s2, s3, [x8], #8 ; CHECK-NEXT: fcmp s2, s1 @@ -185,10 +183,10 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: fcvtzs s3, s3 ; CHECK-NEXT: stur b3, [x9, #1] ; CHECK-NEXT: add x9, x9, #2 -; CHECK-NEXT: b.ne .LBB1_6 -; CHECK-NEXT: .LBB1_7: // %for.cond.cleanup +; CHECK-NEXT: b.ne .LBB1_5 +; CHECK-NEXT: .LBB1_6: // %for.cond.cleanup ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB1_8: // %vector.ph +; CHECK-NEXT: .LBB1_7: // %vector.ph ; CHECK-NEXT: add x11, x8, #1 ; CHECK-NEXT: mov w8, #1132396544 // =0x437f0000 ; CHECK-NEXT: and x10, x11, #0x1fffffffc @@ -196,7 +194,7 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: and x12, x11, #0x1fffffffc ; CHECK-NEXT: add x8, x1, x10, lsl #3 ; CHECK-NEXT: add x9, x0, x10, lsl #1 -; CHECK-NEXT: .LBB1_9: // %vector.body +; CHECK-NEXT: .LBB1_8: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld2 { v1.4s, v2.4s }, [x1], #32 ; CHECK-NEXT: subs x12, x12, #4 @@ -214,11 +212,11 @@ define void @loop2(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: xtn v1.4h, v1.4s ; CHECK-NEXT: trn1 v1.8b, v2.8b, v1.8b ; CHECK-NEXT: str d1, [x0], #8 -; CHECK-NEXT: b.ne .LBB1_9 -; CHECK-NEXT: // %bb.10: // %middle.block +; CHECK-NEXT: b.ne .LBB1_8 +; CHECK-NEXT: // %bb.9: // %middle.block ; CHECK-NEXT: cmp x11, x10 -; CHECK-NEXT: b.ne .LBB1_5 -; CHECK-NEXT: b .LBB1_7 +; CHECK-NEXT: b.ne .LBB1_4 +; CHECK-NEXT: b .LBB1_6 entry: %cmp19 = icmp sgt 
i32 %width, 0 br i1 %cmp19, label %for.body.preheader, label %for.cond.cleanup @@ -320,19 +318,56 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-LABEL: loop3: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: subs w8, w2, #1 -; CHECK-NEXT: b.lt .LBB2_9 +; CHECK-NEXT: b.lt .LBB2_6 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: cmp w8, #2 -; CHECK-NEXT: b.ls .LBB2_6 +; CHECK-NEXT: b.ls .LBB2_3 ; CHECK-NEXT: // %bb.2: // %vector.memcheck ; CHECK-NEXT: add x9, x8, w8, uxtw #1 ; CHECK-NEXT: add x9, x9, #3 -; CHECK-NEXT: add x10, x1, x9, lsl #2 -; CHECK-NEXT: add x9, x0, x9 -; CHECK-NEXT: cmp x10, x0 -; CHECK-NEXT: ccmp x9, x1, #0, hi -; CHECK-NEXT: b.hi .LBB2_6 -; CHECK-NEXT: // %bb.3: // %vector.ph +; CHECK-NEXT: add x10, x0, x9 +; CHECK-NEXT: add x9, x1, x9, lsl #2 +; CHECK-NEXT: cmp x10, x1 +; CHECK-NEXT: ccmp x9, x0, #0, hi +; CHECK-NEXT: b.ls .LBB2_7 +; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: mov w10, wzr +; CHECK-NEXT: mov x8, x1 +; CHECK-NEXT: mov x9, x0 +; CHECK-NEXT: .LBB2_4: // %for.body.preheader1 +; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000 +; CHECK-NEXT: sub w10, w2, w10 +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: .LBB2_5: // %for.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldp s2, s3, [x8] +; CHECK-NEXT: fcmp s2, s1 +; CHECK-NEXT: fcsel s4, s1, s2, gt +; CHECK-NEXT: fcmp s2, #0.0 +; CHECK-NEXT: fcsel s2, s0, s4, mi +; CHECK-NEXT: fcmp s3, s1 +; CHECK-NEXT: fcsel s4, s1, s3, gt +; CHECK-NEXT: fcmp s3, #0.0 +; CHECK-NEXT: ldr s3, [x8, #8] +; CHECK-NEXT: fcvtzs s2, s2 +; CHECK-NEXT: add x8, x8, #12 +; CHECK-NEXT: fcsel s4, s0, s4, mi +; CHECK-NEXT: fcmp s3, s1 +; CHECK-NEXT: str b2, [x9] +; CHECK-NEXT: fcsel s5, s1, s3, gt +; CHECK-NEXT: fcmp s3, #0.0 +; CHECK-NEXT: fcvtzs s4, s4 +; CHECK-NEXT: fcsel s3, s0, s5, mi +; CHECK-NEXT: subs w10, w10, #1 +; CHECK-NEXT: stur b4, [x9, #1] +; CHECK-NEXT: fcvtzs s3, s3 +; CHECK-NEXT: stur b3, [x9, 
#2] +; CHECK-NEXT: add x9, x9, #3 +; CHECK-NEXT: b.ne .LBB2_5 +; CHECK-NEXT: .LBB2_6: // %for.cond.cleanup +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_7: // %vector.ph ; CHECK-NEXT: add x11, x8, #1 ; CHECK-NEXT: mov w8, #1132396544 // =0x437f0000 ; CHECK-NEXT: adrp x12, .LCPI2_0 @@ -343,7 +378,7 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: and x12, x11, #0x1fffffffc ; CHECK-NEXT: add x8, x1, x9, lsl #2 ; CHECK-NEXT: add x9, x0, x9 -; CHECK-NEXT: .LBB2_4: // %vector.body +; CHECK-NEXT: .LBB2_8: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld3 { v2.4s, v3.4s, v4.4s }, [x1], #48 ; CHECK-NEXT: subs x12, x12, #4 @@ -370,48 +405,11 @@ define void @loop3(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: str d2, [x0] ; CHECK-NEXT: str s3, [x0, #8] ; CHECK-NEXT: add x0, x0, #12 -; CHECK-NEXT: b.ne .LBB2_4 -; CHECK-NEXT: // %bb.5: // %middle.block -; CHECK-NEXT: cmp x11, x10 -; CHECK-NEXT: b.ne .LBB2_7 -; CHECK-NEXT: b .LBB2_9 -; CHECK-NEXT: .LBB2_6: -; CHECK-NEXT: mov w10, wzr -; CHECK-NEXT: mov x8, x1 -; CHECK-NEXT: mov x9, x0 -; CHECK-NEXT: .LBB2_7: // %for.body.preheader1 -; CHECK-NEXT: movi d0, #0000000000000000 -; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000 -; CHECK-NEXT: sub w10, w2, w10 -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: .LBB2_8: // %for.body -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp s2, s3, [x8] -; CHECK-NEXT: fcmp s2, s1 -; CHECK-NEXT: fcsel s4, s1, s2, gt -; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: fcsel s2, s0, s4, mi -; CHECK-NEXT: fcmp s3, s1 -; CHECK-NEXT: fcsel s4, s1, s3, gt -; CHECK-NEXT: fcmp s3, #0.0 -; CHECK-NEXT: ldr s3, [x8, #8] -; CHECK-NEXT: fcvtzs s2, s2 -; CHECK-NEXT: add x8, x8, #12 -; CHECK-NEXT: fcsel s4, s0, s4, mi -; CHECK-NEXT: fcmp s3, s1 -; CHECK-NEXT: str b2, [x9] -; CHECK-NEXT: fcsel s5, s1, s3, gt -; CHECK-NEXT: fcmp s3, #0.0 -; CHECK-NEXT: fcvtzs s4, s4 -; CHECK-NEXT: fcsel s3, s0, s5, 
mi -; CHECK-NEXT: subs w10, w10, #1 -; CHECK-NEXT: stur b4, [x9, #1] -; CHECK-NEXT: fcvtzs s3, s3 -; CHECK-NEXT: stur b3, [x9, #2] -; CHECK-NEXT: add x9, x9, #3 ; CHECK-NEXT: b.ne .LBB2_8 -; CHECK-NEXT: .LBB2_9: // %for.cond.cleanup -; CHECK-NEXT: ret +; CHECK-NEXT: // %bb.9: // %middle.block +; CHECK-NEXT: cmp x11, x10 +; CHECK-NEXT: b.ne .LBB2_4 +; CHECK-NEXT: b .LBB2_6 entry: %cmp29 = icmp sgt i32 %width, 0 br i1 %cmp29, label %for.body.preheader, label %for.cond.cleanup @@ -530,30 +528,28 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-LABEL: loop4: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: subs w8, w2, #1 -; CHECK-NEXT: b.lt .LBB3_7 +; CHECK-NEXT: b.lt .LBB3_6 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: cmp w8, #2 -; CHECK-NEXT: b.ls .LBB3_4 +; CHECK-NEXT: b.ls .LBB3_3 ; CHECK-NEXT: // %bb.2: // %vector.memcheck ; CHECK-NEXT: ubfiz x9, x8, #2, #32 ; CHECK-NEXT: add x9, x9, #4 -; CHECK-NEXT: add x10, x1, x9, lsl #2 -; CHECK-NEXT: cmp x10, x0 -; CHECK-NEXT: b.ls .LBB3_8 -; CHECK-NEXT: // %bb.3: // %vector.memcheck -; CHECK-NEXT: add x9, x0, x9 -; CHECK-NEXT: cmp x9, x1 -; CHECK-NEXT: b.ls .LBB3_8 -; CHECK-NEXT: .LBB3_4: +; CHECK-NEXT: add x10, x0, x9 +; CHECK-NEXT: add x9, x1, x9, lsl #2 +; CHECK-NEXT: cmp x10, x1 +; CHECK-NEXT: ccmp x9, x0, #0, hi +; CHECK-NEXT: b.ls .LBB3_7 +; CHECK-NEXT: .LBB3_3: ; CHECK-NEXT: mov w10, wzr ; CHECK-NEXT: mov x8, x1 ; CHECK-NEXT: mov x9, x0 -; CHECK-NEXT: .LBB3_5: // %for.body.preheader1 +; CHECK-NEXT: .LBB3_4: // %for.body.preheader1 ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000 ; CHECK-NEXT: sub w10, w2, w10 ; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: .LBB3_6: // %for.body +; CHECK-NEXT: .LBB3_5: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldp s2, s3, [x8] ; CHECK-NEXT: fcmp s2, s1 @@ -584,10 +580,10 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; 
CHECK-NEXT: fcvtzs s5, s5 ; CHECK-NEXT: stur b5, [x9, #3] ; CHECK-NEXT: add x9, x9, #4 -; CHECK-NEXT: b.ne .LBB3_6 -; CHECK-NEXT: .LBB3_7: // %for.cond.cleanup +; CHECK-NEXT: b.ne .LBB3_5 +; CHECK-NEXT: .LBB3_6: // %for.cond.cleanup ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB3_8: // %vector.ph +; CHECK-NEXT: .LBB3_7: // %vector.ph ; CHECK-NEXT: add x11, x8, #1 ; CHECK-NEXT: mov w8, #1132396544 // =0x437f0000 ; CHECK-NEXT: adrp x12, .LCPI3_0 @@ -597,7 +593,7 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: add x8, x1, x10, lsl #4 ; CHECK-NEXT: add x9, x0, x10, lsl #2 ; CHECK-NEXT: and x12, x11, #0x1fffffffc -; CHECK-NEXT: .LBB3_9: // %vector.body +; CHECK-NEXT: .LBB3_8: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld4 { v2.4s, v3.4s, v4.4s, v5.4s }, [x1], #64 ; CHECK-NEXT: subs x12, x12, #4 @@ -627,11 +623,11 @@ define void @loop4(ptr noalias nocapture noundef writeonly %dst, ptr nocapture n ; CHECK-NEXT: xtn v19.4h, v2.4s ; CHECK-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b ; CHECK-NEXT: str q2, [x0], #16 -; CHECK-NEXT: b.ne .LBB3_9 -; CHECK-NEXT: // %bb.10: // %middle.block +; CHECK-NEXT: b.ne .LBB3_8 +; CHECK-NEXT: // %bb.9: // %middle.block ; CHECK-NEXT: cmp x11, x10 -; CHECK-NEXT: b.ne .LBB3_5 -; CHECK-NEXT: b .LBB3_7 +; CHECK-NEXT: b.ne .LBB3_4 +; CHECK-NEXT: b .LBB3_6 entry: %cmp39 = icmp sgt i32 %width, 0 br i1 %cmp39, label %for.body.preheader, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll index 5e51676056811..55954db64f8d6 100644 --- a/llvm/test/CodeGen/AArch64/tbz-tbnz.ll +++ b/llvm/test/CodeGen/AArch64/tbz-tbnz.ll @@ -199,20 +199,20 @@ if.end: define void @test8(i64 %val1, i64 %val2, i64 %val3) { ; CHECK-SD-LABEL: test8: ; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: tst x0, x1 -; CHECK-SD-NEXT: b.pl .LBB7_3 -; CHECK-SD-NEXT: // %bb.1: ; CHECK-SD-NEXT: and x8, x1, x2 -; CHECK-SD-NEXT: tbnz x8, 
#63, .LBB7_3 -; CHECK-SD-NEXT: // %bb.2: // %if.then2 +; CHECK-SD-NEXT: and x9, x0, x1 +; CHECK-SD-NEXT: cmn x8, #1 +; CHECK-SD-NEXT: ccmp x9, #0, #0, gt +; CHECK-SD-NEXT: b.pl .LBB7_2 +; CHECK-SD-NEXT: // %bb.1: // %if.then2 ; CHECK-SD-NEXT: tst x0, x1, lsl #63 -; CHECK-SD-NEXT: b.mi .LBB7_4 -; CHECK-SD-NEXT: .LBB7_3: // %if.end +; CHECK-SD-NEXT: b.mi .LBB7_3 +; CHECK-SD-NEXT: .LBB7_2: // %if.end ; CHECK-SD-NEXT: ret -; CHECK-SD-NEXT: .LBB7_4: // %if.then3 +; CHECK-SD-NEXT: .LBB7_3: // %if.then3 ; CHECK-SD-NEXT: tst x0, x1, lsl #62 -; CHECK-SD-NEXT: b.mi .LBB7_3 -; CHECK-SD-NEXT: // %bb.5: // %if.then4 +; CHECK-SD-NEXT: b.mi .LBB7_2 +; CHECK-SD-NEXT: // %bb.4: // %if.then4 ; CHECK-SD-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 ; CHECK-SD-NEXT: .cfi_offset w30, -16 diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll index ace0c83e63c7c..d7348ea154b24 100644 --- a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll @@ -75,18 +75,18 @@ define i128 @__muloti4(i128 %0, i128 %1, ptr nocapture nonnull writeonly align 4 ; AARCH-NEXT: cmp x9, x11 ; AARCH-NEXT: ccmp x10, x11, #0, eq ; AARCH-NEXT: cset w9, ne -; AARCH-NEXT: tbnz x1, #63, .LBB1_3 -; AARCH-NEXT: b .LBB1_4 +; AARCH-NEXT: b .LBB1_3 ; AARCH-NEXT: .LBB1_2: // %overflow.no ; AARCH-NEXT: smulh x8, x0, x2 ; AARCH-NEXT: mov w9, wzr ; AARCH-NEXT: mul x0, x0, x2 -; AARCH-NEXT: tbz x1, #63, .LBB1_4 ; AARCH-NEXT: .LBB1_3: // %overflow.res ; AARCH-NEXT: eor x10, x3, #0x8000000000000000 ; AARCH-NEXT: orr x10, x2, x10 -; AARCH-NEXT: cbz x10, .LBB1_5 -; AARCH-NEXT: .LBB1_4: // %Else2 +; AARCH-NEXT: cmp x10, #0 +; AARCH-NEXT: ccmp x1, #0, #0, eq +; AARCH-NEXT: b.mi .LBB1_5 +; AARCH-NEXT: // %bb.4: // %Else2 ; AARCH-NEXT: cbz w9, .LBB1_6 ; AARCH-NEXT: .LBB1_5: // %Then7 ; AARCH-NEXT: mov w9, #1 // =0x1 diff --git 
a/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll b/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll index 78c328deff697..f61ff75d7171f 100644 --- a/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll +++ b/llvm/test/DebugInfo/AArch64/instr-ref-target-hooks.ll @@ -1,7 +1,7 @@ ; Test to ensure that variable "__last" is properly recovered at the end of the livedebugvalues pass when Instruction Referencing-based LiveDebugValues is used. ; This testcase was obtained by looking at FileCheck.cpp and reducing it down via llvm-reduce. -; RUN: llc -mtriple=aarch64-apple-darwin -o - %s -stop-after=livedebugvalues -O2 -experimental-debug-variable-locations | FileCheck %s +; RUN: llc -mtriple=aarch64-apple-darwin -o - %s -stop-after=livedebugvalues -O2 -experimental-debug-variable-locations -aarch64-br-merging-base-cost=-1 | FileCheck %s ; CHECK: ![[LOC:[0-9]+]] = !DILocalVariable(name: "__last", ; CHECK: DBG_VALUE_LIST ![[LOC]], !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 8, DW_OP_deref), $sp From e607f6adb169c345171f7d13b333a75faf1020b7 Mon Sep 17 00:00:00 2001 From: Maksim Sabianin Date: Wed, 24 Jun 2026 17:25:45 +0200 Subject: [PATCH 351/511] [SYCL][NewOffloadModel] xfail failing tests in Experimental/fp8 (#22373) --- sycl/test-e2e/Experimental/fp8/e4m3_cri_conversion.cpp | 3 +++ sycl/test-e2e/Experimental/fp8/e4m3_x2_cri_conversion.cpp | 4 ++++ sycl/test-e2e/Experimental/fp8/e5m2_cri_conversion.cpp | 4 ++++ sycl/test-e2e/Experimental/fp8/e5m2_x2_cri_conversion.cpp | 4 ++++ sycl/test-e2e/Experimental/fp8/e8m0_cri_conversion.cpp | 4 ++++ sycl/test-e2e/Experimental/fp8/e8m0_x2_cri_conversion.cpp | 3 +++ 6 files changed, 22 insertions(+) diff --git a/sycl/test-e2e/Experimental/fp8/e4m3_cri_conversion.cpp b/sycl/test-e2e/Experimental/fp8/e4m3_cri_conversion.cpp index 8ce29b3a28e52..88c74a4841b1e 100644 --- a/sycl/test-e2e/Experimental/fp8/e4m3_cri_conversion.cpp +++ b/sycl/test-e2e/Experimental/fp8/e4m3_cri_conversion.cpp @@ -6,6 +6,9 @@ // 
UNSUPPORTED-INTENDED: only supported by backends with CRI driver, and the // SPIR-V backend does not support the required SPIR-V extensions +// XFAIL: new-offload-model +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/22372 + #include #include diff --git a/sycl/test-e2e/Experimental/fp8/e4m3_x2_cri_conversion.cpp b/sycl/test-e2e/Experimental/fp8/e4m3_x2_cri_conversion.cpp index 8cdf307d03666..a4c1a6ccb9205 100644 --- a/sycl/test-e2e/Experimental/fp8/e4m3_x2_cri_conversion.cpp +++ b/sycl/test-e2e/Experimental/fp8/e4m3_x2_cri_conversion.cpp @@ -6,6 +6,10 @@ // UNSUPPORTED: target-nvidia, target-amd, spirv-backend // UNSUPPORTED-INTENDED: only supported by backends with CRI driver, and the // SPIR-V backend does not support the required SPIR-V extensions + +// XFAIL: new-offload-model +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/22372 + #include #include diff --git a/sycl/test-e2e/Experimental/fp8/e5m2_cri_conversion.cpp b/sycl/test-e2e/Experimental/fp8/e5m2_cri_conversion.cpp index 99d8024dd2c47..8eec0f41dc5e3 100644 --- a/sycl/test-e2e/Experimental/fp8/e5m2_cri_conversion.cpp +++ b/sycl/test-e2e/Experimental/fp8/e5m2_cri_conversion.cpp @@ -5,6 +5,10 @@ // UNSUPPORTED: target-nvidia, target-amd, spirv-backend // UNSUPPORTED-INTENDED: only supported by backends with CRI driver, and the // SPIR-V backend does not support the required SPIR-V extensions + +// XFAIL: new-offload-model +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/22372 + #include #include diff --git a/sycl/test-e2e/Experimental/fp8/e5m2_x2_cri_conversion.cpp b/sycl/test-e2e/Experimental/fp8/e5m2_x2_cri_conversion.cpp index 1bbe45ae30357..edba7e2c6bfc7 100644 --- a/sycl/test-e2e/Experimental/fp8/e5m2_x2_cri_conversion.cpp +++ b/sycl/test-e2e/Experimental/fp8/e5m2_x2_cri_conversion.cpp @@ -5,6 +5,10 @@ // UNSUPPORTED: target-nvidia, target-amd, spirv-backend // UNSUPPORTED-INTENDED: only supported by backends with CRI driver, and the // SPIR-V backend does not support the 
required SPIR-V extensions + +// XFAIL: new-offload-model +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/22372 + #include #include diff --git a/sycl/test-e2e/Experimental/fp8/e8m0_cri_conversion.cpp b/sycl/test-e2e/Experimental/fp8/e8m0_cri_conversion.cpp index 9797eff2a8878..aa728a23efe1e 100644 --- a/sycl/test-e2e/Experimental/fp8/e8m0_cri_conversion.cpp +++ b/sycl/test-e2e/Experimental/fp8/e8m0_cri_conversion.cpp @@ -6,6 +6,10 @@ // UNSUPPORTED: target-nvidia, target-amd, spirv-backend // UNSUPPORTED-INTENDED: only supported by backends with CRI driver, and the // SPIR-V backend does not support the required SPIR-V extensions + +// XFAIL: new-offload-model +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/22372 + #include #include diff --git a/sycl/test-e2e/Experimental/fp8/e8m0_x2_cri_conversion.cpp b/sycl/test-e2e/Experimental/fp8/e8m0_x2_cri_conversion.cpp index 1fa4e6e85c2f6..2f2f7bfac4bb3 100644 --- a/sycl/test-e2e/Experimental/fp8/e8m0_x2_cri_conversion.cpp +++ b/sycl/test-e2e/Experimental/fp8/e8m0_x2_cri_conversion.cpp @@ -7,6 +7,9 @@ // UNSUPPORTED-INTENDED: only supported by backends with CRI driver, and the // SPIR-V backend does not support the required SPIR-V extensions +// XFAIL: new-offload-model +// XFAIL-TRACKER: https://github.com/intel/llvm/issues/22372 + #include #include #include From 89b463bbe6354d7331496b3f2f34ce4be130653b Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Wed, 24 Jun 2026 23:33:10 +0800 Subject: [PATCH 352/511] [llubi] Always print out error message (#205573) When `--verbose` is not specified, the error message and UB reason are omitted. However, this information is still useful for test oracles. For example, the fuzzer may skip the seed when it runs out of time. BTW, the stack trace is always dumped. Not sure if it is intended or not. 
--- .../tools/llubi/infinite_loop_error_only.ll | 14 ++++++++++ llvm/tools/llubi/llubi.cpp | 26 ++++++++++--------- 2 files changed, 28 insertions(+), 12 deletions(-) create mode 100644 llvm/test/tools/llubi/infinite_loop_error_only.ll diff --git a/llvm/test/tools/llubi/infinite_loop_error_only.ll b/llvm/test/tools/llubi/infinite_loop_error_only.ll new file mode 100644 index 0000000000000..01680e5783665 --- /dev/null +++ b/llvm/test/tools/llubi/infinite_loop_error_only.ll @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_llubi_test_checks.py UTC_ARGS: --version 6 +; RUN: not llubi --max-steps 10 < %s 2>&1 | FileCheck %s + +define void @main() { +entry: + br label %loop + +loop: + br label %loop +} +; CHECK: Stacktrace: +; CHECK-NEXT: #0 br label %loop at @main :9 +; CHECK-NEXT: Error: Exceeded maximum number of execution steps. +; CHECK-NEXT: error: Execution of function 'main' failed. diff --git a/llvm/tools/llubi/llubi.cpp b/llvm/tools/llubi/llubi.cpp index 56bd7f49eabd9..fd6d0fa807761 100644 --- a/llvm/tools/llubi/llubi.cpp +++ b/llvm/tools/llubi/llubi.cpp @@ -123,7 +123,19 @@ cl::opt NaNPropagationBehavior( "payloads.")), cl::init(ubi::NaNPropagationBehavior::NonDeterministic)); -class VerboseEventHandler : public ubi::EventHandler { +class NoopEventHandler : public ubi::EventHandler { + void onImmediateUB(StringRef Msg) override { + errs() << "Immediate UB detected: " << Msg << '\n'; + } + + void onError(StringRef Msg) override { errs() << "Error: " << Msg << '\n'; } + + void onUnrecognizedInstruction(Instruction &I) override { + errs() << "Unrecognized instruction: " << I << '\n'; + } +}; + +class VerboseEventHandler : public NoopEventHandler { public: bool onInstructionExecuted(Instruction &I, const ubi::AnyValue &Result) override { @@ -136,12 +148,6 @@ class VerboseEventHandler : public ubi::EventHandler { return true; } - void onImmediateUB(StringRef Msg) override { - errs() << "Immediate UB detected: " << Msg << '\n'; - } - - 
void onError(StringRef Msg) override { errs() << "Error: " << Msg << '\n'; } - bool onBBJump(Instruction &I, BasicBlock &To) override { errs() << I << " jump to "; To.printAsOperand(errs(), /*PrintType=*/false); @@ -186,10 +192,6 @@ class VerboseEventHandler : public ubi::EventHandler { llvm_unreachable("Unknown ProgramExitKind"); } - - void onUnrecognizedInstruction(Instruction &I) override { - errs() << "Unrecognized instruction: " << I << '\n'; - } }; int main(int argc, char **argv) { @@ -317,7 +319,7 @@ int main(int argc, char **argv) { Args.push_back(ubi::AnyValue::getNullValue(Ctx, Arg.getType())); } - ubi::EventHandler NoopHandler; + NoopEventHandler NoopHandler; VerboseEventHandler VerboseHandler; ubi::AnyValue RetVal; ubi::ProgramExitInfo ExitInfo = Ctx.runFunction( From cd96b2f3ac323f7bdba7ad63a69110f932754ede Mon Sep 17 00:00:00 2001 From: vangthao95 Date: Wed, 24 Jun 2026 08:34:40 -0700 Subject: [PATCH 353/511] AMDGPU/GlobalISel: Implement G_GET/SET_ROUNDING (#205265) Implement G_GET/SET_ROUNDING for the llvm.get.rounding and llvm.set.rounding intrinsics. The lowering is ported from the existing SelectionDAG handling, keeping the structure close to the SDAG implementation. 
Assisted by: Claude Opus 4.8 --- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 2 + .../Target/AMDGPU/AMDGPURegBankLegalize.cpp | 7 +- .../AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 140 +- .../AMDGPU/AMDGPURegBankLegalizeHelper.h | 6 +- .../AMDGPU/AMDGPURegBankLegalizeRules.cpp | 7 + .../AMDGPU/AMDGPURegBankLegalizeRules.h | 4 +- llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll | 110 +- llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll | 2615 +++++++++++------ 8 files changed, 1984 insertions(+), 907 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 57d7d42bb5b9c..3909c5b964fa5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -964,6 +964,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, getActionDefinitionsBuilder({G_GET_FPENV, G_SET_FPENV}).customFor({S64}); + getActionDefinitionsBuilder({G_GET_ROUNDING, G_SET_ROUNDING}).legalFor({S32}); + getActionDefinitionsBuilder(G_GLOBAL_VALUE) .customIf(typeIsNot(0, PrivatePtr)); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp index 28e97c759c133..5c4052f942011 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp @@ -23,6 +23,7 @@ #include "GCNSubtarget.h" #include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/Utils.h" @@ -63,6 +64,7 @@ class AMDGPURegBankLegalize : public MachineFunctionPass { AU.addRequired(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -80,6 +82,7 @@ INITIALIZE_PASS_BEGIN(AMDGPURegBankLegalize, DEBUG_TYPE, 
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass) +INITIALIZE_PASS_DEPENDENCY(GISelValueTrackingAnalysisLegacy) INITIALIZE_PASS_END(AMDGPURegBankLegalize, DEBUG_TYPE, "AMDGPU Register Bank Legalize", false, false) @@ -439,12 +442,14 @@ bool AMDGPURegBankLegalize::runOnMachineFunction(MachineFunction &MF) { const RegisterBankInfo &RBI = *ST.getRegBankInfo(); const MachineUniformityInfo &MUI = getAnalysis().getUniformityInfo(); + GISelValueTracking &VT = + getAnalysis().get(MF); // RegBankLegalizeRules is initialized with assigning sets of IDs to opcodes. const RegBankLegalizeRules &RBLRules = getRules(ST, MRI); // Logic that does legalization based on IDs assigned to Opcode. - RegBankLegalizeHelper RBLHelper(B, MUI, RBI, RBLRules); + RegBankLegalizeHelper RBLHelper(B, MUI, &VT, RBI, RBLRules); SmallVector AllInst; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 6061854871e3b..8e4058227b96e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -20,6 +20,7 @@ #include "GCNSubtarget.h" #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "SIMachineFunctionInfo.h" +#include "llvm/CodeGen/GlobalISel/GISelValueTracking.h" #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" @@ -34,10 +35,11 @@ using namespace AMDGPU; RegBankLegalizeHelper::RegBankLegalizeHelper( MachineIRBuilder &B, const MachineUniformityInfo &MUI, - const RegisterBankInfo &RBI, const RegBankLegalizeRules &RBLRules) + GISelValueTracking *VT, const RegisterBankInfo &RBI, + const RegBankLegalizeRules &RBLRules) : MF(B.getMF()), MFI(MF.getInfo()), ST(MF.getSubtarget()), TII(*ST.getInstrInfo()), B(B), - MRI(*B.getMRI()), MUI(MUI), 
RBI(RBI), MORE(MF, nullptr), + MRI(*B.getMRI()), MUI(MUI), VT(VT), RBI(RBI), MORE(MF, nullptr), RBLRules(RBLRules), IsWave32(ST.isWave32()), SgprRB(&RBI.getRegBank(AMDGPU::SGPRRegBankID)), VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)), @@ -1277,6 +1279,136 @@ bool RegBankLegalizeHelper::lowerAbsToS32(MachineInstr &MI) { return true; } +// Ported from SITargetLowering::lowerSET_ROUNDING in SIISelLowering.cpp. +// Keep the mapping logic and conversion tables aligned with the SDAG lowering. +bool RegBankLegalizeHelper::lowerSetRounding(MachineInstr &MI) { + Register NewMode = MI.getOperand(0).getReg(); + + // Index a table of 4-bit entries mapping from the C FLT_ROUNDS values to the + // hardware MODE.fp_round values. + if (auto ConstMode = getIConstantVRegValWithLookThrough(NewMode, MRI)) { + uint32_t ClampedVal = std::min( + static_cast(ConstMode->Value.getZExtValue()), + static_cast(AMDGPU::TowardZeroF32_TowardNegativeF64)); + uint32_t DecodedVal = AMDGPU::decodeFltRoundToHWConversionTable(ClampedVal); + NewMode = B.buildConstant(SgprRB_S32, DecodedVal).getReg(0); + } else { + // If we know the input can only be one of the supported standard modes in + // the range 0-3, we can use a simplified mapping to hardware values. + KnownBits Known = VT->getKnownBits(NewMode); + const bool UseReducedTable = Known.countMinLeadingZeros() >= 30; + // The supported standard values are 0-3. The extended values start at 8. We + // need to offset by 4 if the value is in the extended range. + + if (UseReducedTable) { + // Truncate to the low 32-bits. + auto BitTable = B.buildConstant( + SgprRB_S32, AMDGPU::FltRoundToHWConversionTable & 0xffff); + + auto Two = B.buildConstant(SgprRB_S32, 2); + auto RoundModeTimesNumBits = B.buildShl(SgprRB_S32, NewMode, Two); + + NewMode = + B.buildLShr(SgprRB_S32, BitTable, RoundModeTimesNumBits).getReg(0); + + // TODO: A demanded-bits simplification on the setreg source here could + // likely reduce the table extracted bits into inline immediates. 
+ } else { + // table_index = umin(value, value - 4) + // MODE.fp_round = (bit_table >> (table_index << 2)) & 0xf + auto NegFour = B.buildConstant(SgprRB_S32, -4); + auto OffsetEnum = B.buildAdd(SgprRB_S32, NewMode, NegFour); + auto IndexVal = B.buildUMin(SgprRB_S32, NewMode, OffsetEnum); + + auto Two = B.buildConstant(SgprRB_S32, 2); + auto RoundModeTimesNumBits = B.buildShl(SgprRB_S32, IndexVal, Two); + + auto BitTable = + B.buildConstant({SgprRB, S64}, AMDGPU::FltRoundToHWConversionTable); + auto TableValue = + B.buildLShr({SgprRB, S64}, BitTable, RoundModeTimesNumBits); + // No need to mask out the high bits since the setreg will ignore them + // anyway. + NewMode = B.buildTrunc(SgprRB_S32, TableValue).getReg(0); + } + } + + // N.B. The setreg will be later folded into s_round_mode on supported + // targets. + uint32_t BothRoundHwReg = + AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 0, 4); + B.buildIntrinsic(Intrinsic::amdgcn_s_setreg, ArrayRef(), + /*HasSideEffects=*/true, /*isConvergent=*/false) + .addImm(static_cast(BothRoundHwReg)) + .addReg(NewMode); + + MI.eraseFromParent(); + return true; +} + +// Lowers G_GET_ROUNDING; ported from SITargetLowering::lowerGET_ROUNDING. +// Keep the mapping logic and conversion tables aligned with the SDAG lowering. +bool RegBankLegalizeHelper::lowerGetRounding(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + + uint32_t BothRoundHwReg = + AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 0, 4); + auto GetReg = B.buildInstr(AMDGPU::S_GETREG_B32, {SgprRB_S32}, {}) + .addImm(BothRoundHwReg); + + // There are two rounding modes, one for f32 and one for f64/f16. We only + // report in the standard value range if both are the same. + // + // The raw values also differ from the expected FLT_ROUNDS values. Nearest + // ties away from zero is not supported, and the other values are rotated by + // 1. + // + // If the two rounding modes are not the same, report a target-defined value. 
+ + // Mode register rounding mode fields: + // + // [1:0] Single-precision round mode. + // [3:2] Double/Half-precision round mode. + // + // 0 = nearest even; 1 = +infinity; 2 = -infinity; 3 = toward zero. + // + // Hardware Spec + // Toward-0 3 0 + // Nearest Even 0 1 + // +Inf 1 2 + // -Inf 2 3 + // NearestAway0 N/A 4 + // + // We have to handle 16 permutations of a 4-bit value, so we create a 64-bit + // table (16 x 4-bit entries) we can index by the raw hardware mode. + // + // (trunc (FltRoundConversionTable >> (MODE.fp_round << 2))) & 0xf + auto BitTable = + B.buildConstant({SgprRB, S64}, AMDGPU::FltRoundConversionTable); + + auto Two = B.buildConstant(SgprRB_S32, 2); + auto RoundModeTimesNumBits = B.buildShl(SgprRB_S32, GetReg, Two); + + // TODO: We could possibly avoid a 64-bit shift and use a simpler table if we + // knew only one mode was demanded. + auto TableValue = B.buildLShr({SgprRB, S64}, BitTable, RoundModeTimesNumBits); + auto TruncTable = B.buildTrunc(SgprRB_S32, TableValue); + + auto EntryMask = B.buildConstant(SgprRB_S32, 0xf); + auto TableEntry = B.buildAnd(SgprRB_S32, TruncTable, EntryMask); + + // There's a gap between the 4-bit table encoding and the actual enum values, + // so add 4 when the entry is an extended (>= 4) value. 
+ auto Four = B.buildConstant(SgprRB_S32, 4); + auto EnumOffset = B.buildAdd(SgprRB_S32, TableEntry, Four); + auto IsStandardMode = + B.buildICmp(CmpInst::ICMP_ULT, SgprRB_S32, TableEntry, Four); + B.buildSelect(Dst, IsStandardMode, TableEntry, EnumOffset); + + MI.eraseFromParent(); + return true; +} + bool RegBankLegalizeHelper::lower(MachineInstr &MI, const RegBankLLTMapping &Mapping, WaterfallInfo &WFI) { @@ -1653,6 +1785,10 @@ bool RegBankLegalizeHelper::lower(MachineInstr &MI, case DeletePrefetch: MI.eraseFromParent(); return true; + case LowerSetRounding: + return lowerSetRounding(MI); + case LowerGetRounding: + return lowerGetRounding(MI); } return true; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h index f4d5e1f57239b..25c8c3a0f6127 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h @@ -20,6 +20,7 @@ namespace llvm { class MachineIRBuilder; class SIInstrInfo; class SIMachineFunctionInfo; +class GISelValueTracking; namespace AMDGPU { @@ -46,6 +47,7 @@ class RegBankLegalizeHelper { MachineIRBuilder &B; MachineRegisterInfo &MRI; const MachineUniformityInfo &MUI; + GISelValueTracking *VT; const RegisterBankInfo &RBI; MachineOptimizationRemarkEmitter MORE; const RegBankLegalizeRules &RBLRules; @@ -95,7 +97,7 @@ class RegBankLegalizeHelper { public: RegBankLegalizeHelper(MachineIRBuilder &B, const MachineUniformityInfo &MUI, - const RegisterBankInfo &RBI, + GISelValueTracking *VT, const RegisterBankInfo &RBI, const RegBankLegalizeRules &RBLRules); bool findRuleAndApplyMapping(MachineInstr &MI); @@ -153,6 +155,8 @@ class RegBankLegalizeHelper { bool lowerInsVecEltTo32(MachineInstr &MI); bool lowerAbsToNegMax(MachineInstr &MI); bool lowerAbsToS32(MachineInstr &MI); + bool lowerSetRounding(MachineInstr &MI); + bool lowerGetRounding(MachineInstr &MI); bool applyRegisterBanksVgprWithSgprRsrc(MachineInstr &MI, unsigned 
RsrcIdx); }; diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index b0a8c1dd11e6e..f1a934c5b4184 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -1446,6 +1446,13 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, addRulesForGOpcs({G_READSTEADYCOUNTER, G_READCYCLECOUNTER}, Standard) .Uni(S64, {{Sgpr64}, {}}); + addRulesForGOpcs({G_GET_ROUNDING}, Standard) + .Uni(S32, {{Sgpr32}, {}, LowerGetRounding}); + + addRulesForGOpcs({G_SET_ROUNDING}, Standard) + .Uni(S32, {{}, {SgprB32_ReadFirstLane}, LowerSetRounding}) + .Div(S32, {{}, {SgprB32_ReadFirstLane}, LowerSetRounding}); + addRulesForGOpcs({G_BLOCK_ADDR}).Any({{UniP0}, {{SgprP0}, {}}}); addRulesForGOpcs({G_GLOBAL_VALUE}) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index e30de59e74394..16a5634a8eb62 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -347,7 +347,9 @@ enum LoweringMethodID { AbsToNegMax, AbsToS32, DynStackAlloc, - DeletePrefetch + DeletePrefetch, + LowerSetRounding, + LowerGetRounding }; enum FastRulesTypes { diff --git a/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll b/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll index 71d5747f5eece..5f665c4fa32ff 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.get.rounding.ll @@ -1,43 +1,79 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s -; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s -; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s -; RUN: llc -mtriple=amdgcn 
-mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX678-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX678-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX678-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX678-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GFX678-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GFX678-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX10 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11 %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11 %s declare i32 @llvm.get.rounding() define i32 @func_rounding() { -; GFX678-LABEL: func_rounding: -; GFX678: ; %bb.0: -; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4) -; GFX678-NEXT: s_lshl_b32 s6, s4, 2 -; GFX678-NEXT: s_mov_b32 s4, 0xeb24da71 -; GFX678-NEXT: s_mov_b32 s5, 0xc96f385 -; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 -; GFX678-NEXT: s_and_b32 s4, s4, 15 -; 
GFX678-NEXT: s_add_i32 s5, s4, 4 -; GFX678-NEXT: s_cmp_lt_u32 s4, 4 -; GFX678-NEXT: s_cselect_b32 s4, s4, s5 -; GFX678-NEXT: v_mov_b32_e32 v0, s4 -; GFX678-NEXT: s_setpc_b64 s[30:31] +; GFX678-SDAG-LABEL: func_rounding: +; GFX678-SDAG: ; %bb.0: +; GFX678-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4) +; GFX678-SDAG-NEXT: s_lshl_b32 s6, s4, 2 +; GFX678-SDAG-NEXT: s_mov_b32 s4, 0xeb24da71 +; GFX678-SDAG-NEXT: s_mov_b32 s5, 0xc96f385 +; GFX678-SDAG-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX678-SDAG-NEXT: s_and_b32 s4, s4, 15 +; GFX678-SDAG-NEXT: s_add_i32 s5, s4, 4 +; GFX678-SDAG-NEXT: s_cmp_lt_u32 s4, 4 +; GFX678-SDAG-NEXT: s_cselect_b32 s4, s4, s5 +; GFX678-SDAG-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-SDAG-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-LABEL: func_rounding: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4) -; GFX9-NEXT: s_lshl_b32 s6, s4, 2 -; GFX9-NEXT: s_mov_b32 s4, 0xeb24da71 -; GFX9-NEXT: s_mov_b32 s5, 0xc96f385 -; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 -; GFX9-NEXT: s_and_b32 s4, s4, 15 -; GFX9-NEXT: s_add_i32 s5, s4, 4 -; GFX9-NEXT: s_cmp_lt_u32 s4, 4 -; GFX9-NEXT: s_cselect_b32 s4, s4, s5 -; GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-NEXT: s_setpc_b64 s[30:31] +; GFX678-GISEL-LABEL: func_rounding: +; GFX678-GISEL: ; %bb.0: +; GFX678-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-GISEL-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4) +; GFX678-GISEL-NEXT: s_mov_b32 s4, 0xeb24da71 +; GFX678-GISEL-NEXT: s_mov_b32 s5, 0xc96f385 +; GFX678-GISEL-NEXT: s_lshl_b32 s6, s6, 2 +; GFX678-GISEL-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX678-GISEL-NEXT: s_and_b32 s4, s4, 15 +; GFX678-GISEL-NEXT: s_add_i32 s5, s4, 4 +; GFX678-GISEL-NEXT: s_cmp_lt_u32 s4, 4 +; GFX678-GISEL-NEXT: s_cselect_b32 s4, s4, s5 +; GFX678-GISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX678-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: 
func_rounding: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4) +; GFX9-SDAG-NEXT: s_lshl_b32 s6, s4, 2 +; GFX9-SDAG-NEXT: s_mov_b32 s4, 0xeb24da71 +; GFX9-SDAG-NEXT: s_mov_b32 s5, 0xc96f385 +; GFX9-SDAG-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX9-SDAG-NEXT: s_and_b32 s4, s4, 15 +; GFX9-SDAG-NEXT: s_add_i32 s5, s4, 4 +; GFX9-SDAG-NEXT: s_cmp_lt_u32 s4, 4 +; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, s5 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: func_rounding: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0xeb24da71 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc96f385 +; GFX9-GISEL-NEXT: s_lshl_b32 s6, s6, 2 +; GFX9-GISEL-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 15 +; GFX9-GISEL-NEXT: s_add_i32 s5, s4, 4 +; GFX9-GISEL-NEXT: s_cmp_lt_u32 s4, 4 +; GFX9-GISEL-NEXT: s_cselect_b32 s4, s4, s5 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: func_rounding: ; GFX10: ; %bb.0: @@ -71,9 +107,3 @@ define i32 @func_rounding() { %rounding = call i32 @llvm.get.rounding() ret i32 %rounding } -;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: -; GCN: {{.*}} -; GFX1011: {{.*}} -; GFX6: {{.*}} -; GFX7: {{.*}} -; GFX8: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll b/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll index 5cbe3e72ce5f9..aadd16a22daf7 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.set.rounding.ll @@ -1,10 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX678,GFX6 %s -; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GCN,GFX678,GFX7 %s -; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX678,GFX8 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX10 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GCN,GFX1011,GFX11 %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX678,GFX6,GFX678-SDAG,GFX6-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GFX678,GFX6,GFX678-GISEL,GFX6-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX678,GFX7,GFX678-SDAG,GFX7-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=GFX678,GFX7,GFX678-GISEL,GFX7-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GFX678,GFX678-SDAG,GFX8-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=GFX678,GFX678-GISEL,GFX8-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck 
-check-prefixes=GFX9,GFX9-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX1011,GFX10,GFX10-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck -check-prefixes=GFX1011,GFX10,GFX10-GISEL %s +; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX1011,GFX11,GFX11-SDAG %s +; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX1011,GFX11,GFX11-GISEL %s declare void @llvm.set.rounding(i32) declare i32 @llvm.get.rounding() @@ -34,29 +40,53 @@ define amdgpu_gfx void @s_set_rounding(i32 inreg %rounding) { ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: s_set_rounding: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_add_i32 s34, s4, -4 -; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 -; GFX10-NEXT: s_min_u32 s36, s4, s34 -; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f -; GFX10-NEXT: s_lshl_b32 s36, s36, 2 -; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: s_set_rounding: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_add_i32 s0, s4, -4 -; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 -; GFX11-NEXT: s_min_u32 s2, s4, s0 -; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f -; GFX11-NEXT: s_lshl_b32 s2, s2, 2 -; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: s_set_rounding: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_add_i32 s34, s4, -4 +; GFX10-SDAG-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-SDAG-NEXT: s_min_u32 s36, s4, s34 +; GFX10-SDAG-NEXT: s_mov_b32 s34, 0x1c84a50f +; 
GFX10-SDAG-NEXT: s_lshl_b32 s36, s36, 2 +; GFX10-SDAG-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: s_set_rounding: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_add_i32 s34, s4, -4 +; GFX10-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-GISEL-NEXT: s_min_u32 s34, s4, s34 +; GFX10-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX10-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX10-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: s_set_rounding: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_add_i32 s0, s4, -4 +; GFX11-SDAG-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-SDAG-NEXT: s_min_u32 s2, s4, s0 +; GFX11-SDAG-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-SDAG-NEXT: s_lshl_b32 s2, s2, 2 +; GFX11-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: s_set_rounding: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_add_i32 s0, s4, -4 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-GISEL-NEXT: s_min_u32 s0, s4, s0 +; GFX11-GISEL-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 %rounding) ret void } @@ -92,20 +122,35 @@ define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) { ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX7-NEXT: s_endpgm ; -; GFX8-LABEL: s_set_rounding_kernel: -; GFX8: ; %bb.0: -; 
GFX8-NEXT: s_load_dword s2, s[4:5], 0x24 -; GFX8-NEXT: s_mov_b32 s0, 0x1c84a50f -; GFX8-NEXT: s_mov_b32 s1, 0xb73e62d9 -; GFX8-NEXT: ;;#ASMSTART -; GFX8-NEXT: ;;#ASMEND -; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: s_add_i32 s3, s2, -4 -; GFX8-NEXT: s_min_u32 s2, s2, s3 -; GFX8-NEXT: s_lshl_b32 s2, s2, 2 -; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX8-NEXT: s_endpgm +; GFX8-SDAG-LABEL: s_set_rounding_kernel: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_load_dword s2, s[4:5], 0x24 +; GFX8-SDAG-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX8-SDAG-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX8-SDAG-NEXT: ;;#ASMSTART +; GFX8-SDAG-NEXT: ;;#ASMEND +; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-SDAG-NEXT: s_add_i32 s3, s2, -4 +; GFX8-SDAG-NEXT: s_min_u32 s2, s2, s3 +; GFX8-SDAG-NEXT: s_lshl_b32 s2, s2, 2 +; GFX8-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX8-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX8-SDAG-NEXT: s_endpgm +; +; GFX8-GISEL-LABEL: s_set_rounding_kernel: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_load_dword s2, s[4:5], 0x24 +; GFX8-GISEL-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX8-GISEL-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX8-GISEL-NEXT: ;;#ASMSTART +; GFX8-GISEL-NEXT: ;;#ASMEND +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX8-GISEL-NEXT: s_add_i32 s3, s2, -4 +; GFX8-GISEL-NEXT: s_min_u32 s2, s2, s3 +; GFX8-GISEL-NEXT: s_lshl_b32 s2, s2, 2 +; GFX8-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX8-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX8-GISEL-NEXT: s_endpgm ; ; GFX9-LABEL: s_set_rounding_kernel: ; GFX9: ; %bb.0: @@ -122,206 +167,372 @@ define amdgpu_kernel void @s_set_rounding_kernel(i32 inreg %rounding) { ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 ; GFX9-NEXT: s_endpgm ; -; GFX10-LABEL: s_set_rounding_kernel: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_load_dword s2, s[4:5], 0x24 -; GFX10-NEXT: s_mov_b32 s0, 0x1c84a50f -; GFX10-NEXT: s_mov_b32 s1, 0xb73e62d9 -; GFX10-NEXT: 
;;#ASMSTART -; GFX10-NEXT: ;;#ASMEND -; GFX10-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-NEXT: s_add_i32 s3, s2, -4 -; GFX10-NEXT: s_min_u32 s2, s2, s3 -; GFX10-NEXT: s_lshl_b32 s2, s2, 2 -; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX10-NEXT: s_endpgm -; -; GFX11-LABEL: s_set_rounding_kernel: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x24 -; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f -; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 -; GFX11-NEXT: ;;#ASMSTART -; GFX11-NEXT: ;;#ASMEND -; GFX11-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-NEXT: s_add_i32 s3, s2, -4 -; GFX11-NEXT: s_min_u32 s2, s2, s3 -; GFX11-NEXT: s_lshl_b32 s2, s2, 2 -; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_endpgm +; GFX10-SDAG-LABEL: s_set_rounding_kernel: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_load_dword s2, s[4:5], 0x24 +; GFX10-SDAG-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX10-SDAG-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX10-SDAG-NEXT: ;;#ASMSTART +; GFX10-SDAG-NEXT: ;;#ASMEND +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: s_add_i32 s3, s2, -4 +; GFX10-SDAG-NEXT: s_min_u32 s2, s2, s3 +; GFX10-SDAG-NEXT: s_lshl_b32 s2, s2, 2 +; GFX10-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX10-SDAG-NEXT: s_endpgm +; +; GFX10-GISEL-LABEL: s_set_rounding_kernel: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_load_dword s0, s[4:5], 0x24 +; GFX10-GISEL-NEXT: ;;#ASMSTART +; GFX10-GISEL-NEXT: ;;#ASMEND +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: s_add_i32 s1, s0, -4 +; GFX10-GISEL-NEXT: s_min_u32 s2, s0, s1 +; GFX10-GISEL-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX10-GISEL-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX10-GISEL-NEXT: s_lshl_b32 s2, s2, 2 +; GFX10-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX10-GISEL-NEXT: s_endpgm +; +; 
GFX11-SDAG-LABEL: s_set_rounding_kernel: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-SDAG-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-SDAG-NEXT: ;;#ASMSTART +; GFX11-SDAG-NEXT: ;;#ASMEND +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: s_add_i32 s3, s2, -4 +; GFX11-SDAG-NEXT: s_min_u32 s2, s2, s3 +; GFX11-SDAG-NEXT: s_lshl_b32 s2, s2, 2 +; GFX11-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: s_set_rounding_kernel: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b32 s0, s[4:5], 0x24 +; GFX11-GISEL-NEXT: ;;#ASMSTART +; GFX11-GISEL-NEXT: ;;#ASMEND +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: s_add_i32 s1, s0, -4 +; GFX11-GISEL-NEXT: s_min_u32 s2, s0, s1 +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-GISEL-NEXT: s_lshl_b32 s2, s2, 2 +; GFX11-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_endpgm call void @llvm.set.rounding(i32 %rounding) call void asm sideeffect "",""() ret void } define void @v_set_rounding(i32 %rounding) { -; GFX6-LABEL: v_set_rounding: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: v_add_i32_e32 v1, vcc, -4, v0 -; GFX6-NEXT: v_min_u32_e32 v0, v0, v1 -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX6-NEXT: s_mov_b32 s4, 0x1c84a50f -; GFX6-NEXT: s_mov_b32 s5, 0xb73e62d9 -; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v0 -; GFX6-NEXT: v_readfirstlane_b32 s4, v0 -; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX7-LABEL: v_set_rounding: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: v_add_i32_e32 v1, vcc, -4, v0 -; GFX7-NEXT: v_min_u32_e32 v0, v0, v1 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 
2, v0 -; GFX7-NEXT: s_mov_b32 s4, 0x1c84a50f -; GFX7-NEXT: s_mov_b32 s5, 0xb73e62d9 -; GFX7-NEXT: v_lshr_b64 v[0:1], s[4:5], v0 -; GFX7-NEXT: v_readfirstlane_b32 s4, v0 -; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: v_set_rounding: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_add_u32_e32 v1, vcc, -4, v0 -; GFX8-NEXT: v_min_u32_e32 v0, v0, v1 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX8-NEXT: s_mov_b32 s4, 0x1c84a50f -; GFX8-NEXT: s_mov_b32 s5, 0xb73e62d9 -; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] -; GFX8-NEXT: v_readfirstlane_b32 s4, v0 -; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_set_rounding: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_add_u32_e32 v1, -4, v0 -; GFX9-NEXT: v_min_u32_e32 v0, v0, v1 -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f -; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9 -; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] -; GFX9-NEXT: v_readfirstlane_b32 s4, v0 -; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_set_rounding: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_add_nc_u32_e32 v1, -4, v0 -; GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f -; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9 -; GFX10-NEXT: v_min_u32_e32 v0, v0, v1 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] -; GFX10-NEXT: v_readfirstlane_b32 s4, v0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_set_rounding: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_add_nc_u32_e32 v1, -4, v0 -; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f -; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 -; GFX11-NEXT: v_min_u32_e32 v0, v0, 
v1 -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1] -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX6-SDAG-LABEL: v_set_rounding: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_add_i32_e32 v1, vcc, -4, v0 +; GFX6-SDAG-NEXT: v_min_u32_e32 v0, v0, v1 +; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX6-SDAG-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX6-SDAG-NEXT: v_lshr_b64 v[0:1], s[4:5], v0 +; GFX6-SDAG-NEXT: v_readfirstlane_b32 s4, v0 +; GFX6-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX678-GISEL-LABEL: v_set_rounding: +; GFX678-GISEL: ; %bb.0: +; GFX678-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-GISEL-NEXT: v_readfirstlane_b32 s4, v0 +; GFX678-GISEL-NEXT: s_add_i32 s5, s4, -4 +; GFX678-GISEL-NEXT: s_min_u32 s4, s4, s5 +; GFX678-GISEL-NEXT: s_lshl_b32 s6, s4, 2 +; GFX678-GISEL-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX678-GISEL-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX678-GISEL-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX678-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX678-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: v_set_rounding: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: v_add_i32_e32 v1, vcc, -4, v0 +; GFX7-SDAG-NEXT: v_min_u32_e32 v0, v0, v1 +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX7-SDAG-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX7-SDAG-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX7-SDAG-NEXT: v_lshr_b64 v[0:1], s[4:5], v0 +; GFX7-SDAG-NEXT: v_readfirstlane_b32 s4, v0 +; GFX7-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: v_set_rounding: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
+; GFX8-SDAG-NEXT: v_add_u32_e32 v1, vcc, -4, v0 +; GFX8-SDAG-NEXT: v_min_u32_e32 v0, v0, v1 +; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX8-SDAG-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX8-SDAG-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX8-SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] +; GFX8-SDAG-NEXT: v_readfirstlane_b32 s4, v0 +; GFX8-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_set_rounding: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_add_u32_e32 v1, -4, v0 +; GFX9-SDAG-NEXT: v_min_u32_e32 v0, v0, v1 +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX9-SDAG-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX9-SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] +; GFX9-SDAG-NEXT: v_readfirstlane_b32 s4, v0 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_set_rounding: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_readfirstlane_b32 s4, v0 +; GFX9-GISEL-NEXT: s_add_i32 s5, s4, -4 +; GFX9-GISEL-NEXT: s_min_u32 s4, s4, s5 +; GFX9-GISEL-NEXT: s_lshl_b32 s6, s4, 2 +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX9-GISEL-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX9-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_set_rounding: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_add_nc_u32_e32 v1, -4, v0 +; GFX10-SDAG-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX10-SDAG-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX10-SDAG-NEXT: v_min_u32_e32 v0, v0, v1 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, s[4:5] +; GFX10-SDAG-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-SDAG-NEXT: s_setreg_b32 
hwreg(HW_REG_MODE, 0, 4), s4 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_set_rounding: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-GISEL-NEXT: s_add_i32 s5, s4, -4 +; GFX10-GISEL-NEXT: s_min_u32 s4, s4, s5 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX10-GISEL-NEXT: s_lshl_b32 s6, s4, 2 +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX10-GISEL-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_set_rounding: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_add_nc_u32_e32 v1, -4, v0 +; GFX11-SDAG-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-SDAG-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-SDAG-NEXT: v_min_u32_e32 v0, v0, v1 +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-SDAG-NEXT: v_lshrrev_b64 v[0:1], v0, s[0:1] +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_set_rounding: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-GISEL-NEXT: s_add_i32 s1, s0, -4 +; GFX11-GISEL-NEXT: s_min_u32 s0, s0, s1 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-GISEL-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] call void @llvm.set.rounding(i32 %rounding) ret void } define void @set_rounding_get_rounding() { -; GFX678-LABEL: set_rounding_get_rounding: -; GFX678: ; %bb.0: -; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX678-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 
4) -; GFX678-NEXT: s_lshl_b32 s6, s4, 2 -; GFX678-NEXT: s_mov_b32 s4, 0xeb24da71 -; GFX678-NEXT: s_mov_b32 s5, 0xc96f385 -; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 -; GFX678-NEXT: s_and_b32 s4, s4, 15 -; GFX678-NEXT: s_add_i32 s5, s4, 4 -; GFX678-NEXT: s_cmp_lt_u32 s4, 4 -; GFX678-NEXT: s_cselect_b32 s4, s4, s5 -; GFX678-NEXT: s_add_i32 s5, s4, -4 -; GFX678-NEXT: s_min_u32 s4, s4, s5 -; GFX678-NEXT: s_lshl_b32 s6, s4, 2 -; GFX678-NEXT: s_mov_b32 s4, 0x1c84a50f -; GFX678-NEXT: s_mov_b32 s5, 0xb73e62d9 -; GFX678-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 -; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 -; GFX678-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: set_rounding_get_rounding: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4) -; GFX9-NEXT: s_lshl_b32 s6, s4, 2 -; GFX9-NEXT: s_mov_b32 s4, 0xeb24da71 -; GFX9-NEXT: s_mov_b32 s5, 0xc96f385 -; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 -; GFX9-NEXT: s_and_b32 s4, s4, 15 -; GFX9-NEXT: s_add_i32 s5, s4, 4 -; GFX9-NEXT: s_cmp_lt_u32 s4, 4 -; GFX9-NEXT: s_cselect_b32 s4, s4, s5 -; GFX9-NEXT: s_add_i32 s5, s4, -4 -; GFX9-NEXT: s_min_u32 s4, s4, s5 -; GFX9-NEXT: s_lshl_b32 s6, s4, 2 -; GFX9-NEXT: s_mov_b32 s4, 0x1c84a50f -; GFX9-NEXT: s_mov_b32 s5, 0xb73e62d9 -; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 -; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: set_rounding_get_rounding: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4) -; GFX10-NEXT: s_mov_b32 s4, 0xeb24da71 -; GFX10-NEXT: s_mov_b32 s5, 0xc96f385 -; GFX10-NEXT: s_lshl_b32 s6, s6, 2 -; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 -; GFX10-NEXT: s_and_b32 s4, s4, 15 -; GFX10-NEXT: s_add_i32 s5, s4, 4 -; GFX10-NEXT: s_cmp_lt_u32 s4, 4 -; GFX10-NEXT: s_cselect_b32 s4, s4, s5 -; GFX10-NEXT: s_add_i32 s5, s4, -4 -; GFX10-NEXT: s_min_u32 s6, s4, s5 -; 
GFX10-NEXT: s_mov_b32 s4, 0x1c84a50f -; GFX10-NEXT: s_mov_b32 s5, 0xb73e62d9 -; GFX10-NEXT: s_lshl_b32 s6, s6, 2 -; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: set_rounding_get_rounding: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) -; GFX11-NEXT: s_mov_b32 s0, 0xeb24da71 -; GFX11-NEXT: s_mov_b32 s1, 0xc96f385 -; GFX11-NEXT: s_lshl_b32 s2, s2, 2 -; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX11-NEXT: s_and_b32 s0, s0, 15 -; GFX11-NEXT: s_add_i32 s1, s0, 4 -; GFX11-NEXT: s_cmp_lt_u32 s0, 4 -; GFX11-NEXT: s_cselect_b32 s0, s0, s1 -; GFX11-NEXT: s_add_i32 s1, s0, -4 -; GFX11-NEXT: s_min_u32 s2, s0, s1 -; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f -; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 -; GFX11-NEXT: s_lshl_b32 s2, s2, 2 -; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX678-SDAG-LABEL: set_rounding_get_rounding: +; GFX678-SDAG: ; %bb.0: +; GFX678-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4) +; GFX678-SDAG-NEXT: s_lshl_b32 s6, s4, 2 +; GFX678-SDAG-NEXT: s_mov_b32 s4, 0xeb24da71 +; GFX678-SDAG-NEXT: s_mov_b32 s5, 0xc96f385 +; GFX678-SDAG-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX678-SDAG-NEXT: s_and_b32 s4, s4, 15 +; GFX678-SDAG-NEXT: s_add_i32 s5, s4, 4 +; GFX678-SDAG-NEXT: s_cmp_lt_u32 s4, 4 +; GFX678-SDAG-NEXT: s_cselect_b32 s4, s4, s5 +; GFX678-SDAG-NEXT: s_add_i32 s5, s4, -4 +; GFX678-SDAG-NEXT: s_min_u32 s4, s4, s5 +; GFX678-SDAG-NEXT: s_lshl_b32 s6, s4, 2 +; GFX678-SDAG-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX678-SDAG-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX678-SDAG-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX678-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX678-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX678-GISEL-LABEL: set_rounding_get_rounding: +; GFX678-GISEL: ; %bb.0: +; GFX678-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-GISEL-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4) +; GFX678-GISEL-NEXT: s_mov_b32 s4, 0xeb24da71 +; GFX678-GISEL-NEXT: s_mov_b32 s5, 0xc96f385 +; GFX678-GISEL-NEXT: s_lshl_b32 s6, s6, 2 +; GFX678-GISEL-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX678-GISEL-NEXT: s_and_b32 s4, s4, 15 +; GFX678-GISEL-NEXT: s_add_i32 s5, s4, 4 +; GFX678-GISEL-NEXT: s_cmp_lt_u32 s4, 4 +; GFX678-GISEL-NEXT: s_cselect_b32 s4, s4, s5 +; GFX678-GISEL-NEXT: s_add_i32 s5, s4, -4 +; GFX678-GISEL-NEXT: s_min_u32 s4, s4, s5 +; GFX678-GISEL-NEXT: s_lshl_b32 s6, s4, 2 +; GFX678-GISEL-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX678-GISEL-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX678-GISEL-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX678-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX678-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: set_rounding_get_rounding: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_getreg_b32 s4, hwreg(HW_REG_MODE, 0, 4) +; GFX9-SDAG-NEXT: s_lshl_b32 s6, s4, 2 +; GFX9-SDAG-NEXT: s_mov_b32 s4, 0xeb24da71 +; GFX9-SDAG-NEXT: s_mov_b32 s5, 0xc96f385 +; GFX9-SDAG-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX9-SDAG-NEXT: s_and_b32 s4, s4, 15 +; GFX9-SDAG-NEXT: s_add_i32 s5, s4, 4 +; GFX9-SDAG-NEXT: s_cmp_lt_u32 s4, 4 +; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, s5 +; GFX9-SDAG-NEXT: s_add_i32 s5, s4, -4 +; GFX9-SDAG-NEXT: s_min_u32 s4, s4, s5 +; GFX9-SDAG-NEXT: s_lshl_b32 s6, s4, 2 +; GFX9-SDAG-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX9-SDAG-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX9-SDAG-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: set_rounding_get_rounding: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_getreg_b32 
s6, hwreg(HW_REG_MODE, 0, 4) +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0xeb24da71 +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xc96f385 +; GFX9-GISEL-NEXT: s_lshl_b32 s6, s6, 2 +; GFX9-GISEL-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 15 +; GFX9-GISEL-NEXT: s_add_i32 s5, s4, 4 +; GFX9-GISEL-NEXT: s_cmp_lt_u32 s4, 4 +; GFX9-GISEL-NEXT: s_cselect_b32 s4, s4, s5 +; GFX9-GISEL-NEXT: s_add_i32 s5, s4, -4 +; GFX9-GISEL-NEXT: s_min_u32 s4, s4, s5 +; GFX9-GISEL-NEXT: s_lshl_b32 s6, s4, 2 +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX9-GISEL-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX9-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: set_rounding_get_rounding: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4) +; GFX10-SDAG-NEXT: s_mov_b32 s4, 0xeb24da71 +; GFX10-SDAG-NEXT: s_mov_b32 s5, 0xc96f385 +; GFX10-SDAG-NEXT: s_lshl_b32 s6, s6, 2 +; GFX10-SDAG-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX10-SDAG-NEXT: s_and_b32 s4, s4, 15 +; GFX10-SDAG-NEXT: s_add_i32 s5, s4, 4 +; GFX10-SDAG-NEXT: s_cmp_lt_u32 s4, 4 +; GFX10-SDAG-NEXT: s_cselect_b32 s4, s4, s5 +; GFX10-SDAG-NEXT: s_add_i32 s5, s4, -4 +; GFX10-SDAG-NEXT: s_min_u32 s6, s4, s5 +; GFX10-SDAG-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX10-SDAG-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX10-SDAG-NEXT: s_lshl_b32 s6, s6, 2 +; GFX10-SDAG-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: set_rounding_get_rounding: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_getreg_b32 s6, hwreg(HW_REG_MODE, 0, 4) +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0xeb24da71 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xc96f385 +; GFX10-GISEL-NEXT: s_lshl_b32 s6, s6, 2 +; 
GFX10-GISEL-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX10-GISEL-NEXT: s_and_b32 s4, s4, 15 +; GFX10-GISEL-NEXT: s_add_i32 s5, s4, 4 +; GFX10-GISEL-NEXT: s_cmp_lt_u32 s4, 4 +; GFX10-GISEL-NEXT: s_cselect_b32 s4, s4, s5 +; GFX10-GISEL-NEXT: s_add_i32 s5, s4, -4 +; GFX10-GISEL-NEXT: s_min_u32 s4, s4, s5 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX10-GISEL-NEXT: s_lshl_b32 s6, s4, 2 +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX10-GISEL-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: set_rounding_get_rounding: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) +; GFX11-SDAG-NEXT: s_mov_b32 s0, 0xeb24da71 +; GFX11-SDAG-NEXT: s_mov_b32 s1, 0xc96f385 +; GFX11-SDAG-NEXT: s_lshl_b32 s2, s2, 2 +; GFX11-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-SDAG-NEXT: s_and_b32 s0, s0, 15 +; GFX11-SDAG-NEXT: s_add_i32 s1, s0, 4 +; GFX11-SDAG-NEXT: s_cmp_lt_u32 s0, 4 +; GFX11-SDAG-NEXT: s_cselect_b32 s0, s0, s1 +; GFX11-SDAG-NEXT: s_add_i32 s1, s0, -4 +; GFX11-SDAG-NEXT: s_min_u32 s2, s0, s1 +; GFX11-SDAG-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-SDAG-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-SDAG-NEXT: s_lshl_b32 s2, s2, 2 +; GFX11-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: set_rounding_get_rounding: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0xeb24da71 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xc96f385 +; GFX11-GISEL-NEXT: s_lshl_b32 s2, s2, 2 +; GFX11-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-GISEL-NEXT: s_and_b32 s0, s0, 15 +; GFX11-GISEL-NEXT: s_add_i32 s1, s0, 4 +; GFX11-GISEL-NEXT: 
s_cmp_lt_u32 s0, 4 +; GFX11-GISEL-NEXT: s_cselect_b32 s0, s0, s1 +; GFX11-GISEL-NEXT: s_add_i32 s1, s0, -4 +; GFX11-GISEL-NEXT: s_min_u32 s0, s0, s1 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-GISEL-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %rounding = call i32 @llvm.get.rounding() call void @llvm.set.rounding(i32 %rounding) ret void @@ -533,7 +744,6 @@ define void @s_set_rounding_neg1() { ; -------------------------------------------------------------------- ; Test extended values ; -------------------------------------------------------------------- - ; NearestTiesToEvenF32_TowardPositiveF64 = 8 define void @s_set_rounding_8() { ; GFX678-LABEL: s_set_rounding_8: @@ -718,7 +928,6 @@ define void @s_set_rounding_15() { ret void } - ; TowardNegativeF32_TowardZeroF64 = 16 define void @s_set_rounding_16() { ; GFX678-LABEL: s_set_rounding_16: @@ -860,7 +1069,6 @@ define void @s_set_rounding_0xffff() { ; Test optimization knowing the value can only be in the standard ; range ; -------------------------------------------------------------------- - define amdgpu_gfx void @s_set_rounding_i2_zeroext(i2 zeroext inreg %rounding) { ; GFX6-LABEL: s_set_rounding_i2_zeroext: ; GFX6: ; %bb.0: @@ -878,41 +1086,77 @@ define amdgpu_gfx void @s_set_rounding_i2_zeroext(i2 zeroext inreg %rounding) { ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: s_set_rounding_i2_zeroext: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s34, 0xffff, s4 -; GFX8-NEXT: s_lshl_b32 s34, s34, 2 -; GFX8-NEXT: s_lshr_b32 s34, 0xa50f, s34 -; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: s_set_rounding_i2_zeroext: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_and_b32 s34, 0xffff, s4 -; GFX9-NEXT: s_lshl_b32 s34, s34, 2 -; GFX9-NEXT: s_lshr_b32 s34, 0xa50f, s34 -; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: s_set_rounding_i2_zeroext: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_and_b32 s34, 0xffff, s4 -; GFX10-NEXT: s_lshl_b32 s34, s34, 2 -; GFX10-NEXT: s_lshr_b32 s34, 0xa50f, s34 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: s_set_rounding_i2_zeroext: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_and_b32 s0, 0xffff, s4 -; GFX11-NEXT: s_lshl_b32 s0, s0, 2 -; GFX11-NEXT: s_lshr_b32 s0, 0xa50f, s0 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX8-SDAG-LABEL: s_set_rounding_i2_zeroext: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_and_b32 s34, 0xffff, s4 +; GFX8-SDAG-NEXT: s_lshl_b32 s34, s34, 2 +; GFX8-SDAG-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX8-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: s_set_rounding_i2_zeroext: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: s_and_b32 s34, s4, 3 +; GFX8-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX8-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX8-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: s_set_rounding_i2_zeroext: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_and_b32 s34, 0xffff, s4 +; GFX9-SDAG-NEXT: s_lshl_b32 s34, s34, 2 +; GFX9-SDAG-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-SDAG-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: s_set_rounding_i2_zeroext: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_and_b32 s34, s4, 3 +; GFX9-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX9-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX9-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: s_set_rounding_i2_zeroext: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_and_b32 s34, 0xffff, s4 +; GFX10-SDAG-NEXT: s_lshl_b32 s34, s34, 2 +; GFX10-SDAG-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: s_set_rounding_i2_zeroext: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_and_b32 s34, s4, 3 +; GFX10-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX10-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: s_set_rounding_i2_zeroext: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_and_b32 s0, 0xffff, s4 +; GFX11-SDAG-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-SDAG-NEXT: s_lshr_b32 s0, 0xa50f, s0 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: s_set_rounding_i2_zeroext: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_and_b32 s0, s4, 3 +; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-GISEL-NEXT: s_lshr_b32 s0, 0xa50f, s0 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %zext.rounding = zext i2 %rounding to i32 call void @llvm.set.rounding(i32 %zext.rounding) ret void @@ 
-943,57 +1187,109 @@ define amdgpu_gfx void @s_set_rounding_i2_signext(i2 signext inreg %rounding) { ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: s_set_rounding_i2_signext: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_sext_i32_i16 s34, s4 -; GFX8-NEXT: s_add_i32 s35, s34, -4 -; GFX8-NEXT: s_min_u32 s34, s34, s35 -; GFX8-NEXT: s_lshl_b32 s36, s34, 2 -; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f -; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9 -; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 -; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: s_set_rounding_i2_signext: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_sext_i32_i16 s34, s4 -; GFX9-NEXT: s_add_i32 s35, s34, -4 -; GFX9-NEXT: s_min_u32 s34, s34, s35 -; GFX9-NEXT: s_lshl_b32 s36, s34, 2 -; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f -; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 -; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 -; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: s_set_rounding_i2_signext: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_sext_i32_i16 s34, s4 -; GFX10-NEXT: s_add_i32 s35, s34, -4 -; GFX10-NEXT: s_min_u32 s36, s34, s35 -; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f -; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 -; GFX10-NEXT: s_lshl_b32 s36, s36, 2 -; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: s_set_rounding_i2_signext: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_sext_i32_i16 s0, s4 -; GFX11-NEXT: s_add_i32 s1, s0, -4 -; GFX11-NEXT: s_min_u32 s2, s0, s1 -; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f -; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 -; GFX11-NEXT: s_lshl_b32 
s2, s2, 2 -; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX8-SDAG-LABEL: s_set_rounding_i2_signext: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_sext_i32_i16 s34, s4 +; GFX8-SDAG-NEXT: s_add_i32 s35, s34, -4 +; GFX8-SDAG-NEXT: s_min_u32 s34, s34, s35 +; GFX8-SDAG-NEXT: s_lshl_b32 s36, s34, 2 +; GFX8-SDAG-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX8-SDAG-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX8-SDAG-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX8-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: s_set_rounding_i2_signext: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: s_bfe_i32 s34, s4, 0x20000 +; GFX8-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX8-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX8-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX8-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX8-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX8-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX8-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: s_set_rounding_i2_signext: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_sext_i32_i16 s34, s4 +; GFX9-SDAG-NEXT: s_add_i32 s35, s34, -4 +; GFX9-SDAG-NEXT: s_min_u32 s34, s34, s35 +; GFX9-SDAG-NEXT: s_lshl_b32 s36, s34, 2 +; GFX9-SDAG-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX9-SDAG-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX9-SDAG-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: s_set_rounding_i2_signext: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_bfe_i32 s34, s4, 0x20000 +; 
GFX9-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX9-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX9-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX9-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX9-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX9-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX9-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: s_set_rounding_i2_signext: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_sext_i32_i16 s34, s4 +; GFX10-SDAG-NEXT: s_add_i32 s35, s34, -4 +; GFX10-SDAG-NEXT: s_min_u32 s36, s34, s35 +; GFX10-SDAG-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX10-SDAG-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-SDAG-NEXT: s_lshl_b32 s36, s36, 2 +; GFX10-SDAG-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: s_set_rounding_i2_signext: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_bfe_i32 s34, s4, 0x20000 +; GFX10-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX10-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX10-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX10-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX10-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: s_set_rounding_i2_signext: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_sext_i32_i16 s0, s4 +; GFX11-SDAG-NEXT: s_add_i32 s1, s0, -4 +; GFX11-SDAG-NEXT: s_min_u32 s2, s0, s1 +; GFX11-SDAG-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-SDAG-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-SDAG-NEXT: s_lshl_b32 s2, s2, 2 +; GFX11-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-SDAG-NEXT: s_setreg_b32 
hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: s_set_rounding_i2_signext: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_bfe_i32 s0, s4, 0x20000 +; GFX11-GISEL-NEXT: s_add_i32 s1, s0, -4 +; GFX11-GISEL-NEXT: s_min_u32 s0, s0, s1 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-GISEL-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %sext.rounding = sext i2 %rounding to i32 call void @llvm.set.rounding(i32 %sext.rounding) ret void @@ -1024,57 +1320,109 @@ define amdgpu_gfx void @s_set_rounding_i3_signext(i3 signext inreg %rounding) { ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: s_set_rounding_i3_signext: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_sext_i32_i16 s34, s4 -; GFX8-NEXT: s_add_i32 s35, s34, -4 -; GFX8-NEXT: s_min_u32 s34, s34, s35 -; GFX8-NEXT: s_lshl_b32 s36, s34, 2 -; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f -; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9 -; GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 -; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: s_set_rounding_i3_signext: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_sext_i32_i16 s34, s4 -; GFX9-NEXT: s_add_i32 s35, s34, -4 -; GFX9-NEXT: s_min_u32 s34, s34, s35 -; GFX9-NEXT: s_lshl_b32 s36, s34, 2 -; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f -; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 -; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 -; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: s_set_rounding_i3_signext: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_sext_i32_i16 s34, s4 -; GFX10-NEXT: s_add_i32 s35, s34, -4 -; GFX10-NEXT: s_min_u32 s36, s34, s35 -; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f -; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 -; GFX10-NEXT: s_lshl_b32 s36, s36, 2 -; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: s_set_rounding_i3_signext: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_sext_i32_i16 s0, s4 -; GFX11-NEXT: s_add_i32 s1, s0, -4 -; GFX11-NEXT: s_min_u32 s2, s0, s1 -; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f -; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 -; GFX11-NEXT: s_lshl_b32 s2, s2, 2 -; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX8-SDAG-LABEL: s_set_rounding_i3_signext: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_sext_i32_i16 s34, s4 +; GFX8-SDAG-NEXT: s_add_i32 s35, s34, -4 +; GFX8-SDAG-NEXT: s_min_u32 s34, s34, s35 +; GFX8-SDAG-NEXT: s_lshl_b32 s36, s34, 2 +; GFX8-SDAG-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX8-SDAG-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX8-SDAG-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX8-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: s_set_rounding_i3_signext: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: s_bfe_i32 s34, s4, 0x30000 +; GFX8-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX8-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX8-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX8-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX8-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX8-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX8-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX9-SDAG-LABEL: s_set_rounding_i3_signext: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_sext_i32_i16 s34, s4 +; GFX9-SDAG-NEXT: s_add_i32 s35, s34, -4 +; GFX9-SDAG-NEXT: s_min_u32 s34, s34, s35 +; GFX9-SDAG-NEXT: s_lshl_b32 s36, s34, 2 +; GFX9-SDAG-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX9-SDAG-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX9-SDAG-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: s_set_rounding_i3_signext: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_bfe_i32 s34, s4, 0x30000 +; GFX9-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX9-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX9-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX9-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX9-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX9-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX9-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: s_set_rounding_i3_signext: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_sext_i32_i16 s34, s4 +; GFX10-SDAG-NEXT: s_add_i32 s35, s34, -4 +; GFX10-SDAG-NEXT: s_min_u32 s36, s34, s35 +; GFX10-SDAG-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX10-SDAG-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-SDAG-NEXT: s_lshl_b32 s36, s36, 2 +; GFX10-SDAG-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: s_set_rounding_i3_signext: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_bfe_i32 s34, s4, 0x30000 +; GFX10-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX10-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX10-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; 
GFX10-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX10-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX10-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: s_set_rounding_i3_signext: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_sext_i32_i16 s0, s4 +; GFX11-SDAG-NEXT: s_add_i32 s1, s0, -4 +; GFX11-SDAG-NEXT: s_min_u32 s2, s0, s1 +; GFX11-SDAG-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-SDAG-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-SDAG-NEXT: s_lshl_b32 s2, s2, 2 +; GFX11-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: s_set_rounding_i3_signext: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_bfe_i32 s0, s4, 0x30000 +; GFX11-GISEL-NEXT: s_add_i32 s1, s0, -4 +; GFX11-GISEL-NEXT: s_min_u32 s0, s0, s1 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-GISEL-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %sext.rounding = sext i3 %rounding to i32 call void @llvm.set.rounding(i32 %sext.rounding) ret void @@ -1105,136 +1453,228 @@ define amdgpu_gfx void @s_set_rounding_i3_zeroext(i3 zeroext inreg %rounding) { ; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX7-NEXT: s_setpc_b64 s[30:31] ; -; GFX8-LABEL: s_set_rounding_i3_zeroext: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_and_b32 s34, 0xffff, s4 -; GFX8-NEXT: s_add_i32 s35, s34, -4 -; GFX8-NEXT: s_min_u32 s34, s34, s35 -; GFX8-NEXT: s_lshl_b32 s36, s34, 2 -; GFX8-NEXT: s_mov_b32 s34, 0x1c84a50f -; GFX8-NEXT: s_mov_b32 s35, 0xb73e62d9 -; 
GFX8-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 -; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: s_set_rounding_i3_zeroext: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_and_b32 s34, 0xffff, s4 -; GFX9-NEXT: s_add_i32 s35, s34, -4 -; GFX9-NEXT: s_min_u32 s34, s34, s35 -; GFX9-NEXT: s_lshl_b32 s36, s34, 2 -; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f -; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 -; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 -; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: s_set_rounding_i3_zeroext: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_and_b32 s34, 0xffff, s4 -; GFX10-NEXT: s_add_i32 s35, s34, -4 -; GFX10-NEXT: s_min_u32 s36, s34, s35 -; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f -; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 -; GFX10-NEXT: s_lshl_b32 s36, s36, 2 -; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: s_set_rounding_i3_zeroext: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_and_b32 s0, 0xffff, s4 -; GFX11-NEXT: s_add_i32 s1, s0, -4 -; GFX11-NEXT: s_min_u32 s2, s0, s1 -; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f -; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 -; GFX11-NEXT: s_lshl_b32 s2, s2, 2 -; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX8-SDAG-LABEL: s_set_rounding_i3_zeroext: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_and_b32 s34, 0xffff, s4 +; GFX8-SDAG-NEXT: s_add_i32 s35, s34, -4 +; GFX8-SDAG-NEXT: s_min_u32 s34, s34, s35 +; GFX8-SDAG-NEXT: s_lshl_b32 s36, s34, 2 +; GFX8-SDAG-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX8-SDAG-NEXT: s_mov_b32 s35, 
0xb73e62d9 +; GFX8-SDAG-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX8-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: s_set_rounding_i3_zeroext: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: s_and_b32 s34, s4, 7 +; GFX8-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX8-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX8-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX8-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX8-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX8-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX8-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: s_set_rounding_i3_zeroext: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_and_b32 s34, 0xffff, s4 +; GFX9-SDAG-NEXT: s_add_i32 s35, s34, -4 +; GFX9-SDAG-NEXT: s_min_u32 s34, s34, s35 +; GFX9-SDAG-NEXT: s_lshl_b32 s36, s34, 2 +; GFX9-SDAG-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX9-SDAG-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX9-SDAG-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: s_set_rounding_i3_zeroext: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_and_b32 s34, s4, 7 +; GFX9-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX9-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX9-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX9-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX9-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX9-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX9-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: s_set_rounding_i3_zeroext: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_and_b32 
s34, 0xffff, s4 +; GFX10-SDAG-NEXT: s_add_i32 s35, s34, -4 +; GFX10-SDAG-NEXT: s_min_u32 s36, s34, s35 +; GFX10-SDAG-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX10-SDAG-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-SDAG-NEXT: s_lshl_b32 s36, s36, 2 +; GFX10-SDAG-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: s_set_rounding_i3_zeroext: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_and_b32 s34, s4, 7 +; GFX10-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX10-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX10-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX10-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX10-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: s_set_rounding_i3_zeroext: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_and_b32 s0, 0xffff, s4 +; GFX11-SDAG-NEXT: s_add_i32 s1, s0, -4 +; GFX11-SDAG-NEXT: s_min_u32 s2, s0, s1 +; GFX11-SDAG-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-SDAG-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-SDAG-NEXT: s_lshl_b32 s2, s2, 2 +; GFX11-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: s_set_rounding_i3_zeroext: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_and_b32 s0, s4, 7 +; GFX11-GISEL-NEXT: s_add_i32 s1, s0, -4 +; GFX11-GISEL-NEXT: s_min_u32 s0, s0, s1 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-GISEL-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-GISEL-NEXT: s_setreg_b32 
hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %sext.rounding = zext i3 %rounding to i32 call void @llvm.set.rounding(i32 %sext.rounding) ret void } define amdgpu_gfx void @s_set_rounding_select_0_1(i32 inreg %cond) { -; GFX6-LABEL: s_set_rounding_select_0_1: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_cmp_lg_u32 s4, 0 -; GFX6-NEXT: s_cselect_b64 s[34:35], -1, 0 -; GFX6-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] -; GFX6-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX6-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0 -; GFX6-NEXT: v_readfirstlane_b32 s34, v0 -; GFX6-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX6-NEXT: s_setpc_b64 s[30:31] -; -; GFX7-LABEL: s_set_rounding_select_0_1: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX7-NEXT: s_cmp_lg_u32 s4, 0 -; GFX7-NEXT: s_cselect_b64 s[34:35], -1, 0 -; GFX7-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX7-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0 -; GFX7-NEXT: v_readfirstlane_b32 s34, v0 -; GFX7-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX7-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: s_set_rounding_select_0_1: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_cmp_lg_u32 s4, 0 -; GFX8-NEXT: s_cselect_b64 s[34:35], -1, 0 -; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX8-NEXT: s_mov_b32 s34, 0xa50f -; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s34 -; GFX8-NEXT: v_readfirstlane_b32 s34, v0 -; GFX8-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: s_set_rounding_select_0_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_cmp_lg_u32 s4, 0 -; GFX9-NEXT: s_cselect_b64 s[34:35], -1, 0 -; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] -; GFX9-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX9-NEXT: s_mov_b32 s34, 0xa50f -; 
GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s34 -; GFX9-NEXT: v_readfirstlane_b32 s34, v0 -; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: s_set_rounding_select_0_1: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_cmp_lg_u32 s4, 0 -; GFX10-NEXT: s_cselect_b32 s34, -1, 0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34 -; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f -; GFX10-NEXT: v_readfirstlane_b32 s34, v0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: s_set_rounding_select_0_1: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_cmp_lg_u32 s4, 0 -; GFX11-NEXT: s_cselect_b32 s0, -1, 0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 -; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX6-SDAG-LABEL: s_set_rounding_select_0_1: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_cmp_lg_u32 s4, 0 +; GFX6-SDAG-NEXT: s_cselect_b64 s[34:35], -1, 0 +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] +; GFX6-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX6-SDAG-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0 +; GFX6-SDAG-NEXT: v_readfirstlane_b32 s34, v0 +; GFX6-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX678-GISEL-LABEL: s_set_rounding_select_0_1: +; GFX678-GISEL: ; %bb.0: +; GFX678-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GFX678-GISEL-NEXT: s_cselect_b32 s34, 1, 0 +; GFX678-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX678-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX678-GISEL-NEXT: s_setreg_b32 
hwreg(HW_REG_MODE, 0, 4), s34 +; GFX678-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX7-SDAG-LABEL: s_set_rounding_select_0_1: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-SDAG-NEXT: s_cmp_lg_u32 s4, 0 +; GFX7-SDAG-NEXT: s_cselect_b64 s[34:35], -1, 0 +; GFX7-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] +; GFX7-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX7-SDAG-NEXT: v_lshr_b32_e32 v0, 0xa50f, v0 +; GFX7-SDAG-NEXT: v_readfirstlane_b32 s34, v0 +; GFX7-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX7-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: s_set_rounding_select_0_1: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: s_cmp_lg_u32 s4, 0 +; GFX8-SDAG-NEXT: s_cselect_b64 s[34:35], -1, 0 +; GFX8-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] +; GFX8-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX8-SDAG-NEXT: s_mov_b32 s34, 0xa50f +; GFX8-SDAG-NEXT: v_lshrrev_b32_e64 v0, v0, s34 +; GFX8-SDAG-NEXT: v_readfirstlane_b32 s34, v0 +; GFX8-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: s_set_rounding_select_0_1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_cmp_lg_u32 s4, 0 +; GFX9-SDAG-NEXT: s_cselect_b64 s[34:35], -1, 0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[34:35] +; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX9-SDAG-NEXT: s_mov_b32 s34, 0xa50f +; GFX9-SDAG-NEXT: v_lshrrev_b32_e64 v0, v0, s34 +; GFX9-SDAG-NEXT: v_readfirstlane_b32 s34, v0 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: s_set_rounding_select_0_1: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GFX9-GISEL-NEXT: s_cselect_b32 s34, 1, 0 +; GFX9-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX9-GISEL-NEXT: 
s_lshr_b32 s34, 0xa50f, s34 +; GFX9-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: s_set_rounding_select_0_1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_cmp_lg_u32 s4, 0 +; GFX10-SDAG-NEXT: s_cselect_b32 s34, -1, 0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s34 +; GFX10-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX10-SDAG-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f +; GFX10-SDAG-NEXT: v_readfirstlane_b32 s34, v0 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: s_set_rounding_select_0_1: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GFX10-GISEL-NEXT: s_cselect_b32 s34, 1, 0 +; GFX10-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX10-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: s_set_rounding_select_0_1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11-SDAG-NEXT: s_cselect_b32 s0, -1, 0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 +; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 +; GFX11-SDAG-NEXT: v_lshrrev_b32_e64 v0, v0, 0xa50f +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: s_set_rounding_select_0_1: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11-GISEL-NEXT: s_cselect_b32 s0, 1, 0 +; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-GISEL-NEXT: s_lshr_b32 s0, 0xa50f, s0 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: 
s_setpc_b64 s[30:31] %cmp = icmp eq i32 %cond, 0 %rounding = select i1 %cmp, i32 0, i32 1 call void @llvm.set.rounding(i32 %rounding) @@ -1242,37 +1682,77 @@ define amdgpu_gfx void @s_set_rounding_select_0_1(i32 inreg %cond) { } define amdgpu_gfx void @s_set_rounding_select_1_3(i32 inreg %cond) { -; GFX678-LABEL: s_set_rounding_select_1_3: -; GFX678: ; %bb.0: -; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX678-NEXT: s_cmp_eq_u32 s4, 0 -; GFX678-NEXT: s_cselect_b32 s34, 0xa50, 10 -; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX678-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: s_set_rounding_select_1_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_cmp_eq_u32 s4, 0 -; GFX9-NEXT: s_cselect_b32 s34, 0xa50, 10 -; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: s_set_rounding_select_1_3: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_cmp_eq_u32 s4, 0 -; GFX10-NEXT: s_cselect_b32 s34, 0xa50, 10 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: s_set_rounding_select_1_3: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_cmp_eq_u32 s4, 0 -; GFX11-NEXT: s_cselect_b32 s0, 0xa50, 10 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX678-SDAG-LABEL: s_set_rounding_select_1_3: +; GFX678-SDAG: ; %bb.0: +; GFX678-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX678-SDAG-NEXT: s_cselect_b32 s34, 0xa50, 10 +; GFX678-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX678-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX678-GISEL-LABEL: s_set_rounding_select_1_3: +; GFX678-GISEL: ; %bb.0: +; GFX678-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX678-GISEL-NEXT: 
s_cselect_b32 s34, 1, 3 +; GFX678-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX678-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX678-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX678-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: s_set_rounding_select_1_3: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-SDAG-NEXT: s_cselect_b32 s34, 0xa50, 10 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: s_set_rounding_select_1_3: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-GISEL-NEXT: s_cselect_b32 s34, 1, 3 +; GFX9-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX9-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX9-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: s_set_rounding_select_1_3: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-SDAG-NEXT: s_cselect_b32 s34, 0xa50, 10 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: s_set_rounding_select_1_3: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-GISEL-NEXT: s_cselect_b32 s34, 1, 3 +; GFX10-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX10-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: s_set_rounding_select_1_3: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX11-SDAG-NEXT: s_cselect_b32 s0, 0xa50, 10 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; 
GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: s_set_rounding_select_1_3: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX11-GISEL-NEXT: s_cselect_b32 s0, 1, 3 +; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-GISEL-NEXT: s_lshr_b32 s0, 0xa50f, s0 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %cond, 0 %rounding = select i1 %cmp, i32 1, i32 3 call void @llvm.set.rounding(i32 %rounding) @@ -1280,43 +1760,103 @@ define amdgpu_gfx void @s_set_rounding_select_1_3(i32 inreg %cond) { } define void @v_set_rounding_select_1_3(i32 %cond) { -; GFX678-LABEL: v_set_rounding_select_1_3: -; GFX678: ; %bb.0: -; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX678-NEXT: v_mov_b32_e32 v1, 0xa50 -; GFX678-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX678-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc -; GFX678-NEXT: v_readfirstlane_b32 s4, v0 -; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 -; GFX678-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: v_set_rounding_select_1_3: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v1, 0xa50 -; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 -; GFX9-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc -; GFX9-NEXT: v_readfirstlane_b32 s4, v0 -; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: v_set_rounding_select_1_3: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 -; GFX10-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo -; GFX10-NEXT: v_readfirstlane_b32 s4, v0 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: v_set_rounding_select_1_3: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 
0, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX678-SDAG-LABEL: v_set_rounding_select_1_3: +; GFX678-SDAG: ; %bb.0: +; GFX678-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-SDAG-NEXT: v_mov_b32_e32 v1, 0xa50 +; GFX678-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX678-SDAG-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc +; GFX678-SDAG-NEXT: v_readfirstlane_b32 s4, v0 +; GFX678-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX678-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX678-GISEL-LABEL: v_set_rounding_select_1_3: +; GFX678-GISEL: ; %bb.0: +; GFX678-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX678-GISEL-NEXT: v_cndmask_b32_e64 v0, 3, 1, vcc +; GFX678-GISEL-NEXT: v_readfirstlane_b32 s4, v0 +; GFX678-GISEL-NEXT: s_add_i32 s5, s4, -4 +; GFX678-GISEL-NEXT: s_min_u32 s4, s4, s5 +; GFX678-GISEL-NEXT: s_lshl_b32 s6, s4, 2 +; GFX678-GISEL-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX678-GISEL-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX678-GISEL-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX678-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX678-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: v_set_rounding_select_1_3: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0xa50 +; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-SDAG-NEXT: v_cndmask_b32_e32 v0, 10, v1, vcc +; GFX9-SDAG-NEXT: v_readfirstlane_b32 s4, v0 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: v_set_rounding_select_1_3: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 +; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 3, 1, vcc +; GFX9-GISEL-NEXT: 
v_readfirstlane_b32 s4, v0 +; GFX9-GISEL-NEXT: s_add_i32 s5, s4, -4 +; GFX9-GISEL-NEXT: s_min_u32 s4, s4, s5 +; GFX9-GISEL-NEXT: s_lshl_b32 s6, s4, 2 +; GFX9-GISEL-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX9-GISEL-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX9-GISEL-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX9-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: v_set_rounding_select_1_3: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo +; GFX10-SDAG-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: v_set_rounding_select_1_3: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 3, 1, vcc_lo +; GFX10-GISEL-NEXT: v_readfirstlane_b32 s4, v0 +; GFX10-GISEL-NEXT: s_add_i32 s5, s4, -4 +; GFX10-GISEL-NEXT: s_min_u32 s4, s4, s5 +; GFX10-GISEL-NEXT: s_mov_b32 s5, 0xb73e62d9 +; GFX10-GISEL-NEXT: s_lshl_b32 s6, s4, 2 +; GFX10-GISEL-NEXT: s_mov_b32 s4, 0x1c84a50f +; GFX10-GISEL-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s4 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: v_set_rounding_select_1_3: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 10, 0xa50, vcc_lo +; GFX11-SDAG-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: v_set_rounding_select_1_3: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 +; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 3, 1, vcc_lo +; GFX11-GISEL-NEXT: v_readfirstlane_b32 s0, v0 +; GFX11-GISEL-NEXT: s_add_i32 s1, s0, -4 +; GFX11-GISEL-NEXT: s_min_u32 s0, s0, s1 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-GISEL-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %cond, 0 %rounding = select i1 %cmp, i32 1, i32 3 call void @llvm.set.rounding(i32 %rounding) @@ -1324,41 +1864,85 @@ define void @v_set_rounding_select_1_3(i32 %cond) { } define amdgpu_gfx void @s_set_rounding_select_2_0(i32 inreg %cond) { -; GFX678-LABEL: s_set_rounding_select_2_0: -; GFX678: ; %bb.0: -; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX678-NEXT: s_cmp_eq_u32 s4, 0 -; GFX678-NEXT: s_movk_i32 s34, 0xa5 -; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50f -; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX678-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: s_set_rounding_select_2_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_cmp_eq_u32 s4, 0 -; GFX9-NEXT: s_movk_i32 s34, 0xa5 -; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50f -; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: s_set_rounding_select_2_0: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_cmp_eq_u32 s4, 0 -; GFX10-NEXT: s_movk_i32 s34, 0xa5 -; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50f -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: s_set_rounding_select_2_0: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_cmp_eq_u32 s4, 0 -; GFX11-NEXT: s_movk_i32 s0, 0xa5 -; GFX11-NEXT: s_cselect_b32 s0, s0, 
0xa50f -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX678-SDAG-LABEL: s_set_rounding_select_2_0: +; GFX678-SDAG: ; %bb.0: +; GFX678-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX678-SDAG-NEXT: s_movk_i32 s34, 0xa5 +; GFX678-SDAG-NEXT: s_cselect_b32 s34, s34, 0xa50f +; GFX678-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX678-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX678-GISEL-LABEL: s_set_rounding_select_2_0: +; GFX678-GISEL: ; %bb.0: +; GFX678-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX678-GISEL-NEXT: s_cselect_b32 s34, 1, 0 +; GFX678-GISEL-NEXT: s_lshl_b32 s34, s34, 1 +; GFX678-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX678-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX678-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX678-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: s_set_rounding_select_2_0: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-SDAG-NEXT: s_movk_i32 s34, 0xa5 +; GFX9-SDAG-NEXT: s_cselect_b32 s34, s34, 0xa50f +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: s_set_rounding_select_2_0: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-GISEL-NEXT: s_cselect_b32 s34, 1, 0 +; GFX9-GISEL-NEXT: s_lshl_b32 s34, s34, 1 +; GFX9-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX9-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX9-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: s_set_rounding_select_2_0: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-SDAG-NEXT: s_movk_i32 s34, 0xa5 +; 
GFX10-SDAG-NEXT: s_cselect_b32 s34, s34, 0xa50f +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: s_set_rounding_select_2_0: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-GISEL-NEXT: s_cselect_b32 s34, 1, 0 +; GFX10-GISEL-NEXT: s_lshl_b32 s34, s34, 1 +; GFX10-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX10-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: s_set_rounding_select_2_0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX11-SDAG-NEXT: s_movk_i32 s0, 0xa5 +; GFX11-SDAG-NEXT: s_cselect_b32 s0, s0, 0xa50f +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: s_set_rounding_select_2_0: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX11-GISEL-NEXT: s_cselect_b32 s0, 1, 0 +; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 1 +; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-GISEL-NEXT: s_lshr_b32 s0, 0xa50f, s0 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %cond, 0 %rounding = select i1 %cmp, i32 2, i32 0 call void @llvm.set.rounding(i32 %rounding) @@ -1366,41 +1950,85 @@ define amdgpu_gfx void @s_set_rounding_select_2_0(i32 inreg %cond) { } define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) { -; GFX678-LABEL: s_set_rounding_select_2_1: -; GFX678: ; %bb.0: -; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX678-NEXT: s_cmp_eq_u32 s4, 0 -; GFX678-NEXT: s_movk_i32 s34, 0xa5 -; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa50 -; GFX678-NEXT: s_setreg_b32 
hwreg(HW_REG_MODE, 0, 4), s34 -; GFX678-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: s_set_rounding_select_2_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_cmp_eq_u32 s4, 0 -; GFX9-NEXT: s_movk_i32 s34, 0xa5 -; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa50 -; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: s_set_rounding_select_2_1: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_cmp_eq_u32 s4, 0 -; GFX10-NEXT: s_movk_i32 s34, 0xa5 -; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa50 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: s_set_rounding_select_2_1: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_cmp_eq_u32 s4, 0 -; GFX11-NEXT: s_movk_i32 s0, 0xa5 -; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa50 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX678-SDAG-LABEL: s_set_rounding_select_2_1: +; GFX678-SDAG: ; %bb.0: +; GFX678-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX678-SDAG-NEXT: s_movk_i32 s34, 0xa5 +; GFX678-SDAG-NEXT: s_cselect_b32 s34, s34, 0xa50 +; GFX678-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX678-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX678-GISEL-LABEL: s_set_rounding_select_2_1: +; GFX678-GISEL: ; %bb.0: +; GFX678-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX678-GISEL-NEXT: s_cselect_b32 s34, 1, 0 +; GFX678-GISEL-NEXT: s_add_i32 s34, s34, 1 +; GFX678-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX678-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX678-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX678-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: s_set_rounding_select_2_1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-SDAG-NEXT: s_movk_i32 s34, 0xa5 +; GFX9-SDAG-NEXT: s_cselect_b32 s34, s34, 0xa50 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: s_set_rounding_select_2_1: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-GISEL-NEXT: s_cselect_b32 s34, 1, 0 +; GFX9-GISEL-NEXT: s_add_i32 s34, s34, 1 +; GFX9-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX9-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX9-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: s_set_rounding_select_2_1: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-SDAG-NEXT: s_movk_i32 s34, 0xa5 +; GFX10-SDAG-NEXT: s_cselect_b32 s34, s34, 0xa50 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: s_set_rounding_select_2_1: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-GISEL-NEXT: s_cselect_b32 s34, 1, 0 +; GFX10-GISEL-NEXT: s_add_i32 s34, s34, 1 +; GFX10-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX10-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: s_set_rounding_select_2_1: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX11-SDAG-NEXT: s_movk_i32 s0, 0xa5 +; GFX11-SDAG-NEXT: s_cselect_b32 s0, s0, 0xa50 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: s_set_rounding_select_2_1: +; GFX11-GISEL: ; %bb.0: +; 
GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX11-GISEL-NEXT: s_cselect_b32 s0, 1, 0 +; GFX11-GISEL-NEXT: s_add_i32 s0, s0, 1 +; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-GISEL-NEXT: s_lshr_b32 s0, 0xa50f, s0 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %cond, 0 %rounding = select i1 %cmp, i32 2, i32 1 call void @llvm.set.rounding(i32 %rounding) @@ -1408,41 +2036,101 @@ define amdgpu_gfx void @s_set_rounding_select_2_1(i32 inreg %cond) { } define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) { -; GFX678-LABEL: s_set_rounding_select_1_2: -; GFX678: ; %bb.0: -; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX678-NEXT: s_cmp_eq_u32 s4, 0 -; GFX678-NEXT: s_movk_i32 s34, 0xa50 -; GFX678-NEXT: s_cselect_b32 s34, s34, 0xa5 -; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX678-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: s_set_rounding_select_1_2: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_cmp_eq_u32 s4, 0 -; GFX9-NEXT: s_movk_i32 s34, 0xa50 -; GFX9-NEXT: s_cselect_b32 s34, s34, 0xa5 -; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: s_set_rounding_select_1_2: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_cmp_eq_u32 s4, 0 -; GFX10-NEXT: s_movk_i32 s34, 0xa50 -; GFX10-NEXT: s_cselect_b32 s34, s34, 0xa5 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: s_set_rounding_select_1_2: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_cmp_eq_u32 s4, 0 -; GFX11-NEXT: s_movk_i32 s0, 0xa50 -; GFX11-NEXT: s_cselect_b32 s0, s0, 0xa5 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX678-SDAG-LABEL: 
s_set_rounding_select_1_2: +; GFX678-SDAG: ; %bb.0: +; GFX678-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX678-SDAG-NEXT: s_movk_i32 s34, 0xa50 +; GFX678-SDAG-NEXT: s_cselect_b32 s34, s34, 0xa5 +; GFX678-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX678-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX678-GISEL-LABEL: s_set_rounding_select_1_2: +; GFX678-GISEL: ; %bb.0: +; GFX678-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX678-GISEL-NEXT: s_cselect_b32 s34, -1, 0 +; GFX678-GISEL-NEXT: s_add_i32 s34, s34, 2 +; GFX678-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX678-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX678-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX678-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX678-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX678-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX678-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX678-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: s_set_rounding_select_1_2: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-SDAG-NEXT: s_movk_i32 s34, 0xa50 +; GFX9-SDAG-NEXT: s_cselect_b32 s34, s34, 0xa5 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: s_set_rounding_select_1_2: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-GISEL-NEXT: s_cselect_b32 s34, -1, 0 +; GFX9-GISEL-NEXT: s_add_i32 s34, s34, 2 +; GFX9-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX9-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX9-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX9-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX9-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX9-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX9-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; 
GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: s_set_rounding_select_1_2: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-SDAG-NEXT: s_movk_i32 s34, 0xa50 +; GFX10-SDAG-NEXT: s_cselect_b32 s34, s34, 0xa5 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: s_set_rounding_select_1_2: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-GISEL-NEXT: s_cselect_b32 s34, -1, 0 +; GFX10-GISEL-NEXT: s_add_i32 s34, s34, 2 +; GFX10-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX10-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX10-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX10-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX10-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: s_set_rounding_select_1_2: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX11-SDAG-NEXT: s_movk_i32 s0, 0xa50 +; GFX11-SDAG-NEXT: s_cselect_b32 s0, s0, 0xa5 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: s_set_rounding_select_1_2: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX11-GISEL-NEXT: s_cselect_b32 s0, -1, 0 +; GFX11-GISEL-NEXT: s_add_i32 s0, s0, 2 +; GFX11-GISEL-NEXT: s_add_i32 s1, s0, -4 +; GFX11-GISEL-NEXT: s_min_u32 s0, s0, s1 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-GISEL-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-GISEL-NEXT: s_setreg_b32 
hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %cond, 0 %rounding = select i1 %cmp, i32 1, i32 2 call void @llvm.set.rounding(i32 %rounding) @@ -1450,37 +2138,77 @@ define amdgpu_gfx void @s_set_rounding_select_1_2(i32 inreg %cond) { } define amdgpu_gfx void @s_set_rounding_select_3_0(i32 inreg %cond) { -; GFX678-LABEL: s_set_rounding_select_3_0: -; GFX678: ; %bb.0: -; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX678-NEXT: s_cmp_eq_u32 s4, 0 -; GFX678-NEXT: s_cselect_b32 s34, 10, 0xa50f -; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX678-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: s_set_rounding_select_3_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_cmp_eq_u32 s4, 0 -; GFX9-NEXT: s_cselect_b32 s34, 10, 0xa50f -; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: s_set_rounding_select_3_0: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_cmp_eq_u32 s4, 0 -; GFX10-NEXT: s_cselect_b32 s34, 10, 0xa50f -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: s_set_rounding_select_3_0: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_cmp_eq_u32 s4, 0 -; GFX11-NEXT: s_cselect_b32 s0, 10, 0xa50f -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX678-SDAG-LABEL: s_set_rounding_select_3_0: +; GFX678-SDAG: ; %bb.0: +; GFX678-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX678-SDAG-NEXT: s_cselect_b32 s34, 10, 0xa50f +; GFX678-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX678-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX678-GISEL-LABEL: s_set_rounding_select_3_0: +; GFX678-GISEL: ; %bb.0: +; GFX678-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
GFX678-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX678-GISEL-NEXT: s_cselect_b32 s34, 3, 0 +; GFX678-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX678-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX678-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX678-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: s_set_rounding_select_3_0: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-SDAG-NEXT: s_cselect_b32 s34, 10, 0xa50f +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: s_set_rounding_select_3_0: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-GISEL-NEXT: s_cselect_b32 s34, 3, 0 +; GFX9-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX9-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX9-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: s_set_rounding_select_3_0: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-SDAG-NEXT: s_cselect_b32 s34, 10, 0xa50f +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: s_set_rounding_select_3_0: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-GISEL-NEXT: s_cselect_b32 s34, 3, 0 +; GFX10-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX10-GISEL-NEXT: s_lshr_b32 s34, 0xa50f, s34 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: s_set_rounding_select_3_0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX11-SDAG-NEXT: s_cselect_b32 s0, 10, 0xa50f +; 
GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: s_set_rounding_select_3_0: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX11-GISEL-NEXT: s_cselect_b32 s0, 3, 0 +; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-GISEL-NEXT: s_lshr_b32 s0, 0xa50f, s0 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %cond, 0 %rounding = select i1 %cmp, i32 3, i32 0 call void @llvm.set.rounding(i32 %rounding) @@ -1488,61 +2216,121 @@ define amdgpu_gfx void @s_set_rounding_select_3_0(i32 inreg %cond) { } define amdgpu_gfx void @s_set_rounding_select_4_0(i32 inreg %cond) { -; GFX678-LABEL: s_set_rounding_select_4_0: -; GFX678: ; %bb.0: -; GFX678-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX678-NEXT: s_cmp_eq_u32 s4, 0 -; GFX678-NEXT: s_cselect_b32 s34, 4, 0 -; GFX678-NEXT: s_add_i32 s35, s34, -4 -; GFX678-NEXT: s_min_u32 s34, s34, s35 -; GFX678-NEXT: s_lshl_b32 s36, s34, 2 -; GFX678-NEXT: s_mov_b32 s34, 0x1c84a50f -; GFX678-NEXT: s_mov_b32 s35, 0xb73e62d9 -; GFX678-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 -; GFX678-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX678-NEXT: s_setpc_b64 s[30:31] -; -; GFX9-LABEL: s_set_rounding_select_4_0: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: s_cmp_eq_u32 s4, 0 -; GFX9-NEXT: s_cselect_b32 s34, 4, 0 -; GFX9-NEXT: s_add_i32 s35, s34, -4 -; GFX9-NEXT: s_min_u32 s34, s34, s35 -; GFX9-NEXT: s_lshl_b32 s36, s34, 2 -; GFX9-NEXT: s_mov_b32 s34, 0x1c84a50f -; GFX9-NEXT: s_mov_b32 s35, 0xb73e62d9 -; GFX9-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 -; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX10-LABEL: s_set_rounding_select_4_0: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: 
s_cmp_eq_u32 s4, 0 -; GFX10-NEXT: s_cselect_b32 s34, 4, 0 -; GFX10-NEXT: s_add_i32 s35, s34, -4 -; GFX10-NEXT: s_min_u32 s36, s34, s35 -; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f -; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 -; GFX10-NEXT: s_lshl_b32 s36, s36, 2 -; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: s_set_rounding_select_4_0: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_cmp_eq_u32 s4, 0 -; GFX11-NEXT: s_cselect_b32 s0, 4, 0 -; GFX11-NEXT: s_add_i32 s1, s0, -4 -; GFX11-NEXT: s_min_u32 s2, s0, s1 -; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f -; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 -; GFX11-NEXT: s_lshl_b32 s2, s2, 2 -; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX678-SDAG-LABEL: s_set_rounding_select_4_0: +; GFX678-SDAG: ; %bb.0: +; GFX678-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX678-SDAG-NEXT: s_cselect_b32 s34, 4, 0 +; GFX678-SDAG-NEXT: s_add_i32 s35, s34, -4 +; GFX678-SDAG-NEXT: s_min_u32 s34, s34, s35 +; GFX678-SDAG-NEXT: s_lshl_b32 s36, s34, 2 +; GFX678-SDAG-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX678-SDAG-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX678-SDAG-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX678-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX678-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX678-GISEL-LABEL: s_set_rounding_select_4_0: +; GFX678-GISEL: ; %bb.0: +; GFX678-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX678-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX678-GISEL-NEXT: s_cselect_b32 s34, 1, 0 +; GFX678-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX678-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX678-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX678-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX678-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; 
GFX678-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX678-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX678-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX678-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: s_set_rounding_select_4_0: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-SDAG-NEXT: s_cselect_b32 s34, 4, 0 +; GFX9-SDAG-NEXT: s_add_i32 s35, s34, -4 +; GFX9-SDAG-NEXT: s_min_u32 s34, s34, s35 +; GFX9-SDAG-NEXT: s_lshl_b32 s36, s34, 2 +; GFX9-SDAG-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX9-SDAG-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX9-SDAG-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX9-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: s_set_rounding_select_4_0: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX9-GISEL-NEXT: s_cselect_b32 s34, 1, 0 +; GFX9-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX9-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX9-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX9-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX9-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX9-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX9-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX9-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-SDAG-LABEL: s_set_rounding_select_4_0: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-SDAG-NEXT: s_cselect_b32 s34, 4, 0 +; GFX10-SDAG-NEXT: s_add_i32 s35, s34, -4 +; GFX10-SDAG-NEXT: s_min_u32 s36, s34, s35 +; GFX10-SDAG-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX10-SDAG-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-SDAG-NEXT: s_lshl_b32 s36, s36, 2 +; GFX10-SDAG-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; 
GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: s_set_rounding_select_4_0: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-GISEL-NEXT: s_cselect_b32 s34, 1, 0 +; GFX10-GISEL-NEXT: s_lshl_b32 s34, s34, 2 +; GFX10-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX10-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX10-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX10-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX10-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: s_set_rounding_select_4_0: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX11-SDAG-NEXT: s_cselect_b32 s0, 4, 0 +; GFX11-SDAG-NEXT: s_add_i32 s1, s0, -4 +; GFX11-SDAG-NEXT: s_min_u32 s2, s0, s1 +; GFX11-SDAG-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-SDAG-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-SDAG-NEXT: s_lshl_b32 s2, s2, 2 +; GFX11-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: s_set_rounding_select_4_0: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX11-GISEL-NEXT: s_cselect_b32 s0, 1, 0 +; GFX11-GISEL-NEXT: s_lshl_b32 s0, s0, 2 +; GFX11-GISEL-NEXT: s_add_i32 s1, s0, -4 +; GFX11-GISEL-NEXT: s_min_u32 s0, s0, s1 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-GISEL-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %cond, 0 %rounding = select i1 %cmp, i32 4, i32 0 call void 
@llvm.set.rounding(i32 %rounding) @@ -1578,33 +2366,61 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) { ; GFX9-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-LABEL: s_set_rounding_select_3_5: -; GFX10: ; %bb.0: -; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: s_cmp_eq_u32 s4, 0 -; GFX10-NEXT: s_cselect_b32 s34, 3, 5 -; GFX10-NEXT: s_add_i32 s35, s34, -4 -; GFX10-NEXT: s_min_u32 s36, s34, s35 -; GFX10-NEXT: s_mov_b32 s34, 0x1c84a50f -; GFX10-NEXT: s_mov_b32 s35, 0xb73e62d9 -; GFX10-NEXT: s_lshl_b32 s36, s36, 2 -; GFX10-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 -; GFX10-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 -; GFX10-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-LABEL: s_set_rounding_select_3_5: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_cmp_eq_u32 s4, 0 -; GFX11-NEXT: s_cselect_b32 s0, 3, 5 -; GFX11-NEXT: s_add_i32 s1, s0, -4 -; GFX11-NEXT: s_min_u32 s2, s0, s1 -; GFX11-NEXT: s_mov_b32 s0, 0x1c84a50f -; GFX11-NEXT: s_mov_b32 s1, 0xb73e62d9 -; GFX11-NEXT: s_lshl_b32 s2, s2, 2 -; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX11-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX10-SDAG-LABEL: s_set_rounding_select_3_5: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-SDAG-NEXT: s_cselect_b32 s34, 3, 5 +; GFX10-SDAG-NEXT: s_add_i32 s35, s34, -4 +; GFX10-SDAG-NEXT: s_min_u32 s36, s34, s35 +; GFX10-SDAG-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX10-SDAG-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-SDAG-NEXT: s_lshl_b32 s36, s36, 2 +; GFX10-SDAG-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX10-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-GISEL-LABEL: s_set_rounding_select_3_5: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX10-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX10-GISEL-NEXT: s_cselect_b32 s34, 3, 5 +; GFX10-GISEL-NEXT: s_add_i32 s35, s34, -4 +; GFX10-GISEL-NEXT: s_min_u32 s34, s34, s35 +; GFX10-GISEL-NEXT: s_mov_b32 s35, 0xb73e62d9 +; GFX10-GISEL-NEXT: s_lshl_b32 s36, s34, 2 +; GFX10-GISEL-NEXT: s_mov_b32 s34, 0x1c84a50f +; GFX10-GISEL-NEXT: s_lshr_b64 s[34:35], s[34:35], s36 +; GFX10-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s34 +; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-SDAG-LABEL: s_set_rounding_select_3_5: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-SDAG-NEXT: s_cmp_eq_u32 s4, 0 +; GFX11-SDAG-NEXT: s_cselect_b32 s0, 3, 5 +; GFX11-SDAG-NEXT: s_add_i32 s1, s0, -4 +; GFX11-SDAG-NEXT: s_min_u32 s2, s0, s1 +; GFX11-SDAG-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-SDAG-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-SDAG-NEXT: s_lshl_b32 s2, s2, 2 +; GFX11-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-SDAG-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-GISEL-LABEL: s_set_rounding_select_3_5: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-GISEL-NEXT: s_cmp_eq_u32 s4, 0 +; GFX11-GISEL-NEXT: s_cselect_b32 s0, 3, 5 +; GFX11-GISEL-NEXT: s_add_i32 s1, s0, -4 +; GFX11-GISEL-NEXT: s_min_u32 s0, s0, s1 +; GFX11-GISEL-NEXT: s_mov_b32 s1, 0xb73e62d9 +; GFX11-GISEL-NEXT: s_lshl_b32 s2, s0, 2 +; GFX11-GISEL-NEXT: s_mov_b32 s0, 0x1c84a50f +; GFX11-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX11-GISEL-NEXT: s_setreg_b32 hwreg(HW_REG_MODE, 0, 4), s0 +; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31] %cmp = icmp eq i32 %cond, 0 %rounding = select i1 %cmp, i32 3, i32 5 call void @llvm.set.rounding(i32 %rounding) @@ -1612,83 +2428,161 @@ define amdgpu_gfx void @s_set_rounding_select_3_5(i32 inreg %cond) { } define amdgpu_kernel void @get_rounding_after_set_rounding_1() { -; GFX6-LABEL: get_rounding_after_set_rounding_1: -; GFX6: ; 
%bb.0: -; GFX6-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 -; GFX6-NEXT: s_mov_b32 s1, 0xc96f385 -; GFX6-NEXT: s_mov_b32 s3, 0xf000 -; GFX6-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) -; GFX6-NEXT: s_lshl_b32 s2, s0, 2 -; GFX6-NEXT: s_mov_b32 s0, 0xeb24da71 -; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX6-NEXT: s_and_b32 s0, s0, 15 -; GFX6-NEXT: s_add_i32 s1, s0, 4 -; GFX6-NEXT: s_cmp_lt_u32 s0, 4 -; GFX6-NEXT: s_cselect_b32 s4, s0, s1 -; GFX6-NEXT: s_mov_b64 s[0:1], 0 -; GFX6-NEXT: s_mov_b32 s2, -1 -; GFX6-NEXT: v_mov_b32_e32 v0, s4 -; GFX6-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_endpgm -; -; GFX7-LABEL: get_rounding_after_set_rounding_1: -; GFX7: ; %bb.0: -; GFX7-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 -; GFX7-NEXT: s_mov_b32 s1, 0xc96f385 -; GFX7-NEXT: s_mov_b32 s3, 0xf000 -; GFX7-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) -; GFX7-NEXT: s_lshl_b32 s2, s0, 2 -; GFX7-NEXT: s_mov_b32 s0, 0xeb24da71 -; GFX7-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX7-NEXT: s_and_b32 s0, s0, 15 -; GFX7-NEXT: s_add_i32 s1, s0, 4 -; GFX7-NEXT: s_cmp_lt_u32 s0, 4 -; GFX7-NEXT: s_cselect_b32 s4, s0, s1 -; GFX7-NEXT: s_mov_b64 s[0:1], 0 -; GFX7-NEXT: s_mov_b32 s2, -1 -; GFX7-NEXT: v_mov_b32_e32 v0, s4 -; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: s_endpgm -; -; GFX8-LABEL: get_rounding_after_set_rounding_1: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 -; GFX8-NEXT: s_mov_b32 s1, 0xc96f385 -; GFX8-NEXT: v_mov_b32_e32 v0, 0 -; GFX8-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) -; GFX8-NEXT: s_lshl_b32 s2, s0, 2 -; GFX8-NEXT: s_mov_b32 s0, 0xeb24da71 -; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX8-NEXT: s_and_b32 s0, s0, 15 -; GFX8-NEXT: s_add_i32 s1, s0, 4 -; GFX8-NEXT: s_cmp_lt_u32 s0, 4 -; GFX8-NEXT: s_cselect_b32 s0, s0, s1 -; GFX8-NEXT: v_mov_b32_e32 v1, 0 -; GFX8-NEXT: v_mov_b32_e32 v2, s0 -; GFX8-NEXT: 
flat_store_dword v[0:1], v2 -; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: s_endpgm -; -; GFX9-LABEL: get_rounding_after_set_rounding_1: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 -; GFX9-NEXT: s_mov_b32 s1, 0xc96f385 -; GFX9-NEXT: v_mov_b32_e32 v0, 0 -; GFX9-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) -; GFX9-NEXT: s_lshl_b32 s2, s0, 2 -; GFX9-NEXT: s_mov_b32 s0, 0xeb24da71 -; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 -; GFX9-NEXT: s_and_b32 s0, s0, 15 -; GFX9-NEXT: s_add_i32 s1, s0, 4 -; GFX9-NEXT: s_cmp_lt_u32 s0, 4 -; GFX9-NEXT: s_cselect_b32 s0, s0, s1 -; GFX9-NEXT: v_mov_b32_e32 v1, 0 -; GFX9-NEXT: v_mov_b32_e32 v2, s0 -; GFX9-NEXT: global_store_dword v[0:1], v2, off -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_endpgm +; GFX6-SDAG-LABEL: get_rounding_after_set_rounding_1: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 +; GFX6-SDAG-NEXT: s_mov_b32 s1, 0xc96f385 +; GFX6-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-SDAG-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) +; GFX6-SDAG-NEXT: s_lshl_b32 s2, s0, 2 +; GFX6-SDAG-NEXT: s_mov_b32 s0, 0xeb24da71 +; GFX6-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX6-SDAG-NEXT: s_and_b32 s0, s0, 15 +; GFX6-SDAG-NEXT: s_add_i32 s1, s0, 4 +; GFX6-SDAG-NEXT: s_cmp_lt_u32 s0, 4 +; GFX6-SDAG-NEXT: s_cselect_b32 s4, s0, s1 +; GFX6-SDAG-NEXT: s_mov_b64 s[0:1], 0 +; GFX6-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX6-SDAG-NEXT: v_mov_b32_e32 v0, s4 +; GFX6-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX6-SDAG-NEXT: s_endpgm +; +; GFX6-GISEL-LABEL: get_rounding_after_set_rounding_1: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 +; GFX6-GISEL-NEXT: s_mov_b32 s0, 0xeb24da71 +; GFX6-GISEL-NEXT: s_mov_b32 s1, 0xc96f385 +; GFX6-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) +; GFX6-GISEL-NEXT: s_lshl_b32 s2, s2, 2 +; GFX6-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; 
GFX6-GISEL-NEXT: s_and_b32 s0, s0, 15 +; GFX6-GISEL-NEXT: s_add_i32 s1, s0, 4 +; GFX6-GISEL-NEXT: s_cmp_lt_u32 s0, 4 +; GFX6-GISEL-NEXT: s_cselect_b32 s2, s0, s1 +; GFX6-GISEL-NEXT: s_mov_b64 s[0:1], 0 +; GFX6-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX6-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; GFX6-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX6-GISEL-NEXT: s_endpgm +; +; GFX7-SDAG-LABEL: get_rounding_after_set_rounding_1: +; GFX7-SDAG: ; %bb.0: +; GFX7-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 +; GFX7-SDAG-NEXT: s_mov_b32 s1, 0xc96f385 +; GFX7-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-SDAG-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) +; GFX7-SDAG-NEXT: s_lshl_b32 s2, s0, 2 +; GFX7-SDAG-NEXT: s_mov_b32 s0, 0xeb24da71 +; GFX7-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX7-SDAG-NEXT: s_and_b32 s0, s0, 15 +; GFX7-SDAG-NEXT: s_add_i32 s1, s0, 4 +; GFX7-SDAG-NEXT: s_cmp_lt_u32 s0, 4 +; GFX7-SDAG-NEXT: s_cselect_b32 s4, s0, s1 +; GFX7-SDAG-NEXT: s_mov_b64 s[0:1], 0 +; GFX7-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX7-SDAG-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX7-SDAG-NEXT: s_endpgm +; +; GFX7-GISEL-LABEL: get_rounding_after_set_rounding_1: +; GFX7-GISEL: ; %bb.0: +; GFX7-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 +; GFX7-GISEL-NEXT: s_mov_b32 s0, 0xeb24da71 +; GFX7-GISEL-NEXT: s_mov_b32 s1, 0xc96f385 +; GFX7-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) +; GFX7-GISEL-NEXT: s_lshl_b32 s2, s2, 2 +; GFX7-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX7-GISEL-NEXT: s_and_b32 s0, s0, 15 +; GFX7-GISEL-NEXT: s_add_i32 s1, s0, 4 +; GFX7-GISEL-NEXT: s_cmp_lt_u32 s0, 4 +; GFX7-GISEL-NEXT: s_cselect_b32 s2, s0, s1 +; GFX7-GISEL-NEXT: s_mov_b64 s[0:1], 0 +; GFX7-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX7-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX7-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; 
GFX7-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX7-GISEL-NEXT: s_endpgm +; +; GFX8-SDAG-LABEL: get_rounding_after_set_rounding_1: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 +; GFX8-SDAG-NEXT: s_mov_b32 s1, 0xc96f385 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX8-SDAG-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) +; GFX8-SDAG-NEXT: s_lshl_b32 s2, s0, 2 +; GFX8-SDAG-NEXT: s_mov_b32 s0, 0xeb24da71 +; GFX8-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX8-SDAG-NEXT: s_and_b32 s0, s0, 15 +; GFX8-SDAG-NEXT: s_add_i32 s1, s0, 4 +; GFX8-SDAG-NEXT: s_cmp_lt_u32 s0, 4 +; GFX8-SDAG-NEXT: s_cselect_b32 s0, s0, s1 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-SDAG-NEXT: flat_store_dword v[0:1], v2 +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX8-SDAG-NEXT: s_endpgm +; +; GFX8-GISEL-LABEL: get_rounding_after_set_rounding_1: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 +; GFX8-GISEL-NEXT: s_mov_b32 s0, 0xeb24da71 +; GFX8-GISEL-NEXT: s_mov_b32 s1, 0xc96f385 +; GFX8-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) +; GFX8-GISEL-NEXT: s_lshl_b32 s2, s2, 2 +; GFX8-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX8-GISEL-NEXT: s_and_b32 s0, s0, 15 +; GFX8-GISEL-NEXT: s_add_i32 s1, s0, 4 +; GFX8-GISEL-NEXT: s_cmp_lt_u32 s0, 4 +; GFX8-GISEL-NEXT: s_cselect_b32 s0, s0, s1 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-GISEL-NEXT: flat_store_dword v[0:1], v2 +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX8-GISEL-NEXT: s_endpgm +; +; GFX9-SDAG-LABEL: get_rounding_after_set_rounding_1: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 +; GFX9-SDAG-NEXT: s_mov_b32 s1, 0xc96f385 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-SDAG-NEXT: s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 4) +; 
GFX9-SDAG-NEXT: s_lshl_b32 s2, s0, 2 +; GFX9-SDAG-NEXT: s_mov_b32 s0, 0xeb24da71 +; GFX9-SDAG-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX9-SDAG-NEXT: s_and_b32 s0, s0, 15 +; GFX9-SDAG-NEXT: s_add_i32 s1, s0, 4 +; GFX9-SDAG-NEXT: s_cmp_lt_u32 s0, 4 +; GFX9-SDAG-NEXT: s_cselect_b32 s0, s0, s1 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-SDAG-NEXT: global_store_dword v[0:1], v2, off +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: get_rounding_after_set_rounding_1: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 4), 0 +; GFX9-GISEL-NEXT: s_mov_b32 s0, 0xeb24da71 +; GFX9-GISEL-NEXT: s_mov_b32 s1, 0xc96f385 +; GFX9-GISEL-NEXT: s_getreg_b32 s2, hwreg(HW_REG_MODE, 0, 4) +; GFX9-GISEL-NEXT: s_lshl_b32 s2, s2, 2 +; GFX9-GISEL-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 +; GFX9-GISEL-NEXT: s_and_b32 s0, s0, 15 +; GFX9-GISEL-NEXT: s_add_i32 s1, s0, 4 +; GFX9-GISEL-NEXT: s_cmp_lt_u32 s0, 4 +; GFX9-GISEL-NEXT: s_cselect_b32 s0, s0, s1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s0 +; GFX9-GISEL-NEXT: global_store_dword v[0:1], v2, off +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) +; GFX9-GISEL-NEXT: s_endpgm ; ; GFX10-LABEL: get_rounding_after_set_rounding_1: ; GFX10: ; %bb.0: @@ -1731,6 +2625,3 @@ define amdgpu_kernel void @get_rounding_after_set_rounding_1() { store volatile i32 %set.mode, ptr addrspace(1) null ret void } - -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; GCN: {{.*}} From d5e6cb2ed0a31ad85018d8eef613cca0d4be3a4f Mon Sep 17 00:00:00 2001 From: Zeyi Xu Date: Wed, 24 Jun 2026 23:40:17 +0800 Subject: [PATCH 354/511] [LifetimeSafety] Cache lifetimebound macro lookup (#205250) Cache lifetimebound macro spelling lookup used by fix-it suggestions. 
Current cache strategy: - During cache build, collect macro names that have ever been defined as a lifetimebound attribute spelling. - During lookup, only visit those cached macro names, find the active definition at the fix-it location, and re-check that the active definition still has lifetimebound spelling. - If multiple matching macros are active at the fix-it location, use the most recently defined one. Performance: | Case | Before 359bfe6 | 359bfe6| After Cached | |-------------------------------------------------------------|----------------:|---------------:|-------------:| | C++ no diagnostics (8k macros, 5k safe functions) | 42.5 ms | 44.2 ms | 47.0 ms | | C++ 10 diagnostics, 8k macros | 23.2 ms | 23.4 ms | 17.5 ms | | C++ 200 diagnostics, macro passed via command line option | 24.8 ms | 24.6 ms | 29.3 ms | | C++ 200 diagnostics, 8k macros | 26.3 ms | 41.7 ms | 27.9 ms | | C++ stress: 1000 diagnostics, 20k macros, no matching macro (simulating worst case scenario) | 40.8 ms | 386.7 ms | 78.9 ms | AI Usage: Performance experiment assisted by Codex. Follow up of https://github.com/llvm/llvm-project/pull/204045 --- clang/lib/Sema/SemaLifetimeSafety.h | 91 +++++++++++++++++-- .../annotation-suggestions-fixits.cpp | 33 +++++++ 2 files changed, 116 insertions(+), 8 deletions(-) diff --git a/clang/lib/Sema/SemaLifetimeSafety.h b/clang/lib/Sema/SemaLifetimeSafety.h index 1047aecf863fb..7c78a6a1e8c31 100644 --- a/clang/lib/Sema/SemaLifetimeSafety.h +++ b/clang/lib/Sema/SemaLifetimeSafety.h @@ -424,6 +424,76 @@ class LifetimeSafetySemaHelperImpl : public LifetimeSafetySemaHelper { } private: + struct LifetimeBoundMacroCache { + bool IsBuilt = false; + SmallVector Candidates; + }; + + void buildLifetimeBoundMacroCache(LifetimeBoundMacroCache &Cache, + ArrayRef Tokens) { + if (Cache.IsBuilt) + return; + + const Preprocessor &PP = S.getPreprocessor(); + // Collect macro names that were ever defined as a lifetimebound attribute. 
+ for (const auto &M : PP.macros()) { + const IdentifierInfo *II = M.first; + const MacroDirective *MD = PP.getLocalMacroDirectiveHistory(II); + if (!MD) + continue; + + // Include earlier matching definitions to handle redefinitions. + for (MacroDirective::DefInfo Def = MD->getDefinition(); Def; + Def = Def.getPreviousDefinition()) { + const MacroInfo *MI = Def.getMacroInfo(); + if (MI->isObjectLike() && Tokens.size() == MI->getNumTokens() && + std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin())) { + Cache.Candidates.push_back(II); + break; + } + } + } + Cache.IsBuilt = true; + } + + StringRef getLastCachedMacroWithSpelling(SourceLocation Loc, + llvm::ArrayRef Tokens, + LifetimeBoundMacroCache &Cache) { + if (Loc.isInvalid()) + return {}; + + buildLifetimeBoundMacroCache(Cache, Tokens); + + const Preprocessor &PP = S.getPreprocessor(); + const SourceManager &SM = S.getSourceManager(); + SourceLocation BestLocation; + StringRef BestSpelling; + for (const IdentifierInfo *II : Cache.Candidates) { + const MacroDirective *MD = PP.getLocalMacroDirectiveHistory(II); + const MacroDirective::DefInfo Def = MD->findDirectiveAtLoc(Loc, SM); + if (!Def || !Def.getMacroInfo()) + continue; + + // Ensure the macro definition active at Loc still has this spelling. + const MacroInfo *MI = Def.getMacroInfo(); + if (!MI->isObjectLike() || Tokens.size() != MI->getNumTokens() || + !std::equal(Tokens.begin(), Tokens.end(), MI->tokens_begin())) + continue; + + // Choose the matching macro defined latest before Loc. 
+ SourceLocation Location = Def.getLocation(); + assert(Location.isInvalid() || + SM.isBeforeInTranslationUnit(Location, Loc)); + if (BestLocation.isInvalid() || + (Location.isValid() && + SM.isBeforeInTranslationUnit(BestLocation, Location))) { + BestLocation = Location; + BestSpelling = II->getName(); + } + } + return BestSpelling; + } + void reportInvalidationSite(const Expr *InvalidationExpr, StringRef InvalidatedSubject) { auto Diag = isa(InvalidationExpr) @@ -438,16 +508,19 @@ class LifetimeSafetySemaHelperImpl : public LifetimeSafetySemaHelper { StringRef Spelling = S.getLangOpts().LifetimeSafetyLifetimeBoundMacro; if (Spelling.empty() && Loc.isValid()) { const Preprocessor &PP = S.getPreprocessor(); - Spelling = PP.getLastMacroWithSpelling( - Loc, {tok::l_square, tok::l_square, PP.getIdentifierInfo("clang"), - tok::coloncolon, PP.getIdentifierInfo("lifetimebound"), - tok::r_square, tok::r_square}); + Spelling = getLastCachedMacroWithSpelling( + Loc, + {tok::l_square, tok::l_square, PP.getIdentifierInfo("clang"), + tok::coloncolon, PP.getIdentifierInfo("lifetimebound"), + tok::r_square, tok::r_square}, + ClangLifetimeBoundMacroCache); if (Spelling.empty() && AllowGNUAttrMacro) - Spelling = PP.getLastMacroWithSpelling( - Loc, {tok::kw___attribute, tok::l_paren, tok::l_paren, - PP.getIdentifierInfo("lifetimebound"), tok::r_paren, - tok::r_paren}); + Spelling = getLastCachedMacroWithSpelling( + Loc, + {tok::kw___attribute, tok::l_paren, tok::l_paren, + PP.getIdentifierInfo("lifetimebound"), tok::r_paren, tok::r_paren}, + GNULifetimeBoundMacroCache); } const std::string Text = Spelling.empty() ? 
"[[clang::lifetimebound]]" : Spelling.str(); @@ -580,6 +653,8 @@ class LifetimeSafetySemaHelperImpl : public LifetimeSafetySemaHelper { } } + LifetimeBoundMacroCache ClangLifetimeBoundMacroCache; + LifetimeBoundMacroCache GNULifetimeBoundMacroCache; Sema &S; }; diff --git a/clang/test/Sema/LifetimeSafety/annotation-suggestions-fixits.cpp b/clang/test/Sema/LifetimeSafety/annotation-suggestions-fixits.cpp index 99f0d16cd8e68..e2fb9dd5df40a 100644 --- a/clang/test/Sema/LifetimeSafety/annotation-suggestions-fixits.cpp +++ b/clang/test/Sema/LifetimeSafety/annotation-suggestions-fixits.cpp @@ -239,6 +239,39 @@ View return_view_with_latest_macro(View a) { return a; } +#define REDEFINED_LIFETIMEBOUND_MACRO [[clang::lifetimebound]] + +View return_view_with_redefined_macro(View a) { + // CHECK: :[[@LINE-1]]:39: warning: parameter in intra-TU function should be marked + // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:45-[[@LINE-2]]:45}:" REDEFINED_LIFETIMEBOUND_MACRO" + return a; +} + +#undef REDEFINED_LIFETIMEBOUND_MACRO +#define REDEFINED_LIFETIMEBOUND_MACRO [[maybe_unused]] + +View return_view_after_redefined_macro(View a) { + // CHECK: :[[@LINE-1]]:40: warning: parameter in intra-TU function should be marked + // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:46-[[@LINE-2]]:46}:" SECOND_LIFETIMEBOUND_MACRO" + return a; +} + +#define UNDEFINED_LIFETIMEBOUND_MACRO [[clang::lifetimebound]] + +View return_view_with_undefined_macro(View a) { + // CHECK: :[[@LINE-1]]:39: warning: parameter in intra-TU function should be marked + // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:45-[[@LINE-2]]:45}:" UNDEFINED_LIFETIMEBOUND_MACRO" + return a; +} + +#undef UNDEFINED_LIFETIMEBOUND_MACRO + +View return_view_after_undefined_macro(View a) { + // CHECK: :[[@LINE-1]]:40: warning: parameter in intra-TU function should be marked + // CHECK: fix-it:"{{.*}}":{[[@LINE-2]]:46-[[@LINE-2]]:46}:" SECOND_LIFETIMEBOUND_MACRO" + return a; +} + struct MacroMember { MyObj data; From c80e7c6c994c7cb6cc59fbc991c66650e29a4f02 Mon Sep 
17 00:00:00 2001 From: Cheng Lingfei <53817093+clingfei@users.noreply.github.com> Date: Wed, 24 Jun 2026 23:45:18 +0800 Subject: [PATCH 355/511] [AArch64] Recombine SETCCCARRY for legalized unsigned compares (#204504) Type legalization can turn wide unsigned compares into SETCCCARRY nodes fed by USUBO carry results, hiding the original high/low compare shape from the existing CCMP conjunction/disjunction lowering. Add an AArch64 DAG combine for SETCCCARRY that recognizes these legalized wide-compare patterns and rebuilds them as SETCC plus AND/OR, exposing them to the existing CCMP lowering. This is separated from https://github.com/llvm/llvm-project/pull/181822. --- .../Target/AArch64/AArch64ISelLowering.cpp | 37 ++++++++++++++ llvm/test/CodeGen/AArch64/i128-cmp.ll | 49 +++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 366237249a2df..85c83cf759305 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -1204,6 +1204,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, // Try and combine setcc/select_cc with csel and bool-vector bitcasts. setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::SELECT_CC); + setTargetDAGCombine(ISD::SETCCCARRY); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); @@ -28222,6 +28223,40 @@ static SDValue performSETCCCombine(SDNode *N, return SDValue(); } +static SDValue performSETCCCARRYCombine(SDNode *N, SelectionDAG &DAG) { + assert(N->getOpcode() == ISD::SETCCCARRY && "Unexpected opcode!"); + + // Rebuild narrow high/low compares from type-legalized wide unsigned compares + // so the existing CCMP conjunction/disjunction lowering can handle them. 
+ SDValue HiLHS = N->getOperand(0); + SDValue HiRHS = N->getOperand(1); + SDValue Carry = N->getOperand(2); + ISD::CondCode Cond = cast(N->getOperand(3))->get(); + if (Cond != ISD::SETULT || Carry.getOpcode() != ISD::USUBO || + Carry.getResNo() != 1) + return SDValue(); + + if (!isNullConstant(HiLHS) && !isNullConstant(HiRHS)) + return SDValue(); + + SDValue LoLHS = Carry.getOperand(0); + SDValue LoRHS = Carry.getOperand(1); + if (!isa(LoLHS) && !isa(LoRHS)) + return SDValue(); + + EVT VT = N->getValueType(0); + SDLoc DL(N); + + SDValue LoCmp = DAG.getSetCC(DL, VT, LoLHS, LoRHS, ISD::SETULT); + if (isNullConstant(HiRHS)) { + SDValue HiEq = DAG.getSetCC(DL, VT, HiLHS, HiRHS, ISD::SETEQ); + return DAG.getNode(ISD::AND, DL, VT, HiEq, LoCmp); + } + + SDValue HiNe = DAG.getSetCC(DL, VT, HiRHS, HiLHS, ISD::SETNE); + return DAG.getNode(ISD::OR, DL, VT, HiNe, LoCmp); +} + static SDValue performSELECT_CCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) { @@ -30154,6 +30189,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, return performVSelectCombine(N, DCI, Subtarget); case ISD::SETCC: return performSETCCCombine(N, DCI, DAG); + case ISD::SETCCCARRY: + return performSETCCCARRYCombine(N, DAG); case ISD::LOAD: return performLOADCombine(N, DCI, DAG, Subtarget); case ISD::STORE: diff --git a/llvm/test/CodeGen/AArch64/i128-cmp.ll b/llvm/test/CodeGen/AArch64/i128-cmp.ll index 9c23b3a603d10..c878cc5f1e6a6 100644 --- a/llvm/test/CodeGen/AArch64/i128-cmp.ll +++ b/llvm/test/CodeGen/AArch64/i128-cmp.ll @@ -323,3 +323,52 @@ exit: ret void } +define i1 @cmp_i128_ult_small_const(i128 %a) { +; CHECK-LABEL: cmp_i128_ult_small_const: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x1, #0 +; CHECK-NEXT: ccmp x0, #7, #2, eq +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %cmp = icmp ult i128 %a, 7 + ret i1 %cmp +} + +define i1 @cmp_i128_small_const_ult(i128 %a) { +; CHECK-LABEL: cmp_i128_small_const_ult: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x1, #0 +; 
CHECK-NEXT: ccmp x0, #7, #2, eq +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret + %cmp = icmp ult i128 7, %a + ret i1 %cmp +} + +define i1 @cmp_i128_ult_small_const_and_i64(i128 %a, i64 %b) { +; CHECK-LABEL: cmp_i128_ult_small_const_and_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x0, #7 +; CHECK-NEXT: ccmp x2, #11, #2, lo +; CHECK-NEXT: ccmp x1, #0, #0, lo +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %wide = icmp ult i128 %a, 7 + %narrow = icmp ult i64 %b, 11 + %cmp = and i1 %wide, %narrow + ret i1 %cmp +} + +define i1 @cmp_i128_small_const_ult_or_i64(i128 %a, i64 %b) { +; CHECK-LABEL: cmp_i128_small_const_ult_or_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: cmp x1, #0 +; CHECK-NEXT: ccmp x0, #7, #2, eq +; CHECK-NEXT: ccmp x2, #11, #0, ls +; CHECK-NEXT: cset w0, lo +; CHECK-NEXT: ret + %wide = icmp ult i128 7, %a + %narrow = icmp ult i64 %b, 11 + %cmp = or i1 %wide, %narrow + ret i1 %cmp +} From e7c8f3ded0d947f65c7f1ffc9c6687d011e0b7a7 Mon Sep 17 00:00:00 2001 From: Federico Bruzzone Date: Wed, 24 Jun 2026 17:47:41 +0200 Subject: [PATCH 356/511] [mlir][vector] reject negative strides for `vector.load`/`vector.store` (#204611) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR follows up #204309 and #204309. It simply rejects negative strides for vector.load/vector.store :D AI Disclaimer: I used AI for the tests. 
--------- Signed-off-by: Federico Bruzzone Co-authored-by: Andrzej Warzyński --- .../mlir/Dialect/MemRef/Utils/MemRefUtils.h | 4 ++++ .../mlir/Dialect/Vector/IR/VectorOps.td | 6 ++++++ .../VectorToLLVM/ConvertVectorToLLVM.cpp | 21 +++++++------------ mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp | 7 +++++++ mlir/lib/Dialect/Vector/IR/CMakeLists.txt | 1 + mlir/lib/Dialect/Vector/IR/VectorOps.cpp | 9 ++++++++ mlir/test/Dialect/Vector/invalid.mlir | 18 ++++++++++++++++ 7 files changed, 52 insertions(+), 14 deletions(-) diff --git a/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h b/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h index 613d567de2457..c93da6ebada7c 100644 --- a/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h +++ b/mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h @@ -194,6 +194,10 @@ LogicalResult resolveSourceIndicesRankReducingSubview( Location loc, OpBuilder &b, memref::SubViewOp subViewOp, ValueRange indices, SmallVectorImpl &sourceIndices); +/// Returns true if any stride of `memRefTy` is statically known to be +/// negative. +bool hasNegativeStaticStride(MemRefType memRefTy); + } // namespace memref } // namespace mlir diff --git a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td index 7578ce78a0f00..24442a6336090 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td +++ b/mlir/include/mlir/Dialect/Vector/IR/VectorOps.td @@ -1717,6 +1717,9 @@ def Vector_LoadOp : Vector_Op<"load", [ %result = vector.load %memref[%i, %j] : memref<200x100xvector<4x8xf32>>, vector<4x8xf32> ``` + The memref must have non-negative strides. Negative strides are not supported + and will trigger a verification error. + Representation-wise, the 'vector.load' operation permits out-of-bounds reads. Support and implementation of out-of-bounds vector loads is target-specific. 
No assumptions should be made on the value of elements @@ -1835,6 +1838,9 @@ def Vector_StoreOp : Vector_Op<"store", [ vector.store %valueToStore, %memref[%i, %j] : memref<200x100xvector<4x8xf32>>, vector<4x8xf32> ``` + The memref must have non-negative strides. Negative strides are not supported + and will trigger a verification error. + Representation-wise, the 'vector.store' operation permits out-of-bounds writes. Support and implementation of out-of-bounds vector stores are target-specific. No assumptions should be made on the memory written out of diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index 8e9d37648841a..e186df33bb7a4 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -18,6 +18,7 @@ #include "mlir/Dialect/LLVMIR/FunctionCallUtils.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h" #include "mlir/Dialect/Vector/IR/VectorOps.h" #include "mlir/Dialect/Vector/Interfaces/MaskableOpInterface.h" #include "mlir/Dialect/Vector/Transforms/LoweringPatterns.h" @@ -223,16 +224,6 @@ static void replaceLoadOrStoreOp(vector::MaskedStoreOp storeOp, storeOp, adaptor.getValueToStore(), ptr, adaptor.getMask(), align); } -/// Returns true if all strides of `memRefTy` are static and non-negative. A -/// negative (or dynamic, hence unknown-sign) stride would make `mul nuw` on the -/// index arithmetic wrap, so `nuw` must not be emitted in that case. -static bool hasNonNegativeStrides(MemRefType memRefTy) { - auto [strides, offset] = memRefTy.getStridesAndOffset(); - return llvm::all_of(strides, [](int64_t stride) { - return !ShapedType::isDynamic(stride) && stride >= 0; - }); -} - /// Conversion pattern for a vector.load, vector.store, vector.maskedload, and /// vector.maskedstore. 
template @@ -283,10 +274,12 @@ class VectorLoadStoreConversion : public ConvertOpToLLVMPattern { "vector.load/store requires unit trailing memref stride"); if (enableGEPInboundsNuw) { noWrapFlags = noWrapFlags | LLVM::GEPNoWrapFlags::inbounds; - // `nuw` additionally requires non-negative strides; skip it when the - // memref has dynamic or negative strides to avoid emitting poison. - if (hasNonNegativeStrides(memRefTy)) - noWrapFlags = noWrapFlags | LLVM::GEPNoWrapFlags::nuw; + + // `nuw` additionally requires non-negative strides. + assert( + !(memref::hasNegativeStaticStride(memRefTy)) && + "Invalid MemRef type - should have been rejected by Op verifier."); + noWrapFlags = noWrapFlags | LLVM::GEPNoWrapFlags::nuw; } } auto vtype = cast( diff --git a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp index 2aea597dd5b90..bc019d601dcd9 100644 --- a/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp +++ b/mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp @@ -344,5 +344,12 @@ LogicalResult resolveSourceIndicesRankReducingSubview( return success(); } +bool hasNegativeStaticStride(MemRefType memRefTy) { + auto [strides, offset] = memRefTy.getStridesAndOffset(); + return llvm::any_of(strides, [](int64_t stride) { + return ShapedType::isStatic(stride) && stride < 0; + }); +} + } // namespace memref } // namespace mlir diff --git a/mlir/lib/Dialect/Vector/IR/CMakeLists.txt b/mlir/lib/Dialect/Vector/IR/CMakeLists.txt index 0248896e096a0..f45618cda8ece 100644 --- a/mlir/lib/Dialect/Vector/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Vector/IR/CMakeLists.txt @@ -24,6 +24,7 @@ add_mlir_dialect_library(MLIRVectorDialect MLIRMaskableOpInterface MLIRMaskingOpInterface MLIRMemRefDialect + MLIRMemRefUtils MLIRSideEffectInterfaces MLIRTensorDialect MLIRUBDialect diff --git a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp index 67c31730f4b65..81ffabca6ecf0 100644 --- a/mlir/lib/Dialect/Vector/IR/VectorOps.cpp +++ 
b/mlir/lib/Dialect/Vector/IR/VectorOps.cpp @@ -20,6 +20,7 @@ #include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" #include "mlir/Dialect/MemRef/IR/MemoryAccessOpInterfaces.h" +#include "mlir/Dialect/MemRef/Utils/MemRefUtils.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/Dialect/UB/IR/UBMatchers.h" #include "mlir/Dialect/Utils/IndexingUtils.h" @@ -6194,6 +6195,10 @@ LogicalResult vector::LoadOp::verify() { if (failed(verifyLoadStoreMemRefLayout(*this, resVecTy, memRefTy))) return failure(); + // Negative strides are not supported on vector.load. + if (memref::hasNegativeStaticStride(memRefTy)) + return emitOpError("memref strides must be non-negative"); + if (memRefTy.getRank() < resVecTy.getRank()) return emitOpError( "destination memref has lower rank than the result vector"); @@ -6240,6 +6245,10 @@ LogicalResult vector::StoreOp::verify() { if (failed(verifyLoadStoreMemRefLayout(*this, valueVecTy, memRefTy))) return failure(); + // Negative strides are not supported on vector.store. 
+ if (memref::hasNegativeStaticStride(memRefTy)) + return emitOpError("memref strides must be non-negative"); + if (memRefTy.getRank() < valueVecTy.getRank()) return emitOpError("source memref has lower rank than the vector to store"); diff --git a/mlir/test/Dialect/Vector/invalid.mlir b/mlir/test/Dialect/Vector/invalid.mlir index 2fed3002596a3..403581e338a6f 100644 --- a/mlir/test/Dialect/Vector/invalid.mlir +++ b/mlir/test/Dialect/Vector/invalid.mlir @@ -2145,6 +2145,15 @@ func.func @store_non_unit_stride(%src : memref>,%va // ----- +func.func @store_negative_stride(%src: memref<100x100xf32, strided<[-100, 1]>>, %val: vector<4xf32>) { + // expected-error @+2 {{'vector.store' op memref strides must be non-negative}} + %c0 = arith.constant 0 : index + vector.store %val, %src[%c0, %c0] : memref<100x100xf32, strided<[-100, 1]>>, vector<4xf32> + return +} + +// ----- + // Verify that vector.bitcast rejects vectors with i0 (zero-bitwidth) element type. func.func @bitcast_i0(%a: vector<4xi0>) -> vector<4xi0> { // expected-error @+1 {{'vector.bitcast' op operand #0 must be vector of non-zero-bitwidth type values, but got 'vector<4xi0>'}} @@ -2198,3 +2207,12 @@ func.func @scan_i0(%a: vector<4xi0>, %init: vector<1xi0>) -> (vector<4xi0>, vect vector<4xi0>, vector<1xi0> return %0#0, %0#1 : vector<4xi0>, vector<1xi0> } + +// ----- + +func.func @load_negative_stride(%src: memref<100x100xf32, strided<[-100, 1]>>) -> vector<8xf32> { + // expected-error @+2 {{'vector.load' op memref strides must be non-negative}} + %c0 = arith.constant 0 : index + %v = vector.load %src[%c0, %c0] : memref<100x100xf32, strided<[-100, 1]>>, vector<8xf32> + return %v : vector<8xf32> +} From 985fdfe48802b7faef8e7e213f8e899dd507aecc Mon Sep 17 00:00:00 2001 From: Stephen Tozer Date: Wed, 24 Jun 2026 16:55:25 +0100 Subject: [PATCH 357/511] [Dexter] Add ability to rewrite scripts to fill-in unknown values (#202799) This patch adds a feature to Dexter that allows scripts to be passed to Dexter with 
missing expected values (`null` values in YAML), which Dexter will attempt to "fill-in" with expected values that match the debugger's actual output. The result is written to a file with the same name as the original test file, in the directory given by --results-directory if one is present; all content outside of the Dexter script itself is preserved exactly as-is. NB: Each test in this patch has a corresponding "expected" file, which is almost identical (including the `RUN` lines), and exists to be `diff`'d against the output of Dexter's script generation. --- .../dexter/dex/evaluation/ExpectRewriter.py | 212 ++++++++++++++++++ .../dexter/dex/test_script/Script.py | 26 +++ .../dexter/dex/tools/ToolBase.py | 3 +- .../dexter/dex/tools/test/Tool.py | 40 +++- .../Inputs/rewrite_expect_list_expected.cpp | 130 +++++++++++ .../Inputs/rewrite_expects_expected.cpp | 53 +++++ .../rewrite_multiple_scripts_expected.cpp | 48 ++++ .../scripts/rewriting/Inputs/simple_prog.cpp | 7 + .../rewriting/Inputs/whole_file_test.dex | 2 + .../Inputs/whole_file_test_expected.dex | 6 + .../scripts/rewriting/rewrite_expect_list.cpp | 44 ++++ .../scripts/rewriting/rewrite_expects.cpp | 54 +++++ .../rewriting/rewrite_multiple_scripts.cpp | 48 ++++ .../scripts/rewriting/whole_file.test | 24 ++ 14 files changed, 690 insertions(+), 7 deletions(-) create mode 100644 cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectRewriter.py create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expect_list_expected.cpp create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expects_expected.cpp create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_multiple_scripts_expected.cpp create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/simple_prog.cpp create mode 100644 
cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test.dex create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test_expected.dex create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expect_list.cpp create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expects.cpp create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_multiple_scripts.cpp create mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/whole_file.test diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectRewriter.py b/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectRewriter.py new file mode 100644 index 0000000000000..7519d96a02a6d --- /dev/null +++ b/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectRewriter.py @@ -0,0 +1,212 @@ +# DExTer : Debugging Experience Tester +# ~~~~~~ ~ ~~ ~ ~~ +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +"""Utilities for using debugger output to generate expected values that match that output.""" + +from collections import Counter, OrderedDict, defaultdict +from copy import deepcopy +from enum import Enum, IntEnum +from typing import Any, Dict, List, Optional, Set, Tuple, Union + +from dex.dextIR import DextIR, StepIR, ValueIR +from dex.evaluation.StateMatch import get_active_where_matches +from dex.test_script.Nodes import Expect, Then, Value, Where +from dex.test_script.Script import DexterScript, Scope +from dex.tools.Main import Context + + +class ExpectedValueRewriter: + """Given a ValueIR for an Expect, generates a complete expected value that matches that value if one can be + provided.""" + + def __init__(self, expect: Expect, value: ValueIR): + self.expect = expect + self.root_value = value + self.expected_value = expect.get_variable_result(value) + + +def unique_expected_values(elements: List[ExpectedValueRewriter]): + """Given a list of ExpectedValueRewriters, returns either a list containing the unique set of non-None expected + values, or a single item if there is only one non-duplicated expected value in the list, or None if there are no + valid expected values.""" + + unique_set = set() + result = [] + for element in elements: + expected_value = element.expected_value + if expected_value is None: + continue + if expected_value not in unique_set: + unique_set.add(expected_value) + result.append(expected_value) + if not result: + return None + if len(result) == 1: + return result[0] + return result + + +class StepExpectRewriter: + """Processes all active, unknown expects at a given debugger step and produces ExpectedValueRewriter results for + each.""" + + def __init__(self, step: StepIR, script: DexterScript): + self.step = step + self.script = script + self.state_match = get_active_where_matches(script, step) + active_expects = { + expect + for where_match in self.state_match.values() +
for expect in where_match.active_expects + } + self.expect_matches: Dict[Expect, ExpectedValueRewriter] = {} + + def add_expected_values(expect: Expect, expected_value: Any, scope: Scope): + assert isinstance(expect, Value), "Non-Value expects currently unsupported" + if expect in active_expects and expected_value is None: + self.expect_matches[expect] = ExpectedValueRewriter( + expect, step.watches[expect.get_watched_expr()] + ) + + script.visit_script(visit_expect=add_expected_values) + + +class ScriptExpectRewriter: + """Given the full output from a debugger run and a script with missing expected values, returns a script with + filled-in expected values that match the debugger output.""" + + def __init__(self, context: Context, dext_ir: DextIR): + self.context = context + self.dext_ir = dext_ir + self.unknown_expect_rewrites: Dict[ + Expect, List[Tuple[int, ExpectedValueRewriter]] + ] = {} + self.new_script: Optional[DexterScript] = None + self.new_expected_values: Dict[Expect, Any] = {} + self.missing_expect_rewrites: List[Expect] = [] + + script = dext_ir.script + assert ( + script is not None + ), "Cannot use ScriptExpectRewriter on a non-script Dexter test." + + # Collect every Expect with an unknown value into the `unknown_expect_rewrites` dict. We expect all Expects in + # this dict to have observed values, and don't expect to rewrite any Expects outside of this dict. + def collect_unknown_expects(expect: Expect, expected_value: Any, scope: Scope): + assert isinstance(expect, Value), "Non-Value expects currently unsupported" + if expected_value is None: + self.unknown_expect_rewrites[expect] = [] + + script.visit_script(visit_expect=collect_unknown_expects) + + # If there are no expects to update, then there is no rewriting to be done - exit early. 
+ if not self.unknown_expect_rewrites: + return + + # Populate the `unknown_expect_rewrites` dict, mapping each expect with an unknown value to its list of observed + # values during this run, along with the corresponding step indices. + self.step_rewriters = [ + StepExpectRewriter(step, script) for step in dext_ir.steps + ] + for step_rewriter in self.step_rewriters: + step_idx = step_rewriter.step.step_index + for expect, expected_value_rewriter in step_rewriter.expect_matches.items(): + self.unknown_expect_rewrites[expect].append( + (step_idx, expected_value_rewriter) + ) + + # For each unknown expect, merge the observed values into a writable "expected values" entry, which may be a + # list or a single value. + self.new_expected_values = { + expect: expected_values + for expect, expect_rewriters in self.unknown_expect_rewrites.items() + if ( + expected_values := unique_expected_values( + [rewriter for idx, rewriter in expect_rewriters] + ) + ) + is not None + } + + # Finally, use the new expected values to rewrite the script. + self.new_script = rewrite_script(script, self.new_expected_values) + self.missing_expect_rewrites = [ + expect + for expect in self.unknown_expect_rewrites + if expect not in self.new_expected_values + ] + + @property + def num_successful_rewrites(self): + return len(self.new_expected_values) + + @property + def num_unsuccessful_rewrites(self): + return len(self.missing_expect_rewrites) + + +def rewrite_script( + script: DexterScript, add_expected_values: Dict[Expect, Any] +) -> DexterScript: + """Given a set of updates to apply to a provided script, returns a copy of the script_obj with the updates + applied. + Does not deep copy, meaning the new script contains the same node objects as the old script; this is safe as we do + not modify these objects.""" + # First build up a map describing the children of every node in the script, adding add_expected_values to the + # required expect nodes.
+ new_node_child_map = {} + + def replace_where(where: Where, scope: Scope): + if scope.where: + scope_where_children = new_node_child_map.setdefault(scope.where, []) + assert isinstance( + scope_where_children, list + ), f"Unexpected child for !where node: {scope_where_children}" + scope_where_children.append(where) + + def replace_then(then: Then, scope: Scope): + assert ( + scope.where not in new_node_child_map + ), "!then must be the sole child of a state node." + new_node_child_map[scope.where] = then + + def replace_expect(expect: Expect, expected_value, scope: Scope): + new_expected_value = add_expected_values.get(expect) or expected_value + new_node_child_map[expect] = new_expected_value + scope_where_children = new_node_child_map.setdefault(scope.where, []) + assert isinstance( + scope_where_children, list + ), f"Unexpected child for state node {scope.where}: {scope_where_children}" + scope_where_children.append(expect) + + script.visit_script( + visit_where=replace_where, visit_expect=replace_expect, visit_then=replace_then + ) + + # Now rebuild the script object using the two maps. 
+ def build_subscript(node): + """Returns the subset of the script object whose parent is the given node.""" + assert isinstance( + node, (Expect, Where) + ), f"Unexpected script parent node: {node}" + if isinstance(node, Expect): + return new_node_child_map[node] + node_children = new_node_child_map[node] + if isinstance(node_children, Then): + return node_children + assert isinstance( + node_children, List + ), f"Unexpected child for state node {node}: {node_children}" + return {child: build_subscript(child) for child in node_children} + + new_script_obj = {node: build_subscript(node) for node in script.script_obj} + return DexterScript( + script.context, + new_script_obj, + script.root_scope, + script.base_dir, + script.load_context, + ) diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Script.py b/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Script.py index 26a38a604dbe5..50a5cb48a200e 100644 --- a/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Script.py +++ b/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Script.py @@ -139,6 +139,16 @@ def get_known_file_for_where(self, where: Where) -> Optional[str]: return next_scope.file +class ScriptLoadContext: + """Contains information about the context that the script was loaded from.""" + + def __init__(self, file: str, lines: List[str], start_line: int, stop_line: int): + self.file = file + self.lines = lines + self.start_line = start_line + self.stop_line = stop_line + + class DexterScript: def __init__( self, @@ -146,10 +156,12 @@ def __init__( script_obj, scope: Scope, source_root_dir: Optional[str], + load_context: ScriptLoadContext, ): self.context = context self.script_obj = script_obj self.root_scope = scope + self.load_context = load_context self.label_dict = LabelDict() assert scope.file is not None self.base_dir = ( @@ -271,6 +283,7 @@ def get_script(context, file, loader, source_root_dir: Optional[str]) -> DexterS try_load_yaml("\n".join(lines), 
loader), root_scope, source_root_dir, + ScriptLoadContext(file, lines, start_line=0, stop_line=len(lines)), ) except (Error, yaml.YAMLError) as e: raise Error(f"File '{file}' was not a valid Dexter script:\n{e}") @@ -293,6 +306,7 @@ def get_script(context, file, loader, source_root_dir: Optional[str]) -> DexterS ), root_scope, source_root_dir, + ScriptLoadContext(file, lines, start_line, stop_line), ) except (Error, yaml.YAMLError) as e: attempted_scripts.append((start_line, e)) @@ -330,3 +344,15 @@ def check_explicit_files(where: Where, _: Scope): script.visit_script(visit_where=check_explicit_files) return script, source_files + + +def write_dexter_script_file(script: DexterScript) -> str: + load_context = script.load_context + script_lines = script.dump().splitlines(True) + write_lines = ( + load_context.lines[: load_context.start_line] + + script_lines + + ["...\n"] + + load_context.lines[load_context.stop_line :] + ) + return "".join(write_lines) diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/tools/ToolBase.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/ToolBase.py index 4b09c134a1b6e..d54dd3924e407 100644 --- a/cross-project-tests/debuginfo-tests/dexter/dex/tools/ToolBase.py +++ b/cross-project-tests/debuginfo-tests/dexter/dex/tools/ToolBase.py @@ -11,6 +11,7 @@ import tempfile from dex import __version__ +from dex.tools.Main import Context from dex.utils import ExtArgParse from dex.utils import PrettyOutput from dex.utils.ReturnCode import ReturnCode @@ -18,7 +19,7 @@ class ToolBase(object, metaclass=abc.ABCMeta): def __init__(self, context): - self.context = context + self.context: Context = context self.parser = None @abc.abstractproperty diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/tools/test/Tool.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/test/Tool.py index 0c028773ec56c..cbb894aa4d13a 100644 --- a/cross-project-tests/debuginfo-tests/dexter/dex/tools/test/Tool.py +++ 
b/cross-project-tests/debuginfo-tests/dexter/dex/tools/test/Tool.py @@ -24,8 +24,13 @@ ) from dex.dextIR.DextIR import DextIR from dex.evaluation import DebuggerRunMatch +from dex.evaluation.ExpectRewriter import ScriptExpectRewriter from dex.heuristic import Heuristic -from dex.test_script.Script import get_dexter_script +from dex.test_script.Script import ( + DexterScript, + get_dexter_script, + write_dexter_script_file, +) from dex.tools import TestToolBase from dex.utils.Exceptions import DebuggerException from dex.utils.Exceptions import BuildScriptException, HeuristicException @@ -155,10 +160,6 @@ def _init_debugger_controller(self): self.context.options.source_files.extend(list(new_source_files)) - # If we are not running a debugger, return the DextIR instead of a DebuggerController. - if self.context.options.skip_run: - return step_collection - if self.context.options.use_script: debugger_controller = ScriptDebuggerController( self.context, step_collection @@ -180,7 +181,8 @@ def _get_steps(self): if self.context.options.skip_run: self.context.logger.warning("Skipping run...") - return debugger_controller + assert isinstance(debugger_controller.step_collection, DextIR) + return debugger_controller.step_collection debugger_controller = run_debugger_subprocess( debugger_controller, self.context.working_directory.path ) @@ -254,6 +256,16 @@ def _record_structured_script_metric_results( with open(output_json_path, "w") as fp: json.dump(run_match.get_metric_json_output(), fp) + def _write_updated_structured_script( + self, test_name, rewritten_script: DexterScript + ): + """Write out the original script file, modified to replace any unknown expects with the actual observed + values.""" + if self.context.options.results_directory: + output_text_path = self._get_results_path(test_name) + with open(output_text_path, "w", encoding="utf-8") as fp: + fp.write(write_dexter_script_file(rewritten_script)) + def _record_test_and_display(self, test_case): """Output test 
case to o stream and record test case internally for handling later. @@ -313,6 +325,22 @@ def _run_test(self, test_name): return self._record_steps(test_name, steps) if self.context.options.use_script: + # Before evaluating, the script may contain "unknown" expects; if they should be rewritten, then do so + # first, and then use the rewritten script to evaluate. + script_writer = ScriptExpectRewriter(self.context, steps) + if script_writer.new_script: + self.context.logger.note( + f"Rewrote script to add {script_writer.num_successful_rewrites} expected values." + ) + if script_writer.num_unsuccessful_rewrites: + self.context.logger.error( + f"Failed to rewrite {script_writer.num_unsuccessful_rewrites} expected values." + ) + self._write_updated_structured_script( + test_name, script_writer.new_script + ) + steps.script = script_writer.new_script + # Then evaluate, using the new script if any was produced. run_match = DebuggerRunMatch(self.context, steps) self._record_structured_script_metric_results(test_name, run_match) self._record_successful_test_match(test_name, steps, run_match) diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expect_list_expected.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expect_list_expected.cpp new file mode 100644 index 0000000000000..6fa8008c8badb --- /dev/null +++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expect_list_expected.cpp @@ -0,0 +1,130 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: %dexter_regression_test_cxx_build %s -o %t/test +// RUN: %dexter_regression_test_run --use-script --binary %t/test \ +// RUN: --results-directory %t/results -- %s 2>&1 | FileCheck %s +// RUN: diff %t/results/%{s:basename} %S/Inputs/rewrite_expect_list_expected.cpp + +/// Test that Dexter can write lists of expected values for simple scalar +/// variables. 
+ +/// NB: The exact contents of this file are compared against the expect file in +/// the Inputs/ directory; any changes to this file, including comments, +/// will require updating the corresponding expected file. +/// Although we perform an exact file comparison, we use `diff` over `cmp` +/// for more legible lit output. + +// CHECK: Rewrote script to add 3 expected values. + +// CHECK: total_watched_steps: 90 +// CHECK: correct_steps: 90 +// CHECK: incorrect_steps: 0 +// CHECK: seen_values: 86 +// CHECK: missing_values: 0 + +int main() { + int prev = 0; + int current = 0; + int next = 1; + for (int i = 0; i < 30; ++i) { + prev = current; // !dex_label loop + current = next; + next = prev + current; + } + return current; +} + +/* +--- +? !where {lines: !label 'loop'} +: !value 'prev': + - '0' + - '1' + - '2' + - '3' + - '5' + - '8' + - '13' + - '21' + - '34' + - '55' + - '89' + - '144' + - '233' + - '377' + - '610' + - '987' + - '1597' + - '2584' + - '4181' + - '6765' + - '10946' + - '17711' + - '28657' + - '46368' + - '75025' + - '121393' + - '196418' + - '317811' + !value 'current': + - '0' + - '1' + - '2' + - '3' + - '5' + - '8' + - '13' + - '21' + - '34' + - '55' + - '89' + - '144' + - '233' + - '377' + - '610' + - '987' + - '1597' + - '2584' + - '4181' + - '6765' + - '10946' + - '17711' + - '28657' + - '46368' + - '75025' + - '121393' + - '196418' + - '317811' + - '514229' + !value 'next': + - '1' + - '2' + - '3' + - '5' + - '8' + - '13' + - '21' + - '34' + - '55' + - '89' + - '144' + - '233' + - '377' + - '610' + - '987' + - '1597' + - '2584' + - '4181' + - '6765' + - '10946' + - '17711' + - '28657' + - '46368' + - '75025' + - '121393' + - '196418' + - '317811' + - '514229' + - '832040' +... 
+*/ diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expects_expected.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expects_expected.cpp new file mode 100644 index 0000000000000..e3852874b2ae3 --- /dev/null +++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expects_expected.cpp @@ -0,0 +1,53 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: %dexter_regression_test_cxx_build %s -o %t/test +// RUN: %dexter_regression_test_run --use-script --binary %t/test \ +// RUN: --results-directory %t/results -- %s 2>&1 | FileCheck %s +// RUN: diff %t/results/%{s:basename} %S/Inputs/rewrite_expects_expected.cpp + +/// Test that when we have a Dexter test with missing/unknown expected values, +/// Dexter produces a modified test file that is identical except for a modified +/// script section. + +/// NB: The exact contents of this file are compared against the expect file in +/// the Inputs/ directory; any changes to this file, including comments, +/// will require updating the corresponding expected file. +/// Although we perform an exact file comparison, we use `diff` over `cmp` +/// for more legible lit output. + +// CHECK: Rewrote script to add 6 expected values. +// CHECK: Failed to rewrite 2 expected values. + +// CHECK: total_watched_steps: 7 +// CHECK: correct_steps: 6 +// CHECK: incorrect_steps: 1 +// CHECK: seen_values: 6 +// CHECK: missing_values: 2 + +int multiply(int b, int a) { + int result = a * b; + return result; // !dex_label mul_ret +} + +int main() { + int a = 6; + int b = 7; + int c = multiply(a, b); + return c; // !dex_label main_ret +} +// !dex_label never_reached +/* +--- +? !where {lines: !label 'mul_ret'} +: !value 'a': '7' + !value 'b': '6' + !value 'result': '42' +? !where {lines: !label 'main_ret'} +: !value 'a': '6' + !value 'b': '7' + !value 'c': '42' + !value 'not_real': null +? 
!where {lines: !label 'never_reached'} +: !value 'a': null +... +*/ diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_multiple_scripts_expected.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_multiple_scripts_expected.cpp new file mode 100644 index 0000000000000..d14e8224635b8 --- /dev/null +++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_multiple_scripts_expected.cpp @@ -0,0 +1,48 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: %dexter_regression_test_cxx_build %s -o %t/test +// RUN: %dexter_regression_test_run --use-script --binary %t/test \ +// RUN: --results-directory %t/results -- %s 2>&1 | FileCheck %s +// RUN: diff %t/results/%{s:basename} \ +// RUN: %S/Inputs/rewrite_multiple_scripts_expected.cpp + +/// Test that when a file contains more than one valid YAML script (but only one +/// Dexter script), the existing YAML is printed correctly. + +/// NB: The exact contents of this file are compared against the expect file in +/// the Inputs/ directory; any changes to this file, including comments, +/// will require updating the corresponding expected file. +/// Although we perform an exact file comparison, we use `diff` over `cmp` +/// for more legible lit output. + +// CHECK: Rewrote script to add 1 expected values. + +// CHECK: total_watched_steps: 1 +// CHECK: correct_steps: 1 +// CHECK: incorrect_steps: 0 +// CHECK: seen_values: 1 +// CHECK: missing_values: 0 + +/* +--- +hr: # 1998 hr ranking +- Mark McGwire +- Sammy Sosa +# 1998 rbi ranking +rbi: +- Sammy Sosa +- Ken Griffey +... +*/ + +int main() { + int ret = 0; + return ret; // !dex_label ret +} + +/* +--- +? !where {lines: !label 'ret'} +: !value 'ret': '0' +... 
+*/ diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/simple_prog.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/simple_prog.cpp new file mode 100644 index 0000000000000..72567b9db3e51 --- /dev/null +++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/simple_prog.cpp @@ -0,0 +1,7 @@ + +int main() { + int i = 0; + i += 1; // !dex_label start + i += 1; + return i; // !dex_label end +} diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test.dex b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test.dex new file mode 100644 index 0000000000000..4501532871cc9 --- /dev/null +++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test.dex @@ -0,0 +1,2 @@ +!where {file: 'simple_prog.cpp', lines: !range [!label start, !label end]}: + ? !value i \ No newline at end of file diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test_expected.dex b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test_expected.dex new file mode 100644 index 0000000000000..c1cf5e87915a5 --- /dev/null +++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test_expected.dex @@ -0,0 +1,6 @@ +? !where {file: simple_prog.cpp, lines: !range [!label 'start', !label 'end']} +: !value 'i': + - '0' + - '1' + - '2' +... 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expect_list.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expect_list.cpp new file mode 100644 index 0000000000000..ad0584949c6c9 --- /dev/null +++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expect_list.cpp @@ -0,0 +1,44 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: %dexter_regression_test_cxx_build %s -o %t/test +// RUN: %dexter_regression_test_run --use-script --binary %t/test \ +// RUN: --results-directory %t/results -- %s 2>&1 | FileCheck %s +// RUN: diff %t/results/%{s:basename} %S/Inputs/rewrite_expect_list_expected.cpp + +/// Test that Dexter can write lists of expected values for simple scalar +/// variables. + +/// NB: The exact contents of this file are compared against the expect file in +/// the Inputs/ directory; any changes to this file, including comments, +/// will require updating the corresponding expected file. +/// Although we perform an exact file comparison, we use `diff` over `cmp` +/// for more legible lit output. + +// CHECK: Rewrote script to add 3 expected values. + +// CHECK: total_watched_steps: 90 +// CHECK: correct_steps: 90 +// CHECK: incorrect_steps: 0 +// CHECK: seen_values: 86 +// CHECK: missing_values: 0 + +int main() { + int prev = 0; + int current = 0; + int next = 1; + for (int i = 0; i < 30; ++i) { + prev = current; // !dex_label loop + current = next; + next = prev + current; + } + return current; +} + +/* +--- +!where {lines: !label loop}: + ? !value prev + ? !value current + ? !value next +... 
+*/ diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expects.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expects.cpp new file mode 100644 index 0000000000000..bc5bae6df72fb --- /dev/null +++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expects.cpp @@ -0,0 +1,54 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: %dexter_regression_test_cxx_build %s -o %t/test +// RUN: %dexter_regression_test_run --use-script --binary %t/test \ +// RUN: --results-directory %t/results -- %s 2>&1 | FileCheck %s +// RUN: diff %t/results/%{s:basename} %S/Inputs/rewrite_expects_expected.cpp + +/// Test that when we have a Dexter test with missing/unknown expected values, +/// Dexter produces a modified test file that is identical except for a modified +/// script section. + +/// NB: The exact contents of this file are compared against the expect file in +/// the Inputs/ directory; any changes to this file, including comments, +/// will require updating the corresponding expected file. +/// Although we perform an exact file comparison, we use `diff` over `cmp` +/// for more legible lit output. + +// CHECK: Rewrote script to add 6 expected values. +// CHECK: Failed to rewrite 2 expected values. + +// CHECK: total_watched_steps: 7 +// CHECK: correct_steps: 6 +// CHECK: incorrect_steps: 1 +// CHECK: seen_values: 6 +// CHECK: missing_values: 2 + +int multiply(int b, int a) { + int result = a * b; + return result; // !dex_label mul_ret +} + +int main() { + int a = 6; + int b = 7; + int c = multiply(a, b); + return c; // !dex_label main_ret +} +// !dex_label never_reached +/* +--- +# Comments in the Dexter script are not preserved. +!where {lines: !label mul_ret}: + ? !value a + ? !value b + ? !value result +!where {lines: !label main_ret}: + ? !value a + ? !value b + ? !value c + ? !value not_real +!where {lines: !label never_reached}: + ? !value a +... 
+*/ diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_multiple_scripts.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_multiple_scripts.cpp new file mode 100644 index 0000000000000..6507a6ff746db --- /dev/null +++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_multiple_scripts.cpp @@ -0,0 +1,48 @@ +// RUN: rm -rf %t +// RUN: mkdir %t +// RUN: %dexter_regression_test_cxx_build %s -o %t/test +// RUN: %dexter_regression_test_run --use-script --binary %t/test \ +// RUN: --results-directory %t/results -- %s 2>&1 | FileCheck %s +// RUN: diff %t/results/%{s:basename} \ +// RUN: %S/Inputs/rewrite_multiple_scripts_expected.cpp + +/// Test that when a file contains more than one valid YAML script (but only one +/// Dexter script), the existing YAML is printed correctly. + +/// NB: The exact contents of this file are compared against the expect file in +/// the Inputs/ directory; any changes to this file, including comments, +/// will require updating the corresponding expected file. +/// Although we perform an exact file comparison, we use `diff` over `cmp` +/// for more legible lit output. + +// CHECK: Rewrote script to add 1 expected values. + +// CHECK: total_watched_steps: 1 +// CHECK: correct_steps: 1 +// CHECK: incorrect_steps: 0 +// CHECK: seen_values: 1 +// CHECK: missing_values: 0 + +/* +--- +hr: # 1998 hr ranking +- Mark McGwire +- Sammy Sosa +# 1998 rbi ranking +rbi: +- Sammy Sosa +- Ken Griffey +... +*/ + +int main() { + int ret = 0; + return ret; // !dex_label ret +} + +/* +--- +!where {lines: !label ret}: + ? !value ret +... 
+*/ diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/whole_file.test b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/whole_file.test new file mode 100644 index 0000000000000..97dee0168b824 --- /dev/null +++ b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/whole_file.test @@ -0,0 +1,24 @@ +RUN: rm -rf %t +RUN: mkdir %t +RUN: %dexter_regression_test_cxx_build %S/Inputs/simple_prog.cpp -o %t/test +RUN: %dexter_regression_test_run --use-script --binary %t/test \ +RUN: --results-directory %t/results --source-root-dir %S/Inputs -- \ +RUN: %S/Inputs/whole_file_test.dex 2>&1 | FileCheck %s +RUN: diff %t/results/whole_file_test.dex %S/Inputs/whole_file_test_expected.dex + +Test that script rewriting still works when the test file is pure YAML, rather +than a YAML Dexter script embedded in another file. + +NB: The exact contents of this file are compared against the expect file in + the Inputs/ directory; any changes to this file, including comments, + will require updating the corresponding expected file. + Although we perform an exact file comparison, we use `diff` over `cmp` + for more legible lit output. + +CHECK: Rewrote script to add 1 expected values. 
+ +CHECK: total_watched_steps: 3 +CHECK: correct_steps: 3 +CHECK: incorrect_steps: 0 +CHECK: seen_values: 3 +CHECK: missing_values: 0 \ No newline at end of file From e74bafdb4fc54975c60ba3fcce5c9d70f9d5a0eb Mon Sep 17 00:00:00 2001 From: VladimirMedic Date: Wed, 24 Jun 2026 18:00:23 +0200 Subject: [PATCH 358/511] [llvm-objdump] Add --substitute-path and --source-dir for --source (#201096) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the code object was compiled on a different machine that does not have the same directory structure, or the source code has been moved, the disassembler emits a warning stating that the files embedded in the code object were not found on disk. This patch introduces two command-line options for llvm-objdump that provide alternate directory locations for finding the source files on disk. These options are inspired by the GDB commands _set directory_ and _set substitute-path_. --substitute-path — Takes two strings, _from_ and _to_, and does a simple string replacement of _from_ with _to_ at the start of the directory part of the source file name, then uses that result instead of the original file name to look up the sources. A rule applies only if _from_ ends at a directory separator. --source-dir — Adds directories to the source search path. Directories are searched in the following order: original recorded path, source-dir + relative recorded path or absolute path without root, source-dir + basename.
--- llvm/docs/CommandGuide/llvm-objdump.rst | 19 +++ .../tools/llvm-objdump/X86/source-path.test | 153 ++++++++++++++++++ llvm/tools/llvm-objdump/ObjdumpOpts.td | 12 ++ llvm/tools/llvm-objdump/SourcePrinter.cpp | 110 ++++++++++++- llvm/tools/llvm-objdump/llvm-objdump.cpp | 15 +- llvm/tools/llvm-objdump/llvm-objdump.h | 2 + 6 files changed, 307 insertions(+), 4 deletions(-) create mode 100644 llvm/test/tools/llvm-objdump/X86/source-path.test diff --git a/llvm/docs/CommandGuide/llvm-objdump.rst b/llvm/docs/CommandGuide/llvm-objdump.rst index d7e2ed0015950..f3cf5f67e38c5 100644 --- a/llvm/docs/CommandGuide/llvm-objdump.rst +++ b/llvm/docs/CommandGuide/llvm-objdump.rst @@ -238,6 +238,25 @@ OPTIONS initial directories from absolute paths. This option has no effect without :option:`--prefix`. +.. option:: --source-dir <dir> + + When disassembling with the :option:`--source` option, add ``dir`` to the + source search path used when looking up source files. For each source + file, llvm-objdump tries the path from the debug info, then each search + directory with the full path appended, then each search directory with only + the file name. + This option may be specified multiple times; each ``--source-dir`` adds one + directory. Directories are searched in the order given on the command line. + +.. option:: --substitute-path <from> <to> + + When disassembling with the :option:`--source` option, replace ``from`` with + ``to`` at the start of the directory part of source file paths when looking up + sources. A rule is applied only if ``from`` ends at a directory separator in + the path. This option may be specified multiple times; rules are evaluated in + the order given and the first matching rule is used, as in GDB + ``set substitute-path``. + .. option:: --print-imm-hex Use hex format when printing immediate values in disassembly output (default).
diff --git a/llvm/test/tools/llvm-objdump/X86/source-path.test b/llvm/test/tools/llvm-objdump/X86/source-path.test new file mode 100644 index 0000000000000..4d6e9990e8245 --- /dev/null +++ b/llvm/test/tools/llvm-objdump/X86/source-path.test @@ -0,0 +1,153 @@ +;; Test --substitute-path and --source-dir with --source and --line-numbers. +;; +;; The object records source paths under /wrong/Inputs, but the source file +;; lives in this test's Inputs directory. Without path remapping, --source +;; cannot load the file from disk. +;; +;; --source-dir: each flag appends one directory to the search list, so when +;; several --source-dir paths could supply the same file name, the first +;; flag on the command line is tried before later ones. +;; +;; --substitute-path: rules are tried in command-line order; the first match +;; wins (same as GDB set substitute-path). + +; RUN: sed -e "s,SRC_COMPDIR,/wrong/Inputs,g" %p/Inputs/source-interleave.ll > %t.ll +; RUN: llc -o %t.o -filetype=obj -mtriple=x86_64-pc-linux %t.ll + +; RUN: llvm-objdump --no-print-imm-hex -d --line-numbers %t.o | FileCheck %s --check-prefix=LINE + +; LINE: : +; LINE-NEXT: ; foo(): +; LINE-NEXT: ; {{.*}}source-interleave-x86_64.c:2 +; LINE-NEXT: pushq +; LINE: ; {{.*}}source-interleave-x86_64.c:3 +; LINE-NEXT: movq +; LINE-NEXT: movl +; LINE-NEXT: popq +; LINE-NEXT: retq + +; RUN: llvm-objdump --no-print-imm-hex -d --source %t.o 2>&1 | \ +; RUN: FileCheck %s --check-prefix=SOURCE-MISSING -DFILE=%t.o + +; SOURCE-MISSING: warning: '[[FILE]]': failed to find source {{[/\\]}}wrong{{[/\\]}}Inputs{{[/\\]}}source-interleave-x86_64.c + +; RUN: llvm-objdump --no-print-imm-hex -d --source --substitute-path /wrong %/p %t.o | \ +; RUN: FileCheck %s --check-prefix=SOURCE +; RUN: llvm-objdump --no-print-imm-hex -d --source --source-dir %/p/Inputs %t.o | \ +; RUN: FileCheck %s --check-prefix=SOURCE + +;; --prefix alone cannot fix /wrong/Inputs, but --source-dir finds the file +;; via basename lookup under the search 
directory. +; RUN: llvm-objdump --no-print-imm-hex -d --source \ +; RUN: --prefix %/p --source-dir %/p/Inputs %t.o | \ +; RUN: FileCheck %s --check-prefix=SOURCE + +;; All three: substitute and prefix produce a missing path; --source-dir +;; resolves the file from the search directory. +; RUN: llvm-objdump --no-print-imm-hex -d --source \ +; RUN: --substitute-path /wrong/Inputs /missing \ +; RUN: --prefix %/p --source-dir %/p/Inputs %t.o | \ +; RUN: FileCheck %s --check-prefix=SOURCE + +;; When substitute-path yields a relative path, --prefix is ignored (as in +;; source-interleave-prefix.test) but --source-dir still resolves the file. +; RUN: llvm-objdump --no-print-imm-hex -d --source --substitute-path /wrong "" %t.o \ +; RUN: --prefix myprefix --source-dir %/p/Inputs %t.o | \ +; RUN: FileCheck %s --check-prefix=SOURCE + +;; %/p/Inputs is listed first and already contains the source; %/p is only +;; needed if the first directories do not resolve the file. +; RUN: llvm-objdump --no-print-imm-hex -d --source \ +; RUN: --source-dir %/p/Inputs --source-dir %/p %t.o | \ +; RUN: FileCheck %s --check-prefix=SOURCE + +;; More specific rule listed first wins (GDB example: /wrong/Inputs before /wrong). +; RUN: llvm-objdump --no-print-imm-hex -d --source \ +; RUN: --substitute-path /wrong/Inputs %/p/Inputs \ +; RUN: --substitute-path /wrong /nowhere %t.o | \ +; RUN: FileCheck %s --check-prefix=SOURCE + +; SOURCE: : +; SOURCE-NEXT: ; int foo() { +; SOURCE-NEXT: pushq +; SOURCE: ; return a; +; SOURCE-NEXT: movq +; SOURCE-NEXT: movl +; SOURCE-NEXT: popq +; SOURCE-NEXT: retq + +; RUN: llvm-objdump --no-print-imm-hex -d --source --substitute-path /wron %/p %t.o 2>&1 | \ +; RUN: FileCheck %s --check-prefix=SOURCE-MISSING -DFILE=%t.o + +;; Two --source-dir trees both contain source-interleave-x86_64.c (same line +;; count; distinct in-line markers). The copy from the **earlier** flag must +;; be the one interleaved (search order is first-to-last on the command line). 
+; RUN: mkdir -p %t.sd1 %t.sd2 +; RUN: sed -e 's/int foo() {/int foo() { \/\/SDR-FIRST/' \ +; RUN: %p/Inputs/source-interleave-x86_64.c > %t.sd1/source-interleave-x86_64.c +; RUN: sed -e 's/int foo() {/int foo() { \/\/SDR-SECOND/' \ +; RUN: %p/Inputs/source-interleave-x86_64.c > %t.sd2/source-interleave-x86_64.c +; RUN: llvm-objdump --no-print-imm-hex -d --source \ +; RUN: --source-dir %t.sd1 --source-dir %t.sd2 %t.o | \ +; RUN: FileCheck %s --check-prefix=SOURCE-DIR-ORDER12 --implicit-check-not=SDR-SECOND +; RUN: llvm-objdump --no-print-imm-hex -d --source \ +; RUN: --source-dir %t.sd2 --source-dir %t.sd1 %t.o | \ +; RUN: FileCheck %s --check-prefix=SOURCE-DIR-ORDER21 --implicit-check-not=SDR-FIRST + +; SOURCE-DIR-ORDER12: : +; SOURCE-DIR-ORDER12-NEXT: ; int foo() { //SDR-FIRST +; SOURCE-DIR-ORDER12-NEXT: pushq + +; SOURCE-DIR-ORDER21: : +; SOURCE-DIR-ORDER21-NEXT: ; int foo() { //SDR-SECOND +; SOURCE-DIR-ORDER21-NEXT: pushq + +;; Broader rule listed first matches; later rule is not applied (miss on remapped path). +; RUN: llvm-objdump --no-print-imm-hex -d --source \ +; RUN: --substitute-path /wrong /nowhere \ +; RUN: --substitute-path /wrong/Inputs %/p/Inputs %t.o 2>&1 | \ +; RUN: FileCheck %s --check-prefix=SOURCE-MISSING-SUBST -DFILE=%t.o + +; SOURCE-MISSING-SUBST: warning: '[[FILE]]': failed to find source {{[/\\]}}nowhere{{[/\\]}}Inputs{{[/\\]}}source-interleave-x86_64.c + +;; First substitute rule is a no-match; second matching rule remaps the path. +; RUN: llvm-objdump --no-print-imm-hex -d --source \ +; RUN: --substitute-path /wron /nowhere \ +; RUN: --substitute-path /wrong %/p %t.o | \ +; RUN: FileCheck %s --check-prefix=SOURCE + +;; Empty strips the matched prefix; use --source-dir to +;; resolve the resulting relative path. 
+; RUN: llvm-objdump --no-print-imm-hex -d --source --substitute-path /wrong "" \ +; RUN: --source-dir %/p/Inputs %t.o | FileCheck %s --check-prefix=SOURCE + +;; Empty without --source-dir: relative path is not resolved; warn once, no crash. +; RUN: llvm-objdump --no-print-imm-hex -d --source --substitute-path /wrong "" %t.o 2>&1 | \ +; RUN: FileCheck %s --check-prefix=SOURCE-EMPTY-TO -DFILE=%t.o \ +; RUN: --implicit-check-not="foo()" + +; SOURCE-EMPTY-TO: : +; SOURCE-EMPTY-TO: warning: '[[FILE]]': failed to find source Inputs{{[/\\]}}source-interleave-x86_64.c +; SOURCE-EMPTY-TO: pushq + +;; Empty is rejected . +; RUN: not llvm-objdump --substitute-path "" %/p %t.o 2>&1 | \ +; RUN: FileCheck %s --check-prefix=SUBST-ERR-FROM + +; SUBST-ERR-FROM: llvm-objdump{{.*}}: error: --substitute-path: must not be empty + +;; --substitute-path runs before --prefix: remap /wrong/Inputs to the +;; root-relative /Inputs, then prepend the test directory. +; RUN: llvm-objdump --no-print-imm-hex -d --source \ +; RUN: --substitute-path /wrong/Inputs /Inputs --prefix %/p %t.o | \ +; RUN: FileCheck %s --check-prefix=SOURCE +; RUN: llvm-objdump --no-print-imm-hex -d --line-numbers \ +; RUN: --substitute-path /wrong/Inputs /Inputs --prefix %/p %t.o | \ +; RUN: FileCheck %s --check-prefix=LINE-SUBST-PREFIX -DPREFIX=%/p + +; LINE-SUBST-PREFIX: : +; LINE-SUBST-PREFIX-NEXT: ; foo(): +; LINE-SUBST-PREFIX-NEXT: ; [[PREFIX]]{{[/\\]}}Inputs{{[/\\]}}source-interleave-x86_64.c:2 +; LINE-SUBST-PREFIX-NEXT: pushq +; LINE-SUBST-PREFIX: ; [[PREFIX]]{{[/\\]}}Inputs{{[/\\]}}source-interleave-x86_64.c:3 +; LINE-SUBST-PREFIX-NEXT: movq diff --git a/llvm/tools/llvm-objdump/ObjdumpOpts.td b/llvm/tools/llvm-objdump/ObjdumpOpts.td index 73f4c48abb19f..e22f58236ab9a 100644 --- a/llvm/tools/llvm-objdump/ObjdumpOpts.td +++ b/llvm/tools/llvm-objdump/ObjdumpOpts.td @@ -246,6 +246,18 @@ def : Flag<["-"], "w">, Alias; defm prefix : Eq<"prefix", "Add prefix to absolute paths">, MetaVarName<"prefix">; +def source_dir 
: Separate<["--"], "source-dir">, + MetaVarName<"">, + HelpText<"Add a directory to the front of the source search path when " + "looking up source files. May be specified multiple times">; +def : Separate<["-"], "source-dir">, Alias; + +def substitute_path : MultiArg<["--"], "substitute-path", 2>, + MetaVarName<" ">, + HelpText<"Replace with at the start of the directory part of " + "source file paths when looking up sources. The rule is applied " + "only if ends at a directory separator in the path. May be " + "specified multiple times">; defm prefix_strip : Eq<"prefix-strip", "Strip out initial directories from absolute " "paths. No effect without --prefix">, diff --git a/llvm/tools/llvm-objdump/SourcePrinter.cpp b/llvm/tools/llvm-objdump/SourcePrinter.cpp index 9240d299251fe..ea325aca22784 100644 --- a/llvm/tools/llvm-objdump/SourcePrinter.cpp +++ b/llvm/tools/llvm-objdump/SourcePrinter.cpp @@ -14,17 +14,110 @@ #include "SourcePrinter.h" #include "llvm-objdump.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallString.h" #include "llvm/DebugInfo/DWARF/DWARFExpressionPrinter.h" #include "llvm/DebugInfo/DWARF/LowLevel/DWARFExpression.h" #include "llvm/Demangle/Demangle.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Path.h" #define DEBUG_TYPE "objdump" namespace llvm { namespace objdump { +static bool sourceFileExists(StringRef Path) { + if (sys::fs::exists(Path) && !sys::fs::is_directory(Path)) + return true; + + return false; +} + +static void normalizeSourcePath(SmallVectorImpl &Path) { + sys::path::native(Path); + sys::path::remove_dots(Path, /*remove_dot_dot=*/true); +} + +static std::optional trySourcePath(StringRef Path) { + SmallString<256> Normalized(Path); + normalizeSourcePath(Normalized); + if (sourceFileExists(Normalized)) + return std::string(Normalized); + return std::nullopt; +} + +static std::optional +searchSourceWithDirs(StringRef FileName, ArrayRef SearchDirs, + bool 
TryLiteralFirst) { + if (TryLiteralFirst) + if (auto Path = trySourcePath(FileName)) + return Path; + + StringRef PathSuffix = sys::path::relative_path(FileName); + StringRef BaseName = sys::path::filename(FileName); + for (StringRef Dir : SearchDirs) { + SmallString<256> Candidate(Dir); + sys::path::append(Candidate, PathSuffix); + if (auto Path = trySourcePath(Candidate)) + return Path; + } + for (StringRef Dir : SearchDirs) { + SmallString<256> Candidate(Dir); + sys::path::append(Candidate, BaseName); + if (auto Path = trySourcePath(Candidate)) + return Path; + } + return std::nullopt; +} + +static std::optional +findSourceFilePath(StringRef FileName, ArrayRef SearchDirs) { + if (FileName.empty() || FileName == DILineInfo::BadString) + return std::nullopt; + + if (sys::path::is_absolute_gnu(FileName)) + return searchSourceWithDirs(FileName, SearchDirs, /*TryLiteralFirst=*/true); + return searchSourceWithDirs(FileName, SearchDirs, /*TryLiteralFirst=*/false); +} + +static std::string applySubstitutePaths(StringRef FileName) { + if (SubstitutePaths.empty()) + return FileName.str(); + + StringRef BaseName = sys::path::filename(FileName); + SmallString<256> Directory(sys::path::parent_path(FileName)); + normalizeSourcePath(Directory); + + for (const auto &[From, To] : SubstitutePaths) { + SmallString<256> FromPath(From); + normalizeSourcePath(FromPath); + StringRef Dir = Directory; + if (!Dir.starts_with(FromPath)) + continue; + if (Dir.size() > FromPath.size() && + !sys::path::is_separator(Dir[FromPath.size()])) + continue; + + SmallString<256> NewDir(To); + StringRef Suffix = Dir.substr(FromPath.size()); + while (!Suffix.empty() && sys::path::is_separator(Suffix.front())) + Suffix = Suffix.drop_front(); + if (!Suffix.empty()) + sys::path::append(NewDir, Suffix); + normalizeSourcePath(NewDir); + + if (NewDir.empty()) + return BaseName.str(); + SmallString<256> Result(NewDir); + sys::path::append(Result, BaseName); + normalizeSourcePath(Result); + return 
std::string(Result); + } + + return FileName.str(); +} + bool InlinedFunction::liveAtAddress(object::SectionedAddress Addr) const { if (!Range.valid()) return false; @@ -607,8 +700,17 @@ bool SourcePrinter::cacheSource(const DILineInfo &LineInfo) { if (LineInfo.Source) { Buffer = MemoryBuffer::getMemBuffer(*LineInfo.Source); } else { - auto BufferOrError = - MemoryBuffer::getFile(LineInfo.FileName, /*IsText=*/true); + std::string PathToOpen = LineInfo.FileName; + if (!SourceDirs.empty()) { + SmallVector SearchDirs; + for (const std::string &Dir : SourceDirs) + SearchDirs.push_back(Dir); + if (std::optional Resolved = + findSourceFilePath(LineInfo.FileName, SearchDirs)) + PathToOpen = std::move(*Resolved); + } + + auto BufferOrError = MemoryBuffer::getFile(PathToOpen, /*IsText=*/true); if (!BufferOrError) { if (MissingSources.insert(LineInfo.FileName).second) reportWarning("failed to find source " + LineInfo.FileName, @@ -653,6 +755,8 @@ void SourcePrinter::printSourceLine(formatted_raw_ostream &OS, toString(ExpectedLineInfo.takeError()), ObjectFilename); } + if (!objdump::SubstitutePaths.empty()) + LineInfo.FileName = applySubstitutePaths(LineInfo.FileName); if (!objdump::Prefix.empty() && sys::path::is_absolute_gnu(LineInfo.FileName)) { diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index b6bda1b3a553c..daebae588cfff 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -84,7 +84,6 @@ #include #include #include -#include using namespace llvm; using namespace llvm::object; @@ -344,6 +343,8 @@ bool objdump::UnwindInfo; bool objdump::UnwindShowWODPool; std::string objdump::Prefix; uint32_t objdump::PrefixStrip; +std::vector> objdump::SubstitutePaths; +std::vector objdump::SourceDirs; DebugFormat objdump::DbgVariables = DFDisabled; DebugFormat objdump::DbgInlinedFunctions = DFDisabled; @@ -3903,6 +3904,18 @@ static void parseObjdumpOptions(const llvm::opt::InputArgList 
&InputArgs) { UnwindShowWODPool = InputArgs.hasArg(OBJDUMP_unwind_show_wod_pool); Prefix = InputArgs.getLastArgValue(OBJDUMP_prefix).str(); parseIntArg(InputArgs, OBJDUMP_prefix_strip, PrefixStrip); + for (const opt::Arg *A : InputArgs.filtered(OBJDUMP_substitute_path)) { + StringRef From = A->getValue(0); + if (From.empty()) + reportCmdLineError(A->getSpelling() + ": must not be empty"); + SubstitutePaths.emplace_back(From.str(), A->getValue(1)); + } + for (StringRef Dir : InputArgs.getAllArgValues(OBJDUMP_source_dir)) { + if (Dir.empty()) + reportCmdLineError("--source-dir argument must not be empty"); + SourceDirs.insert(SourceDirs.end(), Dir.str()); + } + if (const opt::Arg *A = InputArgs.getLastArg(OBJDUMP_debug_vars_EQ)) { DbgVariables = StringSwitch(A->getValue()) .Case("ascii", DFASCII) diff --git a/llvm/tools/llvm-objdump/llvm-objdump.h b/llvm/tools/llvm-objdump/llvm-objdump.h index 208bac8be3b3f..b52f92721386f 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.h +++ b/llvm/tools/llvm-objdump/llvm-objdump.h @@ -66,6 +66,8 @@ extern std::vector MAttrs; extern std::string MCPU; extern std::string Prefix; extern uint32_t PrefixStrip; +extern std::vector SourceDirs; +extern std::vector> SubstitutePaths; extern bool PrintImmHex; extern bool PrintLines; extern bool PrintSource; From 1d0e588022db6fc3823b779e9b1ce561b950e0b3 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Wed, 24 Jun 2026 18:01:43 +0200 Subject: [PATCH 359/511] [libc++][C++03] Cherry-pick #106832 (#205547) --- libcxx/include/__cxx03/__config | 14 +------------- .../__cxx03/__type_traits/datasizeof.h | 19 +++---------------- .../type_traits/datasizeof.compile.pass.cpp | 4 +--- 3 files changed, 5 insertions(+), 32 deletions(-) diff --git a/libcxx/include/__cxx03/__config b/libcxx/include/__cxx03/__config index 27c26084da4b1..c7b2d2b1a7c6d 100644 --- a/libcxx/include/__cxx03/__config +++ b/libcxx/include/__cxx03/__config @@ -842,21 +842,9 @@ typedef __char32_t char32_t; // (If/when MSVC breaks 
its C++ ABI, it will be changed to work as intended.) // However, MSVC implements [[msvc::no_unique_address]] which does what // [[no_unique_address]] is supposed to do, in general. - -// Clang-cl does not yet (14.0) implement either [[no_unique_address]] or -// [[msvc::no_unique_address]] though. If/when it does implement -// [[msvc::no_unique_address]], this should be preferred though. # define _LIBCPP_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] -# elif __has_cpp_attribute(no_unique_address) -# define _LIBCPP_NO_UNIQUE_ADDRESS [[__no_unique_address__]] # else -# define _LIBCPP_NO_UNIQUE_ADDRESS /* nothing */ -// Note that this can be replaced by #error as soon as clang-cl -// implements msvc::no_unique_address, since there should be no C++20 -// compiler that doesn't support one of the two attributes at that point. -// We generally don't want to use this macro outside of C++20-only code, -// because using it conditionally in one language version only would make -// the ABI inconsistent. +# define _LIBCPP_NO_UNIQUE_ADDRESS [[__no_unique_address__]] # endif // c8rtomb() and mbrtoc8() were added in C++20 and C23. Support for these diff --git a/libcxx/include/__cxx03/__type_traits/datasizeof.h b/libcxx/include/__cxx03/__type_traits/datasizeof.h index d428856d13a66..c207f59ac2cc0 100644 --- a/libcxx/include/__cxx03/__type_traits/datasizeof.h +++ b/libcxx/include/__cxx03/__type_traits/datasizeof.h @@ -26,29 +26,16 @@ _LIBCPP_BEGIN_NAMESPACE_STD -#if __has_keyword(__datasizeof) || __has_extension(datasizeof) +// TODO: Enable this again once #94816 is fixed. 
+#if (__has_keyword(__datasizeof) || __has_extension(datasizeof)) && 0 template inline const size_t __datasizeof_v = __datasizeof(_Tp); #else -// NOLINTNEXTLINE(readability-redundant-preprocessor) This is https://llvm.org/PR64825 -# if __has_cpp_attribute(__no_unique_address__) template struct _FirstPaddingByte { - [[__no_unique_address__]] _Tp __v_; + _LIBCPP_NO_UNIQUE_ADDRESS _Tp __v_; char __first_padding_byte_; }; -# else -template ::value || !is_class<_Tp>::value> -struct _FirstPaddingByte : _Tp { - char __first_padding_byte_; -}; - -template -struct _FirstPaddingByte<_Tp, true> { - _Tp __v_; - char __first_padding_byte_; -}; -# endif // __has_cpp_attribute(__no_unique_address__) // _FirstPaddingByte<> is sometimes non-standard layout. Using `offsetof` is UB in that case, but GCC and Clang allow // the use as an extension. diff --git a/libcxx/test/libcxx-03/type_traits/datasizeof.compile.pass.cpp b/libcxx/test/libcxx-03/type_traits/datasizeof.compile.pass.cpp index 67a4dccb0f5fe..699c5101bb7fe 100644 --- a/libcxx/test/libcxx-03/type_traits/datasizeof.compile.pass.cpp +++ b/libcxx/test/libcxx-03/type_traits/datasizeof.compile.pass.cpp @@ -6,9 +6,7 @@ // //===----------------------------------------------------------------------===// -// XFAIL: FROZEN-CXX03-HEADERS-FIXME - -#include <__type_traits/datasizeof.h> +#include <__cxx03/__type_traits/datasizeof.h> #include #include From 4bd5710c39676f5e83fdf4e1d4bc59aef83505a9 Mon Sep 17 00:00:00 2001 From: vitbur Date: Wed, 24 Jun 2026 18:04:13 +0200 Subject: [PATCH 360/511] [RISCV][XCV] Relax long `cv.beqimm`/`cv.bneimm` branches (#205096) `cv.beqimm` and `cv.bneimm` encode their target as a 13-bit signed PC-relative offset (+/-4094 bytes). Branches beyond that range were silently truncated by MC fixup application, producing wrong code with no diagnostic. 
Add `PseudoLongCV_BEQIMM`/`PseudoLongCV_BNEIMM` and the MC-layer relaxation flow (inverted short branch + JAL trampoline), mirroring the standard B-type and Qualcomm Xqcibi vendor branches. **Tests:** `xcvbi-branch-relax.ll` (uses `-filetype=obj | llvm-objdump`, since MC-layer relaxation is only observable on object emission, not on textual asm). Split out of #204879 at review request (one fix per PR). Part of a CORE-V (XCV) series; see RFC: https://discourse.llvm.org/t/rfc-core-v-xcv-support-for-cv32e40p-clang-builtins-xcvsimd-intrinsics-and-generic-auto-selection/91111 --- .../RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 6 +++ .../RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 6 +++ llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td | 7 ++++ llvm/test/MC/RISCV/corev/XCVbi-long-branch.s | 37 +++++++++++++++++++ 4 files changed, 56 insertions(+) create mode 100644 llvm/test/MC/RISCV/corev/XCVbi-long-branch.s diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 8e4dab0a59c98..315d129aeb5fb 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -235,6 +235,10 @@ static unsigned getRelaxedOpcode(unsigned Opcode, ArrayRef Operands, return RISCV::PseudoLongQC_E_BLTUI; case RISCV::QC_E_BGEUI: return RISCV::PseudoLongQC_E_BGEUI; + case RISCV::CV_BEQIMM: + return RISCV::PseudoLongCV_BEQIMM; + case RISCV::CV_BNEIMM: + return RISCV::PseudoLongCV_BNEIMM; } // Returning the original opcode means we cannot relax the instruction. 
@@ -310,6 +314,8 @@ void RISCVAsmBackend::relaxInstruction(MCInst &Inst, case RISCV::QC_E_BGEI: case RISCV::QC_E_BLTUI: case RISCV::QC_E_BGEUI: + case RISCV::CV_BEQIMM: + case RISCV::CV_BNEIMM: Res.setOpcode(getRelaxedOpcode(Inst.getOpcode(), Inst.getOperands(), STI)); Res.addOperand(Inst.getOperand(0)); Res.addOperand(Inst.getOperand(1)); diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp index 31205fda3e9d2..23cbbf0954cd7 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp @@ -365,6 +365,10 @@ static unsigned getInvertedBranchOp(unsigned BrOp) { return RISCV::QC_E_BGEUI; case RISCV::PseudoLongQC_E_BGEUI: return RISCV::QC_E_BLTUI; + case RISCV::PseudoLongCV_BEQIMM: + return RISCV::CV_BNEIMM; + case RISCV::PseudoLongCV_BNEIMM: + return RISCV::CV_BEQIMM; } } @@ -602,6 +606,8 @@ void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, case RISCV::PseudoLongQC_BGEI: case RISCV::PseudoLongQC_BLTUI: case RISCV::PseudoLongQC_BGEUI: + case RISCV::PseudoLongCV_BEQIMM: + case RISCV::PseudoLongCV_BNEIMM: expandQCLongCondBrImm(MI, CB, Fixups, STI, 4); MCNumEmitted += 2; return; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td index af1919f21362b..f3f3d408c43e7 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td @@ -594,6 +594,13 @@ let Predicates = [HasVendorXCVbi, IsRV32] in { def CV_BNEIMM : CVInstImmBranch<0b111, (outs), (ins GPR:$rs1, simm5:$imm5, bare_simm13_lsb0:$imm12), "cv.bneimm", "$rs1, $imm5, $imm12">, Sched<[]>; + // Long-branch relaxation pseudos for cv.beqimm / cv.bneimm. These are emitted + // by the RISCVAsmBackend when the target of a CV_BEQIMM / CV_BNEIMM is + // further than +/-4 KiB away. 
Each expands to an inverted short cv.b{ne,eq}imm + // (4 bytes) plus an unconditional JAL trampoline (4 bytes). Reuses the + // LongBcciPseudo class from RISCVInstrInfo.td (size = 8 bytes, simm5 imm). + def PseudoLongCV_BEQIMM : LongBcciPseudo; + def PseudoLongCV_BNEIMM : LongBcciPseudo; } let Predicates = [HasVendorXCVmem, IsRV32] in { diff --git a/llvm/test/MC/RISCV/corev/XCVbi-long-branch.s b/llvm/test/MC/RISCV/corev/XCVbi-long-branch.s new file mode 100644 index 0000000000000..b831841fe7c9f --- /dev/null +++ b/llvm/test/MC/RISCV/corev/XCVbi-long-branch.s @@ -0,0 +1,37 @@ +# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+xcvbi %s \ +# RUN: | llvm-objdump -dr -M no-aliases - \ +# RUN: | FileCheck %s + +# cv.beqimm / cv.bneimm encode a 13-bit signed PC-relative offset +# (+/-4094 bytes). A branch whose target is out of that range must be +# relaxed by the assembler into an inverted short branch over a JAL +# trampoline. An in-range branch stays a single instruction. + +.text + +# CHECK-LABEL: : +# CHECK: cv.bneimm a0, 0x3, 0x{{[0-9a-f]+}} +# CHECK-NEXT: jal zero, 0x{{[0-9a-f]+}} +far_beqimm: + cv.beqimm a0, 3, target1 + .space 8192 +target1: + ret + +# CHECK-LABEL: : +# CHECK: cv.beqimm a1, -0x5, 0x{{[0-9a-f]+}} +# CHECK-NEXT: jal zero, 0x{{[0-9a-f]+}} +far_bneimm: + cv.bneimm a1, -5, target2 + .space 8192 +target2: + ret + +# An in-range branch is not relaxed: a single cv.beqimm to the target. +# CHECK-LABEL: : +# CHECK: cv.beqimm a0, 0x1, 0x{{[0-9a-f]+}} +# CHECK-NOT: jal zero +near_beqimm: + cv.beqimm a0, 1, target3 +target3: + ret From 61919a58a9574e3dd0094cdd7fbafdd9f04919e5 Mon Sep 17 00:00:00 2001 From: lonely eagle <2020382038@qq.com> Date: Thu, 25 Jun 2026 00:10:39 +0800 Subject: [PATCH 361/511] [mlir][llvm] Add nontemporal field to llvm.intr.masked.store (#205566) Add `nontemporal` field to `llvm.intr.masked.store`. 
Since `nontemporal` is a missing field for `llvm.intr.masked.store`, you can refer to https://github.com/llvm/llvm-project/blob/e68e8d35c91b4fd3ba0ae3ef12d79b41d92580b2/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll#L28. --------- Co-authored-by: Tobias Gysi --- .../include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td | 13 ++++++++----- mlir/test/Target/LLVMIR/Import/intrinsic.ll | 3 +++ mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir | 3 +++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td index 92f0193cdadad..73629b95a06e5 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td @@ -1124,22 +1124,25 @@ def LLVM_MaskedLoadOp : LLVM_OneResultIntrOp<"masked.load"> { /// Create a call to Masked Store intrinsic. def LLVM_MaskedStoreOp : LLVM_ZeroResultIntrOp<"masked.store"> { let arguments = (ins LLVM_AnyVector:$value, LLVM_AnyPointer:$data, - LLVM_VectorOf:$mask, I32Attr:$alignment); + LLVM_VectorOf:$mask, I32Attr:$alignment, + UnitAttr:$nontemporal); let builders = [LLVM_VoidResultTypeOpBuilder, LLVM_ZeroResultOpBuilder]; let assemblyFormat = "$value `,` $data `,` $mask attr-dict `:` " "type($value) `,` type($mask) `into` qualified(type($data))"; string llvmBuilder = [{ - builder.CreateMaskedStore( + auto *inst = builder.CreateMaskedStore( $value, $data, llvm::Align($alignment), $mask); - }]; + }] #setNonTemporalMetadataCode; string mlirBuilder = [{ auto *intrinInst = dyn_cast(inst); llvm::Align alignment = intrinInst->getParamAlign(1).valueOrOne(); + bool nontemporal = intrinInst->hasMetadata(llvm::LLVMContext::MD_nontemporal); $_op = LLVM::MaskedStoreOp::create($_builder, $_location, - $value, $data, $mask, $_builder.getI32IntegerAttr(alignment.value())); + $value, $data, $mask, $_builder.getI32IntegerAttr(alignment.value()), + nontemporal ? 
$_builder.getUnitAttr() : nullptr); }]; - list llvmArgIndices = [0, 1, 2, -1]; + list llvmArgIndices = [0, 1, 2, -1, -1]; } /// Create a call to Masked Gather intrinsic. diff --git a/mlir/test/Target/LLVMIR/Import/intrinsic.ll b/mlir/test/Target/LLVMIR/Import/intrinsic.ll index 959a04fff6dca..d3427b4ab9771 100644 --- a/mlir/test/Target/LLVMIR/Import/intrinsic.ll +++ b/mlir/test/Target/LLVMIR/Import/intrinsic.ll @@ -517,6 +517,9 @@ define void @masked_load_store_intrinsics(ptr %vec, <7 x i1> %mask) { ; CHECK: llvm.intr.masked.store %[[VAL2]], %[[VEC]], %[[MASK]] {alignment = 8 : i32} ; CHECK-SAME: vector<7xf32>, vector<7xi1> into !llvm.ptr call void @llvm.masked.store.v7f32.p0(<7 x float> %2, ptr %vec, i32 8, <7 x i1> %mask) + ; CHECK: llvm.intr.masked.store %[[VAL2]], %[[VEC]], %[[MASK]] {alignment = 8 : i32, nontemporal} + ; CHECK-SAME: vector<7xf32>, vector<7xi1> into !llvm.ptr + call void @llvm.masked.store.v7f32.p0(<7 x float> %2, ptr %vec, i32 8, <7 x i1> %mask), !nontemporal !{i32 1} ret void } diff --git a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir index 293a56a82b23c..ea393dd445eda 100644 --- a/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir +++ b/mlir/test/Target/LLVMIR/llvmir-intrinsics.mlir @@ -566,6 +566,9 @@ llvm.func @masked_load_store_intrinsics(%A: !llvm.ptr, %mask: vector<7xi1>) { // CHECK: call void @llvm.masked.store.v7f32.p0(<7 x float> %{{.*}}, ptr align 1 %0, <7 x i1> %{{.*}}) llvm.intr.masked.store %b, %A, %mask { alignment = 1: i32} : vector<7xf32>, vector<7xi1> into !llvm.ptr + // CHECK: call void @llvm.masked.store.v7f32.p0(<7 x float> %{{.*}}, ptr align 1 %0, <7 x i1> %{{.*}}), !nontemporal !{{.*}} + llvm.intr.masked.store %b, %A, %mask { alignment = 1: i32, nontemporal} : + vector<7xf32>, vector<7xi1> into !llvm.ptr llvm.return } From 8c7c53874759364c2c2fe16883caea9c3303105b Mon Sep 17 00:00:00 2001 From: Charles Zablit Date: Wed, 24 Jun 2026 18:14:10 +0200 Subject: [PATCH 362/511] 
[lldb-dap][test] Re-enable test_by_name_waitFor on Windows (#205570) `test_by_name_waitFor` passes with `LLDB_USE_LLDB_SERVER=1`. `test_by_partial_name_waitFor` hangs on exit. Skip it for now. rdar://180515488 --- lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py index 60a8758591f85..94e7fd63ccad8 100644 --- a/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py +++ b/lldb/test/API/tools/lldb-dap/attach/TestDAP_attach.py @@ -73,7 +73,7 @@ def test_by_name(self): self.attach(program=program) self.continue_and_verify_pid() - @expectedFailureWindows + @expectedFailureWindowsAndNoLLDBServer def test_by_name_waitFor(self): """ Tests waiting for, and attaching to a process by process name that @@ -98,6 +98,7 @@ def test_by_name_waitFor(self): self.spawn_thread.join(timeout=10) @expectedFailureWindows + @skipIfWindowsAndLLDBServer def test_by_partial_name_waitFor(self): """ Tests waiting for and attaching to a process by partial process name From 64f4c1abffa79cc4bf0e5156aa4d34a192c0cb73 Mon Sep 17 00:00:00 2001 From: Akash Banerjee Date: Wed, 24 Jun 2026 17:18:54 +0100 Subject: [PATCH 363/511] [Flang][OpenMP][Sema] Add OpenMP warning when mapping local descriptors to device on enter without a corresponding exit (#205580) Reland https://github.com/llvm/llvm-project/pull/201060. Issues reported in https://github.com/llvm/llvm-project/pull/203324 have now been resolved. 
Authored-by: agozillon --- flang/include/flang/Semantics/openmp-utils.h | 6 ++ flang/lib/Semantics/check-omp-structure.cpp | 54 +++++++++++ flang/lib/Semantics/check-omp-structure.h | 8 ++ flang/lib/Semantics/openmp-utils.cpp | 23 +++++ ...arget-enter-data-temp-descriptor-omp61.f90 | 97 +++++++++++++++++++ .../target-enter-data-temp-descriptor.f90 | 93 ++++++++++++++++++ 6 files changed, 281 insertions(+) create mode 100644 flang/test/Semantics/OpenMP/target-enter-data-temp-descriptor-omp61.f90 create mode 100644 flang/test/Semantics/OpenMP/target-enter-data-temp-descriptor.f90 diff --git a/flang/include/flang/Semantics/openmp-utils.h b/flang/include/flang/Semantics/openmp-utils.h index d2bfeca68bf84..75bab5f6cb8ec 100644 --- a/flang/include/flang/Semantics/openmp-utils.h +++ b/flang/include/flang/Semantics/openmp-utils.h @@ -119,6 +119,12 @@ const Symbol *GetHostSymbol(const Symbol &sym); bool IsMapEnteringType(parser::OmpMapType::Value type); bool IsMapExitingType(parser::OmpMapType::Value type); +// Returns true if the symbol has a temporary stack-allocated descriptor. +// This includes assumed-shape and assumed-rank dummy arguments that are +// not allocatable or pointer. These descriptors are created on the caller's +// stack and become invalid after the function returns. 
+bool HasTemporaryStackDescriptor(const Symbol &symbol); + MaybeExpr GetEvaluateExpr(const parser::Expr &parserExpr); template MaybeExpr GetEvaluateExpr(const T &inp) { return GetEvaluateExpr(parser::UnwrapRef(inp)); diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index 81600fa1ddbb9..d4572ec685e61 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -142,7 +142,27 @@ void OmpStructureChecker::Enter(const parser::SubroutineStmt &x) { scopeStack_.push_back(sym->scope()); } +void OmpStructureChecker::CheckTempDescriptorMappings() { + unsigned version{context_.langOptions().OpenMPVersion}; + for (const auto &[symbol, source] : tempDescriptorEnterMaps_) { + if (tempDescriptorExitMaps_.find(symbol) == tempDescriptorExitMaps_.end()) { + if (version >= 61) { + context_.Warn(common::UsageWarning::OpenMPUsage, source, + "The map of '%s' may include a descriptor that is created locally. Mapping this descriptor without an appropriate TARGET EXIT DATA in the same scope may result in the device retaining an invalid descriptor reference. To avoid mapping the descriptor utilize OpenMP's ref_ptee reference modifier to map just the data"_warn_en_US, + symbol->name()); + } else { + context_.Warn(common::UsageWarning::OpenMPUsage, source, + "The map of '%s' may include a descriptor that is created locally. 
Mapping this descriptor without an appropriate TARGET EXIT DATA in the same scope may result in the device retaining an invalid descriptor reference"_warn_en_US, + symbol->name()); + } + } + } + tempDescriptorEnterMaps_.clear(); + tempDescriptorExitMaps_.clear(); +} + void OmpStructureChecker::Enter(const parser::EndSubroutineStmt &x) { + CheckTempDescriptorMappings(); scopeStack_.pop_back(); } @@ -152,6 +172,7 @@ void OmpStructureChecker::Enter(const parser::FunctionStmt &x) { } void OmpStructureChecker::Enter(const parser::EndFunctionStmt &x) { + CheckTempDescriptorMappings(); scopeStack_.pop_back(); } @@ -161,6 +182,7 @@ void OmpStructureChecker::Enter(const parser::MpSubprogramStmt &x) { } void OmpStructureChecker::Enter(const parser::EndMpSubprogramStmt &x) { + CheckTempDescriptorMappings(); scopeStack_.pop_back(); } @@ -4646,6 +4668,38 @@ void OmpStructureChecker::Enter(const parser::OmpClause::Map &x) { } } } + + // If we are an enter or exit map, iterate over the maps and add them to + // containers that track if the symbol has been referenced in both an + // enter/exit map in the current scope, if it falls into the category of + // having a temporary stack descriptor. If we have reference modifiers, we + // ignore the warning and trust that the user knows what they are doing + // already, as they are aware the type comes with a descriptor and pointer + // combination. + // + // We will utilise this information to emit a warning later if the neccesary + // conditions are met, where we have an enter map without a corresponding exit + // in the current scope. 
+ bool hasRefModifier{ + OmpGetUniqueModifier(modifiers) != nullptr}; + if (!hasRefModifier && + (llvm::is_contained(leafs, Directive::OMPD_target_enter_data) || + llvm::is_contained(leafs, Directive::OMPD_target_exit_data))) { + for (const parser::OmpObject &object : objects.v) { + if (const Symbol *sym{GetObjectSymbol(object, /*ultimate=*/true)}) { + if (HasTemporaryStackDescriptor(*sym)) { + auto maybeSource{GetObjectSource(object)}; + parser::CharBlock source{ + maybeSource.value_or(GetContext().clauseSource)}; + if (llvm::is_contained(leafs, Directive::OMPD_target_enter_data)) { + tempDescriptorEnterMaps_.emplace(sym, source); + } else { + tempDescriptorExitMaps_.insert(sym); + } + } + } + } + } } void OmpStructureChecker::Enter(const parser::OmpClause::Schedule &x) { diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 4499e2a213384..256383d890cab 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -368,6 +368,7 @@ class OmpStructureChecker : public OmpStructureCheckerBase { const parser::OmpClause &initClause); void CheckAllowedRequiresClause(llvm::omp::Clause clause); void AddEndDirectiveClauses(const parser::OmpClauseList &clauses); + void CheckTempDescriptorMappings(); void EnterDirectiveNest(const int index) { directiveNest_[index]++; } void ExitDirectiveNest(const int index) { directiveNest_[index]--; } @@ -394,6 +395,13 @@ class OmpStructureChecker : public OmpStructureCheckerBase { // IF clauses that referenced them. If there was no modifier, the entire // directive is assumed to be listed. std::map ifLeafs_; + + // Track symbols with temporary stack descriptors mapped in TARGET ENTER DATA + // and symbols mapped in TARGET EXIT DATA within the current function scope. + // Used to warn about potential issues with mapping temporary descriptors. 
+ std::multimap tempDescriptorEnterMaps_; + std::set tempDescriptorExitMaps_; + // Stack of nested DO loops and OpenMP constructs. // This is used to verify DO loop nest for DOACROSS, and branches into // and out of OpenMP constructs. diff --git a/flang/lib/Semantics/openmp-utils.cpp b/flang/lib/Semantics/openmp-utils.cpp index 0556920877f45..cfdf2f8eb56c9 100644 --- a/flang/lib/Semantics/openmp-utils.cpp +++ b/flang/lib/Semantics/openmp-utils.cpp @@ -343,6 +343,29 @@ bool IsMapExitingType(parser::OmpMapType::Value type) { } } +// This function aims to return true when a symbol is going to result +// in a temporary stack descriptor being allocated for it in the +// lowering that may pose an issue for data mapping if left on +// device accidentally. +bool HasTemporaryStackDescriptor(const Symbol &symbol) { + const Symbol &ultimate(symbol.GetUltimate()); + bool isDummy = IsDummy(ultimate); + + if (IsAllocatableOrPointer(ultimate)) { + return !isDummy; + } + + if (!isDummy) { + return false; + } + + if (const auto *obj = ultimate.detailsIf()) { + return obj->IsAssumedShape() || obj->IsAssumedRank(); + } + + return false; +} + static MaybeExpr GetEvaluateExprFromTyped(const parser::TypedExpr &typedExpr) { // ForwardOwningPointer typedExpr // `- GenericExprWrapper ^.get() diff --git a/flang/test/Semantics/OpenMP/target-enter-data-temp-descriptor-omp61.f90 b/flang/test/Semantics/OpenMP/target-enter-data-temp-descriptor-omp61.f90 new file mode 100644 index 0000000000000..e766b1b44d0fa --- /dev/null +++ b/flang/test/Semantics/OpenMP/target-enter-data-temp-descriptor-omp61.f90 @@ -0,0 +1,97 @@ +! RUN: %python %S/../test_errors.py %s %flang -fopenmp -fopenmp-version=61 -Werror -Wno-experimental-option + +! Check for OpenMP 6.1+ specific warning that includes ref_ptee suggestion +! when mapping variables with temporary stack descriptors on TARGET ENTER DATA +! without a corresponding TARGET EXIT DATA. 
+ +subroutine test_assumed_shape_warning(arr) + integer, intent(inout), dimension(:,:) :: arr(:) + !WARNING: The map of 'arr' may include a descriptor that is created locally. Mapping this descriptor without an appropriate TARGET EXIT DATA in the same scope may result in the device retaining an invalid descriptor reference. To avoid mapping the descriptor utilize OpenMP's ref_ptee reference modifier to map just the data [-Wopenmp-usage] + !$omp target enter data map(to: arr) +end subroutine + +subroutine test_assumed_rank_warning(arr) + integer, intent(inout) :: arr(..) + !WARNING: The map of 'arr' may include a descriptor that is created locally. Mapping this descriptor without an appropriate TARGET EXIT DATA in the same scope may result in the device retaining an invalid descriptor reference. To avoid mapping the descriptor utilize OpenMP's ref_ptee reference modifier to map just the data [-Wopenmp-usage] + !$omp target enter data map(to: arr) +end subroutine + +subroutine test_local_allocatable_warning() + integer, allocatable :: local_arr(:) + allocate(local_arr(100)) + !WARNING: The map of 'local_arr' may include a descriptor that is created locally. Mapping this descriptor without an appropriate TARGET EXIT DATA in the same scope may result in the device retaining an invalid descriptor reference. To avoid mapping the descriptor utilize OpenMP's ref_ptee reference modifier to map just the data [-Wopenmp-usage] + !$omp target enter data map(to: local_arr) + deallocate(local_arr) +end subroutine + +subroutine test_local_pointer_warning() + integer, pointer :: local_ptr(:) + allocate(local_ptr(100)) + !WARNING: The map of 'local_ptr' may include a descriptor that is created locally. Mapping this descriptor without an appropriate TARGET EXIT DATA in the same scope may result in the device retaining an invalid descriptor reference. 
To avoid mapping the descriptor utilize OpenMP's ref_ptee reference modifier to map just the data [-Wopenmp-usage] + !$omp target enter data map(to: local_ptr) + deallocate(local_ptr) +end subroutine + +module test_module +contains + subroutine test_module_procedure_warning(arr) + integer, intent(inout) :: arr(:) + !WARNING: The map of 'arr' may include a descriptor that is created locally. Mapping this descriptor without an appropriate TARGET EXIT DATA in the same scope may result in the device retaining an invalid descriptor reference. To avoid mapping the descriptor utilize OpenMP's ref_ptee reference modifier to map just the data [-Wopenmp-usage] + !$omp target enter data map(to: arr) + end subroutine + + subroutine test_module_procedure_with_exit(arr) + integer, intent(inout) :: arr(:) + !$omp target enter data map(to: arr) + !$omp target exit data map(from: arr) + end subroutine +end module + +! Test cases where warnings should not be emitted, the test_errors.py script +! should fail if we emit errors for these that are not checked, so no need to +! verify with an explicit check. 
+ +subroutine test_ref_ptee_no_warning(arr) + integer, intent(inout) :: arr(:) + !$omp target enter data map(ref_ptee, to: arr) +end subroutine + +subroutine test_ref_ptr_no_warning(arr) + integer, intent(inout) :: arr(:) + !$omp target enter data map(ref_ptr, to: arr) +end subroutine + +subroutine test_ref_ptr_ptee_no_warning(arr) + integer, intent(inout) :: arr(:) + !$omp target enter data map(ref_ptr_ptee, to: arr) +end subroutine + +subroutine test_with_exit_data(arr) + integer, intent(inout) :: arr(:) + !$omp target enter data map(to: arr) + !$omp target exit data map(from: arr) +end subroutine + +subroutine test_explicit_shape_no_warning(arr, n) + integer, intent(in) :: n + integer, intent(inout) :: arr(n) + !$omp target enter data map(to: arr) +end subroutine + +subroutine test_local_allocatable_with_exit() + integer, allocatable :: local_arr(:) + allocate(local_arr(100)) + !$omp target enter data map(to: local_arr) + !$omp target exit data map(from: local_arr) + deallocate(local_arr) +end subroutine + +subroutine test_allocatable_dummy_no_warning(arr) + integer, allocatable, intent(inout) :: arr(:) + !$omp target enter data map(to: arr) +end subroutine + +subroutine test_pointer_dummy_no_warning(ptr) + integer, pointer, intent(inout) :: ptr(:) + !$omp target enter data map(to: ptr) +end subroutine diff --git a/flang/test/Semantics/OpenMP/target-enter-data-temp-descriptor.f90 b/flang/test/Semantics/OpenMP/target-enter-data-temp-descriptor.f90 new file mode 100644 index 0000000000000..bd1eb98ebec60 --- /dev/null +++ b/flang/test/Semantics/OpenMP/target-enter-data-temp-descriptor.f90 @@ -0,0 +1,93 @@ +! RUN: %python %S/../test_errors.py %s %flang -fopenmp -Werror -fopenmp-version=52 -Wno-experimental-option + +! Check for warning when mapping variables with temporary stack descriptors +! (assumed-shape, assumed-rank, local allocatables, local pointers) on +! TARGET ENTER DATA without a corresponding TARGET EXIT DATA in the same scope. 
+ +subroutine test_assumed_shape_warning(arr) + integer, intent(inout) :: arr(:) + !WARNING: The map of 'arr' may include a descriptor that is created locally. Mapping this descriptor without an appropriate TARGET EXIT DATA in the same scope may result in the device retaining an invalid descriptor reference [-Wopenmp-usage] + !$omp target enter data map(to: arr) +end subroutine + +subroutine test_assumed_shape_2d_warning(arr) + integer, intent(inout) :: arr(:,:) + !WARNING: The map of 'arr' may include a descriptor that is created locally. Mapping this descriptor without an appropriate TARGET EXIT DATA in the same scope may result in the device retaining an invalid descriptor reference [-Wopenmp-usage] + !$omp target enter data map(to: arr) +end subroutine + +subroutine test_assumed_rank_warning(arr) + integer, intent(inout) :: arr(..) + !WARNING: The map of 'arr' may include a descriptor that is created locally. Mapping this descriptor without an appropriate TARGET EXIT DATA in the same scope may result in the device retaining an invalid descriptor reference [-Wopenmp-usage] + !$omp target enter data map(to: arr) +end subroutine + +subroutine test_local_pointer_warning() + integer, pointer :: local_ptr(:) + allocate(local_ptr(100)) + !WARNING: The map of 'local_ptr' may include a descriptor that is created locally. Mapping this descriptor without an appropriate TARGET EXIT DATA in the same scope may result in the device retaining an invalid descriptor reference [-Wopenmp-usage] + !$omp target enter data map(to: local_ptr) + deallocate(local_ptr) +end subroutine + +subroutine test_local_allocatable_warning() + integer, allocatable :: local_arr(:) + allocate(local_arr(100)) + !WARNING: The map of 'local_arr' may include a descriptor that is created locally. 
Mapping this descriptor without an appropriate TARGET EXIT DATA in the same scope may result in the device retaining an invalid descriptor reference [-Wopenmp-usage] + !$omp target enter data map(to: local_arr) + deallocate(local_arr) +end subroutine + +module test_module +contains + subroutine test_module_procedure_warning(arr) + integer, intent(inout) :: arr(:) + !WARNING: The map of 'arr' may include a descriptor that is created locally. Mapping this descriptor without an appropriate TARGET EXIT DATA in the same scope may result in the device retaining an invalid descriptor reference [-Wopenmp-usage] + !$omp target enter data map(to: arr) + end subroutine + + subroutine test_module_procedure_with_exit(arr) + integer, intent(inout) :: arr(:) + !$omp target enter data map(to: arr) + !$omp target exit data map(from: arr) + end subroutine +end module + +! Test cases where warnings should not be emitted, the test_errors.py script +! should fail if we emit errors for these that are not checked, so no need to +! verify with an explicit check. 
+ +subroutine test_pointer_dummy_no_warning(ptr) + integer, pointer, intent(inout) :: ptr(:) + !$omp target enter data map(to: ptr) +end subroutine + +subroutine test_allocatable_dummy_no_warning(arr) + integer, allocatable, intent(inout) :: arr(:) + !$omp target enter data map(to: arr) +end subroutine + +subroutine test_with_exit_data(arr) + integer, intent(inout) :: arr(:) + !$omp target enter data map(to: arr) + !$omp target exit data map(from: arr) +end subroutine + +subroutine test_explicit_shape_no_warning(arr, n) + integer, intent(in) :: n + integer, intent(inout) :: arr(n) + !$omp target enter data map(to: arr) +end subroutine + +subroutine test_assumed_size_no_warning(arr) + integer, intent(inout) :: arr(*) + !$omp target enter data map(to: arr(1:10)) +end subroutine + +subroutine test_local_allocatable_with_exit() + integer, allocatable :: local_arr(:) + allocate(local_arr(100)) + !$omp target enter data map(to: local_arr) + !$omp target exit data map(from: local_arr) + deallocate(local_arr) +end subroutine From f26fd6a436208f2a9bc845166a495066cf9fab47 Mon Sep 17 00:00:00 2001 From: Mel Chen Date: Thu, 25 Jun 2026 00:28:49 +0800 Subject: [PATCH 364/511] [LV] Pre-commit test for live-out derivation requiring VPExpandSCEVRecipe. 
nfc (#204350) --- .../LoopVectorize/iv_outside_user.ll | 106 ++++++++++++++++++ 1 file changed, 106 insertions(+) diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index e1b1f34d69fcf..efe9ed64b203f 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -1416,3 +1416,109 @@ loop: exit: ret i32 %iv.trunc } + +define i32 @added_step(i32 %n, i32 %step_base, ptr %p) { +; VEC-LABEL: define i32 @added_step( +; VEC-SAME: i32 [[N:%.*]], i32 [[STEP_BASE:%.*]], ptr [[P:%.*]]) { +; VEC-NEXT: [[ENTRY:.*]]: +; VEC-NEXT: [[STEP:%.*]] = add i32 [[STEP_BASE]], 1 +; VEC-NEXT: [[UMAX1:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1) +; VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[UMAX1]], 2 +; VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]] +; VEC: [[VECTOR_SCEVCHECK]]: +; VEC-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1) +; VEC-NEXT: [[TMP0:%.*]] = add i32 [[UMAX]], -1 +; VEC-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP0]], 0 +; VEC-NEXT: br i1 [[TMP1]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]] +; VEC: [[VECTOR_PH]]: +; VEC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX1]], 2 +; VEC-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX1]], [[N_MOD_VF]] +; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0 +; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer +; VEC-NEXT: br label %[[VECTOR_BODY:.*]] +; VEC: [[VECTOR_BODY]]: +; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] +; VEC-NEXT: [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; VEC-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[P]], i32 
[[INDEX]] +; VEC-NEXT: store <2 x i32> [[TMP2]], ptr [[TMP3]], align 4 +; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) +; VEC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; VEC-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} +; VEC: [[MIDDLE_BLOCK]]: +; VEC-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[TMP2]], i64 1 +; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] +; VEC-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; VEC: [[SCALAR_PH]]: +; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ] +; VEC-NEXT: br label %[[LOOP:.*]] +; VEC: [[LOOP]]: +; VEC-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; VEC-NEXT: [[DERIVED:%.*]] = mul i32 [[IV]], [[STEP]] +; VEC-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; VEC-NEXT: store i32 [[DERIVED]], ptr [[GEP]], align 4 +; VEC-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; VEC-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]] +; VEC-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], {{!llvm.loop ![0-9]+}} +; VEC: [[EXIT]]: +; VEC-NEXT: [[DERIVED_LCSSA:%.*]] = phi i32 [ [[DERIVED]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] +; VEC-NEXT: ret i32 [[DERIVED_LCSSA]] +; +; INTERLEAVE-LABEL: define i32 @added_step( +; INTERLEAVE-SAME: i32 [[N:%.*]], i32 [[STEP_BASE:%.*]], ptr [[P:%.*]]) { +; INTERLEAVE-NEXT: [[ENTRY:.*]]: +; INTERLEAVE-NEXT: [[STEP:%.*]] = add i32 [[STEP_BASE]], 1 +; INTERLEAVE-NEXT: [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[N]], i32 1) +; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[UMAX]], 2 +; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; INTERLEAVE: [[VECTOR_PH]]: +; INTERLEAVE-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[UMAX]], 2 +; 
INTERLEAVE-NEXT: [[N_VEC:%.*]] = sub i32 [[UMAX]], [[N_MOD_VF]] +; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]] +; INTERLEAVE: [[VECTOR_BODY]]: +; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 1 +; INTERLEAVE-NEXT: [[TMP1:%.*]] = mul i32 [[INDEX]], [[STEP]] +; INTERLEAVE-NEXT: [[TMP2:%.*]] = mul i32 [[TMP0]], [[STEP]] +; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[P]], i32 [[INDEX]] +; INTERLEAVE-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[P]], i32 [[TMP0]] +; INTERLEAVE-NEXT: store i32 [[TMP1]], ptr [[TMP3]], align 4 +; INTERLEAVE-NEXT: store i32 [[TMP2]], ptr [[TMP4]], align 4 +; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; INTERLEAVE-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; INTERLEAVE-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} +; INTERLEAVE: [[MIDDLE_BLOCK]]: +; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX]], [[N_VEC]] +; INTERLEAVE-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; INTERLEAVE: [[SCALAR_PH]]: +; INTERLEAVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; INTERLEAVE-NEXT: br label %[[LOOP:.*]] +; INTERLEAVE: [[LOOP]]: +; INTERLEAVE-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; INTERLEAVE-NEXT: [[DERIVED:%.*]] = mul i32 [[IV]], [[STEP]] +; INTERLEAVE-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; INTERLEAVE-NEXT: store i32 [[DERIVED]], ptr [[GEP]], align 4 +; INTERLEAVE-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; INTERLEAVE-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]] +; INTERLEAVE-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT]], {{!llvm.loop ![0-9]+}} +; INTERLEAVE: [[EXIT]]: +; INTERLEAVE-NEXT: [[DERIVED_LCSSA:%.*]] = phi i32 [ [[DERIVED]], %[[LOOP]] ], [ [[TMP2]], %[[MIDDLE_BLOCK]] 
] +; INTERLEAVE-NEXT: ret i32 [[DERIVED_LCSSA]] +; +entry: + %step = add i32 %step_base, 1 + br label %loop + +loop: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ] + %derived = mul i32 %iv, %step + %gep = getelementptr i32, ptr %p, i32 %iv + store i32 %derived, ptr %gep, align 4 + %iv.next = add i32 %iv, 1 + %cmp = icmp ult i32 %iv.next, %n + br i1 %cmp, label %loop, label %exit + +exit: + ret i32 %derived +} From 4b46b19051fe70ed8bf7ce9c9011f9976dd9e63d Mon Sep 17 00:00:00 2001 From: Kseniya Tikhomirova Date: Wed, 24 Jun 2026 18:39:13 +0200 Subject: [PATCH 365/511] [libsycl] add heuristics to prefer devices with compatible images (#203530) Signed-off-by: Tikhomirova, Kseniya --- libsycl/src/detail/global_objects.cpp | 7 +- libsycl/src/detail/global_objects.hpp | 5 +- .../src/detail/offload/offload_topology.cpp | 2 - libsycl/src/detail/platform_impl.cpp | 15 +- libsycl/src/detail/platform_impl.hpp | 11 ++ libsycl/src/detail/program_manager.cpp | 13 ++ libsycl/src/detail/program_manager.hpp | 4 + libsycl/src/device_selector.cpp | 10 +- libsycl/unittests/CMakeLists.txt | 1 + libsycl/unittests/common/unittests_helper.hpp | 48 +++++ .../unittests/device_selector/CMakeLists.txt | 3 + .../device_selector/get_device_preference.cpp | 181 ++++++++++++++++++ libsycl/unittests/mock/helpers.hpp | 4 + 13 files changed, 293 insertions(+), 11 deletions(-) create mode 100644 libsycl/unittests/common/unittests_helper.hpp create mode 100644 libsycl/unittests/device_selector/CMakeLists.txt create mode 100644 libsycl/unittests/device_selector/get_device_preference.cpp diff --git a/libsycl/src/detail/global_objects.cpp b/libsycl/src/detail/global_objects.cpp index fd94d772337d6..095912ffcd171 100644 --- a/libsycl/src/detail/global_objects.cpp +++ b/libsycl/src/detail/global_objects.cpp @@ -42,9 +42,10 @@ void registerStaticVarShutdownHandler() { static StaticVarShutdownHandler handler{}; } -std::vector &getOffloadTopologies() { - static std::vector Topologies( - 
OL_PLATFORM_BACKEND_LAST); +std::array & +getOffloadTopologies() { + static std::array + Topologies{}; return Topologies; } diff --git a/libsycl/src/detail/global_objects.hpp b/libsycl/src/detail/global_objects.hpp index 4d3106aacd560..d10b352941a8f 100644 --- a/libsycl/src/detail/global_objects.hpp +++ b/libsycl/src/detail/global_objects.hpp @@ -30,8 +30,9 @@ class PlatformImpl; /// /// This vector is populated only once at the first call of get_platforms(). /// -/// \returns std::vector of all offload topologies. -std::vector &getOffloadTopologies(); +/// \returns std::array of all offload topologies. +std::array & +getOffloadTopologies(); /// Returns implementation class objects for all platforms discovered from /// liboffload. diff --git a/libsycl/src/detail/offload/offload_topology.cpp b/libsycl/src/detail/offload/offload_topology.cpp index ab4c57ecf37eb..12ea9c78f1085 100644 --- a/libsycl/src/detail/offload/offload_topology.cpp +++ b/libsycl/src/detail/offload/offload_topology.cpp @@ -56,8 +56,6 @@ void OffloadTopology::registerNewPlatformsAndDevices( } void discoverOffloadDevices() { - callAndThrow(olInit, nullptr); - // liboffload returns devices sorted by backend + platform. We rely on this // behavior during device enumeration. 
using PerBackendDataType = diff --git a/libsycl/src/detail/platform_impl.cpp b/libsycl/src/detail/platform_impl.cpp index 932f282619b0e..9e36b287fc59a 100644 --- a/libsycl/src/detail/platform_impl.cpp +++ b/libsycl/src/detail/platform_impl.cpp @@ -21,6 +21,8 @@ _LIBSYCL_BEGIN_NAMESPACE_SYCL namespace detail { +bool PlatformImpl::rediscoverIfEmpty = false; + PlatformImpl &PlatformImpl::getPlatformImpl(ol_platform_handle_t Platform) { auto &PlatformCache = getPlatformCache(); for (auto &PlatImpl : PlatformCache) { @@ -36,7 +38,7 @@ PlatformImpl &PlatformImpl::getPlatformImpl(ol_platform_handle_t Platform) { } const std::vector &PlatformImpl::getPlatforms() { - [[maybe_unused]] static auto InitPlatformsOnce = []() { + static auto InitPlatforms = []() { discoverOffloadDevices(); registerStaticVarShutdownHandler(); @@ -49,9 +51,18 @@ const std::vector &PlatformImpl::getPlatforms() { OffloadPlatform, PlatformIndex++, PrivateTag{})); } } + }; + + [[maybe_unused]] static auto InitPlatformsOnce = []() { + callAndThrow(olInit, nullptr); + InitPlatforms(); return true; }(); - return getPlatformCache(); + auto &PlatformCache = getPlatformCache(); + if (rediscoverIfEmpty && PlatformCache.empty()) + InitPlatforms(); + + return PlatformCache; } PlatformImpl::PlatformImpl(ol_platform_handle_t Platform, size_t PlatformIndex, diff --git a/libsycl/src/detail/platform_impl.hpp b/libsycl/src/detail/platform_impl.hpp index 6e6f52b0bbaef..83ad3453ab765 100644 --- a/libsycl/src/detail/platform_impl.hpp +++ b/libsycl/src/detail/platform_impl.hpp @@ -32,6 +32,10 @@ _LIBSYCL_BEGIN_NAMESPACE_SYCL +namespace unittests { +struct UnittestsHelper; +} + namespace detail { class DeviceImpl; @@ -147,6 +151,13 @@ class PlatformImpl { std::vector MRootDevices; std::shared_ptr MDefaultContext; + + // Single initialization of platforms and devices doesn't allow to implement + // unittests for this behavior. 
This flag and friend class allows to force + // device & platform rediscovery at the next getPlatforms() call if the cache + // is empty. + static bool rediscoverIfEmpty; + friend struct ::sycl::unittests::UnittestsHelper; }; } // namespace detail diff --git a/libsycl/src/detail/program_manager.cpp b/libsycl/src/detail/program_manager.cpp index 79134ffa7a0b0..459860384b6fb 100644 --- a/libsycl/src/detail/program_manager.cpp +++ b/libsycl/src/detail/program_manager.cpp @@ -152,6 +152,19 @@ ProgramAndKernelManager::getOrCreateKernel(DeviceKernelInfo &KernelInfo, return Kernel; } +bool ProgramAndKernelManager::hasCompatibleImage(const DeviceImpl &Device) { + std::lock_guard Guard(MDataCollectionMutex); + + for (const auto &BinaryImagesPair : MDeviceImageManagers) { + for (const auto &Image : BinaryImagesPair.second) { + if (isImageCompatible(*Image, Device)) + return true; + } + } + + return false; +} + } // namespace detail _LIBSYCL_END_NAMESPACE_SYCL diff --git a/libsycl/src/detail/program_manager.hpp b/libsycl/src/detail/program_manager.hpp index 9afd6b49bbfd6..cf978796a0cfd 100644 --- a/libsycl/src/detail/program_manager.hpp +++ b/libsycl/src/detail/program_manager.hpp @@ -92,6 +92,10 @@ class ProgramAndKernelManager { /// Release device image managers and corresponding resources. void releaseResources(); + /// \return true if and only if at least one registered device image is + /// compatible with the given device. 
+ bool hasCompatibleImage(const DeviceImpl &Device); + protected: ProgramAndKernelManager() = default; ~ProgramAndKernelManager() = default; diff --git a/libsycl/src/device_selector.cpp b/libsycl/src/device_selector.cpp index 86e5f5657c6b5..f53f98d1f5bb2 100644 --- a/libsycl/src/device_selector.cpp +++ b/libsycl/src/device_selector.cpp @@ -10,6 +10,7 @@ #include #include +#include #include @@ -21,14 +22,19 @@ static constexpr int CPUDeviceDefaultScore = 300; static constexpr int AccDeviceDefaultScore = 75; static constexpr int RejectDeviceScore = -1; +static constexpr int CompatibleImageBonus = 1000; +static constexpr int LevelZeroBonus = 50; + static int getDevicePreference(const device &Device) { int Score = 0; const auto &DeviceImpl = detail::getSyclObjImpl(Device); - // TODO: increase score for devices with compatible program images. + auto &ProgramManager = detail::ProgramAndKernelManager::getInstance(); + if (ProgramManager.hasCompatibleImage(*DeviceImpl)) + Score += CompatibleImageBonus; if (DeviceImpl->getBackend() == backend::level_zero) - Score += 50; + Score += LevelZeroBonus; return Score; } diff --git a/libsycl/unittests/CMakeLists.txt b/libsycl/unittests/CMakeLists.txt index 5fa7b6ada0cb4..56bb28161737c 100644 --- a/libsycl/unittests/CMakeLists.txt +++ b/libsycl/unittests/CMakeLists.txt @@ -5,6 +5,7 @@ add_custom_target(LibsyclUnitTests) add_custom_target(check-sycl-unittests) add_subdirectory(mock) +add_subdirectory(device_selector) add_subdirectory(platform) add_subdirectory(program_manager) add_subdirectory(queue) diff --git a/libsycl/unittests/common/unittests_helper.hpp b/libsycl/unittests/common/unittests_helper.hpp new file mode 100644 index 0000000000000..a1d5c5d0ccd7f --- /dev/null +++ b/libsycl/unittests/common/unittests_helper.hpp @@ -0,0 +1,48 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Helper utilities for libsycl unit tests. +/// +//===----------------------------------------------------------------------===// + +#ifndef _LIBSYCL_UNITTESTS_COMMON_UNITTESTS_HELPER_HPP +#define _LIBSYCL_UNITTESTS_COMMON_UNITTESTS_HELPER_HPP + +#include +#include +#include + +_LIBSYCL_BEGIN_NAMESPACE_SYCL + +namespace unittests { + +// This helper is not included to LiboffloadMock to keep LiboffloadMock isolated +// from libsycl implementation and to not introduce extra operations for tests +// where devices enumeration logic is not important. LiboffloadMock provides +// default single gpu-device configuration that is enough for most of the tests. +// For tests where devices enumeration logic is important, UnittestsHelper +// allows to call global state reset and platforms initialization methods to be +// able to set expectations on devices enumeration calls in a proper way. 
+struct UnittestsHelper { + UnittestsHelper() { detail::PlatformImpl::rediscoverIfEmpty = true; } + + ~UnittestsHelper() { + if (!detail::getPlatformCache().empty()) { + detail::getPlatformCache().clear(); + detail::getOffloadTopologies() = {}; + } + } + + mock::MockWrapper Mock; +}; + +} // namespace unittests +_LIBSYCL_END_NAMESPACE_SYCL + +#endif // _LIBSYCL_UNITTESTS_COMMON_UNITTESTS_HELPER_HPP diff --git a/libsycl/unittests/device_selector/CMakeLists.txt b/libsycl/unittests/device_selector/CMakeLists.txt new file mode 100644 index 0000000000000..195ba15dca1e8 --- /dev/null +++ b/libsycl/unittests/device_selector/CMakeLists.txt @@ -0,0 +1,3 @@ +add_sycl_unittest(DeviceSelectorTests + get_device_preference.cpp +) diff --git a/libsycl/unittests/device_selector/get_device_preference.cpp b/libsycl/unittests/device_selector/get_device_preference.cpp new file mode 100644 index 0000000000000..b4b6ccfa7bcd6 --- /dev/null +++ b/libsycl/unittests/device_selector/get_device_preference.cpp @@ -0,0 +1,181 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +using namespace sycl; +using namespace ::testing; + +namespace { + +class ScopedBinaryRegistration { +public: + explicit ScopedBinaryRegistration(llvm::ArrayRef KernelNames) + : MBinary(sycl::unittest::createSYCLDeviceBinary(KernelNames)) { + sycl::detail::ProgramAndKernelManager::getInstance().registerFatBin( + MBinary.data(), MBinary.size()); + } + + ~ScopedBinaryRegistration() { + sycl::detail::ProgramAndKernelManager::getInstance().unregisterFatBin( + MBinary.data(), MBinary.size()); + } + +private: + llvm::SmallString<0> MBinary; +}; + +class DeviceSelectorScoreTest : public ::testing::Test { +protected: + void SetUp() override { + // In reality gpu and cpu devices relate to different platforms. These + // tests don't have to follow this rule since selectors work with types. 
+ Platform = mock::createDummyHandle(); + Device1 = mock::createDummyHandleWithData( + reinterpret_cast(&Platform), sizeof(Platform)); + Device2 = mock::createDummyHandleWithData( + reinterpret_cast(&Platform), sizeof(Platform)); + + EXPECT_CALL(Helper.Mock.get(), olIterateDevices(_, _)) + .WillRepeatedly([this](ol_device_iterate_cb_t Callback, + void *UserData) -> ol_result_t { + std::ignore = Callback(Device1, UserData); + std::ignore = Callback(Device2, UserData); + return OL_SUCCESS; + }); + + EXPECT_CALL(Helper.Mock.get(), + olGetDeviceInfo(_, OL_DEVICE_INFO_PLATFORM, _, _)) + .WillRepeatedly([this](ol_device_handle_t Device, + ol_device_info_t /*PropName*/, size_t PropSize, + void *PropValue) -> ol_result_t { + *static_cast(PropValue) = Platform; + return OL_SUCCESS; + }); + } + + void TearDown() override { + mock::releaseDummyHandles(Platform, Device1, Device2); + } + + unittests::UnittestsHelper Helper; + ol_platform_handle_t Platform{}; + ol_device_handle_t Device1{}; + ol_device_handle_t Device2{}; +}; + +TEST_F(DeviceSelectorScoreTest, CPUAndGPU) { + EXPECT_CALL(Helper.Mock.get(), olGetDeviceInfo(_, OL_DEVICE_INFO_TYPE, _, _)) + .WillRepeatedly([this](ol_device_handle_t Device, + ol_device_info_t /*PropName*/, size_t PropSize, + void *PropValue) -> ol_result_t { + if (Device == Device1) + *static_cast(PropValue) = OL_DEVICE_TYPE_GPU; + else if (Device == Device2) + *static_cast(PropValue) = OL_DEVICE_TYPE_CPU; + else + return mock::getMockLiboffload().makeEmptyStrError( + OL_ERRC_INVALID_NULL_HANDLE); + + return OL_SUCCESS; + }); + + auto Devices = sycl::device::get_devices(); + ASSERT_EQ(Devices.size(), 2u); + + // Device order is aligned with device iteration order + ASSERT_TRUE(Devices[0].is_gpu()); + ASSERT_TRUE(Devices[1].is_cpu()); + auto &GPUDevice = Devices[0]; + auto &CPUDevice = Devices[1]; + + EXPECT_GT(sycl::default_selector_v(GPUDevice), + sycl::default_selector_v(CPUDevice)); + + EXPECT_GT(sycl::gpu_selector_v(GPUDevice), 
sycl::gpu_selector_v(CPUDevice)); + EXPECT_GT(sycl::gpu_selector_v(GPUDevice), 0); + EXPECT_LT(sycl::gpu_selector_v(CPUDevice), 0); + + EXPECT_GT(sycl::cpu_selector_v(CPUDevice), sycl::cpu_selector_v(GPUDevice)); + EXPECT_GT(sycl::cpu_selector_v(CPUDevice), 0); + EXPECT_LT(sycl::cpu_selector_v(GPUDevice), 0); + + EXPECT_LT(sycl::accelerator_selector_v(GPUDevice), 0); + EXPECT_LT(sycl::accelerator_selector_v(CPUDevice), 0); +} + +TEST_F(DeviceSelectorScoreTest, TwoGpusOneCompatibleImage) { + EXPECT_CALL(Helper.Mock.get(), olGetDeviceInfo(_, OL_DEVICE_INFO_TYPE, _, _)) + .WillRepeatedly([](ol_device_handle_t Device, + ol_device_info_t /*PropName*/, size_t PropSize, + void *PropValue) -> ol_result_t { + *static_cast(PropValue) = OL_DEVICE_TYPE_GPU; + return OL_SUCCESS; + }); + + EXPECT_CALL(Helper.Mock.get(), olIsValidBinary(_, _, _, _)) + .WillRepeatedly([this](ol_device_handle_t Device, + const void * /*ProgData*/, size_t /*ProgDataSize*/, + bool *Valid) -> ol_result_t { + *Valid = (Device == Device2); + return OL_SUCCESS; + }); + + std::array KernelNames = {"kernel"}; + ScopedBinaryRegistration Registration{KernelNames}; + + auto Devices = sycl::device::get_devices(); + ASSERT_EQ(Devices.size(), 2u); + + // Device order is aligned with device iteration order + ASSERT_EQ(sycl::detail::getSyclObjImpl(Devices[1])->getOLHandle(), Device2); + auto &GPUDevice = Devices[0]; + auto &GPUDeviceWithImage = Devices[1]; + + EXPECT_GT(sycl::default_selector_v(GPUDeviceWithImage), + sycl::default_selector_v(GPUDevice)); + + sycl::device DefaultDevice{sycl::default_selector_v}; + auto DeviceDefaultNative = + sycl::detail::getSyclObjImpl(DefaultDevice)->getOLHandle(); + EXPECT_EQ(DeviceDefaultNative, Device2); +} + +TEST(DeviceSelector, AspectSelector) { + unittests::UnittestsHelper Helper; + auto Devices = sycl::device::get_devices(); + ASSERT_FALSE(Devices.empty()); + + const sycl::device &Dev = Devices.front(); + + const std::vector EmptyAspects{}; + const std::vector 
RequireGpu{sycl::aspect::gpu}; + const std::vector DenyGpu{sycl::aspect::gpu}; + + auto FallbackSelector = sycl::aspect_selector(EmptyAspects, EmptyAspects); + EXPECT_EQ(FallbackSelector(Dev), sycl::default_selector_v(Dev)); + + auto RequireGpuSelector = sycl::aspect_selector(RequireGpu, EmptyAspects); + EXPECT_GT(RequireGpuSelector(Dev), 0); + + auto DenyGpuSelector = sycl::aspect_selector(EmptyAspects, DenyGpu); + EXPECT_LT(DenyGpuSelector(Dev), 0); +} + +} // namespace diff --git a/libsycl/unittests/mock/helpers.hpp b/libsycl/unittests/mock/helpers.hpp index 77a4b39ca8ced..5de2ac0098f13 100644 --- a/libsycl/unittests/mock/helpers.hpp +++ b/libsycl/unittests/mock/helpers.hpp @@ -65,6 +65,10 @@ template void releaseDummyHandle(T Handle) { delete DummyHandlePtr; } +template void releaseDummyHandles(HandleT... Handles) { + (releaseDummyHandle(Handles), ...); +} + class MockLiboffload { public: MockLiboffload() { initDefault(); } From 044206f73e0dbfc9178a0dcd6052025f5dc9ac05 Mon Sep 17 00:00:00 2001 From: "forking-google-bazel-bot[bot]" <265904573+forking-google-bazel-bot[bot]@users.noreply.github.com> Date: Wed, 24 Jun 2026 18:41:51 +0200 Subject: [PATCH 366/511] [Bazel] Fixes 916a974 (#205583) This fixes 916a974941fbac24889e9c2da1972bd221d0055e. 
Co-authored-by: Google Bazel Bot --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index f6c845fefc915..fd0bf0bcce72c 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -5093,6 +5093,7 @@ cc_library( ":MaskingOpInterface", ":MemOpInterfaces", ":MemRefDialect", + ":MemRefUtils", ":SideEffectInterfaces", ":SubsetOpInterface", ":Support", @@ -11925,6 +11926,7 @@ cc_library( ":LLVMDialect", ":MaskableOpInterface", ":MemRefDialect", + ":MemRefUtils", ":Pass", ":TensorDialect", ":ToLLVMIRTranslation", @@ -13495,7 +13497,6 @@ cc_library( ":AffineDialect", ":ArithUtils", ":MemRefDialect", - ":VectorDialect", ":ViewLikeInterface", "//llvm:Support", ], From 74f9f4dc96c2d2857a78823418394c61014b6b1f Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 24 Jun 2026 11:43:40 -0500 Subject: [PATCH 367/511] [Offload] Fix ordering with RPC teardown and global destructors (#205594) Summary: There's a bit of a chicken and egg problem for the RPC server if we want to do something creative with the device's image for things like DWARF dumping. The problem was that destructors can make RPC calls, but the RPC server also needed the images to be valid. Simple fix is to just split the destructor calling out and do it first so we can deinitialize RPC with valid device images. 
--- .../common/src/PluginInterface.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index 0e0e1163d6e39..c9dbfb9d58f29 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -608,9 +608,6 @@ Error GenericDeviceTy::init(GenericPluginTy &Plugin) { } Error GenericDeviceTy::unloadBinary(DeviceImageTy *Image) { - if (auto Err = callGlobalDestructors(Plugin, *Image)) - return Err; - GenericGlobalHandlerTy &Handler = Plugin.getGlobalHandler(); auto ProfOrErr = Handler.readProfilingGlobals(*this, *Image); if (!ProfOrErr) @@ -631,6 +628,18 @@ Error GenericDeviceTy::unloadBinary(DeviceImageTy *Image) { } Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { + // Run the global destructors first in case they required the RPC server. + for (auto &I : LoadedImages) { + if (auto Err = callGlobalDestructors(Plugin, *I)) + return Err; + } + + if (RPCServer) { + if (auto Err = RPCServer->deinitDevice(*this)) + return Err; + RPCServer = nullptr; + } + for (auto &I : LoadedImages) if (auto Err = unloadBinary(I)) return Err; @@ -649,10 +658,6 @@ Error GenericDeviceTy::deinit(GenericPluginTy &Plugin) { RecordReplay = nullptr; } - if (RPCServer) - if (auto Err = RPCServer->deinitDevice(*this)) - return Err; - #ifdef OMPT_SUPPORT if (ompt::Initialized) { bool ExpectedStatus = true; From fa2b176facbefa42bf9527d140c76c4d91fa81f2 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Thu, 25 Jun 2026 00:49:59 +0800 Subject: [PATCH 368/511] [libc++] Apply `[[nodiscard]]` to `wstring_convert`, `wbuffer_convert` (#205486) `[[nodiscard]]` should be applied to functions where discarding the return value is most likely a correctness issue. 
- https://libcxx.llvm.org/CodingGuidelines.html - https://timsong-cpp.github.io/cppwp/n4950/depr.conversions.string - https://timsong-cpp.github.io/cppwp/n4950/depr.conversions.buffer --- libcxx/include/__locale_dir/wbuffer_convert.h | 4 +- libcxx/include/__locale_dir/wstring_convert.h | 22 +++++----- .../conversions.string/ctor_move.pass.cpp | 2 +- .../libcxx/localization/nodiscard.verify.cpp | 41 +++++++++++++++++++ 4 files changed, 56 insertions(+), 13 deletions(-) diff --git a/libcxx/include/__locale_dir/wbuffer_convert.h b/libcxx/include/__locale_dir/wbuffer_convert.h index a6818aadf5d0b..c8bad68011a09 100644 --- a/libcxx/include/__locale_dir/wbuffer_convert.h +++ b/libcxx/include/__locale_dir/wbuffer_convert.h @@ -67,7 +67,7 @@ class _LIBCPP_DEPRECATED_IN_CXX17 wbuffer_convert : public basic_streambuf<_Elem _LIBCPP_HIDE_FROM_ABI ~wbuffer_convert(); - _LIBCPP_HIDE_FROM_ABI streambuf* rdbuf() const { return __bufptr_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI streambuf* rdbuf() const { return __bufptr_; } _LIBCPP_HIDE_FROM_ABI streambuf* rdbuf(streambuf* __bytebuf) { streambuf* __r = __bufptr_; __bufptr_ = __bytebuf; @@ -77,7 +77,7 @@ class _LIBCPP_DEPRECATED_IN_CXX17 wbuffer_convert : public basic_streambuf<_Elem wbuffer_convert(const wbuffer_convert&) = delete; wbuffer_convert& operator=(const wbuffer_convert&) = delete; - _LIBCPP_HIDE_FROM_ABI state_type state() const { return __st_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI state_type state() const { return __st_; } protected: _LIBCPP_HIDE_FROM_ABI_VIRTUAL virtual int_type underflow(); diff --git a/libcxx/include/__locale_dir/wstring_convert.h b/libcxx/include/__locale_dir/wstring_convert.h index 8f6dc9af8da91..05b81e0a880ed 100644 --- a/libcxx/include/__locale_dir/wstring_convert.h +++ b/libcxx/include/__locale_dir/wstring_convert.h @@ -64,28 +64,30 @@ class _LIBCPP_DEPRECATED_IN_CXX17 wstring_convert { wstring_convert(const wstring_convert& __wc) = delete; wstring_convert& operator=(const 
wstring_convert& __wc) = delete; - _LIBCPP_HIDE_FROM_ABI wide_string from_bytes(char __byte) { return from_bytes(&__byte, &__byte + 1); } - _LIBCPP_HIDE_FROM_ABI wide_string from_bytes(const char* __ptr) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI wide_string from_bytes(char __byte) { + return from_bytes(&__byte, &__byte + 1); + } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI wide_string from_bytes(const char* __ptr) { return from_bytes(__ptr, __ptr + char_traits::length(__ptr)); } - _LIBCPP_HIDE_FROM_ABI wide_string from_bytes(const byte_string& __str) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI wide_string from_bytes(const byte_string& __str) { return from_bytes(__str.data(), __str.data() + __str.size()); } - _LIBCPP_HIDE_FROM_ABI wide_string from_bytes(const char* __first, const char* __last); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI wide_string from_bytes(const char* __first, const char* __last); - _LIBCPP_HIDE_FROM_ABI byte_string to_bytes(_Elem __wchar) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI byte_string to_bytes(_Elem __wchar) { return to_bytes(std::addressof(__wchar), std::addressof(__wchar) + 1); } - _LIBCPP_HIDE_FROM_ABI byte_string to_bytes(const _Elem* __wptr) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI byte_string to_bytes(const _Elem* __wptr) { return to_bytes(__wptr, __wptr + char_traits<_Elem>::length(__wptr)); } - _LIBCPP_HIDE_FROM_ABI byte_string to_bytes(const wide_string& __wstr) { + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI byte_string to_bytes(const wide_string& __wstr) { return to_bytes(__wstr.data(), __wstr.data() + __wstr.size()); } - _LIBCPP_HIDE_FROM_ABI byte_string to_bytes(const _Elem* __first, const _Elem* __last); + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI byte_string to_bytes(const _Elem* __first, const _Elem* __last); - _LIBCPP_HIDE_FROM_ABI size_t converted() const _NOEXCEPT { return __cvtcount_; } - _LIBCPP_HIDE_FROM_ABI state_type state() const { return __cvtstate_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI size_t converted() 
const _NOEXCEPT { return __cvtcount_; } + [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI state_type state() const { return __cvtstate_; } }; _LIBCPP_SUPPRESS_DEPRECATED_PUSH diff --git a/libcxx/test/extensions/libcxx/localization/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp b/libcxx/test/extensions/libcxx/localization/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp index a536e6e9b04c6..6cf0c68232cd0 100644 --- a/libcxx/test/extensions/libcxx/localization/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp +++ b/libcxx/test/extensions/libcxx/localization/locale.convenience/conversions/conversions.string/ctor_move.pass.cpp @@ -31,7 +31,7 @@ int main(int, char**) // create a converter and perform some conversions to generate some // interesting state. Myconv myconv; - myconv.from_bytes("\xEF\xBF\xBD"); + (void)myconv.from_bytes("\xEF\xBF\xBD"); const auto old_converted = myconv.converted(); assert(myconv.converted() == 3); // move construct a new converter and make sure the state is the same. 
diff --git a/libcxx/test/libcxx/localization/nodiscard.verify.cpp b/libcxx/test/libcxx/localization/nodiscard.verify.cpp index 80691e276deab..dd14091b90182 100644 --- a/libcxx/test/libcxx/localization/nodiscard.verify.cpp +++ b/libcxx/test/libcxx/localization/nodiscard.verify.cpp @@ -7,9 +7,12 @@ //===----------------------------------------------------------------------===// // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX26_REMOVED_CODECVT +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX26_REMOVED_WSTRING_CONVERT // Check that functions are marked [[nodiscard]] +#include #include #include #include @@ -242,4 +245,42 @@ void test() { f.get(0, 0, 0, std::wstring()); } #endif + + // C++23 [depr.conversions.string] + { + typedef std::wstring_convert, char16_t> converter_type; + + converter_type myconv; + converter_type::byte_string bs; + converter_type::wide_string ws; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + myconv.from_bytes('*'); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + myconv.from_bytes(""); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + myconv.from_bytes(bs); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + myconv.from_bytes(bs.data(), bs.data()); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + myconv.to_bytes(char16_t()); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + myconv.to_bytes(ws.data()); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + myconv.to_bytes(ws); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + myconv.to_bytes(ws.data(), ws.data()); + + 
myconv.converted(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + myconv.state(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + } + + // C++23 [depr.conversions.buffer] + { + std::wbuffer_convert, char16_t> myconv; + + myconv.rdbuf(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + myconv.state(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + } } From 7eeb9e0b5bdf81e9a1ad3e763d6a45974b7f435d Mon Sep 17 00:00:00 2001 From: Paul Osmialowski Date: Wed, 24 Jun 2026 18:00:27 +0100 Subject: [PATCH 369/511] [BOLT] Increase BufSize in runtime/common.h (#204607) During my work towards bolting the flang binary, I've encountered a frequently occurring problem with running out of the buffer space. The problem affects C++ programs with a decent number of very long symbol names, which is inevitable when using template metaprogramming. As one can clearly see, flang is one of such programs. The proposed BufSize value is an effect of the trial-and-error process aiming at finding the smallest reasonable increase. --- bolt/runtime/common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/runtime/common.h b/bolt/runtime/common.h index 8689bc8b72041..96e269e1bb79b 100644 --- a/bolt/runtime/common.h +++ b/bolt/runtime/common.h @@ -162,7 +162,7 @@ struct timespec { #error "For AArch64/ARM64,X86_64 AND RISCV64 only." #endif -constexpr uint32_t BufSize = 10240; +constexpr uint32_t BufSize = 32768U; // Helper functions for writing strings to the .fdata file. We intentionally // avoid using libc names to make it clear it is our impl.
From 19a2688cb8f3fc666d89e05019fed8c1855fdfca Mon Sep 17 00:00:00 2001 From: Paul Osmialowski Date: Wed, 24 Jun 2026 18:01:07 +0100 Subject: [PATCH 370/511] [flang][cmake][perf-training] Optimize flang with PGO and BOLT (#198863) This is an attempt to replicate a similar feature already available to clang. The changes in this patch were made with an intent to reuse as much of existing infrastructure as possible. Namely, two-stage build arrangement, perf-helper.py script and the means for building of the instrumented binaries have all been incorporated into this approach. It was deliberately chosen to optimize clang along with flang as they are mostly working together in the final toolchain. See the `llvm/docs/AdvancedBuilds.rst` documentation for more details. Note that the attempt to optimize flang has exceeded one of the BOLT limitations. The size of one of the statically allocated buffers needed to be extended in this patch. --- clang/CMakeLists.txt | 12 +- flang/cmake/caches/BOLT-PGO.cmake | 26 +++ flang/cmake/caches/BOLT.cmake | 19 ++ .../caches/PGO-stage2-instrumented.cmake | 28 +++ flang/cmake/caches/PGO-stage2.cmake | 4 + flang/cmake/caches/PGO.cmake | 37 +++ flang/test/CMakeLists.txt | 210 ++++++++++++++++++ flang/test/Driver/offload-device.f90 | 8 +- flang/test/bolt.lit.cfg | 31 +++ flang/test/lit.cfg.py | 24 +- flang/test/lit.site.cfg.py.in | 10 +- flang/test/pgo.lit.cfg | 11 + flang/test/test.lit.cfg | 12 + llvm/docs/AdvancedBuilds.rst | 88 +++++++- 14 files changed, 496 insertions(+), 24 deletions(-) create mode 100644 flang/cmake/caches/BOLT-PGO.cmake create mode 100644 flang/cmake/caches/BOLT.cmake create mode 100644 flang/cmake/caches/PGO-stage2-instrumented.cmake create mode 100644 flang/cmake/caches/PGO-stage2.cmake create mode 100644 flang/cmake/caches/PGO.cmake create mode 100644 flang/test/bolt.lit.cfg create mode 100644 flang/test/pgo.lit.cfg create mode 100644 flang/test/test.lit.cfg diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt
index cd7ba53b03061..e920e83a537d4 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -813,11 +813,15 @@ if (CLANG_ENABLE_BOOTSTRAP) if(LLVM_BUILD_INSTRUMENTED) string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" LLVM_BUILD_INSTRUMENTED) if (LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO") - add_dependencies(clang-bootstrap-deps generate-sprofdata) - set(PGO_OPT -DLLVM_SPROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.sprofdata) + set(PGO_OPT_SPROFDATA "${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.sprofdata" CACHE STRING "") + set(PGO_OPT_SPROFDATA_PROVIDER generate-sprofdata CACHE STRING "") + add_dependencies(clang-bootstrap-deps ${PGO_OPT_SPROFDATA_PROVIDER}) + set(PGO_OPT -DLLVM_SPROFDATA_FILE=${PGO_OPT_SPROFDATA}) else() - add_dependencies(clang-bootstrap-deps generate-profdata) - set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata) + set(PGO_OPT_PROFDATA "${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata" CACHE STRING "") + set(PGO_OPT_PROFDATA_PROVIDER generate-profdata CACHE STRING "") + add_dependencies(clang-bootstrap-deps ${PGO_OPT_PROFDATA_PROVIDER}) + set(PGO_OPT -DLLVM_PROFDATA_FILE=${PGO_OPT_PROFDATA}) endif() # Use the current tools for LTO instead of the instrumented ones list(APPEND _BOOTSTRAP_DEFAULT_PASSTHROUGH diff --git a/flang/cmake/caches/BOLT-PGO.cmake b/flang/cmake/caches/BOLT-PGO.cmake new file mode 100644 index 0000000000000..dc13daa14523f --- /dev/null +++ b/flang/cmake/caches/BOLT-PGO.cmake @@ -0,0 +1,26 @@ +# Two-stage build of Flang with the 2nd stage optimized using BOLT and PGO + +set(BOLT_PGO_CMAKE_CACHE "PGO" CACHE STRING "") +set(LLVM_ENABLE_PROJECTS "bolt;clang;flang;lld" CACHE STRING "") + +set(CLANG_BOOTSTRAP_TARGETS + stage2-clang-bolt + stage2-flang-bolt + stage2-check-clang + stage2-check-flang + stage2-distribution + stage2-install-distribution + CACHE STRING "") +set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS + clang-bolt + flang-bolt + 
check-clang + check-flang + distribution + install-distribution + CACHE STRING "") + +set(PGO_BUILD_CONFIGURATION + ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake + CACHE STRING "") +include(${CMAKE_CURRENT_LIST_DIR}/${BOLT_PGO_CMAKE_CACHE}.cmake) diff --git a/flang/cmake/caches/BOLT.cmake b/flang/cmake/caches/BOLT.cmake new file mode 100644 index 0000000000000..155eec704344b --- /dev/null +++ b/flang/cmake/caches/BOLT.cmake @@ -0,0 +1,19 @@ +# Two-stage build of Flang with the 2nd stage optimized using BOLT + +set(CMAKE_BUILD_TYPE "Release" CACHE STRING "") +set(CLANG_BOLT "INSTRUMENT" CACHE STRING "") +set(FLANG_BOLT ${CLANG_BOLT} CACHE STRING "") +set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "") +set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "") + +set(LLVM_ENABLE_PROJECTS "bolt;clang;flang" CACHE STRING "") +set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "") + +# setup toolchain +set(LLVM_INSTALL_TOOLCHAIN_ONLY ON CACHE BOOL "") +set(LLVM_DISTRIBUTION_COMPONENTS + clang + clang-resource-headers + flang + runtimes + CACHE STRING "") diff --git a/flang/cmake/caches/PGO-stage2-instrumented.cmake b/flang/cmake/caches/PGO-stage2-instrumented.cmake new file mode 100644 index 0000000000000..5cc719f56ec19 --- /dev/null +++ b/flang/cmake/caches/PGO-stage2-instrumented.cmake @@ -0,0 +1,28 @@ +# Second stage instrumentation (used by PGO.cmake) + +set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "") +set(CLANG_BOOTSTRAP_TARGETS + distribution + install-distribution + install-distribution-toolchain + check-all + check-llvm + check-clang + check-flang + test-suite CACHE STRING "") +set(FLANG_PGO_TRAINING_CLANG_COUPLING ON CACHE BOOL "") +set(PGO_OPT_PROFDATA "${CMAKE_BINARY_DIR}/flang.profdata" CACHE STRING "") +set(PGO_OPT_PROFDATA_PROVIDER generate-flang-profdata CACHE STRING "") + +if(PGO_BUILD_CONFIGURATION) + include(${PGO_BUILD_CONFIGURATION}) + set(CLANG_BOOTSTRAP_CMAKE_ARGS + -C ${PGO_BUILD_CONFIGURATION} + CACHE STRING "") +else() + 
include(${CMAKE_CURRENT_LIST_DIR}/PGO-stage2.cmake) + + set(CLANG_BOOTSTRAP_CMAKE_ARGS + -C ${CMAKE_CURRENT_LIST_DIR}/PGO-stage2.cmake + CACHE STRING "") +endif() diff --git a/flang/cmake/caches/PGO-stage2.cmake b/flang/cmake/caches/PGO-stage2.cmake new file mode 100644 index 0000000000000..b38a3adaa4fbc --- /dev/null +++ b/flang/cmake/caches/PGO-stage2.cmake @@ -0,0 +1,4 @@ +# Second stage of PGO (used by PGO-stage2-instrumented.cmake) + +set(CMAKE_BUILD_TYPE "Release" CACHE STRING "") +set(LLVM_ENABLE_PROJECTS "clang;flang;lld" CACHE STRING "") diff --git a/flang/cmake/caches/PGO.cmake b/flang/cmake/caches/PGO.cmake new file mode 100644 index 0000000000000..73219b0085ba0 --- /dev/null +++ b/flang/cmake/caches/PGO.cmake @@ -0,0 +1,37 @@ +# Two-stage build of Flang with the 2nd stage optimized using PGO + +set(CMAKE_BUILD_TYPE "Release" CACHE STRING "") +set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "") + +set(LLVM_ENABLE_PROJECTS "clang;flang;lld" CACHE STRING "") + +set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "") +set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED IR CACHE BOOL "") +set(CLANG_BOOTSTRAP_TARGETS + generate-flang-profdata + stage2 + stage2-distribution + stage2-install-distribution + stage2-install-distribution-toolchain + stage2-check-all + stage2-check-llvm + stage2-check-clang + stage2-check-flang + stage2-test-suite CACHE STRING "") +set(FLANG_PGO_TRAINING_CLANG_COUPLING ON CACHE BOOL "") +set(PGO_OPT_PROFDATA "${CMAKE_BINARY_DIR}/flang.profdata" CACHE STRING "") +set(PGO_OPT_PROFDATA_PROVIDER generate-flang-profdata CACHE STRING "") + +if(PGO_INSTRUMENT_LTO) + set(BOOTSTRAP_LLVM_ENABLE_LTO ${PGO_INSTRUMENT_LTO} CACHE BOOL "") + set(BOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LTO ${PGO_INSTRUMENT_LTO} CACHE BOOL "") +endif() + +if(PGO_BUILD_CONFIGURATION) + set(EXTRA_ARGS -DPGO_BUILD_CONFIGURATION=${PGO_BUILD_CONFIGURATION}) +endif() + +set(CLANG_BOOTSTRAP_CMAKE_ARGS + ${EXTRA_ARGS} + -C ${CMAKE_CURRENT_LIST_DIR}/PGO-stage2-instrumented.cmake + CACHE STRING "") diff --git 
a/flang/test/CMakeLists.txt b/flang/test/CMakeLists.txt index 0e26711e76467..538432c05e14b 100644 --- a/flang/test/CMakeLists.txt +++ b/flang/test/CMakeLists.txt @@ -1,3 +1,20 @@ +set(FLANG_PGO_TRAINING_DATA_SOURCE_DIR OFF CACHE STRING "Path to source directory containing cmake project with source files to use for generating flang pgo data") +set(FLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS "" CACHE STRING "Extra CMake flags to pass to the cmake project with source files to use for generating flang pgo data") +set(FLANG_PGO_TRAINING_DEPS "" CACHE STRING "Extra dependencies needed to build the PGO training data.") +set(FLANG_PGO_TRAINING_CLANG_COUPLING ON CACHE BOOL "Train clang and flang together") +if(FLANG_PGO_TRAINING_CLANG_COUPLING) + set(CLANG_PGO_TRAINING_DATA_SOURCE_DIR OFF CACHE STRING "Path to source directory containing cmake project with source files to use for generating clang pgo data") + set(CLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS "" CACHE STRING "Extra CMake flags to pass to the cmake project with source files to use for generating clang pgo data") +endif() + +set(PERF_HELPER "${LLVM_MAIN_SRC_DIR}/../clang/utils/perf-training/perf-helper.py") + +add_custom_target(clear-flang-perf-data + COMMAND "${Python3_EXECUTABLE}" "${PERF_HELPER}" clean "${CMAKE_CURRENT_BINARY_DIR}" perf.data + COMMENT "Clearing old flang perf data") + +option(FLANG_PGO_TRAINING_USE_LLVM_BUILD "Use LLVM build for generating PGO data" ON) + # Test runner infrastructure for Flang. This configures the Flang test trees # for use by Lit, and delegates to LLVM's lit test handlers. 
add_subdirectory(lib) @@ -22,6 +39,7 @@ option(FLANG_TEST_ENABLE_OPENMP "Force-enable tests that require modules from Op llvm_canonicalize_cmake_booleans( FLANG_STANDALONE_BUILD + FLANG_PGO_TRAINING_USE_LLVM_BUILD LLVM_BYE_LINK_INTO_TOOLS LLVM_ENABLE_PLUGINS LLVM_INCLUDE_EXAMPLES @@ -51,6 +69,7 @@ else () set(PATHS_FOR_PLUGINS "SHLIBDIR") endif () +set(LIT_LOAD_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/test.lit.cfg") configure_lit_site_cfg( ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in ${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg.py @@ -126,6 +145,23 @@ if ("flang-rt" IN_LIST LLVM_ENABLE_RUNTIMES AND NOT FLANG_STANDALONE_BUILD) list(APPEND FLANG_TEST_DEPENDS flang-rt-test-deps) endif () +set(FLANG_BOLT_ALLOWLIST INSTRUMENT PERF LBR) +set(FLANG_BOLT OFF CACHE STRING "Apply BOLT optimization to flang. \ +May be specified as one of ${FLANG_BOLT_ALLOWLIST} to use a particular profiling \ + mechanism.") +set_property(CACHE FLANG_BOLT PROPERTY STRINGS OFF ${FLANG_BOLT_ALLOWLIST}) +string(TOUPPER "${FLANG_BOLT}" FLANG_BOLT) +if (FLANG_BOLT AND NOT FLANG_BOLT IN_LIST FLANG_BOLT_ALLOWLIST) + message(FATAL_ERROR "Specified FLANG_BOLT value '${FLANG_BOLT}' is not one of ${FLANG_BOLT_ALLOWLIST}.") +endif() + +if (FLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED) + list(APPEND FLANG_TEST_DEPENDS + clear-flang-bolt-fdata + llvm-bolt + clang-bolt) +endif() + add_custom_target(flang-test-depends DEPENDS ${FLANG_TEST_DEPENDS}) set_target_properties(flang-test-depends PROPERTIES FOLDER "Flang/Meta") @@ -154,3 +190,177 @@ if (DEFINED FLANG_TEST_TARGET_TRIPLE) "to use FLANG_TEST_TARGET_TRIPLE.") endif() endif() + +include(LLVMExternalProjectUtils) + +if(LLVM_BUILD_INSTRUMENTED) + set(CLANG_CURRENT_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/../../clang/utils/perf-training") + set(LIT_LOAD_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/pgo.lit.cfg") + configure_lit_site_cfg( + "${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in" + "${CMAKE_CURRENT_BINARY_DIR}/pgo-data/lit.site.cfg" + MAIN_CONFIG + 
"${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py" + ) + + add_lit_testsuite(generate-flang-profraw "Generating flang PGO data" + "${CMAKE_CURRENT_BINARY_DIR}/pgo-data/" + EXCLUDE_FROM_CHECK_ALL + DEPENDS ${FLANG_TEST_DEPENDS} + ) + if(FLANG_PGO_TRAINING_CLANG_COUPLING) + add_lit_testsuite(generate-clang-profraw "Generating clang PGO data" + "${CLANG_CURRENT_BINARY_DIR}/pgo-data/" + EXCLUDE_FROM_CHECK_ALL + DEPENDS clang + ) + endif() + + add_custom_target(clear-flang-profraw + COMMAND "${Python3_EXECUTABLE}" "${PERF_HELPER}" clean "${CMAKE_CURRENT_BINARY_DIR}/.." "${CMAKE_BINARY_DIR}/profiles/" profraw + COMMENT "Clearing old flang profraw data") + if (FLANG_PGO_TRAINING_CLANG_COUPLING) + add_dependencies(clear-flang-profraw clear-profraw) + endif() + + if(NOT LLVM_PROFDATA) + find_program(LLVM_PROFDATA llvm-profdata) + endif() + + if(NOT LLVM_PROFDATA) + message(STATUS "To enable merging PGO data LLVM_PROFDATA has to point to llvm-profdata") + else() + set(PROFDATA "${CMAKE_BINARY_DIR}/flang.profdata") + set(PROFRAW_TARGETS "") + set(PROFRAW_DIRS "") + set(PROFRAW_DEPS "") + if(FLANG_PGO_TRAINING_CLANG_COUPLING) + list(APPEND PROFRAW_TARGETS generate-clang-profraw) + list(APPEND PROFRAW_DIRS "${CLANG_CURRENT_BINARY_DIR}") + list(APPEND PROFRAW_DEPS clang) + if (CLANG_PGO_TRAINING_DATA_SOURCE_DIR) + llvm_ExternalProject_Add(generate-clang-profraw-external + "${CLANG_PGO_TRAINING_DATA_SOURCE_DIR}" + USE_TOOLCHAIN + EXCLUDE_FROM_ALL + NO_INSTALL + CMAKE_ARGS "${CLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS}") + list(APPEND PROFRAW_TARGETS generate-clang-profraw-external) + endif() + endif() + list(APPEND PROFRAW_TARGETS generate-flang-profraw) + list(APPEND PROFRAW_DIRS + "${CMAKE_CURRENT_BINARY_DIR}/.." 
+ "${CMAKE_BINARY_DIR}/profiles/") + list(APPEND PROFRAW_DEPS + flang + flang-rt) + if (FLANG_PGO_TRAINING_DATA_SOURCE_DIR) + llvm_ExternalProject_Add(generate-flang-profraw-external + "${FLANG_PGO_TRAINING_DATA_SOURCE_DIR}" + USE_TOOLCHAIN + ENABLE_FORTRAN + EXCLUDE_FROM_ALL + NO_INSTALL + CMAKE_ARGS "${FLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS}") + list(APPEND PROFRAW_TARGETS generate-flang-profraw-external) + endif() + add_custom_command( + OUTPUT "${PROFDATA}" + # PROFRAW_TARGETS are custom targets which are always considered stale. + # If we add them here to 'DEPENDS', then it will always execute and running + # ninja install && ninja check-all will result in the profile data being + # generated twice, and cause the ninja check-all build to fail with errors like: + # `ld.lld: error: Function Import: link error: linking module flags 'ProfileSummary': IDs have conflicting values in` + # Therefore we call these targets manually as part of this custom command, + # which will only run if flang or ${FLANG_PGO_TRAINING_DEPS} are updated. + COMMAND "${CMAKE_COMMAND}" --build "${CMAKE_BINARY_DIR}" --target ${PROFRAW_TARGETS} + COMMAND "${Python3_EXECUTABLE}" "${PERF_HELPER}" merge "${LLVM_PROFDATA}" "${PROFDATA}" ${PROFRAW_DIRS} + COMMENT "Merging flang profdata" + DEPENDS ${PROFRAW_DEPS} ${FLANG_PGO_TRAINING_DEPS} clear-flang-profraw + ) + add_custom_target(generate-flang-profdata DEPENDS "${PROFDATA}") + + if(FLANG_PGO_TRAINING_CLANG_COUPLING) + string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" LLVM_BUILD_UPPER_INSTRUMENTED) + if (LLVM_BUILD_UPPER_INSTRUMENTED STREQUAL "CSSPGO") + message(STATUS "CSSPGO of flang is not supported") + else() + add_dependencies(clang-bootstrap-deps generate-flang-profdata) + endif() + endif() + endif() +endif() + +if (FLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED) + # Add a flang-bolt target for backwards compatibility. 
+ add_custom_target(flang-bolt DEPENDS flang-test-depends) + + set(FLANG_BOLT_INSTRUMENTED "flang-bolt.inst" CACHE STRING + "Name of BOLT-instrumented flang binary") + set(FLANG_INSTRUMENTED "${LLVM_RUNTIME_OUTPUT_INTDIR}/${FLANG_BOLT_INSTRUMENTED}") + set(PERF_TRAINING_BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}") + set(FLANG_BOLT_FDATA "${PERF_TRAINING_BINARY_DIR}/flang-prof.fdata") + get_llvm_lit_path( + lit_base_dir + lit_file_name + ALLOW_EXTERNAL + ) + set(LIT_COMMAND "${lit_base_dir}/${lit_file_name}") + + set(FLANG_BOLT_INPUTS "$") + set(FLANG_INSTRUMENTED_OUTPUTS "${FLANG_INSTRUMENTED}") + + # Add in dynamically linked libraries, if needs be. Currently only supported + # on Linux because it relies on LD_PRELOAD for instrumentation. + if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + if (LLVM_LINK_LLVM_DYLIB) + set(LLVM_BOLT_INSTRUMENTED "LLVM-bolt.inst" CACHE STRING + "Name of BOLT-instrumented LLVM library") + set(LLVM_INSTRUMENTED "${LLVM_RUNTIME_OUTPUT_INTDIR}/${LLVM_BOLT_INSTRUMENTED}") + list(APPEND FLANG_BOLT_INPUTS "$") + list(APPEND FLANG_INSTRUMENTED_OUTPUTS "${LLVM_INSTRUMENTED}") + endif() + endif() + + # This POST_BUILD command is executed unconditionally even if the flang target + # is already built. We need to wrap the whole bolt optimization process in + # a single python wrapper, so that we can first check if the binary has + # already been optimized and then exit early with a 0 status if it has. 
+ add_custom_command( + TARGET flang-test-depends POST_BUILD + COMMAND "${Python3_EXECUTABLE}" "${PERF_HELPER}" + bolt-optimize + --method "${FLANG_BOLT}" + --input "${FLANG_BOLT_INPUTS}" + --instrumented-output "${FLANG_INSTRUMENTED_OUTPUTS}" + --fdata "${FLANG_BOLT_FDATA}" + --perf-training-binary-dir "${PERF_TRAINING_BINARY_DIR}" + --readelf "$" + --bolt "$" + --lit "${LIT_COMMAND}" + --merge-fdata "$" + COMMENT "Optimizing flang with BOLT" + USES_TERMINAL + VERBATIM + ) + + set(LIT_LOAD_CONFIG "${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.cfg") + configure_lit_site_cfg( + "${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in" + "${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg" + MAIN_CONFIG + "${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py" + ) + + add_lit_testsuite(generate-flang-bolt-fdata "Generating BOLT profile for flang" + "${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/" + EXCLUDE_FROM_CHECK_ALL + DEPENDS flang-test-depends clear-flang-bolt-fdata clear-flang-perf-data + ) + + add_custom_target(clear-flang-bolt-fdata + COMMAND "${Python3_EXECUTABLE}" "${PERF_HELPER}" clean "${CMAKE_CURRENT_BINARY_DIR}" fdata + COMMENT "Clearing old flang BOLT fdata") + +endif() diff --git a/flang/test/Driver/offload-device.f90 b/flang/test/Driver/offload-device.f90 index 0137b61bcee0a..13834171a9233 100644 --- a/flang/test/Driver/offload-device.f90 +++ b/flang/test/Driver/offload-device.f90 @@ -5,18 +5,18 @@ ! RUN: %flang -target aarch64-linux-gnu --no-offloadlib --offload-arch=sm_80 -xcuda %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK,CUDA ! Compiled as CUDA, device-compilation is done first -! CUDA: {{(^|[\\/])flang(-[0-9]+)?(\.exe)?}}" "-fc1" "-triple" "nvptx64-nvidia-cuda" +! CUDA: {{(^|[\\/])flang(-[0-9]+)?(-bolt\.inst)?(\.exe)?}}" "-fc1" "-triple" "nvptx64-nvidia-cuda" ! CUDA-SAME: "-foffload-device" ! Host invocation -! CHECK: {{(^|[\\/])flang(-[0-9]+)?(\.exe)?}}" "-fc1" "-triple" "aarch64-unknown-linux-gnu" +! 
CHECK: {{(^|[\\/])flang(-[0-9]+)?(-bolt\.inst)?(\.exe)?}}" "-fc1" "-triple" "aarch64-unknown-linux-gnu" ! CHECK-NOT: -foffload-device ! Compiled as OpenMP, device-code is compiled after host-code compilation, ! once for each --offload-arch argument -! OPENMP: {{(^|[\\/])flang(-[0-9]+)?(\.exe)?}}" "-fc1" "-triple" "amdgcn-amd-amdhsa" +! OPENMP: {{(^|[\\/])flang(-[0-9]+)?(-bolt\.inst)?(\.exe)?}}" "-fc1" "-triple" "amdgcn-amd-amdhsa" ! OPENMP-SAME: "-foffload-device" -! OPENMP: {{(^|[\\/])flang(-[0-9]+)?(\.exe)?}}" "-fc1" "-triple" "nvptx64-nvidia-cuda" +! OPENMP: {{(^|[\\/])flang(-[0-9]+)?(-bolt\.inst)?(\.exe)?}}" "-fc1" "-triple" "nvptx64-nvidia-cuda" ! OPENMP-SAME: "-foffload-device" diff --git a/flang/test/bolt.lit.cfg b/flang/test/bolt.lit.cfg new file mode 100644 index 0000000000000..d245d8c616881 --- /dev/null +++ b/flang/test/bolt.lit.cfg @@ -0,0 +1,31 @@ +# -*- Python -*- + +# Configuration used for performance training when optimizing flang with BOLT + +import os + +import lit.util + +flang_bolt_mode = config.flang_bolt_mode.lower() +flang_binary = "flang" +perf_wrapper = f"{sys.executable} {config.perf_helper_dir}/perf-helper.py perf " + +if flang_bolt_mode == "instrument": + perf_wrapper = "" + flang_binary = config.flang_bolt_name +elif flang_bolt_mode == "lbr": + perf_wrapper += " --lbr -- " +elif flang_bolt_mode == "perf": + perf_wrapper += " -- " +else: + assert 0, "Unsupported flang bolt mode" + +flang_nowrapper = os.path.realpath( + lit.util.which(flang_binary, config.flang_opt_tools_dir) +).replace("\\", "/") + +config.name = "Flang BOLT Training" + +config.flang_exe = perf_wrapper + flang_nowrapper + +config.llvm_profile_file = '' diff --git a/flang/test/lit.cfg.py b/flang/test/lit.cfg.py index 3c6a33e010f59..b1abadb32fedb 100644 --- a/flang/test/lit.cfg.py +++ b/flang/test/lit.cfg.py @@ -15,9 +15,6 @@ # Configuration file for the 'lit' test runner. -# name: The name of this test suite. 
-config.name = "Flang" - # TODO: Consolidate the logic for turning on the internal shell by default for all LLVM test suites. # See https://github.com/llvm/llvm-project/issues/106636 for more details. # @@ -121,6 +118,12 @@ llvm_config.with_environment("PATH", config.flang_tools_dir, append_path=True) llvm_config.with_environment("PATH", config.llvm_tools_dir, append_path=True) +if config.llvm_profile_file: + config.environment["LLVM_PROFILE_FILE"] = config.llvm_profile_file + llvm_config.with_environment( + "LLVM_PROFILE_FILE", config.llvm_profile_file, append_path=False + ) + if config.flang_standalone_build: # For builds with FIR, set path for tco and enable related tests if config.flang_llvm_tools_dir != "": @@ -143,8 +146,7 @@ host_triple = config.host_triple.split("-") config.available_features.add(f"{host_triple[0]}-host") -flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) -if not flang_exe: +if not config.flang_exe: lit_config.fatal(f"Could not identify flang executable") # Intrinsic paths that are added implicitly by the `flang` driver, but have to be added manually when invoking the frontend `flang -fc1`. @@ -158,7 +160,11 @@ def get_resource_module_intrinsic_dir(modfile): # Determine the intrinsic module search path that is added by the driver. If # skipping the driver using -fc1, we need to append the path manually. 
flang_intrinsics_dir = subprocess.check_output( - [flang_exe, *config.flang_test_fortran_flags, f"-print-file-name={modfile}"], + [ + config.flang_exe, + *config.flang_test_fortran_flags, + f"-print-file-name={modfile}", + ], text=True, ).strip() flang_intrinsics_dir = os.path.dirname(flang_intrinsics_dir) @@ -193,7 +199,7 @@ def get_resource_module_intrinsic_dir(modfile): ) -lit_config.note(f"using flang: {flang_exe}") +lit_config.note(f"using flang: {config.flang_exe}") lit_config.note( f"using flang implicit search paths: {' '.join(flang_driver_search_args)}" ) @@ -210,13 +216,13 @@ def get_resource_module_intrinsic_dir(modfile): ), ToolSubst( "%flang", - command=flang_exe, + command=config.flang_exe, extra_args=flang_extra_search_args, unresolved="fatal", ), ToolSubst( "%flang_fc1", - command=flang_exe, + command=config.flang_exe, extra_args=["-fc1"] + flang_driver_search_args + flang_extra_search_args, unresolved="fatal", ), diff --git a/flang/test/lit.site.cfg.py.in b/flang/test/lit.site.cfg.py.in index 74c3e07e0a402..12010e74fea43 100644 --- a/flang/test/lit.site.cfg.py.in +++ b/flang/test/lit.site.cfg.py.in @@ -11,7 +11,7 @@ config.target_triple = "@LLVM_TARGET_TRIPLE@" config.llvm_target_triple_env = "@LLVM_TARGET_TRIPLE_ENV@" config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@" config.errc_messages = "@LLVM_LIT_ERRC_MESSAGES@" -config.flang_obj_root = "@FLANG_BINARY_DIR@" +config.flang_obj_root = path(r"@FLANG_BINARY_DIR@") config.flang_tools_dir = lit_config.substitute("@FLANG_TOOLS_DIR@") config.flang_headers_dir = "@HEADER_BINARY_DIR@" config.flang_llvm_tools_dir = "@CMAKE_BINARY_DIR@/bin" @@ -30,8 +30,16 @@ config.default_sysroot = "@DEFAULT_SYSROOT@" config.flang_runtime_f128_math_lib = "@FLANG_RUNTIME_F128_MATH_LIB@" config.have_ldbl_mant_dig_113 = "@HAVE_LDBL_MANT_DIG_113@" +config.perf_helper_dir = "@LLVM_MAIN_SRC_DIR@/../clang/utils/perf-training" +config.flang_bolt_mode = "@FLANG_BOLT@" +config.flang_bolt_name = "@FLANG_BOLT_INSTRUMENTED@" 
+config.flang_opt_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@") + import lit.llvm lit.llvm.initialize(lit_config, config) +# Let the detailed config do the details. +lit_config.load_config(config, "@LIT_LOAD_CONFIG@") + # Let the main config do the real work. lit_config.load_config(config, "@FLANG_SOURCE_DIR@/test/lit.cfg.py") diff --git a/flang/test/pgo.lit.cfg b/flang/test/pgo.lit.cfg new file mode 100644 index 0000000000000..2c08f0f2a914c --- /dev/null +++ b/flang/test/pgo.lit.cfg @@ -0,0 +1,11 @@ +# -*- Python -*- + +# Configuration used for performance training when optimizing flang with PGO + +import lit.util + +config.name = 'Flang PGO Training' + +config.flang_exe = lit.util.which('flang', config.flang_opt_tools_dir).replace('\\', '/') + +config.llvm_profile_file = '../perf-training-%4m.profraw' diff --git a/flang/test/test.lit.cfg b/flang/test/test.lit.cfg new file mode 100644 index 0000000000000..69ee4ac0e0cb4 --- /dev/null +++ b/flang/test/test.lit.cfg @@ -0,0 +1,12 @@ +# -*- Python -*- + +# Configuration used for running the test suite the usual way + +import lit.util + +# name: The name of this test suite. +config.name = "Flang" + +config.flang_exe = lit.util.which("flang", config.flang_llvm_tools_dir) + +config.llvm_profile_file = '' diff --git a/llvm/docs/AdvancedBuilds.rst b/llvm/docs/AdvancedBuilds.rst index 9e25355365a81..771b1a2b50efc 100644 --- a/llvm/docs/AdvancedBuilds.rst +++ b/llvm/docs/AdvancedBuilds.rst @@ -21,10 +21,17 @@ generator. Many of the build configurations mentioned on this documentation page can be utilized by using a CMake cache. A CMake cache is essentially a configuration -file that sets the necessary flags for a specific build configuration. The caches -for Clang are located in :code:`/clang/cmake/caches` within the monorepo. They -can be passed to CMake using the :code:`-C` flag as demonstrated in the examples -below along with additional configuration flags. 
+file that sets the necessary flags for a specific build configuration. + +The caches for Clang are located in :code:`/clang/cmake/caches` within the +monorepo. They can be passed to CMake using the :code:`-C` flag as demonstrated +in the examples below along with additional configuration flags. + +The caches for Flang are located in :code:`/flang/cmake/caches` within the +monorepo. They can be passed to CMake using the :code:`-C` flag as demonstrated +in the examples below along with additional configuration flags. Due to +Flang's heavy reliance on Clang, these caches ensure equal handling of Flang +and Clang, resulting in both being built within the same arrangement. Bootstrap Builds ================ @@ -132,6 +139,15 @@ configuration with CMake with the following command: $ cmake -G Ninja -C <path to source>/clang/cmake/caches/PGO.cmake \ <path to source>/llvm +Similarly, to build optimized Flang (along with Clang), you can use the +following command: + +.. code-block:: console + + $ cmake -G Ninja -C <path to source>/flang/cmake/caches/PGO.cmake \ + -DLLVM_ENABLE_RUNTIMES="compiler-rt;flang-rt;libunwind;openmp" \ + <path to source>/llvm + There are several additional options that the cache file also accepts to modify the build, particularly the ``PGO_INSTRUMENT_LTO`` option. Setting this option to Thin or Full will enable ThinLTO or full LTO respectively, further enhancing @@ -145,8 +161,8 @@ that also enables ThinLTO, use the following command: -DPGO_INSTRUMENT_LTO=Thin \ <path to source>/llvm -By default, clang will generate profile data by compiling a simple -hello world program. You can also tell clang to use an external +By default, the compiler will generate profile data by compiling a simple +hello world program. You can also configure the use of an external project for generating profile data that may be a better fit for your use case. 
The project you specify must either be a lit test suite (use the ``CLANG_PGO_TRAINING_DATA`` option) or a CMake project (use the @@ -162,6 +178,19 @@ profile data you would use the following command: -DBOOTSTRAP_CLANG_PGO_TRAINING_DATA_SOURCE_DIR=<path to llvm-test-suite> \ -DBOOTSTRAP_CLANG_PGO_TRAINING_DEPS=runtimes +Similarly, to build optimized Flang (along with Clang) using external training +projects, you can use the following command: + +.. code-block:: console + + $ cmake -G Ninja -C <path to source>/flang/cmake/caches/PGO.cmake \ + -DLLVM_ENABLE_RUNTIMES="compiler-rt;flang-rt;libunwind;openmp" \ + -DBOOTSTRAP_CLANG_PGO_TRAINING_DATA_SOURCE_DIR=<path to llvm-test-suite> \ + -DBOOTSTRAP_CLANG_PGO_TRAINING_DEPS=runtimes \ + -DBOOTSTRAP_FLANG_PGO_TRAINING_DATA_SOURCE_DIR=<path to llvm-test-suite> \ + -DBOOTSTRAP_FLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS="-DTEST_SUITE_SUBDIRS=Fortran" \ + -DBOOTSTRAP_FLANG_PGO_TRAINING_DEPS=runtimes + The ``BOOTSTRAP\_`` prefix tells CMake to pass the variables on to the instrumented stage two build. And the ``CLANG_PGO_TRAINING_DEPS`` option lets you specify additional build targets to build before building the external project. The @@ -191,6 +220,12 @@ should be at a path something like: <build dir>/tools/clang/stage2-instrumented-bins/utils/perf-training/clang.profdata +For Flang, the profile data should be at a path something like: + +.. code-block:: console + + <build dir>/tools/clang/stage2-instrumented-bins/flang.profdata + You can feed that file into the ``LLVM_PROFDATA_FILE`` option when you build your optimized compiler. @@ -202,6 +237,12 @@ variable for that purpose: set(CLANG_PGO_TRAINING_DEPS builtins runtimes CACHE STRING "") +Similarly, for Flang you can make use of :code:`FLANG_PGO_TRAINING_DEPS`: + +.. code-block:: cmake + + set(FLANG_PGO_TRAINING_DEPS builtins runtimes CACHE STRING "") + The PGO cache has a slightly different stage naming scheme than other multi-stage builds. It generates three stages: stage1, stage2-instrumented, and stage2. Both of the stage2 builds are built using the stage1 compiler. 
@@ -226,6 +267,10 @@ The PGO cache generates the following additional targets: **stage2-check-clang** Depends on stage2 and runs check-clang using the stage2 compiler. +**stage2-check-flang** + Depends on stage2 and runs check-flang using the stage2 compiler (when using + Flang's CMake caches). + **stage2-check-all** Depends on stage2 and runs check-all using the stage2 compiler. @@ -256,6 +301,20 @@ Then, build the BOLT-optimized binary by running the following ninja command: $ ninja clang-bolt +Similarly, to get both Flang and Clang optimized, use the following CMake +configuration: + +.. code-block:: console + + $ cmake <path to source>/llvm -C <path to source>/flang/cmake/caches/BOLT.cmake + +Then, build the BOLT-optimized flang and clang binaries by running the following +ninja command: + +.. code-block:: console + + $ ninja flang-bolt + If you're seeing errors in the build process, try building with a recent version of Clang/LLVM by setting the ``CMAKE_C_COMPILER`` and ``CMAKE_CXX_COMPILER`` flags to the appropriate values. @@ -279,6 +338,23 @@ Then, to build the final optimized binary, build the stage2-clang-bolt target: $ ninja stage2-clang-bolt +Similarly, to get both Flang and Clang optimized, use the following CMake +configuration: + +.. code-block:: console + + $ cmake -G Ninja <path to source>/llvm \ + -C <path to source>/flang/cmake/caches/BOLT-PGO.cmake \ + -DBOOTSTRAP_LLVM_ENABLE_LLD=ON \ + -DBOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD=ON \ + -DPGO_INSTRUMENT_LTO=Thin + +Then, to build the final optimized binaries, build the stage2-flang-bolt target: + +.. code-block:: console + + $ ninja stage2-flang-bolt + 3-Stage Non-Determinism ======================= From 94016cd5ebd0c5108f274eea26cd780d3420053f Mon Sep 17 00:00:00 2001 From: Leandro Lupori Date: Wed, 24 Jun 2026 14:15:15 -0300 Subject: [PATCH 371/511] [mlir][OpenMP] Don't use label prefixes on linear variable rewrite (#200900) This is a follow-up to #194623. After that PR, matching specific label prefixes became unnecessary. 
In fact, doing so could potentially lead to missed linear variables in the rewrite, if they appear in basic blocks with unexpected label prefixes. --- .../OpenMP/OpenMPToLLVMIRTranslation.cpp | 60 ++++++------------- .../Target/LLVMIR/openmp-wsloop-linear.mlir | 32 ++++++++++ 2 files changed, 49 insertions(+), 43 deletions(-) create mode 100644 mlir/test/Target/LLVMIR/openmp-wsloop-linear.mlir diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index edfa407234fa0..651ec83bf5798 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -284,42 +284,36 @@ class LinearClauseProcessor { } } - // Rewrite all uses of the original variable, in the basic blocks whose names - // start with `prefix`, with the linear variable in-place. + // Rewrite all uses of the original variable, in the basic blocks in the + // [startBB, endBB] interval, with the linear variable in-place. void rewriteInPlace(llvm::IRBuilderBase &builder, llvm::BasicBlock *startBB, - llvm::BasicBlock *endBB, llvm::StringRef prefix, - size_t varIndex) { + llvm::BasicBlock *endBB, size_t varIndex) { llvm::SmallVector worklist; - llvm::SmallPtrSet visited; - llvm::SmallPtrSet matchingBBs; + llvm::SmallPtrSet collectedBBs; assert(startBB && endBB && "Invalid startBB/endBB"); - // Traverse basic blocks from startBB to endBB and save those - // whose names start with the specified prefix. + // Collect basic blocks from startBB to endBB. 
worklist.push_back(startBB); - visited.insert(startBB); + collectedBBs.insert(startBB); while (!worklist.empty()) { llvm::BasicBlock *bb = worklist.pop_back_val(); - if (bb->hasName() && bb->getName().starts_with(prefix)) - matchingBBs.insert(bb); - if (bb == endBB) continue; for (llvm::BasicBlock *succ : llvm::successors(bb)) { - if (visited.insert(succ).second) + if (collectedBBs.insert(succ).second) worklist.push_back(succ); } } - // Rewrite all uses in the matching BBs. + // Rewrite all uses in the collected BBs. llvm::SmallVector users(linearOrigVal[varIndex]->users()); for (auto *user : users) { if (auto *userInst = dyn_cast(user)) { - if (matchingBBs.contains(userInst->getParent())) + if (collectedBBs.contains(userInst->getParent())) user->replaceUsesOfWith(linearOrigVal[varIndex], linearLoopBodyTemps[varIndex]); } @@ -4469,7 +4463,6 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, linearClauseProcessor.initLinearStep(moduleTranslation, linearStep); } - llvm::BasicBlock *sourceBlock = builder.GetInsertBlock(); llvm::Expected regionBlock = convertOmpOpRegions( wsloopOp.getRegion(), "omp.wsloop.region", builder, moduleTranslation); @@ -4510,6 +4503,10 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, } } + for (size_t index = 0; index < wsloopOp.getLinearVars().size(); index++) + linearClauseProcessor.rewriteInPlace(builder, loopInfo->getBody(), + loopInfo->getLatch(), index); + llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = ompBuilder->applyWorkshareLoop( ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier, @@ -4531,10 +4528,6 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, loopInfo->getLastIter()); if (failed(handleError(afterBarrierIP, *loopOp))) return failure(); - for (size_t index = 0; index < wsloopOp.getLinearVars().size(); index++) - linearClauseProcessor.rewriteInPlace( - builder, sourceBlock->getSingleSuccessor(), *regionBlock, - "omp.loop_nest.region", index); builder.restoreIP(oldIP); 
} @@ -4920,6 +4913,10 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, } builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin()); + for (size_t index = 0; index < simdOp.getLinearVars().size(); index++) + linearClauseProcessor.rewriteInPlace(builder, loopInfo->getBody(), + loopInfo->getLatch(), index); + ompBuilder->applySimd(loopInfo, alignedVars, simdOp.getIfExpr() ? moduleTranslation.lookupValue(simdOp.getIfExpr()) @@ -4928,29 +4925,6 @@ convertOmpSimd(Operation &opInst, llvm::IRBuilderBase &builder, linearClauseProcessor.emitStoresForLinearVar(builder); - // Check if this SIMD loop contains ordered regions - bool hasOrderedRegions = false; - simdOp.getRegion().walk([&](omp::OrderedRegionOp orderedOp) { - hasOrderedRegions = true; - return WalkResult::interrupt(); - }); - - for (size_t index = 0; index < simdOp.getLinearVars().size(); index++) { - llvm::BasicBlock *startBB = sourceBlock->getSingleSuccessor(); - llvm::BasicBlock *endBB = *regionBlock; - linearClauseProcessor.rewriteInPlace(builder, startBB, endBB, - "omp.loop_nest.region", index); - - if (hasOrderedRegions) { - // Also rewrite uses in ordered regions so they read the current value - linearClauseProcessor.rewriteInPlace(builder, startBB, endBB, - "omp.ordered.region", index); - // Also rewrite uses in finalize blocks (code after ordered regions) - linearClauseProcessor.rewriteInPlace(builder, startBB, endBB, - "omp_region.finalize", index); - } - } - // We now need to reduce the per-simd-lane reduction variable into the // original variable. This works a bit differently to other reductions (e.g. 
// wsloop) because we don't need to call into the OpenMP runtime to handle diff --git a/mlir/test/Target/LLVMIR/openmp-wsloop-linear.mlir b/mlir/test/Target/LLVMIR/openmp-wsloop-linear.mlir new file mode 100644 index 0000000000000..03429617a58d2 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-wsloop-linear.mlir @@ -0,0 +1,32 @@ +// Ensure that omp.wsloop with the linear clause is translated correctly. +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s + +// ----- + +// %.linear_result must appear only in the loop body and in the +// linear_lastiter_exit block, where it's used to update the original +// variable. +// CHECK-LABEL: void @wsloop_linear_post_use({{.*}}) +// CHECK: %.linear_result = alloca i32 +// CHECK-NOT: %.linear_result +// CHECK: omp_loop.body: +// CHECK: %.linear_result +// CHECK: omp_loop.inc: +// CHECK-NOT: %.linear_result +// CHECK: omp_loop.linear_lastiter_exit: +// CHECK: load i32, ptr %.linear_result +// CHECK-NOT: %.linear_result + +llvm.func @wsloop_linear_post_use(%lb : i32, %ub : i32, %step : i32, + %x : !llvm.ptr, %out : !llvm.ptr) { + omp.wsloop linear(%x : !llvm.ptr = %step : i32) { + omp.loop_nest (%iv) : i32 = (%lb) to (%ub) step (%step) { + %cur = llvm.load %x : !llvm.ptr -> i32 + llvm.store %cur, %out : i32, !llvm.ptr + omp.yield + } + } {linear_var_types = [i32]} + %after = llvm.load %x : !llvm.ptr -> i32 + llvm.store %after, %out : i32, !llvm.ptr + llvm.return +} From bb5489e94c03cac90b748f6aea879527ee8fbd96 Mon Sep 17 00:00:00 2001 From: Lucas Mellone Date: Wed, 24 Jun 2026 19:15:50 +0200 Subject: [PATCH 372/511] [libc++][ranges] Applied [[nodiscard]] to `transform_view` (#204014) [[nodiscard]] should be applied to functions where discarding the return value is most likely a correctness issue. - https://libcxx.llvm.org/CodingGuidelines.html - https://wg21.link/range.transform Towards https://github.com/llvm/llvm-project/issues/172124 --------- Co-authored-by: A. 
Jiang Co-authored-by: Hristo Hristov --- libcxx/include/__ranges/transform_view.h | 53 +++++---- .../adaptor.nodiscard.verify.cpp | 23 ---- .../range.transform/nodiscard.verify.cpp | 103 ++++++++++++++++++ 3 files changed, 135 insertions(+), 44 deletions(-) delete mode 100644 libcxx/test/libcxx/ranges/range.adaptors/range.transform/adaptor.nodiscard.verify.cpp create mode 100644 libcxx/test/libcxx/ranges/range.adaptors/range.transform/nodiscard.verify.cpp diff --git a/libcxx/include/__ranges/transform_view.h b/libcxx/include/__ranges/transform_view.h index 5e817a3ca34d4..1484be006e841 100644 --- a/libcxx/include/__ranges/transform_view.h +++ b/libcxx/include/__ranges/transform_view.h @@ -88,43 +88,53 @@ class _LIBCPP_ABI_LLVM18_NO_UNIQUE_ADDRESS transform_view : public view_interfac _LIBCPP_HIDE_FROM_ABI constexpr explicit transform_view(_View __base, _Fn __func) : __func_(std::in_place, std::move(__func)), __base_(std::move(__base)) {} - _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() const& requires copy_constructible<_View> { return __base_; } - _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } - _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() { return __iterator{*this, ranges::begin(__base_)}; } - _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr _View base() && { return std::move(__base_); } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() { + return __iterator{*this, ranges::begin(__base_)}; + } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator begin() const requires range && __regular_invocable_with_range_ref { return __iterator(*this, ranges::begin(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr __sentinel end() { return __sentinel(ranges::end(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr __iterator end() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __sentinel end() { + 
return __sentinel(ranges::end(__base_)); + } + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator end() requires common_range<_View> { return __iterator(*this, ranges::end(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr __sentinel end() const + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __sentinel end() const requires range && __regular_invocable_with_range_ref { return __sentinel(ranges::end(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr __iterator end() const + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr __iterator end() const requires common_range && __regular_invocable_with_range_ref { return __iterator(*this, ranges::end(__base_)); } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() requires sized_range<_View> { return ranges::size(__base_); } - _LIBCPP_HIDE_FROM_ABI constexpr auto size() const + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto size() const requires sized_range { return ranges::size(__base_); @@ -209,11 +219,11 @@ class transform_view<_View, _Fn>::__iterator requires _Const && convertible_to, iterator_t<_Base>> : __parent_(__i.__parent_), __current_(std::move(__i.__current_)) {} - _LIBCPP_HIDE_FROM_ABI constexpr const iterator_t<_Base>& base() const& noexcept { return __current_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr const iterator_t<_Base>& base() const& noexcept { return __current_; } - _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_Base> base() && { return std::move(__current_); } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr iterator_t<_Base> base() && { return std::move(__current_); } - _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator*() const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator*() const noexcept(noexcept(std::invoke(*__parent_->__func_, *__current_))) { return std::invoke(*__parent_->__func_, *__current_); } @@ -262,7 +272,7 @@ class transform_view<_View, _Fn>::__iterator return *this; } - 
_LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](difference_type __n) const + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) operator[](difference_type __n) const noexcept(noexcept(std::invoke(*__parent_->__func_, __current_[__n]))) requires random_access_range<_Base> { @@ -305,25 +315,26 @@ class transform_view<_View, _Fn>::__iterator return __x.__current_ <=> __y.__current_; } - _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(__iterator __i, difference_type __n) + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(__iterator __i, difference_type __n) requires random_access_range<_Base> { return __iterator{*__i.__parent_, __i.__current_ + __n}; } - _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, __iterator __i) + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator+(difference_type __n, __iterator __i) requires random_access_range<_Base> { return __iterator{*__i.__parent_, __i.__current_ + __n}; } - _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(__iterator __i, difference_type __n) + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr __iterator operator-(__iterator __i, difference_type __n) requires random_access_range<_Base> { return __iterator{*__i.__parent_, __i.__current_ - __n}; } - _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type operator-(const __iterator& __x, const __iterator& __y) + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr difference_type + operator-(const __iterator& __x, const __iterator& __y) requires sized_sentinel_for, iterator_t<_Base>> { return __x.__current_ - __y.__current_; @@ -361,7 +372,7 @@ class transform_view<_View, _Fn>::__sentinel { requires _Const && convertible_to, sentinel_t<_Base>> : __end_(std::move(__i.__end_)) {} - _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_Base> base() const { return __end_; } + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr sentinel_t<_Base> base() const { return __end_; 
} template requires sentinel_for, iterator_t<__maybe_const<_OtherConst, _View>>> @@ -371,14 +382,14 @@ class transform_view<_View, _Fn>::__sentinel { template requires sized_sentinel_for, iterator_t<__maybe_const<_OtherConst, _View>>> - _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _View>> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _View>> operator-(const __iterator<_OtherConst>& __x, const __sentinel& __y) { return __x.__current_ - __y.__end_; } template requires sized_sentinel_for, iterator_t<__maybe_const<_OtherConst, _View>>> - _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _View>> + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI friend constexpr range_difference_t<__maybe_const<_OtherConst, _View>> operator-(const __sentinel& __x, const __iterator<_OtherConst>& __y) { return __x.__end_ - __y.__current_; } diff --git a/libcxx/test/libcxx/ranges/range.adaptors/range.transform/adaptor.nodiscard.verify.cpp b/libcxx/test/libcxx/ranges/range.adaptors/range.transform/adaptor.nodiscard.verify.cpp deleted file mode 100644 index 0a5fbf72a83ee..0000000000000 --- a/libcxx/test/libcxx/ranges/range.adaptors/range.transform/adaptor.nodiscard.verify.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// UNSUPPORTED: c++03, c++11, c++14, c++17 - -// Test the libc++ extension that std::views::transform is marked as [[nodiscard]]. 
- -#include - -void test() { - int range[] = {1, 2, 3}; - auto f = [](int i) { return i; }; - - std::views::transform(f); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} - std::views::transform(range, f); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} - range | std::views::transform(f); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} - std::views::all | std::views::transform(f); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} -} diff --git a/libcxx/test/libcxx/ranges/range.adaptors/range.transform/nodiscard.verify.cpp b/libcxx/test/libcxx/ranges/range.adaptors/range.transform/nodiscard.verify.cpp new file mode 100644 index 0000000000000..76ffa16d233f5 --- /dev/null +++ b/libcxx/test/libcxx/ranges/range.adaptors/range.transform/nodiscard.verify.cpp @@ -0,0 +1,103 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++20 + +// Test the libc++ extension that std::ranges::transform_view and std::views::transform are marked as [[nodiscard]]. 
+ +#include +#include +#include + +#include "test_iterators.h" + +struct View : std::ranges::view_interface { + int* begin(); + const int* begin() const; + volatile int* end(); + const volatile int* end() const; +}; +static_assert(!std::ranges::common_range); +static_assert(!std::same_as, std::ranges::iterator_t>); +static_assert(!std::same_as, std::ranges::sentinel_t>); + +void test() { + auto v = View{} | std::views::transform(std::identity{}); + + // [range.transform.view] + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::as_const(v).base(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::move(v).base(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + v.begin(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::as_const(v).begin(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + v.end(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::as_const(v).end(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + v.size(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::as_const(v).size(); + + // [range.transform.iterator] + + auto it = v.begin(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::as_const(it).base(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::move(it).base(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + *it; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + it[0]; + // expected-warning@+1 
{{ignoring return value of function declared with 'nodiscard' attribute}} + std::as_const(it)[0]; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + it + 0; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + 0 + it; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + it - 0; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + it - it; + + // [range.transform.sentinel] + + auto st = v.end(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + st.base(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + it - st; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + st - it; + + auto c_it = std::as_const(v).begin(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + st - c_it; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + c_it - st; + + // [range.transform.overview] + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::views::transform(View{}, std::identity{}); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::views::transform(std::identity{}); +} From 4be3214dbcba5d4992f94c00464d4540a2846914 Mon Sep 17 00:00:00 2001 From: EuphoricThinking Date: Wed, 24 Jun 2026 19:19:33 +0200 Subject: [PATCH 373/511] [offload] add support for aligned allocations (#203353) This patch is the first step towards introducing alignment support in memory allocations using liboffload, in order to enable SYCL implementation of aligned allocations. 
At the level of device allocators, it does not modify the Level Zero code except for forwarding the alignment parameter, since Level Zero already allows for specifying the alignment in its device allocator implementation. For AMD and CUDA, it checks whether the alignment passed by the caller is supported by the given backend; the reasoning behind this verification is described in the following paragraphs. At the API level, it adds a new function olMemAllocAligned, which is expected to work similarly to olMemAlloc, with the difference that the buffers returned by olMemAllocAligned should be aligned to the alignment passed by the user. At the level of the plugin interface internal abstractions, it adds a new argument Alignment to existing functions and delegates memory allocation between olMemAllocAligned and olMemAlloc implementations by using a common helper function. The goal of the anticipated series of patches is to implement handling of the alignment in the memory manager at the plugin interface level. At the first stage, presented in this patch, the information about the passed alignment is used mainly for checking whether the buffer returned by the device allocators meets the requirements. In the case of the requested memory size exceeding the thresholds of allocations handled by the memory manager, the request is forwarded directly to the device. Otherwise, the memory manager is responsible for allocating memory in full pages and pooling it according to the requested chunks. In the first scenario, the requested size, which is greater than the aforementioned threshold, is usually a multiple of the page size. Therefore, any alignment smaller than the page size would be correct. Neither CUDA nor HSA provides users with the ability to specify the alignment of the allocated memory. Their APIs include the alignment as one of the possible arguments only in functions that reserve virtual address space. 
CUDA enables users only to check the allocation granularity, which is usually synonymous with the page size. In the case of HSA, if the memory is allocated using a pool, the user is able to check not only the granularity, but also the alignment of the buffers allocated using the given pool. However, these values - granularity and alignment - are defined only if HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED is set to true, which should always be set since the current implementation of the memory pool for the AMD plugin in liboffload uses only hsa_amd_memory_pool_ts for allocations. Only Level Zero accepts the alignment parameter during the memory allocation process, but manages returned pointers internally. Since pooling is also implemented in the memory manager at the plugin level, such a design in the Level Zero device allocator duplicates pooling in the whole application. In the final version, the memory manager at the plugin interface level will handle memory pooling and the buffer alignment from different devices instead of delegating memory management to device allocator implementations, as this would simplify the Level Zero plugin design and provide AMD and CUDA plugins with support for the alignment in memory allocations, which is not natively included in their APIs. 
--- offload/liboffload/API/Memory.td | 27 +++ offload/liboffload/src/OffloadImpl.cpp | 21 +- offload/plugins-nextgen/amdgpu/src/rtl.cpp | 63 +++++- .../common/include/MemoryManager.h | 33 +-- .../common/include/PluginInterface.h | 3 +- .../common/src/PluginInterface.cpp | 24 +- offload/plugins-nextgen/cuda/src/rtl.cpp | 40 +++- offload/plugins-nextgen/host/src/rtl.cpp | 3 +- .../level_zero/include/L0Device.h | 4 +- .../level_zero/src/L0Device.cpp | 9 +- offload/unittests/OffloadAPI/CMakeLists.txt | 1 + .../OffloadAPI/memory/olMemAllocAligned.cpp | 211 ++++++++++++++++++ 12 files changed, 393 insertions(+), 46 deletions(-) create mode 100644 offload/unittests/OffloadAPI/memory/olMemAllocAligned.cpp diff --git a/offload/liboffload/API/Memory.td b/offload/liboffload/API/Memory.td index 78cfdd2f8855a..78f136801885a 100644 --- a/offload/liboffload/API/Memory.td +++ b/offload/liboffload/API/Memory.td @@ -51,6 +51,33 @@ def olMemAlloc : Function { ]; } +def olMemAllocAligned : Function { + let desc = "Creates a memory allocation on the specified device with specified properties."; + let details = [ + "All allocations through olMemAllocAligned regardless of source share a single virtual address range. There is no risk of multiple devices returning equal pointers to different memory." + ]; + let params = [ + Param<"ol_device_handle_t", "Device", "handle of the device to allocate on", PARAM_IN>, + Param<"ol_alloc_type_t", "Type", "type of the allocation", PARAM_IN>, + Param<"size_t", "Size", "size of the allocation in bytes", PARAM_IN>, + Param<"size_t", "Alignment", + "alignment of the allocation in bytes.
Must be non-zero and a power of two", + PARAM_IN>, + Param<"void**", "AllocationOut", "output for the allocated pointer", PARAM_OUT> + ]; + let returns = [ + Return<"OL_ERRC_INVALID_SIZE", [ + "`Size == 0`" + ]>, + Return<"OL_ERRC_INVALID_ARGUMENT", [ + "`Alignment == 0`" + ]>, + Return<"OL_ERRC_INVALID_ARGUMENT", [ + "`(Alignment & (Alignment - 1)) != 0`" + ]>, + ]; +} + def olMemFree : Function { let desc = "Frees a memory allocation previously made by olMemAlloc."; let params = [ diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index 743a7529ec780..a36081f27b5ee 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -597,16 +597,17 @@ TargetAllocTy convertOlToPluginAllocTy(ol_alloc_type_t Type) { } constexpr size_t MAX_ALLOC_TRIES = 50; -Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type, - size_t Size, void **AllocationOut) { +Error olMemAllocImplHelper(ol_device_handle_t Device, ol_alloc_type_t Type, + size_t Size, size_t Alignment, + void **AllocationOut) { SmallVector Rejects; // Repeat the allocation up to a certain amount of times. If it happens to // already be allocated (e.g. by a device from another vendor) throw it away // and try again. 
for (size_t Count = 0; Count < MAX_ALLOC_TRIES; Count++) { - auto NewAlloc = Device->Device->dataAlloc(Size, nullptr, - convertOlToPluginAllocTy(Type)); + auto NewAlloc = Device->Device->dataAlloc( + Size, nullptr, convertOlToPluginAllocTy(Type), Alignment); if (!NewAlloc) return NewAlloc.takeError(); @@ -653,6 +654,18 @@ Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type, "failed to allocate non-overlapping memory"); } +Error olMemAlloc_impl(ol_device_handle_t Device, ol_alloc_type_t Type, + size_t Size, void **AllocationOut) { + return olMemAllocImplHelper(Device, Type, Size, /*Alignment=*/0, + AllocationOut); +} + +Error olMemAllocAligned_impl(ol_device_handle_t Device, ol_alloc_type_t Type, + size_t Size, size_t Alignment, + void **AllocationOut) { + return olMemAllocImplHelper(Device, Type, Size, Alignment, AllocationOut); +} + Error olMemFree_impl(void *Address) { ol_device_handle_t Device; ol_alloc_type_t Type; diff --git a/offload/plugins-nextgen/amdgpu/src/rtl.cpp b/offload/plugins-nextgen/amdgpu/src/rtl.cpp index bb07dcc5c91fe..96b204009ae2c 100644 --- a/offload/plugins-nextgen/amdgpu/src/rtl.cpp +++ b/offload/plugins-nextgen/amdgpu/src/rtl.cpp @@ -291,6 +291,10 @@ struct AMDGPUMemoryPoolTy { if (auto Err = getAttr(HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, GlobalFlags)) return Err; + if (auto Err = getAttr(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALIGNMENT, + PoolAllocationAlignment)) + return Err; + return getAttr(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_GRANULE, Granule); } @@ -324,10 +328,39 @@ struct AMDGPUMemoryPoolTy { /// Get the allocation granularity of the pool. size_t getGranule() const { return Granule; } + /// Get the allocation alignment of the pool. + size_t getAlignment() const { return PoolAllocationAlignment; } + /// Allocate memory on the memory pool. 
- Error allocate(size_t Size, void **PtrStorage) { + Error allocate(size_t Size, void **PtrStorage, size_t Alignment) { + // A non-zero value passed as the Alignment indicates that the user expects + // the allocation to have a specific alignment. However, the HSA API does + // not allow users to define alignment. Therefore, the passed alignment is + // compared with the alignment of the memory allocated using the given pool. + // If the default alignment is greater than or equal to the alignment + // requested by the user, it would still meet the user's requirements. + if (Alignment > 0 && Alignment > PoolAllocationAlignment) { + return Plugin::error(ErrorCode::UNSUPPORTED, + "requested alignment (%lu) larger than maximum " + "supported pool alignment (%lu)", + Alignment, PoolAllocationAlignment); + } + hsa_status_t Status = hsa_amd_memory_pool_allocate(MemoryPool, Size, 0, PtrStorage); + + if (Alignment > 0 && !isAddrAligned(Align(Alignment), *PtrStorage)) { + if (auto FreeErr = deallocate(*PtrStorage)) { + consumeError(std::move(FreeErr)); + return Plugin::error(ErrorCode::UNKNOWN, + "Failure in deallocation of the incorrectly " + "aligned pointer; requested alignment: %lu", Alignment); + } + + return Plugin::error(ErrorCode::UNSUPPORTED, + "unsupported alignment size"); + } + return Plugin::check(Status, "error in hsa_amd_memory_pool_allocate: %s"); } @@ -407,6 +440,14 @@ struct AMDGPUMemoryPoolTy { /// The page size in this memory pool. size_t Granule; + + /// The alignment of the buffers allocated by + /// hsa_amd_memory_pool_allocate(...). This attribute is defined only if + /// HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED is set to true. Since + /// hsa_amd_memory_pool_allocate is the only memory-allocating function that + /// is used by the memory pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED + /// should always be set.
+ size_t PoolAllocationAlignment; }; /// Class that implements a memory manager that gets memory from a specific @@ -442,7 +483,8 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy { assert(MemoryManager && "Invalid memory manager"); assert(PtrStorage && "Invalid pointer storage"); - auto PtrStorageOrErr = MemoryManager->allocate(Size, nullptr); + auto PtrStorageOrErr = + MemoryManager->allocate(Size, nullptr, /*Alignment=*/0); if (!PtrStorageOrErr) return PtrStorageOrErr.takeError(); @@ -466,8 +508,8 @@ struct AMDGPUMemoryManagerTy : public DeviceAllocatorTy { private: /// Allocation callback that will be called once the memory manager does not /// have more previously allocated buffers. - Expected allocate(size_t Size, void *HstPtr, - TargetAllocTy Kind) override; + Expected allocate(size_t Size, void *HstPtr, TargetAllocTy Kind, + size_t Alignment) override; /// Deallocation callback that will be called by the memory manager. Error free(void *TgtPtr, TargetAllocTy Kind) override { @@ -2660,7 +2702,8 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy { } /// Allocate memory on the device or related to the device. - Expected allocate(size_t Size, void *, TargetAllocTy Kind) override; + Expected allocate(size_t Size, void *, TargetAllocTy Kind, + size_t Alignment) override; /// Deallocate memory on the device or related to the device. Error free(void *TgtPtr, TargetAllocTy Kind) override { @@ -4364,10 +4407,11 @@ static Error Plugin::check(int32_t Code, const char *ErrFmt, ArgsTy... Args) { } Expected AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr, - TargetAllocTy Kind) { + TargetAllocTy Kind, + size_t Alignment) { // Allocate memory from the pool. 
void *Ptr = nullptr; - if (auto Err = MemoryPool->allocate(Size, &Ptr)) + if (auto Err = MemoryPool->allocate(Size, &Ptr, Alignment)) return std::move(Err); assert(Ptr && "Invalid pointer"); @@ -4385,7 +4429,8 @@ Expected AMDGPUMemoryManagerTy::allocate(size_t Size, void *HstPtr, } Expected AMDGPUDeviceTy::allocate(size_t Size, void *, - TargetAllocTy Kind) { + TargetAllocTy Kind, + size_t Alignment) { if (Size == 0) return nullptr; @@ -4410,7 +4455,7 @@ Expected AMDGPUDeviceTy::allocate(size_t Size, void *, // Allocate from the corresponding memory pool. void *Alloc = nullptr; - if (auto Err = MemoryPool->allocate(Size, &Alloc)) + if (auto Err = MemoryPool->allocate(Size, &Alloc, Alignment)) return std::move(Err); if (Alloc) { diff --git a/offload/plugins-nextgen/common/include/MemoryManager.h b/offload/plugins-nextgen/common/include/MemoryManager.h index 9dd4ee684f85a..4b57be45e7551 100644 --- a/offload/plugins-nextgen/common/include/MemoryManager.h +++ b/offload/plugins-nextgen/common/include/MemoryManager.h @@ -38,9 +38,9 @@ class DeviceAllocatorTy { /// Allocate a memory of size \p Size . \p HstPtr is used to assist the /// allocation. 
- virtual Expected - allocate(size_t Size, void *HstPtr, - TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) = 0; + virtual Expected allocate(size_t Size, void *HstPtr, + TargetAllocTy Kind = TARGET_ALLOC_DEFAULT, + size_t Alignment = 0) = 0; /// Delete the pointer \p TgtPtr on the device virtual Error free(void *TgtPtr, @@ -143,8 +143,10 @@ class MemoryManagerTy { size_t SizeThreshold = 1U << 13; /// Request memory from target device - Expected allocateOnDevice(size_t Size, void *HstPtr) const { - return DeviceAllocator.allocate(Size, HstPtr, TARGET_ALLOC_DEVICE); + Expected allocateOnDevice(size_t Size, void *HstPtr, + size_t Alignment) const { + return DeviceAllocator.allocate(Size, HstPtr, TARGET_ALLOC_DEVICE, + Alignment); } /// Deallocate data on device @@ -153,7 +155,8 @@ class MemoryManagerTy { /// This function is called when it tries to allocate memory on device but the /// device returns out of memory. It will first free all memory in the /// FreeList and try to allocate again. - Expected freeAndAllocate(size_t Size, void *HstPtr) { + Expected freeAndAllocate(size_t Size, void *HstPtr, + size_t Alignment) { std::vector RemoveList; // Deallocate all memory in FreeList @@ -178,16 +181,16 @@ class MemoryManagerTy { } // Try allocate memory again - return allocateOnDevice(Size, HstPtr); + return allocateOnDevice(Size, HstPtr, Alignment); } /// The goal is to allocate memory on the device. It first tries to /// allocate directly on the device. If a \p nullptr is returned, it might /// be because the device is OOM. In that case, it will free all unused /// memory and then try again. 
- Expected allocateOrFreeAndAllocateOnDevice(size_t Size, - void *HstPtr) { - auto TgtPtrOrErr = allocateOnDevice(Size, HstPtr); + Expected allocateOrFreeAndAllocateOnDevice(size_t Size, void *HstPtr, + size_t Alignment) { + auto TgtPtrOrErr = allocateOnDevice(Size, HstPtr, Alignment); if (!TgtPtrOrErr) return TgtPtrOrErr.takeError(); @@ -197,7 +200,7 @@ class MemoryManagerTy { if (TgtPtr == nullptr) { ODBG(OLDT_Alloc) << "Failed to get memory on device. Free all memory " << "in FreeLists and try again."; - TgtPtrOrErr = freeAndAllocate(Size, HstPtr); + TgtPtrOrErr = freeAndAllocate(Size, HstPtr, Alignment); if (!TgtPtrOrErr) return TgtPtrOrErr.takeError(); TgtPtr = *TgtPtrOrErr; @@ -231,7 +234,7 @@ class MemoryManagerTy { /// Allocate memory of size \p Size from target device. \p HstPtr is used to /// assist the allocation. - Expected allocate(size_t Size, void *HstPtr) { + Expected allocate(size_t Size, void *HstPtr, size_t Alignment) { // If the size is zero, we will not bother the target device. Just return // nullptr directly. if (Size == 0) @@ -245,7 +248,8 @@ class MemoryManagerTy { if (Size > SizeThreshold) { ODBG(OLDT_Alloc) << Size << " is greater than the threshold " << SizeThreshold << ". Allocate it directly from device"; - auto TgtPtrOrErr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr); + auto TgtPtrOrErr = + allocateOrFreeAndAllocateOnDevice(Size, HstPtr, Alignment); if (!TgtPtrOrErr) return TgtPtrOrErr.takeError(); @@ -281,7 +285,8 @@ class MemoryManagerTy { ODBG(OLDT_Alloc) << "Cannot find a node in the FreeLists. 
" << "Allocate on device."; // Allocate one on device - auto TgtPtrOrErr = allocateOrFreeAndAllocateOnDevice(Size, HstPtr); + auto TgtPtrOrErr = + allocateOrFreeAndAllocateOnDevice(Size, HstPtr, Alignment); if (!TgtPtrOrErr) return TgtPtrOrErr.takeError(); diff --git a/offload/plugins-nextgen/common/include/PluginInterface.h b/offload/plugins-nextgen/common/include/PluginInterface.h index 9c83d2c9319fb..dc21abf1a334a 100644 --- a/offload/plugins-nextgen/common/include/PluginInterface.h +++ b/offload/plugins-nextgen/common/include/PluginInterface.h @@ -954,7 +954,8 @@ struct GenericDeviceTy : public DeviceAllocatorTy { virtual Error memoryVAUnMap(void *VAddr, size_t Size); /// Allocate data on the device or involving the device. - Expected dataAlloc(int64_t Size, void *HostPtr, TargetAllocTy Kind); + Expected dataAlloc(int64_t Size, void *HostPtr, TargetAllocTy Kind, + size_t Alignment); /// Deallocate data from the device or involving the device. Error dataDelete(void *TgtPtr, TargetAllocTy Kind); diff --git a/offload/plugins-nextgen/common/src/PluginInterface.cpp b/offload/plugins-nextgen/common/src/PluginInterface.cpp index c9dbfb9d58f29..c77182cb03ec9 100644 --- a/offload/plugins-nextgen/common/src/PluginInterface.cpp +++ b/offload/plugins-nextgen/common/src/PluginInterface.cpp @@ -130,9 +130,9 @@ GenericKernelTy::getKernelLaunchEnvironment( if (!NeedsReductionBuffer && !KernelArgs.DynCGroupMem) return reinterpret_cast(~0); - auto AllocOrErr = GenericDevice.dataAlloc(sizeof(KernelLaunchEnvironmentTy), - /*HostPtr=*/nullptr, - TargetAllocTy::TARGET_ALLOC_DEVICE); + auto AllocOrErr = GenericDevice.dataAlloc( + sizeof(KernelLaunchEnvironmentTy), + /*HostPtr=*/nullptr, TargetAllocTy::TARGET_ALLOC_DEVICE, /*Alignment=*/0); if (!AllocOrErr) return AllocOrErr.takeError(); @@ -153,7 +153,8 @@ GenericKernelTy::getKernelLaunchEnvironment( // Use number of teams many buffer elements. 
auto AllocOrErr = GenericDevice.dataAlloc( uint64_t(RedCfg.ReductionDataSize) * NumBlocks0, - /*HostPtr=*/nullptr, TargetAllocTy::TARGET_ALLOC_DEVICE); + /*HostPtr=*/nullptr, TargetAllocTy::TARGET_ALLOC_DEVICE, + /*Alignment=*/0); if (!AllocOrErr) return AllocOrErr.takeError(); LocalKLE.ReductionBuffer = *AllocOrErr; @@ -231,7 +232,8 @@ GenericKernelTy::prepareBlockMemory(GenericDeviceTy &GenericDevice, // Get global memory as fallback. auto AllocOrErr = GenericDevice.dataAlloc( NumBlocks * DynBlockMemSize, - /*HostPtr=*/nullptr, TargetAllocTy::TARGET_ALLOC_DEVICE); + /*HostPtr=*/nullptr, TargetAllocTy::TARGET_ALLOC_DEVICE, + /*Alignment=*/0); if (!AllocOrErr) return AllocOrErr.takeError(); DynFallbackPtr = *AllocOrErr; @@ -990,9 +992,11 @@ Error GenericDeviceTy::getDeviceMemorySize(uint64_t &DSize) { } Expected GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr, - TargetAllocTy Kind) { + TargetAllocTy Kind, + size_t Alignment) { void *Alloc = nullptr; + // TODO Check alignment. if (RecordReplay && RecordReplay->isRecordingOrReplaying()) return RecordReplay->allocate(Size); @@ -1000,7 +1004,7 @@ Expected GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr, case TARGET_ALLOC_DEFAULT: case TARGET_ALLOC_DEVICE: if (MemoryManager) { - auto AllocOrErr = MemoryManager->allocate(Size, HostPtr); + auto AllocOrErr = MemoryManager->allocate(Size, HostPtr, Alignment); if (!AllocOrErr) return AllocOrErr.takeError(); Alloc = *AllocOrErr; @@ -1012,7 +1016,7 @@ Expected GenericDeviceTy::dataAlloc(int64_t Size, void *HostPtr, [[fallthrough]]; case TARGET_ALLOC_HOST: case TARGET_ALLOC_SHARED: { - auto AllocOrErr = allocate(Size, HostPtr, Kind); + auto AllocOrErr = allocate(Size, HostPtr, Kind, Alignment); if (!AllocOrErr) return AllocOrErr.takeError(); Alloc = *AllocOrErr; @@ -1549,8 +1553,8 @@ int32_t GenericPluginTy::load_binary(int32_t DeviceId, void *GenericPluginTy::data_alloc(int32_t DeviceId, int64_t Size, void *HostPtr, int32_t Kind) { - auto AllocOrErr = - 
getDevice(DeviceId).dataAlloc(Size, HostPtr, (TargetAllocTy)Kind); + auto AllocOrErr = getDevice(DeviceId).dataAlloc( + Size, HostPtr, (TargetAllocTy)Kind, /*Alignment=*/0); if (!AllocOrErr) { auto Err = AllocOrErr.takeError(); REPORT() << "Failure to allocate device memory: " diff --git a/offload/plugins-nextgen/cuda/src/rtl.cpp b/offload/plugins-nextgen/cuda/src/rtl.cpp index 51e2bdb0c01dc..fc7d9e083f85e 100644 --- a/offload/plugins-nextgen/cuda/src/rtl.cpp +++ b/offload/plugins-nextgen/cuda/src/rtl.cpp @@ -397,6 +397,20 @@ struct CUDADeviceTy : public GenericDeviceTy { return Err; MaxBlockSharedMemSize = MaxSharedMem; + CUmemAllocationProp Prop = {}; + Prop.type = CU_MEM_ALLOCATION_TYPE_PINNED; + Prop.location.type = CU_MEM_LOCATION_TYPE_DEVICE; + Prop.location.id = DeviceId; + + Res = cuMemGetAllocationGranularity(&Granularity, &Prop, + CU_MEM_ALLOC_GRANULARITY_MINIMUM); + if (auto Err = Plugin::check( + Res, "error in cuMemGetAllocationGranularity for the device: %s")) + return Err; + if (Granularity == 0) + return Plugin::error(ErrorCode::INVALID_ARGUMENT, + "wrong device page size"); + return Plugin::success(); } @@ -586,7 +600,8 @@ struct CUDADeviceTy : public GenericDeviceTy { } /// Allocate memory on the device or related to the device. 
- Expected allocate(size_t Size, void *, TargetAllocTy Kind) override { + Expected allocate(size_t Size, void *, TargetAllocTy Kind, + size_t Alignment = 0) override { if (Size == 0) return nullptr; @@ -597,6 +612,13 @@ struct CUDADeviceTy : public GenericDeviceTy { CUdeviceptr DevicePtr; CUresult Res; + if (Alignment > 0 && Alignment > Granularity) { + return Plugin::error(ErrorCode::UNSUPPORTED, + "requested alignment (%lu) larger than maximum " + "supported alignment (%lu)", + Alignment, Granularity); + } + switch (Kind) { case TARGET_ALLOC_DEFAULT: case TARGET_ALLOC_DEVICE: @@ -614,6 +636,19 @@ struct CUDADeviceTy : public GenericDeviceTy { if (auto Err = Plugin::check(Res, "error in cuMemAlloc[Host|Managed]: %s")) return std::move(Err); + + if (Alignment > 0 && !isAddrAligned(Align(Alignment), MemAlloc)) { + if (auto FreeErr = free(MemAlloc, Kind)) { + return Plugin::error(ErrorCode::UNKNOWN, + "Failure in deallcation of the incorrectly " + "aligned pointer; requested alignemnt: %lu", + Alignment); + } + + return Plugin::error(ErrorCode::UNSUPPORTED, + "unsupported alignment size"); + } + return MemAlloc; } @@ -1460,6 +1495,9 @@ struct CUDADeviceTy : public GenericDeviceTy { /// simultaneously. uint32_t HardwareParallelism = 0; + /// Device page size. + size_t Granularity = 0; + /// Tracker for virtual address reservations. VMemTrackerTy VMemTracker; }; diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp index 7c676bd82c801..0846a6d99e3b3 100644 --- a/offload/plugins-nextgen/host/src/rtl.cpp +++ b/offload/plugins-nextgen/host/src/rtl.cpp @@ -224,7 +224,8 @@ struct GenELF64DeviceTy : public GenericDeviceTy { } /// Allocate memory. Use std::malloc in all cases. 
- Expected allocate(size_t Size, void *, TargetAllocTy Kind) override { + Expected allocate(size_t Size, void *, TargetAllocTy Kind, + size_t /* Alignment */) override { if (Size == 0) return nullptr; diff --git a/offload/plugins-nextgen/level_zero/include/L0Device.h b/offload/plugins-nextgen/level_zero/include/L0Device.h index 09983ac245973..4aa0633339d25 100644 --- a/offload/plugins-nextgen/level_zero/include/L0Device.h +++ b/offload/plugins-nextgen/level_zero/include/L0Device.h @@ -485,8 +485,8 @@ class L0DeviceTy final : public GenericDeviceTy { loadBinaryImpl(std::unique_ptr &&TgtImage, int32_t ImageId) override; Error unloadBinaryImpl(DeviceImageTy *Image) override; - Expected allocate(size_t Size, void *HstPtr, - TargetAllocTy Kind) override; + Expected allocate(size_t Size, void *HstPtr, TargetAllocTy Kind, + size_t Alignment) override; Error free(void *TgtPtr, TargetAllocTy Kind = TARGET_ALLOC_DEFAULT) override; /// This plugin does nothing to lock buffers. Do not return an error, just diff --git a/offload/plugins-nextgen/level_zero/src/L0Device.cpp b/offload/plugins-nextgen/level_zero/src/L0Device.cpp index ac57c74ff15f0..9a7d534c56c40 100644 --- a/offload/plugins-nextgen/level_zero/src/L0Device.cpp +++ b/offload/plugins-nextgen/level_zero/src/L0Device.cpp @@ -334,8 +334,8 @@ Error L0DeviceTy::queryAsyncImpl(__tgt_async_info &AsyncInfo, bool ReleaseQueue, } Expected L0DeviceTy::allocate(size_t Size, void *HstPtr, - TargetAllocTy Kind) { - return dataAlloc(Size, /*Align=*/0, Kind, + TargetAllocTy Kind, size_t Alignment) { + return dataAlloc(Size, Alignment, Kind, /*Offset=*/0, /*UserAlloc=*/HstPtr == nullptr, /*DevMalloc=*/false); } @@ -888,8 +888,9 @@ Error L0DeviceTy::callGlobalCtorDtorCommon(GenericPluginTy &Plugin, llvm::sort(Funcs, [](const auto &X, const auto &Y) { return X.second < Y.second; }); - auto BufferOrErr = allocate(Funcs.size() * sizeof(void *), - /*HostPtr=*/nullptr, TARGET_ALLOC_DEVICE); + auto BufferOrErr = + allocate(Funcs.size() * 
sizeof(void *), + /*HostPtr=*/nullptr, TARGET_ALLOC_DEVICE, /*Alignment=*/0); if (!BufferOrErr) return HandleErr(BufferOrErr.takeError()); diff --git a/offload/unittests/OffloadAPI/CMakeLists.txt b/offload/unittests/OffloadAPI/CMakeLists.txt index e862d8635e0b7..d960da4b98f82 100644 --- a/offload/unittests/OffloadAPI/CMakeLists.txt +++ b/offload/unittests/OffloadAPI/CMakeLists.txt @@ -28,6 +28,7 @@ add_offload_unittest("kernel" add_offload_unittest("memory" memory/olMemAlloc.cpp + memory/olMemAllocAligned.cpp memory/olMemFill.cpp memory/olMemFree.cpp memory/olMemcpy.cpp diff --git a/offload/unittests/OffloadAPI/memory/olMemAllocAligned.cpp b/offload/unittests/OffloadAPI/memory/olMemAllocAligned.cpp new file mode 100644 index 0000000000000..18feafbf2b325 --- /dev/null +++ b/offload/unittests/OffloadAPI/memory/olMemAllocAligned.cpp @@ -0,0 +1,211 @@ +//===--------------- Offload API tests - olMemAllocAligned ----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../common/Fixtures.hpp" +#include +#include + +using olMemAllocAlignedTest = OffloadDeviceTest; + +OFFLOAD_TESTS_INSTANTIATE_DEVICE_FIXTURE(olMemAllocAlignedTest); + +constexpr size_t DefaultAlignment = 16; +constexpr size_t TestAllocsNum = 1000; + +TEST_P(olMemAllocAlignedTest, SuccessAllocMany) { + std::vector Allocs; + Allocs.reserve(1000); + + constexpr ol_alloc_type_t TYPES[3] = { + OL_ALLOC_TYPE_DEVICE, OL_ALLOC_TYPE_MANAGED, OL_ALLOC_TYPE_HOST}; + + for (size_t I = 1; I < TestAllocsNum; I++) { + void *Alloc = nullptr; + ASSERT_SUCCESS(olMemAllocAligned(Device, TYPES[I % 3], 1024 * I, + DefaultAlignment, &Alloc)); + ASSERT_NE(Alloc, nullptr); + + Allocs.push_back(Alloc); + } + + for (auto *A : Allocs) { + olMemFree(A); + } +} + +TEST_P(olMemAllocAlignedTest, InvalidNullDevice) { + void *Alloc = nullptr; + ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE, + olMemAllocAligned(nullptr, OL_ALLOC_TYPE_DEVICE, 1024, + DefaultAlignment, &Alloc)); +} + +TEST_P(olMemAllocAlignedTest, InvalidNullOutPtr) { + ASSERT_ERROR(OL_ERRC_INVALID_NULL_POINTER, + olMemAllocAligned(Device, OL_ALLOC_TYPE_DEVICE, 1024, + DefaultAlignment, nullptr)); +} + +TEST_P(olMemAllocAlignedTest, InvalidAlignmentZero) { + void *Alloc = nullptr; + + ASSERT_ERROR( + OL_ERRC_INVALID_ARGUMENT, + olMemAllocAligned(Device, OL_ALLOC_TYPE_DEVICE, 1024, 0, &Alloc)); +} + +TEST_P(olMemAllocAlignedTest, InvalidAlignmentNotAPowerOfTwo) { + void *Alloc = nullptr; + + ASSERT_ERROR( + OL_ERRC_INVALID_ARGUMENT, + olMemAllocAligned(Device, OL_ALLOC_TYPE_DEVICE, 1024, 3, &Alloc)); +} + +TEST_P(olMemAllocAlignedTest, CudaExceedDefaultAlignment) { + if (getPlatformBackend() != OL_PLATFORM_BACKEND_CUDA) { + GTEST_SKIP() << "Test inteded for CUDA backend"; + } + + void *Alloc = nullptr; + // The default page size for cuda is 64 KB. 
+ ASSERT_ERROR(OL_ERRC_UNSUPPORTED, + olMemAllocAligned(Device, OL_ALLOC_TYPE_DEVICE, 1024, + 1024 * 64 * 64, &Alloc)); + ASSERT_EQ(Alloc, nullptr); +} + +TEST_P(olMemAllocAlignedTest, SuccessAllocManagedDifferentAlignments) { + void *Alloc = nullptr; + size_t NumAlignments = 6; + size_t Alignments[] = {8, 16, 32, 64, 128, 256}; + size_t Alignment; + + for (size_t i = 0; i < NumAlignments; i++) { + Alignment = Alignments[i]; + SCOPED_TRACE("alignment: " + std::to_string(Alignment)); + ASSERT_SUCCESS(olMemAllocAligned(Device, OL_ALLOC_TYPE_MANAGED, 1024, + Alignment, &Alloc)); + ASSERT_NE(Alloc, nullptr); + olMemFree(Alloc); + } +} + +TEST_P(olMemAllocAlignedTest, SuccessAllocHostDifferentAlignments) { + void *Alloc = nullptr; + size_t NumAlignments = 6; + size_t Alignments[] = {8, 16, 32, 64, 128, 256}; + size_t Alignment; + + for (size_t i = 0; i < NumAlignments; i++) { + Alignment = Alignments[i]; + SCOPED_TRACE("alignment: " + std::to_string(Alignment)); + ASSERT_SUCCESS( + olMemAllocAligned(Device, OL_ALLOC_TYPE_HOST, 1024, Alignment, &Alloc)); + ASSERT_NE(Alloc, nullptr); + olMemFree(Alloc); + } +} + +TEST_P(olMemAllocAlignedTest, SuccessAllocDeviceDifferentAlignments) { + void *Alloc = nullptr; + size_t NumAlignments = 6; + size_t Alignments[] = {8, 16, 32, 64, 128, 256}; + size_t Alignment; + + for (size_t i = 0; i < NumAlignments; i++) { + Alignment = Alignments[i]; + SCOPED_TRACE("alignment: " + std::to_string(Alignment)); + ASSERT_SUCCESS(olMemAllocAligned(Device, OL_ALLOC_TYPE_DEVICE, 1024, + Alignment, &Alloc)); + ASSERT_NE(Alloc, nullptr); + + olMemFree(Alloc); + } +} + +TEST_P(olMemAllocAlignedTest, SuccessMemcpyManagedDiferentAlignments) { + constexpr size_t Size = 1024; + void *Alloc; + std::vector Input(Size, 42); + std::vector Output(Size, 0); + + size_t NumAlignments = 6; + size_t Alignments[] = {8, 16, 32, 64, 128, 256}; + size_t Alignment; + for (size_t i = 0; i < NumAlignments; i++) { + Alignment = Alignments[i]; + SCOPED_TRACE("alignment: " + 
std::to_string(Alignment)); + + ASSERT_SUCCESS(olMemAllocAligned(Device, OL_ALLOC_TYPE_MANAGED, Size, + Alignment, &Alloc)); + // memcpy is synchronous when queue is unspecified. + ASSERT_SUCCESS(olMemcpy(nullptr, Alloc, Device, Input.data(), Host, Size)); + ASSERT_SUCCESS(olMemcpy(nullptr, Output.data(), Host, Alloc, Device, Size)); + + for (uint8_t Val : Output) { + ASSERT_EQ(Val, 42); + } + + ASSERT_SUCCESS(olMemFree(Alloc)); + } +} + +TEST_P(olMemAllocAlignedTest, SuccessMemcpyDeviceDiferentAlignments) { + constexpr size_t Size = 1024; + void *Alloc; + std::vector Input(Size, 42); + std::vector Output(Size, 0); + + size_t NumAlignments = 6; + size_t Alignments[] = {8, 16, 32, 64, 128, 256}; + size_t Alignment; + for (size_t i = 0; i < NumAlignments; i++) { + Alignment = Alignments[i]; + SCOPED_TRACE("alignment: " + std::to_string(Alignment)); + + ASSERT_SUCCESS(olMemAllocAligned(Device, OL_ALLOC_TYPE_DEVICE, Size, + Alignment, &Alloc)); + // memcpy is synchronous when queue is unspecified. + ASSERT_SUCCESS(olMemcpy(nullptr, Alloc, Device, Input.data(), Host, Size)); + ASSERT_SUCCESS(olMemcpy(nullptr, Output.data(), Host, Alloc, Device, Size)); + + for (uint8_t Val : Output) { + ASSERT_EQ(Val, 42); + } + + ASSERT_SUCCESS(olMemFree(Alloc)); + } +} + +TEST_P(olMemAllocAlignedTest, SuccessMemcpyHostDiferentAlignments) { + constexpr size_t Size = 1024; + void *Alloc; + std::vector Input(Size, 42); + std::vector Output(Size, 0); + + size_t NumAlignments = 6; + size_t Alignments[] = {8, 16, 32, 64, 128, 256}; + size_t Alignment; + for (size_t i = 0; i < NumAlignments; i++) { + Alignment = Alignments[i]; + SCOPED_TRACE("alignment: " + std::to_string(Alignment)); + + ASSERT_SUCCESS( + olMemAllocAligned(Device, OL_ALLOC_TYPE_HOST, Size, Alignment, &Alloc)); + // memcpy is synchronous when queue is unspecified. 
+ ASSERT_SUCCESS(olMemcpy(nullptr, Alloc, Device, Input.data(), Host, Size)); + ASSERT_SUCCESS(olMemcpy(nullptr, Output.data(), Host, Alloc, Device, Size)); + + for (uint8_t Val : Output) { + ASSERT_EQ(Val, 42); + } + + ASSERT_SUCCESS(olMemFree(Alloc)); + } +} From 172c4dbec9bbba0721a14f84e1fab6c34e26e9b5 Mon Sep 17 00:00:00 2001 From: Alex MacLean Date: Wed, 24 Jun 2026 10:28:30 -0700 Subject: [PATCH 374/511] [NVPTX] Rewrite kernel signatures in param AS (#204192) Rewrite the kernel signatures moving byval parameters directly into entry parameter address space (similar to how ExpandVariadics handles va_arg functions). This avoids the need for the somewhat hacky nvvm_internal_addrspace_wrap intrinsic and enables better support for parameter short pointers. --- llvm/include/llvm/IR/IntrinsicsNVVM.td | 15 - llvm/lib/Target/NVPTX/NVPTX.h | 4 +- llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 8 +- llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp | 350 +++++++----------- llvm/lib/Target/NVPTX/NVPTXPassRegistry.def | 2 +- llvm/test/CodeGen/NVPTX/bug21465.ll | 4 +- .../CodeGen/NVPTX/lower-args-alignment.ll | 14 +- .../CodeGen/NVPTX/lower-args-gridconstant.ll | 90 +++-- .../CodeGen/NVPTX/lower-byval-args-dbg.ll | 45 +++ .../NVPTX/lower-byval-args-idempotent.ll | 21 ++ llvm/test/CodeGen/NVPTX/lower-byval-args.ll | 319 +++++++++------- .../CodeGen/NVPTX/mark-kernel-ptrs-global.ll | 2 +- 12 files changed, 443 insertions(+), 431 deletions(-) create mode 100644 llvm/test/CodeGen/NVPTX/lower-byval-args-dbg.ll create mode 100644 llvm/test/CodeGen/NVPTX/lower-byval-args-idempotent.ll diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td index 361746c853160..51ea934b1c0f4 100644 --- a/llvm/include/llvm/IR/IntrinsicsNVVM.td +++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td @@ -2137,21 +2137,6 @@ let IntrProperties = [IntrReadMem, IntrArgMemOnly, IntrNoCallback, IntrWillRetur def int_nvvm_ldu_global_p : Intrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty, 
llvm_i32_ty]>; } -// Represents an explicit hole in the LLVM IR type system. It may be inserted by -// the compiler in cases where a pointer is of the wrong type. In the backend -// this intrinsic will be folded away and not equate to any instruction. It -// should not be used by any frontend and should only be considered well defined -// when added in the following cases: -// -// - NVPTXLowerArgs: When wrapping a byval pointer argument to a kernel -// function to convert the address space from generic (0) to param (101). -// This accounts for the fact that the parameter symbols will occupy this -// space when lowered during ISel. -// -def int_nvvm_internal_addrspace_wrap : - NVVMPureIntrinsic<[llvm_anyptr_ty], [llvm_anyptr_ty], - [NoUndef>, NoUndef]>; - // Move intrinsics, used in nvvm internally let IntrProperties = [IntrNoMem] in { diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h index 69acfd1fdeb53..08ccc143dfb8a 100644 --- a/llvm/lib/Target/NVPTX/NVPTX.h +++ b/llvm/lib/Target/NVPTX/NVPTX.h @@ -48,7 +48,7 @@ ModulePass *createNVVMReflectPass(unsigned int SmVersion); MachineFunctionPass *createNVPTXPrologEpilogPass(); MachineFunctionPass *createNVPTXReplaceImageHandlesPass(); FunctionPass *createNVPTXImageOptimizerPass(); -FunctionPass *createNVPTXLowerArgsPass(); +ModulePass *createNVPTXLowerArgsPass(); FunctionPass *createNVPTXSetByValParamAlignPass(); FunctionPass *createNVPTXLowerAllocaPass(); FunctionPass *createNVPTXLowerUnreachablePass(bool TrapUnreachable, @@ -120,7 +120,7 @@ struct NVPTXLowerArgsPass : OptionalPassInfoMixin { public: NVPTXLowerArgsPass(TargetMachine &TM) : TM(TM) {}; - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); }; struct NVPTXMarkKernelPtrsGlobalPass diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp index 17d9f857312d6..1c5caf124c17e 100644 --- 
a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -3171,8 +3171,6 @@ static SDValue lowerIntrinsicWOChain(SDValue Op, SelectionDAG &DAG) { case Intrinsic::nvvm_prmt_rc16: case Intrinsic::nvvm_prmt_rc8: return lowerPrmtIntrinsic(Op, DAG); - case Intrinsic::nvvm_internal_addrspace_wrap: - return Op.getOperand(1); case Intrinsic::nvvm_clusterlaunchcontrol_query_cancel_is_canceled: case Intrinsic::nvvm_clusterlaunchcontrol_query_cancel_get_first_ctaid_x: case Intrinsic::nvvm_clusterlaunchcontrol_query_cancel_get_first_ctaid_y: @@ -4122,8 +4120,10 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( SDValue P; if (IsKernel) { - assert(isParamGridConstant(Arg) && "ByVal argument must be lowered to " - "grid_constant by NVPTXLowerArgs"); + assert(Arg.getType()->getPointerAddressSpace() == + ADDRESS_SPACE_ENTRY_PARAM && + "Kernel ByVal argument must be lowered to the param address " + "space by NVPTXLowerArgs"); P = ArgSymbol; P.getNode()->setIROrder(Arg.getArgNo() + 1); } else { diff --git a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp index ef317626451da..92ba3d214620d 100644 --- a/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp @@ -6,133 +6,39 @@ // //===----------------------------------------------------------------------===// // -// -// Arguments to kernel and device functions are passed via param space, -// which imposes certain restrictions: +// Arguments to kernel functions are passed via param space, which imposes +// certain restrictions: // http://docs.nvidia.com/cuda/parallel-thread-execution/#state-spaces // // Kernel parameters are read-only and accessible only via ld.param // instruction, directly or via a pointer. // -// Device function parameters are directly accessible via -// ld.param/st.param, but taking the address of one returns a pointer -// to a copy created in local space which *can't* be used with -// ld.param/st.param. 
-// // Copying a byval struct into local memory in IR allows us to enforce // the param space restrictions, gives the rest of IR a pointer w/o // param space restrictions, and gives us an opportunity to eliminate // the copy. // -// Pointer arguments to kernel functions need more work to be lowered: -// -// 1. Convert non-byval pointer arguments of CUDA kernels to pointers in the -// global address space. This allows later optimizations to emit -// ld.global.*/st.global.* for accessing these pointer arguments. For -// example, -// -// define void @foo(float* %input) { -// %v = load float, float* %input, align 4 -// ... -// } -// -// becomes -// -// define void @foo(float* %input) { -// %input2 = addrspacecast float* %input to float addrspace(1)* -// %input3 = addrspacecast float addrspace(1)* %input2 to float* -// %v = load float, float* %input3, align 4 -// ... -// } -// -// Later, NVPTXInferAddressSpaces will optimize it to -// -// define void @foo(float* %input) { -// %input2 = addrspacecast float* %input to float addrspace(1)* -// %v = load float, float addrspace(1)* %input2, align 4 -// ... -// } -// -// 2. Convert byval kernel parameters to pointers in the param address space -// (so that NVPTX emits ld/st.param). Convert pointers *within* a byval -// kernel parameter to pointers in the global address space. This allows -// NVPTX to emit ld/st.global. -// -// struct S { -// int *x; -// int *y; -// }; -// __global__ void foo(S s) { -// int *b = s.y; -// // use b -// } -// -// "b" points to the global address space. In the IR level, -// -// define void @foo(ptr byval %input) { -// %b_ptr = getelementptr {ptr, ptr}, ptr %input, i64 0, i32 1 -// %b = load ptr, ptr %b_ptr -// ; use %b -// } +// This pass lowers byval parameters of kernel functions. It rewrites the +// kernel's signature so that each byval argument is declared directly as a +// pointer in the param address space (`ptr addrspace(101)`), then adjusts the +// body to match. 
The parameter symbols occupy this space when lowered during +// ISel, so making the IR type honest avoids the need for a cast or intrinsic to +// reinterpret a generic pointer as a param-space pointer. // -// becomes +// This pass uses 1 of 3 possible strategies to lower byval parameters: // -// define void @foo({i32*, i32*}* byval %input) { -// %b_param = addrspacecat ptr %input to ptr addrspace(101) -// %b_ptr = getelementptr {ptr, ptr}, ptr addrspace(101) %b_param, i64 0, i32 1 -// %b = load ptr, ptr addrspace(101) %b_ptr -// %b_global = addrspacecast ptr %b to ptr addrspace(1) -// ; use %b_generic -// } +// 1. Direct readonly nocapture uses: If we can trace through all the uses and +// we can convert them all to param AS, then we'll do this. This is useful +// for pre-SM70 targets where cvta.param is not available. // -// Create a local copy of kernel byval parameters used in a way that *might* mutate -// the parameter, by storing it in an alloca. Mutations to "grid_constant" parameters -// are undefined behaviour, and don't require local copies. +// 2. Grid constant: If the argument is a grid constant (and the target supports +// cvta.param), we can cast back to generic address space to use the pointer +// directly. // -// define void @foo(ptr byval(%struct.s) align 4 %input) { -// store i32 42, ptr %input -// ret void -// } +// 3. Local copy: If we can't trace through all the uses and we can't convert +// them all to param AS, then we'll create a local copy of the argument in +// local memory. This is useful for arguments that are mutated. 
// -// becomes -// -// define void @foo(ptr byval(%struct.s) align 4 %input) #1 { -// %input1 = alloca %struct.s, align 4 -// %input2 = addrspacecast ptr %input to ptr addrspace(101) -// %input3 = load %struct.s, ptr addrspace(101) %input2, align 4 -// store %struct.s %input3, ptr %input1, align 4 -// store i32 42, ptr %input1, align 4 -// ret void -// } -// -// If %input were passed to a device function, or written to memory, -// conservatively assume that %input gets mutated, and create a local copy. -// -// Convert param pointers to grid_constant byval kernel parameters that are -// passed into calls (device functions, intrinsics, inline asm), or otherwise -// "escape" (into stores/ptrtoints) to the generic address space, using the -// `nvvm.ptr.param.to.gen` intrinsic, so that NVPTX emits cvta.param -// (available for sm70+) -// -// define void @foo(ptr byval(%struct.s) %input) { -// ; %input is a grid_constant -// %call = call i32 @escape(ptr %input) -// ret void -// } -// -// becomes -// -// define void @foo(ptr byval(%struct.s) %input) { -// %input1 = addrspacecast ptr %input to ptr addrspace(101) -// ; the following intrinsic converts pointer to generic. We don't use an addrspacecast -// ; to prevent generic -> param -> generic from getting cancelled out -// %input1.gen = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) %input1) -// %call = call i32 @escape(ptr %input1.gen) -// ret void -// } -// -// TODO: merge this pass with NVPTXInferAddressSpaces so that other passes don't -// cancel the addrspacecast pair this pass emits. 
//===----------------------------------------------------------------------===// #include "NVPTX.h" @@ -144,18 +50,17 @@ #include "llvm/Analysis/PtrUseVisitor.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/Type.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/NVPTXAddrSpace.h" -#include "llvm/Support/NVVMAttributes.h" #define DEBUG_TYPE "nvptx-lower-args" @@ -163,12 +68,12 @@ using namespace llvm; using namespace NVPTXAS; namespace { -class NVPTXLowerArgsLegacyPass : public FunctionPass { - bool runOnFunction(Function &F) override; +class NVPTXLowerArgsLegacyPass : public ModulePass { + bool runOnModule(Module &M) override; public: static char ID; // Pass identification, replacement for typeid - NVPTXLowerArgsLegacyPass() : FunctionPass(ID) {} + NVPTXLowerArgsLegacyPass() : ModulePass(ID) {} StringRef getPassName() const override { return "Lower pointer arguments of CUDA kernels"; } @@ -186,24 +91,6 @@ INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_END(NVPTXLowerArgsLegacyPass, "nvptx-lower-args", "Lower arguments (NVPTX)", false, false) -// ============================================================================= -// If the function had a byval struct ptr arg, say foo(ptr byval(%struct.x) %d), -// and we can't guarantee that the only accesses are loads, -// then add the following instructions to the first basic block: -// -// %temp = alloca %struct.x, align 8 -// %tempd = addrspacecast ptr %d to ptr addrspace(101) -// %tv = load %struct.x, ptr addrspace(101) %tempd -// store %struct.x %tv, ptr %temp, align 8 -// -// The above code allocates some space in the stack and copies the incoming -// 
struct from param space to local space. -// Then replace all occurrences of %d by %temp. -// -// In case we know that all users are GEPs or Loads, replace them with the same -// ones in parameter AS, so we can access them using ld.param. -// ============================================================================= - /// Recursively convert the users of a param to the param address space. static void convertToParamAS(ArrayRef OldUses, Value *Param) { struct IP { @@ -287,25 +174,6 @@ static void convertToParamAS(ArrayRef OldUses, Value *Param) { I->eraseFromParent(); } -// Create a call to the nvvm_internal_addrspace_wrap intrinsic and set the -// alignment of the return value based on the alignment of the argument. -static CallInst *createNVVMInternalAddrspaceWrap(IRBuilder<> &IRB, - Argument &Arg) { - CallInst *ArgInParam = IRB.CreateIntrinsicWithoutFolding( - Intrinsic::nvvm_internal_addrspace_wrap, - {IRB.getPtrTy(ADDRESS_SPACE_ENTRY_PARAM), Arg.getType()}, &Arg, {}, - Arg.getName() + ".param"); - - if (MaybeAlign ParamAlign = Arg.getParamAlign()) - ArgInParam->addRetAttr( - Attribute::getWithAlignment(ArgInParam->getContext(), *ParamAlign)); - - Arg.addAttr(Attribute::get(Arg.getContext(), NVVMAttr::GridConstant)); - Arg.addAttr(Attribute::ReadOnly); - - return ArgInParam; -} - namespace { struct ArgUseChecker : PtrUseVisitor { using Base = PtrUseVisitor; @@ -382,7 +250,12 @@ struct ArgUseChecker : PtrUseVisitor { void visitMemSetInst(MemSetInst &II) { PI.setAborted(&II); } }; // struct ArgUseChecker -void copyByValParam(Function &F, Argument &Arg) { +// Create a local copy of the byval parameter \p Arg in an alloca, filled by a +// copy from \p ParamPtr (a pointer to the parameter), and replace all uses of +// \p Arg with the alloca. \p ParamPtr is either the natively param-space +// argument (when called from the signature rewrite) or the generic byval +// argument itself (when called early, before the signature has been rewritten). 
+void copyByValParam(Function &F, Argument &Arg, Value &ParamPtr) { LLVM_DEBUG(dbgs() << "Creating a local copy of " << Arg << "\n"); Type *ByValType = Arg.getParamByValType(); const DataLayout &DL = F.getDataLayout(); @@ -395,108 +268,165 @@ void copyByValParam(Function &F, Argument &Arg) { Arg.getParamAlign().value_or(DL.getPrefTypeAlign(ByValType))); Arg.replaceAllUsesWith(AllocA); - Value *ArgInParamAS = createNVVMInternalAddrspaceWrap(IRB, Arg); - - // Be sure to propagate alignment to this load; LLVM doesn't know that NVPTX - // addrspacecast preserves alignment. Since params are constant, this load + // Be sure to propagate alignment to this copy; LLVM doesn't know that NVPTX + // addrspacecast preserves alignment. Since params are constant, this copy // is definitely not volatile. const auto ArgSize = *AllocA->getAllocationSize(DL); - IRB.CreateMemCpy(AllocA, AllocA->getAlign(), ArgInParamAS, AllocA->getAlign(), + IRB.CreateMemCpy(AllocA, AllocA->getAlign(), &ParamPtr, AllocA->getAlign(), ArgSize); } } // namespace -static bool argIsProcessed(Argument *Arg) { - if (Arg->use_empty()) - return true; - - // If the argument is already wrapped, it was processed by this pass before. - if (Arg->hasOneUse()) - if (const auto *II = dyn_cast(*Arg->user_begin())) - if (II->getIntrinsicID() == Intrinsic::nvvm_internal_addrspace_wrap) - return true; - - return false; +// Returns true if F has a byval argument not yet in the param address space. +// Such arguments are lowered exactly once, so one already in param space means +// the kernel has already been processed. +static bool kernelNeedsByValLowering(const Function &F) { + return any_of(F.args(), [](const Argument &A) { + return A.hasByValAttr() && + A.getType()->getPointerAddressSpace() != ADDRESS_SPACE_ENTRY_PARAM; + }); } -static void lowerKernelByValParam(Argument *Arg, Function &F, - const bool HasCvtaParam) { +// Lower the uses of a single kernel byval argument. 
\p OldArg is the original +// (generic) argument whose uses are being rewritten; \p NewParamArg is its +// replacement, natively in the param address space. +static void lowerKernelByValParam(Argument &OldArg, Argument &NewParamArg, + Function &F, const bool HasCvtaParam) { assert(isKernelFunction(F)); const DataLayout &DL = F.getDataLayout(); IRBuilder<> IRB(&F.getEntryBlock().front()); - if (argIsProcessed(Arg)) + if (OldArg.use_empty()) return; // (1) First check the easy case, if were able to trace through all the uses // and we can convert them all to param AS, then we'll do this. ArgUseChecker AUC(DL); - ArgUseChecker::PtrInfo PI = AUC.visitArgPtr(*Arg); + ArgUseChecker::PtrInfo PI = AUC.visitArgPtr(OldArg); const bool ArgUseIsReadOnly = !(PI.isEscaped() || PI.isAborted()); if (ArgUseIsReadOnly && AUC.Conditionals.empty()) { // Convert all loads and intermediate operations to use parameter AS and // skip creation of a local copy of the argument. - SmallVector UsesToUpdate(llvm::make_pointer_range(Arg->uses())); - Value *ArgInParamAS = createNVVMInternalAddrspaceWrap(IRB, *Arg); + SmallVector UsesToUpdate(make_pointer_range(OldArg.uses())); for (Use *U : UsesToUpdate) - convertToParamAS(U, ArgInParamAS); + convertToParamAS(U, &NewParamArg); + // This path does not replaceAllUsesWith the old argument, so any debug-info + // uses would be left dangling and reset to poison when the old function is + // erased. Point them at the new param-space argument instead. + if (OldArg.isUsedByMetadata()) { + SmallVector DbgUsers; + findDbgUsers(&OldArg, DbgUsers); + for (DbgVariableRecord *DVR : DbgUsers) + DVR->replaceVariableLocationOp(&OldArg, &NewParamArg); + } return; } // (2) If the argument is grid constant, we get to use the pointer directly. - if (HasCvtaParam && (ArgUseIsReadOnly || isParamGridConstant(*Arg))) { - LLVM_DEBUG(dbgs() << "Using non-copy pointer to " << *Arg << "\n"); - - // Cast argument to param address space. 
Because the backend will emit the - // argument already in the param address space, we need to use the noop - // intrinsic, this had the added benefit of preventing other optimizations - // from folding away this pair of addrspacecasts. - Instruction *ArgInParamAS = createNVVMInternalAddrspaceWrap(IRB, *Arg); + if (HasCvtaParam && (ArgUseIsReadOnly || isParamGridConstant(OldArg))) { + LLVM_DEBUG(dbgs() << "Using non-copy pointer to " << OldArg << "\n"); - // Cast param address to generic address space. + // Cast the param-space argument to the generic address space. Because the + // argument is natively in param space, this cast only ever goes + // param -> generic and lowers to cvta.param; there is no inverse cast for + // InferAddressSpaces to fold it away with. Value *GenericArg = IRB.CreateAddrSpaceCast( - ArgInParamAS, IRB.getPtrTy(ADDRESS_SPACE_GENERIC), - Arg->getName() + ".gen"); - - Arg->replaceAllUsesWith(GenericArg); + &NewParamArg, IRB.getPtrTy(ADDRESS_SPACE_GENERIC), + OldArg.getName() + ".gen"); - // Do not replace Arg in the cast to param space - ArgInParamAS->setOperand(0, Arg); + OldArg.replaceAllUsesWith(GenericArg); return; } // (3) Otherwise we have to create a copy of the argument in local memory. - copyByValParam(F, *Arg); + copyByValParam(F, OldArg, NewParamArg); +} + +// Rewrite a kernel's signature so that each byval argument is declared directly +// as a pointer in the param address space, then lower the body to match. This +// creates a new function, moves the body across, and erases \p F. +static void rewriteKernelByValSignature(Function &F, const bool HasCvtaParam) { + LLVMContext &Ctx = F.getContext(); + FunctionType *FTy = F.getFunctionType(); + + // Build the new signature: byval pointer arguments move to the param address + // space; all other arguments are unchanged. 
+ SmallVector Params(FTy->params()); + for (const Argument &Arg : F.args()) + if (Arg.hasByValAttr()) + Params[Arg.getArgNo()] = PointerType::get(Ctx, ADDRESS_SPACE_ENTRY_PARAM); + + Function *NF = Function::Create( + FunctionType::get(FTy->getReturnType(), Params, FTy->isVarArg()), + F.getLinkage(), F.getAddressSpace()); + NF->copyAttributesFrom(&F); + NF->setComdat(F.getComdat()); + F.getParent()->getFunctionList().insert(F.getIterator(), NF); + + // ISel reads the param symbol directly for kernel byval arguments; this is + // valid because the signature rewrite above puts them in the param address + // space. Mark them readonly: any mutation is redirected to a local copy + // below, so the param itself is never written. + for (Argument &NewArg : NF->args()) + if (NewArg.hasByValAttr()) + NewArg.addAttr(Attribute::ReadOnly); + + // Take over F's name and uses (e.g. @llvm.used, nvvm.annotations metadata), + // then move the body across. + F.replaceAllUsesWith(NF); + NF->takeName(&F); + NF->splice(NF->begin(), &F); + + // Remap arguments. Non-byval arguments keep their type and are replaced + // directly; byval arguments change address space, so their uses are lowered + // to operate on the new param-space argument. + for (auto [OldArg, NewArg] : zip_equal(F.args(), NF->args())) { + if (OldArg.hasByValAttr()) + lowerKernelByValParam(OldArg, NewArg, *NF, HasCvtaParam); + else + OldArg.replaceAllUsesWith(&NewArg); + NewArg.takeName(&OldArg); + } + + // Move function-level metadata (debug info, etc.) to the new function. + NF->copyMetadata(&F, /*Offset=*/0); + F.clearMetadata(); + + F.eraseFromParent(); } // ============================================================================= // Main function for this pass. 
// ============================================================================= static bool processFunction(Function &F, NVPTXTargetMachine &TM) { - if (!isKernelFunction(F)) + if (!isKernelFunction(F) || F.isDeclaration()) return false; - const NVPTXSubtarget *ST = TM.getSubtargetImpl(F); - const bool HasCvtaParam = ST->hasCvtaParam(); + // Skip kernels with no byval arguments, and those already lowered (byval + // arguments sitting in the param address space). + if (!kernelNeedsByValLowering(F)) + return false; LLVM_DEBUG(dbgs() << "Lowering kernel args of " << F.getName() << "\n"); - bool Changed = false; - for (Argument &Arg : F.args()) - if (Arg.hasByValAttr()) { - lowerKernelByValParam(&Arg, F, HasCvtaParam); - Changed = true; - } + const NVPTXSubtarget *ST = TM.getSubtargetImpl(F); + rewriteKernelByValSignature(F, ST->hasCvtaParam()); + return true; +} +static bool processModule(Module &M, NVPTXTargetMachine &TM) { + bool Changed = false; + for (Function &F : make_early_inc_range(M)) + Changed |= processFunction(F, TM); return Changed; } -bool NVPTXLowerArgsLegacyPass::runOnFunction(Function &F) { +bool NVPTXLowerArgsLegacyPass::runOnModule(Module &M) { auto &TM = getAnalysis().getTM(); - return processFunction(F, TM); + return processModule(M, TM); } -FunctionPass *llvm::createNVPTXLowerArgsPass() { +ModulePass *llvm::createNVPTXLowerArgsPass() { return new NVPTXLowerArgsLegacyPass(); } @@ -507,7 +437,7 @@ static bool copyFunctionByValArgs(Function &F) { if (isKernelFunction(F)) { for (Argument &Arg : F.args()) if (Arg.hasByValAttr() && !isParamGridConstant(Arg)) { - copyByValParam(F, Arg); + copyByValParam(F, Arg, Arg); Changed = true; } } @@ -520,9 +450,9 @@ PreservedAnalyses NVPTXCopyByValArgsPass::run(Function &F, : PreservedAnalyses::all(); } -PreservedAnalyses NVPTXLowerArgsPass::run(Function &F, - FunctionAnalysisManager &AM) { +PreservedAnalyses NVPTXLowerArgsPass::run(Module &M, + ModuleAnalysisManager &AM) { auto &NTM = static_cast(TM); - bool 
Changed = processFunction(F, NTM); + bool Changed = processModule(M, NTM); return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); } diff --git a/llvm/lib/Target/NVPTX/NVPTXPassRegistry.def b/llvm/lib/Target/NVPTX/NVPTXPassRegistry.def index df4785cce54ba..70d4e92a32e2f 100644 --- a/llvm/lib/Target/NVPTX/NVPTXPassRegistry.def +++ b/llvm/lib/Target/NVPTX/NVPTXPassRegistry.def @@ -17,6 +17,7 @@ #define MODULE_PASS(NAME, CREATE_PASS) #endif MODULE_PASS("generic-to-nvvm", GenericToNVVMPass()) +MODULE_PASS("nvptx-lower-args", NVPTXLowerArgsPass(*this)) MODULE_PASS("nvptx-lower-ctor-dtor", NVPTXCtorDtorLoweringPass()) MODULE_PASS("nvvm-reflect", NVVMReflectPass()) #undef MODULE_PASS @@ -38,7 +39,6 @@ FUNCTION_ALIAS_ANALYSIS("nvptx-aa", NVPTXAA()) #endif FUNCTION_PASS("nvvm-intr-range", NVVMIntrRangePass()) FUNCTION_PASS("nvptx-copy-byval-args", NVPTXCopyByValArgsPass()) -FUNCTION_PASS("nvptx-lower-args", NVPTXLowerArgsPass(*this)) FUNCTION_PASS("nvptx-set-byval-param-align", NVPTXSetByValParamAlignPass()) FUNCTION_PASS("nvptx-mark-kernel-ptrs-global", NVPTXMarkKernelPtrsGlobalPass()) FUNCTION_PASS("nvptx-tag-invariant-loads", NVPTXTagInvariantLoadsPass()) diff --git a/llvm/test/CodeGen/NVPTX/bug21465.ll b/llvm/test/CodeGen/NVPTX/bug21465.ll index 81691d65f01b6..9e24d9d28e176 100644 --- a/llvm/test/CodeGen/NVPTX/bug21465.ll +++ b/llvm/test/CodeGen/NVPTX/bug21465.ll @@ -11,8 +11,10 @@ target triple = "nvptx64-unknown-unknown" define ptx_kernel void @_Z11TakesStruct1SPi(ptr byval(%struct.S) align 4 nocapture readonly %input, ptr nocapture %output) #0 { entry: ; CHECK-LABEL: @_Z11TakesStruct1SPi +; CHECK-SAME: ptr addrspace(101) readonly byval(%struct.S) align 4 captures(none) %input ; PTX-LABEL: .visible .entry _Z11TakesStruct1SPi( -; CHECK: call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr %input) +; CHECK: getelementptr inbounds %struct.S, ptr addrspace(101) %input, i64 0, i32 1 +; CHECK: load i32, ptr addrspace(101) %b = 
getelementptr inbounds %struct.S, ptr %input, i64 0, i32 1 %0 = load i32, ptr %b, align 4 ; PTX-NOT: ld.param.b32 {{%r[0-9]+}}, [{{%rd[0-9]+}}] diff --git a/llvm/test/CodeGen/NVPTX/lower-args-alignment.ll b/llvm/test/CodeGen/NVPTX/lower-args-alignment.ll index c9acebf13ff07..2ef61841d1210 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args-alignment.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args-alignment.ll @@ -4,17 +4,16 @@ target triple = "nvptx64-nvidia-cuda" ; ------------------------------------------------------------------------------ -; Test that alignment can be inferred through llvm.nvvm.internal.addrspace.wrap.p101.p0 intrinsics -; thanks to the alignment attribute on the intrinsic +; Test that alignment can be inferred for loads from a byval kernel parameter +; thanks to the alignment attribute on the param-space argument. ; ------------------------------------------------------------------------------ %struct.S1 = type { i32, i32, i32, i32 } define ptx_kernel i32 @test_align8(ptr noundef readonly byval(%struct.S1) align 8 captures(none) %params) { ; CHECK-LABEL: define ptx_kernel i32 @test_align8( -; CHECK-SAME: ptr noundef readonly byval([[STRUCT_S1:%.*]]) align 8 captures(none) "nvvm.grid_constant" [[PARAMS:%.*]]) { +; CHECK-SAME: ptr addrspace(101) noundef readonly byval([[STRUCT_S1:%.*]]) align 8 captures(none) [[PARAMS:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = call align 8 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[PARAMS]]) -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(101) [[TMP0]], align 8 +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(101) [[PARAMS]], align 8 ; CHECK-NEXT: ret i32 [[LOAD]] ; entry: @@ -24,10 +23,9 @@ entry: define ptx_kernel i32 @test_align1(ptr noundef readonly byval(%struct.S1) align 4 captures(none) %params) { ; CHECK-LABEL: define ptx_kernel i32 @test_align1( -; CHECK-SAME: ptr noundef readonly byval([[STRUCT_S1:%.*]]) align 4 captures(none) "nvvm.grid_constant" 
[[PARAMS:%.*]]) { +; CHECK-SAME: ptr addrspace(101) noundef readonly byval([[STRUCT_S1:%.*]]) align 4 captures(none) [[PARAMS:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[PARAMS]]) -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(101) [[TMP0]], align 4 +; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(101) [[PARAMS]], align 4 ; CHECK-NEXT: ret i32 [[LOAD]] ; entry: diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll index 4d9cb6ff97413..4829220d3cc70 100644 --- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll +++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll @@ -71,12 +71,12 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 "nvvm.grid_constan ; PTX-NEXT: st.global.b32 [%rd2], %r3; ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_int( -; OPT-SAME: ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[INPUT11:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) -; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT11]], align 4 +; OPT-SAME: ptr addrspace(101) readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], i32 [[INPUT2:%.*]], ptr [[OUT:%.*]], i32 [[N:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[TMP:%.*]] = load i32, ptr addrspace(101) [[INPUT1]], align 4 ; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]] ; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4 ; OPT-NEXT: ret void +; %tmp = load i32, ptr %input1, align 4 %add = add i32 %tmp, %input2 store i32 %add, ptr %out @@ -100,15 +100,15 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 "nvvm.gri ; PTX-NEXT: st.global.b32 [%rd2], %r3; ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_struct( -; OPT-SAME: 
ptr readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[INPUT1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) -; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 0 -; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT1]], i32 0, i32 1 +; OPT-SAME: ptr addrspace(101) readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[OUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[GEP13:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT]], i32 0, i32 0 +; OPT-NEXT: [[GEP22:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr addrspace(101) [[INPUT]], i32 0, i32 1 ; OPT-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(101) [[GEP13]], align 4 ; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(101) [[GEP22]], align 4 ; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]] ; OPT-NEXT: store i32 [[ADD]], ptr [[OUT]], align 4 ; OPT-NEXT: ret void +; %gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0 %gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1 %int1 = load i32, ptr %gep1 @@ -136,11 +136,11 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 "nvvm.gri ; PTX-NEXT: } // callseq 0 ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_escape( -; OPT-SAME: ptr readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) -; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr +; OPT-SAME: ptr addrspace(101) readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT]] to ptr ; OPT-NEXT: 
[[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]]) ; OPT-NEXT: ret void +; %call = call i32 @escape(ptr %input) ret void } @@ -158,9 +158,9 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 ; PTX-NEXT: mov.b64 %SPL, __local_depot4; ; PTX-NEXT: cvta.local.u64 %SP, %SPL; ; PTX-NEXT: mov.b64 %rd1, multiple_grid_const_escape_param_0; -; PTX-NEXT: ld.param.b32 %r1, [multiple_grid_const_escape_param_1]; ; PTX-NEXT: mov.b64 %rd2, multiple_grid_const_escape_param_2; ; PTX-NEXT: cvta.param.u64 %rd3, %rd2; +; PTX-NEXT: ld.param.b32 %r1, [multiple_grid_const_escape_param_1]; ; PTX-NEXT: cvta.param.u64 %rd4, %rd1; ; PTX-NEXT: add.u64 %rd5, %SP, 0; ; PTX-NEXT: add.u64 %rd6, %SPL, 0; @@ -179,15 +179,14 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 ; PTX-NEXT: } // callseq 1 ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape( -; OPT-SAME: ptr readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], i32 [[A:%.*]], ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[B:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[B]]) -; OPT-NEXT: [[B_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr -; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) -; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr +; OPT-SAME: ptr addrspace(101) readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], i32 [[A:%.*]], ptr addrspace(101) readonly byval(i32) align 4 "nvvm.grid_constant" [[B:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[B_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[B]] to ptr +; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT]] to ptr ; OPT-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; OPT-NEXT: store i32 [[A]], ptr [[A_ADDR]], 
align 4 ; OPT-NEXT: [[CALL:%.*]] = call i32 @escape3(ptr [[INPUT_PARAM_GEN]], ptr [[A_ADDR]], ptr [[B_PARAM_GEN]]) ; OPT-NEXT: ret void +; %a.addr = alloca i32, align 4 store i32 %a, ptr %a.addr, align 4 %call = call i32 @escape3(ptr %input, ptr %a.addr, ptr %b) @@ -207,11 +206,11 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 "n ; PTX-NEXT: st.global.b64 [%rd3], %rd4; ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_memory_escape( -; OPT-SAME: ptr readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) -; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr +; OPT-SAME: ptr addrspace(101) readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[ADDR:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[INPUT]] to ptr ; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR]], align 8 ; OPT-NEXT: ret void +; store ptr %input, ptr %addr, align 8 ret void } @@ -234,14 +233,14 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4 ; PTX-NEXT: ret; ; PTX-NOT .local ; OPT-LABEL: define ptx_kernel void @grid_const_inlineasm_escape( -; OPT-SAME: ptr readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) -; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr +; OPT-SAME: ptr addrspace(101) readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[RESULT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[INPUT]] to ptr ; OPT-NEXT: [[TMPPTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 
0 ; OPT-NEXT: [[TMPPTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1 ; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2 ; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT]], align 8 ; OPT-NEXT: ret void +; %tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0 %tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1 %1 = call i64 asm "add.s64 $0, $1, $2;", "=l,l,l"(ptr %tmpptr1, ptr %tmpptr2) #1 @@ -273,14 +272,14 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) align 4 "nvvm.g ; PTX-NEXT: } // callseq 2 ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape( -; OPT-SAME: ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) -; OPT-NEXT: [[INPUT1_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr +; OPT-SAME: ptr addrspace(101) readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INPUT1_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT]] to ptr ; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[INPUT1_GEN]], align 4 ; OPT-NEXT: [[TWICE:%.*]] = add i32 [[VAL1]], [[VAL1]] ; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT]], align 4 ; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]]) ; OPT-NEXT: ret void +; %val = load i32, ptr %input %twice = add i32 %val, %val store i32 %twice, ptr %output @@ -314,9 +313,8 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) align 4 ; PTX-NEXT: st.param.b32 [func_retval0], %r3; ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel i32 @grid_const_partial_escapemem( -; OPT-SAME: ptr readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) 
@llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) -; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr +; OPT-SAME: ptr addrspace(101) readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT:%.*]], ptr [[OUTPUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[INPUT]] to ptr ; OPT-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0 ; OPT-NEXT: [[VAL1:%.*]] = load i32, ptr [[PTR1]], align 4 ; OPT-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 1 @@ -325,6 +323,7 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) align 4 ; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]] ; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]]) ; OPT-NEXT: ret i32 [[ADD]] +; %ptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0 %val1 = load i32, ptr %ptr1 %ptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1 @@ -356,9 +355,8 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 "nvvm.grid_c ; PTX-NEXT: st.global.b32 [%rd1], %r2; ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_phi( -; OPT-SAME: ptr readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) -; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr +; OPT-SAME: ptr addrspace(101) readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT1]] to ptr ; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4 ; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0 ; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]] @@ -373,6 +371,7 @@ define 
ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 "nvvm.grid_c ; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4 ; OPT-NEXT: ret void +; %val = load i32, ptr %inout %less = icmp slt i32 %val, 0 @@ -413,11 +412,9 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 "nvvm.gr ; PTX-NEXT: st.global.b32 [%rd1], %r2; ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_phi_ngc( -; OPT-SAME: ptr readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr readonly byval([[STRUCT_S]]) align 4 "nvvm.grid_constant" [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) -; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr -; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) -; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr +; OPT-SAME: ptr addrspace(101) readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr addrspace(101) readonly byval([[STRUCT_S]]) align 4 [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT2]] to ptr +; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT1]] to ptr ; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4 ; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0 ; OPT-NEXT: br i1 [[LESS]], label %[[FIRST:.*]], label %[[SECOND:.*]] @@ -432,6 +429,7 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 "nvvm.gr ; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4 ; OPT-NEXT: ret void +; %val = load i32, ptr %inout %less = icmp slt i32 %val, 
0 br i1 %less, label %first, label %second @@ -468,17 +466,16 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 "nvvm.grid_cons ; PTX-NEXT: st.global.b32 [%rd3], %r2; ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel void @grid_const_select( -; OPT-SAME: ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) -; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr -; OPT-NEXT: [[TMP2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) -; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP2]] to ptr +; OPT-SAME: ptr addrspace(101) readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr addrspace(101) readonly byval(i32) align 4 [[INPUT2:%.*]], ptr [[INOUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT2]] to ptr +; OPT-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT1]] to ptr ; OPT-NEXT: [[VAL:%.*]] = load i32, ptr [[INOUT]], align 4 ; OPT-NEXT: [[LESS:%.*]] = icmp slt i32 [[VAL]], 0 ; OPT-NEXT: [[PTRNEW:%.*]] = select i1 [[LESS]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]] ; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 ; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT]], align 4 ; OPT-NEXT: ret void +; %val = load i32, ptr %inout %less = icmp slt i32 %val, 0 %ptrnew = select i1 %less, ptr %input1, ptr %input2 @@ -502,13 +499,13 @@ define ptx_kernel i32 @grid_const_ptrtoint(ptr byval(i32) align 4 "nvvm.grid_con ; PTX-NEXT: st.param.b32 [func_retval0], %r3; ; PTX-NEXT: ret; ; OPT-LABEL: define ptx_kernel i32 @grid_const_ptrtoint( -; OPT-SAME: ptr readonly byval(i32) align 4 "nvvm.grid_constant" 
[[INPUT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[INPUT2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) -; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[INPUT2]] to ptr +; OPT-SAME: ptr addrspace(101) readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INPUT1:%.*]] = addrspacecast ptr addrspace(101) [[INPUT]] to ptr ; OPT-NEXT: [[INPUT3:%.*]] = load i32, ptr [[INPUT1]], align 4 ; OPT-NEXT: [[PTRVAL:%.*]] = ptrtoint ptr [[INPUT1]] to i32 ; OPT-NEXT: [[KEEPALIVE:%.*]] = add i32 [[INPUT3]], [[PTRVAL]] ; OPT-NEXT: ret i32 [[KEEPALIVE]] +; %val = load i32, ptr %input %ptrval = ptrtoint ptr %input to i32 %keepalive = add i32 %val, %ptrval @@ -519,9 +516,8 @@ declare void @device_func(ptr byval(i32) align 4) define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 "nvvm.grid_constant" %input) { ; OPT-LABEL: define ptx_kernel void @test_forward_byval_arg( -; OPT-SAME: ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT:%.*]]) #[[ATTR0]] { -; OPT-NEXT: [[INPUT_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) -; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT_PARAM]] to ptr +; OPT-SAME: ptr addrspace(101) readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT:%.*]]) #[[ATTR0]] { +; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT]] to ptr ; OPT-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT_PARAM_GEN]]) ; OPT-NEXT: ret void ; diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args-dbg.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args-dbg.ll new file mode 100644 index 0000000000000..be5523ec38b41 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/lower-byval-args-dbg.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; Verify that lowering a read-only byval kernel argument 
preserves debug-info +; uses of the argument: they must be re-pointed at the new param-space argument +; rather than left dangling (which would reset them to `ptr poison` when the old +; function is erased). +; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -passes=nvptx-lower-args -S | FileCheck %s + +%struct.S = type { i32, i32 } + +define ptx_kernel void @read_only_dbg(ptr %out, ptr byval(%struct.S) align 4 %s) !dbg !4 { +; CHECK-LABEL: define ptx_kernel void @read_only_dbg( +; CHECK-SAME: ptr [[OUT:%.*]], ptr addrspace(101) readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) #[[ATTR0:[0-9]+]] !dbg [[DBG3:![0-9]+]] { +; CHECK-NEXT: #dbg_value(ptr addrspace(101) [[S]], [[META6:![0-9]+]], !DIExpression(), [[META8:![0-9]+]]) +; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[S]], align 4, !dbg [[META8]] +; CHECK-NEXT: store i32 [[I]], ptr [[OUT]], align 4, !dbg [[META8]] +; CHECK-NEXT: ret void, !dbg [[META8]] +; + #dbg_value(ptr %s, !10, !DIExpression(), !11) + %i = load i32, ptr %s, align 4, !dbg !11 + store i32 %i, ptr %out, align 4, !dbg !11 + ret void, !dbg !11 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, emissionKind: FullDebug) +!1 = !DIFile(filename: "t.c", directory: "/") +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = distinct !DISubprogram(name: "read_only_dbg", scope: !1, file: !1, line: 1, type: !5, unit: !0) +!5 = !DISubroutineType(types: !6) +!6 = !{null} +!7 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed) +!10 = !DILocalVariable(name: "s", arg: 2, scope: !4, file: !1, line: 1, type: !7) +!11 = !DILocation(line: 1, column: 1, scope: !4) +;. 
+; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: [[META1:![0-9]+]], isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug) +; CHECK: [[META1]] = !DIFile(filename: "{{.*}}t.c", directory: {{.*}}) +; CHECK: [[DBG3]] = distinct !DISubprogram(name: "read_only_dbg", scope: [[META1]], file: [[META1]], line: 1, type: [[META4:![0-9]+]], spFlags: DISPFlagDefinition, unit: [[META0]]) +; CHECK: [[META4]] = !DISubroutineType(types: [[META5:![0-9]+]]) +; CHECK: [[META5]] = !{null} +; CHECK: [[META6]] = !DILocalVariable(name: "s", arg: 2, scope: [[DBG3]], file: [[META1]], line: 1, type: [[META7:![0-9]+]]) +; CHECK: [[META7]] = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed) +; CHECK: [[META8]] = !DILocation(line: 1, column: 1, scope: [[DBG3]]) +;. diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args-idempotent.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args-idempotent.ll new file mode 100644 index 0000000000000..0ffe1acfac1b0 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/lower-byval-args-idempotent.ll @@ -0,0 +1,21 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; Running nvptx-lower-args twice must be a no-op the second time: once a byval +; kernel argument has been rewritten into the param address space, the pass +; recognizes it as already lowered and leaves it alone. Both RUN lines share the +; same CHECK prefix, so the single-run and double-run outputs must be identical. 
+; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -passes=nvptx-lower-args -S | FileCheck %s +; RUN: opt < %s -mtriple nvptx64 -mcpu=sm_70 -mattr=ptx77 -passes=nvptx-lower-args,nvptx-lower-args -S | FileCheck %s + +%struct.S = type { i32, i32 } + +define ptx_kernel void @read_only(ptr %out, ptr byval(%struct.S) align 4 %s) { +; CHECK-LABEL: define ptx_kernel void @read_only( +; CHECK-SAME: ptr [[OUT:%.*]], ptr addrspace(101) readonly byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[S]], align 4 +; CHECK-NEXT: store i32 [[I]], ptr [[OUT]], align 4 +; CHECK-NEXT: ret void +; + %i = load i32, ptr %s, align 4 + store i32 %i, ptr %out, align 4 + ret void +} diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll index a3144c5768431..8c92264959583 100644 --- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll +++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll @@ -30,10 +30,9 @@ declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2 ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) define dso_local ptx_kernel void @read_only(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { ; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @read_only( -; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) "nvvm.grid_constant" [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr addrspace(101) noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { ; LOWER-ARGS-NEXT: [[ENTRY:.*:]] -; LOWER-ARGS-NEXT: [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) -; 
LOWER-ARGS-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[S3]], align 4 +; LOWER-ARGS-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[S]], align 4 ; LOWER-ARGS-NEXT: store i32 [[I]], ptr [[OUT]], align 4 ; LOWER-ARGS-NEXT: ret void ; @@ -64,10 +63,9 @@ entry: ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) define dso_local ptx_kernel void @read_only_gep(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { ; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @read_only_gep( -; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) "nvvm.grid_constant" [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr addrspace(101) noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; LOWER-ARGS-NEXT: [[ENTRY:.*:]] -; LOWER-ARGS-NEXT: [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) -; LOWER-ARGS-NEXT: [[B4:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(101) [[S3]], i64 4 +; LOWER-ARGS-NEXT: [[B4:%.*]] = getelementptr inbounds nuw i8, ptr addrspace(101) [[S]], i64 4 ; LOWER-ARGS-NEXT: [[I:%.*]] = load i32, ptr addrspace(101) [[B4]], align 4 ; LOWER-ARGS-NEXT: store i32 [[I]], ptr [[OUT]], align 4 ; LOWER-ARGS-NEXT: ret void @@ -124,14 +122,21 @@ entry: ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) define dso_local ptx_kernel void @escape_ptr(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; COMMON-LABEL: define dso_local ptx_kernel void @escape_ptr( -; COMMON-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[S:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] { -; COMMON-NEXT: [[ENTRY:.*:]] -; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) -; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false) -; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S1]]) #[[ATTR6:[0-9]+]] -; COMMON-NEXT: ret void +; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @escape_ptr( +; LOWER-ARGS-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr addrspace(101) noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; LOWER-ARGS-NEXT: [[ENTRY:.*:]] +; LOWER-ARGS-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 4 +; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S]], ptr addrspace(101) align 4 [[S1]], i64 8, i1 false) +; LOWER-ARGS-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S]]) #[[ATTR5:[0-9]+]] +; LOWER-ARGS-NEXT: ret void +; +; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr( +; COPY-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COPY-NEXT: [[ENTRY:.*:]] +; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 +; COPY-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S1]], ptr align 4 [[S]], i64 8, i1 false) +; COPY-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[S1]]) #[[ATTR4:[0-9]+]] +; COPY-NEXT: ret void ; ; PTX-LABEL: escape_ptr( ; PTX: { @@ -163,15 +168,23 @@ entry: ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) define dso_local ptx_kernel void @escape_ptr_gep(ptr nocapture noundef readnone %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; COMMON-LABEL: define dso_local ptx_kernel void @escape_ptr_gep( -; COMMON-SAME: ptr noundef readnone 
captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { -; COMMON-NEXT: [[ENTRY:.*:]] -; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) -; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false) -; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4 -; COMMON-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[B]]) #[[ATTR6]] -; COMMON-NEXT: ret void +; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @escape_ptr_gep( +; LOWER-ARGS-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr addrspace(101) noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; LOWER-ARGS-NEXT: [[ENTRY:.*:]] +; LOWER-ARGS-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 4 +; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S]], ptr addrspace(101) align 4 [[S1]], i64 8, i1 false) +; LOWER-ARGS-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 4 +; LOWER-ARGS-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[B]]) #[[ATTR5]] +; LOWER-ARGS-NEXT: ret void +; +; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr_gep( +; COPY-SAME: ptr noundef readnone captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COPY-NEXT: [[ENTRY:.*:]] +; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 +; COPY-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S1]], ptr align 4 [[S]], i64 8, i1 false) +; COPY-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4 +; COPY-NEXT: call void @_Z6escapePv(ptr noundef nonnull [[B]]) #[[ATTR4]] +; COPY-NEXT: ret void ; ; PTX-LABEL: escape_ptr_gep( ; PTX: { @@ -205,14 +218,21 @@ entry: ; Function Attrs: mustprogress nofree 
norecurse nosync nounwind willreturn memory(argmem: readwrite) define dso_local ptx_kernel void @escape_ptr_store(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; COMMON-LABEL: define dso_local ptx_kernel void @escape_ptr_store( -; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { -; COMMON-NEXT: [[ENTRY:.*:]] -; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) -; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false) -; COMMON-NEXT: store ptr [[S1]], ptr [[OUT]], align 8 -; COMMON-NEXT: ret void +; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @escape_ptr_store( +; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr addrspace(101) noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; LOWER-ARGS-NEXT: [[ENTRY:.*:]] +; LOWER-ARGS-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 4 +; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S]], ptr addrspace(101) align 4 [[S1]], i64 8, i1 false) +; LOWER-ARGS-NEXT: store ptr [[S]], ptr [[OUT]], align 8 +; LOWER-ARGS-NEXT: ret void +; +; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr_store( +; COPY-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COPY-NEXT: [[ENTRY:.*:]] +; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 +; COPY-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S1]], ptr align 4 [[S]], i64 8, i1 false) +; COPY-NEXT: store ptr [[S1]], ptr [[OUT]], align 8 +; COPY-NEXT: ret void ; ; PTX-LABEL: escape_ptr_store( ; PTX: { @@ -242,15 +262,23 @@ entry: ; 
Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) define dso_local ptx_kernel void @escape_ptr_gep_store(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; COMMON-LABEL: define dso_local ptx_kernel void @escape_ptr_gep_store( -; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { -; COMMON-NEXT: [[ENTRY:.*:]] -; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) -; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false) -; COMMON-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4 -; COMMON-NEXT: store ptr [[B]], ptr [[OUT]], align 8 -; COMMON-NEXT: ret void +; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @escape_ptr_gep_store( +; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr addrspace(101) noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; LOWER-ARGS-NEXT: [[ENTRY:.*:]] +; LOWER-ARGS-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 4 +; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S]], ptr addrspace(101) align 4 [[S1]], i64 8, i1 false) +; LOWER-ARGS-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S]], i64 4 +; LOWER-ARGS-NEXT: store ptr [[B]], ptr [[OUT]], align 8 +; LOWER-ARGS-NEXT: ret void +; +; COPY-LABEL: define dso_local ptx_kernel void @escape_ptr_gep_store( +; COPY-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COPY-NEXT: [[ENTRY:.*:]] +; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 +; COPY-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[S1]], ptr align 4 [[S]], i64 8, i1 false) +; COPY-NEXT: [[B:%.*]] = getelementptr inbounds nuw i8, ptr [[S1]], i64 4 +; COPY-NEXT: store ptr [[B]], ptr [[OUT]], align 8 +; COPY-NEXT: ret void ; ; PTX-LABEL: escape_ptr_gep_store( ; PTX: { @@ -282,15 +310,23 @@ entry: ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) define dso_local ptx_kernel void @escape_ptrtoint(ptr nocapture noundef writeonly %out, ptr noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; COMMON-LABEL: define dso_local ptx_kernel void @escape_ptrtoint( -; COMMON-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { -; COMMON-NEXT: [[ENTRY:.*:]] -; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) -; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false) -; COMMON-NEXT: [[I:%.*]] = ptrtoint ptr [[S1]] to i64 -; COMMON-NEXT: store i64 [[I]], ptr [[OUT]], align 8 -; COMMON-NEXT: ret void +; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @escape_ptrtoint( +; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr addrspace(101) noundef readonly byval([[STRUCT_S:%.*]]) align 4 [[S1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; LOWER-ARGS-NEXT: [[ENTRY:.*:]] +; LOWER-ARGS-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 4 +; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S]], ptr addrspace(101) align 4 [[S1]], i64 8, i1 false) +; LOWER-ARGS-NEXT: [[I:%.*]] = ptrtoint ptr [[S]] to i64 +; LOWER-ARGS-NEXT: store i64 [[I]], ptr [[OUT]], align 8 +; LOWER-ARGS-NEXT: ret void +; +; COPY-LABEL: define dso_local ptx_kernel void @escape_ptrtoint( +; COPY-SAME: ptr noundef writeonly 
captures(none) [[OUT:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COPY-NEXT: [[ENTRY:.*:]] +; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 +; COPY-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S1]], ptr align 4 [[S]], i64 8, i1 false) +; COPY-NEXT: [[I:%.*]] = ptrtoint ptr [[S1]] to i64 +; COPY-NEXT: store i64 [[I]], ptr [[OUT]], align 8 +; COPY-NEXT: ret void ; ; PTX-LABEL: escape_ptrtoint( ; PTX: { @@ -322,10 +358,9 @@ entry: ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) define dso_local ptx_kernel void @memcpy_from_param(ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 4 %s) local_unnamed_addr #0 { ; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @memcpy_from_param( -; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) "nvvm.grid_constant" [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr addrspace(101) noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; LOWER-ARGS-NEXT: [[ENTRY:.*:]] -; LOWER-ARGS-NEXT: [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) -; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT]], ptr addrspace(101) [[S3]], i64 16, i1 true) +; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT]], ptr addrspace(101) [[S]], i64 16, i1 true) ; LOWER-ARGS-NEXT: ret void ; ; COPY-LABEL: define dso_local ptx_kernel void @memcpy_from_param( @@ -382,10 +417,9 @@ entry: ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) define dso_local ptx_kernel void @memcpy_from_param_noalign (ptr nocapture noundef writeonly %out, ptr nocapture noundef readonly byval(%struct.S) align 
4 %s) local_unnamed_addr #0 { ; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @memcpy_from_param_noalign( -; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) "nvvm.grid_constant" [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; LOWER-ARGS-SAME: ptr noundef writeonly captures(none) [[OUT:%.*]], ptr addrspace(101) noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { ; LOWER-ARGS-NEXT: [[ENTRY:.*:]] -; LOWER-ARGS-NEXT: [[S3:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) -; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT]], ptr addrspace(101) [[S3]], i64 16, i1 true) +; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr [[OUT]], ptr addrspace(101) [[S]], i64 16, i1 true) ; LOWER-ARGS-NEXT: ret void ; ; COPY-LABEL: define dso_local ptx_kernel void @memcpy_from_param_noalign( @@ -441,14 +475,21 @@ entry: ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) define dso_local ptx_kernel void @memcpy_to_param(ptr nocapture noundef readonly %in, ptr nocapture noundef byval(%struct.S) align 4 %s) local_unnamed_addr #0 { -; COMMON-LABEL: define dso_local ptx_kernel void @memcpy_to_param( -; COMMON-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) "nvvm.grid_constant" [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { -; COMMON-NEXT: [[ENTRY:.*:]] -; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) -; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false) -; COMMON-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S1]], ptr [[IN]], i64 16, i1 true) -; COMMON-NEXT: ret void +; 
LOWER-ARGS-LABEL: define dso_local ptx_kernel void @memcpy_to_param( +; LOWER-ARGS-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr addrspace(101) noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S1:%.*]]) local_unnamed_addr #[[ATTR0]] { +; LOWER-ARGS-NEXT: [[ENTRY:.*:]] +; LOWER-ARGS-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 4 +; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S]], ptr addrspace(101) align 4 [[S1]], i64 8, i1 false) +; LOWER-ARGS-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S]], ptr [[IN]], i64 16, i1 true) +; LOWER-ARGS-NEXT: ret void +; +; COPY-LABEL: define dso_local ptx_kernel void @memcpy_to_param( +; COPY-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COPY-NEXT: [[ENTRY:.*:]] +; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 +; COPY-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S1]], ptr align 4 [[S]], i64 8, i1 false) +; COPY-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr [[S1]], ptr [[IN]], i64 16, i1 true) +; COPY-NEXT: ret void ; ; PTX-LABEL: memcpy_to_param( ; PTX: { @@ -496,15 +537,23 @@ entry: ; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) define dso_local ptx_kernel void @copy_on_store(ptr nocapture noundef readonly %in, ptr nocapture noundef byval(%struct.S) align 4 %s, i1 noundef zeroext %b) local_unnamed_addr #0 { -; COMMON-LABEL: define dso_local ptx_kernel void @copy_on_store( -; COMMON-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) "nvvm.grid_constant" [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { -; COMMON-NEXT: [[BB:.*:]] -; COMMON-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 -; COMMON-NEXT: [[S2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[S]]) -; 
COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S1]], ptr addrspace(101) align 4 [[S2]], i64 8, i1 false) -; COMMON-NEXT: [[I:%.*]] = load i32, ptr [[IN]], align 4 -; COMMON-NEXT: store i32 [[I]], ptr [[S1]], align 4 -; COMMON-NEXT: ret void +; LOWER-ARGS-LABEL: define dso_local ptx_kernel void @copy_on_store( +; LOWER-ARGS-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr addrspace(101) noundef readonly byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S1:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; LOWER-ARGS-NEXT: [[BB:.*:]] +; LOWER-ARGS-NEXT: [[S:%.*]] = alloca [[STRUCT_S]], align 4 +; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[S]], ptr addrspace(101) align 4 [[S1]], i64 8, i1 false) +; LOWER-ARGS-NEXT: [[I:%.*]] = load i32, ptr [[IN]], align 4 +; LOWER-ARGS-NEXT: store i32 [[I]], ptr [[S]], align 4 +; LOWER-ARGS-NEXT: ret void +; +; COPY-LABEL: define dso_local ptx_kernel void @copy_on_store( +; COPY-SAME: ptr noundef readonly captures(none) [[IN:%.*]], ptr noundef byval([[STRUCT_S:%.*]]) align 4 captures(none) [[S:%.*]], i1 noundef zeroext [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +; COPY-NEXT: [[BB:.*:]] +; COPY-NEXT: [[S1:%.*]] = alloca [[STRUCT_S]], align 4 +; COPY-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[S1]], ptr align 4 [[S]], i64 8, i1 false) +; COPY-NEXT: [[I:%.*]] = load i32, ptr [[IN]], align 4 +; COPY-NEXT: store i32 [[I]], ptr [[S1]], align 4 +; COPY-NEXT: ret void ; ; PTX-LABEL: copy_on_store( ; PTX: { @@ -520,40 +569,34 @@ bb: define ptx_kernel void @test_select(ptr byval(i32) align 4 %input1, ptr byval(i32) align 4 %input2, ptr %out, i1 %cond) { ; SM_60-LABEL: define ptx_kernel void @test_select( -; SM_60-SAME: ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { +; SM_60-SAME: ptr addrspace(101) readonly byval(i32) align 4 
[[INPUT11:%.*]], ptr addrspace(101) readonly byval(i32) align 4 [[INPUT22:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { ; SM_60-NEXT: [[BB:.*:]] ; SM_60-NEXT: [[INPUT24:%.*]] = alloca i32, align 4 -; SM_60-NEXT: [[INPUT25:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) -; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT24]], ptr addrspace(101) align 4 [[INPUT25]], i64 4, i1 false) -; SM_60-NEXT: [[INPUT11:%.*]] = alloca i32, align 4 -; SM_60-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) -; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false) -; SM_60-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT24]] +; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT24]], ptr addrspace(101) align 4 [[INPUT22]], i64 4, i1 false) +; SM_60-NEXT: [[INPUT1:%.*]] = alloca i32, align 4 +; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT1]], ptr addrspace(101) align 4 [[INPUT11]], i64 4, i1 false) +; SM_60-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT1]], ptr [[INPUT24]] ; SM_60-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 ; SM_60-NEXT: store i32 [[VALLOADED]], ptr [[OUT]], align 4 ; SM_60-NEXT: ret void ; ; SM_70-LABEL: define ptx_kernel void @test_select( -; SM_70-SAME: ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { +; SM_70-SAME: ptr addrspace(101) readonly byval(i32) align 4 [[INPUT1:%.*]], ptr addrspace(101) readonly byval(i32) align 4 [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { ; SM_70-NEXT: [[BB:.*:]] -; SM_70-NEXT: [[TMP0:%.*]] = call align 4 ptr addrspace(101) 
@llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) -; SM_70-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP0]] to ptr -; SM_70-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) -; SM_70-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr +; SM_70-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT2]] to ptr +; SM_70-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT1]] to ptr ; SM_70-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT1_PARAM_GEN]], ptr [[INPUT2_PARAM_GEN]] ; SM_70-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 ; SM_70-NEXT: store i32 [[VALLOADED]], ptr [[OUT]], align 4 ; SM_70-NEXT: ret void ; ; COPY-LABEL: define ptx_kernel void @test_select( -; COPY-SAME: ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { +; COPY-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) align 4 [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { ; COPY-NEXT: [[BB:.*:]] ; COPY-NEXT: [[INPUT23:%.*]] = alloca i32, align 4 -; COPY-NEXT: [[INPUT24:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) -; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 4, i1 false) +; COPY-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[INPUT23]], ptr align 4 [[INPUT2]], i64 4, i1 false) ; COPY-NEXT: [[INPUT11:%.*]] = alloca i32, align 4 -; COPY-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) -; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 4, i1 false) +; COPY-NEXT: call void 
@llvm.memcpy.p0.p0.i64(ptr align 4 [[INPUT11]], ptr align 4 [[INPUT1]], i64 4, i1 false) ; COPY-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT23]] ; COPY-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4 ; COPY-NEXT: store i32 [[VALLOADED]], ptr [[OUT]], align 4 @@ -587,27 +630,23 @@ bb: define ptx_kernel void @test_select_write(ptr byval(i32) align 4 %input1, ptr byval(i32) align 4 %input2, ptr %out, i1 %cond) { ; LOWER-ARGS-LABEL: define ptx_kernel void @test_select_write( -; LOWER-ARGS-SAME: ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { +; LOWER-ARGS-SAME: ptr addrspace(101) readonly byval(i32) align 4 [[INPUT11:%.*]], ptr addrspace(101) readonly byval(i32) align 4 [[INPUT22:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] { ; LOWER-ARGS-NEXT: [[BB:.*:]] -; LOWER-ARGS-NEXT: [[INPUT22:%.*]] = alloca i32, align 4 -; LOWER-ARGS-NEXT: [[INPUT2_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) -; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT22]], ptr addrspace(101) align 4 [[INPUT2_PARAM]], i64 4, i1 false) -; LOWER-ARGS-NEXT: [[INPUT11:%.*]] = alloca i32, align 4 -; LOWER-ARGS-NEXT: [[INPUT1_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) -; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT1_PARAM]], i64 4, i1 false) -; LOWER-ARGS-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT22]] +; LOWER-ARGS-NEXT: [[INPUT2:%.*]] = alloca i32, align 4 +; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT2]], ptr addrspace(101) align 4 [[INPUT22]], i64 4, i1 false) +; LOWER-ARGS-NEXT: [[INPUT1:%.*]] = alloca i32, align 4 +; LOWER-ARGS-NEXT: call void 
@llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT1]], ptr addrspace(101) align 4 [[INPUT11]], i64 4, i1 false) +; LOWER-ARGS-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT1]], ptr [[INPUT2]] ; LOWER-ARGS-NEXT: store i32 1, ptr [[PTRNEW]], align 4 ; LOWER-ARGS-NEXT: ret void ; ; COPY-LABEL: define ptx_kernel void @test_select_write( -; COPY-SAME: ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { +; COPY-SAME: ptr byval(i32) align 4 [[INPUT1:%.*]], ptr byval(i32) align 4 [[INPUT2:%.*]], ptr [[OUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; COPY-NEXT: [[BB:.*:]] ; COPY-NEXT: [[INPUT22:%.*]] = alloca i32, align 4 -; COPY-NEXT: [[INPUT2_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) -; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT22]], ptr addrspace(101) align 4 [[INPUT2_PARAM]], i64 4, i1 false) +; COPY-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[INPUT22]], ptr align 4 [[INPUT2]], i64 4, i1 false) ; COPY-NEXT: [[INPUT11:%.*]] = alloca i32, align 4 -; COPY-NEXT: [[INPUT1_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) -; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT1_PARAM]], i64 4, i1 false) +; COPY-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[INPUT11]], ptr align 4 [[INPUT1]], i64 4, i1 false) ; COPY-NEXT: [[PTRNEW:%.*]] = select i1 [[COND]], ptr [[INPUT11]], ptr [[INPUT22]] ; COPY-NEXT: store i32 1, ptr [[PTRNEW]], align 4 ; COPY-NEXT: ret void @@ -645,20 +684,18 @@ bb: define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) align 4 %input2, ptr %inout, i1 %cond) { ; SM_60-LABEL: define ptx_kernel void @test_phi( -; SM_60-SAME: ptr readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" 
[[INPUT1:%.*]], ptr readonly byval([[STRUCT_S]]) align 4 "nvvm.grid_constant" [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { +; SM_60-SAME: ptr addrspace(101) readonly byval([[STRUCT_S:%.*]]) align 4 [[INPUT11:%.*]], ptr addrspace(101) readonly byval([[STRUCT_S]]) align 4 [[INPUT22:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; SM_60-NEXT: [[BB:.*:]] -; SM_60-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4 -; SM_60-NEXT: [[INPUT2_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) -; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT2_PARAM]], i64 8, i1 false) ; SM_60-NEXT: [[INPUT13:%.*]] = alloca [[STRUCT_S]], align 4 -; SM_60-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) -; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT13]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false) +; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT13]], ptr addrspace(101) align 4 [[INPUT22]], i64 8, i1 false) +; SM_60-NEXT: [[INPUT1:%.*]] = alloca [[STRUCT_S]], align 4 +; SM_60-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT1]], ptr addrspace(101) align 4 [[INPUT11]], i64 8, i1 false) ; SM_60-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]] ; SM_60: [[FIRST]]: -; SM_60-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT13]], i32 0, i32 0 +; SM_60-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0 ; SM_60-NEXT: br label %[[MERGE:.*]] ; SM_60: [[SECOND]]: -; SM_60-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 1 +; SM_60-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT13]], i32 0, i32 1 ; SM_60-NEXT: br label %[[MERGE]] ; SM_60: [[MERGE]]: ; SM_60-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], 
%[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ] @@ -667,12 +704,10 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval ; SM_60-NEXT: ret void ; ; SM_70-LABEL: define ptx_kernel void @test_phi( -; SM_70-SAME: ptr readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr readonly byval([[STRUCT_S]]) align 4 "nvvm.grid_constant" [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { +; SM_70-SAME: ptr addrspace(101) readonly byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr addrspace(101) readonly byval([[STRUCT_S]]) align 4 [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; SM_70-NEXT: [[BB:.*:]] -; SM_70-NEXT: [[TMP0:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) -; SM_70-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP0]] to ptr -; SM_70-NEXT: [[TMP1:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) -; SM_70-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[TMP1]] to ptr +; SM_70-NEXT: [[INPUT2_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT2]] to ptr +; SM_70-NEXT: [[INPUT1_PARAM_GEN:%.*]] = addrspacecast ptr addrspace(101) [[INPUT1]] to ptr ; SM_70-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]] ; SM_70: [[FIRST]]: ; SM_70-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1_PARAM_GEN]], i32 0, i32 0 @@ -687,14 +722,12 @@ define ptx_kernel void @test_phi(ptr byval(%struct.S) align 4 %input1, ptr byval ; SM_70-NEXT: ret void ; ; COPY-LABEL: define ptx_kernel void @test_phi( -; COPY-SAME: ptr readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr readonly byval([[STRUCT_S]]) align 4 "nvvm.grid_constant" [[INPUT2:%.*]], ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { +; COPY-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) align 4 [[INPUT2:%.*]], 
ptr [[INOUT:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; COPY-NEXT: [[BB:.*:]] ; COPY-NEXT: [[INPUT23:%.*]] = alloca [[STRUCT_S]], align 4 -; COPY-NEXT: [[INPUT24:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) -; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT23]], ptr addrspace(101) align 4 [[INPUT24]], i64 8, i1 false) +; COPY-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[INPUT23]], ptr align 4 [[INPUT2]], i64 8, i1 false) ; COPY-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4 -; COPY-NEXT: [[INPUT12:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) -; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT12]], i64 8, i1 false) +; COPY-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[INPUT11]], ptr align 4 [[INPUT1]], i64 8, i1 false) ; COPY-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]] ; COPY: [[FIRST]]: ; COPY-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0 @@ -771,20 +804,18 @@ merge: ; preds = %second, %first define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr byval(%struct.S) align 4 %input2, i1 %cond) { ; LOWER-ARGS-LABEL: define ptx_kernel void @test_phi_write( -; LOWER-ARGS-SAME: ptr readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr readonly byval([[STRUCT_S]]) align 4 "nvvm.grid_constant" [[INPUT2:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { +; LOWER-ARGS-SAME: ptr addrspace(101) readonly byval([[STRUCT_S:%.*]]) align 4 [[INPUT11:%.*]], ptr addrspace(101) readonly byval([[STRUCT_S]]) align 4 [[INPUT22:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; LOWER-ARGS-NEXT: [[BB:.*:]] -; LOWER-ARGS-NEXT: [[INPUT22:%.*]] = alloca [[STRUCT_S]], align 4 -; LOWER-ARGS-NEXT: [[INPUT2_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) -; 
LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT22]], ptr addrspace(101) align 4 [[INPUT2_PARAM]], i64 8, i1 false) -; LOWER-ARGS-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4 -; LOWER-ARGS-NEXT: [[INPUT1_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) -; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT1_PARAM]], i64 8, i1 false) +; LOWER-ARGS-NEXT: [[INPUT2:%.*]] = alloca [[STRUCT_S]], align 4 +; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT2]], ptr addrspace(101) align 4 [[INPUT22]], i64 8, i1 false) +; LOWER-ARGS-NEXT: [[INPUT1:%.*]] = alloca [[STRUCT_S]], align 4 +; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT1]], ptr addrspace(101) align 4 [[INPUT11]], i64 8, i1 false) ; LOWER-ARGS-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]] ; LOWER-ARGS: [[FIRST]]: -; LOWER-ARGS-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0 +; LOWER-ARGS-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT1]], i32 0, i32 0 ; LOWER-ARGS-NEXT: br label %[[MERGE:.*]] ; LOWER-ARGS: [[SECOND]]: -; LOWER-ARGS-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT22]], i32 0, i32 1 +; LOWER-ARGS-NEXT: [[PTR2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT2]], i32 0, i32 1 ; LOWER-ARGS-NEXT: br label %[[MERGE]] ; LOWER-ARGS: [[MERGE]]: ; LOWER-ARGS-NEXT: [[PTRNEW:%.*]] = phi ptr [ [[PTR1]], %[[FIRST]] ], [ [[PTR2]], %[[SECOND]] ] @@ -792,14 +823,12 @@ define ptx_kernel void @test_phi_write(ptr byval(%struct.S) align 4 %input1, ptr ; LOWER-ARGS-NEXT: ret void ; ; COPY-LABEL: define ptx_kernel void @test_phi_write( -; COPY-SAME: ptr readonly byval([[STRUCT_S:%.*]]) align 4 "nvvm.grid_constant" [[INPUT1:%.*]], ptr readonly byval([[STRUCT_S]]) align 4 "nvvm.grid_constant" [[INPUT2:%.*]], i1 [[COND:%.*]]) 
#[[ATTR3]] { +; COPY-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT1:%.*]], ptr byval([[STRUCT_S]]) align 4 [[INPUT2:%.*]], i1 [[COND:%.*]]) #[[ATTR3]] { ; COPY-NEXT: [[BB:.*:]] ; COPY-NEXT: [[INPUT22:%.*]] = alloca [[STRUCT_S]], align 4 -; COPY-NEXT: [[INPUT2_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT2]]) -; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT22]], ptr addrspace(101) align 4 [[INPUT2_PARAM]], i64 8, i1 false) +; COPY-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[INPUT22]], ptr align 4 [[INPUT2]], i64 8, i1 false) ; COPY-NEXT: [[INPUT11:%.*]] = alloca [[STRUCT_S]], align 4 -; COPY-NEXT: [[INPUT1_PARAM:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT1]]) -; COPY-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT11]], ptr addrspace(101) align 4 [[INPUT1_PARAM]], i64 8, i1 false) +; COPY-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[INPUT11]], ptr align 4 [[INPUT1]], i64 8, i1 false) ; COPY-NEXT: br i1 [[COND]], label %[[FIRST:.*]], label %[[SECOND:.*]] ; COPY: [[FIRST]]: ; COPY-NEXT: [[PTR1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[INPUT11]], i32 0, i32 0 @@ -858,13 +887,19 @@ merge: ; preds = %second, %first } define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) { -; COMMON-LABEL: define ptx_kernel void @test_forward_byval_arg( -; COMMON-SAME: ptr readonly byval(i32) align 4 "nvvm.grid_constant" [[INPUT:%.*]]) #[[ATTR3:[0-9]+]] { -; COMMON-NEXT: [[INPUT1:%.*]] = alloca i32, align 4 -; COMMON-NEXT: [[INPUT2:%.*]] = call align 4 ptr addrspace(101) @llvm.nvvm.internal.addrspace.wrap.p101.p0(ptr [[INPUT]]) -; COMMON-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT1]], ptr addrspace(101) align 4 [[INPUT2]], i64 4, i1 false) -; COMMON-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT1]]) -; COMMON-NEXT: ret void +; LOWER-ARGS-LABEL: define ptx_kernel void 
@test_forward_byval_arg( +; LOWER-ARGS-SAME: ptr addrspace(101) readonly byval(i32) align 4 [[INPUT1:%.*]]) #[[ATTR3]] { +; LOWER-ARGS-NEXT: [[INPUT:%.*]] = alloca i32, align 4 +; LOWER-ARGS-NEXT: call void @llvm.memcpy.p0.p101.i64(ptr align 4 [[INPUT]], ptr addrspace(101) align 4 [[INPUT1]], i64 4, i1 false) +; LOWER-ARGS-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT]]) +; LOWER-ARGS-NEXT: ret void +; +; COPY-LABEL: define ptx_kernel void @test_forward_byval_arg( +; COPY-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR3]] { +; COPY-NEXT: [[INPUT1:%.*]] = alloca i32, align 4 +; COPY-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[INPUT1]], ptr align 4 [[INPUT]], i64 4, i1 false) +; COPY-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT1]]) +; COPY-NEXT: ret void ; ; PTX-LABEL: test_forward_byval_arg( ; PTX: { @@ -891,7 +926,7 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) { define void @device_func(ptr byval(i32) align 4 %input) { ; COMMON-LABEL: define void @device_func( -; COMMON-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR3]] { +; COMMON-SAME: ptr byval(i32) align 4 [[INPUT:%.*]]) #[[ATTR3:[0-9]+]] { ; COMMON-NEXT: call void @device_func(ptr byval(i32) align 4 [[INPUT]]) ; COMMON-NEXT: ret void ; diff --git a/llvm/test/CodeGen/NVPTX/mark-kernel-ptrs-global.ll b/llvm/test/CodeGen/NVPTX/mark-kernel-ptrs-global.ll index d2d709b3cf8a6..0dfb2fd3e785a 100644 --- a/llvm/test/CodeGen/NVPTX/mark-kernel-ptrs-global.ll +++ b/llvm/test/CodeGen/NVPTX/mark-kernel-ptrs-global.ll @@ -71,8 +71,8 @@ define ptx_kernel void @ptr_as_int_aggr(ptr nocapture noundef readonly byval(%st ; PTXO-NEXT: .reg .b64 %rd<2>; ; PTXO-EMPTY: ; PTXO-NEXT: // %bb.0: -; PTXO-NEXT: ld.param.b32 %r1, [ptr_as_int_aggr_param_1]; ; PTXO-NEXT: ld.param.b64 %rd1, [ptr_as_int_aggr_param_0]; +; PTXO-NEXT: ld.param.b32 %r1, [ptr_as_int_aggr_param_1]; ; PTXO-NEXT: st.b32 [%rd1], %r1; ; PTXO-NEXT: ret; %i = load i64, ptr %s, align 8 From 
51a68419c02fc0e98a99fa02a8d4a17919e78699 Mon Sep 17 00:00:00 2001 From: Vlad Serebrennikov Date: Wed, 24 Jun 2026 21:41:53 +0400 Subject: [PATCH 375/511] [OpenMP] Remove AST dump tests for non-variant clauses (#204493) As was suggested during discussion of #200077, and supported by Johannes in our discussion during his office hours today, this PR removes OpenMP AST dump tests that do not test the `variant` clause. The full motivation can be found in the description of the aforementioned PR, but the short version is that they are a maintenance burden that hold off improvements to `TextNodeDumper` for other parts of Clang, because they match too many unrelated details. --- clang/test/AST/ast-dump-openmp-atomic.c | 14 - clang/test/AST/ast-dump-openmp-barrier.c | 10 - clang/test/AST/ast-dump-openmp-cancel.c | 20 - .../AST/ast-dump-openmp-cancellation-point.c | 20 - clang/test/AST/ast-dump-openmp-critical.c | 12 - ...dump-openmp-distribute-parallel-for-simd.c | 265 -- .../ast-dump-openmp-distribute-parallel-for.c | 265 -- .../AST/ast-dump-openmp-distribute-simd.c | 245 -- clang/test/AST/ast-dump-openmp-distribute.c | 245 -- clang/test/AST/ast-dump-openmp-flush.c | 10 - clang/test/AST/ast-dump-openmp-for-simd.c | 245 -- clang/test/AST/ast-dump-openmp-for.c | 245 -- clang/test/AST/ast-dump-openmp-master.c | 12 - clang/test/AST/ast-dump-openmp-ordered.c | 82 - .../AST/ast-dump-openmp-parallel-for-simd.c | 255 -- clang/test/AST/ast-dump-openmp-parallel-for.c | 255 -- .../AST/ast-dump-openmp-parallel-sections.c | 25 - clang/test/AST/ast-dump-openmp-parallel.c | 17 - clang/test/AST/ast-dump-openmp-section.c | 20 - clang/test/AST/ast-dump-openmp-sections.c | 23 - clang/test/AST/ast-dump-openmp-simd.c | 245 -- clang/test/AST/ast-dump-openmp-single.c | 15 - clang/test/AST/ast-dump-openmp-split.c | 19 - clang/test/AST/ast-dump-openmp-target-data.c | 18 - .../AST/ast-dump-openmp-target-enter-data.c | 24 - .../AST/ast-dump-openmp-target-exit-data.c | 24 - 
...ast-dump-openmp-target-parallel-for-simd.c | 970 -------- .../AST/ast-dump-openmp-target-parallel-for.c | 970 -------- .../AST/ast-dump-openmp-target-parallel.c | 55 - clang/test/AST/ast-dump-openmp-target-simd.c | 510 ---- ...arget-teams-distribute-parallel-for-simd.c | 1970 --------------- ...nmp-target-teams-distribute-parallel-for.c | 1970 --------------- ...dump-openmp-target-teams-distribute-simd.c | 970 -------- .../ast-dump-openmp-target-teams-distribute.c | 970 -------- clang/test/AST/ast-dump-openmp-target-teams.c | 55 - .../test/AST/ast-dump-openmp-target-update.c | 23 - clang/test/AST/ast-dump-openmp-target.c | 31 - clang/test/AST/ast-dump-openmp-task.c | 27 - clang/test/AST/ast-dump-openmp-taskgroup.c | 15 - .../test/AST/ast-dump-openmp-taskloop-simd.c | 305 --- clang/test/AST/ast-dump-openmp-taskloop.c | 305 --- clang/test/AST/ast-dump-openmp-taskwait.c | 10 - clang/test/AST/ast-dump-openmp-taskyield.c | 10 - ...penmp-teams-distribute-parallel-for-simd.c | 2193 ----------------- ...ump-openmp-teams-distribute-parallel-for.c | 2193 ----------------- .../ast-dump-openmp-teams-distribute-simd.c | 1233 --------- .../AST/ast-dump-openmp-teams-distribute.c | 1233 --------- clang/test/AST/ast-dump-openmp-teams.c | 58 - 48 files changed, 18706 deletions(-) delete mode 100644 clang/test/AST/ast-dump-openmp-atomic.c delete mode 100644 clang/test/AST/ast-dump-openmp-barrier.c delete mode 100644 clang/test/AST/ast-dump-openmp-cancel.c delete mode 100644 clang/test/AST/ast-dump-openmp-cancellation-point.c delete mode 100644 clang/test/AST/ast-dump-openmp-critical.c delete mode 100644 clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c delete mode 100644 clang/test/AST/ast-dump-openmp-distribute-parallel-for.c delete mode 100644 clang/test/AST/ast-dump-openmp-distribute-simd.c delete mode 100644 clang/test/AST/ast-dump-openmp-distribute.c delete mode 100644 clang/test/AST/ast-dump-openmp-flush.c delete mode 100644 
clang/test/AST/ast-dump-openmp-for-simd.c delete mode 100644 clang/test/AST/ast-dump-openmp-for.c delete mode 100644 clang/test/AST/ast-dump-openmp-master.c delete mode 100644 clang/test/AST/ast-dump-openmp-ordered.c delete mode 100644 clang/test/AST/ast-dump-openmp-parallel-for-simd.c delete mode 100644 clang/test/AST/ast-dump-openmp-parallel-for.c delete mode 100644 clang/test/AST/ast-dump-openmp-parallel-sections.c delete mode 100644 clang/test/AST/ast-dump-openmp-parallel.c delete mode 100644 clang/test/AST/ast-dump-openmp-section.c delete mode 100644 clang/test/AST/ast-dump-openmp-sections.c delete mode 100644 clang/test/AST/ast-dump-openmp-simd.c delete mode 100644 clang/test/AST/ast-dump-openmp-single.c delete mode 100644 clang/test/AST/ast-dump-openmp-split.c delete mode 100644 clang/test/AST/ast-dump-openmp-target-data.c delete mode 100644 clang/test/AST/ast-dump-openmp-target-enter-data.c delete mode 100644 clang/test/AST/ast-dump-openmp-target-exit-data.c delete mode 100644 clang/test/AST/ast-dump-openmp-target-parallel-for-simd.c delete mode 100644 clang/test/AST/ast-dump-openmp-target-parallel-for.c delete mode 100644 clang/test/AST/ast-dump-openmp-target-parallel.c delete mode 100644 clang/test/AST/ast-dump-openmp-target-simd.c delete mode 100644 clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c delete mode 100644 clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c delete mode 100644 clang/test/AST/ast-dump-openmp-target-teams-distribute-simd.c delete mode 100644 clang/test/AST/ast-dump-openmp-target-teams-distribute.c delete mode 100644 clang/test/AST/ast-dump-openmp-target-teams.c delete mode 100644 clang/test/AST/ast-dump-openmp-target-update.c delete mode 100644 clang/test/AST/ast-dump-openmp-target.c delete mode 100644 clang/test/AST/ast-dump-openmp-task.c delete mode 100644 clang/test/AST/ast-dump-openmp-taskgroup.c delete mode 100644 clang/test/AST/ast-dump-openmp-taskloop-simd.c delete mode 100644 
clang/test/AST/ast-dump-openmp-taskloop.c delete mode 100644 clang/test/AST/ast-dump-openmp-taskwait.c delete mode 100644 clang/test/AST/ast-dump-openmp-taskyield.c delete mode 100644 clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c delete mode 100644 clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c delete mode 100644 clang/test/AST/ast-dump-openmp-teams-distribute-simd.c delete mode 100644 clang/test/AST/ast-dump-openmp-teams-distribute.c delete mode 100644 clang/test/AST/ast-dump-openmp-teams.c diff --git a/clang/test/AST/ast-dump-openmp-atomic.c b/clang/test/AST/ast-dump-openmp-atomic.c deleted file mode 100644 index 7a54a43a32dc0..0000000000000 --- a/clang/test/AST/ast-dump-openmp-atomic.c +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(int i) { -#pragma omp atomic - ++i; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-atomic.c:3:1, line:6:1> line:3:6 test 'void (int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:15 used i 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPAtomicDirective {{.*}} -// CHECK-NEXT: `-UnaryOperator {{.*}} 'int' prefix '++' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'i' 'int' diff --git a/clang/test/AST/ast-dump-openmp-barrier.c b/clang/test/AST/ast-dump-openmp-barrier.c deleted file mode 100644 index 94312c5eae2be..0000000000000 --- a/clang/test/AST/ast-dump-openmp-barrier.c +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp barrier -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-barrier.c:3:1, line:5:1> line:3:6 test 'void (void)' 
external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPBarrierDirective {{.*}} openmp_standalone_directive diff --git a/clang/test/AST/ast-dump-openmp-cancel.c b/clang/test/AST/ast-dump-openmp-cancel.c deleted file mode 100644 index 817ea149d8f5a..0000000000000 --- a/clang/test/AST/ast-dump-openmp-cancel.c +++ /dev/null @@ -1,20 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp parallel - { -#pragma omp cancel parallel - } -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-cancel.c:3:1, line:8:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPParallelDirective {{.*}} -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPCancelDirective {{.*}} openmp_standalone_directive -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-cancel.c:4:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-cancellation-point.c b/clang/test/AST/ast-dump-openmp-cancellation-point.c deleted file mode 100644 index d793ed929c872..0000000000000 --- a/clang/test/AST/ast-dump-openmp-cancellation-point.c +++ /dev/null @@ -1,20 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp parallel - { -#pragma omp cancellation point parallel - } -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-cancellation-point.c:3:1, line:8:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPParallelDirective {{.*}} -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPCancellationPointDirective {{.*}} openmp_standalone_directive -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-cancellation-point.c:4:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-critical.c b/clang/test/AST/ast-dump-openmp-critical.c deleted file mode 100644 index acf762072a5b1..0000000000000 --- a/clang/test/AST/ast-dump-openmp-critical.c +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp critical - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-critical.c:3:1, line:6:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPCriticalDirective {{.*}} -// CHECK-NEXT: `-NullStmt {{.*}} diff --git a/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c deleted file mode 100644 index e9318fc8bbaef..0000000000000 --- a/clang/test/AST/ast-dump-openmp-distribute-parallel-for-simd.c +++ /dev/null @@ -1,265 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp distribute parallel for simd - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp distribute parallel for simd - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp distribute parallel for simd collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp distribute parallel for simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp distribute parallel for simd 
collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt 
{{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt 
{{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | 
|-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c b/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c deleted file mode 100644 index 9392bdfd1f217..0000000000000 --- a/clang/test/AST/ast-dump-openmp-distribute-parallel-for.c +++ /dev/null @@ -1,265 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp distribute parallel for - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp distribute parallel for - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp distribute parallel for collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp distribute parallel for collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp distribute parallel for collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl 
{{.*}} <{{.*}}ast-dump-openmp-distribute-parallel-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// 
CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// 
CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPDistributeParallelForDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | 
|-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-distribute-simd.c b/clang/test/AST/ast-dump-openmp-distribute-simd.c deleted file mode 100644 index 7e61b02b03cb8..0000000000000 --- a/clang/test/AST/ast-dump-openmp-distribute-simd.c +++ /dev/null @@ -1,245 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp distribute simd - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp distribute simd - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp distribute simd collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp distribute simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp distribute simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-distribute-simd.c:3:1, line:7:1> line:3:6 
test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeSimdDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeSimdDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 
'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | 
|-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// 
CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' 
cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPDistributeSimdDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 
-// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | 
`-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-distribute.c b/clang/test/AST/ast-dump-openmp-distribute.c deleted file mode 100644 index a5d89b49324f4..0000000000000 --- a/clang/test/AST/ast-dump-openmp-distribute.c +++ /dev/null @@ -1,245 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp distribute - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp distribute - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp distribute collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp distribute collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp distribute collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-distribute.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl 
{{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// 
CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// 
CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at 
{{.*}}ast-dump-openmp-distribute.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPDistributeDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: 
| | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-distribute.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPDistributeDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr 
{{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at 
{{.*}}ast-dump-openmp-distribute.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-flush.c b/clang/test/AST/ast-dump-openmp-flush.c deleted file mode 100644 index 408c46f4faaa2..0000000000000 --- a/clang/test/AST/ast-dump-openmp-flush.c +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp flush -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-flush.c:3:1, line:5:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPFlushDirective {{.*}} openmp_standalone_directive diff --git a/clang/test/AST/ast-dump-openmp-for-simd.c b/clang/test/AST/ast-dump-openmp-for-simd.c deleted file mode 100644 index b72359c842fad..0000000000000 --- a/clang/test/AST/ast-dump-openmp-for-simd.c +++ /dev/null @@ -1,245 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp for simd - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp for simd - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp for simd collapse(1) - for (int i = 
0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp for simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp for simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-for-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPForSimdDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: 
| |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPForSimdDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} 
col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 
'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPForSimdDirective {{.*}} -// CHECK-NEXT: 
|-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | 
|-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-for.c b/clang/test/AST/ast-dump-openmp-for.c deleted file mode 100644 index f8be9f3e7505d..0000000000000 --- a/clang/test/AST/ast-dump-openmp-for.c +++ /dev/null @@ -1,245 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp for - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp for - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp for collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp for collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - 
; -} - -void test_five(int x, int y, int z) { -#pragma omp for collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPForDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-for.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPForDirective {{.*}} -// CHECK-NEXT: | 
`-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | 
|-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 
'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | 
| | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPForDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> -// CHECK-NEXT: | 
|-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-master.c b/clang/test/AST/ast-dump-openmp-master.c deleted file mode 100644 index 87a7e28592ced..0000000000000 --- a/clang/test/AST/ast-dump-openmp-master.c +++ /dev/null @@ -1,12 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp master - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-master.c:3:1, line:6:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPMasterDirective {{.*}} -// CHECK-NEXT: `-NullStmt {{.*}} diff --git a/clang/test/AST/ast-dump-openmp-ordered.c b/clang/test/AST/ast-dump-openmp-ordered.c deleted file mode 100644 index 97dbe3fe0d0e1..0000000000000 --- a/clang/test/AST/ast-dump-openmp-ordered.c +++ /dev/null @@ -1,82 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines 
-implicit-check-not=openmp_structured_block %s - -void test_one(void) { -#pragma omp ordered - ; -} - -void test_two(int x) { -#pragma omp for ordered - for (int i = 0; i < x; i++) - ; -} - -void test_three(int x) { -#pragma omp for ordered(1) - for (int i = 0; i < x; i++) { -#pragma omp ordered depend(source) - } -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-ordered.c:3:1, line:6:1> line:3:6 test_one 'void (void)' external-linkage -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPOrderedDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> -// CHECK-NEXT: | |-NullStmt {{.*}} -// CHECK-NEXT: | `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-ordered.c:4:1) *const restrict' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:8:6 test_two 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPForDirective {{.*}} -// CHECK-NEXT: | |-OMPOrderedClause {{.*}} -// CHECK-NEXT: | | `-<<>> -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | 
`-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-ordered.c:9:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:14:6 test_three 'void (int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPForDirective {{.*}} -// CHECK-NEXT: |-OMPOrderedClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 1 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-CompoundStmt {{.*}} -// CHECK-NEXT: | | `-OMPOrderedDirective {{.*}} openmp_standalone_directive -// CHECK-NEXT: | | `-OMPDependClause {{.*}} > -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-ordered.c:15:1) *const restrict' -// CHECK-NEXT: | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' diff --git a/clang/test/AST/ast-dump-openmp-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-parallel-for-simd.c deleted file mode 100644 index 6cad2d93a3874..0000000000000 --- a/clang/test/AST/ast-dump-openmp-parallel-for-simd.c +++ /dev/null @@ -1,255 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp parallel for simd - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp parallel for simd - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp parallel for simd collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp parallel for simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp parallel for simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-parallel-for-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPParallelForSimdDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue 
Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPParallelForSimdDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// 
CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | 
`-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> 
nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPParallelForSimdDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// 
CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-parallel-for.c b/clang/test/AST/ast-dump-openmp-parallel-for.c deleted file mode 100644 index 7b0294cc44f1b..0000000000000 --- a/clang/test/AST/ast-dump-openmp-parallel-for.c +++ /dev/null @@ -1,255 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp parallel for - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp parallel for - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp parallel for collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp parallel for collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp parallel for collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-parallel-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// 
CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPParallelForDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPParallelForDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// 
CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr 
{{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl 
{{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPParallelForDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: 
|-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue 
ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-parallel-sections.c b/clang/test/AST/ast-dump-openmp-parallel-sections.c deleted file mode 100644 index cec573c6713af..0000000000000 --- a/clang/test/AST/ast-dump-openmp-parallel-sections.c +++ /dev/null @@ -1,25 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_zero(void) { -#pragma omp parallel sections - {} -} - -void test_one(void) { -#pragma omp parallel sections - { ; } -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-parallel-sections.c:3:1, line:6:1> line:3:6 test_zero 'void (void)' external-linkage -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: `-FunctionDecl {{.*}} line:8:6 test_one 'void (void)' 
external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPParallelSectionsDirective {{.*}} -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> -// CHECK-NEXT: |-CompoundStmt {{.*}} -// CHECK-NEXT: | `-NullStmt {{.*}} -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-parallel-sections.c:9:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-parallel.c b/clang/test/AST/ast-dump-openmp-parallel.c deleted file mode 100644 index 12d88752fa9cb..0000000000000 --- a/clang/test/AST/ast-dump-openmp-parallel.c +++ /dev/null @@ -1,17 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp parallel - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-parallel.c:3:1, line:6:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPParallelDirective {{.*}} -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-NullStmt {{.*}} -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-parallel.c:4:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-section.c b/clang/test/AST/ast-dump-openmp-section.c deleted file mode 100644 index 39a16d00fc1c7..0000000000000 --- a/clang/test/AST/ast-dump-openmp-section.c +++ /dev/null @@ -1,20 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp sections - { -#pragma omp section - ; - } -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-section.c:3:1, line:9:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPSectionsDirective {{.*}} -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> -// CHECK-NEXT: |-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPSectionDirective {{.*}} -// CHECK-NEXT: | `-NullStmt {{.*}} -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-section.c:4:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-sections.c b/clang/test/AST/ast-dump-openmp-sections.c deleted file mode 100644 index 1172606f01d62..0000000000000 --- a/clang/test/AST/ast-dump-openmp-sections.c +++ /dev/null @@ -1,23 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_zero(void) { -#pragma omp sections - {} -} - -void test_one(void) { -#pragma omp sections - { ; } -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-sections.c:3:1, line:6:1> line:3:6 test_zero 'void (void)' external-linkage -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: `-FunctionDecl {{.*}} line:8:6 
test_one 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPSectionsDirective {{.*}} -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> -// CHECK-NEXT: |-CompoundStmt {{.*}} -// CHECK-NEXT: | `-NullStmt {{.*}} -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-sections.c:9:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-simd.c b/clang/test/AST/ast-dump-openmp-simd.c deleted file mode 100644 index 5ba5df7588314..0000000000000 --- a/clang/test/AST/ast-dump-openmp-simd.c +++ /dev/null @@ -1,245 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp simd - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp simd - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp simd collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPSimdDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | 
`-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPSimdDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' 
-// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} 
-// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue 
ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix 
'++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPSimdDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | 
| `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue 
ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-single.c b/clang/test/AST/ast-dump-openmp-single.c deleted file mode 100644 index df96bcd30eaed..0000000000000 --- a/clang/test/AST/ast-dump-openmp-single.c +++ /dev/null @@ -1,15 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp single - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-single.c:3:1, line:6:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPSingleDirective {{.*}} -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> -// CHECK-NEXT: |-NullStmt {{.*}} -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-single.c:4:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-split.c b/clang/test/AST/ast-dump-openmp-split.c deleted file mode 100644 index 821badae55e66..0000000000000 --- a/clang/test/AST/ast-dump-openmp-split.c +++ /dev/null @@ -1,19 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -fopenmp-version=60 -ast-dump %s | FileCheck %s -// -// OMPSplitDirective / OMPCountsClause; - -void body(int); - -void test(void) { -#pragma omp split counts(3, omp_fill) - for (int i = 0; i < 10; ++i) - body(i); -} - -// CHECK: OMPSplitDirective -// CHECK: OMPCountsClause -// CHECK: IntegerLiteral{{.*}}3 -// CHECK: <<>> -// CHECK: ForStmt -// CHECK: <<>> -// CHECK: CallExpr diff --git a/clang/test/AST/ast-dump-openmp-target-data.c b/clang/test/AST/ast-dump-openmp-target-data.c deleted file mode 100644 index 4b5d0638cfd12..0000000000000 --- a/clang/test/AST/ast-dump-openmp-target-data.c +++ 
/dev/null @@ -1,18 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(int x) { -#pragma omp target data map(x) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-data.c:3:1, line:6:1> line:3:6 test 'void (int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:15 used x 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetDataDirective {{.*}} -// CHECK-NEXT: |-OMPMapClause {{.*}} -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> -// CHECK-NEXT: |-NullStmt {{.*}} -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-data.c:4:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-target-enter-data.c b/clang/test/AST/ast-dump-openmp-target-enter-data.c deleted file mode 100644 index cfb15c532cbd2..0000000000000 --- a/clang/test/AST/ast-dump-openmp-target-enter-data.c +++ /dev/null @@ -1,24 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(int x) { -#pragma omp target enter data map(to \ - : x) -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-enter-data.c:3:1, line:6:1> line:3:6 test 'void (int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:15 used x 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetEnterDataDirective {{.*}} openmp_standalone_directive -// CHECK-NEXT: |-OMPMapClause {{.*}} -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: 
|-CompoundStmt {{.*}} -// CHECK-NEXT: |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-enter-data.c:4:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-target-exit-data.c b/clang/test/AST/ast-dump-openmp-target-exit-data.c deleted file mode 100644 index 8fd54edbc57b0..0000000000000 --- a/clang/test/AST/ast-dump-openmp-target-exit-data.c +++ /dev/null @@ -1,24 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(int x) { -#pragma omp target exit data map(from \ - : x) -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-exit-data.c:3:1, line:6:1> line:3:6 test 'void (int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:15 used x 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetExitDataDirective {{.*}} openmp_standalone_directive -// CHECK-NEXT: |-OMPMapClause {{.*}} -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-CompoundStmt {{.*}} -// CHECK-NEXT: |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-exit-data.c:4:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-target-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-target-parallel-for-simd.c deleted file mode 100644 index a3cbf8762b201..0000000000000 --- a/clang/test/AST/ast-dump-openmp-target-parallel-for-simd.c +++ /dev/null @@ -1,970 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp target parallel for simd - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp target parallel for simd - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp target parallel for simd collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp target parallel for simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp target parallel for simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-parallel-for-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | 
`-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | 
| | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | 
| | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | 
| |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// 
CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | 
| | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | 
| | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | 
| |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// 
CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | 
| | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | 
| | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | 
| |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetParallelForSimdDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | 
`-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | 
| | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// 
CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 
used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt 
{{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 
'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit 
.global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-target-parallel-for.c b/clang/test/AST/ast-dump-openmp-target-parallel-for.c deleted file mode 100644 index 3c9d3a145a6d9..0000000000000 --- a/clang/test/AST/ast-dump-openmp-target-parallel-for.c +++ /dev/null @@ -1,970 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp target parallel for - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp target parallel for - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp target parallel for collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp target parallel for collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp target parallel for collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// 
CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-parallel-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator 
{{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt 
{{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | 
| | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt 
{{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | 
| | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt 
{{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetParallelForDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral 
{{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | 
|-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// 
CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// 
CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// 
CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-target-parallel.c b/clang/test/AST/ast-dump-openmp-target-parallel.c deleted file mode 100644 index 7ea4b1f4c5800..0000000000000 --- a/clang/test/AST/ast-dump-openmp-target-parallel.c +++ /dev/null @@ -1,55 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp target parallel - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-parallel.c:3:1, line:6:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetParallelDirective {{.*}} -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel.c:4:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel.c:4:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | `-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel.c:4:1) *const restrict' -// CHECK-NEXT: |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel.c:4:1) *const restrict' -// CHECK-NEXT: |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | `-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel.c:4:1) *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel.c:4:1) *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | `-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-NullStmt {{.*}} -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-parallel.c:4:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-target-simd.c b/clang/test/AST/ast-dump-openmp-target-simd.c deleted file mode 100644 index be8b7121e239a..0000000000000 --- a/clang/test/AST/ast-dump-openmp-target-simd.c +++ /dev/null @@ -1,510 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp target simd - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp target simd - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp target simd collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp target simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp target simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | 
|-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetSimdDirective 
{{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// 
CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> 
-// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// 
CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | 
|-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetSimdDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | 
| | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue 
ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c deleted file mode 100644 index 41a8779c43908..0000000000000 --- 
a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for-simd.c +++ /dev/null @@ -1,1970 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp target teams distribute parallel for simd - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp target teams distribute parallel for simd - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp target teams distribute parallel for simd collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp target teams distribute parallel for simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp target teams distribute parallel for simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | 
| |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator 
{{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | 
| | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// 
CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// 
CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | 
|-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr 
{{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// 
CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | 
|-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 
'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit 
{{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar 
{{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// 
CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 
'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 
'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: 
| | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | 
| | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: 
| | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar 
{{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// 
CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 
'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 
'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: 
| | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | 
| | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: 
| | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' 
lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr 
{{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// 
CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | 
|-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// 
CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// 
CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 
-// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' 
lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} 
col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr 
{{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c b/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c deleted file mode 100644 index c5df392fb0451..0000000000000 --- a/clang/test/AST/ast-dump-openmp-target-teams-distribute-parallel-for.c +++ /dev/null @@ -1,1970 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp target teams distribute parallel for - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp target teams distribute parallel for - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp target teams distribute parallel for collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp target teams distribute parallel for collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp target teams distribute parallel for collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < 
y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 
implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr 
{{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | 
| | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// 
CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr 
{{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator 
{{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' 
-// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | 
| | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | 
|-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' 
-// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// 
CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar 
{{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// 
CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' 
-// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 
'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | 
| | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | 
|-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | 
| | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar 
{{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// 
CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// 
CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | 
|-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// 
CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// 
CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue 
ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 
'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// 
CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// 
CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | 
| | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | 
| | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// 
CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue 
Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 
'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 
'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-parallel-for.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-target-teams-distribute-simd.c b/clang/test/AST/ast-dump-openmp-target-teams-distribute-simd.c deleted file mode 100644 index fc4d45677f21d..0000000000000 --- a/clang/test/AST/ast-dump-openmp-target-teams-distribute-simd.c +++ /dev/null @@ -1,970 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp target teams distribute simd - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp target teams distribute simd - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp target teams distribute simd collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp target teams distribute simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp target teams distribute simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: 
TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-teams-distribute-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: 
| | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | 
| | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// 
CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// 
CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: 
| | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | 
| | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// 
CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// 
CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: 
| | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | 
| | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// 
CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | 
`-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | 
| | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// 
CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 
used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 
'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator 
{{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 
implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute-simd.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-target-teams-distribute.c b/clang/test/AST/ast-dump-openmp-target-teams-distribute.c deleted file mode 100644 index 1bc8481c64355..0000000000000 --- a/clang/test/AST/ast-dump-openmp-target-teams-distribute.c +++ /dev/null @@ -1,970 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp target teams distribute - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp target teams distribute - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp target teams distribute collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp target teams distribute collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp target teams distribute collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; 
i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-teams-distribute.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:4:1) *const restrict' -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | 
| | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | 
|-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:10:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// 
CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | 
| | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | 
|-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:17:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetTeamsDistributeDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// 
CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | 
| `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | 
|-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:24:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetTeamsDistributeDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | 
`-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | 
| | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | 
| | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit 
-// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} 
-// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' 
'<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit 
.global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams-distribute.c:31:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-target-teams.c b/clang/test/AST/ast-dump-openmp-target-teams.c deleted file mode 100644 index dadb2b2d884ad..0000000000000 --- a/clang/test/AST/ast-dump-openmp-target-teams.c +++ /dev/null @@ -1,55 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp target teams - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-teams.c:3:1, line:6:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetTeamsDirective {{.*}} -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams.c:4:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams.c:4:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | `-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams.c:4:1) *const restrict' -// CHECK-NEXT: |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams.c:4:1) *const restrict' -// CHECK-NEXT: |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | `-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams.c:4:1) *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams.c:4:1) *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | `-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-NullStmt {{.*}} -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-teams.c:4:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-target-update.c b/clang/test/AST/ast-dump-openmp-target-update.c deleted file mode 100644 index eb3e575c23c99..0000000000000 --- a/clang/test/AST/ast-dump-openmp-target-update.c +++ /dev/null @@ -1,23 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(int x) { -#pragma omp target update to(x) -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target-update.c:3:1, line:5:1> line:3:6 test 'void (int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:15 used x 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetUpdateDirective {{.*}} openmp_standalone_directive -// CHECK-NEXT: |-OMPToClause {{.*}} -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-CompoundStmt {{.*}} -// CHECK-NEXT: |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target-update.c:4:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-target.c b/clang/test/AST/ast-dump-openmp-target.c deleted file mode 100644 index 157669e74eb16..0000000000000 --- a/clang/test/AST/ast-dump-openmp-target.c +++ /dev/null @@ -1,31 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp target - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-target.c:3:1, line:6:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetDirective {{.*}} -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target.c:4:1) *const restrict' -// CHECK-NEXT: | `-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target.c:4:1) *const restrict' -// CHECK-NEXT: |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | `-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-NullStmt {{.*}} -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-target.c:4:1) *const restrict' -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-task.c b/clang/test/AST/ast-dump-openmp-task.c deleted file mode 100644 index 95e872bfe085d..0000000000000 --- a/clang/test/AST/ast-dump-openmp-task.c +++ /dev/null @@ -1,27 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -typedef unsigned long omp_event_handle_t; -void test(void) { - omp_event_handle_t evt; -#pragma omp task detach(evt) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} line:4:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK: `-OMPTaskDirective {{.*}} -// CHECK-NEXT: |-OMPDetachClause {{.+}} -// CHECK-NEXT: | `-DeclRefExpr {{.+}} 'omp_event_handle_t':'unsigned long' lvalue Var {{.+}} 'evt' 'omp_event_handle_t':'unsigned long' -// CHECK-NEXT: |-OMPFirstprivateClause {{.+}} <> -// CHECK-NEXT: | `-DeclRefExpr {{.+}} 'omp_event_handle_t':'unsigned long' lvalue Var {{.+}} 'evt' 'omp_event_handle_t':'unsigned long' -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-NullStmt {{.*}} -// CHECK-NEXT: |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 
'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-task.c:6:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-taskgroup.c b/clang/test/AST/ast-dump-openmp-taskgroup.c deleted file mode 100644 index 6ef15354b9808..0000000000000 --- a/clang/test/AST/ast-dump-openmp-taskgroup.c +++ /dev/null @@ -1,15 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp taskgroup - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-taskgroup.c:3:1, line:6:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTaskgroupDirective {{.*}} -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> -// CHECK-NEXT: |-NullStmt {{.*}} -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-taskgroup.c:4:1) *const restrict' diff --git a/clang/test/AST/ast-dump-openmp-taskloop-simd.c b/clang/test/AST/ast-dump-openmp-taskloop-simd.c deleted file mode 100644 index 63c3e0beadaff..0000000000000 --- a/clang/test/AST/ast-dump-openmp-taskloop-simd.c +++ /dev/null @@ -1,305 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp taskloop simd - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp taskloop simd - for (int i = 0; i < x; 
i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp taskloop simd collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp taskloop simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp taskloop simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-taskloop-simd.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTaskLoopSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .ub. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .st. 'const long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .liter. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .reductions. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-taskloop-simd.c:4:1) *const restrict' -// CHECK-NEXT: | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTaskLoopSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> 
-// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .ub. 
'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .st. 'const long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .liter. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .reductions. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-taskloop-simd.c:10:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTaskLoopSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .ub. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .st. 'const long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .liter. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .reductions. 
'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-taskloop-simd.c:17:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTaskLoopSimdDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | 
| `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .ub. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .st. 'const long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .liter. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .reductions. 
'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-taskloop-simd.c:24:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTaskLoopSimdDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-ForStmt {{.*}} -// CHECK-NEXT: | |-DeclStmt {{.*}} -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-<<>> -// CHECK-NEXT: | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | `-ForStmt {{.*}} -// CHECK-NEXT: | |-DeclStmt {{.*}} -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-<<>> -// CHECK-NEXT: | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | `-ForStmt {{.*}} -// CHECK-NEXT: | |-DeclStmt {{.*}} -// CHECK-NEXT: | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-<<>> -// CHECK-NEXT: | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | `-NullStmt {{.*}} -// CHECK-NEXT: |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 
'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .lb. 'const unsigned long' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .ub. 'const unsigned long' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .st. 'const long' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .liter. 'const int' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .reductions. 'void *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-taskloop-simd.c:31:1) *const restrict' -// CHECK-NEXT: |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 0 diff --git a/clang/test/AST/ast-dump-openmp-taskloop.c b/clang/test/AST/ast-dump-openmp-taskloop.c deleted file mode 100644 index 2cf06f42cbd9c..0000000000000 --- a/clang/test/AST/ast-dump-openmp-taskloop.c +++ /dev/null @@ -1,305 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp taskloop - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp taskloop - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp taskloop collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp taskloop collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp taskloop collapse(2) - for (int i = 0; i < x; i++) - 
for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-taskloop.c:3:1, line:7:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTaskLoopDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .ub. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .st. 'const long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .liter. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .reductions. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-taskloop.c:4:1) *const restrict' -// CHECK-NEXT: | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-FunctionDecl {{.*}} line:9:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTaskLoopDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' 
postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .ub. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .st. 'const long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .liter. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .reductions. 
'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-taskloop.c:10:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-FunctionDecl {{.*}} line:16:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTaskLoopDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt 
{{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .ub. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .st. 'const long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .liter. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .reductions. 
'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-taskloop.c:17:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-FunctionDecl {{.*}} line:23:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTaskLoopDirective {{.*}} -// CHECK-NEXT: | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | |-value: Int 2 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> -// CHECK-NEXT: | |-ForStmt {{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ForStmt 
{{.*}} -// CHECK-NEXT: | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-<<>> -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-NullStmt {{.*}} -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .lb. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .ub. 'const unsigned long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .st. 'const long' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .liter. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .reductions. 
'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-taskloop.c:24:1) *const restrict' -// CHECK-NEXT: | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: `-FunctionDecl {{.*}} line:30:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTaskLoopDirective {{.*}} -// CHECK-NEXT: |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | |-value: Int 2 -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> -// CHECK-NEXT: |-ForStmt {{.*}} -// CHECK-NEXT: | |-DeclStmt {{.*}} -// CHECK-NEXT: | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-<<>> -// CHECK-NEXT: | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-UnaryOperator {{.*}} 
'int' postfix '++' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | `-ForStmt {{.*}} -// CHECK-NEXT: | |-DeclStmt {{.*}} -// CHECK-NEXT: | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-<<>> -// CHECK-NEXT: | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | `-ForStmt {{.*}} -// CHECK-NEXT: | |-DeclStmt {{.*}} -// CHECK-NEXT: | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-<<>> -// CHECK-NEXT: | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | `-NullStmt {{.*}} -// CHECK-NEXT: |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 
'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .lb. 'const unsigned long' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .ub. 'const unsigned long' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .st. 'const long' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .liter. 'const int' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .reductions. 'void *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-taskloop.c:31:1) *const restrict' -// CHECK-NEXT: |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: `-IntegerLiteral {{.*}} 'int' 0 diff --git a/clang/test/AST/ast-dump-openmp-taskwait.c b/clang/test/AST/ast-dump-openmp-taskwait.c deleted file mode 100644 index 920a97e39d409..0000000000000 --- a/clang/test/AST/ast-dump-openmp-taskwait.c +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp taskwait -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-taskwait.c:3:1, line:5:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTaskwaitDirective {{.*}} openmp_standalone_directive diff --git a/clang/test/AST/ast-dump-openmp-taskyield.c b/clang/test/AST/ast-dump-openmp-taskyield.c deleted file mode 100644 index 8cd436ebe47bd..0000000000000 --- a/clang/test/AST/ast-dump-openmp-taskyield.c +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: %clang_cc1 
-triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp taskyield -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-taskyield.c:3:1, line:5:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTaskyieldDirective {{.*}} openmp_standalone_directive diff --git a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c b/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c deleted file mode 100644 index 802bb86c1a700..0000000000000 --- a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for-simd.c +++ /dev/null @@ -1,2193 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp target -#pragma omp teams distribute parallel for simd - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp target -#pragma omp teams distribute parallel for simd - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp target -#pragma omp teams distribute parallel for simd collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp target -#pragma omp teams distribute parallel for simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp target -#pragma omp teams distribute parallel for simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:3:1, line:8:1> line:3:6 test_one 'void (int)' 
external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | 
| | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: 
| | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// 
CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} 
col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:10:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | 
|-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 
'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// 
CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr 
{{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:11:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// 
CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// 
CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:18:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// 
CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | 
`-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// 
CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt 
{{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | 
| | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:19:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar 
{{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar 
{{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:26:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | 
|-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | 
`-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// 
CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 
'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 2 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt 
{{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | 
| | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:27:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar 
{{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar 
{{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:34:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetDirective {{.*}} -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | 
| | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// 
CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | 
| | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | 
| | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | 
| | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 
'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-OMPTeamsDistributeParallelForSimdDirective {{.*}} -// CHECK-NEXT: | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | |-value: Int 2 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// 
CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | 
| | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// 
CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:35:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | 
| |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// 
CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c b/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c deleted file mode 100644 index 46381d1184d19..0000000000000 --- a/clang/test/AST/ast-dump-openmp-teams-distribute-parallel-for.c +++ /dev/null @@ -1,2193 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp target -#pragma omp teams distribute parallel for - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp target -#pragma omp teams distribute parallel for - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp target -#pragma omp teams distribute parallel for collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp target -#pragma omp teams distribute parallel for collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp target -#pragma omp teams distribute parallel for collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for 
(int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:3:1, line:8:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | 
|-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | 
| | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | 
| | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:4:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit 
.global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:5:1) *const restrict' -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:10:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | 
|-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' 
-// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | 
| | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:11:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | 
|-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: 
| | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:12:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:18:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// 
CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr 
{{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 
implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | 
|-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// 
CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:19:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 
'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 
'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:20:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:26:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | 
|-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | | `-ImplicitCastExpr 
{{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr 
{{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 
implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | 
|-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 
'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 2 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// 
CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:27:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 
'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 
'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:28:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:34:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetDirective {{.*}} -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | 
| |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// 
CHECK-NEXT: | | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | 
|-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | 
| | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 
'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-OMPTeamsDistributeParallelForDirective {{.*}} -// CHECK-NEXT: | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | |-value: Int 2 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | 
| | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | 
`-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: 
| | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 
'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:35:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | 
|-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// 
CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.lb. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit used .previous.ub. 'const __size_t':'const unsigned long' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-parallel-for.c:36:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-teams-distribute-simd.c b/clang/test/AST/ast-dump-openmp-teams-distribute-simd.c deleted file mode 100644 index f996f3e935449..0000000000000 --- a/clang/test/AST/ast-dump-openmp-teams-distribute-simd.c +++ /dev/null @@ -1,1233 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp target -#pragma omp teams distribute simd - for (int i = 0; i < x; i++) - ; -} - -void test_two(int x, int y) { -#pragma omp target -#pragma omp teams distribute simd - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp target -#pragma omp teams distribute simd collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp target -#pragma omp teams distribute simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp target -#pragma omp teams distribute simd collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl 
{{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-teams-distribute-simd.c:3:1, line:8:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:4:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:5:1) *const restrict' -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:10:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:11:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:11:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr 
{{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:11:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used 
i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:12:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:18:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: | | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 
'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:19:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:19:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// 
CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:19:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int 
&' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:20:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:26:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: | | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 
0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:27:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:27:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 2 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// 
CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:27:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int 
&' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:28:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:34:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetDirective {{.*}} -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | 
|-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-OMPTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:35:1) *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | 
| `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: 
| | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:35:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-OMPTeamsDistributeSimdDirective {{.*}} -// CHECK-NEXT: | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | |-value: Int 2 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:35:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:23 implicit 'int &' -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr 
{{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute-simd.c:36:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-teams-distribute.c b/clang/test/AST/ast-dump-openmp-teams-distribute.c deleted file mode 100644 index 8ccea811ccc91..0000000000000 --- a/clang/test/AST/ast-dump-openmp-teams-distribute.c +++ /dev/null @@ -1,1233 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test_one(int x) { -#pragma omp target -#pragma omp teams distribute - for (int i = 0; i < x; 
i++) - ; -} - -void test_two(int x, int y) { -#pragma omp target -#pragma omp teams distribute - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_three(int x, int y) { -#pragma omp target -#pragma omp teams distribute collapse(1) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_four(int x, int y) { -#pragma omp target -#pragma omp teams distribute collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - ; -} - -void test_five(int x, int y, int z) { -#pragma omp target -#pragma omp teams distribute collapse(2) - for (int i = 0; i < x; i++) - for (int i = 0; i < y; i++) - for (int i = 0; i < z; i++) - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: |-FunctionDecl {{.*}} <{{.*}}ast-dump-openmp-teams-distribute.c:3:1, line:8:1> line:3:6 test_one 'void (int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' 
lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:4:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | 
| | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:5:1) *const restrict' -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:4:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:5:1) *const restrict' -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:4:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:5:1) *const restrict' -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:10:6 test_two 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:19 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:26 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeDirective {{.*}} -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | 
`-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:11:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:12:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:11:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeDirective {{.*}} -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' 
lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:12:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:11:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// 
CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:12:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:18:6 test_three 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:21 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:28 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeDirective {{.*}} -// CHECK-NEXT: | | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 
-// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:19:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:20:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:19:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// 
CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:20:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:19:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:25 implicit 'int &' -// 
CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:20:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'int' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: |-FunctionDecl {{.*}} line:26:6 test_four 'void (int, int)' external-linkage -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: | |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: | `-CompoundStmt {{.*}} -// CHECK-NEXT: | `-OMPTargetDirective {{.*}} -// CHECK-NEXT: | |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-OMPTeamsDistributeDirective {{.*}} -// CHECK-NEXT: | | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 
-// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:27:1) *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | 
|-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:28:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 
'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 
'void *const' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:27:1) *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-OMPTeamsDistributeDirective {{.*}} -// CHECK-NEXT: | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | |-value: Int 2 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: 
| | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:28:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:27:1) *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | `-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | 
|-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:28:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-FunctionDecl {{.*}} line:34:6 test_five 'void (int, int, int)' external-linkage -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:20 used x 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:27 used y 'int' -// CHECK-NEXT: |-ParmVarDecl {{.*}} col:34 used z 'int' -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetDirective {{.*}} -// CHECK-NEXT: |-OMPFirstprivateClause {{.*}} <> -// CHECK-NEXT: | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | 
|-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-OMPTeamsDistributeDirective {{.*}} -// CHECK-NEXT: | | | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | |-value: Int 2 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | |-<<>> -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:36:1) *const restrict' -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:35:1) *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | | | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | 
`-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | |-<<>> -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | 
| | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:36:1) *const restrict' -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 
'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 'void *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:35:1) *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int' -// CHECK-NEXT: | | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int' -// CHECK-NEXT: | | `-OMPCaptureKindAttr {{.*}} <> Implicit {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-OMPTeamsDistributeDirective {{.*}} -// CHECK-NEXT: | | |-OMPCollapseClause {{.*}} -// CHECK-NEXT: | | | `-ConstantExpr {{.*}} 'int' -// CHECK-NEXT: | | | |-value: Int 2 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 2 -// CHECK-NEXT: | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' 
refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | |-<<>> -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:36:1) *const restrict' -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:35:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | |-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:3 implicit 'int &' -// CHECK-NEXT: | | |-FieldDecl {{.*}} col:5 implicit 'int &' -// CHECK-NEXT: | | `-FieldDecl {{.*}} col:27 implicit 'int &' -// CHECK-NEXT: | |-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// 
CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-ForStmt {{.*}} -// CHECK-NEXT: | | | |-DeclStmt {{.*}} -// CHECK-NEXT: | | | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | |-<<>> -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '<' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | | | |-UnaryOperator {{.*}} 'int' postfix '++' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue Var {{.*}} 'i' 'int' -// CHECK-NEXT: | | | `-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams-distribute.c:36:1) *const restrict' -// CHECK-NEXT: | | |-VarDecl {{.*}} col:12 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | |-VarDecl {{.*}} col:14 used i 'int' cinit -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | `-VarDecl {{.*}} col:16 used i 'int' cinit -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:23 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | |-OMPCapturedExprDecl {{.*}} col:25 implicit used .capture_expr. 'int' -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' refers_to_enclosing_variable_or_capture -// CHECK-NEXT: | `-OMPCapturedExprDecl {{.*}} > col:3 implicit used .capture_expr. 'long' -// CHECK-NEXT: | `-BinaryOperator {{.*}} > 'long' '-' -// CHECK-NEXT: | |-BinaryOperator {{.*}} 'long' '*' -// CHECK-NEXT: | | |-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 
'int' -// CHECK-NEXT: | | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | `-ImplicitCastExpr {{.*}} 'long' -// CHECK-NEXT: | | `-BinaryOperator {{.*}} 'int' '/' -// CHECK-NEXT: | | |-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | |-ImplicitCastExpr {{.*}} 'int' -// CHECK-NEXT: | | | | `-DeclRefExpr {{.*}} 'int' lvalue OMPCapturedExpr {{.*}} '.capture_expr.' 'int' -// CHECK-NEXT: | | | `-ParenExpr {{.*}} 'int' -// CHECK-NEXT: | | | `-BinaryOperator {{.*}} > 'int' '+' -// CHECK-NEXT: | | | |-BinaryOperator {{.*}} 'int' '-' -// CHECK-NEXT: | | | | |-IntegerLiteral {{.*}} 'int' 0 -// CHECK-NEXT: | | | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | | | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: | | `-IntegerLiteral {{.*}} 'int' 1 -// CHECK-NEXT: | `-ImplicitCastExpr {{.*}} <> 'long' -// CHECK-NEXT: | `-IntegerLiteral {{.*}} <> 'int' 1 -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'x' 'int' -// CHECK-NEXT: |-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'y' 'int' -// CHECK-NEXT: `-DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} 'z' 'int' diff --git a/clang/test/AST/ast-dump-openmp-teams.c b/clang/test/AST/ast-dump-openmp-teams.c deleted file mode 100644 index a76ace98be02d..0000000000000 --- a/clang/test/AST/ast-dump-openmp-teams.c +++ /dev/null @@ -1,58 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fopenmp -ast-dump %s | FileCheck --match-full-lines -implicit-check-not=openmp_structured_block %s - -void test(void) { -#pragma omp target -#pragma omp teams - ; -} - -// CHECK: TranslationUnitDecl {{.*}} <> -// CHECK: `-FunctionDecl 
{{.*}} <{{.*}}ast-dump-openmp-teams.c:3:1, line:7:1> line:3:6 test 'void (void)' external-linkage -// CHECK-NEXT: `-CompoundStmt {{.*}} -// CHECK-NEXT: `-OMPTargetDirective {{.*}} -// CHECK-NEXT: `-CapturedStmt {{.*}} -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-OMPTeamsDirective {{.*}} -// CHECK-NEXT: | | `-CapturedStmt {{.*}} -// CHECK-NEXT: | | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | | |-NullStmt {{.*}} -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | | `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams.c:5:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams.c:4:1) *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: | |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | | `-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams.c:5:1) *const restrict' -// CHECK-NEXT: |-AlwaysInlineAttr {{.*}} <> Implicit __forceinline -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .part_id. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .privates. 
'void *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .copy_fn. 'void (*const restrict)(void *const restrict, ...)' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .task_t. 'void *const' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams.c:4:1) *const restrict' -// CHECK-NEXT: |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | `-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-OMPTeamsDirective {{.*}} -// CHECK-NEXT: | `-CapturedStmt {{.*}} -// CHECK-NEXT: | `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: | |-NullStmt {{.*}} -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: | |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 'const int *const restrict' -// CHECK-NEXT: | `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams.c:5:1) *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams.c:4:1) *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit dyn_ptr 'void *const restrict' -// CHECK-NEXT: |-RecordDecl {{.*}} col:1 implicit struct definition -// CHECK-NEXT: | `-CapturedRecordAttr {{.*}} <> Implicit -// CHECK-NEXT: `-CapturedDecl {{.*}} <> nothrow -// CHECK-NEXT: |-NullStmt {{.*}} -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .global_tid. 'const int *const restrict' -// CHECK-NEXT: |-ImplicitParamDecl {{.*}} col:1 implicit .bound_tid. 
'const int *const restrict' -// CHECK-NEXT: `-ImplicitParamDecl {{.*}} col:1 implicit __context 'struct (unnamed at {{.*}}ast-dump-openmp-teams.c:5:1) *const restrict' From fbac695e8990ebe466937db7a216e5895d278aa1 Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Thu, 25 Jun 2026 02:44:00 +0900 Subject: [PATCH 376/511] [LoopInterchange] Remove some early exits in transform phase (NFCI) (#205563) This patch removes some unnecessary early exits from the transformation phase in LoopInterchange. Some of them are simply removed because they are trivially unsatisfiable. Others are replaced with assertions. These conditions should be checked in the legality check phase, so it should be safe to add those asserts. --- llvm/lib/Transforms/Scalar/LoopInterchange.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp index 1c07b25d86fb6..e9e77b7a48963 100644 --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -2115,10 +2115,8 @@ bool LoopInterchangeTransform::transform( LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n"); auto &InductionPHIs = LIL.getInnerLoopInductions(); - if (InductionPHIs.empty()) { - LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n"); - return false; - } + assert(!InductionPHIs.empty() && + "Expected at least one induction variable in the inner loop"); SmallVector InnerIndexVarList; for (PHINode *CurInductionPHI : InductionPHIs) { @@ -2479,17 +2477,15 @@ bool LoopInterchangeTransform::adjustLoopBranches() { Instruction *OuterLoopHeaderBI = OuterLoopHeader->getTerminator(); Instruction *InnerLoopHeaderBI = InnerLoopHeader->getTerminator(); - if (!OuterLoopPredecessor || !InnerLoopLatchPredecessor || - !OuterLoopLatchBI || !InnerLoopLatchBI || !OuterLoopHeaderBI || - !InnerLoopHeaderBI) - return false; + assert(OuterLoopPredecessor && 
InnerLoopLatchPredecessor && + "Failed to find a unique predecessor"); + assert(OuterLoopLatchBI && InnerLoopLatchBI && + "Failed to find a conditional branch"); Instruction *InnerLoopLatchPredecessorBI = InnerLoopLatchPredecessor->getTerminator(); Instruction *OuterLoopPredecessorBI = OuterLoopPredecessor->getTerminator(); - if (!OuterLoopPredecessorBI || !InnerLoopLatchPredecessorBI) - return false; BasicBlock *InnerLoopHeaderSuccessor = InnerLoopHeader->getUniqueSuccessor(); if (!InnerLoopHeaderSuccessor) return false; From e4399ec82eb41efb3e2189208de757f226acbf45 Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Wed, 24 Jun 2026 18:02:19 +0000 Subject: [PATCH 377/511] [clang-tidy] Extend `modernize-type-traits` to fold remove_cv_t> into remove_cv_ref_t (#204789) --- .../clang-tidy/modernize/TypeTraitsCheck.cpp | 32 +++++++++++++++ clang-tools-extra/docs/ReleaseNotes.rst | 4 ++ .../checks/modernize/type-traits.rst | 8 ++++ .../modernize/type-traits-remove-cvref.cpp | 39 +++++++++++++++++++ 4 files changed, 83 insertions(+) create mode 100644 clang-tools-extra/test/clang-tidy/checkers/modernize/type-traits-remove-cvref.cpp diff --git a/clang-tools-extra/clang-tidy/modernize/TypeTraitsCheck.cpp b/clang-tools-extra/clang-tidy/modernize/TypeTraitsCheck.cpp index 0d03006750d07..e831d0966cd58 100644 --- a/clang-tools-extra/clang-tidy/modernize/TypeTraitsCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/TypeTraitsCheck.cpp @@ -196,6 +196,20 @@ void TypeTraitsCheck::registerMatchers(MatchFinder *Finder) { this); } Finder->addMatcher(typeLoc(isType()).bind(Bind), this); + + // Only register matchers for std::remove_cvref_t simplification in c++20 + // mode. 
+ if (getLangOpts().CPlusPlus20) { + Finder->addMatcher(templateSpecializationTypeLoc( + loc(qualType(hasDeclaration( + namedDecl(hasName("::std::remove_cv_t"))))), + hasTemplateArgumentLoc( + 0, hasTypeLoc(templateSpecializationTypeLoc(loc( + qualType(hasDeclaration(namedDecl(hasName( + "::std::remove_reference_t"))))))))) + .bind("remove_cvref"), + this); + } } static bool isNamedDeclInStdTraitsSet(const NamedDecl *ND, @@ -308,6 +322,24 @@ void TypeTraitsCheck::check(const MatchFinder::MatchResult &Result) { DNTL->getElaboratedKeywordLoc()); return; } + + if (const auto *TSTL = Result.Nodes.getNodeAs( + "remove_cvref")) { + auto InnerTL = TSTL->getArgLoc(0) + .getTypeSourceInfo() + ->getTypeLoc() + .castAs(); + if (IgnoreMacros && + (TSTL->getBeginLoc().isMacroID() || InnerTL.getBeginLoc().isMacroID())) + return; + + auto Diag = diag(TSTL->getBeginLoc(), "use c++20 type alias"); + Diag << FixItHint::CreateReplacement( + SourceRange(TSTL->getBeginLoc(), InnerTL.getLAngleLoc()), + "std::remove_cvref_t<") + << FixItHint::CreateRemoval(InnerTL.getRAngleLoc()); + return; + } } void TypeTraitsCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 8871b37ddb1bf..4a5863be59fe3 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -627,6 +627,10 @@ Changes in existing checks ` check to apply fix-it when type qualifiers and/or reference modifiers are used with parameters. +- Improved :doc:`modernize-type-traits + ` check to suggest usage of + ``std::remove_cvref_t`` when applicable. 
+ - Improved :doc:`modernize-use-default-member-init ` check by fixing a false positive when a constructor initializer refers to a declaration that diff --git a/clang-tools-extra/docs/clang-tidy/checks/modernize/type-traits.rst b/clang-tools-extra/docs/clang-tidy/checks/modernize/type-traits.rst index fbe2b856c6b85..3871c6ae3934b 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/modernize/type-traits.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/modernize/type-traits.rst @@ -7,6 +7,9 @@ Converts standard library type traits of the form ``traits<...>::type`` and ``traits<...>::value`` into ``traits_t<...>`` and ``traits_v<...>`` respectively. +Also suggests converting ``std::remove_cv_t`` into +``std::remove_cvref_t<...>`` when targeting C++20 or above. + For example: .. code-block:: c++ @@ -16,6 +19,8 @@ For example: typename std::add_const::type std::make_signed::type + std::remove_cv_t> + Would be converted into: .. code-block:: c++ @@ -25,6 +30,9 @@ Would be converted into: std::add_const_t std::make_signed_t + std::remove_cvref_t + + Options ------- diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/type-traits-remove-cvref.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/type-traits-remove-cvref.cpp new file mode 100644 index 0000000000000..2b9f91c7ebe3a --- /dev/null +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/type-traits-remove-cvref.cpp @@ -0,0 +1,39 @@ +// RUN: %check_clang_tidy -std=c++20-or-later %s modernize-type-traits %t -check-suffixes=',MACRO' +// RUN: %check_clang_tidy -std=c++20-or-later %s modernize-type-traits %t -- \ +// RUN: -config='{CheckOptions: {modernize-type-traits.IgnoreMacros: true}}' + +namespace std { +template struct remove_cv { + using type = int; +}; +template +using remove_cv_t = typename remove_cv::type; // NOLINT + +template struct remove_reference { + using type = int; +}; +template +using remove_reference_t = typename remove_reference::type; // NOLINT +} + +using foo = 
std::remove_cv_t>; +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: use c++20 type alias +// CHECK-FIXES: using foo = std::remove_cvref_t; + +std::remove_cv_t> var; +// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: use c++20 type alias +// CHECK-FIXES: std::remove_cvref_t var; + +template>> struct Foo {}; +// CHECK-MESSAGES: :[[@LINE-1]]:16: warning: use c++20 type alias +// CHECK-FIXES: template> struct Foo {}; + +#define RM_CV(T) std::remove_cv_t +#define RM_REF(T) std::remove_reference_t +#define RM_CVREF(T) std::remove_cv_t> +template>) struct M0 {}; +// CHECK-MESSAGES-MACRO: :[[@LINE-1]]:16: warning: use c++20 type alias +template> struct M1 {}; +// CHECK-MESSAGES-MACRO: :[[@LINE-1]]:16: warning: use c++20 type alias +template struct M2 {}; +// CHECK-MESSAGES-MACRO: :[[@LINE-1]]:16: warning: use c++20 type alias From da4e021dd47b947f8bca165510b1cd921e994f16 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Wed, 24 Jun 2026 11:05:20 -0700 Subject: [PATCH 378/511] [lldb][NFC] Change type of Breakpoint's name list (#205429) This is currently a `std::unordered_set`. The downside of this is that you need to have a `std::string` to perform a lookup of any kind. This may require an allocation whenever we want to query the name list. Even using `std::string_view` is not sufficient to perform a lookup. I propose that this instead be a `llvm::StringSet` which uses StringRefs as its primary currency for insertions, lookups, and more. 
--------- Co-authored-by: Jonas Devlieghere --- lldb/include/lldb/Breakpoint/Breakpoint.h | 13 +++++++------ lldb/source/Breakpoint/Breakpoint.cpp | 6 +++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/lldb/include/lldb/Breakpoint/Breakpoint.h b/lldb/include/lldb/Breakpoint/Breakpoint.h index 67a741c6c9251..6cb0440f4a06c 100644 --- a/lldb/include/lldb/Breakpoint/Breakpoint.h +++ b/lldb/include/lldb/Breakpoint/Breakpoint.h @@ -27,6 +27,8 @@ #include "lldb/Utility/StringList.h" #include "lldb/Utility/StructuredData.h" +#include "llvm/ADT/StringSet.h" + namespace lldb_private { /// \class Breakpoint Breakpoint.h "lldb/Breakpoint/Breakpoint.h" Class that @@ -583,8 +585,8 @@ class Breakpoint : public std::enable_shared_from_this, void GetNames(std::vector &names) { names.clear(); - for (auto name : m_name_list) { - names.push_back(name); + for (auto name : m_name_list.keys()) { + names.push_back(name.str()); } } @@ -687,10 +689,9 @@ class Breakpoint : public std::enable_shared_from_this, bool m_hardware; // If this breakpoint is required to use a hardware breakpoint Target &m_target; // The target that holds this breakpoint. - std::unordered_set m_name_list; // If not empty, this is the name - // of this breakpoint (many - // breakpoints can share the same - // name.) + /// If not empty, this is the name of this breakpoint (many breakpoints can + /// share the same name.) + llvm::StringSet<> m_name_list; lldb::SearchFilterSP m_filter_sp; // The filter that constrains the breakpoint's domain. 
lldb::BreakpointResolverSP diff --git a/lldb/source/Breakpoint/Breakpoint.cpp b/lldb/source/Breakpoint/Breakpoint.cpp index 07412cd092f0d..9bc014b86e2d6 100644 --- a/lldb/source/Breakpoint/Breakpoint.cpp +++ b/lldb/source/Breakpoint/Breakpoint.cpp @@ -88,7 +88,7 @@ StructuredData::ObjectSP Breakpoint::SerializeToStructuredData() { if (!m_name_list.empty()) { StructuredData::ArraySP names_array_sp(new StructuredData::Array()); - for (auto name : m_name_list) { + for (auto name : m_name_list.keys()) { names_array_sp->AddItem(std::make_shared(name)); } breakpoint_contents_sp->AddItem(Breakpoint::GetKey(OptionNames::Names), @@ -1003,9 +1003,9 @@ void Breakpoint::GetDescriptionForType(Stream *s, lldb::DescriptionLevel level, s->Printf("Names:"); s->EOL(); s->IndentMore(); - for (const std::string &name : m_name_list) { + for (llvm::StringRef name : m_name_list.keys()) { s->Indent(); - s->Printf("%s\n", name.c_str()); + s->Format("{0}\n", name); } s->IndentLess(); } From a45af75608fe96502e187be3347461bbf0c777c2 Mon Sep 17 00:00:00 2001 From: vangthao95 Date: Wed, 24 Jun 2026 11:11:17 -0700 Subject: [PATCH 379/511] AMDGPU/GlobalISel: Fix get.rounding s_getreg lowering (#205601) Use llvm.amdgcn.s.getreg instead of emitting S_GETREG_B32 directly so instruction selection applies the required SReg_32 operand constraint. This was done for setreg but missed for getreg. Fixes https://github.com/llvm/llvm-project/pull/205265 when expensive checks are enabled. 
--- llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 8e4058227b96e..b9554d7858d32 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -1353,8 +1353,10 @@ bool RegBankLegalizeHelper::lowerGetRounding(MachineInstr &MI) { uint32_t BothRoundHwReg = AMDGPU::Hwreg::HwregEncoding::encode(AMDGPU::Hwreg::ID_MODE, 0, 4); - auto GetReg = B.buildInstr(AMDGPU::S_GETREG_B32, {SgprRB_S32}, {}) - .addImm(BothRoundHwReg); + auto GetReg = + B.buildIntrinsic(Intrinsic::amdgcn_s_getreg, {SgprRB_S32}, + /*HasSideEffects=*/true, /*isConvergent=*/false) + .addImm(BothRoundHwReg); // There are two rounding modes, one for f32 and one for f64/f16. We only // report in the standard value range if both are the same. From 9a8aed89b35849cd69e6de3dc990bdad1be34579 Mon Sep 17 00:00:00 2001 From: Kevin Sala Penades Date: Wed, 24 Jun 2026 11:11:50 -0700 Subject: [PATCH 380/511] [Instrumentor] Move common instruction IO functions into a class (#205460) This commit moves several instruction-related IO functions into a class instead of having them defined in the instrumentor namespace. We add the BaseInstructionIO non-templated class because InstructionIO is a templated class. Adding the common functions into InstructionIO would force us to define them in the header. 
--- .../llvm/Transforms/IPO/Instrumentor.h | 46 +++++++++++-------- llvm/lib/Transforms/IPO/Instrumentor.cpp | 38 +++++++-------- 2 files changed, 47 insertions(+), 37 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Instrumentor.h b/llvm/include/llvm/Transforms/IPO/Instrumentor.h index cf2432bd6eebf..c143098db4210 100644 --- a/llvm/include/llvm/Transforms/IPO/Instrumentor.h +++ b/llvm/include/llvm/Transforms/IPO/Instrumentor.h @@ -583,18 +583,42 @@ struct InstrumentationOpportunity { } }; -/// The base instrumentation opportunity class for instruction opportunities. +/// The base class that implements basic logic for any instruction +/// instrumentation opportunity that inherits from InstructionIO. +struct BaseInstructionIO : public InstrumentationOpportunity { + virtual ~BaseInstructionIO() {} + + BaseInstructionIO(InstrumentationLocation::KindTy Kind) + : InstrumentationOpportunity(InstrumentationLocation(Kind)) {} + + LLVM_ABI static Value *getOpcode(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); + LLVM_ABI static Value *getTypeSize(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); + LLVM_ABI static Value *getLeftOperand(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); + LLVM_ABI static Value *getRightOperand(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); + LLVM_ABI static Value *getTypeId(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); +}; + +/// The common instrumentation opportunity class for instruction opportunities. /// Each instruction opportunity should inherit from this class and implement /// the virtual class members. If multiple opcodes are provided, all of them /// are instrumented using the same logic, and a name must be explicitly /// provided by overriding getName(). 
-template -struct InstructionIO : public InstrumentationOpportunity { +template struct InstructionIO : public BaseInstructionIO { virtual ~InstructionIO() {} /// Construct an instruction opportunity. InstructionIO(InstrumentationLocation::KindTy Kind) - : InstrumentationOpportunity(InstrumentationLocation(Kind)) { + : BaseInstructionIO(Kind) { static_assert(sizeof...(Opcodes) >= 1, "InstructionIO must have at least one opcode"); } @@ -622,20 +646,6 @@ struct InstructionIO : public InstrumentationOpportunity { } }; -/// Common getters use across different instrumentation opportunities. -///{ -LLVM_ABI Value *getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB); -LLVM_ABI Value *getTypeSize(Value &V, Type &Ty, InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB); -LLVM_ABI Value *getLeft(Value &V, Type &Ty, InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB); -LLVM_ABI Value *getRight(Value &V, Type &Ty, InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB); -LLVM_ABI Value *getTypeId(Value &V, Type &Ty, InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB); -///} - /// The instrumentation opportunity for functions. 
struct FunctionIO final : public InstrumentationOpportunity { FunctionIO(InstrumentationLocation::KindTy Kind) diff --git a/llvm/lib/Transforms/IPO/Instrumentor.cpp b/llvm/lib/Transforms/IPO/Instrumentor.cpp index 352d223751d07..4517123cec9f1 100644 --- a/llvm/lib/Transforms/IPO/Instrumentor.cpp +++ b/llvm/lib/Transforms/IPO/Instrumentor.cpp @@ -905,31 +905,31 @@ static void readValuePack(const Range &R, Value &Pack, } } -Value *llvm::instrumentor::getOpcode(Value &V, Type &Ty, - InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB) { +Value *BaseInstructionIO::getOpcode(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { auto &I = cast(V); return getCI(&Ty, I.getOpcode()); } -Value *llvm::instrumentor::getTypeSize(Value &V, Type &Ty, - InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB) { +Value *BaseInstructionIO::getTypeSize(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { auto &I = cast(V); auto &DL = I.getDataLayout(); return getCI(&Ty, DL.getTypeStoreSize(V.getType())); } -Value *llvm::instrumentor::getLeft(Value &V, Type &Ty, - InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB) { +Value *BaseInstructionIO::getLeftOperand(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { auto &I = cast(V); return I.getOperand(0); } -Value *llvm::instrumentor::getRight(Value &V, Type &Ty, - InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB) { +Value *BaseInstructionIO::getRightOperand(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { auto &I = cast(V); if (I.getNumOperands() > 1) return I.getOperand(1); @@ -937,9 +937,9 @@ Value *llvm::instrumentor::getRight(Value &V, Type &Ty, return PoisonValue::get(&Ty); } -Value *llvm::instrumentor::getTypeId(Value &V, Type &Ty, - InstrumentationConfig &IConf, - InstrumentorIRBuilderTy &IIRB) { +Value *BaseInstructionIO::getTypeId(Value &V, Type &Ty, + 
InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { return getCI(&Ty, V.getType()->getTypeID()); } @@ -1777,12 +1777,12 @@ void NumericIO::init(InstrumentationConfig &IConf, if (Config.has(PassLeft)) IRTArgs.push_back(IRTArg(IIRB.Int64Ty, "left", "The operation's left operand.", ValArgOpts, - getLeft)); + getLeftOperand)); if (Config.has(PassRight)) IRTArgs.push_back(IRTArg(IIRB.Int64Ty, "right", "The operation's right operand. This value is " "poison for unary operations.", - ValArgOpts, getRight)); + ValArgOpts, getRightOperand)); if (!IsPRE && Config.has(PassResult)) IRTArgs.push_back( IRTArg(IIRB.Int64Ty, "result", "Result of the operation.", @@ -1867,11 +1867,11 @@ void CompareIO::init(InstrumentationConfig &IConf, if (Config.has(PassLeft)) IRTArgs.push_back(IRTArg(IIRB.Int64Ty, "left", "The comparison's left operand.", OperandArgOpts, - getLeft)); + getLeftOperand)); if (Config.has(PassRight)) IRTArgs.push_back(IRTArg(IIRB.Int64Ty, "right", "The comparison's right operand.", OperandArgOpts, - getRight)); + getRightOperand)); if (!IsPRE && Config.has(PassResultSize)) IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "result_type_id", "The result value's type ID.", IRTArg::NONE, From 607c323e5349f3f403231a2af7c5b1e39d735ae9 Mon Sep 17 00:00:00 2001 From: Jeremy Kun Date: Wed, 24 Jun 2026 11:16:09 -0700 Subject: [PATCH 381/511] [mlir][emitc]: use converted result types when func.call has one result (#205191) The lowering for `func.call` to emitc properly uses converted result types when there are multiple return values from the called func, but not when there is a single one. 
--- mlir/lib/Conversion/FuncToEmitC/FuncToEmitC.cpp | 6 +++--- mlir/test/Conversion/ConvertToEmitC/func.mlir | 8 ++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Conversion/FuncToEmitC/FuncToEmitC.cpp b/mlir/lib/Conversion/FuncToEmitC/FuncToEmitC.cpp index a2279242fb86b..81d15be7d548e 100644 --- a/mlir/lib/Conversion/FuncToEmitC/FuncToEmitC.cpp +++ b/mlir/lib/Conversion/FuncToEmitC/FuncToEmitC.cpp @@ -188,9 +188,9 @@ class CallOpConversion final : public OpConversionPattern { } if (callOp.getNumResults() <= 1) { - rewriter.replaceOpWithNewOp( - callOp, callOp.getResultTypes(), adaptor.getOperands(), - callOp->getAttrs()); + rewriter.replaceOpWithNewOp(callOp, convertedResultTypes, + adaptor.getOperands(), + callOp->getAttrs()); return success(); } diff --git a/mlir/test/Conversion/ConvertToEmitC/func.mlir b/mlir/test/Conversion/ConvertToEmitC/func.mlir index 4f2518401581f..ef2095d672446 100644 --- a/mlir/test/Conversion/ConvertToEmitC/func.mlir +++ b/mlir/test/Conversion/ConvertToEmitC/func.mlir @@ -11,3 +11,11 @@ func.func @index(%arg0: index) -> index { // CHECK: return return %arg0 : index } + +// CHECK-LABEL: emitc.func @call_with_one_result +func.func @call_with_one_result(%arg0: index) -> index { + // CHECK: call @index + // CHECK-SAME: (!emitc.size_t) -> !emitc.size_t + %0 = func.call @index(%arg0) : (index) -> index + return %0 : index +} From 27722e12fee1af6684f76def240039c92e177039 Mon Sep 17 00:00:00 2001 From: Alexey Gerenkov Date: Wed, 24 Jun 2026 21:23:15 +0300 Subject: [PATCH 382/511] [Xtensa] Implement XtensaNullTargetStreamer (#203819) It fixes crash in Xtensa AsmParser::run() during ModuleSummaryIndexAnalysis pass. 
--- llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp | 2 ++ .../lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp | 8 ++++++++ .../lib/Target/Xtensa/MCTargetDesc/XtensaTargetStreamer.h | 7 +++---- llvm/test/CodeGen/Xtensa/null-streamer.ll | 7 +++++++ 4 files changed, 20 insertions(+), 4 deletions(-) create mode 100644 llvm/test/CodeGen/Xtensa/null-streamer.ll diff --git a/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp b/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp index 5ea0f35a90838..1a624fda48a8a 100644 --- a/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp +++ b/llvm/lib/Target/Xtensa/AsmParser/XtensaAsmParser.cpp @@ -41,6 +41,8 @@ class XtensaAsmParser : public MCTargetAsmParser { SMLoc getLoc() const { return getParser().getTok().getLoc(); } XtensaTargetStreamer &getTargetStreamer() { + assert(getParser().getStreamer().getTargetStreamer() && + "do not have a target streamer"); MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); return static_cast(TS); } diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp index cb8c3aca3502c..397b56fb57b5a 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaMCTargetDesc.cpp @@ -279,6 +279,10 @@ createXtensaObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) { return new XtensaTargetELFStreamer(S); } +static MCTargetStreamer *createXtensaNullTargetStreamer(MCStreamer &S) { + return new XtensaTargetStreamer(S); +} + extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeXtensaTargetMC() { // Register the MCAsmInfo. TargetRegistry::RegisterMCAsmInfo(getTheXtensaTarget(), @@ -315,4 +319,8 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeXtensaTargetMC() { // Register the ELF target streamer. 
TargetRegistry::RegisterObjectTargetStreamer( getTheXtensaTarget(), createXtensaObjectTargetStreamer); + + // Register the null target streamer. + TargetRegistry::RegisterNullTargetStreamer(getTheXtensaTarget(), + createXtensaNullTargetStreamer); } diff --git a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaTargetStreamer.h b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaTargetStreamer.h index 817940e880b3c..f8aa951b3e9d3 100644 --- a/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaTargetStreamer.h +++ b/llvm/lib/Target/Xtensa/MCTargetDesc/XtensaTargetStreamer.h @@ -24,13 +24,13 @@ class XtensaTargetStreamer : public MCTargetStreamer { // section is not switched yet (SwitchLiteralSection is true) then switch to // literal section. virtual void emitLiteral(MCSymbol *LblSym, const MCExpr *Value, - bool SwitchLiteralSection, SMLoc L = SMLoc()) = 0; + bool SwitchLiteralSection, SMLoc L = SMLoc()) {}; - virtual void emitLiteralPosition() = 0; + virtual void emitLiteralPosition() {}; // Switch to the literal section. The BaseSection name is used to construct // literal section name. - virtual void startLiteralSection(MCSection *BaseSection) = 0; + virtual void startLiteralSection(MCSection *BaseSection) {}; }; class XtensaTargetAsmStreamer : public XtensaTargetStreamer { @@ -50,7 +50,6 @@ class XtensaTargetELFStreamer : public XtensaTargetStreamer { MCELFStreamer &getStreamer(); void emitLiteral(MCSymbol *LblSym, const MCExpr *Value, bool SwitchLiteralSection, SMLoc L) override; - void emitLiteralPosition() override {} void startLiteralSection(MCSection *Section) override; }; } // end namespace llvm diff --git a/llvm/test/CodeGen/Xtensa/null-streamer.ll b/llvm/test/CodeGen/Xtensa/null-streamer.ll new file mode 100644 index 0000000000000..65ff6d21709a3 --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/null-streamer.ll @@ -0,0 +1,7 @@ +; Test the null streamer with a target streamer. 
+; RUN: llc -O0 -filetype=null -mtriple=xtensa < %s + +define i32 @main() { +entry: + ret i32 0 +} From 7a122015487d498a4ccd02bbf3b958fe113019ce Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 24 Jun 2026 19:28:07 +0100 Subject: [PATCH 383/511] Revert "[AArch64] Run cleanup one final time after peephole (#199711)" (#205633) This reverts commit 448c3d54df7bcd5e5be2b5d051832ad00b4cc89c as it causes compile time regressions for little gain, and sounds like the dead instructions can be removed in a better way. --- .../Target/AArch64/AArch64TargetMachine.cpp | 4 +- llvm/test/CodeGen/AArch64/O3-pipeline.ll | 1 - .../aarch64-neon-vector-insert-uaddlv.ll | 44 +++++++++---------- llvm/test/CodeGen/AArch64/fabs-fp128.ll | 5 ++- 4 files changed, 26 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index c20fb31ab8854..568563cf53220 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -815,10 +815,8 @@ void AArch64PassConfig::addMachineSSAOptimization() { // Run default MachineSSAOptimization first. 
TargetPassConfig::addMachineSSAOptimization(); - if (TM->getOptLevel() != CodeGenOptLevel::None) { + if (TM->getOptLevel() != CodeGenOptLevel::None) addPass(createAArch64MIPeepholeOptLegacyPass()); - addPass(&DeadMachineInstructionElimID); - } } bool AArch64PassConfig::addILPOpts() { diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll index ed2453941866a..08d3b94530d14 100644 --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -172,7 +172,6 @@ ; CHECK-NEXT: Peephole Optimizations ; CHECK-NEXT: Remove dead machine instructions ; CHECK-NEXT: AArch64 MI Peephole Optimization pass -; CHECK-NEXT: Remove dead machine instructions ; CHECK-NEXT: AArch64 Dead register definitions ; CHECK-NEXT: Detect Dead Lanes ; CHECK-NEXT: Init Undef Pass diff --git a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll index 72270e3be443f..91eda8d552397 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll @@ -210,9 +210,9 @@ define void @insert_vec_v8i16_uaddlv_from_v8i16(ptr %0) { ; CHECK-NEXT: stp xzr, xzr, [x0, #16] ; CHECK-NEXT: uaddlv.8h s0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v0, v1, #0 -; CHECK-NEXT: ucvtf.4s v0, v0 -; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ushll.4s v1, v1, #0 +; CHECK-NEXT: ucvtf.4s v1, v1 +; CHECK-NEXT: str q1, [x0] ; CHECK-NEXT: ret entry: @@ -232,10 +232,10 @@ define void @insert_vec_v3i16_uaddlv_from_v8i16(ptr %0) { ; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: uaddlv.8h s0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v0, v1, #0 -; CHECK-NEXT: ucvtf.4s v0, v0 -; CHECK-NEXT: st1.s { v0 }[2], [x8] -; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ushll.4s v1, v1, #0 +; CHECK-NEXT: ucvtf.4s v1, v1 +; CHECK-NEXT: st1.s { v1 }[2], [x8] +; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: ret 
entry: @@ -283,9 +283,9 @@ define void @insert_vec_v16i8_uaddlv_from_v8i8(ptr %0) { ; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: mov.h v2[0], v1[0] ; CHECK-NEXT: bic.4h v2, #255, lsl #8 -; CHECK-NEXT: ushll.4s v1, v2, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ushll.4s v2, v2, #0 +; CHECK-NEXT: ucvtf.4s v2, v2 +; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret entry: @@ -389,9 +389,9 @@ define void @insert_vec_v4i16_uaddlv_from_v4i32(ptr %0) { ; CHECK-NEXT: movi.2d v1, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: ushll.4s v0, v1, #0 -; CHECK-NEXT: ucvtf.4s v0, v0 -; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ushll.4s v1, v1, #0 +; CHECK-NEXT: ucvtf.4s v1, v1 +; CHECK-NEXT: str q1, [x0] ; CHECK-NEXT: ret entry: @@ -408,13 +408,13 @@ define void @insert_vec_v16i16_uaddlv_from_v4i32(ptr %0) { ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: movi.2d v1, #0000000000000000 +; CHECK-NEXT: movi.2d v2, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d0, v0 +; CHECK-NEXT: stp q2, q2, [x0, #32] ; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: stp q1, q2, [x0] ; CHECK-NEXT: ret entry: @@ -435,9 +435,9 @@ define void @insert_vec_v8i8_uaddlv_from_v4i32(ptr %0) { ; CHECK-NEXT: uaddlv.4s d0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] ; CHECK-NEXT: bic.4h v1, #255, lsl #8 -; CHECK-NEXT: ushll.4s v0, v1, #0 -; CHECK-NEXT: ucvtf.4s v0, v0 -; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ushll.4s v1, v1, #0 +; CHECK-NEXT: ucvtf.4s v1, v1 +; CHECK-NEXT: str q1, [x0] ; CHECK-NEXT: ret entry: @@ -454,14 +454,14 @@ define void @insert_vec_v16i8_uaddlv_from_v4i32(ptr %0) { ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: movi.2d v1, #0000000000000000 +; CHECK-NEXT: movi.2d 
v2, #0000000000000000 ; CHECK-NEXT: uaddlv.4s d0, v0 +; CHECK-NEXT: stp q2, q2, [x0, #32] ; CHECK-NEXT: mov.h v1[0], v0[0] -; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: bic.4h v1, #255, lsl #8 -; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: ushll.4s v1, v1, #0 ; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: stp q1, q2, [x0] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fabs-fp128.ll b/llvm/test/CodeGen/AArch64/fabs-fp128.ll index 17b75f89b32da..903aa8adf7085 100644 --- a/llvm/test/CodeGen/AArch64/fabs-fp128.ll +++ b/llvm/test/CodeGen/AArch64/fabs-fp128.ll @@ -144,7 +144,7 @@ define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) { ; CHECK-GI-LABEL: fabs_v4f128: ; CHECK-GI: // %bb.0: // %entry ; CHECK-GI-NEXT: mov x8, v0.d[1] -; CHECK-GI-NEXT: mov v0.d[0], v0.d[0] +; CHECK-GI-NEXT: mov v7.d[0], v0.d[0] ; CHECK-GI-NEXT: mov x9, v1.d[1] ; CHECK-GI-NEXT: mov x10, v2.d[1] ; CHECK-GI-NEXT: mov x11, v3.d[1] @@ -152,13 +152,14 @@ define <4 x fp128> @fabs_v4f128(<4 x fp128> %a) { ; CHECK-GI-NEXT: mov v2.d[0], v2.d[0] ; CHECK-GI-NEXT: mov v3.d[0], v3.d[0] ; CHECK-GI-NEXT: and x8, x8, #0x7fffffffffffffff -; CHECK-GI-NEXT: mov v0.d[1], x8 +; CHECK-GI-NEXT: mov v7.d[1], x8 ; CHECK-GI-NEXT: and x8, x9, #0x7fffffffffffffff ; CHECK-GI-NEXT: and x9, x10, #0x7fffffffffffffff ; CHECK-GI-NEXT: and x10, x11, #0x7fffffffffffffff ; CHECK-GI-NEXT: mov v1.d[1], x8 ; CHECK-GI-NEXT: mov v2.d[1], x9 ; CHECK-GI-NEXT: mov v3.d[1], x10 +; CHECK-GI-NEXT: mov v0.16b, v7.16b ; CHECK-GI-NEXT: ret entry: %c = call <4 x fp128> @llvm.fabs.v4f128(<4 x fp128> %a) From 572bce2128c26343d28216facc8303793c702121 Mon Sep 17 00:00:00 2001 From: adams381 Date: Wed, 24 Jun 2026 13:31:20 -0500 Subject: [PATCH 384/511] [CIR] Skip trivially-recursive available_externally function bodies (#198363) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CIR was emitting available_externally bodies for glibc-style inline wrappers 
whose sole call is back to the same asm-named symbol (via __builtin_*). LLVM then treats the function as non-terminating and can fold away surrounding null checks — the same failure mode as classic CodeGen PR9614 (basename-style if (cwd) paths). Port isTriviallyRecursive / shouldEmitFunction from CodeGenModule, including the isInlineBuiltinDeclaration exemption, and skip emitting those definitions. isTriviallyRecursive (and its FunctionIsDirectlyRecursive visitor) lives on MangleContext, so both classic CodeGen and CIRGen call getMangleContext().isTriviallyRecursive(FD). --- clang/include/clang/AST/Mangle.h | 8 + clang/lib/AST/Mangle.cpp | 55 +++++++ clang/lib/CIR/CodeGen/CIRGenModule.cpp | 22 ++- clang/lib/CIR/CodeGen/CIRGenModule.h | 10 ++ clang/lib/CodeGen/CodeGenModule.cpp | 151 ++++++------------ clang/lib/CodeGen/CodeGenModule.h | 1 - .../CIR/CodeGen/trivially-recursive-skip.cpp | 38 +++++ 7 files changed, 181 insertions(+), 104 deletions(-) create mode 100644 clang/test/CIR/CodeGen/trivially-recursive-skip.cpp diff --git a/clang/include/clang/AST/Mangle.h b/clang/include/clang/AST/Mangle.h index 3d3a70cb95b38..d62495095c7e4 100644 --- a/clang/include/clang/AST/Mangle.h +++ b/clang/include/clang/AST/Mangle.h @@ -129,6 +129,14 @@ class MangleContext { virtual bool shouldMangleCXXName(const NamedDecl *D) = 0; virtual bool shouldMangleStringLiteral(const StringLiteral *SL) = 0; + /// Return true if \p FD's body contains a direct call back to the symbol it + /// links as, through an asm label or a __builtin_* alias (PR9614 / glibc's + /// btowc pattern). An available_externally body of such a function is not a + /// valid stand-in for the real implementation and should be dropped before + /// codegen. The check keys off the mangled/asm name, hence it lives on + /// MangleContext. 
+ bool isTriviallyRecursive(const FunctionDecl *FD); + virtual bool isUniqueInternalLinkageDecl(const NamedDecl *ND) { return false; } diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp index 4dca4b5b7e693..76c51b8d9e827 100644 --- a/clang/lib/AST/Mangle.cpp +++ b/clang/lib/AST/Mangle.cpp @@ -17,8 +17,10 @@ #include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/ExprCXX.h" +#include "clang/AST/StmtVisitor.h" #include "clang/AST/VTableBuilder.h" #include "clang/Basic/ABI.h" +#include "clang/Basic/Builtins.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/IR/DataLayout.h" @@ -158,6 +160,59 @@ bool MangleContext::shouldMangleDeclName(const NamedDecl *D) { return shouldMangleCXXName(D); } +namespace { +// Visits a function body looking for a direct call back to the symbol the +// function will link as. Detects both asm-label aliases and __builtin_* +// wrappers (PR9614 / glibc btowc pattern). +struct FunctionIsDirectlyRecursive + : public ConstStmtVisitor { + const StringRef Name; + const Builtin::Context &BI; + FunctionIsDirectlyRecursive(StringRef N, const Builtin::Context &C) + : Name(N), BI(C) {} + + bool VisitCallExpr(const CallExpr *E) { + const FunctionDecl *FD = E->getDirectCallee(); + if (!FD) + return false; + AsmLabelAttr *Attr = FD->getAttr(); + if (Attr && Name == Attr->getLabel()) + return true; + unsigned BuiltinID = FD->getBuiltinID(); + if (!BuiltinID || !BI.isLibFunction(BuiltinID)) + return false; + std::string BuiltinNameStr = BI.getName(BuiltinID); + StringRef BuiltinName = BuiltinNameStr; + return BuiltinName.consume_front("__builtin_") && Name == BuiltinName; + } + + bool VisitStmt(const Stmt *S) { + for (const Stmt *Child : S->children()) + if (Child && this->Visit(Child)) + return true; + return false; + } +}; +} // namespace + +bool MangleContext::isTriviallyRecursive(const FunctionDecl *FD) { + StringRef Name; + if (shouldMangleDeclName(FD)) { + // 
C++-mangled functions can only recurse into themselves through an + // asm label that bypasses the mangled name. + AsmLabelAttr *Attr = FD->getAttr(); + if (!Attr) + return false; + Name = Attr->getLabel(); + } else { + Name = FD->getName(); + } + + FunctionIsDirectlyRecursive Walker(Name, FD->getASTContext().BuiltinInfo); + const Stmt *Body = FD->getBody(); + return Body ? Walker.Visit(Body) : false; +} + /// Given an LLDB function call label, this function prints the label /// into \c Out, together with the structor type of \c GD (if the /// decl is a constructor/destructor). LLDB knows how to handle mangled diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index 0897a3a897580..78c038733c6af 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -1598,12 +1598,30 @@ void CIRGenModule::emitGlobalVarDefinition(const clang::VarDecl *vd, emitCXXGlobalVarDeclInitFunc(vd, gv, needsGlobalCtor); } +bool CIRGenModule::shouldEmitFunction(GlobalDecl gd) { + if (getFunctionLinkage(gd) != + cir::GlobalLinkageKind::AvailableExternallyLinkage) + return true; + + const auto *fd = cast(gd.getDecl()); + // Inline builtins must be emitted; the body is redirected to a `.inline` + // symbol in CIRGenFunction::generateCode. + if (fd->isInlineBuiltinDeclaration()) + return true; + + // PR9614 / glibc btowc workaround: an available_externally function whose + // body just calls itself (via asm label or __builtin_* lowering on the + // same name) is not a valid stand-in for the real implementation. Drop + // it from the IR so the optimizer doesn't reason about its body. + return !getCXXABI().getMangleContext().isTriviallyRecursive(fd); +} + void CIRGenModule::emitGlobalDefinition(clang::GlobalDecl gd, mlir::Operation *op) { const auto *decl = cast(gd.getDecl()); if (const auto *fd = dyn_cast(decl)) { - // TODO(CIR): Skip generation of CIR for functions with available_externally - // linkage at -O0. 
+ if (!shouldEmitFunction(gd)) + return; if (const auto *method = dyn_cast(decl)) { // Make sure to emit the definition(s) before we emit the thunks. This is diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index ed956b7585f95..c6fef5dde5fd9 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -577,6 +577,16 @@ class CIRGenModule : public CIRGenTypeCache { /// false, the definition can be emitted lazily if it's used. bool mustBeEmitted(const clang::ValueDecl *d); + /// Decide whether to emit the body of `gd` to CIR. Returns false only for + /// available_externally functions that are trivially recursive (PR9614), + /// i.e. whose body calls back into the symbol the function links as via an + /// asm label or a `__builtin_*` alias (e.g., glibc's `extern inline` libc + /// wrappers that call `__builtin_strrchr`). Such a body is not a valid + /// stand-in for the real implementation. Inline builtin declarations are + /// always emitted. + /// Mirrors classic CodeGen's `CodeGenModule::shouldEmitFunction`. + bool shouldEmitFunction(clang::GlobalDecl gd); + /// Determine whether the definition can be emitted eagerly, or should be /// delayed until the end of the translation unit. This is relevant for /// definitions whose linkage can change, e.g.
implicit function diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index cc0d0341a2dd0..fec18acd46998 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4731,120 +4731,69 @@ static bool HasNonDllImportDtor(QualType T) { } namespace { - struct FunctionIsDirectlyRecursive - : public ConstStmtVisitor { - const StringRef Name; - const Builtin::Context &BI; - FunctionIsDirectlyRecursive(StringRef N, const Builtin::Context &C) - : Name(N), BI(C) {} - - bool VisitCallExpr(const CallExpr *E) { - const FunctionDecl *FD = E->getDirectCallee(); - if (!FD) - return false; - AsmLabelAttr *Attr = FD->getAttr(); - if (Attr && Name == Attr->getLabel()) - return true; - unsigned BuiltinID = FD->getBuiltinID(); - if (!BuiltinID || !BI.isLibFunction(BuiltinID)) - return false; - std::string BuiltinNameStr = BI.getName(BuiltinID); - StringRef BuiltinName = BuiltinNameStr; - return BuiltinName.consume_front("__builtin_") && Name == BuiltinName; - } - - bool VisitStmt(const Stmt *S) { - for (const Stmt *Child : S->children()) - if (Child && this->Visit(Child)) - return true; - return false; - } - }; - - // Make sure we're not referencing non-imported vars or functions. - struct DLLImportFunctionVisitor - : public RecursiveASTVisitor { - bool SafeToInline = true; +// Make sure we're not referencing non-imported vars or functions. +struct DLLImportFunctionVisitor + : public RecursiveASTVisitor { + bool SafeToInline = true; - bool shouldVisitImplicitCode() const { return true; } - - bool VisitVarDecl(VarDecl *VD) { - if (VD->getTLSKind()) { - // A thread-local variable cannot be imported. - SafeToInline = false; - return SafeToInline; - } - - // A variable definition might imply a destructor call. 
- if (VD->isThisDeclarationADefinition()) - SafeToInline = !HasNonDllImportDtor(VD->getType()); + bool shouldVisitImplicitCode() const { return true; } + bool VisitVarDecl(VarDecl *VD) { + if (VD->getTLSKind()) { + // A thread-local variable cannot be imported. + SafeToInline = false; return SafeToInline; } - bool VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { - if (const auto *D = E->getTemporary()->getDestructor()) - SafeToInline = D->hasAttr(); - return SafeToInline; - } + // A variable definition might imply a destructor call. + if (VD->isThisDeclarationADefinition()) + SafeToInline = !HasNonDllImportDtor(VD->getType()); - bool VisitDeclRefExpr(DeclRefExpr *E) { - ValueDecl *VD = E->getDecl(); - if (isa(VD)) - SafeToInline = VD->hasAttr(); - else if (VarDecl *V = dyn_cast(VD)) - SafeToInline = !V->hasGlobalStorage() || V->hasAttr(); - return SafeToInline; - } + return SafeToInline; + } - bool VisitCXXConstructExpr(CXXConstructExpr *E) { - SafeToInline = E->getConstructor()->hasAttr(); - return SafeToInline; - } + bool VisitCXXBindTemporaryExpr(CXXBindTemporaryExpr *E) { + if (const auto *D = E->getTemporary()->getDestructor()) + SafeToInline = D->hasAttr(); + return SafeToInline; + } - bool VisitCXXMemberCallExpr(CXXMemberCallExpr *E) { - CXXMethodDecl *M = E->getMethodDecl(); - if (!M) { - // Call through a pointer to member function. This is safe to inline. 
- SafeToInline = true; - } else { - SafeToInline = M->hasAttr(); - } - return SafeToInline; - } + bool VisitDeclRefExpr(DeclRefExpr *E) { + ValueDecl *VD = E->getDecl(); + if (isa(VD)) + SafeToInline = VD->hasAttr(); + else if (VarDecl *V = dyn_cast(VD)) + SafeToInline = !V->hasGlobalStorage() || V->hasAttr(); + return SafeToInline; + } - bool VisitCXXDeleteExpr(CXXDeleteExpr *E) { - SafeToInline = E->getOperatorDelete()->hasAttr(); - return SafeToInline; - } + bool VisitCXXConstructExpr(CXXConstructExpr *E) { + SafeToInline = E->getConstructor()->hasAttr(); + return SafeToInline; + } - bool VisitCXXNewExpr(CXXNewExpr *E) { - SafeToInline = E->getOperatorNew()->hasAttr(); - return SafeToInline; + bool VisitCXXMemberCallExpr(CXXMemberCallExpr *E) { + CXXMethodDecl *M = E->getMethodDecl(); + if (!M) { + // Call through a pointer to member function. This is safe to inline. + SafeToInline = true; + } else { + SafeToInline = M->hasAttr(); } - }; -} + return SafeToInline; + } -// isTriviallyRecursive - Check if this function calls another -// decl that, because of the asm attribute or the other decl being a builtin, -// ends up pointing to itself. -bool -CodeGenModule::isTriviallyRecursive(const FunctionDecl *FD) { - StringRef Name; - if (getCXXABI().getMangleContext().shouldMangleDeclName(FD)) { - // asm labels are a special kind of mangling we have to support. - AsmLabelAttr *Attr = FD->getAttr(); - if (!Attr) - return false; - Name = Attr->getLabel(); - } else { - Name = FD->getName(); + bool VisitCXXDeleteExpr(CXXDeleteExpr *E) { + SafeToInline = E->getOperatorDelete()->hasAttr(); + return SafeToInline; } - FunctionIsDirectlyRecursive Walker(Name, Context.BuiltinInfo); - const Stmt *Body = FD->getBody(); - return Body ? 
Walker.Visit(Body) : false; -} + bool VisitCXXNewExpr(CXXNewExpr *E) { + SafeToInline = E->getOperatorNew()->hasAttr(); + return SafeToInline; + } +}; +} // namespace bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { if (getFunctionLinkage(GD) != llvm::Function::AvailableExternallyLinkage) @@ -4906,7 +4855,7 @@ bool CodeGenModule::shouldEmitFunction(GlobalDecl GD) { // but a function that calls itself through asm label/`__builtin_` trickery is // clearly not equivalent to the real implementation. // This happens in glibc's btowc and in some configure checks. - return !isTriviallyRecursive(F); + return !getCXXABI().getMangleContext().isTriviallyRecursive(F); } bool CodeGenModule::shouldOpportunisticallyEmitVTables() { diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index badb740f0ba32..4283b6a3dc869 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -643,7 +643,6 @@ class CodeGenModule : public CodeGenTypeCache { void createCUDARuntime(); void createHLSLRuntime(); - bool isTriviallyRecursive(const FunctionDecl *F); bool shouldEmitFunction(GlobalDecl GD); // Whether a global variable should be emitted by CUDA/HIP host/device // related attributes. 
diff --git a/clang/test/CIR/CodeGen/trivially-recursive-skip.cpp b/clang/test/CIR/CodeGen/trivially-recursive-skip.cpp new file mode 100644 index 0000000000000..5ff662a1c8dec --- /dev/null +++ b/clang/test/CIR/CodeGen/trivially-recursive-skip.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm -O2 -disable-llvm-passes %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -O2 -disable-llvm-passes %s -o %t.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s + +extern "C++" { +extern char *strrchr(char *__s, int __c) __attribute__((__nothrow__)) +__attribute__((__pure__)) __attribute__((__nonnull__(1))) +__asm__("strrchr"); +extern char *strrchr(char *__s, int __c) __attribute__((__nothrow__)) + __asm__("strrchr") __attribute__((__always_inline__)) + __attribute__((__gnu_inline__)); +extern __inline __attribute__((__nothrow__)) + __attribute__((__always_inline__)) __attribute__((__gnu_inline__)) +char *strrchr(char *__s, int __c) { + return __builtin_strrchr(__s, __c); +} +} + +extern "C" int puts(const char *); + +extern "C" void caller(char *s) { + if (s) { + const char *base = strrchr(s, '/'); + puts(base ? base + 1 : s); + } else { + puts("(null)"); + } +} + +// The trivially-recursive strrchr body is dropped, so no available_externally +// definition of it is emitted; @caller is still emitted normally. 
+// CIR-NOT: cir.func {{.*}}available_externally @strrchr +// CIR: cir.func{{.*}}@caller( +// LLVM-NOT: define {{.*}}available_externally{{.*}}@strrchr +// LLVM: define{{.*}}@caller( From f48816bc7e53f0ca59f8eae5ede4bd861f6d8c6c Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Thu, 25 Jun 2026 03:33:54 +0900 Subject: [PATCH 385/511] [SYCL][E2E] Fix tests failing on new driver (#22420) See https://github.com/intel/llvm/issues/22419 and https://github.com/intel/llvm/issues/22405 Signed-off-by: Nick Sarnie --- sycl/test-e2e/ESIMD/PerformanceTests/Stencil.cpp | 3 +++ sycl/test-e2e/ESIMD/PerformanceTests/stencil2.cpp | 3 +++ sycl/test-e2e/ESIMD/Stencil.cpp | 4 ++++ sycl/test-e2e/ESIMD/stencil2.cpp | 4 ++++ .../test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp | 2 +- sycl/test-e2e/Properties/cache_config.cpp | 2 +- 6 files changed, 16 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/ESIMD/PerformanceTests/Stencil.cpp b/sycl/test-e2e/ESIMD/PerformanceTests/Stencil.cpp index 4760818d05c22..eb9ed9f8a87e1 100644 --- a/sycl/test-e2e/ESIMD/PerformanceTests/Stencil.cpp +++ b/sycl/test-e2e/ESIMD/PerformanceTests/Stencil.cpp @@ -7,6 +7,9 @@ //===----------------------------------------------------------------------===// // REQUIRES: gpu-intel-dg2 && level_zero +// UNSUPPORTED: true +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22419 + // RUN: mkdir -p %t.dir && %{build} -o %t.dir/exec.out // RUN: env IGC_DumpToCustomDir=%t.dir NEO_CACHE_PERSISTENT=0 IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out // RUN: %{run-aux} python3 %S/instruction_count.py %t.dir 1699 ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E14Stencil_kernel.asm diff --git a/sycl/test-e2e/ESIMD/PerformanceTests/stencil2.cpp b/sycl/test-e2e/ESIMD/PerformanceTests/stencil2.cpp index 9e47e57b6a1df..d3e9b93506505 100644 --- a/sycl/test-e2e/ESIMD/PerformanceTests/stencil2.cpp +++ b/sycl/test-e2e/ESIMD/PerformanceTests/stencil2.cpp @@ -7,6 +7,9 @@ 
//===----------------------------------------------------------------------===// // REQUIRES: gpu-intel-dg2 && level_zero +// UNSUPPORTED: true +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22419 + // RUN: mkdir -p %t.dir && %{build} -o %t.dir/exec.out // RUN: env IGC_DumpToCustomDir=%t.dir NEO_CACHE_PERSISTENT=0 IGC_ShaderDumpEnable=1 %{run} %t.dir/exec.out // RUN: %{run-aux} python3 %S/instruction_count.py %t.dir 1699 ZTSZZ4mainENKUlRN4sycl3_V17handlerEE_clES2_E14Stencil_kernel.asm diff --git a/sycl/test-e2e/ESIMD/Stencil.cpp b/sycl/test-e2e/ESIMD/Stencil.cpp index 56260478ee08c..efbe4391c65ec 100644 --- a/sycl/test-e2e/ESIMD/Stencil.cpp +++ b/sycl/test-e2e/ESIMD/Stencil.cpp @@ -7,6 +7,10 @@ //===----------------------------------------------------------------------===// // RUN: %{build} -o %t.out // RUN: %{run} %t.out + +// UNSUPPORTED: run-mode +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22419 + #include #include "esimd_test_utils.hpp" diff --git a/sycl/test-e2e/ESIMD/stencil2.cpp b/sycl/test-e2e/ESIMD/stencil2.cpp index 7670c4bd89d3c..2f823ef32f482 100644 --- a/sycl/test-e2e/ESIMD/stencil2.cpp +++ b/sycl/test-e2e/ESIMD/stencil2.cpp @@ -7,6 +7,10 @@ //===----------------------------------------------------------------------===// // RUN: %{build} -o %t.out // RUN: %{run} %t.out + +// UNSUPPORTED: run-mode +// UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22419 + #include #include "esimd_test_utils.hpp" diff --git a/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp b/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp index cd0850e13d647..98572b6277acc 100644 --- a/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp +++ b/sycl/test-e2e/KernelAndProgram/level-zero-static-link-flow.cpp @@ -4,7 +4,7 @@ // UNSUPPORTED: windows && (gpu-intel-gen12 || arch-intel_gpu_bmg_g21) // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/21556 -// UNSUPPORTED: linux && run-mode && 
!gpu-intel-dg2 +// UNSUPPORTED: linux && run-mode // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22405 // RUN: %{build} -o %t.out diff --git a/sycl/test-e2e/Properties/cache_config.cpp b/sycl/test-e2e/Properties/cache_config.cpp index ad331d3f83c79..33392de6bebe9 100644 --- a/sycl/test-e2e/Properties/cache_config.cpp +++ b/sycl/test-e2e/Properties/cache_config.cpp @@ -6,7 +6,7 @@ // UNSUPPORTED: windows && arch-intel_gpu_bmg_g21 // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22099 -// UNSUPPORTED: linux && (arch-intel_gpu_pvc || arch-intel_gpu_bmg_g21) +// UNSUPPORTED: linux && run-mode // UNSUPPORTED-TRACKER: https://github.com/intel/llvm/issues/22405 // RUN: %{build} -Wno-deprecated-declarations -o %t.out From 2db9d8abb29871f6e71d178277806dab69a5168f Mon Sep 17 00:00:00 2001 From: Ziqing Luo Date: Wed, 24 Jun 2026 11:36:02 -0700 Subject: [PATCH 386/511] [SSAF] Properly handle contributors with multiple declarations (#204482) A contributor entity can have multiple declarations all contributing interesting facts. For example, a function declaration (not definition) may have default arguments, which may provide pointer flow or unsafe buffer usage facts. This commit groups declarations by their canonical decls. The entity summary of a contributor will be collected from all its decls. In addition, this commit includes the following minor changes: - Factor the common procedure of summary extraction and insertion into a template function in SSAFAnalysesCommon.h. - Convert the no-duplicate contributor assertion into a debug warning. We need the release build to not crash. 
rdar://179150798 --- .../PointerFlow/PointerFlowExtractor.cpp | 50 +++++----------- .../Analyses/SSAFAnalysesCommon.cpp | 10 ++-- .../Analyses/SSAFAnalysesCommon.h | 59 ++++++++++++++++++- .../UnsafeBufferUsageExtractor.cpp | 46 +++++---------- .../PointerFlow/entity-name-no-conflict.cpp | 27 +++++++++ .../PointerFlow/multi-decl-contributor.cpp | 40 +++++++++++++ 6 files changed, 158 insertions(+), 74 deletions(-) create mode 100644 clang/test/Analysis/Scalable/PointerFlow/entity-name-no-conflict.cpp create mode 100644 clang/test/Analysis/Scalable/PointerFlow/multi-decl-contributor.cpp diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp b/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp index ef5932c52a6c3..870a398cda9f2 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp @@ -315,49 +315,29 @@ class PointerFlowTUSummaryExtractor : public TUSummaryExtractor { /// \return a non-null unique pointer to a PointerFlowEntitySummary std::unique_ptr - extractEntitySummary(const NamedDecl *Contributor, ASTContext &Ctx, - TUSummaryExtractor &Extractor) { + extractEntitySummary(const std::vector &ContributorDecls, + ASTContext &Ctx, TUSummaryExtractor &Extractor) { PointerFlowMatcher Matcher(Ctx, Extractor); - auto MatchAction = [&Matcher, &Contributor](const DynTypedNode &Node) { - auto Err = Matcher.matches(Node, Contributor); - if (Err) - logWarningFromError(std::move(Err)); - }; + for (const auto *Contrib : ContributorDecls) { + auto MatchAction = [&Matcher, Contrib](const DynTypedNode &Node) { + if (auto Err = Matcher.matches(Node, Contrib)) + logWarningFromError(std::move(Err)); + }; - findMatchesIn(Contributor, MatchAction); + findMatchesIn(Contrib, MatchAction); + } return std::make_unique( 
buildPointerFlowEntitySummary(std::move(Matcher.Results))); } void HandleTranslationUnit(ASTContext &Ctx) override { - std::vector Contributors; - - findContributors(Ctx, Contributors); - for (auto *CD : Contributors) { - // Templates are skipped, but their instantiations are handled. The idea - // is that we can conclude facts about a template through all of its - // instantiations. - if (CD->isTemplated()) - continue; - - auto EntitySummary = extractEntitySummary(CD, Ctx, *this); - - assert(EntitySummary); - if (EntitySummary->empty()) - continue; - - std::optional ContributorId = addEntity(CD); - if (!ContributorId) { - logWarningFromError(makeEntityNameErr(Ctx, CD)); - continue; - } - - [[maybe_unused]] auto [_, InsertionSucceeded] = - SummaryBuilder.addSummary(*ContributorId, std::move(EntitySummary)); - - assert(InsertionSucceeded && "duplicated contributor extraction"); - } + extractAndAddSummaries( + *this, SummaryBuilder, Ctx, + [&](const std::vector &Decls) { + return extractEntitySummary(Decls, Ctx, *this); + }, + "PointerFlow"); } }; } // namespace diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.cpp b/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.cpp index 660bc424fb32f..667ef69606aad 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.cpp @@ -124,12 +124,14 @@ class ContributorFactFinder : public DynamicRecursiveASTVisitor { }; } // namespace -void ssaf::findContributors(ASTContext &Ctx, - std::vector &Contributors) { +void ssaf::findContributors( + ASTContext &Ctx, + llvm::DenseMap> + &Contributors) { ContributorFinder Finder; Finder.TraverseAST(Ctx); - Contributors.insert(Contributors.end(), Finder.Contributors.begin(), - Finder.Contributors.end()); + for (const NamedDecl *C : Finder.Contributors) + Contributors[cast(C->getCanonicalDecl())].push_back(C); } void ssaf::findMatchesIn( 
diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.h b/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.h index 38c37e7103b73..db0697798077e 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.h +++ b/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.h @@ -15,10 +15,16 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/ASTTypeTraits.h" #include "clang/AST/Decl.h" +#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #include "llvm/Support/JSON.h" #include "llvm/Support/raw_ostream.h" +#include namespace clang::ssaf { ///\return a short descriptions of a json::Value @@ -66,9 +72,13 @@ inline void logWarningFromError(llvm::Error Err) { llvm::consumeError(std::move(Err)); } -/// Find all contributors in an AST. -void findContributors(ASTContext &Ctx, - std::vector &Contributors); +/// Find all contributors in an AST. The found contributors are organized as a +/// map from the canonical declaration of each entity to all of its +/// declarations. +void findContributors( + ASTContext &Ctx, + llvm::DenseMap> + &Contributors); /// Perform "MatchAction" on each Stmt and Decl belonging to the `Contributor`. /// \param Contributor @@ -77,6 +87,49 @@ void findMatchesIn( const NamedDecl *Contributor, llvm::function_ref MatchActionRef); +/// The standard contributor-summary extraction procedure: +/// 1. Find and group all contributor decls by their canonical decls. +/// 2. Use \p Extract to get an EntitySummary of a contributor from all of its +/// decls. +/// 3. Insert the EntitySummary into the \p Builder. 
+/// +/// \param ExtractorFnT the template parameter that should be a function type +/// 'std::unique_ptr(std::vector)' for different +/// entity summary type `SummaryT`s +/// \param ExtractFn The function that extracts summaries of a contributor from +/// its decls. +/// \param ExtractorName The optional information inserted into the warning +/// message when duplicate contributor names (EntityNames) are seen. +template +void extractAndAddSummaries(TUSummaryExtractor &Extractor, + TUSummaryBuilder &Builder, ASTContext &Ctx, + ExtractorFnT ExtractFn, + const char *ExtractorName = "") { + llvm::DenseMap> + Contributors; + findContributors(Ctx, Contributors); + for (const auto &[Cano, Decls] : Contributors) { + // Templates are skipped, but their instantiations are handled. The idea + // is that we can conclude facts about a template through all of its + // instantiations. + if (Cano->isTemplated()) + continue; + + auto Summary = ExtractFn(Decls); + assert(Summary); + if (Summary->empty()) + continue; + + if (auto Id = Extractor.addEntity(Cano)) { + if (!Builder.addSummary(*Id, std::move(Summary)).second) + logWarningFromError(makeErrAtNode( + Ctx, Cano, "dropping duplicate %s summary for entity %s", + ExtractorName, Cano->getNameAsString().c_str())); + } else + logWarningFromError(makeEntityNameErr(Ctx, Cano)); + } +} + } // namespace clang::ssaf #endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_SSAFANALYSESCOMMON_H diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp b/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp index f4067e5f315ff..1ba6bc7f8e7b6 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp @@ -17,7 +17,9 @@ #include 
"clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Support/ErrorHandling.h" +#include using namespace clang; using namespace ssaf; @@ -30,20 +32,22 @@ class UnsafeBufferUsageTUSummaryExtractor : public TUSummaryExtractor { /// \return a non-null unique pointer to a UnsafeBufferUsageEntitySummary std::unique_ptr - extractEntitySummary(const NamedDecl *Contributor, ASTContext &Ctx); + extractEntitySummary(const std::vector &ContributorDecls, + ASTContext &Ctx); void HandleTranslationUnit(ASTContext &Ctx) override; }; } // namespace clang::ssaf std::unique_ptr clang::ssaf::UnsafeBufferUsageTUSummaryExtractor::extractEntitySummary( - const NamedDecl *Contributor, ASTContext &Ctx) { + const std::vector &ContributorDecls, ASTContext &Ctx) { std::set UnsafePointers; - auto MatchAction = [&UnsafePointers, &Ctx](const DynTypedNode &Node) { matchUnsafePointers(Node, Ctx, UnsafePointers); }; - findMatchesIn(Contributor, MatchAction); + + for (const auto *Contrib : ContributorDecls) + findMatchesIn(Contrib, MatchAction); EntityPointerLevelSet Results; @@ -70,34 +74,12 @@ clang::ssaf::UnsafeBufferUsageTUSummaryExtractor::extractEntitySummary( void clang::ssaf::UnsafeBufferUsageTUSummaryExtractor::HandleTranslationUnit( ASTContext &Ctx) { - std::vector Contributors; - - findContributors(Ctx, Contributors); - for (auto *CD : Contributors) { - // Templates are skipped, but their instantiations are handled. The idea - // is that we can conclude facts about a template through all of its - // instantiations. 
- if (CD->isTemplated()) - continue; - - auto EntitySummary = extractEntitySummary(CD, Ctx); - - assert(EntitySummary); - if (EntitySummary->empty()) - continue; - - auto ContributorId = addEntity(CD); - - if (!ContributorId) { - logWarningFromError(makeEntityNameErr(Ctx, CD)); - continue; - } - - [[maybe_unused]] auto [Ignored, InsertionSucceeded] = - SummaryBuilder.addSummary(*ContributorId, std::move(EntitySummary)); - - assert(InsertionSucceeded && "duplicated contributor extraction"); - } + extractAndAddSummaries( + *this, SummaryBuilder, Ctx, + [&](const std::vector &Decls) { + return extractEntitySummary(Decls, Ctx); + }, + "UnsafeBufferUsage"); } namespace clang::ssaf { // NOLINTNEXTLINE(misc-use-internal-linkage) diff --git a/clang/test/Analysis/Scalable/PointerFlow/entity-name-no-conflict.cpp b/clang/test/Analysis/Scalable/PointerFlow/entity-name-no-conflict.cpp new file mode 100644 index 0000000000000..e190ee264cb7d --- /dev/null +++ b/clang/test/Analysis/Scalable/PointerFlow/entity-name-no-conflict.cpp @@ -0,0 +1,27 @@ +// REQUIRES: asserts +// RUN: rm -rf %t && mkdir -p %t +// RUN: %clang_cc1 -fsyntax-only -std=c++20 %s \ +// RUN: --ssaf-extract-summaries=PointerFlow \ +// RUN: --ssaf-tu-summary-file=%t/tu.summary.json \ +// RUN: --ssaf-compilation-unit-id="tu-1" \ +// RUN: -mllvm -debug-only=ssaf-analyses 2>&1 | FileCheck %s + + +// The two `Holder` instantiations are distinct types +// (each lambda is its own closure record), but the USR generator +// currently fails to distinguish them. 
+ + +// CHECK: dropping duplicate PointerFlow summary +// FIXME: change to CHECK-NOT once the bug gets fixed + +template +struct Holder { + T *p; + void reset(T *x) { p = x; } +}; + +void caller(int x) { + Holder().reset(nullptr); + Holder().reset(nullptr); +} diff --git a/clang/test/Analysis/Scalable/PointerFlow/multi-decl-contributor.cpp b/clang/test/Analysis/Scalable/PointerFlow/multi-decl-contributor.cpp new file mode 100644 index 0000000000000..717a2875636b2 --- /dev/null +++ b/clang/test/Analysis/Scalable/PointerFlow/multi-decl-contributor.cpp @@ -0,0 +1,40 @@ +// RUN: rm -rf %t && mkdir -p %t + + +// RUN: %clang_cc1 -fsyntax-only %s \ +// RUN: --ssaf-extract-summaries=PointerFlow,UnsafeBufferUsage \ +// RUN: --ssaf-tu-summary-file=%t/tu.summary.json \ +// RUN: --ssaf-compilation-unit-id="tu-1" + + +// RUN: clang-ssaf-linker %t/tu.summary.json -o %t/lu.json + + +// RUN: clang-ssaf-analyzer %t/lu.json -o %t/wpa.json \ +// RUN: -a UnsafeBufferReachableAnalysisResult + +// RUN: FileCheck %s --input-file=%t/wpa.json + +extern int **G; + +void foo(int *p = G[5]); // (p, 1) -> (G, 2) and G is unsafe + +void foo(int *p) { + int *q = p; // (q, 1) -> (p, 1) + q[5] = 0; // q is unsafe +} + +// Check that (q, 1), (p, 1), (G, 2) and (G, 1) are all unsafe pointers. 
+ +// CHECK-DAG: "id": [[P_ID:[0-9]+]],{{([^]]|[[:space:]])+\],[[:space:]]+"suffix": "1",[[:space:]]+"usr": }}"c:@F@foo#*I#" +// CHECK-DAG: "id": [[G_ID:[0-9]+]],{{([^]]|[[:space:]])+\],[[:space:]]+"suffix": "",[[:space:]]+"usr": }}"c:@G" +// CHECK-DAG: "id": [[Q_ID:[0-9]+]],{{([^]]|[[:space:]])+\],[[:space:]]+"suffix": "",[[:space:]]+"usr": "[^"]+@q"}} + +// CHECK: "analysis_name": "UnsafeBufferReachableAnalysisResult" + +// CHECK-DAG: {{\{[[:space:]]+}}"@": [[G_ID]]{{[[:space:]]+\},[[:space:]]+1[[:space:]]+\]}} +// CHECK-DAG: {{\{[[:space:]]+}}"@": [[G_ID]]{{[[:space:]]+\},[[:space:]]+2[[:space:]]+\]}} +// CHECK-DAG: {{\{[[:space:]]+}}"@": [[Q_ID]]{{[[:space:]]+\},[[:space:]]+1[[:space:]]+\]}} +// CHECK-DAG: {{\{[[:space:]]+}}"@": [[P_ID]]{{[[:space:]]+\},[[:space:]]+1[[:space:]]+\]}} + +// CHECK: "analysis_name": From 6888fe54241658e61da4c1899a2569d1f5a2bec5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 24 Jun 2026 19:36:39 +0100 Subject: [PATCH 387/511] [X86] combineMulToPMADDWD - match 256/512-bit SIGN_EXTEND nodes (#205606) Now that the X86ISD::VPMADDWD handling is improving, we can remove some of the limits that we had to prevent regressions --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- llvm/test/CodeGen/X86/madd.ll | 66 +++++++++++++---------- llvm/test/CodeGen/X86/shrink_vmul.ll | 72 ++++++++++++------------- 3 files changed, 75 insertions(+), 65 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2ae8a433297b2..e5c8a5df1b54c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -50663,7 +50663,7 @@ static SDValue combineMulToPMADDWD(SDNode *N, const SDLoc &DL, if (Op.getOpcode() == ISD::SIGN_EXTEND && N->isOnlyUserOf(Op.getNode())) { SDValue Src = Op.getOperand(0); // Convert sext(vXi16) to zext(vXi16). 
- if (Src.getScalarValueSizeInBits() == 16 && VT.getSizeInBits() <= 128) + if (Src.getScalarValueSizeInBits() == 16) return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Src); // Convert sext(vXi8) to zext(vXi16 sext(vXi8)) on pre-SSE41 targets // which will expand the extension. diff --git a/llvm/test/CodeGen/X86/madd.ll b/llvm/test/CodeGen/X86/madd.ll index 87bec9597d35f..4ca0b05dbe92b 100644 --- a/llvm/test/CodeGen/X86/madd.ll +++ b/llvm/test/CodeGen/X86/madd.ll @@ -3674,56 +3674,66 @@ define <5 x i32> @oddvector_mul(<16 x i16> %A, <16 x i16> %B) { ; SSE42-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero ; SSE42-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; SSE42-NEXT: pmaddwd %xmm0, %xmm1 -; SSE42-NEXT: movd %xmm1, %ecx +; SSE42-NEXT: pextrd $1, %xmm1, %ecx ; SSE42-NEXT: movdqa %xmm4, (%rdi) -; SSE42-NEXT: pextrd $1, %xmm1, %edx +; SSE42-NEXT: movd %xmm1, %edx ; SSE42-NEXT: addl %ecx, %edx ; SSE42-NEXT: movl %edx, 16(%rdi) ; SSE42-NEXT: retq ; ; AVX1-LABEL: oddvector_mul: ; AVX1: # %bb.0: -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 -; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2 +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] +; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4,4,5,5,6,6,7,7] +; AVX1-NEXT: vpmaddwd %xmm2, %xmm3, %xmm2 ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 -; AVX1-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm5 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] -; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm4 = xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; AVX1-NEXT: vpmaddwd %xmm5, %xmm4, %xmm4 +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 +; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero +; AVX1-NEXT: vpmaddwd %xmm3, 
%xmm4, %xmm3 ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero ; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vphaddd %xmm0, %xmm4, %xmm1 -; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero -; AVX1-NEXT: vpmaddwd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vphaddd %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vphaddd %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vphaddd %xmm0, %xmm2, %xmm1 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 ; AVX1-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3] ; AVX1-NEXT: retq ; ; AVX2-LABEL: oddvector_mul: ; AVX2: # %bb.0: -; AVX2-NEXT: vpmovsxwd %xmm0, %ymm2 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 -; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 -; AVX2-NEXT: vpmovsxwd %xmm1, %ymm3 -; AVX2-NEXT: vpmulld %ymm3, %ymm2, %ymm2 +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero +; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; AVX2-NEXT: vpmaddwd %ymm2, %ymm3, %ymm2 ; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm1 -; AVX2-NEXT: vpmovsxwd %xmm1, %ymm1 -; AVX2-NEXT: vpmulld %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 +; AVX2-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vphaddd %ymm0, %ymm2, %ymm0 ; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] ; AVX2-NEXT: retq ; -; AVX512-LABEL: oddvector_mul: -; AVX512: # %bb.0: -; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 -; AVX512-NEXT: vpmovsxwd %ymm1, %zmm1 -; AVX512-NEXT: vpmulld %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1 -; AVX512-NEXT: 
vphaddd %ymm1, %ymm0, %ymm0 -; AVX512-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] -; AVX512-NEXT: retq +; AVX512F-LABEL: oddvector_mul: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1 +; AVX512F-NEXT: vpmulld %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-NEXT: vphaddd %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: oddvector_mul: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512BW-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512BW-NEXT: vpmaddwd %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512BW-NEXT: vphaddd %ymm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX512BW-NEXT: retq %a = sext <16 x i16> %A to <16 x i32> %b = sext <16 x i16> %B to <16 x i32> %m = mul nsw <16 x i32> %a, %b diff --git a/llvm/test/CodeGen/X86/shrink_vmul.ll b/llvm/test/CodeGen/X86/shrink_vmul.ll index 8f344490b66b7..db2ef29dac300 100644 --- a/llvm/test/CodeGen/X86/shrink_vmul.ll +++ b/llvm/test/CodeGen/X86/shrink_vmul.ll @@ -1238,18 +1238,18 @@ define void @mul_16xi16_sext(ptr nocapture readonly %a, ptr nocapture readonly % ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-AVX1-NEXT: movl c, %ecx -; X86-AVX1-NEXT: vpmovsxwd 24(%esi,%eax), %xmm0 -; X86-AVX1-NEXT: vpmovsxwd 16(%esi,%eax), %xmm1 -; X86-AVX1-NEXT: vpmovsxwd 8(%esi,%eax), %xmm2 -; X86-AVX1-NEXT: vpmovsxwd (%esi,%eax), %xmm3 -; 
X86-AVX1-NEXT: vpmovsxwd 24(%edx,%eax), %xmm4 -; X86-AVX1-NEXT: vpmulld %xmm0, %xmm4, %xmm0 -; X86-AVX1-NEXT: vpmovsxwd 16(%edx,%eax), %xmm4 -; X86-AVX1-NEXT: vpmulld %xmm1, %xmm4, %xmm1 -; X86-AVX1-NEXT: vpmovsxwd 8(%edx,%eax), %xmm4 -; X86-AVX1-NEXT: vpmulld %xmm2, %xmm4, %xmm2 -; X86-AVX1-NEXT: vpmovsxwd (%edx,%eax), %xmm4 -; X86-AVX1-NEXT: vpmulld %xmm3, %xmm4, %xmm3 +; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X86-AVX1-NEXT: vpmaddwd %xmm0, %xmm4, %xmm0 +; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X86-AVX1-NEXT: vpmaddwd %xmm1, %xmm4, %xmm1 +; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X86-AVX1-NEXT: vpmaddwd %xmm2, %xmm4, %xmm2 +; X86-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X86-AVX1-NEXT: vpmaddwd %xmm3, %xmm4, %xmm3 ; X86-AVX1-NEXT: vmovdqu %xmm0, 48(%ecx,%eax,4) ; X86-AVX1-NEXT: vmovdqu %xmm1, 32(%ecx,%eax,4) ; X86-AVX1-NEXT: vmovdqu %xmm2, 16(%ecx,%eax,4) @@ -1264,12 +1264,12 @@ define void @mul_16xi16_sext(ptr nocapture readonly %a, ptr nocapture readonly % ; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-AVX2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-AVX2-NEXT: movl c, %esi -; X86-AVX2-NEXT: vpmovsxwd 16(%edx,%ecx), %ymm0 -; X86-AVX2-NEXT: vpmovsxwd (%edx,%ecx), %ymm1 -; X86-AVX2-NEXT: vpmovsxwd 16(%eax,%ecx), %ymm2 -; X86-AVX2-NEXT: vpmulld %ymm0, %ymm2, %ymm0 -; X86-AVX2-NEXT: vpmovsxwd (%eax,%ecx), %ymm2 -; X86-AVX2-NEXT: vpmulld %ymm1, %ymm2, %ymm1 +; X86-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; X86-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; X86-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; X86-AVX2-NEXT: vpmaddwd %ymm0, %ymm2, %ymm0 +; X86-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; X86-AVX2-NEXT: vpmaddwd %ymm1, %ymm2, %ymm1 ; X86-AVX2-NEXT: vmovdqu %ymm0, 32(%esi,%ecx,4) ; X86-AVX2-NEXT: vmovdqu %ymm1, (%esi,%ecx,4) ; X86-AVX2-NEXT: popl %esi @@ -1304,18 +1304,18 @@ define void @mul_16xi16_sext(ptr nocapture readonly %a, ptr nocapture readonly % ; X64-AVX1-LABEL: mul_16xi16_sext: ; X64-AVX1: # %bb.0: # %entry ; X64-AVX1-NEXT: movq c(%rip), %rax -; X64-AVX1-NEXT: vpmovsxwd 24(%rdi,%rdx), %xmm0 -; X64-AVX1-NEXT: vpmovsxwd 16(%rdi,%rdx), %xmm1 -; X64-AVX1-NEXT: vpmovsxwd 8(%rdi,%rdx), %xmm2 -; X64-AVX1-NEXT: vpmovsxwd (%rdi,%rdx), %xmm3 -; X64-AVX1-NEXT: vpmovsxwd 24(%rsi,%rdx), %xmm4 -; X64-AVX1-NEXT: vpmulld %xmm0, %xmm4, %xmm0 -; X64-AVX1-NEXT: vpmovsxwd 16(%rsi,%rdx), %xmm4 -; X64-AVX1-NEXT: vpmulld %xmm1, %xmm4, %xmm1 -; X64-AVX1-NEXT: vpmovsxwd 8(%rsi,%rdx), %xmm4 -; X64-AVX1-NEXT: vpmulld %xmm2, %xmm4, %xmm2 -; X64-AVX1-NEXT: vpmovsxwd (%rsi,%rdx), %xmm4 -; X64-AVX1-NEXT: vpmulld %xmm3, %xmm4, %xmm3 +; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X64-AVX1-NEXT: vpmaddwd 
%xmm0, %xmm4, %xmm0 +; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X64-AVX1-NEXT: vpmaddwd %xmm1, %xmm4, %xmm1 +; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X64-AVX1-NEXT: vpmaddwd %xmm2, %xmm4, %xmm2 +; X64-AVX1-NEXT: vpmovzxwd {{.*#+}} xmm4 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; X64-AVX1-NEXT: vpmaddwd %xmm3, %xmm4, %xmm3 ; X64-AVX1-NEXT: vmovdqu %xmm0, 48(%rax,%rdx,4) ; X64-AVX1-NEXT: vmovdqu %xmm1, 32(%rax,%rdx,4) ; X64-AVX1-NEXT: vmovdqu %xmm2, 16(%rax,%rdx,4) @@ -1325,12 +1325,12 @@ define void @mul_16xi16_sext(ptr nocapture readonly %a, ptr nocapture readonly % ; X64-AVX2-LABEL: mul_16xi16_sext: ; X64-AVX2: # %bb.0: # %entry ; X64-AVX2-NEXT: movq c(%rip), %rax -; X64-AVX2-NEXT: vpmovsxwd 16(%rdi,%rdx), %ymm0 -; X64-AVX2-NEXT: vpmovsxwd (%rdi,%rdx), %ymm1 -; X64-AVX2-NEXT: vpmovsxwd 16(%rsi,%rdx), %ymm2 -; X64-AVX2-NEXT: vpmulld %ymm0, %ymm2, %ymm0 -; X64-AVX2-NEXT: vpmovsxwd (%rsi,%rdx), %ymm2 -; X64-AVX2-NEXT: vpmulld %ymm1, %ymm2, %ymm1 +; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; X64-AVX2-NEXT: vpmaddwd %ymm0, %ymm2, %ymm0 +; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; X64-AVX2-NEXT: vpmaddwd %ymm1, %ymm2, %ymm1 ; X64-AVX2-NEXT: vmovdqu %ymm0, 32(%rax,%rdx,4) ; X64-AVX2-NEXT: vmovdqu %ymm1, (%rax,%rdx,4) ; X64-AVX2-NEXT: vzeroupper From e30b9cd49fb8a3df226b9d30fec39ad24e61f55a Mon Sep 17 00:00:00 2001 From: kweronsx Date: Wed, 24 Jun 2026 20:39:43 +0200 Subject: [PATCH 388/511] 
[UR] Refactor reusable event API to pass device as urEventCreateExp parameter (#22387) Remove hDevice data member from ur_exp_event_desc_t. Insert hDevice as parameter of urEventCreateExp. --- .../include/unified-runtime/ur_api.h | 7 ++++--- .../include/unified-runtime/ur_ddi.h | 3 ++- .../include/unified-runtime/ur_print.hpp | 10 +++++----- .../scripts/core/exp-reusable-events.yml | 8 ++++---- .../source/adapters/cuda/event.cpp | 5 +++-- unified-runtime/source/adapters/hip/event.cpp | 5 +++-- .../source/adapters/level_zero/event.cpp | 1 + .../level_zero/ur_interface_loader.hpp | 1 + .../source/adapters/level_zero/v2/event.cpp | 1 + .../source/adapters/mock/ur_mockddi.cpp | 5 ++++- .../source/adapters/native_cpu/event.cpp | 8 ++++---- .../source/adapters/offload/event.cpp | 5 +++-- .../source/adapters/opencl/event.cpp | 5 +++-- .../loader/layers/tracing/ur_trcddi.cpp | 7 +++++-- .../loader/layers/validation/ur_valddi.cpp | 11 ++++++++-- unified-runtime/source/loader/ur_ldrddi.cpp | 4 +++- unified-runtime/source/loader/ur_libapi.cpp | 6 ++++-- unified-runtime/source/ur_api.cpp | 4 +++- .../exp_reusable_events/reusable_events.cpp | 20 +++++++------------ 19 files changed, 69 insertions(+), 47 deletions(-) diff --git a/unified-runtime/include/unified-runtime/ur_api.h b/unified-runtime/include/unified-runtime/ur_api.h index dcb0a60e76805..e52459cb9b5d3 100644 --- a/unified-runtime/include/unified-runtime/ur_api.h +++ b/unified-runtime/include/unified-runtime/ur_api.h @@ -13977,8 +13977,6 @@ typedef struct ur_exp_event_desc_t { ur_structure_type_t stype; /// [in][optional] pointer to extension-specific structure const void *pNext; - /// [in] handle of the device object associated with this event - ur_device_handle_t hDevice; /// [in] combination of event creation flags. 
If /// ::UR_EXP_EVENT_FLAG_ENABLE_PROFILING is set, the event captures /// UR_PROFILING_INFO_COMMAND_START and UR_PROFILING_INFO_COMMAND_END @@ -14006,7 +14004,7 @@ typedef struct ur_exp_event_desc_t { /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` -/// + `NULL == pEventDesc->hDevice` +/// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pEventDesc` /// + `NULL == phEvent` @@ -14021,6 +14019,8 @@ typedef struct ur_exp_event_desc_t { UR_APIEXPORT ur_result_t UR_APICALL urEventCreateExp( /// [in] handle of the context object ur_context_handle_t hContext, + /// [in] handle of the device object + ur_device_handle_t hDevice, /// [in] pointer to event creation descriptor const ur_exp_event_desc_t *pEventDesc, /// [out] pointer to the handle of the event object created @@ -14704,6 +14704,7 @@ typedef struct ur_event_set_callback_params_t { /// allowing the callback the ability to modify the parameter's value typedef struct ur_event_create_exp_params_t { ur_context_handle_t *phContext; + ur_device_handle_t *phDevice; const ur_exp_event_desc_t **ppEventDesc; ur_event_handle_t **pphEvent; } ur_event_create_exp_params_t; diff --git a/unified-runtime/include/unified-runtime/ur_ddi.h b/unified-runtime/include/unified-runtime/ur_ddi.h index 0f11ad454e8e3..7ba70aca48810 100644 --- a/unified-runtime/include/unified-runtime/ur_ddi.h +++ b/unified-runtime/include/unified-runtime/ur_ddi.h @@ -304,7 +304,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetEventProcAddrTable_t)( /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urEventCreateExp typedef ur_result_t(UR_APICALL *ur_pfnEventCreateExp_t)( - ur_context_handle_t, const ur_exp_event_desc_t *, ur_event_handle_t *); + ur_context_handle_t, ur_device_handle_t, const ur_exp_event_desc_t *, + ur_event_handle_t *); 
/////////////////////////////////////////////////////////////////////////////// /// @brief Table of EventExp functions pointers diff --git a/unified-runtime/include/unified-runtime/ur_print.hpp b/unified-runtime/include/unified-runtime/ur_print.hpp index c741f8eb85ecc..356c722c12305 100644 --- a/unified-runtime/include/unified-runtime/ur_print.hpp +++ b/unified-runtime/include/unified-runtime/ur_print.hpp @@ -13518,11 +13518,6 @@ inline std::ostream &operator<<(std::ostream &os, ur::details::printStruct(os, (params.pNext)); - os << ", "; - os << ".hDevice = "; - - ur::details::printPtr(os, (params.hDevice)); - os << ", "; os << ".flags = "; @@ -14470,6 +14465,11 @@ operator<<(std::ostream &os, ur::details::printPtr(os, *(params->phContext)); + os << ", "; + os << ".hDevice = "; + + ur::details::printPtr(os, *(params->phDevice)); + os << ", "; os << ".pEventDesc = "; diff --git a/unified-runtime/scripts/core/exp-reusable-events.yml b/unified-runtime/scripts/core/exp-reusable-events.yml index 6e622fa677047..94df7c9f9c61f 100644 --- a/unified-runtime/scripts/core/exp-reusable-events.yml +++ b/unified-runtime/scripts/core/exp-reusable-events.yml @@ -42,9 +42,6 @@ desc: "Descriptor type for creating reusable events." 
name: $x_exp_event_desc_t base: $x_base_desc_t members: - - type: $x_device_handle_t - name: hDevice - desc: "[in] handle of the device object associated with this event" - type: $x_exp_event_flags_t name: flags desc: > @@ -80,6 +77,9 @@ params: - type: $x_context_handle_t name: hContext desc: "[in] handle of the context object" + - type: $x_device_handle_t + name: hDevice + desc: "[in] handle of the device object" - type: "const $x_exp_event_desc_t*" name: pEventDesc desc: "[in] pointer to event creation descriptor" @@ -89,7 +89,7 @@ params: returns: - $X_RESULT_ERROR_INVALID_NULL_HANDLE: - "`NULL == hContext`" - - "`NULL == pEventDesc->hDevice`" + - "`NULL == hDevice`" - $X_RESULT_ERROR_INVALID_NULL_POINTER: - "`NULL == pEventDesc`" - "`NULL == phEvent`" diff --git a/unified-runtime/source/adapters/cuda/event.cpp b/unified-runtime/source/adapters/cuda/event.cpp index 3e76967f6a42e..5ca5ca1396d14 100644 --- a/unified-runtime/source/adapters/cuda/event.cpp +++ b/unified-runtime/source/adapters/cuda/event.cpp @@ -305,7 +305,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventCreateExp( - ur_context_handle_t, const ur_exp_event_desc_t *, ur_event_handle_t *) { +UR_APIEXPORT ur_result_t UR_APICALL +urEventCreateExp(ur_context_handle_t, ur_device_handle_t, + const ur_exp_event_desc_t *, ur_event_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/unified-runtime/source/adapters/hip/event.cpp b/unified-runtime/source/adapters/hip/event.cpp index e43865ad2818f..6c0c49a729cff 100644 --- a/unified-runtime/source/adapters/hip/event.cpp +++ b/unified-runtime/source/adapters/hip/event.cpp @@ -314,7 +314,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urEventCreateExp( - ur_context_handle_t, const ur_exp_event_desc_t *, ur_event_handle_t *) { +UR_APIEXPORT ur_result_t UR_APICALL 
+urEventCreateExp(ur_context_handle_t, ur_device_handle_t, + const ur_exp_event_desc_t *, ur_event_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/unified-runtime/source/adapters/level_zero/event.cpp b/unified-runtime/source/adapters/level_zero/event.cpp index 1656c2914f381..83f9b347ff2fd 100644 --- a/unified-runtime/source/adapters/level_zero/event.cpp +++ b/unified-runtime/source/adapters/level_zero/event.cpp @@ -1014,6 +1014,7 @@ ur_result_t urEventSetCallback( } ur_result_t urEventCreateExp(ur_context_handle_t /*hContext*/, + ur_device_handle_t /*hDevice*/, const ur_exp_event_desc_t * /*pEventDesc*/, ur_event_handle_t * /*phEvent*/) { UR_LOG_LEGACY(ERR, diff --git a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp index a118f77ef4acc..fa3def0da0446 100644 --- a/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp +++ b/unified-runtime/source/adapters/level_zero/ur_interface_loader.hpp @@ -865,6 +865,7 @@ ur_result_t urEnqueueNativeCommandExp( uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent); ur_result_t urEventCreateExp(ur_context_handle_t hContext, + ur_device_handle_t hDevice, const ur_exp_event_desc_t *pEventDesc, ur_event_handle_t *phEvent); ur_result_t urGraphCreateExp(ur_context_handle_t hContext, diff --git a/unified-runtime/source/adapters/level_zero/v2/event.cpp b/unified-runtime/source/adapters/level_zero/v2/event.cpp index 0ea38ad78d2de..ad94374ff14f8 100644 --- a/unified-runtime/source/adapters/level_zero/v2/event.cpp +++ b/unified-runtime/source/adapters/level_zero/v2/event.cpp @@ -417,6 +417,7 @@ urEventCreateWithNativeHandle(ur_native_handle_t hNativeEvent, } ur_result_t urEventCreateExp(ur_context_handle_t /*hContext*/, + ur_device_handle_t /*hDevice*/, const ur_exp_event_desc_t * /*pEventDesc*/, ur_event_handle_t * /*phEvent*/) { UR_LOG(ERR, "{} function not 
implemented!", __FUNCTION__); diff --git a/unified-runtime/source/adapters/mock/ur_mockddi.cpp b/unified-runtime/source/adapters/mock/ur_mockddi.cpp index 711d016a8c50c..a09e14bdd876e 100644 --- a/unified-runtime/source/adapters/mock/ur_mockddi.cpp +++ b/unified-runtime/source/adapters/mock/ur_mockddi.cpp @@ -12791,13 +12791,16 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( __urdlllocal ur_result_t UR_APICALL urEventCreateExp( /// [in] handle of the context object ur_context_handle_t hContext, + /// [in] handle of the device object + ur_device_handle_t hDevice, /// [in] pointer to event creation descriptor const ur_exp_event_desc_t *pEventDesc, /// [out] pointer to the handle of the event object created ur_event_handle_t *phEvent) try { ur_result_t result = UR_RESULT_SUCCESS; - ur_event_create_exp_params_t params = {&hContext, &pEventDesc, &phEvent}; + ur_event_create_exp_params_t params = {&hContext, &hDevice, &pEventDesc, + &phEvent}; auto beforeCallback = reinterpret_cast( mock::getCallbacks().get_before_callback("urEventCreateExp")); diff --git a/unified-runtime/source/adapters/native_cpu/event.cpp b/unified-runtime/source/adapters/native_cpu/event.cpp index df100da22d113..015a685a6d4f8 100644 --- a/unified-runtime/source/adapters/native_cpu/event.cpp +++ b/unified-runtime/source/adapters/native_cpu/event.cpp @@ -108,10 +108,10 @@ urEnqueueTimestampRecordingExp(ur_queue_handle_t /*hQueue*/, bool /*blocking*/, DIE_NO_IMPLEMENTATION; } -UR_APIEXPORT ur_result_t UR_APICALL -urEventCreateExp(ur_context_handle_t /*hContext*/, - const ur_exp_event_desc_t * /*pEventDesc*/, - ur_event_handle_t * /*phEvent*/) { +UR_APIEXPORT ur_result_t UR_APICALL urEventCreateExp( + ur_context_handle_t /*hContext*/, ur_device_handle_t /*hDevice*/, + const ur_exp_event_desc_t * /*pEventDesc*/, + ur_event_handle_t * /*phEvent*/) { DIE_NO_IMPLEMENTATION; } diff --git a/unified-runtime/source/adapters/offload/event.cpp b/unified-runtime/source/adapters/offload/event.cpp 
index e380c323d23e1..c572fa57024ae 100644 --- a/unified-runtime/source/adapters/offload/event.cpp +++ b/unified-runtime/source/adapters/offload/event.cpp @@ -129,7 +129,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urEventCreateExp( - ur_context_handle_t, const ur_exp_event_desc_t *, ur_event_handle_t *) { +UR_APIEXPORT ur_result_t UR_APICALL +urEventCreateExp(ur_context_handle_t, ur_device_handle_t, + const ur_exp_event_desc_t *, ur_event_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/unified-runtime/source/adapters/opencl/event.cpp b/unified-runtime/source/adapters/opencl/event.cpp index 614b6fc78f513..15a16480e3336 100644 --- a/unified-runtime/source/adapters/opencl/event.cpp +++ b/unified-runtime/source/adapters/opencl/event.cpp @@ -306,7 +306,8 @@ urEnqueueTimestampRecordingExp(ur_queue_handle_t, bool, uint32_t, return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -UR_APIEXPORT ur_result_t UR_APICALL urEventCreateExp( - ur_context_handle_t, const ur_exp_event_desc_t *, ur_event_handle_t *) { +UR_APIEXPORT ur_result_t UR_APICALL +urEventCreateExp(ur_context_handle_t, ur_device_handle_t, + const ur_exp_event_desc_t *, ur_event_handle_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp index 2330183cdd54b..b931542d05728 100644 --- a/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp +++ b/unified-runtime/source/loader/layers/tracing/ur_trcddi.cpp @@ -10871,6 +10871,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( __urdlllocal ur_result_t UR_APICALL urEventCreateExp( /// [in] handle of the context object ur_context_handle_t hContext, + /// [in] handle of the device object + ur_device_handle_t hDevice, /// [in] pointer to event creation descriptor const ur_exp_event_desc_t 
*pEventDesc, /// [out] pointer to the handle of the event object created @@ -10880,14 +10882,15 @@ __urdlllocal ur_result_t UR_APICALL urEventCreateExp( if (nullptr == pfnCreateExp) return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; - ur_event_create_exp_params_t params = {&hContext, &pEventDesc, &phEvent}; + ur_event_create_exp_params_t params = {&hContext, &hDevice, &pEventDesc, + &phEvent}; uint64_t instance = getContext()->notify_begin(UR_FUNCTION_EVENT_CREATE_EXP, "urEventCreateExp", &params); auto &logger = getContext()->logger; UR_LOG_L(logger, INFO, " ---> urEventCreateExp\n"); - ur_result_t result = pfnCreateExp(hContext, pEventDesc, phEvent); + ur_result_t result = pfnCreateExp(hContext, hDevice, pEventDesc, phEvent); getContext()->notify_end(UR_FUNCTION_EVENT_CREATE_EXP, "urEventCreateExp", &params, &result, instance); diff --git a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp index 93d02e07c4540..f49b88e5b28bb 100644 --- a/unified-runtime/source/loader/layers/validation/ur_valddi.cpp +++ b/unified-runtime/source/loader/layers/validation/ur_valddi.cpp @@ -11705,6 +11705,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( __urdlllocal ur_result_t UR_APICALL urEventCreateExp( /// [in] handle of the context object ur_context_handle_t hContext, + /// [in] handle of the device object + ur_device_handle_t hDevice, /// [in] pointer to event creation descriptor const ur_exp_event_desc_t *pEventDesc, /// [out] pointer to the handle of the event object created @@ -11725,7 +11727,7 @@ __urdlllocal ur_result_t UR_APICALL urEventCreateExp( if (NULL == hContext) return UR_RESULT_ERROR_INVALID_NULL_HANDLE; - if (NULL == pEventDesc->hDevice) + if (NULL == hDevice) return UR_RESULT_ERROR_INVALID_NULL_HANDLE; if (UR_EXP_EVENT_FLAGS_MASK & pEventDesc->flags) @@ -11737,7 +11739,12 @@ __urdlllocal ur_result_t UR_APICALL urEventCreateExp( URLOG_CTX_INVALID_REFERENCE(hContext); } - ur_result_t
result = pfnCreateExp(hContext, pEventDesc, phEvent); + if (getContext()->enableLifetimeValidation && + !getContext()->refCountContext->isReferenceValid(hDevice)) { + URLOG_CTX_INVALID_REFERENCE(hDevice); + } + + ur_result_t result = pfnCreateExp(hContext, hDevice, pEventDesc, phEvent); if (getContext()->enableLeakChecking && result == UR_RESULT_SUCCESS && phEvent) { diff --git a/unified-runtime/source/loader/ur_ldrddi.cpp b/unified-runtime/source/loader/ur_ldrddi.cpp index d2095cc6bb451..f100d55232ce8 100644 --- a/unified-runtime/source/loader/ur_ldrddi.cpp +++ b/unified-runtime/source/loader/ur_ldrddi.cpp @@ -6175,6 +6175,8 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueNativeCommandExp( __urdlllocal ur_result_t UR_APICALL urEventCreateExp( /// [in] handle of the context object ur_context_handle_t hContext, + /// [in] handle of the device object + ur_device_handle_t hDevice, /// [in] pointer to event creation descriptor const ur_exp_event_desc_t *pEventDesc, /// [out] pointer to the handle of the event object created @@ -6187,7 +6189,7 @@ __urdlllocal ur_result_t UR_APICALL urEventCreateExp( return UR_RESULT_ERROR_UNINITIALIZED; // forward to device-platform - return pfnCreateExp(hContext, pEventDesc, phEvent); + return pfnCreateExp(hContext, hDevice, pEventDesc, phEvent); } /////////////////////////////////////////////////////////////////////////////// diff --git a/unified-runtime/source/loader/ur_libapi.cpp b/unified-runtime/source/loader/ur_libapi.cpp index e48c835de482b..1cca29fb573ed 100644 --- a/unified-runtime/source/loader/ur_libapi.cpp +++ b/unified-runtime/source/loader/ur_libapi.cpp @@ -11357,7 +11357,7 @@ ur_result_t UR_APICALL urEnqueueNativeCommandExp( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` -/// + `NULL == pEventDesc->hDevice` +/// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pEventDesc` /// + `NULL == phEvent` @@ -11372,6 +11372,8 @@ 
ur_result_t UR_APICALL urEnqueueNativeCommandExp( ur_result_t UR_APICALL urEventCreateExp( /// [in] handle of the context object ur_context_handle_t hContext, + /// [in] handle of the device object + ur_device_handle_t hDevice, /// [in] pointer to event creation descriptor const ur_exp_event_desc_t *pEventDesc, /// [out] pointer to the handle of the event object created @@ -11380,7 +11382,7 @@ ur_result_t UR_APICALL urEventCreateExp( if (nullptr == pfnCreateExp) return UR_RESULT_ERROR_UNINITIALIZED; - return pfnCreateExp(hContext, pEventDesc, phEvent); + return pfnCreateExp(hContext, hDevice, pEventDesc, phEvent); } catch (...) { return exceptionToResult(std::current_exception()); } diff --git a/unified-runtime/source/ur_api.cpp b/unified-runtime/source/ur_api.cpp index 21cfe87913967..53fa2f5eaf718 100644 --- a/unified-runtime/source/ur_api.cpp +++ b/unified-runtime/source/ur_api.cpp @@ -9878,7 +9878,7 @@ ur_result_t UR_APICALL urEnqueueNativeCommandExp( /// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC /// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE /// + `NULL == hContext` -/// + `NULL == pEventDesc->hDevice` +/// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pEventDesc` /// + `NULL == phEvent` @@ -9893,6 +9893,8 @@ ur_result_t UR_APICALL urEnqueueNativeCommandExp( ur_result_t UR_APICALL urEventCreateExp( /// [in] handle of the context object ur_context_handle_t hContext, + /// [in] handle of the device object + ur_device_handle_t hDevice, /// [in] pointer to event creation descriptor const ur_exp_event_desc_t *pEventDesc, /// [out] pointer to the handle of the event object created diff --git a/unified-runtime/test/conformance/exp_reusable_events/reusable_events.cpp b/unified-runtime/test/conformance/exp_reusable_events/reusable_events.cpp index 6dac3523e8b97..8cf62687295ae 100644 --- a/unified-runtime/test/conformance/exp_reusable_events/reusable_events.cpp +++ b/unified-runtime/test/conformance/exp_reusable_events/reusable_events.cpp @@ 
-13,13 +13,12 @@ TEST_P(urEventCreateExpTest, Success) { ur_exp_event_desc_t desc = { UR_STRUCTURE_TYPE_EXP_EVENT_DESC, nullptr, - device, static_cast(0), }; uur::raii::Event event = nullptr; UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urEventCreateExp(context, &desc, event.ptr())); + urEventCreateExp(context, device, &desc, event.ptr())); if (!event) { return; @@ -32,13 +31,12 @@ TEST_P(urEventCreateExpTest, SuccessWithProfilingFlag) { ur_exp_event_desc_t desc = { UR_STRUCTURE_TYPE_EXP_EVENT_DESC, nullptr, - device, UR_EXP_EVENT_FLAG_ENABLE_PROFILING, }; uur::raii::Event event = nullptr; UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urEventCreateExp(context, &desc, event.ptr())); + urEventCreateExp(context, device, &desc, event.ptr())); if (!event) { return; @@ -51,44 +49,41 @@ TEST_P(urEventCreateExpTest, InvalidNullHandleContext) { ur_exp_event_desc_t desc = { UR_STRUCTURE_TYPE_EXP_EVENT_DESC, nullptr, - device, static_cast(0), }; uur::raii::Event event = nullptr; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urEventCreateExp(nullptr, &desc, event.ptr())); + urEventCreateExp(nullptr, device, &desc, event.ptr())); } TEST_P(urEventCreateExpTest, InvalidNullPointerEventDesc) { uur::raii::Event event = nullptr; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urEventCreateExp(context, nullptr, event.ptr())); + urEventCreateExp(context, device, nullptr, event.ptr())); } TEST_P(urEventCreateExpTest, InvalidNullPointerEventHandle) { ur_exp_event_desc_t desc = { UR_STRUCTURE_TYPE_EXP_EVENT_DESC, nullptr, - device, static_cast(0), }; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_POINTER, - urEventCreateExp(context, &desc, nullptr)); + urEventCreateExp(context, device, &desc, nullptr)); } TEST_P(urEventCreateExpTest, InvalidNullHandleEventDevice) { ur_exp_event_desc_t desc = { UR_STRUCTURE_TYPE_EXP_EVENT_DESC, nullptr, - nullptr, static_cast(0), }; uur::raii::Event event = nullptr; ASSERT_EQ_RESULT(UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urEventCreateExp(context, &desc, 
event.ptr())); + urEventCreateExp(context, nullptr, &desc, event.ptr())); } struct urEnqueueEventsWaitWithBarrierReusableEventTest : uur::urQueueTest {}; @@ -100,13 +95,12 @@ TEST_P(urEnqueueEventsWaitWithBarrierReusableEventTest, ur_exp_event_desc_t desc = { UR_STRUCTURE_TYPE_EXP_EVENT_DESC, nullptr, - device, static_cast(0), }; uur::raii::Event signal_event = nullptr; UUR_ASSERT_SUCCESS_OR_UNSUPPORTED( - urEventCreateExp(context, &desc, signal_event.ptr())); + urEventCreateExp(context, device, &desc, signal_event.ptr())); if (!signal_event) { return; } From 92681e78b2616a849343fa69c3cfcd84cb6fd32b Mon Sep 17 00:00:00 2001 From: Anshil Gandhi <95053726+gandhi56@users.noreply.github.com> Date: Wed, 24 Jun 2026 14:43:29 -0400 Subject: [PATCH 389/511] [AMDGPU] Lower uniform usubsat to SOP (#203155) Prefer scalar (SALU) lowering for uniform `usubsat`, since usubsat(a, b) = max(a, b) - b. * i32: add a GCNPat matching uniform `usubsat` to S_MAX_U32 + S_SUB_I32 * i16: route uniform `usubsat` through `promoteUniformOpToI32` instead of a TableGen pattern that hard-codes the 0xffff masks. This exposes the zero-extends as real DAG nodes so KnownBits can fold the masks when the high bits are already known zero; the promoted i32 usubsat then reuses the scalar pattern. Promote-and-truncate is safe for usubsat because the result always fits in the narrow type (unlike uaddsat). Register USUBSAT with `setTargetDAGCombine` and the promotion dispatch, return ZERO_EXTEND in `getExtOpcodeForPromotedOp`, and add it to `isNarrowingProfitable` so divergent i16/i32 keep their native VALU clamp form. 
Co-authored by: Jeffrey Byrnes --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 1 + llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 6 +- llvm/lib/Target/AMDGPU/SOPInstructions.td | 17 + llvm/test/CodeGen/AMDGPU/usubsat.ll | 929 +++++++++++------- 4 files changed, 598 insertions(+), 355 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 795e487219d8f..48bc2e6c2246e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -1057,6 +1057,7 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(SDNode *N, EVT SrcVT, case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: + case ISD::USUBSAT: if (isTypeLegal(MVT::i16) && (!DestVT.isVector() || !isOperationLegal(ISD::ADD, MVT::v2i16))) { // Check if VOP3P diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 502cc438c836c..4a32b81b06ff5 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1063,6 +1063,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, ISD::SMAX, ISD::UMIN, ISD::UMAX, + ISD::USUBSAT, ISD::AND, ISD::OR, ISD::XOR, @@ -8804,6 +8805,7 @@ static unsigned getExtOpcodeForPromotedOp(SDValue Op) { case ISD::SRL: case ISD::UMIN: case ISD::UMAX: + case ISD::USUBSAT: return ISD::ZERO_EXTEND; case ISD::ADD: case ISD::SUB: @@ -8852,7 +8854,8 @@ SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op, Opc == ISD::SRL || Opc == ISD::SRA || Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR || Opc == ISD::MUL || Opc == ISD::SETCC || Opc == ISD::SELECT || Opc == ISD::SMIN || - Opc == ISD::SMAX || Opc == ISD::UMIN || Opc == ISD::UMAX); + Opc == ISD::SMAX || Opc == ISD::UMIN || Opc == ISD::UMAX || + Opc == ISD::USUBSAT); EVT OpTy = (Opc != ISD::SETCC) ? 
Op.getValueType() : Op->getOperand(0).getValueType(); @@ -18584,6 +18587,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, case ISD::SMAX: case ISD::UMIN: case ISD::UMAX: + case ISD::USUBSAT: if (auto Res = promoteUniformOpToI32(SDValue(N, 0), DCI)) return Res; break; diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index b28195be51733..670945594e92b 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -2070,6 +2070,23 @@ let AddedComplexity = 20 in { >; } +// Uniform saturating unsigned subtraction: usubsat(a, b) = max(a, b) - b +// If a >= b, result is a - b; otherwise max(a, b) = b, so b - b = 0. +// Higher complexity to prefer scalar over VALU patterns for uniform values. +let AddedComplexity = 20 in { + def : GCNPat< + (i32 (UniformBinFrag<usubsat> i32:$src0, i32:$src1)), + (S_SUB_I32 (S_MAX_U32 $src0, $src1), $src1) + >; +} // End AddedComplexity = 20 + +// V_ADD_I32_e32/S_ADD_U32 produces carry in VCC/SCC. For the vector +// case, the sgpr-copies pass will fix this to use the vector version. +def : GCNPat < + (i32 (addc i32:$src0, i32:$src1)), + (S_ADD_U32 $src0, $src1) +>; + // FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that // REG_SEQUENCE patterns don't support instructions with multiple // outputs.
diff --git a/llvm/test/CodeGen/AMDGPU/usubsat.ll b/llvm/test/CodeGen/AMDGPU/usubsat.ll index 336f74d8e80db..120442f0b85db 100644 --- a/llvm/test/CodeGen/AMDGPU/usubsat.ll +++ b/llvm/test/CodeGen/AMDGPU/usubsat.ll @@ -22,8 +22,9 @@ define i8 @s_usubsat_i8(i8 inreg %lhs, i8 inreg %rhs) { ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-NEXT: s_and_b32 s4, s17, 0xff ; GFX8-NEXT: s_and_b32 s5, s16, 0xff +; GFX8-NEXT: s_max_u32 s5, s5, s4 +; GFX8-NEXT: s_sub_i32 s4, s5, s4 ; GFX8-NEXT: v_mov_b32_e32 v0, s4 -; GFX8-NEXT: v_sub_u16_e64 v0, s5, v0 clamp ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_usubsat_i8: @@ -31,8 +32,9 @@ define i8 @s_usubsat_i8(i8 inreg %lhs, i8 inreg %rhs) { ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: s_and_b32 s4, s17, 0xff ; GFX9-NEXT: s_and_b32 s5, s16, 0xff +; GFX9-NEXT: s_max_u32 s5, s5, s4 +; GFX9-NEXT: s_sub_i32 s4, s5, s4 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 -; GFX9-NEXT: v_sub_u16_e64 v0, s5, v0 clamp ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_usubsat_i8: @@ -40,24 +42,20 @@ define i8 @s_usubsat_i8(i8 inreg %lhs, i8 inreg %rhs) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: s_and_b32 s4, s17, 0xff ; GFX10-NEXT: s_and_b32 s5, s16, 0xff -; GFX10-NEXT: v_sub_nc_u16 v0, s5, s4 clamp +; GFX10-NEXT: s_max_u32 s5, s5, s4 +; GFX10-NEXT: s_sub_i32 s4, s5, s4 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: s_usubsat_i8: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: s_and_b32 s1, s1, 0xff -; GFX11-TRUE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.l, s0, s1 clamp -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: s_usubsat_i8: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: s_and_b32 s1, s1, 0xff -; GFX11-FAKE16-NEXT: s_and_b32 s0, s0, 0xff -; GFX11-FAKE16-NEXT: 
v_sub_nc_u16 v0, s0, s1 clamp -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: s_usubsat_i8: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_and_b32 s1, s1, 0xff +; GFX11-NEXT: s_and_b32 s0, s0, 0xff +; GFX11-NEXT: s_max_u32 s0, s0, s1 +; GFX11-NEXT: s_sub_i32 s0, s0, s1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i8 @llvm.usub.sat.i8(i8 %lhs, i8 %rhs) ret i8 %result } @@ -125,34 +123,42 @@ define i16 @s_usubsat_i16(i16 inreg %lhs, i16 inreg %rhs) { ; GFX8-LABEL: s_usubsat_i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, s17 -; GFX8-NEXT: v_sub_u16_e64 v0, s16, v0 clamp +; GFX8-NEXT: s_and_b32 s4, 0xffff, s17 +; GFX8-NEXT: s_and_b32 s5, 0xffff, s16 +; GFX8-NEXT: s_max_u32 s5, s5, s4 +; GFX8-NEXT: s_sub_i32 s4, s5, s4 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_usubsat_i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s17 -; GFX9-NEXT: v_sub_u16_e64 v0, s16, v0 clamp +; GFX9-NEXT: s_and_b32 s4, 0xffff, s17 +; GFX9-NEXT: s_and_b32 s5, 0xffff, s16 +; GFX9-NEXT: s_max_u32 s5, s5, s4 +; GFX9-NEXT: s_sub_i32 s4, s5, s4 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_usubsat_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_sub_nc_u16 v0, s16, s17 clamp +; GFX10-NEXT: s_and_b32 s4, 0xffff, s17 +; GFX10-NEXT: s_and_b32 s5, 0xffff, s16 +; GFX10-NEXT: s_max_u32 s5, s5, s4 +; GFX10-NEXT: s_sub_i32 s4, s5, s4 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: s_usubsat_i16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.l, s0, s1 clamp -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: s_usubsat_i16: -; 
GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_sub_nc_u16 v0, s0, s1 clamp -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: s_usubsat_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 +; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX11-NEXT: s_max_u32 s0, s0, s1 +; GFX11-NEXT: s_sub_i32 s0, s0, s1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i16 @llvm.usub.sat.i16(i16 %lhs, i16 %rhs) ret i16 %result } @@ -214,34 +220,38 @@ define i16 @uniform_usubsat_as_bithack_i16(i16 inreg %x) { ; GFX8-LABEL: uniform_usubsat_as_bithack_i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, 0xffff8000 -; GFX8-NEXT: v_sub_u16_e64 v0, s16, v0 clamp +; GFX8-NEXT: s_and_b32 s4, 0xffff, s16 +; GFX8-NEXT: s_max_u32 s4, s4, 0x8000 +; GFX8-NEXT: s_sub_i32 s4, s4, 0x8000 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: uniform_usubsat_as_bithack_i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff8000 -; GFX9-NEXT: v_sub_u16_e64 v0, s16, v0 clamp +; GFX9-NEXT: s_and_b32 s4, 0xffff, s16 +; GFX9-NEXT: s_max_u32 s4, s4, 0x8000 +; GFX9-NEXT: s_sub_i32 s4, s4, 0x8000 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: uniform_usubsat_as_bithack_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_sub_nc_u16 v0, s16, 0x8000 clamp +; GFX10-NEXT: s_and_b32 s4, 0xffff, s16 +; GFX10-NEXT: s_max_u32 s4, s4, 0x8000 +; GFX10-NEXT: s_sub_i32 s4, s4, 0x8000 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: uniform_usubsat_as_bithack_i16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_sub_nc_u16 
v0.l, s0, 0x8000 clamp -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: uniform_usubsat_as_bithack_i16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_sub_nc_u16 v0, s0, 0x8000 clamp -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: uniform_usubsat_as_bithack_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX11-NEXT: s_max_u32 s0, s0, 0x8000 +; GFX11-NEXT: s_sub_i32 s0, s0, 0x8000 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %signsplat = ashr i16 %x, 15 %flipsign = xor i16 %x, 32768 %result = and i16 %signsplat, %flipsign @@ -309,34 +319,38 @@ define i16 @uniform_usubsat_as_bithack2_i16(i16 inreg %x) { ; GFX8-LABEL: uniform_usubsat_as_bithack2_i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, 0xffff8000 -; GFX8-NEXT: v_sub_u16_e64 v0, s16, v0 clamp +; GFX8-NEXT: s_and_b32 s4, 0xffff, s16 +; GFX8-NEXT: s_max_u32 s4, s4, 0x8000 +; GFX8-NEXT: s_sub_i32 s4, s4, 0x8000 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: uniform_usubsat_as_bithack2_i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff8000 -; GFX9-NEXT: v_sub_u16_e64 v0, s16, v0 clamp +; GFX9-NEXT: s_and_b32 s4, 0xffff, s16 +; GFX9-NEXT: s_max_u32 s4, s4, 0x8000 +; GFX9-NEXT: s_sub_i32 s4, s4, 0x8000 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: uniform_usubsat_as_bithack2_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_sub_nc_u16 v0, s16, 0x8000 clamp +; GFX10-NEXT: s_and_b32 s4, 0xffff, s16 +; GFX10-NEXT: s_max_u32 s4, s4, 0x8000 +; GFX10-NEXT: s_sub_i32 s4, s4, 0x8000 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: 
uniform_usubsat_as_bithack2_i16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.l, s0, 0x8000 clamp -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: uniform_usubsat_as_bithack2_i16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_sub_nc_u16 v0, s0, 0x8000 clamp -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: uniform_usubsat_as_bithack2_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX11-NEXT: s_max_u32 s0, s0, 0x8000 +; GFX11-NEXT: s_sub_i32 s0, s0, 0x8000 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %signsplat = ashr i16 %x, 15 %flipsign = add i16 %x, 32768 %result = and i16 %signsplat, %flipsign @@ -404,34 +418,38 @@ define i16 @uniform_usubsat_as_bithack_commute_i16(i16 inreg %x) { ; GFX8-LABEL: uniform_usubsat_as_bithack_commute_i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, 0xffff8000 -; GFX8-NEXT: v_sub_u16_e64 v0, s16, v0 clamp +; GFX8-NEXT: s_and_b32 s4, 0xffff, s16 +; GFX8-NEXT: s_max_u32 s4, s4, 0x8000 +; GFX8-NEXT: s_sub_i32 s4, s4, 0x8000 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: uniform_usubsat_as_bithack_commute_i16: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, 0xffff8000 -; GFX9-NEXT: v_sub_u16_e64 v0, s16, v0 clamp +; GFX9-NEXT: s_and_b32 s4, 0xffff, s16 +; GFX9-NEXT: s_max_u32 s4, s4, 0x8000 +; GFX9-NEXT: s_sub_i32 s4, s4, 0x8000 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: uniform_usubsat_as_bithack_commute_i16: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_sub_nc_u16 v0, s16, 0x8000 clamp +; GFX10-NEXT: s_and_b32 s4, 0xffff, s16 +; 
GFX10-NEXT: s_max_u32 s4, s4, 0x8000 +; GFX10-NEXT: s_sub_i32 s4, s4, 0x8000 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-TRUE16-LABEL: uniform_usubsat_as_bithack_commute_i16: -; GFX11-TRUE16: ; %bb.0: -; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_sub_nc_u16 v0.l, s0, 0x8000 clamp -; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] -; -; GFX11-FAKE16-LABEL: uniform_usubsat_as_bithack_commute_i16: -; GFX11-FAKE16: ; %bb.0: -; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-FAKE16-NEXT: v_sub_nc_u16 v0, s0, 0x8000 clamp -; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] +; GFX11-LABEL: uniform_usubsat_as_bithack_commute_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 +; GFX11-NEXT: s_max_u32 s0, s0, 0x8000 +; GFX11-NEXT: s_sub_i32 s0, s0, 0x8000 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] %signsplat = ashr i16 %x, 15 %flipsign = add i16 %x, 32768 %result = and i16 %flipsign, %signsplat @@ -497,27 +515,33 @@ define i32 @s_usubsat_i32(i32 inreg %lhs, i32 inreg %rhs) { ; GFX8-LABEL: s_usubsat_i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, s17 -; GFX8-NEXT: v_sub_u32_e64 v0, s[4:5], s16, v0 clamp +; GFX8-NEXT: s_max_u32 s4, s16, s17 +; GFX8-NEXT: s_sub_i32 s4, s4, s17 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_usubsat_i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s17 -; GFX9-NEXT: v_sub_u32_e64 v0, s16, v0 clamp +; GFX9-NEXT: s_max_u32 s4, s16, s17 +; GFX9-NEXT: s_sub_i32 s4, s4, s17 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_usubsat_i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_sub_nc_u32_e64 v0, s16, s17 clamp +; GFX10-NEXT: s_max_u32 s4, 
s16, s17 +; GFX10-NEXT: s_sub_i32 s4, s4, s17 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_usubsat_i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_sub_nc_u32_e64 v0, s0, s1 clamp +; GFX11-NEXT: s_max_u32 s0, s0, s1 +; GFX11-NEXT: s_sub_i32 s0, s0, s1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call i32 @llvm.usub.sat.i32(i32 %lhs, i32 %rhs) ret i32 %result @@ -572,14 +596,17 @@ define <2 x i16> @s_usubsat_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs) { ; GFX8-LABEL: s_usubsat_v2i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s4, s17, 16 -; GFX8-NEXT: s_lshr_b32 s5, s16, 16 +; GFX8-NEXT: s_and_b32 s4, s17, 0xffff +; GFX8-NEXT: s_and_b32 s5, s16, 0xffff +; GFX8-NEXT: s_max_u32 s5, s5, s4 +; GFX8-NEXT: s_sub_i32 s4, s5, s4 +; GFX8-NEXT: s_lshr_b32 s5, s17, 16 +; GFX8-NEXT: s_lshr_b32 s6, s16, 16 +; GFX8-NEXT: s_max_u32 s6, s6, s5 +; GFX8-NEXT: s_sub_i32 s5, s6, s5 +; GFX8-NEXT: s_lshl_b32 s5, s5, 16 +; GFX8-NEXT: s_or_b32 s4, s4, s5 ; GFX8-NEXT: v_mov_b32_e32 v0, s4 -; GFX8-NEXT: v_mov_b32_e32 v1, s5 -; GFX8-NEXT: v_sub_u16_sdwa v0, v1, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_mov_b32_e32 v1, s17 -; GFX8-NEXT: v_sub_u16_e64 v1, s16, v1 clamp -; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_usubsat_v2i16: @@ -668,16 +695,22 @@ define <3 x i16> @s_usubsat_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs) { ; GFX8-LABEL: s_usubsat_v3i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s4, s18, 16 -; GFX8-NEXT: s_lshr_b32 s5, s16, 16 -; GFX8-NEXT: v_mov_b32_e32 v0, s4 -; GFX8-NEXT: v_mov_b32_e32 v1, s5 -; GFX8-NEXT: v_mov_b32_e32 v2, s18 -; GFX8-NEXT: v_sub_u16_sdwa v0, v1, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; 
GFX8-NEXT: v_mov_b32_e32 v1, s19 -; GFX8-NEXT: v_sub_u16_e64 v2, s16, v2 clamp -; GFX8-NEXT: v_sub_u16_e64 v1, s17, v1 clamp -; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: s_and_b32 s4, s19, 0xffff +; GFX8-NEXT: s_and_b32 s5, s17, 0xffff +; GFX8-NEXT: s_max_u32 s5, s5, s4 +; GFX8-NEXT: s_sub_i32 s4, s5, s4 +; GFX8-NEXT: s_and_b32 s5, s18, 0xffff +; GFX8-NEXT: s_and_b32 s6, s16, 0xffff +; GFX8-NEXT: s_max_u32 s6, s6, s5 +; GFX8-NEXT: s_sub_i32 s5, s6, s5 +; GFX8-NEXT: s_lshr_b32 s6, s18, 16 +; GFX8-NEXT: s_lshr_b32 s7, s16, 16 +; GFX8-NEXT: s_max_u32 s7, s7, s6 +; GFX8-NEXT: s_sub_i32 s6, s7, s6 +; GFX8-NEXT: s_lshl_b32 s6, s6, 16 +; GFX8-NEXT: s_or_b32 s5, s5, s6 +; GFX8-NEXT: v_mov_b32_e32 v0, s5 +; GFX8-NEXT: v_mov_b32_e32 v1, s4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_usubsat_v3i16: @@ -783,22 +816,28 @@ define <2 x float> @s_usubsat_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs) ; GFX8-LABEL: s_usubsat_v4i16: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_lshr_b32 s4, s18, 16 -; GFX8-NEXT: s_lshr_b32 s5, s16, 16 +; GFX8-NEXT: s_and_b32 s4, s18, 0xffff +; GFX8-NEXT: s_and_b32 s5, s16, 0xffff +; GFX8-NEXT: s_max_u32 s5, s5, s4 +; GFX8-NEXT: s_sub_i32 s4, s5, s4 +; GFX8-NEXT: s_lshr_b32 s5, s18, 16 +; GFX8-NEXT: s_lshr_b32 s6, s16, 16 +; GFX8-NEXT: s_max_u32 s6, s6, s5 +; GFX8-NEXT: s_sub_i32 s5, s6, s5 +; GFX8-NEXT: s_lshl_b32 s5, s5, 16 +; GFX8-NEXT: s_or_b32 s4, s4, s5 +; GFX8-NEXT: s_and_b32 s5, s19, 0xffff +; GFX8-NEXT: s_and_b32 s6, s17, 0xffff +; GFX8-NEXT: s_max_u32 s6, s6, s5 +; GFX8-NEXT: s_sub_i32 s5, s6, s5 +; GFX8-NEXT: s_lshr_b32 s6, s19, 16 +; GFX8-NEXT: s_lshr_b32 s7, s17, 16 +; GFX8-NEXT: s_max_u32 s7, s7, s6 +; GFX8-NEXT: s_sub_i32 s6, s7, s6 +; GFX8-NEXT: s_lshl_b32 s6, s6, 16 +; GFX8-NEXT: s_or_b32 s5, s5, s6 ; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: v_mov_b32_e32 v1, s5 -; GFX8-NEXT: v_sub_u16_sdwa v0, v1, v0 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 
-; GFX8-NEXT: v_mov_b32_e32 v1, s18 -; GFX8-NEXT: v_sub_u16_e64 v1, s16, v1 clamp -; GFX8-NEXT: s_lshr_b32 s4, s19, 16 -; GFX8-NEXT: s_lshr_b32 s5, s17, 16 -; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 -; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_mov_b32_e32 v2, s5 -; GFX8-NEXT: v_sub_u16_sdwa v1, v2, v1 clamp dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GFX8-NEXT: v_mov_b32_e32 v2, s19 -; GFX8-NEXT: v_sub_u16_e64 v2, s17, v2 clamp -; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_usubsat_v4i16: @@ -898,33 +937,44 @@ define <2 x i32> @s_usubsat_v2i32(<2 x i32> inreg %lhs, <2 x i32> inreg %rhs) { ; GFX8-LABEL: s_usubsat_v2i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, s18 -; GFX8-NEXT: v_mov_b32_e32 v1, s19 -; GFX8-NEXT: v_sub_u32_e64 v0, s[4:5], s16, v0 clamp -; GFX8-NEXT: v_sub_u32_e64 v1, s[4:5], s17, v1 clamp +; GFX8-NEXT: s_max_u32 s4, s16, s18 +; GFX8-NEXT: s_max_u32 s5, s17, s19 +; GFX8-NEXT: s_sub_i32 s4, s4, s18 +; GFX8-NEXT: s_sub_i32 s5, s5, s19 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: v_mov_b32_e32 v1, s5 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_usubsat_v2i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s18 -; GFX9-NEXT: v_mov_b32_e32 v1, s19 -; GFX9-NEXT: v_sub_u32_e64 v0, s16, v0 clamp -; GFX9-NEXT: v_sub_u32_e64 v1, s17, v1 clamp +; GFX9-NEXT: s_max_u32 s4, s16, s18 +; GFX9-NEXT: s_max_u32 s5, s17, s19 +; GFX9-NEXT: s_sub_i32 s4, s4, s18 +; GFX9-NEXT: s_sub_i32 s5, s5, s19 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_usubsat_v2i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_sub_nc_u32_e64 v0, s16, s18 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v1, s17, s19 clamp +; GFX10-NEXT: s_max_u32 s4, s16, s18 +; GFX10-NEXT: s_max_u32 s5, s17, s19 
+; GFX10-NEXT: s_sub_i32 s4, s4, s18 +; GFX10-NEXT: s_sub_i32 s5, s5, s19 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: v_mov_b32_e32 v1, s5 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_usubsat_v2i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_sub_nc_u32_e64 v0, s0, s2 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v1, s1, s3 clamp +; GFX11-NEXT: s_max_u32 s0, s0, s2 +; GFX11-NEXT: s_max_u32 s1, s1, s3 +; GFX11-NEXT: s_sub_i32 s0, s0, s2 +; GFX11-NEXT: s_sub_i32 s1, s1, s3 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %result @@ -983,39 +1033,56 @@ define <3 x i32> @s_usubsat_v3i32(<3 x i32> inreg %lhs, <3 x i32> inreg %rhs) { ; GFX8-LABEL: s_usubsat_v3i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, s19 -; GFX8-NEXT: v_mov_b32_e32 v1, s20 -; GFX8-NEXT: v_mov_b32_e32 v2, s21 -; GFX8-NEXT: v_sub_u32_e64 v0, s[4:5], s16, v0 clamp -; GFX8-NEXT: v_sub_u32_e64 v1, s[4:5], s17, v1 clamp -; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], s18, v2 clamp +; GFX8-NEXT: s_max_u32 s4, s16, s19 +; GFX8-NEXT: s_max_u32 s5, s17, s20 +; GFX8-NEXT: s_max_u32 s6, s18, s21 +; GFX8-NEXT: s_sub_i32 s4, s4, s19 +; GFX8-NEXT: s_sub_i32 s5, s5, s20 +; GFX8-NEXT: s_sub_i32 s6, s6, s21 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: v_mov_b32_e32 v2, s6 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_usubsat_v3i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s19 -; GFX9-NEXT: v_mov_b32_e32 v1, s20 -; GFX9-NEXT: v_mov_b32_e32 v2, s21 -; GFX9-NEXT: v_sub_u32_e64 v0, s16, v0 clamp -; GFX9-NEXT: v_sub_u32_e64 v1, s17, v1 clamp -; GFX9-NEXT: v_sub_u32_e64 v2, s18, v2 clamp +; GFX9-NEXT: s_max_u32 s4, s16, s19 +; GFX9-NEXT: s_max_u32 s5, s17, s20 +; GFX9-NEXT: 
s_max_u32 s6, s18, s21 +; GFX9-NEXT: s_sub_i32 s4, s4, s19 +; GFX9-NEXT: s_sub_i32 s5, s5, s20 +; GFX9-NEXT: s_sub_i32 s6, s6, s21 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_usubsat_v3i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_sub_nc_u32_e64 v0, s16, s19 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v1, s17, s20 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v2, s18, s21 clamp +; GFX10-NEXT: s_max_u32 s4, s16, s19 +; GFX10-NEXT: s_max_u32 s5, s17, s20 +; GFX10-NEXT: s_max_u32 s6, s18, s21 +; GFX10-NEXT: s_sub_i32 s4, s4, s19 +; GFX10-NEXT: s_sub_i32 s5, s5, s20 +; GFX10-NEXT: s_sub_i32 s6, s6, s21 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-NEXT: v_mov_b32_e32 v2, s6 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_usubsat_v3i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_sub_nc_u32_e64 v0, s0, s3 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v1, s1, s16 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v2, s2, s17 clamp +; GFX11-NEXT: s_max_u32 s0, s0, s3 +; GFX11-NEXT: s_max_u32 s1, s1, s16 +; GFX11-NEXT: s_max_u32 s2, s2, s17 +; GFX11-NEXT: s_sub_i32 s0, s0, s3 +; GFX11-NEXT: s_sub_i32 s1, s1, s16 +; GFX11-NEXT: s_sub_i32 s2, s2, s17 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_mov_b32_e32 v2, s2 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call <3 x i32> @llvm.usub.sat.v3i32(<3 x i32> %lhs, <3 x i32> %rhs) ret <3 x i32> %result @@ -1081,45 +1148,67 @@ define <4 x i32> @s_usubsat_v4i32(<4 x i32> inreg %lhs, <4 x i32> inreg %rhs) { ; GFX8-LABEL: s_usubsat_v4i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_mov_b32_e32 v0, s20 -; GFX8-NEXT: v_mov_b32_e32 v1, s21 -; GFX8-NEXT: v_mov_b32_e32 v2, s22 -; GFX8-NEXT: v_mov_b32_e32 v3, s23 -; GFX8-NEXT: v_sub_u32_e64 v0, s[4:5], s16, 
v0 clamp -; GFX8-NEXT: v_sub_u32_e64 v1, s[4:5], s17, v1 clamp -; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], s18, v2 clamp -; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], s19, v3 clamp +; GFX8-NEXT: s_max_u32 s4, s16, s20 +; GFX8-NEXT: s_max_u32 s5, s17, s21 +; GFX8-NEXT: s_max_u32 s6, s18, s22 +; GFX8-NEXT: s_max_u32 s7, s19, s23 +; GFX8-NEXT: s_sub_i32 s4, s4, s20 +; GFX8-NEXT: s_sub_i32 s5, s5, s21 +; GFX8-NEXT: s_sub_i32 s6, s6, s22 +; GFX8-NEXT: s_sub_i32 s7, s7, s23 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: v_mov_b32_e32 v2, s6 +; GFX8-NEXT: v_mov_b32_e32 v3, s7 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_usubsat_v4i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_mov_b32_e32 v0, s20 -; GFX9-NEXT: v_mov_b32_e32 v1, s21 -; GFX9-NEXT: v_mov_b32_e32 v2, s22 -; GFX9-NEXT: v_mov_b32_e32 v3, s23 -; GFX9-NEXT: v_sub_u32_e64 v0, s16, v0 clamp -; GFX9-NEXT: v_sub_u32_e64 v1, s17, v1 clamp -; GFX9-NEXT: v_sub_u32_e64 v2, s18, v2 clamp -; GFX9-NEXT: v_sub_u32_e64 v3, s19, v3 clamp +; GFX9-NEXT: s_max_u32 s4, s16, s20 +; GFX9-NEXT: s_max_u32 s5, s17, s21 +; GFX9-NEXT: s_max_u32 s6, s18, s22 +; GFX9-NEXT: s_max_u32 s7, s19, s23 +; GFX9-NEXT: s_sub_i32 s4, s4, s20 +; GFX9-NEXT: s_sub_i32 s5, s5, s21 +; GFX9-NEXT: s_sub_i32 s6, s6, s22 +; GFX9-NEXT: s_sub_i32 s7, s7, s23 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: v_mov_b32_e32 v2, s6 +; GFX9-NEXT: v_mov_b32_e32 v3, s7 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_usubsat_v4i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX10-NEXT: v_sub_nc_u32_e64 v0, s16, s20 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v1, s17, s21 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v2, s18, s22 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v3, s19, s23 clamp +; GFX10-NEXT: s_max_u32 s4, s16, s20 +; GFX10-NEXT: s_max_u32 s5, s17, s21 +; GFX10-NEXT: s_max_u32 s6, s18, s22 +; GFX10-NEXT: s_max_u32 s7, s19, s23 +; 
GFX10-NEXT: s_sub_i32 s4, s4, s20 +; GFX10-NEXT: s_sub_i32 s5, s5, s21 +; GFX10-NEXT: s_sub_i32 s6, s6, s22 +; GFX10-NEXT: s_sub_i32 s7, s7, s23 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-NEXT: v_mov_b32_e32 v2, s6 +; GFX10-NEXT: v_mov_b32_e32 v3, s7 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_usubsat_v4i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_sub_nc_u32_e64 v0, s0, s16 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v1, s1, s17 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v2, s2, s18 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v3, s3, s19 clamp +; GFX11-NEXT: s_max_u32 s0, s0, s16 +; GFX11-NEXT: s_max_u32 s1, s1, s17 +; GFX11-NEXT: s_max_u32 s2, s2, s18 +; GFX11-NEXT: s_max_u32 s3, s3, s19 +; GFX11-NEXT: s_sub_i32 s0, s0, s16 +; GFX11-NEXT: s_sub_i32 s1, s1, s17 +; GFX11-NEXT: s_sub_i32 s2, s2, s18 +; GFX11-NEXT: s_sub_i32 s3, s3, s19 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ret <4 x i32> %result @@ -1204,47 +1293,63 @@ define <8 x i32> @s_usubsat_v8i32(<8 x i32> inreg %lhs, <8 x i32> inreg %rhs) { ; GFX8-LABEL: s_usubsat_v8i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_readfirstlane_b32 s6, v1 -; GFX8-NEXT: v_readfirstlane_b32 s7, v0 -; GFX8-NEXT: v_mov_b32_e32 v0, s24 -; GFX8-NEXT: v_mov_b32_e32 v1, s25 -; GFX8-NEXT: v_mov_b32_e32 v2, s26 -; GFX8-NEXT: v_mov_b32_e32 v3, s27 -; GFX8-NEXT: v_mov_b32_e32 v4, s28 -; GFX8-NEXT: v_mov_b32_e32 v5, s29 -; GFX8-NEXT: v_mov_b32_e32 v6, s7 -; GFX8-NEXT: v_mov_b32_e32 v7, s6 -; GFX8-NEXT: v_sub_u32_e64 v0, s[4:5], s16, v0 clamp -; GFX8-NEXT: v_sub_u32_e64 v1, s[4:5], s17, v1 clamp -; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], s18, v2 clamp -; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], s19, v3 clamp -; GFX8-NEXT: 
v_sub_u32_e64 v4, s[4:5], s20, v4 clamp -; GFX8-NEXT: v_sub_u32_e64 v5, s[4:5], s21, v5 clamp -; GFX8-NEXT: v_sub_u32_e64 v6, s[4:5], s22, v6 clamp -; GFX8-NEXT: v_sub_u32_e64 v7, s[4:5], s23, v7 clamp +; GFX8-NEXT: v_readfirstlane_b32 s5, v0 +; GFX8-NEXT: v_readfirstlane_b32 s4, v1 +; GFX8-NEXT: s_max_u32 s12, s22, s5 +; GFX8-NEXT: s_max_u32 s6, s16, s24 +; GFX8-NEXT: s_max_u32 s7, s17, s25 +; GFX8-NEXT: s_max_u32 s8, s18, s26 +; GFX8-NEXT: s_max_u32 s9, s19, s27 +; GFX8-NEXT: s_max_u32 s10, s20, s28 +; GFX8-NEXT: s_max_u32 s11, s21, s29 +; GFX8-NEXT: s_sub_i32 s5, s12, s5 +; GFX8-NEXT: s_max_u32 s12, s23, s4 +; GFX8-NEXT: s_sub_i32 s6, s6, s24 +; GFX8-NEXT: s_sub_i32 s7, s7, s25 +; GFX8-NEXT: s_sub_i32 s8, s8, s26 +; GFX8-NEXT: s_sub_i32 s9, s9, s27 +; GFX8-NEXT: s_sub_i32 s10, s10, s28 +; GFX8-NEXT: s_sub_i32 s11, s11, s29 +; GFX8-NEXT: s_sub_i32 s4, s12, s4 +; GFX8-NEXT: v_mov_b32_e32 v0, s6 +; GFX8-NEXT: v_mov_b32_e32 v1, s7 +; GFX8-NEXT: v_mov_b32_e32 v2, s8 +; GFX8-NEXT: v_mov_b32_e32 v3, s9 +; GFX8-NEXT: v_mov_b32_e32 v4, s10 +; GFX8-NEXT: v_mov_b32_e32 v5, s11 +; GFX8-NEXT: v_mov_b32_e32 v6, s5 +; GFX8-NEXT: v_mov_b32_e32 v7, s4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_usubsat_v8i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: v_readfirstlane_b32 s5, v0 -; GFX9-NEXT: v_mov_b32_e32 v0, s24 -; GFX9-NEXT: v_mov_b32_e32 v1, s25 -; GFX9-NEXT: v_mov_b32_e32 v2, s26 -; GFX9-NEXT: v_mov_b32_e32 v3, s27 -; GFX9-NEXT: v_mov_b32_e32 v4, s28 -; GFX9-NEXT: v_mov_b32_e32 v5, s29 +; GFX9-NEXT: v_readfirstlane_b32 s4, v1 +; GFX9-NEXT: s_max_u32 s12, s22, s5 +; GFX9-NEXT: s_max_u32 s6, s16, s24 +; GFX9-NEXT: s_max_u32 s7, s17, s25 +; GFX9-NEXT: s_max_u32 s8, s18, s26 +; GFX9-NEXT: s_max_u32 s9, s19, s27 +; GFX9-NEXT: s_max_u32 s10, s20, s28 +; GFX9-NEXT: s_max_u32 s11, s21, s29 +; GFX9-NEXT: s_sub_i32 s5, s12, s5 +; GFX9-NEXT: s_max_u32 s12, s23, s4 +; GFX9-NEXT: s_sub_i32 s6, s6, 
s24 +; GFX9-NEXT: s_sub_i32 s7, s7, s25 +; GFX9-NEXT: s_sub_i32 s8, s8, s26 +; GFX9-NEXT: s_sub_i32 s9, s9, s27 +; GFX9-NEXT: s_sub_i32 s10, s10, s28 +; GFX9-NEXT: s_sub_i32 s11, s11, s29 +; GFX9-NEXT: s_sub_i32 s4, s12, s4 +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: v_mov_b32_e32 v1, s7 +; GFX9-NEXT: v_mov_b32_e32 v2, s8 +; GFX9-NEXT: v_mov_b32_e32 v3, s9 +; GFX9-NEXT: v_mov_b32_e32 v4, s10 +; GFX9-NEXT: v_mov_b32_e32 v5, s11 ; GFX9-NEXT: v_mov_b32_e32 v6, s5 ; GFX9-NEXT: v_mov_b32_e32 v7, s4 -; GFX9-NEXT: v_sub_u32_e64 v0, s16, v0 clamp -; GFX9-NEXT: v_sub_u32_e64 v1, s17, v1 clamp -; GFX9-NEXT: v_sub_u32_e64 v2, s18, v2 clamp -; GFX9-NEXT: v_sub_u32_e64 v3, s19, v3 clamp -; GFX9-NEXT: v_sub_u32_e64 v4, s20, v4 clamp -; GFX9-NEXT: v_sub_u32_e64 v5, s21, v5 clamp -; GFX9-NEXT: v_sub_u32_e64 v6, s22, v6 clamp -; GFX9-NEXT: v_sub_u32_e64 v7, s23, v7 clamp ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_usubsat_v8i32: @@ -1252,27 +1357,55 @@ define <8 x i32> @s_usubsat_v8i32(<8 x i32> inreg %lhs, <8 x i32> inreg %rhs) { ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-NEXT: v_readfirstlane_b32 s4, v1 ; GFX10-NEXT: v_readfirstlane_b32 s5, v0 -; GFX10-NEXT: v_sub_nc_u32_e64 v0, s16, s24 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v1, s17, s25 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v2, s18, s26 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v3, s19, s27 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v4, s20, s28 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v5, s21, s29 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v6, s22, s5 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v7, s23, s4 clamp +; GFX10-NEXT: s_max_u32 s6, s16, s24 +; GFX10-NEXT: s_max_u32 s7, s17, s25 +; GFX10-NEXT: s_max_u32 s8, s18, s26 +; GFX10-NEXT: s_max_u32 s9, s19, s27 +; GFX10-NEXT: s_max_u32 s10, s20, s28 +; GFX10-NEXT: s_max_u32 s11, s21, s29 +; GFX10-NEXT: s_max_u32 s12, s22, s5 +; GFX10-NEXT: s_max_u32 s13, s23, s4 +; GFX10-NEXT: s_sub_i32 s6, s6, s24 +; GFX10-NEXT: s_sub_i32 s7, s7, s25 +; GFX10-NEXT: s_sub_i32 s8, s8, 
s26 +; GFX10-NEXT: s_sub_i32 s9, s9, s27 +; GFX10-NEXT: s_sub_i32 s10, s10, s28 +; GFX10-NEXT: s_sub_i32 s11, s11, s29 +; GFX10-NEXT: s_sub_i32 s5, s12, s5 +; GFX10-NEXT: s_sub_i32 s4, s13, s4 +; GFX10-NEXT: v_mov_b32_e32 v0, s6 +; GFX10-NEXT: v_mov_b32_e32 v1, s7 +; GFX10-NEXT: v_mov_b32_e32 v2, s8 +; GFX10-NEXT: v_mov_b32_e32 v3, s9 +; GFX10-NEXT: v_mov_b32_e32 v4, s10 +; GFX10-NEXT: v_mov_b32_e32 v5, s11 +; GFX10-NEXT: v_mov_b32_e32 v6, s5 +; GFX10-NEXT: v_mov_b32_e32 v7, s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_usubsat_v8i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_sub_nc_u32_e64 v0, s0, s20 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v1, s1, s21 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v2, s2, s22 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v3, s3, s23 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v4, s16, s24 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v5, s17, s25 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v6, s18, s26 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v7, s19, s27 clamp +; GFX11-NEXT: s_max_u32 s0, s0, s20 +; GFX11-NEXT: s_max_u32 s1, s1, s21 +; GFX11-NEXT: s_max_u32 s2, s2, s22 +; GFX11-NEXT: s_max_u32 s3, s3, s23 +; GFX11-NEXT: s_max_u32 s4, s16, s24 +; GFX11-NEXT: s_max_u32 s5, s17, s25 +; GFX11-NEXT: s_max_u32 s6, s18, s26 +; GFX11-NEXT: s_max_u32 s7, s19, s27 +; GFX11-NEXT: s_sub_i32 s0, s0, s20 +; GFX11-NEXT: s_sub_i32 s1, s1, s21 +; GFX11-NEXT: s_sub_i32 s2, s2, s22 +; GFX11-NEXT: s_sub_i32 s3, s3, s23 +; GFX11-NEXT: s_sub_i32 s4, s4, s24 +; GFX11-NEXT: s_sub_i32 s5, s5, s25 +; GFX11-NEXT: s_sub_i32 s6, s6, s26 +; GFX11-NEXT: s_sub_i32 s7, s7, s27 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 +; GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %lhs, <8 x i32> %rhs) ret <8 x i32> 
%result @@ -1417,85 +1550,133 @@ define <16 x i32> @s_usubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32> inreg %rhs ; GFX8-LABEL: s_usubsat_v16i32: ; GFX8: ; %bb.0: ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: v_readfirstlane_b32 s6, v17 -; GFX8-NEXT: v_readfirstlane_b32 s8, v16 -; GFX8-NEXT: v_readfirstlane_b32 s10, v15 -; GFX8-NEXT: v_readfirstlane_b32 s11, v14 -; GFX8-NEXT: v_readfirstlane_b32 s12, v13 -; GFX8-NEXT: v_readfirstlane_b32 s13, v12 -; GFX8-NEXT: v_readfirstlane_b32 s14, v11 -; GFX8-NEXT: v_readfirstlane_b32 s15, v10 -; GFX8-NEXT: v_readfirstlane_b32 s40, v9 -; GFX8-NEXT: v_readfirstlane_b32 s41, v8 -; GFX8-NEXT: v_readfirstlane_b32 s42, v7 -; GFX8-NEXT: v_readfirstlane_b32 s43, v6 -; GFX8-NEXT: v_readfirstlane_b32 s44, v5 -; GFX8-NEXT: v_readfirstlane_b32 s45, v4 -; GFX8-NEXT: v_readfirstlane_b32 s46, v3 -; GFX8-NEXT: v_readfirstlane_b32 s4, v2 -; GFX8-NEXT: v_readfirstlane_b32 s7, v1 -; GFX8-NEXT: v_readfirstlane_b32 s9, v0 -; GFX8-NEXT: v_mov_b32_e32 v0, s4 -; GFX8-NEXT: v_mov_b32_e32 v1, s46 -; GFX8-NEXT: v_mov_b32_e32 v2, s45 -; GFX8-NEXT: v_mov_b32_e32 v3, s44 -; GFX8-NEXT: v_mov_b32_e32 v4, s43 -; GFX8-NEXT: v_mov_b32_e32 v5, s42 -; GFX8-NEXT: v_mov_b32_e32 v6, s41 -; GFX8-NEXT: v_mov_b32_e32 v7, s40 -; GFX8-NEXT: v_mov_b32_e32 v8, s15 -; GFX8-NEXT: v_mov_b32_e32 v9, s14 -; GFX8-NEXT: v_mov_b32_e32 v10, s13 -; GFX8-NEXT: v_mov_b32_e32 v11, s12 -; GFX8-NEXT: v_mov_b32_e32 v12, s11 -; GFX8-NEXT: v_mov_b32_e32 v13, s10 -; GFX8-NEXT: v_mov_b32_e32 v14, s8 -; GFX8-NEXT: v_mov_b32_e32 v15, s6 -; GFX8-NEXT: v_sub_u32_e64 v0, s[4:5], s16, v0 clamp -; GFX8-NEXT: v_sub_u32_e64 v1, s[4:5], s17, v1 clamp -; GFX8-NEXT: v_sub_u32_e64 v2, s[4:5], s18, v2 clamp -; GFX8-NEXT: v_sub_u32_e64 v3, s[4:5], s19, v3 clamp -; GFX8-NEXT: v_sub_u32_e64 v4, s[4:5], s20, v4 clamp -; GFX8-NEXT: v_sub_u32_e64 v5, s[4:5], s21, v5 clamp -; GFX8-NEXT: v_sub_u32_e64 v6, s[4:5], s22, v6 clamp -; GFX8-NEXT: v_sub_u32_e64 v7, s[4:5], s23, v7 clamp -; GFX8-NEXT: 
v_sub_u32_e64 v8, s[4:5], s24, v8 clamp -; GFX8-NEXT: v_sub_u32_e64 v9, s[4:5], s25, v9 clamp -; GFX8-NEXT: v_sub_u32_e64 v10, s[4:5], s26, v10 clamp -; GFX8-NEXT: v_sub_u32_e64 v11, s[4:5], s27, v11 clamp -; GFX8-NEXT: v_sub_u32_e64 v12, s[4:5], s28, v12 clamp -; GFX8-NEXT: v_sub_u32_e64 v13, s[4:5], s29, v13 clamp -; GFX8-NEXT: v_sub_u32_e64 v14, s[4:5], s9, v14 clamp -; GFX8-NEXT: v_sub_u32_e64 v15, s[4:5], s7, v15 clamp +; GFX8-NEXT: v_readfirstlane_b32 s15, v8 +; GFX8-NEXT: v_readfirstlane_b32 s14, v9 +; GFX8-NEXT: s_max_u32 s22, s22, s15 +; GFX8-NEXT: v_readfirstlane_b32 s13, v10 +; GFX8-NEXT: s_sub_i32 s15, s22, s15 +; GFX8-NEXT: s_max_u32 s22, s23, s14 +; GFX8-NEXT: v_readfirstlane_b32 s12, v11 +; GFX8-NEXT: s_sub_i32 s14, s22, s14 +; GFX8-NEXT: s_max_u32 s22, s24, s13 +; GFX8-NEXT: v_readfirstlane_b32 s11, v12 +; GFX8-NEXT: s_sub_i32 s13, s22, s13 +; GFX8-NEXT: s_max_u32 s22, s25, s12 +; GFX8-NEXT: v_readfirstlane_b32 s10, v13 +; GFX8-NEXT: s_sub_i32 s12, s22, s12 +; GFX8-NEXT: s_max_u32 s22, s26, s11 +; GFX8-NEXT: v_readfirstlane_b32 s9, v14 +; GFX8-NEXT: s_sub_i32 s11, s22, s11 +; GFX8-NEXT: s_max_u32 s22, s27, s10 +; GFX8-NEXT: v_readfirstlane_b32 s4, v17 +; GFX8-NEXT: v_readfirstlane_b32 s5, v1 +; GFX8-NEXT: v_readfirstlane_b32 s6, v16 +; GFX8-NEXT: v_readfirstlane_b32 s7, v0 +; GFX8-NEXT: v_readfirstlane_b32 s8, v15 +; GFX8-NEXT: v_readfirstlane_b32 s40, v7 +; GFX8-NEXT: v_readfirstlane_b32 s41, v6 +; GFX8-NEXT: v_readfirstlane_b32 s42, v5 +; GFX8-NEXT: v_readfirstlane_b32 s43, v4 +; GFX8-NEXT: v_readfirstlane_b32 s44, v3 +; GFX8-NEXT: v_readfirstlane_b32 s45, v2 +; GFX8-NEXT: s_sub_i32 s10, s22, s10 +; GFX8-NEXT: s_max_u32 s22, s28, s9 +; GFX8-NEXT: s_max_u32 s16, s16, s45 +; GFX8-NEXT: s_max_u32 s17, s17, s44 +; GFX8-NEXT: s_max_u32 s18, s18, s43 +; GFX8-NEXT: s_max_u32 s19, s19, s42 +; GFX8-NEXT: s_max_u32 s20, s20, s41 +; GFX8-NEXT: s_max_u32 s21, s21, s40 +; GFX8-NEXT: s_sub_i32 s9, s22, s9 +; GFX8-NEXT: s_max_u32 s22, s29, s8 +; GFX8-NEXT: 
s_max_u32 s7, s7, s6 +; GFX8-NEXT: s_max_u32 s5, s5, s4 +; GFX8-NEXT: s_sub_i32 s16, s16, s45 +; GFX8-NEXT: s_sub_i32 s17, s17, s44 +; GFX8-NEXT: s_sub_i32 s18, s18, s43 +; GFX8-NEXT: s_sub_i32 s19, s19, s42 +; GFX8-NEXT: s_sub_i32 s20, s20, s41 +; GFX8-NEXT: s_sub_i32 s21, s21, s40 +; GFX8-NEXT: s_sub_i32 s8, s22, s8 +; GFX8-NEXT: s_sub_i32 s6, s7, s6 +; GFX8-NEXT: s_sub_i32 s4, s5, s4 +; GFX8-NEXT: v_mov_b32_e32 v0, s16 +; GFX8-NEXT: v_mov_b32_e32 v1, s17 +; GFX8-NEXT: v_mov_b32_e32 v2, s18 +; GFX8-NEXT: v_mov_b32_e32 v3, s19 +; GFX8-NEXT: v_mov_b32_e32 v4, s20 +; GFX8-NEXT: v_mov_b32_e32 v5, s21 +; GFX8-NEXT: v_mov_b32_e32 v6, s15 +; GFX8-NEXT: v_mov_b32_e32 v7, s14 +; GFX8-NEXT: v_mov_b32_e32 v8, s13 +; GFX8-NEXT: v_mov_b32_e32 v9, s12 +; GFX8-NEXT: v_mov_b32_e32 v10, s11 +; GFX8-NEXT: v_mov_b32_e32 v11, s10 +; GFX8-NEXT: v_mov_b32_e32 v12, s9 +; GFX8-NEXT: v_mov_b32_e32 v13, s8 +; GFX8-NEXT: v_mov_b32_e32 v14, s6 +; GFX8-NEXT: v_mov_b32_e32 v15, s4 ; GFX8-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-LABEL: s_usubsat_v16i32: ; GFX9: ; %bb.0: ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_readfirstlane_b32 s15, v8 +; GFX9-NEXT: v_readfirstlane_b32 s14, v9 +; GFX9-NEXT: s_max_u32 s22, s22, s15 +; GFX9-NEXT: v_readfirstlane_b32 s13, v10 +; GFX9-NEXT: s_sub_i32 s15, s22, s15 +; GFX9-NEXT: s_max_u32 s22, s23, s14 +; GFX9-NEXT: v_readfirstlane_b32 s12, v11 +; GFX9-NEXT: s_sub_i32 s14, s22, s14 +; GFX9-NEXT: s_max_u32 s22, s24, s13 +; GFX9-NEXT: v_readfirstlane_b32 s11, v12 +; GFX9-NEXT: s_sub_i32 s13, s22, s13 +; GFX9-NEXT: s_max_u32 s22, s25, s12 +; GFX9-NEXT: v_readfirstlane_b32 s10, v13 +; GFX9-NEXT: s_sub_i32 s12, s22, s12 +; GFX9-NEXT: s_max_u32 s22, s26, s11 +; GFX9-NEXT: v_readfirstlane_b32 s9, v14 +; GFX9-NEXT: s_sub_i32 s11, s22, s11 +; GFX9-NEXT: s_max_u32 s22, s27, s10 ; GFX9-NEXT: v_readfirstlane_b32 s4, v17 +; GFX9-NEXT: v_readfirstlane_b32 s5, v1 ; GFX9-NEXT: v_readfirstlane_b32 s6, v16 +; GFX9-NEXT: v_readfirstlane_b32 s7, v0 ; GFX9-NEXT: 
v_readfirstlane_b32 s8, v15 -; GFX9-NEXT: v_readfirstlane_b32 s9, v14 -; GFX9-NEXT: v_readfirstlane_b32 s10, v13 -; GFX9-NEXT: v_readfirstlane_b32 s11, v12 -; GFX9-NEXT: v_readfirstlane_b32 s12, v11 -; GFX9-NEXT: v_readfirstlane_b32 s13, v10 -; GFX9-NEXT: v_readfirstlane_b32 s14, v9 -; GFX9-NEXT: v_readfirstlane_b32 s15, v8 ; GFX9-NEXT: v_readfirstlane_b32 s40, v7 ; GFX9-NEXT: v_readfirstlane_b32 s41, v6 ; GFX9-NEXT: v_readfirstlane_b32 s42, v5 ; GFX9-NEXT: v_readfirstlane_b32 s43, v4 ; GFX9-NEXT: v_readfirstlane_b32 s44, v3 ; GFX9-NEXT: v_readfirstlane_b32 s45, v2 -; GFX9-NEXT: v_readfirstlane_b32 s5, v1 -; GFX9-NEXT: v_readfirstlane_b32 s7, v0 -; GFX9-NEXT: v_mov_b32_e32 v0, s45 -; GFX9-NEXT: v_mov_b32_e32 v1, s44 -; GFX9-NEXT: v_mov_b32_e32 v2, s43 -; GFX9-NEXT: v_mov_b32_e32 v3, s42 -; GFX9-NEXT: v_mov_b32_e32 v4, s41 -; GFX9-NEXT: v_mov_b32_e32 v5, s40 +; GFX9-NEXT: s_sub_i32 s10, s22, s10 +; GFX9-NEXT: s_max_u32 s22, s28, s9 +; GFX9-NEXT: s_max_u32 s16, s16, s45 +; GFX9-NEXT: s_max_u32 s17, s17, s44 +; GFX9-NEXT: s_max_u32 s18, s18, s43 +; GFX9-NEXT: s_max_u32 s19, s19, s42 +; GFX9-NEXT: s_max_u32 s20, s20, s41 +; GFX9-NEXT: s_max_u32 s21, s21, s40 +; GFX9-NEXT: s_sub_i32 s9, s22, s9 +; GFX9-NEXT: s_max_u32 s22, s29, s8 +; GFX9-NEXT: s_max_u32 s7, s7, s6 +; GFX9-NEXT: s_max_u32 s5, s5, s4 +; GFX9-NEXT: s_sub_i32 s16, s16, s45 +; GFX9-NEXT: s_sub_i32 s17, s17, s44 +; GFX9-NEXT: s_sub_i32 s18, s18, s43 +; GFX9-NEXT: s_sub_i32 s19, s19, s42 +; GFX9-NEXT: s_sub_i32 s20, s20, s41 +; GFX9-NEXT: s_sub_i32 s21, s21, s40 +; GFX9-NEXT: s_sub_i32 s8, s22, s8 +; GFX9-NEXT: s_sub_i32 s6, s7, s6 +; GFX9-NEXT: s_sub_i32 s4, s5, s4 +; GFX9-NEXT: v_mov_b32_e32 v0, s16 +; GFX9-NEXT: v_mov_b32_e32 v1, s17 +; GFX9-NEXT: v_mov_b32_e32 v2, s18 +; GFX9-NEXT: v_mov_b32_e32 v3, s19 +; GFX9-NEXT: v_mov_b32_e32 v4, s20 +; GFX9-NEXT: v_mov_b32_e32 v5, s21 ; GFX9-NEXT: v_mov_b32_e32 v6, s15 ; GFX9-NEXT: v_mov_b32_e32 v7, s14 ; GFX9-NEXT: v_mov_b32_e32 v8, s13 @@ -1506,96 +1687,136 @@ 
define <16 x i32> @s_usubsat_v16i32(<16 x i32> inreg %lhs, <16 x i32> inreg %rhs ; GFX9-NEXT: v_mov_b32_e32 v13, s8 ; GFX9-NEXT: v_mov_b32_e32 v14, s6 ; GFX9-NEXT: v_mov_b32_e32 v15, s4 -; GFX9-NEXT: v_sub_u32_e64 v0, s16, v0 clamp -; GFX9-NEXT: v_sub_u32_e64 v1, s17, v1 clamp -; GFX9-NEXT: v_sub_u32_e64 v2, s18, v2 clamp -; GFX9-NEXT: v_sub_u32_e64 v3, s19, v3 clamp -; GFX9-NEXT: v_sub_u32_e64 v4, s20, v4 clamp -; GFX9-NEXT: v_sub_u32_e64 v5, s21, v5 clamp -; GFX9-NEXT: v_sub_u32_e64 v6, s22, v6 clamp -; GFX9-NEXT: v_sub_u32_e64 v7, s23, v7 clamp -; GFX9-NEXT: v_sub_u32_e64 v8, s24, v8 clamp -; GFX9-NEXT: v_sub_u32_e64 v9, s25, v9 clamp -; GFX9-NEXT: v_sub_u32_e64 v10, s26, v10 clamp -; GFX9-NEXT: v_sub_u32_e64 v11, s27, v11 clamp -; GFX9-NEXT: v_sub_u32_e64 v12, s28, v12 clamp -; GFX9-NEXT: v_sub_u32_e64 v13, s29, v13 clamp -; GFX9-NEXT: v_sub_u32_e64 v14, s7, v14 clamp -; GFX9-NEXT: v_sub_u32_e64 v15, s5, v15 clamp ; GFX9-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: s_usubsat_v16i32: ; GFX10: ; %bb.0: ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_readfirstlane_b32 s14, v9 +; GFX10-NEXT: v_readfirstlane_b32 s15, v8 +; GFX10-NEXT: v_readfirstlane_b32 s12, v11 +; GFX10-NEXT: v_readfirstlane_b32 s13, v10 +; GFX10-NEXT: v_readfirstlane_b32 s10, v13 +; GFX10-NEXT: v_readfirstlane_b32 s11, v12 +; GFX10-NEXT: s_max_u32 s22, s22, s15 +; GFX10-NEXT: s_max_u32 s23, s23, s14 ; GFX10-NEXT: v_readfirstlane_b32 s4, v17 ; GFX10-NEXT: v_readfirstlane_b32 s5, v1 ; GFX10-NEXT: v_readfirstlane_b32 s6, v16 ; GFX10-NEXT: v_readfirstlane_b32 s7, v0 ; GFX10-NEXT: v_readfirstlane_b32 s8, v15 ; GFX10-NEXT: v_readfirstlane_b32 s9, v14 -; GFX10-NEXT: v_readfirstlane_b32 s10, v13 -; GFX10-NEXT: v_readfirstlane_b32 s11, v12 -; GFX10-NEXT: v_readfirstlane_b32 s12, v11 -; GFX10-NEXT: v_readfirstlane_b32 s13, v10 -; GFX10-NEXT: v_readfirstlane_b32 s14, v9 -; GFX10-NEXT: v_readfirstlane_b32 s15, v8 ; GFX10-NEXT: v_readfirstlane_b32 s40, v7 ; GFX10-NEXT: 
v_readfirstlane_b32 s41, v2 ; GFX10-NEXT: v_readfirstlane_b32 s42, v3 -; GFX10-NEXT: v_readfirstlane_b32 s43, v4 +; GFX10-NEXT: v_readfirstlane_b32 s43, v6 ; GFX10-NEXT: v_readfirstlane_b32 s44, v5 -; GFX10-NEXT: v_readfirstlane_b32 s45, v6 -; GFX10-NEXT: v_sub_nc_u32_e64 v0, s16, s41 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v1, s17, s42 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v2, s18, s43 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v3, s19, s44 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v4, s20, s45 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v5, s21, s40 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v6, s22, s15 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v7, s23, s14 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v8, s24, s13 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v9, s25, s12 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v10, s26, s11 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v11, s27, s10 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v12, s28, s9 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v13, s29, s8 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v14, s7, s6 clamp -; GFX10-NEXT: v_sub_nc_u32_e64 v15, s5, s4 clamp +; GFX10-NEXT: v_readfirstlane_b32 s45, v4 +; GFX10-NEXT: s_sub_i32 s15, s22, s15 +; GFX10-NEXT: s_sub_i32 s14, s23, s14 +; GFX10-NEXT: s_max_u32 s22, s24, s13 +; GFX10-NEXT: s_max_u32 s23, s25, s12 +; GFX10-NEXT: s_sub_i32 s13, s22, s13 +; GFX10-NEXT: s_sub_i32 s12, s23, s12 +; GFX10-NEXT: s_max_u32 s22, s26, s11 +; GFX10-NEXT: s_max_u32 s23, s27, s10 +; GFX10-NEXT: s_max_u32 s16, s16, s41 +; GFX10-NEXT: s_max_u32 s17, s17, s42 +; GFX10-NEXT: s_max_u32 s18, s18, s45 +; GFX10-NEXT: s_max_u32 s19, s19, s44 +; GFX10-NEXT: s_max_u32 s20, s20, s43 +; GFX10-NEXT: s_max_u32 s21, s21, s40 +; GFX10-NEXT: s_sub_i32 s11, s22, s11 +; GFX10-NEXT: s_sub_i32 s10, s23, s10 +; GFX10-NEXT: s_max_u32 s22, s28, s9 +; GFX10-NEXT: s_max_u32 s23, s29, s8 +; GFX10-NEXT: s_max_u32 s7, s7, s6 +; GFX10-NEXT: s_max_u32 s5, s5, s4 +; GFX10-NEXT: s_sub_i32 s16, s16, s41 +; GFX10-NEXT: s_sub_i32 s17, s17, s42 +; GFX10-NEXT: s_sub_i32 s18, s18, s45 +; 
GFX10-NEXT: s_sub_i32 s19, s19, s44 +; GFX10-NEXT: s_sub_i32 s20, s20, s43 +; GFX10-NEXT: s_sub_i32 s21, s21, s40 +; GFX10-NEXT: s_sub_i32 s9, s22, s9 +; GFX10-NEXT: s_sub_i32 s8, s23, s8 +; GFX10-NEXT: s_sub_i32 s6, s7, s6 +; GFX10-NEXT: s_sub_i32 s4, s5, s4 +; GFX10-NEXT: v_mov_b32_e32 v0, s16 +; GFX10-NEXT: v_mov_b32_e32 v1, s17 +; GFX10-NEXT: v_mov_b32_e32 v2, s18 +; GFX10-NEXT: v_mov_b32_e32 v3, s19 +; GFX10-NEXT: v_mov_b32_e32 v4, s20 +; GFX10-NEXT: v_mov_b32_e32 v5, s21 +; GFX10-NEXT: v_mov_b32_e32 v6, s15 +; GFX10-NEXT: v_mov_b32_e32 v7, s14 +; GFX10-NEXT: v_mov_b32_e32 v8, s13 +; GFX10-NEXT: v_mov_b32_e32 v9, s12 +; GFX10-NEXT: v_mov_b32_e32 v10, s11 +; GFX10-NEXT: v_mov_b32_e32 v11, s10 +; GFX10-NEXT: v_mov_b32_e32 v12, s9 +; GFX10-NEXT: v_mov_b32_e32 v13, s8 +; GFX10-NEXT: v_mov_b32_e32 v14, s6 +; GFX10-NEXT: v_mov_b32_e32 v15, s4 ; GFX10-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: s_usubsat_v16i32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_readfirstlane_b32 s4, v13 -; GFX11-NEXT: v_readfirstlane_b32 s5, v12 -; GFX11-NEXT: v_readfirstlane_b32 s6, v11 -; GFX11-NEXT: v_readfirstlane_b32 s7, v10 -; GFX11-NEXT: v_readfirstlane_b32 s8, v9 -; GFX11-NEXT: v_readfirstlane_b32 s9, v8 -; GFX11-NEXT: v_readfirstlane_b32 s10, v7 -; GFX11-NEXT: v_readfirstlane_b32 s11, v6 +; GFX11-NEXT: v_readfirstlane_b32 s14, v3 +; GFX11-NEXT: v_readfirstlane_b32 s15, v2 ; GFX11-NEXT: v_readfirstlane_b32 s12, v5 ; GFX11-NEXT: v_readfirstlane_b32 s13, v4 -; GFX11-NEXT: v_readfirstlane_b32 s14, v3 -; GFX11-NEXT: v_readfirstlane_b32 s15, v0 +; GFX11-NEXT: v_readfirstlane_b32 s10, v7 +; GFX11-NEXT: v_readfirstlane_b32 s11, v6 +; GFX11-NEXT: s_max_u32 s16, s16, s15 +; GFX11-NEXT: s_max_u32 s17, s17, s14 +; GFX11-NEXT: v_readfirstlane_b32 s8, v9 +; GFX11-NEXT: v_readfirstlane_b32 s9, v8 +; GFX11-NEXT: s_sub_i32 s15, s16, s15 +; GFX11-NEXT: s_sub_i32 s14, s17, s14 +; GFX11-NEXT: s_max_u32 s16, s18, s13 +; GFX11-NEXT: s_max_u32 s17, s19, s12 
+; GFX11-NEXT: v_readfirstlane_b32 s6, v11 +; GFX11-NEXT: v_readfirstlane_b32 s7, v10 +; GFX11-NEXT: s_sub_i32 s13, s16, s13 +; GFX11-NEXT: s_sub_i32 s12, s17, s12 +; GFX11-NEXT: s_max_u32 s16, s20, s11 +; GFX11-NEXT: s_max_u32 s17, s21, s10 +; GFX11-NEXT: v_readfirstlane_b32 s4, v13 +; GFX11-NEXT: v_readfirstlane_b32 s5, v12 ; GFX11-NEXT: v_readfirstlane_b32 s40, v1 -; GFX11-NEXT: v_readfirstlane_b32 s41, v2 -; GFX11-NEXT: v_sub_nc_u32_e64 v0, s0, s28 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v1, s1, s29 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v2, s2, s15 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v3, s3, s40 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v4, s16, s41 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v5, s17, s14 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v6, s18, s13 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v7, s19, s12 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v8, s20, s11 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v9, s21, s10 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v10, s22, s9 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v11, s23, s8 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v12, s24, s7 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v13, s25, s6 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v14, s26, s5 clamp -; GFX11-NEXT: v_sub_nc_u32_e64 v15, s27, s4 clamp +; GFX11-NEXT: v_readfirstlane_b32 s41, v0 +; GFX11-NEXT: s_sub_i32 s11, s16, s11 +; GFX11-NEXT: s_sub_i32 s10, s17, s10 +; GFX11-NEXT: s_max_u32 s16, s22, s9 +; GFX11-NEXT: s_max_u32 s17, s23, s8 +; GFX11-NEXT: s_sub_i32 s9, s16, s9 +; GFX11-NEXT: s_sub_i32 s8, s17, s8 +; GFX11-NEXT: s_max_u32 s16, s24, s7 +; GFX11-NEXT: s_max_u32 s17, s25, s6 +; GFX11-NEXT: s_max_u32 s0, s0, s28 +; GFX11-NEXT: s_max_u32 s1, s1, s29 +; GFX11-NEXT: s_max_u32 s2, s2, s41 +; GFX11-NEXT: s_max_u32 s3, s3, s40 +; GFX11-NEXT: s_sub_i32 s7, s16, s7 +; GFX11-NEXT: s_sub_i32 s6, s17, s6 +; GFX11-NEXT: s_max_u32 s16, s26, s5 +; GFX11-NEXT: s_max_u32 s17, s27, s4 +; GFX11-NEXT: s_sub_i32 s0, s0, s28 +; GFX11-NEXT: s_sub_i32 s1, s1, s29 +; GFX11-NEXT: s_sub_i32 s2, s2, s41 +; GFX11-NEXT: 
s_sub_i32 s3, s3, s40 +; GFX11-NEXT: s_sub_i32 s5, s16, s5 +; GFX11-NEXT: s_sub_i32 s4, s17, s4 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 +; GFX11-NEXT: v_dual_mov_b32 v4, s15 :: v_dual_mov_b32 v5, s14 +; GFX11-NEXT: v_dual_mov_b32 v6, s13 :: v_dual_mov_b32 v7, s12 +; GFX11-NEXT: v_dual_mov_b32 v8, s11 :: v_dual_mov_b32 v9, s10 +; GFX11-NEXT: v_dual_mov_b32 v10, s9 :: v_dual_mov_b32 v11, s8 +; GFX11-NEXT: v_dual_mov_b32 v12, s7 :: v_dual_mov_b32 v13, s6 +; GFX11-NEXT: v_dual_mov_b32 v14, s5 :: v_dual_mov_b32 v15, s4 ; GFX11-NEXT: s_setpc_b64 s[30:31] %result = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %lhs, <16 x i32> %rhs) ret <16 x i32> %result From 1530858fabbbd338fa88707763f4ee081c96d597 Mon Sep 17 00:00:00 2001 From: Jeremy Kun Date: Wed, 24 Jun 2026 11:47:11 -0700 Subject: [PATCH 390/511] [mlir][bazel]: Remove GPU dialect deps from MemRefTransforms (#205624) This change removes dead GPU dependencies (`GPUDialect` and `NVGPUDialect`) from the `MemRefTransforms` target. These dependencies are not needed by the transforms themselves and greatly increase the build time (e.g., NVVMDialect.cpp alone requires two minutes to build). This aligns the bazel build with the CMake configuration. 
--- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 2 -- 1 file changed, 2 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index fd0bf0bcce72c..10e9741e417cc 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -13542,14 +13542,12 @@ cc_library( ":DestinationStyleOpInterface", ":DialectUtils", ":FuncDialect", - ":GPUDialect", ":IR", ":InferTypeOpInterface", ":LoopLikeInterface", ":MemRefDialect", ":MemRefPassIncGen", ":MemRefUtils", - ":NVGPUDialect", ":Pass", ":RuntimeVerifiableOpInterface", ":SCFDialect", From 59d3573bc054fe59c956f7dfd5bbe17b596f8d59 Mon Sep 17 00:00:00 2001 From: Fabian Parzefall Date: Wed, 24 Jun 2026 11:48:20 -0700 Subject: [PATCH 391/511] [BOLT] Replace partial instructions with traps in patched entries (#205211) Overwriting a function entry with a jump is likely to not perfectly align with the instruction stream. If the end of the patch does not fall onto an instruction boundary, the bytes following the jump are orphaned and will have nonsensical interpretations. This can leave other tools confused, especially since these orphaned bytes can decode to instructions that do not nicely rejoin the still intact part of the instructions stream. Overwrite these bytes with traps in the PatchEntry pass. Fixes #198455. 
--- bolt/include/bolt/Core/BinaryFunction.h | 8 +++++ bolt/include/bolt/Core/MCPlusBuilder.h | 5 ++++ bolt/include/bolt/Passes/PatchEntries.h | 1 + bolt/lib/Core/BinaryFunction.cpp | 23 +++++++++++++++ bolt/lib/Passes/PatchEntries.cpp | 37 ++++++++++++++++++++++-- bolt/lib/Target/X86/X86MCPlusBuilder.cpp | 5 ++++ bolt/test/X86/patch-entries.test | 9 ++++++ 7 files changed, 85 insertions(+), 3 deletions(-) diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h index 4ae8cfe372855..84fbd5661fd0a 100644 --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -1025,6 +1025,14 @@ class BinaryFunction { std::optional disassembleInstructionAtOffset(uint64_t Offset) const; + /// Given a starting point \p Offset and a number of bytes \p MinLength, + /// returns the number of bytes \p MinLength + Tail such that the last + /// instruction in the sequence is not split apart. Returns std::nullopt if + /// disassembling fails. Assumes that \p Offset aligns with instruction stream + /// and that the instructions can be disassembled. + uint64_t getInstructionSequenceLength(uint64_t Offset, + uint64_t MinLength) const; + /// Return offset for the first instruction. If there is data at the /// beginning of a function then offset of the first instruction could /// be different from 0 diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index 76b4a5fe778c0..84b44a9ab5483 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -1995,6 +1995,11 @@ class MCPlusBuilder { llvm_unreachable("not implemented"); } + /// Creates a breakpoint instruction in Inst. + virtual void createBreakpoint(MCInst &Inst) const { + llvm_unreachable("not implemented"); + } + /// Creates an instruction to bump the stack pointer just like a call. 
virtual void createStackPointerIncrement(MCInst &Inst, int Size = 8, bool NoFlagsClobber = false) const { diff --git a/bolt/include/bolt/Passes/PatchEntries.h b/bolt/include/bolt/Passes/PatchEntries.h index 04ec9165c2ff2..2f58ce08ea2e0 100644 --- a/bolt/include/bolt/Passes/PatchEntries.h +++ b/bolt/include/bolt/Passes/PatchEntries.h @@ -26,6 +26,7 @@ class PatchEntries : public BinaryFunctionPass { struct Patch { const MCSymbol *Symbol; uint64_t Address; + uint32_t PaddingAfter = 0; }; public: diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 79e92b79f6fee..200e286d8e80e 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1279,6 +1279,29 @@ BinaryFunction::disassembleInstructionAtOffset(uint64_t Offset) const { return std::nullopt; } +uint64_t +BinaryFunction::getInstructionSequenceLength(uint64_t Offset, + uint64_t MinLength) const { + assert(Offset + MinLength <= MaxSize && "Invalid offset / min length"); + ErrorOr> FunctionData = getData(); + assert(FunctionData && "Cannot get function as data"); + uint64_t Current = Offset; + const uint64_t Target = Offset + MinLength; + while (Current < Target) { + MCInst Instr; + uint64_t InstrSize = 0; + const uint64_t InstrAddress = getAddress() + Current; + [[maybe_unused]] MCDisassembler::DecodeStatus Res = + BC.DisAsm->getInstruction(Instr, InstrSize, + FunctionData->slice(Current), InstrAddress, + nulls()); + assert(Res != MCDisassembler::DecodeStatus::Fail && + "Function has been disassembled previously"); + Current += InstrSize; + } + return Current - Offset; +} + Error BinaryFunction::disassemble() { NamedRegionTimer T("disassemble", "Disassemble function", "buildfuncs", "Build Binary Functions", opts::TimeBuild); diff --git a/bolt/lib/Passes/PatchEntries.cpp b/bolt/lib/Passes/PatchEntries.cpp index 9af9e2ca3bdee..a051f4f8fe3c6 100644 --- a/bolt/lib/Passes/PatchEntries.cpp +++ b/bolt/lib/Passes/PatchEntries.cpp @@ -63,6 +63,12 @@ Error 
PatchEntries::runOnFunctions(BinaryContext &BC) { BC.MIB->createLongTailCall(Seq, BC.Ctx->createTempSymbol(), BC.Ctx.get()); PatchSize = BC.computeCodeSize(Seq.begin(), Seq.end()); } + static size_t FillerSize = 0; + if (BC.isX86() && FillerSize == 0) { + std::array Seq; + BC.MIB->createBreakpoint(Seq[0]); + FillerSize = BC.computeCodeSize(Seq.begin(), Seq.end()); + } for (auto &BFI : BC.getBinaryFunctions()) { BinaryFunction &Function = BFI.second; @@ -91,8 +97,6 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) { return false; } - PendingPatches.emplace_back( - Patch{Symbol, Function.getAddress() + Offset}); NextValidByte = Offset + PatchSize; if (NextValidByte > Function.getMaxSize()) { if (opts::Verbosity >= 1) @@ -101,6 +105,22 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) { return false; } + const uint64_t PatchAddress = Function.getAddress() + Offset; + Patch P{Symbol, PatchAddress}; + + if (BC.isX86()) { + uint64_t OverwriteLength = + Function.getInstructionSequenceLength(Offset, PatchSize); + P.PaddingAfter = OverwriteLength - PatchSize; + assert(PendingPatches.empty() || + (PendingPatches.back().Address + PatchSize + + PendingPatches.back().PaddingAfter <= + PatchAddress) && + "Entry point cannot overlap with instruction stream of " + "previous entrypoint."); + } + + PendingPatches.emplace_back(P); return true; }); @@ -117,6 +137,16 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) { // Add instruction patch to the binary. 
InstructionListType Instructions; BC.MIB->createLongTailCall(Instructions, Patch.Symbol, BC.Ctx.get()); + + if (BC.isX86()) { + assert(Patch.PaddingAfter % FillerSize == 0 && + "Padding must be multiple of filler size."); + llvm::MCInst Inst; + BC.MIB->createBreakpoint(Inst); + Instructions.resize( + Instructions.size() + Patch.PaddingAfter / FillerSize, Inst); + } + BinaryFunction *PatchFunction = BC.createInstructionPatch( Patch.Address, Instructions, NameResolver::append(Patch.Symbol->getName(), ".org.0")); @@ -128,7 +158,8 @@ Error PatchEntries::runOnFunctions(BinaryContext &BC) { uint64_t HotSize, ColdSize; std::tie(HotSize, ColdSize) = BC.calculateEmittedSize(*PatchFunction); assert(!ColdSize && "unexpected cold code"); - assert(HotSize <= PatchSize && "max patch size exceeded"); + assert(HotSize <= PatchSize + Patch.PaddingAfter && + "max patch size exceeded"); } } return Error::success(); diff --git a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp index 923de64be58c8..11a297f514530 100644 --- a/bolt/lib/Target/X86/X86MCPlusBuilder.cpp +++ b/bolt/lib/Target/X86/X86MCPlusBuilder.cpp @@ -2788,6 +2788,11 @@ class X86MCPlusBuilder : public MCPlusBuilder { Inst.setOpcode(X86::TRAP); } + void createBreakpoint(MCInst &Inst) const override { + Inst.clear(); + Inst.setOpcode(X86::INT3); + } + void createCondBranch(MCInst &Inst, const MCSymbol *Target, unsigned CC, MCContext *Ctx) const override { Inst.setOpcode(X86::JCC_1); diff --git a/bolt/test/X86/patch-entries.test b/bolt/test/X86/patch-entries.test index bf31af342dc61..6a7f3af30e80e 100644 --- a/bolt/test/X86/patch-entries.test +++ b/bolt/test/X86/patch-entries.test @@ -29,3 +29,12 @@ CHECK-FOO: 0000000000[[#%x,ORG:]] [[#%x,ORGSIZE:]] t foo.org.0 CHECK-FOO: FDE {{.*}} pc=00[[#%x,ORG]]...00[[#%x,ORG+ORGSIZE]] ## original FDE comes second CHECK-FOO: FDE {{.*}} pc=00[[#%x,ORG]]...00[[#%x,ORG+OPTSIZE]] + +## Check that incomplete instructions are replaced with int3: +RUN: 
llvm-objdump %t.out --disassemble-symbols=main.org.0 \ +RUN: | FileCheck %s --check-prefix=CHECK-NOP +CHECK-NOP: main.org.0 +CHECK-NOP-NEXT: jmp +CHECK-NOP-NEXT: int3 +CHECK-NOP-NEXT: int3 +CHECK-NOP-NEXT: int3 From 159ff5f256e065b07ebc871cf29e377dfe6166d7 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 24 Jun 2026 21:04:20 +0200 Subject: [PATCH 392/511] [VectorUtils] Discard predicates if no interleave groups are found. (#205424) If no interleave groups are found, the predicates won't be used and should be dropped. Update to use the new variant of getPtrStride which returns the added predicates (added in https://github.com/llvm/llvm-project/pull/203787). This enables only committing the predicates if there are any interleave groups. PR: https://github.com/llvm/llvm-project/pull/205424 --- llvm/include/llvm/Analysis/VectorUtils.h | 7 +++- llvm/lib/Analysis/VectorUtils.cpp | 12 ++++-- .../AArch64/discarded-interleave-group.ll | 42 ++++++++++++------- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index b863a307b143e..e088371be9464 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -805,10 +805,13 @@ class InterleavedAccessInfo { delete Group; } - /// Collect all the accesses with a constant stride in program order. + /// Collect all the accesses with a constant stride in program order. Any + /// SCEV predicates needed to compute the strides are added to \p + /// Predicates. void collectConstStrideAccesses( MapVector &AccessStrideInfo, - const DenseMap &Strides); + const DenseMap &Strides, + SmallVectorImpl &Predicates); /// Returns true if \p Stride is allowed in an interleaved group. 
LLVM_ABI static bool isStrided(int Stride); diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index ecded79e990f3..884229e09c6be 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -1309,7 +1309,8 @@ bool InterleavedAccessInfo::isStrided(int Stride) { void InterleavedAccessInfo::collectConstStrideAccesses( MapVector &AccessStrideInfo, - const DenseMap &Strides) { + const DenseMap &Strides, + SmallVectorImpl &Predicates) { auto &DL = TheLoop->getHeader()->getDataLayout(); // Since it's desired that the load/store instructions be maintained in @@ -1341,7 +1342,7 @@ void InterleavedAccessInfo::collectConstStrideAccesses( // even without the transformation. The wrapping checks are therefore // deferred until after we've formed the interleaved groups. int64_t Stride = getPtrStride(PSE, ElementTy, Ptr, TheLoop, *DT, Strides, - /*Assume=*/true, /*ShouldCheckWrap=*/false) + /*ShouldCheckWrap=*/false, &Predicates) .value_or(0); const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); @@ -1393,7 +1394,8 @@ void InterleavedAccessInfo::analyzeInterleaving( // Holds all accesses with a constant stride. MapVector AccessStrideInfo; - collectConstStrideAccesses(AccessStrideInfo, Strides); + SmallVector Predicates; + collectConstStrideAccesses(AccessStrideInfo, Strides, Predicates); if (AccessStrideInfo.empty()) return; @@ -1589,6 +1591,10 @@ void InterleavedAccessInfo::analyzeInterleaving( } // Iteration over A accesses. } // Iteration over B accesses. + // Commit the collected predicates to PSE if any candidate group was formed. 
+ if (!LoadGroups.empty() || !StoreGroups.empty()) + PSE.addPredicates(Predicates); + auto InvalidateGroupIfMemberMayWrap = [&](InterleaveGroup *Group, int Index, const char *FirstOrLast) -> bool { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/discarded-interleave-group.ll b/llvm/test/Transforms/LoopVectorize/AArch64/discarded-interleave-group.ll index 4d21309776b40..1c79629ac1754 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/discarded-interleave-group.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/discarded-interleave-group.ll @@ -3,28 +3,42 @@ target triple = "arm64-apple-macosx" -; FIXME: should vectorize without predicates. define void @urem_lookup(ptr noalias %src, ptr noalias %dst, ptr noalias %tbl, i64 %N) #0 { ; CHECK-LABEL: define void @urem_lookup( ; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]], ptr noalias [[TBL:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP0]], 2 +; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[N]]) +; CHECK-NEXT: [[TMP2:%.*]] = call @llvm.stepvector.nxv4i64() +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TMP1]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[LOOP]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi [ [[ACTIVE_LANE_MASK_ENTRY]], %[[LOOP]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[TMP2]], %[[LOOP]] ], [ [[VEC_IND_NEXT:%.*]], 
%[[VECTOR_BODY]] ] ; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr inbounds float, ptr [[SRC]], i64 [[IV]] -; CHECK-NEXT: [[X:%.*]] = load float, ptr [[GEP_SRC]], align 4 -; CHECK-NEXT: [[CLAMPED:%.*]] = urem i64 [[IV]], 4 -; CHECK-NEXT: [[GEP_TBL:%.*]] = getelementptr inbounds float, ptr [[TBL]], i64 [[CLAMPED]] -; CHECK-NEXT: [[T:%.*]] = load float, ptr [[GEP_TBL]], align 4 -; CHECK-NEXT: [[M1:%.*]] = fmul float [[X]], [[T]] -; CHECK-NEXT: [[M2:%.*]] = fadd float [[M1]], [[X]] -; CHECK-NEXT: [[M3:%.*]] = fmul float [[M2]], [[T]] +; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4f32.p0(ptr align 4 [[GEP_SRC]], [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP4:%.*]] = urem [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TBL]], [[TMP4]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4f32.nxv4p0( align 4 [[TMP5]], [[ACTIVE_LANE_MASK]], poison) +; CHECK-NEXT: [[TMP6:%.*]] = fmul [[WIDE_MASKED_LOAD]], [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[TMP7:%.*]] = fadd [[TMP6]], [[WIDE_MASKED_LOAD]] +; CHECK-NEXT: [[TMP8:%.*]] = fmul [[TMP7]], [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds float, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store float [[M3]], ptr [[GEP_DST]], align 4 -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[COND]], label %[[EXIT:.*]], label %[[LOOP]] +; CHECK-NEXT: call void @llvm.masked.store.nxv4f32.p0( [[TMP8]], ptr align 4 [[GEP_DST]], [[ACTIVE_LANE_MASK]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[IV]], [[TMP1]] +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX_NEXT]], i64 [[N]]) +; CHECK-NEXT: [[TMP10:%.*]] = extractelement [[ACTIVE_LANE_MASK_NEXT]], i64 0 +; CHECK-NEXT: [[TMP11:%.*]] = xor i1 [[TMP10]], true +; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: 
br i1 [[TMP11]], label %[[EXIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[EXIT]]: +; CHECK-NEXT: br label %[[EXIT1:.*]] +; CHECK: [[EXIT1]]: ; CHECK-NEXT: ret void ; entry: From d7c35503dfa7835e59fa4e3524ac25853fb168c2 Mon Sep 17 00:00:00 2001 From: Brian Cain Date: Wed, 24 Jun 2026 14:04:49 -0500 Subject: [PATCH 393/511] [tysan] Guard mallopt() use on SANITIZER_GLIBC instead of SANITIZER_LINUX (#203118) mallopt() is a glibc extension, not provided by other Linux C libraries such as musl, but its declaration and its use in InitializeInterceptors() are gated on SANITIZER_LINUX. Linking the TySan runtime for a musl target therefore fails: ld.lld: error: undefined symbol: mallopt >>> referenced by tysan_interceptors.cpp:215 in >>> __tysan::InitializeInterceptors() in libclang_rt.tysan-hexagon.a --- compiler-rt/lib/tysan/tysan_interceptors.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/tysan/tysan_interceptors.cpp b/compiler-rt/lib/tysan/tysan_interceptors.cpp index a93fcb4c3bcb0..095ce0e74bb8b 100644 --- a/compiler-rt/lib/tysan/tysan_interceptors.cpp +++ b/compiler-rt/lib/tysan/tysan_interceptors.cpp @@ -22,7 +22,7 @@ #define TYSAN_INTERCEPT___STRDUP 0 #endif -#if SANITIZER_LINUX +#if SANITIZER_GLIBC extern "C" int mallopt(int param, int value); #endif @@ -211,7 +211,7 @@ void InitializeInterceptors() { CHECK_EQ(inited, 0); // Instruct libc malloc to consume less memory. -#if SANITIZER_LINUX +#if SANITIZER_GLIBC mallopt(1, 0); // M_MXFAST mallopt(-3, 32 * 1024); // M_MMAP_THRESHOLD #endif From 95df8a7b10c16b1694fa552f07284e53608b9195 Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Wed, 24 Jun 2026 15:12:48 -0400 Subject: [PATCH 394/511] [docs] Create utils/docs (#203962) llvm-project is home to many sphinx documentation sites, each with configuration quirks and bespoke extensions. The sphinx config model makes sharing code somewhat difficult. 
There are options like sphinx-multiproject, but some of our docs builds are out of the source tree while some are done out of the binary tree, so the multiproject configuration itself would need to be generated. It also would impose more uniformity around extensions than required. This change instead creates a python package at utils/docs/llvm_sphinx and makes it available to all sphinx-build processes via PYTHONPATH. Each conf.py does not modify its own sys.path because not all builds are out of the source tree, so there isn't a stable relative path to use to refer to the utils/docs/ directory. Type checking via pyright in new package is pinned to being python 3.8 compatible. The myst slug script from llvm/docs is promoted to be the default, with a slight edit to targets needed in FlangDriver.md to adapt to the change. The ghlinks extension from clang gets lifted to be the first generic extension, although it is still only used in clang currently. I used it as an example of a general extension that might be useful for other projects, but I also intend to add a new generic extension in a future patch. As part of the move ghlinks is refactored a bit: * Rather than have a main function directly to run its tests, those tests are now run via the command-line entrypoint to the new package, e.g. `python utils/docs --test` will now replace `python clang/docs/ghlinks.py --test`. This makes the command-line more pithy (`python utils/docs/llvm_sphinx/ext/ghlinks --test` is a bit long), and makes the default behavior when running be to include utils/docs in the sys.path, so imports can be absolute and the behavior is as close to the CMake targets as possible. * The static files that were previously written to a tempdir during testing are just committed directly in git. 
The assumption here is that the original version was taking pains to avoid polluting the clang/docs directory with test files that would confuse sphinx-build, and without that limitation the workaround can be removed. * Some typing-related changes to make it strictly python 3.8 compatible. Change-Id: Ie16de273f74973f163dbf7fe9d22d80a466d051f --- bolt/docs/conf.py | 24 +--- clang-tools-extra/docs/conf.py | 35 +---- clang/docs/conf.py | 39 ++--- flang/docs/FlangDriver.md | 4 +- flang/docs/conf.py | 37 +---- libc/docs/conf.py | 25 +--- libcxx/docs/conf.py | 24 +--- libunwind/docs/conf.py | 24 +--- lld/docs/conf.py | 24 +--- lldb/docs/_ext/lldb_setting.py | 4 +- lldb/docs/conf.py | 45 +----- llvm/cmake/modules/AddSphinxTarget.cmake | 1 + llvm/docs/conf.py | 58 ++------ llvm/docs/llvm_slug.py | 16 --- openmp/docs/conf.py | 35 +---- orc-rt/docs/conf.py | 31 +--- polly/docs/conf.py | 24 +--- pyproject.toml | 3 +- utils/docs/README.md | 20 +++ utils/docs/__main__.py | 31 ++++ utils/docs/llvm_sphinx/__init__.py | 71 +++++++++ utils/docs/llvm_sphinx/ext/__init__.py | 0 .../docs/llvm_sphinx/ext/ghlinks/__init__.py | 136 +----------------- .../docs/llvm_sphinx/ext/ghlinks/test/conf.py | 7 + .../llvm_sphinx/ext/ghlinks/test/index.rst | 8 ++ .../llvm_sphinx/ext/ghlinks/test/markdown.md | 14 ++ .../llvm_sphinx/ext/ghlinks/test/rest.rst | 19 +++ utils/docs/llvm_sphinx/help.py | 27 ++++ 28 files changed, 280 insertions(+), 506 deletions(-) delete mode 100644 llvm/docs/llvm_slug.py create mode 100644 utils/docs/README.md create mode 100644 utils/docs/__main__.py create mode 100644 utils/docs/llvm_sphinx/__init__.py create mode 100644 utils/docs/llvm_sphinx/ext/__init__.py rename clang/docs/ghlinks.py => utils/docs/llvm_sphinx/ext/ghlinks/__init__.py (60%) create mode 100644 utils/docs/llvm_sphinx/ext/ghlinks/test/conf.py create mode 100644 utils/docs/llvm_sphinx/ext/ghlinks/test/index.rst create mode 100644 utils/docs/llvm_sphinx/ext/ghlinks/test/markdown.md create mode 100644 
utils/docs/llvm_sphinx/ext/ghlinks/test/rest.rst create mode 100644 utils/docs/llvm_sphinx/help.py diff --git a/bolt/docs/conf.py b/bolt/docs/conf.py index ededa1ccbb0be..14b6c1c8eff35 100644 --- a/bolt/docs/conf.py +++ b/bolt/docs/conf.py @@ -13,31 +13,15 @@ import sys, os from datetime import date -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) +from llvm_sphinx import * # see llvm-project/utils/docs/README.md -# -- General configuration ----------------------------------------------------- +globals().update(common_conf(tags, markdown=Markdown.NEVER)) -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' +# -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ["sphinx.ext.intersphinx", "sphinx.ext.todo"] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# The suffix of source filenames. -source_suffix = ".rst" - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" +extensions += ["sphinx.ext.intersphinx", "sphinx.ext.todo"] # General information about the project. project = "BOLT" diff --git a/clang-tools-extra/docs/conf.py b/clang-tools-extra/docs/conf.py index 8247cd173fcf6..a80bbcf8638b5 100644 --- a/clang-tools-extra/docs/conf.py +++ b/clang-tools-extra/docs/conf.py @@ -14,42 +14,15 @@ import sys, os from datetime import date -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. 
If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) +from llvm_sphinx import * # see llvm-project/utils/docs/README.md -# -- General configuration ----------------------------------------------------- +globals().update(common_conf(tags, markdown=Markdown.EXCEPT_MAN)) -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' +# -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ["sphinx.ext.todo", "sphinx.ext.mathjax"] - -# When building man pages, we do not use the markdown pages, -# So, we can continue without the myst_parser dependencies. -# Doing so reduces dependencies of some packaged llvm distributions. -try: - import myst_parser - - extensions.append("myst_parser") -except ImportError: - if not tags.has("builder-man"): - raise - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# The suffix of source filenames. -source_suffix = [".rst", ".md"] - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" +extensions += ["sphinx.ext.todo", "sphinx.ext.mathjax"] # General information about the project. project = "Extra Clang Tools" diff --git a/clang/docs/conf.py b/clang/docs/conf.py index 23e059baeb863..cac0686d7f426 100644 --- a/clang/docs/conf.py +++ b/clang/docs/conf.py @@ -15,44 +15,23 @@ import sys, os from datetime import date -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-sys.path.insert(0, os.path.abspath(".")) +from llvm_sphinx import * # see llvm-project/utils/docs/README.md -# -- General configuration ----------------------------------------------------- +globals().update(common_conf(tags, markdown=Markdown.EXCEPT_MAN)) -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' +# -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ["sphinx.ext.todo", "sphinx.ext.mathjax", "sphinx.ext.graphviz", "ghlinks"] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - +extensions += [ + "sphinx.ext.todo", + "sphinx.ext.mathjax", + "sphinx.ext.graphviz", + "llvm_sphinx.ext.ghlinks", +] import sphinx -# When building man pages, we do not use the markdown pages, -# So, we can continue without the myst_parser dependencies. -# Doing so reduces dependencies of some packaged llvm distributions. -try: - import myst_parser - - extensions.append("myst_parser") -except ImportError: - if not tags.has("builder-man"): - raise - - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" - # General information about the project. 
project = "Clang" copyright = "2007-%d, The Clang Team" % date.today().year diff --git a/flang/docs/FlangDriver.md b/flang/docs/FlangDriver.md index 2d1140510afcb..4edc99944ad44 100644 --- a/flang/docs/FlangDriver.md +++ b/flang/docs/FlangDriver.md @@ -463,9 +463,9 @@ static FrontendPluginRegistry::Add X( ### Loading and Running a Plugin In order to use plugins, there are 2 command line options made available to the frontend driver, `flang -fc1`: -* [`-load `](#the--load-dsopath-option) for loading the dynamic shared +* [`-load `](#the-load-dsopath-option) for loading the dynamic shared object of the plugin -* [`-plugin `](#the--plugin-name-option) for calling the registered plugin +* [`-plugin `](#the-plugin-name-option) for calling the registered plugin Invocation of the example plugin is done through: ```bash diff --git a/flang/docs/conf.py b/flang/docs/conf.py index b8a30aff6bf94..1ad0c19ecb3fd 100644 --- a/flang/docs/conf.py +++ b/flang/docs/conf.py @@ -11,10 +11,9 @@ from datetime import date -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) +from llvm_sphinx import * # see llvm-project/utils/docs/README.md + +globals().update(common_conf(tags)) # -- General configuration ----------------------------------------------------- @@ -22,47 +21,19 @@ # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = [ +extensions += [ "sphinx.ext.todo", "sphinx.ext.mathjax", "sphinx.ext.intersphinx", "sphinx.ext.autodoc", ] -try: - import myst_parser - - extensions.append("myst_parser") -except ImportError: - raise ImportError( - "myst_parser is required to build documentation, including man pages." 
- ) - - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] -source_suffix = { - ".rst": "restructuredtext", - ".md": "markdown", -} -myst_heading_anchors = 6 - -# Enable myst's substitution extension since markdown files cannot use the -# |version| and |release| substitutions available to .rst files. -myst_enable_extensions = ["substitution"] - # The substitutions to use in markdown files. This contains unconditional # substitutions, but more may be added once the configuration is obtained. myst_substitutions = {"in_progress": "(In-Progress) " if tags.has("PreRelease") else ""} import sphinx -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" - # General information about the project. project = "Flang" copyright = "2017-%d, The Flang Team" % date.today().year diff --git a/libc/docs/conf.py b/libc/docs/conf.py index 4608a0282c24c..1490e7bde7411 100644 --- a/libc/docs/conf.py +++ b/libc/docs/conf.py @@ -13,37 +13,20 @@ import sys, os from datetime import date -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) +from llvm_sphinx import * # see llvm-project/utils/docs/README.md -# -- General configuration ----------------------------------------------------- +globals().update(common_conf(tags, markdown=Markdown.EXCEPT_MAN)) -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' +# -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
-extensions = [ +extensions += [ "sphinx.ext.intersphinx", "sphinx.ext.todo", "sphinx_reredirects", - "myst_parser", ] -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# The suffix of source filenames. -source_suffix = [".rst", ".md"] - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" - # General information about the project. project = "libc" copyright = "2011-%d, LLVM Project" % date.today().year diff --git a/libcxx/docs/conf.py b/libcxx/docs/conf.py index d635bac57e275..9051bce7cf356 100644 --- a/libcxx/docs/conf.py +++ b/libcxx/docs/conf.py @@ -13,31 +13,15 @@ import sys, os from datetime import date -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) +from llvm_sphinx import * # see llvm-project/utils/docs/README.md -# -- General configuration ----------------------------------------------------- +globals().update(common_conf(tags, markdown=Markdown.NEVER)) -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' +# -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ["sphinx.ext.intersphinx", "sphinx.ext.todo"] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# The suffix of source filenames. -source_suffix = ".rst" - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. 
-master_doc = "index" +extensions += ["sphinx.ext.intersphinx", "sphinx.ext.todo"] # General information about the project. project = "libc++" diff --git a/libunwind/docs/conf.py b/libunwind/docs/conf.py index 29f9c24a7ee26..2d91df6753d69 100644 --- a/libunwind/docs/conf.py +++ b/libunwind/docs/conf.py @@ -13,31 +13,15 @@ import sys, os from datetime import date -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) +from llvm_sphinx import * # see llvm-project/utils/docs/README.md -# -- General configuration ----------------------------------------------------- +globals().update(common_conf(tags, markdown=Markdown.NEVER)) -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' +# -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ["sphinx.ext.intersphinx", "sphinx.ext.todo"] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# The suffix of source filenames. -source_suffix = ".rst" - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" +extensions += ["sphinx.ext.intersphinx", "sphinx.ext.todo"] # General information about the project. project = "libunwind" diff --git a/lld/docs/conf.py b/lld/docs/conf.py index 6f411ed3863d4..962424a589540 100644 --- a/lld/docs/conf.py +++ b/lld/docs/conf.py @@ -13,31 +13,15 @@ import sys, os from datetime import date -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. 
If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) +from llvm_sphinx import * # see llvm-project/utils/docs/README.md -# -- General configuration ----------------------------------------------------- +globals().update(common_conf(tags, markdown=Markdown.EXCEPT_MAN)) -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' +# -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ["sphinx.ext.intersphinx", "sphinx.ext.todo"] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# The suffix of source filenames. -source_suffix = ".rst" - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" +extensions += ["sphinx.ext.intersphinx", "sphinx.ext.todo"] # General information about the project. 
project = "lld" diff --git a/lldb/docs/_ext/lldb_setting.py b/lldb/docs/_ext/lldb_setting.py index 1d6171ab482e4..82b04505eb1f5 100644 --- a/lldb/docs/_ext/lldb_setting.py +++ b/lldb/docs/_ext/lldb_setting.py @@ -5,7 +5,7 @@ from sphinx.application import Sphinx from sphinx.directives import ObjectDescription from sphinx.util.docfields import Field, GroupedField -import llvm_slug +from llvm_sphinx import make_slug # Example: @@ -37,7 +37,7 @@ def handle_signature(self, sig: str, signode: addnodes.desc_signature): typ, classes=[ "lldb-setting-type", - f"lldb-setting-type-{llvm_slug.make_slug(typ)}", + f"lldb-setting-type-{make_slug(typ)}", ], ) signode["ids"].append(sig) diff --git a/lldb/docs/conf.py b/lldb/docs/conf.py index a0ce2807d231a..e8d71ce4c4e63 100644 --- a/lldb/docs/conf.py +++ b/lldb/docs/conf.py @@ -14,8 +14,9 @@ from datetime import date from pathlib import Path -# Add path for llvm_slug module. -sys.path.insert(0, os.path.abspath(os.path.join("..", "..", "llvm", "docs"))) +from llvm_sphinx import * # see llvm-project/utils/docs/README.md + +globals().update(common_conf(tags, markdown=Markdown.EXCEPT_MAN)) building_man_page = tags.has("builder-man") @@ -39,14 +40,12 @@ # -- General configuration ----------------------------------------------------- -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' - +# lldb specific sphinx extensions sys.path.append(str(Path("_ext").resolve())) # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = [ +extensions += [ "sphinx.ext.todo", # Support for todo items. "sphinx.ext.mathjax", # Render math via JavaScript. "sphinx.ext.intersphinx", # Link to other projects’ documentation. 
@@ -57,29 +56,10 @@ # Include a file from $LLDB_BUILD_DIR (see _ext/build_include.py) extensions.append("build_include") -# When building man pages, we do not use the markdown pages, -# So, we can continue without the myst_parser dependencies. -# Doing so reduces dependencies of some packaged llvm distributions. -try: - import myst_parser - - extensions.append("myst_parser") -except ImportError: - if not tags.has("builder-man"): - raise - -# Automatic anchors for markdown titles -myst_heading_anchors = 6 -myst_heading_slug_func = "llvm_slug.make_slug" -myst_enable_extensions = ["fieldlist", "colon_fence", "deflist"] +myst_enable_extensions += ["fieldlist", "colon_fence", "deflist"] autodoc_default_options = {"special-members": True} -# The suffix of source filenames. -source_suffix = { - ".rst": "restructuredtext", -} - # Unless we only generate the basic manpage we need the plugin for generating # the Python API documentation. if not building_man_page: @@ -99,19 +79,6 @@ # a list of builtin themes. html_theme = "furo" - # Since man pages do not use markdown, we do not need to register a markdown - # parser. - source_suffix[".md"] = "markdown" - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" - # General information about the project. 
project = "LLDB" copyright = "2007-%d, The LLDB Team" % date.today().year diff --git a/llvm/cmake/modules/AddSphinxTarget.cmake b/llvm/cmake/modules/AddSphinxTarget.cmake index 9de169d7297cb..379e36db48a25 100644 --- a/llvm/cmake/modules/AddSphinxTarget.cmake +++ b/llvm/cmake/modules/AddSphinxTarget.cmake @@ -46,6 +46,7 @@ function (add_sphinx_target builder project) add_custom_target(${SPHINX_TARGET_NAME} COMMAND ${CMAKE_COMMAND} -E env ${ARG_ENV_VARS} + --modify "PYTHONPATH=path_list_append:${LLVM_MAIN_SRC_DIR}/../utils/docs" ${SPHINX_EXECUTABLE} -b ${builder} -d "${SPHINX_DOC_TREE_DIR}" diff --git a/llvm/docs/conf.py b/llvm/docs/conf.py index af9781b66c692..2727f2248e803 100644 --- a/llvm/docs/conf.py +++ b/llvm/docs/conf.py @@ -14,59 +14,27 @@ import sys, os, re from datetime import date -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -sys.path.insert(0, os.path.abspath(".")) +from llvm_sphinx import * # see llvm-project/utils/docs/README.md -# -- General configuration ----------------------------------------------------- +globals().update(common_conf(tags, markdown=Markdown.EXCEPT_MAN)) -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' +# -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ["sphinx.ext.intersphinx", "sphinx.ext.todo"] - -# When building man pages, we do not use the markdown pages, -# So, we can continue without the myst_parser dependencies. -# Doing so reduces dependencies of some packaged llvm distributions. 
-try: - import myst_parser - - extensions.append("myst_parser") -except ImportError: - if not tags.has("builder-man"): - raise -else: - myst_enable_extensions = ["deflist", "substitution"] - myst_url_schemes = { - "http": None, - "https": None, - "mailto": None, - "ftp": None, - "doxygen": {"url": "/doxygen/{{path}}"}, - } - -# Automatic anchors for markdown titles -myst_heading_anchors = 6 -myst_heading_slug_func = "llvm_slug.make_slug" - - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -source_suffix = [".rst", ".md"] +extensions += ["sphinx.ext.intersphinx", "sphinx.ext.todo"] + +myst_enable_extensions += ["deflist"] +myst_url_schemes = { + "http": None, + "https": None, + "mailto": None, + "ftp": None, + "doxygen": {"url": "/doxygen/{{path}}"}, +} import sphinx -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" - -# General information about the project. project = "LLVM" copyright = "2003-%d, LLVM Project" % date.today().year diff --git a/llvm/docs/llvm_slug.py b/llvm/docs/llvm_slug.py deleted file mode 100644 index 9dd8b1cd3f1aa..0000000000000 --- a/llvm/docs/llvm_slug.py +++ /dev/null @@ -1,16 +0,0 @@ -# -*- coding: utf-8 -*- -# -# LLVM documentation anchor slug formatting - -# Some of our markdown documentation numbers section titles -# This helpers is used by myst to remove that numbering from the anchor links. - -from docutils.nodes import make_id - - -def make_slug(str): - import re - - str = re.sub(r"^\s*(\w\.)+\w\s", "", str) - str = re.sub(r"^\s*\w\.\s", "", str) - return make_id(str) diff --git a/openmp/docs/conf.py b/openmp/docs/conf.py index d7002ee033147..ec5cbc3e4f2b0 100644 --- a/openmp/docs/conf.py +++ b/openmp/docs/conf.py @@ -14,46 +14,21 @@ import sys, os from datetime import date -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. 
If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# sys.path.insert(0, os.path.abspath('.')) +from llvm_sphinx import * # see llvm-project/utils/docs/README.md -# -- General configuration ----------------------------------------------------- +globals().update(common_conf(tags)) -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' +# -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = [ +extensions += [ "sphinx.ext.todo", "sphinx.ext.mathjax", "sphinx.ext.intersphinx", - "myst_parser", ] -# Add path for llvm_slug module. -sys.path.insert(0, os.path.abspath(os.path.join("..", "..", "llvm", "docs"))) - - -myst_enable_extensions = ["substitution", "colon_fence", "deflist"] - -# Automatic anchors for markdown titles -myst_heading_anchors = 6 -myst_heading_slug_func = "llvm_slug.make_slug" - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# The suffix of source filenames. -source_suffix = [".rst", ".md"] - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" +myst_enable_extensions += ["colon_fence", "deflist"] # General information about the project. project = "LLVM/OpenMP" diff --git a/orc-rt/docs/conf.py b/orc-rt/docs/conf.py index b9f197d85b847..8d307024133a0 100644 --- a/orc-rt/docs/conf.py +++ b/orc-rt/docs/conf.py @@ -11,10 +11,9 @@ from datetime import date -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-# sys.path.insert(0, os.path.abspath('.')) +from llvm_sphinx import * # see llvm-project/utils/docs/README.md + +globals().update(common_conf(tags, markdown=Markdown.EXCEPT_MAN)) # -- General configuration ----------------------------------------------------- @@ -22,37 +21,15 @@ # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = [ +extensions += [ "sphinx.ext.todo", "sphinx.ext.mathjax", "sphinx.ext.intersphinx", "sphinx.ext.autodoc", ] -# When building man pages, we do not use the markdown pages, -# So, we can continue without the myst_parser dependencies. -# Doing so reduces dependencies of some packaged llvm distributions. -try: - import myst_parser - - extensions.append("myst_parser") -except ImportError: - if not tags.has("builder-man"): - raise - - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] -myst_heading_anchors = 6 - import sphinx -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" - # General information about the project. project = "ORC-RT" copyright = "2025-%d, The ORC-RT Team" % date.today().year diff --git a/polly/docs/conf.py b/polly/docs/conf.py index 908f4d6fa6e17..7d3ece63534a5 100644 --- a/polly/docs/conf.py +++ b/polly/docs/conf.py @@ -14,31 +14,15 @@ import sys, os from datetime import date -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. 
-# sys.path.insert(0, os.path.abspath('.')) +from llvm_sphinx import * # see llvm-project/utils/docs/README.md -# -- General configuration ----------------------------------------------------- +globals().update(common_conf(tags, markdown=Markdown.NEVER)) -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' +# -- General configuration ----------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ["sphinx.ext.todo", "sphinx.ext.mathjax"] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ["_templates"] - -# The suffix of source filenames. -source_suffix = ".rst" - -# The encoding of source files. -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = "index" +extensions += ["sphinx.ext.todo", "sphinx.ext.mathjax"] # General information about the project. project = "Polly" diff --git a/pyproject.toml b/pyproject.toml index c84c7ffe2dc13..2992b81d026e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,8 @@ extend-exclude = ''' [tool.pyright] executionEnvironments = [ - { root = "lldb/packages/Python", pythonVersion = "3.8" } + { root = "lldb/packages/Python", pythonVersion = "3.8" }, + { root = "utils/docs", pythonVersion = "3.8" }, ] [dependency-groups] diff --git a/utils/docs/README.md b/utils/docs/README.md new file mode 100644 index 0000000000000..e14b2d89d8a94 --- /dev/null +++ b/utils/docs/README.md @@ -0,0 +1,20 @@ +# utils/docs + +Common configuration and extensions for all of the sphinx projects in the +llvm-project monorepo (e.g. `llvm/docs/`, `clang/docs/`, `bolt/docs`, ...). + +This directory is injected into the `PYTHONPATH` of `sphinx-build` processes as +part of the llvm-project CMake build. 
+ +**Note:** On the command-line, you must manually ensure this directory is part +of your `PYTHONPATH`. For example, to use `sphinx-autobuild` for projects which +support it, you might use: + +``` +$ PYTHONPATH=$PWD/utils/docs sphinx-autobuild llvm/docs/ /tmp/sphinx-build +``` + +## Testing your Environment + +To smoke-test your sphinx environment outside of a CMake build, you can use the +`utils/docs/__main__.py` entrypoint (e.g. `python3 utils/docs --test`). diff --git a/utils/docs/__main__.py b/utils/docs/__main__.py new file mode 100644 index 0000000000000..9fdcbcda35bbc --- /dev/null +++ b/utils/docs/__main__.py @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- + +"""Command-line entrypoint to utils/docs + +Use this as e.g. `python utils/docs --test` to run docs smoke tests. +""" + +import sys +import argparse +from llvm_sphinx.ext import ghlinks +from typing import List + + +def main(argv: List[str]) -> int: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument("--test", action="store_true", help="run sphinx self-tests") + args = parser.parse_args(argv) + + if args.test: + ghlinks.run_tests() + print( + "ghlinks.py: tests passed; next, rebuild docs-clang-html and spot check the release notes" + ) + return 0 + + parser.print_help(sys.stderr) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) diff --git a/utils/docs/llvm_sphinx/__init__.py b/utils/docs/llvm_sphinx/__init__.py new file mode 100644 index 0000000000000..3b63964e5d3c0 --- /dev/null +++ b/utils/docs/llvm_sphinx/__init__.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- + +"""Shared configuration and extensions for llvm-project sphinx documentation + +Every sphinx `conf.py` in llvm-project is intended to use `common_conf` as a +baseline configuration. 
The sphinx module-as-conf model means the easiest +way to achieve this is by updating `globals()` directly, as in: + + from llvm_sphinx import * # see llvm-project/utils/docs/README.md + + globals().update(common_conf(tags)) + +Note: common settings like `extensions` should not be reassigned after the +call to `common_conf`, they should be modified/appended to, as in: + + extensions += ["foo"] + +""" + +import sys +from typing import Any, Dict, TYPE_CHECKING +from enum import Enum, auto +from sphinx.util.tags import Tags +from llvm_sphinx.help import venv_help + + +class Markdown(Enum): + ALWAYS = auto() + EXCEPT_MAN = auto() + NEVER = auto() + + +def common_conf(tags: Tags, markdown=Markdown.ALWAYS) -> Dict[str, Any]: + # If your documentation needs a minimal Sphinx version, state it here. + # needs_sphinx = '1.0' + # The encoding of source files. + # source_encoding = 'utf-8-sig' + extensions = [] + source_suffix = {".rst": "restructuredtext"} + if markdown != Markdown.NEVER: + # When building man pages, we do not use the markdown pages, + # So, we can continue without the myst_parser dependencies. + # Doing so reduces dependencies of some packaged llvm distributions. + try: + import myst_parser + + extensions.append("myst_parser") + except ImportError as err: + if markdown == Markdown.ALWAYS or not tags.has("builder-man"): + print(venv_help(err), file=sys.stderr) + raise + else: + source_suffix[".md"] = "markdown" + myst_enable_extensions = ["substitution"] + myst_heading_anchors = 6 + myst_heading_slug_func = "llvm_sphinx.make_slug" + templates_path = ["_templates"] + master_doc = "index" + + return locals() + + +# Some of our markdown documentation numbers section titles +# This helpers is used by myst to remove that numbering from the anchor links. 
+def make_slug(s: str) -> str: + from docutils.nodes import make_id + from re import sub + + s = sub(r"^\s*(\w\.)+\w\s", "", s) + s = sub(r"^\s*\w\.\s", "", s) + return make_id(s) diff --git a/utils/docs/llvm_sphinx/ext/__init__.py b/utils/docs/llvm_sphinx/ext/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/clang/docs/ghlinks.py b/utils/docs/llvm_sphinx/ext/ghlinks/__init__.py similarity index 60% rename from clang/docs/ghlinks.py rename to utils/docs/llvm_sphinx/ext/ghlinks/__init__.py index 5ba4c9b6d3230..752ad8291ac67 100644 --- a/clang/docs/ghlinks.py +++ b/utils/docs/llvm_sphinx/ext/ghlinks/__init__.py @@ -14,52 +14,22 @@ import sys import tempfile import textwrap +from typing import Dict, List, Type, Tuple from pathlib import Path - - -def _find_docs_requirements() -> str: - for parent in Path(__file__).resolve().parents: - docs_requirements = parent / "llvm/docs/requirements.txt" - if docs_requirements.exists(): - return str(docs_requirements) - return "llvm/docs/requirements.txt" - +from llvm_sphinx.help import venv_help try: from docutils import nodes from sphinx.application import Sphinx except ImportError as err: - print( - textwrap.dedent( - f""" - ghlinks.py requires the LLVM documentation build dependencies. - - Import failed with: - {err} - - The standard requirements file is: - {_find_docs_requirements()} - - From an llvm-project checkout, a typical pip setup is: - python3 -m venv .venv - . .venv/bin/activate - python3 -m pip install -r llvm/docs/requirements.txt - python3 clang/docs/ghlinks.py --test - - With uv, a typical one-shot command is: - uv run --with-requirements llvm/docs/requirements.txt \\ - python clang/docs/ghlinks.py --test - """ - ).strip(), - file=sys.stderr, - ) + print(venv_help(err), file=sys.stderr) raise __version__ = "1.0" GH_LINK_RE = re.compile("#GH([1-9][0-9]+)") GH_LINK_TMPL = "https://github.com/llvm/llvm-project/issues/{}" -SKIP_NODES: tuple[type[nodes.Node], ...] 
= ( +SKIP_NODES: Tuple[Type[nodes.Node], ...] = ( nodes.FixedTextElement, nodes.literal, nodes.raw, @@ -123,109 +93,21 @@ def subst_gh_links(_app: Sphinx, doctree: nodes.document) -> None: replace_gh_links_in_subtree(doctree) -def setup(app: Sphinx) -> dict[str, object]: +def setup(app: Sphinx) -> Dict[str, object]: app.connect("doctree-read", subst_gh_links) return dict(version=__version__, parallel_read_safe=True, parallel_write_safe=True) -def main(argv: list[str]) -> int: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("--test", action="store_true", help="run ghlinks self-tests") - args = parser.parse_args(argv) - - if args.test: - run_tests() - print( - "ghlinks.py: tests passed; next, rebuild docs-clang-html and spot check the release notes" - ) - return 0 - - parser.print_help(sys.stderr) - return 0 - - # ----------------------------------------------------------------------------- # Test code only below: # ----------------------------------------------------------------------------- - -REST_TEST_DOC = r""" -GHLink reST Test -================ - -A paragraph links #GH123 and #GH456. - -No leading zero link #GH0123. - -Inline literal ``#GH333`` stays text. - -Existing link `#GH666 `_ stays existing. - -Code block:: - - #GH777 - -.. raw:: html - - #GH888 -""" - - -MARKDOWN_TEST_DOC = r""" -# GHLink Markdown Test - -A paragraph links #GH234 and #GH567. - -No leading zero link #GH0234. - -Inline code `#GH444` stays text. - -Existing link [#GH999](https://example.com/md) stays existing. 
- -```c -#GH778 -``` -""" - - -def _build_test_docs() -> tuple[str, str]: +def _build_test_docs() -> Tuple[str, str]: """Build in-file reST and Markdown test strings and return their HTML.""" - conf = f""" -import sys -sys.path.insert(0, {str(Path(__file__).parent)!r}) - -extensions = ["ghlinks", "myst_parser"] -master_doc = "index" -project = "ghlinks test" -source_suffix = {{ - ".rst": "restructuredtext", - ".md": "markdown", -}} -""" - index = """ -GHLink Tests -============ - -.. toctree:: - - rest - markdown -""" - + srcdir = Path(__file__).resolve().parent / "test" with tempfile.TemporaryDirectory() as tmp: tmp_path = Path(tmp) - srcdir = tmp_path / "src" outdir = tmp_path / "out" doctreedir = tmp_path / "doctrees" - srcdir.mkdir() - (srcdir / "conf.py").write_text(textwrap.dedent(conf), encoding="utf-8") - (srcdir / "index.rst").write_text(textwrap.dedent(index), encoding="utf-8") - (srcdir / "rest.rst").write_text( - textwrap.dedent(REST_TEST_DOC), encoding="utf-8" - ) - (srcdir / "markdown.md").write_text( - textwrap.dedent(MARKDOWN_TEST_DOC), encoding="utf-8" - ) - app = Sphinx( srcdir=srcdir, confdir=srcdir, @@ -267,7 +149,3 @@ def run_tests() -> None: for issue in ("0234", "444", "999", "778"): _check_contains(markdown_html, f"#GH{issue}") _check_contains(markdown_html, 'href="https://example.com/md"') - - -if __name__ == "__main__": - raise SystemExit(main(sys.argv[1:])) diff --git a/utils/docs/llvm_sphinx/ext/ghlinks/test/conf.py b/utils/docs/llvm_sphinx/ext/ghlinks/test/conf.py new file mode 100644 index 0000000000000..01d3091211613 --- /dev/null +++ b/utils/docs/llvm_sphinx/ext/ghlinks/test/conf.py @@ -0,0 +1,7 @@ +extensions = ["llvm_sphinx.ext.ghlinks", "myst_parser"] +master_doc = "index" +project = "ghlinks test" +source_suffix = { + ".rst": "restructuredtext", + ".md": "markdown", +} diff --git a/utils/docs/llvm_sphinx/ext/ghlinks/test/index.rst b/utils/docs/llvm_sphinx/ext/ghlinks/test/index.rst new file mode 100644 index 
0000000000000..45b6ef8f145f9 --- /dev/null +++ b/utils/docs/llvm_sphinx/ext/ghlinks/test/index.rst @@ -0,0 +1,8 @@ + +GHLink Tests +============ + +.. toctree:: + + rest + markdown diff --git a/utils/docs/llvm_sphinx/ext/ghlinks/test/markdown.md b/utils/docs/llvm_sphinx/ext/ghlinks/test/markdown.md new file mode 100644 index 0000000000000..8b331071c1286 --- /dev/null +++ b/utils/docs/llvm_sphinx/ext/ghlinks/test/markdown.md @@ -0,0 +1,14 @@ + +# GHLink Markdown Test + +A paragraph links #GH234 and #GH567. + +No leading zero link #GH0234. + +Inline code `#GH444` stays text. + +Existing link [#GH999](https://example.com/md) stays existing. + +```c +#GH778 +``` diff --git a/utils/docs/llvm_sphinx/ext/ghlinks/test/rest.rst b/utils/docs/llvm_sphinx/ext/ghlinks/test/rest.rst new file mode 100644 index 0000000000000..465c61b35eeeb --- /dev/null +++ b/utils/docs/llvm_sphinx/ext/ghlinks/test/rest.rst @@ -0,0 +1,19 @@ + +GHLink reST Test +================ + +A paragraph links #GH123 and #GH456. + +No leading zero link #GH0123. + +Inline literal ``#GH333`` stays text. + +Existing link `#GH666 `_ stays existing. + +Code block:: + + #GH777 + +.. raw:: html + + #GH888 diff --git a/utils/docs/llvm_sphinx/help.py b/utils/docs/llvm_sphinx/help.py new file mode 100644 index 0000000000000..4648482f9045c --- /dev/null +++ b/utils/docs/llvm_sphinx/help.py @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- + +import textwrap + + +def venv_help(err) -> str: + return textwrap.dedent( + f""" + Missing LLVM documentation build dependencies. + + Import failed with: + {err} + + The standard requirements file is: + llvm-project/llvm/docs/requirements.txt + + From an llvm-project checkout, a typical pip setup is: + python3 -m venv .venv + . 
. .venv/bin/activate + python3 -m pip install -r llvm/docs/requirements.txt + python3 utils/docs --test + + With uv, a typical one-shot command is: + uv run --with-requirements llvm/docs/requirements.txt \\ + python utils/docs --test + """ + ).strip() From a456490f496381a3a8f27f9c21c15459c65f32b9 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 24 Jun 2026 15:28:11 -0400 Subject: [PATCH 395/511] [libc] Prefix sanitizer macros with LIBC_ (#205002) Since these macros leak into user code via libc/shared/math.h, prefix them with LIBC_ to not cause collisions. Change created with: rg -l MSAN_UNPOISON libc | xargs sed -i '' \ -e 's/MSAN_UNPOISON/LIBC_MSAN_UNPOISON/g' rg -l ASAN_POISON_MEMORY_REGION libc | xargs sed -i '' -e \ 's/ASAN_POISON_MEMORY_REGION/LIBC_ASAN_POISON_MEMORY_REGION/g' rg -l ASAN_UNPOISON_MEMORY_REGION libc | xargs sed -i '' -e \ 's/ASAN_UNPOISON_MEMORY_REGION/LIBC_ASAN_UNPOISON_MEMORY_REGION/g' No behavior change. --- libc/src/__support/FPUtil/x86_64/fenv_x87_utils.h | 6 +++--- libc/src/__support/macros/sanitizer.h | 13 +++++++------ libc/src/sys/epoll/linux/epoll_pwait.cpp | 2 +- libc/src/sys/epoll/linux/epoll_pwait2.cpp | 2 +- libc/src/sys/epoll/linux/epoll_wait.cpp | 2 +- libc/src/sys/socket/linux/recv.cpp | 2 +- libc/src/sys/socket/linux/recvfrom.cpp | 4 ++-- libc/src/sys/socket/linux/recvmsg.cpp | 8 ++++---- libc/src/sys/socket/linux/socketpair.cpp | 2 +- libc/src/unistd/linux/pipe.cpp | 4 ++-- libc/src/unistd/linux/pipe2.cpp | 2 +- libc/src/unistd/linux/pread.cpp | 4 ++-- libc/src/unistd/linux/read.cpp | 4 ++-- .../src/string/memory_utils/memory_check_utils.h | 4 ++-- 14 files changed, 30 insertions(+), 29 deletions(-) diff --git a/libc/src/__support/FPUtil/x86_64/fenv_x87_utils.h b/libc/src/__support/FPUtil/x86_64/fenv_x87_utils.h index a38e4c9988084..eb4c9a8bad1d0 100644 --- a/libc/src/__support/FPUtil/x86_64/fenv_x87_utils.h +++ b/libc/src/__support/FPUtil/x86_64/fenv_x87_utils.h @@ -34,7 +34,7 @@ LIBC_INLINE static uint16_t
get_x87_control_word() { __asm fstcw w; #else // !LIBC_COMPILER_IS_MSVC asm volatile("fnstcw %0" : "=m"(w)::); - MSAN_UNPOISON(&w, sizeof(w)); + LIBC_MSAN_UNPOISON(&w, sizeof(w)); #endif // LIBC_COMPILER_IS_MSVC return w; @@ -55,7 +55,7 @@ LIBC_INLINE static uint16_t get_x87_status_word() { __asm fnstsw w; #else // !LIBC_COMPILER_IS_MSVC asm volatile("fnstsw %0" : "=m"(w)::); - MSAN_UNPOISON(&w, sizeof(w)); + LIBC_MSAN_UNPOISON(&w, sizeof(w)); #endif // LIBC_COMPILER_IS_MSVC return w; @@ -74,7 +74,7 @@ LIBC_INLINE static void get_x87_state_descriptor(X87StateDescriptor &s) { __asm fnstenv s; #else // !LIBC_COMPILER_IS_MSVC asm volatile("fnstenv %0" : "=m"(s)); - MSAN_UNPOISON(&s, sizeof(s)); + LIBC_MSAN_UNPOISON(&s, sizeof(s)); #endif // LIBC_COMPILER_IS_MSVC } diff --git a/libc/src/__support/macros/sanitizer.h b/libc/src/__support/macros/sanitizer.h index 84268a19abbb4..abed7a20a35b8 100644 --- a/libc/src/__support/macros/sanitizer.h +++ b/libc/src/__support/macros/sanitizer.h @@ -26,24 +26,25 @@ #ifdef LIBC_HAS_MEMORY_SANITIZER // Only perform MSAN unpoison in non-constexpr context. 
#include -#define MSAN_UNPOISON(addr, size) \ +#define LIBC_MSAN_UNPOISON(addr, size) \ do { \ if (!__builtin_is_constant_evaluated()) \ __msan_unpoison(addr, size); \ } while (0) #else -#define MSAN_UNPOISON(ptr, size) +#define LIBC_MSAN_UNPOISON(ptr, size) #endif #ifdef LIBC_HAS_ADDRESS_SANITIZER #include -#define ASAN_POISON_MEMORY_REGION(addr, size) \ +#define LIBC_ASAN_POISON_MEMORY_REGION(addr, size) \ __asan_poison_memory_region((addr), (size)) -#define ASAN_UNPOISON_MEMORY_REGION(addr, size) \ +#define LIBC_ASAN_UNPOISON_MEMORY_REGION(addr, size) \ __asan_unpoison_memory_region((addr), (size)) #else -#define ASAN_POISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size)) -#define ASAN_UNPOISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size)) +#define LIBC_ASAN_POISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size)) +#define LIBC_ASAN_UNPOISON_MEMORY_REGION(addr, size) \ + ((void)(addr), (void)(size)) #endif #endif // LLVM_LIBC_SRC___SUPPORT_MACROS_SANITIZER_H diff --git a/libc/src/sys/epoll/linux/epoll_pwait.cpp b/libc/src/sys/epoll/linux/epoll_pwait.cpp index 24fd1dbdc467d..d0b4cc1bed5ce 100644 --- a/libc/src/sys/epoll/linux/epoll_pwait.cpp +++ b/libc/src/sys/epoll/linux/epoll_pwait.cpp @@ -35,7 +35,7 @@ LLVM_LIBC_FUNCTION(int, epoll_pwait, return -1; } - MSAN_UNPOISON(events, ret * sizeof(struct epoll_event)); + LIBC_MSAN_UNPOISON(events, ret * sizeof(struct epoll_event)); return ret; } diff --git a/libc/src/sys/epoll/linux/epoll_pwait2.cpp b/libc/src/sys/epoll/linux/epoll_pwait2.cpp index 219984528efdd..ee507344d68d2 100644 --- a/libc/src/sys/epoll/linux/epoll_pwait2.cpp +++ b/libc/src/sys/epoll/linux/epoll_pwait2.cpp @@ -37,7 +37,7 @@ LLVM_LIBC_FUNCTION(int, epoll_pwait2, return -1; } - MSAN_UNPOISON(events, ret * sizeof(struct epoll_event)); + LIBC_MSAN_UNPOISON(events, ret * sizeof(struct epoll_event)); return ret; } diff --git a/libc/src/sys/epoll/linux/epoll_wait.cpp b/libc/src/sys/epoll/linux/epoll_wait.cpp index 
7fae7b55992fa..e459ca0e30fe5 100644 --- a/libc/src/sys/epoll/linux/epoll_wait.cpp +++ b/libc/src/sys/epoll/linux/epoll_wait.cpp @@ -41,7 +41,7 @@ LLVM_LIBC_FUNCTION(int, epoll_wait, return -1; } - MSAN_UNPOISON(events, ret * sizeof(struct epoll_event)); + LIBC_MSAN_UNPOISON(events, ret * sizeof(struct epoll_event)); return ret; } diff --git a/libc/src/sys/socket/linux/recv.cpp b/libc/src/sys/socket/linux/recv.cpp index 3c2dfdce7fbab..20b22b5d71470 100644 --- a/libc/src/sys/socket/linux/recv.cpp +++ b/libc/src/sys/socket/linux/recv.cpp @@ -28,7 +28,7 @@ LLVM_LIBC_FUNCTION(ssize_t, recv, return -1; } - MSAN_UNPOISON(buf, result.value()); + LIBC_MSAN_UNPOISON(buf, result.value()); return result.value(); } diff --git a/libc/src/sys/socket/linux/recvfrom.cpp b/libc/src/sys/socket/linux/recvfrom.cpp index 97f1c391711e7..2f9c6f7352301 100644 --- a/libc/src/sys/socket/linux/recvfrom.cpp +++ b/libc/src/sys/socket/linux/recvfrom.cpp @@ -42,13 +42,13 @@ LLVM_LIBC_FUNCTION(ssize_t, recvfrom, } ssize_t ret = result.value(); - MSAN_UNPOISON(buf, ret); + LIBC_MSAN_UNPOISON(buf, ret); if (src_addr) { size_t min_src_addr_size = (*addrlen < srcaddr_sz) ? *addrlen : srcaddr_sz; (void)min_src_addr_size; // prevent "set but not used" warning - MSAN_UNPOISON(src_addr, min_src_addr_size); + LIBC_MSAN_UNPOISON(src_addr, min_src_addr_size); } return ret; } diff --git a/libc/src/sys/socket/linux/recvmsg.cpp b/libc/src/sys/socket/linux/recvmsg.cpp index e2361e2d40245..ced8961f107be 100644 --- a/libc/src/sys/socket/linux/recvmsg.cpp +++ b/libc/src/sys/socket/linux/recvmsg.cpp @@ -28,13 +28,13 @@ LLVM_LIBC_FUNCTION(ssize_t, recvmsg, (int sockfd, msghdr *msg, int flags)) { } // Unpoison the msghdr, as well as all its components. 
- MSAN_UNPOISON(msg, sizeof(msghdr)); - MSAN_UNPOISON(msg->msg_name, msg->msg_namelen); + LIBC_MSAN_UNPOISON(msg, sizeof(msghdr)); + LIBC_MSAN_UNPOISON(msg->msg_name, msg->msg_namelen); for (size_t i = 0; i < msg->msg_iovlen; ++i) { - MSAN_UNPOISON(msg->msg_iov[i].iov_base, msg->msg_iov[i].iov_len); + LIBC_MSAN_UNPOISON(msg->msg_iov[i].iov_base, msg->msg_iov[i].iov_len); } - MSAN_UNPOISON(msg->msg_control, msg->msg_controllen); + LIBC_MSAN_UNPOISON(msg->msg_control, msg->msg_controllen); return result.value(); } diff --git a/libc/src/sys/socket/linux/socketpair.cpp b/libc/src/sys/socket/linux/socketpair.cpp index 49445e07fd6e2..bd5171333f770 100644 --- a/libc/src/sys/socket/linux/socketpair.cpp +++ b/libc/src/sys/socket/linux/socketpair.cpp @@ -26,7 +26,7 @@ LLVM_LIBC_FUNCTION(int, socketpair, return -1; } - MSAN_UNPOISON(sv, sizeof(int) * 2); + LIBC_MSAN_UNPOISON(sv, sizeof(int) * 2); return result.value(); } diff --git a/libc/src/unistd/linux/pipe.cpp b/libc/src/unistd/linux/pipe.cpp index b9943c8338056..9b0c97439dd6f 100644 --- a/libc/src/unistd/linux/pipe.cpp +++ b/libc/src/unistd/linux/pipe.cpp @@ -12,7 +12,7 @@ #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include "src/__support/macros/sanitizer.h" // for MSAN_UNPOISON +#include "src/__support/macros/sanitizer.h" // for LIBC_MSAN_UNPOISON #include // For syscall numbers. 
namespace LIBC_NAMESPACE_DECL { @@ -25,7 +25,7 @@ LLVM_LIBC_FUNCTION(int, pipe, (int pipefd[2])) { int ret = LIBC_NAMESPACE::syscall_impl( SYS_pipe2, reinterpret_cast(pipefd), 0); #endif - MSAN_UNPOISON(pipefd, sizeof(int) * 2); + LIBC_MSAN_UNPOISON(pipefd, sizeof(int) * 2); if (ret < 0) { libc_errno = -ret; return -1; diff --git a/libc/src/unistd/linux/pipe2.cpp b/libc/src/unistd/linux/pipe2.cpp index d30f3b37a1adc..baf78ff55be7b 100644 --- a/libc/src/unistd/linux/pipe2.cpp +++ b/libc/src/unistd/linux/pipe2.cpp @@ -23,7 +23,7 @@ LLVM_LIBC_FUNCTION(int, pipe2, (int pipefd[2], int flags)) { libc_errno = -ret; return -1; } - MSAN_UNPOISON(pipefd, sizeof(int) * 2); + LIBC_MSAN_UNPOISON(pipefd, sizeof(int) * 2); return ret; } diff --git a/libc/src/unistd/linux/pread.cpp b/libc/src/unistd/linux/pread.cpp index cf3152dbbad84..782b4c2eb8a0d 100644 --- a/libc/src/unistd/linux/pread.cpp +++ b/libc/src/unistd/linux/pread.cpp @@ -13,7 +13,7 @@ #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include "src/__support/macros/sanitizer.h" // for MSAN_UNPOISON +#include "src/__support/macros/sanitizer.h" // for LIBC_MSAN_UNPOISON #include // For syscall numbers. namespace LIBC_NAMESPACE_DECL { @@ -37,7 +37,7 @@ LLVM_LIBC_FUNCTION(ssize_t, pread, } // The cast is important since there is a check that dereferences the pointer // which fails on void*. 
- MSAN_UNPOISON(reinterpret_cast(buf), count); + LIBC_MSAN_UNPOISON(reinterpret_cast(buf), count); if (ret < 0) { libc_errno = static_cast(-ret); return -1; diff --git a/libc/src/unistd/linux/read.cpp b/libc/src/unistd/linux/read.cpp index d39627e57a483..b9afe314d170b 100644 --- a/libc/src/unistd/linux/read.cpp +++ b/libc/src/unistd/linux/read.cpp @@ -12,7 +12,7 @@ #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" -#include "src/__support/macros/sanitizer.h" // for MSAN_UNPOISON +#include "src/__support/macros/sanitizer.h" // for LIBC_MSAN_UNPOISON namespace LIBC_NAMESPACE_DECL { @@ -24,7 +24,7 @@ LLVM_LIBC_FUNCTION(ssize_t, read, (int fd, void *buf, size_t count)) { } // The cast is important since there is a check that dereferences the pointer // which fails on void*. - MSAN_UNPOISON(reinterpret_cast(buf), count); + LIBC_MSAN_UNPOISON(reinterpret_cast(buf), count); return result.value(); } diff --git a/libc/test/src/string/memory_utils/memory_check_utils.h b/libc/test/src/string/memory_utils/memory_check_utils.h index c38039ebd3dd4..21c42620e1d0f 100644 --- a/libc/test/src/string/memory_utils/memory_check_utils.h +++ b/libc/test/src/string/memory_utils/memory_check_utils.h @@ -25,7 +25,7 @@ namespace LIBC_NAMESPACE_DECL { // This is a utility class to be used by Buffer below, do not use directly. 
struct PoisonedBuffer { PoisonedBuffer(size_t size) : ptr((char *)malloc(size)) { - ASAN_POISON_MEMORY_REGION(ptr, size); + LIBC_ASAN_POISON_MEMORY_REGION(ptr, size); } ~PoisonedBuffer() { free(ptr); } @@ -47,7 +47,7 @@ struct Buffer : private PoisonedBuffer { offset_ptr += distance_to_next_aligned(ptr); if (aligned == Aligned::NO) ++offset_ptr; - ASAN_UNPOISON_MEMORY_REGION(offset_ptr, size); + LIBC_ASAN_UNPOISON_MEMORY_REGION(offset_ptr, size); } cpp::span span() { return cpp::span(offset_ptr, size); } From 6deda8c2c007eaa3ddf5a2f72a85cf7c491f640f Mon Sep 17 00:00:00 2001 From: Jon Roelofs Date: Wed, 24 Jun 2026 12:43:20 -0700 Subject: [PATCH 396/511] [AArch64] Skip FrameDestroy DW_CFA_AARCH64_negate_ra_state_with_pc (#198677) The toggle semantics of DW_CFA_AARCH64_negate_ra_state_with_pc are evaluated over a linear stream of CFI opcodes, rather than along control flow edges, making it impossible to faithfully represent the RA state across all CFG paths for functions with non-trivial block layouts (for example hot-cold-splitting, or doubly-shrinkwrapped functions with multiple independent frames). libunwind works around this by recording the signing address at the FrameSetup site, and not validating that the CFI state has toggled back to "unsigned" after authentication. We can therefore omit the FrameDestroy emission, treating the marker solely as a marker for the signing site. This holds so long as each function has at most one signing location, which is currently guaranteed since shrinkwrapping does not yet create functions with more than one frame. DW_CFA_AARCH64_set_ra_state is the correct long-term fix, but cannot be used exclusively due to unwinder compatibility concerns.
See: https://github.com/ARM-software/abi-aa/issues/327 https://github.com/ARM-software/abi-aa/pull/346 rdar://174074919 --- .../lib/Target/AArch64/AArch64PointerAuth.cpp | 36 +++++++++++++++++++ .../AArch64/pauth-lr-tail-call-fpdiff.ll | 24 ------------- .../test/CodeGen/AArch64/swifttail-ptrauth.ll | 9 ----- 3 files changed, 36 insertions(+), 33 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp index cd660ad7d37f2..5372ffe869e1f 100644 --- a/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp +++ b/llvm/lib/Target/AArch64/AArch64PointerAuth.cpp @@ -84,6 +84,42 @@ static void emitPACCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, auto &MF = *MBB.getParent(); auto &MFnI = *MF.getInfo(); + // DW_CFA_AARCH64_negate_ra_state_with_pc is semantically broken for + // functions where shrinkwrapping places signing/authenticating pairs on + // distinct CFG paths. + // + // DWARF CFI is evaluated linearly over the byte stream, not along control + // flow edges. The toggle semantics of this directive therefore cannot + // faithfully represent the signed/unsigned RA state for all possible CFG + // paths. The added complexity versus DW_CFA_AARCH64_negate_ra_state is that + // an unwinder must also reconstruct the PC of the PACI[AB]SPPC in order to + // verify the signed LR, and that address is derived from the location of this + // directive in the linear CFI stream. + // + // The correct fix is to use DW_CFA_AARCH64_set_ra_state_with_pc, which sets + // the RA state and signing address absolutely rather than toggling them. An + // unwinder that supports this directive can reconstruct the correct state on + // any CFG path, regardless of how many signing/authenticating pairs exist in + // the function. However, not all unwinders support this directive, so we + // cannot rely on it exclusively. 
+ // + // For unwinders that only support DW_CFA_AARCH64_negate_ra_state_with_pc, + // libunwind exploits a loophole: it records the address at the + // DW_CFA_AARCH64_negate_ra_state_with_pc site to authenticate the LR, but + // does not care that the CFI state remains "signed with pc" after + // authentication has occurred. This means we can safely omit the + // FrameDestroy emission of this directive, treating it solely as a marker + // for the signing site, as long as each function has at most one such + // signing location. That invariant holds today because shrinkwrapping + // does not yet hoist or sink PAuth_LR frame code across CFG join/split + // points; once it does, we must avoid those transformations on platforms that + // have this limitation. + // + // https://github.com/ARM-software/abi-aa/issues/327 + // https://github.com/ARM-software/abi-aa/pull/346 + if (Flags == MachineInstr::FrameDestroy && MFnI.branchProtectionPAuthLR()) + return; + CFIInstBuilder CFIBuilder(MBB, MBBI, Flags); if (MFnI.branchProtectionPAuthLR()) { CFIBuilder.buildNegateRAStateWithPC(); diff --git a/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll b/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll index 67bff0b15fd49..fac71d5ec4a59 100644 --- a/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll +++ b/llvm/test/CodeGen/AArch64/pauth-lr-tail-call-fpdiff.ll @@ -66,18 +66,15 @@ define swifttailcc void @tail_call_fpdiff_a_key(ptr swiftasync %ctx) "branch-pro ; COMPAT-NEXT: adrp x15, .Ltmp0 ; COMPAT-NEXT: add x15, x15, :lo12:.Ltmp0 ; COMPAT-NEXT: hint #39 -; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: hint #12 ; V83A-NEXT: adrp x15, .Ltmp0 ; V83A-NEXT: add x15, x15, :lo12:.Ltmp0 ; V83A-NEXT: hint #39 -; V83A-NEXT: .cfi_negate_ra_state_with_pc ; V83A-NEXT: autia1716 ; V9A-NEXT: adrp x15, .Ltmp0 ; V9A-NEXT: add x15, x15, :lo12:.Ltmp0 -; V9A-NEXT: .cfi_negate_ra_state_with_pc ; V9A-NEXT: autia171615 ; PAUTH-NEXT: autia1716 @@ -144,18 +141,15 @@ define 
swifttailcc void @tail_call_fpdiff_b_key(ptr swiftasync %ctx) "branch-pro ; COMPAT-NEXT: adrp x15, .Ltmp1 ; COMPAT-NEXT: add x15, x15, :lo12:.Ltmp1 ; COMPAT-NEXT: hint #39 -; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: hint #14 ; V83A-NEXT: adrp x15, .Ltmp1 ; V83A-NEXT: add x15, x15, :lo12:.Ltmp1 ; V83A-NEXT: hint #39 -; V83A-NEXT: .cfi_negate_ra_state_with_pc ; V83A-NEXT: autib1716 ; V9A-NEXT: adrp x15, .Ltmp1 ; V9A-NEXT: add x15, x15, :lo12:.Ltmp1 -; V9A-NEXT: .cfi_negate_ra_state_with_pc ; V9A-NEXT: autib171615 ; PAUTH-NEXT: autib1716 @@ -209,16 +203,13 @@ define swifttailcc void @tail_call_no_fpdiff_a_key(ptr swiftasync %ctx) "branch- ; COMPAT-NEXT: adrp x16, .Ltmp2 ; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp2 ; COMPAT-NEXT: hint #39 -; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: hint #29 ; V83A-NEXT: adrp x16, .Ltmp2 ; V83A-NEXT: add x16, x16, :lo12:.Ltmp2 ; V83A-NEXT: hint #39 -; V83A-NEXT: .cfi_negate_ra_state_with_pc ; V83A-NEXT: autiasp -; V9A-NEXT: .cfi_negate_ra_state_with_pc ; V9A-NEXT: autiasppc .Ltmp2 ; PAUTH-NEXT: autiasp @@ -272,16 +263,13 @@ define swifttailcc void @tail_call_no_fpdiff_b_key(ptr swiftasync %ctx) "branch- ; COMPAT-NEXT: adrp x16, .Ltmp3 ; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp3 ; COMPAT-NEXT: hint #39 -; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: hint #31 ; V83A-NEXT: adrp x16, .Ltmp3 ; V83A-NEXT: add x16, x16, :lo12:.Ltmp3 ; V83A-NEXT: hint #39 -; V83A-NEXT: .cfi_negate_ra_state_with_pc ; V83A-NEXT: autibsp -; V9A-NEXT: .cfi_negate_ra_state_with_pc ; V9A-NEXT: autibsppc .Ltmp3 ; PAUTH-NEXT: autibsp @@ -347,18 +335,15 @@ define swifttailcc void @indirect_tail_call_fpdiff_a_key(ptr swiftasync %ctx, pt ; COMPAT-NEXT: adrp x15, .Ltmp4 ; COMPAT-NEXT: add x15, x15, :lo12:.Ltmp4 ; COMPAT-NEXT: hint #39 -; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: hint #12 ; V83A-NEXT: adrp x15, .Ltmp4 ; V83A-NEXT: add x15, x15, :lo12:.Ltmp4 ; V83A-NEXT: hint #39 -; V83A-NEXT: .cfi_negate_ra_state_with_pc 
; V83A-NEXT: autia1716 ; V9A-NEXT: adrp x15, .Ltmp4 ; V9A-NEXT: add x15, x15, :lo12:.Ltmp4 -; V9A-NEXT: .cfi_negate_ra_state_with_pc ; V9A-NEXT: autia171615 ; PAUTH-NEXT: autia1716 @@ -426,18 +411,15 @@ define swifttailcc void @indirect_tail_call_fpdiff_b_key(ptr swiftasync %ctx, pt ; COMPAT-NEXT: adrp x15, .Ltmp5 ; COMPAT-NEXT: add x15, x15, :lo12:.Ltmp5 ; COMPAT-NEXT: hint #39 -; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: hint #14 ; V83A-NEXT: adrp x15, .Ltmp5 ; V83A-NEXT: add x15, x15, :lo12:.Ltmp5 ; V83A-NEXT: hint #39 -; V83A-NEXT: .cfi_negate_ra_state_with_pc ; V83A-NEXT: autib1716 ; V9A-NEXT: adrp x15, .Ltmp5 ; V9A-NEXT: add x15, x15, :lo12:.Ltmp5 -; V9A-NEXT: .cfi_negate_ra_state_with_pc ; V9A-NEXT: autib171615 ; PAUTH-NEXT: autib1716 @@ -491,16 +473,13 @@ define swifttailcc void @indirect_tail_call_no_fpdiff_a_key(ptr swiftasync %ctx, ; COMPAT-NEXT: adrp x16, .Ltmp6 ; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp6 ; COMPAT-NEXT: hint #39 -; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: hint #29 ; V83A-NEXT: adrp x16, .Ltmp6 ; V83A-NEXT: add x16, x16, :lo12:.Ltmp6 ; V83A-NEXT: hint #39 -; V83A-NEXT: .cfi_negate_ra_state_with_pc ; V83A-NEXT: autiasp -; V9A-NEXT: .cfi_negate_ra_state_with_pc ; V9A-NEXT: autiasppc .Ltmp6 ; PAUTH-NEXT: autiasp @@ -554,16 +533,13 @@ define swifttailcc void @indirect_tail_call_no_fpdiff_b_key(ptr swiftasync %ctx, ; COMPAT-NEXT: adrp x16, .Ltmp7 ; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp7 ; COMPAT-NEXT: hint #39 -; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: hint #31 ; V83A-NEXT: adrp x16, .Ltmp7 ; V83A-NEXT: add x16, x16, :lo12:.Ltmp7 ; V83A-NEXT: hint #39 -; V83A-NEXT: .cfi_negate_ra_state_with_pc ; V83A-NEXT: autibsp -; V9A-NEXT: .cfi_negate_ra_state_with_pc ; V9A-NEXT: autibsppc .Ltmp7 ; PAUTH-NEXT: autibsp diff --git a/llvm/test/CodeGen/AArch64/swifttail-ptrauth.ll b/llvm/test/CodeGen/AArch64/swifttail-ptrauth.ll index 5d31d7a99ef7b..bf074c4966e4b 100644 --- 
a/llvm/test/CodeGen/AArch64/swifttail-ptrauth.ll +++ b/llvm/test/CodeGen/AArch64/swifttail-ptrauth.ll @@ -51,16 +51,13 @@ define swifttailcc void @caller_to0_from0() "branch-protection-pauth-lr" "sign-r ; COMPAT-NEXT: adrp x16, .Ltmp0 ; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp0 ; COMPAT-NEXT: hint #39 -; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: hint #29 ; V83A-NEXT: adrp x16, .Ltmp0 ; V83A-NEXT: add x16, x16, :lo12:.Ltmp0 ; V83A-NEXT: hint #39 -; V83A-NEXT: .cfi_negate_ra_state_with_pc ; V83A-NEXT: autiasp -; V9A-NEXT: .cfi_negate_ra_state_with_pc ; V9A-NEXT: autiasppc .Ltmp0 ; PAUTH-NEXT: autiasp @@ -115,16 +112,13 @@ define swifttailcc void @caller_to0_from8([8 x i64], i64) "branch-protection-pau ; COMPAT-NEXT: adrp x16, .Ltmp1 ; COMPAT-NEXT: add x16, x16, :lo12:.Ltmp1 ; COMPAT-NEXT: hint #39 -; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: hint #29 ; V83A-NEXT: adrp x16, .Ltmp1 ; V83A-NEXT: add x16, x16, :lo12:.Ltmp1 ; V83A-NEXT: hint #39 -; V83A-NEXT: .cfi_negate_ra_state_with_pc ; V83A-NEXT: autiasp -; V9A-NEXT: .cfi_negate_ra_state_with_pc ; V9A-NEXT: autiasppc .Ltmp1 ; PAUTH-NEXT: autiasp @@ -178,18 +172,15 @@ define swifttailcc void @caller_to8_from0() "branch-protection-pauth-lr" "sign-r ; COMPAT-NEXT: adrp x15, .Ltmp2 ; COMPAT-NEXT: add x15, x15, :lo12:.Ltmp2 ; COMPAT-NEXT: hint #39 -; COMPAT-NEXT: .cfi_negate_ra_state_with_pc ; COMPAT-NEXT: hint #12 ; V83A-NEXT: adrp x15, .Ltmp2 ; V83A-NEXT: add x15, x15, :lo12:.Ltmp2 ; V83A-NEXT: hint #39 -; V83A-NEXT: .cfi_negate_ra_state_with_pc ; V83A-NEXT: autia1716 ; V9A-NEXT: adrp x15, .Ltmp2 ; V9A-NEXT: add x15, x15, :lo12:.Ltmp2 -; V9A-NEXT: .cfi_negate_ra_state_with_pc ; V9A-NEXT: autia171615 ; PAUTH-NEXT: autia1716 From 6f3c7028ab788a967a00665690f91ab42d20b642 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 24 Jun 2026 15:54:42 -0400 Subject: [PATCH 397/511] [gn build] Port 30abd9ec2b8d (#205658) --- llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn | 1 - 1 file changed, 
1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn index 183b0ab1f1125..2dd43f2603598 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/Utils/BUILD.gn @@ -100,7 +100,6 @@ static_library("Utils") { "StripNonLineTableDebugInfo.cpp", "SymbolRewriter.cpp", "TriggerCrashPass.cpp", - "UnifyFunctionExitNodes.cpp", "UnifyLoopExits.cpp", "Utils.cpp", "VNCoercion.cpp", From e120ceddec106bed6d8a5a9fd33ee46236e3ca14 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 24 Jun 2026 15:55:01 -0400 Subject: [PATCH 398/511] [gn] Fix oversight from cf6527a49669c796b (#205660) Without this, the interception/interception.h include in InstProfilingPlatformROCm.cpp isn't found. --- llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn b/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn index 107109cdfd47f..a57499c4a0e71 100644 --- a/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn +++ b/llvm/utils/gn/secondary/compiler-rt/lib/profile/BUILD.gn @@ -1,6 +1,9 @@ import("//compiler-rt/target.gni") static_library("profile") { + configs -= [ "//llvm/utils/gn/build:llvm_code" ] + configs += [ "//llvm/utils/gn/build:crt_code" ] + output_dir = crt_current_out_dir if (current_os == "mac") { output_name = "clang_rt.profile_osx" From d6eb61be50fd42cd08758b752f38c72d81fbc0ee Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Wed, 24 Jun 2026 15:55:23 -0400 Subject: [PATCH 399/511] [gn build] Port c997a13812c1 (#205659) --- llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn index 1a3ab9f9412cb..cbeadc80b72a5 100644 --- 
a/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/Hexagon/BUILD.gn @@ -71,6 +71,7 @@ static_library("LLVMHexagonCodeGen") { "HexagonGenPredicate.cpp", "HexagonGlobalRegion.cpp", "HexagonGlobalScheduler.cpp", + "HexagonHVXSaveRemark.cpp", "HexagonHardwareLoops.cpp", "HexagonHazardRecognizer.cpp", "HexagonISelDAGToDAG.cpp", From 29988e30b4f556821bf293900008a376c49d10d5 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Wed, 24 Jun 2026 12:57:20 -0700 Subject: [PATCH 400/511] Revert "[Dexter] Add ability to rewrite scripts to fill-in unknown values" (#205657) Reverts llvm/llvm-project#202799 to fix the failing cross-project-tests::debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expect_list.cpp test and unblock pre-commit CI. --- .../dexter/dex/evaluation/ExpectRewriter.py | 212 ------------------ .../dexter/dex/test_script/Script.py | 26 --- .../dexter/dex/tools/ToolBase.py | 3 +- .../dexter/dex/tools/test/Tool.py | 40 +--- .../Inputs/rewrite_expect_list_expected.cpp | 130 ----------- .../Inputs/rewrite_expects_expected.cpp | 53 ----- .../rewrite_multiple_scripts_expected.cpp | 48 ---- .../scripts/rewriting/Inputs/simple_prog.cpp | 7 - .../rewriting/Inputs/whole_file_test.dex | 2 - .../Inputs/whole_file_test_expected.dex | 6 - .../scripts/rewriting/rewrite_expect_list.cpp | 44 ---- .../scripts/rewriting/rewrite_expects.cpp | 54 ----- .../rewriting/rewrite_multiple_scripts.cpp | 48 ---- .../scripts/rewriting/whole_file.test | 24 -- 14 files changed, 7 insertions(+), 690 deletions(-) delete mode 100644 cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectRewriter.py delete mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expect_list_expected.cpp delete mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expects_expected.cpp delete mode 100644 
cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_multiple_scripts_expected.cpp delete mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/simple_prog.cpp delete mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test.dex delete mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test_expected.dex delete mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expect_list.cpp delete mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expects.cpp delete mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_multiple_scripts.cpp delete mode 100644 cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/whole_file.test diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectRewriter.py b/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectRewriter.py deleted file mode 100644 index 7519d96a02a6d..0000000000000 --- a/cross-project-tests/debuginfo-tests/dexter/dex/evaluation/ExpectRewriter.py +++ /dev/null @@ -1,212 +0,0 @@ -# DExTer : Debugging Experience Tester -# ~~~~~~ ~ ~~ ~ ~~ -# -# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. 
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -"""Utilities for using debugger output to generate expected values that match that output.""" - -from collections import Counter, OrderedDict, defaultdict -from copy import deepcopy -from enum import Enum, IntEnum -from typing import Any, Dict, List, Optional, Set, Tuple, Union - -from dex.dextIR import DextIR, StepIR, ValueIR -from dex.evaluation.StateMatch import get_active_where_matches -from dex.test_script.Nodes import Expect, Then, Value, Where -from dex.test_script.Script import DexterScript, Scope -from dex.tools.Main import Context - - -class ExpectedValueRewriter: - """Given a ValueIR for an Expect, generates a complete expected value that matches that value if one can be - provided.""" - - def __init__(self, expect: Expect, value: ValueIR): - self.expect = expect - self.root_value = value - self.expected_value = expect.get_variable_result(value) - - -def unique_expected_values(elements: List[ExpectedValueRewriter]): - """Given a list of ExpectedValueRewriters, and returns either a list containing the unique set of non-None expected - values, or a single item if there is only one non-duplicated expected value in the list, or None if there are no - valid expected values.""" - - unique_set = set() - result = [] - for element in elements: - expected_value = element.expected_value - if expected_value is None: - continue - if expected_value not in unique_set: - unique_set.add(expected_value) - result.append(expected_value) - if not result: - return None - if len(result) == 1: - return result[0] - return result - - -class StepExpectRewriter: - """Processes all active, unknown expects at a given debugger step and produces ExpectedValueRewriter results for - each.""" - - def __init__(self, step: StepIR, script: DexterScript): - self.step = step - self.script = script - self.state_match = get_active_where_matches(script, step) - active_expects = { - expect - for where_match in self.state_match.values() - 
for expect in where_match.active_expects - } - self.expect_matches: Dict[Expect, ExpectedValueRewriter] = {} - - def add_expected_values(expect: Expect, expected_value: Any, scope: Scope): - assert isinstance(expect, Value), "Non-Value expects currently unsupported" - if expect in active_expects and expected_value is None: - self.expect_matches[expect] = ExpectedValueRewriter( - expect, step.watches[expect.get_watched_expr()] - ) - - script.visit_script(visit_expect=add_expected_values) - - -class ScriptExpectRewriter: - """Given the full output from a debugger run and a script with missing expected values, returns a script with - filled-in expected values that match the debugger output.""" - - def __init__(self, context: Context, dext_ir: DextIR): - self.context = context - self.dext_ir = dext_ir - self.unknown_expect_rewrites: Dict[ - Expect, List[Tuple[int, ExpectedValueRewriter]] - ] = {} - self.new_script: Optional[DexterScript] = None - self.new_expected_values: Dict[Expect, Any] = {} - self.missing_expect_rewrites: List[Expect] = [] - - script = dext_ir.script - assert ( - script is not None - ), "Cannot use ScriptExpectRewriter on a non-script Dexter test." - - # Collect every Expect with an unknown value into the `unknown_expect_rewrites` dict. We expect all Expects in - # this dict to have observed values, and don't expect to rewrite any Expects outside of this dict. - def collect_unknown_expects(expect: Expect, expected_value: Any, scope: Scope): - assert isinstance(expect, Value), "Non-Value expects currently unsupported" - if expected_value is None: - self.unknown_expect_rewrites[expect] = [] - - script.visit_script(visit_expect=collect_unknown_expects) - - # If there are no expects to update, then there is no rewriting to be done - exit early. 
- if not self.unknown_expect_rewrites: - return - - # Populate the `unknown_expect_rewrites` dict, mapping each expect with an unknown value to its list of observed - # during this run, along with the corresponding step indices. - self.step_rewriters = [ - StepExpectRewriter(step, script) for step in dext_ir.steps - ] - for step_rewriter in self.step_rewriters: - step_idx = step_rewriter.step.step_index - for expect, expected_value_rewriter in step_rewriter.expect_matches.items(): - self.unknown_expect_rewrites[expect].append( - (step_idx, expected_value_rewriter) - ) - - # For each unknown expect, merge the observed values into a writable "expected values" entry, which may be a - # list or a single value. - self.new_expected_values = { - expect: expected_values - for expect, expect_rewriters in self.unknown_expect_rewrites.items() - if ( - expected_values := unique_expected_values( - [rewriter for idx, rewriter in expect_rewriters] - ) - ) - is not None - } - - # Finally, use the new expected values to rewrite the script. - self.new_script = rewrite_script(script, self.new_expected_values) - self.missing_expect_rewrites = [ - expect - for expect in self.unknown_expect_rewrites - if expect not in self.new_expected_values - ] - - @property - def num_successful_rewrites(self): - return len(self.new_expected_values) - - @property - def num_unsuccessful_rewrites(self): - return len(self.missing_expect_rewrites) - - -def rewrite_script( - script: DexterScript, add_expected_values: Dict[Expect, Any] -) -> DexterScript: - """Given a set of updates to apply to a provided script, returns a copy of the script_obj with the updates - applied. - Does not deep copy, meaning the new script contains the same node objects as the old script; this is safe as we do - not modify these objects.""" - # First build up a map describing the children of every node in the script, adding add_expected_values to the - # required expect nodes. 
- new_node_child_map = {} - - def replace_where(where: Where, scope: Scope): - if scope.where: - scope_where_children = new_node_child_map.setdefault(scope.where, []) - assert isinstance( - scope_where_children, list - ), f"Unexpected child for !where node: {scope_where_children}" - scope_where_children.append(where) - - def replace_then(then: Then, scope: Scope): - assert ( - scope.where not in new_node_child_map - ), "!then must be the sole child of a state node." - new_node_child_map[scope.where] = then - - def replace_expect(expect: Expect, expected_value, scope: Scope): - new_expected_value = add_expected_values.get(expect) or expected_value - new_node_child_map[expect] = new_expected_value - scope_where_children = new_node_child_map.setdefault(scope.where, []) - assert isinstance( - scope_where_children, list - ), f"Unexpected child for state node {scope.where}: {scope_where_children}" - scope_where_children.append(expect) - - script.visit_script( - visit_where=replace_where, visit_expect=replace_expect, visit_then=replace_then - ) - - # Now rebuild the script object using the two maps. 
- def build_subscript(node): - """Returns the subset of the script object whose parent is the given node.""" - assert isinstance( - node, (Expect, Where) - ), f"Unexpected script parent node: {node}" - if isinstance(node, Expect): - return new_node_child_map[node] - node_children = new_node_child_map[node] - if isinstance(node_children, Then): - return node_children - assert isinstance( - node_children, List - ), f"Unexpected child for state node {node}: {node_children}" - return {child: build_subscript(child) for child in node_children} - - new_script_obj = {node: build_subscript(node) for node in script.script_obj} - return DexterScript( - script.context, - new_script_obj, - script.root_scope, - script.base_dir, - script.load_context, - ) diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Script.py b/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Script.py index 50a5cb48a200e..26a38a604dbe5 100644 --- a/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Script.py +++ b/cross-project-tests/debuginfo-tests/dexter/dex/test_script/Script.py @@ -139,16 +139,6 @@ def get_known_file_for_where(self, where: Where) -> Optional[str]: return next_scope.file -class ScriptLoadContext: - """Contains information about the context that the script was loaded from.""" - - def __init__(self, file: str, lines: List[str], start_line: int, stop_line: int): - self.file = file - self.lines = lines - self.start_line = start_line - self.stop_line = stop_line - - class DexterScript: def __init__( self, @@ -156,12 +146,10 @@ def __init__( script_obj, scope: Scope, source_root_dir: Optional[str], - load_context: ScriptLoadContext, ): self.context = context self.script_obj = script_obj self.root_scope = scope - self.load_context = load_context self.label_dict = LabelDict() assert scope.file is not None self.base_dir = ( @@ -283,7 +271,6 @@ def get_script(context, file, loader, source_root_dir: Optional[str]) -> DexterS try_load_yaml("\n".join(lines), 
loader), root_scope, source_root_dir, - ScriptLoadContext(file, lines, start_line=0, stop_line=len(lines)), ) except (Error, yaml.YAMLError) as e: raise Error(f"File '{file}' was not a valid Dexter script:\n{e}") @@ -306,7 +293,6 @@ def get_script(context, file, loader, source_root_dir: Optional[str]) -> DexterS ), root_scope, source_root_dir, - ScriptLoadContext(file, lines, start_line, stop_line), ) except (Error, yaml.YAMLError) as e: attempted_scripts.append((start_line, e)) @@ -344,15 +330,3 @@ def check_explicit_files(where: Where, _: Scope): script.visit_script(visit_where=check_explicit_files) return script, source_files - - -def write_dexter_script_file(script: DexterScript) -> str: - load_context = script.load_context - script_lines = script.dump().splitlines(True) - write_lines = ( - load_context.lines[: load_context.start_line] - + script_lines - + ["...\n"] - + load_context.lines[load_context.stop_line :] - ) - return "".join(write_lines) diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/tools/ToolBase.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/ToolBase.py index d54dd3924e407..4b09c134a1b6e 100644 --- a/cross-project-tests/debuginfo-tests/dexter/dex/tools/ToolBase.py +++ b/cross-project-tests/debuginfo-tests/dexter/dex/tools/ToolBase.py @@ -11,7 +11,6 @@ import tempfile from dex import __version__ -from dex.tools.Main import Context from dex.utils import ExtArgParse from dex.utils import PrettyOutput from dex.utils.ReturnCode import ReturnCode @@ -19,7 +18,7 @@ class ToolBase(object, metaclass=abc.ABCMeta): def __init__(self, context): - self.context: Context = context + self.context = context self.parser = None @abc.abstractproperty diff --git a/cross-project-tests/debuginfo-tests/dexter/dex/tools/test/Tool.py b/cross-project-tests/debuginfo-tests/dexter/dex/tools/test/Tool.py index cbb894aa4d13a..0c028773ec56c 100644 --- a/cross-project-tests/debuginfo-tests/dexter/dex/tools/test/Tool.py +++ 
b/cross-project-tests/debuginfo-tests/dexter/dex/tools/test/Tool.py @@ -24,13 +24,8 @@ ) from dex.dextIR.DextIR import DextIR from dex.evaluation import DebuggerRunMatch -from dex.evaluation.ExpectRewriter import ScriptExpectRewriter from dex.heuristic import Heuristic -from dex.test_script.Script import ( - DexterScript, - get_dexter_script, - write_dexter_script_file, -) +from dex.test_script.Script import get_dexter_script from dex.tools import TestToolBase from dex.utils.Exceptions import DebuggerException from dex.utils.Exceptions import BuildScriptException, HeuristicException @@ -160,6 +155,10 @@ def _init_debugger_controller(self): self.context.options.source_files.extend(list(new_source_files)) + # If we are not running a debugger, return the DextIR instead of a DebuggerController. + if self.context.options.skip_run: + return step_collection + if self.context.options.use_script: debugger_controller = ScriptDebuggerController( self.context, step_collection @@ -181,8 +180,7 @@ def _get_steps(self): if self.context.options.skip_run: self.context.logger.warning("Skipping run...") - assert isinstance(debugger_controller.step_collection, DextIR) - return debugger_controller.step_collection + return debugger_controller debugger_controller = run_debugger_subprocess( debugger_controller, self.context.working_directory.path ) @@ -256,16 +254,6 @@ def _record_structured_script_metric_results( with open(output_json_path, "w") as fp: json.dump(run_match.get_metric_json_output(), fp) - def _write_updated_structured_script( - self, test_name, rewritten_script: DexterScript - ): - """Write out the original script file, modified to replace any unknown expects with the actual observed - values.""" - if self.context.options.results_directory: - output_text_path = self._get_results_path(test_name) - with open(output_text_path, "w", encoding="utf-8") as fp: - fp.write(write_dexter_script_file(rewritten_script)) - def _record_test_and_display(self, test_case): """Output test 
case to o stream and record test case internally for handling later. @@ -325,22 +313,6 @@ def _run_test(self, test_name): return self._record_steps(test_name, steps) if self.context.options.use_script: - # Before evaluating, the script may contain "unknown" expects; if they should be rewritten, then do so - # first, and then use the rewritten script to evaluate. - script_writer = ScriptExpectRewriter(self.context, steps) - if script_writer.new_script: - self.context.logger.note( - f"Rewrote script to add {script_writer.num_successful_rewrites} expected values." - ) - if script_writer.num_unsuccessful_rewrites: - self.context.logger.error( - f"Failed to rewrite {script_writer.num_unsuccessful_rewrites} expected values." - ) - self._write_updated_structured_script( - test_name, script_writer.new_script - ) - steps.script = script_writer.new_script - # Then evaluate, using the new script if any was produced. run_match = DebuggerRunMatch(self.context, steps) self._record_structured_script_metric_results(test_name, run_match) self._record_successful_test_match(test_name, steps, run_match) diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expect_list_expected.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expect_list_expected.cpp deleted file mode 100644 index 6fa8008c8badb..0000000000000 --- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expect_list_expected.cpp +++ /dev/null @@ -1,130 +0,0 @@ -// RUN: rm -rf %t -// RUN: mkdir %t -// RUN: %dexter_regression_test_cxx_build %s -o %t/test -// RUN: %dexter_regression_test_run --use-script --binary %t/test \ -// RUN: --results-directory %t/results -- %s 2>&1 | FileCheck %s -// RUN: diff %t/results/%{s:basename} %S/Inputs/rewrite_expect_list_expected.cpp - -/// Test that Dexter can write lists of expected values for simple scalar -/// variables. 
- -/// NB: The exact contents of this file are compared against the expect file in -/// the Inputs/ directory; any changes to this file, including comments, -/// will require updating the corresponding expected file. -/// Although we perform an exact file comparison, we use `diff` over `cmp` -/// for more legible lit output. - -// CHECK: Rewrote script to add 3 expected values. - -// CHECK: total_watched_steps: 90 -// CHECK: correct_steps: 90 -// CHECK: incorrect_steps: 0 -// CHECK: seen_values: 86 -// CHECK: missing_values: 0 - -int main() { - int prev = 0; - int current = 0; - int next = 1; - for (int i = 0; i < 30; ++i) { - prev = current; // !dex_label loop - current = next; - next = prev + current; - } - return current; -} - -/* ---- -? !where {lines: !label 'loop'} -: !value 'prev': - - '0' - - '1' - - '2' - - '3' - - '5' - - '8' - - '13' - - '21' - - '34' - - '55' - - '89' - - '144' - - '233' - - '377' - - '610' - - '987' - - '1597' - - '2584' - - '4181' - - '6765' - - '10946' - - '17711' - - '28657' - - '46368' - - '75025' - - '121393' - - '196418' - - '317811' - !value 'current': - - '0' - - '1' - - '2' - - '3' - - '5' - - '8' - - '13' - - '21' - - '34' - - '55' - - '89' - - '144' - - '233' - - '377' - - '610' - - '987' - - '1597' - - '2584' - - '4181' - - '6765' - - '10946' - - '17711' - - '28657' - - '46368' - - '75025' - - '121393' - - '196418' - - '317811' - - '514229' - !value 'next': - - '1' - - '2' - - '3' - - '5' - - '8' - - '13' - - '21' - - '34' - - '55' - - '89' - - '144' - - '233' - - '377' - - '610' - - '987' - - '1597' - - '2584' - - '4181' - - '6765' - - '10946' - - '17711' - - '28657' - - '46368' - - '75025' - - '121393' - - '196418' - - '317811' - - '514229' - - '832040' -... 
-*/ diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expects_expected.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expects_expected.cpp deleted file mode 100644 index e3852874b2ae3..0000000000000 --- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_expects_expected.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// RUN: rm -rf %t -// RUN: mkdir %t -// RUN: %dexter_regression_test_cxx_build %s -o %t/test -// RUN: %dexter_regression_test_run --use-script --binary %t/test \ -// RUN: --results-directory %t/results -- %s 2>&1 | FileCheck %s -// RUN: diff %t/results/%{s:basename} %S/Inputs/rewrite_expects_expected.cpp - -/// Test that when we have a Dexter test with missing/unknown expected values, -/// Dexter produces a modified test file that is identical except for a modified -/// script section. - -/// NB: The exact contents of this file are compared against the expect file in -/// the Inputs/ directory; any changes to this file, including comments, -/// will require updating the corresponding expected file. -/// Although we perform an exact file comparison, we use `diff` over `cmp` -/// for more legible lit output. - -// CHECK: Rewrote script to add 6 expected values. -// CHECK: Failed to rewrite 2 expected values. - -// CHECK: total_watched_steps: 7 -// CHECK: correct_steps: 6 -// CHECK: incorrect_steps: 1 -// CHECK: seen_values: 6 -// CHECK: missing_values: 2 - -int multiply(int b, int a) { - int result = a * b; - return result; // !dex_label mul_ret -} - -int main() { - int a = 6; - int b = 7; - int c = multiply(a, b); - return c; // !dex_label main_ret -} -// !dex_label never_reached -/* ---- -? !where {lines: !label 'mul_ret'} -: !value 'a': '7' - !value 'b': '6' - !value 'result': '42' -? !where {lines: !label 'main_ret'} -: !value 'a': '6' - !value 'b': '7' - !value 'c': '42' - !value 'not_real': null -? 
!where {lines: !label 'never_reached'} -: !value 'a': null -... -*/ diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_multiple_scripts_expected.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_multiple_scripts_expected.cpp deleted file mode 100644 index d14e8224635b8..0000000000000 --- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/rewrite_multiple_scripts_expected.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// RUN: rm -rf %t -// RUN: mkdir %t -// RUN: %dexter_regression_test_cxx_build %s -o %t/test -// RUN: %dexter_regression_test_run --use-script --binary %t/test \ -// RUN: --results-directory %t/results -- %s 2>&1 | FileCheck %s -// RUN: diff %t/results/%{s:basename} \ -// RUN: %S/Inputs/rewrite_multiple_scripts_expected.cpp - -/// Test that when a file contains more than one valid YAML script (but only one -/// Dexter script), the existing YAML is printed correctly. - -/// NB: The exact contents of this file are compared against the expect file in -/// the Inputs/ directory; any changes to this file, including comments, -/// will require updating the corresponding expected file. -/// Although we perform an exact file comparison, we use `diff` over `cmp` -/// for more legible lit output. - -// CHECK: Rewrote script to add 1 expected values. - -// CHECK: total_watched_steps: 1 -// CHECK: correct_steps: 1 -// CHECK: incorrect_steps: 0 -// CHECK: seen_values: 1 -// CHECK: missing_values: 0 - -/* ---- -hr: # 1998 hr ranking -- Mark McGwire -- Sammy Sosa -# 1998 rbi ranking -rbi: -- Sammy Sosa -- Ken Griffey -... -*/ - -int main() { - int ret = 0; - return ret; // !dex_label ret -} - -/* ---- -? !where {lines: !label 'ret'} -: !value 'ret': '0' -... 
-*/ diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/simple_prog.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/simple_prog.cpp deleted file mode 100644 index 72567b9db3e51..0000000000000 --- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/simple_prog.cpp +++ /dev/null @@ -1,7 +0,0 @@ - -int main() { - int i = 0; - i += 1; // !dex_label start - i += 1; - return i; // !dex_label end -} diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test.dex b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test.dex deleted file mode 100644 index 4501532871cc9..0000000000000 --- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test.dex +++ /dev/null @@ -1,2 +0,0 @@ -!where {file: 'simple_prog.cpp', lines: !range [!label start, !label end]}: - ? !value i \ No newline at end of file diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test_expected.dex b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test_expected.dex deleted file mode 100644 index c1cf5e87915a5..0000000000000 --- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/Inputs/whole_file_test_expected.dex +++ /dev/null @@ -1,6 +0,0 @@ -? !where {file: simple_prog.cpp, lines: !range [!label 'start', !label 'end']} -: !value 'i': - - '0' - - '1' - - '2' -... 
diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expect_list.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expect_list.cpp deleted file mode 100644 index ad0584949c6c9..0000000000000 --- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expect_list.cpp +++ /dev/null @@ -1,44 +0,0 @@ -// RUN: rm -rf %t -// RUN: mkdir %t -// RUN: %dexter_regression_test_cxx_build %s -o %t/test -// RUN: %dexter_regression_test_run --use-script --binary %t/test \ -// RUN: --results-directory %t/results -- %s 2>&1 | FileCheck %s -// RUN: diff %t/results/%{s:basename} %S/Inputs/rewrite_expect_list_expected.cpp - -/// Test that Dexter can write lists of expected values for simple scalar -/// variables. - -/// NB: The exact contents of this file are compared against the expect file in -/// the Inputs/ directory; any changes to this file, including comments, -/// will require updating the corresponding expected file. -/// Although we perform an exact file comparison, we use `diff` over `cmp` -/// for more legible lit output. - -// CHECK: Rewrote script to add 3 expected values. - -// CHECK: total_watched_steps: 90 -// CHECK: correct_steps: 90 -// CHECK: incorrect_steps: 0 -// CHECK: seen_values: 86 -// CHECK: missing_values: 0 - -int main() { - int prev = 0; - int current = 0; - int next = 1; - for (int i = 0; i < 30; ++i) { - prev = current; // !dex_label loop - current = next; - next = prev + current; - } - return current; -} - -/* ---- -!where {lines: !label loop}: - ? !value prev - ? !value current - ? !value next -... 
-*/ diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expects.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expects.cpp deleted file mode 100644 index bc5bae6df72fb..0000000000000 --- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_expects.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// RUN: rm -rf %t -// RUN: mkdir %t -// RUN: %dexter_regression_test_cxx_build %s -o %t/test -// RUN: %dexter_regression_test_run --use-script --binary %t/test \ -// RUN: --results-directory %t/results -- %s 2>&1 | FileCheck %s -// RUN: diff %t/results/%{s:basename} %S/Inputs/rewrite_expects_expected.cpp - -/// Test that when we have a Dexter test with missing/unknown expected values, -/// Dexter produces a modified test file that is identical except for a modified -/// script section. - -/// NB: The exact contents of this file are compared against the expect file in -/// the Inputs/ directory; any changes to this file, including comments, -/// will require updating the corresponding expected file. -/// Although we perform an exact file comparison, we use `diff` over `cmp` -/// for more legible lit output. - -// CHECK: Rewrote script to add 6 expected values. -// CHECK: Failed to rewrite 2 expected values. - -// CHECK: total_watched_steps: 7 -// CHECK: correct_steps: 6 -// CHECK: incorrect_steps: 1 -// CHECK: seen_values: 6 -// CHECK: missing_values: 2 - -int multiply(int b, int a) { - int result = a * b; - return result; // !dex_label mul_ret -} - -int main() { - int a = 6; - int b = 7; - int c = multiply(a, b); - return c; // !dex_label main_ret -} -// !dex_label never_reached -/* ---- -# Comments in the Dexter script are not preserved. -!where {lines: !label mul_ret}: - ? !value a - ? !value b - ? !value result -!where {lines: !label main_ret}: - ? !value a - ? !value b - ? !value c - ? !value not_real -!where {lines: !label never_reached}: - ? !value a -... 
-*/ diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_multiple_scripts.cpp b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_multiple_scripts.cpp deleted file mode 100644 index 6507a6ff746db..0000000000000 --- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/rewrite_multiple_scripts.cpp +++ /dev/null @@ -1,48 +0,0 @@ -// RUN: rm -rf %t -// RUN: mkdir %t -// RUN: %dexter_regression_test_cxx_build %s -o %t/test -// RUN: %dexter_regression_test_run --use-script --binary %t/test \ -// RUN: --results-directory %t/results -- %s 2>&1 | FileCheck %s -// RUN: diff %t/results/%{s:basename} \ -// RUN: %S/Inputs/rewrite_multiple_scripts_expected.cpp - -/// Test that when a file contains more than one valid YAML script (but only one -/// Dexter script), the existing YAML is printed correctly. - -/// NB: The exact contents of this file are compared against the expect file in -/// the Inputs/ directory; any changes to this file, including comments, -/// will require updating the corresponding expected file. -/// Although we perform an exact file comparison, we use `diff` over `cmp` -/// for more legible lit output. - -// CHECK: Rewrote script to add 1 expected values. - -// CHECK: total_watched_steps: 1 -// CHECK: correct_steps: 1 -// CHECK: incorrect_steps: 0 -// CHECK: seen_values: 1 -// CHECK: missing_values: 0 - -/* ---- -hr: # 1998 hr ranking -- Mark McGwire -- Sammy Sosa -# 1998 rbi ranking -rbi: -- Sammy Sosa -- Ken Griffey -... -*/ - -int main() { - int ret = 0; - return ret; // !dex_label ret -} - -/* ---- -!where {lines: !label ret}: - ? !value ret -... 
-*/ diff --git a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/whole_file.test b/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/whole_file.test deleted file mode 100644 index 97dee0168b824..0000000000000 --- a/cross-project-tests/debuginfo-tests/dexter/feature_tests/scripts/rewriting/whole_file.test +++ /dev/null @@ -1,24 +0,0 @@ -RUN: rm -rf %t -RUN: mkdir %t -RUN: %dexter_regression_test_cxx_build %S/Inputs/simple_prog.cpp -o %t/test -RUN: %dexter_regression_test_run --use-script --binary %t/test \ -RUN: --results-directory %t/results --source-root-dir %S/Inputs -- \ -RUN: %S/Inputs/whole_file_test.dex 2>&1 | FileCheck %s -RUN: diff %t/results/whole_file_test.dex %S/Inputs/whole_file_test_expected.dex - -Test that script rewriting still works when the test file is pure YAML, rather -than a YAML Dexter script embedded in another file. - -NB: The exact contents of this file are compared against the expect file in - the Inputs/ directory; any changes to this file, including comments, - will require updating the corresponding expected file. - Although we perform an exact file comparison, we use `diff` over `cmp` - for more legible lit output. - -CHECK: Rewrote script to add 1 expected values. - -CHECK: total_watched_steps: 3 -CHECK: correct_steps: 3 -CHECK: incorrect_steps: 0 -CHECK: seen_values: 3 -CHECK: missing_values: 0 \ No newline at end of file From e7c8e0217919bd2eae9e520f3a41f4261d1f4439 Mon Sep 17 00:00:00 2001 From: Aditya Gupta Date: Wed, 24 Jun 2026 15:59:38 -0400 Subject: [PATCH 401/511] [CodeGen] Add v2048bf16 vector type (#202386) Define v2048bf16 vector type in ValueTypes.td. This is useful for targets that support packed bf16 operations with large vector widths (e.g. Google TPU). 
--- llvm/include/llvm/CodeGen/ValueTypes.td | 1 + llvm/test/TableGen/CPtrWildcard.td | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td index 34602fcfa4460..4709a7d72ba8f 100644 --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -213,6 +213,7 @@ def v64bf16 : VTVec<64, bf16>; // 64 x bf16 vector value def v128bf16 : VTVec<128, bf16>; // 128 x bf16 vector value def v256bf16 : VTVec<256, bf16>; // 256 x bf16 vector value def v512bf16 : VTVec<512, bf16>; // 512 x bf16 vector value +def v2048bf16 : VTVec<2048, bf16>; // 2048 x bf16 vector value def v4096bf16 : VTVec<4096, bf16>; // 4096 x bf16 vector value def v1f32 : VTVec<1, f32>; // 1 x f32 vector value diff --git a/llvm/test/TableGen/CPtrWildcard.td b/llvm/test/TableGen/CPtrWildcard.td index 6b1312b6a1fe2..082e510f5a595 100644 --- a/llvm/test/TableGen/CPtrWildcard.td +++ b/llvm/test/TableGen/CPtrWildcard.td @@ -8,13 +8,13 @@ // CHECK-NEXT:/* 3*/ OPC_CheckChild0Integer, [[#]], // CHECK-NEXT:/* 5*/ OPC_RecordChild1, // #0 = $src // CHECK-NEXT:/* 6*/ OPC_Scope /*2 children */, 9, // ->17 -// CHECK-NEXT:/* 8*/ OPC_CheckChild1Type, /*MVT::c64*/5|128,2/*261*/, +// CHECK-NEXT:/* 8*/ OPC_CheckChild1Type, /*MVT::c64*/6|128,2/*262*/, // CHECK-NEXT:/* 11*/ OPC_MorphNodeTo1None, TARGET_VAL(MyTarget::C64_TO_I64), // CHECK-NEXT: MVT::i64, 1/*#Ops*/, /*OperandList*/0, // Ops = #0 // CHECK-NEXT: // Src: (intrinsic_wo_chain:{ *:[i64] } [[#]]:{ *:[iPTR] }, c64:{ *:[c64] }:$src) - Complexity = 8 // CHECK-NEXT: // Dst: (C64_TO_I64:{ *:[i64] } ?:{ *:[c64] }:$src) // CHECK-NEXT:/* 17*/ /*Scope*/ 9, // ->27 -// CHECK-NEXT:/* 18*/ OPC_CheckChild1Type, /*MVT::c128*/6|128,2/*262*/, +// CHECK-NEXT:/* 18*/ OPC_CheckChild1Type, /*MVT::c128*/7|128,2/*263*/, // CHECK-NEXT:/* 21*/ OPC_MorphNodeTo1None, TARGET_VAL(MyTarget::C128_TO_I64), // CHECK-NEXT: MVT::i64, 1/*#Ops*/, /*OperandList*/0, // Ops = #0 
// CHECK-NEXT: // Src: (intrinsic_wo_chain:{ *:[i64] } [[#]]:{ *:[iPTR] }, c128:{ *:[c128] }:$src) - Complexity = 8 From b17c55f8399f9d45e89df07cc59133e958cb410e Mon Sep 17 00:00:00 2001 From: Prabhu Rajasekaran Date: Wed, 24 Jun 2026 13:01:28 -0700 Subject: [PATCH 402/511] [libc][math] Fix MPI_OVER_2 constant in atan2f_float (#205621) Fix a typo in the size-optimized float-float implementation of atan2f where MPI_OVER_2 (-pi/2) was defined identically to PI_OVER_2 (pi/2). This caused atan2f to return incorrect values (off by pi) on targets where the float-float path is selected (e.g. baremetal ARM Cortex-M with hardware FMA/floating-point support) for inputs requiring reciprocal range reduction, such as atan2f(10.0f, 1.0f). --- libc/src/__support/math/atan2f_float.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/src/__support/math/atan2f_float.h b/libc/src/__support/math/atan2f_float.h index 1f44b67749964..744783e855cef 100644 --- a/libc/src/__support/math/atan2f_float.h +++ b/libc/src/__support/math/atan2f_float.h @@ -134,7 +134,7 @@ LIBC_INLINE constexpr float atan2f(float y, float x) { constexpr FloatFloat MPI = {0x1.777a5cp-24f, -0x1.921fb6p1f}; constexpr FloatFloat PI_OVER_4 = {-0x1.777a5cp-26f, 0x1.921fb6p-1f}; constexpr FloatFloat PI_OVER_2 = {-0x1.777a5cp-25f, 0x1.921fb6p0f}; - constexpr FloatFloat MPI_OVER_2 = {-0x1.777a5cp-25f, 0x1.921fb6p0f}; + constexpr FloatFloat MPI_OVER_2 = {0x1.777a5cp-25f, -0x1.921fb6p0f}; constexpr FloatFloat THREE_PI_OVER_4 = {-0x1.99bc5cp-28f, 0x1.2d97c8p1f}; // Adjustment for constant term: // CONST_ADJ[x_sign][y_sign][recip] From 7af3d611768035aada5b3d9650bca2d5afc8504d Mon Sep 17 00:00:00 2001 From: Scott Manley Date: Wed, 24 Jun 2026 15:07:53 -0500 Subject: [PATCH 403/511] [OpenACC] add acc::ReductionAccumulateArrayOp (#205617) Add an OpenACC Dialect operation to accumulate elements of a (private) array across threads. 
This operation only specifies the PointerLikeType and an acc::DataBoundsOp to represent the accumulation of the array at a high level. This will ultimately get lowered by "codegen". --- .../mlir/Dialect/OpenACC/OpenACCCGOps.td | 23 +++++++++++++++++++ mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp | 10 ++++++++ mlir/test/Dialect/OpenACC/invalid-cg.mlir | 19 +++++++++++++++ mlir/test/Dialect/OpenACC/ops-cg.mlir | 9 ++++++++ 4 files changed, 61 insertions(+) diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td index 26fb6c10e51e6..70f16d423d1f8 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCCGOps.td @@ -170,6 +170,29 @@ def OpenACC_ReductionAccumulateOp let hasVerifier = 1; } +//===----------------------------------------------------------------------===// +// acc.reduction_accumulate_array +//===----------------------------------------------------------------------===// + +def OpenACC_ReductionAccumulateArrayOp + : OpenACC_Op<"reduction_accumulate_array", []> { + let summary = "Accumulates elements of an array"; + let description = [{ + Accumulates elements of an array across the specified parallel dimension + given an acc.bounds op. 
+ }]; + let arguments = (ins Arg:$memref, + OpenACC_DataBoundsType:$bounds, + OpenACC_ReductionOperatorAttr:$reductionOperator, + OpenACC_GPUParallelDimsAttr:$par_dims); + let assemblyFormat = [{ + $memref `bounds` `(` $bounds `)` $reductionOperator `:` type($memref) attr-dict + }]; + let hasVerifier = 1; +} + //===----------------------------------------------------------------------===// // acc.kernel_environment //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp b/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp index a3c43a6867868..953acb7b7826e 100644 --- a/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp +++ b/mlir/lib/Dialect/OpenACC/IR/OpenACCCG.cpp @@ -485,6 +485,16 @@ LogicalResult ReductionAccumulateOp::verify() { return success(); } +//===----------------------------------------------------------------------===// +// ReductionAccumulateArrayOp +//===----------------------------------------------------------------------===// + +LogicalResult ReductionAccumulateArrayOp::verify() { + if (getParDims().getArray().empty()) + return emitOpError("par_dims must specify at least one parallel dimension"); + return success(); +} + //===----------------------------------------------------------------------===// // ReductionCombineOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Dialect/OpenACC/invalid-cg.mlir b/mlir/test/Dialect/OpenACC/invalid-cg.mlir index da280bda8ea91..b37d2da406ee2 100644 --- a/mlir/test/Dialect/OpenACC/invalid-cg.mlir +++ b/mlir/test/Dialect/OpenACC/invalid-cg.mlir @@ -75,6 +75,25 @@ func.func @reduction_accumulate_empty_par_dims() { // ----- +func.func @reduction_accumulate_array_invalid_operator(%private: memref<4xi32>, %bounds: !acc.data_bounds_ty) { + acc.reduction_accumulate_array %private bounds(%bounds) + : memref<4xi32> {par_dims = #acc} + // expected-error@-2 {{expected ::mlir::acc::ReductionOperator to be 
one of}} + // expected-error@-3 {{failed to parse OpenACC_ReductionOperatorAttr}} + return +} + +// ----- + +func.func @reduction_accumulate_array_empty_par_dims(%private: memref<4xi32>, %bounds: !acc.data_bounds_ty) { + // expected-error@+1 {{par_dims must specify at least one parallel dimension}} + acc.reduction_accumulate_array %private bounds(%bounds) + : memref<4xi32> {par_dims = #acc} + return +} + +// ----- + func.func @predicate_region_empty() { acc.compute_region { // expected-error@+1 {{region needs to have at least one block}} diff --git a/mlir/test/Dialect/OpenACC/ops-cg.mlir b/mlir/test/Dialect/OpenACC/ops-cg.mlir index 52ca1a38ebb73..6e5582a709bfe 100644 --- a/mlir/test/Dialect/OpenACC/ops-cg.mlir +++ b/mlir/test/Dialect/OpenACC/ops-cg.mlir @@ -320,6 +320,15 @@ func.func @reduction_accumulate_block_thread(%partial: i32, %private: memref, %bounds: !acc.data_bounds_ty) { + acc.reduction_accumulate_array %private bounds(%bounds) : memref<4xi32> {par_dims = #acc} + return +} +// CHECK: acc.reduction_accumulate_array %{{.*}} bounds(%{{.*}}) : memref<4xi32> {par_dims = #acc} + +// ----- + // CHECK-LABEL: func @compute_region_with_results func.func @compute_region_with_results() -> i32 { %w0 = acc.par_width {par_dim = #acc.par_dim} From a22a8e399f1394e8713479c8007cf879589ebc4b Mon Sep 17 00:00:00 2001 From: Jianhui Li Date: Wed, 24 Jun 2026 13:08:47 -0700 Subject: [PATCH 404/511] [mlir][xegpu] adding maybe_usused for unused variable/function when assertion is off (#205629) address the warning brought by the PR https://github.com/llvm/llvm-project/pull/203156 --- mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp index 2a13997aa181f..574eeabac1836 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPULayoutImpl.cpp @@ 
-372,7 +372,8 @@ void xegpu::removeTemporaryLayoutAttrs(Operation *op) { /// Returns true if every dimension of `shape` except the innermost /// `numInnerDims` is a unit (size-1) dimension. -static bool leadingDimsAreUnit(ArrayRef shape, int numInnerDims) { +[[maybe_unused]] static bool leadingDimsAreUnit(ArrayRef shape, + int numInnerDims) { int numLeading = static_cast(shape.size()) - numInnerDims; if (numLeading <= 0) return true; @@ -1453,7 +1454,7 @@ xegpu::setupStoreNdAnchorLayout(xegpu::LayoutKind layoutKind, Type elemTy = srcVecTy.getElementType(); auto subgroupSize = uArch->getSubgroupSize(); auto dataShape = srcVecTy.getShape(); - int rank = srcVecTy.getRank(); + [[maybe_unused]] int rank = srcVecTy.getRank(); assert(rank >= 2 && "Expected at least 2D shape for ND op"); // Compute the default 2D block IO lane layout / lane data. @@ -1506,7 +1507,7 @@ xegpu::setupPrefetchNdAnchorLayout(xegpu::LayoutKind layoutKind, Type elemTy = tdescTy.getElementType(); auto subgroupSize = uArch->getSubgroupSize(); auto dataShape = tdescTy.getShape(); - int rank = tdescTy.getRank(); + [[maybe_unused]] int rank = tdescTy.getRank(); assert(rank >= 2 && "Expected at least 2D shape for ND op"); // Compute the default 2D block IO lane layout / lane data. 
From 36a17d1aecfe584b38ad874f6092a73dadc1f643 Mon Sep 17 00:00:00 2001 From: Ryan Buchner Date: Wed, 24 Jun 2026 13:24:21 -0700 Subject: [PATCH 405/511] [SLP][NFC] Remove extra checks from analyzeRtStrideCandidate() (#205415) These checks duplicate the functionality of the additional checks added in #204013 --- .../Transforms/Vectorize/SLPVectorizer.cpp | 55 ++++--------------- 1 file changed, 12 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index fbf2394593e17..d3e491247f93f 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -7010,14 +7010,10 @@ static bool isReverseOrder(ArrayRef Order) { /// %x + c_2 * stride /// ... /// ``` -/// where each `c_i` is constant. The `Coeffs` will contain `c_0, c_1, c_2, ..` -/// and the SCEV of the `stride` will be returned. +/// where each `c_i` is constant. The SCEV of the `stride` will be returned. 
static const SCEV *calculateRtStride(ArrayRef PointerOps, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, - SmallVectorImpl &SortedIndices, - SmallVectorImpl &Coeffs) { - assert(Coeffs.size() == PointerOps.size() && - "Coeffs vector needs to be of correct size"); + SmallVectorImpl &SortedIndices) { SmallVector SCEVs; const SCEV *PtrSCEVLowest = nullptr; const SCEV *PtrSCEVHighest = nullptr; @@ -7080,7 +7076,6 @@ static const SCEV *calculateRtStride(ArrayRef PointerOps, Type *ElemTy, using DistOrdPair = std::pair; auto Compare = llvm::less_first(); std::set Offsets(Compare); - int Cnt = 0; bool IsConsecutive = true; for (const auto [Idx, PtrSCEV] : enumerate(SCEVs)) { unsigned Dist = 0; @@ -7092,36 +7087,27 @@ static const SCEV *calculateRtStride(ArrayRef PointerOps, Type *ElemTy, const auto *SC = dyn_cast(Coeff); if (!SC || isa(SC)) return nullptr; - Coeffs[Idx] = (int64_t)SC->getAPInt().getLimitedValue(); if (!SE.getMinusSCEV(PtrSCEV, SE.getAddExpr(PtrSCEVLowest, SE.getMulExpr(Stride, SC))) ->isZero()) return nullptr; Dist = SC->getAPInt().getZExtValue(); - } else { - Coeffs[Idx] = 0; } // If the strides are not the same or repeated, we can't vectorize. if ((Dist / Size) * Size != Dist || (Dist / Size) >= SCEVs.size()) return nullptr; - auto Res = Offsets.emplace(Dist, Cnt); + auto Res = Offsets.emplace(Dist, Idx); if (!Res.second) return nullptr; // Consecutive order if the inserted element is the last one. IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end(); - ++Cnt; } - if (Offsets.size() != SCEVs.size()) - return nullptr; SortedIndices.clear(); if (!IsConsecutive) { // Fill SortedIndices array only if it is non-consecutive. 
SortedIndices.resize(PointerOps.size()); - Cnt = 0; - for (const std::pair &Pair : Offsets) { - SortedIndices[Cnt] = Pair.second; - ++Cnt; - } + for (const auto [Idx, Pair] : enumerate(Offsets)) + SortedIndices[Idx] = Pair.second; } return Stride; } @@ -7619,7 +7605,7 @@ bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef PointerOps, // `PointerOps` and their indicies in `PointerOps`. SmallDenseMap, SmallVector>> OffsetToPointerOpIdxMap; - // Track to make sure that only VecSz different base pointers are consumed + // Track to make sure that only VecSz different stride multiples are consumed // Prevents cases such as: // 1, x + 0, x + 1, 2x + 0 from being recognized as legal RT strided as there // are 2 "0" and 2 "1" offsets and a stride of "x" between both offsets @@ -7746,13 +7732,9 @@ bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef PointerOps, // PointerOps_(NumOffsets - 1)[SortedIndices_(NumOffsets - 1)[VecSz - 1]] = // PointerOps[IndicesInAllPointerOps_(NumOffsets - 1)[VecSz - 1]], // ``` - // In order to be able to generate a strided load, we need the following - // checks to pass: - // - // (1) for each `PointerOps_j` check that the distance - // between adjacent pointers are all equal to the same value (stride). - // (2) for each `PointerOps_j` check that coefficients calculated by - // `calculateRtStride` are all the same. + // In order to be able to generate a strided load, for each `PointerOps_j` + // check that the distance between adjacent pointers are all equal to the same + // value (stride). // // As we do that, also calculate SortedIndices. 
Since we should not modify // `SortedIndices` unless we know that all the checks succeed, record the @@ -7784,16 +7766,11 @@ bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef PointerOps, int64_t LowestOffset = SortedOffsetsV[0]; ArrayRef PointerOps0 = OffsetToPointerOpIdxMap[LowestOffset].first; - SmallVector Coeffs0(VecSz); SmallVector SortedIndicesForOffset0; - const SCEV *Stride0 = calculateRtStride(PointerOps0, BaseTy, *DL, *SE, - SortedIndicesForOffset0, Coeffs0); + const SCEV *Stride0 = + calculateRtStride(PointerOps0, BaseTy, *DL, *SE, SortedIndicesForOffset0); if (!Stride0) return false; - unsigned NumCoeffs0 = Coeffs0.size(); - if (NumCoeffs0 * NumOffsets != Sz) - return false; - sort(Coeffs0); ArrayRef IndicesInAllPointerOps0 = OffsetToPointerOpIdxMap[LowestOffset].second; @@ -7801,11 +7778,8 @@ bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef PointerOps, // Now that we know what the common stride and coefficients has to be check // the remaining `PointerOps_j`. - SmallVector Coeffs; SmallVector SortedIndicesForOffset; for (int J : seq(1, NumOffsets)) { - Coeffs.clear(); - Coeffs.resize(VecSz); SortedIndicesForOffset.clear(); int64_t Offset = SortedOffsetsV[J]; @@ -7814,15 +7788,10 @@ bool BoUpSLP::analyzeRtStrideCandidate(ArrayRef PointerOps, ArrayRef IndicesInAllPointerOps = OffsetToPointerOpIdxMap[Offset].second; const SCEV *StrideWithinGroup = calculateRtStride( - PointerOpsForOffset, BaseTy, *DL, *SE, SortedIndicesForOffset, Coeffs); + PointerOpsForOffset, BaseTy, *DL, *SE, SortedIndicesForOffset); if (!StrideWithinGroup || StrideWithinGroup != Stride0) return false; - if (Coeffs.size() != NumCoeffs0) - return false; - sort(Coeffs); - if (Coeffs != Coeffs0) - return false; UpdateSortedIndices(SortedIndicesForOffset, IndicesInAllPointerOps, J); } From ccf694463570357d36f0e1b3622226bdac7482d4 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 24 Jun 2026 22:28:18 +0200 Subject: [PATCH 406/511] [VPlan] Remove unused VPlan::getExitBlock(BasicBlock*) 
(NFC) (#205645) Remove unused function. --- llvm/lib/Transforms/Vectorize/VPlan.cpp | 8 -------- llvm/lib/Transforms/Vectorize/VPlan.h | 4 ---- 2 files changed, 12 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index 5f29f329baf8c..96a563ee3b45c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -899,14 +899,6 @@ VPlan::~VPlan() { delete BackedgeTakenCount; } -VPIRBasicBlock *VPlan::getExitBlock(BasicBlock *IRBB) const { - auto Iter = find_if(getExitBlocks(), [IRBB](const VPIRBasicBlock *VPIRBB) { - return VPIRBB->getIRBasicBlock() == IRBB; - }); - assert(Iter != getExitBlocks().end() && "no exit block found"); - return *Iter; -} - bool VPlan::isExitBlock(VPBlockBase *VPBB) { return is_contained(ExitBlocks, VPBB); } diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index f73118ac31797..bd7f87dbb3a5d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -4913,10 +4913,6 @@ class VPlan { /// the original scalar loop. ArrayRef getExitBlocks() const { return ExitBlocks; } - /// Return the VPIRBasicBlock corresponding to \p IRBB. \p IRBB must be an - /// exit block. - VPIRBasicBlock *getExitBlock(BasicBlock *IRBB) const; - /// Returns true if \p VPBB is an exit block. bool isExitBlock(VPBlockBase *VPBB); From 52e36999f97bcd5d434e1960319aae500903ec47 Mon Sep 17 00:00:00 2001 From: nvptm Date: Wed, 24 Jun 2026 13:33:19 -0700 Subject: [PATCH 407/511] [flang] Relax ignore_tkr(c) for assumed-type BIND(C) descriptor dummies (#205445) Relax `F2023 15.5.2.5 p2` derived-type checks for assumed-type ignore_tkr(c) dummies passed by descriptor to bind(C) procedures. This will allow passing a derived type with a type-bound procedure or FINAL or a parameterized derived type to a TYPE(*), dimension(..) bind(C) dummy. 
--- flang/docs/Directives.md | 6 ++ flang/lib/Semantics/check-call.cpp | 51 ++++++++++------- .../Semantics/call03-ignore-tkr-c-relaxed.f90 | 40 +++++++++++++ .../Semantics/call03-ignore-tkr-c-strict.f90 | 56 +++++++++++++++++++ 4 files changed, 132 insertions(+), 21 deletions(-) create mode 100644 flang/test/Semantics/call03-ignore-tkr-c-relaxed.f90 create mode 100644 flang/test/Semantics/call03-ignore-tkr-c-strict.f90 diff --git a/flang/docs/Directives.md b/flang/docs/Directives.md index 385d44b7ced07..45080acb778e3 100644 --- a/flang/docs/Directives.md +++ b/flang/docs/Directives.md @@ -27,6 +27,12 @@ A list of non-standard directives supported by Flang When the dummy argument is not passed by descriptor (e.g., an assumed-size array in a BIND(C) interface), the base address is extracted from the actual argument's descriptor and passed as a raw pointer. + When the dummy argument is assumed-type (`TYPE(*)`) and passed by descriptor + to a `BIND(C)` procedure, (C) also disables the F2023 15.5.2.5 p2 checks + that would otherwise reject actual arguments whose derived type has type + parameters, type-bound procedures, or final procedures; this is intended for + `BIND(C)` interfaces where the implementation treats the argument as an opaque + CFI descriptor at the call site. The letter (P) ignores pointer and allocatable matching, so that one can pass an allocatable array to routine with pointer array argument and vice versa. 
The letter (M) disables matching of the actual argument's CUDA storage diff --git a/flang/lib/Semantics/check-call.cpp b/flang/lib/Semantics/check-call.cpp index 97bb346cc72bb..f91b9b1d0b67d 100644 --- a/flang/lib/Semantics/check-call.cpp +++ b/flang/lib/Semantics/check-call.cpp @@ -517,27 +517,36 @@ static void CheckExplicitDataArg(const characteristics::DummyDataObject &dummy, actualFirstSymbol && actualFirstSymbol->attrs().test(Attr::VOLATILE)}; if (actualDerived && !actualDerived->IsVectorType()) { if (dummy.type.type().IsAssumedType()) { - if (!actualDerived->parameters().empty()) { // 15.5.2.4(2) - messages.Say( - "Actual argument associated with TYPE(*) %s may not have a parameterized derived type"_err_en_US, - dummyName); - } - if (const Symbol * - tbp{FindImmediateComponent(*actualDerived, [](const Symbol &symbol) { - return symbol.has(); - })}) { // 15.5.2.4(2) - evaluate::SayWithDeclaration(messages, *tbp, - "Actual argument associated with TYPE(*) %s may not have type-bound procedure '%s'"_err_en_US, - dummyName, tbp->name()); - } - auto finals{FinalsForDerivedTypeInstantiation(*actualDerived)}; - if (!finals.empty()) { // 15.5.2.4(2) - SourceName name{finals.front()->name()}; - if (auto *msg{messages.Say( - "Actual argument associated with TYPE(*) %s may not have derived type '%s' with FINAL subroutine '%s'"_err_en_US, - dummyName, actualDerived->typeSymbol().name(), name)}) { - msg->Attach(name, "FINAL subroutine '%s' in derived type '%s'"_en_US, - name, actualDerived->typeSymbol().name()); + // Assumed-type dummies with ignore_tkr(c) passed via descriptor to + // bind(C) procedures model opaque CFI argument passing; the callee does + // not access derived-type structure as TYPE(*). 
+ const bool relaxAssumedTypeDerivedChecks{procedure.IsBindC() && + dummy.ignoreTKR.test(common::IgnoreTKR::Contiguous) && + dummy.IsPassedByDescriptor(/*isBindC=*/true)}; + if (!relaxAssumedTypeDerivedChecks) { + if (!actualDerived->parameters().empty()) { // F2023 15.5.2.5 p2 + messages.Say( + "Actual argument associated with TYPE(*) %s may not have a parameterized derived type"_err_en_US, + dummyName); + } + if (const Symbol *tbp{FindImmediateComponent( + *actualDerived, [](const Symbol &symbol) { + return symbol.has(); + })}) { // F2023 15.5.2.5 p2 + evaluate::SayWithDeclaration(messages, *tbp, + "Actual argument associated with TYPE(*) %s may not have type-bound procedure '%s'"_err_en_US, + dummyName, tbp->name()); + } + auto finals{FinalsForDerivedTypeInstantiation(*actualDerived)}; + if (!finals.empty()) { // F2023 15.5.2.5 p2 + SourceName name{finals.front()->name()}; + if (auto *msg{messages.Say( + "Actual argument associated with TYPE(*) %s may not have derived type '%s' with FINAL subroutine '%s'"_err_en_US, + dummyName, actualDerived->typeSymbol().name(), name)}) { + msg->Attach(name, + "FINAL subroutine '%s' in derived type '%s'"_en_US, name, + actualDerived->typeSymbol().name()); + } } } } diff --git a/flang/test/Semantics/call03-ignore-tkr-c-relaxed.f90 b/flang/test/Semantics/call03-ignore-tkr-c-relaxed.f90 new file mode 100644 index 0000000000000..922f97118e550 --- /dev/null +++ b/flang/test/Semantics/call03-ignore-tkr-c-relaxed.f90 @@ -0,0 +1,40 @@ +! RUN: %flang_fc1 -fsyntax-only %s +! Test that ignore_tkr(c) on an assumed-type bind(C) descriptor dummy +! relaxes F2023 15.5.2.5 p2 restrictions for opaque CFI argument passing. 
+ +module m + type :: tbp + contains + procedure :: binding => subr + end type + type :: pdt(n) + integer, len :: n + end type + type :: final_typ + contains + final :: cleanup + end type + + contains + + subroutine subr(this) + class(tbp), intent(in) :: this + end subroutine + subroutine cleanup(this) + type(final_typ), intent(inout) :: this + end subroutine + subroutine cfi(x) bind(c) + type(*), dimension(..) :: x +!dir$ ignore_tkr(c) x + end subroutine +end module + +program main + use m + type(tbp) :: x + type(pdt(1)) :: y + type(final_typ) :: z + call cfi(x) + call cfi(y) + call cfi(z) +end program diff --git a/flang/test/Semantics/call03-ignore-tkr-c-strict.f90 b/flang/test/Semantics/call03-ignore-tkr-c-strict.f90 new file mode 100644 index 0000000000000..3e379d82996e1 --- /dev/null +++ b/flang/test/Semantics/call03-ignore-tkr-c-strict.f90 @@ -0,0 +1,56 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -pedantic +! Assumed-type dummies must enforce F2023 15.5.2.5 p2 for derived types with +! PDT, TBP, or FINAL unless bind(C), ignore_tkr(c), and descriptor passing +! all apply. + +module m + type :: tbp + contains + procedure :: binding => subr + end type + type :: pdt(n) + integer, len :: n + end type + type :: final_typ + contains + final :: cleanup + end type + + contains + + subroutine subr(this) + class(tbp), intent(in) :: this + end subroutine + subroutine cleanup(this) + type(final_typ), intent(inout) :: this + end subroutine + subroutine cfi(x) bind(c) + type(*), dimension(..) :: x + end subroutine + subroutine not_cfi(x) + type(*), dimension(..) 
:: x +!dir$ ignore_tkr(c) x + end subroutine + subroutine not_descriptor(x) bind(c) + type(*) :: x(*) +!dir$ ignore_tkr(c) x + end subroutine +end module + +program main + use m + type(tbp) :: x + type(tbp), dimension(1) :: arr + type(pdt(1)) :: y + type(final_typ) :: z + !ERROR: Actual argument associated with TYPE(*) dummy argument 'x=' may not have type-bound procedure 'binding' + call cfi(x) + !ERROR: Actual argument associated with TYPE(*) dummy argument 'x=' may not have a parameterized derived type + call cfi(y) + !ERROR: Actual argument associated with TYPE(*) dummy argument 'x=' may not have derived type 'final_typ' with FINAL subroutine 'cleanup' + call cfi(z) + !ERROR: Actual argument associated with TYPE(*) dummy argument 'x=' may not have type-bound procedure 'binding' + call not_cfi(x) + !ERROR: Actual argument associated with TYPE(*) dummy argument 'x=' may not have type-bound procedure 'binding' + call not_descriptor(arr) +end program From 28f6e1af2007ca193b9661614eb0072e8f48fc8d Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Thu, 25 Jun 2026 05:37:57 +0900 Subject: [PATCH 408/511] [SYCL] Disable InOrderQueueCrossDepsShortcutFuncs test on Win (#22424) See https://github.com/intel/llvm/issues/22412 Signed-off-by: Nick Sarnie --- sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp index 8e365878098ad..a46e8a9691867 100644 --- a/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp +++ b/sycl/unittests/scheduler/InOrderQueueHostTaskDeps.cpp @@ -127,7 +127,12 @@ TEST_F(SchedulerTest, InOrderQueueCrossDeps) { EXPECT_EQ(std::get<1>(ExecutedCommands[2]) /*EventsCount*/, 0u); } +#ifdef _WIN32 +// https://github.com/intel/llvm/issues/22412 +TEST_F(SchedulerTest, DISABLED_InOrderQueueCrossDepsShortcutFuncs) { +#else TEST_F(SchedulerTest, InOrderQueueCrossDepsShortcutFuncs) { +#endif 
ExecutedCommands.clear(); sycl::unittest::UrMock<> Mock; mock::getCallbacks().set_before_callback( From befc9691d50ea50731486652bc9f4433c4830dd4 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 24 Jun 2026 22:54:15 +0200 Subject: [PATCH 409/511] [LV] Round outer-loop VPlan VF down to a power of two (#205646) computeVPlanOuterloopVF computes the VF as max(1, RegSize / WidestType). WidestType may not be a power of two, which resulted in a non-power-of-2 VF. Round down like in the inner loop path. --- .../Vectorize/LoopVectorizationPlanner.cpp | 5 +- .../X86/outer-loop-non-power-of-2-type.ll | 95 +++++++++++++++++++ 2 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/LoopVectorize/X86/outer-loop-non-power-of-2-type.ll diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp index 509fe991d2652..dbb5ad28fb4ed 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.cpp @@ -808,7 +808,10 @@ VFSelectionContext::computeVPlanOuterloopVF(ElementCount UserVF) { : TargetTransformInfo::RGK_FixedWidthVector; TypeSize RegSize = TTI.getRegisterBitWidth(RegKind); - unsigned N = std::max(1, RegSize.getKnownMinValue() / WidestType); + // The widest type may be wider than the register width and WidestType may + // not be a power of two; round the element count down to a power of two. 
+ unsigned N = std::max( + 1, llvm::bit_floor(RegSize.getKnownMinValue() / WidestType)); VF = ElementCount::get(N, RegSize.isScalable()); LLVM_DEBUG(dbgs() << "LV: VPlan computed VF " << VF << ".\n"); diff --git a/llvm/test/Transforms/LoopVectorize/X86/outer-loop-non-power-of-2-type.ll b/llvm/test/Transforms/LoopVectorize/X86/outer-loop-non-power-of-2-type.ll new file mode 100644 index 0000000000000..d09f42ff558db --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/outer-loop-non-power-of-2-type.ll @@ -0,0 +1,95 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 6 +; RUN: opt -passes=loop-vectorize -enable-vplan-native-path -S < %s | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +define void @outer_with_i20_access(ptr %a, i64 %n, i64 %m) { +; CHECK-LABEL: define void @outer_with_i20_access( +; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]], i64 [[M:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[M]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[OUTER_LATCH3:.*]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[OUTER_LATCH3]] ] +; CHECK-NEXT: br label %[[INNER1:.*]] +; CHECK: [[INNER1]]: +; CHECK-NEXT: [[J2:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP3:%.*]], %[[INNER1]] ] +; CHECK-NEXT: [[TMP0:%.*]] = 
mul <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i64> [[TMP0]], [[J2]] +; CHECK-NEXT: [[WIDE_GEP:%.*]] = getelementptr i20, ptr [[A]], <4 x i64> [[TMP1]] +; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x i20> @llvm.masked.gather.v4i20.v4p0(<4 x ptr> align 4 [[WIDE_GEP]], <4 x i1> splat (i1 true), <4 x i20> poison) +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i20> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: call void @llvm.masked.scatter.v4i20.v4p0(<4 x i20> [[TMP2]], <4 x ptr> align 4 [[WIDE_GEP]], <4 x i1> splat (i1 true)) +; CHECK-NEXT: [[TMP3]] = add <4 x i64> [[J2]], splat (i64 1) +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[TMP3]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[OUTER_LATCH3]], label %[[INNER1]] +; CHECK: [[OUTER_LATCH3]]: +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4) +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[OUTER:.*]] +; CHECK: [[OUTER]]: +; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[I_NEXT:%.*]], %[[OUTER_LATCH:.*]] ] +; CHECK-NEXT: br label %[[INNER:.*]] +; CHECK: [[INNER]]: +; CHECK-NEXT: [[J:%.*]] = phi i64 [ 0, %[[OUTER]] ], [ [[J_NEXT:%.*]], %[[INNER]] ] +; CHECK-NEXT: [[IDX:%.*]] = mul i64 [[I]], [[M]] +; CHECK-NEXT: [[IDX2:%.*]] = add i64 [[IDX]], [[J]] +; CHECK-NEXT: [[G:%.*]] = getelementptr i20, ptr [[A]], i64 [[IDX2]] +; CHECK-NEXT: 
[[L:%.*]] = load i20, ptr [[G]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add i20 [[L]], [[L]] +; CHECK-NEXT: store i20 [[ADD]], ptr [[G]], align 4 +; CHECK-NEXT: [[J_NEXT]] = add i64 [[J]], 1 +; CHECK-NEXT: [[EC_INNER:%.*]] = icmp eq i64 [[J_NEXT]], [[M]] +; CHECK-NEXT: br i1 [[EC_INNER]], label %[[OUTER_LATCH]], label %[[INNER]] +; CHECK: [[OUTER_LATCH]]: +; CHECK-NEXT: [[I_NEXT]] = add i64 [[I]], 1 +; CHECK-NEXT: [[EC_OUTER:%.*]] = icmp eq i64 [[I_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[EC_OUTER]], label %[[EXIT]], label %[[OUTER]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %outer + +outer: + %i = phi i64 [ 0, %entry ], [ %i.next, %outer.latch ] + br label %inner + +inner: + %j = phi i64 [ 0, %outer ], [ %j.next, %inner ] + %idx = mul i64 %i, %m + %idx2 = add i64 %idx, %j + %g = getelementptr i20, ptr %a, i64 %idx2 + %l = load i20, ptr %g + %add = add i20 %l, %l + store i20 %add, ptr %g + %j.next = add i64 %j, 1 + %ec.inner = icmp eq i64 %j.next, %m + br i1 %ec.inner, label %outer.latch, label %inner + +outer.latch: + %i.next = add i64 %i, 1 + %ec.outer = icmp eq i64 %i.next, %n + br i1 %ec.outer, label %exit, label %outer, !llvm.loop !0 + +exit: + ret void +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.vectorize.enable", i1 true} From 2c2bdd0dfaeec193fb959c6e9da53c2df892f2d7 Mon Sep 17 00:00:00 2001 From: Dave Lee Date: Wed, 24 Jun 2026 13:57:25 -0700 Subject: [PATCH 410/511] [lldb] Remove unused MemorySize methods (NFC) (#205463) These `MemorySize` methods are unused. There are two remaining which are used: `FileSpec::MemorySize` and `ConstString::MemorySize`. 
--- lldb/include/lldb/Core/Address.h | 6 ------ lldb/include/lldb/Core/AddressRange.h | 10 ---------- lldb/include/lldb/Core/Declaration.h | 8 -------- lldb/include/lldb/Core/Mangled.h | 10 ---------- lldb/include/lldb/Symbol/Block.h | 9 --------- lldb/include/lldb/Symbol/Function.h | 24 ------------------------ lldb/include/lldb/Symbol/Variable.h | 2 -- lldb/include/lldb/Symbol/VariableList.h | 2 -- lldb/include/lldb/Utility/FileSpecList.h | 10 ---------- lldb/source/Core/Address.cpp | 6 ------ lldb/source/Core/Declaration.cpp | 2 -- lldb/source/Core/Mangled.cpp | 7 ------- lldb/source/Symbol/Block.cpp | 10 ---------- lldb/source/Symbol/Function.cpp | 13 ------------- lldb/source/Symbol/Variable.cpp | 2 -- lldb/source/Symbol/VariableList.cpp | 8 -------- lldb/source/Utility/FileSpecList.cpp | 14 -------------- 17 files changed, 143 deletions(-) diff --git a/lldb/include/lldb/Core/Address.h b/lldb/include/lldb/Core/Address.h index 15fc30a2e3f92..c851237058c73 100644 --- a/lldb/include/lldb/Core/Address.h +++ b/lldb/include/lldb/Core/Address.h @@ -354,12 +354,6 @@ class Address { /// otherwise. bool IsValid() const { return m_offset != LLDB_INVALID_ADDRESS; } - /// Get the memory cost of this object. - /// - /// \return - /// The number of bytes that this object occupies in memory. - size_t MemorySize() const; - /// Resolve a file virtual address using a section list. /// /// Given a list of sections, attempt to resolve \a addr as an offset into diff --git a/lldb/include/lldb/Core/AddressRange.h b/lldb/include/lldb/Core/AddressRange.h index af26bb3b351a3..08cfffc4d1d5b 100644 --- a/lldb/include/lldb/Core/AddressRange.h +++ b/lldb/include/lldb/Core/AddressRange.h @@ -222,16 +222,6 @@ class AddressRange { /// The size in bytes of this address range. lldb::addr_t GetByteSize() const { return m_byte_size; } - /// Get the memory cost of this object. - /// - /// \return - /// The number of bytes that this object occupies in memory. 
- size_t MemorySize() const { - // Noting special for the memory size of a single AddressRange object, it - // is just the size of itself. - return sizeof(AddressRange); - } - /// Set accessor for the byte size of this range. /// /// \param[in] byte_size diff --git a/lldb/include/lldb/Core/Declaration.h b/lldb/include/lldb/Core/Declaration.h index adfe0da9cc7c8..df7ff70f94025 100644 --- a/lldb/include/lldb/Core/Declaration.h +++ b/lldb/include/lldb/Core/Declaration.h @@ -150,14 +150,6 @@ class Declaration { return m_file && m_line != 0 && m_line != LLDB_INVALID_LINE_NUMBER; } - /// Get the memory cost of this object. - /// - /// \return - /// The number of bytes that this object occupies in memory. - /// The returned value does not include the bytes for any - /// shared string values. - size_t MemorySize() const; - /// Set accessor for the declaration file specification. /// /// \param[in] file_spec diff --git a/lldb/include/lldb/Core/Mangled.h b/lldb/include/lldb/Core/Mangled.h index 6c6f2574ad22e..1c7bbeb05a37d 100644 --- a/lldb/include/lldb/Core/Mangled.h +++ b/lldb/include/lldb/Core/Mangled.h @@ -198,16 +198,6 @@ class Mangled { } bool NameMatches(const RegularExpression ®ex) const; - /// Get the memory cost of this object. - /// - /// Return the size in bytes that this object takes in memory. This returns - /// the size in bytes of this object, not any shared string values it may - /// refer to. - /// - /// \return - /// The number of bytes that this object occupies in memory. - size_t MemorySize() const; - /// Set the string value in this object. 
/// /// This version auto detects if the string is mangled by inspecting the diff --git a/lldb/include/lldb/Symbol/Block.h b/lldb/include/lldb/Symbol/Block.h index 601895631e148..ca7a22e81c855 100644 --- a/lldb/include/lldb/Symbol/Block.h +++ b/lldb/include/lldb/Symbol/Block.h @@ -277,15 +277,6 @@ class Block : public UserID, public SymbolContextScope { CompilerDeclContext GetDeclContext(); - /// Get the memory cost of this object. - /// - /// Returns the cost of this object plus any owned objects from the ranges, - /// variables, and inline function information. - /// - /// \return - /// The number of bytes that this object occupies in memory. - size_t MemorySize() const; - /// Set accessor for any inlined function information. /// /// \param[in] name diff --git a/lldb/include/lldb/Symbol/Function.h b/lldb/include/lldb/Symbol/Function.h index 5f5896ea05a26..7b707b4d72540 100644 --- a/lldb/include/lldb/Symbol/Function.h +++ b/lldb/include/lldb/Symbol/Function.h @@ -105,14 +105,6 @@ class FunctionInfo { /// A const reference to the method name object. ConstString GetName() const; - /// Get the memory cost of this object. - /// - /// \return - /// The number of bytes that this object occupies in memory. - /// The returned value does not include the bytes for any - /// shared string values. - virtual size_t MemorySize() const; - protected: /// Function method name (not a mangled name). ConstString m_name; @@ -231,14 +223,6 @@ class InlineFunctionInfo : public FunctionInfo { /// A const reference to the mangled name object. const Mangled &GetMangled() const; - /// Get the memory cost of this object. - /// - /// \return - /// The number of bytes that this object occupies in memory. - /// The returned value does not include the bytes for any - /// shared string values. - size_t MemorySize() const override; - private: /// Mangled inlined function name (can be empty if there is no mangled /// information). 
@@ -579,14 +563,6 @@ class Function : public UserID, public SymbolContextScope { /// \see SymbolContextScope void DumpSymbolContext(Stream *s) override; - /// Get the memory cost of this object. - /// - /// \return - /// The number of bytes that this object occupies in memory. - /// The returned value does not include the bytes for any - /// shared string values. - size_t MemorySize() const; - /// Get whether compiler optimizations were enabled for this function /// /// The debug information may provide information about whether this diff --git a/lldb/include/lldb/Symbol/Variable.h b/lldb/include/lldb/Symbol/Variable.h index 4950205d3d43a..1e6a36d584212 100644 --- a/lldb/include/lldb/Symbol/Variable.h +++ b/lldb/include/lldb/Symbol/Variable.h @@ -88,8 +88,6 @@ class Variable : public UserID, public std::enable_shared_from_this { // the location that contains this address. bool DumpLocations(Stream *s, const Address &address); - size_t MemorySize() const; - void CalculateSymbolContext(SymbolContext *sc); bool IsInScope(StackFrame *frame); diff --git a/lldb/include/lldb/Symbol/VariableList.h b/lldb/include/lldb/Symbol/VariableList.h index cacbe4cdde8fd..12c25da0fe04f 100644 --- a/lldb/include/lldb/Symbol/VariableList.h +++ b/lldb/include/lldb/Symbol/VariableList.h @@ -64,8 +64,6 @@ class VariableList { uint32_t FindIndexForVariable(Variable *variable); - size_t MemorySize() const; - size_t GetSize() const; bool Empty() const { return m_variables.empty(); } diff --git a/lldb/include/lldb/Utility/FileSpecList.h b/lldb/include/lldb/Utility/FileSpecList.h index 21c9aed78953e..571bc65209507 100644 --- a/lldb/include/lldb/Utility/FileSpecList.h +++ b/lldb/include/lldb/Utility/FileSpecList.h @@ -202,16 +202,6 @@ class FileSpecList { /// returned. const FileSpec &GetFileSpecAtIndex(size_t idx) const; - /// Get the memory cost of this object. - /// - /// Return the size in bytes that this object takes in memory. 
This returns - /// the size in bytes of this object, not any shared string values it may - /// refer to. - /// - /// \return - /// The number of bytes that this object occupies in memory. - size_t MemorySize() const; - bool IsEmpty() const { return m_files.empty(); } /// Get the number of files in the file list. diff --git a/lldb/source/Core/Address.cpp b/lldb/source/Core/Address.cpp index a5f620752acfd..29efb204d7115 100644 --- a/lldb/source/Core/Address.cpp +++ b/lldb/source/Core/Address.cpp @@ -958,12 +958,6 @@ int Address::CompareModulePointerAndOffset(const Address &a, const Address &b) { return 0; } -size_t Address::MemorySize() const { - // Noting special for the memory size of a single Address object, it is just - // the size of itself. - return sizeof(Address); -} - // NOTE: Be careful using this operator. It can correctly compare two // addresses from the same Module correctly. It can't compare two addresses // from different modules in any meaningful way, but it will compare the module diff --git a/lldb/source/Core/Declaration.cpp b/lldb/source/Core/Declaration.cpp index a485c4b9ba48a..5eb574cef1767 100644 --- a/lldb/source/Core/Declaration.cpp +++ b/lldb/source/Core/Declaration.cpp @@ -53,8 +53,6 @@ bool Declaration::DumpStopContext(Stream *s, bool show_fullpaths) const { return false; } -size_t Declaration::MemorySize() const { return sizeof(Declaration); } - int Declaration::Compare(const Declaration &a, const Declaration &b) { int result = FileSpec::Compare(a.m_file, b.m_file, true); if (result) diff --git a/lldb/source/Core/Mangled.cpp b/lldb/source/Core/Mangled.cpp index 3f72571cee9ca..62c4a9131a6f7 100644 --- a/lldb/source/Core/Mangled.cpp +++ b/lldb/source/Core/Mangled.cpp @@ -407,13 +407,6 @@ void Mangled::DumpDebug(Stream *s) const { m_demangled.DumpDebug(s); } -// Return the size in byte that this object takes in memory. 
The size includes -// the size of the objects it owns, and not the strings that it references -// because they are shared strings. -size_t Mangled::MemorySize() const { - return m_mangled.MemorySize() + m_demangled.MemorySize(); -} - // We "guess" the language because we can't determine a symbol's language from // it's name. For example, a Pascal symbol can be mangled using the C++ // Itanium scheme, and defined in a compilation unit within the same module as diff --git a/lldb/source/Symbol/Block.cpp b/lldb/source/Symbol/Block.cpp index 3de3e5eecbf35..1f082d577f9e3 100644 --- a/lldb/source/Symbol/Block.cpp +++ b/lldb/source/Symbol/Block.cpp @@ -367,16 +367,6 @@ void Block::AddRange(const Range &range) { m_ranges.Append(range); } -// Return the current number of bytes that this object occupies in memory -size_t Block::MemorySize() const { - size_t mem_size = sizeof(Block) + m_ranges.GetSize() * sizeof(Range); - if (m_inlineInfoSP.get()) - mem_size += m_inlineInfoSP->MemorySize(); - if (m_variable_list_sp.get()) - mem_size += m_variable_list_sp->MemorySize(); - return mem_size; -} - BlockSP Block::CreateChild(user_id_t uid) { m_children.push_back(std::shared_ptr(new Block(uid, *this))); return m_children.back(); diff --git a/lldb/source/Symbol/Function.cpp b/lldb/source/Symbol/Function.cpp index 359e9369c07c2..17c7e9bb36698 100644 --- a/lldb/source/Symbol/Function.cpp +++ b/lldb/source/Symbol/Function.cpp @@ -59,10 +59,6 @@ const Declaration &FunctionInfo::GetDeclaration() const { ConstString FunctionInfo::GetName() const { return m_name; } -size_t FunctionInfo::MemorySize() const { - return m_name.MemorySize() + m_declaration.MemorySize(); -} - InlineFunctionInfo::InlineFunctionInfo(const char *name, llvm::StringRef mangled, const Declaration *decl_ptr, @@ -116,10 +112,6 @@ Mangled &InlineFunctionInfo::GetMangled() { return m_mangled; } const Mangled &InlineFunctionInfo::GetMangled() const { return m_mangled; } -size_t InlineFunctionInfo::MemorySize() const { - 
return FunctionInfo::MemorySize() + m_mangled.MemorySize(); -} - /// @name Call site related structures /// @{ @@ -509,11 +501,6 @@ void Function::DumpSymbolContext(Stream *s) { s->Printf(", Function{0x%8.8" PRIx64 "}", GetID()); } -size_t Function::MemorySize() const { - size_t mem_size = sizeof(Function) + m_block.MemorySize(); - return mem_size; -} - bool Function::GetIsOptimized() { bool result = false; diff --git a/lldb/source/Symbol/Variable.cpp b/lldb/source/Symbol/Variable.cpp index 3f280d9cec2c6..70a61fc7789c9 100644 --- a/lldb/source/Symbol/Variable.cpp +++ b/lldb/source/Symbol/Variable.cpp @@ -200,8 +200,6 @@ bool Variable::DumpDeclaration(Stream *s, bool show_fullpaths, return dumped_declaration_info; } -size_t Variable::MemorySize() const { return sizeof(Variable); } - CompilerDeclContext Variable::GetDeclContext() { Type *type = GetType(); if (type) diff --git a/lldb/source/Symbol/VariableList.cpp b/lldb/source/Symbol/VariableList.cpp index 9ee2994a3dc12..b9f2494d4a5bd 100644 --- a/lldb/source/Symbol/VariableList.cpp +++ b/lldb/source/Symbol/VariableList.cpp @@ -142,14 +142,6 @@ uint32_t VariableList::FindIndexForVariable(Variable *variable) { return UINT32_MAX; } -size_t VariableList::MemorySize() const { - size_t mem_size = sizeof(VariableList); - const_iterator pos, end = m_variables.end(); - for (pos = m_variables.begin(); pos != end; ++pos) - mem_size += (*pos)->MemorySize(); - return mem_size; -} - size_t VariableList::GetSize() const { return m_variables.size(); } void VariableList::Dump(Stream *s, bool show_context) const { diff --git a/lldb/source/Utility/FileSpecList.cpp b/lldb/source/Utility/FileSpecList.cpp index bdbdd5841df4b..930d89e4a3e62 100644 --- a/lldb/source/Utility/FileSpecList.cpp +++ b/lldb/source/Utility/FileSpecList.cpp @@ -217,19 +217,5 @@ SupportFileNSP SupportFileList::GetSupportFileAtIndex(size_t idx) const { return std::make_shared(); } -// Return the size in bytes that this object takes in memory. 
This returns the -// size in bytes of this object's member variables and any FileSpec objects its -// member variables contain, the result doesn't not include the string values -// for the directories any filenames as those are in shared string pools. -size_t FileSpecList::MemorySize() const { - size_t mem_size = sizeof(FileSpecList); - collection::const_iterator pos, end = m_files.end(); - for (pos = m_files.begin(); pos != end; ++pos) { - mem_size += pos->MemorySize(); - } - - return mem_size; -} - // Return the number of files in the file spec list. size_t FileSpecList::GetSize() const { return m_files.size(); } From 1e122aa313b54cd43bd0b83960c4053d5f698512 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 24 Jun 2026 23:02:14 +0200 Subject: [PATCH 411/511] clang: Replace getTargetID API with isProcessorName (#205653) The "target ID" naming is an AMDGPUism. Replace the getTargetID query with an isProcessorName predicate so the target reports whether the string should be considered a match. This makes alias handling more natural than checking for exact match, and avoids an unnecessary parse of the target ID. Co-authored-by: Claude (Opus 4.8) --- clang/include/clang/Basic/TargetInfo.h | 8 ++++---- clang/lib/Basic/Targets/AMDGPU.cpp | 8 ++++++-- clang/lib/Basic/Targets/AMDGPU.h | 14 +++++--------- clang/lib/Sema/SemaAMDGPU.cpp | 3 +-- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 74030564c74b1..d1914d626c753 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1324,10 +1324,10 @@ class TargetInfo : public TransferrableTargetInfo, return Triple; } - /// Returns the target ID if supported. - virtual std::optional getTargetID() const { - return std::nullopt; - } + /// Returns true if the target's processor is compatible with the processor + /// named by \p Name, i.e. 
\p Name names this target's processor or a + /// compatible processor. + virtual bool isProcessorName(StringRef Name) const { return false; } const char *getDataLayoutString() const { assert(!DataLayoutString.empty() && "Uninitialized DataLayout!"); diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index bfa956fa9a4e3..50f9c1aa1aa02 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -295,8 +295,12 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts, Twine("__")); Builder.defineMacro("__amdgcn_processor__", Twine("\"") + Twine(CanonName) + Twine("\"")); - Builder.defineMacro("__amdgcn_target_id__", - Twine("\"") + Twine(*getTargetID()) + Twine("\"")); + Builder.defineMacro( + "__amdgcn_target_id__", + Twine("\"") + + Twine(getCanonicalTargetID(getArchNameAMDGCN(GPUKind), + OffloadArchFeatures)) + + Twine("\"")); for (auto F : getAllPossibleTargetIDFeatures(getTriple(), CanonName)) { auto Loc = OffloadArchFeatures.find(F); if (Loc != OffloadArchFeatures.end()) { diff --git a/clang/lib/Basic/Targets/AMDGPU.h b/clang/lib/Basic/Targets/AMDGPU.h index baad17487e9a1..b13e9008b67a4 100644 --- a/clang/lib/Basic/Targets/AMDGPU.h +++ b/clang/lib/Basic/Targets/AMDGPU.h @@ -485,15 +485,11 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUTargetInfo final : public TargetInfo { return true; } - std::optional getTargetID() const override { - if (!getTriple().isAMDGCN()) - return std::nullopt; - // When -target-cpu is not set, we assume generic code that it is valid - // for all GPU and use an empty string as target ID to represent that. - if (GPUKind == llvm::AMDGPU::GK_NONE) - return std::string(""); - return getCanonicalTargetID(getArchNameAMDGCN(GPUKind), - OffloadArchFeatures); + bool isProcessorName(StringRef Name) const override { + llvm::AMDGPU::GPUKind NameKind = getTriple().isAMDGCN() + ? 
llvm::AMDGPU::parseArchAMDGCN(Name) + : llvm::AMDGPU::parseArchR600(Name); + return NameKind == GPUKind; } bool hasHIPImageSupport() const override { return HasImage; } diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index 757cdfbf20819..29442617b6a13 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -818,8 +818,7 @@ Expr *SemaAMDGPU::ExpandAMDGPUPredicateBuiltIn(Expr *E) { return *ExpandedPredicates.insert(CE).first; } - if (auto TID = Ctx.getTargetInfo().getTargetID()) - P = TID->find(N) == 0; + P = TI.isProcessorName(N); } else { Expr *Arg = CE->getArg(0); if (!Arg || Arg->getType() != Ctx.BuiltinFnTy) { From 1a08a143dbe394b0b48c8716530ee4059e484607 Mon Sep 17 00:00:00 2001 From: Jeremy Kun Date: Wed, 24 Jun 2026 14:03:46 -0700 Subject: [PATCH 412/511] [mlir][bazel]: only depend on needed LLVM translations in ExecutionEngine (#205619) ExecutionEngine currently pulls in `AllToLLVMIRTranslations` which includes heavy dependencies on GPU dialects (notably NVVMDialect.cpp which takes a whopping 2 minutes to compile!), despite the ExecutionEngine not doing anything GPU-specific. This change reduces the dependencies to just the subset of LLVMIR translations that ExecutionEngine actually uses: Builtin, LLVM dialect, and OpenMP dialect. 
--- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index 10e9741e417cc..490f624f9edea 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -9571,9 +9571,11 @@ cc_library( ], includes = ["include"], deps = [ - ":AllToLLVMIRTranslations", + ":BuiltinToLLVMIRTranslation", ":IR", ":LLVMDialect", + ":LLVMToLLVMIRTranslation", + ":OpenMPToLLVMIRTranslation", ":Support", ":ToLLVMIRTranslation", "//llvm:AllTargetsAsmParsers", From c9b053bb5fee0e22a1974a8f1a6bded5e261194f Mon Sep 17 00:00:00 2001 From: "forking-google-bazel-bot[bot]" <265904573+forking-google-bazel-bot[bot]@users.noreply.github.com> Date: Wed, 24 Jun 2026 14:14:31 -0700 Subject: [PATCH 413/511] [Bazel] Fixes 639c5a0 (#205273) This fixes 639c5a014fad13c683b01c66a1474b7aa47ce7ee. Co-authored-by: Google Bazel Bot --- .../bazel/llvm-project-overlay/libc/BUILD.bazel | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 4a6df7f43f39c..c88797cf898f6 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1605,6 +1605,22 @@ libc_support_library( ], ) +libc_support_library( + name = "__support_osutil_linux_syscall_wrappers_ioctl", + hdrs = ["src/__support/OSUtil/linux/syscall_wrappers/ioctl.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [ + ":__support_cpp_type_traits", + ":__support_error_or", + ":__support_macros_attributes", + ":__support_macros_config", + ":__support_osutil_syscall", + ], +) + libc_support_library( name = "__support_osutil_linux_syscall_wrappers_dup", hdrs = 
["src/__support/OSUtil/linux/syscall_wrappers/dup.h"], From d5388305d1d152499d90591458c905a089fc89c1 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Wed, 24 Jun 2026 23:15:46 +0200 Subject: [PATCH 414/511] [OpenMP][Offload] Use argument pointer array in host kernels (#205355) This is a follow-up to #194333, which changed liboffload's kernel launch interface to take an array of pointers to arguments instead of a contiguous argument buffer, but left the old path intact for OpenMP, to be cleaned up iteratively in later changes. This patch modifies the offload host kernels, and the OpenMP clang codegen, to also use argument pointer arrays. The host was the only plugin that did not support the new submission path. With it converted, OpenMP can migrate to use the new path in all scenarios, after which everything related to contiguous argument buffer and its construction can be removed from plugin interface and the backends. The change itself is fairly straightforward. The bulk of the diff is automated test updates. In the runtime, the host kernel signature becomes an array of pointers (void **), and the codegen is updated to match. With each argument being a pointer, the kernel function now adds one load to read the argument's value. 
Assisted-by: Claude --- clang/lib/CodeGen/CGStmtOpenMP.cpp | 35 +- clang/test/OpenMP/distribute_codegen.cpp | 150 +- clang/test/OpenMP/distribute_simd_codegen.cpp | 328 ++- .../target_dyn_groupprivate_codegen.cpp | 278 ++- .../OpenMP/target_firstprivate_codegen.cpp | 1068 +++++---- .../target_ompx_dyn_cgroup_mem_codegen.cpp | 278 ++- clang/test/OpenMP/target_parallel_codegen.cpp | 560 +++-- .../OpenMP/target_parallel_for_codegen.cpp | 644 +++--- .../target_parallel_for_simd_codegen.cpp | 2002 ++++++++-------- ...target_parallel_generic_loop_codegen-2.cpp | 80 +- .../OpenMP/target_parallel_if_codegen.cpp | 266 ++- .../target_parallel_num_threads_codegen.cpp | 250 +- ...et_parallel_num_threads_strict_codegen.cpp | 380 ++-- clang/test/OpenMP/target_private_codegen.cpp | 5 +- .../OpenMP/target_task_affinity_codegen.cpp | 162 +- clang/test/OpenMP/target_teams_codegen.cpp | 860 +++---- .../target_teams_distribute_codegen.cpp | 760 ++++--- ..._teams_distribute_parallel_for_codegen.cpp | 150 +- ...bute_parallel_for_firstprivate_codegen.cpp | 216 +- ...istribute_parallel_for_private_codegen.cpp | 25 +- ...s_distribute_parallel_for_simd_codegen.cpp | 196 +- ...parallel_for_simd_firstprivate_codegen.cpp | 216 +- ...bute_parallel_for_simd_private_codegen.cpp | 25 +- .../target_teams_distribute_simd_codegen.cpp | 2010 +++++++++-------- .../target_teams_generic_loop_codegen-1.cpp | 150 +- ...get_teams_generic_loop_private_codegen.cpp | 25 +- .../test/OpenMP/target_teams_map_codegen.cpp | 296 +-- .../OpenMP/target_teams_num_teams_codegen.cpp | 250 +- .../target_teams_thread_limit_codegen.cpp | 268 ++- clang/test/OpenMP/teams_codegen.cpp | 156 +- offload/plugins-nextgen/host/src/rtl.cpp | 7 +- 31 files changed, 6683 insertions(+), 5413 deletions(-) diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 314f4e14dd1d2..95fd6694437fe 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -751,8 +751,7 @@ static 
llvm::Function *emitOutlinedFunctionPrologueAggregate( Address ContextAddr = CGF.GetAddrOfLocalVar(CD->getContextParam()); ContextV = CGF.Builder.CreateLoad(ContextAddr); - // The runtime passes arguments as a flat array of promoted intptr_t values. - llvm::Type *IntPtrTy = CGF.IntPtrTy; + // The runtime passes arguments as an array of pointers. llvm::Type *PtrTy = CGF.Builder.getPtrTy(); llvm::Align PtrAlign = CGM.getDataLayout().getPointerABIAlignment(0); CharUnits SlotAlign = CharUnits::fromQuantity(PtrAlign.value()); @@ -760,11 +759,12 @@ static llvm::Function *emitOutlinedFunctionPrologueAggregate( for (auto [FD, C, FieldIdx] : llvm::zip(RD->fields(), CS.captures(), llvm::seq(RD->getNumFields()))) { - llvm::Value *Slot = - CGF.Builder.CreateConstInBoundsGEP1_32(IntPtrTy, ContextV, FieldIdx); + llvm::Value *SlotPtr = + CGF.Builder.CreateConstInBoundsGEP1_32(PtrTy, ContextV, FieldIdx); + llvm::Value *Slot = CGF.Builder.CreateAlignedLoad(PtrTy, SlotPtr, PtrAlign); - // Generate the appropriate load from the GEP into the __context struct. - // This includes all of the user arguments as well as the implicit kernel + // Generate the appropriate load from the per-argument storage. This + // includes all of the user arguments as well as the implicit kernel // argument pointer. 
if (C.capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { const VarDecl *CurVD = C.getCapturedVar(); @@ -973,11 +973,14 @@ llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate( const ImplicitParamDecl *Param = CD->getParam(I); if (Param == CD->getContextParam()) continue; - llvm::Value *ParamAddr = Builder.CreateConstInBoundsGEP1_32( - IntPtrTy, ContextV, FieldIdx, Twine(Param->getName()) + ".addr"); + llvm::Align PtrAlign = CGM.getDataLayout().getPointerABIAlignment(0); + llvm::Value *SlotPtr = Builder.CreateConstInBoundsGEP1_32( + Builder.getPtrTy(), ContextV, FieldIdx, + Twine(Param->getName()) + ".addr"); + llvm::Value *ParamAddr = + Builder.CreateAlignedLoad(Builder.getPtrTy(), SlotPtr, PtrAlign); llvm::Value *ParamVal = Builder.CreateAlignedLoad( - Builder.getPtrTy(), ParamAddr, - CGM.getDataLayout().getPointerABIAlignment(0), Param->getName()); + Builder.getPtrTy(), ParamAddr, PtrAlign, Param->getName()); Address ParamLocalAddr = CreateMemTemp(Param->getType(), Param->getName()); Builder.CreateStore(ParamVal, ParamLocalAddr); @@ -1016,8 +1019,10 @@ llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate( for (auto [FD, InnerParam, SlotIdx] : llvm::zip( RD->fields(), F->args(), llvm::seq(RD->getNumFields()))) { - llvm::Value *Slot = WrapperCGF.Builder.CreateConstInBoundsGEP1_32( - WrapperCGF.IntPtrTy, WrapperContextV, SlotIdx); + llvm::Value *SlotPtr = WrapperCGF.Builder.CreateConstInBoundsGEP1_32( + WrapperCGF.Builder.getPtrTy(), WrapperContextV, SlotIdx); + llvm::Value *Slot = WrapperCGF.Builder.CreateAlignedLoad( + WrapperCGF.Builder.getPtrTy(), SlotPtr, PtrAlign); llvm::Value *Val = WrapperCGF.Builder.CreateAlignedLoad( InnerParam.getType(), Slot, PtrAlign, InnerParam.getName()); CallArgs.push_back(Val); @@ -1026,8 +1031,10 @@ llvm::Function *CodeGenFunction::GenerateOpenMPCapturedStmtFunctionAggregate( // Handle the load from the implicit dyn_ptr at the end of the __context. 
unsigned SlotIdx = RD->getNumFields(); auto InnerParam = F->arg_begin() + SlotIdx; - llvm::Value *Slot = WrapperCGF.Builder.CreateConstInBoundsGEP1_32( - WrapperCGF.IntPtrTy, WrapperContextV, SlotIdx); + llvm::Value *SlotPtr = WrapperCGF.Builder.CreateConstInBoundsGEP1_32( + WrapperCGF.Builder.getPtrTy(), WrapperContextV, SlotIdx); + llvm::Value *Slot = WrapperCGF.Builder.CreateAlignedLoad( + WrapperCGF.Builder.getPtrTy(), SlotPtr, PtrAlign); llvm::Value *Val = WrapperCGF.Builder.CreateAlignedLoad( InnerParam->getType(), Slot, PtrAlign, InnerParam->getName()); CallArgs.push_back(Val); diff --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp index afd18e91911dd..f5b9a3a5d7c9e 100644 --- a/clang/test/OpenMP/distribute_codegen.cpp +++ b/clang/test/OpenMP/distribute_codegen.cpp @@ -2033,12 +2033,17 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[C:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[D:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 8 
+// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK17-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK17-NEXT: ret void @@ -2145,12 +2150,17 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[C:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[D:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK17-NEXT: [[D:%.*]] = load ptr, 
ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK17-NEXT: ret void @@ -2257,12 +2267,17 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[C:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[D:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK17-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK17-NEXT: 
[[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK17-NEXT: ret void @@ -2387,11 +2402,13 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i8 [[TMP2]], ptr [[A]], align 1 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i8 [[TMP3]], ptr [[A]], align 1 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.omp_outlined, ptr [[A]]) // CHECK17-NEXT: ret void @@ -2498,11 +2515,13 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108.omp_outlined, ptr [[AA]]) // CHECK17-NEXT: ret void @@ -2597,12 +2616,17 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[C:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[D:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK19-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l56.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK19-NEXT: ret void @@ -2705,12 +2729,17 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[C:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[D:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK19-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l68.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK19-NEXT: ret void @@ -2813,12 +2842,17 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[C:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[D:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK19-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l80.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK19-NEXT: ret void @@ -2939,11 +2973,13 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 4 -// CHECK19-NEXT: store i8 [[TMP2]], ptr [[A]], align 1 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i8 [[TMP3]], ptr [[A]], align 1 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l92.omp_outlined, ptr [[A]]) // CHECK19-NEXT: ret void @@ -3050,11 +3086,13 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 -// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l108.omp_outlined, ptr [[AA]]) // CHECK19-NEXT: ret void diff --git a/clang/test/OpenMP/distribute_simd_codegen.cpp b/clang/test/OpenMP/distribute_simd_codegen.cpp index 329978c9a01e6..a160bc5b0b5ac 100644 --- a/clang/test/OpenMP/distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_codegen.cpp @@ -5627,12 +5627,17 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[C:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[D:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK17-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store ptr 
[[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK17-NEXT: ret void @@ -5748,12 +5753,17 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[C:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[D:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK17-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK17-NEXT: ret void @@ -5867,12 +5877,17 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[C:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[D:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK17-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK17-NEXT: ret void @@ -6005,14 +6020,17 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i8 [[TMP2]], ptr [[I]], align 1 -// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8 -// CHECK17-NEXT: store i8 [[TMP4]], ptr [[A]], align 1 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i8 [[TMP3]], ptr [[I]], align 1 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i8 [[TMP6]], ptr [[A]], align 1 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115.omp_outlined, ptr [[I]], ptr [[A]]) // CHECK17-NEXT: ret void @@ -6140,11 +6158,13 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135.omp_outlined, ptr [[AA]]) // CHECK17-NEXT: ret void @@ -6246,12 +6266,17 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[C:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[D:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK19-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK19-NEXT: ret void @@ -6363,12 +6388,17 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[C:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[D:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK19-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK19-NEXT: ret void @@ -6478,12 +6508,17 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[C:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[D:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK19-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK19-NEXT: ret void @@ -6612,14 +6647,17 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 4 -// CHECK19-NEXT: store i8 [[TMP2]], ptr [[I]], align 1 -// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4 -// CHECK19-NEXT: store i8 [[TMP4]], ptr [[A]], align 1 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i8 [[TMP3]], ptr [[I]], align 1 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i8 [[TMP6]], ptr [[A]], align 1 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115.omp_outlined, ptr [[I]], ptr [[A]]) // CHECK19-NEXT: ret void @@ -6747,11 +6785,13 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 -// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135.omp_outlined, ptr [[AA]]) // CHECK19-NEXT: ret void @@ -6853,12 +6893,17 @@ int fint(void) { return ftemplate(); } // CHECK21-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[B:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[C:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK21-NEXT: [[D:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK21-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK21-NEXT: ret void @@ -6974,12 +7019,17 @@ int fint(void) { return ftemplate(); } // CHECK21-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[B:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[C:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK21-NEXT: [[D:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK21-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK21-NEXT: ret void @@ -7093,12 +7143,17 @@ int fint(void) { return ftemplate(); } // CHECK21-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[B:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[C:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK21-NEXT: [[D:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK21-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK21-NEXT: ret void @@ -7231,14 +7286,17 @@ int fint(void) { return ftemplate(); } // CHECK21-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 8 -// CHECK21-NEXT: store i8 [[TMP2]], ptr [[I]], align 1 -// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8 -// CHECK21-NEXT: store i8 [[TMP4]], ptr [[A]], align 1 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 8 +// CHECK21-NEXT: store i8 [[TMP3]], ptr [[I]], align 1 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// CHECK21-NEXT: store i8 [[TMP6]], ptr [[A]], align 1 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115.omp_outlined, ptr [[I]], ptr [[A]]) // CHECK21-NEXT: ret void @@ -7397,11 +7455,13 @@ int fint(void) { return ftemplate(); } // CHECK21-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 8 -// CHECK21-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 +// CHECK21-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135.omp_outlined, ptr [[AA]]) // CHECK21-NEXT: ret void @@ -7503,12 +7563,17 @@ int fint(void) { return ftemplate(); } // CHECK23-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[B:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[C:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK23-NEXT: [[D:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK23-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z23without_schedule_clausePfS_S_S__l70.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK23-NEXT: ret void @@ -7620,12 +7685,17 @@ int fint(void) { return ftemplate(); } // CHECK23-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[B:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[C:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK23-NEXT: [[D:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK23-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z18static_not_chunkedPfS_S_S__l86.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK23-NEXT: ret void @@ -7735,12 +7805,17 @@ int fint(void) { return ftemplate(); } // CHECK23-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[B:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[C:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK23-NEXT: [[D:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[A:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[C:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK23-NEXT: [[D:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14static_chunkedPfS_S_S__l103.omp_outlined, ptr [[A]], ptr [[B]], ptr [[C]], ptr [[D]]) // CHECK23-NEXT: ret void @@ -7869,14 +7944,17 @@ int fint(void) { return ftemplate(); } // CHECK23-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 4 -// CHECK23-NEXT: store i8 [[TMP2]], ptr [[I]], align 1 -// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4 -// CHECK23-NEXT: store i8 [[TMP4]], ptr [[A]], align 1 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 4 +// CHECK23-NEXT: store i8 [[TMP3]], ptr [[I]], align 1 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 4 +// CHECK23-NEXT: store i8 [[TMP6]], ptr [[A]], align 1 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z12test_precondv_l115.omp_outlined, ptr [[I]], ptr [[A]]) // CHECK23-NEXT: ret void @@ -8035,11 +8113,13 @@ int fint(void) { return ftemplate(); } // CHECK23-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 -// CHECK23-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 +// CHECK23-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_v_l135.omp_outlined, ptr [[AA]]) // CHECK23-NEXT: ret void diff --git a/clang/test/OpenMP/target_dyn_groupprivate_codegen.cpp b/clang/test/OpenMP/target_dyn_groupprivate_codegen.cpp index 353686c9d9953..7501c51faac3e 100644 --- a/clang/test/OpenMP/target_dyn_groupprivate_codegen.cpp +++ b/clang/test/OpenMP/target_dyn_groupprivate_codegen.cpp @@ -969,7 +969,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META32]] -// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR2]] +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR2]], !inline_history [[META33:![0-9]+]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META32]] @@ -1900,7 +1900,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META33]] -// CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR2]] +// CHECK3-NEXT: call void [[TMP10]](ptr 
[[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR2]], !inline_history [[META34:![0-9]+]] // CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META33]] @@ -2020,24 +2020,28 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i32 [[TMP7]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP8]], i32 0) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l71.omp_outlined, i64 [[TMP10]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP12]], i32 0) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[N_CASTED]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l71.omp_outlined, i64 [[TMP14]]) // CHECK9-NEXT: ret void // // @@ -2255,11 +2259,13 @@ int bar(int n){ // CHECK9-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l75.omp_outlined) // CHECK9-NEXT: ret void @@ -2285,21 +2291,25 @@ int bar(int n){ // CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, 
ptr [[TMP9]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[B]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l88.omp_outlined, ptr [[THIS]], i64 [[TMP7]]) +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[B_CASTED]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l88.omp_outlined, ptr [[THIS]], i64 [[TMP11]]) // CHECK9-NEXT: ret void // // @@ -2330,10 +2340,12 @@ int bar(int n){ // CHECK9-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds nuw 
[[STRUCT_S1:%.*]], ptr [[THIS]], i32 0, i32 0 // CHECK9-NEXT: store double 2.500000e+00, ptr [[A]], align 8 @@ -2347,8 +2359,9 @@ int bar(int n){ // CHECK9-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55.omp_outlined) // CHECK9-NEXT: ret void @@ -2377,28 +2390,32 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr 
[[A]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 8 -// CHECK9-NEXT: store i16 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i16 [[TMP7]], ptr [[B]], align 2 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i16 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK9-NEXT: [[TMP9:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP9]], i32 0) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i16, ptr [[B]], align 2 -// CHECK9-NEXT: store i16 [[TMP12]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l60.omp_outlined, i64 [[TMP11]], i64 [[TMP13]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK9-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP13]], i32 0) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: store i16 [[TMP16]], ptr [[B_CASTED]], align 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l60.omp_outlined, i64 [[TMP15]], i64 [[TMP17]]) // CHECK9-NEXT: ret void // // @@ -2433,24 +2450,28 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], 
align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP8]], i32 0) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l71.omp_outlined, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP12]], i32 0) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l71.omp_outlined, i32 [[TMP14]]) // CHECK11-NEXT: ret void // // @@ -2664,11 +2685,13 @@ int bar(int n){ // CHECK11-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l75.omp_outlined) // CHECK11-NEXT: ret void @@ -2694,21 +2717,25 @@ int bar(int n){ // CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: 
[[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l88.omp_outlined, ptr [[THIS]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l88.omp_outlined, ptr [[THIS]], i32 [[TMP11]]) // CHECK11-NEXT: ret void // // @@ -2739,10 +2766,12 @@ int bar(int n){ // CHECK11-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // 
CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS]], i32 0, i32 0 // CHECK11-NEXT: store double 2.500000e+00, ptr [[A]], align 4 @@ -2756,8 +2785,9 @@ int bar(int n){ // CHECK11-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55.omp_outlined) // CHECK11-NEXT: ret void @@ -2786,28 +2816,32 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] 
= load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 4 -// CHECK11-NEXT: store i16 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i16 [[TMP7]], ptr [[B]], align 2 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i16 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK11-NEXT: [[TMP9:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP9]], i32 0) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i16, ptr [[B]], align 2 -// CHECK11-NEXT: store i16 [[TMP12]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l60.omp_outlined, i32 [[TMP11]], i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK11-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP13]], i32 0) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: store i16 [[TMP16]], ptr [[B_CASTED]], align 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l60.omp_outlined, i32 [[TMP15]], i32 [[TMP17]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_firstprivate_codegen.cpp b/clang/test/OpenMP/target_firstprivate_codegen.cpp index 57c35327b234d..aba714e973e05 100644 --- a/clang/test/OpenMP/target_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_firstprivate_codegen.cpp @@ -10161,15 +10161,19 @@ int bar(int n, double *ptr) { // TCHECK-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // TCHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // TCHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// TCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// TCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// TCHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK-NEXT: [[P:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// TCHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// TCHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// TCHECK-NEXT: store 
i32 [[TMP4]], ptr [[GA]], align 4 -// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// TCHECK-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// TCHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// TCHECK-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK-NEXT: [[P:%.*]] = load ptr, ptr [[TMP4]], align 8 +// TCHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// TCHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 +// TCHECK-NEXT: store i32 [[TMP7]], ptr [[GA]], align 4 +// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP8]], align 8 // TCHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // TCHECK-NEXT: ret void // @@ -10189,46 +10193,56 @@ int bar(int n, double *ptr) { // TCHECK-NEXT: [[D5:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 // TCHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // TCHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// TCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// TCHECK-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 8 -// TCHECK-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// TCHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// TCHECK-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 8 -// TCHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// TCHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// TCHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, 
ptr [[TMP0]], i32 3 -// TCHECK-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 8 -// TCHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// TCHECK-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 8 -// TCHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// TCHECK-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 -// TCHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 6 -// TCHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 -// TCHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 7 -// TCHECK-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 8 -// TCHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 8 -// TCHECK-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 8 -// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 9 -// TCHECK-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// TCHECK-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 +// TCHECK-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// TCHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// TCHECK-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 +// TCHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// TCHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// TCHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// TCHECK-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 8 +// TCHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// TCHECK-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 
+// TCHECK-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 8 +// TCHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// TCHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// TCHECK-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8 +// TCHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// TCHECK-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// TCHECK-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 8 +// TCHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// TCHECK-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// TCHECK-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 8 +// TCHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// TCHECK-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// TCHECK-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 8 +// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// TCHECK-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP23]], align 8 // TCHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B2]], ptr align 4 [[B]], i64 40, i1 false) -// TCHECK-NEXT: [[TMP14:%.*]] = call ptr @llvm.stacksave.p0() -// TCHECK-NEXT: store ptr [[TMP14]], ptr [[SAVED_STACK]], align 8 -// TCHECK-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP5]], align 4 -// TCHECK-NEXT: store i64 [[TMP5]], ptr [[__VLA_EXPR0]], align 8 -// TCHECK-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP5]], 4 -// TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VLA]], ptr align 4 [[BN]], i64 [[TMP15]], i1 false) +// TCHECK-NEXT: [[TMP24:%.*]] = call ptr @llvm.stacksave.p0() +// TCHECK-NEXT: store ptr [[TMP24]], ptr [[SAVED_STACK]], align 8 +// TCHECK-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP8]], align 4 +// TCHECK-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], 
align 8 +// TCHECK-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP8]], 4 +// TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VLA]], ptr align 4 [[BN]], i64 [[TMP25]], i1 false) // TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C3]], ptr align 8 [[C]], i64 400, i1 false) -// TCHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] -// TCHECK-NEXT: [[VLA4:%.*]] = alloca double, i64 [[TMP16]], align 8 -// TCHECK-NEXT: store i64 [[TMP9]], ptr [[__VLA_EXPR1]], align 8 -// TCHECK-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR2]], align 8 -// TCHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] -// TCHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 -// TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[VLA4]], ptr align 8 [[CN]], i64 [[TMP18]], i1 false) +// TCHECK-NEXT: [[TMP26:%.*]] = mul nuw i64 [[TMP15]], [[TMP18]] +// TCHECK-NEXT: [[VLA4:%.*]] = alloca double, i64 [[TMP26]], align 8 +// TCHECK-NEXT: store i64 [[TMP15]], ptr [[__VLA_EXPR1]], align 8 +// TCHECK-NEXT: store i64 [[TMP18]], ptr [[__VLA_EXPR2]], align 8 +// TCHECK-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP15]], [[TMP18]] +// TCHECK-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 8 +// TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[VLA4]], ptr align 8 [[CN]], i64 [[TMP28]], i1 false) // TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[D5]], ptr align 8 [[D]], i64 16, i1 false) -// TCHECK-NEXT: [[TMP19:%.*]] = load i16, ptr [[AA]], align 2 -// TCHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 +// TCHECK-NEXT: [[TMP29:%.*]] = load i16, ptr [[AA]], align 2 +// TCHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP29]] to i32 // TCHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // TCHECK-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD]] to i16 // TCHECK-NEXT: store i16 [[CONV6]], ptr [[AA]], align 2 @@ -10239,16 +10253,16 @@ int bar(int n, double *ptr) { // TCHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C3]], i64 0, i64 1 // TCHECK-NEXT: 
[[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX8]], i64 0, i64 2 // TCHECK-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX9]], align 8 -// TCHECK-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP11]] -// TCHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[VLA4]], i64 [[TMP20]] +// TCHECK-NEXT: [[TMP30:%.*]] = mul nsw i64 1, [[TMP18]] +// TCHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[VLA4]], i64 [[TMP30]] // TCHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i64 3 // TCHECK-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX11]], align 8 // TCHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D5]], i32 0, i32 0 // TCHECK-NEXT: store i64 1, ptr [[X]], align 8 // TCHECK-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D5]], i32 0, i32 1 // TCHECK-NEXT: store i8 1, ptr [[Y]], align 8 -// TCHECK-NEXT: [[TMP21:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// TCHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP21]]) +// TCHECK-NEXT: [[TMP31:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// TCHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP31]]) // TCHECK-NEXT: ret void // // @@ -10259,22 +10273,25 @@ int bar(int n, double *ptr) { // TCHECK-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // TCHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // TCHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// TCHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// TCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// TCHECK-NEXT: [[E:%.*]] = load ptr, ptr [[TMP1]], align 8 -// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// TCHECK-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK-NEXT: [[PTR:%.*]] = load ptr, ptr [[TMP1]], 
align 8 +// TCHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// TCHECK-NEXT: [[E:%.*]] = load ptr, ptr [[TMP3]], align 8 +// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // TCHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // TCHECK-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0:%.*]], ptr [[E]], i32 0, i32 0 -// TCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 -// TCHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP2]] to double -// TCHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR]], align 8 -// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 0 +// TCHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[X]], align 4 +// TCHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// TCHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR]], align 8 +// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 0 // TCHECK-NEXT: store double [[CONV]], ptr [[ARRAYIDX]], align 8 -// TCHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR]], align 8 -// TCHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i64 0 -// TCHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX2]], align 8 -// TCHECK-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 +// TCHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[PTR]], align 8 +// TCHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP7]], i64 0 +// TCHECK-NEXT: [[TMP8:%.*]] = load double, ptr [[ARRAYIDX2]], align 8 +// TCHECK-NEXT: [[INC:%.*]] = fadd double [[TMP8]], 1.000000e+00 // TCHECK-NEXT: store double [[INC]], ptr [[ARRAYIDX2]], align 8 // TCHECK-NEXT: ret void // @@ -10289,29 +10306,33 @@ int bar(int n, double *ptr) { // TCHECK-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 
// TCHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // TCHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// TCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// TCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// TCHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// TCHECK-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8 -// TCHECK-NEXT: store i8 [[TMP4]], ptr [[AAA]], align 1 -// TCHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// TCHECK-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 -// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// TCHECK-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// TCHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// TCHECK-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// TCHECK-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// TCHECK-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// TCHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// TCHECK-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 8 +// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 8 // TCHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B2]], ptr align 4 [[B]], i64 40, i1 false) -// TCHECK-NEXT: 
[[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// TCHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// TCHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// TCHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], 1 // TCHECK-NEXT: store i32 [[ADD]], ptr [[A]], align 4 -// TCHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA]], align 1 -// TCHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// TCHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// TCHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP11]] to i32 // TCHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // TCHECK-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // TCHECK-NEXT: store i8 [[CONV4]], ptr [[AAA]], align 1 // TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B2]], i64 0, i64 2 -// TCHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// TCHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP8]], 1 +// TCHECK-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// TCHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 // TCHECK-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // TCHECK-NEXT: ret void // @@ -10327,45 +10348,51 @@ int bar(int n, double *ptr) { // TCHECK-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // TCHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // TCHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// TCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// TCHECK-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 8 -// TCHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// TCHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// TCHECK-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// TCHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// TCHECK-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// TCHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// TCHECK-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], 
align 8 -// TCHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// TCHECK-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 8 -// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// TCHECK-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// TCHECK-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 +// TCHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// TCHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 +// TCHECK-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// TCHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// TCHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// TCHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// TCHECK-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 +// TCHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// TCHECK-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// TCHECK-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 8 +// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// TCHECK-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 8 // TCHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// TCHECK-NEXT: [[TMP9:%.*]] = call ptr @llvm.stacksave.p0() -// TCHECK-NEXT: store ptr [[TMP9]], ptr [[SAVED_STACK]], align 8 -// TCHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP5]], [[TMP7]] -// TCHECK-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP10]], align 2 -// TCHECK-NEXT: store i64 
[[TMP5]], ptr [[__VLA_EXPR0]], align 8 -// TCHECK-NEXT: store i64 [[TMP7]], ptr [[__VLA_EXPR1]], align 8 -// TCHECK-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP5]], [[TMP7]] -// TCHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2 -// TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 2 [[VLA]], ptr align 2 [[C]], i64 [[TMP12]], i1 false) -// TCHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 -// TCHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double +// TCHECK-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave.p0() +// TCHECK-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 8 +// TCHECK-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP8]], [[TMP11]] +// TCHECK-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP16]], align 2 +// TCHECK-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 +// TCHECK-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR1]], align 8 +// TCHECK-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP8]], [[TMP11]] +// TCHECK-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 2 +// TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 2 [[VLA]], ptr align 2 [[C]], i64 [[TMP18]], i1 false) +// TCHECK-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// TCHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // TCHECK-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 // TCHECK-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS]], i32 0, i32 0 // TCHECK-NEXT: store double [[ADD]], ptr [[A]], align 8 // TCHECK-NEXT: [[A2:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS]], i32 0, i32 0 -// TCHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[A2]], align 8 -// TCHECK-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 +// TCHECK-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 8 +// TCHECK-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 // TCHECK-NEXT: store double [[INC]], ptr [[A2]], align 8 // TCHECK-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 -// TCHECK-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP7]] -// 
TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP15]] +// TCHECK-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP11]] +// TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP21]] // TCHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 // TCHECK-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 -// TCHECK-NEXT: [[TMP16:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// TCHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP16]]) +// TCHECK-NEXT: [[TMP22:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// TCHECK-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP22]]) // TCHECK-NEXT: ret void // // @@ -10378,21 +10405,24 @@ int bar(int n, double *ptr) { // TCHECK-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 // TCHECK-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // TCHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// TCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// TCHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// TCHECK-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// TCHECK-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 8 -// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// TCHECK-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// TCHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// TCHECK-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// TCHECK-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 +// TCHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr 
[[TMP0]], i32 2 +// TCHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP6]], align 8 // TCHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // TCHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B2]], ptr align 4 [[B]], i64 40, i1 false) -// TCHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 -// TCHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +// TCHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// TCHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 // TCHECK-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // TCHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B2]], i64 0, i64 2 -// TCHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// TCHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// TCHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// TCHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // TCHECK-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // TCHECK-NEXT: ret void // @@ -10406,15 +10436,19 @@ int bar(int n, double *ptr) { // TCHECK1-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // TCHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // TCHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// TCHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// TCHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// TCHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK1-NEXT: [[P:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// TCHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// TCHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// TCHECK1-NEXT: store i32 [[TMP4]], ptr [[GA]], align 4 -// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// TCHECK1-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP1:%.*]] = 
getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// TCHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// TCHECK1-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK1-NEXT: [[P:%.*]] = load ptr, ptr [[TMP4]], align 8 +// TCHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// TCHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 +// TCHECK1-NEXT: store i32 [[TMP7]], ptr [[GA]], align 4 +// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK1-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP8]], align 8 // TCHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // TCHECK1-NEXT: ret void // @@ -10434,46 +10468,56 @@ int bar(int n, double *ptr) { // TCHECK1-NEXT: [[D5:%.*]] = alloca [[STRUCT_TT:%.*]], align 8 // TCHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // TCHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// TCHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// TCHECK1-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 8 -// TCHECK1-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// TCHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// TCHECK1-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 8 -// TCHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// TCHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// TCHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// TCHECK1-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 8 -// TCHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// TCHECK1-NEXT: [[C:%.*]] = load ptr, 
ptr [[TMP7]], align 8 -// TCHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// TCHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 -// TCHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 6 -// TCHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 -// TCHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 7 -// TCHECK1-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 8 -// TCHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 8 -// TCHECK1-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 8 -// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 9 -// TCHECK1-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// TCHECK1-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 +// TCHECK1-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// TCHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// TCHECK1-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 +// TCHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// TCHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// TCHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// TCHECK1-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 8 +// TCHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// TCHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// TCHECK1-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 8 +// TCHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// TCHECK1-NEXT: [[TMP14:%.*]] = 
load ptr, ptr [[TMP13]], align 8 +// TCHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8 +// TCHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// TCHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// TCHECK1-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 8 +// TCHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// TCHECK1-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// TCHECK1-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 8 +// TCHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// TCHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// TCHECK1-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 8 +// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// TCHECK1-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK1-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP23]], align 8 // TCHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B2]], ptr align 4 [[B]], i64 40, i1 false) -// TCHECK1-NEXT: [[TMP14:%.*]] = call ptr @llvm.stacksave.p0() -// TCHECK1-NEXT: store ptr [[TMP14]], ptr [[SAVED_STACK]], align 8 -// TCHECK1-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP5]], align 4 -// TCHECK1-NEXT: store i64 [[TMP5]], ptr [[__VLA_EXPR0]], align 8 -// TCHECK1-NEXT: [[TMP15:%.*]] = mul nuw i64 [[TMP5]], 4 -// TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VLA]], ptr align 4 [[BN]], i64 [[TMP15]], i1 false) +// TCHECK1-NEXT: [[TMP24:%.*]] = call ptr @llvm.stacksave.p0() +// TCHECK1-NEXT: store ptr [[TMP24]], ptr [[SAVED_STACK]], align 8 +// TCHECK1-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP8]], align 4 +// TCHECK1-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 +// TCHECK1-NEXT: [[TMP25:%.*]] = mul nuw i64 [[TMP8]], 4 +// TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[VLA]], ptr align 4 
[[BN]], i64 [[TMP25]], i1 false) // TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[C3]], ptr align 8 [[C]], i64 400, i1 false) -// TCHECK1-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] -// TCHECK1-NEXT: [[VLA4:%.*]] = alloca double, i64 [[TMP16]], align 8 -// TCHECK1-NEXT: store i64 [[TMP9]], ptr [[__VLA_EXPR1]], align 8 -// TCHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR2]], align 8 -// TCHECK1-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP9]], [[TMP11]] -// TCHECK1-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 8 -// TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[VLA4]], ptr align 8 [[CN]], i64 [[TMP18]], i1 false) +// TCHECK1-NEXT: [[TMP26:%.*]] = mul nuw i64 [[TMP15]], [[TMP18]] +// TCHECK1-NEXT: [[VLA4:%.*]] = alloca double, i64 [[TMP26]], align 8 +// TCHECK1-NEXT: store i64 [[TMP15]], ptr [[__VLA_EXPR1]], align 8 +// TCHECK1-NEXT: store i64 [[TMP18]], ptr [[__VLA_EXPR2]], align 8 +// TCHECK1-NEXT: [[TMP27:%.*]] = mul nuw i64 [[TMP15]], [[TMP18]] +// TCHECK1-NEXT: [[TMP28:%.*]] = mul nuw i64 [[TMP27]], 8 +// TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[VLA4]], ptr align 8 [[CN]], i64 [[TMP28]], i1 false) // TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[D5]], ptr align 8 [[D]], i64 16, i1 false) -// TCHECK1-NEXT: [[TMP19:%.*]] = load i16, ptr [[AA]], align 2 -// TCHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 +// TCHECK1-NEXT: [[TMP29:%.*]] = load i16, ptr [[AA]], align 2 +// TCHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP29]] to i32 // TCHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // TCHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD]] to i16 // TCHECK1-NEXT: store i16 [[CONV6]], ptr [[AA]], align 2 @@ -10484,16 +10528,16 @@ int bar(int n, double *ptr) { // TCHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C3]], i64 0, i64 1 // TCHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX8]], i64 0, i64 2 // TCHECK1-NEXT: store double 
1.000000e+00, ptr [[ARRAYIDX9]], align 8 -// TCHECK1-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP11]] -// TCHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[VLA4]], i64 [[TMP20]] +// TCHECK1-NEXT: [[TMP30:%.*]] = mul nsw i64 1, [[TMP18]] +// TCHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[VLA4]], i64 [[TMP30]] // TCHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i64 3 // TCHECK1-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX11]], align 8 // TCHECK1-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D5]], i32 0, i32 0 // TCHECK1-NEXT: store i64 1, ptr [[X]], align 8 // TCHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D5]], i32 0, i32 1 // TCHECK1-NEXT: store i8 1, ptr [[Y]], align 8 -// TCHECK1-NEXT: [[TMP21:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// TCHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP21]]) +// TCHECK1-NEXT: [[TMP31:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// TCHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP31]]) // TCHECK1-NEXT: ret void // // @@ -10504,22 +10548,25 @@ int bar(int n, double *ptr) { // TCHECK1-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // TCHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // TCHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// TCHECK1-NEXT: [[PTR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// TCHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// TCHECK1-NEXT: [[E:%.*]] = load ptr, ptr [[TMP1]], align 8 -// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// TCHECK1-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK1-NEXT: [[PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 +// TCHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// 
TCHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// TCHECK1-NEXT: [[E:%.*]] = load ptr, ptr [[TMP3]], align 8 +// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK1-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // TCHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // TCHECK1-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0:%.*]], ptr [[E]], i32 0, i32 0 -// TCHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 -// TCHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP2]] to double -// TCHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR]], align 8 -// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i64 0 +// TCHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[X]], align 4 +// TCHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// TCHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR]], align 8 +// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 0 // TCHECK1-NEXT: store double [[CONV]], ptr [[ARRAYIDX]], align 8 -// TCHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR]], align 8 -// TCHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i64 0 -// TCHECK1-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX2]], align 8 -// TCHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 +// TCHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[PTR]], align 8 +// TCHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP7]], i64 0 +// TCHECK1-NEXT: [[TMP8:%.*]] = load double, ptr [[ARRAYIDX2]], align 8 +// TCHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP8]], 1.000000e+00 // TCHECK1-NEXT: store double [[INC]], ptr [[ARRAYIDX2]], align 8 // TCHECK1-NEXT: ret void // @@ -10534,29 +10581,33 @@ int bar(int n, double *ptr) { // TCHECK1-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 // TCHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], 
align 8 // TCHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// TCHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// TCHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// TCHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// TCHECK1-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8 -// TCHECK1-NEXT: store i8 [[TMP4]], ptr [[AAA]], align 1 -// TCHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// TCHECK1-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 -// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// TCHECK1-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// TCHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// TCHECK1-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// TCHECK1-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 +// TCHECK1-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// TCHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// TCHECK1-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 8 +// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK1-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 8 // TCHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B2]], ptr align 4 [[B]], i64 40, i1 false) -// TCHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], 
align 4 -// TCHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// TCHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// TCHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], 1 // TCHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 -// TCHECK1-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA]], align 1 -// TCHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// TCHECK1-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// TCHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP11]] to i32 // TCHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // TCHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // TCHECK1-NEXT: store i8 [[CONV4]], ptr [[AAA]], align 1 // TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B2]], i64 0, i64 2 -// TCHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// TCHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP8]], 1 +// TCHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// TCHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 // TCHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // TCHECK1-NEXT: ret void // @@ -10572,45 +10623,51 @@ int bar(int n, double *ptr) { // TCHECK1-NEXT: [[__VLA_EXPR1:%.*]] = alloca i64, align 8 // TCHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // TCHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// TCHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// TCHECK1-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 8 -// TCHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// TCHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// TCHECK1-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// TCHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// TCHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// TCHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// TCHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8 
-// TCHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// TCHECK1-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 8 -// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// TCHECK1-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// TCHECK1-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 +// TCHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK1-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// TCHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 +// TCHECK1-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// TCHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// TCHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// TCHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// TCHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 +// TCHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// TCHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// TCHECK1-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 8 +// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// TCHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK1-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 8 // TCHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// TCHECK1-NEXT: [[TMP9:%.*]] = call ptr @llvm.stacksave.p0() -// TCHECK1-NEXT: store ptr [[TMP9]], ptr [[SAVED_STACK]], align 8 -// TCHECK1-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP5]], [[TMP7]] -// TCHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP10]], align 2 -// 
TCHECK1-NEXT: store i64 [[TMP5]], ptr [[__VLA_EXPR0]], align 8 -// TCHECK1-NEXT: store i64 [[TMP7]], ptr [[__VLA_EXPR1]], align 8 -// TCHECK1-NEXT: [[TMP11:%.*]] = mul nuw i64 [[TMP5]], [[TMP7]] -// TCHECK1-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 2 -// TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 2 [[VLA]], ptr align 2 [[C]], i64 [[TMP12]], i1 false) -// TCHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 -// TCHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double +// TCHECK1-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave.p0() +// TCHECK1-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 8 +// TCHECK1-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP8]], [[TMP11]] +// TCHECK1-NEXT: [[VLA:%.*]] = alloca i16, i64 [[TMP16]], align 2 +// TCHECK1-NEXT: store i64 [[TMP8]], ptr [[__VLA_EXPR0]], align 8 +// TCHECK1-NEXT: store i64 [[TMP11]], ptr [[__VLA_EXPR1]], align 8 +// TCHECK1-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP8]], [[TMP11]] +// TCHECK1-NEXT: [[TMP18:%.*]] = mul nuw i64 [[TMP17]], 2 +// TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 2 [[VLA]], ptr align 2 [[C]], i64 [[TMP18]], i1 false) +// TCHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// TCHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // TCHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 // TCHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS]], i32 0, i32 0 // TCHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8 // TCHECK1-NEXT: [[A2:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS]], i32 0, i32 0 -// TCHECK1-NEXT: [[TMP14:%.*]] = load double, ptr [[A2]], align 8 -// TCHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 +// TCHECK1-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 8 +// TCHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 // TCHECK1-NEXT: store double [[INC]], ptr [[A2]], align 8 // TCHECK1-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 -// TCHECK1-NEXT: 
[[TMP15:%.*]] = mul nsw i64 1, [[TMP7]] -// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP15]] +// TCHECK1-NEXT: [[TMP21:%.*]] = mul nsw i64 1, [[TMP11]] +// TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i64 [[TMP21]] // TCHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 // TCHECK1-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 -// TCHECK1-NEXT: [[TMP16:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 -// TCHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP16]]) +// TCHECK1-NEXT: [[TMP22:%.*]] = load ptr, ptr [[SAVED_STACK]], align 8 +// TCHECK1-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP22]]) // TCHECK1-NEXT: ret void // // @@ -10623,21 +10680,24 @@ int bar(int n, double *ptr) { // TCHECK1-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 // TCHECK1-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // TCHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// TCHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// TCHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// TCHECK1-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// TCHECK1-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 8 -// TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// TCHECK1-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK1-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// TCHECK1-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// TCHECK1-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK1-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// TCHECK1-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 +// 
TCHECK1-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK1-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP6]], align 8 // TCHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // TCHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[B2]], ptr align 4 [[B]], i64 40, i1 false) -// TCHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 -// TCHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +// TCHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// TCHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 // TCHECK1-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // TCHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B2]], i64 0, i64 2 -// TCHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// TCHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// TCHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// TCHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // TCHECK1-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // TCHECK1-NEXT: ret void // @@ -10651,15 +10711,19 @@ int bar(int n, double *ptr) { // TCHECK2-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // TCHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // TCHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// TCHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// TCHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// TCHECK2-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK2-NEXT: [[P:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// TCHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// TCHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// TCHECK2-NEXT: store i32 [[TMP4]], ptr [[GA]], align 4 -// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// TCHECK2-NEXT: [[DYN_PTR:%.*]] = 
load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// TCHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// TCHECK2-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK2-NEXT: [[P:%.*]] = load ptr, ptr [[TMP4]], align 4 +// TCHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// TCHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// TCHECK2-NEXT: store i32 [[TMP7]], ptr [[GA]], align 4 +// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK2-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP8]], align 4 // TCHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // TCHECK2-NEXT: ret void // @@ -10679,46 +10743,56 @@ int bar(int n, double *ptr) { // TCHECK2-NEXT: [[D5:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 // TCHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // TCHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// TCHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// TCHECK2-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 -// TCHECK2-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// TCHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// TCHECK2-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 4 -// TCHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// TCHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// TCHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// TCHECK2-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 4 -// TCHECK2-NEXT: [[TMP7:%.*]] = getelementptr 
inbounds i32, ptr [[TMP0]], i32 4 -// TCHECK2-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 4 -// TCHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// TCHECK2-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// TCHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 6 -// TCHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// TCHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 7 -// TCHECK2-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 4 -// TCHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -// TCHECK2-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 4 -// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 9 -// TCHECK2-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// TCHECK2-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 +// TCHECK2-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// TCHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// TCHECK2-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 +// TCHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// TCHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// TCHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// TCHECK2-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 4 +// TCHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// TCHECK2-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// TCHECK2-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 4 +// TCHECK2-NEXT: [[TMP13:%.*]] = 
getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// TCHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// TCHECK2-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// TCHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// TCHECK2-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// TCHECK2-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// TCHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// TCHECK2-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// TCHECK2-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 4 +// TCHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// TCHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 +// TCHECK2-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 4 +// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// TCHECK2-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK2-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP23]], align 4 // TCHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B2]], ptr align 4 [[B]], i32 40, i1 false) -// TCHECK2-NEXT: [[TMP14:%.*]] = call ptr @llvm.stacksave.p0() -// TCHECK2-NEXT: store ptr [[TMP14]], ptr [[SAVED_STACK]], align 4 -// TCHECK2-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP5]], align 4 -// TCHECK2-NEXT: store i32 [[TMP5]], ptr [[__VLA_EXPR0]], align 4 -// TCHECK2-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP5]], 4 -// TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VLA]], ptr align 4 [[BN]], i32 [[TMP15]], i1 false) +// TCHECK2-NEXT: [[TMP24:%.*]] = call ptr @llvm.stacksave.p0() +// TCHECK2-NEXT: store ptr [[TMP24]], ptr [[SAVED_STACK]], align 4 +// TCHECK2-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP8]], align 4 +// TCHECK2-NEXT: store i32 [[TMP8]], ptr [[__VLA_EXPR0]], align 4 +// TCHECK2-NEXT: [[TMP25:%.*]] = mul nuw i32 [[TMP8]], 4 +// 
TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VLA]], ptr align 4 [[BN]], i32 [[TMP25]], i1 false) // TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[C3]], ptr align 8 [[C]], i32 400, i1 false) -// TCHECK2-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP9]], [[TMP11]] -// TCHECK2-NEXT: [[VLA4:%.*]] = alloca double, i32 [[TMP16]], align 8 -// TCHECK2-NEXT: store i32 [[TMP9]], ptr [[__VLA_EXPR1]], align 4 -// TCHECK2-NEXT: store i32 [[TMP11]], ptr [[__VLA_EXPR2]], align 4 -// TCHECK2-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP9]], [[TMP11]] -// TCHECK2-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 -// TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[VLA4]], ptr align 8 [[CN]], i32 [[TMP18]], i1 false) +// TCHECK2-NEXT: [[TMP26:%.*]] = mul nuw i32 [[TMP15]], [[TMP18]] +// TCHECK2-NEXT: [[VLA4:%.*]] = alloca double, i32 [[TMP26]], align 8 +// TCHECK2-NEXT: store i32 [[TMP15]], ptr [[__VLA_EXPR1]], align 4 +// TCHECK2-NEXT: store i32 [[TMP18]], ptr [[__VLA_EXPR2]], align 4 +// TCHECK2-NEXT: [[TMP27:%.*]] = mul nuw i32 [[TMP15]], [[TMP18]] +// TCHECK2-NEXT: [[TMP28:%.*]] = mul nuw i32 [[TMP27]], 8 +// TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[VLA4]], ptr align 8 [[CN]], i32 [[TMP28]], i1 false) // TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[D5]], ptr align 4 [[D]], i32 12, i1 false) -// TCHECK2-NEXT: [[TMP19:%.*]] = load i16, ptr [[AA]], align 2 -// TCHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 +// TCHECK2-NEXT: [[TMP29:%.*]] = load i16, ptr [[AA]], align 2 +// TCHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP29]] to i32 // TCHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // TCHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD]] to i16 // TCHECK2-NEXT: store i16 [[CONV6]], ptr [[AA]], align 2 @@ -10729,16 +10803,16 @@ int bar(int n, double *ptr) { // TCHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C3]], i32 0, i32 1 // TCHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds 
[10 x double], ptr [[ARRAYIDX8]], i32 0, i32 2 // TCHECK2-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX9]], align 8 -// TCHECK2-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP11]] -// TCHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[VLA4]], i32 [[TMP20]] +// TCHECK2-NEXT: [[TMP30:%.*]] = mul nsw i32 1, [[TMP18]] +// TCHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[VLA4]], i32 [[TMP30]] // TCHECK2-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i32 3 // TCHECK2-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX11]], align 8 // TCHECK2-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D5]], i32 0, i32 0 // TCHECK2-NEXT: store i64 1, ptr [[X]], align 4 // TCHECK2-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D5]], i32 0, i32 1 // TCHECK2-NEXT: store i8 1, ptr [[Y]], align 4 -// TCHECK2-NEXT: [[TMP21:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// TCHECK2-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP21]]) +// TCHECK2-NEXT: [[TMP31:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// TCHECK2-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP31]]) // TCHECK2-NEXT: ret void // // @@ -10749,22 +10823,25 @@ int bar(int n, double *ptr) { // TCHECK2-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // TCHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // TCHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// TCHECK2-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// TCHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// TCHECK2-NEXT: [[E:%.*]] = load ptr, ptr [[TMP1]], align 4 -// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// TCHECK2-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK2-NEXT: [[PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 +// 
TCHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// TCHECK2-NEXT: [[E:%.*]] = load ptr, ptr [[TMP3]], align 4 +// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK2-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // TCHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // TCHECK2-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0:%.*]], ptr [[E]], i32 0, i32 0 -// TCHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 -// TCHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP2]] to double -// TCHECK2-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR]], align 4 -// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 0 +// TCHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[X]], align 4 +// TCHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// TCHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR]], align 4 +// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 0 // TCHECK2-NEXT: store double [[CONV]], ptr [[ARRAYIDX]], align 4 -// TCHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR]], align 4 -// TCHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i32 0 -// TCHECK2-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX2]], align 4 -// TCHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 +// TCHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[PTR]], align 4 +// TCHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP7]], i32 0 +// TCHECK2-NEXT: [[TMP8:%.*]] = load double, ptr [[ARRAYIDX2]], align 4 +// TCHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP8]], 1.000000e+00 // TCHECK2-NEXT: store double [[INC]], ptr [[ARRAYIDX2]], align 4 // TCHECK2-NEXT: ret void // @@ -10779,29 +10856,33 @@ int bar(int n, double *ptr) { // TCHECK2-NEXT: [[B2:%.*]] = alloca [10 
x i32], align 4 // TCHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // TCHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// TCHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// TCHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// TCHECK2-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// TCHECK2-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4 -// TCHECK2-NEXT: store i8 [[TMP4]], ptr [[AAA]], align 1 -// TCHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// TCHECK2-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 -// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// TCHECK2-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// TCHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// TCHECK2-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// TCHECK2-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 4 +// TCHECK2-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// TCHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK2-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// TCHECK2-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 4 +// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK2-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK2-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 4 // TCHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B2]], ptr align 4 
[[B]], i32 40, i1 false) -// TCHECK2-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// TCHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// TCHECK2-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// TCHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], 1 // TCHECK2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 -// TCHECK2-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA]], align 1 -// TCHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// TCHECK2-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// TCHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP11]] to i32 // TCHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // TCHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // TCHECK2-NEXT: store i8 [[CONV4]], ptr [[AAA]], align 1 // TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B2]], i32 0, i32 2 -// TCHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// TCHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP8]], 1 +// TCHECK2-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// TCHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 // TCHECK2-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // TCHECK2-NEXT: ret void // @@ -10817,45 +10898,51 @@ int bar(int n, double *ptr) { // TCHECK2-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // TCHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // TCHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// TCHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// TCHECK2-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 4 -// TCHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// TCHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// TCHECK2-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// TCHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +// TCHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[TMP1]], align 4 +// TCHECK2-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 +// TCHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK2-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // TCHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// TCHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// TCHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// TCHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// TCHECK2-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 4 -// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// TCHECK2-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK2-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// TCHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK2-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// TCHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// TCHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK2-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// TCHECK2-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// TCHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// TCHECK2-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 +// TCHECK2-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 4 +// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// TCHECK2-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK2-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 4 // TCHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// TCHECK2-NEXT: [[TMP9:%.*]] = call ptr @llvm.stacksave.p0() -// TCHECK2-NEXT: store ptr [[TMP9]], ptr [[SAVED_STACK]], align 4 -// TCHECK2-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP5]], [[TMP7]] -// TCHECK2-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP10]], align 
2 -// TCHECK2-NEXT: store i32 [[TMP5]], ptr [[__VLA_EXPR0]], align 4 -// TCHECK2-NEXT: store i32 [[TMP7]], ptr [[__VLA_EXPR1]], align 4 -// TCHECK2-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP5]], [[TMP7]] -// TCHECK2-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 2 -// TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[VLA]], ptr align 2 [[C]], i32 [[TMP12]], i1 false) -// TCHECK2-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 -// TCHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double +// TCHECK2-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave.p0() +// TCHECK2-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 4 +// TCHECK2-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP8]], [[TMP11]] +// TCHECK2-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP16]], align 2 +// TCHECK2-NEXT: store i32 [[TMP8]], ptr [[__VLA_EXPR0]], align 4 +// TCHECK2-NEXT: store i32 [[TMP11]], ptr [[__VLA_EXPR1]], align 4 +// TCHECK2-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP8]], [[TMP11]] +// TCHECK2-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 2 +// TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[VLA]], ptr align 2 [[C]], i32 [[TMP18]], i1 false) +// TCHECK2-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// TCHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // TCHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 // TCHECK2-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS]], i32 0, i32 0 // TCHECK2-NEXT: store double [[ADD]], ptr [[A]], align 4 // TCHECK2-NEXT: [[A2:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS]], i32 0, i32 0 -// TCHECK2-NEXT: [[TMP14:%.*]] = load double, ptr [[A2]], align 4 -// TCHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 +// TCHECK2-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 4 +// TCHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 // TCHECK2-NEXT: store double [[INC]], ptr [[A2]], align 4 // TCHECK2-NEXT: [[CONV3:%.*]] = fptosi double [[INC]] to i16 -// 
TCHECK2-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP7]] -// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP15]] +// TCHECK2-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP11]] +// TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP21]] // TCHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 // TCHECK2-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 -// TCHECK2-NEXT: [[TMP16:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// TCHECK2-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP16]]) +// TCHECK2-NEXT: [[TMP22:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// TCHECK2-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP22]]) // TCHECK2-NEXT: ret void // // @@ -10868,21 +10955,24 @@ int bar(int n, double *ptr) { // TCHECK2-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 // TCHECK2-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // TCHECK2-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// TCHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// TCHECK2-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// TCHECK2-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// TCHECK2-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 4 -// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// TCHECK2-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK2-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK2-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// TCHECK2-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// TCHECK2-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK2-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// TCHECK2-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], 
align 4 +// TCHECK2-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK2-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK2-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP6]], align 4 // TCHECK2-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // TCHECK2-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B2]], ptr align 4 [[B]], i32 40, i1 false) -// TCHECK2-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 -// TCHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +// TCHECK2-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// TCHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 // TCHECK2-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // TCHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B2]], i32 0, i32 2 -// TCHECK2-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// TCHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// TCHECK2-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// TCHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // TCHECK2-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // TCHECK2-NEXT: ret void // @@ -10896,15 +10986,19 @@ int bar(int n, double *ptr) { // TCHECK3-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // TCHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // TCHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// TCHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// TCHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// TCHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK3-NEXT: [[P:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// TCHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// TCHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// TCHECK3-NEXT: store i32 [[TMP4]], ptr [[GA]], align 4 -// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// TCHECK3-NEXT: 
[[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// TCHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// TCHECK3-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK3-NEXT: [[P:%.*]] = load ptr, ptr [[TMP4]], align 4 +// TCHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// TCHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// TCHECK3-NEXT: store i32 [[TMP7]], ptr [[GA]], align 4 +// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK3-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP8]], align 4 // TCHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // TCHECK3-NEXT: ret void // @@ -10924,46 +11018,56 @@ int bar(int n, double *ptr) { // TCHECK3-NEXT: [[D5:%.*]] = alloca [[STRUCT_TT:%.*]], align 4 // TCHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // TCHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// TCHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// TCHECK3-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 -// TCHECK3-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// TCHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// TCHECK3-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 4 -// TCHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// TCHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// TCHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// TCHECK3-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 4 -// TCHECK3-NEXT: [[TMP7:%.*]] 
= getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// TCHECK3-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 4 -// TCHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// TCHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// TCHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 6 -// TCHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// TCHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 7 -// TCHECK3-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 4 -// TCHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -// TCHECK3-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 4 -// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 9 -// TCHECK3-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// TCHECK3-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 +// TCHECK3-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// TCHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// TCHECK3-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 +// TCHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// TCHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// TCHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// TCHECK3-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 4 +// TCHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// TCHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// TCHECK3-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 4 +// TCHECK3-NEXT: 
[[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// TCHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// TCHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// TCHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// TCHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// TCHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// TCHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// TCHECK3-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// TCHECK3-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 4 +// TCHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// TCHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 +// TCHECK3-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 4 +// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// TCHECK3-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK3-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP23]], align 4 // TCHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B2]], ptr align 4 [[B]], i32 40, i1 false) -// TCHECK3-NEXT: [[TMP14:%.*]] = call ptr @llvm.stacksave.p0() -// TCHECK3-NEXT: store ptr [[TMP14]], ptr [[SAVED_STACK]], align 4 -// TCHECK3-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP5]], align 4 -// TCHECK3-NEXT: store i32 [[TMP5]], ptr [[__VLA_EXPR0]], align 4 -// TCHECK3-NEXT: [[TMP15:%.*]] = mul nuw i32 [[TMP5]], 4 -// TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VLA]], ptr align 4 [[BN]], i32 [[TMP15]], i1 false) +// TCHECK3-NEXT: [[TMP24:%.*]] = call ptr @llvm.stacksave.p0() +// TCHECK3-NEXT: store ptr [[TMP24]], ptr [[SAVED_STACK]], align 4 +// TCHECK3-NEXT: [[VLA:%.*]] = alloca float, i32 [[TMP8]], align 4 +// TCHECK3-NEXT: store i32 [[TMP8]], ptr [[__VLA_EXPR0]], align 4 +// TCHECK3-NEXT: [[TMP25:%.*]] = mul nuw i32 
[[TMP8]], 4 +// TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[VLA]], ptr align 4 [[BN]], i32 [[TMP25]], i1 false) // TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[C3]], ptr align 8 [[C]], i32 400, i1 false) -// TCHECK3-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP9]], [[TMP11]] -// TCHECK3-NEXT: [[VLA4:%.*]] = alloca double, i32 [[TMP16]], align 8 -// TCHECK3-NEXT: store i32 [[TMP9]], ptr [[__VLA_EXPR1]], align 4 -// TCHECK3-NEXT: store i32 [[TMP11]], ptr [[__VLA_EXPR2]], align 4 -// TCHECK3-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP9]], [[TMP11]] -// TCHECK3-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 8 -// TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[VLA4]], ptr align 8 [[CN]], i32 [[TMP18]], i1 false) +// TCHECK3-NEXT: [[TMP26:%.*]] = mul nuw i32 [[TMP15]], [[TMP18]] +// TCHECK3-NEXT: [[VLA4:%.*]] = alloca double, i32 [[TMP26]], align 8 +// TCHECK3-NEXT: store i32 [[TMP15]], ptr [[__VLA_EXPR1]], align 4 +// TCHECK3-NEXT: store i32 [[TMP18]], ptr [[__VLA_EXPR2]], align 4 +// TCHECK3-NEXT: [[TMP27:%.*]] = mul nuw i32 [[TMP15]], [[TMP18]] +// TCHECK3-NEXT: [[TMP28:%.*]] = mul nuw i32 [[TMP27]], 8 +// TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 8 [[VLA4]], ptr align 8 [[CN]], i32 [[TMP28]], i1 false) // TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[D5]], ptr align 4 [[D]], i32 12, i1 false) -// TCHECK3-NEXT: [[TMP19:%.*]] = load i16, ptr [[AA]], align 2 -// TCHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 +// TCHECK3-NEXT: [[TMP29:%.*]] = load i16, ptr [[AA]], align 2 +// TCHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP29]] to i32 // TCHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV]], 1 // TCHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD]] to i16 // TCHECK3-NEXT: store i16 [[CONV6]], ptr [[AA]], align 2 @@ -10974,16 +11078,16 @@ int bar(int n, double *ptr) { // TCHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[C3]], i32 0, i32 1 // TCHECK3-NEXT: [[ARRAYIDX9:%.*]] = 
getelementptr inbounds [10 x double], ptr [[ARRAYIDX8]], i32 0, i32 2 // TCHECK3-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX9]], align 8 -// TCHECK3-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP11]] -// TCHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[VLA4]], i32 [[TMP20]] +// TCHECK3-NEXT: [[TMP30:%.*]] = mul nsw i32 1, [[TMP18]] +// TCHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, ptr [[VLA4]], i32 [[TMP30]] // TCHECK3-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX10]], i32 3 // TCHECK3-NEXT: store double 1.000000e+00, ptr [[ARRAYIDX11]], align 8 // TCHECK3-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D5]], i32 0, i32 0 // TCHECK3-NEXT: store i64 1, ptr [[X]], align 4 // TCHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[D5]], i32 0, i32 1 // TCHECK3-NEXT: store i8 1, ptr [[Y]], align 4 -// TCHECK3-NEXT: [[TMP21:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// TCHECK3-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP21]]) +// TCHECK3-NEXT: [[TMP31:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// TCHECK3-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP31]]) // TCHECK3-NEXT: ret void // // @@ -10994,22 +11098,25 @@ int bar(int n, double *ptr) { // TCHECK3-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // TCHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // TCHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// TCHECK3-NEXT: [[PTR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// TCHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// TCHECK3-NEXT: [[E:%.*]] = load ptr, ptr [[TMP1]], align 4 -// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// TCHECK3-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK3-NEXT: [[PTR:%.*]] = load ptr, ptr 
[[TMP1]], align 4 +// TCHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// TCHECK3-NEXT: [[E:%.*]] = load ptr, ptr [[TMP3]], align 4 +// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK3-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // TCHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // TCHECK3-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT_0:%.*]], ptr [[E]], i32 0, i32 0 -// TCHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 -// TCHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP2]] to double -// TCHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[PTR]], align 4 -// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP3]], i32 0 +// TCHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[X]], align 4 +// TCHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP5]] to double +// TCHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[PTR]], align 4 +// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 0 // TCHECK3-NEXT: store double [[CONV]], ptr [[ARRAYIDX]], align 4 -// TCHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[PTR]], align 4 -// TCHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP4]], i32 0 -// TCHECK3-NEXT: [[TMP5:%.*]] = load double, ptr [[ARRAYIDX2]], align 4 -// TCHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP5]], 1.000000e+00 +// TCHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[PTR]], align 4 +// TCHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[TMP7]], i32 0 +// TCHECK3-NEXT: [[TMP8:%.*]] = load double, ptr [[ARRAYIDX2]], align 4 +// TCHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP8]], 1.000000e+00 // TCHECK3-NEXT: store double [[INC]], ptr [[ARRAYIDX2]], align 4 // TCHECK3-NEXT: ret void // @@ -11024,29 +11131,33 @@ int bar(int n, double *ptr) { // TCHECK3-NEXT: 
[[B2:%.*]] = alloca [10 x i32], align 4 // TCHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // TCHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// TCHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// TCHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// TCHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// TCHECK3-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4 -// TCHECK3-NEXT: store i8 [[TMP4]], ptr [[AAA]], align 1 -// TCHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// TCHECK3-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 -// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// TCHECK3-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// TCHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// TCHECK3-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// TCHECK3-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 4 +// TCHECK3-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 +// TCHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK3-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// TCHECK3-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 4 +// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK3-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 4 // TCHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr 
align 4 [[B2]], ptr align 4 [[B]], i32 40, i1 false) -// TCHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// TCHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 +// TCHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// TCHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], 1 // TCHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 -// TCHECK3-NEXT: [[TMP7:%.*]] = load i8, ptr [[AAA]], align 1 -// TCHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP7]] to i32 +// TCHECK3-NEXT: [[TMP11:%.*]] = load i8, ptr [[AAA]], align 1 +// TCHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP11]] to i32 // TCHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // TCHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i8 // TCHECK3-NEXT: store i8 [[CONV4]], ptr [[AAA]], align 1 // TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B2]], i32 0, i32 2 -// TCHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// TCHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP8]], 1 +// TCHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// TCHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP12]], 1 // TCHECK3-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4 // TCHECK3-NEXT: ret void // @@ -11062,45 +11173,51 @@ int bar(int n, double *ptr) { // TCHECK3-NEXT: [[__VLA_EXPR1:%.*]] = alloca i32, align 4 // TCHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // TCHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// TCHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// TCHECK3-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 4 -// TCHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// TCHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// TCHECK3-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// TCHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +// TCHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK3-NEXT: 
[[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// TCHECK3-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 +// TCHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK3-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // TCHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// TCHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// TCHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// TCHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// TCHECK3-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 4 -// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// TCHECK3-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK3-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// TCHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// TCHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// TCHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// TCHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// TCHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// TCHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// TCHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 +// TCHECK3-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 4 +// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// TCHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK3-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 4 // TCHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// TCHECK3-NEXT: [[TMP9:%.*]] = call ptr @llvm.stacksave.p0() -// TCHECK3-NEXT: store ptr [[TMP9]], ptr [[SAVED_STACK]], align 4 -// TCHECK3-NEXT: [[TMP10:%.*]] = mul nuw i32 [[TMP5]], [[TMP7]] -// TCHECK3-NEXT: [[VLA:%.*]] = 
alloca i16, i32 [[TMP10]], align 2 -// TCHECK3-NEXT: store i32 [[TMP5]], ptr [[__VLA_EXPR0]], align 4 -// TCHECK3-NEXT: store i32 [[TMP7]], ptr [[__VLA_EXPR1]], align 4 -// TCHECK3-NEXT: [[TMP11:%.*]] = mul nuw i32 [[TMP5]], [[TMP7]] -// TCHECK3-NEXT: [[TMP12:%.*]] = mul nuw i32 [[TMP11]], 2 -// TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[VLA]], ptr align 2 [[C]], i32 [[TMP12]], i1 false) -// TCHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 -// TCHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double +// TCHECK3-NEXT: [[TMP15:%.*]] = call ptr @llvm.stacksave.p0() +// TCHECK3-NEXT: store ptr [[TMP15]], ptr [[SAVED_STACK]], align 4 +// TCHECK3-NEXT: [[TMP16:%.*]] = mul nuw i32 [[TMP8]], [[TMP11]] +// TCHECK3-NEXT: [[VLA:%.*]] = alloca i16, i32 [[TMP16]], align 2 +// TCHECK3-NEXT: store i32 [[TMP8]], ptr [[__VLA_EXPR0]], align 4 +// TCHECK3-NEXT: store i32 [[TMP11]], ptr [[__VLA_EXPR1]], align 4 +// TCHECK3-NEXT: [[TMP17:%.*]] = mul nuw i32 [[TMP8]], [[TMP11]] +// TCHECK3-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 2 +// TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 2 [[VLA]], ptr align 2 [[C]], i32 [[TMP18]], i1 false) +// TCHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// TCHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP19]] to double // TCHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 // TCHECK3-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS]], i32 0, i32 0 // TCHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4 // TCHECK3-NEXT: [[A2:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[THIS]], i32 0, i32 0 -// TCHECK3-NEXT: [[TMP14:%.*]] = load double, ptr [[A2]], align 4 -// TCHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 +// TCHECK3-NEXT: [[TMP20:%.*]] = load double, ptr [[A2]], align 4 +// TCHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP20]], 1.000000e+00 // TCHECK3-NEXT: store double [[INC]], ptr [[A2]], align 4 // TCHECK3-NEXT: [[CONV3:%.*]] = fptosi 
double [[INC]] to i16 -// TCHECK3-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP7]] -// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP15]] +// TCHECK3-NEXT: [[TMP21:%.*]] = mul nsw i32 1, [[TMP11]] +// TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[VLA]], i32 [[TMP21]] // TCHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 // TCHECK3-NEXT: store i16 [[CONV3]], ptr [[ARRAYIDX4]], align 2 -// TCHECK3-NEXT: [[TMP16:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 -// TCHECK3-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP16]]) +// TCHECK3-NEXT: [[TMP22:%.*]] = load ptr, ptr [[SAVED_STACK]], align 4 +// TCHECK3-NEXT: call void @llvm.stackrestore.p0(ptr [[TMP22]]) // TCHECK3-NEXT: ret void // // @@ -11113,21 +11230,24 @@ int bar(int n, double *ptr) { // TCHECK3-NEXT: [[B2:%.*]] = alloca [10 x i32], align 4 // TCHECK3-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // TCHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// TCHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// TCHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// TCHECK3-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// TCHECK3-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 4 -// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// TCHECK3-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK3-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// TCHECK3-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// TCHECK3-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK3-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// TCHECK3-NEXT: [[B:%.*]] 
= load ptr, ptr [[TMP5]], align 4 +// TCHECK3-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// TCHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK3-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP6]], align 4 // TCHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // TCHECK3-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[B2]], ptr align 4 [[B]], i32 40, i1 false) -// TCHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 -// TCHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP4]], 1 +// TCHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[A]], align 4 +// TCHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP7]], 1 // TCHECK3-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // TCHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[B2]], i32 0, i32 2 -// TCHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -// TCHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP5]], 1 +// TCHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +// TCHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 // TCHECK3-NEXT: store i32 [[ADD3]], ptr [[ARRAYIDX]], align 4 // TCHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp b/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp index af8c1e223705e..62e51ed0cf090 100644 --- a/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp +++ b/clang/test/OpenMP/target_ompx_dyn_cgroup_mem_codegen.cpp @@ -1002,7 +1002,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META32]] -// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR2]] +// CHECK1-NEXT: call void 
[[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR2]], !inline_history [[META33:![0-9]+]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META32]] @@ -1933,7 +1933,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META33]] -// CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR2]] +// CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR2]], !inline_history [[META34:![0-9]+]] // CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META33]] @@ -2053,24 +2053,28 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 -// CHECK9-NEXT: 
[[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP8]], i32 0) -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[N_CASTED]], align 
8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined, i64 [[TMP10]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP12]], i32 0) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[N_CASTED]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined, i64 [[TMP14]]) // CHECK9-NEXT: ret void // // @@ -2288,11 +2292,13 @@ int bar(int n){ // CHECK9-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void (ptr, i32, 
ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108.omp_outlined) // CHECK9-NEXT: ret void @@ -2318,21 +2324,25 @@ int bar(int n){ // CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = 
load ptr, ptr [[TMP9]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[B]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i64 [[TMP7]]) +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[B_CASTED]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i64 [[TMP11]]) // CHECK9-NEXT: ret void // // @@ -2363,10 +2373,12 @@ int bar(int n){ // CHECK9-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: [[A:%.*]] = getelementptr 
inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS]], i32 0, i32 0 // CHECK9-NEXT: store double 2.500000e+00, ptr [[A]], align 8 @@ -2380,8 +2392,9 @@ int bar(int n){ // CHECK9-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88.omp_outlined) // CHECK9-NEXT: ret void @@ -2410,28 +2423,32 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 
[[TMP4]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 8 -// CHECK9-NEXT: store i16 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i16 [[TMP7]], ptr [[B]], align 2 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i16 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK9-NEXT: [[TMP9:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP9]], i32 0) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i16, ptr [[B]], align 2 -// CHECK9-NEXT: store i16 [[TMP12]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i64 [[TMP11]], i64 [[TMP13]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK9-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP13]], i32 0) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: store i16 [[TMP16]], ptr [[B_CASTED]], align 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i64 [[TMP15]], i64 [[TMP17]]) // CHECK9-NEXT: ret void // // @@ -2466,24 +2483,28 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], 
align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP8]], i32 0) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined, i32 [[TMP10]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP12]], i32 0) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined, i32 [[TMP14]]) // CHECK11-NEXT: ret void // // @@ -2697,11 +2718,13 @@ int bar(int n){ // CHECK11-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108.omp_outlined) // CHECK11-NEXT: ret void @@ -2727,21 +2750,25 @@ int bar(int n){ // CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: 
[[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[B]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i32 [[TMP11]]) // CHECK11-NEXT: ret void // // @@ -2772,10 +2799,12 @@ int bar(int n){ // CHECK11-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // 
CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[THIS]], i32 0, i32 0 // CHECK11-NEXT: store double 2.500000e+00, ptr [[A]], align 4 @@ -2789,8 +2818,9 @@ int bar(int n){ // CHECK11-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88.omp_outlined) // CHECK11-NEXT: ret void @@ -2819,28 +2849,32 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] 
= load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 4 -// CHECK11-NEXT: store i16 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i16 [[TMP7]], ptr [[B]], align 2 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i16 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK11-NEXT: [[TMP9:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP9]], i32 0) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i16, ptr [[B]], align 2 -// CHECK11-NEXT: store i16 [[TMP12]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i32 [[TMP11]], i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK11-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP13]], i32 0) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: store i16 [[TMP16]], ptr [[B_CASTED]], align 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i32 [[TMP15]], i32 [[TMP17]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp index 266da5f073912..07fcf8354f4fb 100644 --- a/clang/test/OpenMP/target_parallel_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_codegen.cpp @@ -682,7 +682,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META27]] -// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR3]] +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR3]], !inline_history [[META28:![0-9]+]] // CHECK1-NEXT: 
[[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META27]] @@ -859,14 +859,14 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META28:![0-9]+]], !align [[META29:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META29:![0-9]+]], !align [[META30:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META28]], !align [[META29]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META28]], !align [[META30:![0-9]+]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META29]], !align [[META30]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META29]], !align [[META31:![0-9]+]] // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META28]], !align [[META30]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META28]], !align [[META30]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META29]], !align [[META31]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META29]], !align [[META31]] // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load 
i64, ptr [[A_CASTED]], align 8 @@ -899,14 +899,14 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META28]], !align [[META29]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META29]], !align [[META30]] // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META28]], !align [[META29]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META28]], !align [[META30]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META29]], !align [[META30]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META29]], !align [[META31]] // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META28]], !align [[META30]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META28]], !align [[META30]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META29]], !align [[META31]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META29]], !align [[META31]] // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 @@ -1326,7 +1326,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 
8, !nonnull [[META28]], !align [[META31:![0-9]+]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META29]], !align [[META32:![0-9]+]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 @@ -1354,7 +1354,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META28]], !align [[META31]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META29]], !align [[META32]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 @@ -1388,7 +1388,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META28]], !align [[META29]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META29]], !align [[META30]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -1417,7 +1417,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META28]], !align [[META29]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[B_ADDR]], align 8, !nonnull [[META29]], !align [[META30]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 @@ -1451,7 +1451,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META28]], !align [[META29]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META29]], !align [[META30]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -1475,7 +1475,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META28]], !align [[META29]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META29]], !align [[META30]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 @@ -1882,7 +1882,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META28]] -// CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR3]] +// CHECK3-NEXT: call void 
[[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR3]], !inline_history [[META29:![0-9]+]] // CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META28]] @@ -2059,14 +2059,14 @@ int bar(int n){ // CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META29:![0-9]+]], !align [[META30:![0-9]+]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META30:![0-9]+]], !align [[META31:![0-9]+]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META30]], !align 
[[META31]] // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -2099,14 +2099,14 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 // CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP8]], 1 // CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 @@ -2526,7 +2526,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: 
[[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META29]], !align [[META31:![0-9]+]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META30]], !align [[META32:![0-9]+]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 @@ -2554,7 +2554,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META29]], !align [[META31]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META30]], !align [[META32]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP4]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 @@ -2588,7 +2588,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -2617,7 +2617,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr 
[[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 @@ -2651,7 +2651,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -2675,7 +2675,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 1 // CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4 @@ -2698,8 +2698,9 @@ int bar(int n){ // CHECK9-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: 
[[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100.omp_outlined) // CHECK9-NEXT: ret void @@ -2724,16 +2725,18 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l110.omp_outlined, i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l110.omp_outlined, i64 [[TMP6]]) // CHECK9-NEXT: ret void // // @@ -2774,22 +2777,25 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l119.omp_outlined, i64 [[TMP6]], i64 [[TMP8]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l119.omp_outlined, i64 [[TMP9]], i64 [[TMP11]]) // CHECK9-NEXT: ret void // // @@ -2824,32 +2830,42 @@ int bar(int n){ // CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK9-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 6 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 7 -// CHECK9-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 8 -// CHECK9-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 9 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], 
i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr 
[[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP23]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.omp_outlined, i64 [[TMP15]], ptr [[B]], i64 [[TMP5]], ptr [[BN]], ptr [[C]], i64 [[TMP9]], i64 [[TMP11]], ptr [[CN]], ptr [[D]]) +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP24]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.omp_outlined, i64 [[TMP25]], ptr [[B]], i64 [[TMP8]], ptr [[BN]], ptr [[C]], i64 [[TMP15]], i64 [[TMP18]], ptr [[CN]], ptr [[D]]) // CHECK9-NEXT: ret void // // @@ -2938,30 +2954,35 @@ int bar(int n){ // CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 -// CHECK9-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 -// CHECK9-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 8 +// CHECK9-NEXT: store i8 [[TMP9]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: 
[[TMP12:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK9-NEXT: store i8 [[TMP12]], ptr [[AAA_CASTED]], align 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], ptr [[B]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP15]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK9-NEXT: store i8 [[TMP17]], ptr [[AAA_CASTED]], align 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.omp_outlined, i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], ptr [[B]]) // CHECK9-NEXT: ret void // // @@ -3010,24 +3031,30 @@ int bar(int n){ // CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK9-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 
+// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.omp_outlined, ptr [[THIS]], i64 [[TMP10]], i64 [[TMP5]], i64 [[TMP7]], ptr [[C]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[B_CASTED]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.omp_outlined, ptr [[THIS]], i64 [[TMP16]], i64 [[TMP8]], i64 [[TMP11]], ptr [[C]]) // CHECK9-NEXT: ret void // // @@ -3080,24 +3107,28 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// 
CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.omp_outlined, i64 [[TMP7]], i64 [[TMP9]], ptr [[B]]) +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[B]]) // CHECK9-NEXT: ret void // // @@ -3137,8 +3168,9 @@ int bar(int n){ // CHECK11-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100.omp_outlined) // CHECK11-NEXT: ret void @@ -3163,16 +3195,18 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK11-NEXT: 
[[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l110.omp_outlined, i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l110.omp_outlined, i32 [[TMP6]]) // CHECK11-NEXT: ret void // // @@ -3213,22 +3247,25 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l119.omp_outlined, i32 [[TMP6]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l119.omp_outlined, i32 [[TMP9]], i32 [[TMP11]]) // CHECK11-NEXT: ret void // // @@ -3263,32 +3300,42 @@ int bar(int n){ // CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 6 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 7 -// CHECK11-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -// CHECK11-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 9 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr 
[[TMP0]], i32 9 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP23]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.omp_outlined, i32 [[TMP15]], ptr [[B]], i32 [[TMP5]], ptr [[BN]], ptr [[C]], i32 [[TMP9]], i32 [[TMP11]], ptr [[CN]], ptr [[D]]) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l144.omp_outlined, i32 [[TMP25]], ptr [[B]], i32 [[TMP8]], ptr [[BN]], ptr [[C]], i32 [[TMP15]], i32 [[TMP18]], ptr [[CN]], ptr [[D]]) // CHECK11-NEXT: ret void // // @@ -3377,30 +3424,35 @@ int bar(int n){ // CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], 
align 4 -// CHECK11-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i8 [[TMP9]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP12]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP10]], ptr 
[[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK11-NEXT: store i8 [[TMP12]], ptr [[AAA_CASTED]], align 1 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.omp_outlined, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], ptr [[B]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP15]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK11-NEXT: store i8 [[TMP17]], ptr [[AAA_CASTED]], align 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l198.omp_outlined, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], ptr [[B]]) // CHECK11-NEXT: ret void // // @@ -3449,24 +3501,30 @@ int bar(int n){ // CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load 
i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.omp_outlined, ptr [[THIS]], i32 [[TMP10]], i32 [[TMP5]], i32 [[TMP7]], ptr [[C]]) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.omp_outlined, ptr [[THIS]], i32 [[TMP16]], i32 [[TMP8]], i32 [[TMP11]], ptr [[C]]) // CHECK11-NEXT: ret void // // @@ -3519,24 +3577,28 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr 
[[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.omp_outlined, i32 [[TMP7]], i32 [[TMP9]], ptr [[B]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l181.omp_outlined, i32 [[TMP11]], i32 [[TMP13]], ptr [[B]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp index d585eb9cbd36e..1692f1548f435 100644 --- a/clang/test/OpenMP/target_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp @@ -1076,7 +1076,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META30]] // CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META30]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META30]] -// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]], !inline_history [[META31:![0-9]+]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META30]] // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META30]] // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META30]] @@ -1248,14 +1248,14 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META31:![0-9]+]], !align [[META32:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull 
[[META32:![0-9]+]], !align [[META33:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META31]], !align [[META32]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META31]], !align [[META33:![0-9]+]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META32]], !align [[META33]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META32]], !align [[META34:![0-9]+]] // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META31]], !align [[META33]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META31]], !align [[META33]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META32]], !align [[META34]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META32]], !align [[META34]] // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -1300,14 +1300,14 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META31]], !align [[META32]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META32]], !align [[META33]] // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META31]], !align [[META32]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], 
align 8, !nonnull [[META31]], !align [[META33]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META32]], !align [[META33]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META32]], !align [[META34]] // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META31]], !align [[META33]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META31]], !align [[META33]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META32]], !align [[META34]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META32]], !align [[META34]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1789,7 +1789,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META31]], !align [[META34:![0-9]+]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META32]], !align [[META35:![0-9]+]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 @@ -1824,7 +1824,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META31]], !align 
[[META34]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META32]], !align [[META35]] // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 @@ -1901,7 +1901,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META31]], !align [[META32]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META32]], !align [[META33]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -1932,7 +1932,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META31]], !align [[META32]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META32]], !align [[META33]] // CHECK1-NEXT: ret void // // @@ -1949,7 +1949,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META31]], !align [[META32]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META32]], !align [[META33]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], 
align 8 @@ -1980,7 +1980,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META31]], !align [[META32]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META32]], !align [[META33]] // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 @@ -2803,7 +2803,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META33]] -// CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] +// CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]], !inline_history [[META34:![0-9]+]] // CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META33]] @@ -3516,7 +3516,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull 
[[META19]], !align [[META34:![0-9]+]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META19]], !align [[META35:![0-9]+]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 @@ -3551,7 +3551,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META19]], !align [[META34]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META19]], !align [[META35]] // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 @@ -3773,8 +3773,9 @@ int bar(int n){ // CHECK9-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103.omp_outlined) // CHECK9-NEXT: ret void @@ -3870,28 +3871,32 @@ int bar(int n){ // CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[LIN]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 8 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[A]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: 
[[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[LIN]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[LIN_CASTED]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l128.omp_outlined, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK9-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP11]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[LIN_CASTED]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l128.omp_outlined, i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]]) // CHECK9-NEXT: ret void // // @@ -4021,22 +4026,25 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr 
[[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined, i64 [[TMP6]], i64 [[TMP8]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined, i64 [[TMP9]], i64 [[TMP11]]) // CHECK9-NEXT: ret void // // @@ -4124,38 +4132,49 @@ int bar(int n){ // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK9-NEXT: 
[[C:%.*]] = load ptr, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 6 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 7 -// CHECK9-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 8 -// CHECK9-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 9 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 8 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 10 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK9-NEXT: 
[[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 8 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 10 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP26]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i64 [[TMP17]], ptr [[B]], i64 [[TMP5]], ptr [[BN]], ptr [[C]], i64 [[TMP9]], i64 [[TMP11]], ptr [[CN]], ptr [[D]], i64 [[TMP19]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP27]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i64 [[TMP28]], ptr [[B]], i64 [[TMP8]], ptr [[BN]], ptr [[C]], i64 [[TMP15]], i64 [[TMP18]], ptr [[CN]], ptr [[D]], i64 [[TMP30]]) // CHECK9-NEXT: ret void // // @@ -4315,30 +4334,35 @@ int bar(int n){ // CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 -// CHECK9-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr 
[[TMP7]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 8 +// CHECK9-NEXT: store i8 [[TMP9]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK9-NEXT: store i8 [[TMP12]], ptr [[AAA_CASTED]], align 1 -// 
CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l214.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], ptr [[B]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP15]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK9-NEXT: store i8 [[TMP17]], ptr [[AAA_CASTED]], align 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l214.omp_outlined, i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], ptr [[B]]) // CHECK9-NEXT: ret void // // @@ -4372,24 +4396,30 @@ int bar(int n){ // CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], 
align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK9-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l232.omp_outlined, ptr [[THIS]], i64 [[TMP10]], i64 [[TMP5]], i64 [[TMP7]], ptr [[C]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[B_CASTED]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l232.omp_outlined, ptr [[THIS]], i64 [[TMP16]], i64 [[TMP8]], i64 [[TMP11]], ptr [[C]]) // CHECK9-NEXT: ret void // // @@ -4492,24 +4522,28 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 
8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l197.omp_outlined, i64 [[TMP7]], i64 [[TMP9]], ptr [[B]]) +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l197.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[B]]) // CHECK9-NEXT: ret void // // @@ -4599,8 +4633,9 @@ int bar(int n){ // CHECK11-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103.omp_outlined) // CHECK11-NEXT: ret void @@ -4696,28 +4731,32 @@ int bar(int n){ // CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: 
[[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[LIN]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[A]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[LIN]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[LIN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l128.omp_outlined, i32 [[TMP8]], i32 [[TMP10]], i32 [[TMP12]]) +// CHECK11-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP11]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[LIN_CASTED]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l128.omp_outlined, i32 [[TMP12]], i32 [[TMP14]], i32 [[TMP16]]) // CHECK11-NEXT: ret void // // @@ -4847,22 +4886,25 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined, i32 [[TMP6]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined, i32 [[TMP9]], i32 [[TMP11]]) // CHECK11-NEXT: ret void // // @@ -4950,38 +4992,49 @@ int bar(int n){ // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 6 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 7 -// CHECK11-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -// CHECK11-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 9 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 // CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 10 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, 
ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 10 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP26]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i32 [[TMP17]], ptr [[B]], i32 [[TMP5]], ptr [[BN]], ptr [[C]], i32 [[TMP9]], i32 [[TMP11]], ptr [[CN]], ptr [[D]], i32 [[TMP19]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i32 [[TMP28]], ptr [[B]], i32 [[TMP8]], ptr [[BN]], ptr [[C]], i32 [[TMP15]], i32 [[TMP18]], ptr [[CN]], ptr [[D]], i32 [[TMP30]]) // CHECK11-NEXT: ret void // // @@ -5141,30 +5194,35 @@ int bar(int n){ // CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 4 -// CHECK11-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK11-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i8 [[TMP9]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP12]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK11-NEXT: store i8 [[TMP12]], ptr [[AAA_CASTED]], align 1 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l214.omp_outlined, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], ptr [[B]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP15]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK11-NEXT: store i8 [[TMP17]], ptr [[AAA_CASTED]], align 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l214.omp_outlined, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], ptr [[B]]) // CHECK11-NEXT: ret void // // @@ -5198,24 +5256,30 @@ int bar(int n){ // CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 
+// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l232.omp_outlined, ptr [[THIS]], i32 [[TMP10]], i32 [[TMP5]], i32 [[TMP7]], ptr [[C]]) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l232.omp_outlined, ptr [[THIS]], i32 [[TMP16]], i32 [[TMP8]], i32 [[TMP11]], ptr [[C]]) // CHECK11-NEXT: ret void // // @@ -5318,24 +5382,28 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load 
ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l197.omp_outlined, i32 [[TMP7]], i32 [[TMP9]], ptr [[B]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l197.omp_outlined, i32 [[TMP11]], i32 [[TMP13]], ptr [[B]]) // CHECK11-NEXT: ret void // // @@ -6188,7 +6256,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META30]] // CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META30]] // CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META30]] -// CHECK17-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] +// CHECK17-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]], !inline_history [[META31:![0-9]+]] // CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META30]] // CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META30]] // CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META30]] @@ -6360,14 +6428,14 @@ int bar(int n){ // CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META31:![0-9]+]], !align [[META32:![0-9]+]] +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META32:![0-9]+]], !align [[META33:![0-9]+]] // CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META31]], !align [[META32]] -// CHECK17-NEXT: [[TMP3:%.*]] = 
load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META31]], !align [[META33:![0-9]+]] +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META32]], !align [[META33]] +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META32]], !align [[META34:![0-9]+]] // CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META31]], !align [[META33]] -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META31]], !align [[META33]] +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META32]], !align [[META34]] +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META32]], !align [[META34]] // CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -6412,14 +6480,14 @@ int bar(int n){ // CHECK17-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 // CHECK17-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK17-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META31]], !align [[META32]] +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META32]], !align [[META33]] // CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META31]], !align [[META32]] -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META31]], !align [[META33]] +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META32]], !align [[META33]] +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull 
[[META32]], !align [[META34]] // CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META31]], !align [[META33]] -// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META31]], !align [[META33]] +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META32]], !align [[META34]] +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META32]], !align [[META34]] // CHECK17-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK17-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK17-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -6901,7 +6969,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META31]], !align [[META34:![0-9]+]] +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META32]], !align [[META35:![0-9]+]] // CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 @@ -6936,7 +7004,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META31]], !align [[META34]] +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META32]], !align [[META35]] // CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 3, ptr 
[[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 @@ -7013,7 +7081,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META31]], !align [[META32]] +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META32]], !align [[META33]] // CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -7044,7 +7112,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META31]], !align [[META32]] +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META32]], !align [[META33]] // CHECK17-NEXT: ret void // // @@ -7061,7 +7129,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META31]], !align [[META32]] +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META32]], !align [[META33]] // CHECK17-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK17-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK17-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -7092,7 +7160,7 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK17-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK17-NEXT: 
store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META31]], !align [[META32]] +// CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META32]], !align [[META33]] // CHECK17-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK17-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK17-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 @@ -7915,7 +7983,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META33]] // CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META33]] // CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META33]] -// CHECK19-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] +// CHECK19-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]], !inline_history [[META34:![0-9]+]] // CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META33]] // CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META33]] // CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META33]] @@ -8628,7 +8696,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META19]], !align [[META34:![0-9]+]] +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META19]], !align [[META35:![0-9]+]] // 
CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK19-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 @@ -8663,7 +8731,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META19]], !align [[META34]] +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META19]], !align [[META35]] // CHECK19-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK19-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK19-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp index 5efed69be52be..d7bbae5effb99 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -789,7 +789,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META31]] // CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META31]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META31]] -// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR3]] +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR3]], !inline_history [[META32:![0-9]+]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META31]] // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, 
!noalias [[META31]] // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META31]] @@ -889,32 +889,32 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK1-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 // CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 
[[TMP8]], [[CONV]] -// CHECK1-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1020,44 +1020,44 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], 
align 8, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK1-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK1-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK1-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK1-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group 
[[ACC_GRP36]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK1-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 -// CHECK1-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK1-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK1-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 -// CHECK1-NEXT: store i64 
[[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK1-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1143,32 +1143,32 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] // 
CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK1-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1210,14 +1210,14 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr 
[[B_ADDR]], align 8, !nonnull [[META41:![0-9]+]], !align [[META42:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42:![0-9]+]], !align [[META43:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META41]], !align [[META43:![0-9]+]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META44:![0-9]+]] // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -1262,14 +1262,14 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1300,60 +1300,60 @@ int bar(int n){ // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], 
align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK1-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double // CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to 
double // CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] // CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP44]] +// 
CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK1-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK1-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK1-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1758,7 +1758,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: 
[[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META41]], !align [[META47:![0-9]+]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META48:![0-9]+]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 @@ -1793,7 +1793,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META41]], !align [[META47]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META48]] // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 @@ -1816,37 +1816,37 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: 
[[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK1-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: store double [[ADD]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: store double [[INC]], ptr [[A4]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 // CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] // CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 
2, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK1-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] +// CHECK1-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1877,7 +1877,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -1908,7 +1908,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK1-NEXT: [[TMP0:%.*]] = 
load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] // CHECK1-NEXT: ret void // // @@ -1925,7 +1925,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -1956,7 +1956,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] // CHECK1-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK1-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK1-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 @@ -1979,35 +1979,35 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP51:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP52:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], 
[[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP51]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 
[[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK1-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP51]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] +// CHECK1-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -2514,7 +2514,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META32]] // CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META32]] // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META32]] -// CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR3]] +// CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR3]], !inline_history [[META33:![0-9]+]] // CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META32]] // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META32]] 
// CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META32]] @@ -2563,7 +2563,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4, !nonnull [[META33:![0-9]+]], !align [[META34:![0-9]+]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4, !nonnull [[META34:![0-9]+]], !align [[META35:![0-9]+]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -2591,7 +2591,7 @@ int bar(int n){ // CHECK3-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK3-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 8 // CHECK3-NEXT: store i64 [[TMP1]], ptr [[DOTLINEAR_START]], align 8 // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 @@ -2612,32 +2612,32 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group 
[[ACC_GRP36]] // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK3-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 // CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK3-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: 
omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -2743,44 +2743,44 @@ int bar(int n){ // CHECK3-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK3-NEXT: store i64 [[SUB]], ptr [[IT]], align 
8, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK3-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK3-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK3-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK3-NEXT: [[ADD9:%.*]] = add i64 
[[CONV7]], [[MUL8]] // CHECK3-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 -// CHECK3-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 // CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK3-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK3-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] // CHECK3-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 -// CHECK3-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK3-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2866,32 +2866,32 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP41:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, 
!llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2933,14 +2933,14 @@ int bar(int n){ // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK3-NEXT: 
[[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -2985,14 +2985,14 @@ int bar(int n){ // CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, 
!nonnull [[META33]], !align [[META34]] +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -3023,60 +3023,60 @@ int bar(int n){ // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK3-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP44]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ADD:%.*]] = add nsw 
i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double // CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr 
[[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] // CHECK3-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK3-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 
[[CONV21]], 1 // CHECK3-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK3-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK3-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -3481,7 +3481,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META33]], !align [[META47:![0-9]+]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META34]], !align [[META48:![0-9]+]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 @@ -3516,7 +3516,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // 
CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META33]], !align [[META47]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META34]], !align [[META48]] // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 @@ -3539,37 +3539,37 @@ int bar(int n){ // CHECK3-NEXT: store i64 [[TMP8]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK3-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP48]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 
[[TMP12]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: store double [[ADD]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: store double [[INC]], ptr [[A4]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 // CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] // CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK3-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48]] -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP49:![0-9]+]] +// CHECK3-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3600,7 +3600,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -3631,7 +3631,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK3-NEXT: ret void // // @@ -3648,7 +3648,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr 
[[A_CASTED]], align 4 @@ -3679,7 +3679,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK3-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK3-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK3-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 @@ -3702,35 +3702,35 @@ int bar(int n){ // CHECK3-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP51:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP52:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK3-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP51]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: store i64 [[ADD]], ptr [[I]], align 8, 
!llvm.access.group [[ACC_GRP52]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK3-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, 
!llvm.access.group [[ACC_GRP51]] -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] +// CHECK3-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP52]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4241,7 +4241,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META31]] // CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META31]] // CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META31]] -// CHECK5-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR3]] +// CHECK5-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR3]], !inline_history [[META32:![0-9]+]] // CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META31]] // CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META31]] // CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META31]] @@ -4341,32 +4341,32 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33:![0-9]+]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, 
!llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK5-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 // CHECK5-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV]] -// CHECK5-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // 
CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] // CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP33]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -4472,44 +4472,44 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK5-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK5-NEXT: store i64 [[SUB]], ptr 
[[IT]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK5-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK5-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK5-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK5-NEXT: [[ADD9:%.*]] 
= add i64 [[CONV7]], [[MUL8]] // CHECK5-NEXT: [[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 -// CHECK5-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 // CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK5-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK5-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP35]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP36]] // CHECK5-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 -// CHECK5-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK5-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4595,32 +4595,32 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK5-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK5-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK5-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, 
!llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4662,14 +4662,14 @@ int bar(int n){ // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41:![0-9]+]], !align [[META42:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42:![0-9]+]], !align [[META43:![0-9]+]] // CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META41]], !align [[META43:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], 
!align [[META44:![0-9]+]] // CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] // CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -4714,14 +4714,14 @@ int bar(int n){ // CHECK5-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] // CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] // CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] -// CHECK5-NEXT: 
[[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -4752,60 +4752,60 @@ int bar(int n){ // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK5-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group 
[[ACC_GRP45]] // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK5-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double // CHECK5-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK5-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK5-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK5-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK5-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK5-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 // CHECK5-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP44]] +// 
CHECK5-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK5-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] // CHECK5-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK5-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK5-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // 
CHECK5-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK5-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK5-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK5-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK5-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -5237,7 +5237,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META41]], !align [[META47:![0-9]+]] +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META48:![0-9]+]] // CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 // CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[B_CASTED]], align 8 @@ -5292,7 +5292,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, 
ptr [[VLA_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META41]], !align [[META47]] +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META48]] // CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK5-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 @@ -5319,37 +5319,37 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48:![0-9]+]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK5-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK5-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK5-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP48]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK5-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP49]] // 
CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double // CHECK5-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: store double [[ADD]], ptr [[A]], align 8, !nontemporal [[META49:![0-9]+]], !llvm.access.group [[ACC_GRP48]] +// CHECK5-NEXT: store double [[ADD]], ptr [[A]], align 8, !nontemporal [[META50:![0-9]+]], !llvm.access.group [[ACC_GRP49]] // CHECK5-NEXT: [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP14:%.*]] = load double, ptr [[A4]], align 8, !nontemporal [[META49]], !llvm.access.group [[ACC_GRP48]] +// CHECK5-NEXT: [[TMP14:%.*]] = load double, ptr [[A4]], align 8, !nontemporal [[META50]], !llvm.access.group [[ACC_GRP49]] // CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC]], ptr [[A4]], align 8, !nontemporal [[META49]], !llvm.access.group [[ACC_GRP48]] +// CHECK5-NEXT: store double [[INC]], ptr [[A4]], align 8, !nontemporal [[META50]], !llvm.access.group [[ACC_GRP49]] // CHECK5-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 // CHECK5-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP15]] // CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK5-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP48]] +// CHECK5-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP49]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49]] // 
CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 -// CHECK5-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] +// CHECK5-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: @@ -5401,7 +5401,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK5-NEXT: [[ADD28:%.*]] = add i64 [[TMP28]], 1 // CHECK5-NEXT: store i64 [[ADD28]], ptr [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP52:![0-9]+]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK5: omp.inner.for.end29: // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: @@ -5436,7 +5436,7 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] // CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -5467,7 +5467,7 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], 
align 8, !nonnull [[META42]], !align [[META43]] // CHECK5-NEXT: ret void // // @@ -5484,7 +5484,7 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] // CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -5515,7 +5515,7 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] // CHECK5-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK5-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK5-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 @@ -5538,35 +5538,35 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br 
i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK5-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP54]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK5-NEXT: store 
i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK5-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP54]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] +// CHECK5-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -6073,7 +6073,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META32]] // CHECK7-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META32]] // CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META32]] -// CHECK7-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR3]] +// CHECK7-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]]) #[[ATTR3]], !inline_history [[META33:![0-9]+]] // CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META32]] // CHECK7-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META32]] // CHECK7-NEXT: [[TMP14:%.*]] = load 
ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META32]] @@ -6122,7 +6122,7 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4, !nonnull [[META33:![0-9]+]], !align [[META34:![0-9]+]] +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4, !nonnull [[META34:![0-9]+]], !align [[META35:![0-9]+]] // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -6150,7 +6150,7 @@ int bar(int n){ // CHECK7-NEXT: store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 4 // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store ptr [[K]], ptr [[K_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[K_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK7-NEXT: [[TMP1:%.*]] = load i64, ptr [[TMP0]], align 8 // CHECK7-NEXT: store i64 [[TMP1]], ptr [[DOTLINEAR_START]], align 8 // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 @@ -6171,32 +6171,32 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK7-NEXT: 
[[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK7-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK7-NEXT: store i32 [[SUB]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTLINEAR_START]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK7-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 // CHECK7-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK7-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP35]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK7-NEXT: store i64 [[ADD]], ptr [[K1]], align 8, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK7-NEXT: store i32 [[ADD3]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -6302,44 +6302,44 @@ int bar(int n){ // CHECK7-NEXT: store i64 [[TMP6]], ptr [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK7-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK7-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK7-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP38]] -// 
CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK7-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTLINEAR_START]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: [[CONV:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK7-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: [[MUL5:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK7-NEXT: [[ADD:%.*]] = add i64 [[CONV]], [[MUL5]] // CHECK7-NEXT: [[CONV6:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK7-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK7-NEXT: store i32 [[CONV6]], ptr [[LIN2]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTLINEAR_START1]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: [[CONV7:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK7-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK7-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTLINEAR_STEP]], align 8, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: [[MUL8:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK7-NEXT: [[ADD9:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK7-NEXT: 
[[CONV10:%.*]] = trunc i64 [[ADD9]] to i32 -// CHECK7-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK7-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK7-NEXT: store i32 [[CONV10]], ptr [[A3]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: [[CONV11:%.*]] = sext i16 [[TMP16]] to i32 // CHECK7-NEXT: [[ADD12:%.*]] = add nsw i32 [[CONV11]], 1 // CHECK7-NEXT: [[CONV13:%.*]] = trunc i32 [[ADD12]] to i16 -// CHECK7-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK7-NEXT: store i16 [[CONV13]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] // CHECK7-NEXT: [[ADD14:%.*]] = add i64 [[TMP17]], 1 -// CHECK7-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP38]] -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK7-NEXT: store i64 [[ADD14]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP39]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6425,32 +6425,32 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] -// 
CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42:![0-9]+]] +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK7-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK7-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP41]] -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK7-NEXT: store i16 [[CONV]], ptr [[IT]], align 2, !llvm.access.group [[ACC_GRP42]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP41]] +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP42]] // CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP41]] +// CHECK7-NEXT: store i16 
[[CONV5]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP42]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK7-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP42]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6492,14 +6492,14 @@ int bar(int n){ // CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK7-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK7-NEXT: 
[[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] -// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] +// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -6544,14 +6544,14 @@ int bar(int n){ // CHECK7-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 // CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK7-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] -// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK7-NEXT: [[TMP6:%.*]] 
= load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] +// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 25, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -6582,60 +6582,60 @@ int bar(int n){ // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK7-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK7-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP44]] -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: store i8 [[CONV]], ptr [[IT]], align 1, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK7-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], 
align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: store i32 [[ADD]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double // CHECK7-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK7-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK7-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK7-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK7-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK7-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK7-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 // CHECK7-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: 
[[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK7-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] // CHECK7-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK7-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK7-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK7-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK7-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// 
CHECK7-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK7-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK7-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -7067,7 +7067,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META33]], !align [[META47:![0-9]+]] +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META34]], !align [[META48:![0-9]+]] // CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP5]], ptr [[B_CASTED]], align 4 // CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[B_CASTED]], align 4 @@ -7122,7 +7122,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META33]], !align [[META47]] +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META34]], !align [[META48]] // CHECK7-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK7-NEXT: store i64 3, ptr [[DOTOMP_UB]], align 8 // CHECK7-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 @@ -7149,37 +7149,37 @@ int bar(int n){ // CHECK7-NEXT: store i64 [[TMP9]], ptr [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48:![0-9]+]] -// CHECK7-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK7-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] // CHECK7-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK7-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK7-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: store i64 [[SUB]], ptr [[IT]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK7-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double // CHECK7-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 
1.500000e+00 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: store double [[ADD]], ptr [[A]], align 4, !nontemporal [[META49:![0-9]+]], !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: store double [[ADD]], ptr [[A]], align 4, !nontemporal [[META50:![0-9]+]], !llvm.access.group [[ACC_GRP49]] // CHECK7-NEXT: [[A4:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP14:%.*]] = load double, ptr [[A4]], align 4, !nontemporal [[META49]], !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[TMP14:%.*]] = load double, ptr [[A4]], align 4, !nontemporal [[META50]], !llvm.access.group [[ACC_GRP49]] // CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC]], ptr [[A4]], align 4, !nontemporal [[META49]], !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: store double [[INC]], ptr [[A4]], align 4, !nontemporal [[META50]], !llvm.access.group [[ACC_GRP49]] // CHECK7-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 // CHECK7-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP15]] // CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK7-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: store i16 [[CONV5]], ptr [[ARRAYIDX6]], align 2, !llvm.access.group [[ACC_GRP49]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK7-NEXT: [[ADD7:%.*]] = add i64 [[TMP16]], 1 -// CHECK7-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, 
!llvm.access.group [[ACC_GRP48]] -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] +// CHECK7-NEXT: store i64 [[ADD7]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: @@ -7231,7 +7231,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP28:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8 // CHECK7-NEXT: [[ADD28:%.*]] = add i64 [[TMP28]], 1 // CHECK7-NEXT: store i64 [[ADD28]], ptr [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP52:![0-9]+]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK7: omp.inner.for.end29: // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: @@ -7266,7 +7266,7 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -7297,7 +7297,7 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK7-NEXT: ret void // // @@ -7314,7 +7314,7 @@ int bar(int n){ 
// CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -7345,7 +7345,7 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META33]], !align [[META34]] +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META34]], !align [[META35]] // CHECK7-NEXT: store i64 0, ptr [[DOTOMP_LB]], align 8 // CHECK7-NEXT: store i64 6, ptr [[DOTOMP_UB]], align 8 // CHECK7-NEXT: store i64 1, ptr [[DOTOMP_STRIDE]], align 8 @@ -7368,35 +7368,35 @@ int bar(int n){ // CHECK7-NEXT: store i64 [[TMP5]], ptr [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: [[TMP6:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i64, ptr [[DOTOMP_UB]], align 8, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// 
CHECK7-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK7-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP54]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: store i64 [[ADD]], ptr [[I]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK7-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: store i32 [[ADD5]], ptr 
[[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i64, ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK7-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP54]] -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] +// CHECK7-NEXT: store i64 [[ADD6]], ptr [[DOTOMP_IV]], align 8, !llvm.access.group [[ACC_GRP55]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -9422,8 +9422,9 @@ int bar(int n){ // CHECK17-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.omp_outlined) // CHECK17-NEXT: ret void @@ -9509,28 +9510,32 @@ int bar(int n){ // CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[LIN]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 8 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[A]], align 4 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 
+// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK17-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[LIN]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], ptr [[LIN_CASTED]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 -// CHECK17-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l108.omp_outlined, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: store i16 [[TMP11]], ptr [[AA_CASTED]], align 2 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[LIN_CASTED]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], ptr [[A_CASTED]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l108.omp_outlined, i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]]) // CHECK17-NEXT: ret void // // @@ -9667,22 +9672,25 @@ int bar(int n){ // CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = 
load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK17-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l116.omp_outlined, i64 [[TMP6]], i64 [[TMP8]]) +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l116.omp_outlined, i64 [[TMP9]], i64 [[TMP11]]) // CHECK17-NEXT: ret void // // @@ -9777,38 +9785,49 @@ int bar(int n){ // CHECK17-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
i64, ptr [[TMP0]], i32 4 -// CHECK17-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 6 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 7 -// CHECK17-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 8 -// CHECK17-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 9 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 8 -// CHECK17-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 10 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK17-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK17-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// CHECK17-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 8 +// CHECK17-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 10 +// CHECK17-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP26]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4 -// CHECK17-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: 
[[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.omp_outlined, i64 [[TMP17]], ptr [[B]], i64 [[TMP5]], ptr [[BN]], ptr [[C]], i64 [[TMP9]], i64 [[TMP11]], ptr [[CN]], ptr [[D]], i64 [[TMP19]]) +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP27]], ptr [[A_CASTED]], align 4 +// CHECK17-NEXT: [[TMP28:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.omp_outlined, i64 [[TMP28]], ptr [[B]], i64 [[TMP8]], ptr [[BN]], ptr [[C]], i64 [[TMP15]], i64 [[TMP18]], ptr [[CN]], ptr [[D]], i64 [[TMP30]]) // CHECK17-NEXT: ret void // // @@ -9975,30 +9994,35 @@ int bar(int n){ // CHECK17-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 -// CHECK17-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 -// 
CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP7]], align 8 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 8 +// CHECK17-NEXT: store i8 [[TMP9]], ptr [[AAA]], align 1 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 -// CHECK17-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK17-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr 
[[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK17-NEXT: store i8 [[TMP12]], ptr [[AAA_CASTED]], align 1 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], ptr [[B]]) +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: store i16 [[TMP15]], ptr [[AA_CASTED]], align 2 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK17-NEXT: store i8 [[TMP17]], ptr [[AAA_CASTED]], align 1 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.omp_outlined, i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], ptr [[B]]) // CHECK17-NEXT: ret void // // @@ -10032,24 +10056,30 @@ int bar(int n){ // CHECK17-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK17-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP7:%.*]] = load 
ptr, ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], ptr [[B_CASTED]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.omp_outlined, ptr [[THIS]], i64 [[TMP10]], i64 [[TMP5]], i64 [[TMP7]], ptr [[C]]) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[B]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], ptr [[B_CASTED]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.omp_outlined, ptr [[THIS]], i64 [[TMP16]], i64 [[TMP8]], i64 [[TMP11]], ptr [[C]]) // CHECK17-NEXT: ret void // // @@ -10159,24 +10189,28 @@ int bar(int n){ // CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr 
[[TMP0]], i32 3 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK17-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.omp_outlined, i64 [[TMP7]], i64 [[TMP9]], ptr [[B]]) +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[B]]) // CHECK17-NEXT: ret void // // @@ -10273,8 +10307,9 @@ int bar(int n){ // CHECK19-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.omp_outlined) // CHECK19-NEXT: ret void @@ -10360,28 +10395,32 @@ int bar(int n){ // CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 -// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: 
[[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[LIN]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK19-NEXT: store i32 [[TMP9]], ptr [[A]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK19-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[LIN]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[LIN_CASTED]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 -// CHECK19-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l108.omp_outlined, i32 [[TMP8]], i32 [[TMP10]], i32 [[TMP12]]) +// CHECK19-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: store i16 [[TMP11]], ptr [[AA_CASTED]], align 2 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[LIN_CASTED]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l108.omp_outlined, i32 [[TMP12]], i32 [[TMP14]], i32 [[TMP16]]) // CHECK19-NEXT: ret void // // @@ -10518,22 +10557,25 @@ int bar(int n){ // CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK19-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l116.omp_outlined, i32 [[TMP6]], i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l116.omp_outlined, i32 [[TMP9]], i32 [[TMP11]]) // CHECK19-NEXT: ret void // // @@ -10628,38 +10670,49 @@ int bar(int n){ // CHECK19-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK19-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 6 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 7 -// CHECK19-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -// CHECK19-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 9 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = 
load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 // CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK19-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 10 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// CHECK19-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK19-NEXT: [[TMP22:%.*]] = load 
ptr, ptr [[TMP21]], align 4 +// CHECK19-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// CHECK19-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK19-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 10 +// CHECK19-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP26]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4 -// CHECK19-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.omp_outlined, i32 [[TMP17]], ptr [[B]], i32 [[TMP5]], ptr [[BN]], ptr [[C]], i32 [[TMP9]], i32 [[TMP11]], ptr [[CN]], ptr [[D]], i32 [[TMP19]]) +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP27]], ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.omp_outlined, i32 [[TMP28]], ptr [[B]], i32 [[TMP8]], ptr [[BN]], ptr [[C]], i32 [[TMP15]], i32 [[TMP18]], ptr [[CN]], ptr [[D]], i32 [[TMP30]]) // CHECK19-NEXT: ret void // // @@ -10826,30 +10879,35 @@ int bar(int n){ // CHECK19-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 4 -// CHECK19-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 -// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP7]], align 4 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK19-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4 +// CHECK19-NEXT: store i8 [[TMP9]], ptr [[AAA]], align 1 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP12]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 -// CHECK19-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK19-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK19-NEXT: store i8 [[TMP12]], ptr [[AAA_CASTED]], align 1 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.omp_outlined, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], ptr [[B]]) +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: store i16 [[TMP15]], ptr [[AA_CASTED]], align 2 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK19-NEXT: store i8 [[TMP17]], ptr [[AAA_CASTED]], align 1 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.omp_outlined, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], ptr [[B]]) // CHECK19-NEXT: ret void // // @@ -10883,24 +10941,30 @@ int bar(int n){ // CHECK19-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 
+// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK19-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.omp_outlined, ptr [[THIS]], i32 [[TMP10]], i32 [[TMP5]], i32 [[TMP7]], ptr [[C]]) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[B]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[B_CASTED]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l216.omp_outlined, ptr [[THIS]], i32 [[TMP16]], i32 [[TMP8]], i32 [[TMP11]], ptr [[C]]) // CHECK19-NEXT: ret void // // @@ -11010,24 +11074,28 @@ int bar(int n){ // CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[TMP5:%.*]] = 
load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK19-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.omp_outlined, i32 [[TMP7]], i32 [[TMP9]], ptr [[B]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.omp_outlined, i32 [[TMP11]], i32 [[TMP13]], ptr [[B]]) // CHECK19-NEXT: ret void // // @@ -11124,8 +11192,9 @@ int bar(int n){ // CHECK21-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.omp_outlined) // CHECK21-NEXT: ret void @@ -11211,28 +11280,32 @@ int bar(int n){ // CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 8 -// CHECK21-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: 
[[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 +// CHECK21-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 -// CHECK21-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: store i32 [[TMP6]], ptr [[LIN]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 8 +// CHECK21-NEXT: store i32 [[TMP9]], ptr [[A]], align 4 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK21-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[LIN]], align 4 -// CHECK21-NEXT: store i32 [[TMP9]], ptr [[LIN_CASTED]], align 4 -// CHECK21-NEXT: [[TMP10:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 -// CHECK21-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l108.omp_outlined, i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK21-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK21-NEXT: store i16 [[TMP11]], ptr [[AA_CASTED]], align 2 +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK21-NEXT: store i32 [[TMP13]], ptr [[LIN_CASTED]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, ptr [[LIN_CASTED]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP15]], ptr [[A_CASTED]], align 4 +// CHECK21-NEXT: [[TMP16:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l108.omp_outlined, i64 [[TMP12]], i64 [[TMP14]], i64 [[TMP16]]) // CHECK21-NEXT: ret void // // @@ -11369,22 +11442,25 @@ int bar(int n){ // CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = 
load i32, ptr [[TMP2]], align 8 +// CHECK21-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK21-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK21-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK21-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l116.omp_outlined, i64 [[TMP6]], i64 [[TMP8]]) +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK21-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l116.omp_outlined, i64 [[TMP9]], i64 [[TMP11]]) // CHECK21-NEXT: ret void // // @@ -11479,38 +11555,49 @@ int bar(int n){ // CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK21-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK21-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 6 -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK21-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 7 -// CHECK21-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 8 -// CHECK21-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK21-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 9 -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 8 -// CHECK21-NEXT: store i32 [[TMP15]], ptr 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 10 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK21-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK21-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8 +// CHECK21-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK21-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK21-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 8 +// CHECK21-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK21-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK21-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK21-NEXT: [[TMP21:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK21-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK21-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK21-NEXT: [[TMP23:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// CHECK21-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 8 +// CHECK21-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 10 +// CHECK21-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP26]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4 -// CHECK21-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP17:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK21-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.omp_outlined, i64 [[TMP17]], ptr [[B]], i64 [[TMP5]], ptr [[BN]], ptr [[C]], i64 [[TMP9]], i64 [[TMP11]], ptr [[CN]], ptr [[D]], i64 [[TMP19]]) +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP27]], ptr [[A_CASTED]], align 4 +// CHECK21-NEXT: [[TMP28:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK21-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK21-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.omp_outlined, i64 [[TMP28]], ptr [[B]], i64 [[TMP8]], ptr [[BN]], ptr [[C]], i64 [[TMP15]], i64 [[TMP18]], ptr [[CN]], ptr [[D]], i64 [[TMP30]]) // CHECK21-NEXT: ret void // // @@ -11677,30 +11764,35 @@ int bar(int n){ // CHECK21-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK21-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 -// CHECK21-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 -// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP7]], align 8 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK21-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK21-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK21-NEXT: 
[[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 8 +// CHECK21-NEXT: store i8 [[TMP9]], ptr [[AAA]], align 1 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK21-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 -// CHECK21-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK21-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP12:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK21-NEXT: store i8 [[TMP12]], ptr [[AAA_CASTED]], align 1 -// CHECK21-NEXT: [[TMP13:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], ptr [[B]]) +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK21-NEXT: store i16 [[TMP15]], ptr [[AA_CASTED]], align 2 +// CHECK21-NEXT: [[TMP16:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK21-NEXT: store i8 [[TMP17]], ptr [[AAA_CASTED]], align 1 +// CHECK21-NEXT: [[TMP18:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.omp_outlined, i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], ptr [[B]]) // CHECK21-NEXT: ret void // // @@ -11739,44 +11831,51 @@ int bar(int n){ // CHECK21-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK21-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr 
inbounds i64, ptr [[TMP1]], i32 4 -// CHECK21-NEXT: [[C:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 5 -// CHECK21-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 8 -// CHECK21-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP11]], 0 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK21-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK21-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK21-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK21-NEXT: [[C:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 5 +// CHECK21-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 8 +// CHECK21-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK21-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8 // CHECK21-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 6 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// 
CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 6 +// CHECK21-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP18]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[B]], align 4 -// CHECK21-NEXT: store i32 [[TMP12]], ptr [[B_CASTED]], align 4 -// CHECK21-NEXT: [[TMP13:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK21-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK21-NEXT: store i32 [[TMP19]], ptr [[B_CASTED]], align 4 +// CHECK21-NEXT: [[TMP20:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK21-NEXT: [[TMP21:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP21]], 0 // CHECK21-NEXT: [[STOREDV3:%.*]] = zext i1 [[LOADEDV2]] to i8 // CHECK21-NEXT: store i8 [[STOREDV3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK21-NEXT: [[TMP15:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[LOADEDV4:%.*]] = icmp ne i8 [[TMP16]], 0 +// CHECK21-NEXT: [[TMP22:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK21-NEXT: [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[LOADEDV4:%.*]] = icmp ne i8 [[TMP23]], 0 // CHECK21-NEXT: br i1 [[LOADEDV4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK21: omp_if.then: -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.omp_outlined, ptr [[THIS]], i64 [[TMP13]], i64 [[TMP6]], i64 [[TMP8]], ptr [[C]], i64 [[TMP15]]) +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.omp_outlined, ptr [[THIS]], i64 [[TMP20]], i64 [[TMP9]], i64 [[TMP12]], ptr [[C]], i64 [[TMP22]]) // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: // CHECK21-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK21-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK21-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK21-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[THIS]], i64 [[TMP13]], i64 [[TMP6]], i64 [[TMP8]], ptr [[C]], i64 [[TMP15]]) #[[ATTR1:[0-9]+]] +// CHECK21-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[THIS]], i64 [[TMP20]], i64 [[TMP9]], i64 [[TMP12]], ptr [[C]], i64 [[TMP22]]) #[[ATTR1:[0-9]+]] // CHECK21-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: @@ -11951,24 +12050,28 @@ int bar(int n){ // CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = 
getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK21-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK21-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK21-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK21-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.omp_outlined, i64 [[TMP7]], i64 [[TMP9]], ptr [[B]]) +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK21-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK21-NEXT: [[TMP13:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[B]]) // CHECK21-NEXT: ret void // // @@ -12065,8 +12168,9 @@ int bar(int n){ // CHECK23-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.omp_outlined) // CHECK23-NEXT: ret void @@ -12152,28 +12256,32 @@ int bar(int n){ // CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 -// CHECK23-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[LIN]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 +// CHECK23-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK23-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP6]], ptr [[LIN]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK23-NEXT: store i32 [[TMP9]], ptr [[A]], align 4 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 
+// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK23-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[LIN]], align 4 -// CHECK23-NEXT: store i32 [[TMP9]], ptr [[LIN_CASTED]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[A]], align 4 -// CHECK23-NEXT: store i32 [[TMP11]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l108.omp_outlined, i32 [[TMP8]], i32 [[TMP10]], i32 [[TMP12]]) +// CHECK23-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK23-NEXT: store i16 [[TMP11]], ptr [[AA_CASTED]], align 2 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[LIN]], align 4 +// CHECK23-NEXT: store i32 [[TMP13]], ptr [[LIN_CASTED]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[LIN_CASTED]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP15]], ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l108.omp_outlined, i32 [[TMP12]], i32 [[TMP14]], i32 [[TMP16]]) // CHECK23-NEXT: ret void // // @@ -12310,22 +12418,25 @@ int bar(int n){ // CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK23-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK23-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] 
= load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK23-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l116.omp_outlined, i32 [[TMP6]], i32 [[TMP8]]) +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK23-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l116.omp_outlined, i32 [[TMP9]], i32 [[TMP11]]) // CHECK23-NEXT: ret void // // @@ -12420,38 +12531,49 @@ int bar(int n){ // CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK23-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds 
i32, ptr [[TMP0]], i32 4 -// CHECK23-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 6 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 7 -// CHECK23-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -// CHECK23-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 9 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK23-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK23-NEXT: [[TMP14:%.*]] = load ptr, ptr 
[[TMP13]], align 4 // CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK23-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 10 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK23-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK23-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK23-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// CHECK23-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 4 +// CHECK23-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK23-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 +// CHECK23-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK23-NEXT: [[TMP23:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// CHECK23-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 4 +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK23-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 10 +// CHECK23-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP26]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4 -// CHECK23-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// 
CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.omp_outlined, i32 [[TMP17]], ptr [[B]], i32 [[TMP5]], ptr [[BN]], ptr [[C]], i32 [[TMP9]], i32 [[TMP11]], ptr [[CN]], ptr [[D]], i32 [[TMP19]]) +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP27]], ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP28:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK23-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l140.omp_outlined, i32 [[TMP28]], ptr [[B]], i32 [[TMP8]], ptr [[BN]], ptr [[C]], i32 [[TMP15]], i32 [[TMP18]], ptr [[CN]], ptr [[D]], i32 [[TMP30]]) // CHECK23-NEXT: ret void // // @@ -12618,30 +12740,35 @@ int bar(int n){ // CHECK23-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK23-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 4 -// CHECK23-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 -// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 
-// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP7]], align 4 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK23-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK23-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4 +// CHECK23-NEXT: store i8 [[TMP9]], ptr [[AAA]], align 1 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK23-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP12]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 -// CHECK23-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK23-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load i8, ptr [[AAA]], align 
1 -// CHECK23-NEXT: store i8 [[TMP12]], ptr [[AAA_CASTED]], align 1 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.omp_outlined, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], ptr [[B]]) +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK23-NEXT: store i16 [[TMP15]], ptr [[AA_CASTED]], align 2 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK23-NEXT: store i8 [[TMP17]], ptr [[AAA_CASTED]], align 1 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l195.omp_outlined, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], ptr [[B]]) // CHECK23-NEXT: ret void // // @@ -12680,44 +12807,51 @@ int bar(int n){ // CHECK23-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2]]) // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK23-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -// CHECK23-NEXT: [[C:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 5 -// CHECK23-NEXT: [[TMP11:%.*]] = load i8, ptr [[TMP10]], align 4 -// CHECK23-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP11]], 0 +// CHECK23-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK23-NEXT: [[TMP8:%.*]] = 
load ptr, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK23-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK23-NEXT: [[C:%.*]] = load ptr, ptr [[TMP14]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 5 +// CHECK23-NEXT: [[TMP16:%.*]] = load ptr, ptr [[TMP15]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = load i8, ptr [[TMP16]], align 4 +// CHECK23-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP17]], 0 // CHECK23-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8 // CHECK23-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 6 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 6 +// CHECK23-NEXT: [[TMP18:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP18]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[B]], align 4 -// CHECK23-NEXT: store i32 [[TMP12]], ptr [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP14]], 0 +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4 +// CHECK23-NEXT: store i32 [[TMP19]], ptr [[B_CASTED]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK23-NEXT: [[TMP21:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], 
align 1 +// CHECK23-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP21]], 0 // CHECK23-NEXT: [[STOREDV3:%.*]] = zext i1 [[LOADEDV2]] to i8 // CHECK23-NEXT: store i8 [[STOREDV3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: [[TMP16:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[LOADEDV4:%.*]] = icmp ne i8 [[TMP16]], 0 +// CHECK23-NEXT: [[TMP22:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK23-NEXT: [[TMP23:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[LOADEDV4:%.*]] = icmp ne i8 [[TMP23]], 0 // CHECK23-NEXT: br i1 [[LOADEDV4]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK23: omp_if.then: -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.omp_outlined, ptr [[THIS]], i32 [[TMP13]], i32 [[TMP6]], i32 [[TMP8]], ptr [[C]], i32 [[TMP15]]) +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.omp_outlined, ptr [[THIS]], i32 [[TMP20]], i32 [[TMP9]], i32 [[TMP12]], ptr [[C]], i32 [[TMP22]]) // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: // CHECK23-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK23-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK23-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK23-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[THIS]], i32 [[TMP13]], i32 [[TMP6]], i32 [[TMP8]], ptr [[C]], i32 [[TMP15]]) #[[ATTR1:[0-9]+]] +// CHECK23-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l214.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[THIS]], i32 [[TMP20]], i32 [[TMP9]], i32 [[TMP12]], ptr [[C]], i32 [[TMP22]]) #[[ATTR1:[0-9]+]] // CHECK23-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB2]], i32 [[TMP0]]) // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: @@ -12892,24 +13026,28 @@ int bar(int n){ // CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = 
getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK23-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK23-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK23-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK23-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.omp_outlined, i32 [[TMP7]], i32 [[TMP9]], ptr [[B]]) +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK23-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l178.omp_outlined, i32 [[TMP11]], i32 [[TMP13]], ptr [[B]]) // CHECK23-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp b/clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp index 30d5ae3089a6b..9bd1b23236dde 100644 --- a/clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp +++ b/clang/test/OpenMP/target_parallel_generic_loop_codegen-2.cpp @@ -592,16 +592,18 @@ int nested(int a){ // TCHECK-TARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // TCHECK-TARGET-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // TCHECK-TARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// TCHECK-TARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// TCHECK-TARGET-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK-TARGET-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// TCHECK-TARGET-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK-TARGET-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// TCHECK-TARGET-NEXT: [[TMP3:%.*]] = load i32, ptr 
[[TMP2]], align 8 +// TCHECK-TARGET-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK-TARGET-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK-TARGET-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-TARGET-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // TCHECK-TARGET-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// TCHECK-TARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 -// TCHECK-TARGET-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// TCHECK-TARGET-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// TCHECK-TARGET-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i64 [[TMP4]]) +// TCHECK-TARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// TCHECK-TARGET-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 +// TCHECK-TARGET-NEXT: [[TMP6:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// TCHECK-TARGET-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i64 [[TMP6]]) // TCHECK-TARGET-NEXT: ret void // // @@ -679,16 +681,18 @@ int nested(int a){ // TCHECK-TARGET-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // TCHECK-TARGET-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // TCHECK-TARGET-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// TCHECK-TARGET-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// TCHECK-TARGET-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK-TARGET-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// TCHECK-TARGET-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-TARGET-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK-TARGET-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// TCHECK-TARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// TCHECK-TARGET-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK-TARGET-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK-TARGET-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// TCHECK-TARGET-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // TCHECK-TARGET-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// TCHECK-TARGET-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 -// TCHECK-TARGET-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// TCHECK-TARGET-NEXT: [[TMP4:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// TCHECK-TARGET-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i64 [[TMP4]]) +// TCHECK-TARGET-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// TCHECK-TARGET-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 +// TCHECK-TARGET-NEXT: [[TMP6:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// TCHECK-TARGET-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i64 [[TMP6]]) // TCHECK-TARGET-NEXT: ret void // // @@ -766,16 +770,18 @@ int nested(int a){ // TCHECK-TARGET-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // TCHECK-TARGET-X86-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // TCHECK-TARGET-X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// TCHECK-TARGET-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// TCHECK-TARGET-X86-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK-TARGET-X86-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// TCHECK-TARGET-X86-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK-TARGET-X86-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK-TARGET-X86-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK-TARGET-X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // TCHECK-TARGET-X86-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// TCHECK-TARGET-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 -// 
TCHECK-TARGET-X86-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// TCHECK-TARGET-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// TCHECK-TARGET-X86-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i32 [[TMP4]]) +// TCHECK-TARGET-X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// TCHECK-TARGET-X86-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l42.omp_outlined, i32 [[TMP6]]) // TCHECK-TARGET-X86-NEXT: ret void // // @@ -853,16 +859,18 @@ int nested(int a){ // TCHECK-TARGET-X86-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // TCHECK-TARGET-X86-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // TCHECK-TARGET-X86-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// TCHECK-TARGET-X86-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// TCHECK-TARGET-X86-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// TCHECK-TARGET-X86-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// TCHECK-TARGET-X86-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// TCHECK-TARGET-X86-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// TCHECK-TARGET-X86-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// TCHECK-TARGET-X86-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// 
TCHECK-TARGET-X86-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // TCHECK-TARGET-X86-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// TCHECK-TARGET-X86-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4 -// TCHECK-TARGET-X86-NEXT: store i32 [[TMP3]], ptr [[A_CASTED]], align 4 -// TCHECK-TARGET-X86-NEXT: [[TMP4:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// TCHECK-TARGET-X86-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i32 [[TMP4]]) +// TCHECK-TARGET-X86-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 +// TCHECK-TARGET-X86-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 +// TCHECK-TARGET-X86-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// TCHECK-TARGET-X86-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6nestedi_l49.omp_outlined, i32 [[TMP6]]) // TCHECK-TARGET-X86-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_parallel_if_codegen.cpp b/clang/test/OpenMP/target_parallel_if_codegen.cpp index 0917f4289d5c3..7daa70ba3fe4b 100644 --- a/clang/test/OpenMP/target_parallel_if_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_if_codegen.cpp @@ -1623,16 +1623,18 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8 +// CHECK9-NEXT: 
[[LOADEDV:%.*]] = icmp ne i8 [[TMP4]], 0 // CHECK9-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8 // CHECK9-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP4]], 0 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK9-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP6]], 0 // CHECK9-NEXT: br i1 [[LOADEDV2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: // CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined) @@ -1665,8 +1667,9 @@ int bar(int n){ // CHECK9-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108.omp_outlined) // CHECK9-NEXT: ret void @@ -1695,33 +1698,37 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP6]], 0 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP9]], 0 // CHECK9-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8 // CHECK9-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// 
CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[B]], align 4 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP9]], 0 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: store i32 [[TMP11]], ptr [[B_CASTED]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK9-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP13]], 0 // CHECK9-NEXT: br i1 [[LOADEDV2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i64 [[TMP8]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i64 [[TMP12]]) // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: // CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[THIS]], i64 [[TMP8]]) #[[ATTR1]] +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[THIS]], i64 [[TMP12]]) #[[ATTR1]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: @@ -1759,18 +1766,21 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP4]], 0 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 
8 +// CHECK9-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP6]], 0 // CHECK9-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8 // CHECK9-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK9-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP5]], 0 +// CHECK9-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK9-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP8]], 0 // CHECK9-NEXT: br i1 [[LOADEDV2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: // CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l126.omp_outlined, ptr [[THIS]]) @@ -1813,19 +1823,21 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[A_CASTED]], align 8 // CHECK9-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK9-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK9-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l87.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP5]]) #[[ATTR1]] +// CHECK9-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l87.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i64 [[TMP7]]) #[[ATTR1]] // CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: ret void // @@ -1856,22 +1868,25 @@ int bar(int n){ // CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[B]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr 
[[TMP7]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i16, ptr [[B]], align 2 -// CHECK9-NEXT: store i16 [[TMP7]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i64 [[TMP6]], i64 [[TMP8]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: store i16 [[TMP10]], ptr [[B_CASTED]], align 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i64 [[TMP9]], i64 [[TMP11]]) // CHECK9-NEXT: ret void // // @@ -1905,16 +1920,18 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i8, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP4]], 0 // CHECK11-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8 // CHECK11-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP4]], 0 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP6]], 0 // CHECK11-NEXT: br i1 [[LOADEDV2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: // CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined) @@ -1947,8 +1964,9 @@ int bar(int n){ // CHECK11-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108.omp_outlined) // CHECK11-NEXT: ret void @@ -1977,33 +1995,37 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP6]], 0 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// 
CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP9]], 0 // CHECK11-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8 // CHECK11-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[B]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP9]], 0 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP13]], 0 // CHECK11-NEXT: br i1 [[LOADEDV2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// CHECK11-NEXT: call 
void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i32 [[TMP8]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i32 [[TMP12]]) // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: // CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[THIS]], i32 [[TMP8]]) #[[ATTR1]] +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], ptr [[THIS]], i32 [[TMP12]]) #[[ATTR1]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -2041,18 +2063,21 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i8, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP4]], 0 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: 
[[THIS:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP6]], 0 // CHECK11-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8 // CHECK11-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK11-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP5]], 0 +// CHECK11-NEXT: [[TMP8:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK11-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP8]], 0 // CHECK11-NEXT: br i1 [[LOADEDV2]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: // CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l126.omp_outlined, ptr [[THIS]]) @@ -2095,19 +2120,21 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4 // CHECK11-NEXT: call void @__kmpc_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: store i32 [[TMP0]], ptr [[DOTTHREADID_TEMP_]], align 4 // CHECK11-NEXT: store i32 0, ptr [[DOTBOUND_ZERO_ADDR]], align 4 -// CHECK11-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l87.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i32 [[TMP5]]) #[[ATTR1]] +// CHECK11-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l87.omp_outlined(ptr [[DOTTHREADID_TEMP_]], ptr [[DOTBOUND_ZERO_ADDR]], i32 [[TMP7]]) #[[ATTR1]] // CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: ret void // @@ -2138,22 +2165,25 @@ int bar(int n){ // CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i16 [[TMP4]], ptr [[B]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[B]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: 
[[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[B]], align 2 -// CHECK11-NEXT: store i16 [[TMP7]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i32 [[TMP6]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: store i16 [[TMP10]], ptr [[B_CASTED]], align 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i32 [[TMP9]], i32 [[TMP11]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp index 2878e37e6eec3..7131f9bac93b5 100644 --- a/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_num_threads_codegen.cpp @@ -1460,14 +1460,16 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: call void 
@__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]]) // CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined) // CHECK9-NEXT: ret void // @@ -1491,14 +1493,16 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]]) // CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108.omp_outlined) // CHECK9-NEXT: ret void // @@ -1524,23 +1528,27 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 8 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// 
CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP7]]) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i64 [[TMP9]]) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP11]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[B_CASTED]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i64 [[TMP13]]) // CHECK9-NEXT: ret void // // @@ -1572,10 +1580,12 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024) // CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l126.omp_outlined, ptr [[THIS]]) @@ -1605,8 +1615,9 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 20) // CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88.omp_outlined) @@ -1636,28 +1647,32 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], 
align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 8 -// CHECK9-NEXT: store i16 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i16 [[TMP7]], ptr [[B]], align 2 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i16 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK9-NEXT: [[TMP9:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i16, ptr [[B]], align 2 -// CHECK9-NEXT: store i16 [[TMP12]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i64 [[TMP11]], i64 [[TMP13]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK9-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: store i16 [[TMP16]], ptr [[B_CASTED]], align 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i64 [[TMP15]], i64 [[TMP17]]) // CHECK9-NEXT: ret void // // @@ -1689,14 +1704,16 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = 
getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]]) // CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined) // CHECK11-NEXT: ret void // @@ -1720,14 +1737,16 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store ptr 
[[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]]) // CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108.omp_outlined) // CHECK11-NEXT: ret void // @@ -1753,23 +1772,27 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 
+// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP7]]) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i32 [[TMP9]]) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i32 [[TMP13]]) // CHECK11-NEXT: ret void // // @@ -1801,10 +1824,12 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024) // CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l126.omp_outlined, ptr [[THIS]]) @@ -1834,8 +1859,9 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP2]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 20) // CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88.omp_outlined) @@ -1865,28 +1891,32 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = 
load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 4 -// CHECK11-NEXT: store i16 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i16 [[TMP7]], ptr [[B]], align 2 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i16 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK11-NEXT: [[TMP9:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i16, ptr [[B]], align 2 -// CHECK11-NEXT: store i16 [[TMP12]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i32 [[TMP11]], i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK11-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 +// CHECK11-NEXT: call void @__kmpc_push_num_threads(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: store i16 [[TMP16]], ptr [[B_CASTED]], align 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i32 [[TMP15]], i32 [[TMP17]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp b/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp index b5b44fd50a7dd..b72aa3d618c9e 100644 --- a/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_num_threads_strict_codegen.cpp @@ -1097,19 +1097,22 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = load ptr, ptr [[TMP4]], align 8 
-// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 8 // CHECK9-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[TMP]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META11:![0-9]+]] -// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP6]], i64 0, i64 0 -// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP5]], i32 2, ptr [[ARRAYDECAY]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META11:![0-9]+]] +// CHECK9-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP9]], i64 0, i64 0 +// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP8]], i32 2, ptr [[ARRAYDECAY]]) // CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l61.omp_outlined) // CHECK9-NEXT: ret void // @@ -1133,14 +1136,16 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]], i32 1, ptr null) +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]], i32 1, ptr null) // CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l65.omp_outlined) // CHECK9-NEXT: ret void // @@ -1165,19 +1170,22 @@ int bar(int n){ // CHECK9-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP5]]) -// 
CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CALL]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP8]]) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[CALL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK9-NEXT: ret void // @@ -1215,19 +1223,22 @@ int bar(int n){ // CHECK9-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load 
ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP5]]) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CALL]] +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP8]]) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[CALL]] // CHECK9-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK9-NEXT: ret void // @@ -1329,19 +1340,22 @@ int bar(int n){ // CHECK10-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 8 // CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK10-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 -// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK10-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 
1 +// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK10-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK10-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 -// CHECK10-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP5]]) -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CALL]] +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: [[CALL:%.*]] = call noundef signext i32 @_ZN2S12r1Ei(ptr noundef nonnull align 8 dereferenceable(8) [[S]], i32 noundef signext [[TMP8]]) +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[CALL]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK10-NEXT: ret void // @@ -1379,19 +1393,22 @@ int bar(int n){ // CHECK10-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK10-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 -// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// 
CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK10-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK10-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK10-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 -// CHECK10-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP5]]) -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CALL]] +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: [[CALL:%.*]] = call noundef signext i32 @_Z9ftemplateIiET_i(i32 noundef signext [[TMP8]]) +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[CALL]] // CHECK10-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK10-NEXT: ret void // @@ -1426,19 +1443,22 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// 
CHECK10-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK10-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 -// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK10-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK10-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 8 // CHECK10-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[TMP]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META11:![0-9]+]] -// CHECK10-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP6]], i64 0, i64 0 -// CHECK10-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP5]], i32 2, ptr [[ARRAYDECAY]]) +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META11:![0-9]+]] +// CHECK10-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP9]], i64 0, i64 0 +// CHECK10-NEXT: call void @__kmpc_push_num_threads_strict(ptr 
@[[GLOB1]], i32 [[TMP0]], i32 [[TMP8]], i32 2, ptr [[ARRAYDECAY]]) // CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l61.omp_outlined) // CHECK10-NEXT: ret void // @@ -1462,14 +1482,16 @@ int bar(int n){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK10-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK10-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK10-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]], i32 1, ptr null) +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]], i32 1, ptr null) // CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l65.omp_outlined) // CHECK10-NEXT: ret void // @@ -1561,19 +1583,22 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 4 // CHECK11-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[TMP]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull 
[[META12:![0-9]+]] -// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP6]], i32 0, i32 0 -// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP5]], i32 2, ptr [[ARRAYDECAY]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META12:![0-9]+]] +// CHECK11-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP9]], i32 0, i32 0 +// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP8]], i32 2, ptr [[ARRAYDECAY]]) // CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l61.omp_outlined) // CHECK11-NEXT: ret void // @@ -1597,14 +1622,16 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]], i32 1, ptr null) +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]], i32 1, ptr null) // CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l65.omp_outlined) // CHECK11-NEXT: ret void // @@ -1629,19 +1656,22 @@ int bar(int n){ // CHECK11-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store 
i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP5]]) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CALL]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP8]]) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[CALL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1679,19 +1709,22 @@ int bar(int n){ // CHECK11-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP5]]) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CALL]] +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP8]]) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[CALL]] // CHECK11-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK11-NEXT: ret void // @@ -1793,19 +1826,22 @@ int bar(int n){ // CHECK12-NEXT: [[S:%.*]] = alloca [[STRUCT_S1:%.*]], align 4 // CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK12-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr 
[[N]], align 4 -// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK12-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK12-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 -// CHECK12-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP5]]) -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CALL]] +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: [[CALL:%.*]] = call noundef i32 @_ZN2S12r1Ei(ptr noundef nonnull align 4 dereferenceable(8) [[S]], i32 noundef [[TMP8]]) +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[CALL]] // CHECK12-NEXT: store i32 [[ADD]], ptr [[A]], align 4 // CHECK12-NEXT: ret void // @@ -1843,19 +1879,22 @@ int bar(int n){ // CHECK12-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// 
CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK12-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 -// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK12-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK12-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[N]], align 4 -// CHECK12-NEXT: [[CALL:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP5]]) -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], [[CALL]] +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: [[CALL:%.*]] = call noundef i32 @_Z9ftemplateIiET_i(i32 noundef [[TMP8]]) +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], [[CALL]] // CHECK12-NEXT: store i32 [[ADD]], ptr 
[[A]], align 4 // CHECK12-NEXT: ret void // @@ -1890,19 +1929,22 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK12-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK12-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK12-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 4 // CHECK12-NEXT: store ptr [[DOTCAPTURE_EXPR_1]], ptr [[TMP]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META12:![0-9]+]] -// CHECK12-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP6]], i32 0, i32 0 
-// CHECK12-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP5]], i32 2, ptr [[ARRAYDECAY]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META12:![0-9]+]] +// CHECK12-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i8], ptr [[TMP9]], i32 0, i32 0 +// CHECK12-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP8]], i32 2, ptr [[ARRAYDECAY]]) // CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l61.omp_outlined) // CHECK12-NEXT: ret void // @@ -1926,14 +1968,16 @@ int bar(int n){ // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK12-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK12-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load 
i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]], i32 1, ptr null) +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: call void @__kmpc_push_num_threads_strict(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]], i32 1, ptr null) // CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l65.omp_outlined) // CHECK12-NEXT: ret void // diff --git a/clang/test/OpenMP/target_private_codegen.cpp b/clang/test/OpenMP/target_private_codegen.cpp index 9f662083e7be2..32073298a8bad 100644 --- a/clang/test/OpenMP/target_private_codegen.cpp +++ b/clang/test/OpenMP/target_private_codegen.cpp @@ -210,8 +210,9 @@ struct S1 { // TCHECK: [[DYN_PTR:%.+]] = alloca ptr // TCHECK: [[B:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[SSTACK:%.+]] = alloca ptr, - // TCHECK: getelementptr inbounds i{{[0-9]+}}, ptr {{%.+}}, i32 0 - // TCHECK: [[TH_ADDR_REF:%.+]] = load ptr, ptr {{%.+}}, + // TCHECK: [[TH_SLOT:%.+]] = getelementptr inbounds ptr, ptr {{%.+}}, i32 0 + // TCHECK: [[TH_PTR:%.+]] = load ptr, ptr [[TH_SLOT]], + // TCHECK: [[TH_ADDR_REF:%.+]] = load ptr, ptr [[TH_PTR]], // TCHECK: [[VLA_ADDR_REF:%.+]] = load i{{[0-9]+}}, ptr {{%.+}}, // TCHECK: [[VLA_ADDR_REF2:%.+]] = load i{{[0-9]+}}, ptr {{%.+}}, // TCHECK: [[RET_STACK:%.+]] = call ptr @llvm.stacksave.p0() diff --git a/clang/test/OpenMP/target_task_affinity_codegen.cpp b/clang/test/OpenMP/target_task_affinity_codegen.cpp index 23e2962583250..28904c62b1b8c 100644 --- a/clang/test/OpenMP/target_task_affinity_codegen.cpp +++ b/clang/test/OpenMP/target_task_affinity_codegen.cpp @@ -297,7 +297,7 @@ int main() { // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META13]] // CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META13]] // CHECK1-NEXT: 
[[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META13]] -// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR1]] +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR1]], !inline_history [[META14:![0-9]+]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META13]] // CHECK1-NEXT: store i32 0, ptr [[I_I]], align 4, !noalias [[META13]] // CHECK1-NEXT: br label [[FOR_COND_I:%.*]] @@ -312,7 +312,7 @@ int main() { // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i64 [[IDXPROM_I]] // CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX_I]], align 4 // CHECK1-NEXT: [[MUL_I:%.*]] = mul nsw i32 2, [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP9]], align 8, !nonnull [[META14:![0-9]+]], !align [[META15:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP9]], align 8, !nonnull [[META15:![0-9]+]], !align [[META16:![0-9]+]] // CHECK1-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[I_I]], align 4, !noalias [[META13]] // CHECK1-NEXT: [[IDXPROM1_I:%.*]] = sext i32 [[TMP19]] to i64 @@ -321,7 +321,7 @@ int main() { // CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[I_I]], align 4, !noalias [[META13]] // CHECK1-NEXT: [[INC_I:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK1-NEXT: store i32 [[INC_I]], ptr [[I_I]], align 4, !noalias [[META13]] -// CHECK1-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK1: .omp_outlined..exit: // CHECK1-NEXT: ret i32 0 // @@ -540,7 +540,7 @@ int main() { // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META14]] // CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META14]] // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], 
align 4, !noalias [[META14]] -// CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR1]] +// CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR1]], !inline_history [[META15:![0-9]+]] // CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META14]] // CHECK3-NEXT: store i32 0, ptr [[I_I]], align 4, !noalias [[META14]] // CHECK3-NEXT: br label [[FOR_COND_I:%.*]] @@ -554,7 +554,7 @@ int main() { // CHECK3-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 [[TMP15]] // CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX_I]], align 4 // CHECK3-NEXT: [[MUL_I:%.*]] = mul nsw i32 2, [[TMP16]] -// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP9]], align 4, !nonnull [[META15:![0-9]+]], !align [[META16:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP9]], align 4, !nonnull [[META16:![0-9]+]], !align [[META17:![0-9]+]] // CHECK3-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 // CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[I_I]], align 4, !noalias [[META14]] // CHECK3-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 [[TMP19]] @@ -562,7 +562,7 @@ int main() { // CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[I_I]], align 4, !noalias [[META14]] // CHECK3-NEXT: [[INC_I:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[INC_I]], ptr [[I_I]], align 4, !noalias [[META14]] -// CHECK3-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK3-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK3: .omp_outlined..exit: // CHECK3-NEXT: ret i32 0 // @@ -647,40 +647,43 @@ int main() { // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[B:%.*]] = getelementptr inbounds 
i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[A:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[A:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK9-NEXT: store ptr [[B]], ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 8, ptr @.omp_task_entry.) 
-// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x [[STRUCT_KMP_TASK_AFFINITY_INFO_T]]], ptr [[DOTAFFS_ARR_ADDR]], i64 0, i64 0 -// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i64 0 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK9-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i64 1023 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[ARRAYIDX2]], i32 1 -// CHECK9-NEXT: [[TMP8:%.*]] = ptrtoaddr ptr [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = ptrtoaddr ptr [[ARRAYIDX]] to i64 -// CHECK9-NEXT: [[TMP10:%.*]] = sub nuw i64 [[TMP8]], [[TMP9]] -// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_KMP_TASK_AFFINITY_INFO_T]], ptr [[TMP4]], i64 0 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_AFFINITY_INFO_T]], ptr [[TMP11]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 -// CHECK9-NEXT: store i64 [[TMP13]], ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_AFFINITY_INFO_T]], ptr [[TMP11]], i32 0, i32 1 -// CHECK9-NEXT: store i64 [[TMP10]], ptr [[TMP14]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_reg_task_with_affinity(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP3]], i32 1, ptr [[TMP4]]) -// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP16]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 -// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP18]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) -// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds 
nuw [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP19]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[A]], align 8 -// CHECK9-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 8 -// CHECK9-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP3]]) -// CHECK9-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_omp_taskwait(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK9-NEXT: store ptr [[B]], ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i64 48, i64 8, ptr @.omp_task_entry.) +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x [[STRUCT_KMP_TASK_AFFINITY_INFO_T]]], ptr [[DOTAFFS_ARR_ADDR]], i64 0, i64 0 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP8]], i64 0 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK9-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP9]], i64 1023 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[ARRAYIDX2]], i32 1 +// CHECK9-NEXT: [[TMP11:%.*]] = ptrtoaddr ptr [[TMP10]] to i64 +// CHECK9-NEXT: [[TMP12:%.*]] = ptrtoaddr ptr [[ARRAYIDX]] to i64 +// CHECK9-NEXT: [[TMP13:%.*]] = sub nuw i64 [[TMP11]], [[TMP12]] +// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_KMP_TASK_AFFINITY_INFO_T]], ptr [[TMP7]], i64 0 +// CHECK9-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_AFFINITY_INFO_T]], ptr [[TMP14]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i64 +// CHECK9-NEXT: store i64 [[TMP16]], ptr [[TMP15]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_AFFINITY_INFO_T]], ptr [[TMP14]], i32 0, i32 1 +// CHECK9-NEXT: store i64 [[TMP13]], ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = call i32 
@__kmpc_omp_reg_task_with_affinity(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP6]], i32 1, ptr [[TMP7]]) +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK9-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP21]], ptr align 8 [[AGG_CAPTURED]], i64 8, i1 false) +// CHECK9-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP6]], i32 0, i32 1 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP22]], i32 0, i32 0 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[A]], align 8 +// CHECK9-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP6]]) +// CHECK9-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_omp_taskwait(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK9-NEXT: ret void // // @@ -733,7 +736,7 @@ int main() { // CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META14]] // CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META14]] // CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META14]] -// CHECK9-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR3:[0-9]+]] +// CHECK9-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR3:[0-9]+]], !inline_history [[META15:![0-9]+]] // CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META14]] // CHECK9-NEXT: store i32 0, ptr [[I_I]], align 4, !noalias [[META14]] // CHECK9-NEXT: br label [[FOR_COND_I:%.*]] @@ -748,7 +751,7 @@ int main() { // CHECK9-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr 
inbounds i32, ptr [[TMP14]], i64 [[IDXPROM_I]] // CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX_I]], align 4 // CHECK9-NEXT: [[MUL_I:%.*]] = mul nsw i32 2, [[TMP16]] -// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP9]], align 8, !nonnull [[META15:![0-9]+]], !align [[META16:![0-9]+]] +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP9]], align 8, !nonnull [[META16:![0-9]+]], !align [[META17:![0-9]+]] // CHECK9-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 8 // CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I_I]], align 4, !noalias [[META14]] // CHECK9-NEXT: [[IDXPROM1_I:%.*]] = sext i32 [[TMP19]] to i64 @@ -757,7 +760,7 @@ int main() { // CHECK9-NEXT: [[TMP20:%.*]] = load i32, ptr [[I_I]], align 4, !noalias [[META14]] // CHECK9-NEXT: [[INC_I:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK9-NEXT: store i32 [[INC_I]], ptr [[I_I]], align 4, !noalias [[META14]] -// CHECK9-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK9-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: .omp_outlined..exit: // CHECK9-NEXT: ret i32 0 // @@ -772,40 +775,43 @@ int main() { // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[B:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[A:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[A:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: 
[[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 -// CHECK11-NEXT: store ptr [[B]], ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 24, i32 4, ptr @.omp_task_entry.) -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x [[STRUCT_KMP_TASK_AFFINITY_INFO_T]]], ptr [[DOTAFFS_ARR_ADDR]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[A]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP5]], i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[A]], align 4 -// CHECK11-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP6]], i32 1023 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr i32, ptr [[ARRAYIDX2]], i32 1 -// CHECK11-NEXT: [[TMP8:%.*]] = ptrtoaddr ptr [[TMP7]] to i32 -// CHECK11-NEXT: [[TMP9:%.*]] = ptrtoaddr ptr [[ARRAYIDX]] to i32 -// CHECK11-NEXT: [[TMP10:%.*]] = sub nuw i32 [[TMP8]], [[TMP9]] -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr [[STRUCT_KMP_TASK_AFFINITY_INFO_T]], ptr [[TMP4]], i32 0 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_AFFINITY_INFO_T]], ptr [[TMP11]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP13:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i32 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_AFFINITY_INFO_T]], ptr [[TMP11]], i32 0, i32 1 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[TMP14]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_reg_task_with_affinity(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP3]], i32 1, ptr [[TMP4]]) -// 
CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP3]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP16]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 -// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP18]], ptr align 4 [[AGG_CAPTURED]], i32 4, i1 false) -// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP3]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP19]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[A]], align 4 -// CHECK11-NEXT: store ptr [[TMP21]], ptr [[TMP20]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP3]]) -// CHECK11-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_omp_taskwait(ptr @[[GLOB1]], i32 [[TMP0]]) +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds nuw [[STRUCT_ANON]], ptr [[AGG_CAPTURED]], i32 0, i32 0 +// CHECK11-NEXT: store ptr [[B]], ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = call ptr @__kmpc_omp_task_alloc(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 24, i32 4, ptr @.omp_task_entry.) 
+// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x [[STRUCT_KMP_TASK_AFFINITY_INFO_T]]], ptr [[DOTAFFS_ARR_ADDR]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[A]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP8]], i32 0 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[A]], align 4 +// CHECK11-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP9]], i32 1023 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[ARRAYIDX2]], i32 1 +// CHECK11-NEXT: [[TMP11:%.*]] = ptrtoaddr ptr [[TMP10]] to i32 +// CHECK11-NEXT: [[TMP12:%.*]] = ptrtoaddr ptr [[ARRAYIDX]] to i32 +// CHECK11-NEXT: [[TMP13:%.*]] = sub nuw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr [[STRUCT_KMP_TASK_AFFINITY_INFO_T]], ptr [[TMP7]], i32 0 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_AFFINITY_INFO_T]], ptr [[TMP14]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP16:%.*]] = ptrtoint ptr [[ARRAYIDX]] to i32 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_AFFINITY_INFO_T]], ptr [[TMP14]], i32 0, i32 1 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = call i32 @__kmpc_omp_reg_task_with_affinity(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP6]], i32 1, ptr [[TMP7]]) +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES:%.*]], ptr [[TMP6]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP19]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP21:%.*]] = load ptr, ptr [[TMP20]], align 4 +// CHECK11-NEXT: call void @llvm.memcpy.p0.p0.i32(ptr align 4 [[TMP21]], ptr align 4 [[AGG_CAPTURED]], i32 4, i1 false) +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds nuw [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], ptr [[TMP6]], i32 0, i32 1 +// CHECK11-NEXT: 
[[TMP23:%.*]] = getelementptr inbounds nuw [[STRUCT__KMP_PRIVATES_T:%.*]], ptr [[TMP22]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[A]], align 4 +// CHECK11-NEXT: store ptr [[TMP24]], ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_omp_task(ptr @[[GLOB1]], i32 [[TMP0]], ptr [[TMP6]]) +// CHECK11-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_omp_taskwait(ptr @[[GLOB1]], i32 [[TMP0]]) // CHECK11-NEXT: ret void // // @@ -858,7 +864,7 @@ int main() { // CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META15]] // CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META15]] // CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META15]] -// CHECK11-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR3:[0-9]+]] +// CHECK11-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR3:[0-9]+]], !inline_history [[META16:![0-9]+]] // CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META15]] // CHECK11-NEXT: store i32 0, ptr [[I_I]], align 4, !noalias [[META15]] // CHECK11-NEXT: br label [[FOR_COND_I:%.*]] @@ -872,7 +878,7 @@ int main() { // CHECK11-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 [[TMP15]] // CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX_I]], align 4 // CHECK11-NEXT: [[MUL_I:%.*]] = mul nsw i32 2, [[TMP16]] -// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP9]], align 4, !nonnull [[META16:![0-9]+]], !align [[META17:![0-9]+]] +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP9]], align 4, !nonnull [[META17:![0-9]+]], !align [[META18:![0-9]+]] // CHECK11-NEXT: [[TMP18:%.*]] = load ptr, ptr [[TMP17]], align 4 // CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I_I]], align 4, !noalias [[META15]] // CHECK11-NEXT: [[ARRAYIDX1_I:%.*]] = getelementptr inbounds i32, ptr [[TMP18]], i32 
[[TMP19]] @@ -880,7 +886,7 @@ int main() { // CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[I_I]], align 4, !noalias [[META15]] // CHECK11-NEXT: [[INC_I:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[INC_I]], ptr [[I_I]], align 4, !noalias [[META15]] -// CHECK11-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK11-NEXT: br label [[FOR_COND_I]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: .omp_outlined..exit: // CHECK11-NEXT: ret i32 0 // diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index 441e3634fe989..243f533ed3b7e 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -4129,25 +4129,29 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 -// CHECK9-NEXT: store i32 [[TMP7]], ptr 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP8]], i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined, i64 [[TMP11]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP12]], i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP14]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined, i64 [[TMP15]]) // CHECK9-NEXT: ret void // // @@ -4172,16 +4176,18 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l111.omp_outlined, i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l111.omp_outlined, i64 [[TMP6]]) // CHECK9-NEXT: ret void // // @@ -4213,22 +4219,25 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: 
[[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l118.omp_outlined, i64 [[TMP6]], i64 [[TMP8]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l118.omp_outlined, i64 [[TMP9]], i64 [[TMP11]]) // CHECK9-NEXT: ret void // // @@ -4266,23 +4275,26 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, 
ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 8 +// CHECK9-NEXT: store i16 [[TMP7]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP8]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.omp_outlined, i64 [[TMP7]], i64 [[TMP9]]) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP11]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.omp_outlined, i64 [[TMP10]], i64 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -4320,23 +4332,26 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 8 +// CHECK9-NEXT: store i16 [[TMP7]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP8]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: 
store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.omp_outlined, i64 [[TMP7]], i64 [[TMP9]]) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP11]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.omp_outlined, i64 [[TMP10]], i64 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -4374,23 +4389,26 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 8 +// CHECK9-NEXT: store i16 [[TMP7]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP8]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined, i64 [[TMP7]], i64 [[TMP9]]) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP11]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined, i64 [[TMP10]], i64 [[TMP12]]) // CHECK9-NEXT: ret void // // @@ -4425,32 +4443,42 @@ int bar(int n){ // CHECK9-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK9-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 6 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 7 -// CHECK9-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 8 -// CHECK9-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 9 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], 
i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// CHECK9-NEXT: [[TMP23:%.*]] = load ptr, ptr 
[[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP23]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i64 [[TMP15]], ptr [[B]], i64 [[TMP5]], ptr [[BN]], ptr [[C]], i64 [[TMP9]], i64 [[TMP11]], ptr [[CN]], ptr [[D]]) +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP24]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i64 [[TMP25]], ptr [[B]], i64 [[TMP8]], ptr [[BN]], ptr [[C]], i64 [[TMP15]], i64 [[TMP18]], ptr [[CN]], ptr [[D]]) // CHECK9-NEXT: ret void // // @@ -4535,16 +4563,18 @@ int bar(int n){ // CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[NN]], 
align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[NN]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[NN_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined, i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[NN]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[NN_CASTED]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[NN_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined, i64 [[TMP6]]) // CHECK9-NEXT: ret void // // @@ -4586,16 +4616,18 @@ int bar(int n){ // CHECK9-NEXT: [[NN_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[NN]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[NN]], align 4 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[NN_CASTED]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[NN_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined, i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[NN]], align 4 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[NN_CASTED]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[NN_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined, i64 [[TMP6]]) // CHECK9-NEXT: ret void // // @@ -4632,12 +4664,14 @@ int bar(int n){ // CHECK9-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i64, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i64, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // 
CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200.omp_outlined, i64 [[TMP2]]) +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200.omp_outlined, i64 [[TMP3]]) // CHECK9-NEXT: ret void // // @@ -4668,30 +4702,35 @@ int bar(int n){ // CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 8 -// CHECK9-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP7]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 8 +// CHECK9-NEXT: store i8 [[TMP9]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP12]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK9-NEXT: store i8 [[TMP12]], ptr [[AAA_CASTED]], align 1 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227.omp_outlined, i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]], ptr [[B]]) +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP15]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: [[TMP17:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK9-NEXT: store i8 [[TMP17]], ptr [[AAA_CASTED]], align 1 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227.omp_outlined, i64 [[TMP14]], i64 [[TMP16]], i64 [[TMP18]], ptr [[B]]) // CHECK9-NEXT: ret void // // @@ -4740,24 +4779,30 @@ int bar(int n){ // CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK9-NEXT: 
[[C:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245.omp_outlined, ptr [[THIS]], i64 [[TMP10]], i64 [[TMP5]], i64 [[TMP7]], ptr [[C]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[B_CASTED]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245.omp_outlined, ptr [[THIS]], i64 [[TMP16]], i64 [[TMP8]], i64 [[TMP11]], ptr [[C]]) // CHECK9-NEXT: ret void // // @@ -4810,24 +4855,28 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], 
align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210.omp_outlined, i64 [[TMP7]], i64 [[TMP9]], ptr [[B]]) +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[B]]) // CHECK9-NEXT: ret void // // @@ -4872,25 +4921,29 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: 
store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP8]], i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined, i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP12]], i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP14]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101.omp_outlined, i32 [[TMP15]]) // CHECK11-NEXT: ret void // // @@ -4915,16 +4968,18 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l111.omp_outlined, i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l111.omp_outlined, i32 [[TMP6]]) // CHECK11-NEXT: ret void // // @@ -4956,22 +5011,25 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_CASTED]], 
align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l118.omp_outlined, i32 [[TMP6]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l118.omp_outlined, i32 [[TMP9]], i32 [[TMP11]]) // CHECK11-NEXT: ret void // // @@ -5009,23 +5067,26 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], 
align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i16 [[TMP7]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP8]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.omp_outlined, i32 [[TMP7]], i32 [[TMP9]]) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP11]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l124.omp_outlined, i32 [[TMP10]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // @@ -5063,23 +5124,26 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i16 [[TMP7]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP8]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr 
[[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.omp_outlined, i32 [[TMP7]], i32 [[TMP9]]) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP11]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l130.omp_outlined, i32 [[TMP10]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // @@ -5117,23 +5181,26 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], 
align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i16 [[TMP7]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP8]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined, i32 [[TMP7]], i32 [[TMP9]]) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP11]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l136.omp_outlined, i32 [[TMP10]], i32 [[TMP12]]) // CHECK11-NEXT: ret void // // @@ -5168,32 +5235,42 @@ int bar(int n){ // CHECK11-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 6 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 7 -// CHECK11-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -// CHECK11-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 9 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr 
[[TMP0]], i32 9 +// CHECK11-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP23]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i32 [[TMP15]], ptr [[B]], i32 [[TMP5]], ptr [[BN]], ptr [[C]], i32 [[TMP9]], i32 [[TMP11]], ptr [[CN]], ptr [[D]]) +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP24]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l160.omp_outlined, i32 [[TMP25]], ptr [[B]], i32 [[TMP8]], ptr [[BN]], ptr [[C]], i32 [[TMP15]], i32 [[TMP18]], ptr [[CN]], ptr [[D]]) // CHECK11-NEXT: ret void // // @@ -5278,16 +5355,18 @@ int bar(int n){ // CHECK11-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: 
[[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[NN]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[NN]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined, i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[NN]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[NN_CASTED]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[NN_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l172.omp_outlined, i32 [[TMP6]]) // CHECK11-NEXT: ret void // // @@ -5329,16 +5408,18 @@ int bar(int n){ // CHECK11-NEXT: [[NN_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[NN]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[NN]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[NN]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[NN_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined, i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[NN]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[NN_CASTED]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[NN_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l175.omp_outlined, i32 [[TMP6]]) // CHECK11-NEXT: ret void // // @@ -5375,12 +5456,14 @@ int bar(int n){ // CHECK11-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200.omp_outlined, i32 [[TMP2]]) +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z6bazzzziPi_l200.omp_outlined, i32 [[TMP3]]) // CHECK11-NEXT: ret void // // @@ -5411,30 +5494,35 @@ int bar(int n){ // CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i8, ptr [[TMP5]], align 4 -// CHECK11-NEXT: store i8 [[TMP6]], ptr [[AAA]], align 1 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr 
[[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i8, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i8 [[TMP9]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP12]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK11-NEXT: store i8 [[TMP12]], ptr [[AAA_CASTED]], align 1 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227.omp_outlined, i32 [[TMP9]], i32 [[TMP11]], i32 [[TMP13]], ptr [[B]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP15]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK11-NEXT: store i8 [[TMP17]], ptr [[AAA_CASTED]], align 1 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l227.omp_outlined, i32 [[TMP14]], i32 [[TMP16]], i32 [[TMP18]], ptr [[B]]) // CHECK11-NEXT: ret void // // @@ -5483,24 +5571,30 @@ int bar(int n){ // CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 
+// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245.omp_outlined, ptr [[THIS]], i32 [[TMP10]], i32 [[TMP5]], i32 [[TMP7]], ptr [[C]]) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l245.omp_outlined, ptr [[THIS]], i32 [[TMP16]], i32 [[TMP8]], i32 [[TMP11]], ptr [[C]]) // CHECK11-NEXT: ret void // // @@ -5553,24 +5647,28 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = 
load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210.omp_outlined, i32 [[TMP7]], i32 [[TMP9]], ptr [[B]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l210.omp_outlined, i32 [[TMP11]], i32 [[TMP13]], ptr [[B]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_codegen.cpp index 41ce9b1ff4c35..233ddd53f489c 100644 --- a/clang/test/OpenMP/target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen.cpp @@ -827,7 +827,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META27]] -// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]], !inline_history [[META28:![0-9]+]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META27]] // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META27]] @@ -1164,14 +1164,14 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META28:![0-9]+]], !align [[META29:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 
8, !nonnull [[META29:![0-9]+]], !align [[META30:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META28]], !align [[META29]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META28]], !align [[META30:![0-9]+]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META29]], !align [[META30]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META29]], !align [[META31:![0-9]+]] // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META28]], !align [[META30]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META28]], !align [[META30]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META29]], !align [[META31]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META29]], !align [[META31]] // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -1216,14 +1216,14 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store i64 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META28]], !align [[META29]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META29]], !align [[META30]] // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META28]], !align [[META29]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[C_ADDR]], align 8, !nonnull [[META28]], !align [[META30]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META29]], !align [[META30]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META29]], !align [[META31]] // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META28]], !align [[META30]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META28]], !align [[META30]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META29]], !align [[META31]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META29]], !align [[META31]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1254,59 +1254,59 @@ int bar(int n){ // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // 
CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store i32 [[ADD7]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double // CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], 
align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP23]] // CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX17]], i64 3 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK1-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, 
!llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store i64 [[ADD20]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK1-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK1-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: store i8 [[CONV23]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1735,7 +1735,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META28]], !align 
[[META34:![0-9]+]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META29]], !align [[META35:![0-9]+]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 @@ -1770,7 +1770,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META28]], !align [[META34]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META29]], !align [[META35]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1850,7 +1850,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META28]], !align [[META29]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META29]], !align [[META30]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[N_CASTED]], align 8 @@ -1895,7 +1895,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META28]], !align [[META29]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META29]], !align 
[[META30]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -2000,7 +2000,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META28]], !align [[META29]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META29]], !align [[META30]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -2031,7 +2031,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META28]], !align [[META29]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META29]], !align [[META30]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -2623,7 +2623,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META28]] -// CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] +// CHECK3-NEXT: call void 
[[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]], !inline_history [[META29:![0-9]+]] // CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META28]] // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META28]] @@ -2960,14 +2960,14 @@ int bar(int n){ // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META29:![0-9]+]], !align [[META30:![0-9]+]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META30:![0-9]+]], !align [[META31:![0-9]+]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] +// CHECK3-NEXT: [[TMP7:%.*]] = 
load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -3012,14 +3012,14 @@ int bar(int n){ // CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK3-NEXT: store i32 [[DOTCAPTURE_EXPR_]], ptr [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -3050,59 +3050,59 @@ int bar(int n){ // 
CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31:![0-9]+]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: store i32 [[ADD7]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: [[TMP20:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // 
CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double // CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: store float [[CONV9]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: [[TMP21:%.*]] = load float, ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: store float [[CONV13]], ptr [[ARRAYIDX10]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: [[TMP22:%.*]] = load double, ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: store double [[ADD16]], ptr [[ARRAYIDX15]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP23]] // CHECK3-NEXT: [[ARRAYIDX18:%.*]] = 
getelementptr inbounds double, ptr [[ARRAYIDX17]], i32 3 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: [[TMP24:%.*]] = load double, ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: store double [[ADD19]], ptr [[ARRAYIDX18]], align 8, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK3-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: store i64 [[ADD20]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK3-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK3-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP31]] +// CHECK3-NEXT: store i8 [[CONV23]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] +// 
CHECK3-NEXT: [[TMP27:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] // CHECK3-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK3-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP31]] -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP32]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -3531,7 +3531,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META29]], !align [[META34:![0-9]+]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META30]], !align [[META35:![0-9]+]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 @@ -3566,7 +3566,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META29]], !align [[META34]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META30]], !align [[META35]] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -3646,7 +3646,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[AAA]], ptr 
[[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[N_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[N_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_CASTED]], align 4 @@ -3691,7 +3691,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -3796,7 +3796,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -3827,7 +3827,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], 
ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META29]], !align [[META30]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META30]], !align [[META31]] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -3898,25 +3898,29 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 -// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = 
getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP8]], i32 [[TMP9]]) -// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103.omp_outlined, i64 [[TMP11]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP12]], i32 [[TMP13]]) +// CHECK9-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP14]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103.omp_outlined, i64 [[TMP15]]) // CHECK9-NEXT: ret void // // @@ -3991,16 +3995,18 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l113.omp_outlined, i64 [[TMP4]]) +// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l113.omp_outlined, i64 [[TMP6]]) // CHECK9-NEXT: ret void // // @@ -4082,22 +4088,25 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: 
[[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l120.omp_outlined, i64 [[TMP6]], i64 [[TMP8]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l120.omp_outlined, i64 [[TMP9]], i64 [[TMP11]]) // CHECK9-NEXT: ret void // // @@ -4184,38 +4193,49 @@ int bar(int n){ // CHECK9-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK9-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], 
align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 6 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 7 -// CHECK9-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 8 -// CHECK9-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 9 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 8 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 10 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 
8 +// CHECK9-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK9-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 8 +// CHECK9-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK9-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK9-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK9-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK9-NEXT: [[TMP23:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// CHECK9-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 8 +// CHECK9-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 10 +// CHECK9-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP26]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.omp_outlined, i64 [[TMP17]], ptr [[B]], i64 [[TMP5]], ptr [[BN]], ptr [[C]], i64 [[TMP9]], i64 [[TMP11]], ptr [[CN]], ptr [[D]], i64 [[TMP19]]) +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP27]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK9-NEXT: [[TMP30:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.omp_outlined, i64 [[TMP28]], ptr [[B]], i64 [[TMP8]], ptr [[BN]], ptr [[C]], i64 [[TMP15]], i64 [[TMP18]], ptr [[CN]], ptr [[D]], i64 [[TMP30]]) // CHECK9-NEXT: ret void // // @@ -4376,36 +4396,42 @@ int bar(int n){ // CHECK9-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 -// CHECK9-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 8 -// 
CHECK9-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP8]], align 8 +// CHECK9-NEXT: store i16 [[TMP9]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store i8 [[TMP12]], ptr [[AAA]], align 1 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK9-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP15]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 -// 
CHECK9-NEXT: store i32 [[TMP10]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP12]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP14]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: [[TMP16:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK9-NEXT: store i8 [[TMP16]], ptr [[AAA_CASTED]], align 1 -// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], ptr [[B]]) +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP18]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP20:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP20]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP21:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK9-NEXT: store i8 [[TMP22]], ptr [[AAA_CASTED]], align 1 +// CHECK9-NEXT: [[TMP23:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.omp_outlined, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP23]], ptr [[B]]) // CHECK9-NEXT: ret void // // @@ -4538,24 +4564,30 @@ int bar(int n){ // CHECK9-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK9-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 +// CHECK9-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr 
[[TMP6]], align 8 +// CHECK9-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK9-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4 -// CHECK9-NEXT: store i32 [[TMP9]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.omp_outlined, ptr [[THIS]], i64 [[TMP10]], i64 [[TMP5]], i64 [[TMP7]], ptr [[C]]) +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[B_CASTED]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.omp_outlined, ptr [[THIS]], i64 [[TMP16]], i64 [[TMP8]], i64 [[TMP11]], ptr [[C]]) // CHECK9-NEXT: ret void // // @@ -4658,24 +4690,28 @@ int bar(int n){ // CHECK9-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK9-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// 
CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK9-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.omp_outlined, i64 [[TMP7]], i64 [[TMP9]], ptr [[B]]) +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK9-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[B]]) // CHECK9-NEXT: ret void // // @@ -4770,25 +4806,29 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK11-NEXT: 
store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP8]], i32 [[TMP9]]) -// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103.omp_outlined, i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP12]], i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP14]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103.omp_outlined, i32 [[TMP15]]) // CHECK11-NEXT: ret void // // @@ -4863,16 +4903,18 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l113.omp_outlined, i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l113.omp_outlined, i32 [[TMP6]]) // CHECK11-NEXT: ret void // // @@ -4954,22 +4996,25 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_CASTED]], 
align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l120.omp_outlined, i32 [[TMP6]], i32 [[TMP8]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l120.omp_outlined, i32 [[TMP9]], i32 [[TMP11]]) // CHECK11-NEXT: ret void // // @@ -5056,38 +5101,49 @@ int bar(int n){ // CHECK11-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// 
CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 6 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 7 -// CHECK11-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -// CHECK11-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 9 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 // 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 10 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK11-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK11-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// CHECK11-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK11-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 +// CHECK11-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// CHECK11-NEXT: [[TMP24:%.*]] = load ptr, ptr [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, ptr [[TMP24]], align 4 +// CHECK11-NEXT: store i32 [[TMP25]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 10 +// CHECK11-NEXT: [[TMP26:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP26]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void 
(ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.omp_outlined, i32 [[TMP17]], ptr [[B]], i32 [[TMP5]], ptr [[BN]], ptr [[C]], i32 [[TMP9]], i32 [[TMP11]], ptr [[CN]], ptr [[D]], i32 [[TMP19]]) +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP27]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: store i32 [[TMP29]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 10, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l145.omp_outlined, i32 [[TMP28]], ptr [[B]], i32 [[TMP8]], ptr [[BN]], ptr [[C]], i32 [[TMP15]], i32 [[TMP18]], ptr [[CN]], ptr [[D]], i32 [[TMP30]]) // CHECK11-NEXT: ret void // // @@ -5248,36 +5304,42 @@ int bar(int n){ // CHECK11-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 -// CHECK11-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: 
[[TMP8:%.*]] = load i8, ptr [[TMP7]], align 4 -// CHECK11-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i16 [[TMP9]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store i8 [[TMP12]], ptr [[AAA]], align 1 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP14]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK11-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP15]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 
4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP12]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP14]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK11-NEXT: store i8 [[TMP16]], ptr [[AAA_CASTED]], align 1 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.omp_outlined, i32 [[TMP11]], i32 [[TMP13]], i32 [[TMP15]], i32 [[TMP17]], ptr [[B]]) +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP20]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK11-NEXT: store i8 [[TMP22]], ptr [[AAA_CASTED]], align 1 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l200.omp_outlined, i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP21]], i32 [[TMP23]], ptr [[B]]) // CHECK11-NEXT: ret void // // @@ -5410,24 +5472,30 @@ int bar(int n){ // CHECK11-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK11-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK11-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.omp_outlined, ptr [[THIS]], i32 [[TMP10]], i32 [[TMP5]], i32 [[TMP7]], ptr [[C]]) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l218.omp_outlined, ptr [[THIS]], i32 [[TMP16]], i32 [[TMP8]], i32 [[TMP11]], ptr [[C]]) // CHECK11-NEXT: ret void // // @@ -5530,24 +5598,28 @@ int bar(int n){ // CHECK11-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK11-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr 
[[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK11-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.omp_outlined, i32 [[TMP7]], i32 [[TMP9]], ptr [[B]]) +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK11-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l183.omp_outlined, i32 [[TMP11]], i32 [[TMP13]], ptr [[B]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp index 6f239e97533fb..a1c76dff16e3d 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_codegen.cpp @@ -1887,27 +1887,32 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK10-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK10-NEXT: [[A:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 -// CHECK10-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 -// CHECK10-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// 
CHECK10-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK10-NEXT: [[A:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 8 +// CHECK10-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK10-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK10-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP13]], align 8 // CHECK10-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 8 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP9]], i32 [[TMP10]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], ptr [[N_CASTED]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined, i64 [[TMP12]], ptr [[A]]) +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP14]], i32 [[TMP15]]) +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined, i64 [[TMP17]], ptr [[A]]) // CHECK10-NEXT: ret void // // @@ -2127,20 +2132,24 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK10-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK10-NEXT: [[A:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK10-NEXT: [[G:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK10-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 +// CHECK10-NEXT: 
[[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK10-NEXT: [[A:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK10-NEXT: [[G:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK10-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[N]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[G]], align 8 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58.omp_outlined, i64 [[TMP5]], ptr [[A]], ptr [[TMP6]]) +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], ptr [[N_CASTED]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[G]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58.omp_outlined, i64 [[TMP9]], ptr [[A]], ptr [[TMP10]]) // CHECK10-NEXT: ret void // // @@ -2356,27 +2365,32 @@ int target_teams_fun(int *g){ // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK12-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK12-NEXT: [[A:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK12-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK12-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK12-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK12-NEXT: [[A:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 
+// CHECK12-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK12-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK12-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK12-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP13]], align 4 // CHECK12-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP9]], i32 [[TMP10]]) -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 -// CHECK12-NEXT: store i32 [[TMP11]], ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined, i32 [[TMP12]], ptr [[A]]) +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP14]], i32 [[TMP15]]) +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined, i32 [[TMP17]], ptr [[A]]) // CHECK12-NEXT: ret void // // @@ -2591,20 +2605,24 @@ int target_teams_fun(int *g){ // CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK12-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK12-NEXT: [[A:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK12-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK12-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK12-NEXT: [[A:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK12-NEXT: [[G:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK12-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[N]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load ptr, ptr [[G]], align 4 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58.omp_outlined, i32 [[TMP5]], ptr [[A]], ptr [[TMP6]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP8]], ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load ptr, ptr [[G]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l58.omp_outlined, i32 [[TMP9]], ptr [[A]], ptr [[TMP10]]) // CHECK12-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp index 29941c9458feb..1048608dfba6a 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_firstprivate_codegen.cpp @@ -2687,28 +2687,34 @@ int main() { // CHECK13-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 // CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK13-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 
2 -// CHECK13-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK13-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[SIVAR]], align 4 -// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK13-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK13-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[SIVAR]], align 4 +// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP13]], align 8 
// CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[SIVAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l122.omp_outlined, ptr [[VEC]], i64 [[TMP9]], ptr [[S_ARR]], ptr [[VAR]], i64 [[TMP11]]) +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l122.omp_outlined, ptr [[VEC]], i64 [[TMP15]], ptr [[S_ARR]], ptr [[VAR]], i64 [[TMP17]]) // CHECK13-NEXT: ret void // // @@ -3022,24 +3028,29 @@ int main() { // CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK13-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK13-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK13-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK13-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK13-NEXT: 
[[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK13-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK13-NEXT: store ptr [[VAR]], ptr [[TMP]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META7]], !align [[META8]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.omp_outlined, ptr [[VEC]], i64 [[TMP7]], ptr [[S_ARR]], ptr [[TMP8]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[T_VAR_CASTED]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META7]], !align [[META8]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.omp_outlined, ptr [[VEC]], i64 [[TMP12]], ptr [[S_ARR]], ptr [[TMP13]]) // CHECK13-NEXT: ret void // // @@ -3417,28 +3428,34 @@ int main() { // CHECK15-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK15-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[T_VAR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK15-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK15-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP5]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[SIVAR]], align 4 -// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = 
getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[SIVAR]], align 4 +// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP13]], align 4 // CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l122.omp_outlined, ptr [[VEC]], i32 [[TMP9]], ptr [[S_ARR]], ptr [[VAR]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l122.omp_outlined, ptr [[VEC]], i32 [[TMP15]], ptr [[S_ARR]], ptr [[VAR]], i32 [[TMP17]]) // CHECK15-NEXT: ret void // // @@ -3746,24 +3763,29 @@ int main() { // CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK15-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[T_VAR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK15-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK15-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP5]], align 4 -// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr 
[[TMP0]], i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK15-NEXT: store ptr [[VAR]], ptr [[TMP]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META8]], !align [[META9]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.omp_outlined, ptr [[VEC]], i32 [[TMP7]], ptr [[S_ARR]], ptr [[TMP8]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[T_VAR_CASTED]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META8]], !align [[META9]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.omp_outlined, ptr [[VEC]], i32 [[TMP12]], ptr [[S_ARR]], ptr [[TMP13]]) // CHECK15-NEXT: ret void // // @@ -4138,30 +4160,34 @@ int main() { // CHECK17-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[G]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 -// CHECK17-NEXT: store i32 [[TMP6]], 
ptr [[SIVAR]], align 4 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[G1]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 8 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[SIVAR]], align 4 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[G]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[G_CASTED]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load volatile i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[G1_CASTED]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[SIVAR_CASTED]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l99.omp_outlined, i64 [[TMP8]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[G]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[G_CASTED]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[G_CASTED]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]] +// CHECK17-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[G1_CASTED]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l99.omp_outlined, i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) // CHECK17-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp index 1a3a09b0deac0..d3c617d564de9 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_private_codegen.cpp @@ -1989,8 +1989,9 @@ int main() { // CHECK13-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr 
[[TMP0]], i32 0 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) // CHECK13-NEXT: ret void @@ -2228,8 +2229,9 @@ int main() { // CHECK13-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) // CHECK13-NEXT: ret void @@ -2513,8 +2515,9 @@ int main() { // CHECK15-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) // CHECK15-NEXT: ret void @@ -2746,8 +2749,9 @@ int main() { // CHECK15-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) // CHECK15-NEXT: ret void @@ -3025,8 +3029,9 @@ int main() { // CHECK17-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined) // CHECK17-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp index 24218a15b23a2..c73ef0dac7fd0 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp @@ -2384,33 +2384,39 @@ void test_target_teams_atomic() { // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[I]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP5:%.*]] 
= load i32, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[N]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 -// CHECK9-NEXT: [[A:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 5 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[I]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[A:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// 
CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 8 +// CHECK9-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 5 +// CHECK9-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP16]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 8 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP11]], i32 [[TMP12]]) -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK9-NEXT: store i32 [[TMP13]], ptr [[I_CASTED]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i64, ptr [[I_CASTED]], align 8 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP15]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l50.omp_outlined, i64 [[TMP14]], i64 [[TMP16]], ptr [[A]]) +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP17]], i32 [[TMP18]]) +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK9-NEXT: store i32 [[TMP19]], ptr [[I_CASTED]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i64, ptr [[I_CASTED]], align 8 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l50.omp_outlined, i64 [[TMP20]], i64 [[TMP22]], ptr [[A]]) // CHECK9-NEXT: ret void // // @@ -2667,20 +2673,24 @@ void test_target_teams_atomic() { // CHECK9-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK9-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[A:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK9-NEXT: [[G:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// 
CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK9-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK9-NEXT: [[A:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK9-NEXT: [[G:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK9-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[N]], align 4 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[G]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l56.omp_outlined, i64 [[TMP5]], ptr [[A]], ptr [[TMP6]]) +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[N]], align 4 +// CHECK9-NEXT: store i32 [[TMP8]], ptr [[N_CASTED]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[G]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l56.omp_outlined, i64 [[TMP9]], ptr [[A]], ptr [[TMP10]]) // CHECK9-NEXT: ret void // // @@ -2915,10 +2925,12 @@ void test_target_teams_atomic() { // CHECK9-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK9-NEXT: [[X:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK9-NEXT: [[X:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_teams_atomicv_l72.omp_outlined, ptr [[X]]) // CHECK9-NEXT: ret void @@ -3089,33 +3101,39 @@ void test_target_teams_atomic() { // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB4:[0-9]+]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[I]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[N]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK11-NEXT: [[A:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 5 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[I]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[A:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 5 +// CHECK11-NEXT: [[TMP16:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP16]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP11]], i32 [[TMP12]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[I]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], ptr [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[I_CASTED]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l50.omp_outlined, i32 [[TMP14]], i32 [[TMP16]], ptr [[A]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB4]], i32 [[TMP0]], i32 [[TMP17]], i32 [[TMP18]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, ptr [[I]], align 4 +// CHECK11-NEXT: store i32 [[TMP19]], ptr [[I_CASTED]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, ptr [[I_CASTED]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP21]], ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l50.omp_outlined, i32 [[TMP20]], i32 [[TMP22]], ptr [[A]]) // CHECK11-NEXT: ret void // // @@ -3367,20 +3385,24 @@ void test_target_teams_atomic() { // CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[A:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK11-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, 
ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK11-NEXT: [[A:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK11-NEXT: [[G:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK11-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[N]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[G]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l56.omp_outlined, i32 [[TMP5]], ptr [[A]], ptr [[TMP6]]) +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[N]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[G]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l56.omp_outlined, i32 [[TMP9]], ptr [[A]], ptr [[TMP10]]) // CHECK11-NEXT: ret void // // @@ -3610,10 +3632,12 @@ void test_target_teams_atomic() { // CHECK11-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK11-NEXT: [[X:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK11-NEXT: [[X:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB4]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z24test_target_teams_atomicv_l72.omp_outlined, ptr [[X]]) // CHECK11-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp index 4bd2f911aa476..e652eee72d01d 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_firstprivate_codegen.cpp @@ -3527,28 +3527,34 @@ int main() { // CHECK13-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 // CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK13-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK13-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK13-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 -// CHECK13-NEXT: store i32 [[TMP7]], ptr [[SIVAR]], align 4 -// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// 
CHECK13-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK13-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK13-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK13-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK13-NEXT: store i32 [[TMP12]], ptr [[SIVAR]], align 4 +// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP13]], align 8 // CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK13-NEXT: store i32 [[TMP8]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK13-NEXT: store i32 [[TMP10]], ptr [[SIVAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l122.omp_outlined, ptr [[VEC]], i64 [[TMP9]], ptr [[S_ARR]], ptr [[VAR]], i64 [[TMP11]]) +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l122.omp_outlined, ptr [[VEC]], i64 [[TMP15]], ptr [[S_ARR]], ptr [[VAR]], i64 [[TMP17]]) // CHECK13-NEXT: ret void // // @@ -3876,24 +3882,29 @@ int main() { // CHECK13-NEXT: [[T_VAR_CASTED:%.*]] = alloca i64, align 8 // CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK13-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK13-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK13-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK13-NEXT: store i32 [[TMP3]], ptr [[T_VAR]], align 4 -// CHECK13-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK13-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK13-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK13-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr 
[[TMP0]], i32 0 +// CHECK13-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK13-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK13-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK13-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK13-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 +// CHECK13-NEXT: store i32 [[TMP5]], ptr [[T_VAR]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK13-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK13-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK13-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK13-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK13-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK13-NEXT: store ptr [[VAR]], ptr [[TMP]], align 8 -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK13-NEXT: store i32 [[TMP6]], ptr [[T_VAR_CASTED]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 -// CHECK13-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META7]], !align [[META8]] -// CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.omp_outlined, ptr [[VEC]], i64 [[TMP7]], ptr [[S_ARR]], ptr [[TMP8]]) +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK13-NEXT: store i32 [[TMP11]], ptr [[T_VAR_CASTED]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i64, ptr [[T_VAR_CASTED]], align 8 +// CHECK13-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META7]], !align [[META8]] +// CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.omp_outlined, ptr [[VEC]], i64 [[TMP12]], ptr [[S_ARR]], ptr [[TMP13]]) // CHECK13-NEXT: ret void // // @@ -4285,28 +4296,34 @@ int main() { // CHECK15-NEXT: [[SIVAR_CASTED:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK15-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[T_VAR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK15-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK15-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP5]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK15-NEXT: store i32 [[TMP7]], ptr [[SIVAR]], align 4 -// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr 
[[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK15-NEXT: store i32 [[TMP12]], ptr [[SIVAR]], align 4 +// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP13]], align 4 // CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK15-NEXT: store i32 [[TMP8]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l122.omp_outlined, ptr [[VEC]], i32 [[TMP9]], ptr [[S_ARR]], ptr [[VAR]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: store i32 [[TMP14]], ptr [[T_VAR_CASTED]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, ptr [[SIVAR_CASTED]], align 4 +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l122.omp_outlined, ptr [[VEC]], i32 [[TMP15]], ptr [[S_ARR]], ptr [[VAR]], i32 [[TMP17]]) // CHECK15-NEXT: ret void // // @@ -4628,24 +4645,29 @@ int main() { // CHECK15-NEXT: [[T_VAR_CASTED:%.*]] = alloca i32, align 4 // CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK15-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], ptr [[T_VAR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK15-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK15-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP5]], align 4 -// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr 
[[TMP0]], i32 0 +// CHECK15-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK15-NEXT: [[VEC:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK15-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], ptr [[T_VAR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK15-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK15-NEXT: [[S_ARR:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK15-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK15-NEXT: [[VAR:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK15-NEXT: store ptr [[VAR]], ptr [[TMP]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, ptr [[T_VAR]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META8]], !align [[META9]] -// CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.omp_outlined, ptr [[VEC]], i32 [[TMP7]], ptr [[S_ARR]], ptr [[TMP8]]) +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, ptr [[T_VAR]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], ptr [[T_VAR_CASTED]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, ptr [[T_VAR_CASTED]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 4, !nonnull [[META8]], !align [[META9]] +// CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 4, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l81.omp_outlined, ptr [[VEC]], i32 [[TMP12]], ptr [[S_ARR]], ptr [[TMP13]]) // CHECK15-NEXT: ret void // // @@ -5034,30 +5056,34 @@ int main() { // CHECK17-NEXT: [[SIVAR_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[G]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[G1]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[G]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 -// CHECK17-NEXT: store i32 [[TMP6]], 
ptr [[SIVAR]], align 4 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[G1]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 8 +// CHECK17-NEXT: store i32 [[TMP9]], ptr [[SIVAR]], align 4 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: store ptr [[G1]], ptr [[TMP]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[G]], align 4 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[G_CASTED]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[G_CASTED]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]] -// CHECK17-NEXT: [[TMP10:%.*]] = load volatile i32, ptr [[TMP9]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[G1_CASTED]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[G1_CASTED]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[SIVAR]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[SIVAR_CASTED]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l99.omp_outlined, i64 [[TMP8]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, ptr [[G]], align 4 +// CHECK17-NEXT: store i32 [[TMP11]], ptr [[G_CASTED]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, ptr [[G_CASTED]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP]], align 8, !nonnull [[META5:![0-9]+]], !align [[META6:![0-9]+]] +// CHECK17-NEXT: [[TMP14:%.*]] = load volatile i32, ptr [[TMP13]], align 4 +// CHECK17-NEXT: store i32 [[TMP14]], ptr [[G1_CASTED]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[G1_CASTED]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[SIVAR]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[SIVAR_CASTED]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[SIVAR_CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l99.omp_outlined, i64 [[TMP12]], i64 [[TMP15]], i64 [[TMP17]]) // CHECK17-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp index b975f393e3666..6d9d437979044 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_private_codegen.cpp @@ -2941,8 +2941,9 @@ int main() { // CHECK13-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) // CHECK13-NEXT: ret void @@ -3194,8 +3195,9 @@ int main() { // CHECK13-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK13-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) // CHECK13-NEXT: ret void @@ -3493,8 +3495,9 @@ int main() { // CHECK15-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) // CHECK15-NEXT: ret void @@ -3740,8 +3743,9 @@ int main() { // CHECK15-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) // CHECK15-NEXT: ret void @@ -4033,8 +4037,9 @@ int main() { // CHECK17-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined) // CHECK17-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp index 038abb4fe564e..5c7c798512c55 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp @@ -820,7 +820,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META32]] -// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] +// CHECK1-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr 
[[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]], !inline_history [[META33:![0-9]+]] // CHECK1-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META32]] // CHECK1-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META32]] @@ -945,7 +945,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1013,28 +1013,28 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK1-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP35]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1110,31 +1110,31 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] -// CHECK1-NEXT: [[TMP6:%.*]] = load 
i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1173,14 +1173,14 @@ int bar(int n){ // CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41:![0-9]+]], !align [[META42:![0-9]+]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42:![0-9]+]], !align [[META43:![0-9]+]] // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META41]], !align [[META43:![0-9]+]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META44:![0-9]+]] // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: 
[[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] +// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] // CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -1220,14 +1220,14 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 // CHECK1-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 // CHECK1-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] +// CHECK1-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] // CHECK1-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] -// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META41]], !align [[META43]] +// CHECK1-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] +// CHECK1-NEXT: 
[[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] // CHECK1-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 0 // CHECK1-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 @@ -1252,59 +1252,59 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] +// 
CHECK1-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double // CHECK1-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK1-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK1-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double // CHECK1-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 // CHECK1-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK1-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 
-// CHECK1-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP20]] // CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK1-NEXT: store i64 [[ADD19]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store i64 [[ADD19]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 // CHECK1-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 // CHECK1-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK1-NEXT: store i8 [[CONV22]], ptr [[Y]], align 8, 
!llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: store i8 [[CONV22]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK1-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1730,7 +1730,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META41]], !align [[META47:![0-9]+]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META48:![0-9]+]] // CHECK1-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK1-NEXT: [[TMP5:%.*]] = load i64, ptr [[B_CASTED]], align 8 @@ -1765,7 +1765,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull 
[[META41]], !align [[META47]] +// CHECK1-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META48]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -1788,37 +1788,37 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double // CHECK1-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds nuw 
[[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: store double [[ADD4]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: store double [[ADD4]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: [[A5:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: store double [[INC]], ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP14]] // CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK1-NEXT: br 
label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1852,7 +1852,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -1897,7 +1897,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -1942,42 +1942,42 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51:![0-9]+]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], 
align 4, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 // CHECK1-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK1-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP51]] -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 
1, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK1-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK1-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK1-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK1-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -2018,7 +2018,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK1-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] // CHECK1-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK1-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK1-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -2049,7 +2049,7 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK1-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK1-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META41]], !align [[META42]] +// CHECK1-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] // CHECK1-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK1-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK1-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -2072,35 +2072,35 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP55]] // CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP55]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], ptr 
[[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -2642,7 +2642,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META33]] -// CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] +// CHECK3-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]], !inline_history [[META34:![0-9]+]] // CHECK3-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META33]] // CHECK3-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, 
!noalias [[META33]] // CHECK3-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META33]] @@ -2767,7 +2767,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2835,28 +2835,28 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group 
[[ACC_GRP37]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP37]] // CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK3-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP37]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2932,31 +2932,31 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK3-NEXT: 
[[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP40]] // CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP40]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2995,14 +2995,14 @@ int bar(int n){ // CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META42:![0-9]+]], !align [[META43:![0-9]+]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META43:![0-9]+]], !align [[META44:![0-9]+]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META42]], !align [[META43]] -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META42]], !align [[META43]] +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META42]], !align [[META43]] -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META42]], !align [[META43]] +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] +// 
CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] // CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -3042,14 +3042,14 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 // CHECK3-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 // CHECK3-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META42]], !align [[META43]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META42]], !align [[META43]] -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META42]], !align [[META43]] +// CHECK3-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META42]], !align [[META43]] -// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META42]], !align [[META43]] +// CHECK3-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] +// CHECK3-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] // CHECK3-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 0 // CHECK3-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK3-NEXT: store i32 0, 
ptr [[DOTOMP_LB]], align 4 @@ -3074,59 +3074,59 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44:![0-9]+]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// 
CHECK3-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double // CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK3-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK3-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double // CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 // CHECK3-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK3-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK3-NEXT: [[ARRAYIDX16:%.*]] = 
getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP20]] // CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK3-NEXT: store i64 [[ADD19]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store i64 [[ADD19]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 // CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 // CHECK3-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK3-NEXT: store i8 [[CONV22]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: store i8 [[CONV22]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: 
[[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] // CHECK3-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP44]] -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3552,7 +3552,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META42]], !align [[META47:![0-9]+]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META43]], !align [[META48:![0-9]+]] // CHECK3-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK3-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 @@ -3587,7 +3587,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META42]], !align [[META47]] +// CHECK3-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META43]], !align [[META48]] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ 
-3610,37 +3610,37 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP8]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48:![0-9]+]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double // CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: store double [[ADD4]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: store double [[ADD4]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: [[A5:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr 
[[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: [[TMP13:%.*]] = load double, ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: store double [[INC]], ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP14]] // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP48]] -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3674,7 +3674,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr 
[[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META42]], !align [[META43]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -3719,7 +3719,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META42]], !align [[META43]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -3764,42 +3764,42 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51:![0-9]+]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52:![0-9]+]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 // CHECK3-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// 
CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK3-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK3-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK3-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP51]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP52]] +// CHECK3-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to 
i32 // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK3-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK3-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK3-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] // CHECK3-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP51]] -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP52]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3840,7 +3840,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK3-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK3-NEXT: 
[[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META42]], !align [[META43]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK3-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -3871,7 +3871,7 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK3-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK3-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META42]], !align [[META43]] +// CHECK3-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] // CHECK3-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK3-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK3-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -3894,35 +3894,35 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // 
CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: 
omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4464,7 +4464,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias [[META32]] // CHECK5-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias [[META32]] // CHECK5-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias [[META32]] -// CHECK5-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] +// CHECK5-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]], !inline_history [[META33:![0-9]+]] // CHECK5-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias [[META32]] // CHECK5-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias [[META32]] // CHECK5-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias [[META32]] @@ -4578,10 +4578,10 @@ int bar(int n){ // CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[A1]], align 4, !nontemporal [[META33:![0-9]+]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4, !nontemporal [[META33]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[A1]], align 4, !nontemporal [[META34:![0-9]+]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4, !nontemporal [[META34]] // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], ptr [[A1]], align 4, !nontemporal [[META33]] +// CHECK5-NEXT: store i32 [[ADD3]], ptr [[A1]], align 4, !nontemporal [[META34]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -4589,7 +4589,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4657,28 +4657,28 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP37]] // CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// CHECK5-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP37]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] // CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP36]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ 
-4754,31 +4754,31 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39:![0-9]+]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP40]] // CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 // 
CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP40]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP39]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4817,14 +4817,14 @@ int bar(int n){ // CHECK5-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42:![0-9]+]], !align [[META43:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META43:![0-9]+]], !align [[META44:![0-9]+]] // CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META44:![0-9]+]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr 
[[BN_ADDR]], align 8, !nonnull [[META43]], !align [[META44]] +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META43]], !align [[META45:![0-9]+]] // CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META43]], !align [[META45]] +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META43]], !align [[META45]] // CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -4864,14 +4864,14 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[VLA3]], ptr [[VLA_ADDR4]], align 8 // CHECK5-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 8 // CHECK5-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META43]], !align [[META44]] // CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 8, !nonnull [[META43]], !align [[META44]] +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META43]], !align [[META45]] // CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 // CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[VLA_ADDR4]], align 8 -// 
CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] -// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META42]], !align [[META44]] +// CHECK5-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 8, !nonnull [[META43]], !align [[META45]] +// CHECK5-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 8, !nonnull [[META43]], !align [[META45]] // CHECK5-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 0 // CHECK5-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i64 16) ] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 @@ -4896,59 +4896,59 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] +// 
CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double // CHECK5-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK5-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK5-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 3 -// CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double // CHECK5-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 // CHECK5-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK5-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i64 0, i64 1 // CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr 
inbounds [10 x double], ptr [[ARRAYIDX13]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK5-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP20]] // CHECK5-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i64 3 -// CHECK5-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 8, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK5-NEXT: store i64 [[ADD19]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: store i64 [[ADD19]], ptr [[X]], align 8, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group 
[[ACC_GRP45]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 // CHECK5-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 // CHECK5-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK5-NEXT: store i8 [[CONV22]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: store i8 [[CONV22]], ptr [[Y]], align 8, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK5-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK5-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -5394,7 +5394,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META48:![0-9]+]] +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META43]], !align [[META49:![0-9]+]] // CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr 
[[B_CASTED]], align 8 @@ -5436,7 +5436,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 8 // CHECK5-NEXT: [[TMP1:%.*]] = load i64, ptr [[VLA_ADDR]], align 8 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[VLA_ADDR2]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META42]], !align [[META48]] +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 8, !nonnull [[META43]], !align [[META49]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -5463,37 +5463,37 @@ int bar(int n){ // CHECK5: omp_if.then: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK5-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, 
!llvm.access.group [[ACC_GRP50]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK5-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double // CHECK5-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: store double [[ADD4]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK5-NEXT: store double [[ADD4]], ptr [[A]], align 8, !llvm.access.group [[ACC_GRP50]] // CHECK5-NEXT: [[A5:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP14:%.*]] = load double, ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK5-NEXT: [[TMP14:%.*]] = load double, ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP50]] // CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC]], ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP49]] +// CHECK5-NEXT: store double [[INC]], ptr [[A5]], align 8, !llvm.access.group [[ACC_GRP50]] // CHECK5-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK5-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i64 [[TMP15]] // CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i64 1 -// CHECK5-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP49]] +// CHECK5-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP50]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK5-NEXT: [[ADD8:%.*]] = add 
nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: @@ -5529,7 +5529,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK5-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP52:![0-9]+]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK5: omp.inner.for.end25: // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: @@ -5565,7 +5565,7 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META43]], !align [[META44]] // CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -5610,7 +5610,7 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AAA]], ptr [[AAA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META43]], 
!align [[META44]] // CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -5655,42 +5655,42 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 // CHECK5-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], [[ADD7]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK5-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK5-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group 
[[ACC_GRP55]] // CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 // CHECK5-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK5-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP54]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 // CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK5-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK5-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK5-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK5-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 -// CHECK5-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -5731,7 +5731,7 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META43]], !align [[META44]] // CHECK5-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK5-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK5-NEXT: [[TMP2:%.*]] = load i64, ptr [[A_CASTED]], align 8 @@ -5762,7 +5762,7 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[A]], ptr [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[AA]], ptr [[AA_ADDR]], align 8 // CHECK5-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META42]], !align [[META43]] +// CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 8, !nonnull [[META43]], !align [[META44]] // CHECK5-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK5-NEXT: store i32 9, ptr 
[[DOTOMP_UB]], align 4 // CHECK5-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -5785,35 +5785,35 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK5-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP57]] +// CHECK5-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, 
!llvm.access.group [[ACC_GRP58]] // CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP57]] +// CHECK5-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP58]] // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK5-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -6355,7 +6355,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP9:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 4, !noalias [[META33]] // CHECK7-NEXT: [[TMP10:%.*]] 
= load ptr, ptr [[DOTCOPY_FN__ADDR_I]], align 4, !noalias [[META33]] // CHECK7-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DOTPRIVATES__ADDR_I]], align 4, !noalias [[META33]] -// CHECK7-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] +// CHECK7-NEXT: call void [[TMP10]](ptr [[TMP11]], ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], ptr [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]], !inline_history [[META34:![0-9]+]] // CHECK7-NEXT: [[TMP12:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias [[META33]] // CHECK7-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias [[META33]] // CHECK7-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias [[META33]] @@ -6469,10 +6469,10 @@ int bar(int n){ // CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[A1]], align 4, !nontemporal [[META34:![0-9]+]] -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4, !nontemporal [[META34]] +// CHECK7-NEXT: store i32 [[ADD]], ptr [[A1]], align 4, !nontemporal [[META35:![0-9]+]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A1]], align 4, !nontemporal [[META35]] // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], ptr [[A1]], align 4, !nontemporal [[META34]] +// CHECK7-NEXT: store i32 [[ADD3]], ptr [[A1]], align 4, !nontemporal [[META35]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -6480,7 +6480,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // 
CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6548,28 +6548,28 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37:![0-9]+]] -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38:![0-9]+]] +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP38]] // CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD2]] to i16 -// 
CHECK7-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: store i16 [[CONV3]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP38]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP37]] -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD4]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP38]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6645,31 +6645,31 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP4]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40:![0-9]+]] -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41:![0-9]+]] +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: 
[[TMP7:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP41]] // CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP9]] to i32 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP41]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] // CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK7-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP40]] -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] +// 
CHECK7-NEXT: store i32 [[ADD5]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP41]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6708,14 +6708,14 @@ int bar(int n){ // CHECK7-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 // CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META43:![0-9]+]], !align [[META44:![0-9]+]] +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META44:![0-9]+]], !align [[META45:![0-9]+]] // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META44]], !align [[META45]] +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META44]], !align [[META45]] // CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] -// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META44]], !align [[META45]] +// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META44]], !align [[META45]] // CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ 
-6755,14 +6755,14 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[VLA3]], ptr [[VLA_ADDR4]], align 4 // CHECK7-NEXT: store ptr [[CN]], ptr [[CN_ADDR]], align 4 // CHECK7-NEXT: store ptr [[D]], ptr [[D_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META44]], !align [[META45]] // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[BN_ADDR]], align 4, !nonnull [[META44]], !align [[META45]] +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META44]], !align [[META45]] // CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 // CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[VLA_ADDR4]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] -// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] +// CHECK7-NEXT: [[TMP6:%.*]] = load ptr, ptr [[CN_ADDR]], align 4, !nonnull [[META44]], !align [[META45]] +// CHECK7-NEXT: [[TMP7:%.*]] = load ptr, ptr [[D_ADDR]], align 4, !nonnull [[META44]], !align [[META45]] // CHECK7-NEXT: [[ARRAYDECAY:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 0 // CHECK7-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[ARRAYDECAY]], i32 16) ] // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 @@ -6787,59 +6787,59 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP12]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, 
!llvm.access.group [[ACC_GRP45:![0-9]+]] -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46:![0-9]+]] +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: store i32 [[ADD6]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP17:%.*]] = load float, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double // CHECK7-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK7-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float 
-// CHECK7-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: store float [[CONV8]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i32 3 -// CHECK7-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP18:%.*]] = load float, ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double // CHECK7-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 // CHECK7-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK7-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: store float [[CONV12]], ptr [[ARRAYIDX9]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], ptr [[TMP3]], i32 0, i32 1 // CHECK7-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], ptr [[ARRAYIDX13]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP19:%.*]] = load double, ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: store double [[ADD15]], ptr [[ARRAYIDX14]], align 8, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK7-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i32 [[TMP20]] // CHECK7-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, ptr [[ARRAYIDX16]], i32 3 -// CHECK7-NEXT: [[TMP21:%.*]] = load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP21:%.*]] = 
load double, ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: store double [[ADD18]], ptr [[ARRAYIDX17]], align 8, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds nuw [[STRUCT_TT:%.*]], ptr [[TMP7]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP22:%.*]] = load i64, ptr [[X]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK7-NEXT: store i64 [[ADD19]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: store i64 [[ADD19]], ptr [[X]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds nuw [[STRUCT_TT]], ptr [[TMP7]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i8, ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 // CHECK7-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 // CHECK7-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK7-NEXT: store i8 [[CONV22]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: store i8 [[CONV22]], ptr [[Y]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] // CHECK7-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK7-NEXT: store i32 [[ADD23]], ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP45]] -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD23]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP46]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -7285,7 +7285,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META43]], !align [[META48:![0-9]+]] +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META44]], !align [[META49:![0-9]+]] // CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP4]], ptr [[B_CASTED]], align 4 // CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[B_CASTED]], align 4 @@ -7327,7 +7327,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[THIS_ADDR]], align 4 // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[VLA_ADDR]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[VLA_ADDR2]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META43]], !align [[META48]] +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR]], align 4, !nonnull [[META44]], !align [[META49]] // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -7354,37 +7354,37 @@ int bar(int n){ // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49:![0-9]+]] -// CHECK7-NEXT: [[TMP11:%.*]] = 
load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50:![0-9]+]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK7-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK7-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_ADDR]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK7-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP13]] to double // CHECK7-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds nuw [[STRUCT_S1:%.*]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: store double [[ADD4]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK7-NEXT: store double [[ADD4]], ptr [[A]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK7-NEXT: [[A5:%.*]] = getelementptr inbounds nuw [[STRUCT_S1]], ptr [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP14:%.*]] = load double, ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK7-NEXT: [[TMP14:%.*]] = load double, ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC]], ptr 
[[A5]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK7-NEXT: store double [[INC]], ptr [[A5]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK7-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK7-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP3]], i32 [[TMP15]] // CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, ptr [[ARRAYIDX]], i32 1 -// CHECK7-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP49]] +// CHECK7-NEXT: store i16 [[CONV6]], ptr [[ARRAYIDX7]], align 2, !llvm.access.group [[ACC_GRP50]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] // CHECK7-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP49]] -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD8]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP50]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: @@ -7420,7 +7420,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK7-NEXT: store i32 [[ADD24]], ptr [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP52:![0-9]+]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND9]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK7: omp.inner.for.end25: // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: 
omp_if.end: @@ -7456,7 +7456,7 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META44]], !align [[META45]] // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -7501,7 +7501,7 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AAA]], ptr [[AAA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META44]], !align [[META45]] // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP1]], ptr [[DOTCAPTURE_EXPR_]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[N_ADDR]], align 4 @@ -7546,42 +7546,42 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP15]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54:![0-9]+]] -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55:![0-9]+]] +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[ADD7:%.*]] = add i32 [[TMP17]], 1 // CHECK7-NEXT: [[CMP8:%.*]] = icmp ult i32 [[TMP16]], 
[[ADD7]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK7-NEXT: [[ADD9:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK7-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: store i32 [[ADD9]], ptr [[I5]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK7-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK7-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: store i32 [[ADD10]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK7-NEXT: [[TMP21:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP21]] to i32 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV12:%.*]] = trunc i32 [[ADD11]] to i16 -// CHECK7-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP54]] -// CHECK7-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: store i16 [[CONV12]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP55]] +// 
CHECK7-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[CONV13:%.*]] = sext i8 [[TMP22]] to i32 // CHECK7-NEXT: [[ADD14:%.*]] = add nsw i32 [[CONV13]], 1 // CHECK7-NEXT: [[CONV15:%.*]] = trunc i32 [[ADD14]] to i8 -// CHECK7-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: store i8 [[CONV15]], ptr [[AAA_ADDR]], align 1, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK7-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: store i32 [[ADD16]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] // CHECK7-NEXT: [[ADD17:%.*]] = add i32 [[TMP24]], 1 -// CHECK7-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP54]] -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD17]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP55]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -7622,7 +7622,7 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], 
align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META44]], !align [[META45]] // CHECK7-NEXT: [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[TMP1]], ptr [[A_CASTED]], align 4 // CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_CASTED]], align 4 @@ -7653,7 +7653,7 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4 // CHECK7-NEXT: store i32 [[AA]], ptr [[AA_ADDR]], align 4 // CHECK7-NEXT: store ptr [[B]], ptr [[B_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META43]], !align [[META44]] +// CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[B_ADDR]], align 4, !nonnull [[META44]], !align [[META45]] // CHECK7-NEXT: store i32 0, ptr [[DOTOMP_LB]], align 4 // CHECK7-NEXT: store i32 9, ptr [[DOTOMP_UB]], align 4 // CHECK7-NEXT: store i32 1, ptr [[DOTOMP_STRIDE]], align 4 @@ -7676,35 +7676,35 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP5]], ptr [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57:![0-9]+]] -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58:![0-9]+]] +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr 
[[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK7-NEXT: store i32 [[ADD]], ptr [[I]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK7-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP57]] +// CHECK7-NEXT: store i32 [[ADD2]], ptr [[A_ADDR]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK7-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP58]] // CHECK7-NEXT: [[CONV:%.*]] = sext i16 [[TMP10]] to i32 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV]], 1 // CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP57]] +// CHECK7-NEXT: store i16 [[CONV4]], ptr [[AA_ADDR]], align 2, !llvm.access.group [[ACC_GRP58]] // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK7-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK7-NEXT: store i32 [[ADD5]], ptr [[ARRAYIDX]], align 4, !llvm.access.group 
[[ACC_GRP58]] // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP57]] -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD6]], ptr [[DOTOMP_IV]], align 4, !llvm.access.group [[ACC_GRP58]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -9863,25 +9863,29 @@ int bar(int n){ // CHECK17-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 -// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK17-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 +// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK17-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK17-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 -// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK17-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP8]], i32 [[TMP9]]) -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK17-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97.omp_outlined, i64 [[TMP11]]) +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK17-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP12]], i32 [[TMP13]]) +// CHECK17-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: store i16 [[TMP14]], ptr [[AA_CASTED]], align 2 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97.omp_outlined, i64 [[TMP15]]) // CHECK17-NEXT: ret void // // @@ -9963,16 +9967,18 @@ int bar(int n){ // CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// 
CHECK17-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK17-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l111.omp_outlined, i64 [[TMP4]]) +// CHECK17-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l111.omp_outlined, i64 [[TMP6]]) // CHECK17-NEXT: ret void // // @@ -10061,22 +10067,25 @@ int bar(int n){ // CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK17-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK17-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l118.omp_outlined, i64 [[TMP6]], i64 [[TMP8]]) +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l118.omp_outlined, i64 [[TMP9]], i64 [[TMP11]]) // CHECK17-NEXT: ret void // // @@ -10168,32 +10177,42 @@ int bar(int n){ // CHECK17-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK17-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 6 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 7 -// CHECK17-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 8 -// CHECK17-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 9 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK17-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK17-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK17-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK17-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK17-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr 
[[TMP0]], i32 9 +// CHECK17-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP23]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 -// CHECK17-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.omp_outlined, i64 [[TMP15]], ptr [[B]], i64 [[TMP5]], ptr [[BN]], ptr [[C]], i64 [[TMP9]], i64 [[TMP11]], ptr [[CN]], ptr [[D]]) +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP24]], ptr [[A_CASTED]], align 4 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.omp_outlined, i64 [[TMP25]], ptr [[B]], i64 [[TMP8]], ptr [[BN]], ptr [[C]], i64 [[TMP15]], i64 [[TMP18]], ptr [[CN]], ptr [[D]]) // CHECK17-NEXT: ret void // // @@ -10343,36 +10362,42 @@ int bar(int n){ // CHECK17-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK17-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 -// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[TMP6:%.*]] = load i16, ptr 
[[TMP5]], align 8 -// CHECK17-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 -// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 8 -// CHECK17-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 -// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP8]], align 8 +// CHECK17-NEXT: store i16 [[TMP9]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK17-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 8 +// CHECK17-NEXT: store i8 [[TMP12]], ptr [[AAA]], align 1 +// CHECK17-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK17-NEXT: [[TMP15:%.*]] 
= load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP15]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 -// CHECK17-NEXT: store i32 [[TMP12]], ptr [[N_CASTED]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK17-NEXT: store i16 [[TMP14]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: [[TMP16:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK17-NEXT: store i8 [[TMP16]], ptr [[AAA_CASTED]], align 1 -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], ptr [[B]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, ptr [[N]], align 4 +// CHECK17-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: store i16 [[TMP20]], ptr [[AA_CASTED]], align 2 +// CHECK17-NEXT: [[TMP21:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK17-NEXT: store i8 [[TMP22]], ptr [[AAA_CASTED]], align 1 +// CHECK17-NEXT: [[TMP23:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.omp_outlined, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP23]], ptr [[B]]) // CHECK17-NEXT: ret void // // @@ -10521,24 +10546,30 @@ int bar(int n){ // CHECK17-NEXT: [[B_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK17-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK17-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK17-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 +// CHECK17-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: 
[[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK17-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK17-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK17-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4 -// CHECK17-NEXT: store i32 [[TMP9]], ptr [[B_CASTED]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.omp_outlined, ptr [[THIS]], i64 [[TMP10]], i64 [[TMP5]], i64 [[TMP7]], ptr [[C]]) +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, ptr [[B]], align 4 +// CHECK17-NEXT: store i32 [[TMP15]], ptr [[B_CASTED]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.omp_outlined, ptr [[THIS]], i64 [[TMP16]], i64 [[TMP8]], i64 [[TMP11]], ptr [[C]]) // CHECK17-NEXT: ret void // // @@ -10648,24 +10679,28 @@ int bar(int n){ // CHECK17-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK17-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK17-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK17-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK17-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK17-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK17-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK17-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK17-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK17-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK17-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK17-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK17-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK17-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr 
[[TMP0]], i32 3 +// CHECK17-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK17-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK17-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK17-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.omp_outlined, i64 [[TMP7]], i64 [[TMP9]], ptr [[B]]) +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK17-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK17-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[B]]) // CHECK17-NEXT: ret void // // @@ -10767,25 +10802,29 @@ int bar(int n){ // CHECK19-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 -// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK19-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK19-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// 
CHECK19-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK19-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP8]], i32 [[TMP9]]) -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK19-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97.omp_outlined, i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK19-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP12]], i32 [[TMP13]]) +// CHECK19-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: store i16 [[TMP14]], ptr [[AA_CASTED]], align 2 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97.omp_outlined, i32 [[TMP15]]) // CHECK19-NEXT: ret void // // @@ -10867,16 +10906,18 @@ int bar(int n){ // CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 -// CHECK19-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK19-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l111.omp_outlined, i32 [[TMP4]]) +// CHECK19-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l111.omp_outlined, i32 [[TMP6]]) // CHECK19-NEXT: ret void // // @@ -10965,22 +11006,25 @@ int bar(int n){ // CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK19-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_CASTED]], 
align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK19-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l118.omp_outlined, i32 [[TMP6]], i32 [[TMP8]]) +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l118.omp_outlined, i32 [[TMP9]], i32 [[TMP11]]) // CHECK19-NEXT: ret void // // @@ -11072,32 +11116,42 @@ int bar(int n){ // CHECK19-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK19-NEXT: 
[[C:%.*]] = load ptr, ptr [[TMP7]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 6 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 7 -// CHECK19-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -// CHECK19-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 9 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK19-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// 
CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK19-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK19-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// CHECK19-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK19-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 +// CHECK19-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// CHECK19-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP23]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 -// CHECK19-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.omp_outlined, i32 [[TMP15]], ptr [[B]], i32 [[TMP5]], ptr [[BN]], ptr [[C]], i32 [[TMP9]], i32 [[TMP11]], ptr [[CN]], ptr [[D]]) +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP24]], ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.omp_outlined, i32 [[TMP25]], ptr [[B]], i32 [[TMP8]], ptr [[BN]], ptr [[C]], i32 [[TMP15]], i32 [[TMP18]], ptr [[CN]], ptr [[D]]) // CHECK19-NEXT: ret void // // @@ -11247,36 +11301,42 @@ int bar(int n){ // CHECK19-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK19-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 -// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 -// CHECK19-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 -// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 4 -// CHECK19-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 -// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: 
[[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP8]], align 4 +// CHECK19-NEXT: store i16 [[TMP9]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK19-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4 +// CHECK19-NEXT: store i8 [[TMP12]], ptr [[AAA]], align 1 +// CHECK19-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP14]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK19-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP15]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 -// CHECK19-NEXT: store i32 [[TMP12]], ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK19-NEXT: store i16 [[TMP14]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK19-NEXT: store i8 [[TMP16]], ptr [[AAA_CASTED]], align 1 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr 
[[AAA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.omp_outlined, i32 [[TMP11]], i32 [[TMP13]], i32 [[TMP15]], i32 [[TMP17]], ptr [[B]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, ptr [[N]], align 4 +// CHECK19-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: store i16 [[TMP20]], ptr [[AA_CASTED]], align 2 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK19-NEXT: store i8 [[TMP22]], ptr [[AAA_CASTED]], align 1 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.omp_outlined, i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP21]], i32 [[TMP23]], ptr [[B]]) // CHECK19-NEXT: ret void // // @@ -11425,24 +11485,30 @@ int bar(int n){ // CHECK19-NEXT: [[B_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK19-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK19-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK19-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK19-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK19-NEXT: 
[[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK19-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK19-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK19-NEXT: [[TMP14:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP14]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4 -// CHECK19-NEXT: store i32 [[TMP9]], ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.omp_outlined, ptr [[THIS]], i32 [[TMP10]], i32 [[TMP5]], i32 [[TMP7]], ptr [[C]]) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, ptr [[B]], align 4 +// CHECK19-NEXT: store i32 [[TMP15]], ptr [[B_CASTED]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.omp_outlined, ptr [[THIS]], i32 [[TMP16]], i32 [[TMP8]], i32 [[TMP11]], ptr [[C]]) // CHECK19-NEXT: ret void // // @@ -11552,24 +11618,28 @@ int bar(int n){ // CHECK19-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK19-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK19-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK19-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK19-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK19-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK19-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK19-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK19-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 -// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK19-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK19-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK19-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK19-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK19-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK19-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK19-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK19-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK19-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK19-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr 
[[TMP0]], i32 3 +// CHECK19-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK19-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK19-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK19-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK19-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK19-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.omp_outlined, i32 [[TMP7]], i32 [[TMP9]], ptr [[B]]) +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK19-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK19-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK19-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.omp_outlined, i32 [[TMP11]], i32 [[TMP13]], ptr [[B]]) // CHECK19-NEXT: ret void // // @@ -11671,25 +11741,29 @@ int bar(int n){ // CHECK21-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 -// CHECK21-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 -// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 -// CHECK21-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK21-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 +// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK21-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK21-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 -// CHECK21-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// 
CHECK21-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK21-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK21-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP8]], i32 [[TMP9]]) -// CHECK21-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK21-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97.omp_outlined, i64 [[TMP11]]) +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK21-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP12]], i32 [[TMP13]]) +// CHECK21-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK21-NEXT: store i16 [[TMP14]], ptr [[AA_CASTED]], align 2 +// CHECK21-NEXT: [[TMP15:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97.omp_outlined, i64 [[TMP15]]) // CHECK21-NEXT: ret void // // @@ -11771,16 +11845,18 @@ int bar(int n){ // CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 8 -// CHECK21-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 8 +// CHECK21-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK21-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK21-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP4:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l111.omp_outlined, i64 [[TMP4]]) +// CHECK21-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK21-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l111.omp_outlined, i64 [[TMP6]]) // CHECK21-NEXT: ret void // // @@ -11869,22 +11945,25 @@ int bar(int n){ // CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK21-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK21-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK21-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, ptr [[A_CASTED]], 
align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK21-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l118.omp_outlined, i64 [[TMP6]], i64 [[TMP8]]) +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK21-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l118.omp_outlined, i64 [[TMP9]], i64 [[TMP11]]) // CHECK21-NEXT: ret void // // @@ -11976,32 +12055,42 @@ int bar(int n){ // CHECK21-NEXT: [[A_CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK21-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK21-NEXT: 
[[C:%.*]] = load ptr, ptr [[TMP7]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[TMP8]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 6 -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 -// CHECK21-NEXT: [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 7 -// CHECK21-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 8 -// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 8 -// CHECK21-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 8 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 9 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK21-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 8 +// CHECK21-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// 
CHECK21-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP15:%.*]] = load i64, ptr [[TMP14]], align 8 +// CHECK21-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK21-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 8 +// CHECK21-NEXT: [[TMP18:%.*]] = load i64, ptr [[TMP17]], align 8 +// CHECK21-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK21-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8 +// CHECK21-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 8 +// CHECK21-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK21-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 8 +// CHECK21-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 8 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 9 +// CHECK21-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP23]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 -// CHECK21-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP15:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.omp_outlined, i64 [[TMP15]], ptr [[B]], i64 [[TMP5]], ptr [[BN]], ptr [[C]], i64 [[TMP9]], i64 [[TMP11]], ptr [[CN]], ptr [[D]]) +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP24]], ptr [[A_CASTED]], align 4 +// CHECK21-NEXT: [[TMP25:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.omp_outlined, i64 [[TMP25]], ptr [[B]], i64 [[TMP8]], ptr [[BN]], ptr [[C]], i64 [[TMP15]], i64 [[TMP18]], ptr [[CN]], ptr [[D]]) // CHECK21-NEXT: ret void // // @@ -12151,36 +12240,42 @@ int bar(int n){ // CHECK21-NEXT: [[AAA_CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK21-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 -// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK21-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 -// CHECK21-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 -// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK21-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 8 -// CHECK21-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 -// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP9]], align 8 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK21-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: 
[[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 +// CHECK21-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP8]], align 8 +// CHECK21-NEXT: store i16 [[TMP9]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK21-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 8 +// CHECK21-NEXT: store i8 [[TMP12]], ptr [[AAA]], align 1 +// CHECK21-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK21-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK21-NEXT: [[TMP15:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP15]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 -// CHECK21-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 -// CHECK21-NEXT: store i32 [[TMP12]], ptr [[N_CASTED]], align 4 -// CHECK21-NEXT: [[TMP13:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK21-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK21-NEXT: store i16 [[TMP14]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP15:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: [[TMP16:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK21-NEXT: store i8 [[TMP16]], ptr [[AAA_CASTED]], align 1 -// CHECK21-NEXT: [[TMP17:%.*]] = load i64, ptr 
[[AAA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], i64 [[TMP15]], i64 [[TMP17]], ptr [[B]]) +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[N]], align 4 +// CHECK21-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 +// CHECK21-NEXT: [[TMP19:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK21-NEXT: [[TMP20:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK21-NEXT: store i16 [[TMP20]], ptr [[AA_CASTED]], align 2 +// CHECK21-NEXT: [[TMP21:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK21-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK21-NEXT: store i8 [[TMP22]], ptr [[AAA_CASTED]], align 1 +// CHECK21-NEXT: [[TMP23:%.*]] = load i64, ptr [[AAA_CASTED]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.omp_outlined, i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP23]], ptr [[B]]) // CHECK21-NEXT: ret void // // @@ -12331,34 +12426,41 @@ int bar(int n){ // CHECK21-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK21-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK21-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK21-NEXT: [[TMP5:%.*]] = load i64, ptr [[TMP4]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, ptr [[TMP6]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 4 -// CHECK21-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 5 -// CHECK21-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 8 -// CHECK21-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP10]], 0 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 +// CHECK21-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK21-NEXT: [[TMP6:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i64, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK21-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[TMP10]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 8 +// CHECK21-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 8 +// CHECK21-NEXT: [[TMP14:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK21-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 8 +// CHECK21-NEXT: [[TMP16:%.*]] = load i8, ptr [[TMP15]], align 8 +// CHECK21-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP16]], 0 // CHECK21-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8 // CHECK21-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 6 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK21-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP17]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 -// CHECK21-NEXT: store i32 [[TMP11]], ptr [[B_CASTED]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK21-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK21-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP13]], 0 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, ptr [[B]], align 4 +// CHECK21-NEXT: store i32 [[TMP18]], ptr [[B_CASTED]], align 4 +// CHECK21-NEXT: [[TMP19:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// 
CHECK21-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP20]], 0 // CHECK21-NEXT: [[STOREDV3:%.*]] = zext i1 [[LOADEDV2]] to i8 // CHECK21-NEXT: store i8 [[STOREDV3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK21-NEXT: [[TMP14:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.omp_outlined, ptr [[THIS]], i64 [[TMP12]], i64 [[TMP5]], i64 [[TMP7]], ptr [[C]], i64 [[TMP14]]) +// CHECK21-NEXT: [[TMP21:%.*]] = load i64, ptr [[DOTCAPTURE_EXPR__CASTED]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.omp_outlined, ptr [[THIS]], i64 [[TMP19]], i64 [[TMP8]], i64 [[TMP11]], ptr [[C]], i64 [[TMP21]]) // CHECK21-NEXT: ret void // // @@ -12512,24 +12614,28 @@ int bar(int n){ // CHECK21-NEXT: [[AA_CASTED:%.*]] = alloca i64, align 8 // CHECK21-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK21-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK21-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK21-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK21-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK21-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 8 -// CHECK21-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK21-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 8 -// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK21-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK21-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK21-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK21-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK21-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 8 +// CHECK21-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK21-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK21-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK21-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK21-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK21-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK21-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK21-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK21-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK21-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, ptr [[AA_CASTED]], align 8 -// CHECK21-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.omp_outlined, i64 [[TMP7]], i64 [[TMP9]], ptr [[B]]) +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK21-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK21-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK21-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK21-NEXT: [[TMP13:%.*]] = load i64, ptr [[AA_CASTED]], align 8 +// CHECK21-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.omp_outlined, i64 [[TMP11]], i64 [[TMP13]], ptr [[B]]) // CHECK21-NEXT: ret void // // @@ -12631,25 +12737,29 @@ int bar(int n){ // CHECK23-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB2:[0-9]+]]) // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 -// CHECK23-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 -// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK23-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 +// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK23-NEXT: [[TMP6:%.*]] = load ptr, ptr 
[[TMP5]], align 4 // CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK23-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK23-NEXT: store i32 [[TMP10]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK23-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK23-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP8]], i32 [[TMP9]]) -// CHECK23-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK23-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97.omp_outlined, i32 [[TMP11]]) +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK23-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB2]], i32 [[TMP0]], i32 [[TMP12]], i32 [[TMP13]]) +// CHECK23-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK23-NEXT: store i16 [[TMP14]], ptr [[AA_CASTED]], align 2 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97.omp_outlined, i32 [[TMP15]]) // CHECK23-NEXT: ret void // // @@ -12731,16 +12841,18 @@ int bar(int n){ // CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[TMP2:%.*]] = load i16, ptr [[TMP1]], align 4 -// CHECK23-NEXT: store i16 [[TMP2]], ptr [[AA]], align 2 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[TMP2]], align 4 +// CHECK23-NEXT: store i16 [[TMP3]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// 
CHECK23-NEXT: [[TMP3:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK23-NEXT: store i16 [[TMP3]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l111.omp_outlined, i32 [[TMP4]]) +// CHECK23-NEXT: [[TMP5:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK23-NEXT: store i16 [[TMP5]], ptr [[AA_CASTED]], align 2 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l111.omp_outlined, i32 [[TMP6]]) // CHECK23-NEXT: ret void // // @@ -12829,22 +12941,25 @@ int bar(int n){ // CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK23-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr 
[[TMP4]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK23-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[A]], align 4 -// CHECK23-NEXT: store i32 [[TMP5]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK23-NEXT: store i16 [[TMP7]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l118.omp_outlined, i32 [[TMP6]], i32 [[TMP8]]) +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP8]], ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK23-NEXT: store i16 [[TMP10]], ptr [[AA_CASTED]], align 2 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l118.omp_outlined, i32 [[TMP9]], i32 [[TMP11]]) // CHECK23-NEXT: ret void // // @@ -12936,32 +13051,42 @@ int bar(int n){ // CHECK23-NEXT: [[A_CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK23-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP6]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK23-NEXT: [[C:%.*]] = load ptr, ptr [[TMP7]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 6 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 7 -// CHECK23-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP12]], align 4 -// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 8 -// CHECK23-NEXT: [[D:%.*]] = load ptr, ptr [[TMP13]], align 4 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 9 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK23-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[BN:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load ptr, ptr [[TMP11]], align 4 +// CHECK23-NEXT: [[C:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK23-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP14]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK23-NEXT: [[TMP17:%.*]] = load ptr, ptr [[TMP16]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP17]], align 4 +// CHECK23-NEXT: [[TMP19:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 7 +// CHECK23-NEXT: [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 4 +// CHECK23-NEXT: [[CN:%.*]] = load ptr, ptr [[TMP20]], align 4 +// CHECK23-NEXT: [[TMP21:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 8 +// CHECK23-NEXT: [[TMP22:%.*]] = load ptr, ptr [[TMP21]], align 4 +// CHECK23-NEXT: [[D:%.*]] = load ptr, ptr [[TMP22]], align 4 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr 
[[TMP0]], i32 9 +// CHECK23-NEXT: [[TMP23:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP23]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 -// CHECK23-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.omp_outlined, i32 [[TMP15]], ptr [[B]], i32 [[TMP5]], ptr [[BN]], ptr [[C]], i32 [[TMP9]], i32 [[TMP11]], ptr [[CN]], ptr [[D]]) +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, ptr [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP24]], ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 9, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l142.omp_outlined, i32 [[TMP25]], ptr [[B]], i32 [[TMP8]], ptr [[BN]], ptr [[C]], i32 [[TMP15]], i32 [[TMP18]], ptr [[CN]], ptr [[D]]) // CHECK23-NEXT: ret void // // @@ -13111,36 +13236,42 @@ int bar(int n){ // CHECK23-NEXT: [[AAA_CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK23-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 -// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK23-NEXT: [[TMP6:%.*]] = load i16, ptr 
[[TMP5]], align 4 -// CHECK23-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 -// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK23-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 4 -// CHECK23-NEXT: store i8 [[TMP8]], ptr [[AAA]], align 1 -// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP9]], align 4 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK23-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 +// CHECK23-NEXT: store i32 [[TMP6]], ptr [[N]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i16, ptr [[TMP8]], align 4 +// CHECK23-NEXT: store i16 [[TMP9]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK23-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i8, ptr [[TMP11]], align 4 +// CHECK23-NEXT: store i8 [[TMP12]], ptr [[AAA]], align 1 +// CHECK23-NEXT: [[TMP13:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP14]], align 4 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK23-NEXT: [[TMP15:%.*]] 
= load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP15]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 -// CHECK23-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[N]], align 4 -// CHECK23-NEXT: store i32 [[TMP12]], ptr [[N_CASTED]], align 4 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK23-NEXT: store i16 [[TMP14]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: [[TMP16:%.*]] = load i8, ptr [[AAA]], align 1 -// CHECK23-NEXT: store i8 [[TMP16]], ptr [[AAA_CASTED]], align 1 -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.omp_outlined, i32 [[TMP11]], i32 [[TMP13]], i32 [[TMP15]], i32 [[TMP17]], ptr [[B]]) +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP16]], ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[N]], align 4 +// CHECK23-NEXT: store i32 [[TMP18]], ptr [[N_CASTED]], align 4 +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK23-NEXT: store i16 [[TMP20]], ptr [[AA_CASTED]], align 2 +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK23-NEXT: [[TMP22:%.*]] = load i8, ptr [[AAA]], align 1 +// CHECK23-NEXT: store i8 [[TMP22]], ptr [[AAA_CASTED]], align 1 +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, ptr [[AAA_CASTED]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 5, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l197.omp_outlined, i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP21]], i32 [[TMP23]], ptr [[B]]) // CHECK23-NEXT: ret void // // @@ -13291,34 +13422,41 @@ int bar(int n){ // CHECK23-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK23-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK23-NEXT: store i32 [[TMP3]], ptr [[B]], align 4 -// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK23-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 4 -// CHECK23-NEXT: [[C:%.*]] = load ptr, ptr [[TMP8]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 5 -// CHECK23-NEXT: [[TMP10:%.*]] = load i8, ptr [[TMP9]], align 4 -// CHECK23-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP10]], 0 +// CHECK23-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[TMP7:%.*]] 
= load ptr, ptr [[TMP6]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK23-NEXT: [[TMP10:%.*]] = load ptr, ptr [[TMP9]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load ptr, ptr [[TMP12]], align 4 +// CHECK23-NEXT: [[C:%.*]] = load ptr, ptr [[TMP13]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 5 +// CHECK23-NEXT: [[TMP15:%.*]] = load ptr, ptr [[TMP14]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = load i8, ptr [[TMP15]], align 4 +// CHECK23-NEXT: [[LOADEDV:%.*]] = icmp ne i8 [[TMP16]], 0 // CHECK23-NEXT: [[STOREDV:%.*]] = zext i1 [[LOADEDV]] to i8 // CHECK23-NEXT: store i8 [[STOREDV]], ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 6 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 6 +// CHECK23-NEXT: [[TMP17:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP17]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 -// CHECK23-NEXT: store i32 [[TMP11]], ptr [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK23-NEXT: [[TMP13:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], align 1 -// CHECK23-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP13]], 0 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, ptr [[B]], align 4 +// CHECK23-NEXT: store i32 [[TMP18]], ptr [[B_CASTED]], align 4 +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = load i8, ptr [[DOTCAPTURE_EXPR_]], 
align 1 +// CHECK23-NEXT: [[LOADEDV2:%.*]] = icmp ne i8 [[TMP20]], 0 // CHECK23-NEXT: [[STOREDV3:%.*]] = zext i1 [[LOADEDV2]] to i8 // CHECK23-NEXT: store i8 [[STOREDV3]], ptr [[DOTCAPTURE_EXPR__CASTED]], align 1 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.omp_outlined, ptr [[THIS]], i32 [[TMP12]], i32 [[TMP5]], i32 [[TMP7]], ptr [[C]], i32 [[TMP14]]) +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR__CASTED]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 6, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l215.omp_outlined, ptr [[THIS]], i32 [[TMP19]], i32 [[TMP8]], i32 [[TMP11]], ptr [[C]], i32 [[TMP21]]) // CHECK23-NEXT: ret void // // @@ -13472,24 +13610,28 @@ int bar(int n){ // CHECK23-NEXT: [[AA_CASTED:%.*]] = alloca i32, align 4 // CHECK23-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK23-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK23-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK23-NEXT: store i32 [[TMP2]], ptr [[A]], align 4 -// CHECK23-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK23-NEXT: [[TMP4:%.*]] = load i16, ptr [[TMP3]], align 4 -// CHECK23-NEXT: store i16 [[TMP4]], ptr [[AA]], align 2 -// CHECK23-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP5]], align 4 -// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK23-NEXT: [[TMP2:%.*]] = load 
ptr, ptr [[TMP1]], align 4 +// CHECK23-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK23-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 +// CHECK23-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK23-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i16, ptr [[TMP5]], align 4 +// CHECK23-NEXT: store i16 [[TMP6]], ptr [[AA]], align 2 +// CHECK23-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK23-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK23-NEXT: [[B:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK23-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK23-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK23-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK23-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK23-NEXT: store i32 [[TMP6]], ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i16, ptr [[AA]], align 2 -// CHECK23-NEXT: store i16 [[TMP8]], ptr [[AA_CASTED]], align 2 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, ptr [[AA_CASTED]], align 4 -// CHECK23-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.omp_outlined, i32 [[TMP7]], i32 [[TMP9]], ptr [[B]]) +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK23-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i16, ptr [[AA]], align 2 +// CHECK23-NEXT: store i16 [[TMP12]], ptr [[AA_CASTED]], align 2 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, ptr [[AA_CASTED]], align 4 +// CHECK23-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l180.omp_outlined, i32 [[TMP11]], i32 [[TMP13]], ptr [[B]]) // CHECK23-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp b/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp index f897879abd4d7..c343cf0b681b8 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_codegen-1.cpp @@ -1837,27 +1837,32 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK10-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK10-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK10-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 -// CHECK10-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK10-NEXT: [[A:%.*]] = load ptr, ptr [[TMP4]], align 8 -// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 -// CHECK10-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 8 -// CHECK10-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 4 -// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK10-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK10-NEXT: store i32 [[TMP4]], ptr [[N]], 
align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK10-NEXT: [[A:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK10-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK10-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 8 +// CHECK10-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK10-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 8 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 8 +// CHECK10-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP13]], align 8 // CHECK10-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 8 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK10-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP9]], i32 [[TMP10]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 -// CHECK10-NEXT: store i32 [[TMP11]], ptr [[N_CASTED]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined, i64 [[TMP12]], ptr [[A]]) +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP14]], i32 [[TMP15]]) +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined, i64 [[TMP17]], ptr [[A]]) // CHECK10-NEXT: ret void // // @@ -2062,20 +2067,24 @@ int target_teams_fun(int *g){ // CHECK10-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK10-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK10-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK10-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK10-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 -// CHECK10-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK10-NEXT: [[A:%.*]] = load ptr, ptr [[TMP3]], align 8 -// CHECK10-NEXT: [[G:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 3 -// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK10-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK10-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK10-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 +// CHECK10-NEXT: 
[[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK10-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK10-NEXT: [[A:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK10-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK10-NEXT: [[G:%.*]] = load ptr, ptr [[TMP6]], align 8 +// CHECK10-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK10-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK10-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 8 // CHECK10-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK10-NEXT: [[TMP4:%.*]] = load i32, ptr [[N]], align 4 -// CHECK10-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK10-NEXT: [[TMP5:%.*]] = load i64, ptr [[N_CASTED]], align 8 -// CHECK10-NEXT: [[TMP6:%.*]] = load ptr, ptr [[G]], align 8 -// CHECK10-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined, i64 [[TMP5]], ptr [[A]], ptr [[TMP6]]) +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, ptr [[N]], align 4 +// CHECK10-NEXT: store i32 [[TMP8]], ptr [[N_CASTED]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i64, ptr [[N_CASTED]], align 8 +// CHECK10-NEXT: [[TMP10:%.*]] = load ptr, ptr [[G]], align 8 +// CHECK10-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined, i64 [[TMP9]], ptr [[A]], ptr [[TMP10]]) // CHECK10-NEXT: ret void // // @@ -2291,27 +2300,32 @@ int target_teams_fun(int *g){ // CHECK12-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB3:[0-9]+]]) // CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK12-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK12-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK12-NEXT: [[A:%.*]] = load ptr, ptr [[TMP4]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK12-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4 -// CHECK12-NEXT: store i32 [[TMP8]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 4 -// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK12-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], ptr [[N]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK12-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK12-NEXT: [[A:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 
+// CHECK12-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK12-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK12-NEXT: [[TMP11:%.*]] = load ptr, ptr [[TMP10]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 +// CHECK12-NEXT: store i32 [[TMP12]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP13]], align 4 // CHECK12-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP9]], i32 [[TMP10]]) -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, ptr [[N]], align 4 -// CHECK12-NEXT: store i32 [[TMP11]], ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined, i32 [[TMP12]], ptr [[A]]) +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB3]], i32 [[TMP0]], i32 [[TMP14]], i32 [[TMP15]]) +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP16]], ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB3]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l51.omp_outlined, i32 [[TMP17]], ptr [[A]]) // CHECK12-NEXT: ret void // // @@ -2511,20 +2525,24 @@ int target_teams_fun(int *g){ // CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK12-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK12-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK12-NEXT: store i32 [[TMP2]], ptr [[N]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK12-NEXT: [[A:%.*]] = load ptr, ptr [[TMP3]], align 4 -// CHECK12-NEXT: [[G:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 3 -// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK12-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK12-NEXT: store i32 [[TMP3]], ptr [[N]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK12-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK12-NEXT: [[A:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK12-NEXT: [[G:%.*]] = load ptr, ptr [[TMP6]], align 4 +// CHECK12-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 3 +// CHECK12-NEXT: [[TMP7:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK12-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP7]], align 4 // CHECK12-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, ptr 
[[N]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, ptr [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load ptr, ptr [[G]], align 4 -// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined, i32 [[TMP5]], ptr [[A]], ptr [[TMP6]]) +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, ptr [[N]], align 4 +// CHECK12-NEXT: store i32 [[TMP8]], ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, ptr [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load ptr, ptr [[G]], align 4 +// CHECK12-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB3]], i32 3, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16target_teams_funPi_l57.omp_outlined, i32 [[TMP9]], ptr [[A]], ptr [[TMP10]]) // CHECK12-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp b/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp index efc5fc6680d99..b3cf6526ffa0a 100644 --- a/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp +++ b/clang/test/OpenMP/target_teams_generic_loop_private_codegen.cpp @@ -1510,8 +1510,9 @@ int main() { // CHECK13-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK13-NEXT: call void (ptr, i32, 
ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) // CHECK13-NEXT: ret void @@ -1648,8 +1649,9 @@ int main() { // CHECK13-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK13-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK13-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK13-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK13-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK13-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK13-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) // CHECK13-NEXT: ret void @@ -1829,8 +1831,9 @@ int main() { // CHECK15-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK15-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l124.omp_outlined) // CHECK15-NEXT: ret void @@ -1965,8 +1968,9 @@ int main() { // CHECK15-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK15-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK15-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK15-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK15-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK15-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK15-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB2]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIiET_v_l80.omp_outlined) // CHECK15-NEXT: ret void @@ -2144,8 +2148,9 @@ int main() { // CHECK17-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK17-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK17-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK17-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK17-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK17-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK17-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB2:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l104.omp_outlined) // CHECK17-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_map_codegen.cpp b/clang/test/OpenMP/target_teams_map_codegen.cpp index f08e75cf6f9d7..98ad25fa32fb6 100644 --- a/clang/test/OpenMP/target_teams_map_codegen.cpp +++ b/clang/test/OpenMP/target_teams_map_codegen.cpp @@ -2374,8 +2374,9 @@ void mapInt128() { // CHECK5-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK5-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27.omp_outlined) // CHECK5-NEXT: ret void @@ -2402,20 +2403,23 @@ void mapInt128() { // CHECK5-NEXT: [[Y_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK5-NEXT: [[X:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK5-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[X:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 4 -// CHECK5-NEXT: store i32 [[TMP3]], ptr [[X_CASTED]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, ptr [[Y]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], ptr [[Y_CASTED]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, ptr [[Y_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.omp_outlined, i64 [[TMP4]], i64 [[TMP6]]) +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, ptr [[X]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], ptr [[X_CASTED]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, ptr [[X_CASTED]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], ptr [[Y_CASTED]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i64, ptr [[Y_CASTED]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.omp_outlined, i64 [[TMP7]], i64 [[TMP9]]) // CHECK5-NEXT: ret void // // @@ -2440,12 +2444,15 @@ void mapInt128() { // CHECK5-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK5-NEXT: [[X:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK5-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[X:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK5-NEXT: [[TMP5:%.*]] = 
load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39.omp_outlined, ptr [[X]], ptr [[Y]]) // CHECK5-NEXT: ret void @@ -2538,15 +2545,17 @@ void mapInt128() { // CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK5-NEXT: [[X:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[X:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[X_CASTED]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.omp_outlined, i64 [[TMP3]]) +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[X]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[X_CASTED]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[X_CASTED]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.omp_outlined, i64 [[TMP5]]) // CHECK5-NEXT: ret void // // @@ -2570,15 +2579,17 @@ void mapInt128() { // CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK5-NEXT: [[X:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[X:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[X_CASTED]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.omp_outlined, i64 [[TMP3]]) +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[X]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[X_CASTED]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[X_CASTED]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.omp_outlined, i64 [[TMP5]]) // CHECK5-NEXT: ret void // // @@ -2602,15 +2613,17 @@ void mapInt128() { // CHECK5-NEXT: [[X_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK5-NEXT: [[X:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[X:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK5-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP3]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 -// CHECK5-NEXT: store i32 [[TMP2]], ptr [[X_CASTED]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, ptr [[X_CASTED]], align 8 -// CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.omp_outlined, i64 [[TMP3]]) +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, ptr [[X]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], ptr [[X_CASTED]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i64, ptr [[X_CASTED]], align 8 +// CHECK5-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.omp_outlined, i64 [[TMP5]]) // CHECK5-NEXT: ret void // // @@ -2633,12 +2646,15 @@ void mapInt128() { // CHECK5-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK5-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK5-NEXT: [[Z:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[Z:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63.omp_outlined, ptr [[Y]], ptr [[Z]]) // CHECK5-NEXT: ret void @@ -2758,12 +2774,15 @@ void mapInt128() { // CHECK5-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK5-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK5-NEXT: [[Z:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[Z:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65.omp_outlined, ptr [[Y]], ptr [[Z]]) // CHECK5-NEXT: ret void @@ -2883,12 +2902,15 @@ void mapInt128() { // CHECK5-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK5-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK5-NEXT: [[Z:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[Z:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l72.omp_outlined, ptr [[Y]], ptr [[Z]]) // CHECK5-NEXT: ret void @@ -2980,12 +3002,15 @@ void mapInt128() { // CHECK5-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK5-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK5-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK5-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP1]], align 8 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK5-NEXT: [[Z:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 2 -// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK5-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK5-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK5-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK5-NEXT: [[Z:%.*]] = load ptr, ptr [[TMP4]], align 8 +// CHECK5-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK5-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK5-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK5-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK5-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9mapInt128v_l74.omp_outlined, ptr [[Y]], ptr [[Z]]) // CHECK5-NEXT: ret void @@ -3077,8 +3102,9 @@ void mapInt128() { // CHECK7-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK7-NEXT: [[TMP1:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP1]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z14mapWithPrivatev_l27.omp_outlined) // CHECK7-NEXT: ret void @@ -3105,20 +3131,23 @@ void mapInt128() { // CHECK7-NEXT: [[Y_CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK7-NEXT: [[X:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK7-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[X:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = 
getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[X]], align 4 -// CHECK7-NEXT: store i32 [[TMP3]], ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[Y]], align 4 -// CHECK7-NEXT: store i32 [[TMP5]], ptr [[Y_CASTED]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[Y_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.omp_outlined, i32 [[TMP4]], i32 [[TMP6]]) +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, ptr [[X]], align 4 +// CHECK7-NEXT: store i32 [[TMP6]], ptr [[X_CASTED]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, ptr [[X_CASTED]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, ptr [[Y]], align 4 +// CHECK7-NEXT: store i32 [[TMP8]], ptr [[Y_CASTED]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, ptr [[Y_CASTED]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z19mapWithFirstprivatev_l33.omp_outlined, i32 [[TMP7]], i32 [[TMP9]]) // CHECK7-NEXT: ret void // // @@ -3143,12 +3172,15 @@ void mapInt128() { // CHECK7-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK7-NEXT: [[X:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK7-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[X:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16mapWithReductionv_l39.omp_outlined, ptr [[X]], ptr [[Y]]) // CHECK7-NEXT: ret void @@ -3241,15 +3273,17 @@ void mapInt128() { // CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK7-NEXT: [[X:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[X:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.omp_outlined, i32 [[TMP3]]) +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[X]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[X_CASTED]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[X_CASTED]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7mapFromv_l45.omp_outlined, i32 [[TMP5]]) // CHECK7-NEXT: ret void // // @@ -3273,15 +3307,17 @@ void mapInt128() { // CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK7-NEXT: [[X:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[X:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.omp_outlined, i32 [[TMP3]]) +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[X]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[X_CASTED]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[X_CASTED]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5mapTov_l51.omp_outlined, i32 [[TMP5]]) // CHECK7-NEXT: ret void // // @@ -3305,15 +3341,17 @@ void mapInt128() { // CHECK7-NEXT: [[X_CASTED:%.*]] = alloca i32, align 4 // CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK7-NEXT: [[X:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[X:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK7-NEXT: [[TMP3:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP3]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = load i32, ptr [[X]], align 4 -// CHECK7-NEXT: store i32 [[TMP2]], ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: [[TMP3:%.*]] = load i32, ptr [[X_CASTED]], align 4 -// CHECK7-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.omp_outlined, i32 [[TMP3]]) +// CHECK7-NEXT: [[TMP4:%.*]] = load i32, ptr [[X]], align 4 +// CHECK7-NEXT: store i32 [[TMP4]], ptr [[X_CASTED]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, ptr [[X_CASTED]], align 4 +// CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapAllocv_l57.omp_outlined, i32 [[TMP5]]) // CHECK7-NEXT: ret void // // @@ -3336,12 +3374,15 @@ void mapInt128() { // CHECK7-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK7-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK7-NEXT: [[Z:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[Z:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l63.omp_outlined, ptr [[Y]], ptr [[Z]]) // CHECK7-NEXT: ret void @@ -3461,12 +3502,15 @@ void mapInt128() { // CHECK7-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK7-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK7-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK7-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP1]], align 4 -// CHECK7-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK7-NEXT: [[Z:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 -// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK7-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK7-NEXT: [[Y:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK7-NEXT: [[TMP3:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK7-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK7-NEXT: [[Z:%.*]] = load ptr, ptr [[TMP4]], align 4 +// CHECK7-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 2 +// CHECK7-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK7-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK7-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK7-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z8mapArrayv_l65.omp_outlined, ptr [[Y]], ptr [[Z]]) // CHECK7-NEXT: ret void diff --git a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp index 24b4027f41e95..9f45af4dc5f2d 100644 --- a/clang/test/OpenMP/target_teams_num_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_num_teams_codegen.cpp @@ -1445,14 +1445,16 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]], i32 0) +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 
[[TMP0]], i32 [[TMP6]], i32 0) // CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined) // CHECK9-NEXT: ret void // @@ -1476,14 +1478,16 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]], i32 0) +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]], i32 0) // CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108.omp_outlined) // CHECK9-NEXT: ret void // @@ -1509,23 +1513,27 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 8 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// 
CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP7]], i32 0) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i64 [[TMP9]]) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP11]], i32 0) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[B_CASTED]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i64 [[TMP13]]) // CHECK9-NEXT: ret void // // @@ -1557,10 +1565,12 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 0) // CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l126.omp_outlined, ptr [[THIS]]) @@ -1590,8 +1600,9 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 0) // CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88.omp_outlined) @@ -1621,28 +1632,32 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 8 -// CHECK9-NEXT: store i16 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i16 [[TMP7]], ptr [[B]], align 2 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i16 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK9-NEXT: [[TMP9:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP9]], i32 0) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i16, ptr [[B]], align 2 -// CHECK9-NEXT: store i16 [[TMP12]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i64 [[TMP11]], i64 [[TMP13]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK9-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP13]], i32 0) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: store i16 [[TMP16]], ptr [[B_CASTED]], align 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i64 [[TMP15]], i64 [[TMP17]]) // CHECK9-NEXT: ret void // // @@ -1674,14 +1689,16 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] 
= getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]], i32 0) +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]], i32 0) // CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined) // CHECK11-NEXT: ret void // @@ -1705,14 +1722,16 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: 
store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP4]], i32 0) +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]], i32 0) // CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108.omp_outlined) // CHECK11-NEXT: ret void // @@ -1738,23 +1757,27 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 
[[TMP6]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP7]], i32 0) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i32 [[TMP9]]) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP11]], i32 0) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i32 [[TMP13]]) // CHECK11-NEXT: ret void // // @@ -1786,10 +1809,12 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 1024, i32 0) // CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l126.omp_outlined, ptr [[THIS]]) @@ -1819,8 +1844,9 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP2]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 20, i32 0) // CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88.omp_outlined) @@ -1850,28 +1876,32 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 4 -// CHECK11-NEXT: store i16 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i16 [[TMP7]], ptr [[B]], align 2 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i16 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK11-NEXT: [[TMP9:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP9]], i32 0) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i16, ptr [[B]], align 2 -// CHECK11-NEXT: store i16 [[TMP12]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i32 [[TMP11]], i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK11-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP13]], i32 0) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: store i16 [[TMP16]], ptr [[B_CASTED]], align 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i32 [[TMP15]], i32 [[TMP17]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp index 5c97b05c60be4..eddfc7fcc6976 100644 --- a/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp +++ b/clang/test/OpenMP/target_teams_thread_limit_codegen.cpp @@ -1484,18 +1484,21 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 +// CHECK9-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// 
CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP8]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 8 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]], i32 [[TMP7]]) +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP9]], i32 [[TMP10]]) // CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined) // CHECK9-NEXT: ret void // @@ -1519,14 +1522,16 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], 
align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP4]]) +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP6]]) // CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108.omp_outlined) // CHECK9-NEXT: ret void // @@ -1552,23 +1557,27 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 -// CHECK9-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 -// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 8 -// CHECK9-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, 
ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 8 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 8 +// CHECK9-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK9-NEXT: [[TMP10:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP7]]) -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 -// CHECK9-NEXT: store i32 [[TMP8]], ptr [[B_CASTED]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i64 [[TMP9]]) +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP11]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, ptr [[B]], align 4 +// CHECK9-NEXT: store i32 [[TMP12]], ptr [[B_CASTED]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i64 [[TMP13]]) // CHECK9-NEXT: ret void // // @@ -1600,10 +1609,12 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK9-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 1024) // CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l126.omp_outlined, ptr [[THIS]]) @@ -1633,8 +1644,9 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 20) // CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88.omp_outlined) @@ -1664,28 +1676,32 @@ int bar(int n){ // CHECK9-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK9-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK9-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK9-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK9-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK9-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK9-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 8 -// CHECK9-NEXT: store i16 [[TMP5]], ptr [[B]], align 2 -// CHECK9-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK9-NEXT: [[TMP3:%.*]] = load ptr, ptr 
[[TMP2]], align 8 +// CHECK9-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK9-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK9-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK9-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 8 -// CHECK9-NEXT: store i16 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: store i16 [[TMP7]], ptr [[B]], align 2 +// CHECK9-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK9-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK9-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP9]], align 8 +// CHECK9-NEXT: store i16 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK9-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK9-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK9-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK9-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK9-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK9-NEXT: [[TMP9:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP9]], i32 1024) -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 -// CHECK9-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 -// CHECK9-NEXT: [[TMP11:%.*]] = load i64, ptr [[A_CASTED]], align 8 -// CHECK9-NEXT: [[TMP12:%.*]] = load i16, ptr [[B]], align 2 -// CHECK9-NEXT: store i16 [[TMP12]], ptr [[B_CASTED]], align 2 -// CHECK9-NEXT: [[TMP13:%.*]] = load i64, ptr [[B_CASTED]], align 8 -// CHECK9-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i64 [[TMP11]], i64 [[TMP13]]) +// CHECK9-NEXT: [[TMP12:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK9-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 +// CHECK9-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP13]], i32 1024) +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 +// CHECK9-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i64, ptr [[A_CASTED]], align 8 +// CHECK9-NEXT: [[TMP16:%.*]] = load i16, ptr [[B]], align 2 +// CHECK9-NEXT: store i16 [[TMP16]], ptr [[B_CASTED]], align 2 +// CHECK9-NEXT: [[TMP17:%.*]] = load i64, ptr [[B_CASTED]], align 8 +// CHECK9-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i64 [[TMP15]], i64 [[TMP17]]) // CHECK9-NEXT: ret void // // @@ -1718,18 +1734,21 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: 
[[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK11-NEXT: store i32 [[TMP7]], ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP8]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR2]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]], i32 [[TMP7]]) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP9]], i32 [[TMP10]]) // CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l104.omp_outlined) // CHECK11-NEXT: ret void // @@ -1753,14 +1772,16 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP6]]) // CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZL7fstatici_l108.omp_outlined) // CHECK11-NEXT: ret void // @@ -1786,23 +1807,27 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], ptr [[B]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP5:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], ptr [[B]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP8:%.*]] = load ptr, ptr [[TMP7]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP8]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK11-NEXT: [[TMP10:%.*]] = load ptr, ptr 
[[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP10]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP7]]) -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, ptr [[B]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i32 [[TMP9]]) +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 [[TMP11]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, ptr [[B]], align 4 +// CHECK11-NEXT: store i32 [[TMP12]], ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l121.omp_outlined, ptr [[THIS]], i32 [[TMP13]]) // CHECK11-NEXT: ret void // // @@ -1834,10 +1859,12 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP2]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[THIS:%.*]] = load ptr, ptr [[TMP3]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 1024) // CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__ZN2S12r1Ei_l126.omp_outlined, ptr [[THIS]]) @@ -1867,8 +1894,9 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP2]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 0, i32 20) // CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 0, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l88.omp_outlined) @@ -1898,28 +1926,32 @@ int bar(int n){ // CHECK11-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK11-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK11-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK11-NEXT: [[TMP5:%.*]] = load i16, ptr [[TMP4]], align 4 -// CHECK11-NEXT: store i16 [[TMP5]], ptr [[B]], align 2 -// CHECK11-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK11-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK11-NEXT: [[TMP7:%.*]] = load i16, ptr [[TMP6]], align 4 -// CHECK11-NEXT: store i16 [[TMP7]], ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: store i16 [[TMP7]], ptr [[B]], align 2 +// CHECK11-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK11-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP9]], align 4 +// CHECK11-NEXT: store i16 [[TMP10]], ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK11-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr 
inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK11-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK11-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 4 // CHECK11-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 -// CHECK11-NEXT: [[TMP9:%.*]] = sext i16 [[TMP8]] to i32 -// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP9]], i32 1024) -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, ptr [[A_CASTED]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i16, ptr [[B]], align 2 -// CHECK11-NEXT: store i16 [[TMP12]], ptr [[B_CASTED]], align 2 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, ptr [[B_CASTED]], align 4 -// CHECK11-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i32 [[TMP11]], i32 [[TMP13]]) +// CHECK11-NEXT: [[TMP12:%.*]] = load i16, ptr [[DOTCAPTURE_EXPR_]], align 2 +// CHECK11-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 +// CHECK11-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP13]], i32 1024) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, ptr [[A]], align 4 +// CHECK11-NEXT: store i32 [[TMP14]], ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, ptr [[A_CASTED]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i16, ptr [[B]], align 2 +// CHECK11-NEXT: store i16 [[TMP16]], ptr [[B_CASTED]], align 2 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, ptr [[B_CASTED]], align 4 +// CHECK11-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 2, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l93.omp_outlined, i32 [[TMP15]], i32 [[TMP17]]) // CHECK11-NEXT: ret void // // diff --git a/clang/test/OpenMP/teams_codegen.cpp b/clang/test/OpenMP/teams_codegen.cpp index 3d3f2754276d5..c4f5edd9137b8 100644 --- a/clang/test/OpenMP/teams_codegen.cpp +++ b/clang/test/OpenMP/teams_codegen.cpp @@ -2538,11 +2538,13 @@ void foo() { // CHECK25-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK25-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8 -// CHECK25-NEXT: store i32 [[TMP2]], ptr [[ARGC]], align 4 -// CHECK25-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK25-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 +// CHECK25-NEXT: store i32 [[TMP3]], ptr [[ARGC]], align 4 +// CHECK25-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK25-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK25-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 8 // CHECK25-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l216.omp_outlined, ptr [[ARGC]]) // CHECK25-NEXT: ret void @@ -2569,9 +2571,11 @@ void foo() { // CHECK25-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 8 // CHECK25-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK25-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK25-NEXT: [[ARGC:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 0 -// CHECK25-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP0]], i32 1 -// CHECK25-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK25-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK25-NEXT: [[ARGC:%.*]] = load ptr, ptr [[TMP1]], align 8 +// CHECK25-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK25-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK25-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP2]], align 8 // CHECK25-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 // CHECK25-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l209.omp_outlined, ptr [[ARGC]]) // CHECK25-NEXT: ret void @@ -2599,11 +2603,13 @@ void foo() { // CHECK27-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK27-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 4 -// CHECK27-NEXT: store i32 [[TMP2]], ptr [[ARGC]], align 4 -// CHECK27-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK27-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 +// CHECK27-NEXT: store i32 [[TMP3]], ptr [[ARGC]], align 4 +// CHECK27-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK27-NEXT: [[TMP4:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK27-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP4]], align 4 // CHECK27-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1:[0-9]+]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l216.omp_outlined, ptr [[ARGC]]) // CHECK27-NEXT: ret void @@ -2630,9 +2636,11 @@ void foo() { // CHECK27-NEXT: [[DYN_PTR1:%.*]] = alloca ptr, align 4 // CHECK27-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK27-NEXT: [[TMP0:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK27-NEXT: [[ARGC:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 0 -// CHECK27-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 1 -// CHECK27-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK27-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 0 +// CHECK27-NEXT: [[ARGC:%.*]] = load ptr, ptr [[TMP1]], align 4 +// CHECK27-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP0]], i32 1 +// CHECK27-NEXT: [[TMP2:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK27-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP2]], align 4 // CHECK27-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 // CHECK27-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l209.omp_outlined, ptr [[ARGC]]) // CHECK27-NEXT: ret void @@ -2663,21 +2671,25 @@ void foo() { // CHECK33-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK33-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK33-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK33-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK33-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK33-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK33-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK33-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 -// CHECK33-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 -// CHECK33-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 +// CHECK33-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK33-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// CHECK33-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK33-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK33-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK33-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 // CHECK33-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 -// CHECK33-NEXT: store i32 [[TMP7]], ptr [[ARGC]], align 4 -// CHECK33-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK33-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK33-NEXT: store i32 [[TMP7]], ptr [[B]], align 4 +// CHECK33-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK33-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK33-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 8 +// CHECK33-NEXT: store i32 [[TMP10]], ptr [[ARGC]], align 4 +// CHECK33-NEXT: [[DYN_PTR_ADDR:%.*]] = 
getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK33-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK33-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 8 // CHECK33-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK33-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 -// CHECK33-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4 -// CHECK33-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP8]], i32 [[TMP9]]) +// CHECK33-NEXT: [[TMP12:%.*]] = load i32, ptr [[A]], align 4 +// CHECK33-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 +// CHECK33-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP12]], i32 [[TMP13]]) // CHECK33-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l265.omp_outlined, ptr [[ARGC]]) // CHECK33-NEXT: ret void // @@ -2706,19 +2718,23 @@ void foo() { // CHECK33-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK33-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 8 // CHECK33-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 8 -// CHECK33-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 0 -// CHECK33-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 8 -// CHECK33-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK33-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 1 -// CHECK33-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 8 -// CHECK33-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 -// CHECK33-NEXT: [[ARGC:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 2 -// CHECK33-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i64, ptr [[TMP1]], i32 3 -// CHECK33-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK33-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK33-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 8 +// 
CHECK33-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 8 +// CHECK33-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK33-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK33-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 8 +// CHECK33-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 8 +// CHECK33-NEXT: store i32 [[TMP7]], ptr [[B]], align 4 +// CHECK33-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK33-NEXT: [[ARGC:%.*]] = load ptr, ptr [[TMP8]], align 8 +// CHECK33-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK33-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 8 +// CHECK33-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 8 // CHECK33-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 8 -// CHECK33-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK33-NEXT: [[TMP7:%.*]] = load i32, ptr [[B]], align 4 -// CHECK33-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]], i32 [[TMP7]]) +// CHECK33-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK33-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK33-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP10]], i32 [[TMP11]]) // CHECK33-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l254.omp_outlined, ptr [[ARGC]]) // CHECK33-NEXT: ret void // @@ -2748,21 +2764,25 @@ void foo() { // CHECK35-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]]) // CHECK35-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK35-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK35-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK35-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK35-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK35-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK35-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK35-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 -// CHECK35-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 +// CHECK35-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK35-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP2]], align 4 +// CHECK35-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK35-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK35-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK35-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 // CHECK35-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 -// CHECK35-NEXT: store i32 [[TMP7]], ptr [[ARGC]], align 4 -// CHECK35-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK35-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK35-NEXT: store i32 [[TMP7]], ptr [[B]], align 4 +// CHECK35-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK35-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK35-NEXT: [[TMP10:%.*]] = load i32, ptr [[TMP9]], align 4 +// CHECK35-NEXT: store i32 [[TMP10]], ptr [[ARGC]], align 4 +// CHECK35-NEXT: 
[[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK35-NEXT: [[TMP11:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK35-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP11]], align 4 // CHECK35-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK35-NEXT: [[TMP8:%.*]] = load i32, ptr [[A]], align 4 -// CHECK35-NEXT: [[TMP9:%.*]] = load i32, ptr [[B]], align 4 -// CHECK35-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP8]], i32 [[TMP9]]) +// CHECK35-NEXT: [[TMP12:%.*]] = load i32, ptr [[A]], align 4 +// CHECK35-NEXT: [[TMP13:%.*]] = load i32, ptr [[B]], align 4 +// CHECK35-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP12]], i32 [[TMP13]]) // CHECK35-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l265.omp_outlined, ptr [[ARGC]]) // CHECK35-NEXT: ret void // @@ -2791,19 +2811,23 @@ void foo() { // CHECK35-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1]]) // CHECK35-NEXT: store ptr [[__CONTEXT]], ptr [[__CONTEXT_ADDR]], align 4 // CHECK35-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__CONTEXT_ADDR]], align 4 -// CHECK35-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0 -// CHECK35-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP2]], align 4 -// CHECK35-NEXT: store i32 [[TMP3]], ptr [[A]], align 4 -// CHECK35-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 1 -// CHECK35-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -// CHECK35-NEXT: store i32 [[TMP5]], ptr [[B]], align 4 -// CHECK35-NEXT: [[ARGC:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 2 -// CHECK35-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 3 -// CHECK35-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK35-NEXT: [[TMP2:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 0 +// CHECK35-NEXT: [[TMP3:%.*]] = load ptr, 
ptr [[TMP2]], align 4 +// CHECK35-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP3]], align 4 +// CHECK35-NEXT: store i32 [[TMP4]], ptr [[A]], align 4 +// CHECK35-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 1 +// CHECK35-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP5]], align 4 +// CHECK35-NEXT: [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4 +// CHECK35-NEXT: store i32 [[TMP7]], ptr [[B]], align 4 +// CHECK35-NEXT: [[TMP8:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 2 +// CHECK35-NEXT: [[ARGC:%.*]] = load ptr, ptr [[TMP8]], align 4 +// CHECK35-NEXT: [[DYN_PTR_ADDR:%.*]] = getelementptr inbounds ptr, ptr [[TMP1]], i32 3 +// CHECK35-NEXT: [[TMP9:%.*]] = load ptr, ptr [[DYN_PTR_ADDR]], align 4 +// CHECK35-NEXT: [[DYN_PTR:%.*]] = load ptr, ptr [[TMP9]], align 4 // CHECK35-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR1]], align 4 -// CHECK35-NEXT: [[TMP6:%.*]] = load i32, ptr [[A]], align 4 -// CHECK35-NEXT: [[TMP7:%.*]] = load i32, ptr [[B]], align 4 -// CHECK35-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP6]], i32 [[TMP7]]) +// CHECK35-NEXT: [[TMP10:%.*]] = load i32, ptr [[A]], align 4 +// CHECK35-NEXT: [[TMP11:%.*]] = load i32, ptr [[B]], align 4 +// CHECK35-NEXT: call void @__kmpc_push_num_teams(ptr @[[GLOB1]], i32 [[TMP0]], i32 [[TMP10]], i32 [[TMP11]]) // CHECK35-NEXT: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_teams(ptr @[[GLOB1]], i32 1, ptr @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l254.omp_outlined, ptr [[ARGC]]) // CHECK35-NEXT: ret void // diff --git a/offload/plugins-nextgen/host/src/rtl.cpp b/offload/plugins-nextgen/host/src/rtl.cpp index 0846a6d99e3b3..8e0210ebd7ff5 100644 --- a/offload/plugins-nextgen/host/src/rtl.cpp +++ b/offload/plugins-nextgen/host/src/rtl.cpp @@ -99,7 +99,7 @@ struct GenELF64KernelTy : public GenericKernelTy { "cooperative kernel launch not supported for host"); // TODO: The data will need to be copied locally if we ever support // asynchronous kernel launches in the host interface. - Func(LaunchParams.Data); + Func(LaunchParams.Ptrs); return Plugin::success(); } @@ -112,8 +112,9 @@ struct GenELF64KernelTy : public GenericKernelTy { } private: - /// Host kernel arguments are defined as a single, contiguous buffer. - using KernelTy = void(void *); + /// Host kernel arguments are defined as an array of pointers, one per + /// argument, each pointing to that argument's storage. + using KernelTy = void(void **); /// The kernel function to execute. KernelTy *Func; }; From e3d71535b6e39612bbcc1dd184ba25c5eb3472b2 Mon Sep 17 00:00:00 2001 From: Thurston Dang Date: Wed, 24 Jun 2026 14:23:07 -0700 Subject: [PATCH 415/511] [AssumptionCache] Fix removeAffectedValues() when value is repeated in AssumeInst (#205275) If a value appears more than once in an AssumeInst (e.g., `ptr %arg1` in `call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %arg1, i64 1), "align"(ptr %arg1, i64 8) ]`) it will appear multiple times in the result of findAffectedValues(). removeAffectedValues() may (depending on how the results in AffectedValues.find_as(AV.Assume) are ordered), nullify multiple values from AffectedValues.find_as(AV.Assume) in one iteration of an outer for loop. The next iteration of that outer for loop might then find a match only to a different AssumeInst, resulting in an assertion failure. 
This patch fixes the issue by counting the number of matching ResultElems we expect to find. This was a latent bug that was revealed by https://github.com/llvm/llvm-project/pull/204432; the latter is not itself buggy, but relies on AssumptionCache::removeAffectedValues(). --------- Co-authored-by: Nikolas Klauser --- llvm/lib/Analysis/AssumptionCache.cpp | 35 ++++- .../DropUnnecessaryAssumes/dereferenceable.ll | 131 ++++++++++++++++++ 2 files changed, 165 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Analysis/AssumptionCache.cpp b/llvm/lib/Analysis/AssumptionCache.cpp index 1f5fea2fa5d1a..94eb93e048720 100644 --- a/llvm/lib/Analysis/AssumptionCache.cpp +++ b/llvm/lib/Analysis/AssumptionCache.cpp @@ -119,6 +119,24 @@ void AssumptionCache::removeAffectedValues(AssumeInst *CI) { SmallVector Affected; findAffectedValues(CI, TTI, Affected); + // If a value appears more than once in an AssumeInst e.g., 'ptr %arg1' in: + // call void @llvm.assume(i1 true) + // [ "dereferenceable"(ptr %arg1, i64 1), + // "align"(ptr %arg1, i64 8) ] + // it will appear multiple times in Affected, but we may (depending on + // how the results in AffectedValues.find_as(AV.Assume) are ordered) + // nullify multiple instances of Elem.Assume during one iteration of the + // 'for (auto &AV : Affected)' loop below. The next iteration of that for + // loop may then find only a match to a different AssumeInst, resulting in + // an assertion failure. Avoid this by counting the number of expected + // matches. 
+#ifndef NDEBUG + DenseMap ExpectedMatches; + for (auto &AV : Affected) + if (AffectedValues.find_as(AV.Assume) != AffectedValues.end()) + ExpectedMatches[AV.Assume]++; +#endif + for (auto &AV : Affected) { auto AVI = AffectedValues.find_as(AV.Assume); if (AVI == AffectedValues.end()) @@ -129,15 +147,30 @@ void AssumptionCache::removeAffectedValues(AssumeInst *CI) { if (Elem.Assume == CI) { Found = true; Elem.Assume = nullptr; + +#ifndef NDEBUG + ExpectedMatches[AV.Assume]--; +#endif + assert(ExpectedMatches[AV.Assume] >= 0); + // After ExpectedMatches[AV.Assume] == 0, we still need to iterate + // through this loop to determine the value of HasNonnull, to avoid + // prematurely calling AffectedValues.erase(AVI). } HasNonnull |= !!Elem.Assume; if (HasNonnull && Found) break; } - assert(Found && "already unregistered or incorrect cache state"); + + assert(ExpectedMatches[AV.Assume] == 0 || + Found && "already unregistered or incorrect cache state"); + if (!HasNonnull) AffectedValues.erase(AVI); } + + assert( + none_of(Affected, [&](auto &AV) { return ExpectedMatches[AV.Assume]; }) && + "already unregistered or incorrect cache state"); } void AssumptionCache::unregisterAssumption(AssumeInst *CI) { diff --git a/llvm/test/Transforms/DropUnnecessaryAssumes/dereferenceable.ll b/llvm/test/Transforms/DropUnnecessaryAssumes/dereferenceable.ll index 079c0403227f3..611d607ca7ebc 100644 --- a/llvm/test/Transforms/DropUnnecessaryAssumes/dereferenceable.ll +++ b/llvm/test/Transforms/DropUnnecessaryAssumes/dereferenceable.ll @@ -53,6 +53,137 @@ define i8 @test_dereferenceable_with_align_ptr_used(ptr %p, i64 %size) { ret i8 %val } +; Regression test for AssumptionCache::removeAffectedValues() crash revealed at +; (but not caused by) def1355cf14cec28f71b8ca947b7723641c1580d +; +; Check that when the first AssumeInst has two op bundles that rely on %arg1, +; removeAffectedValues() is not confused by the presence of an unrelated +; AssumeInst that also has an %arg1 op bundle. 
+define ptr @test_dup_ptr_used_elsewhere(i1 %arg, ptr %arg1) { +; CHECK-LABEL: define ptr @test_dup_ptr_used_elsewhere( +; CHECK-SAME: i1 [[ARG:%.*]], ptr [[ARG1:%.*]]) { +; CHECK-NEXT: [[BB:.*:]] +; CHECK-NEXT: br i1 [[ARG]], label %[[BB3:.*]], label %[[BB4:.*]] +; CHECK: [[BB2:.*]]: +; CHECK-NEXT: ret ptr null +; CHECK: [[BB3]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[ARG1]], i64 1) ] +; CHECK-NEXT: br label %[[BB2]] +; CHECK: [[BB4]]: +; CHECK-NEXT: br label %[[BB2]] +; +; DROP-DEREF-LABEL: define ptr @test_dup_ptr_used_elsewhere( +; DROP-DEREF-SAME: i1 [[ARG:%.*]], ptr [[ARG1:%.*]]) { +; DROP-DEREF-NEXT: [[BB:.*:]] +; DROP-DEREF-NEXT: br i1 [[ARG]], label %[[BB3:.*]], label %[[BB4:.*]] +; DROP-DEREF: [[BB2:.*]]: +; DROP-DEREF-NEXT: ret ptr null +; DROP-DEREF: [[BB3]]: +; DROP-DEREF-NEXT: br label %[[BB2]] +; DROP-DEREF: [[BB4]]: +; DROP-DEREF-NEXT: br label %[[BB2]] +; +bbl: + br i1 %arg, label %bbl3, label %bbl4 + +bbl2: + ret ptr null + +bbl3: + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %arg1, i64 1), "align"(ptr %arg1, i64 8) ] + br label %bbl2 + +bbl4: + call void @llvm.assume(i1 true) [ "align"(ptr %arg1, i64 4) ] + br label %bbl2 +} + +; @test_dup_ptr_used_elsewhere with extra ptr +define ptr @test_dup_ptr_used_elsewhere_extra(i1 %arg, ptr %arg1, ptr %arg2) { +; CHECK-LABEL: define ptr @test_dup_ptr_used_elsewhere_extra( +; CHECK-SAME: i1 [[ARG:%.*]], ptr [[ARG1:%.*]], ptr [[ARG2:%.*]]) { +; CHECK-NEXT: [[BBL:.*:]] +; CHECK-NEXT: br i1 [[ARG]], label %[[BBL3:.*]], label %[[BBL4:.*]] +; CHECK: [[BBL2:.*]]: +; CHECK-NEXT: ret ptr null +; CHECK: [[BBL3]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[ARG1]], i64 1), "dereferenceable"(ptr [[ARG2]], i64 1) ] +; CHECK-NEXT: br label %[[BBL2]] +; CHECK: [[BBL4]]: +; CHECK-NEXT: br label %[[BBL2]] +; +; DROP-DEREF-LABEL: define ptr @test_dup_ptr_used_elsewhere_extra( +; DROP-DEREF-SAME: i1 [[ARG:%.*]], ptr [[ARG1:%.*]], ptr [[ARG2:%.*]]) { 
+; DROP-DEREF-NEXT: [[BBL:.*:]] +; DROP-DEREF-NEXT: br i1 [[ARG]], label %[[BBL3:.*]], label %[[BBL4:.*]] +; DROP-DEREF: [[BBL2:.*]]: +; DROP-DEREF-NEXT: ret ptr null +; DROP-DEREF: [[BBL3]]: +; DROP-DEREF-NEXT: br label %[[BBL2]] +; DROP-DEREF: [[BBL4]]: +; DROP-DEREF-NEXT: br label %[[BBL2]] +; +bbl: + br i1 %arg, label %bbl3, label %bbl4 + +bbl2: + ret ptr null + +bbl3: + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %arg1, i64 1), "align"(ptr %arg1, i64 8), + "dereferenceable"(ptr %arg2, i64 1), "align"(ptr %arg2, i64 8) ] + br label %bbl2 + +bbl4: + call void @llvm.assume(i1 true) [ "align"(ptr %arg1, i64 4), "align"(ptr %arg2, i64 4) ] + br label %bbl2 +} + +; @test_dup_ptr_used_elsewhere_extra with even more op bundles +define ptr @test_dup_ptr_used_elsewhere_extra2(i1 %arg, ptr %arg1, ptr %arg2) { +; CHECK-LABEL: define ptr @test_dup_ptr_used_elsewhere_extra2( +; CHECK-SAME: i1 [[ARG:%.*]], ptr [[ARG1:%.*]], ptr [[ARG2:%.*]]) { +; CHECK-NEXT: [[BBL:.*:]] +; CHECK-NEXT: br i1 [[ARG]], label %[[BBL3:.*]], label %[[BBL4:.*]] +; CHECK: [[BBL2:.*]]: +; CHECK-NEXT: ret ptr null +; CHECK: [[BBL3]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[ARG1]], i64 1), "dereferenceable"(ptr [[ARG2]], i64 1), "dereferenceable"(ptr [[ARG1]], i64 1), "dereferenceable"(ptr [[ARG2]], i64 1), "dereferenceable"(ptr [[ARG1]], i64 1) ] +; CHECK-NEXT: br label %[[BBL2]] +; CHECK: [[BBL4]]: +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[ARG1]], i64 1), "dereferenceable"(ptr [[ARG2]], i64 1) ] +; CHECK-NEXT: br label %[[BBL2]] +; +; DROP-DEREF-LABEL: define ptr @test_dup_ptr_used_elsewhere_extra2( +; DROP-DEREF-SAME: i1 [[ARG:%.*]], ptr [[ARG1:%.*]], ptr [[ARG2:%.*]]) { +; DROP-DEREF-NEXT: [[BBL:.*:]] +; DROP-DEREF-NEXT: br i1 [[ARG]], label %[[BBL3:.*]], label %[[BBL4:.*]] +; DROP-DEREF: [[BBL2:.*]]: +; DROP-DEREF-NEXT: ret ptr null +; DROP-DEREF: [[BBL3]]: +; DROP-DEREF-NEXT: br label %[[BBL2]] +; DROP-DEREF: [[BBL4]]: +; 
DROP-DEREF-NEXT: br label %[[BBL2]] +; +bbl: + br i1 %arg, label %bbl3, label %bbl4 + +bbl2: + ret ptr null + +bbl3: + call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %arg1, i64 1), "align"(ptr %arg2, i64 8), + "dereferenceable"(ptr %arg2, i64 1), "dereferenceable"(ptr %arg1, i64 1), + "dereferenceable"(ptr %arg2, i64 1), "dereferenceable"(ptr %arg1, i64 1) ] + br label %bbl2 + +bbl4: + call void @llvm.assume(i1 true) [ "align"(ptr %arg1, i64 4), "align"(ptr %arg2, i64 4), + "dereferenceable"(ptr %arg1, i64 1), "dereferenceable"(ptr %arg2, i64 1) ] + br label %bbl2 +} + + ; Make sure newly created assumes are handled properly. define i8 @test_dereferenceable_with_align_cache_realloc(ptr %p, ptr %q, i1 %c) { ; CHECK-LABEL: define i8 @test_dereferenceable_with_align_cache_realloc( From 866221a13adb3b87b0d791c3ed6684050e72fb0d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 24 Jun 2026 22:25:36 +0100 Subject: [PATCH 416/511] [APInt] Consistently use the terms pdep/pext instead of expandBits/compressBits (#205112) After some bikeshedding in #200570 - we decided on the terms pdep/pext --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 +- clang/lib/AST/ExprConstant.cpp | 8 +-- llvm/include/llvm/ADT/APInt.h | 12 ++-- llvm/lib/Analysis/ConstantFolding.cpp | 4 +- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 4 +- llvm/lib/Support/APInt.cpp | 4 +- llvm/unittests/ADT/APIntTest.cpp | 61 +++++++------------ llvm/unittests/Support/KnownBitsTest.cpp | 4 +- 9 files changed, 45 insertions(+), 60 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index e59d14db896a2..b76f13833da14 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -5228,13 +5228,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case clang::X86::BI__builtin_ia32_pdep_di: case 
Builtin::BI__builtin_elementwise_pdep: return interp__builtin_elementwise_int_binop(S, OpPC, Call, - llvm::APIntOps::expandBits); + llvm::APIntOps::pdep); case clang::X86::BI__builtin_ia32_pext_si: case clang::X86::BI__builtin_ia32_pext_di: case Builtin::BI__builtin_elementwise_pext: return interp__builtin_elementwise_int_binop(S, OpPC, Call, - llvm::APIntOps::compressBits); + llvm::APIntOps::pext); case clang::X86::BI__builtin_ia32_addcarryx_u32: case clang::X86::BI__builtin_ia32_addcarryx_u64: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index dde3b8bab43ec..b628669880f2b 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14206,9 +14206,9 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case Builtin::BI__builtin_elementwise_clmul: return EvaluateBinOpExpr(llvm::APIntOps::clmul); case Builtin::BI__builtin_elementwise_pext: - return EvaluateBinOpExpr(llvm::APIntOps::compressBits); + return EvaluateBinOpExpr(llvm::APIntOps::pext); case Builtin::BI__builtin_elementwise_pdep: - return EvaluateBinOpExpr(llvm::APIntOps::expandBits); + return EvaluateBinOpExpr(llvm::APIntOps::pdep); case Builtin::BI__builtin_elementwise_fshl: case Builtin::BI__builtin_elementwise_fshr: { APValue SourceHi, SourceLo, SourceShift; @@ -18029,7 +18029,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, if (!EvaluateInteger(E->getArg(0), Val, Info) || !EvaluateInteger(E->getArg(1), Msk, Info)) return false; - return Success(llvm::APIntOps::expandBits(Val, Msk), E); + return Success(llvm::APIntOps::pdep(Val, Msk), E); } case clang::X86::BI__builtin_ia32_pext_si: @@ -18039,7 +18039,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, if (!EvaluateInteger(E->getArg(0), Val, Info) || !EvaluateInteger(E->getArg(1), Msk, Info)) return false; - return Success(llvm::APIntOps::compressBits(Val, Msk), E); + return Success(llvm::APIntOps::pext(Val, Msk), E); } case 
X86::BI__builtin_ia32_ptestz128: case X86::BI__builtin_ia32_ptestz256: diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index 99eb758cc7a6f..026efbe866a93 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -2495,9 +2495,9 @@ LLVM_ABI APInt clmulh(const APInt &LHS, const APInt &RHS); /// and packs them contiguously into the least significant bits of the result. /// /// Examples: -/// (1) compressBits(i8 0b1010'1010, i8 0b1100'1100) = 0b0000'1010 -/// (2) compressBits(i8 0b1111'1111, i8 0b1010'1010) = 0b0000'1111 -LLVM_ABI APInt compressBits(const APInt &Val, const APInt &Mask); +/// (1) pext(i8 0b1010'1010, i8 0b1100'1100) = 0b0000'1010 +/// (2) pext(i8 0b1111'1111, i8 0b1010'1010) = 0b0000'1111 +LLVM_ABI APInt pext(const APInt &Val, const APInt &Mask); /// Perform an "expand" operation, also known as pdep or bdep. /// @@ -2505,9 +2505,9 @@ LLVM_ABI APInt compressBits(const APInt &Val, const APInt &Mask); /// has a 1-bit, and zeros the remaining bits. 
/// /// Examples: -/// (1) expandBits(i8 0b0000'1010, i8 0b1100'1100) = 0b1000'1000 -/// (2) expandBits(i8 0b0000'1111, i8 0b1010'1010) = 0b1010'1010 -LLVM_ABI APInt expandBits(const APInt &Val, const APInt &Mask); +/// (1) pdep(i8 0b0000'1010, i8 0b1100'1100) = 0b1000'1000 +/// (2) pdep(i8 0b0000'1111, i8 0b1010'1010) = 0b1010'1010 +LLVM_ABI APInt pdep(const APInt &Val, const APInt &Mask); } // namespace APIntOps diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 35becaa728ba5..7fd1e60033437 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -3911,11 +3911,11 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty, case Intrinsic::pdep: if (!C0 || !C1) return Constant::getNullValue(Ty); - return ConstantInt::get(Ty, APIntOps::expandBits(*C0, *C1)); + return ConstantInt::get(Ty, APIntOps::pdep(*C0, *C1)); case Intrinsic::pext: if (!C0 || !C1) return Constant::getNullValue(Ty); - return ConstantInt::get(Ty, APIntOps::compressBits(*C0, *C1)); + return ConstantInt::get(Ty, APIntOps::pext(*C0, *C1)); case Intrinsic::amdgcn_wave_reduce_umin: case Intrinsic::amdgcn_wave_reduce_umax: case Intrinsic::amdgcn_wave_reduce_max: diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b261d6322df66..3c92f395a6453 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12237,7 +12237,7 @@ SDValue DAGCombiner::visitPEXT(SDNode *N) { // pext(x, -1) -> x (all bits selected, packed into low positions = x) if (isAllOnesOrAllOnesSplat(N1)) return N0; - // fold pext(c1, c2) -> compressBits(c1, c2) + // fold pext(c1, c2) -> c3 if (SDValue C = DAG.FoldConstantArithmetic(ISD::PEXT, DL, VT, {N0, N1})) return C; return SDValue(); @@ -12257,7 +12257,7 @@ SDValue DAGCombiner::visitPDEP(SDNode *N) { if (isAllOnesOrAllOnesSplat(N1)) return N0; - // fold pdep(c1, 
c2) -> expandBits(c1, c2) + // fold pdep(c1, c2) -> c3 if (SDValue C = DAG.FoldConstantArithmetic(ISD::PDEP, DL, VT, {N0, N1})) return C; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 853d0712dd7c9..c542f8e7cc20b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7526,9 +7526,9 @@ static std::optional FoldValue(unsigned Opcode, const APInt &C1, case ISD::CLMULH: return APIntOps::clmulh(C1, C2); case ISD::PEXT: - return APIntOps::compressBits(C1, C2); + return APIntOps::pext(C1, C2); case ISD::PDEP: - return APIntOps::expandBits(C1, C2); + return APIntOps::pdep(C1, C2); } return std::nullopt; } diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index 221d642ae8539..c5766a2613e0c 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -3239,7 +3239,7 @@ APInt llvm::APIntOps::clmulh(const APInt &LHS, const APInt &RHS) { return clmulr(LHS, RHS).lshr(1); } -APInt llvm::APIntOps::compressBits(const APInt &Val, const APInt &Mask) { +APInt llvm::APIntOps::pext(const APInt &Val, const APInt &Mask) { unsigned BW = Val.getBitWidth(); assert(BW == Mask.getBitWidth() && "Operand mismatch"); APInt Result = APInt::getZero(BW); @@ -3249,7 +3249,7 @@ APInt llvm::APIntOps::compressBits(const APInt &Val, const APInt &Mask) { return Result; } -APInt llvm::APIntOps::expandBits(const APInt &Val, const APInt &Mask) { +APInt llvm::APIntOps::pdep(const APInt &Val, const APInt &Mask) { unsigned BW = Val.getBitWidth(); assert(BW == Mask.getBitWidth() && "Operand mismatch"); APInt Result = APInt::getZero(BW); diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp index 39005969c5bad..0eea32480d0bf 100644 --- a/llvm/unittests/ADT/APIntTest.cpp +++ b/llvm/unittests/ADT/APIntTest.cpp @@ -4032,53 +4032,38 @@ TEST(APIntTest, sqrtFloor) { } } -TEST(APIntTest, compressBits) { - 
EXPECT_EQ(APIntOps::compressBits(APInt(8, 0), APInt(8, 0xAAU)).getZExtValue(), +TEST(APIntTest, pext) { + EXPECT_EQ(APIntOps::pext(APInt(8, 0), APInt(8, 0xAAU)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::pext(APInt(8, 0x55U), APInt(8, 0xAAU)).getZExtValue(), 0U); - EXPECT_EQ( - APIntOps::compressBits(APInt(8, 0x55U), APInt(8, 0xAAU)).getZExtValue(), - 0U); - EXPECT_EQ( - APIntOps::compressBits(APInt(8, 0xAAU), APInt(8, 0xAAU)).getZExtValue(), - 15U); - EXPECT_EQ( - APIntOps::compressBits(APInt(8, 0xFFU), APInt(8, 0xAAU)).getZExtValue(), - 15U); - EXPECT_EQ(APIntOps::compressBits(APInt(8, 0xFFU), APInt(8, 0)).getZExtValue(), - 0U); - EXPECT_EQ( - APIntOps::compressBits(APInt(4, 0xFU), APInt(4, 0xAU)).getZExtValue(), - 3U); - EXPECT_EQ( - APIntOps::compressBits(APInt(4, 0xAU), APInt(4, 0xAU)).getZExtValue(), - 3U); - EXPECT_EQ( - APIntOps::compressBits(APInt(4, 0x5U), APInt(4, 0xAU)).getZExtValue(), - 0U); -} - -TEST(APIntTest, expandBits) { - EXPECT_EQ(APIntOps::expandBits(APInt(8, 0), APInt(8, 0xAAU)).getZExtValue(), - 0U); - EXPECT_EQ(APIntOps::expandBits(APInt(8, 15U), APInt(8, 0xAAU)).getZExtValue(), + EXPECT_EQ(APIntOps::pext(APInt(8, 0xAAU), APInt(8, 0xAAU)).getZExtValue(), + 15U); + EXPECT_EQ(APIntOps::pext(APInt(8, 0xFFU), APInt(8, 0xAAU)).getZExtValue(), + 15U); + EXPECT_EQ(APIntOps::pext(APInt(8, 0xFFU), APInt(8, 0)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::pext(APInt(4, 0xFU), APInt(4, 0xAU)).getZExtValue(), 3U); + EXPECT_EQ(APIntOps::pext(APInt(4, 0xAU), APInt(4, 0xAU)).getZExtValue(), 3U); + EXPECT_EQ(APIntOps::pext(APInt(4, 0x5U), APInt(4, 0xAU)).getZExtValue(), 0U); +} + +TEST(APIntTest, pdep) { + EXPECT_EQ(APIntOps::pdep(APInt(8, 0), APInt(8, 0xAAU)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::pdep(APInt(8, 15U), APInt(8, 0xAAU)).getZExtValue(), 0xAAU); - EXPECT_EQ(APIntOps::expandBits(APInt(8, 0xFFU), APInt(8, 0)).getZExtValue(), - 0U); - EXPECT_EQ(APIntOps::expandBits(APInt(4, 3U), APInt(4, 0xAU)).getZExtValue(), - 0xAU); - 
EXPECT_EQ(APIntOps::expandBits(APInt(4, 1U), APInt(4, 0xAU)).getZExtValue(), - 2U); + EXPECT_EQ(APIntOps::pdep(APInt(8, 0xFFU), APInt(8, 0)).getZExtValue(), 0U); + EXPECT_EQ(APIntOps::pdep(APInt(4, 3U), APInt(4, 0xAU)).getZExtValue(), 0xAU); + EXPECT_EQ(APIntOps::pdep(APInt(4, 1U), APInt(4, 0xAU)).getZExtValue(), 2U); APInt X(8, 0b10110100U); APInt M(8, 0b11001110U); - EXPECT_EQ(APIntOps::expandBits(APIntOps::compressBits(X, M), M), X & M); + EXPECT_EQ(APIntOps::pdep(APIntOps::pext(X, M), M), X & M); } -TEST(APIntTest, compressExpandBitsExhaustive) { +TEST(APIntTest, pext_pdep_exhaustive) { for (unsigned V = 0; V < 256; ++V) { for (unsigned Mask = 0; Mask < 256; ++Mask) { APInt Val(8, V), APMask(8, Mask); - APInt Compressed = APIntOps::compressBits(Val, APMask); - APInt RoundTrip = APIntOps::expandBits(Compressed, APMask); + APInt Compressed = APIntOps::pext(Val, APMask); + APInt RoundTrip = APIntOps::pdep(Compressed, APMask); EXPECT_EQ(RoundTrip, Val & APMask); } } diff --git a/llvm/unittests/Support/KnownBitsTest.cpp b/llvm/unittests/Support/KnownBitsTest.cpp index f6f5505c67e68..d4b8dff54f3d6 100644 --- a/llvm/unittests/Support/KnownBitsTest.cpp +++ b/llvm/unittests/Support/KnownBitsTest.cpp @@ -654,8 +654,8 @@ TEST(KnownBitsTest, BinaryExhaustive) { testBinaryOpExhaustive("avgCeilS", KnownBits::avgCeilS, APIntOps::avgCeilS); testBinaryOpExhaustive("clmul", KnownBits::clmul, APIntOps::clmul); - testBinaryOpExhaustive("pext", KnownBits::pext, APIntOps::compressBits); - testBinaryOpExhaustive("pdep", KnownBits::pdep, APIntOps::expandBits); + testBinaryOpExhaustive("pext", KnownBits::pext, APIntOps::pext); + testBinaryOpExhaustive("pdep", KnownBits::pdep, APIntOps::pdep); } TEST(KnownBitsTest, UnaryExhaustive) { From 3d2da5979e99f462ce3cb9ac6f7782ebc6dc1950 Mon Sep 17 00:00:00 2001 From: Jiahao Guo Date: Thu, 25 Jun 2026 05:36:23 +0800 Subject: [PATCH 417/511] [CIR][AArch64] Migrate vsubl_high/vsubw_high NEON tests to subtraction.c (#205508) ### summary Part of : 
https://github.com/llvm/llvm-project/issues/185382 This is a light patch and a follow up of : https://github.com/llvm/llvm-project/pull/204088 All lowering logic have been implemented in this PR : https://github.com/llvm/llvm-project/pull/204285 , I just need to migrate these intrinsics. This PR completed with the assistance of Claude Opus4.8 Co-authored-by: Claude Opus 4.8 --- clang/test/CodeGen/AArch64/neon-intrinsics.c | 180 --------------- clang/test/CodeGen/AArch64/neon/subtraction.c | 213 +++++++++++++++++- 2 files changed, 211 insertions(+), 182 deletions(-) diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c index b37ed5aa29f10..560191e43baec 100644 --- a/clang/test/CodeGen/AArch64/neon-intrinsics.c +++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c @@ -5219,186 +5219,6 @@ uint64x2_t test_vmovl_high_u32(uint32x4_t a) { return vmovl_high_u32(a); } -// CHECK-LABEL: define dso_local <8 x i16> @test_vsubl_high_s8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I12_I]] to <8 x i16> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]] -// CHECK-NEXT: ret <8 x i16> [[SUB_I]] -// -int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) { - return vsubl_high_s8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vsubl_high_s16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> 
[[SHUFFLE_I_I12_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> -// CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]] -// CHECK-NEXT: ret <4 x i32> [[SUB_I]] -// -int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) { - return vsubl_high_s16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vsubl_high_s32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I12_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK-NEXT: [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]] -// CHECK-NEXT: ret <2 x i64> [[SUB_I]] -// -int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) { - return vsubl_high_s32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vsubl_high_u8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> -// 
CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I12_I]] to <8 x i16> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[TMP0]], [[TMP1]] -// CHECK-NEXT: ret <8 x i16> [[SUB_I]] -// -uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) { - return vsubl_high_u8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vsubl_high_u16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I12_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> -// CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[TMP2]], [[TMP5]] -// CHECK-NEXT: ret <4 x i32> [[SUB_I]] -// -uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) { - return vsubl_high_u16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vsubl_high_u32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I12_I:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I12_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: 
[[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x i32> -// CHECK-NEXT: [[TMP5:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[TMP2]], [[TMP5]] -// CHECK-NEXT: ret <2 x i64> [[SUB_I]] -// -uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) { - return vsubl_high_u32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vsubw_high_s8( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = sext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[TMP0]] -// CHECK-NEXT: ret <8 x i16> [[SUB_I]] -// -int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) { - return vsubw_high_s8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vsubw_high_s16( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[TMP2]] -// CHECK-NEXT: ret <4 x i32> [[SUB_I]] -// -int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) { - return vsubw_high_s16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vsubw_high_s32( -// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 
x i32> [[B]], <4 x i32> [[B]], <2 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[TMP2]] -// CHECK-NEXT: ret <2 x i64> [[SUB_I]] -// -int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) { - return vsubw_high_s32(a, b); -} - -// CHECK-LABEL: define dso_local <8 x i16> @test_vsubw_high_u8( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = zext <8 x i8> [[SHUFFLE_I_I_I]] to <8 x i16> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[TMP0]] -// CHECK-NEXT: ret <8 x i16> [[SUB_I]] -// -uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) { - return vsubw_high_u8(a, b); -} - -// CHECK-LABEL: define dso_local <4 x i32> @test_vsubw_high_u16( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[TMP2]] -// CHECK-NEXT: ret <4 x i32> [[SUB_I]] -// -uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) { - return vsubw_high_u16(a, b); -} - -// CHECK-LABEL: define dso_local <2 x i64> @test_vsubw_high_u32( -// CHECK-SAME: <2 x i64> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[SHUFFLE_I_I_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> 
-// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I_I_I]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -// CHECK-NEXT: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[TMP2]] -// CHECK-NEXT: ret <2 x i64> [[SUB_I]] -// -uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) { - return vsubw_high_u32(a, b); -} - // CHECK-LABEL: define dso_local <8 x i8> @test_vaddhn_s16( // CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/CodeGen/AArch64/neon/subtraction.c b/clang/test/CodeGen/AArch64/neon/subtraction.c index 7810445fa0094..d23f5907f0362 100644 --- a/clang/test/CodeGen/AArch64/neon/subtraction.c +++ b/clang/test/CodeGen/AArch64/neon/subtraction.c @@ -12,7 +12,7 @@ // ACLE section headings based on v2025Q2 of the ACLE specification: // * https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#subtract // -// TODO: Migrate Widening subtraction, Narrowing subtraction and Saturating subtract test cases. +// TODO: Migrate Narrowing subtraction and Saturating subtract test cases. // //============================================================================= @@ -268,7 +268,6 @@ uint64_t test_vsubd_u64(uint64_t a, uint64_t b) { //===------------------------------------------------------===// // 2.1.1.5.3. 
Widening subtraction // https://arm-software.github.io/acle/neon_intrinsics/advsimd.html#widening-subtraction -// TODO: Migrate the vsubl_high_* / vsubw_high_* intrinsics //===------------------------------------------------------===// // LLVM-LABEL: @test_vsubl_s8( @@ -462,3 +461,213 @@ uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) { // LLVM: ret <2 x i64> [[SUB_I]] return vsubw_u32(a, b); } + +// LLVM-LABEL: @test_vsubl_high_s8( +// CIR-LABEL: @vsubl_high_s8( +int16x8_t test_vsubl_high_s8(int8x16_t a, int8x16_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_high_s8({{.*}}) : {{.*}} -> !cir.vector<8 x !s16i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_high_s8({{.*}}) : {{.*}} -> !cir.vector<8 x !s16i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<8 x !s16i> + +// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE0:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> +// LLVM: [[VMOVL0:%.*]] = sext <8 x i8> [[SHUFFLE0]] to <8 x i16> +// LLVM: [[SHUFFLE1:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> +// LLVM: [[VMOVL1:%.*]] = sext <8 x i8> [[SHUFFLE1]] to <8 x i16> +// LLVM: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <8 x i16> [[SUB_I]] + return vsubl_high_s8(a, b); +} + +// LLVM-LABEL: @test_vsubl_high_s16( +// CIR-LABEL: @vsubl_high_s16( +int32x4_t test_vsubl_high_s16(int16x8_t a, int16x8_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_high_s16({{.*}}) : {{.*}} -> !cir.vector<4 x !s32i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_high_s16({{.*}}) : {{.*}} -> !cir.vector<4 x !s32i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<4 x !s32i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE0:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE0]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM: 
[[VMOVL0:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM: [[SHUFFLE1:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> +// LLVM: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE1]] to <8 x i8> +// LLVM: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// LLVM: [[VMOVL1:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32> +// LLVM: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <4 x i32> [[SUB_I]] + return vsubl_high_s16(a, b); +} + +// LLVM-LABEL: @test_vsubl_high_s32( +// CIR-LABEL: @vsubl_high_s32( +int64x2_t test_vsubl_high_s32(int32x4_t a, int32x4_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_high_s32({{.*}}) : {{.*}} -> !cir.vector<2 x !s64i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_high_s32({{.*}}) : {{.*}} -> !cir.vector<2 x !s64i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<2 x !s64i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE0:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE0]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM: [[VMOVL0:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// LLVM: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> +// LLVM: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE1]] to <8 x i8> +// LLVM: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> +// LLVM: [[VMOVL1:%.*]] = sext <2 x i32> [[TMP3]] to <2 x i64> +// LLVM: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <2 x i64> [[SUB_I]] + return vsubl_high_s32(a, b); +} + +// LLVM-LABEL: @test_vsubl_high_u8( +// CIR-LABEL: @vsubl_high_u8( +uint16x8_t test_vsubl_high_u8(uint8x16_t a, uint8x16_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_high_u8({{.*}}) : {{.*}} -> !cir.vector<8 x !u16i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_high_u8({{.*}}) : {{.*}} -> !cir.vector<8 x !u16i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] 
: !cir.vector<8 x !u16i> + +// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE0:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[A]], <8 x i32> +// LLVM: [[VMOVL0:%.*]] = zext <8 x i8> [[SHUFFLE0]] to <8 x i16> +// LLVM: [[SHUFFLE1:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> +// LLVM: [[VMOVL1:%.*]] = zext <8 x i8> [[SHUFFLE1]] to <8 x i16> +// LLVM: [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <8 x i16> [[SUB_I]] + return vsubl_high_u8(a, b); +} + +// LLVM-LABEL: @test_vsubl_high_u16( +// CIR-LABEL: @vsubl_high_u16( +uint32x4_t test_vsubl_high_u16(uint16x8_t a, uint16x8_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_high_u16({{.*}}) : {{.*}} -> !cir.vector<4 x !u32i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_high_u16({{.*}}) : {{.*}} -> !cir.vector<4 x !u32i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<4 x !u32i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE0:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[A]], <4 x i32> +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE0]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM: [[VMOVL0:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM: [[SHUFFLE1:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> +// LLVM: [[TMP2:%.*]] = bitcast <4 x i16> [[SHUFFLE1]] to <8 x i8> +// LLVM: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// LLVM: [[VMOVL1:%.*]] = zext <4 x i16> [[TMP3]] to <4 x i32> +// LLVM: [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <4 x i32> [[SUB_I]] + return vsubl_high_u16(a, b); +} + +// LLVM-LABEL: @test_vsubl_high_u32( +// CIR-LABEL: @vsubl_high_u32( +uint64x2_t test_vsubl_high_u32(uint32x4_t a, uint32x4_t b) { +// CIR: [[VMOVL0:%.*]] = cir.call @vmovl_high_u32({{.*}}) : {{.*}} -> !cir.vector<2 x !u64i> +// CIR: [[VMOVL1:%.*]] = cir.call @vmovl_high_u32({{.*}}) : {{.*}} 
-> !cir.vector<2 x !u64i> +// CIR: {{%.*}} = cir.sub [[VMOVL0]], [[VMOVL1]] : !cir.vector<2 x !u64i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE0:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[A]], <2 x i32> +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE0]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM: [[VMOVL0:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// LLVM: [[SHUFFLE1:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> +// LLVM: [[TMP2:%.*]] = bitcast <2 x i32> [[SHUFFLE1]] to <8 x i8> +// LLVM: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x i32> +// LLVM: [[VMOVL1:%.*]] = zext <2 x i32> [[TMP3]] to <2 x i64> +// LLVM: [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL0]], [[VMOVL1]] +// LLVM: ret <2 x i64> [[SUB_I]] + return vsubl_high_u32(a, b); +} + +// LLVM-LABEL: @test_vsubw_high_s8( +// CIR-LABEL: @vsubw_high_s8( +int16x8_t test_vsubw_high_s8(int16x8_t a, int8x16_t b) { +// CIR: [[VMOVL_I:%.*]] = cir.call @vmovl_high_s8({{.*}}) : {{.*}} -> !cir.vector<8 x !s16i> +// CIR: {{%.*}} = cir.sub {{%.*}}, [[VMOVL_I]] : !cir.vector<8 x !s16i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> +// LLVM: [[VMOVL_I:%.*]] = sext <8 x i8> [[SHUFFLE_I]] to <8 x i16> +// LLVM: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMOVL_I]] +// LLVM: ret <8 x i16> [[SUB_I]] + return vsubw_high_s8(a, b); +} + +// LLVM-LABEL: @test_vsubw_high_s16( +// CIR-LABEL: @vsubw_high_s16( +int32x4_t test_vsubw_high_s16(int32x4_t a, int16x8_t b) { +// CIR: [[VMOVL_I:%.*]] = cir.call @vmovl_high_s16({{.*}}) : {{.*}} -> !cir.vector<4 x !s32i> +// CIR: {{%.*}} = cir.sub {{%.*}}, [[VMOVL_I]] : !cir.vector<4 x !s32i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> +// LLVM: 
[[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM: [[VMOVL_I:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMOVL_I]] +// LLVM: ret <4 x i32> [[SUB_I]] + return vsubw_high_s16(a, b); +} + +// LLVM-LABEL: @test_vsubw_high_s32( +// CIR-LABEL: @vsubw_high_s32( +int64x2_t test_vsubw_high_s32(int64x2_t a, int32x4_t b) { +// CIR: [[VMOVL_I:%.*]] = cir.call @vmovl_high_s32({{.*}}) : {{.*}} -> !cir.vector<2 x !s64i> +// CIR: {{%.*}} = cir.sub {{%.*}}, [[VMOVL_I]] : !cir.vector<2 x !s64i> + +// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM: [[VMOVL_I:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64> +// LLVM: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMOVL_I]] +// LLVM: ret <2 x i64> [[SUB_I]] + return vsubw_high_s32(a, b); +} + +// LLVM-LABEL: @test_vsubw_high_u8( +// CIR-LABEL: @vsubw_high_u8( +uint16x8_t test_vsubw_high_u8(uint16x8_t a, uint8x16_t b) { +// CIR: [[VMOVL_I:%.*]] = cir.call @vmovl_high_u8({{.*}}) : {{.*}} -> !cir.vector<8 x !u16i> +// CIR: {{%.*}} = cir.sub {{%.*}}, [[VMOVL_I]] : !cir.vector<8 x !u16i> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> [[B]], <16 x i8> [[B]], <8 x i32> +// LLVM: [[VMOVL_I:%.*]] = zext <8 x i8> [[SHUFFLE_I]] to <8 x i16> +// LLVM: [[SUB_I:%.*]] = sub <8 x i16> [[A]], [[VMOVL_I]] +// LLVM: ret <8 x i16> [[SUB_I]] + return vsubw_high_u8(a, b); +} + +// LLVM-LABEL: @test_vsubw_high_u16( +// CIR-LABEL: @vsubw_high_u16( +uint32x4_t test_vsubw_high_u16(uint32x4_t a, uint16x8_t b) { +// CIR: [[VMOVL_I:%.*]] = cir.call @vmovl_high_u16({{.*}}) : {{.*}} -> !cir.vector<4 x !u32i> +// CIR: {{%.*}} 
= cir.sub {{%.*}}, [[VMOVL_I]] : !cir.vector<4 x !u32i> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> [[B]], <8 x i16> [[B]], <4 x i32> +// LLVM: [[TMP0:%.*]] = bitcast <4 x i16> [[SHUFFLE_I]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> +// LLVM: [[VMOVL_I:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32> +// LLVM: [[SUB_I:%.*]] = sub <4 x i32> [[A]], [[VMOVL_I]] +// LLVM: ret <4 x i32> [[SUB_I]] + return vsubw_high_u16(a, b); +} + +// LLVM-LABEL: @test_vsubw_high_u32( +// CIR-LABEL: @vsubw_high_u32( +uint64x2_t test_vsubw_high_u32(uint64x2_t a, uint32x4_t b) { +// CIR: [[VMOVL_I:%.*]] = cir.call @vmovl_high_u32({{.*}}) : {{.*}} -> !cir.vector<2 x !u64i> +// CIR: {{%.*}} = cir.sub {{%.*}}, [[VMOVL_I]] : !cir.vector<2 x !u64i> + +// LLVM-SAME: <2 x i64> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) +// LLVM: [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[B]], <2 x i32> +// LLVM: [[TMP0:%.*]] = bitcast <2 x i32> [[SHUFFLE_I]] to <8 x i8> +// LLVM: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> +// LLVM: [[VMOVL_I:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> +// LLVM: [[SUB_I:%.*]] = sub <2 x i64> [[A]], [[VMOVL_I]] +// LLVM: ret <2 x i64> [[SUB_I]] + return vsubw_high_u32(a, b); +} From 95cf74cdb87cdb44b32908cf17b25bf4cfae360e Mon Sep 17 00:00:00 2001 From: Walter Lee <49250218+googlewalt@users.noreply.github.com> Date: Wed, 24 Jun 2026 17:37:23 -0400 Subject: [PATCH 418/511] Revert "[Bazel] Fixes 639c5a0" (#205681) Reverts llvm/llvm-project#205273. 639c5a0 was rolled back. 
--- .../bazel/llvm-project-overlay/libc/BUILD.bazel | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index c88797cf898f6..4a6df7f43f39c 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1605,22 +1605,6 @@ libc_support_library( ], ) -libc_support_library( - name = "__support_osutil_linux_syscall_wrappers_ioctl", - hdrs = ["src/__support/OSUtil/linux/syscall_wrappers/ioctl.h"], - target_compatible_with = select({ - "@platforms//os:linux": [], - "//conditions:default": ["@platforms//:incompatible"], - }), - deps = [ - ":__support_cpp_type_traits", - ":__support_error_or", - ":__support_macros_attributes", - ":__support_macros_config", - ":__support_osutil_syscall", - ], -) - libc_support_library( name = "__support_osutil_linux_syscall_wrappers_dup", hdrs = ["src/__support/OSUtil/linux/syscall_wrappers/dup.h"], From 4fbfe7a17327188b408dbe04fdaa75d4110592fb Mon Sep 17 00:00:00 2001 From: Paul Osmialowski Date: Wed, 24 Jun 2026 22:41:20 +0100 Subject: [PATCH 419/511] [flang][cmake] Enable the runtimes in the CMake caches (#205642) I've tried to introduce that in the PR #198863, but sadly, the buildbots could not cope with this, so I decided to introduce it separately. This patch also fixes the relevant places in the docs. 
--- flang/cmake/caches/BOLT-PGO.cmake | 1 + flang/cmake/caches/BOLT.cmake | 1 + flang/cmake/caches/PGO-stage2.cmake | 1 + flang/cmake/caches/PGO.cmake | 1 + llvm/docs/AdvancedBuilds.rst | 4 +--- 5 files changed, 5 insertions(+), 3 deletions(-) diff --git a/flang/cmake/caches/BOLT-PGO.cmake b/flang/cmake/caches/BOLT-PGO.cmake index dc13daa14523f..e71b9a9817674 100644 --- a/flang/cmake/caches/BOLT-PGO.cmake +++ b/flang/cmake/caches/BOLT-PGO.cmake @@ -2,6 +2,7 @@ set(BOLT_PGO_CMAKE_CACHE "PGO" CACHE STRING "") set(LLVM_ENABLE_PROJECTS "bolt;clang;flang;lld" CACHE STRING "") +set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libunwind;openmp" CACHE STRING "") set(CLANG_BOOTSTRAP_TARGETS stage2-clang-bolt diff --git a/flang/cmake/caches/BOLT.cmake b/flang/cmake/caches/BOLT.cmake index 155eec704344b..56ff3c78d6c13 100644 --- a/flang/cmake/caches/BOLT.cmake +++ b/flang/cmake/caches/BOLT.cmake @@ -7,6 +7,7 @@ set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "") set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "") set(LLVM_ENABLE_PROJECTS "bolt;clang;flang" CACHE STRING "") +set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libunwind;openmp" CACHE STRING "") set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "") # setup toolchain diff --git a/flang/cmake/caches/PGO-stage2.cmake b/flang/cmake/caches/PGO-stage2.cmake index b38a3adaa4fbc..067e62da5562e 100644 --- a/flang/cmake/caches/PGO-stage2.cmake +++ b/flang/cmake/caches/PGO-stage2.cmake @@ -2,3 +2,4 @@ set(CMAKE_BUILD_TYPE "Release" CACHE STRING "") set(LLVM_ENABLE_PROJECTS "clang;flang;lld" CACHE STRING "") +set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libunwind;openmp" CACHE STRING "") diff --git a/flang/cmake/caches/PGO.cmake b/flang/cmake/caches/PGO.cmake index 73219b0085ba0..eed1e4322bbe9 100644 --- a/flang/cmake/caches/PGO.cmake +++ b/flang/cmake/caches/PGO.cmake @@ -4,6 +4,7 @@ set(CMAKE_BUILD_TYPE "Release" CACHE STRING "") set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "") 
set(LLVM_ENABLE_PROJECTS "clang;flang;lld" CACHE STRING "") +set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libunwind;openmp" CACHE STRING "") set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "") set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED IR CACHE BOOL "") diff --git a/llvm/docs/AdvancedBuilds.rst b/llvm/docs/AdvancedBuilds.rst index 771b1a2b50efc..95fdc7966d02a 100644 --- a/llvm/docs/AdvancedBuilds.rst +++ b/llvm/docs/AdvancedBuilds.rst @@ -145,7 +145,6 @@ following command: .. code-block:: console $ cmake -G Ninja -C /flang/cmake/caches/PGO.cmake \ - -DLLVM_ENABLE_RUNTIMES="compiler-rt;flang-rt;libunwind;openmp" \ /llvm There are several additional options that the cache file also accepts to modify @@ -183,8 +182,7 @@ following command: .. code-block:: console - $ cmake -G Ninja -C /clang/cmake/caches/PGO.cmake \ - -DLLVM_ENABLE_RUNTIMES="compiler-rt;flang-rt;libunwind;openmp" \ + $ cmake -G Ninja -C /flang/cmake/caches/PGO.cmake \ -DBOOTSTRAP_CLANG_PGO_TRAINING_DATA_SOURCE_DIR= \ -DBOOTSTRAP_CLANG_PGO_TRAINING_DEPS=runtimes \ -DBOOTSTRAP_FLANG_PGO_TRAINING_DATA_SOURCE_DIR= \ From a2e4f0306f53f7c02c507d4d504d25af3bbe8549 Mon Sep 17 00:00:00 2001 From: Yao Qi Date: Wed, 24 Jun 2026 22:41:38 +0100 Subject: [PATCH 420/511] [lldb][Mach-O] Bounds-check GetArchitectureAtIndex against m_fat_archs (#205289) `ObjectContainerUniversalMachO::GetArchitectureAtIndex` used `m_header.nfat_arch` (read directly from the file and untrusted, up to 0xFFFFFFFF) as the bound before indexing `m_fat_archs`. When ParseHeader exhausts the data partway through and breaks early, `m_fat_archs.size()` can be smaller than `nfat_arch`, so the indexed load is out of bounds. Bound the check on the actual vector size instead. Found by lldb-target-fuzzer. 
--------- Co-authored-by: Alex Langford --- .../ObjectContainerUniversalMachO.cpp | 2 +- .../ObjectContainerUniversalMachOTest.cpp | 27 +++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/lldb/source/Plugins/ObjectContainer/Universal-Mach-O/ObjectContainerUniversalMachO.cpp b/lldb/source/Plugins/ObjectContainer/Universal-Mach-O/ObjectContainerUniversalMachO.cpp index f3127ef920982..1fcf7dd882bf4 100644 --- a/lldb/source/Plugins/ObjectContainer/Universal-Mach-O/ObjectContainerUniversalMachO.cpp +++ b/lldb/source/Plugins/ObjectContainer/Universal-Mach-O/ObjectContainerUniversalMachO.cpp @@ -142,7 +142,7 @@ size_t ObjectContainerUniversalMachO::GetNumArchitectures() const { bool ObjectContainerUniversalMachO::GetArchitectureAtIndex( uint32_t idx, ArchSpec &arch) const { - if (idx < m_header.nfat_arch) { + if (idx < m_fat_archs.size()) { arch.SetArchitecture(eArchTypeMachO, m_fat_archs[idx].GetCPUType(), m_fat_archs[idx].GetCPUSubType()); return true; diff --git a/lldb/unittests/ObjectContainer/ObjectContainerUniversalMachOTest.cpp b/lldb/unittests/ObjectContainer/ObjectContainerUniversalMachOTest.cpp index a4346befbfd8b..50ab4c9557b66 100644 --- a/lldb/unittests/ObjectContainer/ObjectContainerUniversalMachOTest.cpp +++ b/lldb/unittests/ObjectContainer/ObjectContainerUniversalMachOTest.cpp @@ -12,6 +12,8 @@ #include "lldb/Core/ModuleSpec.h" #include "lldb/Host/FileSystem.h" #include "lldb/Symbol/ObjectFile.h" +#include "lldb/Utility/ArchSpec.h" +#include "lldb/Utility/DataBufferHeap.h" #include "lldb/Utility/FileSpec.h" #include "llvm/Support/FileSystem.h" #include "llvm/Testing/Support/Error.h" @@ -117,3 +119,28 @@ TEST_F(ObjectContainerUniversalMachOTest, SliceOffsetZero) { ASSERT_THAT_ERROR(TmpFile->discard(), llvm::Succeeded()); } + +// Regression fixture: a universal (fat) Mach-O whose header claims a huge +// nfat_arch (here 0xAFAFAFAF) but provides no fat_arch entries beyond the +// header bytes. Found by lldb-target-fuzzer. 
+TEST_F(ObjectContainerUniversalMachOTest, NfatArchTruncatedSlices) { + // Hand-crafted fat header: FAT_MAGIC_64 + nfat_arch=0xAFAFAFAF + 2 stray + // payload bytes, not enough for even one fat_arch_64 entry (32 bytes). + const uint8_t kData[] = { + 0xCA, 0xFE, 0xBA, 0xBF, // magic: FAT_MAGIC_64 (big endian) + 0xAF, 0xAF, 0xAF, 0xAF, // nfat_arch: 0xAFAFAFAF (untrusted, huge) + 0xAF, 0xAF, // truncated arch payload + }; + lldb::DataBufferSP Buf = + std::make_shared(kData, sizeof(kData)); + + std::unique_ptr Container( + ObjectContainerUniversalMachO::CreateInstance( + /*module_sp=*/nullptr, Buf, /*data_offset=*/0, /*file=*/nullptr, + /*file_offset=*/0, /*length=*/sizeof(kData))); + ASSERT_NE(Container.get(), nullptr); + + // m_fat_archs has zero elements, returns false. + ArchSpec Arch; + EXPECT_FALSE(Container->GetArchitectureAtIndex(0, Arch)); +} From 4264aad7f3564fd9a46b9764377333994a34867d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=BCseyin=20D=C3=B6nmez?= Date: Wed, 24 Jun 2026 23:43:54 +0200 Subject: [PATCH 421/511] [Sema] Fix ICE when passing vector types to `abs` (#205017) fix for ICE in `Sema::CheckAbsoluteValueFunction` We failed to reject non-scalar types. Fixes: #204777 --- clang/docs/ReleaseNotes.rst | 1 + clang/lib/Sema/SemaChecking.cpp | 6 ++++++ clang/test/Sema/builtin-abs-invalid.c | 10 ++++++++++ 3 files changed, 17 insertions(+) create mode 100644 clang/test/Sema/builtin-abs-invalid.c diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 8bb17755b28f5..4ca239ca5f2e4 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -855,6 +855,7 @@ Miscellaneous Clang Crashes Fixed - Fixed an assertion failure in ``isAtEndOfMacroExpansion`` on macro expansions crossing the boundary of two fileIDs. (#GH115007), (#GH21755) - Fixed an assertion failure when ``__builtin_dump_struct`` is used with an immediate-escalated callable. 
(#GH192846) +- Fixed a crash when passing an implicitly converted single-element vector to an ``abs`` function. (#GH204777) - Fixed a crash when diagnosing an invalid out-of-line definition of a member class template. (#GH201490) OpenACC Specific Changes diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index fa0ec55a63bb7..c3ca45ee55786 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -10608,6 +10608,12 @@ void Sema::CheckAbsoluteValueFunction(const CallExpr *Call, if (IsStdAbs) return; + // Prevent reaching unreachable code in getAbsoluteValueKind for unsupported + // types. + if (!ArgType->isIntegralOrEnumerationType() && + !ArgType->isRealFloatingType() && !ArgType->isAnyComplexType()) + return; + AbsoluteValueKind ArgValueKind = getAbsoluteValueKind(ArgType); AbsoluteValueKind ParamValueKind = getAbsoluteValueKind(ParamType); diff --git a/clang/test/Sema/builtin-abs-invalid.c b/clang/test/Sema/builtin-abs-invalid.c new file mode 100644 index 0000000000000..9ec6c0edc491a --- /dev/null +++ b/clang/test/Sema/builtin-abs-invalid.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s +// expected-no-diagnostics + +int abs(int); + +typedef int int1 __attribute__((__vector_size__(4))); + +void test_vector_abs(int1 x) { + (void)abs(x); +} \ No newline at end of file From 7bf71291990c7ef4ff11f90c081adf42b57acd41 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Thu, 25 Jun 2026 06:44:54 +0900 Subject: [PATCH 422/511] [SYCL] Disable CommandGraph/Scheduler unit tests on Win (#22427) See https://github.com/intel/llvm/issues/22425 Signed-off-by: Nick Sarnie --- sycl/unittests/Extensions/CommandGraph/CMakeLists.txt | 5 ++++- sycl/unittests/scheduler/CMakeLists.txt | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt b/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt index 70d50bdc4f5b1..e67e29a9c74b6 100644
--- a/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt +++ b/sycl/unittests/Extensions/CommandGraph/CMakeLists.txt @@ -1,5 +1,8 @@ set(CMAKE_CXX_EXTENSIONS OFF) - +if(WIN32) + # https://github.com/intel/llvm/issues/22425 + return() +endif() add_sycl_unittest(CommandGraphExtensionTests OBJECT Barrier.cpp CommandGraph.cpp diff --git a/sycl/unittests/scheduler/CMakeLists.txt b/sycl/unittests/scheduler/CMakeLists.txt index 9041793ecdaf2..1a31e9df51f87 100644 --- a/sycl/unittests/scheduler/CMakeLists.txt +++ b/sycl/unittests/scheduler/CMakeLists.txt @@ -1,3 +1,7 @@ +if(WIN32) + # https://github.com/intel/llvm/issues/22425 + return() +endif() add_sycl_unittest(SchedulerTests OBJECT BlockedCommands.cpp Commands.cpp From d133e72fcc1cbf7311160ba8f64f1805750fc66f Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 24 Jun 2026 15:22:25 -0700 Subject: [PATCH 423/511] [Instrumentor] Add subtype IDs to complement type IDs for vectors/arrays (#205466) If the type of an argument passed to the instrumentation is a vector or array, we still want to filter on the underlying type, and the instrumentation might also need to know. Thus, we can now pass a subtype ID, which is -1 unless the type is a vector or array, in which case it is the element type ID. Structs need to be handled differently.
--- .../llvm/Transforms/IPO/Instrumentor.h | 21 ++++ .../IPO/InstrumentorRuntimeHelper.h | 4 +- llvm/lib/Transforms/IPO/Instrumentor.cpp | 101 ++++++++++++++++-- .../IPO/InstrumentorStubPrinter.cpp | 9 +- .../Instrumentor/alloca_and_function.ll | 8 +- .../test/Instrumentation/Instrumentor/cast.ll | 2 +- .../Instrumentor/cast_crash.ll | 4 +- .../Instrumentor/default_config.json | 20 ++++ .../Instrumentation/Instrumentor/default_rt.c | 44 ++++---- .../Instrumentation/Instrumentor/default_rt.h | 4 +- .../Instrumentor/generate_rt.ll | 1 + .../Instrumentor/module_and_globals.ll | 20 ++-- .../Instrumentor/numeric_subtypeid.ll | 39 +++++++ .../numeric_subtypeid_config.json | 19 ++++ .../Instrumentor/rt_config.json | 16 +++ 15 files changed, 257 insertions(+), 55 deletions(-) create mode 100644 llvm/test/Instrumentation/Instrumentor/numeric_subtypeid.ll create mode 100644 llvm/test/Instrumentation/Instrumentor/numeric_subtypeid_config.json diff --git a/llvm/include/llvm/Transforms/IPO/Instrumentor.h b/llvm/include/llvm/Transforms/IPO/Instrumentor.h index c143098db4210..fceb9e58b9791 100644 --- a/llvm/include/llvm/Transforms/IPO/Instrumentor.h +++ b/llvm/include/llvm/Transforms/IPO/Instrumentor.h @@ -65,6 +65,7 @@ struct IRTArg { POTENTIALLY_INDIRECT = 1 << 3, INDIRECT_HAS_SIZE = 1 << 4, VALUE_PACK = 1 << 5, + TYPEID = 1 << 6, LAST, }; @@ -606,6 +607,9 @@ struct BaseInstructionIO : public InstrumentationOpportunity { LLVM_ABI static Value *getTypeId(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB); + LLVM_ABI static Value *getSubTypeId(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); }; /// The common instrumentation opportunity class for instruction opportunities. 
@@ -933,6 +937,7 @@ struct StoreIO : public InstructionIO { PassStoredValueSize, PassAlignment, PassValueTypeId, + PassValueSubTypeId, PassAtomicityOrdering, PassSyncScopeId, PassIsVolatile, @@ -982,6 +987,9 @@ struct StoreIO : public InstructionIO { LLVM_ABI static Value *getValueTypeId(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB); + LLVM_ABI static Value *getValueSubTypeId(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); LLVM_ABI static Value *getAtomicityOrdering(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB); @@ -1026,6 +1034,7 @@ struct LoadIO : public InstructionIO { PassValueSize, PassAlignment, PassValueTypeId, + PassValueSubTypeId, PassAtomicityOrdering, PassSyncScopeId, PassIsVolatile, @@ -1075,6 +1084,9 @@ struct LoadIO : public InstructionIO { LLVM_ABI static Value *getValueTypeId(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB); + LLVM_ABI static Value *getValueSubTypeId(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); LLVM_ABI static Value *getAtomicityOrdering(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB); @@ -1113,10 +1125,12 @@ struct CastIO final enum ConfigKind { PassInput, PassInputTypeId, + PassInputSubTypeId, PassInputSize, PassResult, ReplaceResult, PassResultTypeId, + PassResultSubTypeId, PassResultSize, PassOpcode, PassId, @@ -1138,12 +1152,18 @@ struct CastIO final LLVM_ABI static Value *getInputTypeId(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB); + LLVM_ABI static Value *getInputSubTypeId(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); LLVM_ABI static Value *getInputSize(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB); LLVM_ABI static Value *getResultTypeId(Value &V, Type &Ty, InstrumentationConfig &IConf, 
InstrumentorIRBuilderTy &IIRB); + LLVM_ABI static Value *getResultSubTypeId(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB); LLVM_ABI static Value *getResultSize(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB); @@ -1175,6 +1195,7 @@ struct NumericIO final enum ConfigKind { PassTypeId, + PassSubTypeId, PassSize, PassOpcode, PassResult, diff --git a/llvm/include/llvm/Transforms/IPO/InstrumentorRuntimeHelper.h b/llvm/include/llvm/Transforms/IPO/InstrumentorRuntimeHelper.h index af58eed0bd301..d6e451d9ed7fb 100644 --- a/llvm/include/llvm/Transforms/IPO/InstrumentorRuntimeHelper.h +++ b/llvm/include/llvm/Transforms/IPO/InstrumentorRuntimeHelper.h @@ -151,8 +151,10 @@ enum LLVMTypeID { }; /// Get the string name of an LLVM Type ID. -static inline const char *getLLVMTypeIDName(uint32_t type_id) { +static inline const char *getLLVMTypeIDName(int32_t type_id) { switch (type_id) { + case -1: + return "none"; case HalfTyID: return "half"; case BFloatTyID: diff --git a/llvm/lib/Transforms/IPO/Instrumentor.cpp b/llvm/lib/Transforms/IPO/Instrumentor.cpp index 4517123cec9f1..3b52e3e1f4605 100644 --- a/llvm/lib/Transforms/IPO/Instrumentor.cpp +++ b/llvm/lib/Transforms/IPO/Instrumentor.cpp @@ -155,6 +155,19 @@ Constant *getCI(Type *IT, Ty Val, bool IsSigned = false) { return ConstantInt::get(IT, Val, IsSigned); } +Constant *getSubTypeID(Type &OpTy, Type &ReqTy) { + switch (OpTy.getTypeID()) { + case Type::TypeID::ArrayTyID: + case Type::TypeID::FixedVectorTyID: + case Type::TypeID::ScalableVectorTyID: + return getCI(&ReqTy, OpTy.getContainedType(0)->getTypeID()); + default: + break; + } + + return getCI(&ReqTy, -1, /*IsSigned=*/true); +} + /// The core of the instrumentor pass, which instruments the module as the /// instrumentation configuration mandates. 
class InstrumentorImpl final { @@ -933,8 +946,7 @@ Value *BaseInstructionIO::getRightOperand(Value &V, Type &Ty, auto &I = cast(V); if (I.getNumOperands() > 1) return I.getOperand(1); - else - return PoisonValue::get(&Ty); + return PoisonValue::get(&Ty); } Value *BaseInstructionIO::getTypeId(Value &V, Type &Ty, @@ -943,6 +955,12 @@ Value *BaseInstructionIO::getTypeId(Value &V, Type &Ty, return getCI(&Ty, V.getType()->getTypeID()); } +Value *BaseInstructionIO::getSubTypeId(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + return getSubTypeID(*V.getType(), Ty); +} + /// FunctionIO /// { void FunctionIO::init(InstrumentationConfig &IConf, @@ -1162,9 +1180,15 @@ void StoreIO::init(InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB, } if (Config.has(PassValueTypeId)) { IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "value_type_id", - "The type id of the stored value.", IRTArg::NONE, + "The type id of the stored value.", IRTArg::TYPEID, getValueTypeId)); } + if (Config.has(PassValueSubTypeId)) { + IRTArgs.push_back(IRTArg( + IIRB.Int32Ty, "value_sub_type_id", + "The type id of the stored value (for arrays and vectors, or -1).", + IRTArg::TYPEID, getValueSubTypeId)); + } if (Config.has(PassAtomicityOrdering)) { IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "atomicity_ordering", "The atomicity ordering of the store.", @@ -1236,6 +1260,13 @@ Value *StoreIO::getValueTypeId(Value &V, Type &Ty, InstrumentationConfig &IConf, return getCI(&Ty, SI.getValueOperand()->getType()->getTypeID()); } +Value *StoreIO::getValueSubTypeId(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + auto &SI = cast(V); + return getSubTypeID(*SI.getValueOperand()->getType(), Ty); +} + Value *StoreIO::getAtomicityOrdering(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB) { @@ -1299,9 +1330,15 @@ void LoadIO::init(InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB, } if 
(Config.has(PassValueTypeId)) { IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "value_type_id", - "The type id of the loaded value.", IRTArg::NONE, + "The type id of the loaded value.", IRTArg::TYPEID, getValueTypeId)); } + if (Config.has(PassValueSubTypeId)) { + IRTArgs.push_back(IRTArg( + IIRB.Int32Ty, "value_sub_type_id", + "The sub type id of the loaded value (for arrays and vectors, or -1).", + IRTArg::TYPEID, getValueSubTypeId)); + } if (Config.has(PassAtomicityOrdering)) { IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "atomicity_ordering", "The atomicity ordering of the load.", @@ -1372,6 +1409,13 @@ Value *LoadIO::getValueTypeId(Value &V, Type &Ty, InstrumentationConfig &IConf, return getCI(&Ty, LI.getType()->getTypeID()); } +Value *LoadIO::getValueSubTypeId(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + auto &LI = cast(V); + return getSubTypeID(*LI.getType(), Ty); +} + Value *LoadIO::getAtomicityOrdering(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB) { @@ -1648,12 +1692,21 @@ void CastIO::init(InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB, Config = *UserConfig; bool IsPRE = getLocationKind() == InstrumentationLocation::INSTRUCTION_PRE; if (Config.has(PassInput)) - IRTArgs.push_back(IRTArg(IIRB.Int64Ty, "input", "Input value of the cast.", - IRTArg::POTENTIALLY_INDIRECT, getInput)); + IRTArgs.push_back( + IRTArg(IIRB.Int64Ty, "input", "Input value of the cast.", + IRTArg::POTENTIALLY_INDIRECT | + (Config.has(PassResultSize) ? 
IRTArg::INDIRECT_HAS_SIZE + : IRTArg::NONE), + getInput)); if (Config.has(PassInputTypeId)) IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "input_type_id", - "The type id of the input value.", IRTArg::NONE, + "The type id of the input value.", IRTArg::TYPEID, getInputTypeId)); + if (Config.has(PassInputSubTypeId)) + IRTArgs.push_back(IRTArg( + IIRB.Int32Ty, "input_sub_type_id", + "The sub type id of the input value (for arrays and vectors, or -1).", + IRTArg::TYPEID, getInputSubTypeId)); if (Config.has(PassInputSize)) IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "input_size", "The size of the input value.", IRTArg::NONE, @@ -1661,12 +1714,19 @@ void CastIO::init(InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB, if (!IsPRE && Config.has(PassResult)) IRTArgs.push_back( IRTArg(IIRB.Int64Ty, "result", "Result of the cast.", - IRTArg::REPLACABLE | IRTArg::POTENTIALLY_INDIRECT, getValue, - Config.has(ReplaceResult) ? replaceValue : nullptr)); + (IRTArg::REPLACABLE | IRTArg::POTENTIALLY_INDIRECT) | + (Config.has(PassResultSize) ? IRTArg::INDIRECT_HAS_SIZE + : IRTArg::NONE), + getValue, Config.has(ReplaceResult) ? 
replaceValue : nullptr)); if (Config.has(PassResultTypeId)) IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "result_type_id", - "The type id of the result value.", IRTArg::NONE, + "The type id of the result value.", IRTArg::TYPEID, getResultTypeId)); + if (Config.has(PassResultSubTypeId)) + IRTArgs.push_back(IRTArg( + IIRB.Int32Ty, "result_sub_type_id", + "The sub type id of the result value (for arrays and vectors, or -1).", + IRTArg::TYPEID, getResultSubTypeId)); if (Config.has(PassResultSize)) IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "result_size", "The size of the result value.", IRTArg::NONE, @@ -1692,6 +1752,13 @@ Value *CastIO::getInputTypeId(Value &V, Type &Ty, InstrumentationConfig &IConf, return getCI(&Ty, CI.getSrcTy()->getTypeID()); } +Value *CastIO::getInputSubTypeId(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + auto &CI = cast(V); + return getSubTypeID(*CI.getSrcTy(), Ty); +} + Value *CastIO::getInputSize(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB) { auto &CI = cast(V); @@ -1705,6 +1772,13 @@ Value *CastIO::getResultTypeId(Value &V, Type &Ty, InstrumentationConfig &IConf, return getCI(&Ty, CI.getDestTy()->getTypeID()); } +Value *CastIO::getResultSubTypeId(Value &V, Type &Ty, + InstrumentationConfig &IConf, + InstrumentorIRBuilderTy &IIRB) { + auto &CI = cast(V); + return getSubTypeID(*CI.getDestTy(), Ty); +} + Value *CastIO::getResultSize(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB) { auto &CI = cast(V); @@ -1766,8 +1840,13 @@ void NumericIO::init(InstrumentationConfig &IConf, (Config.has(PassSize) ? 
IRTArg::INDIRECT_HAS_SIZE : IRTArg::NONE); if (Config.has(PassTypeId)) IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "type_id", - "The operation's type id.", IRTArg::NONE, + "The operation's type id.", IRTArg::TYPEID, getTypeId)); + if (Config.has(PassSubTypeId)) + IRTArgs.push_back( + IRTArg(IIRB.Int32Ty, "sub_type_id", + "The operation's sub type id (for arrays and vectors, or -1).", + IRTArg::TYPEID, getSubTypeId)); if (Config.has(PassSize)) IRTArgs.push_back(IRTArg(IIRB.Int32Ty, "size", "The operation's type size.", IRTArg::NONE, getTypeSize)); diff --git a/llvm/lib/Transforms/IPO/InstrumentorStubPrinter.cpp b/llvm/lib/Transforms/IPO/InstrumentorStubPrinter.cpp index d892ad5de1f2e..a2c5c7660e61f 100644 --- a/llvm/lib/Transforms/IPO/InstrumentorStubPrinter.cpp +++ b/llvm/lib/Transforms/IPO/InstrumentorStubPrinter.cpp @@ -53,6 +53,8 @@ static std::pair getAsCType(Type *Ty, /// type \p Ty. The flags in \p Flags describe the properties of the argument. /// See IRTArg::IRArgFlagTy. static std::string getPrintfFormatString(Type *Ty, unsigned Flags) { + if (Flags & IRTArg::TYPEID) + return "%s"; if (Ty->isIntegerTy()) { if (Ty->getIntegerBitWidth() > 32) { assert(Ty->getIntegerBitWidth() == 64); @@ -90,7 +92,12 @@ std::pair IRTCallDescription::createCBodies() const { if (!First) AddToFormats(", "); First = false; - AddToArgs(", " + IRArg.Name); + + if (!(IRArg.Flags & IRTArg::TYPEID)) { + AddToArgs(", " + IRArg.Name); + } else { + AddToArgs(", getLLVMTypeIDName(" + IRArg.Name + ")"); + } AddToFormats(IRArg.Name + ": "); if (NumReplaceableArgs == 1 && (IRArg.Flags & IRTArg::REPLACABLE)) { DirectReturnValue = IRArg.Name; diff --git a/llvm/test/Instrumentation/Instrumentor/alloca_and_function.ll b/llvm/test/Instrumentation/Instrumentor/alloca_and_function.ll index f98dee743c09d..a2afecda64ebf 100644 --- a/llvm/test/Instrumentation/Instrumentor/alloca_and_function.ll +++ b/llvm/test/Instrumentation/Instrumentor/alloca_and_function.ll @@ -9,14 +9,12 @@ target datalayout = 
"e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16 declare void @use(ptr) -;. ; CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1000, ptr @__instrumentor_ctor, ptr null }] ; CHECK: @__instrumentor_.str = private unnamed_addr constant [8 x i8] c"\00", align 1 ; CHECK: @__instrumentor_.str.1 = private unnamed_addr constant [1 x i8] zeroinitializer, align 1 ; CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1000, ptr @__instrumentor_dtor, ptr null }] ; CHECK: @__instrumentor_.str.2 = private unnamed_addr constant [4 x i8] c"foo\00", align 1 ; CHECK: @__instrumentor_value_pack = internal global <{ i32, i32, [6 x i8], i16, i32, i32, [4 x i8], float }> <{ i32 2, i32 12, [6 x i8] zeroinitializer, i16 0, i32 4, i32 2, [4 x i8] zeroinitializer, float 0.000000e+00 }> -;. define float @foo(i16 %a, float %b) { ; CHECK-LABEL: define float @foo( ; CHECK-SAME: i16 [[A:%.*]], float [[B:%.*]]) { @@ -37,9 +35,9 @@ define float @foo(i16 %a, float %b) { ; CHECK-NEXT: [[TMP9:%.*]] = call ptr @__instrumentor_post_alloca(ptr [[TMP8]], i64 2, i64 16, i32 -3) #[[ATTR1]] ; CHECK-NEXT: [[TMP15:%.*]] = call ptr @__instrumentor_post_base_pointer_info(ptr [[TMP9]], i32 2, i32 -4) #[[ATTR1]] ; CHECK-NEXT: [[TMP10:%.*]] = zext i16 [[TMP4]] to i64 -; CHECK-NEXT: [[TMP11:%.*]] = call ptr @__instrumentor_pre_store(ptr [[TMP9]], i32 0, ptr [[TMP15]], i64 [[TMP10]], i64 2, i64 2, i32 12, i32 0, i8 1, i8 0, i32 4) #[[ATTR1]] +; CHECK-NEXT: [[TMP11:%.*]] = call ptr @__instrumentor_pre_store(ptr [[TMP9]], i32 0, ptr [[TMP15]], i64 [[TMP10]], i64 2, i64 2, i32 12, i32 -1, i32 0, i8 1, i8 0, i32 4) #[[ATTR1]] ; CHECK-NEXT: store i16 [[TMP4]], ptr [[TMP11]], align 2 -; CHECK-NEXT: call void @__instrumentor_post_store(ptr [[TMP9]], i32 0, ptr [[TMP15]], i64 [[TMP10]], i64 2, i64 2, i32 12, i32 0, i8 1, i8 0, i32 -4) #[[ATTR1]] +; CHECK-NEXT: call void @__instrumentor_post_store(ptr [[TMP9]], i32 0, ptr 
[[TMP15]], i64 [[TMP10]], i64 2, i64 2, i32 12, i32 -1, i32 0, i8 1, i8 0, i32 -4) #[[ATTR1]] ; CHECK-NEXT: call void @use(ptr [[TMP9]]) ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP0]], ptr @__instrumentor_value_pack, i64 32, i1 false) ; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw <{ i32, i32, [6 x i8], i16, i32, i32, [4 x i8], float }>, ptr [[TMP0]], i32 0, i32 3 @@ -55,7 +53,5 @@ entry: call void @use(ptr %0) ret float %b } -;. ; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } ; CHECK: attributes #[[ATTR1]] = { willreturn } -;. diff --git a/llvm/test/Instrumentation/Instrumentor/cast.ll b/llvm/test/Instrumentation/Instrumentor/cast.ll index a8b1c96ec3878..77d9e0452784d 100644 --- a/llvm/test/Instrumentation/Instrumentor/cast.ll +++ b/llvm/test/Instrumentation/Instrumentor/cast.ll @@ -282,7 +282,7 @@ define i128 @test_ext(i32 %p1) { ; CHECK-NEXT: [[I1:%.*]] = zext i32 [[P1]] to i128 ; CHECK-NEXT: store i64 [[TMP3]], ptr [[TMP2]], align 4 ; CHECK-NEXT: store i128 [[I1]], ptr [[TMP1]], align 4 -; CHECK-NEXT: call void @__instrumentor_post_cast_ind(ptr [[TMP2]], i32 8, i32 12, i32 4, ptr [[TMP1]], i32 16, i32 12, i32 16, i32 40) #[[ATTR0]] +; CHECK-NEXT: call void @__instrumentor_post_cast_ind(ptr [[TMP2]], i32 12, i32 4, ptr [[TMP1]], i32 12, i32 16, i32 40) #[[ATTR0]] ; CHECK-NEXT: [[TMP4:%.*]] = load i128, ptr [[TMP1]], align 4 ; CHECK-NEXT: ret i128 [[TMP4]] ; diff --git a/llvm/test/Instrumentation/Instrumentor/cast_crash.ll b/llvm/test/Instrumentation/Instrumentor/cast_crash.ll index ff4bde8b8f852..f9bf05c9c0cca 100644 --- a/llvm/test/Instrumentation/Instrumentor/cast_crash.ll +++ b/llvm/test/Instrumentation/Instrumentor/cast_crash.ll @@ -14,11 +14,11 @@ define i128 @test_ext(i32 %p1) { ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i32 12 ; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 ; CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP5]] 
to i64 -; CHECK-NEXT: call void @__instrumentor_pre_cast(i64 [[TMP6]], i32 12, i32 4, i32 12, i32 16, i32 40, i32 3) #[[ATTR1]] +; CHECK-NEXT: call void @__instrumentor_pre_cast(i64 [[TMP6]], i32 12, i32 -1, i32 4, i32 12, i32 -1, i32 16, i32 40, i32 3) #[[ATTR1]] ; CHECK-NEXT: [[I1:%.*]] = zext i32 [[TMP5]] to i128 ; CHECK-NEXT: store i64 [[TMP6]], ptr [[TMP2]], align 4 ; CHECK-NEXT: store i128 [[I1]], ptr [[TMP1]], align 4 -; CHECK-NEXT: call void @__instrumentor_post_cast_ind(ptr [[TMP2]], i32 8, i32 12, i32 4, ptr [[TMP1]], i32 16, i32 12, i32 16, i32 40, i32 -3) #[[ATTR1]] +; CHECK-NEXT: call void @__instrumentor_post_cast_ind(ptr [[TMP2]], i32 12, i32 -1, i32 4, ptr [[TMP1]], i32 12, i32 -1, i32 16, i32 40, i32 -3) #[[ATTR1]] ; CHECK-NEXT: [[TMP7:%.*]] = load i128, ptr [[TMP1]], align 4 ; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP1]], ptr @__instrumentor_value_pack, i64 16, i1 false) ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw <{ i32, i32, [4 x i8], i32 }>, ptr [[TMP1]], i32 0, i32 3 diff --git a/llvm/test/Instrumentation/Instrumentor/default_config.json b/llvm/test/Instrumentation/Instrumentor/default_config.json index 4d9c2834cbc14..6892527282d49 100644 --- a/llvm/test/Instrumentation/Instrumentor/default_config.json +++ b/llvm/test/Instrumentation/Instrumentor/default_config.json @@ -168,6 +168,8 @@ "alignment.description": "The known access alignment.", "value_type_id": true, "value_type_id.description": "The type id of the loaded value.", + "value_sub_type_id": true, + "value_sub_type_id.description": "The sub type id of the loaded value (for arrays and vectors, or -1).", "atomicity_ordering": true, "atomicity_ordering.description": "The atomicity ordering of the load.", "sync_scope_id": true, @@ -196,6 +198,8 @@ "alignment.description": "The known access alignment.", "value_type_id": true, "value_type_id.description": "The type id of the stored value.", + "value_sub_type_id": true, + "value_sub_type_id.description": "The 
type id of the stored value (for arrays and vectors, or -1).", "atomicity_ordering": true, "atomicity_ordering.description": "The atomicity ordering of the store.", "sync_scope_id": true, @@ -213,10 +217,14 @@ "input.description": "Input value of the cast.", "input_type_id": true, "input_type_id.description": "The type id of the input value.", + "input_sub_type_id": true, + "input_sub_type_id.description": "The sub type id of the input value (for arrays and vectors, or -1).", "input_size": true, "input_size.description": "The size of the input value.", "result_type_id": true, "result_type_id.description": "The type id of the result value.", + "result_sub_type_id": true, + "result_sub_type_id.description": "The sub type id of the result value (for arrays and vectors, or -1).", "result_size": true, "result_size.description": "The size of the result value.", "opcode": true, @@ -230,6 +238,8 @@ "filter.description": "Static property filter to exclude instrumentation.", "type_id": true, "type_id.description": "The operation's type id.", + "sub_type_id": true, + "sub_type_id.description": "The operation's sub type id (for arrays and vectors, or -1).", "size": true, "size.description": "The operation's type size.", "opcode": true, @@ -299,6 +309,8 @@ "alignment.description": "The known access alignment.", "value_type_id": true, "value_type_id.description": "The type id of the loaded value.", + "value_sub_type_id": true, + "value_sub_type_id.description": "The sub type id of the loaded value (for arrays and vectors, or -1).", "atomicity_ordering": true, "atomicity_ordering.description": "The atomicity ordering of the load.", "sync_scope_id": true, @@ -326,6 +338,8 @@ "alignment.description": "The known access alignment.", "value_type_id": true, "value_type_id.description": "The type id of the stored value.", + "value_sub_type_id": true, + "value_sub_type_id.description": "The type id of the stored value (for arrays and vectors, or -1).", "atomicity_ordering": true, 
"atomicity_ordering.description": "The atomicity ordering of the store.", "sync_scope_id": true, @@ -343,6 +357,8 @@ "input.description": "Input value of the cast.", "input_type_id": true, "input_type_id.description": "The type id of the input value.", + "input_sub_type_id": true, + "input_sub_type_id.description": "The sub type id of the input value (for arrays and vectors, or -1).", "input_size": true, "input_size.description": "The size of the input value.", "result": true, @@ -350,6 +366,8 @@ "result.description": "Result of the cast.", "result_type_id": true, "result_type_id.description": "The type id of the result value.", + "result_sub_type_id": true, + "result_sub_type_id.description": "The sub type id of the result value (for arrays and vectors, or -1).", "result_size": true, "result_size.description": "The size of the result value.", "opcode": true, @@ -363,6 +381,8 @@ "filter.description": "Static property filter to exclude instrumentation.", "type_id": true, "type_id.description": "The operation's type id.", + "sub_type_id": true, + "sub_type_id.description": "The operation's sub type id (for arrays and vectors, or -1).", "size": true, "size.description": "The operation's type size.", "opcode": true, diff --git a/llvm/test/Instrumentation/Instrumentor/default_rt.c b/llvm/test/Instrumentation/Instrumentor/default_rt.c index 9ecb188605e1e..bce5fa9c63a1e 100644 --- a/llvm/test/Instrumentation/Instrumentor/default_rt.c +++ b/llvm/test/Instrumentation/Instrumentor/default_rt.c @@ -82,27 +82,27 @@ void __instrumentor_pre_unreachable(int32_t id) { printf("unreachable pre -- id: %" PRId32 "\n", id); } -void *__instrumentor_pre_load(void *pointer, int32_t pointer_as, void *base_pointer_info, int64_t value_size, int64_t alignment, int32_t value_type_id, int32_t atomicity_ordering, int8_t sync_scope_id, int8_t is_volatile, int32_t id) { - printf("load pre -- pointer: %p, pointer_as: %" PRId32 ", base_pointer_info: %p, value_size: %" PRId64 ", alignment: %" PRId64 
", value_type_id: %" PRId32 ", atomicity_ordering: %" PRId32 ", sync_scope_id: %" PRId32 ", is_volatile: %" PRId32 ", id: %" PRId32 "\n", pointer, pointer_as, base_pointer_info, value_size, alignment, value_type_id, atomicity_ordering, sync_scope_id, is_volatile, id); +void *__instrumentor_pre_load(void *pointer, int32_t pointer_as, void *base_pointer_info, int64_t value_size, int64_t alignment, int32_t value_type_id, int32_t value_sub_type_id, int32_t atomicity_ordering, int8_t sync_scope_id, int8_t is_volatile, int32_t id) { + printf("load pre -- pointer: %p, pointer_as: %" PRId32 ", base_pointer_info: %p, value_size: %" PRId64 ", alignment: %" PRId64 ", value_type_id: %s, value_sub_type_id: %s, atomicity_ordering: %" PRId32 ", sync_scope_id: %" PRId32 ", is_volatile: %" PRId32 ", id: %" PRId32 "\n", pointer, pointer_as, base_pointer_info, value_size, alignment, getLLVMTypeIDName(value_type_id), getLLVMTypeIDName(value_sub_type_id), atomicity_ordering, sync_scope_id, is_volatile, id); return pointer; } -void *__instrumentor_pre_store(void *pointer, int32_t pointer_as, void *base_pointer_info, int64_t value, int64_t value_size, int64_t alignment, int32_t value_type_id, int32_t atomicity_ordering, int8_t sync_scope_id, int8_t is_volatile, int32_t id) { - printf("store pre -- pointer: %p, pointer_as: %" PRId32 ", base_pointer_info: %p, value: %" PRId64 ", value_size: %" PRId64 ", alignment: %" PRId64 ", value_type_id: %" PRId32 ", atomicity_ordering: %" PRId32 ", sync_scope_id: %" PRId32 ", is_volatile: %" PRId32 ", id: %" PRId32 "\n", pointer, pointer_as, base_pointer_info, value, value_size, alignment, value_type_id, atomicity_ordering, sync_scope_id, is_volatile, id); +void *__instrumentor_pre_store(void *pointer, int32_t pointer_as, void *base_pointer_info, int64_t value, int64_t value_size, int64_t alignment, int32_t value_type_id, int32_t value_sub_type_id, int32_t atomicity_ordering, int8_t sync_scope_id, int8_t is_volatile, int32_t id) { + printf("store pre 
-- pointer: %p, pointer_as: %" PRId32 ", base_pointer_info: %p, value: %" PRId64 ", value_size: %" PRId64 ", alignment: %" PRId64 ", value_type_id: %s, value_sub_type_id: %s, atomicity_ordering: %" PRId32 ", sync_scope_id: %" PRId32 ", is_volatile: %" PRId32 ", id: %" PRId32 "\n", pointer, pointer_as, base_pointer_info, value, value_size, alignment, getLLVMTypeIDName(value_type_id), getLLVMTypeIDName(value_sub_type_id), atomicity_ordering, sync_scope_id, is_volatile, id); return pointer; } -void *__instrumentor_pre_store_ind(void *pointer, int32_t pointer_as, void *base_pointer_info, int64_t *value_ptr, int64_t value_size, int64_t alignment, int32_t value_type_id, int32_t atomicity_ordering, int8_t sync_scope_id, int8_t is_volatile, int32_t id) { - printf("store pre -- pointer: %p, pointer_as: %" PRId32 ", base_pointer_info: %p, value: %p, value_size: %" PRId64 ", alignment: %" PRId64 ", value_type_id: %" PRId32 ", atomicity_ordering: %" PRId32 ", sync_scope_id: %" PRId32 ", is_volatile: %" PRId32 ", id: %" PRId32 "\n", pointer, pointer_as, base_pointer_info, value_ptr, value_size, alignment, value_type_id, atomicity_ordering, sync_scope_id, is_volatile, id); +void *__instrumentor_pre_store_ind(void *pointer, int32_t pointer_as, void *base_pointer_info, int64_t *value_ptr, int64_t value_size, int64_t alignment, int32_t value_type_id, int32_t value_sub_type_id, int32_t atomicity_ordering, int8_t sync_scope_id, int8_t is_volatile, int32_t id) { + printf("store pre -- pointer: %p, pointer_as: %" PRId32 ", base_pointer_info: %p, value: %p, value_size: %" PRId64 ", alignment: %" PRId64 ", value_type_id: %s, value_sub_type_id: %s, atomicity_ordering: %" PRId32 ", sync_scope_id: %" PRId32 ", is_volatile: %" PRId32 ", id: %" PRId32 "\n", pointer, pointer_as, base_pointer_info, value_ptr, value_size, alignment, getLLVMTypeIDName(value_type_id), getLLVMTypeIDName(value_sub_type_id), atomicity_ordering, sync_scope_id, is_volatile, id); return pointer; } -void 
__instrumentor_pre_cast(int64_t input, int32_t input_type_id, int32_t input_size, int32_t result_type_id, int32_t result_size, int32_t opcode, int32_t id) { - printf("cast pre -- input: %" PRId64 ", input_type_id: %" PRId32 ", input_size: %" PRId32 ", result_type_id: %" PRId32 ", result_size: %" PRId32 ", opcode: %" PRId32 ", id: %" PRId32 "\n", input, input_type_id, input_size, result_type_id, result_size, opcode, id); +void __instrumentor_pre_cast(int64_t input, int32_t input_type_id, int32_t input_sub_type_id, int32_t input_size, int32_t result_type_id, int32_t result_sub_type_id, int32_t result_size, int32_t opcode, int32_t id) { + printf("cast pre -- input: %" PRId64 ", input_type_id: %s, input_sub_type_id: %s, input_size: %" PRId32 ", result_type_id: %s, result_sub_type_id: %s, result_size: %" PRId32 ", opcode: %" PRId32 ", id: %" PRId32 "\n", input, getLLVMTypeIDName(input_type_id), getLLVMTypeIDName(input_sub_type_id), input_size, getLLVMTypeIDName(result_type_id), getLLVMTypeIDName(result_sub_type_id), result_size, opcode, id); } -void __instrumentor_pre_cast_ind(int64_t *input_ptr, int32_t input_size, int32_t input_type_id, int32_t input_size, int32_t result_type_id, int32_t result_size, int32_t opcode, int32_t id) { - printf("cast pre -- input: %p, input_size: %" PRId32 ", input_type_id: %" PRId32 ", input_size: %" PRId32 ", result_type_id: %" PRId32 ", result_size: %" PRId32 ", opcode: %" PRId32 ", id: %" PRId32 "\n", input_ptr, input_size, input_type_id, input_size, result_type_id, result_size, opcode, id); +void __instrumentor_pre_cast_ind(int64_t *input_ptr, int32_t input_type_id, int32_t input_sub_type_id, int32_t input_size, int32_t result_type_id, int32_t result_sub_type_id, int32_t result_size, int32_t opcode, int32_t id) { + printf("cast pre -- input: %p, input_type_id: %s, input_sub_type_id: %s, input_size: %" PRId32 ", result_type_id: %s, result_sub_type_id: %s, result_size: %" PRId32 ", opcode: %" PRId32 ", id: %" PRId32 "\n", input_ptr, 
getLLVMTypeIDName(input_type_id), getLLVMTypeIDName(input_sub_type_id), input_size, getLLVMTypeIDName(result_type_id), getLLVMTypeIDName(result_sub_type_id), result_size, opcode, id); } void *__instrumentor_post_alloca(void *address, int64_t size, int64_t alignment, int32_t id) { @@ -110,30 +110,30 @@ void *__instrumentor_post_alloca(void *address, int64_t size, int64_t alignment, return address; } -int64_t __instrumentor_post_load(void *pointer, int32_t pointer_as, void *base_pointer_info, int64_t value, int64_t value_size, int64_t alignment, int32_t value_type_id, int32_t atomicity_ordering, int8_t sync_scope_id, int8_t is_volatile, int32_t id) { - printf("load post -- pointer: %p, pointer_as: %" PRId32 ", base_pointer_info: %p, value: %" PRId64 ", value_size: %" PRId64 ", alignment: %" PRId64 ", value_type_id: %" PRId32 ", atomicity_ordering: %" PRId32 ", sync_scope_id: %" PRId32 ", is_volatile: %" PRId32 ", id: %" PRId32 "\n", pointer, pointer_as, base_pointer_info, value, value_size, alignment, value_type_id, atomicity_ordering, sync_scope_id, is_volatile, id); +int64_t __instrumentor_post_load(void *pointer, int32_t pointer_as, void *base_pointer_info, int64_t value, int64_t value_size, int64_t alignment, int32_t value_type_id, int32_t value_sub_type_id, int32_t atomicity_ordering, int8_t sync_scope_id, int8_t is_volatile, int32_t id) { + printf("load post -- pointer: %p, pointer_as: %" PRId32 ", base_pointer_info: %p, value: %" PRId64 ", value_size: %" PRId64 ", alignment: %" PRId64 ", value_type_id: %s, value_sub_type_id: %s, atomicity_ordering: %" PRId32 ", sync_scope_id: %" PRId32 ", is_volatile: %" PRId32 ", id: %" PRId32 "\n", pointer, pointer_as, base_pointer_info, value, value_size, alignment, getLLVMTypeIDName(value_type_id), getLLVMTypeIDName(value_sub_type_id), atomicity_ordering, sync_scope_id, is_volatile, id); return value; } -void __instrumentor_post_load_ind(void *pointer, int32_t pointer_as, void *base_pointer_info, int64_t *value_ptr, 
int64_t value_size, int64_t alignment, int32_t value_type_id, int32_t atomicity_ordering, int8_t sync_scope_id, int8_t is_volatile, int32_t id) { - printf("load post -- pointer: %p, pointer_as: %" PRId32 ", base_pointer_info: %p, value: %p, value_size: %" PRId64 ", alignment: %" PRId64 ", value_type_id: %" PRId32 ", atomicity_ordering: %" PRId32 ", sync_scope_id: %" PRId32 ", is_volatile: %" PRId32 ", id: %" PRId32 "\n", pointer, pointer_as, base_pointer_info, value_ptr, value_size, alignment, value_type_id, atomicity_ordering, sync_scope_id, is_volatile, id); +void __instrumentor_post_load_ind(void *pointer, int32_t pointer_as, void *base_pointer_info, int64_t *value_ptr, int64_t value_size, int64_t alignment, int32_t value_type_id, int32_t value_sub_type_id, int32_t atomicity_ordering, int8_t sync_scope_id, int8_t is_volatile, int32_t id) { + printf("load post -- pointer: %p, pointer_as: %" PRId32 ", base_pointer_info: %p, value: %p, value_size: %" PRId64 ", alignment: %" PRId64 ", value_type_id: %s, value_sub_type_id: %s, atomicity_ordering: %" PRId32 ", sync_scope_id: %" PRId32 ", is_volatile: %" PRId32 ", id: %" PRId32 "\n", pointer, pointer_as, base_pointer_info, value_ptr, value_size, alignment, getLLVMTypeIDName(value_type_id), getLLVMTypeIDName(value_sub_type_id), atomicity_ordering, sync_scope_id, is_volatile, id); } -void __instrumentor_post_store(void *pointer, int32_t pointer_as, void *base_pointer_info, int64_t value, int64_t value_size, int64_t alignment, int32_t value_type_id, int32_t atomicity_ordering, int8_t sync_scope_id, int8_t is_volatile, int32_t id) { - printf("store post -- pointer: %p, pointer_as: %" PRId32 ", base_pointer_info: %p, value: %" PRId64 ", value_size: %" PRId64 ", alignment: %" PRId64 ", value_type_id: %" PRId32 ", atomicity_ordering: %" PRId32 ", sync_scope_id: %" PRId32 ", is_volatile: %" PRId32 ", id: %" PRId32 "\n", pointer, pointer_as, base_pointer_info, value, value_size, alignment, value_type_id, atomicity_ordering, 
sync_scope_id, is_volatile, id); +void __instrumentor_post_store(void *pointer, int32_t pointer_as, void *base_pointer_info, int64_t value, int64_t value_size, int64_t alignment, int32_t value_type_id, int32_t value_sub_type_id, int32_t atomicity_ordering, int8_t sync_scope_id, int8_t is_volatile, int32_t id) { + printf("store post -- pointer: %p, pointer_as: %" PRId32 ", base_pointer_info: %p, value: %" PRId64 ", value_size: %" PRId64 ", alignment: %" PRId64 ", value_type_id: %s, value_sub_type_id: %s, atomicity_ordering: %" PRId32 ", sync_scope_id: %" PRId32 ", is_volatile: %" PRId32 ", id: %" PRId32 "\n", pointer, pointer_as, base_pointer_info, value, value_size, alignment, getLLVMTypeIDName(value_type_id), getLLVMTypeIDName(value_sub_type_id), atomicity_ordering, sync_scope_id, is_volatile, id); } -void __instrumentor_post_store_ind(void *pointer, int32_t pointer_as, void *base_pointer_info, int64_t *value_ptr, int64_t value_size, int64_t alignment, int32_t value_type_id, int32_t atomicity_ordering, int8_t sync_scope_id, int8_t is_volatile, int32_t id) { - printf("store post -- pointer: %p, pointer_as: %" PRId32 ", base_pointer_info: %p, value: %p, value_size: %" PRId64 ", alignment: %" PRId64 ", value_type_id: %" PRId32 ", atomicity_ordering: %" PRId32 ", sync_scope_id: %" PRId32 ", is_volatile: %" PRId32 ", id: %" PRId32 "\n", pointer, pointer_as, base_pointer_info, value_ptr, value_size, alignment, value_type_id, atomicity_ordering, sync_scope_id, is_volatile, id); +void __instrumentor_post_store_ind(void *pointer, int32_t pointer_as, void *base_pointer_info, int64_t *value_ptr, int64_t value_size, int64_t alignment, int32_t value_type_id, int32_t value_sub_type_id, int32_t atomicity_ordering, int8_t sync_scope_id, int8_t is_volatile, int32_t id) { + printf("store post -- pointer: %p, pointer_as: %" PRId32 ", base_pointer_info: %p, value: %p, value_size: %" PRId64 ", alignment: %" PRId64 ", value_type_id: %s, value_sub_type_id: %s, atomicity_ordering: %" 
PRId32 ", sync_scope_id: %" PRId32 ", is_volatile: %" PRId32 ", id: %" PRId32 "\n", pointer, pointer_as, base_pointer_info, value_ptr, value_size, alignment, getLLVMTypeIDName(value_type_id), getLLVMTypeIDName(value_sub_type_id), atomicity_ordering, sync_scope_id, is_volatile, id); } -int64_t __instrumentor_post_cast(int64_t input, int32_t input_type_id, int32_t input_size, int64_t result, int32_t result_type_id, int32_t result_size, int32_t opcode, int32_t id) { - printf("cast post -- input: %" PRId64 ", input_type_id: %" PRId32 ", input_size: %" PRId32 ", result: %" PRId64 ", result_type_id: %" PRId32 ", result_size: %" PRId32 ", opcode: %" PRId32 ", id: %" PRId32 "\n", input, input_type_id, input_size, result, result_type_id, result_size, opcode, id); +int64_t __instrumentor_post_cast(int64_t input, int32_t input_type_id, int32_t input_sub_type_id, int32_t input_size, int64_t result, int32_t result_type_id, int32_t result_sub_type_id, int32_t result_size, int32_t opcode, int32_t id) { + printf("cast post -- input: %" PRId64 ", input_type_id: %s, input_sub_type_id: %s, input_size: %" PRId32 ", result: %" PRId64 ", result_type_id: %s, result_sub_type_id: %s, result_size: %" PRId32 ", opcode: %" PRId32 ", id: %" PRId32 "\n", input, getLLVMTypeIDName(input_type_id), getLLVMTypeIDName(input_sub_type_id), input_size, result, getLLVMTypeIDName(result_type_id), getLLVMTypeIDName(result_sub_type_id), result_size, opcode, id); return result; } -void __instrumentor_post_cast_ind(int64_t *input_ptr, int32_t input_size, int32_t input_type_id, int32_t input_size, int64_t *result_ptr, int32_t result_size, int32_t result_type_id, int32_t result_size, int32_t opcode, int32_t id) { - printf("cast post -- input: %p, input_size: %" PRId32 ", input_type_id: %" PRId32 ", input_size: %" PRId32 ", result: %p, result_size: %" PRId32 ", result_type_id: %" PRId32 ", result_size: %" PRId32 ", opcode: %" PRId32 ", id: %" PRId32 "\n", input_ptr, input_size, input_type_id, input_size, 
result_ptr, result_size, result_type_id, result_size, opcode, id); +void __instrumentor_post_cast_ind(int64_t *input_ptr, int32_t input_type_id, int32_t input_sub_type_id, int32_t input_size, int64_t *result_ptr, int32_t result_type_id, int32_t result_sub_type_id, int32_t result_size, int32_t opcode, int32_t id) { + printf("cast post -- input: %p, input_type_id: %s, input_sub_type_id: %s, input_size: %" PRId32 ", result: %p, result_type_id: %s, result_sub_type_id: %s, result_size: %" PRId32 ", opcode: %" PRId32 ", id: %" PRId32 "\n", input_ptr, getLLVMTypeIDName(input_type_id), getLLVMTypeIDName(input_sub_type_id), input_size, result_ptr, getLLVMTypeIDName(result_type_id), getLLVMTypeIDName(result_sub_type_id), result_size, opcode, id); } void *__instrumentor_post_base_pointer_info(void *base_pointer, int32_t base_pointer_kind, int32_t id) { diff --git a/llvm/test/Instrumentation/Instrumentor/default_rt.h b/llvm/test/Instrumentation/Instrumentor/default_rt.h index e09686f8ce4cd..285e381753b3d 100644 --- a/llvm/test/Instrumentation/Instrumentor/default_rt.h +++ b/llvm/test/Instrumentation/Instrumentor/default_rt.h @@ -151,8 +151,10 @@ enum LLVMTypeID { }; /// Get the string name of an LLVM Type ID. 
-static inline const char *getLLVMTypeIDName(uint32_t type_id) { +static inline const char *getLLVMTypeIDName(int32_t type_id) { switch (type_id) { + case -1: + return "none"; case HalfTyID: return "half"; case BFloatTyID: diff --git a/llvm/test/Instrumentation/Instrumentor/generate_rt.ll b/llvm/test/Instrumentation/Instrumentor/generate_rt.ll index 533561b73ce39..1e72becdcd34c 100644 --- a/llvm/test/Instrumentation/Instrumentor/generate_rt.ll +++ b/llvm/test/Instrumentation/Instrumentor/generate_rt.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 ; RUN: rm -rf %t && mkdir -p %t && cd %t ; RUN: opt < %s -passes=instrumentor -instrumentor-read-config-files=%S/rt_config.json -S ; RUN: diff -b default_rt.c %S/default_rt.c diff --git a/llvm/test/Instrumentation/Instrumentor/module_and_globals.ll b/llvm/test/Instrumentation/Instrumentor/module_and_globals.ll index c76eb47030648..64cb0737d0863 100644 --- a/llvm/test/Instrumentation/Instrumentor/module_and_globals.ll +++ b/llvm/test/Instrumentation/Instrumentor/module_and_globals.ll @@ -39,34 +39,34 @@ entry: ; CHECK-NEXT: [[TMP1:%.*]] = call ptr @__instrumentor_post_base_pointer_info(ptr [[Y_SHADOW_LOAD]], i32 2, i32 -10) #[[ATTR0]] ; CHECK-NEXT: [[X_SHADOW_LOAD:%.*]] = load ptr, ptr @__instrumentor_shadow.X, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = call ptr @__instrumentor_post_base_pointer_info(ptr [[X_SHADOW_LOAD]], i32 2, i32 -9) #[[ATTR0]] -; CHECK-NEXT: [[TMP10:%.*]] = call ptr @__instrumentor_pre_load(ptr [[X_SHADOW_LOAD]], i32 0, ptr [[TMP2]], i64 4, i64 4, i32 12, i32 0, i8 1, i8 0, i32 9) #[[ATTR0]] +; CHECK-NEXT: [[TMP10:%.*]] = call ptr @__instrumentor_pre_load(ptr [[X_SHADOW_LOAD]], i32 0, ptr [[TMP2]], i64 4, i64 4, i32 12, i32 -1, i32 0, i8 1, i8 0, i32 9) #[[ATTR0]] ; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4 ; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i64 -; CHECK-NEXT: [[TMP13:%.*]] = call i64 
@__instrumentor_post_load(ptr [[X_SHADOW_LOAD]], i32 0, ptr [[TMP2]], i64 [[TMP12]], i64 4, i64 4, i32 12, i32 0, i8 1, i8 0, i32 -9) #[[ATTR0]] +; CHECK-NEXT: [[TMP13:%.*]] = call i64 @__instrumentor_post_load(ptr [[X_SHADOW_LOAD]], i32 0, ptr [[TMP2]], i64 [[TMP12]], i64 4, i64 4, i32 12, i32 -1, i32 0, i8 1, i8 0, i32 -9) #[[ATTR0]] ; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[TMP13]] to i32 -; CHECK-NEXT: [[TMP8:%.*]] = call ptr @__instrumentor_pre_load(ptr [[Y_SHADOW_LOAD]], i32 0, ptr [[TMP1]], i64 4, i64 4, i32 12, i32 0, i8 1, i8 0, i32 10) #[[ATTR0]] +; CHECK-NEXT: [[TMP8:%.*]] = call ptr @__instrumentor_pre_load(ptr [[Y_SHADOW_LOAD]], i32 0, ptr [[TMP1]], i64 4, i64 4, i32 12, i32 -1, i32 0, i8 1, i8 0, i32 10) #[[ATTR0]] ; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[TMP8]], align 4 ; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP4]] to i64 -; CHECK-NEXT: [[TMP21:%.*]] = call i64 @__instrumentor_post_load(ptr [[Y_SHADOW_LOAD]], i32 0, ptr [[TMP1]], i64 [[TMP15]], i64 4, i64 4, i32 12, i32 0, i8 1, i8 0, i32 -10) #[[ATTR0]] +; CHECK-NEXT: [[TMP21:%.*]] = call i64 @__instrumentor_post_load(ptr [[Y_SHADOW_LOAD]], i32 0, ptr [[TMP1]], i64 [[TMP15]], i64 4, i64 4, i32 12, i32 -1, i32 0, i8 1, i8 0, i32 -10) #[[ATTR0]] ; CHECK-NEXT: [[TMP25:%.*]] = trunc i64 [[TMP21]] to i32 -; CHECK-NEXT: [[TMP26:%.*]] = call ptr @__instrumentor_pre_load(ptr [[Z_SHADOW_LOAD]], i32 0, ptr [[TMP0]], i64 4, i64 4, i32 12, i32 0, i8 1, i8 0, i32 11) #[[ATTR0]] +; CHECK-NEXT: [[TMP26:%.*]] = call ptr @__instrumentor_pre_load(ptr [[Z_SHADOW_LOAD]], i32 0, ptr [[TMP0]], i64 4, i64 4, i32 12, i32 -1, i32 0, i8 1, i8 0, i32 11) #[[ATTR0]] ; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[TMP26]], align 4 ; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64 -; CHECK-NEXT: [[TMP27:%.*]] = call i64 @__instrumentor_post_load(ptr [[Z_SHADOW_LOAD]], i32 0, ptr [[TMP0]], i64 [[TMP16]], i64 4, i64 4, i32 12, i32 0, i8 1, i8 0, i32 -11) #[[ATTR0]] +; CHECK-NEXT: [[TMP27:%.*]] = call i64 
@__instrumentor_post_load(ptr [[Z_SHADOW_LOAD]], i32 0, ptr [[TMP0]], i64 [[TMP16]], i64 4, i64 4, i32 12, i32 -1, i32 0, i8 1, i8 0, i32 -11) #[[ATTR0]] ; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[TMP27]] to i32 ; CHECK-NEXT: [[TMP29:%.*]] = zext i32 [[TMP14]] to i64 ; CHECK-NEXT: [[TMP30:%.*]] = zext i32 [[TMP25]] to i64 -; CHECK-NEXT: call void @__instrumentor_pre_numeric(i32 12, i32 4, i32 14, i64 [[TMP29]], i64 [[TMP30]], i64 1, i32 12) #[[ATTR0]] +; CHECK-NEXT: call void @__instrumentor_pre_numeric(i32 12, i32 -1, i32 4, i32 14, i64 [[TMP29]], i64 [[TMP30]], i64 1, i32 12) #[[ATTR0]] ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP25]] ; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[ADD]] to i64 -; CHECK-NEXT: [[TMP18:%.*]] = call i64 @__instrumentor_post_numeric(i32 12, i32 4, i32 14, i64 [[TMP29]], i64 [[TMP30]], i64 [[TMP17]], i64 1, i32 -12) #[[ATTR0]] +; CHECK-NEXT: [[TMP18:%.*]] = call i64 @__instrumentor_post_numeric(i32 12, i32 -1, i32 4, i32 14, i64 [[TMP29]], i64 [[TMP30]], i64 [[TMP17]], i64 1, i32 -12) #[[ATTR0]] ; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i32 ; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 ; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP28]] to i64 -; CHECK-NEXT: call void @__instrumentor_pre_numeric(i32 12, i32 4, i32 14, i64 [[TMP20]], i64 [[TMP24]], i64 1, i32 13) #[[ATTR0]] +; CHECK-NEXT: call void @__instrumentor_pre_numeric(i32 12, i32 -1, i32 4, i32 14, i64 [[TMP20]], i64 [[TMP24]], i64 1, i32 13) #[[ATTR0]] ; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP19]], [[TMP28]] ; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[ADD3]] to i64 -; CHECK-NEXT: [[TMP23:%.*]] = call i64 @__instrumentor_post_numeric(i32 12, i32 4, i32 14, i64 [[TMP20]], i64 [[TMP24]], i64 [[TMP22]], i64 1, i32 -13) #[[ATTR0]] +; CHECK-NEXT: [[TMP23:%.*]] = call i64 @__instrumentor_post_numeric(i32 12, i32 -1, i32 4, i32 14, i64 [[TMP20]], i64 [[TMP24]], i64 [[TMP22]], i64 1, i32 -13) #[[ATTR0]] ; CHECK-NEXT: [[ADD2:%.*]] = trunc i64 [[TMP23]] to i32 ; 
CHECK-NEXT: call void @__instrumentor_post_function(ptr @foo, ptr @__instrumentor_.str.5, i32 0, ptr null, i8 0, i32 -15) #[[ATTR0]] ; CHECK-NEXT: ret i32 [[ADD2]] diff --git a/llvm/test/Instrumentation/Instrumentor/numeric_subtypeid.ll b/llvm/test/Instrumentation/Instrumentor/numeric_subtypeid.ll new file mode 100644 index 0000000000000..4f6b40f19c995 --- /dev/null +++ b/llvm/test/Instrumentation/Instrumentor/numeric_subtypeid.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; Test that the sub_type_id parameter is correctly passed to instrumentation for vector types +; RUN: opt < %s -passes=instrumentor -instrumentor-read-config-files=%S/numeric_subtypeid_config.json -S | FileCheck %s + +define <4 x float> @test_vector_fadd(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: define <4 x float> @test_vector_fadd( +; CHECK-SAME: <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = alloca <4 x float>, align 16 +; CHECK-NEXT: [[TMP1:%.*]] = alloca <4 x float>, align 16 +; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x float>, align 16 +; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[A]], [[B]] +; CHECK-NEXT: store <4 x float> [[A]], ptr [[TMP2]], align 16 +; CHECK-NEXT: store <4 x float> [[B]], ptr [[TMP1]], align 16 +; CHECK-NEXT: store <4 x float> [[RES]], ptr [[TMP0]], align 16 +; CHECK-NEXT: call void @__instrumentor_post_numeric_ind(i32 18, i32 2, i32 16, i32 15, ptr [[TMP2]], ptr [[TMP1]], ptr [[TMP0]], i64 0, i32 -1) #[[ATTR0:[0-9]+]] +; CHECK-NEXT: ret <4 x float> [[RES]] +; +entry: + %res = fadd <4 x float> %a, %b + ret <4 x float> %res +} + +define double @test_scalar_fadd(double %a, double %b) { +; CHECK-LABEL: define double @test_scalar_fadd( +; CHECK-SAME: double [[A:%.*]], double [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[RES:%.*]] = fadd double [[A]], [[B]] +; CHECK-NEXT: [[TMP0:%.*]] = bitcast double [[A]] to i64 +; 
CHECK-NEXT: [[TMP1:%.*]] = bitcast double [[B]] to i64 +; CHECK-NEXT: [[TMP2:%.*]] = bitcast double [[RES]] to i64 +; CHECK-NEXT: call void @__instrumentor_post_numeric(i32 3, i32 -1, i32 8, i32 15, i64 [[TMP0]], i64 [[TMP1]], i64 [[TMP2]], i64 0, i32 -2) #[[ATTR0]] +; CHECK-NEXT: ret double [[RES]] +; +entry: + %res = fadd double %a, %b + ret double %res +} + diff --git a/llvm/test/Instrumentation/Instrumentor/numeric_subtypeid_config.json b/llvm/test/Instrumentation/Instrumentor/numeric_subtypeid_config.json new file mode 100644 index 0000000000000..8f3ddc0e8cdd8 --- /dev/null +++ b/llvm/test/Instrumentation/Instrumentor/numeric_subtypeid_config.json @@ -0,0 +1,19 @@ +{ + "configuration": { + "runtime_prefix": "__instrumentor_" + }, + "instruction_post": { + "numeric": { + "enabled": true, + "type_id": true, + "sub_type_id": true, + "size": true, + "opcode": true, + "left": true, + "right": true, + "result": true, + "flags": true, + "id": true + } + } +} diff --git a/llvm/test/Instrumentation/Instrumentor/rt_config.json b/llvm/test/Instrumentation/Instrumentor/rt_config.json index bcd6708cb5e15..42d0f0b047309 100644 --- a/llvm/test/Instrumentation/Instrumentor/rt_config.json +++ b/llvm/test/Instrumentation/Instrumentor/rt_config.json @@ -168,6 +168,8 @@ "alignment.description": "The known access alignment.", "value_type_id": true, "value_type_id.description": "The type id of the loaded value.", + "value_sub_type_id": true, + "value_sub_type_id.description": "The sub type id of the loaded value (for arrays and vectors, or -1).", "atomicity_ordering": true, "atomicity_ordering.description": "The atomicity ordering of the load.", "sync_scope_id": true, @@ -196,6 +198,8 @@ "alignment.description": "The known access alignment.", "value_type_id": true, "value_type_id.description": "The type id of the stored value.", + "value_sub_type_id": true, + "value_sub_type_id.description": "The type id of the stored value (for arrays and vectors, or -1).", "atomicity_ordering": 
true, "atomicity_ordering.description": "The atomicity ordering of the store.", "sync_scope_id": true, @@ -213,10 +217,14 @@ "input.description": "Input value of the cast.", "input_type_id": true, "input_type_id.description": "The type id of the input value.", + "input_sub_type_id": true, + "input_sub_type_id.description": "The sub type id of the input value (for arrays and vectors, or -1).", "input_size": true, "input_size.description": "The size of the input value.", "result_type_id": true, "result_type_id.description": "The type id of the result value.", + "result_sub_type_id": true, + "result_sub_type_id.description": "The sub type id of the result value (for arrays and vectors, or -1).", "result_size": true, "result_size.description": "The size of the result value.", "opcode": true, @@ -259,6 +267,8 @@ "alignment.description": "The known access alignment.", "value_type_id": true, "value_type_id.description": "The type id of the loaded value.", + "value_sub_type_id": true, + "value_sub_type_id.description": "The sub type id of the loaded value (for arrays and vectors, or -1).", "atomicity_ordering": true, "atomicity_ordering.description": "The atomicity ordering of the load.", "sync_scope_id": true, @@ -286,6 +296,8 @@ "alignment.description": "The known access alignment.", "value_type_id": true, "value_type_id.description": "The type id of the stored value.", + "value_sub_type_id": true, + "value_sub_type_id.description": "The type id of the stored value (for arrays and vectors, or -1).", "atomicity_ordering": true, "atomicity_ordering.description": "The atomicity ordering of the store.", "sync_scope_id": true, @@ -303,6 +315,8 @@ "input.description": "Input value of the cast.", "input_type_id": true, "input_type_id.description": "The type id of the input value.", + "input_sub_type_id": true, + "input_sub_type_id.description": "The sub type id of the input value (for arrays and vectors, or -1).", "input_size": true, "input_size.description": "The size of the 
input value.", "result": true, @@ -310,6 +324,8 @@ "result.description": "Result of the cast.", "result_type_id": true, "result_type_id.description": "The type id of the result value.", + "result_sub_type_id": true, + "result_sub_type_id.description": "The sub type id of the result value (for arrays and vectors, or -1).", "result_size": true, "result_size.description": "The size of the result value.", "opcode": true, From 120bdcfb95cbcd457bef7f70278c0f44080b383a Mon Sep 17 00:00:00 2001 From: Utkarsh Saxena Date: Thu, 25 Jun 2026 00:23:26 +0200 Subject: [PATCH 424/511] Revert "[LifetimeSafety] Fix liveness propagation for all origin flows (#205323)" (#205687) Revert "[LifetimeSafety] Fix liveness propagation for all origin flows (#205323)" This reverts commit 8d2a578b2130742c8790f3dba5fb414962eafcd5. Revert "[LifetimeSafety] Model GNU statement expressions (#204841)" This reverts commit 361f3b24f2a8703eb7a32c1ae081f490888238f3. --- .../Analyses/LifetimeSafety/FactsGenerator.h | 1 - .../LifetimeSafety/FactsGenerator.cpp | 21 ------ .../Analysis/LifetimeSafety/LiveOrigins.cpp | 17 +---- .../Sema/LifetimeSafety/invalidations.cpp | 29 ++------ clang/test/Sema/LifetimeSafety/safety.cpp | 70 ------------------- 5 files changed, 9 insertions(+), 129 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h index 8dc5213dd8de2..5ac67263681ac 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h @@ -57,7 +57,6 @@ class FactsGenerator : public ConstStmtVisitor { void VisitArraySubscriptExpr(const ArraySubscriptExpr *ASE); void VisitCXXNewExpr(const CXXNewExpr *NE); void VisitCXXDeleteExpr(const CXXDeleteExpr *DE); - void VisitStmtExpr(const StmtExpr *SE); private: OriginList *getOriginsList(const ValueDecl &D); diff --git 
a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index a2341ebc8f2ed..50bf79d4c1a38 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -801,21 +801,6 @@ void FactsGenerator::VisitCXXDeleteExpr(const CXXDeleteExpr *DE) { FactMgr.createFact(List->getOuterOriginID(), DE)); } -void FactsGenerator::VisitStmtExpr(const StmtExpr *SE) { - // A statement expression (`({ ...; e; })`) yields the value of its final - // expression `e`. Flow `e`'s origins into the statement expression's origin - // so a borrow `e` carries reaches the value's users. - const auto *CS = SE->getSubStmt(); - if (!CS || CS->body_empty()) - return; - const auto *Last = dyn_cast(CS->body_back()); - if (!Last) - return; - if (OriginList *Dst = getOriginsList(*SE)) - if (OriginList *Src = getRValueOrigins(Last, getOriginsList(*Last))) - flow(Dst, Src, /*Kill=*/true); -} - bool FactsGenerator::escapesViaReturn(OriginID OID) const { return llvm::any_of(EscapesInCurrentBlock, [OID](const Fact *F) { if (const auto *EF = F->getAs()) @@ -923,12 +908,6 @@ void FactsGenerator::handleMovedArgsInCall(const FunctionDecl *FD, const ParmVarDecl *PVD = FD->getParamDecl(I - IsInstance); if (!PVD->getType()->isRValueReferenceType()) continue; - // Skip lifetime annotated r-value reference parameters. Lifetime annotation - // indicates that the parameter is borrowed (not consumed), so it should not - // be marked as moved even though it's an r-value reference. 
- if (PVD->hasAttr() || - PVD->hasAttr()) - continue; const Expr *Arg = Args[I]; OriginList *MovedOrigins = getOriginsList(*Arg); assert(MovedOrigins->getLength() >= 1 && diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp index 69b903c813555..cfbcacf04b1b0 100644 --- a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp +++ b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp @@ -161,20 +161,9 @@ class AnalysisImpl /// An OriginFlow kills the liveness of the destination origin if `KillDest` /// is true. Otherwise, it propagates liveness from destination to source. Lattice transfer(Lattice In, const OriginFlowFact &OF) { - Lattice Out = In; - OriginID Dest = OF.getDestOriginID(); - OriginID Src = OF.getSrcOriginID(); - // If the destination of the flow is live, the source of the flow must also - // be marked live before this point as its value will flow into the - // destination. - if (In.LiveOrigins.contains(Dest)) { - const LivenessInfo *DestInfo = In.LiveOrigins.lookup(Dest); - assert(DestInfo); - Out = Lattice(Factory.add(Out.LiveOrigins, Src, *DestInfo)); - } - if (OF.getKillDest()) - Out = Lattice(Factory.remove(Out.LiveOrigins, Dest)); - return Out; + if (!OF.getKillDest()) + return In; + return Lattice(Factory.remove(In.LiveOrigins, OF.getDestOriginID())); } Lattice transfer(Lattice In, const KillOriginFact &F) { diff --git a/clang/test/Sema/LifetimeSafety/invalidations.cpp b/clang/test/Sema/LifetimeSafety/invalidations.cpp index be1acc6bc7fbc..c2ac105855d07 100644 --- a/clang/test/Sema/LifetimeSafety/invalidations.cpp +++ b/clang/test/Sema/LifetimeSafety/invalidations.cpp @@ -402,20 +402,10 @@ void SelfInvalidatingMap() { // Therefore the following is safe in practice. // On the other hand, std::flat_map (since C++23) does not provide pointer stability on // insertion and following is unsafe for this container. - // FIXME: The warnings below are false positives (self-invalidation of the Owner). 
- // Modifying a container should not invalidate the container object itself. - // To resolve this, we need to: - // 1. Distinguish owner-borrow (borrowing the container object) from content-borrow (borrowing elements inside the container). - // 2. Make AccessPaths more precise to reason at element/field granularity rather than treating the whole container as a single storage location. - mp[1] = "42"; // expected-warning {{local variable 'mp' is later invalidated}} \ - // expected-note {{local variable 'mp' is invalidated here}} \ - // expected-note {{later used here}} - mp[2] = mp[1]; // expected-warning {{local variable 'mp' is later invalidated}} \ - // expected-warning {{local variable 'mp' is later invalidated}} \ - // expected-note {{local variable 'mp' is invalidated here}} \ - // expected-note {{later used here}} \ - // expected-note {{local variable 'mp' is invalidated here}} \ - // expected-note {{later used here}} + mp[1] = "42"; + mp[2] // expected-note {{local variable 'mp' is invalidated here}} + = + mp[1]; // expected-warning {{local variable 'mp' is later invalidated}} expected-note {{later used here}} } void InvalidateErase() { @@ -750,16 +740,9 @@ void MapSubscriptMultipleCallsDoesNotInvalidate(std::map mp, int a, in } void FlatMapSubscriptMultipleCallsInvalidate(std::flat_map mp, int a, int b) { - // FIXME: The duplicate warning below is a false positive caused by self-invalidation of the Owner 'mp'. - // While the warning on the temporary reference returned by mp[a] is a true positive (it dangles), - // the second warning on 'mp' itself is redundant and incorrect. - // Resolving this requires distinguishing owner-borrow from content-borrow. 
PrintMax(mp[a], mp[b]); // expected-warning {{parameter 'mp' is later invalidated}} \ - // expected-warning {{parameter 'mp' is later invalidated}} \ - // expected-note {{parameter 'mp' is invalidated here}} \ - // expected-note {{later used here}} \ - // expected-note {{parameter 'mp' is invalidated here}} \ - // expected-note {{later used here}} + // expected-note {{parameter 'mp' is invalidated here}} \ + // expected-note {{later used here}} } } // namespace AssociativeContainers diff --git a/clang/test/Sema/LifetimeSafety/safety.cpp b/clang/test/Sema/LifetimeSafety/safety.cpp index bbb8fbe6fc6a9..b59fac191dcfb 100644 --- a/clang/test/Sema/LifetimeSafety/safety.cpp +++ b/clang/test/Sema/LifetimeSafety/safety.cpp @@ -3955,73 +3955,3 @@ struct [[gsl::Pointer()]] PtrWithInt { int x; }; PtrWithInt f() { return PtrWithInt{10}; } - -// A GNU statement expression (`({ ...; e; })`) yields the value of its final -// expression `e`. `e`'s origins flow into the statement expression's value, so -// a borrow `e` carries is tracked: a borrow of a body-local dangles, and a -// borrow forwarded from an outer object propagates to the value's users. -namespace statement_expression { -void use(int *p); - -// A borrow of a statement-expression-local escaping via the value. -void borrow_of_local() { - int *p = ({ int x = 7; &x; }); // expected-warning {{local variable 'x' does not live long enough}} expected-note {{local variable 'x' is destroyed here}} - use(p); // expected-note {{later used here}} -} - -// An outer borrow forwarded through a statement expression and returned: -// use-after-return. -int *return_borrow_of_local() { - int local = 0; - return ({ (void)0; &local; }); // expected-warning {{stack memory associated with local variable 'local' is returned}} expected-note {{returned here}} -} - -// A view bound to a temporary produced by the statement expression dangles. 
-void borrow_temporary() { - std::string_view view = ({ std::string x = "long enough heap string!!!!!!"; x; }); // expected-warning {{temporary object does not live long enough}} expected-note {{temporary object is destroyed here}} - (void)view; // expected-note {{later used here}} -} - -// Forwarding an outer borrow that dangles. -void forward_outer_borrow() { - int *p; - { - int local = 0; - p = ({ (void)0; &local; }); // expected-warning {{local variable 'local' does not live long enough}} - } // expected-note {{local variable 'local' is destroyed here}} - use(p); // expected-note {{later used here}} -} - -// The statement-expression result carries the borrow, so a `?:` sibling -// supplying a valid loan no longer hides it via the merge. -void masked(bool c) { - static int valid; - int *keep = &valid; - int *r; - { - int local = 0; - r = c ? keep : ({ &local; }); // expected-warning {{local variable 'local' does not live long enough}} - } // expected-note {{local variable 'local' is destroyed here}} - use(r); // expected-note {{later used here}} -} - -// Both conditional arms are statement expressions returning a borrow of a -// body-local; each is caught as a returned stack address. -int *conditional_arms(bool c) { - return c ? ({ int x = 7; &x; }) // expected-warning {{stack memory associated with local variable 'x' is returned}} expected-note 2 {{returned here}} - : ({ int y = 7; &y; }); // expected-warning {{stack memory associated with local variable 'y' is returned}} -} - -// Negative: a statement expression yielding a long-lived borrow stays silent. -void ok() { - static int s; - int *p = ({ int unused = 0; (void)unused; &s; }); - use(p); // no-warning -} - -// A discarded statement expression's value is not consumed, so a borrow of a -// body-local in it does not reach any user and is correctly not flagged. 
-void discarded_body_local() { - (void)({ int x = 7; &x; }); // no-warning -} -} // namespace statement_expression From 1bb2aa4392b8e3ef9cbab3cb2bda55b5d995fc01 Mon Sep 17 00:00:00 2001 From: Alexey Gerenkov Date: Thu, 25 Jun 2026 01:42:57 +0300 Subject: [PATCH 425/511] [Xtensa] Fix trap/debugtrap operations lowering. (#200872) Fix debug operation lowering for Xtensa. Co-authored-by: Andrei Safronov --- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 2 + llvm/lib/Target/Xtensa/XtensaInstrInfo.td | 24 +++- llvm/lib/Target/Xtensa/XtensaSubtarget.h | 1 + llvm/test/CodeGen/Xtensa/trap.ll | 136 ++++++++++++++++++ 4 files changed, 161 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/Xtensa/trap.ll diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index b7c3adb51bc1d..062911cd553b6 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -110,6 +110,8 @@ XtensaTargetLowering::XtensaTargetLowering(const TargetMachine &TM, // indirect jump. 
setOperationAction(ISD::BR_JT, MVT::Other, Custom); + setOperationAction({ISD::TRAP, ISD::DEBUGTRAP}, MVT::Other, Legal); + setOperationAction(ISD::BR_CC, MVT::i32, Legal); setOperationAction(ISD::BR_CC, MVT::i64, Expand); diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td index 90e20b7cd6334..b55a6ea8c932c 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td @@ -508,6 +508,7 @@ def ILL : CALLX_Inst<0x00, 0x00, 0x00, (outs), (ins), let n = 0x0; let r = 0; let s = 0; + let isTrap = 1; } //===----------------------------------------------------------------------===// @@ -654,6 +655,7 @@ def ILL_N : RRRN_Inst<0x0D, (outs), (ins), let r = 0xF; let s = 0x0; let t = 0x6; + let isTrap = 1; } def MOV_N : RRRN_Inst<0x0D, (outs AR:$t), (ins AR:$s), @@ -1401,7 +1403,7 @@ let Predicates = [HasRegionProtection] in { // Debug instructions //===----------------------------------------------------------------------===// -let isBarrier = 1, isTerminator = 1 in { +let isBarrier = 1, isTerminator = 1, isTrap = 1 in { def BREAK : RRR_Inst<0x00, 0x00, 0x00, (outs), (ins uimm4:$s, uimm4:$t), "break\t$s, $t", []>, Requires<[HasDebug]> { let r = 0x04; @@ -1419,7 +1421,25 @@ let isBarrier = 1, isTerminator = 1 in { def : InstAlias<"_break.n\t$imm", (BREAK_N uimm4:$imm)>; -def : Pat<(trap), (BREAK (i32 1), (i32 15))>; +def : Pat<(trap), (ILL)>; +def : Pat<(debugtrap), (ILL)>; + +let AddedComplexity = 15 in { + def : Pat<(trap), (ILL_N)>, + Requires<[HasDensity]>; + def : Pat<(debugtrap), (ILL_N)>, + Requires<[HasDensity]>; +} + +let AddedComplexity = 20 in { + def : Pat<(debugtrap), (BREAK (i32 1), (i32 15))>, + Requires<[HasDebug]>; +} + +let AddedComplexity = 25 in { + def : Pat<(debugtrap), (BREAK_N (i32 1))>, + Requires<[HasDensity, HasDebug]>; +} // Load instruction def LDDR32P : RRR_Inst<0x00, 0x00, 0x00, (outs AR:$s), (ins), diff --git a/llvm/lib/Target/Xtensa/XtensaSubtarget.h 
b/llvm/lib/Target/Xtensa/XtensaSubtarget.h index cbe5d9a720501..007cc78047e69 100644 --- a/llvm/lib/Target/Xtensa/XtensaSubtarget.h +++ b/llvm/lib/Target/Xtensa/XtensaSubtarget.h @@ -85,6 +85,7 @@ class XtensaSubtarget : public XtensaGenSubtargetInfo { bool hasMiscSR() const { return HasMiscSR; } bool hasExtendedL32R() const { return HasExtendedL32R; } bool hasDataCache() const { return HasDataCache; } + bool hasDebug() const { return HasDebug; } bool hasHighPriInterrupts() const { return HasHighPriInterrupts; } bool hasHighPriInterruptsLevel3() const { return HasHighPriInterruptsLevel3; } bool hasHighPriInterruptsLevel4() const { return HasHighPriInterruptsLevel4; } diff --git a/llvm/test/CodeGen/Xtensa/trap.ll b/llvm/test/CodeGen/Xtensa/trap.ll new file mode 100644 index 0000000000000..d895e4049e15d --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/trap.ll @@ -0,0 +1,136 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc --mtriple=xtensa < %s | FileCheck %s --check-prefix=XTENSA +; RUN: llc --mtriple=xtensa -mattr=+debug < %s | FileCheck %s --check-prefix=XTENSA-DEBUG +; RUN: llc --mtriple=xtensa -mattr=+debug,+density < %s | FileCheck %s --check-prefix=XTENSA-DEBUG-DENSITY +; RUN: llc --mtriple=xtensa -mattr=+density < %s | FileCheck %s --check-prefix=XTENSA-DENSITY + +define void @t() noinline optnone { +; XTENSA-LABEL: t: +; XTENSA: .cfi_startproc +; XTENSA-NEXT: # %bb.0: # %entry +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: .cfi_def_cfa_offset 16 +; XTENSA-NEXT: movi a8, 0 +; XTENSA-NEXT: memw +; XTENSA-NEXT: s32i a8, a1, 0 +; XTENSA-NEXT: ill +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret +; +; XTENSA-DEBUG-LABEL: t: +; XTENSA-DEBUG: .cfi_startproc +; XTENSA-DEBUG-NEXT: # %bb.0: # %entry +; XTENSA-DEBUG-NEXT: addi a8, a1, -16 +; XTENSA-DEBUG-NEXT: or a1, a8, a8 +; XTENSA-DEBUG-NEXT: .cfi_def_cfa_offset 16 +; XTENSA-DEBUG-NEXT: movi 
a8, 0 +; XTENSA-DEBUG-NEXT: memw +; XTENSA-DEBUG-NEXT: s32i a8, a1, 0 +; XTENSA-DEBUG-NEXT: ill +; XTENSA-DEBUG-NEXT: addi a8, a1, 16 +; XTENSA-DEBUG-NEXT: or a1, a8, a8 +; XTENSA-DEBUG-NEXT: ret +; +; XTENSA-DEBUG-DENSITY-LABEL: t: +; XTENSA-DEBUG-DENSITY: .cfi_startproc +; XTENSA-DEBUG-DENSITY-NEXT: # %bb.0: # %entry +; XTENSA-DEBUG-DENSITY-NEXT: addi a8, a1, -16 +; XTENSA-DEBUG-DENSITY-NEXT: or a1, a8, a8 +; XTENSA-DEBUG-DENSITY-NEXT: .cfi_def_cfa_offset 16 +; XTENSA-DEBUG-DENSITY-NEXT: movi a8, 0 +; XTENSA-DEBUG-DENSITY-NEXT: memw +; XTENSA-DEBUG-DENSITY-NEXT: s32i a8, a1, 0 +; XTENSA-DEBUG-DENSITY-NEXT: ill.n +; XTENSA-DEBUG-DENSITY-NEXT: addi a8, a1, 16 +; XTENSA-DEBUG-DENSITY-NEXT: or a1, a8, a8 +; XTENSA-DEBUG-DENSITY-NEXT: ret +; +; XTENSA-DENSITY-LABEL: t: +; XTENSA-DENSITY: .cfi_startproc +; XTENSA-DENSITY-NEXT: # %bb.0: # %entry +; XTENSA-DENSITY-NEXT: addi a8, a1, -16 +; XTENSA-DENSITY-NEXT: or a1, a8, a8 +; XTENSA-DENSITY-NEXT: .cfi_def_cfa_offset 16 +; XTENSA-DENSITY-NEXT: movi a8, 0 +; XTENSA-DENSITY-NEXT: memw +; XTENSA-DENSITY-NEXT: s32i a8, a1, 0 +; XTENSA-DENSITY-NEXT: ill.n +; XTENSA-DENSITY-NEXT: addi a8, a1, 16 +; XTENSA-DENSITY-NEXT: or a1, a8, a8 +; XTENSA-DENSITY-NEXT: ret +entry: + %tmp = alloca i32, align 4 + store volatile i32 0, ptr %tmp, align 4 + + call void @llvm.trap() + ret void +} + +define void @t2() { +; XTENSA-LABEL: t2: +; XTENSA: .cfi_startproc +; XTENSA-NEXT: # %bb.0: # %entry +; XTENSA-NEXT: addi a8, a1, -16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: .cfi_def_cfa_offset 16 +; XTENSA-NEXT: movi a8, 0 +; XTENSA-NEXT: memw +; XTENSA-NEXT: s32i a8, a1, 0 +; XTENSA-NEXT: ill +; XTENSA-NEXT: addi a8, a1, 16 +; XTENSA-NEXT: or a1, a8, a8 +; XTENSA-NEXT: ret +; +; XTENSA-DEBUG-LABEL: t2: +; XTENSA-DEBUG: .cfi_startproc +; XTENSA-DEBUG-NEXT: # %bb.0: # %entry +; XTENSA-DEBUG-NEXT: addi a8, a1, -16 +; XTENSA-DEBUG-NEXT: or a1, a8, a8 +; XTENSA-DEBUG-NEXT: .cfi_def_cfa_offset 16 +; XTENSA-DEBUG-NEXT: movi a8, 0 +; 
XTENSA-DEBUG-NEXT: memw +; XTENSA-DEBUG-NEXT: s32i a8, a1, 0 +; XTENSA-DEBUG-NEXT: break 1, 15 +; XTENSA-DEBUG-NEXT: addi a8, a1, 16 +; XTENSA-DEBUG-NEXT: or a1, a8, a8 +; XTENSA-DEBUG-NEXT: ret +; +; XTENSA-DEBUG-DENSITY-LABEL: t2: +; XTENSA-DEBUG-DENSITY: .cfi_startproc +; XTENSA-DEBUG-DENSITY-NEXT: # %bb.0: # %entry +; XTENSA-DEBUG-DENSITY-NEXT: addi a8, a1, -16 +; XTENSA-DEBUG-DENSITY-NEXT: or a1, a8, a8 +; XTENSA-DEBUG-DENSITY-NEXT: .cfi_def_cfa_offset 16 +; XTENSA-DEBUG-DENSITY-NEXT: movi a8, 0 +; XTENSA-DEBUG-DENSITY-NEXT: memw +; XTENSA-DEBUG-DENSITY-NEXT: s32i a8, a1, 0 +; XTENSA-DEBUG-DENSITY-NEXT: break.n 1 +; XTENSA-DEBUG-DENSITY-NEXT: addi a8, a1, 16 +; XTENSA-DEBUG-DENSITY-NEXT: or a1, a8, a8 +; XTENSA-DEBUG-DENSITY-NEXT: ret +; +; XTENSA-DENSITY-LABEL: t2: +; XTENSA-DENSITY: .cfi_startproc +; XTENSA-DENSITY-NEXT: # %bb.0: # %entry +; XTENSA-DENSITY-NEXT: addi a8, a1, -16 +; XTENSA-DENSITY-NEXT: or a1, a8, a8 +; XTENSA-DENSITY-NEXT: .cfi_def_cfa_offset 16 +; XTENSA-DENSITY-NEXT: movi a8, 0 +; XTENSA-DENSITY-NEXT: memw +; XTENSA-DENSITY-NEXT: s32i a8, a1, 0 +; XTENSA-DENSITY-NEXT: ill.n +; XTENSA-DENSITY-NEXT: addi a8, a1, 16 +; XTENSA-DENSITY-NEXT: or a1, a8, a8 +; XTENSA-DENSITY-NEXT: ret +entry: + %tmp = alloca i32, align 4 + store volatile i32 0, ptr %tmp, align 4 + + call void @llvm.debugtrap() + ret void +} + +declare void @llvm.trap() nounwind +declare void @llvm.debugtrap() nounwind From 9962ca7305f1a98f62d3a6d4b916b785c759818b Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 24 Jun 2026 15:56:22 -0700 Subject: [PATCH 426/511] [Instrumentor] Add runtime examples: [1/N] A flop counter (#205221) This adds a instrumentor-tools folder into compiler RT to showcase use cases of the instrumentor. The initial example is a program that, via instrumentation, counts the number of flops performed. Call and intrinsic support will follow after #198042. Partially developed by Claude (AI), tested and verified by me. 
--- .../cmake/Modules/AllSupportedArchDefs.cmake | 2 + compiler-rt/cmake/config-ix.cmake | 4 + compiler-rt/lib/CMakeLists.txt | 3 + .../lib/instrumentor-tools/CMakeLists.txt | 11 + compiler-rt/lib/instrumentor-tools/README.md | 49 +++ .../flop-counter/CMakeLists.txt | 67 ++++ .../instrumentor-tools/flop-counter/README.md | 77 +++++ .../flop-counter/flop_counter_config.json | 32 ++ .../flop-counter/flop_counter_runtime.cpp | 164 ++++++++++ .../instrumentor-tools/instrumentor_runtime.h | 293 ++++++++++++++++++ compiler-rt/test/CMakeLists.txt | 4 +- .../test/instrumentor-tools/CMakeLists.txt | 54 ++++ .../test/instrumentor-tools/lit.cfg.py | 75 +++++ .../instrumentor-tools/lit.site.cfg.py.in | 12 + .../test/instrumentor-tools/simple_flops.c | 49 +++ .../test/instrumentor-tools/vector_flops.cpp | 46 +++ 16 files changed, 941 insertions(+), 1 deletion(-) create mode 100644 compiler-rt/lib/instrumentor-tools/CMakeLists.txt create mode 100644 compiler-rt/lib/instrumentor-tools/README.md create mode 100644 compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt create mode 100644 compiler-rt/lib/instrumentor-tools/flop-counter/README.md create mode 100644 compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json create mode 100644 compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp create mode 100644 compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h create mode 100644 compiler-rt/test/instrumentor-tools/CMakeLists.txt create mode 100644 compiler-rt/test/instrumentor-tools/lit.cfg.py create mode 100644 compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in create mode 100644 compiler-rt/test/instrumentor-tools/simple_flops.c create mode 100644 compiler-rt/test/instrumentor-tools/vector_flops.cpp diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index a535cf9e3a8da..fee5f4a5720ed 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ 
b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -138,3 +138,5 @@ endif() if (WIN32) set(ALL_ORC_SUPPORTED_ARCH ${X86_64}) endif() + +set(ALL_INSTRUMENTOR_SUPPORTED_ARCH ${ALL_SANITIZER_COMMON_SUPPORTED_ARCH}) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 083f1c98d0f16..de67acb937afd 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -704,6 +704,9 @@ if(APPLE) list_intersect(ORC_SUPPORTED_ARCH ALL_ORC_SUPPORTED_ARCH SANITIZER_COMMON_SUPPORTED_ARCH) + list_intersect(INSTRUMENTOR_SUPPORTED_ARCH + ALL_INSTRUMENTOR_SUPPORTED_ARCH + SANITIZER_COMMON_SUPPORTED_ARCH) else() # Architectures supported by compiler-rt libraries. @@ -739,6 +742,7 @@ else() filter_available_targets(GWP_ASAN_SUPPORTED_ARCH ${ALL_GWP_ASAN_SUPPORTED_ARCH}) filter_available_targets(NSAN_SUPPORTED_ARCH ${ALL_NSAN_SUPPORTED_ARCH}) filter_available_targets(ORC_SUPPORTED_ARCH ${ALL_ORC_SUPPORTED_ARCH}) + filter_available_targets(INSTRUMENTOR_SUPPORTED_ARCH ${ALL_INSTRUMENTOR_SUPPORTED_ARCH}) endif() if (MSVC) diff --git a/compiler-rt/lib/CMakeLists.txt b/compiler-rt/lib/CMakeLists.txt index e6158ec408895..4240301068366 100644 --- a/compiler-rt/lib/CMakeLists.txt +++ b/compiler-rt/lib/CMakeLists.txt @@ -78,3 +78,6 @@ endif() # is true for fuzzers that exercise parts of the runtime. So we add the fuzzer # directories explicitly here. 
add_subdirectory(scudo/standalone/fuzz) + +# Instrumentor tools - educational tools demonstrating the Instrumentor pass +add_subdirectory(instrumentor-tools) diff --git a/compiler-rt/lib/instrumentor-tools/CMakeLists.txt b/compiler-rt/lib/instrumentor-tools/CMakeLists.txt new file mode 100644 index 0000000000000..6f8e2fe352f5a --- /dev/null +++ b/compiler-rt/lib/instrumentor-tools/CMakeLists.txt @@ -0,0 +1,11 @@ +# CMakeLists.txt for Instrumentor Examples +# +# This directory contains example runtimes that demonstrate how to use the +# LLVM Instrumentor pass for various profiling and analysis tasks. + +include(AddCompilerRT) + +add_compiler_rt_component(instrumentor-tools) + +# Add subdirectories for specific examples +add_subdirectory(flop-counter) diff --git a/compiler-rt/lib/instrumentor-tools/README.md b/compiler-rt/lib/instrumentor-tools/README.md new file mode 100644 index 0000000000000..5f50c7c7b001a --- /dev/null +++ b/compiler-rt/lib/instrumentor-tools/README.md @@ -0,0 +1,49 @@ +# Instrumentor Tools + +This directory contains example runtime libraries that demonstrate how to use +the LLVM Instrumentor pass for various profiling and analysis tasks. + +## Overview + +The LLVM Instrumentor is a configurable instrumentation pass that allows you to +insert runtime calls at various program points (e.g., function entry/exit, +memory operations, floating-point operations). Each example in this directory +provides: + +1. A runtime library that implements the instrumentation callbacks +2. An instrumentor configuration JSON file +3. 
Tests demonstrating usage + +## Building + +The instrumentor tools are built as part of the compiler-rt build: + +```bash +cmake -S llvm -B build -G Ninja \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DLLVM_ENABLE_PROJECTS="clang;compiler-rt" +ninja -C build +``` + +The runtime libraries will be installed in: +- Darwin: `lib/clang/<version>/lib/darwin/libclang_rt.<name>_osx.a` +- Linux: `lib/clang/<version>/lib/linux/libclang_rt.<name>-<arch>.a` + +Configuration files will be installed in `share/llvm/instrumentor-configs/`. + +## Adding New Tools + +To add a new instrumentor example: + +1. Create a new directory under `compiler-rt/lib/instrumentor-tools/` +2. Add your runtime implementation (`.cpp` and `.h` files) +3. Create an instrumentor configuration JSON file +4. Add a `CMakeLists.txt` (see `flop-counter/CMakeLists.txt` as a template) +5. Update `compiler-rt/lib/instrumentor-tools/CMakeLists.txt` to include your subdirectory +6. Add tests in `compiler-rt/test/instrumentor-tools/` + +## Resources + +- [Instrumentor Documentation](../../../llvm/docs/Instrumentor.rst) +- [Instrumentor Runtime Headers](../../../llvm/utils/instrumentor_runtime.h) +- [Configuration Wizard](../../../llvm/utils/instrumentor-config-wizard.py) diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt b/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt new file mode 100644 index 0000000000000..fc393f8ba3c08 --- /dev/null +++ b/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt @@ -0,0 +1,67 @@ +# CMakeLists.txt for FLOP Counter Example +# +# This example demonstrates counting floating-point operations using the +# Instrumentor pass. It provides a runtime library that can be linked with +# instrumented code to track and report FLOP counts.
+ +add_compiler_rt_component(flop-counter) + +set(FLOP_COUNTER_SOURCES + flop_counter_runtime.cpp + ) + +set(FLOP_COUNTER_HEADERS + ) + +# Include paths for instrumentor runtime headers +# The instrumentor runtime headers are in llvm/utils +include_directories(${COMPILER_RT_SOURCE_DIR}/../llvm/utils) +include_directories(${CMAKE_CURRENT_SOURCE_DIR}) + +# Common flags +set(FLOP_COUNTER_CFLAGS + ${COMPILER_RT_COMMON_CFLAGS} + -std=c++17 + ) + +# Determine supported architectures +if(APPLE) + # On Darwin, use the darwin OSX architectures + set(FLOP_COUNTER_SUPPORTED_ARCH arm64) + if(NOT CMAKE_OSX_ARCHITECTURES STREQUAL "") + set(FLOP_COUNTER_SUPPORTED_ARCH ${CMAKE_OSX_ARCHITECTURES}) + endif() + if(DARWIN_osx_ARCHS) + set(FLOP_COUNTER_SUPPORTED_ARCH ${DARWIN_osx_ARCHS}) + endif() +else() + # For non-Apple platforms, use the default target architecture + set(FLOP_COUNTER_SUPPORTED_ARCH ${COMPILER_RT_DEFAULT_TARGET_ARCH}) +endif() + +message(STATUS "FLOP Counter supported architectures: ${FLOP_COUNTER_SUPPORTED_ARCH}") + +# Build the static runtime library for Apple platforms +if(APPLE) + add_compiler_rt_runtime(clang_rt.instrumentor_flop_counter + STATIC + OS osx + ARCHS ${FLOP_COUNTER_SUPPORTED_ARCH} + CFLAGS ${FLOP_COUNTER_CFLAGS} + SOURCES ${FLOP_COUNTER_SOURCES} + ADDITIONAL_HEADERS ${FLOP_COUNTER_HEADERS} + PARENT_TARGET flop-counter) +else() + add_compiler_rt_runtime(clang_rt.instrumentor_flop_counter + STATIC + ARCHS ${FLOP_COUNTER_SUPPORTED_ARCH} + CFLAGS ${FLOP_COUNTER_CFLAGS} + SOURCES ${FLOP_COUNTER_SOURCES} + ADDITIONAL_HEADERS ${FLOP_COUNTER_HEADERS} + PARENT_TARGET flop-counter) +endif() + +# Install the configuration file as a resource +install(FILES flop_counter_config.json + DESTINATION share/llvm/instrumentor-configs + COMPONENT flop-counter) diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/README.md b/compiler-rt/lib/instrumentor-tools/flop-counter/README.md new file mode 100644 index 0000000000000..c00a3e57d1a65 --- /dev/null +++ 
b/compiler-rt/lib/instrumentor-tools/flop-counter/README.md @@ -0,0 +1,77 @@ +# FLOP Counter + +A runtime library for counting floating-point operations in programs using the LLVM Instrumentor pass. + +## Features + +- **Precision Tracking**: Separates counts for single (float), double, and extended precision operations +- **Operation Categorization**: Tracks adds, multiplications, divisions, FMA operations (TODO), and others (sqrt, sin, cos, etc.) (TODO) +- **Vector Support**: Counts FLOPs in vector operations +- **Thread-Safe**: Uses atomic operations for counter updates +- **Low Overhead**: Minimal runtime overhead for counting +- **Automatic Reporting**: Prints statistics at program exit + +## Usage + +### Basic Example + +```c +#include <math.h> +#include <stdio.h> + +double compute(double a, double b) { + return sqrt(a * a + b * b); +} + +int main() { + double result = compute(3.0, 4.0); + printf("Result: %f\n", result); + return 0; +} +``` + +Compile with: +```bash +clang++ -O2 -finstrumentor=flop_counter_config.json example.cpp \ + -lclang_rt.instrumentor_flop_counter -o example +``` + +Run: +```bash +./example +``` + +Output: +``` +Result: 5.000000 + +================================================= + FLOP Counter Statistics +================================================= +Total FLOPs: 3 +... +``` + +## Implementation Details + +### Instrumentation Points + +The FLOP counter instruments: + +1. **Binary FP Operations**: `fadd`, `fsub`, `fmul`, `fdiv`, `frem` +2. **Unary FP Operations**: `fneg` +3. TODO: **FP Intrinsics**: `llvm.fma`, `llvm.sqrt`, `llvm.sin`, `llvm.cos`, etc.
+ +### FLOP Counting Rules + +- **Regular operations**: 1 FLOP per operation +- **FMA (Fused Multiply-Add)**: 2 FLOPs (multiply + add) +- **Vector operations**: Counted per element +- **Intrinsics**: TODO + +### Configuration + +The `flop_counter_config.json` file configures the instrumentor to: +- Insert callbacks after floating-point binary/unary operations +- Pass value size, type IDs, and opcodes to the runtime +- Filter to only instrument FP math operations diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json new file mode 100644 index 0000000000000..c3131c363fded --- /dev/null +++ b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json @@ -0,0 +1,32 @@ +{ + "configuration": { + "runtime_prefix": "__flop_counter_", + "runtime_prefix.description": "The runtime API prefix.", + "runtime_stubs_file": "rt", + "target_regex": "", + "target_regex.description": "Regular expression to be matched against the module target. Only targets that match this regex will be instrumented.", + "function_regex": "", + "function_regex.description": "Regular expression to be matched against a function name. 
Only functions that match this regex will be instrumented.", + "demangle_function_names": true, + "demangle_function_names.description": "Demangle function names passed to the runtime.", + "host_enabled": true, + "host_enabled.description": "Instrument non-GPU targets", + "gpu_enabled": true, + "gpu_enabled.description": "Instrument GPU targets" + }, + "instruction_post": { + "numeric": { + "enabled": true, + "filter": "type_id < 7 || ((type_id == 18 || type_id == 19) && sub_type_id < 7)", + "filter.description": "Static property filter to exclude instrumentation.", + "type_id": true, + "type_id.description": "The operation's type id.", + "sub_type_id": true, + "sub_type_id.description": "The operation's element type id (for vector types).", + "size": true, + "size.description": "The operation's type size.", + "opcode": true, + "opcode.description": "The instruction opcode." + } + } +} diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp new file mode 100644 index 0000000000000..9eaa2d807838e --- /dev/null +++ b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp @@ -0,0 +1,164 @@ +//===-- flop_counter_runtime.cpp - FLOP Counter Runtime ------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the runtime for counting floating-point operations. +// It hooks into instrumentation points inserted by the LLVM Instrumentor pass.
+// +//===----------------------------------------------------------------------===// + +#include "../instrumentor_runtime.h" + +#include +#include +#include +#include +#include +#include + +namespace { + +/// FLOP counter statistics (thread-safe using atomics) +struct FlopCounterStats { + std::atomic TotalFlops{0}; + std::atomic FloatOps{0}; // 32-bit float operations + std::atomic DoubleOps{0}; // 64-bit double operations + std::atomic ExtendedOps{ + 0}; // 80/128-bit extended precision operations + std::atomic VectorFlops{0}; // Total FLOPs from vector operations + std::atomic AddOps{0}; + std::atomic MulOps{0}; + std::atomic DivOps{0}; + std::atomic FmaOps{0}; // Fused multiply-add operations + std::atomic OtherOps{0}; // sqrt, sin, cos, etc. +}; + +// Global statistics counters +static FlopCounterStats *Stats = nullptr; + +enum { + LLVMOpcodeFAdd = 15, + LLVMOpcodeFSub = 17, + LLVMOpcodeFMul = 19, + LLVMOpcodeFDiv = 22, + LLVMOpcodeFRem = 25, + LLVMOpcodeFNeg = 13, +}; + +} // namespace + +extern "C" { + +__attribute__((constructor(1000))) void __flop_counter_initialize() { + Stats = new FlopCounterStats(); +} + +__attribute__((destructor(1000))) void __flop_counter_finalize() { + std::printf("\n"); + std::printf("=================================================\n"); + std::printf(" FLOP Counter Statistics\n"); + std::printf("=================================================\n"); + std::printf("Total FLOPs: %20llu\n", + Stats->TotalFlops.load(std::memory_order_relaxed)); + std::printf("\n"); + std::printf("By Precision:\n"); + std::printf(" Single (float): %20llu\n", + Stats->FloatOps.load(std::memory_order_relaxed)); + std::printf(" Double (double): %20llu\n", + Stats->DoubleOps.load(std::memory_order_relaxed)); + std::printf(" Extended (fp80/fp128): %20llu\n", + Stats->ExtendedOps.load(std::memory_order_relaxed)); + std::printf(" Vector FLOPs: %20llu\n", + Stats->VectorFlops.load(std::memory_order_relaxed)); + std::printf("\n"); + std::printf("By 
Operation:\n"); + std::printf(" Addition/Subtraction: %20llu\n", + Stats->AddOps.load(std::memory_order_relaxed)); + std::printf(" Multiplication: %20llu\n", + Stats->MulOps.load(std::memory_order_relaxed)); + std::printf(" Division: %20llu\n", + Stats->DivOps.load(std::memory_order_relaxed)); + std::printf(" Fused Multiply-Add: %20llu\n", + Stats->FmaOps.load(std::memory_order_relaxed)); + std::printf(" Other (sqrt, sin, ...): %20llu\n", + Stats->OtherOps.load(std::memory_order_relaxed)); + std::printf("=================================================\n"); + + delete Stats; +} + +void __flop_counter_post_numeric(int32_t TypeId, int32_t SubTypeId, + int32_t Size, int32_t Opcode) { + bool IsVector = false; + switch (TypeId) { + case FixedVectorTyID: + case ScalableVectorTyID: + IsVector = true; + TypeId = SubTypeId; + break; + default: + break; + }; + + int32_t TypeSize = Size; + switch (TypeId) { + case HalfTyID: + case BFloatTyID: + TypeSize = 2; + break; + case FloatTyID: + TypeSize = 4; + break; + case DoubleTyID: + TypeSize = 8; + break; + case X86_FP80TyID: + case FP128TyID: + case PPC_FP128TyID: + TypeSize = 16; + break; + default: + break; + }; + + // Scalars count as one FLOP; vectors count one FLOP per element. + uint64_t FlopCount = IsVector ? (TypeSize > 0 ? Size / TypeSize : 0) : 1; + if (IsVector) { + Stats->VectorFlops.fetch_add(FlopCount, std::memory_order_relaxed); + } else { + // Categorize by precision + if (TypeId == FloatTyID) { + Stats->FloatOps.fetch_add(1, std::memory_order_relaxed); + } else if (TypeId == DoubleTyID) { + Stats->DoubleOps.fetch_add(1, std::memory_order_relaxed); + } else { + Stats->ExtendedOps.fetch_add(1, std::memory_order_relaxed); + } + } + + // Categorize by operation type + switch (Opcode) { + case LLVMOpcodeFAdd: + case LLVMOpcodeFSub: + Stats->AddOps.fetch_add(FlopCount, std::memory_order_relaxed); + break; + case LLVMOpcodeFMul: + Stats->MulOps.fetch_add(FlopCount, std::memory_order_relaxed); + break; + case LLVMOpcodeFDiv: + case LLVMOpcodeFRem: +
Stats->DivOps.fetch_add(FlopCount, std::memory_order_relaxed); + break; + default: + Stats->OtherOps.fetch_add(FlopCount, std::memory_order_relaxed); + break; + } + + Stats->TotalFlops.fetch_add(FlopCount, std::memory_order_relaxed); +} + +} // extern "C" diff --git a/compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h b/compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h new file mode 100644 index 0000000000000..641096f2c0e22 --- /dev/null +++ b/compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h @@ -0,0 +1,293 @@ +//===-- Instrumentor Runtime Helper Header -------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This header provides helper structures and functions for reading data +// generated by the LLVM Instrumentor pass and passed to runtime functions. +// +//===----------------------------------------------------------------------===// + +#ifndef INSTRUMENTOR_RUNTIME_H +#define INSTRUMENTOR_RUNTIME_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#ifdef __cplusplus +} +#endif + +/// Header for each value in a value pack. Value packs are used to pass function +/// arguments and other variable-length data to the runtime. The format is: +/// [ValueHeader][Padding][Value Data] +/// where padding aligns the value data to 8-byte boundaries. +typedef struct { + uint32_t size; // Size of the value in bytes + uint32_t type_id; // LLVM Type::TypeID of the value +} ValuePackHeader; + +/// Iterator for reading values from a value pack.
+typedef struct { + const char *current; // Current position in the pack + uint64_t offset; // Byte offset from the start + uint32_t count; // Number of elements in the pack + uint32_t index; // Current element index +} ValuePackIterator; + +/// Initialize a value pack iterator. +/// \param iter The iterator to initialize +/// \param pack_ptr Pointer to the start of the value pack +/// \param num_elements Number of elements in the pack +static inline void initValuePackIterator(ValuePackIterator *iter, + const void *pack_ptr, + uint32_t num_elements) { + iter->current = (const char *)pack_ptr; + iter->offset = 0; + iter->count = num_elements; + iter->index = 0; +} + +/// Get the header for the current value. +static inline ValuePackHeader +getValuePackHeader(const ValuePackIterator *iter) { + const ValuePackHeader *header = (const ValuePackHeader *)iter->current; + return *header; +} + +/// Get a pointer to the current value data. +static inline const void *getValuePackData(const ValuePackIterator *iter) { + // Skip header (8 bytes: size + type_id) + const char *data_start = iter->current + sizeof(ValuePackHeader); + // Calculate padding for 8-byte alignment + ValuePackHeader header = getValuePackHeader(iter); + uint32_t padding = (8 - (header.size % 8)) % 8; + // Skip padding + return data_start + padding; +} + +/// Move to the next value in the pack. +static inline void nextValuePack(ValuePackIterator *iter) { + if (iter->index >= iter->count) { + iter->current = NULL; + return; + } + ValuePackHeader header = getValuePackHeader(iter); + uint32_t padding = (8 - (header.size % 8)) % 8; + uint64_t advance = sizeof(ValuePackHeader) + padding + header.size; + iter->current += advance; + iter->offset += advance; + iter->index++; +} + +/// Get the current offset in bytes from the start of the pack. +static inline uint64_t getValuePackOffset(const ValuePackIterator *iter) { + return iter->offset; +} + +/// Extract a specific value from a value pack by index. 
+/// +/// \param pack_ptr Pointer to the start of the value pack +/// \param num_elements Number of elements in the pack +/// \param index Zero-based index of the value to extract +/// \param header Output parameter for the value header (can be NULL) +/// \return Pointer to the value data, or NULL if index is out of bounds +static inline const void *getValuePackEntry(const void *pack_ptr, + uint32_t num_elements, + uint32_t index, + ValuePackHeader *header) { + if (!pack_ptr || index >= num_elements) + return NULL; + + ValuePackIterator iter; + initValuePackIterator(&iter, pack_ptr, num_elements); + + while (iter.current != NULL && iter.index < iter.count) { + ValuePackHeader h = getValuePackHeader(&iter); + if (iter.index == index) { + if (header) + *header = h; + return getValuePackData(&iter); + } + nextValuePack(&iter); + } + + return NULL; // Index out of bounds +} + +/// LLVM Type IDs for interpreting value pack data. +/// These correspond to llvm::Type::TypeID enum values. +enum LLVMTypeID { + HalfTyID = 0, ///< 16-bit floating point type + BFloatTyID, ///< 16-bit floating point type (7-bit significand) + FloatTyID, ///< 32-bit floating point type + DoubleTyID, ///< 64-bit floating point type + X86_FP80TyID, ///< 80-bit floating point type (X87) + FP128TyID, ///< 128-bit floating point type (112-bit significand) + PPC_FP128TyID, ///< 128-bit floating point type (two 64-bits, PowerPC) + VoidTyID, ///< type with no size + LabelTyID, ///< Labels + MetadataTyID, ///< Metadata + X86_AMXTyID, ///< AMX vectors (8192 bits, X86 specific) + TokenTyID, ///< Tokens + // Derived types... see DerivedTypes.h file. 
+ IntegerTyID, ///< Arbitrary bit width integers + ByteTyID, ///< Arbitrary bit width bytes + FunctionTyID, ///< Functions + PointerTyID, ///< Pointers + StructTyID, ///< Structures + ArrayTyID, ///< Arrays + FixedVectorTyID, ///< Fixed width SIMD vector type + ScalableVectorTyID, ///< Scalable SIMD vector type + TypedPointerTyID, ///< Typed pointer used by some GPU targets + TargetExtTyID, ///< Target extension type +}; + +/// Get the string name of an LLVM Type ID; returns "unknown" if unrecognized. +static inline const char *getLLVMTypeIDName(uint32_t type_id) { + switch (type_id) { + case HalfTyID: + return "half"; + case BFloatTyID: + return "bfloat"; + case FloatTyID: + return "float"; + case DoubleTyID: + return "double"; + case X86_FP80TyID: + return "x86_fp80"; + case FP128TyID: + return "fp128"; + case PPC_FP128TyID: + return "ppc_fp128"; + case VoidTyID: + return "void"; + case LabelTyID: + return "label"; + case MetadataTyID: + return "metadata"; + case X86_AMXTyID: + return "x86_amx"; + case TokenTyID: + return "token"; + case IntegerTyID: + return "integer"; + case ByteTyID: + return "byte"; + case FunctionTyID: + return "function"; + case PointerTyID: + return "pointer"; + case StructTyID: + return "struct"; + case ArrayTyID: + return "array"; + case FixedVectorTyID: + return "fixed_vector"; + case ScalableVectorTyID: + return "scalable_vector"; + case TypedPointerTyID: + return "typed_pointer"; + case TargetExtTyID: + return "target_ext"; + default: + return "unknown"; + } +} + +#ifdef __cplusplus + +// C++ overlays for range-based iteration and quality of life improvements + +/// Range wrapper for value packs enabling range-based for loops.
+/// Example: +/// for (auto val : ValuePackRange(pack_ptr, num_elements)) { +/// // val provides access to header and data +/// } +class ValuePackRange { +public: + struct ValueRef { + ValuePackHeader header; + const void *data; + + uint32_t type_id() const { return header.type_id; } + uint32_t size() const { return header.size; } + const char *type_name() const { return getLLVMTypeIDName(header.type_id); } + + template const T &as() const { + return *static_cast(data); + } + template const T *ptr() const { + return static_cast(data); + } + }; + + class iterator { + public: + iterator(const void *ptr, uint32_t num_elements, uint64_t max_offset) + : max_offset_(max_offset) { + initValuePackIterator(&iter_, ptr, num_elements); + if (ptr && !is_valid_position()) + iter_.current = nullptr; + } + + ValueRef operator*() const { + return ValueRef{getValuePackHeader(&iter_), getValuePackData(&iter_)}; + } + + iterator &operator++() { + nextValuePack(&iter_); + if (!is_valid_position()) + iter_.current = nullptr; + return *this; + } + + bool operator!=(const iterator &other) const { + return iter_.current != other.iter_.current; + } + + private: + bool is_valid_position() const { + if (!iter_.current) + return false; + if (iter_.index >= iter_.count) + return false; + if (max_offset_ > 0 && iter_.offset >= max_offset_) + return false; + return true; + } + + ValuePackIterator iter_; + uint64_t max_offset_; + }; + + ValuePackRange(const void *ptr, uint32_t num_elements, uint64_t max_size = 0) + : ptr_(ptr), num_elements_(num_elements), max_size_(max_size) {} + + iterator begin() const { return iterator(ptr_, num_elements_, max_size_); } + iterator end() const { return iterator(nullptr, 0, 0); } + +private: + const void *ptr_; + uint32_t num_elements_; + uint64_t max_size_; +}; + +/// Template helper to extract a typed value from a value pack by index. 
+template +inline const T *getValueAs(const void *pack_ptr, uint32_t num_elements, + uint32_t index) { + return static_cast( + getValuePackEntry(pack_ptr, num_elements, index, nullptr)); +} + +#endif // __cplusplus + +#endif // INSTRUMENTOR_RUNTIME_H diff --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt index 3fab82518e75f..dae3e72711a18 100644 --- a/compiler-rt/test/CMakeLists.txt +++ b/compiler-rt/test/CMakeLists.txt @@ -60,7 +60,7 @@ umbrella_lit_testsuite_begin(check-compiler-rt) set(COMPILER_RT_KNOWN_TEST_SUITES builtins;ctx_profile;fuzzer;interception;lsan;memprof;metadata ;orc;profile;sanitizer_common;shadowcallstack - ;ubsan;xray) + ;ubsan;xray;instrumentor-tools) list(APPEND COMPILER_RT_KNOWN_TEST_SUITES ${ALL_SANITIZERS}) list(REMOVE_DUPLICATES COMPILER_RT_KNOWN_TEST_SUITES) # Sort the list so that's easier to read when emitting errors. @@ -170,6 +170,8 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS) # ShadowCallStack does not yet provide a runtime with compiler-rt, the tests # include their own minimal runtime compiler_rt_test_runtime(shadowcallstack NO_COMPILER_RT_HAS_GUARD) + + compiler_rt_test_runtime(instrumentor-tools NO_COMPILER_RT_HAS_GUARD) endif() # Now that we've traversed all the directories and know all the lit testsuites, diff --git a/compiler-rt/test/instrumentor-tools/CMakeLists.txt b/compiler-rt/test/instrumentor-tools/CMakeLists.txt new file mode 100644 index 0000000000000..cda35a017fed5 --- /dev/null +++ b/compiler-rt/test/instrumentor-tools/CMakeLists.txt @@ -0,0 +1,54 @@ +set(INSTRUMENTOR_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) +set(INSTRUMENTOR_LIT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) + +# Debug: Print all relevant variables +set(INSTRUMENTOR_TESTSUITES) +set(INSTRUMENTOR_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS}) +list(APPEND INSTRUMENTOR_TEST_DEPS flop-counter) + +# Check if INSTRUMENTOR_SUPPORTED_ARCH is defined +if(NOT DEFINED INSTRUMENTOR_SUPPORTED_ARCH) + message(STATUS "INSTRUMENTOR_SUPPORTED_ARCH is NOT 
DEFINED - using fallback") + # Use the same architectures as FLOP counter + if(APPLE) + set(INSTRUMENTOR_SUPPORTED_ARCH ${DARWIN_osx_ARCHS}) + else() + set(INSTRUMENTOR_SUPPORTED_ARCH ${COMPILER_RT_DEFAULT_TARGET_ARCH}) + endif() +endif() + +set(INSTRUMENTOR_TEST_ARCH ${INSTRUMENTOR_SUPPORTED_ARCH}) +if(APPLE) + darwin_filter_host_archs(INSTRUMENTOR_SUPPORTED_ARCH INSTRUMENTOR_TEST_ARCH) +endif() + + +macro(add_instrumentor_testsuite test_mode sanitizer arch) + set(INSTRUMENTOR_LIT_TEST_MODE "${test_mode}") + set(CONFIG_NAME ${test_mode}-${arch}) + + set(INSTRUMENTOR_TEST_TARGET_ARCH ${arch}) + get_test_cc_for_arch(${arch} INSTRUMENTOR_TEST_TARGET_CC INSTRUMENTOR_TEST_TARGET_CFLAGS) + + configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in + ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py) + list(APPEND INSTRUMENTOR_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) + +endmacro() + +if(INSTRUMENTOR_TEST_ARCH) + foreach(arch ${INSTRUMENTOR_TEST_ARCH}) + add_instrumentor_testsuite("InstrumentorTools" instrumentor-tools ${arch}) + endforeach() +else() + message(WARNING "No architectures configured for instrumentor-tools tests!") +endif() + +if(INSTRUMENTOR_TESTSUITES) + add_lit_testsuite(check-instrumentor-tools "Running the instrumentor-tools tests" + ${INSTRUMENTOR_TESTSUITES} + DEPENDS ${INSTRUMENTOR_TEST_DEPS}) +else() + message(WARNING "No test suites configured for instrumentor-tools!") +endif() diff --git a/compiler-rt/test/instrumentor-tools/lit.cfg.py b/compiler-rt/test/instrumentor-tools/lit.cfg.py new file mode 100644 index 0000000000000..00f9e120168d7 --- /dev/null +++ b/compiler-rt/test/instrumentor-tools/lit.cfg.py @@ -0,0 +1,75 @@ +# -*- Python -*- + +import os + + +def get_required_attr(config, attr_name): + attr_value = getattr(config, attr_name, None) + if attr_value is None: + lit_config.fatal( + "No attribute %r in test configuration! 
You may need to run " + "tests from your build directory or add this attribute " + "to lit.site.cfg.py " % attr_name + ) + return attr_value + + +# Setup config name. +config.name = "InstrumentorTools-" + config.target_arch + +# Setup source root. +config.test_source_root = os.path.dirname(__file__) + +# Setup executable root. +if ( + hasattr(config, "instrumentor_lit_binary_dir") + and config.instrumentor_lit_binary_dir is not None +): + config.test_exec_root = os.path.join( + config.instrumentor_lit_binary_dir, config.name + ) + +# Test suffixes. +config.suffixes = [".c", ".cpp", ".m", ".mm", ".ll", ".test"] + +# What to exclude. +config.excludes = ["Inputs"] + +# Clang flags. +target_cflags = [get_required_attr(config, "target_cflags")] +clang_cflags = target_cflags +clang_cxxflags = config.cxx_mode_flags + clang_cflags + + +def build_invocation(compile_flags): + return " " + " ".join([config.clang] + compile_flags) + " " + + +def make_lib_name(name): + if config.target_os != "Darwin": + return "clang_rt.instrumentor_" + name + return "clang_rt.instrumentor_" + name + "_osx" + + +# Add clang substitutions. 
+config.substitutions.append(("%clang ", build_invocation(clang_cflags))) +config.substitutions.append(("%clangxx ", build_invocation(clang_cxxflags))) + +flop_counter_lib = make_lib_name("flop_counter") +config.substitutions.append(("%flop_counter_lib", flop_counter_lib)) + +config.substitutions.append(("%lib_dir", config.compiler_rt_libdir)) + +# Add path to instrumentor config files +config_dir = os.path.join( + config.test_source_root, "..", "..", "lib", "instrumentor-tools" +) +config.substitutions.append(("%config_dir", config_dir)) + +# Check if running on a supported platform +if config.target_os not in [ + "Darwin", + "Linux", + "FreeBSD", +]: + config.unsupported = True diff --git a/compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in b/compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in new file mode 100644 index 0000000000000..8474c796f8eeb --- /dev/null +++ b/compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in @@ -0,0 +1,12 @@ +@LIT_SITE_CFG_IN_HEADER@ + +# Tool-specific config options. +config.instrumentor_lit_binary_dir = "@INSTRUMENTOR_LIT_BINARY_DIR@" +config.target_cflags = "@INSTRUMENTOR_TEST_TARGET_CFLAGS@" +config.target_arch = "@INSTRUMENTOR_TEST_TARGET_ARCH@" + +# Load common config for all compiler-rt lit tests. +lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured") + +# Load tool-specific config that would do the real work. +lit_config.load_config(config, "@INSTRUMENTOR_LIT_SOURCE_DIR@/lit.cfg.py") diff --git a/compiler-rt/test/instrumentor-tools/simple_flops.c b/compiler-rt/test/instrumentor-tools/simple_flops.c new file mode 100644 index 0000000000000..9434cb506f602 --- /dev/null +++ b/compiler-rt/test/instrumentor-tools/simple_flops.c @@ -0,0 +1,49 @@ +// Test basic FLOP counting functionality +// +// This test verifies that the FLOP counter correctly counts floating-point +// operations in a simple program. 
+// +// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/flop-counter/flop_counter_config.json %s -L%lib_dir -l%flop_counter_lib -o %t +// RUN: %t | FileCheck %s +// +// TODO: For the correct values we need to track fmuladd calls too. +// +// CHECK: Total FLOPs: 400 +// CHECK: Single (float): 100 +// CHECK: Double (double): 300 + +#include + +// Simple function with known FLOP count +float compute_float(float a, float b, float c) { + // 3 FLOPs: add, mul, add + return a + b * c; +} + +double compute_double(double a, double b) { + // 4 FLOPs: mul, mul, add, div + return (a * a + b * b) / 2.0; +} + +int main(void) { + float f1 = 1.0f, f2 = 2.0f, f3 = 3.0f; + double d1 = 4.0, d2 = 5.0; + + // Call functions multiple times to get meaningful counts + float result_f = 0.0f; + for (int i = 0; i < 100; i++) { + result_f += compute_float(f1, f2, f3); + } + + double result_d = 0.0; + for (int i = 0; i < 100; i++) { + result_d += compute_double(d1, d2); + } + + // Prevent optimization from removing the computations + if (result_f > 0.0f && result_d > 0.0) { + printf("Computation complete\n"); + } + + return 0; +} diff --git a/compiler-rt/test/instrumentor-tools/vector_flops.cpp b/compiler-rt/test/instrumentor-tools/vector_flops.cpp new file mode 100644 index 0000000000000..db729e5886849 --- /dev/null +++ b/compiler-rt/test/instrumentor-tools/vector_flops.cpp @@ -0,0 +1,46 @@ +// Test FLOP counting with vector operations +// +// This test verifies that the FLOP counter correctly counts vector +// floating-point operations. 
+// +// RUN: %clangxx -O2 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/flop-counter/flop_counter_config.json %s -L%lib_dir -l%flop_counter_lib -o %t +// RUN: %t | FileCheck %s +// +// CHECK: Total FLOPs: +// CHECK: Vector FLOPs: + +#include +#include + +// Function using vector operations (if vectorized by the compiler) +void vector_compute(float *a, float *b, float *c, int n) { + for (int i = 0; i < n; i++) { + c[i] = std::sqrt(a[i] * a[i] + b[i] * b[i]); + } +} + +int main(void) { + const int N = 1000; + float a[N], b[N], c[N]; + + // Initialize arrays + for (int i = 0; i < N; i++) { + a[i] = (float)i; + b[i] = (float)(i + 1); + } + + // Compute + vector_compute(a, b, c, N); + + // Prevent optimization + float sum = 0.0f; + for (int i = 0; i < N; i++) { + sum += c[i]; + } + + if (sum > 0.0f) { + printf("Vector computation complete\n"); + } + + return 0; +} From afd57b264b78a8e2e558bc85a3b7364f8cf4b87b Mon Sep 17 00:00:00 2001 From: Sam Elliott Date: Wed, 24 Jun 2026 16:12:53 -0700 Subject: [PATCH 427/511] [RISCV][clang] Use fcf-protection flag in Multilib Selection (#205202) This ensures that we can separate out multilibs that are or are not built with control flow protection enabled. The initial version of the patch claims all values of these flags are incompatible. It might be the case that we could make this logic more complex if some versions do become compatible. 
--- clang/lib/Driver/ToolChain.cpp | 12 ++++++++++++ clang/test/Driver/print-multi-selection-flags.c | 13 +++++++++++++ 2 files changed, 25 insertions(+) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 7d93e7f65daf5..328f4f8c8f420 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -445,6 +445,18 @@ static void getRISCVMultilibFlags(const Driver &D, const llvm::Triple &Triple, Result.push_back("-fsanitize=shadow-call-stack"); else Result.push_back("-fno-sanitize=shadow-call-stack"); + + const Arg *CFProtectionArg = + Args.getLastArgNoClaim(options::OPT_fcf_protection_EQ); + StringRef CFProtectionVal = + CFProtectionArg ? CFProtectionArg->getValue() : "none"; + Result.push_back(("-fcf-protection=" + CFProtectionVal).str()); + + if (CFProtectionVal == "branch" || CFProtectionVal == "full") { + if (const Arg *SchemeArg = + Args.getLastArgNoClaim(options::OPT_mcf_branch_label_scheme_EQ)) + Result.push_back(SchemeArg->getAsString(Args)); + } } Multilib::flags_list diff --git a/clang/test/Driver/print-multi-selection-flags.c b/clang/test/Driver/print-multi-selection-flags.c index ba7b325892f9c..e5a116234c321 100644 --- a/clang/test/Driver/print-multi-selection-flags.c +++ b/clang/test/Driver/print-multi-selection-flags.c @@ -154,3 +154,16 @@ // CHECK-OPT-OS: -Os // CHECK-OPT-NOT: -Oz // CHECK-OPT-NOT: -Os + +// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=riscv64-none-elf | FileCheck --check-prefix=CHECK-CF-PROTECTION-NONE --implicit-check-not="mcf-branch-label-scheme" %s +// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=riscv64-none-elf -fcf-protection=none | FileCheck --check-prefix=CHECK-CF-PROTECTION-NONE --implicit-check-not="mcf-branch-label-scheme" %s +// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=riscv64-none-elf 
-fcf-protection=return | FileCheck --check-prefix=CHECK-CF-PROTECTION-RETURN --implicit-check-not="mcf-branch-label-scheme" %s +// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=riscv64-none-elf -fcf-protection=branch -mcf-branch-label-scheme=unlabeled | FileCheck --check-prefix=CHECK-CF-PROTECTION-BRANCH %s +// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=riscv64-none-elf -fcf-protection=full -mcf-branch-label-scheme=unlabeled | FileCheck --check-prefix=CHECK-CF-PROTECTION-FULL %s +// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=riscv64-none-elf -fcf-protection -mcf-branch-label-scheme=unlabeled | FileCheck --check-prefix=CHECK-CF-PROTECTION-FULL %s +// CHECK-CF-PROTECTION-NONE: -fcf-protection=none +// CHECK-CF-PROTECTION-RETURN: -fcf-protection=return +// CHECK-CF-PROTECTION-BRANCH: -fcf-protection=branch +// CHECK-CF-PROTECTION-BRANCH: -mcf-branch-label-scheme=unlabeled +// CHECK-CF-PROTECTION-FULL: -fcf-protection=full +// CHECK-CF-PROTECTION-FULL: -mcf-branch-label-scheme=unlabeled From 564002a16b70a53dbddd1d16c20fb5c8f813f761 Mon Sep 17 00:00:00 2001 From: Alex Langford Date: Wed, 24 Jun 2026 16:15:06 -0700 Subject: [PATCH 428/511] [lldb][NFC] Remove ConstString from Language::MethodNameVariant (#205688) Language::MethodNameVariant is for when a given method name may have several language-defined variants. For example, we may see an objective-C method name with a category that should be searchable via the name without the category. The ObjCLanguage plugin computes these names without checking that they are actually useful or even exist. Because these variant names are stored in ConstString, they will live forever even if they are never used. 
--- lldb/include/lldb/Target/Language.h | 12 ++++++------ .../source/Breakpoint/BreakpointResolverName.cpp | 2 +- .../Plugins/Language/ObjC/ObjCLanguage.cpp | 16 ++++++++-------- lldb/source/Plugins/Language/ObjC/ObjCLanguage.h | 2 +- lldb/source/Symbol/Symtab.cpp | 8 ++++---- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/lldb/include/lldb/Target/Language.h b/lldb/include/lldb/Target/Language.h index 573133f48eb4b..bd3faf89658c9 100644 --- a/lldb/include/lldb/Target/Language.h +++ b/lldb/include/lldb/Target/Language.h @@ -200,22 +200,22 @@ class Language : public PluginInterface { virtual const char *GetLanguageSpecificTypeLookupHelp(); class MethodNameVariant { - ConstString m_name; + std::string m_name; lldb::FunctionNameType m_type; public: - MethodNameVariant(ConstString name, lldb::FunctionNameType type) - : m_name(name), m_type(type) {} - ConstString GetName() const { return m_name; } + MethodNameVariant(std::string name, lldb::FunctionNameType type) + : m_name(std::move(name)), m_type(type) {} + llvm::StringRef GetName() const { return m_name; } lldb::FunctionNameType GetType() const { return m_type; } }; // If a language can have more than one possible name for a method, this // function can be used to enumerate them. This is useful when doing name // lookups. 
virtual std::vector - GetMethodNameVariants(ConstString method_name) const { + GetMethodNameVariants(llvm::StringRef method_name) const { return std::vector(); - }; + } class MethodName { public: diff --git a/lldb/source/Breakpoint/BreakpointResolverName.cpp b/lldb/source/Breakpoint/BreakpointResolverName.cpp index 13557d203e976..e20a66e2853f5 100644 --- a/lldb/source/Breakpoint/BreakpointResolverName.cpp +++ b/lldb/source/Breakpoint/BreakpointResolverName.cpp @@ -228,7 +228,7 @@ void BreakpointResolverName::AddNameLookup(ConstString name, std::vector variant_lookups = Module::LookupInfo::MakeLookupInfos(name, variant.GetType(), lang->GetLanguageType(), - variant.GetName()); + ConstString(variant.GetName())); llvm::append_range(m_lookups, variant_lookups); } } diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp index c3494bea83de0..4aec66951ed12 100644 --- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp +++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.cpp @@ -177,14 +177,14 @@ std::string ObjCLanguage::ObjCMethodName::GetFullNameWithoutCategory() const { } std::vector -ObjCLanguage::GetMethodNameVariants(ConstString method_name) const { +ObjCLanguage::GetMethodNameVariants(llvm::StringRef method_name) const { std::vector variant_names; std::optional objc_method = - ObjCLanguage::ObjCMethodName::Create(method_name.GetStringRef(), false); + ObjCLanguage::ObjCMethodName::Create(method_name, false); if (!objc_method) return variant_names; - variant_names.emplace_back(ConstString(objc_method->GetSelector()), + variant_names.emplace_back(objc_method->GetSelector().str(), lldb::eFunctionNameTypeSelector); const std::string name_sans_category = @@ -192,29 +192,29 @@ ObjCLanguage::GetMethodNameVariants(ConstString method_name) const { if (objc_method->IsClassMethod() || objc_method->IsInstanceMethod()) { if (!name_sans_category.empty()) - variant_names.emplace_back(ConstString(name_sans_category), + 
variant_names.emplace_back(name_sans_category, lldb::eFunctionNameTypeFull); } else { StreamString strm; strm.Printf("+%s", objc_method->GetFullName().c_str()); - variant_names.emplace_back(ConstString(strm.GetString()), + variant_names.emplace_back(strm.GetString().str(), lldb::eFunctionNameTypeFull); strm.Clear(); strm.Printf("-%s", objc_method->GetFullName().c_str()); - variant_names.emplace_back(ConstString(strm.GetString()), + variant_names.emplace_back(strm.GetString().str(), lldb::eFunctionNameTypeFull); strm.Clear(); if (!name_sans_category.empty()) { strm.Printf("+%s", name_sans_category.c_str()); - variant_names.emplace_back(ConstString(strm.GetString()), + variant_names.emplace_back(strm.GetString().str(), lldb::eFunctionNameTypeFull); strm.Clear(); strm.Printf("-%s", name_sans_category.c_str()); - variant_names.emplace_back(ConstString(strm.GetString()), + variant_names.emplace_back(strm.GetString().str(), lldb::eFunctionNameTypeFull); } } diff --git a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h index 7b8f56063fe8b..98f3b2ec3f6fa 100644 --- a/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h +++ b/lldb/source/Plugins/Language/ObjC/ObjCLanguage.h @@ -137,7 +137,7 @@ class ObjCLanguage : public Language { // variant_names[3] => "-[NSString myStringWithCString:]" // Also returns the FunctionNameType of each possible name. 
std::vector - GetMethodNameVariants(ConstString method_name) const override; + GetMethodNameVariants(llvm::StringRef method_name) const override; std::pair> GetFunctionNameInfo(ConstString name) const override; diff --git a/lldb/source/Symbol/Symtab.cpp b/lldb/source/Symbol/Symtab.cpp index 767b1d416361f..fbb4ddac6e7a3 100644 --- a/lldb/source/Symbol/Symtab.cpp +++ b/lldb/source/Symbol/Symtab.cpp @@ -364,13 +364,13 @@ void Symtab::InitNameIndexes() { for (Language *lang : languages) { for (auto variant : lang->GetMethodNameVariants(name)) { if (variant.GetType() & lldb::eFunctionNameTypeSelector) - selector_to_index.Append(variant.GetName(), value); + selector_to_index.Append(ConstString(variant.GetName()), value); else if (variant.GetType() & lldb::eFunctionNameTypeFull) - name_to_index.Append(variant.GetName(), value); + name_to_index.Append(ConstString(variant.GetName()), value); else if (variant.GetType() & lldb::eFunctionNameTypeMethod) - method_to_index.Append(variant.GetName(), value); + method_to_index.Append(ConstString(variant.GetName()), value); else if (variant.GetType() & lldb::eFunctionNameTypeBase) - basename_to_index.Append(variant.GetName(), value); + basename_to_index.Append(ConstString(variant.GetName()), value); } } } From feabaefbf86a470c45d407ab6d26f38a58312d3e Mon Sep 17 00:00:00 2001 From: Michael Spencer Date: Wed, 24 Jun 2026 16:16:16 -0700 Subject: [PATCH 429/511] [Clang] Don't suppress vtable emission for classes with -fmodules-debuginfo (#204662) 847f9cb0e868 made `Sema::DefineUsedVTables` skip `Consumer.HandleVTable()` when `Class->shouldEmitInExternalSource()` is true. This works for named C++20 modules as they have an object file, but does not hold for -fmodules-debuginfo / -fpch-debuginfo. This patch additionally gates that on `Class->isInNamedModule()`. This is the same pattern used by the rest of codegen for this situation. Needing to check this everywhere is a bit unfortunate. 
It would be good to eventually refactor this class of checks to have clearer semantics around named modules, debug info, and -fmodules-codegen. Fixes https://github.com/llvm/llvm-project/issues/198587 Assisted-by: Claude Code: opus-4-8 --- clang/include/clang/AST/DeclBase.h | 4 ++++ clang/lib/Sema/SemaDeclCXX.cpp | 7 ++++++- clang/test/PCH/pch-debuginfo-vtable.cpp | 28 +++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) create mode 100644 clang/test/PCH/pch-debuginfo-vtable.cpp diff --git a/clang/include/clang/AST/DeclBase.h b/clang/include/clang/AST/DeclBase.h index 3c5a093439ceb..bc2fcf9b8fb42 100644 --- a/clang/include/clang/AST/DeclBase.h +++ b/clang/include/clang/AST/DeclBase.h @@ -687,6 +687,10 @@ class alignas(8) Decl { /// Whether the definition of the declaration should be emitted in external /// sources. + /// FIXME: This conflates two questions: if the entity should be emitted into + /// other object files (because there's no primary), and if the debug info + /// should be emitted into other object files. This matters for + // `-fmodules-debuginfo`, `-fmodules-codegen`, and `isInNamedModule()`. bool shouldEmitInExternalSource() const; /// Whether this declaration comes from explicit global module. diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 418ff01f3d98a..ffce0a146865e 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -19293,7 +19293,12 @@ bool Sema::DefineUsedVTables() { DefinedAnything = true; MarkVirtualMembersReferenced(Loc, Class); CXXRecordDecl *Canonical = Class->getCanonicalDecl(); - if (VTablesUsed[Canonical] && !Class->shouldEmitInExternalSource()) + // The vtable is assumed to be emitted in an external source only for + // classes attached to a named module, which is guaranteed to have an object + // file. This isn't true for -fmodules-debuginfo, which still has + // shouldEmitInExternalSource as true so that debug info gets suppressed. 
+ if (VTablesUsed[Canonical] && + !(Class->isInNamedModule() && Class->shouldEmitInExternalSource())) Consumer.HandleVTable(Class); // Warn if we're emitting a weak vtable. The vtable will be weak if there is diff --git a/clang/test/PCH/pch-debuginfo-vtable.cpp b/clang/test/PCH/pch-debuginfo-vtable.cpp new file mode 100644 index 0000000000000..f460c740b9733 --- /dev/null +++ b/clang/test/PCH/pch-debuginfo-vtable.cpp @@ -0,0 +1,28 @@ +// Check that a key function defined in a TU that includes a -fpch-debuginfo +// PCH still emits the vtable. The PCH object only carries debug info, not the +// vtable definition, so the importing TU must emit it itself. + +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t + +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple -fmodules-debuginfo \ +// RUN: -building-pch-with-obj -x c++-header -emit-pch %t/b.h -o %t/b.pch + +// RUN: %clang_cc1 -std=c++20 -triple %itanium_abi_triple -include-pch %t/b.pch \ +// RUN: -emit-llvm -o - %t/b.cpp | FileCheck %s + +// CHECK: @_ZTV1B = {{.*}}constant + +//--- b.h +#pragma once +struct B { + B() = default; + virtual ~B(); + virtual void f(); +}; + +//--- b.cpp +#include "b.h" +B::~B() { } +void B::f() { } From ba4363542fcf4a7ae4f89a0e42bf12f2d89d70d1 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 24 Jun 2026 16:25:10 -0700 Subject: [PATCH 430/511] [Instrumentor][FIX] Ensure CXX headers are available (#205693) Try to address failure in #205221, which results in not found. This is CMake code copied from other compiler-rt projects using . 
--- .../lib/instrumentor-tools/flop-counter/CMakeLists.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt b/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt index fc393f8ba3c08..86a0d92baf5a6 100644 --- a/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt +++ b/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt @@ -24,6 +24,11 @@ set(FLOP_COUNTER_CFLAGS -std=c++17 ) +# flop counter uses C++ standard library headers. +if (TARGET cxx-headers OR HAVE_LIBCXX) + set(DEPS cxx-headers) +endif() + # Determine supported architectures if(APPLE) # On Darwin, use the darwin OSX architectures @@ -50,6 +55,7 @@ if(APPLE) CFLAGS ${FLOP_COUNTER_CFLAGS} SOURCES ${FLOP_COUNTER_SOURCES} ADDITIONAL_HEADERS ${FLOP_COUNTER_HEADERS} + DEPS ${DEPS} PARENT_TARGET flop-counter) else() add_compiler_rt_runtime(clang_rt.instrumentor_flop_counter @@ -58,6 +64,7 @@ else() CFLAGS ${FLOP_COUNTER_CFLAGS} SOURCES ${FLOP_COUNTER_SOURCES} ADDITIONAL_HEADERS ${FLOP_COUNTER_HEADERS} + DEPS ${DEPS} PARENT_TARGET flop-counter) endif() From 67ee4125c29e51d262fe1dc6ba3630d15975992a Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 24 Jun 2026 16:43:40 -0700 Subject: [PATCH 431/511] Revert "[Instrumentor] Add runtime examples: [1/N] A flop counter (#205221)" (#205696) --- .../cmake/Modules/AllSupportedArchDefs.cmake | 2 - compiler-rt/cmake/config-ix.cmake | 4 - compiler-rt/lib/CMakeLists.txt | 3 - .../lib/instrumentor-tools/CMakeLists.txt | 11 - compiler-rt/lib/instrumentor-tools/README.md | 49 --- .../flop-counter/CMakeLists.txt | 74 ----- .../instrumentor-tools/flop-counter/README.md | 77 ----- .../flop-counter/flop_counter_config.json | 32 -- .../flop-counter/flop_counter_runtime.cpp | 164 ---------- .../instrumentor-tools/instrumentor_runtime.h | 293 ------------------ compiler-rt/test/CMakeLists.txt | 4 +- .../test/instrumentor-tools/CMakeLists.txt | 54 ---- 
.../test/instrumentor-tools/lit.cfg.py | 75 ----- .../instrumentor-tools/lit.site.cfg.py.in | 12 - .../test/instrumentor-tools/simple_flops.c | 49 --- .../test/instrumentor-tools/vector_flops.cpp | 46 --- 16 files changed, 1 insertion(+), 948 deletions(-) delete mode 100644 compiler-rt/lib/instrumentor-tools/CMakeLists.txt delete mode 100644 compiler-rt/lib/instrumentor-tools/README.md delete mode 100644 compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt delete mode 100644 compiler-rt/lib/instrumentor-tools/flop-counter/README.md delete mode 100644 compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json delete mode 100644 compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp delete mode 100644 compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h delete mode 100644 compiler-rt/test/instrumentor-tools/CMakeLists.txt delete mode 100644 compiler-rt/test/instrumentor-tools/lit.cfg.py delete mode 100644 compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in delete mode 100644 compiler-rt/test/instrumentor-tools/simple_flops.c delete mode 100644 compiler-rt/test/instrumentor-tools/vector_flops.cpp diff --git a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake index fee5f4a5720ed..a535cf9e3a8da 100644 --- a/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake +++ b/compiler-rt/cmake/Modules/AllSupportedArchDefs.cmake @@ -138,5 +138,3 @@ endif() if (WIN32) set(ALL_ORC_SUPPORTED_ARCH ${X86_64}) endif() - -set(ALL_INSTRUMENTOR_SUPPORTED_ARCH ${ALL_SANITIZER_COMMON_SUPPORTED_ARCH}) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index de67acb937afd..083f1c98d0f16 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -704,9 +704,6 @@ if(APPLE) list_intersect(ORC_SUPPORTED_ARCH ALL_ORC_SUPPORTED_ARCH SANITIZER_COMMON_SUPPORTED_ARCH) - list_intersect(INSTRUMENTOR_SUPPORTED_ARCH - 
ALL_INSTRUMENTOR_SUPPORTED_ARCH - SANITIZER_COMMON_SUPPORTED_ARCH) else() # Architectures supported by compiler-rt libraries. @@ -742,7 +739,6 @@ else() filter_available_targets(GWP_ASAN_SUPPORTED_ARCH ${ALL_GWP_ASAN_SUPPORTED_ARCH}) filter_available_targets(NSAN_SUPPORTED_ARCH ${ALL_NSAN_SUPPORTED_ARCH}) filter_available_targets(ORC_SUPPORTED_ARCH ${ALL_ORC_SUPPORTED_ARCH}) - filter_available_targets(INSTRUMENTOR_SUPPORTED_ARCH ${ALL_INSTRUMENTOR_SUPPORTED_ARCH}) endif() if (MSVC) diff --git a/compiler-rt/lib/CMakeLists.txt b/compiler-rt/lib/CMakeLists.txt index 4240301068366..e6158ec408895 100644 --- a/compiler-rt/lib/CMakeLists.txt +++ b/compiler-rt/lib/CMakeLists.txt @@ -78,6 +78,3 @@ endif() # is true for fuzzers that exercise parts of the runtime. So we add the fuzzer # directories explicitly here. add_subdirectory(scudo/standalone/fuzz) - -# Instrumentor tools - educational tools demonstrating the Instrumentor pass -add_subdirectory(instrumentor-tools) diff --git a/compiler-rt/lib/instrumentor-tools/CMakeLists.txt b/compiler-rt/lib/instrumentor-tools/CMakeLists.txt deleted file mode 100644 index 6f8e2fe352f5a..0000000000000 --- a/compiler-rt/lib/instrumentor-tools/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -# CMakeLists.txt for Instrumentor Examples -# -# This directory contains example runtimes that demonstrate how to use the -# LLVM Instrumentor pass for various profiling and analysis tasks. - -include(AddCompilerRT) - -add_compiler_rt_component(instrumentor-tools) - -# Add subdirectories for specific examples -add_subdirectory(flop-counter) diff --git a/compiler-rt/lib/instrumentor-tools/README.md b/compiler-rt/lib/instrumentor-tools/README.md deleted file mode 100644 index 5f50c7c7b001a..0000000000000 --- a/compiler-rt/lib/instrumentor-tools/README.md +++ /dev/null @@ -1,49 +0,0 @@ -# Instrumentor Tools - -This directory contains example runtime libraries that demonstrate how to use -the LLVM Instrumentor pass for various profiling and analysis tasks. 
- -## Overview - -The LLVM Instrumentor is a configurable instrumentation pass that allows you to -insert runtime calls at various program points (e.g., function entry/exit, -memory operations, floating-point operations). Each example in this directory -provides: - -1. A runtime library that implements the instrumentation callbacks -2. An instrumentor configuration JSON file -3. Tests demonstrating usage - -## Building - -The instrumentor tools are built as part of the compiler-rt build: - -```bash -cmake -S llvm -B build -G Ninja \ - -DCMAKE_BUILD_TYPE=RelWithDebInfo \ - -DLLVM_ENABLE_PROJECTS="clang;compiler-rt" -ninja -C build -``` - -The runtime libraries will be installed in: -- Darwin: `lib/clang//lib/darwin/libclang_rt._osx.a` -- Linux: `lib/clang//lib/linux/libclang_rt.-.a` - -Configuration files will be installed in `share/llvm/instrumentor-configs/`. - -## Adding New Tools - -To add a new instrumentor example: - -1. Create a new directory under `compiler-rt/lib/instrumentor-tools/` -2. Add your runtime implementation (`.cpp` and `.h` files) -3. Create an instrumentor configuration JSON file -4. Add a `CMakeLists.txt` (see `flop-counter/CMakeLists.txt` as a template) -5. Update `compiler-rt/lib/instrumentor-tools/CMakeLists.txt` to include your subdirectory -6. 
Add tests in `compiler-rt/test/instrumentor-tools/` - -## Resources - -- [Instrumentor Documentation](../../../llvm/docs/Instrumentor.rst) -- [Instrumentor Runtime Headers](../../../llvm/utils/instrumentor_runtime.h) -- [Configuration Wizard](../../../llvm/utils/instrumentor-config-wizard.py) diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt b/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt deleted file mode 100644 index 86a0d92baf5a6..0000000000000 --- a/compiler-rt/lib/instrumentor-tools/flop-counter/CMakeLists.txt +++ /dev/null @@ -1,74 +0,0 @@ -# CMakeLists.txt for FLOP Counter Example -# -# This example demonstrates counting floating-point operations using the -# Instrumentor pass. It provides a runtime library that can be linked with -# instrumented code to track and report FLOP counts. - -add_compiler_rt_component(flop-counter) - -set(FLOP_COUNTER_SOURCES - flop_counter_runtime.cpp - ) - -set(FLOP_COUNTER_HEADERS - ) - -# Include paths for instrumentor runtime headers -# The instrumentor runtime headers are in llvm/utils -include_directories(${COMPILER_RT_SOURCE_DIR}/../llvm/utils) -include_directories(${CMAKE_CURRENT_SOURCE_DIR}) - -# Common flags -set(FLOP_COUNTER_CFLAGS - ${COMPILER_RT_COMMON_CFLAGS} - -std=c++17 - ) - -# flop counter uses C++ standard library headers. 
-if (TARGET cxx-headers OR HAVE_LIBCXX) - set(DEPS cxx-headers) -endif() - -# Determine supported architectures -if(APPLE) - # On Darwin, use the darwin OSX architectures - set(FLOP_COUNTER_SUPPORTED_ARCH arm64) - if(NOT CMAKE_OSX_ARCHITECTURES STREQUAL "") - set(FLOP_COUNTER_SUPPORTED_ARCH ${CMAKE_OSX_ARCHITECTURES}) - endif() - if(DARWIN_osx_ARCHS) - set(FLOP_COUNTER_SUPPORTED_ARCH ${DARWIN_osx_ARCHS}) - endif() -else() - # For non-Apple platforms, use the default target architecture - set(FLOP_COUNTER_SUPPORTED_ARCH ${COMPILER_RT_DEFAULT_TARGET_ARCH}) -endif() - -message(STATUS "FLOP Counter supported architectures: ${FLOP_COUNTER_SUPPORTED_ARCH}") - -# Build the static runtime library for Apple platforms -if(APPLE) - add_compiler_rt_runtime(clang_rt.instrumentor_flop_counter - STATIC - OS osx - ARCHS ${FLOP_COUNTER_SUPPORTED_ARCH} - CFLAGS ${FLOP_COUNTER_CFLAGS} - SOURCES ${FLOP_COUNTER_SOURCES} - ADDITIONAL_HEADERS ${FLOP_COUNTER_HEADERS} - DEPS ${DEPS} - PARENT_TARGET flop-counter) -else() - add_compiler_rt_runtime(clang_rt.instrumentor_flop_counter - STATIC - ARCHS ${FLOP_COUNTER_SUPPORTED_ARCH} - CFLAGS ${FLOP_COUNTER_CFLAGS} - SOURCES ${FLOP_COUNTER_SOURCES} - ADDITIONAL_HEADERS ${FLOP_COUNTER_HEADERS} - DEPS ${DEPS} - PARENT_TARGET flop-counter) -endif() - -# Install the configuration file as a resource -install(FILES flop_counter_config.json - DESTINATION share/llvm/instrumentor-configs - COMPONENT flop-counter) diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/README.md b/compiler-rt/lib/instrumentor-tools/flop-counter/README.md deleted file mode 100644 index c00a3e57d1a65..0000000000000 --- a/compiler-rt/lib/instrumentor-tools/flop-counter/README.md +++ /dev/null @@ -1,77 +0,0 @@ -# FLOP Counter - -A runtime library for counting floating-point operations in programs using the LLVM Instrumentor pass. 
- -## Features - -- **Precision Tracking**: Separates counts for single (float), double, and extended precision operations -- **Operation Categorization**: Tracks adds, multiplications, divisions, FMA operations (TODO), and others (sqrt, sin, cos, etc.) (TODO) -- **Vector Support**: Counts FLOPs in vector operations -- **Thread-Safe**: Uses atomic operations for counter updates -- **Low Overhead**: Minimal runtime overhead for counting -- **Automatic Reporting**: Prints statistics at program exit - -## Usage - -### Basic Example - -```c -#include -#include - -double compute(double a, double b) { - return sqrt(a * a + b * b); -} - -int main() { - double result = compute(3.0, 4.0); - printf("Result: %f\n", result); - return 0; -} -``` - -Compile with: -```bash -clangxx -O2 -finstrumentor=flop_counter_config.json example.cpp \ - -lclang_rt.flop_counter -o example -``` - -Run: -```bash -./example -``` - -Output: -``` -Result: 5.000000 - -================================================= - FLOP Counter Statistics -================================================= -Total FLOPs: 3 -... -``` - -## Implementation Details - -### Instrumentation Points - -The FLOP counter instruments: - -1. **Binary FP Operations**: `fadd`, `fsub`, `fmul`, `fdiv`, `frem` -2. **Unary FP Operations**: `fneg` -3. TODO: **FP Intrinsics**: `llvm.fma`, `llvm.sqrt`, `llvm.sin`, `llvm.cos`, etc. 
- -### FLOP Counting Rules - -- **Regular operations**: 1 FLOP per operation -- **FMA (Fused Multiply-Add)**: 2 FLOPs (multiply + add) -- **Vector operations**: Counted per element -- **Intrinsics**: TODO - -### Configuration - -The `flop_counter_config.json` file configures the instrumentor to: -- Insert callbacks after floating-point binary/unary operations -- Pass value size, type IDs, and opcodes to the runtime -- Filter to only instrument FP math operations diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json deleted file mode 100644 index c3131c363fded..0000000000000 --- a/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_config.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "configuration": { - "runtime_prefix": "__flop_counter_", - "runtime_prefix.description": "The runtime API prefix.", - "runtime_stubs_file": "rt", - "target_regex": "", - "target_regex.description": "Regular expression to be matched against the module target. Only targets that match this regex will be instrumented.", - "function_regex": "", - "function_regex.description": "Regular expression to be matched against a function name. 
Only functions that match this regex will be instrumented.", - "demangle_function_names": true, - "demangle_function_names.description": "Demangle functions names passed to the runtime.", - "host_enabled": true, - "host_enabled.description": "Instrument non-GPU targets", - "gpu_enabled": true, - "gpu_enabled.description": "Instrument GPU targets" - }, - "instruction_post": { - "numeric": { - "enabled": true, - "filter": "type_id < 7 || ((type_id == 17 || type_id == 18) && sub_type_id < 7)", - "filter.description": "Static property filter to exclude instrumentation.", - "type_id": true, - "type_id.description": "The operation's type id.", - "sub_type_id": true, - "sub_type_id.description": "The operation's type id.", - "size": true, - "size.description": "The operation's type size.", - "opcode": true, - "opcode.description": "The instruction opcode." - } - } -} diff --git a/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp b/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp deleted file mode 100644 index 9eaa2d807838e..0000000000000 --- a/compiler-rt/lib/instrumentor-tools/flop-counter/flop_counter_runtime.cpp +++ /dev/null @@ -1,164 +0,0 @@ -//===-- flop_counter_runtime.cpp - FLOP Counter Runtime ------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the runtime for counting floating-point operations. -// It hooks into instrumentation points inserted by the LLVM Instrumentor pass. 
-// -//===----------------------------------------------------------------------===// - -#include "../instrumentor_runtime.h" - -#include -#include -#include -#include -#include -#include - -namespace { - -/// FLOP counter statistics (thread-safe using atomics) -struct FlopCounterStats { - std::atomic TotalFlops{0}; - std::atomic FloatOps{0}; // 32-bit float operations - std::atomic DoubleOps{0}; // 64-bit double operations - std::atomic ExtendedOps{ - 0}; // 80/128-bit extended precision operations - std::atomic VectorFlops{0}; // Total FLOPs from vector operations - std::atomic AddOps{0}; - std::atomic MulOps{0}; - std::atomic DivOps{0}; - std::atomic FmaOps{0}; // Fused multiply-add operations - std::atomic OtherOps{0}; // sqrt, sin, cos, etc. -}; - -// Global statistics counters -static FlopCounterStats *Stats = nullptr; - -enum { - LLVMOpcodeFAdd = 15, - LLVMOpcodeFSub = 17, - LLVMOpcodeFMul = 19, - LLVMOpcodeFDiv = 22, - LLVMOpcodeFRem = 25, - LLVMOpcodeFNeg = 13, -}; - -} // namespace - -extern "C" { - -__attribute__((constructor(1000))) void __flop_counter_initialize() { - Stats = new FlopCounterStats(); -} - -__attribute__((destructor(1000))) void __flop_counter_finalize() { - std::printf("\n"); - std::printf("=================================================\n"); - std::printf(" FLOP Counter Statistics\n"); - std::printf("=================================================\n"); - std::printf("Total FLOPs: %20llu\n", - Stats->TotalFlops.load(std::memory_order_relaxed)); - std::printf("\n"); - std::printf("By Precision:\n"); - std::printf(" Single (float): %20llu\n", - Stats->FloatOps.load(std::memory_order_relaxed)); - std::printf(" Double (double): %20llu\n", - Stats->DoubleOps.load(std::memory_order_relaxed)); - std::printf(" Extended (fp80/fp128): %20llu\n", - Stats->ExtendedOps.load(std::memory_order_relaxed)); - std::printf(" Vector FLOPs: %20llu\n", - Stats->VectorFlops.load(std::memory_order_relaxed)); - std::printf("\n"); - std::printf("By 
Operation:\n"); - std::printf(" Addition/Subtraction: %20llu\n", - Stats->AddOps.load(std::memory_order_relaxed)); - std::printf(" Multiplication: %20llu\n", - Stats->MulOps.load(std::memory_order_relaxed)); - std::printf(" Division: %20llu\n", - Stats->DivOps.load(std::memory_order_relaxed)); - std::printf(" Fused Multiply-Add: %20llu\n", - Stats->FmaOps.load(std::memory_order_relaxed)); - std::printf(" Other (sqrt, sin, ...): %20llu\n", - Stats->OtherOps.load(std::memory_order_relaxed)); - std::printf("=================================================\n"); - - delete Stats; -} - -void __flop_counter_post_numeric(int32_t TypeId, int32_t SubTypeId, - int32_t Size, int32_t Opcode) { - bool IsVector = false; - switch (TypeId) { - case FixedVectorTyID: - case ScalableVectorTyID: - IsVector = true; - TypeId = SubTypeId; - break; - default: - break; - }; - - int32_t TypeSize = Size; - switch (TypeId) { - case HalfTyID: - case BFloatTyID: - TypeSize = 2; - break; - case FloatTyID: - TypeSize = 4; - break; - case DoubleTyID: - TypeSize = 8; - break; - case X86_FP80TyID: - case FP128TyID: - case PPC_FP128TyID: - TypeSize = 16; - break; - default: - break; - }; - - // Determine FLOP count based on whether it's a vector operation - uint64_t FlopCount = Size / TypeSize; - if (IsVector) { - Stats->VectorFlops.fetch_add(FlopCount, std::memory_order_relaxed); - } else { - // Categorize by precision - if (TypeId == 2) { - Stats->FloatOps.fetch_add(1, std::memory_order_relaxed); - } else if (TypeId == 3) { - Stats->DoubleOps.fetch_add(1, std::memory_order_relaxed); - } else { - Stats->ExtendedOps.fetch_add(1, std::memory_order_relaxed); - } - } - - // Categorize by operation type - switch (Opcode) { - case LLVMOpcodeFAdd: - case LLVMOpcodeFSub: - Stats->AddOps.fetch_add(FlopCount, std::memory_order_relaxed); - break; - case LLVMOpcodeFMul: - Stats->MulOps.fetch_add(FlopCount, std::memory_order_relaxed); - break; - case LLVMOpcodeFDiv: - case LLVMOpcodeFRem: - 
Stats->DivOps.fetch_add(FlopCount, std::memory_order_relaxed); - break; - default: - Stats->OtherOps.fetch_add(FlopCount, std::memory_order_relaxed); - break; - } - - Stats->TotalFlops.fetch_add(FlopCount, std::memory_order_relaxed); -} - -} // extern "C" diff --git a/compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h b/compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h deleted file mode 100644 index 641096f2c0e22..0000000000000 --- a/compiler-rt/lib/instrumentor-tools/instrumentor_runtime.h +++ /dev/null @@ -1,293 +0,0 @@ -//===-- Instrumentor Runtime Helper Header -------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This header provides helper structures and functions for reading data -// generated by the LLVM Instrumentor pass and passed to runtime functions. -// -//===----------------------------------------------------------------------===// - -#ifndef INSTRUMENTOR_RUNTIME_H -#define INSTRUMENTOR_RUNTIME_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include - -#ifdef __cplusplus -} -#endif - -/// Header for each value in a value pack. Value packs are used to pass function -/// arguments and other variable-length data to the runtime. The format is: -/// [ValueHeader][Padding][Value Data] -/// where padding aligns the value data to 8-byte boundaries. -typedef struct { - uint32_t size; // Size of the value in bytes - uint32_t type_id; // LLVM Type::TypeID of the value -} ValuePackHeader; - -/// Iterator for reading values from a value pack. 
-typedef struct { - const char *current; // Current position in the pack - uint64_t offset; // Byte offset from the start - uint32_t count; // Number of elements in the pack - uint32_t index; // Current element index -} ValuePackIterator; - -/// Initialize a value pack iterator. -/// \param iter The iterator to initialize -/// \param pack_ptr Pointer to the start of the value pack -/// \param num_elements Number of elements in the pack -static inline void initValuePackIterator(ValuePackIterator *iter, - const void *pack_ptr, - uint32_t num_elements) { - iter->current = (const char *)pack_ptr; - iter->offset = 0; - iter->count = num_elements; - iter->index = 0; -} - -/// Get the header for the current value. -static inline ValuePackHeader -getValuePackHeader(const ValuePackIterator *iter) { - const ValuePackHeader *header = (const ValuePackHeader *)iter->current; - return *header; -} - -/// Get a pointer to the current value data. -static inline const void *getValuePackData(const ValuePackIterator *iter) { - // Skip header (8 bytes: size + type_id) - const char *data_start = iter->current + sizeof(ValuePackHeader); - // Calculate padding for 8-byte alignment - ValuePackHeader header = getValuePackHeader(iter); - uint32_t padding = (8 - (header.size % 8)) % 8; - // Skip padding - return data_start + padding; -} - -/// Move to the next value in the pack. -static inline void nextValuePack(ValuePackIterator *iter) { - if (iter->index >= iter->count) { - iter->current = NULL; - return; - } - ValuePackHeader header = getValuePackHeader(iter); - uint32_t padding = (8 - (header.size % 8)) % 8; - uint64_t advance = sizeof(ValuePackHeader) + padding + header.size; - iter->current += advance; - iter->offset += advance; - iter->index++; -} - -/// Get the current offset in bytes from the start of the pack. -static inline uint64_t getValuePackOffset(const ValuePackIterator *iter) { - return iter->offset; -} - -/// Extract a specific value from a value pack by index. 
-/// -/// \param pack_ptr Pointer to the start of the value pack -/// \param num_elements Number of elements in the pack -/// \param index Zero-based index of the value to extract -/// \param header Output parameter for the value header (can be NULL) -/// \return Pointer to the value data, or NULL if index is out of bounds -static inline const void *getValuePackEntry(const void *pack_ptr, - uint32_t num_elements, - uint32_t index, - ValuePackHeader *header) { - if (!pack_ptr || index >= num_elements) - return NULL; - - ValuePackIterator iter; - initValuePackIterator(&iter, pack_ptr, num_elements); - - while (iter.current != NULL && iter.index < iter.count) { - ValuePackHeader h = getValuePackHeader(&iter); - if (iter.index == index) { - if (header) - *header = h; - return getValuePackData(&iter); - } - nextValuePack(&iter); - } - - return NULL; // Index out of bounds -} - -/// LLVM Type IDs for interpreting value pack data. -/// These correspond to llvm::Type::TypeID enum values. -enum LLVMTypeID { - HalfTyID = 0, ///< 16-bit floating point type - BFloatTyID, ///< 16-bit floating point type (7-bit significand) - FloatTyID, ///< 32-bit floating point type - DoubleTyID, ///< 64-bit floating point type - X86_FP80TyID, ///< 80-bit floating point type (X87) - FP128TyID, ///< 128-bit floating point type (112-bit significand) - PPC_FP128TyID, ///< 128-bit floating point type (two 64-bits, PowerPC) - VoidTyID, ///< type with no size - LabelTyID, ///< Labels - MetadataTyID, ///< Metadata - X86_AMXTyID, ///< AMX vectors (8192 bits, X86 specific) - TokenTyID, ///< Tokens - // Derived types... see DerivedTypes.h file. 
- IntegerTyID, ///< Arbitrary bit width integers - ByteTyID, ///< Arbitrary bit width bytes - FunctionTyID, ///< Functions - PointerTyID, ///< Pointers - StructTyID, ///< Structures - ArrayTyID, ///< Arrays - FixedVectorTyID, ///< Fixed width SIMD vector type - ScalableVectorTyID, ///< Scalable SIMD vector type - TypedPointerTyID, ///< Typed pointer used by some GPU targets - TargetExtTyID, ///< Target extension type -}; - -/// Get the string name of an LLVM Type ID. -static inline const char *getLLVMTypeIDName(uint32_t type_id) { - switch (type_id) { - case HalfTyID: - return "half"; - case BFloatTyID: - return "bfloat"; - case FloatTyID: - return "float"; - case DoubleTyID: - return "double"; - case X86_FP80TyID: - return "x86_fp80"; - case FP128TyID: - return "fp128"; - case PPC_FP128TyID: - return "ppc_fp128"; - case VoidTyID: - return "void"; - case LabelTyID: - return "label"; - case MetadataTyID: - return "metadata"; - case X86_AMXTyID: - return "x86_amx"; - case TokenTyID: - return "token"; - case IntegerTyID: - return "integer"; - case ByteTyID: - return "integer"; - case FunctionTyID: - return "function"; - case PointerTyID: - return "pointer"; - case StructTyID: - return "struct"; - case ArrayTyID: - return "array"; - case FixedVectorTyID: - return "fixed_vector"; - case ScalableVectorTyID: - return "scalable_vector"; - case TypedPointerTyID: - return "typed_pointer"; - case TargetExtTyID: - return "target_ext"; - default: - return "unknown"; - } -} - -#ifdef __cplusplus - -// C++ overlays for range-based iteration and quality of life improvements - -/// Range wrapper for value packs enabling range-based for loops. 
-/// Example: -/// for (auto val : ValuePackRange(pack_ptr, num_elements)) { -/// // val provides access to header and data -/// } -class ValuePackRange { -public: - struct ValueRef { - ValuePackHeader header; - const void *data; - - uint32_t type_id() const { return header.type_id; } - uint32_t size() const { return header.size; } - const char *type_name() const { return getLLVMTypeIDName(header.type_id); } - - template const T &as() const { - return *static_cast(data); - } - template const T *ptr() const { - return static_cast(data); - } - }; - - class iterator { - public: - iterator(const void *ptr, uint32_t num_elements, uint64_t max_offset) - : max_offset_(max_offset) { - initValuePackIterator(&iter_, ptr, num_elements); - if (ptr && !is_valid_position()) - iter_.current = nullptr; - } - - ValueRef operator*() const { - return ValueRef{getValuePackHeader(&iter_), getValuePackData(&iter_)}; - } - - iterator &operator++() { - nextValuePack(&iter_); - if (!is_valid_position()) - iter_.current = nullptr; - return *this; - } - - bool operator!=(const iterator &other) const { - return iter_.current != other.iter_.current; - } - - private: - bool is_valid_position() const { - if (!iter_.current) - return false; - if (iter_.index >= iter_.count) - return false; - if (max_offset_ > 0 && iter_.offset >= max_offset_) - return false; - return true; - } - - ValuePackIterator iter_; - uint64_t max_offset_; - }; - - ValuePackRange(const void *ptr, uint32_t num_elements, uint64_t max_size = 0) - : ptr_(ptr), num_elements_(num_elements), max_size_(max_size) {} - - iterator begin() const { return iterator(ptr_, num_elements_, max_size_); } - iterator end() const { return iterator(nullptr, 0, 0); } - -private: - const void *ptr_; - uint32_t num_elements_; - uint64_t max_size_; -}; - -/// Template helper to extract a typed value from a value pack by index. 
-template -inline const T *getValueAs(const void *pack_ptr, uint32_t num_elements, - uint32_t index) { - return static_cast( - getValuePackEntry(pack_ptr, num_elements, index, nullptr)); -} - -#endif // __cplusplus - -#endif // INSTRUMENTOR_RUNTIME_H diff --git a/compiler-rt/test/CMakeLists.txt b/compiler-rt/test/CMakeLists.txt index dae3e72711a18..3fab82518e75f 100644 --- a/compiler-rt/test/CMakeLists.txt +++ b/compiler-rt/test/CMakeLists.txt @@ -60,7 +60,7 @@ umbrella_lit_testsuite_begin(check-compiler-rt) set(COMPILER_RT_KNOWN_TEST_SUITES builtins;ctx_profile;fuzzer;interception;lsan;memprof;metadata ;orc;profile;sanitizer_common;shadowcallstack - ;ubsan;xray;instrumentor-tools) + ;ubsan;xray) list(APPEND COMPILER_RT_KNOWN_TEST_SUITES ${ALL_SANITIZERS}) list(REMOVE_DUPLICATES COMPILER_RT_KNOWN_TEST_SUITES) # Sort the list so that's easier to read when emitting errors. @@ -170,8 +170,6 @@ if(COMPILER_RT_CAN_EXECUTE_TESTS) # ShadowCallStack does not yet provide a runtime with compiler-rt, the tests # include their own minimal runtime compiler_rt_test_runtime(shadowcallstack NO_COMPILER_RT_HAS_GUARD) - - compiler_rt_test_runtime(instrumentor-tools NO_COMPILER_RT_HAS_GUARD) endif() # Now that we've traversed all the directories and know all the lit testsuites, diff --git a/compiler-rt/test/instrumentor-tools/CMakeLists.txt b/compiler-rt/test/instrumentor-tools/CMakeLists.txt deleted file mode 100644 index cda35a017fed5..0000000000000 --- a/compiler-rt/test/instrumentor-tools/CMakeLists.txt +++ /dev/null @@ -1,54 +0,0 @@ -set(INSTRUMENTOR_LIT_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) -set(INSTRUMENTOR_LIT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) - -# Debug: Print all relevant variables -set(INSTRUMENTOR_TESTSUITES) -set(INSTRUMENTOR_TEST_DEPS ${SANITIZER_COMMON_LIT_TEST_DEPS}) -list(APPEND INSTRUMENTOR_TEST_DEPS flop-counter) - -# Check if INSTRUMENTOR_SUPPORTED_ARCH is defined -if(NOT DEFINED INSTRUMENTOR_SUPPORTED_ARCH) - message(STATUS "INSTRUMENTOR_SUPPORTED_ARCH is 
NOT DEFINED - using fallback") - # Use the same architectures as FLOP counter - if(APPLE) - set(INSTRUMENTOR_SUPPORTED_ARCH ${DARWIN_osx_ARCHS}) - else() - set(INSTRUMENTOR_SUPPORTED_ARCH ${COMPILER_RT_DEFAULT_TARGET_ARCH}) - endif() -endif() - -set(INSTRUMENTOR_TEST_ARCH ${INSTRUMENTOR_SUPPORTED_ARCH}) -if(APPLE) - darwin_filter_host_archs(INSTRUMENTOR_SUPPORTED_ARCH INSTRUMENTOR_TEST_ARCH) -endif() - - -macro(add_instrumentor_testsuite test_mode sanitizer arch) - set(INSTRUMENTOR_LIT_TEST_MODE "${test_mode}") - set(CONFIG_NAME ${test_mode}-${arch}) - - set(INSTRUMENTOR_TEST_TARGET_ARCH ${arch}) - get_test_cc_for_arch(${arch} INSTRUMENTOR_TEST_TARGET_CC INSTRUMENTOR_TEST_TARGET_CFLAGS) - - configure_lit_site_cfg( - ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.py.in - ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}/lit.site.cfg.py) - list(APPEND INSTRUMENTOR_TESTSUITES ${CMAKE_CURRENT_BINARY_DIR}/${CONFIG_NAME}) - -endmacro() - -if(INSTRUMENTOR_TEST_ARCH) - foreach(arch ${INSTRUMENTOR_TEST_ARCH}) - add_instrumentor_testsuite("InstrumentorTools" instrumentor-tools ${arch}) - endforeach() -else() - message(WARNING "No architectures configured for instrumentor-tools tests!") -endif() - -if(INSTRUMENTOR_TESTSUITES) - add_lit_testsuite(check-instrumentor-tools "Running the instrumentor-tools tests" - ${INSTRUMENTOR_TESTSUITES} - DEPENDS ${INSTRUMENTOR_TEST_DEPS}) -else() - message(WARNING "No test suites configured for instrumentor-tools!") -endif() diff --git a/compiler-rt/test/instrumentor-tools/lit.cfg.py b/compiler-rt/test/instrumentor-tools/lit.cfg.py deleted file mode 100644 index 00f9e120168d7..0000000000000 --- a/compiler-rt/test/instrumentor-tools/lit.cfg.py +++ /dev/null @@ -1,75 +0,0 @@ -# -*- Python -*- - -import os - - -def get_required_attr(config, attr_name): - attr_value = getattr(config, attr_name, None) - if attr_value is None: - lit_config.fatal( - "No attribute %r in test configuration! 
You may need to run " - "tests from your build directory or add this attribute " - "to lit.site.cfg.py " % attr_name - ) - return attr_value - - -# Setup config name. -config.name = "InstrumentorTools-" + config.target_arch - -# Setup source root. -config.test_source_root = os.path.dirname(__file__) - -# Setup executable root. -if ( - hasattr(config, "instrumentor_lit_binary_dir") - and config.instrumentor_lit_binary_dir is not None -): - config.test_exec_root = os.path.join( - config.instrumentor_lit_binary_dir, config.name - ) - -# Test suffixes. -config.suffixes = [".c", ".cpp", ".m", ".mm", ".ll", ".test"] - -# What to exclude. -config.excludes = ["Inputs"] - -# Clang flags. -target_cflags = [get_required_attr(config, "target_cflags")] -clang_cflags = target_cflags -clang_cxxflags = config.cxx_mode_flags + clang_cflags - - -def build_invocation(compile_flags): - return " " + " ".join([config.clang] + compile_flags) + " " - - -def make_lib_name(name): - if config.target_os != "Darwin": - return "clang_rt.instrumentor_" + name - return "clang_rt.instrumentor_" + name + "_osx" - - -# Add clang substitutions. 
-config.substitutions.append(("%clang ", build_invocation(clang_cflags))) -config.substitutions.append(("%clangxx ", build_invocation(clang_cxxflags))) - -flop_counter_lib = make_lib_name("flop_counter") -config.substitutions.append(("%flop_counter_lib", flop_counter_lib)) - -config.substitutions.append(("%lib_dir", config.compiler_rt_libdir)) - -# Add path to instrumentor config files -config_dir = os.path.join( - config.test_source_root, "..", "..", "lib", "instrumentor-tools" -) -config.substitutions.append(("%config_dir", config_dir)) - -# Check if running on a supported platform -if config.target_os not in [ - "Darwin", - "Linux", - "FreeBSD", -]: - config.unsupported = True diff --git a/compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in b/compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in deleted file mode 100644 index 8474c796f8eeb..0000000000000 --- a/compiler-rt/test/instrumentor-tools/lit.site.cfg.py.in +++ /dev/null @@ -1,12 +0,0 @@ -@LIT_SITE_CFG_IN_HEADER@ - -# Tool-specific config options. -config.instrumentor_lit_binary_dir = "@INSTRUMENTOR_LIT_BINARY_DIR@" -config.target_cflags = "@INSTRUMENTOR_TEST_TARGET_CFLAGS@" -config.target_arch = "@INSTRUMENTOR_TEST_TARGET_ARCH@" - -# Load common config for all compiler-rt lit tests. -lit_config.load_config(config, "@COMPILER_RT_BINARY_DIR@/test/lit.common.configured") - -# Load tool-specific config that would do the real work. -lit_config.load_config(config, "@INSTRUMENTOR_LIT_SOURCE_DIR@/lit.cfg.py") diff --git a/compiler-rt/test/instrumentor-tools/simple_flops.c b/compiler-rt/test/instrumentor-tools/simple_flops.c deleted file mode 100644 index 9434cb506f602..0000000000000 --- a/compiler-rt/test/instrumentor-tools/simple_flops.c +++ /dev/null @@ -1,49 +0,0 @@ -// Test basic FLOP counting functionality -// -// This test verifies that the FLOP counter correctly counts floating-point -// operations in a simple program. 
-// -// RUN: %clangxx -O0 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/flop-counter/flop_counter_config.json %s -L%lib_dir -l%flop_counter_lib -o %t -// RUN: %t | FileCheck %s -// -// TODO: For the correct values we need to track fmuladd calls too. -// -// CHECK: Total FLOPs: 400 -// CHECK: Single (float): 100 -// CHECK: Double (double): 300 - -#include - -// Simple function with known FLOP count -float compute_float(float a, float b, float c) { - // 3 FLOPs: add, mul, add - return a + b * c; -} - -double compute_double(double a, double b) { - // 4 FLOPs: mul, mul, add, div - return (a * a + b * b) / 2.0; -} - -int main(void) { - float f1 = 1.0f, f2 = 2.0f, f3 = 3.0f; - double d1 = 4.0, d2 = 5.0; - - // Call functions multiple times to get meaningful counts - float result_f = 0.0f; - for (int i = 0; i < 100; i++) { - result_f += compute_float(f1, f2, f3); - } - - double result_d = 0.0; - for (int i = 0; i < 100; i++) { - result_d += compute_double(d1, d2); - } - - // Prevent optimization from removing the computations - if (result_f > 0.0f && result_d > 0.0) { - printf("Computation complete\n"); - } - - return 0; -} diff --git a/compiler-rt/test/instrumentor-tools/vector_flops.cpp b/compiler-rt/test/instrumentor-tools/vector_flops.cpp deleted file mode 100644 index db729e5886849..0000000000000 --- a/compiler-rt/test/instrumentor-tools/vector_flops.cpp +++ /dev/null @@ -1,46 +0,0 @@ -// Test FLOP counting with vector operations -// -// This test verifies that the FLOP counter correctly counts vector -// floating-point operations. 
-// -// RUN: %clangxx -O2 -g -mllvm -enable-instrumentor -mllvm -instrumentor-read-config-files=%config_dir/flop-counter/flop_counter_config.json %s -L%lib_dir -l%flop_counter_lib -o %t -// RUN: %t | FileCheck %s -// -// CHECK: Total FLOPs: -// CHECK: Vector FLOPs: - -#include -#include - -// Function using vector operations (if vectorized by the compiler) -void vector_compute(float *a, float *b, float *c, int n) { - for (int i = 0; i < n; i++) { - c[i] = std::sqrt(a[i] * a[i] + b[i] * b[i]); - } -} - -int main(void) { - const int N = 1000; - float a[N], b[N], c[N]; - - // Initialize arrays - for (int i = 0; i < N; i++) { - a[i] = (float)i; - b[i] = (float)(i + 1); - } - - // Compute - vector_compute(a, b, c, N); - - // Prevent optimization - float sum = 0.0f; - for (int i = 0; i < N; i++) { - sum += c[i]; - } - - if (sum > 0.0f) { - printf("Vector computation complete\n"); - } - - return 0; -} From 3a5054a307e264648d2295f1892dca9231ac83a6 Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Wed, 24 Jun 2026 16:44:57 -0700 Subject: [PATCH 432/511] [clang][ssaf] Shorten directory name to ScalableStaticAnalysis (#204697) The directory name ScalableStaticAnalysisFramework produces build paths that exceed Windows' MAX_PATH limit (260 chars) on downstream CI bots. The clang-ssaf-format / clang-ssaf-linker tool names and SSAF-prefixed source filenames are unchanged. 
Assisted-By: Claude Opus 4.7 --------- Co-authored-by: Aviral Goel Co-authored-by: Aviral Goel --- .github/new-prs-labeler.yml | 8 ++-- .../developer-docs/ForceLinkerHeaders.rst | 16 +++---- .../developer-docs/HowToExtend.rst | 16 +++---- .../SummaryExtractionInternals.rst | 0 .../developer-docs/index.rst | 0 .../index.rst | 0 .../user-docs/SummaryExtraction.rst | 0 clang/docs/index.rst | 2 +- .../clang/Basic/DiagnosticFrontendKinds.td | 12 ++--- clang/include/clang/Basic/DiagnosticGroups.td | 2 +- .../Analyses/CallGraph/CallGraphSummary.h | 12 ++--- .../EntityPointerLevel/EntityPointerLevel.h | 8 ++-- .../EntityPointerLevelFormat.h | 12 ++--- .../Analyses/PointerFlow/PointerFlow.h | 10 ++-- .../PointerFlow/PointerFlowAnalysis.h | 14 +++--- .../Analyses/PointerFlow/PointerFlowFormat.h | 10 ++-- .../UnsafeBufferUsage/UnsafeBufferUsage.h | 12 ++--- .../UnsafeBufferUsageAnalysis.h | 14 +++--- .../BuiltinAnchorSources.def | 0 .../Core/ASTEntityMapping.h | 8 ++-- .../Core/EntityLinker/EntityLinker.h | 8 ++-- .../Core/EntityLinker/EntitySummaryEncoding.h | 8 ++-- .../Core/EntityLinker/LUSummary.h | 20 ++++---- .../Core/EntityLinker/LUSummaryEncoding.h | 18 +++---- .../Core/EntityLinker/TUSummaryEncoding.h | 18 +++---- .../Core/Model/BuildNamespace.h | 6 +-- .../Core/Model/EntityId.h | 6 +-- .../Core/Model/EntityIdTable.h | 10 ++-- .../Core/Model/EntityLinkage.h | 6 +-- .../Core/Model/EntityName.h | 8 ++-- .../Core/Model/PrivateFieldNames.def | 0 .../Core/Model/SummaryName.h | 6 +-- .../Core/Serialization/JSONFormat.h | 10 ++-- .../Core/Serialization/SerializationFormat.h | 26 +++++----- .../SerializationFormatRegistry.h | 10 ++-- .../Core/SummaryData/LUSummaryConsumer.h | 12 ++--- .../Core/SummaryData/SummaryData.h | 6 +-- .../Core/SummaryData/SummaryDataBuilder.h | 18 +++---- .../SummaryData/SummaryDataBuilderRegistry.h | 8 ++-- .../Core/SummaryData/SummaryDataStore.h | 14 +++--- .../Core/SummaryData/SummaryDataTraits.h | 8 ++-- .../Core/Support/ErrorBuilder.h | 6 +-- 
.../Core/Support/FormatProviders.h | 18 +++---- .../Core/TUSummary/EntitySummary.h | 8 ++-- .../Core/TUSummary/ExtractorRegistry.h | 10 ++-- .../Core/TUSummary/TUSummary.h | 20 ++++---- .../Core/TUSummary/TUSummaryBuilder.h | 12 ++--- .../Core/TUSummary/TUSummaryExtractor.h | 8 ++-- .../Core/WholeProgramAnalysis/AnalysisBase.h | 8 ++-- .../WholeProgramAnalysis/AnalysisDriver.h | 10 ++-- .../Core/WholeProgramAnalysis/AnalysisName.h | 6 +-- .../WholeProgramAnalysis/AnalysisRegistry.h | 14 +++--- .../WholeProgramAnalysis/AnalysisResult.h | 6 +-- .../WholeProgramAnalysis/AnalysisTraits.h | 8 ++-- .../WholeProgramAnalysis/DerivedAnalysis.h | 18 +++---- .../WholeProgramAnalysis/SummaryAnalysis.h | 20 ++++---- .../Core/WholeProgramAnalysis/WPASuite.h | 18 +++---- .../TUSummaryExtractorFrontendAction.h | 6 +-- .../SSAFBuiltinForceLinker.h | 8 ++-- .../SSAFForceLinker.h | 8 ++-- .../SourceTransformation/SourceEditEmitter.h | 6 +-- .../SourceTransformation/Transformation.h | 12 ++--- .../TransformationRegistry.h | 16 +++---- .../TransformationReportEmitter.h | 6 +-- .../Tool/Utils.h | 8 ++-- clang/include/module.modulemap | 8 ++-- clang/lib/CMakeLists.txt | 2 +- clang/lib/Driver/CMakeLists.txt | 8 ++-- clang/lib/Driver/Driver.cpp | 6 +-- clang/lib/FrontendTool/CMakeLists.txt | 8 ++-- .../ExecuteCompilerInvocation.cpp | 4 +- .../Analyses/CMakeLists.txt | 4 +- .../Analyses/CallGraph/CallGraphExtractor.cpp | 6 +-- .../CallGraph/CallGraphJSONFormat.cpp | 8 ++-- .../EntityPointerLevel/EntityPointerLevel.cpp | 4 +- .../EntityPointerLevelFormat.cpp | 6 +-- .../Analyses/PointerFlow/PointerFlow.cpp | 2 +- .../PointerFlow/PointerFlowAnalysis.cpp | 16 +++---- .../PointerFlow/PointerFlowExtractor.cpp | 14 +++--- .../PointerFlow/PointerFlowFormat.cpp | 8 ++-- .../Analyses/SSAFAnalysesCommon.cpp | 0 .../Analyses/SSAFAnalysesCommon.h | 12 ++--- .../UnsafeBufferUsage/UnsafeBufferUsage.cpp | 2 +- .../UnsafeBufferUsageAnalysis.cpp | 16 +++---- .../UnsafeBufferUsageExtractor.cpp | 10 ++-- 
.../UnsafeBufferUsageFormat.cpp | 8 ++-- .../CMakeLists.txt | 0 .../Core/ASTEntityMapping.cpp | 4 +- .../Core/CMakeLists.txt | 2 +- .../Core/EntityLinker/EntityLinker.cpp | 14 +++--- .../Core/Model/BuildNamespace.cpp | 2 +- .../Core/Model/EntityId.cpp | 2 +- .../Core/Model/EntityIdTable.cpp | 2 +- .../Core/Model/EntityLinkage.cpp | 2 +- .../Core/Model/EntityName.cpp | 2 +- .../Core/Model/SummaryName.cpp | 2 +- .../Core/ModelStringConversions.h | 10 ++-- .../Serialization/JSONFormat/Artifact.cpp | 0 .../JSONFormat/JSONEntitySummaryEncoding.cpp | 0 .../JSONFormat/JSONEntitySummaryEncoding.h | 10 ++-- .../JSONFormat/JSONFormatImpl.cpp | 2 +- .../Serialization/JSONFormat/JSONFormatImpl.h | 18 +++---- .../Serialization/JSONFormat/LUSummary.cpp | 2 +- .../JSONFormat/LUSummaryEncoding.cpp | 2 +- .../Serialization/JSONFormat/TUSummary.cpp | 2 +- .../JSONFormat/TUSummaryEncoding.cpp | 2 +- .../Serialization/JSONFormat/WPASuite.cpp | 2 +- .../SerializationFormatRegistry.cpp | 2 +- .../Core/SummaryData/LUSummaryConsumer.cpp | 6 +-- .../SummaryDataBuilderRegistry.cpp | 2 +- .../Core/Support/ErrorBuilder.cpp | 2 +- .../Core/TUSummary/ExtractorRegistry.cpp | 4 +- .../Core/TUSummary/TUSummaryBuilder.cpp | 10 ++-- .../Core/TUSummary/TUSummaryExtractor.cpp | 10 ++-- .../WholeProgramAnalysis/AnalysisDriver.cpp | 10 ++-- .../WholeProgramAnalysis/AnalysisName.cpp | 2 +- .../WholeProgramAnalysis/AnalysisRegistry.cpp | 4 +- .../Frontend/CMakeLists.txt | 6 +-- .../TUSummaryExtractorFrontendAction.cpp | 12 ++--- .../Plugins/CMakeLists.txt | 2 +- .../Plugins/ExamplePlugin/AnalysisResults.h | 6 +-- .../Plugins/ExamplePlugin/CMakeLists.txt | 2 +- .../Plugins/ExamplePlugin/PairsAnalysis.cpp | 10 ++-- .../Plugins/ExamplePlugin/TagsAnalysis.cpp | 10 ++-- .../ExamplePlugin/TagsPairsAnalysis.cpp | 6 +-- .../SourceTransformation/CMakeLists.txt | 4 +- .../TransformationRegistry.cpp | 2 +- .../Tool/CMakeLists.txt | 12 +++++ .../Tool/Utils.cpp | 2 +- .../Tool/CMakeLists.txt | 12 ----- 
.../tools/clang-ssaf-analyzer/CMakeLists.txt | 8 ++-- .../clang-ssaf-analyzer/SSAFAnalyzer.cpp | 10 ++-- clang/tools/clang-ssaf-format/CMakeLists.txt | 8 ++-- clang/tools/clang-ssaf-format/SSAFFormat.cpp | 12 ++--- clang/tools/clang-ssaf-linker/CMakeLists.txt | 8 ++-- clang/tools/clang-ssaf-linker/SSAFLinker.cpp | 12 ++--- clang/unittests/CMakeLists.txt | 2 +- .../ASTEntityMappingTest.cpp | 2 +- .../CallGraph/CallGraphExtractorTest.cpp | 10 ++-- .../Analyses/PointerFlow/PointerFlowTest.cpp | 10 ++-- .../PointerFlow/PointerFlowWPATest.cpp | 20 ++++---- .../UnsafeBufferUsageTest.cpp | 22 ++++----- .../UnsafeBufferUsageWPATest.cpp | 20 ++++---- .../BuildNamespaceTest.cpp | 4 +- .../CMakeLists.txt | 8 ++-- .../EntityIdTableTest.cpp | 8 ++-- .../EntityIdTest.cpp | 8 ++-- .../EntityLinkageTest.cpp | 4 +- .../EntityLinkerTest.cpp | 20 ++++---- .../EntityNameTest.cpp | 6 +-- .../ErrorBuilderTest.cpp | 2 +- .../FindDecl.h | 6 +-- .../TUSummaryExtractorFrontendActionTest.cpp | 27 +++++------ .../LUSummaryTest.cpp | 4 +- .../ModelStringConversionsTest.cpp | 2 +- .../Registries/FancyAnalysisData.cpp | 2 +- .../Registries/MockSerializationFormat.cpp | 18 +++---- .../Registries/MockSerializationFormat.h | 10 ++-- .../Registries/MockSummaryExtractor1.cpp | 4 +- .../Registries/MockSummaryExtractor2.cpp | 4 +- .../Registries/MockTUSummaryBuilder.h | 8 ++-- .../SerializationFormatRegistryTest.cpp | 4 +- .../SummaryExtractorRegistryTest.cpp | 4 +- .../JSONFormatTest/JSONFormatTest.cpp | 2 +- .../JSONFormatTest/JSONFormatTest.h | 6 +-- .../JSONFormatTest/LUSummaryTest.cpp | 6 +-- .../JSONFormatTest/TUSummaryTest.cpp | 6 +-- .../SourceTransformation/EmitterTest.cpp | 4 +- .../SourceTransformation/RegistryTest.cpp | 4 +- .../SummaryData/SummaryDataTest.cpp | 20 ++++---- .../SummaryNameTest.cpp | 4 +- .../TUSummaryBuilderTest.cpp | 20 ++++---- .../TestFixture.cpp | 10 ++-- .../ScalableStaticAnalysis/TestFixture.h | 47 +++++++++++++++++++ .../AnalysisDriverTest.cpp | 26 +++++----- 
.../UnsafeBufferReachableAnalysisTest.cpp | 24 +++++----- .../TestFixture.h | 47 ------------------- .../gn/secondary/clang/lib/Driver/BUILD.gn | 8 ++-- .../secondary/clang/lib/FrontendTool/BUILD.gn | 8 ++-- .../Analyses/BUILD.gn | 4 +- .../Core/BUILD.gn | 2 +- .../Frontend/BUILD.gn | 4 +- .../SourceTransformation/BUILD.gn | 4 +- .../Tool/BUILD.gn | 4 +- .../clang/tools/clang-ssaf-analyzer/BUILD.gn | 8 ++-- .../clang/tools/clang-ssaf-format/BUILD.gn | 8 ++-- .../clang/tools/clang-ssaf-linker/BUILD.gn | 8 ++-- .../gn/secondary/clang/unittests/BUILD.gn | 2 +- .../BUILD.gn | 8 ++-- .../llvm-project-overlay/clang/BUILD.bazel | 34 +++++++------- .../clang/unittests/BUILD.bazel | 8 ++-- 191 files changed, 822 insertions(+), 825 deletions(-) rename clang/docs/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/developer-docs/ForceLinkerHeaders.rst (88%) rename clang/docs/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/developer-docs/HowToExtend.rst (91%) rename clang/docs/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/developer-docs/SummaryExtractionInternals.rst (100%) rename clang/docs/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/developer-docs/index.rst (100%) rename clang/docs/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/index.rst (100%) rename clang/docs/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/user-docs/SummaryExtraction.rst (100%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/CallGraph/CallGraphSummary.h (74%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/EntityPointerLevel/EntityPointerLevel.h (92%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h (75%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/PointerFlow/PointerFlow.h (79%) rename 
clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/PointerFlow/PointerFlowAnalysis.h (69%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/PointerFlow/PointerFlowFormat.h (72%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h (74%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.h (74%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/BuiltinAnchorSources.def (100%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/ASTEntityMapping.h (82%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/EntityLinker/EntityLinker.h (91%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/EntityLinker/EntitySummaryEncoding.h (79%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/EntityLinker/LUSummary.h (70%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/EntityLinker/LUSummaryEncoding.h (71%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/EntityLinker/TUSummaryEncoding.h (73%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Model/BuildNamespace.h (95%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Model/EntityId.h (88%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Model/EntityIdTable.h (79%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Model/EntityLinkage.h (87%) rename clang/include/clang/{ScalableStaticAnalysisFramework => 
ScalableStaticAnalysis}/Core/Model/EntityName.h (87%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Model/PrivateFieldNames.def (100%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Model/SummaryName.h (84%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Serialization/JSONFormat.h (96%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Serialization/SerializationFormat.h (91%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Serialization/SerializationFormatRegistry.h (86%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/SummaryData/LUSummaryConsumer.h (88%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/SummaryData/SummaryData.h (76%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/SummaryData/SummaryDataBuilder.h (83%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/SummaryData/SummaryDataBuilderRegistry.h (86%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/SummaryData/SummaryDataStore.h (86%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/SummaryData/SummaryDataTraits.h (80%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Support/ErrorBuilder.h (96%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Support/FormatProviders.h (78%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/TUSummary/EntitySummary.h (69%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/TUSummary/ExtractorRegistry.h (81%) rename 
clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/TUSummary/TUSummary.h (63%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/TUSummary/TUSummaryBuilder.h (85%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/TUSummary/TUSummaryExtractor.h (80%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/WholeProgramAnalysis/AnalysisBase.h (82%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/WholeProgramAnalysis/AnalysisDriver.h (88%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/WholeProgramAnalysis/AnalysisName.h (85%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/WholeProgramAnalysis/AnalysisRegistry.h (85%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/WholeProgramAnalysis/AnalysisResult.h (75%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/WholeProgramAnalysis/AnalysisTraits.h (75%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/WholeProgramAnalysis/DerivedAnalysis.h (86%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/WholeProgramAnalysis/SummaryAnalysis.h (83%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/WholeProgramAnalysis/WPASuite.h (81%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Frontend/TUSummaryExtractorFrontendAction.h (78%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/SSAFBuiltinForceLinker.h (81%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/SSAFForceLinker.h (76%) rename 
clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/SourceTransformation/SourceEditEmitter.h (72%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/SourceTransformation/Transformation.h (65%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/SourceTransformation/TransformationRegistry.h (74%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/SourceTransformation/TransformationReportEmitter.h (75%) rename clang/include/clang/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Tool/Utils.h (92%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/CMakeLists.txt (85%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/CallGraph/CallGraphExtractor.cpp (93%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/CallGraph/CallGraphJSONFormat.cpp (95%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/EntityPointerLevel/EntityPointerLevel.cpp (98%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/EntityPointerLevel/EntityPointerLevelFormat.cpp (93%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/PointerFlow/PointerFlow.cpp (88%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/PointerFlow/PointerFlowAnalysis.cpp (84%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/PointerFlow/PointerFlowExtractor.cpp (95%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/PointerFlow/PointerFlowFormat.cpp (92%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/SSAFAnalysesCommon.cpp (100%) rename clang/lib/{ScalableStaticAnalysisFramework => 
ScalableStaticAnalysis}/Analyses/SSAFAnalysesCommon.h (91%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.cpp (89%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.cpp (91%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp (87%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/UnsafeBufferUsage/UnsafeBufferUsageFormat.cpp (88%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/CMakeLists.txt (100%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/ASTEntityMapping.cpp (93%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/CMakeLists.txt (94%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/EntityLinker/EntityLinker.cpp (92%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Model/BuildNamespace.cpp (96%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Model/EntityId.cpp (88%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Model/EntityIdTable.cpp (92%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Model/EntityLinkage.cpp (93%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Model/EntityName.cpp (94%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Model/SummaryName.cpp (88%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/ModelStringConversions.h (87%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Serialization/JSONFormat/Artifact.cpp (100%) rename clang/lib/{ScalableStaticAnalysisFramework => 
ScalableStaticAnalysis}/Core/Serialization/JSONFormat/JSONEntitySummaryEncoding.cpp (100%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Serialization/JSONFormat/JSONEntitySummaryEncoding.h (76%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Serialization/JSONFormat/JSONFormatImpl.cpp (99%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Serialization/JSONFormat/JSONFormatImpl.h (92%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Serialization/JSONFormat/LUSummary.cpp (98%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Serialization/JSONFormat/LUSummaryEncoding.cpp (98%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Serialization/JSONFormat/TUSummary.cpp (98%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Serialization/JSONFormat/TUSummaryEncoding.cpp (98%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Serialization/JSONFormat/WPASuite.cpp (98%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Serialization/SerializationFormatRegistry.cpp (92%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/SummaryData/LUSummaryConsumer.cpp (90%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/SummaryData/SummaryDataBuilderRegistry.cpp (91%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/Support/ErrorBuilder.cpp (96%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/TUSummary/ExtractorRegistry.cpp (88%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/TUSummary/TUSummaryBuilder.cpp (75%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/TUSummary/TUSummaryExtractor.cpp 
(87%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/WholeProgramAnalysis/AnalysisDriver.cpp (93%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/WholeProgramAnalysis/AnalysisName.cpp (86%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/WholeProgramAnalysis/AnalysisRegistry.cpp (89%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Frontend/CMakeLists.txt (51%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Frontend/TUSummaryExtractorFrontendAction.cpp (93%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Plugins/CMakeLists.txt (98%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Plugins/ExamplePlugin/AnalysisResults.h (81%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Plugins/ExamplePlugin/CMakeLists.txt (87%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Plugins/ExamplePlugin/PairsAnalysis.cpp (94%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Plugins/ExamplePlugin/TagsAnalysis.cpp (92%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Plugins/ExamplePlugin/TagsPairsAnalysis.cpp (94%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/SourceTransformation/CMakeLists.txt (52%) rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/SourceTransformation/TransformationRegistry.cpp (94%) create mode 100644 clang/lib/ScalableStaticAnalysis/Tool/CMakeLists.txt rename clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Tool/Utils.cpp (99%) delete mode 100644 clang/lib/ScalableStaticAnalysisFramework/Tool/CMakeLists.txt rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/ASTEntityMappingTest.cpp (99%) rename 
clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/CallGraph/CallGraphExtractorTest.cpp (97%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/PointerFlow/PointerFlowTest.cpp (98%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/PointerFlow/PointerFlowWPATest.cpp (83%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/UnsafeBufferUsage/UnsafeBufferUsageTest.cpp (96%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/UnsafeBufferUsage/UnsafeBufferUsageWPATest.cpp (83%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/BuildNamespaceTest.cpp (96%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/CMakeLists.txt (88%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/EntityIdTableTest.cpp (91%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/EntityIdTest.cpp (89%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/EntityLinkageTest.cpp (95%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/EntityLinkerTest.cpp (97%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/EntityNameTest.cpp (92%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/ErrorBuilderTest.cpp (98%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/FindDecl.h (87%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Frontend/TUSummaryExtractorFrontendActionTest.cpp (95%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/LUSummaryTest.cpp (81%) rename clang/unittests/{ScalableStaticAnalysisFramework => 
ScalableStaticAnalysis}/ModelStringConversionsTest.cpp (97%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Registries/FancyAnalysisData.cpp (96%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Registries/MockSerializationFormat.cpp (91%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Registries/MockSerializationFormat.h (86%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Registries/MockSummaryExtractor1.cpp (88%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Registries/MockSummaryExtractor2.cpp (88%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Registries/MockTUSummaryBuilder.h (69%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Registries/SerializationFormatRegistryTest.cpp (95%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Registries/SummaryExtractorRegistryTest.cpp (96%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Serialization/JSONFormatTest/JSONFormatTest.cpp (99%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Serialization/JSONFormatTest/JSONFormatTest.h (95%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Serialization/JSONFormatTest/LUSummaryTest.cpp (98%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Serialization/JSONFormatTest/TUSummaryTest.cpp (98%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/SourceTransformation/EmitterTest.cpp (93%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/SourceTransformation/RegistryTest.cpp (92%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/SummaryData/SummaryDataTest.cpp (94%) 
rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/SummaryNameTest.cpp (93%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/TUSummaryBuilderTest.cpp (95%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/TestFixture.cpp (75%) create mode 100644 clang/unittests/ScalableStaticAnalysis/TestFixture.h rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/WholeProgramAnalysis/AnalysisDriverTest.cpp (94%) rename clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/WholeProgramAnalysis/UnsafeBufferReachableAnalysisTest.cpp (95%) delete mode 100644 clang/unittests/ScalableStaticAnalysisFramework/TestFixture.h rename llvm/utils/gn/secondary/clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Analyses/BUILD.gn (87%) rename llvm/utils/gn/secondary/clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Core/BUILD.gn (95%) rename llvm/utils/gn/secondary/clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Frontend/BUILD.gn (70%) rename llvm/utils/gn/secondary/clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/SourceTransformation/BUILD.gn (66%) rename llvm/utils/gn/secondary/clang/lib/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/Tool/BUILD.gn (60%) rename llvm/utils/gn/secondary/clang/unittests/{ScalableStaticAnalysisFramework => ScalableStaticAnalysis}/BUILD.gn (88%) diff --git a/.github/new-prs-labeler.yml b/.github/new-prs-labeler.yml index b51d2c197c762..3ef5afe8b1da5 100644 --- a/.github/new-prs-labeler.yml +++ b/.github/new-prs-labeler.yml @@ -698,11 +698,11 @@ clang:analysis: clang:ssaf: - changed-files: - any-glob-to-any-file: - - clang/docs/ScalableStaticAnalysisFramework/** - - clang/include/clang/ScalableStaticAnalysisFramework/** - - clang/lib/ScalableStaticAnalysisFramework/** + - clang/docs/ScalableStaticAnalysis/** + - 
clang/include/clang/ScalableStaticAnalysis/** + - clang/lib/ScalableStaticAnalysis/** - clang/test/Analysis/Scalable/** - - clang/unittests/ScalableStaticAnalysisFramework/** + - clang/unittests/ScalableStaticAnalysis/** clang:static analyzer: - changed-files: diff --git a/clang/docs/ScalableStaticAnalysisFramework/developer-docs/ForceLinkerHeaders.rst b/clang/docs/ScalableStaticAnalysis/developer-docs/ForceLinkerHeaders.rst similarity index 88% rename from clang/docs/ScalableStaticAnalysisFramework/developer-docs/ForceLinkerHeaders.rst rename to clang/docs/ScalableStaticAnalysis/developer-docs/ForceLinkerHeaders.rst index fb113824217ea..d75855a8b8af3 100644 --- a/clang/docs/ScalableStaticAnalysisFramework/developer-docs/ForceLinkerHeaders.rst +++ b/clang/docs/ScalableStaticAnalysis/developer-docs/ForceLinkerHeaders.rst @@ -46,7 +46,7 @@ For **in-tree** anchors, add a single ``ANCHOR(...)`` entry to .. code-block:: c++ - // In clang/include/clang/ScalableStaticAnalysisFramework/BuiltinAnchorSources.def + // In clang/include/clang/ScalableStaticAnalysis/BuiltinAnchorSources.def ANCHOR(JSONFormatAnchorSource) ANCHOR(MyExtractorAnchorSource) // <-- Add here, in alphabetical order @@ -73,21 +73,21 @@ Header hierarchy SSAFForceLinker.h (umbrella — include this in binaries) └── SSAFBuiltinForceLinker.h (upstream built-in anchors only) -- ``clang/include/clang/ScalableStaticAnalysisFramework/SSAFBuiltinForceLinker.h`` — anchors for +- ``clang/include/clang/ScalableStaticAnalysis/SSAFBuiltinForceLinker.h`` — anchors for upstream-provided (built-in) extractors and formats (e.g. ``JSONFormat``). -- ``clang/include/clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h`` — umbrella header +- ``clang/include/clang/ScalableStaticAnalysis/SSAFForceLinker.h`` — umbrella header that includes ``SSAFBuiltinForceLinker.h``. This is the header that downstream projects should modify to add their own force-linker includes (see :doc:`HowToExtend`). 
Include the umbrella header with ``// IWYU pragma: keep`` in any translation unit that must guarantee all registrations are active — typically the entry -point of a binary that uses ``clangScalableStaticAnalysisFrameworkCore``: +point of a binary that uses ``clangScalableStaticAnalysisCore``: .. code-block:: c++ // In ExecuteCompilerInvocation.cpp - #include "clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h" // IWYU pragma: keep + #include "clang/ScalableStaticAnalysis/SSAFForceLinker.h" // IWYU pragma: keep Naming convention ================= @@ -113,10 +113,10 @@ library, regardless of whether any symbols are referenced: .. code-block:: bash # GNU ld / lld (Linux, BSD) - -Wl,--whole-archive -lclangScalableStaticAnalysisFrameworkCore -Wl,--no-whole-archive + -Wl,--whole-archive -lclangScalableStaticAnalysisCore -Wl,--no-whole-archive # Apple ld - -Wl,-force_load,libclangScalableStaticAnalysisFrameworkCore.a + -Wl,-force_load,libclangScalableStaticAnalysisCore.a Since CMake 3.24, the ``$`` generator expression provides a portable way to do the same: @@ -124,7 +124,7 @@ provides a portable way to do the same: .. code-block:: cmake target_link_libraries(clang PRIVATE - "$") + "$") **Why we did not choose this approach**: diff --git a/clang/docs/ScalableStaticAnalysisFramework/developer-docs/HowToExtend.rst b/clang/docs/ScalableStaticAnalysis/developer-docs/HowToExtend.rst similarity index 91% rename from clang/docs/ScalableStaticAnalysisFramework/developer-docs/HowToExtend.rst rename to clang/docs/ScalableStaticAnalysis/developer-docs/HowToExtend.rst index 4b5194f2b0a0b..de6800bd13e0a 100644 --- a/clang/docs/ScalableStaticAnalysisFramework/developer-docs/HowToExtend.rst +++ b/clang/docs/ScalableStaticAnalysis/developer-docs/HowToExtend.rst @@ -10,7 +10,7 @@ SSAF is designed to be extensible with new **summary extractors** and **serializ Extensions can be added in three ways: #. **Statically, in-tree** — built as part of the upstream LLVM/Clang tree. -#. 
**Statically, out-of-tree (downstream)** — built in a downstream fork or project that links ``clangScalableStaticAnalysisFrameworkCore`` as a static library. +#. **Statically, out-of-tree (downstream)** — built in a downstream fork or project that links ``clangScalableStaticAnalysisCore`` as a static library. #. **Dynamically, via plugins** — loaded at runtime as shared objects. All three approaches use the same ``llvm::Registry``-based registration mechanism. @@ -28,7 +28,7 @@ Step 1: Implement the extractor .. code-block:: c++ //--- MyExtractor.h - #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h" + #include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h" namespace clang::ssaf { @@ -49,7 +49,7 @@ Step 2: Register the extractor //--- MyExtractor.cpp #include "MyExtractor.h" - #include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" + #include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" using namespace clang::ssaf; @@ -69,7 +69,7 @@ Step 3: Add the force-linker anchor See :doc:`ForceLinkerHeaders` for a full explanation of why this is needed. For **in-tree** additions, add one line to -``clang/include/clang/ScalableStaticAnalysisFramework/BuiltinAnchorSources.def`` +``clang/include/clang/ScalableStaticAnalysis/BuiltinAnchorSources.def`` (in alphabetical order): .. code-block:: c++ @@ -96,7 +96,7 @@ Your format class must inherit from ``SerializationFormat`` and define a ``Forma .. 
code-block:: c++ //--- MyFormat.h - #include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormat.h" + #include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormat.h" #include "clang/Support/Compiler.h" #include "llvm/Support/Registry.h" @@ -121,7 +121,7 @@ Step 2: Register the format //--- MyFormat.cpp #include "MyFormat.h" - #include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" + #include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.h" using namespace clang::ssaf; @@ -176,8 +176,8 @@ In-tree extensions For extensions that are part of the upstream LLVM/Clang tree: -#. Add the anchor to ``clang/include/clang/ScalableStaticAnalysisFramework/BuiltinAnchorSources.def`` (in alphabetical order). -#. Add the source files to the ``clangScalableStaticAnalysisFrameworkCore`` CMake library target. +#. Add the anchor to ``clang/include/clang/ScalableStaticAnalysis/BuiltinAnchorSources.def`` (in alphabetical order). +#. Add the source files to the ``clangScalableStaticAnalysisCore`` CMake library target. #. That's it — the ``SSAFForceLinker.h`` umbrella includes ``SSAFBuiltinForceLinker.h`` transitively, so any binary that includes the umbrella will pull in the registration. 
diff --git a/clang/docs/ScalableStaticAnalysisFramework/developer-docs/SummaryExtractionInternals.rst b/clang/docs/ScalableStaticAnalysis/developer-docs/SummaryExtractionInternals.rst similarity index 100% rename from clang/docs/ScalableStaticAnalysisFramework/developer-docs/SummaryExtractionInternals.rst rename to clang/docs/ScalableStaticAnalysis/developer-docs/SummaryExtractionInternals.rst diff --git a/clang/docs/ScalableStaticAnalysisFramework/developer-docs/index.rst b/clang/docs/ScalableStaticAnalysis/developer-docs/index.rst similarity index 100% rename from clang/docs/ScalableStaticAnalysisFramework/developer-docs/index.rst rename to clang/docs/ScalableStaticAnalysis/developer-docs/index.rst diff --git a/clang/docs/ScalableStaticAnalysisFramework/index.rst b/clang/docs/ScalableStaticAnalysis/index.rst similarity index 100% rename from clang/docs/ScalableStaticAnalysisFramework/index.rst rename to clang/docs/ScalableStaticAnalysis/index.rst diff --git a/clang/docs/ScalableStaticAnalysisFramework/user-docs/SummaryExtraction.rst b/clang/docs/ScalableStaticAnalysis/user-docs/SummaryExtraction.rst similarity index 100% rename from clang/docs/ScalableStaticAnalysisFramework/user-docs/SummaryExtraction.rst rename to clang/docs/ScalableStaticAnalysis/user-docs/SummaryExtraction.rst diff --git a/clang/docs/index.rst b/clang/docs/index.rst index 05bb7512fda92..b57e8178f3237 100644 --- a/clang/docs/index.rst +++ b/clang/docs/index.rst @@ -29,7 +29,7 @@ Using Clang as a Compiler ThreadSafetyAnalysis LifetimeSafety SafeBuffers - ScalableStaticAnalysisFramework/index + ScalableStaticAnalysis/index DataFlowAnalysisIntro FunctionEffectAnalysis AddressSanitizer diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 058449ef47a46..d9e4858f5f648 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -409,30 +409,30 @@ def 
warn_profile_data_misexpect : Warning< def warn_ssaf_extract_tu_summary_file_unknown_output_format : Warning<"unknown output summary file format '%0' " "specified by '--ssaf-tu-summary-file=%1'">, - InGroup, DefaultError; + InGroup, DefaultError; def warn_ssaf_extract_tu_summary_file_unknown_format : Warning<"failed to parse the value of '--ssaf-tu-summary-file=%0' " "the value must follow the '.' pattern">, - InGroup, DefaultError; + InGroup, DefaultError; def warn_ssaf_must_enable_summary_extractors : Warning<"must enable some summary extractors using the " "'--ssaf-extract-summaries=' option">, - InGroup, DefaultError; + InGroup, DefaultError; def warn_ssaf_extract_summary_unknown_extractor_name : Warning<"no summary extractor%s0 %plural{1:was|:were}0 registered with name: %1">, - InGroup, DefaultError; + InGroup, DefaultError; def warn_ssaf_write_tu_summary_failed : Warning<"failed to write TU summary to '%0': %1">, - InGroup, DefaultError; + InGroup, DefaultError; def warn_ssaf_tu_summary_requires_compilation_unit_id : Warning<"option '--ssaf-tu-summary-file=' requires " "'--ssaf-compilation-unit-id=' to be set">, - InGroup, DefaultError; + InGroup, DefaultError; def err_extract_api_ignores_file_not_found : Error<"file '%0' specified by '--extract-api-ignores=' not found">, DefaultFatal; diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 5cdebd35ba05d..6418ab4757a0c 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1972,7 +1972,7 @@ def BitIntExtension : DiagGroup<"bit-int-extension">; def ExtractAPIMisuse : DiagGroup<"extractapi-misuse">; // Warnings related to the "Scalable Static Analysis Framework" - SSAF. 
-def ScalableStaticAnalysisFramework : DiagGroup<"scalable-static-analysis-framework">; +def ScalableStaticAnalysis : DiagGroup<"scalable-static-analysis-framework">; // Warnings about using the non-standard extension having an explicit specialization // with a storage class specifier. diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphSummary.h b/clang/include/clang/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphSummary.h similarity index 74% rename from clang/include/clang/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphSummary.h rename to clang/include/clang/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphSummary.h index 8056b1001a216..e3a335384792a 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphSummary.h +++ b/clang/include/clang/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphSummary.h @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_CALLGRAPH_CALLGRAPHSUMMARY_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_CALLGRAPH_CALLGRAPHSUMMARY_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_CALLGRAPH_CALLGRAPHSUMMARY_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_CALLGRAPH_CALLGRAPHSUMMARY_H -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h" #include namespace clang::ssaf { @@ -51,4 +51,4 @@ struct CallGraphSummary final : public EntitySummary { } // namespace clang::ssaf -#endif // 
LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_CALLGRAPH_CALLGRAPHSUMMARY_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_CALLGRAPH_CALLGRAPHSUMMARY_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h b/clang/include/clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h similarity index 92% rename from clang/include/clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h rename to clang/include/clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h index 35eae269105f7..c296016258f8d 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h +++ b/clang/include/clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_ENTITYPOINTERLEVEL_ENTITYPOINTERLEVEL_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_ENTITYPOINTERLEVEL_ENTITYPOINTERLEVEL_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_ENTITYPOINTERLEVEL_ENTITYPOINTERLEVEL_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_ENTITYPOINTERLEVEL_ENTITYPOINTERLEVEL_H #include "clang/AST/Expr.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" #include namespace clang::ssaf { @@ -118,4 +118,4 @@ createEntityPointerLevel(const NamedDecl *ND, TUSummaryExtractor &Extractor, EntityPointerLevel incrementPointerLevel(const EntityPointerLevel &E); } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_ENTITYPOINTERLEVEL_ENTITYPOINTERLEVEL_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_ENTITYPOINTERLEVEL_ENTITYPOINTERLEVEL_H diff --git 
a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h b/clang/include/clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h similarity index 75% rename from clang/include/clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h rename to clang/include/clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h index bccd1a1dc6225..e650f3061181f 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h +++ b/clang/include/clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_ENTITYPOINTERLEVEL_ENTITYPOINTERLEVELFORMAT_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_ENTITYPOINTERLEVEL_ENTITYPOINTERLEVELFORMAT_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_ENTITYPOINTERLEVEL_ENTITYPOINTERLEVELFORMAT_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_ENTITYPOINTERLEVEL_ENTITYPOINTERLEVELFORMAT_H -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" #include "llvm/ADT/iterator_range.h" #include @@ -45,4 +45,4 @@ entityPointerLevelMapFromJSON(const llvm::json::Array &Content, JSONFormat::EntityIdFromJSONFn IdFromJSON); } // namespace clang::ssaf -#endif // 
LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_ENTITYPOINTERLEVEL_ENTITYPOINTERLEVELFORMAT_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_ENTITYPOINTERLEVEL_ENTITYPOINTERLEVELFORMAT_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.h b/clang/include/clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.h similarity index 79% rename from clang/include/clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.h rename to clang/include/clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.h index 2b5bcfc650486..abff90cc37662 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.h +++ b/clang/include/clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.h @@ -11,11 +11,11 @@ // as directed graph edges (EdgeSet). // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_POINTERFLOW_POINTERFLOW_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_POINTERFLOW_POINTERFLOW_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_POINTERFLOW_POINTERFLOW_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_POINTERFLOW_POINTERFLOW_H -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h" namespace clang::ssaf { @@ -49,4 +49,4 @@ class PointerFlowEntitySummary final : public EntitySummary { }; } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_POINTERFLOW_POINTERFLOW_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_POINTERFLOW_POINTERFLOW_H diff --git 
a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowAnalysis.h b/clang/include/clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowAnalysis.h similarity index 69% rename from clang/include/clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowAnalysis.h rename to clang/include/clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowAnalysis.h index 38cbd7d634895..645336d1633f0 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowAnalysis.h +++ b/clang/include/clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowAnalysis.h @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_POINTERFLOW_POINTERFLOWANALYSIS_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_POINTERFLOW_POINTERFLOWANALYSIS_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_POINTERFLOW_POINTERFLOWANALYSIS_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_POINTERFLOW_POINTERFLOWANALYSIS_H -#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisResult.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisResult.h" #include "llvm/ADT/StringRef.h" #include @@ -43,4 +43,4 @@ struct PointerFlowAnalysisResult final : AnalysisResult { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_POINTERFLOW_POINTERFLOWANALYSIS_H 
+#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_POINTERFLOW_POINTERFLOWANALYSIS_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowFormat.h b/clang/include/clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowFormat.h similarity index 72% rename from clang/include/clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowFormat.h rename to clang/include/clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowFormat.h index be68e92317beb..1f1d5e8210379 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowFormat.h +++ b/clang/include/clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowFormat.h @@ -10,11 +10,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_POINTERFLOW_POINTERFLOWFORMAT_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_POINTERFLOW_POINTERFLOWFORMAT_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_POINTERFLOW_POINTERFLOWFORMAT_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_POINTERFLOW_POINTERFLOWFORMAT_H -#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" #include "llvm/ADT/iterator_range.h" namespace clang::ssaf { @@ -34,4 +34,4 @@ edgeSetFromJSON(const llvm::json::Array &EdgesData, } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_POINTERFLOW_POINTERFLOWFORMAT_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_POINTERFLOW_POINTERFLOWFORMAT_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h 
b/clang/include/clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h similarity index 74% rename from clang/include/clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h rename to clang/include/clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h index 972ff6a21bb25..c84104c10db7e 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h +++ b/clang/include/clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_UNSAFEBUFFERUSAGE_UNSAFEBUFFERUSAGE_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_UNSAFEBUFFERUSAGE_UNSAFEBUFFERUSAGE_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_UNSAFEBUFFERUSAGE_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_UNSAFEBUFFERUSAGE_H -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h" namespace clang::ssaf { /// An UnsafeBufferUsageEntitySummary contains a set of EntityPointerLevels @@ -47,4 +47,4 @@ class UnsafeBufferUsageEntitySummary final : public EntitySummary { }; } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_UNSAFEBUFFERUSAGE_UNSAFEBUFFERUSAGE_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_UNSAFEBUFFERUSAGE_H diff --git 
a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.h b/clang/include/clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.h similarity index 74% rename from clang/include/clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.h rename to clang/include/clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.h index da9115679bb2c..aa19c8a7e945e 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.h +++ b/clang/include/clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.h @@ -20,13 +20,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_UNSAFEBUFFERUSAGE_UNSAFEBUFFERUSAGEANALYSIS_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_UNSAFEBUFFERUSAGE_UNSAFEBUFFERUSAGEANALYSIS_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_UNSAFEBUFFERUSAGEANALYSIS_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_UNSAFEBUFFERUSAGEANALYSIS_H -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisResult.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisResult.h" #include "llvm/ADT/StringRef.h" #include @@ -59,4 +59,4 @@ struct 
UnsafeBufferReachableAnalysisResult final : AnalysisResult { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_UNSAFEBUFFERUSAGE_UNSAFEBUFFERUSAGEANALYSIS_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_UNSAFEBUFFERUSAGE_UNSAFEBUFFERUSAGEANALYSIS_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/BuiltinAnchorSources.def b/clang/include/clang/ScalableStaticAnalysis/BuiltinAnchorSources.def similarity index 100% rename from clang/include/clang/ScalableStaticAnalysisFramework/BuiltinAnchorSources.def rename to clang/include/clang/ScalableStaticAnalysis/BuiltinAnchorSources.def diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/ASTEntityMapping.h b/clang/include/clang/ScalableStaticAnalysis/Core/ASTEntityMapping.h similarity index 82% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/ASTEntityMapping.h rename to clang/include/clang/ScalableStaticAnalysis/Core/ASTEntityMapping.h index 5057f8ac4505f..aa8ce469397a0 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/ASTEntityMapping.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/ASTEntityMapping.h @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ASTENTITYMAPPING_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ASTENTITYMAPPING_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ASTENTITYMAPPING_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ASTENTITYMAPPING_H #include "clang/AST/Decl.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" #include "llvm/ADT/StringRef.h" #include @@ -44,4 +44,4 @@ std::optional getEntityNameForReturn(const FunctionDecl *FD); } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ASTENTITYMAPPING_H +#endif // 
LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ASTENTITYMAPPING_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntityLinker.h b/clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/EntityLinker.h similarity index 91% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntityLinker.h rename to clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/EntityLinker.h index ed02560004632..f07def1a9b344 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntityLinker.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/EntityLinker.h @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_ENTITYLINKER_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_ENTITYLINKER_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_ENTITYLINKER_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_ENTITYLINKER_H -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummaryEncoding.h" #include "llvm/Support/Error.h" #include "llvm/TargetParser/Triple.h" #include @@ -92,4 +92,4 @@ class EntityLinker { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_ENTITYLINKER_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_ENTITYLINKER_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntitySummaryEncoding.h b/clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/EntitySummaryEncoding.h similarity index 79% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntitySummaryEncoding.h rename to clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/EntitySummaryEncoding.h index 
461fd74c8cafe..9442ecbbe24cc 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntitySummaryEncoding.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/EntitySummaryEncoding.h @@ -11,10 +11,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_ENTITYSUMMARYENCODING_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_ENTITYSUMMARYENCODING_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_ENTITYSUMMARYENCODING_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_ENTITYSUMMARYENCODING_H -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" #include "llvm/Support/Error.h" #include @@ -39,4 +39,4 @@ class EntitySummaryEncoding { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_ENTITYSUMMARYENCODING_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_ENTITYSUMMARYENCODING_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h b/clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h similarity index 70% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h rename to clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h index 0e042f783c501..97142b0dd948a 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h @@ -11,15 +11,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_LUSUMMARY_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_LUSUMMARY_H - -#include 
"clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h" +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_LUSUMMARY_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_LUSUMMARY_H + +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h" #include "llvm/TargetParser/Triple.h" #include #include @@ -59,4 +59,4 @@ class LUSummary { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_LUSUMMARY_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_LUSUMMARY_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h b/clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummaryEncoding.h similarity index 71% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h rename to clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummaryEncoding.h index f7fcd903b5c20..663697cdb88b3 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummaryEncoding.h @@ -11,15 +11,15 @@ // 
//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_LUSUMMARYENCODING_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_LUSUMMARYENCODING_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_LUSUMMARYENCODING_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_LUSUMMARYENCODING_H -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntitySummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/EntitySummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" #include "llvm/TargetParser/Triple.h" #include #include @@ -61,4 +61,4 @@ class LUSummaryEncoding { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_LUSUMMARYENCODING_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_LUSUMMARYENCODING_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h b/clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/TUSummaryEncoding.h similarity index 73% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h rename to 
clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/TUSummaryEncoding.h index 5cf3e8283874f..2c672a55e4873 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/EntityLinker/TUSummaryEncoding.h @@ -11,15 +11,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_TUSUMMARYENCODING_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_TUSUMMARYENCODING_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_TUSUMMARYENCODING_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_TUSUMMARYENCODING_H -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntitySummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/EntitySummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" #include "llvm/TargetParser/Triple.h" #include #include @@ -64,4 +64,4 @@ class TUSummaryEncoding { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_ENTITYLINKER_TUSUMMARYENCODING_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_ENTITYLINKER_TUSUMMARYENCODING_H diff --git 
a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h b/clang/include/clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h similarity index 95% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h rename to clang/include/clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h index 16c6fd6fcb4ef..6da5a9e42c4da 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h @@ -15,8 +15,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_BUILDNAMESPACE_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_BUILDNAMESPACE_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_BUILDNAMESPACE_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_BUILDNAMESPACE_H #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" @@ -126,4 +126,4 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_BUILDNAMESPACE_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_BUILDNAMESPACE_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h b/clang/include/clang/ScalableStaticAnalysis/Core/Model/EntityId.h similarity index 88% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h rename to clang/include/clang/ScalableStaticAnalysis/Core/Model/EntityId.h index 0da76e17c823e..e8e36119927c8 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/Model/EntityId.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_ENTITYID_H -#define 
LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_ENTITYID_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_ENTITYID_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_ENTITYID_H #include "llvm/Support/raw_ostream.h" #include @@ -51,4 +51,4 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const EntityId &Id); } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_ENTITYID_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_ENTITYID_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h b/clang/include/clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h similarity index 79% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h rename to clang/include/clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h index d74b871df3e16..316a1f36369ec 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_ENTITYIDTABLE_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_ENTITYIDTABLE_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_ENTITYIDTABLE_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_ENTITYIDTABLE_H -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" #include #include @@ -50,4 +50,4 @@ class EntityIdTable { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_ENTITYIDTABLE_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_ENTITYIDTABLE_H diff --git 
a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h b/clang/include/clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h similarity index 87% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h rename to clang/include/clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h index 2efddffd9a29c..f90571c672452 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_ENTITYLINKAGE_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_ENTITYLINKAGE_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_ENTITYLINKAGE_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_ENTITYLINKAGE_H #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" @@ -50,4 +50,4 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_ENTITYLINKAGE_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_ENTITYLINKAGE_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h b/clang/include/clang/ScalableStaticAnalysis/Core/Model/EntityName.h similarity index 87% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h rename to clang/include/clang/ScalableStaticAnalysis/Core/Model/EntityName.h index 853ac96209a86..ecc402e05d4b8 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/Model/EntityName.h @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_ENTITYNAME_H -#define 
LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_ENTITYNAME_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_ENTITYNAME_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_ENTITYNAME_H -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" @@ -59,4 +59,4 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const EntityName &EN); } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_ENTITYNAME_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_ENTITYNAME_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/PrivateFieldNames.def b/clang/include/clang/ScalableStaticAnalysis/Core/Model/PrivateFieldNames.def similarity index 100% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/PrivateFieldNames.def rename to clang/include/clang/ScalableStaticAnalysis/Core/Model/PrivateFieldNames.def diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h b/clang/include/clang/ScalableStaticAnalysis/Core/Model/SummaryName.h similarity index 84% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h rename to clang/include/clang/ScalableStaticAnalysis/Core/Model/SummaryName.h index 99b5308e61a4e..d299efc6fc638 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/Model/SummaryName.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_SUMMARYNAME_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_SUMMARYNAME_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_SUMMARYNAME_H +#define 
LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_SUMMARYNAME_H #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" @@ -38,4 +38,4 @@ class SummaryName { llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SummaryName &SN); } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODEL_SUMMARYNAME_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_MODEL_SUMMARYNAME_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h b/clang/include/clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h similarity index 96% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h rename to clang/include/clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h index 7841240319af6..9d9b1ff0bbb51 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h @@ -10,11 +10,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_JSONFORMAT_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_JSONFORMAT_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SERIALIZATION_JSONFORMAT_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SERIALIZATION_JSONFORMAT_H -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormat.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormat.h" #include "clang/Support/Compiler.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Support/JSON.h" @@ -265,4 +265,4 @@ LLVM_DECLARE_REGISTRY(llvm::Registry) LLVM_DECLARE_REGISTRY( llvm::Registry) -#endif // 
LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_JSONFORMAT_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SERIALIZATION_JSONFORMAT_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormat.h b/clang/include/clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormat.h similarity index 91% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormat.h rename to clang/include/clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormat.h index 621182a352bc7..ca3ee9ce3362c 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormat.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormat.h @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_SERIALIZATIONFORMAT_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_SERIALIZATIONFORMAT_H - -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/WPASuite.h" +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SERIALIZATION_SERIALIZATIONFORMAT_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SERIALIZATION_SERIALIZATIONFORMAT_H + +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h" +#include 
"clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/TUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/WPASuite.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" @@ -118,7 +118,7 @@ class SerializationFormat { #define FIELD(CLASS, FIELD_NAME) \ static const auto &get##FIELD_NAME(const CLASS &X) { return X.FIELD_NAME; } \ static auto &get##FIELD_NAME(CLASS &X) { return X.FIELD_NAME; } -#include "clang/ScalableStaticAnalysisFramework/Core/Model/PrivateFieldNames.def" +#include "clang/ScalableStaticAnalysis/Core/Model/PrivateFieldNames.def" /// Per-format plugin registry for analysis result (de)serializers. 
/// @@ -263,4 +263,4 @@ template struct FormatInfoEntry { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_SERIALIZATIONFORMAT_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SERIALIZATION_SERIALIZATIONFORMAT_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h b/clang/include/clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.h similarity index 86% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h rename to clang/include/clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.h index 664e9c6245546..bcb926903d664 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.h @@ -50,16 +50,16 @@ // ); // // Finally, extend the `AnchorSources` list in the force-linker header: -// clang/include/clang/ScalableStaticAnalysisFramework/SSAFBuiltinForceLinker.h: +// clang/include/clang/ScalableStaticAnalysis/SSAFBuiltinForceLinker.h: // // This anchor is used to force the linker to link the MyFormat registration. 
// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_SERIALIZATIONFORMATREGISTRY_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_SERIALIZATIONFORMATREGISTRY_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SERIALIZATION_SERIALIZATIONFORMATREGISTRY_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SERIALIZATION_SERIALIZATIONFORMATREGISTRY_H -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormat.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormat.h" #include "clang/Support/Compiler.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Registry.h" @@ -86,4 +86,4 @@ using SerializationFormatRegistry = llvm::Registry; LLVM_DECLARE_REGISTRY(clang::ssaf::SerializationFormatRegistry) -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_SERIALIZATIONFORMATREGISTRY_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SERIALIZATION_SERIALIZATIONFORMATREGISTRY_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/LUSummaryConsumer.h b/clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/LUSummaryConsumer.h similarity index 88% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/LUSummaryConsumer.h rename to clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/LUSummaryConsumer.h index b26e5fadc58c2..c0f0308533a46 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/LUSummaryConsumer.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/LUSummaryConsumer.h @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_LUSUMMARYCONSUMER_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_LUSUMMARYCONSUMER_H +#ifndef 
LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_LUSUMMARYCONSUMER_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_LUSUMMARYCONSUMER_H -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataStore.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataStore.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Error.h" #include @@ -90,4 +90,4 @@ class LUSummaryConsumer final { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_LUSUMMARYCONSUMER_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_LUSUMMARYCONSUMER_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryData.h b/clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryData.h similarity index 76% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryData.h rename to clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryData.h index 83386a684181e..8c7f98622cc25 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryData.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryData.h @@ -12,8 +12,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATA_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATA_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATA_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATA_H namespace clang::ssaf { @@ -25,4 +25,4 @@ class SummaryData { } // 
namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATA_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATA_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataBuilder.h b/clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataBuilder.h similarity index 83% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataBuilder.h rename to clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataBuilder.h index 343e98d46ce39..071741e23b56c 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataBuilder.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataBuilder.h @@ -12,14 +12,14 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATABUILDER_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATABUILDER_H - -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryData.h" -#include "clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataTraits.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h" +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATABUILDER_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATABUILDER_H + +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/SummaryData/SummaryData.h" +#include "clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataTraits.h" +#include 
"clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h" #include namespace clang::ssaf { @@ -94,4 +94,4 @@ class SummaryDataBuilder : public SummaryDataBuilderBase { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATABUILDER_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATABUILDER_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataBuilderRegistry.h b/clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataBuilderRegistry.h similarity index 86% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataBuilderRegistry.h rename to clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataBuilderRegistry.h index a153eff950534..f869985779b1a 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataBuilderRegistry.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataBuilderRegistry.h @@ -19,10 +19,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATABUILDERREGISTRY_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATABUILDERREGISTRY_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATABUILDERREGISTRY_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATABUILDERREGISTRY_H -#include "clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataBuilder.h" #include "llvm/Support/Registry.h" #include #include @@ -71,4 +71,4 @@ class SummaryDataBuilderRegistry { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATABUILDERREGISTRY_H +#endif // 
LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATABUILDERREGISTRY_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataStore.h b/clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataStore.h similarity index 86% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataStore.h rename to clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataStore.h index 3261085f8d5a2..b0e71a4f1031f 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataStore.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataStore.h @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATASTORE_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATASTORE_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATASTORE_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATASTORE_H -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryData.h" -#include "clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataTraits.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/SummaryData/SummaryData.h" +#include "clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataTraits.h" +#include "clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h" #include "llvm/Support/Error.h" #include #include @@ -112,4 +112,4 @@ class SummaryDataStore { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATASTORE_H +#endif // 
LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATASTORE_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataTraits.h b/clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataTraits.h similarity index 80% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataTraits.h rename to clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataTraits.h index c6bf6b6fcd4bb..d08dd68935bac 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataTraits.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataTraits.h @@ -10,10 +10,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATATRAITS_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATATRAITS_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATATRAITS_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATATRAITS_H -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" #include namespace clang::ssaf { @@ -38,4 +38,4 @@ inline constexpr bool HasSummaryName_v = HasSummaryName::value; } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUMMARYDATA_SUMMARYDATATRAITS_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUMMARYDATA_SUMMARYDATATRAITS_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h b/clang/include/clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h similarity index 96% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h rename to clang/include/clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h index 2cce23766b026..2ad4d8e06eb90 100644 --- 
a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUPPORT_ERRORBUILDER_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUPPORT_ERRORBUILDER_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUPPORT_ERRORBUILDER_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUPPORT_ERRORBUILDER_H #include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" @@ -207,4 +207,4 @@ class ErrorBuilder { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUPPORT_ERRORBUILDER_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUPPORT_ERRORBUILDER_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Support/FormatProviders.h b/clang/include/clang/ScalableStaticAnalysis/Core/Support/FormatProviders.h similarity index 78% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/Support/FormatProviders.h rename to clang/include/clang/ScalableStaticAnalysis/Core/Support/FormatProviders.h index 437152d43f425..e4a7b54033924 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/Support/FormatProviders.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/Support/FormatProviders.h @@ -11,15 +11,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUPPORT_FORMATPROVIDERS_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUPPORT_FORMATPROVIDERS_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUPPORT_FORMATPROVIDERS_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUPPORT_FORMATPROVIDERS_H -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include 
"clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h" #include "llvm/Support/FormatProviders.h" #include "llvm/Support/raw_ostream.h" @@ -90,4 +90,4 @@ template <> struct format_provider { } // namespace llvm -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_SUPPORT_FORMATPROVIDERS_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_SUPPORT_FORMATPROVIDERS_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h b/clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h similarity index 69% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h rename to clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h index e6c8a4716eaf2..305abc05f19f3 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_ENTITYSUMMARY_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_ENTITYSUMMARY_H +#ifndef 
LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_ENTITYSUMMARY_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_ENTITYSUMMARY_H -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" #include namespace clang::ssaf { @@ -27,4 +27,4 @@ using DerivesFromEntitySummary = } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_ENTITYSUMMARY_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_ENTITYSUMMARY_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h b/clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h similarity index 81% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h rename to clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h index f0ef920a719c8..de0fd85f2a023 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h @@ -17,14 +17,14 @@ // X("MyExtractor", "My awesome extractor"); // // Finally, extend the `AnchorSources` list in the force-linker header: -// clang/include/clang/ScalableStaticAnalysisFramework/SSAFBuiltinForceLinker.h: +// clang/include/clang/ScalableStaticAnalysis/SSAFBuiltinForceLinker.h: // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_EXTRACTORREGISTRY_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_EXTRACTORREGISTRY_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_EXTRACTORREGISTRY_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_EXTRACTORREGISTRY_H -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h" +#include 
"clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h" #include "clang/Support/Compiler.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Registry.h" @@ -53,4 +53,4 @@ using TUSummaryExtractorRegistry = LLVM_DECLARE_REGISTRY(clang::ssaf::TUSummaryExtractorRegistry) -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_EXTRACTORREGISTRY_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_EXTRACTORREGISTRY_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h b/clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h similarity index 63% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h rename to clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h index c5cadce439ba9..87b814690fb99 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h @@ -6,15 +6,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_TUSUMMARY_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_TUSUMMARY_H - -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h" +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_TUSUMMARY_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_TUSUMMARY_H + +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include 
"clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h" #include "llvm/TargetParser/Triple.h" #include #include @@ -48,4 +48,4 @@ class TUSummary { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_TUSUMMARY_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_TUSUMMARY_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h b/clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h similarity index 85% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h rename to clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h index 38bd60718ed9c..5132761ed6bb5 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_TUSUMMARYBUILDER_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_TUSUMMARYBUILDER_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_TUSUMMARYBUILDER_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_TUSUMMARYBUILDER_H -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include 
"clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h" #include #include @@ -78,4 +78,4 @@ TUSummaryBuilder::addSummary(EntityId Entity, } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_TUSUMMARYBUILDER_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_TUSUMMARYBUILDER_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h b/clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h similarity index 80% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h rename to clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h index b943748873821..db2343959021b 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h @@ -6,12 +6,12 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_TUSUMMARYEXTRACTOR_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_TUSUMMARYEXTRACTOR_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_TUSUMMARYEXTRACTOR_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_TUSUMMARYEXTRACTOR_H #include "clang/AST/ASTConsumer.h" #include "clang/AST/Decl.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" #include namespace clang::ssaf { @@ -42,4 +42,4 @@ class TUSummaryExtractor : public ASTConsumer { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_TUSUMMARY_TUSUMMARYEXTRACTOR_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_TUSUMMARY_TUSUMMARYEXTRACTOR_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisBase.h 
b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisBase.h similarity index 82% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisBase.h rename to clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisBase.h index 1a9a5de76ffab..638791f9b1646 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisBase.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisBase.h @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISBASE_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISBASE_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISBASE_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISBASE_H -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h" #include namespace clang::ssaf { @@ -57,4 +57,4 @@ class AnalysisBase { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISBASE_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISBASE_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisDriver.h b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisDriver.h similarity index 88% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisDriver.h rename to clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisDriver.h index 156d8e806bd0f..148234f5c9649 100644 --- 
a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisDriver.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisDriver.h @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISDRIVER_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISDRIVER_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISDRIVER_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISDRIVER_H -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/WPASuite.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/WPASuite.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Error.h" #include @@ -92,4 +92,4 @@ class AnalysisDriver final { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISDRIVER_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISDRIVER_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h similarity index 85% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h rename to clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h index 32f76e73b14e0..71a0a26b9853d 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h @@ -11,8 +11,8 @@ // 
//===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISNAME_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISNAME_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISNAME_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISNAME_H #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" @@ -46,4 +46,4 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const AnalysisName &AN); } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISNAME_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISNAME_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisRegistry.h b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.h similarity index 85% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisRegistry.h rename to clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.h index f3e0327227889..9672b62d1e05f 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisRegistry.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.h @@ -28,13 +28,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISREGISTRY_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISREGISTRY_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISREGISTRY_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISREGISTRY_H -#include 
"clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/DerivedAnalysis.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/SummaryAnalysis.h" +#include "clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/DerivedAnalysis.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/SummaryAnalysis.h" #include "llvm/Support/Error.h" #include "llvm/Support/Registry.h" #include @@ -108,4 +108,4 @@ class AnalysisRegistry { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISREGISTRY_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISREGISTRY_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisResult.h b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisResult.h similarity index 75% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisResult.h rename to clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisResult.h index 07d1f0549a9ee..fc10a6f2707fb 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisResult.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisResult.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISRESULT_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISRESULT_H +#ifndef 
LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISRESULT_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISRESULT_H namespace clang::ssaf { @@ -27,4 +27,4 @@ class AnalysisResult { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISRESULT_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISRESULT_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisTraits.h b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisTraits.h similarity index 75% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisTraits.h rename to clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisTraits.h index 78df3b35648c2..f4daf01b2660f 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisTraits.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisTraits.h @@ -10,10 +10,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISTRAITS_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISTRAITS_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISTRAITS_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISTRAITS_H -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h" #include namespace clang::ssaf { @@ -33,4 +33,4 @@ inline constexpr bool HasAnalysisName_v = HasAnalysisName::value; } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_ANALYSISTRAITS_H +#endif // 
LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_ANALYSISTRAITS_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/DerivedAnalysis.h b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/DerivedAnalysis.h similarity index 86% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/DerivedAnalysis.h rename to clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/DerivedAnalysis.h index ff9470e0a2284..1fb33c83ed6b0 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/DerivedAnalysis.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/DerivedAnalysis.h @@ -12,14 +12,14 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_DERIVEDANALYSIS_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_DERIVEDANALYSIS_H - -#include "clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisBase.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisResult.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisTraits.h" +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_DERIVEDANALYSIS_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_DERIVEDANALYSIS_H + +#include "clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisBase.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisResult.h" +#include 
"clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisTraits.h" #include "llvm/Support/Error.h" #include #include @@ -130,4 +130,4 @@ class DerivedAnalysis : public DerivedAnalysisBase { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_DERIVEDANALYSIS_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_DERIVEDANALYSIS_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/SummaryAnalysis.h b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/SummaryAnalysis.h similarity index 83% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/SummaryAnalysis.h rename to clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/SummaryAnalysis.h index 5b7c2c9b6b8c2..cf3bec9bb3b5e 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/SummaryAnalysis.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/SummaryAnalysis.h @@ -12,15 +12,15 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_SUMMARYANALYSIS_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_SUMMARYANALYSIS_H - -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisBase.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisResult.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisTraits.h" +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_SUMMARYANALYSIS_H +#define 
LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_SUMMARYANALYSIS_H + +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisBase.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisResult.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisTraits.h" #include "llvm/Support/Error.h" #include @@ -118,4 +118,4 @@ class SummaryAnalysis : public SummaryAnalysisBase { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_SUMMARYANALYSIS_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_SUMMARYANALYSIS_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/WPASuite.h b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/WPASuite.h similarity index 81% rename from clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/WPASuite.h rename to clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/WPASuite.h index 8e7693b78d485..cdc74ded19443 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/WPASuite.h +++ b/clang/include/clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/WPASuite.h @@ -11,14 +11,14 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_WPASUITE_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_WPASUITE_H - -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h" -#include 
"clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisResult.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisTraits.h" +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_WPASUITE_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_WPASUITE_H + +#include "clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h" +#include "clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisResult.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisTraits.h" #include "llvm/Support/Error.h" #include #include @@ -96,4 +96,4 @@ class WPASuite { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_CORE_WHOLEPROGRAMANALYSIS_WPASUITE_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_CORE_WHOLEPROGRAMANALYSIS_WPASUITE_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.h b/clang/include/clang/ScalableStaticAnalysis/Frontend/TUSummaryExtractorFrontendAction.h similarity index 78% rename from clang/include/clang/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.h rename to clang/include/clang/ScalableStaticAnalysis/Frontend/TUSummaryExtractorFrontendAction.h index fe5d75149914e..300aab9560be2 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.h +++ b/clang/include/clang/ScalableStaticAnalysis/Frontend/TUSummaryExtractorFrontendAction.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_FRONTEND_TUSUMMARYEXTRACTORFRONTENDACTION_H -#define 
LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_FRONTEND_TUSUMMARYEXTRACTORFRONTENDACTION_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_FRONTEND_TUSUMMARYEXTRACTORFRONTENDACTION_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_FRONTEND_TUSUMMARYEXTRACTORFRONTENDACTION_H #include "clang/Frontend/FrontendAction.h" #include @@ -30,4 +30,4 @@ class TUSummaryExtractorFrontendAction final : public WrapperFrontendAction { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_FRONTEND_TUSUMMARYEXTRACTORFRONTENDACTION_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_FRONTEND_TUSUMMARYEXTRACTORFRONTENDACTION_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/SSAFBuiltinForceLinker.h b/clang/include/clang/ScalableStaticAnalysis/SSAFBuiltinForceLinker.h similarity index 81% rename from clang/include/clang/ScalableStaticAnalysisFramework/SSAFBuiltinForceLinker.h rename to clang/include/clang/ScalableStaticAnalysis/SSAFBuiltinForceLinker.h index 354379645a8de..97783c05d8fd0 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/SSAFBuiltinForceLinker.h +++ b/clang/include/clang/ScalableStaticAnalysis/SSAFBuiltinForceLinker.h @@ -13,12 +13,12 @@ /// /// Include this header (with IWYU pragma: keep) in any translation unit that /// must guarantee these registrations are active — typically the entry point -/// of a binary that uses clangScalableStaticAnalysisFrameworkCore. +/// of a binary that uses clangScalableStaticAnalysisCore. 
/// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SSAFBUILTINFORCELINKER_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SSAFBUILTINFORCELINKER_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_SSAFBUILTINFORCELINKER_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_SSAFBUILTINFORCELINKER_H namespace clang::ssaf { @@ -40,4 +40,4 @@ namespace clang::ssaf { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SSAFBUILTINFORCELINKER_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_SSAFBUILTINFORCELINKER_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h b/clang/include/clang/ScalableStaticAnalysis/SSAFForceLinker.h similarity index 76% rename from clang/include/clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h rename to clang/include/clang/ScalableStaticAnalysis/SSAFForceLinker.h index 204a504c36435..bf0b74e3955f3 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h +++ b/clang/include/clang/ScalableStaticAnalysis/SSAFForceLinker.h @@ -13,13 +13,13 @@ /// /// Include this header (with IWYU pragma: keep) in any translation unit that /// must guarantee these registrations are active — typically the entry point -/// of a binary that uses clangScalableStaticAnalysisFrameworkCore. +/// of a binary that uses clangScalableStaticAnalysisCore. 
/// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SSAFFORCELINKER_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SSAFFORCELINKER_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_SSAFFORCELINKER_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_SSAFFORCELINKER_H #include "SSAFBuiltinForceLinker.h" // IWYU pragma: keep -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SSAFFORCELINKER_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_SSAFFORCELINKER_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/SourceTransformation/SourceEditEmitter.h b/clang/include/clang/ScalableStaticAnalysis/SourceTransformation/SourceEditEmitter.h similarity index 72% rename from clang/include/clang/ScalableStaticAnalysisFramework/SourceTransformation/SourceEditEmitter.h rename to clang/include/clang/ScalableStaticAnalysis/SourceTransformation/SourceEditEmitter.h index e96c9846a35f2..160d30bfbc665 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/SourceTransformation/SourceEditEmitter.h +++ b/clang/include/clang/ScalableStaticAnalysis/SourceTransformation/SourceEditEmitter.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SOURCETRANSFORMATION_SOURCEEDITEMITTER_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SOURCETRANSFORMATION_SOURCEEDITEMITTER_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_SOURCETRANSFORMATION_SOURCEEDITEMITTER_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_SOURCETRANSFORMATION_SOURCEEDITEMITTER_H #include "clang/Tooling/Core/Replacement.h" @@ -26,4 +26,4 @@ class SourceEditEmitter { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SOURCETRANSFORMATION_SOURCEEDITEMITTER_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_SOURCETRANSFORMATION_SOURCEEDITEMITTER_H diff --git 
a/clang/include/clang/ScalableStaticAnalysisFramework/SourceTransformation/Transformation.h b/clang/include/clang/ScalableStaticAnalysis/SourceTransformation/Transformation.h similarity index 65% rename from clang/include/clang/ScalableStaticAnalysisFramework/SourceTransformation/Transformation.h rename to clang/include/clang/ScalableStaticAnalysis/SourceTransformation/Transformation.h index 5553b7796b1ad..fae222cda0e84 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/SourceTransformation/Transformation.h +++ b/clang/include/clang/ScalableStaticAnalysis/SourceTransformation/Transformation.h @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SOURCETRANSFORMATION_TRANSFORMATION_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SOURCETRANSFORMATION_TRANSFORMATION_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_SOURCETRANSFORMATION_TRANSFORMATION_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_SOURCETRANSFORMATION_TRANSFORMATION_H #include "clang/AST/ASTConsumer.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/WPASuite.h" -#include "clang/ScalableStaticAnalysisFramework/SourceTransformation/SourceEditEmitter.h" -#include "clang/ScalableStaticAnalysisFramework/SourceTransformation/TransformationReportEmitter.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/WPASuite.h" +#include "clang/ScalableStaticAnalysis/SourceTransformation/SourceEditEmitter.h" +#include "clang/ScalableStaticAnalysis/SourceTransformation/TransformationReportEmitter.h" namespace clang::ssaf { @@ -35,4 +35,4 @@ class Transformation : public clang::ASTConsumer { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SOURCETRANSFORMATION_TRANSFORMATION_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_SOURCETRANSFORMATION_TRANSFORMATION_H diff --git 
a/clang/include/clang/ScalableStaticAnalysisFramework/SourceTransformation/TransformationRegistry.h b/clang/include/clang/ScalableStaticAnalysis/SourceTransformation/TransformationRegistry.h similarity index 74% rename from clang/include/clang/ScalableStaticAnalysisFramework/SourceTransformation/TransformationRegistry.h rename to clang/include/clang/ScalableStaticAnalysis/SourceTransformation/TransformationRegistry.h index 231a810f60f26..97301d005e496 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/SourceTransformation/TransformationRegistry.h +++ b/clang/include/clang/ScalableStaticAnalysis/SourceTransformation/TransformationRegistry.h @@ -18,19 +18,19 @@ // // For a statically-linked transformation also extend the `AnchorSources` // list in -// clang/include/clang/ScalableStaticAnalysisFramework/SSAFBuiltinForceLinker.h +// clang/include/clang/ScalableStaticAnalysis/SSAFBuiltinForceLinker.h // (plugin-loaded transformations do not need an anchor — the dynamic loader // runs every global ctor on load). 
// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SOURCETRANSFORMATION_TRANSFORMATIONREGISTRY_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SOURCETRANSFORMATION_TRANSFORMATIONREGISTRY_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_SOURCETRANSFORMATION_TRANSFORMATIONREGISTRY_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_SOURCETRANSFORMATION_TRANSFORMATIONREGISTRY_H -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/WPASuite.h" -#include "clang/ScalableStaticAnalysisFramework/SourceTransformation/SourceEditEmitter.h" -#include "clang/ScalableStaticAnalysisFramework/SourceTransformation/Transformation.h" -#include "clang/ScalableStaticAnalysisFramework/SourceTransformation/TransformationReportEmitter.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/WPASuite.h" +#include "clang/ScalableStaticAnalysis/SourceTransformation/SourceEditEmitter.h" +#include "clang/ScalableStaticAnalysis/SourceTransformation/Transformation.h" +#include "clang/ScalableStaticAnalysis/SourceTransformation/TransformationReportEmitter.h" #include "clang/Support/Compiler.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Registry.h" @@ -63,4 +63,4 @@ using TransformationRegistry = LLVM_DECLARE_REGISTRY(clang::ssaf::TransformationRegistry) -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SOURCETRANSFORMATION_TRANSFORMATIONREGISTRY_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_SOURCETRANSFORMATION_TRANSFORMATIONREGISTRY_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/SourceTransformation/TransformationReportEmitter.h b/clang/include/clang/ScalableStaticAnalysis/SourceTransformation/TransformationReportEmitter.h similarity index 75% rename from clang/include/clang/ScalableStaticAnalysisFramework/SourceTransformation/TransformationReportEmitter.h rename to 
clang/include/clang/ScalableStaticAnalysis/SourceTransformation/TransformationReportEmitter.h index de57dd5a3952b..2f0e47064cd1a 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/SourceTransformation/TransformationReportEmitter.h +++ b/clang/include/clang/ScalableStaticAnalysis/SourceTransformation/TransformationReportEmitter.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SOURCETRANSFORMATION_TRANSFORMATIONREPORTEMITTER_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SOURCETRANSFORMATION_TRANSFORMATIONREPORTEMITTER_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_SOURCETRANSFORMATION_TRANSFORMATIONREPORTEMITTER_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_SOURCETRANSFORMATION_TRANSFORMATIONREPORTEMITTER_H #include "clang/Basic/Sarif.h" #include "clang/Basic/SourceLocation.h" @@ -30,4 +30,4 @@ class TransformationReportEmitter { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_SOURCETRANSFORMATION_TRANSFORMATIONREPORTEMITTER_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_SOURCETRANSFORMATION_TRANSFORMATIONREPORTEMITTER_H diff --git a/clang/include/clang/ScalableStaticAnalysisFramework/Tool/Utils.h b/clang/include/clang/ScalableStaticAnalysis/Tool/Utils.h similarity index 92% rename from clang/include/clang/ScalableStaticAnalysisFramework/Tool/Utils.h rename to clang/include/clang/ScalableStaticAnalysis/Tool/Utils.h index f7ee5c28ffe89..f02bb8152cfd1 100644 --- a/clang/include/clang/ScalableStaticAnalysisFramework/Tool/Utils.h +++ b/clang/include/clang/ScalableStaticAnalysis/Tool/Utils.h @@ -13,10 +13,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_TOOL_UTILS_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_TOOL_UTILS_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_TOOL_UTILS_H +#define 
LLVM_CLANG_SCALABLESTATICANALYSIS_TOOL_UTILS_H -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/CommandLine.h" @@ -92,4 +92,4 @@ struct FormatFile { } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_TOOL_UTILS_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_TOOL_UTILS_H diff --git a/clang/include/module.modulemap b/clang/include/module.modulemap index d3f616b3fd636..b8a9184980259 100644 --- a/clang/include/module.modulemap +++ b/clang/include/module.modulemap @@ -147,12 +147,12 @@ module Clang_Options { requires cplusplus umbrella "clang/Options" module * { ex module Clang_Parse { requires cplusplus umbrella "clang/Parse" module * { export * } } module Clang_Rewrite { requires cplusplus umbrella "clang/Rewrite/Core" module * { export * } } module Clang_RewriteFrontend { requires cplusplus umbrella "clang/Rewrite/Frontend" module * { export * } } -module Clang_ScalableStaticAnalysisFramework { +module Clang_ScalableStaticAnalysis { requires cplusplus - umbrella "clang/ScalableStaticAnalysisFramework" + umbrella "clang/ScalableStaticAnalysis" - textual header "clang/ScalableStaticAnalysisFramework/BuiltinAnchorSources.def" - textual header "clang/ScalableStaticAnalysisFramework/Core/Model/PrivateFieldNames.def" + textual header "clang/ScalableStaticAnalysis/BuiltinAnchorSources.def" + textual header "clang/ScalableStaticAnalysis/Core/Model/PrivateFieldNames.def" module * { export * } } diff --git a/clang/lib/CMakeLists.txt b/clang/lib/CMakeLists.txt index d9d1e4630a416..dee31cb3f166e 100644 --- a/clang/lib/CMakeLists.txt +++ b/clang/lib/CMakeLists.txt @@ -23,7 +23,7 @@ add_subdirectory(DirectoryWatcher) add_subdirectory(Index) add_subdirectory(IndexSerialization) add_subdirectory(InstallAPI) 
-add_subdirectory(ScalableStaticAnalysisFramework) +add_subdirectory(ScalableStaticAnalysis) add_subdirectory(StaticAnalyzer) add_subdirectory(Format) if(CLANG_INCLUDE_TESTS) diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt index cd410899c450f..cba616bb32e92 100644 --- a/clang/lib/Driver/CMakeLists.txt +++ b/clang/lib/Driver/CMakeLists.txt @@ -112,10 +112,10 @@ add_clang_library(clangDriver clangBasic clangDependencyScanning clangFrontend - clangScalableStaticAnalysisFrameworkAnalyses - clangScalableStaticAnalysisFrameworkCore - clangScalableStaticAnalysisFrameworkFrontend - clangScalableStaticAnalysisFrameworkSourceTransformation + clangScalableStaticAnalysisAnalyses + clangScalableStaticAnalysisCore + clangScalableStaticAnalysisFrontend + clangScalableStaticAnalysisSourceTransformation clangSerialization clangLex clangOptions diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index ea06235de5c1f..40953c0013f62 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -70,9 +70,9 @@ #include "clang/Driver/Types.h" #include "clang/Options/OptionUtils.h" #include "clang/Options/Options.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h" // IWYU pragma: keep +#include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" +#include "clang/ScalableStaticAnalysis/SSAFForceLinker.h" // IWYU pragma: keep #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" diff --git a/clang/lib/FrontendTool/CMakeLists.txt b/clang/lib/FrontendTool/CMakeLists.txt index 543aa06090ec8..2e9ce83d25fe1 100644 --- a/clang/lib/FrontendTool/CMakeLists.txt +++ 
b/clang/lib/FrontendTool/CMakeLists.txt @@ -4,10 +4,10 @@ set(LLVM_LINK_COMPONENTS ) set(link_libs - clangScalableStaticAnalysisFrameworkAnalyses - clangScalableStaticAnalysisFrameworkCore - clangScalableStaticAnalysisFrameworkFrontend - clangScalableStaticAnalysisFrameworkSourceTransformation + clangScalableStaticAnalysisAnalyses + clangScalableStaticAnalysisCore + clangScalableStaticAnalysisFrontend + clangScalableStaticAnalysisSourceTransformation clangBasic clangCodeGen clangDriver diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index 997200619e599..a206770c8490f 100644 --- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -24,8 +24,8 @@ #include "clang/FrontendTool/Utils.h" #include "clang/Options/Options.h" #include "clang/Rewrite/Frontend/FrontendActions.h" -#include "clang/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.h" -#include "clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h" // IWYU pragma: keep +#include "clang/ScalableStaticAnalysis/Frontend/TUSummaryExtractorFrontendAction.h" +#include "clang/ScalableStaticAnalysis/SSAFForceLinker.h" // IWYU pragma: keep #include "clang/StaticAnalyzer/Frontend/AnalyzerHelpFlags.h" #include "clang/StaticAnalyzer/Frontend/FrontendActions.h" #include "llvm/Option/OptTable.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/CMakeLists.txt b/clang/lib/ScalableStaticAnalysis/Analyses/CMakeLists.txt similarity index 85% rename from clang/lib/ScalableStaticAnalysisFramework/Analyses/CMakeLists.txt rename to clang/lib/ScalableStaticAnalysis/Analyses/CMakeLists.txt index 2bd7c925fc5b9..dc72eb6d645d5 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/CMakeLists.txt +++ b/clang/lib/ScalableStaticAnalysis/Analyses/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS TargetParser ) 
-add_clang_library(clangScalableStaticAnalysisFrameworkAnalyses +add_clang_library(clangScalableStaticAnalysisAnalyses CallGraph/CallGraphExtractor.cpp CallGraph/CallGraphJSONFormat.cpp EntityPointerLevel/EntityPointerLevel.cpp @@ -22,7 +22,7 @@ add_clang_library(clangScalableStaticAnalysisFrameworkAnalyses clangAST clangAnalysis clangBasic - clangScalableStaticAnalysisFrameworkCore + clangScalableStaticAnalysisCore DEPENDS ) diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractor.cpp b/clang/lib/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphExtractor.cpp similarity index 93% rename from clang/lib/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractor.cpp rename to clang/lib/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphExtractor.cpp index 6dfb66a283674..112a1666bea99 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphExtractor.cpp @@ -13,9 +13,9 @@ #include "clang/Analysis/AnalysisDeclContext.h" #include "clang/Analysis/CallGraph.h" #include "clang/Basic/SourceManager.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphSummary.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" #include "llvm/ADT/STLExtras.h" #include diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphJSONFormat.cpp b/clang/lib/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphJSONFormat.cpp similarity index 95% rename from clang/lib/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphJSONFormat.cpp rename to 
clang/lib/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphJSONFormat.cpp index 6f2414280bfaa..3260fd3e4cb3a 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphJSONFormat.cpp +++ b/clang/lib/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphJSONFormat.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h" +#include "clang/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphSummary.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/JSON.h" #include diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.cpp b/clang/lib/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.cpp similarity index 98% rename from clang/lib/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.cpp rename to clang/lib/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.cpp index 758009be1f49b..eb8dfdc5d9ace 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.cpp +++ b/clang/lib/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.cpp @@ -6,14 +6,14 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h" +#include 
"clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" #include "SSAFAnalysesCommon.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/Expr.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/StmtVisitor.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h" #include using namespace clang; diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevelFormat.cpp b/clang/lib/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevelFormat.cpp similarity index 93% rename from clang/lib/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevelFormat.cpp rename to clang/lib/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevelFormat.cpp index 7de4a63aff5f0..db417195ceb0a 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevelFormat.cpp +++ b/clang/lib/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevelFormat.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h" #include "SSAFAnalysesCommon.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" #include "llvm/Support/Error.h" #include "llvm/Support/JSON.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.cpp 
b/clang/lib/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.cpp similarity index 88% rename from clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.cpp rename to clang/lib/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.cpp index fe28d9ae4efce..9f90f562c1294 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.cpp +++ b/clang/lib/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.h" using namespace clang; using namespace ssaf; diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowAnalysis.cpp b/clang/lib/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowAnalysis.cpp similarity index 84% rename from clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowAnalysis.cpp rename to clang/lib/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowAnalysis.cpp index de8490601cccc..bb68bb4353536 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowAnalysis.cpp +++ b/clang/lib/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowAnalysis.cpp @@ -6,15 +6,15 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowAnalysis.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowAnalysis.h" #include "SSAFAnalysesCommon.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include 
"clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/DerivedAnalysis.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/SummaryAnalysis.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowFormat.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/DerivedAnalysis.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/SummaryAnalysis.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Error.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp b/clang/lib/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowExtractor.cpp similarity index 95% rename from clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp rename to clang/lib/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowExtractor.cpp index 870a398cda9f2..8961a90acaf81 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowExtractor.cpp @@ -15,13 +15,13 @@ #include "clang/AST/ExprCXX.h" #include "clang/AST/Stmt.h" #include "clang/AST/TypeBase.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" 
-#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/ADT/Sequence.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowFormat.cpp b/clang/lib/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowFormat.cpp similarity index 92% rename from clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowFormat.cpp rename to clang/lib/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowFormat.cpp index 146a815e0e55d..ccad92be9b745 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowFormat.cpp +++ b/clang/lib/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowFormat.cpp @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowFormat.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowFormat.h" #include "SSAFAnalysesCommon.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h" -#include 
"clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Support/Error.h" #include "llvm/Support/JSON.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.cpp b/clang/lib/ScalableStaticAnalysis/Analyses/SSAFAnalysesCommon.cpp similarity index 100% rename from clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.cpp rename to clang/lib/ScalableStaticAnalysis/Analyses/SSAFAnalysesCommon.cpp diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.h b/clang/lib/ScalableStaticAnalysis/Analyses/SSAFAnalysesCommon.h similarity index 91% rename from clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.h rename to clang/lib/ScalableStaticAnalysis/Analyses/SSAFAnalysesCommon.h index db0697798077e..674ccfcc58313 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/SSAFAnalysesCommon.h +++ b/clang/lib/ScalableStaticAnalysis/Analyses/SSAFAnalysesCommon.h @@ -9,15 +9,15 @@ // Common code in SSAF analyses implementations // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_SSAFANALYSESCOMMON_H -#define LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_SSAFANALYSESCOMMON_H +#ifndef LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_SSAFANALYSESCOMMON_H +#define LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_SSAFANALYSESCOMMON_H #include "clang/AST/ASTContext.h" #include "clang/AST/ASTTypeTraits.h" #include "clang/AST/Decl.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include 
"clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Support/Debug.h" @@ -132,4 +132,4 @@ void extractAndAddSummaries(TUSummaryExtractor &Extractor, } // namespace clang::ssaf -#endif // LLVM_CLANG_SCALABLESTATICANALYSISFRAMEWORK_ANALYSES_SSAFANALYSESCOMMON_H +#endif // LLVM_CLANG_SCALABLESTATICANALYSIS_ANALYSES_SSAFANALYSESCOMMON_H diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.cpp b/clang/lib/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.cpp similarity index 89% rename from clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.cpp rename to clang/lib/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.cpp index 86545762c0121..185992efe9977 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.cpp +++ b/clang/lib/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" +#include "clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" using namespace clang; using namespace ssaf; diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.cpp b/clang/lib/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.cpp similarity index 91% rename from 
clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.cpp rename to clang/lib/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.cpp index 6ca8c5176d21b..f34645ce8449a 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.cpp +++ b/clang/lib/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.cpp @@ -11,15 +11,15 @@ // EntityPointerLevelSets //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.h" +#include "clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.h" #include "SSAFAnalysesCommon.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowAnalysis.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/SummaryAnalysis.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowAnalysis.h" +#include "clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.h" +#include 
"clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/SummaryAnalysis.h" #include "llvm/Support/Error.h" #include "llvm/Support/JSON.h" #include diff --git a/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp b/clang/lib/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp similarity index 87% rename from clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp rename to clang/lib/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp index 1ba6bc7f8e7b6..24f49ef05e653 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageExtractor.cpp @@ -11,11 +11,11 @@ #include "clang/AST/ASTContext.h" #include "clang/AST/DynamicRecursiveASTVisitor.h" #include "clang/Analysis/Analyses/UnsafeBufferUsage.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" +#include "clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Support/ErrorHandling.h" diff --git 
a/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageFormat.cpp b/clang/lib/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageFormat.cpp similarity index 88% rename from clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageFormat.cpp rename to clang/lib/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageFormat.cpp index ff6ab3886941d..5fafe0e92abfa 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageFormat.cpp +++ b/clang/lib/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageFormat.cpp @@ -7,10 +7,10 @@ //===----------------------------------------------------------------------===// #include "SSAFAnalysesCommon.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevelFormat.h" +#include "clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" #include "llvm/Support/Error.h" #include "llvm/Support/JSON.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/CMakeLists.txt b/clang/lib/ScalableStaticAnalysis/CMakeLists.txt similarity index 100% rename from clang/lib/ScalableStaticAnalysisFramework/CMakeLists.txt rename to clang/lib/ScalableStaticAnalysis/CMakeLists.txt diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/ASTEntityMapping.cpp b/clang/lib/ScalableStaticAnalysis/Core/ASTEntityMapping.cpp 
similarity index 93% rename from clang/lib/ScalableStaticAnalysisFramework/Core/ASTEntityMapping.cpp rename to clang/lib/ScalableStaticAnalysis/Core/ASTEntityMapping.cpp index cac27b2c3f278..1f4bf0da274ed 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/ASTEntityMapping.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/ASTEntityMapping.cpp @@ -10,9 +10,9 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/ASTEntityMapping.h" +#include "clang/ScalableStaticAnalysis/Core/ASTEntityMapping.h" #include "clang/AST/Decl.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" #include "clang/UnifiedSymbolResolution/USRGeneration.h" #include "llvm/ADT/SmallString.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/CMakeLists.txt b/clang/lib/ScalableStaticAnalysis/Core/CMakeLists.txt similarity index 94% rename from clang/lib/ScalableStaticAnalysisFramework/Core/CMakeLists.txt rename to clang/lib/ScalableStaticAnalysis/Core/CMakeLists.txt index bcaf5db7f3093..b2cde5f225445 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/CMakeLists.txt +++ b/clang/lib/ScalableStaticAnalysis/Core/CMakeLists.txt @@ -3,7 +3,7 @@ set(LLVM_LINK_COMPONENTS TargetParser ) -add_clang_library(clangScalableStaticAnalysisFrameworkCore +add_clang_library(clangScalableStaticAnalysisCore ASTEntityMapping.cpp EntityLinker/EntityLinker.cpp Model/BuildNamespace.cpp diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/EntityLinker/EntityLinker.cpp b/clang/lib/ScalableStaticAnalysis/Core/EntityLinker/EntityLinker.cpp similarity index 92% rename from clang/lib/ScalableStaticAnalysisFramework/Core/EntityLinker/EntityLinker.cpp rename to clang/lib/ScalableStaticAnalysis/Core/EntityLinker/EntityLinker.cpp index 5aff18999e114..462978932a53d 100644 --- 
a/clang/lib/ScalableStaticAnalysisFramework/Core/EntityLinker/EntityLinker.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/EntityLinker/EntityLinker.cpp @@ -6,13 +6,13 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntityLinker.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntitySummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/FormatProviders.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/EntityLinker.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/EntitySummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/TUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/Support/FormatProviders.h" #include using namespace clang::ssaf; diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.cpp b/clang/lib/ScalableStaticAnalysis/Core/Model/BuildNamespace.cpp similarity index 96% rename from clang/lib/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Model/BuildNamespace.cpp index 09fff31e8f4a8..f68ed961f71b9 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/Model/BuildNamespace.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// 
-#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" #include "../ModelStringConversions.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/ErrorHandling.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Model/EntityId.cpp b/clang/lib/ScalableStaticAnalysis/Core/Model/EntityId.cpp similarity index 88% rename from clang/lib/ScalableStaticAnalysisFramework/Core/Model/EntityId.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Model/EntityId.cpp index 9c4eec2d7e299..a731bd24da4a0 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Model/EntityId.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/Model/EntityId.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" namespace clang::ssaf { diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.cpp b/clang/lib/ScalableStaticAnalysis/Core/Model/EntityIdTable.cpp similarity index 92% rename from clang/lib/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Model/EntityIdTable.cpp index cb514581ca9ec..651179c24a342 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/Model/EntityIdTable.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h" #include namespace clang::ssaf { diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.cpp b/clang/lib/ScalableStaticAnalysis/Core/Model/EntityLinkage.cpp similarity index 93% rename from 
clang/lib/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Model/EntityLinkage.cpp index 179b906c0b999..0bae90cd66a22 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/Model/EntityLinkage.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" #include "../ModelStringConversions.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Model/EntityName.cpp b/clang/lib/ScalableStaticAnalysis/Core/Model/EntityName.cpp similarity index 94% rename from clang/lib/ScalableStaticAnalysisFramework/Core/Model/EntityName.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Model/EntityName.cpp index 9145506865f24..c9e09df84143b 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Model/EntityName.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/Model/EntityName.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" namespace clang::ssaf { diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Model/SummaryName.cpp b/clang/lib/ScalableStaticAnalysis/Core/Model/SummaryName.cpp similarity index 88% rename from clang/lib/ScalableStaticAnalysisFramework/Core/Model/SummaryName.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Model/SummaryName.cpp index 1609912aa7f90..66c7726a9da90 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Model/SummaryName.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/Model/SummaryName.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include 
"clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" namespace clang::ssaf { diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/ModelStringConversions.h b/clang/lib/ScalableStaticAnalysis/Core/ModelStringConversions.h similarity index 87% rename from clang/lib/ScalableStaticAnalysisFramework/Core/ModelStringConversions.h rename to clang/lib/ScalableStaticAnalysis/Core/ModelStringConversions.h index f168a475de807..ee1603e50025f 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/ModelStringConversions.h +++ b/clang/lib/ScalableStaticAnalysis/Core/ModelStringConversions.h @@ -13,11 +13,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_LIB_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODELSTRINGCONVERSIONS_H -#define LLVM_CLANG_LIB_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODELSTRINGCONVERSIONS_H +#ifndef LLVM_CLANG_LIB_ScalableStaticAnalysis_CORE_MODELSTRINGCONVERSIONS_H +#define LLVM_CLANG_LIB_ScalableStaticAnalysis_CORE_MODELSTRINGCONVERSIONS_H -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include @@ -84,4 +84,4 @@ entityLinkageTypeFromString(llvm::StringRef Str) { } // namespace clang::ssaf -#endif // LLVM_CLANG_LIB_SCALABLESTATICANALYSISFRAMEWORK_CORE_MODELSTRINGCONVERSIONS_H +#endif // LLVM_CLANG_LIB_ScalableStaticAnalysis_CORE_MODELSTRINGCONVERSIONS_H diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/Artifact.cpp b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/Artifact.cpp similarity index 100% rename from 
clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/Artifact.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/Artifact.cpp diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/JSONEntitySummaryEncoding.cpp b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/JSONEntitySummaryEncoding.cpp similarity index 100% rename from clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/JSONEntitySummaryEncoding.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/JSONEntitySummaryEncoding.cpp diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/JSONEntitySummaryEncoding.h b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/JSONEntitySummaryEncoding.h similarity index 76% rename from clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/JSONEntitySummaryEncoding.h rename to clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/JSONEntitySummaryEncoding.h index 9a1f5d49757f7..5203e9d8ffc6f 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/JSONEntitySummaryEncoding.h +++ b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/JSONEntitySummaryEncoding.h @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_LIB_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_JSONFORMAT_JSONENTITYSUMMARYENCODING_H -#define LLVM_CLANG_LIB_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_JSONFORMAT_JSONENTITYSUMMARYENCODING_H +#ifndef LLVM_CLANG_LIB_ScalableStaticAnalysis_CORE_SERIALIZATION_JSONFORMAT_JSONENTITYSUMMARYENCODING_H +#define LLVM_CLANG_LIB_ScalableStaticAnalysis_CORE_SERIALIZATION_JSONFORMAT_JSONENTITYSUMMARYENCODING_H -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntitySummaryEncoding.h" -#include 
"clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/EntitySummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" #include "llvm/Support/JSON.h" #include @@ -49,4 +49,4 @@ class JSONEntitySummaryEncoding final : public EntitySummaryEncoding { } // namespace clang::ssaf -#endif // LLVM_CLANG_LIB_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_JSONFORMAT_JSONENTITYSUMMARYENCODING_H +#endif // LLVM_CLANG_LIB_ScalableStaticAnalysis_CORE_SERIALIZATION_JSONFORMAT_JSONENTITYSUMMARYENCODING_H diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/JSONFormatImpl.cpp b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/JSONFormatImpl.cpp similarity index 99% rename from clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/JSONFormatImpl.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/JSONFormatImpl.cpp index f56e96295515d..8dcbb707c3a22 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/JSONFormatImpl.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/JSONFormatImpl.cpp @@ -8,7 +8,7 @@ #include "JSONFormatImpl.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.h" #include "llvm/Support/Registry.h" #include "llvm/TargetParser/Triple.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/JSONFormatImpl.h b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/JSONFormatImpl.h similarity index 92% rename from clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/JSONFormatImpl.h rename to clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/JSONFormatImpl.h index 44eb62fba6024..191be83ccb0e5 100644 --- 
a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/JSONFormatImpl.h +++ b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/JSONFormatImpl.h @@ -11,17 +11,17 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_LIB_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_JSONFORMAT_JSONFORMATIMPL_H -#define LLVM_CLANG_LIB_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_JSONFORMAT_JSONFORMATIMPL_H +#ifndef LLVM_CLANG_LIB_ScalableStaticAnalysis_CORE_SERIALIZATION_JSONFORMAT_JSONFORMATIMPL_H +#define LLVM_CLANG_LIB_ScalableStaticAnalysis_CORE_SERIALIZATION_JSONFORMAT_JSONFORMATIMPL_H #include "../../ModelStringConversions.h" #include "JSONEntitySummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntitySummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/FormatProviders.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/EntitySummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/Support/FormatProviders.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" @@ -216,4 +216,4 @@ llvm::Error validateNormalizedTargetTriple(llvm::StringRef Triple); } // namespace clang::ssaf -#endif // 
LLVM_CLANG_LIB_SCALABLESTATICANALYSISFRAMEWORK_CORE_SERIALIZATION_JSONFORMAT_JSONFORMATIMPL_H +#endif // LLVM_CLANG_LIB_ScalableStaticAnalysis_CORE_SERIALIZATION_JSONFORMAT_JSONFORMATIMPL_H diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/LUSummary.cpp b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/LUSummary.cpp similarity index 98% rename from clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/LUSummary.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/LUSummary.cpp index b0f7059b5fffc..89405bc1ed774 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/LUSummary.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/LUSummary.cpp @@ -8,7 +8,7 @@ #include "JSONFormatImpl.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h" #include "llvm/TargetParser/Triple.h" #include diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/LUSummaryEncoding.cpp b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/LUSummaryEncoding.cpp similarity index 98% rename from clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/LUSummaryEncoding.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/LUSummaryEncoding.cpp index 691f8cb5b7be1..b1f78c5309dfb 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/LUSummaryEncoding.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/LUSummaryEncoding.cpp @@ -8,7 +8,7 @@ #include "JSONFormatImpl.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummaryEncoding.h" #include "llvm/TargetParser/Triple.h" #include diff --git 
a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/TUSummary.cpp b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/TUSummary.cpp similarity index 98% rename from clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/TUSummary.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/TUSummary.cpp index 1f543b9a60710..a77a5952ddb91 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/TUSummary.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/TUSummary.cpp @@ -8,7 +8,7 @@ #include "JSONFormatImpl.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h" #include "llvm/TargetParser/Triple.h" #include diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/TUSummaryEncoding.cpp b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/TUSummaryEncoding.cpp similarity index 98% rename from clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/TUSummaryEncoding.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/TUSummaryEncoding.cpp index 20167447e92d7..5a5ab98731c47 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/TUSummaryEncoding.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/TUSummaryEncoding.cpp @@ -8,7 +8,7 @@ #include "JSONFormatImpl.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/TUSummaryEncoding.h" #include "llvm/TargetParser/Triple.h" #include diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/WPASuite.cpp b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/WPASuite.cpp similarity index 98% rename from 
clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/WPASuite.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/WPASuite.cpp index e65a883279513..523e12a7f60c8 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat/WPASuite.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/Serialization/JSONFormat/WPASuite.cpp @@ -8,7 +8,7 @@ #include "JSONFormatImpl.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/WPASuite.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/WPASuite.h" namespace clang::ssaf { diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.cpp b/clang/lib/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.cpp similarity index 92% rename from clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.cpp index 1219635e5068d..7f7791922bbee 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.h" #include using namespace clang; diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/SummaryData/LUSummaryConsumer.cpp b/clang/lib/ScalableStaticAnalysis/Core/SummaryData/LUSummaryConsumer.cpp similarity index 90% rename from clang/lib/ScalableStaticAnalysisFramework/Core/SummaryData/LUSummaryConsumer.cpp rename to clang/lib/ScalableStaticAnalysis/Core/SummaryData/LUSummaryConsumer.cpp index 3021751213d72..fb630e8117f59 100644 --- 
a/clang/lib/ScalableStaticAnalysisFramework/Core/SummaryData/LUSummaryConsumer.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/SummaryData/LUSummaryConsumer.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/SummaryData/LUSummaryConsumer.h" -#include "clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataBuilderRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/SummaryData/LUSummaryConsumer.h" +#include "clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataBuilderRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h" using namespace clang; using namespace ssaf; diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataBuilderRegistry.cpp b/clang/lib/ScalableStaticAnalysis/Core/SummaryData/SummaryDataBuilderRegistry.cpp similarity index 91% rename from clang/lib/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataBuilderRegistry.cpp rename to clang/lib/ScalableStaticAnalysis/Core/SummaryData/SummaryDataBuilderRegistry.cpp index 867290c6eac31..98624bb9e2a08 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataBuilderRegistry.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/SummaryData/SummaryDataBuilderRegistry.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataBuilderRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataBuilderRegistry.h" using namespace clang; using namespace ssaf; diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.cpp b/clang/lib/ScalableStaticAnalysis/Core/Support/ErrorBuilder.cpp similarity index 96% rename from 
clang/lib/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.cpp rename to clang/lib/ScalableStaticAnalysis/Core/Support/ErrorBuilder.cpp index 3abc42e171065..99610725a6d23 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/Support/ErrorBuilder.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" #include diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.cpp b/clang/lib/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.cpp similarity index 88% rename from clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.cpp rename to clang/lib/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.cpp index b59d190dde519..1456d363625ad 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h" #include using namespace clang; diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.cpp b/clang/lib/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.cpp similarity index 75% rename from clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.cpp rename to 
clang/lib/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.cpp index daea76f7001cb..ba2def365dddc 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.cpp @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h" #include #include diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp b/clang/lib/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.cpp similarity index 87% rename from clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp rename to clang/lib/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.cpp index ccd5eef377d2d..40205b90a4eb6 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.cpp @@ -6,13 +6,13 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h" #include "clang/AST/Decl.h" #include 
"clang/AST/DeclBase.h" -#include "clang/ScalableStaticAnalysisFramework/Core/ASTEntityMapping.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/ASTEntityMapping.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" #include "llvm/Support/Casting.h" #include diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisDriver.cpp b/clang/lib/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisDriver.cpp similarity index 93% rename from clang/lib/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisDriver.cpp rename to clang/lib/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisDriver.cpp index 1425f7079295f..f60c916e10b67 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisDriver.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisDriver.cpp @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisDriver.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/DerivedAnalysis.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/SummaryAnalysis.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisDriver.h" +#include "clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h" +#include 
"clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/DerivedAnalysis.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/SummaryAnalysis.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.cpp b/clang/lib/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.cpp similarity index 86% rename from clang/lib/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.cpp rename to clang/lib/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.cpp index 9719196ed4d6d..011b816ae03dc 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h" using namespace clang::ssaf; diff --git a/clang/lib/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisRegistry.cpp b/clang/lib/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.cpp similarity index 89% rename from clang/lib/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisRegistry.cpp rename to clang/lib/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.cpp index 55b4f36a1764f..71779e800f612 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisRegistry.cpp +++ b/clang/lib/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include 
"clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h" #include "llvm/ADT/STLExtras.h" using namespace clang; diff --git a/clang/lib/ScalableStaticAnalysisFramework/Frontend/CMakeLists.txt b/clang/lib/ScalableStaticAnalysis/Frontend/CMakeLists.txt similarity index 51% rename from clang/lib/ScalableStaticAnalysisFramework/Frontend/CMakeLists.txt rename to clang/lib/ScalableStaticAnalysis/Frontend/CMakeLists.txt index dd799a9db90c8..74e1d0cbe1ed0 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Frontend/CMakeLists.txt +++ b/clang/lib/ScalableStaticAnalysis/Frontend/CMakeLists.txt @@ -3,14 +3,14 @@ set(LLVM_LINK_COMPONENTS TargetParser ) -add_clang_library(clangScalableStaticAnalysisFrameworkFrontend +add_clang_library(clangScalableStaticAnalysisFrontend TUSummaryExtractorFrontendAction.cpp LINK_LIBS clangAST clangBasic clangFrontend - clangScalableStaticAnalysisFrameworkAnalyses - clangScalableStaticAnalysisFrameworkCore + clangScalableStaticAnalysisAnalyses + clangScalableStaticAnalysisCore clangSema ) diff --git a/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp b/clang/lib/ScalableStaticAnalysis/Frontend/TUSummaryExtractorFrontendAction.cpp similarity index 93% rename from clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp rename to clang/lib/ScalableStaticAnalysis/Frontend/TUSummaryExtractorFrontendAction.cpp index 4f290ccac3d16..bb4caef8f99ab 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.cpp +++ b/clang/lib/ScalableStaticAnalysis/Frontend/TUSummaryExtractorFrontendAction.cpp @@ -6,17 +6,17 @@ // 
//===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.h" +#include "clang/ScalableStaticAnalysis/Frontend/TUSummaryExtractorFrontendAction.h" #include "clang/AST/ASTConsumer.h" #include "clang/Basic/DiagnosticFrontend.h" #include "clang/Basic/TargetInfo.h" #include "clang/Frontend/MultiplexConsumer.h" #include "clang/Frontend/SSAFOptions.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/IOSandbox.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/Plugins/CMakeLists.txt b/clang/lib/ScalableStaticAnalysis/Plugins/CMakeLists.txt similarity index 98% rename from clang/lib/ScalableStaticAnalysisFramework/Plugins/CMakeLists.txt rename to clang/lib/ScalableStaticAnalysis/Plugins/CMakeLists.txt index b77ffc49c0f9f..a6af1e64e0ac3 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Plugins/CMakeLists.txt +++ b/clang/lib/ScalableStaticAnalysis/Plugins/CMakeLists.txt @@ -1,6 +1,6 @@ if(CLANG_PLUGIN_SUPPORT AND LLVM_ENABLE_PLUGINS AND NOT WIN32) # Plugins must never bring LLVM or Clang libraries in 
statically. - # clang-ssaf-analyzer already loads clangScalableStaticAnalysisFrameworkCore + # clang-ssaf-analyzer already loads clangScalableStaticAnalysisCore # and LLVM into the process; a second static copy would produce duplicate # llvm::Registry instances with separate global state, breaking registration. # diff --git a/clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/AnalysisResults.h b/clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/AnalysisResults.h similarity index 81% rename from clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/AnalysisResults.h rename to clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/AnalysisResults.h index c8ae6a542b8f0..f5072dce74e23 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/AnalysisResults.h +++ b/clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/AnalysisResults.h @@ -9,9 +9,9 @@ #ifndef EXAMPLE_PLUGIN_ANALYSIS_RESULTS_H #define EXAMPLE_PLUGIN_ANALYSIS_RESULTS_H -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisResult.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisResult.h" #include #include #include diff --git a/clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/CMakeLists.txt b/clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/CMakeLists.txt similarity index 87% rename from clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/CMakeLists.txt rename to clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/CMakeLists.txt index 06c8e2abc1bd4..b4eea284636bf 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/CMakeLists.txt +++ 
b/clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/CMakeLists.txt @@ -14,5 +14,5 @@ add_llvm_library(SSAFExamplePlugin MODULE BUILDTREE_ONLY # Pull in SSAF and LLVM include paths without any static link dependency. target_include_directories(SSAFExamplePlugin PRIVATE - $ + $ ) diff --git a/clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/PairsAnalysis.cpp b/clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/PairsAnalysis.cpp similarity index 94% rename from clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/PairsAnalysis.cpp rename to clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/PairsAnalysis.cpp index 34923f4a15e6e..94f08d7e9fb73 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/PairsAnalysis.cpp +++ b/clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/PairsAnalysis.cpp @@ -7,11 +7,11 @@ //===----------------------------------------------------------------------===// #include "AnalysisResults.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/SummaryAnalysis.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/SummaryAnalysis.h" #include "llvm/Support/Error.h" #include "llvm/Support/JSON.h" #include "llvm/Support/Registry.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/TagsAnalysis.cpp 
b/clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/TagsAnalysis.cpp similarity index 92% rename from clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/TagsAnalysis.cpp rename to clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/TagsAnalysis.cpp index 630a14544d7b0..9d74b7b30d9bf 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/TagsAnalysis.cpp +++ b/clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/TagsAnalysis.cpp @@ -7,11 +7,11 @@ //===----------------------------------------------------------------------===// #include "AnalysisResults.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/SummaryAnalysis.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/SummaryAnalysis.h" #include "llvm/Support/Error.h" #include "llvm/Support/JSON.h" #include "llvm/Support/Registry.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/TagsPairsAnalysis.cpp b/clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/TagsPairsAnalysis.cpp similarity index 94% rename from clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/TagsPairsAnalysis.cpp rename to clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/TagsPairsAnalysis.cpp index 14750de321824..5ceb0eb971673 100644 --- 
a/clang/lib/ScalableStaticAnalysisFramework/Plugins/ExamplePlugin/TagsPairsAnalysis.cpp +++ b/clang/lib/ScalableStaticAnalysis/Plugins/ExamplePlugin/TagsPairsAnalysis.cpp @@ -7,9 +7,9 @@ //===----------------------------------------------------------------------===// #include "AnalysisResults.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/DerivedAnalysis.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/DerivedAnalysis.h" #include "llvm/Support/Error.h" #include "llvm/Support/JSON.h" #include "llvm/Support/Registry.h" diff --git a/clang/lib/ScalableStaticAnalysisFramework/SourceTransformation/CMakeLists.txt b/clang/lib/ScalableStaticAnalysis/SourceTransformation/CMakeLists.txt similarity index 52% rename from clang/lib/ScalableStaticAnalysisFramework/SourceTransformation/CMakeLists.txt rename to clang/lib/ScalableStaticAnalysis/SourceTransformation/CMakeLists.txt index c96a386977487..3761ee7463f86 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/SourceTransformation/CMakeLists.txt +++ b/clang/lib/ScalableStaticAnalysis/SourceTransformation/CMakeLists.txt @@ -2,12 +2,12 @@ set(LLVM_LINK_COMPONENTS Support ) -add_clang_library(clangScalableStaticAnalysisFrameworkSourceTransformation +add_clang_library(clangScalableStaticAnalysisSourceTransformation TransformationRegistry.cpp LINK_LIBS clangAST clangBasic - clangScalableStaticAnalysisFrameworkCore + clangScalableStaticAnalysisCore clangToolingCore ) diff --git a/clang/lib/ScalableStaticAnalysisFramework/SourceTransformation/TransformationRegistry.cpp b/clang/lib/ScalableStaticAnalysis/SourceTransformation/TransformationRegistry.cpp 
similarity index 94% rename from clang/lib/ScalableStaticAnalysisFramework/SourceTransformation/TransformationRegistry.cpp rename to clang/lib/ScalableStaticAnalysis/SourceTransformation/TransformationRegistry.cpp index 9d770c8bcd0f9..505b0ee4f9e51 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/SourceTransformation/TransformationRegistry.cpp +++ b/clang/lib/ScalableStaticAnalysis/SourceTransformation/TransformationRegistry.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/SourceTransformation/TransformationRegistry.h" +#include "clang/ScalableStaticAnalysis/SourceTransformation/TransformationRegistry.h" #include using namespace clang; diff --git a/clang/lib/ScalableStaticAnalysis/Tool/CMakeLists.txt b/clang/lib/ScalableStaticAnalysis/Tool/CMakeLists.txt new file mode 100644 index 0000000000000..2ca9a8d9b84d0 --- /dev/null +++ b/clang/lib/ScalableStaticAnalysis/Tool/CMakeLists.txt @@ -0,0 +1,12 @@ +set(LLVM_LINK_COMPONENTS + Support + TargetParser + ) + +add_clang_library(clangScalableStaticAnalysisTool + Utils.cpp + + LINK_LIBS + clangBasic + clangScalableStaticAnalysisCore + ) diff --git a/clang/lib/ScalableStaticAnalysisFramework/Tool/Utils.cpp b/clang/lib/ScalableStaticAnalysis/Tool/Utils.cpp similarity index 99% rename from clang/lib/ScalableStaticAnalysisFramework/Tool/Utils.cpp rename to clang/lib/ScalableStaticAnalysis/Tool/Utils.cpp index f14f34eb3bbb0..6a740f57741d7 100644 --- a/clang/lib/ScalableStaticAnalysisFramework/Tool/Utils.cpp +++ b/clang/lib/ScalableStaticAnalysis/Tool/Utils.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Tool/Utils.h" +#include "clang/ScalableStaticAnalysis/Tool/Utils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" diff --git 
a/clang/lib/ScalableStaticAnalysisFramework/Tool/CMakeLists.txt b/clang/lib/ScalableStaticAnalysisFramework/Tool/CMakeLists.txt deleted file mode 100644 index edbe20e84c05d..0000000000000 --- a/clang/lib/ScalableStaticAnalysisFramework/Tool/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -set(LLVM_LINK_COMPONENTS - Support - TargetParser - ) - -add_clang_library(clangScalableStaticAnalysisFrameworkTool - Utils.cpp - - LINK_LIBS - clangBasic - clangScalableStaticAnalysisFrameworkCore - ) diff --git a/clang/tools/clang-ssaf-analyzer/CMakeLists.txt b/clang/tools/clang-ssaf-analyzer/CMakeLists.txt index 830db2c4d1657..f1455e105b226 100644 --- a/clang/tools/clang-ssaf-analyzer/CMakeLists.txt +++ b/clang/tools/clang-ssaf-analyzer/CMakeLists.txt @@ -16,10 +16,10 @@ add_clang_tool(clang-ssaf-analyzer clang_target_link_libraries(clang-ssaf-analyzer PRIVATE clangBasic - clangScalableStaticAnalysisFrameworkAnalyses - clangScalableStaticAnalysisFrameworkCore - clangScalableStaticAnalysisFrameworkSourceTransformation - clangScalableStaticAnalysisFrameworkTool + clangScalableStaticAnalysisAnalyses + clangScalableStaticAnalysisCore + clangScalableStaticAnalysisSourceTransformation + clangScalableStaticAnalysisTool ) if(CLANG_PLUGIN_SUPPORT) diff --git a/clang/tools/clang-ssaf-analyzer/SSAFAnalyzer.cpp b/clang/tools/clang-ssaf-analyzer/SSAFAnalyzer.cpp index 14f037beb60c1..9595347808dba 100644 --- a/clang/tools/clang-ssaf-analyzer/SSAFAnalyzer.cpp +++ b/clang/tools/clang-ssaf-analyzer/SSAFAnalyzer.cpp @@ -12,11 +12,11 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisDriver.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h" -#include "clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h" // IWYU pragma: keep -#include 
"clang/ScalableStaticAnalysisFramework/Tool/Utils.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisDriver.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h" +#include "clang/ScalableStaticAnalysis/SSAFForceLinker.h" // IWYU pragma: keep +#include "clang/ScalableStaticAnalysis/Tool/Utils.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/InitLLVM.h" diff --git a/clang/tools/clang-ssaf-format/CMakeLists.txt b/clang/tools/clang-ssaf-format/CMakeLists.txt index 8416dd1d88023..6051a8c8ba307 100644 --- a/clang/tools/clang-ssaf-format/CMakeLists.txt +++ b/clang/tools/clang-ssaf-format/CMakeLists.txt @@ -16,10 +16,10 @@ add_clang_tool(clang-ssaf-format clang_target_link_libraries(clang-ssaf-format PRIVATE clangBasic - clangScalableStaticAnalysisFrameworkAnalyses - clangScalableStaticAnalysisFrameworkCore - clangScalableStaticAnalysisFrameworkSourceTransformation - clangScalableStaticAnalysisFrameworkTool + clangScalableStaticAnalysisAnalyses + clangScalableStaticAnalysisCore + clangScalableStaticAnalysisSourceTransformation + clangScalableStaticAnalysisTool ) if(CLANG_PLUGIN_SUPPORT) diff --git a/clang/tools/clang-ssaf-format/SSAFFormat.cpp b/clang/tools/clang-ssaf-format/SSAFFormat.cpp index c1762c0fa3297..9416b45c974b5 100644 --- a/clang/tools/clang-ssaf-format/SSAFFormat.cpp +++ b/clang/tools/clang-ssaf-format/SSAFFormat.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" -#include 
"clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h" // IWYU pragma: keep -#include "clang/ScalableStaticAnalysisFramework/Tool/Utils.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/TUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.h" +#include "clang/ScalableStaticAnalysis/SSAFForceLinker.h" // IWYU pragma: keep +#include "clang/ScalableStaticAnalysis/Tool/Utils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" diff --git a/clang/tools/clang-ssaf-linker/CMakeLists.txt b/clang/tools/clang-ssaf-linker/CMakeLists.txt index 576daab0c9355..a4d371d908373 100644 --- a/clang/tools/clang-ssaf-linker/CMakeLists.txt +++ b/clang/tools/clang-ssaf-linker/CMakeLists.txt @@ -11,8 +11,8 @@ add_clang_tool(clang-ssaf-linker clang_target_link_libraries(clang-ssaf-linker PRIVATE clangBasic - clangScalableStaticAnalysisFrameworkAnalyses - clangScalableStaticAnalysisFrameworkCore - clangScalableStaticAnalysisFrameworkSourceTransformation - clangScalableStaticAnalysisFrameworkTool + clangScalableStaticAnalysisAnalyses + clangScalableStaticAnalysisCore + clangScalableStaticAnalysisSourceTransformation + clangScalableStaticAnalysisTool ) diff --git a/clang/tools/clang-ssaf-linker/SSAFLinker.cpp b/clang/tools/clang-ssaf-linker/SSAFLinker.cpp index 1a620dedcbd2d..a8bf6c97a37b8 100644 --- a/clang/tools/clang-ssaf-linker/SSAFLinker.cpp +++ b/clang/tools/clang-ssaf-linker/SSAFLinker.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntityLinker.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h" -#include 
"clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h" -#include "clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h" // IWYU pragma: keep -#include "clang/ScalableStaticAnalysisFramework/Tool/Utils.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/EntityLinker.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/TUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h" +#include "clang/ScalableStaticAnalysis/SSAFForceLinker.h" // IWYU pragma: keep +#include "clang/ScalableStaticAnalysis/Tool/Utils.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" diff --git a/clang/unittests/CMakeLists.txt b/clang/unittests/CMakeLists.txt index 71c7a6842fed2..5cc21a563be3e 100644 --- a/clang/unittests/CMakeLists.txt +++ b/clang/unittests/CMakeLists.txt @@ -91,7 +91,7 @@ add_subdirectory(Tooling) add_subdirectory(Format) add_subdirectory(Frontend) add_subdirectory(Rewrite) -add_subdirectory(ScalableStaticAnalysisFramework) +add_subdirectory(ScalableStaticAnalysis) add_subdirectory(Sema) add_subdirectory(CodeGen) if(HAVE_CLANG_REPL_SUPPORT) diff --git a/clang/unittests/ScalableStaticAnalysisFramework/ASTEntityMappingTest.cpp b/clang/unittests/ScalableStaticAnalysis/ASTEntityMappingTest.cpp similarity index 99% rename from clang/unittests/ScalableStaticAnalysisFramework/ASTEntityMappingTest.cpp rename to clang/unittests/ScalableStaticAnalysis/ASTEntityMappingTest.cpp index 9d79647f841c8..c34e92cb9fa5d 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/ASTEntityMappingTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/ASTEntityMappingTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include 
"clang/ScalableStaticAnalysisFramework/Core/ASTEntityMapping.h" +#include "clang/ScalableStaticAnalysis/Core/ASTEntityMapping.h" #include "FindDecl.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp b/clang/unittests/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphExtractorTest.cpp similarity index 97% rename from clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp rename to clang/unittests/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphExtractorTest.cpp index 0a3685c4f1057..f8769f3a9ce4d 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphExtractorTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphExtractorTest.cpp @@ -12,11 +12,11 @@ #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Frontend/SSAFOptions.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/CallGraph/CallGraphSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/ASTEntityMapping.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysis/Analyses/CallGraph/CallGraphSummary.h" +#include "clang/ScalableStaticAnalysis/Core/ASTEntityMapping.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" #include "clang/Tooling/Tooling.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp 
b/clang/unittests/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowTest.cpp similarity index 98% rename from clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp rename to clang/unittests/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowTest.cpp index 49e7bdc21738b..1c42ca0afe222 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.h" #include "TestFixture.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" @@ -14,10 +14,10 @@ #include "clang/AST/ExprCXX.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Frontend/SSAFOptions.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" #include "clang/Tooling/Tooling.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowWPATest.cpp b/clang/unittests/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowWPATest.cpp similarity index 83% rename from 
clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowWPATest.cpp rename to clang/unittests/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowWPATest.cpp index c2b906d715695..8c43ead9750ca 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowWPATest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowWPATest.cpp @@ -7,16 +7,16 @@ //===----------------------------------------------------------------------===// #include "TestFixture.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowAnalysis.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisDriver.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/WPASuite.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowAnalysis.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include 
"clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisDriver.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/WPASuite.h" #include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageTest.cpp b/clang/unittests/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageTest.cpp similarity index 96% rename from clang/unittests/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageTest.cpp rename to clang/unittests/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageTest.cpp index 5b9bd94ec2014..c2f4cb97c0fae 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageTest.cpp @@ -6,19 +6,19 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" +#include "clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" #include "FindDecl.h" #include "TestFixture.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Frontend/SSAFOptions.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h" 
+#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h" #include "clang/Tooling/Tooling.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" @@ -721,7 +721,7 @@ TEST_F(UnsafeBufferUsageTest, FunctionTemplateInstantiation) { void unsafe(T p) { p[1] = p[2] + p[3]; } - + void f(int *p) { unsafe(p); } @@ -742,7 +742,7 @@ TEST_F(UnsafeBufferUsageTest, MethodInClassTemplateInstantiation) { p[1] = p[2] + p[3]; } }; - + void f(int *p) { UnsafeClass UC; diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageWPATest.cpp b/clang/unittests/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageWPATest.cpp similarity index 83% rename from clang/unittests/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageWPATest.cpp rename to clang/unittests/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageWPATest.cpp index 80f09f9765fe1..448f838297e8f 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageWPATest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageWPATest.cpp @@ -7,16 +7,16 @@ //===----------------------------------------------------------------------===// #include "TestFixture.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h" -#include 
"clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisDriver.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/WPASuite.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" +#include "clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" +#include "clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisDriver.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/WPASuite.h" #include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/BuildNamespaceTest.cpp b/clang/unittests/ScalableStaticAnalysis/BuildNamespaceTest.cpp similarity index 96% rename from clang/unittests/ScalableStaticAnalysisFramework/BuildNamespaceTest.cpp rename to clang/unittests/ScalableStaticAnalysis/BuildNamespaceTest.cpp index 0fe8bb6f18122..e98efd45ea6b0 100644 --- 
a/clang/unittests/ScalableStaticAnalysisFramework/BuildNamespaceTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/BuildNamespaceTest.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/FormatProviders.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Support/FormatProviders.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/CMakeLists.txt b/clang/unittests/ScalableStaticAnalysis/CMakeLists.txt similarity index 88% rename from clang/unittests/ScalableStaticAnalysisFramework/CMakeLists.txt rename to clang/unittests/ScalableStaticAnalysis/CMakeLists.txt index 216fa1cf2b8b8..387f03dc76fc6 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/CMakeLists.txt +++ b/clang/unittests/ScalableStaticAnalysis/CMakeLists.txt @@ -38,10 +38,10 @@ add_distinct_clang_unittest(ClangScalableAnalysisTests clangASTMatchers clangBasic clangFrontend - clangScalableStaticAnalysisFrameworkAnalyses - clangScalableStaticAnalysisFrameworkCore - clangScalableStaticAnalysisFrameworkFrontend - clangScalableStaticAnalysisFrameworkSourceTransformation + clangScalableStaticAnalysisAnalyses + clangScalableStaticAnalysisCore + clangScalableStaticAnalysisFrontend + clangScalableStaticAnalysisSourceTransformation clangSerialization clangTooling clangToolingCore diff --git a/clang/unittests/ScalableStaticAnalysisFramework/EntityIdTableTest.cpp b/clang/unittests/ScalableStaticAnalysis/EntityIdTableTest.cpp similarity index 91% rename from clang/unittests/ScalableStaticAnalysisFramework/EntityIdTableTest.cpp rename to clang/unittests/ScalableStaticAnalysis/EntityIdTableTest.cpp index 7afa4943bcb79..ead7e35f8421f 100644 --- 
a/clang/unittests/ScalableStaticAnalysisFramework/EntityIdTableTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/EntityIdTableTest.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" #include "gtest/gtest.h" namespace clang { diff --git a/clang/unittests/ScalableStaticAnalysisFramework/EntityIdTest.cpp b/clang/unittests/ScalableStaticAnalysis/EntityIdTest.cpp similarity index 89% rename from clang/unittests/ScalableStaticAnalysisFramework/EntityIdTest.cpp rename to clang/unittests/ScalableStaticAnalysis/EntityIdTest.cpp index 9a07aed13a432..e01ceda75324a 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/EntityIdTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/EntityIdTest.cpp @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/FormatProviders.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Support/FormatProviders.h" 
#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/EntityLinkageTest.cpp b/clang/unittests/ScalableStaticAnalysis/EntityLinkageTest.cpp similarity index 95% rename from clang/unittests/ScalableStaticAnalysisFramework/EntityLinkageTest.cpp rename to clang/unittests/ScalableStaticAnalysis/EntityLinkageTest.cpp index a435f2f2c8974..ccf38ec8d93fb 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/EntityLinkageTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/EntityLinkageTest.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/FormatProviders.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Support/FormatProviders.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/EntityLinkerTest.cpp b/clang/unittests/ScalableStaticAnalysis/EntityLinkerTest.cpp similarity index 97% rename from clang/unittests/ScalableStaticAnalysisFramework/EntityLinkerTest.cpp rename to clang/unittests/ScalableStaticAnalysis/EntityLinkerTest.cpp index a3759703a48b3..cd579e16991ef 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/EntityLinkerTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/EntityLinkerTest.cpp @@ -6,17 +6,17 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntityLinker.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/EntityLinker.h" #include "TestFixture.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/EntitySummaryEncoding.h" -#include 
"clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/EntitySummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/TUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" #include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" #include "gtest/gtest.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/EntityNameTest.cpp b/clang/unittests/ScalableStaticAnalysis/EntityNameTest.cpp similarity index 92% rename from clang/unittests/ScalableStaticAnalysisFramework/EntityNameTest.cpp rename to clang/unittests/ScalableStaticAnalysis/EntityNameTest.cpp index 651b43744ed35..698b17efb6384 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/EntityNameTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/EntityNameTest.cpp @@ -6,9 +6,9 @@ // //===----------------------------------------------------------------------===// -#include 
"clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/FormatProviders.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Support/FormatProviders.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/ErrorBuilderTest.cpp b/clang/unittests/ScalableStaticAnalysis/ErrorBuilderTest.cpp similarity index 98% rename from clang/unittests/ScalableStaticAnalysisFramework/ErrorBuilderTest.cpp rename to clang/unittests/ScalableStaticAnalysis/ErrorBuilderTest.cpp index 65db17416b965..5a5b74f82ae0f 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/ErrorBuilderTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/ErrorBuilderTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/Support/ErrorBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/Support/ErrorBuilder.h" #include "gtest/gtest.h" #include diff --git a/clang/unittests/ScalableStaticAnalysisFramework/FindDecl.h b/clang/unittests/ScalableStaticAnalysis/FindDecl.h similarity index 87% rename from clang/unittests/ScalableStaticAnalysisFramework/FindDecl.h rename to clang/unittests/ScalableStaticAnalysis/FindDecl.h index 5ce50fe538cda..65dee7f741287 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/FindDecl.h +++ b/clang/unittests/ScalableStaticAnalysis/FindDecl.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_FINDDECL_H -#define LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_FINDDECL_H 
+#ifndef LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_FINDDECL_H +#define LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_FINDDECL_H #include "clang/AST/Decl.h" #include "clang/AST/DynamicRecursiveASTVisitor.h" @@ -50,4 +50,4 @@ inline const FunctionDecl *findFnByName(StringRef Name, ASTContext &Ctx) { } // namespace clang::ssaf -#endif // LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_FINDDECL_H +#endif // LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_FINDDECL_H diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp b/clang/unittests/ScalableStaticAnalysis/Frontend/TUSummaryExtractorFrontendActionTest.cpp similarity index 95% rename from clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp rename to clang/unittests/ScalableStaticAnalysis/Frontend/TUSummaryExtractorFrontendActionTest.cpp index cb5448e4860bc..2d65fb675eec3 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendActionTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Frontend/TUSummaryExtractorFrontendActionTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Frontend/TUSummaryExtractorFrontendAction.h" +#include "clang/ScalableStaticAnalysis/Frontend/TUSummaryExtractorFrontendAction.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/Frontend/CompilerInstance.h" @@ -14,10 +14,10 @@ #include "clang/Frontend/SSAFOptions.h" #include "clang/Frontend/TextDiagnosticBuffer.h" #include "clang/Lex/PreprocessorOptions.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" -#include 
"clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormat.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" @@ -483,16 +483,14 @@ TEST_F(TUSummaryExtractorFrontendActionTest, EXPECT_THAT(Log, Contains("Wrapped::Initialize")); EXPECT_THAT(Log, Contains("Wrapped::HandleTranslationUnit")); - EXPECT_THAT( - errorsMsgsOf(DiagBuf), - UnorderedElementsAre("option '--ssaf-tu-summary-file=' requires " - "'--ssaf-compilation-unit-id=' to be set")); + EXPECT_THAT(errorsMsgsOf(DiagBuf), + UnorderedElementsAre("option '--ssaf-tu-summary-file=' requires " + "'--ssaf-compilation-unit-id=' to be set")); EXPECT_FALSE(llvm::sys::fs::exists(Output)); } -TEST_F(TUSummaryExtractorFrontendActionTest, - EmptyCompilationUnitIdDiagnoses) { +TEST_F(TUSummaryExtractorFrontendActionTest, EmptyCompilationUnitIdDiagnoses) { std::string Output = makePath("output.MockSerializationFormat"); Compiler->getSSAFOpts().TUSummaryFile = Output; Compiler->getSSAFOpts().ExtractSummaries = {"NoOpExtractor"}; @@ -507,10 +505,9 @@ TEST_F(TUSummaryExtractorFrontendActionTest, EXPECT_THAT(Log, Contains("Wrapped::Initialize")); EXPECT_THAT(Log, Contains("Wrapped::HandleTranslationUnit")); - EXPECT_THAT( - errorsMsgsOf(DiagBuf), - UnorderedElementsAre("option '--ssaf-tu-summary-file=' requires " - "'--ssaf-compilation-unit-id=' to be set")); + EXPECT_THAT(errorsMsgsOf(DiagBuf), + UnorderedElementsAre("option '--ssaf-tu-summary-file=' requires " + "'--ssaf-compilation-unit-id=' to be set")); EXPECT_FALSE(llvm::sys::fs::exists(Output)); } diff --git a/clang/unittests/ScalableStaticAnalysisFramework/LUSummaryTest.cpp 
b/clang/unittests/ScalableStaticAnalysis/LUSummaryTest.cpp similarity index 81% rename from clang/unittests/ScalableStaticAnalysisFramework/LUSummaryTest.cpp rename to clang/unittests/ScalableStaticAnalysis/LUSummaryTest.cpp index 55ae20ce97ca7..b67859e09c66d 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/LUSummaryTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/LUSummaryTest.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" #include "gtest/gtest.h" namespace clang::ssaf { diff --git a/clang/unittests/ScalableStaticAnalysisFramework/ModelStringConversionsTest.cpp b/clang/unittests/ScalableStaticAnalysis/ModelStringConversionsTest.cpp similarity index 97% rename from clang/unittests/ScalableStaticAnalysisFramework/ModelStringConversionsTest.cpp rename to clang/unittests/ScalableStaticAnalysis/ModelStringConversionsTest.cpp index aff5140f26c2f..ff24a8358612d 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/ModelStringConversionsTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/ModelStringConversionsTest.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "../../lib/ScalableStaticAnalysisFramework/Core/ModelStringConversions.h" +#include "../../lib/ScalableStaticAnalysis/Core/ModelStringConversions.h" #include "gtest/gtest.h" using namespace clang::ssaf; diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Registries/FancyAnalysisData.cpp b/clang/unittests/ScalableStaticAnalysis/Registries/FancyAnalysisData.cpp similarity index 96% rename from 
clang/unittests/ScalableStaticAnalysisFramework/Registries/FancyAnalysisData.cpp rename to clang/unittests/ScalableStaticAnalysis/Registries/FancyAnalysisData.cpp index 084835190f7bd..5f078e6d23ce5 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Registries/FancyAnalysisData.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Registries/FancyAnalysisData.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "Registries/MockSerializationFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Registry.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Registries/MockSerializationFormat.cpp b/clang/unittests/ScalableStaticAnalysis/Registries/MockSerializationFormat.cpp similarity index 91% rename from clang/unittests/ScalableStaticAnalysisFramework/Registries/MockSerializationFormat.cpp rename to clang/unittests/ScalableStaticAnalysis/Registries/MockSerializationFormat.cpp index 03ed471f86087..915fe15af832c 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Registries/MockSerializationFormat.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Registries/MockSerializationFormat.cpp @@ -7,15 +7,15 @@ //===----------------------------------------------------------------------===// #include "Registries/MockSerializationFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include 
"clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/TUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormat.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Registries/MockSerializationFormat.h b/clang/unittests/ScalableStaticAnalysis/Registries/MockSerializationFormat.h similarity index 86% rename from clang/unittests/ScalableStaticAnalysisFramework/Registries/MockSerializationFormat.h rename to clang/unittests/ScalableStaticAnalysis/Registries/MockSerializationFormat.h index 2f74f7216ba16..2ecaab9fb0132 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Registries/MockSerializationFormat.h +++ b/clang/unittests/ScalableStaticAnalysis/Registries/MockSerializationFormat.h @@ -6,11 +6,11 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_REGISTRIES_MOCKSERIALIZATIONFORMAT_H -#define LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_REGISTRIES_MOCKSERIALIZATIONFORMAT_H +#ifndef 
LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_REGISTRIES_MOCKSERIALIZATIONFORMAT_H +#define LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_REGISTRIES_MOCKSERIALIZATIONFORMAT_H -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormat.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormat.h" #include "clang/Support/Compiler.h" #include "llvm/ADT/STLFunctionalExtras.h" #include "llvm/Support/Registry.h" @@ -82,4 +82,4 @@ class MockSerializationFormat final : public SerializationFormat { LLVM_DECLARE_REGISTRY( llvm::Registry) -#endif // LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_REGISTRIES_MOCKSERIALIZATIONFORMAT_H +#endif // LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_REGISTRIES_MOCKSERIALIZATIONFORMAT_H diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Registries/MockSummaryExtractor1.cpp b/clang/unittests/ScalableStaticAnalysis/Registries/MockSummaryExtractor1.cpp similarity index 88% rename from clang/unittests/ScalableStaticAnalysisFramework/Registries/MockSummaryExtractor1.cpp rename to clang/unittests/ScalableStaticAnalysis/Registries/MockSummaryExtractor1.cpp index 1d5a33900d3c2..2ff32394ae42b 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Registries/MockSummaryExtractor1.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Registries/MockSummaryExtractor1.cpp @@ -8,8 +8,8 @@ #include "MockTUSummaryBuilder.h" #include "clang/AST/ASTContext.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" using namespace clang; using namespace ssaf; diff --git 
a/clang/unittests/ScalableStaticAnalysisFramework/Registries/MockSummaryExtractor2.cpp b/clang/unittests/ScalableStaticAnalysis/Registries/MockSummaryExtractor2.cpp similarity index 88% rename from clang/unittests/ScalableStaticAnalysisFramework/Registries/MockSummaryExtractor2.cpp rename to clang/unittests/ScalableStaticAnalysis/Registries/MockSummaryExtractor2.cpp index ab78b5c135faa..9465d4d96a26b 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Registries/MockSummaryExtractor2.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Registries/MockSummaryExtractor2.cpp @@ -8,8 +8,8 @@ #include "MockTUSummaryBuilder.h" #include "clang/AST/ASTContext.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" using namespace clang; using namespace ssaf; diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Registries/MockTUSummaryBuilder.h b/clang/unittests/ScalableStaticAnalysis/Registries/MockTUSummaryBuilder.h similarity index 69% rename from clang/unittests/ScalableStaticAnalysisFramework/Registries/MockTUSummaryBuilder.h rename to clang/unittests/ScalableStaticAnalysis/Registries/MockTUSummaryBuilder.h index 389faed325050..87689634dc061 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Registries/MockTUSummaryBuilder.h +++ b/clang/unittests/ScalableStaticAnalysis/Registries/MockTUSummaryBuilder.h @@ -6,10 +6,10 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_REGISTRIES_MOCKTUSUMMARYBUILDER_H -#define LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_REGISTRIES_MOCKTUSUMMARYBUILDER_H +#ifndef LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_REGISTRIES_MOCKTUSUMMARYBUILDER_H +#define 
LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_REGISTRIES_MOCKTUSUMMARYBUILDER_H -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/raw_ostream.h" @@ -28,4 +28,4 @@ class MockTUSummaryBuilder : public TUSummaryBuilder { } // namespace clang::ssaf -#endif // LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_REGISTRIES_MOCKTUSUMMARYBUILDER_H +#endif // LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_REGISTRIES_MOCKTUSUMMARYBUILDER_H diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Registries/SerializationFormatRegistryTest.cpp b/clang/unittests/ScalableStaticAnalysis/Registries/SerializationFormatRegistryTest.cpp similarity index 95% rename from clang/unittests/ScalableStaticAnalysisFramework/Registries/SerializationFormatRegistryTest.cpp rename to clang/unittests/ScalableStaticAnalysis/Registries/SerializationFormatRegistryTest.cpp index b55754a622e31..bb61b95e0ccf6 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Registries/SerializationFormatRegistryTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Registries/SerializationFormatRegistryTest.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/SerializationFormatRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/SerializationFormatRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringRef.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp b/clang/unittests/ScalableStaticAnalysis/Registries/SummaryExtractorRegistryTest.cpp similarity index 96% rename from 
clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp rename to clang/unittests/ScalableStaticAnalysis/Registries/SummaryExtractorRegistryTest.cpp index 2294c045d554b..db7c7ad835bb6 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Registries/SummaryExtractorRegistryTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Registries/SummaryExtractorRegistryTest.cpp @@ -9,8 +9,8 @@ #include "MockTUSummaryBuilder.h" #include "clang/Frontend/MultiplexConsumer.h" #include "clang/Frontend/SSAFOptions.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/ExtractorRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/ExtractorRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h" #include "clang/Tooling/Tooling.h" #include "llvm/ADT/StringRef.h" #include "gmock/gmock.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Serialization/JSONFormatTest/JSONFormatTest.cpp b/clang/unittests/ScalableStaticAnalysis/Serialization/JSONFormatTest/JSONFormatTest.cpp similarity index 99% rename from clang/unittests/ScalableStaticAnalysisFramework/Serialization/JSONFormatTest/JSONFormatTest.cpp rename to clang/unittests/ScalableStaticAnalysis/Serialization/JSONFormatTest/JSONFormatTest.cpp index 57258145b5dd9..777625dc9fed7 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Serialization/JSONFormatTest/JSONFormatTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Serialization/JSONFormatTest/JSONFormatTest.cpp @@ -12,7 +12,7 @@ #include "JSONFormatTest.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Registry.h" diff --git 
a/clang/unittests/ScalableStaticAnalysisFramework/Serialization/JSONFormatTest/JSONFormatTest.h b/clang/unittests/ScalableStaticAnalysis/Serialization/JSONFormatTest/JSONFormatTest.h similarity index 95% rename from clang/unittests/ScalableStaticAnalysisFramework/Serialization/JSONFormatTest/JSONFormatTest.h rename to clang/unittests/ScalableStaticAnalysis/Serialization/JSONFormatTest/JSONFormatTest.h index 515735e788d0d..b411ed3f91573 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Serialization/JSONFormatTest/JSONFormatTest.h +++ b/clang/unittests/ScalableStaticAnalysis/Serialization/JSONFormatTest/JSONFormatTest.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_SERIALIZATION_JSONFORMATTEST_JSONFORMATTEST_H -#define LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_SERIALIZATION_JSONFORMATTEST_JSONFORMATTEST_H +#ifndef LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_SERIALIZATION_JSONFORMATTEST_JSONFORMATTEST_H +#define LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_SERIALIZATION_JSONFORMATTEST_JSONFORMATTEST_H #include "TestFixture.h" #include "llvm/ADT/SmallString.h" @@ -169,4 +169,4 @@ struct MismatchedEntitySummaryForJSONFormatTest final : EntitySummary { } // namespace clang::ssaf -#endif // LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_SERIALIZATION_JSONFORMATTEST_JSONFORMATTEST_H +#endif // LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_SERIALIZATION_JSONFORMATTEST_JSONFORMATTEST_H diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Serialization/JSONFormatTest/LUSummaryTest.cpp b/clang/unittests/ScalableStaticAnalysis/Serialization/JSONFormatTest/LUSummaryTest.cpp similarity index 98% rename from clang/unittests/ScalableStaticAnalysisFramework/Serialization/JSONFormatTest/LUSummaryTest.cpp rename to clang/unittests/ScalableStaticAnalysis/Serialization/JSONFormatTest/LUSummaryTest.cpp index 13e1b6c4d3f20..3a9ed5aebfd7e 
100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Serialization/JSONFormatTest/LUSummaryTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Serialization/JSONFormatTest/LUSummaryTest.cpp @@ -13,9 +13,9 @@ #include "JSONFormatTest.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" #include "llvm/Testing/Support/Error.h" #include "gmock/gmock.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/Serialization/JSONFormatTest/TUSummaryTest.cpp b/clang/unittests/ScalableStaticAnalysis/Serialization/JSONFormatTest/TUSummaryTest.cpp similarity index 98% rename from clang/unittests/ScalableStaticAnalysisFramework/Serialization/JSONFormatTest/TUSummaryTest.cpp rename to clang/unittests/ScalableStaticAnalysis/Serialization/JSONFormatTest/TUSummaryTest.cpp index 6e88013b2e4a6..1c2a890f2ddc9 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/Serialization/JSONFormatTest/TUSummaryTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/Serialization/JSONFormatTest/TUSummaryTest.cpp @@ -13,9 +13,9 @@ #include "JSONFormatTest.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Serialization/JSONFormat.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/TUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/Serialization/JSONFormat.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h" #include "llvm/Testing/Support/Error.h" 
#include "gmock/gmock.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/SourceTransformation/EmitterTest.cpp b/clang/unittests/ScalableStaticAnalysis/SourceTransformation/EmitterTest.cpp similarity index 93% rename from clang/unittests/ScalableStaticAnalysisFramework/SourceTransformation/EmitterTest.cpp rename to clang/unittests/ScalableStaticAnalysis/SourceTransformation/EmitterTest.cpp index d730c6c337c45..a2a1eac1a1e03 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/SourceTransformation/EmitterTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/SourceTransformation/EmitterTest.cpp @@ -8,8 +8,8 @@ #include "clang/Basic/Sarif.h" #include "clang/Basic/SourceLocation.h" -#include "clang/ScalableStaticAnalysisFramework/SourceTransformation/SourceEditEmitter.h" -#include "clang/ScalableStaticAnalysisFramework/SourceTransformation/TransformationReportEmitter.h" +#include "clang/ScalableStaticAnalysis/SourceTransformation/SourceEditEmitter.h" +#include "clang/ScalableStaticAnalysis/SourceTransformation/TransformationReportEmitter.h" #include "gtest/gtest.h" #include diff --git a/clang/unittests/ScalableStaticAnalysisFramework/SourceTransformation/RegistryTest.cpp b/clang/unittests/ScalableStaticAnalysis/SourceTransformation/RegistryTest.cpp similarity index 92% rename from clang/unittests/ScalableStaticAnalysisFramework/SourceTransformation/RegistryTest.cpp rename to clang/unittests/ScalableStaticAnalysis/SourceTransformation/RegistryTest.cpp index 0c68cd95dd499..3fa24920c20a4 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/SourceTransformation/RegistryTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/SourceTransformation/RegistryTest.cpp @@ -7,8 +7,8 @@ //===----------------------------------------------------------------------===// #include "TestFixture.h" -#include "clang/ScalableStaticAnalysisFramework/SourceTransformation/Transformation.h" -#include 
"clang/ScalableStaticAnalysisFramework/SourceTransformation/TransformationRegistry.h" +#include "clang/ScalableStaticAnalysis/SourceTransformation/Transformation.h" +#include "clang/ScalableStaticAnalysis/SourceTransformation/TransformationRegistry.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/SummaryData/SummaryDataTest.cpp b/clang/unittests/ScalableStaticAnalysis/SummaryData/SummaryDataTest.cpp similarity index 94% rename from clang/unittests/ScalableStaticAnalysisFramework/SummaryData/SummaryDataTest.cpp rename to clang/unittests/ScalableStaticAnalysis/SummaryData/SummaryDataTest.cpp index ecf7c1b3347be..e8137f3bac308 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/SummaryData/SummaryDataTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/SummaryData/SummaryDataTest.cpp @@ -7,16 +7,16 @@ //===----------------------------------------------------------------------===// #include "../TestFixture.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/SummaryData/LUSummaryConsumer.h" -#include "clang/ScalableStaticAnalysisFramework/Core/SummaryData/SummaryDataBuilderRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include 
"clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/SummaryData/LUSummaryConsumer.h" +#include "clang/ScalableStaticAnalysis/Core/SummaryData/SummaryDataBuilderRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Testing/Support/Error.h" #include "gtest/gtest.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/SummaryNameTest.cpp b/clang/unittests/ScalableStaticAnalysis/SummaryNameTest.cpp similarity index 93% rename from clang/unittests/ScalableStaticAnalysisFramework/SummaryNameTest.cpp rename to clang/unittests/ScalableStaticAnalysis/SummaryNameTest.cpp index e1d35f6628232..c88071491a953 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/SummaryNameTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/SummaryNameTest.cpp @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Support/FormatProviders.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/Support/FormatProviders.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/raw_ostream.h" #include "gtest/gtest.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp b/clang/unittests/ScalableStaticAnalysis/TUSummaryBuilderTest.cpp similarity index 95% rename from clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp rename to clang/unittests/ScalableStaticAnalysis/TUSummaryBuilderTest.cpp index 
55c5c781e42d9..c6a7a7b531be3 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/TUSummaryBuilderTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/TUSummaryBuilderTest.cpp @@ -6,18 +6,18 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryBuilder.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryBuilder.h" #include "FindDecl.h" #include "TestFixture.h" #include "clang/Frontend/SSAFOptions.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/EntitySummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/EntitySummary.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.h" #include "clang/Tooling/Tooling.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" @@ -340,7 +340,7 @@ TEST_F(TUSummaryBuilderLinkageTest, ConstGlobalHasInternalLinkage) { } // See 'getLinkageForDecl' in -// ScalableStaticAnalysisFramework/Core/TUSummary/TUSummaryExtractor.cpp +// 
ScalableStaticAnalysis/Core/TUSummary/TUSummaryExtractor.cpp TEST_F(TUSummaryBuilderLinkageTest, ParamOfExternalFunctionIsExternal) { AST = tooling::buildASTFromCode("void target(int *ptr) {}"); diff --git a/clang/unittests/ScalableStaticAnalysisFramework/TestFixture.cpp b/clang/unittests/ScalableStaticAnalysis/TestFixture.cpp similarity index 75% rename from clang/unittests/ScalableStaticAnalysisFramework/TestFixture.cpp rename to clang/unittests/ScalableStaticAnalysis/TestFixture.cpp index c1c41997abcf2..45e3c693a26fe 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/TestFixture.cpp +++ b/clang/unittests/ScalableStaticAnalysis/TestFixture.cpp @@ -7,11 +7,11 @@ //===----------------------------------------------------------------------===// #include "TestFixture.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" #include #include diff --git a/clang/unittests/ScalableStaticAnalysis/TestFixture.h b/clang/unittests/ScalableStaticAnalysis/TestFixture.h new file mode 100644 index 0000000000000..22fbff754fb37 --- /dev/null +++ b/clang/unittests/ScalableStaticAnalysis/TestFixture.h @@ -0,0 +1,47 @@ +//===- TestFixture.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_TESTFIXTURE_H +#define LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_TESTFIXTURE_H + +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/TUSummaryEncoding.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityIdTable.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/TUSummary/TUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/WPASuite.h" +#include "gtest/gtest.h" +#include + +namespace clang::ssaf { + +class TestFixture : public ::testing::Test { +protected: + static WPASuite makeWPASuite() { return WPASuite(); } + +#define FIELD(CLASS, FIELD_NAME) \ + static const auto &get##FIELD_NAME(const CLASS &X) { return X.FIELD_NAME; } \ + static auto &get##FIELD_NAME(CLASS &X) { return X.FIELD_NAME; } +#include "clang/ScalableStaticAnalysis/Core/Model/PrivateFieldNames.def" +}; + +void PrintTo(const BuildNamespace &BN, std::ostream *OS); +void PrintTo(const EntityId &E, std::ostream *OS); +void PrintTo(const EntityLinkage &EL, std::ostream *OS); +void PrintTo(const EntityName &EN, std::ostream *OS); +void PrintTo(const NestedBuildNamespace &NBN, std::ostream *OS); +void PrintTo(const SummaryName &N, std::ostream *OS); + +} // namespace clang::ssaf + +#endif // 
LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSIS_TESTFIXTURE_H diff --git a/clang/unittests/ScalableStaticAnalysisFramework/WholeProgramAnalysis/AnalysisDriverTest.cpp b/clang/unittests/ScalableStaticAnalysis/WholeProgramAnalysis/AnalysisDriverTest.cpp similarity index 94% rename from clang/unittests/ScalableStaticAnalysisFramework/WholeProgramAnalysis/AnalysisDriverTest.cpp rename to clang/unittests/ScalableStaticAnalysis/WholeProgramAnalysis/AnalysisDriverTest.cpp index 5a4192242643e..95077675fb2ec 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/WholeProgramAnalysis/AnalysisDriverTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/WholeProgramAnalysis/AnalysisDriverTest.cpp @@ -6,20 +6,20 @@ // //===----------------------------------------------------------------------===// -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisDriver.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisDriver.h" #include "../TestFixture.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisRegistry.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisResult.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/DerivedAnalysis.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/SummaryAnalysis.h" -#include 
"clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/WPASuite.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/Model/SummaryName.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisName.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisRegistry.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisResult.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/DerivedAnalysis.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/SummaryAnalysis.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/WPASuite.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Testing/Support/Error.h" #include "gtest/gtest.h" diff --git a/clang/unittests/ScalableStaticAnalysisFramework/WholeProgramAnalysis/UnsafeBufferReachableAnalysisTest.cpp b/clang/unittests/ScalableStaticAnalysis/WholeProgramAnalysis/UnsafeBufferReachableAnalysisTest.cpp similarity index 95% rename from clang/unittests/ScalableStaticAnalysisFramework/WholeProgramAnalysis/UnsafeBufferReachableAnalysisTest.cpp rename to clang/unittests/ScalableStaticAnalysis/WholeProgramAnalysis/UnsafeBufferReachableAnalysisTest.cpp index 62cda9d7c6ff1..cf5420b35e7dc 100644 --- a/clang/unittests/ScalableStaticAnalysisFramework/WholeProgramAnalysis/UnsafeBufferReachableAnalysisTest.cpp +++ b/clang/unittests/ScalableStaticAnalysis/WholeProgramAnalysis/UnsafeBufferReachableAnalysisTest.cpp @@ -7,18 +7,18 @@ //===----------------------------------------------------------------------===// #include "../TestFixture.h" -#include 
"clang/ScalableStaticAnalysisFramework/Analyses/EntityPointerLevel/EntityPointerLevel.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlow.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/PointerFlow/PointerFlowAnalysis.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" -#include "clang/ScalableStaticAnalysisFramework/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/AnalysisDriver.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/WPASuite.h" +#include "clang/ScalableStaticAnalysis/Analyses/EntityPointerLevel/EntityPointerLevel.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlow.h" +#include "clang/ScalableStaticAnalysis/Analyses/PointerFlow/PointerFlowAnalysis.h" +#include "clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsage.h" +#include "clang/ScalableStaticAnalysis/Analyses/UnsafeBufferUsage/UnsafeBufferUsageAnalysis.h" +#include "clang/ScalableStaticAnalysis/Core/EntityLinker/LUSummary.h" +#include "clang/ScalableStaticAnalysis/Core/Model/BuildNamespace.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityId.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityLinkage.h" +#include "clang/ScalableStaticAnalysis/Core/Model/EntityName.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/AnalysisDriver.h" +#include "clang/ScalableStaticAnalysis/Core/WholeProgramAnalysis/WPASuite.h" #include "llvm/ADT/ArrayRef.h" 
#include "gtest/gtest.h" #include diff --git a/clang/unittests/ScalableStaticAnalysisFramework/TestFixture.h b/clang/unittests/ScalableStaticAnalysisFramework/TestFixture.h deleted file mode 100644 index 2116c2b4a664c..0000000000000 --- a/clang/unittests/ScalableStaticAnalysisFramework/TestFixture.h +++ /dev/null @@ -1,47 +0,0 @@ -//===- TestFixture.h --------------------------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_TESTFIXTURE_H -#define LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_TESTFIXTURE_H - -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/LUSummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/EntityLinker/TUSummaryEncoding.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/BuildNamespace.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityId.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityIdTable.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityLinkage.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/EntityName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/Model/SummaryName.h" -#include "clang/ScalableStaticAnalysisFramework/Core/TUSummary/TUSummary.h" -#include "clang/ScalableStaticAnalysisFramework/Core/WholeProgramAnalysis/WPASuite.h" -#include "gtest/gtest.h" -#include - -namespace clang::ssaf { - -class TestFixture : public ::testing::Test { -protected: - static WPASuite makeWPASuite() { return WPASuite(); } - -#define FIELD(CLASS, FIELD_NAME) \ - static const auto &get##FIELD_NAME(const 
CLASS &X) { return X.FIELD_NAME; } \ - static auto &get##FIELD_NAME(CLASS &X) { return X.FIELD_NAME; } -#include "clang/ScalableStaticAnalysisFramework/Core/Model/PrivateFieldNames.def" -}; - -void PrintTo(const BuildNamespace &BN, std::ostream *OS); -void PrintTo(const EntityId &E, std::ostream *OS); -void PrintTo(const EntityLinkage &EL, std::ostream *OS); -void PrintTo(const EntityName &EN, std::ostream *OS); -void PrintTo(const NestedBuildNamespace &NBN, std::ostream *OS); -void PrintTo(const SummaryName &N, std::ostream *OS); - -} // namespace clang::ssaf - -#endif // LLVM_CLANG_UNITTESTS_SCALABLESTATICANALYSISFRAMEWORK_TESTFIXTURE_H diff --git a/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn index edcfcf853526d..82f633adaf261 100644 --- a/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Driver/BUILD.gn @@ -17,10 +17,10 @@ static_library("Driver") { "//clang/lib/DependencyScanning", "//clang/lib/Frontend", "//clang/lib/Options", - "//clang/lib/ScalableStaticAnalysisFramework/Analyses", - "//clang/lib/ScalableStaticAnalysisFramework/Core", - "//clang/lib/ScalableStaticAnalysisFramework/Frontend", - "//clang/lib/ScalableStaticAnalysisFramework/SourceTransformation", + "//clang/lib/ScalableStaticAnalysis/Analyses", + "//clang/lib/ScalableStaticAnalysis/Core", + "//clang/lib/ScalableStaticAnalysis/Frontend", + "//clang/lib/ScalableStaticAnalysis/SourceTransformation", "//llvm/include/llvm/Config:llvm-config", "//llvm/lib/BinaryFormat", "//llvm/lib/Option", diff --git a/llvm/utils/gn/secondary/clang/lib/FrontendTool/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/FrontendTool/BUILD.gn index 737d9be4f7c48..f371f125e52e2 100644 --- a/llvm/utils/gn/secondary/clang/lib/FrontendTool/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/FrontendTool/BUILD.gn @@ -12,10 +12,10 @@ static_library("FrontendTool") { "//clang/lib/Frontend", "//clang/lib/Frontend/Rewrite", 
"//clang/lib/Options", - "//clang/lib/ScalableStaticAnalysisFramework/Analyses", - "//clang/lib/ScalableStaticAnalysisFramework/Core", - "//clang/lib/ScalableStaticAnalysisFramework/Frontend", - "//clang/lib/ScalableStaticAnalysisFramework/SourceTransformation", + "//clang/lib/ScalableStaticAnalysis/Analyses", + "//clang/lib/ScalableStaticAnalysis/Core", + "//clang/lib/ScalableStaticAnalysis/Frontend", + "//clang/lib/ScalableStaticAnalysis/SourceTransformation", "//llvm/lib/Option", "//llvm/lib/Support", ] diff --git a/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/Analyses/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/Analyses/BUILD.gn similarity index 87% rename from llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/Analyses/BUILD.gn rename to llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/Analyses/BUILD.gn index f54dea7d2bde5..baf37a8328a18 100644 --- a/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/Analyses/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/Analyses/BUILD.gn @@ -1,11 +1,11 @@ static_library("Analyses") { - output_name = "clangScalableStaticAnalysisFrameworkAnalyses" + output_name = "clangScalableStaticAnalysisAnalyses" configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ "//clang/lib/AST", "//clang/lib/Analysis", "//clang/lib/Basic", - "//clang/lib/ScalableStaticAnalysisFramework/Core", + "//clang/lib/ScalableStaticAnalysis/Core", "//llvm/lib/Support", ] include_dirs = [ "." 
] diff --git a/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/Core/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/Core/BUILD.gn similarity index 95% rename from llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/Core/BUILD.gn rename to llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/Core/BUILD.gn index a52907c6fdfe4..b0c1f30b769b1 100644 --- a/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/Core/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/Core/BUILD.gn @@ -1,5 +1,5 @@ static_library("Core") { - output_name = "clangScalableStaticAnalysisFrameworkCore" + output_name = "clangScalableStaticAnalysisCore" configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ "//clang/lib/AST", diff --git a/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/Frontend/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/Frontend/BUILD.gn similarity index 70% rename from llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/Frontend/BUILD.gn rename to llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/Frontend/BUILD.gn index 184ab82dfe229..bfdae02cbe65b 100644 --- a/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/Frontend/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/Frontend/BUILD.gn @@ -1,11 +1,11 @@ static_library("Frontend") { - output_name = "clangScalableStaticAnalysisFrameworkFrontend" + output_name = "clangScalableStaticAnalysisFrontend" configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ "//clang/lib/AST", "//clang/lib/Basic", "//clang/lib/Frontend", - "//clang/lib/ScalableStaticAnalysisFramework/Core", + "//clang/lib/ScalableStaticAnalysis/Core", "//clang/lib/Sema", "//llvm/lib/Support", ] diff --git a/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/SourceTransformation/BUILD.gn 
b/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/SourceTransformation/BUILD.gn similarity index 66% rename from llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/SourceTransformation/BUILD.gn rename to llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/SourceTransformation/BUILD.gn index eb046c3d0e330..b6c57d82bbaea 100644 --- a/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/SourceTransformation/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/SourceTransformation/BUILD.gn @@ -1,10 +1,10 @@ static_library("SourceTransformation") { - output_name = "clangScalableStaticAnalysisFrameworkSourceTransformation" + output_name = "clangScalableStaticAnalysisSourceTransformation" configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ "//clang/lib/AST", "//clang/lib/Basic", - "//clang/lib/ScalableStaticAnalysisFramework/Core", + "//clang/lib/ScalableStaticAnalysis/Core", "//clang/lib/Tooling/Core", "//llvm/lib/Support", ] diff --git a/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/Tool/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/Tool/BUILD.gn similarity index 60% rename from llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/Tool/BUILD.gn rename to llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/Tool/BUILD.gn index b45ecef4053c0..874d1d3da17af 100644 --- a/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysisFramework/Tool/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/ScalableStaticAnalysis/Tool/BUILD.gn @@ -1,9 +1,9 @@ static_library("Tool") { - output_name = "clangScalableStaticAnalysisFrameworkTool" + output_name = "clangScalableStaticAnalysisTool" configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ "//clang/lib/Basic", - "//clang/lib/ScalableStaticAnalysisFramework/Core", + "//clang/lib/ScalableStaticAnalysis/Core", "//llvm/lib/Support", ] sources = [ "Utils.cpp" ] diff --git 
a/llvm/utils/gn/secondary/clang/tools/clang-ssaf-analyzer/BUILD.gn b/llvm/utils/gn/secondary/clang/tools/clang-ssaf-analyzer/BUILD.gn index 1bab754756208..4c309fd2927d4 100644 --- a/llvm/utils/gn/secondary/clang/tools/clang-ssaf-analyzer/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/tools/clang-ssaf-analyzer/BUILD.gn @@ -2,10 +2,10 @@ executable("clang-ssaf-analyzer") { configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ "//clang/lib/Basic", - "//clang/lib/ScalableStaticAnalysisFramework/Analyses", - "//clang/lib/ScalableStaticAnalysisFramework/Core", - "//clang/lib/ScalableStaticAnalysisFramework/SourceTransformation", - "//clang/lib/ScalableStaticAnalysisFramework/Tool", + "//clang/lib/ScalableStaticAnalysis/Analyses", + "//clang/lib/ScalableStaticAnalysis/Core", + "//clang/lib/ScalableStaticAnalysis/SourceTransformation", + "//clang/lib/ScalableStaticAnalysis/Tool", "//llvm/lib/Option", "//llvm/lib/Support", ] diff --git a/llvm/utils/gn/secondary/clang/tools/clang-ssaf-format/BUILD.gn b/llvm/utils/gn/secondary/clang/tools/clang-ssaf-format/BUILD.gn index 59c0fde4ee3f1..aa328612d48b0 100644 --- a/llvm/utils/gn/secondary/clang/tools/clang-ssaf-format/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/tools/clang-ssaf-format/BUILD.gn @@ -2,10 +2,10 @@ executable("clang-ssaf-format") { configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ "//clang/lib/Basic", - "//clang/lib/ScalableStaticAnalysisFramework/Analyses", - "//clang/lib/ScalableStaticAnalysisFramework/Core", - "//clang/lib/ScalableStaticAnalysisFramework/SourceTransformation", - "//clang/lib/ScalableStaticAnalysisFramework/Tool", + "//clang/lib/ScalableStaticAnalysis/Analyses", + "//clang/lib/ScalableStaticAnalysis/Core", + "//clang/lib/ScalableStaticAnalysis/SourceTransformation", + "//clang/lib/ScalableStaticAnalysis/Tool", "//llvm/lib/Option", "//llvm/lib/Support", ] diff --git a/llvm/utils/gn/secondary/clang/tools/clang-ssaf-linker/BUILD.gn 
b/llvm/utils/gn/secondary/clang/tools/clang-ssaf-linker/BUILD.gn index 829485a78b9fe..e2a5ce4a13721 100644 --- a/llvm/utils/gn/secondary/clang/tools/clang-ssaf-linker/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/tools/clang-ssaf-linker/BUILD.gn @@ -2,10 +2,10 @@ executable("clang-ssaf-linker") { configs += [ "//llvm/utils/gn/build:clang_code" ] deps = [ "//clang/lib/Basic", - "//clang/lib/ScalableStaticAnalysisFramework/Analyses", - "//clang/lib/ScalableStaticAnalysisFramework/Core", - "//clang/lib/ScalableStaticAnalysisFramework/SourceTransformation", - "//clang/lib/ScalableStaticAnalysisFramework/Tool", + "//clang/lib/ScalableStaticAnalysis/Analyses", + "//clang/lib/ScalableStaticAnalysis/Core", + "//clang/lib/ScalableStaticAnalysis/SourceTransformation", + "//clang/lib/ScalableStaticAnalysis/Tool", "//llvm/lib/Option", "//llvm/lib/Support", ] diff --git a/llvm/utils/gn/secondary/clang/unittests/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/BUILD.gn index 7d108c963c1cf..f29052a406e2d 100644 --- a/llvm/utils/gn/secondary/clang/unittests/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/BUILD.gn @@ -29,7 +29,7 @@ group("unittests") { deps += [ "Analysis:ClangAnalysisTests", "Analysis/FlowSensitive:ClangAnalysisFlowSensitiveTests", - "ScalableStaticAnalysisFramework:ClangScalableAnalysisTests", + "ScalableStaticAnalysis:ClangScalableAnalysisTests", "StaticAnalyzer:StaticAnalysisTests", ] } diff --git a/llvm/utils/gn/secondary/clang/unittests/ScalableStaticAnalysisFramework/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/ScalableStaticAnalysis/BUILD.gn similarity index 88% rename from llvm/utils/gn/secondary/clang/unittests/ScalableStaticAnalysisFramework/BUILD.gn rename to llvm/utils/gn/secondary/clang/unittests/ScalableStaticAnalysis/BUILD.gn index 1295816faa3ee..6d15d08e1e679 100644 --- a/llvm/utils/gn/secondary/clang/unittests/ScalableStaticAnalysisFramework/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/ScalableStaticAnalysis/BUILD.gn @@ 
-7,10 +7,10 @@ unittest("ClangScalableAnalysisTests") { "//clang/lib/ASTMatchers", "//clang/lib/Basic", "//clang/lib/Frontend", - "//clang/lib/ScalableStaticAnalysisFramework/Analyses", - "//clang/lib/ScalableStaticAnalysisFramework/Core", - "//clang/lib/ScalableStaticAnalysisFramework/Frontend", - "//clang/lib/ScalableStaticAnalysisFramework/SourceTransformation", + "//clang/lib/ScalableStaticAnalysis/Analyses", + "//clang/lib/ScalableStaticAnalysis/Core", + "//clang/lib/ScalableStaticAnalysis/Frontend", + "//clang/lib/ScalableStaticAnalysis/SourceTransformation", "//clang/lib/Serialization", "//clang/lib/Tooling", "//clang/lib/Tooling/Core", diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 997635a6a486a..ac27841da5d02 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -2646,16 +2646,16 @@ cc_library( cc_library( name = "scalable_static_analysis_core", srcs = glob([ - "lib/ScalableStaticAnalysisFramework/Core/**/*.cpp", - "lib/ScalableStaticAnalysisFramework/Core/**/*.h", + "lib/ScalableStaticAnalysis/Core/**/*.cpp", + "lib/ScalableStaticAnalysis/Core/**/*.h", ]), - hdrs = glob(["include/clang/ScalableStaticAnalysisFramework/Core/**/*.h"] + [ - "include/clang/ScalableStaticAnalysisFramework/SSAFBuiltinForceLinker.h", - "include/clang/ScalableStaticAnalysisFramework/SSAFForceLinker.h", + hdrs = glob(["include/clang/ScalableStaticAnalysis/Core/**/*.h"] + [ + "include/clang/ScalableStaticAnalysis/SSAFBuiltinForceLinker.h", + "include/clang/ScalableStaticAnalysis/SSAFForceLinker.h", ]), textual_hdrs = glob([ - "include/clang/ScalableStaticAnalysisFramework/Core/**/*.def", - "include/clang/ScalableStaticAnalysisFramework/*.def", + "include/clang/ScalableStaticAnalysis/Core/**/*.def", + "include/clang/ScalableStaticAnalysis/*.def", ]), deps = [ ":ast", @@ -2669,11 +2669,11 @@ cc_library( cc_library( name = 
"scalable_static_analysis_analyses", srcs = glob([ - "lib/ScalableStaticAnalysisFramework/Analyses/**/*.cpp", - "lib/ScalableStaticAnalysisFramework/Analyses/**/*.h", + "lib/ScalableStaticAnalysis/Analyses/**/*.cpp", + "lib/ScalableStaticAnalysis/Analyses/**/*.h", ]), - hdrs = glob(["include/clang/ScalableStaticAnalysisFramework/Analyses/**/*.h"]), - includes = ["lib/ScalableStaticAnalysisFramework/Analyses"], + hdrs = glob(["include/clang/ScalableStaticAnalysis/Analyses/**/*.h"]), + includes = ["lib/ScalableStaticAnalysis/Analyses"], deps = [ ":analysis", ":ast", @@ -2686,9 +2686,9 @@ cc_library( cc_library( name = "scalable_static_analysis_tool", srcs = glob([ - "lib/ScalableStaticAnalysisFramework/Tool/**/*.cpp", + "lib/ScalableStaticAnalysis/Tool/**/*.cpp", ]), - hdrs = glob(["include/clang/ScalableStaticAnalysisFramework/Tool/**/*.h"]), + hdrs = glob(["include/clang/ScalableStaticAnalysis/Tool/**/*.h"]), deps = [ ":basic", ":scalable_static_analysis_core", @@ -2699,9 +2699,9 @@ cc_library( cc_library( name = "scalable_static_analysis_source_transformation", srcs = glob([ - "lib/ScalableStaticAnalysisFramework/SourceTransformation/**/*.cpp", + "lib/ScalableStaticAnalysis/SourceTransformation/**/*.cpp", ]), - hdrs = glob(["include/clang/ScalableStaticAnalysisFramework/SourceTransformation/**/*.h"]), + hdrs = glob(["include/clang/ScalableStaticAnalysis/SourceTransformation/**/*.h"]), deps = [ ":ast", ":basic", @@ -2715,9 +2715,9 @@ cc_library( cc_library( name = "scalable_static_analysis_frontend", srcs = glob([ - "lib/ScalableStaticAnalysisFramework/Frontend/**/*.cpp", + "lib/ScalableStaticAnalysis/Frontend/**/*.cpp", ]), - hdrs = glob(["include/clang/ScalableStaticAnalysisFramework/Frontend/**/*.h"]), + hdrs = glob(["include/clang/ScalableStaticAnalysis/Frontend/**/*.h"]), deps = [ ":ast", ":basic", diff --git a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel index 
aaedb96dbd6c0..f422001dbab26 100644 --- a/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/unittests/BUILD.bazel @@ -786,11 +786,11 @@ cc_test( name = "scalable_static_analysis_tests", size = "small", srcs = glob([ - "ScalableStaticAnalysisFramework/**/*.cpp", - "ScalableStaticAnalysisFramework/**/*.h", + "ScalableStaticAnalysis/**/*.cpp", + "ScalableStaticAnalysis/**/*.h", ]), - features = ["-layering_check"], # #include "../../lib/ScalableStaticAnalysisFramework/Core/ModelStringConversions.h" - includes = ["ScalableStaticAnalysisFramework"], + features = ["-layering_check"], # #include "../../lib/ScalableStaticAnalysis/Core/ModelStringConversions.h" + includes = ["ScalableStaticAnalysis"], deps = [ "//clang:ast", "//clang:ast_matchers", From 3169d2357dc827eba998a034dc61253aad496f5f Mon Sep 17 00:00:00 2001 From: William Tran-Viet Date: Wed, 24 Jun 2026 19:46:11 -0400 Subject: [PATCH 433/511] [libc++] Implement P1885R12: `<text_encoding>` (#141312) Resolves #105373, resolves #118371 and resolves #105332 - Implements `<text_encoding>` - Adds availability macros for LLVM 23. - The data is stored in three tables: - One giant string split by null-terminators to represent the aliases - An index table which stores indexes into the string, each entry representing the first character of an alias - Text encoding data, which stores an index to the index table, the MIB, and the number of aliases the encoding has. Storing it in the above manner allows us to make significant savings in binary file size and required runtime storage for the data. As required by the LLVM Project's AI use policy: - The implementation for `__get_locale_encoding(const char*)` for Windows has been developed with the assistance of AI. --------- Co-authored-by: A. 
Jiang --- libcxx/docs/FeatureTestMacroTable.rst | 2 +- libcxx/docs/ReleaseNotes/23.rst | 2 + libcxx/docs/Status/Cxx26Issues.csv | 2 +- libcxx/docs/Status/Cxx26Papers.csv | 4 +- libcxx/include/CMakeLists.txt | 1 + libcxx/include/__configuration/availability.h | 6 + libcxx/include/__locale | 9 + libcxx/include/__locale_dir/locale_base_api.h | 10 +- libcxx/include/__locale_dir/support/aix.h | 5 + .../include/__locale_dir/support/bsd_like.h | 7 +- libcxx/include/__locale_dir/support/linux.h | 11 +- libcxx/include/__locale_dir/support/newlib.h | 5 + libcxx/include/__locale_dir/support/windows.h | 1 + libcxx/include/module.modulemap.in | 5 + libcxx/include/text_encoding | 835 ++++++++++++ libcxx/include/version | 2 +- ...bcxxabi.v1.stable.exceptions.nonew.abilist | 3 +- ...bcxxabi.v1.stable.exceptions.nonew.abilist | 1 + ...bcxxabi.v1.stable.exceptions.nonew.abilist | 1 + ...bcxxabi.v1.stable.exceptions.nonew.abilist | 1 + ...bcxxabi.v1.stable.exceptions.nonew.abilist | 1 + ...bcxxabi.v1.stable.exceptions.nonew.abilist | 3 +- ...bcxxabi.v1.stable.exceptions.nonew.abilist | 3 +- ...xxabi.v1.stable.noexceptions.nonew.abilist | 1 + libcxx/modules/std.compat.cppm.in | 3 - libcxx/modules/std.cppm.in | 4 +- libcxx/modules/std/text_encoding.inc | 11 +- libcxx/src/CMakeLists.txt | 1 + libcxx/src/support/win32/locale_win32.cpp | 260 ++++ libcxx/src/text_encoding.cpp | 54 + .../libcxx/localization/nodiscard.verify.cpp | 5 +- .../environment.android.pass.cpp | 36 + .../text/text_encoding/environment.pass.cpp | 40 + .../environment.windows.pass.cpp | 39 + .../text/text_encoding/nodiscard.verify.cpp | 64 + .../text_encoding.ctor/assert.id.pass.cpp | 33 + .../assert.string_view.pass.cpp | 36 + .../environment.delete.verify.cpp | 27 + .../test/libcxx/transitive_includes/cxx03.csv | 42 + .../test/libcxx/transitive_includes/cxx11.csv | 42 + .../test/libcxx/transitive_includes/cxx14.csv | 42 + .../test/libcxx/transitive_includes/cxx17.csv | 43 + 
.../test/libcxx/transitive_includes/cxx20.csv | 38 + .../test/libcxx/transitive_includes/cxx23.csv | 23 + .../test/libcxx/transitive_includes/cxx26.csv | 37 + .../text_encoding.version.compile.pass.cpp | 62 + .../version.version.compile.pass.cpp | 16 +- .../get_long_double_fr_FR.pass.cpp | 5 + .../put_long_double_fr_FR.pass.cpp | 5 + .../locale/locale.members/encoding.pass.cpp | 53 + .../text/text_encoding/test_text_encoding.h | 1171 +++++++++++++++++ .../text_encoding.ctor/default.pass.cpp | 37 + .../text_encoding.ctor/id.pass.cpp | 69 + .../text_encoding.ctor/string_view.pass.cpp | 73 + .../text_encoding.eq/equal.id.pass.cpp | 61 + .../text_encoding.eq/equal.pass.cpp | 71 + .../text_encoding.hash/enabled_hash.pass.cpp | 23 + .../text_encoding.hash/hash.pass.cpp | 60 + .../text_encoding.members/aliases.pass.cpp | 42 + .../aliases_view.compile.pass.cpp | 23 + .../environment.pass.cpp | 79 ++ .../text_encoding.members/id.compile.pass.cpp | 281 ++++ .../text_encoding.members/literal.pass.cpp | 37 + .../text_encoding.aliases_view/begin.pass.cpp | 76 ++ .../text_encoding.aliases_view/empty.pass.cpp | 54 + .../text_encoding.aliases_view/end.pass.cpp | 47 + .../text_encoding.aliases_view/front.pass.cpp | 63 + .../text_encoding.aliases_view/index.pass.cpp | 37 + .../iterator.pass.cpp | 156 +++ .../operator-bool.pass.cpp | 56 + .../trivially_copyable.compile.pass.cpp | 17 + .../generate_feature_test_macro_components.py | 1 - libcxx/utils/libcxx/header_information.py | 1 - .../libcxx/test/features/availability.py | 8 + 74 files changed, 4447 insertions(+), 38 deletions(-) create mode 100644 libcxx/include/text_encoding create mode 100644 libcxx/src/text_encoding.cpp create mode 100644 libcxx/test/libcxx/text/text_encoding/environment.android.pass.cpp create mode 100644 libcxx/test/libcxx/text/text_encoding/environment.pass.cpp create mode 100644 libcxx/test/libcxx/text/text_encoding/environment.windows.pass.cpp create mode 100644 
libcxx/test/libcxx/text/text_encoding/nodiscard.verify.cpp create mode 100644 libcxx/test/libcxx/text/text_encoding/text_encoding.ctor/assert.id.pass.cpp create mode 100644 libcxx/test/libcxx/text/text_encoding/text_encoding.ctor/assert.string_view.pass.cpp create mode 100644 libcxx/test/libcxx/text/text_encoding/text_encoding.members/environment.delete.verify.cpp create mode 100644 libcxx/test/std/language.support/support.limits/support.limits.general/text_encoding.version.compile.pass.cpp create mode 100644 libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/test_text_encoding.h create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.ctor/default.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.ctor/id.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.ctor/string_view.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.eq/equal.id.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.eq/equal.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.hash/enabled_hash.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.hash/hash.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.members/aliases.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.members/aliases_view.compile.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.members/environment.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.members/id.compile.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.members/literal.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp create mode 100644 
libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/end.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/index.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/iterator.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/operator-bool.pass.cpp create mode 100644 libcxx/test/std/text/text_encoding/trivially_copyable.compile.pass.cpp diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index 47f10324e2523..f8719f1f74c6d 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ b/libcxx/docs/FeatureTestMacroTable.rst @@ -536,7 +536,7 @@ Status ---------------------------------------------------------- ----------------- ``__cpp_lib_submdspan`` *unimplemented* ---------------------------------------------------------- ----------------- - ``__cpp_lib_text_encoding`` *unimplemented* + ``__cpp_lib_text_encoding`` ``202306L`` ---------------------------------------------------------- ----------------- ``__cpp_lib_to_chars`` *unimplemented* ---------------------------------------------------------- ----------------- diff --git a/libcxx/docs/ReleaseNotes/23.rst b/libcxx/docs/ReleaseNotes/23.rst index a491306a3cffb..1abe4f7c3b9e4 100644 --- a/libcxx/docs/ReleaseNotes/23.rst +++ b/libcxx/docs/ReleaseNotes/23.rst @@ -39,7 +39,9 @@ Implemented Papers ------------------ - P2440R1: ``ranges::iota``, ``ranges::shift_left`` and ``ranges::shift_right`` (`Github `__) +- P1885R12: Naming Text Encodings to Demystify Them (`Github `__) - P1899R3: ``stride_view`` (`Github `__) +- P2862R1: 
``text_encoding::name()`` should never return null values (`Github `__) - P3936R1: Safer ``atomic_ref::address`` (`Github `__) - P3953R3: Rename ``std::runtime_format`` (`Github `__) - P4052R0: Renaming saturation arithmetic functions (`Github `__) diff --git a/libcxx/docs/Status/Cxx26Issues.csv b/libcxx/docs/Status/Cxx26Issues.csv index 419e5755ed4a8..d4f7a44910e00 100644 --- a/libcxx/docs/Status/Cxx26Issues.csv +++ b/libcxx/docs/Status/Cxx26Issues.csv @@ -56,7 +56,7 @@ "`LWG4035 `__","``single_view`` should provide ``empty``","2024-03 (Tokyo)","|Complete|","19","`#105328 `__","" "`LWG4036 `__","``__alignof_is_defined`` is only implicitly specified in C++ and not yet deprecated","2024-03 (Tokyo)","","","`#105329 `__","" "`LWG4037 `__","Static data members of ``ctype_base`` are not yet required to be usable in constant expressions","2024-03 (Tokyo)","","","`#105330 `__","" -"`LWG4038 `__","``std::text_encoding::aliases_view`` should have constexpr iterators","2024-03 (Tokyo)","","","`#105332 `__","" +"`LWG4038 `__","``std::text_encoding::aliases_view`` should have constexpr iterators","2024-03 (Tokyo)","|Complete|","23","`#105332 `__","" "`LWG4043 `__","""ASCII"" is not a registered character encoding","2024-03 (Tokyo)","|Nothing To Do|","","`#105335 `__","" "`LWG4045 `__","``tuple`` can create dangling references from ``tuple-like``","2024-03 (Tokyo)","","","`#105337 `__","" "`LWG4053 `__","Unary call to ``std::views::repeat`` does not decay the argument","2024-03 (Tokyo)","|Complete|","19","`#105338 `__","" diff --git a/libcxx/docs/Status/Cxx26Papers.csv b/libcxx/docs/Status/Cxx26Papers.csv index a951ac4c6c833..9744940459bcd 100644 --- a/libcxx/docs/Status/Cxx26Papers.csv +++ b/libcxx/docs/Status/Cxx26Papers.csv @@ -13,7 +13,7 @@ "`P2013R5 `__","Freestanding Language: Optional ``::operator new``","2023-06 (Varna)","","","`#105370 `__","" "`P2363R5 `__","Extending associative containers with the remaining heterogeneous overloads","2023-06 (Varna)","","","`#105371 
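`__","" "`P1901R2 <https://wg21.link/P1901R2>`__","Enabling the Use of ``weak_ptr`` as Keys in Unordered Associative Containers","2023-06 (Varna)","","","`#105372 <https://github.com/llvm/llvm-project/issues/105372>`__","" -"`P1885R12 <https://wg21.link/P1885R12>`__","Naming Text Encodings to Demystify Them","2023-06 (Varna)","","","`#105373 <https://github.com/llvm/llvm-project/issues/105373>`__","" +"`P1885R12 <https://wg21.link/P1885R12>`__","Naming Text Encodings to Demystify Them","2023-06 (Varna)","|Complete|","23","`#105373 <https://github.com/llvm/llvm-project/issues/105373>`__","" "`P0792R14 <https://wg21.link/P0792R14>`__","``function_ref``: a type-erased callable reference","2023-06 (Varna)","","","`#105376 <https://github.com/llvm/llvm-project/issues/105376>`__","" "`P2874R2 <https://wg21.link/P2874R2>`__","P2874R2: Mandating Annex D Require No More","2023-06 (Varna)","|Complete|","12","`#105377 <https://github.com/llvm/llvm-project/issues/105377>`__","" "`P2757R3 <https://wg21.link/P2757R3>`__","Type-checking format args","2023-06 (Varna)","","","`#105378 <https://github.com/llvm/llvm-project/issues/105378>`__","" @@ -79,7 +79,7 @@ "`P3136R1 <https://wg21.link/P3136R1>`__","Retiring niebloids","2024-11 (Wrocław)","|Complete|","14","`#118133 <https://github.com/llvm/llvm-project/issues/118133>`__","" "`P3138R5 <https://wg21.link/P3138R5>`__","``views::cache_latest``","2024-11 (Wrocław)","","","`#118134 <https://github.com/llvm/llvm-project/issues/118134>`__","" "`P3379R0 <https://wg21.link/P3379R0>`__","Constrain ``std::expected`` equality operators","2024-11 (Wrocław)","|Complete|","21","`#118135 <https://github.com/llvm/llvm-project/issues/118135>`__","" -"`P2862R1 <https://wg21.link/P2862R1>`__","``text_encoding::name()`` should never return null values","2024-11 (Wrocław)","","","`#118371 <https://github.com/llvm/llvm-project/issues/118371>`__","" +"`P2862R1 <https://wg21.link/P2862R1>`__","``text_encoding::name()`` should never return null values","2024-11 (Wrocław)","|Complete|","23","`#118371 <https://github.com/llvm/llvm-project/issues/118371>`__","" "`P2897R7 <https://wg21.link/P2897R7>`__","``aligned_accessor``: An ``mdspan`` accessor expressing pointer over-alignment","2024-11 (Wrocław)","|Complete|","21","`#118372 <https://github.com/llvm/llvm-project/issues/118372>`__","" "`P3355R2 <https://wg21.link/P3355R2>`__","Fix ``submdspan`` for C++26","2024-11 (Wrocław)","","","`#118373 <https://github.com/llvm/llvm-project/issues/118373>`__","" "`P3222R0 <https://wg21.link/P3222R0>`__","Fix C++26 by adding transposed special cases for P2642 layouts","2024-11 (Wrocław)","","","`#118374 <https://github.com/llvm/llvm-project/issues/118374>`__","" diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index cb7ae820bc012..618686c685bee 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -1090,6 +1090,7 @@ set(files strstream syncstream system_error + text_encoding tgmath.h thread tuple diff --git a/libcxx/include/__configuration/availability.h b/libcxx/include/__configuration/availability.h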
index d0788083c0128..735af5d431cb1 100644 --- a/libcxx/include/__configuration/availability.h +++ b/libcxx/include/__configuration/availability.h @@ -38,6 +38,8 @@ // When availability annotations are disabled, we take for granted that features introduced // in all versions of the library are available. #if !_LIBCPP_HAS_VENDOR_AVAILABILITY_ANNOTATIONS +# define _LIBCPP_INTRODUCED_IN_LLVM_23 1 +# define _LIBCPP_INTRODUCED_IN_LLVM_23_ATTRIBUTE /* nothing */ # define _LIBCPP_INTRODUCED_IN_LLVM_23 1 # define _LIBCPP_INTRODUCED_IN_LLVM_23_ATTRIBUTE /* nothing */ @@ -323,6 +325,10 @@ # define _LIBCPP_AVAILABILITY_HAS_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 0 #endif +// Controls whether the implementation for text_encoding::environment() is available +#define _LIBCPP_AVAILABILITY_HAS_TEXT_ENCODING_ENVIRONMENT _LIBCPP_INTRODUCED_IN_LLVM_23 +#define _LIBCPP_AVAILABILITY_TEXT_ENCODING_ENVIRONMENT _LIBCPP_INTRODUCED_IN_LLVM_23_ATTRIBUTE + // Only define a bunch of symbols in the dylib if we need to be compatible with LLVM 7 headers or older # if defined(_LIBCPP_BUILDING_LIBRARY) && _LIBCPP_AVAILABILITY_MINIMUM_HEADER_VERSION < 8 # define _LIBCPP_HIDE_FROM_ABI_SINCE_LLVM8 diff --git a/libcxx/include/__locale b/libcxx/include/__locale index 33b1924db67c1..99d29c167f29a 100644 --- a/libcxx/include/__locale +++ b/libcxx/include/__locale @@ -11,6 +11,7 @@ #define _LIBCPP___LOCALE #include <__config> +#include <__configuration/availability.h> #if _LIBCPP_HAS_LOCALIZATION @@ -26,6 +27,7 @@ # include # include # include +# include // Some platforms require more includes than others. Keep the includes on all plaforms for now. 
# include @@ -109,6 +111,13 @@ public: return std::use_facet >(*this).compare( __x.data(), __x.data() + __x.size(), __y.data(), __y.data() + __y.size()) < 0; } +# if _LIBCPP_STD_VER >= 26 + [[nodiscard]] _LIBCPP_AVAILABILITY_TEXT_ENCODING_ENVIRONMENT + _LIBCPP_HIDE_FROM_ABI std::text_encoding encoding() const { + std::string __name = name(); + return std::__get_locale_encoding(__name.c_str()); + } +# endif // global locale objects: static locale global(const locale&); diff --git a/libcxx/include/__locale_dir/locale_base_api.h b/libcxx/include/__locale_dir/locale_base_api.h index e1e60e18fd7c6..f973d95ef244d 100644 --- a/libcxx/include/__locale_dir/locale_base_api.h +++ b/libcxx/include/__locale_dir/locale_base_api.h @@ -102,6 +102,8 @@ // // int __snprintf(char*, size_t, __locale_t, const char*, ...); // required by the headers // int __asprintf(char**, __locale_t, const char*, ...); // required by the headers +// +// const char* __get_locale_encoding(__locale_t); // } #if _LIBCPP_HAS_LOCALIZATION @@ -139,6 +141,7 @@ # include <__cstddef/size_t.h> # include <__utility/forward.h> # include +# include # include # include # if _LIBCPP_HAS_WIDE_CHARACTERS @@ -268,7 +271,12 @@ __mbsrtowcs(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, return __libcpp_mbsrtowcs_l(__dest, __src, __len, __ps, __loc); } # endif // _LIBCPP_HAS_WIDE_CHARACTERS -# endif // _LIBCPP_BUILDING_LIBRARY + +inline _LIBCPP_HIDE_FROM_ABI const char* __get_locale_encoding(__locale_t __loc) { + return ::nl_langinfo_l(CODESET, __loc); +} + +# endif // _LIBCPP_BUILDING_LIBRARY _LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat") diff --git a/libcxx/include/__locale_dir/support/aix.h b/libcxx/include/__locale_dir/support/aix.h index 8e1a4843900da..4ab52793ae3c1 100644 --- a/libcxx/include/__locale_dir/support/aix.h +++ b/libcxx/include/__locale_dir/support/aix.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #if _LIBCPP_HAS_WIDE_CHARACTERS 
@@ -79,6 +80,10 @@ inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) { __locale_guard __current(__loc); return std::localeconv(); } + +inline _LIBCPP_HIDE_FROM_ABI const char* __get_locale_encoding(__locale_t __loc) { + return ::nl_langinfo_l(CODESET, __loc); +} #endif // _LIBCPP_BUILDING_LIBRARY // The following structure is a quick-and-dirty workaround for routines that AIX diff --git a/libcxx/include/__locale_dir/support/bsd_like.h b/libcxx/include/__locale_dir/support/bsd_like.h index 1a84c351eb9d1..6632ca51e1eaa 100644 --- a/libcxx/include/__locale_dir/support/bsd_like.h +++ b/libcxx/include/__locale_dir/support/bsd_like.h @@ -15,6 +15,7 @@ #include <__utility/forward.h> #include // std::lconv #include +#include #include #include #include @@ -180,7 +181,11 @@ __mbsrtowcs(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, return ::mbsrtowcs_l(__dest, __src, __len, __ps, __loc); } # endif // _LIBCPP_HAS_WIDE_CHARACTERS -#endif // _LIBCPP_BUILDING_LIBRARY + +inline _LIBCPP_HIDE_FROM_ABI const char* __get_locale_encoding(__locale_t __loc) { + return ::nl_langinfo_l(CODESET, __loc); +} +#endif // _LIBCPP_BUILDING_LIBRARY _LIBCPP_DIAGNOSTIC_PUSH _LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wgcc-compat") diff --git a/libcxx/include/__locale_dir/support/linux.h b/libcxx/include/__locale_dir/support/linux.h index 2fe3f6bb8124f..6a91a001c239c 100644 --- a/libcxx/include/__locale_dir/support/linux.h +++ b/libcxx/include/__locale_dir/support/linux.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -211,7 +212,15 @@ __mbsrtowcs(wchar_t* __dest, const char** __src, size_t __len, mbstate_t* __ps, return std::mbsrtowcs(__dest, __src, __len, __ps); } # endif // _LIBCPP_HAS_WIDE_CHARACTERS -#endif // _LIBCPP_BUILDING_LIBRARY + +inline _LIBCPP_HIDE_FROM_ABI const char* __get_locale_encoding([[maybe_unused]] __locale_t __loc) { +# if defined(__ANDROID__) + return "UTF-8"; +# else + return ::nl_langinfo_l(CODESET, 
__loc); +# endif +} +#endif // _LIBCPP_BUILDING_LIBRARY #ifndef _LIBCPP_COMPILER_GCC // GCC complains that this can't be always_inline due to C-style varargs _LIBCPP_HIDE_FROM_ABI diff --git a/libcxx/include/__locale_dir/support/newlib.h b/libcxx/include/__locale_dir/support/newlib.h index 6ea12c3bc3e1f..7fe48572b4587 100644 --- a/libcxx/include/__locale_dir/support/newlib.h +++ b/libcxx/include/__locale_dir/support/newlib.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -76,6 +77,10 @@ inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) { __locale_guard __current(__loc); return std::localeconv(); } + +inline _LIBCPP_HIDE_FROM_ABI const char* __get_locale_encoding(__locale_t __loc) { + return ::nl_langinfo_l(CODESET, __loc); +} #endif // _LIBCPP_BUILDING_LIBRARY // diff --git a/libcxx/include/__locale_dir/support/windows.h b/libcxx/include/__locale_dir/support/windows.h index c9ea500bc0b12..6675cfb740b29 100644 --- a/libcxx/include/__locale_dir/support/windows.h +++ b/libcxx/include/__locale_dir/support/windows.h @@ -164,6 +164,7 @@ inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, const char* __loc return __new_locale; } _LIBCPP_EXPORTED_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc); +_LIBCPP_EXPORTED_FROM_ABI const char* __get_locale_encoding(__locale_t __loc); #endif // _LIBCPP_BUILDING_LIBRARY // diff --git a/libcxx/include/module.modulemap.in b/libcxx/include/module.modulemap.in index b11055940a82c..d70d0fce52460 100644 --- a/libcxx/include/module.modulemap.in +++ b/libcxx/include/module.modulemap.in @@ -2125,6 +2125,11 @@ module std [system] { export * } + module text_encoding { + header "text_encoding" + export * + } + module thread { module formatter { header "__thread/formatter.h" } module id { header "__thread/id.h" } diff --git a/libcxx/include/text_encoding b/libcxx/include/text_encoding new file mode 100644 index 0000000000000..da39be92c3dfa --- /dev/null +++ 
b/libcxx/include/text_encoding @@ -0,0 +1,835 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP_TEXT_ENCODING +#define _LIBCPP_TEXT_ENCODING + +/* text_encoding synopsis +namespace std { + +struct text_encoding; + +// [text.encoding.hash], hash support +template<class T> struct hash; +template<> struct hash<text_encoding>; + +struct text_encoding +{ + static constexpr size_t max_name_length = 63; + + // [text.encoding.id], enumeration text_encoding::id + enum class id : int_least32_t { + see below + }; + using enum id; + + constexpr text_encoding() = default; + constexpr explicit text_encoding(string_view enc) noexcept; + constexpr text_encoding(id i) noexcept; + + constexpr id mib() const noexcept; + constexpr const char* name() const noexcept; + + // [text.encoding.aliases], class text_encoding::aliases_view + // struct aliases_view; + constexpr aliases_view aliases() const noexcept; + + friend constexpr bool operator==(const text_encoding& a, + const text_encoding& b) noexcept; + friend constexpr bool operator==(const text_encoding& encoding, id i) noexcept; + + static consteval text_encoding literal() noexcept; + static text_encoding environment(); + template<id i> static bool environment_is(); + + private: + id mib_ = id::unknown; // exposition only + char name_[max_name_length + 1] = {0}; // exposition only + static constexpr bool comp-name(string_view a, string_view b); // exposition only +}; +} + +*/ + +#if __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) +# include <__cxx03/__config> +#else +# include <__config> + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +# if
_LIBCPP_STD_VER >= 26 + +# include <__algorithm/count.h> +# include <__algorithm/find_if.h> +# include <__algorithm/lower_bound.h> +# include <__cstddef/ptrdiff_t.h> +# include <__functional/hash.h> +# include <__iterator/iterator_traits.h> +# include <__ranges/enable_borrowed_range.h> +# include <__ranges/view_interface.h> +# include +# include +# include +# include + +_LIBCPP_BEGIN_NAMESPACE_STD +struct text_encoding; + +_LIBCPP_BEGIN_EXPLICIT_ABI_ANNOTATIONS + +_LIBCPP_AVAILABILITY_TEXT_ENCODING_ENVIRONMENT _LIBCPP_EXPORTED_FROM_ABI text_encoding +__get_locale_encoding(const char* __name); + +_LIBCPP_END_EXPLICIT_ABI_ANNOTATIONS + +struct text_encoding { + static_assert(CHAR_BIT == 8, "libc++ only supports platforms where CHAR_BIT == 8"); + enum class id : int_least32_t { + other = 1, + unknown = 2, + ASCII = 3, + ISOLatin1 = 4, + ISOLatin2 = 5, + ISOLatin3 = 6, + ISOLatin4 = 7, + ISOLatinCyrillic = 8, + ISOLatinArabic = 9, + ISOLatinGreek = 10, + ISOLatinHebrew = 11, + ISOLatin5 = 12, + ISOLatin6 = 13, + ISOTextComm = 14, + HalfWidthKatakana = 15, + JISEncoding = 16, + ShiftJIS = 17, + EUCPkdFmtJapanese = 18, + EUCFixWidJapanese = 19, + ISO4UnitedKingdom = 20, + ISO11SwedishForNames = 21, + ISO15Italian = 22, + ISO17Spanish = 23, + ISO21German = 24, + ISO60DanishNorwegian = 25, + ISO69French = 26, + ISO10646UTF1 = 27, + ISO646basic1983 = 28, + INVARIANT = 29, + ISO2IntlRefVersion = 30, + NATSSEFI = 31, + NATSSEFIADD = 32, // NATS-DANO (33) and NATS-DANO-ADD (34) are omitted by the standard. 
+ ISO10Swedish = 35, + KSC56011987 = 36, + ISO2022KR = 37, + EUCKR = 38, + ISO2022JP = 39, + ISO2022JP2 = 40, + ISO13JISC6220jp = 41, + ISO14JISC6220ro = 42, + ISO16Portuguese = 43, + ISO18Greek7Old = 44, + ISO19LatinGreek = 45, + ISO25French = 46, + ISO27LatinGreek1 = 47, + ISO5427Cyrillic = 48, + ISO42JISC62261978 = 49, + ISO47BSViewdata = 50, + ISO49INIS = 51, + ISO50INIS8 = 52, + ISO51INISCyrillic = 53, + ISO54271981 = 54, + ISO5428Greek = 55, + ISO57GB1988 = 56, + ISO58GB231280 = 57, + ISO61Norwegian2 = 58, + ISO70VideotexSupp1 = 59, + ISO84Portuguese2 = 60, + ISO85Spanish2 = 61, + ISO86Hungarian = 62, + ISO87JISX0208 = 63, + ISO88Greek7 = 64, + ISO89ASMO449 = 65, + ISO90 = 66, + ISO91JISC62291984a = 67, + ISO92JISC62991984b = 68, + ISO93JIS62291984badd = 69, + ISO94JIS62291984hand = 70, + ISO95JIS62291984handadd = 71, + ISO96JISC62291984kana = 72, + ISO2033 = 73, + ISO99NAPLPS = 74, + ISO102T617bit = 75, + ISO103T618bit = 76, + ISO111ECMACyrillic = 77, + ISO121Canadian1 = 78, + ISO122Canadian2 = 79, + ISO123CSAZ24341985gr = 80, + ISO88596E = 81, + ISO88596I = 82, + ISO128T101G2 = 83, + ISO88598E = 84, + ISO88598I = 85, + ISO139CSN369103 = 86, + ISO141JUSIB1002 = 87, + ISO143IECP271 = 88, + ISO146Serbian = 89, + ISO147Macedonian = 90, + ISO150 = 91, + ISO151Cuba = 92, + ISO6937Add = 93, + ISO153GOST1976874 = 94, + ISO8859Supp = 95, + ISO10367Box = 96, + ISO158Lap = 97, + ISO159JISX02121990 = 98, + ISO646Danish = 99, + USDK = 100, + DKUS = 101, + KSC5636 = 102, + Unicode11UTF7 = 103, + ISO2022CN = 104, + ISO2022CNEXT = 105, + UTF8 = 106, + ISO885913 = 109, + ISO885914 = 110, + ISO885915 = 111, + ISO885916 = 112, + GBK = 113, + GB18030 = 114, + OSDEBCDICDF0415 = 115, + OSDEBCDICDF03IRV = 116, + OSDEBCDICDF041 = 117, + ISO115481 = 118, + KZ1048 = 119, + UCS2 = 1000, + UCS4 = 1001, + UnicodeASCII = 1002, + UnicodeLatin1 = 1003, + UnicodeJapanese = 1004, + UnicodeIBM1261 = 1005, + UnicodeIBM1268 = 1006, + UnicodeIBM1276 = 1007, + UnicodeIBM1264 = 1008, + 
UnicodeIBM1265 = 1009, + Unicode11 = 1010, + SCSU = 1011, + UTF7 = 1012, + UTF16BE = 1013, + UTF16LE = 1014, + UTF16 = 1015, + CESU8 = 1016, + UTF32 = 1017, + UTF32BE = 1018, + UTF32LE = 1019, + BOCU1 = 1020, + UTF7IMAP = 1021, + Windows30Latin1 = 2000, + Windows31Latin1 = 2001, + Windows31Latin2 = 2002, + Windows31Latin5 = 2003, + HPRoman8 = 2004, + AdobeStandardEncoding = 2005, + VenturaUS = 2006, + VenturaInternational = 2007, + DECMCS = 2008, + PC850Multilingual = 2009, + PC8DanishNorwegian = 2012, + PC862LatinHebrew = 2013, + PC8Turkish = 2014, + IBMSymbols = 2015, + IBMThai = 2016, + HPLegal = 2017, + HPPiFont = 2018, + HPMath8 = 2019, + HPPSMath = 2020, + HPDesktop = 2021, + VenturaMath = 2022, + MicrosoftPublishing = 2023, + Windows31J = 2024, + GB2312 = 2025, + Big5 = 2026, + Macintosh = 2027, + IBM037 = 2028, + IBM038 = 2029, + IBM273 = 2030, + IBM274 = 2031, + IBM275 = 2032, + IBM277 = 2033, + IBM278 = 2034, + IBM280 = 2035, + IBM281 = 2036, + IBM284 = 2037, + IBM285 = 2038, + IBM290 = 2039, + IBM297 = 2040, + IBM420 = 2041, + IBM423 = 2042, + IBM424 = 2043, + PC8CodePage437 = 2011, + IBM500 = 2044, + IBM851 = 2045, + PCp852 = 2010, + IBM855 = 2046, + IBM857 = 2047, + IBM860 = 2048, + IBM861 = 2049, + IBM863 = 2050, + IBM864 = 2051, + IBM865 = 2052, + IBM868 = 2053, + IBM869 = 2054, + IBM870 = 2055, + IBM871 = 2056, + IBM880 = 2057, + IBM891 = 2058, + IBM903 = 2059, + IBM904 = 2060, + IBM905 = 2061, + IBM918 = 2062, + IBM1026 = 2063, + IBMEBCDICATDE = 2064, + EBCDICATDEA = 2065, + EBCDICCAFR = 2066, + EBCDICDKNO = 2067, + EBCDICDKNOA = 2068, + EBCDICFISE = 2069, + EBCDICFISEA = 2070, + EBCDICFR = 2071, + EBCDICIT = 2072, + EBCDICPT = 2073, + EBCDICES = 2074, + EBCDICESA = 2075, + EBCDICESS = 2076, + EBCDICUK = 2077, + EBCDICUS = 2078, + Unknown8BiT = 2079, + Mnemonic = 2080, + Mnem = 2081, + VISCII = 2082, + VIQR = 2083, + KOI8R = 2084, + HZGB2312 = 2085, + IBM866 = 2086, + PC775Baltic = 2087, + KOI8U = 2088, + IBM00858 = 2089, + IBM00924 = 2090, + 
IBM01140 = 2091, + IBM01141 = 2092, + IBM01142 = 2093, + IBM01143 = 2094, + IBM01144 = 2095, + IBM01145 = 2096, + IBM01146 = 2097, + IBM01147 = 2098, + IBM01148 = 2099, + IBM01149 = 2100, + Big5HKSCS = 2101, + IBM1047 = 2102, + PTCP154 = 2103, + Amiga1251 = 2104, + KOI7switched = 2105, + BRF = 2106, + TSCII = 2107, + CP51932 = 2108, + windows874 = 2109, + windows1250 = 2250, + windows1251 = 2251, + windows1252 = 2252, + windows1253 = 2253, + windows1254 = 2254, + windows1255 = 2255, + windows1256 = 2256, + windows1257 = 2257, + windows1258 = 2258, + TIS620 = 2259, + CP50220 = 2260 + }; + +private: + static constexpr int __nats_dano_ = 33; + static constexpr int __nats_dano_add_ = 34; + + // A __te_data structure stores: + // Index into a table of offsets in the giant aliases string + // The MIB of that text encoding + // Number of aliases the current MIB holds + // We only need 6 bytes of information to store this information. + + // The data is structured in three different locations: + // The mib table (array of __te_data) + // Aliases string + // Table of offsets into the aliases string + + // All of the alias strings were transformed from an array of char* into one giant string split by '\0'. + // This allows us to reduce runtime memory footprint by not only removing the need to store ~884 pointers, it also + // significantly reduces the disk space taken by removing the need for relocation data for those pointers. + // We also know ahead of time that the total number of characters of all of these aliases combined can fit into 2 + // bytes, so we can also save even more memory by using 2 byte offsets rather than a full pointer into the aliases + // string. 
+ + struct __te_data { + unsigned short __first_alias_index_; + unsigned short __mib_rep_; + unsigned char __num_aliases_; + + friend constexpr bool operator<(const __te_data& __enc, id __i) noexcept { + return __enc.__mib_rep_ < static_cast(__i); + } + }; + + static constexpr bool __comp_name(string_view __a, string_view __b) noexcept { + if (__a.empty() || __b.empty()) { + return false; + } + + // Map any non-alphanumeric character to 255, skip prefix 0s, else get tolower(__n). + auto __map_char = [](char __n, bool& __in_number) -> unsigned char { + if (__n == '0') { + return __in_number ? '0' : 255; + } + __in_number = __n >= '1' && __n <= '9'; + + if ((__n >= '1' && __n <= '9') || (__n >= 'a' && __n <= 'z')) { + return __n; + } + if (__n >= 'A' && __n <= 'Z') { + return __n + ('a' - 'A'); // tolower + } + + return 255; + }; + + auto __a_ptr = __a.begin(), __b_ptr = __b.begin(); + bool __a_in_number = false, __b_in_number = false; + + unsigned char __a_val = 255, __b_val = 255; + for (;; __a_ptr++, __b_ptr++) { + while (__a_ptr != __a.end() && (__a_val = __map_char(*__a_ptr, __a_in_number)) == 255) + __a_ptr++; + while (__b_ptr != __b.end() && (__b_val = __map_char(*__b_ptr, __b_in_number)) == 255) + __b_ptr++; + + if (__a_ptr == __a.end()) + return __b_ptr == __b.end(); + if (__b_ptr == __b.end()) + return false; + if (__a_val != __b_val) + return false; + } + return true; + } + + static constexpr unsigned short __find_data_idx(string_view __a) noexcept { + _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__a.size() <= max_name_length, "input string_view must have size <= 63"); + _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(!__a.contains('\0'), "input string_view must not contain '\\0'"); + + auto __pred = [&__a](const __te_data& __entry) -> bool { + // Search aliases of text encoding for string + aliases_view __aliases(__entry); + return std::find_if(__aliases.begin(), __aliases.end(), [&__a](auto __alias) { + return __comp_name(__a, __alias); + }) != __aliases.end(); + }; + + 
const __te_data* __found = std::find_if(__entries + 2, std::end(__entries), __pred); + if (__found == std::end(__entries)) { + return __other_idx_; // other + } + + return __found - __entries; + } + + static constexpr unsigned short __find_data_idx_by_id(id __i) noexcept { + _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__i >= id::other, "invalid text_encoding::id passed"); + _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__i <= id::CP50220, "invalid text_encoding::id passed"); + _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(int_least32_t(__i) != __nats_dano_, "Mib for NATS-DANO used"); + _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(int_least32_t(__i) != __nats_dano_add_, "Mib for NATS-DANO-ADD used"); + auto __found = std::lower_bound(std::begin(__entries), std::end(__entries), __i); + + if (__found == std::end(__entries)) { + return __unknown_idx_; // unknown + } + + return __found - __entries; + } + +public: + using enum id; + static constexpr size_t max_name_length = 63; + + // [text.encoding.aliases], class text_encoding::aliases_view + struct aliases_view : ranges::view_interface { + struct __iterator { + using iterator_concept = random_access_iterator_tag; + using iterator_category = random_access_iterator_tag; + using value_type = const char*; + using reference = const char*; + using difference_type = int; + + constexpr __iterator() noexcept = default; + + [[nodiscard]] constexpr reference operator*() const noexcept { + return __get_alias_from_offset_table(__current_idx_); + } + + [[nodiscard]] constexpr reference operator[](difference_type __n) const noexcept { + auto __it = *this; + return *(__it + __n); + } + + [[nodiscard]] friend constexpr __iterator operator+(__iterator __it, difference_type __n) noexcept { + __it += __n; + return __it; + } + + [[nodiscard]] friend constexpr __iterator operator+(difference_type __n, __iterator __it) noexcept { + __it += __n; + return __it; + } + + [[nodiscard]] friend constexpr __iterator operator-(__iterator __it, difference_type __n) noexcept { + 
__it -= __n; + return __it; + } + + [[nodiscard]] constexpr difference_type operator-(const __iterator& __other) const noexcept { + return __current_idx_ - __other.__current_idx_; + } + + constexpr __iterator& operator++() noexcept { + __current_idx_++; + return *this; + } + + constexpr __iterator operator++(int) noexcept { + auto __old = *this; + __current_idx_++; + return __old; + } + + constexpr __iterator& operator--() noexcept { + __current_idx_--; + return *this; + } + + constexpr __iterator operator--(int) noexcept { + auto __old = *this; + __current_idx_--; + return __old; + } + + constexpr __iterator& operator+=(difference_type __n) noexcept { + __current_idx_ += __n; + return *this; + } + + constexpr __iterator& operator-=(difference_type __n) noexcept { return (*this += -__n); } + + friend constexpr auto operator<=>(const __iterator& __it, const __iterator& __it2) noexcept = default; + + private: + friend struct aliases_view; + + constexpr __iterator(unsigned short __enc_d) noexcept : __current_idx_(__enc_d) {} + + unsigned short __current_idx_; + }; // __iterator + + [[nodiscard]] constexpr __iterator begin() const noexcept { return __iterator(__first_idx_); } + + [[nodiscard]] constexpr __iterator end() const noexcept { return __iterator(__last_idx_); } + + private: + friend struct text_encoding; + + constexpr aliases_view(const __te_data& __d) + : __first_idx_(__d.__first_alias_index_), __last_idx_(__d.__first_alias_index_ + __d.__num_aliases_) {} + + unsigned short __first_idx_; + unsigned short __last_idx_; + }; // aliases_view + + constexpr text_encoding() = default; + + constexpr explicit text_encoding(string_view __enc) noexcept : __encoding_idx_(__find_data_idx(__enc)) { + __enc.copy(__name_, max_name_length, 0); + } + + constexpr text_encoding(id __i) noexcept : __encoding_idx_(__find_data_idx_by_id(__i)) { + const char* __alias = __get_alias_from_offset_table(__get().__first_alias_index_); + if (__alias[0] != '\0') { + 
string_view(__alias).copy(__name_, max_name_length); + } + } + + [[nodiscard]] constexpr id mib() const noexcept { return id(__get().__mib_rep_); } + + [[nodiscard]] constexpr const char* name() const noexcept { return __name_; } + + [[nodiscard]] constexpr aliases_view aliases() const noexcept { return aliases_view(__get()); } + + friend constexpr bool operator==(const text_encoding& __a, const text_encoding& __b) noexcept { + return __a.mib() == id::other && __b.mib() == id::other + ? __comp_name(__a.__name_, __b.__name_) + : __a.mib() == __b.mib(); + } + + friend constexpr bool operator==(const text_encoding& __encoding, id __i) noexcept { return __encoding.mib() == __i; } + + [[nodiscard]] static consteval text_encoding literal() noexcept { + // TODO: Remove this branch once we have __GNUC_EXECUTION_CHARSET_NAME or __clang_literal_encoding__ unconditionally +# ifdef __GNUC_EXECUTION_CHARSET_NAME + return text_encoding(__GNUC_EXECUTION_CHARSET_NAME); +# elif defined(__clang_literal_encoding__) + return text_encoding(__clang_literal_encoding__); +# else + return text_encoding(); +# endif + } + +# if _LIBCPP_HAS_LOCALIZATION + [[nodiscard]] _LIBCPP_AVAILABILITY_TEXT_ENCODING_ENVIRONMENT static text_encoding environment() { +# if defined(_LIBCPP_WIN32API) + return __get_locale_encoding(nullptr); +# else + return __get_locale_encoding(""); +# endif + } + + template + [[nodiscard]] _LIBCPP_AVAILABILITY_TEXT_ENCODING_ENVIRONMENT static bool environment_is() { + // TODO: It may be worthwhile to implement an optimization for popular encodings, e.g. UTF-8, + // to allow checking the environment text encoding without needing to use the data tables. 
+ +# if defined(__ANDROID__) + return _Id == std::text_encoding::id::UTF8; +# else + return environment() == _Id; +# endif + } +# else + static text_encoding environment() = delete; + + template + static bool environment_is() = delete; +# endif + +private: + constexpr const __te_data& __get() const { return __entries[__encoding_idx_]; } + + static constexpr char const* __get_alias_from_offset_table(unsigned short __idx) { + return __aliases_string + __alias_offsets.__table[__idx]; + } + + static constexpr auto __other_idx_ = 0u; + static constexpr auto __unknown_idx_ = 1u; + + char __name_[max_name_length + 1]{}; + unsigned short __encoding_idx_{1u}; + + static constexpr __te_data __entries[] = { + {0, 1, 0}, {1, 2, 0}, {2, 3, 11}, {13, 4, 9}, {22, 5, 7}, {29, 6, 7}, {36, 7, 7}, + {43, 8, 6}, {49, 9, 8}, {57, 10, 9}, {66, 11, 6}, {72, 12, 7}, {79, 13, 6}, {85, 14, 3}, + {88, 15, 3}, {91, 16, 2}, {93, 17, 3}, {96, 18, 3}, {99, 19, 2}, {101, 20, 6}, {107, 21, 5}, + {112, 22, 4}, {116, 23, 4}, {120, 24, 5}, {125, 25, 6}, {131, 26, 5}, {136, 27, 2}, {138, 28, 3}, + {141, 29, 2}, {143, 30, 4}, {147, 31, 3}, {150, 32, 3}, {153, 35, 7}, {160, 36, 6}, {166, 37, 2}, + {168, 38, 2}, {170, 39, 2}, {172, 40, 2}, {174, 41, 6}, {180, 42, 5}, {185, 43, 4}, {189, 44, 3}, + {192, 45, 3}, {195, 46, 4}, {199, 47, 3}, {202, 48, 3}, {205, 49, 3}, {208, 50, 3}, {211, 51, 3}, + {214, 52, 3}, {217, 53, 3}, {220, 54, 4}, {224, 55, 3}, {227, 56, 5}, {232, 57, 4}, {236, 58, 5}, + {241, 59, 3}, {244, 60, 4}, {248, 61, 4}, {252, 62, 5}, {257, 63, 5}, {262, 64, 3}, {265, 65, 5}, + {270, 66, 2}, {272, 67, 4}, {276, 68, 5}, {281, 69, 4}, {285, 70, 4}, {289, 71, 4}, {293, 72, 3}, + {296, 73, 4}, {300, 74, 5}, {305, 75, 3}, {308, 76, 4}, {312, 77, 4}, {316, 78, 7}, {323, 79, 6}, + {329, 80, 3}, {332, 81, 3}, {335, 82, 3}, {338, 83, 3}, {341, 84, 3}, {344, 85, 3}, {347, 86, 3}, + {350, 87, 6}, {356, 88, 3}, {359, 89, 4}, {363, 90, 4}, {367, 91, 4}, {371, 92, 5}, {376, 93, 3}, + {379, 94, 4}, {383, 
95, 4}, {387, 96, 3}, {390, 97, 4}, {394, 98, 4}, {398, 99, 5}, {403, 100, 2}, + {405, 101, 2}, {407, 102, 3}, {410, 103, 2}, {412, 104, 2}, {414, 105, 2}, {416, 106, 2}, {418, 109, 2}, + {420, 110, 8}, {428, 111, 4}, {432, 112, 7}, {439, 113, 5}, {444, 114, 2}, {446, 115, 2}, {448, 116, 2}, + {450, 117, 2}, {452, 118, 4}, {456, 119, 4}, {460, 1000, 2}, {462, 1001, 2}, {464, 1002, 2}, {466, 1003, 3}, + {469, 1004, 2}, {471, 1005, 2}, {473, 1006, 2}, {475, 1007, 2}, {477, 1008, 2}, {479, 1009, 2}, {481, 1010, 2}, + {483, 1011, 2}, {485, 1012, 2}, {487, 1013, 2}, {489, 1014, 2}, {491, 1015, 2}, {493, 1016, 3}, {496, 1017, 2}, + {498, 1018, 2}, {500, 1019, 2}, {502, 1020, 3}, {505, 1021, 2}, {507, 2000, 2}, {509, 2001, 2}, {511, 2002, 2}, + {513, 2003, 2}, {515, 2004, 4}, {519, 2005, 2}, {521, 2006, 2}, {523, 2007, 2}, {525, 2008, 3}, {528, 2009, 4}, + {532, 2010, 4}, {536, 2011, 4}, {540, 2012, 2}, {542, 2013, 4}, {546, 2014, 2}, {548, 2015, 2}, {550, 2016, 2}, + {552, 2017, 2}, {554, 2018, 2}, {556, 2019, 2}, {558, 2020, 2}, {560, 2021, 2}, {562, 2022, 2}, {564, 2023, 2}, + {566, 2024, 2}, {568, 2025, 2}, {570, 2026, 2}, {572, 2027, 3}, {575, 2028, 7}, {582, 2029, 4}, {586, 2030, 3}, + {589, 2031, 4}, {593, 2032, 4}, {597, 2033, 4}, {601, 2034, 5}, {606, 2035, 4}, {610, 2036, 4}, {614, 2037, 4}, + {618, 2038, 4}, {622, 2039, 4}, {626, 2040, 4}, {630, 2041, 4}, {634, 2042, 4}, {638, 2043, 4}, {642, 2044, 5}, + {647, 2045, 4}, {651, 2046, 4}, {655, 2047, 4}, {659, 2048, 4}, {663, 2049, 5}, {668, 2050, 4}, {672, 2051, 3}, + {675, 2052, 4}, {679, 2053, 4}, {683, 2054, 5}, {688, 2055, 5}, {693, 2056, 4}, {697, 2057, 4}, {701, 2058, 3}, + {704, 2059, 3}, {707, 2060, 4}, {711, 2061, 4}, {715, 2062, 4}, {719, 2063, 3}, {722, 2064, 2}, {724, 2065, 2}, + {726, 2066, 2}, {728, 2067, 2}, {730, 2068, 2}, {732, 2069, 2}, {734, 2070, 2}, {736, 2071, 2}, {738, 2072, 2}, + {740, 2073, 2}, {742, 2074, 2}, {744, 2075, 2}, {746, 2076, 2}, {748, 2077, 2}, {750, 2078, 2}, {752, 2079, 2}, 
+ {754, 2080, 2}, {756, 2081, 2}, {758, 2082, 2}, {760, 2083, 2}, {762, 2084, 2}, {764, 2085, 1}, {765, 2086, 4}, + {769, 2087, 3}, {772, 2088, 2}, {774, 2089, 5}, {779, 2090, 5}, {784, 2091, 5}, {789, 2092, 5}, {794, 2093, 6}, + {800, 2094, 6}, {806, 2095, 5}, {811, 2096, 5}, {816, 2097, 5}, {821, 2098, 5}, {826, 2099, 5}, {831, 2100, 5}, + {836, 2101, 2}, {838, 2102, 3}, {841, 2103, 5}, {846, 2104, 5}, {851, 2105, 2}, {853, 2106, 2}, {855, 2107, 2}, + {857, 2108, 2}, {859, 2109, 2}, {861, 2250, 2}, {863, 2251, 2}, {865, 2252, 2}, {867, 2253, 2}, {869, 2254, 2}, + {871, 2255, 2}, {873, 2256, 2}, {875, 2257, 2}, {877, 2258, 2}, {879, 2259, 3}, {882, 2260, 2}}; + + static constexpr char __aliases_string[] = + "US-ASCII\0iso-ir-6\0ANSI_X3.4-1968\0ANSI_X3.4-1986\0ISO_646.irv:1991\0ISO646-" + "US\0us\0IBM367\0cp367\0csASCII\0ASCII\0ISO_8859-1:1987\0iso-ir-100\0ISO_8859-1\0ISO-8859-" + "1\0latin1\0l1\0IBM819\0CP819\0csISOLatin1\0ISO_8859-2:1987\0iso-ir-101\0ISO_8859-2\0ISO-8859-" + "2\0latin2\0l2\0csISOLatin2\0ISO_8859-3:1988\0iso-ir-109\0ISO_8859-3\0ISO-8859-3\0latin3\0l3\0csISOLatin3\0ISO_" + "8859-4:1988\0iso-ir-110\0ISO_8859-4\0ISO-8859-4\0latin4\0l4\0csISOLatin4\0ISO_8859-5:1988\0iso-ir-144\0ISO_8859-" + "5\0ISO-8859-5\0cyrillic\0csISOLatinCyrillic\0ISO_8859-6:1987\0iso-ir-127\0ISO_8859-6\0ISO-8859-6\0ECMA-" + "114\0ASMO-708\0arabic\0csISOLatinArabic\0ISO_8859-7:1987\0iso-ir-126\0ISO_8859-7\0ISO-8859-7\0ELOT_928\0ECMA-" + "118\0greek\0greek8\0csISOLatinGreek\0ISO_8859-8:1988\0iso-ir-138\0ISO_8859-8\0ISO-8859-" + "8\0hebrew\0csISOLatinHebrew\0ISO_8859-9:1989\0iso-ir-148\0ISO_8859-9\0ISO-8859-9\0latin5\0l5\0csISOLatin5\0ISO-" + "8859-10\0iso-ir-157\0l6\0ISO_8859-10:1992\0csISOLatin6\0latin6\0ISO_6937-2-add\0iso-ir-142\0csISOTextComm\0JIS_" + "X0201\0X0201\0csHalfWidthKatakana\0JIS_Encoding\0csJISEncoding\0Shift_JIS\0MS_Kanji\0csShiftJIS\0Extended_UNIX_" + "Code_Packed_Format_for_Japanese\0csEUCPkdFmtJapanese\0EUC-JP\0Extended_UNIX_Code_Fixed_Width_for_" + 
"Japanese\0csEUCFixWidJapanese\0BS_4730\0iso-ir-4\0ISO646-GB\0gb\0uk\0csISO4UnitedKingdom\0SEN_850200_C\0iso-ir-" + "11\0ISO646-SE2\0se2\0csISO11SwedishForNames\0IT\0iso-ir-15\0ISO646-IT\0csISO15Italian\0ES\0iso-ir-17\0ISO646-" + "ES\0csISO17Spanish\0DIN_66003\0iso-ir-21\0de\0ISO646-DE\0csISO21German\0NS_4551-1\0iso-ir-60\0ISO646-" + "NO\0no\0csISO60DanishNorwegian\0csISO60Norwegian1\0NF_Z_62-010\0iso-ir-69\0ISO646-FR\0fr\0csISO69French\0ISO-" + "10646-UTF-1\0csISO10646UTF1\0ISO_646.basic:1983\0ref\0csISO646basic1983\0INVARIANT\0csINVARIANT\0ISO_646.irv:" + "1983\0iso-ir-2\0irv\0csISO2IntlRefVersion\0NATS-SEFI\0iso-ir-8-1\0csNATSSEFI\0NATS-SEFI-ADD\0iso-ir-8-" + "2\0csNATSSEFIADD\0SEN_850200_B\0iso-ir-10\0FI\0ISO646-FI\0ISO646-SE\0se\0csISO10Swedish\0KS_C_5601-1987\0iso-ir-" + "149\0KS_C_5601-1989\0KSC_5601\0korean\0csKSC56011987\0ISO-2022-KR\0csISO2022KR\0EUC-KR\0csEUCKR\0ISO-2022-" + "JP\0csISO2022JP\0ISO-2022-JP-2\0csISO2022JP2\0JIS_C6220-1969-jp\0JIS_C6220-1969\0iso-ir-13\0katakana\0x0201-" + "7\0csISO13JISC6220jp\0JIS_C6220-1969-ro\0iso-ir-14\0jp\0ISO646-JP\0csISO14JISC6220ro\0PT\0iso-ir-16\0ISO646-" + "PT\0csISO16Portuguese\0greek7-old\0iso-ir-18\0csISO18Greek7Old\0latin-greek\0iso-ir-19\0csISO19LatinGreek\0NF_Z_" + "62-010_(1973)\0iso-ir-25\0ISO646-FR1\0csISO25French\0Latin-greek-1\0iso-ir-27\0csISO27LatinGreek1\0ISO_" + "5427\0iso-ir-37\0csISO5427Cyrillic\0JIS_C6226-1978\0iso-ir-42\0csISO42JISC62261978\0BS_viewdata\0iso-ir-" + "47\0csISO47BSViewdata\0INIS\0iso-ir-49\0csISO49INIS\0INIS-8\0iso-ir-50\0csISO50INIS8\0INIS-cyrillic\0iso-ir-" + "51\0csISO51INISCyrillic\0ISO_5427:1981\0iso-ir-54\0ISO5427Cyrillic1981\0csISO54271981\0ISO_5428:1980\0iso-ir-" + "55\0csISO5428Greek\0GB_1988-80\0iso-ir-57\0cn\0ISO646-CN\0csISO57GB1988\0GB_2312-80\0iso-ir-" + "58\0chinese\0csISO58GB231280\0NS_4551-2\0ISO646-NO2\0iso-ir-61\0no2\0csISO61Norwegian2\0videotex-suppl\0iso-ir-" + "70\0csISO70VideotexSupp1\0PT2\0iso-ir-84\0ISO646-PT2\0csISO84Portuguese2\0ES2\0iso-ir-85\0ISO646-" 
+ "ES2\0csISO85Spanish2\0MSZ_7795.3\0iso-ir-86\0ISO646-HU\0hu\0csISO86Hungarian\0JIS_C6226-1983\0iso-ir-" + "87\0x0208\0JIS_X0208-1983\0csISO87JISX0208\0greek7\0iso-ir-88\0csISO88Greek7\0ASMO_449\0ISO_9036\0arabic7\0iso-" + "ir-89\0csISO89ASMO449\0iso-ir-90\0csISO90\0JIS_C6229-1984-a\0iso-ir-91\0jp-ocr-a\0csISO91JISC62291984a\0JIS_" + "C6229-1984-b\0iso-ir-92\0ISO646-JP-OCR-B\0jp-ocr-b\0csISO92JISC62991984b\0JIS_C6229-1984-b-add\0iso-ir-93\0jp-" + "ocr-b-add\0csISO93JIS62291984badd\0JIS_C6229-1984-hand\0iso-ir-94\0jp-ocr-hand\0csISO94JIS62291984hand\0JIS_" + "C6229-1984-hand-add\0iso-ir-95\0jp-ocr-hand-add\0csISO95JIS62291984handadd\0JIS_C6229-1984-kana\0iso-ir-" + "96\0csISO96JISC62291984kana\0ISO_2033-1983\0iso-ir-98\0e13b\0csISO2033\0ANSI_X3.110-1983\0iso-ir-99\0CSA_T500-" + "1983\0NAPLPS\0csISO99NAPLPS\0T.61-7bit\0iso-ir-102\0csISO102T617bit\0T.61-8bit\0T.61\0iso-ir-" + "103\0csISO103T618bit\0ECMA-cyrillic\0iso-ir-111\0KOI8-E\0csISO111ECMACyrillic\0CSA_Z243.4-1985-1\0iso-ir-" + "121\0ISO646-CA\0csa7-1\0csa71\0ca\0csISO121Canadian1\0CSA_Z243.4-1985-2\0iso-ir-122\0ISO646-CA2\0csa7-" + "2\0csa72\0csISO122Canadian2\0CSA_Z243.4-1985-gr\0iso-ir-123\0csISO123CSAZ24341985gr\0ISO_8859-6-" + "E\0csISO88596E\0ISO-8859-6-E\0ISO_8859-6-I\0csISO88596I\0ISO-8859-6-I\0T.101-G2\0iso-ir-" + "128\0csISO128T101G2\0ISO_8859-8-E\0csISO88598E\0ISO-8859-8-E\0ISO_8859-8-I\0csISO88598I\0ISO-8859-8-I\0CSN_" + "369103\0iso-ir-139\0csISO139CSN369103\0JUS_I.B1.002\0iso-ir-141\0ISO646-YU\0js\0yu\0csISO141JUSIB1002\0IEC_P27-" + "1\0iso-ir-143\0csISO143IECP271\0JUS_I.B1.003-serb\0iso-ir-146\0serbian\0csISO146Serbian\0JUS_I.B1.003-" + "mac\0macedonian\0iso-ir-147\0csISO147Macedonian\0greek-ccitt\0iso-ir-150\0csISO150\0csISO150GreekCCITT\0NC_NC00-" + "10:81\0cuba\0iso-ir-151\0ISO646-CU\0csISO151Cuba\0ISO_6937-2-25\0iso-ir-152\0csISO6937Add\0GOST_19768-74\0ST_" + "SEV_358-88\0iso-ir-153\0csISO153GOST1976874\0ISO_8859-supp\0iso-ir-154\0latin1-2-5\0csISO8859Supp\0ISO_10367-" + 
"box\0iso-ir-155\0csISO10367Box\0latin-lap\0lap\0iso-ir-158\0csISO158Lap\0JIS_X0212-1990\0x0212\0iso-ir-" + "159\0csISO159JISX02121990\0DS_2089\0DS2089\0ISO646-DK\0dk\0csISO646Danish\0us-dk\0csUSDK\0dk-" + "us\0csDKUS\0KSC5636\0ISO646-KR\0csKSC5636\0UNICODE-1-1-UTF-7\0csUnicode11UTF7\0ISO-2022-CN\0csISO2022CN\0ISO-" + "2022-CN-EXT\0csISO2022CNEXT\0UTF-8\0csUTF8\0ISO-8859-13\0csISO885913\0ISO-8859-14\0iso-ir-199\0ISO_8859-14:" + "1998\0ISO_8859-14\0latin8\0iso-celtic\0l8\0csISO885914\0ISO-8859-15\0ISO_8859-15\0Latin-9\0csISO885915\0ISO-" + "8859-16\0iso-ir-226\0ISO_8859-16:2001\0ISO_8859-16\0latin10\0l10\0csISO885916\0GBK\0CP936\0MS936\0windows-" + "936\0csGBK\0GB18030\0csGB18030\0OSD_EBCDIC_DF04_15\0csOSDEBCDICDF0415\0OSD_EBCDIC_DF03_" + "IRV\0csOSDEBCDICDF03IRV\0OSD_EBCDIC_DF04_1\0csOSDEBCDICDF041\0ISO-11548-1\0ISO_11548-1\0ISO_TR_11548-" + "1\0csISO115481\0KZ-1048\0STRK1048-2002\0RK1048\0csKZ1048\0ISO-10646-UCS-2\0csUnicode\0ISO-10646-UCS-" + "4\0csUCS4\0ISO-10646-UCS-Basic\0csUnicodeASCII\0ISO-10646-Unicode-Latin1\0csUnicodeLatin1\0ISO-10646\0ISO-10646-" + "J-1\0csUnicodeJapanese\0ISO-Unicode-IBM-1261\0csUnicodeIBM1261\0ISO-Unicode-IBM-1268\0csUnicodeIBM1268\0ISO-" + "Unicode-IBM-1276\0csUnicodeIBM1276\0ISO-Unicode-IBM-1264\0csUnicodeIBM1264\0ISO-Unicode-IBM-" + "1265\0csUnicodeIBM1265\0UNICODE-1-1\0csUnicode11\0SCSU\0csSCSU\0UTF-7\0csUTF7\0UTF-16BE\0csUTF16BE\0UTF-" + "16LE\0csUTF16LE\0UTF-16\0csUTF16\0CESU-8\0csCESU8\0csCESU-8\0UTF-32\0csUTF32\0UTF-32BE\0csUTF32BE\0UTF-" + "32LE\0csUTF32LE\0BOCU-1\0csBOCU1\0csBOCU-1\0UTF-7-IMAP\0csUTF7IMAP\0ISO-8859-1-Windows-3.0-Latin-" + "1\0csWindows30Latin1\0ISO-8859-1-Windows-3.1-Latin-1\0csWindows31Latin1\0ISO-8859-2-Windows-Latin-" + "2\0csWindows31Latin2\0ISO-8859-9-Windows-Latin-5\0csWindows31Latin5\0hp-roman8\0roman8\0r8\0csHPRoman8\0Adobe-" + "Standard-Encoding\0csAdobeStandardEncoding\0Ventura-US\0csVenturaUS\0Ventura-" + "International\0csVenturaInternational\0DEC-" + 
"MCS\0dec\0csDECMCS\0IBM850\0cp850\0850\0csPC850Multilingual\0IBM852\0cp852\0852\0csPCp852\0IBM437\0cp437\0" + "437\0csPC8CodePage437\0PC8-Danish-Norwegian\0csPC8DanishNorwegian\0IBM862\0cp862\0862\0csPC862LatinHebrew\0PC8-" + "Turkish\0csPC8Turkish\0IBM-Symbols\0csIBMSymbols\0IBM-Thai\0csIBMThai\0HP-Legal\0csHPLegal\0HP-Pi-" + "font\0csHPPiFont\0HP-Math8\0csHPMath8\0Adobe-Symbol-Encoding\0csHPPSMath\0HP-DeskTop\0csHPDesktop\0Ventura-" + "Math\0csVenturaMath\0Microsoft-Publishing\0csMicrosoftPublishing\0Windows-" + "31J\0csWindows31J\0GB2312\0csGB2312\0Big5\0csBig5\0macintosh\0mac\0csMacintosh\0IBM037\0cp037\0ebcdic-cp-" + "us\0ebcdic-cp-ca\0ebcdic-cp-wt\0ebcdic-cp-nl\0csIBM037\0IBM038\0EBCDIC-" + "INT\0cp038\0csIBM038\0IBM273\0CP273\0csIBM273\0IBM274\0EBCDIC-BE\0CP274\0csIBM274\0IBM275\0EBCDIC-" + "BR\0cp275\0csIBM275\0IBM277\0EBCDIC-CP-DK\0EBCDIC-CP-NO\0csIBM277\0IBM278\0CP278\0ebcdic-cp-fi\0ebcdic-cp-" + "se\0csIBM278\0IBM280\0CP280\0ebcdic-cp-it\0csIBM280\0IBM281\0EBCDIC-JP-" + "E\0cp281\0csIBM281\0IBM284\0CP284\0ebcdic-cp-es\0csIBM284\0IBM285\0CP285\0ebcdic-cp-" + "gb\0csIBM285\0IBM290\0cp290\0EBCDIC-JP-kana\0csIBM290\0IBM297\0cp297\0ebcdic-cp-" + "fr\0csIBM297\0IBM420\0cp420\0ebcdic-cp-ar1\0csIBM420\0IBM423\0cp423\0ebcdic-cp-" + "gr\0csIBM423\0IBM424\0cp424\0ebcdic-cp-he\0csIBM424\0IBM500\0CP500\0ebcdic-cp-be\0ebcdic-cp-" + "ch\0csIBM500\0IBM851\0cp851\0851\0csIBM851\0IBM855\0cp855\0855\0csIBM855\0IBM857\0cp857\0857\0csIBM857\0IBM860\0" + "cp860\0860\0csIBM860\0IBM861\0cp861\0861\0cp-" + "is\0csIBM861\0IBM863\0cp863\0863\0csIBM863\0IBM864\0cp864\0csIBM864\0IBM865\0cp865\0865\0csIBM865\0IBM868\0CP868" + "\0cp-ar\0csIBM868\0IBM869\0cp869\0869\0cp-gr\0csIBM869\0IBM870\0CP870\0ebcdic-cp-roece\0ebcdic-cp-" + "yu\0csIBM870\0IBM871\0CP871\0ebcdic-cp-is\0csIBM871\0IBM880\0cp880\0EBCDIC-" + "Cyrillic\0csIBM880\0IBM891\0cp891\0csIBM891\0IBM903\0cp903\0csIBM903\0IBM904\0cp904\0904\0csIBBM904\0IBM905\0CP9" + 
"05\0ebcdic-cp-tr\0csIBM905\0IBM918\0CP918\0ebcdic-cp-ar2\0csIBM918\0IBM1026\0CP1026\0csIBM1026\0EBCDIC-AT-" + "DE\0csIBMEBCDICATDE\0EBCDIC-AT-DE-A\0csEBCDICATDEA\0EBCDIC-CA-FR\0csEBCDICCAFR\0EBCDIC-DK-" + "NO\0csEBCDICDKNO\0EBCDIC-DK-NO-A\0csEBCDICDKNOA\0EBCDIC-FI-SE\0csEBCDICFISE\0EBCDIC-FI-SE-" + "A\0csEBCDICFISEA\0EBCDIC-FR\0csEBCDICFR\0EBCDIC-IT\0csEBCDICIT\0EBCDIC-PT\0csEBCDICPT\0EBCDIC-" + "ES\0csEBCDICES\0EBCDIC-ES-A\0csEBCDICESA\0EBCDIC-ES-S\0csEBCDICESS\0EBCDIC-UK\0csEBCDICUK\0EBCDIC-" + "US\0csEBCDICUS\0UNKNOWN-" + "8BIT\0csUnknown8BiT\0MNEMONIC\0csMnemonic\0MNEM\0csMnem\0VISCII\0csVISCII\0VIQR\0csVIQR\0KOI8-R\0csKOI8R\0HZ-GB-" + "2312\0IBM866\0cp866\0866\0csIBM866\0IBM775\0cp775\0csPC775Baltic\0KOI8-" + "U\0csKOI8U\0IBM00858\0CCSID00858\0CP00858\0PC-Multilingual-850+" + "euro\0csIBM00858\0IBM00924\0CCSID00924\0CP00924\0ebcdic-Latin9--" + "euro\0csIBM00924\0IBM01140\0CCSID01140\0CP01140\0ebcdic-us-37+" + "euro\0csIBM01140\0IBM01141\0CCSID01141\0CP01141\0ebcdic-de-273+" + "euro\0csIBM01141\0IBM01142\0CCSID01142\0CP01142\0ebcdic-dk-277+euro\0ebcdic-no-277+" + "euro\0csIBM01142\0IBM01143\0CCSID01143\0CP01143\0ebcdic-fi-278+euro\0ebcdic-se-278+" + "euro\0csIBM01143\0IBM01144\0CCSID01144\0CP01144\0ebcdic-it-280+" + "euro\0csIBM01144\0IBM01145\0CCSID01145\0CP01145\0ebcdic-es-284+" + "euro\0csIBM01145\0IBM01146\0CCSID01146\0CP01146\0ebcdic-gb-285+" + "euro\0csIBM01146\0IBM01147\0CCSID01147\0CP01147\0ebcdic-fr-297+" + "euro\0csIBM01147\0IBM01148\0CCSID01148\0CP01148\0ebcdic-international-500+" + "euro\0csIBM01148\0IBM01149\0CCSID01149\0CP01149\0ebcdic-is-871+euro\0csIBM01149\0Big5-" + "HKSCS\0csBig5HKSCS\0IBM1047\0IBM-1047\0csIBM1047\0PTCP154\0csPTCP154\0PT154\0CP154\0Cyrillic-Asian\0Amiga-" + "1251\0Ami1251\0Amiga1251\0Ami-1251\0csAmiga1251\0KOI7-" + "switched\0csKOI7switched\0BRF\0csBRF\0TSCII\0csTSCII\0CP51932\0csCP51932\0windows-874\0cswindows874\0windows-" + "1250\0cswindows1250\0windows-1251\0cswindows1251\0windows-1252\0cswindows1252\0windows-" + 
"1253\0cswindows1253\0windows-1254\0cswindows1254\0windows-1255\0cswindows1255\0windows-" + "1256\0cswindows1256\0windows-1257\0cswindows1257\0windows-1258\0cswindows1258\0TIS-620\0csTIS620\0ISO-8859-" + "11\0CP50220\0csCP50220\0"; + + struct __offset_table { + constexpr static unsigned long long __num_aliases = + std::count(__aliases_string, __aliases_string + sizeof(__aliases_string), '\0') + 1; + unsigned short __table[__num_aliases]; + }; + + static constexpr __offset_table __alias_offsets = [] { + __offset_table __aliases{}; + __aliases.__table[0] = sizeof(__aliases_string) - 1; + __aliases.__table[1] = sizeof(__aliases_string) - 1; + __aliases.__table[2] = 0; + + unsigned long long __idx = 3; + + for (unsigned short __pos = 0; __pos < sizeof(__aliases_string) - 1 && __idx < __offset_table::__num_aliases; + __pos++) { + if (__aliases_string[__pos] == '\0') { + __aliases.__table[__idx++] = __pos + 1; + } + } + + return __aliases; + }(); +}; + +template <> +struct hash { + [[nodiscard]] static size_t operator()(const text_encoding& __enc) noexcept { + return std::hash()(__enc.mib()); + } +}; + +template <> +inline constexpr bool ranges::enable_borrowed_range = true; + +_LIBCPP_END_NAMESPACE_STD + +# endif // _LIBCPP_STD_VER >= 26 + +#endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) + +#endif // _LIBCPP_TEXT_ENCODING diff --git a/libcxx/include/version b/libcxx/include/version index 53e879959ec17..0b4438d90190d 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -633,7 +633,7 @@ __cpp_lib_void_t 201411L # undef __cpp_lib_string_view # define __cpp_lib_string_view 202403L // # define __cpp_lib_submdspan 202306L -// # define __cpp_lib_text_encoding 202306L +# define __cpp_lib_text_encoding 202306L # undef __cpp_lib_to_chars // # define __cpp_lib_to_chars 202306L // # define __cpp_lib_to_string 202306L diff --git a/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist 
b/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist index 74283373d1b65..2a419e8c8af05 100644 --- a/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/arm64-apple-darwin.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1562,6 +1562,7 @@ {'is_defined': True, 'name': '__ZNSt3__120__libcpp_atomic_waitEPVKNS_17__cxx_atomic_implIxNS_22__cxx_atomic_base_implIxEEEEx', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__120__libcpp_atomic_waitEPVKvx', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__120__throw_system_errorEiPKc', 'type': 'FUNC'} +{'is_defined': True, 'name': '__ZNSt3__121__get_locale_encodingEPKc', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__121__throw_runtime_errorEPKc', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__121recursive_timed_mutex4lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '__ZNSt3__121recursive_timed_mutex6unlockEv', 'type': 'FUNC'} @@ -2655,4 +2656,4 @@ {'is_defined': True, 'name': '___cxa_vec_new2', 'type': 'I'} {'is_defined': True, 'name': '___cxa_vec_new3', 'type': 'I'} {'is_defined': True, 'name': '___dynamic_cast', 'type': 'I'} -{'is_defined': True, 'name': '___gxx_personality_v0', 'type': 'I'} +{'is_defined': True, 'name': '___gxx_personality_v0', 'type': 'I'} \ No newline at end of file diff --git a/libcxx/lib/abi/i686-linux-android23.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/i686-linux-android23.libcxxabi.v1.stable.exceptions.nonew.abilist index 291149608c9c7..414a239757e1c 100644 --- a/libcxx/lib/abi/i686-linux-android23.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/i686-linux-android23.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1199,6 +1199,7 @@ {'is_defined': True, 'name': '_ZNSt6__ndk120__libcpp_atomic_waitEPVKNS_17__cxx_atomic_implIiNS_22__cxx_atomic_base_implIiEEEEi', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk120__libcpp_atomic_waitEPVKvi', 'type': 
'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk120__throw_system_errorEiPKc', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt6__ndk121__get_locale_encodingEPKc', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk121__throw_runtime_errorEPKc', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk121recursive_timed_mutex4lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk121recursive_timed_mutex6unlockEv', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist index da6d608af462c..590c0de8ce20f 100644 --- a/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/powerpc-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -555,6 +555,7 @@ {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__120__libcpp_atomic_waitEPVKNS_17__cxx_atomic_implIiNS_22__cxx_atomic_base_implIiEEEEi', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__120__libcpp_atomic_waitEPVKvi', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__120__throw_system_errorEiPKc', 'storage_mapping_class': 'DS', 'type': 'FUNC'} +{'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__121__get_locale_encodingEPKc', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__121__throw_runtime_errorEPKc', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__121recursive_timed_mutex4lockEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__121recursive_timed_mutex6unlockEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist 
b/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist index 6ee31613d5320..f4f4ab99ad866 100644 --- a/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/powerpc64-ibm-aix.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -555,6 +555,7 @@ {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__120__libcpp_atomic_waitEPVKNS_17__cxx_atomic_implIlNS_22__cxx_atomic_base_implIlEEEEl', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__120__libcpp_atomic_waitEPVKvl', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__120__throw_system_errorEiPKc', 'storage_mapping_class': 'DS', 'type': 'FUNC'} +{'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__121__get_locale_encodingEPKc', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__121__throw_runtime_errorEPKc', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__121recursive_timed_mutex4lockEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} {'import_export': 'EXP', 'is_defined': True, 'name': '_ZNSt3__121recursive_timed_mutex6unlockEv', 'storage_mapping_class': 'DS', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/x86_64-linux-android23.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-linux-android23.libcxxabi.v1.stable.exceptions.nonew.abilist index 7b09d3dd3f75c..e1b1e110318fa 100644 --- a/libcxx/lib/abi/x86_64-linux-android23.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-linux-android23.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1199,6 +1199,7 @@ {'is_defined': True, 'name': '_ZNSt6__ndk120__libcpp_atomic_waitEPVKNS_17__cxx_atomic_implIiNS_22__cxx_atomic_base_implIiEEEEi', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk120__libcpp_atomic_waitEPVKvi', 
'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk120__throw_system_errorEiPKc', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt6__ndk121__get_locale_encodingEPKc', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk121__throw_runtime_errorEPKc', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk121recursive_timed_mutex4lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt6__ndk121recursive_timed_mutex6unlockEv', 'type': 'FUNC'} diff --git a/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist index 725ed669fb561..11cc8f180e824 100644 --- a/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-unknown-freebsd.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1212,6 +1212,7 @@ {'is_defined': True, 'name': '_ZNSt3__120__libcpp_atomic_waitEPVKNS_17__cxx_atomic_implIlNS_22__cxx_atomic_base_implIlEEEEl', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__120__libcpp_atomic_waitEPVKvl', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__120__throw_system_errorEiPKc', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt3__121__get_locale_encodingEPKc', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__121__throw_runtime_errorEPKc', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__121recursive_timed_mutex4lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__121recursive_timed_mutex6unlockEv', 'type': 'FUNC'} @@ -2031,4 +2032,4 @@ {'is_defined': True, 'name': '_ZTv0_n24_NSt3__114basic_iostreamIcNS_11char_traitsIcEEED0Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZTv0_n24_NSt3__114basic_iostreamIcNS_11char_traitsIcEEED1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZTv0_n24_NSt3__19strstreamD0Ev', 'type': 'FUNC'} -{'is_defined': True, 'name': '_ZTv0_n24_NSt3__19strstreamD1Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': 
'_ZTv0_n24_NSt3__19strstreamD1Ev', 'type': 'FUNC'} \ No newline at end of file diff --git a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist index 717d4c9bee726..a28229a9d6e97 100644 --- a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.exceptions.nonew.abilist @@ -1210,6 +1210,7 @@ {'is_defined': True, 'name': '_ZNSt3__120__libcpp_atomic_waitEPVKNS_17__cxx_atomic_implIiNS_22__cxx_atomic_base_implIiEEEEi', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__120__libcpp_atomic_waitEPVKvi', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__120__throw_system_errorEiPKc', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt3__121__get_locale_encodingEPKc', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__121__throw_runtime_errorEPKc', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__121recursive_timed_mutex4lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__121recursive_timed_mutex6unlockEv', 'type': 'FUNC'} @@ -2030,4 +2031,4 @@ {'is_defined': True, 'name': '_ZTv0_n24_NSt3__114basic_iostreamIcNS_11char_traitsIcEEED0Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZTv0_n24_NSt3__114basic_iostreamIcNS_11char_traitsIcEEED1Ev', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZTv0_n24_NSt3__19strstreamD0Ev', 'type': 'FUNC'} -{'is_defined': True, 'name': '_ZTv0_n24_NSt3__19strstreamD1Ev', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZTv0_n24_NSt3__19strstreamD1Ev', 'type': 'FUNC'} \ No newline at end of file diff --git a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist index 45db143b4cf4a..2b69d6dc89138 100644 --- 
a/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist +++ b/libcxx/lib/abi/x86_64-unknown-linux-gnu.libcxxabi.v1.stable.noexceptions.nonew.abilist @@ -1181,6 +1181,7 @@ {'is_defined': True, 'name': '_ZNSt3__120__libcpp_atomic_waitEPVKNS_17__cxx_atomic_implIiNS_22__cxx_atomic_base_implIiEEEEi', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__120__libcpp_atomic_waitEPVKvi', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__120__throw_system_errorEiPKc', 'type': 'FUNC'} +{'is_defined': True, 'name': '_ZNSt3__121__get_locale_encodingEPKc', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__121__throw_runtime_errorEPKc', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__121recursive_timed_mutex4lockEv', 'type': 'FUNC'} {'is_defined': True, 'name': '_ZNSt3__121recursive_timed_mutex6unlockEv', 'type': 'FUNC'} diff --git a/libcxx/modules/std.compat.cppm.in b/libcxx/modules/std.compat.cppm.in index dd7385bf33a42..ab08125c4c4e4 100644 --- a/libcxx/modules/std.compat.cppm.in +++ b/libcxx/modules/std.compat.cppm.in @@ -75,9 +75,6 @@ module; # if __has_include() # error "please update the header information for in headers_not_available in utils/libcxx/header_information.py" # endif // __has_include() -# if __has_include() -# error "please update the header information for in headers_not_available in utils/libcxx/header_information.py" -# endif // __has_include() #endif // _WIN32 export module std.compat; diff --git a/libcxx/modules/std.cppm.in b/libcxx/modules/std.cppm.in index 6de6369c3387f..b9a491653659a 100644 --- a/libcxx/modules/std.cppm.in +++ b/libcxx/modules/std.cppm.in @@ -108,6 +108,7 @@ module; #include #include #include +#include #include #include #include @@ -157,9 +158,6 @@ module; # if __has_include() # error "please update the header information for in headers_not_available in utils/libcxx/header_information.py" # endif // __has_include() -# if __has_include() -# error "please update the header information for 
in headers_not_available in utils/libcxx/header_information.py" -# endif // __has_include() #endif // _WIN32 export module std; diff --git a/libcxx/modules/std/text_encoding.inc b/libcxx/modules/std/text_encoding.inc index 6d5e3f1d68c60..8ad1e7bfced43 100644 --- a/libcxx/modules/std/text_encoding.inc +++ b/libcxx/modules/std/text_encoding.inc @@ -8,12 +8,11 @@ //===----------------------------------------------------------------------===// export namespace std { -#if 0 -# if _LIBCPP_STD_VER >= 23 - using std::text_encoding; - - // hash support +#if _LIBCPP_STD_VER >= 26 using std::hash; -# endif // _LIBCPP_STD_VER >= 23 + namespace ranges { + using std::ranges::enable_borrowed_range; + } + using std::text_encoding; #endif } // namespace std diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index de7817ad69f26..0c883e9ecd30d 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -91,6 +91,7 @@ if (LIBCXX_ENABLE_LOCALIZATION) ostream.cpp regex.cpp strstream.cpp + text_encoding.cpp ) endif() diff --git a/libcxx/src/support/win32/locale_win32.cpp b/libcxx/src/support/win32/locale_win32.cpp index e7c7a114e9ce6..c5117564438a5 100644 --- a/libcxx/src/support/win32/locale_win32.cpp +++ b/libcxx/src/support/win32/locale_win32.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include <__locale_dir/support/windows.h> +#include +#include #include // std::localeconv() & friends #include // va_start & friends #include @@ -14,6 +16,8 @@ #include // std::strtof & friends #include // std::strftime #include // wide char manipulation +#include +#include _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_BEGIN_EXPLICIT_ABI_ANNOTATIONS @@ -35,6 +39,262 @@ __lconv_t* __localeconv(__locale_t& loc) { return loc.__store_lconv(lc); } +namespace { +const char* __codepage_name(unsigned int __codepage) { + switch (__codepage) { + case 0: + // If no ANSI code page is available, only Unicode can be used for the locale. 
+ // In this case, the value is CP_ACP (0). + // Such a locale cannot be set as the system locale. + // Applications that do not support Unicode do not work correctly with locales + // marked as "Unicode only". + return nullptr; + case 37: + return "IBM037"; + case 437: + return "IBM437"; + case 500: + return "IBM500"; + case 708: + return "ASMO-708"; + case 709: + return "ASMO_449"; + case 775: + return "IBM775"; + case 850: + return "IBM850"; + case 852: + return "IBM852"; + case 855: + return "IBM855"; + case 857: + return "IBM857"; + case 858: + return "IBM00858"; + case 860: + return "IBM860"; + case 861: + return "IBM861"; + case 862: + return "IBM862"; + case 863: + return "IBM863"; + case 864: + return "IBM864"; + case 865: + return "IBM865"; + case 866: + return "IBM866"; + case 869: + return "IBM869"; + case 870: + return "IBM870"; + case 874: + return "windows-874"; + case 932: + return "Shift_JIS"; + case 936: + return "GB2312"; + case 949: + return "KS_C_5601-1989"; + case 950: + return "Big5"; + case 1026: + return "IBM1026"; + case 1047: + return "IBM1047"; + case 1140: + return "IBM01140"; + case 1141: + return "IBM01141"; + case 1142: + return "IBM01142"; + case 1143: + return "IBM01143"; + case 1144: + return "IBM01144"; + case 1145: + return "IBM01145"; + case 1146: + return "IBM01146"; + case 1147: + return "IBM01147"; + case 1148: + return "IBM01148"; + case 1149: + return "IBM01149"; + case 1200: + return "UTF-16LE"; + case 1201: + return "UTF-16BE"; + case 1250: + return "windows-1250"; + case 1251: + return "windows-1251"; + case 1252: + return "windows-1252"; + case 1253: + return "windows-1253"; + case 1254: + return "windows-1254"; + case 1255: + return "windows-1255"; + case 1256: + return "windows-1256"; + case 1257: + return "windows-1257"; + case 1258: + return "windows-1258"; + case 10000: + return "macintosh"; + case 12000: + return "UTF-32LE"; + case 12001: + return "UTF-32BE"; + case 20127: + return "US-ASCII"; + case 20273: +
return "IBM273"; + case 20277: + return "IBM277"; + case 20278: + return "IBM278"; + case 20280: + return "IBM280"; + case 20284: + return "IBM284"; + case 20285: + return "IBM285"; + case 20290: + return "IBM290"; + case 20297: + return "IBM297"; + case 20420: + return "IBM420"; + case 20423: + return "IBM423"; + case 20424: + return "IBM424"; + case 20838: + return "IBM-Thai"; + case 20866: + return "KOI8-R"; + case 20871: + return "IBM871"; + case 20880: + return "IBM880"; + case 20905: + return "IBM905"; + case 20924: + return "IBM00924"; + case 20932: + return "EUC-JP"; + case 21866: + return "KOI8-U"; + case 28591: + return "ISO-8859-1"; + case 28592: + return "ISO-8859-2"; + case 28593: + return "ISO-8859-3"; + case 28594: + return "ISO-8859-4"; + case 28595: + return "ISO-8859-5"; + case 28596: + return "ISO-8859-6"; + case 28597: + return "ISO-8859-7"; + case 28598: + return "ISO-8859-8"; + case 28599: + return "ISO-8859-9"; + case 28603: + return "ISO-8859-13"; + case 28605: + return "ISO-8859-15"; + case 38598: + return "ISO-8859-8-I"; + case 50220: + case 50221: + case 50222: + return "ISO-2022-JP"; + case 51932: + return "EUC-JP"; + case 51936: + return "GB2312"; + case 51949: + return "EUC-KR"; + case 52936: + return "HZ-GB-2312"; + case 54936: + return "GB18030"; + case 65000: + return "UTF-7"; + case 65001: + return "UTF-8"; + default: + return nullptr; + } +} +} // namespace + +const char* __get_locale_encoding(__locale_t loc) { + const char* locale_name = loc.__get_locale(); + if (locale_name == nullptr) { + return __codepage_name(::GetACP()); + } + + std::string_view __sv(locale_name); + + // locale :: "locale-name" + // | "language"[_country-region[.code-page]] + // | ".code-page" + // GetLocaleInfoEx doesn't accept anything other than BCP-47 locale names, e.g. "en_US", + // so do a best-attempt to derive the text encoding from the name.
+ if (__sv == "C" || __sv == "") { + // "A locale argument value of C specifies the minimal ANSI conforming environment for C translation." + // TODO: Figure out what to do for an empty string: + // "If locale points to an empty string, the locale is the implementation-defined native environment." + return __codepage_name(::GetACP()); + } else if (auto dot = __sv.find('.'); dot != std::string_view::npos) { + std::string_view __code_page(locale_name + dot + 1); + + // Windows allows the codepage number as part of the name, + // e.g. "en_US.1252" for English US, Windows-1252. + if (!__code_page.empty() && std::isdigit(static_cast<unsigned char>(__code_page[0]))) { + unsigned int __cpage{}; + auto __res = std::from_chars(__code_page.data(), __code_page.data() + __code_page.size(), __cpage); + if (__res.ec == std::errc{}) { + return __codepage_name(__cpage); + } + } else { // POSIX-style name + return locale_name + dot + 1; + } + } + + wchar_t locale_wbuffer[LOCALE_NAME_MAX_LENGTH + 1]{}; + wchar_t number_buffer[11]{}; + + bool is_ansi = ::AreFileApisANSI(); + auto codepage = is_ansi ? CP_ACP : CP_OEMCP; + int ret = ::MultiByteToWideChar( + codepage, MB_ERR_INVALID_CHARS, locale_name, __sv.size(), locale_wbuffer, LOCALE_NAME_MAX_LENGTH); + + if (ret <= 0) + return nullptr; + + // The below function fills the string with the number in text. + auto lctype = is_ansi ? LOCALE_IDEFAULTANSICODEPAGE : LOCALE_IDEFAULTCODEPAGE; + int result = ::GetLocaleInfoEx(locale_wbuffer, lctype, number_buffer, 10); + + if (result <= 0) + return nullptr; + + unsigned int acp = std::wcstoul(number_buffer, nullptr, 10); + return __codepage_name(acp); +} + // // Strtonum functions // diff --git a/libcxx/src/text_encoding.cpp b/libcxx/src/text_encoding.cpp new file mode 100644 index 0000000000000..871cf9b8b818b --- /dev/null +++ b/libcxx/src/text_encoding.cpp @@ -0,0 +1,54 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include <__config> +#include <__locale_dir/locale_base_api.h> +#include <__utility/scope_guard.h> +#include + +_LIBCPP_BEGIN_NAMESPACE_STD +_LIBCPP_BEGIN_EXPLICIT_ABI_ANNOTATIONS + +#if defined(__ANDROID__) +// UTF-8 is always the environment encoding on Android. +std::text_encoding __get_locale_encoding([[maybe_unused]] const char* __name) { return std::text_encoding::id::UTF8; } +#else +static text_encoding __make_text_encoding(const char* __name) { + if (__name == nullptr) + return text_encoding{}; + + string_view __name_view(__name); + if (__name_view.size() > text_encoding::max_name_length) + return text_encoding{}; + + return text_encoding(__name_view); +} + +std::text_encoding __get_locale_encoding(const char* __name) { + if (__name == nullptr) + return __make_text_encoding(__locale::__get_locale_encoding(static_cast<__locale::__locale_t>(nullptr))); + + __locale::__locale_t __l = __locale::__newlocale(_LIBCPP_CTYPE_MASK, __name, static_cast<__locale::__locale_t>(0)); + + __scope_guard __locale_guard([&__l] { + if (__l) { + __locale::__freelocale(__l); + } + }); + + if (!__l) { + return text_encoding{}; + } + + return __make_text_encoding(__locale::__get_locale_encoding(__l)); +} + +#endif // __ANDROID__ + +_LIBCPP_END_EXPLICIT_ABI_ANNOTATIONS +_LIBCPP_END_NAMESPACE_STD diff --git a/libcxx/test/libcxx/localization/nodiscard.verify.cpp b/libcxx/test/libcxx/localization/nodiscard.verify.cpp index dd14091b90182..7267b4c9c7b9c 100644 --- a/libcxx/test/libcxx/localization/nodiscard.verify.cpp +++ b/libcxx/test/libcxx/localization/nodiscard.verify.cpp @@ -32,8 +32,9 @@ void test() { // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} l.combine >(l); l.name(); // expected-warning {{ignoring return value of
function declared with 'nodiscard' attribute}} -#if TEST_STD_VER >= 26 - // l.encoding(); // TODO: Verify this once P1885R12 (https://llvm.org/PR105373) is implemented. +#if TEST_STD_VER >= 26 && _LIBCPP_AVAILABILITY_HAS_TEXT_ENCODING_ENVIRONMENT + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + l.encoding(); #endif // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} l(std::string(), std::string()); diff --git a/libcxx/test/libcxx/text/text_encoding/environment.android.pass.cpp b/libcxx/test/libcxx/text/text_encoding/environment.android.pass.cpp new file mode 100644 index 0000000000000..6a80e8224d528 --- /dev/null +++ b/libcxx/test/libcxx/text/text_encoding/environment.android.pass.cpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// REQUIRES: std-at-least-c++26 +// REQUIRES: locale.fr_CA.ISO8859-1 + +// UNSUPPORTED: no-localization +// UNSUPPORTED: availability-te-environment-missing +// REQUIRES: android + +// std::text_encoding::environment() + +#include +#include +#include + +#include "platform_support.h" // locale name macros + +int main(int, char**) { + // On Android, UTF-8 is unconditionally returned. 
+ ::setenv("LANG", LOCALE_fr_CA_ISO8859_1, 1); + + auto te = std::text_encoding::environment(); + assert(std::text_encoding::environment_is()); + assert(te == std::text_encoding::environment()); + assert(te.mib() == std::text_encoding::id::UTF8); + + return 0; +} diff --git a/libcxx/test/libcxx/text/text_encoding/environment.pass.cpp b/libcxx/test/libcxx/text/text_encoding/environment.pass.cpp new file mode 100644 index 0000000000000..b5c88c6e6d568 --- /dev/null +++ b/libcxx/test/libcxx/text/text_encoding/environment.pass.cpp @@ -0,0 +1,40 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// REQUIRES: std-at-least-c++26 +// REQUIRES: locale.fr_CA.ISO8859-1 + +// UNSUPPORTED: no-localization +// UNSUPPORTED: windows, android +// UNSUPPORTED: availability-te-environment-missing + +// std::text_encoding::environment() + +#include +#include +#include +#include + +#include "platform_support.h" // locale name macros + +int main(int, char**) { + // The default text_encoding::environment() which relies on nl_langinfo is + // affected by changes to the "LANG" environment variable on POSIX systems. 
+ setenv("LANG", LOCALE_fr_CA_ISO8859_1, 1); + + auto te = std::text_encoding::environment(); + + assert(std::text_encoding::environment_is()); + assert(te == std::text_encoding::environment()); + assert(te.mib() == std::text_encoding::id::ISOLatin1); + assert(std::ranges::contains(te.aliases(), std::string_view("ISO_8859-1"))); + + return 0; +} diff --git a/libcxx/test/libcxx/text/text_encoding/environment.windows.pass.cpp b/libcxx/test/libcxx/text/text_encoding/environment.windows.pass.cpp new file mode 100644 index 0000000000000..465a62078adc1 --- /dev/null +++ b/libcxx/test/libcxx/text/text_encoding/environment.windows.pass.cpp @@ -0,0 +1,39 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// REQUIRES: std-at-least-c++26 +// REQUIRES: locale.fr_CA.ISO8859-1 + +// UNSUPPORTED: no-localization +// UNSUPPORTED: availability-te-environment-missing +// REQUIRES: windows + +// std::text_encoding::environment() + +#include +#include +#include +#include + +#include "platform_support.h" // locale name macros + +int main(int, char**) { + // On Windows, changes to the "LANG" environment variable don't affect the result + // of std::text_encoding::environment() and environment_is() + auto te = std::text_encoding::environment(); + + ::SetEnvironmentVariableA("LANG", LOCALE_fr_CA_ISO8859_1); + + assert(std::text_encoding::environment_is()); + assert(te == std::text_encoding::environment()); + assert(te.mib() == std::text_encoding::id::windows1252); + + return 0; +} diff --git a/libcxx/test/libcxx/text/text_encoding/nodiscard.verify.cpp b/libcxx/test/libcxx/text/text_encoding/nodiscard.verify.cpp new file mode 100644 index 
0000000000000..8bd3b0780c26b --- /dev/null +++ b/libcxx/test/libcxx/text/text_encoding/nodiscard.verify.cpp @@ -0,0 +1,64 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +#include + +#include "test_macros.h" + +void test() { + std::text_encoding te{}; + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + te.mib(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + te.name(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + te.aliases(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + te.aliases(); + + auto alias = te.aliases(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + alias.begin(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + alias.end(); + + auto it = alias.begin(); + + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + *it; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + it[0]; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + it + 1; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + it - 1; + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + it - it; + + // expected-warning@+1 {{ignoring return value of function declared with 
'nodiscard' attribute}} + std::hash()(std::text_encoding::ASCII); + +#if !defined(TEST_HAS_NO_LOCALIZATION) && _LIBCPP_AVAILABILITY_HAS_TEXT_ENCODING_ENVIRONMENT + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + te.environment(); + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + te.environment_is(); +#endif +} + +consteval void literal() { + // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} + std::text_encoding::literal(); +} diff --git a/libcxx/test/libcxx/text/text_encoding/text_encoding.ctor/assert.id.pass.cpp b/libcxx/test/libcxx/text/text_encoding/text_encoding.ctor/assert.id.pass.cpp new file mode 100644 index 0000000000000..f23629d5020ab --- /dev/null +++ b/libcxx/test/libcxx/text/text_encoding/text_encoding.ctor/assert.id.pass.cpp @@ -0,0 +1,33 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: has-unix-headers +// REQUIRES: std-at-least-c++26 +// REQUIRES: libcpp-hardening-mode={{extensive|debug}} + +// + +// text_encoding text_encoding(std::text_encoding::id) noexcept + +#include +#include + +#include "check_assertion.h" + +int main(int, char**) { + // Make sure that text_encoding(id) asserts when the input id is not in the range of allowed values + + TEST_LIBCPP_ASSERT_FAILURE(std::text_encoding(std::text_encoding::id(33)), "Mib for NATS-DANO used"); + TEST_LIBCPP_ASSERT_FAILURE(std::text_encoding(std::text_encoding::id(34)), "Mib for NATS-DANO-ADD used"); + TEST_LIBCPP_ASSERT_FAILURE(std::text_encoding(std::text_encoding::id(-1)), "invalid text_encoding::id passed"); + TEST_LIBCPP_ASSERT_FAILURE(std::text_encoding(std::text_encoding::id(INT_MAX)), "invalid text_encoding::id passed"); + TEST_LIBCPP_ASSERT_FAILURE(std::text_encoding(std::text_encoding::id(int(std::text_encoding::id::CP50220) + 1)), + "invalid text_encoding::id passed"); + + return 0; +} diff --git a/libcxx/test/libcxx/text/text_encoding/text_encoding.ctor/assert.string_view.pass.cpp b/libcxx/test/libcxx/text/text_encoding/text_encoding.ctor/assert.string_view.pass.cpp new file mode 100644 index 0000000000000..ddeef710c62f4 --- /dev/null +++ b/libcxx/test/libcxx/text/text_encoding/text_encoding.ctor/assert.string_view.pass.cpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: has-unix-headers +// REQUIRES: std-at-least-c++26 +// REQUIRES: libcpp-hardening-mode={{extensive|debug}} + +// + +// text_encoding text_encoding(string_view) + +#include +#include +#include + +#include "check_assertion.h" + +int main(int, char**) { + std::string str(std::text_encoding::max_name_length + 1, 'X'); + + // text_encoding(string_view) asserts if its input size() > max_name_length + TEST_LIBCPP_ASSERT_FAILURE(std::text_encoding(std::string_view(str)), "input string_view must have size <= 63"); + + // text_encoding(string_view) asserts if its input contains a null terminator + std::string str2(std::text_encoding::max_name_length, 'X'); + str2[3] = '\0'; + + TEST_LIBCPP_ASSERT_FAILURE(std::text_encoding(std::string_view(str2)), "input string_view must not contain '\\0'"); + + return 0; +} diff --git a/libcxx/test/libcxx/text/text_encoding/text_encoding.members/environment.delete.verify.cpp b/libcxx/test/libcxx/text/text_encoding/text_encoding.members/environment.delete.verify.cpp new file mode 100644 index 0000000000000..23fe29bd798bd --- /dev/null +++ b/libcxx/test/libcxx/text/text_encoding/text_encoding.members/environment.delete.verify.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// REQUIRES: no-localization + +// + +// text_encoding text_encoding::environment() +// template text_encoding text_encoding::environment_is() + +// environment() and environment_is() are deleted if libc++ is built without localization. 
+ +#include + +void test() { + // expected-error@+1 {{attempt to use a deleted function}} + std::text_encoding::environment(); + // expected-error@+1 {{call to deleted function 'environment_is'}} + std::text_encoding::environment_is(); +} diff --git a/libcxx/test/libcxx/transitive_includes/cxx03.csv b/libcxx/test/libcxx/transitive_includes/cxx03.csv index c0031543e47bc..b0b58724e64ab 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx03.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx03.csv @@ -69,6 +69,7 @@ any streambuf any string any string_view any system_error +any text_encoding any tuple any type_traits any typeinfo @@ -122,15 +123,19 @@ atomic ratio atomic type_traits atomic version barrier atomic +barrier cctype barrier climits barrier cmath barrier compare barrier concepts barrier cstddef barrier cstdint +barrier cstdio barrier cstdlib barrier cstring barrier ctime +barrier cwchar +barrier cwctype barrier exception barrier initializer_list barrier iosfwd @@ -234,6 +239,7 @@ ccomplex streambuf ccomplex string ccomplex string_view ccomplex system_error +ccomplex text_encoding ccomplex tuple ccomplex type_traits ccomplex typeinfo @@ -292,6 +298,7 @@ chrono streambuf chrono string chrono string_view chrono system_error +chrono text_encoding chrono tuple chrono type_traits chrono typeinfo @@ -337,6 +344,7 @@ codecvt stdexcept codecvt string codecvt string_view codecvt system_error +codecvt text_encoding codecvt tuple codecvt type_traits codecvt typeinfo @@ -396,6 +404,7 @@ complex streambuf complex string complex string_view complex system_error +complex text_encoding complex tuple complex type_traits complex typeinfo @@ -501,6 +510,7 @@ ctgmath streambuf ctgmath string ctgmath string_view ctgmath system_error +ctgmath text_encoding ctgmath tuple ctgmath type_traits ctgmath typeinfo @@ -552,6 +562,7 @@ deque streambuf deque string deque string_view deque system_error +deque text_encoding deque tuple deque type_traits deque typeinfo @@ -607,6 +618,7 @@ 
experimental/iterator streambuf experimental/iterator string experimental/iterator string_view experimental/iterator system_error +experimental/iterator text_encoding experimental/iterator tuple experimental/iterator type_traits experimental/iterator typeinfo @@ -732,6 +744,7 @@ format streambuf format string format string_view format system_error +format text_encoding format tuple format type_traits format typeinfo @@ -778,6 +791,7 @@ forward_list streambuf forward_list string forward_list string_view forward_list system_error +forward_list text_encoding forward_list tuple forward_list type_traits forward_list typeinfo @@ -834,6 +848,7 @@ fstream streambuf fstream string fstream string_view fstream system_error +fstream text_encoding fstream tuple fstream type_traits fstream typeinfo @@ -879,6 +894,7 @@ functional streambuf functional string functional string_view functional system_error +functional text_encoding functional tuple functional type_traits functional typeinfo @@ -936,6 +952,7 @@ future streambuf future string future string_view future system_error +future text_encoding future thread future tuple future type_traits @@ -993,6 +1010,7 @@ iomanip streambuf iomanip string iomanip string_view iomanip system_error +iomanip text_encoding iomanip tuple iomanip type_traits iomanip typeinfo @@ -1033,6 +1051,7 @@ ios stdexcept ios string ios string_view ios system_error +ios text_encoding ios tuple ios type_traits ios typeinfo @@ -1086,6 +1105,7 @@ iostream streambuf iostream string iostream string_view iostream system_error +iostream text_encoding iostream tuple iostream type_traits iostream typeinfo @@ -1139,6 +1159,7 @@ istream streambuf istream string istream string_view istream system_error +istream text_encoding istream tuple istream type_traits istream typeinfo @@ -1223,6 +1244,7 @@ list streambuf list string list string_view list system_error +list text_encoding list tuple list type_traits list typeinfo @@ -1266,6 +1288,7 @@ locale streambuf locale string 
locale string_view locale system_error +locale text_encoding locale tuple locale type_traits locale typeinfo @@ -1310,6 +1333,7 @@ map streambuf map string map string_view map system_error +map text_encoding map tuple map type_traits map typeinfo @@ -1442,6 +1466,7 @@ numeric streambuf numeric string numeric string_view numeric system_error +numeric text_encoding numeric tuple numeric type_traits numeric typeinfo @@ -1523,6 +1548,7 @@ ostream streambuf ostream string ostream string_view ostream system_error +ostream text_encoding ostream tuple ostream type_traits ostream typeinfo @@ -1573,6 +1599,7 @@ print streambuf print string print string_view print system_error +print text_encoding print tuple print type_traits print typeinfo @@ -1620,6 +1647,7 @@ queue streambuf queue string queue string_view queue system_error +queue text_encoding queue tuple queue type_traits queue typeinfo @@ -1668,6 +1696,7 @@ random streambuf random string random string_view random system_error +random text_encoding random tuple random type_traits random typeinfo @@ -1742,6 +1771,7 @@ regex streambuf regex string regex string_view regex system_error +regex text_encoding regex tuple regex type_traits regex typeinfo @@ -1830,6 +1860,7 @@ set streambuf set string set string_view set system_error +set text_encoding set tuple set type_traits set typeinfo @@ -1914,6 +1945,7 @@ span streambuf span string span string_view span system_error +span text_encoding span tuple span type_traits span typeinfo @@ -1968,6 +2000,7 @@ sstream streambuf sstream string sstream string_view sstream system_error +sstream text_encoding sstream tuple sstream type_traits sstream typeinfo @@ -2015,6 +2048,7 @@ stack streambuf stack string stack string_view stack system_error +stack text_encoding stack tuple stack type_traits stack typeinfo @@ -2032,6 +2066,7 @@ stdexcept new stdexcept type_traits stdexcept typeinfo stdexcept version +stop_token cstddef stop_token iosfwd stop_token version streambuf algorithm @@ 
-2067,6 +2102,7 @@ streambuf stdexcept streambuf string streambuf string_view streambuf system_error +streambuf text_encoding streambuf tuple streambuf type_traits streambuf typeinfo @@ -2184,6 +2220,7 @@ strstream streambuf strstream string strstream string_view strstream system_error +strstream text_encoding strstream tuple strstream type_traits strstream typeinfo @@ -2239,6 +2276,7 @@ syncstream streambuf syncstream string syncstream string_view syncstream system_error +syncstream text_encoding syncstream tuple syncstream type_traits syncstream typeinfo @@ -2331,6 +2369,7 @@ thread streambuf thread string thread string_view thread system_error +thread text_encoding thread tuple thread type_traits thread typeinfo @@ -2443,6 +2482,7 @@ unordered_set streambuf unordered_set string unordered_set string_view unordered_set system_error +unordered_set text_encoding unordered_set tuple unordered_set type_traits unordered_set typeinfo @@ -2499,6 +2539,7 @@ valarray streambuf valarray string valarray string_view valarray system_error +valarray text_encoding valarray tuple valarray type_traits valarray typeinfo @@ -2560,6 +2601,7 @@ vector streambuf vector string vector string_view vector system_error +vector text_encoding vector tuple vector type_traits vector typeinfo diff --git a/libcxx/test/libcxx/transitive_includes/cxx11.csv b/libcxx/test/libcxx/transitive_includes/cxx11.csv index c0031543e47bc..b0b58724e64ab 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx11.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx11.csv @@ -69,6 +69,7 @@ any streambuf any string any string_view any system_error +any text_encoding any tuple any type_traits any typeinfo @@ -122,15 +123,19 @@ atomic ratio atomic type_traits atomic version barrier atomic +barrier cctype barrier climits barrier cmath barrier compare barrier concepts barrier cstddef barrier cstdint +barrier cstdio barrier cstdlib barrier cstring barrier ctime +barrier cwchar +barrier cwctype barrier exception barrier 
initializer_list barrier iosfwd @@ -234,6 +239,7 @@ ccomplex streambuf ccomplex string ccomplex string_view ccomplex system_error +ccomplex text_encoding ccomplex tuple ccomplex type_traits ccomplex typeinfo @@ -292,6 +298,7 @@ chrono streambuf chrono string chrono string_view chrono system_error +chrono text_encoding chrono tuple chrono type_traits chrono typeinfo @@ -337,6 +344,7 @@ codecvt stdexcept codecvt string codecvt string_view codecvt system_error +codecvt text_encoding codecvt tuple codecvt type_traits codecvt typeinfo @@ -396,6 +404,7 @@ complex streambuf complex string complex string_view complex system_error +complex text_encoding complex tuple complex type_traits complex typeinfo @@ -501,6 +510,7 @@ ctgmath streambuf ctgmath string ctgmath string_view ctgmath system_error +ctgmath text_encoding ctgmath tuple ctgmath type_traits ctgmath typeinfo @@ -552,6 +562,7 @@ deque streambuf deque string deque string_view deque system_error +deque text_encoding deque tuple deque type_traits deque typeinfo @@ -607,6 +618,7 @@ experimental/iterator streambuf experimental/iterator string experimental/iterator string_view experimental/iterator system_error +experimental/iterator text_encoding experimental/iterator tuple experimental/iterator type_traits experimental/iterator typeinfo @@ -732,6 +744,7 @@ format streambuf format string format string_view format system_error +format text_encoding format tuple format type_traits format typeinfo @@ -778,6 +791,7 @@ forward_list streambuf forward_list string forward_list string_view forward_list system_error +forward_list text_encoding forward_list tuple forward_list type_traits forward_list typeinfo @@ -834,6 +848,7 @@ fstream streambuf fstream string fstream string_view fstream system_error +fstream text_encoding fstream tuple fstream type_traits fstream typeinfo @@ -879,6 +894,7 @@ functional streambuf functional string functional string_view functional system_error +functional text_encoding functional tuple functional 
type_traits functional typeinfo @@ -936,6 +952,7 @@ future streambuf future string future string_view future system_error +future text_encoding future thread future tuple future type_traits @@ -993,6 +1010,7 @@ iomanip streambuf iomanip string iomanip string_view iomanip system_error +iomanip text_encoding iomanip tuple iomanip type_traits iomanip typeinfo @@ -1033,6 +1051,7 @@ ios stdexcept ios string ios string_view ios system_error +ios text_encoding ios tuple ios type_traits ios typeinfo @@ -1086,6 +1105,7 @@ iostream streambuf iostream string iostream string_view iostream system_error +iostream text_encoding iostream tuple iostream type_traits iostream typeinfo @@ -1139,6 +1159,7 @@ istream streambuf istream string istream string_view istream system_error +istream text_encoding istream tuple istream type_traits istream typeinfo @@ -1223,6 +1244,7 @@ list streambuf list string list string_view list system_error +list text_encoding list tuple list type_traits list typeinfo @@ -1266,6 +1288,7 @@ locale streambuf locale string locale string_view locale system_error +locale text_encoding locale tuple locale type_traits locale typeinfo @@ -1310,6 +1333,7 @@ map streambuf map string map string_view map system_error +map text_encoding map tuple map type_traits map typeinfo @@ -1442,6 +1466,7 @@ numeric streambuf numeric string numeric string_view numeric system_error +numeric text_encoding numeric tuple numeric type_traits numeric typeinfo @@ -1523,6 +1548,7 @@ ostream streambuf ostream string ostream string_view ostream system_error +ostream text_encoding ostream tuple ostream type_traits ostream typeinfo @@ -1573,6 +1599,7 @@ print streambuf print string print string_view print system_error +print text_encoding print tuple print type_traits print typeinfo @@ -1620,6 +1647,7 @@ queue streambuf queue string queue string_view queue system_error +queue text_encoding queue tuple queue type_traits queue typeinfo @@ -1668,6 +1696,7 @@ random streambuf random string random 
string_view random system_error +random text_encoding random tuple random type_traits random typeinfo @@ -1742,6 +1771,7 @@ regex streambuf regex string regex string_view regex system_error +regex text_encoding regex tuple regex type_traits regex typeinfo @@ -1830,6 +1860,7 @@ set streambuf set string set string_view set system_error +set text_encoding set tuple set type_traits set typeinfo @@ -1914,6 +1945,7 @@ span streambuf span string span string_view span system_error +span text_encoding span tuple span type_traits span typeinfo @@ -1968,6 +2000,7 @@ sstream streambuf sstream string sstream string_view sstream system_error +sstream text_encoding sstream tuple sstream type_traits sstream typeinfo @@ -2015,6 +2048,7 @@ stack streambuf stack string stack string_view stack system_error +stack text_encoding stack tuple stack type_traits stack typeinfo @@ -2032,6 +2066,7 @@ stdexcept new stdexcept type_traits stdexcept typeinfo stdexcept version +stop_token cstddef stop_token iosfwd stop_token version streambuf algorithm @@ -2067,6 +2102,7 @@ streambuf stdexcept streambuf string streambuf string_view streambuf system_error +streambuf text_encoding streambuf tuple streambuf type_traits streambuf typeinfo @@ -2184,6 +2220,7 @@ strstream streambuf strstream string strstream string_view strstream system_error +strstream text_encoding strstream tuple strstream type_traits strstream typeinfo @@ -2239,6 +2276,7 @@ syncstream streambuf syncstream string syncstream string_view syncstream system_error +syncstream text_encoding syncstream tuple syncstream type_traits syncstream typeinfo @@ -2331,6 +2369,7 @@ thread streambuf thread string thread string_view thread system_error +thread text_encoding thread tuple thread type_traits thread typeinfo @@ -2443,6 +2482,7 @@ unordered_set streambuf unordered_set string unordered_set string_view unordered_set system_error +unordered_set text_encoding unordered_set tuple unordered_set type_traits unordered_set typeinfo @@ -2499,6 
+2539,7 @@ valarray streambuf valarray string valarray string_view valarray system_error +valarray text_encoding valarray tuple valarray type_traits valarray typeinfo @@ -2560,6 +2601,7 @@ vector streambuf vector string vector string_view vector system_error +vector text_encoding vector tuple vector type_traits vector typeinfo diff --git a/libcxx/test/libcxx/transitive_includes/cxx14.csv b/libcxx/test/libcxx/transitive_includes/cxx14.csv index c2eb5b44e8d7a..b438907b35195 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx14.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx14.csv @@ -71,6 +71,7 @@ any streambuf any string any string_view any system_error +any text_encoding any tuple any type_traits any typeinfo @@ -125,15 +126,19 @@ atomic ratio atomic type_traits atomic version barrier atomic +barrier cctype barrier climits barrier cmath barrier compare barrier concepts barrier cstddef barrier cstdint +barrier cstdio barrier cstdlib barrier cstring barrier ctime +barrier cwchar +barrier cwctype barrier exception barrier initializer_list barrier iosfwd @@ -239,6 +244,7 @@ ccomplex streambuf ccomplex string ccomplex string_view ccomplex system_error +ccomplex text_encoding ccomplex tuple ccomplex type_traits ccomplex typeinfo @@ -298,6 +304,7 @@ chrono streambuf chrono string chrono string_view chrono system_error +chrono text_encoding chrono tuple chrono type_traits chrono typeinfo @@ -344,6 +351,7 @@ codecvt stdexcept codecvt string codecvt string_view codecvt system_error +codecvt text_encoding codecvt tuple codecvt type_traits codecvt typeinfo @@ -404,6 +412,7 @@ complex streambuf complex string complex string_view complex system_error +complex text_encoding complex tuple complex type_traits complex typeinfo @@ -511,6 +520,7 @@ ctgmath streambuf ctgmath string ctgmath string_view ctgmath system_error +ctgmath text_encoding ctgmath tuple ctgmath type_traits ctgmath typeinfo @@ -563,6 +573,7 @@ deque streambuf deque string deque string_view deque 
system_error +deque text_encoding deque tuple deque type_traits deque typeinfo @@ -619,6 +630,7 @@ experimental/iterator streambuf experimental/iterator string experimental/iterator string_view experimental/iterator system_error +experimental/iterator text_encoding experimental/iterator tuple experimental/iterator type_traits experimental/iterator typeinfo @@ -751,6 +763,7 @@ format streambuf format string format string_view format system_error +format text_encoding format tuple format type_traits format typeinfo @@ -798,6 +811,7 @@ forward_list streambuf forward_list string forward_list string_view forward_list system_error +forward_list text_encoding forward_list tuple forward_list type_traits forward_list typeinfo @@ -855,6 +869,7 @@ fstream streambuf fstream string fstream string_view fstream system_error +fstream text_encoding fstream tuple fstream type_traits fstream typeinfo @@ -901,6 +916,7 @@ functional streambuf functional string functional string_view functional system_error +functional text_encoding functional tuple functional type_traits functional typeinfo @@ -959,6 +975,7 @@ future streambuf future string future string_view future system_error +future text_encoding future thread future tuple future type_traits @@ -1017,6 +1034,7 @@ iomanip streambuf iomanip string iomanip string_view iomanip system_error +iomanip text_encoding iomanip tuple iomanip type_traits iomanip typeinfo @@ -1058,6 +1076,7 @@ ios stdexcept ios string ios string_view ios system_error +ios text_encoding ios tuple ios type_traits ios typeinfo @@ -1112,6 +1131,7 @@ iostream streambuf iostream string iostream string_view iostream system_error +iostream text_encoding iostream tuple iostream type_traits iostream typeinfo @@ -1166,6 +1186,7 @@ istream streambuf istream string istream string_view istream system_error +istream text_encoding istream tuple istream type_traits istream typeinfo @@ -1251,6 +1272,7 @@ list streambuf list string list string_view list system_error +list 
text_encoding list tuple list type_traits list typeinfo @@ -1295,6 +1317,7 @@ locale streambuf locale string locale string_view locale system_error +locale text_encoding locale tuple locale type_traits locale typeinfo @@ -1340,6 +1363,7 @@ map streambuf map string map string_view map system_error +map text_encoding map tuple map type_traits map typeinfo @@ -1473,6 +1497,7 @@ numeric streambuf numeric string numeric string_view numeric system_error +numeric text_encoding numeric tuple numeric type_traits numeric typeinfo @@ -1555,6 +1580,7 @@ ostream streambuf ostream string ostream string_view ostream system_error +ostream text_encoding ostream tuple ostream type_traits ostream typeinfo @@ -1606,6 +1632,7 @@ print streambuf print string print string_view print system_error +print text_encoding print tuple print type_traits print typeinfo @@ -1654,6 +1681,7 @@ queue streambuf queue string queue string_view queue system_error +queue text_encoding queue tuple queue type_traits queue typeinfo @@ -1702,6 +1730,7 @@ random streambuf random string random string_view random system_error +random text_encoding random tuple random type_traits random typeinfo @@ -1777,6 +1806,7 @@ regex streambuf regex string regex string_view regex system_error +regex text_encoding regex tuple regex type_traits regex typeinfo @@ -1866,6 +1896,7 @@ set streambuf set string set string_view set system_error +set text_encoding set tuple set type_traits set typeinfo @@ -1952,6 +1983,7 @@ span streambuf span string span string_view span system_error +span text_encoding span tuple span type_traits span typeinfo @@ -2007,6 +2039,7 @@ sstream streambuf sstream string sstream string_view sstream system_error +sstream text_encoding sstream tuple sstream type_traits sstream typeinfo @@ -2055,6 +2088,7 @@ stack streambuf stack string stack string_view stack system_error +stack text_encoding stack tuple stack type_traits stack typeinfo @@ -2072,6 +2106,7 @@ stdexcept new stdexcept type_traits stdexcept 
typeinfo stdexcept version +stop_token cstddef stop_token iosfwd stop_token version streambuf algorithm @@ -2108,6 +2143,7 @@ streambuf stdexcept streambuf string streambuf string_view streambuf system_error +streambuf text_encoding streambuf tuple streambuf type_traits streambuf typeinfo @@ -2228,6 +2264,7 @@ strstream streambuf strstream string strstream string_view strstream system_error +strstream text_encoding strstream tuple strstream type_traits strstream typeinfo @@ -2284,6 +2321,7 @@ syncstream streambuf syncstream string syncstream string_view syncstream system_error +syncstream text_encoding syncstream tuple syncstream type_traits syncstream typeinfo @@ -2378,6 +2416,7 @@ thread streambuf thread string thread string_view thread system_error +thread text_encoding thread tuple thread type_traits thread typeinfo @@ -2492,6 +2531,7 @@ unordered_set streambuf unordered_set string unordered_set string_view unordered_set system_error +unordered_set text_encoding unordered_set tuple unordered_set type_traits unordered_set typeinfo @@ -2549,6 +2589,7 @@ valarray streambuf valarray string valarray string_view valarray system_error +valarray text_encoding valarray tuple valarray type_traits valarray typeinfo @@ -2611,6 +2652,7 @@ vector streambuf vector string vector string_view vector system_error +vector text_encoding vector tuple vector type_traits vector typeinfo diff --git a/libcxx/test/libcxx/transitive_includes/cxx17.csv b/libcxx/test/libcxx/transitive_includes/cxx17.csv index 332cb62f35b5f..a93fb1e26196b 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx17.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx17.csv @@ -69,6 +69,7 @@ any streambuf any string any string_view any system_error +any text_encoding any tuple any type_traits any typeinfo @@ -122,15 +123,19 @@ atomic ratio atomic type_traits atomic version barrier atomic +barrier cctype barrier climits barrier cmath barrier compare barrier concepts barrier cstddef barrier cstdint +barrier cstdio 
barrier cstdlib barrier cstring barrier ctime +barrier cwchar +barrier cwctype barrier exception barrier initializer_list barrier iosfwd @@ -234,6 +239,7 @@ ccomplex streambuf ccomplex string ccomplex string_view ccomplex system_error +ccomplex text_encoding ccomplex tuple ccomplex type_traits ccomplex typeinfo @@ -294,6 +300,7 @@ chrono streambuf chrono string chrono string_view chrono system_error +chrono text_encoding chrono tuple chrono type_traits chrono typeinfo @@ -339,6 +346,7 @@ codecvt stdexcept codecvt string codecvt string_view codecvt system_error +codecvt text_encoding codecvt tuple codecvt type_traits codecvt typeinfo @@ -398,6 +406,7 @@ complex streambuf complex string complex string_view complex system_error +complex text_encoding complex tuple complex type_traits complex typeinfo @@ -503,6 +512,7 @@ ctgmath streambuf ctgmath string ctgmath string_view ctgmath system_error +ctgmath text_encoding ctgmath tuple ctgmath type_traits ctgmath typeinfo @@ -554,6 +564,7 @@ deque streambuf deque string deque string_view deque system_error +deque text_encoding deque tuple deque type_traits deque typeinfo @@ -609,6 +620,7 @@ experimental/iterator streambuf experimental/iterator string experimental/iterator string_view experimental/iterator system_error +experimental/iterator text_encoding experimental/iterator tuple experimental/iterator type_traits experimental/iterator typeinfo @@ -693,6 +705,7 @@ filesystem streambuf filesystem string filesystem string_view filesystem system_error +filesystem text_encoding filesystem tuple filesystem type_traits filesystem typeinfo @@ -758,6 +771,7 @@ format streambuf format string format string_view format system_error +format text_encoding format tuple format type_traits format typeinfo @@ -804,6 +818,7 @@ forward_list streambuf forward_list string forward_list string_view forward_list system_error +forward_list text_encoding forward_list tuple forward_list type_traits forward_list typeinfo @@ -860,6 +875,7 @@ fstream 
streambuf fstream string fstream string_view fstream system_error +fstream text_encoding fstream tuple fstream type_traits fstream typeinfo @@ -905,6 +921,7 @@ functional streambuf functional string functional string_view functional system_error +functional text_encoding functional tuple functional type_traits functional typeinfo @@ -962,6 +979,7 @@ future streambuf future string future string_view future system_error +future text_encoding future thread future tuple future type_traits @@ -1019,6 +1037,7 @@ iomanip streambuf iomanip string iomanip string_view iomanip system_error +iomanip text_encoding iomanip tuple iomanip type_traits iomanip typeinfo @@ -1059,6 +1078,7 @@ ios stdexcept ios string ios string_view ios system_error +ios text_encoding ios tuple ios type_traits ios typeinfo @@ -1112,6 +1132,7 @@ iostream streambuf iostream string iostream string_view iostream system_error +iostream text_encoding iostream tuple iostream type_traits iostream typeinfo @@ -1165,6 +1186,7 @@ istream streambuf istream string istream string_view istream system_error +istream text_encoding istream tuple istream type_traits istream typeinfo @@ -1249,6 +1271,7 @@ list streambuf list string list string_view list system_error +list text_encoding list tuple list type_traits list typeinfo @@ -1292,6 +1315,7 @@ locale streambuf locale string locale string_view locale system_error +locale text_encoding locale tuple locale type_traits locale typeinfo @@ -1336,6 +1360,7 @@ map streambuf map string map string_view map system_error +map text_encoding map tuple map type_traits map typeinfo @@ -1495,6 +1520,7 @@ numeric streambuf numeric string numeric string_view numeric system_error +numeric text_encoding numeric tuple numeric type_traits numeric typeinfo @@ -1576,6 +1602,7 @@ ostream streambuf ostream string ostream string_view ostream system_error +ostream text_encoding ostream tuple ostream type_traits ostream typeinfo @@ -1626,6 +1653,7 @@ print streambuf print string print 
string_view print system_error +print text_encoding print tuple print type_traits print typeinfo @@ -1673,6 +1701,7 @@ queue streambuf queue string queue string_view queue system_error +queue text_encoding queue tuple queue type_traits queue typeinfo @@ -1721,6 +1750,7 @@ random streambuf random string random string_view random system_error +random text_encoding random tuple random type_traits random typeinfo @@ -1795,6 +1825,7 @@ regex streambuf regex string regex string_view regex system_error +regex text_encoding regex tuple regex type_traits regex typeinfo @@ -1883,6 +1914,7 @@ set streambuf set string set string_view set system_error +set text_encoding set tuple set type_traits set typeinfo @@ -1967,6 +1999,7 @@ span streambuf span string span string_view span system_error +span text_encoding span tuple span type_traits span typeinfo @@ -2021,6 +2054,7 @@ sstream streambuf sstream string sstream string_view sstream system_error +sstream text_encoding sstream tuple sstream type_traits sstream typeinfo @@ -2068,6 +2102,7 @@ stack streambuf stack string stack string_view stack system_error +stack text_encoding stack tuple stack type_traits stack typeinfo @@ -2085,6 +2120,7 @@ stdexcept new stdexcept type_traits stdexcept typeinfo stdexcept version +stop_token cstddef stop_token iosfwd stop_token version streambuf algorithm @@ -2120,6 +2156,7 @@ streambuf stdexcept streambuf string streambuf string_view streambuf system_error +streambuf text_encoding streambuf tuple streambuf type_traits streambuf typeinfo @@ -2237,6 +2274,7 @@ strstream streambuf strstream string strstream string_view strstream system_error +strstream text_encoding strstream tuple strstream type_traits strstream typeinfo @@ -2292,6 +2330,7 @@ syncstream streambuf syncstream string syncstream string_view syncstream system_error +syncstream text_encoding syncstream tuple syncstream type_traits syncstream typeinfo @@ -2384,6 +2423,7 @@ thread streambuf thread string thread string_view thread 
system_error +thread text_encoding thread tuple thread type_traits thread typeinfo @@ -2496,6 +2536,7 @@ unordered_set streambuf unordered_set string unordered_set string_view unordered_set system_error +unordered_set text_encoding unordered_set tuple unordered_set type_traits unordered_set typeinfo @@ -2552,6 +2593,7 @@ valarray streambuf valarray string valarray string_view valarray system_error +valarray text_encoding valarray tuple valarray type_traits valarray typeinfo @@ -2613,6 +2655,7 @@ vector streambuf vector string vector string_view vector system_error +vector text_encoding vector tuple vector type_traits vector typeinfo diff --git a/libcxx/test/libcxx/transitive_includes/cxx20.csv b/libcxx/test/libcxx/transitive_includes/cxx20.csv index 55c79acff5a8f..ded0dd1a51c16 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx20.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx20.csv @@ -219,6 +219,7 @@ ccomplex streambuf ccomplex string ccomplex string_view ccomplex system_error +ccomplex text_encoding ccomplex tuple ccomplex type_traits ccomplex typeinfo @@ -289,6 +290,7 @@ chrono streambuf chrono string chrono string_view chrono system_error +chrono text_encoding chrono tuple chrono type_traits chrono typeinfo @@ -334,6 +336,7 @@ codecvt stdexcept codecvt string codecvt string_view codecvt system_error +codecvt text_encoding codecvt tuple codecvt type_traits codecvt typeinfo @@ -393,6 +396,7 @@ complex streambuf complex string complex string_view complex system_error +complex text_encoding complex tuple complex type_traits complex typeinfo @@ -499,6 +503,7 @@ ctgmath streambuf ctgmath string ctgmath string_view ctgmath system_error +ctgmath text_encoding ctgmath tuple ctgmath type_traits ctgmath typeinfo @@ -550,6 +555,7 @@ deque streambuf deque string deque string_view deque system_error +deque text_encoding deque tuple deque type_traits deque typeinfo @@ -605,6 +611,7 @@ experimental/iterator streambuf experimental/iterator string 
experimental/iterator string_view experimental/iterator system_error +experimental/iterator text_encoding experimental/iterator tuple experimental/iterator type_traits experimental/iterator typeinfo @@ -689,6 +696,7 @@ filesystem streambuf filesystem string filesystem string_view filesystem system_error +filesystem text_encoding filesystem tuple filesystem type_traits filesystem typeinfo @@ -754,6 +762,7 @@ format streambuf format string format string_view format system_error +format text_encoding format tuple format type_traits format typeinfo @@ -800,6 +809,7 @@ forward_list streambuf forward_list string forward_list string_view forward_list system_error +forward_list text_encoding forward_list tuple forward_list type_traits forward_list typeinfo @@ -856,6 +866,7 @@ fstream streambuf fstream string fstream string_view fstream system_error +fstream text_encoding fstream tuple fstream type_traits fstream typeinfo @@ -901,6 +912,7 @@ functional streambuf functional string functional string_view functional system_error +functional text_encoding functional tuple functional type_traits functional typeinfo @@ -956,6 +968,7 @@ future streambuf future string future string_view future system_error +future text_encoding future thread future tuple future type_traits @@ -1013,6 +1026,7 @@ iomanip streambuf iomanip string iomanip string_view iomanip system_error +iomanip text_encoding iomanip tuple iomanip type_traits iomanip typeinfo @@ -1053,6 +1067,7 @@ ios stdexcept ios string ios string_view ios system_error +ios text_encoding ios tuple ios type_traits ios typeinfo @@ -1106,6 +1121,7 @@ iostream streambuf iostream string iostream string_view iostream system_error +iostream text_encoding iostream tuple iostream type_traits iostream typeinfo @@ -1159,6 +1175,7 @@ istream streambuf istream string istream string_view istream system_error +istream text_encoding istream tuple istream type_traits istream typeinfo @@ -1243,6 +1260,7 @@ list streambuf list string list string_view 
list system_error +list text_encoding list tuple list type_traits list typeinfo @@ -1286,6 +1304,7 @@ locale streambuf locale string locale string_view locale system_error +locale text_encoding locale tuple locale type_traits locale typeinfo @@ -1330,6 +1349,7 @@ map streambuf map string map string_view map system_error +map text_encoding map tuple map type_traits map typeinfo @@ -1489,6 +1509,7 @@ numeric streambuf numeric string numeric string_view numeric system_error +numeric text_encoding numeric tuple numeric type_traits numeric typeinfo @@ -1570,6 +1591,7 @@ ostream streambuf ostream string ostream string_view ostream system_error +ostream text_encoding ostream tuple ostream type_traits ostream typeinfo @@ -1620,6 +1642,7 @@ print streambuf print string print string_view print system_error +print text_encoding print tuple print type_traits print typeinfo @@ -1667,6 +1690,7 @@ queue streambuf queue string queue string_view queue system_error +queue text_encoding queue tuple queue type_traits queue typeinfo @@ -1715,6 +1739,7 @@ random streambuf random string random string_view random system_error +random text_encoding random tuple random type_traits random typeinfo @@ -1762,6 +1787,7 @@ ranges streambuf ranges string ranges string_view ranges system_error +ranges text_encoding ranges tuple ranges type_traits ranges typeinfo @@ -1813,6 +1839,7 @@ regex streambuf regex string regex string_view regex system_error +regex text_encoding regex tuple regex type_traits regex typeinfo @@ -1901,6 +1928,7 @@ set streambuf set string set string_view set system_error +set text_encoding set tuple set type_traits set typeinfo @@ -1985,6 +2013,7 @@ span streambuf span string span string_view span system_error +span text_encoding span tuple span type_traits span typeinfo @@ -2039,6 +2068,7 @@ sstream streambuf sstream string sstream string_view sstream system_error +sstream text_encoding sstream tuple sstream type_traits sstream typeinfo @@ -2086,6 +2116,7 @@ stack streambuf 
stack string stack string_view stack system_error +stack text_encoding stack tuple stack type_traits stack typeinfo @@ -2150,6 +2181,7 @@ streambuf stdexcept streambuf string streambuf string_view streambuf system_error +streambuf text_encoding streambuf tuple streambuf type_traits streambuf typeinfo @@ -2267,6 +2299,7 @@ strstream streambuf strstream string strstream string_view strstream system_error +strstream text_encoding strstream tuple strstream type_traits strstream typeinfo @@ -2322,6 +2355,7 @@ syncstream streambuf syncstream string syncstream string_view syncstream system_error +syncstream text_encoding syncstream tuple syncstream type_traits syncstream typeinfo @@ -2412,6 +2446,7 @@ thread streambuf thread string thread string_view thread system_error +thread text_encoding thread tuple thread type_traits thread typeinfo @@ -2524,6 +2559,7 @@ unordered_set streambuf unordered_set string unordered_set string_view unordered_set system_error +unordered_set text_encoding unordered_set tuple unordered_set type_traits unordered_set typeinfo @@ -2580,6 +2616,7 @@ valarray streambuf valarray string valarray string_view valarray system_error +valarray text_encoding valarray tuple valarray type_traits valarray typeinfo @@ -2641,6 +2678,7 @@ vector streambuf vector string vector string_view vector system_error +vector text_encoding vector tuple vector type_traits vector typeinfo diff --git a/libcxx/test/libcxx/transitive_includes/cxx23.csv b/libcxx/test/libcxx/transitive_includes/cxx23.csv index 073f698786117..8385b37c07822 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx23.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx23.csv @@ -90,6 +90,7 @@ ccomplex stdexcept ccomplex streambuf ccomplex string ccomplex string_view +ccomplex text_encoding ccomplex tuple ccomplex typeinfo ccomplex version @@ -129,6 +130,7 @@ chrono stdexcept chrono streambuf chrono string chrono string_view +chrono text_encoding chrono tuple chrono typeinfo chrono version @@ -152,6 
+154,7 @@ codecvt limits codecvt stdexcept codecvt string codecvt string_view +codecvt text_encoding codecvt tuple codecvt typeinfo codecvt version @@ -185,6 +188,7 @@ complex stdexcept complex streambuf complex string complex string_view +complex text_encoding complex tuple complex typeinfo complex version @@ -234,6 +238,7 @@ ctgmath stdexcept ctgmath streambuf ctgmath string ctgmath string_view +ctgmath text_encoding ctgmath tuple ctgmath typeinfo ctgmath version @@ -285,6 +290,7 @@ experimental/iterator stdexcept experimental/iterator streambuf experimental/iterator string experimental/iterator string_view +experimental/iterator text_encoding experimental/iterator tuple experimental/iterator typeinfo experimental/iterator variant @@ -330,6 +336,7 @@ filesystem stdexcept filesystem streambuf filesystem string filesystem string_view +filesystem text_encoding filesystem tuple filesystem typeinfo filesystem version @@ -380,6 +387,7 @@ format optional format stdexcept format string format string_view +format text_encoding format tuple format typeinfo format version @@ -419,6 +427,7 @@ fstream stdexcept fstream streambuf fstream string fstream string_view +fstream text_encoding fstream tuple fstream typeinfo fstream version @@ -463,6 +472,7 @@ future stdexcept future streambuf future string future string_view +future text_encoding future tuple future typeinfo future version @@ -490,6 +500,7 @@ iomanip stdexcept iomanip streambuf iomanip string iomanip string_view +iomanip text_encoding iomanip tuple iomanip typeinfo iomanip version @@ -513,6 +524,7 @@ ios ratio ios stdexcept ios string ios string_view +ios text_encoding ios tuple ios typeinfo ios version @@ -548,6 +560,7 @@ iostream stdexcept iostream streambuf iostream string iostream string_view +iostream text_encoding iostream tuple iostream typeinfo iostream version @@ -575,6 +588,7 @@ istream stdexcept istream streambuf istream string istream string_view +istream text_encoding istream tuple istream typeinfo 
istream version @@ -631,6 +645,7 @@ locale stdexcept locale streambuf locale string locale string_view +locale text_encoding locale tuple locale typeinfo locale version @@ -735,6 +750,7 @@ ostream stdexcept ostream streambuf ostream string ostream string_view +ostream text_encoding ostream tuple ostream typeinfo ostream version @@ -760,6 +776,7 @@ print optional print stdexcept print string print string_view +print text_encoding print tuple print typeinfo print version @@ -850,6 +867,7 @@ regex limits regex stdexcept regex string regex string_view +regex text_encoding regex tuple regex typeinfo regex vector @@ -916,6 +934,7 @@ sstream stdexcept sstream streambuf sstream string sstream string_view +sstream text_encoding sstream tuple sstream typeinfo sstream version @@ -960,6 +979,7 @@ streambuf ratio streambuf stdexcept streambuf string streambuf string_view +streambuf text_encoding streambuf tuple streambuf typeinfo streambuf version @@ -1015,6 +1035,7 @@ strstream stdexcept strstream streambuf strstream string strstream string_view +strstream text_encoding strstream tuple strstream typeinfo strstream version @@ -1050,6 +1071,7 @@ syncstream stdexcept syncstream streambuf syncstream string syncstream string_view +syncstream text_encoding syncstream tuple syncstream typeinfo syncstream version @@ -1098,6 +1120,7 @@ thread stdexcept thread streambuf thread string thread string_view +thread text_encoding thread tuple thread typeinfo thread version diff --git a/libcxx/test/libcxx/transitive_includes/cxx26.csv b/libcxx/test/libcxx/transitive_includes/cxx26.csv index 1450a13e125ef..cf1154a251f23 100644 --- a/libcxx/test/libcxx/transitive_includes/cxx26.csv +++ b/libcxx/test/libcxx/transitive_includes/cxx26.csv @@ -85,6 +85,7 @@ ccomplex stdexcept ccomplex streambuf ccomplex string ccomplex string_view +ccomplex text_encoding ccomplex tuple ccomplex typeinfo ccomplex version @@ -123,6 +124,7 @@ chrono stdexcept chrono streambuf chrono string chrono string_view +chrono 
text_encoding chrono tuple chrono typeinfo chrono version @@ -146,6 +148,7 @@ codecvt limits codecvt stdexcept codecvt string codecvt string_view +codecvt text_encoding codecvt tuple codecvt typeinfo codecvt version @@ -176,6 +179,7 @@ complex stdexcept complex streambuf complex string complex string_view +complex text_encoding complex tuple complex typeinfo complex version @@ -221,6 +225,7 @@ ctgmath stdexcept ctgmath streambuf ctgmath string ctgmath string_view +ctgmath text_encoding ctgmath tuple ctgmath typeinfo ctgmath version @@ -268,6 +273,7 @@ experimental/iterator stdexcept experimental/iterator streambuf experimental/iterator string experimental/iterator string_view +experimental/iterator text_encoding experimental/iterator tuple experimental/iterator typeinfo experimental/iterator variant @@ -312,6 +318,7 @@ filesystem ratio filesystem stdexcept filesystem string filesystem string_view +filesystem text_encoding filesystem tuple filesystem typeinfo filesystem version @@ -361,6 +368,7 @@ format optional format stdexcept format string format string_view +format text_encoding format tuple format typeinfo format version @@ -396,6 +404,7 @@ fstream stdexcept fstream streambuf fstream string fstream string_view +fstream text_encoding fstream tuple fstream typeinfo fstream version @@ -439,6 +448,7 @@ future stdexcept future streambuf future string future string_view +future text_encoding future tuple future typeinfo future version @@ -462,6 +472,7 @@ iomanip limits iomanip stdexcept iomanip string iomanip string_view +iomanip text_encoding iomanip tuple iomanip typeinfo iomanip version @@ -483,6 +494,7 @@ ios limits ios stdexcept ios string ios string_view +ios text_encoding ios tuple ios typeinfo ios version @@ -514,6 +526,7 @@ iostream stdexcept iostream streambuf iostream string iostream string_view +iostream text_encoding iostream tuple iostream typeinfo iostream version @@ -538,6 +551,7 @@ istream stdexcept istream streambuf istream string istream 
string_view +istream text_encoding istream tuple istream typeinfo istream version @@ -592,6 +606,7 @@ locale stdexcept locale streambuf locale string locale string_view +locale text_encoding locale tuple locale typeinfo locale version @@ -691,6 +706,7 @@ ostream stdexcept ostream streambuf ostream string ostream string_view +ostream text_encoding ostream tuple ostream typeinfo ostream version @@ -715,6 +731,7 @@ print optional print stdexcept print string print string_view +print text_encoding print tuple print typeinfo print version @@ -797,6 +814,7 @@ regex limits regex stdexcept regex string regex string_view +regex text_encoding regex tuple regex typeinfo regex vector @@ -860,6 +878,7 @@ sstream stdexcept sstream streambuf sstream string sstream string_view +sstream text_encoding sstream tuple sstream typeinfo sstream version @@ -901,6 +920,7 @@ streambuf limits streambuf stdexcept streambuf string streambuf string_view +streambuf text_encoding streambuf tuple streambuf typeinfo streambuf version @@ -953,6 +973,7 @@ strstream stdexcept strstream streambuf strstream string strstream string_view +strstream text_encoding strstream tuple strstream typeinfo strstream version @@ -986,6 +1007,7 @@ syncstream stdexcept syncstream streambuf syncstream string syncstream string_view +syncstream text_encoding syncstream tuple syncstream typeinfo syncstream version @@ -1006,6 +1028,20 @@ system_error string system_error string_view system_error tuple system_error version +text_encoding cctype +text_encoding climits +text_encoding compare +text_encoding cstdint +text_encoding cstdio +text_encoding cstring +text_encoding cwchar +text_encoding cwctype +text_encoding initializer_list +text_encoding iosfwd +text_encoding limits +text_encoding stdexcept +text_encoding string_view +text_encoding version thread array thread bitset thread cctype @@ -1032,6 +1068,7 @@ thread stdexcept thread streambuf thread string thread string_view +thread text_encoding thread tuple thread typeinfo 
thread version diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/text_encoding.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/text_encoding.version.compile.pass.cpp new file mode 100644 index 0000000000000..1678e8840af8d --- /dev/null +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/text_encoding.version.compile.pass.cpp @@ -0,0 +1,62 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// WARNING: This test was generated by generate_feature_test_macro_components.py +// and should not be edited manually. + +// + +// Test the feature test macros defined by + +// clang-format off + +#include +#include "test_macros.h" + +#if TEST_STD_VER < 14 + +# ifdef __cpp_lib_text_encoding +# error "__cpp_lib_text_encoding should not be defined before c++26" +# endif + +#elif TEST_STD_VER == 14 + +# ifdef __cpp_lib_text_encoding +# error "__cpp_lib_text_encoding should not be defined before c++26" +# endif + +#elif TEST_STD_VER == 17 + +# ifdef __cpp_lib_text_encoding +# error "__cpp_lib_text_encoding should not be defined before c++26" +# endif + +#elif TEST_STD_VER == 20 + +# ifdef __cpp_lib_text_encoding +# error "__cpp_lib_text_encoding should not be defined before c++26" +# endif + +#elif TEST_STD_VER == 23 + +# ifdef __cpp_lib_text_encoding +# error "__cpp_lib_text_encoding should not be defined before c++26" +# endif + +#elif TEST_STD_VER > 23 + +# ifndef __cpp_lib_text_encoding +# error "__cpp_lib_text_encoding should be defined in c++26" +# endif +# if __cpp_lib_text_encoding != 202306L +# error "__cpp_lib_text_encoding should have the 
value 202306L in c++26" +# endif + +#endif // TEST_STD_VER > 23 + +// clang-format on diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index d4808fde45444..83131c5402321 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -8146,17 +8146,11 @@ # endif # endif -# if !defined(_LIBCPP_VERSION) -# ifndef __cpp_lib_text_encoding -# error "__cpp_lib_text_encoding should be defined in c++26" -# endif -# if __cpp_lib_text_encoding != 202306L -# error "__cpp_lib_text_encoding should have the value 202306L in c++26" -# endif -# else -# ifdef __cpp_lib_text_encoding -# error "__cpp_lib_text_encoding should not be defined because it is unimplemented in libc++!" 
-# endif +# ifndef __cpp_lib_text_encoding +# error "__cpp_lib_text_encoding should be defined in c++26" +# endif +# if __cpp_lib_text_encoding != 202306L +# error "__cpp_lib_text_encoding should have the value 202306L in c++26" # endif # ifndef __cpp_lib_three_way_comparison diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp index c9ed59f3cb9aa..efaac8f8d4507 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp @@ -30,6 +30,11 @@ #include "platform_support.h" // locale name macros #include "test_macros.h" +// glibc has a THOUSANDS_SEP macro already defined +#ifdef THOUSANDS_SEP +# undef THOUSANDS_SEP +#endif + #ifdef _AIX // the AIX libc expects U202F as LC_MONETARY thousands_sep # define THOUSANDS_SEP L"\u202F" diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp index f9d7998b07ff4..7d03b7053150f 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp @@ -30,6 +30,11 @@ #include "platform_support.h" // locale name macros #include "test_macros.h" +// glibc has a THOUSANDS_SEP macro already defined +#ifdef THOUSANDS_SEP +# 
undef THOUSANDS_SEP +#endif + #ifdef _AIX // the AIX libc expects U202F as LC_MONETARY thousands_sep # define THOUSANDS_SEP L"\u202F" diff --git a/libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp b/libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp new file mode 100644 index 0000000000000..74ba547cf6619 --- /dev/null +++ b/libcxx/test/std/localization/locales/locale/locale.members/encoding.pass.cpp @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// REQUIRES: std-at-least-c++26 +// REQUIRES: locale.en_US.UTF-8 +// UNSUPPORTED: no-localization +// UNSUPPORTED: availability-te-environment-missing + +// class locale + +// text_encoding locale::encoding() const + +#include +#include +#include +#include +#include +#include + +#include "platform_support.h" + +int main(int, char**) { + { + // 1. 
Locale built with en_US.UTF-8 returns text_encoding representing "UTF-8" + const std::locale utf8_locale(LOCALE_en_US_UTF_8); + std::same_as decltype(auto) te = utf8_locale.encoding(); + auto utf8_te = std::text_encoding{std::text_encoding::UTF8}; + + if (te != std::text_encoding::UTF8) { + std::cerr << std::format("Expected UTF-8, received {{ {}, \"{}\" }}", int(te.mib()), te.name()); + assert(false); + } + assert(te == utf8_te); + } +#if defined(_WIN32) + { + // BCP-47 locale name + const std::locale loc("en-US"); + std::same_as decltype(auto) te = loc.encoding(); + auto w1252 = std::text_encoding{std::text_encoding::windows1252}; + assert(te == std::text_encoding::windows1252); + assert(te == w1252); + } +#endif + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/test_text_encoding.h b/libcxx/test/std/text/text_encoding/test_text_encoding.h new file mode 100644 index 0000000000000..e18bc3aaeea15 --- /dev/null +++ b/libcxx/test/std/text/text_encoding/test_text_encoding.h @@ -0,0 +1,1171 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP_TEST_TEXT_ENCODING_H +#define _LIBCPP_TEST_TEXT_ENCODING_H + +#include + +struct encoding_data { + std::string_view name; + int mib; + int size; +}; + +constexpr encoding_data unique_encoding_data[]{ + {"US-ASCII", 3, 8}, + {"ISO_8859-1:1987", 4, 15}, + {"ISO_8859-2:1987", 5, 15}, + {"ISO_8859-3:1988", 6, 15}, + {"ISO_8859-4:1988", 7, 15}, + {"ISO_8859-5:1988", 8, 15}, + {"ISO_8859-6:1987", 9, 15}, + {"ISO_8859-7:1987", 10, 15}, + {"ISO_8859-8:1988", 11, 15}, + {"ISO_8859-9:1989", 12, 15}, + {"ISO-8859-10", 13, 11}, + {"ISO_6937-2-add", 14, 14}, + {"JIS_X0201", 15, 9}, + {"JIS_Encoding", 16, 12}, + {"Shift_JIS", 17, 9}, + {"Extended_UNIX_Code_Packed_Format_for_Japanese", 18, 45}, + {"Extended_UNIX_Code_Fixed_Width_for_Japanese", 19, 43}, + {"BS_4730", 20, 7}, + {"SEN_850200_C", 21, 12}, + {"IT", 22, 2}, + {"ES", 23, 2}, + {"DIN_66003", 24, 9}, + {"NS_4551-1", 25, 9}, + {"NF_Z_62-010", 26, 11}, + {"ISO-10646-UTF-1", 27, 15}, + {"ISO_646.basic:1983", 28, 18}, + {"INVARIANT", 29, 9}, + {"ISO_646.irv:1983", 30, 16}, + {"NATS-SEFI", 31, 9}, + {"NATS-SEFI-ADD", 32, 13}, + {"SEN_850200_B", 35, 12}, + {"KS_C_5601-1987", 36, 14}, + {"ISO-2022-KR", 37, 11}, + {"EUC-KR", 38, 6}, + {"ISO-2022-JP", 39, 11}, + {"ISO-2022-JP-2", 40, 13}, + {"JIS_C6220-1969-jp", 41, 17}, + {"JIS_C6220-1969-ro", 42, 17}, + {"PT", 43, 2}, + {"greek7-old", 44, 10}, + {"latin-greek", 45, 11}, + {"NF_Z_62-010_(1973)", 46, 18}, + {"Latin-greek-1", 47, 13}, + {"ISO_5427", 48, 8}, + {"JIS_C6226-1978", 49, 14}, + {"BS_viewdata", 50, 11}, + {"INIS", 51, 4}, + {"INIS-8", 52, 6}, + {"INIS-cyrillic", 53, 13}, + {"ISO_5427:1981", 54, 13}, + {"ISO_5428:1980", 55, 13}, + {"GB_1988-80", 56, 10}, + {"GB_2312-80", 57, 10}, + {"NS_4551-2", 58, 9}, + {"videotex-suppl", 59, 14}, + {"PT2", 60, 3}, + {"ES2", 61, 3}, + {"MSZ_7795.3", 62, 10}, + 
{"JIS_C6226-1983", 63, 14}, + {"greek7", 64, 6}, + {"ASMO_449", 65, 8}, + {"iso-ir-90", 66, 9}, + {"JIS_C6229-1984-a", 67, 16}, + {"JIS_C6229-1984-b", 68, 16}, + {"JIS_C6229-1984-b-add", 69, 20}, + {"JIS_C6229-1984-hand", 70, 19}, + {"JIS_C6229-1984-hand-add", 71, 23}, + {"JIS_C6229-1984-kana", 72, 19}, + {"ISO_2033-1983", 73, 13}, + {"ANSI_X3.110-1983", 74, 16}, + {"T.61-7bit", 75, 9}, + {"T.61-8bit", 76, 9}, + {"ECMA-cyrillic", 77, 13}, + {"CSA_Z243.4-1985-1", 78, 17}, + {"CSA_Z243.4-1985-2", 79, 17}, + {"CSA_Z243.4-1985-gr", 80, 18}, + {"ISO_8859-6-E", 81, 12}, + {"ISO_8859-6-I", 82, 12}, + {"T.101-G2", 83, 8}, + {"ISO_8859-8-E", 84, 12}, + {"ISO_8859-8-I", 85, 12}, + {"CSN_369103", 86, 10}, + {"JUS_I.B1.002", 87, 12}, + {"IEC_P27-1", 88, 9}, + {"JUS_I.B1.003-serb", 89, 17}, + {"JUS_I.B1.003-mac", 90, 16}, + {"greek-ccitt", 91, 11}, + {"NC_NC00-10:81", 92, 13}, + {"ISO_6937-2-25", 93, 13}, + {"GOST_19768-74", 94, 13}, + {"ISO_8859-supp", 95, 13}, + {"ISO_10367-box", 96, 13}, + {"latin-lap", 97, 9}, + {"JIS_X0212-1990", 98, 14}, + {"DS_2089", 99, 7}, + {"us-dk", 100, 5}, + {"dk-us", 101, 5}, + {"KSC5636", 102, 7}, + {"UNICODE-1-1-UTF-7", 103, 17}, + {"ISO-2022-CN", 104, 11}, + {"ISO-2022-CN-EXT", 105, 15}, + {"UTF-8", 106, 5}, + {"ISO-8859-13", 109, 11}, + {"ISO-8859-14", 110, 11}, + {"ISO-8859-15", 111, 11}, + {"ISO-8859-16", 112, 11}, + {"GBK", 113, 3}, + {"GB18030", 114, 7}, + {"OSD_EBCDIC_DF04_15", 115, 18}, + {"OSD_EBCDIC_DF03_IRV", 116, 19}, + {"OSD_EBCDIC_DF04_1", 117, 17}, + {"ISO-11548-1", 118, 11}, + {"KZ-1048", 119, 7}, + {"ISO-10646-UCS-2", 1000, 15}, + {"ISO-10646-UCS-4", 1001, 15}, + {"ISO-10646-UCS-Basic", 1002, 19}, + {"ISO-10646-Unicode-Latin1", 1003, 24}, + {"ISO-10646-J-1", 1004, 13}, + {"ISO-Unicode-IBM-1261", 1005, 20}, + {"ISO-Unicode-IBM-1268", 1006, 20}, + {"ISO-Unicode-IBM-1276", 1007, 20}, + {"ISO-Unicode-IBM-1264", 1008, 20}, + {"ISO-Unicode-IBM-1265", 1009, 20}, + {"UNICODE-1-1", 1010, 11}, + {"SCSU", 1011, 4}, + {"UTF-7", 1012, 5}, + 
{"UTF-16BE", 1013, 8}, + {"UTF-16LE", 1014, 8}, + {"UTF-16", 1015, 6}, + {"CESU-8", 1016, 6}, + {"UTF-32", 1017, 6}, + {"UTF-32BE", 1018, 8}, + {"UTF-32LE", 1019, 8}, + {"BOCU-1", 1020, 6}, + {"UTF-7-IMAP", 1021, 10}, + {"ISO-8859-1-Windows-3.0-Latin-1", 2000, 30}, + {"ISO-8859-1-Windows-3.1-Latin-1", 2001, 30}, + {"ISO-8859-2-Windows-Latin-2", 2002, 26}, + {"ISO-8859-9-Windows-Latin-5", 2003, 26}, + {"hp-roman8", 2004, 9}, + {"Adobe-Standard-Encoding", 2005, 23}, + {"Ventura-US", 2006, 10}, + {"Ventura-International", 2007, 21}, + {"DEC-MCS", 2008, 7}, + {"IBM850", 2009, 6}, + {"IBM852", 2010, 6}, + {"IBM437", 2011, 6}, + {"PC8-Danish-Norwegian", 2012, 20}, + {"IBM862", 2013, 6}, + {"PC8-Turkish", 2014, 11}, + {"IBM-Symbols", 2015, 11}, + {"IBM-Thai", 2016, 8}, + {"HP-Legal", 2017, 8}, + {"HP-Pi-font", 2018, 10}, + {"HP-Math8", 2019, 8}, + {"Adobe-Symbol-Encoding", 2020, 21}, + {"HP-DeskTop", 2021, 10}, + {"Ventura-Math", 2022, 12}, + {"Microsoft-Publishing", 2023, 20}, + {"Windows-31J", 2024, 11}, + {"GB2312", 2025, 6}, + {"Big5", 2026, 4}, + {"macintosh", 2027, 9}, + {"IBM037", 2028, 6}, + {"IBM038", 2029, 6}, + {"IBM273", 2030, 6}, + {"IBM274", 2031, 6}, + {"IBM275", 2032, 6}, + {"IBM277", 2033, 6}, + {"IBM278", 2034, 6}, + {"IBM280", 2035, 6}, + {"IBM281", 2036, 6}, + {"IBM284", 2037, 6}, + {"IBM285", 2038, 6}, + {"IBM290", 2039, 6}, + {"IBM297", 2040, 6}, + {"IBM420", 2041, 6}, + {"IBM423", 2042, 6}, + {"IBM424", 2043, 6}, + {"IBM500", 2044, 6}, + {"IBM851", 2045, 6}, + {"IBM855", 2046, 6}, + {"IBM857", 2047, 6}, + {"IBM860", 2048, 6}, + {"IBM861", 2049, 6}, + {"IBM863", 2050, 6}, + {"IBM864", 2051, 6}, + {"IBM865", 2052, 6}, + {"IBM868", 2053, 6}, + {"IBM869", 2054, 6}, + {"IBM870", 2055, 6}, + {"IBM871", 2056, 6}, + {"IBM880", 2057, 6}, + {"IBM891", 2058, 6}, + {"IBM903", 2059, 6}, + {"IBM904", 2060, 6}, + {"IBM905", 2061, 6}, + {"IBM918", 2062, 6}, + {"IBM1026", 2063, 7}, + {"EBCDIC-AT-DE", 2064, 12}, + {"EBCDIC-AT-DE-A", 2065, 14}, + {"EBCDIC-CA-FR", 
2066, 12}, + {"EBCDIC-DK-NO", 2067, 12}, + {"EBCDIC-DK-NO-A", 2068, 14}, + {"EBCDIC-FI-SE", 2069, 12}, + {"EBCDIC-FI-SE-A", 2070, 14}, + {"EBCDIC-FR", 2071, 9}, + {"EBCDIC-IT", 2072, 9}, + {"EBCDIC-PT", 2073, 9}, + {"EBCDIC-ES", 2074, 9}, + {"EBCDIC-ES-A", 2075, 11}, + {"EBCDIC-ES-S", 2076, 11}, + {"EBCDIC-UK", 2077, 9}, + {"EBCDIC-US", 2078, 9}, + {"UNKNOWN-8BIT", 2079, 12}, + {"MNEMONIC", 2080, 8}, + {"MNEM", 2081, 4}, + {"VISCII", 2082, 6}, + {"VIQR", 2083, 4}, + {"KOI8-R", 2084, 6}, + {"HZ-GB-2312", 2085, 10}, + {"IBM866", 2086, 6}, + {"IBM775", 2087, 6}, + {"KOI8-U", 2088, 6}, + {"IBM00858", 2089, 8}, + {"IBM00924", 2090, 8}, + {"IBM01140", 2091, 8}, + {"IBM01141", 2092, 8}, + {"IBM01142", 2093, 8}, + {"IBM01143", 2094, 8}, + {"IBM01144", 2095, 8}, + {"IBM01145", 2096, 8}, + {"IBM01146", 2097, 8}, + {"IBM01147", 2098, 8}, + {"IBM01148", 2099, 8}, + {"IBM01149", 2100, 8}, + {"Big5-HKSCS", 2101, 10}, + {"IBM1047", 2102, 7}, + {"PTCP154", 2103, 7}, + {"Amiga-1251", 2104, 10}, + {"KOI7-switched", 2105, 13}, + {"BRF", 2106, 3}, + {"TSCII", 2107, 5}, + {"CP51932", 2108, 7}, + {"windows-874", 2109, 11}, + {"windows-1250", 2250, 12}, + {"windows-1251", 2251, 12}, + {"windows-1252", 2252, 12}, + {"windows-1253", 2253, 12}, + {"windows-1254", 2254, 12}, + {"windows-1255", 2255, 12}, + {"windows-1256", 2256, 12}, + {"windows-1257", 2257, 12}, + {"windows-1258", 2258, 12}, + {"TIS-620", 2259, 7}, + {"CP50220", 2260, 7}, +}; + +constexpr std::string_view other_names[]{ + "graah", "oops", "libcxx", "foobar", "barbaz", "1234567", "llvm-project", "utf-81", "utf-9", "CA-ASCII"}; + +struct enc_data { + int mib; + std::string_view name; +}; + +constexpr inline enc_data all_encoding_data[] = { + {3, "US-ASCII"}, + {3, "ANSI_X3.4-1968"}, + {3, "ANSI_X3.4-1986"}, + {3, "IBM367"}, + {3, "ISO646-US"}, + {3, "ISO_646.irv:1991"}, + {3, "cp367"}, + {3, "csASCII"}, + {3, "iso-ir-6"}, + {3, "us"}, + {4, "ISO-8859-1"}, + {4, "ISO_8859-1:1987"}, + {4, "CP819"}, + {4, "IBM819"}, + {4, 
"ISO_8859-1"}, + {4, "csISOLatin1"}, + {4, "iso-ir-100"}, + {4, "l1"}, + {4, "latin1"}, + {5, "ISO-8859-2"}, + {5, "ISO_8859-2:1987"}, + {5, "ISO_8859-2"}, + {5, "csISOLatin2"}, + {5, "iso-ir-101"}, + {5, "l2"}, + {5, "latin2"}, + {6, "ISO-8859-3"}, + {6, "ISO_8859-3:1988"}, + {6, "ISO_8859-3"}, + {6, "csISOLatin3"}, + {6, "iso-ir-109"}, + {6, "l3"}, + {6, "latin3"}, + {7, "ISO-8859-4"}, + {7, "ISO_8859-4:1988"}, + {7, "ISO_8859-4"}, + {7, "csISOLatin4"}, + {7, "iso-ir-110"}, + {7, "l4"}, + {7, "latin4"}, + {8, "ISO-8859-5"}, + {8, "ISO_8859-5:1988"}, + {8, "ISO_8859-5"}, + {8, "csISOLatinCyrillic"}, + {8, "cyrillic"}, + {8, "iso-ir-144"}, + {9, "ISO-8859-6"}, + {9, "ISO_8859-6:1987"}, + {9, "ASMO-708"}, + {9, "ECMA-114"}, + {9, "ISO_8859-6"}, + {9, "arabic"}, + {9, "csISOLatinArabic"}, + {9, "iso-ir-127"}, + {10, "ISO-8859-7"}, + {10, "ISO_8859-7:1987"}, + {10, "ECMA-118"}, + {10, "ELOT_928"}, + {10, "ISO_8859-7"}, + {10, "csISOLatinGreek"}, + {10, "greek"}, + {10, "greek8"}, + {10, "iso-ir-126"}, + {11, "ISO-8859-8"}, + {11, "ISO_8859-8:1988"}, + {11, "ISO_8859-8"}, + {11, "csISOLatinHebrew"}, + {11, "hebrew"}, + {11, "iso-ir-138"}, + {12, "ISO-8859-9"}, + {12, "ISO_8859-9:1989"}, + {12, "ISO_8859-9"}, + {12, "csISOLatin5"}, + {12, "iso-ir-148"}, + {12, "l5"}, + {12, "latin5"}, + {13, "ISO-8859-10"}, + {13, "ISO_8859-10:1992"}, + {13, "csISOLatin6"}, + {13, "iso-ir-157"}, + {13, "l6"}, + {13, "latin6"}, + {14, "ISO_6937-2-add"}, + {14, "csISOTextComm"}, + {14, "iso-ir-142"}, + {15, "JIS_X0201"}, + {15, "X0201"}, + {15, "csHalfWidthKatakana"}, + {16, "JIS_Encoding"}, + {16, "csJISEncoding"}, + {17, "Shift_JIS"}, + {17, "MS_Kanji"}, + {17, "csShiftJIS"}, + {18, "EUC-JP"}, + {18, "Extended_UNIX_Code_Packed_Format_for_Japanese"}, + {18, "csEUCPkdFmtJapanese"}, + {19, "Extended_UNIX_Code_Fixed_Width_for_Japanese"}, + {19, "csEUCFixWidJapanese"}, + {20, "BS_4730"}, + {20, "ISO646-GB"}, + {20, "csISO4UnitedKingdom"}, + {20, "gb"}, + {20, "iso-ir-4"}, + {20, "uk"}, + 
{21, "SEN_850200_C"}, + {21, "ISO646-SE2"}, + {21, "csISO11SwedishForNames"}, + {21, "iso-ir-11"}, + {21, "se2"}, + {22, "IT"}, + {22, "ISO646-IT"}, + {22, "csISO15Italian"}, + {22, "iso-ir-15"}, + {23, "ES"}, + {23, "ISO646-ES"}, + {23, "csISO17Spanish"}, + {23, "iso-ir-17"}, + {24, "DIN_66003"}, + {24, "ISO646-DE"}, + {24, "csISO21German"}, + {24, "de"}, + {24, "iso-ir-21"}, + {25, "NS_4551-1"}, + {25, "ISO646-NO"}, + {25, "csISO60DanishNorwegian"}, + {25, "csISO60Norwegian1"}, + {25, "iso-ir-60"}, + {25, "no"}, + {26, "NF_Z_62-010"}, + {26, "ISO646-FR"}, + {26, "csISO69French"}, + {26, "fr"}, + {26, "iso-ir-69"}, + {27, "ISO-10646-UTF-1"}, + {27, "csISO10646UTF1"}, + {28, "ISO_646.basic:1983"}, + {28, "csISO646basic1983"}, + {28, "ref"}, + {29, "INVARIANT"}, + {29, "csINVARIANT"}, + {30, "ISO_646.irv:1983"}, + {30, "csISO2IntlRefVersion"}, + {30, "irv"}, + {30, "iso-ir-2"}, + {31, "NATS-SEFI"}, + {31, "csNATSSEFI"}, + {31, "iso-ir-8-1"}, + {32, "NATS-SEFI-ADD"}, + {32, "csNATSSEFIADD"}, + {32, "iso-ir-8-2"}, + // 33 NATS-DANO, 34 NATS-DANO-ADD omitted + {35, "SEN_850200_B"}, + {35, "FI"}, + {35, "ISO646-FI"}, + {35, "ISO646-SE"}, + {35, "csISO10Swedish"}, + {35, "iso-ir-10"}, + {35, "se"}, + {36, "KS_C_5601-1987"}, + {36, "KSC_5601"}, + {36, "KS_C_5601-1989"}, + {36, "csKSC56011987"}, + {36, "iso-ir-149"}, + {36, "korean"}, + {37, "ISO-2022-KR"}, + {37, "csISO2022KR"}, + {38, "EUC-KR"}, + {38, "csEUCKR"}, + {39, "ISO-2022-JP"}, + {39, "csISO2022JP"}, + {40, "ISO-2022-JP-2"}, + {40, "csISO2022JP2"}, + {41, "JIS_C6220-1969-jp"}, + {41, "JIS_C6220-1969"}, + {41, "csISO13JISC6220jp"}, + {41, "iso-ir-13"}, + {41, "katakana"}, + {41, "x0201-7"}, + {42, "JIS_C6220-1969-ro"}, + {42, "ISO646-JP"}, + {42, "csISO14JISC6220ro"}, + {42, "iso-ir-14"}, + {42, "jp"}, + {43, "PT"}, + {43, "ISO646-PT"}, + {43, "csISO16Portuguese"}, + {43, "iso-ir-16"}, + {44, "greek7-old"}, + {44, "csISO18Greek7Old"}, + {44, "iso-ir-18"}, + {45, "latin-greek"}, + {45, "csISO19LatinGreek"}, + {45, 
"iso-ir-19"}, + {46, "NF_Z_62-010_(1973)"}, + {46, "ISO646-FR1"}, + {46, "csISO25French"}, + {46, "iso-ir-25"}, + {47, "Latin-greek-1"}, + {47, "csISO27LatinGreek1"}, + {47, "iso-ir-27"}, + {48, "ISO_5427"}, + {48, "csISO5427Cyrillic"}, + {48, "iso-ir-37"}, + {49, "JIS_C6226-1978"}, + {49, "csISO42JISC62261978"}, + {49, "iso-ir-42"}, + {50, "BS_viewdata"}, + {50, "csISO47BSViewdata"}, + {50, "iso-ir-47"}, + {51, "INIS"}, + {51, "csISO49INIS"}, + {51, "iso-ir-49"}, + {52, "INIS-8"}, + {52, "csISO50INIS8"}, + {52, "iso-ir-50"}, + {53, "INIS-cyrillic"}, + {53, "csISO51INISCyrillic"}, + {53, "iso-ir-51"}, + {54, "ISO_5427:1981"}, + {54, "ISO5427Cyrillic1981"}, + {54, "csISO54271981"}, + {54, "iso-ir-54"}, + {55, "ISO_5428:1980"}, + {55, "csISO5428Greek"}, + {55, "iso-ir-55"}, + {56, "GB_1988-80"}, + {56, "ISO646-CN"}, + {56, "cn"}, + {56, "csISO57GB1988"}, + {56, "iso-ir-57"}, + {57, "GB_2312-80"}, + {57, "chinese"}, + {57, "csISO58GB231280"}, + {57, "iso-ir-58"}, + {58, "NS_4551-2"}, + {58, "ISO646-NO2"}, + {58, "csISO61Norwegian2"}, + {58, "iso-ir-61"}, + {58, "no2"}, + {59, "videotex-suppl"}, + {59, "csISO70VideotexSupp1"}, + {59, "iso-ir-70"}, + {60, "PT2"}, + {60, "ISO646-PT2"}, + {60, "csISO84Portuguese2"}, + {60, "iso-ir-84"}, + {61, "ES2"}, + {61, "ISO646-ES2"}, + {61, "csISO85Spanish2"}, + {61, "iso-ir-85"}, + {62, "MSZ_7795.3"}, + {62, "ISO646-HU"}, + {62, "csISO86Hungarian"}, + {62, "hu"}, + {62, "iso-ir-86"}, + {63, "JIS_C6226-1983"}, + {63, "JIS_X0208-1983"}, + {63, "csISO87JISX0208"}, + {63, "iso-ir-87"}, + {63, "x0208"}, + {64, "greek7"}, + {64, "csISO88Greek7"}, + {64, "iso-ir-88"}, + {65, "ASMO_449"}, + {65, "ISO_9036"}, + {65, "arabic7"}, + {65, "csISO89ASMO449"}, + {65, "iso-ir-89"}, + {66, "iso-ir-90"}, + {66, "csISO90"}, + {67, "JIS_C6229-1984-a"}, + {67, "csISO91JISC62291984a"}, + {67, "iso-ir-91"}, + {67, "jp-ocr-a"}, + {68, "JIS_C6229-1984-b"}, + {68, "ISO646-JP-OCR-B"}, + {68, "csISO92JISC62991984b"}, + {68, "iso-ir-92"}, + {68, "jp-ocr-b"}, + 
{69, "JIS_C6229-1984-b-add"}, + {69, "csISO93JIS62291984badd"}, + {69, "iso-ir-93"}, + {69, "jp-ocr-b-add"}, + {70, "JIS_C6229-1984-hand"}, + {70, "csISO94JIS62291984hand"}, + {70, "iso-ir-94"}, + {70, "jp-ocr-hand"}, + {71, "JIS_C6229-1984-hand-add"}, + {71, "csISO95JIS62291984handadd"}, + {71, "iso-ir-95"}, + {71, "jp-ocr-hand-add"}, + {72, "JIS_C6229-1984-kana"}, + {72, "csISO96JISC62291984kana"}, + {72, "iso-ir-96"}, + {73, "ISO_2033-1983"}, + {73, "csISO2033"}, + {73, "e13b"}, + {73, "iso-ir-98"}, + {74, "ANSI_X3.110-1983"}, + {74, "CSA_T500-1983"}, + {74, "NAPLPS"}, + {74, "csISO99NAPLPS"}, + {74, "iso-ir-99"}, + {75, "T.61-7bit"}, + {75, "csISO102T617bit"}, + {75, "iso-ir-102"}, + {76, "T.61-8bit"}, + {76, "T.61"}, + {76, "csISO103T618bit"}, + {76, "iso-ir-103"}, + {77, "ECMA-cyrillic"}, + {77, "KOI8-E"}, + {77, "csISO111ECMACyrillic"}, + {77, "iso-ir-111"}, + {78, "CSA_Z243.4-1985-1"}, + {78, "ISO646-CA"}, + {78, "ca"}, + {78, "csISO121Canadian1"}, + {78, "csa7-1"}, + {78, "csa71"}, + {78, "iso-ir-121"}, + {79, "CSA_Z243.4-1985-2"}, + {79, "ISO646-CA2"}, + {79, "csISO122Canadian2"}, + {79, "csa7-2"}, + {79, "csa72"}, + {79, "iso-ir-122"}, + {80, "CSA_Z243.4-1985-gr"}, + {80, "csISO123CSAZ24341985gr"}, + {80, "iso-ir-123"}, + {81, "ISO-8859-6-E"}, + {81, "ISO_8859-6-E"}, + {81, "csISO88596E"}, + {82, "ISO-8859-6-I"}, + {82, "ISO_8859-6-I"}, + {82, "csISO88596I"}, + {83, "T.101-G2"}, + {83, "csISO128T101G2"}, + {83, "iso-ir-128"}, + {84, "ISO-8859-8-E"}, + {84, "ISO_8859-8-E"}, + {84, "csISO88598E"}, + {85, "ISO-8859-8-I"}, + {85, "ISO_8859-8-I"}, + {85, "csISO88598I"}, + {86, "CSN_369103"}, + {86, "csISO139CSN369103"}, + {86, "iso-ir-139"}, + {87, "JUS_I.B1.002"}, + {87, "ISO646-YU"}, + {87, "csISO141JUSIB1002"}, + {87, "iso-ir-141"}, + {87, "js"}, + {87, "yu"}, + {88, "IEC_P27-1"}, + {88, "csISO143IECP271"}, + {88, "iso-ir-143"}, + {89, "JUS_I.B1.003-serb"}, + {89, "csISO146Serbian"}, + {89, "iso-ir-146"}, + {89, "serbian"}, + {90, "JUS_I.B1.003-mac"}, + 
{90, "csISO147Macedonian"}, + {90, "iso-ir-147"}, + {90, "macedonian"}, + {91, "greek-ccitt"}, + {91, "csISO150"}, + {91, "csISO150GreekCCITT"}, + {91, "iso-ir-150"}, + {92, "NC_NC00-10:81"}, + {92, "ISO646-CU"}, + {92, "csISO151Cuba"}, + {92, "cuba"}, + {92, "iso-ir-151"}, + {93, "ISO_6937-2-25"}, + {93, "csISO6937Add"}, + {93, "iso-ir-152"}, + {94, "GOST_19768-74"}, + {94, "ST_SEV_358-88"}, + {94, "csISO153GOST1976874"}, + {94, "iso-ir-153"}, + {95, "ISO_8859-supp"}, + {95, "csISO8859Supp"}, + {95, "iso-ir-154"}, + {95, "latin1-2-5"}, + {96, "ISO_10367-box"}, + {96, "csISO10367Box"}, + {96, "iso-ir-155"}, + {97, "latin-lap"}, + {97, "csISO158Lap"}, + {97, "iso-ir-158"}, + {97, "lap"}, + {98, "JIS_X0212-1990"}, + {98, "csISO159JISX02121990"}, + {98, "iso-ir-159"}, + {98, "x0212"}, + {99, "DS_2089"}, + {99, "DS2089"}, + {99, "ISO646-DK"}, + {99, "csISO646Danish"}, + {99, "dk"}, + {100, "us-dk"}, + {100, "csUSDK"}, + {101, "dk-us"}, + {101, "csDKUS"}, + {102, "KSC5636"}, + {102, "ISO646-KR"}, + {102, "csKSC5636"}, + {103, "UNICODE-1-1-UTF-7"}, + {103, "csUnicode11UTF7"}, + {104, "ISO-2022-CN"}, + {104, "csISO2022CN"}, + {105, "ISO-2022-CN-EXT"}, + {105, "csISO2022CNEXT"}, + {106, "UTF-8"}, + {106, "csUTF8"}, + {109, "ISO-8859-13"}, + {109, "csISO885913"}, + {110, "ISO-8859-14"}, + {110, "ISO_8859-14"}, + {110, "ISO_8859-14:1998"}, + {110, "csISO885914"}, + {110, "iso-celtic"}, + {110, "iso-ir-199"}, + {110, "l8"}, + {110, "latin8"}, + {111, "ISO-8859-15"}, + {111, "ISO_8859-15"}, + {111, "Latin-9"}, + {111, "csISO885915"}, + {112, "ISO-8859-16"}, + {112, "ISO_8859-16"}, + {112, "ISO_8859-16:2001"}, + {112, "csISO885916"}, + {112, "iso-ir-226"}, + {112, "l10"}, + {112, "latin10"}, + {113, "GBK"}, + {113, "CP936"}, + {113, "MS936"}, + {113, "csGBK"}, + {113, "windows-936"}, + {114, "GB18030"}, + {114, "csGB18030"}, + {115, "OSD_EBCDIC_DF04_15"}, + {115, "csOSDEBCDICDF0415"}, + {116, "OSD_EBCDIC_DF03_IRV"}, + {116, "csOSDEBCDICDF03IRV"}, + {117, "OSD_EBCDIC_DF04_1"}, + 
{117, "csOSDEBCDICDF041"}, + {118, "ISO-11548-1"}, + {118, "ISO_11548-1"}, + {118, "ISO_TR_11548-1"}, + {118, "csISO115481"}, + {119, "KZ-1048"}, + {119, "RK1048"}, + {119, "STRK1048-2002"}, + {119, "csKZ1048"}, + {1000, "ISO-10646-UCS-2"}, + {1000, "csUnicode"}, + {1001, "ISO-10646-UCS-4"}, + {1001, "csUCS4"}, + {1002, "ISO-10646-UCS-Basic"}, + {1002, "csUnicodeASCII"}, + {1003, "ISO-10646-Unicode-Latin1"}, + {1003, "ISO-10646"}, + {1003, "csUnicodeLatin1"}, + {1004, "ISO-10646-J-1"}, + {1004, "csUnicodeJapanese"}, + {1005, "ISO-Unicode-IBM-1261"}, + {1005, "csUnicodeIBM1261"}, + {1006, "ISO-Unicode-IBM-1268"}, + {1006, "csUnicodeIBM1268"}, + {1007, "ISO-Unicode-IBM-1276"}, + {1007, "csUnicodeIBM1276"}, + {1008, "ISO-Unicode-IBM-1264"}, + {1008, "csUnicodeIBM1264"}, + {1009, "ISO-Unicode-IBM-1265"}, + {1009, "csUnicodeIBM1265"}, + {1010, "UNICODE-1-1"}, + {1010, "csUnicode11"}, + {1011, "SCSU"}, + {1011, "csSCSU"}, + {1012, "UTF-7"}, + {1012, "csUTF7"}, + {1013, "UTF-16BE"}, + {1013, "csUTF16BE"}, + {1014, "UTF-16LE"}, + {1014, "csUTF16LE"}, + {1015, "UTF-16"}, + {1015, "csUTF16"}, + {1016, "CESU-8"}, + {1016, "csCESU-8"}, + {1016, "csCESU8"}, + {1017, "UTF-32"}, + {1017, "csUTF32"}, + {1018, "UTF-32BE"}, + {1018, "csUTF32BE"}, + {1019, "UTF-32LE"}, + {1019, "csUTF32LE"}, + {1020, "BOCU-1"}, + {1020, "csBOCU-1"}, + {1020, "csBOCU1"}, + {1021, "UTF-7-IMAP"}, + {1021, "csUTF7IMAP"}, + {2000, "ISO-8859-1-Windows-3.0-Latin-1"}, + {2000, "csWindows30Latin1"}, + {2001, "ISO-8859-1-Windows-3.1-Latin-1"}, + {2001, "csWindows31Latin1"}, + {2002, "ISO-8859-2-Windows-Latin-2"}, + {2002, "csWindows31Latin2"}, + {2003, "ISO-8859-9-Windows-Latin-5"}, + {2003, "csWindows31Latin5"}, + {2004, "hp-roman8"}, + {2004, "csHPRoman8"}, + {2004, "r8"}, + {2004, "roman8"}, + {2005, "Adobe-Standard-Encoding"}, + {2005, "csAdobeStandardEncoding"}, + {2006, "Ventura-US"}, + {2006, "csVenturaUS"}, + {2007, "Ventura-International"}, + {2007, "csVenturaInternational"}, + {2008, "DEC-MCS"}, + 
{2008, "csDECMCS"}, + {2008, "dec"}, + {2009, "IBM850"}, + {2009, "850"}, + {2009, "cp850"}, + {2009, "csPC850Multilingual"}, + {2012, "PC8-Danish-Norwegian"}, + {2012, "csPC8DanishNorwegian"}, + {2013, "IBM862"}, + {2013, "862"}, + {2013, "cp862"}, + {2013, "csPC862LatinHebrew"}, + {2014, "PC8-Turkish"}, + {2014, "csPC8Turkish"}, + {2015, "IBM-Symbols"}, + {2015, "csIBMSymbols"}, + {2016, "IBM-Thai"}, + {2016, "csIBMThai"}, + {2017, "HP-Legal"}, + {2017, "csHPLegal"}, + {2018, "HP-Pi-font"}, + {2018, "csHPPiFont"}, + {2019, "HP-Math8"}, + {2019, "csHPMath8"}, + {2020, "Adobe-Symbol-Encoding"}, + {2020, "csHPPSMath"}, + {2021, "HP-DeskTop"}, + {2021, "csHPDesktop"}, + {2022, "Ventura-Math"}, + {2022, "csVenturaMath"}, + {2023, "Microsoft-Publishing"}, + {2023, "csMicrosoftPublishing"}, + {2024, "Windows-31J"}, + {2024, "csWindows31J"}, + {2025, "GB2312"}, + {2025, "csGB2312"}, + {2026, "Big5"}, + {2026, "csBig5"}, + {2027, "macintosh"}, + {2027, "csMacintosh"}, + {2027, "mac"}, + {2028, "IBM037"}, + {2028, "cp037"}, + {2028, "csIBM037"}, + {2028, "ebcdic-cp-ca"}, + {2028, "ebcdic-cp-nl"}, + {2028, "ebcdic-cp-us"}, + {2028, "ebcdic-cp-wt"}, + {2029, "IBM038"}, + {2029, "EBCDIC-INT"}, + {2029, "cp038"}, + {2029, "csIBM038"}, + {2030, "IBM273"}, + {2030, "CP273"}, + {2030, "csIBM273"}, + {2031, "IBM274"}, + {2031, "CP274"}, + {2031, "EBCDIC-BE"}, + {2031, "csIBM274"}, + {2032, "IBM275"}, + {2032, "EBCDIC-BR"}, + {2032, "cp275"}, + {2032, "csIBM275"}, + {2033, "IBM277"}, + {2033, "EBCDIC-CP-DK"}, + {2033, "EBCDIC-CP-NO"}, + {2033, "csIBM277"}, + {2034, "IBM278"}, + {2034, "CP278"}, + {2034, "csIBM278"}, + {2034, "ebcdic-cp-fi"}, + {2034, "ebcdic-cp-se"}, + {2035, "IBM280"}, + {2035, "CP280"}, + {2035, "csIBM280"}, + {2035, "ebcdic-cp-it"}, + {2036, "IBM281"}, + {2036, "EBCDIC-JP-E"}, + {2036, "cp281"}, + {2036, "csIBM281"}, + {2037, "IBM284"}, + {2037, "CP284"}, + {2037, "csIBM284"}, + {2037, "ebcdic-cp-es"}, + {2038, "IBM285"}, + {2038, "CP285"}, + {2038, "csIBM285"}, 
+ {2038, "ebcdic-cp-gb"}, + {2039, "IBM290"}, + {2039, "EBCDIC-JP-kana"}, + {2039, "cp290"}, + {2039, "csIBM290"}, + {2040, "IBM297"}, + {2040, "cp297"}, + {2040, "csIBM297"}, + {2040, "ebcdic-cp-fr"}, + {2041, "IBM420"}, + {2041, "cp420"}, + {2041, "csIBM420"}, + {2041, "ebcdic-cp-ar1"}, + {2042, "IBM423"}, + {2042, "cp423"}, + {2042, "csIBM423"}, + {2042, "ebcdic-cp-gr"}, + {2043, "IBM424"}, + {2043, "cp424"}, + {2043, "csIBM424"}, + {2043, "ebcdic-cp-he"}, + {2011, "IBM437"}, + {2011, "437"}, + {2011, "cp437"}, + {2011, "csPC8CodePage437"}, + {2044, "IBM500"}, + {2044, "CP500"}, + {2044, "csIBM500"}, + {2044, "ebcdic-cp-be"}, + {2044, "ebcdic-cp-ch"}, + {2045, "IBM851"}, + {2045, "851"}, + {2045, "cp851"}, + {2045, "csIBM851"}, + {2010, "IBM852"}, + {2010, "852"}, + {2010, "cp852"}, + {2010, "csPCp852"}, + {2046, "IBM855"}, + {2046, "855"}, + {2046, "cp855"}, + {2046, "csIBM855"}, + {2047, "IBM857"}, + {2047, "857"}, + {2047, "cp857"}, + {2047, "csIBM857"}, + {2048, "IBM860"}, + {2048, "860"}, + {2048, "cp860"}, + {2048, "csIBM860"}, + {2049, "IBM861"}, + {2049, "861"}, + {2049, "cp-is"}, + {2049, "cp861"}, + {2049, "csIBM861"}, + {2050, "IBM863"}, + {2050, "863"}, + {2050, "cp863"}, + {2050, "csIBM863"}, + {2051, "IBM864"}, + {2051, "cp864"}, + {2051, "csIBM864"}, + {2052, "IBM865"}, + {2052, "865"}, + {2052, "cp865"}, + {2052, "csIBM865"}, + {2053, "IBM868"}, + {2053, "CP868"}, + {2053, "cp-ar"}, + {2053, "csIBM868"}, + {2054, "IBM869"}, + {2054, "869"}, + {2054, "cp-gr"}, + {2054, "cp869"}, + {2054, "csIBM869"}, + {2055, "IBM870"}, + {2055, "CP870"}, + {2055, "csIBM870"}, + {2055, "ebcdic-cp-roece"}, + {2055, "ebcdic-cp-yu"}, + {2056, "IBM871"}, + {2056, "CP871"}, + {2056, "csIBM871"}, + {2056, "ebcdic-cp-is"}, + {2057, "IBM880"}, + {2057, "EBCDIC-Cyrillic"}, + {2057, "cp880"}, + {2057, "csIBM880"}, + {2058, "IBM891"}, + {2058, "cp891"}, + {2058, "csIBM891"}, + {2059, "IBM903"}, + {2059, "cp903"}, + {2059, "csIBM903"}, + {2060, "IBM904"}, + {2060, "904"}, + 
{2060, "cp904"}, + {2060, "csIBBM904"}, + {2061, "IBM905"}, + {2061, "CP905"}, + {2061, "csIBM905"}, + {2061, "ebcdic-cp-tr"}, + {2062, "IBM918"}, + {2062, "CP918"}, + {2062, "csIBM918"}, + {2062, "ebcdic-cp-ar2"}, + {2063, "IBM1026"}, + {2063, "CP1026"}, + {2063, "csIBM1026"}, + {2064, "EBCDIC-AT-DE"}, + {2064, "csIBMEBCDICATDE"}, + {2065, "EBCDIC-AT-DE-A"}, + {2065, "csEBCDICATDEA"}, + {2066, "EBCDIC-CA-FR"}, + {2066, "csEBCDICCAFR"}, + {2067, "EBCDIC-DK-NO"}, + {2067, "csEBCDICDKNO"}, + {2068, "EBCDIC-DK-NO-A"}, + {2068, "csEBCDICDKNOA"}, + {2069, "EBCDIC-FI-SE"}, + {2069, "csEBCDICFISE"}, + {2070, "EBCDIC-FI-SE-A"}, + {2070, "csEBCDICFISEA"}, + {2071, "EBCDIC-FR"}, + {2071, "csEBCDICFR"}, + {2072, "EBCDIC-IT"}, + {2072, "csEBCDICIT"}, + {2073, "EBCDIC-PT"}, + {2073, "csEBCDICPT"}, + {2074, "EBCDIC-ES"}, + {2074, "csEBCDICES"}, + {2075, "EBCDIC-ES-A"}, + {2075, "csEBCDICESA"}, + {2076, "EBCDIC-ES-S"}, + {2076, "csEBCDICESS"}, + {2077, "EBCDIC-UK"}, + {2077, "csEBCDICUK"}, + {2078, "EBCDIC-US"}, + {2078, "csEBCDICUS"}, + {2079, "UNKNOWN-8BIT"}, + {2079, "csUnknown8BiT"}, + {2080, "MNEMONIC"}, + {2080, "csMnemonic"}, + {2081, "MNEM"}, + {2081, "csMnem"}, + {2082, "VISCII"}, + {2082, "csVISCII"}, + {2083, "VIQR"}, + {2083, "csVIQR"}, + {2084, "KOI8-R"}, + {2084, "csKOI8R"}, + {2085, "HZ-GB-2312"}, + {2086, "IBM866"}, + {2086, "866"}, + {2086, "cp866"}, + {2086, "csIBM866"}, + {2087, "IBM775"}, + {2087, "cp775"}, + {2087, "csPC775Baltic"}, + {2088, "KOI8-U"}, + {2088, "csKOI8U"}, + {2089, "IBM00858"}, + {2089, "CCSID00858"}, + {2089, "CP00858"}, + {2089, "PC-Multilingual-850+euro"}, + {2089, "csIBM00858"}, + {2090, "IBM00924"}, + {2090, "CCSID00924"}, + {2090, "CP00924"}, + {2090, "csIBM00924"}, + {2090, "ebcdic-Latin9--euro"}, + {2091, "IBM01140"}, + {2091, "CCSID01140"}, + {2091, "CP01140"}, + {2091, "csIBM01140"}, + {2091, "ebcdic-us-37+euro"}, + {2092, "IBM01141"}, + {2092, "CCSID01141"}, + {2092, "CP01141"}, + {2092, "csIBM01141"}, + {2092, 
"ebcdic-de-273+euro"}, + {2093, "IBM01142"}, + {2093, "CCSID01142"}, + {2093, "CP01142"}, + {2093, "csIBM01142"}, + {2093, "ebcdic-dk-277+euro"}, + {2093, "ebcdic-no-277+euro"}, + {2094, "IBM01143"}, + {2094, "CCSID01143"}, + {2094, "CP01143"}, + {2094, "csIBM01143"}, + {2094, "ebcdic-fi-278+euro"}, + {2094, "ebcdic-se-278+euro"}, + {2095, "IBM01144"}, + {2095, "CCSID01144"}, + {2095, "CP01144"}, + {2095, "csIBM01144"}, + {2095, "ebcdic-it-280+euro"}, + {2096, "IBM01145"}, + {2096, "CCSID01145"}, + {2096, "CP01145"}, + {2096, "csIBM01145"}, + {2096, "ebcdic-es-284+euro"}, + {2097, "IBM01146"}, + {2097, "CCSID01146"}, + {2097, "CP01146"}, + {2097, "csIBM01146"}, + {2097, "ebcdic-gb-285+euro"}, + {2098, "IBM01147"}, + {2098, "CCSID01147"}, + {2098, "CP01147"}, + {2098, "csIBM01147"}, + {2098, "ebcdic-fr-297+euro"}, + {2099, "IBM01148"}, + {2099, "CCSID01148"}, + {2099, "CP01148"}, + {2099, "csIBM01148"}, + {2099, "ebcdic-international-500+euro"}, + {2100, "IBM01149"}, + {2100, "CCSID01149"}, + {2100, "CP01149"}, + {2100, "csIBM01149"}, + {2100, "ebcdic-is-871+euro"}, + {2101, "Big5-HKSCS"}, + {2101, "csBig5HKSCS"}, + {2102, "IBM1047"}, + {2102, "IBM-1047"}, + {2102, "csIBM1047"}, + {2103, "PTCP154"}, + {2103, "CP154"}, + {2103, "Cyrillic-Asian"}, + {2103, "PT154"}, + {2103, "csPTCP154"}, + {2104, "Amiga-1251"}, + {2104, "Ami-1251"}, + {2104, "Ami1251"}, + {2104, "Amiga1251"}, + {2104, "csAmiga1251"}, + {2105, "KOI7-switched"}, + {2105, "csKOI7switched"}, + {2106, "BRF"}, + {2106, "csBRF"}, + {2107, "TSCII"}, + {2107, "csTSCII"}, + {2108, "CP51932"}, + {2108, "csCP51932"}, + {2109, "windows-874"}, + {2109, "cswindows874"}, + {2250, "windows-1250"}, + {2250, "cswindows1250"}, + {2251, "windows-1251"}, + {2251, "cswindows1251"}, + {2252, "windows-1252"}, + {2252, "cswindows1252"}, + {2253, "windows-1253"}, + {2253, "cswindows1253"}, + {2254, "windows-1254"}, + {2254, "cswindows1254"}, + {2255, "windows-1255"}, + {2255, "cswindows1255"}, + {2256, "windows-1256"}, + 
{2256, "cswindows1256"}, + {2257, "windows-1257"}, + {2257, "cswindows1257"}, + {2258, "windows-1258"}, + {2258, "cswindows1258"}, + {2259, "TIS-620"}, + {2259, "ISO-8859-11"}, + {2259, "csTIS620"}, + {2260, "CP50220"}, + {2260, "csCP50220"}}; + +#endif // _LIBCPP_TEST_TEXT_ENCODING_H diff --git a/libcxx/test/std/text/text_encoding/text_encoding.ctor/default.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.ctor/default.pass.cpp new file mode 100644 index 0000000000000..23757d82d8811 --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.ctor/default.pass.cpp @@ -0,0 +1,37 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// text_encoding::text_encoding() noexcept + +#include +#include +#include +#include + +constexpr bool test() { + std::text_encoding te{}; + assert(te.mib() == std::text_encoding::unknown); + assert(std::string_view("") == te.name()); + + return true; +} + +int main(int, char**) { + // 1. Default constructor must be nothrow + static_assert(std::is_nothrow_default_constructible_v, "Must be nothrow default constructible"); + + // 2. 
Default constructing a text_encoding object makes it so that mib() == id::unknown, and its name is empty + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.ctor/id.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.ctor/id.pass.cpp new file mode 100644 index 0000000000000..bc3d729e770e3 --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.ctor/id.pass.cpp @@ -0,0 +1,69 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// text_encoding::text_encoding(std::text_encoding::id) noexcept + +#include +#include +#include +#include +#include +#include + +#include "../test_text_encoding.h" + +using id = std::text_encoding::id; + +constexpr bool test() { + { + // 2. Constructing an object with a valid id must set mib() and the name to the corresponding value. + for (auto pair : unique_encoding_data) { + std::text_encoding te{id(pair.mib)}; + + assert(te.mib() == id(pair.mib)); + assert(pair.name == te.name()); + assert(std::ranges::contains(te.aliases(), pair.name)); + } + } + + { + // 3. Constructing an object using id::unknown or id::other must set mib() to id::unknown or id::other, respectively, and the name to an empty string. + { + std::text_encoding te{id::other}; + + assert(te.mib() == id::other); + assert(std::string_view("") == te.name()); + assert(std::ranges::empty(te.aliases())); + } + + { + std::text_encoding te{id::unknown}; + + assert(te.mib() == id::unknown); + assert(std::string_view("") == te.name()); + assert(std::ranges::empty(te.aliases())); + } + } + + return true; +} + +int main(int, char**) { + // 1. 
text_encoding(id) must be nothrow + static_assert(std::is_nothrow_constructible_v, + "Must be nothrow constructible with id"); + + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.ctor/string_view.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.ctor/string_view.pass.cpp new file mode 100644 index 0000000000000..c3202f78642f2 --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.ctor/string_view.pass.cpp @@ -0,0 +1,73 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 +// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-steps): -fconstexpr-steps=40000000 +// ADDITIONAL_COMPILE_FLAGS(has-fconstexpr-ops-limit): -fconstexpr-ops-limit=1000000000 + +// + +// text_encoding::text_encoding(string_view) noexcept + +#include +#include +#include +#include + +#include "../test_text_encoding.h" + +using id = std::text_encoding::id; + +constexpr void test_ctor(std::string_view str, id expect_id) { + std::text_encoding te{str}; + + assert(te.mib() == expect_id); + assert(te.name() == str); +} + +constexpr bool test() { + // The first encoding name for each mib in the data table. 
+ for (auto& data : unique_encoding_data) { + std::text_encoding te{data.name}; + + assert(te.mib() == id(data.mib)); + assert(te.name() == data.name); + } + + // Names that should all result in an "other" text encoding + for (auto& name : other_names) { + std::text_encoding te{name}; + + assert(te.mib() == std::text_encoding::other); + assert(te.name() == name); + } + + test_ctor("U_T_F-8", id::UTF8); + test_ctor("utf8", id::UTF8); + test_ctor("u.t.f-008", id::UTF8); + test_ctor("utf-80", id::other); + test_ctor("iso885931988", id::ISOLatin3); + test_ctor("iso00885931988", id::ISOLatin3); + + return true; +} + +int main(int, char**) { + static_assert(std::is_nothrow_constructible_v, + "Must be nothrow constructible with string_view"); + + test(); + static_assert(test()); + + // Check every possible alias, intentionally runtime only as it would take unreasonably long to test in constexpr. + for (auto& enc : all_encoding_data) { + test_ctor(enc.name, id(enc.mib)); + } + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.eq/equal.id.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.eq/equal.id.pass.cpp new file mode 100644 index 0000000000000..79de614299fa0 --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.eq/equal.id.pass.cpp @@ -0,0 +1,61 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// bool text_encoding::operator==(const text_encoding&, id) noexcept + +#include +#include + +#include "test_macros.h" +#include "../test_text_encoding.h" + +using id = std::text_encoding::id; + +constexpr void test_primary_encodings() { + for (auto& data : unique_encoding_data) { + std::text_encoding te{id(data.mib)}; + assert(te == id(data.mib)); + } +} + +constexpr bool test() { + // operator==(const text_encoding&, id) must be noexcept and returns bool + { + std::text_encoding te{}; + ASSERT_SAME_TYPE(decltype(te == id::UTF8), bool); + ASSERT_NOEXCEPT(te == id::UTF8); + } + + // operator==(const text_encoding&, id) returns true if mib() is equal to the id + { + test_primary_encodings(); + + // unknown, other cases + assert(std::text_encoding() == id::unknown); + assert(std::text_encoding(id::unknown) == id::unknown); + assert(std::text_encoding(id::other) == id::other); + } + + // operator==(const text_encoding&, id) returns false if mib() is not equal to the id + { + assert(!(std::text_encoding(id::UTF8) == id::UTF16)); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.eq/equal.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.eq/equal.pass.cpp new file mode 100644 index 0000000000000..5041801fcb3e8 --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.eq/equal.pass.cpp @@ -0,0 +1,71 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// bool text_encoding::operator==(const text_encoding&, const text_encoding&) noexcept + +#include +#include + +#include "test_macros.h" + +using id = std::text_encoding::id; + +constexpr bool test() { + // 1. operator==(const text_encoding&, const text_encoding&) must be noexcept and returns bool + { + ASSERT_NOEXCEPT(std::text_encoding() == std::text_encoding()); + ASSERT_SAME_TYPE(bool, decltype(std::text_encoding() == std::text_encoding())); + } + + // 2. operator==(const text_encoding&, const text_encoding&) returns true if both text_encoding ids are equal + { + std::text_encoding te1{id::UTF8}; + std::text_encoding te2{id::UTF8}; + assert(te1 == te2); + assert((te1 == te2) == !(te1 != te2)); + + const std::text_encoding& te11 = te1; + const std::text_encoding& te22 = te2; + assert(te11 == te22); + assert((te11 == te22) == !(te11 != te22)); + } + + // 3. operator==(const text_encoding&, const text_encoding&) for text_encodings with ids of "other" return true if the names are equal + { + std::text_encoding other_te1{"foo"}; + std::text_encoding other_te2{"foo"}; + assert(other_te1 == other_te2); + } + + // 4. operator==(const text_encoding&, const text_encoding&) returns false when comparing text_encodings with different ids + { + std::text_encoding te1{id::UTF8}; + std::text_encoding te2{id::UTF16}; + assert(!(te1 == te2)); + } + + // 5. 
operator==(const text_encoding&, const text_encoding&) for text_encodings with ids of "other" returns false if the names are not equal + { + std::text_encoding other_te1("foo"); + std::text_encoding other_te2("bar"); + assert(!(other_te1 == other_te2)); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.hash/enabled_hash.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.hash/enabled_hash.pass.cpp new file mode 100644 index 0000000000000..642b67fa3c08c --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.hash/enabled_hash.pass.cpp @@ -0,0 +1,23 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// template <> struct hash + +#include + +#include "poisoned_hash_helper.h" + +int main(int, char**) { + test_library_hash_specializations_available(); + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.hash/hash.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.hash/hash.pass.cpp new file mode 100644 index 0000000000000..82ca786174ad7 --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.hash/hash.pass.cpp @@ -0,0 +1,60 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// template <> struct hash + +#include "poisoned_hash_helper.h" +#include +#include +#include +#include + +void test_te_hash() { + using T = std::text_encoding; + using H = std::hash; + + { + const T te(T::ASCII); + const H h{}; + assert(h(te) == h(te)); + static_assert(std::is_same_v); + } + + { + const T te1(T::ASCII); + const T te2(T::UTF8); + const H h{}; + + assert(h(te1) != h(te2)); + } + + { + const T te1(T::unknown); + const T te2(T::unknown); + const H h{}; + assert(h(te1) == h(te2)); + } + + { + const T te1(T::other); + const T te2(T::other); + const H h{}; + assert(h(te1) == h(te2)); + } +} + +int main(int, char**) { + test_te_hash(); + test_hash_enabled(); + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.members/aliases.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.members/aliases.pass.cpp new file mode 100644 index 0000000000000..bf616788fe449 --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.members/aliases.pass.cpp @@ -0,0 +1,42 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// constexpr text_encoding::aliases() + +#include +#include +#include + +#include "test_macros.h" + +constexpr bool test() { + static_assert(noexcept(std::text_encoding().aliases())); + ASSERT_SAME_TYPE(decltype(std::text_encoding().aliases()), std::text_encoding::aliases_view); + + // 2 aliases + std::text_encoding utf8{std::text_encoding::UTF8}; + + auto aliases = utf8.aliases(); + + assert(aliases.size() == 2); + assert(std::string_view(aliases[0]) == "UTF-8"); + assert(std::string_view(aliases[1]) == "csUTF8"); + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.members/aliases_view.compile.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.members/aliases_view.compile.pass.cpp new file mode 100644 index 0000000000000..711ad99476e77 --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.members/aliases_view.compile.pass.cpp @@ -0,0 +1,23 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// text_encoding::aliases_view; + +#include +#include + +static_assert(std::copyable); +static_assert(std::ranges::view); +static_assert(std::ranges::random_access_range); +static_assert(std::ranges::borrowed_range); +static_assert(std::same_as, const char*>); +static_assert(std::same_as, const char*>); diff --git a/libcxx/test/std/text/text_encoding/text_encoding.members/environment.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.members/environment.pass.cpp new file mode 100644 index 0000000000000..113b151765c81 --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.members/environment.pass.cpp @@ -0,0 +1,79 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 +// REQUIRES: locale.en_US.UTF-8 + +// UNSUPPORTED: no-localization +// UNSUPPORTED: availability-te-environment-missing + +// + +// text_encoding text_encoding::environment(); + +#include +#include +#include +#include +#include + +#include "platform_support.h" +#include "test_macros.h" + +int main(int, char**) { + auto check_env = []() { +#if defined(__ANDROID__) + constexpr std::text_encoding::id expected_id = std::text_encoding::UTF8; +#elif defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) + constexpr std::text_encoding::id expected_id = std::text_encoding::ASCII; +#elif defined(_WIN32) + constexpr std::text_encoding::id expected_id = std::text_encoding::windows1252; +#elif defined(_AIX) + constexpr std::text_encoding::id expected_id = std::text_encoding::ISOLatin1; +#else + constexpr std::text_encoding::id expected_id = std::text_encoding::unknown; +#endif + + std::same_as decltype(auto) te = std::text_encoding::environment(); + + bool fail = false; + if (te != expected_id) { + std::cerr << std::format( + "Environment mismatch: Expected ID {}, received: {{{},{}}}\n", int(expected_id), int(te.mib()), te.name()); + fail = true; + } + std::same_as decltype(auto) env_is_expected = std::text_encoding::environment_is(); + if (!env_is_expected) { + fail = true; + } + + return !fail; + }; + + { + // 1. Depending on the platform's default, verify that environment() returns the corresponding text encoding. + assert(check_env()); + } + + auto te = std::text_encoding::environment(); + // 2. text_encoding::environment()'s return value isn't altered by changes to locale. 
+ { + std::setlocale(LC_ALL, LOCALE_en_US_UTF_8); + + auto te2 = std::text_encoding::environment(); + assert(te == te2); + } + + { + std::setlocale(LC_CTYPE, LOCALE_en_US_UTF_8); + + auto te2 = std::text_encoding::environment(); + assert(te == te2); + } + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.members/id.compile.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.members/id.compile.pass.cpp new file mode 100644 index 0000000000000..65a7bddea3afa --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.members/id.compile.pass.cpp @@ -0,0 +1,281 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// std::text_encoding::id; +// Test that every member of the id enumeration has the correct value. 
+ +#include +#include +#include + +using std::int_least32_t; + +static_assert(std::is_scoped_enum_v); +static_assert(std::is_same_v, std::int_least32_t>); +static_assert(int_least32_t(std::text_encoding::id::other) == 1); +static_assert(int_least32_t(std::text_encoding::id::unknown) == 2); +static_assert(int_least32_t(std::text_encoding::id::ASCII) == 3); +static_assert(int_least32_t(std::text_encoding::id::ISOLatin1) == 4); +static_assert(int_least32_t(std::text_encoding::id::ISOLatin2) == 5); +static_assert(int_least32_t(std::text_encoding::id::ISOLatin3) == 6); +static_assert(int_least32_t(std::text_encoding::id::ISOLatin4) == 7); +static_assert(int_least32_t(std::text_encoding::id::ISOLatinCyrillic) == 8); +static_assert(int_least32_t(std::text_encoding::id::ISOLatinArabic) == 9); +static_assert(int_least32_t(std::text_encoding::id::ISOLatinGreek) == 10); +static_assert(int_least32_t(std::text_encoding::id::ISOLatinHebrew) == 11); +static_assert(int_least32_t(std::text_encoding::id::ISOLatin5) == 12); +static_assert(int_least32_t(std::text_encoding::id::ISOLatin6) == 13); +static_assert(int_least32_t(std::text_encoding::id::ISOTextComm) == 14); +static_assert(int_least32_t(std::text_encoding::id::HalfWidthKatakana) == 15); +static_assert(int_least32_t(std::text_encoding::id::JISEncoding) == 16); +static_assert(int_least32_t(std::text_encoding::id::ShiftJIS) == 17); +static_assert(int_least32_t(std::text_encoding::id::EUCPkdFmtJapanese) == 18); +static_assert(int_least32_t(std::text_encoding::id::EUCFixWidJapanese) == 19); +static_assert(int_least32_t(std::text_encoding::id::ISO4UnitedKingdom) == 20); +static_assert(int_least32_t(std::text_encoding::id::ISO11SwedishForNames) == 21); +static_assert(int_least32_t(std::text_encoding::id::ISO15Italian) == 22); +static_assert(int_least32_t(std::text_encoding::id::ISO17Spanish) == 23); +static_assert(int_least32_t(std::text_encoding::id::ISO21German) == 24); 
+static_assert(int_least32_t(std::text_encoding::id::ISO60DanishNorwegian) == 25); +static_assert(int_least32_t(std::text_encoding::id::ISO69French) == 26); +static_assert(int_least32_t(std::text_encoding::id::ISO10646UTF1) == 27); +static_assert(int_least32_t(std::text_encoding::id::ISO646basic1983) == 28); +static_assert(int_least32_t(std::text_encoding::id::INVARIANT) == 29); +static_assert(int_least32_t(std::text_encoding::id::ISO2IntlRefVersion) == 30); +static_assert(int_least32_t(std::text_encoding::id::NATSSEFI) == 31); +static_assert(int_least32_t(std::text_encoding::id::NATSSEFIADD) == 32); +static_assert(int_least32_t(std::text_encoding::id::ISO10Swedish) == 35); +static_assert(int_least32_t(std::text_encoding::id::KSC56011987) == 36); +static_assert(int_least32_t(std::text_encoding::id::ISO2022KR) == 37); +static_assert(int_least32_t(std::text_encoding::id::EUCKR) == 38); +static_assert(int_least32_t(std::text_encoding::id::ISO2022JP) == 39); +static_assert(int_least32_t(std::text_encoding::id::ISO2022JP2) == 40); +static_assert(int_least32_t(std::text_encoding::id::ISO13JISC6220jp) == 41); +static_assert(int_least32_t(std::text_encoding::id::ISO14JISC6220ro) == 42); +static_assert(int_least32_t(std::text_encoding::id::ISO16Portuguese) == 43); +static_assert(int_least32_t(std::text_encoding::id::ISO18Greek7Old) == 44); +static_assert(int_least32_t(std::text_encoding::id::ISO19LatinGreek) == 45); +static_assert(int_least32_t(std::text_encoding::id::ISO25French) == 46); +static_assert(int_least32_t(std::text_encoding::id::ISO27LatinGreek1) == 47); +static_assert(int_least32_t(std::text_encoding::id::ISO5427Cyrillic) == 48); +static_assert(int_least32_t(std::text_encoding::id::ISO42JISC62261978) == 49); +static_assert(int_least32_t(std::text_encoding::id::ISO47BSViewdata) == 50); +static_assert(int_least32_t(std::text_encoding::id::ISO49INIS) == 51); +static_assert(int_least32_t(std::text_encoding::id::ISO50INIS8) == 52); 
+static_assert(int_least32_t(std::text_encoding::id::ISO51INISCyrillic) == 53); +static_assert(int_least32_t(std::text_encoding::id::ISO54271981) == 54); +static_assert(int_least32_t(std::text_encoding::id::ISO5428Greek) == 55); +static_assert(int_least32_t(std::text_encoding::id::ISO57GB1988) == 56); +static_assert(int_least32_t(std::text_encoding::id::ISO58GB231280) == 57); +static_assert(int_least32_t(std::text_encoding::id::ISO61Norwegian2) == 58); +static_assert(int_least32_t(std::text_encoding::id::ISO70VideotexSupp1) == 59); +static_assert(int_least32_t(std::text_encoding::id::ISO84Portuguese2) == 60); +static_assert(int_least32_t(std::text_encoding::id::ISO85Spanish2) == 61); +static_assert(int_least32_t(std::text_encoding::id::ISO86Hungarian) == 62); +static_assert(int_least32_t(std::text_encoding::id::ISO87JISX0208) == 63); +static_assert(int_least32_t(std::text_encoding::id::ISO88Greek7) == 64); +static_assert(int_least32_t(std::text_encoding::id::ISO89ASMO449) == 65); +static_assert(int_least32_t(std::text_encoding::id::ISO90) == 66); +static_assert(int_least32_t(std::text_encoding::id::ISO91JISC62291984a) == 67); +static_assert(int_least32_t(std::text_encoding::id::ISO92JISC62991984b) == 68); +static_assert(int_least32_t(std::text_encoding::id::ISO93JIS62291984badd) == 69); +static_assert(int_least32_t(std::text_encoding::id::ISO94JIS62291984hand) == 70); +static_assert(int_least32_t(std::text_encoding::id::ISO95JIS62291984handadd) == 71); +static_assert(int_least32_t(std::text_encoding::id::ISO96JISC62291984kana) == 72); +static_assert(int_least32_t(std::text_encoding::id::ISO2033) == 73); +static_assert(int_least32_t(std::text_encoding::id::ISO99NAPLPS) == 74); +static_assert(int_least32_t(std::text_encoding::id::ISO102T617bit) == 75); +static_assert(int_least32_t(std::text_encoding::id::ISO103T618bit) == 76); +static_assert(int_least32_t(std::text_encoding::id::ISO111ECMACyrillic) == 77); 
+static_assert(int_least32_t(std::text_encoding::id::ISO121Canadian1) == 78); +static_assert(int_least32_t(std::text_encoding::id::ISO122Canadian2) == 79); +static_assert(int_least32_t(std::text_encoding::id::ISO123CSAZ24341985gr) == 80); +static_assert(int_least32_t(std::text_encoding::id::ISO88596E) == 81); +static_assert(int_least32_t(std::text_encoding::id::ISO88596I) == 82); +static_assert(int_least32_t(std::text_encoding::id::ISO128T101G2) == 83); +static_assert(int_least32_t(std::text_encoding::id::ISO88598E) == 84); +static_assert(int_least32_t(std::text_encoding::id::ISO88598I) == 85); +static_assert(int_least32_t(std::text_encoding::id::ISO139CSN369103) == 86); +static_assert(int_least32_t(std::text_encoding::id::ISO141JUSIB1002) == 87); +static_assert(int_least32_t(std::text_encoding::id::ISO143IECP271) == 88); +static_assert(int_least32_t(std::text_encoding::id::ISO146Serbian) == 89); +static_assert(int_least32_t(std::text_encoding::id::ISO147Macedonian) == 90); +static_assert(int_least32_t(std::text_encoding::id::ISO150) == 91); +static_assert(int_least32_t(std::text_encoding::id::ISO151Cuba) == 92); +static_assert(int_least32_t(std::text_encoding::id::ISO6937Add) == 93); +static_assert(int_least32_t(std::text_encoding::id::ISO153GOST1976874) == 94); +static_assert(int_least32_t(std::text_encoding::id::ISO8859Supp) == 95); +static_assert(int_least32_t(std::text_encoding::id::ISO10367Box) == 96); +static_assert(int_least32_t(std::text_encoding::id::ISO158Lap) == 97); +static_assert(int_least32_t(std::text_encoding::id::ISO159JISX02121990) == 98); +static_assert(int_least32_t(std::text_encoding::id::ISO646Danish) == 99); +static_assert(int_least32_t(std::text_encoding::id::USDK) == 100); +static_assert(int_least32_t(std::text_encoding::id::DKUS) == 101); +static_assert(int_least32_t(std::text_encoding::id::KSC5636) == 102); +static_assert(int_least32_t(std::text_encoding::id::Unicode11UTF7) == 103); 
+static_assert(int_least32_t(std::text_encoding::id::ISO2022CN) == 104); +static_assert(int_least32_t(std::text_encoding::id::ISO2022CNEXT) == 105); +static_assert(int_least32_t(std::text_encoding::id::UTF8) == 106); +static_assert(int_least32_t(std::text_encoding::id::ISO885913) == 109); +static_assert(int_least32_t(std::text_encoding::id::ISO885914) == 110); +static_assert(int_least32_t(std::text_encoding::id::ISO885915) == 111); +static_assert(int_least32_t(std::text_encoding::id::ISO885916) == 112); +static_assert(int_least32_t(std::text_encoding::id::GBK) == 113); +static_assert(int_least32_t(std::text_encoding::id::GB18030) == 114); +static_assert(int_least32_t(std::text_encoding::id::OSDEBCDICDF0415) == 115); +static_assert(int_least32_t(std::text_encoding::id::OSDEBCDICDF03IRV) == 116); +static_assert(int_least32_t(std::text_encoding::id::OSDEBCDICDF041) == 117); +static_assert(int_least32_t(std::text_encoding::id::ISO115481) == 118); +static_assert(int_least32_t(std::text_encoding::id::KZ1048) == 119); +static_assert(int_least32_t(std::text_encoding::id::UCS2) == 1000); +static_assert(int_least32_t(std::text_encoding::id::UCS4) == 1001); +static_assert(int_least32_t(std::text_encoding::id::UnicodeASCII) == 1002); +static_assert(int_least32_t(std::text_encoding::id::UnicodeLatin1) == 1003); +static_assert(int_least32_t(std::text_encoding::id::UnicodeJapanese) == 1004); +static_assert(int_least32_t(std::text_encoding::id::UnicodeIBM1261) == 1005); +static_assert(int_least32_t(std::text_encoding::id::UnicodeIBM1268) == 1006); +static_assert(int_least32_t(std::text_encoding::id::UnicodeIBM1276) == 1007); +static_assert(int_least32_t(std::text_encoding::id::UnicodeIBM1264) == 1008); +static_assert(int_least32_t(std::text_encoding::id::UnicodeIBM1265) == 1009); +static_assert(int_least32_t(std::text_encoding::id::Unicode11) == 1010); +static_assert(int_least32_t(std::text_encoding::id::SCSU) == 1011); +static_assert(int_least32_t(std::text_encoding::id::UTF7) == 
1012); +static_assert(int_least32_t(std::text_encoding::id::UTF16BE) == 1013); +static_assert(int_least32_t(std::text_encoding::id::UTF16LE) == 1014); +static_assert(int_least32_t(std::text_encoding::id::UTF16) == 1015); +static_assert(int_least32_t(std::text_encoding::id::CESU8) == 1016); +static_assert(int_least32_t(std::text_encoding::id::UTF32) == 1017); +static_assert(int_least32_t(std::text_encoding::id::UTF32BE) == 1018); +static_assert(int_least32_t(std::text_encoding::id::UTF32LE) == 1019); +static_assert(int_least32_t(std::text_encoding::id::BOCU1) == 1020); +static_assert(int_least32_t(std::text_encoding::id::UTF7IMAP) == 1021); +static_assert(int_least32_t(std::text_encoding::id::Windows30Latin1) == 2000); +static_assert(int_least32_t(std::text_encoding::id::Windows31Latin1) == 2001); +static_assert(int_least32_t(std::text_encoding::id::Windows31Latin2) == 2002); +static_assert(int_least32_t(std::text_encoding::id::Windows31Latin5) == 2003); +static_assert(int_least32_t(std::text_encoding::id::HPRoman8) == 2004); +static_assert(int_least32_t(std::text_encoding::id::AdobeStandardEncoding) == 2005); +static_assert(int_least32_t(std::text_encoding::id::VenturaUS) == 2006); +static_assert(int_least32_t(std::text_encoding::id::VenturaInternational) == 2007); +static_assert(int_least32_t(std::text_encoding::id::DECMCS) == 2008); +static_assert(int_least32_t(std::text_encoding::id::PC850Multilingual) == 2009); +static_assert(int_least32_t(std::text_encoding::id::PC8DanishNorwegian) == 2012); +static_assert(int_least32_t(std::text_encoding::id::PC862LatinHebrew) == 2013); +static_assert(int_least32_t(std::text_encoding::id::PC8Turkish) == 2014); +static_assert(int_least32_t(std::text_encoding::id::IBMSymbols) == 2015); +static_assert(int_least32_t(std::text_encoding::id::IBMThai) == 2016); +static_assert(int_least32_t(std::text_encoding::id::HPLegal) == 2017); +static_assert(int_least32_t(std::text_encoding::id::HPPiFont) == 2018); 
+static_assert(int_least32_t(std::text_encoding::id::HPMath8) == 2019); +static_assert(int_least32_t(std::text_encoding::id::HPPSMath) == 2020); +static_assert(int_least32_t(std::text_encoding::id::HPDesktop) == 2021); +static_assert(int_least32_t(std::text_encoding::id::VenturaMath) == 2022); +static_assert(int_least32_t(std::text_encoding::id::MicrosoftPublishing) == 2023); +static_assert(int_least32_t(std::text_encoding::id::Windows31J) == 2024); +static_assert(int_least32_t(std::text_encoding::id::GB2312) == 2025); +static_assert(int_least32_t(std::text_encoding::id::Big5) == 2026); +static_assert(int_least32_t(std::text_encoding::id::Macintosh) == 2027); +static_assert(int_least32_t(std::text_encoding::id::IBM037) == 2028); +static_assert(int_least32_t(std::text_encoding::id::IBM038) == 2029); +static_assert(int_least32_t(std::text_encoding::id::IBM273) == 2030); +static_assert(int_least32_t(std::text_encoding::id::IBM274) == 2031); +static_assert(int_least32_t(std::text_encoding::id::IBM275) == 2032); +static_assert(int_least32_t(std::text_encoding::id::IBM277) == 2033); +static_assert(int_least32_t(std::text_encoding::id::IBM278) == 2034); +static_assert(int_least32_t(std::text_encoding::id::IBM280) == 2035); +static_assert(int_least32_t(std::text_encoding::id::IBM281) == 2036); +static_assert(int_least32_t(std::text_encoding::id::IBM284) == 2037); +static_assert(int_least32_t(std::text_encoding::id::IBM285) == 2038); +static_assert(int_least32_t(std::text_encoding::id::IBM290) == 2039); +static_assert(int_least32_t(std::text_encoding::id::IBM297) == 2040); +static_assert(int_least32_t(std::text_encoding::id::IBM420) == 2041); +static_assert(int_least32_t(std::text_encoding::id::IBM423) == 2042); +static_assert(int_least32_t(std::text_encoding::id::IBM424) == 2043); +static_assert(int_least32_t(std::text_encoding::id::PC8CodePage437) == 2011); +static_assert(int_least32_t(std::text_encoding::id::IBM500) == 2044); 
+static_assert(int_least32_t(std::text_encoding::id::IBM851) == 2045); +static_assert(int_least32_t(std::text_encoding::id::PCp852) == 2010); +static_assert(int_least32_t(std::text_encoding::id::IBM855) == 2046); +static_assert(int_least32_t(std::text_encoding::id::IBM857) == 2047); +static_assert(int_least32_t(std::text_encoding::id::IBM860) == 2048); +static_assert(int_least32_t(std::text_encoding::id::IBM861) == 2049); +static_assert(int_least32_t(std::text_encoding::id::IBM863) == 2050); +static_assert(int_least32_t(std::text_encoding::id::IBM864) == 2051); +static_assert(int_least32_t(std::text_encoding::id::IBM865) == 2052); +static_assert(int_least32_t(std::text_encoding::id::IBM868) == 2053); +static_assert(int_least32_t(std::text_encoding::id::IBM869) == 2054); +static_assert(int_least32_t(std::text_encoding::id::IBM870) == 2055); +static_assert(int_least32_t(std::text_encoding::id::IBM871) == 2056); +static_assert(int_least32_t(std::text_encoding::id::IBM880) == 2057); +static_assert(int_least32_t(std::text_encoding::id::IBM891) == 2058); +static_assert(int_least32_t(std::text_encoding::id::IBM903) == 2059); +static_assert(int_least32_t(std::text_encoding::id::IBM904) == 2060); +static_assert(int_least32_t(std::text_encoding::id::IBM905) == 2061); +static_assert(int_least32_t(std::text_encoding::id::IBM918) == 2062); +static_assert(int_least32_t(std::text_encoding::id::IBM1026) == 2063); +static_assert(int_least32_t(std::text_encoding::id::IBMEBCDICATDE) == 2064); +static_assert(int_least32_t(std::text_encoding::id::EBCDICATDEA) == 2065); +static_assert(int_least32_t(std::text_encoding::id::EBCDICCAFR) == 2066); +static_assert(int_least32_t(std::text_encoding::id::EBCDICDKNO) == 2067); +static_assert(int_least32_t(std::text_encoding::id::EBCDICDKNOA) == 2068); +static_assert(int_least32_t(std::text_encoding::id::EBCDICFISE) == 2069); +static_assert(int_least32_t(std::text_encoding::id::EBCDICFISEA) == 2070); 
+static_assert(int_least32_t(std::text_encoding::id::EBCDICFR) == 2071); +static_assert(int_least32_t(std::text_encoding::id::EBCDICIT) == 2072); +static_assert(int_least32_t(std::text_encoding::id::EBCDICPT) == 2073); +static_assert(int_least32_t(std::text_encoding::id::EBCDICES) == 2074); +static_assert(int_least32_t(std::text_encoding::id::EBCDICESA) == 2075); +static_assert(int_least32_t(std::text_encoding::id::EBCDICESS) == 2076); +static_assert(int_least32_t(std::text_encoding::id::EBCDICUK) == 2077); +static_assert(int_least32_t(std::text_encoding::id::EBCDICUS) == 2078); +static_assert(int_least32_t(std::text_encoding::id::Unknown8BiT) == 2079); +static_assert(int_least32_t(std::text_encoding::id::Mnemonic) == 2080); +static_assert(int_least32_t(std::text_encoding::id::Mnem) == 2081); +static_assert(int_least32_t(std::text_encoding::id::VISCII) == 2082); +static_assert(int_least32_t(std::text_encoding::id::VIQR) == 2083); +static_assert(int_least32_t(std::text_encoding::id::KOI8R) == 2084); +static_assert(int_least32_t(std::text_encoding::id::HZGB2312) == 2085); +static_assert(int_least32_t(std::text_encoding::id::IBM866) == 2086); +static_assert(int_least32_t(std::text_encoding::id::PC775Baltic) == 2087); +static_assert(int_least32_t(std::text_encoding::id::KOI8U) == 2088); +static_assert(int_least32_t(std::text_encoding::id::IBM00858) == 2089); +static_assert(int_least32_t(std::text_encoding::id::IBM00924) == 2090); +static_assert(int_least32_t(std::text_encoding::id::IBM01140) == 2091); +static_assert(int_least32_t(std::text_encoding::id::IBM01141) == 2092); +static_assert(int_least32_t(std::text_encoding::id::IBM01142) == 2093); +static_assert(int_least32_t(std::text_encoding::id::IBM01143) == 2094); +static_assert(int_least32_t(std::text_encoding::id::IBM01144) == 2095); +static_assert(int_least32_t(std::text_encoding::id::IBM01145) == 2096); +static_assert(int_least32_t(std::text_encoding::id::IBM01146) == 2097); 
+static_assert(int_least32_t(std::text_encoding::id::IBM01147) == 2098); +static_assert(int_least32_t(std::text_encoding::id::IBM01148) == 2099); +static_assert(int_least32_t(std::text_encoding::id::IBM01149) == 2100); +static_assert(int_least32_t(std::text_encoding::id::Big5HKSCS) == 2101); +static_assert(int_least32_t(std::text_encoding::id::IBM1047) == 2102); +static_assert(int_least32_t(std::text_encoding::id::PTCP154) == 2103); +static_assert(int_least32_t(std::text_encoding::id::Amiga1251) == 2104); +static_assert(int_least32_t(std::text_encoding::id::KOI7switched) == 2105); +static_assert(int_least32_t(std::text_encoding::id::BRF) == 2106); +static_assert(int_least32_t(std::text_encoding::id::TSCII) == 2107); +static_assert(int_least32_t(std::text_encoding::id::CP51932) == 2108); +static_assert(int_least32_t(std::text_encoding::id::windows874) == 2109); +static_assert(int_least32_t(std::text_encoding::id::windows1250) == 2250); +static_assert(int_least32_t(std::text_encoding::id::windows1251) == 2251); +static_assert(int_least32_t(std::text_encoding::id::windows1252) == 2252); +static_assert(int_least32_t(std::text_encoding::id::windows1253) == 2253); +static_assert(int_least32_t(std::text_encoding::id::windows1254) == 2254); +static_assert(int_least32_t(std::text_encoding::id::windows1255) == 2255); +static_assert(int_least32_t(std::text_encoding::id::windows1256) == 2256); +static_assert(int_least32_t(std::text_encoding::id::windows1257) == 2257); +static_assert(int_least32_t(std::text_encoding::id::windows1258) == 2258); +static_assert(int_least32_t(std::text_encoding::id::TIS620) == 2259); +static_assert(int_least32_t(std::text_encoding::id::CP50220) == 2260); diff --git a/libcxx/test/std/text/text_encoding/text_encoding.members/literal.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.members/literal.pass.cpp new file mode 100644 index 0000000000000..949d286ad10fb --- /dev/null +++ 
b/libcxx/test/std/text/text_encoding/text_encoding.members/literal.pass.cpp @@ -0,0 +1,37 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// text_encoding text_encoding::literal() noexcept; + +#include +#include +#include + +constexpr bool test() { + std::same_as decltype(auto) te = std::text_encoding::literal(); +#ifdef __GNUC_EXECUTION_CHARSET_NAME + assert(std::string_view(te.name()) == std::string_view(__GNUC_EXECUTION_CHARSET_NAME)); +#elif defined(__clang_literal_encoding__) + assert(std::string_view(te.name()) == std::string_view(__clang_literal_encoding__)); +#else + assert(te.mib() == std::text_encoding::id::unknown); +#endif + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp new file mode 100644 index 0000000000000..8465196338b41 --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/begin.pass.cpp @@ -0,0 +1,76 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// constexpr implementation-defined text_encoding::aliases_view::begin() const; + +#include +#include +#include + +#include "test_macros.h" + +constexpr bool test() { + // 1. begin() of an aliases_view from a single text_encoding object are the same. + { + std::text_encoding te{std::text_encoding::UTF8}; + + std::same_as decltype(auto) view1 = te.aliases(); + + auto view2 = te.aliases(); + ASSERT_NOEXCEPT(view1.begin()); + + assert(std::ranges::begin(view1) == std::ranges::begin(view2)); + assert(view1.begin() == view2.begin()); + } + + // 2. begin() of aliases_views of two text_encoding objects that represent the same ID but hold different names are the same. + { + std::text_encoding te1{"ANSI_X3.4-1968"}; + std::text_encoding te2{"ANSI_X3.4-1986"}; + + auto view1 = te1.aliases(); + auto view2 = te2.aliases(); + + assert(view1.begin() == view2.begin()); + assert(std::ranges::begin(view1) == std::ranges::begin(view2)); + } + + // 3. begin() of aliases_views of two text_encoding objects that represent different IDs are different. 
+ { + std::text_encoding te1{std::text_encoding::UTF8}; + std::text_encoding te2{std::text_encoding::ASCII}; + + auto view1 = te1.aliases(); + auto view2 = te2.aliases(); + + assert(!(view1.begin() == view2.begin())); + assert(!(std::ranges::begin(view1) == std::ranges::begin(view2))); + } + + { + // 2 aliases + std::text_encoding te{std::text_encoding::UTF8}; + + auto view = te.aliases(); + + assert(view.begin() + 2 == view.end()); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp new file mode 100644 index 0000000000000..367f9216ef5ff --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/empty.pass.cpp @@ -0,0 +1,54 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// view_interface::empty() + +#include +#include +#include + +#include "../../test_text_encoding.h" + +using id = std::text_encoding::id; + +constexpr bool test() { + // 1. An alias_view of a text_encoding object for "other" and "unknown" are empty + { + std::text_encoding te_other{id::other}; + + auto other_range = te_other.aliases(); + assert(other_range.empty()); + } + + { + std::text_encoding te_unknown{id::unknown}; + + auto unknown_range = te_unknown.aliases(); + assert(unknown_range.empty()); + } + + // 2. An alias_view of a text_encoding object for a known encoding e.g. 
"UTF-8" is not empty + for (auto& data : unique_encoding_data) { + std::text_encoding te{id(data.mib)}; + std::text_encoding::aliases_view range = te.aliases(); + assert(!range.empty()); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/end.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/end.pass.cpp new file mode 100644 index 0000000000000..b99517fcf2400 --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/end.pass.cpp @@ -0,0 +1,47 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// text_encoding::aliases_view::end() + +#include +#include + +#include "test_macros.h" + +constexpr bool test() { + { + std::text_encoding a_other{"foobar"}; + + std::text_encoding::aliases_view other_aliases = a_other.aliases(); + + // 1. 
begin() of an aliases_view of "other" is equal to end() + ASSERT_NOEXCEPT(other_aliases.end()); + assert(other_aliases.begin() == other_aliases.end()); + } + + { + std::text_encoding utf8{std::text_encoding::UTF8}; + + std::text_encoding::aliases_view aliases = utf8.aliases(); + + assert(aliases.begin() != aliases.end()); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp new file mode 100644 index 0000000000000..86b10aaa2d478 --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/front.pass.cpp @@ -0,0 +1,63 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// text_encoding::aliases_view::front() + +#include +#include +#include +#include + +using id = std::text_encoding::id; + +constexpr bool test() { + // 1. An aliases_view from a single text_encoding object returns the same front() + { + std::text_encoding te{id::UTF8}; + + auto view1 = te.aliases(); + auto view2 = te.aliases(); + + assert(std::string_view(view1.front()) == std::string_view(view2.front())); + } + + // 2. 
An aliases_views of two text_encoding objects that represent the same ID but hold different names return the same front() + { + std::text_encoding te1{"US-ASCII"}; + std::text_encoding te2{"ANSI_X3.4-1986"}; + + auto view1 = te1.aliases(); + auto view2 = te2.aliases(); + + assert(std::string_view(view1.front()) == std::string_view(view2.front())); + } + + // 3. An aliases_views of two text_encoding objects that represent different IDs return different front() + { + std::text_encoding te1{id::UTF8}; + std::text_encoding te2{id::ASCII}; + + auto view1 = te1.aliases(); + auto view2 = te2.aliases(); + + assert(!(std::string_view(view1.front()) == std::string_view(view2.front()))); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/index.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/index.pass.cpp new file mode 100644 index 0000000000000..d6376dbfa958f --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/index.pass.cpp @@ -0,0 +1,37 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// text_encoding::aliases_view::operator[] + +#include +#include +#include + +#include "test_macros.h" + +constexpr bool test() { + std::text_encoding te(std::text_encoding::id::UTF8); + std::text_encoding::aliases_view aliases = te.aliases(); + auto iter = aliases.begin(); + + ASSERT_SAME_TYPE(decltype(aliases[0]), const char*); + assert(aliases.size() >= 2 && "assumed below"); + assert(std::string_view(aliases[0]) == *iter); + assert(std::string_view(aliases[1]) == std::string_view(*(iter + 1))); + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/iterator.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/iterator.pass.cpp new file mode 100644 index 0000000000000..ae9397983e9ef --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/iterator.pass.cpp @@ -0,0 +1,156 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// text_encoding::aliases_view::iterator (implementation-defined) +// +// Implementation is almost trivial, so everything is tested here. + +#include +#include +#include +#include +#include +#include + +#include "test_macros.h" + +constexpr bool test() { + // Test iterator operators. 
+ std::text_encoding te{std::text_encoding::ASCII}; // 11 aliases + + auto i = te.aliases().begin(); + auto j = te.aliases().begin(); + auto k = te.aliases().end(); + + static_assert(std::three_way_comparable); + + { // iterator operator return types + ASSERT_SAME_TYPE(const char*, decltype(*i)); + ASSERT_SAME_TYPE(const char*, decltype(i[0])); + ASSERT_SAME_TYPE(decltype(i), decltype(i + 1)); + ASSERT_SAME_TYPE(decltype(i), decltype(1 + i)); + ASSERT_SAME_TYPE(decltype(i), decltype(i - 1)); + ASSERT_SAME_TYPE(std::add_lvalue_reference_t, decltype(++i)); + ASSERT_SAME_TYPE(decltype(i), decltype(i++)); + ASSERT_SAME_TYPE(std::add_lvalue_reference_t, decltype(--i)); + ASSERT_SAME_TYPE(decltype(i), decltype(i--)); + ASSERT_SAME_TYPE(std::add_lvalue_reference_t, decltype(i += 1)); + ASSERT_SAME_TYPE(std::add_lvalue_reference_t, decltype(i -= 1)); + ASSERT_SAME_TYPE(bool, decltype(i == j)); + ASSERT_SAME_TYPE(bool, decltype(i != j)); + ASSERT_SAME_TYPE(bool, decltype(i > j)); + ASSERT_SAME_TYPE(bool, decltype(i < j)); + ASSERT_SAME_TYPE(bool, decltype(i >= j)); + ASSERT_SAME_TYPE(bool, decltype(i <= j)); + ASSERT_SAME_TYPE(std::strong_ordering, decltype(i <=> j)); + } + { + ASSERT_NOEXCEPT(i == j); + ASSERT_NOEXCEPT(i != k); + ASSERT_NOEXCEPT(i <=> j); + ASSERT_NOEXCEPT(i > j); + ASSERT_NOEXCEPT(i < j); + ASSERT_NOEXCEPT(i >= j); + ASSERT_NOEXCEPT(i <= j); + assert(i == j); + assert(i != k); + assert(i <= j); + assert(i >= j); + assert(i <=> j == std::strong_ordering::equal); + assert(std::string_view(*i) == std::string_view(*j)); + } + { + ASSERT_NOEXCEPT(*i); + ASSERT_NOEXCEPT(i[0]); + assert(std::string_view(i[0]) == std::string_view(j[0])); + assert(std::string_view(i[1]) != std::string_view(j[3])); + } + { + i++; + assert(i > j); + assert(i >= j); + assert(!(i <= j)); + assert(i <=> j == std::strong_ordering::greater); + assert(i - j == 1); + assert(std::string_view(*i) != std::string_view(*j)); + } + { + i--; + assert(i == te.aliases().begin()); + assert(i == j); 
+ assert(i != k); + std::same_as decltype(auto) str1 = *i; + std::same_as decltype(auto) str2 = *j; + assert(std::string_view(str1) == std::string_view(str2)); + } + { + i++; + j++; + assert(i != te.aliases().begin()); + assert(i == j); + assert(i != k); + assert(std::string_view(*i) == std::string_view(*j)); + } + { + ASSERT_NOEXCEPT(i + 1); + ASSERT_NOEXCEPT(1 + i); + ASSERT_NOEXCEPT(i - 1); + std::same_as decltype(auto) temp = i + 2; + assert(i != temp); + assert(std::string_view(*temp) != std::string_view(*j)); + std::same_as decltype(auto) temp2 = temp - 2; + assert(std::string_view(*temp2) == std::string_view(*j)); + } + { + ASSERT_NOEXCEPT(i - j); + assert(i - j == 0); + assert(k - i > 0); + } + { + ASSERT_NOEXCEPT(i++); + ASSERT_NOEXCEPT(++i); + ASSERT_NOEXCEPT(i--); + ASSERT_NOEXCEPT(--i); + std::same_as> decltype(auto) temp = ++i; + assert(temp == i); + assert(&temp == &i); + + std::same_as decltype(auto) temp2 = j++; + assert(temp2 == j - 1); + assert(i == j); + } + { + ASSERT_NOEXCEPT(i += 2); + ASSERT_NOEXCEPT(i -= 2); + i += 2; + j += 3; + + auto tempi = i; + auto tempj = j; + assert(i != j); + assert((i <=> j) == std::strong_ordering::less); + i -= 2; + j -= 3; + assert(i == j); + assert(i != tempi && (tempi - i) == 2); + assert(j != tempj && (tempj - j) == 3); + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/operator-bool.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/operator-bool.pass.cpp new file mode 100644 index 0000000000000..9d63ff4cdfccd --- /dev/null +++ b/libcxx/test/std/text/text_encoding/text_encoding.members/text_encoding.aliases_view/operator-bool.pass.cpp @@ -0,0 +1,56 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// + +// view_interface::operator bool() + +#include +#include +#include + +#include "../../test_text_encoding.h" + +using id = std::text_encoding::id; + +constexpr bool test() { + // 1. An alias_view of a text_encoding object for "other" and "unknown" are empty + { + { + std::text_encoding te_other{id::other}; + auto other_range = te_other.aliases(); + assert(!bool(other_range)); + } + + { + std::text_encoding te_unknown{id::unknown}; + auto unknown_range = te_unknown.aliases(); + assert(!bool(unknown_range)); + } + } + + // 2. An alias_view of a text_encoding object for a known encoding e.g. "UTF-8" is not empty + { + for (auto& data : unique_encoding_data) { + std::text_encoding te{id(data.mib)}; + auto range = te.aliases(); + assert(bool(range)); + } + } + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/text/text_encoding/trivially_copyable.compile.pass.cpp b/libcxx/test/std/text/text_encoding/trivially_copyable.compile.pass.cpp new file mode 100644 index 0000000000000..7f555a289641a --- /dev/null +++ b/libcxx/test/std/text/text_encoding/trivially_copyable.compile.pass.cpp @@ -0,0 +1,17 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: std-at-least-c++26 + +// +// Class text_encoding is a trivially copyable type ([basic.types.general]). 
+ +#include +#include + +static_assert(std::is_trivially_copyable_v); diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index 56b5d620d73a4..83d97605213ad 100644 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -1417,7 +1417,6 @@ def add_version_header(tc): "c++26": 202306 # P1885R12 Naming Text Encodings to Demystify Them }, "headers": ["text_encoding"], - "unimplemented": True, }, { "name": "__cpp_lib_three_way_comparison", diff --git a/libcxx/utils/libcxx/header_information.py b/libcxx/utils/libcxx/header_information.py index 5e4c98675e41b..dbb1d9e040a33 100644 --- a/libcxx/utils/libcxx/header_information.py +++ b/libcxx/utils/libcxx/header_information.py @@ -172,7 +172,6 @@ def __hash__(self) -> int: "spanstream", "stacktrace", "stdfloat", - "text_encoding", ])) # Undeprecate headers that are deprecated in C++17 and removed in C++20. diff --git a/libcxx/utils/libcxx/test/features/availability.py b/libcxx/utils/libcxx/test/features/availability.py index bae7b33d2130b..c0f2a3d4cdc88 100644 --- a/libcxx/utils/libcxx/test/features/availability.py +++ b/libcxx/utils/libcxx/test/features/availability.py @@ -203,4 +203,12 @@ cfg.available_features, ), ), + # Tests that require std::text_encoding::environment() in the built library + Feature( + name="availability-te-environment-missing", + when=lambda cfg: BooleanExpression.evaluate( + "!libcpp-has-no-availability-markup && (stdlib=apple-libc++ && !_target-has-llvm-23)", + cfg.available_features, + ), + ), ] From 40368e7d4ae740b69bfaf2d0b1f78c853abd38a9 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Wed, 24 Jun 2026 16:52:32 -0700 Subject: [PATCH 434/511] [IR] Fix invalid debug metadata diagnostic kind (#205648) This type is only ever passed to LLVMContext::diagnose directly, and there are no downcasts to this type, so classof is effectively dead, but we should fix 
this oversight. Fixes #205340 --- llvm/include/llvm/IR/DiagnosticInfo.h | 6 ++-- llvm/unittests/IR/CMakeLists.txt | 1 + llvm/unittests/IR/DiagnosticInfoTest.cpp | 36 ++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 llvm/unittests/IR/DiagnosticInfoTest.cpp diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index 28fbfa757b1cc..aff4c81ad2856 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -240,14 +240,14 @@ class LLVM_ABI DiagnosticInfoDebugMetadataVersion : public DiagnosticInfo { class LLVM_ABI DiagnosticInfoIgnoringInvalidDebugMetadata : public DiagnosticInfo { private: - /// The module that is concerned by this debug metadata version diagnostic. + /// The module that is concerned by this invalid debug metadata diagnostic. const Module &M; public: - /// \p The module that is concerned by this debug metadata version diagnostic. + /// \p The module that is concerned by this invalid debug metadata diagnostic. 
DiagnosticInfoIgnoringInvalidDebugMetadata( const Module &M, DiagnosticSeverity Severity = DS_Warning) - : DiagnosticInfo(DK_DebugMetadataVersion, Severity), M(M) {} + : DiagnosticInfo(DK_DebugMetadataInvalid, Severity), M(M) {} const Module &getModule() const { return M; } diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt index d62ce66ef9d34..df83992fadd1e 100644 --- a/llvm/unittests/IR/CMakeLists.txt +++ b/llvm/unittests/IR/CMakeLists.txt @@ -21,6 +21,7 @@ add_llvm_unittest(IRTests ConstantRangeListTest.cpp ConstantsTest.cpp DataLayoutTest.cpp + DiagnosticInfoTest.cpp DebugInfoTest.cpp DebugTypeODRUniquingTest.cpp DemandedBitsTest.cpp diff --git a/llvm/unittests/IR/DiagnosticInfoTest.cpp b/llvm/unittests/IR/DiagnosticInfoTest.cpp new file mode 100644 index 0000000000000..9726e2fcf76ce --- /dev/null +++ b/llvm/unittests/IR/DiagnosticInfoTest.cpp @@ -0,0 +1,36 @@ +//===- DiagnosticInfoTest.cpp - DiagnosticInfo unit tests -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Casting.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +TEST(DiagnosticInfoTest, DebugMetadataKindsMatchClassof) { + LLVMContext C; + Module M("M", C); + + DiagnosticInfoDebugMetadataVersion Version(M, 1); + const DiagnosticInfo *VersionInfo = &Version; + EXPECT_EQ(DK_DebugMetadataVersion, VersionInfo->getKind()); + EXPECT_TRUE(isa(VersionInfo)); + EXPECT_FALSE(isa(VersionInfo)); + + DiagnosticInfoIgnoringInvalidDebugMetadata Invalid(M); + const DiagnosticInfo *InvalidInfo = &Invalid; + EXPECT_EQ(DK_DebugMetadataInvalid, InvalidInfo->getKind()); + EXPECT_TRUE(isa(InvalidInfo)); + EXPECT_FALSE(isa(InvalidInfo)); +} + +} // end anonymous namespace From 404e73092292fa363fc9936fe70c5704fed7cddf Mon Sep 17 00:00:00 2001 From: Wenju He Date: Thu, 25 Jun 2026 01:57:07 +0200 Subject: [PATCH 435/511] [Clang][SYCL] Introduce warn_drv_unsupported_option_for_target_host_only (#22411) The diagnostic warn_drv_unsupported_option_for_target was extended with a %select to append "; only supported for host compilation" for SYCL's use case (commit 707093d9ea91), requiring a third argument at all call sites. 707093d9ea91's change now breaks test Driver/hip-sanitize-options.hip which only expects 2 args. Fix by splitting the diagnostic into two: - warn_drv_unsupported_option_for_target (2 args, same-as-upstream) - warn_drv_unsupported_option_for_target_host_only (2 args, SYCL-specific) Update SYCL.cpp to use the new variant; drop the stale << 0 third arg from SanitizerArgs.cpp and Clang.cpp. Now Clang.cpp part is the same as upstream. 
CMPLRLLVM-76199 Co-authored-by: Claude Sonnet 4.6 --- clang/include/clang/Basic/DiagnosticDriverKinds.td | 11 +++++++---- clang/lib/Driver/SanitizerArgs.cpp | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- clang/lib/Driver/ToolChains/SYCL.cpp | 4 ++-- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index a18e0396ce005..e1add286ed9a3 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -164,10 +164,9 @@ def warn_drv_unsupported_option_for_offload_arch_req_feature : Warning< "ignoring '%0' option for offload arch '%1' as it is not currently supported " "there. Use it with an offload arch containing '%2' instead">, InGroup; -def warn_drv_unsupported_option_for_target - : Warning<"ignoring '%0' option as it is not currently supported for " - "target '%1'%select{|; only supported for host compilation}2">, - InGroup; +def warn_drv_unsupported_option_for_target : Warning< + "ignoring '%0' option as it is not currently supported for target '%1'">, + InGroup; def err_drv_unsupported_option_for_target : Error< "'%0' option is not currently supported for target '%1'">; def warn_drv_unsupported_option_part_for_target : Warning< @@ -175,6 +174,10 @@ def warn_drv_unsupported_option_part_for_target : Warning< InGroup; def err_drv_unsupported_option_part_for_target : Error< "'%0' in '%1' option is not currently supported for target '%2'">; +def warn_drv_unsupported_option_for_target_host_only + : Warning<"ignoring '%0' option as it is not currently supported for " + "target '%1'; only supported for host compilation">, + InGroup; def warn_drv_invalid_argument_for_flang : Warning< "'%0' is not valid for Fortran">, InGroup; diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index bd2963fbc3db8..f3d23f4e972e0 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ 
b/clang/lib/Driver/SanitizerArgs.cpp @@ -1469,7 +1469,7 @@ void SanitizerArgs::addArgs(const ToolChain &TC, const llvm::opt::ArgList &Args, if (!SanitizeArg.empty()) TC.getDriver().Diag(diag::warn_drv_unsupported_option_for_target) - << SanitizeArg << TC.getTripleString() << 0; + << SanitizeArg << TC.getTripleString(); #endif return; } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 9d40aecab817f..969b278562b84 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -3668,7 +3668,7 @@ static void RenderSSPOptions(const Driver &D, const ToolChain &TC, if (EffectiveTriple.isBPF() && StackProtectorLevel != LangOptions::SSPOff) { D.Diag(diag::warn_drv_unsupported_option_for_target) - << A->getSpelling() << EffectiveTriple.getTriple() << 0; + << A->getSpelling() << EffectiveTriple.getTriple(); StackProtectorLevel = DefaultStackProtectorLevel; } } else { diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 621a93f1e8a0e..0410cd7ef02af 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -1498,8 +1498,8 @@ SYCLToolChain::SYCLToolChain(const Driver &D, const llvm::Triple &Triple, SanitizeVal == "thread") continue; } - D.Diag(clang::diag::warn_drv_unsupported_option_for_target) - << A->getAsString(Args) << getTriple().str() << 1; + D.Diag(clang::diag::warn_drv_unsupported_option_for_target_host_only) + << A->getAsString(Args) << getTriple().str(); } } } From 3ce67e68068b49198179abc16c9b33e01dd2429e Mon Sep 17 00:00:00 2001 From: Wenju He Date: Thu, 25 Jun 2026 01:58:54 +0200 Subject: [PATCH 436/511] [Driver][SYCL] Treat stdin as C++ when -fsycl is active (#204968) 1723b7a30145 added a frontend check that rejects C inputs when SYCL mode is active (since SYCL requires C++). 
The stdin path in BuildInputs hardcoded TY_C regardless of driver mode, so `-fsycl -dM -E -` would pass -x c to cc1 and trigger the new diagnostic. Fix: use TY_CXX for stdin when IsSYCL. Also, upstream a downstream test that fails due to 1723b7a30145. --------- Co-authored-by: Claude Sonnet 4.6 --- clang/lib/Driver/Driver.cpp | 4 ++++ clang/test/Driver/sycl-print-internal-defines.cpp | 8 ++++++++ 2 files changed, 12 insertions(+) create mode 100644 clang/test/Driver/sycl-print-internal-defines.cpp diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 40953c0013f62..eb87bd2eebad0 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -3134,6 +3134,8 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, Diag(clang::diag::warn_drv_unused_x) << LastXArg->getValue(); } + bool IsSYCL = Args.hasFlag(options::OPT_fsycl, options::OPT_fno_sycl, false); + for (Arg *A : Args) { if (A->getOption().getKind() == Option::InputClass) { const char *Value = A->getValue(); @@ -3151,6 +3153,8 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, Ty = types::TY_Fortran; } else if (IsDXCMode()) { Ty = types::TY_HLSL; + } else if (IsSYCL) { + Ty = types::TY_CXX; } else { // If running with -E, treat as a C input (this changes the // builtin macros, for example). This may be overridden by -ObjC diff --git a/clang/test/Driver/sycl-print-internal-defines.cpp b/clang/test/Driver/sycl-print-internal-defines.cpp new file mode 100644 index 0000000000000..389aaf25608c6 --- /dev/null +++ b/clang/test/Driver/sycl-print-internal-defines.cpp @@ -0,0 +1,8 @@ +// Test that clang can print defines in SYCL mode. + +// RUN: %clangxx -fsycl -dM -E %s 2>&1 | FileCheck --check-prefix CHECK-PRINT-INTERNAL-DEFINES %s +// CHECK-PRINT-INTERNAL-DEFINES: #define + +// Printing defines also works when input is stdin. 
+// RUN: %clangxx -fsycl -dM -E - < /dev/null 2>&1 | FileCheck --check-prefixes=CHECK-PRINT-INTERNAL-DEFINES,CHECK-NO-ERROR %s +// CHECK-NO-ERROR-NOT: error: From 580e5c11fd069e6d8cd0e1711d75c2c14284b547 Mon Sep 17 00:00:00 2001 From: Jackson Stogel Date: Wed, 24 Jun 2026 17:11:02 -0700 Subject: [PATCH 437/511] [libc][stat] Move internal statx type definition into OSUtil/linux (#203975) This PR refactors the internally defined `statx` buffer to a shareable location so other LLVM-libc linux entrypoints may call `statx` without concern for name conflicts around `linux/stat.h`. Specifically, this PR moves `libc/src/sys/stat/linux/kernel_statx.h` to `libc/src/__support/OSUtil/linux/stat/` and splits it into two files, `kernel_statx_types.h` + `stat_via_statx.h`. This will be used by `realpath`. --- .../src/__support/OSUtil/linux/CMakeLists.txt | 1 + .../OSUtil/linux/stat/CMakeLists.txt | 22 ++++ .../OSUtil/linux/stat/kernel_statx_types.h | 72 ++++++++++++ .../OSUtil/linux/stat/stat_via_statx.h | 66 +++++++++++ libc/src/sys/stat/linux/CMakeLists.txt | 24 ++-- libc/src/sys/stat/linux/fstat.cpp | 18 ++- libc/src/sys/stat/linux/kernel_statx.h | 107 ------------------ libc/src/sys/stat/linux/lstat.cpp | 18 ++- libc/src/sys/stat/linux/stat.cpp | 17 ++- .../llvm-project-overlay/libc/BUILD.bazel | 51 +++++++++ 10 files changed, 256 insertions(+), 140 deletions(-) create mode 100644 libc/src/__support/OSUtil/linux/stat/CMakeLists.txt create mode 100644 libc/src/__support/OSUtil/linux/stat/kernel_statx_types.h create mode 100644 libc/src/__support/OSUtil/linux/stat/stat_via_statx.h delete mode 100644 libc/src/sys/stat/linux/kernel_statx.h diff --git a/libc/src/__support/OSUtil/linux/CMakeLists.txt b/libc/src/__support/OSUtil/linux/CMakeLists.txt index 525e62c7ca23b..3f2c2bd9e63b5 100644 --- a/libc/src/__support/OSUtil/linux/CMakeLists.txt +++ b/libc/src/__support/OSUtil/linux/CMakeLists.txt @@ -4,6 +4,7 @@ endif() add_subdirectory(${LIBC_TARGET_ARCHITECTURE}) 
add_subdirectory(syscall_wrappers) +add_subdirectory(stat) add_object_library( linux_util diff --git a/libc/src/__support/OSUtil/linux/stat/CMakeLists.txt b/libc/src/__support/OSUtil/linux/stat/CMakeLists.txt new file mode 100644 index 0000000000000..cf4343bed6840 --- /dev/null +++ b/libc/src/__support/OSUtil/linux/stat/CMakeLists.txt @@ -0,0 +1,22 @@ +add_header_library( + kernel_statx_types + HDRS + kernel_statx_types.h + DEPENDS + libc.hdr.stdint_proxy + libc.src.__support.macros.config +) + +add_header_library( + stat_via_statx + HDRS + stat_via_statx.h + DEPENDS + .kernel_statx_types + libc.hdr.stdint_proxy + libc.hdr.types.struct_stat + libc.src.__support.common + libc.src.__support.error_or + libc.src.__support.macros.config + libc.src.__support.OSUtil.linux.syscall_wrappers.statx +) diff --git a/libc/src/__support/OSUtil/linux/stat/kernel_statx_types.h b/libc/src/__support/OSUtil/linux/stat/kernel_statx_types.h new file mode 100644 index 0000000000000..080d48bb176ef --- /dev/null +++ b/libc/src/__support/OSUtil/linux/stat/kernel_statx_types.h @@ -0,0 +1,72 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Internal definitions for Linux kernel statx types, matching <linux/stat.h>. +/// +/// We define equivalent `statx` and `statx_timestamp` types here instead +/// of directly including <linux/stat.h> to avoid name conflicts with system +/// libc implementations that choose to provide their own `statx` definitions in +/// `<sys/stat.h>` (like musl or older glibc versions).
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_STAT_KERNEL_STATX_TYPES_H +#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_STAT_KERNEL_STATX_TYPES_H + +#include "hdr/stdint_proxy.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace internal { + +struct kernel_statx_timestamp { + int64_t tv_sec; + uint32_t tv_nsec; + int32_t __reserved; +}; + +struct kernel_statx_buf { + uint32_t stx_mask; // What results were written + uint32_t stx_blksize; // Preferred general I/O size + uint64_t stx_attributes; // Flags conveying information about the file + uint32_t stx_nlink; // Number of hard links + uint32_t stx_uid; // User ID of owner + uint32_t stx_gid; // Group ID of owner + uint16_t stx_mode; // File mode + uint16_t __spare0[1]; + uint64_t stx_ino; // Inode number + uint64_t stx_size; // File size + uint64_t stx_blocks; // Number of 512-byte blocks allocated + uint64_t stx_attributes_mask; // Mask to show what's supported in + // stx_attributes + struct kernel_statx_timestamp stx_atime; // Last access time + struct kernel_statx_timestamp stx_btime; // File creation time + struct kernel_statx_timestamp stx_ctime; // Last attribute change time + struct kernel_statx_timestamp stx_mtime; // Last data modification time + uint32_t stx_rdev_major; // Device ID of special file + uint32_t stx_rdev_minor; + uint32_t stx_dev_major; // ID of device containing file + uint32_t stx_dev_minor; + uint64_t stx_mnt_id; + uint64_t __spare2; + uint64_t __spare3[12]; // Spare space for future expansion +}; + +// The below mask value is based on the definition of a similarly +// named macro in linux/stat.h. When this flag is passed for the +// mask argument to the statx syscall, all fields except the +// stx_btime field will be filled in. 
+constexpr unsigned int KERNEL_STATX_BASIC_STATS_MASK = 0x7FF; + +} // namespace internal + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_STAT_KERNEL_STATX_TYPES_H diff --git a/libc/src/__support/OSUtil/linux/stat/stat_via_statx.h b/libc/src/__support/OSUtil/linux/stat/stat_via_statx.h new file mode 100644 index 0000000000000..e9d78099b5918 --- /dev/null +++ b/libc/src/__support/OSUtil/linux/stat/stat_via_statx.h @@ -0,0 +1,66 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Linux implementation for `stat` functionality via the `statx` syscall. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_STAT_STAT_VIA_STATX_H +#define LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_STAT_STAT_VIA_STATX_H + +#include "hdr/stdint_proxy.h" +#include "hdr/types/struct_stat.h" +#include "src/__support/OSUtil/linux/stat/kernel_statx_types.h" +#include "src/__support/OSUtil/linux/syscall_wrappers/statx.h" +#include "src/__support/common.h" +#include "src/__support/error_or.h" +#include "src/__support/macros/config.h" + +// It is safe to include this kernel header as it is designed to be +// included from user programs without causing any name pollution. +#include + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +/// Populates `statbuf` via a call to the `statx` syscall. 
+LIBC_INLINE ErrorOr<int> stat_via_statx(int dirfd, const char *__restrict path, + int flags, + struct stat *__restrict statbuf) { + kernel_statx_buf xbuf; + ErrorOr<int> result = linux_syscalls::statx( + dirfd, path, flags, KERNEL_STATX_BASIC_STATS_MASK, &xbuf); + if (!result) + return result; + + statbuf->st_dev = MKDEV(xbuf.stx_dev_major, xbuf.stx_dev_minor); + statbuf->st_ino = static_cast<decltype(statbuf->st_ino)>(xbuf.stx_ino); + statbuf->st_mode = xbuf.stx_mode; + statbuf->st_nlink = xbuf.stx_nlink; + statbuf->st_uid = xbuf.stx_uid; + statbuf->st_gid = xbuf.stx_gid; + statbuf->st_rdev = MKDEV(xbuf.stx_rdev_major, xbuf.stx_rdev_minor); + statbuf->st_size = xbuf.stx_size; + statbuf->st_atim.tv_sec = xbuf.stx_atime.tv_sec; + statbuf->st_atim.tv_nsec = xbuf.stx_atime.tv_nsec; + statbuf->st_mtim.tv_sec = xbuf.stx_mtime.tv_sec; + statbuf->st_mtim.tv_nsec = xbuf.stx_mtime.tv_nsec; + statbuf->st_ctim.tv_sec = xbuf.stx_ctime.tv_sec; + statbuf->st_ctim.tv_nsec = xbuf.stx_ctime.tv_nsec; + statbuf->st_blksize = xbuf.stx_blksize; + statbuf->st_blocks = + static_cast<decltype(statbuf->st_blocks)>(xbuf.stx_blocks); + + return 0; +} + +} // namespace internal +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_OSUTIL_LINUX_STAT_STAT_VIA_STATX_H diff --git a/libc/src/sys/stat/linux/CMakeLists.txt b/libc/src/sys/stat/linux/CMakeLists.txt index 005ebfc81bef2..271cdccc42f60 100644 --- a/libc/src/sys/stat/linux/CMakeLists.txt +++ b/libc/src/sys/stat/linux/CMakeLists.txt @@ -60,18 +60,6 @@ add_entrypoint_object( libc.src.errno.errno ) -add_header_library( - kernel_statx - HDRS - kernel_statx.h - DEPENDS - libc.hdr.stdint_proxy - libc.hdr.types.struct_stat - libc.src.__support.OSUtil.linux.syscall_wrappers.statx - libc.src.__support.common - libc.src.__support.libc_errno -) - add_entrypoint_object( stat SRCS @@ -79,9 +67,11 @@ add_entrypoint_object( HDRS ../stat.h DEPENDS - .kernel_statx libc.hdr.fcntl_macros libc.hdr.types.struct_stat + libc.src.__support.error_or + libc.src.__support.libc_errno +
libc.src.__support.OSUtil.linux.stat.stat_via_statx ) add_entrypoint_object( @@ -91,9 +81,11 @@ add_entrypoint_object( HDRS ../lstat.h DEPENDS - .kernel_statx libc.hdr.fcntl_macros libc.hdr.types.struct_stat + libc.src.__support.error_or + libc.src.__support.libc_errno + libc.src.__support.OSUtil.linux.stat.stat_via_statx ) add_entrypoint_object( @@ -103,9 +95,11 @@ add_entrypoint_object( HDRS ../fstat.h DEPENDS - .kernel_statx libc.hdr.fcntl_macros libc.hdr.types.struct_stat + libc.src.__support.error_or + libc.src.__support.libc_errno + libc.src.__support.OSUtil.linux.stat.stat_via_statx ) add_entrypoint_object( diff --git a/libc/src/sys/stat/linux/fstat.cpp b/libc/src/sys/stat/linux/fstat.cpp index 07e8b00a5fbb3..447c6ce6a5bb9 100644 --- a/libc/src/sys/stat/linux/fstat.cpp +++ b/libc/src/sys/stat/linux/fstat.cpp @@ -7,18 +7,24 @@ //===----------------------------------------------------------------------===// #include "src/sys/stat/fstat.h" -#include "kernel_statx.h" -#include "src/__support/macros/config.h" - -#include "src/__support/common.h" - #include "hdr/fcntl_macros.h" #include "hdr/types/struct_stat.h" +#include "src/__support/OSUtil/linux/stat/stat_via_statx.h" +#include "src/__support/common.h" +#include "src/__support/error_or.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, fstat, (int fd, struct stat *statbuf)) { - return statx(fd, "", AT_EMPTY_PATH, statbuf); + ErrorOr<int> result = + internal::stat_via_statx(fd, "", AT_EMPTY_PATH, statbuf); + if (!result) { + libc_errno = result.error(); + return -1; + } + return 0; } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/sys/stat/linux/kernel_statx.h b/libc/src/sys/stat/linux/kernel_statx.h deleted file mode 100644 index 7c2160992102f..0000000000000 --- a/libc/src/sys/stat/linux/kernel_statx.h +++ /dev/null @@ -1,107 +0,0 @@ -//===-- Wrapper over SYS_statx syscall ------------------------------------===// -//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIBC_SRC_SYS_STAT_LINUX_KERNEL_STATX_H -#define LLVM_LIBC_SRC_SYS_STAT_LINUX_KERNEL_STATX_H - -#include "hdr/stdint_proxy.h" -#include "src/__support/OSUtil/linux/syscall_wrappers/statx.h" -#include "src/__support/common.h" -#include "src/__support/libc_errno.h" -#include "src/__support/macros/config.h" - -#include "hdr/types/struct_stat.h" - -// It is safe to include this kernel header as it is designed to be -// included from user programs without causing any name pollution. -#include <linux/kdev_t.h> - -namespace { - -// The type definitions in the internal namespace match kernel's definition of -// the statx_timestamp and statx types in linux/stat.h. We define equivalent -// types here instead of including that header file to avoid name mixup between -// linux/stat.h and the libc's stat.h.
-struct statx_timestamp { - int64_t tv_sec; - uint32_t tv_nsec; - int32_t __reserved; -}; - -struct statx_buf { - uint32_t stx_mask; // What results were written - uint32_t stx_blksize; // Preferred general I/O size - uint64_t stx_attributes; // Flags conveying information about the file - uint32_t stx_nlink; // Number of hard links - uint32_t stx_uid; // User ID of owner - uint32_t stx_gid; // Group ID of owner - uint16_t stx_mode; // File mode - uint16_t __spare0[1]; - uint64_t stx_ino; // Inode number - uint64_t stx_size; // File size - uint64_t stx_blocks; // Number of 512-byte blocks allocated - uint64_t stx_attributes_mask; // Mask to show what's supported in - // stx_attributes - struct statx_timestamp stx_atime; // Last access time - struct statx_timestamp stx_btime; // File creation time - struct statx_timestamp stx_ctime; // Last attribute change time - struct statx_timestamp stx_mtime; // Last data modification time - uint32_t stx_rdev_major; // Device ID of special file - uint32_t stx_rdev_minor; - uint32_t stx_dev_major; // ID of device containing file - uint32_t stx_dev_minor; - uint64_t stx_mnt_id; - uint64_t __spare2; - uint64_t __spare3[12]; // Spare space for future expansion -}; - -// The below mask value is based on the definition of a similarly -// named macro in linux/stat.h. When this flag is passed for the -// mask argument to the statx syscall, all fields except the -// stx_btime field will be filled in. -constexpr unsigned int STATX_BASIC_STATS_MASK = 0x7FF; - -} // Anonymous namespace - -namespace LIBC_NAMESPACE_DECL { - -LIBC_INLINE int statx(int dirfd, const char *__restrict path, int flags, - struct stat *__restrict statbuf) { - // We make a statx syscall and copy out the result into the |statbuf|. 
- ::statx_buf xbuf; - auto result = linux_syscalls::statx(dirfd, path, flags, - ::STATX_BASIC_STATS_MASK, &xbuf); - if (!result) { - libc_errno = result.error(); - return -1; - } - - statbuf->st_dev = MKDEV(xbuf.stx_dev_major, xbuf.stx_dev_minor); - statbuf->st_ino = static_cast<decltype(statbuf->st_ino)>(xbuf.stx_ino); - statbuf->st_mode = xbuf.stx_mode; - statbuf->st_nlink = xbuf.stx_nlink; - statbuf->st_uid = xbuf.stx_uid; - statbuf->st_gid = xbuf.stx_gid; - statbuf->st_rdev = MKDEV(xbuf.stx_rdev_major, xbuf.stx_rdev_minor); - statbuf->st_size = xbuf.stx_size; - statbuf->st_atim.tv_sec = xbuf.stx_atime.tv_sec; - statbuf->st_atim.tv_nsec = xbuf.stx_atime.tv_nsec; - statbuf->st_mtim.tv_sec = xbuf.stx_mtime.tv_sec; - statbuf->st_mtim.tv_nsec = xbuf.stx_mtime.tv_nsec; - statbuf->st_ctim.tv_sec = xbuf.stx_ctime.tv_sec; - statbuf->st_ctim.tv_nsec = xbuf.stx_ctime.tv_nsec; - statbuf->st_blksize = xbuf.stx_blksize; - statbuf->st_blocks = - static_cast<decltype(statbuf->st_blocks)>(xbuf.stx_blocks); - - return 0; -} - -} // namespace LIBC_NAMESPACE_DECL - -#endif // LLVM_LIBC_SRC_SYS_STAT_LINUX_KERNEL_STATX_H diff --git a/libc/src/sys/stat/linux/lstat.cpp b/libc/src/sys/stat/linux/lstat.cpp index dd917d47d1491..4cd8d974ef413 100644 --- a/libc/src/sys/stat/linux/lstat.cpp +++ b/libc/src/sys/stat/linux/lstat.cpp @@ -7,20 +7,26 @@ //===----------------------------------------------------------------------===// #include "src/sys/stat/lstat.h" -#include "kernel_statx.h" -#include "src/__support/macros/config.h" - -#include "src/__support/common.h" - #include "hdr/fcntl_macros.h" #include "hdr/types/struct_stat.h" +#include "src/__support/OSUtil/linux/stat/stat_via_statx.h" +#include "src/__support/common.h" +#include "src/__support/error_or.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, lstat, (const char *__restrict path, struct stat *__restrict statbuf)) { - return statx(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW, statbuf); +
ErrorOr<int> result = + internal::stat_via_statx(AT_FDCWD, path, AT_SYMLINK_NOFOLLOW, statbuf); + if (!result) { + libc_errno = result.error(); + return -1; + } + return 0; } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/sys/stat/linux/stat.cpp b/libc/src/sys/stat/linux/stat.cpp index 8261752a699d1..06d1d9de894a3 100644 --- a/libc/src/sys/stat/linux/stat.cpp +++ b/libc/src/sys/stat/linux/stat.cpp @@ -7,20 +7,25 @@ //===----------------------------------------------------------------------===// #include "src/sys/stat/stat.h" -#include "kernel_statx.h" -#include "src/__support/macros/config.h" - -#include "src/__support/common.h" - #include "hdr/fcntl_macros.h" #include "hdr/types/struct_stat.h" +#include "src/__support/OSUtil/linux/stat/stat_via_statx.h" +#include "src/__support/common.h" +#include "src/__support/error_or.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(int, stat, (const char *__restrict path, struct stat *__restrict statbuf)) { - return statx(AT_FDCWD, path, 0, statbuf); + ErrorOr<int> result = internal::stat_via_statx(AT_FDCWD, path, 0, statbuf); + if (!result) { + libc_errno = result.error(); + return -1; + } + return 0; } } // namespace LIBC_NAMESPACE_DECL diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 4a6df7f43f39c..e0f981d35b965 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -709,6 +709,11 @@ libc_support_library( deps = [":hdr_fcntl_overlay"], ) +libc_support_library( + name = "types_struct_stat", + hdrs = ["hdr/types/struct_stat.h"], +) + libc_support_library( name = "types_struct_timespec", hdrs = ["hdr/types/struct_timespec.h"], @@ -1879,6 +1884,21 @@ libc_support_library( ], ) +libc_support_library( + name = "__support_osutil_linux_syscall_wrappers_statx", + hdrs =
["src/__support/OSUtil/linux/syscall_wrappers/statx.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [ + ":__support_common", + ":__support_error_or", + ":__support_macros_config", + ":__support_osutil_syscall", + ], +) + libc_support_library( name = "__support_osutil_linux_syscall_wrappers_unlink", hdrs = ["src/__support/OSUtil/linux/syscall_wrappers/unlink.h"], @@ -2822,6 +2842,37 @@ libc_support_library( ], ) +libc_support_library( + name = "__support_osutil_linux_stat_kernel_statx_types", + hdrs = ["src/__support/OSUtil/linux/stat/kernel_statx_types.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [ + ":__support_macros_config", + ":hdr_stdint_proxy", + ], +) + +libc_support_library( + name = "__support_osutil_linux_stat_stat_via_statx", + hdrs = ["src/__support/OSUtil/linux/stat/stat_via_statx.h"], + target_compatible_with = select({ + "@platforms//os:linux": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [ + ":__support_common", + ":__support_error_or", + ":__support_macros_config", + ":__support_osutil_linux_stat_kernel_statx_types", + ":__support_osutil_linux_syscall_wrappers_statx", + ":hdr_stdint_proxy", + ":types_struct_stat", + ], +) + libc_support_library( name = "__support_stringutil", srcs = glob(["src/__support/StringUtil/tables/**/*.h"]) + [ From 385afce6fd79ce4cacab297a814f654028dc3d6f Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Wed, 24 Jun 2026 17:31:23 -0700 Subject: [PATCH 438/511] [x86] Handle implicit sections when determining if a global is large (#204247) Just like explicit sections. We were seeing globals with implicit sections marked large under the medium code model. 
Assisted-by: Gemini --- .../llvm/Target/TargetLoweringObjectFile.h | 5 + .../CodeGen/TargetLoweringObjectFileImpl.cpp | 29 +--- llvm/lib/Target/TargetLoweringObjectFile.cpp | 19 +++ llvm/lib/Target/TargetMachine.cpp | 22 +-- .../CodeGen/X86/large-implicit-section.ll | 134 ++++++++++++++++++ 5 files changed, 177 insertions(+), 32 deletions(-) create mode 100644 llvm/test/CodeGen/X86/large-implicit-section.ll diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h index 800bbe45c6a97..6d2546e340815 100644 --- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -132,6 +132,11 @@ class LLVM_ABI TargetLoweringObjectFile : public MCObjectFileInfo { static SectionKind getKindForGlobal(const GlobalObject *GO, const TargetMachine &TM); + /// Return the section name specified by '#pragma clang section' or the + /// section attribute. + static StringRef getCustomSectionName(const GlobalObject *GO, + SectionKind Kind); + /// This method computes the appropriate section to emit the specified global /// variable or function definition. This should not be passed external (or /// available externally) globals. diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index f983c3205f927..bffaf8893e825 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -816,34 +816,14 @@ getGlobalObjectInfo(const GlobalObject *GO, const TargetMachine &TM, return {Group, IsComdat, Flags, Type, EntrySize}; } -static StringRef handlePragmaClangSection(const GlobalObject *GO, - SectionKind Kind) { - // Check if '#pragma clang section' name is applicable. - // Note that pragma directive overrides -ffunction-section, -fdata-section - // and so section name is exactly as user specified and not uniqued. 
- const GlobalVariable *GV = dyn_cast<GlobalVariable>(GO); - if (GV && GV->hasImplicitSection()) { - auto Attrs = GV->getAttributes(); - if (Attrs.hasAttribute("bss-section") && Kind.isBSS()) - return Attrs.getAttribute("bss-section").getValueAsString(); - else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) - return Attrs.getAttribute("rodata-section").getValueAsString(); - else if (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) - return Attrs.getAttribute("relro-section").getValueAsString(); - else if (Attrs.hasAttribute("data-section") && Kind.isData()) - return Attrs.getAttribute("data-section").getValueAsString(); - } - - return GO->getSection(); -} - static MCSection *selectExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM, MCContext &Ctx, Mangler &Mang, unsigned &NextUniqueID, bool Retain, bool ForceUnique) { - StringRef SectionName = handlePragmaClangSection(GO, Kind); + StringRef SectionName = + TargetLoweringObjectFile::getCustomSectionName(GO, Kind); // Infer section flags from the section name if we can. Kind = getELFKindForNamedSection(SectionName, Kind); @@ -1384,7 +1364,8 @@ static void checkMachOComdat(const GlobalValue *GV) { MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { - StringRef SectionName = handlePragmaClangSection(GO, Kind); + StringRef SectionName = + TargetLoweringObjectFile::getCustomSectionName(GO, Kind); // Parse the section specifier and create it if valid.
StringRef Segment, Section; @@ -1750,7 +1731,7 @@ static int getSelectionForCOFF(const GlobalValue *GV) { MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { - StringRef Name = handlePragmaClangSection(GO, Kind); + StringRef Name = TargetLoweringObjectFile::getCustomSectionName(GO, Kind); if (Name == getInstrProfSectionName(IPSK_covmap, Triple::COFF, /*AddSegmentInfo=*/false) || Name == getInstrProfSectionName(IPSK_covfun, Triple::COFF, diff --git a/llvm/lib/Target/TargetLoweringObjectFile.cpp b/llvm/lib/Target/TargetLoweringObjectFile.cpp index 43649a0cd95c7..8ceddc689fc3c 100644 --- a/llvm/lib/Target/TargetLoweringObjectFile.cpp +++ b/llvm/lib/Target/TargetLoweringObjectFile.cpp @@ -13,6 +13,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -368,6 +369,24 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalObject *GO, return SectionKind::getData(); } +StringRef TargetLoweringObjectFile::getCustomSectionName(const GlobalObject *GO, + SectionKind Kind) { + const GlobalVariable *GV = dyn_cast<GlobalVariable>(GO); + if (GV && GV->hasImplicitSection()) { + auto Attrs = GV->getAttributes(); + if (Attrs.hasAttribute("bss-section") && Kind.isBSS()) + return Attrs.getAttribute("bss-section").getValueAsString(); + else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) + return Attrs.getAttribute("rodata-section").getValueAsString(); + else if (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) + return Attrs.getAttribute("relro-section").getValueAsString(); + else if (Attrs.hasAttribute("data-section") && Kind.isData()) + return Attrs.getAttribute("data-section").getValueAsString(); + } + + return GO->getSection(); +} + /// This method computes the appropriate section to emit the
specified global /// variable or function definition. This should not be passed external (or /// available externally) globals. diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp index 9ec10b5be0fd4..70c3ebb0c4e0f 100644 --- a/llvm/lib/Target/TargetMachine.cpp +++ b/llvm/lib/Target/TargetMachine.cpp @@ -98,14 +98,20 @@ bool TargetMachine::isLargeGlobalValue(const GlobalValue *GVal) const { return true; } - // Treat all globals in explicit sections as small, except for the standard - // large sections of .lbss, .ldata, .lrodata. This reduces the risk of linking - // together small and large sections, resulting in small references to large - // data sections. The code model attribute overrides this above. - if (GV->hasSection()) { - StringRef Name = GV->getSection(); - return IsPrefix(Name, ".lbss") || IsPrefix(Name, ".ldata") || - IsPrefix(Name, ".lrodata"); + // Treat all globals in user-defined sections as small, except for the + // standard large sections of .lbss, .ldata, .lrodata. This reduces the risk + // of linking together small and large sections, resulting in small + // references to large data sections. The code model attribute overrides this + // above. + if (GV->hasSection() || GV->hasImplicitSection()) { + SectionKind Kind = TargetLoweringObjectFile::getKindForGlobal(GV, *this); + StringRef SectionName = + TargetLoweringObjectFile::getCustomSectionName(GV, Kind); + if (!SectionName.empty()) { + return IsPrefix(SectionName, ".lbss") || + IsPrefix(SectionName, ".ldata") || + IsPrefix(SectionName, ".lrodata"); + } } // Respect large data threshold for medium and large code models. 
diff --git a/llvm/test/CodeGen/X86/large-implicit-section.ll b/llvm/test/CodeGen/X86/large-implicit-section.ll new file mode 100644 index 0000000000000..87e384c88c2ee --- /dev/null +++ b/llvm/test/CodeGen/X86/large-implicit-section.ll @@ -0,0 +1,134 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=x86_64-linux-gnu -code-model=medium -relocation-model=static -large-data-threshold=16 < %s | FileCheck %s --check-prefix=STATIC +; RUN: llc -mtriple=x86_64-linux-gnu -code-model=medium -relocation-model=pic -large-data-threshold=16 < %s | FileCheck %s --check-prefix=PIC + +@small_in_large_bss = dso_local global i32 0 "bss-section"=".lbss.my_bss" +@small_in_large_data = dso_local global i32 1 "data-section"=".ldata.my_data" +@small_in_large_rodata = dso_local constant i32 2 "rodata-section"=".lrodata.my_rodata" + +@ptr = dso_local global i32 0 +@small_in_large_relro = dso_local constant ptr @ptr "relro-section"=".ldata.rel.ro.my_relro" + +@large_in_small_implicit_bss = dso_local global [4 x i64] zeroinitializer "bss-section"="my_bss" +@large_in_small_implicit_data = dso_local global [4 x i64] [i64 1, i64 0, i64 0, i64 0] "data-section"="my_data" +@large_in_small_implicit_rodata = dso_local constant [4 x i64] zeroinitializer "rodata-section"="my_rodata" +@large_in_small_implicit_relro = dso_local constant [4 x ptr] [ptr @ptr, ptr null, ptr null, ptr null] "relro-section"="my_relro" + +define i32 @load_bss() { +; STATIC-LABEL: load_bss: +; STATIC: # %bb.0: +; STATIC-NEXT: movabsq $small_in_large_bss, %rax +; STATIC-NEXT: movl (%rax), %eax +; STATIC-NEXT: retq +; +; PIC-LABEL: load_bss: +; PIC: # %bb.0: +; PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax +; PIC-NEXT: movabsq $.Lsmall_in_large_bss$local@GOTOFF, %rcx +; PIC-NEXT: movl (%rax,%rcx), %eax +; PIC-NEXT: retq + %v = load i32, ptr @small_in_large_bss + ret i32 %v +} + +define i32 @load_data() { +; STATIC-LABEL: load_data: +; STATIC: # %bb.0: 
+; STATIC-NEXT: movabsq $small_in_large_data, %rax +; STATIC-NEXT: movl (%rax), %eax +; STATIC-NEXT: retq +; +; PIC-LABEL: load_data: +; PIC: # %bb.0: +; PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax +; PIC-NEXT: movabsq $.Lsmall_in_large_data$local@GOTOFF, %rcx +; PIC-NEXT: movl (%rax,%rcx), %eax +; PIC-NEXT: retq + %v = load i32, ptr @small_in_large_data + ret i32 %v +} + +define i32 @load_rodata() { +; STATIC-LABEL: load_rodata: +; STATIC: # %bb.0: +; STATIC-NEXT: movabsq $small_in_large_rodata, %rax +; STATIC-NEXT: movl (%rax), %eax +; STATIC-NEXT: retq +; +; PIC-LABEL: load_rodata: +; PIC: # %bb.0: +; PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax +; PIC-NEXT: movabsq $.Lsmall_in_large_rodata$local@GOTOFF, %rcx +; PIC-NEXT: movl (%rax,%rcx), %eax +; PIC-NEXT: retq + %v = load i32, ptr @small_in_large_rodata + ret i32 %v +} + +define ptr @load_relro() { +; STATIC-LABEL: load_relro: +; STATIC: # %bb.0: +; STATIC-NEXT: movq small_in_large_relro(%rip), %rax +; STATIC-NEXT: retq +; +; PIC-LABEL: load_relro: +; PIC: # %bb.0: +; PIC-NEXT: leaq _GLOBAL_OFFSET_TABLE_(%rip), %rax +; PIC-NEXT: movabsq $.Lsmall_in_large_relro$local@GOTOFF, %rcx +; PIC-NEXT: movq (%rax,%rcx), %rax +; PIC-NEXT: retq + %v = load ptr, ptr @small_in_large_relro + ret ptr %v +} + +define ptr @lea_large_in_small_implicit_bss() { +; STATIC-LABEL: lea_large_in_small_implicit_bss: +; STATIC: # %bb.0: +; STATIC-NEXT: movl $large_in_small_implicit_bss, %eax +; STATIC-NEXT: retq +; +; PIC-LABEL: lea_large_in_small_implicit_bss: +; PIC: # %bb.0: +; PIC-NEXT: leaq .Llarge_in_small_implicit_bss$local(%rip), %rax +; PIC-NEXT: retq + ret ptr @large_in_small_implicit_bss +} + +define ptr @lea_large_in_small_implicit_data() { +; STATIC-LABEL: lea_large_in_small_implicit_data: +; STATIC: # %bb.0: +; STATIC-NEXT: movl $large_in_small_implicit_data, %eax +; STATIC-NEXT: retq +; +; PIC-LABEL: lea_large_in_small_implicit_data: +; PIC: # %bb.0: +; PIC-NEXT: leaq .Llarge_in_small_implicit_data$local(%rip), 
%rax +; PIC-NEXT: retq + ret ptr @large_in_small_implicit_data +} + +define ptr @lea_large_in_small_implicit_rodata() { +; STATIC-LABEL: lea_large_in_small_implicit_rodata: +; STATIC: # %bb.0: +; STATIC-NEXT: movl $large_in_small_implicit_rodata, %eax +; STATIC-NEXT: retq +; +; PIC-LABEL: lea_large_in_small_implicit_rodata: +; PIC: # %bb.0: +; PIC-NEXT: leaq .Llarge_in_small_implicit_rodata$local(%rip), %rax +; PIC-NEXT: retq + ret ptr @large_in_small_implicit_rodata +} + +define ptr @lea_large_in_small_implicit_relro() { +; STATIC-LABEL: lea_large_in_small_implicit_relro: +; STATIC: # %bb.0: +; STATIC-NEXT: movabsq $large_in_small_implicit_relro, %rax +; STATIC-NEXT: retq +; +; PIC-LABEL: lea_large_in_small_implicit_relro: +; PIC: # %bb.0: +; PIC-NEXT: leaq .Llarge_in_small_implicit_relro$local(%rip), %rax +; PIC-NEXT: retq + ret ptr @large_in_small_implicit_relro +} From 8d8f3fd5becf0791f88f4e445648810a37c9e4d1 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Thu, 25 Jun 2026 02:47:23 +0200 Subject: [PATCH 439/511] [libclc] Delete wrong implementation nvptx clc_isinf (#205699) The file calls __nv_isinf which return 1 for true on vector input, while the generic clc_isinf which return -1 for true on vector input. Using nvptx clc_isinf in OpenCL isinf violates OpenCL spec. 
Found the issue in https://github.com/intel/llvm/pull/22413 --- libclc/clc/lib/nvptx/CMakeLists.txt | 1 - libclc/clc/lib/nvptx/relational/clc_isinf.cl | 33 -------------------- 2 files changed, 34 deletions(-) delete mode 100644 libclc/clc/lib/nvptx/relational/clc_isinf.cl diff --git a/libclc/clc/lib/nvptx/CMakeLists.txt b/libclc/clc/lib/nvptx/CMakeLists.txt index 2345d5aeed77b..58b7387853c27 100644 --- a/libclc/clc/lib/nvptx/CMakeLists.txt +++ b/libclc/clc/lib/nvptx/CMakeLists.txt @@ -3,7 +3,6 @@ libclc_add_sources(${LIBCLC_CLC_TARGET} FILES math/clc_rsqrt.cl math/clc_sinpi.cl math/clc_sqrt.cl - relational/clc_isinf.cl synchronization/clc_work_group_barrier.cl workitem/clc_get_global_id.cl workitem/clc_get_global_size.cl diff --git a/libclc/clc/lib/nvptx/relational/clc_isinf.cl b/libclc/clc/lib/nvptx/relational/clc_isinf.cl deleted file mode 100644 index 3b0055e995aaf..0000000000000 --- a/libclc/clc/lib/nvptx/relational/clc_isinf.cl +++ /dev/null @@ -1,33 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "clc/relational/clc_isinf.h" - -int __nv_isinff(float); -int __nv_isinfd(double); - -_CLC_OVERLOAD _CLC_DEF int __clc_isinf(float x) { return __nv_isinff(x); } - -#ifdef cl_khr_fp64 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable - -_CLC_OVERLOAD _CLC_DEF int __clc_isinf(double x) { return __nv_isinfd(x); } - -#endif - -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -_CLC_OVERLOAD _CLC_DEF int __clc_isinf(half x) { return __clc_isinf((float)x); } - -#endif - -#define __CLC_FUNCTION __clc_isinf -#define __CLC_BODY "clc/shared/unary_def_scalarize.inc" -#define __CLC_RET_TYPE __CLC_BIT_INT -#include "clc/math/gentype.inc" From 34483c2f9c810955c0c0cf7858f0b5cdbe9dff9e Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Thu, 25 Jun 2026 08:47:58 +0800 Subject: [PATCH 440/511] [DAG] Fix illegal type in srl(bitcast(build_vector)) fold (#205074) The fold ``` (srl (bitcast (build_vector e1, ..., eN)), (N-1) * eltsize) -> (zext eN) ``` added in #181412 built the result through a narrow element integer type, which can be illegal (e.g. i16 on RV32 with the P extension, where `<2 x i16>` is legal). When the fold runs in the last DAG combine that illegal type hits the "Unexpected illegal type!" assert. Build the result directly in the result type `VT` and mask off the high bits instead: ``` (and (zext/trunc eN to VT), (1 << eltsize) - 1) ``` `VT` is always legal, so no illegal type is created and the fold no longer needs to be gated on legalization. 
Assisted-by: Opus 4.8 --------- Co-authored-by: Matt Arsenault --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 ++-- llvm/test/CodeGen/RISCV/rvp-srl-bitcast-bv.ll | 36 +++++++++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvp-srl-bitcast-bv.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 3c92f395a6453..2f2d5cb709642 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11705,11 +11705,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue LastElt = BV.getOperand(NumElts - 1); assert(LastElt.getScalarValueSizeInBits() >= EltSizeInBits && "Expected BUILD_VECTOR operand as wide as element type"); - EVT IntEltVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits); LastElt = DAG.getBitcast(LastElt.getValueType().changeTypeToInteger(), LastElt); - return DAG.getZExtOrTrunc(DAG.getZExtOrTrunc(LastElt, DL, IntEltVT), DL, - VT); + SDValue Ext = DAG.getZExtOrTrunc(LastElt, DL, VT); + APInt Mask = APInt::getLowBitsSet(VT.getSizeInBits(), EltSizeInBits); + return DAG.getNode(ISD::AND, DL, VT, Ext, + DAG.getConstant(Mask, DL, VT)); } } } diff --git a/llvm/test/CodeGen/RISCV/rvp-srl-bitcast-bv.ll b/llvm/test/CodeGen/RISCV/rvp-srl-bitcast-bv.ll new file mode 100644 index 0000000000000..0a9b3ff7c5505 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvp-srl-bitcast-bv.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+experimental-p,+m,+zbb < %s | \ +; RUN: FileCheck --check-prefixes=CHECK-RV32 %s +; RUN: llc -mtriple=riscv64 -mattr=+experimental-p,+m,+zbb < %s | \ +; RUN: FileCheck --check-prefixes=CHECK-RV64 %s + +define i16 @srl_bitcast_buildvector_extract_last(<2 x i16> %v, ptr %p) { +; CHECK-RV32-LABEL: srl_bitcast_buildvector_extract_last: +; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: li a2, 1 +; 
CHECK-RV32-NEXT: srai a3, a0, 16 +; CHECK-RV32-NEXT: div a3, a2, a3 +; CHECK-RV32-NEXT: sext.h a0, a0 +; CHECK-RV32-NEXT: div a2, a2, a0 +; CHECK-RV32-NEXT: zext.h a0, a3 +; CHECK-RV32-NEXT: pack a2, a2, a3 +; CHECK-RV32-NEXT: sw a2, 0(a1) +; CHECK-RV32-NEXT: ret +; +; CHECK-RV64-LABEL: srl_bitcast_buildvector_extract_last: +; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: li a2, 1 +; CHECK-RV64-NEXT: sext.h a3, a0 +; CHECK-RV64-NEXT: divw a3, a2, a3 +; CHECK-RV64-NEXT: slli a0, a0, 32 +; CHECK-RV64-NEXT: srai a0, a0, 48 +; CHECK-RV64-NEXT: divw a2, a2, a0 +; CHECK-RV64-NEXT: sext.h a0, a2 +; CHECK-RV64-NEXT: ppaire.h a2, a3, a2 +; CHECK-RV64-NEXT: sw a2, 0(a1) +; CHECK-RV64-NEXT: ret + %div = sdiv <2 x i16> splat (i16 1), %v + store <2 x i16> %div, ptr %p + %e = extractelement <2 x i16> %div, i64 1 + ret i16 %e +} From 7a452abe114dc5cfdb6ca403f84c14be7f34b33a Mon Sep 17 00:00:00 2001 From: Kito Cheng Date: Thu, 25 Jun 2026 08:50:11 +0800 Subject: [PATCH 441/511] [RISCV] Emit .option arch extensions without the "experimental-" prefix (#205471) We currently emit the "experimental-" prefix in .option arch, e.g. `.option arch, +experimental-zicfiss`, but the assembler can't parse that back. There are two ways to fix this: 1. Teach the assembler to accept `.option arch, +experimental-zicfiss`. 2. Emit `.option arch, +zicfiss` instead of `.option arch, +experimental-zicfiss`. This patch takes the second approach, which better fits the .option arch syntax we defined. Experimental extensions are still guarded by `-menable-experimental-extensions`. 
--- llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 4 +++- llvm/test/CodeGen/RISCV/option-arch-experimental.ll | 13 +++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/RISCV/option-arch-experimental.ll diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index ef140fc79ffd9..01ee3e01e2030 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -540,7 +540,9 @@ bool RISCVAsmPrinter::emitDirectiveOptionArch() { auto Delta = STI->hasFeature(Feature.Value) ? RISCVOptionArchArgType::Plus : RISCVOptionArchArgType::Minus; - NeedEmitStdOptionArgs.emplace_back(Delta, Feature.Key); + StringRef ExtName = Feature.Key; + ExtName.consume_front("experimental-"); + NeedEmitStdOptionArgs.emplace_back(Delta, ExtName.str()); } if (!NeedEmitStdOptionArgs.empty()) { RTS.emitDirectiveOptionPush(); diff --git a/llvm/test/CodeGen/RISCV/option-arch-experimental.ll b/llvm/test/CodeGen/RISCV/option-arch-experimental.ll new file mode 100644 index 0000000000000..b40e16acc735c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/option-arch-experimental.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple=riscv64 < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 < %s \ +; RUN: | llvm-mc -triple=riscv64 -mattr=+experimental -filetype=obj -o /dev/null + +; CHECK: .option push +; CHECK-NEXT: .option arch, +zicfiss, +zicsr, +zimop +; CHECK-NOT: experimental- +define void @f() "target-features"="+experimental-zicfiss" { +; CHECK-LABEL: f: +; CHECK: .option pop +entry: + ret void +} From 4fec16e07d221a876e9a11e4310ad754d74d9367 Mon Sep 17 00:00:00 2001 From: Jianhui Li Date: Wed, 24 Jun 2026 17:51:35 -0700 Subject: [PATCH 442/511] [MLIR][XeGPU] Fix order remapping in layout transpose (#205212) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LayoutAttr::transposeDims and LayoutAttr::isTransposeOf mishandled the `order` field when transposing a 
layout. The `order` field is fundamentally different from the size-valued fields (sg_layout, sg_data, inst_data, lane_layout, lane_data): its values are dimension indices (order[0] is the fastest-varying dim), not per-position sizes. The two require different transpose rules: - Size fields — reindex by position: new[i] = orig[perm[i]] - order — relabel values through the inverse permutation: newOrder[i] = inversePerm[origOrder[i]] Both functions incorrectly applied the size-field rule to `order`. Because the bug was applied consistently in both places, it stayed hidden for trivial/symmetric (e.g. 2D [1,0]) permutations, where the two rules happen to coincide. It only surfaces for non-trivial permutations such as the 3D [1,0,2] produced by a broadcast→transpose chain. Assist-by-Claude --------- Co-authored-by: Claude Opus 4.8 (1M context) --- mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp | 41 ++++++++++++++++--- .../XeGPU/propagate-layout-subgroup.mlir | 19 +++++++++ 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 311cf9a64c0c4..5d55342afea15 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -933,8 +933,22 @@ DistributeLayoutAttr LayoutAttr::transposeDims(ArrayRef<int64_t> permutation) { sgLayout.push_back(static_cast<int32_t>(origSgLayout[idx])); sgData.push_back(static_cast<int32_t>(origSgData[idx])); } - order.push_back(static_cast<int32_t>(origOrder[idx])); } + + // `order` is distinct from the size-valued fields above: its *values* are + // dimension indices (order[0] is the fastest-varying dim), not per-position + // sizes. A transpose relabels dimensions (source dim d becomes result dim + // inversePerm[d]) so the dimension values are remapped through the inverse + // permutation: newOrder[i] = inversePerm[origOrder[i]].
+ // + // The linearization order this describes is invariant under transpose: a + // transpose only renames dimensions, so the subgroup ID assigned to a given + // block of data must stay the same. Remapping the values through the inverse + // permutation is exactly what preserves that order. + SmallVector<int64_t> inversePermutation = + invertPermutationVector(permutation); + for (int64_t dim : origOrder) + order.push_back(static_cast<int32_t>(inversePermutation[dim])); if (origLaneLayout.empty() && origSgLayout.empty()) order.clear(); @@ -968,13 +982,30 @@ bool LayoutAttr::isTransposeOf(const xegpu::DistributeLayoutAttr &other, } return true; }; + // `order` is different: its *values* are dimension indices, so a transpose + // relabels them through the inverse permutation rather than reindexing by + // position. `this` (= dst) is a transpose of `other` (= src) iff + // dst.order[i] == inversePerm[src.order[i]] for all i. This matches the + // convention produced by `transposeDims`. + auto checkOrderTranspose = [](ArrayRef<int64_t> dstOrder, + ArrayRef<int64_t> srcOrder, + ArrayRef<int64_t> perm) { + if (dstOrder.size() != srcOrder.size()) + return false; + SmallVector<int64_t> inversePerm = invertPermutationVector(perm); + for (auto [d, s] : llvm::zip_equal(dstOrder, srcOrder)) { + if (d != inversePerm[s]) + return false; + } + return true; + }; if (kind == xegpu::LayoutKind::Subgroup) return checkTranspose(getEffectiveSgLayoutAsInt(), other.getEffectiveSgLayoutAsInt(), perm) && checkTranspose(getEffectiveSgDataAsInt(), other.getEffectiveSgDataAsInt(), perm) && - checkTranspose(getEffectiveOrderAsInt(), - other.getEffectiveOrderAsInt(), perm); + checkOrderTranspose(getEffectiveOrderAsInt(), + other.getEffectiveOrderAsInt(), perm); if (kind == xegpu::LayoutKind::InstData) return checkTranspose(getEffectiveInstDataAsInt(), other.getEffectiveInstDataAsInt(), perm); @@ -983,8 +1014,8 @@ bool LayoutAttr::isTransposeOf(const xegpu::DistributeLayoutAttr &other, other.getEffectiveLaneLayoutAsInt(), perm) && 
checkTranspose(getEffectiveLaneDataAsInt(), other.getEffectiveLaneDataAsInt(), perm) && - checkTranspose(getEffectiveOrderAsInt(), - other.getEffectiveOrderAsInt(), perm); + checkOrderTranspose(getEffectiveOrderAsInt(), + other.getEffectiveOrderAsInt(), perm); return false; } diff --git a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir index d44497d0bba34..4c6353e45cfbe 100644 --- a/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir +++ b/mlir/test/Dialect/XeGPU/propagate-layout-subgroup.mlir @@ -589,3 +589,22 @@ gpu.module @test { gpu.return } } + +// ----- +gpu.module @test { +// CHECK-LABEL: gpu.func @transpose_3d_order_remap( +// CHECK: %[[TD_LD:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<32x2x32xf16> -> +// CHECK-SAME: !xegpu.tensor_desc<32x2x32xf16, #xegpu.layout> +// CHECK: %[[LD:.*]] = xegpu.load_nd %[[TD_LD]][%{{.*}}] <{layout = #xegpu.layout}> +// CHECK: %[[TR:.*]] = vector.transpose %[[LD]], [1, 0, 2] {layout_result_0 = #xegpu.layout} +// CHECK-SAME: : vector<32x2x32xf16> to vector<2x32x32xf16> + gpu.func @transpose_3d_order_remap(%src: memref<32x2x32xf16>, %dst: memref<2x32x32xf16>) kernel { + %c0 = arith.constant 0 : index + %td_in = xegpu.create_nd_tdesc %src : memref<32x2x32xf16> -> !xegpu.tensor_desc<32x2x32xf16> + %ld = xegpu.load_nd %td_in[%c0, %c0, %c0] : !xegpu.tensor_desc<32x2x32xf16> -> vector<32x2x32xf16> + %tr = vector.transpose %ld, [1, 0, 2] : vector<32x2x32xf16> to vector<2x32x32xf16> + %td_out = xegpu.create_nd_tdesc %dst : memref<2x32x32xf16> -> !xegpu.tensor_desc<2x32x32xf16> + xegpu.store_nd %tr, %td_out[%c0, %c0, %c0] <{layout = #xegpu.layout}> : vector<2x32x32xf16>, !xegpu.tensor_desc<2x32x32xf16> + gpu.return + } +} From fd42724956b961df39d6ad608da81aa3e5418420 Mon Sep 17 00:00:00 2001 From: jinge90 Date: Thu, 25 Jun 2026 10:07:13 +0800 Subject: [PATCH 443/511] [libc][complex] Add cargf and carg functions to libc complex math (#204087) This PR adds carg and cargf 
function to libc complex and also add test cases to cover some special inputs. --------- Signed-off-by: jinge90 --- libc/config/baremetal/aarch64/entrypoints.txt | 2 + libc/config/baremetal/arm/entrypoints.txt | 2 + libc/config/baremetal/riscv/entrypoints.txt | 2 + libc/config/darwin/aarch64/entrypoints.txt | 2 + libc/config/linux/aarch64/entrypoints.txt | 2 + libc/config/linux/arm/entrypoints.txt | 2 + libc/config/linux/riscv/entrypoints.txt | 2 + libc/config/linux/x86_64/entrypoints.txt | 2 + libc/docs/headers/complex.rst | 2 +- libc/include/complex.yaml | 12 ++ libc/src/complex/CMakeLists.txt | 4 + libc/src/complex/carg.h | 25 ++++ libc/src/complex/cargf.h | 25 ++++ libc/src/complex/generic/CMakeLists.txt | 27 +++++ libc/src/complex/generic/carg.cpp | 27 +++++ libc/src/complex/generic/cargf.cpp | 27 +++++ libc/test/src/complex/CArgTest.h | 107 ++++++++++++++++++ libc/test/src/complex/CMakeLists.txt | 26 +++++ libc/test/src/complex/carg_test.cpp | 36 ++++++ libc/test/src/complex/cargf_test.cpp | 37 ++++++ 20 files changed, 370 insertions(+), 1 deletion(-) create mode 100644 libc/src/complex/carg.h create mode 100644 libc/src/complex/cargf.h create mode 100644 libc/src/complex/generic/carg.cpp create mode 100644 libc/src/complex/generic/cargf.cpp create mode 100644 libc/test/src/complex/CArgTest.h create mode 100644 libc/test/src/complex/carg_test.cpp create mode 100644 libc/test/src/complex/cargf_test.cpp diff --git a/libc/config/baremetal/aarch64/entrypoints.txt b/libc/config/baremetal/aarch64/entrypoints.txt index ed419b1de5ea5..f64eb55b40acd 100644 --- a/libc/config/baremetal/aarch64/entrypoints.txt +++ b/libc/config/baremetal/aarch64/entrypoints.txt @@ -311,6 +311,8 @@ set(TARGET_LIBC_ENTRYPOINTS set(TARGET_LIBM_ENTRYPOINTS # complex.h entrypoints + libc.src.complex.carg + libc.src.complex.cargf libc.src.complex.creal libc.src.complex.crealf libc.src.complex.creall diff --git a/libc/config/baremetal/arm/entrypoints.txt 
b/libc/config/baremetal/arm/entrypoints.txt index 2dde53d2374a5..55a175923faa1 100644 --- a/libc/config/baremetal/arm/entrypoints.txt +++ b/libc/config/baremetal/arm/entrypoints.txt @@ -323,6 +323,8 @@ set(TARGET_LIBC_ENTRYPOINTS set(TARGET_LIBM_ENTRYPOINTS # complex.h entrypoints + libc.src.complex.carg + libc.src.complex.cargf libc.src.complex.creal libc.src.complex.crealf libc.src.complex.creall diff --git a/libc/config/baremetal/riscv/entrypoints.txt b/libc/config/baremetal/riscv/entrypoints.txt index aa1f92a3ce6c9..201d2e9cbf97b 100644 --- a/libc/config/baremetal/riscv/entrypoints.txt +++ b/libc/config/baremetal/riscv/entrypoints.txt @@ -321,6 +321,8 @@ set(TARGET_LIBC_ENTRYPOINTS set(TARGET_LIBM_ENTRYPOINTS # complex.h entrypoints + libc.src.complex.carg + libc.src.complex.cargf libc.src.complex.creal libc.src.complex.crealf libc.src.complex.creall diff --git a/libc/config/darwin/aarch64/entrypoints.txt b/libc/config/darwin/aarch64/entrypoints.txt index 894d90143d09f..413534a14f34e 100644 --- a/libc/config/darwin/aarch64/entrypoints.txt +++ b/libc/config/darwin/aarch64/entrypoints.txt @@ -132,6 +132,8 @@ endif() set(TARGET_LIBM_ENTRYPOINTS # complex.h entrypoints + libc.src.complex.carg + libc.src.complex.cargf libc.src.complex.creal libc.src.complex.crealf libc.src.complex.creall diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt index 251913fd57b99..599ae32a35073 100644 --- a/libc/config/linux/aarch64/entrypoints.txt +++ b/libc/config/linux/aarch64/entrypoints.txt @@ -444,6 +444,8 @@ endif() set(TARGET_LIBM_ENTRYPOINTS # complex.h entrypoints + libc.src.complex.carg + libc.src.complex.cargf libc.src.complex.creal libc.src.complex.crealf libc.src.complex.creall diff --git a/libc/config/linux/arm/entrypoints.txt b/libc/config/linux/arm/entrypoints.txt index 805738a3a5756..d610e663229ba 100644 --- a/libc/config/linux/arm/entrypoints.txt +++ b/libc/config/linux/arm/entrypoints.txt @@ -240,6 +240,8 @@ endif() 
set(TARGET_LIBM_ENTRYPOINTS # complex.h entrypoints + libc.src.complex.carg + libc.src.complex.cargf libc.src.complex.creal libc.src.complex.crealf libc.src.complex.creall diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt index a9839b44a0042..ef2d13e6d6f9e 100644 --- a/libc/config/linux/riscv/entrypoints.txt +++ b/libc/config/linux/riscv/entrypoints.txt @@ -448,6 +448,8 @@ endif() set(TARGET_LIBM_ENTRYPOINTS # complex.h entrypoints + libc.src.complex.carg + libc.src.complex.cargf libc.src.complex.creal libc.src.complex.crealf libc.src.complex.creall diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 037c8b50c352e..8046679785e68 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -514,6 +514,8 @@ endif() set(TARGET_LIBM_ENTRYPOINTS # complex.h entrypoints + libc.src.complex.carg + libc.src.complex.cargf libc.src.complex.creal libc.src.complex.crealf libc.src.complex.creall diff --git a/libc/docs/headers/complex.rst b/libc/docs/headers/complex.rst index ba7d73eb499ac..1a0137d6dea51 100644 --- a/libc/docs/headers/complex.rst +++ b/libc/docs/headers/complex.rst @@ -51,7 +51,7 @@ Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | csqrt | | | | | | 7.3.8.3 | G.6.5.2 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| carg | | | | | | 7.3.9.1 | N/A | +| carg | |check| | 1 ULP | | | | 7.3.9.1 | N/A | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | cimag | |check| | |check| | |check| | |check| | |check| | 7.3.9.2 | N/A | 
+-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/include/complex.yaml b/libc/include/complex.yaml index 4db42be6082da..29799799d2e6b 100644 --- a/libc/include/complex.yaml +++ b/libc/include/complex.yaml @@ -21,6 +21,18 @@ types: enums: [] objects: [] functions: + - name: carg + standards: + - stdc + return_type: double + arguments: + - type: _Complex double + - name: cargf + standards: + - stdc + return_type: float + arguments: + - type: _Complex float - name: cimag standards: - stdc diff --git a/libc/src/complex/CMakeLists.txt b/libc/src/complex/CMakeLists.txt index bc66a5445d727..1ebc1e8ac6141 100644 --- a/libc/src/complex/CMakeLists.txt +++ b/libc/src/complex/CMakeLists.txt @@ -36,3 +36,7 @@ add_complex_entrypoint_object(cprojf) add_complex_entrypoint_object(cprojl) add_complex_entrypoint_object(cprojf16) add_complex_entrypoint_object(cprojf128) + +add_complex_entrypoint_object(carg) +add_complex_entrypoint_object(cargf) + diff --git a/libc/src/complex/carg.h b/libc/src/complex/carg.h new file mode 100644 index 0000000000000..66b92656333fc --- /dev/null +++ b/libc/src/complex/carg.h @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of carg. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_COMPLEX_CARG_H +#define LLVM_LIBC_SRC_COMPLEX_CARG_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +double carg(_Complex double x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_COMPLEX_CARG_H diff --git a/libc/src/complex/cargf.h b/libc/src/complex/cargf.h new file mode 100644 index 0000000000000..03faca6aef999 --- /dev/null +++ b/libc/src/complex/cargf.h @@ -0,0 +1,25 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the declaration of cargf. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_COMPLEX_CARGF_H +#define LLVM_LIBC_SRC_COMPLEX_CARGF_H + +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +float cargf(_Complex float x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_COMPLEX_CARGF_H diff --git a/libc/src/complex/generic/CMakeLists.txt b/libc/src/complex/generic/CMakeLists.txt index f1c21e4ed7271..159118182384e 100644 --- a/libc/src/complex/generic/CMakeLists.txt +++ b/libc/src/complex/generic/CMakeLists.txt @@ -1,3 +1,30 @@ +add_entrypoint_object( + carg + SRCS + carg.cpp + HDRS + ../carg.h + DEPENDS + libc.src.__support.CPP.bit + libc.src.__support.common + libc.src.__support.complex_type + libc.src.__support.math.atan2 +) + +add_entrypoint_object( + cargf + SRCS + cargf.cpp + HDRS + ../cargf.h + DEPENDS + libc.src.__support.CPP.bit + libc.src.__support.common + libc.src.__support.complex_type + 
libc.src.__support.math.atan2f +) + + add_entrypoint_object( cproj SRCS diff --git a/libc/src/complex/generic/carg.cpp b/libc/src/complex/generic/carg.cpp new file mode 100644 index 0000000000000..4d5b0b9cf7f65 --- /dev/null +++ b/libc/src/complex/generic/carg.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the implementation of carg. +/// +//===----------------------------------------------------------------------===// + +#include "src/complex/carg.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" +#include "src/__support/complex_type.h" +#include "src/__support/math/atan2.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(double, carg, (_Complex double x)) { + Complex<double> x_c = cpp::bit_cast<Complex<double>>(x); + return math::atan2(x_c.imag, x_c.real); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/complex/generic/cargf.cpp b/libc/src/complex/generic/cargf.cpp new file mode 100644 index 0000000000000..a0eb88303a0b1 --- /dev/null +++ b/libc/src/complex/generic/cargf.cpp @@ -0,0 +1,27 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the implementation of cargf. 
+/// +//===----------------------------------------------------------------------===// + +#include "src/complex/cargf.h" +#include "src/__support/CPP/bit.h" +#include "src/__support/common.h" +#include "src/__support/complex_type.h" +#include "src/__support/math/atan2f.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(float, cargf, (_Complex float x)) { + Complex<float> x_c = cpp::bit_cast<Complex<float>>(x); + return math::atan2f(x_c.imag, x_c.real); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/complex/CArgTest.h b/libc/test/src/complex/CArgTest.h new file mode 100644 index 0000000000000..10061aac36ebe --- /dev/null +++ b/libc/test/src/complex/CArgTest.h @@ -0,0 +1,107 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains utility class to test different flavors of carg. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TEST_SRC_COMPLEX_CARGTEST_H +#define LLVM_LIBC_TEST_SRC_COMPLEX_CARGTEST_H + +#include "test/UnitTest/FEnvSafeTest.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +#include "hdr/math_macros.h" + +template <typename CFPT, typename FPT> +class CArgTest : public LIBC_NAMESPACE::testing::FEnvSafeTest { + + DECLARE_SPECIAL_CONSTANTS(FPT) + +public: + using CArgFunc = FPT (*)(CFPT); + + void testZeroValues(CArgFunc func) { + EXPECT_FP_EQ(func(CFPT{0.0, 0.0}), zero); + EXPECT_FP_EQ(func(CFPT{0.0, -0.0}), neg_zero); + EXPECT_FP_EQ(func(CFPT{1.0, 0.0}), zero); + EXPECT_FP_EQ(func(CFPT{128.0, 0.0}), zero); + EXPECT_FP_EQ(func(CFPT{1.0, -0.0}), neg_zero); + } + + void testInfinityValues(CArgFunc func) { + // carg(+inf + yi) = +0 for finite y > 0 + EXPECT_FP_EQ(zero, func(CFPT{inf, 1.0})); + EXPECT_FP_EQ(zero, func(CFPT{inf, 256.0})); + // carg(+inf - yi) = -0 for finite y > 0 + EXPECT_FP_EQ(neg_zero, func(CFPT{inf, -1.0})); + EXPECT_FP_EQ(neg_zero, func(CFPT{inf, -1024.0})); + // carg(+inf + 0i) = +0 + EXPECT_FP_EQ(zero, func(CFPT{inf, 0.0})); + // carg(+inf - 0i) = -0 + EXPECT_FP_EQ(neg_zero, func(CFPT{inf, -0.0})); + + // carg(-inf + yi) = +pi for finite y > 0 + EXPECT_FP_EQ(FPT(M_PI), func(CFPT{neg_inf, 1.0})); + EXPECT_FP_EQ(FPT(M_PI), func(CFPT{neg_inf, 64.0})); + // carg(-inf - yi) = -pi for finite y > 0 + EXPECT_FP_EQ(FPT(-M_PI), func(CFPT{neg_inf, -1.0})); + EXPECT_FP_EQ(FPT(-M_PI), func(CFPT{neg_inf, -512.0})); + // carg(-inf + 0i) = +pi + EXPECT_FP_EQ(FPT(M_PI), func(CFPT{neg_inf, 0.0})); + // carg(-inf - 0i) = -pi + EXPECT_FP_EQ(FPT(-M_PI), func(CFPT{neg_inf, -0.0})); + + // carg(x + inf*i) = +pi/2 for finite x + EXPECT_FP_EQ(FPT(M_PI_2), func(CFPT{1.0, inf})); + EXPECT_FP_EQ(FPT(M_PI_2), func(CFPT{-1.0, inf})); + EXPECT_FP_EQ(FPT(M_PI_2), func(CFPT{0.0, inf})); + EXPECT_FP_EQ(FPT(M_PI_2), func(CFPT{4.0, inf})); + // carg(x - inf*i) = -pi/2 for finite x + 
EXPECT_FP_EQ(FPT(-M_PI_2), func(CFPT{1.0, neg_inf})); + EXPECT_FP_EQ(FPT(-M_PI_2), func(CFPT{-1.0, neg_inf})); + EXPECT_FP_EQ(FPT(-M_PI_2), func(CFPT{0.0, neg_inf})); + EXPECT_FP_EQ(FPT(-M_PI_2), func(CFPT{4.0, neg_inf})); + } + + void testNaNValues(CArgFunc func) { + // carg(NaN + yi) = NaN for finite y + EXPECT_FP_IS_NAN(func(CFPT{aNaN, 0.0})); + EXPECT_FP_IS_NAN(func(CFPT{aNaN, 1.0})); + EXPECT_FP_IS_NAN(func(CFPT{aNaN, -1.0})); + EXPECT_FP_IS_NAN(func(CFPT{aNaN, 512.0})); + + // carg(x + NaN*i) = NaN for finite x + EXPECT_FP_IS_NAN(func(CFPT{0.0, aNaN})); + EXPECT_FP_IS_NAN(func(CFPT{1.0, aNaN})); + EXPECT_FP_IS_NAN(func(CFPT{-1.0, aNaN})); + EXPECT_FP_IS_NAN(func(CFPT{4.0, aNaN})); + + // carg(NaN + NaN*i) = NaN + EXPECT_FP_IS_NAN(func(CFPT{aNaN, aNaN})); + + // carg(+inf + NaN*i) = NaN + EXPECT_FP_IS_NAN(func(CFPT{inf, aNaN})); + // carg(-inf + NaN*i) = NaN + EXPECT_FP_IS_NAN(func(CFPT{neg_inf, aNaN})); + // carg(NaN + inf*i) = NaN + EXPECT_FP_IS_NAN(func(CFPT{aNaN, inf})); + // carg(NaN - inf*i) = NaN + EXPECT_FP_IS_NAN(func(CFPT{aNaN, neg_inf})); + } +}; + +#define LIST_CARG_TESTS(U, T, func) \ + using LlvmLibcCArgTest = CArgTest<U, T>; \ + TEST_F(LlvmLibcCArgTest, ZeroValues) { testZeroValues(&func); } \ + TEST_F(LlvmLibcCArgTest, InfinityValues) { testInfinityValues(&func); } \ + TEST_F(LlvmLibcCArgTest, NaNValues) { testNaNValues(&func); } + +#endif // LLVM_LIBC_TEST_SRC_COMPLEX_CARGTEST_H diff --git a/libc/test/src/complex/CMakeLists.txt b/libc/test/src/complex/CMakeLists.txt index efd1ede63eca5..fc16cf0d92ea2 100644 --- a/libc/test/src/complex/CMakeLists.txt +++ b/libc/test/src/complex/CMakeLists.txt @@ -1,5 +1,31 @@ add_custom_target(libc-complex-unittests) +add_fp_unittest( + carg_test + NEED_MPC + SUITE + libc-complex-unittests + SRCS + carg_test.cpp + HDRS + CArgTest.h + DEPENDS + libc.src.complex.carg +) + +add_fp_unittest( + cargf_test + NEED_MPC + SUITE + libc-complex-unittests + SRCS + cargf_test.cpp + HDRS + CArgTest.h + DEPENDS + 
libc.src.complex.cargf +) + add_fp_unittest( conj_test SUITE diff --git a/libc/test/src/complex/carg_test.cpp b/libc/test/src/complex/carg_test.cpp new file mode 100644 index 0000000000000..94ed815642001 --- /dev/null +++ b/libc/test/src/complex/carg_test.cpp @@ -0,0 +1,36 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains unittest for carg. +/// +//===----------------------------------------------------------------------===// + +#include "CArgTest.h" + +#include "src/complex/carg.h" +#include "utils/MPCWrapper/MPCUtils.h" + +using LlvmLibcCargMPCTest = LIBC_NAMESPACE::testing::FPTest<double>; + +namespace mpc = LIBC_NAMESPACE::testing::mpc; + +TEST_F(LlvmLibcCargMPCTest, BasicForRounding) { + _Complex double test_values[] = { + 1.0 + 1.0i, -1.0 + 1.0i, 1.0 - 1.0i, -1.0 - 1.0i, 3.0 + 4.0i, + -3.0 + 4.0i, 3.0 - 4.0i, -3.0 - 4.0i, 0.5 + 0.5i, -0.5 + 0.5i, + 0.5 - 0.5i, -0.5 - 0.5i, 1.0 + 0.0i, -1.0 + 0.0i, 0.0 + 1.0i, + 0.0 - 1.0i, 5.0 + 12.0i, 100.0 + 1.0i, 0.001 + 1000.0i, + }; + for (_Complex double val : test_values) { + EXPECT_MPC_MATCH_ALL_ROUNDING(mpc::Operation::Carg, val, + LIBC_NAMESPACE::carg(val), 0.5); + } +} + +LIST_CARG_TESTS(_Complex double, double, LIBC_NAMESPACE::carg) diff --git a/libc/test/src/complex/cargf_test.cpp b/libc/test/src/complex/cargf_test.cpp new file mode 100644 index 0000000000000..ba0b0e0fc2af4 --- /dev/null +++ b/libc/test/src/complex/cargf_test.cpp @@ -0,0 +1,37 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains unittest for cargf. +/// +//===----------------------------------------------------------------------===// + +#include "CArgTest.h" + +#include "src/complex/cargf.h" +#include "utils/MPCWrapper/MPCUtils.h" + +using LlvmLibcCargfMPCTest = LIBC_NAMESPACE::testing::FPTest<float>; + +namespace mpc = LIBC_NAMESPACE::testing::mpc; + +TEST_F(LlvmLibcCargfMPCTest, BasicForRounding) { + _Complex float test_values[] = { + 1.0f + 1.0fi, -1.0f + 1.0fi, 1.0f - 1.0fi, -1.0f - 1.0fi, + 3.0f + 4.0fi, -3.0f + 4.0fi, 3.0f - 4.0fi, -3.0f - 4.0fi, + 0.5f + 0.5fi, -0.5f + 0.5fi, 0.5f - 0.5fi, -0.5f - 0.5fi, + 1.0f + 0.0fi, -1.0f + 0.0fi, 0.0f + 1.0fi, 0.0f - 1.0fi, + 5.0f + 12.0fi, 100.0f + 1.0fi, 0.001f + 1000.0fi, + }; + for (_Complex float val : test_values) { + EXPECT_MPC_MATCH_ALL_ROUNDING(mpc::Operation::Carg, val, + LIBC_NAMESPACE::cargf(val), 0.5); + } +} + +LIST_CARG_TESTS(_Complex float, float, LIBC_NAMESPACE::cargf) From 27859ee3c11895954fa222d41d72a9c6bb51135e Mon Sep 17 00:00:00 2001 From: TPPPP Date: Thu, 25 Jun 2026 10:27:34 +0800 Subject: [PATCH 444/511] [Clang] Fixed an assertion in constant evaluation when using a defaulted comparison operator in a union (#198830) Fixes an assertion failure by decoupling `IsTrivialMemoryOperation` from assignment operators. 
fix #147127 --- clang/docs/ReleaseNotes.rst | 1 + clang/lib/AST/ExprConstant.cpp | 19 +++++++++++++------ clang/test/SemaCXX/gh147127.cpp | 12 ++++++++++++ 3 files changed, 26 insertions(+), 6 deletions(-) create mode 100644 clang/test/SemaCXX/gh147127.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 4ca239ca5f2e4..5eea73ad9dc83 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -731,6 +731,7 @@ Bug Fixes in This Version EOF handling from accidentally restoring CLK_CachingLexer while a tentative parse is still active, which could trigger a caching lexer re-entry assertion in clangd signature help. (#GH200677) - Fixed a crash when ``#embed`` is used with C++ modules (#GH195350) +- Fixed an assertion in constant evaluation when using a defaulted comparison operator in a ``union``. (#GH147127) - Fixed a bug where ``-x cuda`` caused clang to immediately resolve templates that should not be. (#GH200545) - Fixed an issue where ``__typeof_unqual`` and ``__typeof_unqual__`` were rejected as a declaration specifier in block scope in C++. - Fixed crash when checking for overflow for unary operator that can't overflow (#GH170072) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index b628669880f2b..5ee27dd4e2ba2 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -7016,13 +7016,20 @@ static bool HandleFunctionCall(SourceLocation CallLoc, // Skip this for non-union classes with no fields; in that case, the defaulted // copy/move does not actually read the object. 
const CXXMethodDecl *MD = dyn_cast(Callee); - if (MD && MD->isDefaulted() && - (MD->getParent()->isUnion() || - (MD->isTrivial() && - isReadByLvalueToRvalueConversion(MD->getParent())))) { + + auto IsTrivialMemoryOperation = [&](const CXXMethodDecl *MD) { + if (!MD || !MD->isDefaulted()) + return false; + if (!MD->isCopyAssignmentOperator() && !MD->isMoveAssignmentOperator()) + return false; + return MD->getParent()->isUnion() || + (MD->isTrivial() && + isReadByLvalueToRvalueConversion(MD->getParent())); + }; + + if (IsTrivialMemoryOperation(MD)) { unsigned ExplicitOffset = MD->isExplicitObjectMemberFunction() ? 1 : 0; - assert(ObjectArg && - (MD->isCopyAssignmentOperator() || MD->isMoveAssignmentOperator())); + assert(ObjectArg); APValue RHSValue; if (!handleTrivialCopy(Info, MD->getParamDecl(0), Args[0], RHSValue, MD->getParent()->isUnion())) diff --git a/clang/test/SemaCXX/gh147127.cpp b/clang/test/SemaCXX/gh147127.cpp new file mode 100644 index 0000000000000..35bdc663c798f --- /dev/null +++ b/clang/test/SemaCXX/gh147127.cpp @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -fsyntax-only -std=c++17 -Wc++20-extensions -verify=cxx17 %s +// RUN: %clang_cc1 -fsyntax-only -std=c++20 -verify=cxx20 %s +// RUN: %clang_cc1 -fsyntax-only -std=c++17 -Wc++20-extensions -fexperimental-new-constant-interpreter -verify=cxx17 %s +// RUN: %clang_cc1 -fsyntax-only -std=c++20 -fexperimental-new-constant-interpreter -verify=cxx20 %s + +union A { + // cxx20-no-diagnostics + bool operator==(const A&) const = default; // cxx17-warning {{defaulted comparison operators are a C++20 extension}} +}; + +A a; +bool b = a == a; From 585260050f7071045007f361d2615a66314118ef Mon Sep 17 00:00:00 2001 From: I <1091761+wx257osn2@users.noreply.github.com> Date: Thu, 25 Jun 2026 11:35:49 +0900 Subject: [PATCH 445/511] [Clang] Transform SubstNonTypeTemplateParmExpr replacements in a constant-evaluated context (#196791) Fixes #175831. 
When transforming a `SubstNonTypeTemplateParmExpr`, `TreeTransform::TransformSubstNonTypeTemplateParmExpr` calls `Sema::CheckTemplateArgument` so that any sema annotations (such as implicit casts) that were stripped from the replacement are recovered. This is done in whatever evaluation context the node happens to appear in after substitutions. Since the normalization of constraints, a `SubstNonTypeTemplateParmExpr` can end up inside an unevaluated operand, so the replacement gets rebuilt in an unevaluated context. Entities it refers to are then not odr-used: for example, when a call materializes a by-value function parameter of class type, the copy constructor is never marked odr-used and its definition is never instantiated. The constant evaluation performed by `CheckTemplateArgument` afterwards then fails for otherwise valid arguments. This caused rejects-valid regressions on real code such as mp-units. This PR inserts a constant-evaluated context in `TransformSubstNonTypeTemplateParmExpr`, so that the replacement is rebuilt and re-checked on the context, regardless of where normalization has placed it. -- Co-authored-by: A. Jiang Co-authored-by: Matheus Izvekov --- clang/docs/ReleaseNotes.rst | 2 +- clang/lib/Sema/TreeTransform.h | 13 ++++++ clang/test/SemaTemplate/concepts.cpp | 68 ++++++++++++++++++++++++++-- 3 files changed, 79 insertions(+), 4 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 5eea73ad9dc83..4db6cc0777ede 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -736,8 +736,8 @@ Bug Fixes in This Version - Fixed an issue where ``__typeof_unqual`` and ``__typeof_unqual__`` were rejected as a declaration specifier in block scope in C++. - Fixed crash when checking for overflow for unary operator that can't overflow (#GH170072) - Clang no longer handles a `" q-char-sequence "` header name as a string literal (#GH132643). 
-- Fixed an assertion when ``__attribute__((alloc_size))`` is used with an argument type wider than the target's pointer width. (#GH190445) - Fixed an assertion where we improperly handled implicit conversions to integral types from an atomic-type with a conversion function. (#GH201770) +- Fixed a regression where calling a function that takes a class-type parameter by value inside ``decltype`` of a concept could be incorrectly rejected when used as a non-type template argument. (#GH175831) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 53107c827006d..3b99ff4bb9e23 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -16723,6 +16723,19 @@ template ExprResult TreeTransform::TransformSubstNonTypeTemplateParmExpr( SubstNonTypeTemplateParmExpr *E) { Expr *OrigReplacement = E->getReplacement()->IgnoreImplicitAsWritten(); + + // Insert a constant-evaluated context for the transform. + // Otherwise, when a normalized constraint places the replacement inside + // an unevaluated operand (e.g. decltype), entities it refers to are not + // odr-used, and the constant evaluation performed by CheckTemplateArgument + // below can spuriously fail for otherwise valid replacements, + // e.g. when a call materializes a function parameter of class type whose + // special members were never instantiated. 
+ EnterExpressionEvaluationContext ConstantEvaluated( + SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated, + Sema::ReuseLambdaContextDecl, + Sema::ExpressionEvaluationContextRecord::EK_TemplateArgument); + ExprResult Replacement = getDerived().TransformExpr(OrigReplacement); if (Replacement.isInvalid()) return true; diff --git a/clang/test/SemaTemplate/concepts.cpp b/clang/test/SemaTemplate/concepts.cpp index b427e6a4d2fa5..6f7f00bf12e61 100644 --- a/clang/test/SemaTemplate/concepts.cpp +++ b/clang/test/SemaTemplate/concepts.cpp @@ -1,5 +1,5 @@ -// RUN: %clang_cc1 -std=c++20 -ferror-limit 0 -verify=expected,cxx20 %s -// RUN: %clang_cc1 -std=c++2c -ferror-limit 0 -verify=expected %s +// RUN: %clang_cc1 -std=c++20 -ferror-limit 0 -fexceptions -fcxx-exceptions -verify=expected,cxx20 %s +// RUN: %clang_cc1 -std=c++2c -ferror-limit 0 -fexceptions -fcxx-exceptions -verify=expected %s namespace PR47043 { template concept True = true; @@ -1851,7 +1851,6 @@ namespace GH191016 { void test(){ S s; } } - namespace GH188640 { namespace Ex1 { @@ -2002,3 +2001,66 @@ namespace GH196375 { static_assert(f<4>()); // expected-error@-1 {{no matching function for call to 'f'}} } // namespace GHGH196375 + +namespace GH175831 { + +namespace ShouldResolve { + +template +struct reference {}; +template +consteval Q get_spec(reference) { return {}; } + +template +concept repr_impl = sizeof(T) > 0; +template +concept representation_of = repr_impl; +template> +struct quantity {}; + +auto x = quantity{}, int>{}; + +} // namespace ShouldResolve + +namespace CannotResolve0 { + +template +struct reference {}; +template +consteval auto get_spec(reference) { return Q{}; } + +template +concept repr_impl = sizeof(T) > sizeof(char); +template +concept representation_of = repr_impl; +template> +struct quantity {}; + +auto x = quantity{}, char>{}; +// expected-error@-1 {{constraints not satisfied for class template 'quantity' [with V = reference{}, $1 = char]}} +// expected-note@-5 {{because 
'representation_of{})>' evaluated to false}} +// expected-note-re@-7 {{because 'decltype({{.*}})' (aka 'char') does not satisfy 'repr_impl'}} +// expected-note@-10 {{because 'sizeof(char) > sizeof(char)' (1 > 1) evaluated to false}} + +} // namespace CannotResolve0 + +namespace CannotResolve1 { + +template +struct reference {}; +template +consteval Q get_spec(reference) { throw; } + +template +concept repr_impl = sizeof(T) > 0; +template +concept representation_of = repr_impl; +template> +struct quantity {}; + +auto x = quantity{}, int>{}; +// expected-error@-1 {{constraints not satisfied for class template 'quantity' [with V = reference{}, $1 = int]}} + +} // namespace CannotResolve1 + +} // namespace GH175831 From 41360c117158e8c9271ad28bb16259ea74803c53 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Wed, 24 Jun 2026 20:27:50 -0700 Subject: [PATCH 446/511] Fix ProcessElfCore::FindModuleUUID() so it work with symlinks. (#205235) ProcessElfCore was reading the NT_FILE list and using that to help FindModuleUUID to provide UUID information when loading core files. The NT_FILE list contains resolved paths only, while the DynamicLoaderPOSIXDYLD plug-in was using paths found in the r_debug structure which contains a linked list of all of the shared libraries in a process. The issue was these paths could be symlinks which would cause ProcessELFCore::FindModuleUUID(...) to fail because the paths wouldn't match up. This led to the ProcessELFCore often not being able to provide UUIDs for shared libraries and cause the incorrect binaries to be loaded from the current machine even when the shared library UUIDs don't match. The solution was to add the ability for a ModuleSpec to contain a load address for the shared library. This allows ProcessELFCore to uniquely identify a library regardless of the name used in NT_FILE. 
We can now correctly supply the UUID from the .gnu-build-id to any binaries which use symlinks when linking, but have differing resolved paths to the libraries. The process virtual function for finding a UUID was changed from: virtual lldb_private::UUID FindModuleUUID(const llvm::StringRef path); to: virtual bool FindModuleUUID(ModuleSpec &spec); to allow Process::FindModuleUUID to rely on other data in the ModuleSpec since the path isn't enough. We will be able to use the ModuleSpec's load address for creating a module from a ModuleSpec, but that isn't in this PR. --- lldb/include/lldb/Core/ModuleSpec.h | 30 ++++ lldb/include/lldb/Target/DynamicLoader.h | 4 +- lldb/include/lldb/Target/Process.h | 13 +- lldb/source/Core/DynamicLoader.cpp | 17 +-- .../POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp | 4 +- .../Process/elf-core/ProcessElfCore.cpp | 39 ++++- .../Plugins/Process/elf-core/ProcessElfCore.h | 2 +- lldb/source/Target/Process.cpp | 4 +- .../postmortem/elf-core/TestLinuxCore.py | 68 +++++++++ .../elf-core/elf-dyld-nt-file-mismatch.yaml | 135 ++++++++++++++++++ 10 files changed, 293 insertions(+), 23 deletions(-) create mode 100644 lldb/test/API/functionalities/postmortem/elf-core/elf-dyld-nt-file-mismatch.yaml diff --git a/lldb/include/lldb/Core/ModuleSpec.h b/lldb/include/lldb/Core/ModuleSpec.h index 0306dfc280e57..f74641ef0cf9f 100644 --- a/lldb/include/lldb/Core/ModuleSpec.h +++ b/lldb/include/lldb/Core/ModuleSpec.h @@ -118,6 +118,15 @@ class ModuleSpec { void SetObjectSize(uint64_t object_size) { m_object_size = object_size; } + /// Get the load address of a module in process memory. If the optional + /// has no value, there is no load address for this module spec. + std::optional GetLoadAddress() const { return m_load_addr; } + + /// Set the load address of a module in process memory. 
+ void SetLoadAddress(lldb::addr_t addr) { m_load_addr = addr; } + + void ClearLoadAddress() { m_load_addr.reset(); } + llvm::sys::TimePoint<> &GetObjectModificationTime() { return m_object_mod_time; } @@ -159,9 +168,11 @@ class ModuleSpec { m_object_offset = 0; m_object_size = 0; m_source_mappings.Clear(false); + m_extractor_sp.reset(); m_object_mod_time = llvm::sys::TimePoint<>(); m_target_wp.reset(); m_platform_wp.reset(); + m_load_addr.reset(); } explicit operator bool() const { @@ -181,6 +192,8 @@ class ModuleSpec { return true; if (m_object_mod_time != llvm::sys::TimePoint<>()) return true; + if (m_load_addr.has_value()) + return true; return false; } @@ -245,6 +258,13 @@ class ModuleSpec { strm.PutCString(", "); strm.Format("object_mod_time = {0:x+}", uint64_t(llvm::sys::toTimeT(m_object_mod_time))); + dumped_something = true; + } + if (m_load_addr.has_value()) { + if (dumped_something) + strm.PutCString(", "); + strm.Printf("load_addr = 0x%" PRIx64, m_load_addr.value()); + dumped_something = true; } } @@ -280,6 +300,10 @@ class ModuleSpec { return false; } } + // Only match on load address if they both have a valid value. + if (m_load_addr.has_value() && match_module_spec.m_load_addr.has_value() && + match_module_spec.GetLoadAddress() != GetLoadAddress()) + return false; return true; } @@ -303,6 +327,12 @@ class ModuleSpec { llvm::sys::TimePoint<> m_object_mod_time; mutable PathMappingList m_source_mappings; lldb::DataExtractorSP m_extractor_sp = {}; + /// The load address of the module in a process. This allows for modules + /// to be uniquely identified and created by reading an object file from + /// memory when we can't locate the correct file on disk. Useful for post + /// mortem debugging when we might not be able to locate symbols for the + /// core file, but we can read the object file from memory. 
+ std::optional m_load_addr = std::nullopt; }; class ModuleSpecList { diff --git a/lldb/include/lldb/Target/DynamicLoader.h b/lldb/include/lldb/Target/DynamicLoader.h index 784da7567fc4a..e85036c2887d1 100644 --- a/lldb/include/lldb/Target/DynamicLoader.h +++ b/lldb/include/lldb/Target/DynamicLoader.h @@ -359,8 +359,8 @@ class DynamicLoader : public PluginInterface { protected: // Utility methods for derived classes - /// Find a module in the target that matches the given file. - lldb::ModuleSP FindModuleViaTarget(const FileSpec &file); + /// Find a module in the target that matches the given module spec. + lldb::ModuleSP FindModuleViaTarget(const ModuleSpec &module_spec); /// Checks to see if the target module has changed, updates the target /// accordingly and returns the target executable module. diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index 8432c326d3281..cdf0268ad6163 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -1421,7 +1421,18 @@ class Process : public std::enable_shared_from_this, virtual bool GetProcessInfo(ProcessInstanceInfo &info); - virtual lldb_private::UUID FindModuleUUID(const llvm::StringRef path); + /// Given a module spec, try to find the UUID information. + /// + /// \param [in,out] spec + /// A module specification with as much detail as possible about the + /// module for which we are trying to find a UUID. The + /// ModuleSpec.m_file should be filled in. If a dynamic loader is + /// calling this, the load address of the module can be filled in as + /// well. Sometimes the file path for a library can be a symlink and + /// the load address can help resolve the module. + /// + /// \return True if the UUID was added, false otherwise. + virtual bool FindModuleUUID(ModuleSpec &spec); /// Get the exit status for a process. 
/// diff --git a/lldb/source/Core/DynamicLoader.cpp b/lldb/source/Core/DynamicLoader.cpp index 0259e7e63a161..dd771a7a3a8ae 100644 --- a/lldb/source/Core/DynamicLoader.cpp +++ b/lldb/source/Core/DynamicLoader.cpp @@ -151,14 +151,12 @@ DynamicLoader::GetSectionListFromModule(const ModuleSP module) const { return sections; } -ModuleSP DynamicLoader::FindModuleViaTarget(const FileSpec &file) { +ModuleSP DynamicLoader::FindModuleViaTarget(const ModuleSpec &spec) { + ModuleSpec module_spec(spec); Target &target = m_process->GetTarget(); - ModuleSpec module_spec(file, target.GetArchitecture()); - if (UUID uuid = m_process->FindModuleUUID(file.GetPath())) { - // Process may be able to augment the module_spec with UUID, e.g. ELF core. - module_spec.GetUUID() = uuid; - } - + // The process may be able to augment the module_spec with a UUID. + if (!module_spec.GetUUID().IsValid()) + m_process->FindModuleUUID(module_spec); if (ModuleSP module_sp = target.GetImages().FindFirstModule(module_spec)) return module_sp; @@ -173,7 +171,10 @@ ModuleSP DynamicLoader::LoadModuleAtAddress(const FileSpec &file, addr_t link_map_addr, addr_t base_addr, bool base_addr_is_offset) { - ModuleSP module_sp = FindModuleViaTarget(file); + Target &target = m_process->GetTarget(); + ModuleSpec module_spec(file, target.GetArchitecture()); + module_spec.SetLoadAddress(base_addr); + ModuleSP module_sp = FindModuleViaTarget(module_spec); // We have a core file, try to load the image from memory if we didn't find // the module. 
if (!module_sp && !m_process->IsLiveDebugSession()) { diff --git a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp index 29723089ce7b1..6db4d99ccbdba 100644 --- a/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp +++ b/lldb/source/Plugins/DynamicLoader/POSIX-DYLD/DynamicLoaderPOSIXDYLD.cpp @@ -722,8 +722,8 @@ void DynamicLoaderPOSIXDYLD::LoadAllCurrentModules() { // Create placeholder modules for any modules we couldn't load from disk // or from memory. ModuleSpec module_spec(so_entry.file_spec, target.GetArchitecture()); - if (UUID uuid = m_process->FindModuleUUID(so_entry.file_spec.GetPath())) - module_spec.GetUUID() = uuid; + module_spec.SetLoadAddress(so_entry.base_addr); + m_process->FindModuleUUID(module_spec); module_sp = Module::CreateModuleFromObjectFile( module_spec, so_entry.base_addr, 512); bool load_addr_changed = false; diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp index 8a1c03ebf1dbe..ced84ab829772 100644 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.cpp @@ -337,7 +337,7 @@ bool ProcessElfCore::GetMainExecutableModuleSpec(ModuleSpec &exe_spec) { GetNTFileEntryForExecutableELFHeader(); if (exe_header) { exe_spec.GetFileSpec() = CreateFileSpecFromPath(exe_header->path); - exe_spec.GetUUID() = FindModuleUUID(exe_header->path); + exe_spec.SetLoadAddress(exe_header->start); } // If we failed to find the executable program in the NT_FILE list with the @@ -364,7 +364,6 @@ bool ProcessElfCore::GetMainExecutableModuleSpec(ModuleSpec &exe_spec) { } else { // We don't have an executable file spec yet, lets set it. 
exe_spec.GetFileSpec() = execfn_spec; - exe_spec.GetUUID() = FindModuleUUID(execfn_str); } } } @@ -377,18 +376,44 @@ bool ProcessElfCore::GetMainExecutableModuleSpec(ModuleSpec &exe_spec) { if (!exe_spec.GetFileSpec() && !m_executable_name.empty()) exe_spec.GetFileSpec() = CreateFileSpecFromPath(m_executable_name); + // Try and find the UUID after the module spec was filled in. + FindModuleUUID(exe_spec); + // We succeeded if we got a path. return (bool)exe_spec.GetFileSpec(); } -UUID ProcessElfCore::FindModuleUUID(const llvm::StringRef path) { +bool ProcessElfCore::FindModuleUUID(ModuleSpec &spec) { + if (spec.GetUUID().IsValid()) + return true; // Lookup the UUID for the given path in the map. // Note that this could be called by multiple threads so make sure // we access the map in a thread safe way (i.e. don't use operator[]). - auto it = m_uuids.find(std::string(path)); - if (it != m_uuids.end()) - return it->second; - return UUID(); + std::string path; + // Sometimes the path to a file or shared library from the dynamic loader, + // one of the main clients of this function, is a symlink. The information + // in the NT_FILE note contains resolved paths and might not match. The + // best way for us to find a module is by load address, so use this trick + // if the load address is set in the module specification. + if (std::optional load_addr = spec.GetLoadAddress()) { + if (std::optional nt = + GetNTFileEntryContainingAddress(*load_addr)) + path = nt->path; + } + // If we didn't find a file spec from the load address, fall back to using + // the file spec. 
+ if (path.empty()) + path = spec.GetFileSpec().GetPath(); + + auto it = m_uuids.find(path); + if (it != m_uuids.end()) { + Log *log = GetLog(LLDBLog::Process); + spec.GetUUID() = it->second; + LLDB_LOGF(log, "ProcessElfCore::FindModuleUUID() found UUID for %s: %s", + spec.GetFileSpec().GetPath().c_str(), + it->second.GetAsString().c_str()); + } + return spec.GetUUID().IsValid(); } lldb_private::DynamicLoader *ProcessElfCore::GetDynamicLoader() { diff --git a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h index e6f1fa0027554..846d8cb91cadf 100644 --- a/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h +++ b/lldb/source/Plugins/Process/elf-core/ProcessElfCore.h @@ -170,7 +170,7 @@ class ProcessElfCore : public lldb_private::PostMortemProcess { // Populate gnu uuid for each NT_FILE entry void UpdateBuildIdForNTFileEntries(); - lldb_private::UUID FindModuleUUID(const llvm::StringRef path) override; + bool FindModuleUUID(lldb_private::ModuleSpec &spec) override; // Extract the executable module spec for the executable in this core file. 
bool GetMainExecutableModuleSpec(lldb_private::ModuleSpec &exe_spec); diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 6e20703f65a45..ff7734a47fdca 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp @@ -6381,8 +6381,8 @@ bool Process::GetProcessInfo(ProcessInstanceInfo &info) { return platform_sp->GetProcessInfo(GetID(), info); } -lldb_private::UUID Process::FindModuleUUID(const llvm::StringRef path) { - return lldb_private::UUID(); +bool Process::FindModuleUUID(ModuleSpec &spec) { + return spec.GetUUID().IsValid(); } ThreadCollectionSP Process::GetHistoryThreads(lldb::addr_t addr) { diff --git a/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py b/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py index 0a5dc6e6a41a2..e957625ef29c8 100644 --- a/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py +++ b/lldb/test/API/functionalities/postmortem/elf-core/TestLinuxCore.py @@ -1492,6 +1492,74 @@ def test_exe_name_extraction_nt_prpsinfo(self): self.dbg.DeleteTarget(target) + @skipIfLLVMTargetMissing("X86") + @skipIfWindows + def test_uuid_info_from_nt_file_and_gnu_build_id(self): + # This test loads a core file that has everything it needs in core + # memory to read the r_debug structure to get the shared library list + # and also the NT_FILE note where we are able to find the UUID for + # any library. Prior to this patch, the UUID for any library could + # be found in ProcessElfCore::FindModuleUUID(...) only for an + # executable if the resolved path found in NT_FILE matched the path + # that the dynamic loader used, which can often be different due to + # symlinks. + # + # This test verifies that ProcessElfCore::FindModuleUUID() is able to + # find the UUID for a library even if the resolved path (NT_FILE) + # does not match the path that the dynamic loader used. 
+ # + # The libraries in this core file have the following paths: + # R_DEBUG NT_FILE + # ============================ ====================================== + # /lib64/libstdc++.so.6 /usr/lib64/libstdc++.so.6.0.29 + # /lib64/libm.so.6 /usr/lib64/libm.so.6 + # /lib64/libgcc_s.so.1 /usr/lib64/libgcc_s-11-20240719.so.1 + # /lib64/libc.so.6 /usr/lib64/libc.so.6 + # /lib64/ld-linux-x86-64.so.2 /usr/lib64/ld-linux-x86-64.so.2 + # + # The UUID map in ProcessELFCore is keyed off of the path from the + # NT_FILE info, so we verify that the new code that was added to + # ProcessELFCore::FindModuleUUID(...) can locate the module using the + # load address in the ModuleSpec that is now being passed to + # Process::FindModuleUUID(...). + yaml_path = self.getSourcePath("elf-dyld-nt-file-mismatch.yaml") + core_path = self.getBuildArtifact("elf-dyld-nt-file-mismatch.core") + log_path = self.getBuildArtifact("elf-dyld-nt-file-mismatch.log") + self.yaml2obj(yaml_path, core_path) + target = self.dbg.CreateTarget(None) + self.runCmd(f"log enable lldb process -f '{log_path}'") + self.addTearDownHook(lambda: self.runCmd("log disable lldb process")) + process = target.LoadCore(core_path) + prefix = "ProcessElfCore::FindModuleUUID() found UUID for " + with open(log_path, "r") as f: + log_text = f.read() + self.assertIn( + prefix + + "/lib64/libm.so.6: 25C2A650-E3E6-C2F3-25C8-AD803DBB58B3-13899C93", + log_text, + ) + self.assertIn( + prefix + + "/lib64/libgcc_s.so.1: A29B0CF0-634D-ECD5-76D6-36CF5B9B6412-AFEB4FAC", + log_text, + ) + self.assertIn( + prefix + + "/lib64/libstdc++.so.6: 0C8999CC-A62E-9B9F-0075-566ECB23D564-443ED68F", + log_text, + ) + self.assertIn( + prefix + + "/lib64/libc.so.6: CFCCBA85-5FC7-2F10-BC9D-E0ABC5AD605E-D8B1AA72", + log_text, + ) + self.assertIn( + prefix + + "/lib64/ld-linux-x86-64.so.2: ECBDF3F8-784D-7A13-EFF2-FDD4352ABBEE-93CCE02C", + log_text, + ) + self.dbg.DeleteTarget(target) + def replace_path(binary, replace_from, replace_to): src = 
replace_from.encode() diff --git a/lldb/test/API/functionalities/postmortem/elf-core/elf-dyld-nt-file-mismatch.yaml b/lldb/test/API/functionalities/postmortem/elf-core/elf-dyld-nt-file-mismatch.yaml new file mode 100644 index 0000000000000..09412aeed8151 --- /dev/null +++ b/lldb/test/API/functionalities/postmortem/elf-core/elf-dyld-nt-file-mismatch.yaml @@ -0,0 +1,135 @@ +# This file contains an minimal ELF core file that contains everything needed +# to load a core file where the executable and shared libraries all have +# their ELF headers in the core file memory. The core file has the main +# executable's PT_DYNAMIC section so that we can read the DT_DEBUG from the +# dynamic section of the core file and read the shared library list from core +# memory. So this core file can read the shared library list and also get +# the UUID from the .gnu-build-id data in the ELF headers of each library. +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_CORE + Machine: EM_X86_64 +ProgramHeaders: + - Type: PT_NOTE + Align: 0x4 + FileSize: 0x1718 + Offset: 0x2a8 + - Type: PT_LOAD + Flags: [ PF_R ] + VAddr: 0x55bc64882000 + Align: 0x1000 + FileSize: 0x1000 + MemSize: 0x1000 + Offset: 0x2000 + - Type: PT_LOAD + Flags: [ PF_R ] + VAddr: 0x55bc64885000 + Align: 0x1000 + FileSize: 0x1000 + MemSize: 0x1000 + Offset: 0x3000 + - Type: PT_LOAD + Flags: [ PF_R ] + VAddr: 0x7f8e30600000 + Align: 0x1000 + FileSize: 0x1000 + MemSize: 0x29000 + Offset: 0x4000 + - Type: PT_LOAD + Flags: [ PF_R ] + VAddr: 0x7f8e30923000 + Align: 0x1000 + FileSize: 0x1000 + MemSize: 0xd000 + Offset: 0x5000 + - Type: PT_LOAD + Flags: [ PF_R ] + VAddr: 0x7f8e30a00000 + Align: 0x1000 + FileSize: 0x1000 + MemSize: 0x99000 + Offset: 0x6000 + - Type: PT_LOAD + Flags: [ PF_R ] + VAddr: 0x7f8e30c99000 + Align: 0x1000 + FileSize: 0x1000 + MemSize: 0x3000 + Offset: 0x7000 + - Type: PT_LOAD + Flags: [ PF_W, PF_R ] + VAddr: 0x7f8e30cbe000 + Align: 0x1000 + FileSize: 0x2000 + MemSize: 0x2000 + 
Offset: 0x8000 + - Type: PT_LOAD + Flags: [ PF_X, PF_R ] + VAddr: 0x7f8e30cc4000 + Align: 0x1000 + FileSize: 0x2000 + MemSize: 0x2000 + Offset: 0xa000 + - Type: PT_LOAD + Flags: [ PF_R ] + VAddr: 0x7f8e30cc6000 + Align: 0x1000 + FileSize: 0x1000 + MemSize: 0x2000 + Offset: 0xc000 + - Type: PT_LOAD + Flags: [ PF_W, PF_R ] + VAddr: 0x7f8e30cff000 + Align: 0x1000 + FileSize: 0x2000 + MemSize: 0x2000 + Offset: 0xd000 +Sections: + - Type: Fill + Pattern: 050000005001000001000000434f5245000000000b00000000000000000000000b00000000000000000000000000000000000000d2962f001c5a1800d2962f001c5a1800000000000000000000000000000000000000000000000000a00b000000000000000000000000000000000000000000000000000000000000000000000000000000f0cf308e7f0000b85d8864bc55000030318864bc550000582774e3fd7f0000402674e3fd7f00000000000000000000a0bc7f308e7f0000e900000000000000c0c0cc308e7f000050d47f308e7f00000000000000000000b85d8864bc550000682774e3fd7f0000582774e3fd7f00000100000000000000ffffffffffffffff47318864bc55000033000000000000004602010000000000402674e3fd7f00002b000000000000004047c9308e7f0000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000050000008800000003000000434f5245000000000052000000000000000640000000000071f4010064000000d2962f001c5a1800d2962f001c5a1800612e6f757400000000000000000000002e2f612e6f757420000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000050000008000000049474953434f5245000000000b00000000000000010000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000050000007001000006000000434f52450000000021000000000000000040cc308e7f00003300000000000000300e0000000000001000000000000000fffbab1f000000000600000000000000001000000000000011000000000000006400000000000000030000000000000040208864bc
5500000400000000000000380000000000000005000000000000000d0000000000000007000000000000000060cc308e7f000008000000000000000000000000000000090000000000000040308864bc5500000b0000000000000071f40100000000000c0000000000000071f40100000000000d0000000000000064000000000000000e000000000000006400000000000000170000000000000000000000000000001900000000000000592b74e3fd7f00001a0000000000000002000000000000001f00000000000000f04f74e3fd7f00000f00000000000000692b74e3fd7f00001b000000000000001c000000000000001c00000000000000200000000000000000000000000000000000000000000000050000003b060000454c4946434f5245000000001d00000000000000001000000000000000208864bc55000000308864bc550000000000000000000000308864bc55000000408864bc550000010000000000000000408864bc55000000508864bc550000020000000000000000508864bc55000000608864bc550000020000000000000000608864bc55000000708864bc5500000300000000000000000060308e7f0000009062308e7f00000000000000000000009062308e7f000000e079308e7f0000290000000000000000e079308e7f000000707f308e7f00009e0100000000000000707f308e7f000000b07f308e7f0000f60100000000000000b07f308e7f000000d07f308e7f0000fa01000000000000003092308e7f0000000093308e7f00000000000000000000000093308e7f000000209a308e7f00000d0000000000000000209a308e7f000000e09f308e7f00007f0000000000000000e09f308e7f000000f09f308e7f0000da0000000000000000f09f308e7f00000000a0308e7f0000db000000000000000000a0308e7f00000090a9308e7f000000000000000000000090a9308e7f00000050ba308e7f000099000000000000000050ba308e7f00000080c1308e7f0000a5010000000000000080c1308e7f00000050c2308e7f000017020000000000000050c2308e7f00000060c2308e7f000024020000000000000090c9308e7f000000c0c9308e7f0000000000000000000000c0c9308e7f00000000cb308e7f000003000000000000000000cb308e7f00000040cb308e7f000017000000000000000040cb308e7f00000050cb308e7f00001a000000000000000060cc308e7f00000080cc308e7f000000000000000000000080cc308e7f00000010cf308e7f000002000000000000000010cf308e7f000000d0cf308e7f00002b0000000000000000d0cf308e7f000000f0cf308e7f0000370000000000000000f0cf308e7f00000010d0308e7f000039
000000000000002f646174612f75736572732f67636c6179746f6e2f63726173682d666f722d636f72652f612e6f7574002f646174612f75736572732f67636c6179746f6e2f63726173682d666f722d636f72652f612e6f7574002f646174612f75736572732f67636c6179746f6e2f63726173682d666f722d636f72652f612e6f7574002f646174612f75736572732f67636c6179746f6e2f63726173682d666f722d636f72652f612e6f7574002f646174612f75736572732f67636c6179746f6e2f63726173682d666f722d636f72652f612e6f7574002f7573722f6c696236342f6c6962632e736f2e36002f7573722f6c696236342f6c6962632e736f2e36002f7573722f6c696236342f6c6962632e736f2e36002f7573722f6c696236342f6c6962632e736f2e36002f7573722f6c696236342f6c6962632e736f2e36002f7573722f6c696236342f6c69626d2e736f2e36002f7573722f6c696236342f6c69626d2e736f2e36002f7573722f6c696236342f6c69626d2e736f2e36002f7573722f6c696236342f6c69626d2e736f2e36002f7573722f6c696236342f6c69626d2e736f2e36002f7573722f6c696236342f6c6962737464632b2b2e736f2e362e302e3239002f7573722f6c696236342f6c6962737464632b2b2e736f2e362e302e3239002f7573722f6c696236342f6c6962737464632b2b2e736f2e362e302e3239002f7573722f6c696236342f6c6962737464632b2b2e736f2e362e302e3239002f7573722f6c696236342f6c6962737464632b2b2e736f2e362e302e3239002f7573722f6c696236342f6c69626763635f732d31312d32303234303731392e736f2e31002f7573722f6c696236342f6c69626763635f732d31312d32303234303731392e736f2e31002f7573722f6c696236342f6c69626763635f732d31312d32303234303731392e736f2e31002f7573722f6c696236342f6c69626763635f732d31312d32303234303731392e736f2e31002f7573722f6c696236342f6c642d6c696e75782d7838362d36342e736f2e32002f7573722f6c696236342f6c642d6c696e75782d7838362d36342e736f2e32002f7573722f6c696236342f6c642d6c696e75782d7838362d36342e736f2e32002f7573722f6c696236342f6c642d6c696e75782d7838362d36342e736f2e32002f7573722f6c696236342f6c642d6c696e75782d7838362d36342e736f2e320000050000000002000002000000434f5245000000007f0300000000000000000000000000000000000000000000801f0000ffff0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffffff000101474c4942435f50524956415445005f5f5f65000000000000000000000000000061726c795f696e6974007374726e6c65000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006000000880a0000020200004c494e55580000007f0300000000000000000000000000000000000000000000801f0000ffff0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000ffffffffffffffffffffffffff000101474c4942435f50524956415445005f5f5f65000000000000000000000000000061726c795f696e6974007374726e6c650000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e70200000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a20200000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000080008000000000000240040000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f2f0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002e2f612e6f7574005348454c4c3d2f62696e2f6261736800434f4c4f52544552000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005455555500000000 + Size: 0x1718 + Offset: 0x2a8 + - Type: Fill + Pattern: 7f454c4602010100000000000000000003003e000100000040100000000000004000000000000000b03f00000000000000000000400038000d004000280027000600000004000000400000000000000040000000000000004000000000000000d802000000000000d802000000000000080000000000000003000000040000001803000000000000180300000000000018030000000000001c000000000000001c000000000000000100000000000000010000000400000000000000000000000000000000000000000000000000000018060000000000001806000000000000001000000000000001000000050000000010000000000000001000000000000000100000000000005d010000000000005d0100000000000000100000000000000100000004000000002000000000000000200000000000000020000000000000a400000000000000a40000000000000000100000000000000100000006000000b02d000000000000b03d000000000000b03d0000000000007402000000000000780200000000000000100000000000000200000006000000c82d000000000000c83d000000000000c83d0000000000001002000000000000100200000000000008000000000000000400000004000000380300000000000038030000000000003803000000000000200000000000000020000000000000000800000000000000040000000400000058030000000000005803000000000000580300000000000044000000000000004400000000000000040000000000000053e574640400000038030000000000003803000000000000380300000000000020000000000000002000000000000000080000000000000050e574640400000004200000000000000420000000000000042000000000000024000000000000002400000000000000040000000000000051e574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000052e5746404000000b02d000000000000b03d000000000000b03d0000000000005002000000000000500200000000000001000000000000002f6c696236342f6c642d6c696e75782d7838362d36342e736f2e3200000000000400000010000000
05000000474e5500028000c0040000000300000000000000040000001400000003000000474e55000da654f5bd3a60f5d3cfa88cd0abd565ac497cea040000001000000001000000474e5500000000000300000002000000000000000000000001000000010000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004600000022000000000000000000000000000000000000005500000012000000000000000000000000000000000000001000000020000000000000000000000000000000000000000100000020000000000000000000000000000000000000002c0000002000000000000000000000000000000000000000005f5f676d6f6e5f73746172745f5f005f49544d5f64657265676973746572544d436c6f6e655461626c65005f49544d5f7265676973746572544d436c6f6e655461626c65005f5f6378615f66696e616c697a65005f5f6c6962635f73746172745f6d61696e006c6962737464632b2b2e736f2e36006c69626d2e736f2e36006c69626763635f732e736f2e31006c6962632e736f2e3600474c4942435f322e333400474c4942435f322e322e35000000000200030000000000000000000000010002008e0000001000000000000000b4919606000003009800000010000000751a690900000200a300000000000000b03d00000000000008000000000000002011000000000000b83d0000000000000800000000000000e010000000000000c03d0000000000000800000000000000c03d000000000000d83f00000000000006000000010000000000000000000000e03f00000000000006000000020000000000000000000000e83f00000000000006000000030000000000000000000000f03f00000000000006000000040000000000000000000000f83f000000000000060000000500000000000000000000001840000000000000070000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 + Size: 0x1000 + Offset: 0x2000 + - Type: Fill + Pattern: 01000200011b033b20000000030000001cf0ffff540000003cf0ffff3c0000002cf1ffff7c0000001400000000000000017a5200017810011b0c070890010000140000001c000000f8efffff2600000000440710000000002400000034000000c0efffff20000000000e10460e184a0f0b770880003f1a3b2a332422000000001c0000005c000000a8f0ffff1e00000000410e108602430d06590c07080000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000020318864bc550000e0308864bc550000c05d8864bc55000001000000000000006700000000000000010000000000000076000000000000000100000000000000800000000000000001000000000000008e000000000000000c0000000000000000100000000000000d0000000000000050110000000000001900000000000000b03d0000000000001b0000000000000008000000000000001a00000000000000b83d0000000000001c000000000000000800000000000000f5feff6f00000000a0238864bc55000005000
0000000000050248864bc5500000600000000000000c0238864bc5500000a00000000000000af000000000000000b00000000000000180000000000000015000000000000005803d0308e7f0000030000000000000000608864bc5500000200000000000000180000000000000014000000000000000700000000000000170000000000000000268864bc550000070000000000000040258864bc5500000800000000000000c00000000000000009000000000000001800000000000000fbffff6f000000000000000800000000feffff6f000000001005000000000000ffffff6f000000000100000000000000f0ffff6f0000000000258864bc550000f9ffff6f0000000003000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000401864308e7f000040a662308e7f0000000000000000000000000000000000000000000000000000 + Size: 0x1000 + Offset: 0x3000 + - Type: Fill + Pattern: 7f454c4602010103000000000000000003003e0001000000d0a70200000000004000000000000000b8d426000000000000000000400038000e004000460045000600000004000000400000000000000040000000000000004000000000000000100300000000000010030000000000000800000000000000030000000400000020491c000000000020491c000000000020491c00000000001c000000000000001c00000000000000100000000000000001000000040000000000000000000000000000000000000000000000000000001881020000000000188102000000000000100000000000000100000005000000009002000000000000900200000000000090020000000000ec44170000000000ec441700000000000010000000000000010000000400000000e019000000000000e019000000000000e01900000000009480050000000000948005000000000000100000000000000100000006000000b8681f0000000000b8781f0000000000b8781f0000000000f04f000000000000182701000000000000100000000000000200000006000000a09b1f0000000000a0ab1f0000000000a0ab1f0000000000e001000000000000e0010000000000000800000000000000040000000400000050030000000000005003000000000000500300000000000030000000000000003000000000000000080000000000000004000000040000008003000000000000800300000000000080030000000000004400000000000000440000000000000004000000000000000700000004000000b8681f0000
000000b8781f0000000000b8781f000000000028000000000000008800000000000000080000000000000053e574640400000050030000000000005003000000000000500300000000000030000000000000003000000000000000080000000000000050e57464040000003c491c00000000003c491c00000000003c491c000000000014740000000000001474000000000000040000000000000051e574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000052e5746404000000b8681f0000000000b8781f0000000000b8781f0000000000483700000000000048370000000000000100000000000000040000002000000005000000474e5500020000c0040000000300000000000000028000c0040000000300000000000000040000001400000003000000474e5500cfccba855fc72f10bc9de0abc5ad605ed8b1aa72040000001000000001000000474e55000000000003000000020000000000000000000000ff03000017000000000100000e0000000030125ca02013018803e690c5458c00c400580007842070c280010d8a1cc0141180c84432082a4088563c2d204eb2482684d08e040c0003020eb1ac3a0f66c080c03300c00c5009208109940b242028240084245a30105880ca444228000680103a4300a8548000495004518a501204002400082020111048c85082b50021c8c6c183a0dbb45802080430500710040290443200205a980a19040226000da0aa101438940850030842e50a0300198438610212106081200005cc24c86004011406800a680e808c4481e8ac10000d202c14081062144120449508d06c639341821024a1008246801302080e5860a20a054c6b08f17181096c48e0203813005d402003302a000760070e4389364000204108e300200b5b1888c430e3c690e2004e81008a014040a02000000008e2031a2082160231100800a92428621010741a62646201020a2018416060a822060250221604b08812626b8a86dca81962631d0a09210800200216223850a144440c02f9c2406820268440b03d10a4a9c8804c91142014200089810c00057c501c2cc236440100a10101440c5007c8ede09dfc2880001015200080044626086c414031818804f680182a302220d048060140c9891100c98092db3a3620922641e08e4f4fc4204e0004000400a501b0cc1170467e0a2c902d9324f200020a810c8053410426c2806971100080144107d528801808a0086222522400020000908a68002180218095e2116809507440a00441158540544072030818568000540d1448248800000514840410084200401496809055080549260794e148020062029604de9f6329010b0857
4448222c6290c1587122981390a0010008600808a5d01d00900900021411281c8000802a410880010a00600400081c044000e200281839800784c09af3504d0001be8800425710c4f023911c6c15b01024d30ea4088a3698052a888a52982410490a0c2439184290240120f040048315106010006a1079808a008d20004cea08206410335c0081128848ea802200298108404284380d44238c98800134302c8004143700500a08a80a1020005506391020253b40000e700b69004000000846095004c00252040c410001a05890c948ff412c4118803c28e02300e522c0d10105404021421447000d4c910f06030a26b4823081809640188452064010000821284488229086592151118800a00524812e07832828f680d3a2018a306440c1d4a0d3200104012834e9a4c113105044c1320a52010890008960cc80480a61280d8b102330c002740800011880880040a0804303603110a93bb01408a040a18288604a94253c30d6d2a418068e18b888e18f9c00578d080b0248928e98401c500088019c4804928012b1f0144400ec484e0d00881400000004408012088c8f10047170a1456062c5f06114052011800000000141050000400020000696023340b0675420444120034a35410b002841309040c0093222049d460c0408861520236010000310c09a564a41807472997a1861b5800030208c024461021240562092f9a07907212040c1a0b220821142009474d00428c0c945430304a002904224a0811028840439c40914e010d04dc5d892e5c0010120680490140023041c0c149060003385888a0100200012ef48a10994d92781024811915809d22ee50600728005004861106340d312e20100925e0c0300d62a041d08f408023ab100102408a893014d298291600801c900800002000a00354c2002028808000020102200005a00005301c20201003311c2b5ce13801c8170052866846067688800403e1285d10020a2429210004004808458613800b8089d08000141c800aa308a00052040b108a00c6054b4e09841c0b00019a407c488909ad144480330806223808008410852505520420a150001000001102800c084cb02201a0913508200884310d0884a41c070342910ec43239200310324836a0052106001484081a228980048890482670857140008780d12050408cf30543b384217c63ef30b24a8000000a40090763211e71ca14e4049240c010100c20c64c70481c386831903201002182c2febd2a4d3802a746704c408b3ac8000c089c320f00007200ca0144a4243ed183c00390847e9400368c1012bb53ca05859026060a43400086b866b1885a41837a820320de4a6239342706888e08089840044493a11a0441000000220518fb184d0104200001080880462140365
c4d086197aa11489aede4f01001411120e801d00a42d002713072a762426740a8c61190c08e1000824362408c1614583a211c3040133100371004282ad84cc1034084906480204ae2440c84002008010220a00c2e26022480011110081044e82594544000702b419750053120c01480624a00000d301004d020000064ce32831200e1d3240b84084c24802908046c08cc7004e2b0a224000812d72200401a0c108500004000391882c020144c40309a24760e140110410ac0462400804183600000004021440390e04495a2018a10110285aa8004071000200a0c6e1ff811015ff411630bf47200e3548db9024a16842c3a144803d2422a0e0400433061121402064080150081110001180839a021a20540501e103124580d2d48408222038000508034a00a21000018bd64141841404312099817b886dc08210090820329810160e1c9d61df63c33624600442333003c124888100926804082341180f1000180152420800501401800806ea1404353c9040e00d40041028011300c20080ff02d4114c0e9300228c0200f0921219040c385300c0c8efd891422004382912407054b4f110000a0888210c2d0220a1168201002208e89c30c060103100341880a8013548170000001d0000000000000020000000260000002b0000002c0000003000000034000000380000003d0000003f00000042000000470000004a0000004e000000000000005000000000000000530000005700000000000000580000005a0000005b0000005c0000005e000000610000006500000000000000680000006d00000070000000720000007300000074000000780000007a0000007c0000007f0000008000000083000000880000008b0000008d000000910000009400000095000000970000009b0000009c000000a3000000a5000000aa000000ad000000b100000000000000b4000000b7000000ba000000bb000000bd000000bf000000c4000000c7000000ca000000cc00000000000000d0000000d4000000d9000000dc000000e1000000e3000000e5000000e9000000ee000000f1000000f5000000f7000000f9000000fa000000fc000000fe000000ff00000003010000070100000b0100000d010000110100001501000018010000000000001b0100002001000022010000230100002601000028010000290100002b0100002c0100002e0100003101000035010000370100003e01000041010000490100004b0100004f0100005201000055010000570100005a0100005d0100005e01000062010000650100006c0100006e0100007301000075010000780100007c0100007f0100008001000083010000840100008701000000000000890100008e0100009201000095010000990100009a0100009d
0100009f010000a1010000a5010000a7010000a8010000ac010000b201000000000000b6010000ba010000bb010000bc01000000000000bd010000bf010000c0010000c3010000c4010000c7010000cb010000cf010000d4010000d7010000d9010000dd010000e2010000e6010000e8010000eb010000ee010000f1010000f4010000f8010000fc010000ff0100000202000004020000070200000b0200000e020000110200001502000019020000000000001b0200001c0200001f02000022020000000000002302000025020000260200002702000000000000290200002c020000300200003302000035020000360200003a0200003d0200003f0200004102000000000000470200004c0200004e02000052020000590200005a0200005e0200006102000065020000000000006f020000000000007102000074020000750200007702000078020000000000007d0200008302000084020000870200008c0200008e0200009202000096020000980200009a0200009b0200009e020000a4020000a9020000ad020000af020000b5020000b9020000bb020000bc020000bd02000000000000be020000c1020000c6020000c7020000cd02000000000000cf020000d0020000d1020000d3020000d5020000d7020000d9020000da020000df020000e4020000e7020000eb020000ed020000f2020000f8020000fa020000fc020000 + Size: 0x1000 + Offset: 0x4000 + - Type: Fill + Pattern: 
7f454c4602010103000000000000000003003e000100000050d4000000000000400000000000000040e60d000000000000000000400038000b00400020001f000100000004000000000000000000000000000000000000000000000000000000f0c3000000000000f0c30000000000000010000000000000010000000500000000d000000000000000d000000000000000d0000000000000e516070000000000e5160700000000000010000000000000010000000400000000f007000000000000f007000000000000f007000000000030b905000000000030b90500000000000010000000000000010000000600000068ad0d000000000068bd0d000000000068bd0d0000000000bc03000000000000c8030000000000000010000000000000020000000600000080ad0d000000000080bd0d000000000080bd0d00000000003002000000000000300200000000000008000000000000000400000004000000a802000000000000a802000000000000a8020000000000003000000000000000300000000000000008000000000000000400000004000000d802000000000000d802000000000000d80200000000000044000000000000004400000000000000040000000000000053e5746404000000a802000000000000a802000000000000a80200000000000030000000000000003000000000000000080000000000000050e574640400000070ee0c000000000070ee0c000000000070ee0c000000000064190000000000006419000000000000040000000000000051e574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000052e574640400000068ad0d000000000068bd0d000000000068bd0d0000000000980200000000000098020000000000000100000000000000040000002000000005000000474e5500020000c0040000000300000000000000028000c0040000000300000000000000040000001400000003000000474e550025c2a650e3e6c2f325c8ad803dbb58b313899c93040000001000000001000000474e55000000000003000000020000000000000000000000fa03000011000000800000000d000000a5400040414810042254044e020f65004ae88022007108ca4860008000364a40148426105041dfc050010209001068821408009212008028aa0021408a6018000223004520102802102409660551068a110000000421124005082030d44a1a20d00008120184694a47890c1080482000602890140a05120202880006000802404008c3182284054130450420882309004042104083214100046000084a010994018080000030483202080801082040100121a80d0a002004
0200c85322e00883002000404000100080808291002104490208120000290052080482000020080180640200200012320100220040040156000200815208a61c02a080004220288610a814631d88e91100c010840441180800000004000000081000104820504014804001140201100400410000820182e0a2006098868609a0810070041141e18a26d46001013000a320a0001052042004ad080800024718267000800440912c2f13421421880801244040104018a042106022000014004200020034201086001130024200040d20820482a01d12d5188400064a9720000080800101504a25900ad0425090296869d8f24bc25201250005161824001322095c802c003882144104018022220201080003239a158002001002418120080260824080890800801900000100418804180000021100159420503808206f964e434282300012811321588410801140a4274208a3004a002404024204911152a02c5002c741202000a8444882138c4844de5041650a1c0e223d800005618c202044c351c00e020641f20810804008080100501040063804041510c50902084200000905000108000000004800600c800e2002240a20a5000c101828012020000104301600008800c1008c1ef0208000c813804000800000000a00020a1020040180444303f4264400208501008116009002050401a220050008837140a081ca102024600002200800001d42002e4c04000070026102000aa501841026a080295795e1088026471809a10e00828010a01a4809f0200590d61125801250142b7a00108200b0200088402480104a05905b40609040180124066204188261000c488000a208c861d400fc41a091160180012200c0f810d418000014884000408e8686004080000c108b018a48b5e8208e8982114010fe201021861883010000802246718060002ab8100e4100014210b0240002910082408588202e000b09020012108c01488415282c8adc0020224d09904210262c66250c2008a000485e4d2000a8c280432820400892040210a0d010682009001000000208515400108050490800000002800423020e180c104012880420002e221580020c00080065c5151084d4101000014800120649c80518404326200801001015009140a040a0100e4010408000918810108242408200000000110000000000000000000000130000001500000000000000160000000000000000000000170000001a000000000000001b0000001c0000001e0000001f0000002000000000000000210000000000000023000000260000000000000027000000280000002b0000002c0000002d0000002e00000030000000000000000000000031000000340000000000000036000000000000003700000038000000
3b000000000000003c00000000000000000000003d0000003e00000000000000400000000000000044000000450000004700000000000000480000004a0000004b0000004c000000000000004d0000004e00000000000000000000000000000050000000000000005200000000000000540000000000000055000000000000005600000000000000570000005800000000000000000000005a0000005b000000000000005e0000000000000000000000610000006200000000000000660000000000000000000000000000000000000069000000000000006c0000006e0000006f0000007000000072000000730000007400000075000000780000007a000000000000007c000000000000007e000000000000007f0000000000000080000000000000008200000085000000000000000000000000000000000000000000000000000000860000000000000087000000000000000000000088000000890000008b0000008c000000000000008e00000000000000000000009000000091000000940000009500000098000000990000009b000000000000009c00000000000000000000009e0000009f000000a0000000a1000000a300000000000000a6000000a800000000000000a9000000000000000000000000000000ac0000000000000000000000ad0000000000000000000000ae0000000000000000000000af0000000000000000000000b1000000000000000000000000000000b2000000b4000000b5000000000000000000000000000000b600000000000000b8000000ba00000000000000bc000000bd00000000000000bf00000000000000c200000000000000c7000000ca000000cc000000cd000000ce000000d1000000d300000000000000d4000000d600000000000000d700000000000000da000000db000000dc000000df000000e0000000e2000000e3000000e400000000000000e50000000000000000000000e700000000000000e8000000000000000000000000000000e900000000000000ea0000000000000000000000eb000000ec000000ed000000f0000000f100000000000000f2000000f4000000f6000000f700000000000000f9000000fa000000fb000000fd00000000000000000000000000000000000000fe000000ff000000000000000201000003010000000000000401000005010000080100000901000000000000000000000c0100000d010000000000000e0100000f010000100100001201000015010000170100001a0100001c0100001d0100001f010000220100000000000023010000250100000000000028010000000000002a0100002b0100002d0100000000000000000000300100000000000033010000000000003401000035010000
0000000037010000380100003b0100003c0100003d0100003e010000400100004101000000000000000000000000000000000000430100004801000049010000000000004a010000000000004b0100004e0100004f01000000000000500100000000000051010000000000005301000054010000550100005601000057010000000000000000000058010000000000000000000000000000000000005a0100005b010000000000005c0100005d0100005f01000060010000610100006301000065010000000000000000000000000000670100006a0100006c010000000000006e010000000000006f01000071010000720100000000000000000000730100000000000074010000000000007601000077010000000000000000000078010000000000007a0100007b0100007e0100007f0100008001000082010000000000008501000086010000880100008a0100008d0100008e0100009001000092010000000000009401000095010000960100009701000098010000990100009a0100009c0100000000000000000000000000009e01000000000000a1010000a4010000a901000000000000ab010000ae010000b0010000b1010000b201000000000000b3010000b4010000b501000000000000b6010000ba0100000000000000000000bc010000bd01000000000000c0010000c1010000c5010000c8010000cc010000cd010000ce01000000000000d00100000000000000000000d101000000000000d201000000000000d4010000d5010000d6010000d7010000d8010000da01000000000000db010000dc010000dd010000df01000000000000e101000000000000e2010000e4010000000000000000000000000000e7010000e8010000e9010000eb0100000000000000000000ec0100000000000000000000ed01000000000000ee010000ef01000000000000f2010000f30100000000000000000000f501000000000000f601000000000000f8010000f90100000000000000000000fc010000fd010000fe0100000000000000000000ff010000000000000000000000000000000200000202000004020000060200000000000007020000080200000a0200000b0200000d0200001102000000000000130200000000000000000000000000001402000000000000000000001602000017020000180200001a0200001b0200001d02000000000000000000001e0200002102000000000000220200002302000026020000270200000000000000000000000000000000000028020000290200002a020000000000002b0200002c0200002e0200002f02000000000000300200003102000000000000320200003302000034020000360200003702000000000000390200003a020000
000000003d0200003e0200004102000043020000000000000000000045020000460200004702000048020000000000004a0200004c020000000000004e020000000000000000000000000000000000004f020000500200000000000051020000 + Size: 0x1000 + Offset: 0x5000 + - Type: Fill + Pattern: 7f454c4602010103000000000000000003003e0001000000d0130a00000000004000000000000000d84823000000000000000000400038000c004000220021000100000004000000000000000000000000000000000000000000000000000000a88c090000000000a88c0900000000000010000000000000010000000500000000900900000000000090090000000000009009000000000051b510000000000051b51000000000000010000000000000010000000400000000501a000000000000501a000000000000501a00000000008e250700000000008e250700000000000010000000000000010000000600000008772100000000000887210000000000088721000000000088ca000000000000f80001000000000000100000000000000200000006000000300b220000000000301b220000000000301b2200000000004002000000000000400200000000000008000000000000000400000004000000e002000000000000e002000000000000e0020000000000002000000000000000200000000000000008000000000000000400000004000000000300000000000000030000000000000003000000000000240000000000000024000000000000000400000000000000070000000400000008772100000000000887210000000000088721000000000000000000000000002000000000000000080000000000000053e5746404000000e002000000000000e002000000000000e00200000000000020000000000000002000000000000000080000000000000050e57464040000008c3a1d00000000008c3a1d00000000008c3a1d0000000000bc99000000000000bc99000000000000040000000000000051e574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000052e5746404000000087721000000000008872100000000000887210000000000f8c8000000000000f8c80000000000000100000000000000040000001000000005000000474e5500020000c0040000000300000000000000040000001400000003000000474e55000c8999cca62e9b9f0075566ecb23d564443ed68f00000000fc070000b8000000000200000f000000049047108350e39806160145a2208a104020210004e20001288c241320069028062c882502005a1000100e21810812825f40c
05478a9b450c2056b90e1201530180021a10062c5205605b944900b0909243d4702020911a788802db89de886794494c480fb006c202fac43603690802832122820024258ce2084c20210824180a1486908221400402308690010014c0115a0484403a012804802001040800b20e20e2588100041408604022b78f536c081d333840012042d2420004858804c14000820041000060605204688405c0620401568c4702000e9a805015032f6ffc7df5f188b868412a08074e6b8a4555b23209051c024f0a7a021688888010380c10803000400e40890a510ca1448776e3804200584715231430040000201f1220f60326622c80488c8b081e22000c14900068845d0024051a59482000308044040c52800c790688011222602529090c8c10a5e49093619012d340ac120869030001984090650080000a3169003c2201000504014e00832014c1408484408783803723686998048d49b4910a281124060a2000fca31d4b5562c02c04dc362400ca810d03b084500c60438005180854812445514220400c0059900022000100401ca190001000148c150d0cdd0723240493dc6700084008c700006189214ac015940404000404068000039a3092c908800800535088c641a6c840010ca442b10801b084a2c0663206850033420456d28448e89100d0a113158c2888f125140d0e260890015874050a050c1a2110df5b043a8261196118a5809821fb624800290c000a2942d100005b19e11880231a24452a140290481101800080e1c0298429880a4804c40008493b200183612258f04640720122c609251231d06181f71400c80020410039200108e14b1501a0c448d401800486c0600812808157210242595a8801003210400001a31e440a121000a20889e18408c0413a6823349054b2884888a7371041b00d5400010d0901539a297a01000800180608545a054e0042d068ca78a1005a2441400874aaa488281a7aa43dd4c7327af11c0060a8064a301492a414040162640449481c4830aeed9606106444b386624224a7d06102c240400502302512d65a1024b91221c83004e42907621088150209859a40bd44b888b2e3994a0ec045d092b0de8af1920e43de0108c60442a100400a010a11a870caa403827a28d48086a0c1801183014205d25036442230000c0202525305092642809510112022124a481d99828008a48310754870142227c6108008a01021c00965221b80e032e435a43b0a2d018450011810ae45dad5452631902508881012163000a101b0844a880a5387228014703c801b0000a02820842515d48626c08840720421ba9813410220008130ba0808330082a5337d00a1c064910a56100825ea25a220b0465312807118162232361a751043d0905060154580431e400000
e00ba2820840990790048c445428f0f0080023d4ec8f1d5c2542430488a50484e201ef0c11043200110200b52102f108801300a5200c500600248050602921000220c2000082df25010170cb91aaa4b40d8486fc3f95014042242a0000040141290213a029e8380b884ad5ca422e40064a19c0228830b009e9642100d104606bc97219010400e1a05080b010a0189b404ad128de0011001484a051900622fc040918a581858424a81809e2663684404b620052a36e994c11d0810244d0b0c00002004025020215012145404816c009213608a54000261a017b2503618411288ae19a50080080a16014e0e340311b50da0130048e0ad420fc45704c82004841c842a23080620e358529c0c19022200c10d12651c2d088c49c5e5201a8e231302fc1f5dc018c100402908c0c0009000240449900200008804c034a014108388253d0210101008042008224290c81026046180f08494317b8014360929000cb3443580b462c028965fc41b463b0429035dc02341809145c5a0a082c11f20000aa4d8fbca0da1d8023283218012c8710600200c801000541a13010102828301451097041a260a0031584f80dd461112b4313524a3f2151004009190080040940620f00a098850841d062a48d50842827400c7904e221101440dc600e238123c31490b68620b42c100a87c2400486141009084480092172aa010000112148103004544004c8211c843a00b0a0c202142145431444f40e9ffbe7f0400800126820a0614fdee213e23220490404047c8005a70cea9327ceaa9101a8b418400880855020832034c0200406846510a2231000aa4804826892d4074200182001802008008400000440400f16dc02120b2146c0006e50c00c8e58993bc07ac40200467a850402344e1a2092808640e200a1002004be802f755480a142260444c4042200340102086102204b906aa3e550318ed000cd06ea4c0032a108840c22a009c8621540b600002020004ce030800600a2002a20d1548231d800800c15104480e00c026748041ec088e10fc44a140920b04029010c0219800116a836981b0ae4640c0800120800a00039067ed0a16144283010800050a2051329239a4dd54321210dce20834d0b217eb1208612a0d2202127826f21d84281c38e008211132dd2085a2220c0209028040400142e808949209d424450800707850a808ec94b603e0a2392086420885b512212854878442828481ba57c707305b2b144510828890400045820029c118022401812120312300090a030000100544090c500625052090c3101601a20040408482240e208140219878100a0a0ec82407281eb9c3114f92e61617ecb98508011c3e870ca401488d4080c00268a20122200528080021692021d020c1ac568b05020c158
0009014402400000043080368336048b020e6924b80099173b4a092c6e8400221012020404d1590c85840808762821300c24200d8156800000480003b210ac520549d4fe60070730e04c9e0040c710a78a11708054b1000011300140b002411f4040c021040ba345cce0dc09169505802410007cc264271500135330c0202c86289080ab001518000501123208104c002522b01803622836000205960108d1190ae4c95123720408a002800008654088211281c1000c882003a86f24a3710020208422254621c4146822264154426b59c7e428ca21c680112d04201af004022004d631c684c09740222cc0a1030282a01d3003804820001028080c0e201acb82408b45c3a49d712148300112000084500b01081310508886402cb0142869311e0c2001400101088e08a0287c2a350a38a030cc01810262ac384369200720010021604b000400a8241184f6312184141a0186a10182828081c060d1400525079a222e40802081210bc705614d2d96a4c4600c04048c0041408600d95483b8823a629c74b01894010490025210080020800a0221832105a514218c8c330c010a142e40301030b204e1081be85b4046680101004910080419e9503275428654c00502d94e4180a4837c8841e59c03c68c89914805292190118b340502815a31012081304c0400409805810210050a531313a5075c688b110441821003109e2a0304c68b2441110a1060c2d2911022c280041988206009210189225c40300018ed3226090062801800202012a0040025002140ec461742114140a0024b0080aa4260001080584807910f5ce20c070a640668a8c8c90d120358904205816598f8f5473a3992a79a38118884aa02a10490083000008401000a50a5192953a84618cdaa01c240208c49008200d95810a1015799080042080282868002500d0022110ce002473842e807f873c69401c4c00800218d00038a80742405580808224820020d99c0634352664517089ca0101a13190a285c5801800b1825ae00e0110ad3cca42a1013c12c210386000113853465cff833105407d198200000002000009296de0039c2c02c2c2e623830f002479e008046963a161080100000941a42144406040001b811884982463507a011c2e412be1173cd05073940201f01106e20583a4d5100120c1022004100c5180b10945407c014f5010081150202800802186100d188488e50323086803820b9002080ab10802cca42a4d84498c41be1d310262800b001040005144000288a1280482160064018534091020ba800040418882425d00244849011714004000088e0a081c40108645043c50feffdc0329079c82080a0c8001fc3510d310007a009a0440091c0022961484008000b4880e080088eb50d8680a40229188480
920293526a900482c94b7148a9b30004810484c80185564b08045083c0e00042810031000d9213780a850293022105110d1b50c1a7188ac3b215c8422ffbfefde201aea42b214c860801085800c713a1c100a1090fffffe008a038569959811080920166014c0ff3f7b300e918008800007340284018519d399294086b4982002032009412140e08426f87dc82f0ae1c11c9b082609d270944044c04cb01180802c1a0c0c02f64c22101c461c89617f3068d11492471802014140068e0aea0040208473447c1c048122c80d238d4041040010008400c495c000d0897005 + Size: 0x1000 + Offset: 0x6000 + - Type: Fill + Pattern: 7f454c4602010100000000000000000003003e000100000050360000000000004000000000000000f8be01000000000000000000400038000b0040001f001e000100000004000000000000000000000000000000000000000000000000000000902a000000000000902a00000000000000100000000000000100000005000000003000000000000000300000000000000030000000000000a533010000000000a533010000000000001000000000000001000000040000000070010000000000007001000000000000700100000000006430000000000000643000000000000000100000000000000100000006000000f0ab010000000000f0bb010000000000f0bb0100000000001004000000000000d8050000000000000010000000000000020000000600000018ac01000000000018bc01000000000018bc0100000000001002000000000000100200000000000008000000000000000400000004000000a802000000000000a802000000000000a8020000000000002000000000000000200000000000000008000000000000000400000004000000c802000000000000c802000000000000c80200000000000024000000000000002400000000000000040000000000000053e5746404000000a802000000000000a802000000000000a80200000000000020000000000000002000000000000000080000000000000050e5746404000000487a010000000000487a010000000000487a010000000000bc06000000000000bc06000000000000040000000000000051e574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000052e5746404000000f0ab010000000000f0bb010000000000f0bb010000000000100400000000000010040000000000000100000000000000040000001000000005000000474e5500020000c0040000000300000000000000040000001400000003000000474e5500a29b0cf0634decd576d636cf5b9b6412afeb4fac00
0000008300000018000000100000000a000000131c0318042400010181005404068810808400080008058002004010441000b60280d020c1009082740100040828c00248010b20d0010228133504606cd2002103706040d280086000400100800100001224000811020849c00124091128202008430ba6824809080803440c0a1c124fc601870b618496cd04c2c50ca2db0ecc18000000190000001c000000000000001f000000000000002100000022000000240000002700000029000000000000002a0000002c0000002d0000003000000032000000350000003600000039000000000000003a0000003b0000003f000000400000004200000043000000440000004500000046000000480000000000000049000000000000004a00000000000000000000004b0000004c0000004e0000004f000000500000000000000000000000520000005400000000000000000000005500000057000000000000005800000000000000000000005a0000005b0000005c00000000000000000000005e0000005f000000000000006000000000000000000000006200000063000000000000000000000065000000000000006600000000000000000000000000000000000000670000000000000000000000680000006a0000006b0000006d00000000000000700000000000000072000000730000007400000079000000000000007c0000007d0000007e000000000000000000000000000000000000007f00000080000000000000008300000000000000850000008700000088000000000000008b0000008d0000008e0000008f0000009000000091000000920000009400000095000000960000009a0000009c0000009d000000000000009e000000a0000000000000000000000000000000a6000000a9000000aa000000ac000000ad00000057eb14b8f2a732e440f0117ab39251bffebf141276a832e497679808e27961fee1b355b17370f0a986c8f8f5e57961fee67961fec25b21f4c1316517d29f21f45be3f3a86b27f4a87ae761174f705049e96b81c198ffce39ea43c2bf8791a71cfa87c2bffb87c2bf44d999798891a71c6d6bf3c03f5ac4914e9ec4914e9ec4912b7fd3a73bc3d3a76bd908f8a03648498efc5049e49f85b185e2ef1c09bbb91c90239891273bcd59fde8f1380192c54f9d75f1fb373dcd59b206e181618f62ae71d362aea563198251a2b79c012cf0a91070f0a91170f0a983a1b91c87ab14b896ef14b8ddb986f078b57cf15b00bccd159ef291fe98c2bf096bf3c0b141afb652afc491a1fa50492b105f3bb1fc504934fd5049f9f485b1c76e793bfd219291f0fbb89049fb690dab6ccf595aadc6f38b28efddbb6ecf591581f0a9d3623d85c0a51682399d8ca2c5
a5b91c802362df0f7a86f01ebe86f0c088c2bf5ba4aa740cce2566835fd8a711870176fb430ef24221367a4221367a0a880ef20099b91cfd9228f9921b32e400321782116c0e82f35fe0a701b48890cfff1412dbd6cb50e2e129e4d2a732e4919b21f46aafbf796bafbf799abf4d7b458da71c334364a8d670f0a9ba83c2bf438764a8d211aaf0259a34df0d9ac491392c13d9497013d9bb75b779c7aecc59c8b6b91cb5df85b15f88c2bf771905d2929d6bdaa82a0e13d6b0cc5903233d85da97946513673d85113017823d926a83ec3576c2b3e361df20321782ee291a7ac22762dfa6453f8d1675c459ef291a7aeea32faa2677c459173dcd59bd219291ce6bf0a9e3a532e4b1fbb8909acb815de1561a9a000000000000000000000000000000000000000000000000700600001200000000000000000000000000000000000000b00000001200000000000000000000000000000000000000100000002000000000000000000000000000000000000000780700002200000000000000000000000000000000000000240800002200000000000000000000000000000000000000660700002200000000000000000000000000000000000000b205000012000000000000000000000000000000000000003a0600002200000000000000000000000000000000000000cc0700001200000000000000000000000000000000000000380600002200000000000000000000000000000000000000e807000012000000000000000000000000000000000000000100000020000000000000000000000000000000000000005a06000012000000000000000000000000000000000000008f0700002200000000000000000000000000000000000000610600001200000000000000000000000000000000000000560700001200000000000000000000000000000000000000ef07000012000000000000000000000000000000000000006806000012000000000000000000000000000000000000002c00000020000000000000000000000000000000000000004d0600002200000000000000000000000000000000000000460000002200000000000000000000000000000000000000a407000022000000000000000000000000000000000000001008000022000000000000000000000000000000000000001001000012000e00004b00000000000012000000000000000304000012000e00d0c2000000000000e5020000000000007204000012000e00c0dd0000000000003c02000000000000ef02000012000e00c06c00000000000096010000000000000b02000012000e0020620000000000000b000000000000009802000012000e00006b00000000000045000000000000007303000012
000e0060b2000000000000b901000000000000500800001100f1ff00000000000000000000000000000000d307000012000e00e0610100000000007b010000000000004f03000012000e00f087000000000000330c0000000000008e00000012000e0060490000000000002200000000000000580800001100f1ff000000000000000000000000000000006a0800001100f1ff00000000000000000000000000000000ca00000012000e00204a00000000000013000000000000007b00000012000e00e0480000000000004700000000000000de00000012000e00604a00000000000017000000000000009401000012000e00b04e0000000000003a00000000000000a001000012000e00f04e00000000000040000000000000007100000012000e00904800000000000047000000000000009503000012000e0010b6000000000000a000000000000000e505000012000e001027010000000000da010000000000002606000012000e00402a010000000000f400000000000000d301000012000e00e052000000000000cd02000000000000720800001100f1ff000000000000000000000000000000009b04000012000e0090e9000000000000a8070000000000009b04000012000e0090e9000000000000a8070000000000008204000012000e0000e000000000000011020000000000007c0800001100f1ff000000000000000000000000000000005e00000012000e0020480000000000001900000000000000b601000012000e00804f0000000000004d00000000000000a404000012000e0040f10000000000008900000000000000a404000012000e0040f100000000000089000000000000005401000012000e00b04d0000000000000c000000000000005d01000012000e00c04d0000000000001e000000000000000403000012000e005070000000000000f800000000000000c603000012000e0000ba000000000000e700000000000000a602000012000e00806600000000000030010000000000004804000012000e0050d00000000000001305000000000000b80800001100f1ff00000000000000000000000000000000ae0800001100f1ff000000000000000000000000000000001104000012000e00c0c5000000000000cc01000000000000b903000012000e00c0b80000000000003b010000000000001e05000012000e00f0f40000000000000c000000000000005505000012000e0010f50000000000002200000000000000fe04000012000e00c0f40000000000001c00000000000000ea03000012000e00c0bd000000000000a8010000000000000d07000012000e00604f0100000000009b000000000000004201000012000e00604d0000000000001700000000
0000004b01000012000e00804d00000000000022000000000000008102000012000e00f0630000000000003d000000000000008405000012000e0040f50000000000000c00000000000000ee01000012000e00505c00000000000097020000000000009204000012000e0020e200000000000061070000000000009204000012000e0020e20000000000006107000000000000900800001100f1ff000000000000000000000000000000000601000012000e00e04a00000000000014000000000000001a01000012000e00204b000000000000b201000000000000b600000012000e00c0490000000000001600000000000000f606000012000e00404f0100000000001e000000000000006a03000012000e00d09f00000000000087120000000000002d04000012000e00a0c9000000000000ec01000000000000dc01000012000e00b0550000000000001e040000000000006103000012000e00 + Size: 0x1000 + Offset: 0x7000 + - Type: Fill + Pattern: 0040cc308e7f0000b543cc308e7f00002044cc308e7f0000e0e8cb308e7f00002005d0308e7f000000e0cb308e7f0000000000000000000090e5cb308e7f000000000000000000000000000000000000000000000000000000000000000000009044cc308e7f00006044cc308e7f00004044cc308e7f00000000000000000000000000000000000000000000000000007044cc308e7f00005044cc308e7f0000000000000000000000000000000000002044cc308e7f000000000000000000003044cc308e7f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003044cc308e7f0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000c044cc308e7f0000b044cc308e7f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a044cc308e7f0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000008044cc308e7f00004040cc308e7f0000000000000000000004000c000000000028e0cb308e7f0000010000000000000088e5cb308e7f00000000000000000000000000000000000090fecb308e7f00000300000002000000030000001a0000009841cc308e7f0000b841cc308e7f0000bc41cc308e7f0000000000000500a00000000000000000000000000000000000000000000000000000000000000000000000000000000000d043cc308e7f000000000000000000000040cc308e7f00003650cc308e7f00003650cc308e7f0000e007d0308e7f0000c0e2cb308e7f000000000000000000000000000000000000040000000000000070e3cb308e7f0000c0e2cb308e7f0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000e0cb308e7f0000000000000000000000e0cb308e7f0000000000000000000000e0cb308e7f0000000000000000000000e0cb308e7f0000000000000000000000e0cb308e7f0000000000000000000000e0cb308e7f0000000000000000000000e0cb308e7f0000000000000000000000e0cb308e7f0000000000000000000000e0cb308e7f0000000000000000000000e0cb308e7f0000000000000000000000e0cb308e7f0000000000000000000000e0cb308e7f0000000000000000000000e0cb308e7f0000000000000000000000e0cb308e7f0000000000000000000000e0cb308e7f0000000000000000000000e0cb308e7f000000000000000000000000000000000000b543cc308e7f0000000000000000000001000000000000000000000000000000144dcf308e7f000009000000030000000a4dcf308e7f00000900000002000000004dcf308e7f0000090000000100000010e7cb308e7f0000170000000000000027e7cb308e7f000017000000000000003ee7cb308e7f0000170000000000000055e7cb308e7f00001c0000000000000055e7cb308e7f0000150000000000000071e7cb308e7f0000130000000000000071e7cb308e7f00000c0000000000000084e7cb308e7f000
0140000000000000084e7cb308e7f00000d0000000000000098e7cb308e7f00000b0000000000000098e7cb308e7f0000040000000000000059e7cb308e7f0000180000000000000059e7cb308e7f0000110000000000000075e7cb308e7f00000f0000000000000075e7cb308e7f0000080000000000000088e7cb308e7f0000100000000000000088e7cb308e7f000009000000000000009ce7cb308e7f000007000000000000009ce7cb308e7f00000000000000000000676c6962632d6877636170732f7838362d36342d76342f676c6962632d6877636170732f7838362d36342d76332f676c6962632d6877636170732f7838362d36342d76322f746c732f68617377656c6c2f6176783531325f312f7838365f36342f746c732f68617377656c6c2f7838365f36342f746c732f6176783531325f312f7838365f36342f746c732f7838365f36342f00000000000000000000000000d0e7cb308e7f000048e8cb308e7f00000000000000000000000000000000000048e8cb308e7f0000c52dcf308e7f000000000000000000001010cf308e7f0000070000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000c52dcf308e7f000000000000000000001810cf308e7f00000b0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002f6c696236342f6c6962737464632b2b2e736f2e3600000000000000000000000000a0308e7f0000c0e8cb308e7f0000301bc2308e7f0000e0edcb308e7f000000e0cb308e7f0000e0e8cb308e7f0000000000000000000070edcb308e7f00000000000000000000601bc2308e7f0000401cc2308e7f0000301cc2308e7f00000000000000000000f01bc2308e7f0000001cc2308e7f0000701cc2308e7f0000801cc2308e7f0000901cc2308e7f0000101cc2308e7f0000201cc2308e7f0000801bc2308e7f0000901bc2308e7f0000701bc2308e7f000000000000000000000000000000000000000000000000000000000000000000000000000000000000501cc2308e7f000000000000000000000000000000000000601cc2308e7f0000d01cc2308e7f0000a01bc2308e7f0000c01bc2308e7f0000b01bc2308e7f0000d01bc2308e7f0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f01cc2308e7f000
0e01cc2308e7f0000b01cc2308e7f0000a01cc2308e7f0000d01cc2308e7f00000000000000000000101dc2308e7f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001dc2308e7f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e01bc2308e7f00004000a0308e7f0000d013aa308e7f00000c002400000000000000000000000000000000000000000068edcb308e7f000000000000000000002005d0308e7f00000070c9308e7f000041000000fc070000ff0100000f0000003803a0308e7f00003813a0308e7f00004830a0308e7f0000000000001d01c40000020000030000000000000000000000000000000000000000000000000000000000000000000000366ba7308e7f0000a0edcb308e7f00000000a0308e7f00000088c2308e7f00000050ba308e7f0000e007d0308e7f0000000000000000000000000000000000000000000000000000040000000000000050eccb308e7f0000a0ebcb308e7f000000000000000000001f000000000000003c9c0c1f0000000000000000000000000000000000000000c0fccb308e7f000000000000000000000000000001000000000000000100000000000000000000000000000000000000000000000000000000000000000000002830a2308e7f00000100000000000000e0e8cb308e7f00002830a2308e7f00000887c1308e7f000000000000000000002000000000000000080000000000000000000000000000002000000000000000010000000000000000000000000000000887210000000000f8c80000000000000300000000000000000000000000000088edcb308e7f0000000000000000000001000000000000006c6962737464632b2b2e736f2e36000000000000000000002f6c69623634006c6962737464632b2b2e736f2e3600000000000000000000002f6c696236342f6c69626d2e736f2e3600000000000000000000000000000000003092308e7f0000c0edcb308e7f000080ed9f308e7f0000e0f2cb308e7f0000e0e8cb308e7f0000e0edcb308e7f0000000000000000000070f2cb308e7f0000000000000000000
090ed9f308e7f000080ee9f308e7f000070ee9f308e7f000010ee9f308e7f000030ee9f308e7f000040ee9f308e7f0000b0ee9f308e7f0000c0ee9f308e7f0000d0ee9f308e7f000050ee9f308e7f000060ee9f308e7f0000b0ed9f308e7f0000c0ed9f308e7f0000a0ed9f308e7f00000000000000000000000000000000000000000000000000000000000000000000000000000000000090ee9f308e7f000000000000000000000000000000000000a0ee9f308e7f000010ef9f308e7f0000d0ed9f308e7f0000f0ed9f308e7f0000e0ed9f308e7f000000ee9f308e7f0000000000000000000000ef9f308e7f0000000000000000000000000000000000000000000000000000000000000000000030ef9f308e7f000020ef9f308e7f0000f0ee9f308e7f0000e0ee9f308e7f000010ef9f308e7f0000000000000000000050ef9f308e7f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040ef9f308e7f0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000020ee9f308e7f0000403092308e7f0000500493308e7f00000b002300000000000000000000000000000000000000000068f2cb308e7f000000000000000000002005d0308e7f00002076c9308e7f000013000000fa0300007f0000000d000000303392308e7f0000303792308e7f0000d44692308e7f0000000000001d0184000002000003000000030000000000000000000000000000000000000000000000000000000000000096e492308e7f0000a0f2cb308e7f0000003092308e7f000030f19f308e7f000000209a308e7f0000e007d0308e7f0000000000000000000000000000000000000000000000000000040000000000000050f1cb308e7f0000a0f0cb308e7f000000000000000000001f000000000000006951e467000000000000000000000000000000000000000020fdcb308e7f00000000000000000000000000000100000000000000010000001800000000000000000000000000000000000000000000000000000000000000b85992308e7f00000100000000000000e0f7cb308e7f0000f05f60308e7f000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000068bd0d000000000098020000000000000400000000000000000000000000000088f2cb308e7f0000000000000000000001000000000000006c69626d2e736f2e360000000000000000000000000000002f6c69623634006c69626d2e736f2e36000000000000000000000000000000002f6c696236342f6c69626763635f732e736f2e310000000000000000000000000090c9308e7f0000c0f2cb308e7f0000184ccb308e7f0000e0f7cb308e7f0000e0edcb308e7f0000e0f2cb308e7f0000000000000000000070f7cb308e7f00000000000000000000184ccb308e7f0000f84ccb308e7f0000e84ccb308e7f00000000000000000000a84ccb308e7f0000b84ccb308e7f0000284dcb308e7f0000384dcb308e7f0000484dcb308e7f0000c84ccb308e7f0000d84ccb308e7f0000384ccb308e7f0000484ccb308e7f0000284ccb308e7f000000000000000000000000000000000000000000000000000000000000000000000000000000000000084dcb308e7f000000000000000000000000000000000000184dcb308e7f0000884dcb308e7f0000584ccb308e7f0000784ccb308e7f0000684ccb308e7f0000884ccb308e7f0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000a84dcb308e7f0000984dcb308e7f0000684dcb308e7f0000584dcb308e7f0000884dcb308e7f00000000000000000000c84dcb308e7f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b84dcb308e7f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000984ccb308e7f00004090c9308e7f000050c6c9308e7f00000b002100000000000000000000000000000000000000000068f7cb308e7f000000000000000000002005d0308e7f0000f077c9308e7f000013000000830000000f0000000a0000000093c9308e7f00008093c9308e7f00002c95c9308e7f000
0000000001d018400000200000300000000000000000000000000000000000000000000000000000000000000000000004cb1c9308e7f0000a0f7cb308e7f00000090c9308e7f0000c851cb308e7f00000000cb308e7f0000e007d0308e7f0000000000000000000000000000000000000000000000000000040000000000000050f6cb308e7f0000a0f5cb308e7f000000000000000000001f00000000000000309c0c1f000000000000000000000000000000000000000060fdcb308e7f00000000000000000000000000000100000000000000010000000000000000000000000000000000000000000000000000000000000000000000d89ec9308e7f00000100000000000000e0f2cb308e7f0000d89ec9308e7f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f0bb01000000000010040000000000000500000000000000000000000000000088f7cb308e7f0000000000000000000001000000000000006c69626763635f732e736f2e3100000000000000000000002f6c69623634006c69626763635f732e736f2e310000000000000000000000002f6c696236342f6c6962632e736f2e3600000000000000000000000000000000000060308e7f0000c0f7cb308e7f0000a0ab7f308e7f000030facf308e7f0000e0f2cb308e7f0000e0f7cb308e7f0000000000000000000070fccb308e7f00000000000000000000a0ab7f308e7f000050ac7f308e7f000040ac7f308e7f0000e0ab7f308e7f000000ac7f308e7f000010ac7f308e7f000080ac7f308e7f000090ac7f308e7f0000a0ac7f308e7f000020ac7f308e7f000030ac7f308e7f000000000000000000000000000000000000b0ab7f308e7f00000000000000000000000000000000000000000000000000000000000000000000000000000000000060ac7f308e7f00000000000000000000000000000000000070ac7f308e7f0000e0ac7f308e7f0000c0ab7f308e7f00000000000000000000d0ab7f308e7f000000000000000000000000000000000000d0ac7f308e7f0000000000000000000000000000000000000000000000000000000000000000000000ad7f308e7f0000f0ac7f308e7f0000c0ac7f308e7f0000b0ac7f308e7f0000e0ac7f308e7f0000000000000000000020ad7f308e7f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000010ad7f308e7f00000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000f0ab7f308e7f0000400060308e7f0000d0a762308e7f00000e001e00000000000000000000000000000000000000000068fccb308e7f000000000000000000002005d0308e7f0000c079c9308e7f00002b000000ff030000ff0000000e000000d80360308e7f0000d80b60308e7f0000781b60308e7f0000000000001d01c40000020000030000000300000000000000000000000000000000000000000000000000000000000000b4e561308e7f0000a0fccb308e7f0000000060308e7f0000d09f80308e7f000000e079308e7f0000e007d0308e7f0000000000000000000000000000000000000000000000000000040000000000000050fbcb308e7f0000a0facb308e7f000000000000000000001f000000000000006651e467000000000000000000000000000000000000000090fdcb308e7f00000000000000000000000000000100000000000000010000001800000000000000000000000000000000000000000000000000000000000000d04c60308e7f0000010000000000000030facf308e7f00004869cc308e7f0000b8787f308e7f00002800000000000000880000000000000008000000000000000000000000000000a80000000000000002000000000000000000000000000000b8781f000000000048370000000000000600000000000000000000000000000088fccb308e7f0000000000000000000001000000000000006c6962632e736f2e360000000000000000000000000000002f6c69623634006c6962632e736f2e3600000000000000000000000000000000e0e8cb308e7f0000e0edcb308e7f0000e0f7cb308e7f000030facf308e7f0000e0f2cb308e7f00000000000000000000e0e8cb308e7f0000e0edcb308e7f0000e0f7cb308e7f000030facf308e7f0000e0f2cb308e7f00000000000000000000e0edcb308e7f0000e0f7cb308e7f000030facf308e7f00000000000000000000e0edcb308e7f0000e0f7cb308e7f000030facf308e7f00000000000000000000e0f2cb308e7f0000e0f7cb308e7f00000000000000000000e0f2cb308e7f0000e0f7cb308e7f00000000000000000000e0f7cb308e7f000030facf308e7f00000000000000000000e0f7cb308e7f000030facf308e7f000000000000000000002005d0308e7f000
0e0e8cb308e7f0000e0edcb308e7f0000e0f2cb308e7f0000e0f7cb308e7f000030facf308e7f000000000000000000002005d0308e7f0000e0e8cb308e7f0000e0edcb308e7f0000e0f2cb308e7f0000e0f7cb308e7f000030facf308e7f000000000000000000006d3fcf308e7f0000000000000000000000000000000000006d3fcf308e7f000000000000000000000000000000000000f3248864bc550000751a690900000000de248864bc550000e8248864bc550000b491960600000000de248864bc5500006d3fcf308e7f0000000000000000000000000000000000006d3fcf308e7f000000000000000000000000000000000000c543cc308e7f0000f675ae030000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 + Size: 0x2000 + Offset: 0x8000 + - Type: Fill + Pattern: 
7f454c4602010100000000000000000003003e000100000000000000000000004000000000000000d0100000000000000000000040003800040040000f000e0001000000050000000000000000000000000000000000000000000000000000003610000000000000361000000000000000100000000000000200000004000000200400000000000020040000000000002004000000000000c000000000000000c00000000000000008000000000000000400000004000000e004000000000000e004000000000000e00400000000000054000000000000005400000000000000040000000000000050e57464040000003405000000000000340500000000000034050000000000003c000000000000003c0000000000000004000000000000000c0000000c00000000000000090000000000000008000000000000000a0000000000000000000000000000000b00000000000000000000000000000000000000000000000200000000000000010000000300000005000000040000000000000006000000070000000200000001000000040000001a0000000120000b411000000004081c0020008080001080000000020000000480000000010000000700000000ca1bb018a3436e94789e7cda109a9e26b062654bcc086c0c8e1e82528f3068864b85e67e55dd716d5887ff000000000000000000000000000000000000000000000000000000000100000012000b008006000000000000f9020000000000002100000012000b00b00900000000000007040000000000005700000022000b0080090000000000002e000000000000006a00000022000b00c00d00000000000063000000000000007700000012000b00300e0000000000002a000000000000008c00000012000b00600e0000000000009c000000000000001500000012000b0080090000000000002e000000000000003600000012000b00c00d00000000000063000000000000004a00000022000b008006000000000000f9020000000000005c00000022000b00b00900000000000007040000000000008500000022000b00300e0000000000002a00000000000000005f5f7664736f5f67657474696d656f66646179005f5f7664736f5f74696d65005f5f7664736f5f636c6f636b5f67657474696d65005f5f7664736f5f636c6f636b5f6765747265730067657474696d656f666461790074696d6500636c6f636b5f67657474696d6500636c6f636b5f676574726573005f5f7664736f5f67657463707500676574637075005f5f7664736f5f7367785f656e7465725f656e636c617665006c696e75782d7664736f2e736f2e31004c494e55585f322e360000000002000200020002000200020002000200020002000200
0100010001000100a1bfee0d140000001c000000a5000000000000000100000002000100f675ae031400000000000000b5000000000000000e00000000000000a5000000000000001e0000000000000002000000000000000600000000000000f0010000000000000b000000000000001800000000000000050000000000000010030000000000000a00000000000000bf00000000000000f5feff6f00000000880100000000000004000000000000002001000000000000f0ffff6f00000000d003000000000000fcffff6f00000000e803000000000000fdffff6f000000000200000000000000000000000000000000000000000000000600000004000000000000004c696e7578000000000906000600000001000000000100004c696e757800000000000000040000001400000003000000474e5500585a45bcff77f58f5f9063149c609fc61bfef054011b033b38000000060000004c010000540000004c0400007c0000007c0400009c0000008c080000cc000000fc080000f40000002c090000140100001400000000000000017a5200017810011b0c070890010000240000001c000000f0000000f902000000410e108602430d064583030368010c0708410c061000001c00000044000000c80300002e00000000410e108602430d06690c07080000002c00000064000000d80300000704000000410e108602430d064d83078c068d058e048f03036b010c0708410c061000002400000094000000b80700006300000000410e108602430d0602550c0708410c0610480c070800001c000000bc000000000800002a00000000410e108602430d06650c070800000024000000dc000000100800009c00000000410e108602430d06418303024e0c0708410c061000000000000000000000000000000000000000554889e5534883ec184885ff0f8452010000448b05e7b9ffff41f6c001754a8b05dfb9ffff83f80175530f01f9669048c1e2204809c24c8b15ebb9ffff488b05c4b9ffff448b1dcdb9ffff8b0dcbb9ffff4c8b0dc8b9ffff8b1da2b9ffff4439c30f8497000000eba9813d91b9ffffffffff7f0f843b010000f390eb9583f8020f8502020000448b0df3c8fffff60509c9ffff010f84ee0100004183e1fe0f01f9669048c1e2204809d0482b05d7c8ffff448b15e0c8ffff0fb615ddc8ffff89d1f6d94889c348d3eb89d148d3e084d2480f48c34c8955e848f765e8480facd020488b15a8c8ffff8b0d92c8ffff4439c94189c975974801c2e938ffffff49b8ffffffffffffff7f4c21c24829c2480fbae23e0f82920000004c21c2490fafd34c01d248d3ea488955f031c04881fa00ca9a3b72184881c2003665c4488955f0ffc04881faffc99a3b77ea89c04c01c8
48890789d04869c0d34d621048c1e8264889470831c04885f675074883c4185b5dc3813d88b8ffffffffff7f488d0d5de9ffff488d1556b9ffff480f44d18b0a890e488d0d4be9ffff488d1544b9ffff480f44d18b0a894e04ebc031d2e96efffffff390448b0545e8ffff41f6c00175f18b053de8ffff83f801753f0f01f9669048c1e2204809c24c8b0d49e8ffff4c8b1d22e8ffff448b152be8ffff8b0d29e8ffff488b0526e8ffff8b1d00e8ffff4439c30f848b000000eba983f8027578448b0d69c7fffff6057fc7ffff0174684183e1fe0f01f9669048c1e2204809d0482b0551c7ffff448b155ac7ffff440fb61d56c7ffff4489d9f6d94889c248d3ea4489d948d3e04584db480f48c24c8955e048f765e0480facd020488b151ec7ffff8b0d08c7ffff4439c94189c975974801c2e950ffffffb8600000000f05e9d7feffff49b8ffffffffffffff7f4c21c24c29da480fbae23e724a4c21c2490fafd24c01ca48d3ea48030561b7ffff48031562b7ffff488955f031c94881fa00ca9a3b72184881c2003665c4488955f0ffc14881faffc99a3b77ea89c94801c8e95bfeffff31d2ebb90f1f8000000000554889e5488d05f5b6ffff813defb6ffffffffff7f488d0de4e6ffff480f45c8488b41204885ff74034889075dc36690554889e54157415641554154534883ec3083ff0f0f87bc010000b80100000089f9d3e0a9830800000f844f0100004c8d159bb6ffff4189f849c1e0044f8d4c0220458b1a41f6c3017536418b420483f801753f0f01f9669048c1e2204809c24d8b7908498b4208458b6218418b4a1c4d8b31418b1a4439db0f8495000000ebc141817a04ffffff7f0f84d1010000f390ebaf83f8020f853b0100008b1dafc5fffff605c5c5ffff010f842801000083e3fe0f01f9669048c1e2204809d0482b0594c5ffff448b359dc5ffff440fb63d99c5ffff4489f9f6d94889c248d3ea4489f948d3e04584ff480f48c24c8975b048f765b0480facd020488b1561c5ffff8b0d4bc5ffff39d989cb75964801c2e94cffffff48bfffffffffffffff7f4821fa4829c2480fbae23e0f829d0000004821fa490fafd44c01fa48d3ea488955d031c04881fa00ca9a3b72184881c2003665c4488955d0ffc04881faffc99a3b77ea89c04c01f04889064889560831c04883c4305b415c415d415e415f5dc3a860745189fa48c1e204488d0542b5ffff488d4c02208b3d37b5ffff40f6c7017410813d2bb5ffffffffff7f7440f390ebe4488b01488906488b4108488946088b1d0db5ffff31c039fb74a5ebc831d2e963ffffffa8107509b8e40000000f05eb8f4c8d15dab5ffffe94afeffff488d05dee4ffff488d4402208b1dd3e4fffff6c30175134c8b00488b50
088b3dc1e4ffff39df7406ebe2f390ebde48035108488b09488955d031c04881fa00ca9a3b721c31db4881c2003665c4488955d0ffc34881faffc99a3b77ea89dbeb0231db4c01c14801d948890e48895608e90fffffff83ff04488d0557e5ffff4c8d1d60e4ffff4c0f44d8488d0565e5ffff488d0d6ee4ffff480f44c84a8d0401488945b84a8d440108488945c0418b038945cca801757183ff04488d0525e4ffff488d0d0ee5ffff480f44c18b400483f80175580f01f966904989d049c1e0204909c0488b45c0488b184c8b35ede4ffff4c8b2df6e3ffff448b3defe4ffff8b15f9e3ffff448b15e6e4ffff8b0df0e3ffff488b45b8488b00458b23443b65cc0f8487000000eb85f390eb8183f8020f85b7feffff8b1d2bc3fffff60541c3ffff010f84a4feffff83e3fe0f01f9669048c1e2204809d0482b0510c3ffff448b0519c3ffff440fb61515c3ffff4489d1f6d94889c248d3ea4489d148d3e04584d2480f48c24c8945a848f765a8480facd0204c8b05ddc2ffff8b0dc7c2ffff39d989cb75964901c0e936ffffff49bbffffffffffffff7f4d21d883ff044d0f44ee410f44d74d29e8490fbae03e724a4d21d8490fafd04801da83ff04410f44ca48d3ea49030149035108488955d031c94881fa00ca9a3b72184881c2003665c4488955d0ffc14881faffc99a3b77ea89c94801c8e962fdffff31d2ebb9660f1f840000000000554889e583ff0f7751b80100000089f9d3e0a9930800007424488d05a0b2ffff813d9ab2ffffffffff7f488d0d8fe2ffff480f45c88b89e8000000eb09b940420f00a860741431c04885f6740b48c7060000000048894e085dc3b8e50000000f055dc390909090909090909090909090554889e5b87b000000f30fc7f84885ff740a89c181e1ff0f0000890f4885f67405c1e80c890631c05dc3909090909090554889e55389c883f802724883f8037743488b4d1048c7c32800000048833c190075314883c3084881fb0001000075ec488b19488d0d000000000f01d7488b5d10c743080400000048837b1800752131c05bc9c3b8eaffffffebf6488b5d1089430866897b0c6689730e48895310ebd84889e14889d84889e34883e30f4883e4f05050fc488b40180faee8ffd0488d641c1083f8007ebae96bffffffaef7ffffea000000820200000505a0f7ffffe10000003b000000050306f8ffffd6000000820200000505f8f7ffffcd0000003b000000050318f9ffffc20000008202000005050af9ffffb90000003b000000050354f9ffffae00000082020000050546f9ffffa50000003b000000050397faffff9a00000082020000050589faffff910000003b0000000503d9faffff86000000820200000505cbfaffff7d0000003b00
00000503cafcffff72000000820200000505bcfcffff690000003b000000050325fdffff5e00000082020000050517fdffff550000003b00000005035dfeffff4a0000001602000004040faee80f310f01f90faee80f310f01f90faee80f310f01f90faee80f310f01f90faee80f310f01f90faee80f310f01f90faee80f310f01f90faee80f310f01f9f30fc7f80000000000000000002e74657874002e616c74696e7374725f7265706c6163656d656e74002e616c74696e737472756374696f6e73002e64796e737472002e65685f6672616d655f686472002e676e752e76657273696f6e002e64796e73796d002e676e752e68617368002e6e6f7465002e65685f6672616d65002e676e752e76657273696f6e5f64002e64796e616d6963002e7368737472746162000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000005d00000005000000020000000000000020010000000000002001000000000000680000000000000003000000000000000400000000000000040000000000000059000000f6ffff6f0200000000000000880100000000000088010000000000006400000000000000030000000000000008000000000000000000000000000000510000000b0000000200000000000000f001000000000000f00100000000000020010000000000000400000001000000080000000000000018000000000000002e00000003000000020000000000000010030000000000001003000000000000bf0000000000000000000000000000000100000000000000000000000000000044000000ffffff6f0200000000000000d003000000000000d003000000000000180000000000000003000000000000000200000000000000020000000000000073000000fdffff6f0200000000000000e803000000000000e80300000000000038000000000000000400000002000000040000000000000000000000000000008200000006000000030000000000000020040000000000002004000000000000c00000000000000004000000000000000800000000000000100000000000000063000000070000000200000000000000e004000000000000e004000000000000540000000000000000000000000000000400000000000000000000000000000036000000010000000200000000000000340500000000000034050000000000003c000000000000000000000000000000040000000000000000000000000000006900000001000000020000000000000070050000000000007005000000000000040100000000000000000000000000000800000000000000
000000000000000001000000010000000600000000000000800600000000000080060000000000007c080000000000000000000000000000100000000000000000000000000000001d000000010000000200000000000000fc0e000000000000fc0e000000000000ee0000000000000000000000000000000100000000000000000000000000000007000000010000000600000000000000ea0f000000000000ea0f00000000000044000000000000000000000000000000010000000000000000000000000000008b00000003000000000000000000000000000000000000003610000000000000950000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 + Size: 0x2000 + Offset: 0xa000 + - Type: Fill + Pattern: 7f454c4602010103000000000000000003003e000100000030ef0100000000004000000000000000404a0e000000000000000000400038000b004000230022000100000004000000000000000000000000000000000000000000000000000000301b000000000000301b00000000000000100000000000000100000005000000002000000000000000200000000000000020000000000000e581020000000000e5810200000000000010000000000000010000000400000000b002000000000000b002000000000000b0020000000000a8b8000000000000a8b800000000000000100000000000000100000006000000007303000000000000730300000000000073030000000000502d000000000000183200000000000000100000000000000200000006000000908e030000000000908e030000000000908e0300000000005001000000000000500100000000000008000000000000000400000004000000a802000000000000a802000000000000a8020000000000002000000000000000200000000000000008000000000000000400000004000000c802000000000000c802000000000000c80200000000000024000000000000002400000000000000040000000000000053e5746404000000a802000000000000a802000000000000a80200000000000020000000000000002000000000000000080000000000000050e5746404000000182503000000000018250300000000001825030000000000dc09000000000000dc09000000000000040000000000000051e574640600000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000052e5746404000000007303000000000000730300000000000073030000000000001d000000000000001d0000000000000100000000000000040000001000000005000000474e5500020000c0040000000300000000000000040000001400000003000000474e5500ecbdf3f8784d7a13eff2fdd4352abbee93cce02c00000000250000002a00000000000000040000001e0000000100000000000
000000000000600000000000000280000002900000027000000100000002000000025000000120000000800000000000000030000002200000000000000050000000c00000009000000210000002300000000000000180000000f000000170000001600000000000000000000000200000000000000000000000000000000000000000000001f000000000000000000000000000000000000000000000000000000000000001c000000190000001d0000000000000000000000000000000e000000140000000000000024000000000000001b0000000000000000000000110000000a00000000000000000000000d000000000000000000000026000000000000001a000000000000000700000000000000130000000b000000000000000000000015000000000000000000000025000000010000000400000008000000203441281806801709c2204005078102580c3a28300224840320a4a710e6282a000000000100000002000000000000000300000004000000070000000800000000000000090000000a0000000b0000000d0000000000000000000000000000001000000011000000120000001300000018000000190000001a0000001b0000001d0000001f0000002100000000000000000000002400000025000000000000000000000026000000000000002700000029000000eb014688bf4bc20e797092cdba1573426a43289f0bd6bb242b9b6f366b803cb2ddb9d79f69374ab5a89414f04f0f35571ce9b5f292d170ed9db2544ea51b4381dfed6f8f3d107554e4c11ee8eed28a7c829587d03a85eb1ce9b366ffd76afaa155a9a54261a29206a4caf718c12e5c7dcc3bffcbd93d6cf6d83d6cf639b70257eeb5bd85da005cc29d0440172364bc3c49de5878b586c309e45530000df9f3c30df9f3c3000000000000000000000000000000000000000000000000e301000012000a0030a500000000000008020000000000009700000012000a0010a0010000000000d101000000000000390200001100130004a40300000000000100000000000000ad0200001100f1ff00000000000000000000000000000000d301000011000f00008b03000000000090030000000000002802000012000a00903901000000000063000000000000001b02000011000b0010250300000000000400000000000000010000001100120000900300000000005010000000000000ab01000012000a00a036010000000000e7020000000000004d00000011000f00a08a0300000000000800000000000000e700000011000f00a88a03000000000004000000000000007801000012000a0060520000000000003b000000000000007200000012000a0000670000000000000900000000000000ad000
00012000a00003a010000000000ea00000000000000f701000012000a00d0970100000000000c000000000000009101000012000a0010550000000000005003000000000000c000000012000a0030210000000000005a000000000000008f0200001100130058a303000000000028000000000000005e00000012000a00c023000000000000a9010000000000000a01000012000a00807201000000000041000000000000003101000012000a00c0500000000000002b000000000000004401000012000a00f0380000000000000500000000000000c101000012000a0070f90000000000007b010000000000001901000012000a00903501000000000019000000000000002000000012000a00a0d3000000000000c400000000000000e30200001100f1ff00000000000000000000000000000000fc00000011000f00d07703000000000008000000000000005d01000012000a00e0a102000000000005000000000000008600000012000a0090ee000000000000a900000000000000b90200001100f1ff00000000000000000000000000000000c30200001100f1ff000000000000000000000000000000008302000011000f00c8770300000000000400000000000000d500000012000a00206e01000000000055000000000000005602000012000a00902100000000000067000000000000006702000012000a00304c00000000000088040000000000005401000011000f00e08a03000000000008000000000000001002000012000a0020f700000000000041020000000000003800000012000a00504b000000000000d4000000000000000e00000012000a00809f0100000000008200000000000000cd0200001100f1ff00000000000000000000000000000000d80200001100f1ff00000000000000000000000000000000005f72746c645f676c6f62616c005f646c5f61756469745f707265696e6974005f646c5f66696e645f64736f5f666f725f6f626a656374005f646c5f657863657074696f6e5f637265617465005f5f6c6962635f737461636b5f656e64005f646c5f63617463685f657863657074696f6e005f5f72746c645f6c6962635f66726565726573005f646c5f666174616c5f7072696e7466005f646c5f61756469745f73796d62696e645f616c74005f646c5f6465616c6c6f636174655f746c73005f646c5f7369676e616c5f657863657074696f6e005f5f74756e61626c655f6765745f76616c005f5f6c6962635f656e61626c655f736563757265005f5f727365715f6f6666736574005f5f746c735f6765745f61646472005f646c5f6765745f746c735f7374617469635f696e666f005f646c5f657863657074696f6e5f66726565005f646c5f64656
275675f7374617465005f646c5f61726776005f5f72746c645f76657273696f6e5f706c616365686f6c646572005f5f6e70746c5f6368616e67655f737461636b5f7065726d005f5f646c5f66696e645f6f626a6563745f696e7465726e616c005f646c5f616c6c6f636174655f746c735f696e6974005f646c5f726561646f6e6c795f61726561005f72746c645f676c6f62616c5f726f005f646c5f72746c645f64695f736572696e666f005f646c5f7838365f6765745f6370755f6665617475726573005f646c5f6d636f756e74005f5f727365715f666c616773005f646c5f616c6c6f636174655f746c73005f5f6e70746c5f696e697469616c5f7265706f72745f6576656e7473005f646c5f7369676e616c5f6572726f72005f646c5f657863657074696f6e5f6372656174655f666f726d6174005f5f727365715f73697a65005f725f6465627567006c642d6c696e75782d7838362d36342e736f2e3200474c4942435f322e322e3500474c4942435f322e3300474c4942435f322e3400474c4942435f322e333400474c4942435f322e333500474c4942435f50524956415445000000000700070007000200070007000600070007000200070007000700070007000700070002000700030007000700070007000700070006000580070003000400060007000700070007000200070007000500060000000100010001000100f23b7d07140000001c00000098020000000000000100000002000100751a6909140000001c000000ad0200000000000001000000030002001369690d1400000024000000b902000008000000ad0200000000000001000000040002001469690d1400000024000000c302000008000000b9020000000000000100000005000200b49196061400000024000000cd02000008000000c3020000000000000100000006000200b59196061400000024000000d802000008000000cd02000000000000010000000700020085cf63091400000000000000e302000008000000d8020000000000000000000000730300000000000800000000000000bde0020000000000087303000000000008000000000000000dd902000000000010730300000000000800000000000000f3d802000000000018730300000000000800000000000000c5e002000000000020730300000000000800000000000000d5e00200000000002873030000000000080000000000000025d902000000000030730300000000000800000000000000ede00200000000003873030000000000080000000000000007e1020000000000407303000000000008000000000000001ee10200000000004873030000000000080000000000000030e1020000000000507303000000000008000
0000000000044e102000000000058730300000000000800000000000000a82003000000000060730300000000000800000000000000cbd80200000000006873030000000000080000000000000038d90200000000007073030000000000080000000000000057e10200000000007873030000000000080000000000000063e10200000000008073030000000000080000000000000079e10200000000008873030000000000080000000000000091e1020000000000907303000000000008000000000000009de102000000000098730300000000000800000000000000b7e1020000000000a0730300000000000800000000000000c6e1020000000000a8730300000000000800000000000000d6e1020000000000b0730300000000000800000000000000e2d8020000000000b8730300000000000800000000000000 + Size: 0x1000 + Offset: 0xc000 + - Type: Fill + Pattern: 2005d0308e7f00000700000000000000e007d0308e7f00000000000000000000e0f7cb308e7f000000000000000000000000000000000000010000000000000000000000000000000000000000000000f05dc9308e7f00007f000000000000005200000000000000c030ce308e7f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000100000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000070000000000000000000000000000000000000000000000950c0000000000000805000000000000d0e7cb308e7f00000060cc308e7f000018238864bc55000090eecf308e7f00000000000000000000e0f7cb308e7f000030facf308e
7f00000000000000000000c004d0308e7f00000000000000000000000000000000000000000000000000000000000000000000a0eecf308e7f0000c0eecf308e7f0000d0eecf308e7f000000efcf308e7f000010efcf308e7f000020efcf308e7f0000e0eecf308e7f0000f0eecf308e7f00000000000000000000000000000000000090eecf308e7f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000050efcf308e7f000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000040efcf308e7f000030efcf308e7f000060efcf308e7f0000000000000000000080efcf308e7f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000070efcf308e7f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b0eecf308e7f00004060cc308e7f000000000000000000000b0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000d07dc9308e7f0000080000002500000003000000080000004864cc308e7f00006864cc308e7f0000f864cc308e7f0000000000001d01840000000000000000000000000000000000000000000000000000000000000000000000000000000000826ccc308e7f000000000000000000000060cc308e7f00001805d0308e7f0000e501cf308e7f0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000073030000000000001d000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006000000000000000200000000000000907ec9308e7f00000200000000000000a80000000000000000020000000000006051c9308e7f0000010000000000000000000000000000000800d0308e7f00000800d0308e7f0000004ac9308e7f0000004ac9308e7f00002800d0308e7f00002800d0308e7f0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001000000000000002005d0308e7f0000f098cc308e7f000000000000000000000060cc308e7f00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000537a0000000000000060cb308e7f0000000000000000000002000000000000000300000000000000b0e5cb308e7f0000d06dc9308e7f00000070c9308e7f0000906ec9308e7f0000000000000000000001000000000000000000000000000000286ccc308e7f00000000000000000000010000000000000080de0a0000000000000000000000000000000000000000000000000000000000000000000000000018238864bc5500008004d0308e7f0000000000000000000000708864bc550000020000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000682774e3fd7f0000000000000000000000208864bc550000c80ad0308e7f0000c85d8864bc55000000e0cb308e7f000000000000000000002005d0308e7f00000000000000000000b00ad0308e7f00000000000000000000f85d8864bc550000d85e8864bc550000c85e8864bc5500000000000000000000785e8864bc550000885e8864bc550000085f8864bc550000185f8864bc550000285f8864bc550000985e8864bc550000a85e8864bc550000085e8864bc550000185e8864bc550000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e85e8864bc550000b85e8864bc5500000000000000
000000f85e8864bc5500000000000000000000285e8864bc550000485e8864bc550000385e8864bc550000585e8864bc550000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000585f8864bc550000485f8864bc55000000000000000000000000000000000000385f8864bc5500000000000000000000785f8864bc55000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000685f8864bc5500000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000685e8864bc55000040208864bc55000040308864bc5500000d00000000000000f8fdcb308e7f00000600000000000000a80ad0308e7f00000000000000000000000000000000000030fecb308e7f000004000000010000000000000000000000b0238864bc550000b8238864bc550000b8238864bc550000010000001c01840000020000000000000300000000000000ffffffffffffffff0000000000000000000000000000000000258864bc550000000000000000000000208864bc55000028608864bc5500005d318864bc550000e007d0308e7f000000000000000000000000000000000000000000000000000004000000000000009008d0308e7f0000e007d0308e7f0000000000000000000000000000000000000000000000000000ffffffffffffffff0000000000000000c0fdcb308e7f0000000000000000000000000000010000000000000000000008000000000000000000000000000000000000000000000000000000000000000038248864bc55000004000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000b03d000000000000500200000000000000000000000000002005d0308e7f000000000000000000002005d0308e7f000000000000000000002005d0308e7f000000000000000000002005d0308e7f000000000000000000002005d0308e7f000000000000000000002005d0308e
7f000000000000000000002005d0308e7f000000000000000000002005d0308e7f000000000000000000002005d0308e7f000000000000000000002005d0308e7f000000000000000000002005d0308e7f000000000000000000002005d0308e7f000000000000000000002005d0308e7f000000000000000000002005d0308e7f000000000000000000002005d0308e7f000000000000000000002005d0308e7f000000000000000000000000000000000000c80ad0308e7f0000000000000000000001000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 + Size: 0x2000 + Offset: 0xd000 + - Type: SectionHeaderTable + NoHeaders: true From 4449e317ebd89fe7b1f08b108c1ea74bfd4171e2 Mon Sep 17 00:00:00 2001 From: HighW4y2H3ll Date: Wed, 24 Jun 2026 17:31:38 -1000 Subject: [PATCH 447/511] [SampleProfileMatcher] Sample profile duplication to avoid stale CFG profile matching conflicts (#202460) Stale profile matching may map multiple different IR anchors into one profile anchor because of the common function basename. One example is `foo(int)` and `foo(float)` can both be mapped to `foo()` if `foo()` is the only function that has a profile. And this creates conflicting CFG matching for `foo(int)` and `foo(float)` when they each runs stale profile matching. The CFG matching results will be overwritten among the conflicting functions. 
And it will trigger the following assertion failure: https://github.com/llvm/llvm-project/blob/7087094b05a1bba64a99474cc501328919e11b4a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp#L332-L333 This patch tries to detect this conflict during the stale CG matching, and create duplicated profiles to avoid CFG matching conflicts. --- .../Transforms/IPO/SampleProfileMatcher.h | 2 + .../Transforms/IPO/SampleProfileMatcher.cpp | 62 ++++ ...o-probe-stale-profile-orphan-conflict.prof | 5 + ...udo-probe-stale-profile-orphan-conflict.ll | 278 ++++++++++++++++++ 4 files changed, 347 insertions(+) create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-orphan-conflict.prof create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-orphan-conflict.ll diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h index 22767cd058e21..6d52d804ac67c 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h @@ -37,6 +37,8 @@ class SampleProfileMatcher { // mapping from the source location of current build to the source location // in the profile. StringMap FuncMappings; + // Hash mapping cache for matched anchor pairs in stale profile matching + DenseMap MatchedAnchorCache; // Match state for an anchor/callsite. enum class MatchState { diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp index 77d45d7e3ca0c..6621684232812 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp @@ -362,6 +362,67 @@ void SampleProfileMatcher::runStaleProfileMatching( longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList, RunCGMatching /* Match unused functions */); + // Scan through the matched anchors to make sure functions and profiles are + // 1:1 mapped.
If the profile has already been mapped to another function + // during previous fuzzy matching, create a new profile with the same sample + // counts and assumed to be pre-inlined. + for (const auto &IR : IRAnchors) { + bool ProfileConflicted = false; + const auto &Loc = IR.first; + Function *Callee = M.getFunction(IR.second.stringRef()); + if (!Callee) + continue; + FunctionId ProfAnchor; + auto AnchorLoc = MatchedAnchors.find(Loc); + if (AnchorLoc == MatchedAnchors.end()) { + // Search within the module and find if we have conflicts in pre-matched + // profiles for this anchor + auto PreMatched = FuncToProfileNameMap.find(Callee); + if (PreMatched == FuncToProfileNameMap.end()) + continue; + ProfAnchor = PreMatched->second; + } else { + const auto &Prof = ProfileAnchors.find(AnchorLoc->second); + if (Prof == ProfileAnchors.end()) + continue; + ProfAnchor = Prof->second; + } + + // Conflicting profile previously matched + auto Cached = MatchedAnchorCache.find(ProfAnchor); + if (Cached == MatchedAnchorCache.end()) + MatchedAnchorCache[ProfAnchor] = Callee; + else if (Cached->second != Callee) + ProfileConflicted = true; + + if (ProfileConflicted) { + // Create a flattened profile using the IR function name to avoid profile + // name conflicts + const auto *FSForMatching = getFlattenedSamplesFor(ProfAnchor); + if (!FSForMatching) + FSForMatching = Reader.getSamplesFor(ProfAnchor.stringRef()); + if (!FSForMatching) + continue; + + FunctionId NewAnchor( + FunctionSamples::getCanonicalFnName(IR.second.stringRef())); + auto R = FuncProfileMatchCache.find({Callee, NewAnchor}); + if (R != FuncProfileMatchCache.end() && R->second) + continue; + FunctionSamples &NewFS = FlattenedProfiles.create(NewAnchor); + NewFS.merge(*FSForMatching); + FuncToProfileNameMap[Callee] = NewAnchor; + FuncProfileMatchCache[{Callee, NewAnchor}] = true; + + // Update profile in the sample profile reader + SampleProfileMap &Profiles = Reader.getProfiles(); + SampleContext FContext(NewAnchor); + 
auto Res = Profiles.try_emplace(FContext.getHashCode(), FContext, NewFS); + FunctionSamples &FProfile = Res.first->second; + FProfile.setContext(FContext); + } + } + // CFG level matching: // Apply the callsite matchings to infer matching for the basic // block(non-callsite) locations and write the result to @@ -838,6 +899,7 @@ void SampleProfileMatcher::matchFunctionsWithoutProfileByBasename() { continue; FuncToProfileNameMap[OrphanFunc] = ProfId; + MatchedAnchorCache[ProfId] = OrphanFunc; if (const auto *FS = Reader.getSamplesFor(ProfId.stringRef())) NewlyLoadedProfiles.create(FS->getFunction()).merge(*FS); MatchCount++; diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-orphan-conflict.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-orphan-conflict.prof new file mode 100644 index 0000000000000..4aae1f1f38b3a --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-orphan-conflict.prof @@ -0,0 +1,5 @@ +_Z3fooi:1:0 + 57: _Z3bari:1 + 72: _Z3topi:1 + 2: _Z3midi:1 + 11: _Z3subi:1 diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-orphan-conflict.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-orphan-conflict.ll new file mode 100644 index 0000000000000..d7886cc72dc8f --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-orphan-conflict.ll @@ -0,0 +1,278 @@ +; Test direct basename matching for orphan functions where multiple callee anchors may be +; matched to one same profile during stale profile matching. In this test case, both of the +; `mid` functions will be matched to the same `_Z3midi` function in the profile during stale +; profile matching. This ends up causing an assertation error because only one profile +; function is supposed to be matched to an IR function. 
+; +; IR Function: +; foo: top ; bar ; top(2) +; |_mid |_ mid(2) +; |_ sub +; +; Profile Function: +; foo: bar ; top ; mid +; |_ sub +; +; Stale profile match order: +; foo: top ; bar ; top(2) +; | | | +; top ; bar ; top +; +; top(2): mid(2) +; | +; mid +; +; top: mid +; | +; mid => (Assertation error) + + +; REQUIRES: x86_64-linux +; REQUIRES: asserts +; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/pseudo-probe-stale-profile-orphan-conflict.prof -o %t.prof +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.prof --salvage-stale-profile --salvage-unused-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s + +; CHECK: Function _Z3midl is not in profile or profile symbol list. +; CHECK: Function _Z3midll is not in profile or profile symbol list. +; CHECK: Function _Z3topl is not in profile or profile symbol list. +; CHECK: Function _Z3barl is not in profile or profile symbol list. +; CHECK: Function _Z3topll is not in profile or profile symbol list. +; CHECK: Function _Z3fool is not in profile or profile symbol list. +; CHECK: Direct basename match: _Z3barl (IR) -> _Z3bari (Profile) [basename: bar] +; CHECK: Direct basename match: _Z3fool (IR) -> _Z3fooi (Profile) [basename: foo] +; CHECK: Direct basename matching found 2 matches +; CHECK: Run stale profile matching for _Z3fool +; CHECK: The functions _Z3topl(IR) and _Z3topi(Profile) share the same base name: top. +; CHECK: Function:_Z3topl matches profile:_Z3topi +; CHECK: The functions _Z3barl(IR) and _Z3bari(Profile) share the same base name: bar. +; CHECK: Function:_Z3barl matches profile:_Z3bari +; CHECK: The functions _Z3topll(IR) and _Z3topi(Profile) share the same base name: top. 
+; CHECK: Function:_Z3topll matches profile:_Z3topi +; CHECK: Location is matched from 2 to 2 +; CHECK: Callsite with callee:_Z3barl is matched from 3 to 57 +; CHECK: Callsite with callee:_Z3topll is matched from 4 to 72 +; CHECK: Run stale profile matching for _Z3topll +; CHECK: The functions _Z3midll(IR) and _Z3midi(Profile) share the same base name: mid. +; CHECK: Function:_Z3midll matches profile:_Z3midi +; CHECK: Callsite with callee:_Z3midll is matched from 1 to 2 +; CHECK: Run stale profile matching for _Z3barl +; CHECK: Run stale profile matching for _Z3topl +; CHECK: The functions _Z3midl(IR) and _Z3midi(Profile) share the same base name: mid. +; CHECK: Function:_Z3midl matches profile:_Z3midi +; CHECK: Callsite with callee:_Z3midl is matched from 1 to 2 +; CHECK: Run stale profile matching for _Z3midll +; CHECK: Callsite with callee:_Z3subi is matched from 1 to 11 +; CHECK: Run stale profile matching for _Z3subi +; CHECK: Run stale profile matching for _Z3midl +; CHECK: Function processing order: +; CHECK: _Z3topll +; CHECK: _Z3midl +; CHECK: _Z3fool +; CHECK: _Z3topl +; CHECK: _Z3midll +; CHECK: _Z3subi +; CHECK: _Z3barl + +target triple = "x86_64-redhat-linux-gnu" + +; Function Attrs: mustprogress noinline nounwind optnone uwtable +define dso_local noundef ptr @_Z3midl(i64 noundef %l) #0 !dbg !14 { +entry: + %l.addr = alloca i64, align 8 + store i64 %l, ptr %l.addr, align 8 + #dbg_declare(ptr %l.addr, !21, !DIExpression(), !22) + call void @llvm.pseudoprobe(i64 -4458821264266946817, i64 1, i32 0, i64 -1), !dbg !23 + ret ptr null, !dbg !23 +} + +; Function Attrs: mustprogress noinline nounwind optnone uwtable +define dso_local noundef ptr @_Z3subi(i32 noundef %i) #0 !dbg !24 { +entry: + %i.addr = alloca i32, align 4 + store i32 %i, ptr %i.addr, align 4 + #dbg_declare(ptr %i.addr, !28, !DIExpression(), !29) + call void @llvm.pseudoprobe(i64 8307782004441981189, i64 1, i32 0, i64 -1), !dbg !30 + ret ptr null, !dbg !30 +} + +; Function Attrs: mustprogress 
noinline nounwind optnone uwtable +define dso_local noundef ptr @_Z3midll(i64 noundef %l, i64 noundef %m) #0 !dbg !31 { +entry: + %l.addr = alloca i64, align 8 + %m.addr = alloca i64, align 8 + store i64 %l, ptr %l.addr, align 8 + #dbg_declare(ptr %l.addr, !34, !DIExpression(), !35) + store i64 %m, ptr %m.addr, align 8 + #dbg_declare(ptr %m.addr, !36, !DIExpression(), !37) + call void @llvm.pseudoprobe(i64 -835688601043669768, i64 1, i32 0, i64 -1), !dbg !38 + %call = call noundef ptr @_Z3subi(i32 noundef 0), !dbg !39 + ret ptr %call, !dbg !41 +} + +; Function Attrs: mustprogress noinline nounwind optnone uwtable +define dso_local noundef ptr @_Z3topl(i64 noundef %l) #0 !dbg !42 { +entry: + %l.addr = alloca i64, align 8 + store i64 %l, ptr %l.addr, align 8 + #dbg_declare(ptr %l.addr, !43, !DIExpression(), !44) + call void @llvm.pseudoprobe(i64 7421866232655760046, i64 1, i32 0, i64 -1), !dbg !45 + %0 = load i64, ptr %l.addr, align 8, !dbg !45 + %call = call noundef ptr @_Z3midl(i64 noundef %0), !dbg !46 + ret ptr %call, !dbg !48 +} + +; Function Attrs: mustprogress noinline nounwind optnone uwtable +define dso_local void @_Z3barl(i64 noundef %l) #0 !dbg !49 { +entry: + %l.addr = alloca i64, align 8 + store i64 %l, ptr %l.addr, align 8 + #dbg_declare(ptr %l.addr, !52, !DIExpression(), !53) + call void @llvm.pseudoprobe(i64 -9164787269840974918, i64 1, i32 0, i64 -1), !dbg !54 + ret void, !dbg !54 +} + +; Function Attrs: mustprogress noinline nounwind optnone uwtable +define dso_local noundef ptr @_Z3topll(i64 noundef %l, i64 noundef %m) #0 !dbg !55 { +entry: + %l.addr = alloca i64, align 8 + %m.addr = alloca i64, align 8 + store i64 %l, ptr %l.addr, align 8 + #dbg_declare(ptr %l.addr, !56, !DIExpression(), !57) + store i64 %m, ptr %m.addr, align 8 + #dbg_declare(ptr %m.addr, !58, !DIExpression(), !59) + call void @llvm.pseudoprobe(i64 997868883951813144, i64 1, i32 0, i64 -1), !dbg !60 + %0 = load i64, ptr %l.addr, align 8, !dbg !60 + %1 = load i64, ptr %m.addr, 
align 8, !dbg !61 + %call = call noundef ptr @_Z3midll(i64 noundef %0, i64 noundef %1), !dbg !62 + ret ptr %call, !dbg !64 +} + +; Function Attrs: mustprogress noinline nounwind optnone uwtable +define dso_local noundef ptr @_Z3fool(i64 noundef %l) #0 !dbg !65 { +entry: + %retval = alloca ptr, align 8 + %l.addr = alloca i64, align 8 + store i64 %l, ptr %l.addr, align 8 + #dbg_declare(ptr %l.addr, !66, !DIExpression(), !67) + call void @llvm.pseudoprobe(i64 5326982120444056491, i64 1, i32 0, i64 -1), !dbg !68 + %0 = load i64, ptr %l.addr, align 8, !dbg !68 + %tobool = icmp ne i64 %0, 0, !dbg !68 + br i1 %tobool, label %if.then, label %if.end, !dbg !68 + +if.then: ; preds = %entry + call void @llvm.pseudoprobe(i64 5326982120444056491, i64 2, i32 0, i64 -1), !dbg !70 + %1 = load i64, ptr %l.addr, align 8, !dbg !70 + %call = call noundef ptr @_Z3topl(i64 noundef %1), !dbg !71 + store ptr %call, ptr %retval, align 8, !dbg !73 + br label %return, !dbg !73 + +if.end: ; preds = %entry + call void @llvm.pseudoprobe(i64 5326982120444056491, i64 4, i32 0, i64 -1), !dbg !74 + %2 = load i64, ptr %l.addr, align 8, !dbg !74 + call void @_Z3barl(i64 noundef %2), !dbg !75 + %3 = load i64, ptr %l.addr, align 8, !dbg !77 + %4 = load i64, ptr %l.addr, align 8, !dbg !78 + %call1 = call noundef ptr @_Z3topll(i64 noundef %3, i64 noundef %4), !dbg !79 + store ptr %call1, ptr %retval, align 8, !dbg !81 + br label %return, !dbg !81 + +return: ; preds = %if.end, %if.then + call void @llvm.pseudoprobe(i64 5326982120444056491, i64 7, i32 0, i64 -1), !dbg !82 + %5 = load ptr, ptr %retval, align 8, !dbg !82 + ret ptr %5, !dbg !82 +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1 + +attributes #0 = { mustprogress noinline nounwind optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" 
"target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5} +!llvm.ident = !{!6} +!llvm.pseudo_probe_desc = !{!7, !8, !9, !10, !11, !12, !13} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "test.cc", directory: "", checksumkind: CSK_MD5, checksum: "44fecbd11c1385709b8c0c240594ca47") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 7, !"uwtable", i32 2} +!5 = !{i32 7, !"frame-pointer", i32 2} +!6 = !{!"clang"} +!7 = !{i64 -4458821264266946817, i64 4294967295, !"_Z3midl"} +!8 = !{i64 8307782004441981189, i64 4294967295, !"_Z3subi"} +!9 = !{i64 -835688601043669768, i64 281479271677951, !"_Z3midll"} +!10 = !{i64 7421866232655760046, i64 281479271677951, !"_Z3topl"} +!11 = !{i64 -9164787269840974918, i64 4294967295, !"_Z3barl"} +!12 = !{i64 997868883951813144, i64 281479271677951, !"_Z3topll"} +!13 = !{i64 5326982120444056491, i64 844493665377046, !"_Z3fool"} +!14 = distinct !DISubprogram(name: "mid", linkageName: "_Z3midl", scope: !15, file: !15, line: 1, type: !16, scopeLine: 1, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !20) +!15 = !DIFile(filename: "test.cc", directory: "", checksumkind: CSK_MD5, checksum: "44fecbd11c1385709b8c0c240594ca47") +!16 = !DISubroutineType(types: !17) +!17 = !{!18, !19} +!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64) +!19 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed) +!20 = !{} +!21 = !DILocalVariable(name: "l", arg: 1, scope: !14, file: !15, line: 1, type: !19) +!22 = !DILocation(line: 1, column: 16, scope: !14) +!23 = 
!DILocation(line: 2, column: 3, scope: !14) +!24 = distinct !DISubprogram(name: "sub", linkageName: "_Z3subi", scope: !15, file: !15, line: 5, type: !25, scopeLine: 5, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !20) +!25 = !DISubroutineType(types: !26) +!26 = !{!18, !27} +!27 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!28 = !DILocalVariable(name: "i", arg: 1, scope: !24, file: !15, line: 5, type: !27) +!29 = !DILocation(line: 5, column: 15, scope: !24) +!30 = !DILocation(line: 6, column: 3, scope: !24) +!31 = distinct !DISubprogram(name: "mid", linkageName: "_Z3midll", scope: !15, file: !15, line: 9, type: !32, scopeLine: 9, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !20) +!32 = !DISubroutineType(types: !33) +!33 = !{!18, !19, !19} +!34 = !DILocalVariable(name: "l", arg: 1, scope: !31, file: !15, line: 9, type: !19) +!35 = !DILocation(line: 9, column: 16, scope: !31) +!36 = !DILocalVariable(name: "m", arg: 2, scope: !31, file: !15, line: 9, type: !19) +!37 = !DILocation(line: 9, column: 24, scope: !31) +!38 = !DILocation(line: 10, column: 10, scope: !31) +!39 = !DILocation(line: 10, column: 10, scope: !40) +!40 = !DILexicalBlockFile(scope: !31, file: !15, discriminator: 455082007) +!41 = !DILocation(line: 10, column: 3, scope: !31) +!42 = distinct !DISubprogram(name: "top", linkageName: "_Z3topl", scope: !15, file: !15, line: 13, type: !16, scopeLine: 13, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !20) +!43 = !DILocalVariable(name: "l", arg: 1, scope: !42, file: !15, line: 13, type: !19) +!44 = !DILocation(line: 13, column: 16, scope: !42) +!45 = !DILocation(line: 14, column: 14, scope: !42) +!46 = !DILocation(line: 14, column: 10, scope: !47) +!47 = !DILexicalBlockFile(scope: !42, file: !15, discriminator: 455082007) +!48 = !DILocation(line: 14, column: 3, scope: !42) +!49 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barl", 
scope: !15, file: !15, line: 17, type: !50, scopeLine: 17, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !20) +!50 = !DISubroutineType(types: !51) +!51 = !{null, !19} +!52 = !DILocalVariable(name: "l", arg: 1, scope: !49, file: !15, line: 17, type: !19) +!53 = !DILocation(line: 17, column: 15, scope: !49) +!54 = !DILocation(line: 17, column: 19, scope: !49) +!55 = distinct !DISubprogram(name: "top", linkageName: "_Z3topll", scope: !15, file: !15, line: 19, type: !32, scopeLine: 19, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !20) +!56 = !DILocalVariable(name: "l", arg: 1, scope: !55, file: !15, line: 19, type: !19) +!57 = !DILocation(line: 19, column: 16, scope: !55) +!58 = !DILocalVariable(name: "m", arg: 2, scope: !55, file: !15, line: 19, type: !19) +!59 = !DILocation(line: 19, column: 24, scope: !55) +!60 = !DILocation(line: 20, column: 14, scope: !55) +!61 = !DILocation(line: 20, column: 17, scope: !55) +!62 = !DILocation(line: 20, column: 10, scope: !63) +!63 = !DILexicalBlockFile(scope: !55, file: !15, discriminator: 455082007) +!64 = !DILocation(line: 20, column: 3, scope: !55) +!65 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !15, file: !15, line: 23, type: !16, scopeLine: 23, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !20) +!66 = !DILocalVariable(name: "l", arg: 1, scope: !65, file: !15, line: 23, type: !19) +!67 = !DILocation(line: 23, column: 16, scope: !65) +!68 = !DILocation(line: 24, column: 7, scope: !69) +!69 = distinct !DILexicalBlock(scope: !65, file: !15, line: 24, column: 7) +!70 = !DILocation(line: 25, column: 16, scope: !69) +!71 = !DILocation(line: 25, column: 12, scope: !72) +!72 = !DILexicalBlockFile(scope: !69, file: !15, discriminator: 455082015) +!73 = !DILocation(line: 25, column: 5, scope: !69) +!74 = !DILocation(line: 26, column: 7, scope: !65) +!75 = !DILocation(line: 26, column: 3, scope: !76) +!76 = 
!DILexicalBlockFile(scope: !65, file: !15, discriminator: 455082031) +!77 = !DILocation(line: 27, column: 14, scope: !65) +!78 = !DILocation(line: 27, column: 17, scope: !65) +!79 = !DILocation(line: 27, column: 10, scope: !80) +!80 = !DILexicalBlockFile(scope: !65, file: !15, discriminator: 455082039) +!81 = !DILocation(line: 27, column: 3, scope: !65) +!82 = !DILocation(line: 28, column: 1, scope: !65) From b3e3319548dd0fe4ed1a28e72567afaf9e366f2d Mon Sep 17 00:00:00 2001 From: Zhiheng Xie Date: Thu, 25 Jun 2026 11:50:27 +0800 Subject: [PATCH 448/511] [AArch64] Add missing SubtargetFeature for hip12 core (#205246) The initial patch for the hip12 core had omitted several subtarget features: FeatureFP16FML, FeatureFlagM, FeaturePredRes, FeatureSB, FeatureSSBS, FeatureCCIDX, FeatureRandGen. --- clang/test/Driver/print-enabled-extensions/aarch64-hip12.c | 3 +++ llvm/lib/Target/AArch64/AArch64Processors.td | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/clang/test/Driver/print-enabled-extensions/aarch64-hip12.c b/clang/test/Driver/print-enabled-extensions/aarch64-hip12.c index 8469e912aa4db..6370303909839 100644 --- a/clang/test/Driver/print-enabled-extensions/aarch64-hip12.c +++ b/clang/test/Driver/print-enabled-extensions/aarch64-hip12.c @@ -11,6 +11,7 @@ // CHECK-NEXT: FEAT_BF16 Enable BFloat16 Extension // CHECK-NEXT: FEAT_BRBE Enable Branch Record Buffer Extension // CHECK-NEXT: FEAT_BTI Enable Branch Target Identification +// CHECK-NEXT: FEAT_CCIDX Enable Armv8.3-A Extend of the CCSIDR number of sets // CHECK-NEXT: FEAT_CRC32 Enable Armv8.0-A CRC-32 checksum instructions // CHECK-NEXT: FEAT_CSV2_2 Enable architectural speculation restriction // CHECK-NEXT: FEAT_DIT Enable Armv8.4-A Data Independent Timing instructions @@ -49,6 +50,7 @@ // CHECK-NEXT: FEAT_RAS, FEAT_RASv1p1 Enable Armv8.0-A Reliability, Availability and Serviceability Extensions // CHECK-NEXT: FEAT_RDM Enable Armv8.1-A Rounding Double Multiply Add/Subtract instructions // 
CHECK-NEXT: FEAT_RME Enable Realm Management Extension +// CHECK-NEXT: FEAT_RNG Enable Random Number generation instructions // CHECK-NEXT: FEAT_SB Enable Armv8.5-A Speculation Barrier // CHECK-NEXT: FEAT_SEL2 Enable Armv8.4-A Secure Exception Level 2 extension // CHECK-NEXT: FEAT_SHA1, FEAT_SHA256 Enable SHA1 and SHA256 support @@ -57,6 +59,7 @@ // CHECK-NEXT: FEAT_SPE Enable Statistical Profiling extension // CHECK-NEXT: FEAT_SPECRES Enable Armv8.5-A execution and data prediction invalidation instructions // CHECK-NEXT: FEAT_SPEv1p2 Enable extra register in the Statistical Profiling Extension +// CHECK-NEXT: FEAT_SSBS, FEAT_SSBS2 Enable Speculative Store Bypass Safe bit // CHECK-NEXT: FEAT_SVE Enable Scalable Vector Extension (SVE) instructions // CHECK-NEXT: FEAT_SVE2 Enable Scalable Vector Extension 2 (SVE2) instructions // CHECK-NEXT: FEAT_SVE_AES, FEAT_SVE_PMULL128 Enable SVE AES and quadword SVE polynomial multiply instructions diff --git a/llvm/lib/Target/AArch64/AArch64Processors.td b/llvm/lib/Target/AArch64/AArch64Processors.td index a4dcaa2d535ec..dfa2bd46c8e56 100644 --- a/llvm/lib/Target/AArch64/AArch64Processors.td +++ b/llvm/lib/Target/AArch64/AArch64Processors.td @@ -1213,7 +1213,9 @@ def ProcessorFeatures { list HIP12 = [HasV8_7aOps, FeatureSVE, FeatureSVE2, FeatureSVEBitPerm, FeatureSVEAES, FeatureSVESM4, FeatureSVESHA3, - FeatureFullFP16, FeaturePerfMon, + FeatureFullFP16, FeatureFP16FML, FeaturePerfMon, + FeatureFlagM, FeaturePredRes, FeatureSB, FeatureSSBS, + FeatureCCIDX, FeatureRandGen, FeatureETE, FeatureTRBE, FeatureSPE, FeatureSPE_EEF, FeatureNMI, FeatureHBC, FeatureRCPC3, FeatureBF16, From f63254be25c5cf1ad2143c03b66ccf4e6627dc67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Wed, 24 Jun 2026 21:01:10 -0700 Subject: [PATCH 449/511] [flang][cuda] Accept cuf kernel do without scalar (#205705) 
The base compiler accepts `!$cuf kernel do()` instead of raising an error. Update the parser to accept the same syntax. `!$cuf kernel do()` is equivalent to `!$cuf kernel do` --- flang/lib/Parser/executable-parsers.cpp | 4 +++- flang/test/Semantics/cuf23.cuf | 5 +++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/flang/lib/Parser/executable-parsers.cpp b/flang/lib/Parser/executable-parsers.cpp index 2241c04f5d26d..01f8d56c21723 100644 --- a/flang/lib/Parser/executable-parsers.cpp +++ b/flang/lib/Parser/executable-parsers.cpp @@ -583,7 +583,9 @@ TYPE_PARSER("<<<" >> TYPE_PARSER(sourced(beginDirective >> "$CUF KERNEL DO"_tok >> construct( - maybe(parenthesized(scalarIntConstantExpr)), + // Accept !$CUF KERNEL DO, !$CUF KERNEL DO(), and + // !$CUF KERNEL DO(). + defaulted(parenthesized(maybe(scalarIntConstantExpr))), maybe(Parser{}), many(Parser{}) / endDirective))) TYPE_CONTEXT_PARSER("!$CUF KERNEL DO construct"_en_US, diff --git a/flang/test/Semantics/cuf23.cuf b/flang/test/Semantics/cuf23.cuf index 93d875d9430ae..6291fcaaa752d 100644 --- a/flang/test/Semantics/cuf23.cuf +++ b/flang/test/Semantics/cuf23.cuf @@ -35,6 +35,11 @@ subroutine host() do i = 1, 10 print*, a(i) ! ok end do + + !$cuf kernel do() + do i = 1, 10 + print*, a(i) ! ok + end do end subroutine attributes(global) subroutine global1() From 5e7d4db7a726e726600a2e2e42a16ced2cf0e038 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Thu, 25 Jun 2026 06:27:00 +0200 Subject: [PATCH 450/511] [clang][bytecode] Ignore indeterminate APValues (#205555) They don't produce a value and for us, that means we just need to ignore them and not initialize anything. 
--- clang/lib/AST/ByteCode/Compiler.cpp | 9 +++++++++ clang/test/AST/ByteCode/evaluate-dtor-codegen.cpp | 14 ++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 clang/test/AST/ByteCode/evaluate-dtor-codegen.cpp diff --git a/clang/lib/AST/ByteCode/Compiler.cpp b/clang/lib/AST/ByteCode/Compiler.cpp index a74bea26f5c28..d2e28e516eaab 100644 --- a/clang/lib/AST/ByteCode/Compiler.cpp +++ b/clang/lib/AST/ByteCode/Compiler.cpp @@ -5421,6 +5421,7 @@ bool Compiler::visitDtorCall(const VarDecl *VD, const APValue &Value) { template bool Compiler::visitAPValue(const APValue &Val, PrimType ValType, SourceInfo Info) { + assert(!Val.isIndeterminate() && "Needs to be checked before"); assert(!DiscardResult); if (Val.isInt()) return this->emitConst(Val.getInt(), ValType, Info); @@ -5510,6 +5511,8 @@ bool Compiler::visitAPValueInitializer(const APValue &Val, assert(R); for (unsigned I = 0, N = Val.getStructNumFields(); I != N; ++I) { const APValue &F = Val.getStructField(I); + if (F.isIndeterminate()) + continue; const Record::Field *RF = R->getField(I); QualType FieldType = RF->Decl->getType(); @@ -5537,6 +5540,8 @@ bool Compiler::visitAPValueInitializer(const APValue &Val, if (I >= R->getNumBases()) break; const APValue &B = Val.getStructBase(I); + if (B.isIndeterminate()) + continue; const Record::Base *RB = R->getBase(I); QualType BaseType = Ctx.getASTContext().getCanonicalTagType(RB->Decl); @@ -5557,6 +5562,8 @@ bool Compiler::visitAPValueInitializer(const APValue &Val, const Record *R = this->getRecord(T); assert(R); const APValue &F = Val.getUnionValue(); + if (F.isIndeterminate()) + return true; const Record::Field *RF = R->getField(UnionField); QualType FieldType = RF->Decl->getType(); @@ -5587,6 +5594,8 @@ bool Compiler::visitAPValueInitializer(const APValue &Val, const APValue &Elem = A >= InitializedElems ? 
Val.getArrayFiller() : Val.getArrayInitializedElt(A); + if (Elem.isIndeterminate()) + continue; if (ElemT) { if (!this->visitAPValue(Elem, *ElemT, Info)) diff --git a/clang/test/AST/ByteCode/evaluate-dtor-codegen.cpp b/clang/test/AST/ByteCode/evaluate-dtor-codegen.cpp new file mode 100644 index 0000000000000..24b17cceb8e41 --- /dev/null +++ b/clang/test/AST/ByteCode/evaluate-dtor-codegen.cpp @@ -0,0 +1,14 @@ +// RUN: %clang_cc1 -std=c++20 -verify=both,expected %s -Wexit-time-destructors -fexperimental-new-constant-interpreter +// RUN: %clang_cc1 -std=c++20 -verify=both,ref %s -Wexit-time-destructors + +// both-no-diagnostics + +struct S { + int a; + constexpr S() {} + constexpr ~S() { + } +}; +S s{}; + + From 8759f1364df21240d4da74c4241379bd7b8a6153 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Wed, 24 Jun 2026 21:28:45 -0700 Subject: [PATCH 451/511] [flang][cuda] Add NYI message for CUDA dynamic parallelism (#205628) --- flang/lib/Semantics/check-cuda.cpp | 4 ++++ flang/lib/Semantics/expression.cpp | 12 ++++++++++-- flang/test/Semantics/cuf30.cuf | 19 +++++++++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) create mode 100644 flang/test/Semantics/cuf30.cuf diff --git a/flang/lib/Semantics/check-cuda.cpp b/flang/lib/Semantics/check-cuda.cpp index 7d2c012d3a4dc..32ba35e83a120 100644 --- a/flang/lib/Semantics/check-cuda.cpp +++ b/flang/lib/Semantics/check-cuda.cpp @@ -92,6 +92,10 @@ struct DeviceExprChecker } return {}; } + if (*attrs == common::CUDASubprogramAttrs::Global) { + return parser::MessageFormattedText( + "not yet implemented: CUDA dynamic parallelism"_err_en_US); + } } } diff --git a/flang/lib/Semantics/expression.cpp b/flang/lib/Semantics/expression.cpp index aea685e575754..02263fae0e55d 100644 --- a/flang/lib/Semantics/expression.cpp +++ b/flang/lib/Semantics/expression.cpp @@ -3468,8 +3468,16 
@@ void ExpressionAnalyzer::CheckForBadRecursion( "Assumed-length CHARACTER(*) function '%s' cannot call itself"_err_en_US, callSite); } else if (FindCUDADeviceContext(scope)) { - msg = Say( - "Device subprogram '%s' cannot call itself"_err_en_US, callSite); + const auto *subp{ + proc.GetUltimate().detailsIf()}; + bool isGlobalCUDA{subp && subp->cudaSubprogramAttrs() && + *subp->cudaSubprogramAttrs() == + common::CUDASubprogramAttrs::Global}; + // CUDA global call diagnostics are handled by CUDA checks. + if (!isGlobalCUDA) { + msg = Say( + "Device subprogram '%s' cannot call itself"_err_en_US, callSite); + } } AttachDeclaration(msg, proc); } diff --git a/flang/test/Semantics/cuf30.cuf b/flang/test/Semantics/cuf30.cuf new file mode 100644 index 0000000000000..04d063abc33d4 --- /dev/null +++ b/flang/test/Semantics/cuf30.cuf @@ -0,0 +1,19 @@ +! RUN: %python %S/test_errors.py %s %flang_fc1 -Werror + +module m + +contains + +attributes(global) subroutine g1(a, m) + integer :: a(*), m + !ERROR: not yet implemented: CUDA dynamic parallelism + call g1<<<1, 1>>>(a, m) +end subroutine + +attributes(global) subroutine g2(a, m) + integer :: a(*), m + !ERROR: not yet implemented: CUDA dynamic parallelism + call g1<<<1, 1>>>(a, m) +end subroutine + +end module From 501a3ce537e0807c1765ab5e30588d54b5d5074e Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Wed, 24 Jun 2026 21:52:01 -0700 Subject: [PATCH 452/511] [clang-doc] Test more language constructs (#205585) We're missing several different language constructs in our tests. This patch simply adds the basic tests and captures the output without trying to fix or adjust any behavior, and can be considered a sort of precommit test for future fixes to the various documentation components. 
--- .../test/clang-doc/Inputs/array-type.cpp | 1 + .../Inputs/class-partial-specialization.cpp | 2 ++ .../Inputs/function-pointer-type.cpp | 1 + .../Inputs/member-function-pointer-type.cpp | 2 ++ .../Inputs/nested-pointer-qualifiers.cpp | 1 + .../test/clang-doc/array-type.cpp | 12 +++++++++ .../class-partial-specialization.cpp | 13 ++++++++++ .../test/clang-doc/function-pointer-type.cpp | 12 +++++++++ .../test/clang-doc/json/array-type.cpp | 18 +++++++++++++ .../json/class-partial-specialization.cpp | 25 +++++++++++++++++++ .../clang-doc/json/function-pointer-type.cpp | 18 +++++++++++++ .../json/member-function-pointer-type.cpp | 18 +++++++++++++ .../json/nested-pointer-qualifiers.cpp | 18 +++++++++++++ .../member-function-pointer-type.cpp | 12 +++++++++ .../clang-doc/nested-pointer-qualifiers.cpp | 12 +++++++++ 15 files changed, 165 insertions(+) create mode 100644 clang-tools-extra/test/clang-doc/Inputs/array-type.cpp create mode 100644 clang-tools-extra/test/clang-doc/Inputs/class-partial-specialization.cpp create mode 100644 clang-tools-extra/test/clang-doc/Inputs/function-pointer-type.cpp create mode 100644 clang-tools-extra/test/clang-doc/Inputs/member-function-pointer-type.cpp create mode 100644 clang-tools-extra/test/clang-doc/Inputs/nested-pointer-qualifiers.cpp create mode 100644 clang-tools-extra/test/clang-doc/array-type.cpp create mode 100644 clang-tools-extra/test/clang-doc/class-partial-specialization.cpp create mode 100644 clang-tools-extra/test/clang-doc/function-pointer-type.cpp create mode 100644 clang-tools-extra/test/clang-doc/json/array-type.cpp create mode 100644 clang-tools-extra/test/clang-doc/json/class-partial-specialization.cpp create mode 100644 clang-tools-extra/test/clang-doc/json/function-pointer-type.cpp create mode 100644 clang-tools-extra/test/clang-doc/json/member-function-pointer-type.cpp create mode 100644 clang-tools-extra/test/clang-doc/json/nested-pointer-qualifiers.cpp create mode 100644 
clang-tools-extra/test/clang-doc/member-function-pointer-type.cpp create mode 100644 clang-tools-extra/test/clang-doc/nested-pointer-qualifiers.cpp diff --git a/clang-tools-extra/test/clang-doc/Inputs/array-type.cpp b/clang-tools-extra/test/clang-doc/Inputs/array-type.cpp new file mode 100644 index 0000000000000..74053aa4b0f4a --- /dev/null +++ b/clang-tools-extra/test/clang-doc/Inputs/array-type.cpp @@ -0,0 +1 @@ +void qux(int (&arr)[5]); diff --git a/clang-tools-extra/test/clang-doc/Inputs/class-partial-specialization.cpp b/clang-tools-extra/test/clang-doc/Inputs/class-partial-specialization.cpp new file mode 100644 index 0000000000000..f36b320138691 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/Inputs/class-partial-specialization.cpp @@ -0,0 +1,2 @@ +template struct MyClass {}; +template struct MyClass {}; diff --git a/clang-tools-extra/test/clang-doc/Inputs/function-pointer-type.cpp b/clang-tools-extra/test/clang-doc/Inputs/function-pointer-type.cpp new file mode 100644 index 0000000000000..adbdad6a23f17 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/Inputs/function-pointer-type.cpp @@ -0,0 +1 @@ +void bar(void (*fn)(int)); diff --git a/clang-tools-extra/test/clang-doc/Inputs/member-function-pointer-type.cpp b/clang-tools-extra/test/clang-doc/Inputs/member-function-pointer-type.cpp new file mode 100644 index 0000000000000..4bb025a198b6a --- /dev/null +++ b/clang-tools-extra/test/clang-doc/Inputs/member-function-pointer-type.cpp @@ -0,0 +1,2 @@ +struct Class {}; +void baz(void (Class::*fn)(int)); diff --git a/clang-tools-extra/test/clang-doc/Inputs/nested-pointer-qualifiers.cpp b/clang-tools-extra/test/clang-doc/Inputs/nested-pointer-qualifiers.cpp new file mode 100644 index 0000000000000..49cfb4c8567f4 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/Inputs/nested-pointer-qualifiers.cpp @@ -0,0 +1 @@ +void foo(const int * const * ptr); diff --git a/clang-tools-extra/test/clang-doc/array-type.cpp b/clang-tools-extra/test/clang-doc/array-type.cpp 
new file mode 100644 index 0000000000000..0e7cef6a60853 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/array-type.cpp @@ -0,0 +1,12 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: clang-doc --output=%t --format=html --executor=standalone %S/Inputs/array-type.cpp +// RUN: FileCheck %s --check-prefix=HTML < %t/html/GlobalNamespace/index.html +// RUN: clang-doc --output=%t --format=md --executor=standalone %S/Inputs/array-type.cpp +// RUN: FileCheck %s --check-prefix=MD < %t/GlobalNamespace/index.md +// RUN: clang-doc --output=%t --format=md_mustache --executor=standalone %S/Inputs/array-type.cpp +// RUN: FileCheck %s --check-prefix=MD-MUSTACHE < %t/md/GlobalNamespace/index.md + +// HTML:
void qux (int (&)[5] arr)
+ +// MD: *void qux(int (&)[5] arr)* +// MD-MUSTACHE: *void qux(int (&)[5] arr)* diff --git a/clang-tools-extra/test/clang-doc/class-partial-specialization.cpp b/clang-tools-extra/test/clang-doc/class-partial-specialization.cpp new file mode 100644 index 0000000000000..0eff263bc5632 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/class-partial-specialization.cpp @@ -0,0 +1,13 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: clang-doc --output=%t --format=html --executor=standalone %S/Inputs/class-partial-specialization.cpp +// RUN: FileCheck %s --check-prefix=HTML < %t/html/GlobalNamespace/_ZTV7MyClassIPT_E.html +// RUN: clang-doc --output=%t --format=md --executor=standalone %S/Inputs/class-partial-specialization.cpp +// RUN: FileCheck %s --check-prefix=MD < %t/GlobalNamespace/MyClass.md +// RUN: clang-doc --output=%t --format=md_mustache --executor=standalone %S/Inputs/class-partial-specialization.cpp +// RUN: FileCheck %s --check-prefix=MD-MUSTACHE < %t/md/GlobalNamespace/_ZTV7MyClassIPT_E.md + +// HTML:
template <typename T>
+// HTML:

struct MyClass

+ +// MD: # struct MyClass +// MD-MUSTACHE: # struct MyClass diff --git a/clang-tools-extra/test/clang-doc/function-pointer-type.cpp b/clang-tools-extra/test/clang-doc/function-pointer-type.cpp new file mode 100644 index 0000000000000..ede36472564aa --- /dev/null +++ b/clang-tools-extra/test/clang-doc/function-pointer-type.cpp @@ -0,0 +1,12 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: clang-doc --output=%t --format=html --executor=standalone %S/Inputs/function-pointer-type.cpp +// RUN: FileCheck %s --check-prefix=HTML < %t/html/GlobalNamespace/index.html +// RUN: clang-doc --output=%t --format=md --executor=standalone %S/Inputs/function-pointer-type.cpp +// RUN: FileCheck %s --check-prefix=MD < %t/GlobalNamespace/index.md +// RUN: clang-doc --output=%t --format=md_mustache --executor=standalone %S/Inputs/function-pointer-type.cpp +// RUN: FileCheck %s --check-prefix=MD-MUSTACHE < %t/md/GlobalNamespace/index.md + +// HTML:
void bar (void (*)(int) fn)
+ +// MD: *void bar(void (*)(int) fn)* +// MD-MUSTACHE: *void bar(void (*)(int) fn)* diff --git a/clang-tools-extra/test/clang-doc/json/array-type.cpp b/clang-tools-extra/test/clang-doc/json/array-type.cpp new file mode 100644 index 0000000000000..861210879a887 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/json/array-type.cpp @@ -0,0 +1,18 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: clang-doc --pretty-json --output=%t --format=json --executor=standalone %S/../Inputs/array-type.cpp +// RUN: FileCheck %s < %t/json/GlobalNamespace/index.json + +// CHECK: "Functions": [ +// CHECK-NEXT: { +// CHECK: "Name": "qux", +// CHECK: "Params": [ +// CHECK-NEXT: { +// CHECK-NEXT: "Name": "arr", +// CHECK-NEXT: "ParamEnd": true, +// CHECK-NEXT: "Type": { +// CHECK-NEXT: "Name": "int (&)[5]", +// CHECK-NEXT: "QualName": "int (&)[5]", +// CHECK-NEXT: "USR": "0000000000000000000000000000000000000000" +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] diff --git a/clang-tools-extra/test/clang-doc/json/class-partial-specialization.cpp b/clang-tools-extra/test/clang-doc/json/class-partial-specialization.cpp new file mode 100644 index 0000000000000..c5d2a65f5e53f --- /dev/null +++ b/clang-tools-extra/test/clang-doc/json/class-partial-specialization.cpp @@ -0,0 +1,25 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: clang-doc --pretty-json --output=%t --format=json --executor=standalone %S/../Inputs/class-partial-specialization.cpp +// RUN: FileCheck %s < %t/json/GlobalNamespace/_ZTV7MyClassIPT_E.json + +// CHECK: "MangledName": "_ZTV7MyClassIPT_E", +// CHECK-NEXT: "Name": "MyClass", +// CHECK: "Template": { +// CHECK-NEXT: "Parameters": [ +// CHECK-NEXT: { +// CHECK-NEXT: "End": true, +// CHECK-NEXT: "Param": "typename T" +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: "Specialization": { +// CHECK-NEXT: "Parameters": [ +// CHECK-NEXT: { +// CHECK-NEXT: "Param": "T*", +// CHECK-NEXT: "SpecParamEnd": true +// CHECK-NEXT: } +// CHECK-NEXT: ], +// CHECK-NEXT: 
"SpecializationOf": "{{[0-9A-F]*}}", +// CHECK-NEXT: "VerticalDisplay": false +// CHECK-NEXT: }, +// CHECK-NEXT: "VerticalDisplay": false +// CHECK-NEXT: } diff --git a/clang-tools-extra/test/clang-doc/json/function-pointer-type.cpp b/clang-tools-extra/test/clang-doc/json/function-pointer-type.cpp new file mode 100644 index 0000000000000..34b693bc67d2b --- /dev/null +++ b/clang-tools-extra/test/clang-doc/json/function-pointer-type.cpp @@ -0,0 +1,18 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: clang-doc --pretty-json --output=%t --format=json --executor=standalone %S/../Inputs/function-pointer-type.cpp +// RUN: FileCheck %s < %t/json/GlobalNamespace/index.json + +// CHECK: "Functions": [ +// CHECK-NEXT: { +// CHECK: "Name": "bar", +// CHECK: "Params": [ +// CHECK-NEXT: { +// CHECK-NEXT: "Name": "fn", +// CHECK-NEXT: "ParamEnd": true, +// CHECK-NEXT: "Type": { +// CHECK-NEXT: "Name": "void (*)(int)", +// CHECK-NEXT: "QualName": "void (*)(int)", +// CHECK-NEXT: "USR": "0000000000000000000000000000000000000000" +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] diff --git a/clang-tools-extra/test/clang-doc/json/member-function-pointer-type.cpp b/clang-tools-extra/test/clang-doc/json/member-function-pointer-type.cpp new file mode 100644 index 0000000000000..8ca1ecbb1d14d --- /dev/null +++ b/clang-tools-extra/test/clang-doc/json/member-function-pointer-type.cpp @@ -0,0 +1,18 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: clang-doc --pretty-json --output=%t --format=json --executor=standalone %S/../Inputs/member-function-pointer-type.cpp +// RUN: FileCheck %s < %t/json/GlobalNamespace/index.json + +// CHECK: "Functions": [ +// CHECK-NEXT: { +// CHECK: "Name": "baz", +// CHECK: "Params": [ +// CHECK-NEXT: { +// CHECK-NEXT: "Name": "fn", +// CHECK-NEXT: "ParamEnd": true, +// CHECK-NEXT: "Type": { +// CHECK-NEXT: "Name": "void (Class::*)(int)", +// CHECK-NEXT: "QualName": "void (Class::*)(int)", +// CHECK-NEXT: "USR": "0000000000000000000000000000000000000000" +// 
CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] diff --git a/clang-tools-extra/test/clang-doc/json/nested-pointer-qualifiers.cpp b/clang-tools-extra/test/clang-doc/json/nested-pointer-qualifiers.cpp new file mode 100644 index 0000000000000..5a3e8842b8036 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/json/nested-pointer-qualifiers.cpp @@ -0,0 +1,18 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: clang-doc --pretty-json --output=%t --format=json --executor=standalone %S/../Inputs/nested-pointer-qualifiers.cpp +// RUN: FileCheck %s < %t/json/GlobalNamespace/index.json + +// CHECK: "Functions": [ +// CHECK-NEXT: { +// CHECK: "Name": "foo", +// CHECK: "Params": [ +// CHECK-NEXT: { +// CHECK-NEXT: "Name": "ptr", +// CHECK-NEXT: "ParamEnd": true, +// CHECK-NEXT: "Type": { +// CHECK-NEXT: "Name": "const int *const *", +// CHECK-NEXT: "QualName": "const int *const *", +// CHECK-NEXT: "USR": "0000000000000000000000000000000000000000" +// CHECK-NEXT: } +// CHECK-NEXT: } +// CHECK-NEXT: ] diff --git a/clang-tools-extra/test/clang-doc/member-function-pointer-type.cpp b/clang-tools-extra/test/clang-doc/member-function-pointer-type.cpp new file mode 100644 index 0000000000000..4906c0c4ca966 --- /dev/null +++ b/clang-tools-extra/test/clang-doc/member-function-pointer-type.cpp @@ -0,0 +1,12 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: clang-doc --output=%t --format=html --executor=standalone %S/Inputs/member-function-pointer-type.cpp +// RUN: FileCheck %s --check-prefix=HTML < %t/html/GlobalNamespace/index.html +// RUN: clang-doc --output=%t --format=md --executor=standalone %S/Inputs/member-function-pointer-type.cpp +// RUN: FileCheck %s --check-prefix=MD < %t/GlobalNamespace/index.md +// RUN: clang-doc --output=%t --format=md_mustache --executor=standalone %S/Inputs/member-function-pointer-type.cpp +// RUN: FileCheck %s --check-prefix=MD-MUSTACHE < %t/md/GlobalNamespace/index.md + +// HTML:
void baz (void (Class::*)(int) fn)
+ +// MD: *void baz(void (Class::*)(int) fn)* +// MD-MUSTACHE: *void baz(void (Class::*)(int) fn)* diff --git a/clang-tools-extra/test/clang-doc/nested-pointer-qualifiers.cpp b/clang-tools-extra/test/clang-doc/nested-pointer-qualifiers.cpp new file mode 100644 index 0000000000000..dc204f044711b --- /dev/null +++ b/clang-tools-extra/test/clang-doc/nested-pointer-qualifiers.cpp @@ -0,0 +1,12 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: clang-doc --output=%t --format=html --executor=standalone %S/Inputs/nested-pointer-qualifiers.cpp +// RUN: FileCheck %s --check-prefix=HTML < %t/html/GlobalNamespace/index.html +// RUN: clang-doc --output=%t --format=md --executor=standalone %S/Inputs/nested-pointer-qualifiers.cpp +// RUN: FileCheck %s --check-prefix=MD < %t/GlobalNamespace/index.md +// RUN: clang-doc --output=%t --format=md_mustache --executor=standalone %S/Inputs/nested-pointer-qualifiers.cpp +// RUN: FileCheck %s --check-prefix=MD-MUSTACHE < %t/md/GlobalNamespace/index.md + +// HTML:
void foo (const int *const * ptr)
+ +// MD: *void foo(const int *const * ptr)* +// MD-MUSTACHE: *void foo(const int *const * ptr)* From c3f9a06abcd1a7777cb9c2cc395cb35ce9660367 Mon Sep 17 00:00:00 2001 From: Hood Chatham Date: Thu, 25 Jun 2026 01:07:33 -0400 Subject: [PATCH 453/511] [WebAssembly] Represent reference types as TargetExtType (#203165) Originally #71540 by Paulo Matos; I picked it up and finished it. Resolves https://github.com/llvm/llvm-project/issues/69894. Model WebAssembly externref and funcref as target("wasm.externref") / target("wasm.funcref") TargetExtTypes instead of pointers in non-integral address spaces 10 and 20. The entire WebAssemblyLowerRefTypesIntPtrConv can be removed. This breaks the GlobalISel handling for reference types, so I just disabled GlobalISel handling for functions that use them. I added intrinsics for `wasm.ptr.to_funcref` and `wasm.funcref.to_ptr`. ptr.to_funcref does a table.get from the indirect function pointer table. As a special case, 0 is converted to the null funcref rather than doing table.get on 0. `wasm.funcref.to_ptr` is only handled when we call it immediately, otherwise it will fail to lower. We could dynamically put the funcref into the table to make it work but that would require a stack of spilled funcrefs and isn't worth the effort. In the process of looking into this, I noticed that clang used to allow casting from funcref to function pointer but if the cast result escaped it would hit `Cannot select: FUNCREF_TO_PTR` in the backend. I added a diagnostic that says "a funcref can only be converted to a pointer to be directly called" to make this a little cleaner. 
cc @QuantumSegfault --------- Co-authored-by: Paulo Matos --- clang/docs/ReleaseNotes.rst | 6 ++ clang/lib/CodeGen/CGExpr.cpp | 21 ++++- clang/lib/CodeGen/CGExprConstant.cpp | 11 ++- clang/lib/CodeGen/CGExprScalar.cpp | 43 +++++++++- clang/lib/CodeGen/CodeGenTypes.cpp | 4 + .../WebAssembly/builtins-table-externref.c | 28 +++--- .../WebAssembly/builtins-table-funcref.c | 16 ++-- .../WebAssembly/builtins-test-fp-sig.c | 4 +- .../test/CodeGen/WebAssembly/wasm-externref.c | 8 +- .../WebAssembly/wasm-funcref-to-ptr-error.c | 21 +++++ clang/test/CodeGen/WebAssembly/wasm-funcref.c | 71 +++++++++++----- clang/test/CodeGen/builtins-wasm.c | 4 +- .../test/CodeGenCXX/wasm-reftypes-mangle.cpp | 4 +- llvm/docs/ReleaseNotes.md | 9 ++ llvm/include/llvm/IR/Intrinsics.h | 2 + llvm/include/llvm/IR/IntrinsicsWebAssembly.td | 11 +++ .../SelectionDAG/SelectionDAGBuilder.cpp | 10 +++ llvm/lib/CodeGen/ValueTypes.cpp | 4 + llvm/lib/IR/Intrinsics.cpp | 17 +++- llvm/lib/IR/Type.cpp | 12 ++- llvm/lib/Target/WebAssembly/CMakeLists.txt | 1 - .../GISel/WebAssemblyCallLowering.cpp | 53 +++++++----- .../WebAssembly/Utils/WasmAddressSpaces.h | 6 +- .../Utils/WebAssemblyTypeUtilities.h | 10 +-- llvm/lib/Target/WebAssembly/WebAssembly.h | 2 - .../WebAssembly/WebAssemblyFastISel.cpp | 70 ++++++++++++--- .../WebAssembly/WebAssemblyISelDAGToDAG.cpp | 21 +++++ .../WebAssembly/WebAssemblyISelLowering.cpp | 49 ++++++----- .../WebAssembly/WebAssemblyISelLowering.h | 3 - .../WebAssemblyLowerRefTypesIntPtrConv.cpp | 85 ------------------- .../WebAssemblyRefTypeMem2Local.cpp | 2 + .../WebAssembly/WebAssemblyTargetMachine.cpp | 2 - .../GlobalISel/irtranslator/args.ll | 24 +----- .../GlobalISel/irtranslator/call-basics.ll | 56 ++---------- .../GlobalISel/irtranslator/ret-basics.ll | 30 +------ .../GlobalISel/reference-types-fallback.ll | 40 +++++++++ .../WebAssembly/externref-globalget.ll | 2 +- .../WebAssembly/externref-globalset.ll | 2 +- .../CodeGen/WebAssembly/externref-inttoptr.ll | 12 +-- 
.../CodeGen/WebAssembly/externref-ptrtoint.ll | 11 +-- .../CodeGen/WebAssembly/externref-tableget.ll | 2 +- .../CodeGen/WebAssembly/externref-tableset.ll | 2 +- .../WebAssembly/externref-unsized-load.ll | 2 +- .../WebAssembly/externref-unsized-store.ll | 2 +- llvm/test/CodeGen/WebAssembly/funcref-call.ll | 15 +++- .../CodeGen/WebAssembly/funcref-globalget.ll | 2 +- .../CodeGen/WebAssembly/funcref-globalset.ll | 2 +- .../WebAssembly/funcref-ptr-conversion.ll | 28 ++++++ .../CodeGen/WebAssembly/funcref-table_call.ll | 6 +- .../CodeGen/WebAssembly/funcref-tableget.ll | 2 +- .../CodeGen/WebAssembly/funcref-tableset.ll | 2 +- .../WebAssembly/funcref-to-ptr-error.ll | 30 +++++++ .../WebAssembly/ref-null-zeroinitializer.ll | 67 +++++++++++++++ llvm/test/CodeGen/WebAssembly/ref-null.ll | 4 +- .../test/CodeGen/WebAssembly/ref-test-func.ll | 3 +- .../CodeGen/WebAssembly/ref-type-mem2local.ll | 44 +++++----- .../CodeGen/WebAssembly/select-reftype.ll | 24 +++--- llvm/test/CodeGen/WebAssembly/table-copy.ll | 2 +- llvm/test/CodeGen/WebAssembly/table-fill.ll | 2 +- llvm/test/CodeGen/WebAssembly/table-grow.ll | 2 +- llvm/test/CodeGen/WebAssembly/table-size.ll | 2 +- llvm/test/CodeGen/WebAssembly/table-types.ll | 4 +- .../WebAssembly/externref-no-vectorize.ll | 47 ++++++++++ .../llvm/lib/Target/WebAssembly/BUILD.gn | 1 - 64 files changed, 679 insertions(+), 405 deletions(-) create mode 100644 clang/test/CodeGen/WebAssembly/wasm-funcref-to-ptr-error.c delete mode 100644 llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp create mode 100644 llvm/test/CodeGen/WebAssembly/GlobalISel/reference-types-fallback.ll create mode 100644 llvm/test/CodeGen/WebAssembly/funcref-ptr-conversion.ll create mode 100644 llvm/test/CodeGen/WebAssembly/funcref-to-ptr-error.ll create mode 100644 llvm/test/CodeGen/WebAssembly/ref-null-zeroinitializer.ll create mode 100644 llvm/test/Transforms/SLPVectorizer/WebAssembly/externref-no-vectorize.ll diff --git a/clang/docs/ReleaseNotes.rst 
b/clang/docs/ReleaseNotes.rst index 4db6cc0777ede..370f6944831a7 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -977,6 +977,12 @@ WebAssembly Support - Fixed a crash when ``__funcref`` is applied to a non-function pointer type. (#GH118233) +- WebAssembly reference types (``__externref_t`` and ``__funcref`` function + pointers) now lower to the opaque IR types ``target("wasm.externref")`` and + ``target("wasm.funcref")`` instead of ``ptr addrspace(10)`` / + ``ptr addrspace(20)``. +- Fixed a compiler crash at ``-O2`` when reference-type values were passed + through control flow that the SLP vectorizer tried to vectorize. AVR Support ^^^^^^^^^^^ diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 465a020e38e74..6abe4331db552 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -45,6 +45,7 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/IR/MatrixBuilder.h" @@ -6578,6 +6579,21 @@ static GlobalDecl getGlobalDeclForDirectCall(const FunctionDecl *FD) { CGCallee CodeGenFunction::EmitCallee(const Expr *E) { E = E->IgnoreParens(); + // A WebAssembly funcref is an opaque reference type and llvm only accepts + // function pointers as the call target. To make an indirect call through a + // reference type, first use the llvm.wasm.funcref.to_ptr intrinsic to make a + // fake function pointer to it. The backend lowers the resulting indirect call + // to a table.set into a single element dummy table + call_indirect 0. 
+ auto ConvertFuncrefToPtr = [&](llvm::Value *CalleePtr) -> llvm::Value * { + if (auto *TET = dyn_cast<llvm::TargetExtType>(CalleePtr->getType()); + TET && TET->getName() == "wasm.funcref") { + llvm::Function *ToPtr = + CGM.getIntrinsic(llvm::Intrinsic::wasm_funcref_to_ptr); + return Builder.CreateCall(ToPtr, {CalleePtr}); + } + return CalleePtr; + }; + // Look through function-to-pointer decay. if (auto ICE = dyn_cast<ImplicitCastExpr>(E)) { if (ICE->getCastKind() == CK_FunctionToPointerDecay || @@ -6602,7 +6618,8 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) { GD = GlobalDecl(VD); } CGCalleeInfo CalleeInfo(FunctionType->getAs<FunctionProtoType>(), GD); - CGCallee Callee(CalleeInfo, Result.first, Result.second); + CGCallee Callee(CalleeInfo, ConvertFuncrefToPtr(Result.first), + Result.second); return Callee; } } @@ -6646,7 +6663,7 @@ CGCallee CodeGenFunction::EmitCallee(const Expr *E) { CGCalleeInfo calleeInfo(functionType->getAs<FunctionProtoType>(), GD); CGPointerAuthInfo pointerAuth = CGM.getFunctionPointerAuthInfo(functionType); - CGCallee callee(calleeInfo, calleePtr, pointerAuth); + CGCallee callee(calleeInfo, ConvertFuncrefToPtr(calleePtr), pointerAuth); return callee; } diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp index a8c5985cde705..7a2b00647f189 100644 --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -2881,9 +2881,14 @@ llvm::Constant *ConstantEmitter::emitNullForMemory(CodeGenModule &CGM, } llvm::Constant *CodeGenModule::EmitNullConstant(QualType T) { - if (T->getAs<PointerType>()) - return getNullPointer( - cast<llvm::PointerType>(getTypes().ConvertTypeForMem(T)), T); + if (T->getAs<PointerType>()) { + llvm::Type *LT = getTypes().ConvertTypeForMem(T); + if (auto *PT = dyn_cast<llvm::PointerType>(LT)) + return getNullPointer(PT, T); + // Some pointer types do not lower to an LLVM pointer (e.g. a WebAssembly + // funcref, which is an opaque reference type). Use the type's zero value. 
+ return llvm::Constant::getNullValue(LT); + } if (getTypes().isZeroInitializable(T)) return llvm::Constant::getNullValue(getTypes().ConvertTypeForMem(T)); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 3a3dff7bec347..18ed6570730f4 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -47,6 +47,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/IntrinsicsPowerPC.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/MatrixBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Support/TypeSize.h" @@ -2839,6 +2840,43 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { return CGF.authPointerToPointerCast(Result, E->getType(), DestTy); } case CK_AddressSpaceConversion: { + llvm::Type *DestLTy = ConvertType(DestTy); + // WebAssembly reference types are opaque target extension types so an + // "address space conversion" involving them is not a real pointer cast. + auto IsWasmFuncref = [](llvm::Type *T) { + auto *TET = dyn_cast<llvm::TargetExtType>(T); + return TET && TET->getName() == "wasm.funcref"; + }; + bool SrcIsFuncref = IsWasmFuncref(ConvertType(E->getType())); + bool DestIsFuncref = IsWasmFuncref(DestLTy); + if (SrcIsFuncref && DestIsFuncref) { + // funcref -> funcref (e.g. between differently-typed funcrefs) is the + // identity on the opaque reference value. + return Visit(E); + } + if (SrcIsFuncref && !DestIsFuncref) { + // funcref -> pointer: use wasm_funcref_to_ptr. This will probably crash + // later in codegen since we haven't implemented a way to actually get a + // function pointer from a funcref. + llvm::Function *ToPtr = + CGF.CGM.getIntrinsic(llvm::Intrinsic::wasm_funcref_to_ptr); + return CGF.Builder.CreateCall(ToPtr, {Visit(E)}); + } + if (!SrcIsFuncref && DestIsFuncref) { + // A null function pointer converts to a null funcref (ref.null func), + // rather than a table lookup at index 0. 
+ Expr::EvalResult NullResult; + if (E->EvaluateAsRValue(NullResult, CGF.getContext()) && + NullResult.Val.isNullPointer()) { + if (NullResult.HasSideEffects) + Visit(E); + return llvm::Constant::getNullValue(DestLTy); + } + // pointer -> funcref: do a table.get from the indirect function table. + llvm::Function *ToFuncref = + CGF.CGM.getIntrinsic(llvm::Intrinsic::wasm_ptr_to_funcref); + return CGF.Builder.CreateCall(ToFuncref, {Visit(E)}); + } Expr::EvalResult Result; if (E->EvaluateAsRValue(Result, CGF.getContext()) && Result.Val.isNullPointer()) { @@ -2847,12 +2885,11 @@ Value *ScalarExprEmitter::VisitCastExpr(CastExpr *CE) { // eliminate the useless instructions emitted during translating E. if (Result.HasSideEffects) Visit(E); - return CGF.CGM.getNullPointer(cast<llvm::PointerType>( - ConvertType(DestTy)), DestTy); + return CGF.CGM.getNullPointer(cast<llvm::PointerType>(DestLTy), DestTy); } // Since target may map different address spaces in AST to the same address // space, an address space conversion may end up as a bitcast. 
- return CGF.performAddrSpaceCast(Visit(E), ConvertType(DestTy)); + return CGF.performAddrSpaceCast(Visit(E), DestLTy); } case CK_AtomicToNonAtomic: case CK_NonAtomicToAtomic: diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index b28a0eb82f302..3de3bad6affb5 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -645,6 +645,10 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { case Type::Pointer: { const PointerType *PTy = cast(Ty); QualType ETy = PTy->getPointeeType(); + if (ETy.getAddressSpace() == LangAS::wasm_funcref) { + ResultType = CGM.getTargetCodeGenInfo().getWasmFuncrefReferenceType(); + break; + } unsigned AS = getTargetAddressSpace(ETy); ResultType = llvm::PointerType::get(getLLVMContext(), AS); break; diff --git a/clang/test/CodeGen/WebAssembly/builtins-table-externref.c b/clang/test/CodeGen/WebAssembly/builtins-table-externref.c index 7600a53ba3aa2..454fc31a5f53f 100644 --- a/clang/test/CodeGen/WebAssembly/builtins-table-externref.c +++ b/clang/test/CodeGen/WebAssembly/builtins-table-externref.c @@ -8,8 +8,8 @@ static const __externref_t const_table[0]; // CHECK-LABEL: define {{[^@]+}}@test_builtin_wasm_table_get // CHECK-SAME: (i32 noundef [[INDEX:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(10) @llvm.wasm.table.get.externref(ptr addrspace(1) @table, i32 [[INDEX]]) -// CHECK-NEXT: ret ptr addrspace(10) [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = call target("wasm.externref") @llvm.wasm.table.get.externref(ptr addrspace(1) @table, i32 [[INDEX]]) +// CHECK-NEXT: ret target("wasm.externref") [[TMP0]] // __externref_t test_builtin_wasm_table_get(int index) { return __builtin_wasm_table_get(table, index); @@ -18,18 +18,18 @@ __externref_t test_builtin_wasm_table_get(int index) { // CHECK-LABEL: define {{[^@]+}}@test_builtin_wasm_table_get_const // CHECK-SAME: (i32 noundef [[INDEX:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// 
CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(10) @llvm.wasm.table.get.externref(ptr addrspace(1) @table, i32 [[INDEX]]) -// CHECK-NEXT: ret ptr addrspace(10) [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = call target("wasm.externref") @llvm.wasm.table.get.externref(ptr addrspace(1) @table, i32 [[INDEX]]) +// CHECK-NEXT: ret target("wasm.externref") [[TMP0]] // __externref_t test_builtin_wasm_table_get_const(const int index) { return __builtin_wasm_table_get(table, index); } // CHECK-LABEL: define {{[^@]+}}@test_builtin_wasm_table_set -// CHECK-SAME: (i32 noundef [[INDEX:%.*]], ptr addrspace(10) [[REF:%.*]]) #[[ATTR0]] { +// CHECK-SAME: (i32 noundef [[INDEX:%.*]], target("wasm.externref") [[REF:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: call void @llvm.wasm.table.set.externref(ptr addrspace(1) @const_table, i32 [[INDEX]], ptr addrspace(10) [[REF]]) -// CHECK-NEXT: call void @llvm.wasm.table.set.externref(ptr addrspace(1) @table, i32 [[INDEX]], ptr addrspace(10) [[REF]]) +// CHECK-NEXT: call void @llvm.wasm.table.set.externref(ptr addrspace(1) @const_table, i32 [[INDEX]], target("wasm.externref") [[REF]]) +// CHECK-NEXT: call void @llvm.wasm.table.set.externref(ptr addrspace(1) @table, i32 [[INDEX]], target("wasm.externref") [[REF]]) // CHECK-NEXT: ret void // void test_builtin_wasm_table_set(const int index, __externref_t ref) { @@ -38,10 +38,10 @@ void test_builtin_wasm_table_set(const int index, __externref_t ref) { } // CHECK-LABEL: define {{[^@]+}}@test_builtin_wasm_table_set_const -// CHECK-SAME: (i32 noundef [[INDEX:%.*]], ptr addrspace(10) [[REF:%.*]]) #[[ATTR0]] { +// CHECK-SAME: (i32 noundef [[INDEX:%.*]], target("wasm.externref") [[REF:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: call void @llvm.wasm.table.set.externref(ptr addrspace(1) @table, i32 [[INDEX]], ptr addrspace(10) [[REF]]) -// CHECK-NEXT: call void @llvm.wasm.table.set.externref(ptr addrspace(1) @const_table, i32 [[INDEX]], ptr addrspace(10) [[REF]]) +// CHECK-NEXT: call 
void @llvm.wasm.table.set.externref(ptr addrspace(1) @table, i32 [[INDEX]], target("wasm.externref") [[REF]]) +// CHECK-NEXT: call void @llvm.wasm.table.set.externref(ptr addrspace(1) @const_table, i32 [[INDEX]], target("wasm.externref") [[REF]]) // CHECK-NEXT: ret void // void test_builtin_wasm_table_set_const(const int index, const __externref_t ref) { @@ -60,9 +60,9 @@ int test_builtin_wasm_table_size() { } // CHECK-LABEL: define {{[^@]+}}@test_builtin_wasm_table_grow -// CHECK-SAME: (ptr addrspace(10) [[REF:%.*]], i32 noundef [[NELEM:%.*]]) #[[ATTR0]] { +// CHECK-SAME: (target("wasm.externref") [[REF:%.*]], i32 noundef [[NELEM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.wasm.table.grow.externref(ptr addrspace(1) @table, ptr addrspace(10) [[REF]], i32 [[NELEM]]) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.wasm.table.grow.externref(ptr addrspace(1) @table, target("wasm.externref") [[REF]], i32 [[NELEM]]) // CHECK-NEXT: ret i32 [[TMP0]] // int test_builtin_wasm_table_grow(__externref_t ref, int nelem) { @@ -70,9 +70,9 @@ int test_builtin_wasm_table_grow(__externref_t ref, int nelem) { } // CHECK-LABEL: define {{[^@]+}}@test_builtin_wasm_table_fill -// CHECK-SAME: (i32 noundef [[INDEX:%.*]], ptr addrspace(10) [[REF:%.*]], i32 noundef [[NELEM:%.*]]) #[[ATTR0]] { +// CHECK-SAME: (i32 noundef [[INDEX:%.*]], target("wasm.externref") [[REF:%.*]], i32 noundef [[NELEM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: call void @llvm.wasm.table.fill.externref(ptr addrspace(1) @table, i32 [[INDEX]], ptr addrspace(10) [[REF]], i32 [[NELEM]]) +// CHECK-NEXT: call void @llvm.wasm.table.fill.externref(ptr addrspace(1) @table, i32 [[INDEX]], target("wasm.externref") [[REF]], i32 [[NELEM]]) // CHECK-NEXT: ret void // void test_builtin_wasm_table_fill(int index, __externref_t ref, int nelem) { diff --git a/clang/test/CodeGen/WebAssembly/builtins-table-funcref.c b/clang/test/CodeGen/WebAssembly/builtins-table-funcref.c index 
b4f729669a795..f80e9b10c4941 100644 --- a/clang/test/CodeGen/WebAssembly/builtins-table-funcref.c +++ b/clang/test/CodeGen/WebAssembly/builtins-table-funcref.c @@ -8,17 +8,17 @@ static funcref_t table[0]; // CHECK-LABEL: define {{[^@]+}}@test_builtin_wasm_table_get // CHECK-SAME: (i32 noundef [[INDEX:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(20) @llvm.wasm.table.get.funcref(ptr addrspace(1) @table, i32 [[INDEX]]) -// CHECK-NEXT: ret ptr addrspace(20) [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = call target("wasm.funcref") @llvm.wasm.table.get.funcref(ptr addrspace(1) @table, i32 [[INDEX]]) +// CHECK-NEXT: ret target("wasm.funcref") [[TMP0]] // funcref_t test_builtin_wasm_table_get(int index) { return __builtin_wasm_table_get(table, index); } // CHECK-LABEL: define {{[^@]+}}@test_builtin_wasm_table_set -// CHECK-SAME: (i32 noundef [[INDEX:%.*]], ptr addrspace(20) noundef [[REF:%.*]]) #[[ATTR0]] { +// CHECK-SAME: (i32 noundef [[INDEX:%.*]], target("wasm.funcref") noundef [[REF:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: call void @llvm.wasm.table.set.funcref(ptr addrspace(1) @table, i32 [[INDEX]], ptr addrspace(20) [[REF]]) +// CHECK-NEXT: call void @llvm.wasm.table.set.funcref(ptr addrspace(1) @table, i32 [[INDEX]], target("wasm.funcref") [[REF]]) // CHECK-NEXT: ret void // void test_builtin_wasm_table_set(int index, funcref_t ref) { @@ -37,9 +37,9 @@ int test_builtin_wasm_table_size() { // CHECK-LABEL: define {{[^@]+}}@test_builtin_wasm_table_grow -// CHECK-SAME: (ptr addrspace(20) noundef [[REF:%.*]], i32 noundef [[NELEM:%.*]]) #[[ATTR0]] { +// CHECK-SAME: (target("wasm.funcref") noundef [[REF:%.*]], i32 noundef [[NELEM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.wasm.table.grow.funcref(ptr addrspace(1) @table, ptr addrspace(20) [[REF]], i32 [[NELEM]]) +// CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.wasm.table.grow.funcref(ptr addrspace(1) @table, 
target("wasm.funcref") [[REF]], i32 [[NELEM]]) // CHECK-NEXT: ret i32 [[TMP0]] // int test_builtin_wasm_table_grow(funcref_t ref, int nelem) { @@ -47,9 +47,9 @@ int test_builtin_wasm_table_grow(funcref_t ref, int nelem) { } // CHECK-LABEL: define {{[^@]+}}@test_builtin_wasm_table_fill -// CHECK-SAME: (i32 noundef [[INDEX:%.*]], ptr addrspace(20) noundef [[REF:%.*]], i32 noundef [[NELEM:%.*]]) #[[ATTR0]] { +// CHECK-SAME: (i32 noundef [[INDEX:%.*]], target("wasm.funcref") noundef [[REF:%.*]], i32 noundef [[NELEM:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: call void @llvm.wasm.table.fill.funcref(ptr addrspace(1) @table, i32 [[INDEX]], ptr addrspace(20) [[REF]], i32 [[NELEM]]) +// CHECK-NEXT: call void @llvm.wasm.table.fill.funcref(ptr addrspace(1) @table, i32 [[INDEX]], target("wasm.funcref") [[REF]], i32 [[NELEM]]) // CHECK-NEXT: ret void // void test_builtin_wasm_table_fill(int index, funcref_t ref, int nelem) { diff --git a/clang/test/CodeGen/WebAssembly/builtins-test-fp-sig.c b/clang/test/CodeGen/WebAssembly/builtins-test-fp-sig.c index 88447f7fa232d..c8825028e1789 100644 --- a/clang/test/CodeGen/WebAssembly/builtins-test-fp-sig.c +++ b/clang/test/CodeGen/WebAssembly/builtins-test-fp-sig.c @@ -32,13 +32,13 @@ void test_function_pointer_signature_varargs(FVarArgs func) { typedef __externref_t (*FExternRef)(__externref_t, __externref_t); void test_function_pointer_externref(FExternRef func) { - // WEBASSEMBLY: %0 = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, ptr addrspace(10) poison, token poison, ptr addrspace(10) poison, ptr addrspace(10) poison) + // WEBASSEMBLY: %0 = tail call i32 (ptr, ...) 
@llvm.wasm.ref.test.func(ptr %func, target("wasm.externref") poison, token poison, target("wasm.externref") poison, target("wasm.externref") poison) use(__builtin_wasm_test_function_pointer_signature(func)); } typedef __funcref Fpointers (*FFuncRef)(__funcref Fvoid, __funcref Ffloats); void test_function_pointer_funcref(FFuncRef func) { - // WEBASSEMBLY: %0 = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, ptr addrspace(20) poison, token poison, ptr addrspace(20) poison, ptr addrspace(20) poison) + // WEBASSEMBLY: %0 = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, target("wasm.funcref") poison, token poison, target("wasm.funcref") poison, target("wasm.funcref") poison) use(__builtin_wasm_test_function_pointer_signature(func)); } diff --git a/clang/test/CodeGen/WebAssembly/wasm-externref.c b/clang/test/CodeGen/WebAssembly/wasm-externref.c index 788438bb4a86a..d226c51b7fd4e 100644 --- a/clang/test/CodeGen/WebAssembly/wasm-externref.c +++ b/clang/test/CodeGen/WebAssembly/wasm-externref.c @@ -7,10 +7,10 @@ void helper(externref_t); // CHECK-LABEL: @handle( // CHECK-NEXT: entry: -// CHECK-NEXT: [[OBJ_ADDR:%.*]] = alloca ptr addrspace(10), align 1 -// CHECK-NEXT: store ptr addrspace(10) [[OBJ:%.*]], ptr [[OBJ_ADDR]], align 1 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(10), ptr [[OBJ_ADDR]], align 1 -// CHECK-NEXT: call void @helper(ptr addrspace(10) [[TMP0]]) +// CHECK-NEXT: [[OBJ_ADDR:%.*]] = alloca target("wasm.externref"), align 1 +// CHECK-NEXT: store target("wasm.externref") [[OBJ:%.*]], ptr [[OBJ_ADDR]], align 1 +// CHECK-NEXT: [[TMP0:%.*]] = load target("wasm.externref"), ptr [[OBJ_ADDR]], align 1 +// CHECK-NEXT: call void @helper(target("wasm.externref") [[TMP0]]) // CHECK-NEXT: ret void // void handle(externref_t obj) { diff --git a/clang/test/CodeGen/WebAssembly/wasm-funcref-to-ptr-error.c b/clang/test/CodeGen/WebAssembly/wasm-funcref-to-ptr-error.c new file mode 100644 index 0000000000000..ce663a70dd3b1 --- /dev/null +++ 
b/clang/test/CodeGen/WebAssembly/wasm-funcref-to-ptr-error.c @@ -0,0 +1,21 @@ +// RUN: not %clang_cc1 -triple wasm32 -target-feature +reference-types -S -o /dev/null %s 2>&1 | FileCheck %s +// RUN: not %clang_cc1 -triple wasm64 -target-feature +reference-types -S -o /dev/null %s 2>&1 | FileCheck %s + +// We haven't implemented a way of converting a funcref to a function pointer. +// We can generate code for it if the result is immediately called, which avoids +// the need for creating a function pointer. If the resulting pointer escapes, +// we haven't implemented codegen for that. Diagnose it in the front end rather +// than crashing in the backend. + +typedef void (*__funcref funcref_t)(void); +typedef void (*fn_t)(void); + +// CHECK: error: a funcref can only be converted to a pointer to be directly called; the resulting pointer cannot otherwise be used +void store_funcref_as_ptr(funcref_t f, fn_t *out) { + *out = (fn_t)f; +} + +// CHECK: error: a funcref can only be converted to a pointer to be directly called; the resulting pointer cannot otherwise be used +fn_t return_funcref_as_ptr(funcref_t f) { + return (fn_t)f; +} diff --git a/clang/test/CodeGen/WebAssembly/wasm-funcref.c b/clang/test/CodeGen/WebAssembly/wasm-funcref.c index f01af0db321dd..45e0001c88ebc 100644 --- a/clang/test/CodeGen/WebAssembly/wasm-funcref.c +++ b/clang/test/CodeGen/WebAssembly/wasm-funcref.c @@ -8,8 +8,8 @@ typedef int (*fn_t)(int); // Null funcref builtin call // CHECK-LABEL: @get_null( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(20) @llvm.wasm.ref.null.func() -// CHECK-NEXT: ret ptr addrspace(20) [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = call target("wasm.funcref") @llvm.wasm.ref.null.func() +// CHECK-NEXT: ret target("wasm.funcref") [[TMP0]] // funcref_t get_null() { return __builtin_wasm_ref_null_func(); @@ -19,8 +19,8 @@ funcref_t get_null() { // default return value for builtin is a funcref with function type () -> (). 
// CHECK-LABEL: @get_null_ii( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = call ptr addrspace(20) @llvm.wasm.ref.null.func() -// CHECK-NEXT: ret ptr addrspace(20) [[TMP0]] +// CHECK-NEXT: [[TMP0:%.*]] = call target("wasm.funcref") @llvm.wasm.ref.null.func() +// CHECK-NEXT: ret target("wasm.funcref") [[TMP0]] // fn_funcref_t get_null_ii() { return (fn_funcref_t) __builtin_wasm_ref_null_func(); @@ -29,10 +29,10 @@ fn_funcref_t get_null_ii() { // Identity function for funcref. // CHECK-LABEL: @identity( // CHECK-NEXT: entry: -// CHECK-NEXT: [[FN_ADDR:%.*]] = alloca ptr addrspace(20), align 4 -// CHECK-NEXT: store ptr addrspace(20) [[FN:%.*]], ptr [[FN_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(20), ptr [[FN_ADDR]], align 4 -// CHECK-NEXT: ret ptr addrspace(20) [[TMP0]] +// CHECK-NEXT: [[FN_ADDR:%.*]] = alloca target("wasm.funcref"), align 4 +// CHECK-NEXT: store target("wasm.funcref") [[FN:%.*]], ptr [[FN_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load target("wasm.funcref"), ptr [[FN_ADDR]], align 4 +// CHECK-NEXT: ret target("wasm.funcref") [[TMP0]] // funcref_t identity(funcref_t fn) { return fn; @@ -43,10 +43,10 @@ void helper(funcref_t); // Pass funcref ref as an argument to a helper function. 
// CHECK-LABEL: @handle( // CHECK-NEXT: entry: -// CHECK-NEXT: [[FN_ADDR:%.*]] = alloca ptr addrspace(20), align 4 -// CHECK-NEXT: store ptr addrspace(20) [[FN:%.*]], ptr [[FN_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(20), ptr [[FN_ADDR]], align 4 -// CHECK-NEXT: call void @helper(ptr addrspace(20) noundef [[TMP0]]) +// CHECK-NEXT: [[FN_ADDR:%.*]] = alloca target("wasm.funcref"), align 4 +// CHECK-NEXT: store target("wasm.funcref") [[FN:%.*]], ptr [[FN_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load target("wasm.funcref"), ptr [[FN_ADDR]], align 4 +// CHECK-NEXT: call void @helper(target("wasm.funcref") noundef [[TMP0]]) // CHECK-NEXT: ret i32 0 // int handle(funcref_t fn) { @@ -60,29 +60,58 @@ int handle(funcref_t fn) { // CHECK-NEXT: [[FNPTR_ADDR:%.*]] = alloca ptr, align 4 // CHECK-NEXT: store ptr [[FNPTR:%.*]], ptr [[FNPTR_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[FNPTR_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(20) -// CHECK-NEXT: ret ptr addrspace(20) [[TMP1]] +// CHECK-NEXT: [[TMP1:%.*]] = call target("wasm.funcref") @llvm.wasm.ptr.to_funcref(ptr [[TMP0]]) +// CHECK-NEXT: ret target("wasm.funcref") [[TMP1]] // fn_funcref_t get_ref(fn_t fnptr) { return (fn_funcref_t) fnptr; } +// Casting a null function pointer to a funcref yields a null funcref +// (ref.null func), not a lookup of index 0 in the indirect function table. 
+// CHECK-LABEL: @get_null_from_fnptr( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret target("wasm.funcref") zeroinitializer +// +fn_funcref_t get_null_from_fnptr() { + return (fn_funcref_t)(fn_t)0; +} + // Call funcref // CHECK-LABEL: @call_fn( // CHECK-NEXT: entry: -// CHECK-NEXT: [[REF_ADDR:%.*]] = alloca ptr addrspace(20), align 4 +// CHECK-NEXT: [[REF_ADDR:%.*]] = alloca target("wasm.funcref"), align 4 // CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: store ptr addrspace(20) [[REF:%.*]], ptr [[REF_ADDR]], align 4 +// CHECK-NEXT: store target("wasm.funcref") [[REF:%.*]], ptr [[REF_ADDR]], align 4 // CHECK-NEXT: store i32 [[X:%.*]], ptr [[X_ADDR]], align 4 -// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(20), ptr [[REF_ADDR]], align 4 -// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[X_ADDR]], align 4 -// CHECK-NEXT: [[CALL:%.*]] = call addrspace(20) i32 [[TMP0]](i32 noundef [[TMP1]]) +// CHECK-NEXT: [[TMP0:%.*]] = load target("wasm.funcref"), ptr [[REF_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.wasm.funcref.to_ptr(target("wasm.funcref") [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call i32 [[TMP1]](i32 noundef [[TMP2]]) // CHECK-NEXT: ret i32 [[CALL]] // int call_fn(fn_funcref_t ref, int x) { return ref(x); } +// Explicitly casting a funcref to a plain function pointer and calling it +// immediately is allowed: the conversion feeds directly into the indirect call. 
+// CHECK-LABEL: @call_fn_via_cast( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[REF_ADDR:%.*]] = alloca target("wasm.funcref"), align 4 +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store target("wasm.funcref") [[REF:%.*]], ptr [[REF_ADDR]], align 4 +// CHECK-NEXT: store i32 [[X:%.*]], ptr [[X_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load target("wasm.funcref"), ptr [[REF_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = call ptr @llvm.wasm.funcref.to_ptr(target("wasm.funcref") [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[X_ADDR]], align 4 +// CHECK-NEXT: [[CALL:%.*]] = call i32 [[TMP1]](i32 noundef [[TMP2]]) +// CHECK-NEXT: ret i32 [[CALL]] +// +int call_fn_via_cast(fn_funcref_t ref, int x) { + return ((fn_t)ref)(x); +} + typedef fn_funcref_t (*builtin_refnull_t)(); // Calling ref.null through a function pointer. @@ -91,8 +120,8 @@ typedef fn_funcref_t (*builtin_refnull_t)(); // CHECK-NEXT: [[REFNULL_ADDR:%.*]] = alloca ptr, align 4 // CHECK-NEXT: store ptr [[REFNULL:%.*]], ptr [[REFNULL_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[REFNULL_ADDR]], align 4 -// CHECK-NEXT: [[CALL:%.*]] = call ptr addrspace(20) [[TMP0]]() -// CHECK-NEXT: ret ptr addrspace(20) [[CALL]] +// CHECK-NEXT: [[CALL:%.*]] = call target("wasm.funcref") [[TMP0]]() +// CHECK-NEXT: ret target("wasm.funcref") [[CALL]] // fn_funcref_t get_null_fptr(builtin_refnull_t refnull) { return refnull(); diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c index 375664b852636..40788b0afeb45 100644 --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -737,13 +737,13 @@ f16x8 pmax_f16x8(f16x8 a, f16x8 b) { } __externref_t externref_null() { return __builtin_wasm_ref_null_extern(); - // WEBASSEMBLY: tail call ptr addrspace(10) @llvm.wasm.ref.null.extern() + // WEBASSEMBLY: tail call target("wasm.externref") @llvm.wasm.ref.null.extern() // WEBASSEMBLY-NEXT: ret } int externref_is_null(__externref_t 
arg) { return __builtin_wasm_ref_is_null_extern(arg); - // WEBASSEMBLY: tail call i32 @llvm.wasm.ref.is_null.extern(ptr addrspace(10) %arg) + // WEBASSEMBLY: tail call i32 @llvm.wasm.ref.is_null.extern(target("wasm.externref") %arg) // WEBASSEMBLY-NEXT: ret } diff --git a/clang/test/CodeGenCXX/wasm-reftypes-mangle.cpp b/clang/test/CodeGenCXX/wasm-reftypes-mangle.cpp index 20e33cfdaa31c..e2e8109772d74 100644 --- a/clang/test/CodeGenCXX/wasm-reftypes-mangle.cpp +++ b/clang/test/CodeGenCXX/wasm-reftypes-mangle.cpp @@ -7,10 +7,10 @@ typedef void (*__funcref funcref_t)(); // Global funcref variables - test that codegen doesn't crash. -// CHECK-DAG: @fptr = global ptr addrspace(20) null +// CHECK-DAG: @fptr = global target("wasm.funcref") zeroinitializer funcref_t fptr; -// CHECK-DAG: @fpt2 = global ptr addrspace(20) null +// CHECK-DAG: @fpt2 = global target("wasm.funcref") zeroinitializer void (*__funcref fpt2)(); // CHECK-DAG: _Z2f1u11externref_t diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md index 213dd52d9a228..a365e30792b4c 100644 --- a/llvm/docs/ReleaseNotes.md +++ b/llvm/docs/ReleaseNotes.md @@ -259,6 +259,15 @@ Makes programs 10x faster by doing Special New Thing. ### Changes to the WebAssembly Backend +* WebAssembly reference types are now represented in LLVM IR as the target + extension types `target("wasm.externref")` and `target("wasm.funcref")`, + rather than as pointers in address spaces 10 and 20 (`ptr addrspace(10)` / + `ptr addrspace(20)`). +* As a consequence of the representation change, reference types are no longer + treated as vectorizable pointers. This fixes a crash in the SLP vectorizer, + which previously would attempt to gather `externref`/`funcref` values into a + vector and then crash. 
+ ### Changes to the Windows Target * The `.seh_startchained` and `.seh_endchained` assembly instructions have been removed and replaced diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h index c2411c894d975..5ef2e33470634 100644 --- a/llvm/include/llvm/IR/Intrinsics.h +++ b/llvm/include/llvm/IR/Intrinsics.h @@ -177,6 +177,8 @@ struct IITDescriptor { AMX, PPCQuad, AArch64Svcount, + WasmExternref, + WasmFuncref, // Overloaded type. Overloaded, // AnyKind and overload index in OverloadInfo. diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td index c1e4b97e96bc8..a0e83cee9f055 100644 --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -47,6 +47,17 @@ def int_wasm_ref_test_func : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_vararg_ty], [IntrNoMem]>; +//===----------------------------------------------------------------------===// +// funcref <--> pointer conversion intrinsics +//===----------------------------------------------------------------------===// +def int_wasm_funcref_to_ptr : + DefaultAttrsIntrinsic<[llvm_ptr_ty], [llvm_funcref_ty], + [IntrNoMem], "llvm.wasm.funcref.to_ptr">; + +def int_wasm_ptr_to_funcref : + DefaultAttrsIntrinsic<[llvm_funcref_ty], [llvm_ptr_ty], + [IntrNoMem], "llvm.wasm.ptr.to_funcref">; + //===----------------------------------------------------------------------===// // Table intrinsics //===----------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index c12998df1c445..8aa184423c60c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1978,6 +1978,16 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DAG.getConstant(0, getCurSDLoc(), MVT::getIntegerVT(8)))); 
} + if (VT == MVT::externref || VT == MVT::funcref) { + assert(C->isNullValue() && "Can only zero this target type!"); + // The zero value of a WebAssembly reference type is the null reference, + // materialized with ref.null. + Intrinsic::ID IID = VT == MVT::externref ? Intrinsic::wasm_ref_null_extern + : Intrinsic::wasm_ref_null_func; + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VT, + DAG.getTargetConstant(IID, getCurSDLoc(), MVT::i32)); + } + VectorType *VecTy = cast(V->getType()); // Now that we know the number and type of the elements, get that number of diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp index e74068e22f4cd..a8eb5f801a280 100644 --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -268,6 +268,10 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){ TargetExtType *TargetExtTy = cast(Ty); if (TargetExtTy->getName() == "aarch64.svcount") return MVT(MVT::aarch64svcount); + else if (TargetExtTy->getName() == "wasm.externref") + return MVT(MVT::externref); + else if (TargetExtTy->getName() == "wasm.funcref") + return MVT(MVT::funcref); else if (TargetExtTy->getName().starts_with("spirv.")) return MVT(MVT::spirvbuiltin); if (TargetExtTy->getName() == "riscv.vector.tuple") { diff --git a/llvm/lib/IR/Intrinsics.cpp b/llvm/lib/IR/Intrinsics.cpp index decd700019e6d..eb9690f066251 100644 --- a/llvm/lib/IR/Intrinsics.cpp +++ b/llvm/lib/IR/Intrinsics.cpp @@ -358,10 +358,10 @@ DecodeIITType(unsigned &NextElt, ArrayRef Infos, DecodeIITType(NextElt, Infos, OutputTable); return; case IIT_EXTERNREF: - OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 10)); + OutputTable.push_back(IITDescriptor::get(IITDescriptor::WasmExternref, 0)); return; case IIT_FUNCREF: - OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 20)); + OutputTable.push_back(IITDescriptor::get(IITDescriptor::WasmFuncref, 0)); return; case IIT_PTR: 
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Pointer, 0)); @@ -558,7 +558,10 @@ static Type *DecodeFixedType(ArrayRef &Infos, return Type::getPPC_FP128Ty(Context); case IITDescriptor::AArch64Svcount: return TargetExtType::get(Context, "aarch64.svcount"); - + case IITDescriptor::WasmExternref: + return TargetExtType::get(Context, "wasm.externref"); + case IITDescriptor::WasmFuncref: + return TargetExtType::get(Context, "wasm.funcref"); case IITDescriptor::Integer: return IntegerType::get(Context, D.IntegerWidth); case IITDescriptor::Vector: @@ -1031,6 +1034,14 @@ matchIntrinsicType(Type *Ty, ArrayRef &Infos, return PrintMsg(isa(Ty) && cast(Ty)->getName() == "aarch64.svcount", "aarch64.svcount"); + case IITDescriptor::WasmExternref: + return PrintMsg(isa(Ty) && + cast(Ty)->getName() == "wasm.externref", + "wasm.externref"); + case IITDescriptor::WasmFuncref: + return PrintMsg(isa(Ty) && + cast(Ty)->getName() == "wasm.funcref", + "wasm.funcref"); case IITDescriptor::Vector: { VectorType *VT = dyn_cast(Ty); StringRef Scalable = D.VectorWidth.isScalable() ? "vscale " : ""; diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp index 47b230d44285b..d5f4671487acb 100644 --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -334,13 +334,11 @@ Type *Type::getByteFromIntType(Type *Ty) { } Type *Type::getWasm_ExternrefTy(LLVMContext &C) { - // opaque pointer in addrspace(10) - return PointerType::get(C, 10); + return TargetExtType::get(C, "wasm.externref", {}, {}); } Type *Type::getWasm_FuncrefTy(LLVMContext &C) { - // opaque pointer in addrspace(20) - return PointerType::get(C, 20); + return TargetExtType::get(C, "wasm.funcref", {}, {}); } //===----------------------------------------------------------------------===// @@ -1134,6 +1132,12 @@ static TargetTypeInfo getTargetTypeInfo(const TargetExtType *Ty) { TargetExtType::CanBeVectorElement); } + // Opaque types in the WebAssembly name space. 
+ if (Name == "wasm.funcref" || Name == "wasm.externref") + return TargetTypeInfo(PointerType::getUnqual(C), TargetExtType::HasZeroInit, + TargetExtType::CanBeGlobal, + TargetExtType::CanBeLocal); + return TargetTypeInfo(Type::getVoidTy(C)); } diff --git a/llvm/lib/Target/WebAssembly/CMakeLists.txt b/llvm/lib/Target/WebAssembly/CMakeLists.txt index ef5f7d11e7e49..a024a354890d2 100644 --- a/llvm/lib/Target/WebAssembly/CMakeLists.txt +++ b/llvm/lib/Target/WebAssembly/CMakeLists.txt @@ -51,7 +51,6 @@ add_llvm_target(WebAssemblyCodeGen WebAssemblyInstrInfo.cpp WebAssemblyLowerBrUnless.cpp WebAssemblyLowerEmscriptenEHSjLj.cpp - WebAssemblyLowerRefTypesIntPtrConv.cpp WebAssemblyMachineFunctionInfo.cpp WebAssemblyMCInstLower.cpp WebAssemblyMCLowerPrePass.cpp diff --git a/llvm/lib/Target/WebAssembly/GISel/WebAssemblyCallLowering.cpp b/llvm/lib/Target/WebAssembly/GISel/WebAssemblyCallLowering.cpp index 9f3a1d1ba7fa2..848796601b256 100644 --- a/llvm/lib/Target/WebAssembly/GISel/WebAssemblyCallLowering.cpp +++ b/llvm/lib/Target/WebAssembly/GISel/WebAssemblyCallLowering.cpp @@ -14,7 +14,7 @@ #include "WebAssemblyCallLowering.h" #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" -#include "Utils/WasmAddressSpaces.h" +#include "Utils/WebAssemblyTypeUtilities.h" #include "WebAssemblyISelLowering.h" #include "WebAssemblyMachineFunctionInfo.h" #include "WebAssemblyRegisterInfo.h" @@ -58,22 +58,18 @@ static unsigned extendOpFromFlags(ISD::ArgFlagsTy Flags) { return TargetOpcode::G_ANYEXT; } -static LLT getLLTForWasmMVT(MVT Ty, const DataLayout &DL) { - if (Ty == MVT::externref) { - return LLT::pointer( - WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF, - DL.getPointerSizeInBits( - WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)); - } - - if (Ty == MVT::funcref) { - return LLT::pointer( - WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF, - DL.getPointerSizeInBits( - WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)); - } - - return 
llvm::getLLTForMVT(Ty); +// GlobalISel doesn't handle reference types. We bail out of GlobalISel for +// functions passing/returning references and fall back to SDAG. +static bool typeContainsReference(const Type *Ty) { + if (WebAssembly::isWebAssemblyReferenceType(Ty)) + return true; + if (const auto *ArrTy = dyn_cast(Ty)) + return typeContainsReference(ArrTy->getElementType()); + if (const auto *StructTy = dyn_cast(Ty)) + return any_of(StructTy->elements(), [](const Type *ElemTy) { + return typeContainsReference(ElemTy); + }); + return false; } WebAssemblyCallLowering::WebAssemblyCallLowering( @@ -99,6 +95,9 @@ bool WebAssemblyCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, const WebAssemblyTargetLowering &TLI = *getTLI(); const DataLayout &DL = F.getDataLayout(); + if (Val && typeContainsReference(Val->getType())) + return false; + MachineInstrBuilder MIB = MIRBuilder.buildInstrNoInsert(WebAssembly::RETURN); assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) && @@ -134,7 +133,7 @@ bool WebAssemblyCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, TLI.getRegisterTypeForCallingConv(Ctx, CallConv, OrigVT); const LLT OrigLLT = getLLTForType(*OrigVT.getTypeForEVT(F.getContext()), DL); - const LLT NewLLT = getLLTForWasmMVT(NewVT, DL); + const LLT NewLLT = getLLTForMVT(NewVT); const TargetRegisterClass &NewRegClass = *TLI.getRegClassFor(NewVT); @@ -276,6 +275,12 @@ bool WebAssemblyCallLowering::lowerFormalArguments( if (!callingConvSupported(CallConv)) return false; + if (typeContainsReference(F.getReturnType())) + return false; + for (const Argument &Arg : F.args()) + if (typeContainsReference(Arg.getType())) + return false; + MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS); MF.front().addLiveIn(WebAssembly::ARGUMENTS); @@ -308,7 +313,7 @@ bool WebAssemblyCallLowering::lowerFormalArguments( const MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, OrigVT); const LLT OrigLLT = getLLTForType(*OrigVT.getTypeForEVT(F.getContext()), DL); - 
const LLT NewLLT = getLLTForWasmMVT(NewVT, DL); + const LLT NewLLT = getLLTForMVT(NewVT); // If we need to split the type over multiple regs, check it's a scenario // we currently support. @@ -410,6 +415,12 @@ bool WebAssemblyCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, if (!callingConvSupported(CallConv)) return false; + if (typeContainsReference(Info.OrigRet.Ty)) + return false; + for (const ArgInfo &Arg : Info.OrigArgs) + if (typeContainsReference(Arg.Ty)) + return false; + // TODO: tail calls if (Info.IsMustTailCall) return false; @@ -456,7 +467,7 @@ bool WebAssemblyCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const MVT NewVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, OrigVT); const LLT OrigLLT = getLLTForType(*OrigVT.getTypeForEVT(F.getContext()), DL); - const LLT NewLLT = getLLTForWasmMVT(NewVT, DL); + const LLT NewLLT = getLLTForMVT(NewVT); const TargetRegisterClass &NewRegClass = *TLI.getRegClassFor(NewVT); @@ -552,7 +563,7 @@ bool WebAssemblyCallLowering::lowerCall(MachineIRBuilder &MIRBuilder, TLI.getRegisterTypeForCallingConv(Ctx, CallConv, OrigVT); const LLT OrigLLT = getLLTForType(*OrigVT.getTypeForEVT(F.getContext()), DL); - const LLT NewLLT = getLLTForWasmMVT(NewVT, DL); + const LLT NewLLT = getLLTForMVT(NewVT); const TargetRegisterClass &NewRegClass = *TLI.getRegClassFor(NewVT); diff --git a/llvm/lib/Target/WebAssembly/Utils/WasmAddressSpaces.h b/llvm/lib/Target/WebAssembly/Utils/WasmAddressSpaces.h index 2239badca69c3..d2ab2c0f9e777 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WasmAddressSpaces.h +++ b/llvm/lib/Target/WebAssembly/Utils/WasmAddressSpaces.h @@ -24,11 +24,7 @@ enum WasmAddressSpace : unsigned { // linear memory: WebAssembly globals or WebAssembly locals. Loads and stores // to these pointers are lowered to global.get / global.set or local.get / // local.set, as appropriate. 
- WASM_ADDRESS_SPACE_VAR = 1, - // A non-integral address space for externref values - WASM_ADDRESS_SPACE_EXTERNREF = 10, - // A non-integral address space for funcref values - WASM_ADDRESS_SPACE_FUNCREF = 20, + WASM_ADDRESS_SPACE_VAR = 1 }; inline bool isDefaultAddressSpace(unsigned AS) { diff --git a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h index c5e0dcfffef9c..47ba91df81161 100644 --- a/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h +++ b/llvm/lib/Target/WebAssembly/Utils/WebAssemblyTypeUtilities.h @@ -28,16 +28,14 @@ namespace WebAssembly { /// Return true if this is a WebAssembly Externref Type. inline bool isWebAssemblyExternrefType(const Type *Ty) { - return Ty->isPointerTy() && - Ty->getPointerAddressSpace() == - WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF; + const TargetExtType *TargetTy = dyn_cast(Ty); + return TargetTy && TargetTy->getName() == "wasm.externref"; } /// Return true if this is a WebAssembly Funcref Type. inline bool isWebAssemblyFuncrefType(const Type *Ty) { - return Ty->isPointerTy() && - Ty->getPointerAddressSpace() == - WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF; + const TargetExtType *TargetTy = dyn_cast(Ty); + return TargetTy && TargetTy->getName() == "wasm.funcref"; } /// Return true if this is a WebAssembly Reference Type. 
diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.h b/llvm/lib/Target/WebAssembly/WebAssembly.h index 101348501bece..6556cb5e25c0e 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.h +++ b/llvm/lib/Target/WebAssembly/WebAssembly.h @@ -32,7 +32,6 @@ ModulePass *createWebAssemblyLowerEmscriptenEHSjLj(); ModulePass *createWebAssemblyAddMissingPrototypes(); ModulePass *createWebAssemblyFixFunctionBitcasts(); FunctionPass *createWebAssemblyOptimizeReturned(); -FunctionPass *createWebAssemblyLowerRefTypesIntPtrConv(); FunctionPass *createWebAssemblyRefTypeMem2Local(); FunctionPass *createWebAssemblyReduceToAnyAllTrue(WebAssemblyTargetMachine &TM); @@ -94,7 +93,6 @@ void initializeWebAssemblyFixIrreducibleControlFlowPass(PassRegistry &); void initializeWebAssemblyLateEHPreparePass(PassRegistry &); void initializeWebAssemblyLowerBrUnlessPass(PassRegistry &); void initializeWebAssemblyLowerEmscriptenEHSjLjPass(PassRegistry &); -void initializeWebAssemblyLowerRefTypesIntPtrConvPass(PassRegistry &); void initializeWebAssemblyMCLowerPrePassPass(PassRegistry &); void initializeWebAssemblyMemIntrinsicResultsPass(PassRegistry &); void initializeWebAssemblyNullifyDebugValueListsPass(PassRegistry &); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp index b107886a1f16e..95f4367f76cdf 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp @@ -35,6 +35,7 @@ #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicsWebAssembly.h" #include "llvm/IR/Operator.h" using namespace llvm; @@ -833,11 +834,6 @@ bool WebAssemblyFastISel::fastLowerArguments() { bool WebAssemblyFastISel::selectCall(const Instruction *I) { const auto *Call = cast(I); - // FastISel does not support calls through funcref - if (Call->getCalledOperand()->getType()->getPointerAddressSpace() != 
- WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_DEFAULT) - return false; - // TODO: Support tail calls in FastISel if (Call->isMustTailCall() || Call->isInlineAsm() || Call->getFunctionType()->isVarArg()) @@ -942,10 +938,49 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { } unsigned CalleeReg = 0; + // A call through a funcref is expressed as a call through the pointer + // produced by llvm.wasm.funcref.to_ptr. Recover the funcref operand, place it + // into __funcref_call_table, and call it. + // + // TODO: Use call_ref if wasm-gc feature is available, would lead to simpler + // code here. + const Value *FuncrefArg = nullptr; + if (const auto *Conv = dyn_cast(Call->getCalledOperand())) + if (Conv->getIntrinsicID() == Intrinsic::wasm_funcref_to_ptr) + FuncrefArg = Conv->getArgOperand(0); + + const bool IsFuncrefCall = FuncrefArg != nullptr; + MCSymbolWasm *Table = nullptr; + if (!IsDirect) { - CalleeReg = getRegForValue(Call->getCalledOperand()); - if (!CalleeReg) - return false; + if (!IsFuncrefCall) { + // Table is __indirect_function_table + Table = WebAssembly::getOrCreateFunctionTableSymbol(MF->getContext(), + Subtarget); + CalleeReg = getRegForValue(Call->getCalledOperand()); + if (!CalleeReg) + return false; + } else { + // Table is __funcref_call_table + Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(MF->getContext(), + Subtarget); + CalleeReg = getRegForValue(FuncrefArg); + // Put the funcref in slot 0 of __funcref_call_table + unsigned ZeroReg = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, + TII.get(WebAssembly::CONST_I32), ZeroReg) + .addImm(0); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, + TII.get(WebAssembly::TABLE_SET_FUNCREF)) + .addSym(Table) + .addReg(ZeroReg) + .addReg(CalleeReg); + // Set CalleeReg to an immediate 0 + CalleeReg = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, + TII.get(WebAssembly::CONST_I32),
CalleeReg) + .addImm(0); + } } auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)); @@ -958,9 +993,6 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { } else { // Placeholder for the type index. MIB.addImm(0); - // The table into which this call_indirect indexes. - MCSymbolWasm *Table = WebAssembly::getOrCreateFunctionTableSymbol( - MF->getContext(), Subtarget); if (Subtarget->hasCallIndirectOverlong()) { MIB.addSym(Table); } else { @@ -978,6 +1010,22 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) { if (!IsDirect) MIB.addReg(CalleeReg); + if (IsFuncrefCall) { + // Clear slot 0 of the funcref call table after the call. + unsigned ZeroReg = createResultReg(&WebAssembly::I32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, + TII.get(WebAssembly::CONST_I32), ZeroReg) + .addImm(0); + unsigned NullReg = createResultReg(&WebAssembly::FUNCREFRegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, + TII.get(WebAssembly::REF_NULL_FUNCREF), NullReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, + TII.get(WebAssembly::TABLE_SET_FUNCREF)) + .addSym(Table) + .addReg(ZeroReg) + .addReg(NullReg); + } + if (!IsVoid) updateValueMap(Call, ResultReg); diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp index c7b57588877b7..69e9e6b5a9717 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp @@ -275,6 +275,27 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) { ReplaceNode(Node, TLSAlign); return; } + case Intrinsic::wasm_ptr_to_funcref: { + // Convert a function pointer to a funcref by reading the corresponding + // entry from the __indirect_function_table. 
+ MachineFunction &MF = CurDAG->getMachineFunction(); + auto PtrVT = MVT::getIntegerVT(MF.getDataLayout().getPointerSizeInBits()); + MCSymbol *Table = WebAssembly::getOrCreateFunctionTableSymbol( + MF.getContext(), Subtarget); + SDValue TableSym = CurDAG->getMCSymbol(Table, PtrVT); + SDValue FuncPtr = Node->getOperand(1); + if (Subtarget->hasAddr64() && FuncPtr.getValueType() == MVT::i64) { + // table.get expects an i32 but on 64 bit platforms the function pointer + // is an i64. In that case, i32.wrap_i64 to convert. + FuncPtr = SDValue(CurDAG->getMachineNode(WebAssembly::I32_WRAP_I64, DL, + MVT::i32, FuncPtr), + 0); + } + MachineSDNode *FuncRef = CurDAG->getMachineNode( + WebAssembly::TABLE_GET_FUNCREF, DL, MVT::funcref, TableSym, FuncPtr); + ReplaceNode(Node, FuncRef); + return; + } case Intrinsic::wasm_ref_test_func: { // First emit the TABLE_GET instruction to convert function pointer ==> // funcref diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index bba3a34b08df8..f06ab5bd0942b 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -436,24 +436,6 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setMinimumJumpTableEntries(2); } -MVT WebAssemblyTargetLowering::getPointerTy(const DataLayout &DL, - uint32_t AS) const { - if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF) - return MVT::externref; - if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF) - return MVT::funcref; - return TargetLowering::getPointerTy(DL, AS); -} - -MVT WebAssemblyTargetLowering::getPointerMemTy(const DataLayout &DL, - uint32_t AS) const { - if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF) - return MVT::externref; - if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF) - return MVT::funcref; - return TargetLowering::getPointerMemTy(DL, AS); -} - 
TargetLowering::AtomicExpansionKind WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR( const AtomicRMWInst *AI) const { @@ -1294,6 +1276,17 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, MachineFunction &MF = DAG.getMachineFunction(); auto Layout = MF.getDataLayout(); + // A call through a funcref is expressed in IR as a call through the pointer + // produced by the llvm.wasm.funcref.to_ptr intrinsic. Detect this here and + // recover the underlying funcref value so the call can be lowered to a + // table.set + call_indirect through the dedicated __funcref_call_table. + bool IsFuncrefCall = false; + if (Callee.getOpcode() == ISD::INTRINSIC_WO_CHAIN && + Callee.getConstantOperandVal(0) == Intrinsic::wasm_funcref_to_ptr) { + Callee = Callee.getOperand(1); + IsFuncrefCall = true; + } + CallingConv::ID CallConv = CLI.CallConv; if (!callingConvSupported(CallConv)) fail(DL, DAG, @@ -1537,8 +1530,7 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, // Lastly, if this is a call to a funcref we need to add an instruction // table.set to the chain and transform the call. 
- if (CLI.CB && WebAssembly::isWebAssemblyFuncrefType( - CLI.CB->getCalledOperand()->getType())) { + if (IsFuncrefCall) { // In the absence of function references proposal where a funcref call is // lowered to call_ref, using reference types we generate a table.set to set // the funcref to a special table used solely for this purpose, followed by @@ -1554,11 +1546,7 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee}; SDValue TableSet = DAG.getMemIntrinsicNode( WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps, - MVT::funcref, - // Machine Mem Operand args - MachinePointerInfo( - WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF), - CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()), + MVT::funcref, MachinePointerInfo(), Align(1), MachineMemOperand::MOStore); Ops[0] = TableSet; // The new chain is the TableSet itself @@ -2277,6 +2265,17 @@ SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op, return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops); } + case Intrinsic::wasm_funcref_to_ptr: { + // llvm.wasm.funcref.to_ptr only has a defined lowering when its result + // feeds directly into an indirect call. Reaching here means the pointer + // is used in some other way. Converting a funcref into a real function + // pointer is not implemented, so we call fail() and return poison.
+ fail(DL, DAG, + "a funcref can only be converted to a pointer to be directly called; " + "the resulting pointer cannot otherwise be used"); + return DAG.getPOISON(Op.getValueType()); + } + case Intrinsic::thread_pointer: { return SDValue(WebAssembly::getTLSBase(DAG, DL, Subtarget), 0); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h index 42f047840e504..04e6d6f2d9367 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -26,9 +26,6 @@ class WebAssemblyTargetLowering final : public TargetLowering { WebAssemblyTargetLowering(const TargetMachine &TM, const WebAssemblySubtarget &STI); - MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const override; - MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const override; - private: /// Keep a pointer to the WebAssemblySubtarget around so that we can make the /// right decision when generating code for different targets. diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp deleted file mode 100644 index be500de67e320..0000000000000 --- a/llvm/lib/Target/WebAssembly/WebAssemblyLowerRefTypesIntPtrConv.cpp +++ /dev/null @@ -1,85 +0,0 @@ -//=== WebAssemblyLowerRefTypesIntPtrConv.cpp - -// Lower IntToPtr and PtrToInt on Reference Types ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// Lowers IntToPtr and PtrToInt instructions on reference types to -/// Trap instructions since they have been allowed to operate -/// on non-integral pointers. 
-/// -//===----------------------------------------------------------------------===// - -#include "Utils/WebAssemblyTypeUtilities.h" -#include "WebAssembly.h" -#include "WebAssemblySubtarget.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/Pass.h" -#include - -using namespace llvm; - -#define DEBUG_TYPE "wasm-lower-reftypes-intptr-conv" - -namespace { -class WebAssemblyLowerRefTypesIntPtrConv final : public FunctionPass { - StringRef getPassName() const override { - return "WebAssembly Lower RefTypes Int-Ptr Conversions"; - } - - bool runOnFunction(Function &MF) override; - -public: - static char ID; // Pass identification - WebAssemblyLowerRefTypesIntPtrConv() : FunctionPass(ID) {} -}; -} // end anonymous namespace - -char WebAssemblyLowerRefTypesIntPtrConv::ID = 0; -INITIALIZE_PASS(WebAssemblyLowerRefTypesIntPtrConv, DEBUG_TYPE, - "WebAssembly Lower RefTypes Int-Ptr Conversions", false, false) - -FunctionPass *llvm::createWebAssemblyLowerRefTypesIntPtrConv() { - return new WebAssemblyLowerRefTypesIntPtrConv(); -} - -bool WebAssemblyLowerRefTypesIntPtrConv::runOnFunction(Function &F) { - LLVM_DEBUG(dbgs() << "********** Lower RefTypes IntPtr Convs **********\n" - "********** Function: " - << F.getName() << '\n'); - - // This function will check for uses of ptrtoint and inttoptr on reference - // types and replace them with a trap instruction. - // - // We replace the instruction by a trap instruction - // and its uses by null in the case of inttoptr and 0 in the - // case of ptrtoint. 
- std::set worklist; - - for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { - PtrToIntInst *PTI = dyn_cast(&*I); - IntToPtrInst *ITP = dyn_cast(&*I); - if (!(PTI && WebAssembly::isWebAssemblyReferenceType( - PTI->getPointerOperand()->getType())) && - !(ITP && WebAssembly::isWebAssemblyReferenceType(ITP->getDestTy()))) - continue; - - I->replaceAllUsesWith(PoisonValue::get(I->getType())); - - Function *TrapIntrin = - Intrinsic::getOrInsertDeclaration(F.getParent(), Intrinsic::debugtrap); - CallInst::Create(TrapIntrin, {}, "", I->getIterator()); - - worklist.insert(&*I); - } - - // erase each instruction replaced by trap - for (Instruction *I : worklist) - I->eraseFromParent(); - - return !worklist.empty(); -} diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRefTypeMem2Local.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRefTypeMem2Local.cpp index 04b4c7d78aabb..d2ff9b264d576 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRefTypeMem2Local.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRefTypeMem2Local.cpp @@ -64,6 +64,8 @@ void WebAssemblyRefTypeMem2Local::visitAllocaInst(AllocaInst &AI) { auto *NewAI = IRB.CreateAlloca(AI.getAllocatedType(), WebAssembly::WASM_ADDRESS_SPACE_VAR, nullptr, AI.getName() + ".var"); + // Preserve the original alloca's alignment. 
+ NewAI->setAlignment(AI.getAlign()); // The below is basically equivalent to AI.replaceAllUsesWith(NewAI), but we // cannot use it because it requires the old and new types be the same, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 110d6820bb76e..858c9aab7298d 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -119,7 +119,6 @@ LLVMInitializeWebAssemblyTarget() { initializeWebAssemblyDebugFixupPass(PR); initializeWebAssemblyPeepholePass(PR); initializeWebAssemblyMCLowerPrePassPass(PR); - initializeWebAssemblyLowerRefTypesIntPtrConvPass(PR); initializeWebAssemblyFixBrTableDefaultsPass(PR); initializeWebAssemblyDAGToDAGISelLegacyPass(PR); } @@ -677,7 +676,6 @@ void WebAssemblyPassConfig::addPreEmitPass() { bool WebAssemblyPassConfig::addPreISel() { TargetPassConfig::addPreISel(); - addPass(createWebAssemblyLowerRefTypesIntPtrConv()); return false; } diff --git a/llvm/test/CodeGen/WebAssembly/GlobalISel/irtranslator/args.ll b/llvm/test/CodeGen/WebAssembly/GlobalISel/irtranslator/args.ll index 152a607b14d5a..97413111536be 100644 --- a/llvm/test/CodeGen/WebAssembly/GlobalISel/irtranslator/args.ll +++ b/llvm/test/CodeGen/WebAssembly/GlobalISel/irtranslator/args.ll @@ -144,27 +144,9 @@ define void @test_f128_arg(fp128 %arg) { ret void } -%externref = type ptr addrspace(10) -define void @test_externref_arg(%externref %arg) { - ; CHECK-LABEL: name: test_externref_arg - ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $arguments - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[ARGUMENT_externref:%[0-9]+]]:externref(p10) = ARGUMENT_externref 0, implicit $arguments - ; CHECK-NEXT: RETURN implicit-def $arguments - ret void -} - -%funcref = type ptr addrspace(20) -define void @test_funcref_arg(%funcref %arg) { - ; CHECK-LABEL: name: test_funcref_arg - ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: 
$arguments - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[ARGUMENT_funcref:%[0-9]+]]:funcref(p20) = ARGUMENT_funcref 0, implicit $arguments - ; CHECK-NEXT: RETURN implicit-def $arguments - ret void -} +; NOTE: Reference types (externref/funcref) are intentionally not tested here. +; GlobalISel currently falls back to SelectionDAG for them; see +; reference-types-fallback.ll. define void @test_multiple_args(ptr %arg1, float %arg2, i1 %arg3) { ; WASM32-LABEL: name: test_multiple_args diff --git a/llvm/test/CodeGen/WebAssembly/GlobalISel/irtranslator/call-basics.ll b/llvm/test/CodeGen/WebAssembly/GlobalISel/irtranslator/call-basics.ll index d767ed5104dae..5a1a4fc8f3c68 100644 --- a/llvm/test/CodeGen/WebAssembly/GlobalISel/irtranslator/call-basics.ll +++ b/llvm/test/CodeGen/WebAssembly/GlobalISel/irtranslator/call-basics.ll @@ -5,8 +5,6 @@ ; RUN: llc -mtriple=wasm64 -mattr=+simd128,+multivalue -target-abi=experimental-mv -global-isel -stop-after=irtranslator < %s | FileCheck %s -check-prefixes=CHECK,WASM64,MULTIVAL-SIMD,WASM64-MULTIVAL-SIMD -%externref = type ptr addrspace(10) -%funcref = type ptr addrspace(20) declare void @ret_void_args_none() define void @call_ret_void_args_none() { @@ -87,29 +85,9 @@ define ptr @call_ret_ptr_args_none() { ret ptr %ret } -declare %externref @ret_externref_args_none() -define %externref @call_ret_externref_args_none() { - ; CHECK-LABEL: name: call_ret_externref_args_none - ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $arguments - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[CALL:%[0-9]+]]:externref(p10) = CALL @ret_externref_args_none, implicit-def $arguments, implicit $sp32, implicit $sp64 - ; CHECK-NEXT: RETURN [[CALL]](p10), implicit-def $arguments - %ret = call %externref @ret_externref_args_none() - ret %externref %ret -} - -declare %funcref @ret_funcref_args_none() -define %funcref @call_ret_funcref_args_none() { - ; CHECK-LABEL: name: call_ret_funcref_args_none - ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $arguments - 
; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[CALL:%[0-9]+]]:funcref(p20) = CALL @ret_funcref_args_none, implicit-def $arguments, implicit $sp32, implicit $sp64 - ; CHECK-NEXT: RETURN [[CALL]](p20), implicit-def $arguments - %ret = call %funcref @ret_funcref_args_none() - ret %funcref %ret -} +; NOTE: Reference-type (externref/funcref) returns are intentionally not tested +; here. GlobalISel currently falls back to SelectionDAG for them; see +; reference-types-fallback.ll. declare i128 @ret_i128_args_none() define i128 @call_ret_i128_args_none() { @@ -234,31 +212,9 @@ define void @call_ret_void_args_ptr(ptr %a) { ret void } -declare void @ret_void_args_externref(%externref) -define void @call_ret_void_args_externref(%externref %a) { - ; CHECK-LABEL: name: call_ret_void_args_externref - ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $arguments - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[ARGUMENT_externref:%[0-9]+]]:externref(p10) = ARGUMENT_externref 0, implicit $arguments - ; CHECK-NEXT: CALL @ret_void_args_externref, [[ARGUMENT_externref]](p10), implicit-def $arguments, implicit $sp32, implicit $sp64 - ; CHECK-NEXT: RETURN implicit-def $arguments - call void @ret_void_args_externref(%externref %a) - ret void -} - -declare void @ret_void_args_funcref(%funcref) -define void @call_ret_void_args_funcref(%funcref %a) { - ; CHECK-LABEL: name: call_ret_void_args_funcref - ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $arguments - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[ARGUMENT_funcref:%[0-9]+]]:funcref(p20) = ARGUMENT_funcref 0, implicit $arguments - ; CHECK-NEXT: CALL @ret_void_args_funcref, [[ARGUMENT_funcref]](p20), implicit-def $arguments, implicit $sp32, implicit $sp64 - ; CHECK-NEXT: RETURN implicit-def $arguments - call void @ret_void_args_funcref(%funcref %a) - ret void -} +; NOTE: Reference-type (externref/funcref) arguments are intentionally not +; tested here. GlobalISel currently falls back to SelectionDAG for them; see +; reference-types-fallback.ll. 
declare void @ret_void_args_i128(i128) define void @call_ret_void_args_i128(i128 %a) { diff --git a/llvm/test/CodeGen/WebAssembly/GlobalISel/irtranslator/ret-basics.ll b/llvm/test/CodeGen/WebAssembly/GlobalISel/irtranslator/ret-basics.ll index 21e066e78c515..3bc6aaaa3d48c 100644 --- a/llvm/test/CodeGen/WebAssembly/GlobalISel/irtranslator/ret-basics.ll +++ b/llvm/test/CodeGen/WebAssembly/GlobalISel/irtranslator/ret-basics.ll @@ -124,30 +124,6 @@ define double @test_ret_f64() { ret double 0.0 } -%externref = type ptr addrspace(10) -define %externref @test_ret_externref() { - ; CHECK-LABEL: name: test_ret_externref - ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $arguments - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ref_ptr - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p10) = G_LOAD [[FRAME_INDEX]](p0) :: (load (p10) from %ir.ref_ptr) - ; CHECK-NEXT: RETURN [[LOAD]](p10), implicit-def $arguments - %ref_ptr = alloca %externref - %ref = load %externref, ptr %ref_ptr - ret %externref %ref -} - -%funcref = type ptr addrspace(20) -define %funcref @test_ret_funcref() { - ; CHECK-LABEL: name: test_ret_funcref - ; CHECK: bb.1 (%ir-block.0): - ; CHECK-NEXT: liveins: $arguments - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0.ref_ptr - ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(p20) = G_LOAD [[FRAME_INDEX]](p0) :: (load (p20) from %ir.ref_ptr) - ; CHECK-NEXT: RETURN [[LOAD]](p20), implicit-def $arguments - %ref_ptr = alloca %funcref - %ref = load %funcref, ptr %ref_ptr - ret %funcref %ref -} +; NOTE: Reference types (externref/funcref) are intentionally not tested here. +; GlobalISel currently falls back to SelectionDAG for them; see +; reference-types-fallback.ll. 
diff --git a/llvm/test/CodeGen/WebAssembly/GlobalISel/reference-types-fallback.ll b/llvm/test/CodeGen/WebAssembly/GlobalISel/reference-types-fallback.ll new file mode 100644 index 0000000000000..bce9127a8969e --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/GlobalISel/reference-types-fallback.ll @@ -0,0 +1,40 @@ +; RUN: llc -mtriple=wasm32 -mattr=+reference-types -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - 2>&1 | FileCheck %s +; RUN: llc -mtriple=wasm64 -mattr=+reference-types -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' %s -o - 2>&1 | FileCheck %s + +; GlobalISel does not yet correctly model WebAssembly reference types +; (externref/funcref): the generic getLLTForType derives an integer scalar LLT +; from their pointer layout type, which would round-trip references through +; invalid integer loads/bitcasts. Until that is fixed, the WebAssembly +; CallLowering bails out for any function passing or returning a reference type, +; so instruction selection falls back to SelectionDAG. This test verifies that +; fallback (rather than emission of incorrect code). 
+ +define target("wasm.externref") @ret_externref(target("wasm.externref") %a) { +; CHECK: remark: {{.*}} unable to lower arguments{{.*}}wasm.externref +; CHECK-LABEL: warning: Instruction selection used fallback path for ret_externref + ret target("wasm.externref") %a +} + +define target("wasm.funcref") @ret_funcref(target("wasm.funcref") %a) { +; CHECK: remark: {{.*}} unable to lower arguments{{.*}}wasm.funcref +; CHECK-LABEL: warning: Instruction selection used fallback path for ret_funcref + ret target("wasm.funcref") %a +} + +declare void @take_externref(target("wasm.externref")) + +define void @call_take_externref(target("wasm.externref") %a) { +; CHECK: remark: {{.*}} unable to lower arguments{{.*}}wasm.externref +; CHECK-LABEL: warning: Instruction selection used fallback path for call_take_externref + call void @take_externref(target("wasm.externref") %a) + ret void +} + +declare target("wasm.externref") @produce_externref() + +define void @call_produce_externref() { +; CHECK: remark: {{.*}} unable to {{.*}} +; CHECK-LABEL: warning: Instruction selection used fallback path for call_produce_externref + %ref = call target("wasm.externref") @produce_externref() + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/externref-globalget.ll b/llvm/test/CodeGen/WebAssembly/externref-globalget.ll index 79d7932486e22..8d377cbfdad30 100644 --- a/llvm/test/CodeGen/WebAssembly/externref-globalget.ll +++ b/llvm/test/CodeGen/WebAssembly/externref-globalget.ll @@ -3,7 +3,7 @@ ; not error out. 
; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types -print-after=finalize-isel | FileCheck %s -%externref = type ptr addrspace(10) ;; addrspace 10 is nonintegral +%externref = type target("wasm.externref") @externref_global = local_unnamed_addr addrspace(1) global %externref undef diff --git a/llvm/test/CodeGen/WebAssembly/externref-globalset.ll b/llvm/test/CodeGen/WebAssembly/externref-globalset.ll index 5bfd673e89fa1..57cfffe82dee0 100644 --- a/llvm/test/CodeGen/WebAssembly/externref-globalset.ll +++ b/llvm/test/CodeGen/WebAssembly/externref-globalset.ll @@ -1,6 +1,6 @@ ; RUN: llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s | FileCheck %s -%externref = type ptr addrspace(10) ;; addrspace 10 is nonintegral +%externref = type target("wasm.externref") @externref_global = local_unnamed_addr addrspace(1) global %externref undef diff --git a/llvm/test/CodeGen/WebAssembly/externref-inttoptr.ll b/llvm/test/CodeGen/WebAssembly/externref-inttoptr.ll index 64f955b6ed0f0..bdf04f73c54a7 100644 --- a/llvm/test/CodeGen/WebAssembly/externref-inttoptr.ll +++ b/llvm/test/CodeGen/WebAssembly/externref-inttoptr.ll @@ -1,16 +1,10 @@ -; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types 2>&1 | FileCheck %s +; RUN: not llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR -%externref = type ptr addrspace(10) +%externref = type target("wasm.externref") define %externref @int_to_externref(i32 %i) { %ref = inttoptr i32 %i to %externref ret %externref %ref } - -; CHECK-LABEL: int_to_externref: -; CHECK-NEXT: .functype int_to_externref (i32) -> (externref) -; CHECK-NEXT: .local externref -; CHECK-NEXT: unreachable -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: end_function +# CHECK-ERROR: error: invalid cast opcode for cast from 'i32' to 'target("wasm.externref")' diff --git 
a/llvm/test/CodeGen/WebAssembly/externref-ptrtoint.ll b/llvm/test/CodeGen/WebAssembly/externref-ptrtoint.ll index 22558796f0624..f3cfe31445b60 100644 --- a/llvm/test/CodeGen/WebAssembly/externref-ptrtoint.ll +++ b/llvm/test/CodeGen/WebAssembly/externref-ptrtoint.ll @@ -1,15 +1,10 @@ -; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types 2>&1 | FileCheck %s +; RUN: not llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR -%externref = type ptr addrspace(10) +%externref = type target("wasm.externref") define i32 @externref_to_int(%externref %ref) { %i = ptrtoint %externref %ref to i32 ret i32 %i } -; CHECK-LABEL: externref_to_int: -; CHECK-NEXT: .functype externref_to_int (externref) -> (i32) -; CHECK-NEXT: .local i32 -; CHECK-NEXT: unreachable -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: end_function +# CHECK-ERROR: error: invalid cast opcode for cast from 'target("wasm.externref")' to 'i32' diff --git a/llvm/test/CodeGen/WebAssembly/externref-tableget.ll b/llvm/test/CodeGen/WebAssembly/externref-tableget.ll index d9ae7c8f6c9b1..1189fc3571902 100644 --- a/llvm/test/CodeGen/WebAssembly/externref-tableget.ll +++ b/llvm/test/CodeGen/WebAssembly/externref-tableget.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s -%externref = type ptr addrspace(10) ;; addrspace 10 is nonintegral +%externref = type target("wasm.externref") @externref_table = local_unnamed_addr addrspace(1) global [0 x %externref] undef diff --git a/llvm/test/CodeGen/WebAssembly/externref-tableset.ll b/llvm/test/CodeGen/WebAssembly/externref-tableset.ll index 37c663869428e..f84ae4dba68eb 100644 --- a/llvm/test/CodeGen/WebAssembly/externref-tableset.ll +++ b/llvm/test/CodeGen/WebAssembly/externref-tableset.ll @@ -1,6 +1,6 @@ ; RUN: llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s | 
FileCheck %s -%externref = type ptr addrspace(10) ;; addrspace 10 is nonintegral +%externref = type target("wasm.externref") @externref_table = local_unnamed_addr addrspace(1) global [0 x %externref] undef diff --git a/llvm/test/CodeGen/WebAssembly/externref-unsized-load.ll b/llvm/test/CodeGen/WebAssembly/externref-unsized-load.ll index 1f8f4d5140c51..98a0e801af387 100644 --- a/llvm/test/CodeGen/WebAssembly/externref-unsized-load.ll +++ b/llvm/test/CodeGen/WebAssembly/externref-unsized-load.ll @@ -1,6 +1,6 @@ ; RUN: not llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR -%externref = type ptr addrspace(10) +%externref = type target("wasm.externref") define void @load_extern(%externref %ref) { %e = load %extern, %externref %ref diff --git a/llvm/test/CodeGen/WebAssembly/externref-unsized-store.ll b/llvm/test/CodeGen/WebAssembly/externref-unsized-store.ll index c7e062d1b0526..f3d3ee3685230 100644 --- a/llvm/test/CodeGen/WebAssembly/externref-unsized-store.ll +++ b/llvm/test/CodeGen/WebAssembly/externref-unsized-store.ll @@ -1,6 +1,6 @@ ; RUN: not llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR -%externref = type ptr addrspace(10) +%externref = type target("wasm.externref") define void @store_extern(%externref %ref) { store %extern undef, %externref %ref diff --git a/llvm/test/CodeGen/WebAssembly/funcref-call.ll b/llvm/test/CodeGen/WebAssembly/funcref-call.ll index 9904df2280e81..d8433d61a5db9 100644 --- a/llvm/test/CodeGen/WebAssembly/funcref-call.ll +++ b/llvm/test/CodeGen/WebAssembly/funcref-call.ll @@ -1,8 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s --mtriple=wasm32-unknown-unknown -fast-isel=0 -mattr=+reference-types | FileCheck %s -; RUN: llc < %s --mtriple=wasm32-unknown-unknown -fast-isel=1 -mattr=+reference-types | FileCheck %s -%funcref = 
type ptr addrspace(20) ;; addrspace 20 is nonintegral +; Use -fast-isel-abort=3 (never fall back to SelectionDAG) to make sure that the +; funcref call is selected by FastISel +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -fast-isel=1 -fast-isel-abort=3 -mattr=+reference-types | FileCheck %s + +%funcref = type target("wasm.funcref") + +declare ptr @llvm.wasm.funcref.to_ptr(%funcref) nounwind ; CHECK: .tabletype __funcref_call_table, funcref, 1 @@ -19,7 +24,8 @@ define void @call_funcref(%funcref %ref) { ; CHECK-NEXT: ref.null_func ; CHECK-NEXT: table.set __funcref_call_table ; CHECK-NEXT: # fallthrough-return - call addrspace(20) void %ref() + %refptr = call ptr @llvm.wasm.funcref.to_ptr(%funcref %ref) + call void %refptr() ret void } @@ -41,6 +47,7 @@ define float @call_funcref_with_args(%funcref %ref) { ; CHECK-NEXT: table.set __funcref_call_table ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: # fallthrough-return - %ret = call addrspace(20) float %ref(double 1.0, i32 2) + %refptr = call ptr @llvm.wasm.funcref.to_ptr(%funcref %ref) + %ret = call float %refptr(double 1.0, i32 2) ret float %ret } diff --git a/llvm/test/CodeGen/WebAssembly/funcref-globalget.ll b/llvm/test/CodeGen/WebAssembly/funcref-globalget.ll index 9aa7fdabfdea9..2634e5a6fc57e 100644 --- a/llvm/test/CodeGen/WebAssembly/funcref-globalget.ll +++ b/llvm/test/CodeGen/WebAssembly/funcref-globalget.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s -%funcref = type ptr addrspace(20) ;; addrspace 20 is nonintegral +%funcref = type target("wasm.funcref") @funcref_global = local_unnamed_addr addrspace(1) global %funcref undef diff --git a/llvm/test/CodeGen/WebAssembly/funcref-globalset.ll b/llvm/test/CodeGen/WebAssembly/funcref-globalset.ll index ca2feb6617996..791e8648537c0 100644 --- a/llvm/test/CodeGen/WebAssembly/funcref-globalset.ll +++ b/llvm/test/CodeGen/WebAssembly/funcref-globalset.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s 
--mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s -%funcref = type ptr addrspace(20) ;; addrspace 20 is nonintegral +%funcref = type target("wasm.funcref") @funcref_global = local_unnamed_addr addrspace(1) global %funcref undef diff --git a/llvm/test/CodeGen/WebAssembly/funcref-ptr-conversion.ll b/llvm/test/CodeGen/WebAssembly/funcref-ptr-conversion.ll new file mode 100644 index 0000000000000..b9792b661a742 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/funcref-ptr-conversion.ll @@ -0,0 +1,28 @@ +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s +; RUN: llc < %s --mtriple=wasm64-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s --check-prefix=CHECK64 + +%funcref = type target("wasm.funcref") + +declare %funcref @llvm.wasm.ptr.to_funcref(ptr) nounwind + +; CHECK: .tabletype __indirect_function_table, funcref + +; Converting a function pointer to a funcref is a table.get from the +; __indirect_function_table. +define %funcref @ptr_to_funcref(ptr %p) { +; CHECK-LABEL: ptr_to_funcref: +; CHECK: .functype ptr_to_funcref (i32) -> (funcref) +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: table.get __indirect_function_table +; CHECK-NEXT: end_function +; +; On wasm64 the function pointer is an i64 and must be wrapped to i32 first. 
+; CHECK64-LABEL: ptr_to_funcref: +; CHECK64: .functype ptr_to_funcref (i64) -> (funcref) +; CHECK64-NEXT: local.get 0 +; CHECK64-NEXT: i32.wrap_i64 +; CHECK64-NEXT: table.get __indirect_function_table +; CHECK64-NEXT: end_function + %ref = call %funcref @llvm.wasm.ptr.to_funcref(ptr %p) + ret %funcref %ref +} diff --git a/llvm/test/CodeGen/WebAssembly/funcref-table_call.ll b/llvm/test/CodeGen/WebAssembly/funcref-table_call.ll index 74bbc802ac077..e5343fa80d846 100644 --- a/llvm/test/CodeGen/WebAssembly/funcref-table_call.ll +++ b/llvm/test/CodeGen/WebAssembly/funcref-table_call.ll @@ -1,12 +1,13 @@ ; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s -%funcref = type ptr addrspace(20) ;; addrspace 20 is nonintegral +%funcref = type target("wasm.funcref") @funcref_table = local_unnamed_addr addrspace(1) global [0 x %funcref] undef ; CHECK: .tabletype __funcref_call_table, funcref, 1 declare %funcref @llvm.wasm.table.get.funcref(ptr addrspace(1), i32) nounwind +declare ptr @llvm.wasm.funcref.to_ptr(%funcref) nounwind define void @call_funcref_from_table(i32 %i) { ; CHECK-LABEL: call_funcref_from_table: @@ -22,7 +23,8 @@ define void @call_funcref_from_table(i32 %i) { ; CHECK-NEXT: table.set __funcref_call_table ; CHECK-NEXT: end_function %ref = call %funcref @llvm.wasm.table.get.funcref(ptr addrspace(1) @funcref_table, i32 %i) - call addrspace(20) void %ref() + %refptr = call ptr @llvm.wasm.funcref.to_ptr(%funcref %ref) + call void %refptr() ret void } diff --git a/llvm/test/CodeGen/WebAssembly/funcref-tableget.ll b/llvm/test/CodeGen/WebAssembly/funcref-tableget.ll index 3df308c5ddf80..2621dcde6a79b 100644 --- a/llvm/test/CodeGen/WebAssembly/funcref-tableget.ll +++ b/llvm/test/CodeGen/WebAssembly/funcref-tableget.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s -%funcref = type ptr addrspace(20) ;; addrspace 20 is nonintegral +%funcref = 
type target("wasm.funcref") @funcref_table = local_unnamed_addr addrspace(1) global [0 x %funcref] undef diff --git a/llvm/test/CodeGen/WebAssembly/funcref-tableset.ll b/llvm/test/CodeGen/WebAssembly/funcref-tableset.ll index 98e1b55613d7d..e62eb8a6f2d20 100644 --- a/llvm/test/CodeGen/WebAssembly/funcref-tableset.ll +++ b/llvm/test/CodeGen/WebAssembly/funcref-tableset.ll @@ -1,6 +1,6 @@ ; RUN: llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s | FileCheck %s -%funcref = type ptr addrspace(20) ;; addrspace 20 is nonintegral +%funcref = type target("wasm.funcref") @funcref_table = local_unnamed_addr addrspace(1) global [0 x %funcref] undef diff --git a/llvm/test/CodeGen/WebAssembly/funcref-to-ptr-error.ll b/llvm/test/CodeGen/WebAssembly/funcref-to-ptr-error.ll new file mode 100644 index 0000000000000..b756513ecbafa --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/funcref-to-ptr-error.ll @@ -0,0 +1,30 @@ +; RUN: not llc < %s --mtriple=wasm32-unknown-unknown -mattr=+reference-types 2>&1 | FileCheck %s + +; We have only implemented a lowering for llvm.wasm.funcref.to_ptr when its result +; feeds directly into an indirect call. Check that we diagnose the case where we +; spill the result rather than crashing in the backend. 
+ +%funcref = type target("wasm.funcref") + +declare ptr @llvm.wasm.funcref.to_ptr(%funcref) +declare void @sink(ptr) + +; CHECK: error: {{.*}}in function escape_via_store {{.*}}: a funcref can only be converted to a pointer to be directly called; the resulting pointer cannot otherwise be used +define void @escape_via_store(%funcref %ref, ptr %dst) { + %p = call ptr @llvm.wasm.funcref.to_ptr(%funcref %ref) + store ptr %p, ptr %dst + ret void +} + +; CHECK: error: {{.*}}in function escape_via_return {{.*}}: a funcref can only be converted to a pointer to be directly called; the resulting pointer cannot otherwise be used +define ptr @escape_via_return(%funcref %ref) { + %p = call ptr @llvm.wasm.funcref.to_ptr(%funcref %ref) + ret ptr %p +} + +; CHECK: error: {{.*}}in function escape_via_arg {{.*}}: a funcref can only be converted to a pointer to be directly called; the resulting pointer cannot otherwise be used +define void @escape_via_arg(%funcref %ref) { + %p = call ptr @llvm.wasm.funcref.to_ptr(%funcref %ref) + call void @sink(ptr %p) + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/ref-null-zeroinitializer.ll b/llvm/test/CodeGen/WebAssembly/ref-null-zeroinitializer.ll new file mode 100644 index 0000000000000..7483a72d0b96c --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/ref-null-zeroinitializer.ll @@ -0,0 +1,67 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s --mtriple=wasm32-unknown-unknown -mattr=+reference-types | FileCheck %s + +; The zero value of a WebAssembly reference type is the null reference, so a +; zeroinitializer (or null) reference constant must lower to ref.null rather +; than crash instruction selection. 
+ +%externref = type target("wasm.externref") +%funcref = type target("wasm.funcref") + +declare void @take_externref(%externref) +declare i32 @llvm.wasm.ref.is_null.extern(%externref) + +define %externref @ret_zero_externref() { +; CHECK-LABEL: ret_zero_externref: +; CHECK: .functype ret_zero_externref () -> (externref) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: ref.null_extern +; CHECK-NEXT: # fallthrough-return + ret %externref zeroinitializer +} + +define %funcref @ret_zero_funcref() { +; CHECK-LABEL: ret_zero_funcref: +; CHECK: .functype ret_zero_funcref () -> (funcref) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: ref.null_func +; CHECK-NEXT: # fallthrough-return + ret %funcref zeroinitializer +} + +define void @pass_zero_externref() { +; CHECK-LABEL: pass_zero_externref: +; CHECK: .functype pass_zero_externref () -> () +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: ref.null_extern +; CHECK-NEXT: call take_externref +; CHECK-NEXT: # fallthrough-return + call void @take_externref(%externref zeroinitializer) + ret void +} + +define %externref @select_zero_externref(i1 %c, %externref %x) { +; CHECK-LABEL: select_zero_externref: +; CHECK: .functype select_zero_externref (i32, externref) -> (externref) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 1 +; CHECK-NEXT: ref.null_extern +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: i32.const 1 +; CHECK-NEXT: i32.and +; CHECK-NEXT: externref.select +; CHECK-NEXT: # fallthrough-return + %r = select i1 %c, %externref %x, %externref zeroinitializer + ret %externref %r +} + +define i32 @is_null_zero_externref() { +; CHECK-LABEL: is_null_zero_externref: +; CHECK: .functype is_null_zero_externref () -> (i32) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: ref.null_extern +; CHECK-NEXT: ref.is_null +; CHECK-NEXT: # fallthrough-return + %r = call i32 @llvm.wasm.ref.is_null.extern(%externref zeroinitializer) + ret i32 %r +} diff --git a/llvm/test/CodeGen/WebAssembly/ref-null.ll b/llvm/test/CodeGen/WebAssembly/ref-null.ll index af6ddfd8e0814..f2f2a0dad378b 
100644 --- a/llvm/test/CodeGen/WebAssembly/ref-null.ll +++ b/llvm/test/CodeGen/WebAssembly/ref-null.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mtriple=wasm32-unknown-unknown -mattr=+reference-types < %s | FileCheck %s -%externref = type ptr addrspace(10) ;; addrspace 10 is nonintegral -%funcref = type ptr addrspace(20) ;; addrspace 20 is nonintegral +%externref = type target("wasm.externref") +%funcref = type target("wasm.funcref") declare %externref @llvm.wasm.ref.null.extern() nounwind declare %funcref @llvm.wasm.ref.null.func() nounwind diff --git a/llvm/test/CodeGen/WebAssembly/ref-test-func.ll b/llvm/test/CodeGen/WebAssembly/ref-test-func.ll index 4fda253d39fe3..f74c968abdadc 100644 --- a/llvm/test/CodeGen/WebAssembly/ref-test-func.ll +++ b/llvm/test/CodeGen/WebAssembly/ref-test-func.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s --mtriple=wasm32-unknown-unknown -mcpu=mvp -mattr=+reference-types -mattr=+gc -verify-machineinstrs | FileCheck --check-prefixes CHECK,CHK32 %s ; RUN: llc < %s --mtriple=wasm64-unknown-unknown -mcpu=mvp -mattr=+reference-types -mattr=+gc -verify-machineinstrs | FileCheck --check-prefixes CHECK,CHK64 %s @@ -154,7 +153,7 @@ define void @test_reference_types(ptr noundef %func) local_unnamed_addr #0 { ; CHECK-NEXT: call use ; CHECK-NEXT: # fallthrough-return entry: - %res = tail call i32 (ptr, ...) @llvm.wasm.ref.test.func(ptr %func, ptr addrspace(10) poison, token poison, ptr addrspace(20) poison, ptr addrspace(10) poison) + %res = tail call i32 (ptr, ...) 
@llvm.wasm.ref.test.func(ptr %func, target("wasm.externref") poison, token poison, target("wasm.funcref") poison, target("wasm.externref") poison) tail call void @use(i32 noundef %res) #3 ret void } diff --git a/llvm/test/CodeGen/WebAssembly/ref-type-mem2local.ll b/llvm/test/CodeGen/WebAssembly/ref-type-mem2local.ll index 911e5bb516a2f..75b3002b3be76 100644 --- a/llvm/test/CodeGen/WebAssembly/ref-type-mem2local.ll +++ b/llvm/test/CodeGen/WebAssembly/ref-type-mem2local.ll @@ -3,8 +3,8 @@ target triple = "wasm32-unknown-unknown" -%externref = type ptr addrspace(10) -%funcref = type ptr addrspace(20) +%externref = type target("wasm.externref") +%funcref = type target("wasm.funcref") declare %externref @get_externref() declare %funcref @get_funcref() @@ -22,22 +22,22 @@ entry: store %externref %eref, ptr %alloc.externref, align 1 %eref.loaded = load %externref, ptr %alloc.externref, align 1 call void @take_externref(%externref %eref.loaded) - ; CHECK: %alloc.externref.var = alloca ptr addrspace(10), align 1, addrspace(1) - ; CHECK-NEXT: %eref = call ptr addrspace(10) @get_externref() - ; CHECK-NEXT: store ptr addrspace(10) %eref, ptr addrspace(1) %alloc.externref.var, align 1 - ; CHECK-NEXT: %eref.loaded = load ptr addrspace(10), ptr addrspace(1) %alloc.externref.var, align 1 - ; CHECK-NEXT: call void @take_externref(ptr addrspace(10) %eref.loaded) + ; CHECK: %alloc.externref.var = alloca target("wasm.externref"), align 1, addrspace(1) + ; CHECK-NEXT: %eref = call target("wasm.externref") @get_externref() + ; CHECK-NEXT: store target("wasm.externref") %eref, ptr addrspace(1) %alloc.externref.var, align 1 + ; CHECK-NEXT: %eref.loaded = load target("wasm.externref"), ptr addrspace(1) %alloc.externref.var, align 1 + ; CHECK-NEXT: call void @take_externref(target("wasm.externref") %eref.loaded) %alloc.funcref = alloca %funcref, align 1 %fref = call %funcref @get_funcref() store %funcref %fref, ptr %alloc.funcref, align 1 %fref.loaded = load %funcref, ptr %alloc.funcref, 
align 1 call void @take_funcref(%funcref %fref.loaded) - ; CHECK-NEXT: %alloc.funcref.var = alloca ptr addrspace(20), align 1, addrspace(1) - ; CHECK-NEXT: %fref = call ptr addrspace(20) @get_funcref() - ; CHECK-NEXT: store ptr addrspace(20) %fref, ptr addrspace(1) %alloc.funcref.var, align 1 - ; CHECK-NEXT: %fref.loaded = load ptr addrspace(20), ptr addrspace(1) %alloc.funcref.var, align 1 - ; CHECK-NEXT: call void @take_funcref(ptr addrspace(20) %fref.loaded) + ; CHECK-NEXT: %alloc.funcref.var = alloca target("wasm.funcref"), align 1, addrspace(1) + ; CHECK-NEXT: %fref = call target("wasm.funcref") @get_funcref() + ; CHECK-NEXT: store target("wasm.funcref") %fref, ptr addrspace(1) %alloc.funcref.var, align 1 + ; CHECK-NEXT: %fref.loaded = load target("wasm.funcref"), ptr addrspace(1) %alloc.funcref.var, align 1 + ; CHECK-NEXT: call void @take_funcref(target("wasm.funcref") %fref.loaded) ret void } @@ -68,22 +68,22 @@ entry: store %externref %eref, ptr %alloc.externref, align 1 %eref.loaded = load %externref, ptr %alloc.externref, align 1 call void @take_externref(%externref %eref.loaded) - ; ATTR: %alloc.externref.var = alloca ptr addrspace(10), align 1, addrspace(1) - ; ATTR-NEXT: %eref = call ptr addrspace(10) @get_externref() - ; ATTR-NEXT: store ptr addrspace(10) %eref, ptr addrspace(1) %alloc.externref.var, align 1 - ; ATTR-NEXT: %eref.loaded = load ptr addrspace(10), ptr addrspace(1) %alloc.externref.var, align 1 - ; ATTR-NEXT: call void @take_externref(ptr addrspace(10) %eref.loaded) + ; ATTR: %alloc.externref.var = alloca target("wasm.externref"), align 1, addrspace(1) + ; ATTR-NEXT: %eref = call target("wasm.externref") @get_externref() + ; ATTR-NEXT: store target("wasm.externref") %eref, ptr addrspace(1) %alloc.externref.var, align 1 + ; ATTR-NEXT: %eref.loaded = load target("wasm.externref"), ptr addrspace(1) %alloc.externref.var, align 1 + ; ATTR-NEXT: call void @take_externref(target("wasm.externref") %eref.loaded) %alloc.funcref = alloca %funcref, 
align 1 %fref = call %funcref @get_funcref() store %funcref %fref, ptr %alloc.funcref, align 1 %fref.loaded = load %funcref, ptr %alloc.funcref, align 1 call void @take_funcref(%funcref %fref.loaded) - ; ATTR-NEXT: %alloc.funcref.var = alloca ptr addrspace(20), align 1, addrspace(1) - ; ATTR-NEXT: %fref = call ptr addrspace(20) @get_funcref() - ; ATTR-NEXT: store ptr addrspace(20) %fref, ptr addrspace(1) %alloc.funcref.var, align 1 - ; ATTR-NEXT: %fref.loaded = load ptr addrspace(20), ptr addrspace(1) %alloc.funcref.var, align 1 - ; ATTR-NEXT: call void @take_funcref(ptr addrspace(20) %fref.loaded) + ; ATTR-NEXT: %alloc.funcref.var = alloca target("wasm.funcref"), align 1, addrspace(1) + ; ATTR-NEXT: %fref = call target("wasm.funcref") @get_funcref() + ; ATTR-NEXT: store target("wasm.funcref") %fref, ptr addrspace(1) %alloc.funcref.var, align 1 + ; ATTR-NEXT: %fref.loaded = load target("wasm.funcref"), ptr addrspace(1) %alloc.funcref.var, align 1 + ; ATTR-NEXT: call void @take_funcref(target("wasm.funcref") %fref.loaded) ret void } diff --git a/llvm/test/CodeGen/WebAssembly/select-reftype.ll b/llvm/test/CodeGen/WebAssembly/select-reftype.ll index baca3ca6258a3..985796048fb47 100644 --- a/llvm/test/CodeGen/WebAssembly/select-reftype.ll +++ b/llvm/test/CodeGen/WebAssembly/select-reftype.ll @@ -5,46 +5,46 @@ target triple = "wasm32-unknown-unknown" -define ptr addrspace(10) @select_externref_eq(i32 %a, ptr addrspace(10) %b, ptr addrspace(10) %c) { +define target("wasm.externref") @select_externref_eq(i32 %a, target("wasm.externref") %b, target("wasm.externref") %c) { ; CHECK-LABEL: select_externref_eq: ; CHECK: .functype select_externref_eq (i32, externref, externref) -> (externref) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: externref.select $push0=, $2, $1, $0 ; CHECK-NEXT: return $pop0 %cmp = icmp eq i32 %a, 0 - %cond = select i1 %cmp, ptr addrspace(10) %b, ptr addrspace(10) %c - ret ptr addrspace(10) %cond + %cond = select i1 %cmp, target("wasm.externref") %b, 
target("wasm.externref") %c + ret target("wasm.externref") %cond } -define ptr addrspace(10) @select_externref_ne(i32 %a, ptr addrspace(10) %b, ptr addrspace(10) %c) { +define target("wasm.externref") @select_externref_ne(i32 %a, target("wasm.externref") %b, target("wasm.externref") %c) { ; CHECK-LABEL: select_externref_ne: ; CHECK: .functype select_externref_ne (i32, externref, externref) -> (externref) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: externref.select $push0=, $1, $2, $0 ; CHECK-NEXT: return $pop0 %cmp = icmp ne i32 %a, 0 - %cond = select i1 %cmp, ptr addrspace(10) %b, ptr addrspace(10) %c - ret ptr addrspace(10) %cond + %cond = select i1 %cmp, target("wasm.externref") %b, target("wasm.externref") %c + ret target("wasm.externref") %cond } -define ptr addrspace(20) @select_funcref_eq(i32 %a, ptr addrspace(20) %b, ptr addrspace(20) %c) { +define target("wasm.funcref") @select_funcref_eq(i32 %a, target("wasm.funcref") %b, target("wasm.funcref") %c) { ; CHECK-LABEL: select_funcref_eq: ; CHECK: .functype select_funcref_eq (i32, funcref, funcref) -> (funcref) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: funcref.select $push0=, $2, $1, $0 ; CHECK-NEXT: return $pop0 %cmp = icmp eq i32 %a, 0 - %cond = select i1 %cmp, ptr addrspace(20) %b, ptr addrspace(20) %c - ret ptr addrspace(20) %cond + %cond = select i1 %cmp, target("wasm.funcref") %b, target("wasm.funcref") %c + ret target("wasm.funcref") %cond } -define ptr addrspace(20) @select_funcref_ne(i32 %a, ptr addrspace(20) %b, ptr addrspace(20) %c) { +define target("wasm.funcref") @select_funcref_ne(i32 %a, target("wasm.funcref") %b, target("wasm.funcref") %c) { ; CHECK-LABEL: select_funcref_ne: ; CHECK: .functype select_funcref_ne (i32, funcref, funcref) -> (funcref) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: funcref.select $push0=, $1, $2, $0 ; CHECK-NEXT: return $pop0 %cmp = icmp ne i32 %a, 0 - %cond = select i1 %cmp, ptr addrspace(20) %b, ptr addrspace(20) %c - ret ptr addrspace(20) %cond + %cond = select i1 %cmp, 
target("wasm.funcref") %b, target("wasm.funcref") %c + ret target("wasm.funcref") %cond } diff --git a/llvm/test/CodeGen/WebAssembly/table-copy.ll b/llvm/test/CodeGen/WebAssembly/table-copy.ll index 5c0647ada4ab0..d1387428c4368 100644 --- a/llvm/test/CodeGen/WebAssembly/table-copy.ll +++ b/llvm/test/CodeGen/WebAssembly/table-copy.ll @@ -1,6 +1,6 @@ ; RUN: llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s | FileCheck %s -%externref = type ptr addrspace(10) ;; addrspace 10 is nonintegral +%externref = type target("wasm.externref") @externref_table1 = local_unnamed_addr addrspace(1) global [0 x %externref] undef @externref_table2 = local_unnamed_addr addrspace(1) global [0 x %externref] undef diff --git a/llvm/test/CodeGen/WebAssembly/table-fill.ll b/llvm/test/CodeGen/WebAssembly/table-fill.ll index 0b78124f038b1..7ed39361a1afd 100644 --- a/llvm/test/CodeGen/WebAssembly/table-fill.ll +++ b/llvm/test/CodeGen/WebAssembly/table-fill.ll @@ -1,6 +1,6 @@ ; RUN: llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s | FileCheck %s -%externref = type ptr addrspace(10) ;; addrspace 10 is nonintegral +%externref = type target("wasm.externref") @externref_table = local_unnamed_addr addrspace(1) global [0 x %externref] undef diff --git a/llvm/test/CodeGen/WebAssembly/table-grow.ll b/llvm/test/CodeGen/WebAssembly/table-grow.ll index 614c3400a782b..2b5801ea2fa17 100644 --- a/llvm/test/CodeGen/WebAssembly/table-grow.ll +++ b/llvm/test/CodeGen/WebAssembly/table-grow.ll @@ -1,6 +1,6 @@ ; RUN: llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s | FileCheck %s -%externref = type ptr addrspace(10) ;; addrspace 10 is nonintegral +%externref = type target("wasm.externref") @externref_table = local_unnamed_addr addrspace(1) global [0 x %externref] undef diff --git a/llvm/test/CodeGen/WebAssembly/table-size.ll b/llvm/test/CodeGen/WebAssembly/table-size.ll index 42cd2e8a909d7..2b8d0185bb049 
100644 --- a/llvm/test/CodeGen/WebAssembly/table-size.ll +++ b/llvm/test/CodeGen/WebAssembly/table-size.ll @@ -1,6 +1,6 @@ ; RUN: llc --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types < %s | FileCheck %s -%externref = type ptr addrspace(10) ;; addrspace 10 is nonintegral +%externref = type target("wasm.externref") @externref_table = local_unnamed_addr addrspace(1) global [0 x %externref] undef diff --git a/llvm/test/CodeGen/WebAssembly/table-types.ll b/llvm/test/CodeGen/WebAssembly/table-types.ll index cb5e54e2af230..da04ba35dd58e 100644 --- a/llvm/test/CodeGen/WebAssembly/table-types.ll +++ b/llvm/test/CodeGen/WebAssembly/table-types.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s --mtriple=wasm32-unknown-unknown -asm-verbose=false -mattr=+reference-types | FileCheck %s -%externref = type ptr addrspace(10) ;; addrspace 10 is nonintegral -%funcref = type ptr addrspace(20) ;; addrspace 20 is nonintegral +%externref = type target("wasm.externref") +%funcref = type target("wasm.funcref") ; CHECK: .tabletype eref_table, externref ; CHECK-NEXT: .globl eref_table diff --git a/llvm/test/Transforms/SLPVectorizer/WebAssembly/externref-no-vectorize.ll b/llvm/test/Transforms/SLPVectorizer/WebAssembly/externref-no-vectorize.ll new file mode 100644 index 0000000000000..f9320eb723f27 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/WebAssembly/externref-no-vectorize.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=slp-vectorizer -mtriple=wasm32-unknown-unknown -mattr=+reference-types -S | FileCheck %s + +; The SLP vectorizer used to try to vectorize the two externref PHIs +; below into a <2 x externref> and then crashed. +; Check that they are left alone and the vectorizer doesn't crash. 
+ +target datalayout = "e-m:e-p:32:32-p10:8:8-p20:8:8-i64:64-i128:128-n32:64-S128-ni:1:10:20" +target triple = "wasm32-unknown-unknown" + +%externref = type target("wasm.externref") + +declare %externref @foo() +declare void @bar(%externref) + +define void @test(i32 %flag, %externref %ref1, %externref %ref2) { +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[FLAG:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[CALL:%.*]] = call target("wasm.externref") @foo() +; CHECK-NEXT: [[CALL1:%.*]] = call target("wasm.externref") @foo() +; CHECK-NEXT: br label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[R1:%.*]] = phi target("wasm.externref") [ [[CALL]], [[IF_THEN]] ], [ [[REF1:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[R2:%.*]] = phi target("wasm.externref") [ [[CALL1]], [[IF_THEN]] ], [ [[REF2:%.*]], [[ENTRY]] ] +; CHECK-NEXT: call void @bar(target("wasm.externref") [[R1]]) +; CHECK-NEXT: call void @bar(target("wasm.externref") [[R2]]) +; CHECK-NEXT: ret void +; +entry: + %cmp = icmp ne i32 %flag, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %call = call %externref @foo() + %call1 = call %externref @foo() + br label %if.end + +if.end: + %r1 = phi %externref [ %call, %if.then ], [ %ref1, %entry ] + %r2 = phi %externref [ %call1, %if.then ], [ %ref2, %entry ] + call void @bar(%externref %r1) + call void @bar(%externref %r2) + ret void +} diff --git a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn index f8bd3fa48a92d..67f9a904bb1e7 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/WebAssembly/BUILD.gn @@ -106,7 +106,6 @@ static_library("LLVMWebAssemblyCodeGen") { "WebAssemblyLateEHPrepare.cpp", "WebAssemblyLowerBrUnless.cpp", "WebAssemblyLowerEmscriptenEHSjLj.cpp", - "WebAssemblyLowerRefTypesIntPtrConv.cpp", 
"WebAssemblyMCInstLower.cpp", "WebAssemblyMCLowerPrePass.cpp", "WebAssemblyMachineFunctionInfo.cpp", From 8775562036c012b32fc02ea7e16b93a5a4d44a5c Mon Sep 17 00:00:00 2001 From: Anshul Nigham Date: Wed, 24 Jun 2026 22:24:11 -0700 Subject: [PATCH 454/511] [NewPM][AArch64] Port AArch64SRLTDefineSuperRegs pass to NewPassManager (#202803) Standard port for the AArch64SRLTDefineSuperRegs pass. Assisted by Gemini --- llvm/lib/Target/AArch64/AArch64.h | 11 +++- .../Target/AArch64/AArch64PassRegistry.def | 1 + .../AArch64/AArch64SRLTDefineSuperRegs.cpp | 55 +++++++++++++------ .../Target/AArch64/AArch64TargetMachine.cpp | 4 +- ...iveness-fix-subreg-to-reg-implicit-def.mir | 1 + 5 files changed, 51 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h index 4fdba2c7dbfcc..55cfdc1bceb72 100644 --- a/llvm/lib/Target/AArch64/AArch64.h +++ b/llvm/lib/Target/AArch64/AArch64.h @@ -77,7 +77,7 @@ FunctionPass *createAArch64CleanupLocalDynamicTLSPass(); FunctionPass *createAArch64CollectLOHPass(); FunctionPass *createSMEPeepholeOptPass(); FunctionPass *createMachineSMEABIPass(CodeGenOptLevel); -FunctionPass *createAArch64SRLTDefineSuperRegsPass(); +FunctionPass *createAArch64SRLTDefineSuperRegsLegacyPass(); ModulePass *createSVEIntrinsicOptsPass(); Pass *createSVEShuffleOptsPass(); InstructionSelector * @@ -201,7 +201,7 @@ void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&); void initializeLDTLSCleanupPass(PassRegistry &); void initializeSMEPeepholeOptPass(PassRegistry &); void initializeMachineSMEABIPass(PassRegistry &); -void initializeAArch64SRLTDefineSuperRegsPass(PassRegistry &); +void initializeAArch64SRLTDefineSuperRegsLegacyPass(PassRegistry &); void initializeSVEIntrinsicOptsPass(PassRegistry &); void initializeSVEShuffleOptsPass(PassRegistry &); void initializeAArch64Arm64ECCallLoweringPass(PassRegistry &); @@ -357,6 +357,13 @@ class AArch64ConditionalComparesPass MachineFunctionAnalysisManager 
&MFAM); }; +class AArch64SRLTDefineSuperRegsPass + : public OptionalPassInfoMixin { +public: + PreservedAnalyses run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM); +}; + class AArch64LowerHomogeneousPrologEpilogPass : public PassInfoMixin { public: diff --git a/llvm/lib/Target/AArch64/AArch64PassRegistry.def b/llvm/lib/Target/AArch64/AArch64PassRegistry.def index 1fc09fc00e9ee..bdfa9bca299b1 100644 --- a/llvm/lib/Target/AArch64/AArch64PassRegistry.def +++ b/llvm/lib/Target/AArch64/AArch64PassRegistry.def @@ -60,6 +60,7 @@ MACHINE_FUNCTION_PASS("aarch64-redundantcondbranch", AArch64RedundantCondBranchP MACHINE_FUNCTION_PASS("aarch64-simd-instr-opt", AArch64SIMDInstrOptPass()) MACHINE_FUNCTION_PASS("aarch64-simd-scalar", AArch64AdvSIMDScalarPass()) MACHINE_FUNCTION_PASS("aarch64-sls-hardening", AArch64SLSHardeningPass()) +MACHINE_FUNCTION_PASS("aarch64-srlt-define-superregs", AArch64SRLTDefineSuperRegsPass()) MACHINE_FUNCTION_PASS("aarch64-stack-tagging-pre-ra", AArch64StackTaggingPreRAPass()) MACHINE_FUNCTION_PASS("aarch64-O0-prelegalizer-combiner", AArch64O0PreLegalizerCombinerPass()) diff --git a/llvm/lib/Target/AArch64/AArch64SRLTDefineSuperRegs.cpp b/llvm/lib/Target/AArch64/AArch64SRLTDefineSuperRegs.cpp index cb881692c43f0..6e6bbb47a6e23 100644 --- a/llvm/lib/Target/AArch64/AArch64SRLTDefineSuperRegs.cpp +++ b/llvm/lib/Target/AArch64/AArch64SRLTDefineSuperRegs.cpp @@ -82,6 +82,7 @@ // [1] https://github.com/llvm/llvm-project/pull/168353 //===----------------------------------------------------------------------===// +#include "AArch64.h" #include "AArch64InstrInfo.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64Subtarget.h" @@ -89,7 +90,9 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include 
"llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" @@ -101,16 +104,28 @@ using namespace llvm; namespace { -struct AArch64SRLTDefineSuperRegs : public MachineFunctionPass { - inline static char ID = 0; - - AArch64SRLTDefineSuperRegs() : MachineFunctionPass(ID) {} - - bool runOnMachineFunction(MachineFunction &MF) override; +class AArch64SRLTDefineSuperRegsImpl { +private: + const AArch64Subtarget *Subtarget = nullptr; + const AArch64RegisterInfo *TRI = nullptr; Register getWidestSuperReg(Register R, const BitVector &RequiredBaseRegUnits, const BitVector &QHiRegUnits); +public: + bool run(MachineFunction &MF); +}; + +class AArch64SRLTDefineSuperRegsLegacy : public MachineFunctionPass { +public: + inline static char ID = 0; + + AArch64SRLTDefineSuperRegsLegacy() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &MF) override { + return AArch64SRLTDefineSuperRegsImpl().run(MF); + } + StringRef getPassName() const override { return PASS_NAME; } void getAnalysisUsage(AnalysisUsage &AU) const override { @@ -119,16 +134,12 @@ struct AArch64SRLTDefineSuperRegs : public MachineFunctionPass { AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); } - -private: - MachineFunction *MF = nullptr; - const AArch64Subtarget *Subtarget = nullptr; - const AArch64RegisterInfo *TRI = nullptr; }; } // end anonymous namespace -INITIALIZE_PASS(AArch64SRLTDefineSuperRegs, DEBUG_TYPE, PASS_NAME, false, false) +INITIALIZE_PASS(AArch64SRLTDefineSuperRegsLegacy, DEBUG_TYPE, PASS_NAME, false, + false) // Returns the widest super-reg for a given reg, or NoRegister if no suitable // wider super-reg has been found. 
For example: @@ -138,7 +149,7 @@ INITIALIZE_PASS(AArch64SRLTDefineSuperRegs, DEBUG_TYPE, PASS_NAME, false, false) // W1_W2 -> X1_X2 // D0_D1 -> Q0_Q1 (without SVE) // -> Z0_Z1 (with SVE) -Register AArch64SRLTDefineSuperRegs::getWidestSuperReg( +Register AArch64SRLTDefineSuperRegsImpl::getWidestSuperReg( Register R, const BitVector &RequiredBaseRegUnits, const BitVector &QHiRegUnits) { assert(R.isPhysical() && @@ -177,8 +188,7 @@ Register AArch64SRLTDefineSuperRegs::getWidestSuperReg( return LargestSuperReg; } -bool AArch64SRLTDefineSuperRegs::runOnMachineFunction(MachineFunction &MF) { - this->MF = &MF; +bool AArch64SRLTDefineSuperRegsImpl::run(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); TRI = Subtarget->getRegisterInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); @@ -243,6 +253,17 @@ bool AArch64SRLTDefineSuperRegs::runOnMachineFunction(MachineFunction &MF) { return Changed; } -FunctionPass *llvm::createAArch64SRLTDefineSuperRegsPass() { - return new AArch64SRLTDefineSuperRegs(); +FunctionPass *llvm::createAArch64SRLTDefineSuperRegsLegacyPass() { + return new AArch64SRLTDefineSuperRegsLegacy(); +} + +PreservedAnalyses +AArch64SRLTDefineSuperRegsPass::run(MachineFunction &MF, + MachineFunctionAnalysisManager &MFAM) { + const bool Changed = AArch64SRLTDefineSuperRegsImpl().run(MF); + if (!Changed) + return PreservedAnalyses::all(); + PreservedAnalyses PA = getMachineFunctionPassPreservedAnalyses(); + PA.preserveSet(); + return PA; } diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index 568563cf53220..898d92e74c85c 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -276,7 +276,7 @@ LLVMInitializeAArch64Target() { initializeLDTLSCleanupPass(PR); initializeMachineKCFILegacyPass(PR); initializeMachineSMEABIPass(PR); - initializeAArch64SRLTDefineSuperRegsPass(PR); + initializeAArch64SRLTDefineSuperRegsLegacyPass(PR); 
initializeSMEPeepholeOptPass(PR); initializeSVEIntrinsicOptsPass(PR); initializeAArch64SpeculationHardeningPass(PR); @@ -860,7 +860,7 @@ void AArch64PassConfig::addPreRegAlloc() { void AArch64PassConfig::addPostRewrite() { if (EnableSRLTSubregToRegMitigation) - addPass(createAArch64SRLTDefineSuperRegsPass()); + addPass(createAArch64SRLTDefineSuperRegsLegacyPass()); } void AArch64PassConfig::addPostRegAlloc() { diff --git a/llvm/test/CodeGen/AArch64/subreg-liveness-fix-subreg-to-reg-implicit-def.mir b/llvm/test/CodeGen/AArch64/subreg-liveness-fix-subreg-to-reg-implicit-def.mir index 32574eb0c25d4..97ae67db9abe4 100644 --- a/llvm/test/CodeGen/AArch64/subreg-liveness-fix-subreg-to-reg-implicit-def.mir +++ b/llvm/test/CodeGen/AArch64/subreg-liveness-fix-subreg-to-reg-implicit-def.mir @@ -1,5 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 # RUN: llc -mtriple=aarch64 -run-pass=aarch64-srlt-define-superregs -enable-subreg-liveness -o - %s | FileCheck %s +# RUN: llc -mtriple=aarch64 -passes=aarch64-srlt-define-superregs -enable-subreg-liveness -o - %s | FileCheck %s --- | target triple = "aarch64" From 04cb6dd108e88ef46496717b6a3f751a7c73c8d2 Mon Sep 17 00:00:00 2001 From: Derek Schuff Date: Wed, 24 Jun 2026 22:28:12 -0700 Subject: [PATCH 455/511] Fix test from #203165 when no wasm target is configured (#205722) Fix test from #203165 when no wasm target is configured --- clang/test/CodeGen/WebAssembly/wasm-funcref-to-ptr-error.c | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/test/CodeGen/WebAssembly/wasm-funcref-to-ptr-error.c b/clang/test/CodeGen/WebAssembly/wasm-funcref-to-ptr-error.c index ce663a70dd3b1..6924238cab01f 100644 --- a/clang/test/CodeGen/WebAssembly/wasm-funcref-to-ptr-error.c +++ b/clang/test/CodeGen/WebAssembly/wasm-funcref-to-ptr-error.c @@ -1,3 +1,4 @@ +// REQUIRES: webassembly-registered-target // RUN: not %clang_cc1 -triple wasm32 -target-feature +reference-types -S -o /dev/null %s 
2>&1 | FileCheck %s // RUN: not %clang_cc1 -triple wasm64 -target-feature +reference-types -S -o /dev/null %s 2>&1 | FileCheck %s From 2a9c1d217d8d96e75387d8bbc9c4dd5290d4bc50 Mon Sep 17 00:00:00 2001 From: Matt Date: Thu, 25 Jun 2026 00:45:34 -0500 Subject: [PATCH 456/511] [flang][OpenMP] Fix declare reduction lookup for USE...ONLY imports CheckSymbolSupportsType walked every module in the global scope to find declare-reduction declarations. That accepted reductions from modules that were never USE'd, or were excluded via USE...ONLY, and it still rejected some valid imports such as a renamed operator. Replace the global scan with FindUserReduction(), which resolves the reduction the way name resolution resolves the operator. It checks a directly visible reduction first, then follows the operator's USE associations and merged-generic sources to the declaring modules, re-deriving the source module's mangled name for renamed operators. The search recurses through re-exporting (facade) modules and is type-aware, so an operator that carries reductions for several types resolves to the one supporting the requested type. A locally declared reduction is authoritative and shadows reductions reachable through the operator. Consolidate the duplicated GetReductionFortranId() (formerly static in both resolve-names.cpp and mod-file.cpp) into a shared utility, fixing a latent bug where defined operators were not correctly reverse-mapped. This corrects the semantic check only. The local lowering ICE for a user-defined operator declare reduction was fixed separately in https://github.com/llvm/llvm-project/issues/204299. The cross-module cases this change additionally accepts (USE-associated, renamed, or merged operators) now emit a clean "not yet implemented" diagnostic in lowering. Completing their lowering is a follow-up. Fixes https://github.com/llvm/llvm-project/issues/200300. Assisted-by: Claude Opus 4.8. 
--- flang/lib/Semantics/check-omp-structure.cpp | 161 ++++++++++++++---- flang/lib/Semantics/mod-file.cpp | 28 +-- flang/lib/Semantics/resolve-names-utils.cpp | 35 ++++ flang/lib/Semantics/resolve-names-utils.h | 6 + flang/lib/Semantics/resolve-names.cpp | 34 +--- .../declare-reduction-overbroad-lookup.f90 | 32 ++++ .../declare-reduction-use-mixed-merged.f90 | 61 +++++++ .../declare-reduction-use-only-defined-op.f90 | 33 ++++ .../declare-reduction-use-only-merged.f90 | 60 +++++++ .../declare-reduction-use-only-renamed.f90 | 36 ++++ .../OpenMP/declare-reduction-use-only.f90 | 35 ++++ .../declare-reduction-use-reexport-merged.f90 | 63 +++++++ ...declare-reduction-use-reexport-remerge.f90 | 93 ++++++++++ .../declare-reduction-use-shadow-merged.f90 | 59 +++++++ .../OpenMP/declare-reduction-use-shadow.f90 | 41 +++++ 15 files changed, 687 insertions(+), 90 deletions(-) create mode 100644 flang/test/Semantics/OpenMP/declare-reduction-overbroad-lookup.f90 create mode 100644 flang/test/Semantics/OpenMP/declare-reduction-use-mixed-merged.f90 create mode 100644 flang/test/Semantics/OpenMP/declare-reduction-use-only-defined-op.f90 create mode 100644 flang/test/Semantics/OpenMP/declare-reduction-use-only-merged.f90 create mode 100644 flang/test/Semantics/OpenMP/declare-reduction-use-only-renamed.f90 create mode 100644 flang/test/Semantics/OpenMP/declare-reduction-use-only.f90 create mode 100644 flang/test/Semantics/OpenMP/declare-reduction-use-reexport-merged.f90 create mode 100644 flang/test/Semantics/OpenMP/declare-reduction-use-reexport-remerge.f90 create mode 100644 flang/test/Semantics/OpenMP/declare-reduction-use-shadow-merged.f90 create mode 100644 flang/test/Semantics/OpenMP/declare-reduction-use-shadow.f90 diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index d4572ec685e61..061b2bdf775d0 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -38,6 +38,7 @@ 
#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Frontend/OpenMP/OMP.h" @@ -3745,6 +3746,131 @@ void OmpStructureChecker::Enter(const parser::OmpClause::TaskReduction &x) { CheckReductionObjects(objects, llvm::omp::Clause::OMPC_task_reduction); } +// Compute the mangled reduction name to look up in a reduction's source module. +// If the operator was renamed on import (e.g. USE m, ONLY: operator(.local.) => +// operator(.remote.)), the local mangled name will not match in the source +// module; re-derive the lookup name from the source operator's ultimate name. +// Only defined operators can be renamed (intrinsic operators and named +// reductions cannot), so a detected rename always has a ".op." source name. +// For non-renamed lookups the original mangled name is returned unchanged. +static std::string SourceReductionName(const parser::CharBlock &mangledName, + const parser::CharBlock &localName, const parser::CharBlock &sourceName) { + if (sourceName != localName && sourceName.size() >= 3 && + sourceName.front() == '.' && sourceName.back() == '.') { + return MangleDefinedOperator(sourceName); + } + return mangledName.ToString(); +} + +// Return the reduction details of `symbol` if it is a user reduction that +// supports `type` (any type when `type` is null). +static const UserReductionDetails *AcceptReduction( + const Symbol &symbol, const DeclTypeSpec *type) { + const auto *details{symbol.GetUltimate().detailsIf()}; + if (details && (!type || details->SupportsType(*type))) { + return details; + } + return nullptr; +} + +// A reduction symbol is locally declared (authoritative) when it is not reached +// through any USE association, even via host association. Such a reduction +// shadows reductions imported or reachable through its operator. 
+static bool IsLocalReduction(const Symbol &symbol) { + const Symbol *s{&symbol}; + while (const auto *host{s->detailsIf()}) { + s = &host->symbol(); + } + return !s->detailsIf(); +} + +// Search for a user reduction supporting `type` by following the operator/ +// procedure symbol `opSym` through its USE associations and merged generic +// sources. Each module the operator passes through is checked for a (possibly +// renamed) reduction; `localName` is the operator name written at the use site, +// used to detect renames. A locally declared reduction in a module is +// authoritative: it is returned if it supports the type, otherwise it shadows +// reductions reachable further along that branch. +static const UserReductionDetails *SearchOperatorReduction(const Symbol &opSym, + const parser::CharBlock &mangledName, const parser::CharBlock &localName, + const DeclTypeSpec *type, llvm::SmallPtrSetImpl &visited) { + if (!visited.insert(&opSym).second) { + return nullptr; + } + const Scope &scope{opSym.owner()}; + if (scope.kind() == Scope::Kind::Module) { + std::string lookupName{ + SourceReductionName(mangledName, localName, opSym.name())}; + auto it{scope.find(parser::CharBlock{lookupName})}; + if (it != scope.end()) { + const Symbol &reductionSym{*it->second}; + const Symbol &reductionUltimate{reductionSym.GetUltimate()}; + if (!reductionUltimate.attrs().test(Attr::PRIVATE)) { + if (const auto *details{AcceptReduction(reductionUltimate, type)}) { + return details; + } + // A locally declared reduction here shadows reductions reachable + // further along this branch. + if (reductionUltimate.detailsIf() && + IsLocalReduction(reductionSym)) { + return nullptr; + } + } + } + } + // Follow a USE-associated operator to the module it was imported from. 
+ if (const auto *use{opSym.detailsIf()}) { + return SearchOperatorReduction( + use->symbol(), mangledName, localName, type, visited); + } + // Search each module merged into a generic operator (recursing through + // re-exporting facade modules). + if (const auto *generic{opSym.detailsIf()}) { + for (const Symbol &useSym : generic->uses()) { + if (const auto *details{SearchOperatorReduction( + useSym, mangledName, localName, type, visited)}) { + return details; + } + } + } + return nullptr; +} + +// Find user reduction details for a mangled name, following USE associations +// when the reduction is not directly visible in the scope. A type may be +// supplied to disambiguate an operator that carries reductions for several +// types (e.g. a generic merged from multiple modules); a candidate is accepted +// only if it supports that type. A locally declared reduction is authoritative +// for its operator in its scope and shadows USE-associated reductions. +static const UserReductionDetails *FindUserReduction(const Scope &scope, + const parser::CharBlock &mangledName, const DeclTypeSpec *type = nullptr) { + // Direct lookup: a reduction directly visible via bare USE or a local + // declaration. + const Symbol *directSymbol{scope.FindSymbol(mangledName)}; + if (directSymbol) { + if (const auto *details{AcceptReduction(*directSymbol, type)}) { + return details; + } + // A locally declared reduction that does not support the requested type is + // authoritative: it shadows USE-associated reductions (ProcessReduction- + // Specifier erases the latter), so do not resurrect them via the operator. + if (directSymbol->GetUltimate().detailsIf() && + IsLocalReduction(*directSymbol)) { + return nullptr; + } + } + // Trace the operator/procedure to the modules that declare its reduction. + std::string fortranName{GetReductionFortranId(mangledName)}; + const Symbol *opSymbol{ + fortranName.empty() ? 
nullptr : scope.FindSymbol(fortranName)}; + if (!opSymbol) { + return nullptr; + } + llvm::SmallPtrSet visited; + return SearchOperatorReduction( + *opSymbol, mangledName, opSymbol->name(), type, visited); +} + bool OmpStructureChecker::CheckReductionOperator( const parser::OmpReductionIdentifier &ident, parser::CharBlock source, llvm::omp::Clause clauseId) { @@ -3775,10 +3901,8 @@ bool OmpStructureChecker::CheckReductionOperator( if (const auto *definedOp{std::get_if(&dOpr.u)}) { std::string mangled{MangleDefinedOperator(definedOp->v.symbol->name())}; const Scope &scope{definedOp->v.symbol->owner()}; - if (const Symbol *symbol{scope.FindSymbol(mangled)}) { - if (symbol->GetUltimate().detailsIf()) { - return true; - } + if (FindUserReduction(scope, mangled)) { + return true; } } context_.Say(source, "Invalid reduction operator in %s clause."_err_en_US, @@ -3865,32 +3989,9 @@ void OmpStructureChecker::CheckReductionObjects( static bool CheckSymbolSupportsType(const Scope &scope, const parser::CharBlock &name, const DeclTypeSpec &type) { - if (const auto *symbol{scope.FindSymbol(name)}) { - const auto &ultimate{symbol->GetUltimate()}; - if (const auto *reductionDetails{ - ultimate.detailsIf()}) { - return reductionDetails->SupportsType(type); - } - } - // Look through module scopes in the global scope. - // This covers reductions declared in a module and used via USE association. - const SemanticsContext &semCtx{scope.context()}; - Scope &global = const_cast(semCtx).globalScope(); - for (const Scope &child : global.children()) { - if (child.kind() == Scope::Kind::Module) { - if (const auto *symbol{child.FindSymbol(name)}) { - // Skip PRIVATE reductions that aren't visible in the current scope. 
- if (symbol->attrs().test(Attr::PRIVATE)) { - continue; - } - if (const auto *reductionDetails{ - symbol->detailsIf()}) { - return reductionDetails->SupportsType(type); - } - } - } - } - return false; + // FindUserReduction only returns a reduction that supports the requested + // type. + return FindUserReduction(scope, name, &type) != nullptr; } static bool IsReductionAllowedForType( diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index 89a535c6ff6f9..fd1b1caa7fce1 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "mod-file.h" +#include "resolve-names-utils.h" #include "resolve-names.h" #include "flang/Common/restorer.h" #include "flang/Evaluate/tools.h" @@ -1145,33 +1146,6 @@ void ModFileWriter::PutTypeParam(llvm::raw_ostream &os, const Symbol &symbol) { os << '\n'; } -// Map a mangled reduction name to a valid Fortran accessibility identifier -// for module file serialization (e.g., op.+ → operator(+), op.max → max). -// Non-mangled names (procedure designators) are returned as-is. -static std::string GetReductionFortranId(const SourceName &mangledName) { - llvm::StringRef name{mangledName.begin(), mangledName.size()}; - if (!name.starts_with("op.")) { - return name.str(); - } - llvm::StringRef suffix{name.drop_front(3)}; - if (suffix == "+" || suffix == "-" || suffix == "*") { - return ("operator(" + suffix + ")").str(); - } - llvm::StringRef logicalOp{llvm::StringSwitch(suffix) - .Case("AND", ".and.") - .Case("OR", ".or.") - .Case("EQV", ".eqv.") - .Case("NEQV", ".neqv.") - .Default("")}; - if (!logicalOp.empty()) { - return ("operator(" + logicalOp + ")").str(); - } - if (suffix.size() > 2 && suffix.front() == '.' 
&& suffix.back() == '.') { - return ("operator(" + suffix + ")").str(); - } - return suffix.str(); -} - void ModFileWriter::PutUserReduction( llvm::raw_ostream &os, const Symbol &symbol) { const auto &details{symbol.get()}; diff --git a/flang/lib/Semantics/resolve-names-utils.cpp b/flang/lib/Semantics/resolve-names-utils.cpp index 5adeb46f86a7e..03d5df73a9d79 100644 --- a/flang/lib/Semantics/resolve-names-utils.cpp +++ b/flang/lib/Semantics/resolve-names-utils.cpp @@ -20,6 +20,7 @@ #include "flang/Semantics/tools.h" #include "flang/Support/Fortran-features.h" #include "flang/Support/Fortran.h" +#include "llvm/ADT/StringRef.h" #include #include @@ -952,4 +953,38 @@ void MapSubprogramToNewSymbols(const Symbol &oldSymbol, Symbol &newSymbol, newScope.InstantiateDerivedTypes(); } +std::string GetReductionFortranId(const parser::CharBlock &mangledName) { + llvm::StringRef name{mangledName.begin(), mangledName.size()}; + if (!name.starts_with("op.")) { + return name.str(); + } + llvm::StringRef suffix{name.drop_front(3)}; + // Intrinsic arithmetic operators: op.+ -> operator(+) + if (suffix == "+" || suffix == "-" || suffix == "*") { + return ("operator(" + suffix + ")").str(); + } + // Intrinsic logical operators (mangled uppercase, scope uses lowercase) + if (suffix == "AND") { + return "operator(.and.)"; + } + if (suffix == "OR") { + return "operator(.or.)"; + } + if (suffix == "EQV") { + return "operator(.eqv.)"; + } + if (suffix == "NEQV") { + return "operator(.neqv.)"; + } + // Defined operators: op.combine. -> .combine. + // MangleDefinedOperator prepends "op" to the operator name (e.g., + // ".combine.") so after stripping "op.", the suffix ends with '.' for defined + // operators. + if (!suffix.empty() && suffix.back() == '.') { + return ("." 
+ suffix).str(); + } + // Named functions: op.max -> max + return suffix.str(); +} + } // namespace Fortran::semantics diff --git a/flang/lib/Semantics/resolve-names-utils.h b/flang/lib/Semantics/resolve-names-utils.h index ee8113a3fda5e..a1bac72e45ca1 100644 --- a/flang/lib/Semantics/resolve-names-utils.h +++ b/flang/lib/Semantics/resolve-names-utils.h @@ -152,5 +152,11 @@ parser::CharBlock MakeNameFromOperator( parser::CharBlock MangleSpecialFunctions(const parser::CharBlock &name); std::string MangleDefinedOperator(const parser::CharBlock &name); +// Map a mangled declare reduction name (e.g., "op.+", "op.combine.", +// "op.max") back to the Fortran identifier used as the scope key for the +// corresponding operator or procedure (e.g., "operator(+)", ".combine.", +// "max"). Non-mangled names (procedure designators) are returned as-is. +std::string GetReductionFortranId(const parser::CharBlock &mangledName); + } // namespace Fortran::semantics #endif // FORTRAN_SEMANTICS_RESOLVE_NAMES_H_ diff --git a/flang/lib/Semantics/resolve-names.cpp b/flang/lib/Semantics/resolve-names.cpp index a6f3fe12eb9b7..11e24a5e1cccb 100644 --- a/flang/lib/Semantics/resolve-names.cpp +++ b/flang/lib/Semantics/resolve-names.cpp @@ -4543,38 +4543,6 @@ Scope *ModuleVisitor::FindModule(const parser::Name &name, return scope; } -// Map a mangled declare reduction name (e.g., op.+, op.max, op..myop.) back -// to the Fortran identifier that controls its accessibility in a module scope. -// Intrinsic operators map to "operator(+)" etc., named functions to "max" etc., -// and defined operators to "operator(.myop.)" etc. 
-static std::string GetReductionIdentifierName(const SourceName &mangledName) { - llvm::StringRef name{mangledName.begin(), mangledName.size()}; - if (!name.starts_with("op.")) { - return {}; - } - llvm::StringRef suffix{name.drop_front(3)}; - // Intrinsic arithmetic operators: op.+ → operator(+) - if (suffix == "+" || suffix == "-" || suffix == "*") { - return ("operator(" + suffix + ")").str(); - } - // Intrinsic logical operators (mangled uppercase, scope uses lowercase) - llvm::StringRef logicalOp{llvm::StringSwitch(suffix) - .Case("AND", ".and.") - .Case("OR", ".or.") - .Case("EQV", ".eqv.") - .Case("NEQV", ".neqv.") - .Default("")}; - if (!logicalOp.empty()) { - return ("operator(" + logicalOp + ")").str(); - } - // Defined operators: op..myop. → operator(.myop.) - if (suffix.size() > 2 && suffix.front() == '.' && suffix.back() == '.') { - return ("operator(" + suffix + ")").str(); - } - // Named functions: op.max → max - return suffix.str(); -} - void ModuleVisitor::ApplyDefaultAccess() { const auto *moduleDetails{ DEREF(currScope().symbol()).detailsIf()}; @@ -4601,7 +4569,7 @@ void ModuleVisitor::ApplyDefaultAccess() { // a module has accessibility as if it were declared as a module entity. // If the corresponding operator/procedure has explicit accessibility, // the reduction inherits it. - std::string opName{GetReductionIdentifierName(symbol.name())}; + std::string opName{GetReductionFortranId(symbol.name())}; if (!opName.empty()) { if (auto *opSym{FindInScope(currScope(), SourceName{opName})}) { if (opSym->attrs().test(Attr::PUBLIC)) { diff --git a/flang/test/Semantics/OpenMP/declare-reduction-overbroad-lookup.f90 b/flang/test/Semantics/OpenMP/declare-reduction-overbroad-lookup.f90 new file mode 100644 index 0000000000000..1db247b4eaff5 --- /dev/null +++ b/flang/test/Semantics/OpenMP/declare-reduction-overbroad-lookup.f90 @@ -0,0 +1,32 @@ +! RUN: not %flang_fc1 -fopenmp -fopenmp-version=52 %s 2>&1 | FileCheck %s + +! 
Test that a declare reduction from a module that was not USE'd (or only +! partially USE'd) is not incorrectly found during type checking. +! Related: https://github.com/llvm/llvm-project/issues/200300 + +module m_with_reduction + type :: t + integer :: val = 0 + end type + !$omp declare reduction(+:t:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t(0)) +end module + +! proxy re-exports only the type, not the reduction +module m_proxy + use m_with_reduction, only: t +end module + +program test_overbroad_lookup + use m_proxy + type(t) :: x + integer :: i + x = t(0) + !CHECK: error: The type of 'x' is incompatible with the reduction operator. + !$omp parallel do reduction(+:x) + do i = 1, 10 + x%val = x%val + 1 + end do + !$omp end parallel do + print *, x%val +end program diff --git a/flang/test/Semantics/OpenMP/declare-reduction-use-mixed-merged.f90 b/flang/test/Semantics/OpenMP/declare-reduction-use-mixed-merged.f90 new file mode 100644 index 0000000000000..e3d90c02716ca --- /dev/null +++ b/flang/test/Semantics/OpenMP/declare-reduction-use-mixed-merged.f90 @@ -0,0 +1,61 @@ +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fsyntax-only %s + +! Test that declare reduction resolves the type-correct reduction when an +! operator carries reductions for different types and one module is imported +! with a bare USE (making its reduction directly visible) while another is +! imported with USE...ONLY. The directly visible reduction must not shadow the +! reduction for the other type. + +module m_int + type :: t_int + integer :: val = 0 + end type + interface operator(.shared.) 
+ module procedure add_int + end interface + !$omp declare reduction(.shared.:t_int:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t_int(0)) +contains + type(t_int) function add_int(a, b) + type(t_int), intent(in) :: a, b + add_int%val = a%val + b%val + end function +end module + +module m_real + type :: t_real + real :: val = 0.0 + end type + interface operator(.shared.) + module procedure add_real + end interface + !$omp declare reduction(.shared.:t_real:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t_real(0.0)) +contains + type(t_real) function add_real(a, b) + type(t_real), intent(in) :: a, b + add_real%val = a%val + b%val + end function +end module + +program test_mixed_merged_reduction + use m_int ! bare USE: t_int reduction directly visible + use m_real, only: t_real, operator(.shared.) ! USE...ONLY: operator merged in + type(t_int) :: x + type(t_real) :: y + integer :: i + x = t_int(0) + y = t_real(0.0) + !$omp parallel do reduction(.shared.:x) + do i = 1, 10 + x%val = x%val + 1 + end do + !$omp end parallel do + ! The t_int reduction from m_int is directly visible, but resolving t_real + ! must still find m_real's reduction rather than stopping at t_int. + !$omp parallel do reduction(.shared.:y) + do i = 1, 10 + y%val = y%val + 1.0 + end do + !$omp end parallel do +end program diff --git a/flang/test/Semantics/OpenMP/declare-reduction-use-only-defined-op.f90 b/flang/test/Semantics/OpenMP/declare-reduction-use-only-defined-op.f90 new file mode 100644 index 0000000000000..8e215e2b626a4 --- /dev/null +++ b/flang/test/Semantics/OpenMP/declare-reduction-use-only-defined-op.f90 @@ -0,0 +1,33 @@ +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fsyntax-only %s + +! Test that declare reduction for a defined operator works correctly with +! USE...ONLY when only the operator interface is imported. + +module m_defined_op_reduction + type :: dt2 + real :: x = 0.0 + end type + interface operator(.combine.) 
+ module procedure combine_fn + end interface + !$omp declare reduction(.combine.:dt2:omp_out%x=omp_out%x+omp_in%x) & + !$omp initializer(omp_priv=dt2(0.0)) +contains + type(dt2) function combine_fn(a, b) + type(dt2), intent(in) :: a, b + combine_fn%x = a%x + b%x + end function +end module + +subroutine test_defined_op_use_only() + use m_defined_op_reduction, only: dt2, operator(.combine.) + type(dt2) :: y + integer :: i + y = dt2(0.0) + ! Should compile without error: reduction is accessible via operator(.combine.) + !$omp parallel do reduction(.combine.:y) + do i = 1, 10 + y%x = y%x + 1.0 + end do + !$omp end parallel do +end subroutine diff --git a/flang/test/Semantics/OpenMP/declare-reduction-use-only-merged.f90 b/flang/test/Semantics/OpenMP/declare-reduction-use-only-merged.f90 new file mode 100644 index 0000000000000..dda634c038cf8 --- /dev/null +++ b/flang/test/Semantics/OpenMP/declare-reduction-use-only-merged.f90 @@ -0,0 +1,60 @@ +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fsyntax-only %s + +! Test that declare reduction resolves correctly when the same operator is +! imported (and renamed) from several modules, each declaring a reduction for a +! different type. The merged generic must resolve to the reduction matching the +! requested type, not just the first module's reduction. + +module m_int + type :: t_int + integer :: val = 0 + end type + interface operator(.remote.) + module procedure add_int + end interface + !$omp declare reduction(.remote.:t_int:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t_int(0)) +contains + type(t_int) function add_int(a, b) + type(t_int), intent(in) :: a, b + add_int%val = a%val + b%val + end function +end module + +module m_real + type :: t_real + real :: val = 0.0 + end type + interface operator(.remote.) 
+ module procedure add_real + end interface + !$omp declare reduction(.remote.:t_real:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t_real(0.0)) +contains + type(t_real) function add_real(a, b) + type(t_real), intent(in) :: a, b + add_real%val = a%val + b%val + end function +end module + +program test_merged_use_only_reduction + use m_int, only: t_int, operator(.local.) => operator(.remote.) + use m_real, only: t_real, operator(.local.) => operator(.remote.) + type(t_int) :: x + type(t_real) :: y + integer :: i + x = t_int(0) + y = t_real(0.0) + ! The reduction for the first module (t_int) appears first in the merged + ! generic. Resolving the second type (t_real) must still succeed. + !$omp parallel do reduction(.local.:x) + do i = 1, 10 + x%val = x%val + 1 + end do + !$omp end parallel do + !$omp parallel do reduction(.local.:y) + do i = 1, 10 + y%val = y%val + 1.0 + end do + !$omp end parallel do +end program diff --git a/flang/test/Semantics/OpenMP/declare-reduction-use-only-renamed.f90 b/flang/test/Semantics/OpenMP/declare-reduction-use-only-renamed.f90 new file mode 100644 index 0000000000000..b5eb85975d597 --- /dev/null +++ b/flang/test/Semantics/OpenMP/declare-reduction-use-only-renamed.f90 @@ -0,0 +1,36 @@ +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fsyntax-only %s +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fdebug-dump-symbols %s | FileCheck %s + +! Test that declare reduction works correctly with USE...ONLY when +! the operator is renamed during import. + +module m_remote_reduction + type :: t + integer :: val = 0 + end type + interface operator(.remote.) + module procedure add_t + end interface + !$omp declare reduction(.remote.:t:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t(0)) +! 
CHECK: op.remote., PUBLIC: UserReductionDetails TYPE(t) +contains + type(t) function add_t(a, b) + type(t), intent(in) :: a, b + add_t%val = a%val + b%val + end function +end module + +program test_renamed_use_only_reduction + use m_remote_reduction, only: t, operator(.local.) => operator(.remote.) +! CHECK: .local. (Function): Use from .remote. in m_remote_reduction + type(t) :: x + integer :: i + x = t(0) + ! Should compile without error: reduction is accessible via renamed operator + !$omp parallel do reduction(.local.:x) + do i = 1, 10 + x%val = x%val + 1 + end do + !$omp end parallel do +end program diff --git a/flang/test/Semantics/OpenMP/declare-reduction-use-only.f90 b/flang/test/Semantics/OpenMP/declare-reduction-use-only.f90 new file mode 100644 index 0000000000000..655087144250c --- /dev/null +++ b/flang/test/Semantics/OpenMP/declare-reduction-use-only.f90 @@ -0,0 +1,35 @@ +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fsyntax-only %s + +! Test that declare reduction works correctly with USE...ONLY when +! only the operator (not the internal reduction symbol) is imported. + +module m_with_reduction + type :: dt + integer :: val = 0 + end type + interface operator(+) + module procedure add_dt + end interface + !$omp declare reduction(+:dt:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=dt(0)) +contains + type(dt) function add_dt(a, b) + type(dt), intent(in) :: a, b + add_dt%val = a%val + b%val + end function +end module + +! USE...ONLY imports operator(+) but not the internal op.+ symbol +program test_use_only + use m_with_reduction, only: dt, operator(+) + type(dt) :: x + integer :: i + x = dt(0) + ! 
Should compile without error: reduction is accessible via operator(+) + !$omp parallel do reduction(+:x) + do i = 1, 10 + x%val = x%val + 1 + end do + !$omp end parallel do + print *, x%val +end program diff --git a/flang/test/Semantics/OpenMP/declare-reduction-use-reexport-merged.f90 b/flang/test/Semantics/OpenMP/declare-reduction-use-reexport-merged.f90 new file mode 100644 index 0000000000000..8d83c2c76dc02 --- /dev/null +++ b/flang/test/Semantics/OpenMP/declare-reduction-use-reexport-merged.f90 @@ -0,0 +1,63 @@ +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fsyntax-only %s + +! Test that declare reduction resolves through a re-exporting (facade) module. +! The intermediate module merges the same operator from two modules (each with +! a reduction for a different type) and re-exports it. A program that imports +! the facade must resolve the reduction for both types via the merged generic's +! USE associations, not only the operator directly in the facade. + +module m_int + type :: t_int + integer :: val = 0 + end type + interface operator(.shared.) + module procedure add_int + end interface + !$omp declare reduction(.shared.:t_int:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t_int(0)) +contains + type(t_int) function add_int(a, b) + type(t_int), intent(in) :: a, b + add_int%val = a%val + b%val + end function +end module + +module m_real + type :: t_real + real :: val = 0.0 + end type + interface operator(.shared.) + module procedure add_real + end interface + !$omp declare reduction(.shared.:t_real:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t_real(0.0)) +contains + type(t_real) function add_real(a, b) + type(t_real), intent(in) :: a, b + add_real%val = a%val + b%val + end function +end module + +module m_facade + use m_int, only: t_int, operator(.shared.) + use m_real, only: t_real, operator(.shared.) 
+end module + +program test_reexport_merged_reduction + use m_facade + type(t_int) :: x + type(t_real) :: y + integer :: i + x = t_int(0) + y = t_real(0.0) + !$omp parallel do reduction(.shared.:x) + do i = 1, 10 + x%val = x%val + 1 + end do + !$omp end parallel do + !$omp parallel do reduction(.shared.:y) + do i = 1, 10 + y%val = y%val + 1.0 + end do + !$omp end parallel do +end program diff --git a/flang/test/Semantics/OpenMP/declare-reduction-use-reexport-remerge.f90 b/flang/test/Semantics/OpenMP/declare-reduction-use-reexport-remerge.f90 new file mode 100644 index 0000000000000..882f8ee03b9e0 --- /dev/null +++ b/flang/test/Semantics/OpenMP/declare-reduction-use-reexport-remerge.f90 @@ -0,0 +1,93 @@ +! RUN: %flang_fc1 -fopenmp -fopenmp-version=52 -fsyntax-only %s + +! Test that declare reduction resolves through a facade module that re-merges an +! already-merged operator with a further module's operator. This requires +! recursively traversing the merged generic's USE associations: the reductions +! for the first two types live in modules reached only through the inner facade. + +module m_int + type :: t_int + integer :: val = 0 + end type + interface operator(.shared.) + module procedure add_int + end interface + !$omp declare reduction(.shared.:t_int:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t_int(0)) +contains + type(t_int) function add_int(a, b) + type(t_int), intent(in) :: a, b + add_int%val = a%val + b%val + end function +end module + +module m_real + type :: t_real + real :: val = 0.0 + end type + interface operator(.shared.) 
+ module procedure add_real + end interface + !$omp declare reduction(.shared.:t_real:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t_real(0.0)) +contains + type(t_real) function add_real(a, b) + type(t_real), intent(in) :: a, b + add_real%val = a%val + b%val + end function +end module + +module m_cplx + type :: t_cplx + complex :: val = (0.0, 0.0) + end type + interface operator(.shared.) + module procedure add_cplx + end interface + !$omp declare reduction(.shared.:t_cplx:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t_cplx((0.0, 0.0))) +contains + type(t_cplx) function add_cplx(a, b) + type(t_cplx), intent(in) :: a, b + add_cplx%val = a%val + b%val + end function +end module + +! Inner facade merges .shared. from m_int and m_real. +module m_facade_inner + use m_int, only: t_int, operator(.shared.) + use m_real, only: t_real, operator(.shared.) +end module + +! Outer facade re-merges the inner (already-merged) operator with m_cplx. +module m_facade_outer + use m_facade_inner, only: t_int, t_real, operator(.shared.) + use m_cplx, only: t_cplx, operator(.shared.) +end module + +program test_reexport_remerge_reduction + use m_facade_outer + type(t_int) :: x + type(t_real) :: y + type(t_cplx) :: z + integer :: i + x = t_int(0) + y = t_real(0.0) + z = t_cplx((0.0, 0.0)) + ! t_int and t_real reach only through the inner facade (recursive traversal). 
+ !$omp parallel do reduction(.shared.:x) + do i = 1, 10 + x%val = x%val + 1 + end do + !$omp end parallel do + !$omp parallel do reduction(.shared.:y) + do i = 1, 10 + y%val = y%val + 1.0 + end do + !$omp end parallel do + !$omp parallel do reduction(.shared.:z) + do i = 1, 10 + z%val = z%val + (1.0, 0.0) + end do + !$omp end parallel do +end program diff --git a/flang/test/Semantics/OpenMP/declare-reduction-use-shadow-merged.f90 b/flang/test/Semantics/OpenMP/declare-reduction-use-shadow-merged.f90 new file mode 100644 index 0000000000000..67fa92e7e6667 --- /dev/null +++ b/flang/test/Semantics/OpenMP/declare-reduction-use-shadow-merged.f90 @@ -0,0 +1,59 @@ +! RUN: not %flang_fc1 -fopenmp -fopenmp-version=52 %s 2>&1 | FileCheck %s + +! A local DECLARE REDUCTION for an operator is treated as authoritative for that +! operator in its scope: it shadows reductions that are otherwise reachable +! through the (merged) operator for other types. Here module `host` declares its +! own `.shared.` reduction for `t_loc` and also merges in m_int's `.shared.` +! reduction for `t_int` via USE...ONLY of the operator. Using `.shared.` on a +! `t_int` object is rejected, because the local declaration is authoritative. +! +! This is a deliberate, conservative choice. The precise OpenMP semantics for a +! local declaration coexisting with a merged-in reduction for a different type +! are not clearly specified, so the lookup never accepts a reduction that a +! local declaration might be intended to shadow (it never over-accepts). A +! future refinement could instead keep merged-in reductions for other types +! reachable; this test documents the current behavior so a change is deliberate. + +module m_int + type :: t_int + integer :: val = 0 + end type + interface operator(.shared.) 
+ module procedure add_int + end interface + !$omp declare reduction(.shared.:t_int:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t_int(0)) +contains + type(t_int) function add_int(a, b) + type(t_int), intent(in) :: a, b + add_int%val = a%val + b%val + end function +end module + +module host + use m_int, only: t_int, operator(.shared.) ! merge in the t_int reduction + type :: t_loc + real :: val = 0.0 + end type + interface operator(.shared.) + module procedure add_loc + end interface + !$omp declare reduction(.shared.:t_loc:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t_loc(0.0)) +contains + type(t_loc) function add_loc(a, b) + type(t_loc), intent(in) :: a, b + add_loc%val = a%val + b%val + end function + subroutine s() + type(t_int) :: x + integer :: i + x = t_int(0) + !CHECK: error: The type of 'x' is incompatible with the reduction operator. + !$omp parallel do reduction(.shared.:x) + do i = 1, 10 + x%val = x%val + 1 + end do + !$omp end parallel do + end subroutine +end module diff --git a/flang/test/Semantics/OpenMP/declare-reduction-use-shadow.f90 b/flang/test/Semantics/OpenMP/declare-reduction-use-shadow.f90 new file mode 100644 index 0000000000000..dc48249ea1895 --- /dev/null +++ b/flang/test/Semantics/OpenMP/declare-reduction-use-shadow.f90 @@ -0,0 +1,41 @@ +! RUN: not %flang_fc1 -fopenmp -fopenmp-version=52 %s 2>&1 | FileCheck %s + +! A local DECLARE REDUCTION shadows a USE-associated reduction +! (ProcessReductionSpecifier erases the USE-associated symbol). The shadowed +! type must not be resurrected through the operator's source module: the local +! reduction is authoritative because the operator is not merged. + +module m_real_shadow + type :: t_real + real :: val = 0.0 + end type + interface operator(.shared.) 
+ module procedure add_real + end interface + !$omp declare reduction(.shared.:t_real:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t_real(0.0)) +contains + type(t_real) function add_real(a, b) + type(t_real), intent(in) :: a, b + add_real%val = a%val + b%val + end function +end module + +program test_shadowed_reduction + use m_real_shadow ! imports operator(.shared.) and the t_real reduction + type :: t_int + integer :: val = 0 + end type + type(t_real) :: y + integer :: i + ! Local declaration shadows the USE-associated reduction (now only t_int). + !$omp declare reduction(.shared.:t_int:omp_out%val=omp_out%val+omp_in%val) & + !$omp initializer(omp_priv=t_int(0)) + y = t_real(0.0) + !CHECK: error: The type of 'y' is incompatible with the reduction operator. + !$omp parallel do reduction(.shared.:y) + do i = 1, 10 + y%val = y%val + 1.0 + end do + !$omp end parallel do +end program From e392c53b6d41758f178671b09f47e58795cb23ee Mon Sep 17 00:00:00 2001 From: Kaviya Rajendiran <67495422+kaviya2510@users.noreply.github.com> Date: Thu, 25 Jun 2026 11:35:32 +0530 Subject: [PATCH 457/511] [Flang][Driver]Add support for option '-fpseudo-probe-for-profiling' in flang (#205046) Added support for option `-fpseudo-probe-for-profiling` in flang. - When the option `-fpseudo-probe-for-profiling` is passed, the compiler sets the `PseudoProbeForProfiling` flag and triggers the `SampleProfileProbePass`. This pass inserts `llvm.pseudoprobe(..)` intrinsic calls and `!llvm.pseudo_probe_desc` metadata into the IR. 
--- clang/include/clang/Options/Options.td | 2 +- clang/lib/Driver/ToolChains/Flang.cpp | 5 +++ .../include/flang/Frontend/CodeGenOptions.def | 1 + flang/lib/Frontend/CompilerInvocation.cpp | 5 +++ flang/lib/Frontend/FrontendActions.cpp | 22 +++++++++---- .../Driver/fpseudo-probe-for-profiling.f90 | 15 +++++++++ .../pseudo-probe-for-profiling.f90 | 33 +++++++++++++++++++ 7 files changed, 75 insertions(+), 8 deletions(-) create mode 100644 flang/test/Driver/fpseudo-probe-for-profiling.f90 create mode 100644 flang/test/Integration/pseudo-probe-for-profiling.f90 diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 4fc9f4d4c3472..3c2091013d152 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -1944,7 +1944,7 @@ defm pseudo_probe_for_profiling CodeGenOpts<"PseudoProbeForProfiling">, DefaultFalse, PosFlag, NegFlag, - BothFlags<[], [ClangOption, CC1Option, CLOption], + BothFlags<[], [ClangOption, CC1Option, CLOption, FlangOption, FC1Option], " pseudo probes for sample profiling">>; def fprofile_list_EQ : Joined<["-"], "fprofile-list=">, Group, Visibility<[ClangOption, CC1Option, CLOption]>, diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp index 1d74a34583311..ea4df1db38ec8 100644 --- a/clang/lib/Driver/ToolChains/Flang.cpp +++ b/clang/lib/Driver/ToolChains/Flang.cpp @@ -1022,6 +1022,11 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, const JobAction &JA, A->render(Args, CmdArgs); } } + + //-fpseudo-probe-for-profiling + if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling, + options::OPT_fno_pseudo_probe_for_profiling, false)) + CmdArgs.push_back("-fpseudo-probe-for-profiling"); } void Flang::ConstructJob(Compilation &C, const JobAction &JA, diff --git a/flang/include/flang/Frontend/CodeGenOptions.def b/flang/include/flang/Frontend/CodeGenOptions.def index 37931c0ffecc1..a5907b6edbd97 100644 --- 
a/flang/include/flang/Frontend/CodeGenOptions.def +++ b/flang/include/flang/Frontend/CodeGenOptions.def @@ -58,6 +58,7 @@ CODEGENOPT(UnrollLoops, 1, 0) ///< Enable loop unrolling CODEGENOPT(AliasAnalysis, 1, 0) ///< Enable alias analysis pass CODEGENOPT(DwarfVersion, 3, 0) ///< Dwarf version CODEGENOPT(DebugInfoForProfiling, 1, 0) ///< Emit extra debug info to make sample profile more accurate. +CODEGENOPT(PseudoProbeForProfiling, 1, 0) ///< Emit pseudo probes for sample profiling. CODEGENOPT(Underscoring, 1, 1) ENUM_CODEGENOPT(FPMaxminBehavior, Fortran::common::FPMaxminBehavior, 2, Fortran::common::FPMaxminBehavior::Legacy) diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp index ab961416441fe..0f1ace5d62667 100644 --- a/flang/lib/Frontend/CompilerInvocation.cpp +++ b/flang/lib/Frontend/CompilerInvocation.cpp @@ -488,6 +488,11 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts, opts.SampleProfileFile = args.getLastArgValue(clang::options::OPT_fprofile_sample_use_EQ); + if (args.hasFlag(clang::options::OPT_fpseudo_probe_for_profiling, + clang::options::OPT_fno_pseudo_probe_for_profiling, false)) { + opts.PseudoProbeForProfiling = 1; + } + // -mcmodel option. 
if (const llvm::opt::Arg *a = args.getLastArg(clang::options::OPT_mcmodel_EQ)) { diff --git a/flang/lib/Frontend/FrontendActions.cpp b/flang/lib/Frontend/FrontendActions.cpp index 426dea4869001..5fe876595d5c0 100644 --- a/flang/lib/Frontend/FrontendActions.cpp +++ b/flang/lib/Frontend/FrontendActions.cpp @@ -984,18 +984,26 @@ void CodeGenAction::runOptimizationPipeline(llvm::raw_pwrite_stream &os) { opts.ProfileInstrumentUsePath, "", opts.ProfileRemappingFile, opts.MemoryProfileUsePath, llvm::PGOOptions::IRUse, CSAction, llvm::PGOOptions::ColdFuncOpt::Default, opts.DebugInfoForProfiling); - } else if (opts.DebugInfoForProfiling) { - // -fdebug-info-for-profiling - pgoOpt = llvm::PGOOptions("", "", "", /*MemoryProfile=*/"", - llvm::PGOOptions::NoAction, - llvm::PGOOptions::NoCSAction, - llvm::PGOOptions::ColdFuncOpt::Default, true); } else if (!opts.SampleProfileFile.empty()) { pgoOpt = llvm::PGOOptions( opts.SampleProfileFile, "", opts.ProfileRemappingFile, opts.MemoryProfileUsePath, llvm::PGOOptions::SampleUse, llvm::PGOOptions::NoCSAction, llvm::PGOOptions::ColdFuncOpt::Default, - opts.DebugInfoForProfiling, /*PseudoProbeForProfiling=*/false); + opts.DebugInfoForProfiling, opts.PseudoProbeForProfiling); + } else if (opts.PseudoProbeForProfiling) { + pgoOpt = llvm::PGOOptions( + /*ProfileFile=*/"", /*CSProfileGenFile=*/"", + /*ProfileRemappingFile=*/"", + /*MemoryProfile=*/"", llvm::PGOOptions::NoAction, + llvm::PGOOptions::NoCSAction, llvm::PGOOptions::ColdFuncOpt::Default, + opts.DebugInfoForProfiling, /*PseudoProbeForProfiling=*/true); + } else if (opts.DebugInfoForProfiling) { + pgoOpt = llvm::PGOOptions(/*ProfileFile=*/"", /*CSProfileGenFile=*/"", + /*ProfileRemappingFile=*/"", /*MemoryProfile=*/"", + llvm::PGOOptions::NoAction, + llvm::PGOOptions::NoCSAction, + llvm::PGOOptions::ColdFuncOpt::Default, + /*DebugInfoForProfiling=*/true); } llvm::StandardInstrumentations si(llvmModule->getContext(), diff --git a/flang/test/Driver/fpseudo-probe-for-profiling.f90 
b/flang/test/Driver/fpseudo-probe-for-profiling.f90 new file mode 100644 index 0000000000000..8d5fc72d314be --- /dev/null +++ b/flang/test/Driver/fpseudo-probe-for-profiling.f90 @@ -0,0 +1,15 @@ +! Test to check the option "-fpseudo-probe-for-profiling". + +! RUN: %flang -### %s 2>&1 | FileCheck %s --check-prefix=NO-PROBE +! RUN: %flang -### -fpseudo-probe-for-profiling %s 2>&1 | FileCheck %s --check-prefix=PROBE +! RUN: %flang -### -fno-pseudo-probe-for-profiling %s 2>&1 | FileCheck %s --check-prefix=NO-PROBE +! RUN: %flang -### -fpseudo-probe-for-profiling -fno-pseudo-probe-for-profiling %s 2>&1 | FileCheck %s --check-prefix=NO-PROBE +! RUN: %flang -### -fpseudo-probe-for-profiling -fno-pseudo-probe-for-profiling -fpseudo-probe-for-profiling %s 2>&1 | FileCheck %s --check-prefix=PROBE + +! PROBE: "-fpseudo-probe-for-profiling" +! NO-PROBE-NOT: "-fpseudo-probe-for-profiling" + +subroutine test + implicit none + print *, 1 +end subroutine test diff --git a/flang/test/Integration/pseudo-probe-for-profiling.f90 b/flang/test/Integration/pseudo-probe-for-profiling.f90 new file mode 100644 index 0000000000000..a11cb4502dcf1 --- /dev/null +++ b/flang/test/Integration/pseudo-probe-for-profiling.f90 @@ -0,0 +1,33 @@ +! Test -fpseudo-probe-for-profiling option runs SampleProfileProbePass and emits llvm.pseudoprobe intrinsic calls. +! +! RUN: %flang_fc1 -emit-llvm -fdebug-pass-manager -fpseudo-probe-for-profiling -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=PROBE-PASS +! RUN: %flang_fc1 -emit-llvm -O0 -fpseudo-probe-for-profiling -o - %s | FileCheck %s --check-prefix=PROBE +! RUN: %flang_fc1 -emit-llvm -O2 -fpseudo-probe-for-profiling -o - %s | FileCheck %s --check-prefix=PROBE + +! Test that -fdebug-info-for-profiling combined with -fpseudo-probe-for-profiling still emits pseudo-probes and debug info. +! RUN: %flang_fc1 -emit-llvm -O2 -debug-info-kind=standalone \ +! 
RUN: -fdebug-info-for-profiling -fpseudo-probe-for-profiling -o - %s | FileCheck %s --check-prefix=PROBE-AND-DEBUG + +! PROBE-PASS: Running pass: SampleProfileProbePass on {{.*}} + +! PROBE-LABEL: define void @foo +! PROBE: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) +! PROBE: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1) +! PROBE: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1) +! PROBE: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 6, i32 0, i64 -1) +! PROBE: !llvm.pseudo_probe_desc = !{ + +! PROBE-AND-DEBUG: call void @llvm.pseudoprobe +! PROBE-AND-DEBUG: !llvm.pseudo_probe_desc = !{ +! PROBE-AND-DEBUG: !DICompileUnit({{.*}}debugInfoForProfiling: true{{.*}}) +! PROBE-AND-DEBUG: !DILexicalBlockFile({{.*}}discriminator: + +subroutine foo(x) + implicit none + integer, intent(in) :: x + if (x == 0) then + call bar + else + call go + end if +end subroutine foo From 600479fe772429875082d638e25371269efbe906 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Thu, 25 Jun 2026 08:06:31 +0200 Subject: [PATCH 458/511] [clang][bytecode] Pass AccessKinds to Check{Constant,Mutable} (#205720) So we can pass them on to `diagnoseNonConstVariable`. This doesn't make a difference right now but is needed for a future commit. 
--- clang/lib/AST/ByteCode/Interp.cpp | 20 +++++++++++--------- clang/lib/AST/ByteCode/Interp.h | 3 ++- clang/lib/AST/ByteCode/InterpHelpers.h | 8 +++++--- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/clang/lib/AST/ByteCode/Interp.cpp b/clang/lib/AST/ByteCode/Interp.cpp index 3772def47408f..3d5fda7ddf3c7 100644 --- a/clang/lib/AST/ByteCode/Interp.cpp +++ b/clang/lib/AST/ByteCode/Interp.cpp @@ -454,7 +454,8 @@ bool CheckLive(InterpState &S, CodePtr OpPC, const Pointer &Ptr, return true; } -bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc) { +bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc, + AccessKinds AK) { assert(Desc); const auto *D = Desc->asVarDecl(); @@ -472,7 +473,7 @@ bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc) { bool IsConstant = T.isConstant(S.getASTContext()); if (T->isIntegralOrEnumerationType()) { if (!IsConstant) { - diagnoseNonConstVariable(S, OpPC, D); + diagnoseNonConstVariable(S, OpPC, D, AK); return false; } return true; @@ -496,22 +497,23 @@ bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc) { if (T->isPointerOrReferenceType()) { if (!T->getPointeeType().isConstant(S.getASTContext()) || !S.getLangOpts().CPlusPlus11) { - diagnoseNonConstVariable(S, OpPC, D); + diagnoseNonConstVariable(S, OpPC, D, AK); return false; } return true; } - diagnoseNonConstVariable(S, OpPC, D); + diagnoseNonConstVariable(S, OpPC, D, AK); return false; } -static bool CheckConstant(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { +static bool CheckConstant(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + AccessKinds AK = AK_Read) { if (!Ptr.isStatic() || !Ptr.isBlockPointer()) return true; if (!Ptr.getDeclID()) return true; - return CheckConstant(S, OpPC, Ptr.getDeclDesc()); + return CheckConstant(S, OpPC, Ptr.getDeclDesc(), AK); } bool CheckNull(InterpState &S, CodePtr OpPC, const Pointer &Ptr, @@ -641,7 +643,7 @@ bool CheckConst(InterpState &S, 
CodePtr OpPC, const Pointer &Ptr) { return false; } -bool CheckMutable(InterpState &S, CodePtr OpPC, PtrView Ptr) { +bool CheckMutable(InterpState &S, CodePtr OpPC, PtrView Ptr, AccessKinds AK) { assert(Ptr.isLive() && "Pointer is not live"); if (!Ptr.isMutable()) return true; @@ -653,7 +655,7 @@ bool CheckMutable(InterpState &S, CodePtr OpPC, PtrView Ptr) { const SourceInfo &Loc = S.Current->getSource(OpPC); const FieldDecl *Field = Ptr.getField(); - S.FFDiag(Loc, diag::note_constexpr_access_mutable, 1) << AK_Read << Field; + S.FFDiag(Loc, diag::note_constexpr_access_mutable, 1) << AK << Field; S.Note(Field->getLocation(), diag::note_declared_at); return false; } @@ -874,7 +876,7 @@ bool CheckLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr, return CheckWeak(S, OpPC, Ptr.block()); } - if (!CheckConstant(S, OpPC, Ptr)) + if (!CheckConstant(S, OpPC, Ptr, AK)) return false; if (!CheckRange(S, OpPC, Ptr, AK)) return false; diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index d1836b6b739b2..ed640f7325f7e 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -75,7 +75,8 @@ bool CheckDowncast(InterpState &S, CodePtr OpPC, const Pointer &Ptr, bool CheckConst(InterpState &S, CodePtr OpPC, const Pointer &Ptr); /// Checks if the Descriptor is of a constexpr or const global variable. -bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc); +bool CheckConstant(InterpState &S, CodePtr OpPC, const Descriptor *Desc, + AccessKinds AK = AK_Read); bool CheckFinalLoad(InterpState &S, CodePtr OpPC, const Pointer &Ptr); diff --git a/clang/lib/AST/ByteCode/InterpHelpers.h b/clang/lib/AST/ByteCode/InterpHelpers.h index 5c6ba0eda5de3..da305dcb565d5 100644 --- a/clang/lib/AST/ByteCode/InterpHelpers.h +++ b/clang/lib/AST/ByteCode/InterpHelpers.h @@ -58,11 +58,13 @@ bool CheckRange(InterpState &S, CodePtr OpPC, const Pointer &Ptr, CheckSubobjectKind CSK); /// Checks if a pointer points to a mutable field. 
-bool CheckMutable(InterpState &S, CodePtr OpPC, PtrView Ptr); -inline bool CheckMutable(InterpState &S, CodePtr OpPC, const Pointer &Ptr) { +bool CheckMutable(InterpState &S, CodePtr OpPC, PtrView Ptr, + AccessKinds AK = AK_Read); +inline bool CheckMutable(InterpState &S, CodePtr OpPC, const Pointer &Ptr, + AccessKinds AK = AK_Read) { if (!Ptr.isBlockPointer()) return true; - return CheckMutable(S, OpPC, Ptr.view()); + return CheckMutable(S, OpPC, Ptr.view(), AK); } /// Checks if a value can be loaded from a block. From 40dfcaa0b180adad71f868569207ff16cd12f6c8 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 25 Jun 2026 07:42:22 +0100 Subject: [PATCH 459/511] [AArch64] Correct latency calculation in runSVEPseudoTestForCPU test. NFC (#205100) It does not look like this caused problems in the pseudo scheduling tests, but is accumulating the wrong latency. I added extra checks that the NumWriteLatencyEntries were the same in both cases whilst I was here too. --- .../Target/AArch64/AArch64SVESchedPseudoTest.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp b/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp index fcd988d66682e..9474502c805e1 100644 --- a/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp +++ b/llvm/unittests/Target/AArch64/AArch64SVESchedPseudoTest.cpp @@ -87,6 +87,13 @@ void runSVEPseudoTestForCPU(const std::string &CPU) { int Latency = 0; int LatencyOrig = 0; + ASSERT_TRUE(SCDesc->isValid()); + ASSERT_TRUE(SCDescOrig->isValid()); + // We need to handle the variant if this becomes true + ASSERT_FALSE(SCDesc->isVariant()); + ASSERT_FALSE(SCDescOrig->isVariant()); + ASSERT_EQ(SCDesc->NumWriteLatencyEntries, + SCDescOrig->NumWriteLatencyEntries); for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; DefIdx != DefEnd; ++DefIdx) { const MCWriteLatencyEntry *WLEntry = @@ -94,11 +101,11 @@ void runSVEPseudoTestForCPU(const 
std::string &CPU) { const MCWriteLatencyEntry *WLEntryOrig = STI.getWriteLatencyEntry(SCDescOrig, DefIdx); Latency = std::max(Latency, static_cast(WLEntry->Cycles)); - LatencyOrig = std::max(Latency, static_cast(WLEntryOrig->Cycles)); + LatencyOrig = + std::max(LatencyOrig, static_cast(WLEntryOrig->Cycles)); } ASSERT_EQ(Latency, LatencyOrig); - ASSERT_TRUE(SCDesc->isValid()); } } From 4cb5854c9ff1fe172a8a182a85a20e2004ad7e77 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jun 2026 09:10:58 +0200 Subject: [PATCH 460/511] clang: Fix referring to __builtin_amdgcn_is_processor in diagnostic (#205725) The builtin name is really __builtin_amdgcn_processor_is. --- clang/include/clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/test/SemaHIP/amdgpu-feature-predicates-guard-use.hip | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index cde99dfb16ec5..e3aa1c7bfeb5b 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -14267,7 +14267,7 @@ def warn_amdgcn_unguarded_asm_stmt InGroup, DefaultIgnore; def note_amdgcn_unguarded_asm_silence : Note<"enclose the '%0' ASM sequence in a scope controlled by a " - "__builtin_amdgcn_is_processor check to silence this warning">; + "__builtin_amdgcn_processor_is check to silence this warning">; def err_amdgcn_incompatible_builtin : Error<"%0 cannot be invoked in the current context, as it requires the " "'%1' feature(s)%select{|, which '%3' does not provide}2">; diff --git a/clang/test/SemaHIP/amdgpu-feature-predicates-guard-use.hip b/clang/test/SemaHIP/amdgpu-feature-predicates-guard-use.hip index 1566bc9aa0be5..345849374e6fd 100644 --- a/clang/test/SemaHIP/amdgpu-feature-predicates-guard-use.hip +++ b/clang/test/SemaHIP/amdgpu-feature-predicates-guard-use.hip @@ -10,7 +10,7 @@ __device__ void g(); __device__ void f(int x, bool b) { long v15_16; 
__asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(x)); // expected-warning {{the 'v_lshlrev_b64 v[15:16], 0, $0' ASM sequence might be invalid for some AMDGPU targets}} - // expected-note@-1 {{enclose the 'v_lshlrev_b64 v[15:16], 0, $0' ASM sequence in a scope controlled by a __builtin_amdgcn_is_processor check to silence this warning}} + // expected-note@-1 {{enclose the 'v_lshlrev_b64 v[15:16], 0, $0' ASM sequence in a scope controlled by a __builtin_amdgcn_processor_is check to silence this warning}} if (__builtin_amdgcn_processor_is("gfx90a")) { long v15_16; @@ -20,7 +20,7 @@ __device__ void f(int x, bool b) { if (!__builtin_amdgcn_processor_is("gfx90a")) { long v15_16; __asm volatile("v_lshlrev_b64 v[15:16], 0, %0" : "={v[15:16]}"(v15_16) : "v"(x)); // expected-warning {{the 'v_lshlrev_b64 v[15:16], 0, $0' ASM sequence might be invalid for some AMDGPU targets}} - // expected-note@-1 {{enclose the 'v_lshlrev_b64 v[15:16], 0, $0' ASM sequence in a scope controlled by a __builtin_amdgcn_is_processor check to silence this warning}} + // expected-note@-1 {{enclose the 'v_lshlrev_b64 v[15:16], 0, $0' ASM sequence in a scope controlled by a __builtin_amdgcn_processor_is check to silence this warning}} } __builtin_amdgcn_is_invocable(__builtin_amdgcn_s_sleep_var) ? __builtin_amdgcn_s_sleep_var(x) : __builtin_trap(); From f424b74ae5c0109a1914af10e0b9a08f6c20833b Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 25 Jun 2026 09:11:33 +0200 Subject: [PATCH 461/511] [libc++] Enable clang-tidy in the CI again (#195047) clang-tidy currently doesn't run in the CI because CMake doesn't find the appropriate `ClangConfig.cmake` anymore. This adds the path so clang-tidy is built again and fixes most issues. Some things are disabled for now to get most checks back. They will be re-enabled later. 
--- cmake/Modules/FindLibcCommonUtils.cmake | 2 +- libcxx/include/__functional/function.h | 2 ++ libcxx/include/__locale_dir/wbuffer_convert.h | 2 ++ libcxx/include/__memory/is_sufficiently_aligned.h | 2 +- libcxx/include/__memory/unique_ptr.h | 4 ++-- libcxx/include/__tree | 4 ++-- libcxx/include/map | 12 ++++++------ libcxx/include/variant | 3 ++- libcxx/src/include/to_chars_floating_point.h | 2 +- libcxx/test/configs/llvm-libc++-llvm-libc.cfg.in | 2 +- libcxx/test/libcxx/clang_tidy.gen.py | 7 ++++++- libcxx/test/libcxx/module_std_compat.gen.py | 3 +++ .../test/tools/clang_tidy_checks/hide_from_abi.cpp | 10 ---------- libcxx/utils/ci/run-buildbot | 7 ++++++- libcxxabi/include/cxxabi.h | 2 +- .../test/configs/llvm-libc++abi-llvm-libc.cfg.in | 2 +- 16 files changed, 37 insertions(+), 29 deletions(-) diff --git a/cmake/Modules/FindLibcCommonUtils.cmake b/cmake/Modules/FindLibcCommonUtils.cmake index 81cf74fbd0d41..8f3a2738c6003 100644 --- a/cmake/Modules/FindLibcCommonUtils.cmake +++ b/cmake/Modules/FindLibcCommonUtils.cmake @@ -15,7 +15,7 @@ if(NOT TARGET llvm-libc-common-utilities) if (NOT(LIBCXX_ENABLE_THREADS)) target_compile_definitions(llvm-libc-common-utilities INTERFACE LIBC_THREAD_MODE=LIBC_THREAD_MODE_SINGLE) endif() - target_include_directories(llvm-libc-common-utilities INTERFACE ${libc_path}) + target_include_directories(llvm-libc-common-utilities SYSTEM INTERFACE ${libc_path}) target_compile_definitions(llvm-libc-common-utilities INTERFACE LIBC_NAMESPACE=__llvm_libc_common_utils) target_compile_features(llvm-libc-common-utilities INTERFACE cxx_std_17) endif() diff --git a/libcxx/include/__functional/function.h b/libcxx/include/__functional/function.h index d62b6b06722a7..90072e9528484 100644 --- a/libcxx/include/__functional/function.h +++ b/libcxx/include/__functional/function.h @@ -131,6 +131,7 @@ namespace __function { template class __base; +_LIBCPP_BEGIN_EXPLICIT_ABI_ANNOTATIONS template class __base<_Rp(_ArgTypes...)> { public: @@ -149,6 +150,7 
@@ class __base<_Rp(_ArgTypes...)> { virtual const std::type_info& target_type() const _NOEXCEPT = 0; # endif // _LIBCPP_HAS_RTTI }; +_LIBCPP_END_EXPLICIT_ABI_ANNOTATIONS // __func implements __base for a given functor type. diff --git a/libcxx/include/__locale_dir/wbuffer_convert.h b/libcxx/include/__locale_dir/wbuffer_convert.h index c8bad68011a09..9c87fac6cf193 100644 --- a/libcxx/include/__locale_dir/wbuffer_convert.h +++ b/libcxx/include/__locale_dir/wbuffer_convert.h @@ -27,6 +27,7 @@ _LIBCPP_PUSH_MACROS # include <__undef_macros> _LIBCPP_BEGIN_NAMESPACE_STD +_LIBCPP_BEGIN_EXPLICIT_ABI_ANNOTATIONS template > class _LIBCPP_DEPRECATED_IN_CXX17 wbuffer_convert : public basic_streambuf<_Elem, _Tr> { @@ -419,6 +420,7 @@ wbuffer_convert<_Codecvt, _Elem, _Tr>* wbuffer_convert<_Codecvt, _Elem, _Tr>::__ _LIBCPP_SUPPRESS_DEPRECATED_POP +_LIBCPP_END_EXPLICIT_ABI_ANNOTATIONS _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS diff --git a/libcxx/include/__memory/is_sufficiently_aligned.h b/libcxx/include/__memory/is_sufficiently_aligned.h index a0ec198993afd..b8725880028db 100644 --- a/libcxx/include/__memory/is_sufficiently_aligned.h +++ b/libcxx/include/__memory/is_sufficiently_aligned.h @@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD template -[[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_sufficiently_aligned(_Tp* __ptr) { +[[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI bool __is_sufficiently_aligned(_Tp* __ptr) { return reinterpret_cast(__ptr) % _Alignment == 0; } diff --git a/libcxx/include/__memory/unique_ptr.h b/libcxx/include/__memory/unique_ptr.h index 32f9459f9fc2c..7e68faa15dabb 100644 --- a/libcxx/include/__memory/unique_ptr.h +++ b/libcxx/include/__memory/unique_ptr.h @@ -189,7 +189,7 @@ class _LIBCPP_UNIQUE_PTR_TRIVIAL_ABI unique_ptr { #endif _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr& operator=(unique_ptr&& __u) _NOEXCEPT { - reset(__u.release()); + reset(__u.release()); // NOLINT(misc-uniqueptr-reset-release) __deleter_ = 
std::forward(__u.get_deleter()); return *this; } @@ -478,7 +478,7 @@ class _LIBCPP_UNIQUE_PTR_TRIVIAL_ABI unique_ptr<_Tp[], _Dp> { __checker_(std::move(__u.__checker_)) {} _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX23 unique_ptr& operator=(unique_ptr&& __u) _NOEXCEPT { - reset(__u.release()); + reset(__u.release()); // NOLINT(misc-uniqueptr-reset-release) __deleter_ = std::forward(__u.get_deleter()); __checker_ = std::move(__u.__checker_); return *this; diff --git a/libcxx/include/__tree b/libcxx/include/__tree index 94c88e273870a..5ff1387bc25d2 100644 --- a/libcxx/include/__tree +++ b/libcxx/include/__tree @@ -1257,7 +1257,7 @@ public: template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 iterator find(const _Key& __key) { - auto [__, __match] = __find_equal(__key); + auto [__dummy, __match] = __find_equal(__key); if (__match == nullptr) return end(); return iterator(std::__static_fancy_pointer_cast<__node_pointer>(__match)); @@ -1265,7 +1265,7 @@ public: template _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 const_iterator find(const _Key& __key) const { - auto [__, __match] = __find_equal(__key); + auto [__dummy, __match] = __find_equal(__key); if (__match == nullptr) return end(); return const_iterator(std::__static_fancy_pointer_cast<__node_pointer>(__match)); diff --git a/libcxx/include/map b/libcxx/include/map index de0a475ee572f..8efe18fa3f839 100644 --- a/libcxx/include/map +++ b/libcxx/include/map @@ -1155,7 +1155,7 @@ public: template >, int> = 0> [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 mapped_type& at(_Arg&& __arg) { - auto [_, __child] = __tree_.__find_equal(__arg); + auto [__dummy, __child] = __tree_.__find_equal(__arg); if (__child == nullptr) std::__throw_out_of_range("map::at: key not found"); return std::__static_fancy_pointer_cast<__node_pointer>(__child)->__get_value().second; @@ -1164,7 +1164,7 @@ public: template >, int> = 0> [[__nodiscard__]] _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 
const mapped_type& at(_Arg&& __arg) const { - auto [_, __child] = __tree_.__find_equal(__arg); + auto [__dummy, __child] = __tree_.__find_equal(__arg); if (__child == nullptr) std::__throw_out_of_range("map::at: key not found"); return std::__static_fancy_pointer_cast<__node_pointer>(__child)->__get_value().second; @@ -1581,7 +1581,7 @@ struct __specialized_algorithm<_Algorithm::__for_each, __single_range _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto operator()(_Map&& __map, _Func __func, _Proj __proj) { - auto [_, __func2] = __specialized_algorithm<_Algorithm::__for_each, __single_range>()( + auto [__dummy, __func2] = __specialized_algorithm<_Algorithm::__for_each, __single_range>()( __map.__tree_, std::move(__func), std::move(__proj)); return std::make_pair(__map.end(), std::move(__func2)); } @@ -1632,7 +1632,7 @@ _Tp& map<_Key, _Tp, _Compare, _Allocator>::operator[](const key_type& __k) { template _LIBCPP_CONSTEXPR_SINCE_CXX26 _Tp& map<_Key, _Tp, _Compare, _Allocator>::at(const key_type& __k) { - auto [_, __child] = __tree_.__find_equal(__k); + auto [__dummy, __child] = __tree_.__find_equal(__k); if (__child == nullptr) std::__throw_out_of_range("map::at: key not found"); return std::__static_fancy_pointer_cast<__node_pointer>(__child)->__get_value().second; @@ -1640,7 +1640,7 @@ _LIBCPP_CONSTEXPR_SINCE_CXX26 _Tp& map<_Key, _Tp, _Compare, _Allocator>::at(cons template _LIBCPP_CONSTEXPR_SINCE_CXX26 const _Tp& map<_Key, _Tp, _Compare, _Allocator>::at(const key_type& __k) const { - auto [_, __child] = __tree_.__find_equal(__k); + auto [__dummy, __child] = __tree_.__find_equal(__k); if (__child == nullptr) std::__throw_out_of_range("map::at: key not found"); return std::__static_fancy_pointer_cast<__node_pointer>(__child)->__get_value().second; @@ -2247,7 +2247,7 @@ struct __specialized_algorithm<_Algorithm::__for_each, __single_range _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX26 static auto operator()(_Map&& __map, _Func __func, _Proj 
__proj) { - auto [_, __func2] = __specialized_algorithm<_Algorithm::__for_each, __single_range>()( + auto [__dummy, __func2] = __specialized_algorithm<_Algorithm::__for_each, __single_range>()( __map.__tree_, std::move(__func), std::move(__proj)); return std::make_pair(__map.end(), std::move(__func2)); } diff --git a/libcxx/include/variant b/libcxx/include/variant index fd5765e54808a..8d76931e65de9 100644 --- a/libcxx/include/variant +++ b/libcxx/include/variant @@ -554,7 +554,7 @@ private: } template - using __expander = _Tp; + using __expander _LIBCPP_NODEBUG = _Tp; template _LIBCPP_HIDE_FROM_ABI static constexpr auto __make_fdiagonal_impl() { @@ -1593,6 +1593,7 @@ template < class _Visitor, class... _Vs, typename> _LIBCPP_HIDE_FROM_ABI constexpr decltype(auto) visit(_Visitor&& __visitor, _Vs&&... __vs) { using __variant_detail::__visitation::__variant; std::__throw_if_valueless(std::forward<_Vs>(__vs)...); + // NOLINTNEXTLINE(bugprone-use-after-move) __throw_if_valueless doesn't actually forward the variants return __variant::__visit_value(std::forward<_Visitor>(__visitor), std::forward<_Vs>(__vs)...); } diff --git a/libcxx/src/include/to_chars_floating_point.h b/libcxx/src/include/to_chars_floating_point.h index 01c26181697b7..8877c252d0399 100644 --- a/libcxx/src/include/to_chars_floating_point.h +++ b/libcxx/src/include/to_chars_floating_point.h @@ -38,7 +38,7 @@ namespace __itoa { inline constexpr char _Charconv_digits[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}; static_assert(std::size(_Charconv_digits) == 36); -} // __itoa +} // namespace __itoa // vvvvvvvvvv DERIVED FROM corecrt_internal_fltintrn.h vvvvvvvvvv diff --git a/libcxx/test/configs/llvm-libc++-llvm-libc.cfg.in b/libcxx/test/configs/llvm-libc++-llvm-libc.cfg.in index 605a890709400..cae8a8007214e 100644 --- a/libcxx/test/configs/llvm-libc++-llvm-libc.cfg.in 
+++ b/libcxx/test/configs/llvm-libc++-llvm-libc.cfg.in @@ -7,7 +7,7 @@ config.substitutions.append(('%{flags}', '-pthread' + (' -isysroot {}'.format('@CMAKE_OSX_SYSROOT@') if '@CMAKE_OSX_SYSROOT@' else '') )) config.substitutions.append(('%{compile_flags}', - '-nostdlibinc -I %{include-dir} -I %{target-include-dir} -I @CMAKE_BINARY_DIR@/libc/include -I %{libcxx-dir}/test/support -idirafter @LIBC_KERNEL_HEADERS@' + '-nostdlibinc -I %{include-dir} -I %{target-include-dir} -isystem @CMAKE_BINARY_DIR@/libc/include -I %{libcxx-dir}/test/support -idirafter @LIBC_KERNEL_HEADERS@' )) config.substitutions.append(('%{link_flags}', '-nodefaultlibs -nostartfiles -L %{lib-dir} -lc++ -lc++abi @CMAKE_BINARY_DIR@/libc/startup/linux/crt1.o @CMAKE_BINARY_DIR@/libc/lib/libc.a @CMAKE_BINARY_DIR@/libc/lib/libm.a -static -fno-use-cxa-atexit @CMAKE_BINARY_DIR@/compiler-rt/lib/linux/libclang_rt.builtins-x86_64.a' diff --git a/libcxx/test/libcxx/clang_tidy.gen.py b/libcxx/test/libcxx/clang_tidy.gen.py index 10dfe053f5ccb..3cedda06139e9 100644 --- a/libcxx/test/libcxx/clang_tidy.gen.py +++ b/libcxx/test/libcxx/clang_tidy.gen.py @@ -13,6 +13,9 @@ # The GCC compiler flags are not always compatible with clang-tidy. # UNSUPPORTED: gcc +# FIXME: Enable this again in C++03 +# UNSUPPORTED: c++03 + # RUN: %{python} %s %{libcxx-dir}/utils # END. 
@@ -20,6 +23,8 @@ sys.path.append(sys.argv[1]) from libcxx.header_information import lit_header_undeprecations, public_headers +# FIXME: Enable -Wweak-vtables again + for header in public_headers: print(f"""\ //--- {header}.sh.cpp @@ -30,7 +35,7 @@ // RUN: -header-filter=.* \\ // RUN: --config-file=%{{libcxx-dir}}/.clang-tidy \\ // RUN: --load=%{{test-tools-dir}}/clang_tidy_checks/libcxx-tidy.plugin \\ -// RUN: -- -Wweak-vtables %{{flags}} %{{compile_flags}} -fno-modules +// RUN: -- %{{flags}} %{{compile_flags}} -fno-modules #include <{header}> """) diff --git a/libcxx/test/libcxx/module_std_compat.gen.py b/libcxx/test/libcxx/module_std_compat.gen.py index 000aa29986122..947f1fb2815be 100644 --- a/libcxx/test/libcxx/module_std_compat.gen.py +++ b/libcxx/test/libcxx/module_std_compat.gen.py @@ -16,6 +16,9 @@ # to be one monolitic test. Since the test doesn't take very long it's # not a huge issue. +# FIXME: Re-enable this test when we fix the clang-tidy plugin that implements it +# UNSUPPORTED: true + # RUN: %{python} %s %{libcxx-dir}/utils import sys diff --git a/libcxx/test/tools/clang_tidy_checks/hide_from_abi.cpp b/libcxx/test/tools/clang_tidy_checks/hide_from_abi.cpp index 38bf62019599e..f05470e47d3e7 100644 --- a/libcxx/test/tools/clang_tidy_checks/hide_from_abi.cpp +++ b/libcxx/test/tools/clang_tidy_checks/hide_from_abi.cpp @@ -78,9 +78,6 @@ void hide_from_abi::registerMatchers(clang::ast_matchers::MatchFinder* finder) { isDefinition()) .bind("hide_from_abi_on_member_function"), this); - - finder->addMatcher( - cxxMethodDecl(has_hide_from_abi_attr, on_trivial).bind("hide_from_abi_on_defaulted_smf_in_trivial_class"), this); } void hide_from_abi::check(const clang::ast_matchers::MatchFinder::MatchResult& result) { @@ -99,12 +96,5 @@ void hide_from_abi::check(const clang::ast_matchers::MatchFinder::MatchResult& r call != nullptr) { diag(call->getLocation(), "_LIBCPP_HIDE_FROM_ABI or _LIBCPP_HIDE_FROM_ABI_VIRTUAL is missing"); } - - if (const auto* call = - 
result.Nodes.getNodeAs("hide_from_abi_on_defaulted_smf_in_trivial_class"); - call != nullptr) { - diag(call->getLocation(), - "_LIBCPP_HIDE_FROM_ABI should not be used for special member functions in trivial classes"); - } } } // namespace libcpp diff --git a/libcxx/utils/ci/run-buildbot b/libcxx/utils/ci/run-buildbot index 6376bc1d3dd5a..2ca14352b9b1b 100755 --- a/libcxx/utils/ci/run-buildbot +++ b/libcxx/utils/ci/run-buildbot @@ -119,6 +119,10 @@ function clean() { function generate-cmake-base() { step "Generating CMake" + # FIXME: This should really be set in the Dockerfile + export CMAKE_PREFIX_PATH="/opt/compiler-explorer/clang-trunk/lib/cmake" + export PATH="$PATH:/opt/compiler-explorer/clang-trunk/bin" + # We can remove -DCMAKE_INSTALL_MESSAGE=NEVER once https://gitlab.kitware.com/cmake/cmake/-/issues/26085 is fixed. cmake \ -S "${MONOREPO_ROOT}/runtimes" \ @@ -462,7 +466,8 @@ generic-llvm-libc) generate-cmake-base -C "${MONOREPO_ROOT}/libcxx/cmake/caches/Generic-llvm-libc.cmake" \ -DLIBCXX_TEST_CONFIG="llvm-libc++-llvm-libc.cfg.in" \ -DLIBCXXABI_TEST_CONFIG="llvm-libc++abi-llvm-libc.cfg.in" \ - -DLIBC_KERNEL_HEADERS="${BUILD_DIR}/linux-install/include" + -DLIBC_KERNEL_HEADERS="${BUILD_DIR}/linux-install/include" \ + -DCMAKE_EXPORT_COMPILE_COMMANDS=ON # Ensure we have the builtins archive built as we pass it in explicitly in # the test config. 
ninja -vC "${BUILD_DIR}" libclang_rt.builtins-x86_64.a diff --git a/libcxxabi/include/cxxabi.h b/libcxxabi/include/cxxabi.h index 2874d14a4ad4d..ff0d5d2aef47c 100644 --- a/libcxxabi/include/cxxabi.h +++ b/libcxxabi/include/cxxabi.h @@ -31,7 +31,7 @@ class _LIBCXXABI_TYPE_VIS type_info; // forward declaration #else class type_info; // forward declaration #endif -} +} // namespace std // runtime routines use C calling conventions, but are in __cxxabiv1 namespace diff --git a/libcxxabi/test/configs/llvm-libc++abi-llvm-libc.cfg.in b/libcxxabi/test/configs/llvm-libc++abi-llvm-libc.cfg.in index 88f5bd4c545ab..d46a1c6d4e58c 100644 --- a/libcxxabi/test/configs/llvm-libc++abi-llvm-libc.cfg.in +++ b/libcxxabi/test/configs/llvm-libc++abi-llvm-libc.cfg.in @@ -7,7 +7,7 @@ config.substitutions.append(('%{flags}', '-isysroot {}'.format('@CMAKE_OSX_SYSROOT@') if '@CMAKE_OSX_SYSROOT@' else '' )) config.substitutions.append(('%{compile_flags}', - '-nostdlibinc -I %{include} -I %{cxx-include} -I %{cxx-target-include} %{maybe-include-libunwind} -I @CMAKE_BINARY_DIR@/libc/include -I %{libcxx}/test/support -I %{libcxx}/src -idirafter @LIBC_KERNEL_HEADERS@ -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS' + '-nostdlibinc -I %{include} -I %{cxx-include} -I %{cxx-target-include} %{maybe-include-libunwind} -isystem @CMAKE_BINARY_DIR@/libc/include -I %{libcxx}/test/support -I %{libcxx}/src -idirafter @LIBC_KERNEL_HEADERS@ -D_LIBCPP_ENABLE_CXX17_REMOVED_UNEXPECTED_FUNCTIONS' )) config.substitutions.append(('%{link_flags}', '-nodefaultlibs -nostartfiles -L %{lib} -lc++ -lc++abi -pthread @CMAKE_BINARY_DIR@/libc/startup/linux/crt1.o @CMAKE_BINARY_DIR@/libc/lib/libc.a -static -fno-use-cxa-atexit' From 83a35c1245a54dd518ad024430790f66c384a541 Mon Sep 17 00:00:00 2001 From: Ming-Yi Lai Date: Thu, 25 Jun 2026 15:12:22 +0800 Subject: [PATCH 462/511] [RISCV][Zicfilp] Emit .note.gnu.property section for Zicfilp CFI unlabeled scheme (#141468) RISC-V Zicfilp-based CFI needs to let the linker/loader 
know if the binary is built with the mechanism enabled to support proper link-time/load-time management of this feature. The information is encoded as a bit in the `.note.gnu.property` section. This patch implements emitting the section for RISC-V targets when Zicfilp-based CFI with the "unlabeled" label scheme is enabled. When Clang receives the `-fcf-protection=branch` flag, which enables forward-edge CFI protection, it adds the `cf-protection-branch` attribute to the LLVM module. Additionally, if a branch label scheme is needed, another attribute (`cf-branch-label-scheme`) is added to indicate it. For RISC-V targets, the `cf-protection-branch` attribute indicates the adoption of Zicfilp-based forward-edge CFI protection, and the `cf-branch-label-scheme` attribute selects how the "label" operands of the Zicfilp `lpad` insns are produced. This patch emits the proper `.note.gnu.property` feature bit when the RISC-V backend sees that the LLVM module has the `cf-protection-branch` attribute set to `1`, and the `cf-branch-label-scheme` attribute set to `unlabeled`, so linkers/loaders know that the object file is built with Zicfilp-based forward-edge CFI protection with the "unlabeled" scheme. 
----- Co-authored-by: Piyou chen --- .../llvm/TargetParser/RISCVTargetParser.h | 18 ++++++++++ llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp | 33 +++++++++++++++++-- .../CodeGen/RISCV/branch-relaxation-rv32.ll | 3 +- .../CodeGen/RISCV/branch-relaxation-rv64.ll | 3 +- llvm/test/CodeGen/RISCV/calls-cf-branch.ll | 3 +- .../test/CodeGen/RISCV/jumptable-swguarded.ll | 3 +- llvm/test/CodeGen/RISCV/lpad-setjmp.ll | 3 +- llvm/test/CodeGen/RISCV/lpad.ll | 3 +- llvm/test/CodeGen/RISCV/nest-register.ll | 3 +- .../RISCV/note-gnu-property-zicfilp-error.ll | 14 ++++++++ .../note-gnu-property-zicfilp-unlabeled.ll | 26 +++++++++++++++ .../test/CodeGen/RISCV/rv64-trampoline-cfi.ll | 3 +- llvm/test/CodeGen/RISCV/tail-calls.ll | 9 ++--- 13 files changed, 110 insertions(+), 14 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/note-gnu-property-zicfilp-error.ll create mode 100644 llvm/test/CodeGen/RISCV/note-gnu-property-zicfilp-unlabeled.ll diff --git a/llvm/include/llvm/TargetParser/RISCVTargetParser.h b/llvm/include/llvm/TargetParser/RISCVTargetParser.h index 0851dbb2a3899..8ded5e5575b94 100644 --- a/llvm/include/llvm/TargetParser/RISCVTargetParser.h +++ b/llvm/include/llvm/TargetParser/RISCVTargetParser.h @@ -15,6 +15,7 @@ #define LLVM_TARGETPARSER_RISCVTARGETPARSER_H #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Error.h" #include "llvm/Support/MathExtras.h" @@ -95,6 +96,23 @@ LLVM_ABI StringRef getCPUNameFromCPUModel(const CPUModel &Model); } // namespace RISCV +namespace RISCVCFI { +enum class ZicfilpLabelSchemeKind { + Invalid, + Unlabeled, + FuncSig, +}; + +// See clang::getCFBranchLabelSchemeFlagVal() for possible CFBranchLabelScheme. 
+inline ZicfilpLabelSchemeKind +getZicfilpLabelScheme(const StringRef CFBranchLabelScheme) { + return StringSwitch(CFBranchLabelScheme) + .Case("unlabeled", ZicfilpLabelSchemeKind::Unlabeled) + .Case("func-sig", ZicfilpLabelSchemeKind::FuncSig) + .Default(ZicfilpLabelSchemeKind::Invalid); +} +} // namespace RISCVCFI + namespace RISCVVType { enum VLMUL : uint8_t { LMUL_1 = 0, diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 01ee3e01e2030..7b4e2e7390058 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -1022,11 +1022,40 @@ void RISCVAsmPrinter::EmitHwasanMemaccessSymbols(Module &M) { void RISCVAsmPrinter::emitNoteGnuProperty(const Module &M) { assert(TM.getTargetTriple().isOSBinFormatELF() && "invalid binary format"); + uint32_t GnuProps = 0; if (const Metadata *const Flag = M.getModuleFlag("cf-protection-return"); + Flag && !mdconst::extract(Flag)->isZero()) + GnuProps |= ELF::GNU_PROPERTY_RISCV_FEATURE_1_CFI_SS; + + if (const Metadata *const Flag = M.getModuleFlag("cf-protection-branch"); Flag && !mdconst::extract(Flag)->isZero()) { - auto &RTS = static_cast(getTargetStreamer()); - RTS.emitNoteGnuPropertySection(ELF::GNU_PROPERTY_RISCV_FEATURE_1_CFI_SS); + using namespace llvm::RISCVISAUtils; + const Metadata *const CFBranchLabelSchemeFlag = + M.getModuleFlag("cf-branch-label-scheme"); + assert(CFBranchLabelSchemeFlag && + "cf-protection=branch should come with cf-branch-label-scheme=... 
" + "on RISC-V targets"); + const StringRef CFBranchLabelScheme = + cast(CFBranchLabelSchemeFlag)->getString(); + switch (llvm::RISCVCFI::getZicfilpLabelScheme(CFBranchLabelScheme)) { + case llvm::RISCVCFI::ZicfilpLabelSchemeKind::Invalid: + reportFatalInternalError("invalid RISC-V Zicfilp label scheme"); + case llvm::RISCVCFI::ZicfilpLabelSchemeKind::Unlabeled: + GnuProps |= ELF::GNU_PROPERTY_RISCV_FEATURE_1_CFI_LP_UNLABELED; + break; + case llvm::RISCVCFI::ZicfilpLabelSchemeKind::FuncSig: + // TODO: Emit the func-sig bit after the feature is implemented + reportFatalUsageError("the complete func-sig label scheme feature is not " + "implemented yet"); + break; + } } + + if (!GnuProps) + return; + + auto &RTS = static_cast(getTargetStreamer()); + RTS.emitNoteGnuPropertySection(GnuProps); } static MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym, diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation-rv32.ll b/llvm/test/CodeGen/RISCV/branch-relaxation-rv32.ll index f11b14853b7a5..97a9167ccc754 100644 --- a/llvm/test/CodeGen/RISCV/branch-relaxation-rv32.ll +++ b/llvm/test/CodeGen/RISCV/branch-relaxation-rv32.ll @@ -1844,5 +1844,6 @@ tail: ret void } -;; CFBRANCH: !llvm.module.flags = !{!0} +;; CFBRANCH: !llvm.module.flags = !{!0, !1} ;; CFBRANCH: !0 = !{i32 8, !"cf-protection-branch", i32 1} +;; CFBRANCH: !1 = !{i32 1, !"cf-branch-label-scheme", !"unlabeled"} diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation-rv64.ll b/llvm/test/CodeGen/RISCV/branch-relaxation-rv64.ll index 47a32bc2835f0..c4f2911a0699a 100644 --- a/llvm/test/CodeGen/RISCV/branch-relaxation-rv64.ll +++ b/llvm/test/CodeGen/RISCV/branch-relaxation-rv64.ll @@ -1848,5 +1848,6 @@ tail: ret void } -;; CFBRANCH: !llvm.module.flags = !{!0} +;; CFBRANCH: !llvm.module.flags = !{!0, !1} ;; CFBRANCH: !0 = !{i32 8, !"cf-protection-branch", i32 1} +;; CFBRANCH: !1 = !{i32 1, !"cf-branch-label-scheme", !"unlabeled"} diff --git a/llvm/test/CodeGen/RISCV/calls-cf-branch.ll 
b/llvm/test/CodeGen/RISCV/calls-cf-branch.ll index 0ddde771a1f0c..fe40281fe4301 100644 --- a/llvm/test/CodeGen/RISCV/calls-cf-branch.ll +++ b/llvm/test/CodeGen/RISCV/calls-cf-branch.ll @@ -236,5 +236,6 @@ define fastcc void @fastcc_call_nonfastcc(){ ret void } -!llvm.module.flags = !{!0} +!llvm.module.flags = !{!0, !1} !0 = !{i32 8, !"cf-protection-branch", i32 1} +!1 = !{i32 1, !"cf-branch-label-scheme", !"unlabeled"} diff --git a/llvm/test/CodeGen/RISCV/jumptable-swguarded.ll b/llvm/test/CodeGen/RISCV/jumptable-swguarded.ll index cfa04caf7ceff..f10fae3edf44e 100644 --- a/llvm/test/CodeGen/RISCV/jumptable-swguarded.ll +++ b/llvm/test/CodeGen/RISCV/jumptable-swguarded.ll @@ -103,5 +103,6 @@ exit: ret void } -!llvm.module.flags = !{!0} +!llvm.module.flags = !{!0, !1} !0 = !{i32 8, !"cf-protection-branch", i32 1} +!1 = !{i32 1, !"cf-branch-label-scheme", !"unlabeled"} diff --git a/llvm/test/CodeGen/RISCV/lpad-setjmp.ll b/llvm/test/CodeGen/RISCV/lpad-setjmp.ll index 70d34194edd70..44b2206b3aa64 100644 --- a/llvm/test/CodeGen/RISCV/lpad-setjmp.ll +++ b/llvm/test/CodeGen/RISCV/lpad-setjmp.ll @@ -107,5 +107,6 @@ define i32 @test_indirect_returns_twice(ptr %fptr) { attributes #0 = { returns_twice } -!llvm.module.flags = !{!0} +!llvm.module.flags = !{!0, !1} !0 = !{i32 8, !"cf-protection-branch", i32 1} +!1 = !{i32 1, !"cf-branch-label-scheme", !"unlabeled"} diff --git a/llvm/test/CodeGen/RISCV/lpad.ll b/llvm/test/CodeGen/RISCV/lpad.ll index 468e18cad0e3a..6c01215cc19a8 100644 --- a/llvm/test/CodeGen/RISCV/lpad.ll +++ b/llvm/test/CodeGen/RISCV/lpad.ll @@ -364,5 +364,6 @@ define i32 @test_returns_twice() { ret i32 %call } -!llvm.module.flags = !{!0} +!llvm.module.flags = !{!0, !1} !0 = !{i32 8, !"cf-protection-branch", i32 1} +!1 = !{i32 1, !"cf-branch-label-scheme", !"unlabeled"} diff --git a/llvm/test/CodeGen/RISCV/nest-register.ll b/llvm/test/CodeGen/RISCV/nest-register.ll index aaf8be4d2d8a1..7f2edb90619d7 100644 --- a/llvm/test/CodeGen/RISCV/nest-register.ll +++ 
b/llvm/test/CodeGen/RISCV/nest-register.ll @@ -53,6 +53,7 @@ define ptr @nest_caller(ptr %arg) nounwind { } ; LP64E-ZICFILP: LLVM ERROR: Nested functions with control flow protection are not usable with ILP32E or LP64E ABI. -!llvm.module.flags = !{!0} +!llvm.module.flags = !{!0, !1} !0 = !{i32 8, !"cf-protection-branch", i32 1} +!1 = !{i32 1, !"cf-branch-label-scheme", !"unlabeled"} diff --git a/llvm/test/CodeGen/RISCV/note-gnu-property-zicfilp-error.ll b/llvm/test/CodeGen/RISCV/note-gnu-property-zicfilp-error.ll new file mode 100644 index 0000000000000..b6acff5911a82 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/note-gnu-property-zicfilp-error.ll @@ -0,0 +1,14 @@ +; RUN: sed 's/SCHEME/func-sig/' %s | not llc --mtriple=riscv32 -o /dev/null 2>&1 | FileCheck %s --check-prefix=FUNC-SIG +; RUN: sed 's/SCHEME/func-sig/' %s | not llc --mtriple=riscv64 -o /dev/null 2>&1 | FileCheck %s --check-prefix=FUNC-SIG + +; RUN: sed 's/SCHEME/bogus/' %s | not --crash llc --mtriple=riscv32 -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID +; RUN: sed 's/SCHEME/bogus/' %s | not --crash llc --mtriple=riscv64 -o /dev/null 2>&1 | FileCheck %s --check-prefix=INVALID + +; FUNC-SIG: LLVM ERROR: the complete func-sig label scheme feature is not implemented yet + +; INVALID: LLVM ERROR: invalid RISC-V Zicfilp label scheme + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 8, !"cf-protection-branch", i32 1} +!1 = !{i32 1, !"cf-branch-label-scheme", !"SCHEME"} diff --git a/llvm/test/CodeGen/RISCV/note-gnu-property-zicfilp-unlabeled.ll b/llvm/test/CodeGen/RISCV/note-gnu-property-zicfilp-unlabeled.ll new file mode 100644 index 0000000000000..e48d1b2cd86d8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/note-gnu-property-zicfilp-unlabeled.ll @@ -0,0 +1,26 @@ +; RUN: llc --mtriple=riscv32 --filetype=obj -o - %s | llvm-readelf -n - | FileCheck --check-prefixes=READELF %s +; RUN: llc --mtriple=riscv64 --filetype=obj -o - %s | llvm-readelf -n - | FileCheck --check-prefixes=READELF %s +; RUN: llc 
--mtriple=riscv32 -o - %s | FileCheck --check-prefixes=ASM,ASM32 %s +; RUN: llc --mtriple=riscv64 -o - %s | FileCheck --check-prefixes=ASM,ASM64 %s + +; READELF: Properties: RISC-V feature: ZICFILP-unlabeled + +; ASM: .section ".note.GNU-stack","",@progbits +; ASM-NEXT: .section .note.gnu.property,"a",@note +; ASM32-NEXT: .p2align 2, 0x0 +; ASM64-NEXT: .p2align 3, 0x0 +; ASM-NEXT: .word 4 +; ASM32-NEXT: .word 12 +; ASM64-NEXT: .word 16 +; ASM-NEXT: .word 5 +; ASM-NEXT: .asciz "GNU" +; ASM-NEXT: .word 3221225472 +; ASM-NEXT: .word 4 +; ASM-NEXT: .word 1 +; ASM32-NEXT: .p2align 2, 0x0 +; ASM64-NEXT: .p2align 3, 0x0 + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 8, !"cf-protection-branch", i32 1} +!1 = !{i32 1, !"cf-branch-label-scheme", !"unlabeled"} diff --git a/llvm/test/CodeGen/RISCV/rv64-trampoline-cfi.ll b/llvm/test/CodeGen/RISCV/rv64-trampoline-cfi.ll index 06a818516c149..80a727d2f3acb 100644 --- a/llvm/test/CodeGen/RISCV/rv64-trampoline-cfi.ll +++ b/llvm/test/CodeGen/RISCV/rv64-trampoline-cfi.ll @@ -92,6 +92,7 @@ define i64 @test0(i64 %n, ptr %p) nounwind { ret i64 %ret } -!llvm.module.flags = !{!0} +!llvm.module.flags = !{!0, !1} !0 = !{i32 8, !"cf-protection-branch", i32 1} +!1 = !{i32 1, !"cf-branch-label-scheme", !"unlabeled"} diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll index 4d923cbabd11d..dc42ce296abce 100644 --- a/llvm/test/CodeGen/RISCV/tail-calls.ll +++ b/llvm/test/CodeGen/RISCV/tail-calls.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s | FileCheck %s ; RUN: llc -mtriple riscv32-unknown-elf -o - %s | FileCheck %s -; RUN: sed -e 's/!llvm.module.flags = !{!0}/!llvm.module.flags = !{!0, !15}/' -e 's/^;CF: //' %s \ +; RUN: sed -e 's/!llvm.module.flags = !{!0}/!llvm.module.flags = !{!0, !15, !16}/' -e 's/^;CF: //' %s \ ; RUN: | llc -mtriple=riscv32 -verify-machineinstrs \ ; RUN: | FileCheck 
-check-prefix=CHECK-CF-RV32 %s -; RUN: sed -e 's/!llvm.module.flags = !{!0}/!llvm.module.flags = !{!0, !15}/' -e 's/^;CF: //' %s \ +; RUN: sed -e 's/!llvm.module.flags = !{!0}/!llvm.module.flags = !{!0, !15, !16}/' -e 's/^;CF: //' %s \ ; RUN: | llc -mtriple=riscv64 -verify-machineinstrs \ ; RUN: | FileCheck -check-prefix=CHECK-CF-RV64 %s -; RUN: sed -e 's/!llvm.module.flags = !{!0}/!llvm.module.flags = !{!0, !15}/' -e 's/^;CF: //' %s \ +; RUN: sed -e 's/!llvm.module.flags = !{!0}/!llvm.module.flags = !{!0, !15, !16}/' -e 's/^;CF: //' %s \ ; RUN: | llc -code-model=large -mtriple=riscv32 -verify-machineinstrs \ ; RUN: | FileCheck -check-prefix=CHECK-CF-RV32-LARGE %s -; RUN: sed -e 's/!llvm.module.flags = !{!0}/!llvm.module.flags = !{!0, !15}/' -e 's/^;CF: //' %s \ +; RUN: sed -e 's/!llvm.module.flags = !{!0}/!llvm.module.flags = !{!0, !15, !16}/' -e 's/^;CF: //' %s \ ; RUN: | llc -code-model=large -mtriple=riscv64 -verify-machineinstrs \ ; RUN: | FileCheck -check-prefix=CHECK-CF-RV64-LARGE %s @@ -1400,3 +1400,4 @@ return: ; preds = %if.else8, %if.then6 !13 = !{i32 999999, i64 1, i32 2} !14 = !{!"function_entry_count", i64 0} ;CF: !15 = !{i32 8, !"cf-protection-branch", i32 1} +;CF: !16 = !{i32 1, !"cf-branch-label-scheme", !"unlabeled"} From c133955cab68344d0c44b44360749749d0dc0f6c Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 25 Jun 2026 09:12:51 +0200 Subject: [PATCH 463/511] [ADT][NFC] Use is_constructible instead of implementing it ourselves (#205341) --- llvm/include/llvm/ADT/iterator_range.h | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/ADT/iterator_range.h b/llvm/include/llvm/ADT/iterator_range.h index cc8070eb4d528..70f1add462104 100644 --- a/llvm/include/llvm/ADT/iterator_range.h +++ b/llvm/include/llvm/ADT/iterator_range.h @@ -20,7 +20,6 @@ #include "llvm/ADT/ADL.h" #include -#include namespace llvm { @@ -32,10 +31,6 @@ template class iterator_range { IteratorT begin_iterator, end_iterator; - 
template - using explicitly_converted_t = decltype(static_cast( - std::declval>())); - public: #if defined(__GNUC__) && \ (__GNUC__ == 7 || (__GNUC__ == 8 && __GNUC_MINOR__ < 4)) @@ -43,9 +38,11 @@ class iterator_range { // See https://github.com/llvm/llvm-project/issues/63843 template #else - template , IteratorT>> * = nullptr> + template < + typename Container, + std::enable_if_t &&>, + int> = 0> #endif iterator_range(Container &&c) : begin_iterator(adl_begin(c)), end_iterator(adl_end(c)) { From 4bfb8755a0aa416c1660d644ec2592e1aa17c27d Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 25 Jun 2026 09:22:23 +0200 Subject: [PATCH 464/511] clang/AMDGPU: Simplify cpu name checks for __builtin_amdgcn_is_processor (#205724) Instead of trying to figure out which TargetInfo to use, skip it and directly use the source of truth from TargetParser. This avoids regressions in future commits where isValidCPUName will be conditionally filtered. --- clang/lib/Sema/SemaAMDGPU.cpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index 29442617b6a13..bd9e7e7b71ed6 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -800,20 +800,16 @@ Expr *SemaAMDGPU::ExpandAMDGPUPredicateBuiltIn(Expr *E) { StringRef N = GFX->getString(); const TargetInfo &TI = Ctx.getTargetInfo(); - const TargetInfo *AuxTI = Ctx.getAuxTargetInfo(); - if (!TI.isValidCPUName(N) && (!AuxTI || !AuxTI->isValidCPUName(N))) { + if (llvm::AMDGPU::parseArchAMDGCN(N) == llvm::AMDGPU::GK_NONE) { Diag(Loc, diag::err_amdgcn_processor_is_arg_invalid_value) << N; - SmallVector ValidList; - if (TI.getTriple().getVendor() == llvm::Triple::VendorType::AMD) - TI.fillValidCPUList(ValidList); - else if (AuxTI) // Since the BI is present it must be an AMDGPU triple. 
- AuxTI->fillValidCPUList(ValidList); + SmallVector ValidList; + llvm::AMDGPU::fillValidArchListAMDGCN(ValidList); if (!ValidList.empty()) Diag(Loc, diag::note_amdgcn_processor_is_valid_options) << llvm::join(ValidList, ", "); return nullptr; } - if (Ctx.getTargetInfo().getTriple().isSPIRV()) { + if (TI.getTriple().isSPIRV()) { CE->setType(BoolTy); return *ExpandedPredicates.insert(CE).first; } From 007557da9a7973242352e07eb2cd3b5ab2a77754 Mon Sep 17 00:00:00 2001 From: William Tran-Viet Date: Thu, 25 Jun 2026 03:31:23 -0400 Subject: [PATCH 465/511] [libc++] Add implementations of `__get_locale_encoding(...)` to Fuchsia and LLVM-Libc (#205716) - These were missed in #141312 and consequently broke the Fuchsia and AMDGPU llvm-libc build bots. - Fuchsia seems to have support for `nl_langinfo_l(...)`, so we can use that. - However, LLVM-libc does not implement it, so for now we can do a dummy implementation and mark `environment()` and `encoding()` as unsupported on LLVM libc. --- libcxx/include/__locale_dir/support/fuchsia.h | 5 +++++ libcxx/include/__locale_dir/support/llvm_libc.h | 2 ++ libcxx/test/libcxx/text/text_encoding/environment.pass.cpp | 1 + .../text_encoding/text_encoding.members/environment.pass.cpp | 1 + 4 files changed, 9 insertions(+) diff --git a/libcxx/include/__locale_dir/support/fuchsia.h b/libcxx/include/__locale_dir/support/fuchsia.h index 528bfeb0cb6e1..1fe292a908a9c 100644 --- a/libcxx/include/__locale_dir/support/fuchsia.h +++ b/libcxx/include/__locale_dir/support/fuchsia.h @@ -15,6 +15,7 @@ #include #include #include +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -69,6 +70,10 @@ inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t& __loc) { return std::localeconv(); } +inline _LIBCPP_HIDE_FROM_ABI const char* __get_locale_encoding(__locale_t __loc) { + return ::nl_langinfo_l(CODESET, __loc); +} + // // Other functions // diff --git a/libcxx/include/__locale_dir/support/llvm_libc.h 
b/libcxx/include/__locale_dir/support/llvm_libc.h index 1ce1c443d87ed..c494f07f4f1f4 100644 --- a/libcxx/include/__locale_dir/support/llvm_libc.h +++ b/libcxx/include/__locale_dir/support/llvm_libc.h @@ -47,6 +47,8 @@ inline _LIBCPP_HIDE_FROM_ABI char* __setlocale(int __category, char const* __loc } inline _LIBCPP_HIDE_FROM_ABI __lconv_t* __localeconv(__locale_t&) { return std::localeconv(); } + +inline _LIBCPP_HIDE_FROM_ABI const char* __get_locale_encoding(__locale_t) { return nullptr; } #endif // _LIBCPP_BUILDING_LIBRARY } // namespace __locale diff --git a/libcxx/test/libcxx/text/text_encoding/environment.pass.cpp b/libcxx/test/libcxx/text/text_encoding/environment.pass.cpp index b5c88c6e6d568..0bcabb235fc8b 100644 --- a/libcxx/test/libcxx/text/text_encoding/environment.pass.cpp +++ b/libcxx/test/libcxx/text/text_encoding/environment.pass.cpp @@ -14,6 +14,7 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: windows, android // UNSUPPORTED: availability-te-environment-missing +// UNSUPPORTED: LLVM-LIBC-FIXME // std::text_encoding::environment() diff --git a/libcxx/test/std/text/text_encoding/text_encoding.members/environment.pass.cpp b/libcxx/test/std/text/text_encoding/text_encoding.members/environment.pass.cpp index 113b151765c81..8907f72b55140 100644 --- a/libcxx/test/std/text/text_encoding/text_encoding.members/environment.pass.cpp +++ b/libcxx/test/std/text/text_encoding/text_encoding.members/environment.pass.cpp @@ -11,6 +11,7 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: availability-te-environment-missing +// UNSUPPORTED: LLVM-LIBC-FIXME // From d994717ffda57f1a65f2b65f52449da579e6cf54 Mon Sep 17 00:00:00 2001 From: Zihan Qin Date: Thu, 25 Jun 2026 15:31:37 +0800 Subject: [PATCH 466/511] [clang-tidy] Guard `readability-identifier-naming` recursion in dependent base lookup (#204913) Prevent `readability-identifier-naming` from recursing indefinitely in dependent base lookup when `AggressiveDependentMemberLookup` is enabled. 
In #204790, `findDeclInBases()` maps a dependent template base back to the primary template, creating a recursive cycle that crashes clang-tidy. Add a recursion guard to stop the crash. A more complete fix could be explored separately. Add a regression test covering the dependent base cycle reproducer. Closes https://github.com/llvm/llvm-project/issues/204790. --------- Co-authored-by: EugeneZelenko --- .../utils/RenamerClangTidyCheck.cpp | 20 +++++++++++++++---- clang-tools-extra/docs/ReleaseNotes.rst | 3 +++ .../identifier-naming-member-decl-usage.cpp | 15 ++++++++++++++ 3 files changed, 34 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp index cb7ef19827675..bfdcd381b4122 100644 --- a/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp @@ -17,6 +17,7 @@ #include "clang/Lex/Preprocessor.h" #include "llvm/ADT/DenseMapInfo.h" #include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/ScopeExit.h" #include #define DEBUG_TYPE "clang-tidy" @@ -118,6 +119,8 @@ static const NamedDecl *getFailureForNamedDecl(const NamedDecl *ND) { return ND; } +using RecursionProtectionSet = llvm::SmallPtrSet; + /// Returns a decl matching the \p DeclName in \p Parent or one of its base /// classes. If \p AggressiveTemplateLookup is `true` then it will check /// template dependent base classes as well. @@ -125,9 +128,17 @@ static const NamedDecl *getFailureForNamedDecl(const NamedDecl *ND) { /// flag indicating the multiple resolutions. 
static NameLookup findDeclInBases(const CXXRecordDecl &Parent, StringRef DeclName, - bool AggressiveTemplateLookup) { + bool AggressiveTemplateLookup, + RecursionProtectionSet &Visited) { if (!Parent.hasDefinition()) return NameLookup(nullptr); + + const auto *Definition = Parent.getDefinition(); + if (!Visited.insert(Definition).second) + return NameLookup(nullptr); + auto RemoveFromVisited = + llvm::scope_exit([&Visited, Definition] { Visited.erase(Definition); }); + if (const NamedDecl *InClassRef = findDecl(Parent, DeclName)) return NameLookup(InClassRef); const NamedDecl *Found = nullptr; @@ -144,8 +155,8 @@ static NameLookup findDeclInBases(const CXXRecordDecl &Parent, } if (!Record) continue; - if (auto Search = - findDeclInBases(*Record, DeclName, AggressiveTemplateLookup)) { + if (auto Search = findDeclInBases(*Record, DeclName, + AggressiveTemplateLookup, Visited)) { if (*Search) { if (Found) return NameLookup( @@ -301,8 +312,9 @@ class RenamerClangTidyVisitor return true; const StringRef DependentName = DeclName.getAsIdentifierInfo()->getName(); + RecursionProtectionSet Visited; if (const NameLookup Resolved = findDeclInBases( - *Base, DependentName, AggressiveDependentMemberLookup)) { + *Base, DependentName, AggressiveDependentMemberLookup, Visited)) { if (*Resolved) Check->addUsage(*Resolved, DepMemberRef->getMemberNameInfo().getSourceRange(), SM); diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 4a5863be59fe3..81e5de4e0a868 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -801,6 +801,9 @@ Changes in existing checks - Fixed a false positive where function templates could be diagnosed as generic identifiers when `DefaultCase` was enabled. + - Fixed a crash in dependent base lookup when + `AggressiveDependentMemberLookup` option is enabled. 
+ - Improved :doc:`readability-implicit-bool-conversion ` check: diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming-member-decl-usage.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming-member-decl-usage.cpp index 61c3aeb043934..ce7065dc0643e 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming-member-decl-usage.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/identifier-naming-member-decl-usage.cpp @@ -216,3 +216,18 @@ struct Derived : DependentBase { }; } // namespace unresolved_dependance + +namespace dependent_base_cycle { +template +struct CycleBase; + +template +struct CycleBase { + int Value; +}; + +template +struct CycleBase : CycleBase { + CycleBase() { this->Value; } +}; +} // namespace dependent_base_cycle From 263c84961f4448ce7dd02147986965f3ead6d719 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 25 Jun 2026 09:34:38 +0200 Subject: [PATCH 467/511] [TTI][Inline] Define inlining behavior in SubtargetFeature (#205348) This allows specifying the InliningBehavior as part of SubtargetFeature. The behavior is one of InlineDefault, InlineInverse, and InlineIgnore. * InlineDefault: A callee with this feature enabled cannot be inlined into a caller without it enabled. * InlineInverse: A callee without this feature enabled cannot be inlined into a caller with it enabled. * InlineIgnore: Arbitrary differences are allowed. The default areInlineCompatible() implementation respects these now. The intention behind this change is that it reduces the risk of forgetting to update feature lists in TTI when adding a new feature, e.g. when adding a new tuning flag that should be InlineIgnore. Move X86 and AMDGPU ignored features, and the one inverse feature on AArch64 to TableGen. There is various followup work to do here, e.g. currently tuning flags on AArch64 are not ignored (and on AMDGPU only some are ignored). 
--- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 14 +- .../llvm/CodeGen/TargetSubtargetInfo.h | 6 + llvm/include/llvm/Target/Target.td | 18 +- llvm/lib/Target/AArch64/AArch64Features.td | 5 +- .../AArch64/AArch64TargetTransformInfo.cpp | 18 +- .../AArch64/AArch64TargetTransformInfo.h | 2 - llvm/lib/Target/AMDGPU/AMDGPU.td | 49 +++-- .../AMDGPU/AMDGPUTargetTransformInfo.cpp | 27 +-- llvm/lib/Target/X86/X86.td | 206 ++++++++++++------ .../lib/Target/X86/X86TargetTransformInfo.cpp | 6 +- llvm/lib/Target/X86/X86TargetTransformInfo.h | 76 ------- .../TableGen/aarch64-apple-tuning-features.td | 11 + llvm/unittests/CodeGen/MFCommon.inc | 10 + llvm/utils/TableGen/SubtargetEmitter.cpp | 26 +++ 14 files changed, 258 insertions(+), 216 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index d64cda740a77f..8c4df6664cf1d 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -397,10 +397,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { const Function *Callee) const override { const TargetMachine &TM = getTLI()->getTargetMachine(); - const FeatureBitset &CallerBits = - TM.getSubtargetImpl(*Caller)->getFeatureBits(); - const FeatureBitset &CalleeBits = - TM.getSubtargetImpl(*Callee)->getFeatureBits(); + const TargetSubtargetInfo *CallerSTI = TM.getSubtargetImpl(*Caller); + const TargetSubtargetInfo *CalleeSTI = TM.getSubtargetImpl(*Callee); + FeatureBitset InlineIgnoreFeatures = CallerSTI->getInlineIgnoreFeatures(); + FeatureBitset InlineInverseFeatures = CallerSTI->getInlineInverseFeatures(); + FeatureBitset CallerBits = + (CallerSTI->getFeatureBits() ^ InlineInverseFeatures) & + ~InlineIgnoreFeatures; + FeatureBitset CalleeBits = + (CalleeSTI->getFeatureBits() ^ InlineInverseFeatures) & + ~InlineIgnoreFeatures; // Inline a callee if its target-features are a subset of the callers // target-features. 
diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h index 6f95f0fea6441..fe71a17146184 100644 --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -364,6 +364,12 @@ class LLVM_ABI TargetSubtargetInfo : public MCSubtargetInfo { } virtual bool isRegisterReservedByUser(Register R) const { return false; } + + /// Target features to ignore for inline compatibility check. + virtual const FeatureBitset &getInlineIgnoreFeatures() const = 0; + /// Target features where the callee may have an additional feature, + /// instead of the caller. + virtual const FeatureBitset &getInlineInverseFeatures() const = 0; }; } // end namespace llvm diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td index a02a4209f86fe..455b9f40f8b72 100644 --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -522,11 +522,25 @@ class DwarfRegAlias { Register DwarfAlias = reg; } +// Determine whether inlining is allowed if caller and callee differ in whether +// a feature is enabled. +class InlineBehavior; + +// Arbitrary differences are allowed. +def InlineIgnore : InlineBehavior; +// A callee with this feature enabled cannot be inlined into a caller without +// it enabled. +def InlineDefault : InlineBehavior; +// A callee without this feature enabled cannot be inlined into a caller with +// it enabled. +def InlineInverse : InlineBehavior; + //===----------------------------------------------------------------------===// // SubtargetFeature - A characteristic of the chip set. // class SubtargetFeature i = []> { + list i = [], + InlineBehavior ib = InlineDefault> { // Name - Feature name. Used by command line (-mattr=) to determine the // appropriate target chip. 
// @@ -555,6 +569,8 @@ class SubtargetFeature Implies = !sort(f, i, f.Name); + + InlineBehavior InlineBehavior = ib; } /// Specifies a Subtarget feature that this instruction is deprecated on. diff --git a/llvm/lib/Target/AArch64/AArch64Features.td b/llvm/lib/Target/AArch64/AArch64Features.td index e63f4886a1256..c351a92f2f673 100644 --- a/llvm/lib/Target/AArch64/AArch64Features.td +++ b/llvm/lib/Target/AArch64/AArch64Features.td @@ -703,10 +703,13 @@ def FeatureStrictAlign : SubtargetFeature<"strict-align", "Disallow all unaligned memory " "access">; +// +execute-only callee can be inlined into caller without +execute-only, +// but not vice versa. def FeatureExecuteOnly : SubtargetFeature<"execute-only", "GenExecuteOnly", "true", "Enable the generation of " - "execute only code.">; + "execute only code.", + [], InlineInverse>; foreach i = {1-7,9-15,18,20-28} in def FeatureReserveX#i : SubtargetFeature<"reserve-x"#i, "ReserveXRegister["#i#"]", "true", diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index f8d2006cbd7de..57ae3c75c07fa 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -276,10 +276,6 @@ bool AArch64TTIImpl::isMultiversionedFunction(const Function &F) const { return F.hasFnAttribute("fmv-features"); } -const FeatureBitset AArch64TTIImpl::InlineInverseFeatures = { - AArch64::FeatureExecuteOnly, -}; - bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, const Function *Callee) const { SMECallAttrs CallAttrs(*Caller, *Callee); @@ -308,19 +304,7 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, return false; } - const TargetMachine &TM = getTLI()->getTargetMachine(); - const FeatureBitset &CallerBits = - TM.getSubtargetImpl(*Caller)->getFeatureBits(); - const FeatureBitset &CalleeBits = - TM.getSubtargetImpl(*Callee)->getFeatureBits(); - // Adjust the feature bitsets by 
inverting some of the bits. This is needed - // for target features that represent restrictions rather than capabilities, - // for example a "+execute-only" callee can be inlined into a caller without - // "+execute-only", but not vice versa. - FeatureBitset EffectiveCallerBits = CallerBits ^ InlineInverseFeatures; - FeatureBitset EffectiveCalleeBits = CalleeBits ^ InlineInverseFeatures; - - return (EffectiveCallerBits & EffectiveCalleeBits) == EffectiveCalleeBits; + return BaseT::areInlineCompatible(Caller, Callee); } bool AArch64TTIImpl::areTypesABICompatible(const Function *Caller, diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h index 7f07b39bd9ee5..5df5f22793cc8 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h @@ -49,8 +49,6 @@ class AArch64TTIImpl final : public BasicTTIImplBase { const AArch64Subtarget *ST; const AArch64TargetLowering *TLI; - static const FeatureBitset InlineInverseFeatures; - const AArch64Subtarget *getST() const { return ST; } const AArch64TargetLowering *getTLI() const { return TLI; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td index 705e76ebeb7e5..e73f87c17e515 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -53,12 +53,14 @@ multiclass AMDGPUSubtargetFeature Deps = []> { + list Deps = [], + InlineBehavior IB = InlineDefault> { def Feature#NAME : SubtargetFeature; if GenPredicate then @@ -72,7 +74,7 @@ multiclass AMDGPUSubtargetFeature; defm FastDenormalF32 : AMDGPUSubtargetFeature<"fast-denormal-f32", @@ -87,7 +89,7 @@ defm MIMG_R128 : AMDGPUSubtargetFeature<"mimg-r128", defm HalfRate64Ops : AMDGPUSubtargetFeature<"half-rate-64-ops", "Most fp64 instructions are half rate instead of quarter", - /*GenPredicate=*/0 + /*GenPredicate=*/0, /*GenAssemblerPredicate=*/1, [], InlineIgnore >; defm FullRate64Ops : 
AMDGPUSubtargetFeature<"full-rate-64-ops", @@ -144,11 +146,12 @@ defm UnalignedBufferAccess : AMDGPUSubtargetFeature<"unaligned-buffer-access", defm TrapHandler: AMDGPUSubtargetFeature<"trap-handler", "Trap handler support", - /*GenPredicate=*/0 + /*GenPredicate=*/0, /*GenAssemblerPredicate=*/1, [], InlineIgnore >; defm UnalignedScratchAccess : AMDGPUSubtargetFeature<"unaligned-scratch-access", - "Support unaligned scratch loads and stores" + "Support unaligned scratch loads and stores", + /*GenPredicate=*/1, /*GenAssemblerPredicate=*/1, [], InlineIgnore >; defm UnalignedDSAccess : AMDGPUSubtargetFeature<"unaligned-ds-access", @@ -204,14 +207,16 @@ defm Minimum3Maximum3PKF16 : AMDGPUSubtargetFeature<"minimum3-maximum3-pkf16", def FeatureSupportsXNACK : SubtargetFeature<"xnack-support", "SupportsXNACK", "true", - "Hardware supports XNACK" + "Hardware supports XNACK", + [], InlineIgnore >; defm XNACKOnOffModes : AMDGPUSubtargetFeature<"xnack-on-off-modes", "Target supports XNACK on/off modes", /*GenPredicate=*/1, /*GenAssemblerPredicate=*/0, - [FeatureSupportsXNACK] + [FeatureSupportsXNACK], + InlineIgnore >; // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support @@ -225,7 +230,8 @@ def FeatureXNACK : SubtargetFeature<"xnack", "EnableXNACK", "true", "Enable XNACK support", - [FeatureSupportsXNACK] + [FeatureSupportsXNACK], + InlineIgnore >; def FeatureTgSplit : SubtargetFeature<"tgsplit", @@ -245,7 +251,8 @@ def FeaturePreciseMemory "true", "Enable precise memory mode">; defm SGPRInitBug : AMDGPUSubtargetFeature<"sgpr-init-bug", - "VI SGPR initialization bug requiring a fixed SGPR allocation size" + "VI SGPR initialization bug requiring a fixed SGPR allocation size", + /*GenPredicate=*/1, /*GenAssemblerPredicate=*/1, [], InlineIgnore >; defm UserSGPRInit16Bug : AMDGPUSubtargetFeature<"user-sgpr-init16-bug", @@ -1033,10 +1040,13 @@ defm DefaultComponentBroadcast : AMDGPUSubtargetFeature<"default-component-broad "BUFFER/IMAGE store 
instructions set unspecified components to x component (GFX12)" >; +// The default assumption needs to be ecc is enabled, but no directly +// exposed operations depend on it, so it can be safely inlined. def FeatureSupportsSRAMECC : SubtargetFeature<"sramecc-support", "SupportsSRAMECC", "true", - "Hardware supports SRAMECC" + "Hardware supports SRAMECC", + [], InlineIgnore >; def FeatureSRAMECC : SubtargetFeature<"sramecc", @@ -1243,7 +1253,8 @@ def FeatureDumpCodeLower : SubtargetFeature <"dumpcode", def FeatureEnableLoadStoreOpt : SubtargetFeature <"load-store-opt", "EnableLoadStoreOpt", "true", - "Enable SI load/store optimizer pass" + "Enable SI load/store optimizer pass", + [], InlineIgnore >; // Performance debugging feature. Allow using DS instruction immediate @@ -1255,13 +1266,15 @@ def FeatureEnableUnsafeDSOffsetFolding : SubtargetFeature < "unsafe-ds-offset-folding", "EnableUnsafeDSOffsetFolding", "true", - "Force using DS instruction immediate offsets on SI" + "Force using DS instruction immediate offsets on SI", + [], InlineIgnore >; def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", "EnableSIScheduler", "true", - "Enable SI Machine Scheduler" + "Enable SI Machine Scheduler", + [], InlineIgnore >; def FeatureEnableDS128 : SubtargetFeature<"enable-ds128", @@ -1289,12 +1302,13 @@ def FeatureEnablePRTStrictNull : SubtargetFeature<"enable-prt-strict-null", def FeatureUseFlatForGlobal : SubtargetFeature<"flat-for-global", "UseFlatForGlobal", "true", - "Force to generate flat instruction for global" + "Force to generate flat instruction for global", + [], InlineIgnore >; defm AutoWaitcntBeforeBarrier : AMDGPUSubtargetFeature <"auto-waitcnt-before-barrier", "Hardware automatically inserts waitcnt before barrier", - /*GenPredicate=*/0 + /*GenPredicate=*/0, /*GenAssemblerPredicate=*/1, [], InlineIgnore >; defm BackOffBarrier : AMDGPUSubtargetFeature <"back-off-barrier", @@ -1316,7 +1330,8 @@ defm KernargPreload : AMDGPUSubtargetFeature 
<"kernarg-preload", // SH_MEM_CONFIG.alignment_mode defm UnalignedAccessMode : AMDGPUSubtargetFeature<"unaligned-access-mode", "Enable unaligned global, local and region loads and stores if the hardware" - " supports it" + " supports it", + /*GenPredicate=*/1, /*GenAssemblerPredicate=*/1, [], InlineIgnore >; defm PackedTID : AMDGPUSubtargetFeature<"packed-tid", diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index fe66a1a5d7242..40227c496a589 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -283,26 +283,6 @@ uint64_t AMDGPUTTIImpl::getMaxMemIntrinsicInlineSizeThreshold() const { return 1024; } -const FeatureBitset GCNTTIImpl::InlineFeatureIgnoreList = { - // Codegen control options which don't matter. - AMDGPU::FeatureEnableLoadStoreOpt, AMDGPU::FeatureEnableSIScheduler, - AMDGPU::FeatureEnableUnsafeDSOffsetFolding, AMDGPU::FeatureUseFlatForGlobal, - AMDGPU::FeatureUnalignedScratchAccess, AMDGPU::FeatureUnalignedAccessMode, - - AMDGPU::FeatureAutoWaitcntBeforeBarrier, - - // Property of the kernel/environment which can't actually differ. - AMDGPU::FeatureSGPRInitBug, AMDGPU::FeatureXNACK, - AMDGPU::FeatureXNACKOnOffModes, AMDGPU::FeatureSupportsXNACK, - AMDGPU::FeatureTrapHandler, - - // The default assumption needs to be ecc is enabled, but no directly - // exposed operations depend on it, so it can be safely inlined. 
- AMDGPU::FeatureSRAMECC, - - // Perf-tuning features - AMDGPU::FeatureFastFMAF32, AMDGPU::FeatureHalfRate64Ops}; - GCNTTIImpl::GCNTTIImpl(const AMDGPUTargetMachine *TM, const Function &F) : BaseT(TM, F.getDataLayout()), ST(static_cast(TM->getSubtargetImpl(F))), @@ -1569,12 +1549,7 @@ bool GCNTTIImpl::areInlineCompatible(const Function *Caller, const GCNSubtarget *CalleeST = static_cast(TM.getSubtargetImpl(*Callee)); - const FeatureBitset &CallerBits = CallerST->getFeatureBits(); - const FeatureBitset &CalleeBits = CalleeST->getFeatureBits(); - - FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList; - FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList; - if ((RealCallerBits & RealCalleeBits) != RealCalleeBits) + if (!BaseT::areInlineCompatible(Caller, Callee)) return false; // FIXME: dx10_clamp can just take the caller setting, but there seems to be diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 7551966cb8e15..9e833344e7341 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -44,8 +44,10 @@ def FeatureReserveEDI : SubtargetFeature<"reserve-edi", "ReservedRReg[X86::EDI]" def FeatureX87 : SubtargetFeature<"x87","HasX87", "true", "Enable X87 float instructions">; +// Does not have intrinsics or ABI effects. def FeatureNOPL : SubtargetFeature<"nopl", "HasNOPL", "true", - "Enable NOPL instruction (generally pentium pro+)">; + "Enable NOPL instruction (generally pentium pro+)", + [], InlineIgnore>; def FeatureCMOV : SubtargetFeature<"cmov","HasCMOV", "true", "Enable conditional move instructions">; @@ -104,10 +106,12 @@ def FeatureMMX : SubtargetFeature<"mmx","HasMMX", "true", // without disabling 64-bit mode. Nothing should imply this feature bit. It // is used to enforce that only 64-bit capable CPUs are used in 64-bit mode. 
def FeatureX86_64 : SubtargetFeature<"64bit", "HasX86_64", "true", - "Support 64-bit instructions">; + "Support 64-bit instructions", [], + InlineIgnore>; +// TODO: This should not be InlineIgnore, as it affects the atomics ABI. def FeatureCX16 : SubtargetFeature<"cx16", "HasCX16", "true", "64-bit with cmpxchg16b (this is true for most x86-64 chips, but not the first AMD chips)", - [FeatureCX8]>; + [FeatureCX8], InlineIgnore>; def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", "Support SSE 4a instructions", [FeatureSSE3]>; @@ -204,9 +208,10 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", def FeatureXOP : SubtargetFeature<"xop", "HasXOP", "true", "Enable XOP instructions", [FeatureFMA4]>; -def FeatureSSEUnalignedMem : SubtargetFeature<"sse-unaligned-mem", - "HasSSEUnalignedMem", "true", - "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)">; +def FeatureSSEUnalignedMem : SubtargetFeature< + "sse-unaligned-mem", "HasSSEUnalignedMem", "true", + "Allow unaligned memory operands with SSE instructions (this may require setting a configuration bit in the processor)", + [], InlineIgnore>; def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", "Enable AES instructions", [FeatureSSE2]>; @@ -253,8 +258,10 @@ def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", "Support PRFCHW instructions">; def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", "Support RDSEED instruction">; +// Does not have intrinsics or ABI effects. 
def FeatureLAHFSAHF64 : SubtargetFeature<"sahf", "HasLAHFSAHF64", "true", - "Support LAHF and SAHF instructions in 64-bit mode">; + "Support LAHF and SAHF instructions in 64-bit mode", + [], InlineIgnore>; def FeatureMWAITX : SubtargetFeature<"mwaitx", "HasMWAITX", "true", "Enable MONITORX/MWAITX timer functionality">; def FeatureCLZERO : SubtargetFeature<"clzero", "HasCLZERO", "true", @@ -488,55 +495,67 @@ def FeatureHardenSlsIJmp //===----------------------------------------------------------------------===// def TuningPreferMovmskOverVTest : SubtargetFeature<"prefer-movmsk-over-vtest", "PreferMovmskOverVTest", "true", - "Prefer movmsk over vtest instruction">; + "Prefer movmsk over vtest instruction", + [], InlineIgnore>; def TuningSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true", - "SHLD instruction is slow">; + "SHLD instruction is slow", + [], InlineIgnore>; def TuningSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true", - "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)">; + "PMULLD instruction is slow (compared to PMULLW/PMULHW and PMULUDQ)", + [], InlineIgnore>; def TuningSlowPMULLQ : SubtargetFeature<"slow-pmullq", "IsPMULLQSlow", "true", - "PMULLQ instruction is slow">; + "PMULLQ instruction is slow", + [], InlineIgnore>; def TuningSlowPMADDWD : SubtargetFeature<"slow-pmaddwd", "IsPMADDWDSlow", "true", - "PMADDWD is slower than PMULLD">; + "PMADDWD is slower than PMULLD", + [], InlineIgnore>; // FIXME: This should not apply to CPUs that do not have SSE. 
def TuningSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16", "IsUnalignedMem16Slow", "true", - "Slow unaligned 16-byte memory access">; + "Slow unaligned 16-byte memory access", + [], InlineIgnore>; def TuningSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32", "IsUnalignedMem32Slow", "true", - "Slow unaligned 32-byte memory access">; + "Slow unaligned 32-byte memory access", + [], InlineIgnore>; def TuningLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", - "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)">; + "Use LEA for adjusting the stack pointer (this is an optimization for Intel Atom processors)", + [], InlineIgnore>; // True if 8-bit divisions are significantly faster than // 32-bit divisions and should be used when possible. def TuningSlowDivide32 : SubtargetFeature<"idivl-to-divb", "HasSlowDivide32", "true", - "Use 8-bit divide for positive values less than 256">; + "Use 8-bit divide for positive values less than 256", + [], InlineIgnore>; // True if 32-bit divides are significantly faster than // 64-bit divisions and should be used when possible. def TuningSlowDivide64 : SubtargetFeature<"idivq-to-divl", "HasSlowDivide64", "true", - "Use 32-bit divide for positive values less than 2^32">; + "Use 32-bit divide for positive values less than 2^32", + [], InlineIgnore>; def TuningPadShortFunctions : SubtargetFeature<"pad-short-functions", "PadShortFunctions", "true", - "Pad short functions (to prevent a stall when returning too early)">; + "Pad short functions (to prevent a stall when returning too early)", + [], InlineIgnore>; // On some processors, instructions that implicitly take two memory operands are // slow. In practice, this means that CALL, PUSH, and POP with memory operands // should be avoided in favor of a MOV + register CALL/PUSH/POP. 
def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops", "SlowTwoMemOps", "true", - "Two memory operand instructions are slow">; + "Two memory operand instructions are slow", + [], InlineIgnore>; // On some processors, indirect calls from memory (CALL [mem]) are slow // compared to loading the address first and using a register indirect call. @@ -544,109 +563,131 @@ def TuningSlowTwoMemOps : SubtargetFeature<"slow-two-mem-ops", // affect PUSH from memory folding. def TuningSlowIndirectCall : SubtargetFeature<"slow-indirect-call", "SlowIndirectCall", "true", - "Indirect calls from memory are slow">; + "Indirect calls from memory are slow", + [], InlineIgnore>; // True if the LEA instruction inputs have to be ready at address generation // (AG) time. def TuningLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LeaUsesAG", "true", - "LEA instruction needs inputs at AG stage">; + "LEA instruction needs inputs at AG stage", + [], InlineIgnore>; def TuningSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", - "LEA instruction with certain arguments is slow">; + "LEA instruction with certain arguments is slow", + [], InlineIgnore>; // True if the LEA instruction has all three source operands: base, index, // and offset or if the LEA instruction uses base and index registers where // the base is EBP, RBP,or R13 def TuningSlow3OpsLEA : SubtargetFeature<"slow-3ops-lea", "Slow3OpsLEA", "true", - "LEA instruction with 3 ops or certain registers is slow">; + "LEA instruction with 3 ops or certain registers is slow", + [], InlineIgnore>; // True if INC and DEC instructions are slow when writing to flags def TuningSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", - "INC and DEC instructions are slower than ADD and SUB">; + "INC and DEC instructions are slower than ADD and SUB", + [], InlineIgnore>; def TuningPOPCNTFalseDeps : SubtargetFeature<"false-deps-popcnt", "HasPOPCNTFalseDeps", "true", - "POPCNT has a false dependency on dest register">; + "POPCNT has 
a false dependency on dest register", + [], InlineIgnore>; def TuningLZCNTFalseDeps : SubtargetFeature<"false-deps-lzcnt-tzcnt", "HasLZCNTFalseDeps", "true", - "LZCNT/TZCNT have a false dependency on dest register">; + "LZCNT/TZCNT have a false dependency on dest register", + [], InlineIgnore>; def TuningMULCFalseDeps : SubtargetFeature<"false-deps-mulc", "HasMULCFalseDeps", "true", - "VF[C]MULCPH/SH has a false dependency on dest register">; + "VF[C]MULCPH/SH has a false dependency on dest register", + [], InlineIgnore>; def TuningPERMFalseDeps : SubtargetFeature<"false-deps-perm", "HasPERMFalseDeps", "true", - "VPERMD/Q/PS/PD has a false dependency on dest register">; + "VPERMD/Q/PS/PD has a false dependency on dest register", + [], InlineIgnore>; def TuningRANGEFalseDeps : SubtargetFeature<"false-deps-range", "HasRANGEFalseDeps", "true", - "VRANGEPD/PS/SD/SS has a false dependency on dest register">; + "VRANGEPD/PS/SD/SS has a false dependency on dest register", + [], InlineIgnore>; def TuningGETMANTFalseDeps : SubtargetFeature<"false-deps-getmant", "HasGETMANTFalseDeps", "true", "VGETMANTSS/SD/SH and VGETMANDPS/PD(memory version) has a" - " false dependency on dest register">; + " false dependency on dest register", + [], InlineIgnore>; def TuningMULLQFalseDeps : SubtargetFeature<"false-deps-mullq", "HasMULLQFalseDeps", "true", - "VPMULLQ has a false dependency on dest register">; + "VPMULLQ has a false dependency on dest register", + [], InlineIgnore>; def TuningSBBDepBreaking : SubtargetFeature<"sbb-dep-breaking", "HasSBBDepBreaking", "true", - "SBB with same register has no source dependency">; + "SBB with same register has no source dependency", + [], InlineIgnore>; // On recent X86 (port bound) processors, its preferable to combine to a single shuffle // using a variable mask over multiple fixed shuffles. 
def TuningFastVariableCrossLaneShuffle : SubtargetFeature<"fast-variable-crosslane-shuffle", "HasFastVariableCrossLaneShuffle", - "true", "Cross-lane shuffles with variable masks are fast">; + "true", "Cross-lane shuffles with variable masks are fast", + [], InlineIgnore>; def TuningFastVariablePerLaneShuffle : SubtargetFeature<"fast-variable-perlane-shuffle", "HasFastVariablePerLaneShuffle", - "true", "Per-lane shuffles with variable masks are fast">; + "true", "Per-lane shuffles with variable masks are fast", + [], InlineIgnore>; // Goldmont / Tremont (atom in general) has no bypass delay def TuningNoDomainDelay : SubtargetFeature<"no-bypass-delay", "NoDomainDelay","true", - "Has no bypass delay when using the 'wrong' domain">; + "Has no bypass delay when using the 'wrong' domain", + [], InlineIgnore>; // Many processors (Nehalem+ on Intel) have no bypass delay when // using the wrong mov type. def TuningNoDomainDelayMov : SubtargetFeature<"no-bypass-delay-mov", "NoDomainDelayMov","true", - "Has no bypass delay when using the 'wrong' mov type">; + "Has no bypass delay when using the 'wrong' mov type", + [], InlineIgnore>; // Newer processors (Skylake+ on Intel) have no bypass delay when // using the wrong blend type. def TuningNoDomainDelayBlend : SubtargetFeature<"no-bypass-delay-blend", "NoDomainDelayBlend","true", - "Has no bypass delay when using the 'wrong' blend type">; + "Has no bypass delay when using the 'wrong' blend type", + [], InlineIgnore>; // Newer processors (Haswell+ on Intel) have no bypass delay when // using the wrong shuffle type. def TuningNoDomainDelayShuffle : SubtargetFeature<"no-bypass-delay-shuffle", "NoDomainDelayShuffle","true", - "Has no bypass delay when using the 'wrong' shuffle type">; + "Has no bypass delay when using the 'wrong' shuffle type", + [], InlineIgnore>; // Prefer lowering shuffles on AVX512 targets (e.g. Skylake Server) to // imm shifts/rotate if they can use more ports than regular shuffles. 
def TuningPreferShiftShuffle : SubtargetFeature<"faster-shift-than-shuffle", "PreferLowerShuffleAsShift", "true", - "Shifts are faster (or as fast) as shuffle">; + "Shifts are faster (or as fast) as shuffle", + [], InlineIgnore>; def TuningFastImmVectorShift : SubtargetFeature<"tuning-fast-imm-vector-shift", "FastImmVectorShift", "true", - "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)">; + "Vector shifts are fast (2/cycle) as opposed to slow (1/cycle)", + [], InlineIgnore>; // On some X86 processors, a vzeroupper instruction should be inserted after // using ymm/zmm registers before executing code that may use SSE instructions. def TuningInsertVZEROUPPER : SubtargetFeature<"vzeroupper", "InsertVZEROUPPER", - "true", "Should insert vzeroupper instructions">; + "true", "Should insert vzeroupper instructions", + [], InlineIgnore>; // TuningFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency // than the corresponding NR code. TuningFastVectorFSQRT should be enabled if @@ -660,37 +701,43 @@ def TuningInsertVZEROUPPER // RSQRTSS followed by a Newton-Raphson iteration. def TuningFastScalarFSQRT : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT", - "true", "Scalar SQRT is fast (disable Newton-Raphson)">; + "true", "Scalar SQRT is fast (disable Newton-Raphson)", + [], InlineIgnore>; // True if hardware SQRTPS/VSQRTPS instructions are at least as fast // (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration. def TuningFastVectorFSQRT : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT", - "true", "Vector SQRT is fast (disable Newton-Raphson)">; + "true", "Vector SQRT is fast (disable Newton-Raphson)", + [], InlineIgnore>; // If lzcnt has equivalent latency/throughput to most simple integer ops, it can // be used to replace test/set sequences. 
def TuningFastLZCNT : SubtargetFeature< "fast-lzcnt", "HasFastLZCNT", "true", - "LZCNT instructions are as fast as most simple integer ops">; + "LZCNT instructions are as fast as most simple integer ops", + [], InlineIgnore>; // If the target can efficiently decode NOPs upto 7-bytes in length. def TuningFast7ByteNOP : SubtargetFeature< "fast-7bytenop", "HasFast7ByteNOP", "true", - "Target can quickly decode up to 7 byte NOPs">; + "Target can quickly decode up to 7 byte NOPs", + [], InlineIgnore>; // If the target can efficiently decode NOPs upto 11-bytes in length. def TuningFast11ByteNOP : SubtargetFeature< "fast-11bytenop", "HasFast11ByteNOP", "true", - "Target can quickly decode up to 11 byte NOPs">; + "Target can quickly decode up to 11 byte NOPs", + [], InlineIgnore>; // If the target can efficiently decode NOPs upto 15-bytes in length. def TuningFast15ByteNOP : SubtargetFeature< "fast-15bytenop", "HasFast15ByteNOP", "true", - "Target can quickly decode up to 15 byte NOPs">; + "Target can quickly decode up to 15 byte NOPs", + [], InlineIgnore>; // Sandy Bridge and newer processors can use SHLD with the same source on both // inputs to implement rotate to avoid the partial flag update of the normal @@ -698,20 +745,23 @@ def TuningFast15ByteNOP def TuningFastSHLDRotate : SubtargetFeature< "fast-shld-rotate", "HasFastSHLDRotate", "true", - "SHLD can be used as a faster rotate">; + "SHLD can be used as a faster rotate", + [], InlineIgnore>; // Bulldozer and newer processors can merge CMP/TEST (but not other // instructions) with conditional branches. def TuningBranchFusion : SubtargetFeature<"branchfusion", "HasBranchFusion", "true", - "CMP/TEST can be fused with conditional branches">; + "CMP/TEST can be fused with conditional branches", + [], InlineIgnore>; // Sandy Bridge and newer processors have many instructions that can be // fused with conditional branches and pass through the CPU as a single // operation. 
def TuningMacroFusion : SubtargetFeature<"macrofusion", "HasMacroFusion", "true", - "Various instructions can be fused with conditional branches">; + "Various instructions can be fused with conditional branches", + [], InlineIgnore>; // Gather is available since Haswell (AVX2 set). So technically, we can // generate Gathers on all AVX2 processors. But the overhead on HSW is high. @@ -719,40 +769,49 @@ def TuningMacroFusion // similar to Skylake Server (AVX-512). def TuningFastGather : SubtargetFeature<"fast-gather", "HasFastGather", "true", - "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)">; + "Indicates if gather is reasonably fast (this is true for Skylake client and all AVX-512 CPUs)", + [], InlineIgnore>; // Generate vpdpwssd instead of vpmaddwd+vpaddd sequence. def TuningFastDPWSSD : SubtargetFeature< "fast-dpwssd", "HasFastDPWSSD", "true", - "Prefer vpdpwssd instruction over vpmaddwd+vpaddd instruction sequence">; + "Prefer vpdpwssd instruction over vpmaddwd+vpaddd instruction sequence", + [], InlineIgnore>; def TuningPreferNoGather : SubtargetFeature<"prefer-no-gather", "PreferGather", "false", - "Prefer no gather instructions">; + "Prefer no gather instructions", + [], InlineIgnore>; def TuningPreferNoScatter : SubtargetFeature<"prefer-no-scatter", "PreferScatter", "false", - "Prefer no scatter instructions">; + "Prefer no scatter instructions", + [], InlineIgnore>; def TuningPrefer128Bit : SubtargetFeature<"prefer-128-bit", "Prefer128Bit", "true", - "Prefer 128-bit AVX instructions">; + "Prefer 128-bit AVX instructions", + [], InlineIgnore>; def TuningPrefer256Bit : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true", - "Prefer 256-bit AVX instructions">; + "Prefer 256-bit AVX instructions", + [], InlineIgnore>; def TuningAllowLight256Bit : SubtargetFeature<"allow-light-256-bit", "AllowLight256Bit", "true", - "Enable generation of 256-bit load/stores even if we prefer 128-bit">; + "Enable generation of 
256-bit load/stores even if we prefer 128-bit", + [], InlineIgnore>; def TuningPreferMaskRegisters : SubtargetFeature<"prefer-mask-registers", "PreferMaskRegisters", "true", - "Prefer AVX512 mask registers over PTEST/MOVMSK">; + "Prefer AVX512 mask registers over PTEST/MOVMSK", + [], InlineIgnore>; def TuningFastBEXTR : SubtargetFeature<"fast-bextr", "HasFastBEXTR", "true", "Indicates that the BEXTR instruction is implemented as a single uop " - "with good throughput">; + "with good throughput", + [], InlineIgnore>; // Combine vector math operations with shuffles into horizontal math // instructions if a CPU implements horizontal operations (introduced with @@ -761,46 +820,53 @@ def TuningFastHorizontalOps : SubtargetFeature< "fast-hops", "HasFastHorizontalOps", "true", "Prefer horizontal vector math instructions (haddp, phsub, etc.) over " - "normal vector instructions with shuffles">; + "normal vector instructions with shuffles", + [], InlineIgnore>; def TuningFastScalarShiftMasks : SubtargetFeature< "fast-scalar-shift-masks", "HasFastScalarShiftMasks", "true", - "Prefer a left/right scalar logical shift pair over a shift+and pair">; + "Prefer a left/right scalar logical shift pair over a shift+and pair", + [], InlineIgnore>; def TuningFastVectorShiftMasks : SubtargetFeature< "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true", - "Prefer a left/right vector logical shift pair over a shift+and pair">; + "Prefer a left/right vector logical shift pair over a shift+and pair", + [], InlineIgnore>; def TuningFastMOVBE : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true", - "Prefer a movbe over a single-use load + bswap / single-use bswap + store">; + "Prefer a movbe over a single-use load + bswap / single-use bswap + store", + [], InlineIgnore>; def TuningFastImm16 : SubtargetFeature<"fast-imm16", "HasFastImm16", "true", - "Prefer a i16 instruction with i16 immediate over extension to i32">; + "Prefer a i16 instruction with i16 immediate over extension to 
i32", + [], InlineIgnore>; def TuningUseSLMArithCosts : SubtargetFeature<"use-slm-arith-costs", "UseSLMArithCosts", "true", - "Use Silvermont specific arithmetic costs">; + "Use Silvermont specific arithmetic costs", [], InlineIgnore>; def TuningUseGLMDivSqrtCosts : SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true", - "Use Goldmont specific floating point div/sqrt costs">; + "Use Goldmont specific floating point div/sqrt costs", + [], InlineIgnore>; def TuningNDDMem : SubtargetFeature<"prefer-ndd-mem", "HasNDDM", - "true", "Prefer NDD memory addressing">; + "true", "Prefer NDD memory addressing", [], InlineIgnore>; // Starting with Redwood Cove architecture, the branch has branch taken hint // (i.e., instruction prefix 3EH). def TuningBranchHint: SubtargetFeature<"branch-hint", "HasBranchHint", "true", - "Target has branch hint feature">; + "Target has branch hint feature", + [], InlineIgnore>; def TuningPreferLegacySetCC : SubtargetFeature<"prefer-legacy-setcc", "PreferLegacySetCC", "true", - "Prefer to emit legacy SetCC.">; + "Prefer to emit legacy SetCC.", [], InlineIgnore>; //===----------------------------------------------------------------------===// // X86 CPU Families @@ -808,7 +874,7 @@ def TuningPreferLegacySetCC //===----------------------------------------------------------------------===// // Bonnell -def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor">; +def ProcIntelAtom : SubtargetFeature<"", "IsAtom", "true", "Is Intel Atom processor", [], InlineIgnore>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index fba2f08912fed..0dee9be6667c4 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -6727,8 +6727,10 @@ bool X86TTIImpl::areInlineCompatible(const Function 
*Caller, // Check whether callee features are a subset of caller features // (apart from the ignore list). - FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList; - FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList; + const FeatureBitset &InlineIgnoreFeatures = + CallerSubtarget.getInlineIgnoreFeatures(); + FeatureBitset RealCallerBits = CallerBits & ~InlineIgnoreFeatures; + FeatureBitset RealCalleeBits = CalleeBits & ~InlineIgnoreFeatures; if ((RealCallerBits & RealCalleeBits) != RealCalleeBits) return false; diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h index 4120421622b21..556e997b01bd3 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.h +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h @@ -36,82 +36,6 @@ class X86TTIImpl final : public BasicTTIImplBase { const X86Subtarget *getST() const { return ST; } const X86TargetLowering *getTLI() const { return TLI; } - const FeatureBitset InlineFeatureIgnoreList = { - // clang-format off - // This indicates the CPU is 64 bit capable not that we are in 64-bit - // mode. - X86::FeatureX86_64, - - // These features don't have any intrinsics or ABI effect. - X86::FeatureNOPL, - X86::FeatureCX16, - X86::FeatureLAHFSAHF64, - - // Some older targets can be setup to fold unaligned loads. - X86::FeatureSSEUnalignedMem, - - // Codegen control options. 
- X86::TuningFast11ByteNOP, - X86::TuningFast15ByteNOP, - X86::TuningFastBEXTR, - X86::TuningFastHorizontalOps, - X86::TuningFastLZCNT, - X86::TuningFastScalarFSQRT, - X86::TuningFastSHLDRotate, - X86::TuningFastScalarShiftMasks, - X86::TuningFastVectorShiftMasks, - X86::TuningFastVariableCrossLaneShuffle, - X86::TuningFastVariablePerLaneShuffle, - X86::TuningFastVectorFSQRT, - X86::TuningLEAForSP, - X86::TuningLEAUsesAG, - X86::TuningLZCNTFalseDeps, - X86::TuningBranchFusion, - X86::TuningMacroFusion, - X86::TuningPadShortFunctions, - X86::TuningPOPCNTFalseDeps, - X86::TuningMULCFalseDeps, - X86::TuningPERMFalseDeps, - X86::TuningRANGEFalseDeps, - X86::TuningGETMANTFalseDeps, - X86::TuningMULLQFalseDeps, - X86::TuningSlow3OpsLEA, - X86::TuningSlowDivide32, - X86::TuningSlowDivide64, - X86::TuningSlowIncDec, - X86::TuningSlowIndirectCall, - X86::TuningSlowLEA, - X86::TuningSlowPMADDWD, - X86::TuningSlowPMULLD, - X86::TuningSlowSHLD, - X86::TuningSlowTwoMemOps, - X86::TuningSlowUAMem16, - X86::TuningPreferMaskRegisters, - X86::TuningInsertVZEROUPPER, - X86::TuningUseSLMArithCosts, - X86::TuningUseGLMDivSqrtCosts, - X86::TuningNoDomainDelay, - X86::TuningNoDomainDelayMov, - X86::TuningNoDomainDelayShuffle, - X86::TuningNoDomainDelayBlend, - X86::TuningPreferShiftShuffle, - X86::TuningFastImmVectorShift, - X86::TuningFastDPWSSD, - - // Perf-tuning flags. - X86::TuningFastGather, - X86::TuningSlowUAMem32, - X86::TuningAllowLight256Bit, - - // Based on whether user set the -mprefer-vector-width command line. - X86::TuningPrefer128Bit, - X86::TuningPrefer256Bit, - - // CPU name enums. These just follow CPU string. 
- X86::ProcIntelAtom - // clang-format on - }; - public: explicit X86TTIImpl(const X86TargetMachine *TM, const Function &F) : BaseT(TM, F.getDataLayout()), ST(TM->getSubtargetImpl(F)), diff --git a/llvm/test/TableGen/aarch64-apple-tuning-features.td b/llvm/test/TableGen/aarch64-apple-tuning-features.td index 94501fd6043fe..fb0bd1e1c204f 100644 --- a/llvm/test/TableGen/aarch64-apple-tuning-features.td +++ b/llvm/test/TableGen/aarch64-apple-tuning-features.td @@ -25,6 +25,7 @@ // CHECK-NEXT: FeatureZCZeroingFPR128, // CHECK-NEXT: FeatureZCZeroingGPR32, // CHECK-NEXT: FeatureZCZeroingGPR64]; +// CHECK-NEXT: InlineBehavior InlineBehavior = InlineDefault; // CHECK-NEXT: } // CHECK-LABEL: def TuneAppleA11 { // SubtargetFeature // CHECK-NEXT: string Name = "apple-a11"; @@ -45,6 +46,7 @@ // CHECK-NEXT: FeatureZCZeroingFPR128, // CHECK-NEXT: FeatureZCZeroingGPR32, // CHECK-NEXT: FeatureZCZeroingGPR64]; +// CHECK-NEXT: InlineBehavior InlineBehavior = InlineDefault; // CHECK-NEXT: } // CHECK-LABEL: def TuneAppleA12 { // SubtargetFeature // CHECK-NEXT: string Name = "apple-a12"; @@ -65,6 +67,7 @@ // CHECK-NEXT: FeatureZCZeroingFPR128, // CHECK-NEXT: FeatureZCZeroingGPR32, // CHECK-NEXT: FeatureZCZeroingGPR64]; +// CHECK-NEXT: InlineBehavior InlineBehavior = InlineDefault; // CHECK-NEXT: } // CHECK-LABEL: def TuneAppleA13 { // SubtargetFeature // CHECK-NEXT: string Name = "apple-a13"; @@ -85,6 +88,7 @@ // CHECK-NEXT: FeatureZCZeroingFPR128, // CHECK-NEXT: FeatureZCZeroingGPR32, // CHECK-NEXT: FeatureZCZeroingGPR64]; +// CHECK-NEXT: InlineBehavior InlineBehavior = InlineDefault; // CHECK-NEXT: } // CHECK-LABEL: def TuneAppleA14 { // SubtargetFeature // CHECK-NEXT: string Name = "apple-a14"; @@ -113,6 +117,7 @@ // CHECK-NEXT: FeatureZCZeroingFPR128, // CHECK-NEXT: FeatureZCZeroingGPR32, // CHECK-NEXT: FeatureZCZeroingGPR64]; +// CHECK-NEXT: InlineBehavior InlineBehavior = InlineDefault; // CHECK-NEXT: } // CHECK-LABEL: def TuneAppleA15 { // SubtargetFeature // CHECK-NEXT: string 
Name = "apple-a15"; @@ -141,6 +146,7 @@ // CHECK-NEXT: FeatureZCZeroingFPR128, // CHECK-NEXT: FeatureZCZeroingGPR32, // CHECK-NEXT: FeatureZCZeroingGPR64]; +// CHECK-NEXT: InlineBehavior InlineBehavior = InlineDefault; // CHECK-NEXT: } // CHECK-LABEL: def TuneAppleA16 { // SubtargetFeature // CHECK-NEXT: string Name = "apple-a16"; @@ -170,6 +176,7 @@ // CHECK-NEXT: FeatureZCZeroingFPR128, // CHECK-NEXT: FeatureZCZeroingGPR32, // CHECK-NEXT: FeatureZCZeroingGPR64]; +// CHECK-NEXT: InlineBehavior InlineBehavior = InlineDefault; // CHECK-NEXT: } // CHECK-LABEL: def TuneAppleA17 { // SubtargetFeature // CHECK-NEXT: string Name = "apple-a17"; @@ -199,6 +206,7 @@ // CHECK-NEXT: FeatureZCZeroingFPR128, // CHECK-NEXT: FeatureZCZeroingGPR32, // CHECK-NEXT: FeatureZCZeroingGPR64]; +// CHECK-NEXT: InlineBehavior InlineBehavior = InlineDefault; // CHECK-NEXT: } // CHECK-LABEL: def TuneAppleA7 { // SubtargetFeature // CHECK-NEXT: string Name = "apple-a7"; @@ -220,6 +228,7 @@ // CHECK-NEXT: FeatureZCZeroingFPR128, // CHECK-NEXT: FeatureZCZeroingGPR32, // CHECK-NEXT: FeatureZCZeroingGPR64]; +// CHECK-NEXT: InlineBehavior InlineBehavior = InlineDefault; // CHECK-NEXT: } // CHECK-LABEL: def TuneAppleM4 { // SubtargetFeature // CHECK-NEXT: string Name = "apple-m4"; @@ -248,6 +257,7 @@ // CHECK-NEXT: FeatureZCZeroingFPR128, // CHECK-NEXT: FeatureZCZeroingGPR32, // CHECK-NEXT: FeatureZCZeroingGPR64]; +// CHECK-NEXT: InlineBehavior InlineBehavior = InlineDefault; // CHECK-NEXT: } // CHECK-LABEL: def TuneAppleM5 { // SubtargetFeature // CHECK-NEXT: string Name = "apple-m5"; @@ -276,4 +286,5 @@ // CHECK-NEXT: FeatureZCZeroingFPR128, // CHECK-NEXT: FeatureZCZeroingGPR32, // CHECK-NEXT: FeatureZCZeroingGPR64]; +// CHECK-NEXT: InlineBehavior InlineBehavior = InlineDefault; // CHECK-NEXT: } diff --git a/llvm/unittests/CodeGen/MFCommon.inc b/llvm/unittests/CodeGen/MFCommon.inc index 6c47461629070..a3a8b3652ad47 100644 --- a/llvm/unittests/CodeGen/MFCommon.inc +++ 
b/llvm/unittests/CodeGen/MFCommon.inc @@ -101,6 +101,16 @@ public: const TargetRegisterInfo *getRegisterInfo() const override { return &TRI; } + const FeatureBitset &getInlineIgnoreFeatures() const override { + static constexpr FeatureBitset Features; + return Features; + } + + const FeatureBitset &getInlineInverseFeatures() const override { + static constexpr FeatureBitset Features; + return Features; + } + private: BogusFrameLowering FL; BogusRegisterInfo TRI; diff --git a/llvm/utils/TableGen/SubtargetEmitter.cpp b/llvm/utils/TableGen/SubtargetEmitter.cpp index c3a9d69d6ec3e..a915ce0daec2d 100644 --- a/llvm/utils/TableGen/SubtargetEmitter.cpp +++ b/llvm/utils/TableGen/SubtargetEmitter.cpp @@ -136,6 +136,8 @@ class SubtargetEmitter : TargetFeaturesEmitter { void emitGetMacroFusions(const std::string &ClassName, raw_ostream &OS); void emitHwModeCheck(const std::string &ClassName, raw_ostream &OS, bool IsMC); + void emitInlineFeatures(const std::string &ClassName, raw_ostream &OS, + StringRef Behavior); void parseFeaturesFunction(raw_ostream &OS); public: @@ -1990,6 +1992,25 @@ void SubtargetEmitter::parseFeaturesFunction(raw_ostream &OS) { OS << "}\n"; } +void SubtargetEmitter::emitInlineFeatures(const std::string &ClassName, + raw_ostream &OS, StringRef Behavior) { + std::vector FeatureList = + Records.getAllDerivedDefinitions("SubtargetFeature"); + llvm::sort(FeatureList, LessRecordFieldFieldName()); + + OS << "const FeatureBitset &" << ClassName << "::get" << Behavior + << "Features() const {\n" + << " static constexpr FeatureBitset Features = {\n"; + + for (const Record *Feature : FeatureList) + if (Behavior == Feature->getValueAsDef("InlineBehavior")->getName()) + OS << Target << "::" << Feature->getName() << ",\n"; + + OS << " };\n" + << " return Features;\n" + << "}\n\n"; +} + void SubtargetEmitter::emitGenMCSubtargetInfo(raw_ostream &OS) { { NamespaceEmitter NS(OS, (Target + Twine("_MC")).str()); @@ -2177,6 +2198,9 @@ void 
SubtargetEmitter::emitHeader(raw_ostream &OS) { OS << " std::vector getMacroFusions() const " "final;\n"; + OS << " const FeatureBitset &getInlineIgnoreFeatures() const override;\n"; + OS << " const FeatureBitset &getInlineInverseFeatures() const override;\n"; + STIPredicateExpander PE(Target); PE.setByRef(false); for (const STIPredicateFunction &Fn : SchedModels.getSTIPredicates()) @@ -2244,6 +2268,8 @@ void SubtargetEmitter::emitCtor(raw_ostream &OS, unsigned NumNames, emitSchedModelHelpers(ClassName, OS); emitHwModeCheck(ClassName, OS, /*IsMC=*/false); emitGetMacroFusions(ClassName, OS); + emitInlineFeatures(ClassName, OS, "InlineIgnore"); + emitInlineFeatures(ClassName, OS, "InlineInverse"); } // From 9bd4d62e42a29e6f27a56390428302591f225421 Mon Sep 17 00:00:00 2001 From: LukeZhuang Date: Thu, 25 Jun 2026 15:49:56 +0800 Subject: [PATCH 468/511] [RISCV] Add explicit defs in PseudoLA_TLSDESC (#205494) (Separated from #205027, per this discussion thread: https://github.com/llvm/llvm-project/pull/205027#discussion_r3464170432 and https://github.com/llvm/llvm-project/pull/205027#issuecomment-4785382877) Per the psABI, la.tlsdesc clobbers a0/t0. The defs are not strictly required now because the expansion happens pre-RA, but they are added in case some passes need them or the expansion is moved to post-RA in the future. A test case is also added showing the correct TLSDESC clobbering behavior.
--- llvm/lib/Target/RISCV/RISCVInstrInfo.td | 4 +- llvm/test/CodeGen/RISCV/tlsdesc-clobber.ll | 53 ++++++++++++++++++++++ 2 files changed, 56 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/RISCV/tlsdesc-clobber.ll diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 2abd63202a4bf..7b72dfbbad91e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1956,7 +1956,9 @@ def PseudoLA_TLS_GD : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), [], "la.tls.gd", "$dst, $src">; let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Size = 32 in -def PseudoLA_TLSDESC : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), []>; +def PseudoLA_TLSDESC : Pseudo<(outs GPR:$dst), (ins bare_symbol:$src), []> { + let Defs = [X5, X10]; +} def TLSDESCCallSymbol : AsmOperandClass { let Name = "TLSDESCCallSymbol"; diff --git a/llvm/test/CodeGen/RISCV/tlsdesc-clobber.ll b/llvm/test/CodeGen/RISCV/tlsdesc-clobber.ll new file mode 100644 index 0000000000000..132ff7d83235d --- /dev/null +++ b/llvm/test/CodeGen/RISCV/tlsdesc-clobber.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=riscv64 -relocation-model=pic -enable-tlsdesc < %s \ +; RUN: | FileCheck %s --check-prefix=RV64 +; RUN: llc -mtriple=riscv32 -relocation-model=pic -enable-tlsdesc < %s \ +; RUN: | FileCheck %s --check-prefix=RV32 + +;; Verify that the TLSDESC resolver only clobbers a0 and t0 per the psABI. +;; Arguments in ra/a1-a3 need not be saved/restored across the TLSDESC call, +;; unlike a normal function call. + +@tls_var = external thread_local global i32 + +;; ra/a1-a3 need not be saved to stack or moved to s-regs. 
+ +define i32 @test_clobber(i32 %a, i32 %b, i32 %c, i32 %d) nounwind { +; RV64-LABEL: test_clobber: +; RV64: # %bb.0: # %entry +; RV64-NEXT: .Ltlsdesc_hi0: +; RV64-NEXT: auipc a5, %tlsdesc_hi(tls_var) +; RV64-NEXT: ld a6, %tlsdesc_load_lo(.Ltlsdesc_hi0)(a5) +; RV64-NEXT: mv a4, a0 +; RV64-NEXT: addi a0, a5, %tlsdesc_add_lo(.Ltlsdesc_hi0) +; RV64-NEXT: jalr t0, 0(a6), %tlsdesc_call(.Ltlsdesc_hi0) +; RV64-NEXT: add a1, a4, a1 +; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: add a3, a0, tp +; RV64-NEXT: addw a0, a1, a2 +; RV64-NEXT: li a1, 1 +; RV64-NEXT: sw a1, 0(a3) +; RV64-NEXT: ret +; +; RV32-LABEL: test_clobber: +; RV32: # %bb.0: # %entry +; RV32-NEXT: .Ltlsdesc_hi0: +; RV32-NEXT: auipc a5, %tlsdesc_hi(tls_var) +; RV32-NEXT: lw a6, %tlsdesc_load_lo(.Ltlsdesc_hi0)(a5) +; RV32-NEXT: mv a4, a0 +; RV32-NEXT: addi a0, a5, %tlsdesc_add_lo(.Ltlsdesc_hi0) +; RV32-NEXT: jalr t0, 0(a6), %tlsdesc_call(.Ltlsdesc_hi0) +; RV32-NEXT: add a1, a4, a1 +; RV32-NEXT: add a2, a2, a3 +; RV32-NEXT: add a3, a0, tp +; RV32-NEXT: add a0, a1, a2 +; RV32-NEXT: li a1, 1 +; RV32-NEXT: sw a1, 0(a3) +; RV32-NEXT: ret +entry: + store i32 1, ptr @tls_var + %sum1 = add i32 %a, %b + %sum2 = add i32 %sum1, %c + %sum3 = add i32 %sum2, %d + ret i32 %sum3 +} From 432ecada4c53fb5bd0f07db1ff2d18b487a7c492 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Thu, 25 Jun 2026 10:12:14 +0200 Subject: [PATCH 469/511] [clang][ExprConst] Add early exit in `evaluateDestruction()` (#205476) There's nothing to do here for them. It's a little sad that we still create the vector holding the notes in the caller but setting `HasConstantDestruction` is a side-effect of `evaluateDestruction()` that some callers rely on.
--- clang/lib/AST/ExprConstant.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 5ee27dd4e2ba2..28ac44edd800c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -21714,6 +21714,19 @@ bool Expr::EvaluateAsInitializer(const ASTContext &Ctx, const VarDecl *VD, bool VarDecl::evaluateDestruction( SmallVectorImpl &Notes) const { + // This function is only meaningful for records and arrays of records. + QualType VarTy = getType(); + if (VarTy->isArrayType()) { + QualType ElemTy = getASTContext().getBaseElementType(VarTy); + if (!ElemTy->isRecordType()) { + ensureEvaluatedStmt()->HasConstantDestruction = true; + return true; + } + } else if (!VarTy->isRecordType()) { + ensureEvaluatedStmt()->HasConstantDestruction = true; + return true; + } + Expr::EvalStatus EStatus; EStatus.Diag = &Notes; @@ -21728,7 +21741,7 @@ bool VarDecl::evaluateDestruction( APValue DestroyedValue; if (getEvaluatedValue()) DestroyedValue = *getEvaluatedValue(); - else if (!handleDefaultInitValue(getType(), DestroyedValue)) + else if (!handleDefaultInitValue(VarTy, DestroyedValue)) return false; if (Ctx.getLangOpts().EnableNewConstInterp) { @@ -21745,7 +21758,7 @@ bool VarDecl::evaluateDestruction( return true; } - if (!EvaluateDestruction(Ctx, this, std::move(DestroyedValue), getType(), + if (!EvaluateDestruction(Ctx, this, std::move(DestroyedValue), VarTy, getLocation(), EStatus, IsConstantDestruction) || EStatus.HasSideEffects) return false; From 8560f143a8d01fad802f97fae63709396f92f6af Mon Sep 17 00:00:00 2001 From: Volodymyr Turanskyy Date: Thu, 25 Jun 2026 09:23:06 +0100 Subject: [PATCH 470/511] [clang][Driver] Pass --cstdlib through to multilibs (#203884) Pass the --cstdlib through to the multilibs backend so that the value specified on the command line can be used inside the multilibs YAML file. Assisted-by: codex, reviewed and tested by me. 
--- clang/lib/Driver/ToolChain.cpp | 4 ++++ clang/test/Driver/baremetal-multilib.yaml | 11 +++++++++++ clang/test/Driver/print-multi-selection-flags.c | 4 ++++ 3 files changed, 19 insertions(+) diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp index 328f4f8c8f420..726c8a6ad229a 100644 --- a/clang/lib/Driver/ToolChain.cpp +++ b/clang/lib/Driver/ToolChain.cpp @@ -504,6 +504,10 @@ ToolChain::getMultilibFlags(const llvm::opt::ArgList &Args) const { processMultilibCustomFlags(Result, Args); + if (Arg *CStdLibArg = Args.getLastArg(options::OPT_cstdlib_EQ)) + Result.push_back(std::string(CStdLibArg->getOption().getPrefixedName()) + + CStdLibArg->getValue()); + // Include fno-exceptions and fno-rtti // to improve multilib selection if (getRTTIMode() == ToolChain::RTTIMode::RM_Disabled) diff --git a/clang/test/Driver/baremetal-multilib.yaml b/clang/test/Driver/baremetal-multilib.yaml index c2b37fad97dea..17df2972116b1 100644 --- a/clang/test/Driver/baremetal-multilib.yaml +++ b/clang/test/Driver/baremetal-multilib.yaml @@ -14,6 +14,14 @@ # CHECK-SAME: "-lc" # CHECK-SAME: "-o" "{{.*}}.tmp.out" +# RUN: %clang --multi-lib-config=%s -no-canonical-prefixes -x c++ %s -### -Werror -o %t.out 2>&1 \ +# RUN: --target=thumbv8m.main-none-eabihf --cstdlib=picolibc --sysroot= \ +# RUN: | FileCheck --check-prefix=CHECK-CSTDLIB %s +# CHECK-CSTDLIB: "-cc1" "-triple" "thumbv8m.main-unknown-none-eabihf" +# CHECK-CSTDLIB-SAME: "-internal-isystem" "[[SYSROOT:[^"]*]]/bin/../lib/clang-runtimes/arm-none-eabi/thumb/v8-m.main/fp/picolibc/include/c++/v1" +# CHECK-CSTDLIB-NEXT: ld{{(.exe)?}}" "-Bstatic" +# CHECK-CSTDLIB-SAME: "-L[[SYSROOT]]/bin/../lib/clang-runtimes/arm-none-eabi/thumb/v8-m.main/fp/picolibc/lib" + # RUN: %clang --multi-lib-config=%s -no-canonical-prefixes -x c++ %s -### -o %t.out 2>&1 \ # RUN: --target=thumbv7em-none-eabi -mfpu=fpv4-sp-d16 --sysroot= \ # RUN: | FileCheck --check-prefix=CHECK-NO-MATCH %s @@ -106,6 +114,9 @@ Variants: - Dir: 
arm-none-eabi/thumb/v8-m.main/fp Flags: [--target=thumbv8m.main-unknown-none-eabihf, -mfpu=fpv5-d16] +- Dir: arm-none-eabi/thumb/v8-m.main/fp/picolibc + Flags: [--target=thumbv8m.main-unknown-none-eabihf, --cstdlib=picolibc, -mfpu=fpv5-d16] + - Dir: arm-none-eabi/thumb/v8.1-m.main/fp Flags: [--target=thumbv8.1m.main-unknown-none-eabihf, -mfpu=fp-armv8-fullfp16-sp-d16] diff --git a/clang/test/Driver/print-multi-selection-flags.c b/clang/test/Driver/print-multi-selection-flags.c index e5a116234c321..0d71a52d553bb 100644 --- a/clang/test/Driver/print-multi-selection-flags.c +++ b/clang/test/Driver/print-multi-selection-flags.c @@ -71,6 +71,10 @@ // RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=aarch64-none-elf -mbranch-protection=standard | FileCheck --check-prefix=CHECK-BRANCH-PROTECTION %s // CHECK-BRANCH-PROTECTION: -mbranch-protection=standard +// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=arm-none-eabi --cstdlib=picolibc | FileCheck --check-prefix=CHECK-CSTDLIB %s +// RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=arm-none-eabi --cstdlib picolibc | FileCheck --check-prefix=CHECK-CSTDLIB %s +// CHECK-CSTDLIB: --cstdlib=picolibc + // RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=arm-none-eabi -mno-unaligned-access | FileCheck --check-prefix=CHECK-NO-UNALIGNED-ACCESS %s // RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=arm-none-eabi -mstrict-align | FileCheck --check-prefix=CHECK-NO-UNALIGNED-ACCESS %s // RUN: %clang -multi-lib-config=%S/Inputs/multilib/empty.yaml -print-multi-flags-experimental --target=arm-none-eabi | FileCheck --check-prefix=CHECK-NO-UNALIGNED-ACCESS %s From e5e44448698bae2c802f5eb5412df0e596721352 Mon Sep 17 00:00:00 2001 From: Wenju He Date: Thu, 25 Jun 2026 10:44:28 +0200 
Subject: [PATCH 471/511] [SYCL][Test] Fix nvvm-annotations test for 3-operand reqd_work_group_size (#22431) Upstream commit 6794e31e894d added a verifier check requiring reqd_work_group_size to have exactly three operands. Pad the 1D and 2D test metadata nodes to comply; work_group_num_dim still limits which dimensions are emitted so behavior is unchanged. Co-authored-by: Claude Sonnet 4.6 --- llvm/test/SYCLLowerIR/nvvm-annotations.ll | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/test/SYCLLowerIR/nvvm-annotations.ll b/llvm/test/SYCLLowerIR/nvvm-annotations.ll index b5f52f5dab0a7..bd80b54337e39 100644 --- a/llvm/test/SYCLLowerIR/nvvm-annotations.ll +++ b/llvm/test/SYCLLowerIR/nvvm-annotations.ll @@ -87,8 +87,8 @@ define ptx_kernel void @foo_maxwgpermp() !max_work_groups_per_mp !3 { !2 = !{i32 2} !3 = !{i32 3} -!4 = !{i32 4} -!5 = !{i32 4, i32 8} +!4 = !{i32 4, i32 1, i32 1} +!5 = !{i32 4, i32 8, i32 1} !6 = !{i32 4, i32 8, i32 16} ;. @@ -111,9 +111,9 @@ define ptx_kernel void @foo_maxwgpermp() !max_work_groups_per_mp !3 { ; CHECK: [[META16:![0-9]+]] = !{ptr @foo_minwgpercu0, !"minctasm", i32 2} ; CHECK: [[META17:![0-9]+]] = !{ptr @foo_maxwgpermp, !"maxclusterrank", i32 3} ; CHECK: [[META18]] = !{i32 4, i32 8, i32 16} -; CHECK: [[META19]] = !{i32 4} +; CHECK: [[META19]] = !{i32 4, i32 1, i32 1} ; CHECK: [[META20]] = !{i32 1} -; CHECK: [[META21]] = !{i32 4, i32 8} +; CHECK: [[META21]] = !{i32 4, i32 8, i32 1} ; CHECK: [[META22]] = !{i32 2} ; CHECK: [[META23]] = !{i32 3} ;. From 275b5f664f43140cbf8eb4fce1365abd651427da Mon Sep 17 00:00:00 2001 From: Jean-Didier PAILLEUX Date: Thu, 25 Jun 2026 10:47:21 +0200 Subject: [PATCH 472/511] [flang][MIF] Use default datalayout and cleanup MIF tests (#205662) As mentioned in the PR #192944, we are now using the default data layout. 
--- flang/include/flang/Optimizer/Transforms/Passes.td | 2 +- flang/lib/Optimizer/Transforms/MIFOpConversion.cpp | 3 ++- flang/test/Fir/MIF/change_team.mlir | 2 -- flang/test/Fir/MIF/change_team2.mlir | 2 -- flang/test/Fir/MIF/co_broadcast.mlir | 2 -- flang/test/Fir/MIF/co_max.mlir | 2 -- flang/test/Fir/MIF/co_min.mlir | 2 -- flang/test/Fir/MIF/co_sum.mlir | 4 +--- flang/test/Fir/MIF/coarray-alloc.mlir | 2 -- flang/test/Fir/MIF/form_team.mlir | 2 -- flang/test/Fir/MIF/get_team.mlir | 2 -- flang/test/Fir/MIF/init.mlir | 2 -- flang/test/Fir/MIF/num_images.mlir | 2 -- flang/test/Fir/MIF/sync_all.mlir | 2 -- flang/test/Fir/MIF/sync_images.mlir | 2 -- flang/test/Fir/MIF/sync_memory.mlir | 2 -- flang/test/Fir/MIF/sync_team.mlir | 2 -- flang/test/Fir/MIF/team_number.mlir | 2 -- flang/test/Fir/MIF/this_image.mlir | 2 -- 19 files changed, 4 insertions(+), 37 deletions(-) diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td index 8c082fb073451..e7a83f64acef9 100644 --- a/flang/include/flang/Optimizer/Transforms/Passes.td +++ b/flang/include/flang/Optimizer/Transforms/Passes.td @@ -670,7 +670,7 @@ def OptimizeArrayRepacking def MIFOpConversion : Pass<"mif-convert", "mlir::ModuleOp"> { let summary = "Convert some MIF operations to runtime calls"; let dependentDialects = ["fir::FIROpsDialect", "mlir::LLVM::LLVMDialect", - "mlir::cf::ControlFlowDialect"]; + "mlir::cf::ControlFlowDialect", "mlir::DLTIDialect"]; } def LoopInvariantCodeMotion : Pass<"flang-licm", "::mlir::func::FuncOp"> { diff --git a/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp b/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp index 5121455817bd6..3eb46efdcd45a 100644 --- a/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/MIFOpConversion.cpp @@ -23,6 +23,7 @@ #include "flang/Optimizer/Support/InternalNames.h" #include "flang/Runtime/stop.h" #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" 
+#include "mlir/Dialect/DLTI/DLTI.h" #include "mlir/IR/Matchers.h" #include "mlir/Transforms/DialectConversion.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" @@ -1127,7 +1128,7 @@ class MIFOpConversion : public fir::impl::MIFOpConversionBase { mlir::SymbolTable symtab(module); std::optional dl = fir::support::getOrSetMLIRDataLayout( - module, /*allowDefaultLayout=*/false); + module, /*allowDefaultLayout=*/true); if (!dl.has_value()) { mlir::emitError( module.getLoc(), diff --git a/flang/test/Fir/MIF/change_team.mlir b/flang/test/Fir/MIF/change_team.mlir index 2d7f4c682944d..b80f121e09f2e 100644 --- a/flang/test/Fir/MIF/change_team.mlir +++ b/flang/test/Fir/MIF/change_team.mlir @@ -1,6 +1,5 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { func.func @_QQmain() attributes {fir.bindc_name = "TEST_CHANGE_TEAM"} { %0 = fir.dummy_scope : !fir.dscope %c10 = arith.constant 10 : index @@ -29,7 +28,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec } return } -} // CHECK: 
%[[VAL_1:.*]] = fir.absent !fir.ref // CHECK: %[[VAL_2:.*]] = fir.absent !fir.box> diff --git a/flang/test/Fir/MIF/change_team2.mlir b/flang/test/Fir/MIF/change_team2.mlir index 26d67f5fa1e04..a9cc62dd7cc65 100644 --- a/flang/test/Fir/MIF/change_team2.mlir +++ b/flang/test/Fir/MIF/change_team2.mlir @@ -1,7 +1,6 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s // mlir generated by the example in flang/test/Lower/MIF/change_team2.f90 -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)"} { func.func @_QQmain() { %0 = fir.dummy_scope : !fir.dscope %1 = fir.alloca i32 {bindc_name = "image_status", uniq_name = "_QFEimage_status"} @@ -69,7 +68,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec ^bb4: // pred: ^bb2 return } -} // CHECK: %[[VAL_0:.*]] = fir.alloca i64 // CHECK: %[[VAL_1:.*]] = fir.alloca i64 diff --git a/flang/test/Fir/MIF/co_broadcast.mlir b/flang/test/Fir/MIF/co_broadcast.mlir index 2f5782b2a825a..59cde70f56834 100644 --- a/flang/test/Fir/MIF/co_broadcast.mlir +++ b/flang/test/Fir/MIF/co_broadcast.mlir @@ -1,6 +1,5 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s 
-module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { func.func @_QQmain() attributes {fir.bindc_name = "TEST_CO_BROADCAST"} { %0 = fir.dummy_scope : !fir.dscope %c2 = arith.constant 2 : index @@ -65,7 +64,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec mif.co_broadcast %38 source %c1_i32_9 stat %24#0 errmsg %39 : (!fir.box>, i32, !fir.ref, !fir.box>) return } -} // CHECK: %[[C1_i32:.*]] = arith.constant 1 : i32 // CHECK: %[[V1:.*]] = fir.embox %[[VAR_I:.*]]#0 : (!fir.ref) -> !fir.box diff --git a/flang/test/Fir/MIF/co_max.mlir b/flang/test/Fir/MIF/co_max.mlir index f74513d862ebf..3ec65ccb49f8e 100644 --- a/flang/test/Fir/MIF/co_max.mlir +++ b/flang/test/Fir/MIF/co_max.mlir @@ -1,6 +1,5 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : 
vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { func.func @_QQmain() attributes {fir.bindc_name = "TEST_CO_MAX"} { %0 = fir.dummy_scope : !fir.dscope %c1 = arith.constant 1 : index @@ -71,7 +70,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec mif.co_max %41 result %c1_i32_9 stat %24#0 errmsg %42 : (!fir.box>, i32, !fir.ref, !fir.box>) return } -} // CHECK: %[[V1:.*]] = fir.embox %[[VAR_I:.*]]#0 : (!fir.ref) -> !fir.box // CHECK: %[[V2:.*]] = fir.absent !fir.ref diff --git a/flang/test/Fir/MIF/co_min.mlir b/flang/test/Fir/MIF/co_min.mlir index 97806bb376d38..92d54be3a222c 100644 --- a/flang/test/Fir/MIF/co_min.mlir +++ b/flang/test/Fir/MIF/co_min.mlir @@ -1,6 +1,5 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", 
"dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { func.func @_QQmain() attributes {fir.bindc_name = "TEST_CO_MIN"} { %0 = fir.dummy_scope : !fir.dscope %c1 = arith.constant 1 : index @@ -71,7 +70,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec mif.co_min %41 result %c1_i32_9 stat %24#0 errmsg %42 : (!fir.box>, i32, !fir.ref, !fir.box>) return } -} // CHECK: %[[V1:.*]] = fir.embox %[[VAR_I:.*]]#0 : (!fir.ref) -> !fir.box // CHECK: %[[V2:.*]] = fir.absent !fir.ref diff --git a/flang/test/Fir/MIF/co_sum.mlir b/flang/test/Fir/MIF/co_sum.mlir index 8afce3582a97f..bb44ff952ae19 100644 --- a/flang/test/Fir/MIF/co_sum.mlir +++ b/flang/test/Fir/MIF/co_sum.mlir @@ -1,7 +1,6 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 
666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { - func.func @_QQmain() attributes {fir.bindc_name = "TEST_CO_SUM"} { + func.func @_QQmain() attributes {fir.bindc_name = "TEST_CO_SUM"} { %0 = fir.dummy_scope : !fir.dscope %c2 = arith.constant 2 : index %1 = fir.alloca !fir.array<2xf64> {bindc_name = "array_d", uniq_name = "_QFEarray_d"} @@ -57,7 +56,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec mif.co_sum %33 result %c1_i32_5 stat %19#0 errmsg %34 : (!fir.box>, i32, !fir.ref, !fir.box>) return } -} // CHECK: %[[V1:.*]] = fir.embox %[[VAR_I:.*]]#0 : (!fir.ref) -> !fir.box // CHECK: %[[V2:.*]] = fir.absent !fir.ref diff --git a/flang/test/Fir/MIF/coarray-alloc.mlir b/flang/test/Fir/MIF/coarray-alloc.mlir index 7596ffc61a00f..a96bfc5935534 100644 --- a/flang/test/Fir/MIF/coarray-alloc.mlir +++ b/flang/test/Fir/MIF/coarray-alloc.mlir @@ -1,6 +1,5 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 23.0.0 (git@github.com:SiPearl/llvm-project.git 3d6a6306c5cd826a2306fd17b1f65de34bcf866b)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { func.func @_QQmain() attributes 
{fir.bindc_name = "ALLOC_TEST"} { %0 = fir.alloca !fir.array<1xi64> %1 = fir.alloca !fir.array<1xi64> @@ -231,7 +230,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec mif.dealloc_coarray %34#0 : (!fir.ref) -> () return } -} // CHECK: fir.global linkonce_odr @_QMprifE.dt.prif_coarray_handle : !fir.type<_QMprifTprif_coarray_handle{info:!fir.box>}> // CHECK: fir.global linkonce @_QFEa_coarray_handle : !fir.box>}>> diff --git a/flang/test/Fir/MIF/form_team.mlir b/flang/test/Fir/MIF/form_team.mlir index 6b170876d29f3..d53f1582bd4e6 100644 --- a/flang/test/Fir/MIF/form_team.mlir +++ b/flang/test/Fir/MIF/form_team.mlir @@ -1,6 +1,5 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { func.func @_QQmain() attributes {fir.bindc_name = "TEST_FORM_TEAM"} { %0 = fir.dummy_scope : !fir.dscope %c10 = arith.constant 10 : index @@ -32,7 +31,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec mif.form_team team_number %20 team_var %21 errmsg %19 : (i32, !fir.box>, !fir.box>) 
-> () return } -} // CHECK: %[[VAL_1:.*]] = fir.absent !fir.ref // CHECK: %[[VAL_2:.*]] = fir.absent !fir.ref diff --git a/flang/test/Fir/MIF/get_team.mlir b/flang/test/Fir/MIF/get_team.mlir index 80e84412e47a8..c7b98581fcb89 100644 --- a/flang/test/Fir/MIF/get_team.mlir +++ b/flang/test/Fir/MIF/get_team.mlir @@ -1,6 +1,5 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { func.func @_QQmain() attributes {fir.bindc_name = "TEST_FORM_TEAM"} { %0 = fir.dummy_scope : !fir.dscope %1 = fir.address_of(@_QMiso_fortran_envECcurrent_team) : !fir.ref @@ -51,7 +50,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec hlfir.destroy %27 : !hlfir.expr> return } -} // CHECK: %[[VAL_1:.*]] = fir.absent !fir.ref // CHECK: %[[RESULT:.*]] = fir.convert %[[TEAM:.*]] : ({{.*}}) -> !fir.box diff --git a/flang/test/Fir/MIF/init.mlir b/flang/test/Fir/MIF/init.mlir index 355e5d651666d..022513d3066a1 100644 --- a/flang/test/Fir/MIF/init.mlir +++ b/flang/test/Fir/MIF/init.mlir @@ -1,6 +1,5 @@ // RUN: 
fir-opt --split-input-file --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { func.func @_QQmain() attributes {fir.bindc_name = "TEST_INIT"} { %0 = fir.dummy_scope : !fir.dscope return @@ -16,7 +15,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %c0_i32 = arith.constant 0 : i32 return %c0_i32 : i32 } -} // CHECK-LABEL: func.func @main // CHECK: %[[VAL_0:.*]] = fir.alloca i32 diff --git a/flang/test/Fir/MIF/num_images.mlir b/flang/test/Fir/MIF/num_images.mlir index afa33a9063c3d..8aa41c8ade929 100644 --- a/flang/test/Fir/MIF/num_images.mlir +++ b/flang/test/Fir/MIF/num_images.mlir @@ -1,6 +1,5 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : 
vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { func.func @_QQmain() attributes {fir.bindc_name = "TEST"} { %0 = fir.dummy_scope : !fir.dscope %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"} @@ -14,7 +13,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec hlfir.assign %7 to %2#0 : i32, !fir.ref return } -} // CHECK-LABEL: func.func @_QQmain diff --git a/flang/test/Fir/MIF/sync_all.mlir b/flang/test/Fir/MIF/sync_all.mlir index b2b98b9aae9e8..ce8a7408eec71 100644 --- a/flang/test/Fir/MIF/sync_all.mlir +++ b/flang/test/Fir/MIF/sync_all.mlir @@ -1,6 +1,5 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = 
"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { func.func @_QQmain() attributes {fir.bindc_name = "TEST_SYNC_ALL"} { %0 = fir.dummy_scope : !fir.dscope %1 = fir.address_of(@_QFEerror_message) : !fir.ref> @@ -20,7 +19,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %0 = fir.zero_bits !fir.char<1,128> fir.has_value %0 : !fir.char<1,128> } -} // CHECK: %[[ERRMSG:.*]]:2 = hlfir.declare %[[E:.*]] typeparams %[[C_128:.*]] {uniq_name = "_QFEerror_message"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Fir/MIF/sync_images.mlir b/flang/test/Fir/MIF/sync_images.mlir index e38fdaa5a4c14..d241901f76dc0 100644 --- a/flang/test/Fir/MIF/sync_images.mlir +++ b/flang/test/Fir/MIF/sync_images.mlir @@ -1,6 +1,5 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { 
func.func @_QQmain() attributes {fir.bindc_name = "TEST_SYNC_IMAGES"} { %0 = fir.dummy_scope : !fir.dscope %1 = fir.address_of(@_QFEerror_message) : !fir.ref> @@ -39,7 +38,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %0 = fir.zero_bits !fir.char<1,128> fir.has_value %0 : !fir.char<1,128> } -} // CHECK: %[[ERRMSG:.*]]:2 = hlfir.declare %[[E:.*]] typeparams %[[C_128:.*]] {uniq_name = "_QFEerror_message"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) // CHECK: %[[ME:.*]]:2 = hlfir.declare %[[M:.*]] {uniq_name = "_QFEme"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Fir/MIF/sync_memory.mlir b/flang/test/Fir/MIF/sync_memory.mlir index d6f24416bc61c..452f57b27adae 100644 --- a/flang/test/Fir/MIF/sync_memory.mlir +++ b/flang/test/Fir/MIF/sync_memory.mlir @@ -1,6 +1,5 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { func.func @_QQmain() attributes {fir.bindc_name = "TEST_sync_memory"} { %0 = fir.dummy_scope : !fir.dscope %1 = 
fir.address_of(@_QFEerror_message) : !fir.ref> @@ -20,7 +19,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %0 = fir.zero_bits !fir.char<1,128> fir.has_value %0 : !fir.char<1,128> } -} // CHECK: %[[ERRMSG:.*]]:2 = hlfir.declare %[[E:.*]] typeparams %[[C_128:.*]] {uniq_name = "_QFEerror_message"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Fir/MIF/sync_team.mlir b/flang/test/Fir/MIF/sync_team.mlir index c7e2c2c169694..94b66123e28c0 100644 --- a/flang/test/Fir/MIF/sync_team.mlir +++ b/flang/test/Fir/MIF/sync_team.mlir @@ -1,6 +1,5 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { func.func @_QQmain() attributes {fir.bindc_name = "TEST_SYNC_TEAM"} { %0 = fir.dummy_scope : !fir.dscope %1 = fir.address_of(@_QFEerror_message) : !fir.ref> @@ -28,7 +27,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %0 = fir.zero_bits !fir.char<1,128> fir.has_value %0 : !fir.char<1,128> } -} // CHECK: %[[ERRMSG:.*]]:2 = hlfir.declare %[[E:.*]] 
typeparams %[[C_128:.*]] {uniq_name = "_QFEerror_message"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) // CHECK: %[[STAT:.*]]:2 = hlfir.declare %[[S:.*]] {uniq_name = "_QFEsync_status"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Fir/MIF/team_number.mlir b/flang/test/Fir/MIF/team_number.mlir index 55e10448d2003..00fc473965ee2 100644 --- a/flang/test/Fir/MIF/team_number.mlir +++ b/flang/test/Fir/MIF/team_number.mlir @@ -1,6 +1,5 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { func.func @_QQmain() attributes {fir.bindc_name = "TEST_TEAM_NUMBER"} { %0 = fir.dummy_scope : !fir.dscope %1 = fir.alloca i32 {bindc_name = "t", uniq_name = "_QFEt"} @@ -18,7 +17,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec hlfir.assign %10 to %2#0 : i32, !fir.ref return } - } // CHECK: %[[VAL_1:.*]] = fir.convert %[[TEAM:.*]] : ({{.*}}) -> !fir.box // CHECK: fir.call @_QMprifPprif_team_number(%[[VAL_1]], %[[RESULT:.*]]) : (!fir.box, !fir.ref) -> () diff --git 
a/flang/test/Fir/MIF/this_image.mlir b/flang/test/Fir/MIF/this_image.mlir index 25eafc09ef58c..13aec81ec3755 100644 --- a/flang/test/Fir/MIF/this_image.mlir +++ b/flang/test/Fir/MIF/this_image.mlir @@ -1,6 +1,5 @@ // RUN: fir-opt --mif-convert %s | FileCheck %s -module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vector<4xi64>, !llvm.ptr<271> = dense<32> : vector<4xi64>, !llvm.ptr<272> = dense<64> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i128 = dense<128> : vector<2xi64>, f80 = dense<128> : vector<2xi64>, !llvm.ptr = dense<64> : vector<4xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.mangling_mode" = "e", "dlti.legal_int_widths" = array, "dlti.stack_alignment" = 128 : i64>, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", llvm.ident = "flang version 22.0.0 (git@github.com:SiPearl/llvm-project.git 666e4313ebc03587f27774139ad8f780bac15c3e)", llvm.target_triple = "x86_64-unknown-linux-gnu"} { func.func @_QQmain() attributes {fir.bindc_name = "TEST"} { %0 = fir.dummy_scope : !fir.dscope %1 = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFEi"} @@ -9,7 +8,6 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec hlfir.assign %3 to %2#0 : i32, !fir.ref return } -} // CHECK-LABEL: func.func @_QQmain From daa676f6413d0ed9f0a4367f4e295cf2710821d0 Mon Sep 17 00:00:00 2001 From: Kaviya Rajendiran <67495422+kaviya2510@users.noreply.github.com> Date: Thu, 25 Jun 2026 14:27:17 +0530 Subject: [PATCH 473/511] [Flang]Add support for inlining hlfir.assign operation where both LHS and RHS are slices of the same array (#204532) Added support for inlining hlfir.assign when both LHS and RHS are slices of the same array. 
When overlap between the slices cannot be determined, the pass introduces a disjointness check: - genIndexBasedDisjointnessCheck(..) is used when both sides are sections of the same array. - genAddressBasedDisjointnessCheck(..) is used as a fallback for more complex cases. At runtime: - If the slices are disjoint, a direct element-wise copy is performed without allocating a temporary buffer. - If overlap is possible, a temporary buffer is allocated, the RHS is first copied into it and then the data is copied from the temporary buffer to the LHS. Fixes https://github.com/llvm/llvm-project/issues/203228 --- .../Optimizer/Analysis/ArraySectionAnalyzer.h | 2 +- .../flang/Optimizer/Builder/FIRBuilder.h | 10 + flang/lib/Optimizer/Builder/FIRBuilder.cpp | 137 ++++++++++++++ .../HLFIR/Transforms/InlineHLFIRAssign.cpp | 60 ++++-- .../inline-hlfir-assign-pointer-overlap.fir | 132 +++++++++++++ .../inline-hlfir-assign-scalar-index.fir | 98 ++++++++++ ...-hlfir-assign-self-copy-runtime-stride.fir | 141 ++++++++++++++ .../HLFIR/inline-hlfir-assign-self-copy.fir | 178 ++++++++++++++++++ 8 files changed, 745 insertions(+), 13 deletions(-) create mode 100644 flang/test/HLFIR/inline-hlfir-assign-pointer-overlap.fir create mode 100644 flang/test/HLFIR/inline-hlfir-assign-scalar-index.fir create mode 100644 flang/test/HLFIR/inline-hlfir-assign-self-copy-runtime-stride.fir create mode 100644 flang/test/HLFIR/inline-hlfir-assign-self-copy.fir diff --git a/flang/include/flang/Optimizer/Analysis/ArraySectionAnalyzer.h b/flang/include/flang/Optimizer/Analysis/ArraySectionAnalyzer.h index e87e37c3c5590..aae9cbde2d7be 100644 --- a/flang/include/flang/Optimizer/Analysis/ArraySectionAnalyzer.h +++ b/flang/include/flang/Optimizer/Analysis/ArraySectionAnalyzer.h @@ -65,7 +65,6 @@ class ArraySectionAnalyzer { static bool isDesignatingArrayInOrder(hlfir::DesignateOp designate, hlfir::ElementalOpInterface elemental); -private: struct SectionDesc { // An array section is described by tuple.
// If the designator's subscript is not a triple, then @@ -103,6 +102,7 @@ class ArraySectionAnalyzer { static std::pair getOrderedBounds(const SectionDesc &desc); +private: // Given two array sections and // , return true only if the sections // are known to be disjoint. diff --git a/flang/include/flang/Optimizer/Builder/FIRBuilder.h b/flang/include/flang/Optimizer/Builder/FIRBuilder.h index a41be5efacb56..6592c384541d4 100644 --- a/flang/include/flang/Optimizer/Builder/FIRBuilder.h +++ b/flang/include/flang/Optimizer/Builder/FIRBuilder.h @@ -988,6 +988,16 @@ mlir::Value getDescriptorWithNewBaseAddress(fir::FirOpBuilder &builder, mlir::Location loc, mlir::Value box, mlir::Value newAddr); +/// Generate an index-based disjointness check. +std::optional +genIndexBasedDisjointnessCheck(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value lhsRef, mlir::Value rhsRef); + +/// Generate an address-based disjointness check. +std::optional +genAddressBasedDisjointnessCheck(mlir::Location loc, fir::FirOpBuilder &builder, + mlir::Value lhsRef, mlir::Value rhsRef); + } // namespace fir::factory #endif // FORTRAN_OPTIMIZER_BUILDER_FIRBUILDER_H diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index c7df3abda156c..5cbc9fcedc9b5 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -8,6 +8,7 @@ #include "flang/Optimizer/Builder/FIRBuilder.h" #include "flang/Optimizer/Analysis/AliasAnalysis.h" +#include "flang/Optimizer/Analysis/ArraySectionAnalyzer.h" #include "flang/Optimizer/Builder/BoxValue.h" #include "flang/Optimizer/Builder/Character.h" #include "flang/Optimizer/Builder/Complex.h" @@ -21,10 +22,12 @@ #include "flang/Optimizer/Dialect/FIRDialect.h" #include "flang/Optimizer/Dialect/FIROpsSupport.h" #include "flang/Optimizer/Dialect/FIRType.h" +#include "flang/Optimizer/HLFIR/HLFIROps.h" #include "flang/Optimizer/Support/DataLayout.h" #include
"flang/Optimizer/Support/FatalError.h" #include "flang/Optimizer/Support/InternalNames.h" #include "flang/Optimizer/Support/Utils.h" +#include "mlir/Dialect/Arith/IR/Arith.h" #include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/Dialect/OpenACC/OpenACC.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" @@ -2006,3 +2009,137 @@ mlir::Value fir::factory::getDescriptorWithNewBaseAddress( return builder.createBox(loc, boxType, newAddr, shape, /*slice=*/{}, fir::getTypeParams(openedInput), typeMold); } + +std::optional fir::factory::genIndexBasedDisjointnessCheck( + mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value lhsRef, + mlir::Value rhsRef) { + auto des1 = lhsRef.getDefiningOp(); + auto des2 = rhsRef.getDefiningOp(); + if (!des1 || !des2) + return std::nullopt; + + if (des1.getMemref() != des2.getMemref()) + return std::nullopt; + + if (des1.getComponent() != des2.getComponent() || + des1.getComponentShape() != des2.getComponentShape() || + des1.getSubstring() != des2.getSubstring() || + des1.getComplexPart() != des2.getComplexPart() || + des1.getTypeparams() != des2.getTypeparams()) + return std::nullopt; + + if (des1.getIsTriplet().empty() || + !llvm::equal(des1.getIsTriplet(), des2.getIsTriplet())) + return std::nullopt; + + using Analyzer = fir::ArraySectionAnalyzer; + mlir::Type idxTy = builder.getIndexType(); + auto toIdx = [&](mlir::Value v) -> mlir::Value { + return fir::ConvertOp::create(builder, loc, idxTy, v); + }; + + mlir::Value disjoint; + auto des1It = des1.getIndices().begin(); + auto des2It = des2.getIndices().begin(); + for (bool isTriplet : des1.getIsTriplet()) { + Analyzer::SectionDesc desc1 = Analyzer::readSectionDesc(des1It, isTriplet); + Analyzer::SectionDesc desc2 = Analyzer::readSectionDesc(des2It, isTriplet); + auto [lb1, ub1] = Analyzer::getOrderedBounds(desc1); + auto [lb2, ub2] = Analyzer::getOrderedBounds(desc2); + if (!lb1 || !lb2) + continue; + + mlir::Value c1 = mlir::arith::CmpIOp::create( + builder, loc, 
mlir::arith::CmpIPredicate::slt, toIdx(ub1), toIdx(lb2)); + mlir::Value c2 = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::slt, toIdx(ub2), toIdx(lb1)); + mlir::Value c1Orc2 = mlir::arith::OrIOp::create(builder, loc, c1, c2); + disjoint = disjoint + ? mlir::arith::OrIOp::create(builder, loc, disjoint, c1Orc2) + : c1Orc2; + } + if (!disjoint) + return std::nullopt; + return disjoint; +} + +std::optional fir::factory::genAddressBasedDisjointnessCheck( + mlir::Location loc, fir::FirOpBuilder &builder, mlir::Value lhsRef, + mlir::Value rhsRef) { + if (!mlir::isa(lhsRef.getType()) || + !mlir::isa(rhsRef.getType())) + return std::nullopt; + + mlir::Type idxTy = builder.getIndexType(); + mlir::Type intPtrTy = builder.getIntPtrType(); + + // Disjoint if: xEnd < yStart || yEnd < xStart. + auto computeRange = + [&](mlir::Value box) -> std::pair { + mlir::Value baseAddr = fir::BoxAddrOp::create(builder, loc, box); + mlir::Value baseInt = + fir::ConvertOp::create(builder, loc, intPtrTy, baseAddr); + + mlir::Value eleSize = fir::BoxEleSizeOp::create(builder, loc, idxTy, box); + mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); + mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0); + + mlir::Value least = zero; + mlir::Value most = zero; + + auto boxTy = mlir::cast(box.getType()); + unsigned rank = 0; + if (auto seqTy = mlir::dyn_cast( + fir::unwrapRefType(boxTy.getEleTy()))) + rank = seqTy.getShape().size(); + + if (rank == 0) + return {nullptr, nullptr}; + + for (unsigned dim = 0; dim < rank; ++dim) { + mlir::Value dimVal = builder.createIntegerConstant(loc, idxTy, dim); + auto dims = fir::BoxDimsOp::create(builder, loc, idxTy, idxTy, idxTy, box, + dimVal); + mlir::Value extent = dims.getExtent(); + mlir::Value stride = dims.getByteStride(); + + mlir::Value extentM1 = + mlir::arith::SubIOp::create(builder, loc, extent, one); + mlir::Value dimOffset = + mlir::arith::MulIOp::create(builder, loc, extentM1, stride); + + 
mlir::Value isStrideNeg = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::slt, stride, zero); + mlir::Value addToLeast = mlir::arith::SelectOp::create( + builder, loc, isStrideNeg, dimOffset, zero); + mlir::Value addToMost = mlir::arith::SelectOp::create( + builder, loc, isStrideNeg, zero, dimOffset); + least = mlir::arith::AddIOp::create(builder, loc, least, addToLeast); + most = mlir::arith::AddIOp::create(builder, loc, most, addToMost); + } + + mlir::Value eleSizeM1 = + mlir::arith::SubIOp::create(builder, loc, eleSize, one); + most = mlir::arith::AddIOp::create(builder, loc, most, eleSizeM1); + + mlir::Value leastInt = + fir::ConvertOp::create(builder, loc, intPtrTy, least); + mlir::Value mostInt = fir::ConvertOp::create(builder, loc, intPtrTy, most); + mlir::Value rangeStart = + mlir::arith::AddIOp::create(builder, loc, baseInt, leastInt); + mlir::Value rangeEnd = + mlir::arith::AddIOp::create(builder, loc, baseInt, mostInt); + return {rangeStart, rangeEnd}; + }; + + auto [lhsStart, lhsEnd] = computeRange(lhsRef); + auto [rhsStart, rhsEnd] = computeRange(rhsRef); + if (!lhsStart || !rhsStart) + return std::nullopt; + + mlir::Value cond1 = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::ult, lhsEnd, rhsStart); + mlir::Value cond2 = mlir::arith::CmpIOp::create( + builder, loc, mlir::arith::CmpIPredicate::ult, rhsEnd, lhsStart); + return mlir::arith::OrIOp::create(builder, loc, cond1, cond2); +} diff --git a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp index b795d0b46da85..e6dac7e7cabf8 100644 --- a/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp +++ b/flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRAssign.cpp @@ -103,11 +103,9 @@ class InlineHLFIRAssignConversion return rewriter.notifyMatchFailure(assign, "RHS/LHS element types mismatch"); + bool rhsNeedsTemporary = false; + if (rhs.isArray() && !mlir::isa(rhs.getType())) { - // 
If RHS is not an hlfir.expr, then we should prove that - // LHS and RHS do not alias. - // TODO: if they may alias, we can insert hlfir.as_expr for RHS, - // and proceed with the inlining. fir::AliasAnalysis aliasAnalysis; mlir::AliasResult aliasRes = aliasAnalysis.alias(lhs, rhs); if (!aliasRes.isNo()) { @@ -121,7 +119,14 @@ class InlineHLFIRAssignConversion << "\tLHS: " << lhs << "\n" << "\tRHS: " << rhs << "\n" << "\tALIAS: " << aliasRes << "\n"); - return rewriter.notifyMatchFailure(assign, "RHS/LHS may alias"); + // Overlap is Unknown: unsafe to read RHS while writing LHS + // without a temp. Call genIndexBasedDisjointnessCheck(..) or + // genAddressBasedDisjointnessCheck(..) to check if the slices are + // disjoint. + // 1. If disjoint -> direct element-wise copy (no temp). + // 2. If not disjoint -> allocate a temporary and copy RHS into it, + // then copy the temporary to LHS.. + rhsNeedsTemporary = true; } } } @@ -130,6 +135,42 @@ class InlineHLFIRAssignConversion fir::FirOpBuilder builder(rewriter, assign.getOperation()); builder.setInsertionPoint(assign); + const bool useWorkshare = flangomp::shouldUseWorkshareLowering(assign); + mlir::ArrayAttr accessGroups; + if (auto attrs = assign.getOperation()->getAttrOfType( + fir::getAccessGroupsAttrName())) + accessGroups = attrs; + + auto emitAssignFrom = [&](hlfir::Entity rhsEntity) { + hlfir::genNoAliasArrayAssignment( + loc, builder, rhsEntity, lhs, useWorkshare, + /*temporaryLHS=*/false, nullptr, accessGroups); + }; + + if (rhsNeedsTemporary) { + std::optional disjoint = + fir::factory::genIndexBasedDisjointnessCheck(loc, builder, lhs, rhs); + if (!disjoint) { + disjoint = fir::factory::genAddressBasedDisjointnessCheck(loc, builder, + lhs, rhs); + } + if (!disjoint) + return rewriter.notifyMatchFailure( + assign, "Failed to generate runtime disjointness check," + "deferring to runtime assignment implementation"); + + builder.genIfThenElse(loc, *disjoint) + .genThen([&]() { emitAssignFrom(rhs); }) + 
.genElse([&]() { + mlir::Value tempExpr = hlfir::AsExprOp::create(builder, loc, rhs); + emitAssignFrom(hlfir::Entity{tempExpr}); + hlfir::DestroyOp::create(builder, loc, tempExpr); + }) + .end(); + rewriter.eraseOp(assign); + return mlir::success(); + } + // Materialize scalar RHS before the assignment loop. Fortran 10.2.1.3 // requires that the RHS expression is fully evaluated before any part // of the LHS variable is defined. When the scalar RHS is a reference @@ -138,13 +179,8 @@ class InlineHLFIRAssignConversion if (!rhs.isArray()) rhs = hlfir::loadTrivialScalar(loc, builder, rhs); - mlir::ArrayAttr accessGroups; - if (auto attrs = assign.getOperation()->getAttrOfType( - fir::getAccessGroupsAttrName())) - accessGroups = attrs; - hlfir::genNoAliasArrayAssignment( - loc, builder, rhs, lhs, flangomp::shouldUseWorkshareLowering(assign), - /*temporaryLHS=*/false, /*combiner=*/nullptr, accessGroups); + emitAssignFrom(rhs); + rewriter.eraseOp(assign); return mlir::success(); } diff --git a/flang/test/HLFIR/inline-hlfir-assign-pointer-overlap.fir b/flang/test/HLFIR/inline-hlfir-assign-pointer-overlap.fir new file mode 100644 index 0000000000000..cb3fa5334e568 --- /dev/null +++ b/flang/test/HLFIR/inline-hlfir-assign-pointer-overlap.fir @@ -0,0 +1,132 @@ +// Test inlining of hlfir.assign for pointer-based array assignment + +// RUN: fir-opt --inline-hlfir-assign %s | FileCheck %s + +// Fortran source code: +// subroutine assign_pointers(arr, N, lo1, hi1, lo2, hi2) +// integer, intent(in) :: N, lo1, hi1, lo2, hi2 +// integer, pointer, intent(inout) :: arr(:) +// integer, pointer :: p1(:), p2(:) +// +// p1 => arr(lo1:hi1) +// p2 => arr(lo2:hi2) +// +// p1 = p2 +// end subroutine assign_pointers + +// CHECK-LABEL: func.func @_QPassign_pointers + +// Address-based runtime disjointness check: +// CHECK: %[[LHS_ADDR:.*]] = fir.box_addr %{{.*}} : (!fir.box>>) -> !fir.ptr> +// CHECK: %[[LHS_BASE:.*]] = fir.convert %[[LHS_ADDR]] : (!fir.ptr>) -> i64 +// CHECK: fir.box_elesize 
%{{.*}} : (!fir.box>>) -> index +// CHECK: fir.box_dims %{{.*}}, %{{.*}} +// CHECK: arith.subi %{{.*}}, %{{.*}} : index +// CHECK: arith.muli %{{.*}}, %{{.*}} : index +// CHECK: arith.cmpi slt, %{{.*}}, %{{.*}} : index +// CHECK: arith.select %{{.*}}, %{{.*}}, %{{.*}} : index +// CHECK: arith.select %{{.*}}, %{{.*}}, %{{.*}} : index +// CHECK: arith.subi %{{.*}}, %{{.*}} : index +// CHECK: arith.addi %{{.*}}, %{{.*}} : index +// CHECK: fir.convert %{{.*}} : (index) -> i64 +// CHECK: fir.convert %{{.*}} : (index) -> i64 +// CHECK: %[[LHS_START:.*]] = arith.addi %[[LHS_BASE]], %{{.*}} : i64 +// CHECK: %[[LHS_END:.*]] = arith.addi %[[LHS_BASE]], %{{.*}} : i64 +// CHECK: %[[RHS_ADDR:.*]] = fir.box_addr %{{.*}} : (!fir.box>>) -> !fir.ptr> +// CHECK: %[[RHS_BASE:.*]] = fir.convert %[[RHS_ADDR]] : (!fir.ptr>) -> i64 +// CHECK: fir.box_elesize %{{.*}} : (!fir.box>>) -> index +// CHECK: fir.box_dims %{{.*}}, %{{.*}} +// CHECK: arith.subi %{{.*}}, %{{.*}} : index +// CHECK: arith.muli %{{.*}}, %{{.*}} : index +// CHECK: arith.cmpi slt, %{{.*}}, %{{.*}} : index +// CHECK: arith.select %{{.*}}, %{{.*}}, %{{.*}} : index +// CHECK: arith.select %{{.*}}, %{{.*}}, %{{.*}} : index +// CHECK: arith.subi %{{.*}}, %{{.*}} : index +// CHECK: arith.addi %{{.*}}, %{{.*}} : index +// CHECK: fir.convert %{{.*}} : (index) -> i64 +// CHECK: fir.convert %{{.*}} : (index) -> i64 +// CHECK: %[[RHS_START:.*]] = arith.addi %[[RHS_BASE]], %{{.*}} : i64 +// CHECK: %[[RHS_END:.*]] = arith.addi %[[RHS_BASE]], %{{.*}} : i64 +// CHECK: %[[CMP1:.*]] = arith.cmpi ult, %[[LHS_END]], %[[RHS_START]] : i64 +// CHECK: %[[CMP2:.*]] = arith.cmpi ult, %[[RHS_END]], %[[LHS_START]] : i64 +// CHECK: %[[DISJOINT:.*]] = arith.ori %[[CMP1]], %[[CMP2]] : i1 +// CHECK: fir.if %[[DISJOINT]] { +// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { +// CHECK: hlfir.designate +// CHECK: fir.load +// CHECK: hlfir.designate +// CHECK: hlfir.assign %{{.*}} to %{{.*}} : i32, !fir.ref +// CHECK: } +// 
CHECK: } else { +// CHECK: %[[EXPR:.*]] = hlfir.as_expr %{{.*}} +// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { +// CHECK: hlfir.apply %[[EXPR]], %{{.*}} +// CHECK: hlfir.designate +// CHECK: hlfir.assign %{{.*}} to %{{.*}} : i32, !fir.ref +// CHECK: } +// CHECK: hlfir.destroy %[[EXPR]] +// CHECK: } + +func.func @_QPassign_pointers(%arg0: !fir.ref>>> {fir.bindc_name = "arr"}, %arg1: !fir.ref {fir.bindc_name = "n"}, %arg2: !fir.ref {fir.bindc_name = "lo1"}, %arg3: !fir.ref {fir.bindc_name = "hi1"}, %arg4: !fir.ref {fir.bindc_name = "lo2"}, %arg5: !fir.ref {fir.bindc_name = "hi2"}) { + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg0 dummy_scope %0 arg 1 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFassign_pointersEarr"} : (!fir.ref>>>, !fir.dscope) -> (!fir.ref>>>, !fir.ref>>>) + %2:2 = hlfir.declare %arg3 dummy_scope %0 arg 4 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFassign_pointersEhi1"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg5 dummy_scope %0 arg 6 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFassign_pointersEhi2"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %4:2 = hlfir.declare %arg2 dummy_scope %0 arg 3 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFassign_pointersElo1"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %5:2 = hlfir.declare %arg4 dummy_scope %0 arg 5 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFassign_pointersElo2"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %6:2 = hlfir.declare %arg1 dummy_scope %0 arg 2 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFassign_pointersEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %7 = fir.alloca !fir.box>> {bindc_name = "p1", uniq_name = "_QFassign_pointersEp1"} + %8 = fir.zero_bits !fir.ptr> + %c0 = arith.constant 0 : index + %9 = fir.shape %c0 : (index) -> !fir.shape<1> + %10 = fir.embox %8(%9) : (!fir.ptr>, !fir.shape<1>) -> !fir.box>> + fir.store %10 to %7 : !fir.ref>>> + %11:2 = 
hlfir.declare %7 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFassign_pointersEp1"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %12 = fir.alloca !fir.box>> {bindc_name = "p2", uniq_name = "_QFassign_pointersEp2"} + %13 = fir.zero_bits !fir.ptr> + %c0_0 = arith.constant 0 : index + %14 = fir.shape %c0_0 : (index) -> !fir.shape<1> + %15 = fir.embox %13(%14) : (!fir.ptr>, !fir.shape<1>) -> !fir.box>> + fir.store %15 to %12 : !fir.ref>>> + %16:2 = hlfir.declare %12 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFassign_pointersEp2"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) + %17 = fir.load %1#0 : !fir.ref>>> + %18 = fir.load %4#0 : !fir.ref + %19 = fir.convert %18 : (i32) -> i64 + %20 = fir.load %2#0 : !fir.ref + %21 = fir.convert %20 : (i32) -> i64 + %22 = fir.convert %19 : (i64) -> index + %23 = fir.convert %21 : (i64) -> index + %c1 = arith.constant 1 : index + %c0_1 = arith.constant 0 : index + %24 = arith.subi %23, %22 : index + %25 = arith.addi %24, %c1 : index + %26 = arith.divsi %25, %c1 : index + %27 = arith.cmpi sgt, %26, %c0_1 : index + %28 = arith.select %27, %26, %c0_1 : index + %29 = fir.shape %28 : (index) -> !fir.shape<1> + %30 = hlfir.designate %17 (%22:%23:%c1) shape %29 : (!fir.box>>, index, index, index, !fir.shape<1>) -> !fir.box> + %31 = fir.rebox %30 : (!fir.box>) -> !fir.box>> + fir.store %31 to %11#0 : !fir.ref>>> + %32 = fir.load %1#0 : !fir.ref>>> + %33 = fir.load %5#0 : !fir.ref + %34 = fir.convert %33 : (i32) -> i64 + %35 = fir.load %3#0 : !fir.ref + %36 = fir.convert %35 : (i32) -> i64 + %37 = fir.convert %34 : (i64) -> index + %38 = fir.convert %36 : (i64) -> index + %c1_2 = arith.constant 1 : index + %c0_3 = arith.constant 0 : index + %39 = arith.subi %38, %37 : index + %40 = arith.addi %39, %c1_2 : index + %41 = arith.divsi %40, %c1_2 : index + %42 = arith.cmpi sgt, %41, %c0_3 : index + %43 = arith.select %42, %41, %c0_3 : index + %44 = fir.shape %43 : (index) -> !fir.shape<1> + %45 = hlfir.designate %32 (%37:%38:%c1_2) 
shape %44 : (!fir.box>>, index, index, index, !fir.shape<1>) -> !fir.box> + %46 = fir.rebox %45 : (!fir.box>) -> !fir.box>> + fir.store %46 to %16#0 : !fir.ref>>> + %47 = fir.load %16#0 : !fir.ref>>> + %48 = fir.load %11#0 : !fir.ref>>> + hlfir.assign %47 to %48 : !fir.box>>, !fir.box>> + return +} diff --git a/flang/test/HLFIR/inline-hlfir-assign-scalar-index.fir b/flang/test/HLFIR/inline-hlfir-assign-scalar-index.fir new file mode 100644 index 0000000000000..f15cf75c761bf --- /dev/null +++ b/flang/test/HLFIR/inline-hlfir-assign-scalar-index.fir @@ -0,0 +1,98 @@ +// Test inlining of hlfir.assign for self-copy with scalar index and triplet dimensions. +// RUN: fir-opt --inline-hlfir-assign %s | FileCheck %s + +// Fortran source code: +// subroutine copy_row(a, n, i, j, lo1, hi1, lo2, hi2) +// integer, intent(in) :: n, i, j, lo1, hi1, lo2, hi2 +// real, intent(inout) :: a(n, n) +// a(i, lo1:hi1) = a(j, lo2:hi2) +// end subroutine copy_row + +// CHECK-LABEL: func.func @_QPcopy_row + +// Index-based disjointness check for scalar index (dim1) and triplet (dim2): +// Dim1 (scalar): compares i vs j : disjoint if i != j (i.e., i < j || j < i) +// CHECK: %[[CMP1:.*]] = arith.cmpi slt, %{{.*}}, %{{.*}} : index +// CHECK: %[[CMP2:.*]] = arith.cmpi slt, %{{.*}}, %{{.*}} : index +// CHECK: %[[DISJ_DIM1:.*]] = arith.ori %[[CMP1]], %[[CMP2]] : i1 + +// Dim2 (triplet): compares lo1:hi1 vs lo2:hi2 — disjoint if hi1 < lo2 || hi2 < lo1 +// CHECK: %[[CMP3:.*]] = arith.cmpi slt, %{{.*}}, %{{.*}} : index +// CHECK: %[[CMP4:.*]] = arith.cmpi slt, %{{.*}}, %{{.*}} : index +// CHECK: %[[DISJ_DIM2:.*]] = arith.ori %[[CMP3]], %[[CMP4]] : i1 +// Overall: disjoint if ANY dimension is disjoint +// CHECK: %[[DISJOINT:.*]] = arith.ori %[[DISJ_DIM1]], %[[DISJ_DIM2]] : i1 +// CHECK: fir.if %[[DISJOINT]] { +// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { +// CHECK: hlfir.designate +// CHECK: fir.load +// CHECK: hlfir.designate +// CHECK: hlfir.assign %{{.*}} to %{{.*}} : 
f32, !fir.ref +// CHECK: } +// CHECK: } else { +// CHECK: %[[EXPR:.*]] = hlfir.as_expr %{{.*}} +// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { +// CHECK: hlfir.apply %[[EXPR]], %{{.*}} +// CHECK: hlfir.designate +// CHECK: hlfir.assign %{{.*}} to %{{.*}} : f32, !fir.ref +// CHECK: } +// CHECK: hlfir.destroy %[[EXPR]] +// CHECK: } + +func.func @_QPcopy_row(%arg0: !fir.ref> {fir.bindc_name = "a"}, %arg1: !fir.ref {fir.bindc_name = "n"}, %arg2: !fir.ref {fir.bindc_name = "i"}, %arg3: !fir.ref {fir.bindc_name = "j"}, %arg4: !fir.ref {fir.bindc_name = "lo1"}, %arg5: !fir.ref {fir.bindc_name = "hi1"}, %arg6: !fir.ref {fir.bindc_name = "lo2"}, %arg7: !fir.ref {fir.bindc_name = "hi2"}) { + %c1 = arith.constant 1 : index + %c0 = arith.constant 0 : index + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg1 dummy_scope %0 arg 2 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFcopy_rowEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg2 dummy_scope %0 arg 3 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFcopy_rowEi"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg3 dummy_scope %0 arg 4 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFcopy_rowEj"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %4:2 = hlfir.declare %arg4 dummy_scope %0 arg 5 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFcopy_rowElo1"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %5:2 = hlfir.declare %arg5 dummy_scope %0 arg 6 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFcopy_rowEhi1"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %6:2 = hlfir.declare %arg6 dummy_scope %0 arg 7 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFcopy_rowElo2"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %7:2 = hlfir.declare %arg7 dummy_scope %0 arg 8 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFcopy_rowEhi2"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + + %8 = fir.load %1#0 : !fir.ref + 
%9 = fir.convert %8 : (i32) -> i64 + %10 = fir.convert %9 : (i64) -> index + %11 = arith.cmpi sgt, %10, %c0 : index + %12 = arith.select %11, %10, %c0 : index + %13 = fir.shape %12, %12 : (index, index) -> !fir.shape<2> + %14:2 = hlfir.declare %arg0(%13) dummy_scope %0 arg 1 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFcopy_rowEa"} : (!fir.ref>, !fir.shape<2>, !fir.dscope) -> (!fir.box>, !fir.ref>) + + // RHS: a(j, lo2:hi2) + %15 = fir.load %3#0 : !fir.ref + %16 = fir.convert %15 : (i32) -> i64 + %17 = fir.convert %16 : (i64) -> index + %18 = fir.load %6#0 : !fir.ref + %19 = fir.convert %18 : (i32) -> i64 + %20 = fir.convert %19 : (i64) -> index + %21 = fir.load %7#0 : !fir.ref + %22 = fir.convert %21 : (i32) -> i64 + %23 = fir.convert %22 : (i64) -> index + %24 = arith.subi %23, %20 : index + %25 = arith.addi %24, %c1 : index + %26 = arith.cmpi sgt, %25, %c0 : index + %27 = arith.select %26, %25, %c0 : index + %28 = fir.shape %27 : (index) -> !fir.shape<1> + %29 = hlfir.designate %14#0 (%17, %20:%23:%c1) shape %28 : (!fir.box>, index, index, index, index, !fir.shape<1>) -> !fir.box> + + // LHS: a(i, lo1:hi1) + %30 = fir.load %2#0 : !fir.ref + %31 = fir.convert %30 : (i32) -> i64 + %32 = fir.convert %31 : (i64) -> index + %33 = fir.load %4#0 : !fir.ref + %34 = fir.convert %33 : (i32) -> i64 + %35 = fir.convert %34 : (i64) -> index + %36 = fir.load %5#0 : !fir.ref + %37 = fir.convert %36 : (i32) -> i64 + %38 = fir.convert %37 : (i64) -> index + %39 = arith.subi %38, %35 : index + %40 = arith.addi %39, %c1 : index + %41 = arith.cmpi sgt, %40, %c0 : index + %42 = arith.select %41, %40, %c0 : index + %43 = fir.shape %42 : (index) -> !fir.shape<1> + %44 = hlfir.designate %14#0 (%32, %35:%38:%c1) shape %43 : (!fir.box>, index, index, index, index, !fir.shape<1>) -> !fir.box> + + hlfir.assign %29 to %44 : !fir.box>, !fir.box> + return +} diff --git a/flang/test/HLFIR/inline-hlfir-assign-self-copy-runtime-stride.fir 
b/flang/test/HLFIR/inline-hlfir-assign-self-copy-runtime-stride.fir new file mode 100644 index 0000000000000..ee53844b010a8 --- /dev/null +++ b/flang/test/HLFIR/inline-hlfir-assign-self-copy-runtime-stride.fir @@ -0,0 +1,141 @@ +// Test inlining of hlfir.assign for array sections with the same runtime stride on LHS and RHS +// RUN: fir-opt --inline-hlfir-assign %s | FileCheck %s + +// Fortran source: +// subroutine testcase(a, n, step) +// implicit none +// integer, intent(in) :: n, step +// real, intent(inout) :: a(n) +// integer :: lo1, hi1, lo2, hi2 +// lo1 = 1 +// hi1 = max(n / 2, 1) +// lo2 = 2 +// hi2 = n +// a(lo1:hi1:step) = a(lo2:hi2:step) +// end subroutine testcase +// + +// CHECK-LABEL: func.func @_QPtestcase +// CHECK-SAME: %[[A:.*]]: !fir.ref> + +// CHECK: %{{.*}} = hlfir.designate +// CHECK: %{{.*}} = hlfir.designate +// CHECK: %[[LHS_ADDR:.*]] = fir.box_addr %{{.*}} +// CHECK: %[[LHS_BASE:.*]] = fir.convert %[[LHS_ADDR]] : (!fir.ref>) -> i64 +// CHECK: %[[LHS_ELESZ:.*]] = fir.box_elesize %{{.*}} +// CHECK: %[[LHS_DIMS:.*]]:3 = fir.box_dims %{{.*}}, %{{.*}} +// CHECK: %{{.*}} = arith.subi %[[LHS_DIMS]]#1, %{{.*}} : index +// CHECK: %{{.*}} = arith.muli %{{.*}}, %[[LHS_DIMS]]#2 : index +// CHECK: %{{.*}} = arith.cmpi slt, %[[LHS_DIMS]]#2, %{{.*}} : index +// CHECK: %{{.*}} = arith.select %{{.*}}, %{{.*}}, %{{.*}} : index +// CHECK: %{{.*}} = arith.select %{{.*}}, %{{.*}}, %{{.*}} : index +// CHECK: %{{.*}} = arith.subi %[[LHS_ELESZ]], %{{.*}} : index +// CHECK: %{{.*}} = arith.addi %{{.*}}, %{{.*}} : index +// CHECK: %{{.*}} = fir.convert %{{.*}} : (index) -> i64 +// CHECK: %{{.*}} = fir.convert %{{.*}} : (index) -> i64 +// CHECK: %[[LHS_START:.*]] = arith.addi %[[LHS_BASE]], %{{.*}} : i64 +// CHECK: %[[LHS_END:.*]] = arith.addi %[[LHS_BASE]], %{{.*}} : i64 +// CHECK: %[[RHS_ADDR:.*]] = fir.box_addr %{{.*}} +// CHECK: %[[RHS_BASE:.*]] = fir.convert %[[RHS_ADDR]] : (!fir.ref>) -> i64 +// CHECK: %[[RHS_ELESZ:.*]] = fir.box_elesize %{{.*}} +// CHECK: 
%[[RHS_DIMS:.*]]:3 = fir.box_dims %{{.*}}, %{{.*}} +// CHECK: %{{.*}} = arith.subi %[[RHS_DIMS]]#1, %{{.*}} : index +// CHECK: %{{.*}} = arith.muli %{{.*}}, %[[RHS_DIMS]]#2 : index +// CHECK: %{{.*}} = arith.cmpi slt, %[[RHS_DIMS]]#2, %{{.*}} : index +// CHECK: %{{.*}} = arith.select %{{.*}}, %{{.*}}, %{{.*}} : index +// CHECK: %{{.*}} = arith.select %{{.*}}, %{{.*}}, %{{.*}} : index +// CHECK: %{{.*}} = arith.subi %[[RHS_ELESZ]], %{{.*}} : index +// CHECK: %{{.*}} = arith.addi %{{.*}}, %{{.*}} : index +// CHECK: %{{.*}} = fir.convert %{{.*}} : (index) -> i64 +// CHECK: %{{.*}} = fir.convert %{{.*}} : (index) -> i64 +// CHECK: %[[RHS_START:.*]] = arith.addi %[[RHS_BASE]], %{{.*}} : i64 +// CHECK: %[[RHS_END:.*]] = arith.addi %[[RHS_BASE]], %{{.*}} : i64 +// CHECK: %[[CMP1:.*]] = arith.cmpi ult, %[[LHS_END]], %[[RHS_START]] : i64 +// CHECK: %[[CMP2:.*]] = arith.cmpi ult, %[[RHS_END]], %[[LHS_START]] : i64 +// CHECK: %[[DISJOINT:.*]] = arith.ori %[[CMP1]], %[[CMP2]] : i1 +// CHECK: fir.if %[[DISJOINT]] { +// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { +// CHECK: %{{.*}} = hlfir.designate %{{.*}} (%{{.*}}) +// CHECK: %{{.*}} = fir.load %{{.*}} +// CHECK: %{{.*}} = hlfir.designate %{{.*}} (%{{.*}}) +// CHECK: hlfir.assign %{{.*}} to %{{.*}} +// CHECK: } +// CHECK: } else { +// CHECK: %[[EXPR:.*]] = hlfir.as_expr %{{.*}} +// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { +// CHECK: %{{.*}} = hlfir.apply %[[EXPR]], %{{.*}} +// CHECK: %{{.*}} = hlfir.designate %{{.*}} (%{{.*}}) +// CHECK: hlfir.assign %{{.*}} to %{{.*}} +// CHECK: } +// CHECK: hlfir.destroy %[[EXPR]] +// CHECK: } + +func.func @_QPtestcase(%arg0: !fir.ref> {fir.bindc_name = "a"}, %arg1: !fir.ref {fir.bindc_name = "n"}, %arg2: !fir.ref {fir.bindc_name = "step"}) { + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg1 dummy_scope %0 arg 2 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtestcaseEn"} : (!fir.ref, !fir.dscope) -> (!fir.ref, 
!fir.ref) + %2 = fir.alloca i32 {bindc_name = "hi1", uniq_name = "_QFtestcaseEhi1"} + %3:2 = hlfir.declare %2 {uniq_name = "_QFtestcaseEhi1"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %4 = fir.alloca i32 {bindc_name = "hi2", uniq_name = "_QFtestcaseEhi2"} + %5:2 = hlfir.declare %4 {uniq_name = "_QFtestcaseEhi2"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %6 = fir.alloca i32 {bindc_name = "lo1", uniq_name = "_QFtestcaseElo1"} + %7:2 = hlfir.declare %6 {uniq_name = "_QFtestcaseElo1"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %8 = fir.alloca i32 {bindc_name = "lo2", uniq_name = "_QFtestcaseElo2"} + %9:2 = hlfir.declare %8 {uniq_name = "_QFtestcaseElo2"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %10:2 = hlfir.declare %arg2 dummy_scope %0 arg 3 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtestcaseEstep"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %11 = fir.load %1#0 : !fir.ref + %12 = fir.convert %11 : (i32) -> i64 + %13 = fir.convert %12 : (i64) -> index + %c0 = arith.constant 0 : index + %14 = arith.cmpi sgt, %13, %c0 : index + %15 = arith.select %14, %13, %c0 : index + %16 = fir.shape %15 : (index) -> !fir.shape<1> + %17:2 = hlfir.declare %arg0(%16) dummy_scope %0 arg 1 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFtestcaseEa"} : (!fir.ref>, !fir.shape<1>, !fir.dscope) -> (!fir.box>, !fir.ref>) + %c1_i32 = arith.constant 1 : i32 + hlfir.assign %c1_i32 to %7#0 : i32, !fir.ref + %18 = fir.load %1#0 : !fir.ref + %c2_i32 = arith.constant 2 : i32 + %19 = arith.divsi %18, %c2_i32 : i32 + %c1_i32_0 = arith.constant 1 : i32 + %20 = arith.maxsi %19, %c1_i32_0 : i32 + hlfir.assign %20 to %3#0 : i32, !fir.ref + %c2_i32_1 = arith.constant 2 : i32 + hlfir.assign %c2_i32_1 to %9#0 : i32, !fir.ref + %21 = fir.load %1#0 : !fir.ref + hlfir.assign %21 to %5#0 : i32, !fir.ref + %22 = fir.load %9#0 : !fir.ref + %23 = fir.convert %22 : (i32) -> i64 + %24 = fir.load %5#0 : !fir.ref + %25 = fir.convert %24 : (i32) -> i64 + %26 = fir.convert %23 : (i64) -> index + %27 = fir.convert %25 : 
(i64) -> index + %28 = fir.load %10#0 : !fir.ref + %29 = fir.convert %28 : (i32) -> i64 + %30 = fir.convert %29 : (i64) -> index + %c0_2 = arith.constant 0 : index + %31 = arith.subi %27, %26 : index + %32 = arith.addi %31, %30 : index + %33 = arith.divsi %32, %30 : index + %34 = arith.cmpi sgt, %33, %c0_2 : index + %35 = arith.select %34, %33, %c0_2 : index + %36 = fir.shape %35 : (index) -> !fir.shape<1> + %37 = hlfir.designate %17#0 (%26:%27:%30) shape %36 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + %38 = fir.load %7#0 : !fir.ref + %39 = fir.convert %38 : (i32) -> i64 + %40 = fir.load %3#0 : !fir.ref + %41 = fir.convert %40 : (i32) -> i64 + %42 = fir.convert %39 : (i64) -> index + %43 = fir.convert %41 : (i64) -> index + %44 = fir.load %10#0 : !fir.ref + %45 = fir.convert %44 : (i32) -> i64 + %46 = fir.convert %45 : (i64) -> index + %c0_3 = arith.constant 0 : index + %47 = arith.subi %43, %42 : index + %48 = arith.addi %47, %46 : index + %49 = arith.divsi %48, %46 : index + %50 = arith.cmpi sgt, %49, %c0_3 : index + %51 = arith.select %50, %49, %c0_3 : index + %52 = fir.shape %51 : (index) -> !fir.shape<1> + %53 = hlfir.designate %17#0 (%42:%43:%46) shape %52 : (!fir.box>, index, index, index, !fir.shape<1>) -> !fir.box> + hlfir.assign %37 to %53 : !fir.box>, !fir.box> + return +} diff --git a/flang/test/HLFIR/inline-hlfir-assign-self-copy.fir b/flang/test/HLFIR/inline-hlfir-assign-self-copy.fir new file mode 100644 index 0000000000000..ba1d278080a02 --- /dev/null +++ b/flang/test/HLFIR/inline-hlfir-assign-self-copy.fir @@ -0,0 +1,178 @@ +// Test inlining of hlfir.assign for self-copy array section + +// RUN: fir-opt --inline-hlfir-assign %s | FileCheck %s + +// Fortran source code: +// subroutine self_copy_ew(field, xm, ym, levs, xh, yh) +// integer, intent(in) :: xm, ym, levs, xh, yh +// real, intent(inout) :: field(xm + 2 * xh, ym + 2 * yh, levs) +// integer :: x1, x2, x5, x6 +// x1 = 1 +// x2 = xh +// x5 = xm + 1 +// x6 = xm + xh +// 
field(x1:x2, :, :) = field(x5:x6, :, :) +// end subroutine self_copy_ew + +// CHECK-LABEL: func.func @_QPself_copy_ew +// CHECK-SAME: %[[ARGF:.*]]: !fir.ref> {fir.bindc_name = "field"} + +// CHECK: hlfir.declare %[[ARGF]](%{{.*}}) {{.*}} uniq_name = "_QFself_copy_ewEfield" + +// Index-based runtime disjointness check: +// CHECK: %[[RHS:.*]] = hlfir.designate %{{.*}}#0 (%{{.*}}:%{{.*}}:%{{.*}}, %{{.*}}:%{{.*}}:%{{.*}}, %{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} +// CHECK: %[[LHS:.*]] = hlfir.designate %{{.*}}#0 (%{{.*}}:%{{.*}}:%{{.*}}, %{{.*}}:%{{.*}}:%{{.*}}, %{{.*}}:%{{.*}}:%{{.*}}) shape %{{.*}} +// CHECK: %[[CMP1:.*]] = arith.cmpi slt, %{{.*}}, %{{.*}} : index +// CHECK: %[[CMP2:.*]] = arith.cmpi slt, %{{.*}}, %{{.*}} : index +// CHECK: %[[DIM1_DISJ:.*]] = arith.ori %[[CMP1]], %[[CMP2]] : i1 +// CHECK: %[[CMP3:.*]] = arith.cmpi slt, %{{.*}}, %{{.*}} : index +// CHECK: %[[CMP4:.*]] = arith.cmpi slt, %{{.*}}, %{{.*}} : index +// CHECK: %[[DIM2_DISJ:.*]] = arith.ori %[[CMP3]], %[[CMP4]] : i1 +// CHECK: %[[DISJ12:.*]] = arith.ori %[[DIM1_DISJ]], %[[DIM2_DISJ]] : i1 +// CHECK: %[[CMP5:.*]] = arith.cmpi slt, %{{.*}}, %{{.*}} : index +// CHECK: %[[CMP6:.*]] = arith.cmpi slt, %{{.*}}, %{{.*}} : index +// CHECK: %[[DIM3_DISJ:.*]] = arith.ori %[[CMP5]], %[[CMP6]] : i1 +// CHECK: %[[DISJOINT:.*]] = arith.ori %[[DISJ12]], %[[DIM3_DISJ]] : i1 +// CHECK: fir.if %[[DISJOINT]] { +// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { +// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { +// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { +// CHECK: %{{.*}} = hlfir.designate %[[RHS]] (%{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: %{{.*}} = fir.load %{{.*}} +// CHECK: %{{.*}} = hlfir.designate %[[LHS]] (%{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: hlfir.assign %{{.*}} to %{{.*}} : f32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } else { +// CHECK: %[[EXPR:.*]] = hlfir.as_expr %[[RHS]] +// CHECK: fir.do_loop 
%{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { +// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { +// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { +// CHECK: %{{.*}} = hlfir.apply %[[EXPR]], %{{.*}}, %{{.*}}, %{{.*}} +// CHECK: %{{.*}} = hlfir.designate %[[LHS]] (%{{.*}}, %{{.*}}, %{{.*}}) +// CHECK: hlfir.assign %{{.*}} to %{{.*}} : f32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: hlfir.destroy %[[EXPR]] +// CHECK: } + +func.func @_QPself_copy_ew(%arg0: !fir.ref> {fir.bindc_name = "field"}, %arg1: !fir.ref {fir.bindc_name = "xm"}, %arg2: !fir.ref {fir.bindc_name = "ym"}, %arg3: !fir.ref {fir.bindc_name = "levs"}, %arg4: !fir.ref {fir.bindc_name = "xh"}, %arg5: !fir.ref {fir.bindc_name = "yh"}) { + %0 = fir.dummy_scope : !fir.dscope + %1:2 = hlfir.declare %arg1 dummy_scope %0 arg 2 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFself_copy_ewExm"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %2:2 = hlfir.declare %arg4 dummy_scope %0 arg 5 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFself_copy_ewExh"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %3:2 = hlfir.declare %arg2 dummy_scope %0 arg 3 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFself_copy_ewEym"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %4:2 = hlfir.declare %arg5 dummy_scope %0 arg 6 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFself_copy_ewEyh"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %5:2 = hlfir.declare %arg3 dummy_scope %0 arg 4 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFself_copy_ewElevs"} : (!fir.ref, !fir.dscope) -> (!fir.ref, !fir.ref) + %6 = fir.alloca i32 {bindc_name = "x1", uniq_name = "_QFself_copy_ewEx1"} + %7:2 = hlfir.declare %6 {uniq_name = "_QFself_copy_ewEx1"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %8 = fir.alloca i32 {bindc_name = "x2", uniq_name = "_QFself_copy_ewEx2"} + %9:2 = hlfir.declare %8 {uniq_name = "_QFself_copy_ewEx2"} : (!fir.ref) -> (!fir.ref, 
!fir.ref) + %10 = fir.alloca i32 {bindc_name = "x5", uniq_name = "_QFself_copy_ewEx5"} + %11:2 = hlfir.declare %10 {uniq_name = "_QFself_copy_ewEx5"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %12 = fir.alloca i32 {bindc_name = "x6", uniq_name = "_QFself_copy_ewEx6"} + %13:2 = hlfir.declare %12 {uniq_name = "_QFself_copy_ewEx6"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %14 = fir.load %1#0 : !fir.ref + %c2_i32 = arith.constant 2 : i32 + %15 = fir.load %2#0 : !fir.ref + %16 = arith.muli %c2_i32, %15 : i32 + %17 = arith.addi %14, %16 : i32 + %18 = fir.convert %17 : (i32) -> i64 + %19 = fir.convert %18 : (i64) -> index + %c0 = arith.constant 0 : index + %20 = arith.cmpi sgt, %19, %c0 : index + %21 = arith.select %20, %19, %c0 : index + %22 = fir.load %3#0 : !fir.ref + %c2_i32_0 = arith.constant 2 : i32 + %23 = fir.load %4#0 : !fir.ref + %24 = arith.muli %c2_i32_0, %23 : i32 + %25 = arith.addi %22, %24 : i32 + %26 = fir.convert %25 : (i32) -> i64 + %27 = fir.convert %26 : (i64) -> index + %c0_1 = arith.constant 0 : index + %28 = arith.cmpi sgt, %27, %c0_1 : index + %29 = arith.select %28, %27, %c0_1 : index + %30 = fir.load %5#0 : !fir.ref + %31 = fir.convert %30 : (i32) -> i64 + %32 = fir.convert %31 : (i64) -> index + %c0_2 = arith.constant 0 : index + %33 = arith.cmpi sgt, %32, %c0_2 : index + %34 = arith.select %33, %32, %c0_2 : index + %35 = fir.shape %21, %29, %34 : (index, index, index) -> !fir.shape<3> + %36:2 = hlfir.declare %arg0(%35) dummy_scope %0 arg 1 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFself_copy_ewEfield"} : (!fir.ref>, !fir.shape<3>, !fir.dscope) -> (!fir.box>, !fir.ref>) + %c1_i32 = arith.constant 1 : i32 + hlfir.assign %c1_i32 to %7#0 : i32, !fir.ref + %37 = fir.load %2#0 : !fir.ref + hlfir.assign %37 to %9#0 : i32, !fir.ref + %38 = fir.load %1#0 : !fir.ref + %c1_i32_3 = arith.constant 1 : i32 + %39 = arith.addi %38, %c1_i32_3 : i32 + hlfir.assign %39 to %11#0 : i32, !fir.ref + %40 = fir.load %1#0 : !fir.ref + %41 = fir.load %2#0 : !fir.ref + 
%42 = arith.addi %40, %41 : i32 + hlfir.assign %42 to %13#0 : i32, !fir.ref + %43 = fir.load %11#0 : !fir.ref + %44 = fir.convert %43 : (i32) -> i64 + %45 = fir.load %13#0 : !fir.ref + %46 = fir.convert %45 : (i32) -> i64 + %47 = fir.convert %44 : (i64) -> index + %48 = fir.convert %46 : (i64) -> index + %c1 = arith.constant 1 : index + %c0_4 = arith.constant 0 : index + %49 = arith.subi %48, %47 : index + %50 = arith.addi %49, %c1 : index + %51 = arith.divsi %50, %c1 : index + %52 = arith.cmpi sgt, %51, %c0_4 : index + %53 = arith.select %52, %51, %c0_4 : index + %c1_5 = arith.constant 1 : index + %c1_6 = arith.constant 1 : index + %c0_7 = arith.constant 0 : index + %54 = arith.subi %29, %c1_5 : index + %55 = arith.addi %54, %c1_6 : index + %56 = arith.divsi %55, %c1_6 : index + %57 = arith.cmpi sgt, %56, %c0_7 : index + %58 = arith.select %57, %56, %c0_7 : index + %c1_8 = arith.constant 1 : index + %c0_9 = arith.constant 0 : index + %59 = arith.subi %34, %c1_5 : index + %60 = arith.addi %59, %c1_8 : index + %61 = arith.divsi %60, %c1_8 : index + %62 = arith.cmpi sgt, %61, %c0_9 : index + %63 = arith.select %62, %61, %c0_9 : index + %64 = fir.shape %53, %58, %63 : (index, index, index) -> !fir.shape<3> + %65 = hlfir.designate %36#0 (%47:%48:%c1, %c1_5:%29:%c1_6, %c1_5:%34:%c1_8) shape %64 : (!fir.box>, index, index, index, index, index, index, index, index, index, !fir.shape<3>) -> !fir.box> + %66 = fir.load %7#0 : !fir.ref + %67 = fir.convert %66 : (i32) -> i64 + %68 = fir.load %9#0 : !fir.ref + %69 = fir.convert %68 : (i32) -> i64 + %70 = fir.convert %67 : (i64) -> index + %71 = fir.convert %69 : (i64) -> index + %c1_10 = arith.constant 1 : index + %c0_11 = arith.constant 0 : index + %72 = arith.subi %71, %70 : index + %73 = arith.addi %72, %c1_10 : index + %74 = arith.divsi %73, %c1_10 : index + %75 = arith.cmpi sgt, %74, %c0_11 : index + %76 = arith.select %75, %74, %c0_11 : index + %c1_12 = arith.constant 1 : index + %c1_13 = arith.constant 1 : index + %c0_14 
= arith.constant 0 : index + %77 = arith.subi %29, %c1_12 : index + %78 = arith.addi %77, %c1_13 : index + %79 = arith.divsi %78, %c1_13 : index + %80 = arith.cmpi sgt, %79, %c0_14 : index + %81 = arith.select %80, %79, %c0_14 : index + %c1_15 = arith.constant 1 : index + %c0_16 = arith.constant 0 : index + %82 = arith.subi %34, %c1_12 : index + %83 = arith.addi %82, %c1_15 : index + %84 = arith.divsi %83, %c1_15 : index + %85 = arith.cmpi sgt, %84, %c0_16 : index + %86 = arith.select %85, %84, %c0_16 : index + %87 = fir.shape %76, %81, %86 : (index, index, index) -> !fir.shape<3> + %88 = hlfir.designate %36#0 (%70:%71:%c1_10, %c1_12:%29:%c1_13, %c1_12:%34:%c1_15) shape %87 : (!fir.box>, index, index, index, index, index, index, index, index, index, !fir.shape<3>) -> !fir.box> + hlfir.assign %65 to %88 : !fir.box>, !fir.box> + return +} From 74a18318642f7b8532e1600776b14f49a2f415c3 Mon Sep 17 00:00:00 2001 From: Mark Murray Date: Thu, 25 Jun 2026 09:57:56 +0100 Subject: [PATCH 474/511] Add armagicpu to release notes (#205599) Add armagicpu to release notes Raised by validator. --------- Co-authored-by: Volodymyr Turanskyy --- clang/docs/ReleaseNotes.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 370f6944831a7..96e36f99f558b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -890,6 +890,8 @@ X86 Support Arm and AArch64 Support ^^^^^^^^^^^^^^^^^^^^^^^ +Added support for the Arm AGI CPU via the ``-mcpu=armagicpu`` command-line option. + Android Support ^^^^^^^^^^^^^^^ From 2e18254609f08ab4c9c5fc0d713f3ba4c4d42a41 Mon Sep 17 00:00:00 2001 From: Piotr Fusik Date: Thu, 25 Jun 2026 11:25:29 +0200 Subject: [PATCH 475/511] [Clang] Fix missing vtable for `dynamic_cast(this)` in a function template (#202594) 9d525bf94b255df89587db955b5fa2d3c03c2c3e introduced an optimization of `dynamic_cast` by comparing vtable pointers. 
This requires the vtable to be emitted, which was fixed for most cases in #64088. This change addresses a missing case of a `dynamic_cast` of `this` in a function template. We ensure that `Sema::MarkVTableUsed` gets called during template instantiation. It wasn't because `CXXThisExpr` is unaffected by template instantiation. Fix #198511 --- clang/docs/ReleaseNotes.rst | 1 + clang/include/clang/AST/TypeBase.h | 2 +- clang/lib/AST/Type.cpp | 2 +- clang/lib/Sema/SemaTemplateInstantiate.cpp | 10 ++++++++++ clang/test/CodeGenCXX/dynamic-cast-exact.cpp | 14 ++++++++++++++ 5 files changed, 27 insertions(+), 2 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 96e36f99f558b..d4c286644033b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -811,6 +811,7 @@ Bug Fixes to C++ Support - Fixed a crash in constant evaluation using placement new on an array which was later initialized. (#GH196450) - Fixed an issue where Clang incorrectly accepted invalid unqualified uses of local nested class names outside their declaring scope. (#GH184622) - Fixed a crash when parsing invalid friend declaration with storage-class specifier. (#GH186569) +- Fixed a missing vtable for ``dynamic_cast(this)`` in a function template. (#GH198511) Bug Fixes to AST Handling ^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/AST/TypeBase.h b/clang/include/clang/AST/TypeBase.h index c9658775f0470..3a801e2857b13 100644 --- a/clang/include/clang/AST/TypeBase.h +++ b/clang/include/clang/AST/TypeBase.h @@ -2950,7 +2950,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { /// /// If this is not a pointer or reference, or the type being pointed to does /// not refer to a CXXRecordDecl, returns NULL. - const CXXRecordDecl *getPointeeCXXRecordDecl() const; + CXXRecordDecl *getPointeeCXXRecordDecl() const; /// Get the DeducedType whose type will be deduced for a variable with /// an initializer of this type. 
This looks through declarators like pointer diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index b7bef40ca89f3..dffb3f1b50207 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -1955,7 +1955,7 @@ const ObjCObjectPointerType *Type::getAsObjCInterfacePointerType() const { return nullptr; } -const CXXRecordDecl *Type::getPointeeCXXRecordDecl() const { +CXXRecordDecl *Type::getPointeeCXXRecordDecl() const { QualType PointeeType; if (const auto *PT = getAsCanonical()) PointeeType = PT->getPointeeType(); diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 427b634a92e46..a77ea5fd3dfff 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1886,6 +1886,16 @@ namespace { SmallVectorImpl &PTypes, SmallVectorImpl &TransParams, Sema::ExtParameterInfoBuilder &PInfos); + + ExprResult TransformCXXDynamicCastExpr(CXXDynamicCastExpr *E) { + ExprResult Ret = inherited::TransformCXXDynamicCastExpr(E); + if (Ret.isInvalid()) + return Ret; + auto *DestDecl = Ret.get()->getType()->getPointeeCXXRecordDecl(); + if (DestDecl && DestDecl->isEffectivelyFinal()) + getSema().MarkVTableUsed(Ret.get()->getExprLoc(), DestDecl); + return Ret; + } }; } diff --git a/clang/test/CodeGenCXX/dynamic-cast-exact.cpp b/clang/test/CodeGenCXX/dynamic-cast-exact.cpp index 588d80844a2fa..86a97f764e729 100644 --- a/clang/test/CodeGenCXX/dynamic-cast-exact.cpp +++ b/clang/test/CodeGenCXX/dynamic-cast-exact.cpp @@ -125,3 +125,17 @@ namespace GH64088 { struct B final : A { virtual ~B() = default; }; B *cast(A *p) { return dynamic_cast(p); } } + +namespace GH198511 { + // Ensure we mark the B vtable as used here, because we're going to emit a + // reference to it. 
+ // CHECK: define {{.*}} @_ZN8GH1985111BD0 + struct B; + struct A { + virtual ~A() = default; + template B *cast(); + }; + struct B final : A { }; + template B *A::cast() { return dynamic_cast(this); } + template B *A::cast(); +} From a0badea5a0ee065f97246089d19ad2416877f144 Mon Sep 17 00:00:00 2001 From: Michael Toguchi Date: Thu, 25 Jun 2026 02:26:14 -0700 Subject: [PATCH 476/511] [Driver][SYCL] Generate integration files for host only mode (#22422) When performing --offload-host-only compilation for SYCL and having the new offloading model enabled, we need to be sure that we perform a device only compilation first to generate the integration header and footer files that will be consumed by the host compile. --- clang/lib/Driver/Driver.cpp | 27 +++++- .../sycl-offload-host-only-new-driver.cpp | 88 +++++++++++++++++++ 2 files changed, 114 insertions(+), 1 deletion(-) create mode 100644 clang/test/Driver/sycl-offload-host-only-new-driver.cpp diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index c2c9b62603931..e2650a49a8a50 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -7884,7 +7884,11 @@ Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, ActionList *HIPAsmBundleDeviceOut) const { // Don't build offloading actions if explicitly disabled or we do not have a // valid source input. - if (offloadHostOnly() || + // Exception: For SYCL, we need to build device actions even in host-only mode + // to generate integration header/footer files. 
+ bool IsSYCLHostOnly = + offloadHostOnly() && C.isOffloadingHostKind(Action::OFK_SYCL); + if ((offloadHostOnly() && !IsSYCLHostOnly) || !(types::isSrcFile(Input.first) || Input.first == types::TY_PP_CXX)) return HostAction; @@ -8041,6 +8045,27 @@ Driver::BuildOffloadingActions(Compilation &C, llvm::opt::DerivedArgList &Args, ++TCAndArch; } } + + // For SYCL host-only mode, replace device actions with syntax-only compile + // actions to generate integration header/footer files without producing + // device binaries. + if (IsSYCLHostOnly && Kind == Action::OFK_SYCL) { + for (Action *&A : DeviceActions) { + // Find the input action (should be first in the chain) + Action *InputAction = A; + while (InputAction->getInputs().size() == 1) + InputAction = *InputAction->input_begin(); + + // Create a syntax-only compile action from the input + A = C.MakeAction(InputAction, types::TY_Nothing); + + // Propagate device offload info + auto *TCAndArch = TCAndArchs.begin(); + A->propagateDeviceOffloadInfo(Kind, TCAndArch->second.data(), + TCAndArch->first); + } + } + // Use of -fsycl-device-obj=spirv converts the original LLVM-IR file to // SPIR-V for later consumption. for (Action *&A : DeviceActions) { diff --git a/clang/test/Driver/sycl-offload-host-only-new-driver.cpp b/clang/test/Driver/sycl-offload-host-only-new-driver.cpp new file mode 100644 index 0000000000000..8030d8dcb846c --- /dev/null +++ b/clang/test/Driver/sycl-offload-host-only-new-driver.cpp @@ -0,0 +1,88 @@ +/// +/// Tests for -fsycl --offload-host-only with the new offloading driver. 
+/// + +/// ########################################################################### +/// Test phase output with -ccc-print-phases +// RUN: %clang -fsycl --target=x86_64-unknown-linux-gnu --offload-new-driver --offload-host-only -ccc-print-phases -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-PHASES %s + +// CHK-PHASES: 0: input, "[[INPUT:.+\.cpp]]", c++, (host-sycl) +// CHK-PHASES: 1: preprocessor, {0}, c++-cpp-output, (host-sycl) +// CHK-PHASES: 2: compiler, {1}, ir, (host-sycl) +// CHK-PHASES: 3: input, "[[INPUT]]", c++, (device-sycl) +// CHK-PHASES: 4: compiler, {3}, none, (device-sycl) +// CHK-PHASES: 5: offload, "host-sycl (x86_64-unknown-linux-gnu)" {2}, "device-sycl (spir64-unknown-unknown)" {4}, ir +// CHK-PHASES: 6: backend, {5}, assembler, (host-sycl) +// CHK-PHASES: 7: assembler, {6}, object, (host-sycl) + +/// ########################################################################### +/// Test that device compile generates integration header/footer, and the host +/// compiler consumes these. 
+// RUN: %clang -fsycl --target=x86_64-unknown-linux-gnu --offload-new-driver --offload-host-only -### -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-INT-HEADER %s + +// CHK-INT-HEADER: clang{{.*}} "-cc1" "-triple" "spir64-unknown-unknown" +// CHK-INT-HEADER-SAME: "-fsycl-is-device" +// CHK-INT-HEADER-SAME: "-fsycl-int-header=[[HEADER:.+\.h]]" "-fsycl-int-footer=[[FOOTER:.+\.h]]" +// CHK-INT-HEADER-SAME: "-fsyntax-only" +// CHK-INT-HEADER: clang{{.*}} "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CHK-INT-HEADER-SAME: "-fsycl-is-host" +// CHK-INT-HEADER-SAME: "-include-internal-header" "[[HEADER]]" +// CHK-INT-HEADER-SAME: "-dependency-filter" "[[HEADER]]" +// CHK-INT-HEADER-SAME: "-include-internal-footer" "[[FOOTER]]" +// CHK-INT-HEADER-SAME: "-dependency-filter" "[[FOOTER]]" +// CHK-INT-HEADER-SAME: "-emit-obj" +// CHK-INT-HEADER-SAME: "-o" "{{.*\.o}}" + +/// ########################################################################### +/// Test that no bundled output is created (only 2 invocations: device + host) +// RUN: %clang -fsycl --target=x86_64-unknown-linux-gnu --offload-new-driver --offload-host-only -### -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-NO-BUNDLER %s + +// CHK-NO-BUNDLER-NOT: clang-offload-bundler +// CHK-NO-BUNDLER-NOT: clang-offload-packager + +/// ########################################################################### +/// Test with explicit target triple +// RUN: %clang -fsycl --target=x86_64-unknown-linux-gnu --offload-new-driver --offload-host-only -fsycl-targets=spir64 -### -c %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-TARGET %s + +// CHK-TARGET: clang{{.*}} "-cc1" "-triple" "spir64-unknown-unknown" +// CHK-TARGET-SAME: "-fsycl-is-device" +// CHK-TARGET-SAME: "-fsycl-int-header= +// CHK-TARGET-SAME: "-fsyntax-only" +// CHK-TARGET: clang{{.*}} "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CHK-TARGET-SAME: "-fsycl-is-host" +// CHK-TARGET-SAME: "-include-internal-header" + +/// 
########################################################################### +/// Test that linking works with host-only objects +// RUN: %clang -fsycl --target=x86_64-unknown-linux-gnu --offload-new-driver --offload-host-only -### %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-LINK %s + +// CHK-LINK: clang{{.*}} "-cc1" "-triple" "spir64-unknown-unknown" +// CHK-LINK-SAME: "-fsycl-is-device" +// CHK-LINK-SAME: "-fsyntax-only" +// CHK-LINK: clang{{.*}} "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CHK-LINK-SAME: "-fsycl-is-host" +// CHK-LINK-SAME: "-emit-obj" +// CHK-LINK: ld{{.*}} + +/// ########################################################################### +/// Test with multiple source files +// RUN: %clang -fsycl --target=x86_64-unknown-linux-gnu --offload-new-driver --offload-host-only -### -c %s %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-MULTI %s + +// CHK-MULTI: clang{{.*}} "-cc1" "-triple" "spir64-unknown-unknown" +// CHK-MULTI-SAME: "-fsycl-is-device" +// CHK-MULTI-SAME: "-fsyntax-only" +// CHK-MULTI: clang{{.*}} "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CHK-MULTI-SAME: "-fsycl-is-host" +// CHK-MULTI-SAME: "-emit-obj" +// CHK-MULTI: clang{{.*}} "-cc1" "-triple" "spir64-unknown-unknown" +// CHK-MULTI-SAME: "-fsycl-is-device" +// CHK-MULTI-SAME: "-fsyntax-only" +// CHK-MULTI: clang{{.*}} "-cc1" "-triple" "x86_64-unknown-linux-gnu" +// CHK-MULTI-SAME: "-fsycl-is-host" +// CHK-MULTI-SAME: "-emit-obj" From 6f70d56dfba59a3e1b04e7d65ee275fd6f950270 Mon Sep 17 00:00:00 2001 From: Javed Absar Date: Thu, 25 Jun 2026 10:31:48 +0100 Subject: [PATCH 477/511] [mlir][linalg] Add category-to-named conversion in linalg-morph-ops (#205582) Add the missing `category-to-named` morphism path that converts `linalg.elementwise` ops to their equivalent named ops (e.g. `linalg.elementwise kind=add` -> `linalg.add`). 
This completes the set of conversions in the linalg-morph-ops pass: generic <---> category <---> named The conversion only applies to elementwise ops with identity indexing maps, since named elementwise ops cannot carry custom maps. Kinds without a named op equivalent (e.g. sin, cos) are left unconverted. Co-authored-by: mabsar --- mlir/include/mlir/Dialect/Linalg/Passes.td | 5 +- .../Dialect/Linalg/Transforms/Transforms.h | 5 + .../Dialect/Linalg/Transforms/CMakeLists.txt | 1 + .../Linalg/Transforms/CategoryToNamedOp.cpp | 103 ++++++++ .../Dialect/Linalg/Transforms/MorphOps.cpp | 2 + .../linalg-morph-elementwise-to-named.mlir | 239 ++++++++++++++++++ 6 files changed, 353 insertions(+), 2 deletions(-) create mode 100644 mlir/lib/Dialect/Linalg/Transforms/CategoryToNamedOp.cpp create mode 100644 mlir/test/Dialect/Linalg/linalg-morph-elementwise-to-named.mlir diff --git a/mlir/include/mlir/Dialect/Linalg/Passes.td b/mlir/include/mlir/Dialect/Linalg/Passes.td index b873f260e7d92..3a43af9ca1855 100644 --- a/mlir/include/mlir/Dialect/Linalg/Passes.td +++ b/mlir/include/mlir/Dialect/Linalg/Passes.td @@ -72,8 +72,9 @@ def LinalgMorphOpsPass : Pass<"linalg-morph-ops"> { Option<"genericToNamed", "generic-to-named", "bool", /*default=*/"false", "convert linalg.generic to equivalent named ops">, Option<"genericToCategory", "generic-to-category", "bool", /*default=*/"false", - "convert linalg.generic to equivalent category ops"> ]; - // TODOs: `category-to-named` + "convert linalg.generic to equivalent category ops">, + Option<"categoryToNamed", "category-to-named", "bool", /*default=*/"false", + "convert category ops e.g. 
`linalg.elementwise` to equivalent named ops"> ]; } def LinalgGeneralizeNamedOpsPass : Pass<"linalg-generalize-named-ops">, diff --git a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h index 486ef75b76859..836682de4c404 100644 --- a/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h +++ b/mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h @@ -1922,6 +1922,11 @@ void populateLinalgGenericOpsSpecializationPatterns( /// to equivalent `linalg.elementwise`. void populateLinalgNamedToElementwisePatterns(RewritePatternSet &patterns); +/// Populates `patterns` that convert linalg category ops (e.g. +/// `linalg.elementwise`, `linalg.contract`) to equivalent linalg named ops +/// (e.g. `linalg.add`, `linalg.matmul`). +void populateLinalgCategoryToNamedPatterns(RewritePatternSet &patterns); + /// Populates `patterns` with patterns that fold operations like /// `linalg.transform` into elementwise op map. void populateLinalgFoldIntoElementwisePatterns(RewritePatternSet &patterns); diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index a2149478e4c2d..6dcc6e6d18429 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -10,6 +10,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms DropUnitDims.cpp ElementwiseOpFusion.cpp ElementwiseToLinalg.cpp + CategoryToNamedOp.cpp EliminateEmptyTensors.cpp EraseUnusedOperandsAndResults.cpp FoldAddIntoDest.cpp diff --git a/mlir/lib/Dialect/Linalg/Transforms/CategoryToNamedOp.cpp b/mlir/lib/Dialect/Linalg/Transforms/CategoryToNamedOp.cpp new file mode 100644 index 0000000000000..6d1f61100dbf5 --- /dev/null +++ b/mlir/lib/Dialect/Linalg/Transforms/CategoryToNamedOp.cpp @@ -0,0 +1,103 @@ +//===- CategoryToNamedOp.cpp - convert category ops to linalg named ops ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with 
LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements rewriting of linalg category ops (e.g. +// `linalg.elementwise`) to their equivalent named ops (e.g. `linalg.add`, +// `linalg.exp`). This is the reverse of NamedToElementwise.cpp. +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/Linalg/Transforms/Transforms.h" +#include "mlir/IR/PatternMatch.h" + +using namespace mlir; +using namespace mlir::linalg; + +#define DEBUG_TYPE "linalg-category-to-named" + +namespace { +struct ElementwiseToNamedPattern : public OpRewritePattern { + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(ElementwiseOp op, + PatternRewriter &rewriter) const override { + // Named elementwise ops only support identity indexing maps. + if (!op.getIndexingMapsArray().empty() && + !llvm::all_of(op.getIndexingMapsArray(), + [](AffineMap map) { return map.isIdentity(); })) + return failure(); + + auto inputs = op.getDpsInputs(); + auto inits = op.getDpsInits(); + auto loc = op.getLoc(); + + // Helper to create a named op and replace the elementwise op. 
+ auto replaceWith = [&](auto namedOp) { + using OpTy = decltype(namedOp); + rewriter.replaceOp(op, OpTy::create(rewriter, loc, inputs, inits, + ArrayRef{})); + return success(); + }; + + switch (op.getKind()) { + case ElementwiseKind::exp: + return replaceWith(ExpOp{}); + case ElementwiseKind::log: + return replaceWith(LogOp{}); + case ElementwiseKind::abs: + return replaceWith(AbsOp{}); + case ElementwiseKind::ceil: + return replaceWith(CeilOp{}); + case ElementwiseKind::floor: + return replaceWith(FloorOp{}); + case ElementwiseKind::negf: + return replaceWith(NegFOp{}); + case ElementwiseKind::reciprocal: + return replaceWith(ReciprocalOp{}); + case ElementwiseKind::round: + return replaceWith(RoundOp{}); + case ElementwiseKind::sqrt: + return replaceWith(SqrtOp{}); + case ElementwiseKind::rsqrt: + return replaceWith(RsqrtOp{}); + case ElementwiseKind::square: + return replaceWith(SquareOp{}); + case ElementwiseKind::tanh: + return replaceWith(TanhOp{}); + case ElementwiseKind::erf: + return replaceWith(ErfOp{}); + case ElementwiseKind::add: + return replaceWith(AddOp{}); + case ElementwiseKind::sub: + return replaceWith(SubOp{}); + case ElementwiseKind::mul: + return replaceWith(MulOp{}); + case ElementwiseKind::div: + return replaceWith(DivOp{}); + case ElementwiseKind::div_unsigned: + return replaceWith(DivUnsignedOp{}); + case ElementwiseKind::max_signed: + return replaceWith(MaxOp{}); + case ElementwiseKind::min_signed: + return replaceWith(MinOp{}); + case ElementwiseKind::powf: + return replaceWith(PowFOp{}); + case ElementwiseKind::select: + return replaceWith(SelectOp{}); + default: + return failure(); + } + } +}; +} // namespace + +void mlir::linalg::populateLinalgCategoryToNamedPatterns( + RewritePatternSet &patterns) { + patterns.add(patterns.getContext()); +} diff --git a/mlir/lib/Dialect/Linalg/Transforms/MorphOps.cpp b/mlir/lib/Dialect/Linalg/Transforms/MorphOps.cpp index fee293647deda..7d360ee734249 100644 --- 
a/mlir/lib/Dialect/Linalg/Transforms/MorphOps.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/MorphOps.cpp @@ -50,6 +50,8 @@ void LinalgMorphOpsPass::runOnOperation() { populateLinalgNamedOpsGeneralizationPatterns(patterns); // Lifting paths (named <- category <- generic) + if (categoryToNamed) + populateLinalgCategoryToNamedPatterns(patterns); if (genericToNamed || genericToCategory) { GenericOpSpecializationOptions opts; opts.emitCategoryOps = genericToCategory; diff --git a/mlir/test/Dialect/Linalg/linalg-morph-elementwise-to-named.mlir b/mlir/test/Dialect/Linalg/linalg-morph-elementwise-to-named.mlir new file mode 100644 index 0000000000000..82365f5de8f92 --- /dev/null +++ b/mlir/test/Dialect/Linalg/linalg-morph-elementwise-to-named.mlir @@ -0,0 +1,239 @@ +// Category to named conversion and roundtrip (output is identical). +// RUN: mlir-opt %s -linalg-morph-ops=category-to-named -split-input-file | \ +// RUN: FileCheck %s +// RUN: mlir-opt %s -linalg-morph-ops=category-to-named -split-input-file | \ +// RUN: mlir-opt -linalg-morph-ops=named-to-category -split-input-file | \ +// RUN: mlir-opt -linalg-morph-ops=category-to-named -split-input-file | \ +// RUN: FileCheck %s + +func.func @unary_ops(%A : tensor<16x8xf32>, %B : tensor<16x8xf32>) -> tensor<16x8xf32> { + %exp = linalg.elementwise kind=#linalg.elementwise_kind + ins(%A : tensor<16x8xf32>) outs(%B : tensor<16x8xf32>) -> tensor<16x8xf32> + %log = linalg.elementwise kind=#linalg.elementwise_kind + ins(%exp : tensor<16x8xf32>) outs(%B : tensor<16x8xf32>) -> tensor<16x8xf32> + %abs = linalg.elementwise kind=#linalg.elementwise_kind + ins(%log : tensor<16x8xf32>) outs(%B : tensor<16x8xf32>) -> tensor<16x8xf32> + %ceil = linalg.elementwise kind=#linalg.elementwise_kind + ins(%abs : tensor<16x8xf32>) outs(%B : tensor<16x8xf32>) -> tensor<16x8xf32> + %floor = linalg.elementwise kind=#linalg.elementwise_kind + ins(%ceil : tensor<16x8xf32>) outs(%B : tensor<16x8xf32>) -> tensor<16x8xf32> + %negf = linalg.elementwise 
kind=#linalg.elementwise_kind + ins(%floor : tensor<16x8xf32>) outs(%B : tensor<16x8xf32>) -> tensor<16x8xf32> + %recip = linalg.elementwise kind=#linalg.elementwise_kind + ins(%negf : tensor<16x8xf32>) outs(%B : tensor<16x8xf32>) -> tensor<16x8xf32> + %round = linalg.elementwise kind=#linalg.elementwise_kind + ins(%recip : tensor<16x8xf32>) outs(%B : tensor<16x8xf32>) -> tensor<16x8xf32> + %sqrt = linalg.elementwise kind=#linalg.elementwise_kind + ins(%round : tensor<16x8xf32>) outs(%B : tensor<16x8xf32>) -> tensor<16x8xf32> + %rsqrt = linalg.elementwise kind=#linalg.elementwise_kind + ins(%sqrt : tensor<16x8xf32>) outs(%B : tensor<16x8xf32>) -> tensor<16x8xf32> + %square = linalg.elementwise kind=#linalg.elementwise_kind + ins(%rsqrt : tensor<16x8xf32>) outs(%B : tensor<16x8xf32>) -> tensor<16x8xf32> + %tanh = linalg.elementwise kind=#linalg.elementwise_kind + ins(%square : tensor<16x8xf32>) outs(%B : tensor<16x8xf32>) -> tensor<16x8xf32> + %erf = linalg.elementwise kind=#linalg.elementwise_kind + ins(%tanh : tensor<16x8xf32>) outs(%B : tensor<16x8xf32>) -> tensor<16x8xf32> + return %erf : tensor<16x8xf32> +} + +// CHECK-LABEL: unary_ops +// CHECK-SAME: %[[A:.+]]: tensor<16x8xf32>, %[[B:.+]]: tensor<16x8xf32>) +// CHECK-NOT: linalg.elementwise +// CHECK: %[[EXP:.+]] = linalg.exp +// CHECK-SAME: ins(%[[A]] : tensor<16x8xf32>) +// CHECK-SAME: outs(%[[B]] : tensor<16x8xf32>) -> tensor<16x8xf32> +// CHECK: %[[LOG:.+]] = linalg.log +// CHECK-SAME: ins(%[[EXP]] : tensor<16x8xf32>) +// CHECK-SAME: outs(%[[B]] : tensor<16x8xf32>) -> tensor<16x8xf32> +// CHECK: %[[ABS:.+]] = linalg.abs +// CHECK-SAME: ins(%[[LOG]] : tensor<16x8xf32>) +// CHECK-SAME: outs(%[[B]] : tensor<16x8xf32>) -> tensor<16x8xf32> +// CHECK: %[[CEIL:.+]] = linalg.ceil +// CHECK-SAME: ins(%[[ABS]] : tensor<16x8xf32>) +// CHECK-SAME: outs(%[[B]] : tensor<16x8xf32>) -> tensor<16x8xf32> +// CHECK: %[[FLOOR:.+]] = linalg.floor +// CHECK-SAME: ins(%[[CEIL]] : tensor<16x8xf32>) +// CHECK-SAME: outs(%[[B]] : 
tensor<16x8xf32>) -> tensor<16x8xf32> +// CHECK: %[[NEGF:.+]] = linalg.negf +// CHECK-SAME: ins(%[[FLOOR]] : tensor<16x8xf32>) +// CHECK-SAME: outs(%[[B]] : tensor<16x8xf32>) -> tensor<16x8xf32> +// CHECK: %[[RECIP:.+]] = linalg.reciprocal +// CHECK-SAME: ins(%[[NEGF]] : tensor<16x8xf32>) +// CHECK-SAME: outs(%[[B]] : tensor<16x8xf32>) -> tensor<16x8xf32> +// CHECK: %[[ROUND:.+]] = linalg.round +// CHECK-SAME: ins(%[[RECIP]] : tensor<16x8xf32>) +// CHECK-SAME: outs(%[[B]] : tensor<16x8xf32>) -> tensor<16x8xf32> +// CHECK: %[[SQRT:.+]] = linalg.sqrt +// CHECK-SAME: ins(%[[ROUND]] : tensor<16x8xf32>) +// CHECK-SAME: outs(%[[B]] : tensor<16x8xf32>) -> tensor<16x8xf32> +// CHECK: %[[RSQRT:.+]] = linalg.rsqrt +// CHECK-SAME: ins(%[[SQRT]] : tensor<16x8xf32>) +// CHECK-SAME: outs(%[[B]] : tensor<16x8xf32>) -> tensor<16x8xf32> +// CHECK: %[[SQUARE:.+]] = linalg.square +// CHECK-SAME: ins(%[[RSQRT]] : tensor<16x8xf32>) +// CHECK-SAME: outs(%[[B]] : tensor<16x8xf32>) -> tensor<16x8xf32> +// CHECK: %[[TANH:.+]] = linalg.tanh +// CHECK-SAME: ins(%[[SQUARE]] : tensor<16x8xf32>) +// CHECK-SAME: outs(%[[B]] : tensor<16x8xf32>) -> tensor<16x8xf32> +// CHECK: linalg.erf +// CHECK-SAME: ins(%[[TANH]] : tensor<16x8xf32>) +// CHECK-SAME: outs(%[[B]] : tensor<16x8xf32>) -> tensor<16x8xf32> + +// ----- + +func.func @binary_ops_int(%A: tensor, %B: tensor, + %Out: tensor) -> tensor { + %0 = linalg.elementwise kind=#linalg.elementwise_kind + ins(%A, %B : tensor, tensor) + outs(%Out : tensor) -> tensor + %1 = linalg.elementwise kind=#linalg.elementwise_kind + ins(%0, %B : tensor, tensor) + outs(%Out : tensor) -> tensor + %2 = linalg.elementwise kind=#linalg.elementwise_kind + ins(%1, %B : tensor, tensor) + outs(%Out : tensor) -> tensor + %3 = linalg.elementwise kind=#linalg.elementwise_kind
+ ins(%2, %B : tensor, tensor) + outs(%Out : tensor) -> tensor + %4 = linalg.elementwise kind=#linalg.elementwise_kind + ins(%3, %B : tensor, tensor) + outs(%Out : tensor) -> tensor + %5 = linalg.elementwise kind=#linalg.elementwise_kind + ins(%4, %B : tensor, tensor) + outs(%Out : tensor) -> tensor + %6 = linalg.elementwise kind=#linalg.elementwise_kind + ins(%5, %B : tensor, tensor) + outs(%Out : tensor) -> tensor + return %6 : tensor +} + +// CHECK-LABEL: binary_ops_int +// CHECK-SAME: %[[A:.+]]: tensor, %[[B:.+]]: tensor, +// CHECK-SAME: %[[OUT:.+]]: tensor) +// CHECK-NOT: linalg.elementwise +// CHECK: %[[ADD:.+]] = linalg.add +// CHECK-SAME: ins(%[[A]], %[[B]] : tensor, tensor) +// CHECK-SAME: outs(%[[OUT]] : tensor) -> tensor +// CHECK: %[[SUB:.+]] = linalg.sub +// CHECK-SAME: ins(%[[ADD]], %[[B]] : tensor, tensor) +// CHECK-SAME: outs(%[[OUT]] : tensor) -> tensor +// CHECK: %[[MUL:.+]] = linalg.mul +// CHECK-SAME: ins(%[[SUB]], %[[B]] : tensor, tensor) +// CHECK-SAME: outs(%[[OUT]] : tensor) -> tensor +// CHECK: %[[DIV:.+]] = linalg.div +// CHECK-SAME: ins(%[[MUL]], %[[B]] : tensor, tensor) +// CHECK-SAME: outs(%[[OUT]] : tensor) -> tensor +// CHECK: %[[DIVU:.+]] = linalg.div_unsigned +// CHECK-SAME: ins(%[[DIV]], %[[B]] : tensor, tensor) +// CHECK-SAME: outs(%[[OUT]] : tensor) -> tensor +// CHECK: %[[MAX:.+]] = linalg.max +// CHECK-SAME: ins(%[[DIVU]], %[[B]] : tensor, tensor) +// CHECK-SAME: outs(%[[OUT]] : tensor) -> tensor +// CHECK: linalg.min +// CHECK-SAME: ins(%[[MAX]], %[[B]] : tensor, tensor) +// CHECK-SAME: outs(%[[OUT]] : tensor) -> tensor + +// ----- + +func.func @binary_ops_float(%A: tensor, %B: tensor, + %Out: tensor) -> tensor { + %0 = linalg.elementwise kind=#linalg.elementwise_kind + ins(%A, %B : tensor, tensor) + outs(%Out : tensor) -> tensor + %1 = linalg.elementwise kind=#linalg.elementwise_kind + ins(%0, %B : tensor, tensor) + outs(%Out : tensor) -> tensor + %2 = linalg.elementwise kind=#linalg.elementwise_kind + ins(%1, %B : tensor, 
tensor) + outs(%Out : tensor) -> tensor + %3 = linalg.elementwise kind=#linalg.elementwise_kind
+ ins(%2, %B : tensor, tensor) + outs(%Out : tensor) -> tensor + %4 = linalg.elementwise kind=#linalg.elementwise_kind + ins(%3, %B : tensor, tensor) + outs(%Out : tensor) -> tensor + %5 = linalg.elementwise kind=#linalg.elementwise_kind + ins(%4, %B : tensor, tensor) + outs(%Out : tensor) -> tensor + %6 = linalg.elementwise kind=#linalg.elementwise_kind + ins(%5, %B : tensor, tensor) + outs(%Out : tensor) -> tensor + return %6 : tensor +} + +// CHECK-LABEL: binary_ops_float +// CHECK-SAME: %[[A:.+]]: tensor, %[[B:.+]]: tensor, +// CHECK-SAME: %[[OUT:.+]]: tensor) +// CHECK-NOT: linalg.elementwise +// CHECK: %[[ADD:.+]] = linalg.add +// CHECK-SAME: ins(%[[A]], %[[B]] : tensor, tensor) +// CHECK-SAME: outs(%[[OUT]] : tensor) -> tensor +// CHECK: %[[SUB:.+]] = linalg.sub +// CHECK-SAME: ins(%[[ADD]], %[[B]] : tensor, tensor) +// CHECK-SAME: outs(%[[OUT]] : tensor) -> tensor +// CHECK: %[[MUL:.+]] = linalg.mul +// CHECK-SAME: ins(%[[SUB]], %[[B]] : tensor, tensor) +// CHECK-SAME: outs(%[[OUT]] : tensor) -> tensor +// CHECK: %[[DIV:.+]] = linalg.div +// CHECK-SAME: ins(%[[MUL]], %[[B]] : tensor, tensor) +// CHECK-SAME: outs(%[[OUT]] : tensor) -> tensor +// CHECK: %[[MAX:.+]] = linalg.max +// CHECK-SAME: ins(%[[DIV]], %[[B]] : tensor, tensor) +// CHECK-SAME: outs(%[[OUT]] : tensor) -> tensor +// CHECK: %[[MIN:.+]] = linalg.min +// CHECK-SAME: ins(%[[MAX]], %[[B]] : tensor, tensor) +// CHECK-SAME: outs(%[[OUT]] : tensor) -> tensor +// CHECK: linalg.powf +// CHECK-SAME: ins(%[[MIN]], %[[B]] : tensor, tensor) +// CHECK-SAME: outs(%[[OUT]] : tensor) -> tensor + +// ----- + +func.func @ternary_select(%A: tensor, %B: tensor, + %C: tensor, + %Out: tensor) -> tensor { + %0 = linalg.elementwise kind=#linalg.elementwise_kind